xref: /titanic_44/usr/src/uts/sun4v/io/ldc.c (revision 941880d67612f78a4bf94f75f083b521da6bd316)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * sun4v LDC Link Layer
31  */
32 #include <sys/types.h>
33 #include <sys/file.h>
34 #include <sys/errno.h>
35 #include <sys/open.h>
36 #include <sys/cred.h>
37 #include <sys/kmem.h>
38 #include <sys/conf.h>
39 #include <sys/cmn_err.h>
40 #include <sys/ksynch.h>
41 #include <sys/modctl.h>
42 #include <sys/stat.h> /* needed for S_IFBLK and S_IFCHR */
43 #include <sys/debug.h>
44 #include <sys/types.h>
45 #include <sys/cred.h>
46 #include <sys/promif.h>
47 #include <sys/ddi.h>
48 #include <sys/sunddi.h>
49 #include <sys/cyclic.h>
50 #include <sys/machsystm.h>
51 #include <sys/vm.h>
52 #include <sys/cpu.h>
53 #include <sys/intreg.h>
54 #include <sys/machcpuvar.h>
55 #include <sys/mmu.h>
56 #include <sys/pte.h>
57 #include <vm/hat.h>
58 #include <vm/as.h>
59 #include <vm/hat_sfmmu.h>
60 #include <sys/vm_machparam.h>
61 #include <vm/seg_kmem.h>
62 #include <vm/seg_kpm.h>
63 #include <sys/note.h>
64 #include <sys/ivintr.h>
65 #include <sys/hypervisor_api.h>
66 #include <sys/ldc.h>
67 #include <sys/ldc_impl.h>
68 #include <sys/cnex.h>
69 #include <sys/hsvc.h>
70 
71 /* Core internal functions */
72 static int i_ldc_h2v_error(int h_error);
73 static int i_ldc_txq_reconf(ldc_chan_t *ldcp);
74 static int i_ldc_rxq_reconf(ldc_chan_t *ldcp, boolean_t force_reset);
75 static int i_ldc_rxq_drain(ldc_chan_t *ldcp);
76 static void i_ldc_reset_state(ldc_chan_t *ldcp);
77 static void i_ldc_reset(ldc_chan_t *ldcp, boolean_t force_reset);
78 
79 static int i_ldc_get_tx_tail(ldc_chan_t *ldcp, uint64_t *tail);
80 static int i_ldc_set_tx_tail(ldc_chan_t *ldcp, uint64_t tail);
81 static int i_ldc_set_rx_head(ldc_chan_t *ldcp, uint64_t head);
82 static int i_ldc_send_pkt(ldc_chan_t *ldcp, uint8_t pkttype, uint8_t subtype,
83     uint8_t ctrlmsg);
84 
85 /* Interrupt handling functions */
86 static uint_t i_ldc_tx_hdlr(caddr_t arg1, caddr_t arg2);
87 static uint_t i_ldc_rx_hdlr(caddr_t arg1, caddr_t arg2);
88 static void i_ldc_clear_intr(ldc_chan_t *ldcp, cnex_intrtype_t itype);
89 
90 /* Read method functions */
91 static int i_ldc_read_raw(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep);
92 static int i_ldc_read_packet(ldc_chan_t *ldcp, caddr_t target_bufp,
93 	size_t *sizep);
94 static int i_ldc_read_stream(ldc_chan_t *ldcp, caddr_t target_bufp,
95 	size_t *sizep);
96 
97 /* Write method functions */
98 static int i_ldc_write_raw(ldc_chan_t *ldcp, caddr_t target_bufp,
99 	size_t *sizep);
100 static int i_ldc_write_packet(ldc_chan_t *ldcp, caddr_t target_bufp,
101 	size_t *sizep);
102 static int i_ldc_write_stream(ldc_chan_t *ldcp, caddr_t target_bufp,
103 	size_t *sizep);
104 
105 /* Pkt processing internal functions */
106 static int i_ldc_check_seqid(ldc_chan_t *ldcp, ldc_msg_t *ldcmsg);
107 static int i_ldc_ctrlmsg(ldc_chan_t *ldcp, ldc_msg_t *ldcmsg);
108 static int i_ldc_process_VER(ldc_chan_t *ldcp, ldc_msg_t *msg);
109 static int i_ldc_process_RTS(ldc_chan_t *ldcp, ldc_msg_t *msg);
110 static int i_ldc_process_RTR(ldc_chan_t *ldcp, ldc_msg_t *msg);
111 static int i_ldc_process_RDX(ldc_chan_t *ldcp, ldc_msg_t *msg);
112 static int i_ldc_process_data_ACK(ldc_chan_t *ldcp, ldc_msg_t *msg);
113 
114 /* Memory synchronization internal functions */
115 static int i_ldc_mem_acquire_release(ldc_mem_handle_t mhandle,
116     uint8_t direction, uint64_t offset, size_t size);
117 static int i_ldc_dring_acquire_release(ldc_dring_handle_t dhandle,
118     uint8_t direction, uint64_t start, uint64_t end);
119 
120 /* LDC Version */
121 static ldc_ver_t ldc_versions[] = { {1, 0} };
122 
123 /* number of supported versions */
124 #define	LDC_NUM_VERS	(sizeof (ldc_versions) / sizeof (ldc_versions[0]))
125 
126 /* Module State Pointer */
127 static ldc_soft_state_t *ldcssp;
128 
129 static struct modldrv md = {
130 	&mod_miscops,			/* This is a misc module */
131 	"sun4v LDC module v%I%",	/* Name of the module */
132 };
133 
134 static struct modlinkage ml = {
135 	MODREV_1,
136 	&md,
137 	NULL
138 };
139 
140 static uint64_t ldc_sup_minor;		/* Supported minor number */
141 static hsvc_info_t ldc_hsvc = {
142 	HSVC_REV_1, NULL, HSVC_GROUP_LDC, 1, 0, "ldc"
143 };
144 
145 static uint64_t intr_sup_minor;		/* Supported minor number */
146 static hsvc_info_t intr_hsvc = {
147 	HSVC_REV_1, NULL, HSVC_GROUP_INTR, 1, 0, "ldc"
148 };
149 
150 /*
151  * LDC framework supports mapping remote domain's memory
152  * either directly or via shadow memory pages. Default
153  * support is currently implemented via shadow copy.
154  * Direct map can be enabled by setting 'ldc_shmem_enabled'
155  */
156 int ldc_shmem_enabled = 0;
157 
158 /*
159  * The no. of MTU size messages that can be stored in
160  * the LDC Tx queue. The number of Tx queue entries is
161  * then computed as (mtu * mtu_msgs)/sizeof(queue_entry)
162  */
163 uint64_t ldc_mtu_msgs = LDC_MTU_MSGS;
164 
165 /*
166  * The minimum queue length. This is the size of the smallest
167  * LDC queue. If the computed value is less than this default,
168  * the queue length is rounded up to 'ldc_queue_entries'.
169  */
170 uint64_t ldc_queue_entries = LDC_QUEUE_ENTRIES;
171 
/*
 * Pages exported for remote access over each channel are
 * maintained in a table registered with the Hypervisor.
 * The default number of entries in the table is set to
 * 'ldc_maptable_entries'.
 */
178 uint64_t ldc_maptable_entries = LDC_MTBL_ENTRIES;
179 
180 /*
181  * LDC retry count and delay - when the HV returns EWOULDBLOCK
182  * the operation is retried 'ldc_max_retries' times with a
183  * wait of 'ldc_delay' usecs between each retry.
184  */
185 int ldc_max_retries = LDC_MAX_RETRIES;
186 clock_t ldc_delay = LDC_DELAY;
187 
188 #ifdef DEBUG
189 
190 /*
191  * Print debug messages
192  *
193  * set ldcdbg to 0x7 for enabling all msgs
194  * 0x4 - Warnings
195  * 0x2 - All debug messages
196  * 0x1 - Minimal debug messages
197  *
198  * set ldcdbgchan to the channel number you want to debug
199  * setting it to -1 prints debug messages for all channels
200  * NOTE: ldcdbgchan has no effect on error messages
201  */
202 
203 #define	DBG_ALL_LDCS -1
204 
205 int ldcdbg = 0x0;
206 int64_t ldcdbgchan = DBG_ALL_LDCS;
207 boolean_t ldc_inject_reset_flag = B_FALSE;
208 
209 static void
210 ldcdebug(int64_t id, const char *fmt, ...)
211 {
212 	char buf[512];
213 	va_list ap;
214 
215 	/*
216 	 * Do not return if,
217 	 * caller wants to print it anyway - (id == DBG_ALL_LDCS)
218 	 * debug channel is set to all LDCs - (ldcdbgchan == DBG_ALL_LDCS)
219 	 * debug channel = caller specified channel
220 	 */
221 	if ((id != DBG_ALL_LDCS) &&
222 	    (ldcdbgchan != DBG_ALL_LDCS) &&
223 	    (ldcdbgchan != id)) {
224 		return;
225 	}
226 
227 	va_start(ap, fmt);
228 	(void) vsprintf(buf, fmt, ap);
229 	va_end(ap);
230 
231 	cmn_err(CE_CONT, "?%s", buf);
232 }
233 
234 static boolean_t
235 ldc_inject_reset(ldc_chan_t *ldcp)
236 {
237 	if ((ldcdbgchan != DBG_ALL_LDCS) && (ldcdbgchan != ldcp->id))
238 		return (B_FALSE);
239 
240 	if (!ldc_inject_reset_flag)
241 		return (B_FALSE);
242 
243 	/* clear the injection state */
244 	ldc_inject_reset_flag = 0;
245 
246 	return (B_TRUE);
247 }
248 
249 #define	D1		\
250 if (ldcdbg & 0x01)	\
251 	ldcdebug
252 
253 #define	D2		\
254 if (ldcdbg & 0x02)	\
255 	ldcdebug
256 
257 #define	DWARN		\
258 if (ldcdbg & 0x04)	\
259 	ldcdebug
260 
261 #define	DUMP_PAYLOAD(id, addr)						\
262 {									\
263 	char buf[65*3];							\
264 	int i;								\
265 	uint8_t *src = (uint8_t *)addr;					\
266 	for (i = 0; i < 64; i++, src++)					\
267 		(void) sprintf(&buf[i * 3], "|%02x", *src);		\
268 	(void) sprintf(&buf[i * 3], "|\n");				\
269 	D2((id), "payload: %s", buf);					\
270 }
271 
272 #define	DUMP_LDC_PKT(c, s, addr)					\
273 {									\
274 	ldc_msg_t *msg = (ldc_msg_t *)(addr);				\
275 	uint32_t mid = ((c)->mode != LDC_MODE_RAW) ? msg->seqid : 0;	\
276 	if (msg->type == LDC_DATA) {                                    \
277 	    D2((c)->id, "%s: msg%d (/%x/%x/%x/,env[%c%c,sz=%d])",	\
278 	    (s), mid, msg->type, msg->stype, msg->ctrl,			\
279 	    (msg->env & LDC_FRAG_START) ? 'B' : ' ',                    \
280 	    (msg->env & LDC_FRAG_STOP) ? 'E' : ' ',                     \
281 	    (msg->env & LDC_LEN_MASK));					\
282 	} else { 							\
283 	    D2((c)->id, "%s: msg%d (/%x/%x/%x/,env=%x)", (s),		\
284 	    mid, msg->type, msg->stype, msg->ctrl, msg->env);		\
285 	} 								\
286 }
287 
288 #define	LDC_INJECT_RESET(_ldcp)	ldc_inject_reset(_ldcp)
289 
290 #else
291 
292 #define	DBG_ALL_LDCS -1
293 
294 #define	D1
295 #define	D2
296 #define	DWARN
297 
298 #define	DUMP_PAYLOAD(id, addr)
299 #define	DUMP_LDC_PKT(c, s, addr)
300 
301 #define	LDC_INJECT_RESET(_ldcp)	(B_FALSE)
302 
303 #endif
304 
305 #define	ZERO_PKT(p)			\
306 	bzero((p), sizeof (ldc_msg_t));
307 
308 #define	IDX2COOKIE(idx, pg_szc, pg_shift)				\
309 	(((pg_szc) << LDC_COOKIE_PGSZC_SHIFT) | ((idx) << (pg_shift)))
310 
311 
312 int
313 _init(void)
314 {
315 	int status;
316 
317 	status = hsvc_register(&ldc_hsvc, &ldc_sup_minor);
318 	if (status != 0) {
319 		cmn_err(CE_WARN, "%s: cannot negotiate hypervisor LDC services"
320 		    " group: 0x%lx major: %ld minor: %ld errno: %d",
321 		    ldc_hsvc.hsvc_modname, ldc_hsvc.hsvc_group,
322 		    ldc_hsvc.hsvc_major, ldc_hsvc.hsvc_minor, status);
323 		return (-1);
324 	}
325 
326 	status = hsvc_register(&intr_hsvc, &intr_sup_minor);
327 	if (status != 0) {
328 		cmn_err(CE_WARN, "%s: cannot negotiate hypervisor interrupt "
329 		    "services group: 0x%lx major: %ld minor: %ld errno: %d",
330 		    intr_hsvc.hsvc_modname, intr_hsvc.hsvc_group,
331 		    intr_hsvc.hsvc_major, intr_hsvc.hsvc_minor, status);
332 		(void) hsvc_unregister(&ldc_hsvc);
333 		return (-1);
334 	}
335 
336 	/* allocate soft state structure */
337 	ldcssp = kmem_zalloc(sizeof (ldc_soft_state_t), KM_SLEEP);
338 
339 	/* Link the module into the system */
340 	status = mod_install(&ml);
341 	if (status != 0) {
342 		kmem_free(ldcssp, sizeof (ldc_soft_state_t));
343 		return (status);
344 	}
345 
346 	/* Initialize the LDC state structure */
347 	mutex_init(&ldcssp->lock, NULL, MUTEX_DRIVER, NULL);
348 
349 	mutex_enter(&ldcssp->lock);
350 
351 	/* Create a cache for memory handles */
352 	ldcssp->memhdl_cache = kmem_cache_create("ldc_memhdl_cache",
353 	    sizeof (ldc_mhdl_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
354 	if (ldcssp->memhdl_cache == NULL) {
355 		DWARN(DBG_ALL_LDCS, "_init: ldc_memhdl cache create failed\n");
356 		mutex_exit(&ldcssp->lock);
357 		return (-1);
358 	}
359 
360 	/* Create cache for memory segment structures */
361 	ldcssp->memseg_cache = kmem_cache_create("ldc_memseg_cache",
362 	    sizeof (ldc_memseg_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
363 	if (ldcssp->memseg_cache == NULL) {
364 		DWARN(DBG_ALL_LDCS, "_init: ldc_memseg cache create failed\n");
365 		mutex_exit(&ldcssp->lock);
366 		return (-1);
367 	}
368 
369 
370 	ldcssp->channel_count = 0;
371 	ldcssp->channels_open = 0;
372 	ldcssp->chan_list = NULL;
373 	ldcssp->dring_list = NULL;
374 
375 	mutex_exit(&ldcssp->lock);
376 
377 	return (0);
378 }
379 
380 int
381 _info(struct modinfo *modinfop)
382 {
383 	/* Report status of the dynamically loadable driver module */
384 	return (mod_info(&ml, modinfop));
385 }
386 
387 int
388 _fini(void)
389 {
390 	int 		rv, status;
391 	ldc_chan_t 	*ldcp;
392 	ldc_dring_t 	*dringp;
393 	ldc_mem_info_t 	minfo;
394 
395 	/* Unlink the driver module from the system */
396 	status = mod_remove(&ml);
397 	if (status) {
398 		DWARN(DBG_ALL_LDCS, "_fini: mod_remove failed\n");
399 		return (EIO);
400 	}
401 
402 	/* close and finalize channels */
403 	ldcp = ldcssp->chan_list;
404 	while (ldcp != NULL) {
405 		(void) ldc_close((ldc_handle_t)ldcp);
406 		(void) ldc_fini((ldc_handle_t)ldcp);
407 
408 		ldcp = ldcp->next;
409 	}
410 
411 	/* Free descriptor rings */
412 	dringp = ldcssp->dring_list;
413 	while (dringp != NULL) {
414 		dringp = dringp->next;
415 
416 		rv = ldc_mem_dring_info((ldc_dring_handle_t)dringp, &minfo);
417 		if (rv == 0 && minfo.status != LDC_UNBOUND) {
418 			if (minfo.status == LDC_BOUND) {
419 				(void) ldc_mem_dring_unbind(
420 						(ldc_dring_handle_t)dringp);
421 			}
422 			if (minfo.status == LDC_MAPPED) {
423 				(void) ldc_mem_dring_unmap(
424 						(ldc_dring_handle_t)dringp);
425 			}
426 		}
427 
428 		(void) ldc_mem_dring_destroy((ldc_dring_handle_t)dringp);
429 	}
430 	ldcssp->dring_list = NULL;
431 
432 	/* Destroy kmem caches */
433 	kmem_cache_destroy(ldcssp->memhdl_cache);
434 	kmem_cache_destroy(ldcssp->memseg_cache);
435 
436 	/*
437 	 * We have successfully "removed" the driver.
438 	 * Destroying soft states
439 	 */
440 	mutex_destroy(&ldcssp->lock);
441 	kmem_free(ldcssp, sizeof (ldc_soft_state_t));
442 
443 	(void) hsvc_unregister(&ldc_hsvc);
444 	(void) hsvc_unregister(&intr_hsvc);
445 
446 	return (status);
447 }
448 
449 /* -------------------------------------------------------------------------- */
450 
451 /*
452  * LDC Link Layer Internal Functions
453  */
454 
455 /*
456  * Translate HV Errors to sun4v error codes
457  */
458 static int
459 i_ldc_h2v_error(int h_error)
460 {
461 	switch (h_error) {
462 
463 	case	H_EOK:
464 		return (0);
465 
466 	case	H_ENORADDR:
467 		return (EFAULT);
468 
469 	case	H_EBADPGSZ:
470 	case	H_EINVAL:
471 		return (EINVAL);
472 
473 	case	H_EWOULDBLOCK:
474 		return (EWOULDBLOCK);
475 
476 	case	H_ENOACCESS:
477 	case	H_ENOMAP:
478 		return (EACCES);
479 
480 	case	H_EIO:
481 	case	H_ECPUERROR:
482 		return (EIO);
483 
484 	case	H_ENOTSUPPORTED:
485 		return (ENOTSUP);
486 
487 	case 	H_ETOOMANY:
488 		return (ENOSPC);
489 
490 	case	H_ECHANNEL:
491 		return (ECHRNG);
492 	default:
493 		break;
494 	}
495 
496 	return (EIO);
497 }
498 
499 /*
500  * Reconfigure the transmit queue
501  */
502 static int
503 i_ldc_txq_reconf(ldc_chan_t *ldcp)
504 {
505 	int rv;
506 
507 	ASSERT(MUTEX_HELD(&ldcp->lock));
508 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
509 
510 	rv = hv_ldc_tx_qconf(ldcp->id, ldcp->tx_q_ra, ldcp->tx_q_entries);
511 	if (rv) {
512 		cmn_err(CE_WARN,
513 		    "i_ldc_txq_reconf: (0x%lx) cannot set qconf", ldcp->id);
514 		return (EIO);
515 	}
516 	rv = hv_ldc_tx_get_state(ldcp->id, &(ldcp->tx_head),
517 	    &(ldcp->tx_tail), &(ldcp->link_state));
518 	if (rv) {
519 		cmn_err(CE_WARN,
520 		    "i_ldc_txq_reconf: (0x%lx) cannot get qptrs", ldcp->id);
521 		return (EIO);
522 	}
523 	D1(ldcp->id, "i_ldc_txq_reconf: (0x%llx) h=0x%llx,t=0x%llx,"
524 	    "s=0x%llx\n", ldcp->id, ldcp->tx_head, ldcp->tx_tail,
525 	    ldcp->link_state);
526 
527 	return (0);
528 }
529 
530 /*
531  * Reconfigure the receive queue
532  */
533 static int
534 i_ldc_rxq_reconf(ldc_chan_t *ldcp, boolean_t force_reset)
535 {
536 	int rv;
537 	uint64_t rx_head, rx_tail;
538 
539 	ASSERT(MUTEX_HELD(&ldcp->lock));
540 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
541 	    &(ldcp->link_state));
542 	if (rv) {
543 		cmn_err(CE_WARN,
544 		    "i_ldc_rxq_reconf: (0x%lx) cannot get state",
545 		    ldcp->id);
546 		return (EIO);
547 	}
548 
549 	if (force_reset || (ldcp->tstate & ~TS_IN_RESET) == TS_UP) {
550 		rv = hv_ldc_rx_qconf(ldcp->id, ldcp->rx_q_ra,
551 			ldcp->rx_q_entries);
552 		if (rv) {
553 			cmn_err(CE_WARN,
554 			    "i_ldc_rxq_reconf: (0x%lx) cannot set qconf",
555 			    ldcp->id);
556 			return (EIO);
557 		}
558 		D1(ldcp->id, "i_ldc_rxq_reconf: (0x%llx) completed q reconf",
559 		    ldcp->id);
560 	}
561 
562 	return (0);
563 }
564 
565 
566 /*
567  * Drain the contents of the receive queue
568  */
569 static int
570 i_ldc_rxq_drain(ldc_chan_t *ldcp)
571 {
572 	int rv;
573 	uint64_t rx_head, rx_tail;
574 
575 	ASSERT(MUTEX_HELD(&ldcp->lock));
576 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
577 	    &(ldcp->link_state));
578 	if (rv) {
579 		cmn_err(CE_WARN, "i_ldc_rxq_drain: (0x%lx) cannot get state",
580 		    ldcp->id);
581 		return (EIO);
582 	}
583 
584 	/* flush contents by setting the head = tail */
585 	return (i_ldc_set_rx_head(ldcp, rx_tail));
586 }
587 
588 
589 /*
590  * Reset LDC state structure and its contents
591  */
592 static void
593 i_ldc_reset_state(ldc_chan_t *ldcp)
594 {
595 	ASSERT(MUTEX_HELD(&ldcp->lock));
596 	ldcp->last_msg_snt = LDC_INIT_SEQID;
597 	ldcp->last_ack_rcd = 0;
598 	ldcp->last_msg_rcd = 0;
599 	ldcp->tx_ackd_head = ldcp->tx_head;
600 	ldcp->next_vidx = 0;
601 	ldcp->hstate = 0;
602 	ldcp->tstate = TS_OPEN;
603 	ldcp->status = LDC_OPEN;
604 
605 	if (ldcp->link_state == LDC_CHANNEL_UP ||
606 	    ldcp->link_state == LDC_CHANNEL_RESET) {
607 
608 		if (ldcp->mode == LDC_MODE_RAW) {
609 			ldcp->status = LDC_UP;
610 			ldcp->tstate = TS_UP;
611 		} else {
612 			ldcp->status = LDC_READY;
613 			ldcp->tstate |= TS_LINK_READY;
614 		}
615 	}
616 }
617 
618 /*
619  * Reset a LDC channel
620  */
621 static void
622 i_ldc_reset(ldc_chan_t *ldcp, boolean_t force_reset)
623 {
624 	D1(ldcp->id, "i_ldc_reset: (0x%llx) channel reset\n", ldcp->id);
625 
626 	ASSERT(MUTEX_HELD(&ldcp->lock));
627 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
628 
629 	/* reconfig Tx and Rx queues */
630 	(void) i_ldc_txq_reconf(ldcp);
631 	(void) i_ldc_rxq_reconf(ldcp, force_reset);
632 
633 	/* Clear Tx and Rx interrupts */
634 	(void) i_ldc_clear_intr(ldcp, CNEX_TX_INTR);
635 	(void) i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
636 
637 	/* Reset channel state */
638 	i_ldc_reset_state(ldcp);
639 
640 	/* Mark channel in reset */
641 	ldcp->tstate |= TS_IN_RESET;
642 }
643 
644 
645 /*
646  * Clear pending interrupts
647  */
648 static void
649 i_ldc_clear_intr(ldc_chan_t *ldcp, cnex_intrtype_t itype)
650 {
651 	ldc_cnex_t *cinfo = &ldcssp->cinfo;
652 
653 	ASSERT(MUTEX_HELD(&ldcp->lock));
654 	ASSERT(cinfo->dip != NULL);
655 
656 	switch (itype) {
657 	case CNEX_TX_INTR:
658 		/* check Tx interrupt */
659 		if (ldcp->tx_intr_state)
660 			ldcp->tx_intr_state = LDC_INTR_NONE;
661 		else
662 			return;
663 		break;
664 
665 	case CNEX_RX_INTR:
666 		/* check Rx interrupt */
667 		if (ldcp->rx_intr_state)
668 			ldcp->rx_intr_state = LDC_INTR_NONE;
669 		else
670 			return;
671 		break;
672 	}
673 
674 	(void) cinfo->clr_intr(cinfo->dip, ldcp->id, itype);
675 	D2(ldcp->id,
676 	    "i_ldc_clear_intr: (0x%llx) cleared 0x%x intr\n",
677 	    ldcp->id, itype);
678 }
679 
680 /*
681  * Set the receive queue head
682  * Resets connection and returns an error if it fails.
683  */
684 static int
685 i_ldc_set_rx_head(ldc_chan_t *ldcp, uint64_t head)
686 {
687 	int 	rv;
688 	int 	retries;
689 
690 	ASSERT(MUTEX_HELD(&ldcp->lock));
691 	for (retries = 0; retries < ldc_max_retries; retries++) {
692 
693 		if ((rv = hv_ldc_rx_set_qhead(ldcp->id, head)) == 0)
694 			return (0);
695 
696 		if (rv != H_EWOULDBLOCK)
697 			break;
698 
699 		/* wait for ldc_delay usecs */
700 		drv_usecwait(ldc_delay);
701 	}
702 
703 	cmn_err(CE_WARN, "ldc_rx_set_qhead: (0x%lx) cannot set qhead 0x%lx",
704 		ldcp->id, head);
705 	mutex_enter(&ldcp->tx_lock);
706 	i_ldc_reset(ldcp, B_TRUE);
707 	mutex_exit(&ldcp->tx_lock);
708 
709 	return (ECONNRESET);
710 }
711 
712 
713 /*
714  * Returns the tx_tail to be used for transfer
715  * Re-reads the TX queue ptrs if and only if the
716  * the cached head and tail are equal (queue is full)
717  */
718 static int
719 i_ldc_get_tx_tail(ldc_chan_t *ldcp, uint64_t *tail)
720 {
721 	int 		rv;
722 	uint64_t 	current_head, new_tail;
723 
724 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
725 	/* Read the head and tail ptrs from HV */
726 	rv = hv_ldc_tx_get_state(ldcp->id,
727 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
728 	if (rv) {
729 		cmn_err(CE_WARN,
730 		    "i_ldc_get_tx_tail: (0x%lx) cannot read qptrs\n",
731 		    ldcp->id);
732 		return (EIO);
733 	}
734 	if (ldcp->link_state == LDC_CHANNEL_DOWN) {
735 		D1(ldcp->id, "i_ldc_get_tx_tail: (0x%llx) channel not ready\n",
736 		    ldcp->id);
737 		return (ECONNRESET);
738 	}
739 
740 	/* In reliable mode, check against last ACKd msg */
741 	current_head = (ldcp->mode == LDC_MODE_RELIABLE ||
742 		ldcp->mode == LDC_MODE_STREAM)
743 		? ldcp->tx_ackd_head : ldcp->tx_head;
744 
745 	/* increment the tail */
746 	new_tail = (ldcp->tx_tail + LDC_PACKET_SIZE) %
747 		(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
748 
749 	if (new_tail == current_head) {
750 		DWARN(ldcp->id,
751 		    "i_ldc_get_tx_tail: (0x%llx) TX queue is full\n",
752 		    ldcp->id);
753 		return (EWOULDBLOCK);
754 	}
755 
756 	D2(ldcp->id, "i_ldc_get_tx_tail: (0x%llx) head=0x%llx, tail=0x%llx\n",
757 	    ldcp->id, ldcp->tx_head, ldcp->tx_tail);
758 
759 	*tail = ldcp->tx_tail;
760 	return (0);
761 }
762 
763 /*
764  * Set the tail pointer. If HV returns EWOULDBLOCK, it will back off
765  * and retry ldc_max_retries times before returning an error.
766  * Returns 0, EWOULDBLOCK or EIO
767  */
768 static int
769 i_ldc_set_tx_tail(ldc_chan_t *ldcp, uint64_t tail)
770 {
771 	int		rv, retval = EWOULDBLOCK;
772 	int 		retries;
773 
774 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
775 	for (retries = 0; retries < ldc_max_retries; retries++) {
776 
777 		if ((rv = hv_ldc_tx_set_qtail(ldcp->id, tail)) == 0) {
778 			retval = 0;
779 			break;
780 		}
781 		if (rv != H_EWOULDBLOCK) {
782 			DWARN(ldcp->id, "i_ldc_set_tx_tail: (0x%llx) set "
783 			    "qtail=0x%llx failed, rv=%d\n", ldcp->id, tail, rv);
784 			retval = EIO;
785 			break;
786 		}
787 
788 		/* wait for ldc_delay usecs */
789 		drv_usecwait(ldc_delay);
790 	}
791 	return (retval);
792 }
793 
794 /*
795  * Send a LDC message
796  */
797 static int
798 i_ldc_send_pkt(ldc_chan_t *ldcp, uint8_t pkttype, uint8_t subtype,
799     uint8_t ctrlmsg)
800 {
801 	int		rv;
802 	ldc_msg_t 	*pkt;
803 	uint64_t	tx_tail;
804 	uint32_t	curr_seqid = ldcp->last_msg_snt;
805 
806 	/* Obtain Tx lock */
807 	mutex_enter(&ldcp->tx_lock);
808 
809 	/* get the current tail for the message */
810 	rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
811 	if (rv) {
812 		DWARN(ldcp->id,
813 		    "i_ldc_send_pkt: (0x%llx) error sending pkt, "
814 		    "type=0x%x,subtype=0x%x,ctrl=0x%x\n",
815 		    ldcp->id, pkttype, subtype, ctrlmsg);
816 		mutex_exit(&ldcp->tx_lock);
817 		return (rv);
818 	}
819 
820 	pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
821 	ZERO_PKT(pkt);
822 
823 	/* Initialize the packet */
824 	pkt->type = pkttype;
825 	pkt->stype = subtype;
826 	pkt->ctrl = ctrlmsg;
827 
828 	/* Store ackid/seqid iff it is RELIABLE mode & not a RTS/RTR message */
829 	if (((ctrlmsg & LDC_CTRL_MASK) != LDC_RTS) &&
830 	    ((ctrlmsg & LDC_CTRL_MASK) != LDC_RTR)) {
831 		curr_seqid++;
832 		if (ldcp->mode != LDC_MODE_RAW) {
833 			pkt->seqid = curr_seqid;
834 			pkt->ackid = ldcp->last_msg_rcd;
835 		}
836 	}
837 	DUMP_LDC_PKT(ldcp, "i_ldc_send_pkt", (uint64_t)pkt);
838 
839 	/* initiate the send by calling into HV and set the new tail */
840 	tx_tail = (tx_tail + LDC_PACKET_SIZE) %
841 		(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
842 
843 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
844 	if (rv) {
845 		DWARN(ldcp->id,
846 		    "i_ldc_send_pkt:(0x%llx) error sending pkt, "
847 		    "type=0x%x,stype=0x%x,ctrl=0x%x\n",
848 		    ldcp->id, pkttype, subtype, ctrlmsg);
849 		mutex_exit(&ldcp->tx_lock);
850 		return (EIO);
851 	}
852 
853 	ldcp->last_msg_snt = curr_seqid;
854 	ldcp->tx_tail = tx_tail;
855 
856 	mutex_exit(&ldcp->tx_lock);
857 	return (0);
858 }
859 
860 /*
861  * Checks if packet was received in right order
862  * in the case of a reliable link.
863  * Returns 0 if in order, else EIO
864  */
865 static int
866 i_ldc_check_seqid(ldc_chan_t *ldcp, ldc_msg_t *msg)
867 {
868 	/* No seqid checking for RAW mode */
869 	if (ldcp->mode == LDC_MODE_RAW)
870 		return (0);
871 
872 	/* No seqid checking for version, RTS, RTR message */
873 	if (msg->ctrl == LDC_VER ||
874 	    msg->ctrl == LDC_RTS ||
875 	    msg->ctrl == LDC_RTR)
876 		return (0);
877 
878 	/* Initial seqid to use is sent in RTS/RTR and saved in last_msg_rcd */
879 	if (msg->seqid != (ldcp->last_msg_rcd + 1)) {
880 		DWARN(ldcp->id,
881 		    "i_ldc_check_seqid: (0x%llx) out-of-order pkt, got 0x%x, "
882 		    "expecting 0x%x\n", ldcp->id, msg->seqid,
883 		    (ldcp->last_msg_rcd + 1));
884 		return (EIO);
885 	}
886 
887 	return (0);
888 }
889 
890 
891 /*
892  * Process an incoming version ctrl message
893  */
894 static int
895 i_ldc_process_VER(ldc_chan_t *ldcp, ldc_msg_t *msg)
896 {
897 	int 		rv = 0, idx = ldcp->next_vidx;
898 	ldc_msg_t 	*pkt;
899 	uint64_t	tx_tail;
900 	ldc_ver_t	*rcvd_ver;
901 
902 	/* get the received version */
903 	rcvd_ver = (ldc_ver_t *)((uint64_t)msg + LDC_PAYLOAD_VER_OFF);
904 
905 	D2(ldcp->id, "i_ldc_process_VER: (0x%llx) received VER v%u.%u\n",
906 	    ldcp->id, rcvd_ver->major, rcvd_ver->minor);
907 
908 	/* Obtain Tx lock */
909 	mutex_enter(&ldcp->tx_lock);
910 
911 	switch (msg->stype) {
912 	case LDC_INFO:
913 
914 		if ((ldcp->tstate & ~TS_IN_RESET) == TS_VREADY) {
915 			(void) i_ldc_txq_reconf(ldcp);
916 			i_ldc_reset_state(ldcp);
917 			mutex_exit(&ldcp->tx_lock);
918 			return (EAGAIN);
919 		}
920 
921 		/* get the current tail and pkt for the response */
922 		rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
923 		if (rv != 0) {
924 			DWARN(ldcp->id,
925 			    "i_ldc_process_VER: (0x%llx) err sending "
926 			    "version ACK/NACK\n", ldcp->id);
927 			i_ldc_reset(ldcp, B_TRUE);
928 			mutex_exit(&ldcp->tx_lock);
929 			return (ECONNRESET);
930 		}
931 
932 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
933 		ZERO_PKT(pkt);
934 
935 		/* initialize the packet */
936 		pkt->type = LDC_CTRL;
937 		pkt->ctrl = LDC_VER;
938 
939 		for (;;) {
940 
941 			D1(ldcp->id, "i_ldc_process_VER: got %u.%u chk %u.%u\n",
942 			    rcvd_ver->major, rcvd_ver->minor,
943 			    ldc_versions[idx].major, ldc_versions[idx].minor);
944 
945 			if (rcvd_ver->major == ldc_versions[idx].major) {
946 				/* major version match - ACK version */
947 				pkt->stype = LDC_ACK;
948 
949 				/*
950 				 * lower minor version to the one this endpt
951 				 * supports, if necessary
952 				 */
953 				if (rcvd_ver->minor > ldc_versions[idx].minor)
954 					rcvd_ver->minor =
955 						ldc_versions[idx].minor;
956 				bcopy(rcvd_ver, pkt->udata, sizeof (*rcvd_ver));
957 
958 				break;
959 			}
960 
961 			if (rcvd_ver->major > ldc_versions[idx].major) {
962 
963 				D1(ldcp->id, "i_ldc_process_VER: using next"
964 				    " lower idx=%d, v%u.%u\n", idx,
965 				    ldc_versions[idx].major,
966 				    ldc_versions[idx].minor);
967 
968 				/* nack with next lower version */
969 				pkt->stype = LDC_NACK;
970 				bcopy(&ldc_versions[idx], pkt->udata,
971 				    sizeof (ldc_versions[idx]));
972 				ldcp->next_vidx = idx;
973 				break;
974 			}
975 
976 			/* next major version */
977 			idx++;
978 
979 			D1(ldcp->id, "i_ldc_process_VER: inc idx %x\n", idx);
980 
981 			if (idx == LDC_NUM_VERS) {
982 				/* no version match - send NACK */
983 				pkt->stype = LDC_NACK;
984 				bzero(pkt->udata, sizeof (ldc_ver_t));
985 				ldcp->next_vidx = 0;
986 				break;
987 			}
988 		}
989 
990 		/* initiate the send by calling into HV and set the new tail */
991 		tx_tail = (tx_tail + LDC_PACKET_SIZE) %
992 			(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
993 
994 		rv = i_ldc_set_tx_tail(ldcp, tx_tail);
995 		if (rv == 0) {
996 			ldcp->tx_tail = tx_tail;
997 			if (pkt->stype == LDC_ACK) {
998 				D2(ldcp->id, "i_ldc_process_VER: (0x%llx) sent"
999 				    " version ACK\n", ldcp->id);
1000 				/* Save the ACK'd version */
1001 				ldcp->version.major = rcvd_ver->major;
1002 				ldcp->version.minor = rcvd_ver->minor;
1003 				ldcp->hstate |= TS_RCVD_VER;
1004 				ldcp->tstate |= TS_VER_DONE;
1005 				DWARN(DBG_ALL_LDCS,
1006 				    "(0x%llx) Sent ACK, "
1007 				    "Agreed on version v%u.%u\n",
1008 				    ldcp->id, rcvd_ver->major, rcvd_ver->minor);
1009 			}
1010 		} else {
1011 			DWARN(ldcp->id,
1012 			    "i_ldc_process_VER: (0x%llx) error sending "
1013 			    "ACK/NACK\n", ldcp->id);
1014 			i_ldc_reset(ldcp, B_TRUE);
1015 			mutex_exit(&ldcp->tx_lock);
1016 			return (ECONNRESET);
1017 		}
1018 
1019 		break;
1020 
1021 	case LDC_ACK:
1022 		if ((ldcp->tstate & ~TS_IN_RESET) == TS_VREADY) {
1023 			if (ldcp->version.major != rcvd_ver->major ||
1024 				ldcp->version.minor != rcvd_ver->minor) {
1025 
1026 				/* mismatched version - reset connection */
1027 				DWARN(ldcp->id,
1028 					"i_ldc_process_VER: (0x%llx) recvd"
1029 					" ACK ver != sent ACK ver\n", ldcp->id);
1030 				i_ldc_reset(ldcp, B_TRUE);
1031 				mutex_exit(&ldcp->tx_lock);
1032 				return (ECONNRESET);
1033 			}
1034 		} else {
1035 			/* SUCCESS - we have agreed on a version */
1036 			ldcp->version.major = rcvd_ver->major;
1037 			ldcp->version.minor = rcvd_ver->minor;
1038 			ldcp->tstate |= TS_VER_DONE;
1039 		}
1040 
1041 		D1(ldcp->id, "(0x%llx) Got ACK, Agreed on version v%u.%u\n",
1042 		    ldcp->id, rcvd_ver->major, rcvd_ver->minor);
1043 
1044 		/* initiate RTS-RTR-RDX handshake */
1045 		rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
1046 		if (rv) {
1047 			DWARN(ldcp->id,
1048 		    "i_ldc_process_VER: (0x%llx) cannot send RTS\n",
1049 			    ldcp->id);
1050 			i_ldc_reset(ldcp, B_TRUE);
1051 			mutex_exit(&ldcp->tx_lock);
1052 			return (ECONNRESET);
1053 		}
1054 
1055 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
1056 		ZERO_PKT(pkt);
1057 
1058 		pkt->type = LDC_CTRL;
1059 		pkt->stype = LDC_INFO;
1060 		pkt->ctrl = LDC_RTS;
1061 		pkt->env = ldcp->mode;
1062 		if (ldcp->mode != LDC_MODE_RAW)
1063 			pkt->seqid = LDC_INIT_SEQID;
1064 
1065 		ldcp->last_msg_rcd = LDC_INIT_SEQID;
1066 
1067 		DUMP_LDC_PKT(ldcp, "i_ldc_process_VER snd rts", (uint64_t)pkt);
1068 
1069 		/* initiate the send by calling into HV and set the new tail */
1070 		tx_tail = (tx_tail + LDC_PACKET_SIZE) %
1071 			(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1072 
1073 		rv = i_ldc_set_tx_tail(ldcp, tx_tail);
1074 		if (rv) {
1075 			D2(ldcp->id,
1076 			    "i_ldc_process_VER: (0x%llx) no listener\n",
1077 			    ldcp->id);
1078 			i_ldc_reset(ldcp, B_TRUE);
1079 			mutex_exit(&ldcp->tx_lock);
1080 			return (ECONNRESET);
1081 		}
1082 
1083 		ldcp->tx_tail = tx_tail;
1084 		ldcp->hstate |= TS_SENT_RTS;
1085 
1086 		break;
1087 
1088 	case LDC_NACK:
1089 		/* check if version in NACK is zero */
1090 		if (rcvd_ver->major == 0 && rcvd_ver->minor == 0) {
1091 			/* version handshake failure */
1092 			DWARN(DBG_ALL_LDCS,
1093 			    "i_ldc_process_VER: (0x%llx) no version match\n",
1094 			    ldcp->id);
1095 			i_ldc_reset(ldcp, B_TRUE);
1096 			mutex_exit(&ldcp->tx_lock);
1097 			return (ECONNRESET);
1098 		}
1099 
1100 		/* get the current tail and pkt for the response */
1101 		rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
1102 		if (rv != 0) {
1103 			cmn_err(CE_NOTE,
1104 			    "i_ldc_process_VER: (0x%lx) err sending "
1105 			    "version ACK/NACK\n", ldcp->id);
1106 			i_ldc_reset(ldcp, B_TRUE);
1107 			mutex_exit(&ldcp->tx_lock);
1108 			return (ECONNRESET);
1109 		}
1110 
1111 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
1112 		ZERO_PKT(pkt);
1113 
1114 		/* initialize the packet */
1115 		pkt->type = LDC_CTRL;
1116 		pkt->ctrl = LDC_VER;
1117 		pkt->stype = LDC_INFO;
1118 
1119 		/* check ver in NACK msg has a match */
1120 		for (;;) {
1121 			if (rcvd_ver->major == ldc_versions[idx].major) {
1122 				/*
1123 				 * major version match - resubmit request
1124 				 * if lower minor version to the one this endpt
1125 				 * supports, if necessary
1126 				 */
1127 				if (rcvd_ver->minor > ldc_versions[idx].minor)
1128 					rcvd_ver->minor =
1129 						ldc_versions[idx].minor;
1130 				bcopy(rcvd_ver, pkt->udata, sizeof (*rcvd_ver));
1131 				break;
1132 
1133 			}
1134 
1135 			if (rcvd_ver->major > ldc_versions[idx].major) {
1136 
1137 				D1(ldcp->id, "i_ldc_process_VER: using next"
1138 				    " lower idx=%d, v%u.%u\n", idx,
1139 				    ldc_versions[idx].major,
1140 				    ldc_versions[idx].minor);
1141 
1142 				/* send next lower version */
1143 				bcopy(&ldc_versions[idx], pkt->udata,
1144 				    sizeof (ldc_versions[idx]));
1145 				ldcp->next_vidx = idx;
1146 				break;
1147 			}
1148 
1149 			/* next version */
1150 			idx++;
1151 
1152 			D1(ldcp->id, "i_ldc_process_VER: inc idx %x\n", idx);
1153 
1154 			if (idx == LDC_NUM_VERS) {
1155 				/* no version match - terminate */
1156 				ldcp->next_vidx = 0;
1157 				mutex_exit(&ldcp->tx_lock);
1158 				return (ECONNRESET);
1159 			}
1160 		}
1161 
1162 		/* initiate the send by calling into HV and set the new tail */
1163 		tx_tail = (tx_tail + LDC_PACKET_SIZE) %
1164 			(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1165 
1166 		rv = i_ldc_set_tx_tail(ldcp, tx_tail);
1167 		if (rv == 0) {
1168 			D2(ldcp->id, "i_ldc_process_VER: (0x%llx) sent version"
1169 			    "INFO v%u.%u\n", ldcp->id, ldc_versions[idx].major,
1170 			    ldc_versions[idx].minor);
1171 			ldcp->tx_tail = tx_tail;
1172 		} else {
1173 			cmn_err(CE_NOTE,
1174 			    "i_ldc_process_VER: (0x%lx) error sending version"
1175 			    "INFO\n", ldcp->id);
1176 			i_ldc_reset(ldcp, B_TRUE);
1177 			mutex_exit(&ldcp->tx_lock);
1178 			return (ECONNRESET);
1179 		}
1180 
1181 		break;
1182 	}
1183 
1184 	mutex_exit(&ldcp->tx_lock);
1185 	return (rv);
1186 }
1187 
1188 
1189 /*
1190  * Process an incoming RTS ctrl message
1191  */
1192 static int
1193 i_ldc_process_RTS(ldc_chan_t *ldcp, ldc_msg_t *msg)
1194 {
1195 	int 		rv = 0;
1196 	ldc_msg_t 	*pkt;
1197 	uint64_t	tx_tail;
1198 	boolean_t	sent_NACK = B_FALSE;
1199 
1200 	D2(ldcp->id, "i_ldc_process_RTS: (0x%llx) received RTS\n", ldcp->id);
1201 
1202 	switch (msg->stype) {
1203 	case LDC_NACK:
1204 		DWARN(ldcp->id,
1205 		    "i_ldc_process_RTS: (0x%llx) RTS NACK received\n",
1206 		    ldcp->id);
1207 
1208 		/* Reset the channel -- as we cannot continue */
1209 		mutex_enter(&ldcp->tx_lock);
1210 		i_ldc_reset(ldcp, B_TRUE);
1211 		mutex_exit(&ldcp->tx_lock);
1212 		rv = ECONNRESET;
1213 		break;
1214 
1215 	case LDC_INFO:
1216 
1217 		/* check mode */
1218 		if (ldcp->mode != (ldc_mode_t)msg->env) {
1219 			cmn_err(CE_NOTE,
1220 			    "i_ldc_process_RTS: (0x%lx) mode mismatch\n",
1221 			    ldcp->id);
1222 			/*
1223 			 * send NACK in response to MODE message
1224 			 * get the current tail for the response
1225 			 */
1226 			rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_NACK, LDC_RTS);
1227 			if (rv) {
1228 				/* if cannot send NACK - reset channel */
1229 				mutex_enter(&ldcp->tx_lock);
1230 				i_ldc_reset(ldcp, B_TRUE);
1231 				mutex_exit(&ldcp->tx_lock);
1232 				rv = ECONNRESET;
1233 				break;
1234 			}
1235 			sent_NACK = B_TRUE;
1236 		}
1237 		break;
1238 	default:
1239 		DWARN(ldcp->id, "i_ldc_process_RTS: (0x%llx) unexp ACK\n",
1240 		    ldcp->id);
1241 		mutex_enter(&ldcp->tx_lock);
1242 		i_ldc_reset(ldcp, B_TRUE);
1243 		mutex_exit(&ldcp->tx_lock);
1244 		rv = ECONNRESET;
1245 		break;
1246 	}
1247 
1248 	/*
1249 	 * If either the connection was reset (when rv != 0) or
1250 	 * a NACK was sent, we return. In the case of a NACK
1251 	 * we dont want to consume the packet that came in but
1252 	 * not record that we received the RTS
1253 	 */
1254 	if (rv || sent_NACK)
1255 		return (rv);
1256 
1257 	/* record RTS received */
1258 	ldcp->hstate |= TS_RCVD_RTS;
1259 
1260 	/* store initial SEQID info */
1261 	ldcp->last_msg_snt = msg->seqid;
1262 
1263 	/* Obtain Tx lock */
1264 	mutex_enter(&ldcp->tx_lock);
1265 
1266 	/* get the current tail for the response */
1267 	rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
1268 	if (rv != 0) {
1269 		cmn_err(CE_NOTE,
1270 		    "i_ldc_process_RTS: (0x%lx) err sending RTR\n",
1271 		    ldcp->id);
1272 		i_ldc_reset(ldcp, B_TRUE);
1273 		mutex_exit(&ldcp->tx_lock);
1274 		return (ECONNRESET);
1275 	}
1276 
1277 	pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
1278 	ZERO_PKT(pkt);
1279 
1280 	/* initialize the packet */
1281 	pkt->type = LDC_CTRL;
1282 	pkt->stype = LDC_INFO;
1283 	pkt->ctrl = LDC_RTR;
1284 	pkt->env = ldcp->mode;
1285 	if (ldcp->mode != LDC_MODE_RAW)
1286 		pkt->seqid = LDC_INIT_SEQID;
1287 
1288 	ldcp->last_msg_rcd = msg->seqid;
1289 
1290 	/* initiate the send by calling into HV and set the new tail */
1291 	tx_tail = (tx_tail + LDC_PACKET_SIZE) %
1292 		(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1293 
1294 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
1295 	if (rv == 0) {
1296 		D2(ldcp->id,
1297 		    "i_ldc_process_RTS: (0x%llx) sent RTR\n", ldcp->id);
1298 		DUMP_LDC_PKT(ldcp, "i_ldc_process_RTS sent rtr", (uint64_t)pkt);
1299 
1300 		ldcp->tx_tail = tx_tail;
1301 		ldcp->hstate |= TS_SENT_RTR;
1302 
1303 	} else {
1304 		cmn_err(CE_NOTE,
1305 		    "i_ldc_process_RTS: (0x%lx) error sending RTR\n",
1306 		    ldcp->id);
1307 		i_ldc_reset(ldcp, B_TRUE);
1308 		mutex_exit(&ldcp->tx_lock);
1309 		return (ECONNRESET);
1310 	}
1311 
1312 	mutex_exit(&ldcp->tx_lock);
1313 	return (0);
1314 }
1315 
1316 /*
1317  * Process an incoming RTR ctrl message
1318  */
1319 static int
1320 i_ldc_process_RTR(ldc_chan_t *ldcp, ldc_msg_t *msg)
1321 {
1322 	int 		rv = 0;
1323 	boolean_t	sent_NACK = B_FALSE;
1324 
1325 	D2(ldcp->id, "i_ldc_process_RTR: (0x%llx) received RTR\n", ldcp->id);
1326 
1327 	switch (msg->stype) {
1328 	case LDC_NACK:
1329 		/* RTR NACK received */
1330 		DWARN(ldcp->id,
1331 		    "i_ldc_process_RTR: (0x%llx) RTR NACK received\n",
1332 		    ldcp->id);
1333 
1334 		/* Reset the channel -- as we cannot continue */
1335 		mutex_enter(&ldcp->tx_lock);
1336 		i_ldc_reset(ldcp, B_TRUE);
1337 		mutex_exit(&ldcp->tx_lock);
1338 		rv = ECONNRESET;
1339 
1340 		break;
1341 
1342 	case LDC_INFO:
1343 
1344 		/* check mode */
1345 		if (ldcp->mode != (ldc_mode_t)msg->env) {
1346 			DWARN(ldcp->id,
1347 			    "i_ldc_process_RTR: (0x%llx) mode mismatch, "
1348 			    "expecting 0x%x, got 0x%x\n",
1349 			    ldcp->id, ldcp->mode, (ldc_mode_t)msg->env);
1350 			/*
1351 			 * send NACK in response to MODE message
1352 			 * get the current tail for the response
1353 			 */
1354 			rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_NACK, LDC_RTR);
1355 			if (rv) {
1356 				/* if cannot send NACK - reset channel */
1357 				mutex_enter(&ldcp->tx_lock);
1358 				i_ldc_reset(ldcp, B_TRUE);
1359 				mutex_exit(&ldcp->tx_lock);
1360 				rv = ECONNRESET;
1361 				break;
1362 			}
1363 			sent_NACK = B_TRUE;
1364 		}
1365 		break;
1366 
1367 	default:
1368 		DWARN(ldcp->id, "i_ldc_process_RTR: (0x%llx) unexp ACK\n",
1369 		    ldcp->id);
1370 
1371 		/* Reset the channel -- as we cannot continue */
1372 		mutex_enter(&ldcp->tx_lock);
1373 		i_ldc_reset(ldcp, B_TRUE);
1374 		mutex_exit(&ldcp->tx_lock);
1375 		rv = ECONNRESET;
1376 		break;
1377 	}
1378 
1379 	/*
1380 	 * If either the connection was reset (when rv != 0) or
1381 	 * a NACK was sent, we return. In the case of a NACK
1382 	 * we dont want to consume the packet that came in but
1383 	 * not record that we received the RTR
1384 	 */
1385 	if (rv || sent_NACK)
1386 		return (rv);
1387 
1388 	ldcp->last_msg_snt = msg->seqid;
1389 	ldcp->hstate |= TS_RCVD_RTR;
1390 
1391 	rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_INFO, LDC_RDX);
1392 	if (rv) {
1393 		cmn_err(CE_NOTE,
1394 		    "i_ldc_process_RTR: (0x%lx) cannot send RDX\n",
1395 		    ldcp->id);
1396 		mutex_enter(&ldcp->tx_lock);
1397 		i_ldc_reset(ldcp, B_TRUE);
1398 		mutex_exit(&ldcp->tx_lock);
1399 		return (ECONNRESET);
1400 	}
1401 	D2(ldcp->id,
1402 	    "i_ldc_process_RTR: (0x%llx) sent RDX\n", ldcp->id);
1403 
1404 	ldcp->hstate |= TS_SENT_RDX;
1405 	ldcp->tstate |= TS_HSHAKE_DONE;
1406 	if ((ldcp->tstate & TS_IN_RESET) == 0)
1407 		ldcp->status = LDC_UP;
1408 
1409 	D1(ldcp->id, "(0x%llx) Handshake Complete\n", ldcp->id);
1410 
1411 	return (0);
1412 }
1413 
1414 
1415 /*
1416  * Process an incoming RDX ctrl message
1417  */
1418 static int
1419 i_ldc_process_RDX(ldc_chan_t *ldcp, ldc_msg_t *msg)
1420 {
1421 	int	rv = 0;
1422 
1423 	D2(ldcp->id, "i_ldc_process_RDX: (0x%llx) received RDX\n", ldcp->id);
1424 
1425 	switch (msg->stype) {
1426 	case LDC_NACK:
1427 		/* RDX NACK received */
1428 		DWARN(ldcp->id,
1429 		    "i_ldc_process_RDX: (0x%llx) RDX NACK received\n",
1430 		    ldcp->id);
1431 
1432 		/* Reset the channel -- as we cannot continue */
1433 		mutex_enter(&ldcp->tx_lock);
1434 		i_ldc_reset(ldcp, B_TRUE);
1435 		mutex_exit(&ldcp->tx_lock);
1436 		rv = ECONNRESET;
1437 
1438 		break;
1439 
1440 	case LDC_INFO:
1441 
1442 		/*
1443 		 * if channel is UP and a RDX received after data transmission
1444 		 * has commenced it is an error
1445 		 */
1446 		if ((ldcp->tstate == TS_UP) && (ldcp->hstate & TS_RCVD_RDX)) {
1447 			DWARN(DBG_ALL_LDCS,
1448 			    "i_ldc_process_RDX: (0x%llx) unexpected RDX"
1449 			    " - LDC reset\n", ldcp->id);
1450 			mutex_enter(&ldcp->tx_lock);
1451 			i_ldc_reset(ldcp, B_TRUE);
1452 			mutex_exit(&ldcp->tx_lock);
1453 			return (ECONNRESET);
1454 		}
1455 
1456 		ldcp->hstate |= TS_RCVD_RDX;
1457 		ldcp->tstate |= TS_HSHAKE_DONE;
1458 		if ((ldcp->tstate & TS_IN_RESET) == 0)
1459 			ldcp->status = LDC_UP;
1460 
1461 		D1(DBG_ALL_LDCS, "(0x%llx) Handshake Complete\n", ldcp->id);
1462 		break;
1463 
1464 	default:
1465 		DWARN(ldcp->id, "i_ldc_process_RDX: (0x%llx) unexp ACK\n",
1466 		    ldcp->id);
1467 
1468 		/* Reset the channel -- as we cannot continue */
1469 		mutex_enter(&ldcp->tx_lock);
1470 		i_ldc_reset(ldcp, B_TRUE);
1471 		mutex_exit(&ldcp->tx_lock);
1472 		rv = ECONNRESET;
1473 		break;
1474 	}
1475 
1476 	return (rv);
1477 }
1478 
1479 /*
1480  * Process an incoming ACK for a data packet
1481  */
1482 static int
1483 i_ldc_process_data_ACK(ldc_chan_t *ldcp, ldc_msg_t *msg)
1484 {
1485 	int		rv;
1486 	uint64_t 	tx_head;
1487 	ldc_msg_t	*pkt;
1488 
1489 	/* Obtain Tx lock */
1490 	mutex_enter(&ldcp->tx_lock);
1491 
1492 	/*
1493 	 * Read the current Tx head and tail
1494 	 */
1495 	rv = hv_ldc_tx_get_state(ldcp->id,
1496 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
1497 	if (rv != 0) {
1498 		cmn_err(CE_WARN,
1499 		    "i_ldc_process_data_ACK: (0x%lx) cannot read qptrs\n",
1500 		    ldcp->id);
1501 
1502 		/* Reset the channel -- as we cannot continue */
1503 		i_ldc_reset(ldcp, B_TRUE);
1504 		mutex_exit(&ldcp->tx_lock);
1505 		return (ECONNRESET);
1506 	}
1507 
1508 	/*
1509 	 * loop from where the previous ACK location was to the
1510 	 * current head location. This is how far the HV has
1511 	 * actually send pkts. Pkts between head and tail are
1512 	 * yet to be sent by HV.
1513 	 */
1514 	tx_head = ldcp->tx_ackd_head;
1515 	for (;;) {
1516 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_head);
1517 		tx_head = (tx_head + LDC_PACKET_SIZE) %
1518 			(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1519 
1520 		if (pkt->seqid == msg->ackid) {
1521 			D2(ldcp->id,
1522 			    "i_ldc_process_data_ACK: (0x%llx) found packet\n",
1523 			    ldcp->id);
1524 			ldcp->last_ack_rcd = msg->ackid;
1525 			ldcp->tx_ackd_head = tx_head;
1526 			break;
1527 		}
1528 		if (tx_head == ldcp->tx_head) {
1529 			/* could not find packet */
1530 			DWARN(ldcp->id,
1531 			    "i_ldc_process_data_ACK: (0x%llx) invalid ACKid\n",
1532 			    ldcp->id);
1533 
1534 			/* Reset the channel -- as we cannot continue */
1535 			i_ldc_reset(ldcp, B_TRUE);
1536 			mutex_exit(&ldcp->tx_lock);
1537 			return (ECONNRESET);
1538 		}
1539 	}
1540 
1541 	mutex_exit(&ldcp->tx_lock);
1542 	return (0);
1543 }
1544 
1545 /*
1546  * Process incoming control message
1547  * Return 0 - session can continue
1548  *        EAGAIN - reprocess packet - state was changed
1549  *	  ECONNRESET - channel was reset
1550  */
1551 static int
1552 i_ldc_ctrlmsg(ldc_chan_t *ldcp, ldc_msg_t *msg)
1553 {
1554 	int 		rv = 0;
1555 
1556 	D1(ldcp->id, "i_ldc_ctrlmsg: (%llx) tstate = %lx, hstate = %lx\n",
1557 	    ldcp->id, ldcp->tstate, ldcp->hstate);
1558 
1559 	switch (ldcp->tstate & ~TS_IN_RESET) {
1560 
1561 	case TS_OPEN:
1562 	case TS_READY:
1563 
1564 		switch (msg->ctrl & LDC_CTRL_MASK) {
1565 		case LDC_VER:
1566 			/* process version message */
1567 			rv = i_ldc_process_VER(ldcp, msg);
1568 			break;
1569 		default:
1570 			DWARN(ldcp->id,
1571 			    "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
1572 			    "tstate=0x%x\n", ldcp->id,
1573 			    (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate);
1574 			break;
1575 		}
1576 
1577 		break;
1578 
1579 	case TS_VREADY:
1580 
1581 		switch (msg->ctrl & LDC_CTRL_MASK) {
1582 		case LDC_VER:
1583 			/* process version message */
1584 			rv = i_ldc_process_VER(ldcp, msg);
1585 			break;
1586 		case LDC_RTS:
1587 			/* process RTS message */
1588 			rv = i_ldc_process_RTS(ldcp, msg);
1589 			break;
1590 		case LDC_RTR:
1591 			/* process RTR message */
1592 			rv = i_ldc_process_RTR(ldcp, msg);
1593 			break;
1594 		case LDC_RDX:
1595 			/* process RDX message */
1596 			rv = i_ldc_process_RDX(ldcp, msg);
1597 			break;
1598 		default:
1599 			DWARN(ldcp->id,
1600 			    "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
1601 			    "tstate=0x%x\n", ldcp->id,
1602 			    (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate);
1603 			break;
1604 		}
1605 
1606 		break;
1607 
1608 	case TS_UP:
1609 
1610 		switch (msg->ctrl & LDC_CTRL_MASK) {
1611 		case LDC_VER:
1612 			DWARN(ldcp->id,
1613 			    "i_ldc_ctrlmsg: (0x%llx) unexpected VER "
1614 			    "- LDC reset\n", ldcp->id);
1615 			/* peer is redoing version negotiation */
1616 			mutex_enter(&ldcp->tx_lock);
1617 			(void) i_ldc_txq_reconf(ldcp);
1618 			i_ldc_reset_state(ldcp);
1619 			mutex_exit(&ldcp->tx_lock);
1620 			rv = EAGAIN;
1621 			break;
1622 
1623 		case LDC_RDX:
1624 			/* process RDX message */
1625 			rv = i_ldc_process_RDX(ldcp, msg);
1626 			break;
1627 
1628 		default:
1629 			DWARN(ldcp->id,
1630 			    "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
1631 			    "tstate=0x%x\n", ldcp->id,
1632 			    (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate);
1633 			break;
1634 		}
1635 	}
1636 
1637 	return (rv);
1638 }
1639 
1640 /*
1641  * Register channel with the channel nexus
1642  */
1643 static int
1644 i_ldc_register_channel(ldc_chan_t *ldcp)
1645 {
1646 	int		rv = 0;
1647 	ldc_cnex_t	*cinfo = &ldcssp->cinfo;
1648 
1649 	if (cinfo->dip == NULL) {
1650 		DWARN(ldcp->id,
1651 		    "i_ldc_register_channel: cnex has not registered\n");
1652 		return (EAGAIN);
1653 	}
1654 
1655 	rv = cinfo->reg_chan(cinfo->dip, ldcp->id, ldcp->devclass);
1656 	if (rv) {
1657 		DWARN(ldcp->id,
1658 		    "i_ldc_register_channel: cannot register channel\n");
1659 		return (rv);
1660 	}
1661 
1662 	rv = cinfo->add_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR,
1663 	    i_ldc_tx_hdlr, ldcp, NULL);
1664 	if (rv) {
1665 		DWARN(ldcp->id,
1666 		    "i_ldc_register_channel: cannot add Tx interrupt\n");
1667 		(void) cinfo->unreg_chan(cinfo->dip, ldcp->id);
1668 		return (rv);
1669 	}
1670 
1671 	rv = cinfo->add_intr(cinfo->dip, ldcp->id, CNEX_RX_INTR,
1672 	    i_ldc_rx_hdlr, ldcp, NULL);
1673 	if (rv) {
1674 		DWARN(ldcp->id,
1675 		    "i_ldc_register_channel: cannot add Rx interrupt\n");
1676 		(void) cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR);
1677 		(void) cinfo->unreg_chan(cinfo->dip, ldcp->id);
1678 		return (rv);
1679 	}
1680 
1681 	ldcp->tstate |= TS_CNEX_RDY;
1682 
1683 	return (0);
1684 }
1685 
1686 /*
1687  * Unregister a channel with the channel nexus
1688  */
1689 static int
1690 i_ldc_unregister_channel(ldc_chan_t *ldcp)
1691 {
1692 	int		rv = 0;
1693 	ldc_cnex_t	*cinfo = &ldcssp->cinfo;
1694 
1695 	if (cinfo->dip == NULL) {
1696 		DWARN(ldcp->id,
1697 		    "i_ldc_unregister_channel: cnex has not registered\n");
1698 		return (EAGAIN);
1699 	}
1700 
1701 	if (ldcp->tstate & TS_CNEX_RDY) {
1702 
1703 		/* Remove the Rx interrupt */
1704 		rv = cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_RX_INTR);
1705 		if (rv) {
1706 			if (rv != EAGAIN) {
1707 				DWARN(ldcp->id,
1708 				    "i_ldc_unregister_channel: err removing "
1709 				    "Rx intr\n");
1710 				return (rv);
1711 			}
1712 
1713 			/*
1714 			 * If interrupts are pending and handler has
1715 			 * finished running, clear interrupt and try
1716 			 * again
1717 			 */
1718 			if (ldcp->rx_intr_state != LDC_INTR_PEND)
1719 				return (rv);
1720 
1721 			(void) i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
1722 			rv = cinfo->rem_intr(cinfo->dip, ldcp->id,
1723 			    CNEX_RX_INTR);
1724 			if (rv) {
1725 				DWARN(ldcp->id, "i_ldc_unregister_channel: "
1726 				    "err removing Rx interrupt\n");
1727 				return (rv);
1728 			}
1729 		}
1730 
1731 		/* Remove the Tx interrupt */
1732 		rv = cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR);
1733 		if (rv) {
1734 			DWARN(ldcp->id,
1735 			    "i_ldc_unregister_channel: err removing Tx intr\n");
1736 			return (rv);
1737 		}
1738 
1739 		/* Unregister the channel */
1740 		rv = cinfo->unreg_chan(ldcssp->cinfo.dip, ldcp->id);
1741 		if (rv) {
1742 			DWARN(ldcp->id,
1743 			    "i_ldc_unregister_channel: cannot unreg channel\n");
1744 			return (rv);
1745 		}
1746 
1747 		ldcp->tstate &= ~TS_CNEX_RDY;
1748 	}
1749 
1750 	return (0);
1751 }
1752 
1753 
1754 /*
1755  * LDC transmit interrupt handler
1756  *    triggered for chanel up/down/reset events
1757  *    and Tx queue content changes
1758  */
1759 static uint_t
1760 i_ldc_tx_hdlr(caddr_t arg1, caddr_t arg2)
1761 {
1762 	_NOTE(ARGUNUSED(arg2))
1763 
1764 	int 		rv;
1765 	ldc_chan_t 	*ldcp;
1766 	boolean_t 	notify_client = B_FALSE;
1767 	uint64_t	notify_event = 0, link_state;
1768 
1769 	/* Get the channel for which interrupt was received */
1770 	ASSERT(arg1 != NULL);
1771 	ldcp = (ldc_chan_t *)arg1;
1772 
1773 	D1(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) Received intr, ldcp=0x%p\n",
1774 	    ldcp->id, ldcp);
1775 
1776 	/* Lock channel */
1777 	mutex_enter(&ldcp->lock);
1778 
1779 	/* Obtain Tx lock */
1780 	mutex_enter(&ldcp->tx_lock);
1781 
1782 	/* mark interrupt as pending */
1783 	ldcp->tx_intr_state = LDC_INTR_ACTIVE;
1784 
1785 	/* save current link state */
1786 	link_state = ldcp->link_state;
1787 
1788 	rv = hv_ldc_tx_get_state(ldcp->id, &ldcp->tx_head, &ldcp->tx_tail,
1789 	    &ldcp->link_state);
1790 	if (rv) {
1791 		cmn_err(CE_WARN,
1792 		    "i_ldc_tx_hdlr: (0x%lx) cannot read queue ptrs rv=0x%d\n",
1793 		    ldcp->id, rv);
1794 		i_ldc_clear_intr(ldcp, CNEX_TX_INTR);
1795 		mutex_exit(&ldcp->tx_lock);
1796 		mutex_exit(&ldcp->lock);
1797 		return (DDI_INTR_CLAIMED);
1798 	}
1799 
1800 	/*
1801 	 * reset the channel state if the channel went down
1802 	 * (other side unconfigured queue) or channel was reset
1803 	 * (other side reconfigured its queue)
1804 	 */
1805 	if (link_state != ldcp->link_state &&
1806 	    ldcp->link_state == LDC_CHANNEL_DOWN) {
1807 		D1(ldcp->id, "i_ldc_tx_hdlr: channel link down\n", ldcp->id);
1808 		i_ldc_reset(ldcp, B_FALSE);
1809 		notify_client = B_TRUE;
1810 		notify_event = LDC_EVT_DOWN;
1811 	}
1812 
1813 	if (link_state != ldcp->link_state &&
1814 	    ldcp->link_state == LDC_CHANNEL_RESET) {
1815 		D1(ldcp->id, "i_ldc_tx_hdlr: channel link reset\n", ldcp->id);
1816 		i_ldc_reset(ldcp, B_FALSE);
1817 		notify_client = B_TRUE;
1818 		notify_event = LDC_EVT_RESET;
1819 	}
1820 
1821 	if (link_state != ldcp->link_state &&
1822 	    (ldcp->tstate & ~TS_IN_RESET) == TS_OPEN &&
1823 	    ldcp->link_state == LDC_CHANNEL_UP) {
1824 		D1(ldcp->id, "i_ldc_tx_hdlr: channel link up\n", ldcp->id);
1825 		notify_client = B_TRUE;
1826 		notify_event = LDC_EVT_RESET;
1827 		ldcp->tstate |= TS_LINK_READY;
1828 		ldcp->status = LDC_READY;
1829 	}
1830 
1831 	/* if callbacks are disabled, do not notify */
1832 	if (!ldcp->cb_enabled)
1833 		notify_client = B_FALSE;
1834 
1835 	/* Unlock channel */
1836 
1837 	if (notify_client) {
1838 		ldcp->cb_inprogress = B_TRUE;
1839 		mutex_exit(&ldcp->tx_lock);
1840 		mutex_exit(&ldcp->lock);
1841 		rv = ldcp->cb(notify_event, ldcp->cb_arg);
1842 		if (rv) {
1843 			DWARN(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) callback "
1844 			    "failure", ldcp->id);
1845 		}
1846 		mutex_enter(&ldcp->lock);
1847 		ldcp->cb_inprogress = B_FALSE;
1848 	}
1849 
1850 	i_ldc_clear_intr(ldcp, CNEX_TX_INTR);
1851 	mutex_exit(&ldcp->lock);
1852 
1853 	D1(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) exiting handler", ldcp->id);
1854 
1855 	return (DDI_INTR_CLAIMED);
1856 }
1857 
1858 /*
1859  * LDC receive interrupt handler
1860  *    triggered for channel with data pending to read
1861  *    i.e. Rx queue content changes
1862  */
1863 static uint_t
1864 i_ldc_rx_hdlr(caddr_t arg1, caddr_t arg2)
1865 {
1866 	_NOTE(ARGUNUSED(arg2))
1867 
1868 	int		rv;
1869 	uint64_t 	rx_head, rx_tail;
1870 	ldc_msg_t 	*msg;
1871 	ldc_chan_t 	*ldcp;
1872 	boolean_t 	notify_client = B_FALSE;
1873 	uint64_t	notify_event = 0;
1874 	uint64_t	link_state, first_fragment = 0;
1875 
1876 
1877 	/* Get the channel for which interrupt was received */
1878 	if (arg1 == NULL) {
1879 		cmn_err(CE_WARN, "i_ldc_rx_hdlr: invalid arg\n");
1880 		return (DDI_INTR_UNCLAIMED);
1881 	}
1882 
1883 	ldcp = (ldc_chan_t *)arg1;
1884 
1885 	D1(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) Received intr, ldcp=0x%p\n",
1886 	    ldcp->id, ldcp);
1887 	D1(ldcp->id, "i_ldc_rx_hdlr: (%llx) USR%lx/TS%lx/HS%lx, LSTATE=%lx\n",
1888 	    ldcp->id, ldcp->status, ldcp->tstate, ldcp->hstate,
1889 	    ldcp->link_state);
1890 
1891 	/* Lock channel */
1892 	mutex_enter(&ldcp->lock);
1893 
1894 	/* mark interrupt as pending */
1895 	ldcp->rx_intr_state = LDC_INTR_ACTIVE;
1896 
1897 	/*
1898 	 * Read packet(s) from the queue
1899 	 */
1900 	for (;;) {
1901 
1902 		link_state = ldcp->link_state;
1903 		rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
1904 		    &ldcp->link_state);
1905 		if (rv) {
1906 			cmn_err(CE_WARN,
1907 			    "i_ldc_rx_hdlr: (0x%lx) cannot read "
1908 			    "queue ptrs, rv=0x%d\n", ldcp->id, rv);
1909 			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
1910 			mutex_exit(&ldcp->lock);
1911 			return (DDI_INTR_CLAIMED);
1912 		}
1913 
1914 		/*
1915 		 * reset the channel state if the channel went down
1916 		 * (other side unconfigured queue) or channel was reset
1917 		 * (other side reconfigured its queue)
1918 		 */
1919 
1920 		if (link_state != ldcp->link_state) {
1921 
1922 			switch (ldcp->link_state) {
1923 			case LDC_CHANNEL_DOWN:
1924 				D1(ldcp->id, "i_ldc_rx_hdlr: channel "
1925 				    "link down\n", ldcp->id);
1926 				mutex_enter(&ldcp->tx_lock);
1927 				i_ldc_reset(ldcp, B_FALSE);
1928 				mutex_exit(&ldcp->tx_lock);
1929 				notify_client = B_TRUE;
1930 				notify_event = LDC_EVT_DOWN;
1931 				goto loop_exit;
1932 
1933 			case LDC_CHANNEL_UP:
1934 				D1(ldcp->id, "i_ldc_rx_hdlr: "
1935 				    "channel link up\n", ldcp->id);
1936 
1937 				if ((ldcp->tstate & ~TS_IN_RESET) == TS_OPEN) {
1938 					notify_client = B_TRUE;
1939 					notify_event = LDC_EVT_RESET;
1940 					ldcp->tstate |= TS_LINK_READY;
1941 					ldcp->status = LDC_READY;
1942 				}
1943 				break;
1944 
1945 			case LDC_CHANNEL_RESET:
1946 			default:
1947 #ifdef DEBUG
1948 force_reset:
1949 #endif
1950 				D1(ldcp->id, "i_ldc_rx_hdlr: channel "
1951 				    "link reset\n", ldcp->id);
1952 				mutex_enter(&ldcp->tx_lock);
1953 				i_ldc_reset(ldcp, B_FALSE);
1954 				mutex_exit(&ldcp->tx_lock);
1955 				notify_client = B_TRUE;
1956 				notify_event = LDC_EVT_RESET;
1957 				break;
1958 			}
1959 		}
1960 
1961 #ifdef DEBUG
1962 		if (LDC_INJECT_RESET(ldcp))
1963 			goto force_reset;
1964 #endif
1965 
1966 		if (rx_head == rx_tail) {
1967 			D2(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) No packets\n",
1968 			    ldcp->id);
1969 			break;
1970 		}
1971 
1972 		D2(ldcp->id, "i_ldc_rx_hdlr: head=0x%llx, tail=0x%llx\n",
1973 		    rx_head, rx_tail);
1974 		DUMP_LDC_PKT(ldcp, "i_ldc_rx_hdlr rcd",
1975 		    ldcp->rx_q_va + rx_head);
1976 
1977 		/* get the message */
1978 		msg = (ldc_msg_t *)(ldcp->rx_q_va + rx_head);
1979 
1980 		/* if channel is in RAW mode or data pkt, notify and return */
1981 		if (ldcp->mode == LDC_MODE_RAW) {
1982 			notify_client = B_TRUE;
1983 			notify_event |= LDC_EVT_READ;
1984 			break;
1985 		}
1986 
1987 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_INFO)) {
1988 
1989 			/* discard packet if channel is not up */
1990 			if ((ldcp->tstate & ~TS_IN_RESET) != TS_UP) {
1991 
1992 				/* move the head one position */
1993 				rx_head = (rx_head + LDC_PACKET_SIZE) %
1994 				(ldcp->rx_q_entries << LDC_PACKET_SHIFT);
1995 
1996 				if (rv = i_ldc_set_rx_head(ldcp, rx_head))
1997 					break;
1998 
1999 				continue;
2000 			} else {
2001 				if ((ldcp->tstate & TS_IN_RESET) == 0)
2002 					notify_client = B_TRUE;
2003 				notify_event |= LDC_EVT_READ;
2004 				break;
2005 			}
2006 		}
2007 
2008 		/* Check the sequence ID for the message received */
2009 		rv = i_ldc_check_seqid(ldcp, msg);
2010 		if (rv != 0) {
2011 
2012 			DWARN(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) seqid error, "
2013 			    "q_ptrs=0x%lx,0x%lx", ldcp->id, rx_head, rx_tail);
2014 
2015 			/* Reset last_msg_rcd to start of message */
2016 			if (first_fragment != 0) {
2017 				ldcp->last_msg_rcd = first_fragment - 1;
2018 				first_fragment = 0;
2019 			}
2020 
2021 			/*
2022 			 * Send a NACK due to seqid mismatch
2023 			 */
2024 			rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_NACK,
2025 			    (msg->ctrl & LDC_CTRL_MASK));
2026 
2027 			if (rv) {
2028 				cmn_err(CE_NOTE,
2029 				    "i_ldc_rx_hdlr: (0x%lx) err sending "
2030 				    "CTRL/NACK msg\n", ldcp->id);
2031 
2032 				/* if cannot send NACK - reset channel */
2033 				mutex_enter(&ldcp->tx_lock);
2034 				i_ldc_reset(ldcp, B_TRUE);
2035 				mutex_exit(&ldcp->tx_lock);
2036 				rv = ECONNRESET;
2037 				break;
2038 			}
2039 
2040 			/* purge receive queue */
2041 			(void) i_ldc_set_rx_head(ldcp, rx_tail);
2042 			break;
2043 		}
2044 
2045 		/* record the message ID */
2046 		ldcp->last_msg_rcd = msg->seqid;
2047 
2048 		/* process control messages */
2049 		if (msg->type & LDC_CTRL) {
2050 			/* save current internal state */
2051 			uint64_t tstate = ldcp->tstate;
2052 
2053 			rv = i_ldc_ctrlmsg(ldcp, msg);
2054 			if (rv == EAGAIN) {
2055 				/* re-process pkt - state was adjusted */
2056 				continue;
2057 			}
2058 			if (rv == ECONNRESET) {
2059 				notify_client = B_TRUE;
2060 				notify_event = LDC_EVT_RESET;
2061 				break;
2062 			}
2063 
2064 			/*
2065 			 * control message processing was successful
2066 			 * channel transitioned to ready for communication
2067 			 */
2068 			if (rv == 0 && ldcp->tstate == TS_UP &&
2069 			    (tstate & ~TS_IN_RESET) !=
2070 			    (ldcp->tstate & ~TS_IN_RESET)) {
2071 				notify_client = B_TRUE;
2072 				notify_event = LDC_EVT_UP;
2073 			}
2074 		}
2075 
2076 		/* process data ACKs */
2077 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) {
2078 			if (rv = i_ldc_process_data_ACK(ldcp, msg)) {
2079 				notify_client = B_TRUE;
2080 				notify_event = LDC_EVT_RESET;
2081 				break;
2082 			}
2083 		}
2084 
2085 		/* move the head one position */
2086 		rx_head = (rx_head + LDC_PACKET_SIZE) %
2087 			(ldcp->rx_q_entries << LDC_PACKET_SHIFT);
2088 		if (rv = i_ldc_set_rx_head(ldcp, rx_head)) {
2089 			notify_client = B_TRUE;
2090 			notify_event = LDC_EVT_RESET;
2091 			break;
2092 		}
2093 
2094 	} /* for */
2095 
2096 loop_exit:
2097 
2098 	/* if callbacks are disabled, do not notify */
2099 	if (!ldcp->cb_enabled)
2100 		notify_client = B_FALSE;
2101 
2102 	/*
2103 	 * If there are data packets in the queue, the ldc_read will
2104 	 * clear interrupts after draining the queue, else clear interrupts
2105 	 */
2106 	if ((notify_event & LDC_EVT_READ) == 0) {
2107 		i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
2108 	} else
2109 		ldcp->rx_intr_state = LDC_INTR_PEND;
2110 
2111 	mutex_exit(&ldcp->lock);
2112 
2113 	if (notify_client) {
2114 		rv = ldcp->cb(notify_event, ldcp->cb_arg);
2115 		if (rv) {
2116 			DWARN(ldcp->id,
2117 			    "i_ldc_rx_hdlr: (0x%llx) callback failure",
2118 			    ldcp->id);
2119 		}
2120 	}
2121 
2122 	D1(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) exiting handler", ldcp->id);
2123 	return (DDI_INTR_CLAIMED);
2124 }
2125 
2126 
2127 /* -------------------------------------------------------------------------- */
2128 
2129 /*
2130  * LDC API functions
2131  */
2132 
2133 /*
2134  * Initialize the channel. Allocate internal structure and memory for
2135  * TX/RX queues, and initialize locks.
2136  */
2137 int
2138 ldc_init(uint64_t id, ldc_attr_t *attr, ldc_handle_t *handle)
2139 {
2140 	ldc_chan_t 	*ldcp;
2141 	int		rv, exit_val;
2142 	uint64_t	ra_base, nentries;
2143 	uint64_t	qlen;
2144 
2145 	exit_val = EINVAL;	/* guarantee an error if exit on failure */
2146 
2147 	if (attr == NULL) {
2148 		DWARN(id, "ldc_init: (0x%llx) invalid attr\n", id);
2149 		return (EINVAL);
2150 	}
2151 	if (handle == NULL) {
2152 		DWARN(id, "ldc_init: (0x%llx) invalid handle\n", id);
2153 		return (EINVAL);
2154 	}
2155 
2156 	/* check if channel is valid */
2157 	rv = hv_ldc_tx_qinfo(id, &ra_base, &nentries);
2158 	if (rv == H_ECHANNEL) {
2159 		DWARN(id, "ldc_init: (0x%llx) invalid channel id\n", id);
2160 		return (EINVAL);
2161 	}
2162 
2163 	/* check if the channel has already been initialized */
2164 	mutex_enter(&ldcssp->lock);
2165 	ldcp = ldcssp->chan_list;
2166 	while (ldcp != NULL) {
2167 		if (ldcp->id == id) {
2168 			DWARN(id, "ldc_init: (0x%llx) already initialized\n",
2169 			    id);
2170 			mutex_exit(&ldcssp->lock);
2171 			return (EADDRINUSE);
2172 		}
2173 		ldcp = ldcp->next;
2174 	}
2175 	mutex_exit(&ldcssp->lock);
2176 
2177 	ASSERT(ldcp == NULL);
2178 
2179 	*handle = 0;
2180 
2181 	/* Allocate an ldcp structure */
2182 	ldcp = kmem_zalloc(sizeof (ldc_chan_t), KM_SLEEP);
2183 
2184 	/*
2185 	 * Initialize the channel and Tx lock
2186 	 *
2187 	 * The channel 'lock' protects the entire channel and
2188 	 * should be acquired before initializing, resetting,
2189 	 * destroying or reading from a channel.
2190 	 *
2191 	 * The 'tx_lock' should be acquired prior to transmitting
2192 	 * data over the channel. The lock should also be acquired
2193 	 * prior to channel reconfiguration (in order to prevent
2194 	 * concurrent writes).
2195 	 *
2196 	 * ORDERING: When both locks are being acquired, to prevent
2197 	 * deadlocks, the channel lock should be always acquired prior
2198 	 * to the tx_lock.
2199 	 */
2200 	mutex_init(&ldcp->lock, NULL, MUTEX_DRIVER, NULL);
2201 	mutex_init(&ldcp->tx_lock, NULL, MUTEX_DRIVER, NULL);
2202 
2203 	/* Initialize the channel */
2204 	ldcp->id = id;
2205 	ldcp->cb = NULL;
2206 	ldcp->cb_arg = NULL;
2207 	ldcp->cb_inprogress = B_FALSE;
2208 	ldcp->cb_enabled = B_FALSE;
2209 	ldcp->next = NULL;
2210 
2211 	/* Read attributes */
2212 	ldcp->mode = attr->mode;
2213 	ldcp->devclass = attr->devclass;
2214 	ldcp->devinst = attr->instance;
2215 	ldcp->mtu = (attr->mtu > 0) ? attr->mtu : LDC_DEFAULT_MTU;
2216 
2217 	D1(ldcp->id,
2218 	    "ldc_init: (0x%llx) channel attributes, class=0x%x, "
2219 	    "instance=0x%llx, mode=%d, mtu=%d\n",
2220 	    ldcp->id, ldcp->devclass, ldcp->devinst, ldcp->mode, ldcp->mtu);
2221 
2222 	ldcp->next_vidx = 0;
2223 	ldcp->tstate = TS_IN_RESET;
2224 	ldcp->hstate = 0;
2225 	ldcp->last_msg_snt = LDC_INIT_SEQID;
2226 	ldcp->last_ack_rcd = 0;
2227 	ldcp->last_msg_rcd = 0;
2228 
2229 	ldcp->stream_bufferp = NULL;
2230 	ldcp->exp_dring_list = NULL;
2231 	ldcp->imp_dring_list = NULL;
2232 	ldcp->mhdl_list = NULL;
2233 
2234 	ldcp->tx_intr_state = LDC_INTR_NONE;
2235 	ldcp->rx_intr_state = LDC_INTR_NONE;
2236 
2237 	/* Initialize payload size depending on whether channel is reliable */
2238 	switch (ldcp->mode) {
2239 	case LDC_MODE_RAW:
2240 		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_RAW;
2241 		ldcp->read_p = i_ldc_read_raw;
2242 		ldcp->write_p = i_ldc_write_raw;
2243 		break;
2244 	case LDC_MODE_UNRELIABLE:
2245 		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_UNRELIABLE;
2246 		ldcp->read_p = i_ldc_read_packet;
2247 		ldcp->write_p = i_ldc_write_packet;
2248 		break;
2249 	case LDC_MODE_RELIABLE:
2250 		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_RELIABLE;
2251 		ldcp->read_p = i_ldc_read_packet;
2252 		ldcp->write_p = i_ldc_write_packet;
2253 		break;
2254 	case LDC_MODE_STREAM:
2255 		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_RELIABLE;
2256 
2257 		ldcp->stream_remains = 0;
2258 		ldcp->stream_offset = 0;
2259 		ldcp->stream_bufferp = kmem_alloc(ldcp->mtu, KM_SLEEP);
2260 		ldcp->read_p = i_ldc_read_stream;
2261 		ldcp->write_p = i_ldc_write_stream;
2262 		break;
2263 	default:
2264 		exit_val = EINVAL;
2265 		goto cleanup_on_exit;
2266 	}
2267 
2268 	/*
2269 	 * qlen is (mtu * ldc_mtu_msgs) / pkt_payload. If this
2270 	 * value is smaller than default length of ldc_queue_entries,
2271 	 * qlen is set to ldc_queue_entries..
2272 	 */
2273 	qlen = (ldcp->mtu * ldc_mtu_msgs) / ldcp->pkt_payload;
2274 	ldcp->rx_q_entries =
2275 		(qlen < ldc_queue_entries) ? ldc_queue_entries : qlen;
2276 	ldcp->tx_q_entries = ldcp->rx_q_entries;
2277 
2278 	D1(ldcp->id, "ldc_init: queue length = 0x%llx\n", qlen);
2279 
2280 	/* Create a transmit queue */
2281 	ldcp->tx_q_va = (uint64_t)
2282 		contig_mem_alloc(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
2283 	if (ldcp->tx_q_va == NULL) {
2284 		cmn_err(CE_WARN,
2285 		    "ldc_init: (0x%lx) TX queue allocation failed\n",
2286 		    ldcp->id);
2287 		exit_val = ENOMEM;
2288 		goto cleanup_on_exit;
2289 	}
2290 	ldcp->tx_q_ra = va_to_pa((caddr_t)ldcp->tx_q_va);
2291 
2292 	D2(ldcp->id, "ldc_init: txq_va=0x%llx, txq_ra=0x%llx, entries=0x%llx\n",
2293 	    ldcp->tx_q_va, ldcp->tx_q_ra, ldcp->tx_q_entries);
2294 
2295 	ldcp->tstate |= TS_TXQ_RDY;
2296 
2297 	/* Create a receive queue */
2298 	ldcp->rx_q_va = (uint64_t)
2299 		contig_mem_alloc(ldcp->rx_q_entries << LDC_PACKET_SHIFT);
2300 	if (ldcp->rx_q_va == NULL) {
2301 		cmn_err(CE_WARN,
2302 		    "ldc_init: (0x%lx) RX queue allocation failed\n",
2303 		    ldcp->id);
2304 		exit_val = ENOMEM;
2305 		goto cleanup_on_exit;
2306 	}
2307 	ldcp->rx_q_ra = va_to_pa((caddr_t)ldcp->rx_q_va);
2308 
2309 	D2(ldcp->id, "ldc_init: rxq_va=0x%llx, rxq_ra=0x%llx, entries=0x%llx\n",
2310 	    ldcp->rx_q_va, ldcp->rx_q_ra, ldcp->rx_q_entries);
2311 
2312 	ldcp->tstate |= TS_RXQ_RDY;
2313 
2314 	/* Init descriptor ring and memory handle list lock */
2315 	mutex_init(&ldcp->exp_dlist_lock, NULL, MUTEX_DRIVER, NULL);
2316 	mutex_init(&ldcp->imp_dlist_lock, NULL, MUTEX_DRIVER, NULL);
2317 	mutex_init(&ldcp->mlist_lock, NULL, MUTEX_DRIVER, NULL);
2318 
2319 	/* mark status as INITialized */
2320 	ldcp->status = LDC_INIT;
2321 
2322 	/* Add to channel list */
2323 	mutex_enter(&ldcssp->lock);
2324 	ldcp->next = ldcssp->chan_list;
2325 	ldcssp->chan_list = ldcp;
2326 	ldcssp->channel_count++;
2327 	mutex_exit(&ldcssp->lock);
2328 
2329 	/* set the handle */
2330 	*handle = (ldc_handle_t)ldcp;
2331 
2332 	D1(ldcp->id, "ldc_init: (0x%llx) channel initialized\n", ldcp->id);
2333 
2334 	return (0);
2335 
2336 cleanup_on_exit:
2337 
2338 	if (ldcp->mode == LDC_MODE_STREAM && ldcp->stream_bufferp)
2339 		kmem_free(ldcp->stream_bufferp, ldcp->mtu);
2340 
2341 	if (ldcp->tstate & TS_TXQ_RDY)
2342 		contig_mem_free((caddr_t)ldcp->tx_q_va,
2343 		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT));
2344 
2345 	if (ldcp->tstate & TS_RXQ_RDY)
2346 		contig_mem_free((caddr_t)ldcp->rx_q_va,
2347 		    (ldcp->rx_q_entries << LDC_PACKET_SHIFT));
2348 
2349 	mutex_destroy(&ldcp->tx_lock);
2350 	mutex_destroy(&ldcp->lock);
2351 
2352 	if (ldcp)
2353 		kmem_free(ldcp, sizeof (ldc_chan_t));
2354 
2355 	return (exit_val);
2356 }
2357 
2358 /*
2359  * Finalizes the LDC connection. It will return EBUSY if the
2360  * channel is open. A ldc_close() has to be done prior to
2361  * a ldc_fini operation. It frees TX/RX queues, associated
2362  * with the channel
2363  */
2364 int
2365 ldc_fini(ldc_handle_t handle)
2366 {
2367 	ldc_chan_t 	*ldcp;
2368 	ldc_chan_t 	*tmp_ldcp;
2369 	uint64_t 	id;
2370 
2371 	if (handle == NULL) {
2372 		DWARN(DBG_ALL_LDCS, "ldc_fini: invalid channel handle\n");
2373 		return (EINVAL);
2374 	}
2375 	ldcp = (ldc_chan_t *)handle;
2376 	id = ldcp->id;
2377 
2378 	mutex_enter(&ldcp->lock);
2379 
2380 	if ((ldcp->tstate & ~TS_IN_RESET) > TS_INIT) {
2381 		DWARN(ldcp->id, "ldc_fini: (0x%llx) channel is open\n",
2382 		    ldcp->id);
2383 		mutex_exit(&ldcp->lock);
2384 		return (EBUSY);
2385 	}
2386 
2387 	/* Remove from the channel list */
2388 	mutex_enter(&ldcssp->lock);
2389 	tmp_ldcp = ldcssp->chan_list;
2390 	if (tmp_ldcp == ldcp) {
2391 		ldcssp->chan_list = ldcp->next;
2392 		ldcp->next = NULL;
2393 	} else {
2394 		while (tmp_ldcp != NULL) {
2395 			if (tmp_ldcp->next == ldcp) {
2396 				tmp_ldcp->next = ldcp->next;
2397 				ldcp->next = NULL;
2398 				break;
2399 			}
2400 			tmp_ldcp = tmp_ldcp->next;
2401 		}
2402 		if (tmp_ldcp == NULL) {
2403 			DWARN(DBG_ALL_LDCS, "ldc_fini: invalid channel hdl\n");
2404 			mutex_exit(&ldcssp->lock);
2405 			mutex_exit(&ldcp->lock);
2406 			return (EINVAL);
2407 		}
2408 	}
2409 
2410 	ldcssp->channel_count--;
2411 
2412 	mutex_exit(&ldcssp->lock);
2413 
2414 	/* Free the map table for this channel */
2415 	if (ldcp->mtbl) {
2416 		(void) hv_ldc_set_map_table(ldcp->id, NULL, NULL);
2417 		if (ldcp->mtbl->contigmem)
2418 			contig_mem_free(ldcp->mtbl->table, ldcp->mtbl->size);
2419 		else
2420 			kmem_free(ldcp->mtbl->table, ldcp->mtbl->size);
2421 		mutex_destroy(&ldcp->mtbl->lock);
2422 		kmem_free(ldcp->mtbl, sizeof (ldc_mtbl_t));
2423 	}
2424 
2425 	/* Destroy descriptor ring and memory handle list lock */
2426 	mutex_destroy(&ldcp->exp_dlist_lock);
2427 	mutex_destroy(&ldcp->imp_dlist_lock);
2428 	mutex_destroy(&ldcp->mlist_lock);
2429 
2430 	/* Free the stream buffer for STREAM_MODE */
2431 	if (ldcp->mode == LDC_MODE_STREAM && ldcp->stream_bufferp)
2432 		kmem_free(ldcp->stream_bufferp, ldcp->mtu);
2433 
2434 	/* Free the RX queue */
2435 	contig_mem_free((caddr_t)ldcp->rx_q_va,
2436 	    (ldcp->rx_q_entries << LDC_PACKET_SHIFT));
2437 	ldcp->tstate &= ~TS_RXQ_RDY;
2438 
2439 	/* Free the TX queue */
2440 	contig_mem_free((caddr_t)ldcp->tx_q_va,
2441 	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT));
2442 	ldcp->tstate &= ~TS_TXQ_RDY;
2443 
2444 	mutex_exit(&ldcp->lock);
2445 
2446 	/* Destroy mutex */
2447 	mutex_destroy(&ldcp->tx_lock);
2448 	mutex_destroy(&ldcp->lock);
2449 
2450 	/* free channel structure */
2451 	kmem_free(ldcp, sizeof (ldc_chan_t));
2452 
2453 	D1(id, "ldc_fini: (0x%llx) channel finalized\n", id);
2454 
2455 	return (0);
2456 }
2457 
2458 /*
2459  * Open the LDC channel for use. It registers the TX/RX queues
2460  * with the Hypervisor. It also specifies the interrupt number
2461  * and target CPU for this channel
2462  */
2463 int
2464 ldc_open(ldc_handle_t handle)
2465 {
2466 	ldc_chan_t 	*ldcp;
2467 	int 		rv;
2468 
2469 	if (handle == NULL) {
2470 		DWARN(DBG_ALL_LDCS, "ldc_open: invalid channel handle\n");
2471 		return (EINVAL);
2472 	}
2473 
2474 	ldcp = (ldc_chan_t *)handle;
2475 
2476 	mutex_enter(&ldcp->lock);
2477 
2478 	if (ldcp->tstate < TS_INIT) {
2479 		DWARN(ldcp->id,
2480 		    "ldc_open: (0x%llx) channel not initialized\n", ldcp->id);
2481 		mutex_exit(&ldcp->lock);
2482 		return (EFAULT);
2483 	}
2484 	if ((ldcp->tstate & ~TS_IN_RESET) >= TS_OPEN) {
2485 		DWARN(ldcp->id,
2486 		    "ldc_open: (0x%llx) channel is already open\n", ldcp->id);
2487 		mutex_exit(&ldcp->lock);
2488 		return (EFAULT);
2489 	}
2490 
2491 	/*
2492 	 * Unregister/Register the tx queue with the hypervisor
2493 	 */
2494 	rv = hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
2495 	if (rv) {
2496 		cmn_err(CE_WARN,
2497 		    "ldc_open: (0x%lx) channel tx queue unconf failed\n",
2498 		    ldcp->id);
2499 		mutex_exit(&ldcp->lock);
2500 		return (EIO);
2501 	}
2502 
2503 	rv = hv_ldc_tx_qconf(ldcp->id, ldcp->tx_q_ra, ldcp->tx_q_entries);
2504 	if (rv) {
2505 		cmn_err(CE_WARN,
2506 		    "ldc_open: (0x%lx) channel tx queue conf failed\n",
2507 		    ldcp->id);
2508 		mutex_exit(&ldcp->lock);
2509 		return (EIO);
2510 	}
2511 
2512 	D2(ldcp->id, "ldc_open: (0x%llx) registered tx queue with LDC\n",
2513 	    ldcp->id);
2514 
2515 	/*
2516 	 * Unregister/Register the rx queue with the hypervisor
2517 	 */
2518 	rv = hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
2519 	if (rv) {
2520 		cmn_err(CE_WARN,
2521 		    "ldc_open: (0x%lx) channel rx queue unconf failed\n",
2522 		    ldcp->id);
2523 		mutex_exit(&ldcp->lock);
2524 		return (EIO);
2525 	}
2526 
2527 	rv = hv_ldc_rx_qconf(ldcp->id, ldcp->rx_q_ra, ldcp->rx_q_entries);
2528 	if (rv) {
2529 		cmn_err(CE_WARN,
2530 		    "ldc_open: (0x%lx) channel rx queue conf failed\n",
2531 		    ldcp->id);
2532 		mutex_exit(&ldcp->lock);
2533 		return (EIO);
2534 	}
2535 
2536 	D2(ldcp->id, "ldc_open: (0x%llx) registered rx queue with LDC\n",
2537 	    ldcp->id);
2538 
2539 	ldcp->tstate |= TS_QCONF_RDY;
2540 
2541 	/* Register the channel with the channel nexus */
2542 	rv = i_ldc_register_channel(ldcp);
2543 	if (rv && rv != EAGAIN) {
2544 		cmn_err(CE_WARN,
2545 		    "ldc_open: (0x%lx) channel register failed\n", ldcp->id);
2546 		(void) hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
2547 		(void) hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
2548 		mutex_exit(&ldcp->lock);
2549 		return (EIO);
2550 	}
2551 
2552 	/* mark channel in OPEN state */
2553 	ldcp->status = LDC_OPEN;
2554 
2555 	/* Read channel state */
2556 	rv = hv_ldc_tx_get_state(ldcp->id,
2557 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
2558 	if (rv) {
2559 		cmn_err(CE_WARN,
2560 		    "ldc_open: (0x%lx) cannot read channel state\n",
2561 		    ldcp->id);
2562 		(void) i_ldc_unregister_channel(ldcp);
2563 		(void) hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
2564 		(void) hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
2565 		mutex_exit(&ldcp->lock);
2566 		return (EIO);
2567 	}
2568 
2569 	/*
2570 	 * set the ACKd head to current head location for reliable &
2571 	 * streaming mode
2572 	 */
2573 	ldcp->tx_ackd_head = ldcp->tx_head;
2574 
2575 	/* mark channel ready if HV report link is UP (peer alloc'd Rx queue) */
2576 	if (ldcp->link_state == LDC_CHANNEL_UP ||
2577 	    ldcp->link_state == LDC_CHANNEL_RESET) {
2578 		ldcp->tstate |= TS_LINK_READY;
2579 		ldcp->status = LDC_READY;
2580 	}
2581 
2582 	/*
2583 	 * if channel is being opened in RAW mode - no handshake is needed
2584 	 * switch the channel READY and UP state
2585 	 */
2586 	if (ldcp->mode == LDC_MODE_RAW) {
2587 		ldcp->tstate = TS_UP;	/* set bits associated with LDC UP */
2588 		ldcp->status = LDC_UP;
2589 	}
2590 
2591 	mutex_exit(&ldcp->lock);
2592 
2593 	/*
2594 	 * Increment number of open channels
2595 	 */
2596 	mutex_enter(&ldcssp->lock);
2597 	ldcssp->channels_open++;
2598 	mutex_exit(&ldcssp->lock);
2599 
2600 	D1(ldcp->id,
2601 	    "ldc_open: (0x%llx) channel (0x%p) open for use "
2602 	    "(tstate=0x%x, status=0x%x)\n",
2603 	    ldcp->id, ldcp, ldcp->tstate, ldcp->status);
2604 
2605 	return (0);
2606 }
2607 
2608 /*
2609  * Close the LDC connection. It will return EBUSY if there
2610  * are memory segments or descriptor rings either bound to or
2611  * mapped over the channel
2612  */
2613 int
2614 ldc_close(ldc_handle_t handle)
2615 {
2616 	ldc_chan_t 	*ldcp;
2617 	int		rv = 0, retries = 0;
2618 	boolean_t	chk_done = B_FALSE;
2619 
2620 	if (handle == NULL) {
2621 		DWARN(DBG_ALL_LDCS, "ldc_close: invalid channel handle\n");
2622 		return (EINVAL);
2623 	}
2624 	ldcp = (ldc_chan_t *)handle;
2625 
2626 	mutex_enter(&ldcp->lock);
2627 
2628 	/* return error if channel is not open */
2629 	if ((ldcp->tstate & ~TS_IN_RESET) < TS_OPEN) {
2630 		DWARN(ldcp->id,
2631 		    "ldc_close: (0x%llx) channel is not open\n", ldcp->id);
2632 		mutex_exit(&ldcp->lock);
2633 		return (EFAULT);
2634 	}
2635 
2636 	/* if any memory handles, drings, are bound or mapped cannot close */
2637 	if (ldcp->mhdl_list != NULL) {
2638 		DWARN(ldcp->id,
2639 		    "ldc_close: (0x%llx) channel has bound memory handles\n",
2640 		    ldcp->id);
2641 		mutex_exit(&ldcp->lock);
2642 		return (EBUSY);
2643 	}
2644 	if (ldcp->exp_dring_list != NULL) {
2645 		DWARN(ldcp->id,
2646 		    "ldc_close: (0x%llx) channel has bound descriptor rings\n",
2647 		    ldcp->id);
2648 		mutex_exit(&ldcp->lock);
2649 		return (EBUSY);
2650 	}
2651 	if (ldcp->imp_dring_list != NULL) {
2652 		DWARN(ldcp->id,
2653 		    "ldc_close: (0x%llx) channel has mapped descriptor rings\n",
2654 		    ldcp->id);
2655 		mutex_exit(&ldcp->lock);
2656 		return (EBUSY);
2657 	}
2658 
2659 	/* Obtain Tx lock */
2660 	mutex_enter(&ldcp->tx_lock);
2661 
2662 	/*
2663 	 * Wait for pending transmits to complete i.e Tx queue to drain
2664 	 * if there are pending pkts - wait 1 ms and retry again
2665 	 */
2666 	for (;;) {
2667 
2668 		rv = hv_ldc_tx_get_state(ldcp->id,
2669 		    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
2670 		if (rv) {
2671 			cmn_err(CE_WARN,
2672 			    "ldc_close: (0x%lx) cannot read qptrs\n", ldcp->id);
2673 			mutex_exit(&ldcp->tx_lock);
2674 			mutex_exit(&ldcp->lock);
2675 			return (EIO);
2676 		}
2677 
2678 		if (ldcp->tx_head == ldcp->tx_tail ||
2679 		    ldcp->link_state != LDC_CHANNEL_UP) {
2680 			break;
2681 		}
2682 
2683 		if (chk_done) {
2684 			DWARN(ldcp->id,
2685 			    "ldc_close: (0x%llx) Tx queue drain timeout\n",
2686 			    ldcp->id);
2687 			break;
2688 		}
2689 
2690 		/* wait for one ms and try again */
2691 		delay(drv_usectohz(1000));
2692 		chk_done = B_TRUE;
2693 	}
2694 
2695 	/*
2696 	 * Drain the Tx and Rx queues as we are closing the
2697 	 * channel. We dont care about any pending packets.
2698 	 * We have to also drain the queue prior to clearing
2699 	 * pending interrupts, otherwise the HV will trigger
2700 	 * an interrupt the moment the interrupt state is
2701 	 * cleared.
2702 	 */
2703 	(void) i_ldc_txq_reconf(ldcp);
2704 	(void) i_ldc_rxq_drain(ldcp);
2705 
2706 	/*
2707 	 * Unregister the channel with the nexus
2708 	 */
2709 	while ((rv = i_ldc_unregister_channel(ldcp)) != 0) {
2710 
2711 		mutex_exit(&ldcp->tx_lock);
2712 		mutex_exit(&ldcp->lock);
2713 
2714 		/* if any error other than EAGAIN return back */
2715 		if (rv != EAGAIN || retries >= ldc_max_retries) {
2716 			cmn_err(CE_WARN,
2717 			    "ldc_close: (0x%lx) unregister failed, %d\n",
2718 			    ldcp->id, rv);
2719 			return (rv);
2720 		}
2721 
2722 		/*
2723 		 * As there could be pending interrupts we need
2724 		 * to wait and try again
2725 		 */
2726 		drv_usecwait(ldc_delay);
2727 		mutex_enter(&ldcp->lock);
2728 		mutex_enter(&ldcp->tx_lock);
2729 		retries++;
2730 	}
2731 
2732 	/*
2733 	 * Unregister queues
2734 	 */
2735 	rv = hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
2736 	if (rv) {
2737 		cmn_err(CE_WARN,
2738 		    "ldc_close: (0x%lx) channel TX queue unconf failed\n",
2739 		    ldcp->id);
2740 		mutex_exit(&ldcp->tx_lock);
2741 		mutex_exit(&ldcp->lock);
2742 		return (EIO);
2743 	}
2744 	rv = hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
2745 	if (rv) {
2746 		cmn_err(CE_WARN,
2747 		    "ldc_close: (0x%lx) channel RX queue unconf failed\n",
2748 		    ldcp->id);
2749 		mutex_exit(&ldcp->tx_lock);
2750 		mutex_exit(&ldcp->lock);
2751 		return (EIO);
2752 	}
2753 
2754 	ldcp->tstate &= ~TS_QCONF_RDY;
2755 
2756 	/* Reset channel state information */
2757 	i_ldc_reset_state(ldcp);
2758 
2759 	/* Mark channel as down and in initialized state */
2760 	ldcp->tx_ackd_head = 0;
2761 	ldcp->tx_head = 0;
2762 	ldcp->tstate = TS_IN_RESET|TS_INIT;
2763 	ldcp->status = LDC_INIT;
2764 
2765 	mutex_exit(&ldcp->tx_lock);
2766 	mutex_exit(&ldcp->lock);
2767 
2768 	/* Decrement number of open channels */
2769 	mutex_enter(&ldcssp->lock);
2770 	ldcssp->channels_open--;
2771 	mutex_exit(&ldcssp->lock);
2772 
2773 	D1(ldcp->id, "ldc_close: (0x%llx) channel closed\n", ldcp->id);
2774 
2775 	return (0);
2776 }
2777 
2778 /*
2779  * Register channel callback
2780  */
2781 int
2782 ldc_reg_callback(ldc_handle_t handle,
2783     uint_t(*cb)(uint64_t event, caddr_t arg), caddr_t arg)
2784 {
2785 	ldc_chan_t *ldcp;
2786 
2787 	if (handle == NULL) {
2788 		DWARN(DBG_ALL_LDCS,
2789 		    "ldc_reg_callback: invalid channel handle\n");
2790 		return (EINVAL);
2791 	}
2792 	if (((uint64_t)cb) < KERNELBASE) {
2793 		DWARN(DBG_ALL_LDCS, "ldc_reg_callback: invalid callback\n");
2794 		return (EINVAL);
2795 	}
2796 	ldcp = (ldc_chan_t *)handle;
2797 
2798 	mutex_enter(&ldcp->lock);
2799 
2800 	if (ldcp->cb) {
2801 		DWARN(ldcp->id, "ldc_reg_callback: (0x%llx) callback exists\n",
2802 		    ldcp->id);
2803 		mutex_exit(&ldcp->lock);
2804 		return (EIO);
2805 	}
2806 	if (ldcp->cb_inprogress) {
2807 		DWARN(ldcp->id, "ldc_reg_callback: (0x%llx) callback active\n",
2808 		    ldcp->id);
2809 		mutex_exit(&ldcp->lock);
2810 		return (EWOULDBLOCK);
2811 	}
2812 
2813 	ldcp->cb = cb;
2814 	ldcp->cb_arg = arg;
2815 	ldcp->cb_enabled = B_TRUE;
2816 
2817 	D1(ldcp->id,
2818 	    "ldc_reg_callback: (0x%llx) registered callback for channel\n",
2819 	    ldcp->id);
2820 
2821 	mutex_exit(&ldcp->lock);
2822 
2823 	return (0);
2824 }
2825 
2826 /*
2827  * Unregister channel callback
2828  */
2829 int
2830 ldc_unreg_callback(ldc_handle_t handle)
2831 {
2832 	ldc_chan_t *ldcp;
2833 
2834 	if (handle == NULL) {
2835 		DWARN(DBG_ALL_LDCS,
2836 		    "ldc_unreg_callback: invalid channel handle\n");
2837 		return (EINVAL);
2838 	}
2839 	ldcp = (ldc_chan_t *)handle;
2840 
2841 	mutex_enter(&ldcp->lock);
2842 
2843 	if (ldcp->cb == NULL) {
2844 		DWARN(ldcp->id,
2845 		    "ldc_unreg_callback: (0x%llx) no callback exists\n",
2846 		    ldcp->id);
2847 		mutex_exit(&ldcp->lock);
2848 		return (EIO);
2849 	}
2850 	if (ldcp->cb_inprogress) {
2851 		DWARN(ldcp->id,
2852 		    "ldc_unreg_callback: (0x%llx) callback active\n",
2853 		    ldcp->id);
2854 		mutex_exit(&ldcp->lock);
2855 		return (EWOULDBLOCK);
2856 	}
2857 
2858 	ldcp->cb = NULL;
2859 	ldcp->cb_arg = NULL;
2860 	ldcp->cb_enabled = B_FALSE;
2861 
2862 	D1(ldcp->id,
2863 	    "ldc_unreg_callback: (0x%llx) unregistered callback for channel\n",
2864 	    ldcp->id);
2865 
2866 	mutex_exit(&ldcp->lock);
2867 
2868 	return (0);
2869 }
2870 
2871 
2872 /*
2873  * Bring a channel up by initiating a handshake with the peer
2874  * This call is asynchronous. It will complete at a later point
2875  * in time when the peer responds back with an RTR.
2876  */
2877 int
2878 ldc_up(ldc_handle_t handle)
2879 {
2880 	int 		rv;
2881 	ldc_chan_t 	*ldcp;
2882 	ldc_msg_t 	*ldcmsg;
2883 	uint64_t 	tx_tail, tstate;
2884 
2885 	if (handle == NULL) {
2886 		DWARN(DBG_ALL_LDCS, "ldc_up: invalid channel handle\n");
2887 		return (EINVAL);
2888 	}
2889 	ldcp = (ldc_chan_t *)handle;
2890 
2891 	mutex_enter(&ldcp->lock);
2892 
2893 	D1(ldcp->id, "ldc_up: (0x%llx) doing channel UP\n", ldcp->id);
2894 
2895 	/* clear the reset state */
2896 	tstate = ldcp->tstate;
2897 	ldcp->tstate &= ~TS_IN_RESET;
2898 
2899 	if (ldcp->tstate == TS_UP) {
2900 		DWARN(ldcp->id,
2901 		    "ldc_up: (0x%llx) channel is already in UP state\n",
2902 		    ldcp->id);
2903 
2904 		/* mark channel as up */
2905 		ldcp->status = LDC_UP;
2906 
2907 		/*
2908 		 * if channel was in reset state and there was
2909 		 * pending data clear interrupt state. this will
2910 		 * trigger an interrupt, causing the RX handler to
2911 		 * to invoke the client's callback
2912 		 */
2913 		if ((tstate & TS_IN_RESET) &&
2914 		    ldcp->rx_intr_state == LDC_INTR_PEND) {
2915 			D1(ldcp->id,
2916 			    "ldc_up: (0x%llx) channel has pending data, "
2917 			    "clearing interrupt\n", ldcp->id);
2918 			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
2919 		}
2920 
2921 		mutex_exit(&ldcp->lock);
2922 		return (0);
2923 	}
2924 
2925 	/* if the channel is in RAW mode - mark it as UP, if READY */
2926 	if (ldcp->mode == LDC_MODE_RAW && ldcp->tstate >= TS_READY) {
2927 		ldcp->tstate = TS_UP;
2928 		mutex_exit(&ldcp->lock);
2929 		return (0);
2930 	}
2931 
2932 	/* Don't start another handshake if there is one in progress */
2933 	if (ldcp->hstate) {
2934 		D1(ldcp->id,
2935 		    "ldc_up: (0x%llx) channel handshake in progress\n",
2936 		    ldcp->id);
2937 		mutex_exit(&ldcp->lock);
2938 		return (0);
2939 	}
2940 
2941 	mutex_enter(&ldcp->tx_lock);
2942 
2943 	/* get the current tail for the LDC msg */
2944 	rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
2945 	if (rv) {
2946 		D1(ldcp->id, "ldc_up: (0x%llx) cannot initiate handshake\n",
2947 		    ldcp->id);
2948 		mutex_exit(&ldcp->tx_lock);
2949 		mutex_exit(&ldcp->lock);
2950 		return (ECONNREFUSED);
2951 	}
2952 
2953 	ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
2954 	ZERO_PKT(ldcmsg);
2955 
2956 	ldcmsg->type = LDC_CTRL;
2957 	ldcmsg->stype = LDC_INFO;
2958 	ldcmsg->ctrl = LDC_VER;
2959 	ldcp->next_vidx = 0;
2960 	bcopy(&ldc_versions[0], ldcmsg->udata, sizeof (ldc_versions[0]));
2961 
2962 	DUMP_LDC_PKT(ldcp, "ldc_up snd ver", (uint64_t)ldcmsg);
2963 
2964 	/* initiate the send by calling into HV and set the new tail */
2965 	tx_tail = (tx_tail + LDC_PACKET_SIZE) %
2966 		(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
2967 
2968 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
2969 	if (rv) {
2970 		DWARN(ldcp->id,
2971 		    "ldc_up: (0x%llx) cannot initiate handshake rv=%d\n",
2972 		    ldcp->id, rv);
2973 		mutex_exit(&ldcp->tx_lock);
2974 		mutex_exit(&ldcp->lock);
2975 		return (rv);
2976 	}
2977 
2978 	ldcp->hstate |= TS_SENT_VER;
2979 	ldcp->tx_tail = tx_tail;
2980 	D1(ldcp->id, "ldc_up: (0x%llx) channel up initiated\n", ldcp->id);
2981 
2982 	mutex_exit(&ldcp->tx_lock);
2983 	mutex_exit(&ldcp->lock);
2984 
2985 	return (rv);
2986 }
2987 
2988 
2989 /*
2990  * Bring a channel down by resetting its state and queues
2991  */
2992 int
2993 ldc_down(ldc_handle_t handle)
2994 {
2995 	ldc_chan_t 	*ldcp;
2996 
2997 	if (handle == NULL) {
2998 		DWARN(DBG_ALL_LDCS, "ldc_down: invalid channel handle\n");
2999 		return (EINVAL);
3000 	}
3001 	ldcp = (ldc_chan_t *)handle;
3002 	mutex_enter(&ldcp->lock);
3003 	mutex_enter(&ldcp->tx_lock);
3004 	i_ldc_reset(ldcp, B_TRUE);
3005 	mutex_exit(&ldcp->tx_lock);
3006 	mutex_exit(&ldcp->lock);
3007 
3008 	return (0);
3009 }
3010 
3011 /*
3012  * Get the current channel status
3013  */
3014 int
3015 ldc_status(ldc_handle_t handle, ldc_status_t *status)
3016 {
3017 	ldc_chan_t *ldcp;
3018 
3019 	if (handle == NULL || status == NULL) {
3020 		DWARN(DBG_ALL_LDCS, "ldc_status: invalid argument\n");
3021 		return (EINVAL);
3022 	}
3023 	ldcp = (ldc_chan_t *)handle;
3024 
3025 	*status = ((ldc_chan_t *)handle)->status;
3026 
3027 	D1(ldcp->id,
3028 	    "ldc_status: (0x%llx) returned status %d\n", ldcp->id, *status);
3029 	return (0);
3030 }
3031 
3032 
3033 /*
3034  * Set the channel's callback mode - enable/disable callbacks
3035  */
3036 int
3037 ldc_set_cb_mode(ldc_handle_t handle, ldc_cb_mode_t cmode)
3038 {
3039 	ldc_chan_t 	*ldcp;
3040 
3041 	if (handle == NULL) {
3042 		DWARN(DBG_ALL_LDCS,
3043 		    "ldc_set_intr_mode: invalid channel handle\n");
3044 		return (EINVAL);
3045 	}
3046 	ldcp = (ldc_chan_t *)handle;
3047 
3048 	/*
3049 	 * Record no callbacks should be invoked
3050 	 */
3051 	mutex_enter(&ldcp->lock);
3052 
3053 	switch (cmode) {
3054 	case LDC_CB_DISABLE:
3055 		if (!ldcp->cb_enabled) {
3056 			DWARN(ldcp->id,
3057 			    "ldc_set_cb_mode: (0x%llx) callbacks disabled\n",
3058 			    ldcp->id);
3059 			break;
3060 		}
3061 		ldcp->cb_enabled = B_FALSE;
3062 
3063 		D1(ldcp->id, "ldc_set_cb_mode: (0x%llx) disabled callbacks\n",
3064 		    ldcp->id);
3065 		break;
3066 
3067 	case LDC_CB_ENABLE:
3068 		if (ldcp->cb_enabled) {
3069 			DWARN(ldcp->id,
3070 			    "ldc_set_cb_mode: (0x%llx) callbacks enabled\n",
3071 			    ldcp->id);
3072 			break;
3073 		}
3074 		ldcp->cb_enabled = B_TRUE;
3075 
3076 		D1(ldcp->id, "ldc_set_cb_mode: (0x%llx) enabled callbacks\n",
3077 		    ldcp->id);
3078 		break;
3079 	}
3080 
3081 	mutex_exit(&ldcp->lock);
3082 
3083 	return (0);
3084 }
3085 
3086 /*
3087  * Check to see if there are packets on the incoming queue
3088  * Will return hasdata = B_FALSE if there are no packets
3089  */
3090 int
3091 ldc_chkq(ldc_handle_t handle, boolean_t *hasdata)
3092 {
3093 	int 		rv;
3094 	uint64_t 	rx_head, rx_tail;
3095 	ldc_chan_t 	*ldcp;
3096 
3097 	if (handle == NULL) {
3098 		DWARN(DBG_ALL_LDCS, "ldc_chkq: invalid channel handle\n");
3099 		return (EINVAL);
3100 	}
3101 	ldcp = (ldc_chan_t *)handle;
3102 
3103 	*hasdata = B_FALSE;
3104 
3105 	mutex_enter(&ldcp->lock);
3106 
3107 	if (ldcp->tstate != TS_UP) {
3108 		D1(ldcp->id,
3109 		    "ldc_chkq: (0x%llx) channel is not up\n", ldcp->id);
3110 		mutex_exit(&ldcp->lock);
3111 		return (ECONNRESET);
3112 	}
3113 
3114 	/* Read packet(s) from the queue */
3115 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
3116 	    &ldcp->link_state);
3117 	if (rv != 0) {
3118 		cmn_err(CE_WARN,
3119 		    "ldc_chkq: (0x%lx) unable to read queue ptrs", ldcp->id);
3120 		mutex_exit(&ldcp->lock);
3121 		return (EIO);
3122 	}
3123 	/* reset the channel state if the channel went down */
3124 	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
3125 	    ldcp->link_state == LDC_CHANNEL_RESET) {
3126 		mutex_enter(&ldcp->tx_lock);
3127 		i_ldc_reset(ldcp, B_FALSE);
3128 		mutex_exit(&ldcp->tx_lock);
3129 		mutex_exit(&ldcp->lock);
3130 		return (ECONNRESET);
3131 	}
3132 
3133 	if ((rx_head != rx_tail) ||
3134 	    (ldcp->mode == LDC_MODE_STREAM && ldcp->stream_remains > 0)) {
3135 		D1(ldcp->id,
3136 		    "ldc_chkq: (0x%llx) queue has pkt(s) or buffered data\n",
3137 		    ldcp->id);
3138 		*hasdata = B_TRUE;
3139 	}
3140 
3141 	mutex_exit(&ldcp->lock);
3142 
3143 	return (0);
3144 }
3145 
3146 
3147 /*
3148  * Read 'size' amount of bytes or less. If incoming buffer
3149  * is more than 'size', ENOBUFS is returned.
3150  *
3151  * On return, size contains the number of bytes read.
3152  */
3153 int
3154 ldc_read(ldc_handle_t handle, caddr_t bufp, size_t *sizep)
3155 {
3156 	ldc_chan_t 	*ldcp;
3157 	uint64_t 	rx_head = 0, rx_tail = 0;
3158 	int		rv = 0, exit_val;
3159 
3160 	if (handle == NULL) {
3161 		DWARN(DBG_ALL_LDCS, "ldc_read: invalid channel handle\n");
3162 		return (EINVAL);
3163 	}
3164 
3165 	ldcp = (ldc_chan_t *)handle;
3166 
3167 	/* channel lock */
3168 	mutex_enter(&ldcp->lock);
3169 
3170 	if (ldcp->tstate != TS_UP) {
3171 		DWARN(ldcp->id,
3172 		    "ldc_read: (0x%llx) channel is not in UP state\n",
3173 		    ldcp->id);
3174 		exit_val = ECONNRESET;
3175 	} else {
3176 		exit_val = ldcp->read_p(ldcp, bufp, sizep);
3177 	}
3178 
3179 	/*
3180 	 * if queue has been drained - clear interrupt
3181 	 */
3182 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
3183 	    &ldcp->link_state);
3184 	if (rv != 0) {
3185 		cmn_err(CE_WARN, "ldc_read: (0x%lx) unable to read queue ptrs",
3186 		    ldcp->id);
3187 		mutex_enter(&ldcp->tx_lock);
3188 		i_ldc_reset(ldcp, B_TRUE);
3189 		mutex_exit(&ldcp->tx_lock);
3190 		return (ECONNRESET);
3191 	}
3192 
3193 	if (exit_val == 0) {
3194 		if (ldcp->link_state == LDC_CHANNEL_DOWN ||
3195 		    ldcp->link_state == LDC_CHANNEL_RESET) {
3196 			mutex_enter(&ldcp->tx_lock);
3197 			i_ldc_reset(ldcp, B_FALSE);
3198 			exit_val = ECONNRESET;
3199 			mutex_exit(&ldcp->tx_lock);
3200 		}
3201 		if ((rv == 0) &&
3202 		    (ldcp->rx_intr_state == LDC_INTR_PEND) &&
3203 		    (rx_head == rx_tail)) {
3204 			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
3205 		}
3206 	}
3207 
3208 	mutex_exit(&ldcp->lock);
3209 	return (exit_val);
3210 }
3211 
3212 /*
3213  * Basic raw mondo read -
3214  * no interpretation of mondo contents at all.
3215  *
3216  * Enter and exit with ldcp->lock held by caller
3217  */
3218 static int
3219 i_ldc_read_raw(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep)
3220 {
3221 	uint64_t 	q_size_mask;
3222 	ldc_msg_t 	*msgp;
3223 	uint8_t		*msgbufp;
3224 	int		rv = 0, space;
3225 	uint64_t 	rx_head, rx_tail;
3226 
3227 	space = *sizep;
3228 
3229 	if (space < LDC_PAYLOAD_SIZE_RAW)
3230 		return (ENOBUFS);
3231 
3232 	ASSERT(mutex_owned(&ldcp->lock));
3233 
3234 	/* compute mask for increment */
3235 	q_size_mask = (ldcp->rx_q_entries-1)<<LDC_PACKET_SHIFT;
3236 
3237 	/*
3238 	 * Read packet(s) from the queue
3239 	 */
3240 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
3241 	    &ldcp->link_state);
3242 	if (rv != 0) {
3243 		cmn_err(CE_WARN,
3244 		    "ldc_read_raw: (0x%lx) unable to read queue ptrs",
3245 		    ldcp->id);
3246 		return (EIO);
3247 	}
3248 	D1(ldcp->id, "ldc_read_raw: (0x%llx) rxh=0x%llx,"
3249 		" rxt=0x%llx, st=0x%llx\n",
3250 		ldcp->id, rx_head, rx_tail, ldcp->link_state);
3251 
3252 	/* reset the channel state if the channel went down */
3253 	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
3254 	    ldcp->link_state == LDC_CHANNEL_RESET) {
3255 		mutex_enter(&ldcp->tx_lock);
3256 		i_ldc_reset(ldcp, B_FALSE);
3257 		mutex_exit(&ldcp->tx_lock);
3258 		return (ECONNRESET);
3259 	}
3260 
3261 	/*
3262 	 * Check for empty queue
3263 	 */
3264 	if (rx_head == rx_tail) {
3265 		*sizep = 0;
3266 		return (0);
3267 	}
3268 
3269 	/* get the message */
3270 	msgp = (ldc_msg_t *)(ldcp->rx_q_va + rx_head);
3271 
3272 	/* if channel is in RAW mode, copy data and return */
3273 	msgbufp = (uint8_t *)&(msgp->raw[0]);
3274 
3275 	bcopy(msgbufp, target_bufp, LDC_PAYLOAD_SIZE_RAW);
3276 
3277 	DUMP_PAYLOAD(ldcp->id, msgbufp);
3278 
3279 	*sizep = LDC_PAYLOAD_SIZE_RAW;
3280 
3281 	rx_head = (rx_head + LDC_PACKET_SIZE) & q_size_mask;
3282 	rv = i_ldc_set_rx_head(ldcp, rx_head);
3283 
3284 	return (rv);
3285 }
3286 
3287 /*
3288  * Process LDC mondos to build larger packets
3289  * with either un-reliable or reliable delivery.
3290  *
3291  * Enter and exit with ldcp->lock held by caller
3292  */
3293 static int
3294 i_ldc_read_packet(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep)
3295 {
3296 	int		rv = 0;
3297 	uint64_t 	rx_head = 0, rx_tail = 0;
3298 	uint64_t 	curr_head = 0;
3299 	ldc_msg_t 	*msg;
3300 	caddr_t 	target;
3301 	size_t 		len = 0, bytes_read = 0;
3302 	int 		retries = 0;
3303 	uint64_t 	q_size_mask;
3304 	uint64_t	first_fragment = 0;
3305 
3306 	target = target_bufp;
3307 
3308 	ASSERT(mutex_owned(&ldcp->lock));
3309 
3310 	/* check if the buffer and size are valid */
3311 	if (target_bufp == NULL || *sizep == 0) {
3312 		DWARN(ldcp->id, "ldc_read: (0x%llx) invalid buffer/size\n",
3313 		    ldcp->id);
3314 		return (EINVAL);
3315 	}
3316 
3317 	/* compute mask for increment */
3318 	q_size_mask = (ldcp->rx_q_entries-1)<<LDC_PACKET_SHIFT;
3319 
3320 	/*
3321 	 * Read packet(s) from the queue
3322 	 */
3323 	rv = hv_ldc_rx_get_state(ldcp->id, &curr_head, &rx_tail,
3324 	    &ldcp->link_state);
3325 	if (rv != 0) {
3326 		cmn_err(CE_WARN, "ldc_read: (0x%lx) unable to read queue ptrs",
3327 		    ldcp->id);
3328 		mutex_enter(&ldcp->tx_lock);
3329 		i_ldc_reset(ldcp, B_TRUE);
3330 		mutex_exit(&ldcp->tx_lock);
3331 		return (ECONNRESET);
3332 	}
3333 	D1(ldcp->id, "ldc_read: (0x%llx) chd=0x%llx, tl=0x%llx, st=0x%llx\n",
3334 	    ldcp->id, curr_head, rx_tail, ldcp->link_state);
3335 
3336 	/* reset the channel state if the channel went down */
3337 	if (ldcp->link_state != LDC_CHANNEL_UP)
3338 		goto channel_is_reset;
3339 
3340 	for (;;) {
3341 
3342 		if (curr_head == rx_tail) {
3343 			rv = hv_ldc_rx_get_state(ldcp->id,
3344 			    &rx_head, &rx_tail, &ldcp->link_state);
3345 			if (rv != 0) {
3346 				cmn_err(CE_WARN,
3347 				    "ldc_read: (0x%lx) cannot read queue ptrs",
3348 				    ldcp->id);
3349 				mutex_enter(&ldcp->tx_lock);
3350 				i_ldc_reset(ldcp, B_TRUE);
3351 				mutex_exit(&ldcp->tx_lock);
3352 				return (ECONNRESET);
3353 			}
3354 			if (ldcp->link_state != LDC_CHANNEL_UP)
3355 				goto channel_is_reset;
3356 
3357 			if (curr_head == rx_tail) {
3358 
3359 				/* If in the middle of a fragmented xfer */
3360 				if (first_fragment != 0) {
3361 
3362 					/* wait for ldc_delay usecs */
3363 					drv_usecwait(ldc_delay);
3364 
3365 					if (++retries < ldc_max_retries)
3366 						continue;
3367 
3368 					*sizep = 0;
3369 					ldcp->last_msg_rcd = first_fragment - 1;
3370 					DWARN(DBG_ALL_LDCS, "ldc_read: "
3371 						"(0x%llx) read timeout",
3372 						ldcp->id);
3373 					return (EAGAIN);
3374 				}
3375 				*sizep = 0;
3376 				break;
3377 			}
3378 		}
3379 		retries = 0;
3380 
3381 		D2(ldcp->id,
3382 		    "ldc_read: (0x%llx) chd=0x%llx, rxhd=0x%llx, rxtl=0x%llx\n",
3383 		    ldcp->id, curr_head, rx_head, rx_tail);
3384 
3385 		/* get the message */
3386 		msg = (ldc_msg_t *)(ldcp->rx_q_va + curr_head);
3387 
3388 		DUMP_LDC_PKT(ldcp, "ldc_read received pkt",
3389 		    ldcp->rx_q_va + curr_head);
3390 
3391 		/* Check the message ID for the message received */
3392 		if ((rv = i_ldc_check_seqid(ldcp, msg)) != 0) {
3393 
3394 			DWARN(ldcp->id, "ldc_read: (0x%llx) seqid error, "
3395 			    "q_ptrs=0x%lx,0x%lx", ldcp->id, rx_head, rx_tail);
3396 
3397 			/* throw away data */
3398 			bytes_read = 0;
3399 
3400 			/* Reset last_msg_rcd to start of message */
3401 			if (first_fragment != 0) {
3402 				ldcp->last_msg_rcd = first_fragment - 1;
3403 				first_fragment = 0;
3404 			}
3405 			/*
3406 			 * Send a NACK -- invalid seqid
3407 			 * get the current tail for the response
3408 			 */
3409 			rv = i_ldc_send_pkt(ldcp, msg->type, LDC_NACK,
3410 			    (msg->ctrl & LDC_CTRL_MASK));
3411 			if (rv) {
3412 				cmn_err(CE_NOTE,
3413 				    "ldc_read: (0x%lx) err sending "
3414 				    "NACK msg\n", ldcp->id);
3415 
3416 				/* if cannot send NACK - reset channel */
3417 				mutex_enter(&ldcp->tx_lock);
3418 				i_ldc_reset(ldcp, B_FALSE);
3419 				mutex_exit(&ldcp->tx_lock);
3420 				rv = ECONNRESET;
3421 				break;
3422 			}
3423 
3424 			/* purge receive queue */
3425 			rv = i_ldc_set_rx_head(ldcp, rx_tail);
3426 
3427 			break;
3428 		}
3429 
3430 		/*
3431 		 * Process any messages of type CTRL messages
3432 		 * Future implementations should try to pass these
3433 		 * to LDC link by resetting the intr state.
3434 		 *
3435 		 * NOTE: not done as a switch() as type can be both ctrl+data
3436 		 */
3437 		if (msg->type & LDC_CTRL) {
3438 			if (rv = i_ldc_ctrlmsg(ldcp, msg)) {
3439 				if (rv == EAGAIN)
3440 					continue;
3441 				rv = i_ldc_set_rx_head(ldcp, rx_tail);
3442 				*sizep = 0;
3443 				bytes_read = 0;
3444 				break;
3445 			}
3446 		}
3447 
3448 		/* process data ACKs */
3449 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) {
3450 			if (rv = i_ldc_process_data_ACK(ldcp, msg)) {
3451 				*sizep = 0;
3452 				bytes_read = 0;
3453 				break;
3454 			}
3455 		}
3456 
3457 		/* process data messages */
3458 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_INFO)) {
3459 
3460 			uint8_t *msgbuf = (uint8_t *)(
3461 				(ldcp->mode == LDC_MODE_RELIABLE ||
3462 				ldcp->mode == LDC_MODE_STREAM)
3463 				? msg->rdata : msg->udata);
3464 
3465 			D2(ldcp->id,
3466 			    "ldc_read: (0x%llx) received data msg\n", ldcp->id);
3467 
3468 			/* get the packet length */
3469 			len = (msg->env & LDC_LEN_MASK);
3470 
3471 				/*
3472 				 * FUTURE OPTIMIZATION:
3473 				 * dont need to set q head for every
3474 				 * packet we read just need to do this when
3475 				 * we are done or need to wait for more
3476 				 * mondos to make a full packet - this is
3477 				 * currently expensive.
3478 				 */
3479 
3480 			if (first_fragment == 0) {
3481 
3482 				/*
3483 				 * first packets should always have the start
3484 				 * bit set (even for a single packet). If not
3485 				 * throw away the packet
3486 				 */
3487 				if (!(msg->env & LDC_FRAG_START)) {
3488 
3489 					DWARN(DBG_ALL_LDCS,
3490 					    "ldc_read: (0x%llx) not start - "
3491 					    "frag=%x\n", ldcp->id,
3492 					    (msg->env) & LDC_FRAG_MASK);
3493 
3494 					/* toss pkt, inc head, cont reading */
3495 					bytes_read = 0;
3496 					target = target_bufp;
3497 					curr_head =
3498 						(curr_head + LDC_PACKET_SIZE)
3499 						& q_size_mask;
3500 					if (rv = i_ldc_set_rx_head(ldcp,
3501 						curr_head))
3502 						break;
3503 
3504 					continue;
3505 				}
3506 
3507 				first_fragment = msg->seqid;
3508 			} else {
3509 				/* check to see if this is a pkt w/ START bit */
3510 				if (msg->env & LDC_FRAG_START) {
3511 					DWARN(DBG_ALL_LDCS,
3512 					    "ldc_read:(0x%llx) unexpected pkt"
3513 					    " env=0x%x discarding %d bytes,"
3514 					    " lastmsg=%d, currentmsg=%d\n",
3515 					    ldcp->id, msg->env&LDC_FRAG_MASK,
3516 					    bytes_read, ldcp->last_msg_rcd,
3517 					    msg->seqid);
3518 
3519 					/* throw data we have read so far */
3520 					bytes_read = 0;
3521 					target = target_bufp;
3522 					first_fragment = msg->seqid;
3523 
3524 					if (rv = i_ldc_set_rx_head(ldcp,
3525 						curr_head))
3526 						break;
3527 				}
3528 			}
3529 
3530 			/* copy (next) pkt into buffer */
3531 			if (len <= (*sizep - bytes_read)) {
3532 				bcopy(msgbuf, target, len);
3533 				target += len;
3534 				bytes_read += len;
3535 			} else {
3536 				/*
3537 				 * there is not enough space in the buffer to
3538 				 * read this pkt. throw message away & continue
3539 				 * reading data from queue
3540 				 */
3541 				DWARN(DBG_ALL_LDCS,
3542 				    "ldc_read: (0x%llx) buffer too small, "
3543 				    "head=0x%lx, expect=%d, got=%d\n", ldcp->id,
3544 				    curr_head, *sizep, bytes_read+len);
3545 
3546 				first_fragment = 0;
3547 				target = target_bufp;
3548 				bytes_read = 0;
3549 
3550 				/* throw away everything received so far */
3551 				if (rv = i_ldc_set_rx_head(ldcp, curr_head))
3552 					break;
3553 
3554 				/* continue reading remaining pkts */
3555 				continue;
3556 			}
3557 		}
3558 
3559 		/* set the message id */
3560 		ldcp->last_msg_rcd = msg->seqid;
3561 
3562 		/* move the head one position */
3563 		curr_head = (curr_head + LDC_PACKET_SIZE) & q_size_mask;
3564 
3565 		if (msg->env & LDC_FRAG_STOP) {
3566 
3567 			/*
3568 			 * All pkts that are part of this fragmented transfer
3569 			 * have been read or this was a single pkt read
3570 			 * or there was an error
3571 			 */
3572 
3573 			/* set the queue head */
3574 			if (rv = i_ldc_set_rx_head(ldcp, curr_head))
3575 				bytes_read = 0;
3576 
3577 			*sizep = bytes_read;
3578 
3579 			break;
3580 		}
3581 
3582 		/* advance head if it is a DATA ACK */
3583 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) {
3584 
3585 			/* set the queue head */
3586 			if (rv = i_ldc_set_rx_head(ldcp, curr_head)) {
3587 				bytes_read = 0;
3588 				break;
3589 			}
3590 
3591 			D2(ldcp->id, "ldc_read: (0x%llx) set ACK qhead 0x%llx",
3592 			    ldcp->id, curr_head);
3593 		}
3594 
3595 	} /* for (;;) */
3596 
3597 
3598 	/*
3599 	 * If useful data was read - Send msg ACK
3600 	 * OPTIMIZE: do not send ACK for all msgs - use some frequency
3601 	 */
3602 	if ((bytes_read > 0) && (ldcp->mode == LDC_MODE_RELIABLE ||
3603 		ldcp->mode == LDC_MODE_STREAM)) {
3604 
3605 		rv = i_ldc_send_pkt(ldcp, LDC_DATA, LDC_ACK, 0);
3606 		if (rv && rv != EWOULDBLOCK) {
3607 			cmn_err(CE_NOTE,
3608 			    "ldc_read: (0x%lx) cannot send ACK\n", ldcp->id);
3609 
3610 			/* if cannot send ACK - reset channel */
3611 			goto channel_is_reset;
3612 		}
3613 	}
3614 
3615 	D2(ldcp->id, "ldc_read: (0x%llx) end size=%d", ldcp->id, *sizep);
3616 
3617 	return (rv);
3618 
3619 channel_is_reset:
3620 	mutex_enter(&ldcp->tx_lock);
3621 	i_ldc_reset(ldcp, B_FALSE);
3622 	mutex_exit(&ldcp->tx_lock);
3623 	return (ECONNRESET);
3624 }
3625 
3626 /*
3627  * Use underlying reliable packet mechanism to fetch
3628  * and buffer incoming packets so we can hand them back as
3629  * a basic byte stream.
3630  *
3631  * Enter and exit with ldcp->lock held by caller
3632  */
3633 static int
3634 i_ldc_read_stream(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep)
3635 {
3636 	int	rv;
3637 	size_t	size;
3638 
3639 	ASSERT(mutex_owned(&ldcp->lock));
3640 
3641 	D2(ldcp->id, "i_ldc_read_stream: (0x%llx) buffer size=%d",
3642 		ldcp->id, *sizep);
3643 
3644 	if (ldcp->stream_remains == 0) {
3645 		size = ldcp->mtu;
3646 		rv = i_ldc_read_packet(ldcp,
3647 			(caddr_t)ldcp->stream_bufferp, &size);
3648 		D2(ldcp->id, "i_ldc_read_stream: read packet (0x%llx) size=%d",
3649 			ldcp->id, size);
3650 
3651 		if (rv != 0)
3652 			return (rv);
3653 
3654 		ldcp->stream_remains = size;
3655 		ldcp->stream_offset = 0;
3656 	}
3657 
3658 	size = MIN(ldcp->stream_remains, *sizep);
3659 
3660 	bcopy(ldcp->stream_bufferp + ldcp->stream_offset, target_bufp, size);
3661 	ldcp->stream_offset += size;
3662 	ldcp->stream_remains -= size;
3663 
3664 	D2(ldcp->id, "i_ldc_read_stream: (0x%llx) fill from buffer size=%d",
3665 		ldcp->id, size);
3666 
3667 	*sizep = size;
3668 	return (0);
3669 }
3670 
3671 /*
3672  * Write specified amount of bytes to the channel
3673  * in multiple pkts of pkt_payload size. Each
3674  * packet is tagged with an unique packet ID in
3675  * the case of a reliable link.
3676  *
3677  * On return, size contains the number of bytes written.
3678  */
3679 int
3680 ldc_write(ldc_handle_t handle, caddr_t buf, size_t *sizep)
3681 {
3682 	ldc_chan_t	*ldcp;
3683 	int		rv = 0;
3684 
3685 	if (handle == NULL) {
3686 		DWARN(DBG_ALL_LDCS, "ldc_write: invalid channel handle\n");
3687 		return (EINVAL);
3688 	}
3689 	ldcp = (ldc_chan_t *)handle;
3690 
3691 	/* check if writes can occur */
3692 	if (!mutex_tryenter(&ldcp->tx_lock)) {
3693 		/*
3694 		 * Could not get the lock - channel could
3695 		 * be in the process of being unconfigured
3696 		 * or reader has encountered an error
3697 		 */
3698 		return (EAGAIN);
3699 	}
3700 
3701 	/* check if non-zero data to write */
3702 	if (buf == NULL || sizep == NULL) {
3703 		DWARN(ldcp->id, "ldc_write: (0x%llx) invalid data write\n",
3704 		    ldcp->id);
3705 		mutex_exit(&ldcp->tx_lock);
3706 		return (EINVAL);
3707 	}
3708 
3709 	if (*sizep == 0) {
3710 		DWARN(ldcp->id, "ldc_write: (0x%llx) write size of zero\n",
3711 		    ldcp->id);
3712 		mutex_exit(&ldcp->tx_lock);
3713 		return (0);
3714 	}
3715 
3716 	/* Check if channel is UP for data exchange */
3717 	if (ldcp->tstate != TS_UP) {
3718 		DWARN(ldcp->id,
3719 		    "ldc_write: (0x%llx) channel is not in UP state\n",
3720 		    ldcp->id);
3721 		*sizep = 0;
3722 		rv = ECONNRESET;
3723 	} else {
3724 		rv = ldcp->write_p(ldcp, buf, sizep);
3725 	}
3726 
3727 	mutex_exit(&ldcp->tx_lock);
3728 
3729 	return (rv);
3730 }
3731 
3732 /*
3733  * Write a raw packet to the channel
3734  * On return, size contains the number of bytes written.
3735  */
3736 static int
3737 i_ldc_write_raw(ldc_chan_t *ldcp, caddr_t buf, size_t *sizep)
3738 {
3739 	ldc_msg_t 	*ldcmsg;
3740 	uint64_t 	tx_head, tx_tail, new_tail;
3741 	int		rv = 0;
3742 	size_t		size;
3743 
3744 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
3745 	ASSERT(ldcp->mode == LDC_MODE_RAW);
3746 
3747 	size = *sizep;
3748 
3749 	/*
3750 	 * Check to see if the packet size is less than or
3751 	 * equal to packet size support in raw mode
3752 	 */
3753 	if (size > ldcp->pkt_payload) {
3754 		DWARN(ldcp->id,
3755 		    "ldc_write: (0x%llx) invalid size (0x%llx) for RAW mode\n",
3756 		    ldcp->id, *sizep);
3757 		*sizep = 0;
3758 		return (EMSGSIZE);
3759 	}
3760 
3761 	/* get the qptrs for the tx queue */
3762 	rv = hv_ldc_tx_get_state(ldcp->id,
3763 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
3764 	if (rv != 0) {
3765 		cmn_err(CE_WARN,
3766 		    "ldc_write: (0x%lx) cannot read queue ptrs\n", ldcp->id);
3767 		*sizep = 0;
3768 		return (EIO);
3769 	}
3770 
3771 	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
3772 	    ldcp->link_state == LDC_CHANNEL_RESET) {
3773 		DWARN(ldcp->id,
3774 		    "ldc_write: (0x%llx) channel down/reset\n", ldcp->id);
3775 
3776 		*sizep = 0;
3777 		if (mutex_tryenter(&ldcp->lock)) {
3778 			i_ldc_reset(ldcp, B_FALSE);
3779 			mutex_exit(&ldcp->lock);
3780 		} else {
3781 			/*
3782 			 * Release Tx lock, and then reacquire channel
3783 			 * and Tx lock in correct order
3784 			 */
3785 			mutex_exit(&ldcp->tx_lock);
3786 			mutex_enter(&ldcp->lock);
3787 			mutex_enter(&ldcp->tx_lock);
3788 			i_ldc_reset(ldcp, B_FALSE);
3789 			mutex_exit(&ldcp->lock);
3790 		}
3791 		return (ECONNRESET);
3792 	}
3793 
3794 	tx_tail = ldcp->tx_tail;
3795 	tx_head = ldcp->tx_head;
3796 	new_tail = (tx_tail + LDC_PACKET_SIZE) &
3797 		((ldcp->tx_q_entries-1) << LDC_PACKET_SHIFT);
3798 
3799 	if (new_tail == tx_head) {
3800 		DWARN(DBG_ALL_LDCS,
3801 		    "ldc_write: (0x%llx) TX queue is full\n", ldcp->id);
3802 		*sizep = 0;
3803 		return (EWOULDBLOCK);
3804 	}
3805 
3806 	D2(ldcp->id, "ldc_write: (0x%llx) start xfer size=%d",
3807 	    ldcp->id, size);
3808 
3809 	/* Send the data now */
3810 	ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
3811 
3812 	/* copy the data into pkt */
3813 	bcopy((uint8_t *)buf, ldcmsg, size);
3814 
3815 	/* increment tail */
3816 	tx_tail = new_tail;
3817 
3818 	/*
3819 	 * All packets have been copied into the TX queue
3820 	 * update the tail ptr in the HV
3821 	 */
3822 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
3823 	if (rv) {
3824 		if (rv == EWOULDBLOCK) {
3825 			DWARN(ldcp->id, "ldc_write: (0x%llx) write timed out\n",
3826 			    ldcp->id);
3827 			*sizep = 0;
3828 			return (EWOULDBLOCK);
3829 		}
3830 
3831 		*sizep = 0;
3832 		if (mutex_tryenter(&ldcp->lock)) {
3833 			i_ldc_reset(ldcp, B_FALSE);
3834 			mutex_exit(&ldcp->lock);
3835 		} else {
3836 			/*
3837 			 * Release Tx lock, and then reacquire channel
3838 			 * and Tx lock in correct order
3839 			 */
3840 			mutex_exit(&ldcp->tx_lock);
3841 			mutex_enter(&ldcp->lock);
3842 			mutex_enter(&ldcp->tx_lock);
3843 			i_ldc_reset(ldcp, B_FALSE);
3844 			mutex_exit(&ldcp->lock);
3845 		}
3846 		return (ECONNRESET);
3847 	}
3848 
3849 	ldcp->tx_tail = tx_tail;
3850 	*sizep = size;
3851 
3852 	D2(ldcp->id, "ldc_write: (0x%llx) end xfer size=%d", ldcp->id, size);
3853 
3854 	return (rv);
3855 }
3856 
3857 
3858 /*
3859  * Write specified amount of bytes to the channel
3860  * in multiple pkts of pkt_payload size. Each
3861  * packet is tagged with an unique packet ID in
3862  * the case of a reliable link.
3863  *
3864  * On return, size contains the number of bytes written.
3865  * This function needs to ensure that the write size is < MTU size
3866  */
3867 static int
3868 i_ldc_write_packet(ldc_chan_t *ldcp, caddr_t buf, size_t *size)
3869 {
3870 	ldc_msg_t 	*ldcmsg;
3871 	uint64_t 	tx_head, tx_tail, new_tail, start;
3872 	uint64_t	txq_size_mask, numavail;
3873 	uint8_t 	*msgbuf, *source = (uint8_t *)buf;
3874 	size_t 		len, bytes_written = 0, remaining;
3875 	int		rv;
3876 	uint32_t	curr_seqid;
3877 
3878 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
3879 
3880 	ASSERT(ldcp->mode == LDC_MODE_RELIABLE ||
3881 		ldcp->mode == LDC_MODE_UNRELIABLE ||
3882 		ldcp->mode == LDC_MODE_STREAM);
3883 
3884 	/* compute mask for increment */
3885 	txq_size_mask = (ldcp->tx_q_entries - 1) << LDC_PACKET_SHIFT;
3886 
3887 	/* get the qptrs for the tx queue */
3888 	rv = hv_ldc_tx_get_state(ldcp->id,
3889 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
3890 	if (rv != 0) {
3891 		cmn_err(CE_WARN,
3892 		    "ldc_write: (0x%lx) cannot read queue ptrs\n", ldcp->id);
3893 		*size = 0;
3894 		return (EIO);
3895 	}
3896 
3897 	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
3898 	    ldcp->link_state == LDC_CHANNEL_RESET) {
3899 		DWARN(ldcp->id,
3900 		    "ldc_write: (0x%llx) channel down/reset\n", ldcp->id);
3901 		*size = 0;
3902 		if (mutex_tryenter(&ldcp->lock)) {
3903 			i_ldc_reset(ldcp, B_FALSE);
3904 			mutex_exit(&ldcp->lock);
3905 		} else {
3906 			/*
3907 			 * Release Tx lock, and then reacquire channel
3908 			 * and Tx lock in correct order
3909 			 */
3910 			mutex_exit(&ldcp->tx_lock);
3911 			mutex_enter(&ldcp->lock);
3912 			mutex_enter(&ldcp->tx_lock);
3913 			i_ldc_reset(ldcp, B_FALSE);
3914 			mutex_exit(&ldcp->lock);
3915 		}
3916 		return (ECONNRESET);
3917 	}
3918 
3919 	tx_tail = ldcp->tx_tail;
3920 	new_tail = (tx_tail + LDC_PACKET_SIZE) %
3921 		(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
3922 
3923 	/*
3924 	 * Link mode determines whether we use HV Tx head or the
3925 	 * private protocol head (corresponding to last ACKd pkt) for
3926 	 * determining how much we can write
3927 	 */
3928 	tx_head = (ldcp->mode == LDC_MODE_RELIABLE ||
3929 		ldcp->mode == LDC_MODE_STREAM)
3930 		? ldcp->tx_ackd_head : ldcp->tx_head;
3931 	if (new_tail == tx_head) {
3932 		DWARN(DBG_ALL_LDCS,
3933 		    "ldc_write: (0x%llx) TX queue is full\n", ldcp->id);
3934 		*size = 0;
3935 		return (EWOULDBLOCK);
3936 	}
3937 
3938 	/*
3939 	 * Make sure that the LDC Tx queue has enough space
3940 	 */
3941 	numavail = (tx_head >> LDC_PACKET_SHIFT) - (tx_tail >> LDC_PACKET_SHIFT)
3942 		+ ldcp->tx_q_entries - 1;
3943 	numavail %= ldcp->tx_q_entries;
3944 
3945 	if (*size > (numavail * ldcp->pkt_payload)) {
3946 		DWARN(DBG_ALL_LDCS,
3947 		    "ldc_write: (0x%llx) TX queue has no space\n", ldcp->id);
3948 		return (EWOULDBLOCK);
3949 	}
3950 
3951 	D2(ldcp->id, "ldc_write: (0x%llx) start xfer size=%d",
3952 	    ldcp->id, *size);
3953 
3954 	/* Send the data now */
3955 	bytes_written = 0;
3956 	curr_seqid = ldcp->last_msg_snt;
3957 	start = tx_tail;
3958 
3959 	while (*size > bytes_written) {
3960 
3961 		ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
3962 
3963 		msgbuf = (uint8_t *)((ldcp->mode == LDC_MODE_RELIABLE ||
3964 			ldcp->mode == LDC_MODE_STREAM)
3965 			? ldcmsg->rdata : ldcmsg->udata);
3966 
3967 		ldcmsg->type = LDC_DATA;
3968 		ldcmsg->stype = LDC_INFO;
3969 		ldcmsg->ctrl = 0;
3970 
3971 		remaining = *size - bytes_written;
3972 		len = min(ldcp->pkt_payload, remaining);
3973 		ldcmsg->env = (uint8_t)len;
3974 
3975 		curr_seqid++;
3976 		ldcmsg->seqid = curr_seqid;
3977 
3978 		/* copy the data into pkt */
3979 		bcopy(source, msgbuf, len);
3980 
3981 		source += len;
3982 		bytes_written += len;
3983 
3984 		/* increment tail */
3985 		tx_tail = (tx_tail + LDC_PACKET_SIZE) & txq_size_mask;
3986 
3987 		ASSERT(tx_tail != tx_head);
3988 	}
3989 
3990 	/* Set the start and stop bits */
3991 	ldcmsg->env |= LDC_FRAG_STOP;
3992 	ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + start);
3993 	ldcmsg->env |= LDC_FRAG_START;
3994 
3995 	/*
3996 	 * All packets have been copied into the TX queue
3997 	 * update the tail ptr in the HV
3998 	 */
3999 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
4000 	if (rv == 0) {
4001 		ldcp->tx_tail = tx_tail;
4002 		ldcp->last_msg_snt = curr_seqid;
4003 		*size = bytes_written;
4004 	} else {
4005 		int rv2;
4006 
4007 		if (rv != EWOULDBLOCK) {
4008 			*size = 0;
4009 			if (mutex_tryenter(&ldcp->lock)) {
4010 				i_ldc_reset(ldcp, B_FALSE);
4011 				mutex_exit(&ldcp->lock);
4012 			} else {
4013 				/*
4014 				 * Release Tx lock, and then reacquire channel
4015 				 * and Tx lock in correct order
4016 				 */
4017 				mutex_exit(&ldcp->tx_lock);
4018 				mutex_enter(&ldcp->lock);
4019 				mutex_enter(&ldcp->tx_lock);
4020 				i_ldc_reset(ldcp, B_FALSE);
4021 				mutex_exit(&ldcp->lock);
4022 			}
4023 			return (ECONNRESET);
4024 		}
4025 
4026 		D1(ldcp->id, "hv_tx_set_tail returns 0x%x (head 0x%x, "
4027 			"old tail 0x%x, new tail 0x%x, qsize=0x%x)\n",
4028 			rv, ldcp->tx_head, ldcp->tx_tail, tx_tail,
4029 			(ldcp->tx_q_entries << LDC_PACKET_SHIFT));
4030 
4031 		rv2 = hv_ldc_tx_get_state(ldcp->id,
4032 		    &tx_head, &tx_tail, &ldcp->link_state);
4033 
4034 		D1(ldcp->id, "hv_ldc_tx_get_state returns 0x%x "
4035 			"(head 0x%x, tail 0x%x state 0x%x)\n",
4036 			rv2, tx_head, tx_tail, ldcp->link_state);
4037 
4038 		*size = 0;
4039 	}
4040 
4041 	D2(ldcp->id, "ldc_write: (0x%llx) end xfer size=%d", ldcp->id, *size);
4042 
4043 	return (rv);
4044 }
4045 
4046 /*
4047  * Write specified amount of bytes to the channel
4048  * in multiple pkts of pkt_payload size. Each
4049  * packet is tagged with an unique packet ID in
4050  * the case of a reliable link.
4051  *
4052  * On return, size contains the number of bytes written.
4053  * This function needs to ensure that the write size is < MTU size
4054  */
4055 static int
4056 i_ldc_write_stream(ldc_chan_t *ldcp, caddr_t buf, size_t *sizep)
4057 {
4058 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
4059 	ASSERT(ldcp->mode == LDC_MODE_STREAM);
4060 
4061 	/* Truncate packet to max of MTU size */
4062 	if (*sizep > ldcp->mtu) *sizep = ldcp->mtu;
4063 	return (i_ldc_write_packet(ldcp, buf, sizep));
4064 }
4065 
4066 
4067 /*
4068  * Interfaces for channel nexus to register/unregister with LDC module
4069  * The nexus will register functions to be used to register individual
4070  * channels with the nexus and enable interrupts for the channels
4071  */
4072 int
4073 ldc_register(ldc_cnex_t *cinfo)
4074 {
4075 	ldc_chan_t	*ldcp;
4076 
4077 	if (cinfo == NULL || cinfo->dip == NULL ||
4078 	    cinfo->reg_chan == NULL || cinfo->unreg_chan == NULL ||
4079 	    cinfo->add_intr == NULL || cinfo->rem_intr == NULL ||
4080 	    cinfo->clr_intr == NULL) {
4081 
4082 		DWARN(DBG_ALL_LDCS, "ldc_register: invalid nexus info\n");
4083 		return (EINVAL);
4084 	}
4085 
4086 	mutex_enter(&ldcssp->lock);
4087 
4088 	/* nexus registration */
4089 	ldcssp->cinfo.dip = cinfo->dip;
4090 	ldcssp->cinfo.reg_chan = cinfo->reg_chan;
4091 	ldcssp->cinfo.unreg_chan = cinfo->unreg_chan;
4092 	ldcssp->cinfo.add_intr = cinfo->add_intr;
4093 	ldcssp->cinfo.rem_intr = cinfo->rem_intr;
4094 	ldcssp->cinfo.clr_intr = cinfo->clr_intr;
4095 
4096 	/* register any channels that might have been previously initialized */
4097 	ldcp = ldcssp->chan_list;
4098 	while (ldcp) {
4099 		if ((ldcp->tstate & TS_QCONF_RDY) &&
4100 		    (ldcp->tstate & TS_CNEX_RDY) == 0)
4101 			(void) i_ldc_register_channel(ldcp);
4102 
4103 		ldcp = ldcp->next;
4104 	}
4105 
4106 	mutex_exit(&ldcssp->lock);
4107 
4108 	return (0);
4109 }
4110 
4111 int
4112 ldc_unregister(ldc_cnex_t *cinfo)
4113 {
4114 	if (cinfo == NULL || cinfo->dip == NULL) {
4115 		DWARN(DBG_ALL_LDCS, "ldc_unregister: invalid nexus info\n");
4116 		return (EINVAL);
4117 	}
4118 
4119 	mutex_enter(&ldcssp->lock);
4120 
4121 	if (cinfo->dip != ldcssp->cinfo.dip) {
4122 		DWARN(DBG_ALL_LDCS, "ldc_unregister: invalid dip\n");
4123 		mutex_exit(&ldcssp->lock);
4124 		return (EINVAL);
4125 	}
4126 
4127 	/* nexus unregister */
4128 	ldcssp->cinfo.dip = NULL;
4129 	ldcssp->cinfo.reg_chan = NULL;
4130 	ldcssp->cinfo.unreg_chan = NULL;
4131 	ldcssp->cinfo.add_intr = NULL;
4132 	ldcssp->cinfo.rem_intr = NULL;
4133 	ldcssp->cinfo.clr_intr = NULL;
4134 
4135 	mutex_exit(&ldcssp->lock);
4136 
4137 	return (0);
4138 }
4139 
4140 
4141 /* ------------------------------------------------------------------------- */
4142 
4143 /*
4144  * Allocate a memory handle for the channel and link it into the list
4145  * Also choose which memory table to use if this is the first handle
4146  * being assigned to this channel
4147  */
4148 int
4149 ldc_mem_alloc_handle(ldc_handle_t handle, ldc_mem_handle_t *mhandle)
4150 {
4151 	ldc_chan_t 	*ldcp;
4152 	ldc_mhdl_t	*mhdl;
4153 
4154 	if (handle == NULL) {
4155 		DWARN(DBG_ALL_LDCS,
4156 		    "ldc_mem_alloc_handle: invalid channel handle\n");
4157 		return (EINVAL);
4158 	}
4159 	ldcp = (ldc_chan_t *)handle;
4160 
4161 	mutex_enter(&ldcp->lock);
4162 
4163 	/* check to see if channel is initalized */
4164 	if ((ldcp->tstate & ~TS_IN_RESET) < TS_INIT) {
4165 		DWARN(ldcp->id,
4166 		    "ldc_mem_alloc_handle: (0x%llx) channel not initialized\n",
4167 		    ldcp->id);
4168 		mutex_exit(&ldcp->lock);
4169 		return (EINVAL);
4170 	}
4171 
4172 	/* allocate handle for channel */
4173 	mhdl = kmem_cache_alloc(ldcssp->memhdl_cache, KM_SLEEP);
4174 
4175 	/* initialize the lock */
4176 	mutex_init(&mhdl->lock, NULL, MUTEX_DRIVER, NULL);
4177 
4178 	mhdl->myshadow = B_FALSE;
4179 	mhdl->memseg = NULL;
4180 	mhdl->ldcp = ldcp;
4181 	mhdl->status = LDC_UNBOUND;
4182 
4183 	/* insert memory handle (@ head) into list */
4184 	if (ldcp->mhdl_list == NULL) {
4185 		ldcp->mhdl_list = mhdl;
4186 		mhdl->next = NULL;
4187 	} else {
4188 		/* insert @ head */
4189 		mhdl->next = ldcp->mhdl_list;
4190 		ldcp->mhdl_list = mhdl;
4191 	}
4192 
4193 	/* return the handle */
4194 	*mhandle = (ldc_mem_handle_t)mhdl;
4195 
4196 	mutex_exit(&ldcp->lock);
4197 
4198 	D1(ldcp->id, "ldc_mem_alloc_handle: (0x%llx) allocated handle 0x%llx\n",
4199 	    ldcp->id, mhdl);
4200 
4201 	return (0);
4202 }
4203 
4204 /*
4205  * Free memory handle for the channel and unlink it from the list
4206  */
4207 int
4208 ldc_mem_free_handle(ldc_mem_handle_t mhandle)
4209 {
4210 	ldc_mhdl_t 	*mhdl, *phdl;
4211 	ldc_chan_t 	*ldcp;
4212 
4213 	if (mhandle == NULL) {
4214 		DWARN(DBG_ALL_LDCS,
4215 		    "ldc_mem_free_handle: invalid memory handle\n");
4216 		return (EINVAL);
4217 	}
4218 	mhdl = (ldc_mhdl_t *)mhandle;
4219 
4220 	mutex_enter(&mhdl->lock);
4221 
4222 	ldcp = mhdl->ldcp;
4223 
4224 	if (mhdl->status == LDC_BOUND || mhdl->status == LDC_MAPPED) {
4225 		DWARN(ldcp->id,
4226 		    "ldc_mem_free_handle: cannot free, 0x%llx hdl bound\n",
4227 		    mhdl);
4228 		mutex_exit(&mhdl->lock);
4229 		return (EINVAL);
4230 	}
4231 	mutex_exit(&mhdl->lock);
4232 
4233 	mutex_enter(&ldcp->mlist_lock);
4234 
4235 	phdl = ldcp->mhdl_list;
4236 
4237 	/* first handle */
4238 	if (phdl == mhdl) {
4239 		ldcp->mhdl_list = mhdl->next;
4240 		mutex_destroy(&mhdl->lock);
4241 		kmem_cache_free(ldcssp->memhdl_cache, mhdl);
4242 
4243 		D1(ldcp->id,
4244 		    "ldc_mem_free_handle: (0x%llx) freed handle 0x%llx\n",
4245 		    ldcp->id, mhdl);
4246 	} else {
4247 		/* walk the list - unlink and free */
4248 		while (phdl != NULL) {
4249 			if (phdl->next == mhdl) {
4250 				phdl->next = mhdl->next;
4251 				mutex_destroy(&mhdl->lock);
4252 				kmem_cache_free(ldcssp->memhdl_cache, mhdl);
4253 				D1(ldcp->id,
4254 				    "ldc_mem_free_handle: (0x%llx) freed "
4255 				    "handle 0x%llx\n", ldcp->id, mhdl);
4256 				break;
4257 			}
4258 			phdl = phdl->next;
4259 		}
4260 	}
4261 
4262 	if (phdl == NULL) {
4263 		DWARN(ldcp->id,
4264 		    "ldc_mem_free_handle: invalid handle 0x%llx\n", mhdl);
4265 		mutex_exit(&ldcp->mlist_lock);
4266 		return (EINVAL);
4267 	}
4268 
4269 	mutex_exit(&ldcp->mlist_lock);
4270 
4271 	return (0);
4272 }
4273 
4274 /*
4275  * Bind a memory handle to a virtual address.
4276  * The virtual address is converted to the corresponding real addresses.
4277  * Returns pointer to the first ldc_mem_cookie and the total number
4278  * of cookies for this virtual address. Other cookies can be obtained
4279  * using the ldc_mem_nextcookie() call. If the pages are stored in
4280  * consecutive locations in the table, a single cookie corresponding to
4281  * the first location is returned. The cookie size spans all the entries.
4282  *
4283  * If the VA corresponds to a page that is already being exported, reuse
4284  * the page and do not export it again. Bump the page's use count.
4285  */
4286 int
4287 ldc_mem_bind_handle(ldc_mem_handle_t mhandle, caddr_t vaddr, size_t len,
4288     uint8_t mtype, uint8_t perm, ldc_mem_cookie_t *cookie, uint32_t *ccount)
4289 {
4290 	ldc_mhdl_t	*mhdl;
4291 	ldc_chan_t 	*ldcp;
4292 	ldc_mtbl_t	*mtbl;
4293 	ldc_memseg_t	*memseg;
4294 	ldc_mte_t	tmp_mte;
4295 	uint64_t	index, prev_index = 0;
4296 	int64_t		cookie_idx;
4297 	uintptr_t	raddr, ra_aligned;
4298 	uint64_t	psize, poffset, v_offset;
4299 	uint64_t	pg_shift, pg_size, pg_size_code, pg_mask;
4300 	pgcnt_t		npages;
4301 	caddr_t		v_align, addr;
4302 	int 		i, rv;
4303 
4304 	if (mhandle == NULL) {
4305 		DWARN(DBG_ALL_LDCS,
4306 		    "ldc_mem_bind_handle: invalid memory handle\n");
4307 		return (EINVAL);
4308 	}
4309 	mhdl = (ldc_mhdl_t *)mhandle;
4310 	ldcp = mhdl->ldcp;
4311 
4312 	/* clear count */
4313 	*ccount = 0;
4314 
4315 	mutex_enter(&mhdl->lock);
4316 
4317 	if (mhdl->status == LDC_BOUND || mhdl->memseg != NULL) {
4318 		DWARN(ldcp->id,
4319 		    "ldc_mem_bind_handle: (0x%x) handle already bound\n",
4320 		    mhandle);
4321 		mutex_exit(&mhdl->lock);
4322 		return (EINVAL);
4323 	}
4324 
4325 	/* Force address and size to be 8-byte aligned */
4326 	if ((((uintptr_t)vaddr | len) & 0x7) != 0) {
4327 		DWARN(ldcp->id,
4328 		    "ldc_mem_bind_handle: addr/size is not 8-byte aligned\n");
4329 		mutex_exit(&mhdl->lock);
4330 		return (EINVAL);
4331 	}
4332 
4333 	/*
4334 	 * If this channel is binding a memory handle for the
4335 	 * first time allocate it a memory map table and initialize it
4336 	 */
4337 	if ((mtbl = ldcp->mtbl) == NULL) {
4338 
4339 		mutex_enter(&ldcp->lock);
4340 
4341 		/* Allocate and initialize the map table structure */
4342 		mtbl = kmem_zalloc(sizeof (ldc_mtbl_t), KM_SLEEP);
4343 		mtbl->num_entries = mtbl->num_avail = ldc_maptable_entries;
4344 		mtbl->size = ldc_maptable_entries * sizeof (ldc_mte_slot_t);
4345 		mtbl->next_entry = NULL;
4346 		mtbl->contigmem = B_TRUE;
4347 
4348 		/* Allocate the table itself */
4349 		mtbl->table = (ldc_mte_slot_t *)
4350 			contig_mem_alloc_align(mtbl->size, MMU_PAGESIZE);
4351 		if (mtbl->table == NULL) {
4352 
4353 			/* allocate a page of memory using kmem_alloc */
4354 			mtbl->table = kmem_alloc(MMU_PAGESIZE, KM_SLEEP);
4355 			mtbl->size = MMU_PAGESIZE;
4356 			mtbl->contigmem = B_FALSE;
4357 			mtbl->num_entries = mtbl->num_avail =
4358 				mtbl->size / sizeof (ldc_mte_slot_t);
4359 			DWARN(ldcp->id,
4360 			    "ldc_mem_bind_handle: (0x%llx) reduced tbl size "
4361 			    "to %lx entries\n", ldcp->id, mtbl->num_entries);
4362 		}
4363 
4364 		/* zero out the memory */
4365 		bzero(mtbl->table, mtbl->size);
4366 
4367 		/* initialize the lock */
4368 		mutex_init(&mtbl->lock, NULL, MUTEX_DRIVER, NULL);
4369 
4370 		/* register table for this channel */
4371 		rv = hv_ldc_set_map_table(ldcp->id,
4372 		    va_to_pa(mtbl->table), mtbl->num_entries);
4373 		if (rv != 0) {
4374 			cmn_err(CE_WARN,
4375 			    "ldc_mem_bind_handle: (0x%lx) err %d mapping tbl",
4376 			    ldcp->id, rv);
4377 			if (mtbl->contigmem)
4378 				contig_mem_free(mtbl->table, mtbl->size);
4379 			else
4380 				kmem_free(mtbl->table, mtbl->size);
4381 			mutex_destroy(&mtbl->lock);
4382 			kmem_free(mtbl, sizeof (ldc_mtbl_t));
4383 			mutex_exit(&ldcp->lock);
4384 			mutex_exit(&mhdl->lock);
4385 			return (EIO);
4386 		}
4387 
4388 		ldcp->mtbl = mtbl;
4389 		mutex_exit(&ldcp->lock);
4390 
4391 		D1(ldcp->id,
4392 		    "ldc_mem_bind_handle: (0x%llx) alloc'd map table 0x%llx\n",
4393 		    ldcp->id, ldcp->mtbl->table);
4394 	}
4395 
4396 	/* FUTURE: get the page size, pgsz code, and shift */
4397 	pg_size = MMU_PAGESIZE;
4398 	pg_size_code = page_szc(pg_size);
4399 	pg_shift = page_get_shift(pg_size_code);
4400 	pg_mask = ~(pg_size - 1);
4401 
4402 	D1(ldcp->id, "ldc_mem_bind_handle: (0x%llx) binding "
4403 	    "va 0x%llx pgsz=0x%llx, pgszc=0x%llx, pg_shift=0x%llx\n",
4404 	    ldcp->id, vaddr, pg_size, pg_size_code, pg_shift);
4405 
4406 	/* aligned VA and its offset */
4407 	v_align = (caddr_t)(((uintptr_t)vaddr) & ~(pg_size - 1));
4408 	v_offset = ((uintptr_t)vaddr) & (pg_size - 1);
4409 
4410 	npages = (len+v_offset)/pg_size;
4411 	npages = ((len+v_offset)%pg_size == 0) ? npages : npages+1;
4412 
4413 	D1(ldcp->id, "ldc_mem_bind_handle: binding "
4414 	    "(0x%llx) v=0x%llx,val=0x%llx,off=0x%x,pgs=0x%x\n",
4415 	    ldcp->id, vaddr, v_align, v_offset, npages);
4416 
4417 	/* lock the memory table - exclusive access to channel */
4418 	mutex_enter(&mtbl->lock);
4419 
4420 	if (npages > mtbl->num_avail) {
4421 		D1(ldcp->id, "ldc_mem_bind_handle: (0x%llx) no table entries\n",
4422 		    ldcp->id);
4423 		mutex_exit(&mtbl->lock);
4424 		mutex_exit(&mhdl->lock);
4425 		return (ENOMEM);
4426 	}
4427 
4428 	/* Allocate a memseg structure */
4429 	memseg = mhdl->memseg =
4430 		kmem_cache_alloc(ldcssp->memseg_cache, KM_SLEEP);
4431 
4432 	/* Allocate memory to store all pages and cookies */
4433 	memseg->pages = kmem_zalloc((sizeof (ldc_page_t) * npages), KM_SLEEP);
4434 	memseg->cookies =
4435 		kmem_zalloc((sizeof (ldc_mem_cookie_t) * npages), KM_SLEEP);
4436 
4437 	D2(ldcp->id, "ldc_mem_bind_handle: (0x%llx) processing 0x%llx pages\n",
4438 	    ldcp->id, npages);
4439 
4440 	addr = v_align;
4441 
4442 	/*
4443 	 * Check if direct shared memory map is enabled, if not change
4444 	 * the mapping type to include SHADOW_MAP.
4445 	 */
4446 	if (ldc_shmem_enabled == 0)
4447 		mtype = LDC_SHADOW_MAP;
4448 
4449 	/*
4450 	 * Table slots are used in a round-robin manner. The algorithm permits
4451 	 * inserting duplicate entries. Slots allocated earlier will typically
4452 	 * get freed before we get back to reusing the slot.Inserting duplicate
4453 	 * entries should be OK as we only lookup entries using the cookie addr
4454 	 * i.e. tbl index, during export, unexport and copy operation.
4455 	 *
4456 	 * One implementation what was tried was to search for a duplicate
4457 	 * page entry first and reuse it. The search overhead is very high and
4458 	 * in the vnet case dropped the perf by almost half, 50 to 24 mbps.
4459 	 * So it does make sense to avoid searching for duplicates.
4460 	 *
4461 	 * But during the process of searching for a free slot, if we find a
4462 	 * duplicate entry we will go ahead and use it, and bump its use count.
4463 	 */
4464 
4465 	/* index to start searching from */
4466 	index = mtbl->next_entry;
4467 	cookie_idx = -1;
4468 
4469 	tmp_mte.ll = 0;	/* initialise fields to 0 */
4470 
4471 	if (mtype & LDC_DIRECT_MAP) {
4472 		tmp_mte.mte_r = (perm & LDC_MEM_R) ? 1 : 0;
4473 		tmp_mte.mte_w = (perm & LDC_MEM_W) ? 1 : 0;
4474 		tmp_mte.mte_x = (perm & LDC_MEM_X) ? 1 : 0;
4475 	}
4476 
4477 	if (mtype & LDC_SHADOW_MAP) {
4478 		tmp_mte.mte_cr = (perm & LDC_MEM_R) ? 1 : 0;
4479 		tmp_mte.mte_cw = (perm & LDC_MEM_W) ? 1 : 0;
4480 	}
4481 
4482 	if (mtype & LDC_IO_MAP) {
4483 		tmp_mte.mte_ir = (perm & LDC_MEM_R) ? 1 : 0;
4484 		tmp_mte.mte_iw = (perm & LDC_MEM_W) ? 1 : 0;
4485 	}
4486 
4487 	D1(ldcp->id, "ldc_mem_bind_handle mte=0x%llx\n", tmp_mte.ll);
4488 
4489 	tmp_mte.mte_pgszc = pg_size_code;
4490 
4491 	/* initialize each mem table entry */
4492 	for (i = 0; i < npages; i++) {
4493 
4494 		/* check if slot is available in the table */
4495 		while (mtbl->table[index].entry.ll != 0) {
4496 
4497 			index = (index + 1) % mtbl->num_entries;
4498 
4499 			if (index == mtbl->next_entry) {
4500 				/* we have looped around */
4501 				DWARN(DBG_ALL_LDCS,
4502 				    "ldc_mem_bind_handle: (0x%llx) cannot find "
4503 				    "entry\n", ldcp->id);
4504 				*ccount = 0;
4505 
4506 				/* NOTE: free memory, remove previous entries */
4507 				/* this shouldnt happen as num_avail was ok */
4508 
4509 				mutex_exit(&mtbl->lock);
4510 				mutex_exit(&mhdl->lock);
4511 				return (ENOMEM);
4512 			}
4513 		}
4514 
4515 		/* get the real address */
4516 		raddr = va_to_pa((void *)addr);
4517 		ra_aligned = ((uintptr_t)raddr & pg_mask);
4518 
4519 		/* build the mte */
4520 		tmp_mte.mte_rpfn = ra_aligned >> pg_shift;
4521 
4522 		D1(ldcp->id, "ldc_mem_bind_handle mte=0x%llx\n", tmp_mte.ll);
4523 
4524 		/* update entry in table */
4525 		mtbl->table[index].entry = tmp_mte;
4526 
4527 		D2(ldcp->id, "ldc_mem_bind_handle: (0x%llx) stored MTE 0x%llx"
4528 		    " into loc 0x%llx\n", ldcp->id, tmp_mte.ll, index);
4529 
4530 		/* calculate the size and offset for this export range */
4531 		if (i == 0) {
4532 			/* first page */
4533 			psize = min((pg_size - v_offset), len);
4534 			poffset = v_offset;
4535 
4536 		} else if (i == (npages - 1)) {
4537 			/* last page */
4538 			psize =	(((uintptr_t)(vaddr + len)) &
4539 				    ((uint64_t)(pg_size-1)));
4540 			if (psize == 0)
4541 				psize = pg_size;
4542 			poffset = 0;
4543 
4544 		} else {
4545 			/* middle pages */
4546 			psize = pg_size;
4547 			poffset = 0;
4548 		}
4549 
4550 		/* store entry for this page */
4551 		memseg->pages[i].index = index;
4552 		memseg->pages[i].raddr = raddr;
4553 		memseg->pages[i].offset = poffset;
4554 		memseg->pages[i].size = psize;
4555 		memseg->pages[i].mte = &(mtbl->table[index]);
4556 
4557 		/* create the cookie */
4558 		if (i == 0 || (index != prev_index + 1)) {
4559 			cookie_idx++;
4560 			memseg->cookies[cookie_idx].addr =
4561 				IDX2COOKIE(index, pg_size_code, pg_shift);
4562 			memseg->cookies[cookie_idx].addr |= poffset;
4563 			memseg->cookies[cookie_idx].size = psize;
4564 
4565 		} else {
4566 			memseg->cookies[cookie_idx].size += psize;
4567 		}
4568 
4569 		D1(ldcp->id, "ldc_mem_bind_handle: bound "
4570 		    "(0x%llx) va=0x%llx, idx=0x%llx, "
4571 		    "ra=0x%llx(sz=0x%x,off=0x%x)\n",
4572 		    ldcp->id, addr, index, raddr, psize, poffset);
4573 
4574 		/* decrement number of available entries */
4575 		mtbl->num_avail--;
4576 
4577 		/* increment va by page size */
4578 		addr += pg_size;
4579 
4580 		/* increment index */
4581 		prev_index = index;
4582 		index = (index + 1) % mtbl->num_entries;
4583 
4584 		/* save the next slot */
4585 		mtbl->next_entry = index;
4586 	}
4587 
4588 	mutex_exit(&mtbl->lock);
4589 
4590 	/* memory handle = bound */
4591 	mhdl->mtype = mtype;
4592 	mhdl->perm = perm;
4593 	mhdl->status = LDC_BOUND;
4594 
4595 	/* update memseg_t */
4596 	memseg->vaddr = vaddr;
4597 	memseg->raddr = memseg->pages[0].raddr;
4598 	memseg->size = len;
4599 	memseg->npages = npages;
4600 	memseg->ncookies = cookie_idx + 1;
4601 	memseg->next_cookie = (memseg->ncookies > 1) ? 1 : 0;
4602 
4603 	/* return count and first cookie */
4604 	*ccount = memseg->ncookies;
4605 	cookie->addr = memseg->cookies[0].addr;
4606 	cookie->size = memseg->cookies[0].size;
4607 
4608 	D1(ldcp->id,
4609 	    "ldc_mem_bind_handle: (0x%llx) bound 0x%llx, va=0x%llx, "
4610 	    "pgs=0x%llx cookies=0x%llx\n",
4611 	    ldcp->id, mhdl, vaddr, npages, memseg->ncookies);
4612 
4613 	mutex_exit(&mhdl->lock);
4614 	return (0);
4615 }
4616 
4617 /*
4618  * Return the next cookie associated with the specified memory handle
4619  */
4620 int
4621 ldc_mem_nextcookie(ldc_mem_handle_t mhandle, ldc_mem_cookie_t *cookie)
4622 {
4623 	ldc_mhdl_t	*mhdl;
4624 	ldc_chan_t 	*ldcp;
4625 	ldc_memseg_t	*memseg;
4626 
4627 	if (mhandle == NULL) {
4628 		DWARN(DBG_ALL_LDCS,
4629 		    "ldc_mem_nextcookie: invalid memory handle\n");
4630 		return (EINVAL);
4631 	}
4632 	mhdl = (ldc_mhdl_t *)mhandle;
4633 
4634 	mutex_enter(&mhdl->lock);
4635 
4636 	ldcp = mhdl->ldcp;
4637 	memseg = mhdl->memseg;
4638 
4639 	if (cookie == 0) {
4640 		DWARN(ldcp->id,
4641 		    "ldc_mem_nextcookie:(0x%llx) invalid cookie arg\n",
4642 		    ldcp->id);
4643 		mutex_exit(&mhdl->lock);
4644 		return (EINVAL);
4645 	}
4646 
4647 	if (memseg->next_cookie != 0) {
4648 		cookie->addr = memseg->cookies[memseg->next_cookie].addr;
4649 		cookie->size = memseg->cookies[memseg->next_cookie].size;
4650 		memseg->next_cookie++;
4651 		if (memseg->next_cookie == memseg->ncookies)
4652 			memseg->next_cookie = 0;
4653 
4654 	} else {
4655 		DWARN(ldcp->id,
4656 		    "ldc_mem_nextcookie:(0x%llx) no more cookies\n", ldcp->id);
4657 		cookie->addr = 0;
4658 		cookie->size = 0;
4659 		mutex_exit(&mhdl->lock);
4660 		return (EINVAL);
4661 	}
4662 
4663 	D1(ldcp->id,
4664 	    "ldc_mem_nextcookie: (0x%llx) cookie addr=0x%llx,sz=0x%llx\n",
4665 	    ldcp->id, cookie->addr, cookie->size);
4666 
4667 	mutex_exit(&mhdl->lock);
4668 	return (0);
4669 }
4670 
4671 /*
4672  * Unbind the virtual memory region associated with the specified
4673  * memory handle. Allassociated cookies are freed and the corresponding
4674  * RA space is no longer exported.
4675  */
4676 int
4677 ldc_mem_unbind_handle(ldc_mem_handle_t mhandle)
4678 {
4679 	ldc_mhdl_t	*mhdl;
4680 	ldc_chan_t 	*ldcp;
4681 	ldc_mtbl_t	*mtbl;
4682 	ldc_memseg_t	*memseg;
4683 	uint64_t	cookie_addr;
4684 	uint64_t	pg_shift, pg_size_code;
4685 	int		i, rv;
4686 
4687 	if (mhandle == NULL) {
4688 		DWARN(DBG_ALL_LDCS,
4689 		    "ldc_mem_unbind_handle: invalid memory handle\n");
4690 		return (EINVAL);
4691 	}
4692 	mhdl = (ldc_mhdl_t *)mhandle;
4693 
4694 	mutex_enter(&mhdl->lock);
4695 
4696 	if (mhdl->status == LDC_UNBOUND) {
4697 		DWARN(DBG_ALL_LDCS,
4698 		    "ldc_mem_unbind_handle: (0x%x) handle is not bound\n",
4699 		    mhandle);
4700 		mutex_exit(&mhdl->lock);
4701 		return (EINVAL);
4702 	}
4703 
4704 	ldcp = mhdl->ldcp;
4705 	mtbl = ldcp->mtbl;
4706 
4707 	memseg = mhdl->memseg;
4708 
4709 	/* lock the memory table - exclusive access to channel */
4710 	mutex_enter(&mtbl->lock);
4711 
4712 	/* undo the pages exported */
4713 	for (i = 0; i < memseg->npages; i++) {
4714 
4715 		/* check for mapped pages, revocation cookie != 0 */
4716 		if (memseg->pages[i].mte->cookie) {
4717 
4718 			pg_size_code = page_szc(memseg->pages[i].size);
4719 			pg_shift = page_get_shift(memseg->pages[i].size);
4720 			cookie_addr = IDX2COOKIE(memseg->pages[i].index,
4721 			    pg_size_code, pg_shift);
4722 
4723 			D1(ldcp->id, "ldc_mem_unbind_handle: (0x%llx) revoke "
4724 			    "cookie 0x%llx, rcookie 0x%llx\n", ldcp->id,
4725 			    cookie_addr, memseg->pages[i].mte->cookie);
4726 			rv = hv_ldc_revoke(ldcp->id, cookie_addr,
4727 			    memseg->pages[i].mte->cookie);
4728 			if (rv) {
4729 				DWARN(ldcp->id,
4730 				    "ldc_mem_unbind_handle: (0x%llx) cannot "
4731 				    "revoke mapping, cookie %llx\n", ldcp->id,
4732 				    cookie_addr);
4733 			}
4734 		}
4735 
4736 		/* clear the entry from the table */
4737 		memseg->pages[i].mte->entry.ll = 0;
4738 		mtbl->num_avail++;
4739 	}
4740 	mutex_exit(&mtbl->lock);
4741 
4742 	/* free the allocated memseg and page structures */
4743 	kmem_free(memseg->pages, (sizeof (ldc_page_t) * memseg->npages));
4744 	kmem_free(memseg->cookies,
4745 	    (sizeof (ldc_mem_cookie_t) * memseg->npages));
4746 	kmem_cache_free(ldcssp->memseg_cache, memseg);
4747 
4748 	/* uninitialize the memory handle */
4749 	mhdl->memseg = NULL;
4750 	mhdl->status = LDC_UNBOUND;
4751 
4752 	D1(ldcp->id, "ldc_mem_unbind_handle: (0x%llx) unbound handle 0x%llx\n",
4753 	    ldcp->id, mhdl);
4754 
4755 	mutex_exit(&mhdl->lock);
4756 	return (0);
4757 }
4758 
4759 /*
4760  * Get information about the dring. The base address of the descriptor
4761  * ring along with the type and permission are returned back.
4762  */
4763 int
4764 ldc_mem_info(ldc_mem_handle_t mhandle, ldc_mem_info_t *minfo)
4765 {
4766 	ldc_mhdl_t	*mhdl;
4767 
4768 	if (mhandle == NULL) {
4769 		DWARN(DBG_ALL_LDCS, "ldc_mem_info: invalid memory handle\n");
4770 		return (EINVAL);
4771 	}
4772 	mhdl = (ldc_mhdl_t *)mhandle;
4773 
4774 	if (minfo == NULL) {
4775 		DWARN(DBG_ALL_LDCS, "ldc_mem_info: invalid args\n");
4776 		return (EINVAL);
4777 	}
4778 
4779 	mutex_enter(&mhdl->lock);
4780 
4781 	minfo->status = mhdl->status;
4782 	if (mhdl->status == LDC_BOUND || mhdl->status == LDC_MAPPED) {
4783 		minfo->vaddr = mhdl->memseg->vaddr;
4784 		minfo->raddr = mhdl->memseg->raddr;
4785 		minfo->mtype = mhdl->mtype;
4786 		minfo->perm = mhdl->perm;
4787 	}
4788 	mutex_exit(&mhdl->lock);
4789 
4790 	return (0);
4791 }
4792 
4793 /*
4794  * Copy data either from or to the client specified virtual address
4795  * space to or from the exported memory associated with the cookies.
4796  * The direction argument determines whether the data is read from or
4797  * written to exported memory.
4798  */
4799 int
4800 ldc_mem_copy(ldc_handle_t handle, caddr_t vaddr, uint64_t off, size_t *size,
4801     ldc_mem_cookie_t *cookies, uint32_t ccount, uint8_t direction)
4802 {
4803 	ldc_chan_t 	*ldcp;
4804 	uint64_t	local_voff, local_valign;
4805 	uint64_t	cookie_addr, cookie_size;
4806 	uint64_t	pg_shift, pg_size, pg_size_code;
4807 	uint64_t 	export_caddr, export_poff, export_psize, export_size;
4808 	uint64_t	local_ra, local_poff, local_psize;
4809 	uint64_t	copy_size, copied_len = 0, total_bal = 0, idx = 0;
4810 	pgcnt_t		npages;
4811 	size_t		len = *size;
4812 	int 		i, rv = 0;
4813 
4814 	uint64_t	chid;
4815 
4816 	if (handle == NULL) {
4817 		DWARN(DBG_ALL_LDCS, "ldc_mem_copy: invalid channel handle\n");
4818 		return (EINVAL);
4819 	}
4820 	ldcp = (ldc_chan_t *)handle;
4821 	chid = ldcp->id;
4822 
4823 	/* check to see if channel is UP */
4824 	if (ldcp->tstate != TS_UP) {
4825 		DWARN(chid, "ldc_mem_copy: (0x%llx) channel is not UP\n",
4826 		    chid);
4827 		return (ECONNRESET);
4828 	}
4829 
4830 	/* Force address and size to be 8-byte aligned */
4831 	if ((((uintptr_t)vaddr | len) & 0x7) != 0) {
4832 		DWARN(chid,
4833 		    "ldc_mem_copy: addr/sz is not 8-byte aligned\n");
4834 		return (EINVAL);
4835 	}
4836 
4837 	/* Find the size of the exported memory */
4838 	export_size = 0;
4839 	for (i = 0; i < ccount; i++)
4840 		export_size += cookies[i].size;
4841 
4842 	/* check to see if offset is valid */
4843 	if (off > export_size) {
4844 		DWARN(chid,
4845 		    "ldc_mem_copy: (0x%llx) start offset > export mem size\n",
4846 		    chid);
4847 		return (EINVAL);
4848 	}
4849 
4850 	/*
4851 	 * Check to see if the export size is smaller than the size we
4852 	 * are requesting to copy - if so flag an error
4853 	 */
4854 	if ((export_size - off) < *size) {
4855 		DWARN(chid,
4856 		    "ldc_mem_copy: (0x%llx) copy size > export mem size\n",
4857 		    chid);
4858 		return (EINVAL);
4859 	}
4860 
4861 	total_bal = min(export_size, *size);
4862 
4863 	/* FUTURE: get the page size, pgsz code, and shift */
4864 	pg_size = MMU_PAGESIZE;
4865 	pg_size_code = page_szc(pg_size);
4866 	pg_shift = page_get_shift(pg_size_code);
4867 
4868 	D1(chid, "ldc_mem_copy: copying data "
4869 	    "(0x%llx) va 0x%llx pgsz=0x%llx, pgszc=0x%llx, pg_shift=0x%llx\n",
4870 	    chid, vaddr, pg_size, pg_size_code, pg_shift);
4871 
4872 	/* aligned VA and its offset */
4873 	local_valign = (((uintptr_t)vaddr) & ~(pg_size - 1));
4874 	local_voff = ((uintptr_t)vaddr) & (pg_size - 1);
4875 
4876 	npages = (len+local_voff)/pg_size;
4877 	npages = ((len+local_voff)%pg_size == 0) ? npages : npages+1;
4878 
4879 	D1(chid,
4880 	    "ldc_mem_copy: (0x%llx) v=0x%llx,val=0x%llx,off=0x%x,pgs=0x%x\n",
4881 	    chid, vaddr, local_valign, local_voff, npages);
4882 
4883 	local_ra = va_to_pa((void *)local_valign);
4884 	local_poff = local_voff;
4885 	local_psize = min(len, (pg_size - local_voff));
4886 
4887 	len -= local_psize;
4888 
4889 	/*
4890 	 * find the first cookie in the list of cookies
4891 	 * if the offset passed in is not zero
4892 	 */
4893 	for (idx = 0; idx < ccount; idx++) {
4894 		cookie_size = cookies[idx].size;
4895 		if (off < cookie_size)
4896 			break;
4897 		off -= cookie_size;
4898 	}
4899 
4900 	cookie_addr = cookies[idx].addr + off;
4901 	cookie_size = cookies[idx].size - off;
4902 
4903 	export_caddr = cookie_addr & ~(pg_size - 1);
4904 	export_poff = cookie_addr & (pg_size - 1);
4905 	export_psize = min(cookie_size, (pg_size - export_poff));
4906 
4907 	for (;;) {
4908 
4909 		copy_size = min(export_psize, local_psize);
4910 
4911 		D1(chid,
4912 		    "ldc_mem_copy:(0x%llx) dir=0x%x, caddr=0x%llx,"
4913 		    " loc_ra=0x%llx, exp_poff=0x%llx, loc_poff=0x%llx,"
4914 		    " exp_psz=0x%llx, loc_psz=0x%llx, copy_sz=0x%llx,"
4915 		    " total_bal=0x%llx\n",
4916 		    chid, direction, export_caddr, local_ra, export_poff,
4917 		    local_poff, export_psize, local_psize, copy_size,
4918 		    total_bal);
4919 
4920 		rv = hv_ldc_copy(chid, direction,
4921 		    (export_caddr + export_poff), (local_ra + local_poff),
4922 		    copy_size, &copied_len);
4923 
4924 		if (rv != 0) {
4925 			int 		error = EIO;
4926 			uint64_t	rx_hd, rx_tl;
4927 
4928 			DWARN(chid,
4929 			    "ldc_mem_copy: (0x%llx) err %d during copy\n",
4930 			    (unsigned long long)chid, rv);
4931 			DWARN(chid,
4932 			    "ldc_mem_copy: (0x%llx) dir=0x%x, caddr=0x%lx, "
4933 			    "loc_ra=0x%lx, exp_poff=0x%lx, loc_poff=0x%lx,"
4934 			    " exp_psz=0x%lx, loc_psz=0x%lx, copy_sz=0x%lx,"
4935 			    " copied_len=0x%lx, total_bal=0x%lx\n",
4936 			    chid, direction, export_caddr, local_ra,
4937 			    export_poff, local_poff, export_psize, local_psize,
4938 			    copy_size, copied_len, total_bal);
4939 
4940 			*size = *size - total_bal;
4941 
4942 			/*
4943 			 * check if reason for copy error was due to
4944 			 * a channel reset. we need to grab the lock
4945 			 * just in case we have to do a reset.
4946 			 */
4947 			mutex_enter(&ldcp->lock);
4948 			mutex_enter(&ldcp->tx_lock);
4949 
4950 			rv = hv_ldc_rx_get_state(ldcp->id,
4951 			    &rx_hd, &rx_tl, &(ldcp->link_state));
4952 			if (ldcp->link_state == LDC_CHANNEL_DOWN ||
4953 			    ldcp->link_state == LDC_CHANNEL_RESET) {
4954 				i_ldc_reset(ldcp, B_FALSE);
4955 				error = ECONNRESET;
4956 			}
4957 
4958 			mutex_exit(&ldcp->tx_lock);
4959 			mutex_exit(&ldcp->lock);
4960 
4961 			return (error);
4962 		}
4963 
4964 		ASSERT(copied_len <= copy_size);
4965 
4966 		D2(chid, "ldc_mem_copy: copied=0x%llx\n", copied_len);
4967 		export_poff += copied_len;
4968 		local_poff += copied_len;
4969 		export_psize -= copied_len;
4970 		local_psize -= copied_len;
4971 		cookie_size -= copied_len;
4972 
4973 		total_bal -= copied_len;
4974 
4975 		if (copy_size != copied_len)
4976 			continue;
4977 
4978 		if (export_psize == 0 && total_bal != 0) {
4979 
4980 			if (cookie_size == 0) {
4981 				idx++;
4982 				cookie_addr = cookies[idx].addr;
4983 				cookie_size = cookies[idx].size;
4984 
4985 				export_caddr = cookie_addr & ~(pg_size - 1);
4986 				export_poff = cookie_addr & (pg_size - 1);
4987 				export_psize =
4988 					min(cookie_size, (pg_size-export_poff));
4989 			} else {
4990 				export_caddr += pg_size;
4991 				export_poff = 0;
4992 				export_psize = min(cookie_size, pg_size);
4993 			}
4994 		}
4995 
4996 		if (local_psize == 0 && total_bal != 0) {
4997 			local_valign += pg_size;
4998 			local_ra = va_to_pa((void *)local_valign);
4999 			local_poff = 0;
5000 			local_psize = min(pg_size, len);
5001 			len -= local_psize;
5002 		}
5003 
5004 		/* check if we are all done */
5005 		if (total_bal == 0)
5006 			break;
5007 	}
5008 
5009 
5010 	D1(chid,
5011 	    "ldc_mem_copy: (0x%llx) done copying sz=0x%llx\n",
5012 	    chid, *size);
5013 
5014 	return (0);
5015 }
5016 
5017 /*
5018  * Copy data either from or to the client specified virtual address
5019  * space to or from HV physical memory.
5020  *
5021  * The direction argument determines whether the data is read from or
5022  * written to HV memory. direction values are LDC_COPY_IN/OUT similar
5023  * to the ldc_mem_copy interface
5024  */
5025 int
5026 ldc_mem_rdwr_cookie(ldc_handle_t handle, caddr_t vaddr, size_t *size,
5027     caddr_t paddr, uint8_t direction)
5028 {
5029 	ldc_chan_t 	*ldcp;
5030 	uint64_t	local_voff, local_valign;
5031 	uint64_t	pg_shift, pg_size, pg_size_code;
5032 	uint64_t 	target_pa, target_poff, target_psize, target_size;
5033 	uint64_t	local_ra, local_poff, local_psize;
5034 	uint64_t	copy_size, copied_len = 0;
5035 	pgcnt_t		npages;
5036 	size_t		len = *size;
5037 	int 		rv = 0;
5038 
5039 	if (handle == NULL) {
5040 		DWARN(DBG_ALL_LDCS,
5041 		    "ldc_mem_rdwr_cookie: invalid channel handle\n");
5042 		return (EINVAL);
5043 	}
5044 	ldcp = (ldc_chan_t *)handle;
5045 
5046 	mutex_enter(&ldcp->lock);
5047 
5048 	/* check to see if channel is UP */
5049 	if (ldcp->tstate != TS_UP) {
5050 		DWARN(ldcp->id,
5051 		    "ldc_mem_rdwr_cookie: (0x%llx) channel is not UP\n",
5052 		    ldcp->id);
5053 		mutex_exit(&ldcp->lock);
5054 		return (ECONNRESET);
5055 	}
5056 
5057 	/* Force address and size to be 8-byte aligned */
5058 	if ((((uintptr_t)vaddr | len) & 0x7) != 0) {
5059 		DWARN(ldcp->id,
5060 		    "ldc_mem_rdwr_cookie: addr/size is not 8-byte aligned\n");
5061 		mutex_exit(&ldcp->lock);
5062 		return (EINVAL);
5063 	}
5064 
5065 	target_size = *size;
5066 
5067 	/* FUTURE: get the page size, pgsz code, and shift */
5068 	pg_size = MMU_PAGESIZE;
5069 	pg_size_code = page_szc(pg_size);
5070 	pg_shift = page_get_shift(pg_size_code);
5071 
5072 	D1(ldcp->id, "ldc_mem_rdwr_cookie: copying data "
5073 	    "(0x%llx) va 0x%llx pgsz=0x%llx, pgszc=0x%llx, pg_shift=0x%llx\n",
5074 	    ldcp->id, vaddr, pg_size, pg_size_code, pg_shift);
5075 
5076 	/* aligned VA and its offset */
5077 	local_valign = ((uintptr_t)vaddr) & ~(pg_size - 1);
5078 	local_voff = ((uintptr_t)vaddr) & (pg_size - 1);
5079 
5080 	npages = (len + local_voff) / pg_size;
5081 	npages = ((len + local_voff) % pg_size == 0) ? npages : npages+1;
5082 
5083 	D1(ldcp->id, "ldc_mem_rdwr_cookie: (0x%llx) v=0x%llx, "
5084 	    "val=0x%llx,off=0x%x,pgs=0x%x\n",
5085 	    ldcp->id, vaddr, local_valign, local_voff, npages);
5086 
5087 	local_ra = va_to_pa((void *)local_valign);
5088 	local_poff = local_voff;
5089 	local_psize = min(len, (pg_size - local_voff));
5090 
5091 	len -= local_psize;
5092 
5093 	target_pa = ((uintptr_t)paddr) & ~(pg_size - 1);
5094 	target_poff = ((uintptr_t)paddr) & (pg_size - 1);
5095 	target_psize = pg_size - target_poff;
5096 
5097 	for (;;) {
5098 
5099 		copy_size = min(target_psize, local_psize);
5100 
5101 		D1(ldcp->id,
5102 		    "ldc_mem_rdwr_cookie: (0x%llx) dir=0x%x, tar_pa=0x%llx,"
5103 		    " loc_ra=0x%llx, tar_poff=0x%llx, loc_poff=0x%llx,"
5104 		    " tar_psz=0x%llx, loc_psz=0x%llx, copy_sz=0x%llx,"
5105 		    " total_bal=0x%llx\n",
5106 		    ldcp->id, direction, target_pa, local_ra, target_poff,
5107 		    local_poff, target_psize, local_psize, copy_size,
5108 		    target_size);
5109 
5110 		rv = hv_ldc_copy(ldcp->id, direction,
5111 		    (target_pa + target_poff), (local_ra + local_poff),
5112 		    copy_size, &copied_len);
5113 
5114 		if (rv != 0) {
5115 			DWARN(DBG_ALL_LDCS,
5116 			    "ldc_mem_rdwr_cookie: (0x%lx) err %d during copy\n",
5117 			    ldcp->id, rv);
5118 			DWARN(DBG_ALL_LDCS,
5119 			    "ldc_mem_rdwr_cookie: (0x%llx) dir=%lld, "
5120 			    "tar_pa=0x%llx, loc_ra=0x%llx, tar_poff=0x%llx, "
5121 			    "loc_poff=0x%llx, tar_psz=0x%llx, loc_psz=0x%llx, "
5122 			    "copy_sz=0x%llx, total_bal=0x%llx\n",
5123 			    ldcp->id, direction, target_pa, local_ra,
5124 			    target_poff, local_poff, target_psize, local_psize,
5125 			    copy_size, target_size);
5126 
5127 			*size = *size - target_size;
5128 			mutex_exit(&ldcp->lock);
5129 			return (i_ldc_h2v_error(rv));
5130 		}
5131 
5132 		D2(ldcp->id, "ldc_mem_rdwr_cookie: copied=0x%llx\n",
5133 		    copied_len);
5134 		target_poff += copied_len;
5135 		local_poff += copied_len;
5136 		target_psize -= copied_len;
5137 		local_psize -= copied_len;
5138 
5139 		target_size -= copied_len;
5140 
5141 		if (copy_size != copied_len)
5142 			continue;
5143 
5144 		if (target_psize == 0 && target_size != 0) {
5145 			target_pa += pg_size;
5146 			target_poff = 0;
5147 			target_psize = min(pg_size, target_size);
5148 		}
5149 
5150 		if (local_psize == 0 && target_size != 0) {
5151 			local_valign += pg_size;
5152 			local_ra = va_to_pa((void *)local_valign);
5153 			local_poff = 0;
5154 			local_psize = min(pg_size, len);
5155 			len -= local_psize;
5156 		}
5157 
5158 		/* check if we are all done */
5159 		if (target_size == 0)
5160 			break;
5161 	}
5162 
5163 	mutex_exit(&ldcp->lock);
5164 
5165 	D1(ldcp->id, "ldc_mem_rdwr_cookie: (0x%llx) done copying sz=0x%llx\n",
5166 	    ldcp->id, *size);
5167 
5168 	return (0);
5169 }
5170 
5171 /*
5172  * Map an exported memory segment into the local address space. If the
5173  * memory range was exported for direct map access, a HV call is made
5174  * to allocate a RA range. If the map is done via a shadow copy, local
5175  * shadow memory is allocated and the base VA is returned in 'vaddr'. If
5176  * the mapping is a direct map then the RA is returned in 'raddr'.
5177  */
5178 int
5179 ldc_mem_map(ldc_mem_handle_t mhandle, ldc_mem_cookie_t *cookie, uint32_t ccount,
5180     uint8_t mtype, uint8_t perm, caddr_t *vaddr, caddr_t *raddr)
5181 {
5182 	int		i, j, idx, rv, retries;
5183 	ldc_chan_t 	*ldcp;
5184 	ldc_mhdl_t	*mhdl;
5185 	ldc_memseg_t	*memseg;
5186 	caddr_t		tmpaddr;
5187 	uint64_t	map_perm = perm;
5188 	uint64_t	pg_size, pg_shift, pg_size_code, pg_mask;
5189 	uint64_t	exp_size = 0, base_off, map_size, npages;
5190 	uint64_t	cookie_addr, cookie_off, cookie_size;
5191 	tte_t		ldc_tte;
5192 
5193 	if (mhandle == NULL) {
5194 		DWARN(DBG_ALL_LDCS, "ldc_mem_map: invalid memory handle\n");
5195 		return (EINVAL);
5196 	}
5197 	mhdl = (ldc_mhdl_t *)mhandle;
5198 
5199 	mutex_enter(&mhdl->lock);
5200 
5201 	if (mhdl->status == LDC_BOUND || mhdl->status == LDC_MAPPED ||
5202 	    mhdl->memseg != NULL) {
5203 		DWARN(DBG_ALL_LDCS,
5204 		    "ldc_mem_map: (0x%llx) handle bound/mapped\n", mhandle);
5205 		mutex_exit(&mhdl->lock);
5206 		return (EINVAL);
5207 	}
5208 
5209 	ldcp = mhdl->ldcp;
5210 
5211 	mutex_enter(&ldcp->lock);
5212 
5213 	if (ldcp->tstate != TS_UP) {
5214 		DWARN(ldcp->id,
5215 		    "ldc_mem_dring_map: (0x%llx) channel is not UP\n",
5216 		    ldcp->id);
5217 		mutex_exit(&ldcp->lock);
5218 		mutex_exit(&mhdl->lock);
5219 		return (ECONNRESET);
5220 	}
5221 
5222 	if ((mtype & (LDC_SHADOW_MAP|LDC_DIRECT_MAP|LDC_IO_MAP)) == 0) {
5223 		DWARN(ldcp->id, "ldc_mem_map: invalid map type\n");
5224 		mutex_exit(&ldcp->lock);
5225 		mutex_exit(&mhdl->lock);
5226 		return (EINVAL);
5227 	}
5228 
5229 	D1(ldcp->id, "ldc_mem_map: (0x%llx) cookie = 0x%llx,0x%llx\n",
5230 	    ldcp->id, cookie->addr, cookie->size);
5231 
5232 	/* FUTURE: get the page size, pgsz code, and shift */
5233 	pg_size = MMU_PAGESIZE;
5234 	pg_size_code = page_szc(pg_size);
5235 	pg_shift = page_get_shift(pg_size_code);
5236 	pg_mask = ~(pg_size - 1);
5237 
5238 	/* calculate the number of pages in the exported cookie */
5239 	base_off = cookie[0].addr & (pg_size - 1);
5240 	for (idx = 0; idx < ccount; idx++)
5241 		exp_size += cookie[idx].size;
5242 	map_size = P2ROUNDUP((exp_size + base_off), pg_size);
5243 	npages = (map_size >> pg_shift);
5244 
5245 	/* Allocate memseg structure */
5246 	memseg = mhdl->memseg =
5247 		kmem_cache_alloc(ldcssp->memseg_cache, KM_SLEEP);
5248 
5249 	/* Allocate memory to store all pages and cookies */
5250 	memseg->pages =	kmem_zalloc((sizeof (ldc_page_t) * npages), KM_SLEEP);
5251 	memseg->cookies =
5252 		kmem_zalloc((sizeof (ldc_mem_cookie_t) * ccount), KM_SLEEP);
5253 
5254 	D2(ldcp->id, "ldc_mem_map: (0x%llx) exp_size=0x%llx, map_size=0x%llx,"
5255 	    "pages=0x%llx\n", ldcp->id, exp_size, map_size, npages);
5256 
5257 	/*
5258 	 * Check if direct map over shared memory is enabled, if not change
5259 	 * the mapping type to SHADOW_MAP.
5260 	 */
5261 	if (ldc_shmem_enabled == 0)
5262 		mtype = LDC_SHADOW_MAP;
5263 
5264 	/*
5265 	 * Check to see if the client is requesting direct or shadow map
5266 	 * If direct map is requested, try to map remote memory first,
5267 	 * and if that fails, revert to shadow map
5268 	 */
5269 	if (mtype == LDC_DIRECT_MAP) {
5270 
5271 		/* Allocate kernel virtual space for mapping */
5272 		memseg->vaddr = vmem_xalloc(heap_arena, map_size,
5273 		    pg_size, 0, 0, NULL, NULL, VM_NOSLEEP);
5274 		if (memseg->vaddr == NULL) {
5275 			cmn_err(CE_WARN,
5276 			    "ldc_mem_map: (0x%lx) memory map failed\n",
5277 			    ldcp->id);
5278 			kmem_free(memseg->cookies,
5279 			    (sizeof (ldc_mem_cookie_t) * ccount));
5280 			kmem_free(memseg->pages,
5281 			    (sizeof (ldc_page_t) * npages));
5282 			kmem_cache_free(ldcssp->memseg_cache, memseg);
5283 
5284 			mutex_exit(&ldcp->lock);
5285 			mutex_exit(&mhdl->lock);
5286 			return (ENOMEM);
5287 		}
5288 
5289 		/* Unload previous mapping */
5290 		hat_unload(kas.a_hat, memseg->vaddr, map_size,
5291 		    HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK);
5292 
5293 		/* for each cookie passed in - map into address space */
5294 		idx = 0;
5295 		cookie_size = 0;
5296 		tmpaddr = memseg->vaddr;
5297 
5298 		for (i = 0; i < npages; i++) {
5299 
5300 			if (cookie_size == 0) {
5301 				ASSERT(idx < ccount);
5302 				cookie_addr = cookie[idx].addr & pg_mask;
5303 				cookie_off = cookie[idx].addr & (pg_size - 1);
5304 				cookie_size =
5305 				    P2ROUNDUP((cookie_off + cookie[idx].size),
5306 					pg_size);
5307 				idx++;
5308 			}
5309 
5310 			D1(ldcp->id, "ldc_mem_map: (0x%llx) mapping "
5311 			    "cookie 0x%llx, bal=0x%llx\n", ldcp->id,
5312 			    cookie_addr, cookie_size);
5313 
5314 			/* map the cookie into address space */
5315 			for (retries = 0; retries < ldc_max_retries;
5316 			    retries++) {
5317 
5318 				rv = hv_ldc_mapin(ldcp->id, cookie_addr,
5319 				    &memseg->pages[i].raddr, &map_perm);
5320 				if (rv != H_EWOULDBLOCK && rv != H_ETOOMANY)
5321 					break;
5322 
5323 				drv_usecwait(ldc_delay);
5324 			}
5325 
5326 			if (rv || memseg->pages[i].raddr == 0) {
5327 				DWARN(ldcp->id,
5328 				    "ldc_mem_map: (0x%llx) hv mapin err %d\n",
5329 				    ldcp->id, rv);
5330 
5331 				/* remove previous mapins */
5332 				hat_unload(kas.a_hat, memseg->vaddr, map_size,
5333 				    HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK);
5334 				for (j = 0; j < i; j++) {
5335 					rv = hv_ldc_unmap(
5336 							memseg->pages[j].raddr);
5337 					if (rv) {
5338 						DWARN(ldcp->id,
5339 						    "ldc_mem_map: (0x%llx) "
5340 						    "cannot unmap ra=0x%llx\n",
5341 					    ldcp->id,
5342 						    memseg->pages[j].raddr);
5343 					}
5344 				}
5345 
5346 				/* free kernel virtual space */
5347 				vmem_free(heap_arena, (void *)memseg->vaddr,
5348 				    map_size);
5349 
5350 				/* direct map failed - revert to shadow map */
5351 				mtype = LDC_SHADOW_MAP;
5352 				break;
5353 
5354 			} else {
5355 
5356 				D1(ldcp->id,
5357 				    "ldc_mem_map: (0x%llx) vtop map 0x%llx -> "
5358 				    "0x%llx, cookie=0x%llx, perm=0x%llx\n",
5359 				    ldcp->id, tmpaddr, memseg->pages[i].raddr,
5360 				    cookie_addr, perm);
5361 
5362 				/*
5363 				 * NOTE: Calling hat_devload directly, causes it
5364 				 * to look for page_t using the pfn. Since this
5365 				 * addr is greater than the memlist, it treates
5366 				 * it as non-memory
5367 				 */
5368 				sfmmu_memtte(&ldc_tte,
5369 				    (pfn_t)(memseg->pages[i].raddr >> pg_shift),
5370 				    PROT_READ | PROT_WRITE | HAT_NOSYNC, TTE8K);
5371 
5372 				D1(ldcp->id,
5373 				    "ldc_mem_map: (0x%llx) ra 0x%llx -> "
5374 				    "tte 0x%llx\n", ldcp->id,
5375 				    memseg->pages[i].raddr, ldc_tte);
5376 
5377 				sfmmu_tteload(kas.a_hat, &ldc_tte, tmpaddr,
5378 				    NULL, HAT_LOAD_LOCK);
5379 
5380 				cookie_size -= pg_size;
5381 				cookie_addr += pg_size;
5382 				tmpaddr += pg_size;
5383 			}
5384 		}
5385 	}
5386 
5387 	if (mtype == LDC_SHADOW_MAP) {
5388 		if (*vaddr == NULL) {
5389 			memseg->vaddr = kmem_zalloc(exp_size, KM_SLEEP);
5390 			mhdl->myshadow = B_TRUE;
5391 
5392 			D1(ldcp->id, "ldc_mem_map: (0x%llx) allocated "
5393 			    "shadow page va=0x%llx\n", ldcp->id, memseg->vaddr);
5394 		} else {
5395 			/*
5396 			 * Use client supplied memory for memseg->vaddr
5397 			 * WARNING: assuming that client mem is >= exp_size
5398 			 */
5399 			memseg->vaddr = *vaddr;
5400 		}
5401 
5402 		/* Save all page and cookie information */
5403 		for (i = 0, tmpaddr = memseg->vaddr; i < npages; i++) {
5404 			memseg->pages[i].raddr = va_to_pa(tmpaddr);
5405 			memseg->pages[i].size = pg_size;
5406 			tmpaddr += pg_size;
5407 		}
5408 
5409 	}
5410 
5411 	/* save all cookies */
5412 	bcopy(cookie, memseg->cookies, ccount * sizeof (ldc_mem_cookie_t));
5413 
5414 	/* update memseg_t */
5415 	memseg->raddr = memseg->pages[0].raddr;
5416 	memseg->size = (mtype == LDC_SHADOW_MAP) ? exp_size : map_size;
5417 	memseg->npages = npages;
5418 	memseg->ncookies = ccount;
5419 	memseg->next_cookie = 0;
5420 
5421 	/* memory handle = mapped */
5422 	mhdl->mtype = mtype;
5423 	mhdl->perm = perm;
5424 	mhdl->status = LDC_MAPPED;
5425 
5426 	D1(ldcp->id, "ldc_mem_map: (0x%llx) mapped 0x%llx, ra=0x%llx, "
5427 	    "va=0x%llx, pgs=0x%llx cookies=0x%llx\n",
5428 	    ldcp->id, mhdl, memseg->raddr, memseg->vaddr,
5429 	    memseg->npages, memseg->ncookies);
5430 
5431 	if (mtype == LDC_SHADOW_MAP)
5432 		base_off = 0;
5433 	if (raddr)
5434 		*raddr = (caddr_t)(memseg->raddr | base_off);
5435 	if (vaddr)
5436 		*vaddr = (caddr_t)((uintptr_t)memseg->vaddr | base_off);
5437 
5438 	mutex_exit(&ldcp->lock);
5439 	mutex_exit(&mhdl->lock);
5440 	return (0);
5441 }
5442 
5443 /*
5444  * Unmap a memory segment. Free shadow memory (if any).
5445  */
5446 int
5447 ldc_mem_unmap(ldc_mem_handle_t mhandle)
5448 {
5449 	int		i, rv;
5450 	ldc_mhdl_t	*mhdl = (ldc_mhdl_t *)mhandle;
5451 	ldc_chan_t 	*ldcp;
5452 	ldc_memseg_t	*memseg;
5453 
5454 	if (mhdl == 0 || mhdl->status != LDC_MAPPED) {
5455 		DWARN(DBG_ALL_LDCS,
5456 		    "ldc_mem_unmap: (0x%llx) handle is not mapped\n",
5457 		    mhandle);
5458 		return (EINVAL);
5459 	}
5460 
5461 	mutex_enter(&mhdl->lock);
5462 
5463 	ldcp = mhdl->ldcp;
5464 	memseg = mhdl->memseg;
5465 
5466 	D1(ldcp->id, "ldc_mem_unmap: (0x%llx) unmapping handle 0x%llx\n",
5467 	    ldcp->id, mhdl);
5468 
5469 	/* if we allocated shadow memory - free it */
5470 	if (mhdl->mtype == LDC_SHADOW_MAP && mhdl->myshadow) {
5471 		kmem_free(memseg->vaddr, memseg->size);
5472 	} else if (mhdl->mtype == LDC_DIRECT_MAP) {
5473 
5474 		/* unmap in the case of DIRECT_MAP */
5475 		hat_unload(kas.a_hat, memseg->vaddr, memseg->size,
5476 		    HAT_UNLOAD_UNLOCK);
5477 
5478 		for (i = 0; i < memseg->npages; i++) {
5479 			rv = hv_ldc_unmap(memseg->pages[i].raddr);
5480 			if (rv) {
5481 				cmn_err(CE_WARN,
5482 				    "ldc_mem_map: (0x%lx) hv unmap err %d\n",
5483 				    ldcp->id, rv);
5484 			}
5485 		}
5486 
5487 		vmem_free(heap_arena, (void *)memseg->vaddr, memseg->size);
5488 	}
5489 
5490 	/* free the allocated memseg and page structures */
5491 	kmem_free(memseg->pages, (sizeof (ldc_page_t) * memseg->npages));
5492 	kmem_free(memseg->cookies,
5493 	    (sizeof (ldc_mem_cookie_t) * memseg->ncookies));
5494 	kmem_cache_free(ldcssp->memseg_cache, memseg);
5495 
5496 	/* uninitialize the memory handle */
5497 	mhdl->memseg = NULL;
5498 	mhdl->status = LDC_UNBOUND;
5499 
5500 	D1(ldcp->id, "ldc_mem_unmap: (0x%llx) unmapped handle 0x%llx\n",
5501 	    ldcp->id, mhdl);
5502 
5503 	mutex_exit(&mhdl->lock);
5504 	return (0);
5505 }
5506 
5507 /*
5508  * Internal entry point for LDC mapped memory entry consistency
5509  * semantics. Acquire copies the contents of the remote memory
5510  * into the local shadow copy. The release operation copies the local
5511  * contents into the remote memory. The offset and size specify the
5512  * bounds for the memory range being synchronized.
5513  */
5514 static int
5515 i_ldc_mem_acquire_release(ldc_mem_handle_t mhandle, uint8_t direction,
5516     uint64_t offset, size_t size)
5517 {
5518 	int 		err;
5519 	ldc_mhdl_t	*mhdl;
5520 	ldc_chan_t	*ldcp;
5521 	ldc_memseg_t	*memseg;
5522 	caddr_t		local_vaddr;
5523 	size_t		copy_size;
5524 
5525 	if (mhandle == NULL) {
5526 		DWARN(DBG_ALL_LDCS,
5527 		    "i_ldc_mem_acquire_release: invalid memory handle\n");
5528 		return (EINVAL);
5529 	}
5530 	mhdl = (ldc_mhdl_t *)mhandle;
5531 
5532 	mutex_enter(&mhdl->lock);
5533 
5534 	if (mhdl->status != LDC_MAPPED || mhdl->ldcp == NULL) {
5535 		DWARN(DBG_ALL_LDCS,
5536 		    "i_ldc_mem_acquire_release: not mapped memory\n");
5537 		mutex_exit(&mhdl->lock);
5538 		return (EINVAL);
5539 	}
5540 
5541 	/* do nothing for direct map */
5542 	if (mhdl->mtype == LDC_DIRECT_MAP) {
5543 		mutex_exit(&mhdl->lock);
5544 		return (0);
5545 	}
5546 
5547 	/* do nothing if COPY_IN+MEM_W and COPY_OUT+MEM_R */
5548 	if ((direction == LDC_COPY_IN && (mhdl->perm & LDC_MEM_R) == 0) ||
5549 	    (direction == LDC_COPY_OUT && (mhdl->perm & LDC_MEM_W) == 0)) {
5550 		mutex_exit(&mhdl->lock);
5551 		return (0);
5552 	}
5553 
5554 	if (offset >= mhdl->memseg->size ||
5555 	    (offset + size) > mhdl->memseg->size) {
5556 		DWARN(DBG_ALL_LDCS,
5557 		    "i_ldc_mem_acquire_release: memory out of range\n");
5558 		mutex_exit(&mhdl->lock);
5559 		return (EINVAL);
5560 	}
5561 
5562 	/* get the channel handle and memory segment */
5563 	ldcp = mhdl->ldcp;
5564 	memseg = mhdl->memseg;
5565 
5566 	if (mhdl->mtype == LDC_SHADOW_MAP) {
5567 
5568 		local_vaddr = memseg->vaddr + offset;
5569 		copy_size = size;
5570 
5571 		/* copy to/from remote from/to local memory */
5572 		err = ldc_mem_copy((ldc_handle_t)ldcp, local_vaddr, offset,
5573 		    &copy_size, memseg->cookies, memseg->ncookies,
5574 		    direction);
5575 		if (err || copy_size != size) {
5576 			DWARN(ldcp->id,
5577 			    "i_ldc_mem_acquire_release: copy failed\n");
5578 			mutex_exit(&mhdl->lock);
5579 			return (err);
5580 		}
5581 	}
5582 
5583 	mutex_exit(&mhdl->lock);
5584 
5585 	return (0);
5586 }
5587 
5588 /*
5589  * Ensure that the contents in the remote memory seg are consistent
5590  * with the contents if of local segment
5591  */
5592 int
5593 ldc_mem_acquire(ldc_mem_handle_t mhandle, uint64_t offset, uint64_t size)
5594 {
5595 	return (i_ldc_mem_acquire_release(mhandle, LDC_COPY_IN, offset, size));
5596 }
5597 
5598 
5599 /*
5600  * Ensure that the contents in the local memory seg are consistent
5601  * with the contents if of remote segment
5602  */
5603 int
5604 ldc_mem_release(ldc_mem_handle_t mhandle, uint64_t offset, uint64_t size)
5605 {
5606 	return (i_ldc_mem_acquire_release(mhandle, LDC_COPY_OUT, offset, size));
5607 }
5608 
5609 /*
5610  * Allocate a descriptor ring. The size of each each descriptor
5611  * must be 8-byte aligned and the entire ring should be a multiple
5612  * of MMU_PAGESIZE.
5613  */
5614 int
5615 ldc_mem_dring_create(uint32_t len, uint32_t dsize, ldc_dring_handle_t *dhandle)
5616 {
5617 	ldc_dring_t *dringp;
5618 	size_t size = (dsize * len);
5619 
5620 	D1(DBG_ALL_LDCS, "ldc_mem_dring_create: len=0x%x, size=0x%x\n",
5621 	    len, dsize);
5622 
5623 	if (dhandle == NULL) {
5624 		DWARN(DBG_ALL_LDCS, "ldc_mem_dring_create: invalid dhandle\n");
5625 		return (EINVAL);
5626 	}
5627 
5628 	if (len == 0) {
5629 		DWARN(DBG_ALL_LDCS, "ldc_mem_dring_create: invalid length\n");
5630 		return (EINVAL);
5631 	}
5632 
5633 	/* descriptor size should be 8-byte aligned */
5634 	if (dsize == 0 || (dsize & 0x7)) {
5635 		DWARN(DBG_ALL_LDCS, "ldc_mem_dring_create: invalid size\n");
5636 		return (EINVAL);
5637 	}
5638 
5639 	*dhandle = 0;
5640 
5641 	/* Allocate a desc ring structure */
5642 	dringp = kmem_zalloc(sizeof (ldc_dring_t), KM_SLEEP);
5643 
5644 	/* Initialize dring */
5645 	dringp->length = len;
5646 	dringp->dsize = dsize;
5647 
5648 	/* round off to multiple of pagesize */
5649 	dringp->size = (size & MMU_PAGEMASK);
5650 	if (size & MMU_PAGEOFFSET)
5651 		dringp->size += MMU_PAGESIZE;
5652 
5653 	dringp->status = LDC_UNBOUND;
5654 
5655 	/* allocate descriptor ring memory */
5656 	dringp->base = kmem_zalloc(dringp->size, KM_SLEEP);
5657 
5658 	/* initialize the desc ring lock */
5659 	mutex_init(&dringp->lock, NULL, MUTEX_DRIVER, NULL);
5660 
5661 	/* Add descriptor ring to the head of global list */
5662 	mutex_enter(&ldcssp->lock);
5663 	dringp->next = ldcssp->dring_list;
5664 	ldcssp->dring_list = dringp;
5665 	mutex_exit(&ldcssp->lock);
5666 
5667 	*dhandle = (ldc_dring_handle_t)dringp;
5668 
5669 	D1(DBG_ALL_LDCS, "ldc_mem_dring_create: dring allocated\n");
5670 
5671 	return (0);
5672 }
5673 
5674 
5675 /*
5676  * Destroy a descriptor ring.
5677  */
5678 int
5679 ldc_mem_dring_destroy(ldc_dring_handle_t dhandle)
5680 {
5681 	ldc_dring_t *dringp;
5682 	ldc_dring_t *tmp_dringp;
5683 
5684 	D1(DBG_ALL_LDCS, "ldc_mem_dring_destroy: entered\n");
5685 
5686 	if (dhandle == NULL) {
5687 		DWARN(DBG_ALL_LDCS,
5688 		    "ldc_mem_dring_destroy: invalid desc ring handle\n");
5689 		return (EINVAL);
5690 	}
5691 	dringp = (ldc_dring_t *)dhandle;
5692 
5693 	if (dringp->status == LDC_BOUND) {
5694 		DWARN(DBG_ALL_LDCS,
5695 		    "ldc_mem_dring_destroy: desc ring is bound\n");
5696 		return (EACCES);
5697 	}
5698 
5699 	mutex_enter(&dringp->lock);
5700 	mutex_enter(&ldcssp->lock);
5701 
5702 	/* remove from linked list - if not bound */
5703 	tmp_dringp = ldcssp->dring_list;
5704 	if (tmp_dringp == dringp) {
5705 		ldcssp->dring_list = dringp->next;
5706 		dringp->next = NULL;
5707 
5708 	} else {
5709 		while (tmp_dringp != NULL) {
5710 			if (tmp_dringp->next == dringp) {
5711 				tmp_dringp->next = dringp->next;
5712 				dringp->next = NULL;
5713 				break;
5714 			}
5715 			tmp_dringp = tmp_dringp->next;
5716 		}
5717 		if (tmp_dringp == NULL) {
5718 			DWARN(DBG_ALL_LDCS,
5719 			    "ldc_mem_dring_destroy: invalid descriptor\n");
5720 			mutex_exit(&ldcssp->lock);
5721 			mutex_exit(&dringp->lock);
5722 			return (EINVAL);
5723 		}
5724 	}
5725 
5726 	mutex_exit(&ldcssp->lock);
5727 
5728 	/* free the descriptor ring */
5729 	kmem_free(dringp->base, dringp->size);
5730 
5731 	mutex_exit(&dringp->lock);
5732 
5733 	/* destroy dring lock */
5734 	mutex_destroy(&dringp->lock);
5735 
5736 	/* free desc ring object */
5737 	kmem_free(dringp, sizeof (ldc_dring_t));
5738 
5739 	return (0);
5740 }
5741 
5742 /*
5743  * Bind a previously allocated dring to a channel. The channel should
5744  * be OPEN in order to bind the ring to the channel. Returns back a
5745  * descriptor ring cookie. The descriptor ring is exported for remote
5746  * access by the client at the other end of the channel. An entry for
5747  * dring pages is stored in map table (via call to ldc_mem_bind_handle).
5748  */
5749 int
5750 ldc_mem_dring_bind(ldc_handle_t handle, ldc_dring_handle_t dhandle,
5751     uint8_t mtype, uint8_t perm, ldc_mem_cookie_t *cookie, uint32_t *ccount)
5752 {
5753 	int		err;
5754 	ldc_chan_t 	*ldcp;
5755 	ldc_dring_t	*dringp;
5756 	ldc_mem_handle_t mhandle;
5757 
5758 	/* check to see if channel is initalized */
5759 	if (handle == NULL) {
5760 		DWARN(DBG_ALL_LDCS,
5761 		    "ldc_mem_dring_bind: invalid channel handle\n");
5762 		return (EINVAL);
5763 	}
5764 	ldcp = (ldc_chan_t *)handle;
5765 
5766 	if (dhandle == NULL) {
5767 		DWARN(DBG_ALL_LDCS,
5768 		    "ldc_mem_dring_bind: invalid desc ring handle\n");
5769 		return (EINVAL);
5770 	}
5771 	dringp = (ldc_dring_t *)dhandle;
5772 
5773 	if (cookie == NULL) {
5774 		DWARN(ldcp->id,
5775 		    "ldc_mem_dring_bind: invalid cookie arg\n");
5776 		return (EINVAL);
5777 	}
5778 
5779 	mutex_enter(&dringp->lock);
5780 
5781 	if (dringp->status == LDC_BOUND) {
5782 		DWARN(DBG_ALL_LDCS,
5783 		    "ldc_mem_dring_bind: (0x%llx) descriptor ring is bound\n",
5784 		    ldcp->id);
5785 		mutex_exit(&dringp->lock);
5786 		return (EINVAL);
5787 	}
5788 
5789 	if ((perm & LDC_MEM_RW) == 0) {
5790 		DWARN(DBG_ALL_LDCS,
5791 		    "ldc_mem_dring_bind: invalid permissions\n");
5792 		mutex_exit(&dringp->lock);
5793 		return (EINVAL);
5794 	}
5795 
5796 	if ((mtype & (LDC_SHADOW_MAP|LDC_DIRECT_MAP|LDC_IO_MAP)) == 0) {
5797 		DWARN(DBG_ALL_LDCS, "ldc_mem_dring_bind: invalid type\n");
5798 		mutex_exit(&dringp->lock);
5799 		return (EINVAL);
5800 	}
5801 
5802 	dringp->ldcp = ldcp;
5803 
5804 	/* create an memory handle */
5805 	err = ldc_mem_alloc_handle(handle, &mhandle);
5806 	if (err || mhandle == NULL) {
5807 		DWARN(DBG_ALL_LDCS,
5808 		    "ldc_mem_dring_bind: (0x%llx) error allocating mhandle\n",
5809 		    ldcp->id);
5810 		mutex_exit(&dringp->lock);
5811 		return (err);
5812 	}
5813 	dringp->mhdl = mhandle;
5814 
5815 	/* bind the descriptor ring to channel */
5816 	err = ldc_mem_bind_handle(mhandle, dringp->base, dringp->size,
5817 	    mtype, perm, cookie, ccount);
5818 	if (err) {
5819 		DWARN(ldcp->id,
5820 		    "ldc_mem_dring_bind: (0x%llx) error binding mhandle\n",
5821 		    ldcp->id);
5822 		mutex_exit(&dringp->lock);
5823 		return (err);
5824 	}
5825 
5826 	/*
5827 	 * For now return error if we get more than one cookie
5828 	 * FUTURE: Return multiple cookies ..
5829 	 */
5830 	if (*ccount > 1) {
5831 		(void) ldc_mem_unbind_handle(mhandle);
5832 		(void) ldc_mem_free_handle(mhandle);
5833 
5834 		dringp->ldcp = NULL;
5835 		dringp->mhdl = NULL;
5836 		*ccount = 0;
5837 
5838 		mutex_exit(&dringp->lock);
5839 		return (EAGAIN);
5840 	}
5841 
5842 	/* Add descriptor ring to channel's exported dring list */
5843 	mutex_enter(&ldcp->exp_dlist_lock);
5844 	dringp->ch_next = ldcp->exp_dring_list;
5845 	ldcp->exp_dring_list = dringp;
5846 	mutex_exit(&ldcp->exp_dlist_lock);
5847 
5848 	dringp->status = LDC_BOUND;
5849 
5850 	mutex_exit(&dringp->lock);
5851 
5852 	return (0);
5853 }
5854 
5855 /*
5856  * Return the next cookie associated with the specified dring handle
5857  */
5858 int
5859 ldc_mem_dring_nextcookie(ldc_dring_handle_t dhandle, ldc_mem_cookie_t *cookie)
5860 {
5861 	int		rv = 0;
5862 	ldc_dring_t 	*dringp;
5863 	ldc_chan_t	*ldcp;
5864 
5865 	if (dhandle == NULL) {
5866 		DWARN(DBG_ALL_LDCS,
5867 		    "ldc_mem_dring_nextcookie: invalid desc ring handle\n");
5868 		return (EINVAL);
5869 	}
5870 	dringp = (ldc_dring_t *)dhandle;
5871 	mutex_enter(&dringp->lock);
5872 
5873 	if (dringp->status != LDC_BOUND) {
5874 		DWARN(DBG_ALL_LDCS,
5875 		    "ldc_mem_dring_nextcookie: descriptor ring 0x%llx "
5876 		    "is not bound\n", dringp);
5877 		mutex_exit(&dringp->lock);
5878 		return (EINVAL);
5879 	}
5880 
5881 	ldcp = dringp->ldcp;
5882 
5883 	if (cookie == NULL) {
5884 		DWARN(ldcp->id,
5885 		    "ldc_mem_dring_nextcookie:(0x%llx) invalid cookie arg\n",
5886 		    ldcp->id);
5887 		mutex_exit(&dringp->lock);
5888 		return (EINVAL);
5889 	}
5890 
5891 	rv = ldc_mem_nextcookie((ldc_mem_handle_t)dringp->mhdl, cookie);
5892 	mutex_exit(&dringp->lock);
5893 
5894 	return (rv);
5895 }
5896 /*
5897  * Unbind a previously bound dring from a channel.
5898  */
5899 int
5900 ldc_mem_dring_unbind(ldc_dring_handle_t dhandle)
5901 {
5902 	ldc_dring_t 	*dringp;
5903 	ldc_dring_t	*tmp_dringp;
5904 	ldc_chan_t	*ldcp;
5905 
5906 	if (dhandle == NULL) {
5907 		DWARN(DBG_ALL_LDCS,
5908 		    "ldc_mem_dring_unbind: invalid desc ring handle\n");
5909 		return (EINVAL);
5910 	}
5911 	dringp = (ldc_dring_t *)dhandle;
5912 
5913 	mutex_enter(&dringp->lock);
5914 
5915 	if (dringp->status == LDC_UNBOUND) {
5916 		DWARN(DBG_ALL_LDCS,
5917 		    "ldc_mem_dring_bind: descriptor ring 0x%llx is unbound\n",
5918 		    dringp);
5919 		mutex_exit(&dringp->lock);
5920 		return (EINVAL);
5921 	}
5922 	ldcp = dringp->ldcp;
5923 
5924 	mutex_enter(&ldcp->exp_dlist_lock);
5925 
5926 	tmp_dringp = ldcp->exp_dring_list;
5927 	if (tmp_dringp == dringp) {
5928 		ldcp->exp_dring_list = dringp->ch_next;
5929 		dringp->ch_next = NULL;
5930 
5931 	} else {
5932 		while (tmp_dringp != NULL) {
5933 			if (tmp_dringp->ch_next == dringp) {
5934 				tmp_dringp->ch_next = dringp->ch_next;
5935 				dringp->ch_next = NULL;
5936 				break;
5937 			}
5938 			tmp_dringp = tmp_dringp->ch_next;
5939 		}
5940 		if (tmp_dringp == NULL) {
5941 			DWARN(DBG_ALL_LDCS,
5942 			    "ldc_mem_dring_unbind: invalid descriptor\n");
5943 			mutex_exit(&ldcp->exp_dlist_lock);
5944 			mutex_exit(&dringp->lock);
5945 			return (EINVAL);
5946 		}
5947 	}
5948 
5949 	mutex_exit(&ldcp->exp_dlist_lock);
5950 
5951 	(void) ldc_mem_unbind_handle((ldc_mem_handle_t)dringp->mhdl);
5952 	(void) ldc_mem_free_handle((ldc_mem_handle_t)dringp->mhdl);
5953 
5954 	dringp->ldcp = NULL;
5955 	dringp->mhdl = NULL;
5956 	dringp->status = LDC_UNBOUND;
5957 
5958 	mutex_exit(&dringp->lock);
5959 
5960 	return (0);
5961 }
5962 
5963 /*
5964  * Get information about the dring. The base address of the descriptor
5965  * ring along with the type and permission are returned back.
5966  */
5967 int
5968 ldc_mem_dring_info(ldc_dring_handle_t dhandle, ldc_mem_info_t *minfo)
5969 {
5970 	ldc_dring_t	*dringp;
5971 	int		rv;
5972 
5973 	if (dhandle == NULL) {
5974 		DWARN(DBG_ALL_LDCS,
5975 		    "ldc_mem_dring_info: invalid desc ring handle\n");
5976 		return (EINVAL);
5977 	}
5978 	dringp = (ldc_dring_t *)dhandle;
5979 
5980 	mutex_enter(&dringp->lock);
5981 
5982 	if (dringp->mhdl) {
5983 		rv = ldc_mem_info(dringp->mhdl, minfo);
5984 		if (rv) {
5985 			DWARN(DBG_ALL_LDCS,
5986 			    "ldc_mem_dring_info: error reading mem info\n");
5987 			mutex_exit(&dringp->lock);
5988 			return (rv);
5989 		}
5990 	} else {
5991 		minfo->vaddr = dringp->base;
5992 		minfo->raddr = NULL;
5993 		minfo->status = dringp->status;
5994 	}
5995 
5996 	mutex_exit(&dringp->lock);
5997 
5998 	return (0);
5999 }
6000 
6001 /*
6002  * Map an exported descriptor ring into the local address space. If the
6003  * descriptor ring was exported for direct map access, a HV call is made
6004  * to allocate a RA range. If the map is done via a shadow copy, local
6005  * shadow memory is allocated.
6006  */
6007 int
6008 ldc_mem_dring_map(ldc_handle_t handle, ldc_mem_cookie_t *cookie,
6009     uint32_t ccount, uint32_t len, uint32_t dsize, uint8_t mtype,
6010     ldc_dring_handle_t *dhandle)
6011 {
6012 	int		err;
6013 	ldc_chan_t 	*ldcp = (ldc_chan_t *)handle;
6014 	ldc_mem_handle_t mhandle;
6015 	ldc_dring_t	*dringp;
6016 	size_t		dring_size;
6017 
6018 	if (dhandle == NULL) {
6019 		DWARN(DBG_ALL_LDCS,
6020 		    "ldc_mem_dring_map: invalid dhandle\n");
6021 		return (EINVAL);
6022 	}
6023 
6024 	/* check to see if channel is initalized */
6025 	if (handle == NULL) {
6026 		DWARN(DBG_ALL_LDCS,
6027 		    "ldc_mem_dring_map: invalid channel handle\n");
6028 		return (EINVAL);
6029 	}
6030 	ldcp = (ldc_chan_t *)handle;
6031 
6032 	if (cookie == NULL) {
6033 		DWARN(ldcp->id,
6034 		    "ldc_mem_dring_map: (0x%llx) invalid cookie\n",
6035 		    ldcp->id);
6036 		return (EINVAL);
6037 	}
6038 
6039 	/* FUTURE: For now we support only one cookie per dring */
6040 	ASSERT(ccount == 1);
6041 
6042 	if (cookie->size < (dsize * len)) {
6043 		DWARN(ldcp->id,
6044 		    "ldc_mem_dring_map: (0x%llx) invalid dsize/len\n",
6045 		    ldcp->id);
6046 		return (EINVAL);
6047 	}
6048 
6049 	*dhandle = 0;
6050 
6051 	/* Allocate an dring structure */
6052 	dringp = kmem_zalloc(sizeof (ldc_dring_t), KM_SLEEP);
6053 
6054 	D1(ldcp->id,
6055 	    "ldc_mem_dring_map: 0x%x,0x%x,0x%x,0x%llx,0x%llx\n",
6056 	    mtype, len, dsize, cookie->addr, cookie->size);
6057 
6058 	/* Initialize dring */
6059 	dringp->length = len;
6060 	dringp->dsize = dsize;
6061 
6062 	/* round of to multiple of page size */
6063 	dring_size = len * dsize;
6064 	dringp->size = (dring_size & MMU_PAGEMASK);
6065 	if (dring_size & MMU_PAGEOFFSET)
6066 		dringp->size += MMU_PAGESIZE;
6067 
6068 	dringp->ldcp = ldcp;
6069 
6070 	/* create an memory handle */
6071 	err = ldc_mem_alloc_handle(handle, &mhandle);
6072 	if (err || mhandle == NULL) {
6073 		DWARN(DBG_ALL_LDCS,
6074 		    "ldc_mem_dring_map: cannot alloc hdl err=%d\n",
6075 		    err);
6076 		kmem_free(dringp, sizeof (ldc_dring_t));
6077 		return (ENOMEM);
6078 	}
6079 
6080 	dringp->mhdl = mhandle;
6081 	dringp->base = NULL;
6082 
6083 	/* map the dring into local memory */
6084 	err = ldc_mem_map(mhandle, cookie, ccount, mtype, LDC_MEM_RW,
6085 	    &(dringp->base), NULL);
6086 	if (err || dringp->base == NULL) {
6087 		cmn_err(CE_WARN,
6088 		    "ldc_mem_dring_map: cannot map desc ring err=%d\n", err);
6089 		(void) ldc_mem_free_handle(mhandle);
6090 		kmem_free(dringp, sizeof (ldc_dring_t));
6091 		return (ENOMEM);
6092 	}
6093 
6094 	/* initialize the desc ring lock */
6095 	mutex_init(&dringp->lock, NULL, MUTEX_DRIVER, NULL);
6096 
6097 	/* Add descriptor ring to channel's imported dring list */
6098 	mutex_enter(&ldcp->imp_dlist_lock);
6099 	dringp->ch_next = ldcp->imp_dring_list;
6100 	ldcp->imp_dring_list = dringp;
6101 	mutex_exit(&ldcp->imp_dlist_lock);
6102 
6103 	dringp->status = LDC_MAPPED;
6104 
6105 	*dhandle = (ldc_dring_handle_t)dringp;
6106 
6107 	return (0);
6108 }
6109 
6110 /*
6111  * Unmap a descriptor ring. Free shadow memory (if any).
6112  */
6113 int
6114 ldc_mem_dring_unmap(ldc_dring_handle_t dhandle)
6115 {
6116 	ldc_dring_t 	*dringp;
6117 	ldc_dring_t	*tmp_dringp;
6118 	ldc_chan_t	*ldcp;
6119 
6120 	if (dhandle == NULL) {
6121 		DWARN(DBG_ALL_LDCS,
6122 		    "ldc_mem_dring_unmap: invalid desc ring handle\n");
6123 		return (EINVAL);
6124 	}
6125 	dringp = (ldc_dring_t *)dhandle;
6126 
6127 	if (dringp->status != LDC_MAPPED) {
6128 		DWARN(DBG_ALL_LDCS,
6129 		    "ldc_mem_dring_unmap: not a mapped desc ring\n");
6130 		return (EINVAL);
6131 	}
6132 
6133 	mutex_enter(&dringp->lock);
6134 
6135 	ldcp = dringp->ldcp;
6136 
6137 	mutex_enter(&ldcp->imp_dlist_lock);
6138 
6139 	/* find and unlink the desc ring from channel import list */
6140 	tmp_dringp = ldcp->imp_dring_list;
6141 	if (tmp_dringp == dringp) {
6142 		ldcp->imp_dring_list = dringp->ch_next;
6143 		dringp->ch_next = NULL;
6144 
6145 	} else {
6146 		while (tmp_dringp != NULL) {
6147 			if (tmp_dringp->ch_next == dringp) {
6148 				tmp_dringp->ch_next = dringp->ch_next;
6149 				dringp->ch_next = NULL;
6150 				break;
6151 			}
6152 			tmp_dringp = tmp_dringp->ch_next;
6153 		}
6154 		if (tmp_dringp == NULL) {
6155 			DWARN(DBG_ALL_LDCS,
6156 			    "ldc_mem_dring_unmap: invalid descriptor\n");
6157 			mutex_exit(&ldcp->imp_dlist_lock);
6158 			mutex_exit(&dringp->lock);
6159 			return (EINVAL);
6160 		}
6161 	}
6162 
6163 	mutex_exit(&ldcp->imp_dlist_lock);
6164 
6165 	/* do a LDC memory handle unmap and free */
6166 	(void) ldc_mem_unmap(dringp->mhdl);
6167 	(void) ldc_mem_free_handle((ldc_mem_handle_t)dringp->mhdl);
6168 
6169 	dringp->status = 0;
6170 	dringp->ldcp = NULL;
6171 
6172 	mutex_exit(&dringp->lock);
6173 
6174 	/* destroy dring lock */
6175 	mutex_destroy(&dringp->lock);
6176 
6177 	/* free desc ring object */
6178 	kmem_free(dringp, sizeof (ldc_dring_t));
6179 
6180 	return (0);
6181 }
6182 
6183 /*
6184  * Internal entry point for descriptor ring access entry consistency
6185  * semantics. Acquire copies the contents of the remote descriptor ring
6186  * into the local shadow copy. The release operation copies the local
6187  * contents into the remote dring. The start and end locations specify
6188  * bounds for the entries being synchronized.
6189  */
6190 static int
6191 i_ldc_dring_acquire_release(ldc_dring_handle_t dhandle,
6192     uint8_t direction, uint64_t start, uint64_t end)
6193 {
6194 	int 			err;
6195 	ldc_dring_t		*dringp;
6196 	ldc_chan_t		*ldcp;
6197 	uint64_t		soff;
6198 	size_t			copy_size;
6199 
6200 	if (dhandle == NULL) {
6201 		DWARN(DBG_ALL_LDCS,
6202 		    "i_ldc_dring_acquire_release: invalid desc ring handle\n");
6203 		return (EINVAL);
6204 	}
6205 	dringp = (ldc_dring_t *)dhandle;
6206 	mutex_enter(&dringp->lock);
6207 
6208 	if (dringp->status != LDC_MAPPED || dringp->ldcp == NULL) {
6209 		DWARN(DBG_ALL_LDCS,
6210 		    "i_ldc_dring_acquire_release: not a mapped desc ring\n");
6211 		mutex_exit(&dringp->lock);
6212 		return (EINVAL);
6213 	}
6214 
6215 	if (start >= dringp->length || end >= dringp->length) {
6216 		DWARN(DBG_ALL_LDCS,
6217 		    "i_ldc_dring_acquire_release: index out of range\n");
6218 		mutex_exit(&dringp->lock);
6219 		return (EINVAL);
6220 	}
6221 
6222 	/* get the channel handle */
6223 	ldcp = dringp->ldcp;
6224 
6225 	copy_size = (start <= end) ? (((end - start) + 1) * dringp->dsize) :
6226 		((dringp->length - start) * dringp->dsize);
6227 
6228 	/* Calculate the relative offset for the first desc */
6229 	soff = (start * dringp->dsize);
6230 
6231 	/* copy to/from remote from/to local memory */
6232 	D1(ldcp->id, "i_ldc_dring_acquire_release: c1 off=0x%llx sz=0x%llx\n",
6233 	    soff, copy_size);
6234 	err = i_ldc_mem_acquire_release((ldc_mem_handle_t)dringp->mhdl,
6235 	    direction, soff, copy_size);
6236 	if (err) {
6237 		DWARN(ldcp->id,
6238 		    "i_ldc_dring_acquire_release: copy failed\n");
6239 		mutex_exit(&dringp->lock);
6240 		return (err);
6241 	}
6242 
6243 	/* do the balance */
6244 	if (start > end) {
6245 		copy_size = ((end + 1) * dringp->dsize);
6246 		soff = 0;
6247 
6248 		/* copy to/from remote from/to local memory */
6249 		D1(ldcp->id, "i_ldc_dring_acquire_release: c2 "
6250 		    "off=0x%llx sz=0x%llx\n", soff, copy_size);
6251 		err = i_ldc_mem_acquire_release((ldc_mem_handle_t)dringp->mhdl,
6252 		    direction, soff, copy_size);
6253 		if (err) {
6254 			DWARN(ldcp->id,
6255 			    "i_ldc_dring_acquire_release: copy failed\n");
6256 			mutex_exit(&dringp->lock);
6257 			return (err);
6258 		}
6259 	}
6260 
6261 	mutex_exit(&dringp->lock);
6262 
6263 	return (0);
6264 }
6265 
6266 /*
6267  * Ensure that the contents in the local dring are consistent
6268  * with the contents if of remote dring
6269  */
6270 int
6271 ldc_mem_dring_acquire(ldc_dring_handle_t dhandle, uint64_t start, uint64_t end)
6272 {
6273 	return (i_ldc_dring_acquire_release(dhandle, LDC_COPY_IN, start, end));
6274 }
6275 
6276 /*
6277  * Ensure that the contents in the remote dring are consistent
6278  * with the contents if of local dring
6279  */
6280 int
6281 ldc_mem_dring_release(ldc_dring_handle_t dhandle, uint64_t start, uint64_t end)
6282 {
6283 	return (i_ldc_dring_acquire_release(dhandle, LDC_COPY_OUT, start, end));
6284 }
6285 
6286 
6287 /* ------------------------------------------------------------------------- */
6288