xref: /titanic_50/usr/src/uts/sun4v/io/ldc.c (revision 85025c032d701094e5f35de4f42ce66082924fc1)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * sun4v LDC Link Layer
31  */
32 #include <sys/types.h>
33 #include <sys/file.h>
34 #include <sys/errno.h>
35 #include <sys/open.h>
36 #include <sys/cred.h>
37 #include <sys/kmem.h>
38 #include <sys/conf.h>
39 #include <sys/cmn_err.h>
40 #include <sys/ksynch.h>
41 #include <sys/modctl.h>
42 #include <sys/stat.h> /* needed for S_IFBLK and S_IFCHR */
43 #include <sys/debug.h>
44 #include <sys/types.h>
45 #include <sys/cred.h>
46 #include <sys/promif.h>
47 #include <sys/ddi.h>
48 #include <sys/sunddi.h>
49 #include <sys/cyclic.h>
50 #include <sys/machsystm.h>
51 #include <sys/vm.h>
52 #include <sys/cpu.h>
53 #include <sys/intreg.h>
54 #include <sys/machcpuvar.h>
55 #include <sys/mmu.h>
56 #include <sys/pte.h>
57 #include <vm/hat.h>
58 #include <vm/as.h>
59 #include <vm/hat_sfmmu.h>
60 #include <sys/vm_machparam.h>
61 #include <vm/seg_kmem.h>
62 #include <vm/seg_kpm.h>
63 #include <sys/note.h>
64 #include <sys/ivintr.h>
65 #include <sys/hypervisor_api.h>
66 #include <sys/ldc.h>
67 #include <sys/ldc_impl.h>
68 #include <sys/cnex.h>
69 #include <sys/hsvc.h>
70 
71 /* Core internal functions */
72 static int i_ldc_h2v_error(int h_error);
73 static int i_ldc_txq_reconf(ldc_chan_t *ldcp);
74 static int i_ldc_rxq_reconf(ldc_chan_t *ldcp, boolean_t force_reset);
75 static int i_ldc_rxq_drain(ldc_chan_t *ldcp);
76 static void i_ldc_reset_state(ldc_chan_t *ldcp);
77 static void i_ldc_reset(ldc_chan_t *ldcp, boolean_t force_reset);
78 
79 static int i_ldc_get_tx_tail(ldc_chan_t *ldcp, uint64_t *tail);
80 static int i_ldc_set_tx_tail(ldc_chan_t *ldcp, uint64_t tail);
81 static int i_ldc_set_rx_head(ldc_chan_t *ldcp, uint64_t head);
82 static int i_ldc_send_pkt(ldc_chan_t *ldcp, uint8_t pkttype, uint8_t subtype,
83     uint8_t ctrlmsg);
84 
85 /* Interrupt handling functions */
86 static uint_t i_ldc_tx_hdlr(caddr_t arg1, caddr_t arg2);
87 static uint_t i_ldc_rx_hdlr(caddr_t arg1, caddr_t arg2);
88 static void i_ldc_clear_intr(ldc_chan_t *ldcp, cnex_intrtype_t itype);
89 
90 /* Read method functions */
91 static int i_ldc_read_raw(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep);
92 static int i_ldc_read_packet(ldc_chan_t *ldcp, caddr_t target_bufp,
93 	size_t *sizep);
94 static int i_ldc_read_stream(ldc_chan_t *ldcp, caddr_t target_bufp,
95 	size_t *sizep);
96 
97 /* Write method functions */
98 static int i_ldc_write_raw(ldc_chan_t *ldcp, caddr_t target_bufp,
99 	size_t *sizep);
100 static int i_ldc_write_packet(ldc_chan_t *ldcp, caddr_t target_bufp,
101 	size_t *sizep);
102 static int i_ldc_write_stream(ldc_chan_t *ldcp, caddr_t target_bufp,
103 	size_t *sizep);
104 
105 /* Pkt processing internal functions */
106 static int i_ldc_check_seqid(ldc_chan_t *ldcp, ldc_msg_t *ldcmsg);
107 static int i_ldc_ctrlmsg(ldc_chan_t *ldcp, ldc_msg_t *ldcmsg);
108 static int i_ldc_process_VER(ldc_chan_t *ldcp, ldc_msg_t *msg);
109 static int i_ldc_process_RTS(ldc_chan_t *ldcp, ldc_msg_t *msg);
110 static int i_ldc_process_RTR(ldc_chan_t *ldcp, ldc_msg_t *msg);
111 static int i_ldc_process_RDX(ldc_chan_t *ldcp, ldc_msg_t *msg);
112 static int i_ldc_process_data_ACK(ldc_chan_t *ldcp, ldc_msg_t *msg);
113 
114 /* Memory synchronization internal functions */
115 static int i_ldc_mem_acquire_release(ldc_mem_handle_t mhandle,
116     uint8_t direction, uint64_t offset, size_t size);
117 static int i_ldc_dring_acquire_release(ldc_dring_handle_t dhandle,
118     uint8_t direction, uint64_t start, uint64_t end);
119 
120 /* LDC Version */
/*
 * LDC Version
 *
 * Table of protocol versions this endpoint offers during the version
 * handshake (entries are presumably {major, minor} - confirm against the
 * ldc_ver_t declaration).
 */
static ldc_ver_t ldc_versions[] = { {1, 0} };

/* number of supported versions */
#define	LDC_NUM_VERS	(sizeof (ldc_versions) / sizeof (ldc_versions[0]))

/* Module State Pointer - allocated in _init(), freed in _fini() */
static ldc_soft_state_t *ldcssp;

/* Module linkage: LDC is loaded as a miscellaneous module */
static struct modldrv md = {
	&mod_miscops,			/* This is a misc module */
	"sun4v LDC module v%I%",	/* Name of the module */
};

static struct modlinkage ml = {
	MODREV_1,
	&md,
	NULL
};

/*
 * Hypervisor service group negotiated in _init() via hsvc_register();
 * the minor number granted is stored in ldc_sup_minor.
 */
static uint64_t ldc_sup_minor;		/* Supported minor number */
static hsvc_info_t ldc_hsvc = {
	HSVC_REV_1, NULL, HSVC_GROUP_LDC, 1, 0, "ldc"
};
144 
145 /*
146  * LDC framework supports mapping remote domain's memory
147  * either directly or via shadow memory pages. Default
148  * support is currently implemented via shadow copy.
149  * Direct map can be enabled by setting 'ldc_shmem_enabled'
150  */
151 int ldc_shmem_enabled = 0;
152 
153 /*
154  * The no. of MTU size messages that can be stored in
155  * the LDC Tx queue. The number of Tx queue entries is
156  * then computed as (mtu * mtu_msgs)/sizeof(queue_entry)
157  */
158 uint64_t ldc_mtu_msgs = LDC_MTU_MSGS;
159 
160 /*
161  * The minimum queue length. This is the size of the smallest
162  * LDC queue. If the computed value is less than this default,
163  * the queue length is rounded up to 'ldc_queue_entries'.
164  */
165 uint64_t ldc_queue_entries = LDC_QUEUE_ENTRIES;
166 
/*
 * Pages exported for remote access over each channel are
 * maintained in a table registered with the Hypervisor.
 * The default number of entries in the table is set to
 * 'ldc_maptable_entries'.
 */
173 uint64_t ldc_maptable_entries = LDC_MTBL_ENTRIES;
174 
175 /*
176  * LDC retry count and delay - when the HV returns EWOULDBLOCK
177  * the operation is retried 'ldc_max_retries' times with a
178  * wait of 'ldc_delay' usecs between each retry.
179  */
180 int ldc_max_retries = LDC_MAX_RETRIES;
181 clock_t ldc_delay = LDC_DELAY;
182 
183 /*
184  * delay between each retry of channel unregistration in
185  * ldc_close(), to wait for pending interrupts to complete.
186  */
187 clock_t ldc_close_delay = LDC_CLOSE_DELAY;
188 
189 #ifdef DEBUG
190 
191 /*
192  * Print debug messages
193  *
194  * set ldcdbg to 0x7 for enabling all msgs
195  * 0x4 - Warnings
196  * 0x2 - All debug messages
197  * 0x1 - Minimal debug messages
198  *
199  * set ldcdbgchan to the channel number you want to debug
200  * setting it to -1 prints debug messages for all channels
201  * NOTE: ldcdbgchan has no effect on error messages
202  */
203 
/* Channel id wildcard: matches every channel in the debug filters */
#define	DBG_ALL_LDCS -1

/* Bitmask tested by D1 (0x1), D2 (0x2) and DWARN (0x4) */
int ldcdbg = 0x0;
/* Channel whose messages are printed; DBG_ALL_LDCS disables filtering */
int64_t ldcdbgchan = DBG_ALL_LDCS;
/*
 * Pending error injections (LDC_ERR_* bits); ldc_inject_error() clears
 * a bit once it has been consumed.
 */
uint64_t ldc_inject_err_flag = 0;
209 
210 static void
211 ldcdebug(int64_t id, const char *fmt, ...)
212 {
213 	char buf[512];
214 	va_list ap;
215 
216 	/*
217 	 * Do not return if,
218 	 * caller wants to print it anyway - (id == DBG_ALL_LDCS)
219 	 * debug channel is set to all LDCs - (ldcdbgchan == DBG_ALL_LDCS)
220 	 * debug channel = caller specified channel
221 	 */
222 	if ((id != DBG_ALL_LDCS) &&
223 	    (ldcdbgchan != DBG_ALL_LDCS) &&
224 	    (ldcdbgchan != id)) {
225 		return;
226 	}
227 
228 	va_start(ap, fmt);
229 	(void) vsprintf(buf, fmt, ap);
230 	va_end(ap);
231 
232 	cmn_err(CE_CONT, "?%s", buf);
233 }
234 
235 #define	LDC_ERR_RESET	0x1
236 #define	LDC_ERR_PKTLOSS	0x2
237 
238 static boolean_t
239 ldc_inject_error(ldc_chan_t *ldcp, uint64_t error)
240 {
241 	if ((ldcdbgchan != DBG_ALL_LDCS) && (ldcdbgchan != ldcp->id))
242 		return (B_FALSE);
243 
244 	if ((ldc_inject_err_flag & error) == 0)
245 		return (B_FALSE);
246 
247 	/* clear the injection state */
248 	ldc_inject_err_flag &= ~error;
249 
250 	return (B_TRUE);
251 }
252 
/*
 * Conditional debug print macros; use as D1(id, fmt, ...).
 * Each expands to an unbraced "if (ldcdbg & bit) ldcdebug", so the call's
 * argument list completes the ldcdebug() invocation.
 * NOTE(review): because the expansion is a bare 'if', using these directly
 * as the body of an unbraced if/else would capture the following 'else'.
 */

/* Minimal debug messages (ldcdbg bit 0x01) */
#define	D1		\
if (ldcdbg & 0x01)	\
	ldcdebug

/* All debug messages (ldcdbg bit 0x02) */
#define	D2		\
if (ldcdbg & 0x02)	\
	ldcdebug

/* Warnings (ldcdbg bit 0x04) */
#define	DWARN		\
if (ldcdbg & 0x04)	\
	ldcdebug
264 
/*
 * Print the first 64 bytes at 'addr' as "|xx" hex pairs through D2.
 * buf holds 65*3 bytes: 64 three-character entries plus the closing
 * "|\n" and its terminating NUL.
 */
#define	DUMP_PAYLOAD(id, addr)						\
{									\
	char buf[65*3];							\
	int i;								\
	uint8_t *src = (uint8_t *)addr;					\
	for (i = 0; i < 64; i++, src++)					\
		(void) sprintf(&buf[i * 3], "|%02x", *src);		\
	(void) sprintf(&buf[i * 3], "|\n");				\
	D2((id), "payload: %s", buf);					\
}
275 
/*
 * Print a one-line summary of the LDC packet at 'addr' through D2.
 * c is the channel, s a caller-supplied tag string. The seqid is shown
 * as 0 for RAW mode channels. For LDC_DATA packets the envelope is
 * decoded into fragment start ('B') / stop ('E') markers and the length;
 * for every other packet type the raw envelope value is printed.
 */
#define	DUMP_LDC_PKT(c, s, addr)					\
{									\
	ldc_msg_t *msg = (ldc_msg_t *)(addr);				\
	uint32_t mid = ((c)->mode != LDC_MODE_RAW) ? msg->seqid : 0;	\
	if (msg->type == LDC_DATA) {                                    \
	    D2((c)->id, "%s: msg%d (/%x/%x/%x/,env[%c%c,sz=%d])",	\
	    (s), mid, msg->type, msg->stype, msg->ctrl,			\
	    (msg->env & LDC_FRAG_START) ? 'B' : ' ',                    \
	    (msg->env & LDC_FRAG_STOP) ? 'E' : ' ',                     \
	    (msg->env & LDC_LEN_MASK));					\
	} else { 							\
	    D2((c)->id, "%s: msg%d (/%x/%x/%x/,env=%x)", (s),		\
	    mid, msg->type, msg->stype, msg->ctrl, msg->env);		\
	} 								\
}

/* Debug hooks: true when the matching error injection is pending */
#define	LDC_INJECT_RESET(_ldcp)	ldc_inject_error(_ldcp, LDC_ERR_RESET)
#define	LDC_INJECT_PKTLOSS(_ldcp) ldc_inject_error(_ldcp, LDC_ERR_PKTLOSS)
294 
295 #else
296 
/*
 * Non-DEBUG build: the debug print macros expand to nothing, so a call
 * such as D1(id, fmt, ...) is left as a bare parenthesized expression
 * that produces no output. The dump macros vanish entirely and the error
 * injection hooks always evaluate to B_FALSE.
 */
#define	DBG_ALL_LDCS -1

#define	D1
#define	D2
#define	DWARN

#define	DUMP_PAYLOAD(id, addr)
#define	DUMP_LDC_PKT(c, s, addr)

#define	LDC_INJECT_RESET(_ldcp)	(B_FALSE)
#define	LDC_INJECT_PKTLOSS(_ldcp) (B_FALSE)
308 
309 #endif
310 
/*
 * Zero one ldc_msg_t-sized packet at p.
 * NOTE(review): the expansion already ends in ';', so "ZERO_PKT(p);"
 * yields an extra empty statement - avoid it as an unbraced if/else body.
 */
#define	ZERO_PKT(p)			\
	bzero((p), sizeof (ldc_msg_t));

/*
 * Compose a cookie value: the page size code shifted by
 * LDC_COOKIE_PGSZC_SHIFT OR-ed with the index shifted by pg_shift.
 */
#define	IDX2COOKIE(idx, pg_szc, pg_shift)				\
	(((pg_szc) << LDC_COOKIE_PGSZC_SHIFT) | ((idx) << (pg_shift)))
316 
317 
318 int
319 _init(void)
320 {
321 	int status;
322 
323 	status = hsvc_register(&ldc_hsvc, &ldc_sup_minor);
324 	if (status != 0) {
325 		cmn_err(CE_NOTE, "!%s: cannot negotiate hypervisor LDC services"
326 		    " group: 0x%lx major: %ld minor: %ld errno: %d",
327 		    ldc_hsvc.hsvc_modname, ldc_hsvc.hsvc_group,
328 		    ldc_hsvc.hsvc_major, ldc_hsvc.hsvc_minor, status);
329 		return (-1);
330 	}
331 
332 	/* allocate soft state structure */
333 	ldcssp = kmem_zalloc(sizeof (ldc_soft_state_t), KM_SLEEP);
334 
335 	/* Link the module into the system */
336 	status = mod_install(&ml);
337 	if (status != 0) {
338 		kmem_free(ldcssp, sizeof (ldc_soft_state_t));
339 		return (status);
340 	}
341 
342 	/* Initialize the LDC state structure */
343 	mutex_init(&ldcssp->lock, NULL, MUTEX_DRIVER, NULL);
344 
345 	mutex_enter(&ldcssp->lock);
346 
347 	/* Create a cache for memory handles */
348 	ldcssp->memhdl_cache = kmem_cache_create("ldc_memhdl_cache",
349 	    sizeof (ldc_mhdl_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
350 	if (ldcssp->memhdl_cache == NULL) {
351 		DWARN(DBG_ALL_LDCS, "_init: ldc_memhdl cache create failed\n");
352 		mutex_exit(&ldcssp->lock);
353 		return (-1);
354 	}
355 
356 	/* Create cache for memory segment structures */
357 	ldcssp->memseg_cache = kmem_cache_create("ldc_memseg_cache",
358 	    sizeof (ldc_memseg_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
359 	if (ldcssp->memseg_cache == NULL) {
360 		DWARN(DBG_ALL_LDCS, "_init: ldc_memseg cache create failed\n");
361 		mutex_exit(&ldcssp->lock);
362 		return (-1);
363 	}
364 
365 
366 	ldcssp->channel_count = 0;
367 	ldcssp->channels_open = 0;
368 	ldcssp->chan_list = NULL;
369 	ldcssp->dring_list = NULL;
370 
371 	mutex_exit(&ldcssp->lock);
372 
373 	return (0);
374 }
375 
376 int
377 _info(struct modinfo *modinfop)
378 {
379 	/* Report status of the dynamically loadable driver module */
380 	return (mod_info(&ml, modinfop));
381 }
382 
383 int
384 _fini(void)
385 {
386 	int 		rv, status;
387 	ldc_chan_t 	*ldcp;
388 	ldc_dring_t 	*dringp;
389 	ldc_mem_info_t 	minfo;
390 
391 	/* Unlink the driver module from the system */
392 	status = mod_remove(&ml);
393 	if (status) {
394 		DWARN(DBG_ALL_LDCS, "_fini: mod_remove failed\n");
395 		return (EIO);
396 	}
397 
398 	/* close and finalize channels */
399 	ldcp = ldcssp->chan_list;
400 	while (ldcp != NULL) {
401 		(void) ldc_close((ldc_handle_t)ldcp);
402 		(void) ldc_fini((ldc_handle_t)ldcp);
403 
404 		ldcp = ldcp->next;
405 	}
406 
407 	/* Free descriptor rings */
408 	dringp = ldcssp->dring_list;
409 	while (dringp != NULL) {
410 		dringp = dringp->next;
411 
412 		rv = ldc_mem_dring_info((ldc_dring_handle_t)dringp, &minfo);
413 		if (rv == 0 && minfo.status != LDC_UNBOUND) {
414 			if (minfo.status == LDC_BOUND) {
415 				(void) ldc_mem_dring_unbind(
416 						(ldc_dring_handle_t)dringp);
417 			}
418 			if (minfo.status == LDC_MAPPED) {
419 				(void) ldc_mem_dring_unmap(
420 						(ldc_dring_handle_t)dringp);
421 			}
422 		}
423 
424 		(void) ldc_mem_dring_destroy((ldc_dring_handle_t)dringp);
425 	}
426 	ldcssp->dring_list = NULL;
427 
428 	/* Destroy kmem caches */
429 	kmem_cache_destroy(ldcssp->memhdl_cache);
430 	kmem_cache_destroy(ldcssp->memseg_cache);
431 
432 	/*
433 	 * We have successfully "removed" the driver.
434 	 * Destroying soft states
435 	 */
436 	mutex_destroy(&ldcssp->lock);
437 	kmem_free(ldcssp, sizeof (ldc_soft_state_t));
438 
439 	(void) hsvc_unregister(&ldc_hsvc);
440 
441 	return (status);
442 }
443 
444 /* -------------------------------------------------------------------------- */
445 
446 /*
447  * LDC Link Layer Internal Functions
448  */
449 
450 /*
451  * Translate HV Errors to sun4v error codes
452  */
453 static int
454 i_ldc_h2v_error(int h_error)
455 {
456 	switch (h_error) {
457 
458 	case	H_EOK:
459 		return (0);
460 
461 	case	H_ENORADDR:
462 		return (EFAULT);
463 
464 	case	H_EBADPGSZ:
465 	case	H_EINVAL:
466 		return (EINVAL);
467 
468 	case	H_EWOULDBLOCK:
469 		return (EWOULDBLOCK);
470 
471 	case	H_ENOACCESS:
472 	case	H_ENOMAP:
473 		return (EACCES);
474 
475 	case	H_EIO:
476 	case	H_ECPUERROR:
477 		return (EIO);
478 
479 	case	H_ENOTSUPPORTED:
480 		return (ENOTSUP);
481 
482 	case 	H_ETOOMANY:
483 		return (ENOSPC);
484 
485 	case	H_ECHANNEL:
486 		return (ECHRNG);
487 	default:
488 		break;
489 	}
490 
491 	return (EIO);
492 }
493 
494 /*
495  * Reconfigure the transmit queue
496  */
497 static int
498 i_ldc_txq_reconf(ldc_chan_t *ldcp)
499 {
500 	int rv;
501 
502 	ASSERT(MUTEX_HELD(&ldcp->lock));
503 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
504 
505 	rv = hv_ldc_tx_qconf(ldcp->id, ldcp->tx_q_ra, ldcp->tx_q_entries);
506 	if (rv) {
507 		cmn_err(CE_WARN,
508 		    "i_ldc_txq_reconf: (0x%lx) cannot set qconf", ldcp->id);
509 		return (EIO);
510 	}
511 	rv = hv_ldc_tx_get_state(ldcp->id, &(ldcp->tx_head),
512 	    &(ldcp->tx_tail), &(ldcp->link_state));
513 	if (rv) {
514 		cmn_err(CE_WARN,
515 		    "i_ldc_txq_reconf: (0x%lx) cannot get qptrs", ldcp->id);
516 		return (EIO);
517 	}
518 	D1(ldcp->id, "i_ldc_txq_reconf: (0x%llx) h=0x%llx,t=0x%llx,"
519 	    "s=0x%llx\n", ldcp->id, ldcp->tx_head, ldcp->tx_tail,
520 	    ldcp->link_state);
521 
522 	return (0);
523 }
524 
525 /*
526  * Reconfigure the receive queue
527  */
528 static int
529 i_ldc_rxq_reconf(ldc_chan_t *ldcp, boolean_t force_reset)
530 {
531 	int rv;
532 	uint64_t rx_head, rx_tail;
533 
534 	ASSERT(MUTEX_HELD(&ldcp->lock));
535 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
536 	    &(ldcp->link_state));
537 	if (rv) {
538 		cmn_err(CE_WARN,
539 		    "i_ldc_rxq_reconf: (0x%lx) cannot get state",
540 		    ldcp->id);
541 		return (EIO);
542 	}
543 
544 	if (force_reset || (ldcp->tstate & ~TS_IN_RESET) == TS_UP) {
545 		rv = hv_ldc_rx_qconf(ldcp->id, ldcp->rx_q_ra,
546 			ldcp->rx_q_entries);
547 		if (rv) {
548 			cmn_err(CE_WARN,
549 			    "i_ldc_rxq_reconf: (0x%lx) cannot set qconf",
550 			    ldcp->id);
551 			return (EIO);
552 		}
553 		D1(ldcp->id, "i_ldc_rxq_reconf: (0x%llx) completed q reconf",
554 		    ldcp->id);
555 	}
556 
557 	return (0);
558 }
559 
560 
561 /*
562  * Drain the contents of the receive queue
563  */
564 static int
565 i_ldc_rxq_drain(ldc_chan_t *ldcp)
566 {
567 	int rv;
568 	uint64_t rx_head, rx_tail;
569 
570 	ASSERT(MUTEX_HELD(&ldcp->lock));
571 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
572 	    &(ldcp->link_state));
573 	if (rv) {
574 		cmn_err(CE_WARN, "i_ldc_rxq_drain: (0x%lx) cannot get state",
575 		    ldcp->id);
576 		return (EIO);
577 	}
578 
579 	/* flush contents by setting the head = tail */
580 	return (i_ldc_set_rx_head(ldcp, rx_tail));
581 }
582 
583 
584 /*
585  * Reset LDC state structure and its contents
586  */
587 static void
588 i_ldc_reset_state(ldc_chan_t *ldcp)
589 {
590 	ASSERT(MUTEX_HELD(&ldcp->lock));
591 	ldcp->last_msg_snt = LDC_INIT_SEQID;
592 	ldcp->last_ack_rcd = 0;
593 	ldcp->last_msg_rcd = 0;
594 	ldcp->tx_ackd_head = ldcp->tx_head;
595 	ldcp->next_vidx = 0;
596 	ldcp->hstate = 0;
597 	ldcp->tstate = TS_OPEN;
598 	ldcp->status = LDC_OPEN;
599 
600 	if (ldcp->link_state == LDC_CHANNEL_UP ||
601 	    ldcp->link_state == LDC_CHANNEL_RESET) {
602 
603 		if (ldcp->mode == LDC_MODE_RAW) {
604 			ldcp->status = LDC_UP;
605 			ldcp->tstate = TS_UP;
606 		} else {
607 			ldcp->status = LDC_READY;
608 			ldcp->tstate |= TS_LINK_READY;
609 		}
610 	}
611 }
612 
613 /*
614  * Reset a LDC channel
615  */
616 static void
617 i_ldc_reset(ldc_chan_t *ldcp, boolean_t force_reset)
618 {
619 	DWARN(ldcp->id, "i_ldc_reset: (0x%llx) channel reset\n", ldcp->id);
620 
621 	ASSERT(MUTEX_HELD(&ldcp->lock));
622 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
623 
624 	/* reconfig Tx and Rx queues */
625 	(void) i_ldc_txq_reconf(ldcp);
626 	(void) i_ldc_rxq_reconf(ldcp, force_reset);
627 
628 	/* Clear Tx and Rx interrupts */
629 	(void) i_ldc_clear_intr(ldcp, CNEX_TX_INTR);
630 	(void) i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
631 
632 	/* Reset channel state */
633 	i_ldc_reset_state(ldcp);
634 
635 	/* Mark channel in reset */
636 	ldcp->tstate |= TS_IN_RESET;
637 }
638 
639 
640 /*
641  * Clear pending interrupts
642  */
643 static void
644 i_ldc_clear_intr(ldc_chan_t *ldcp, cnex_intrtype_t itype)
645 {
646 	ldc_cnex_t *cinfo = &ldcssp->cinfo;
647 
648 	ASSERT(MUTEX_HELD(&ldcp->lock));
649 	ASSERT(cinfo->dip != NULL);
650 
651 	switch (itype) {
652 	case CNEX_TX_INTR:
653 		/* check Tx interrupt */
654 		if (ldcp->tx_intr_state)
655 			ldcp->tx_intr_state = LDC_INTR_NONE;
656 		else
657 			return;
658 		break;
659 
660 	case CNEX_RX_INTR:
661 		/* check Rx interrupt */
662 		if (ldcp->rx_intr_state)
663 			ldcp->rx_intr_state = LDC_INTR_NONE;
664 		else
665 			return;
666 		break;
667 	}
668 
669 	(void) cinfo->clr_intr(cinfo->dip, ldcp->id, itype);
670 	D2(ldcp->id,
671 	    "i_ldc_clear_intr: (0x%llx) cleared 0x%x intr\n",
672 	    ldcp->id, itype);
673 }
674 
675 /*
676  * Set the receive queue head
677  * Resets connection and returns an error if it fails.
678  */
679 static int
680 i_ldc_set_rx_head(ldc_chan_t *ldcp, uint64_t head)
681 {
682 	int 	rv;
683 	int 	retries;
684 
685 	ASSERT(MUTEX_HELD(&ldcp->lock));
686 	for (retries = 0; retries < ldc_max_retries; retries++) {
687 
688 		if ((rv = hv_ldc_rx_set_qhead(ldcp->id, head)) == 0)
689 			return (0);
690 
691 		if (rv != H_EWOULDBLOCK)
692 			break;
693 
694 		/* wait for ldc_delay usecs */
695 		drv_usecwait(ldc_delay);
696 	}
697 
698 	cmn_err(CE_WARN, "ldc_rx_set_qhead: (0x%lx) cannot set qhead 0x%lx",
699 		ldcp->id, head);
700 	mutex_enter(&ldcp->tx_lock);
701 	i_ldc_reset(ldcp, B_TRUE);
702 	mutex_exit(&ldcp->tx_lock);
703 
704 	return (ECONNRESET);
705 }
706 
707 
708 /*
709  * Returns the tx_tail to be used for transfer
710  * Re-reads the TX queue ptrs if and only if the
711  * the cached head and tail are equal (queue is full)
712  */
713 static int
714 i_ldc_get_tx_tail(ldc_chan_t *ldcp, uint64_t *tail)
715 {
716 	int 		rv;
717 	uint64_t 	current_head, new_tail;
718 
719 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
720 	/* Read the head and tail ptrs from HV */
721 	rv = hv_ldc_tx_get_state(ldcp->id,
722 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
723 	if (rv) {
724 		cmn_err(CE_WARN,
725 		    "i_ldc_get_tx_tail: (0x%lx) cannot read qptrs\n",
726 		    ldcp->id);
727 		return (EIO);
728 	}
729 	if (ldcp->link_state == LDC_CHANNEL_DOWN) {
730 		D1(ldcp->id, "i_ldc_get_tx_tail: (0x%llx) channel not ready\n",
731 		    ldcp->id);
732 		return (ECONNRESET);
733 	}
734 
735 	/* In reliable mode, check against last ACKd msg */
736 	current_head = (ldcp->mode == LDC_MODE_RELIABLE ||
737 		ldcp->mode == LDC_MODE_STREAM)
738 		? ldcp->tx_ackd_head : ldcp->tx_head;
739 
740 	/* increment the tail */
741 	new_tail = (ldcp->tx_tail + LDC_PACKET_SIZE) %
742 		(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
743 
744 	if (new_tail == current_head) {
745 		DWARN(ldcp->id,
746 		    "i_ldc_get_tx_tail: (0x%llx) TX queue is full\n",
747 		    ldcp->id);
748 		return (EWOULDBLOCK);
749 	}
750 
751 	D2(ldcp->id, "i_ldc_get_tx_tail: (0x%llx) head=0x%llx, tail=0x%llx\n",
752 	    ldcp->id, ldcp->tx_head, ldcp->tx_tail);
753 
754 	*tail = ldcp->tx_tail;
755 	return (0);
756 }
757 
758 /*
759  * Set the tail pointer. If HV returns EWOULDBLOCK, it will back off
760  * and retry ldc_max_retries times before returning an error.
761  * Returns 0, EWOULDBLOCK or EIO
762  */
763 static int
764 i_ldc_set_tx_tail(ldc_chan_t *ldcp, uint64_t tail)
765 {
766 	int		rv, retval = EWOULDBLOCK;
767 	int 		retries;
768 
769 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
770 	for (retries = 0; retries < ldc_max_retries; retries++) {
771 
772 		if ((rv = hv_ldc_tx_set_qtail(ldcp->id, tail)) == 0) {
773 			retval = 0;
774 			break;
775 		}
776 		if (rv != H_EWOULDBLOCK) {
777 			DWARN(ldcp->id, "i_ldc_set_tx_tail: (0x%llx) set "
778 			    "qtail=0x%llx failed, rv=%d\n", ldcp->id, tail, rv);
779 			retval = EIO;
780 			break;
781 		}
782 
783 		/* wait for ldc_delay usecs */
784 		drv_usecwait(ldc_delay);
785 	}
786 	return (retval);
787 }
788 
789 /*
790  * Send a LDC message
791  */
792 static int
793 i_ldc_send_pkt(ldc_chan_t *ldcp, uint8_t pkttype, uint8_t subtype,
794     uint8_t ctrlmsg)
795 {
796 	int		rv;
797 	ldc_msg_t 	*pkt;
798 	uint64_t	tx_tail;
799 	uint32_t	curr_seqid = ldcp->last_msg_snt;
800 
801 	/* Obtain Tx lock */
802 	mutex_enter(&ldcp->tx_lock);
803 
804 	/* get the current tail for the message */
805 	rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
806 	if (rv) {
807 		DWARN(ldcp->id,
808 		    "i_ldc_send_pkt: (0x%llx) error sending pkt, "
809 		    "type=0x%x,subtype=0x%x,ctrl=0x%x\n",
810 		    ldcp->id, pkttype, subtype, ctrlmsg);
811 		mutex_exit(&ldcp->tx_lock);
812 		return (rv);
813 	}
814 
815 	pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
816 	ZERO_PKT(pkt);
817 
818 	/* Initialize the packet */
819 	pkt->type = pkttype;
820 	pkt->stype = subtype;
821 	pkt->ctrl = ctrlmsg;
822 
823 	/* Store ackid/seqid iff it is RELIABLE mode & not a RTS/RTR message */
824 	if (((ctrlmsg & LDC_CTRL_MASK) != LDC_RTS) &&
825 	    ((ctrlmsg & LDC_CTRL_MASK) != LDC_RTR)) {
826 		curr_seqid++;
827 		if (ldcp->mode != LDC_MODE_RAW) {
828 			pkt->seqid = curr_seqid;
829 			pkt->ackid = ldcp->last_msg_rcd;
830 		}
831 	}
832 	DUMP_LDC_PKT(ldcp, "i_ldc_send_pkt", (uint64_t)pkt);
833 
834 	/* initiate the send by calling into HV and set the new tail */
835 	tx_tail = (tx_tail + LDC_PACKET_SIZE) %
836 		(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
837 
838 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
839 	if (rv) {
840 		DWARN(ldcp->id,
841 		    "i_ldc_send_pkt:(0x%llx) error sending pkt, "
842 		    "type=0x%x,stype=0x%x,ctrl=0x%x\n",
843 		    ldcp->id, pkttype, subtype, ctrlmsg);
844 		mutex_exit(&ldcp->tx_lock);
845 		return (EIO);
846 	}
847 
848 	ldcp->last_msg_snt = curr_seqid;
849 	ldcp->tx_tail = tx_tail;
850 
851 	mutex_exit(&ldcp->tx_lock);
852 	return (0);
853 }
854 
855 /*
856  * Checks if packet was received in right order
857  * in the case of a reliable link.
858  * Returns 0 if in order, else EIO
859  */
860 static int
861 i_ldc_check_seqid(ldc_chan_t *ldcp, ldc_msg_t *msg)
862 {
863 	/* No seqid checking for RAW mode */
864 	if (ldcp->mode == LDC_MODE_RAW)
865 		return (0);
866 
867 	/* No seqid checking for version, RTS, RTR message */
868 	if (msg->ctrl == LDC_VER ||
869 	    msg->ctrl == LDC_RTS ||
870 	    msg->ctrl == LDC_RTR)
871 		return (0);
872 
873 	/* Initial seqid to use is sent in RTS/RTR and saved in last_msg_rcd */
874 	if (msg->seqid != (ldcp->last_msg_rcd + 1)) {
875 		DWARN(ldcp->id,
876 		    "i_ldc_check_seqid: (0x%llx) out-of-order pkt, got 0x%x, "
877 		    "expecting 0x%x\n", ldcp->id, msg->seqid,
878 		    (ldcp->last_msg_rcd + 1));
879 		return (EIO);
880 	}
881 
882 #ifdef DEBUG
883 	if (LDC_INJECT_PKTLOSS(ldcp)) {
884 		DWARN(ldcp->id,
885 		    "i_ldc_check_seqid: (0x%llx) inject pkt loss\n", ldcp->id);
886 		return (EIO);
887 	}
888 #endif
889 
890 	return (0);
891 }
892 
893 
894 /*
895  * Process an incoming version ctrl message
896  */
897 static int
898 i_ldc_process_VER(ldc_chan_t *ldcp, ldc_msg_t *msg)
899 {
900 	int 		rv = 0, idx = ldcp->next_vidx;
901 	ldc_msg_t 	*pkt;
902 	uint64_t	tx_tail;
903 	ldc_ver_t	*rcvd_ver;
904 
905 	/* get the received version */
906 	rcvd_ver = (ldc_ver_t *)((uint64_t)msg + LDC_PAYLOAD_VER_OFF);
907 
908 	D2(ldcp->id, "i_ldc_process_VER: (0x%llx) received VER v%u.%u\n",
909 	    ldcp->id, rcvd_ver->major, rcvd_ver->minor);
910 
911 	/* Obtain Tx lock */
912 	mutex_enter(&ldcp->tx_lock);
913 
914 	switch (msg->stype) {
915 	case LDC_INFO:
916 
917 		if ((ldcp->tstate & ~TS_IN_RESET) == TS_VREADY) {
918 			(void) i_ldc_txq_reconf(ldcp);
919 			i_ldc_reset_state(ldcp);
920 			mutex_exit(&ldcp->tx_lock);
921 			return (EAGAIN);
922 		}
923 
924 		/* get the current tail and pkt for the response */
925 		rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
926 		if (rv != 0) {
927 			DWARN(ldcp->id,
928 			    "i_ldc_process_VER: (0x%llx) err sending "
929 			    "version ACK/NACK\n", ldcp->id);
930 			i_ldc_reset(ldcp, B_TRUE);
931 			mutex_exit(&ldcp->tx_lock);
932 			return (ECONNRESET);
933 		}
934 
935 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
936 		ZERO_PKT(pkt);
937 
938 		/* initialize the packet */
939 		pkt->type = LDC_CTRL;
940 		pkt->ctrl = LDC_VER;
941 
942 		for (;;) {
943 
944 			D1(ldcp->id, "i_ldc_process_VER: got %u.%u chk %u.%u\n",
945 			    rcvd_ver->major, rcvd_ver->minor,
946 			    ldc_versions[idx].major, ldc_versions[idx].minor);
947 
948 			if (rcvd_ver->major == ldc_versions[idx].major) {
949 				/* major version match - ACK version */
950 				pkt->stype = LDC_ACK;
951 
952 				/*
953 				 * lower minor version to the one this endpt
954 				 * supports, if necessary
955 				 */
956 				if (rcvd_ver->minor > ldc_versions[idx].minor)
957 					rcvd_ver->minor =
958 						ldc_versions[idx].minor;
959 				bcopy(rcvd_ver, pkt->udata, sizeof (*rcvd_ver));
960 
961 				break;
962 			}
963 
964 			if (rcvd_ver->major > ldc_versions[idx].major) {
965 
966 				D1(ldcp->id, "i_ldc_process_VER: using next"
967 				    " lower idx=%d, v%u.%u\n", idx,
968 				    ldc_versions[idx].major,
969 				    ldc_versions[idx].minor);
970 
971 				/* nack with next lower version */
972 				pkt->stype = LDC_NACK;
973 				bcopy(&ldc_versions[idx], pkt->udata,
974 				    sizeof (ldc_versions[idx]));
975 				ldcp->next_vidx = idx;
976 				break;
977 			}
978 
979 			/* next major version */
980 			idx++;
981 
982 			D1(ldcp->id, "i_ldc_process_VER: inc idx %x\n", idx);
983 
984 			if (idx == LDC_NUM_VERS) {
985 				/* no version match - send NACK */
986 				pkt->stype = LDC_NACK;
987 				bzero(pkt->udata, sizeof (ldc_ver_t));
988 				ldcp->next_vidx = 0;
989 				break;
990 			}
991 		}
992 
993 		/* initiate the send by calling into HV and set the new tail */
994 		tx_tail = (tx_tail + LDC_PACKET_SIZE) %
995 			(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
996 
997 		rv = i_ldc_set_tx_tail(ldcp, tx_tail);
998 		if (rv == 0) {
999 			ldcp->tx_tail = tx_tail;
1000 			if (pkt->stype == LDC_ACK) {
1001 				D2(ldcp->id, "i_ldc_process_VER: (0x%llx) sent"
1002 				    " version ACK\n", ldcp->id);
1003 				/* Save the ACK'd version */
1004 				ldcp->version.major = rcvd_ver->major;
1005 				ldcp->version.minor = rcvd_ver->minor;
1006 				ldcp->hstate |= TS_RCVD_VER;
1007 				ldcp->tstate |= TS_VER_DONE;
1008 				D1(DBG_ALL_LDCS,
1009 				    "(0x%llx) Sent ACK, "
1010 				    "Agreed on version v%u.%u\n",
1011 				    ldcp->id, rcvd_ver->major, rcvd_ver->minor);
1012 			}
1013 		} else {
1014 			DWARN(ldcp->id,
1015 			    "i_ldc_process_VER: (0x%llx) error sending "
1016 			    "ACK/NACK\n", ldcp->id);
1017 			i_ldc_reset(ldcp, B_TRUE);
1018 			mutex_exit(&ldcp->tx_lock);
1019 			return (ECONNRESET);
1020 		}
1021 
1022 		break;
1023 
1024 	case LDC_ACK:
1025 		if ((ldcp->tstate & ~TS_IN_RESET) == TS_VREADY) {
1026 			if (ldcp->version.major != rcvd_ver->major ||
1027 				ldcp->version.minor != rcvd_ver->minor) {
1028 
1029 				/* mismatched version - reset connection */
1030 				DWARN(ldcp->id,
1031 					"i_ldc_process_VER: (0x%llx) recvd"
1032 					" ACK ver != sent ACK ver\n", ldcp->id);
1033 				i_ldc_reset(ldcp, B_TRUE);
1034 				mutex_exit(&ldcp->tx_lock);
1035 				return (ECONNRESET);
1036 			}
1037 		} else {
1038 			/* SUCCESS - we have agreed on a version */
1039 			ldcp->version.major = rcvd_ver->major;
1040 			ldcp->version.minor = rcvd_ver->minor;
1041 			ldcp->tstate |= TS_VER_DONE;
1042 		}
1043 
1044 		D1(ldcp->id, "(0x%llx) Got ACK, Agreed on version v%u.%u\n",
1045 		    ldcp->id, rcvd_ver->major, rcvd_ver->minor);
1046 
1047 		/* initiate RTS-RTR-RDX handshake */
1048 		rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
1049 		if (rv) {
1050 			DWARN(ldcp->id,
1051 		    "i_ldc_process_VER: (0x%llx) cannot send RTS\n",
1052 			    ldcp->id);
1053 			i_ldc_reset(ldcp, B_TRUE);
1054 			mutex_exit(&ldcp->tx_lock);
1055 			return (ECONNRESET);
1056 		}
1057 
1058 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
1059 		ZERO_PKT(pkt);
1060 
1061 		pkt->type = LDC_CTRL;
1062 		pkt->stype = LDC_INFO;
1063 		pkt->ctrl = LDC_RTS;
1064 		pkt->env = ldcp->mode;
1065 		if (ldcp->mode != LDC_MODE_RAW)
1066 			pkt->seqid = LDC_INIT_SEQID;
1067 
1068 		ldcp->last_msg_rcd = LDC_INIT_SEQID;
1069 
1070 		DUMP_LDC_PKT(ldcp, "i_ldc_process_VER snd rts", (uint64_t)pkt);
1071 
1072 		/* initiate the send by calling into HV and set the new tail */
1073 		tx_tail = (tx_tail + LDC_PACKET_SIZE) %
1074 			(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1075 
1076 		rv = i_ldc_set_tx_tail(ldcp, tx_tail);
1077 		if (rv) {
1078 			D2(ldcp->id,
1079 			    "i_ldc_process_VER: (0x%llx) no listener\n",
1080 			    ldcp->id);
1081 			i_ldc_reset(ldcp, B_TRUE);
1082 			mutex_exit(&ldcp->tx_lock);
1083 			return (ECONNRESET);
1084 		}
1085 
1086 		ldcp->tx_tail = tx_tail;
1087 		ldcp->hstate |= TS_SENT_RTS;
1088 
1089 		break;
1090 
1091 	case LDC_NACK:
1092 		/* check if version in NACK is zero */
1093 		if (rcvd_ver->major == 0 && rcvd_ver->minor == 0) {
1094 			/* version handshake failure */
1095 			DWARN(DBG_ALL_LDCS,
1096 			    "i_ldc_process_VER: (0x%llx) no version match\n",
1097 			    ldcp->id);
1098 			i_ldc_reset(ldcp, B_TRUE);
1099 			mutex_exit(&ldcp->tx_lock);
1100 			return (ECONNRESET);
1101 		}
1102 
1103 		/* get the current tail and pkt for the response */
1104 		rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
1105 		if (rv != 0) {
1106 			cmn_err(CE_NOTE,
1107 			    "i_ldc_process_VER: (0x%lx) err sending "
1108 			    "version ACK/NACK\n", ldcp->id);
1109 			i_ldc_reset(ldcp, B_TRUE);
1110 			mutex_exit(&ldcp->tx_lock);
1111 			return (ECONNRESET);
1112 		}
1113 
1114 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
1115 		ZERO_PKT(pkt);
1116 
1117 		/* initialize the packet */
1118 		pkt->type = LDC_CTRL;
1119 		pkt->ctrl = LDC_VER;
1120 		pkt->stype = LDC_INFO;
1121 
1122 		/* check ver in NACK msg has a match */
1123 		for (;;) {
1124 			if (rcvd_ver->major == ldc_versions[idx].major) {
1125 				/*
1126 				 * major version match - resubmit request
1127 				 * if lower minor version to the one this endpt
1128 				 * supports, if necessary
1129 				 */
1130 				if (rcvd_ver->minor > ldc_versions[idx].minor)
1131 					rcvd_ver->minor =
1132 						ldc_versions[idx].minor;
1133 				bcopy(rcvd_ver, pkt->udata, sizeof (*rcvd_ver));
1134 				break;
1135 
1136 			}
1137 
1138 			if (rcvd_ver->major > ldc_versions[idx].major) {
1139 
1140 				D1(ldcp->id, "i_ldc_process_VER: using next"
1141 				    " lower idx=%d, v%u.%u\n", idx,
1142 				    ldc_versions[idx].major,
1143 				    ldc_versions[idx].minor);
1144 
1145 				/* send next lower version */
1146 				bcopy(&ldc_versions[idx], pkt->udata,
1147 				    sizeof (ldc_versions[idx]));
1148 				ldcp->next_vidx = idx;
1149 				break;
1150 			}
1151 
1152 			/* next version */
1153 			idx++;
1154 
1155 			D1(ldcp->id, "i_ldc_process_VER: inc idx %x\n", idx);
1156 
1157 			if (idx == LDC_NUM_VERS) {
1158 				/* no version match - terminate */
1159 				ldcp->next_vidx = 0;
1160 				mutex_exit(&ldcp->tx_lock);
1161 				return (ECONNRESET);
1162 			}
1163 		}
1164 
1165 		/* initiate the send by calling into HV and set the new tail */
1166 		tx_tail = (tx_tail + LDC_PACKET_SIZE) %
1167 			(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1168 
1169 		rv = i_ldc_set_tx_tail(ldcp, tx_tail);
1170 		if (rv == 0) {
1171 			D2(ldcp->id, "i_ldc_process_VER: (0x%llx) sent version"
1172 			    "INFO v%u.%u\n", ldcp->id, ldc_versions[idx].major,
1173 			    ldc_versions[idx].minor);
1174 			ldcp->tx_tail = tx_tail;
1175 		} else {
1176 			cmn_err(CE_NOTE,
1177 			    "i_ldc_process_VER: (0x%lx) error sending version"
1178 			    "INFO\n", ldcp->id);
1179 			i_ldc_reset(ldcp, B_TRUE);
1180 			mutex_exit(&ldcp->tx_lock);
1181 			return (ECONNRESET);
1182 		}
1183 
1184 		break;
1185 	}
1186 
1187 	mutex_exit(&ldcp->tx_lock);
1188 	return (rv);
1189 }
1190 
1191 
1192 /*
1193  * Process an incoming RTS ctrl message
1194  */
1195 static int
1196 i_ldc_process_RTS(ldc_chan_t *ldcp, ldc_msg_t *msg)
1197 {
1198 	int 		rv = 0;
1199 	ldc_msg_t 	*pkt;
1200 	uint64_t	tx_tail;
1201 	boolean_t	sent_NACK = B_FALSE;
1202 
1203 	D2(ldcp->id, "i_ldc_process_RTS: (0x%llx) received RTS\n", ldcp->id);
1204 
1205 	switch (msg->stype) {
1206 	case LDC_NACK:
1207 		DWARN(ldcp->id,
1208 		    "i_ldc_process_RTS: (0x%llx) RTS NACK received\n",
1209 		    ldcp->id);
1210 
1211 		/* Reset the channel -- as we cannot continue */
1212 		mutex_enter(&ldcp->tx_lock);
1213 		i_ldc_reset(ldcp, B_TRUE);
1214 		mutex_exit(&ldcp->tx_lock);
1215 		rv = ECONNRESET;
1216 		break;
1217 
1218 	case LDC_INFO:
1219 
1220 		/* check mode */
1221 		if (ldcp->mode != (ldc_mode_t)msg->env) {
1222 			cmn_err(CE_NOTE,
1223 			    "i_ldc_process_RTS: (0x%lx) mode mismatch\n",
1224 			    ldcp->id);
1225 			/*
1226 			 * send NACK in response to MODE message
1227 			 * get the current tail for the response
1228 			 */
1229 			rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_NACK, LDC_RTS);
1230 			if (rv) {
1231 				/* if cannot send NACK - reset channel */
1232 				mutex_enter(&ldcp->tx_lock);
1233 				i_ldc_reset(ldcp, B_TRUE);
1234 				mutex_exit(&ldcp->tx_lock);
1235 				rv = ECONNRESET;
1236 				break;
1237 			}
1238 			sent_NACK = B_TRUE;
1239 		}
1240 		break;
1241 	default:
1242 		DWARN(ldcp->id, "i_ldc_process_RTS: (0x%llx) unexp ACK\n",
1243 		    ldcp->id);
1244 		mutex_enter(&ldcp->tx_lock);
1245 		i_ldc_reset(ldcp, B_TRUE);
1246 		mutex_exit(&ldcp->tx_lock);
1247 		rv = ECONNRESET;
1248 		break;
1249 	}
1250 
1251 	/*
1252 	 * If either the connection was reset (when rv != 0) or
1253 	 * a NACK was sent, we return. In the case of a NACK
1254 	 * we dont want to consume the packet that came in but
1255 	 * not record that we received the RTS
1256 	 */
1257 	if (rv || sent_NACK)
1258 		return (rv);
1259 
1260 	/* record RTS received */
1261 	ldcp->hstate |= TS_RCVD_RTS;
1262 
1263 	/* store initial SEQID info */
1264 	ldcp->last_msg_snt = msg->seqid;
1265 
1266 	/* Obtain Tx lock */
1267 	mutex_enter(&ldcp->tx_lock);
1268 
1269 	/* get the current tail for the response */
1270 	rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
1271 	if (rv != 0) {
1272 		cmn_err(CE_NOTE,
1273 		    "i_ldc_process_RTS: (0x%lx) err sending RTR\n",
1274 		    ldcp->id);
1275 		i_ldc_reset(ldcp, B_TRUE);
1276 		mutex_exit(&ldcp->tx_lock);
1277 		return (ECONNRESET);
1278 	}
1279 
1280 	pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
1281 	ZERO_PKT(pkt);
1282 
1283 	/* initialize the packet */
1284 	pkt->type = LDC_CTRL;
1285 	pkt->stype = LDC_INFO;
1286 	pkt->ctrl = LDC_RTR;
1287 	pkt->env = ldcp->mode;
1288 	if (ldcp->mode != LDC_MODE_RAW)
1289 		pkt->seqid = LDC_INIT_SEQID;
1290 
1291 	ldcp->last_msg_rcd = msg->seqid;
1292 
1293 	/* initiate the send by calling into HV and set the new tail */
1294 	tx_tail = (tx_tail + LDC_PACKET_SIZE) %
1295 		(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1296 
1297 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
1298 	if (rv == 0) {
1299 		D2(ldcp->id,
1300 		    "i_ldc_process_RTS: (0x%llx) sent RTR\n", ldcp->id);
1301 		DUMP_LDC_PKT(ldcp, "i_ldc_process_RTS sent rtr", (uint64_t)pkt);
1302 
1303 		ldcp->tx_tail = tx_tail;
1304 		ldcp->hstate |= TS_SENT_RTR;
1305 
1306 	} else {
1307 		cmn_err(CE_NOTE,
1308 		    "i_ldc_process_RTS: (0x%lx) error sending RTR\n",
1309 		    ldcp->id);
1310 		i_ldc_reset(ldcp, B_TRUE);
1311 		mutex_exit(&ldcp->tx_lock);
1312 		return (ECONNRESET);
1313 	}
1314 
1315 	mutex_exit(&ldcp->tx_lock);
1316 	return (0);
1317 }
1318 
1319 /*
1320  * Process an incoming RTR ctrl message
1321  */
1322 static int
1323 i_ldc_process_RTR(ldc_chan_t *ldcp, ldc_msg_t *msg)
1324 {
1325 	int 		rv = 0;
1326 	boolean_t	sent_NACK = B_FALSE;
1327 
1328 	D2(ldcp->id, "i_ldc_process_RTR: (0x%llx) received RTR\n", ldcp->id);
1329 
1330 	switch (msg->stype) {
1331 	case LDC_NACK:
1332 		/* RTR NACK received */
1333 		DWARN(ldcp->id,
1334 		    "i_ldc_process_RTR: (0x%llx) RTR NACK received\n",
1335 		    ldcp->id);
1336 
1337 		/* Reset the channel -- as we cannot continue */
1338 		mutex_enter(&ldcp->tx_lock);
1339 		i_ldc_reset(ldcp, B_TRUE);
1340 		mutex_exit(&ldcp->tx_lock);
1341 		rv = ECONNRESET;
1342 
1343 		break;
1344 
1345 	case LDC_INFO:
1346 
1347 		/* check mode */
1348 		if (ldcp->mode != (ldc_mode_t)msg->env) {
1349 			DWARN(ldcp->id,
1350 			    "i_ldc_process_RTR: (0x%llx) mode mismatch, "
1351 			    "expecting 0x%x, got 0x%x\n",
1352 			    ldcp->id, ldcp->mode, (ldc_mode_t)msg->env);
1353 			/*
1354 			 * send NACK in response to MODE message
1355 			 * get the current tail for the response
1356 			 */
1357 			rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_NACK, LDC_RTR);
1358 			if (rv) {
1359 				/* if cannot send NACK - reset channel */
1360 				mutex_enter(&ldcp->tx_lock);
1361 				i_ldc_reset(ldcp, B_TRUE);
1362 				mutex_exit(&ldcp->tx_lock);
1363 				rv = ECONNRESET;
1364 				break;
1365 			}
1366 			sent_NACK = B_TRUE;
1367 		}
1368 		break;
1369 
1370 	default:
1371 		DWARN(ldcp->id, "i_ldc_process_RTR: (0x%llx) unexp ACK\n",
1372 		    ldcp->id);
1373 
1374 		/* Reset the channel -- as we cannot continue */
1375 		mutex_enter(&ldcp->tx_lock);
1376 		i_ldc_reset(ldcp, B_TRUE);
1377 		mutex_exit(&ldcp->tx_lock);
1378 		rv = ECONNRESET;
1379 		break;
1380 	}
1381 
1382 	/*
1383 	 * If either the connection was reset (when rv != 0) or
1384 	 * a NACK was sent, we return. In the case of a NACK
1385 	 * we dont want to consume the packet that came in but
1386 	 * not record that we received the RTR
1387 	 */
1388 	if (rv || sent_NACK)
1389 		return (rv);
1390 
1391 	ldcp->last_msg_snt = msg->seqid;
1392 	ldcp->hstate |= TS_RCVD_RTR;
1393 
1394 	rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_INFO, LDC_RDX);
1395 	if (rv) {
1396 		cmn_err(CE_NOTE,
1397 		    "i_ldc_process_RTR: (0x%lx) cannot send RDX\n",
1398 		    ldcp->id);
1399 		mutex_enter(&ldcp->tx_lock);
1400 		i_ldc_reset(ldcp, B_TRUE);
1401 		mutex_exit(&ldcp->tx_lock);
1402 		return (ECONNRESET);
1403 	}
1404 	D2(ldcp->id,
1405 	    "i_ldc_process_RTR: (0x%llx) sent RDX\n", ldcp->id);
1406 
1407 	ldcp->hstate |= TS_SENT_RDX;
1408 	ldcp->tstate |= TS_HSHAKE_DONE;
1409 	if ((ldcp->tstate & TS_IN_RESET) == 0)
1410 		ldcp->status = LDC_UP;
1411 
1412 	D1(ldcp->id, "(0x%llx) Handshake Complete\n", ldcp->id);
1413 
1414 	return (0);
1415 }
1416 
1417 
1418 /*
1419  * Process an incoming RDX ctrl message
1420  */
1421 static int
1422 i_ldc_process_RDX(ldc_chan_t *ldcp, ldc_msg_t *msg)
1423 {
1424 	int	rv = 0;
1425 
1426 	D2(ldcp->id, "i_ldc_process_RDX: (0x%llx) received RDX\n", ldcp->id);
1427 
1428 	switch (msg->stype) {
1429 	case LDC_NACK:
1430 		/* RDX NACK received */
1431 		DWARN(ldcp->id,
1432 		    "i_ldc_process_RDX: (0x%llx) RDX NACK received\n",
1433 		    ldcp->id);
1434 
1435 		/* Reset the channel -- as we cannot continue */
1436 		mutex_enter(&ldcp->tx_lock);
1437 		i_ldc_reset(ldcp, B_TRUE);
1438 		mutex_exit(&ldcp->tx_lock);
1439 		rv = ECONNRESET;
1440 
1441 		break;
1442 
1443 	case LDC_INFO:
1444 
1445 		/*
1446 		 * if channel is UP and a RDX received after data transmission
1447 		 * has commenced it is an error
1448 		 */
1449 		if ((ldcp->tstate == TS_UP) && (ldcp->hstate & TS_RCVD_RDX)) {
1450 			DWARN(DBG_ALL_LDCS,
1451 			    "i_ldc_process_RDX: (0x%llx) unexpected RDX"
1452 			    " - LDC reset\n", ldcp->id);
1453 			mutex_enter(&ldcp->tx_lock);
1454 			i_ldc_reset(ldcp, B_TRUE);
1455 			mutex_exit(&ldcp->tx_lock);
1456 			return (ECONNRESET);
1457 		}
1458 
1459 		ldcp->hstate |= TS_RCVD_RDX;
1460 		ldcp->tstate |= TS_HSHAKE_DONE;
1461 		if ((ldcp->tstate & TS_IN_RESET) == 0)
1462 			ldcp->status = LDC_UP;
1463 
1464 		D1(DBG_ALL_LDCS, "(0x%llx) Handshake Complete\n", ldcp->id);
1465 		break;
1466 
1467 	default:
1468 		DWARN(ldcp->id, "i_ldc_process_RDX: (0x%llx) unexp ACK\n",
1469 		    ldcp->id);
1470 
1471 		/* Reset the channel -- as we cannot continue */
1472 		mutex_enter(&ldcp->tx_lock);
1473 		i_ldc_reset(ldcp, B_TRUE);
1474 		mutex_exit(&ldcp->tx_lock);
1475 		rv = ECONNRESET;
1476 		break;
1477 	}
1478 
1479 	return (rv);
1480 }
1481 
1482 /*
1483  * Process an incoming ACK for a data packet
1484  */
1485 static int
1486 i_ldc_process_data_ACK(ldc_chan_t *ldcp, ldc_msg_t *msg)
1487 {
1488 	int		rv;
1489 	uint64_t 	tx_head;
1490 	ldc_msg_t	*pkt;
1491 
1492 	/* Obtain Tx lock */
1493 	mutex_enter(&ldcp->tx_lock);
1494 
1495 	/*
1496 	 * Read the current Tx head and tail
1497 	 */
1498 	rv = hv_ldc_tx_get_state(ldcp->id,
1499 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
1500 	if (rv != 0) {
1501 		cmn_err(CE_WARN,
1502 		    "i_ldc_process_data_ACK: (0x%lx) cannot read qptrs\n",
1503 		    ldcp->id);
1504 
1505 		/* Reset the channel -- as we cannot continue */
1506 		i_ldc_reset(ldcp, B_TRUE);
1507 		mutex_exit(&ldcp->tx_lock);
1508 		return (ECONNRESET);
1509 	}
1510 
1511 	/*
1512 	 * loop from where the previous ACK location was to the
1513 	 * current head location. This is how far the HV has
1514 	 * actually send pkts. Pkts between head and tail are
1515 	 * yet to be sent by HV.
1516 	 */
1517 	tx_head = ldcp->tx_ackd_head;
1518 	for (;;) {
1519 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_head);
1520 		tx_head = (tx_head + LDC_PACKET_SIZE) %
1521 			(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1522 
1523 		if (pkt->seqid == msg->ackid) {
1524 			D2(ldcp->id,
1525 			    "i_ldc_process_data_ACK: (0x%llx) found packet\n",
1526 			    ldcp->id);
1527 			ldcp->last_ack_rcd = msg->ackid;
1528 			ldcp->tx_ackd_head = tx_head;
1529 			break;
1530 		}
1531 		if (tx_head == ldcp->tx_head) {
1532 			/* could not find packet */
1533 			DWARN(ldcp->id,
1534 			    "i_ldc_process_data_ACK: (0x%llx) invalid ACKid\n",
1535 			    ldcp->id);
1536 
1537 			/* Reset the channel -- as we cannot continue */
1538 			i_ldc_reset(ldcp, B_TRUE);
1539 			mutex_exit(&ldcp->tx_lock);
1540 			return (ECONNRESET);
1541 		}
1542 	}
1543 
1544 	mutex_exit(&ldcp->tx_lock);
1545 	return (0);
1546 }
1547 
1548 /*
1549  * Process incoming control message
1550  * Return 0 - session can continue
1551  *        EAGAIN - reprocess packet - state was changed
1552  *	  ECONNRESET - channel was reset
1553  */
1554 static int
1555 i_ldc_ctrlmsg(ldc_chan_t *ldcp, ldc_msg_t *msg)
1556 {
1557 	int 		rv = 0;
1558 
1559 	D1(ldcp->id, "i_ldc_ctrlmsg: (%llx) tstate = %lx, hstate = %lx\n",
1560 	    ldcp->id, ldcp->tstate, ldcp->hstate);
1561 
1562 	switch (ldcp->tstate & ~TS_IN_RESET) {
1563 
1564 	case TS_OPEN:
1565 	case TS_READY:
1566 
1567 		switch (msg->ctrl & LDC_CTRL_MASK) {
1568 		case LDC_VER:
1569 			/* process version message */
1570 			rv = i_ldc_process_VER(ldcp, msg);
1571 			break;
1572 		default:
1573 			DWARN(ldcp->id,
1574 			    "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
1575 			    "tstate=0x%x\n", ldcp->id,
1576 			    (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate);
1577 			break;
1578 		}
1579 
1580 		break;
1581 
1582 	case TS_VREADY:
1583 
1584 		switch (msg->ctrl & LDC_CTRL_MASK) {
1585 		case LDC_VER:
1586 			/* process version message */
1587 			rv = i_ldc_process_VER(ldcp, msg);
1588 			break;
1589 		case LDC_RTS:
1590 			/* process RTS message */
1591 			rv = i_ldc_process_RTS(ldcp, msg);
1592 			break;
1593 		case LDC_RTR:
1594 			/* process RTR message */
1595 			rv = i_ldc_process_RTR(ldcp, msg);
1596 			break;
1597 		case LDC_RDX:
1598 			/* process RDX message */
1599 			rv = i_ldc_process_RDX(ldcp, msg);
1600 			break;
1601 		default:
1602 			DWARN(ldcp->id,
1603 			    "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
1604 			    "tstate=0x%x\n", ldcp->id,
1605 			    (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate);
1606 			break;
1607 		}
1608 
1609 		break;
1610 
1611 	case TS_UP:
1612 
1613 		switch (msg->ctrl & LDC_CTRL_MASK) {
1614 		case LDC_VER:
1615 			DWARN(ldcp->id,
1616 			    "i_ldc_ctrlmsg: (0x%llx) unexpected VER "
1617 			    "- LDC reset\n", ldcp->id);
1618 			/* peer is redoing version negotiation */
1619 			mutex_enter(&ldcp->tx_lock);
1620 			(void) i_ldc_txq_reconf(ldcp);
1621 			i_ldc_reset_state(ldcp);
1622 			mutex_exit(&ldcp->tx_lock);
1623 			rv = EAGAIN;
1624 			break;
1625 
1626 		case LDC_RDX:
1627 			/* process RDX message */
1628 			rv = i_ldc_process_RDX(ldcp, msg);
1629 			break;
1630 
1631 		default:
1632 			DWARN(ldcp->id,
1633 			    "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
1634 			    "tstate=0x%x\n", ldcp->id,
1635 			    (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate);
1636 			break;
1637 		}
1638 	}
1639 
1640 	return (rv);
1641 }
1642 
1643 /*
1644  * Register channel with the channel nexus
1645  */
1646 static int
1647 i_ldc_register_channel(ldc_chan_t *ldcp)
1648 {
1649 	int		rv = 0;
1650 	ldc_cnex_t	*cinfo = &ldcssp->cinfo;
1651 
1652 	if (cinfo->dip == NULL) {
1653 		DWARN(ldcp->id,
1654 		    "i_ldc_register_channel: cnex has not registered\n");
1655 		return (EAGAIN);
1656 	}
1657 
1658 	rv = cinfo->reg_chan(cinfo->dip, ldcp->id, ldcp->devclass);
1659 	if (rv) {
1660 		DWARN(ldcp->id,
1661 		    "i_ldc_register_channel: cannot register channel\n");
1662 		return (rv);
1663 	}
1664 
1665 	rv = cinfo->add_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR,
1666 	    i_ldc_tx_hdlr, ldcp, NULL);
1667 	if (rv) {
1668 		DWARN(ldcp->id,
1669 		    "i_ldc_register_channel: cannot add Tx interrupt\n");
1670 		(void) cinfo->unreg_chan(cinfo->dip, ldcp->id);
1671 		return (rv);
1672 	}
1673 
1674 	rv = cinfo->add_intr(cinfo->dip, ldcp->id, CNEX_RX_INTR,
1675 	    i_ldc_rx_hdlr, ldcp, NULL);
1676 	if (rv) {
1677 		DWARN(ldcp->id,
1678 		    "i_ldc_register_channel: cannot add Rx interrupt\n");
1679 		(void) cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR);
1680 		(void) cinfo->unreg_chan(cinfo->dip, ldcp->id);
1681 		return (rv);
1682 	}
1683 
1684 	ldcp->tstate |= TS_CNEX_RDY;
1685 
1686 	return (0);
1687 }
1688 
1689 /*
1690  * Unregister a channel with the channel nexus
1691  */
1692 static int
1693 i_ldc_unregister_channel(ldc_chan_t *ldcp)
1694 {
1695 	int		rv = 0;
1696 	ldc_cnex_t	*cinfo = &ldcssp->cinfo;
1697 
1698 	if (cinfo->dip == NULL) {
1699 		DWARN(ldcp->id,
1700 		    "i_ldc_unregister_channel: cnex has not registered\n");
1701 		return (EAGAIN);
1702 	}
1703 
1704 	if (ldcp->tstate & TS_CNEX_RDY) {
1705 
1706 		/* Remove the Rx interrupt */
1707 		rv = cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_RX_INTR);
1708 		if (rv) {
1709 			if (rv != EAGAIN) {
1710 				DWARN(ldcp->id,
1711 				    "i_ldc_unregister_channel: err removing "
1712 				    "Rx intr\n");
1713 				return (rv);
1714 			}
1715 
1716 			/*
1717 			 * If interrupts are pending and handler has
1718 			 * finished running, clear interrupt and try
1719 			 * again
1720 			 */
1721 			if (ldcp->rx_intr_state != LDC_INTR_PEND)
1722 				return (rv);
1723 
1724 			(void) i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
1725 			rv = cinfo->rem_intr(cinfo->dip, ldcp->id,
1726 			    CNEX_RX_INTR);
1727 			if (rv) {
1728 				DWARN(ldcp->id, "i_ldc_unregister_channel: "
1729 				    "err removing Rx interrupt\n");
1730 				return (rv);
1731 			}
1732 		}
1733 
1734 		/* Remove the Tx interrupt */
1735 		rv = cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR);
1736 		if (rv) {
1737 			DWARN(ldcp->id,
1738 			    "i_ldc_unregister_channel: err removing Tx intr\n");
1739 			return (rv);
1740 		}
1741 
1742 		/* Unregister the channel */
1743 		rv = cinfo->unreg_chan(ldcssp->cinfo.dip, ldcp->id);
1744 		if (rv) {
1745 			DWARN(ldcp->id,
1746 			    "i_ldc_unregister_channel: cannot unreg channel\n");
1747 			return (rv);
1748 		}
1749 
1750 		ldcp->tstate &= ~TS_CNEX_RDY;
1751 	}
1752 
1753 	return (0);
1754 }
1755 
1756 
1757 /*
1758  * LDC transmit interrupt handler
1759  *    triggered for chanel up/down/reset events
1760  *    and Tx queue content changes
1761  */
1762 static uint_t
1763 i_ldc_tx_hdlr(caddr_t arg1, caddr_t arg2)
1764 {
1765 	_NOTE(ARGUNUSED(arg2))
1766 
1767 	int 		rv;
1768 	ldc_chan_t 	*ldcp;
1769 	boolean_t 	notify_client = B_FALSE;
1770 	uint64_t	notify_event = 0, link_state;
1771 
1772 	/* Get the channel for which interrupt was received */
1773 	ASSERT(arg1 != NULL);
1774 	ldcp = (ldc_chan_t *)arg1;
1775 
1776 	D1(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) Received intr, ldcp=0x%p\n",
1777 	    ldcp->id, ldcp);
1778 
1779 	/* Lock channel */
1780 	mutex_enter(&ldcp->lock);
1781 
1782 	/* Obtain Tx lock */
1783 	mutex_enter(&ldcp->tx_lock);
1784 
1785 	/* mark interrupt as pending */
1786 	ldcp->tx_intr_state = LDC_INTR_ACTIVE;
1787 
1788 	/* save current link state */
1789 	link_state = ldcp->link_state;
1790 
1791 	rv = hv_ldc_tx_get_state(ldcp->id, &ldcp->tx_head, &ldcp->tx_tail,
1792 	    &ldcp->link_state);
1793 	if (rv) {
1794 		cmn_err(CE_WARN,
1795 		    "i_ldc_tx_hdlr: (0x%lx) cannot read queue ptrs rv=0x%d\n",
1796 		    ldcp->id, rv);
1797 		i_ldc_clear_intr(ldcp, CNEX_TX_INTR);
1798 		mutex_exit(&ldcp->tx_lock);
1799 		mutex_exit(&ldcp->lock);
1800 		return (DDI_INTR_CLAIMED);
1801 	}
1802 
1803 	/*
1804 	 * reset the channel state if the channel went down
1805 	 * (other side unconfigured queue) or channel was reset
1806 	 * (other side reconfigured its queue)
1807 	 */
1808 	if (link_state != ldcp->link_state &&
1809 	    ldcp->link_state == LDC_CHANNEL_DOWN) {
1810 		D1(ldcp->id, "i_ldc_tx_hdlr: channel link down\n", ldcp->id);
1811 		i_ldc_reset(ldcp, B_FALSE);
1812 		notify_client = B_TRUE;
1813 		notify_event = LDC_EVT_DOWN;
1814 	}
1815 
1816 	if (link_state != ldcp->link_state &&
1817 	    ldcp->link_state == LDC_CHANNEL_RESET) {
1818 		D1(ldcp->id, "i_ldc_tx_hdlr: channel link reset\n", ldcp->id);
1819 		i_ldc_reset(ldcp, B_FALSE);
1820 		notify_client = B_TRUE;
1821 		notify_event = LDC_EVT_RESET;
1822 	}
1823 
1824 	if (link_state != ldcp->link_state &&
1825 	    (ldcp->tstate & ~TS_IN_RESET) == TS_OPEN &&
1826 	    ldcp->link_state == LDC_CHANNEL_UP) {
1827 		D1(ldcp->id, "i_ldc_tx_hdlr: channel link up\n", ldcp->id);
1828 		notify_client = B_TRUE;
1829 		notify_event = LDC_EVT_RESET;
1830 		ldcp->tstate |= TS_LINK_READY;
1831 		ldcp->status = LDC_READY;
1832 	}
1833 
1834 	/* if callbacks are disabled, do not notify */
1835 	if (!ldcp->cb_enabled)
1836 		notify_client = B_FALSE;
1837 
1838 	i_ldc_clear_intr(ldcp, CNEX_TX_INTR);
1839 
1840 	if (notify_client) {
1841 		ldcp->cb_inprogress = B_TRUE;
1842 		mutex_exit(&ldcp->tx_lock);
1843 		mutex_exit(&ldcp->lock);
1844 		rv = ldcp->cb(notify_event, ldcp->cb_arg);
1845 		if (rv) {
1846 			DWARN(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) callback "
1847 			    "failure", ldcp->id);
1848 		}
1849 		mutex_enter(&ldcp->lock);
1850 		ldcp->cb_inprogress = B_FALSE;
1851 	}
1852 
1853 	mutex_exit(&ldcp->lock);
1854 
1855 	D1(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) exiting handler", ldcp->id);
1856 
1857 	return (DDI_INTR_CLAIMED);
1858 }
1859 
/*
 * LDC receive interrupt handler
 *    triggered for channel with data pending to read
 *    i.e. Rx queue content changes
 *
 * arg1 is the channel (ldc_chan_t *) the interrupt was raised for;
 * arg2 is unused.
 *
 * With the channel lock held, the handler loops over the Rx queue:
 * it refreshes the queue pointers and link state from the hypervisor,
 * reacts to link state transitions, and then examines the packet at
 * the head. RAW mode packets and DATA/INFO packets on an UP channel
 * are left in the queue and reported to the client as LDC_EVT_READ.
 * Control packets, data ACKs and data NACKs are consumed here.
 *
 * The Rx interrupt is cleared here unless LDC_EVT_READ is being
 * reported, in which case the interrupt state is left as pending.
 * The client callback, if any, is invoked with the channel lock
 * dropped.
 *
 * Returns DDI_INTR_UNCLAIMED if arg1 is NULL, DDI_INTR_CLAIMED
 * otherwise.
 */
static uint_t
i_ldc_rx_hdlr(caddr_t arg1, caddr_t arg2)
{
	_NOTE(ARGUNUSED(arg2))

	int		rv;
	uint64_t 	rx_head, rx_tail;
	ldc_msg_t 	*msg;
	ldc_chan_t 	*ldcp;
	boolean_t 	notify_client = B_FALSE;
	uint64_t	notify_event = 0;
	uint64_t	link_state, first_fragment = 0;


	/* Get the channel for which interrupt was received */
	if (arg1 == NULL) {
		cmn_err(CE_WARN, "i_ldc_rx_hdlr: invalid arg\n");
		return (DDI_INTR_UNCLAIMED);
	}

	ldcp = (ldc_chan_t *)arg1;

	D1(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) Received intr, ldcp=0x%p\n",
	    ldcp->id, ldcp);
	D1(ldcp->id, "i_ldc_rx_hdlr: (%llx) USR%lx/TS%lx/HS%lx, LSTATE=%lx\n",
	    ldcp->id, ldcp->status, ldcp->tstate, ldcp->hstate,
	    ldcp->link_state);

	/* Lock channel */
	mutex_enter(&ldcp->lock);

	/* mark the Rx interrupt as being actively serviced */
	ldcp->rx_intr_state = LDC_INTR_ACTIVE;

	/*
	 * Read packet(s) from the queue
	 */
	for (;;) {

		/* remember the old link state so a change can be detected */
		link_state = ldcp->link_state;
		rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
		    &ldcp->link_state);
		if (rv) {
			cmn_err(CE_WARN,
			    "i_ldc_rx_hdlr: (0x%lx) cannot read "
			    "queue ptrs, rv=0x%d\n", ldcp->id, rv);
			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
			mutex_exit(&ldcp->lock);
			return (DDI_INTR_CLAIMED);
		}

		/*
		 * reset the channel state if the channel went down
		 * (other side unconfigured queue) or channel was reset
		 * (other side reconfigured its queue)
		 */

		if (link_state != ldcp->link_state) {

			switch (ldcp->link_state) {
			case LDC_CHANNEL_DOWN:
				D1(ldcp->id, "i_ldc_rx_hdlr: channel "
				    "link down\n", ldcp->id);
				mutex_enter(&ldcp->tx_lock);
				i_ldc_reset(ldcp, B_FALSE);
				mutex_exit(&ldcp->tx_lock);
				notify_client = B_TRUE;
				notify_event = LDC_EVT_DOWN;
				/* link is down - stop processing the queue */
				goto loop_exit;

			case LDC_CHANNEL_UP:
				D1(ldcp->id, "i_ldc_rx_hdlr: "
				    "channel link up\n", ldcp->id);

				/* only an open channel is moved to ready */
				if ((ldcp->tstate & ~TS_IN_RESET) == TS_OPEN) {
					notify_client = B_TRUE;
					notify_event = LDC_EVT_RESET;
					ldcp->tstate |= TS_LINK_READY;
					ldcp->status = LDC_READY;
				}
				break;

			case LDC_CHANNEL_RESET:
			default:
#ifdef DEBUG
				/* target of the DEBUG-only injected reset below */
force_reset:
#endif
				D1(ldcp->id, "i_ldc_rx_hdlr: channel "
				    "link reset\n", ldcp->id);
				mutex_enter(&ldcp->tx_lock);
				i_ldc_reset(ldcp, B_FALSE);
				mutex_exit(&ldcp->tx_lock);
				notify_client = B_TRUE;
				notify_event = LDC_EVT_RESET;
				break;
			}
		}

#ifdef DEBUG
		/* DEBUG builds only: jump into the reset case on request */
		if (LDC_INJECT_RESET(ldcp))
			goto force_reset;
#endif

		/* queue is empty - nothing (more) to process */
		if (rx_head == rx_tail) {
			D2(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) No packets\n",
			    ldcp->id);
			break;
		}

		D2(ldcp->id, "i_ldc_rx_hdlr: head=0x%llx, tail=0x%llx\n",
		    rx_head, rx_tail);
		DUMP_LDC_PKT(ldcp, "i_ldc_rx_hdlr rcd",
		    ldcp->rx_q_va + rx_head);

		/* get the message */
		msg = (ldc_msg_t *)(ldcp->rx_q_va + rx_head);

		/* if channel is in RAW mode or data pkt, notify and return */
		if (ldcp->mode == LDC_MODE_RAW) {
			notify_client = B_TRUE;
			notify_event |= LDC_EVT_READ;
			break;
		}

		if ((msg->type & LDC_DATA) && (msg->stype & LDC_INFO)) {

			/* discard packet if channel is not up */
			if ((ldcp->tstate & ~TS_IN_RESET) != TS_UP) {

				/* move the head one position */
				rx_head = (rx_head + LDC_PACKET_SIZE) %
				(ldcp->rx_q_entries << LDC_PACKET_SHIFT);

				/* stop if the new head cannot be set */
				if (rv = i_ldc_set_rx_head(ldcp, rx_head))
					break;

				continue;
			} else {
				/*
				 * Leave the data packet in the queue. The
				 * READ event is always recorded, but the
				 * client is only notified when TS_IN_RESET
				 * is not set.
				 */
				if ((ldcp->tstate & TS_IN_RESET) == 0)
					notify_client = B_TRUE;
				notify_event |= LDC_EVT_READ;
				break;
			}
		}

		/* Check the sequence ID for the message received */
		rv = i_ldc_check_seqid(ldcp, msg);
		if (rv != 0) {

			DWARN(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) seqid error, "
			    "q_ptrs=0x%lx,0x%lx", ldcp->id, rx_head, rx_tail);

			/*
			 * Reset last_msg_rcd to start of message.
			 * NOTE: first_fragment is never assigned a non-zero
			 * value in this function, so this branch is not
			 * taken.
			 */
			if (first_fragment != 0) {
				ldcp->last_msg_rcd = first_fragment - 1;
				first_fragment = 0;
			}

			/*
			 * Send a NACK due to seqid mismatch
			 */
			rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_NACK,
			    (msg->ctrl & LDC_CTRL_MASK));

			if (rv) {
				cmn_err(CE_NOTE,
				    "i_ldc_rx_hdlr: (0x%lx) err sending "
				    "CTRL/NACK msg\n", ldcp->id);

				/* if cannot send NACK - reset channel */
				mutex_enter(&ldcp->tx_lock);
				i_ldc_reset(ldcp, B_TRUE);
				mutex_exit(&ldcp->tx_lock);

				notify_client = B_TRUE;
				notify_event = LDC_EVT_RESET;
				break;
			}

			/* purge receive queue */
			(void) i_ldc_set_rx_head(ldcp, rx_tail);
			break;
		}

		/* record the message ID */
		ldcp->last_msg_rcd = msg->seqid;

		/* process control messages */
		if (msg->type & LDC_CTRL) {
			/* save current internal state */
			uint64_t tstate = ldcp->tstate;

			rv = i_ldc_ctrlmsg(ldcp, msg);
			if (rv == EAGAIN) {
				/* re-process pkt - state was adjusted */
				continue;
			}
			if (rv == ECONNRESET) {
				notify_client = B_TRUE;
				notify_event = LDC_EVT_RESET;
				break;
			}

			/*
			 * control message processing was successful
			 * channel transitioned to ready for communication
			 */
			if (rv == 0 && ldcp->tstate == TS_UP &&
			    (tstate & ~TS_IN_RESET) !=
			    (ldcp->tstate & ~TS_IN_RESET)) {
				notify_client = B_TRUE;
				notify_event = LDC_EVT_UP;
			}
		}

		/* process data NACKs */
		if ((msg->type & LDC_DATA) && (msg->stype & LDC_NACK)) {
			DWARN(ldcp->id,
			    "i_ldc_rx_hdlr: (0x%llx) received DATA/NACK",
			    ldcp->id);
			mutex_enter(&ldcp->tx_lock);
			i_ldc_reset(ldcp, B_TRUE);
			mutex_exit(&ldcp->tx_lock);
			notify_client = B_TRUE;
			notify_event = LDC_EVT_RESET;
			break;
		}

		/* process data ACKs */
		if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) {
			/* a non-zero return is reported as a reset */
			if (rv = i_ldc_process_data_ACK(ldcp, msg)) {
				notify_client = B_TRUE;
				notify_event = LDC_EVT_RESET;
				break;
			}
		}

		/* move the head one position */
		rx_head = (rx_head + LDC_PACKET_SIZE) %
			(ldcp->rx_q_entries << LDC_PACKET_SHIFT);
		if (rv = i_ldc_set_rx_head(ldcp, rx_head)) {
			notify_client = B_TRUE;
			notify_event = LDC_EVT_RESET;
			break;
		}

	} /* for */

loop_exit:

	/* if callbacks are disabled, do not notify */
	if (!ldcp->cb_enabled)
		notify_client = B_FALSE;

	/*
	 * If there are data packets in the queue, the ldc_read will
	 * clear interrupts after draining the queue, else clear interrupts
	 */
	if ((notify_event & LDC_EVT_READ) == 0) {
		i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
	} else
		ldcp->rx_intr_state = LDC_INTR_PEND;


	if (notify_client) {
		/* the callback runs without the channel lock held */
		ldcp->cb_inprogress = B_TRUE;
		mutex_exit(&ldcp->lock);
		rv = ldcp->cb(notify_event, ldcp->cb_arg);
		if (rv) {
			DWARN(ldcp->id,
			    "i_ldc_rx_hdlr: (0x%llx) callback failure",
			    ldcp->id);
		}
		mutex_enter(&ldcp->lock);
		ldcp->cb_inprogress = B_FALSE;
	}

	mutex_exit(&ldcp->lock);

	D1(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) exiting handler", ldcp->id);
	return (DDI_INTR_CLAIMED);
}
2147 
2148 
2149 /* -------------------------------------------------------------------------- */
2150 
2151 /*
2152  * LDC API functions
2153  */
2154 
2155 /*
2156  * Initialize the channel. Allocate internal structure and memory for
2157  * TX/RX queues, and initialize locks.
2158  */
2159 int
2160 ldc_init(uint64_t id, ldc_attr_t *attr, ldc_handle_t *handle)
2161 {
2162 	ldc_chan_t 	*ldcp;
2163 	int		rv, exit_val;
2164 	uint64_t	ra_base, nentries;
2165 	uint64_t	qlen;
2166 
2167 	exit_val = EINVAL;	/* guarantee an error if exit on failure */
2168 
2169 	if (attr == NULL) {
2170 		DWARN(id, "ldc_init: (0x%llx) invalid attr\n", id);
2171 		return (EINVAL);
2172 	}
2173 	if (handle == NULL) {
2174 		DWARN(id, "ldc_init: (0x%llx) invalid handle\n", id);
2175 		return (EINVAL);
2176 	}
2177 
2178 	/* check if channel is valid */
2179 	rv = hv_ldc_tx_qinfo(id, &ra_base, &nentries);
2180 	if (rv == H_ECHANNEL) {
2181 		DWARN(id, "ldc_init: (0x%llx) invalid channel id\n", id);
2182 		return (EINVAL);
2183 	}
2184 
2185 	/* check if the channel has already been initialized */
2186 	mutex_enter(&ldcssp->lock);
2187 	ldcp = ldcssp->chan_list;
2188 	while (ldcp != NULL) {
2189 		if (ldcp->id == id) {
2190 			DWARN(id, "ldc_init: (0x%llx) already initialized\n",
2191 			    id);
2192 			mutex_exit(&ldcssp->lock);
2193 			return (EADDRINUSE);
2194 		}
2195 		ldcp = ldcp->next;
2196 	}
2197 	mutex_exit(&ldcssp->lock);
2198 
2199 	ASSERT(ldcp == NULL);
2200 
2201 	*handle = 0;
2202 
2203 	/* Allocate an ldcp structure */
2204 	ldcp = kmem_zalloc(sizeof (ldc_chan_t), KM_SLEEP);
2205 
2206 	/*
2207 	 * Initialize the channel and Tx lock
2208 	 *
2209 	 * The channel 'lock' protects the entire channel and
2210 	 * should be acquired before initializing, resetting,
2211 	 * destroying or reading from a channel.
2212 	 *
2213 	 * The 'tx_lock' should be acquired prior to transmitting
2214 	 * data over the channel. The lock should also be acquired
2215 	 * prior to channel reconfiguration (in order to prevent
2216 	 * concurrent writes).
2217 	 *
2218 	 * ORDERING: When both locks are being acquired, to prevent
2219 	 * deadlocks, the channel lock should be always acquired prior
2220 	 * to the tx_lock.
2221 	 */
2222 	mutex_init(&ldcp->lock, NULL, MUTEX_DRIVER, NULL);
2223 	mutex_init(&ldcp->tx_lock, NULL, MUTEX_DRIVER, NULL);
2224 
2225 	/* Initialize the channel */
2226 	ldcp->id = id;
2227 	ldcp->cb = NULL;
2228 	ldcp->cb_arg = NULL;
2229 	ldcp->cb_inprogress = B_FALSE;
2230 	ldcp->cb_enabled = B_FALSE;
2231 	ldcp->next = NULL;
2232 
2233 	/* Read attributes */
2234 	ldcp->mode = attr->mode;
2235 	ldcp->devclass = attr->devclass;
2236 	ldcp->devinst = attr->instance;
2237 	ldcp->mtu = (attr->mtu > 0) ? attr->mtu : LDC_DEFAULT_MTU;
2238 
2239 	D1(ldcp->id,
2240 	    "ldc_init: (0x%llx) channel attributes, class=0x%x, "
2241 	    "instance=0x%llx, mode=%d, mtu=%d\n",
2242 	    ldcp->id, ldcp->devclass, ldcp->devinst, ldcp->mode, ldcp->mtu);
2243 
2244 	ldcp->next_vidx = 0;
2245 	ldcp->tstate = TS_IN_RESET;
2246 	ldcp->hstate = 0;
2247 	ldcp->last_msg_snt = LDC_INIT_SEQID;
2248 	ldcp->last_ack_rcd = 0;
2249 	ldcp->last_msg_rcd = 0;
2250 
2251 	ldcp->stream_bufferp = NULL;
2252 	ldcp->exp_dring_list = NULL;
2253 	ldcp->imp_dring_list = NULL;
2254 	ldcp->mhdl_list = NULL;
2255 
2256 	ldcp->tx_intr_state = LDC_INTR_NONE;
2257 	ldcp->rx_intr_state = LDC_INTR_NONE;
2258 
2259 	/* Initialize payload size depending on whether channel is reliable */
2260 	switch (ldcp->mode) {
2261 	case LDC_MODE_RAW:
2262 		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_RAW;
2263 		ldcp->read_p = i_ldc_read_raw;
2264 		ldcp->write_p = i_ldc_write_raw;
2265 		break;
2266 	case LDC_MODE_UNRELIABLE:
2267 		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_UNRELIABLE;
2268 		ldcp->read_p = i_ldc_read_packet;
2269 		ldcp->write_p = i_ldc_write_packet;
2270 		break;
2271 	case LDC_MODE_RELIABLE:
2272 		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_RELIABLE;
2273 		ldcp->read_p = i_ldc_read_packet;
2274 		ldcp->write_p = i_ldc_write_packet;
2275 		break;
2276 	case LDC_MODE_STREAM:
2277 		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_RELIABLE;
2278 
2279 		ldcp->stream_remains = 0;
2280 		ldcp->stream_offset = 0;
2281 		ldcp->stream_bufferp = kmem_alloc(ldcp->mtu, KM_SLEEP);
2282 		ldcp->read_p = i_ldc_read_stream;
2283 		ldcp->write_p = i_ldc_write_stream;
2284 		break;
2285 	default:
2286 		exit_val = EINVAL;
2287 		goto cleanup_on_exit;
2288 	}
2289 
2290 	/*
2291 	 * qlen is (mtu * ldc_mtu_msgs) / pkt_payload. If this
2292 	 * value is smaller than default length of ldc_queue_entries,
2293 	 * qlen is set to ldc_queue_entries..
2294 	 */
2295 	qlen = (ldcp->mtu * ldc_mtu_msgs) / ldcp->pkt_payload;
2296 	ldcp->rx_q_entries =
2297 		(qlen < ldc_queue_entries) ? ldc_queue_entries : qlen;
2298 	ldcp->tx_q_entries = ldcp->rx_q_entries;
2299 
2300 	D1(ldcp->id, "ldc_init: queue length = 0x%llx\n", qlen);
2301 
2302 	/* Create a transmit queue */
2303 	ldcp->tx_q_va = (uint64_t)
2304 		contig_mem_alloc(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
2305 	if (ldcp->tx_q_va == NULL) {
2306 		cmn_err(CE_WARN,
2307 		    "ldc_init: (0x%lx) TX queue allocation failed\n",
2308 		    ldcp->id);
2309 		exit_val = ENOMEM;
2310 		goto cleanup_on_exit;
2311 	}
2312 	ldcp->tx_q_ra = va_to_pa((caddr_t)ldcp->tx_q_va);
2313 
2314 	D2(ldcp->id, "ldc_init: txq_va=0x%llx, txq_ra=0x%llx, entries=0x%llx\n",
2315 	    ldcp->tx_q_va, ldcp->tx_q_ra, ldcp->tx_q_entries);
2316 
2317 	ldcp->tstate |= TS_TXQ_RDY;
2318 
2319 	/* Create a receive queue */
2320 	ldcp->rx_q_va = (uint64_t)
2321 		contig_mem_alloc(ldcp->rx_q_entries << LDC_PACKET_SHIFT);
2322 	if (ldcp->rx_q_va == NULL) {
2323 		cmn_err(CE_WARN,
2324 		    "ldc_init: (0x%lx) RX queue allocation failed\n",
2325 		    ldcp->id);
2326 		exit_val = ENOMEM;
2327 		goto cleanup_on_exit;
2328 	}
2329 	ldcp->rx_q_ra = va_to_pa((caddr_t)ldcp->rx_q_va);
2330 
2331 	D2(ldcp->id, "ldc_init: rxq_va=0x%llx, rxq_ra=0x%llx, entries=0x%llx\n",
2332 	    ldcp->rx_q_va, ldcp->rx_q_ra, ldcp->rx_q_entries);
2333 
2334 	ldcp->tstate |= TS_RXQ_RDY;
2335 
2336 	/* Init descriptor ring and memory handle list lock */
2337 	mutex_init(&ldcp->exp_dlist_lock, NULL, MUTEX_DRIVER, NULL);
2338 	mutex_init(&ldcp->imp_dlist_lock, NULL, MUTEX_DRIVER, NULL);
2339 	mutex_init(&ldcp->mlist_lock, NULL, MUTEX_DRIVER, NULL);
2340 
2341 	/* mark status as INITialized */
2342 	ldcp->status = LDC_INIT;
2343 
2344 	/* Add to channel list */
2345 	mutex_enter(&ldcssp->lock);
2346 	ldcp->next = ldcssp->chan_list;
2347 	ldcssp->chan_list = ldcp;
2348 	ldcssp->channel_count++;
2349 	mutex_exit(&ldcssp->lock);
2350 
2351 	/* set the handle */
2352 	*handle = (ldc_handle_t)ldcp;
2353 
2354 	D1(ldcp->id, "ldc_init: (0x%llx) channel initialized\n", ldcp->id);
2355 
2356 	return (0);
2357 
2358 cleanup_on_exit:
2359 
2360 	if (ldcp->mode == LDC_MODE_STREAM && ldcp->stream_bufferp)
2361 		kmem_free(ldcp->stream_bufferp, ldcp->mtu);
2362 
2363 	if (ldcp->tstate & TS_TXQ_RDY)
2364 		contig_mem_free((caddr_t)ldcp->tx_q_va,
2365 		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT));
2366 
2367 	if (ldcp->tstate & TS_RXQ_RDY)
2368 		contig_mem_free((caddr_t)ldcp->rx_q_va,
2369 		    (ldcp->rx_q_entries << LDC_PACKET_SHIFT));
2370 
2371 	mutex_destroy(&ldcp->tx_lock);
2372 	mutex_destroy(&ldcp->lock);
2373 
2374 	if (ldcp)
2375 		kmem_free(ldcp, sizeof (ldc_chan_t));
2376 
2377 	return (exit_val);
2378 }
2379 
2380 /*
2381  * Finalizes the LDC connection. It will return EBUSY if the
2382  * channel is open. A ldc_close() has to be done prior to
2383  * a ldc_fini operation. It frees TX/RX queues, associated
2384  * with the channel
2385  */
2386 int
2387 ldc_fini(ldc_handle_t handle)
2388 {
2389 	ldc_chan_t 	*ldcp;
2390 	ldc_chan_t 	*tmp_ldcp;
2391 	uint64_t 	id;
2392 
2393 	if (handle == NULL) {
2394 		DWARN(DBG_ALL_LDCS, "ldc_fini: invalid channel handle\n");
2395 		return (EINVAL);
2396 	}
2397 	ldcp = (ldc_chan_t *)handle;
2398 	id = ldcp->id;
2399 
2400 	mutex_enter(&ldcp->lock);
2401 
2402 	if ((ldcp->tstate & ~TS_IN_RESET) > TS_INIT) {
2403 		DWARN(ldcp->id, "ldc_fini: (0x%llx) channel is open\n",
2404 		    ldcp->id);
2405 		mutex_exit(&ldcp->lock);
2406 		return (EBUSY);
2407 	}
2408 
2409 	/* Remove from the channel list */
2410 	mutex_enter(&ldcssp->lock);
2411 	tmp_ldcp = ldcssp->chan_list;
2412 	if (tmp_ldcp == ldcp) {
2413 		ldcssp->chan_list = ldcp->next;
2414 		ldcp->next = NULL;
2415 	} else {
2416 		while (tmp_ldcp != NULL) {
2417 			if (tmp_ldcp->next == ldcp) {
2418 				tmp_ldcp->next = ldcp->next;
2419 				ldcp->next = NULL;
2420 				break;
2421 			}
2422 			tmp_ldcp = tmp_ldcp->next;
2423 		}
2424 		if (tmp_ldcp == NULL) {
2425 			DWARN(DBG_ALL_LDCS, "ldc_fini: invalid channel hdl\n");
2426 			mutex_exit(&ldcssp->lock);
2427 			mutex_exit(&ldcp->lock);
2428 			return (EINVAL);
2429 		}
2430 	}
2431 
2432 	ldcssp->channel_count--;
2433 
2434 	mutex_exit(&ldcssp->lock);
2435 
2436 	/* Free the map table for this channel */
2437 	if (ldcp->mtbl) {
2438 		(void) hv_ldc_set_map_table(ldcp->id, NULL, NULL);
2439 		if (ldcp->mtbl->contigmem)
2440 			contig_mem_free(ldcp->mtbl->table, ldcp->mtbl->size);
2441 		else
2442 			kmem_free(ldcp->mtbl->table, ldcp->mtbl->size);
2443 		mutex_destroy(&ldcp->mtbl->lock);
2444 		kmem_free(ldcp->mtbl, sizeof (ldc_mtbl_t));
2445 	}
2446 
2447 	/* Destroy descriptor ring and memory handle list lock */
2448 	mutex_destroy(&ldcp->exp_dlist_lock);
2449 	mutex_destroy(&ldcp->imp_dlist_lock);
2450 	mutex_destroy(&ldcp->mlist_lock);
2451 
2452 	/* Free the stream buffer for STREAM_MODE */
2453 	if (ldcp->mode == LDC_MODE_STREAM && ldcp->stream_bufferp)
2454 		kmem_free(ldcp->stream_bufferp, ldcp->mtu);
2455 
2456 	/* Free the RX queue */
2457 	contig_mem_free((caddr_t)ldcp->rx_q_va,
2458 	    (ldcp->rx_q_entries << LDC_PACKET_SHIFT));
2459 	ldcp->tstate &= ~TS_RXQ_RDY;
2460 
2461 	/* Free the TX queue */
2462 	contig_mem_free((caddr_t)ldcp->tx_q_va,
2463 	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT));
2464 	ldcp->tstate &= ~TS_TXQ_RDY;
2465 
2466 	mutex_exit(&ldcp->lock);
2467 
2468 	/* Destroy mutex */
2469 	mutex_destroy(&ldcp->tx_lock);
2470 	mutex_destroy(&ldcp->lock);
2471 
2472 	/* free channel structure */
2473 	kmem_free(ldcp, sizeof (ldc_chan_t));
2474 
2475 	D1(id, "ldc_fini: (0x%llx) channel finalized\n", id);
2476 
2477 	return (0);
2478 }
2479 
2480 /*
2481  * Open the LDC channel for use. It registers the TX/RX queues
2482  * with the Hypervisor. It also specifies the interrupt number
2483  * and target CPU for this channel
2484  */
2485 int
2486 ldc_open(ldc_handle_t handle)
2487 {
2488 	ldc_chan_t 	*ldcp;
2489 	int 		rv;
2490 
2491 	if (handle == NULL) {
2492 		DWARN(DBG_ALL_LDCS, "ldc_open: invalid channel handle\n");
2493 		return (EINVAL);
2494 	}
2495 
2496 	ldcp = (ldc_chan_t *)handle;
2497 
2498 	mutex_enter(&ldcp->lock);
2499 
2500 	if (ldcp->tstate < TS_INIT) {
2501 		DWARN(ldcp->id,
2502 		    "ldc_open: (0x%llx) channel not initialized\n", ldcp->id);
2503 		mutex_exit(&ldcp->lock);
2504 		return (EFAULT);
2505 	}
2506 	if ((ldcp->tstate & ~TS_IN_RESET) >= TS_OPEN) {
2507 		DWARN(ldcp->id,
2508 		    "ldc_open: (0x%llx) channel is already open\n", ldcp->id);
2509 		mutex_exit(&ldcp->lock);
2510 		return (EFAULT);
2511 	}
2512 
2513 	/*
2514 	 * Unregister/Register the tx queue with the hypervisor
2515 	 */
2516 	rv = hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
2517 	if (rv) {
2518 		cmn_err(CE_WARN,
2519 		    "ldc_open: (0x%lx) channel tx queue unconf failed\n",
2520 		    ldcp->id);
2521 		mutex_exit(&ldcp->lock);
2522 		return (EIO);
2523 	}
2524 
2525 	rv = hv_ldc_tx_qconf(ldcp->id, ldcp->tx_q_ra, ldcp->tx_q_entries);
2526 	if (rv) {
2527 		cmn_err(CE_WARN,
2528 		    "ldc_open: (0x%lx) channel tx queue conf failed\n",
2529 		    ldcp->id);
2530 		mutex_exit(&ldcp->lock);
2531 		return (EIO);
2532 	}
2533 
2534 	D2(ldcp->id, "ldc_open: (0x%llx) registered tx queue with LDC\n",
2535 	    ldcp->id);
2536 
2537 	/*
2538 	 * Unregister/Register the rx queue with the hypervisor
2539 	 */
2540 	rv = hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
2541 	if (rv) {
2542 		cmn_err(CE_WARN,
2543 		    "ldc_open: (0x%lx) channel rx queue unconf failed\n",
2544 		    ldcp->id);
2545 		mutex_exit(&ldcp->lock);
2546 		return (EIO);
2547 	}
2548 
2549 	rv = hv_ldc_rx_qconf(ldcp->id, ldcp->rx_q_ra, ldcp->rx_q_entries);
2550 	if (rv) {
2551 		cmn_err(CE_WARN,
2552 		    "ldc_open: (0x%lx) channel rx queue conf failed\n",
2553 		    ldcp->id);
2554 		mutex_exit(&ldcp->lock);
2555 		return (EIO);
2556 	}
2557 
2558 	D2(ldcp->id, "ldc_open: (0x%llx) registered rx queue with LDC\n",
2559 	    ldcp->id);
2560 
2561 	ldcp->tstate |= TS_QCONF_RDY;
2562 
2563 	/* Register the channel with the channel nexus */
2564 	rv = i_ldc_register_channel(ldcp);
2565 	if (rv && rv != EAGAIN) {
2566 		cmn_err(CE_WARN,
2567 		    "ldc_open: (0x%lx) channel register failed\n", ldcp->id);
2568 		(void) hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
2569 		(void) hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
2570 		mutex_exit(&ldcp->lock);
2571 		return (EIO);
2572 	}
2573 
2574 	/* mark channel in OPEN state */
2575 	ldcp->status = LDC_OPEN;
2576 
2577 	/* Read channel state */
2578 	rv = hv_ldc_tx_get_state(ldcp->id,
2579 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
2580 	if (rv) {
2581 		cmn_err(CE_WARN,
2582 		    "ldc_open: (0x%lx) cannot read channel state\n",
2583 		    ldcp->id);
2584 		(void) i_ldc_unregister_channel(ldcp);
2585 		(void) hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
2586 		(void) hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
2587 		mutex_exit(&ldcp->lock);
2588 		return (EIO);
2589 	}
2590 
2591 	/*
2592 	 * set the ACKd head to current head location for reliable &
2593 	 * streaming mode
2594 	 */
2595 	ldcp->tx_ackd_head = ldcp->tx_head;
2596 
2597 	/* mark channel ready if HV report link is UP (peer alloc'd Rx queue) */
2598 	if (ldcp->link_state == LDC_CHANNEL_UP ||
2599 	    ldcp->link_state == LDC_CHANNEL_RESET) {
2600 		ldcp->tstate |= TS_LINK_READY;
2601 		ldcp->status = LDC_READY;
2602 	}
2603 
2604 	/*
2605 	 * if channel is being opened in RAW mode - no handshake is needed
2606 	 * switch the channel READY and UP state
2607 	 */
2608 	if (ldcp->mode == LDC_MODE_RAW) {
2609 		ldcp->tstate = TS_UP;	/* set bits associated with LDC UP */
2610 		ldcp->status = LDC_UP;
2611 	}
2612 
2613 	mutex_exit(&ldcp->lock);
2614 
2615 	/*
2616 	 * Increment number of open channels
2617 	 */
2618 	mutex_enter(&ldcssp->lock);
2619 	ldcssp->channels_open++;
2620 	mutex_exit(&ldcssp->lock);
2621 
2622 	D1(ldcp->id,
2623 	    "ldc_open: (0x%llx) channel (0x%p) open for use "
2624 	    "(tstate=0x%x, status=0x%x)\n",
2625 	    ldcp->id, ldcp, ldcp->tstate, ldcp->status);
2626 
2627 	return (0);
2628 }
2629 
2630 /*
2631  * Close the LDC connection. It will return EBUSY if there
2632  * are memory segments or descriptor rings either bound to or
2633  * mapped over the channel
2634  */
2635 int
2636 ldc_close(ldc_handle_t handle)
2637 {
2638 	ldc_chan_t 	*ldcp;
2639 	int		rv = 0, retries = 0;
2640 	boolean_t	chk_done = B_FALSE;
2641 
2642 	if (handle == NULL) {
2643 		DWARN(DBG_ALL_LDCS, "ldc_close: invalid channel handle\n");
2644 		return (EINVAL);
2645 	}
2646 	ldcp = (ldc_chan_t *)handle;
2647 
2648 	mutex_enter(&ldcp->lock);
2649 
2650 	/* return error if channel is not open */
2651 	if ((ldcp->tstate & ~TS_IN_RESET) < TS_OPEN) {
2652 		DWARN(ldcp->id,
2653 		    "ldc_close: (0x%llx) channel is not open\n", ldcp->id);
2654 		mutex_exit(&ldcp->lock);
2655 		return (EFAULT);
2656 	}
2657 
2658 	/* if any memory handles, drings, are bound or mapped cannot close */
2659 	if (ldcp->mhdl_list != NULL) {
2660 		DWARN(ldcp->id,
2661 		    "ldc_close: (0x%llx) channel has bound memory handles\n",
2662 		    ldcp->id);
2663 		mutex_exit(&ldcp->lock);
2664 		return (EBUSY);
2665 	}
2666 	if (ldcp->exp_dring_list != NULL) {
2667 		DWARN(ldcp->id,
2668 		    "ldc_close: (0x%llx) channel has bound descriptor rings\n",
2669 		    ldcp->id);
2670 		mutex_exit(&ldcp->lock);
2671 		return (EBUSY);
2672 	}
2673 	if (ldcp->imp_dring_list != NULL) {
2674 		DWARN(ldcp->id,
2675 		    "ldc_close: (0x%llx) channel has mapped descriptor rings\n",
2676 		    ldcp->id);
2677 		mutex_exit(&ldcp->lock);
2678 		return (EBUSY);
2679 	}
2680 
2681 	if (ldcp->cb_inprogress) {
2682 		DWARN(ldcp->id, "ldc_close: (0x%llx) callback active\n",
2683 		    ldcp->id);
2684 		mutex_exit(&ldcp->lock);
2685 		return (EWOULDBLOCK);
2686 	}
2687 
2688 	/* Obtain Tx lock */
2689 	mutex_enter(&ldcp->tx_lock);
2690 
2691 	/*
2692 	 * Wait for pending transmits to complete i.e Tx queue to drain
2693 	 * if there are pending pkts - wait 1 ms and retry again
2694 	 */
2695 	for (;;) {
2696 
2697 		rv = hv_ldc_tx_get_state(ldcp->id,
2698 		    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
2699 		if (rv) {
2700 			cmn_err(CE_WARN,
2701 			    "ldc_close: (0x%lx) cannot read qptrs\n", ldcp->id);
2702 			mutex_exit(&ldcp->tx_lock);
2703 			mutex_exit(&ldcp->lock);
2704 			return (EIO);
2705 		}
2706 
2707 		if (ldcp->tx_head == ldcp->tx_tail ||
2708 		    ldcp->link_state != LDC_CHANNEL_UP) {
2709 			break;
2710 		}
2711 
2712 		if (chk_done) {
2713 			DWARN(ldcp->id,
2714 			    "ldc_close: (0x%llx) Tx queue drain timeout\n",
2715 			    ldcp->id);
2716 			break;
2717 		}
2718 
2719 		/* wait for one ms and try again */
2720 		delay(drv_usectohz(1000));
2721 		chk_done = B_TRUE;
2722 	}
2723 
2724 	/*
2725 	 * Drain the Tx and Rx queues as we are closing the
2726 	 * channel. We dont care about any pending packets.
2727 	 * We have to also drain the queue prior to clearing
2728 	 * pending interrupts, otherwise the HV will trigger
2729 	 * an interrupt the moment the interrupt state is
2730 	 * cleared.
2731 	 */
2732 	(void) i_ldc_txq_reconf(ldcp);
2733 	(void) i_ldc_rxq_drain(ldcp);
2734 
2735 	/*
2736 	 * Unregister the channel with the nexus
2737 	 */
2738 	while ((rv = i_ldc_unregister_channel(ldcp)) != 0) {
2739 
2740 		mutex_exit(&ldcp->tx_lock);
2741 		mutex_exit(&ldcp->lock);
2742 
2743 		/* if any error other than EAGAIN return back */
2744 		if (rv != EAGAIN || retries >= ldc_max_retries) {
2745 			cmn_err(CE_WARN,
2746 			    "ldc_close: (0x%lx) unregister failed, %d\n",
2747 			    ldcp->id, rv);
2748 			return (rv);
2749 		}
2750 
2751 		/*
2752 		 * As there could be pending interrupts we need
2753 		 * to wait and try again
2754 		 */
2755 		drv_usecwait(ldc_close_delay);
2756 		mutex_enter(&ldcp->lock);
2757 		mutex_enter(&ldcp->tx_lock);
2758 		retries++;
2759 	}
2760 
2761 	/*
2762 	 * Unregister queues
2763 	 */
2764 	rv = hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
2765 	if (rv) {
2766 		cmn_err(CE_WARN,
2767 		    "ldc_close: (0x%lx) channel TX queue unconf failed\n",
2768 		    ldcp->id);
2769 		mutex_exit(&ldcp->tx_lock);
2770 		mutex_exit(&ldcp->lock);
2771 		return (EIO);
2772 	}
2773 	rv = hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
2774 	if (rv) {
2775 		cmn_err(CE_WARN,
2776 		    "ldc_close: (0x%lx) channel RX queue unconf failed\n",
2777 		    ldcp->id);
2778 		mutex_exit(&ldcp->tx_lock);
2779 		mutex_exit(&ldcp->lock);
2780 		return (EIO);
2781 	}
2782 
2783 	ldcp->tstate &= ~TS_QCONF_RDY;
2784 
2785 	/* Reset channel state information */
2786 	i_ldc_reset_state(ldcp);
2787 
2788 	/* Mark channel as down and in initialized state */
2789 	ldcp->tx_ackd_head = 0;
2790 	ldcp->tx_head = 0;
2791 	ldcp->tstate = TS_IN_RESET|TS_INIT;
2792 	ldcp->status = LDC_INIT;
2793 
2794 	mutex_exit(&ldcp->tx_lock);
2795 	mutex_exit(&ldcp->lock);
2796 
2797 	/* Decrement number of open channels */
2798 	mutex_enter(&ldcssp->lock);
2799 	ldcssp->channels_open--;
2800 	mutex_exit(&ldcssp->lock);
2801 
2802 	D1(ldcp->id, "ldc_close: (0x%llx) channel closed\n", ldcp->id);
2803 
2804 	return (0);
2805 }
2806 
2807 /*
2808  * Register channel callback
2809  */
2810 int
2811 ldc_reg_callback(ldc_handle_t handle,
2812     uint_t(*cb)(uint64_t event, caddr_t arg), caddr_t arg)
2813 {
2814 	ldc_chan_t *ldcp;
2815 
2816 	if (handle == NULL) {
2817 		DWARN(DBG_ALL_LDCS,
2818 		    "ldc_reg_callback: invalid channel handle\n");
2819 		return (EINVAL);
2820 	}
2821 	if (((uint64_t)cb) < KERNELBASE) {
2822 		DWARN(DBG_ALL_LDCS, "ldc_reg_callback: invalid callback\n");
2823 		return (EINVAL);
2824 	}
2825 	ldcp = (ldc_chan_t *)handle;
2826 
2827 	mutex_enter(&ldcp->lock);
2828 
2829 	if (ldcp->cb) {
2830 		DWARN(ldcp->id, "ldc_reg_callback: (0x%llx) callback exists\n",
2831 		    ldcp->id);
2832 		mutex_exit(&ldcp->lock);
2833 		return (EIO);
2834 	}
2835 	if (ldcp->cb_inprogress) {
2836 		DWARN(ldcp->id, "ldc_reg_callback: (0x%llx) callback active\n",
2837 		    ldcp->id);
2838 		mutex_exit(&ldcp->lock);
2839 		return (EWOULDBLOCK);
2840 	}
2841 
2842 	ldcp->cb = cb;
2843 	ldcp->cb_arg = arg;
2844 	ldcp->cb_enabled = B_TRUE;
2845 
2846 	D1(ldcp->id,
2847 	    "ldc_reg_callback: (0x%llx) registered callback for channel\n",
2848 	    ldcp->id);
2849 
2850 	mutex_exit(&ldcp->lock);
2851 
2852 	return (0);
2853 }
2854 
2855 /*
2856  * Unregister channel callback
2857  */
2858 int
2859 ldc_unreg_callback(ldc_handle_t handle)
2860 {
2861 	ldc_chan_t *ldcp;
2862 
2863 	if (handle == NULL) {
2864 		DWARN(DBG_ALL_LDCS,
2865 		    "ldc_unreg_callback: invalid channel handle\n");
2866 		return (EINVAL);
2867 	}
2868 	ldcp = (ldc_chan_t *)handle;
2869 
2870 	mutex_enter(&ldcp->lock);
2871 
2872 	if (ldcp->cb == NULL) {
2873 		DWARN(ldcp->id,
2874 		    "ldc_unreg_callback: (0x%llx) no callback exists\n",
2875 		    ldcp->id);
2876 		mutex_exit(&ldcp->lock);
2877 		return (EIO);
2878 	}
2879 	if (ldcp->cb_inprogress) {
2880 		DWARN(ldcp->id,
2881 		    "ldc_unreg_callback: (0x%llx) callback active\n",
2882 		    ldcp->id);
2883 		mutex_exit(&ldcp->lock);
2884 		return (EWOULDBLOCK);
2885 	}
2886 
2887 	ldcp->cb = NULL;
2888 	ldcp->cb_arg = NULL;
2889 	ldcp->cb_enabled = B_FALSE;
2890 
2891 	D1(ldcp->id,
2892 	    "ldc_unreg_callback: (0x%llx) unregistered callback for channel\n",
2893 	    ldcp->id);
2894 
2895 	mutex_exit(&ldcp->lock);
2896 
2897 	return (0);
2898 }
2899 
2900 
2901 /*
2902  * Bring a channel up by initiating a handshake with the peer
2903  * This call is asynchronous. It will complete at a later point
2904  * in time when the peer responds back with an RTR.
2905  */
2906 int
2907 ldc_up(ldc_handle_t handle)
2908 {
2909 	int 		rv;
2910 	ldc_chan_t 	*ldcp;
2911 	ldc_msg_t 	*ldcmsg;
2912 	uint64_t 	tx_tail, tstate, link_state;
2913 
2914 	if (handle == NULL) {
2915 		DWARN(DBG_ALL_LDCS, "ldc_up: invalid channel handle\n");
2916 		return (EINVAL);
2917 	}
2918 	ldcp = (ldc_chan_t *)handle;
2919 
2920 	mutex_enter(&ldcp->lock);
2921 
2922 	D1(ldcp->id, "ldc_up: (0x%llx) doing channel UP\n", ldcp->id);
2923 
2924 	/* clear the reset state */
2925 	tstate = ldcp->tstate;
2926 	ldcp->tstate &= ~TS_IN_RESET;
2927 
2928 	if (ldcp->tstate == TS_UP) {
2929 		DWARN(ldcp->id,
2930 		    "ldc_up: (0x%llx) channel is already in UP state\n",
2931 		    ldcp->id);
2932 
2933 		/* mark channel as up */
2934 		ldcp->status = LDC_UP;
2935 
2936 		/*
2937 		 * if channel was in reset state and there was
2938 		 * pending data clear interrupt state. this will
2939 		 * trigger an interrupt, causing the RX handler to
2940 		 * to invoke the client's callback
2941 		 */
2942 		if ((tstate & TS_IN_RESET) &&
2943 		    ldcp->rx_intr_state == LDC_INTR_PEND) {
2944 			D1(ldcp->id,
2945 			    "ldc_up: (0x%llx) channel has pending data, "
2946 			    "clearing interrupt\n", ldcp->id);
2947 			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
2948 		}
2949 
2950 		mutex_exit(&ldcp->lock);
2951 		return (0);
2952 	}
2953 
2954 	/* if the channel is in RAW mode - mark it as UP, if READY */
2955 	if (ldcp->mode == LDC_MODE_RAW && ldcp->tstate >= TS_READY) {
2956 		ldcp->tstate = TS_UP;
2957 		mutex_exit(&ldcp->lock);
2958 		return (0);
2959 	}
2960 
2961 	/* Don't start another handshake if there is one in progress */
2962 	if (ldcp->hstate) {
2963 		D1(ldcp->id,
2964 		    "ldc_up: (0x%llx) channel handshake in progress\n",
2965 		    ldcp->id);
2966 		mutex_exit(&ldcp->lock);
2967 		return (0);
2968 	}
2969 
2970 	mutex_enter(&ldcp->tx_lock);
2971 
2972 	/* save current link state */
2973 	link_state = ldcp->link_state;
2974 
2975 	/* get the current tail for the LDC msg */
2976 	rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
2977 	if (rv) {
2978 		D1(ldcp->id, "ldc_up: (0x%llx) cannot initiate handshake\n",
2979 		    ldcp->id);
2980 		mutex_exit(&ldcp->tx_lock);
2981 		mutex_exit(&ldcp->lock);
2982 		return (ECONNREFUSED);
2983 	}
2984 
2985 	/*
2986 	 * If i_ldc_get_tx_tail() changed link_state to either RESET or UP,
2987 	 * from a previous state of DOWN, then mark the channel as
2988 	 * being ready for handshake.
2989 	 */
2990 	if ((link_state == LDC_CHANNEL_DOWN) &&
2991 	    (link_state != ldcp->link_state)) {
2992 
2993 		ASSERT((ldcp->link_state == LDC_CHANNEL_RESET) ||
2994 		    (ldcp->link_state == LDC_CHANNEL_UP));
2995 
2996 		if (ldcp->mode == LDC_MODE_RAW) {
2997 			ldcp->status = LDC_UP;
2998 			ldcp->tstate = TS_UP;
2999 			mutex_exit(&ldcp->tx_lock);
3000 			mutex_exit(&ldcp->lock);
3001 			return (0);
3002 		} else {
3003 			ldcp->status = LDC_READY;
3004 			ldcp->tstate |= TS_LINK_READY;
3005 		}
3006 
3007 	}
3008 
3009 	ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
3010 	ZERO_PKT(ldcmsg);
3011 
3012 	ldcmsg->type = LDC_CTRL;
3013 	ldcmsg->stype = LDC_INFO;
3014 	ldcmsg->ctrl = LDC_VER;
3015 	ldcp->next_vidx = 0;
3016 	bcopy(&ldc_versions[0], ldcmsg->udata, sizeof (ldc_versions[0]));
3017 
3018 	DUMP_LDC_PKT(ldcp, "ldc_up snd ver", (uint64_t)ldcmsg);
3019 
3020 	/* initiate the send by calling into HV and set the new tail */
3021 	tx_tail = (tx_tail + LDC_PACKET_SIZE) %
3022 		(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
3023 
3024 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
3025 	if (rv) {
3026 		DWARN(ldcp->id,
3027 		    "ldc_up: (0x%llx) cannot initiate handshake rv=%d\n",
3028 		    ldcp->id, rv);
3029 		mutex_exit(&ldcp->tx_lock);
3030 		mutex_exit(&ldcp->lock);
3031 		return (rv);
3032 	}
3033 
3034 	ldcp->hstate |= TS_SENT_VER;
3035 	ldcp->tx_tail = tx_tail;
3036 	D1(ldcp->id, "ldc_up: (0x%llx) channel up initiated\n", ldcp->id);
3037 
3038 	mutex_exit(&ldcp->tx_lock);
3039 	mutex_exit(&ldcp->lock);
3040 
3041 	return (rv);
3042 }
3043 
3044 
3045 /*
3046  * Bring a channel down by resetting its state and queues
3047  */
3048 int
3049 ldc_down(ldc_handle_t handle)
3050 {
3051 	ldc_chan_t 	*ldcp;
3052 
3053 	if (handle == NULL) {
3054 		DWARN(DBG_ALL_LDCS, "ldc_down: invalid channel handle\n");
3055 		return (EINVAL);
3056 	}
3057 	ldcp = (ldc_chan_t *)handle;
3058 	mutex_enter(&ldcp->lock);
3059 	mutex_enter(&ldcp->tx_lock);
3060 	i_ldc_reset(ldcp, B_TRUE);
3061 	mutex_exit(&ldcp->tx_lock);
3062 	mutex_exit(&ldcp->lock);
3063 
3064 	return (0);
3065 }
3066 
3067 /*
3068  * Get the current channel status
3069  */
3070 int
3071 ldc_status(ldc_handle_t handle, ldc_status_t *status)
3072 {
3073 	ldc_chan_t *ldcp;
3074 
3075 	if (handle == NULL || status == NULL) {
3076 		DWARN(DBG_ALL_LDCS, "ldc_status: invalid argument\n");
3077 		return (EINVAL);
3078 	}
3079 	ldcp = (ldc_chan_t *)handle;
3080 
3081 	*status = ((ldc_chan_t *)handle)->status;
3082 
3083 	D1(ldcp->id,
3084 	    "ldc_status: (0x%llx) returned status %d\n", ldcp->id, *status);
3085 	return (0);
3086 }
3087 
3088 
3089 /*
3090  * Set the channel's callback mode - enable/disable callbacks
3091  */
3092 int
3093 ldc_set_cb_mode(ldc_handle_t handle, ldc_cb_mode_t cmode)
3094 {
3095 	ldc_chan_t 	*ldcp;
3096 
3097 	if (handle == NULL) {
3098 		DWARN(DBG_ALL_LDCS,
3099 		    "ldc_set_intr_mode: invalid channel handle\n");
3100 		return (EINVAL);
3101 	}
3102 	ldcp = (ldc_chan_t *)handle;
3103 
3104 	/*
3105 	 * Record no callbacks should be invoked
3106 	 */
3107 	mutex_enter(&ldcp->lock);
3108 
3109 	switch (cmode) {
3110 	case LDC_CB_DISABLE:
3111 		if (!ldcp->cb_enabled) {
3112 			DWARN(ldcp->id,
3113 			    "ldc_set_cb_mode: (0x%llx) callbacks disabled\n",
3114 			    ldcp->id);
3115 			break;
3116 		}
3117 		ldcp->cb_enabled = B_FALSE;
3118 
3119 		D1(ldcp->id, "ldc_set_cb_mode: (0x%llx) disabled callbacks\n",
3120 		    ldcp->id);
3121 		break;
3122 
3123 	case LDC_CB_ENABLE:
3124 		if (ldcp->cb_enabled) {
3125 			DWARN(ldcp->id,
3126 			    "ldc_set_cb_mode: (0x%llx) callbacks enabled\n",
3127 			    ldcp->id);
3128 			break;
3129 		}
3130 		ldcp->cb_enabled = B_TRUE;
3131 
3132 		D1(ldcp->id, "ldc_set_cb_mode: (0x%llx) enabled callbacks\n",
3133 		    ldcp->id);
3134 		break;
3135 	}
3136 
3137 	mutex_exit(&ldcp->lock);
3138 
3139 	return (0);
3140 }
3141 
3142 /*
3143  * Check to see if there are packets on the incoming queue
3144  * Will return hasdata = B_FALSE if there are no packets
3145  */
3146 int
3147 ldc_chkq(ldc_handle_t handle, boolean_t *hasdata)
3148 {
3149 	int 		rv;
3150 	uint64_t 	rx_head, rx_tail;
3151 	ldc_chan_t 	*ldcp;
3152 
3153 	if (handle == NULL) {
3154 		DWARN(DBG_ALL_LDCS, "ldc_chkq: invalid channel handle\n");
3155 		return (EINVAL);
3156 	}
3157 	ldcp = (ldc_chan_t *)handle;
3158 
3159 	*hasdata = B_FALSE;
3160 
3161 	mutex_enter(&ldcp->lock);
3162 
3163 	if (ldcp->tstate != TS_UP) {
3164 		D1(ldcp->id,
3165 		    "ldc_chkq: (0x%llx) channel is not up\n", ldcp->id);
3166 		mutex_exit(&ldcp->lock);
3167 		return (ECONNRESET);
3168 	}
3169 
3170 	/* Read packet(s) from the queue */
3171 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
3172 	    &ldcp->link_state);
3173 	if (rv != 0) {
3174 		cmn_err(CE_WARN,
3175 		    "ldc_chkq: (0x%lx) unable to read queue ptrs", ldcp->id);
3176 		mutex_exit(&ldcp->lock);
3177 		return (EIO);
3178 	}
3179 	/* reset the channel state if the channel went down */
3180 	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
3181 	    ldcp->link_state == LDC_CHANNEL_RESET) {
3182 		mutex_enter(&ldcp->tx_lock);
3183 		i_ldc_reset(ldcp, B_FALSE);
3184 		mutex_exit(&ldcp->tx_lock);
3185 		mutex_exit(&ldcp->lock);
3186 		return (ECONNRESET);
3187 	}
3188 
3189 	if ((rx_head != rx_tail) ||
3190 	    (ldcp->mode == LDC_MODE_STREAM && ldcp->stream_remains > 0)) {
3191 		D1(ldcp->id,
3192 		    "ldc_chkq: (0x%llx) queue has pkt(s) or buffered data\n",
3193 		    ldcp->id);
3194 		*hasdata = B_TRUE;
3195 	}
3196 
3197 	mutex_exit(&ldcp->lock);
3198 
3199 	return (0);
3200 }
3201 
3202 
3203 /*
3204  * Read 'size' amount of bytes or less. If incoming buffer
3205  * is more than 'size', ENOBUFS is returned.
3206  *
3207  * On return, size contains the number of bytes read.
3208  */
3209 int
3210 ldc_read(ldc_handle_t handle, caddr_t bufp, size_t *sizep)
3211 {
3212 	ldc_chan_t 	*ldcp;
3213 	uint64_t 	rx_head = 0, rx_tail = 0;
3214 	int		rv = 0, exit_val;
3215 
3216 	if (handle == NULL) {
3217 		DWARN(DBG_ALL_LDCS, "ldc_read: invalid channel handle\n");
3218 		return (EINVAL);
3219 	}
3220 
3221 	ldcp = (ldc_chan_t *)handle;
3222 
3223 	/* channel lock */
3224 	mutex_enter(&ldcp->lock);
3225 
3226 	if (ldcp->tstate != TS_UP) {
3227 		DWARN(ldcp->id,
3228 		    "ldc_read: (0x%llx) channel is not in UP state\n",
3229 		    ldcp->id);
3230 		exit_val = ECONNRESET;
3231 	} else {
3232 		exit_val = ldcp->read_p(ldcp, bufp, sizep);
3233 	}
3234 
3235 	/*
3236 	 * if queue has been drained - clear interrupt
3237 	 */
3238 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
3239 	    &ldcp->link_state);
3240 	if (rv != 0) {
3241 		cmn_err(CE_WARN, "ldc_read: (0x%lx) unable to read queue ptrs",
3242 		    ldcp->id);
3243 		mutex_enter(&ldcp->tx_lock);
3244 		i_ldc_reset(ldcp, B_TRUE);
3245 		mutex_exit(&ldcp->tx_lock);
3246 		mutex_exit(&ldcp->lock);
3247 		return (ECONNRESET);
3248 	}
3249 
3250 	if (exit_val == 0) {
3251 		if (ldcp->link_state == LDC_CHANNEL_DOWN ||
3252 		    ldcp->link_state == LDC_CHANNEL_RESET) {
3253 			mutex_enter(&ldcp->tx_lock);
3254 			i_ldc_reset(ldcp, B_FALSE);
3255 			exit_val = ECONNRESET;
3256 			mutex_exit(&ldcp->tx_lock);
3257 		}
3258 		if ((rv == 0) &&
3259 		    (ldcp->rx_intr_state == LDC_INTR_PEND) &&
3260 		    (rx_head == rx_tail)) {
3261 			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
3262 		}
3263 	}
3264 
3265 	mutex_exit(&ldcp->lock);
3266 	return (exit_val);
3267 }
3268 
3269 /*
3270  * Basic raw mondo read -
3271  * no interpretation of mondo contents at all.
3272  *
3273  * Enter and exit with ldcp->lock held by caller
3274  */
3275 static int
3276 i_ldc_read_raw(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep)
3277 {
3278 	uint64_t 	q_size_mask;
3279 	ldc_msg_t 	*msgp;
3280 	uint8_t		*msgbufp;
3281 	int		rv = 0, space;
3282 	uint64_t 	rx_head, rx_tail;
3283 
3284 	space = *sizep;
3285 
3286 	if (space < LDC_PAYLOAD_SIZE_RAW)
3287 		return (ENOBUFS);
3288 
3289 	ASSERT(mutex_owned(&ldcp->lock));
3290 
3291 	/* compute mask for increment */
3292 	q_size_mask = (ldcp->rx_q_entries-1)<<LDC_PACKET_SHIFT;
3293 
3294 	/*
3295 	 * Read packet(s) from the queue
3296 	 */
3297 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
3298 	    &ldcp->link_state);
3299 	if (rv != 0) {
3300 		cmn_err(CE_WARN,
3301 		    "ldc_read_raw: (0x%lx) unable to read queue ptrs",
3302 		    ldcp->id);
3303 		return (EIO);
3304 	}
3305 	D1(ldcp->id, "ldc_read_raw: (0x%llx) rxh=0x%llx,"
3306 		" rxt=0x%llx, st=0x%llx\n",
3307 		ldcp->id, rx_head, rx_tail, ldcp->link_state);
3308 
3309 	/* reset the channel state if the channel went down */
3310 	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
3311 	    ldcp->link_state == LDC_CHANNEL_RESET) {
3312 		mutex_enter(&ldcp->tx_lock);
3313 		i_ldc_reset(ldcp, B_FALSE);
3314 		mutex_exit(&ldcp->tx_lock);
3315 		return (ECONNRESET);
3316 	}
3317 
3318 	/*
3319 	 * Check for empty queue
3320 	 */
3321 	if (rx_head == rx_tail) {
3322 		*sizep = 0;
3323 		return (0);
3324 	}
3325 
3326 	/* get the message */
3327 	msgp = (ldc_msg_t *)(ldcp->rx_q_va + rx_head);
3328 
3329 	/* if channel is in RAW mode, copy data and return */
3330 	msgbufp = (uint8_t *)&(msgp->raw[0]);
3331 
3332 	bcopy(msgbufp, target_bufp, LDC_PAYLOAD_SIZE_RAW);
3333 
3334 	DUMP_PAYLOAD(ldcp->id, msgbufp);
3335 
3336 	*sizep = LDC_PAYLOAD_SIZE_RAW;
3337 
3338 	rx_head = (rx_head + LDC_PACKET_SIZE) & q_size_mask;
3339 	rv = i_ldc_set_rx_head(ldcp, rx_head);
3340 
3341 	return (rv);
3342 }
3343 
3344 /*
3345  * Process LDC mondos to build larger packets
3346  * with either un-reliable or reliable delivery.
3347  *
3348  * Enter and exit with ldcp->lock held by caller
3349  */
3350 static int
3351 i_ldc_read_packet(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep)
3352 {
3353 	int		rv = 0;
3354 	uint64_t 	rx_head = 0, rx_tail = 0;
3355 	uint64_t 	curr_head = 0;
3356 	ldc_msg_t 	*msg;
3357 	caddr_t 	target;
3358 	size_t 		len = 0, bytes_read = 0;
3359 	int 		retries = 0;
3360 	uint64_t 	q_size_mask;
3361 	uint64_t	first_fragment = 0;
3362 
3363 	target = target_bufp;
3364 
3365 	ASSERT(mutex_owned(&ldcp->lock));
3366 
3367 	/* check if the buffer and size are valid */
3368 	if (target_bufp == NULL || *sizep == 0) {
3369 		DWARN(ldcp->id, "ldc_read: (0x%llx) invalid buffer/size\n",
3370 		    ldcp->id);
3371 		return (EINVAL);
3372 	}
3373 
3374 	/* compute mask for increment */
3375 	q_size_mask = (ldcp->rx_q_entries-1)<<LDC_PACKET_SHIFT;
3376 
3377 	/*
3378 	 * Read packet(s) from the queue
3379 	 */
3380 	rv = hv_ldc_rx_get_state(ldcp->id, &curr_head, &rx_tail,
3381 	    &ldcp->link_state);
3382 	if (rv != 0) {
3383 		cmn_err(CE_WARN, "ldc_read: (0x%lx) unable to read queue ptrs",
3384 		    ldcp->id);
3385 		mutex_enter(&ldcp->tx_lock);
3386 		i_ldc_reset(ldcp, B_TRUE);
3387 		mutex_exit(&ldcp->tx_lock);
3388 		return (ECONNRESET);
3389 	}
3390 	D1(ldcp->id, "ldc_read: (0x%llx) chd=0x%llx, tl=0x%llx, st=0x%llx\n",
3391 	    ldcp->id, curr_head, rx_tail, ldcp->link_state);
3392 
3393 	/* reset the channel state if the channel went down */
3394 	if (ldcp->link_state != LDC_CHANNEL_UP)
3395 		goto channel_is_reset;
3396 
3397 	for (;;) {
3398 
3399 		if (curr_head == rx_tail) {
3400 			rv = hv_ldc_rx_get_state(ldcp->id,
3401 			    &rx_head, &rx_tail, &ldcp->link_state);
3402 			if (rv != 0) {
3403 				cmn_err(CE_WARN,
3404 				    "ldc_read: (0x%lx) cannot read queue ptrs",
3405 				    ldcp->id);
3406 				mutex_enter(&ldcp->tx_lock);
3407 				i_ldc_reset(ldcp, B_TRUE);
3408 				mutex_exit(&ldcp->tx_lock);
3409 				return (ECONNRESET);
3410 			}
3411 			if (ldcp->link_state != LDC_CHANNEL_UP)
3412 				goto channel_is_reset;
3413 
3414 			if (curr_head == rx_tail) {
3415 
3416 				/* If in the middle of a fragmented xfer */
3417 				if (first_fragment != 0) {
3418 
3419 					/* wait for ldc_delay usecs */
3420 					drv_usecwait(ldc_delay);
3421 
3422 					if (++retries < ldc_max_retries)
3423 						continue;
3424 
3425 					*sizep = 0;
3426 					ldcp->last_msg_rcd = first_fragment - 1;
3427 					DWARN(DBG_ALL_LDCS, "ldc_read: "
3428 						"(0x%llx) read timeout",
3429 						ldcp->id);
3430 					return (EAGAIN);
3431 				}
3432 				*sizep = 0;
3433 				break;
3434 			}
3435 		}
3436 		retries = 0;
3437 
3438 		D2(ldcp->id,
3439 		    "ldc_read: (0x%llx) chd=0x%llx, rxhd=0x%llx, rxtl=0x%llx\n",
3440 		    ldcp->id, curr_head, rx_head, rx_tail);
3441 
3442 		/* get the message */
3443 		msg = (ldc_msg_t *)(ldcp->rx_q_va + curr_head);
3444 
3445 		DUMP_LDC_PKT(ldcp, "ldc_read received pkt",
3446 		    ldcp->rx_q_va + curr_head);
3447 
3448 		/* Check the message ID for the message received */
3449 		if ((rv = i_ldc_check_seqid(ldcp, msg)) != 0) {
3450 
3451 			DWARN(ldcp->id, "ldc_read: (0x%llx) seqid error, "
3452 			    "q_ptrs=0x%lx,0x%lx", ldcp->id, rx_head, rx_tail);
3453 
3454 			/* throw away data */
3455 			bytes_read = 0;
3456 
3457 			/* Reset last_msg_rcd to start of message */
3458 			if (first_fragment != 0) {
3459 				ldcp->last_msg_rcd = first_fragment - 1;
3460 				first_fragment = 0;
3461 			}
3462 			/*
3463 			 * Send a NACK -- invalid seqid
3464 			 * get the current tail for the response
3465 			 */
3466 			rv = i_ldc_send_pkt(ldcp, msg->type, LDC_NACK,
3467 			    (msg->ctrl & LDC_CTRL_MASK));
3468 			if (rv) {
3469 				cmn_err(CE_NOTE,
3470 				    "ldc_read: (0x%lx) err sending "
3471 				    "NACK msg\n", ldcp->id);
3472 
3473 				/* if cannot send NACK - reset channel */
3474 				mutex_enter(&ldcp->tx_lock);
3475 				i_ldc_reset(ldcp, B_FALSE);
3476 				mutex_exit(&ldcp->tx_lock);
3477 				rv = ECONNRESET;
3478 				break;
3479 			}
3480 
3481 			/* purge receive queue */
3482 			rv = i_ldc_set_rx_head(ldcp, rx_tail);
3483 
3484 			break;
3485 		}
3486 
3487 		/*
3488 		 * Process any messages of type CTRL messages
3489 		 * Future implementations should try to pass these
3490 		 * to LDC link by resetting the intr state.
3491 		 *
3492 		 * NOTE: not done as a switch() as type can be both ctrl+data
3493 		 */
3494 		if (msg->type & LDC_CTRL) {
3495 			if (rv = i_ldc_ctrlmsg(ldcp, msg)) {
3496 				if (rv == EAGAIN)
3497 					continue;
3498 				rv = i_ldc_set_rx_head(ldcp, rx_tail);
3499 				*sizep = 0;
3500 				bytes_read = 0;
3501 				break;
3502 			}
3503 		}
3504 
3505 		/* process data ACKs */
3506 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) {
3507 			if (rv = i_ldc_process_data_ACK(ldcp, msg)) {
3508 				*sizep = 0;
3509 				bytes_read = 0;
3510 				break;
3511 			}
3512 		}
3513 
3514 		/* process data NACKs */
3515 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_NACK)) {
3516 			DWARN(ldcp->id,
3517 			    "ldc_read: (0x%llx) received DATA/NACK", ldcp->id);
3518 			mutex_enter(&ldcp->tx_lock);
3519 			i_ldc_reset(ldcp, B_TRUE);
3520 			mutex_exit(&ldcp->tx_lock);
3521 			return (ECONNRESET);
3522 		}
3523 
3524 		/* process data messages */
3525 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_INFO)) {
3526 
3527 			uint8_t *msgbuf = (uint8_t *)(
3528 				(ldcp->mode == LDC_MODE_RELIABLE ||
3529 				ldcp->mode == LDC_MODE_STREAM)
3530 				? msg->rdata : msg->udata);
3531 
3532 			D2(ldcp->id,
3533 			    "ldc_read: (0x%llx) received data msg\n", ldcp->id);
3534 
3535 			/* get the packet length */
3536 			len = (msg->env & LDC_LEN_MASK);
3537 
3538 				/*
3539 				 * FUTURE OPTIMIZATION:
3540 				 * dont need to set q head for every
3541 				 * packet we read just need to do this when
3542 				 * we are done or need to wait for more
3543 				 * mondos to make a full packet - this is
3544 				 * currently expensive.
3545 				 */
3546 
3547 			if (first_fragment == 0) {
3548 
3549 				/*
3550 				 * first packets should always have the start
3551 				 * bit set (even for a single packet). If not
3552 				 * throw away the packet
3553 				 */
3554 				if (!(msg->env & LDC_FRAG_START)) {
3555 
3556 					DWARN(DBG_ALL_LDCS,
3557 					    "ldc_read: (0x%llx) not start - "
3558 					    "frag=%x\n", ldcp->id,
3559 					    (msg->env) & LDC_FRAG_MASK);
3560 
3561 					/* toss pkt, inc head, cont reading */
3562 					bytes_read = 0;
3563 					target = target_bufp;
3564 					curr_head =
3565 						(curr_head + LDC_PACKET_SIZE)
3566 						& q_size_mask;
3567 					if (rv = i_ldc_set_rx_head(ldcp,
3568 						curr_head))
3569 						break;
3570 
3571 					continue;
3572 				}
3573 
3574 				first_fragment = msg->seqid;
3575 			} else {
3576 				/* check to see if this is a pkt w/ START bit */
3577 				if (msg->env & LDC_FRAG_START) {
3578 					DWARN(DBG_ALL_LDCS,
3579 					    "ldc_read:(0x%llx) unexpected pkt"
3580 					    " env=0x%x discarding %d bytes,"
3581 					    " lastmsg=%d, currentmsg=%d\n",
3582 					    ldcp->id, msg->env&LDC_FRAG_MASK,
3583 					    bytes_read, ldcp->last_msg_rcd,
3584 					    msg->seqid);
3585 
3586 					/* throw data we have read so far */
3587 					bytes_read = 0;
3588 					target = target_bufp;
3589 					first_fragment = msg->seqid;
3590 
3591 					if (rv = i_ldc_set_rx_head(ldcp,
3592 						curr_head))
3593 						break;
3594 				}
3595 			}
3596 
3597 			/* copy (next) pkt into buffer */
3598 			if (len <= (*sizep - bytes_read)) {
3599 				bcopy(msgbuf, target, len);
3600 				target += len;
3601 				bytes_read += len;
3602 			} else {
3603 				/*
3604 				 * there is not enough space in the buffer to
3605 				 * read this pkt. throw message away & continue
3606 				 * reading data from queue
3607 				 */
3608 				DWARN(DBG_ALL_LDCS,
3609 				    "ldc_read: (0x%llx) buffer too small, "
3610 				    "head=0x%lx, expect=%d, got=%d\n", ldcp->id,
3611 				    curr_head, *sizep, bytes_read+len);
3612 
3613 				first_fragment = 0;
3614 				target = target_bufp;
3615 				bytes_read = 0;
3616 
3617 				/* throw away everything received so far */
3618 				if (rv = i_ldc_set_rx_head(ldcp, curr_head))
3619 					break;
3620 
3621 				/* continue reading remaining pkts */
3622 				continue;
3623 			}
3624 		}
3625 
3626 		/* set the message id */
3627 		ldcp->last_msg_rcd = msg->seqid;
3628 
3629 		/* move the head one position */
3630 		curr_head = (curr_head + LDC_PACKET_SIZE) & q_size_mask;
3631 
3632 		if (msg->env & LDC_FRAG_STOP) {
3633 
3634 			/*
3635 			 * All pkts that are part of this fragmented transfer
3636 			 * have been read or this was a single pkt read
3637 			 * or there was an error
3638 			 */
3639 
3640 			/* set the queue head */
3641 			if (rv = i_ldc_set_rx_head(ldcp, curr_head))
3642 				bytes_read = 0;
3643 
3644 			*sizep = bytes_read;
3645 
3646 			break;
3647 		}
3648 
3649 		/* advance head if it is a DATA ACK */
3650 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) {
3651 
3652 			/* set the queue head */
3653 			if (rv = i_ldc_set_rx_head(ldcp, curr_head)) {
3654 				bytes_read = 0;
3655 				break;
3656 			}
3657 
3658 			D2(ldcp->id, "ldc_read: (0x%llx) set ACK qhead 0x%llx",
3659 			    ldcp->id, curr_head);
3660 		}
3661 
3662 	} /* for (;;) */
3663 
3664 
3665 	/*
3666 	 * If useful data was read - Send msg ACK
3667 	 * OPTIMIZE: do not send ACK for all msgs - use some frequency
3668 	 */
3669 	if ((bytes_read > 0) && (ldcp->mode == LDC_MODE_RELIABLE ||
3670 		ldcp->mode == LDC_MODE_STREAM)) {
3671 
3672 		rv = i_ldc_send_pkt(ldcp, LDC_DATA, LDC_ACK, 0);
3673 		if (rv && rv != EWOULDBLOCK) {
3674 			cmn_err(CE_NOTE,
3675 			    "ldc_read: (0x%lx) cannot send ACK\n", ldcp->id);
3676 
3677 			/* if cannot send ACK - reset channel */
3678 			goto channel_is_reset;
3679 		}
3680 	}
3681 
3682 	D2(ldcp->id, "ldc_read: (0x%llx) end size=%d", ldcp->id, *sizep);
3683 
3684 	return (rv);
3685 
3686 channel_is_reset:
3687 	mutex_enter(&ldcp->tx_lock);
3688 	i_ldc_reset(ldcp, B_FALSE);
3689 	mutex_exit(&ldcp->tx_lock);
3690 	return (ECONNRESET);
3691 }
3692 
3693 /*
3694  * Use underlying reliable packet mechanism to fetch
3695  * and buffer incoming packets so we can hand them back as
3696  * a basic byte stream.
3697  *
3698  * Enter and exit with ldcp->lock held by caller
3699  */
3700 static int
3701 i_ldc_read_stream(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep)
3702 {
3703 	int	rv;
3704 	size_t	size;
3705 
3706 	ASSERT(mutex_owned(&ldcp->lock));
3707 
3708 	D2(ldcp->id, "i_ldc_read_stream: (0x%llx) buffer size=%d",
3709 		ldcp->id, *sizep);
3710 
3711 	if (ldcp->stream_remains == 0) {
3712 		size = ldcp->mtu;
3713 		rv = i_ldc_read_packet(ldcp,
3714 			(caddr_t)ldcp->stream_bufferp, &size);
3715 		D2(ldcp->id, "i_ldc_read_stream: read packet (0x%llx) size=%d",
3716 			ldcp->id, size);
3717 
3718 		if (rv != 0)
3719 			return (rv);
3720 
3721 		ldcp->stream_remains = size;
3722 		ldcp->stream_offset = 0;
3723 	}
3724 
3725 	size = MIN(ldcp->stream_remains, *sizep);
3726 
3727 	bcopy(ldcp->stream_bufferp + ldcp->stream_offset, target_bufp, size);
3728 	ldcp->stream_offset += size;
3729 	ldcp->stream_remains -= size;
3730 
3731 	D2(ldcp->id, "i_ldc_read_stream: (0x%llx) fill from buffer size=%d",
3732 		ldcp->id, size);
3733 
3734 	*sizep = size;
3735 	return (0);
3736 }
3737 
3738 /*
3739  * Write specified amount of bytes to the channel
3740  * in multiple pkts of pkt_payload size. Each
3741  * packet is tagged with an unique packet ID in
3742  * the case of a reliable link.
3743  *
3744  * On return, size contains the number of bytes written.
3745  */
3746 int
3747 ldc_write(ldc_handle_t handle, caddr_t buf, size_t *sizep)
3748 {
3749 	ldc_chan_t	*ldcp;
3750 	int		rv = 0;
3751 
3752 	if (handle == NULL) {
3753 		DWARN(DBG_ALL_LDCS, "ldc_write: invalid channel handle\n");
3754 		return (EINVAL);
3755 	}
3756 	ldcp = (ldc_chan_t *)handle;
3757 
3758 	/* check if writes can occur */
3759 	if (!mutex_tryenter(&ldcp->tx_lock)) {
3760 		/*
3761 		 * Could not get the lock - channel could
3762 		 * be in the process of being unconfigured
3763 		 * or reader has encountered an error
3764 		 */
3765 		return (EAGAIN);
3766 	}
3767 
3768 	/* check if non-zero data to write */
3769 	if (buf == NULL || sizep == NULL) {
3770 		DWARN(ldcp->id, "ldc_write: (0x%llx) invalid data write\n",
3771 		    ldcp->id);
3772 		mutex_exit(&ldcp->tx_lock);
3773 		return (EINVAL);
3774 	}
3775 
3776 	if (*sizep == 0) {
3777 		DWARN(ldcp->id, "ldc_write: (0x%llx) write size of zero\n",
3778 		    ldcp->id);
3779 		mutex_exit(&ldcp->tx_lock);
3780 		return (0);
3781 	}
3782 
3783 	/* Check if channel is UP for data exchange */
3784 	if (ldcp->tstate != TS_UP) {
3785 		DWARN(ldcp->id,
3786 		    "ldc_write: (0x%llx) channel is not in UP state\n",
3787 		    ldcp->id);
3788 		*sizep = 0;
3789 		rv = ECONNRESET;
3790 	} else {
3791 		rv = ldcp->write_p(ldcp, buf, sizep);
3792 	}
3793 
3794 	mutex_exit(&ldcp->tx_lock);
3795 
3796 	return (rv);
3797 }
3798 
3799 /*
3800  * Write a raw packet to the channel
3801  * On return, size contains the number of bytes written.
3802  */
3803 static int
3804 i_ldc_write_raw(ldc_chan_t *ldcp, caddr_t buf, size_t *sizep)
3805 {
3806 	ldc_msg_t 	*ldcmsg;
3807 	uint64_t 	tx_head, tx_tail, new_tail;
3808 	int		rv = 0;
3809 	size_t		size;
3810 
3811 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
3812 	ASSERT(ldcp->mode == LDC_MODE_RAW);
3813 
3814 	size = *sizep;
3815 
3816 	/*
3817 	 * Check to see if the packet size is less than or
3818 	 * equal to packet size support in raw mode
3819 	 */
3820 	if (size > ldcp->pkt_payload) {
3821 		DWARN(ldcp->id,
3822 		    "ldc_write: (0x%llx) invalid size (0x%llx) for RAW mode\n",
3823 		    ldcp->id, *sizep);
3824 		*sizep = 0;
3825 		return (EMSGSIZE);
3826 	}
3827 
3828 	/* get the qptrs for the tx queue */
3829 	rv = hv_ldc_tx_get_state(ldcp->id,
3830 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
3831 	if (rv != 0) {
3832 		cmn_err(CE_WARN,
3833 		    "ldc_write: (0x%lx) cannot read queue ptrs\n", ldcp->id);
3834 		*sizep = 0;
3835 		return (EIO);
3836 	}
3837 
3838 	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
3839 	    ldcp->link_state == LDC_CHANNEL_RESET) {
3840 		DWARN(ldcp->id,
3841 		    "ldc_write: (0x%llx) channel down/reset\n", ldcp->id);
3842 
3843 		*sizep = 0;
3844 		if (mutex_tryenter(&ldcp->lock)) {
3845 			i_ldc_reset(ldcp, B_FALSE);
3846 			mutex_exit(&ldcp->lock);
3847 		} else {
3848 			/*
3849 			 * Release Tx lock, and then reacquire channel
3850 			 * and Tx lock in correct order
3851 			 */
3852 			mutex_exit(&ldcp->tx_lock);
3853 			mutex_enter(&ldcp->lock);
3854 			mutex_enter(&ldcp->tx_lock);
3855 			i_ldc_reset(ldcp, B_FALSE);
3856 			mutex_exit(&ldcp->lock);
3857 		}
3858 		return (ECONNRESET);
3859 	}
3860 
3861 	tx_tail = ldcp->tx_tail;
3862 	tx_head = ldcp->tx_head;
3863 	new_tail = (tx_tail + LDC_PACKET_SIZE) &
3864 		((ldcp->tx_q_entries-1) << LDC_PACKET_SHIFT);
3865 
3866 	if (new_tail == tx_head) {
3867 		DWARN(DBG_ALL_LDCS,
3868 		    "ldc_write: (0x%llx) TX queue is full\n", ldcp->id);
3869 		*sizep = 0;
3870 		return (EWOULDBLOCK);
3871 	}
3872 
3873 	D2(ldcp->id, "ldc_write: (0x%llx) start xfer size=%d",
3874 	    ldcp->id, size);
3875 
3876 	/* Send the data now */
3877 	ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
3878 
3879 	/* copy the data into pkt */
3880 	bcopy((uint8_t *)buf, ldcmsg, size);
3881 
3882 	/* increment tail */
3883 	tx_tail = new_tail;
3884 
3885 	/*
3886 	 * All packets have been copied into the TX queue
3887 	 * update the tail ptr in the HV
3888 	 */
3889 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
3890 	if (rv) {
3891 		if (rv == EWOULDBLOCK) {
3892 			DWARN(ldcp->id, "ldc_write: (0x%llx) write timed out\n",
3893 			    ldcp->id);
3894 			*sizep = 0;
3895 			return (EWOULDBLOCK);
3896 		}
3897 
3898 		*sizep = 0;
3899 		if (mutex_tryenter(&ldcp->lock)) {
3900 			i_ldc_reset(ldcp, B_FALSE);
3901 			mutex_exit(&ldcp->lock);
3902 		} else {
3903 			/*
3904 			 * Release Tx lock, and then reacquire channel
3905 			 * and Tx lock in correct order
3906 			 */
3907 			mutex_exit(&ldcp->tx_lock);
3908 			mutex_enter(&ldcp->lock);
3909 			mutex_enter(&ldcp->tx_lock);
3910 			i_ldc_reset(ldcp, B_FALSE);
3911 			mutex_exit(&ldcp->lock);
3912 		}
3913 		return (ECONNRESET);
3914 	}
3915 
3916 	ldcp->tx_tail = tx_tail;
3917 	*sizep = size;
3918 
3919 	D2(ldcp->id, "ldc_write: (0x%llx) end xfer size=%d", ldcp->id, size);
3920 
3921 	return (rv);
3922 }
3923 
3924 
3925 /*
3926  * Write specified amount of bytes to the channel
3927  * in multiple pkts of pkt_payload size. Each
3928  * packet is tagged with an unique packet ID in
3929  * the case of a reliable link.
3930  *
3931  * On return, size contains the number of bytes written.
3932  * This function needs to ensure that the write size is < MTU size
3933  */
3934 static int
3935 i_ldc_write_packet(ldc_chan_t *ldcp, caddr_t buf, size_t *size)
3936 {
3937 	ldc_msg_t 	*ldcmsg;
3938 	uint64_t 	tx_head, tx_tail, new_tail, start;
3939 	uint64_t	txq_size_mask, numavail;
3940 	uint8_t 	*msgbuf, *source = (uint8_t *)buf;
3941 	size_t 		len, bytes_written = 0, remaining;
3942 	int		rv;
3943 	uint32_t	curr_seqid;
3944 
3945 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
3946 
3947 	ASSERT(ldcp->mode == LDC_MODE_RELIABLE ||
3948 		ldcp->mode == LDC_MODE_UNRELIABLE ||
3949 		ldcp->mode == LDC_MODE_STREAM);
3950 
3951 	/* compute mask for increment */
3952 	txq_size_mask = (ldcp->tx_q_entries - 1) << LDC_PACKET_SHIFT;
3953 
3954 	/* get the qptrs for the tx queue */
3955 	rv = hv_ldc_tx_get_state(ldcp->id,
3956 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
3957 	if (rv != 0) {
3958 		cmn_err(CE_WARN,
3959 		    "ldc_write: (0x%lx) cannot read queue ptrs\n", ldcp->id);
3960 		*size = 0;
3961 		return (EIO);
3962 	}
3963 
3964 	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
3965 	    ldcp->link_state == LDC_CHANNEL_RESET) {
3966 		DWARN(ldcp->id,
3967 		    "ldc_write: (0x%llx) channel down/reset\n", ldcp->id);
3968 		*size = 0;
3969 		if (mutex_tryenter(&ldcp->lock)) {
3970 			i_ldc_reset(ldcp, B_FALSE);
3971 			mutex_exit(&ldcp->lock);
3972 		} else {
3973 			/*
3974 			 * Release Tx lock, and then reacquire channel
3975 			 * and Tx lock in correct order
3976 			 */
3977 			mutex_exit(&ldcp->tx_lock);
3978 			mutex_enter(&ldcp->lock);
3979 			mutex_enter(&ldcp->tx_lock);
3980 			i_ldc_reset(ldcp, B_FALSE);
3981 			mutex_exit(&ldcp->lock);
3982 		}
3983 		return (ECONNRESET);
3984 	}
3985 
3986 	tx_tail = ldcp->tx_tail;
3987 	new_tail = (tx_tail + LDC_PACKET_SIZE) %
3988 		(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
3989 
3990 	/*
3991 	 * Link mode determines whether we use HV Tx head or the
3992 	 * private protocol head (corresponding to last ACKd pkt) for
3993 	 * determining how much we can write
3994 	 */
3995 	tx_head = (ldcp->mode == LDC_MODE_RELIABLE ||
3996 		ldcp->mode == LDC_MODE_STREAM)
3997 		? ldcp->tx_ackd_head : ldcp->tx_head;
3998 	if (new_tail == tx_head) {
3999 		DWARN(DBG_ALL_LDCS,
4000 		    "ldc_write: (0x%llx) TX queue is full\n", ldcp->id);
4001 		*size = 0;
4002 		return (EWOULDBLOCK);
4003 	}
4004 
4005 	/*
4006 	 * Make sure that the LDC Tx queue has enough space
4007 	 */
4008 	numavail = (tx_head >> LDC_PACKET_SHIFT) - (tx_tail >> LDC_PACKET_SHIFT)
4009 		+ ldcp->tx_q_entries - 1;
4010 	numavail %= ldcp->tx_q_entries;
4011 
4012 	if (*size > (numavail * ldcp->pkt_payload)) {
4013 		DWARN(DBG_ALL_LDCS,
4014 		    "ldc_write: (0x%llx) TX queue has no space\n", ldcp->id);
4015 		return (EWOULDBLOCK);
4016 	}
4017 
4018 	D2(ldcp->id, "ldc_write: (0x%llx) start xfer size=%d",
4019 	    ldcp->id, *size);
4020 
4021 	/* Send the data now */
4022 	bytes_written = 0;
4023 	curr_seqid = ldcp->last_msg_snt;
4024 	start = tx_tail;
4025 
4026 	while (*size > bytes_written) {
4027 
4028 		ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
4029 
4030 		msgbuf = (uint8_t *)((ldcp->mode == LDC_MODE_RELIABLE ||
4031 			ldcp->mode == LDC_MODE_STREAM)
4032 			? ldcmsg->rdata : ldcmsg->udata);
4033 
4034 		ldcmsg->type = LDC_DATA;
4035 		ldcmsg->stype = LDC_INFO;
4036 		ldcmsg->ctrl = 0;
4037 
4038 		remaining = *size - bytes_written;
4039 		len = min(ldcp->pkt_payload, remaining);
4040 		ldcmsg->env = (uint8_t)len;
4041 
4042 		curr_seqid++;
4043 		ldcmsg->seqid = curr_seqid;
4044 
4045 		/* copy the data into pkt */
4046 		bcopy(source, msgbuf, len);
4047 
4048 		source += len;
4049 		bytes_written += len;
4050 
4051 		/* increment tail */
4052 		tx_tail = (tx_tail + LDC_PACKET_SIZE) & txq_size_mask;
4053 
4054 		ASSERT(tx_tail != tx_head);
4055 	}
4056 
4057 	/* Set the start and stop bits */
4058 	ldcmsg->env |= LDC_FRAG_STOP;
4059 	ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + start);
4060 	ldcmsg->env |= LDC_FRAG_START;
4061 
4062 	/*
4063 	 * All packets have been copied into the TX queue
4064 	 * update the tail ptr in the HV
4065 	 */
4066 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
4067 	if (rv == 0) {
4068 		ldcp->tx_tail = tx_tail;
4069 		ldcp->last_msg_snt = curr_seqid;
4070 		*size = bytes_written;
4071 	} else {
4072 		int rv2;
4073 
4074 		if (rv != EWOULDBLOCK) {
4075 			*size = 0;
4076 			if (mutex_tryenter(&ldcp->lock)) {
4077 				i_ldc_reset(ldcp, B_FALSE);
4078 				mutex_exit(&ldcp->lock);
4079 			} else {
4080 				/*
4081 				 * Release Tx lock, and then reacquire channel
4082 				 * and Tx lock in correct order
4083 				 */
4084 				mutex_exit(&ldcp->tx_lock);
4085 				mutex_enter(&ldcp->lock);
4086 				mutex_enter(&ldcp->tx_lock);
4087 				i_ldc_reset(ldcp, B_FALSE);
4088 				mutex_exit(&ldcp->lock);
4089 			}
4090 			return (ECONNRESET);
4091 		}
4092 
4093 		D1(ldcp->id, "hv_tx_set_tail returns 0x%x (head 0x%x, "
4094 			"old tail 0x%x, new tail 0x%x, qsize=0x%x)\n",
4095 			rv, ldcp->tx_head, ldcp->tx_tail, tx_tail,
4096 			(ldcp->tx_q_entries << LDC_PACKET_SHIFT));
4097 
4098 		rv2 = hv_ldc_tx_get_state(ldcp->id,
4099 		    &tx_head, &tx_tail, &ldcp->link_state);
4100 
4101 		D1(ldcp->id, "hv_ldc_tx_get_state returns 0x%x "
4102 			"(head 0x%x, tail 0x%x state 0x%x)\n",
4103 			rv2, tx_head, tx_tail, ldcp->link_state);
4104 
4105 		*size = 0;
4106 	}
4107 
4108 	D2(ldcp->id, "ldc_write: (0x%llx) end xfer size=%d", ldcp->id, *size);
4109 
4110 	return (rv);
4111 }
4112 
4113 /*
4114  * Write specified amount of bytes to the channel
4115  * in multiple pkts of pkt_payload size. Each
4116  * packet is tagged with an unique packet ID in
4117  * the case of a reliable link.
4118  *
4119  * On return, size contains the number of bytes written.
4120  * This function needs to ensure that the write size is < MTU size
4121  */
4122 static int
4123 i_ldc_write_stream(ldc_chan_t *ldcp, caddr_t buf, size_t *sizep)
4124 {
4125 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
4126 	ASSERT(ldcp->mode == LDC_MODE_STREAM);
4127 
4128 	/* Truncate packet to max of MTU size */
4129 	if (*sizep > ldcp->mtu) *sizep = ldcp->mtu;
4130 	return (i_ldc_write_packet(ldcp, buf, sizep));
4131 }
4132 
4133 
4134 /*
4135  * Interfaces for channel nexus to register/unregister with LDC module
4136  * The nexus will register functions to be used to register individual
4137  * channels with the nexus and enable interrupts for the channels
4138  */
4139 int
4140 ldc_register(ldc_cnex_t *cinfo)
4141 {
4142 	ldc_chan_t	*ldcp;
4143 
4144 	if (cinfo == NULL || cinfo->dip == NULL ||
4145 	    cinfo->reg_chan == NULL || cinfo->unreg_chan == NULL ||
4146 	    cinfo->add_intr == NULL || cinfo->rem_intr == NULL ||
4147 	    cinfo->clr_intr == NULL) {
4148 
4149 		DWARN(DBG_ALL_LDCS, "ldc_register: invalid nexus info\n");
4150 		return (EINVAL);
4151 	}
4152 
4153 	mutex_enter(&ldcssp->lock);
4154 
4155 	/* nexus registration */
4156 	ldcssp->cinfo.dip = cinfo->dip;
4157 	ldcssp->cinfo.reg_chan = cinfo->reg_chan;
4158 	ldcssp->cinfo.unreg_chan = cinfo->unreg_chan;
4159 	ldcssp->cinfo.add_intr = cinfo->add_intr;
4160 	ldcssp->cinfo.rem_intr = cinfo->rem_intr;
4161 	ldcssp->cinfo.clr_intr = cinfo->clr_intr;
4162 
4163 	/* register any channels that might have been previously initialized */
4164 	ldcp = ldcssp->chan_list;
4165 	while (ldcp) {
4166 		if ((ldcp->tstate & TS_QCONF_RDY) &&
4167 		    (ldcp->tstate & TS_CNEX_RDY) == 0)
4168 			(void) i_ldc_register_channel(ldcp);
4169 
4170 		ldcp = ldcp->next;
4171 	}
4172 
4173 	mutex_exit(&ldcssp->lock);
4174 
4175 	return (0);
4176 }
4177 
4178 int
4179 ldc_unregister(ldc_cnex_t *cinfo)
4180 {
4181 	if (cinfo == NULL || cinfo->dip == NULL) {
4182 		DWARN(DBG_ALL_LDCS, "ldc_unregister: invalid nexus info\n");
4183 		return (EINVAL);
4184 	}
4185 
4186 	mutex_enter(&ldcssp->lock);
4187 
4188 	if (cinfo->dip != ldcssp->cinfo.dip) {
4189 		DWARN(DBG_ALL_LDCS, "ldc_unregister: invalid dip\n");
4190 		mutex_exit(&ldcssp->lock);
4191 		return (EINVAL);
4192 	}
4193 
4194 	/* nexus unregister */
4195 	ldcssp->cinfo.dip = NULL;
4196 	ldcssp->cinfo.reg_chan = NULL;
4197 	ldcssp->cinfo.unreg_chan = NULL;
4198 	ldcssp->cinfo.add_intr = NULL;
4199 	ldcssp->cinfo.rem_intr = NULL;
4200 	ldcssp->cinfo.clr_intr = NULL;
4201 
4202 	mutex_exit(&ldcssp->lock);
4203 
4204 	return (0);
4205 }
4206 
4207 
4208 /* ------------------------------------------------------------------------- */
4209 
4210 /*
4211  * Allocate a memory handle for the channel and link it into the list
4212  * Also choose which memory table to use if this is the first handle
4213  * being assigned to this channel
4214  */
4215 int
4216 ldc_mem_alloc_handle(ldc_handle_t handle, ldc_mem_handle_t *mhandle)
4217 {
4218 	ldc_chan_t 	*ldcp;
4219 	ldc_mhdl_t	*mhdl;
4220 
4221 	if (handle == NULL) {
4222 		DWARN(DBG_ALL_LDCS,
4223 		    "ldc_mem_alloc_handle: invalid channel handle\n");
4224 		return (EINVAL);
4225 	}
4226 	ldcp = (ldc_chan_t *)handle;
4227 
4228 	mutex_enter(&ldcp->lock);
4229 
4230 	/* check to see if channel is initalized */
4231 	if ((ldcp->tstate & ~TS_IN_RESET) < TS_INIT) {
4232 		DWARN(ldcp->id,
4233 		    "ldc_mem_alloc_handle: (0x%llx) channel not initialized\n",
4234 		    ldcp->id);
4235 		mutex_exit(&ldcp->lock);
4236 		return (EINVAL);
4237 	}
4238 
4239 	/* allocate handle for channel */
4240 	mhdl = kmem_cache_alloc(ldcssp->memhdl_cache, KM_SLEEP);
4241 
4242 	/* initialize the lock */
4243 	mutex_init(&mhdl->lock, NULL, MUTEX_DRIVER, NULL);
4244 
4245 	mhdl->myshadow = B_FALSE;
4246 	mhdl->memseg = NULL;
4247 	mhdl->ldcp = ldcp;
4248 	mhdl->status = LDC_UNBOUND;
4249 
4250 	/* insert memory handle (@ head) into list */
4251 	if (ldcp->mhdl_list == NULL) {
4252 		ldcp->mhdl_list = mhdl;
4253 		mhdl->next = NULL;
4254 	} else {
4255 		/* insert @ head */
4256 		mhdl->next = ldcp->mhdl_list;
4257 		ldcp->mhdl_list = mhdl;
4258 	}
4259 
4260 	/* return the handle */
4261 	*mhandle = (ldc_mem_handle_t)mhdl;
4262 
4263 	mutex_exit(&ldcp->lock);
4264 
4265 	D1(ldcp->id, "ldc_mem_alloc_handle: (0x%llx) allocated handle 0x%llx\n",
4266 	    ldcp->id, mhdl);
4267 
4268 	return (0);
4269 }
4270 
4271 /*
4272  * Free memory handle for the channel and unlink it from the list
4273  */
4274 int
4275 ldc_mem_free_handle(ldc_mem_handle_t mhandle)
4276 {
4277 	ldc_mhdl_t 	*mhdl, *phdl;
4278 	ldc_chan_t 	*ldcp;
4279 
4280 	if (mhandle == NULL) {
4281 		DWARN(DBG_ALL_LDCS,
4282 		    "ldc_mem_free_handle: invalid memory handle\n");
4283 		return (EINVAL);
4284 	}
4285 	mhdl = (ldc_mhdl_t *)mhandle;
4286 
4287 	mutex_enter(&mhdl->lock);
4288 
4289 	ldcp = mhdl->ldcp;
4290 
4291 	if (mhdl->status == LDC_BOUND || mhdl->status == LDC_MAPPED) {
4292 		DWARN(ldcp->id,
4293 		    "ldc_mem_free_handle: cannot free, 0x%llx hdl bound\n",
4294 		    mhdl);
4295 		mutex_exit(&mhdl->lock);
4296 		return (EINVAL);
4297 	}
4298 	mutex_exit(&mhdl->lock);
4299 
4300 	mutex_enter(&ldcp->mlist_lock);
4301 
4302 	phdl = ldcp->mhdl_list;
4303 
4304 	/* first handle */
4305 	if (phdl == mhdl) {
4306 		ldcp->mhdl_list = mhdl->next;
4307 		mutex_destroy(&mhdl->lock);
4308 		kmem_cache_free(ldcssp->memhdl_cache, mhdl);
4309 
4310 		D1(ldcp->id,
4311 		    "ldc_mem_free_handle: (0x%llx) freed handle 0x%llx\n",
4312 		    ldcp->id, mhdl);
4313 	} else {
4314 		/* walk the list - unlink and free */
4315 		while (phdl != NULL) {
4316 			if (phdl->next == mhdl) {
4317 				phdl->next = mhdl->next;
4318 				mutex_destroy(&mhdl->lock);
4319 				kmem_cache_free(ldcssp->memhdl_cache, mhdl);
4320 				D1(ldcp->id,
4321 				    "ldc_mem_free_handle: (0x%llx) freed "
4322 				    "handle 0x%llx\n", ldcp->id, mhdl);
4323 				break;
4324 			}
4325 			phdl = phdl->next;
4326 		}
4327 	}
4328 
4329 	if (phdl == NULL) {
4330 		DWARN(ldcp->id,
4331 		    "ldc_mem_free_handle: invalid handle 0x%llx\n", mhdl);
4332 		mutex_exit(&ldcp->mlist_lock);
4333 		return (EINVAL);
4334 	}
4335 
4336 	mutex_exit(&ldcp->mlist_lock);
4337 
4338 	return (0);
4339 }
4340 
4341 /*
4342  * Bind a memory handle to a virtual address.
4343  * The virtual address is converted to the corresponding real addresses.
4344  * Returns pointer to the first ldc_mem_cookie and the total number
4345  * of cookies for this virtual address. Other cookies can be obtained
4346  * using the ldc_mem_nextcookie() call. If the pages are stored in
4347  * consecutive locations in the table, a single cookie corresponding to
4348  * the first location is returned. The cookie size spans all the entries.
4349  *
4350  * If the VA corresponds to a page that is already being exported, reuse
4351  * the page and do not export it again. Bump the page's use count.
4352  */
4353 int
4354 ldc_mem_bind_handle(ldc_mem_handle_t mhandle, caddr_t vaddr, size_t len,
4355     uint8_t mtype, uint8_t perm, ldc_mem_cookie_t *cookie, uint32_t *ccount)
4356 {
4357 	ldc_mhdl_t	*mhdl;
4358 	ldc_chan_t 	*ldcp;
4359 	ldc_mtbl_t	*mtbl;
4360 	ldc_memseg_t	*memseg;
4361 	ldc_mte_t	tmp_mte;
4362 	uint64_t	index, prev_index = 0;
4363 	int64_t		cookie_idx;
4364 	uintptr_t	raddr, ra_aligned;
4365 	uint64_t	psize, poffset, v_offset;
4366 	uint64_t	pg_shift, pg_size, pg_size_code, pg_mask;
4367 	pgcnt_t		npages;
4368 	caddr_t		v_align, addr;
4369 	int 		i, rv;
4370 
4371 	if (mhandle == NULL) {
4372 		DWARN(DBG_ALL_LDCS,
4373 		    "ldc_mem_bind_handle: invalid memory handle\n");
4374 		return (EINVAL);
4375 	}
4376 	mhdl = (ldc_mhdl_t *)mhandle;
4377 	ldcp = mhdl->ldcp;
4378 
4379 	/* clear count */
4380 	*ccount = 0;
4381 
4382 	mutex_enter(&mhdl->lock);
4383 
4384 	if (mhdl->status == LDC_BOUND || mhdl->memseg != NULL) {
4385 		DWARN(ldcp->id,
4386 		    "ldc_mem_bind_handle: (0x%x) handle already bound\n",
4387 		    mhandle);
4388 		mutex_exit(&mhdl->lock);
4389 		return (EINVAL);
4390 	}
4391 
4392 	/* Force address and size to be 8-byte aligned */
4393 	if ((((uintptr_t)vaddr | len) & 0x7) != 0) {
4394 		DWARN(ldcp->id,
4395 		    "ldc_mem_bind_handle: addr/size is not 8-byte aligned\n");
4396 		mutex_exit(&mhdl->lock);
4397 		return (EINVAL);
4398 	}
4399 
4400 	/*
4401 	 * If this channel is binding a memory handle for the
4402 	 * first time allocate it a memory map table and initialize it
4403 	 */
4404 	if ((mtbl = ldcp->mtbl) == NULL) {
4405 
4406 		mutex_enter(&ldcp->lock);
4407 
4408 		/* Allocate and initialize the map table structure */
4409 		mtbl = kmem_zalloc(sizeof (ldc_mtbl_t), KM_SLEEP);
4410 		mtbl->num_entries = mtbl->num_avail = ldc_maptable_entries;
4411 		mtbl->size = ldc_maptable_entries * sizeof (ldc_mte_slot_t);
4412 		mtbl->next_entry = NULL;
4413 		mtbl->contigmem = B_TRUE;
4414 
4415 		/* Allocate the table itself */
4416 		mtbl->table = (ldc_mte_slot_t *)
4417 			contig_mem_alloc_align(mtbl->size, MMU_PAGESIZE);
4418 		if (mtbl->table == NULL) {
4419 
4420 			/* allocate a page of memory using kmem_alloc */
4421 			mtbl->table = kmem_alloc(MMU_PAGESIZE, KM_SLEEP);
4422 			mtbl->size = MMU_PAGESIZE;
4423 			mtbl->contigmem = B_FALSE;
4424 			mtbl->num_entries = mtbl->num_avail =
4425 				mtbl->size / sizeof (ldc_mte_slot_t);
4426 			DWARN(ldcp->id,
4427 			    "ldc_mem_bind_handle: (0x%llx) reduced tbl size "
4428 			    "to %lx entries\n", ldcp->id, mtbl->num_entries);
4429 		}
4430 
4431 		/* zero out the memory */
4432 		bzero(mtbl->table, mtbl->size);
4433 
4434 		/* initialize the lock */
4435 		mutex_init(&mtbl->lock, NULL, MUTEX_DRIVER, NULL);
4436 
4437 		/* register table for this channel */
4438 		rv = hv_ldc_set_map_table(ldcp->id,
4439 		    va_to_pa(mtbl->table), mtbl->num_entries);
4440 		if (rv != 0) {
4441 			cmn_err(CE_WARN,
4442 			    "ldc_mem_bind_handle: (0x%lx) err %d mapping tbl",
4443 			    ldcp->id, rv);
4444 			if (mtbl->contigmem)
4445 				contig_mem_free(mtbl->table, mtbl->size);
4446 			else
4447 				kmem_free(mtbl->table, mtbl->size);
4448 			mutex_destroy(&mtbl->lock);
4449 			kmem_free(mtbl, sizeof (ldc_mtbl_t));
4450 			mutex_exit(&ldcp->lock);
4451 			mutex_exit(&mhdl->lock);
4452 			return (EIO);
4453 		}
4454 
4455 		ldcp->mtbl = mtbl;
4456 		mutex_exit(&ldcp->lock);
4457 
4458 		D1(ldcp->id,
4459 		    "ldc_mem_bind_handle: (0x%llx) alloc'd map table 0x%llx\n",
4460 		    ldcp->id, ldcp->mtbl->table);
4461 	}
4462 
4463 	/* FUTURE: get the page size, pgsz code, and shift */
4464 	pg_size = MMU_PAGESIZE;
4465 	pg_size_code = page_szc(pg_size);
4466 	pg_shift = page_get_shift(pg_size_code);
4467 	pg_mask = ~(pg_size - 1);
4468 
4469 	D1(ldcp->id, "ldc_mem_bind_handle: (0x%llx) binding "
4470 	    "va 0x%llx pgsz=0x%llx, pgszc=0x%llx, pg_shift=0x%llx\n",
4471 	    ldcp->id, vaddr, pg_size, pg_size_code, pg_shift);
4472 
4473 	/* aligned VA and its offset */
4474 	v_align = (caddr_t)(((uintptr_t)vaddr) & ~(pg_size - 1));
4475 	v_offset = ((uintptr_t)vaddr) & (pg_size - 1);
4476 
4477 	npages = (len+v_offset)/pg_size;
4478 	npages = ((len+v_offset)%pg_size == 0) ? npages : npages+1;
4479 
4480 	D1(ldcp->id, "ldc_mem_bind_handle: binding "
4481 	    "(0x%llx) v=0x%llx,val=0x%llx,off=0x%x,pgs=0x%x\n",
4482 	    ldcp->id, vaddr, v_align, v_offset, npages);
4483 
4484 	/* lock the memory table - exclusive access to channel */
4485 	mutex_enter(&mtbl->lock);
4486 
4487 	if (npages > mtbl->num_avail) {
4488 		D1(ldcp->id, "ldc_mem_bind_handle: (0x%llx) no table entries\n",
4489 		    ldcp->id);
4490 		mutex_exit(&mtbl->lock);
4491 		mutex_exit(&mhdl->lock);
4492 		return (ENOMEM);
4493 	}
4494 
4495 	/* Allocate a memseg structure */
4496 	memseg = mhdl->memseg =
4497 		kmem_cache_alloc(ldcssp->memseg_cache, KM_SLEEP);
4498 
4499 	/* Allocate memory to store all pages and cookies */
4500 	memseg->pages = kmem_zalloc((sizeof (ldc_page_t) * npages), KM_SLEEP);
4501 	memseg->cookies =
4502 		kmem_zalloc((sizeof (ldc_mem_cookie_t) * npages), KM_SLEEP);
4503 
4504 	D2(ldcp->id, "ldc_mem_bind_handle: (0x%llx) processing 0x%llx pages\n",
4505 	    ldcp->id, npages);
4506 
4507 	addr = v_align;
4508 
4509 	/*
4510 	 * Check if direct shared memory map is enabled, if not change
4511 	 * the mapping type to include SHADOW_MAP.
4512 	 */
4513 	if (ldc_shmem_enabled == 0)
4514 		mtype = LDC_SHADOW_MAP;
4515 
	/*
	 * Table slots are used in a round-robin manner. The algorithm permits
	 * inserting duplicate entries. Slots allocated earlier will typically
	 * get freed before we get back to reusing the slot. Inserting
	 * duplicate entries should be OK as we only look up entries using the
	 * cookie addr, i.e. the tbl index, during export, unexport and copy
	 * operations.
	 *
	 * One implementation that was tried was to search for a duplicate
	 * page entry first and reuse it. The search overhead is very high and
	 * in the vnet case dropped the perf by almost half, 50 to 24 mbps.
	 * So it does make sense to avoid searching for duplicates.
	 *
	 * But during the process of searching for a free slot, if we find a
	 * duplicate entry we will go ahead and use it, and bump its use count.
	 */
4531 
4532 	/* index to start searching from */
4533 	index = mtbl->next_entry;
4534 	cookie_idx = -1;
4535 
4536 	tmp_mte.ll = 0;	/* initialise fields to 0 */
4537 
4538 	if (mtype & LDC_DIRECT_MAP) {
4539 		tmp_mte.mte_r = (perm & LDC_MEM_R) ? 1 : 0;
4540 		tmp_mte.mte_w = (perm & LDC_MEM_W) ? 1 : 0;
4541 		tmp_mte.mte_x = (perm & LDC_MEM_X) ? 1 : 0;
4542 	}
4543 
4544 	if (mtype & LDC_SHADOW_MAP) {
4545 		tmp_mte.mte_cr = (perm & LDC_MEM_R) ? 1 : 0;
4546 		tmp_mte.mte_cw = (perm & LDC_MEM_W) ? 1 : 0;
4547 	}
4548 
4549 	if (mtype & LDC_IO_MAP) {
4550 		tmp_mte.mte_ir = (perm & LDC_MEM_R) ? 1 : 0;
4551 		tmp_mte.mte_iw = (perm & LDC_MEM_W) ? 1 : 0;
4552 	}
4553 
4554 	D1(ldcp->id, "ldc_mem_bind_handle mte=0x%llx\n", tmp_mte.ll);
4555 
4556 	tmp_mte.mte_pgszc = pg_size_code;
4557 
4558 	/* initialize each mem table entry */
4559 	for (i = 0; i < npages; i++) {
4560 
4561 		/* check if slot is available in the table */
4562 		while (mtbl->table[index].entry.ll != 0) {
4563 
4564 			index = (index + 1) % mtbl->num_entries;
4565 
4566 			if (index == mtbl->next_entry) {
4567 				/* we have looped around */
4568 				DWARN(DBG_ALL_LDCS,
4569 				    "ldc_mem_bind_handle: (0x%llx) cannot find "
4570 				    "entry\n", ldcp->id);
4571 				*ccount = 0;
4572 
4573 				/* NOTE: free memory, remove previous entries */
				/* this shouldn't happen as num_avail was ok */
4575 
4576 				mutex_exit(&mtbl->lock);
4577 				mutex_exit(&mhdl->lock);
4578 				return (ENOMEM);
4579 			}
4580 		}
4581 
4582 		/* get the real address */
4583 		raddr = va_to_pa((void *)addr);
4584 		ra_aligned = ((uintptr_t)raddr & pg_mask);
4585 
4586 		/* build the mte */
4587 		tmp_mte.mte_rpfn = ra_aligned >> pg_shift;
4588 
4589 		D1(ldcp->id, "ldc_mem_bind_handle mte=0x%llx\n", tmp_mte.ll);
4590 
4591 		/* update entry in table */
4592 		mtbl->table[index].entry = tmp_mte;
4593 
4594 		D2(ldcp->id, "ldc_mem_bind_handle: (0x%llx) stored MTE 0x%llx"
4595 		    " into loc 0x%llx\n", ldcp->id, tmp_mte.ll, index);
4596 
4597 		/* calculate the size and offset for this export range */
4598 		if (i == 0) {
4599 			/* first page */
4600 			psize = min((pg_size - v_offset), len);
4601 			poffset = v_offset;
4602 
4603 		} else if (i == (npages - 1)) {
4604 			/* last page */
4605 			psize =	(((uintptr_t)(vaddr + len)) &
4606 				    ((uint64_t)(pg_size-1)));
4607 			if (psize == 0)
4608 				psize = pg_size;
4609 			poffset = 0;
4610 
4611 		} else {
4612 			/* middle pages */
4613 			psize = pg_size;
4614 			poffset = 0;
4615 		}
4616 
4617 		/* store entry for this page */
4618 		memseg->pages[i].index = index;
4619 		memseg->pages[i].raddr = raddr;
4620 		memseg->pages[i].offset = poffset;
4621 		memseg->pages[i].size = psize;
4622 		memseg->pages[i].mte = &(mtbl->table[index]);
4623 
4624 		/* create the cookie */
4625 		if (i == 0 || (index != prev_index + 1)) {
4626 			cookie_idx++;
4627 			memseg->cookies[cookie_idx].addr =
4628 				IDX2COOKIE(index, pg_size_code, pg_shift);
4629 			memseg->cookies[cookie_idx].addr |= poffset;
4630 			memseg->cookies[cookie_idx].size = psize;
4631 
4632 		} else {
4633 			memseg->cookies[cookie_idx].size += psize;
4634 		}
4635 
4636 		D1(ldcp->id, "ldc_mem_bind_handle: bound "
4637 		    "(0x%llx) va=0x%llx, idx=0x%llx, "
4638 		    "ra=0x%llx(sz=0x%x,off=0x%x)\n",
4639 		    ldcp->id, addr, index, raddr, psize, poffset);
4640 
4641 		/* decrement number of available entries */
4642 		mtbl->num_avail--;
4643 
4644 		/* increment va by page size */
4645 		addr += pg_size;
4646 
4647 		/* increment index */
4648 		prev_index = index;
4649 		index = (index + 1) % mtbl->num_entries;
4650 
4651 		/* save the next slot */
4652 		mtbl->next_entry = index;
4653 	}
4654 
4655 	mutex_exit(&mtbl->lock);
4656 
4657 	/* memory handle = bound */
4658 	mhdl->mtype = mtype;
4659 	mhdl->perm = perm;
4660 	mhdl->status = LDC_BOUND;
4661 
4662 	/* update memseg_t */
4663 	memseg->vaddr = vaddr;
4664 	memseg->raddr = memseg->pages[0].raddr;
4665 	memseg->size = len;
4666 	memseg->npages = npages;
4667 	memseg->ncookies = cookie_idx + 1;
4668 	memseg->next_cookie = (memseg->ncookies > 1) ? 1 : 0;
4669 
4670 	/* return count and first cookie */
4671 	*ccount = memseg->ncookies;
4672 	cookie->addr = memseg->cookies[0].addr;
4673 	cookie->size = memseg->cookies[0].size;
4674 
4675 	D1(ldcp->id,
4676 	    "ldc_mem_bind_handle: (0x%llx) bound 0x%llx, va=0x%llx, "
4677 	    "pgs=0x%llx cookies=0x%llx\n",
4678 	    ldcp->id, mhdl, vaddr, npages, memseg->ncookies);
4679 
4680 	mutex_exit(&mhdl->lock);
4681 	return (0);
4682 }
4683 
4684 /*
4685  * Return the next cookie associated with the specified memory handle
4686  */
4687 int
4688 ldc_mem_nextcookie(ldc_mem_handle_t mhandle, ldc_mem_cookie_t *cookie)
4689 {
4690 	ldc_mhdl_t	*mhdl;
4691 	ldc_chan_t 	*ldcp;
4692 	ldc_memseg_t	*memseg;
4693 
4694 	if (mhandle == NULL) {
4695 		DWARN(DBG_ALL_LDCS,
4696 		    "ldc_mem_nextcookie: invalid memory handle\n");
4697 		return (EINVAL);
4698 	}
4699 	mhdl = (ldc_mhdl_t *)mhandle;
4700 
4701 	mutex_enter(&mhdl->lock);
4702 
4703 	ldcp = mhdl->ldcp;
4704 	memseg = mhdl->memseg;
4705 
4706 	if (cookie == 0) {
4707 		DWARN(ldcp->id,
4708 		    "ldc_mem_nextcookie:(0x%llx) invalid cookie arg\n",
4709 		    ldcp->id);
4710 		mutex_exit(&mhdl->lock);
4711 		return (EINVAL);
4712 	}
4713 
4714 	if (memseg->next_cookie != 0) {
4715 		cookie->addr = memseg->cookies[memseg->next_cookie].addr;
4716 		cookie->size = memseg->cookies[memseg->next_cookie].size;
4717 		memseg->next_cookie++;
4718 		if (memseg->next_cookie == memseg->ncookies)
4719 			memseg->next_cookie = 0;
4720 
4721 	} else {
4722 		DWARN(ldcp->id,
4723 		    "ldc_mem_nextcookie:(0x%llx) no more cookies\n", ldcp->id);
4724 		cookie->addr = 0;
4725 		cookie->size = 0;
4726 		mutex_exit(&mhdl->lock);
4727 		return (EINVAL);
4728 	}
4729 
4730 	D1(ldcp->id,
4731 	    "ldc_mem_nextcookie: (0x%llx) cookie addr=0x%llx,sz=0x%llx\n",
4732 	    ldcp->id, cookie->addr, cookie->size);
4733 
4734 	mutex_exit(&mhdl->lock);
4735 	return (0);
4736 }
4737 
4738 /*
4739  * Unbind the virtual memory region associated with the specified
4740  * memory handle. Allassociated cookies are freed and the corresponding
4741  * RA space is no longer exported.
4742  */
4743 int
4744 ldc_mem_unbind_handle(ldc_mem_handle_t mhandle)
4745 {
4746 	ldc_mhdl_t	*mhdl;
4747 	ldc_chan_t 	*ldcp;
4748 	ldc_mtbl_t	*mtbl;
4749 	ldc_memseg_t	*memseg;
4750 	uint64_t	cookie_addr;
4751 	uint64_t	pg_shift, pg_size_code;
4752 	int		i, rv;
4753 
4754 	if (mhandle == NULL) {
4755 		DWARN(DBG_ALL_LDCS,
4756 		    "ldc_mem_unbind_handle: invalid memory handle\n");
4757 		return (EINVAL);
4758 	}
4759 	mhdl = (ldc_mhdl_t *)mhandle;
4760 
4761 	mutex_enter(&mhdl->lock);
4762 
4763 	if (mhdl->status == LDC_UNBOUND) {
4764 		DWARN(DBG_ALL_LDCS,
4765 		    "ldc_mem_unbind_handle: (0x%x) handle is not bound\n",
4766 		    mhandle);
4767 		mutex_exit(&mhdl->lock);
4768 		return (EINVAL);
4769 	}
4770 
4771 	ldcp = mhdl->ldcp;
4772 	mtbl = ldcp->mtbl;
4773 
4774 	memseg = mhdl->memseg;
4775 
4776 	/* lock the memory table - exclusive access to channel */
4777 	mutex_enter(&mtbl->lock);
4778 
4779 	/* undo the pages exported */
4780 	for (i = 0; i < memseg->npages; i++) {
4781 
4782 		/* check for mapped pages, revocation cookie != 0 */
4783 		if (memseg->pages[i].mte->cookie) {
4784 
4785 			pg_size_code = page_szc(memseg->pages[i].size);
4786 			pg_shift = page_get_shift(memseg->pages[i].size);
4787 			cookie_addr = IDX2COOKIE(memseg->pages[i].index,
4788 			    pg_size_code, pg_shift);
4789 
4790 			D1(ldcp->id, "ldc_mem_unbind_handle: (0x%llx) revoke "
4791 			    "cookie 0x%llx, rcookie 0x%llx\n", ldcp->id,
4792 			    cookie_addr, memseg->pages[i].mte->cookie);
4793 			rv = hv_ldc_revoke(ldcp->id, cookie_addr,
4794 			    memseg->pages[i].mte->cookie);
4795 			if (rv) {
4796 				DWARN(ldcp->id,
4797 				    "ldc_mem_unbind_handle: (0x%llx) cannot "
4798 				    "revoke mapping, cookie %llx\n", ldcp->id,
4799 				    cookie_addr);
4800 			}
4801 		}
4802 
4803 		/* clear the entry from the table */
4804 		memseg->pages[i].mte->entry.ll = 0;
4805 		mtbl->num_avail++;
4806 	}
4807 	mutex_exit(&mtbl->lock);
4808 
4809 	/* free the allocated memseg and page structures */
4810 	kmem_free(memseg->pages, (sizeof (ldc_page_t) * memseg->npages));
4811 	kmem_free(memseg->cookies,
4812 	    (sizeof (ldc_mem_cookie_t) * memseg->npages));
4813 	kmem_cache_free(ldcssp->memseg_cache, memseg);
4814 
4815 	/* uninitialize the memory handle */
4816 	mhdl->memseg = NULL;
4817 	mhdl->status = LDC_UNBOUND;
4818 
4819 	D1(ldcp->id, "ldc_mem_unbind_handle: (0x%llx) unbound handle 0x%llx\n",
4820 	    ldcp->id, mhdl);
4821 
4822 	mutex_exit(&mhdl->lock);
4823 	return (0);
4824 }
4825 
4826 /*
4827  * Get information about the dring. The base address of the descriptor
4828  * ring along with the type and permission are returned back.
4829  */
4830 int
4831 ldc_mem_info(ldc_mem_handle_t mhandle, ldc_mem_info_t *minfo)
4832 {
4833 	ldc_mhdl_t	*mhdl;
4834 
4835 	if (mhandle == NULL) {
4836 		DWARN(DBG_ALL_LDCS, "ldc_mem_info: invalid memory handle\n");
4837 		return (EINVAL);
4838 	}
4839 	mhdl = (ldc_mhdl_t *)mhandle;
4840 
4841 	if (minfo == NULL) {
4842 		DWARN(DBG_ALL_LDCS, "ldc_mem_info: invalid args\n");
4843 		return (EINVAL);
4844 	}
4845 
4846 	mutex_enter(&mhdl->lock);
4847 
4848 	minfo->status = mhdl->status;
4849 	if (mhdl->status == LDC_BOUND || mhdl->status == LDC_MAPPED) {
4850 		minfo->vaddr = mhdl->memseg->vaddr;
4851 		minfo->raddr = mhdl->memseg->raddr;
4852 		minfo->mtype = mhdl->mtype;
4853 		minfo->perm = mhdl->perm;
4854 	}
4855 	mutex_exit(&mhdl->lock);
4856 
4857 	return (0);
4858 }
4859 
4860 /*
4861  * Copy data either from or to the client specified virtual address
4862  * space to or from the exported memory associated with the cookies.
4863  * The direction argument determines whether the data is read from or
4864  * written to exported memory.
4865  */
4866 int
4867 ldc_mem_copy(ldc_handle_t handle, caddr_t vaddr, uint64_t off, size_t *size,
4868     ldc_mem_cookie_t *cookies, uint32_t ccount, uint8_t direction)
4869 {
4870 	ldc_chan_t 	*ldcp;
4871 	uint64_t	local_voff, local_valign;
4872 	uint64_t	cookie_addr, cookie_size;
4873 	uint64_t	pg_shift, pg_size, pg_size_code;
4874 	uint64_t 	export_caddr, export_poff, export_psize, export_size;
4875 	uint64_t	local_ra, local_poff, local_psize;
4876 	uint64_t	copy_size, copied_len = 0, total_bal = 0, idx = 0;
4877 	pgcnt_t		npages;
4878 	size_t		len = *size;
4879 	int 		i, rv = 0;
4880 
4881 	uint64_t	chid;
4882 
4883 	if (handle == NULL) {
4884 		DWARN(DBG_ALL_LDCS, "ldc_mem_copy: invalid channel handle\n");
4885 		return (EINVAL);
4886 	}
4887 	ldcp = (ldc_chan_t *)handle;
4888 	chid = ldcp->id;
4889 
4890 	/* check to see if channel is UP */
4891 	if (ldcp->tstate != TS_UP) {
4892 		DWARN(chid, "ldc_mem_copy: (0x%llx) channel is not UP\n",
4893 		    chid);
4894 		return (ECONNRESET);
4895 	}
4896 
4897 	/* Force address and size to be 8-byte aligned */
4898 	if ((((uintptr_t)vaddr | len) & 0x7) != 0) {
4899 		DWARN(chid,
4900 		    "ldc_mem_copy: addr/sz is not 8-byte aligned\n");
4901 		return (EINVAL);
4902 	}
4903 
4904 	/* Find the size of the exported memory */
4905 	export_size = 0;
4906 	for (i = 0; i < ccount; i++)
4907 		export_size += cookies[i].size;
4908 
4909 	/* check to see if offset is valid */
4910 	if (off > export_size) {
4911 		DWARN(chid,
4912 		    "ldc_mem_copy: (0x%llx) start offset > export mem size\n",
4913 		    chid);
4914 		return (EINVAL);
4915 	}
4916 
4917 	/*
4918 	 * Check to see if the export size is smaller than the size we
4919 	 * are requesting to copy - if so flag an error
4920 	 */
4921 	if ((export_size - off) < *size) {
4922 		DWARN(chid,
4923 		    "ldc_mem_copy: (0x%llx) copy size > export mem size\n",
4924 		    chid);
4925 		return (EINVAL);
4926 	}
4927 
4928 	total_bal = min(export_size, *size);
4929 
4930 	/* FUTURE: get the page size, pgsz code, and shift */
4931 	pg_size = MMU_PAGESIZE;
4932 	pg_size_code = page_szc(pg_size);
4933 	pg_shift = page_get_shift(pg_size_code);
4934 
4935 	D1(chid, "ldc_mem_copy: copying data "
4936 	    "(0x%llx) va 0x%llx pgsz=0x%llx, pgszc=0x%llx, pg_shift=0x%llx\n",
4937 	    chid, vaddr, pg_size, pg_size_code, pg_shift);
4938 
4939 	/* aligned VA and its offset */
4940 	local_valign = (((uintptr_t)vaddr) & ~(pg_size - 1));
4941 	local_voff = ((uintptr_t)vaddr) & (pg_size - 1);
4942 
4943 	npages = (len+local_voff)/pg_size;
4944 	npages = ((len+local_voff)%pg_size == 0) ? npages : npages+1;
4945 
4946 	D1(chid,
4947 	    "ldc_mem_copy: (0x%llx) v=0x%llx,val=0x%llx,off=0x%x,pgs=0x%x\n",
4948 	    chid, vaddr, local_valign, local_voff, npages);
4949 
4950 	local_ra = va_to_pa((void *)local_valign);
4951 	local_poff = local_voff;
4952 	local_psize = min(len, (pg_size - local_voff));
4953 
4954 	len -= local_psize;
4955 
4956 	/*
4957 	 * find the first cookie in the list of cookies
4958 	 * if the offset passed in is not zero
4959 	 */
4960 	for (idx = 0; idx < ccount; idx++) {
4961 		cookie_size = cookies[idx].size;
4962 		if (off < cookie_size)
4963 			break;
4964 		off -= cookie_size;
4965 	}
4966 
4967 	cookie_addr = cookies[idx].addr + off;
4968 	cookie_size = cookies[idx].size - off;
4969 
4970 	export_caddr = cookie_addr & ~(pg_size - 1);
4971 	export_poff = cookie_addr & (pg_size - 1);
4972 	export_psize = min(cookie_size, (pg_size - export_poff));
4973 
4974 	for (;;) {
4975 
4976 		copy_size = min(export_psize, local_psize);
4977 
4978 		D1(chid,
4979 		    "ldc_mem_copy:(0x%llx) dir=0x%x, caddr=0x%llx,"
4980 		    " loc_ra=0x%llx, exp_poff=0x%llx, loc_poff=0x%llx,"
4981 		    " exp_psz=0x%llx, loc_psz=0x%llx, copy_sz=0x%llx,"
4982 		    " total_bal=0x%llx\n",
4983 		    chid, direction, export_caddr, local_ra, export_poff,
4984 		    local_poff, export_psize, local_psize, copy_size,
4985 		    total_bal);
4986 
4987 		rv = hv_ldc_copy(chid, direction,
4988 		    (export_caddr + export_poff), (local_ra + local_poff),
4989 		    copy_size, &copied_len);
4990 
4991 		if (rv != 0) {
4992 			int 		error = EIO;
4993 			uint64_t	rx_hd, rx_tl;
4994 
4995 			DWARN(chid,
4996 			    "ldc_mem_copy: (0x%llx) err %d during copy\n",
4997 			    (unsigned long long)chid, rv);
4998 			DWARN(chid,
4999 			    "ldc_mem_copy: (0x%llx) dir=0x%x, caddr=0x%lx, "
5000 			    "loc_ra=0x%lx, exp_poff=0x%lx, loc_poff=0x%lx,"
5001 			    " exp_psz=0x%lx, loc_psz=0x%lx, copy_sz=0x%lx,"
5002 			    " copied_len=0x%lx, total_bal=0x%lx\n",
5003 			    chid, direction, export_caddr, local_ra,
5004 			    export_poff, local_poff, export_psize, local_psize,
5005 			    copy_size, copied_len, total_bal);
5006 
5007 			*size = *size - total_bal;
5008 
5009 			/*
5010 			 * check if reason for copy error was due to
5011 			 * a channel reset. we need to grab the lock
5012 			 * just in case we have to do a reset.
5013 			 */
5014 			mutex_enter(&ldcp->lock);
5015 			mutex_enter(&ldcp->tx_lock);
5016 
5017 			rv = hv_ldc_rx_get_state(ldcp->id,
5018 			    &rx_hd, &rx_tl, &(ldcp->link_state));
5019 			if (ldcp->link_state == LDC_CHANNEL_DOWN ||
5020 			    ldcp->link_state == LDC_CHANNEL_RESET) {
5021 				i_ldc_reset(ldcp, B_FALSE);
5022 				error = ECONNRESET;
5023 			}
5024 
5025 			mutex_exit(&ldcp->tx_lock);
5026 			mutex_exit(&ldcp->lock);
5027 
5028 			return (error);
5029 		}
5030 
5031 		ASSERT(copied_len <= copy_size);
5032 
5033 		D2(chid, "ldc_mem_copy: copied=0x%llx\n", copied_len);
5034 		export_poff += copied_len;
5035 		local_poff += copied_len;
5036 		export_psize -= copied_len;
5037 		local_psize -= copied_len;
5038 		cookie_size -= copied_len;
5039 
5040 		total_bal -= copied_len;
5041 
5042 		if (copy_size != copied_len)
5043 			continue;
5044 
5045 		if (export_psize == 0 && total_bal != 0) {
5046 
5047 			if (cookie_size == 0) {
5048 				idx++;
5049 				cookie_addr = cookies[idx].addr;
5050 				cookie_size = cookies[idx].size;
5051 
5052 				export_caddr = cookie_addr & ~(pg_size - 1);
5053 				export_poff = cookie_addr & (pg_size - 1);
5054 				export_psize =
5055 					min(cookie_size, (pg_size-export_poff));
5056 			} else {
5057 				export_caddr += pg_size;
5058 				export_poff = 0;
5059 				export_psize = min(cookie_size, pg_size);
5060 			}
5061 		}
5062 
5063 		if (local_psize == 0 && total_bal != 0) {
5064 			local_valign += pg_size;
5065 			local_ra = va_to_pa((void *)local_valign);
5066 			local_poff = 0;
5067 			local_psize = min(pg_size, len);
5068 			len -= local_psize;
5069 		}
5070 
5071 		/* check if we are all done */
5072 		if (total_bal == 0)
5073 			break;
5074 	}
5075 
5076 
5077 	D1(chid,
5078 	    "ldc_mem_copy: (0x%llx) done copying sz=0x%llx\n",
5079 	    chid, *size);
5080 
5081 	return (0);
5082 }
5083 
5084 /*
5085  * Copy data either from or to the client specified virtual address
5086  * space to or from HV physical memory.
5087  *
5088  * The direction argument determines whether the data is read from or
5089  * written to HV memory. direction values are LDC_COPY_IN/OUT similar
5090  * to the ldc_mem_copy interface
5091  */
5092 int
5093 ldc_mem_rdwr_cookie(ldc_handle_t handle, caddr_t vaddr, size_t *size,
5094     caddr_t paddr, uint8_t direction)
5095 {
5096 	ldc_chan_t 	*ldcp;
5097 	uint64_t	local_voff, local_valign;
5098 	uint64_t	pg_shift, pg_size, pg_size_code;
5099 	uint64_t 	target_pa, target_poff, target_psize, target_size;
5100 	uint64_t	local_ra, local_poff, local_psize;
5101 	uint64_t	copy_size, copied_len = 0;
5102 	pgcnt_t		npages;
5103 	size_t		len = *size;
5104 	int 		rv = 0;
5105 
5106 	if (handle == NULL) {
5107 		DWARN(DBG_ALL_LDCS,
5108 		    "ldc_mem_rdwr_cookie: invalid channel handle\n");
5109 		return (EINVAL);
5110 	}
5111 	ldcp = (ldc_chan_t *)handle;
5112 
5113 	mutex_enter(&ldcp->lock);
5114 
5115 	/* check to see if channel is UP */
5116 	if (ldcp->tstate != TS_UP) {
5117 		DWARN(ldcp->id,
5118 		    "ldc_mem_rdwr_cookie: (0x%llx) channel is not UP\n",
5119 		    ldcp->id);
5120 		mutex_exit(&ldcp->lock);
5121 		return (ECONNRESET);
5122 	}
5123 
5124 	/* Force address and size to be 8-byte aligned */
5125 	if ((((uintptr_t)vaddr | len) & 0x7) != 0) {
5126 		DWARN(ldcp->id,
5127 		    "ldc_mem_rdwr_cookie: addr/size is not 8-byte aligned\n");
5128 		mutex_exit(&ldcp->lock);
5129 		return (EINVAL);
5130 	}
5131 
5132 	target_size = *size;
5133 
5134 	/* FUTURE: get the page size, pgsz code, and shift */
5135 	pg_size = MMU_PAGESIZE;
5136 	pg_size_code = page_szc(pg_size);
5137 	pg_shift = page_get_shift(pg_size_code);
5138 
5139 	D1(ldcp->id, "ldc_mem_rdwr_cookie: copying data "
5140 	    "(0x%llx) va 0x%llx pgsz=0x%llx, pgszc=0x%llx, pg_shift=0x%llx\n",
5141 	    ldcp->id, vaddr, pg_size, pg_size_code, pg_shift);
5142 
5143 	/* aligned VA and its offset */
5144 	local_valign = ((uintptr_t)vaddr) & ~(pg_size - 1);
5145 	local_voff = ((uintptr_t)vaddr) & (pg_size - 1);
5146 
5147 	npages = (len + local_voff) / pg_size;
5148 	npages = ((len + local_voff) % pg_size == 0) ? npages : npages+1;
5149 
5150 	D1(ldcp->id, "ldc_mem_rdwr_cookie: (0x%llx) v=0x%llx, "
5151 	    "val=0x%llx,off=0x%x,pgs=0x%x\n",
5152 	    ldcp->id, vaddr, local_valign, local_voff, npages);
5153 
5154 	local_ra = va_to_pa((void *)local_valign);
5155 	local_poff = local_voff;
5156 	local_psize = min(len, (pg_size - local_voff));
5157 
5158 	len -= local_psize;
5159 
5160 	target_pa = ((uintptr_t)paddr) & ~(pg_size - 1);
5161 	target_poff = ((uintptr_t)paddr) & (pg_size - 1);
5162 	target_psize = pg_size - target_poff;
5163 
5164 	for (;;) {
5165 
5166 		copy_size = min(target_psize, local_psize);
5167 
5168 		D1(ldcp->id,
5169 		    "ldc_mem_rdwr_cookie: (0x%llx) dir=0x%x, tar_pa=0x%llx,"
5170 		    " loc_ra=0x%llx, tar_poff=0x%llx, loc_poff=0x%llx,"
5171 		    " tar_psz=0x%llx, loc_psz=0x%llx, copy_sz=0x%llx,"
5172 		    " total_bal=0x%llx\n",
5173 		    ldcp->id, direction, target_pa, local_ra, target_poff,
5174 		    local_poff, target_psize, local_psize, copy_size,
5175 		    target_size);
5176 
5177 		rv = hv_ldc_copy(ldcp->id, direction,
5178 		    (target_pa + target_poff), (local_ra + local_poff),
5179 		    copy_size, &copied_len);
5180 
5181 		if (rv != 0) {
5182 			DWARN(DBG_ALL_LDCS,
5183 			    "ldc_mem_rdwr_cookie: (0x%lx) err %d during copy\n",
5184 			    ldcp->id, rv);
5185 			DWARN(DBG_ALL_LDCS,
5186 			    "ldc_mem_rdwr_cookie: (0x%llx) dir=%lld, "
5187 			    "tar_pa=0x%llx, loc_ra=0x%llx, tar_poff=0x%llx, "
5188 			    "loc_poff=0x%llx, tar_psz=0x%llx, loc_psz=0x%llx, "
5189 			    "copy_sz=0x%llx, total_bal=0x%llx\n",
5190 			    ldcp->id, direction, target_pa, local_ra,
5191 			    target_poff, local_poff, target_psize, local_psize,
5192 			    copy_size, target_size);
5193 
5194 			*size = *size - target_size;
5195 			mutex_exit(&ldcp->lock);
5196 			return (i_ldc_h2v_error(rv));
5197 		}
5198 
5199 		D2(ldcp->id, "ldc_mem_rdwr_cookie: copied=0x%llx\n",
5200 		    copied_len);
5201 		target_poff += copied_len;
5202 		local_poff += copied_len;
5203 		target_psize -= copied_len;
5204 		local_psize -= copied_len;
5205 
5206 		target_size -= copied_len;
5207 
5208 		if (copy_size != copied_len)
5209 			continue;
5210 
5211 		if (target_psize == 0 && target_size != 0) {
5212 			target_pa += pg_size;
5213 			target_poff = 0;
5214 			target_psize = min(pg_size, target_size);
5215 		}
5216 
5217 		if (local_psize == 0 && target_size != 0) {
5218 			local_valign += pg_size;
5219 			local_ra = va_to_pa((void *)local_valign);
5220 			local_poff = 0;
5221 			local_psize = min(pg_size, len);
5222 			len -= local_psize;
5223 		}
5224 
5225 		/* check if we are all done */
5226 		if (target_size == 0)
5227 			break;
5228 	}
5229 
5230 	mutex_exit(&ldcp->lock);
5231 
5232 	D1(ldcp->id, "ldc_mem_rdwr_cookie: (0x%llx) done copying sz=0x%llx\n",
5233 	    ldcp->id, *size);
5234 
5235 	return (0);
5236 }
5237 
5238 /*
5239  * Map an exported memory segment into the local address space. If the
5240  * memory range was exported for direct map access, a HV call is made
5241  * to allocate a RA range. If the map is done via a shadow copy, local
5242  * shadow memory is allocated and the base VA is returned in 'vaddr'. If
5243  * the mapping is a direct map then the RA is returned in 'raddr'.
5244  */
5245 int
5246 ldc_mem_map(ldc_mem_handle_t mhandle, ldc_mem_cookie_t *cookie, uint32_t ccount,
5247     uint8_t mtype, uint8_t perm, caddr_t *vaddr, caddr_t *raddr)
5248 {
5249 	int		i, j, idx, rv, retries;
5250 	ldc_chan_t 	*ldcp;
5251 	ldc_mhdl_t	*mhdl;
5252 	ldc_memseg_t	*memseg;
5253 	caddr_t		tmpaddr;
5254 	uint64_t	map_perm = perm;
5255 	uint64_t	pg_size, pg_shift, pg_size_code, pg_mask;
5256 	uint64_t	exp_size = 0, base_off, map_size, npages;
5257 	uint64_t	cookie_addr, cookie_off, cookie_size;
5258 	tte_t		ldc_tte;
5259 
5260 	if (mhandle == NULL) {
5261 		DWARN(DBG_ALL_LDCS, "ldc_mem_map: invalid memory handle\n");
5262 		return (EINVAL);
5263 	}
5264 	mhdl = (ldc_mhdl_t *)mhandle;
5265 
5266 	mutex_enter(&mhdl->lock);
5267 
5268 	if (mhdl->status == LDC_BOUND || mhdl->status == LDC_MAPPED ||
5269 	    mhdl->memseg != NULL) {
5270 		DWARN(DBG_ALL_LDCS,
5271 		    "ldc_mem_map: (0x%llx) handle bound/mapped\n", mhandle);
5272 		mutex_exit(&mhdl->lock);
5273 		return (EINVAL);
5274 	}
5275 
5276 	ldcp = mhdl->ldcp;
5277 
5278 	mutex_enter(&ldcp->lock);
5279 
5280 	if (ldcp->tstate != TS_UP) {
5281 		DWARN(ldcp->id,
5282 		    "ldc_mem_dring_map: (0x%llx) channel is not UP\n",
5283 		    ldcp->id);
5284 		mutex_exit(&ldcp->lock);
5285 		mutex_exit(&mhdl->lock);
5286 		return (ECONNRESET);
5287 	}
5288 
5289 	if ((mtype & (LDC_SHADOW_MAP|LDC_DIRECT_MAP|LDC_IO_MAP)) == 0) {
5290 		DWARN(ldcp->id, "ldc_mem_map: invalid map type\n");
5291 		mutex_exit(&ldcp->lock);
5292 		mutex_exit(&mhdl->lock);
5293 		return (EINVAL);
5294 	}
5295 
5296 	D1(ldcp->id, "ldc_mem_map: (0x%llx) cookie = 0x%llx,0x%llx\n",
5297 	    ldcp->id, cookie->addr, cookie->size);
5298 
5299 	/* FUTURE: get the page size, pgsz code, and shift */
5300 	pg_size = MMU_PAGESIZE;
5301 	pg_size_code = page_szc(pg_size);
5302 	pg_shift = page_get_shift(pg_size_code);
5303 	pg_mask = ~(pg_size - 1);
5304 
5305 	/* calculate the number of pages in the exported cookie */
5306 	base_off = cookie[0].addr & (pg_size - 1);
5307 	for (idx = 0; idx < ccount; idx++)
5308 		exp_size += cookie[idx].size;
5309 	map_size = P2ROUNDUP((exp_size + base_off), pg_size);
5310 	npages = (map_size >> pg_shift);
5311 
5312 	/* Allocate memseg structure */
5313 	memseg = mhdl->memseg =
5314 		kmem_cache_alloc(ldcssp->memseg_cache, KM_SLEEP);
5315 
5316 	/* Allocate memory to store all pages and cookies */
5317 	memseg->pages =	kmem_zalloc((sizeof (ldc_page_t) * npages), KM_SLEEP);
5318 	memseg->cookies =
5319 		kmem_zalloc((sizeof (ldc_mem_cookie_t) * ccount), KM_SLEEP);
5320 
5321 	D2(ldcp->id, "ldc_mem_map: (0x%llx) exp_size=0x%llx, map_size=0x%llx,"
5322 	    "pages=0x%llx\n", ldcp->id, exp_size, map_size, npages);
5323 
5324 	/*
5325 	 * Check if direct map over shared memory is enabled, if not change
5326 	 * the mapping type to SHADOW_MAP.
5327 	 */
5328 	if (ldc_shmem_enabled == 0)
5329 		mtype = LDC_SHADOW_MAP;
5330 
5331 	/*
5332 	 * Check to see if the client is requesting direct or shadow map
5333 	 * If direct map is requested, try to map remote memory first,
5334 	 * and if that fails, revert to shadow map
5335 	 */
5336 	if (mtype == LDC_DIRECT_MAP) {
5337 
5338 		/* Allocate kernel virtual space for mapping */
5339 		memseg->vaddr = vmem_xalloc(heap_arena, map_size,
5340 		    pg_size, 0, 0, NULL, NULL, VM_NOSLEEP);
5341 		if (memseg->vaddr == NULL) {
5342 			cmn_err(CE_WARN,
5343 			    "ldc_mem_map: (0x%lx) memory map failed\n",
5344 			    ldcp->id);
5345 			kmem_free(memseg->cookies,
5346 			    (sizeof (ldc_mem_cookie_t) * ccount));
5347 			kmem_free(memseg->pages,
5348 			    (sizeof (ldc_page_t) * npages));
5349 			kmem_cache_free(ldcssp->memseg_cache, memseg);
5350 
5351 			mutex_exit(&ldcp->lock);
5352 			mutex_exit(&mhdl->lock);
5353 			return (ENOMEM);
5354 		}
5355 
5356 		/* Unload previous mapping */
5357 		hat_unload(kas.a_hat, memseg->vaddr, map_size,
5358 		    HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK);
5359 
5360 		/* for each cookie passed in - map into address space */
5361 		idx = 0;
5362 		cookie_size = 0;
5363 		tmpaddr = memseg->vaddr;
5364 
5365 		for (i = 0; i < npages; i++) {
5366 
5367 			if (cookie_size == 0) {
5368 				ASSERT(idx < ccount);
5369 				cookie_addr = cookie[idx].addr & pg_mask;
5370 				cookie_off = cookie[idx].addr & (pg_size - 1);
5371 				cookie_size =
5372 				    P2ROUNDUP((cookie_off + cookie[idx].size),
5373 					pg_size);
5374 				idx++;
5375 			}
5376 
5377 			D1(ldcp->id, "ldc_mem_map: (0x%llx) mapping "
5378 			    "cookie 0x%llx, bal=0x%llx\n", ldcp->id,
5379 			    cookie_addr, cookie_size);
5380 
5381 			/* map the cookie into address space */
5382 			for (retries = 0; retries < ldc_max_retries;
5383 			    retries++) {
5384 
5385 				rv = hv_ldc_mapin(ldcp->id, cookie_addr,
5386 				    &memseg->pages[i].raddr, &map_perm);
5387 				if (rv != H_EWOULDBLOCK && rv != H_ETOOMANY)
5388 					break;
5389 
5390 				drv_usecwait(ldc_delay);
5391 			}
5392 
5393 			if (rv || memseg->pages[i].raddr == 0) {
5394 				DWARN(ldcp->id,
5395 				    "ldc_mem_map: (0x%llx) hv mapin err %d\n",
5396 				    ldcp->id, rv);
5397 
5398 				/* remove previous mapins */
5399 				hat_unload(kas.a_hat, memseg->vaddr, map_size,
5400 				    HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK);
5401 				for (j = 0; j < i; j++) {
5402 					rv = hv_ldc_unmap(
5403 							memseg->pages[j].raddr);
5404 					if (rv) {
5405 						DWARN(ldcp->id,
5406 						    "ldc_mem_map: (0x%llx) "
5407 						    "cannot unmap ra=0x%llx\n",
5408 					    ldcp->id,
5409 						    memseg->pages[j].raddr);
5410 					}
5411 				}
5412 
5413 				/* free kernel virtual space */
5414 				vmem_free(heap_arena, (void *)memseg->vaddr,
5415 				    map_size);
5416 
5417 				/* direct map failed - revert to shadow map */
5418 				mtype = LDC_SHADOW_MAP;
5419 				break;
5420 
5421 			} else {
5422 
5423 				D1(ldcp->id,
5424 				    "ldc_mem_map: (0x%llx) vtop map 0x%llx -> "
5425 				    "0x%llx, cookie=0x%llx, perm=0x%llx\n",
5426 				    ldcp->id, tmpaddr, memseg->pages[i].raddr,
5427 				    cookie_addr, perm);
5428 
5429 				/*
5430 				 * NOTE: Calling hat_devload directly, causes it
5431 				 * to look for page_t using the pfn. Since this
5432 				 * addr is greater than the memlist, it treates
5433 				 * it as non-memory
5434 				 */
5435 				sfmmu_memtte(&ldc_tte,
5436 				    (pfn_t)(memseg->pages[i].raddr >> pg_shift),
5437 				    PROT_READ | PROT_WRITE | HAT_NOSYNC, TTE8K);
5438 
5439 				D1(ldcp->id,
5440 				    "ldc_mem_map: (0x%llx) ra 0x%llx -> "
5441 				    "tte 0x%llx\n", ldcp->id,
5442 				    memseg->pages[i].raddr, ldc_tte);
5443 
5444 				sfmmu_tteload(kas.a_hat, &ldc_tte, tmpaddr,
5445 				    NULL, HAT_LOAD_LOCK);
5446 
5447 				cookie_size -= pg_size;
5448 				cookie_addr += pg_size;
5449 				tmpaddr += pg_size;
5450 			}
5451 		}
5452 	}
5453 
5454 	if (mtype == LDC_SHADOW_MAP) {
5455 		if (*vaddr == NULL) {
5456 			memseg->vaddr = kmem_zalloc(exp_size, KM_SLEEP);
5457 			mhdl->myshadow = B_TRUE;
5458 
5459 			D1(ldcp->id, "ldc_mem_map: (0x%llx) allocated "
5460 			    "shadow page va=0x%llx\n", ldcp->id, memseg->vaddr);
5461 		} else {
5462 			/*
5463 			 * Use client supplied memory for memseg->vaddr
5464 			 * WARNING: assuming that client mem is >= exp_size
5465 			 */
5466 			memseg->vaddr = *vaddr;
5467 		}
5468 
5469 		/* Save all page and cookie information */
5470 		for (i = 0, tmpaddr = memseg->vaddr; i < npages; i++) {
5471 			memseg->pages[i].raddr = va_to_pa(tmpaddr);
5472 			memseg->pages[i].size = pg_size;
5473 			tmpaddr += pg_size;
5474 		}
5475 
5476 	}
5477 
5478 	/* save all cookies */
5479 	bcopy(cookie, memseg->cookies, ccount * sizeof (ldc_mem_cookie_t));
5480 
5481 	/* update memseg_t */
5482 	memseg->raddr = memseg->pages[0].raddr;
5483 	memseg->size = (mtype == LDC_SHADOW_MAP) ? exp_size : map_size;
5484 	memseg->npages = npages;
5485 	memseg->ncookies = ccount;
5486 	memseg->next_cookie = 0;
5487 
5488 	/* memory handle = mapped */
5489 	mhdl->mtype = mtype;
5490 	mhdl->perm = perm;
5491 	mhdl->status = LDC_MAPPED;
5492 
5493 	D1(ldcp->id, "ldc_mem_map: (0x%llx) mapped 0x%llx, ra=0x%llx, "
5494 	    "va=0x%llx, pgs=0x%llx cookies=0x%llx\n",
5495 	    ldcp->id, mhdl, memseg->raddr, memseg->vaddr,
5496 	    memseg->npages, memseg->ncookies);
5497 
5498 	if (mtype == LDC_SHADOW_MAP)
5499 		base_off = 0;
5500 	if (raddr)
5501 		*raddr = (caddr_t)(memseg->raddr | base_off);
5502 	if (vaddr)
5503 		*vaddr = (caddr_t)((uintptr_t)memseg->vaddr | base_off);
5504 
5505 	mutex_exit(&ldcp->lock);
5506 	mutex_exit(&mhdl->lock);
5507 	return (0);
5508 }
5509 
5510 /*
5511  * Unmap a memory segment. Free shadow memory (if any).
5512  */
5513 int
5514 ldc_mem_unmap(ldc_mem_handle_t mhandle)
5515 {
5516 	int		i, rv;
5517 	ldc_mhdl_t	*mhdl = (ldc_mhdl_t *)mhandle;
5518 	ldc_chan_t 	*ldcp;
5519 	ldc_memseg_t	*memseg;
5520 
5521 	if (mhdl == 0 || mhdl->status != LDC_MAPPED) {
5522 		DWARN(DBG_ALL_LDCS,
5523 		    "ldc_mem_unmap: (0x%llx) handle is not mapped\n",
5524 		    mhandle);
5525 		return (EINVAL);
5526 	}
5527 
5528 	mutex_enter(&mhdl->lock);
5529 
5530 	ldcp = mhdl->ldcp;
5531 	memseg = mhdl->memseg;
5532 
5533 	D1(ldcp->id, "ldc_mem_unmap: (0x%llx) unmapping handle 0x%llx\n",
5534 	    ldcp->id, mhdl);
5535 
5536 	/* if we allocated shadow memory - free it */
5537 	if (mhdl->mtype == LDC_SHADOW_MAP && mhdl->myshadow) {
5538 		kmem_free(memseg->vaddr, memseg->size);
5539 	} else if (mhdl->mtype == LDC_DIRECT_MAP) {
5540 
5541 		/* unmap in the case of DIRECT_MAP */
5542 		hat_unload(kas.a_hat, memseg->vaddr, memseg->size,
5543 		    HAT_UNLOAD_UNLOCK);
5544 
5545 		for (i = 0; i < memseg->npages; i++) {
5546 			rv = hv_ldc_unmap(memseg->pages[i].raddr);
5547 			if (rv) {
5548 				cmn_err(CE_WARN,
5549 				    "ldc_mem_map: (0x%lx) hv unmap err %d\n",
5550 				    ldcp->id, rv);
5551 			}
5552 		}
5553 
5554 		vmem_free(heap_arena, (void *)memseg->vaddr, memseg->size);
5555 	}
5556 
5557 	/* free the allocated memseg and page structures */
5558 	kmem_free(memseg->pages, (sizeof (ldc_page_t) * memseg->npages));
5559 	kmem_free(memseg->cookies,
5560 	    (sizeof (ldc_mem_cookie_t) * memseg->ncookies));
5561 	kmem_cache_free(ldcssp->memseg_cache, memseg);
5562 
5563 	/* uninitialize the memory handle */
5564 	mhdl->memseg = NULL;
5565 	mhdl->status = LDC_UNBOUND;
5566 
5567 	D1(ldcp->id, "ldc_mem_unmap: (0x%llx) unmapped handle 0x%llx\n",
5568 	    ldcp->id, mhdl);
5569 
5570 	mutex_exit(&mhdl->lock);
5571 	return (0);
5572 }
5573 
5574 /*
5575  * Internal entry point for LDC mapped memory entry consistency
5576  * semantics. Acquire copies the contents of the remote memory
5577  * into the local shadow copy. The release operation copies the local
5578  * contents into the remote memory. The offset and size specify the
5579  * bounds for the memory range being synchronized.
5580  */
5581 static int
5582 i_ldc_mem_acquire_release(ldc_mem_handle_t mhandle, uint8_t direction,
5583     uint64_t offset, size_t size)
5584 {
5585 	int 		err;
5586 	ldc_mhdl_t	*mhdl;
5587 	ldc_chan_t	*ldcp;
5588 	ldc_memseg_t	*memseg;
5589 	caddr_t		local_vaddr;
5590 	size_t		copy_size;
5591 
5592 	if (mhandle == NULL) {
5593 		DWARN(DBG_ALL_LDCS,
5594 		    "i_ldc_mem_acquire_release: invalid memory handle\n");
5595 		return (EINVAL);
5596 	}
5597 	mhdl = (ldc_mhdl_t *)mhandle;
5598 
5599 	mutex_enter(&mhdl->lock);
5600 
5601 	if (mhdl->status != LDC_MAPPED || mhdl->ldcp == NULL) {
5602 		DWARN(DBG_ALL_LDCS,
5603 		    "i_ldc_mem_acquire_release: not mapped memory\n");
5604 		mutex_exit(&mhdl->lock);
5605 		return (EINVAL);
5606 	}
5607 
5608 	/* do nothing for direct map */
5609 	if (mhdl->mtype == LDC_DIRECT_MAP) {
5610 		mutex_exit(&mhdl->lock);
5611 		return (0);
5612 	}
5613 
5614 	/* do nothing if COPY_IN+MEM_W and COPY_OUT+MEM_R */
5615 	if ((direction == LDC_COPY_IN && (mhdl->perm & LDC_MEM_R) == 0) ||
5616 	    (direction == LDC_COPY_OUT && (mhdl->perm & LDC_MEM_W) == 0)) {
5617 		mutex_exit(&mhdl->lock);
5618 		return (0);
5619 	}
5620 
5621 	if (offset >= mhdl->memseg->size ||
5622 	    (offset + size) > mhdl->memseg->size) {
5623 		DWARN(DBG_ALL_LDCS,
5624 		    "i_ldc_mem_acquire_release: memory out of range\n");
5625 		mutex_exit(&mhdl->lock);
5626 		return (EINVAL);
5627 	}
5628 
5629 	/* get the channel handle and memory segment */
5630 	ldcp = mhdl->ldcp;
5631 	memseg = mhdl->memseg;
5632 
5633 	if (mhdl->mtype == LDC_SHADOW_MAP) {
5634 
5635 		local_vaddr = memseg->vaddr + offset;
5636 		copy_size = size;
5637 
5638 		/* copy to/from remote from/to local memory */
5639 		err = ldc_mem_copy((ldc_handle_t)ldcp, local_vaddr, offset,
5640 		    &copy_size, memseg->cookies, memseg->ncookies,
5641 		    direction);
5642 		if (err || copy_size != size) {
5643 			DWARN(ldcp->id,
5644 			    "i_ldc_mem_acquire_release: copy failed\n");
5645 			mutex_exit(&mhdl->lock);
5646 			return (err);
5647 		}
5648 	}
5649 
5650 	mutex_exit(&mhdl->lock);
5651 
5652 	return (0);
5653 }
5654 
5655 /*
5656  * Ensure that the contents in the remote memory seg are consistent
5657  * with the contents if of local segment
5658  */
5659 int
5660 ldc_mem_acquire(ldc_mem_handle_t mhandle, uint64_t offset, uint64_t size)
5661 {
5662 	return (i_ldc_mem_acquire_release(mhandle, LDC_COPY_IN, offset, size));
5663 }
5664 
5665 
5666 /*
5667  * Ensure that the contents in the local memory seg are consistent
5668  * with the contents if of remote segment
5669  */
5670 int
5671 ldc_mem_release(ldc_mem_handle_t mhandle, uint64_t offset, uint64_t size)
5672 {
5673 	return (i_ldc_mem_acquire_release(mhandle, LDC_COPY_OUT, offset, size));
5674 }
5675 
5676 /*
5677  * Allocate a descriptor ring. The size of each each descriptor
5678  * must be 8-byte aligned and the entire ring should be a multiple
5679  * of MMU_PAGESIZE.
5680  */
5681 int
5682 ldc_mem_dring_create(uint32_t len, uint32_t dsize, ldc_dring_handle_t *dhandle)
5683 {
5684 	ldc_dring_t *dringp;
5685 	size_t size = (dsize * len);
5686 
5687 	D1(DBG_ALL_LDCS, "ldc_mem_dring_create: len=0x%x, size=0x%x\n",
5688 	    len, dsize);
5689 
5690 	if (dhandle == NULL) {
5691 		DWARN(DBG_ALL_LDCS, "ldc_mem_dring_create: invalid dhandle\n");
5692 		return (EINVAL);
5693 	}
5694 
5695 	if (len == 0) {
5696 		DWARN(DBG_ALL_LDCS, "ldc_mem_dring_create: invalid length\n");
5697 		return (EINVAL);
5698 	}
5699 
5700 	/* descriptor size should be 8-byte aligned */
5701 	if (dsize == 0 || (dsize & 0x7)) {
5702 		DWARN(DBG_ALL_LDCS, "ldc_mem_dring_create: invalid size\n");
5703 		return (EINVAL);
5704 	}
5705 
5706 	*dhandle = 0;
5707 
5708 	/* Allocate a desc ring structure */
5709 	dringp = kmem_zalloc(sizeof (ldc_dring_t), KM_SLEEP);
5710 
5711 	/* Initialize dring */
5712 	dringp->length = len;
5713 	dringp->dsize = dsize;
5714 
5715 	/* round off to multiple of pagesize */
5716 	dringp->size = (size & MMU_PAGEMASK);
5717 	if (size & MMU_PAGEOFFSET)
5718 		dringp->size += MMU_PAGESIZE;
5719 
5720 	dringp->status = LDC_UNBOUND;
5721 
5722 	/* allocate descriptor ring memory */
5723 	dringp->base = kmem_zalloc(dringp->size, KM_SLEEP);
5724 
5725 	/* initialize the desc ring lock */
5726 	mutex_init(&dringp->lock, NULL, MUTEX_DRIVER, NULL);
5727 
5728 	/* Add descriptor ring to the head of global list */
5729 	mutex_enter(&ldcssp->lock);
5730 	dringp->next = ldcssp->dring_list;
5731 	ldcssp->dring_list = dringp;
5732 	mutex_exit(&ldcssp->lock);
5733 
5734 	*dhandle = (ldc_dring_handle_t)dringp;
5735 
5736 	D1(DBG_ALL_LDCS, "ldc_mem_dring_create: dring allocated\n");
5737 
5738 	return (0);
5739 }
5740 
5741 
5742 /*
5743  * Destroy a descriptor ring.
5744  */
5745 int
5746 ldc_mem_dring_destroy(ldc_dring_handle_t dhandle)
5747 {
5748 	ldc_dring_t *dringp;
5749 	ldc_dring_t *tmp_dringp;
5750 
5751 	D1(DBG_ALL_LDCS, "ldc_mem_dring_destroy: entered\n");
5752 
5753 	if (dhandle == NULL) {
5754 		DWARN(DBG_ALL_LDCS,
5755 		    "ldc_mem_dring_destroy: invalid desc ring handle\n");
5756 		return (EINVAL);
5757 	}
5758 	dringp = (ldc_dring_t *)dhandle;
5759 
5760 	if (dringp->status == LDC_BOUND) {
5761 		DWARN(DBG_ALL_LDCS,
5762 		    "ldc_mem_dring_destroy: desc ring is bound\n");
5763 		return (EACCES);
5764 	}
5765 
5766 	mutex_enter(&dringp->lock);
5767 	mutex_enter(&ldcssp->lock);
5768 
5769 	/* remove from linked list - if not bound */
5770 	tmp_dringp = ldcssp->dring_list;
5771 	if (tmp_dringp == dringp) {
5772 		ldcssp->dring_list = dringp->next;
5773 		dringp->next = NULL;
5774 
5775 	} else {
5776 		while (tmp_dringp != NULL) {
5777 			if (tmp_dringp->next == dringp) {
5778 				tmp_dringp->next = dringp->next;
5779 				dringp->next = NULL;
5780 				break;
5781 			}
5782 			tmp_dringp = tmp_dringp->next;
5783 		}
5784 		if (tmp_dringp == NULL) {
5785 			DWARN(DBG_ALL_LDCS,
5786 			    "ldc_mem_dring_destroy: invalid descriptor\n");
5787 			mutex_exit(&ldcssp->lock);
5788 			mutex_exit(&dringp->lock);
5789 			return (EINVAL);
5790 		}
5791 	}
5792 
5793 	mutex_exit(&ldcssp->lock);
5794 
5795 	/* free the descriptor ring */
5796 	kmem_free(dringp->base, dringp->size);
5797 
5798 	mutex_exit(&dringp->lock);
5799 
5800 	/* destroy dring lock */
5801 	mutex_destroy(&dringp->lock);
5802 
5803 	/* free desc ring object */
5804 	kmem_free(dringp, sizeof (ldc_dring_t));
5805 
5806 	return (0);
5807 }
5808 
5809 /*
5810  * Bind a previously allocated dring to a channel. The channel should
5811  * be OPEN in order to bind the ring to the channel. Returns back a
5812  * descriptor ring cookie. The descriptor ring is exported for remote
5813  * access by the client at the other end of the channel. An entry for
5814  * dring pages is stored in map table (via call to ldc_mem_bind_handle).
5815  */
5816 int
5817 ldc_mem_dring_bind(ldc_handle_t handle, ldc_dring_handle_t dhandle,
5818     uint8_t mtype, uint8_t perm, ldc_mem_cookie_t *cookie, uint32_t *ccount)
5819 {
5820 	int		err;
5821 	ldc_chan_t 	*ldcp;
5822 	ldc_dring_t	*dringp;
5823 	ldc_mem_handle_t mhandle;
5824 
5825 	/* check to see if channel is initalized */
5826 	if (handle == NULL) {
5827 		DWARN(DBG_ALL_LDCS,
5828 		    "ldc_mem_dring_bind: invalid channel handle\n");
5829 		return (EINVAL);
5830 	}
5831 	ldcp = (ldc_chan_t *)handle;
5832 
5833 	if (dhandle == NULL) {
5834 		DWARN(DBG_ALL_LDCS,
5835 		    "ldc_mem_dring_bind: invalid desc ring handle\n");
5836 		return (EINVAL);
5837 	}
5838 	dringp = (ldc_dring_t *)dhandle;
5839 
5840 	if (cookie == NULL) {
5841 		DWARN(ldcp->id,
5842 		    "ldc_mem_dring_bind: invalid cookie arg\n");
5843 		return (EINVAL);
5844 	}
5845 
5846 	mutex_enter(&dringp->lock);
5847 
5848 	if (dringp->status == LDC_BOUND) {
5849 		DWARN(DBG_ALL_LDCS,
5850 		    "ldc_mem_dring_bind: (0x%llx) descriptor ring is bound\n",
5851 		    ldcp->id);
5852 		mutex_exit(&dringp->lock);
5853 		return (EINVAL);
5854 	}
5855 
5856 	if ((perm & LDC_MEM_RW) == 0) {
5857 		DWARN(DBG_ALL_LDCS,
5858 		    "ldc_mem_dring_bind: invalid permissions\n");
5859 		mutex_exit(&dringp->lock);
5860 		return (EINVAL);
5861 	}
5862 
5863 	if ((mtype & (LDC_SHADOW_MAP|LDC_DIRECT_MAP|LDC_IO_MAP)) == 0) {
5864 		DWARN(DBG_ALL_LDCS, "ldc_mem_dring_bind: invalid type\n");
5865 		mutex_exit(&dringp->lock);
5866 		return (EINVAL);
5867 	}
5868 
5869 	dringp->ldcp = ldcp;
5870 
5871 	/* create an memory handle */
5872 	err = ldc_mem_alloc_handle(handle, &mhandle);
5873 	if (err || mhandle == NULL) {
5874 		DWARN(DBG_ALL_LDCS,
5875 		    "ldc_mem_dring_bind: (0x%llx) error allocating mhandle\n",
5876 		    ldcp->id);
5877 		mutex_exit(&dringp->lock);
5878 		return (err);
5879 	}
5880 	dringp->mhdl = mhandle;
5881 
5882 	/* bind the descriptor ring to channel */
5883 	err = ldc_mem_bind_handle(mhandle, dringp->base, dringp->size,
5884 	    mtype, perm, cookie, ccount);
5885 	if (err) {
5886 		DWARN(ldcp->id,
5887 		    "ldc_mem_dring_bind: (0x%llx) error binding mhandle\n",
5888 		    ldcp->id);
5889 		mutex_exit(&dringp->lock);
5890 		return (err);
5891 	}
5892 
5893 	/*
5894 	 * For now return error if we get more than one cookie
5895 	 * FUTURE: Return multiple cookies ..
5896 	 */
5897 	if (*ccount > 1) {
5898 		(void) ldc_mem_unbind_handle(mhandle);
5899 		(void) ldc_mem_free_handle(mhandle);
5900 
5901 		dringp->ldcp = NULL;
5902 		dringp->mhdl = NULL;
5903 		*ccount = 0;
5904 
5905 		mutex_exit(&dringp->lock);
5906 		return (EAGAIN);
5907 	}
5908 
5909 	/* Add descriptor ring to channel's exported dring list */
5910 	mutex_enter(&ldcp->exp_dlist_lock);
5911 	dringp->ch_next = ldcp->exp_dring_list;
5912 	ldcp->exp_dring_list = dringp;
5913 	mutex_exit(&ldcp->exp_dlist_lock);
5914 
5915 	dringp->status = LDC_BOUND;
5916 
5917 	mutex_exit(&dringp->lock);
5918 
5919 	return (0);
5920 }
5921 
5922 /*
5923  * Return the next cookie associated with the specified dring handle
5924  */
5925 int
5926 ldc_mem_dring_nextcookie(ldc_dring_handle_t dhandle, ldc_mem_cookie_t *cookie)
5927 {
5928 	int		rv = 0;
5929 	ldc_dring_t 	*dringp;
5930 	ldc_chan_t	*ldcp;
5931 
5932 	if (dhandle == NULL) {
5933 		DWARN(DBG_ALL_LDCS,
5934 		    "ldc_mem_dring_nextcookie: invalid desc ring handle\n");
5935 		return (EINVAL);
5936 	}
5937 	dringp = (ldc_dring_t *)dhandle;
5938 	mutex_enter(&dringp->lock);
5939 
5940 	if (dringp->status != LDC_BOUND) {
5941 		DWARN(DBG_ALL_LDCS,
5942 		    "ldc_mem_dring_nextcookie: descriptor ring 0x%llx "
5943 		    "is not bound\n", dringp);
5944 		mutex_exit(&dringp->lock);
5945 		return (EINVAL);
5946 	}
5947 
5948 	ldcp = dringp->ldcp;
5949 
5950 	if (cookie == NULL) {
5951 		DWARN(ldcp->id,
5952 		    "ldc_mem_dring_nextcookie:(0x%llx) invalid cookie arg\n",
5953 		    ldcp->id);
5954 		mutex_exit(&dringp->lock);
5955 		return (EINVAL);
5956 	}
5957 
5958 	rv = ldc_mem_nextcookie((ldc_mem_handle_t)dringp->mhdl, cookie);
5959 	mutex_exit(&dringp->lock);
5960 
5961 	return (rv);
5962 }
5963 /*
5964  * Unbind a previously bound dring from a channel.
5965  */
5966 int
5967 ldc_mem_dring_unbind(ldc_dring_handle_t dhandle)
5968 {
5969 	ldc_dring_t 	*dringp;
5970 	ldc_dring_t	*tmp_dringp;
5971 	ldc_chan_t	*ldcp;
5972 
5973 	if (dhandle == NULL) {
5974 		DWARN(DBG_ALL_LDCS,
5975 		    "ldc_mem_dring_unbind: invalid desc ring handle\n");
5976 		return (EINVAL);
5977 	}
5978 	dringp = (ldc_dring_t *)dhandle;
5979 
5980 	mutex_enter(&dringp->lock);
5981 
5982 	if (dringp->status == LDC_UNBOUND) {
5983 		DWARN(DBG_ALL_LDCS,
5984 		    "ldc_mem_dring_bind: descriptor ring 0x%llx is unbound\n",
5985 		    dringp);
5986 		mutex_exit(&dringp->lock);
5987 		return (EINVAL);
5988 	}
5989 	ldcp = dringp->ldcp;
5990 
5991 	mutex_enter(&ldcp->exp_dlist_lock);
5992 
5993 	tmp_dringp = ldcp->exp_dring_list;
5994 	if (tmp_dringp == dringp) {
5995 		ldcp->exp_dring_list = dringp->ch_next;
5996 		dringp->ch_next = NULL;
5997 
5998 	} else {
5999 		while (tmp_dringp != NULL) {
6000 			if (tmp_dringp->ch_next == dringp) {
6001 				tmp_dringp->ch_next = dringp->ch_next;
6002 				dringp->ch_next = NULL;
6003 				break;
6004 			}
6005 			tmp_dringp = tmp_dringp->ch_next;
6006 		}
6007 		if (tmp_dringp == NULL) {
6008 			DWARN(DBG_ALL_LDCS,
6009 			    "ldc_mem_dring_unbind: invalid descriptor\n");
6010 			mutex_exit(&ldcp->exp_dlist_lock);
6011 			mutex_exit(&dringp->lock);
6012 			return (EINVAL);
6013 		}
6014 	}
6015 
6016 	mutex_exit(&ldcp->exp_dlist_lock);
6017 
6018 	(void) ldc_mem_unbind_handle((ldc_mem_handle_t)dringp->mhdl);
6019 	(void) ldc_mem_free_handle((ldc_mem_handle_t)dringp->mhdl);
6020 
6021 	dringp->ldcp = NULL;
6022 	dringp->mhdl = NULL;
6023 	dringp->status = LDC_UNBOUND;
6024 
6025 	mutex_exit(&dringp->lock);
6026 
6027 	return (0);
6028 }
6029 
6030 /*
6031  * Get information about the dring. The base address of the descriptor
6032  * ring along with the type and permission are returned back.
6033  */
6034 int
6035 ldc_mem_dring_info(ldc_dring_handle_t dhandle, ldc_mem_info_t *minfo)
6036 {
6037 	ldc_dring_t	*dringp;
6038 	int		rv;
6039 
6040 	if (dhandle == NULL) {
6041 		DWARN(DBG_ALL_LDCS,
6042 		    "ldc_mem_dring_info: invalid desc ring handle\n");
6043 		return (EINVAL);
6044 	}
6045 	dringp = (ldc_dring_t *)dhandle;
6046 
6047 	mutex_enter(&dringp->lock);
6048 
6049 	if (dringp->mhdl) {
6050 		rv = ldc_mem_info(dringp->mhdl, minfo);
6051 		if (rv) {
6052 			DWARN(DBG_ALL_LDCS,
6053 			    "ldc_mem_dring_info: error reading mem info\n");
6054 			mutex_exit(&dringp->lock);
6055 			return (rv);
6056 		}
6057 	} else {
6058 		minfo->vaddr = dringp->base;
6059 		minfo->raddr = NULL;
6060 		minfo->status = dringp->status;
6061 	}
6062 
6063 	mutex_exit(&dringp->lock);
6064 
6065 	return (0);
6066 }
6067 
6068 /*
6069  * Map an exported descriptor ring into the local address space. If the
6070  * descriptor ring was exported for direct map access, a HV call is made
6071  * to allocate a RA range. If the map is done via a shadow copy, local
6072  * shadow memory is allocated.
6073  */
6074 int
6075 ldc_mem_dring_map(ldc_handle_t handle, ldc_mem_cookie_t *cookie,
6076     uint32_t ccount, uint32_t len, uint32_t dsize, uint8_t mtype,
6077     ldc_dring_handle_t *dhandle)
6078 {
6079 	int		err;
6080 	ldc_chan_t 	*ldcp = (ldc_chan_t *)handle;
6081 	ldc_mem_handle_t mhandle;
6082 	ldc_dring_t	*dringp;
6083 	size_t		dring_size;
6084 
6085 	if (dhandle == NULL) {
6086 		DWARN(DBG_ALL_LDCS,
6087 		    "ldc_mem_dring_map: invalid dhandle\n");
6088 		return (EINVAL);
6089 	}
6090 
6091 	/* check to see if channel is initalized */
6092 	if (handle == NULL) {
6093 		DWARN(DBG_ALL_LDCS,
6094 		    "ldc_mem_dring_map: invalid channel handle\n");
6095 		return (EINVAL);
6096 	}
6097 	ldcp = (ldc_chan_t *)handle;
6098 
6099 	if (cookie == NULL) {
6100 		DWARN(ldcp->id,
6101 		    "ldc_mem_dring_map: (0x%llx) invalid cookie\n",
6102 		    ldcp->id);
6103 		return (EINVAL);
6104 	}
6105 
6106 	/* FUTURE: For now we support only one cookie per dring */
6107 	ASSERT(ccount == 1);
6108 
6109 	if (cookie->size < (dsize * len)) {
6110 		DWARN(ldcp->id,
6111 		    "ldc_mem_dring_map: (0x%llx) invalid dsize/len\n",
6112 		    ldcp->id);
6113 		return (EINVAL);
6114 	}
6115 
6116 	*dhandle = 0;
6117 
6118 	/* Allocate an dring structure */
6119 	dringp = kmem_zalloc(sizeof (ldc_dring_t), KM_SLEEP);
6120 
6121 	D1(ldcp->id,
6122 	    "ldc_mem_dring_map: 0x%x,0x%x,0x%x,0x%llx,0x%llx\n",
6123 	    mtype, len, dsize, cookie->addr, cookie->size);
6124 
6125 	/* Initialize dring */
6126 	dringp->length = len;
6127 	dringp->dsize = dsize;
6128 
6129 	/* round of to multiple of page size */
6130 	dring_size = len * dsize;
6131 	dringp->size = (dring_size & MMU_PAGEMASK);
6132 	if (dring_size & MMU_PAGEOFFSET)
6133 		dringp->size += MMU_PAGESIZE;
6134 
6135 	dringp->ldcp = ldcp;
6136 
6137 	/* create an memory handle */
6138 	err = ldc_mem_alloc_handle(handle, &mhandle);
6139 	if (err || mhandle == NULL) {
6140 		DWARN(DBG_ALL_LDCS,
6141 		    "ldc_mem_dring_map: cannot alloc hdl err=%d\n",
6142 		    err);
6143 		kmem_free(dringp, sizeof (ldc_dring_t));
6144 		return (ENOMEM);
6145 	}
6146 
6147 	dringp->mhdl = mhandle;
6148 	dringp->base = NULL;
6149 
6150 	/* map the dring into local memory */
6151 	err = ldc_mem_map(mhandle, cookie, ccount, mtype, LDC_MEM_RW,
6152 	    &(dringp->base), NULL);
6153 	if (err || dringp->base == NULL) {
6154 		cmn_err(CE_WARN,
6155 		    "ldc_mem_dring_map: cannot map desc ring err=%d\n", err);
6156 		(void) ldc_mem_free_handle(mhandle);
6157 		kmem_free(dringp, sizeof (ldc_dring_t));
6158 		return (ENOMEM);
6159 	}
6160 
6161 	/* initialize the desc ring lock */
6162 	mutex_init(&dringp->lock, NULL, MUTEX_DRIVER, NULL);
6163 
6164 	/* Add descriptor ring to channel's imported dring list */
6165 	mutex_enter(&ldcp->imp_dlist_lock);
6166 	dringp->ch_next = ldcp->imp_dring_list;
6167 	ldcp->imp_dring_list = dringp;
6168 	mutex_exit(&ldcp->imp_dlist_lock);
6169 
6170 	dringp->status = LDC_MAPPED;
6171 
6172 	*dhandle = (ldc_dring_handle_t)dringp;
6173 
6174 	return (0);
6175 }
6176 
6177 /*
6178  * Unmap a descriptor ring. Free shadow memory (if any).
6179  */
6180 int
6181 ldc_mem_dring_unmap(ldc_dring_handle_t dhandle)
6182 {
6183 	ldc_dring_t 	*dringp;
6184 	ldc_dring_t	*tmp_dringp;
6185 	ldc_chan_t	*ldcp;
6186 
6187 	if (dhandle == NULL) {
6188 		DWARN(DBG_ALL_LDCS,
6189 		    "ldc_mem_dring_unmap: invalid desc ring handle\n");
6190 		return (EINVAL);
6191 	}
6192 	dringp = (ldc_dring_t *)dhandle;
6193 
6194 	if (dringp->status != LDC_MAPPED) {
6195 		DWARN(DBG_ALL_LDCS,
6196 		    "ldc_mem_dring_unmap: not a mapped desc ring\n");
6197 		return (EINVAL);
6198 	}
6199 
6200 	mutex_enter(&dringp->lock);
6201 
6202 	ldcp = dringp->ldcp;
6203 
6204 	mutex_enter(&ldcp->imp_dlist_lock);
6205 
6206 	/* find and unlink the desc ring from channel import list */
6207 	tmp_dringp = ldcp->imp_dring_list;
6208 	if (tmp_dringp == dringp) {
6209 		ldcp->imp_dring_list = dringp->ch_next;
6210 		dringp->ch_next = NULL;
6211 
6212 	} else {
6213 		while (tmp_dringp != NULL) {
6214 			if (tmp_dringp->ch_next == dringp) {
6215 				tmp_dringp->ch_next = dringp->ch_next;
6216 				dringp->ch_next = NULL;
6217 				break;
6218 			}
6219 			tmp_dringp = tmp_dringp->ch_next;
6220 		}
6221 		if (tmp_dringp == NULL) {
6222 			DWARN(DBG_ALL_LDCS,
6223 			    "ldc_mem_dring_unmap: invalid descriptor\n");
6224 			mutex_exit(&ldcp->imp_dlist_lock);
6225 			mutex_exit(&dringp->lock);
6226 			return (EINVAL);
6227 		}
6228 	}
6229 
6230 	mutex_exit(&ldcp->imp_dlist_lock);
6231 
6232 	/* do a LDC memory handle unmap and free */
6233 	(void) ldc_mem_unmap(dringp->mhdl);
6234 	(void) ldc_mem_free_handle((ldc_mem_handle_t)dringp->mhdl);
6235 
6236 	dringp->status = 0;
6237 	dringp->ldcp = NULL;
6238 
6239 	mutex_exit(&dringp->lock);
6240 
6241 	/* destroy dring lock */
6242 	mutex_destroy(&dringp->lock);
6243 
6244 	/* free desc ring object */
6245 	kmem_free(dringp, sizeof (ldc_dring_t));
6246 
6247 	return (0);
6248 }
6249 
6250 /*
6251  * Internal entry point for descriptor ring access entry consistency
6252  * semantics. Acquire copies the contents of the remote descriptor ring
6253  * into the local shadow copy. The release operation copies the local
6254  * contents into the remote dring. The start and end locations specify
6255  * bounds for the entries being synchronized.
6256  */
6257 static int
6258 i_ldc_dring_acquire_release(ldc_dring_handle_t dhandle,
6259     uint8_t direction, uint64_t start, uint64_t end)
6260 {
6261 	int 			err;
6262 	ldc_dring_t		*dringp;
6263 	ldc_chan_t		*ldcp;
6264 	uint64_t		soff;
6265 	size_t			copy_size;
6266 
6267 	if (dhandle == NULL) {
6268 		DWARN(DBG_ALL_LDCS,
6269 		    "i_ldc_dring_acquire_release: invalid desc ring handle\n");
6270 		return (EINVAL);
6271 	}
6272 	dringp = (ldc_dring_t *)dhandle;
6273 	mutex_enter(&dringp->lock);
6274 
6275 	if (dringp->status != LDC_MAPPED || dringp->ldcp == NULL) {
6276 		DWARN(DBG_ALL_LDCS,
6277 		    "i_ldc_dring_acquire_release: not a mapped desc ring\n");
6278 		mutex_exit(&dringp->lock);
6279 		return (EINVAL);
6280 	}
6281 
6282 	if (start >= dringp->length || end >= dringp->length) {
6283 		DWARN(DBG_ALL_LDCS,
6284 		    "i_ldc_dring_acquire_release: index out of range\n");
6285 		mutex_exit(&dringp->lock);
6286 		return (EINVAL);
6287 	}
6288 
6289 	/* get the channel handle */
6290 	ldcp = dringp->ldcp;
6291 
6292 	copy_size = (start <= end) ? (((end - start) + 1) * dringp->dsize) :
6293 		((dringp->length - start) * dringp->dsize);
6294 
6295 	/* Calculate the relative offset for the first desc */
6296 	soff = (start * dringp->dsize);
6297 
6298 	/* copy to/from remote from/to local memory */
6299 	D1(ldcp->id, "i_ldc_dring_acquire_release: c1 off=0x%llx sz=0x%llx\n",
6300 	    soff, copy_size);
6301 	err = i_ldc_mem_acquire_release((ldc_mem_handle_t)dringp->mhdl,
6302 	    direction, soff, copy_size);
6303 	if (err) {
6304 		DWARN(ldcp->id,
6305 		    "i_ldc_dring_acquire_release: copy failed\n");
6306 		mutex_exit(&dringp->lock);
6307 		return (err);
6308 	}
6309 
6310 	/* do the balance */
6311 	if (start > end) {
6312 		copy_size = ((end + 1) * dringp->dsize);
6313 		soff = 0;
6314 
6315 		/* copy to/from remote from/to local memory */
6316 		D1(ldcp->id, "i_ldc_dring_acquire_release: c2 "
6317 		    "off=0x%llx sz=0x%llx\n", soff, copy_size);
6318 		err = i_ldc_mem_acquire_release((ldc_mem_handle_t)dringp->mhdl,
6319 		    direction, soff, copy_size);
6320 		if (err) {
6321 			DWARN(ldcp->id,
6322 			    "i_ldc_dring_acquire_release: copy failed\n");
6323 			mutex_exit(&dringp->lock);
6324 			return (err);
6325 		}
6326 	}
6327 
6328 	mutex_exit(&dringp->lock);
6329 
6330 	return (0);
6331 }
6332 
6333 /*
6334  * Ensure that the contents in the local dring are consistent
6335  * with the contents if of remote dring
6336  */
6337 int
6338 ldc_mem_dring_acquire(ldc_dring_handle_t dhandle, uint64_t start, uint64_t end)
6339 {
6340 	return (i_ldc_dring_acquire_release(dhandle, LDC_COPY_IN, start, end));
6341 }
6342 
6343 /*
6344  * Ensure that the contents in the remote dring are consistent
6345  * with the contents if of local dring
6346  */
6347 int
6348 ldc_mem_dring_release(ldc_dring_handle_t dhandle, uint64_t start, uint64_t end)
6349 {
6350 	return (i_ldc_dring_acquire_release(dhandle, LDC_COPY_OUT, start, end));
6351 }
6352 
6353 
6354 /* ------------------------------------------------------------------------- */
6355