xref: /titanic_51/usr/src/uts/sun4v/io/ldc.c (revision 261a51afbf7133d9f7c89f1388050677f56b7d1a)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * sun4v LDC Link Layer
31  */
32 #include <sys/types.h>
33 #include <sys/file.h>
34 #include <sys/errno.h>
35 #include <sys/open.h>
36 #include <sys/cred.h>
37 #include <sys/kmem.h>
38 #include <sys/conf.h>
39 #include <sys/cmn_err.h>
40 #include <sys/ksynch.h>
41 #include <sys/modctl.h>
42 #include <sys/stat.h> /* needed for S_IFBLK and S_IFCHR */
43 #include <sys/debug.h>
44 #include <sys/types.h>
45 #include <sys/cred.h>
46 #include <sys/promif.h>
47 #include <sys/ddi.h>
48 #include <sys/sunddi.h>
49 #include <sys/cyclic.h>
50 #include <sys/machsystm.h>
51 #include <sys/vm.h>
52 #include <sys/cpu.h>
53 #include <sys/intreg.h>
54 #include <sys/machcpuvar.h>
55 #include <sys/mmu.h>
56 #include <sys/pte.h>
57 #include <vm/hat.h>
58 #include <vm/as.h>
59 #include <vm/hat_sfmmu.h>
60 #include <sys/vm_machparam.h>
61 #include <vm/seg_kmem.h>
62 #include <vm/seg_kpm.h>
63 #include <sys/note.h>
64 #include <sys/ivintr.h>
65 #include <sys/hypervisor_api.h>
66 #include <sys/ldc.h>
67 #include <sys/ldc_impl.h>
68 #include <sys/cnex.h>
69 #include <sys/hsvc.h>
70 
/*
 * Forward declarations for the file-local (static) helpers.
 */

/* Core internal functions: error translation, queue reconfig, reset */
static int i_ldc_h2v_error(int h_error);
static int i_ldc_txq_reconf(ldc_chan_t *ldcp);
static int i_ldc_rxq_reconf(ldc_chan_t *ldcp, boolean_t force_reset);
static int i_ldc_rxq_drain(ldc_chan_t *ldcp);
static void i_ldc_reset_state(ldc_chan_t *ldcp);
static void i_ldc_reset(ldc_chan_t *ldcp, boolean_t force_reset);

/* Queue head/tail pointer helpers and single-packet send */
static int i_ldc_get_tx_tail(ldc_chan_t *ldcp, uint64_t *tail);
static int i_ldc_set_tx_tail(ldc_chan_t *ldcp, uint64_t tail);
static int i_ldc_set_rx_head(ldc_chan_t *ldcp, uint64_t head);
static int i_ldc_send_pkt(ldc_chan_t *ldcp, uint8_t pkttype, uint8_t subtype,
    uint8_t ctrlmsg);

/* Interrupt handling functions */
static uint_t i_ldc_tx_hdlr(caddr_t arg1, caddr_t arg2);
static uint_t i_ldc_rx_hdlr(caddr_t arg1, caddr_t arg2);
static void i_ldc_clear_intr(ldc_chan_t *ldcp, cnex_intrtype_t itype);

/* Read method functions (one per channel mode: raw, packet, stream) */
static int i_ldc_read_raw(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep);
static int i_ldc_read_packet(ldc_chan_t *ldcp, caddr_t target_bufp,
	size_t *sizep);
static int i_ldc_read_stream(ldc_chan_t *ldcp, caddr_t target_bufp,
	size_t *sizep);

/* Write method functions (one per channel mode: raw, packet, stream) */
static int i_ldc_write_raw(ldc_chan_t *ldcp, caddr_t target_bufp,
	size_t *sizep);
static int i_ldc_write_packet(ldc_chan_t *ldcp, caddr_t target_bufp,
	size_t *sizep);
static int i_ldc_write_stream(ldc_chan_t *ldcp, caddr_t target_bufp,
	size_t *sizep);

/* Pkt processing internal functions (sequence check and handshake msgs) */
static int i_ldc_check_seqid(ldc_chan_t *ldcp, ldc_msg_t *ldcmsg);
static int i_ldc_ctrlmsg(ldc_chan_t *ldcp, ldc_msg_t *ldcmsg);
static int i_ldc_process_VER(ldc_chan_t *ldcp, ldc_msg_t *msg);
static int i_ldc_process_RTS(ldc_chan_t *ldcp, ldc_msg_t *msg);
static int i_ldc_process_RTR(ldc_chan_t *ldcp, ldc_msg_t *msg);
static int i_ldc_process_RDX(ldc_chan_t *ldcp, ldc_msg_t *msg);
static int i_ldc_process_data_ACK(ldc_chan_t *ldcp, ldc_msg_t *msg);

/* Memory synchronization internal functions */
static int i_ldc_mem_acquire_release(ldc_mem_handle_t mhandle,
    uint8_t direction, uint64_t offset, size_t size);
static int i_ldc_dring_acquire_release(ldc_dring_handle_t dhandle,
    uint8_t direction, uint64_t start, uint64_t end);
119 
/*
 * LDC protocol versions supported by this endpoint, as {major, minor}
 * pairs. The version handshake indexes this table while negotiating.
 */
static ldc_ver_t ldc_versions[] = { {1, 0} };

/* number of supported versions (element count of ldc_versions[]) */
#define	LDC_NUM_VERS	(sizeof (ldc_versions) / sizeof (ldc_versions[0]))

/* Module State Pointer - allocated in _init() and freed in _fini() */
static ldc_soft_state_t *ldcssp;

static struct modldrv md = {
	&mod_miscops,			/* This is a misc module */
	"sun4v LDC module v%I%",	/* Name of the module */
};

/* Linkage handed to mod_install(), mod_info() and mod_remove() */
static struct modlinkage ml = {
	MODREV_1,
	&md,
	NULL
};

/*
 * Hypervisor LDC services group negotiated in _init();
 * ldc_sup_minor receives the minor number reported by hsvc_register().
 */
static uint64_t ldc_sup_minor;		/* Supported minor number */
static hsvc_info_t ldc_hsvc = {
	HSVC_REV_1, NULL, HSVC_GROUP_LDC, 1, 0, "ldc"
};

/*
 * Hypervisor interrupt services group negotiated in _init();
 * intr_sup_minor receives the minor number reported by hsvc_register().
 */
static uint64_t intr_sup_minor;		/* Supported minor number */
static hsvc_info_t intr_hsvc = {
	HSVC_REV_1, NULL, HSVC_GROUP_INTR, 1, 0, "ldc"
};
149 
/*
 * LDC framework supports mapping remote domain's memory
 * either directly or via shadow memory pages. Default
 * support is currently implemented via shadow copy.
 * Direct map can be enabled by setting 'ldc_shmem_enabled'
 */
int ldc_shmem_enabled = 0;

/*
 * The no. of MTU size messages that can be stored in
 * the LDC Tx queue. The number of Tx queue entries is
 * then computed as (mtu * mtu_msgs)/sizeof(queue_entry)
 */
uint64_t ldc_mtu_msgs = LDC_MTU_MSGS;

/*
 * The minimum queue length. This is the size of the smallest
 * LDC queue. If the computed value is less than this default,
 * the queue length is rounded up to 'ldc_queue_entries'.
 */
uint64_t ldc_queue_entries = LDC_QUEUE_ENTRIES;

/*
 * Pages exported for remote access over each channel are
 * maintained in a table registered with the Hypervisor.
 * The default number of entries in the table is set to
 * 'ldc_maptable_entries'.
 */
uint64_t ldc_maptable_entries = LDC_MTBL_ENTRIES;

/*
 * LDC retry count and delay - when the HV returns EWOULDBLOCK
 * the operation is retried 'ldc_max_retries' times with a
 * wait of 'ldc_delay' usecs between each retry.
 */
int ldc_max_retries = LDC_MAX_RETRIES;
clock_t ldc_delay = LDC_DELAY;

/*
 * delay between each retry of channel unregistration in
 * ldc_close(), to wait for pending interrupts to complete.
 */
clock_t ldc_close_delay = LDC_CLOSE_DELAY;
193 
#ifdef DEBUG

/*
 * Print debug messages
 *
 * set ldcdbg to 0x7 for enabling all msgs
 * 0x4 - Warnings (DWARN)
 * 0x2 - All debug messages (D2)
 * 0x1 - Minimal debug messages (D1)
 *
 * set ldcdbgchan to the channel number you want to debug
 * setting it to -1 prints debug messages for all channels
 * NOTE: ldcdbgchan has no effect on error messages
 */

/* wildcard channel id: ldcdebug() never filters a message carrying it */
#define	DBG_ALL_LDCS -1

/* bitmask selecting which of D1/D2/DWARN produce output */
int ldcdbg = 0x0;
/* channel whose messages are printed; DBG_ALL_LDCS prints all channels */
int64_t ldcdbgchan = DBG_ALL_LDCS;
/* pending LDC_ERR_* injections; ldc_inject_error() clears a bit once used */
uint64_t ldc_inject_err_flag = 0;
214 
215 static void
216 ldcdebug(int64_t id, const char *fmt, ...)
217 {
218 	char buf[512];
219 	va_list ap;
220 
221 	/*
222 	 * Do not return if,
223 	 * caller wants to print it anyway - (id == DBG_ALL_LDCS)
224 	 * debug channel is set to all LDCs - (ldcdbgchan == DBG_ALL_LDCS)
225 	 * debug channel = caller specified channel
226 	 */
227 	if ((id != DBG_ALL_LDCS) &&
228 	    (ldcdbgchan != DBG_ALL_LDCS) &&
229 	    (ldcdbgchan != id)) {
230 		return;
231 	}
232 
233 	va_start(ap, fmt);
234 	(void) vsprintf(buf, fmt, ap);
235 	va_end(ap);
236 
237 	cmn_err(CE_CONT, "?%s", buf);
238 }
239 
240 #define	LDC_ERR_RESET	0x1
241 #define	LDC_ERR_PKTLOSS	0x2
242 
243 static boolean_t
244 ldc_inject_error(ldc_chan_t *ldcp, uint64_t error)
245 {
246 	if ((ldcdbgchan != DBG_ALL_LDCS) && (ldcdbgchan != ldcp->id))
247 		return (B_FALSE);
248 
249 	if ((ldc_inject_err_flag & error) == 0)
250 		return (B_FALSE);
251 
252 	/* clear the injection state */
253 	ldc_inject_err_flag &= ~error;
254 
255 	return (B_TRUE);
256 }
257 
/*
 * Debug print macros, used as D1(id, fmt, ...); each forwards its
 * arguments to ldcdebug() when the matching bit is set in ldcdbg:
 *	D1    - 0x01, D2 - 0x02, DWARN - 0x04
 *
 * They expand to a conditional expression rather than a bare 'if', so
 * that a call placed in the body of an unbraced if/else cannot capture
 * the caller's 'else'.
 */
#define	D1						\
	((ldcdbg & 0x01) == 0) ? (void)0 : ldcdebug

#define	D2						\
	((ldcdbg & 0x02) == 0) ? (void)0 : ldcdebug

#define	DWARN						\
	((ldcdbg & 0x04) == 0) ? (void)0 : ldcdebug
269 
/*
 * Print the 64 bytes starting at 'addr' as "|xx|xx|...|" through D2.
 *
 * The text needs 64 * 3 characters plus "|\n" and the terminating NUL,
 * which is exactly the 65 * 3 bytes of the buffer. The macro's locals
 * carry a _dp_ prefix and 'addr' is parenthesized so that an argument
 * named buf, i or src (or a compound expression) is not captured by,
 * or mis-parsed inside, the expansion.
 */
#define	DUMP_PAYLOAD(id, addr)						\
{									\
	char _dp_buf[65*3];						\
	int _dp_i;							\
	uint8_t *_dp_src = (uint8_t *)(addr);				\
	for (_dp_i = 0; _dp_i < 64; _dp_i++, _dp_src++)			\
		(void) sprintf(&_dp_buf[_dp_i * 3], "|%02x", *_dp_src);	\
	(void) sprintf(&_dp_buf[_dp_i * 3], "|\n");			\
	D2((id), "payload: %s", _dp_buf);				\
}
280 
/*
 * Print a one-line summary of the LDC packet at 'addr' through D2.
 *
 * c    - channel; (c)->mode selects whether the seqid is shown (it is
 *        reported as 0 in RAW mode) and (c)->id tags the message
 * s    - caller-supplied prefix string
 * addr - address of the packet (pointer or integer)
 *
 * Data packets show the fragment start/stop flags ('B'/'E') and the
 * length taken from the envelope; other packets show the raw envelope.
 * The macro's locals carry a _dl_ prefix so that an argument named msg
 * or mid is not shadowed by the expansion, and the unsigned seqid is
 * printed with %u.
 */
#define	DUMP_LDC_PKT(c, s, addr)					\
{									\
	ldc_msg_t *_dl_msg = (ldc_msg_t *)(addr);			\
	uint32_t _dl_mid = ((c)->mode != LDC_MODE_RAW) ?		\
	    _dl_msg->seqid : 0;						\
	if (_dl_msg->type == LDC_DATA) {				\
	    D2((c)->id, "%s: msg%u (/%x/%x/%x/,env[%c%c,sz=%d])",	\
	    (s), _dl_mid, _dl_msg->type, _dl_msg->stype,		\
	    _dl_msg->ctrl,						\
	    (_dl_msg->env & LDC_FRAG_START) ? 'B' : ' ',		\
	    (_dl_msg->env & LDC_FRAG_STOP) ? 'E' : ' ',			\
	    (_dl_msg->env & LDC_LEN_MASK));				\
	} else {							\
	    D2((c)->id, "%s: msg%u (/%x/%x/%x/,env=%x)", (s),		\
	    _dl_mid, _dl_msg->type, _dl_msg->stype,			\
	    _dl_msg->ctrl, _dl_msg->env);				\
	}								\
}
296 
/*
 * Error-injection hooks. Each evaluates ldc_inject_error() for the given
 * channel, which reports B_TRUE once per pending request because it
 * clears the flag bits it consumes.
 */
#define	LDC_INJECT_RESET(_ldcp)	ldc_inject_error(_ldcp, LDC_ERR_RESET)
#define	LDC_INJECT_PKTLOSS(_ldcp) ldc_inject_error(_ldcp, LDC_ERR_PKTLOSS)

#else

/*
 * Non-DEBUG build: the print macros expand to nothing (leaving only the
 * parenthesized argument list at each call site), the dump macros
 * expand to nothing at all, and error injection is always B_FALSE.
 */
#define	DBG_ALL_LDCS -1

#define	D1
#define	D2
#define	DWARN

#define	DUMP_PAYLOAD(id, addr)
#define	DUMP_LDC_PKT(c, s, addr)

#define	LDC_INJECT_RESET(_ldcp)	(B_FALSE)
#define	LDC_INJECT_PKTLOSS(_ldcp) (B_FALSE)

#endif
315 
/*
 * Zero one queue entry (sizeof (ldc_msg_t) bytes) at 'p'.
 * NOTE(review): the expansion already ends in ';', so "ZERO_PKT(p);"
 * yields an extra empty statement - brace it when it is the body of an
 * if/else.
 */
#define	ZERO_PKT(p)			\
	bzero((p), sizeof (ldc_msg_t));

/*
 * Build a cookie value: the page size code shifted into place by
 * LDC_COOKIE_PGSZC_SHIFT, OR-ed with the index shifted left by pg_shift.
 */
#define	IDX2COOKIE(idx, pg_szc, pg_shift)				\
	(((pg_szc) << LDC_COOKIE_PGSZC_SHIFT) | ((idx) << (pg_shift)))
321 
322 
323 int
324 _init(void)
325 {
326 	int status;
327 
328 	status = hsvc_register(&ldc_hsvc, &ldc_sup_minor);
329 	if (status != 0) {
330 		cmn_err(CE_WARN, "%s: cannot negotiate hypervisor LDC services"
331 		    " group: 0x%lx major: %ld minor: %ld errno: %d",
332 		    ldc_hsvc.hsvc_modname, ldc_hsvc.hsvc_group,
333 		    ldc_hsvc.hsvc_major, ldc_hsvc.hsvc_minor, status);
334 		return (-1);
335 	}
336 
337 	status = hsvc_register(&intr_hsvc, &intr_sup_minor);
338 	if (status != 0) {
339 		cmn_err(CE_WARN, "%s: cannot negotiate hypervisor interrupt "
340 		    "services group: 0x%lx major: %ld minor: %ld errno: %d",
341 		    intr_hsvc.hsvc_modname, intr_hsvc.hsvc_group,
342 		    intr_hsvc.hsvc_major, intr_hsvc.hsvc_minor, status);
343 		(void) hsvc_unregister(&ldc_hsvc);
344 		return (-1);
345 	}
346 
347 	/* allocate soft state structure */
348 	ldcssp = kmem_zalloc(sizeof (ldc_soft_state_t), KM_SLEEP);
349 
350 	/* Link the module into the system */
351 	status = mod_install(&ml);
352 	if (status != 0) {
353 		kmem_free(ldcssp, sizeof (ldc_soft_state_t));
354 		return (status);
355 	}
356 
357 	/* Initialize the LDC state structure */
358 	mutex_init(&ldcssp->lock, NULL, MUTEX_DRIVER, NULL);
359 
360 	mutex_enter(&ldcssp->lock);
361 
362 	/* Create a cache for memory handles */
363 	ldcssp->memhdl_cache = kmem_cache_create("ldc_memhdl_cache",
364 	    sizeof (ldc_mhdl_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
365 	if (ldcssp->memhdl_cache == NULL) {
366 		DWARN(DBG_ALL_LDCS, "_init: ldc_memhdl cache create failed\n");
367 		mutex_exit(&ldcssp->lock);
368 		return (-1);
369 	}
370 
371 	/* Create cache for memory segment structures */
372 	ldcssp->memseg_cache = kmem_cache_create("ldc_memseg_cache",
373 	    sizeof (ldc_memseg_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
374 	if (ldcssp->memseg_cache == NULL) {
375 		DWARN(DBG_ALL_LDCS, "_init: ldc_memseg cache create failed\n");
376 		mutex_exit(&ldcssp->lock);
377 		return (-1);
378 	}
379 
380 
381 	ldcssp->channel_count = 0;
382 	ldcssp->channels_open = 0;
383 	ldcssp->chan_list = NULL;
384 	ldcssp->dring_list = NULL;
385 
386 	mutex_exit(&ldcssp->lock);
387 
388 	return (0);
389 }
390 
/*
 * Module information entry point.
 * Passes the module linkage and the caller's modinfo buffer to
 * mod_info() and returns its result unchanged.
 */
int
_info(struct modinfo *modinfop)
{
	/* Report status of the dynamically loadable driver module */
	return (mod_info(&ml, modinfop));
}
397 
398 int
399 _fini(void)
400 {
401 	int 		rv, status;
402 	ldc_chan_t 	*ldcp;
403 	ldc_dring_t 	*dringp;
404 	ldc_mem_info_t 	minfo;
405 
406 	/* Unlink the driver module from the system */
407 	status = mod_remove(&ml);
408 	if (status) {
409 		DWARN(DBG_ALL_LDCS, "_fini: mod_remove failed\n");
410 		return (EIO);
411 	}
412 
413 	/* close and finalize channels */
414 	ldcp = ldcssp->chan_list;
415 	while (ldcp != NULL) {
416 		(void) ldc_close((ldc_handle_t)ldcp);
417 		(void) ldc_fini((ldc_handle_t)ldcp);
418 
419 		ldcp = ldcp->next;
420 	}
421 
422 	/* Free descriptor rings */
423 	dringp = ldcssp->dring_list;
424 	while (dringp != NULL) {
425 		dringp = dringp->next;
426 
427 		rv = ldc_mem_dring_info((ldc_dring_handle_t)dringp, &minfo);
428 		if (rv == 0 && minfo.status != LDC_UNBOUND) {
429 			if (minfo.status == LDC_BOUND) {
430 				(void) ldc_mem_dring_unbind(
431 						(ldc_dring_handle_t)dringp);
432 			}
433 			if (minfo.status == LDC_MAPPED) {
434 				(void) ldc_mem_dring_unmap(
435 						(ldc_dring_handle_t)dringp);
436 			}
437 		}
438 
439 		(void) ldc_mem_dring_destroy((ldc_dring_handle_t)dringp);
440 	}
441 	ldcssp->dring_list = NULL;
442 
443 	/* Destroy kmem caches */
444 	kmem_cache_destroy(ldcssp->memhdl_cache);
445 	kmem_cache_destroy(ldcssp->memseg_cache);
446 
447 	/*
448 	 * We have successfully "removed" the driver.
449 	 * Destroying soft states
450 	 */
451 	mutex_destroy(&ldcssp->lock);
452 	kmem_free(ldcssp, sizeof (ldc_soft_state_t));
453 
454 	(void) hsvc_unregister(&ldc_hsvc);
455 	(void) hsvc_unregister(&intr_hsvc);
456 
457 	return (status);
458 }
459 
460 /* -------------------------------------------------------------------------- */
461 
462 /*
463  * LDC Link Layer Internal Functions
464  */
465 
466 /*
467  * Translate HV Errors to sun4v error codes
468  */
469 static int
470 i_ldc_h2v_error(int h_error)
471 {
472 	switch (h_error) {
473 
474 	case	H_EOK:
475 		return (0);
476 
477 	case	H_ENORADDR:
478 		return (EFAULT);
479 
480 	case	H_EBADPGSZ:
481 	case	H_EINVAL:
482 		return (EINVAL);
483 
484 	case	H_EWOULDBLOCK:
485 		return (EWOULDBLOCK);
486 
487 	case	H_ENOACCESS:
488 	case	H_ENOMAP:
489 		return (EACCES);
490 
491 	case	H_EIO:
492 	case	H_ECPUERROR:
493 		return (EIO);
494 
495 	case	H_ENOTSUPPORTED:
496 		return (ENOTSUP);
497 
498 	case 	H_ETOOMANY:
499 		return (ENOSPC);
500 
501 	case	H_ECHANNEL:
502 		return (ECHRNG);
503 	default:
504 		break;
505 	}
506 
507 	return (EIO);
508 }
509 
510 /*
511  * Reconfigure the transmit queue
512  */
513 static int
514 i_ldc_txq_reconf(ldc_chan_t *ldcp)
515 {
516 	int rv;
517 
518 	ASSERT(MUTEX_HELD(&ldcp->lock));
519 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
520 
521 	rv = hv_ldc_tx_qconf(ldcp->id, ldcp->tx_q_ra, ldcp->tx_q_entries);
522 	if (rv) {
523 		cmn_err(CE_WARN,
524 		    "i_ldc_txq_reconf: (0x%lx) cannot set qconf", ldcp->id);
525 		return (EIO);
526 	}
527 	rv = hv_ldc_tx_get_state(ldcp->id, &(ldcp->tx_head),
528 	    &(ldcp->tx_tail), &(ldcp->link_state));
529 	if (rv) {
530 		cmn_err(CE_WARN,
531 		    "i_ldc_txq_reconf: (0x%lx) cannot get qptrs", ldcp->id);
532 		return (EIO);
533 	}
534 	D1(ldcp->id, "i_ldc_txq_reconf: (0x%llx) h=0x%llx,t=0x%llx,"
535 	    "s=0x%llx\n", ldcp->id, ldcp->tx_head, ldcp->tx_tail,
536 	    ldcp->link_state);
537 
538 	return (0);
539 }
540 
541 /*
542  * Reconfigure the receive queue
543  */
544 static int
545 i_ldc_rxq_reconf(ldc_chan_t *ldcp, boolean_t force_reset)
546 {
547 	int rv;
548 	uint64_t rx_head, rx_tail;
549 
550 	ASSERT(MUTEX_HELD(&ldcp->lock));
551 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
552 	    &(ldcp->link_state));
553 	if (rv) {
554 		cmn_err(CE_WARN,
555 		    "i_ldc_rxq_reconf: (0x%lx) cannot get state",
556 		    ldcp->id);
557 		return (EIO);
558 	}
559 
560 	if (force_reset || (ldcp->tstate & ~TS_IN_RESET) == TS_UP) {
561 		rv = hv_ldc_rx_qconf(ldcp->id, ldcp->rx_q_ra,
562 			ldcp->rx_q_entries);
563 		if (rv) {
564 			cmn_err(CE_WARN,
565 			    "i_ldc_rxq_reconf: (0x%lx) cannot set qconf",
566 			    ldcp->id);
567 			return (EIO);
568 		}
569 		D1(ldcp->id, "i_ldc_rxq_reconf: (0x%llx) completed q reconf",
570 		    ldcp->id);
571 	}
572 
573 	return (0);
574 }
575 
576 
577 /*
578  * Drain the contents of the receive queue
579  */
580 static int
581 i_ldc_rxq_drain(ldc_chan_t *ldcp)
582 {
583 	int rv;
584 	uint64_t rx_head, rx_tail;
585 
586 	ASSERT(MUTEX_HELD(&ldcp->lock));
587 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
588 	    &(ldcp->link_state));
589 	if (rv) {
590 		cmn_err(CE_WARN, "i_ldc_rxq_drain: (0x%lx) cannot get state",
591 		    ldcp->id);
592 		return (EIO);
593 	}
594 
595 	/* flush contents by setting the head = tail */
596 	return (i_ldc_set_rx_head(ldcp, rx_tail));
597 }
598 
599 
600 /*
601  * Reset LDC state structure and its contents
602  */
603 static void
604 i_ldc_reset_state(ldc_chan_t *ldcp)
605 {
606 	ASSERT(MUTEX_HELD(&ldcp->lock));
607 	ldcp->last_msg_snt = LDC_INIT_SEQID;
608 	ldcp->last_ack_rcd = 0;
609 	ldcp->last_msg_rcd = 0;
610 	ldcp->tx_ackd_head = ldcp->tx_head;
611 	ldcp->next_vidx = 0;
612 	ldcp->hstate = 0;
613 	ldcp->tstate = TS_OPEN;
614 	ldcp->status = LDC_OPEN;
615 
616 	if (ldcp->link_state == LDC_CHANNEL_UP ||
617 	    ldcp->link_state == LDC_CHANNEL_RESET) {
618 
619 		if (ldcp->mode == LDC_MODE_RAW) {
620 			ldcp->status = LDC_UP;
621 			ldcp->tstate = TS_UP;
622 		} else {
623 			ldcp->status = LDC_READY;
624 			ldcp->tstate |= TS_LINK_READY;
625 		}
626 	}
627 }
628 
629 /*
630  * Reset a LDC channel
631  */
632 static void
633 i_ldc_reset(ldc_chan_t *ldcp, boolean_t force_reset)
634 {
635 	DWARN(ldcp->id, "i_ldc_reset: (0x%llx) channel reset\n", ldcp->id);
636 
637 	ASSERT(MUTEX_HELD(&ldcp->lock));
638 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
639 
640 	/* reconfig Tx and Rx queues */
641 	(void) i_ldc_txq_reconf(ldcp);
642 	(void) i_ldc_rxq_reconf(ldcp, force_reset);
643 
644 	/* Clear Tx and Rx interrupts */
645 	(void) i_ldc_clear_intr(ldcp, CNEX_TX_INTR);
646 	(void) i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
647 
648 	/* Reset channel state */
649 	i_ldc_reset_state(ldcp);
650 
651 	/* Mark channel in reset */
652 	ldcp->tstate |= TS_IN_RESET;
653 }
654 
655 
656 /*
657  * Clear pending interrupts
658  */
659 static void
660 i_ldc_clear_intr(ldc_chan_t *ldcp, cnex_intrtype_t itype)
661 {
662 	ldc_cnex_t *cinfo = &ldcssp->cinfo;
663 
664 	ASSERT(MUTEX_HELD(&ldcp->lock));
665 	ASSERT(cinfo->dip != NULL);
666 
667 	switch (itype) {
668 	case CNEX_TX_INTR:
669 		/* check Tx interrupt */
670 		if (ldcp->tx_intr_state)
671 			ldcp->tx_intr_state = LDC_INTR_NONE;
672 		else
673 			return;
674 		break;
675 
676 	case CNEX_RX_INTR:
677 		/* check Rx interrupt */
678 		if (ldcp->rx_intr_state)
679 			ldcp->rx_intr_state = LDC_INTR_NONE;
680 		else
681 			return;
682 		break;
683 	}
684 
685 	(void) cinfo->clr_intr(cinfo->dip, ldcp->id, itype);
686 	D2(ldcp->id,
687 	    "i_ldc_clear_intr: (0x%llx) cleared 0x%x intr\n",
688 	    ldcp->id, itype);
689 }
690 
691 /*
692  * Set the receive queue head
693  * Resets connection and returns an error if it fails.
694  */
695 static int
696 i_ldc_set_rx_head(ldc_chan_t *ldcp, uint64_t head)
697 {
698 	int 	rv;
699 	int 	retries;
700 
701 	ASSERT(MUTEX_HELD(&ldcp->lock));
702 	for (retries = 0; retries < ldc_max_retries; retries++) {
703 
704 		if ((rv = hv_ldc_rx_set_qhead(ldcp->id, head)) == 0)
705 			return (0);
706 
707 		if (rv != H_EWOULDBLOCK)
708 			break;
709 
710 		/* wait for ldc_delay usecs */
711 		drv_usecwait(ldc_delay);
712 	}
713 
714 	cmn_err(CE_WARN, "ldc_rx_set_qhead: (0x%lx) cannot set qhead 0x%lx",
715 		ldcp->id, head);
716 	mutex_enter(&ldcp->tx_lock);
717 	i_ldc_reset(ldcp, B_TRUE);
718 	mutex_exit(&ldcp->tx_lock);
719 
720 	return (ECONNRESET);
721 }
722 
723 
724 /*
725  * Returns the tx_tail to be used for transfer
726  * Re-reads the TX queue ptrs if and only if the
727  * the cached head and tail are equal (queue is full)
728  */
729 static int
730 i_ldc_get_tx_tail(ldc_chan_t *ldcp, uint64_t *tail)
731 {
732 	int 		rv;
733 	uint64_t 	current_head, new_tail;
734 
735 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
736 	/* Read the head and tail ptrs from HV */
737 	rv = hv_ldc_tx_get_state(ldcp->id,
738 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
739 	if (rv) {
740 		cmn_err(CE_WARN,
741 		    "i_ldc_get_tx_tail: (0x%lx) cannot read qptrs\n",
742 		    ldcp->id);
743 		return (EIO);
744 	}
745 	if (ldcp->link_state == LDC_CHANNEL_DOWN) {
746 		D1(ldcp->id, "i_ldc_get_tx_tail: (0x%llx) channel not ready\n",
747 		    ldcp->id);
748 		return (ECONNRESET);
749 	}
750 
751 	/* In reliable mode, check against last ACKd msg */
752 	current_head = (ldcp->mode == LDC_MODE_RELIABLE ||
753 		ldcp->mode == LDC_MODE_STREAM)
754 		? ldcp->tx_ackd_head : ldcp->tx_head;
755 
756 	/* increment the tail */
757 	new_tail = (ldcp->tx_tail + LDC_PACKET_SIZE) %
758 		(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
759 
760 	if (new_tail == current_head) {
761 		DWARN(ldcp->id,
762 		    "i_ldc_get_tx_tail: (0x%llx) TX queue is full\n",
763 		    ldcp->id);
764 		return (EWOULDBLOCK);
765 	}
766 
767 	D2(ldcp->id, "i_ldc_get_tx_tail: (0x%llx) head=0x%llx, tail=0x%llx\n",
768 	    ldcp->id, ldcp->tx_head, ldcp->tx_tail);
769 
770 	*tail = ldcp->tx_tail;
771 	return (0);
772 }
773 
774 /*
775  * Set the tail pointer. If HV returns EWOULDBLOCK, it will back off
776  * and retry ldc_max_retries times before returning an error.
777  * Returns 0, EWOULDBLOCK or EIO
778  */
779 static int
780 i_ldc_set_tx_tail(ldc_chan_t *ldcp, uint64_t tail)
781 {
782 	int		rv, retval = EWOULDBLOCK;
783 	int 		retries;
784 
785 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
786 	for (retries = 0; retries < ldc_max_retries; retries++) {
787 
788 		if ((rv = hv_ldc_tx_set_qtail(ldcp->id, tail)) == 0) {
789 			retval = 0;
790 			break;
791 		}
792 		if (rv != H_EWOULDBLOCK) {
793 			DWARN(ldcp->id, "i_ldc_set_tx_tail: (0x%llx) set "
794 			    "qtail=0x%llx failed, rv=%d\n", ldcp->id, tail, rv);
795 			retval = EIO;
796 			break;
797 		}
798 
799 		/* wait for ldc_delay usecs */
800 		drv_usecwait(ldc_delay);
801 	}
802 	return (retval);
803 }
804 
805 /*
806  * Send a LDC message
807  */
808 static int
809 i_ldc_send_pkt(ldc_chan_t *ldcp, uint8_t pkttype, uint8_t subtype,
810     uint8_t ctrlmsg)
811 {
812 	int		rv;
813 	ldc_msg_t 	*pkt;
814 	uint64_t	tx_tail;
815 	uint32_t	curr_seqid = ldcp->last_msg_snt;
816 
817 	/* Obtain Tx lock */
818 	mutex_enter(&ldcp->tx_lock);
819 
820 	/* get the current tail for the message */
821 	rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
822 	if (rv) {
823 		DWARN(ldcp->id,
824 		    "i_ldc_send_pkt: (0x%llx) error sending pkt, "
825 		    "type=0x%x,subtype=0x%x,ctrl=0x%x\n",
826 		    ldcp->id, pkttype, subtype, ctrlmsg);
827 		mutex_exit(&ldcp->tx_lock);
828 		return (rv);
829 	}
830 
831 	pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
832 	ZERO_PKT(pkt);
833 
834 	/* Initialize the packet */
835 	pkt->type = pkttype;
836 	pkt->stype = subtype;
837 	pkt->ctrl = ctrlmsg;
838 
839 	/* Store ackid/seqid iff it is RELIABLE mode & not a RTS/RTR message */
840 	if (((ctrlmsg & LDC_CTRL_MASK) != LDC_RTS) &&
841 	    ((ctrlmsg & LDC_CTRL_MASK) != LDC_RTR)) {
842 		curr_seqid++;
843 		if (ldcp->mode != LDC_MODE_RAW) {
844 			pkt->seqid = curr_seqid;
845 			pkt->ackid = ldcp->last_msg_rcd;
846 		}
847 	}
848 	DUMP_LDC_PKT(ldcp, "i_ldc_send_pkt", (uint64_t)pkt);
849 
850 	/* initiate the send by calling into HV and set the new tail */
851 	tx_tail = (tx_tail + LDC_PACKET_SIZE) %
852 		(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
853 
854 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
855 	if (rv) {
856 		DWARN(ldcp->id,
857 		    "i_ldc_send_pkt:(0x%llx) error sending pkt, "
858 		    "type=0x%x,stype=0x%x,ctrl=0x%x\n",
859 		    ldcp->id, pkttype, subtype, ctrlmsg);
860 		mutex_exit(&ldcp->tx_lock);
861 		return (EIO);
862 	}
863 
864 	ldcp->last_msg_snt = curr_seqid;
865 	ldcp->tx_tail = tx_tail;
866 
867 	mutex_exit(&ldcp->tx_lock);
868 	return (0);
869 }
870 
871 /*
872  * Checks if packet was received in right order
873  * in the case of a reliable link.
874  * Returns 0 if in order, else EIO
875  */
876 static int
877 i_ldc_check_seqid(ldc_chan_t *ldcp, ldc_msg_t *msg)
878 {
879 	/* No seqid checking for RAW mode */
880 	if (ldcp->mode == LDC_MODE_RAW)
881 		return (0);
882 
883 	/* No seqid checking for version, RTS, RTR message */
884 	if (msg->ctrl == LDC_VER ||
885 	    msg->ctrl == LDC_RTS ||
886 	    msg->ctrl == LDC_RTR)
887 		return (0);
888 
889 	/* Initial seqid to use is sent in RTS/RTR and saved in last_msg_rcd */
890 	if (msg->seqid != (ldcp->last_msg_rcd + 1)) {
891 		DWARN(ldcp->id,
892 		    "i_ldc_check_seqid: (0x%llx) out-of-order pkt, got 0x%x, "
893 		    "expecting 0x%x\n", ldcp->id, msg->seqid,
894 		    (ldcp->last_msg_rcd + 1));
895 		return (EIO);
896 	}
897 
898 #ifdef DEBUG
899 	if (LDC_INJECT_PKTLOSS(ldcp)) {
900 		DWARN(ldcp->id,
901 		    "i_ldc_check_seqid: (0x%llx) inject pkt loss\n", ldcp->id);
902 		return (EIO);
903 	}
904 #endif
905 
906 	return (0);
907 }
908 
909 
910 /*
911  * Process an incoming version ctrl message
912  */
913 static int
914 i_ldc_process_VER(ldc_chan_t *ldcp, ldc_msg_t *msg)
915 {
916 	int 		rv = 0, idx = ldcp->next_vidx;
917 	ldc_msg_t 	*pkt;
918 	uint64_t	tx_tail;
919 	ldc_ver_t	*rcvd_ver;
920 
921 	/* get the received version */
922 	rcvd_ver = (ldc_ver_t *)((uint64_t)msg + LDC_PAYLOAD_VER_OFF);
923 
924 	D2(ldcp->id, "i_ldc_process_VER: (0x%llx) received VER v%u.%u\n",
925 	    ldcp->id, rcvd_ver->major, rcvd_ver->minor);
926 
927 	/* Obtain Tx lock */
928 	mutex_enter(&ldcp->tx_lock);
929 
930 	switch (msg->stype) {
931 	case LDC_INFO:
932 
933 		if ((ldcp->tstate & ~TS_IN_RESET) == TS_VREADY) {
934 			(void) i_ldc_txq_reconf(ldcp);
935 			i_ldc_reset_state(ldcp);
936 			mutex_exit(&ldcp->tx_lock);
937 			return (EAGAIN);
938 		}
939 
940 		/* get the current tail and pkt for the response */
941 		rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
942 		if (rv != 0) {
943 			DWARN(ldcp->id,
944 			    "i_ldc_process_VER: (0x%llx) err sending "
945 			    "version ACK/NACK\n", ldcp->id);
946 			i_ldc_reset(ldcp, B_TRUE);
947 			mutex_exit(&ldcp->tx_lock);
948 			return (ECONNRESET);
949 		}
950 
951 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
952 		ZERO_PKT(pkt);
953 
954 		/* initialize the packet */
955 		pkt->type = LDC_CTRL;
956 		pkt->ctrl = LDC_VER;
957 
958 		for (;;) {
959 
960 			D1(ldcp->id, "i_ldc_process_VER: got %u.%u chk %u.%u\n",
961 			    rcvd_ver->major, rcvd_ver->minor,
962 			    ldc_versions[idx].major, ldc_versions[idx].minor);
963 
964 			if (rcvd_ver->major == ldc_versions[idx].major) {
965 				/* major version match - ACK version */
966 				pkt->stype = LDC_ACK;
967 
968 				/*
969 				 * lower minor version to the one this endpt
970 				 * supports, if necessary
971 				 */
972 				if (rcvd_ver->minor > ldc_versions[idx].minor)
973 					rcvd_ver->minor =
974 						ldc_versions[idx].minor;
975 				bcopy(rcvd_ver, pkt->udata, sizeof (*rcvd_ver));
976 
977 				break;
978 			}
979 
980 			if (rcvd_ver->major > ldc_versions[idx].major) {
981 
982 				D1(ldcp->id, "i_ldc_process_VER: using next"
983 				    " lower idx=%d, v%u.%u\n", idx,
984 				    ldc_versions[idx].major,
985 				    ldc_versions[idx].minor);
986 
987 				/* nack with next lower version */
988 				pkt->stype = LDC_NACK;
989 				bcopy(&ldc_versions[idx], pkt->udata,
990 				    sizeof (ldc_versions[idx]));
991 				ldcp->next_vidx = idx;
992 				break;
993 			}
994 
995 			/* next major version */
996 			idx++;
997 
998 			D1(ldcp->id, "i_ldc_process_VER: inc idx %x\n", idx);
999 
1000 			if (idx == LDC_NUM_VERS) {
1001 				/* no version match - send NACK */
1002 				pkt->stype = LDC_NACK;
1003 				bzero(pkt->udata, sizeof (ldc_ver_t));
1004 				ldcp->next_vidx = 0;
1005 				break;
1006 			}
1007 		}
1008 
1009 		/* initiate the send by calling into HV and set the new tail */
1010 		tx_tail = (tx_tail + LDC_PACKET_SIZE) %
1011 			(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1012 
1013 		rv = i_ldc_set_tx_tail(ldcp, tx_tail);
1014 		if (rv == 0) {
1015 			ldcp->tx_tail = tx_tail;
1016 			if (pkt->stype == LDC_ACK) {
1017 				D2(ldcp->id, "i_ldc_process_VER: (0x%llx) sent"
1018 				    " version ACK\n", ldcp->id);
1019 				/* Save the ACK'd version */
1020 				ldcp->version.major = rcvd_ver->major;
1021 				ldcp->version.minor = rcvd_ver->minor;
1022 				ldcp->hstate |= TS_RCVD_VER;
1023 				ldcp->tstate |= TS_VER_DONE;
1024 				D1(DBG_ALL_LDCS,
1025 				    "(0x%llx) Sent ACK, "
1026 				    "Agreed on version v%u.%u\n",
1027 				    ldcp->id, rcvd_ver->major, rcvd_ver->minor);
1028 			}
1029 		} else {
1030 			DWARN(ldcp->id,
1031 			    "i_ldc_process_VER: (0x%llx) error sending "
1032 			    "ACK/NACK\n", ldcp->id);
1033 			i_ldc_reset(ldcp, B_TRUE);
1034 			mutex_exit(&ldcp->tx_lock);
1035 			return (ECONNRESET);
1036 		}
1037 
1038 		break;
1039 
1040 	case LDC_ACK:
1041 		if ((ldcp->tstate & ~TS_IN_RESET) == TS_VREADY) {
1042 			if (ldcp->version.major != rcvd_ver->major ||
1043 				ldcp->version.minor != rcvd_ver->minor) {
1044 
1045 				/* mismatched version - reset connection */
1046 				DWARN(ldcp->id,
1047 					"i_ldc_process_VER: (0x%llx) recvd"
1048 					" ACK ver != sent ACK ver\n", ldcp->id);
1049 				i_ldc_reset(ldcp, B_TRUE);
1050 				mutex_exit(&ldcp->tx_lock);
1051 				return (ECONNRESET);
1052 			}
1053 		} else {
1054 			/* SUCCESS - we have agreed on a version */
1055 			ldcp->version.major = rcvd_ver->major;
1056 			ldcp->version.minor = rcvd_ver->minor;
1057 			ldcp->tstate |= TS_VER_DONE;
1058 		}
1059 
1060 		D1(ldcp->id, "(0x%llx) Got ACK, Agreed on version v%u.%u\n",
1061 		    ldcp->id, rcvd_ver->major, rcvd_ver->minor);
1062 
1063 		/* initiate RTS-RTR-RDX handshake */
1064 		rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
1065 		if (rv) {
1066 			DWARN(ldcp->id,
1067 		    "i_ldc_process_VER: (0x%llx) cannot send RTS\n",
1068 			    ldcp->id);
1069 			i_ldc_reset(ldcp, B_TRUE);
1070 			mutex_exit(&ldcp->tx_lock);
1071 			return (ECONNRESET);
1072 		}
1073 
1074 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
1075 		ZERO_PKT(pkt);
1076 
1077 		pkt->type = LDC_CTRL;
1078 		pkt->stype = LDC_INFO;
1079 		pkt->ctrl = LDC_RTS;
1080 		pkt->env = ldcp->mode;
1081 		if (ldcp->mode != LDC_MODE_RAW)
1082 			pkt->seqid = LDC_INIT_SEQID;
1083 
1084 		ldcp->last_msg_rcd = LDC_INIT_SEQID;
1085 
1086 		DUMP_LDC_PKT(ldcp, "i_ldc_process_VER snd rts", (uint64_t)pkt);
1087 
1088 		/* initiate the send by calling into HV and set the new tail */
1089 		tx_tail = (tx_tail + LDC_PACKET_SIZE) %
1090 			(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1091 
1092 		rv = i_ldc_set_tx_tail(ldcp, tx_tail);
1093 		if (rv) {
1094 			D2(ldcp->id,
1095 			    "i_ldc_process_VER: (0x%llx) no listener\n",
1096 			    ldcp->id);
1097 			i_ldc_reset(ldcp, B_TRUE);
1098 			mutex_exit(&ldcp->tx_lock);
1099 			return (ECONNRESET);
1100 		}
1101 
1102 		ldcp->tx_tail = tx_tail;
1103 		ldcp->hstate |= TS_SENT_RTS;
1104 
1105 		break;
1106 
1107 	case LDC_NACK:
1108 		/* check if version in NACK is zero */
1109 		if (rcvd_ver->major == 0 && rcvd_ver->minor == 0) {
1110 			/* version handshake failure */
1111 			DWARN(DBG_ALL_LDCS,
1112 			    "i_ldc_process_VER: (0x%llx) no version match\n",
1113 			    ldcp->id);
1114 			i_ldc_reset(ldcp, B_TRUE);
1115 			mutex_exit(&ldcp->tx_lock);
1116 			return (ECONNRESET);
1117 		}
1118 
1119 		/* get the current tail and pkt for the response */
1120 		rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
1121 		if (rv != 0) {
1122 			cmn_err(CE_NOTE,
1123 			    "i_ldc_process_VER: (0x%lx) err sending "
1124 			    "version ACK/NACK\n", ldcp->id);
1125 			i_ldc_reset(ldcp, B_TRUE);
1126 			mutex_exit(&ldcp->tx_lock);
1127 			return (ECONNRESET);
1128 		}
1129 
1130 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
1131 		ZERO_PKT(pkt);
1132 
1133 		/* initialize the packet */
1134 		pkt->type = LDC_CTRL;
1135 		pkt->ctrl = LDC_VER;
1136 		pkt->stype = LDC_INFO;
1137 
1138 		/* check ver in NACK msg has a match */
1139 		for (;;) {
1140 			if (rcvd_ver->major == ldc_versions[idx].major) {
1141 				/*
1142 				 * major version match - resubmit request
1143 				 * if lower minor version to the one this endpt
1144 				 * supports, if necessary
1145 				 */
1146 				if (rcvd_ver->minor > ldc_versions[idx].minor)
1147 					rcvd_ver->minor =
1148 						ldc_versions[idx].minor;
1149 				bcopy(rcvd_ver, pkt->udata, sizeof (*rcvd_ver));
1150 				break;
1151 
1152 			}
1153 
1154 			if (rcvd_ver->major > ldc_versions[idx].major) {
1155 
1156 				D1(ldcp->id, "i_ldc_process_VER: using next"
1157 				    " lower idx=%d, v%u.%u\n", idx,
1158 				    ldc_versions[idx].major,
1159 				    ldc_versions[idx].minor);
1160 
1161 				/* send next lower version */
1162 				bcopy(&ldc_versions[idx], pkt->udata,
1163 				    sizeof (ldc_versions[idx]));
1164 				ldcp->next_vidx = idx;
1165 				break;
1166 			}
1167 
1168 			/* next version */
1169 			idx++;
1170 
1171 			D1(ldcp->id, "i_ldc_process_VER: inc idx %x\n", idx);
1172 
1173 			if (idx == LDC_NUM_VERS) {
1174 				/* no version match - terminate */
1175 				ldcp->next_vidx = 0;
1176 				mutex_exit(&ldcp->tx_lock);
1177 				return (ECONNRESET);
1178 			}
1179 		}
1180 
1181 		/* initiate the send by calling into HV and set the new tail */
1182 		tx_tail = (tx_tail + LDC_PACKET_SIZE) %
1183 			(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1184 
1185 		rv = i_ldc_set_tx_tail(ldcp, tx_tail);
1186 		if (rv == 0) {
1187 			D2(ldcp->id, "i_ldc_process_VER: (0x%llx) sent version"
1188 			    "INFO v%u.%u\n", ldcp->id, ldc_versions[idx].major,
1189 			    ldc_versions[idx].minor);
1190 			ldcp->tx_tail = tx_tail;
1191 		} else {
1192 			cmn_err(CE_NOTE,
1193 			    "i_ldc_process_VER: (0x%lx) error sending version"
1194 			    "INFO\n", ldcp->id);
1195 			i_ldc_reset(ldcp, B_TRUE);
1196 			mutex_exit(&ldcp->tx_lock);
1197 			return (ECONNRESET);
1198 		}
1199 
1200 		break;
1201 	}
1202 
1203 	mutex_exit(&ldcp->tx_lock);
1204 	return (rv);
1205 }
1206 
1207 
1208 /*
1209  * Process an incoming RTS ctrl message
1210  */
1211 static int
1212 i_ldc_process_RTS(ldc_chan_t *ldcp, ldc_msg_t *msg)
1213 {
1214 	int 		rv = 0;
1215 	ldc_msg_t 	*pkt;
1216 	uint64_t	tx_tail;
1217 	boolean_t	sent_NACK = B_FALSE;
1218 
1219 	D2(ldcp->id, "i_ldc_process_RTS: (0x%llx) received RTS\n", ldcp->id);
1220 
1221 	switch (msg->stype) {
1222 	case LDC_NACK:
1223 		DWARN(ldcp->id,
1224 		    "i_ldc_process_RTS: (0x%llx) RTS NACK received\n",
1225 		    ldcp->id);
1226 
1227 		/* Reset the channel -- as we cannot continue */
1228 		mutex_enter(&ldcp->tx_lock);
1229 		i_ldc_reset(ldcp, B_TRUE);
1230 		mutex_exit(&ldcp->tx_lock);
1231 		rv = ECONNRESET;
1232 		break;
1233 
1234 	case LDC_INFO:
1235 
1236 		/* check mode */
1237 		if (ldcp->mode != (ldc_mode_t)msg->env) {
1238 			cmn_err(CE_NOTE,
1239 			    "i_ldc_process_RTS: (0x%lx) mode mismatch\n",
1240 			    ldcp->id);
1241 			/*
1242 			 * send NACK in response to MODE message
1243 			 * get the current tail for the response
1244 			 */
1245 			rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_NACK, LDC_RTS);
1246 			if (rv) {
1247 				/* if cannot send NACK - reset channel */
1248 				mutex_enter(&ldcp->tx_lock);
1249 				i_ldc_reset(ldcp, B_TRUE);
1250 				mutex_exit(&ldcp->tx_lock);
1251 				rv = ECONNRESET;
1252 				break;
1253 			}
1254 			sent_NACK = B_TRUE;
1255 		}
1256 		break;
1257 	default:
1258 		DWARN(ldcp->id, "i_ldc_process_RTS: (0x%llx) unexp ACK\n",
1259 		    ldcp->id);
1260 		mutex_enter(&ldcp->tx_lock);
1261 		i_ldc_reset(ldcp, B_TRUE);
1262 		mutex_exit(&ldcp->tx_lock);
1263 		rv = ECONNRESET;
1264 		break;
1265 	}
1266 
1267 	/*
1268 	 * If either the connection was reset (when rv != 0) or
1269 	 * a NACK was sent, we return. In the case of a NACK
1270 	 * we dont want to consume the packet that came in but
1271 	 * not record that we received the RTS
1272 	 */
1273 	if (rv || sent_NACK)
1274 		return (rv);
1275 
1276 	/* record RTS received */
1277 	ldcp->hstate |= TS_RCVD_RTS;
1278 
1279 	/* store initial SEQID info */
1280 	ldcp->last_msg_snt = msg->seqid;
1281 
1282 	/* Obtain Tx lock */
1283 	mutex_enter(&ldcp->tx_lock);
1284 
1285 	/* get the current tail for the response */
1286 	rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
1287 	if (rv != 0) {
1288 		cmn_err(CE_NOTE,
1289 		    "i_ldc_process_RTS: (0x%lx) err sending RTR\n",
1290 		    ldcp->id);
1291 		i_ldc_reset(ldcp, B_TRUE);
1292 		mutex_exit(&ldcp->tx_lock);
1293 		return (ECONNRESET);
1294 	}
1295 
1296 	pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
1297 	ZERO_PKT(pkt);
1298 
1299 	/* initialize the packet */
1300 	pkt->type = LDC_CTRL;
1301 	pkt->stype = LDC_INFO;
1302 	pkt->ctrl = LDC_RTR;
1303 	pkt->env = ldcp->mode;
1304 	if (ldcp->mode != LDC_MODE_RAW)
1305 		pkt->seqid = LDC_INIT_SEQID;
1306 
1307 	ldcp->last_msg_rcd = msg->seqid;
1308 
1309 	/* initiate the send by calling into HV and set the new tail */
1310 	tx_tail = (tx_tail + LDC_PACKET_SIZE) %
1311 		(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1312 
1313 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
1314 	if (rv == 0) {
1315 		D2(ldcp->id,
1316 		    "i_ldc_process_RTS: (0x%llx) sent RTR\n", ldcp->id);
1317 		DUMP_LDC_PKT(ldcp, "i_ldc_process_RTS sent rtr", (uint64_t)pkt);
1318 
1319 		ldcp->tx_tail = tx_tail;
1320 		ldcp->hstate |= TS_SENT_RTR;
1321 
1322 	} else {
1323 		cmn_err(CE_NOTE,
1324 		    "i_ldc_process_RTS: (0x%lx) error sending RTR\n",
1325 		    ldcp->id);
1326 		i_ldc_reset(ldcp, B_TRUE);
1327 		mutex_exit(&ldcp->tx_lock);
1328 		return (ECONNRESET);
1329 	}
1330 
1331 	mutex_exit(&ldcp->tx_lock);
1332 	return (0);
1333 }
1334 
1335 /*
1336  * Process an incoming RTR ctrl message
1337  */
1338 static int
1339 i_ldc_process_RTR(ldc_chan_t *ldcp, ldc_msg_t *msg)
1340 {
1341 	int 		rv = 0;
1342 	boolean_t	sent_NACK = B_FALSE;
1343 
1344 	D2(ldcp->id, "i_ldc_process_RTR: (0x%llx) received RTR\n", ldcp->id);
1345 
1346 	switch (msg->stype) {
1347 	case LDC_NACK:
1348 		/* RTR NACK received */
1349 		DWARN(ldcp->id,
1350 		    "i_ldc_process_RTR: (0x%llx) RTR NACK received\n",
1351 		    ldcp->id);
1352 
1353 		/* Reset the channel -- as we cannot continue */
1354 		mutex_enter(&ldcp->tx_lock);
1355 		i_ldc_reset(ldcp, B_TRUE);
1356 		mutex_exit(&ldcp->tx_lock);
1357 		rv = ECONNRESET;
1358 
1359 		break;
1360 
1361 	case LDC_INFO:
1362 
1363 		/* check mode */
1364 		if (ldcp->mode != (ldc_mode_t)msg->env) {
1365 			DWARN(ldcp->id,
1366 			    "i_ldc_process_RTR: (0x%llx) mode mismatch, "
1367 			    "expecting 0x%x, got 0x%x\n",
1368 			    ldcp->id, ldcp->mode, (ldc_mode_t)msg->env);
1369 			/*
1370 			 * send NACK in response to MODE message
1371 			 * get the current tail for the response
1372 			 */
1373 			rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_NACK, LDC_RTR);
1374 			if (rv) {
1375 				/* if cannot send NACK - reset channel */
1376 				mutex_enter(&ldcp->tx_lock);
1377 				i_ldc_reset(ldcp, B_TRUE);
1378 				mutex_exit(&ldcp->tx_lock);
1379 				rv = ECONNRESET;
1380 				break;
1381 			}
1382 			sent_NACK = B_TRUE;
1383 		}
1384 		break;
1385 
1386 	default:
1387 		DWARN(ldcp->id, "i_ldc_process_RTR: (0x%llx) unexp ACK\n",
1388 		    ldcp->id);
1389 
1390 		/* Reset the channel -- as we cannot continue */
1391 		mutex_enter(&ldcp->tx_lock);
1392 		i_ldc_reset(ldcp, B_TRUE);
1393 		mutex_exit(&ldcp->tx_lock);
1394 		rv = ECONNRESET;
1395 		break;
1396 	}
1397 
1398 	/*
1399 	 * If either the connection was reset (when rv != 0) or
1400 	 * a NACK was sent, we return. In the case of a NACK
1401 	 * we dont want to consume the packet that came in but
1402 	 * not record that we received the RTR
1403 	 */
1404 	if (rv || sent_NACK)
1405 		return (rv);
1406 
1407 	ldcp->last_msg_snt = msg->seqid;
1408 	ldcp->hstate |= TS_RCVD_RTR;
1409 
1410 	rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_INFO, LDC_RDX);
1411 	if (rv) {
1412 		cmn_err(CE_NOTE,
1413 		    "i_ldc_process_RTR: (0x%lx) cannot send RDX\n",
1414 		    ldcp->id);
1415 		mutex_enter(&ldcp->tx_lock);
1416 		i_ldc_reset(ldcp, B_TRUE);
1417 		mutex_exit(&ldcp->tx_lock);
1418 		return (ECONNRESET);
1419 	}
1420 	D2(ldcp->id,
1421 	    "i_ldc_process_RTR: (0x%llx) sent RDX\n", ldcp->id);
1422 
1423 	ldcp->hstate |= TS_SENT_RDX;
1424 	ldcp->tstate |= TS_HSHAKE_DONE;
1425 	if ((ldcp->tstate & TS_IN_RESET) == 0)
1426 		ldcp->status = LDC_UP;
1427 
1428 	D1(ldcp->id, "(0x%llx) Handshake Complete\n", ldcp->id);
1429 
1430 	return (0);
1431 }
1432 
1433 
1434 /*
1435  * Process an incoming RDX ctrl message
1436  */
1437 static int
1438 i_ldc_process_RDX(ldc_chan_t *ldcp, ldc_msg_t *msg)
1439 {
1440 	int	rv = 0;
1441 
1442 	D2(ldcp->id, "i_ldc_process_RDX: (0x%llx) received RDX\n", ldcp->id);
1443 
1444 	switch (msg->stype) {
1445 	case LDC_NACK:
1446 		/* RDX NACK received */
1447 		DWARN(ldcp->id,
1448 		    "i_ldc_process_RDX: (0x%llx) RDX NACK received\n",
1449 		    ldcp->id);
1450 
1451 		/* Reset the channel -- as we cannot continue */
1452 		mutex_enter(&ldcp->tx_lock);
1453 		i_ldc_reset(ldcp, B_TRUE);
1454 		mutex_exit(&ldcp->tx_lock);
1455 		rv = ECONNRESET;
1456 
1457 		break;
1458 
1459 	case LDC_INFO:
1460 
1461 		/*
1462 		 * if channel is UP and a RDX received after data transmission
1463 		 * has commenced it is an error
1464 		 */
1465 		if ((ldcp->tstate == TS_UP) && (ldcp->hstate & TS_RCVD_RDX)) {
1466 			DWARN(DBG_ALL_LDCS,
1467 			    "i_ldc_process_RDX: (0x%llx) unexpected RDX"
1468 			    " - LDC reset\n", ldcp->id);
1469 			mutex_enter(&ldcp->tx_lock);
1470 			i_ldc_reset(ldcp, B_TRUE);
1471 			mutex_exit(&ldcp->tx_lock);
1472 			return (ECONNRESET);
1473 		}
1474 
1475 		ldcp->hstate |= TS_RCVD_RDX;
1476 		ldcp->tstate |= TS_HSHAKE_DONE;
1477 		if ((ldcp->tstate & TS_IN_RESET) == 0)
1478 			ldcp->status = LDC_UP;
1479 
1480 		D1(DBG_ALL_LDCS, "(0x%llx) Handshake Complete\n", ldcp->id);
1481 		break;
1482 
1483 	default:
1484 		DWARN(ldcp->id, "i_ldc_process_RDX: (0x%llx) unexp ACK\n",
1485 		    ldcp->id);
1486 
1487 		/* Reset the channel -- as we cannot continue */
1488 		mutex_enter(&ldcp->tx_lock);
1489 		i_ldc_reset(ldcp, B_TRUE);
1490 		mutex_exit(&ldcp->tx_lock);
1491 		rv = ECONNRESET;
1492 		break;
1493 	}
1494 
1495 	return (rv);
1496 }
1497 
1498 /*
1499  * Process an incoming ACK for a data packet
1500  */
1501 static int
1502 i_ldc_process_data_ACK(ldc_chan_t *ldcp, ldc_msg_t *msg)
1503 {
1504 	int		rv;
1505 	uint64_t 	tx_head;
1506 	ldc_msg_t	*pkt;
1507 
1508 	/* Obtain Tx lock */
1509 	mutex_enter(&ldcp->tx_lock);
1510 
1511 	/*
1512 	 * Read the current Tx head and tail
1513 	 */
1514 	rv = hv_ldc_tx_get_state(ldcp->id,
1515 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
1516 	if (rv != 0) {
1517 		cmn_err(CE_WARN,
1518 		    "i_ldc_process_data_ACK: (0x%lx) cannot read qptrs\n",
1519 		    ldcp->id);
1520 
1521 		/* Reset the channel -- as we cannot continue */
1522 		i_ldc_reset(ldcp, B_TRUE);
1523 		mutex_exit(&ldcp->tx_lock);
1524 		return (ECONNRESET);
1525 	}
1526 
1527 	/*
1528 	 * loop from where the previous ACK location was to the
1529 	 * current head location. This is how far the HV has
1530 	 * actually send pkts. Pkts between head and tail are
1531 	 * yet to be sent by HV.
1532 	 */
1533 	tx_head = ldcp->tx_ackd_head;
1534 	for (;;) {
1535 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_head);
1536 		tx_head = (tx_head + LDC_PACKET_SIZE) %
1537 			(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1538 
1539 		if (pkt->seqid == msg->ackid) {
1540 			D2(ldcp->id,
1541 			    "i_ldc_process_data_ACK: (0x%llx) found packet\n",
1542 			    ldcp->id);
1543 			ldcp->last_ack_rcd = msg->ackid;
1544 			ldcp->tx_ackd_head = tx_head;
1545 			break;
1546 		}
1547 		if (tx_head == ldcp->tx_head) {
1548 			/* could not find packet */
1549 			DWARN(ldcp->id,
1550 			    "i_ldc_process_data_ACK: (0x%llx) invalid ACKid\n",
1551 			    ldcp->id);
1552 
1553 			/* Reset the channel -- as we cannot continue */
1554 			i_ldc_reset(ldcp, B_TRUE);
1555 			mutex_exit(&ldcp->tx_lock);
1556 			return (ECONNRESET);
1557 		}
1558 	}
1559 
1560 	mutex_exit(&ldcp->tx_lock);
1561 	return (0);
1562 }
1563 
1564 /*
1565  * Process incoming control message
1566  * Return 0 - session can continue
1567  *        EAGAIN - reprocess packet - state was changed
1568  *	  ECONNRESET - channel was reset
1569  */
1570 static int
1571 i_ldc_ctrlmsg(ldc_chan_t *ldcp, ldc_msg_t *msg)
1572 {
1573 	int 		rv = 0;
1574 
1575 	D1(ldcp->id, "i_ldc_ctrlmsg: (%llx) tstate = %lx, hstate = %lx\n",
1576 	    ldcp->id, ldcp->tstate, ldcp->hstate);
1577 
1578 	switch (ldcp->tstate & ~TS_IN_RESET) {
1579 
1580 	case TS_OPEN:
1581 	case TS_READY:
1582 
1583 		switch (msg->ctrl & LDC_CTRL_MASK) {
1584 		case LDC_VER:
1585 			/* process version message */
1586 			rv = i_ldc_process_VER(ldcp, msg);
1587 			break;
1588 		default:
1589 			DWARN(ldcp->id,
1590 			    "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
1591 			    "tstate=0x%x\n", ldcp->id,
1592 			    (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate);
1593 			break;
1594 		}
1595 
1596 		break;
1597 
1598 	case TS_VREADY:
1599 
1600 		switch (msg->ctrl & LDC_CTRL_MASK) {
1601 		case LDC_VER:
1602 			/* process version message */
1603 			rv = i_ldc_process_VER(ldcp, msg);
1604 			break;
1605 		case LDC_RTS:
1606 			/* process RTS message */
1607 			rv = i_ldc_process_RTS(ldcp, msg);
1608 			break;
1609 		case LDC_RTR:
1610 			/* process RTR message */
1611 			rv = i_ldc_process_RTR(ldcp, msg);
1612 			break;
1613 		case LDC_RDX:
1614 			/* process RDX message */
1615 			rv = i_ldc_process_RDX(ldcp, msg);
1616 			break;
1617 		default:
1618 			DWARN(ldcp->id,
1619 			    "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
1620 			    "tstate=0x%x\n", ldcp->id,
1621 			    (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate);
1622 			break;
1623 		}
1624 
1625 		break;
1626 
1627 	case TS_UP:
1628 
1629 		switch (msg->ctrl & LDC_CTRL_MASK) {
1630 		case LDC_VER:
1631 			DWARN(ldcp->id,
1632 			    "i_ldc_ctrlmsg: (0x%llx) unexpected VER "
1633 			    "- LDC reset\n", ldcp->id);
1634 			/* peer is redoing version negotiation */
1635 			mutex_enter(&ldcp->tx_lock);
1636 			(void) i_ldc_txq_reconf(ldcp);
1637 			i_ldc_reset_state(ldcp);
1638 			mutex_exit(&ldcp->tx_lock);
1639 			rv = EAGAIN;
1640 			break;
1641 
1642 		case LDC_RDX:
1643 			/* process RDX message */
1644 			rv = i_ldc_process_RDX(ldcp, msg);
1645 			break;
1646 
1647 		default:
1648 			DWARN(ldcp->id,
1649 			    "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
1650 			    "tstate=0x%x\n", ldcp->id,
1651 			    (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate);
1652 			break;
1653 		}
1654 	}
1655 
1656 	return (rv);
1657 }
1658 
1659 /*
1660  * Register channel with the channel nexus
1661  */
1662 static int
1663 i_ldc_register_channel(ldc_chan_t *ldcp)
1664 {
1665 	int		rv = 0;
1666 	ldc_cnex_t	*cinfo = &ldcssp->cinfo;
1667 
1668 	if (cinfo->dip == NULL) {
1669 		DWARN(ldcp->id,
1670 		    "i_ldc_register_channel: cnex has not registered\n");
1671 		return (EAGAIN);
1672 	}
1673 
1674 	rv = cinfo->reg_chan(cinfo->dip, ldcp->id, ldcp->devclass);
1675 	if (rv) {
1676 		DWARN(ldcp->id,
1677 		    "i_ldc_register_channel: cannot register channel\n");
1678 		return (rv);
1679 	}
1680 
1681 	rv = cinfo->add_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR,
1682 	    i_ldc_tx_hdlr, ldcp, NULL);
1683 	if (rv) {
1684 		DWARN(ldcp->id,
1685 		    "i_ldc_register_channel: cannot add Tx interrupt\n");
1686 		(void) cinfo->unreg_chan(cinfo->dip, ldcp->id);
1687 		return (rv);
1688 	}
1689 
1690 	rv = cinfo->add_intr(cinfo->dip, ldcp->id, CNEX_RX_INTR,
1691 	    i_ldc_rx_hdlr, ldcp, NULL);
1692 	if (rv) {
1693 		DWARN(ldcp->id,
1694 		    "i_ldc_register_channel: cannot add Rx interrupt\n");
1695 		(void) cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR);
1696 		(void) cinfo->unreg_chan(cinfo->dip, ldcp->id);
1697 		return (rv);
1698 	}
1699 
1700 	ldcp->tstate |= TS_CNEX_RDY;
1701 
1702 	return (0);
1703 }
1704 
1705 /*
1706  * Unregister a channel with the channel nexus
1707  */
1708 static int
1709 i_ldc_unregister_channel(ldc_chan_t *ldcp)
1710 {
1711 	int		rv = 0;
1712 	ldc_cnex_t	*cinfo = &ldcssp->cinfo;
1713 
1714 	if (cinfo->dip == NULL) {
1715 		DWARN(ldcp->id,
1716 		    "i_ldc_unregister_channel: cnex has not registered\n");
1717 		return (EAGAIN);
1718 	}
1719 
1720 	if (ldcp->tstate & TS_CNEX_RDY) {
1721 
1722 		/* Remove the Rx interrupt */
1723 		rv = cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_RX_INTR);
1724 		if (rv) {
1725 			if (rv != EAGAIN) {
1726 				DWARN(ldcp->id,
1727 				    "i_ldc_unregister_channel: err removing "
1728 				    "Rx intr\n");
1729 				return (rv);
1730 			}
1731 
1732 			/*
1733 			 * If interrupts are pending and handler has
1734 			 * finished running, clear interrupt and try
1735 			 * again
1736 			 */
1737 			if (ldcp->rx_intr_state != LDC_INTR_PEND)
1738 				return (rv);
1739 
1740 			(void) i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
1741 			rv = cinfo->rem_intr(cinfo->dip, ldcp->id,
1742 			    CNEX_RX_INTR);
1743 			if (rv) {
1744 				DWARN(ldcp->id, "i_ldc_unregister_channel: "
1745 				    "err removing Rx interrupt\n");
1746 				return (rv);
1747 			}
1748 		}
1749 
1750 		/* Remove the Tx interrupt */
1751 		rv = cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR);
1752 		if (rv) {
1753 			DWARN(ldcp->id,
1754 			    "i_ldc_unregister_channel: err removing Tx intr\n");
1755 			return (rv);
1756 		}
1757 
1758 		/* Unregister the channel */
1759 		rv = cinfo->unreg_chan(ldcssp->cinfo.dip, ldcp->id);
1760 		if (rv) {
1761 			DWARN(ldcp->id,
1762 			    "i_ldc_unregister_channel: cannot unreg channel\n");
1763 			return (rv);
1764 		}
1765 
1766 		ldcp->tstate &= ~TS_CNEX_RDY;
1767 	}
1768 
1769 	return (0);
1770 }
1771 
1772 
1773 /*
1774  * LDC transmit interrupt handler
1775  *    triggered for chanel up/down/reset events
1776  *    and Tx queue content changes
1777  */
1778 static uint_t
1779 i_ldc_tx_hdlr(caddr_t arg1, caddr_t arg2)
1780 {
1781 	_NOTE(ARGUNUSED(arg2))
1782 
1783 	int 		rv;
1784 	ldc_chan_t 	*ldcp;
1785 	boolean_t 	notify_client = B_FALSE;
1786 	uint64_t	notify_event = 0, link_state;
1787 
1788 	/* Get the channel for which interrupt was received */
1789 	ASSERT(arg1 != NULL);
1790 	ldcp = (ldc_chan_t *)arg1;
1791 
1792 	D1(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) Received intr, ldcp=0x%p\n",
1793 	    ldcp->id, ldcp);
1794 
1795 	/* Lock channel */
1796 	mutex_enter(&ldcp->lock);
1797 
1798 	/* Obtain Tx lock */
1799 	mutex_enter(&ldcp->tx_lock);
1800 
1801 	/* mark interrupt as pending */
1802 	ldcp->tx_intr_state = LDC_INTR_ACTIVE;
1803 
1804 	/* save current link state */
1805 	link_state = ldcp->link_state;
1806 
1807 	rv = hv_ldc_tx_get_state(ldcp->id, &ldcp->tx_head, &ldcp->tx_tail,
1808 	    &ldcp->link_state);
1809 	if (rv) {
1810 		cmn_err(CE_WARN,
1811 		    "i_ldc_tx_hdlr: (0x%lx) cannot read queue ptrs rv=0x%d\n",
1812 		    ldcp->id, rv);
1813 		i_ldc_clear_intr(ldcp, CNEX_TX_INTR);
1814 		mutex_exit(&ldcp->tx_lock);
1815 		mutex_exit(&ldcp->lock);
1816 		return (DDI_INTR_CLAIMED);
1817 	}
1818 
1819 	/*
1820 	 * reset the channel state if the channel went down
1821 	 * (other side unconfigured queue) or channel was reset
1822 	 * (other side reconfigured its queue)
1823 	 */
1824 	if (link_state != ldcp->link_state &&
1825 	    ldcp->link_state == LDC_CHANNEL_DOWN) {
1826 		D1(ldcp->id, "i_ldc_tx_hdlr: channel link down\n", ldcp->id);
1827 		i_ldc_reset(ldcp, B_FALSE);
1828 		notify_client = B_TRUE;
1829 		notify_event = LDC_EVT_DOWN;
1830 	}
1831 
1832 	if (link_state != ldcp->link_state &&
1833 	    ldcp->link_state == LDC_CHANNEL_RESET) {
1834 		D1(ldcp->id, "i_ldc_tx_hdlr: channel link reset\n", ldcp->id);
1835 		i_ldc_reset(ldcp, B_FALSE);
1836 		notify_client = B_TRUE;
1837 		notify_event = LDC_EVT_RESET;
1838 	}
1839 
1840 	if (link_state != ldcp->link_state &&
1841 	    (ldcp->tstate & ~TS_IN_RESET) == TS_OPEN &&
1842 	    ldcp->link_state == LDC_CHANNEL_UP) {
1843 		D1(ldcp->id, "i_ldc_tx_hdlr: channel link up\n", ldcp->id);
1844 		notify_client = B_TRUE;
1845 		notify_event = LDC_EVT_RESET;
1846 		ldcp->tstate |= TS_LINK_READY;
1847 		ldcp->status = LDC_READY;
1848 	}
1849 
1850 	/* if callbacks are disabled, do not notify */
1851 	if (!ldcp->cb_enabled)
1852 		notify_client = B_FALSE;
1853 
1854 	i_ldc_clear_intr(ldcp, CNEX_TX_INTR);
1855 
1856 	if (notify_client) {
1857 		ldcp->cb_inprogress = B_TRUE;
1858 		mutex_exit(&ldcp->tx_lock);
1859 		mutex_exit(&ldcp->lock);
1860 		rv = ldcp->cb(notify_event, ldcp->cb_arg);
1861 		if (rv) {
1862 			DWARN(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) callback "
1863 			    "failure", ldcp->id);
1864 		}
1865 		mutex_enter(&ldcp->lock);
1866 		ldcp->cb_inprogress = B_FALSE;
1867 	}
1868 
1869 	mutex_exit(&ldcp->lock);
1870 
1871 	D1(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) exiting handler", ldcp->id);
1872 
1873 	return (DDI_INTR_CLAIMED);
1874 }
1875 
1876 /*
1877  * LDC receive interrupt handler
1878  *    triggered for channel with data pending to read
1879  *    i.e. Rx queue content changes
1880  */
1881 static uint_t
1882 i_ldc_rx_hdlr(caddr_t arg1, caddr_t arg2)
1883 {
1884 	_NOTE(ARGUNUSED(arg2))
1885 
1886 	int		rv;
1887 	uint64_t 	rx_head, rx_tail;
1888 	ldc_msg_t 	*msg;
1889 	ldc_chan_t 	*ldcp;
1890 	boolean_t 	notify_client = B_FALSE;
1891 	uint64_t	notify_event = 0;
1892 	uint64_t	link_state, first_fragment = 0;
1893 
1894 
1895 	/* Get the channel for which interrupt was received */
1896 	if (arg1 == NULL) {
1897 		cmn_err(CE_WARN, "i_ldc_rx_hdlr: invalid arg\n");
1898 		return (DDI_INTR_UNCLAIMED);
1899 	}
1900 
1901 	ldcp = (ldc_chan_t *)arg1;
1902 
1903 	D1(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) Received intr, ldcp=0x%p\n",
1904 	    ldcp->id, ldcp);
1905 	D1(ldcp->id, "i_ldc_rx_hdlr: (%llx) USR%lx/TS%lx/HS%lx, LSTATE=%lx\n",
1906 	    ldcp->id, ldcp->status, ldcp->tstate, ldcp->hstate,
1907 	    ldcp->link_state);
1908 
1909 	/* Lock channel */
1910 	mutex_enter(&ldcp->lock);
1911 
1912 	/* mark interrupt as pending */
1913 	ldcp->rx_intr_state = LDC_INTR_ACTIVE;
1914 
1915 	/*
1916 	 * Read packet(s) from the queue
1917 	 */
1918 	for (;;) {
1919 
1920 		link_state = ldcp->link_state;
1921 		rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
1922 		    &ldcp->link_state);
1923 		if (rv) {
1924 			cmn_err(CE_WARN,
1925 			    "i_ldc_rx_hdlr: (0x%lx) cannot read "
1926 			    "queue ptrs, rv=0x%d\n", ldcp->id, rv);
1927 			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
1928 			mutex_exit(&ldcp->lock);
1929 			return (DDI_INTR_CLAIMED);
1930 		}
1931 
1932 		/*
1933 		 * reset the channel state if the channel went down
1934 		 * (other side unconfigured queue) or channel was reset
1935 		 * (other side reconfigured its queue)
1936 		 */
1937 
1938 		if (link_state != ldcp->link_state) {
1939 
1940 			switch (ldcp->link_state) {
1941 			case LDC_CHANNEL_DOWN:
1942 				D1(ldcp->id, "i_ldc_rx_hdlr: channel "
1943 				    "link down\n", ldcp->id);
1944 				mutex_enter(&ldcp->tx_lock);
1945 				i_ldc_reset(ldcp, B_FALSE);
1946 				mutex_exit(&ldcp->tx_lock);
1947 				notify_client = B_TRUE;
1948 				notify_event = LDC_EVT_DOWN;
1949 				goto loop_exit;
1950 
1951 			case LDC_CHANNEL_UP:
1952 				D1(ldcp->id, "i_ldc_rx_hdlr: "
1953 				    "channel link up\n", ldcp->id);
1954 
1955 				if ((ldcp->tstate & ~TS_IN_RESET) == TS_OPEN) {
1956 					notify_client = B_TRUE;
1957 					notify_event = LDC_EVT_RESET;
1958 					ldcp->tstate |= TS_LINK_READY;
1959 					ldcp->status = LDC_READY;
1960 				}
1961 				break;
1962 
1963 			case LDC_CHANNEL_RESET:
1964 			default:
1965 #ifdef DEBUG
1966 force_reset:
1967 #endif
1968 				D1(ldcp->id, "i_ldc_rx_hdlr: channel "
1969 				    "link reset\n", ldcp->id);
1970 				mutex_enter(&ldcp->tx_lock);
1971 				i_ldc_reset(ldcp, B_FALSE);
1972 				mutex_exit(&ldcp->tx_lock);
1973 				notify_client = B_TRUE;
1974 				notify_event = LDC_EVT_RESET;
1975 				break;
1976 			}
1977 		}
1978 
1979 #ifdef DEBUG
1980 		if (LDC_INJECT_RESET(ldcp))
1981 			goto force_reset;
1982 #endif
1983 
1984 		if (rx_head == rx_tail) {
1985 			D2(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) No packets\n",
1986 			    ldcp->id);
1987 			break;
1988 		}
1989 
1990 		D2(ldcp->id, "i_ldc_rx_hdlr: head=0x%llx, tail=0x%llx\n",
1991 		    rx_head, rx_tail);
1992 		DUMP_LDC_PKT(ldcp, "i_ldc_rx_hdlr rcd",
1993 		    ldcp->rx_q_va + rx_head);
1994 
1995 		/* get the message */
1996 		msg = (ldc_msg_t *)(ldcp->rx_q_va + rx_head);
1997 
1998 		/* if channel is in RAW mode or data pkt, notify and return */
1999 		if (ldcp->mode == LDC_MODE_RAW) {
2000 			notify_client = B_TRUE;
2001 			notify_event |= LDC_EVT_READ;
2002 			break;
2003 		}
2004 
2005 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_INFO)) {
2006 
2007 			/* discard packet if channel is not up */
2008 			if ((ldcp->tstate & ~TS_IN_RESET) != TS_UP) {
2009 
2010 				/* move the head one position */
2011 				rx_head = (rx_head + LDC_PACKET_SIZE) %
2012 				(ldcp->rx_q_entries << LDC_PACKET_SHIFT);
2013 
2014 				if (rv = i_ldc_set_rx_head(ldcp, rx_head))
2015 					break;
2016 
2017 				continue;
2018 			} else {
2019 				if ((ldcp->tstate & TS_IN_RESET) == 0)
2020 					notify_client = B_TRUE;
2021 				notify_event |= LDC_EVT_READ;
2022 				break;
2023 			}
2024 		}
2025 
2026 		/* Check the sequence ID for the message received */
2027 		rv = i_ldc_check_seqid(ldcp, msg);
2028 		if (rv != 0) {
2029 
2030 			DWARN(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) seqid error, "
2031 			    "q_ptrs=0x%lx,0x%lx", ldcp->id, rx_head, rx_tail);
2032 
2033 			/* Reset last_msg_rcd to start of message */
2034 			if (first_fragment != 0) {
2035 				ldcp->last_msg_rcd = first_fragment - 1;
2036 				first_fragment = 0;
2037 			}
2038 
2039 			/*
2040 			 * Send a NACK due to seqid mismatch
2041 			 */
2042 			rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_NACK,
2043 			    (msg->ctrl & LDC_CTRL_MASK));
2044 
2045 			if (rv) {
2046 				cmn_err(CE_NOTE,
2047 				    "i_ldc_rx_hdlr: (0x%lx) err sending "
2048 				    "CTRL/NACK msg\n", ldcp->id);
2049 
2050 				/* if cannot send NACK - reset channel */
2051 				mutex_enter(&ldcp->tx_lock);
2052 				i_ldc_reset(ldcp, B_TRUE);
2053 				mutex_exit(&ldcp->tx_lock);
2054 
2055 				notify_client = B_TRUE;
2056 				notify_event = LDC_EVT_RESET;
2057 				break;
2058 			}
2059 
2060 			/* purge receive queue */
2061 			(void) i_ldc_set_rx_head(ldcp, rx_tail);
2062 			break;
2063 		}
2064 
2065 		/* record the message ID */
2066 		ldcp->last_msg_rcd = msg->seqid;
2067 
2068 		/* process control messages */
2069 		if (msg->type & LDC_CTRL) {
2070 			/* save current internal state */
2071 			uint64_t tstate = ldcp->tstate;
2072 
2073 			rv = i_ldc_ctrlmsg(ldcp, msg);
2074 			if (rv == EAGAIN) {
2075 				/* re-process pkt - state was adjusted */
2076 				continue;
2077 			}
2078 			if (rv == ECONNRESET) {
2079 				notify_client = B_TRUE;
2080 				notify_event = LDC_EVT_RESET;
2081 				break;
2082 			}
2083 
2084 			/*
2085 			 * control message processing was successful
2086 			 * channel transitioned to ready for communication
2087 			 */
2088 			if (rv == 0 && ldcp->tstate == TS_UP &&
2089 			    (tstate & ~TS_IN_RESET) !=
2090 			    (ldcp->tstate & ~TS_IN_RESET)) {
2091 				notify_client = B_TRUE;
2092 				notify_event = LDC_EVT_UP;
2093 			}
2094 		}
2095 
2096 		/* process data NACKs */
2097 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_NACK)) {
2098 			DWARN(ldcp->id,
2099 			    "i_ldc_rx_hdlr: (0x%llx) received DATA/NACK",
2100 			    ldcp->id);
2101 			mutex_enter(&ldcp->tx_lock);
2102 			i_ldc_reset(ldcp, B_TRUE);
2103 			mutex_exit(&ldcp->tx_lock);
2104 			notify_client = B_TRUE;
2105 			notify_event = LDC_EVT_RESET;
2106 			break;
2107 		}
2108 
2109 		/* process data ACKs */
2110 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) {
2111 			if (rv = i_ldc_process_data_ACK(ldcp, msg)) {
2112 				notify_client = B_TRUE;
2113 				notify_event = LDC_EVT_RESET;
2114 				break;
2115 			}
2116 		}
2117 
2118 		/* move the head one position */
2119 		rx_head = (rx_head + LDC_PACKET_SIZE) %
2120 			(ldcp->rx_q_entries << LDC_PACKET_SHIFT);
2121 		if (rv = i_ldc_set_rx_head(ldcp, rx_head)) {
2122 			notify_client = B_TRUE;
2123 			notify_event = LDC_EVT_RESET;
2124 			break;
2125 		}
2126 
2127 	} /* for */
2128 
2129 loop_exit:
2130 
2131 	/* if callbacks are disabled, do not notify */
2132 	if (!ldcp->cb_enabled)
2133 		notify_client = B_FALSE;
2134 
2135 	/*
2136 	 * If there are data packets in the queue, the ldc_read will
2137 	 * clear interrupts after draining the queue, else clear interrupts
2138 	 */
2139 	if ((notify_event & LDC_EVT_READ) == 0) {
2140 		i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
2141 	} else
2142 		ldcp->rx_intr_state = LDC_INTR_PEND;
2143 
2144 
2145 	if (notify_client) {
2146 		ldcp->cb_inprogress = B_TRUE;
2147 		mutex_exit(&ldcp->lock);
2148 		rv = ldcp->cb(notify_event, ldcp->cb_arg);
2149 		if (rv) {
2150 			DWARN(ldcp->id,
2151 			    "i_ldc_rx_hdlr: (0x%llx) callback failure",
2152 			    ldcp->id);
2153 		}
2154 		mutex_enter(&ldcp->lock);
2155 		ldcp->cb_inprogress = B_FALSE;
2156 	}
2157 
2158 	mutex_exit(&ldcp->lock);
2159 
2160 	D1(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) exiting handler", ldcp->id);
2161 	return (DDI_INTR_CLAIMED);
2162 }
2163 
2164 
2165 /* -------------------------------------------------------------------------- */
2166 
2167 /*
2168  * LDC API functions
2169  */
2170 
2171 /*
2172  * Initialize the channel. Allocate internal structure and memory for
2173  * TX/RX queues, and initialize locks.
2174  */
2175 int
2176 ldc_init(uint64_t id, ldc_attr_t *attr, ldc_handle_t *handle)
2177 {
2178 	ldc_chan_t 	*ldcp;
2179 	int		rv, exit_val;
2180 	uint64_t	ra_base, nentries;
2181 	uint64_t	qlen;
2182 
2183 	exit_val = EINVAL;	/* guarantee an error if exit on failure */
2184 
2185 	if (attr == NULL) {
2186 		DWARN(id, "ldc_init: (0x%llx) invalid attr\n", id);
2187 		return (EINVAL);
2188 	}
2189 	if (handle == NULL) {
2190 		DWARN(id, "ldc_init: (0x%llx) invalid handle\n", id);
2191 		return (EINVAL);
2192 	}
2193 
2194 	/* check if channel is valid */
2195 	rv = hv_ldc_tx_qinfo(id, &ra_base, &nentries);
2196 	if (rv == H_ECHANNEL) {
2197 		DWARN(id, "ldc_init: (0x%llx) invalid channel id\n", id);
2198 		return (EINVAL);
2199 	}
2200 
2201 	/* check if the channel has already been initialized */
2202 	mutex_enter(&ldcssp->lock);
2203 	ldcp = ldcssp->chan_list;
2204 	while (ldcp != NULL) {
2205 		if (ldcp->id == id) {
2206 			DWARN(id, "ldc_init: (0x%llx) already initialized\n",
2207 			    id);
2208 			mutex_exit(&ldcssp->lock);
2209 			return (EADDRINUSE);
2210 		}
2211 		ldcp = ldcp->next;
2212 	}
2213 	mutex_exit(&ldcssp->lock);
2214 
2215 	ASSERT(ldcp == NULL);
2216 
2217 	*handle = 0;
2218 
2219 	/* Allocate an ldcp structure */
2220 	ldcp = kmem_zalloc(sizeof (ldc_chan_t), KM_SLEEP);
2221 
2222 	/*
2223 	 * Initialize the channel and Tx lock
2224 	 *
2225 	 * The channel 'lock' protects the entire channel and
2226 	 * should be acquired before initializing, resetting,
2227 	 * destroying or reading from a channel.
2228 	 *
2229 	 * The 'tx_lock' should be acquired prior to transmitting
2230 	 * data over the channel. The lock should also be acquired
2231 	 * prior to channel reconfiguration (in order to prevent
2232 	 * concurrent writes).
2233 	 *
2234 	 * ORDERING: When both locks are being acquired, to prevent
2235 	 * deadlocks, the channel lock should be always acquired prior
2236 	 * to the tx_lock.
2237 	 */
2238 	mutex_init(&ldcp->lock, NULL, MUTEX_DRIVER, NULL);
2239 	mutex_init(&ldcp->tx_lock, NULL, MUTEX_DRIVER, NULL);
2240 
2241 	/* Initialize the channel */
2242 	ldcp->id = id;
2243 	ldcp->cb = NULL;
2244 	ldcp->cb_arg = NULL;
2245 	ldcp->cb_inprogress = B_FALSE;
2246 	ldcp->cb_enabled = B_FALSE;
2247 	ldcp->next = NULL;
2248 
2249 	/* Read attributes */
2250 	ldcp->mode = attr->mode;
2251 	ldcp->devclass = attr->devclass;
2252 	ldcp->devinst = attr->instance;
2253 	ldcp->mtu = (attr->mtu > 0) ? attr->mtu : LDC_DEFAULT_MTU;
2254 
2255 	D1(ldcp->id,
2256 	    "ldc_init: (0x%llx) channel attributes, class=0x%x, "
2257 	    "instance=0x%llx, mode=%d, mtu=%d\n",
2258 	    ldcp->id, ldcp->devclass, ldcp->devinst, ldcp->mode, ldcp->mtu);
2259 
2260 	ldcp->next_vidx = 0;
2261 	ldcp->tstate = TS_IN_RESET;
2262 	ldcp->hstate = 0;
2263 	ldcp->last_msg_snt = LDC_INIT_SEQID;
2264 	ldcp->last_ack_rcd = 0;
2265 	ldcp->last_msg_rcd = 0;
2266 
2267 	ldcp->stream_bufferp = NULL;
2268 	ldcp->exp_dring_list = NULL;
2269 	ldcp->imp_dring_list = NULL;
2270 	ldcp->mhdl_list = NULL;
2271 
2272 	ldcp->tx_intr_state = LDC_INTR_NONE;
2273 	ldcp->rx_intr_state = LDC_INTR_NONE;
2274 
2275 	/* Initialize payload size depending on whether channel is reliable */
2276 	switch (ldcp->mode) {
2277 	case LDC_MODE_RAW:
2278 		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_RAW;
2279 		ldcp->read_p = i_ldc_read_raw;
2280 		ldcp->write_p = i_ldc_write_raw;
2281 		break;
2282 	case LDC_MODE_UNRELIABLE:
2283 		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_UNRELIABLE;
2284 		ldcp->read_p = i_ldc_read_packet;
2285 		ldcp->write_p = i_ldc_write_packet;
2286 		break;
2287 	case LDC_MODE_RELIABLE:
2288 		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_RELIABLE;
2289 		ldcp->read_p = i_ldc_read_packet;
2290 		ldcp->write_p = i_ldc_write_packet;
2291 		break;
2292 	case LDC_MODE_STREAM:
2293 		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_RELIABLE;
2294 
2295 		ldcp->stream_remains = 0;
2296 		ldcp->stream_offset = 0;
2297 		ldcp->stream_bufferp = kmem_alloc(ldcp->mtu, KM_SLEEP);
2298 		ldcp->read_p = i_ldc_read_stream;
2299 		ldcp->write_p = i_ldc_write_stream;
2300 		break;
2301 	default:
2302 		exit_val = EINVAL;
2303 		goto cleanup_on_exit;
2304 	}
2305 
2306 	/*
2307 	 * qlen is (mtu * ldc_mtu_msgs) / pkt_payload. If this
2308 	 * value is smaller than default length of ldc_queue_entries,
2309 	 * qlen is set to ldc_queue_entries..
2310 	 */
2311 	qlen = (ldcp->mtu * ldc_mtu_msgs) / ldcp->pkt_payload;
2312 	ldcp->rx_q_entries =
2313 		(qlen < ldc_queue_entries) ? ldc_queue_entries : qlen;
2314 	ldcp->tx_q_entries = ldcp->rx_q_entries;
2315 
2316 	D1(ldcp->id, "ldc_init: queue length = 0x%llx\n", qlen);
2317 
2318 	/* Create a transmit queue */
2319 	ldcp->tx_q_va = (uint64_t)
2320 		contig_mem_alloc(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
2321 	if (ldcp->tx_q_va == NULL) {
2322 		cmn_err(CE_WARN,
2323 		    "ldc_init: (0x%lx) TX queue allocation failed\n",
2324 		    ldcp->id);
2325 		exit_val = ENOMEM;
2326 		goto cleanup_on_exit;
2327 	}
2328 	ldcp->tx_q_ra = va_to_pa((caddr_t)ldcp->tx_q_va);
2329 
2330 	D2(ldcp->id, "ldc_init: txq_va=0x%llx, txq_ra=0x%llx, entries=0x%llx\n",
2331 	    ldcp->tx_q_va, ldcp->tx_q_ra, ldcp->tx_q_entries);
2332 
2333 	ldcp->tstate |= TS_TXQ_RDY;
2334 
2335 	/* Create a receive queue */
2336 	ldcp->rx_q_va = (uint64_t)
2337 		contig_mem_alloc(ldcp->rx_q_entries << LDC_PACKET_SHIFT);
2338 	if (ldcp->rx_q_va == NULL) {
2339 		cmn_err(CE_WARN,
2340 		    "ldc_init: (0x%lx) RX queue allocation failed\n",
2341 		    ldcp->id);
2342 		exit_val = ENOMEM;
2343 		goto cleanup_on_exit;
2344 	}
2345 	ldcp->rx_q_ra = va_to_pa((caddr_t)ldcp->rx_q_va);
2346 
2347 	D2(ldcp->id, "ldc_init: rxq_va=0x%llx, rxq_ra=0x%llx, entries=0x%llx\n",
2348 	    ldcp->rx_q_va, ldcp->rx_q_ra, ldcp->rx_q_entries);
2349 
2350 	ldcp->tstate |= TS_RXQ_RDY;
2351 
2352 	/* Init descriptor ring and memory handle list lock */
2353 	mutex_init(&ldcp->exp_dlist_lock, NULL, MUTEX_DRIVER, NULL);
2354 	mutex_init(&ldcp->imp_dlist_lock, NULL, MUTEX_DRIVER, NULL);
2355 	mutex_init(&ldcp->mlist_lock, NULL, MUTEX_DRIVER, NULL);
2356 
2357 	/* mark status as INITialized */
2358 	ldcp->status = LDC_INIT;
2359 
2360 	/* Add to channel list */
2361 	mutex_enter(&ldcssp->lock);
2362 	ldcp->next = ldcssp->chan_list;
2363 	ldcssp->chan_list = ldcp;
2364 	ldcssp->channel_count++;
2365 	mutex_exit(&ldcssp->lock);
2366 
2367 	/* set the handle */
2368 	*handle = (ldc_handle_t)ldcp;
2369 
2370 	D1(ldcp->id, "ldc_init: (0x%llx) channel initialized\n", ldcp->id);
2371 
2372 	return (0);
2373 
2374 cleanup_on_exit:
2375 
2376 	if (ldcp->mode == LDC_MODE_STREAM && ldcp->stream_bufferp)
2377 		kmem_free(ldcp->stream_bufferp, ldcp->mtu);
2378 
2379 	if (ldcp->tstate & TS_TXQ_RDY)
2380 		contig_mem_free((caddr_t)ldcp->tx_q_va,
2381 		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT));
2382 
2383 	if (ldcp->tstate & TS_RXQ_RDY)
2384 		contig_mem_free((caddr_t)ldcp->rx_q_va,
2385 		    (ldcp->rx_q_entries << LDC_PACKET_SHIFT));
2386 
2387 	mutex_destroy(&ldcp->tx_lock);
2388 	mutex_destroy(&ldcp->lock);
2389 
2390 	if (ldcp)
2391 		kmem_free(ldcp, sizeof (ldc_chan_t));
2392 
2393 	return (exit_val);
2394 }
2395 
2396 /*
2397  * Finalizes the LDC connection. It will return EBUSY if the
2398  * channel is open. A ldc_close() has to be done prior to
2399  * a ldc_fini operation. It frees TX/RX queues, associated
2400  * with the channel
2401  */
2402 int
2403 ldc_fini(ldc_handle_t handle)
2404 {
2405 	ldc_chan_t 	*ldcp;
2406 	ldc_chan_t 	*tmp_ldcp;
2407 	uint64_t 	id;
2408 
2409 	if (handle == NULL) {
2410 		DWARN(DBG_ALL_LDCS, "ldc_fini: invalid channel handle\n");
2411 		return (EINVAL);
2412 	}
2413 	ldcp = (ldc_chan_t *)handle;
2414 	id = ldcp->id;
2415 
2416 	mutex_enter(&ldcp->lock);
2417 
2418 	if ((ldcp->tstate & ~TS_IN_RESET) > TS_INIT) {
2419 		DWARN(ldcp->id, "ldc_fini: (0x%llx) channel is open\n",
2420 		    ldcp->id);
2421 		mutex_exit(&ldcp->lock);
2422 		return (EBUSY);
2423 	}
2424 
2425 	/* Remove from the channel list */
2426 	mutex_enter(&ldcssp->lock);
2427 	tmp_ldcp = ldcssp->chan_list;
2428 	if (tmp_ldcp == ldcp) {
2429 		ldcssp->chan_list = ldcp->next;
2430 		ldcp->next = NULL;
2431 	} else {
2432 		while (tmp_ldcp != NULL) {
2433 			if (tmp_ldcp->next == ldcp) {
2434 				tmp_ldcp->next = ldcp->next;
2435 				ldcp->next = NULL;
2436 				break;
2437 			}
2438 			tmp_ldcp = tmp_ldcp->next;
2439 		}
2440 		if (tmp_ldcp == NULL) {
2441 			DWARN(DBG_ALL_LDCS, "ldc_fini: invalid channel hdl\n");
2442 			mutex_exit(&ldcssp->lock);
2443 			mutex_exit(&ldcp->lock);
2444 			return (EINVAL);
2445 		}
2446 	}
2447 
2448 	ldcssp->channel_count--;
2449 
2450 	mutex_exit(&ldcssp->lock);
2451 
2452 	/* Free the map table for this channel */
2453 	if (ldcp->mtbl) {
2454 		(void) hv_ldc_set_map_table(ldcp->id, NULL, NULL);
2455 		if (ldcp->mtbl->contigmem)
2456 			contig_mem_free(ldcp->mtbl->table, ldcp->mtbl->size);
2457 		else
2458 			kmem_free(ldcp->mtbl->table, ldcp->mtbl->size);
2459 		mutex_destroy(&ldcp->mtbl->lock);
2460 		kmem_free(ldcp->mtbl, sizeof (ldc_mtbl_t));
2461 	}
2462 
2463 	/* Destroy descriptor ring and memory handle list lock */
2464 	mutex_destroy(&ldcp->exp_dlist_lock);
2465 	mutex_destroy(&ldcp->imp_dlist_lock);
2466 	mutex_destroy(&ldcp->mlist_lock);
2467 
2468 	/* Free the stream buffer for STREAM_MODE */
2469 	if (ldcp->mode == LDC_MODE_STREAM && ldcp->stream_bufferp)
2470 		kmem_free(ldcp->stream_bufferp, ldcp->mtu);
2471 
2472 	/* Free the RX queue */
2473 	contig_mem_free((caddr_t)ldcp->rx_q_va,
2474 	    (ldcp->rx_q_entries << LDC_PACKET_SHIFT));
2475 	ldcp->tstate &= ~TS_RXQ_RDY;
2476 
2477 	/* Free the TX queue */
2478 	contig_mem_free((caddr_t)ldcp->tx_q_va,
2479 	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT));
2480 	ldcp->tstate &= ~TS_TXQ_RDY;
2481 
2482 	mutex_exit(&ldcp->lock);
2483 
2484 	/* Destroy mutex */
2485 	mutex_destroy(&ldcp->tx_lock);
2486 	mutex_destroy(&ldcp->lock);
2487 
2488 	/* free channel structure */
2489 	kmem_free(ldcp, sizeof (ldc_chan_t));
2490 
2491 	D1(id, "ldc_fini: (0x%llx) channel finalized\n", id);
2492 
2493 	return (0);
2494 }
2495 
2496 /*
2497  * Open the LDC channel for use. It registers the TX/RX queues
2498  * with the Hypervisor. It also specifies the interrupt number
2499  * and target CPU for this channel
2500  */
2501 int
2502 ldc_open(ldc_handle_t handle)
2503 {
2504 	ldc_chan_t 	*ldcp;
2505 	int 		rv;
2506 
2507 	if (handle == NULL) {
2508 		DWARN(DBG_ALL_LDCS, "ldc_open: invalid channel handle\n");
2509 		return (EINVAL);
2510 	}
2511 
2512 	ldcp = (ldc_chan_t *)handle;
2513 
2514 	mutex_enter(&ldcp->lock);
2515 
2516 	if (ldcp->tstate < TS_INIT) {
2517 		DWARN(ldcp->id,
2518 		    "ldc_open: (0x%llx) channel not initialized\n", ldcp->id);
2519 		mutex_exit(&ldcp->lock);
2520 		return (EFAULT);
2521 	}
2522 	if ((ldcp->tstate & ~TS_IN_RESET) >= TS_OPEN) {
2523 		DWARN(ldcp->id,
2524 		    "ldc_open: (0x%llx) channel is already open\n", ldcp->id);
2525 		mutex_exit(&ldcp->lock);
2526 		return (EFAULT);
2527 	}
2528 
2529 	/*
2530 	 * Unregister/Register the tx queue with the hypervisor
2531 	 */
2532 	rv = hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
2533 	if (rv) {
2534 		cmn_err(CE_WARN,
2535 		    "ldc_open: (0x%lx) channel tx queue unconf failed\n",
2536 		    ldcp->id);
2537 		mutex_exit(&ldcp->lock);
2538 		return (EIO);
2539 	}
2540 
2541 	rv = hv_ldc_tx_qconf(ldcp->id, ldcp->tx_q_ra, ldcp->tx_q_entries);
2542 	if (rv) {
2543 		cmn_err(CE_WARN,
2544 		    "ldc_open: (0x%lx) channel tx queue conf failed\n",
2545 		    ldcp->id);
2546 		mutex_exit(&ldcp->lock);
2547 		return (EIO);
2548 	}
2549 
2550 	D2(ldcp->id, "ldc_open: (0x%llx) registered tx queue with LDC\n",
2551 	    ldcp->id);
2552 
2553 	/*
2554 	 * Unregister/Register the rx queue with the hypervisor
2555 	 */
2556 	rv = hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
2557 	if (rv) {
2558 		cmn_err(CE_WARN,
2559 		    "ldc_open: (0x%lx) channel rx queue unconf failed\n",
2560 		    ldcp->id);
2561 		mutex_exit(&ldcp->lock);
2562 		return (EIO);
2563 	}
2564 
2565 	rv = hv_ldc_rx_qconf(ldcp->id, ldcp->rx_q_ra, ldcp->rx_q_entries);
2566 	if (rv) {
2567 		cmn_err(CE_WARN,
2568 		    "ldc_open: (0x%lx) channel rx queue conf failed\n",
2569 		    ldcp->id);
2570 		mutex_exit(&ldcp->lock);
2571 		return (EIO);
2572 	}
2573 
2574 	D2(ldcp->id, "ldc_open: (0x%llx) registered rx queue with LDC\n",
2575 	    ldcp->id);
2576 
2577 	ldcp->tstate |= TS_QCONF_RDY;
2578 
2579 	/* Register the channel with the channel nexus */
2580 	rv = i_ldc_register_channel(ldcp);
2581 	if (rv && rv != EAGAIN) {
2582 		cmn_err(CE_WARN,
2583 		    "ldc_open: (0x%lx) channel register failed\n", ldcp->id);
2584 		(void) hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
2585 		(void) hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
2586 		mutex_exit(&ldcp->lock);
2587 		return (EIO);
2588 	}
2589 
2590 	/* mark channel in OPEN state */
2591 	ldcp->status = LDC_OPEN;
2592 
2593 	/* Read channel state */
2594 	rv = hv_ldc_tx_get_state(ldcp->id,
2595 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
2596 	if (rv) {
2597 		cmn_err(CE_WARN,
2598 		    "ldc_open: (0x%lx) cannot read channel state\n",
2599 		    ldcp->id);
2600 		(void) i_ldc_unregister_channel(ldcp);
2601 		(void) hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
2602 		(void) hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
2603 		mutex_exit(&ldcp->lock);
2604 		return (EIO);
2605 	}
2606 
2607 	/*
2608 	 * set the ACKd head to current head location for reliable &
2609 	 * streaming mode
2610 	 */
2611 	ldcp->tx_ackd_head = ldcp->tx_head;
2612 
2613 	/* mark channel ready if HV report link is UP (peer alloc'd Rx queue) */
2614 	if (ldcp->link_state == LDC_CHANNEL_UP ||
2615 	    ldcp->link_state == LDC_CHANNEL_RESET) {
2616 		ldcp->tstate |= TS_LINK_READY;
2617 		ldcp->status = LDC_READY;
2618 	}
2619 
2620 	/*
2621 	 * if channel is being opened in RAW mode - no handshake is needed
2622 	 * switch the channel READY and UP state
2623 	 */
2624 	if (ldcp->mode == LDC_MODE_RAW) {
2625 		ldcp->tstate = TS_UP;	/* set bits associated with LDC UP */
2626 		ldcp->status = LDC_UP;
2627 	}
2628 
2629 	mutex_exit(&ldcp->lock);
2630 
2631 	/*
2632 	 * Increment number of open channels
2633 	 */
2634 	mutex_enter(&ldcssp->lock);
2635 	ldcssp->channels_open++;
2636 	mutex_exit(&ldcssp->lock);
2637 
2638 	D1(ldcp->id,
2639 	    "ldc_open: (0x%llx) channel (0x%p) open for use "
2640 	    "(tstate=0x%x, status=0x%x)\n",
2641 	    ldcp->id, ldcp, ldcp->tstate, ldcp->status);
2642 
2643 	return (0);
2644 }
2645 
2646 /*
2647  * Close the LDC connection. It will return EBUSY if there
2648  * are memory segments or descriptor rings either bound to or
2649  * mapped over the channel
2650  */
2651 int
2652 ldc_close(ldc_handle_t handle)
2653 {
2654 	ldc_chan_t 	*ldcp;
2655 	int		rv = 0, retries = 0;
2656 	boolean_t	chk_done = B_FALSE;
2657 
2658 	if (handle == NULL) {
2659 		DWARN(DBG_ALL_LDCS, "ldc_close: invalid channel handle\n");
2660 		return (EINVAL);
2661 	}
2662 	ldcp = (ldc_chan_t *)handle;
2663 
2664 	mutex_enter(&ldcp->lock);
2665 
2666 	/* return error if channel is not open */
2667 	if ((ldcp->tstate & ~TS_IN_RESET) < TS_OPEN) {
2668 		DWARN(ldcp->id,
2669 		    "ldc_close: (0x%llx) channel is not open\n", ldcp->id);
2670 		mutex_exit(&ldcp->lock);
2671 		return (EFAULT);
2672 	}
2673 
2674 	/* if any memory handles, drings, are bound or mapped cannot close */
2675 	if (ldcp->mhdl_list != NULL) {
2676 		DWARN(ldcp->id,
2677 		    "ldc_close: (0x%llx) channel has bound memory handles\n",
2678 		    ldcp->id);
2679 		mutex_exit(&ldcp->lock);
2680 		return (EBUSY);
2681 	}
2682 	if (ldcp->exp_dring_list != NULL) {
2683 		DWARN(ldcp->id,
2684 		    "ldc_close: (0x%llx) channel has bound descriptor rings\n",
2685 		    ldcp->id);
2686 		mutex_exit(&ldcp->lock);
2687 		return (EBUSY);
2688 	}
2689 	if (ldcp->imp_dring_list != NULL) {
2690 		DWARN(ldcp->id,
2691 		    "ldc_close: (0x%llx) channel has mapped descriptor rings\n",
2692 		    ldcp->id);
2693 		mutex_exit(&ldcp->lock);
2694 		return (EBUSY);
2695 	}
2696 
2697 	if (ldcp->cb_inprogress) {
2698 		DWARN(ldcp->id, "ldc_close: (0x%llx) callback active\n",
2699 		    ldcp->id);
2700 		mutex_exit(&ldcp->lock);
2701 		return (EWOULDBLOCK);
2702 	}
2703 
2704 	/* Obtain Tx lock */
2705 	mutex_enter(&ldcp->tx_lock);
2706 
2707 	/*
2708 	 * Wait for pending transmits to complete i.e Tx queue to drain
2709 	 * if there are pending pkts - wait 1 ms and retry again
2710 	 */
2711 	for (;;) {
2712 
2713 		rv = hv_ldc_tx_get_state(ldcp->id,
2714 		    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
2715 		if (rv) {
2716 			cmn_err(CE_WARN,
2717 			    "ldc_close: (0x%lx) cannot read qptrs\n", ldcp->id);
2718 			mutex_exit(&ldcp->tx_lock);
2719 			mutex_exit(&ldcp->lock);
2720 			return (EIO);
2721 		}
2722 
2723 		if (ldcp->tx_head == ldcp->tx_tail ||
2724 		    ldcp->link_state != LDC_CHANNEL_UP) {
2725 			break;
2726 		}
2727 
2728 		if (chk_done) {
2729 			DWARN(ldcp->id,
2730 			    "ldc_close: (0x%llx) Tx queue drain timeout\n",
2731 			    ldcp->id);
2732 			break;
2733 		}
2734 
2735 		/* wait for one ms and try again */
2736 		delay(drv_usectohz(1000));
2737 		chk_done = B_TRUE;
2738 	}
2739 
2740 	/*
2741 	 * Drain the Tx and Rx queues as we are closing the
2742 	 * channel. We dont care about any pending packets.
2743 	 * We have to also drain the queue prior to clearing
2744 	 * pending interrupts, otherwise the HV will trigger
2745 	 * an interrupt the moment the interrupt state is
2746 	 * cleared.
2747 	 */
2748 	(void) i_ldc_txq_reconf(ldcp);
2749 	(void) i_ldc_rxq_drain(ldcp);
2750 
2751 	/*
2752 	 * Unregister the channel with the nexus
2753 	 */
2754 	while ((rv = i_ldc_unregister_channel(ldcp)) != 0) {
2755 
2756 		mutex_exit(&ldcp->tx_lock);
2757 		mutex_exit(&ldcp->lock);
2758 
2759 		/* if any error other than EAGAIN return back */
2760 		if (rv != EAGAIN || retries >= ldc_max_retries) {
2761 			cmn_err(CE_WARN,
2762 			    "ldc_close: (0x%lx) unregister failed, %d\n",
2763 			    ldcp->id, rv);
2764 			return (rv);
2765 		}
2766 
2767 		/*
2768 		 * As there could be pending interrupts we need
2769 		 * to wait and try again
2770 		 */
2771 		drv_usecwait(ldc_close_delay);
2772 		mutex_enter(&ldcp->lock);
2773 		mutex_enter(&ldcp->tx_lock);
2774 		retries++;
2775 	}
2776 
2777 	/*
2778 	 * Unregister queues
2779 	 */
2780 	rv = hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
2781 	if (rv) {
2782 		cmn_err(CE_WARN,
2783 		    "ldc_close: (0x%lx) channel TX queue unconf failed\n",
2784 		    ldcp->id);
2785 		mutex_exit(&ldcp->tx_lock);
2786 		mutex_exit(&ldcp->lock);
2787 		return (EIO);
2788 	}
2789 	rv = hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
2790 	if (rv) {
2791 		cmn_err(CE_WARN,
2792 		    "ldc_close: (0x%lx) channel RX queue unconf failed\n",
2793 		    ldcp->id);
2794 		mutex_exit(&ldcp->tx_lock);
2795 		mutex_exit(&ldcp->lock);
2796 		return (EIO);
2797 	}
2798 
2799 	ldcp->tstate &= ~TS_QCONF_RDY;
2800 
2801 	/* Reset channel state information */
2802 	i_ldc_reset_state(ldcp);
2803 
2804 	/* Mark channel as down and in initialized state */
2805 	ldcp->tx_ackd_head = 0;
2806 	ldcp->tx_head = 0;
2807 	ldcp->tstate = TS_IN_RESET|TS_INIT;
2808 	ldcp->status = LDC_INIT;
2809 
2810 	mutex_exit(&ldcp->tx_lock);
2811 	mutex_exit(&ldcp->lock);
2812 
2813 	/* Decrement number of open channels */
2814 	mutex_enter(&ldcssp->lock);
2815 	ldcssp->channels_open--;
2816 	mutex_exit(&ldcssp->lock);
2817 
2818 	D1(ldcp->id, "ldc_close: (0x%llx) channel closed\n", ldcp->id);
2819 
2820 	return (0);
2821 }
2822 
2823 /*
2824  * Register channel callback
2825  */
2826 int
2827 ldc_reg_callback(ldc_handle_t handle,
2828     uint_t(*cb)(uint64_t event, caddr_t arg), caddr_t arg)
2829 {
2830 	ldc_chan_t *ldcp;
2831 
2832 	if (handle == NULL) {
2833 		DWARN(DBG_ALL_LDCS,
2834 		    "ldc_reg_callback: invalid channel handle\n");
2835 		return (EINVAL);
2836 	}
2837 	if (((uint64_t)cb) < KERNELBASE) {
2838 		DWARN(DBG_ALL_LDCS, "ldc_reg_callback: invalid callback\n");
2839 		return (EINVAL);
2840 	}
2841 	ldcp = (ldc_chan_t *)handle;
2842 
2843 	mutex_enter(&ldcp->lock);
2844 
2845 	if (ldcp->cb) {
2846 		DWARN(ldcp->id, "ldc_reg_callback: (0x%llx) callback exists\n",
2847 		    ldcp->id);
2848 		mutex_exit(&ldcp->lock);
2849 		return (EIO);
2850 	}
2851 	if (ldcp->cb_inprogress) {
2852 		DWARN(ldcp->id, "ldc_reg_callback: (0x%llx) callback active\n",
2853 		    ldcp->id);
2854 		mutex_exit(&ldcp->lock);
2855 		return (EWOULDBLOCK);
2856 	}
2857 
2858 	ldcp->cb = cb;
2859 	ldcp->cb_arg = arg;
2860 	ldcp->cb_enabled = B_TRUE;
2861 
2862 	D1(ldcp->id,
2863 	    "ldc_reg_callback: (0x%llx) registered callback for channel\n",
2864 	    ldcp->id);
2865 
2866 	mutex_exit(&ldcp->lock);
2867 
2868 	return (0);
2869 }
2870 
2871 /*
2872  * Unregister channel callback
2873  */
2874 int
2875 ldc_unreg_callback(ldc_handle_t handle)
2876 {
2877 	ldc_chan_t *ldcp;
2878 
2879 	if (handle == NULL) {
2880 		DWARN(DBG_ALL_LDCS,
2881 		    "ldc_unreg_callback: invalid channel handle\n");
2882 		return (EINVAL);
2883 	}
2884 	ldcp = (ldc_chan_t *)handle;
2885 
2886 	mutex_enter(&ldcp->lock);
2887 
2888 	if (ldcp->cb == NULL) {
2889 		DWARN(ldcp->id,
2890 		    "ldc_unreg_callback: (0x%llx) no callback exists\n",
2891 		    ldcp->id);
2892 		mutex_exit(&ldcp->lock);
2893 		return (EIO);
2894 	}
2895 	if (ldcp->cb_inprogress) {
2896 		DWARN(ldcp->id,
2897 		    "ldc_unreg_callback: (0x%llx) callback active\n",
2898 		    ldcp->id);
2899 		mutex_exit(&ldcp->lock);
2900 		return (EWOULDBLOCK);
2901 	}
2902 
2903 	ldcp->cb = NULL;
2904 	ldcp->cb_arg = NULL;
2905 	ldcp->cb_enabled = B_FALSE;
2906 
2907 	D1(ldcp->id,
2908 	    "ldc_unreg_callback: (0x%llx) unregistered callback for channel\n",
2909 	    ldcp->id);
2910 
2911 	mutex_exit(&ldcp->lock);
2912 
2913 	return (0);
2914 }
2915 
2916 
2917 /*
2918  * Bring a channel up by initiating a handshake with the peer
2919  * This call is asynchronous. It will complete at a later point
2920  * in time when the peer responds back with an RTR.
2921  */
2922 int
2923 ldc_up(ldc_handle_t handle)
2924 {
2925 	int 		rv;
2926 	ldc_chan_t 	*ldcp;
2927 	ldc_msg_t 	*ldcmsg;
2928 	uint64_t 	tx_tail, tstate, link_state;
2929 
2930 	if (handle == NULL) {
2931 		DWARN(DBG_ALL_LDCS, "ldc_up: invalid channel handle\n");
2932 		return (EINVAL);
2933 	}
2934 	ldcp = (ldc_chan_t *)handle;
2935 
2936 	mutex_enter(&ldcp->lock);
2937 
2938 	D1(ldcp->id, "ldc_up: (0x%llx) doing channel UP\n", ldcp->id);
2939 
2940 	/* clear the reset state */
2941 	tstate = ldcp->tstate;
2942 	ldcp->tstate &= ~TS_IN_RESET;
2943 
2944 	if (ldcp->tstate == TS_UP) {
2945 		DWARN(ldcp->id,
2946 		    "ldc_up: (0x%llx) channel is already in UP state\n",
2947 		    ldcp->id);
2948 
2949 		/* mark channel as up */
2950 		ldcp->status = LDC_UP;
2951 
2952 		/*
2953 		 * if channel was in reset state and there was
2954 		 * pending data clear interrupt state. this will
2955 		 * trigger an interrupt, causing the RX handler to
2956 		 * to invoke the client's callback
2957 		 */
2958 		if ((tstate & TS_IN_RESET) &&
2959 		    ldcp->rx_intr_state == LDC_INTR_PEND) {
2960 			D1(ldcp->id,
2961 			    "ldc_up: (0x%llx) channel has pending data, "
2962 			    "clearing interrupt\n", ldcp->id);
2963 			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
2964 		}
2965 
2966 		mutex_exit(&ldcp->lock);
2967 		return (0);
2968 	}
2969 
2970 	/* if the channel is in RAW mode - mark it as UP, if READY */
2971 	if (ldcp->mode == LDC_MODE_RAW && ldcp->tstate >= TS_READY) {
2972 		ldcp->tstate = TS_UP;
2973 		mutex_exit(&ldcp->lock);
2974 		return (0);
2975 	}
2976 
2977 	/* Don't start another handshake if there is one in progress */
2978 	if (ldcp->hstate) {
2979 		D1(ldcp->id,
2980 		    "ldc_up: (0x%llx) channel handshake in progress\n",
2981 		    ldcp->id);
2982 		mutex_exit(&ldcp->lock);
2983 		return (0);
2984 	}
2985 
2986 	mutex_enter(&ldcp->tx_lock);
2987 
2988 	/* save current link state */
2989 	link_state = ldcp->link_state;
2990 
2991 	/* get the current tail for the LDC msg */
2992 	rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
2993 	if (rv) {
2994 		D1(ldcp->id, "ldc_up: (0x%llx) cannot initiate handshake\n",
2995 		    ldcp->id);
2996 		mutex_exit(&ldcp->tx_lock);
2997 		mutex_exit(&ldcp->lock);
2998 		return (ECONNREFUSED);
2999 	}
3000 
3001 	/*
3002 	 * If i_ldc_get_tx_tail() changed link_state to either RESET or UP,
3003 	 * from a previous state of DOWN, then mark the channel as
3004 	 * being ready for handshake.
3005 	 */
3006 	if ((link_state == LDC_CHANNEL_DOWN) &&
3007 	    (link_state != ldcp->link_state)) {
3008 
3009 		ASSERT((ldcp->link_state == LDC_CHANNEL_RESET) ||
3010 		    (ldcp->link_state == LDC_CHANNEL_UP));
3011 
3012 		if (ldcp->mode == LDC_MODE_RAW) {
3013 			ldcp->status = LDC_UP;
3014 			ldcp->tstate = TS_UP;
3015 			mutex_exit(&ldcp->tx_lock);
3016 			mutex_exit(&ldcp->lock);
3017 			return (0);
3018 		} else {
3019 			ldcp->status = LDC_READY;
3020 			ldcp->tstate |= TS_LINK_READY;
3021 		}
3022 
3023 	}
3024 
3025 	ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
3026 	ZERO_PKT(ldcmsg);
3027 
3028 	ldcmsg->type = LDC_CTRL;
3029 	ldcmsg->stype = LDC_INFO;
3030 	ldcmsg->ctrl = LDC_VER;
3031 	ldcp->next_vidx = 0;
3032 	bcopy(&ldc_versions[0], ldcmsg->udata, sizeof (ldc_versions[0]));
3033 
3034 	DUMP_LDC_PKT(ldcp, "ldc_up snd ver", (uint64_t)ldcmsg);
3035 
3036 	/* initiate the send by calling into HV and set the new tail */
3037 	tx_tail = (tx_tail + LDC_PACKET_SIZE) %
3038 		(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
3039 
3040 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
3041 	if (rv) {
3042 		DWARN(ldcp->id,
3043 		    "ldc_up: (0x%llx) cannot initiate handshake rv=%d\n",
3044 		    ldcp->id, rv);
3045 		mutex_exit(&ldcp->tx_lock);
3046 		mutex_exit(&ldcp->lock);
3047 		return (rv);
3048 	}
3049 
3050 	ldcp->hstate |= TS_SENT_VER;
3051 	ldcp->tx_tail = tx_tail;
3052 	D1(ldcp->id, "ldc_up: (0x%llx) channel up initiated\n", ldcp->id);
3053 
3054 	mutex_exit(&ldcp->tx_lock);
3055 	mutex_exit(&ldcp->lock);
3056 
3057 	return (rv);
3058 }
3059 
3060 
3061 /*
3062  * Bring a channel down by resetting its state and queues
3063  */
3064 int
3065 ldc_down(ldc_handle_t handle)
3066 {
3067 	ldc_chan_t 	*ldcp;
3068 
3069 	if (handle == NULL) {
3070 		DWARN(DBG_ALL_LDCS, "ldc_down: invalid channel handle\n");
3071 		return (EINVAL);
3072 	}
3073 	ldcp = (ldc_chan_t *)handle;
3074 	mutex_enter(&ldcp->lock);
3075 	mutex_enter(&ldcp->tx_lock);
3076 	i_ldc_reset(ldcp, B_TRUE);
3077 	mutex_exit(&ldcp->tx_lock);
3078 	mutex_exit(&ldcp->lock);
3079 
3080 	return (0);
3081 }
3082 
3083 /*
3084  * Get the current channel status
3085  */
3086 int
3087 ldc_status(ldc_handle_t handle, ldc_status_t *status)
3088 {
3089 	ldc_chan_t *ldcp;
3090 
3091 	if (handle == NULL || status == NULL) {
3092 		DWARN(DBG_ALL_LDCS, "ldc_status: invalid argument\n");
3093 		return (EINVAL);
3094 	}
3095 	ldcp = (ldc_chan_t *)handle;
3096 
3097 	*status = ((ldc_chan_t *)handle)->status;
3098 
3099 	D1(ldcp->id,
3100 	    "ldc_status: (0x%llx) returned status %d\n", ldcp->id, *status);
3101 	return (0);
3102 }
3103 
3104 
3105 /*
3106  * Set the channel's callback mode - enable/disable callbacks
3107  */
3108 int
3109 ldc_set_cb_mode(ldc_handle_t handle, ldc_cb_mode_t cmode)
3110 {
3111 	ldc_chan_t 	*ldcp;
3112 
3113 	if (handle == NULL) {
3114 		DWARN(DBG_ALL_LDCS,
3115 		    "ldc_set_intr_mode: invalid channel handle\n");
3116 		return (EINVAL);
3117 	}
3118 	ldcp = (ldc_chan_t *)handle;
3119 
3120 	/*
3121 	 * Record no callbacks should be invoked
3122 	 */
3123 	mutex_enter(&ldcp->lock);
3124 
3125 	switch (cmode) {
3126 	case LDC_CB_DISABLE:
3127 		if (!ldcp->cb_enabled) {
3128 			DWARN(ldcp->id,
3129 			    "ldc_set_cb_mode: (0x%llx) callbacks disabled\n",
3130 			    ldcp->id);
3131 			break;
3132 		}
3133 		ldcp->cb_enabled = B_FALSE;
3134 
3135 		D1(ldcp->id, "ldc_set_cb_mode: (0x%llx) disabled callbacks\n",
3136 		    ldcp->id);
3137 		break;
3138 
3139 	case LDC_CB_ENABLE:
3140 		if (ldcp->cb_enabled) {
3141 			DWARN(ldcp->id,
3142 			    "ldc_set_cb_mode: (0x%llx) callbacks enabled\n",
3143 			    ldcp->id);
3144 			break;
3145 		}
3146 		ldcp->cb_enabled = B_TRUE;
3147 
3148 		D1(ldcp->id, "ldc_set_cb_mode: (0x%llx) enabled callbacks\n",
3149 		    ldcp->id);
3150 		break;
3151 	}
3152 
3153 	mutex_exit(&ldcp->lock);
3154 
3155 	return (0);
3156 }
3157 
3158 /*
3159  * Check to see if there are packets on the incoming queue
3160  * Will return hasdata = B_FALSE if there are no packets
3161  */
3162 int
3163 ldc_chkq(ldc_handle_t handle, boolean_t *hasdata)
3164 {
3165 	int 		rv;
3166 	uint64_t 	rx_head, rx_tail;
3167 	ldc_chan_t 	*ldcp;
3168 
3169 	if (handle == NULL) {
3170 		DWARN(DBG_ALL_LDCS, "ldc_chkq: invalid channel handle\n");
3171 		return (EINVAL);
3172 	}
3173 	ldcp = (ldc_chan_t *)handle;
3174 
3175 	*hasdata = B_FALSE;
3176 
3177 	mutex_enter(&ldcp->lock);
3178 
3179 	if (ldcp->tstate != TS_UP) {
3180 		D1(ldcp->id,
3181 		    "ldc_chkq: (0x%llx) channel is not up\n", ldcp->id);
3182 		mutex_exit(&ldcp->lock);
3183 		return (ECONNRESET);
3184 	}
3185 
3186 	/* Read packet(s) from the queue */
3187 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
3188 	    &ldcp->link_state);
3189 	if (rv != 0) {
3190 		cmn_err(CE_WARN,
3191 		    "ldc_chkq: (0x%lx) unable to read queue ptrs", ldcp->id);
3192 		mutex_exit(&ldcp->lock);
3193 		return (EIO);
3194 	}
3195 	/* reset the channel state if the channel went down */
3196 	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
3197 	    ldcp->link_state == LDC_CHANNEL_RESET) {
3198 		mutex_enter(&ldcp->tx_lock);
3199 		i_ldc_reset(ldcp, B_FALSE);
3200 		mutex_exit(&ldcp->tx_lock);
3201 		mutex_exit(&ldcp->lock);
3202 		return (ECONNRESET);
3203 	}
3204 
3205 	if ((rx_head != rx_tail) ||
3206 	    (ldcp->mode == LDC_MODE_STREAM && ldcp->stream_remains > 0)) {
3207 		D1(ldcp->id,
3208 		    "ldc_chkq: (0x%llx) queue has pkt(s) or buffered data\n",
3209 		    ldcp->id);
3210 		*hasdata = B_TRUE;
3211 	}
3212 
3213 	mutex_exit(&ldcp->lock);
3214 
3215 	return (0);
3216 }
3217 
3218 
3219 /*
3220  * Read 'size' amount of bytes or less. If incoming buffer
3221  * is more than 'size', ENOBUFS is returned.
3222  *
3223  * On return, size contains the number of bytes read.
3224  */
3225 int
3226 ldc_read(ldc_handle_t handle, caddr_t bufp, size_t *sizep)
3227 {
3228 	ldc_chan_t 	*ldcp;
3229 	uint64_t 	rx_head = 0, rx_tail = 0;
3230 	int		rv = 0, exit_val;
3231 
3232 	if (handle == NULL) {
3233 		DWARN(DBG_ALL_LDCS, "ldc_read: invalid channel handle\n");
3234 		return (EINVAL);
3235 	}
3236 
3237 	ldcp = (ldc_chan_t *)handle;
3238 
3239 	/* channel lock */
3240 	mutex_enter(&ldcp->lock);
3241 
3242 	if (ldcp->tstate != TS_UP) {
3243 		DWARN(ldcp->id,
3244 		    "ldc_read: (0x%llx) channel is not in UP state\n",
3245 		    ldcp->id);
3246 		exit_val = ECONNRESET;
3247 	} else {
3248 		exit_val = ldcp->read_p(ldcp, bufp, sizep);
3249 	}
3250 
3251 	/*
3252 	 * if queue has been drained - clear interrupt
3253 	 */
3254 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
3255 	    &ldcp->link_state);
3256 	if (rv != 0) {
3257 		cmn_err(CE_WARN, "ldc_read: (0x%lx) unable to read queue ptrs",
3258 		    ldcp->id);
3259 		mutex_enter(&ldcp->tx_lock);
3260 		i_ldc_reset(ldcp, B_TRUE);
3261 		mutex_exit(&ldcp->tx_lock);
3262 		mutex_exit(&ldcp->lock);
3263 		return (ECONNRESET);
3264 	}
3265 
3266 	if (exit_val == 0) {
3267 		if (ldcp->link_state == LDC_CHANNEL_DOWN ||
3268 		    ldcp->link_state == LDC_CHANNEL_RESET) {
3269 			mutex_enter(&ldcp->tx_lock);
3270 			i_ldc_reset(ldcp, B_FALSE);
3271 			exit_val = ECONNRESET;
3272 			mutex_exit(&ldcp->tx_lock);
3273 		}
3274 		if ((rv == 0) &&
3275 		    (ldcp->rx_intr_state == LDC_INTR_PEND) &&
3276 		    (rx_head == rx_tail)) {
3277 			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
3278 		}
3279 	}
3280 
3281 	mutex_exit(&ldcp->lock);
3282 	return (exit_val);
3283 }
3284 
3285 /*
3286  * Basic raw mondo read -
3287  * no interpretation of mondo contents at all.
3288  *
3289  * Enter and exit with ldcp->lock held by caller
3290  */
3291 static int
3292 i_ldc_read_raw(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep)
3293 {
3294 	uint64_t 	q_size_mask;
3295 	ldc_msg_t 	*msgp;
3296 	uint8_t		*msgbufp;
3297 	int		rv = 0, space;
3298 	uint64_t 	rx_head, rx_tail;
3299 
3300 	space = *sizep;
3301 
3302 	if (space < LDC_PAYLOAD_SIZE_RAW)
3303 		return (ENOBUFS);
3304 
3305 	ASSERT(mutex_owned(&ldcp->lock));
3306 
3307 	/* compute mask for increment */
3308 	q_size_mask = (ldcp->rx_q_entries-1)<<LDC_PACKET_SHIFT;
3309 
3310 	/*
3311 	 * Read packet(s) from the queue
3312 	 */
3313 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
3314 	    &ldcp->link_state);
3315 	if (rv != 0) {
3316 		cmn_err(CE_WARN,
3317 		    "ldc_read_raw: (0x%lx) unable to read queue ptrs",
3318 		    ldcp->id);
3319 		return (EIO);
3320 	}
3321 	D1(ldcp->id, "ldc_read_raw: (0x%llx) rxh=0x%llx,"
3322 		" rxt=0x%llx, st=0x%llx\n",
3323 		ldcp->id, rx_head, rx_tail, ldcp->link_state);
3324 
3325 	/* reset the channel state if the channel went down */
3326 	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
3327 	    ldcp->link_state == LDC_CHANNEL_RESET) {
3328 		mutex_enter(&ldcp->tx_lock);
3329 		i_ldc_reset(ldcp, B_FALSE);
3330 		mutex_exit(&ldcp->tx_lock);
3331 		return (ECONNRESET);
3332 	}
3333 
3334 	/*
3335 	 * Check for empty queue
3336 	 */
3337 	if (rx_head == rx_tail) {
3338 		*sizep = 0;
3339 		return (0);
3340 	}
3341 
3342 	/* get the message */
3343 	msgp = (ldc_msg_t *)(ldcp->rx_q_va + rx_head);
3344 
3345 	/* if channel is in RAW mode, copy data and return */
3346 	msgbufp = (uint8_t *)&(msgp->raw[0]);
3347 
3348 	bcopy(msgbufp, target_bufp, LDC_PAYLOAD_SIZE_RAW);
3349 
3350 	DUMP_PAYLOAD(ldcp->id, msgbufp);
3351 
3352 	*sizep = LDC_PAYLOAD_SIZE_RAW;
3353 
3354 	rx_head = (rx_head + LDC_PACKET_SIZE) & q_size_mask;
3355 	rv = i_ldc_set_rx_head(ldcp, rx_head);
3356 
3357 	return (rv);
3358 }
3359 
/*
 * Process LDC mondos to build larger packets
 * with either un-reliable or reliable delivery.
 *
 * Walks the Rx queue starting at its current head. Each packet has its
 * sequence id checked; control packets and data ACK/NACK packets are
 * handled inline; the payloads of data (INFO) fragments are appended to
 * target_bufp until a packet carrying LDC_FRAG_STOP is consumed or the
 * queue is found empty.
 *
 *	ldcp		channel to read from
 *	target_bufp	destination buffer for the reassembled message
 *	sizep		in:  capacity of target_bufp
 *			out: number of bytes delivered on the paths that
 *			     set it (0 when nothing was available)
 *
 * Returns EINVAL for a NULL buffer or a zero size, EAGAIN when a
 * fragmented transfer stalls for ldc_max_retries polls, ECONNRESET when
 * the channel had to be reset, and otherwise the value left in rv by the
 * last helper called (0 on success).
 *
 * Enter and exit with ldcp->lock held by caller
 */
static int
i_ldc_read_packet(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep)
{
	int		rv = 0;
	uint64_t 	rx_head = 0, rx_tail = 0;
	/* local read cursor; the HV head only moves via i_ldc_set_rx_head() */
	uint64_t 	curr_head = 0;
	ldc_msg_t 	*msg;
	caddr_t 	target;
	size_t 		len = 0, bytes_read = 0;
	int 		retries = 0;
	uint64_t 	q_size_mask;
	/* seqid of the first fragment of the message in progress, 0 if none */
	uint64_t	first_fragment = 0;

	target = target_bufp;

	ASSERT(mutex_owned(&ldcp->lock));

	/* check if the buffer and size are valid */
	if (target_bufp == NULL || *sizep == 0) {
		DWARN(ldcp->id, "ldc_read: (0x%llx) invalid buffer/size\n",
		    ldcp->id);
		return (EINVAL);
	}

	/* compute mask for increment */
	q_size_mask = (ldcp->rx_q_entries-1)<<LDC_PACKET_SHIFT;

	/*
	 * Read packet(s) from the queue
	 */
	rv = hv_ldc_rx_get_state(ldcp->id, &curr_head, &rx_tail,
	    &ldcp->link_state);
	if (rv != 0) {
		cmn_err(CE_WARN, "ldc_read: (0x%lx) unable to read queue ptrs",
		    ldcp->id);
		mutex_enter(&ldcp->tx_lock);
		i_ldc_reset(ldcp, B_TRUE);
		mutex_exit(&ldcp->tx_lock);
		return (ECONNRESET);
	}
	D1(ldcp->id, "ldc_read: (0x%llx) chd=0x%llx, tl=0x%llx, st=0x%llx\n",
	    ldcp->id, curr_head, rx_tail, ldcp->link_state);

	/* reset the channel state if the channel went down */
	if (ldcp->link_state != LDC_CHANNEL_UP)
		goto channel_is_reset;

	for (;;) {

		/*
		 * The cursor has caught up with the last tail we read:
		 * re-read the queue pointers to see if more has arrived.
		 */
		if (curr_head == rx_tail) {
			rv = hv_ldc_rx_get_state(ldcp->id,
			    &rx_head, &rx_tail, &ldcp->link_state);
			if (rv != 0) {
				cmn_err(CE_WARN,
				    "ldc_read: (0x%lx) cannot read queue ptrs",
				    ldcp->id);
				mutex_enter(&ldcp->tx_lock);
				i_ldc_reset(ldcp, B_TRUE);
				mutex_exit(&ldcp->tx_lock);
				return (ECONNRESET);
			}
			if (ldcp->link_state != LDC_CHANNEL_UP)
				goto channel_is_reset;

			if (curr_head == rx_tail) {

				/* If in the middle of a fragmented xfer */
				if (first_fragment != 0) {

					/* wait for ldc_delay usecs */
					drv_usecwait(ldc_delay);

					if (++retries < ldc_max_retries)
						continue;

					/*
					 * Give up: rewind last_msg_rcd to
					 * just before the first fragment.
					 */
					*sizep = 0;
					ldcp->last_msg_rcd = first_fragment - 1;
					DWARN(DBG_ALL_LDCS, "ldc_read: "
						"(0x%llx) read timeout",
						ldcp->id);
					return (EAGAIN);
				}
				/* queue is empty and no transfer in progress */
				*sizep = 0;
				break;
			}
		}
		/* a packet is available, so restart the stall counter */
		retries = 0;

		D2(ldcp->id,
		    "ldc_read: (0x%llx) chd=0x%llx, rxhd=0x%llx, rxtl=0x%llx\n",
		    ldcp->id, curr_head, rx_head, rx_tail);

		/* get the message */
		msg = (ldc_msg_t *)(ldcp->rx_q_va + curr_head);

		DUMP_LDC_PKT(ldcp, "ldc_read received pkt",
		    ldcp->rx_q_va + curr_head);

		/* Check the message ID for the message received */
		if ((rv = i_ldc_check_seqid(ldcp, msg)) != 0) {

			DWARN(ldcp->id, "ldc_read: (0x%llx) seqid error, "
			    "q_ptrs=0x%lx,0x%lx", ldcp->id, rx_head, rx_tail);

			/* throw away data */
			bytes_read = 0;

			/* Reset last_msg_rcd to start of message */
			if (first_fragment != 0) {
				ldcp->last_msg_rcd = first_fragment - 1;
				first_fragment = 0;
			}
			/*
			 * Send a NACK -- invalid seqid
			 * get the current tail for the response
			 */
			rv = i_ldc_send_pkt(ldcp, msg->type, LDC_NACK,
			    (msg->ctrl & LDC_CTRL_MASK));
			if (rv) {
				cmn_err(CE_NOTE,
				    "ldc_read: (0x%lx) err sending "
				    "NACK msg\n", ldcp->id);

				/* if cannot send NACK - reset channel */
				mutex_enter(&ldcp->tx_lock);
				i_ldc_reset(ldcp, B_FALSE);
				mutex_exit(&ldcp->tx_lock);
				rv = ECONNRESET;
				break;
			}

			/* purge receive queue */
			rv = i_ldc_set_rx_head(ldcp, rx_tail);

			break;
		}

		/*
		 * Process any messages of type CTRL messages
		 * Future implementations should try to pass these
		 * to LDC link by resetting the intr state.
		 *
		 * NOTE: not done as a switch() as type can be both ctrl+data
		 */
		if (msg->type & LDC_CTRL) {
			if (rv = i_ldc_ctrlmsg(ldcp, msg)) {
				/* EAGAIN: go around again on the same packet */
				if (rv == EAGAIN)
					continue;
				/* any other error: drop everything queued */
				rv = i_ldc_set_rx_head(ldcp, rx_tail);
				*sizep = 0;
				bytes_read = 0;
				break;
			}
		}

		/* process data ACKs */
		if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) {
			if (rv = i_ldc_process_data_ACK(ldcp, msg)) {
				*sizep = 0;
				bytes_read = 0;
				break;
			}
		}

		/* process data NACKs */
		if ((msg->type & LDC_DATA) && (msg->stype & LDC_NACK)) {
			DWARN(ldcp->id,
			    "ldc_read: (0x%llx) received DATA/NACK", ldcp->id);
			mutex_enter(&ldcp->tx_lock);
			i_ldc_reset(ldcp, B_TRUE);
			mutex_exit(&ldcp->tx_lock);
			return (ECONNRESET);
		}

		/* process data messages */
		if ((msg->type & LDC_DATA) && (msg->stype & LDC_INFO)) {

			/* payload location depends on the channel mode */
			uint8_t *msgbuf = (uint8_t *)(
				(ldcp->mode == LDC_MODE_RELIABLE ||
				ldcp->mode == LDC_MODE_STREAM)
				? msg->rdata : msg->udata);

			D2(ldcp->id,
			    "ldc_read: (0x%llx) received data msg\n", ldcp->id);

			/* get the packet length */
			len = (msg->env & LDC_LEN_MASK);

			/*
			 * FUTURE OPTIMIZATION:
			 * dont need to set q head for every
			 * packet we read just need to do this when
			 * we are done or need to wait for more
			 * mondos to make a full packet - this is
			 * currently expensive.
			 */

			if (first_fragment == 0) {

				/*
				 * first packets should always have the start
				 * bit set (even for a single packet). If not
				 * throw away the packet
				 */
				if (!(msg->env & LDC_FRAG_START)) {

					DWARN(DBG_ALL_LDCS,
					    "ldc_read: (0x%llx) not start - "
					    "frag=%x\n", ldcp->id,
					    (msg->env) & LDC_FRAG_MASK);

					/* toss pkt, inc head, cont reading */
					bytes_read = 0;
					target = target_bufp;
					curr_head =
						(curr_head + LDC_PACKET_SIZE)
						& q_size_mask;
					if (rv = i_ldc_set_rx_head(ldcp,
						curr_head))
						break;

					continue;
				}

				first_fragment = msg->seqid;
			} else {
				/* check to see if this is a pkt w/ START bit */
				if (msg->env & LDC_FRAG_START) {
					DWARN(DBG_ALL_LDCS,
					    "ldc_read:(0x%llx) unexpected pkt"
					    " env=0x%x discarding %d bytes,"
					    " lastmsg=%d, currentmsg=%d\n",
					    ldcp->id, msg->env&LDC_FRAG_MASK,
					    bytes_read, ldcp->last_msg_rcd,
					    msg->seqid);

					/* throw data we have read so far */
					bytes_read = 0;
					target = target_bufp;
					first_fragment = msg->seqid;

					/*
					 * Release the abandoned fragments; this
					 * packet starts the new message and is
					 * copied below.
					 */
					if (rv = i_ldc_set_rx_head(ldcp,
						curr_head))
						break;
				}
			}

			/* copy (next) pkt into buffer */
			if (len <= (*sizep - bytes_read)) {
				bcopy(msgbuf, target, len);
				target += len;
				bytes_read += len;
			} else {
				/*
				 * there is not enough space in the buffer to
				 * read this pkt. throw message away & continue
				 * reading data from queue
				 */
				DWARN(DBG_ALL_LDCS,
				    "ldc_read: (0x%llx) buffer too small, "
				    "head=0x%lx, expect=%d, got=%d\n", ldcp->id,
				    curr_head, *sizep, bytes_read+len);

				first_fragment = 0;
				target = target_bufp;
				bytes_read = 0;

				/* throw away everything received so far */
				if (rv = i_ldc_set_rx_head(ldcp, curr_head))
					break;

				/*
				 * NOTE(review): curr_head is not advanced
				 * before this continue, so the same packet is
				 * examined again on the next pass. Confirm
				 * this cannot spin when a START packet alone
				 * is larger than *sizep.
				 */
				/* continue reading remaining pkts */
				continue;
			}
		}

		/* set the message id */
		ldcp->last_msg_rcd = msg->seqid;

		/* move the head one position */
		curr_head = (curr_head + LDC_PACKET_SIZE) & q_size_mask;

		if (msg->env & LDC_FRAG_STOP) {

			/*
			 * All pkts that are part of this fragmented transfer
			 * have been read or this was a single pkt read
			 * or there was an error
			 */

			/* set the queue head */
			if (rv = i_ldc_set_rx_head(ldcp, curr_head))
				bytes_read = 0;

			*sizep = bytes_read;

			break;
		}

		/* advance head if it is a DATA ACK */
		if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) {

			/* set the queue head */
			if (rv = i_ldc_set_rx_head(ldcp, curr_head)) {
				bytes_read = 0;
				break;
			}

			D2(ldcp->id, "ldc_read: (0x%llx) set ACK qhead 0x%llx",
			    ldcp->id, curr_head);
		}

	} /* for (;;) */


	/*
	 * If useful data was read - Send msg ACK
	 * OPTIMIZE: do not send ACK for all msgs - use some frequency
	 *
	 * An EWOULDBLOCK from the ACK send does not reset the channel, but
	 * it is still what this function returns, since rv is overwritten.
	 */
	if ((bytes_read > 0) && (ldcp->mode == LDC_MODE_RELIABLE ||
		ldcp->mode == LDC_MODE_STREAM)) {

		rv = i_ldc_send_pkt(ldcp, LDC_DATA, LDC_ACK, 0);
		if (rv && rv != EWOULDBLOCK) {
			cmn_err(CE_NOTE,
			    "ldc_read: (0x%lx) cannot send ACK\n", ldcp->id);

			/* if cannot send ACK - reset channel */
			goto channel_is_reset;
		}
	}

	D2(ldcp->id, "ldc_read: (0x%llx) end size=%d", ldcp->id, *sizep);

	return (rv);

	/* common exit for link-down and ACK-failure paths */
channel_is_reset:
	mutex_enter(&ldcp->tx_lock);
	i_ldc_reset(ldcp, B_FALSE);
	mutex_exit(&ldcp->tx_lock);
	return (ECONNRESET);
}
3708 
3709 /*
3710  * Use underlying reliable packet mechanism to fetch
3711  * and buffer incoming packets so we can hand them back as
3712  * a basic byte stream.
3713  *
3714  * Enter and exit with ldcp->lock held by caller
3715  */
3716 static int
3717 i_ldc_read_stream(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep)
3718 {
3719 	int	rv;
3720 	size_t	size;
3721 
3722 	ASSERT(mutex_owned(&ldcp->lock));
3723 
3724 	D2(ldcp->id, "i_ldc_read_stream: (0x%llx) buffer size=%d",
3725 		ldcp->id, *sizep);
3726 
3727 	if (ldcp->stream_remains == 0) {
3728 		size = ldcp->mtu;
3729 		rv = i_ldc_read_packet(ldcp,
3730 			(caddr_t)ldcp->stream_bufferp, &size);
3731 		D2(ldcp->id, "i_ldc_read_stream: read packet (0x%llx) size=%d",
3732 			ldcp->id, size);
3733 
3734 		if (rv != 0)
3735 			return (rv);
3736 
3737 		ldcp->stream_remains = size;
3738 		ldcp->stream_offset = 0;
3739 	}
3740 
3741 	size = MIN(ldcp->stream_remains, *sizep);
3742 
3743 	bcopy(ldcp->stream_bufferp + ldcp->stream_offset, target_bufp, size);
3744 	ldcp->stream_offset += size;
3745 	ldcp->stream_remains -= size;
3746 
3747 	D2(ldcp->id, "i_ldc_read_stream: (0x%llx) fill from buffer size=%d",
3748 		ldcp->id, size);
3749 
3750 	*sizep = size;
3751 	return (0);
3752 }
3753 
3754 /*
3755  * Write specified amount of bytes to the channel
3756  * in multiple pkts of pkt_payload size. Each
3757  * packet is tagged with an unique packet ID in
3758  * the case of a reliable link.
3759  *
3760  * On return, size contains the number of bytes written.
3761  */
3762 int
3763 ldc_write(ldc_handle_t handle, caddr_t buf, size_t *sizep)
3764 {
3765 	ldc_chan_t	*ldcp;
3766 	int		rv = 0;
3767 
3768 	if (handle == NULL) {
3769 		DWARN(DBG_ALL_LDCS, "ldc_write: invalid channel handle\n");
3770 		return (EINVAL);
3771 	}
3772 	ldcp = (ldc_chan_t *)handle;
3773 
3774 	/* check if writes can occur */
3775 	if (!mutex_tryenter(&ldcp->tx_lock)) {
3776 		/*
3777 		 * Could not get the lock - channel could
3778 		 * be in the process of being unconfigured
3779 		 * or reader has encountered an error
3780 		 */
3781 		return (EAGAIN);
3782 	}
3783 
3784 	/* check if non-zero data to write */
3785 	if (buf == NULL || sizep == NULL) {
3786 		DWARN(ldcp->id, "ldc_write: (0x%llx) invalid data write\n",
3787 		    ldcp->id);
3788 		mutex_exit(&ldcp->tx_lock);
3789 		return (EINVAL);
3790 	}
3791 
3792 	if (*sizep == 0) {
3793 		DWARN(ldcp->id, "ldc_write: (0x%llx) write size of zero\n",
3794 		    ldcp->id);
3795 		mutex_exit(&ldcp->tx_lock);
3796 		return (0);
3797 	}
3798 
3799 	/* Check if channel is UP for data exchange */
3800 	if (ldcp->tstate != TS_UP) {
3801 		DWARN(ldcp->id,
3802 		    "ldc_write: (0x%llx) channel is not in UP state\n",
3803 		    ldcp->id);
3804 		*sizep = 0;
3805 		rv = ECONNRESET;
3806 	} else {
3807 		rv = ldcp->write_p(ldcp, buf, sizep);
3808 	}
3809 
3810 	mutex_exit(&ldcp->tx_lock);
3811 
3812 	return (rv);
3813 }
3814 
3815 /*
3816  * Write a raw packet to the channel
3817  * On return, size contains the number of bytes written.
3818  */
3819 static int
3820 i_ldc_write_raw(ldc_chan_t *ldcp, caddr_t buf, size_t *sizep)
3821 {
3822 	ldc_msg_t 	*ldcmsg;
3823 	uint64_t 	tx_head, tx_tail, new_tail;
3824 	int		rv = 0;
3825 	size_t		size;
3826 
3827 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
3828 	ASSERT(ldcp->mode == LDC_MODE_RAW);
3829 
3830 	size = *sizep;
3831 
3832 	/*
3833 	 * Check to see if the packet size is less than or
3834 	 * equal to packet size support in raw mode
3835 	 */
3836 	if (size > ldcp->pkt_payload) {
3837 		DWARN(ldcp->id,
3838 		    "ldc_write: (0x%llx) invalid size (0x%llx) for RAW mode\n",
3839 		    ldcp->id, *sizep);
3840 		*sizep = 0;
3841 		return (EMSGSIZE);
3842 	}
3843 
3844 	/* get the qptrs for the tx queue */
3845 	rv = hv_ldc_tx_get_state(ldcp->id,
3846 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
3847 	if (rv != 0) {
3848 		cmn_err(CE_WARN,
3849 		    "ldc_write: (0x%lx) cannot read queue ptrs\n", ldcp->id);
3850 		*sizep = 0;
3851 		return (EIO);
3852 	}
3853 
3854 	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
3855 	    ldcp->link_state == LDC_CHANNEL_RESET) {
3856 		DWARN(ldcp->id,
3857 		    "ldc_write: (0x%llx) channel down/reset\n", ldcp->id);
3858 
3859 		*sizep = 0;
3860 		if (mutex_tryenter(&ldcp->lock)) {
3861 			i_ldc_reset(ldcp, B_FALSE);
3862 			mutex_exit(&ldcp->lock);
3863 		} else {
3864 			/*
3865 			 * Release Tx lock, and then reacquire channel
3866 			 * and Tx lock in correct order
3867 			 */
3868 			mutex_exit(&ldcp->tx_lock);
3869 			mutex_enter(&ldcp->lock);
3870 			mutex_enter(&ldcp->tx_lock);
3871 			i_ldc_reset(ldcp, B_FALSE);
3872 			mutex_exit(&ldcp->lock);
3873 		}
3874 		return (ECONNRESET);
3875 	}
3876 
3877 	tx_tail = ldcp->tx_tail;
3878 	tx_head = ldcp->tx_head;
3879 	new_tail = (tx_tail + LDC_PACKET_SIZE) &
3880 		((ldcp->tx_q_entries-1) << LDC_PACKET_SHIFT);
3881 
3882 	if (new_tail == tx_head) {
3883 		DWARN(DBG_ALL_LDCS,
3884 		    "ldc_write: (0x%llx) TX queue is full\n", ldcp->id);
3885 		*sizep = 0;
3886 		return (EWOULDBLOCK);
3887 	}
3888 
3889 	D2(ldcp->id, "ldc_write: (0x%llx) start xfer size=%d",
3890 	    ldcp->id, size);
3891 
3892 	/* Send the data now */
3893 	ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
3894 
3895 	/* copy the data into pkt */
3896 	bcopy((uint8_t *)buf, ldcmsg, size);
3897 
3898 	/* increment tail */
3899 	tx_tail = new_tail;
3900 
3901 	/*
3902 	 * All packets have been copied into the TX queue
3903 	 * update the tail ptr in the HV
3904 	 */
3905 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
3906 	if (rv) {
3907 		if (rv == EWOULDBLOCK) {
3908 			DWARN(ldcp->id, "ldc_write: (0x%llx) write timed out\n",
3909 			    ldcp->id);
3910 			*sizep = 0;
3911 			return (EWOULDBLOCK);
3912 		}
3913 
3914 		*sizep = 0;
3915 		if (mutex_tryenter(&ldcp->lock)) {
3916 			i_ldc_reset(ldcp, B_FALSE);
3917 			mutex_exit(&ldcp->lock);
3918 		} else {
3919 			/*
3920 			 * Release Tx lock, and then reacquire channel
3921 			 * and Tx lock in correct order
3922 			 */
3923 			mutex_exit(&ldcp->tx_lock);
3924 			mutex_enter(&ldcp->lock);
3925 			mutex_enter(&ldcp->tx_lock);
3926 			i_ldc_reset(ldcp, B_FALSE);
3927 			mutex_exit(&ldcp->lock);
3928 		}
3929 		return (ECONNRESET);
3930 	}
3931 
3932 	ldcp->tx_tail = tx_tail;
3933 	*sizep = size;
3934 
3935 	D2(ldcp->id, "ldc_write: (0x%llx) end xfer size=%d", ldcp->id, size);
3936 
3937 	return (rv);
3938 }
3939 
3940 
3941 /*
3942  * Write specified amount of bytes to the channel
3943  * in multiple pkts of pkt_payload size. Each
3944  * packet is tagged with an unique packet ID in
3945  * the case of a reliable link.
3946  *
3947  * On return, size contains the number of bytes written.
3948  * This function needs to ensure that the write size is < MTU size
3949  */
3950 static int
3951 i_ldc_write_packet(ldc_chan_t *ldcp, caddr_t buf, size_t *size)
3952 {
3953 	ldc_msg_t 	*ldcmsg;
3954 	uint64_t 	tx_head, tx_tail, new_tail, start;
3955 	uint64_t	txq_size_mask, numavail;
3956 	uint8_t 	*msgbuf, *source = (uint8_t *)buf;
3957 	size_t 		len, bytes_written = 0, remaining;
3958 	int		rv;
3959 	uint32_t	curr_seqid;
3960 
3961 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
3962 
3963 	ASSERT(ldcp->mode == LDC_MODE_RELIABLE ||
3964 		ldcp->mode == LDC_MODE_UNRELIABLE ||
3965 		ldcp->mode == LDC_MODE_STREAM);
3966 
3967 	/* compute mask for increment */
3968 	txq_size_mask = (ldcp->tx_q_entries - 1) << LDC_PACKET_SHIFT;
3969 
3970 	/* get the qptrs for the tx queue */
3971 	rv = hv_ldc_tx_get_state(ldcp->id,
3972 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
3973 	if (rv != 0) {
3974 		cmn_err(CE_WARN,
3975 		    "ldc_write: (0x%lx) cannot read queue ptrs\n", ldcp->id);
3976 		*size = 0;
3977 		return (EIO);
3978 	}
3979 
3980 	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
3981 	    ldcp->link_state == LDC_CHANNEL_RESET) {
3982 		DWARN(ldcp->id,
3983 		    "ldc_write: (0x%llx) channel down/reset\n", ldcp->id);
3984 		*size = 0;
3985 		if (mutex_tryenter(&ldcp->lock)) {
3986 			i_ldc_reset(ldcp, B_FALSE);
3987 			mutex_exit(&ldcp->lock);
3988 		} else {
3989 			/*
3990 			 * Release Tx lock, and then reacquire channel
3991 			 * and Tx lock in correct order
3992 			 */
3993 			mutex_exit(&ldcp->tx_lock);
3994 			mutex_enter(&ldcp->lock);
3995 			mutex_enter(&ldcp->tx_lock);
3996 			i_ldc_reset(ldcp, B_FALSE);
3997 			mutex_exit(&ldcp->lock);
3998 		}
3999 		return (ECONNRESET);
4000 	}
4001 
4002 	tx_tail = ldcp->tx_tail;
4003 	new_tail = (tx_tail + LDC_PACKET_SIZE) %
4004 		(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
4005 
4006 	/*
4007 	 * Link mode determines whether we use HV Tx head or the
4008 	 * private protocol head (corresponding to last ACKd pkt) for
4009 	 * determining how much we can write
4010 	 */
4011 	tx_head = (ldcp->mode == LDC_MODE_RELIABLE ||
4012 		ldcp->mode == LDC_MODE_STREAM)
4013 		? ldcp->tx_ackd_head : ldcp->tx_head;
4014 	if (new_tail == tx_head) {
4015 		DWARN(DBG_ALL_LDCS,
4016 		    "ldc_write: (0x%llx) TX queue is full\n", ldcp->id);
4017 		*size = 0;
4018 		return (EWOULDBLOCK);
4019 	}
4020 
4021 	/*
4022 	 * Make sure that the LDC Tx queue has enough space
4023 	 */
4024 	numavail = (tx_head >> LDC_PACKET_SHIFT) - (tx_tail >> LDC_PACKET_SHIFT)
4025 		+ ldcp->tx_q_entries - 1;
4026 	numavail %= ldcp->tx_q_entries;
4027 
4028 	if (*size > (numavail * ldcp->pkt_payload)) {
4029 		DWARN(DBG_ALL_LDCS,
4030 		    "ldc_write: (0x%llx) TX queue has no space\n", ldcp->id);
4031 		return (EWOULDBLOCK);
4032 	}
4033 
4034 	D2(ldcp->id, "ldc_write: (0x%llx) start xfer size=%d",
4035 	    ldcp->id, *size);
4036 
4037 	/* Send the data now */
4038 	bytes_written = 0;
4039 	curr_seqid = ldcp->last_msg_snt;
4040 	start = tx_tail;
4041 
4042 	while (*size > bytes_written) {
4043 
4044 		ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
4045 
4046 		msgbuf = (uint8_t *)((ldcp->mode == LDC_MODE_RELIABLE ||
4047 			ldcp->mode == LDC_MODE_STREAM)
4048 			? ldcmsg->rdata : ldcmsg->udata);
4049 
4050 		ldcmsg->type = LDC_DATA;
4051 		ldcmsg->stype = LDC_INFO;
4052 		ldcmsg->ctrl = 0;
4053 
4054 		remaining = *size - bytes_written;
4055 		len = min(ldcp->pkt_payload, remaining);
4056 		ldcmsg->env = (uint8_t)len;
4057 
4058 		curr_seqid++;
4059 		ldcmsg->seqid = curr_seqid;
4060 
4061 		/* copy the data into pkt */
4062 		bcopy(source, msgbuf, len);
4063 
4064 		source += len;
4065 		bytes_written += len;
4066 
4067 		/* increment tail */
4068 		tx_tail = (tx_tail + LDC_PACKET_SIZE) & txq_size_mask;
4069 
4070 		ASSERT(tx_tail != tx_head);
4071 	}
4072 
4073 	/* Set the start and stop bits */
4074 	ldcmsg->env |= LDC_FRAG_STOP;
4075 	ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + start);
4076 	ldcmsg->env |= LDC_FRAG_START;
4077 
4078 	/*
4079 	 * All packets have been copied into the TX queue
4080 	 * update the tail ptr in the HV
4081 	 */
4082 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
4083 	if (rv == 0) {
4084 		ldcp->tx_tail = tx_tail;
4085 		ldcp->last_msg_snt = curr_seqid;
4086 		*size = bytes_written;
4087 	} else {
4088 		int rv2;
4089 
4090 		if (rv != EWOULDBLOCK) {
4091 			*size = 0;
4092 			if (mutex_tryenter(&ldcp->lock)) {
4093 				i_ldc_reset(ldcp, B_FALSE);
4094 				mutex_exit(&ldcp->lock);
4095 			} else {
4096 				/*
4097 				 * Release Tx lock, and then reacquire channel
4098 				 * and Tx lock in correct order
4099 				 */
4100 				mutex_exit(&ldcp->tx_lock);
4101 				mutex_enter(&ldcp->lock);
4102 				mutex_enter(&ldcp->tx_lock);
4103 				i_ldc_reset(ldcp, B_FALSE);
4104 				mutex_exit(&ldcp->lock);
4105 			}
4106 			return (ECONNRESET);
4107 		}
4108 
4109 		D1(ldcp->id, "hv_tx_set_tail returns 0x%x (head 0x%x, "
4110 			"old tail 0x%x, new tail 0x%x, qsize=0x%x)\n",
4111 			rv, ldcp->tx_head, ldcp->tx_tail, tx_tail,
4112 			(ldcp->tx_q_entries << LDC_PACKET_SHIFT));
4113 
4114 		rv2 = hv_ldc_tx_get_state(ldcp->id,
4115 		    &tx_head, &tx_tail, &ldcp->link_state);
4116 
4117 		D1(ldcp->id, "hv_ldc_tx_get_state returns 0x%x "
4118 			"(head 0x%x, tail 0x%x state 0x%x)\n",
4119 			rv2, tx_head, tx_tail, ldcp->link_state);
4120 
4121 		*size = 0;
4122 	}
4123 
4124 	D2(ldcp->id, "ldc_write: (0x%llx) end xfer size=%d", ldcp->id, *size);
4125 
4126 	return (rv);
4127 }
4128 
4129 /*
4130  * Write specified amount of bytes to the channel
4131  * in multiple pkts of pkt_payload size. Each
4132  * packet is tagged with an unique packet ID in
4133  * the case of a reliable link.
4134  *
4135  * On return, size contains the number of bytes written.
4136  * This function needs to ensure that the write size is < MTU size
4137  */
4138 static int
4139 i_ldc_write_stream(ldc_chan_t *ldcp, caddr_t buf, size_t *sizep)
4140 {
4141 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
4142 	ASSERT(ldcp->mode == LDC_MODE_STREAM);
4143 
4144 	/* Truncate packet to max of MTU size */
4145 	if (*sizep > ldcp->mtu) *sizep = ldcp->mtu;
4146 	return (i_ldc_write_packet(ldcp, buf, sizep));
4147 }
4148 
4149 
4150 /*
4151  * Interfaces for channel nexus to register/unregister with LDC module
4152  * The nexus will register functions to be used to register individual
4153  * channels with the nexus and enable interrupts for the channels
4154  */
4155 int
4156 ldc_register(ldc_cnex_t *cinfo)
4157 {
4158 	ldc_chan_t	*ldcp;
4159 
4160 	if (cinfo == NULL || cinfo->dip == NULL ||
4161 	    cinfo->reg_chan == NULL || cinfo->unreg_chan == NULL ||
4162 	    cinfo->add_intr == NULL || cinfo->rem_intr == NULL ||
4163 	    cinfo->clr_intr == NULL) {
4164 
4165 		DWARN(DBG_ALL_LDCS, "ldc_register: invalid nexus info\n");
4166 		return (EINVAL);
4167 	}
4168 
4169 	mutex_enter(&ldcssp->lock);
4170 
4171 	/* nexus registration */
4172 	ldcssp->cinfo.dip = cinfo->dip;
4173 	ldcssp->cinfo.reg_chan = cinfo->reg_chan;
4174 	ldcssp->cinfo.unreg_chan = cinfo->unreg_chan;
4175 	ldcssp->cinfo.add_intr = cinfo->add_intr;
4176 	ldcssp->cinfo.rem_intr = cinfo->rem_intr;
4177 	ldcssp->cinfo.clr_intr = cinfo->clr_intr;
4178 
4179 	/* register any channels that might have been previously initialized */
4180 	ldcp = ldcssp->chan_list;
4181 	while (ldcp) {
4182 		if ((ldcp->tstate & TS_QCONF_RDY) &&
4183 		    (ldcp->tstate & TS_CNEX_RDY) == 0)
4184 			(void) i_ldc_register_channel(ldcp);
4185 
4186 		ldcp = ldcp->next;
4187 	}
4188 
4189 	mutex_exit(&ldcssp->lock);
4190 
4191 	return (0);
4192 }
4193 
4194 int
4195 ldc_unregister(ldc_cnex_t *cinfo)
4196 {
4197 	if (cinfo == NULL || cinfo->dip == NULL) {
4198 		DWARN(DBG_ALL_LDCS, "ldc_unregister: invalid nexus info\n");
4199 		return (EINVAL);
4200 	}
4201 
4202 	mutex_enter(&ldcssp->lock);
4203 
4204 	if (cinfo->dip != ldcssp->cinfo.dip) {
4205 		DWARN(DBG_ALL_LDCS, "ldc_unregister: invalid dip\n");
4206 		mutex_exit(&ldcssp->lock);
4207 		return (EINVAL);
4208 	}
4209 
4210 	/* nexus unregister */
4211 	ldcssp->cinfo.dip = NULL;
4212 	ldcssp->cinfo.reg_chan = NULL;
4213 	ldcssp->cinfo.unreg_chan = NULL;
4214 	ldcssp->cinfo.add_intr = NULL;
4215 	ldcssp->cinfo.rem_intr = NULL;
4216 	ldcssp->cinfo.clr_intr = NULL;
4217 
4218 	mutex_exit(&ldcssp->lock);
4219 
4220 	return (0);
4221 }
4222 
4223 
4224 /* ------------------------------------------------------------------------- */
4225 
4226 /*
4227  * Allocate a memory handle for the channel and link it into the list
4228  * Also choose which memory table to use if this is the first handle
4229  * being assigned to this channel
4230  */
4231 int
4232 ldc_mem_alloc_handle(ldc_handle_t handle, ldc_mem_handle_t *mhandle)
4233 {
4234 	ldc_chan_t 	*ldcp;
4235 	ldc_mhdl_t	*mhdl;
4236 
4237 	if (handle == NULL) {
4238 		DWARN(DBG_ALL_LDCS,
4239 		    "ldc_mem_alloc_handle: invalid channel handle\n");
4240 		return (EINVAL);
4241 	}
4242 	ldcp = (ldc_chan_t *)handle;
4243 
4244 	mutex_enter(&ldcp->lock);
4245 
4246 	/* check to see if channel is initalized */
4247 	if ((ldcp->tstate & ~TS_IN_RESET) < TS_INIT) {
4248 		DWARN(ldcp->id,
4249 		    "ldc_mem_alloc_handle: (0x%llx) channel not initialized\n",
4250 		    ldcp->id);
4251 		mutex_exit(&ldcp->lock);
4252 		return (EINVAL);
4253 	}
4254 
4255 	/* allocate handle for channel */
4256 	mhdl = kmem_cache_alloc(ldcssp->memhdl_cache, KM_SLEEP);
4257 
4258 	/* initialize the lock */
4259 	mutex_init(&mhdl->lock, NULL, MUTEX_DRIVER, NULL);
4260 
4261 	mhdl->myshadow = B_FALSE;
4262 	mhdl->memseg = NULL;
4263 	mhdl->ldcp = ldcp;
4264 	mhdl->status = LDC_UNBOUND;
4265 
4266 	/* insert memory handle (@ head) into list */
4267 	if (ldcp->mhdl_list == NULL) {
4268 		ldcp->mhdl_list = mhdl;
4269 		mhdl->next = NULL;
4270 	} else {
4271 		/* insert @ head */
4272 		mhdl->next = ldcp->mhdl_list;
4273 		ldcp->mhdl_list = mhdl;
4274 	}
4275 
4276 	/* return the handle */
4277 	*mhandle = (ldc_mem_handle_t)mhdl;
4278 
4279 	mutex_exit(&ldcp->lock);
4280 
4281 	D1(ldcp->id, "ldc_mem_alloc_handle: (0x%llx) allocated handle 0x%llx\n",
4282 	    ldcp->id, mhdl);
4283 
4284 	return (0);
4285 }
4286 
4287 /*
4288  * Free memory handle for the channel and unlink it from the list
4289  */
4290 int
4291 ldc_mem_free_handle(ldc_mem_handle_t mhandle)
4292 {
4293 	ldc_mhdl_t 	*mhdl, *phdl;
4294 	ldc_chan_t 	*ldcp;
4295 
4296 	if (mhandle == NULL) {
4297 		DWARN(DBG_ALL_LDCS,
4298 		    "ldc_mem_free_handle: invalid memory handle\n");
4299 		return (EINVAL);
4300 	}
4301 	mhdl = (ldc_mhdl_t *)mhandle;
4302 
4303 	mutex_enter(&mhdl->lock);
4304 
4305 	ldcp = mhdl->ldcp;
4306 
4307 	if (mhdl->status == LDC_BOUND || mhdl->status == LDC_MAPPED) {
4308 		DWARN(ldcp->id,
4309 		    "ldc_mem_free_handle: cannot free, 0x%llx hdl bound\n",
4310 		    mhdl);
4311 		mutex_exit(&mhdl->lock);
4312 		return (EINVAL);
4313 	}
4314 	mutex_exit(&mhdl->lock);
4315 
4316 	mutex_enter(&ldcp->mlist_lock);
4317 
4318 	phdl = ldcp->mhdl_list;
4319 
4320 	/* first handle */
4321 	if (phdl == mhdl) {
4322 		ldcp->mhdl_list = mhdl->next;
4323 		mutex_destroy(&mhdl->lock);
4324 		kmem_cache_free(ldcssp->memhdl_cache, mhdl);
4325 
4326 		D1(ldcp->id,
4327 		    "ldc_mem_free_handle: (0x%llx) freed handle 0x%llx\n",
4328 		    ldcp->id, mhdl);
4329 	} else {
4330 		/* walk the list - unlink and free */
4331 		while (phdl != NULL) {
4332 			if (phdl->next == mhdl) {
4333 				phdl->next = mhdl->next;
4334 				mutex_destroy(&mhdl->lock);
4335 				kmem_cache_free(ldcssp->memhdl_cache, mhdl);
4336 				D1(ldcp->id,
4337 				    "ldc_mem_free_handle: (0x%llx) freed "
4338 				    "handle 0x%llx\n", ldcp->id, mhdl);
4339 				break;
4340 			}
4341 			phdl = phdl->next;
4342 		}
4343 	}
4344 
4345 	if (phdl == NULL) {
4346 		DWARN(ldcp->id,
4347 		    "ldc_mem_free_handle: invalid handle 0x%llx\n", mhdl);
4348 		mutex_exit(&ldcp->mlist_lock);
4349 		return (EINVAL);
4350 	}
4351 
4352 	mutex_exit(&ldcp->mlist_lock);
4353 
4354 	return (0);
4355 }
4356 
4357 /*
4358  * Bind a memory handle to a virtual address.
4359  * The virtual address is converted to the corresponding real addresses.
4360  * Returns pointer to the first ldc_mem_cookie and the total number
4361  * of cookies for this virtual address. Other cookies can be obtained
4362  * using the ldc_mem_nextcookie() call. If the pages are stored in
4363  * consecutive locations in the table, a single cookie corresponding to
4364  * the first location is returned. The cookie size spans all the entries.
4365  *
4366  * If the VA corresponds to a page that is already being exported, reuse
4367  * the page and do not export it again. Bump the page's use count.
4368  */
4369 int
4370 ldc_mem_bind_handle(ldc_mem_handle_t mhandle, caddr_t vaddr, size_t len,
4371     uint8_t mtype, uint8_t perm, ldc_mem_cookie_t *cookie, uint32_t *ccount)
4372 {
4373 	ldc_mhdl_t	*mhdl;
4374 	ldc_chan_t 	*ldcp;
4375 	ldc_mtbl_t	*mtbl;
4376 	ldc_memseg_t	*memseg;
4377 	ldc_mte_t	tmp_mte;
4378 	uint64_t	index, prev_index = 0;
4379 	int64_t		cookie_idx;
4380 	uintptr_t	raddr, ra_aligned;
4381 	uint64_t	psize, poffset, v_offset;
4382 	uint64_t	pg_shift, pg_size, pg_size_code, pg_mask;
4383 	pgcnt_t		npages;
4384 	caddr_t		v_align, addr;
4385 	int 		i, rv;
4386 
4387 	if (mhandle == NULL) {
4388 		DWARN(DBG_ALL_LDCS,
4389 		    "ldc_mem_bind_handle: invalid memory handle\n");
4390 		return (EINVAL);
4391 	}
4392 	mhdl = (ldc_mhdl_t *)mhandle;
4393 	ldcp = mhdl->ldcp;
4394 
4395 	/* clear count */
4396 	*ccount = 0;
4397 
4398 	mutex_enter(&mhdl->lock);
4399 
4400 	if (mhdl->status == LDC_BOUND || mhdl->memseg != NULL) {
4401 		DWARN(ldcp->id,
4402 		    "ldc_mem_bind_handle: (0x%x) handle already bound\n",
4403 		    mhandle);
4404 		mutex_exit(&mhdl->lock);
4405 		return (EINVAL);
4406 	}
4407 
4408 	/* Force address and size to be 8-byte aligned */
4409 	if ((((uintptr_t)vaddr | len) & 0x7) != 0) {
4410 		DWARN(ldcp->id,
4411 		    "ldc_mem_bind_handle: addr/size is not 8-byte aligned\n");
4412 		mutex_exit(&mhdl->lock);
4413 		return (EINVAL);
4414 	}
4415 
4416 	/*
4417 	 * If this channel is binding a memory handle for the
4418 	 * first time allocate it a memory map table and initialize it
4419 	 */
4420 	if ((mtbl = ldcp->mtbl) == NULL) {
4421 
4422 		mutex_enter(&ldcp->lock);
4423 
4424 		/* Allocate and initialize the map table structure */
4425 		mtbl = kmem_zalloc(sizeof (ldc_mtbl_t), KM_SLEEP);
4426 		mtbl->num_entries = mtbl->num_avail = ldc_maptable_entries;
4427 		mtbl->size = ldc_maptable_entries * sizeof (ldc_mte_slot_t);
4428 		mtbl->next_entry = NULL;
4429 		mtbl->contigmem = B_TRUE;
4430 
4431 		/* Allocate the table itself */
4432 		mtbl->table = (ldc_mte_slot_t *)
4433 			contig_mem_alloc_align(mtbl->size, MMU_PAGESIZE);
4434 		if (mtbl->table == NULL) {
4435 
4436 			/* allocate a page of memory using kmem_alloc */
4437 			mtbl->table = kmem_alloc(MMU_PAGESIZE, KM_SLEEP);
4438 			mtbl->size = MMU_PAGESIZE;
4439 			mtbl->contigmem = B_FALSE;
4440 			mtbl->num_entries = mtbl->num_avail =
4441 				mtbl->size / sizeof (ldc_mte_slot_t);
4442 			DWARN(ldcp->id,
4443 			    "ldc_mem_bind_handle: (0x%llx) reduced tbl size "
4444 			    "to %lx entries\n", ldcp->id, mtbl->num_entries);
4445 		}
4446 
4447 		/* zero out the memory */
4448 		bzero(mtbl->table, mtbl->size);
4449 
4450 		/* initialize the lock */
4451 		mutex_init(&mtbl->lock, NULL, MUTEX_DRIVER, NULL);
4452 
4453 		/* register table for this channel */
4454 		rv = hv_ldc_set_map_table(ldcp->id,
4455 		    va_to_pa(mtbl->table), mtbl->num_entries);
4456 		if (rv != 0) {
4457 			cmn_err(CE_WARN,
4458 			    "ldc_mem_bind_handle: (0x%lx) err %d mapping tbl",
4459 			    ldcp->id, rv);
4460 			if (mtbl->contigmem)
4461 				contig_mem_free(mtbl->table, mtbl->size);
4462 			else
4463 				kmem_free(mtbl->table, mtbl->size);
4464 			mutex_destroy(&mtbl->lock);
4465 			kmem_free(mtbl, sizeof (ldc_mtbl_t));
4466 			mutex_exit(&ldcp->lock);
4467 			mutex_exit(&mhdl->lock);
4468 			return (EIO);
4469 		}
4470 
4471 		ldcp->mtbl = mtbl;
4472 		mutex_exit(&ldcp->lock);
4473 
4474 		D1(ldcp->id,
4475 		    "ldc_mem_bind_handle: (0x%llx) alloc'd map table 0x%llx\n",
4476 		    ldcp->id, ldcp->mtbl->table);
4477 	}
4478 
4479 	/* FUTURE: get the page size, pgsz code, and shift */
4480 	pg_size = MMU_PAGESIZE;
4481 	pg_size_code = page_szc(pg_size);
4482 	pg_shift = page_get_shift(pg_size_code);
4483 	pg_mask = ~(pg_size - 1);
4484 
4485 	D1(ldcp->id, "ldc_mem_bind_handle: (0x%llx) binding "
4486 	    "va 0x%llx pgsz=0x%llx, pgszc=0x%llx, pg_shift=0x%llx\n",
4487 	    ldcp->id, vaddr, pg_size, pg_size_code, pg_shift);
4488 
4489 	/* aligned VA and its offset */
4490 	v_align = (caddr_t)(((uintptr_t)vaddr) & ~(pg_size - 1));
4491 	v_offset = ((uintptr_t)vaddr) & (pg_size - 1);
4492 
4493 	npages = (len+v_offset)/pg_size;
4494 	npages = ((len+v_offset)%pg_size == 0) ? npages : npages+1;
4495 
4496 	D1(ldcp->id, "ldc_mem_bind_handle: binding "
4497 	    "(0x%llx) v=0x%llx,val=0x%llx,off=0x%x,pgs=0x%x\n",
4498 	    ldcp->id, vaddr, v_align, v_offset, npages);
4499 
4500 	/* lock the memory table - exclusive access to channel */
4501 	mutex_enter(&mtbl->lock);
4502 
4503 	if (npages > mtbl->num_avail) {
4504 		D1(ldcp->id, "ldc_mem_bind_handle: (0x%llx) no table entries\n",
4505 		    ldcp->id);
4506 		mutex_exit(&mtbl->lock);
4507 		mutex_exit(&mhdl->lock);
4508 		return (ENOMEM);
4509 	}
4510 
4511 	/* Allocate a memseg structure */
4512 	memseg = mhdl->memseg =
4513 		kmem_cache_alloc(ldcssp->memseg_cache, KM_SLEEP);
4514 
4515 	/* Allocate memory to store all pages and cookies */
4516 	memseg->pages = kmem_zalloc((sizeof (ldc_page_t) * npages), KM_SLEEP);
4517 	memseg->cookies =
4518 		kmem_zalloc((sizeof (ldc_mem_cookie_t) * npages), KM_SLEEP);
4519 
4520 	D2(ldcp->id, "ldc_mem_bind_handle: (0x%llx) processing 0x%llx pages\n",
4521 	    ldcp->id, npages);
4522 
4523 	addr = v_align;
4524 
4525 	/*
4526 	 * Check if direct shared memory map is enabled, if not change
4527 	 * the mapping type to include SHADOW_MAP.
4528 	 */
4529 	if (ldc_shmem_enabled == 0)
4530 		mtype = LDC_SHADOW_MAP;
4531 
4532 	/*
4533 	 * Table slots are used in a round-robin manner. The algorithm permits
4534 	 * inserting duplicate entries. Slots allocated earlier will typically
4535 	 * get freed before we get back to reusing the slot.Inserting duplicate
4536 	 * entries should be OK as we only lookup entries using the cookie addr
4537 	 * i.e. tbl index, during export, unexport and copy operation.
4538 	 *
4539 	 * One implementation what was tried was to search for a duplicate
4540 	 * page entry first and reuse it. The search overhead is very high and
4541 	 * in the vnet case dropped the perf by almost half, 50 to 24 mbps.
4542 	 * So it does make sense to avoid searching for duplicates.
4543 	 *
4544 	 * But during the process of searching for a free slot, if we find a
4545 	 * duplicate entry we will go ahead and use it, and bump its use count.
4546 	 */
4547 
4548 	/* index to start searching from */
4549 	index = mtbl->next_entry;
4550 	cookie_idx = -1;
4551 
4552 	tmp_mte.ll = 0;	/* initialise fields to 0 */
4553 
4554 	if (mtype & LDC_DIRECT_MAP) {
4555 		tmp_mte.mte_r = (perm & LDC_MEM_R) ? 1 : 0;
4556 		tmp_mte.mte_w = (perm & LDC_MEM_W) ? 1 : 0;
4557 		tmp_mte.mte_x = (perm & LDC_MEM_X) ? 1 : 0;
4558 	}
4559 
4560 	if (mtype & LDC_SHADOW_MAP) {
4561 		tmp_mte.mte_cr = (perm & LDC_MEM_R) ? 1 : 0;
4562 		tmp_mte.mte_cw = (perm & LDC_MEM_W) ? 1 : 0;
4563 	}
4564 
4565 	if (mtype & LDC_IO_MAP) {
4566 		tmp_mte.mte_ir = (perm & LDC_MEM_R) ? 1 : 0;
4567 		tmp_mte.mte_iw = (perm & LDC_MEM_W) ? 1 : 0;
4568 	}
4569 
4570 	D1(ldcp->id, "ldc_mem_bind_handle mte=0x%llx\n", tmp_mte.ll);
4571 
4572 	tmp_mte.mte_pgszc = pg_size_code;
4573 
4574 	/* initialize each mem table entry */
4575 	for (i = 0; i < npages; i++) {
4576 
4577 		/* check if slot is available in the table */
4578 		while (mtbl->table[index].entry.ll != 0) {
4579 
4580 			index = (index + 1) % mtbl->num_entries;
4581 
4582 			if (index == mtbl->next_entry) {
4583 				/* we have looped around */
4584 				DWARN(DBG_ALL_LDCS,
4585 				    "ldc_mem_bind_handle: (0x%llx) cannot find "
4586 				    "entry\n", ldcp->id);
4587 				*ccount = 0;
4588 
4589 				/* NOTE: free memory, remove previous entries */
4590 				/* this shouldnt happen as num_avail was ok */
4591 
4592 				mutex_exit(&mtbl->lock);
4593 				mutex_exit(&mhdl->lock);
4594 				return (ENOMEM);
4595 			}
4596 		}
4597 
4598 		/* get the real address */
4599 		raddr = va_to_pa((void *)addr);
4600 		ra_aligned = ((uintptr_t)raddr & pg_mask);
4601 
4602 		/* build the mte */
4603 		tmp_mte.mte_rpfn = ra_aligned >> pg_shift;
4604 
4605 		D1(ldcp->id, "ldc_mem_bind_handle mte=0x%llx\n", tmp_mte.ll);
4606 
4607 		/* update entry in table */
4608 		mtbl->table[index].entry = tmp_mte;
4609 
4610 		D2(ldcp->id, "ldc_mem_bind_handle: (0x%llx) stored MTE 0x%llx"
4611 		    " into loc 0x%llx\n", ldcp->id, tmp_mte.ll, index);
4612 
4613 		/* calculate the size and offset for this export range */
4614 		if (i == 0) {
4615 			/* first page */
4616 			psize = min((pg_size - v_offset), len);
4617 			poffset = v_offset;
4618 
4619 		} else if (i == (npages - 1)) {
4620 			/* last page */
4621 			psize =	(((uintptr_t)(vaddr + len)) &
4622 				    ((uint64_t)(pg_size-1)));
4623 			if (psize == 0)
4624 				psize = pg_size;
4625 			poffset = 0;
4626 
4627 		} else {
4628 			/* middle pages */
4629 			psize = pg_size;
4630 			poffset = 0;
4631 		}
4632 
4633 		/* store entry for this page */
4634 		memseg->pages[i].index = index;
4635 		memseg->pages[i].raddr = raddr;
4636 		memseg->pages[i].offset = poffset;
4637 		memseg->pages[i].size = psize;
4638 		memseg->pages[i].mte = &(mtbl->table[index]);
4639 
4640 		/* create the cookie */
4641 		if (i == 0 || (index != prev_index + 1)) {
4642 			cookie_idx++;
4643 			memseg->cookies[cookie_idx].addr =
4644 				IDX2COOKIE(index, pg_size_code, pg_shift);
4645 			memseg->cookies[cookie_idx].addr |= poffset;
4646 			memseg->cookies[cookie_idx].size = psize;
4647 
4648 		} else {
4649 			memseg->cookies[cookie_idx].size += psize;
4650 		}
4651 
4652 		D1(ldcp->id, "ldc_mem_bind_handle: bound "
4653 		    "(0x%llx) va=0x%llx, idx=0x%llx, "
4654 		    "ra=0x%llx(sz=0x%x,off=0x%x)\n",
4655 		    ldcp->id, addr, index, raddr, psize, poffset);
4656 
4657 		/* decrement number of available entries */
4658 		mtbl->num_avail--;
4659 
4660 		/* increment va by page size */
4661 		addr += pg_size;
4662 
4663 		/* increment index */
4664 		prev_index = index;
4665 		index = (index + 1) % mtbl->num_entries;
4666 
4667 		/* save the next slot */
4668 		mtbl->next_entry = index;
4669 	}
4670 
4671 	mutex_exit(&mtbl->lock);
4672 
4673 	/* memory handle = bound */
4674 	mhdl->mtype = mtype;
4675 	mhdl->perm = perm;
4676 	mhdl->status = LDC_BOUND;
4677 
4678 	/* update memseg_t */
4679 	memseg->vaddr = vaddr;
4680 	memseg->raddr = memseg->pages[0].raddr;
4681 	memseg->size = len;
4682 	memseg->npages = npages;
4683 	memseg->ncookies = cookie_idx + 1;
4684 	memseg->next_cookie = (memseg->ncookies > 1) ? 1 : 0;
4685 
4686 	/* return count and first cookie */
4687 	*ccount = memseg->ncookies;
4688 	cookie->addr = memseg->cookies[0].addr;
4689 	cookie->size = memseg->cookies[0].size;
4690 
4691 	D1(ldcp->id,
4692 	    "ldc_mem_bind_handle: (0x%llx) bound 0x%llx, va=0x%llx, "
4693 	    "pgs=0x%llx cookies=0x%llx\n",
4694 	    ldcp->id, mhdl, vaddr, npages, memseg->ncookies);
4695 
4696 	mutex_exit(&mhdl->lock);
4697 	return (0);
4698 }
4699 
4700 /*
4701  * Return the next cookie associated with the specified memory handle
4702  */
4703 int
4704 ldc_mem_nextcookie(ldc_mem_handle_t mhandle, ldc_mem_cookie_t *cookie)
4705 {
4706 	ldc_mhdl_t	*mhdl;
4707 	ldc_chan_t 	*ldcp;
4708 	ldc_memseg_t	*memseg;
4709 
4710 	if (mhandle == NULL) {
4711 		DWARN(DBG_ALL_LDCS,
4712 		    "ldc_mem_nextcookie: invalid memory handle\n");
4713 		return (EINVAL);
4714 	}
4715 	mhdl = (ldc_mhdl_t *)mhandle;
4716 
4717 	mutex_enter(&mhdl->lock);
4718 
4719 	ldcp = mhdl->ldcp;
4720 	memseg = mhdl->memseg;
4721 
4722 	if (cookie == 0) {
4723 		DWARN(ldcp->id,
4724 		    "ldc_mem_nextcookie:(0x%llx) invalid cookie arg\n",
4725 		    ldcp->id);
4726 		mutex_exit(&mhdl->lock);
4727 		return (EINVAL);
4728 	}
4729 
4730 	if (memseg->next_cookie != 0) {
4731 		cookie->addr = memseg->cookies[memseg->next_cookie].addr;
4732 		cookie->size = memseg->cookies[memseg->next_cookie].size;
4733 		memseg->next_cookie++;
4734 		if (memseg->next_cookie == memseg->ncookies)
4735 			memseg->next_cookie = 0;
4736 
4737 	} else {
4738 		DWARN(ldcp->id,
4739 		    "ldc_mem_nextcookie:(0x%llx) no more cookies\n", ldcp->id);
4740 		cookie->addr = 0;
4741 		cookie->size = 0;
4742 		mutex_exit(&mhdl->lock);
4743 		return (EINVAL);
4744 	}
4745 
4746 	D1(ldcp->id,
4747 	    "ldc_mem_nextcookie: (0x%llx) cookie addr=0x%llx,sz=0x%llx\n",
4748 	    ldcp->id, cookie->addr, cookie->size);
4749 
4750 	mutex_exit(&mhdl->lock);
4751 	return (0);
4752 }
4753 
4754 /*
4755  * Unbind the virtual memory region associated with the specified
4756  * memory handle. Allassociated cookies are freed and the corresponding
4757  * RA space is no longer exported.
4758  */
4759 int
4760 ldc_mem_unbind_handle(ldc_mem_handle_t mhandle)
4761 {
4762 	ldc_mhdl_t	*mhdl;
4763 	ldc_chan_t 	*ldcp;
4764 	ldc_mtbl_t	*mtbl;
4765 	ldc_memseg_t	*memseg;
4766 	uint64_t	cookie_addr;
4767 	uint64_t	pg_shift, pg_size_code;
4768 	int		i, rv;
4769 
4770 	if (mhandle == NULL) {
4771 		DWARN(DBG_ALL_LDCS,
4772 		    "ldc_mem_unbind_handle: invalid memory handle\n");
4773 		return (EINVAL);
4774 	}
4775 	mhdl = (ldc_mhdl_t *)mhandle;
4776 
4777 	mutex_enter(&mhdl->lock);
4778 
4779 	if (mhdl->status == LDC_UNBOUND) {
4780 		DWARN(DBG_ALL_LDCS,
4781 		    "ldc_mem_unbind_handle: (0x%x) handle is not bound\n",
4782 		    mhandle);
4783 		mutex_exit(&mhdl->lock);
4784 		return (EINVAL);
4785 	}
4786 
4787 	ldcp = mhdl->ldcp;
4788 	mtbl = ldcp->mtbl;
4789 
4790 	memseg = mhdl->memseg;
4791 
4792 	/* lock the memory table - exclusive access to channel */
4793 	mutex_enter(&mtbl->lock);
4794 
4795 	/* undo the pages exported */
4796 	for (i = 0; i < memseg->npages; i++) {
4797 
4798 		/* check for mapped pages, revocation cookie != 0 */
4799 		if (memseg->pages[i].mte->cookie) {
4800 
4801 			pg_size_code = page_szc(memseg->pages[i].size);
4802 			pg_shift = page_get_shift(memseg->pages[i].size);
4803 			cookie_addr = IDX2COOKIE(memseg->pages[i].index,
4804 			    pg_size_code, pg_shift);
4805 
4806 			D1(ldcp->id, "ldc_mem_unbind_handle: (0x%llx) revoke "
4807 			    "cookie 0x%llx, rcookie 0x%llx\n", ldcp->id,
4808 			    cookie_addr, memseg->pages[i].mte->cookie);
4809 			rv = hv_ldc_revoke(ldcp->id, cookie_addr,
4810 			    memseg->pages[i].mte->cookie);
4811 			if (rv) {
4812 				DWARN(ldcp->id,
4813 				    "ldc_mem_unbind_handle: (0x%llx) cannot "
4814 				    "revoke mapping, cookie %llx\n", ldcp->id,
4815 				    cookie_addr);
4816 			}
4817 		}
4818 
4819 		/* clear the entry from the table */
4820 		memseg->pages[i].mte->entry.ll = 0;
4821 		mtbl->num_avail++;
4822 	}
4823 	mutex_exit(&mtbl->lock);
4824 
4825 	/* free the allocated memseg and page structures */
4826 	kmem_free(memseg->pages, (sizeof (ldc_page_t) * memseg->npages));
4827 	kmem_free(memseg->cookies,
4828 	    (sizeof (ldc_mem_cookie_t) * memseg->npages));
4829 	kmem_cache_free(ldcssp->memseg_cache, memseg);
4830 
4831 	/* uninitialize the memory handle */
4832 	mhdl->memseg = NULL;
4833 	mhdl->status = LDC_UNBOUND;
4834 
4835 	D1(ldcp->id, "ldc_mem_unbind_handle: (0x%llx) unbound handle 0x%llx\n",
4836 	    ldcp->id, mhdl);
4837 
4838 	mutex_exit(&mhdl->lock);
4839 	return (0);
4840 }
4841 
4842 /*
4843  * Get information about the dring. The base address of the descriptor
4844  * ring along with the type and permission are returned back.
4845  */
4846 int
4847 ldc_mem_info(ldc_mem_handle_t mhandle, ldc_mem_info_t *minfo)
4848 {
4849 	ldc_mhdl_t	*mhdl;
4850 
4851 	if (mhandle == NULL) {
4852 		DWARN(DBG_ALL_LDCS, "ldc_mem_info: invalid memory handle\n");
4853 		return (EINVAL);
4854 	}
4855 	mhdl = (ldc_mhdl_t *)mhandle;
4856 
4857 	if (minfo == NULL) {
4858 		DWARN(DBG_ALL_LDCS, "ldc_mem_info: invalid args\n");
4859 		return (EINVAL);
4860 	}
4861 
4862 	mutex_enter(&mhdl->lock);
4863 
4864 	minfo->status = mhdl->status;
4865 	if (mhdl->status == LDC_BOUND || mhdl->status == LDC_MAPPED) {
4866 		minfo->vaddr = mhdl->memseg->vaddr;
4867 		minfo->raddr = mhdl->memseg->raddr;
4868 		minfo->mtype = mhdl->mtype;
4869 		minfo->perm = mhdl->perm;
4870 	}
4871 	mutex_exit(&mhdl->lock);
4872 
4873 	return (0);
4874 }
4875 
4876 /*
4877  * Copy data either from or to the client specified virtual address
4878  * space to or from the exported memory associated with the cookies.
4879  * The direction argument determines whether the data is read from or
4880  * written to exported memory.
4881  */
4882 int
4883 ldc_mem_copy(ldc_handle_t handle, caddr_t vaddr, uint64_t off, size_t *size,
4884     ldc_mem_cookie_t *cookies, uint32_t ccount, uint8_t direction)
4885 {
4886 	ldc_chan_t 	*ldcp;
4887 	uint64_t	local_voff, local_valign;
4888 	uint64_t	cookie_addr, cookie_size;
4889 	uint64_t	pg_shift, pg_size, pg_size_code;
4890 	uint64_t 	export_caddr, export_poff, export_psize, export_size;
4891 	uint64_t	local_ra, local_poff, local_psize;
4892 	uint64_t	copy_size, copied_len = 0, total_bal = 0, idx = 0;
4893 	pgcnt_t		npages;
4894 	size_t		len = *size;
4895 	int 		i, rv = 0;
4896 
4897 	uint64_t	chid;
4898 
4899 	if (handle == NULL) {
4900 		DWARN(DBG_ALL_LDCS, "ldc_mem_copy: invalid channel handle\n");
4901 		return (EINVAL);
4902 	}
4903 	ldcp = (ldc_chan_t *)handle;
4904 	chid = ldcp->id;
4905 
4906 	/* check to see if channel is UP */
4907 	if (ldcp->tstate != TS_UP) {
4908 		DWARN(chid, "ldc_mem_copy: (0x%llx) channel is not UP\n",
4909 		    chid);
4910 		return (ECONNRESET);
4911 	}
4912 
4913 	/* Force address and size to be 8-byte aligned */
4914 	if ((((uintptr_t)vaddr | len) & 0x7) != 0) {
4915 		DWARN(chid,
4916 		    "ldc_mem_copy: addr/sz is not 8-byte aligned\n");
4917 		return (EINVAL);
4918 	}
4919 
4920 	/* Find the size of the exported memory */
4921 	export_size = 0;
4922 	for (i = 0; i < ccount; i++)
4923 		export_size += cookies[i].size;
4924 
4925 	/* check to see if offset is valid */
4926 	if (off > export_size) {
4927 		DWARN(chid,
4928 		    "ldc_mem_copy: (0x%llx) start offset > export mem size\n",
4929 		    chid);
4930 		return (EINVAL);
4931 	}
4932 
4933 	/*
4934 	 * Check to see if the export size is smaller than the size we
4935 	 * are requesting to copy - if so flag an error
4936 	 */
4937 	if ((export_size - off) < *size) {
4938 		DWARN(chid,
4939 		    "ldc_mem_copy: (0x%llx) copy size > export mem size\n",
4940 		    chid);
4941 		return (EINVAL);
4942 	}
4943 
4944 	total_bal = min(export_size, *size);
4945 
4946 	/* FUTURE: get the page size, pgsz code, and shift */
4947 	pg_size = MMU_PAGESIZE;
4948 	pg_size_code = page_szc(pg_size);
4949 	pg_shift = page_get_shift(pg_size_code);
4950 
4951 	D1(chid, "ldc_mem_copy: copying data "
4952 	    "(0x%llx) va 0x%llx pgsz=0x%llx, pgszc=0x%llx, pg_shift=0x%llx\n",
4953 	    chid, vaddr, pg_size, pg_size_code, pg_shift);
4954 
4955 	/* aligned VA and its offset */
4956 	local_valign = (((uintptr_t)vaddr) & ~(pg_size - 1));
4957 	local_voff = ((uintptr_t)vaddr) & (pg_size - 1);
4958 
4959 	npages = (len+local_voff)/pg_size;
4960 	npages = ((len+local_voff)%pg_size == 0) ? npages : npages+1;
4961 
4962 	D1(chid,
4963 	    "ldc_mem_copy: (0x%llx) v=0x%llx,val=0x%llx,off=0x%x,pgs=0x%x\n",
4964 	    chid, vaddr, local_valign, local_voff, npages);
4965 
4966 	local_ra = va_to_pa((void *)local_valign);
4967 	local_poff = local_voff;
4968 	local_psize = min(len, (pg_size - local_voff));
4969 
4970 	len -= local_psize;
4971 
4972 	/*
4973 	 * find the first cookie in the list of cookies
4974 	 * if the offset passed in is not zero
4975 	 */
4976 	for (idx = 0; idx < ccount; idx++) {
4977 		cookie_size = cookies[idx].size;
4978 		if (off < cookie_size)
4979 			break;
4980 		off -= cookie_size;
4981 	}
4982 
4983 	cookie_addr = cookies[idx].addr + off;
4984 	cookie_size = cookies[idx].size - off;
4985 
4986 	export_caddr = cookie_addr & ~(pg_size - 1);
4987 	export_poff = cookie_addr & (pg_size - 1);
4988 	export_psize = min(cookie_size, (pg_size - export_poff));
4989 
4990 	for (;;) {
4991 
4992 		copy_size = min(export_psize, local_psize);
4993 
4994 		D1(chid,
4995 		    "ldc_mem_copy:(0x%llx) dir=0x%x, caddr=0x%llx,"
4996 		    " loc_ra=0x%llx, exp_poff=0x%llx, loc_poff=0x%llx,"
4997 		    " exp_psz=0x%llx, loc_psz=0x%llx, copy_sz=0x%llx,"
4998 		    " total_bal=0x%llx\n",
4999 		    chid, direction, export_caddr, local_ra, export_poff,
5000 		    local_poff, export_psize, local_psize, copy_size,
5001 		    total_bal);
5002 
5003 		rv = hv_ldc_copy(chid, direction,
5004 		    (export_caddr + export_poff), (local_ra + local_poff),
5005 		    copy_size, &copied_len);
5006 
5007 		if (rv != 0) {
5008 			int 		error = EIO;
5009 			uint64_t	rx_hd, rx_tl;
5010 
5011 			DWARN(chid,
5012 			    "ldc_mem_copy: (0x%llx) err %d during copy\n",
5013 			    (unsigned long long)chid, rv);
5014 			DWARN(chid,
5015 			    "ldc_mem_copy: (0x%llx) dir=0x%x, caddr=0x%lx, "
5016 			    "loc_ra=0x%lx, exp_poff=0x%lx, loc_poff=0x%lx,"
5017 			    " exp_psz=0x%lx, loc_psz=0x%lx, copy_sz=0x%lx,"
5018 			    " copied_len=0x%lx, total_bal=0x%lx\n",
5019 			    chid, direction, export_caddr, local_ra,
5020 			    export_poff, local_poff, export_psize, local_psize,
5021 			    copy_size, copied_len, total_bal);
5022 
5023 			*size = *size - total_bal;
5024 
5025 			/*
5026 			 * check if reason for copy error was due to
5027 			 * a channel reset. we need to grab the lock
5028 			 * just in case we have to do a reset.
5029 			 */
5030 			mutex_enter(&ldcp->lock);
5031 			mutex_enter(&ldcp->tx_lock);
5032 
5033 			rv = hv_ldc_rx_get_state(ldcp->id,
5034 			    &rx_hd, &rx_tl, &(ldcp->link_state));
5035 			if (ldcp->link_state == LDC_CHANNEL_DOWN ||
5036 			    ldcp->link_state == LDC_CHANNEL_RESET) {
5037 				i_ldc_reset(ldcp, B_FALSE);
5038 				error = ECONNRESET;
5039 			}
5040 
5041 			mutex_exit(&ldcp->tx_lock);
5042 			mutex_exit(&ldcp->lock);
5043 
5044 			return (error);
5045 		}
5046 
5047 		ASSERT(copied_len <= copy_size);
5048 
5049 		D2(chid, "ldc_mem_copy: copied=0x%llx\n", copied_len);
5050 		export_poff += copied_len;
5051 		local_poff += copied_len;
5052 		export_psize -= copied_len;
5053 		local_psize -= copied_len;
5054 		cookie_size -= copied_len;
5055 
5056 		total_bal -= copied_len;
5057 
5058 		if (copy_size != copied_len)
5059 			continue;
5060 
5061 		if (export_psize == 0 && total_bal != 0) {
5062 
5063 			if (cookie_size == 0) {
5064 				idx++;
5065 				cookie_addr = cookies[idx].addr;
5066 				cookie_size = cookies[idx].size;
5067 
5068 				export_caddr = cookie_addr & ~(pg_size - 1);
5069 				export_poff = cookie_addr & (pg_size - 1);
5070 				export_psize =
5071 					min(cookie_size, (pg_size-export_poff));
5072 			} else {
5073 				export_caddr += pg_size;
5074 				export_poff = 0;
5075 				export_psize = min(cookie_size, pg_size);
5076 			}
5077 		}
5078 
5079 		if (local_psize == 0 && total_bal != 0) {
5080 			local_valign += pg_size;
5081 			local_ra = va_to_pa((void *)local_valign);
5082 			local_poff = 0;
5083 			local_psize = min(pg_size, len);
5084 			len -= local_psize;
5085 		}
5086 
5087 		/* check if we are all done */
5088 		if (total_bal == 0)
5089 			break;
5090 	}
5091 
5092 
5093 	D1(chid,
5094 	    "ldc_mem_copy: (0x%llx) done copying sz=0x%llx\n",
5095 	    chid, *size);
5096 
5097 	return (0);
5098 }
5099 
5100 /*
5101  * Copy data either from or to the client specified virtual address
5102  * space to or from HV physical memory.
5103  *
5104  * The direction argument determines whether the data is read from or
5105  * written to HV memory. direction values are LDC_COPY_IN/OUT similar
5106  * to the ldc_mem_copy interface
5107  */
5108 int
5109 ldc_mem_rdwr_cookie(ldc_handle_t handle, caddr_t vaddr, size_t *size,
5110     caddr_t paddr, uint8_t direction)
5111 {
5112 	ldc_chan_t 	*ldcp;
5113 	uint64_t	local_voff, local_valign;
5114 	uint64_t	pg_shift, pg_size, pg_size_code;
5115 	uint64_t 	target_pa, target_poff, target_psize, target_size;
5116 	uint64_t	local_ra, local_poff, local_psize;
5117 	uint64_t	copy_size, copied_len = 0;
5118 	pgcnt_t		npages;
5119 	size_t		len = *size;
5120 	int 		rv = 0;
5121 
5122 	if (handle == NULL) {
5123 		DWARN(DBG_ALL_LDCS,
5124 		    "ldc_mem_rdwr_cookie: invalid channel handle\n");
5125 		return (EINVAL);
5126 	}
5127 	ldcp = (ldc_chan_t *)handle;
5128 
5129 	mutex_enter(&ldcp->lock);
5130 
5131 	/* check to see if channel is UP */
5132 	if (ldcp->tstate != TS_UP) {
5133 		DWARN(ldcp->id,
5134 		    "ldc_mem_rdwr_cookie: (0x%llx) channel is not UP\n",
5135 		    ldcp->id);
5136 		mutex_exit(&ldcp->lock);
5137 		return (ECONNRESET);
5138 	}
5139 
5140 	/* Force address and size to be 8-byte aligned */
5141 	if ((((uintptr_t)vaddr | len) & 0x7) != 0) {
5142 		DWARN(ldcp->id,
5143 		    "ldc_mem_rdwr_cookie: addr/size is not 8-byte aligned\n");
5144 		mutex_exit(&ldcp->lock);
5145 		return (EINVAL);
5146 	}
5147 
5148 	target_size = *size;
5149 
5150 	/* FUTURE: get the page size, pgsz code, and shift */
5151 	pg_size = MMU_PAGESIZE;
5152 	pg_size_code = page_szc(pg_size);
5153 	pg_shift = page_get_shift(pg_size_code);
5154 
5155 	D1(ldcp->id, "ldc_mem_rdwr_cookie: copying data "
5156 	    "(0x%llx) va 0x%llx pgsz=0x%llx, pgszc=0x%llx, pg_shift=0x%llx\n",
5157 	    ldcp->id, vaddr, pg_size, pg_size_code, pg_shift);
5158 
5159 	/* aligned VA and its offset */
5160 	local_valign = ((uintptr_t)vaddr) & ~(pg_size - 1);
5161 	local_voff = ((uintptr_t)vaddr) & (pg_size - 1);
5162 
5163 	npages = (len + local_voff) / pg_size;
5164 	npages = ((len + local_voff) % pg_size == 0) ? npages : npages+1;
5165 
5166 	D1(ldcp->id, "ldc_mem_rdwr_cookie: (0x%llx) v=0x%llx, "
5167 	    "val=0x%llx,off=0x%x,pgs=0x%x\n",
5168 	    ldcp->id, vaddr, local_valign, local_voff, npages);
5169 
5170 	local_ra = va_to_pa((void *)local_valign);
5171 	local_poff = local_voff;
5172 	local_psize = min(len, (pg_size - local_voff));
5173 
5174 	len -= local_psize;
5175 
5176 	target_pa = ((uintptr_t)paddr) & ~(pg_size - 1);
5177 	target_poff = ((uintptr_t)paddr) & (pg_size - 1);
5178 	target_psize = pg_size - target_poff;
5179 
5180 	for (;;) {
5181 
5182 		copy_size = min(target_psize, local_psize);
5183 
5184 		D1(ldcp->id,
5185 		    "ldc_mem_rdwr_cookie: (0x%llx) dir=0x%x, tar_pa=0x%llx,"
5186 		    " loc_ra=0x%llx, tar_poff=0x%llx, loc_poff=0x%llx,"
5187 		    " tar_psz=0x%llx, loc_psz=0x%llx, copy_sz=0x%llx,"
5188 		    " total_bal=0x%llx\n",
5189 		    ldcp->id, direction, target_pa, local_ra, target_poff,
5190 		    local_poff, target_psize, local_psize, copy_size,
5191 		    target_size);
5192 
5193 		rv = hv_ldc_copy(ldcp->id, direction,
5194 		    (target_pa + target_poff), (local_ra + local_poff),
5195 		    copy_size, &copied_len);
5196 
5197 		if (rv != 0) {
5198 			DWARN(DBG_ALL_LDCS,
5199 			    "ldc_mem_rdwr_cookie: (0x%lx) err %d during copy\n",
5200 			    ldcp->id, rv);
5201 			DWARN(DBG_ALL_LDCS,
5202 			    "ldc_mem_rdwr_cookie: (0x%llx) dir=%lld, "
5203 			    "tar_pa=0x%llx, loc_ra=0x%llx, tar_poff=0x%llx, "
5204 			    "loc_poff=0x%llx, tar_psz=0x%llx, loc_psz=0x%llx, "
5205 			    "copy_sz=0x%llx, total_bal=0x%llx\n",
5206 			    ldcp->id, direction, target_pa, local_ra,
5207 			    target_poff, local_poff, target_psize, local_psize,
5208 			    copy_size, target_size);
5209 
5210 			*size = *size - target_size;
5211 			mutex_exit(&ldcp->lock);
5212 			return (i_ldc_h2v_error(rv));
5213 		}
5214 
5215 		D2(ldcp->id, "ldc_mem_rdwr_cookie: copied=0x%llx\n",
5216 		    copied_len);
5217 		target_poff += copied_len;
5218 		local_poff += copied_len;
5219 		target_psize -= copied_len;
5220 		local_psize -= copied_len;
5221 
5222 		target_size -= copied_len;
5223 
5224 		if (copy_size != copied_len)
5225 			continue;
5226 
5227 		if (target_psize == 0 && target_size != 0) {
5228 			target_pa += pg_size;
5229 			target_poff = 0;
5230 			target_psize = min(pg_size, target_size);
5231 		}
5232 
5233 		if (local_psize == 0 && target_size != 0) {
5234 			local_valign += pg_size;
5235 			local_ra = va_to_pa((void *)local_valign);
5236 			local_poff = 0;
5237 			local_psize = min(pg_size, len);
5238 			len -= local_psize;
5239 		}
5240 
5241 		/* check if we are all done */
5242 		if (target_size == 0)
5243 			break;
5244 	}
5245 
5246 	mutex_exit(&ldcp->lock);
5247 
5248 	D1(ldcp->id, "ldc_mem_rdwr_cookie: (0x%llx) done copying sz=0x%llx\n",
5249 	    ldcp->id, *size);
5250 
5251 	return (0);
5252 }
5253 
5254 /*
5255  * Map an exported memory segment into the local address space. If the
5256  * memory range was exported for direct map access, a HV call is made
5257  * to allocate a RA range. If the map is done via a shadow copy, local
5258  * shadow memory is allocated and the base VA is returned in 'vaddr'. If
5259  * the mapping is a direct map then the RA is returned in 'raddr'.
5260  */
5261 int
5262 ldc_mem_map(ldc_mem_handle_t mhandle, ldc_mem_cookie_t *cookie, uint32_t ccount,
5263     uint8_t mtype, uint8_t perm, caddr_t *vaddr, caddr_t *raddr)
5264 {
5265 	int		i, j, idx, rv, retries;
5266 	ldc_chan_t 	*ldcp;
5267 	ldc_mhdl_t	*mhdl;
5268 	ldc_memseg_t	*memseg;
5269 	caddr_t		tmpaddr;
5270 	uint64_t	map_perm = perm;
5271 	uint64_t	pg_size, pg_shift, pg_size_code, pg_mask;
5272 	uint64_t	exp_size = 0, base_off, map_size, npages;
5273 	uint64_t	cookie_addr, cookie_off, cookie_size;
5274 	tte_t		ldc_tte;
5275 
5276 	if (mhandle == NULL) {
5277 		DWARN(DBG_ALL_LDCS, "ldc_mem_map: invalid memory handle\n");
5278 		return (EINVAL);
5279 	}
5280 	mhdl = (ldc_mhdl_t *)mhandle;
5281 
5282 	mutex_enter(&mhdl->lock);
5283 
5284 	if (mhdl->status == LDC_BOUND || mhdl->status == LDC_MAPPED ||
5285 	    mhdl->memseg != NULL) {
5286 		DWARN(DBG_ALL_LDCS,
5287 		    "ldc_mem_map: (0x%llx) handle bound/mapped\n", mhandle);
5288 		mutex_exit(&mhdl->lock);
5289 		return (EINVAL);
5290 	}
5291 
5292 	ldcp = mhdl->ldcp;
5293 
5294 	mutex_enter(&ldcp->lock);
5295 
5296 	if (ldcp->tstate != TS_UP) {
5297 		DWARN(ldcp->id,
5298 		    "ldc_mem_dring_map: (0x%llx) channel is not UP\n",
5299 		    ldcp->id);
5300 		mutex_exit(&ldcp->lock);
5301 		mutex_exit(&mhdl->lock);
5302 		return (ECONNRESET);
5303 	}
5304 
5305 	if ((mtype & (LDC_SHADOW_MAP|LDC_DIRECT_MAP|LDC_IO_MAP)) == 0) {
5306 		DWARN(ldcp->id, "ldc_mem_map: invalid map type\n");
5307 		mutex_exit(&ldcp->lock);
5308 		mutex_exit(&mhdl->lock);
5309 		return (EINVAL);
5310 	}
5311 
5312 	D1(ldcp->id, "ldc_mem_map: (0x%llx) cookie = 0x%llx,0x%llx\n",
5313 	    ldcp->id, cookie->addr, cookie->size);
5314 
5315 	/* FUTURE: get the page size, pgsz code, and shift */
5316 	pg_size = MMU_PAGESIZE;
5317 	pg_size_code = page_szc(pg_size);
5318 	pg_shift = page_get_shift(pg_size_code);
5319 	pg_mask = ~(pg_size - 1);
5320 
5321 	/* calculate the number of pages in the exported cookie */
5322 	base_off = cookie[0].addr & (pg_size - 1);
5323 	for (idx = 0; idx < ccount; idx++)
5324 		exp_size += cookie[idx].size;
5325 	map_size = P2ROUNDUP((exp_size + base_off), pg_size);
5326 	npages = (map_size >> pg_shift);
5327 
5328 	/* Allocate memseg structure */
5329 	memseg = mhdl->memseg =
5330 		kmem_cache_alloc(ldcssp->memseg_cache, KM_SLEEP);
5331 
5332 	/* Allocate memory to store all pages and cookies */
5333 	memseg->pages =	kmem_zalloc((sizeof (ldc_page_t) * npages), KM_SLEEP);
5334 	memseg->cookies =
5335 		kmem_zalloc((sizeof (ldc_mem_cookie_t) * ccount), KM_SLEEP);
5336 
5337 	D2(ldcp->id, "ldc_mem_map: (0x%llx) exp_size=0x%llx, map_size=0x%llx,"
5338 	    "pages=0x%llx\n", ldcp->id, exp_size, map_size, npages);
5339 
5340 	/*
5341 	 * Check if direct map over shared memory is enabled, if not change
5342 	 * the mapping type to SHADOW_MAP.
5343 	 */
5344 	if (ldc_shmem_enabled == 0)
5345 		mtype = LDC_SHADOW_MAP;
5346 
5347 	/*
5348 	 * Check to see if the client is requesting direct or shadow map
5349 	 * If direct map is requested, try to map remote memory first,
5350 	 * and if that fails, revert to shadow map
5351 	 */
5352 	if (mtype == LDC_DIRECT_MAP) {
5353 
5354 		/* Allocate kernel virtual space for mapping */
5355 		memseg->vaddr = vmem_xalloc(heap_arena, map_size,
5356 		    pg_size, 0, 0, NULL, NULL, VM_NOSLEEP);
5357 		if (memseg->vaddr == NULL) {
5358 			cmn_err(CE_WARN,
5359 			    "ldc_mem_map: (0x%lx) memory map failed\n",
5360 			    ldcp->id);
5361 			kmem_free(memseg->cookies,
5362 			    (sizeof (ldc_mem_cookie_t) * ccount));
5363 			kmem_free(memseg->pages,
5364 			    (sizeof (ldc_page_t) * npages));
5365 			kmem_cache_free(ldcssp->memseg_cache, memseg);
5366 
5367 			mutex_exit(&ldcp->lock);
5368 			mutex_exit(&mhdl->lock);
5369 			return (ENOMEM);
5370 		}
5371 
5372 		/* Unload previous mapping */
5373 		hat_unload(kas.a_hat, memseg->vaddr, map_size,
5374 		    HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK);
5375 
5376 		/* for each cookie passed in - map into address space */
5377 		idx = 0;
5378 		cookie_size = 0;
5379 		tmpaddr = memseg->vaddr;
5380 
5381 		for (i = 0; i < npages; i++) {
5382 
5383 			if (cookie_size == 0) {
5384 				ASSERT(idx < ccount);
5385 				cookie_addr = cookie[idx].addr & pg_mask;
5386 				cookie_off = cookie[idx].addr & (pg_size - 1);
5387 				cookie_size =
5388 				    P2ROUNDUP((cookie_off + cookie[idx].size),
5389 					pg_size);
5390 				idx++;
5391 			}
5392 
5393 			D1(ldcp->id, "ldc_mem_map: (0x%llx) mapping "
5394 			    "cookie 0x%llx, bal=0x%llx\n", ldcp->id,
5395 			    cookie_addr, cookie_size);
5396 
5397 			/* map the cookie into address space */
5398 			for (retries = 0; retries < ldc_max_retries;
5399 			    retries++) {
5400 
5401 				rv = hv_ldc_mapin(ldcp->id, cookie_addr,
5402 				    &memseg->pages[i].raddr, &map_perm);
5403 				if (rv != H_EWOULDBLOCK && rv != H_ETOOMANY)
5404 					break;
5405 
5406 				drv_usecwait(ldc_delay);
5407 			}
5408 
5409 			if (rv || memseg->pages[i].raddr == 0) {
5410 				DWARN(ldcp->id,
5411 				    "ldc_mem_map: (0x%llx) hv mapin err %d\n",
5412 				    ldcp->id, rv);
5413 
5414 				/* remove previous mapins */
5415 				hat_unload(kas.a_hat, memseg->vaddr, map_size,
5416 				    HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK);
5417 				for (j = 0; j < i; j++) {
5418 					rv = hv_ldc_unmap(
5419 							memseg->pages[j].raddr);
5420 					if (rv) {
5421 						DWARN(ldcp->id,
5422 						    "ldc_mem_map: (0x%llx) "
5423 						    "cannot unmap ra=0x%llx\n",
5424 					    ldcp->id,
5425 						    memseg->pages[j].raddr);
5426 					}
5427 				}
5428 
5429 				/* free kernel virtual space */
5430 				vmem_free(heap_arena, (void *)memseg->vaddr,
5431 				    map_size);
5432 
5433 				/* direct map failed - revert to shadow map */
5434 				mtype = LDC_SHADOW_MAP;
5435 				break;
5436 
5437 			} else {
5438 
5439 				D1(ldcp->id,
5440 				    "ldc_mem_map: (0x%llx) vtop map 0x%llx -> "
5441 				    "0x%llx, cookie=0x%llx, perm=0x%llx\n",
5442 				    ldcp->id, tmpaddr, memseg->pages[i].raddr,
5443 				    cookie_addr, perm);
5444 
5445 				/*
5446 				 * NOTE: Calling hat_devload directly, causes it
5447 				 * to look for page_t using the pfn. Since this
5448 				 * addr is greater than the memlist, it treates
5449 				 * it as non-memory
5450 				 */
5451 				sfmmu_memtte(&ldc_tte,
5452 				    (pfn_t)(memseg->pages[i].raddr >> pg_shift),
5453 				    PROT_READ | PROT_WRITE | HAT_NOSYNC, TTE8K);
5454 
5455 				D1(ldcp->id,
5456 				    "ldc_mem_map: (0x%llx) ra 0x%llx -> "
5457 				    "tte 0x%llx\n", ldcp->id,
5458 				    memseg->pages[i].raddr, ldc_tte);
5459 
5460 				sfmmu_tteload(kas.a_hat, &ldc_tte, tmpaddr,
5461 				    NULL, HAT_LOAD_LOCK);
5462 
5463 				cookie_size -= pg_size;
5464 				cookie_addr += pg_size;
5465 				tmpaddr += pg_size;
5466 			}
5467 		}
5468 	}
5469 
5470 	if (mtype == LDC_SHADOW_MAP) {
5471 		if (*vaddr == NULL) {
5472 			memseg->vaddr = kmem_zalloc(exp_size, KM_SLEEP);
5473 			mhdl->myshadow = B_TRUE;
5474 
5475 			D1(ldcp->id, "ldc_mem_map: (0x%llx) allocated "
5476 			    "shadow page va=0x%llx\n", ldcp->id, memseg->vaddr);
5477 		} else {
5478 			/*
5479 			 * Use client supplied memory for memseg->vaddr
5480 			 * WARNING: assuming that client mem is >= exp_size
5481 			 */
5482 			memseg->vaddr = *vaddr;
5483 		}
5484 
5485 		/* Save all page and cookie information */
5486 		for (i = 0, tmpaddr = memseg->vaddr; i < npages; i++) {
5487 			memseg->pages[i].raddr = va_to_pa(tmpaddr);
5488 			memseg->pages[i].size = pg_size;
5489 			tmpaddr += pg_size;
5490 		}
5491 
5492 	}
5493 
5494 	/* save all cookies */
5495 	bcopy(cookie, memseg->cookies, ccount * sizeof (ldc_mem_cookie_t));
5496 
5497 	/* update memseg_t */
5498 	memseg->raddr = memseg->pages[0].raddr;
5499 	memseg->size = (mtype == LDC_SHADOW_MAP) ? exp_size : map_size;
5500 	memseg->npages = npages;
5501 	memseg->ncookies = ccount;
5502 	memseg->next_cookie = 0;
5503 
5504 	/* memory handle = mapped */
5505 	mhdl->mtype = mtype;
5506 	mhdl->perm = perm;
5507 	mhdl->status = LDC_MAPPED;
5508 
5509 	D1(ldcp->id, "ldc_mem_map: (0x%llx) mapped 0x%llx, ra=0x%llx, "
5510 	    "va=0x%llx, pgs=0x%llx cookies=0x%llx\n",
5511 	    ldcp->id, mhdl, memseg->raddr, memseg->vaddr,
5512 	    memseg->npages, memseg->ncookies);
5513 
5514 	if (mtype == LDC_SHADOW_MAP)
5515 		base_off = 0;
5516 	if (raddr)
5517 		*raddr = (caddr_t)(memseg->raddr | base_off);
5518 	if (vaddr)
5519 		*vaddr = (caddr_t)((uintptr_t)memseg->vaddr | base_off);
5520 
5521 	mutex_exit(&ldcp->lock);
5522 	mutex_exit(&mhdl->lock);
5523 	return (0);
5524 }
5525 
5526 /*
5527  * Unmap a memory segment. Free shadow memory (if any).
5528  */
5529 int
5530 ldc_mem_unmap(ldc_mem_handle_t mhandle)
5531 {
5532 	int		i, rv;
5533 	ldc_mhdl_t	*mhdl = (ldc_mhdl_t *)mhandle;
5534 	ldc_chan_t 	*ldcp;
5535 	ldc_memseg_t	*memseg;
5536 
5537 	if (mhdl == 0 || mhdl->status != LDC_MAPPED) {
5538 		DWARN(DBG_ALL_LDCS,
5539 		    "ldc_mem_unmap: (0x%llx) handle is not mapped\n",
5540 		    mhandle);
5541 		return (EINVAL);
5542 	}
5543 
5544 	mutex_enter(&mhdl->lock);
5545 
5546 	ldcp = mhdl->ldcp;
5547 	memseg = mhdl->memseg;
5548 
5549 	D1(ldcp->id, "ldc_mem_unmap: (0x%llx) unmapping handle 0x%llx\n",
5550 	    ldcp->id, mhdl);
5551 
5552 	/* if we allocated shadow memory - free it */
5553 	if (mhdl->mtype == LDC_SHADOW_MAP && mhdl->myshadow) {
5554 		kmem_free(memseg->vaddr, memseg->size);
5555 	} else if (mhdl->mtype == LDC_DIRECT_MAP) {
5556 
5557 		/* unmap in the case of DIRECT_MAP */
5558 		hat_unload(kas.a_hat, memseg->vaddr, memseg->size,
5559 		    HAT_UNLOAD_UNLOCK);
5560 
5561 		for (i = 0; i < memseg->npages; i++) {
5562 			rv = hv_ldc_unmap(memseg->pages[i].raddr);
5563 			if (rv) {
5564 				cmn_err(CE_WARN,
5565 				    "ldc_mem_map: (0x%lx) hv unmap err %d\n",
5566 				    ldcp->id, rv);
5567 			}
5568 		}
5569 
5570 		vmem_free(heap_arena, (void *)memseg->vaddr, memseg->size);
5571 	}
5572 
5573 	/* free the allocated memseg and page structures */
5574 	kmem_free(memseg->pages, (sizeof (ldc_page_t) * memseg->npages));
5575 	kmem_free(memseg->cookies,
5576 	    (sizeof (ldc_mem_cookie_t) * memseg->ncookies));
5577 	kmem_cache_free(ldcssp->memseg_cache, memseg);
5578 
5579 	/* uninitialize the memory handle */
5580 	mhdl->memseg = NULL;
5581 	mhdl->status = LDC_UNBOUND;
5582 
5583 	D1(ldcp->id, "ldc_mem_unmap: (0x%llx) unmapped handle 0x%llx\n",
5584 	    ldcp->id, mhdl);
5585 
5586 	mutex_exit(&mhdl->lock);
5587 	return (0);
5588 }
5589 
5590 /*
5591  * Internal entry point for LDC mapped memory entry consistency
5592  * semantics. Acquire copies the contents of the remote memory
5593  * into the local shadow copy. The release operation copies the local
5594  * contents into the remote memory. The offset and size specify the
5595  * bounds for the memory range being synchronized.
5596  */
5597 static int
5598 i_ldc_mem_acquire_release(ldc_mem_handle_t mhandle, uint8_t direction,
5599     uint64_t offset, size_t size)
5600 {
5601 	int 		err;
5602 	ldc_mhdl_t	*mhdl;
5603 	ldc_chan_t	*ldcp;
5604 	ldc_memseg_t	*memseg;
5605 	caddr_t		local_vaddr;
5606 	size_t		copy_size;
5607 
5608 	if (mhandle == NULL) {
5609 		DWARN(DBG_ALL_LDCS,
5610 		    "i_ldc_mem_acquire_release: invalid memory handle\n");
5611 		return (EINVAL);
5612 	}
5613 	mhdl = (ldc_mhdl_t *)mhandle;
5614 
5615 	mutex_enter(&mhdl->lock);
5616 
5617 	if (mhdl->status != LDC_MAPPED || mhdl->ldcp == NULL) {
5618 		DWARN(DBG_ALL_LDCS,
5619 		    "i_ldc_mem_acquire_release: not mapped memory\n");
5620 		mutex_exit(&mhdl->lock);
5621 		return (EINVAL);
5622 	}
5623 
5624 	/* do nothing for direct map */
5625 	if (mhdl->mtype == LDC_DIRECT_MAP) {
5626 		mutex_exit(&mhdl->lock);
5627 		return (0);
5628 	}
5629 
5630 	/* do nothing if COPY_IN+MEM_W and COPY_OUT+MEM_R */
5631 	if ((direction == LDC_COPY_IN && (mhdl->perm & LDC_MEM_R) == 0) ||
5632 	    (direction == LDC_COPY_OUT && (mhdl->perm & LDC_MEM_W) == 0)) {
5633 		mutex_exit(&mhdl->lock);
5634 		return (0);
5635 	}
5636 
5637 	if (offset >= mhdl->memseg->size ||
5638 	    (offset + size) > mhdl->memseg->size) {
5639 		DWARN(DBG_ALL_LDCS,
5640 		    "i_ldc_mem_acquire_release: memory out of range\n");
5641 		mutex_exit(&mhdl->lock);
5642 		return (EINVAL);
5643 	}
5644 
5645 	/* get the channel handle and memory segment */
5646 	ldcp = mhdl->ldcp;
5647 	memseg = mhdl->memseg;
5648 
5649 	if (mhdl->mtype == LDC_SHADOW_MAP) {
5650 
5651 		local_vaddr = memseg->vaddr + offset;
5652 		copy_size = size;
5653 
5654 		/* copy to/from remote from/to local memory */
5655 		err = ldc_mem_copy((ldc_handle_t)ldcp, local_vaddr, offset,
5656 		    &copy_size, memseg->cookies, memseg->ncookies,
5657 		    direction);
5658 		if (err || copy_size != size) {
5659 			DWARN(ldcp->id,
5660 			    "i_ldc_mem_acquire_release: copy failed\n");
5661 			mutex_exit(&mhdl->lock);
5662 			return (err);
5663 		}
5664 	}
5665 
5666 	mutex_exit(&mhdl->lock);
5667 
5668 	return (0);
5669 }
5670 
5671 /*
5672  * Ensure that the contents in the remote memory seg are consistent
5673  * with the contents if of local segment
5674  */
5675 int
5676 ldc_mem_acquire(ldc_mem_handle_t mhandle, uint64_t offset, uint64_t size)
5677 {
5678 	return (i_ldc_mem_acquire_release(mhandle, LDC_COPY_IN, offset, size));
5679 }
5680 
5681 
5682 /*
5683  * Ensure that the contents in the local memory seg are consistent
5684  * with the contents if of remote segment
5685  */
5686 int
5687 ldc_mem_release(ldc_mem_handle_t mhandle, uint64_t offset, uint64_t size)
5688 {
5689 	return (i_ldc_mem_acquire_release(mhandle, LDC_COPY_OUT, offset, size));
5690 }
5691 
5692 /*
5693  * Allocate a descriptor ring. The size of each each descriptor
5694  * must be 8-byte aligned and the entire ring should be a multiple
5695  * of MMU_PAGESIZE.
5696  */
5697 int
5698 ldc_mem_dring_create(uint32_t len, uint32_t dsize, ldc_dring_handle_t *dhandle)
5699 {
5700 	ldc_dring_t *dringp;
5701 	size_t size = (dsize * len);
5702 
5703 	D1(DBG_ALL_LDCS, "ldc_mem_dring_create: len=0x%x, size=0x%x\n",
5704 	    len, dsize);
5705 
5706 	if (dhandle == NULL) {
5707 		DWARN(DBG_ALL_LDCS, "ldc_mem_dring_create: invalid dhandle\n");
5708 		return (EINVAL);
5709 	}
5710 
5711 	if (len == 0) {
5712 		DWARN(DBG_ALL_LDCS, "ldc_mem_dring_create: invalid length\n");
5713 		return (EINVAL);
5714 	}
5715 
5716 	/* descriptor size should be 8-byte aligned */
5717 	if (dsize == 0 || (dsize & 0x7)) {
5718 		DWARN(DBG_ALL_LDCS, "ldc_mem_dring_create: invalid size\n");
5719 		return (EINVAL);
5720 	}
5721 
5722 	*dhandle = 0;
5723 
5724 	/* Allocate a desc ring structure */
5725 	dringp = kmem_zalloc(sizeof (ldc_dring_t), KM_SLEEP);
5726 
5727 	/* Initialize dring */
5728 	dringp->length = len;
5729 	dringp->dsize = dsize;
5730 
5731 	/* round off to multiple of pagesize */
5732 	dringp->size = (size & MMU_PAGEMASK);
5733 	if (size & MMU_PAGEOFFSET)
5734 		dringp->size += MMU_PAGESIZE;
5735 
5736 	dringp->status = LDC_UNBOUND;
5737 
5738 	/* allocate descriptor ring memory */
5739 	dringp->base = kmem_zalloc(dringp->size, KM_SLEEP);
5740 
5741 	/* initialize the desc ring lock */
5742 	mutex_init(&dringp->lock, NULL, MUTEX_DRIVER, NULL);
5743 
5744 	/* Add descriptor ring to the head of global list */
5745 	mutex_enter(&ldcssp->lock);
5746 	dringp->next = ldcssp->dring_list;
5747 	ldcssp->dring_list = dringp;
5748 	mutex_exit(&ldcssp->lock);
5749 
5750 	*dhandle = (ldc_dring_handle_t)dringp;
5751 
5752 	D1(DBG_ALL_LDCS, "ldc_mem_dring_create: dring allocated\n");
5753 
5754 	return (0);
5755 }
5756 
5757 
5758 /*
5759  * Destroy a descriptor ring.
5760  */
5761 int
5762 ldc_mem_dring_destroy(ldc_dring_handle_t dhandle)
5763 {
5764 	ldc_dring_t *dringp;
5765 	ldc_dring_t *tmp_dringp;
5766 
5767 	D1(DBG_ALL_LDCS, "ldc_mem_dring_destroy: entered\n");
5768 
5769 	if (dhandle == NULL) {
5770 		DWARN(DBG_ALL_LDCS,
5771 		    "ldc_mem_dring_destroy: invalid desc ring handle\n");
5772 		return (EINVAL);
5773 	}
5774 	dringp = (ldc_dring_t *)dhandle;
5775 
5776 	if (dringp->status == LDC_BOUND) {
5777 		DWARN(DBG_ALL_LDCS,
5778 		    "ldc_mem_dring_destroy: desc ring is bound\n");
5779 		return (EACCES);
5780 	}
5781 
5782 	mutex_enter(&dringp->lock);
5783 	mutex_enter(&ldcssp->lock);
5784 
5785 	/* remove from linked list - if not bound */
5786 	tmp_dringp = ldcssp->dring_list;
5787 	if (tmp_dringp == dringp) {
5788 		ldcssp->dring_list = dringp->next;
5789 		dringp->next = NULL;
5790 
5791 	} else {
5792 		while (tmp_dringp != NULL) {
5793 			if (tmp_dringp->next == dringp) {
5794 				tmp_dringp->next = dringp->next;
5795 				dringp->next = NULL;
5796 				break;
5797 			}
5798 			tmp_dringp = tmp_dringp->next;
5799 		}
5800 		if (tmp_dringp == NULL) {
5801 			DWARN(DBG_ALL_LDCS,
5802 			    "ldc_mem_dring_destroy: invalid descriptor\n");
5803 			mutex_exit(&ldcssp->lock);
5804 			mutex_exit(&dringp->lock);
5805 			return (EINVAL);
5806 		}
5807 	}
5808 
5809 	mutex_exit(&ldcssp->lock);
5810 
5811 	/* free the descriptor ring */
5812 	kmem_free(dringp->base, dringp->size);
5813 
5814 	mutex_exit(&dringp->lock);
5815 
5816 	/* destroy dring lock */
5817 	mutex_destroy(&dringp->lock);
5818 
5819 	/* free desc ring object */
5820 	kmem_free(dringp, sizeof (ldc_dring_t));
5821 
5822 	return (0);
5823 }
5824 
5825 /*
5826  * Bind a previously allocated dring to a channel. The channel should
5827  * be OPEN in order to bind the ring to the channel. Returns back a
5828  * descriptor ring cookie. The descriptor ring is exported for remote
5829  * access by the client at the other end of the channel. An entry for
5830  * dring pages is stored in map table (via call to ldc_mem_bind_handle).
5831  */
5832 int
5833 ldc_mem_dring_bind(ldc_handle_t handle, ldc_dring_handle_t dhandle,
5834     uint8_t mtype, uint8_t perm, ldc_mem_cookie_t *cookie, uint32_t *ccount)
5835 {
5836 	int		err;
5837 	ldc_chan_t 	*ldcp;
5838 	ldc_dring_t	*dringp;
5839 	ldc_mem_handle_t mhandle;
5840 
5841 	/* check to see if channel is initalized */
5842 	if (handle == NULL) {
5843 		DWARN(DBG_ALL_LDCS,
5844 		    "ldc_mem_dring_bind: invalid channel handle\n");
5845 		return (EINVAL);
5846 	}
5847 	ldcp = (ldc_chan_t *)handle;
5848 
5849 	if (dhandle == NULL) {
5850 		DWARN(DBG_ALL_LDCS,
5851 		    "ldc_mem_dring_bind: invalid desc ring handle\n");
5852 		return (EINVAL);
5853 	}
5854 	dringp = (ldc_dring_t *)dhandle;
5855 
5856 	if (cookie == NULL) {
5857 		DWARN(ldcp->id,
5858 		    "ldc_mem_dring_bind: invalid cookie arg\n");
5859 		return (EINVAL);
5860 	}
5861 
5862 	mutex_enter(&dringp->lock);
5863 
5864 	if (dringp->status == LDC_BOUND) {
5865 		DWARN(DBG_ALL_LDCS,
5866 		    "ldc_mem_dring_bind: (0x%llx) descriptor ring is bound\n",
5867 		    ldcp->id);
5868 		mutex_exit(&dringp->lock);
5869 		return (EINVAL);
5870 	}
5871 
5872 	if ((perm & LDC_MEM_RW) == 0) {
5873 		DWARN(DBG_ALL_LDCS,
5874 		    "ldc_mem_dring_bind: invalid permissions\n");
5875 		mutex_exit(&dringp->lock);
5876 		return (EINVAL);
5877 	}
5878 
5879 	if ((mtype & (LDC_SHADOW_MAP|LDC_DIRECT_MAP|LDC_IO_MAP)) == 0) {
5880 		DWARN(DBG_ALL_LDCS, "ldc_mem_dring_bind: invalid type\n");
5881 		mutex_exit(&dringp->lock);
5882 		return (EINVAL);
5883 	}
5884 
5885 	dringp->ldcp = ldcp;
5886 
5887 	/* create an memory handle */
5888 	err = ldc_mem_alloc_handle(handle, &mhandle);
5889 	if (err || mhandle == NULL) {
5890 		DWARN(DBG_ALL_LDCS,
5891 		    "ldc_mem_dring_bind: (0x%llx) error allocating mhandle\n",
5892 		    ldcp->id);
5893 		mutex_exit(&dringp->lock);
5894 		return (err);
5895 	}
5896 	dringp->mhdl = mhandle;
5897 
5898 	/* bind the descriptor ring to channel */
5899 	err = ldc_mem_bind_handle(mhandle, dringp->base, dringp->size,
5900 	    mtype, perm, cookie, ccount);
5901 	if (err) {
5902 		DWARN(ldcp->id,
5903 		    "ldc_mem_dring_bind: (0x%llx) error binding mhandle\n",
5904 		    ldcp->id);
5905 		mutex_exit(&dringp->lock);
5906 		return (err);
5907 	}
5908 
5909 	/*
5910 	 * For now return error if we get more than one cookie
5911 	 * FUTURE: Return multiple cookies ..
5912 	 */
5913 	if (*ccount > 1) {
5914 		(void) ldc_mem_unbind_handle(mhandle);
5915 		(void) ldc_mem_free_handle(mhandle);
5916 
5917 		dringp->ldcp = NULL;
5918 		dringp->mhdl = NULL;
5919 		*ccount = 0;
5920 
5921 		mutex_exit(&dringp->lock);
5922 		return (EAGAIN);
5923 	}
5924 
5925 	/* Add descriptor ring to channel's exported dring list */
5926 	mutex_enter(&ldcp->exp_dlist_lock);
5927 	dringp->ch_next = ldcp->exp_dring_list;
5928 	ldcp->exp_dring_list = dringp;
5929 	mutex_exit(&ldcp->exp_dlist_lock);
5930 
5931 	dringp->status = LDC_BOUND;
5932 
5933 	mutex_exit(&dringp->lock);
5934 
5935 	return (0);
5936 }
5937 
5938 /*
5939  * Return the next cookie associated with the specified dring handle
5940  */
5941 int
5942 ldc_mem_dring_nextcookie(ldc_dring_handle_t dhandle, ldc_mem_cookie_t *cookie)
5943 {
5944 	int		rv = 0;
5945 	ldc_dring_t 	*dringp;
5946 	ldc_chan_t	*ldcp;
5947 
5948 	if (dhandle == NULL) {
5949 		DWARN(DBG_ALL_LDCS,
5950 		    "ldc_mem_dring_nextcookie: invalid desc ring handle\n");
5951 		return (EINVAL);
5952 	}
5953 	dringp = (ldc_dring_t *)dhandle;
5954 	mutex_enter(&dringp->lock);
5955 
5956 	if (dringp->status != LDC_BOUND) {
5957 		DWARN(DBG_ALL_LDCS,
5958 		    "ldc_mem_dring_nextcookie: descriptor ring 0x%llx "
5959 		    "is not bound\n", dringp);
5960 		mutex_exit(&dringp->lock);
5961 		return (EINVAL);
5962 	}
5963 
5964 	ldcp = dringp->ldcp;
5965 
5966 	if (cookie == NULL) {
5967 		DWARN(ldcp->id,
5968 		    "ldc_mem_dring_nextcookie:(0x%llx) invalid cookie arg\n",
5969 		    ldcp->id);
5970 		mutex_exit(&dringp->lock);
5971 		return (EINVAL);
5972 	}
5973 
5974 	rv = ldc_mem_nextcookie((ldc_mem_handle_t)dringp->mhdl, cookie);
5975 	mutex_exit(&dringp->lock);
5976 
5977 	return (rv);
5978 }
5979 /*
5980  * Unbind a previously bound dring from a channel.
5981  */
5982 int
5983 ldc_mem_dring_unbind(ldc_dring_handle_t dhandle)
5984 {
5985 	ldc_dring_t 	*dringp;
5986 	ldc_dring_t	*tmp_dringp;
5987 	ldc_chan_t	*ldcp;
5988 
5989 	if (dhandle == NULL) {
5990 		DWARN(DBG_ALL_LDCS,
5991 		    "ldc_mem_dring_unbind: invalid desc ring handle\n");
5992 		return (EINVAL);
5993 	}
5994 	dringp = (ldc_dring_t *)dhandle;
5995 
5996 	mutex_enter(&dringp->lock);
5997 
5998 	if (dringp->status == LDC_UNBOUND) {
5999 		DWARN(DBG_ALL_LDCS,
6000 		    "ldc_mem_dring_bind: descriptor ring 0x%llx is unbound\n",
6001 		    dringp);
6002 		mutex_exit(&dringp->lock);
6003 		return (EINVAL);
6004 	}
6005 	ldcp = dringp->ldcp;
6006 
6007 	mutex_enter(&ldcp->exp_dlist_lock);
6008 
6009 	tmp_dringp = ldcp->exp_dring_list;
6010 	if (tmp_dringp == dringp) {
6011 		ldcp->exp_dring_list = dringp->ch_next;
6012 		dringp->ch_next = NULL;
6013 
6014 	} else {
6015 		while (tmp_dringp != NULL) {
6016 			if (tmp_dringp->ch_next == dringp) {
6017 				tmp_dringp->ch_next = dringp->ch_next;
6018 				dringp->ch_next = NULL;
6019 				break;
6020 			}
6021 			tmp_dringp = tmp_dringp->ch_next;
6022 		}
6023 		if (tmp_dringp == NULL) {
6024 			DWARN(DBG_ALL_LDCS,
6025 			    "ldc_mem_dring_unbind: invalid descriptor\n");
6026 			mutex_exit(&ldcp->exp_dlist_lock);
6027 			mutex_exit(&dringp->lock);
6028 			return (EINVAL);
6029 		}
6030 	}
6031 
6032 	mutex_exit(&ldcp->exp_dlist_lock);
6033 
6034 	(void) ldc_mem_unbind_handle((ldc_mem_handle_t)dringp->mhdl);
6035 	(void) ldc_mem_free_handle((ldc_mem_handle_t)dringp->mhdl);
6036 
6037 	dringp->ldcp = NULL;
6038 	dringp->mhdl = NULL;
6039 	dringp->status = LDC_UNBOUND;
6040 
6041 	mutex_exit(&dringp->lock);
6042 
6043 	return (0);
6044 }
6045 
6046 /*
6047  * Get information about the dring. The base address of the descriptor
6048  * ring along with the type and permission are returned back.
6049  */
6050 int
6051 ldc_mem_dring_info(ldc_dring_handle_t dhandle, ldc_mem_info_t *minfo)
6052 {
6053 	ldc_dring_t	*dringp;
6054 	int		rv;
6055 
6056 	if (dhandle == NULL) {
6057 		DWARN(DBG_ALL_LDCS,
6058 		    "ldc_mem_dring_info: invalid desc ring handle\n");
6059 		return (EINVAL);
6060 	}
6061 	dringp = (ldc_dring_t *)dhandle;
6062 
6063 	mutex_enter(&dringp->lock);
6064 
6065 	if (dringp->mhdl) {
6066 		rv = ldc_mem_info(dringp->mhdl, minfo);
6067 		if (rv) {
6068 			DWARN(DBG_ALL_LDCS,
6069 			    "ldc_mem_dring_info: error reading mem info\n");
6070 			mutex_exit(&dringp->lock);
6071 			return (rv);
6072 		}
6073 	} else {
6074 		minfo->vaddr = dringp->base;
6075 		minfo->raddr = NULL;
6076 		minfo->status = dringp->status;
6077 	}
6078 
6079 	mutex_exit(&dringp->lock);
6080 
6081 	return (0);
6082 }
6083 
6084 /*
6085  * Map an exported descriptor ring into the local address space. If the
6086  * descriptor ring was exported for direct map access, a HV call is made
6087  * to allocate a RA range. If the map is done via a shadow copy, local
6088  * shadow memory is allocated.
6089  */
6090 int
6091 ldc_mem_dring_map(ldc_handle_t handle, ldc_mem_cookie_t *cookie,
6092     uint32_t ccount, uint32_t len, uint32_t dsize, uint8_t mtype,
6093     ldc_dring_handle_t *dhandle)
6094 {
6095 	int		err;
6096 	ldc_chan_t 	*ldcp = (ldc_chan_t *)handle;
6097 	ldc_mem_handle_t mhandle;
6098 	ldc_dring_t	*dringp;
6099 	size_t		dring_size;
6100 
6101 	if (dhandle == NULL) {
6102 		DWARN(DBG_ALL_LDCS,
6103 		    "ldc_mem_dring_map: invalid dhandle\n");
6104 		return (EINVAL);
6105 	}
6106 
6107 	/* check to see if channel is initalized */
6108 	if (handle == NULL) {
6109 		DWARN(DBG_ALL_LDCS,
6110 		    "ldc_mem_dring_map: invalid channel handle\n");
6111 		return (EINVAL);
6112 	}
6113 	ldcp = (ldc_chan_t *)handle;
6114 
6115 	if (cookie == NULL) {
6116 		DWARN(ldcp->id,
6117 		    "ldc_mem_dring_map: (0x%llx) invalid cookie\n",
6118 		    ldcp->id);
6119 		return (EINVAL);
6120 	}
6121 
6122 	/* FUTURE: For now we support only one cookie per dring */
6123 	ASSERT(ccount == 1);
6124 
6125 	if (cookie->size < (dsize * len)) {
6126 		DWARN(ldcp->id,
6127 		    "ldc_mem_dring_map: (0x%llx) invalid dsize/len\n",
6128 		    ldcp->id);
6129 		return (EINVAL);
6130 	}
6131 
6132 	*dhandle = 0;
6133 
6134 	/* Allocate an dring structure */
6135 	dringp = kmem_zalloc(sizeof (ldc_dring_t), KM_SLEEP);
6136 
6137 	D1(ldcp->id,
6138 	    "ldc_mem_dring_map: 0x%x,0x%x,0x%x,0x%llx,0x%llx\n",
6139 	    mtype, len, dsize, cookie->addr, cookie->size);
6140 
6141 	/* Initialize dring */
6142 	dringp->length = len;
6143 	dringp->dsize = dsize;
6144 
6145 	/* round of to multiple of page size */
6146 	dring_size = len * dsize;
6147 	dringp->size = (dring_size & MMU_PAGEMASK);
6148 	if (dring_size & MMU_PAGEOFFSET)
6149 		dringp->size += MMU_PAGESIZE;
6150 
6151 	dringp->ldcp = ldcp;
6152 
6153 	/* create an memory handle */
6154 	err = ldc_mem_alloc_handle(handle, &mhandle);
6155 	if (err || mhandle == NULL) {
6156 		DWARN(DBG_ALL_LDCS,
6157 		    "ldc_mem_dring_map: cannot alloc hdl err=%d\n",
6158 		    err);
6159 		kmem_free(dringp, sizeof (ldc_dring_t));
6160 		return (ENOMEM);
6161 	}
6162 
6163 	dringp->mhdl = mhandle;
6164 	dringp->base = NULL;
6165 
6166 	/* map the dring into local memory */
6167 	err = ldc_mem_map(mhandle, cookie, ccount, mtype, LDC_MEM_RW,
6168 	    &(dringp->base), NULL);
6169 	if (err || dringp->base == NULL) {
6170 		cmn_err(CE_WARN,
6171 		    "ldc_mem_dring_map: cannot map desc ring err=%d\n", err);
6172 		(void) ldc_mem_free_handle(mhandle);
6173 		kmem_free(dringp, sizeof (ldc_dring_t));
6174 		return (ENOMEM);
6175 	}
6176 
6177 	/* initialize the desc ring lock */
6178 	mutex_init(&dringp->lock, NULL, MUTEX_DRIVER, NULL);
6179 
6180 	/* Add descriptor ring to channel's imported dring list */
6181 	mutex_enter(&ldcp->imp_dlist_lock);
6182 	dringp->ch_next = ldcp->imp_dring_list;
6183 	ldcp->imp_dring_list = dringp;
6184 	mutex_exit(&ldcp->imp_dlist_lock);
6185 
6186 	dringp->status = LDC_MAPPED;
6187 
6188 	*dhandle = (ldc_dring_handle_t)dringp;
6189 
6190 	return (0);
6191 }
6192 
6193 /*
6194  * Unmap a descriptor ring. Free shadow memory (if any).
6195  */
6196 int
6197 ldc_mem_dring_unmap(ldc_dring_handle_t dhandle)
6198 {
6199 	ldc_dring_t 	*dringp;
6200 	ldc_dring_t	*tmp_dringp;
6201 	ldc_chan_t	*ldcp;
6202 
6203 	if (dhandle == NULL) {
6204 		DWARN(DBG_ALL_LDCS,
6205 		    "ldc_mem_dring_unmap: invalid desc ring handle\n");
6206 		return (EINVAL);
6207 	}
6208 	dringp = (ldc_dring_t *)dhandle;
6209 
6210 	if (dringp->status != LDC_MAPPED) {
6211 		DWARN(DBG_ALL_LDCS,
6212 		    "ldc_mem_dring_unmap: not a mapped desc ring\n");
6213 		return (EINVAL);
6214 	}
6215 
6216 	mutex_enter(&dringp->lock);
6217 
6218 	ldcp = dringp->ldcp;
6219 
6220 	mutex_enter(&ldcp->imp_dlist_lock);
6221 
6222 	/* find and unlink the desc ring from channel import list */
6223 	tmp_dringp = ldcp->imp_dring_list;
6224 	if (tmp_dringp == dringp) {
6225 		ldcp->imp_dring_list = dringp->ch_next;
6226 		dringp->ch_next = NULL;
6227 
6228 	} else {
6229 		while (tmp_dringp != NULL) {
6230 			if (tmp_dringp->ch_next == dringp) {
6231 				tmp_dringp->ch_next = dringp->ch_next;
6232 				dringp->ch_next = NULL;
6233 				break;
6234 			}
6235 			tmp_dringp = tmp_dringp->ch_next;
6236 		}
6237 		if (tmp_dringp == NULL) {
6238 			DWARN(DBG_ALL_LDCS,
6239 			    "ldc_mem_dring_unmap: invalid descriptor\n");
6240 			mutex_exit(&ldcp->imp_dlist_lock);
6241 			mutex_exit(&dringp->lock);
6242 			return (EINVAL);
6243 		}
6244 	}
6245 
6246 	mutex_exit(&ldcp->imp_dlist_lock);
6247 
6248 	/* do a LDC memory handle unmap and free */
6249 	(void) ldc_mem_unmap(dringp->mhdl);
6250 	(void) ldc_mem_free_handle((ldc_mem_handle_t)dringp->mhdl);
6251 
6252 	dringp->status = 0;
6253 	dringp->ldcp = NULL;
6254 
6255 	mutex_exit(&dringp->lock);
6256 
6257 	/* destroy dring lock */
6258 	mutex_destroy(&dringp->lock);
6259 
6260 	/* free desc ring object */
6261 	kmem_free(dringp, sizeof (ldc_dring_t));
6262 
6263 	return (0);
6264 }
6265 
6266 /*
6267  * Internal entry point for descriptor ring access entry consistency
6268  * semantics. Acquire copies the contents of the remote descriptor ring
6269  * into the local shadow copy. The release operation copies the local
6270  * contents into the remote dring. The start and end locations specify
6271  * bounds for the entries being synchronized.
6272  */
6273 static int
6274 i_ldc_dring_acquire_release(ldc_dring_handle_t dhandle,
6275     uint8_t direction, uint64_t start, uint64_t end)
6276 {
6277 	int 			err;
6278 	ldc_dring_t		*dringp;
6279 	ldc_chan_t		*ldcp;
6280 	uint64_t		soff;
6281 	size_t			copy_size;
6282 
6283 	if (dhandle == NULL) {
6284 		DWARN(DBG_ALL_LDCS,
6285 		    "i_ldc_dring_acquire_release: invalid desc ring handle\n");
6286 		return (EINVAL);
6287 	}
6288 	dringp = (ldc_dring_t *)dhandle;
6289 	mutex_enter(&dringp->lock);
6290 
6291 	if (dringp->status != LDC_MAPPED || dringp->ldcp == NULL) {
6292 		DWARN(DBG_ALL_LDCS,
6293 		    "i_ldc_dring_acquire_release: not a mapped desc ring\n");
6294 		mutex_exit(&dringp->lock);
6295 		return (EINVAL);
6296 	}
6297 
6298 	if (start >= dringp->length || end >= dringp->length) {
6299 		DWARN(DBG_ALL_LDCS,
6300 		    "i_ldc_dring_acquire_release: index out of range\n");
6301 		mutex_exit(&dringp->lock);
6302 		return (EINVAL);
6303 	}
6304 
6305 	/* get the channel handle */
6306 	ldcp = dringp->ldcp;
6307 
6308 	copy_size = (start <= end) ? (((end - start) + 1) * dringp->dsize) :
6309 		((dringp->length - start) * dringp->dsize);
6310 
6311 	/* Calculate the relative offset for the first desc */
6312 	soff = (start * dringp->dsize);
6313 
6314 	/* copy to/from remote from/to local memory */
6315 	D1(ldcp->id, "i_ldc_dring_acquire_release: c1 off=0x%llx sz=0x%llx\n",
6316 	    soff, copy_size);
6317 	err = i_ldc_mem_acquire_release((ldc_mem_handle_t)dringp->mhdl,
6318 	    direction, soff, copy_size);
6319 	if (err) {
6320 		DWARN(ldcp->id,
6321 		    "i_ldc_dring_acquire_release: copy failed\n");
6322 		mutex_exit(&dringp->lock);
6323 		return (err);
6324 	}
6325 
6326 	/* do the balance */
6327 	if (start > end) {
6328 		copy_size = ((end + 1) * dringp->dsize);
6329 		soff = 0;
6330 
6331 		/* copy to/from remote from/to local memory */
6332 		D1(ldcp->id, "i_ldc_dring_acquire_release: c2 "
6333 		    "off=0x%llx sz=0x%llx\n", soff, copy_size);
6334 		err = i_ldc_mem_acquire_release((ldc_mem_handle_t)dringp->mhdl,
6335 		    direction, soff, copy_size);
6336 		if (err) {
6337 			DWARN(ldcp->id,
6338 			    "i_ldc_dring_acquire_release: copy failed\n");
6339 			mutex_exit(&dringp->lock);
6340 			return (err);
6341 		}
6342 	}
6343 
6344 	mutex_exit(&dringp->lock);
6345 
6346 	return (0);
6347 }
6348 
6349 /*
6350  * Ensure that the contents in the local dring are consistent
6351  * with the contents if of remote dring
6352  */
6353 int
6354 ldc_mem_dring_acquire(ldc_dring_handle_t dhandle, uint64_t start, uint64_t end)
6355 {
6356 	return (i_ldc_dring_acquire_release(dhandle, LDC_COPY_IN, start, end));
6357 }
6358 
6359 /*
6360  * Ensure that the contents in the remote dring are consistent
6361  * with the contents if of local dring
6362  */
6363 int
6364 ldc_mem_dring_release(ldc_dring_handle_t dhandle, uint64_t start, uint64_t end)
6365 {
6366 	return (i_ldc_dring_acquire_release(dhandle, LDC_COPY_OUT, start, end));
6367 }
6368 
6369 
6370 /* ------------------------------------------------------------------------- */
6371