xref: /titanic_44/usr/src/uts/sun4v/io/ldc.c (revision 3ab45760e29dbab3ec3197fc452899c4d4b1c4c4)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * sun4v LDC Link Layer
31  */
32 #include <sys/types.h>
33 #include <sys/file.h>
34 #include <sys/errno.h>
35 #include <sys/open.h>
36 #include <sys/cred.h>
37 #include <sys/kmem.h>
38 #include <sys/conf.h>
39 #include <sys/cmn_err.h>
40 #include <sys/ksynch.h>
41 #include <sys/modctl.h>
42 #include <sys/stat.h> /* needed for S_IFBLK and S_IFCHR */
43 #include <sys/debug.h>
44 #include <sys/types.h>
45 #include <sys/cred.h>
46 #include <sys/promif.h>
47 #include <sys/ddi.h>
48 #include <sys/sunddi.h>
49 #include <sys/cyclic.h>
50 #include <sys/machsystm.h>
51 #include <sys/vm.h>
52 #include <sys/cpu.h>
53 #include <sys/intreg.h>
54 #include <sys/machcpuvar.h>
55 #include <sys/mmu.h>
56 #include <sys/pte.h>
57 #include <vm/hat.h>
58 #include <vm/as.h>
59 #include <vm/hat_sfmmu.h>
60 #include <sys/vm_machparam.h>
61 #include <vm/seg_kmem.h>
62 #include <vm/seg_kpm.h>
63 #include <sys/note.h>
64 #include <sys/ivintr.h>
65 #include <sys/hypervisor_api.h>
66 #include <sys/ldc.h>
67 #include <sys/ldc_impl.h>
68 #include <sys/cnex.h>
69 #include <sys/hsvc.h>
70 
71 /* Core internal functions */
72 static int i_ldc_h2v_error(int h_error);
73 static int i_ldc_txq_reconf(ldc_chan_t *ldcp);
74 static int i_ldc_rxq_reconf(ldc_chan_t *ldcp, boolean_t force_reset);
75 static int i_ldc_rxq_drain(ldc_chan_t *ldcp);
76 static void i_ldc_reset_state(ldc_chan_t *ldcp);
77 static void i_ldc_reset(ldc_chan_t *ldcp, boolean_t force_reset);
78 
79 static int i_ldc_get_tx_tail(ldc_chan_t *ldcp, uint64_t *tail);
80 static void i_ldc_get_tx_head(ldc_chan_t *ldcp, uint64_t *head);
81 static int i_ldc_set_tx_tail(ldc_chan_t *ldcp, uint64_t tail);
82 static int i_ldc_set_rx_head(ldc_chan_t *ldcp, uint64_t head);
83 static int i_ldc_send_pkt(ldc_chan_t *ldcp, uint8_t pkttype, uint8_t subtype,
84     uint8_t ctrlmsg);
85 
86 /* Interrupt handling functions */
87 static uint_t i_ldc_tx_hdlr(caddr_t arg1, caddr_t arg2);
88 static uint_t i_ldc_rx_hdlr(caddr_t arg1, caddr_t arg2);
89 static void i_ldc_clear_intr(ldc_chan_t *ldcp, cnex_intrtype_t itype);
90 
91 /* Read method functions */
92 static int i_ldc_read_raw(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep);
93 static int i_ldc_read_packet(ldc_chan_t *ldcp, caddr_t target_bufp,
94 	size_t *sizep);
95 static int i_ldc_read_stream(ldc_chan_t *ldcp, caddr_t target_bufp,
96 	size_t *sizep);
97 
98 /* Write method functions */
99 static int i_ldc_write_raw(ldc_chan_t *ldcp, caddr_t target_bufp,
100 	size_t *sizep);
101 static int i_ldc_write_packet(ldc_chan_t *ldcp, caddr_t target_bufp,
102 	size_t *sizep);
103 static int i_ldc_write_stream(ldc_chan_t *ldcp, caddr_t target_bufp,
104 	size_t *sizep);
105 
106 /* Pkt processing internal functions */
107 static int i_ldc_check_seqid(ldc_chan_t *ldcp, ldc_msg_t *ldcmsg);
108 static int i_ldc_ctrlmsg(ldc_chan_t *ldcp, ldc_msg_t *ldcmsg);
109 static int i_ldc_process_VER(ldc_chan_t *ldcp, ldc_msg_t *msg);
110 static int i_ldc_process_RTS(ldc_chan_t *ldcp, ldc_msg_t *msg);
111 static int i_ldc_process_RTR(ldc_chan_t *ldcp, ldc_msg_t *msg);
112 static int i_ldc_process_RDX(ldc_chan_t *ldcp, ldc_msg_t *msg);
113 static int i_ldc_process_data_ACK(ldc_chan_t *ldcp, ldc_msg_t *msg);
114 
115 /* Memory synchronization internal functions */
116 static int i_ldc_mem_acquire_release(ldc_mem_handle_t mhandle,
117     uint8_t direction, uint64_t offset, size_t size);
118 static int i_ldc_dring_acquire_release(ldc_dring_handle_t dhandle,
119     uint8_t direction, uint64_t start, uint64_t end);
120 
/*
 * LDC protocol versions this endpoint can negotiate, one { major, minor }
 * entry per supported major version (presumably v1.0 is the only one --
 * confirm the field order against ldc_ver_t).
 *
 * i_ldc_process_VER() walks this table by index starting at
 * ldcp->next_vidx, comparing the peer's major number against each entry.
 * NOTE(review): that walk NACKs with "the next lower version" as soon as
 * the peer's major exceeds an entry, which suggests entries are expected
 * in descending major order -- confirm before adding a version.
 */
static ldc_ver_t ldc_versions[] = { {1, 0} };

/*
 * Number of entries in ldc_versions[]. The value has type size_t; the
 * version handshake compares an int index against it.
 */
#define	LDC_NUM_VERS	(sizeof (ldc_versions) / sizeof (ldc_versions[0]))
126 
/*
 * Module State Pointer
 *
 * Allocated (zero-filled) by _init() and freed by _fini(). The code in
 * this file uses it to reach the module lock, the memory-handle and
 * memory-segment kmem caches, the channel and descriptor-ring lists and
 * the channel-nexus callbacks (cinfo).
 */
static ldc_soft_state_t *ldcssp;

/*
 * Module linkage: 'md' describes the module and 'ml' is the linkage
 * structure handed to mod_install(), mod_info() and mod_remove().
 * NOTE(review): the "%I%" in the name looks like an SCCS keyword that is
 * expanded at checkout, like the #pragma ident string above -- confirm.
 */
static struct modldrv md = {
	&mod_miscops,			/* This is a misc module */
	"sun4v LDC module v%I%",	/* Name of the module */
};

static struct modlinkage ml = {
	MODREV_1,
	&md,
	NULL
};

/*
 * Hypervisor service negotiation.
 *
 * ldc_hsvc is the request passed to hsvc_register() in _init() and to
 * hsvc_unregister() in _fini(); ldc_sup_minor receives the minor number
 * reported back by hsvc_register(). _init() reads the hsvc_modname,
 * hsvc_group, hsvc_major and hsvc_minor fields when logging a failure;
 * presumably the initializer below requests group HSVC_GROUP_LDC at
 * major 1, minor 0 -- confirm the field order against hsvc_info_t.
 */
static uint64_t ldc_sup_minor;		/* Supported minor number */
static hsvc_info_t ldc_hsvc = {
	HSVC_REV_1, NULL, HSVC_GROUP_LDC, 1, 0, "ldc"
};
145 
/*
 * The LDC framework supports mapping a remote domain's memory
 * either directly or via shadow memory pages. Default
 * support is currently implemented via shadow copy.
 * Direct map can be enabled by setting 'ldc_shmem_enabled'.
 */
int ldc_shmem_enabled = 0;

/*
 * The number of MTU-sized messages that can be stored in
 * the LDC Tx queue. The number of Tx queue entries is
 * then computed as (mtu * ldc_mtu_msgs) / sizeof (queue_entry).
 */
uint64_t ldc_mtu_msgs = LDC_MTU_MSGS;

/*
 * The minimum queue length. This is the size of the smallest
 * LDC queue. If the computed value is less than this default,
 * the queue length is rounded up to 'ldc_queue_entries'.
 */
uint64_t ldc_queue_entries = LDC_QUEUE_ENTRIES;

/*
 * Pages exported for remote access over each channel are
 * maintained in a table registered with the Hypervisor.
 * The default number of entries in the table is set to
 * 'ldc_maptable_entries'.
 */
uint64_t ldc_maptable_entries = LDC_MTBL_ENTRIES;

/*
 * LDC retry count and delay - when the HV returns EWOULDBLOCK
 * the operation is retried up to 'ldc_max_retries' times with a
 * wait of 'ldc_delay' usecs after each failed attempt (see
 * i_ldc_set_rx_head() and i_ldc_set_tx_tail()).
 */
int ldc_max_retries = LDC_MAX_RETRIES;
clock_t ldc_delay = LDC_DELAY;

/*
 * Delay between each retry of channel unregistration in
 * ldc_close(), to wait for pending interrupts to complete.
 */
clock_t ldc_close_delay = LDC_CLOSE_DELAY;
189 
190 #ifdef DEBUG
191 
/*
 * Print debug messages
 *
 * set ldcdbg to 0x7 for enabling all msgs
 * 0x4 - Warnings			(DWARN)
 * 0x2 - All debug messages		(D2)
 * 0x1 - Minimal debug messages		(D1)
 *
 * set ldcdbgchan to the channel number you want to debug
 * setting it to -1 prints debug messages for all channels
 * NOTE: ldcdbgchan has no effect on error messages
 *
 * ldc_inject_err_flag is a bitmask of LDC_ERR_* values. Each bit arms a
 * single injected fault: ldc_inject_error() clears the bit when it
 * reports the fault, and only does so for the channel selected by
 * ldcdbgchan (or for any channel when ldcdbgchan is DBG_ALL_LDCS).
 */

/* channel id wildcard: "all channels" / "print regardless of channel" */
#define	DBG_ALL_LDCS -1

int ldcdbg = 0x0;
int64_t ldcdbgchan = DBG_ALL_LDCS;
uint64_t ldc_inject_err_flag = 0;
210 
211 static void
212 ldcdebug(int64_t id, const char *fmt, ...)
213 {
214 	char buf[512];
215 	va_list ap;
216 
217 	/*
218 	 * Do not return if,
219 	 * caller wants to print it anyway - (id == DBG_ALL_LDCS)
220 	 * debug channel is set to all LDCs - (ldcdbgchan == DBG_ALL_LDCS)
221 	 * debug channel = caller specified channel
222 	 */
223 	if ((id != DBG_ALL_LDCS) &&
224 	    (ldcdbgchan != DBG_ALL_LDCS) &&
225 	    (ldcdbgchan != id)) {
226 		return;
227 	}
228 
229 	va_start(ap, fmt);
230 	(void) vsprintf(buf, fmt, ap);
231 	va_end(ap);
232 
233 	cmn_err(CE_CONT, "?%s", buf);
234 }
235 
236 #define	LDC_ERR_RESET	0x1
237 #define	LDC_ERR_PKTLOSS	0x2
238 
239 static boolean_t
240 ldc_inject_error(ldc_chan_t *ldcp, uint64_t error)
241 {
242 	if ((ldcdbgchan != DBG_ALL_LDCS) && (ldcdbgchan != ldcp->id))
243 		return (B_FALSE);
244 
245 	if ((ldc_inject_err_flag & error) == 0)
246 		return (B_FALSE);
247 
248 	/* clear the injection state */
249 	ldc_inject_err_flag &= ~error;
250 
251 	return (B_TRUE);
252 }
253 
/*
 * Debug print front-ends, used as D1(id, fmt, ...); each one forwards to
 * ldcdebug() when its bit is set in ldcdbg:
 *	D1	- 0x01, minimal debug messages
 *	D2	- 0x02, all debug messages
 *	DWARN	- 0x04, warnings
 *
 * The test is written as "if (!enabled) { } else ldcdebug" so that the
 * macro carries its own 'else'. A plain "if (enabled) ldcdebug" would
 * silently capture an 'else' that follows the call site, e.g.
 *	if (cond) D1(...); else other();
 */
#define	D1				\
if (!(ldcdbg & 0x01)) {			\
} else					\
	ldcdebug

#define	D2				\
if (!(ldcdbg & 0x02)) {			\
} else					\
	ldcdebug

#define	DWARN				\
if (!(ldcdbg & 0x04)) {			\
} else					\
	ldcdebug
265 
/*
 * Hex-dump the 64 bytes starting at 'addr' as a single D2 message of the
 * form "payload: |xx|xx|...|xx|".
 *
 * id	- channel id handed to D2 for filtering
 * addr	- start of the data; evaluated exactly once
 *
 * Wrapped in do { } while (0) so it behaves as one statement (use it with
 * a trailing semicolon). The locals carry a trailing underscore so they
 * cannot shadow a caller variable named in 'addr', and the formatting is
 * bounded by the size of the scratch buffer (64 * 3 characters of data
 * plus "|\n" and the terminating NUL).
 */
#define	DUMP_PAYLOAD(id, addr)						\
do {									\
	char dp_buf_[65 * 3];						\
	int dp_i_;							\
	const uint8_t *dp_src_ = (const uint8_t *)(addr);		\
	for (dp_i_ = 0; dp_i_ < 64; dp_i_++, dp_src_++)			\
		(void) snprintf(&dp_buf_[dp_i_ * 3],			\
		    sizeof (dp_buf_) - (dp_i_ * 3), "|%02x", *dp_src_);	\
	(void) snprintf(&dp_buf_[dp_i_ * 3],				\
	    sizeof (dp_buf_) - (dp_i_ * 3), "|\n");			\
	D2((id), "payload: %s", dp_buf_);				\
} while (0)
276 
/*
 * Print a one-line D2 summary of the LDC packet at 'addr'.
 *
 * c	- channel pointer; its 'mode' and 'id' members are read
 * s	- caller-supplied tag printed in front of the summary
 * addr	- address of the packet (pointer or integer); evaluated once
 *
 * The message id shown is the packet's seqid, or 0 when the channel is
 * in RAW mode. For LDC_DATA packets the envelope is decoded into its
 * fragment start ('B') / stop ('E') markers and length; for every other
 * type the raw envelope byte is printed.
 *
 * Wrapped in do { } while (0) so it behaves as one statement (use it with
 * a trailing semicolon); the locals carry a trailing underscore so they
 * cannot shadow a caller variable (e.g. 'msg') named in 'addr'.
 */
#define	DUMP_LDC_PKT(c, s, addr)					\
do {									\
	const ldc_msg_t *dlp_msg_ = (const ldc_msg_t *)(addr);		\
	uint32_t dlp_mid_ = ((c)->mode != LDC_MODE_RAW) ?		\
	    dlp_msg_->seqid : 0;					\
	if (dlp_msg_->type == LDC_DATA) {				\
		D2((c)->id, "%s: msg%d (/%x/%x/%x/,env[%c%c,sz=%d])",	\
		    (s), dlp_mid_, dlp_msg_->type, dlp_msg_->stype,	\
		    dlp_msg_->ctrl,					\
		    (dlp_msg_->env & LDC_FRAG_START) ? 'B' : ' ',	\
		    (dlp_msg_->env & LDC_FRAG_STOP) ? 'E' : ' ',	\
		    (dlp_msg_->env & LDC_LEN_MASK));			\
	} else {							\
		D2((c)->id, "%s: msg%d (/%x/%x/%x/,env=%x)", (s),	\
		    dlp_mid_, dlp_msg_->type, dlp_msg_->stype,		\
		    dlp_msg_->ctrl, dlp_msg_->env);			\
	}								\
} while (0)
292 
/*
 * Fault-injection probes (DEBUG kernels): each evaluates to B_TRUE once
 * per arming of the matching LDC_ERR_* bit in ldc_inject_err_flag, subject
 * to the ldcdbgchan channel filter applied by ldc_inject_error().
 */
#define	LDC_INJECT_RESET(_ldcp)	ldc_inject_error(_ldcp, LDC_ERR_RESET)
#define	LDC_INJECT_PKTLOSS(_ldcp) ldc_inject_error(_ldcp, LDC_ERR_PKTLOSS)
295 
296 #else
297 
/*
 * Non-DEBUG build: the debug facilities compile away.
 */

/* still needed: call sites pass it as the channel id argument */
#define	DBG_ALL_LDCS -1

/*
 * The print macros expand to nothing, so a call such as
 * D1(id, fmt, args); is left behind as the parenthesized comma
 * expression (id, fmt, args); -- the arguments are still evaluated.
 */
#define	D1
#define	D2
#define	DWARN

/* packet/payload dumps disappear entirely, arguments included */
#define	DUMP_PAYLOAD(id, addr)
#define	DUMP_LDC_PKT(c, s, addr)

/* fault injection is never triggered */
#define	LDC_INJECT_RESET(_ldcp)	(B_FALSE)
#define	LDC_INJECT_PKTLOSS(_ldcp) (B_FALSE)
309 
310 #endif
311 
/*
 * Clear one LDC packet (sizeof (ldc_msg_t) bytes) at 'p'.
 *
 * The expansion deliberately has no trailing semicolon: the call site
 * supplies it, so "if (x) ZERO_PKT(p); else ..." parses as intended and
 * no stray empty statement is generated.
 */
#define	ZERO_PKT(p)			\
	bzero((p), sizeof (ldc_msg_t))
314 
/*
 * Build a cookie value from a table index and a page-size code:
 * the page-size code is placed at bit LDC_COOKIE_PGSZC_SHIFT and the
 * index is shifted left by 'pg_shift'.
 *
 * Both operands are widened to uint64_t before shifting so that a caller
 * passing a narrower type (int, uint8_t, ...) cannot overflow or lose the
 * high bits; the result is always a uint64_t.
 */
#define	IDX2COOKIE(idx, pg_szc, pg_shift)				\
	(((uint64_t)(pg_szc) << LDC_COOKIE_PGSZC_SHIFT) |		\
	((uint64_t)(idx) << (pg_shift)))
317 
318 int
319 _init(void)
320 {
321 	int status;
322 
323 	status = hsvc_register(&ldc_hsvc, &ldc_sup_minor);
324 	if (status != 0) {
325 		cmn_err(CE_NOTE, "!%s: cannot negotiate hypervisor LDC services"
326 		    " group: 0x%lx major: %ld minor: %ld errno: %d",
327 		    ldc_hsvc.hsvc_modname, ldc_hsvc.hsvc_group,
328 		    ldc_hsvc.hsvc_major, ldc_hsvc.hsvc_minor, status);
329 		return (-1);
330 	}
331 
332 	/* allocate soft state structure */
333 	ldcssp = kmem_zalloc(sizeof (ldc_soft_state_t), KM_SLEEP);
334 
335 	/* Link the module into the system */
336 	status = mod_install(&ml);
337 	if (status != 0) {
338 		kmem_free(ldcssp, sizeof (ldc_soft_state_t));
339 		return (status);
340 	}
341 
342 	/* Initialize the LDC state structure */
343 	mutex_init(&ldcssp->lock, NULL, MUTEX_DRIVER, NULL);
344 
345 	mutex_enter(&ldcssp->lock);
346 
347 	/* Create a cache for memory handles */
348 	ldcssp->memhdl_cache = kmem_cache_create("ldc_memhdl_cache",
349 	    sizeof (ldc_mhdl_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
350 	if (ldcssp->memhdl_cache == NULL) {
351 		DWARN(DBG_ALL_LDCS, "_init: ldc_memhdl cache create failed\n");
352 		mutex_exit(&ldcssp->lock);
353 		return (-1);
354 	}
355 
356 	/* Create cache for memory segment structures */
357 	ldcssp->memseg_cache = kmem_cache_create("ldc_memseg_cache",
358 	    sizeof (ldc_memseg_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
359 	if (ldcssp->memseg_cache == NULL) {
360 		DWARN(DBG_ALL_LDCS, "_init: ldc_memseg cache create failed\n");
361 		mutex_exit(&ldcssp->lock);
362 		return (-1);
363 	}
364 
365 
366 	ldcssp->channel_count = 0;
367 	ldcssp->channels_open = 0;
368 	ldcssp->chan_list = NULL;
369 	ldcssp->dring_list = NULL;
370 
371 	mutex_exit(&ldcssp->lock);
372 
373 	return (0);
374 }
375 
376 int
377 _info(struct modinfo *modinfop)
378 {
379 	/* Report status of the dynamically loadable driver module */
380 	return (mod_info(&ml, modinfop));
381 }
382 
383 int
384 _fini(void)
385 {
386 	int 		rv, status;
387 	ldc_chan_t 	*tmp_ldcp, *ldcp;
388 	ldc_dring_t 	*tmp_dringp, *dringp;
389 	ldc_mem_info_t 	minfo;
390 
391 	/* Unlink the driver module from the system */
392 	status = mod_remove(&ml);
393 	if (status) {
394 		DWARN(DBG_ALL_LDCS, "_fini: mod_remove failed\n");
395 		return (EIO);
396 	}
397 
398 	/* Free descriptor rings */
399 	dringp = ldcssp->dring_list;
400 	while (dringp != NULL) {
401 		tmp_dringp = dringp->next;
402 
403 		rv = ldc_mem_dring_info((ldc_dring_handle_t)dringp, &minfo);
404 		if (rv == 0 && minfo.status != LDC_UNBOUND) {
405 			if (minfo.status == LDC_BOUND) {
406 				(void) ldc_mem_dring_unbind(
407 				    (ldc_dring_handle_t)dringp);
408 			}
409 			if (minfo.status == LDC_MAPPED) {
410 				(void) ldc_mem_dring_unmap(
411 				    (ldc_dring_handle_t)dringp);
412 			}
413 		}
414 
415 		(void) ldc_mem_dring_destroy((ldc_dring_handle_t)dringp);
416 		dringp = tmp_dringp;
417 	}
418 	ldcssp->dring_list = NULL;
419 
420 	/* close and finalize channels */
421 	ldcp = ldcssp->chan_list;
422 	while (ldcp != NULL) {
423 		tmp_ldcp = ldcp->next;
424 
425 		(void) ldc_close((ldc_handle_t)ldcp);
426 		(void) ldc_fini((ldc_handle_t)ldcp);
427 
428 		ldcp = tmp_ldcp;
429 	}
430 	ldcssp->chan_list = NULL;
431 
432 	/* Destroy kmem caches */
433 	kmem_cache_destroy(ldcssp->memhdl_cache);
434 	kmem_cache_destroy(ldcssp->memseg_cache);
435 
436 	/*
437 	 * We have successfully "removed" the driver.
438 	 * Destroying soft states
439 	 */
440 	mutex_destroy(&ldcssp->lock);
441 	kmem_free(ldcssp, sizeof (ldc_soft_state_t));
442 
443 	(void) hsvc_unregister(&ldc_hsvc);
444 
445 	return (status);
446 }
447 
448 /* -------------------------------------------------------------------------- */
449 
450 /*
451  * LDC Link Layer Internal Functions
452  */
453 
454 /*
455  * Translate HV Errors to sun4v error codes
456  */
457 static int
458 i_ldc_h2v_error(int h_error)
459 {
460 	switch (h_error) {
461 
462 	case	H_EOK:
463 		return (0);
464 
465 	case	H_ENORADDR:
466 		return (EFAULT);
467 
468 	case	H_EBADPGSZ:
469 	case	H_EINVAL:
470 		return (EINVAL);
471 
472 	case	H_EWOULDBLOCK:
473 		return (EWOULDBLOCK);
474 
475 	case	H_ENOACCESS:
476 	case	H_ENOMAP:
477 		return (EACCES);
478 
479 	case	H_EIO:
480 	case	H_ECPUERROR:
481 		return (EIO);
482 
483 	case	H_ENOTSUPPORTED:
484 		return (ENOTSUP);
485 
486 	case 	H_ETOOMANY:
487 		return (ENOSPC);
488 
489 	case	H_ECHANNEL:
490 		return (ECHRNG);
491 	default:
492 		break;
493 	}
494 
495 	return (EIO);
496 }
497 
498 /*
499  * Reconfigure the transmit queue
500  */
501 static int
502 i_ldc_txq_reconf(ldc_chan_t *ldcp)
503 {
504 	int rv;
505 
506 	ASSERT(MUTEX_HELD(&ldcp->lock));
507 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
508 
509 	rv = hv_ldc_tx_qconf(ldcp->id, ldcp->tx_q_ra, ldcp->tx_q_entries);
510 	if (rv) {
511 		cmn_err(CE_WARN,
512 		    "i_ldc_txq_reconf: (0x%lx) cannot set qconf", ldcp->id);
513 		return (EIO);
514 	}
515 	rv = hv_ldc_tx_get_state(ldcp->id, &(ldcp->tx_head),
516 	    &(ldcp->tx_tail), &(ldcp->link_state));
517 	if (rv) {
518 		cmn_err(CE_WARN,
519 		    "i_ldc_txq_reconf: (0x%lx) cannot get qptrs", ldcp->id);
520 		return (EIO);
521 	}
522 	D1(ldcp->id, "i_ldc_txq_reconf: (0x%llx) h=0x%llx,t=0x%llx,"
523 	    "s=0x%llx\n", ldcp->id, ldcp->tx_head, ldcp->tx_tail,
524 	    ldcp->link_state);
525 
526 	return (0);
527 }
528 
529 /*
530  * Reconfigure the receive queue
531  */
532 static int
533 i_ldc_rxq_reconf(ldc_chan_t *ldcp, boolean_t force_reset)
534 {
535 	int rv;
536 	uint64_t rx_head, rx_tail;
537 
538 	ASSERT(MUTEX_HELD(&ldcp->lock));
539 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
540 	    &(ldcp->link_state));
541 	if (rv) {
542 		cmn_err(CE_WARN,
543 		    "i_ldc_rxq_reconf: (0x%lx) cannot get state",
544 		    ldcp->id);
545 		return (EIO);
546 	}
547 
548 	if (force_reset || (ldcp->tstate & ~TS_IN_RESET) == TS_UP) {
549 		rv = hv_ldc_rx_qconf(ldcp->id, ldcp->rx_q_ra,
550 		    ldcp->rx_q_entries);
551 		if (rv) {
552 			cmn_err(CE_WARN,
553 			    "i_ldc_rxq_reconf: (0x%lx) cannot set qconf",
554 			    ldcp->id);
555 			return (EIO);
556 		}
557 		D1(ldcp->id, "i_ldc_rxq_reconf: (0x%llx) completed q reconf",
558 		    ldcp->id);
559 	}
560 
561 	return (0);
562 }
563 
564 
565 /*
566  * Drain the contents of the receive queue
567  */
568 static int
569 i_ldc_rxq_drain(ldc_chan_t *ldcp)
570 {
571 	int rv;
572 	uint64_t rx_head, rx_tail;
573 
574 	ASSERT(MUTEX_HELD(&ldcp->lock));
575 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
576 	    &(ldcp->link_state));
577 	if (rv) {
578 		cmn_err(CE_WARN, "i_ldc_rxq_drain: (0x%lx) cannot get state",
579 		    ldcp->id);
580 		return (EIO);
581 	}
582 
583 	/* flush contents by setting the head = tail */
584 	return (i_ldc_set_rx_head(ldcp, rx_tail));
585 }
586 
587 
588 /*
589  * Reset LDC state structure and its contents
590  */
591 static void
592 i_ldc_reset_state(ldc_chan_t *ldcp)
593 {
594 	ASSERT(MUTEX_HELD(&ldcp->lock));
595 	ldcp->last_msg_snt = LDC_INIT_SEQID;
596 	ldcp->last_ack_rcd = 0;
597 	ldcp->last_msg_rcd = 0;
598 	ldcp->tx_ackd_head = ldcp->tx_head;
599 	ldcp->next_vidx = 0;
600 	ldcp->hstate = 0;
601 	ldcp->tstate = TS_OPEN;
602 	ldcp->status = LDC_OPEN;
603 
604 	if (ldcp->link_state == LDC_CHANNEL_UP ||
605 	    ldcp->link_state == LDC_CHANNEL_RESET) {
606 
607 		if (ldcp->mode == LDC_MODE_RAW) {
608 			ldcp->status = LDC_UP;
609 			ldcp->tstate = TS_UP;
610 		} else {
611 			ldcp->status = LDC_READY;
612 			ldcp->tstate |= TS_LINK_READY;
613 		}
614 	}
615 }
616 
617 /*
618  * Reset a LDC channel
619  */
620 static void
621 i_ldc_reset(ldc_chan_t *ldcp, boolean_t force_reset)
622 {
623 	DWARN(ldcp->id, "i_ldc_reset: (0x%llx) channel reset\n", ldcp->id);
624 
625 	ASSERT(MUTEX_HELD(&ldcp->lock));
626 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
627 
628 	/* reconfig Tx and Rx queues */
629 	(void) i_ldc_txq_reconf(ldcp);
630 	(void) i_ldc_rxq_reconf(ldcp, force_reset);
631 
632 	/* Clear Tx and Rx interrupts */
633 	(void) i_ldc_clear_intr(ldcp, CNEX_TX_INTR);
634 	(void) i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
635 
636 	/* Reset channel state */
637 	i_ldc_reset_state(ldcp);
638 
639 	/* Mark channel in reset */
640 	ldcp->tstate |= TS_IN_RESET;
641 }
642 
643 
644 /*
645  * Clear pending interrupts
646  */
647 static void
648 i_ldc_clear_intr(ldc_chan_t *ldcp, cnex_intrtype_t itype)
649 {
650 	ldc_cnex_t *cinfo = &ldcssp->cinfo;
651 
652 	ASSERT(MUTEX_HELD(&ldcp->lock));
653 	ASSERT(cinfo->dip != NULL);
654 
655 	switch (itype) {
656 	case CNEX_TX_INTR:
657 		/* check Tx interrupt */
658 		if (ldcp->tx_intr_state)
659 			ldcp->tx_intr_state = LDC_INTR_NONE;
660 		else
661 			return;
662 		break;
663 
664 	case CNEX_RX_INTR:
665 		/* check Rx interrupt */
666 		if (ldcp->rx_intr_state)
667 			ldcp->rx_intr_state = LDC_INTR_NONE;
668 		else
669 			return;
670 		break;
671 	}
672 
673 	(void) cinfo->clr_intr(cinfo->dip, ldcp->id, itype);
674 	D2(ldcp->id,
675 	    "i_ldc_clear_intr: (0x%llx) cleared 0x%x intr\n",
676 	    ldcp->id, itype);
677 }
678 
679 /*
680  * Set the receive queue head
681  * Resets connection and returns an error if it fails.
682  */
683 static int
684 i_ldc_set_rx_head(ldc_chan_t *ldcp, uint64_t head)
685 {
686 	int 	rv;
687 	int 	retries;
688 
689 	ASSERT(MUTEX_HELD(&ldcp->lock));
690 	for (retries = 0; retries < ldc_max_retries; retries++) {
691 
692 		if ((rv = hv_ldc_rx_set_qhead(ldcp->id, head)) == 0)
693 			return (0);
694 
695 		if (rv != H_EWOULDBLOCK)
696 			break;
697 
698 		/* wait for ldc_delay usecs */
699 		drv_usecwait(ldc_delay);
700 	}
701 
702 	cmn_err(CE_WARN, "ldc_rx_set_qhead: (0x%lx) cannot set qhead 0x%lx",
703 	    ldcp->id, head);
704 	mutex_enter(&ldcp->tx_lock);
705 	i_ldc_reset(ldcp, B_TRUE);
706 	mutex_exit(&ldcp->tx_lock);
707 
708 	return (ECONNRESET);
709 }
710 
711 /*
712  * Returns the tx_head to be used for transfer
713  */
714 static void
715 i_ldc_get_tx_head(ldc_chan_t *ldcp, uint64_t *head)
716 {
717 	ldc_msg_t 	*pkt;
718 
719 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
720 
721 	/* get current Tx head */
722 	*head = ldcp->tx_head;
723 
724 	/*
725 	 * Reliable mode will use the ACKd head instead of the regular tx_head.
726 	 * Also in Reliable mode, advance ackd_head for all non DATA/INFO pkts,
727 	 * up to the current location of tx_head. This needs to be done
728 	 * as the peer will only ACK DATA/INFO pkts.
729 	 */
730 	if (ldcp->mode == LDC_MODE_RELIABLE || ldcp->mode == LDC_MODE_STREAM) {
731 		while (ldcp->tx_ackd_head != ldcp->tx_head) {
732 			pkt = (ldc_msg_t *)(ldcp->tx_q_va + ldcp->tx_ackd_head);
733 			if ((pkt->type & LDC_DATA) && (pkt->stype & LDC_INFO)) {
734 				break;
735 			}
736 			/* advance ACKd head */
737 			ldcp->tx_ackd_head =
738 			    (ldcp->tx_ackd_head + LDC_PACKET_SIZE) %
739 			    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
740 		}
741 		*head = ldcp->tx_ackd_head;
742 	}
743 }
744 
745 /*
746  * Returns the tx_tail to be used for transfer
747  * Re-reads the TX queue ptrs if and only if the
748  * the cached head and tail are equal (queue is full)
749  */
750 static int
751 i_ldc_get_tx_tail(ldc_chan_t *ldcp, uint64_t *tail)
752 {
753 	int 		rv;
754 	uint64_t 	current_head, new_tail;
755 
756 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
757 	/* Read the head and tail ptrs from HV */
758 	rv = hv_ldc_tx_get_state(ldcp->id,
759 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
760 	if (rv) {
761 		cmn_err(CE_WARN,
762 		    "i_ldc_get_tx_tail: (0x%lx) cannot read qptrs\n",
763 		    ldcp->id);
764 		return (EIO);
765 	}
766 	if (ldcp->link_state == LDC_CHANNEL_DOWN) {
767 		D1(ldcp->id, "i_ldc_get_tx_tail: (0x%llx) channel not ready\n",
768 		    ldcp->id);
769 		return (ECONNRESET);
770 	}
771 
772 	i_ldc_get_tx_head(ldcp, &current_head);
773 
774 	/* increment the tail */
775 	new_tail = (ldcp->tx_tail + LDC_PACKET_SIZE) %
776 	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
777 
778 	if (new_tail == current_head) {
779 		DWARN(ldcp->id,
780 		    "i_ldc_get_tx_tail: (0x%llx) TX queue is full\n",
781 		    ldcp->id);
782 		return (EWOULDBLOCK);
783 	}
784 
785 	D2(ldcp->id, "i_ldc_get_tx_tail: (0x%llx) head=0x%llx, tail=0x%llx\n",
786 	    ldcp->id, ldcp->tx_head, ldcp->tx_tail);
787 
788 	*tail = ldcp->tx_tail;
789 	return (0);
790 }
791 
792 /*
793  * Set the tail pointer. If HV returns EWOULDBLOCK, it will back off
794  * and retry ldc_max_retries times before returning an error.
795  * Returns 0, EWOULDBLOCK or EIO
796  */
797 static int
798 i_ldc_set_tx_tail(ldc_chan_t *ldcp, uint64_t tail)
799 {
800 	int		rv, retval = EWOULDBLOCK;
801 	int 		retries;
802 
803 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
804 	for (retries = 0; retries < ldc_max_retries; retries++) {
805 
806 		if ((rv = hv_ldc_tx_set_qtail(ldcp->id, tail)) == 0) {
807 			retval = 0;
808 			break;
809 		}
810 		if (rv != H_EWOULDBLOCK) {
811 			DWARN(ldcp->id, "i_ldc_set_tx_tail: (0x%llx) set "
812 			    "qtail=0x%llx failed, rv=%d\n", ldcp->id, tail, rv);
813 			retval = EIO;
814 			break;
815 		}
816 
817 		/* wait for ldc_delay usecs */
818 		drv_usecwait(ldc_delay);
819 	}
820 	return (retval);
821 }
822 
823 /*
824  * Send a LDC message
825  */
826 static int
827 i_ldc_send_pkt(ldc_chan_t *ldcp, uint8_t pkttype, uint8_t subtype,
828     uint8_t ctrlmsg)
829 {
830 	int		rv;
831 	ldc_msg_t 	*pkt;
832 	uint64_t	tx_tail;
833 	uint32_t	curr_seqid;
834 
835 	/* Obtain Tx lock */
836 	mutex_enter(&ldcp->tx_lock);
837 
838 	curr_seqid = ldcp->last_msg_snt;
839 
840 	/* get the current tail for the message */
841 	rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
842 	if (rv) {
843 		DWARN(ldcp->id,
844 		    "i_ldc_send_pkt: (0x%llx) error sending pkt, "
845 		    "type=0x%x,subtype=0x%x,ctrl=0x%x\n",
846 		    ldcp->id, pkttype, subtype, ctrlmsg);
847 		mutex_exit(&ldcp->tx_lock);
848 		return (rv);
849 	}
850 
851 	pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
852 	ZERO_PKT(pkt);
853 
854 	/* Initialize the packet */
855 	pkt->type = pkttype;
856 	pkt->stype = subtype;
857 	pkt->ctrl = ctrlmsg;
858 
859 	/* Store ackid/seqid iff it is RELIABLE mode & not a RTS/RTR message */
860 	if (((ctrlmsg & LDC_CTRL_MASK) != LDC_RTS) &&
861 	    ((ctrlmsg & LDC_CTRL_MASK) != LDC_RTR)) {
862 		curr_seqid++;
863 		if (ldcp->mode != LDC_MODE_RAW) {
864 			pkt->seqid = curr_seqid;
865 			pkt->ackid = ldcp->last_msg_rcd;
866 		}
867 	}
868 	DUMP_LDC_PKT(ldcp, "i_ldc_send_pkt", (uint64_t)pkt);
869 
870 	/* initiate the send by calling into HV and set the new tail */
871 	tx_tail = (tx_tail + LDC_PACKET_SIZE) %
872 	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
873 
874 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
875 	if (rv) {
876 		DWARN(ldcp->id,
877 		    "i_ldc_send_pkt:(0x%llx) error sending pkt, "
878 		    "type=0x%x,stype=0x%x,ctrl=0x%x\n",
879 		    ldcp->id, pkttype, subtype, ctrlmsg);
880 		mutex_exit(&ldcp->tx_lock);
881 		return (EIO);
882 	}
883 
884 	ldcp->last_msg_snt = curr_seqid;
885 	ldcp->tx_tail = tx_tail;
886 
887 	mutex_exit(&ldcp->tx_lock);
888 	return (0);
889 }
890 
891 /*
892  * Checks if packet was received in right order
893  * in the case of a reliable link.
894  * Returns 0 if in order, else EIO
895  */
896 static int
897 i_ldc_check_seqid(ldc_chan_t *ldcp, ldc_msg_t *msg)
898 {
899 	/* No seqid checking for RAW mode */
900 	if (ldcp->mode == LDC_MODE_RAW)
901 		return (0);
902 
903 	/* No seqid checking for version, RTS, RTR message */
904 	if (msg->ctrl == LDC_VER ||
905 	    msg->ctrl == LDC_RTS ||
906 	    msg->ctrl == LDC_RTR)
907 		return (0);
908 
909 	/* Initial seqid to use is sent in RTS/RTR and saved in last_msg_rcd */
910 	if (msg->seqid != (ldcp->last_msg_rcd + 1)) {
911 		DWARN(ldcp->id,
912 		    "i_ldc_check_seqid: (0x%llx) out-of-order pkt, got 0x%x, "
913 		    "expecting 0x%x\n", ldcp->id, msg->seqid,
914 		    (ldcp->last_msg_rcd + 1));
915 		return (EIO);
916 	}
917 
918 #ifdef DEBUG
919 	if (LDC_INJECT_PKTLOSS(ldcp)) {
920 		DWARN(ldcp->id,
921 		    "i_ldc_check_seqid: (0x%llx) inject pkt loss\n", ldcp->id);
922 		return (EIO);
923 	}
924 #endif
925 
926 	return (0);
927 }
928 
929 
930 /*
931  * Process an incoming version ctrl message
932  */
933 static int
934 i_ldc_process_VER(ldc_chan_t *ldcp, ldc_msg_t *msg)
935 {
936 	int 		rv = 0, idx = ldcp->next_vidx;
937 	ldc_msg_t 	*pkt;
938 	uint64_t	tx_tail;
939 	ldc_ver_t	*rcvd_ver;
940 
941 	/* get the received version */
942 	rcvd_ver = (ldc_ver_t *)((uint64_t)msg + LDC_PAYLOAD_VER_OFF);
943 
944 	D2(ldcp->id, "i_ldc_process_VER: (0x%llx) received VER v%u.%u\n",
945 	    ldcp->id, rcvd_ver->major, rcvd_ver->minor);
946 
947 	/* Obtain Tx lock */
948 	mutex_enter(&ldcp->tx_lock);
949 
950 	switch (msg->stype) {
951 	case LDC_INFO:
952 
953 		if ((ldcp->tstate & ~TS_IN_RESET) == TS_VREADY) {
954 			(void) i_ldc_txq_reconf(ldcp);
955 			i_ldc_reset_state(ldcp);
956 			mutex_exit(&ldcp->tx_lock);
957 			return (EAGAIN);
958 		}
959 
960 		/* get the current tail and pkt for the response */
961 		rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
962 		if (rv != 0) {
963 			DWARN(ldcp->id,
964 			    "i_ldc_process_VER: (0x%llx) err sending "
965 			    "version ACK/NACK\n", ldcp->id);
966 			i_ldc_reset(ldcp, B_TRUE);
967 			mutex_exit(&ldcp->tx_lock);
968 			return (ECONNRESET);
969 		}
970 
971 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
972 		ZERO_PKT(pkt);
973 
974 		/* initialize the packet */
975 		pkt->type = LDC_CTRL;
976 		pkt->ctrl = LDC_VER;
977 
978 		for (;;) {
979 
980 			D1(ldcp->id, "i_ldc_process_VER: got %u.%u chk %u.%u\n",
981 			    rcvd_ver->major, rcvd_ver->minor,
982 			    ldc_versions[idx].major, ldc_versions[idx].minor);
983 
984 			if (rcvd_ver->major == ldc_versions[idx].major) {
985 				/* major version match - ACK version */
986 				pkt->stype = LDC_ACK;
987 
988 				/*
989 				 * lower minor version to the one this endpt
990 				 * supports, if necessary
991 				 */
992 				if (rcvd_ver->minor > ldc_versions[idx].minor)
993 					rcvd_ver->minor =
994 					    ldc_versions[idx].minor;
995 				bcopy(rcvd_ver, pkt->udata, sizeof (*rcvd_ver));
996 
997 				break;
998 			}
999 
1000 			if (rcvd_ver->major > ldc_versions[idx].major) {
1001 
1002 				D1(ldcp->id, "i_ldc_process_VER: using next"
1003 				    " lower idx=%d, v%u.%u\n", idx,
1004 				    ldc_versions[idx].major,
1005 				    ldc_versions[idx].minor);
1006 
1007 				/* nack with next lower version */
1008 				pkt->stype = LDC_NACK;
1009 				bcopy(&ldc_versions[idx], pkt->udata,
1010 				    sizeof (ldc_versions[idx]));
1011 				ldcp->next_vidx = idx;
1012 				break;
1013 			}
1014 
1015 			/* next major version */
1016 			idx++;
1017 
1018 			D1(ldcp->id, "i_ldc_process_VER: inc idx %x\n", idx);
1019 
1020 			if (idx == LDC_NUM_VERS) {
1021 				/* no version match - send NACK */
1022 				pkt->stype = LDC_NACK;
1023 				bzero(pkt->udata, sizeof (ldc_ver_t));
1024 				ldcp->next_vidx = 0;
1025 				break;
1026 			}
1027 		}
1028 
1029 		/* initiate the send by calling into HV and set the new tail */
1030 		tx_tail = (tx_tail + LDC_PACKET_SIZE) %
1031 		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1032 
1033 		rv = i_ldc_set_tx_tail(ldcp, tx_tail);
1034 		if (rv == 0) {
1035 			ldcp->tx_tail = tx_tail;
1036 			if (pkt->stype == LDC_ACK) {
1037 				D2(ldcp->id, "i_ldc_process_VER: (0x%llx) sent"
1038 				    " version ACK\n", ldcp->id);
1039 				/* Save the ACK'd version */
1040 				ldcp->version.major = rcvd_ver->major;
1041 				ldcp->version.minor = rcvd_ver->minor;
1042 				ldcp->hstate |= TS_RCVD_VER;
1043 				ldcp->tstate |= TS_VER_DONE;
1044 				D1(DBG_ALL_LDCS,
1045 				    "(0x%llx) Sent ACK, "
1046 				    "Agreed on version v%u.%u\n",
1047 				    ldcp->id, rcvd_ver->major, rcvd_ver->minor);
1048 			}
1049 		} else {
1050 			DWARN(ldcp->id,
1051 			    "i_ldc_process_VER: (0x%llx) error sending "
1052 			    "ACK/NACK\n", ldcp->id);
1053 			i_ldc_reset(ldcp, B_TRUE);
1054 			mutex_exit(&ldcp->tx_lock);
1055 			return (ECONNRESET);
1056 		}
1057 
1058 		break;
1059 
1060 	case LDC_ACK:
1061 		if ((ldcp->tstate & ~TS_IN_RESET) == TS_VREADY) {
1062 			if (ldcp->version.major != rcvd_ver->major ||
1063 			    ldcp->version.minor != rcvd_ver->minor) {
1064 
1065 				/* mismatched version - reset connection */
1066 				DWARN(ldcp->id,
1067 				    "i_ldc_process_VER: (0x%llx) recvd"
1068 				    " ACK ver != sent ACK ver\n", ldcp->id);
1069 				i_ldc_reset(ldcp, B_TRUE);
1070 				mutex_exit(&ldcp->tx_lock);
1071 				return (ECONNRESET);
1072 			}
1073 		} else {
1074 			/* SUCCESS - we have agreed on a version */
1075 			ldcp->version.major = rcvd_ver->major;
1076 			ldcp->version.minor = rcvd_ver->minor;
1077 			ldcp->tstate |= TS_VER_DONE;
1078 		}
1079 
1080 		D1(ldcp->id, "(0x%llx) Got ACK, Agreed on version v%u.%u\n",
1081 		    ldcp->id, rcvd_ver->major, rcvd_ver->minor);
1082 
1083 		/* initiate RTS-RTR-RDX handshake */
1084 		rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
1085 		if (rv) {
1086 			DWARN(ldcp->id,
1087 		    "i_ldc_process_VER: (0x%llx) cannot send RTS\n",
1088 			    ldcp->id);
1089 			i_ldc_reset(ldcp, B_TRUE);
1090 			mutex_exit(&ldcp->tx_lock);
1091 			return (ECONNRESET);
1092 		}
1093 
1094 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
1095 		ZERO_PKT(pkt);
1096 
1097 		pkt->type = LDC_CTRL;
1098 		pkt->stype = LDC_INFO;
1099 		pkt->ctrl = LDC_RTS;
1100 		pkt->env = ldcp->mode;
1101 		if (ldcp->mode != LDC_MODE_RAW)
1102 			pkt->seqid = LDC_INIT_SEQID;
1103 
1104 		ldcp->last_msg_rcd = LDC_INIT_SEQID;
1105 
1106 		DUMP_LDC_PKT(ldcp, "i_ldc_process_VER snd rts", (uint64_t)pkt);
1107 
1108 		/* initiate the send by calling into HV and set the new tail */
1109 		tx_tail = (tx_tail + LDC_PACKET_SIZE) %
1110 		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1111 
1112 		rv = i_ldc_set_tx_tail(ldcp, tx_tail);
1113 		if (rv) {
1114 			D2(ldcp->id,
1115 			    "i_ldc_process_VER: (0x%llx) no listener\n",
1116 			    ldcp->id);
1117 			i_ldc_reset(ldcp, B_TRUE);
1118 			mutex_exit(&ldcp->tx_lock);
1119 			return (ECONNRESET);
1120 		}
1121 
1122 		ldcp->tx_tail = tx_tail;
1123 		ldcp->hstate |= TS_SENT_RTS;
1124 
1125 		break;
1126 
1127 	case LDC_NACK:
1128 		/* check if version in NACK is zero */
1129 		if (rcvd_ver->major == 0 && rcvd_ver->minor == 0) {
1130 			/* version handshake failure */
1131 			DWARN(DBG_ALL_LDCS,
1132 			    "i_ldc_process_VER: (0x%llx) no version match\n",
1133 			    ldcp->id);
1134 			i_ldc_reset(ldcp, B_TRUE);
1135 			mutex_exit(&ldcp->tx_lock);
1136 			return (ECONNRESET);
1137 		}
1138 
1139 		/* get the current tail and pkt for the response */
1140 		rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
1141 		if (rv != 0) {
1142 			cmn_err(CE_NOTE,
1143 			    "i_ldc_process_VER: (0x%lx) err sending "
1144 			    "version ACK/NACK\n", ldcp->id);
1145 			i_ldc_reset(ldcp, B_TRUE);
1146 			mutex_exit(&ldcp->tx_lock);
1147 			return (ECONNRESET);
1148 		}
1149 
1150 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
1151 		ZERO_PKT(pkt);
1152 
1153 		/* initialize the packet */
1154 		pkt->type = LDC_CTRL;
1155 		pkt->ctrl = LDC_VER;
1156 		pkt->stype = LDC_INFO;
1157 
1158 		/* check ver in NACK msg has a match */
1159 		for (;;) {
1160 			if (rcvd_ver->major == ldc_versions[idx].major) {
1161 				/*
1162 				 * major version match - resubmit request
1163 				 * if lower minor version to the one this endpt
1164 				 * supports, if necessary
1165 				 */
1166 				if (rcvd_ver->minor > ldc_versions[idx].minor)
1167 					rcvd_ver->minor =
1168 					    ldc_versions[idx].minor;
1169 				bcopy(rcvd_ver, pkt->udata, sizeof (*rcvd_ver));
1170 				break;
1171 			}
1172 
1173 			if (rcvd_ver->major > ldc_versions[idx].major) {
1174 
1175 				D1(ldcp->id, "i_ldc_process_VER: using next"
1176 				    " lower idx=%d, v%u.%u\n", idx,
1177 				    ldc_versions[idx].major,
1178 				    ldc_versions[idx].minor);
1179 
1180 				/* send next lower version */
1181 				bcopy(&ldc_versions[idx], pkt->udata,
1182 				    sizeof (ldc_versions[idx]));
1183 				ldcp->next_vidx = idx;
1184 				break;
1185 			}
1186 
1187 			/* next version */
1188 			idx++;
1189 
1190 			D1(ldcp->id, "i_ldc_process_VER: inc idx %x\n", idx);
1191 
1192 			if (idx == LDC_NUM_VERS) {
1193 				/* no version match - terminate */
1194 				ldcp->next_vidx = 0;
1195 				mutex_exit(&ldcp->tx_lock);
1196 				return (ECONNRESET);
1197 			}
1198 		}
1199 
1200 		/* initiate the send by calling into HV and set the new tail */
1201 		tx_tail = (tx_tail + LDC_PACKET_SIZE) %
1202 		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1203 
1204 		rv = i_ldc_set_tx_tail(ldcp, tx_tail);
1205 		if (rv == 0) {
1206 			D2(ldcp->id, "i_ldc_process_VER: (0x%llx) sent version"
1207 			    "INFO v%u.%u\n", ldcp->id, ldc_versions[idx].major,
1208 			    ldc_versions[idx].minor);
1209 			ldcp->tx_tail = tx_tail;
1210 		} else {
1211 			cmn_err(CE_NOTE,
1212 			    "i_ldc_process_VER: (0x%lx) error sending version"
1213 			    "INFO\n", ldcp->id);
1214 			i_ldc_reset(ldcp, B_TRUE);
1215 			mutex_exit(&ldcp->tx_lock);
1216 			return (ECONNRESET);
1217 		}
1218 
1219 		break;
1220 	}
1221 
1222 	mutex_exit(&ldcp->tx_lock);
1223 	return (rv);
1224 }
1225 
1226 
1227 /*
1228  * Process an incoming RTS ctrl message
1229  */
1230 static int
1231 i_ldc_process_RTS(ldc_chan_t *ldcp, ldc_msg_t *msg)
1232 {
1233 	int 		rv = 0;
1234 	ldc_msg_t 	*pkt;
1235 	uint64_t	tx_tail;
1236 	boolean_t	sent_NACK = B_FALSE;
1237 
1238 	D2(ldcp->id, "i_ldc_process_RTS: (0x%llx) received RTS\n", ldcp->id);
1239 
1240 	switch (msg->stype) {
1241 	case LDC_NACK:
1242 		DWARN(ldcp->id,
1243 		    "i_ldc_process_RTS: (0x%llx) RTS NACK received\n",
1244 		    ldcp->id);
1245 
1246 		/* Reset the channel -- as we cannot continue */
1247 		mutex_enter(&ldcp->tx_lock);
1248 		i_ldc_reset(ldcp, B_TRUE);
1249 		mutex_exit(&ldcp->tx_lock);
1250 		rv = ECONNRESET;
1251 		break;
1252 
1253 	case LDC_INFO:
1254 
1255 		/* check mode */
1256 		if (ldcp->mode != (ldc_mode_t)msg->env) {
1257 			cmn_err(CE_NOTE,
1258 			    "i_ldc_process_RTS: (0x%lx) mode mismatch\n",
1259 			    ldcp->id);
1260 			/*
1261 			 * send NACK in response to MODE message
1262 			 * get the current tail for the response
1263 			 */
1264 			rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_NACK, LDC_RTS);
1265 			if (rv) {
1266 				/* if cannot send NACK - reset channel */
1267 				mutex_enter(&ldcp->tx_lock);
1268 				i_ldc_reset(ldcp, B_TRUE);
1269 				mutex_exit(&ldcp->tx_lock);
1270 				rv = ECONNRESET;
1271 				break;
1272 			}
1273 			sent_NACK = B_TRUE;
1274 		}
1275 		break;
1276 	default:
1277 		DWARN(ldcp->id, "i_ldc_process_RTS: (0x%llx) unexp ACK\n",
1278 		    ldcp->id);
1279 		mutex_enter(&ldcp->tx_lock);
1280 		i_ldc_reset(ldcp, B_TRUE);
1281 		mutex_exit(&ldcp->tx_lock);
1282 		rv = ECONNRESET;
1283 		break;
1284 	}
1285 
1286 	/*
1287 	 * If either the connection was reset (when rv != 0) or
1288 	 * a NACK was sent, we return. In the case of a NACK
1289 	 * we dont want to consume the packet that came in but
1290 	 * not record that we received the RTS
1291 	 */
1292 	if (rv || sent_NACK)
1293 		return (rv);
1294 
1295 	/* record RTS received */
1296 	ldcp->hstate |= TS_RCVD_RTS;
1297 
1298 	/* store initial SEQID info */
1299 	ldcp->last_msg_snt = msg->seqid;
1300 
1301 	/* Obtain Tx lock */
1302 	mutex_enter(&ldcp->tx_lock);
1303 
1304 	/* get the current tail for the response */
1305 	rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
1306 	if (rv != 0) {
1307 		cmn_err(CE_NOTE,
1308 		    "i_ldc_process_RTS: (0x%lx) err sending RTR\n",
1309 		    ldcp->id);
1310 		i_ldc_reset(ldcp, B_TRUE);
1311 		mutex_exit(&ldcp->tx_lock);
1312 		return (ECONNRESET);
1313 	}
1314 
1315 	pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
1316 	ZERO_PKT(pkt);
1317 
1318 	/* initialize the packet */
1319 	pkt->type = LDC_CTRL;
1320 	pkt->stype = LDC_INFO;
1321 	pkt->ctrl = LDC_RTR;
1322 	pkt->env = ldcp->mode;
1323 	if (ldcp->mode != LDC_MODE_RAW)
1324 		pkt->seqid = LDC_INIT_SEQID;
1325 
1326 	ldcp->last_msg_rcd = msg->seqid;
1327 
1328 	/* initiate the send by calling into HV and set the new tail */
1329 	tx_tail = (tx_tail + LDC_PACKET_SIZE) %
1330 	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1331 
1332 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
1333 	if (rv == 0) {
1334 		D2(ldcp->id,
1335 		    "i_ldc_process_RTS: (0x%llx) sent RTR\n", ldcp->id);
1336 		DUMP_LDC_PKT(ldcp, "i_ldc_process_RTS sent rtr", (uint64_t)pkt);
1337 
1338 		ldcp->tx_tail = tx_tail;
1339 		ldcp->hstate |= TS_SENT_RTR;
1340 
1341 	} else {
1342 		cmn_err(CE_NOTE,
1343 		    "i_ldc_process_RTS: (0x%lx) error sending RTR\n",
1344 		    ldcp->id);
1345 		i_ldc_reset(ldcp, B_TRUE);
1346 		mutex_exit(&ldcp->tx_lock);
1347 		return (ECONNRESET);
1348 	}
1349 
1350 	mutex_exit(&ldcp->tx_lock);
1351 	return (0);
1352 }
1353 
1354 /*
1355  * Process an incoming RTR ctrl message
1356  */
1357 static int
1358 i_ldc_process_RTR(ldc_chan_t *ldcp, ldc_msg_t *msg)
1359 {
1360 	int 		rv = 0;
1361 	boolean_t	sent_NACK = B_FALSE;
1362 
1363 	D2(ldcp->id, "i_ldc_process_RTR: (0x%llx) received RTR\n", ldcp->id);
1364 
1365 	switch (msg->stype) {
1366 	case LDC_NACK:
1367 		/* RTR NACK received */
1368 		DWARN(ldcp->id,
1369 		    "i_ldc_process_RTR: (0x%llx) RTR NACK received\n",
1370 		    ldcp->id);
1371 
1372 		/* Reset the channel -- as we cannot continue */
1373 		mutex_enter(&ldcp->tx_lock);
1374 		i_ldc_reset(ldcp, B_TRUE);
1375 		mutex_exit(&ldcp->tx_lock);
1376 		rv = ECONNRESET;
1377 
1378 		break;
1379 
1380 	case LDC_INFO:
1381 
1382 		/* check mode */
1383 		if (ldcp->mode != (ldc_mode_t)msg->env) {
1384 			DWARN(ldcp->id,
1385 			    "i_ldc_process_RTR: (0x%llx) mode mismatch, "
1386 			    "expecting 0x%x, got 0x%x\n",
1387 			    ldcp->id, ldcp->mode, (ldc_mode_t)msg->env);
1388 			/*
1389 			 * send NACK in response to MODE message
1390 			 * get the current tail for the response
1391 			 */
1392 			rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_NACK, LDC_RTR);
1393 			if (rv) {
1394 				/* if cannot send NACK - reset channel */
1395 				mutex_enter(&ldcp->tx_lock);
1396 				i_ldc_reset(ldcp, B_TRUE);
1397 				mutex_exit(&ldcp->tx_lock);
1398 				rv = ECONNRESET;
1399 				break;
1400 			}
1401 			sent_NACK = B_TRUE;
1402 		}
1403 		break;
1404 
1405 	default:
1406 		DWARN(ldcp->id, "i_ldc_process_RTR: (0x%llx) unexp ACK\n",
1407 		    ldcp->id);
1408 
1409 		/* Reset the channel -- as we cannot continue */
1410 		mutex_enter(&ldcp->tx_lock);
1411 		i_ldc_reset(ldcp, B_TRUE);
1412 		mutex_exit(&ldcp->tx_lock);
1413 		rv = ECONNRESET;
1414 		break;
1415 	}
1416 
1417 	/*
1418 	 * If either the connection was reset (when rv != 0) or
1419 	 * a NACK was sent, we return. In the case of a NACK
1420 	 * we dont want to consume the packet that came in but
1421 	 * not record that we received the RTR
1422 	 */
1423 	if (rv || sent_NACK)
1424 		return (rv);
1425 
1426 	ldcp->last_msg_snt = msg->seqid;
1427 	ldcp->hstate |= TS_RCVD_RTR;
1428 
1429 	rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_INFO, LDC_RDX);
1430 	if (rv) {
1431 		cmn_err(CE_NOTE,
1432 		    "i_ldc_process_RTR: (0x%lx) cannot send RDX\n",
1433 		    ldcp->id);
1434 		mutex_enter(&ldcp->tx_lock);
1435 		i_ldc_reset(ldcp, B_TRUE);
1436 		mutex_exit(&ldcp->tx_lock);
1437 		return (ECONNRESET);
1438 	}
1439 	D2(ldcp->id,
1440 	    "i_ldc_process_RTR: (0x%llx) sent RDX\n", ldcp->id);
1441 
1442 	ldcp->hstate |= TS_SENT_RDX;
1443 	ldcp->tstate |= TS_HSHAKE_DONE;
1444 	if ((ldcp->tstate & TS_IN_RESET) == 0)
1445 		ldcp->status = LDC_UP;
1446 
1447 	D1(ldcp->id, "(0x%llx) Handshake Complete\n", ldcp->id);
1448 
1449 	return (0);
1450 }
1451 
1452 
1453 /*
1454  * Process an incoming RDX ctrl message
1455  */
1456 static int
1457 i_ldc_process_RDX(ldc_chan_t *ldcp, ldc_msg_t *msg)
1458 {
1459 	int	rv = 0;
1460 
1461 	D2(ldcp->id, "i_ldc_process_RDX: (0x%llx) received RDX\n", ldcp->id);
1462 
1463 	switch (msg->stype) {
1464 	case LDC_NACK:
1465 		/* RDX NACK received */
1466 		DWARN(ldcp->id,
1467 		    "i_ldc_process_RDX: (0x%llx) RDX NACK received\n",
1468 		    ldcp->id);
1469 
1470 		/* Reset the channel -- as we cannot continue */
1471 		mutex_enter(&ldcp->tx_lock);
1472 		i_ldc_reset(ldcp, B_TRUE);
1473 		mutex_exit(&ldcp->tx_lock);
1474 		rv = ECONNRESET;
1475 
1476 		break;
1477 
1478 	case LDC_INFO:
1479 
1480 		/*
1481 		 * if channel is UP and a RDX received after data transmission
1482 		 * has commenced it is an error
1483 		 */
1484 		if ((ldcp->tstate == TS_UP) && (ldcp->hstate & TS_RCVD_RDX)) {
1485 			DWARN(DBG_ALL_LDCS,
1486 			    "i_ldc_process_RDX: (0x%llx) unexpected RDX"
1487 			    " - LDC reset\n", ldcp->id);
1488 			mutex_enter(&ldcp->tx_lock);
1489 			i_ldc_reset(ldcp, B_TRUE);
1490 			mutex_exit(&ldcp->tx_lock);
1491 			return (ECONNRESET);
1492 		}
1493 
1494 		ldcp->hstate |= TS_RCVD_RDX;
1495 		ldcp->tstate |= TS_HSHAKE_DONE;
1496 		if ((ldcp->tstate & TS_IN_RESET) == 0)
1497 			ldcp->status = LDC_UP;
1498 
1499 		D1(DBG_ALL_LDCS, "(0x%llx) Handshake Complete\n", ldcp->id);
1500 		break;
1501 
1502 	default:
1503 		DWARN(ldcp->id, "i_ldc_process_RDX: (0x%llx) unexp ACK\n",
1504 		    ldcp->id);
1505 
1506 		/* Reset the channel -- as we cannot continue */
1507 		mutex_enter(&ldcp->tx_lock);
1508 		i_ldc_reset(ldcp, B_TRUE);
1509 		mutex_exit(&ldcp->tx_lock);
1510 		rv = ECONNRESET;
1511 		break;
1512 	}
1513 
1514 	return (rv);
1515 }
1516 
1517 /*
1518  * Process an incoming ACK for a data packet
1519  */
1520 static int
1521 i_ldc_process_data_ACK(ldc_chan_t *ldcp, ldc_msg_t *msg)
1522 {
1523 	int		rv;
1524 	uint64_t 	tx_head;
1525 	ldc_msg_t	*pkt;
1526 
1527 	/* Obtain Tx lock */
1528 	mutex_enter(&ldcp->tx_lock);
1529 
1530 	/*
1531 	 * Read the current Tx head and tail
1532 	 */
1533 	rv = hv_ldc_tx_get_state(ldcp->id,
1534 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
1535 	if (rv != 0) {
1536 		cmn_err(CE_WARN,
1537 		    "i_ldc_process_data_ACK: (0x%lx) cannot read qptrs\n",
1538 		    ldcp->id);
1539 
1540 		/* Reset the channel -- as we cannot continue */
1541 		i_ldc_reset(ldcp, B_TRUE);
1542 		mutex_exit(&ldcp->tx_lock);
1543 		return (ECONNRESET);
1544 	}
1545 
1546 	/*
1547 	 * loop from where the previous ACK location was to the
1548 	 * current head location. This is how far the HV has
1549 	 * actually send pkts. Pkts between head and tail are
1550 	 * yet to be sent by HV.
1551 	 */
1552 	tx_head = ldcp->tx_ackd_head;
1553 	for (;;) {
1554 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_head);
1555 		tx_head = (tx_head + LDC_PACKET_SIZE) %
1556 		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1557 
1558 		if (pkt->seqid == msg->ackid) {
1559 			D2(ldcp->id,
1560 			    "i_ldc_process_data_ACK: (0x%llx) found packet\n",
1561 			    ldcp->id);
1562 			ldcp->last_ack_rcd = msg->ackid;
1563 			ldcp->tx_ackd_head = tx_head;
1564 			break;
1565 		}
1566 		if (tx_head == ldcp->tx_head) {
1567 			/* could not find packet */
1568 			DWARN(ldcp->id,
1569 			    "i_ldc_process_data_ACK: (0x%llx) invalid ACKid\n",
1570 			    ldcp->id);
1571 
1572 			/* Reset the channel -- as we cannot continue */
1573 			i_ldc_reset(ldcp, B_TRUE);
1574 			mutex_exit(&ldcp->tx_lock);
1575 			return (ECONNRESET);
1576 		}
1577 	}
1578 
1579 	mutex_exit(&ldcp->tx_lock);
1580 	return (0);
1581 }
1582 
1583 /*
1584  * Process incoming control message
1585  * Return 0 - session can continue
1586  *        EAGAIN - reprocess packet - state was changed
1587  *	  ECONNRESET - channel was reset
1588  */
1589 static int
1590 i_ldc_ctrlmsg(ldc_chan_t *ldcp, ldc_msg_t *msg)
1591 {
1592 	int 		rv = 0;
1593 
1594 	D1(ldcp->id, "i_ldc_ctrlmsg: (%llx) tstate = %lx, hstate = %lx\n",
1595 	    ldcp->id, ldcp->tstate, ldcp->hstate);
1596 
1597 	switch (ldcp->tstate & ~TS_IN_RESET) {
1598 
1599 	case TS_OPEN:
1600 	case TS_READY:
1601 
1602 		switch (msg->ctrl & LDC_CTRL_MASK) {
1603 		case LDC_VER:
1604 			/* process version message */
1605 			rv = i_ldc_process_VER(ldcp, msg);
1606 			break;
1607 		default:
1608 			DWARN(ldcp->id,
1609 			    "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
1610 			    "tstate=0x%x\n", ldcp->id,
1611 			    (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate);
1612 			break;
1613 		}
1614 
1615 		break;
1616 
1617 	case TS_VREADY:
1618 
1619 		switch (msg->ctrl & LDC_CTRL_MASK) {
1620 		case LDC_VER:
1621 			/* process version message */
1622 			rv = i_ldc_process_VER(ldcp, msg);
1623 			break;
1624 		case LDC_RTS:
1625 			/* process RTS message */
1626 			rv = i_ldc_process_RTS(ldcp, msg);
1627 			break;
1628 		case LDC_RTR:
1629 			/* process RTR message */
1630 			rv = i_ldc_process_RTR(ldcp, msg);
1631 			break;
1632 		case LDC_RDX:
1633 			/* process RDX message */
1634 			rv = i_ldc_process_RDX(ldcp, msg);
1635 			break;
1636 		default:
1637 			DWARN(ldcp->id,
1638 			    "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
1639 			    "tstate=0x%x\n", ldcp->id,
1640 			    (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate);
1641 			break;
1642 		}
1643 
1644 		break;
1645 
1646 	case TS_UP:
1647 
1648 		switch (msg->ctrl & LDC_CTRL_MASK) {
1649 		case LDC_VER:
1650 			DWARN(ldcp->id,
1651 			    "i_ldc_ctrlmsg: (0x%llx) unexpected VER "
1652 			    "- LDC reset\n", ldcp->id);
1653 			/* peer is redoing version negotiation */
1654 			mutex_enter(&ldcp->tx_lock);
1655 			(void) i_ldc_txq_reconf(ldcp);
1656 			i_ldc_reset_state(ldcp);
1657 			mutex_exit(&ldcp->tx_lock);
1658 			rv = EAGAIN;
1659 			break;
1660 
1661 		case LDC_RDX:
1662 			/* process RDX message */
1663 			rv = i_ldc_process_RDX(ldcp, msg);
1664 			break;
1665 
1666 		default:
1667 			DWARN(ldcp->id,
1668 			    "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
1669 			    "tstate=0x%x\n", ldcp->id,
1670 			    (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate);
1671 			break;
1672 		}
1673 	}
1674 
1675 	return (rv);
1676 }
1677 
1678 /*
1679  * Register channel with the channel nexus
1680  */
1681 static int
1682 i_ldc_register_channel(ldc_chan_t *ldcp)
1683 {
1684 	int		rv = 0;
1685 	ldc_cnex_t	*cinfo = &ldcssp->cinfo;
1686 
1687 	if (cinfo->dip == NULL) {
1688 		DWARN(ldcp->id,
1689 		    "i_ldc_register_channel: cnex has not registered\n");
1690 		return (EAGAIN);
1691 	}
1692 
1693 	rv = cinfo->reg_chan(cinfo->dip, ldcp->id, ldcp->devclass);
1694 	if (rv) {
1695 		DWARN(ldcp->id,
1696 		    "i_ldc_register_channel: cannot register channel\n");
1697 		return (rv);
1698 	}
1699 
1700 	rv = cinfo->add_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR,
1701 	    i_ldc_tx_hdlr, ldcp, NULL);
1702 	if (rv) {
1703 		DWARN(ldcp->id,
1704 		    "i_ldc_register_channel: cannot add Tx interrupt\n");
1705 		(void) cinfo->unreg_chan(cinfo->dip, ldcp->id);
1706 		return (rv);
1707 	}
1708 
1709 	rv = cinfo->add_intr(cinfo->dip, ldcp->id, CNEX_RX_INTR,
1710 	    i_ldc_rx_hdlr, ldcp, NULL);
1711 	if (rv) {
1712 		DWARN(ldcp->id,
1713 		    "i_ldc_register_channel: cannot add Rx interrupt\n");
1714 		(void) cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR);
1715 		(void) cinfo->unreg_chan(cinfo->dip, ldcp->id);
1716 		return (rv);
1717 	}
1718 
1719 	ldcp->tstate |= TS_CNEX_RDY;
1720 
1721 	return (0);
1722 }
1723 
1724 /*
1725  * Unregister a channel with the channel nexus
1726  */
1727 static int
1728 i_ldc_unregister_channel(ldc_chan_t *ldcp)
1729 {
1730 	int		rv = 0;
1731 	ldc_cnex_t	*cinfo = &ldcssp->cinfo;
1732 
1733 	if (cinfo->dip == NULL) {
1734 		DWARN(ldcp->id,
1735 		    "i_ldc_unregister_channel: cnex has not registered\n");
1736 		return (EAGAIN);
1737 	}
1738 
1739 	if (ldcp->tstate & TS_CNEX_RDY) {
1740 
1741 		/* Remove the Rx interrupt */
1742 		rv = cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_RX_INTR);
1743 		if (rv) {
1744 			if (rv != EAGAIN) {
1745 				DWARN(ldcp->id,
1746 				    "i_ldc_unregister_channel: err removing "
1747 				    "Rx intr\n");
1748 				return (rv);
1749 			}
1750 
1751 			/*
1752 			 * If interrupts are pending and handler has
1753 			 * finished running, clear interrupt and try
1754 			 * again
1755 			 */
1756 			if (ldcp->rx_intr_state != LDC_INTR_PEND)
1757 				return (rv);
1758 
1759 			(void) i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
1760 			rv = cinfo->rem_intr(cinfo->dip, ldcp->id,
1761 			    CNEX_RX_INTR);
1762 			if (rv) {
1763 				DWARN(ldcp->id, "i_ldc_unregister_channel: "
1764 				    "err removing Rx interrupt\n");
1765 				return (rv);
1766 			}
1767 		}
1768 
1769 		/* Remove the Tx interrupt */
1770 		rv = cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR);
1771 		if (rv) {
1772 			DWARN(ldcp->id,
1773 			    "i_ldc_unregister_channel: err removing Tx intr\n");
1774 			return (rv);
1775 		}
1776 
1777 		/* Unregister the channel */
1778 		rv = cinfo->unreg_chan(ldcssp->cinfo.dip, ldcp->id);
1779 		if (rv) {
1780 			DWARN(ldcp->id,
1781 			    "i_ldc_unregister_channel: cannot unreg channel\n");
1782 			return (rv);
1783 		}
1784 
1785 		ldcp->tstate &= ~TS_CNEX_RDY;
1786 	}
1787 
1788 	return (0);
1789 }
1790 
1791 
1792 /*
1793  * LDC transmit interrupt handler
1794  *    triggered for chanel up/down/reset events
1795  *    and Tx queue content changes
1796  */
1797 static uint_t
1798 i_ldc_tx_hdlr(caddr_t arg1, caddr_t arg2)
1799 {
1800 	_NOTE(ARGUNUSED(arg2))
1801 
1802 	int 		rv;
1803 	ldc_chan_t 	*ldcp;
1804 	boolean_t 	notify_client = B_FALSE;
1805 	uint64_t	notify_event = 0, link_state;
1806 
1807 	/* Get the channel for which interrupt was received */
1808 	ASSERT(arg1 != NULL);
1809 	ldcp = (ldc_chan_t *)arg1;
1810 
1811 	D1(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) Received intr, ldcp=0x%p\n",
1812 	    ldcp->id, ldcp);
1813 
1814 	/* Lock channel */
1815 	mutex_enter(&ldcp->lock);
1816 
1817 	/* Obtain Tx lock */
1818 	mutex_enter(&ldcp->tx_lock);
1819 
1820 	/* mark interrupt as pending */
1821 	ldcp->tx_intr_state = LDC_INTR_ACTIVE;
1822 
1823 	/* save current link state */
1824 	link_state = ldcp->link_state;
1825 
1826 	rv = hv_ldc_tx_get_state(ldcp->id, &ldcp->tx_head, &ldcp->tx_tail,
1827 	    &ldcp->link_state);
1828 	if (rv) {
1829 		cmn_err(CE_WARN,
1830 		    "i_ldc_tx_hdlr: (0x%lx) cannot read queue ptrs rv=0x%d\n",
1831 		    ldcp->id, rv);
1832 		i_ldc_clear_intr(ldcp, CNEX_TX_INTR);
1833 		mutex_exit(&ldcp->tx_lock);
1834 		mutex_exit(&ldcp->lock);
1835 		return (DDI_INTR_CLAIMED);
1836 	}
1837 
1838 	/*
1839 	 * reset the channel state if the channel went down
1840 	 * (other side unconfigured queue) or channel was reset
1841 	 * (other side reconfigured its queue)
1842 	 */
1843 	if (link_state != ldcp->link_state &&
1844 	    ldcp->link_state == LDC_CHANNEL_DOWN) {
1845 		D1(ldcp->id, "i_ldc_tx_hdlr: channel link down\n", ldcp->id);
1846 		i_ldc_reset(ldcp, B_FALSE);
1847 		notify_client = B_TRUE;
1848 		notify_event = LDC_EVT_DOWN;
1849 	}
1850 
1851 	if (link_state != ldcp->link_state &&
1852 	    ldcp->link_state == LDC_CHANNEL_RESET) {
1853 		D1(ldcp->id, "i_ldc_tx_hdlr: channel link reset\n", ldcp->id);
1854 		i_ldc_reset(ldcp, B_FALSE);
1855 		notify_client = B_TRUE;
1856 		notify_event = LDC_EVT_RESET;
1857 	}
1858 
1859 	if (link_state != ldcp->link_state &&
1860 	    (ldcp->tstate & ~TS_IN_RESET) == TS_OPEN &&
1861 	    ldcp->link_state == LDC_CHANNEL_UP) {
1862 		D1(ldcp->id, "i_ldc_tx_hdlr: channel link up\n", ldcp->id);
1863 		notify_client = B_TRUE;
1864 		notify_event = LDC_EVT_RESET;
1865 		ldcp->tstate |= TS_LINK_READY;
1866 		ldcp->status = LDC_READY;
1867 	}
1868 
1869 	/* if callbacks are disabled, do not notify */
1870 	if (!ldcp->cb_enabled)
1871 		notify_client = B_FALSE;
1872 
1873 	i_ldc_clear_intr(ldcp, CNEX_TX_INTR);
1874 	mutex_exit(&ldcp->tx_lock);
1875 
1876 	if (notify_client) {
1877 		ldcp->cb_inprogress = B_TRUE;
1878 		mutex_exit(&ldcp->lock);
1879 		rv = ldcp->cb(notify_event, ldcp->cb_arg);
1880 		if (rv) {
1881 			DWARN(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) callback "
1882 			    "failure", ldcp->id);
1883 		}
1884 		mutex_enter(&ldcp->lock);
1885 		ldcp->cb_inprogress = B_FALSE;
1886 	}
1887 
1888 	mutex_exit(&ldcp->lock);
1889 
1890 	D1(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) exiting handler", ldcp->id);
1891 
1892 	return (DDI_INTR_CLAIMED);
1893 }
1894 
1895 /*
1896  * LDC receive interrupt handler
1897  *    triggered for channel with data pending to read
1898  *    i.e. Rx queue content changes
1899  */
1900 static uint_t
1901 i_ldc_rx_hdlr(caddr_t arg1, caddr_t arg2)
1902 {
1903 	_NOTE(ARGUNUSED(arg2))
1904 
1905 	int		rv;
1906 	uint64_t 	rx_head, rx_tail;
1907 	ldc_msg_t 	*msg;
1908 	ldc_chan_t 	*ldcp;
1909 	boolean_t 	notify_client = B_FALSE;
1910 	uint64_t	notify_event = 0;
1911 	uint64_t	link_state, first_fragment = 0;
1912 
1913 
1914 	/* Get the channel for which interrupt was received */
1915 	if (arg1 == NULL) {
1916 		cmn_err(CE_WARN, "i_ldc_rx_hdlr: invalid arg\n");
1917 		return (DDI_INTR_UNCLAIMED);
1918 	}
1919 
1920 	ldcp = (ldc_chan_t *)arg1;
1921 
1922 	D1(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) Received intr, ldcp=0x%p\n",
1923 	    ldcp->id, ldcp);
1924 	D1(ldcp->id, "i_ldc_rx_hdlr: (%llx) USR%lx/TS%lx/HS%lx, LSTATE=%lx\n",
1925 	    ldcp->id, ldcp->status, ldcp->tstate, ldcp->hstate,
1926 	    ldcp->link_state);
1927 
1928 	/* Lock channel */
1929 	mutex_enter(&ldcp->lock);
1930 
1931 	/* mark interrupt as pending */
1932 	ldcp->rx_intr_state = LDC_INTR_ACTIVE;
1933 
1934 	/*
1935 	 * Read packet(s) from the queue
1936 	 */
1937 	for (;;) {
1938 
1939 		link_state = ldcp->link_state;
1940 		rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
1941 		    &ldcp->link_state);
1942 		if (rv) {
1943 			cmn_err(CE_WARN,
1944 			    "i_ldc_rx_hdlr: (0x%lx) cannot read "
1945 			    "queue ptrs, rv=0x%d\n", ldcp->id, rv);
1946 			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
1947 			mutex_exit(&ldcp->lock);
1948 			return (DDI_INTR_CLAIMED);
1949 		}
1950 
1951 		/*
1952 		 * reset the channel state if the channel went down
1953 		 * (other side unconfigured queue) or channel was reset
1954 		 * (other side reconfigured its queue)
1955 		 */
1956 
1957 		if (link_state != ldcp->link_state) {
1958 
1959 			switch (ldcp->link_state) {
1960 			case LDC_CHANNEL_DOWN:
1961 				D1(ldcp->id, "i_ldc_rx_hdlr: channel "
1962 				    "link down\n", ldcp->id);
1963 				mutex_enter(&ldcp->tx_lock);
1964 				i_ldc_reset(ldcp, B_FALSE);
1965 				mutex_exit(&ldcp->tx_lock);
1966 				notify_client = B_TRUE;
1967 				notify_event = LDC_EVT_DOWN;
1968 				goto loop_exit;
1969 
1970 			case LDC_CHANNEL_UP:
1971 				D1(ldcp->id, "i_ldc_rx_hdlr: "
1972 				    "channel link up\n", ldcp->id);
1973 
1974 				if ((ldcp->tstate & ~TS_IN_RESET) == TS_OPEN) {
1975 					notify_client = B_TRUE;
1976 					notify_event = LDC_EVT_RESET;
1977 					ldcp->tstate |= TS_LINK_READY;
1978 					ldcp->status = LDC_READY;
1979 				}
1980 				break;
1981 
1982 			case LDC_CHANNEL_RESET:
1983 			default:
1984 #ifdef DEBUG
1985 force_reset:
1986 #endif
1987 				D1(ldcp->id, "i_ldc_rx_hdlr: channel "
1988 				    "link reset\n", ldcp->id);
1989 				mutex_enter(&ldcp->tx_lock);
1990 				i_ldc_reset(ldcp, B_FALSE);
1991 				mutex_exit(&ldcp->tx_lock);
1992 				notify_client = B_TRUE;
1993 				notify_event = LDC_EVT_RESET;
1994 				break;
1995 			}
1996 		}
1997 
1998 #ifdef DEBUG
1999 		if (LDC_INJECT_RESET(ldcp))
2000 			goto force_reset;
2001 #endif
2002 
2003 		if (rx_head == rx_tail) {
2004 			D2(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) No packets\n",
2005 			    ldcp->id);
2006 			break;
2007 		}
2008 
2009 		D2(ldcp->id, "i_ldc_rx_hdlr: head=0x%llx, tail=0x%llx\n",
2010 		    rx_head, rx_tail);
2011 		DUMP_LDC_PKT(ldcp, "i_ldc_rx_hdlr rcd",
2012 		    ldcp->rx_q_va + rx_head);
2013 
2014 		/* get the message */
2015 		msg = (ldc_msg_t *)(ldcp->rx_q_va + rx_head);
2016 
2017 		/* if channel is in RAW mode or data pkt, notify and return */
2018 		if (ldcp->mode == LDC_MODE_RAW) {
2019 			notify_client = B_TRUE;
2020 			notify_event |= LDC_EVT_READ;
2021 			break;
2022 		}
2023 
2024 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_INFO)) {
2025 
2026 			/* discard packet if channel is not up */
2027 			if ((ldcp->tstate & ~TS_IN_RESET) != TS_UP) {
2028 
2029 				/* move the head one position */
2030 				rx_head = (rx_head + LDC_PACKET_SIZE) %
2031 				    (ldcp->rx_q_entries << LDC_PACKET_SHIFT);
2032 
2033 				if (rv = i_ldc_set_rx_head(ldcp, rx_head))
2034 					break;
2035 
2036 				continue;
2037 			} else {
2038 				if ((ldcp->tstate & TS_IN_RESET) == 0)
2039 					notify_client = B_TRUE;
2040 				notify_event |= LDC_EVT_READ;
2041 				break;
2042 			}
2043 		}
2044 
2045 		/* Check the sequence ID for the message received */
2046 		rv = i_ldc_check_seqid(ldcp, msg);
2047 		if (rv != 0) {
2048 
2049 			DWARN(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) seqid error, "
2050 			    "q_ptrs=0x%lx,0x%lx", ldcp->id, rx_head, rx_tail);
2051 
2052 			/* Reset last_msg_rcd to start of message */
2053 			if (first_fragment != 0) {
2054 				ldcp->last_msg_rcd = first_fragment - 1;
2055 				first_fragment = 0;
2056 			}
2057 
2058 			/*
2059 			 * Send a NACK due to seqid mismatch
2060 			 */
2061 			rv = i_ldc_send_pkt(ldcp, msg->type, LDC_NACK,
2062 			    (msg->ctrl & LDC_CTRL_MASK));
2063 
2064 			if (rv) {
2065 				cmn_err(CE_NOTE,
2066 				    "i_ldc_rx_hdlr: (0x%lx) err sending "
2067 				    "CTRL/DATA NACK msg\n", ldcp->id);
2068 
2069 				/* if cannot send NACK - reset channel */
2070 				mutex_enter(&ldcp->tx_lock);
2071 				i_ldc_reset(ldcp, B_TRUE);
2072 				mutex_exit(&ldcp->tx_lock);
2073 
2074 				notify_client = B_TRUE;
2075 				notify_event = LDC_EVT_RESET;
2076 				break;
2077 			}
2078 
2079 			/* purge receive queue */
2080 			(void) i_ldc_set_rx_head(ldcp, rx_tail);
2081 			break;
2082 		}
2083 
2084 		/* record the message ID */
2085 		ldcp->last_msg_rcd = msg->seqid;
2086 
2087 		/* process control messages */
2088 		if (msg->type & LDC_CTRL) {
2089 			/* save current internal state */
2090 			uint64_t tstate = ldcp->tstate;
2091 
2092 			rv = i_ldc_ctrlmsg(ldcp, msg);
2093 			if (rv == EAGAIN) {
2094 				/* re-process pkt - state was adjusted */
2095 				continue;
2096 			}
2097 			if (rv == ECONNRESET) {
2098 				notify_client = B_TRUE;
2099 				notify_event = LDC_EVT_RESET;
2100 				break;
2101 			}
2102 
2103 			/*
2104 			 * control message processing was successful
2105 			 * channel transitioned to ready for communication
2106 			 */
2107 			if (rv == 0 && ldcp->tstate == TS_UP &&
2108 			    (tstate & ~TS_IN_RESET) !=
2109 			    (ldcp->tstate & ~TS_IN_RESET)) {
2110 				notify_client = B_TRUE;
2111 				notify_event = LDC_EVT_UP;
2112 			}
2113 		}
2114 
2115 		/* process data NACKs */
2116 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_NACK)) {
2117 			DWARN(ldcp->id,
2118 			    "i_ldc_rx_hdlr: (0x%llx) received DATA/NACK",
2119 			    ldcp->id);
2120 			mutex_enter(&ldcp->tx_lock);
2121 			i_ldc_reset(ldcp, B_TRUE);
2122 			mutex_exit(&ldcp->tx_lock);
2123 			notify_client = B_TRUE;
2124 			notify_event = LDC_EVT_RESET;
2125 			break;
2126 		}
2127 
2128 		/* process data ACKs */
2129 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) {
2130 			if (rv = i_ldc_process_data_ACK(ldcp, msg)) {
2131 				notify_client = B_TRUE;
2132 				notify_event = LDC_EVT_RESET;
2133 				break;
2134 			}
2135 		}
2136 
2137 		/* move the head one position */
2138 		rx_head = (rx_head + LDC_PACKET_SIZE) %
2139 		    (ldcp->rx_q_entries << LDC_PACKET_SHIFT);
2140 		if (rv = i_ldc_set_rx_head(ldcp, rx_head)) {
2141 			notify_client = B_TRUE;
2142 			notify_event = LDC_EVT_RESET;
2143 			break;
2144 		}
2145 
2146 	} /* for */
2147 
2148 loop_exit:
2149 
2150 	/* if callbacks are disabled, do not notify */
2151 	if (!ldcp->cb_enabled)
2152 		notify_client = B_FALSE;
2153 
2154 	/*
2155 	 * If there are data packets in the queue, the ldc_read will
2156 	 * clear interrupts after draining the queue, else clear interrupts
2157 	 */
2158 	if ((notify_event & LDC_EVT_READ) == 0) {
2159 		i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
2160 	} else
2161 		ldcp->rx_intr_state = LDC_INTR_PEND;
2162 
2163 
2164 	if (notify_client) {
2165 		ldcp->cb_inprogress = B_TRUE;
2166 		mutex_exit(&ldcp->lock);
2167 		rv = ldcp->cb(notify_event, ldcp->cb_arg);
2168 		if (rv) {
2169 			DWARN(ldcp->id,
2170 			    "i_ldc_rx_hdlr: (0x%llx) callback failure",
2171 			    ldcp->id);
2172 		}
2173 		mutex_enter(&ldcp->lock);
2174 		ldcp->cb_inprogress = B_FALSE;
2175 	}
2176 
2177 	mutex_exit(&ldcp->lock);
2178 
2179 	D1(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) exiting handler", ldcp->id);
2180 	return (DDI_INTR_CLAIMED);
2181 }
2182 
2183 
2184 /* -------------------------------------------------------------------------- */
2185 
2186 /*
2187  * LDC API functions
2188  */
2189 
2190 /*
2191  * Initialize the channel. Allocate internal structure and memory for
2192  * TX/RX queues, and initialize locks.
2193  */
2194 int
2195 ldc_init(uint64_t id, ldc_attr_t *attr, ldc_handle_t *handle)
2196 {
2197 	ldc_chan_t 	*ldcp;
2198 	int		rv, exit_val;
2199 	uint64_t	ra_base, nentries;
2200 	uint64_t	qlen;
2201 
2202 	exit_val = EINVAL;	/* guarantee an error if exit on failure */
2203 
2204 	if (attr == NULL) {
2205 		DWARN(id, "ldc_init: (0x%llx) invalid attr\n", id);
2206 		return (EINVAL);
2207 	}
2208 	if (handle == NULL) {
2209 		DWARN(id, "ldc_init: (0x%llx) invalid handle\n", id);
2210 		return (EINVAL);
2211 	}
2212 
2213 	/* check if channel is valid */
2214 	rv = hv_ldc_tx_qinfo(id, &ra_base, &nentries);
2215 	if (rv == H_ECHANNEL) {
2216 		DWARN(id, "ldc_init: (0x%llx) invalid channel id\n", id);
2217 		return (EINVAL);
2218 	}
2219 
2220 	/* check if the channel has already been initialized */
2221 	mutex_enter(&ldcssp->lock);
2222 	ldcp = ldcssp->chan_list;
2223 	while (ldcp != NULL) {
2224 		if (ldcp->id == id) {
2225 			DWARN(id, "ldc_init: (0x%llx) already initialized\n",
2226 			    id);
2227 			mutex_exit(&ldcssp->lock);
2228 			return (EADDRINUSE);
2229 		}
2230 		ldcp = ldcp->next;
2231 	}
2232 	mutex_exit(&ldcssp->lock);
2233 
2234 	ASSERT(ldcp == NULL);
2235 
2236 	*handle = 0;
2237 
2238 	/* Allocate an ldcp structure */
2239 	ldcp = kmem_zalloc(sizeof (ldc_chan_t), KM_SLEEP);
2240 
2241 	/*
2242 	 * Initialize the channel and Tx lock
2243 	 *
2244 	 * The channel 'lock' protects the entire channel and
2245 	 * should be acquired before initializing, resetting,
2246 	 * destroying or reading from a channel.
2247 	 *
2248 	 * The 'tx_lock' should be acquired prior to transmitting
2249 	 * data over the channel. The lock should also be acquired
2250 	 * prior to channel reconfiguration (in order to prevent
2251 	 * concurrent writes).
2252 	 *
2253 	 * ORDERING: When both locks are being acquired, to prevent
2254 	 * deadlocks, the channel lock should be always acquired prior
2255 	 * to the tx_lock.
2256 	 */
2257 	mutex_init(&ldcp->lock, NULL, MUTEX_DRIVER, NULL);
2258 	mutex_init(&ldcp->tx_lock, NULL, MUTEX_DRIVER, NULL);
2259 
2260 	/* Initialize the channel */
2261 	ldcp->id = id;
2262 	ldcp->cb = NULL;
2263 	ldcp->cb_arg = NULL;
2264 	ldcp->cb_inprogress = B_FALSE;
2265 	ldcp->cb_enabled = B_FALSE;
2266 	ldcp->next = NULL;
2267 
2268 	/* Read attributes */
2269 	ldcp->mode = attr->mode;
2270 	ldcp->devclass = attr->devclass;
2271 	ldcp->devinst = attr->instance;
2272 	ldcp->mtu = (attr->mtu > 0) ? attr->mtu : LDC_DEFAULT_MTU;
2273 
2274 	D1(ldcp->id,
2275 	    "ldc_init: (0x%llx) channel attributes, class=0x%x, "
2276 	    "instance=0x%llx, mode=%d, mtu=%d\n",
2277 	    ldcp->id, ldcp->devclass, ldcp->devinst, ldcp->mode, ldcp->mtu);
2278 
2279 	ldcp->next_vidx = 0;
2280 	ldcp->tstate = TS_IN_RESET;
2281 	ldcp->hstate = 0;
2282 	ldcp->last_msg_snt = LDC_INIT_SEQID;
2283 	ldcp->last_ack_rcd = 0;
2284 	ldcp->last_msg_rcd = 0;
2285 
2286 	ldcp->stream_bufferp = NULL;
2287 	ldcp->exp_dring_list = NULL;
2288 	ldcp->imp_dring_list = NULL;
2289 	ldcp->mhdl_list = NULL;
2290 
2291 	ldcp->tx_intr_state = LDC_INTR_NONE;
2292 	ldcp->rx_intr_state = LDC_INTR_NONE;
2293 
2294 	/* Initialize payload size depending on whether channel is reliable */
2295 	switch (ldcp->mode) {
2296 	case LDC_MODE_RAW:
2297 		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_RAW;
2298 		ldcp->read_p = i_ldc_read_raw;
2299 		ldcp->write_p = i_ldc_write_raw;
2300 		break;
2301 	case LDC_MODE_UNRELIABLE:
2302 		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_UNRELIABLE;
2303 		ldcp->read_p = i_ldc_read_packet;
2304 		ldcp->write_p = i_ldc_write_packet;
2305 		break;
2306 	case LDC_MODE_RELIABLE:
2307 		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_RELIABLE;
2308 		ldcp->read_p = i_ldc_read_packet;
2309 		ldcp->write_p = i_ldc_write_packet;
2310 		break;
2311 	case LDC_MODE_STREAM:
2312 		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_RELIABLE;
2313 
2314 		ldcp->stream_remains = 0;
2315 		ldcp->stream_offset = 0;
2316 		ldcp->stream_bufferp = kmem_alloc(ldcp->mtu, KM_SLEEP);
2317 		ldcp->read_p = i_ldc_read_stream;
2318 		ldcp->write_p = i_ldc_write_stream;
2319 		break;
2320 	default:
2321 		exit_val = EINVAL;
2322 		goto cleanup_on_exit;
2323 	}
2324 
2325 	/*
2326 	 * qlen is (mtu * ldc_mtu_msgs) / pkt_payload. If this
2327 	 * value is smaller than default length of ldc_queue_entries,
2328 	 * qlen is set to ldc_queue_entries. Ensure that computed
2329 	 * length is a power-of-two value.
2330 	 */
2331 	qlen = (ldcp->mtu * ldc_mtu_msgs) / ldcp->pkt_payload;
2332 	if (!ISP2(qlen)) {
2333 		uint64_t	tmp = 1;
2334 		while (qlen) {
2335 			qlen >>= 1; tmp <<= 1;
2336 		}
2337 		qlen = tmp;
2338 	}
2339 
2340 	ldcp->rx_q_entries =
2341 	    (qlen < ldc_queue_entries) ? ldc_queue_entries : qlen;
2342 	ldcp->tx_q_entries = ldcp->rx_q_entries;
2343 
2344 	D1(ldcp->id, "ldc_init: queue length = 0x%llx\n", ldcp->rx_q_entries);
2345 
2346 	/* Create a transmit queue */
2347 	ldcp->tx_q_va = (uint64_t)
2348 	    contig_mem_alloc(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
2349 	if (ldcp->tx_q_va == NULL) {
2350 		cmn_err(CE_WARN,
2351 		    "ldc_init: (0x%lx) TX queue allocation failed\n",
2352 		    ldcp->id);
2353 		exit_val = ENOMEM;
2354 		goto cleanup_on_exit;
2355 	}
2356 	ldcp->tx_q_ra = va_to_pa((caddr_t)ldcp->tx_q_va);
2357 
2358 	D2(ldcp->id, "ldc_init: txq_va=0x%llx, txq_ra=0x%llx, entries=0x%llx\n",
2359 	    ldcp->tx_q_va, ldcp->tx_q_ra, ldcp->tx_q_entries);
2360 
2361 	ldcp->tstate |= TS_TXQ_RDY;
2362 
2363 	/* Create a receive queue */
2364 	ldcp->rx_q_va = (uint64_t)
2365 	    contig_mem_alloc(ldcp->rx_q_entries << LDC_PACKET_SHIFT);
2366 	if (ldcp->rx_q_va == NULL) {
2367 		cmn_err(CE_WARN,
2368 		    "ldc_init: (0x%lx) RX queue allocation failed\n",
2369 		    ldcp->id);
2370 		exit_val = ENOMEM;
2371 		goto cleanup_on_exit;
2372 	}
2373 	ldcp->rx_q_ra = va_to_pa((caddr_t)ldcp->rx_q_va);
2374 
2375 	D2(ldcp->id, "ldc_init: rxq_va=0x%llx, rxq_ra=0x%llx, entries=0x%llx\n",
2376 	    ldcp->rx_q_va, ldcp->rx_q_ra, ldcp->rx_q_entries);
2377 
2378 	ldcp->tstate |= TS_RXQ_RDY;
2379 
2380 	/* Init descriptor ring and memory handle list lock */
2381 	mutex_init(&ldcp->exp_dlist_lock, NULL, MUTEX_DRIVER, NULL);
2382 	mutex_init(&ldcp->imp_dlist_lock, NULL, MUTEX_DRIVER, NULL);
2383 	mutex_init(&ldcp->mlist_lock, NULL, MUTEX_DRIVER, NULL);
2384 
2385 	/* mark status as INITialized */
2386 	ldcp->status = LDC_INIT;
2387 
2388 	/* Add to channel list */
2389 	mutex_enter(&ldcssp->lock);
2390 	ldcp->next = ldcssp->chan_list;
2391 	ldcssp->chan_list = ldcp;
2392 	ldcssp->channel_count++;
2393 	mutex_exit(&ldcssp->lock);
2394 
2395 	/* set the handle */
2396 	*handle = (ldc_handle_t)ldcp;
2397 
2398 	D1(ldcp->id, "ldc_init: (0x%llx) channel initialized\n", ldcp->id);
2399 
2400 	return (0);
2401 
2402 cleanup_on_exit:
2403 
2404 	if (ldcp->mode == LDC_MODE_STREAM && ldcp->stream_bufferp)
2405 		kmem_free(ldcp->stream_bufferp, ldcp->mtu);
2406 
2407 	if (ldcp->tstate & TS_TXQ_RDY)
2408 		contig_mem_free((caddr_t)ldcp->tx_q_va,
2409 		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT));
2410 
2411 	if (ldcp->tstate & TS_RXQ_RDY)
2412 		contig_mem_free((caddr_t)ldcp->rx_q_va,
2413 		    (ldcp->rx_q_entries << LDC_PACKET_SHIFT));
2414 
2415 	mutex_destroy(&ldcp->tx_lock);
2416 	mutex_destroy(&ldcp->lock);
2417 
2418 	if (ldcp)
2419 		kmem_free(ldcp, sizeof (ldc_chan_t));
2420 
2421 	return (exit_val);
2422 }
2423 
2424 /*
2425  * Finalizes the LDC connection. It will return EBUSY if the
2426  * channel is open. A ldc_close() has to be done prior to
2427  * a ldc_fini operation. It frees TX/RX queues, associated
2428  * with the channel
2429  */
2430 int
2431 ldc_fini(ldc_handle_t handle)
2432 {
2433 	ldc_chan_t 	*ldcp;
2434 	ldc_chan_t 	*tmp_ldcp;
2435 	uint64_t 	id;
2436 
2437 	if (handle == NULL) {
2438 		DWARN(DBG_ALL_LDCS, "ldc_fini: invalid channel handle\n");
2439 		return (EINVAL);
2440 	}
2441 	ldcp = (ldc_chan_t *)handle;
2442 	id = ldcp->id;
2443 
2444 	mutex_enter(&ldcp->lock);
2445 
2446 	if ((ldcp->tstate & ~TS_IN_RESET) > TS_INIT) {
2447 		DWARN(ldcp->id, "ldc_fini: (0x%llx) channel is open\n",
2448 		    ldcp->id);
2449 		mutex_exit(&ldcp->lock);
2450 		return (EBUSY);
2451 	}
2452 
2453 	/* Remove from the channel list */
2454 	mutex_enter(&ldcssp->lock);
2455 	tmp_ldcp = ldcssp->chan_list;
2456 	if (tmp_ldcp == ldcp) {
2457 		ldcssp->chan_list = ldcp->next;
2458 		ldcp->next = NULL;
2459 	} else {
2460 		while (tmp_ldcp != NULL) {
2461 			if (tmp_ldcp->next == ldcp) {
2462 				tmp_ldcp->next = ldcp->next;
2463 				ldcp->next = NULL;
2464 				break;
2465 			}
2466 			tmp_ldcp = tmp_ldcp->next;
2467 		}
2468 		if (tmp_ldcp == NULL) {
2469 			DWARN(DBG_ALL_LDCS, "ldc_fini: invalid channel hdl\n");
2470 			mutex_exit(&ldcssp->lock);
2471 			mutex_exit(&ldcp->lock);
2472 			return (EINVAL);
2473 		}
2474 	}
2475 
2476 	ldcssp->channel_count--;
2477 
2478 	mutex_exit(&ldcssp->lock);
2479 
2480 	/* Free the map table for this channel */
2481 	if (ldcp->mtbl) {
2482 		(void) hv_ldc_set_map_table(ldcp->id, NULL, NULL);
2483 		if (ldcp->mtbl->contigmem)
2484 			contig_mem_free(ldcp->mtbl->table, ldcp->mtbl->size);
2485 		else
2486 			kmem_free(ldcp->mtbl->table, ldcp->mtbl->size);
2487 		mutex_destroy(&ldcp->mtbl->lock);
2488 		kmem_free(ldcp->mtbl, sizeof (ldc_mtbl_t));
2489 	}
2490 
2491 	/* Destroy descriptor ring and memory handle list lock */
2492 	mutex_destroy(&ldcp->exp_dlist_lock);
2493 	mutex_destroy(&ldcp->imp_dlist_lock);
2494 	mutex_destroy(&ldcp->mlist_lock);
2495 
2496 	/* Free the stream buffer for STREAM_MODE */
2497 	if (ldcp->mode == LDC_MODE_STREAM && ldcp->stream_bufferp)
2498 		kmem_free(ldcp->stream_bufferp, ldcp->mtu);
2499 
2500 	/* Free the RX queue */
2501 	contig_mem_free((caddr_t)ldcp->rx_q_va,
2502 	    (ldcp->rx_q_entries << LDC_PACKET_SHIFT));
2503 	ldcp->tstate &= ~TS_RXQ_RDY;
2504 
2505 	/* Free the TX queue */
2506 	contig_mem_free((caddr_t)ldcp->tx_q_va,
2507 	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT));
2508 	ldcp->tstate &= ~TS_TXQ_RDY;
2509 
2510 	mutex_exit(&ldcp->lock);
2511 
2512 	/* Destroy mutex */
2513 	mutex_destroy(&ldcp->tx_lock);
2514 	mutex_destroy(&ldcp->lock);
2515 
2516 	/* free channel structure */
2517 	kmem_free(ldcp, sizeof (ldc_chan_t));
2518 
2519 	D1(id, "ldc_fini: (0x%llx) channel finalized\n", id);
2520 
2521 	return (0);
2522 }
2523 
2524 /*
2525  * Open the LDC channel for use. It registers the TX/RX queues
2526  * with the Hypervisor. It also specifies the interrupt number
2527  * and target CPU for this channel
2528  */
2529 int
2530 ldc_open(ldc_handle_t handle)
2531 {
2532 	ldc_chan_t 	*ldcp;
2533 	int 		rv;
2534 
2535 	if (handle == NULL) {
2536 		DWARN(DBG_ALL_LDCS, "ldc_open: invalid channel handle\n");
2537 		return (EINVAL);
2538 	}
2539 
2540 	ldcp = (ldc_chan_t *)handle;
2541 
2542 	mutex_enter(&ldcp->lock);
2543 
2544 	if (ldcp->tstate < TS_INIT) {
2545 		DWARN(ldcp->id,
2546 		    "ldc_open: (0x%llx) channel not initialized\n", ldcp->id);
2547 		mutex_exit(&ldcp->lock);
2548 		return (EFAULT);
2549 	}
2550 	if ((ldcp->tstate & ~TS_IN_RESET) >= TS_OPEN) {
2551 		DWARN(ldcp->id,
2552 		    "ldc_open: (0x%llx) channel is already open\n", ldcp->id);
2553 		mutex_exit(&ldcp->lock);
2554 		return (EFAULT);
2555 	}
2556 
2557 	/*
2558 	 * Unregister/Register the tx queue with the hypervisor
2559 	 */
2560 	rv = hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
2561 	if (rv) {
2562 		cmn_err(CE_WARN,
2563 		    "ldc_open: (0x%lx) channel tx queue unconf failed\n",
2564 		    ldcp->id);
2565 		mutex_exit(&ldcp->lock);
2566 		return (EIO);
2567 	}
2568 
2569 	rv = hv_ldc_tx_qconf(ldcp->id, ldcp->tx_q_ra, ldcp->tx_q_entries);
2570 	if (rv) {
2571 		cmn_err(CE_WARN,
2572 		    "ldc_open: (0x%lx) channel tx queue conf failed\n",
2573 		    ldcp->id);
2574 		mutex_exit(&ldcp->lock);
2575 		return (EIO);
2576 	}
2577 
2578 	D2(ldcp->id, "ldc_open: (0x%llx) registered tx queue with LDC\n",
2579 	    ldcp->id);
2580 
2581 	/*
2582 	 * Unregister/Register the rx queue with the hypervisor
2583 	 */
2584 	rv = hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
2585 	if (rv) {
2586 		cmn_err(CE_WARN,
2587 		    "ldc_open: (0x%lx) channel rx queue unconf failed\n",
2588 		    ldcp->id);
2589 		mutex_exit(&ldcp->lock);
2590 		return (EIO);
2591 	}
2592 
2593 	rv = hv_ldc_rx_qconf(ldcp->id, ldcp->rx_q_ra, ldcp->rx_q_entries);
2594 	if (rv) {
2595 		cmn_err(CE_WARN,
2596 		    "ldc_open: (0x%lx) channel rx queue conf failed\n",
2597 		    ldcp->id);
2598 		mutex_exit(&ldcp->lock);
2599 		return (EIO);
2600 	}
2601 
2602 	D2(ldcp->id, "ldc_open: (0x%llx) registered rx queue with LDC\n",
2603 	    ldcp->id);
2604 
2605 	ldcp->tstate |= TS_QCONF_RDY;
2606 
2607 	/* Register the channel with the channel nexus */
2608 	rv = i_ldc_register_channel(ldcp);
2609 	if (rv && rv != EAGAIN) {
2610 		cmn_err(CE_WARN,
2611 		    "ldc_open: (0x%lx) channel register failed\n", ldcp->id);
2612 		(void) hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
2613 		(void) hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
2614 		mutex_exit(&ldcp->lock);
2615 		return (EIO);
2616 	}
2617 
2618 	/* mark channel in OPEN state */
2619 	ldcp->status = LDC_OPEN;
2620 
2621 	/* Read channel state */
2622 	rv = hv_ldc_tx_get_state(ldcp->id,
2623 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
2624 	if (rv) {
2625 		cmn_err(CE_WARN,
2626 		    "ldc_open: (0x%lx) cannot read channel state\n",
2627 		    ldcp->id);
2628 		(void) i_ldc_unregister_channel(ldcp);
2629 		(void) hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
2630 		(void) hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
2631 		mutex_exit(&ldcp->lock);
2632 		return (EIO);
2633 	}
2634 
2635 	/*
2636 	 * set the ACKd head to current head location for reliable &
2637 	 * streaming mode
2638 	 */
2639 	ldcp->tx_ackd_head = ldcp->tx_head;
2640 
2641 	/* mark channel ready if HV report link is UP (peer alloc'd Rx queue) */
2642 	if (ldcp->link_state == LDC_CHANNEL_UP ||
2643 	    ldcp->link_state == LDC_CHANNEL_RESET) {
2644 		ldcp->tstate |= TS_LINK_READY;
2645 		ldcp->status = LDC_READY;
2646 	}
2647 
2648 	/*
2649 	 * if channel is being opened in RAW mode - no handshake is needed
2650 	 * switch the channel READY and UP state
2651 	 */
2652 	if (ldcp->mode == LDC_MODE_RAW) {
2653 		ldcp->tstate = TS_UP;	/* set bits associated with LDC UP */
2654 		ldcp->status = LDC_UP;
2655 	}
2656 
2657 	mutex_exit(&ldcp->lock);
2658 
2659 	/*
2660 	 * Increment number of open channels
2661 	 */
2662 	mutex_enter(&ldcssp->lock);
2663 	ldcssp->channels_open++;
2664 	mutex_exit(&ldcssp->lock);
2665 
2666 	D1(ldcp->id,
2667 	    "ldc_open: (0x%llx) channel (0x%p) open for use "
2668 	    "(tstate=0x%x, status=0x%x)\n",
2669 	    ldcp->id, ldcp, ldcp->tstate, ldcp->status);
2670 
2671 	return (0);
2672 }
2673 
2674 /*
2675  * Close the LDC connection. It will return EBUSY if there
2676  * are memory segments or descriptor rings either bound to or
2677  * mapped over the channel
2678  */
2679 int
2680 ldc_close(ldc_handle_t handle)
2681 {
2682 	ldc_chan_t 	*ldcp;
2683 	int		rv = 0, retries = 0;
2684 	boolean_t	chk_done = B_FALSE;
2685 
2686 	if (handle == NULL) {
2687 		DWARN(DBG_ALL_LDCS, "ldc_close: invalid channel handle\n");
2688 		return (EINVAL);
2689 	}
2690 	ldcp = (ldc_chan_t *)handle;
2691 
2692 	mutex_enter(&ldcp->lock);
2693 
2694 	/* return error if channel is not open */
2695 	if ((ldcp->tstate & ~TS_IN_RESET) < TS_OPEN) {
2696 		DWARN(ldcp->id,
2697 		    "ldc_close: (0x%llx) channel is not open\n", ldcp->id);
2698 		mutex_exit(&ldcp->lock);
2699 		return (EFAULT);
2700 	}
2701 
2702 	/* if any memory handles, drings, are bound or mapped cannot close */
2703 	if (ldcp->mhdl_list != NULL) {
2704 		DWARN(ldcp->id,
2705 		    "ldc_close: (0x%llx) channel has bound memory handles\n",
2706 		    ldcp->id);
2707 		mutex_exit(&ldcp->lock);
2708 		return (EBUSY);
2709 	}
2710 	if (ldcp->exp_dring_list != NULL) {
2711 		DWARN(ldcp->id,
2712 		    "ldc_close: (0x%llx) channel has bound descriptor rings\n",
2713 		    ldcp->id);
2714 		mutex_exit(&ldcp->lock);
2715 		return (EBUSY);
2716 	}
2717 	if (ldcp->imp_dring_list != NULL) {
2718 		DWARN(ldcp->id,
2719 		    "ldc_close: (0x%llx) channel has mapped descriptor rings\n",
2720 		    ldcp->id);
2721 		mutex_exit(&ldcp->lock);
2722 		return (EBUSY);
2723 	}
2724 
2725 	if (ldcp->cb_inprogress) {
2726 		DWARN(ldcp->id, "ldc_close: (0x%llx) callback active\n",
2727 		    ldcp->id);
2728 		mutex_exit(&ldcp->lock);
2729 		return (EWOULDBLOCK);
2730 	}
2731 
2732 	/* Obtain Tx lock */
2733 	mutex_enter(&ldcp->tx_lock);
2734 
2735 	/*
2736 	 * Wait for pending transmits to complete i.e Tx queue to drain
2737 	 * if there are pending pkts - wait 1 ms and retry again
2738 	 */
2739 	for (;;) {
2740 
2741 		rv = hv_ldc_tx_get_state(ldcp->id,
2742 		    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
2743 		if (rv) {
2744 			cmn_err(CE_WARN,
2745 			    "ldc_close: (0x%lx) cannot read qptrs\n", ldcp->id);
2746 			mutex_exit(&ldcp->tx_lock);
2747 			mutex_exit(&ldcp->lock);
2748 			return (EIO);
2749 		}
2750 
2751 		if (ldcp->tx_head == ldcp->tx_tail ||
2752 		    ldcp->link_state != LDC_CHANNEL_UP) {
2753 			break;
2754 		}
2755 
2756 		if (chk_done) {
2757 			DWARN(ldcp->id,
2758 			    "ldc_close: (0x%llx) Tx queue drain timeout\n",
2759 			    ldcp->id);
2760 			break;
2761 		}
2762 
2763 		/* wait for one ms and try again */
2764 		delay(drv_usectohz(1000));
2765 		chk_done = B_TRUE;
2766 	}
2767 
2768 	/*
2769 	 * Drain the Tx and Rx queues as we are closing the
2770 	 * channel. We dont care about any pending packets.
2771 	 * We have to also drain the queue prior to clearing
2772 	 * pending interrupts, otherwise the HV will trigger
2773 	 * an interrupt the moment the interrupt state is
2774 	 * cleared.
2775 	 */
2776 	(void) i_ldc_txq_reconf(ldcp);
2777 	(void) i_ldc_rxq_drain(ldcp);
2778 
2779 	/*
2780 	 * Unregister the channel with the nexus
2781 	 */
2782 	while ((rv = i_ldc_unregister_channel(ldcp)) != 0) {
2783 
2784 		mutex_exit(&ldcp->tx_lock);
2785 		mutex_exit(&ldcp->lock);
2786 
2787 		/* if any error other than EAGAIN return back */
2788 		if (rv != EAGAIN || retries >= ldc_max_retries) {
2789 			cmn_err(CE_WARN,
2790 			    "ldc_close: (0x%lx) unregister failed, %d\n",
2791 			    ldcp->id, rv);
2792 			return (rv);
2793 		}
2794 
2795 		/*
2796 		 * As there could be pending interrupts we need
2797 		 * to wait and try again
2798 		 */
2799 		drv_usecwait(ldc_close_delay);
2800 		mutex_enter(&ldcp->lock);
2801 		mutex_enter(&ldcp->tx_lock);
2802 		retries++;
2803 	}
2804 
2805 	/*
2806 	 * Unregister queues
2807 	 */
2808 	rv = hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
2809 	if (rv) {
2810 		cmn_err(CE_WARN,
2811 		    "ldc_close: (0x%lx) channel TX queue unconf failed\n",
2812 		    ldcp->id);
2813 		mutex_exit(&ldcp->tx_lock);
2814 		mutex_exit(&ldcp->lock);
2815 		return (EIO);
2816 	}
2817 	rv = hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
2818 	if (rv) {
2819 		cmn_err(CE_WARN,
2820 		    "ldc_close: (0x%lx) channel RX queue unconf failed\n",
2821 		    ldcp->id);
2822 		mutex_exit(&ldcp->tx_lock);
2823 		mutex_exit(&ldcp->lock);
2824 		return (EIO);
2825 	}
2826 
2827 	ldcp->tstate &= ~TS_QCONF_RDY;
2828 
2829 	/* Reset channel state information */
2830 	i_ldc_reset_state(ldcp);
2831 
2832 	/* Mark channel as down and in initialized state */
2833 	ldcp->tx_ackd_head = 0;
2834 	ldcp->tx_head = 0;
2835 	ldcp->tstate = TS_IN_RESET|TS_INIT;
2836 	ldcp->status = LDC_INIT;
2837 
2838 	mutex_exit(&ldcp->tx_lock);
2839 	mutex_exit(&ldcp->lock);
2840 
2841 	/* Decrement number of open channels */
2842 	mutex_enter(&ldcssp->lock);
2843 	ldcssp->channels_open--;
2844 	mutex_exit(&ldcssp->lock);
2845 
2846 	D1(ldcp->id, "ldc_close: (0x%llx) channel closed\n", ldcp->id);
2847 
2848 	return (0);
2849 }
2850 
2851 /*
2852  * Register channel callback
2853  */
2854 int
2855 ldc_reg_callback(ldc_handle_t handle,
2856     uint_t(*cb)(uint64_t event, caddr_t arg), caddr_t arg)
2857 {
2858 	ldc_chan_t *ldcp;
2859 
2860 	if (handle == NULL) {
2861 		DWARN(DBG_ALL_LDCS,
2862 		    "ldc_reg_callback: invalid channel handle\n");
2863 		return (EINVAL);
2864 	}
2865 	if (((uint64_t)cb) < KERNELBASE) {
2866 		DWARN(DBG_ALL_LDCS, "ldc_reg_callback: invalid callback\n");
2867 		return (EINVAL);
2868 	}
2869 	ldcp = (ldc_chan_t *)handle;
2870 
2871 	mutex_enter(&ldcp->lock);
2872 
2873 	if (ldcp->cb) {
2874 		DWARN(ldcp->id, "ldc_reg_callback: (0x%llx) callback exists\n",
2875 		    ldcp->id);
2876 		mutex_exit(&ldcp->lock);
2877 		return (EIO);
2878 	}
2879 	if (ldcp->cb_inprogress) {
2880 		DWARN(ldcp->id, "ldc_reg_callback: (0x%llx) callback active\n",
2881 		    ldcp->id);
2882 		mutex_exit(&ldcp->lock);
2883 		return (EWOULDBLOCK);
2884 	}
2885 
2886 	ldcp->cb = cb;
2887 	ldcp->cb_arg = arg;
2888 	ldcp->cb_enabled = B_TRUE;
2889 
2890 	D1(ldcp->id,
2891 	    "ldc_reg_callback: (0x%llx) registered callback for channel\n",
2892 	    ldcp->id);
2893 
2894 	mutex_exit(&ldcp->lock);
2895 
2896 	return (0);
2897 }
2898 
2899 /*
2900  * Unregister channel callback
2901  */
2902 int
2903 ldc_unreg_callback(ldc_handle_t handle)
2904 {
2905 	ldc_chan_t *ldcp;
2906 
2907 	if (handle == NULL) {
2908 		DWARN(DBG_ALL_LDCS,
2909 		    "ldc_unreg_callback: invalid channel handle\n");
2910 		return (EINVAL);
2911 	}
2912 	ldcp = (ldc_chan_t *)handle;
2913 
2914 	mutex_enter(&ldcp->lock);
2915 
2916 	if (ldcp->cb == NULL) {
2917 		DWARN(ldcp->id,
2918 		    "ldc_unreg_callback: (0x%llx) no callback exists\n",
2919 		    ldcp->id);
2920 		mutex_exit(&ldcp->lock);
2921 		return (EIO);
2922 	}
2923 	if (ldcp->cb_inprogress) {
2924 		DWARN(ldcp->id,
2925 		    "ldc_unreg_callback: (0x%llx) callback active\n",
2926 		    ldcp->id);
2927 		mutex_exit(&ldcp->lock);
2928 		return (EWOULDBLOCK);
2929 	}
2930 
2931 	ldcp->cb = NULL;
2932 	ldcp->cb_arg = NULL;
2933 	ldcp->cb_enabled = B_FALSE;
2934 
2935 	D1(ldcp->id,
2936 	    "ldc_unreg_callback: (0x%llx) unregistered callback for channel\n",
2937 	    ldcp->id);
2938 
2939 	mutex_exit(&ldcp->lock);
2940 
2941 	return (0);
2942 }
2943 
2944 
2945 /*
2946  * Bring a channel up by initiating a handshake with the peer
2947  * This call is asynchronous. It will complete at a later point
2948  * in time when the peer responds back with an RTR.
2949  */
2950 int
2951 ldc_up(ldc_handle_t handle)
2952 {
2953 	int 		rv;
2954 	ldc_chan_t 	*ldcp;
2955 	ldc_msg_t 	*ldcmsg;
2956 	uint64_t 	tx_tail, tstate, link_state;
2957 
2958 	if (handle == NULL) {
2959 		DWARN(DBG_ALL_LDCS, "ldc_up: invalid channel handle\n");
2960 		return (EINVAL);
2961 	}
2962 	ldcp = (ldc_chan_t *)handle;
2963 
2964 	mutex_enter(&ldcp->lock);
2965 
2966 	D1(ldcp->id, "ldc_up: (0x%llx) doing channel UP\n", ldcp->id);
2967 
2968 	/* clear the reset state */
2969 	tstate = ldcp->tstate;
2970 	ldcp->tstate &= ~TS_IN_RESET;
2971 
2972 	if (ldcp->tstate == TS_UP) {
2973 		DWARN(ldcp->id,
2974 		    "ldc_up: (0x%llx) channel is already in UP state\n",
2975 		    ldcp->id);
2976 
2977 		/* mark channel as up */
2978 		ldcp->status = LDC_UP;
2979 
2980 		/*
2981 		 * if channel was in reset state and there was
2982 		 * pending data clear interrupt state. this will
2983 		 * trigger an interrupt, causing the RX handler to
2984 		 * to invoke the client's callback
2985 		 */
2986 		if ((tstate & TS_IN_RESET) &&
2987 		    ldcp->rx_intr_state == LDC_INTR_PEND) {
2988 			D1(ldcp->id,
2989 			    "ldc_up: (0x%llx) channel has pending data, "
2990 			    "clearing interrupt\n", ldcp->id);
2991 			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
2992 		}
2993 
2994 		mutex_exit(&ldcp->lock);
2995 		return (0);
2996 	}
2997 
2998 	/* if the channel is in RAW mode - mark it as UP, if READY */
2999 	if (ldcp->mode == LDC_MODE_RAW && ldcp->tstate >= TS_READY) {
3000 		ldcp->tstate = TS_UP;
3001 		mutex_exit(&ldcp->lock);
3002 		return (0);
3003 	}
3004 
3005 	/* Don't start another handshake if there is one in progress */
3006 	if (ldcp->hstate) {
3007 		D1(ldcp->id,
3008 		    "ldc_up: (0x%llx) channel handshake in progress\n",
3009 		    ldcp->id);
3010 		mutex_exit(&ldcp->lock);
3011 		return (0);
3012 	}
3013 
3014 	mutex_enter(&ldcp->tx_lock);
3015 
3016 	/* save current link state */
3017 	link_state = ldcp->link_state;
3018 
3019 	/* get the current tail for the LDC msg */
3020 	rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
3021 	if (rv) {
3022 		D1(ldcp->id, "ldc_up: (0x%llx) cannot initiate handshake\n",
3023 		    ldcp->id);
3024 		mutex_exit(&ldcp->tx_lock);
3025 		mutex_exit(&ldcp->lock);
3026 		return (ECONNREFUSED);
3027 	}
3028 
3029 	/*
3030 	 * If i_ldc_get_tx_tail() changed link_state to either RESET or UP,
3031 	 * from a previous state of DOWN, then mark the channel as
3032 	 * being ready for handshake.
3033 	 */
3034 	if ((link_state == LDC_CHANNEL_DOWN) &&
3035 	    (link_state != ldcp->link_state)) {
3036 
3037 		ASSERT((ldcp->link_state == LDC_CHANNEL_RESET) ||
3038 		    (ldcp->link_state == LDC_CHANNEL_UP));
3039 
3040 		if (ldcp->mode == LDC_MODE_RAW) {
3041 			ldcp->status = LDC_UP;
3042 			ldcp->tstate = TS_UP;
3043 			mutex_exit(&ldcp->tx_lock);
3044 			mutex_exit(&ldcp->lock);
3045 			return (0);
3046 		} else {
3047 			ldcp->status = LDC_READY;
3048 			ldcp->tstate |= TS_LINK_READY;
3049 		}
3050 
3051 	}
3052 
3053 	ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
3054 	ZERO_PKT(ldcmsg);
3055 
3056 	ldcmsg->type = LDC_CTRL;
3057 	ldcmsg->stype = LDC_INFO;
3058 	ldcmsg->ctrl = LDC_VER;
3059 	ldcp->next_vidx = 0;
3060 	bcopy(&ldc_versions[0], ldcmsg->udata, sizeof (ldc_versions[0]));
3061 
3062 	DUMP_LDC_PKT(ldcp, "ldc_up snd ver", (uint64_t)ldcmsg);
3063 
3064 	/* initiate the send by calling into HV and set the new tail */
3065 	tx_tail = (tx_tail + LDC_PACKET_SIZE) %
3066 	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
3067 
3068 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
3069 	if (rv) {
3070 		DWARN(ldcp->id,
3071 		    "ldc_up: (0x%llx) cannot initiate handshake rv=%d\n",
3072 		    ldcp->id, rv);
3073 		mutex_exit(&ldcp->tx_lock);
3074 		mutex_exit(&ldcp->lock);
3075 		return (rv);
3076 	}
3077 
3078 	ldcp->hstate |= TS_SENT_VER;
3079 	ldcp->tx_tail = tx_tail;
3080 	D1(ldcp->id, "ldc_up: (0x%llx) channel up initiated\n", ldcp->id);
3081 
3082 	mutex_exit(&ldcp->tx_lock);
3083 	mutex_exit(&ldcp->lock);
3084 
3085 	return (rv);
3086 }
3087 
3088 
3089 /*
3090  * Bring a channel down by resetting its state and queues
3091  */
3092 int
3093 ldc_down(ldc_handle_t handle)
3094 {
3095 	ldc_chan_t 	*ldcp;
3096 
3097 	if (handle == NULL) {
3098 		DWARN(DBG_ALL_LDCS, "ldc_down: invalid channel handle\n");
3099 		return (EINVAL);
3100 	}
3101 	ldcp = (ldc_chan_t *)handle;
3102 	mutex_enter(&ldcp->lock);
3103 	mutex_enter(&ldcp->tx_lock);
3104 	i_ldc_reset(ldcp, B_TRUE);
3105 	mutex_exit(&ldcp->tx_lock);
3106 	mutex_exit(&ldcp->lock);
3107 
3108 	return (0);
3109 }
3110 
3111 /*
3112  * Get the current channel status
3113  */
3114 int
3115 ldc_status(ldc_handle_t handle, ldc_status_t *status)
3116 {
3117 	ldc_chan_t *ldcp;
3118 
3119 	if (handle == NULL || status == NULL) {
3120 		DWARN(DBG_ALL_LDCS, "ldc_status: invalid argument\n");
3121 		return (EINVAL);
3122 	}
3123 	ldcp = (ldc_chan_t *)handle;
3124 
3125 	*status = ((ldc_chan_t *)handle)->status;
3126 
3127 	D1(ldcp->id,
3128 	    "ldc_status: (0x%llx) returned status %d\n", ldcp->id, *status);
3129 	return (0);
3130 }
3131 
3132 
3133 /*
3134  * Set the channel's callback mode - enable/disable callbacks
3135  */
3136 int
3137 ldc_set_cb_mode(ldc_handle_t handle, ldc_cb_mode_t cmode)
3138 {
3139 	ldc_chan_t 	*ldcp;
3140 
3141 	if (handle == NULL) {
3142 		DWARN(DBG_ALL_LDCS,
3143 		    "ldc_set_intr_mode: invalid channel handle\n");
3144 		return (EINVAL);
3145 	}
3146 	ldcp = (ldc_chan_t *)handle;
3147 
3148 	/*
3149 	 * Record no callbacks should be invoked
3150 	 */
3151 	mutex_enter(&ldcp->lock);
3152 
3153 	switch (cmode) {
3154 	case LDC_CB_DISABLE:
3155 		if (!ldcp->cb_enabled) {
3156 			DWARN(ldcp->id,
3157 			    "ldc_set_cb_mode: (0x%llx) callbacks disabled\n",
3158 			    ldcp->id);
3159 			break;
3160 		}
3161 		ldcp->cb_enabled = B_FALSE;
3162 
3163 		D1(ldcp->id, "ldc_set_cb_mode: (0x%llx) disabled callbacks\n",
3164 		    ldcp->id);
3165 		break;
3166 
3167 	case LDC_CB_ENABLE:
3168 		if (ldcp->cb_enabled) {
3169 			DWARN(ldcp->id,
3170 			    "ldc_set_cb_mode: (0x%llx) callbacks enabled\n",
3171 			    ldcp->id);
3172 			break;
3173 		}
3174 		ldcp->cb_enabled = B_TRUE;
3175 
3176 		D1(ldcp->id, "ldc_set_cb_mode: (0x%llx) enabled callbacks\n",
3177 		    ldcp->id);
3178 		break;
3179 	}
3180 
3181 	mutex_exit(&ldcp->lock);
3182 
3183 	return (0);
3184 }
3185 
3186 /*
3187  * Check to see if there are packets on the incoming queue
3188  * Will return hasdata = B_FALSE if there are no packets
3189  */
3190 int
3191 ldc_chkq(ldc_handle_t handle, boolean_t *hasdata)
3192 {
3193 	int 		rv;
3194 	uint64_t 	rx_head, rx_tail;
3195 	ldc_chan_t 	*ldcp;
3196 
3197 	if (handle == NULL) {
3198 		DWARN(DBG_ALL_LDCS, "ldc_chkq: invalid channel handle\n");
3199 		return (EINVAL);
3200 	}
3201 	ldcp = (ldc_chan_t *)handle;
3202 
3203 	*hasdata = B_FALSE;
3204 
3205 	mutex_enter(&ldcp->lock);
3206 
3207 	if (ldcp->tstate != TS_UP) {
3208 		D1(ldcp->id,
3209 		    "ldc_chkq: (0x%llx) channel is not up\n", ldcp->id);
3210 		mutex_exit(&ldcp->lock);
3211 		return (ECONNRESET);
3212 	}
3213 
3214 	/* Read packet(s) from the queue */
3215 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
3216 	    &ldcp->link_state);
3217 	if (rv != 0) {
3218 		cmn_err(CE_WARN,
3219 		    "ldc_chkq: (0x%lx) unable to read queue ptrs", ldcp->id);
3220 		mutex_exit(&ldcp->lock);
3221 		return (EIO);
3222 	}
3223 	/* reset the channel state if the channel went down */
3224 	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
3225 	    ldcp->link_state == LDC_CHANNEL_RESET) {
3226 		mutex_enter(&ldcp->tx_lock);
3227 		i_ldc_reset(ldcp, B_FALSE);
3228 		mutex_exit(&ldcp->tx_lock);
3229 		mutex_exit(&ldcp->lock);
3230 		return (ECONNRESET);
3231 	}
3232 
3233 	if ((rx_head != rx_tail) ||
3234 	    (ldcp->mode == LDC_MODE_STREAM && ldcp->stream_remains > 0)) {
3235 		D1(ldcp->id,
3236 		    "ldc_chkq: (0x%llx) queue has pkt(s) or buffered data\n",
3237 		    ldcp->id);
3238 		*hasdata = B_TRUE;
3239 	}
3240 
3241 	mutex_exit(&ldcp->lock);
3242 
3243 	return (0);
3244 }
3245 
3246 
3247 /*
3248  * Read 'size' amount of bytes or less. If incoming buffer
3249  * is more than 'size', ENOBUFS is returned.
3250  *
3251  * On return, size contains the number of bytes read.
3252  */
3253 int
3254 ldc_read(ldc_handle_t handle, caddr_t bufp, size_t *sizep)
3255 {
3256 	ldc_chan_t 	*ldcp;
3257 	uint64_t 	rx_head = 0, rx_tail = 0;
3258 	int		rv = 0, exit_val;
3259 
3260 	if (handle == NULL) {
3261 		DWARN(DBG_ALL_LDCS, "ldc_read: invalid channel handle\n");
3262 		return (EINVAL);
3263 	}
3264 
3265 	ldcp = (ldc_chan_t *)handle;
3266 
3267 	/* channel lock */
3268 	mutex_enter(&ldcp->lock);
3269 
3270 	if (ldcp->tstate != TS_UP) {
3271 		DWARN(ldcp->id,
3272 		    "ldc_read: (0x%llx) channel is not in UP state\n",
3273 		    ldcp->id);
3274 		exit_val = ECONNRESET;
3275 	} else {
3276 		exit_val = ldcp->read_p(ldcp, bufp, sizep);
3277 	}
3278 
3279 	/*
3280 	 * if queue has been drained - clear interrupt
3281 	 */
3282 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
3283 	    &ldcp->link_state);
3284 	if (rv != 0) {
3285 		cmn_err(CE_WARN, "ldc_read: (0x%lx) unable to read queue ptrs",
3286 		    ldcp->id);
3287 		mutex_enter(&ldcp->tx_lock);
3288 		i_ldc_reset(ldcp, B_TRUE);
3289 		mutex_exit(&ldcp->tx_lock);
3290 		mutex_exit(&ldcp->lock);
3291 		return (ECONNRESET);
3292 	}
3293 
3294 	if (exit_val == 0) {
3295 		if (ldcp->link_state == LDC_CHANNEL_DOWN ||
3296 		    ldcp->link_state == LDC_CHANNEL_RESET) {
3297 			mutex_enter(&ldcp->tx_lock);
3298 			i_ldc_reset(ldcp, B_FALSE);
3299 			exit_val = ECONNRESET;
3300 			mutex_exit(&ldcp->tx_lock);
3301 		}
3302 		if ((rv == 0) &&
3303 		    (ldcp->rx_intr_state == LDC_INTR_PEND) &&
3304 		    (rx_head == rx_tail)) {
3305 			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
3306 		}
3307 	}
3308 
3309 	mutex_exit(&ldcp->lock);
3310 	return (exit_val);
3311 }
3312 
3313 /*
3314  * Basic raw mondo read -
3315  * no interpretation of mondo contents at all.
3316  *
3317  * Enter and exit with ldcp->lock held by caller
3318  */
3319 static int
3320 i_ldc_read_raw(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep)
3321 {
3322 	uint64_t 	q_size_mask;
3323 	ldc_msg_t 	*msgp;
3324 	uint8_t		*msgbufp;
3325 	int		rv = 0, space;
3326 	uint64_t 	rx_head, rx_tail;
3327 
3328 	space = *sizep;
3329 
3330 	if (space < LDC_PAYLOAD_SIZE_RAW)
3331 		return (ENOBUFS);
3332 
3333 	ASSERT(mutex_owned(&ldcp->lock));
3334 
3335 	/* compute mask for increment */
3336 	q_size_mask = (ldcp->rx_q_entries-1)<<LDC_PACKET_SHIFT;
3337 
3338 	/*
3339 	 * Read packet(s) from the queue
3340 	 */
3341 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
3342 	    &ldcp->link_state);
3343 	if (rv != 0) {
3344 		cmn_err(CE_WARN,
3345 		    "ldc_read_raw: (0x%lx) unable to read queue ptrs",
3346 		    ldcp->id);
3347 		return (EIO);
3348 	}
3349 	D1(ldcp->id, "ldc_read_raw: (0x%llx) rxh=0x%llx,"
3350 	    " rxt=0x%llx, st=0x%llx\n",
3351 	    ldcp->id, rx_head, rx_tail, ldcp->link_state);
3352 
3353 	/* reset the channel state if the channel went down */
3354 	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
3355 	    ldcp->link_state == LDC_CHANNEL_RESET) {
3356 		mutex_enter(&ldcp->tx_lock);
3357 		i_ldc_reset(ldcp, B_FALSE);
3358 		mutex_exit(&ldcp->tx_lock);
3359 		return (ECONNRESET);
3360 	}
3361 
3362 	/*
3363 	 * Check for empty queue
3364 	 */
3365 	if (rx_head == rx_tail) {
3366 		*sizep = 0;
3367 		return (0);
3368 	}
3369 
3370 	/* get the message */
3371 	msgp = (ldc_msg_t *)(ldcp->rx_q_va + rx_head);
3372 
3373 	/* if channel is in RAW mode, copy data and return */
3374 	msgbufp = (uint8_t *)&(msgp->raw[0]);
3375 
3376 	bcopy(msgbufp, target_bufp, LDC_PAYLOAD_SIZE_RAW);
3377 
3378 	DUMP_PAYLOAD(ldcp->id, msgbufp);
3379 
3380 	*sizep = LDC_PAYLOAD_SIZE_RAW;
3381 
3382 	rx_head = (rx_head + LDC_PACKET_SIZE) & q_size_mask;
3383 	rv = i_ldc_set_rx_head(ldcp, rx_head);
3384 
3385 	return (rv);
3386 }
3387 
/*
 * Process LDC mondos to build larger packets
 * with either un-reliable or reliable delivery.
 *
 * Walks the Rx queue starting at the current head, handling control
 * packets and data ACK/NACKs inline, and concatenating the payloads of
 * data (INFO) packets into target_bufp until a packet carrying
 * LDC_FRAG_STOP is seen.
 *
 * On entry *sizep is the capacity of target_bufp. On the paths that
 * update it, *sizep is set to the number of bytes assembled, or to 0 when
 * nothing was delivered (empty queue, timeout, control/ACK error).
 *
 * Return values visible in this function:
 *	0		normal completion (including an empty queue)
 *	EINVAL		NULL buffer or zero size
 *	EAGAIN		timed out waiting for the rest of a fragmented
 *			transfer; last_msg_rcd is rewound to just before
 *			the first fragment
 *	ECONNRESET	link not up, queue pointers unreadable, DATA/NACK
 *			received, or NACK/ACK could not be sent; the
 *			channel has been reset
 *	otherwise	an error propagated from i_ldc_set_rx_head(),
 *			i_ldc_ctrlmsg(), i_ldc_process_data_ACK() or
 *			i_ldc_send_pkt()
 *
 * Enter and exit with ldcp->lock held by caller
 */
static int
i_ldc_read_packet(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep)
{
	int		rv = 0;
	uint64_t 	rx_head = 0, rx_tail = 0;
	uint64_t 	curr_head = 0;	/* our private read cursor */
	ldc_msg_t 	*msg;
	caddr_t 	target;		/* next write position in target_bufp */
	size_t 		len = 0, bytes_read = 0;
	int 		retries = 0;
	uint64_t 	q_size_mask;
	uint64_t	first_fragment = 0;	/* seqid of START pkt, 0 = none */

	target = target_bufp;

	ASSERT(mutex_owned(&ldcp->lock));

	/* check if the buffer and size are valid */
	if (target_bufp == NULL || *sizep == 0) {
		DWARN(ldcp->id, "ldc_read: (0x%llx) invalid buffer/size\n",
		    ldcp->id);
		return (EINVAL);
	}

	/* compute mask for increment */
	q_size_mask = (ldcp->rx_q_entries-1)<<LDC_PACKET_SHIFT;

	/*
	 * Read packet(s) from the queue
	 */
	rv = hv_ldc_rx_get_state(ldcp->id, &curr_head, &rx_tail,
	    &ldcp->link_state);
	if (rv != 0) {
		cmn_err(CE_WARN, "ldc_read: (0x%lx) unable to read queue ptrs",
		    ldcp->id);
		mutex_enter(&ldcp->tx_lock);
		i_ldc_reset(ldcp, B_TRUE);
		mutex_exit(&ldcp->tx_lock);
		return (ECONNRESET);
	}
	D1(ldcp->id, "ldc_read: (0x%llx) chd=0x%llx, tl=0x%llx, st=0x%llx\n",
	    ldcp->id, curr_head, rx_tail, ldcp->link_state);

	/* reset the channel state if the channel went down */
	if (ldcp->link_state != LDC_CHANNEL_UP)
		goto channel_is_reset;

	for (;;) {

		/*
		 * We have consumed everything up to the tail we last saw;
		 * re-read the queue state to pick up newly arrived packets.
		 */
		if (curr_head == rx_tail) {
			rv = hv_ldc_rx_get_state(ldcp->id,
			    &rx_head, &rx_tail, &ldcp->link_state);
			if (rv != 0) {
				cmn_err(CE_WARN,
				    "ldc_read: (0x%lx) cannot read queue ptrs",
				    ldcp->id);
				mutex_enter(&ldcp->tx_lock);
				i_ldc_reset(ldcp, B_TRUE);
				mutex_exit(&ldcp->tx_lock);
				return (ECONNRESET);
			}
			if (ldcp->link_state != LDC_CHANNEL_UP)
				goto channel_is_reset;

			if (curr_head == rx_tail) {

				/* If in the middle of a fragmented xfer */
				if (first_fragment != 0) {

					/* wait for ldc_delay usecs */
					drv_usecwait(ldc_delay);

					if (++retries < ldc_max_retries)
						continue;

					/*
					 * Give up: rewind the expected seqid
					 * to just before the first fragment.
					 */
					*sizep = 0;
					ldcp->last_msg_rcd = first_fragment - 1;
					DWARN(DBG_ALL_LDCS, "ldc_read: "
					    "(0x%llx) read timeout", ldcp->id);
					return (EAGAIN);
				}
				/* queue is empty and no transfer in progress */
				*sizep = 0;
				break;
			}
		}
		/* a packet is available, so restart the timeout count */
		retries = 0;

		D2(ldcp->id,
		    "ldc_read: (0x%llx) chd=0x%llx, rxhd=0x%llx, rxtl=0x%llx\n",
		    ldcp->id, curr_head, rx_head, rx_tail);

		/* get the message */
		msg = (ldc_msg_t *)(ldcp->rx_q_va + curr_head);

		DUMP_LDC_PKT(ldcp, "ldc_read received pkt",
		    ldcp->rx_q_va + curr_head);

		/* Check the message ID for the message received */
		if ((rv = i_ldc_check_seqid(ldcp, msg)) != 0) {

			/*
			 * NOTE(review): rx_head is only refreshed by the
			 * re-read above, so the value logged here may still
			 * be its initial 0.
			 */
			DWARN(ldcp->id, "ldc_read: (0x%llx) seqid error, "
			    "q_ptrs=0x%lx,0x%lx", ldcp->id, rx_head, rx_tail);

			/* throw away data */
			bytes_read = 0;

			/* Reset last_msg_rcd to start of message */
			if (first_fragment != 0) {
				ldcp->last_msg_rcd = first_fragment - 1;
				first_fragment = 0;
			}
			/*
			 * Send a NACK -- invalid seqid
			 * get the current tail for the response
			 */
			rv = i_ldc_send_pkt(ldcp, msg->type, LDC_NACK,
			    (msg->ctrl & LDC_CTRL_MASK));
			if (rv) {
				cmn_err(CE_NOTE,
				    "ldc_read: (0x%lx) err sending "
				    "NACK msg\n", ldcp->id);

				/* if cannot send NACK - reset channel */
				mutex_enter(&ldcp->tx_lock);
				i_ldc_reset(ldcp, B_FALSE);
				mutex_exit(&ldcp->tx_lock);
				rv = ECONNRESET;
				break;
			}

			/* purge receive queue */
			rv = i_ldc_set_rx_head(ldcp, rx_tail);

			break;
		}

		/*
		 * Process any messages of type CTRL messages
		 * Future implementations should try to pass these
		 * to LDC link by resetting the intr state.
		 *
		 * NOTE: not done as a switch() as type can be both ctrl+data
		 */
		if (msg->type & LDC_CTRL) {
			if (rv = i_ldc_ctrlmsg(ldcp, msg)) {
				/*
				 * EAGAIN: loop again on the same packet
				 * (curr_head has not been advanced).
				 */
				if (rv == EAGAIN)
					continue;
				/* any other error: drop everything queued */
				rv = i_ldc_set_rx_head(ldcp, rx_tail);
				*sizep = 0;
				bytes_read = 0;
				break;
			}
		}

		/* process data ACKs */
		if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) {
			if (rv = i_ldc_process_data_ACK(ldcp, msg)) {
				*sizep = 0;
				bytes_read = 0;
				break;
			}
		}

		/* process data NACKs */
		if ((msg->type & LDC_DATA) && (msg->stype & LDC_NACK)) {
			DWARN(ldcp->id,
			    "ldc_read: (0x%llx) received DATA/NACK", ldcp->id);
			mutex_enter(&ldcp->tx_lock);
			i_ldc_reset(ldcp, B_TRUE);
			mutex_exit(&ldcp->tx_lock);
			return (ECONNRESET);
		}

		/* process data messages */
		if ((msg->type & LDC_DATA) && (msg->stype & LDC_INFO)) {

			/* payload location depends on the channel mode */
			uint8_t *msgbuf = (uint8_t *)(
			    (ldcp->mode == LDC_MODE_RELIABLE ||
			    ldcp->mode == LDC_MODE_STREAM) ?
			    msg->rdata : msg->udata);

			D2(ldcp->id,
			    "ldc_read: (0x%llx) received data msg\n", ldcp->id);

			/* get the packet length */
			len = (msg->env & LDC_LEN_MASK);

			/*
			 * FUTURE OPTIMIZATION:
			 * dont need to set q head for every
			 * packet we read just need to do this when
			 * we are done or need to wait for more
			 * mondos to make a full packet - this is
			 * currently expensive.
			 */

			if (first_fragment == 0) {

				/*
				 * first packets should always have the start
				 * bit set (even for a single packet). If not
				 * throw away the packet
				 */
				if (!(msg->env & LDC_FRAG_START)) {

					DWARN(DBG_ALL_LDCS,
					    "ldc_read: (0x%llx) not start - "
					    "frag=%x\n", ldcp->id,
					    (msg->env) & LDC_FRAG_MASK);

					/* toss pkt, inc head, cont reading */
					bytes_read = 0;
					target = target_bufp;
					curr_head =
					    (curr_head + LDC_PACKET_SIZE)
					    & q_size_mask;
					if (rv = i_ldc_set_rx_head(ldcp,
					    curr_head))
						break;

					continue;
				}

				/* remember where this transfer began */
				first_fragment = msg->seqid;
			} else {
				/* check to see if this is a pkt w/ START bit */
				if (msg->env & LDC_FRAG_START) {
					DWARN(DBG_ALL_LDCS,
					    "ldc_read:(0x%llx) unexpected pkt"
					    " env=0x%x discarding %d bytes,"
					    " lastmsg=%d, currentmsg=%d\n",
					    ldcp->id, msg->env&LDC_FRAG_MASK,
					    bytes_read, ldcp->last_msg_rcd,
					    msg->seqid);

					/* throw data we have read so far */
					bytes_read = 0;
					target = target_bufp;
					first_fragment = msg->seqid;

					/*
					 * Drop the abandoned fragments by
					 * moving the queue head up to this
					 * packet, which starts a new transfer.
					 */
					if (rv = i_ldc_set_rx_head(ldcp,
					    curr_head))
						break;
				}
			}

			/* copy (next) pkt into buffer */
			if (len <= (*sizep - bytes_read)) {
				bcopy(msgbuf, target, len);
				target += len;
				bytes_read += len;
			} else {
				/*
				 * there is not enough space in the buffer to
				 * read this pkt. throw message away & continue
				 * reading data from queue
				 */
				DWARN(DBG_ALL_LDCS,
				    "ldc_read: (0x%llx) buffer too small, "
				    "head=0x%lx, expect=%d, got=%d\n", ldcp->id,
				    curr_head, *sizep, bytes_read+len);

				first_fragment = 0;
				target = target_bufp;
				bytes_read = 0;

				/*
				 * throw away everything received so far
				 *
				 * NOTE(review): curr_head itself is not
				 * advanced before the continue, so the same
				 * packet is examined again on the next pass.
				 */
				if (rv = i_ldc_set_rx_head(ldcp, curr_head))
					break;

				/* continue reading remaining pkts */
				continue;
			}
		}

		/* set the message id */
		ldcp->last_msg_rcd = msg->seqid;

		/* move the head one position */
		curr_head = (curr_head + LDC_PACKET_SIZE) & q_size_mask;

		if (msg->env & LDC_FRAG_STOP) {

			/*
			 * All pkts that are part of this fragmented transfer
			 * have been read or this was a single pkt read
			 * or there was an error
			 */

			/* set the queue head */
			if (rv = i_ldc_set_rx_head(ldcp, curr_head))
				bytes_read = 0;

			*sizep = bytes_read;

			break;
		}

		/* advance head if it is a CTRL packet or a DATA ACK packet */
		if ((msg->type & LDC_CTRL) ||
		    ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK))) {

			/* set the queue head */
			if (rv = i_ldc_set_rx_head(ldcp, curr_head)) {
				bytes_read = 0;
				break;
			}

			D2(ldcp->id, "ldc_read: (0x%llx) set ACK qhead 0x%llx",
			    ldcp->id, curr_head);
		}

	} /* for (;;) */


	/*
	 * If useful data was read - Send msg ACK
	 * OPTIMIZE: do not send ACK for all msgs - use some frequency
	 *
	 * NOTE(review): rv is overwritten here, so an EWOULDBLOCK from
	 * sending the ACK is what the caller sees even though data was
	 * delivered into the buffer.
	 */
	if ((bytes_read > 0) && (ldcp->mode == LDC_MODE_RELIABLE ||
	    ldcp->mode == LDC_MODE_STREAM)) {

		rv = i_ldc_send_pkt(ldcp, LDC_DATA, LDC_ACK, 0);
		if (rv && rv != EWOULDBLOCK) {
			cmn_err(CE_NOTE,
			    "ldc_read: (0x%lx) cannot send ACK\n", ldcp->id);

			/* if cannot send ACK - reset channel */
			goto channel_is_reset;
		}
	}

	D2(ldcp->id, "ldc_read: (0x%llx) end size=%d", ldcp->id, *sizep);

	return (rv);

channel_is_reset:
	/* common exit for link-down and ACK-failure: reset and report it */
	mutex_enter(&ldcp->tx_lock);
	i_ldc_reset(ldcp, B_FALSE);
	mutex_exit(&ldcp->tx_lock);
	return (ECONNRESET);
}
3736 
3737 /*
3738  * Use underlying reliable packet mechanism to fetch
3739  * and buffer incoming packets so we can hand them back as
3740  * a basic byte stream.
3741  *
3742  * Enter and exit with ldcp->lock held by caller
3743  */
3744 static int
3745 i_ldc_read_stream(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep)
3746 {
3747 	int	rv;
3748 	size_t	size;
3749 
3750 	ASSERT(mutex_owned(&ldcp->lock));
3751 
3752 	D2(ldcp->id, "i_ldc_read_stream: (0x%llx) buffer size=%d",
3753 	    ldcp->id, *sizep);
3754 
3755 	if (ldcp->stream_remains == 0) {
3756 		size = ldcp->mtu;
3757 		rv = i_ldc_read_packet(ldcp,
3758 		    (caddr_t)ldcp->stream_bufferp, &size);
3759 		D2(ldcp->id, "i_ldc_read_stream: read packet (0x%llx) size=%d",
3760 		    ldcp->id, size);
3761 
3762 		if (rv != 0)
3763 			return (rv);
3764 
3765 		ldcp->stream_remains = size;
3766 		ldcp->stream_offset = 0;
3767 	}
3768 
3769 	size = MIN(ldcp->stream_remains, *sizep);
3770 
3771 	bcopy(ldcp->stream_bufferp + ldcp->stream_offset, target_bufp, size);
3772 	ldcp->stream_offset += size;
3773 	ldcp->stream_remains -= size;
3774 
3775 	D2(ldcp->id, "i_ldc_read_stream: (0x%llx) fill from buffer size=%d",
3776 	    ldcp->id, size);
3777 
3778 	*sizep = size;
3779 	return (0);
3780 }
3781 
3782 /*
3783  * Write specified amount of bytes to the channel
3784  * in multiple pkts of pkt_payload size. Each
3785  * packet is tagged with an unique packet ID in
3786  * the case of a reliable link.
3787  *
3788  * On return, size contains the number of bytes written.
3789  */
3790 int
3791 ldc_write(ldc_handle_t handle, caddr_t buf, size_t *sizep)
3792 {
3793 	ldc_chan_t	*ldcp;
3794 	int		rv = 0;
3795 
3796 	if (handle == NULL) {
3797 		DWARN(DBG_ALL_LDCS, "ldc_write: invalid channel handle\n");
3798 		return (EINVAL);
3799 	}
3800 	ldcp = (ldc_chan_t *)handle;
3801 
3802 	/* check if writes can occur */
3803 	if (!mutex_tryenter(&ldcp->tx_lock)) {
3804 		/*
3805 		 * Could not get the lock - channel could
3806 		 * be in the process of being unconfigured
3807 		 * or reader has encountered an error
3808 		 */
3809 		return (EAGAIN);
3810 	}
3811 
3812 	/* check if non-zero data to write */
3813 	if (buf == NULL || sizep == NULL) {
3814 		DWARN(ldcp->id, "ldc_write: (0x%llx) invalid data write\n",
3815 		    ldcp->id);
3816 		mutex_exit(&ldcp->tx_lock);
3817 		return (EINVAL);
3818 	}
3819 
3820 	if (*sizep == 0) {
3821 		DWARN(ldcp->id, "ldc_write: (0x%llx) write size of zero\n",
3822 		    ldcp->id);
3823 		mutex_exit(&ldcp->tx_lock);
3824 		return (0);
3825 	}
3826 
3827 	/* Check if channel is UP for data exchange */
3828 	if (ldcp->tstate != TS_UP) {
3829 		DWARN(ldcp->id,
3830 		    "ldc_write: (0x%llx) channel is not in UP state\n",
3831 		    ldcp->id);
3832 		*sizep = 0;
3833 		rv = ECONNRESET;
3834 	} else {
3835 		rv = ldcp->write_p(ldcp, buf, sizep);
3836 	}
3837 
3838 	mutex_exit(&ldcp->tx_lock);
3839 
3840 	return (rv);
3841 }
3842 
3843 /*
3844  * Write a raw packet to the channel
3845  * On return, size contains the number of bytes written.
3846  */
3847 static int
3848 i_ldc_write_raw(ldc_chan_t *ldcp, caddr_t buf, size_t *sizep)
3849 {
3850 	ldc_msg_t 	*ldcmsg;
3851 	uint64_t 	tx_head, tx_tail, new_tail;
3852 	int		rv = 0;
3853 	size_t		size;
3854 
3855 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
3856 	ASSERT(ldcp->mode == LDC_MODE_RAW);
3857 
3858 	size = *sizep;
3859 
3860 	/*
3861 	 * Check to see if the packet size is less than or
3862 	 * equal to packet size support in raw mode
3863 	 */
3864 	if (size > ldcp->pkt_payload) {
3865 		DWARN(ldcp->id,
3866 		    "ldc_write: (0x%llx) invalid size (0x%llx) for RAW mode\n",
3867 		    ldcp->id, *sizep);
3868 		*sizep = 0;
3869 		return (EMSGSIZE);
3870 	}
3871 
3872 	/* get the qptrs for the tx queue */
3873 	rv = hv_ldc_tx_get_state(ldcp->id,
3874 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
3875 	if (rv != 0) {
3876 		cmn_err(CE_WARN,
3877 		    "ldc_write: (0x%lx) cannot read queue ptrs\n", ldcp->id);
3878 		*sizep = 0;
3879 		return (EIO);
3880 	}
3881 
3882 	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
3883 	    ldcp->link_state == LDC_CHANNEL_RESET) {
3884 		DWARN(ldcp->id,
3885 		    "ldc_write: (0x%llx) channel down/reset\n", ldcp->id);
3886 
3887 		*sizep = 0;
3888 		if (mutex_tryenter(&ldcp->lock)) {
3889 			i_ldc_reset(ldcp, B_FALSE);
3890 			mutex_exit(&ldcp->lock);
3891 		} else {
3892 			/*
3893 			 * Release Tx lock, and then reacquire channel
3894 			 * and Tx lock in correct order
3895 			 */
3896 			mutex_exit(&ldcp->tx_lock);
3897 			mutex_enter(&ldcp->lock);
3898 			mutex_enter(&ldcp->tx_lock);
3899 			i_ldc_reset(ldcp, B_FALSE);
3900 			mutex_exit(&ldcp->lock);
3901 		}
3902 		return (ECONNRESET);
3903 	}
3904 
3905 	tx_tail = ldcp->tx_tail;
3906 	tx_head = ldcp->tx_head;
3907 	new_tail = (tx_tail + LDC_PACKET_SIZE) &
3908 	    ((ldcp->tx_q_entries-1) << LDC_PACKET_SHIFT);
3909 
3910 	if (new_tail == tx_head) {
3911 		DWARN(DBG_ALL_LDCS,
3912 		    "ldc_write: (0x%llx) TX queue is full\n", ldcp->id);
3913 		*sizep = 0;
3914 		return (EWOULDBLOCK);
3915 	}
3916 
3917 	D2(ldcp->id, "ldc_write: (0x%llx) start xfer size=%d",
3918 	    ldcp->id, size);
3919 
3920 	/* Send the data now */
3921 	ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
3922 
3923 	/* copy the data into pkt */
3924 	bcopy((uint8_t *)buf, ldcmsg, size);
3925 
3926 	/* increment tail */
3927 	tx_tail = new_tail;
3928 
3929 	/*
3930 	 * All packets have been copied into the TX queue
3931 	 * update the tail ptr in the HV
3932 	 */
3933 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
3934 	if (rv) {
3935 		if (rv == EWOULDBLOCK) {
3936 			DWARN(ldcp->id, "ldc_write: (0x%llx) write timed out\n",
3937 			    ldcp->id);
3938 			*sizep = 0;
3939 			return (EWOULDBLOCK);
3940 		}
3941 
3942 		*sizep = 0;
3943 		if (mutex_tryenter(&ldcp->lock)) {
3944 			i_ldc_reset(ldcp, B_FALSE);
3945 			mutex_exit(&ldcp->lock);
3946 		} else {
3947 			/*
3948 			 * Release Tx lock, and then reacquire channel
3949 			 * and Tx lock in correct order
3950 			 */
3951 			mutex_exit(&ldcp->tx_lock);
3952 			mutex_enter(&ldcp->lock);
3953 			mutex_enter(&ldcp->tx_lock);
3954 			i_ldc_reset(ldcp, B_FALSE);
3955 			mutex_exit(&ldcp->lock);
3956 		}
3957 		return (ECONNRESET);
3958 	}
3959 
3960 	ldcp->tx_tail = tx_tail;
3961 	*sizep = size;
3962 
3963 	D2(ldcp->id, "ldc_write: (0x%llx) end xfer size=%d", ldcp->id, size);
3964 
3965 	return (rv);
3966 }
3967 
3968 
3969 /*
3970  * Write specified amount of bytes to the channel
3971  * in multiple pkts of pkt_payload size. Each
3972  * packet is tagged with an unique packet ID in
3973  * the case of a reliable link.
3974  *
3975  * On return, size contains the number of bytes written.
3976  * This function needs to ensure that the write size is < MTU size
3977  */
3978 static int
3979 i_ldc_write_packet(ldc_chan_t *ldcp, caddr_t buf, size_t *size)
3980 {
3981 	ldc_msg_t 	*ldcmsg;
3982 	uint64_t 	tx_head, tx_tail, new_tail, start;
3983 	uint64_t	txq_size_mask, numavail;
3984 	uint8_t 	*msgbuf, *source = (uint8_t *)buf;
3985 	size_t 		len, bytes_written = 0, remaining;
3986 	int		rv;
3987 	uint32_t	curr_seqid;
3988 
3989 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
3990 
3991 	ASSERT(ldcp->mode == LDC_MODE_RELIABLE ||
3992 	    ldcp->mode == LDC_MODE_UNRELIABLE ||
3993 	    ldcp->mode == LDC_MODE_STREAM);
3994 
3995 	/* compute mask for increment */
3996 	txq_size_mask = (ldcp->tx_q_entries - 1) << LDC_PACKET_SHIFT;
3997 
3998 	/* get the qptrs for the tx queue */
3999 	rv = hv_ldc_tx_get_state(ldcp->id,
4000 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
4001 	if (rv != 0) {
4002 		cmn_err(CE_WARN,
4003 		    "ldc_write: (0x%lx) cannot read queue ptrs\n", ldcp->id);
4004 		*size = 0;
4005 		return (EIO);
4006 	}
4007 
4008 	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
4009 	    ldcp->link_state == LDC_CHANNEL_RESET) {
4010 		DWARN(ldcp->id,
4011 		    "ldc_write: (0x%llx) channel down/reset\n", ldcp->id);
4012 		*size = 0;
4013 		if (mutex_tryenter(&ldcp->lock)) {
4014 			i_ldc_reset(ldcp, B_FALSE);
4015 			mutex_exit(&ldcp->lock);
4016 		} else {
4017 			/*
4018 			 * Release Tx lock, and then reacquire channel
4019 			 * and Tx lock in correct order
4020 			 */
4021 			mutex_exit(&ldcp->tx_lock);
4022 			mutex_enter(&ldcp->lock);
4023 			mutex_enter(&ldcp->tx_lock);
4024 			i_ldc_reset(ldcp, B_FALSE);
4025 			mutex_exit(&ldcp->lock);
4026 		}
4027 		return (ECONNRESET);
4028 	}
4029 
4030 	tx_tail = ldcp->tx_tail;
4031 	new_tail = (tx_tail + LDC_PACKET_SIZE) %
4032 	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
4033 
4034 	/*
4035 	 * Check to see if the queue is full. The check is done using
4036 	 * the appropriate head based on the link mode.
4037 	 */
4038 	i_ldc_get_tx_head(ldcp, &tx_head);
4039 
4040 	if (new_tail == tx_head) {
4041 		DWARN(DBG_ALL_LDCS,
4042 		    "ldc_write: (0x%llx) TX queue is full\n", ldcp->id);
4043 		*size = 0;
4044 		return (EWOULDBLOCK);
4045 	}
4046 
4047 	/*
4048 	 * Make sure that the LDC Tx queue has enough space
4049 	 */
4050 	numavail = (tx_head >> LDC_PACKET_SHIFT) - (tx_tail >> LDC_PACKET_SHIFT)
4051 	    + ldcp->tx_q_entries - 1;
4052 	numavail %= ldcp->tx_q_entries;
4053 
4054 	if (*size > (numavail * ldcp->pkt_payload)) {
4055 		DWARN(DBG_ALL_LDCS,
4056 		    "ldc_write: (0x%llx) TX queue has no space\n", ldcp->id);
4057 		return (EWOULDBLOCK);
4058 	}
4059 
4060 	D2(ldcp->id, "ldc_write: (0x%llx) start xfer size=%d",
4061 	    ldcp->id, *size);
4062 
4063 	/* Send the data now */
4064 	bytes_written = 0;
4065 	curr_seqid = ldcp->last_msg_snt;
4066 	start = tx_tail;
4067 
4068 	while (*size > bytes_written) {
4069 
4070 		ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
4071 
4072 		msgbuf = (uint8_t *)((ldcp->mode == LDC_MODE_RELIABLE ||
4073 		    ldcp->mode == LDC_MODE_STREAM) ?
4074 		    ldcmsg->rdata : ldcmsg->udata);
4075 
4076 		ldcmsg->type = LDC_DATA;
4077 		ldcmsg->stype = LDC_INFO;
4078 		ldcmsg->ctrl = 0;
4079 
4080 		remaining = *size - bytes_written;
4081 		len = min(ldcp->pkt_payload, remaining);
4082 		ldcmsg->env = (uint8_t)len;
4083 
4084 		curr_seqid++;
4085 		ldcmsg->seqid = curr_seqid;
4086 
4087 		/* copy the data into pkt */
4088 		bcopy(source, msgbuf, len);
4089 
4090 		source += len;
4091 		bytes_written += len;
4092 
4093 		/* increment tail */
4094 		tx_tail = (tx_tail + LDC_PACKET_SIZE) & txq_size_mask;
4095 
4096 		ASSERT(tx_tail != tx_head);
4097 	}
4098 
4099 	/* Set the start and stop bits */
4100 	ldcmsg->env |= LDC_FRAG_STOP;
4101 	ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + start);
4102 	ldcmsg->env |= LDC_FRAG_START;
4103 
4104 	/*
4105 	 * All packets have been copied into the TX queue
4106 	 * update the tail ptr in the HV
4107 	 */
4108 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
4109 	if (rv == 0) {
4110 		ldcp->tx_tail = tx_tail;
4111 		ldcp->last_msg_snt = curr_seqid;
4112 		*size = bytes_written;
4113 	} else {
4114 		int rv2;
4115 
4116 		if (rv != EWOULDBLOCK) {
4117 			*size = 0;
4118 			if (mutex_tryenter(&ldcp->lock)) {
4119 				i_ldc_reset(ldcp, B_FALSE);
4120 				mutex_exit(&ldcp->lock);
4121 			} else {
4122 				/*
4123 				 * Release Tx lock, and then reacquire channel
4124 				 * and Tx lock in correct order
4125 				 */
4126 				mutex_exit(&ldcp->tx_lock);
4127 				mutex_enter(&ldcp->lock);
4128 				mutex_enter(&ldcp->tx_lock);
4129 				i_ldc_reset(ldcp, B_FALSE);
4130 				mutex_exit(&ldcp->lock);
4131 			}
4132 			return (ECONNRESET);
4133 		}
4134 
4135 		D1(ldcp->id, "hv_tx_set_tail returns 0x%x (head 0x%x, "
4136 		    "old tail 0x%x, new tail 0x%x, qsize=0x%x)\n",
4137 		    rv, ldcp->tx_head, ldcp->tx_tail, tx_tail,
4138 		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT));
4139 
4140 		rv2 = hv_ldc_tx_get_state(ldcp->id,
4141 		    &tx_head, &tx_tail, &ldcp->link_state);
4142 
4143 		D1(ldcp->id, "hv_ldc_tx_get_state returns 0x%x "
4144 		    "(head 0x%x, tail 0x%x state 0x%x)\n",
4145 		    rv2, tx_head, tx_tail, ldcp->link_state);
4146 
4147 		*size = 0;
4148 	}
4149 
4150 	D2(ldcp->id, "ldc_write: (0x%llx) end xfer size=%d", ldcp->id, *size);
4151 
4152 	return (rv);
4153 }
4154 
4155 /*
4156  * Write specified amount of bytes to the channel
4157  * in multiple pkts of pkt_payload size. Each
4158  * packet is tagged with an unique packet ID in
4159  * the case of a reliable link.
4160  *
4161  * On return, size contains the number of bytes written.
4162  * This function needs to ensure that the write size is < MTU size
4163  */
4164 static int
4165 i_ldc_write_stream(ldc_chan_t *ldcp, caddr_t buf, size_t *sizep)
4166 {
4167 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
4168 	ASSERT(ldcp->mode == LDC_MODE_STREAM);
4169 
4170 	/* Truncate packet to max of MTU size */
4171 	if (*sizep > ldcp->mtu) *sizep = ldcp->mtu;
4172 	return (i_ldc_write_packet(ldcp, buf, sizep));
4173 }
4174 
4175 
4176 /*
4177  * Interfaces for channel nexus to register/unregister with LDC module
4178  * The nexus will register functions to be used to register individual
4179  * channels with the nexus and enable interrupts for the channels
4180  */
4181 int
4182 ldc_register(ldc_cnex_t *cinfo)
4183 {
4184 	ldc_chan_t	*ldcp;
4185 
4186 	if (cinfo == NULL || cinfo->dip == NULL ||
4187 	    cinfo->reg_chan == NULL || cinfo->unreg_chan == NULL ||
4188 	    cinfo->add_intr == NULL || cinfo->rem_intr == NULL ||
4189 	    cinfo->clr_intr == NULL) {
4190 
4191 		DWARN(DBG_ALL_LDCS, "ldc_register: invalid nexus info\n");
4192 		return (EINVAL);
4193 	}
4194 
4195 	mutex_enter(&ldcssp->lock);
4196 
4197 	/* nexus registration */
4198 	ldcssp->cinfo.dip = cinfo->dip;
4199 	ldcssp->cinfo.reg_chan = cinfo->reg_chan;
4200 	ldcssp->cinfo.unreg_chan = cinfo->unreg_chan;
4201 	ldcssp->cinfo.add_intr = cinfo->add_intr;
4202 	ldcssp->cinfo.rem_intr = cinfo->rem_intr;
4203 	ldcssp->cinfo.clr_intr = cinfo->clr_intr;
4204 
4205 	/* register any channels that might have been previously initialized */
4206 	ldcp = ldcssp->chan_list;
4207 	while (ldcp) {
4208 		if ((ldcp->tstate & TS_QCONF_RDY) &&
4209 		    (ldcp->tstate & TS_CNEX_RDY) == 0)
4210 			(void) i_ldc_register_channel(ldcp);
4211 
4212 		ldcp = ldcp->next;
4213 	}
4214 
4215 	mutex_exit(&ldcssp->lock);
4216 
4217 	return (0);
4218 }
4219 
4220 int
4221 ldc_unregister(ldc_cnex_t *cinfo)
4222 {
4223 	if (cinfo == NULL || cinfo->dip == NULL) {
4224 		DWARN(DBG_ALL_LDCS, "ldc_unregister: invalid nexus info\n");
4225 		return (EINVAL);
4226 	}
4227 
4228 	mutex_enter(&ldcssp->lock);
4229 
4230 	if (cinfo->dip != ldcssp->cinfo.dip) {
4231 		DWARN(DBG_ALL_LDCS, "ldc_unregister: invalid dip\n");
4232 		mutex_exit(&ldcssp->lock);
4233 		return (EINVAL);
4234 	}
4235 
4236 	/* nexus unregister */
4237 	ldcssp->cinfo.dip = NULL;
4238 	ldcssp->cinfo.reg_chan = NULL;
4239 	ldcssp->cinfo.unreg_chan = NULL;
4240 	ldcssp->cinfo.add_intr = NULL;
4241 	ldcssp->cinfo.rem_intr = NULL;
4242 	ldcssp->cinfo.clr_intr = NULL;
4243 
4244 	mutex_exit(&ldcssp->lock);
4245 
4246 	return (0);
4247 }
4248 
4249 
4250 /* ------------------------------------------------------------------------- */
4251 
4252 /*
4253  * Allocate a memory handle for the channel and link it into the list
4254  * Also choose which memory table to use if this is the first handle
4255  * being assigned to this channel
4256  */
4257 int
4258 ldc_mem_alloc_handle(ldc_handle_t handle, ldc_mem_handle_t *mhandle)
4259 {
4260 	ldc_chan_t 	*ldcp;
4261 	ldc_mhdl_t	*mhdl;
4262 
4263 	if (handle == NULL) {
4264 		DWARN(DBG_ALL_LDCS,
4265 		    "ldc_mem_alloc_handle: invalid channel handle\n");
4266 		return (EINVAL);
4267 	}
4268 	ldcp = (ldc_chan_t *)handle;
4269 
4270 	mutex_enter(&ldcp->lock);
4271 
4272 	/* check to see if channel is initalized */
4273 	if ((ldcp->tstate & ~TS_IN_RESET) < TS_INIT) {
4274 		DWARN(ldcp->id,
4275 		    "ldc_mem_alloc_handle: (0x%llx) channel not initialized\n",
4276 		    ldcp->id);
4277 		mutex_exit(&ldcp->lock);
4278 		return (EINVAL);
4279 	}
4280 
4281 	/* allocate handle for channel */
4282 	mhdl = kmem_cache_alloc(ldcssp->memhdl_cache, KM_SLEEP);
4283 
4284 	/* initialize the lock */
4285 	mutex_init(&mhdl->lock, NULL, MUTEX_DRIVER, NULL);
4286 
4287 	mhdl->myshadow = B_FALSE;
4288 	mhdl->memseg = NULL;
4289 	mhdl->ldcp = ldcp;
4290 	mhdl->status = LDC_UNBOUND;
4291 
4292 	/* insert memory handle (@ head) into list */
4293 	if (ldcp->mhdl_list == NULL) {
4294 		ldcp->mhdl_list = mhdl;
4295 		mhdl->next = NULL;
4296 	} else {
4297 		/* insert @ head */
4298 		mhdl->next = ldcp->mhdl_list;
4299 		ldcp->mhdl_list = mhdl;
4300 	}
4301 
4302 	/* return the handle */
4303 	*mhandle = (ldc_mem_handle_t)mhdl;
4304 
4305 	mutex_exit(&ldcp->lock);
4306 
4307 	D1(ldcp->id, "ldc_mem_alloc_handle: (0x%llx) allocated handle 0x%llx\n",
4308 	    ldcp->id, mhdl);
4309 
4310 	return (0);
4311 }
4312 
4313 /*
4314  * Free memory handle for the channel and unlink it from the list
4315  */
4316 int
4317 ldc_mem_free_handle(ldc_mem_handle_t mhandle)
4318 {
4319 	ldc_mhdl_t 	*mhdl, *phdl;
4320 	ldc_chan_t 	*ldcp;
4321 
4322 	if (mhandle == NULL) {
4323 		DWARN(DBG_ALL_LDCS,
4324 		    "ldc_mem_free_handle: invalid memory handle\n");
4325 		return (EINVAL);
4326 	}
4327 	mhdl = (ldc_mhdl_t *)mhandle;
4328 
4329 	mutex_enter(&mhdl->lock);
4330 
4331 	ldcp = mhdl->ldcp;
4332 
4333 	if (mhdl->status == LDC_BOUND || mhdl->status == LDC_MAPPED) {
4334 		DWARN(ldcp->id,
4335 		    "ldc_mem_free_handle: cannot free, 0x%llx hdl bound\n",
4336 		    mhdl);
4337 		mutex_exit(&mhdl->lock);
4338 		return (EINVAL);
4339 	}
4340 	mutex_exit(&mhdl->lock);
4341 
4342 	mutex_enter(&ldcp->mlist_lock);
4343 
4344 	phdl = ldcp->mhdl_list;
4345 
4346 	/* first handle */
4347 	if (phdl == mhdl) {
4348 		ldcp->mhdl_list = mhdl->next;
4349 		mutex_destroy(&mhdl->lock);
4350 		kmem_cache_free(ldcssp->memhdl_cache, mhdl);
4351 
4352 		D1(ldcp->id,
4353 		    "ldc_mem_free_handle: (0x%llx) freed handle 0x%llx\n",
4354 		    ldcp->id, mhdl);
4355 	} else {
4356 		/* walk the list - unlink and free */
4357 		while (phdl != NULL) {
4358 			if (phdl->next == mhdl) {
4359 				phdl->next = mhdl->next;
4360 				mutex_destroy(&mhdl->lock);
4361 				kmem_cache_free(ldcssp->memhdl_cache, mhdl);
4362 				D1(ldcp->id,
4363 				    "ldc_mem_free_handle: (0x%llx) freed "
4364 				    "handle 0x%llx\n", ldcp->id, mhdl);
4365 				break;
4366 			}
4367 			phdl = phdl->next;
4368 		}
4369 	}
4370 
4371 	if (phdl == NULL) {
4372 		DWARN(ldcp->id,
4373 		    "ldc_mem_free_handle: invalid handle 0x%llx\n", mhdl);
4374 		mutex_exit(&ldcp->mlist_lock);
4375 		return (EINVAL);
4376 	}
4377 
4378 	mutex_exit(&ldcp->mlist_lock);
4379 
4380 	return (0);
4381 }
4382 
4383 /*
4384  * Bind a memory handle to a virtual address.
4385  * The virtual address is converted to the corresponding real addresses.
4386  * Returns pointer to the first ldc_mem_cookie and the total number
4387  * of cookies for this virtual address. Other cookies can be obtained
4388  * using the ldc_mem_nextcookie() call. If the pages are stored in
4389  * consecutive locations in the table, a single cookie corresponding to
4390  * the first location is returned. The cookie size spans all the entries.
4391  *
4392  * If the VA corresponds to a page that is already being exported, reuse
4393  * the page and do not export it again. Bump the page's use count.
4394  */
4395 int
4396 ldc_mem_bind_handle(ldc_mem_handle_t mhandle, caddr_t vaddr, size_t len,
4397     uint8_t mtype, uint8_t perm, ldc_mem_cookie_t *cookie, uint32_t *ccount)
4398 {
4399 	ldc_mhdl_t	*mhdl;
4400 	ldc_chan_t 	*ldcp;
4401 	ldc_mtbl_t	*mtbl;
4402 	ldc_memseg_t	*memseg;
4403 	ldc_mte_t	tmp_mte;
4404 	uint64_t	index, prev_index = 0;
4405 	int64_t		cookie_idx;
4406 	uintptr_t	raddr, ra_aligned;
4407 	uint64_t	psize, poffset, v_offset;
4408 	uint64_t	pg_shift, pg_size, pg_size_code, pg_mask;
4409 	pgcnt_t		npages;
4410 	caddr_t		v_align, addr;
4411 	int 		i, rv;
4412 
4413 	if (mhandle == NULL) {
4414 		DWARN(DBG_ALL_LDCS,
4415 		    "ldc_mem_bind_handle: invalid memory handle\n");
4416 		return (EINVAL);
4417 	}
4418 	mhdl = (ldc_mhdl_t *)mhandle;
4419 	ldcp = mhdl->ldcp;
4420 
4421 	/* clear count */
4422 	*ccount = 0;
4423 
4424 	mutex_enter(&mhdl->lock);
4425 
4426 	if (mhdl->status == LDC_BOUND || mhdl->memseg != NULL) {
4427 		DWARN(ldcp->id,
4428 		    "ldc_mem_bind_handle: (0x%x) handle already bound\n",
4429 		    mhandle);
4430 		mutex_exit(&mhdl->lock);
4431 		return (EINVAL);
4432 	}
4433 
4434 	/* Force address and size to be 8-byte aligned */
4435 	if ((((uintptr_t)vaddr | len) & 0x7) != 0) {
4436 		DWARN(ldcp->id,
4437 		    "ldc_mem_bind_handle: addr/size is not 8-byte aligned\n");
4438 		mutex_exit(&mhdl->lock);
4439 		return (EINVAL);
4440 	}
4441 
4442 	/*
4443 	 * If this channel is binding a memory handle for the
4444 	 * first time allocate it a memory map table and initialize it
4445 	 */
4446 	if ((mtbl = ldcp->mtbl) == NULL) {
4447 
4448 		mutex_enter(&ldcp->lock);
4449 
4450 		/* Allocate and initialize the map table structure */
4451 		mtbl = kmem_zalloc(sizeof (ldc_mtbl_t), KM_SLEEP);
4452 		mtbl->num_entries = mtbl->num_avail = ldc_maptable_entries;
4453 		mtbl->size = ldc_maptable_entries * sizeof (ldc_mte_slot_t);
4454 		mtbl->next_entry = NULL;
4455 		mtbl->contigmem = B_TRUE;
4456 
4457 		/* Allocate the table itself */
4458 		mtbl->table = (ldc_mte_slot_t *)
4459 		    contig_mem_alloc_align(mtbl->size, MMU_PAGESIZE);
4460 		if (mtbl->table == NULL) {
4461 
4462 			/* allocate a page of memory using kmem_alloc */
4463 			mtbl->table = kmem_alloc(MMU_PAGESIZE, KM_SLEEP);
4464 			mtbl->size = MMU_PAGESIZE;
4465 			mtbl->contigmem = B_FALSE;
4466 			mtbl->num_entries = mtbl->num_avail =
4467 			    mtbl->size / sizeof (ldc_mte_slot_t);
4468 			DWARN(ldcp->id,
4469 			    "ldc_mem_bind_handle: (0x%llx) reduced tbl size "
4470 			    "to %lx entries\n", ldcp->id, mtbl->num_entries);
4471 		}
4472 
4473 		/* zero out the memory */
4474 		bzero(mtbl->table, mtbl->size);
4475 
4476 		/* initialize the lock */
4477 		mutex_init(&mtbl->lock, NULL, MUTEX_DRIVER, NULL);
4478 
4479 		/* register table for this channel */
4480 		rv = hv_ldc_set_map_table(ldcp->id,
4481 		    va_to_pa(mtbl->table), mtbl->num_entries);
4482 		if (rv != 0) {
4483 			cmn_err(CE_WARN,
4484 			    "ldc_mem_bind_handle: (0x%lx) err %d mapping tbl",
4485 			    ldcp->id, rv);
4486 			if (mtbl->contigmem)
4487 				contig_mem_free(mtbl->table, mtbl->size);
4488 			else
4489 				kmem_free(mtbl->table, mtbl->size);
4490 			mutex_destroy(&mtbl->lock);
4491 			kmem_free(mtbl, sizeof (ldc_mtbl_t));
4492 			mutex_exit(&ldcp->lock);
4493 			mutex_exit(&mhdl->lock);
4494 			return (EIO);
4495 		}
4496 
4497 		ldcp->mtbl = mtbl;
4498 		mutex_exit(&ldcp->lock);
4499 
4500 		D1(ldcp->id,
4501 		    "ldc_mem_bind_handle: (0x%llx) alloc'd map table 0x%llx\n",
4502 		    ldcp->id, ldcp->mtbl->table);
4503 	}
4504 
4505 	/* FUTURE: get the page size, pgsz code, and shift */
4506 	pg_size = MMU_PAGESIZE;
4507 	pg_size_code = page_szc(pg_size);
4508 	pg_shift = page_get_shift(pg_size_code);
4509 	pg_mask = ~(pg_size - 1);
4510 
4511 	D1(ldcp->id, "ldc_mem_bind_handle: (0x%llx) binding "
4512 	    "va 0x%llx pgsz=0x%llx, pgszc=0x%llx, pg_shift=0x%llx\n",
4513 	    ldcp->id, vaddr, pg_size, pg_size_code, pg_shift);
4514 
4515 	/* aligned VA and its offset */
4516 	v_align = (caddr_t)(((uintptr_t)vaddr) & ~(pg_size - 1));
4517 	v_offset = ((uintptr_t)vaddr) & (pg_size - 1);
4518 
4519 	npages = (len+v_offset)/pg_size;
4520 	npages = ((len+v_offset)%pg_size == 0) ? npages : npages+1;
4521 
4522 	D1(ldcp->id, "ldc_mem_bind_handle: binding "
4523 	    "(0x%llx) v=0x%llx,val=0x%llx,off=0x%x,pgs=0x%x\n",
4524 	    ldcp->id, vaddr, v_align, v_offset, npages);
4525 
4526 	/* lock the memory table - exclusive access to channel */
4527 	mutex_enter(&mtbl->lock);
4528 
4529 	if (npages > mtbl->num_avail) {
4530 		D1(ldcp->id, "ldc_mem_bind_handle: (0x%llx) no table entries\n",
4531 		    ldcp->id);
4532 		mutex_exit(&mtbl->lock);
4533 		mutex_exit(&mhdl->lock);
4534 		return (ENOMEM);
4535 	}
4536 
4537 	/* Allocate a memseg structure */
4538 	memseg = mhdl->memseg =
4539 	    kmem_cache_alloc(ldcssp->memseg_cache, KM_SLEEP);
4540 
4541 	/* Allocate memory to store all pages and cookies */
4542 	memseg->pages = kmem_zalloc((sizeof (ldc_page_t) * npages), KM_SLEEP);
4543 	memseg->cookies =
4544 	    kmem_zalloc((sizeof (ldc_mem_cookie_t) * npages), KM_SLEEP);
4545 
4546 	D2(ldcp->id, "ldc_mem_bind_handle: (0x%llx) processing 0x%llx pages\n",
4547 	    ldcp->id, npages);
4548 
4549 	addr = v_align;
4550 
4551 	/*
4552 	 * Check if direct shared memory map is enabled, if not change
4553 	 * the mapping type to include SHADOW_MAP.
4554 	 */
4555 	if (ldc_shmem_enabled == 0)
4556 		mtype = LDC_SHADOW_MAP;
4557 
4558 	/*
4559 	 * Table slots are used in a round-robin manner. The algorithm permits
4560 	 * inserting duplicate entries. Slots allocated earlier will typically
4561 	 * get freed before we get back to reusing the slot.Inserting duplicate
4562 	 * entries should be OK as we only lookup entries using the cookie addr
4563 	 * i.e. tbl index, during export, unexport and copy operation.
4564 	 *
4565 	 * One implementation what was tried was to search for a duplicate
4566 	 * page entry first and reuse it. The search overhead is very high and
4567 	 * in the vnet case dropped the perf by almost half, 50 to 24 mbps.
4568 	 * So it does make sense to avoid searching for duplicates.
4569 	 *
4570 	 * But during the process of searching for a free slot, if we find a
4571 	 * duplicate entry we will go ahead and use it, and bump its use count.
4572 	 */
4573 
4574 	/* index to start searching from */
4575 	index = mtbl->next_entry;
4576 	cookie_idx = -1;
4577 
4578 	tmp_mte.ll = 0;	/* initialise fields to 0 */
4579 
4580 	if (mtype & LDC_DIRECT_MAP) {
4581 		tmp_mte.mte_r = (perm & LDC_MEM_R) ? 1 : 0;
4582 		tmp_mte.mte_w = (perm & LDC_MEM_W) ? 1 : 0;
4583 		tmp_mte.mte_x = (perm & LDC_MEM_X) ? 1 : 0;
4584 	}
4585 
4586 	if (mtype & LDC_SHADOW_MAP) {
4587 		tmp_mte.mte_cr = (perm & LDC_MEM_R) ? 1 : 0;
4588 		tmp_mte.mte_cw = (perm & LDC_MEM_W) ? 1 : 0;
4589 	}
4590 
4591 	if (mtype & LDC_IO_MAP) {
4592 		tmp_mte.mte_ir = (perm & LDC_MEM_R) ? 1 : 0;
4593 		tmp_mte.mte_iw = (perm & LDC_MEM_W) ? 1 : 0;
4594 	}
4595 
4596 	D1(ldcp->id, "ldc_mem_bind_handle mte=0x%llx\n", tmp_mte.ll);
4597 
4598 	tmp_mte.mte_pgszc = pg_size_code;
4599 
4600 	/* initialize each mem table entry */
4601 	for (i = 0; i < npages; i++) {
4602 
4603 		/* check if slot is available in the table */
4604 		while (mtbl->table[index].entry.ll != 0) {
4605 
4606 			index = (index + 1) % mtbl->num_entries;
4607 
4608 			if (index == mtbl->next_entry) {
4609 				/* we have looped around */
4610 				DWARN(DBG_ALL_LDCS,
4611 				    "ldc_mem_bind_handle: (0x%llx) cannot find "
4612 				    "entry\n", ldcp->id);
4613 				*ccount = 0;
4614 
4615 				/* NOTE: free memory, remove previous entries */
4616 				/* this shouldnt happen as num_avail was ok */
4617 
4618 				mutex_exit(&mtbl->lock);
4619 				mutex_exit(&mhdl->lock);
4620 				return (ENOMEM);
4621 			}
4622 		}
4623 
4624 		/* get the real address */
4625 		raddr = va_to_pa((void *)addr);
4626 		ra_aligned = ((uintptr_t)raddr & pg_mask);
4627 
4628 		/* build the mte */
4629 		tmp_mte.mte_rpfn = ra_aligned >> pg_shift;
4630 
4631 		D1(ldcp->id, "ldc_mem_bind_handle mte=0x%llx\n", tmp_mte.ll);
4632 
4633 		/* update entry in table */
4634 		mtbl->table[index].entry = tmp_mte;
4635 
4636 		D2(ldcp->id, "ldc_mem_bind_handle: (0x%llx) stored MTE 0x%llx"
4637 		    " into loc 0x%llx\n", ldcp->id, tmp_mte.ll, index);
4638 
4639 		/* calculate the size and offset for this export range */
4640 		if (i == 0) {
4641 			/* first page */
4642 			psize = min((pg_size - v_offset), len);
4643 			poffset = v_offset;
4644 
4645 		} else if (i == (npages - 1)) {
4646 			/* last page */
4647 			psize =	(((uintptr_t)(vaddr + len)) &
4648 			    ((uint64_t)(pg_size-1)));
4649 			if (psize == 0)
4650 				psize = pg_size;
4651 			poffset = 0;
4652 
4653 		} else {
4654 			/* middle pages */
4655 			psize = pg_size;
4656 			poffset = 0;
4657 		}
4658 
4659 		/* store entry for this page */
4660 		memseg->pages[i].index = index;
4661 		memseg->pages[i].raddr = raddr;
4662 		memseg->pages[i].offset = poffset;
4663 		memseg->pages[i].size = psize;
4664 		memseg->pages[i].mte = &(mtbl->table[index]);
4665 
4666 		/* create the cookie */
4667 		if (i == 0 || (index != prev_index + 1)) {
4668 			cookie_idx++;
4669 			memseg->cookies[cookie_idx].addr =
4670 			    IDX2COOKIE(index, pg_size_code, pg_shift);
4671 			memseg->cookies[cookie_idx].addr |= poffset;
4672 			memseg->cookies[cookie_idx].size = psize;
4673 
4674 		} else {
4675 			memseg->cookies[cookie_idx].size += psize;
4676 		}
4677 
4678 		D1(ldcp->id, "ldc_mem_bind_handle: bound "
4679 		    "(0x%llx) va=0x%llx, idx=0x%llx, "
4680 		    "ra=0x%llx(sz=0x%x,off=0x%x)\n",
4681 		    ldcp->id, addr, index, raddr, psize, poffset);
4682 
4683 		/* decrement number of available entries */
4684 		mtbl->num_avail--;
4685 
4686 		/* increment va by page size */
4687 		addr += pg_size;
4688 
4689 		/* increment index */
4690 		prev_index = index;
4691 		index = (index + 1) % mtbl->num_entries;
4692 
4693 		/* save the next slot */
4694 		mtbl->next_entry = index;
4695 	}
4696 
4697 	mutex_exit(&mtbl->lock);
4698 
4699 	/* memory handle = bound */
4700 	mhdl->mtype = mtype;
4701 	mhdl->perm = perm;
4702 	mhdl->status = LDC_BOUND;
4703 
4704 	/* update memseg_t */
4705 	memseg->vaddr = vaddr;
4706 	memseg->raddr = memseg->pages[0].raddr;
4707 	memseg->size = len;
4708 	memseg->npages = npages;
4709 	memseg->ncookies = cookie_idx + 1;
4710 	memseg->next_cookie = (memseg->ncookies > 1) ? 1 : 0;
4711 
4712 	/* return count and first cookie */
4713 	*ccount = memseg->ncookies;
4714 	cookie->addr = memseg->cookies[0].addr;
4715 	cookie->size = memseg->cookies[0].size;
4716 
4717 	D1(ldcp->id,
4718 	    "ldc_mem_bind_handle: (0x%llx) bound 0x%llx, va=0x%llx, "
4719 	    "pgs=0x%llx cookies=0x%llx\n",
4720 	    ldcp->id, mhdl, vaddr, npages, memseg->ncookies);
4721 
4722 	mutex_exit(&mhdl->lock);
4723 	return (0);
4724 }
4725 
4726 /*
4727  * Return the next cookie associated with the specified memory handle
4728  */
4729 int
4730 ldc_mem_nextcookie(ldc_mem_handle_t mhandle, ldc_mem_cookie_t *cookie)
4731 {
4732 	ldc_mhdl_t	*mhdl;
4733 	ldc_chan_t 	*ldcp;
4734 	ldc_memseg_t	*memseg;
4735 
4736 	if (mhandle == NULL) {
4737 		DWARN(DBG_ALL_LDCS,
4738 		    "ldc_mem_nextcookie: invalid memory handle\n");
4739 		return (EINVAL);
4740 	}
4741 	mhdl = (ldc_mhdl_t *)mhandle;
4742 
4743 	mutex_enter(&mhdl->lock);
4744 
4745 	ldcp = mhdl->ldcp;
4746 	memseg = mhdl->memseg;
4747 
4748 	if (cookie == 0) {
4749 		DWARN(ldcp->id,
4750 		    "ldc_mem_nextcookie:(0x%llx) invalid cookie arg\n",
4751 		    ldcp->id);
4752 		mutex_exit(&mhdl->lock);
4753 		return (EINVAL);
4754 	}
4755 
4756 	if (memseg->next_cookie != 0) {
4757 		cookie->addr = memseg->cookies[memseg->next_cookie].addr;
4758 		cookie->size = memseg->cookies[memseg->next_cookie].size;
4759 		memseg->next_cookie++;
4760 		if (memseg->next_cookie == memseg->ncookies)
4761 			memseg->next_cookie = 0;
4762 
4763 	} else {
4764 		DWARN(ldcp->id,
4765 		    "ldc_mem_nextcookie:(0x%llx) no more cookies\n", ldcp->id);
4766 		cookie->addr = 0;
4767 		cookie->size = 0;
4768 		mutex_exit(&mhdl->lock);
4769 		return (EINVAL);
4770 	}
4771 
4772 	D1(ldcp->id,
4773 	    "ldc_mem_nextcookie: (0x%llx) cookie addr=0x%llx,sz=0x%llx\n",
4774 	    ldcp->id, cookie->addr, cookie->size);
4775 
4776 	mutex_exit(&mhdl->lock);
4777 	return (0);
4778 }
4779 
4780 /*
4781  * Unbind the virtual memory region associated with the specified
4782  * memory handle. Allassociated cookies are freed and the corresponding
4783  * RA space is no longer exported.
4784  */
4785 int
4786 ldc_mem_unbind_handle(ldc_mem_handle_t mhandle)
4787 {
4788 	ldc_mhdl_t	*mhdl;
4789 	ldc_chan_t 	*ldcp;
4790 	ldc_mtbl_t	*mtbl;
4791 	ldc_memseg_t	*memseg;
4792 	uint64_t	cookie_addr;
4793 	uint64_t	pg_shift, pg_size_code;
4794 	int		i, rv;
4795 
4796 	if (mhandle == NULL) {
4797 		DWARN(DBG_ALL_LDCS,
4798 		    "ldc_mem_unbind_handle: invalid memory handle\n");
4799 		return (EINVAL);
4800 	}
4801 	mhdl = (ldc_mhdl_t *)mhandle;
4802 
4803 	mutex_enter(&mhdl->lock);
4804 
4805 	if (mhdl->status == LDC_UNBOUND) {
4806 		DWARN(DBG_ALL_LDCS,
4807 		    "ldc_mem_unbind_handle: (0x%x) handle is not bound\n",
4808 		    mhandle);
4809 		mutex_exit(&mhdl->lock);
4810 		return (EINVAL);
4811 	}
4812 
4813 	ldcp = mhdl->ldcp;
4814 	mtbl = ldcp->mtbl;
4815 
4816 	memseg = mhdl->memseg;
4817 
4818 	/* lock the memory table - exclusive access to channel */
4819 	mutex_enter(&mtbl->lock);
4820 
4821 	/* undo the pages exported */
4822 	for (i = 0; i < memseg->npages; i++) {
4823 
4824 		/* check for mapped pages, revocation cookie != 0 */
4825 		if (memseg->pages[i].mte->cookie) {
4826 
4827 			pg_size_code = page_szc(memseg->pages[i].size);
4828 			pg_shift = page_get_shift(memseg->pages[i].size);
4829 			cookie_addr = IDX2COOKIE(memseg->pages[i].index,
4830 			    pg_size_code, pg_shift);
4831 
4832 			D1(ldcp->id, "ldc_mem_unbind_handle: (0x%llx) revoke "
4833 			    "cookie 0x%llx, rcookie 0x%llx\n", ldcp->id,
4834 			    cookie_addr, memseg->pages[i].mte->cookie);
4835 			rv = hv_ldc_revoke(ldcp->id, cookie_addr,
4836 			    memseg->pages[i].mte->cookie);
4837 			if (rv) {
4838 				DWARN(ldcp->id,
4839 				    "ldc_mem_unbind_handle: (0x%llx) cannot "
4840 				    "revoke mapping, cookie %llx\n", ldcp->id,
4841 				    cookie_addr);
4842 			}
4843 		}
4844 
4845 		/* clear the entry from the table */
4846 		memseg->pages[i].mte->entry.ll = 0;
4847 		mtbl->num_avail++;
4848 	}
4849 	mutex_exit(&mtbl->lock);
4850 
4851 	/* free the allocated memseg and page structures */
4852 	kmem_free(memseg->pages, (sizeof (ldc_page_t) * memseg->npages));
4853 	kmem_free(memseg->cookies,
4854 	    (sizeof (ldc_mem_cookie_t) * memseg->npages));
4855 	kmem_cache_free(ldcssp->memseg_cache, memseg);
4856 
4857 	/* uninitialize the memory handle */
4858 	mhdl->memseg = NULL;
4859 	mhdl->status = LDC_UNBOUND;
4860 
4861 	D1(ldcp->id, "ldc_mem_unbind_handle: (0x%llx) unbound handle 0x%llx\n",
4862 	    ldcp->id, mhdl);
4863 
4864 	mutex_exit(&mhdl->lock);
4865 	return (0);
4866 }
4867 
4868 /*
4869  * Get information about the dring. The base address of the descriptor
4870  * ring along with the type and permission are returned back.
4871  */
4872 int
4873 ldc_mem_info(ldc_mem_handle_t mhandle, ldc_mem_info_t *minfo)
4874 {
4875 	ldc_mhdl_t	*mhdl;
4876 
4877 	if (mhandle == NULL) {
4878 		DWARN(DBG_ALL_LDCS, "ldc_mem_info: invalid memory handle\n");
4879 		return (EINVAL);
4880 	}
4881 	mhdl = (ldc_mhdl_t *)mhandle;
4882 
4883 	if (minfo == NULL) {
4884 		DWARN(DBG_ALL_LDCS, "ldc_mem_info: invalid args\n");
4885 		return (EINVAL);
4886 	}
4887 
4888 	mutex_enter(&mhdl->lock);
4889 
4890 	minfo->status = mhdl->status;
4891 	if (mhdl->status == LDC_BOUND || mhdl->status == LDC_MAPPED) {
4892 		minfo->vaddr = mhdl->memseg->vaddr;
4893 		minfo->raddr = mhdl->memseg->raddr;
4894 		minfo->mtype = mhdl->mtype;
4895 		minfo->perm = mhdl->perm;
4896 	}
4897 	mutex_exit(&mhdl->lock);
4898 
4899 	return (0);
4900 }
4901 
4902 /*
4903  * Copy data either from or to the client specified virtual address
4904  * space to or from the exported memory associated with the cookies.
4905  * The direction argument determines whether the data is read from or
4906  * written to exported memory.
4907  */
4908 int
4909 ldc_mem_copy(ldc_handle_t handle, caddr_t vaddr, uint64_t off, size_t *size,
4910     ldc_mem_cookie_t *cookies, uint32_t ccount, uint8_t direction)
4911 {
4912 	ldc_chan_t 	*ldcp;
4913 	uint64_t	local_voff, local_valign;
4914 	uint64_t	cookie_addr, cookie_size;
4915 	uint64_t	pg_shift, pg_size, pg_size_code;
4916 	uint64_t 	export_caddr, export_poff, export_psize, export_size;
4917 	uint64_t	local_ra, local_poff, local_psize;
4918 	uint64_t	copy_size, copied_len = 0, total_bal = 0, idx = 0;
4919 	pgcnt_t		npages;
4920 	size_t		len = *size;
4921 	int 		i, rv = 0;
4922 
4923 	uint64_t	chid;
4924 
4925 	if (handle == NULL) {
4926 		DWARN(DBG_ALL_LDCS, "ldc_mem_copy: invalid channel handle\n");
4927 		return (EINVAL);
4928 	}
4929 	ldcp = (ldc_chan_t *)handle;
4930 	chid = ldcp->id;
4931 
4932 	/* check to see if channel is UP */
4933 	if (ldcp->tstate != TS_UP) {
4934 		DWARN(chid, "ldc_mem_copy: (0x%llx) channel is not UP\n",
4935 		    chid);
4936 		return (ECONNRESET);
4937 	}
4938 
4939 	/* Force address and size to be 8-byte aligned */
4940 	if ((((uintptr_t)vaddr | len) & 0x7) != 0) {
4941 		DWARN(chid,
4942 		    "ldc_mem_copy: addr/sz is not 8-byte aligned\n");
4943 		return (EINVAL);
4944 	}
4945 
4946 	/* Find the size of the exported memory */
4947 	export_size = 0;
4948 	for (i = 0; i < ccount; i++)
4949 		export_size += cookies[i].size;
4950 
4951 	/* check to see if offset is valid */
4952 	if (off > export_size) {
4953 		DWARN(chid,
4954 		    "ldc_mem_copy: (0x%llx) start offset > export mem size\n",
4955 		    chid);
4956 		return (EINVAL);
4957 	}
4958 
4959 	/*
4960 	 * Check to see if the export size is smaller than the size we
4961 	 * are requesting to copy - if so flag an error
4962 	 */
4963 	if ((export_size - off) < *size) {
4964 		DWARN(chid,
4965 		    "ldc_mem_copy: (0x%llx) copy size > export mem size\n",
4966 		    chid);
4967 		return (EINVAL);
4968 	}
4969 
4970 	total_bal = min(export_size, *size);
4971 
4972 	/* FUTURE: get the page size, pgsz code, and shift */
4973 	pg_size = MMU_PAGESIZE;
4974 	pg_size_code = page_szc(pg_size);
4975 	pg_shift = page_get_shift(pg_size_code);
4976 
4977 	D1(chid, "ldc_mem_copy: copying data "
4978 	    "(0x%llx) va 0x%llx pgsz=0x%llx, pgszc=0x%llx, pg_shift=0x%llx\n",
4979 	    chid, vaddr, pg_size, pg_size_code, pg_shift);
4980 
4981 	/* aligned VA and its offset */
4982 	local_valign = (((uintptr_t)vaddr) & ~(pg_size - 1));
4983 	local_voff = ((uintptr_t)vaddr) & (pg_size - 1);
4984 
4985 	npages = (len+local_voff)/pg_size;
4986 	npages = ((len+local_voff)%pg_size == 0) ? npages : npages+1;
4987 
4988 	D1(chid,
4989 	    "ldc_mem_copy: (0x%llx) v=0x%llx,val=0x%llx,off=0x%x,pgs=0x%x\n",
4990 	    chid, vaddr, local_valign, local_voff, npages);
4991 
4992 	local_ra = va_to_pa((void *)local_valign);
4993 	local_poff = local_voff;
4994 	local_psize = min(len, (pg_size - local_voff));
4995 
4996 	len -= local_psize;
4997 
4998 	/*
4999 	 * find the first cookie in the list of cookies
5000 	 * if the offset passed in is not zero
5001 	 */
5002 	for (idx = 0; idx < ccount; idx++) {
5003 		cookie_size = cookies[idx].size;
5004 		if (off < cookie_size)
5005 			break;
5006 		off -= cookie_size;
5007 	}
5008 
5009 	cookie_addr = cookies[idx].addr + off;
5010 	cookie_size = cookies[idx].size - off;
5011 
5012 	export_caddr = cookie_addr & ~(pg_size - 1);
5013 	export_poff = cookie_addr & (pg_size - 1);
5014 	export_psize = min(cookie_size, (pg_size - export_poff));
5015 
5016 	for (;;) {
5017 
5018 		copy_size = min(export_psize, local_psize);
5019 
5020 		D1(chid,
5021 		    "ldc_mem_copy:(0x%llx) dir=0x%x, caddr=0x%llx,"
5022 		    " loc_ra=0x%llx, exp_poff=0x%llx, loc_poff=0x%llx,"
5023 		    " exp_psz=0x%llx, loc_psz=0x%llx, copy_sz=0x%llx,"
5024 		    " total_bal=0x%llx\n",
5025 		    chid, direction, export_caddr, local_ra, export_poff,
5026 		    local_poff, export_psize, local_psize, copy_size,
5027 		    total_bal);
5028 
5029 		rv = hv_ldc_copy(chid, direction,
5030 		    (export_caddr + export_poff), (local_ra + local_poff),
5031 		    copy_size, &copied_len);
5032 
5033 		if (rv != 0) {
5034 			int 		error = EIO;
5035 			uint64_t	rx_hd, rx_tl;
5036 
5037 			DWARN(chid,
5038 			    "ldc_mem_copy: (0x%llx) err %d during copy\n",
5039 			    (unsigned long long)chid, rv);
5040 			DWARN(chid,
5041 			    "ldc_mem_copy: (0x%llx) dir=0x%x, caddr=0x%lx, "
5042 			    "loc_ra=0x%lx, exp_poff=0x%lx, loc_poff=0x%lx,"
5043 			    " exp_psz=0x%lx, loc_psz=0x%lx, copy_sz=0x%lx,"
5044 			    " copied_len=0x%lx, total_bal=0x%lx\n",
5045 			    chid, direction, export_caddr, local_ra,
5046 			    export_poff, local_poff, export_psize, local_psize,
5047 			    copy_size, copied_len, total_bal);
5048 
5049 			*size = *size - total_bal;
5050 
5051 			/*
5052 			 * check if reason for copy error was due to
5053 			 * a channel reset. we need to grab the lock
5054 			 * just in case we have to do a reset.
5055 			 */
5056 			mutex_enter(&ldcp->lock);
5057 			mutex_enter(&ldcp->tx_lock);
5058 
5059 			rv = hv_ldc_rx_get_state(ldcp->id,
5060 			    &rx_hd, &rx_tl, &(ldcp->link_state));
5061 			if (ldcp->link_state == LDC_CHANNEL_DOWN ||
5062 			    ldcp->link_state == LDC_CHANNEL_RESET) {
5063 				i_ldc_reset(ldcp, B_FALSE);
5064 				error = ECONNRESET;
5065 			}
5066 
5067 			mutex_exit(&ldcp->tx_lock);
5068 			mutex_exit(&ldcp->lock);
5069 
5070 			return (error);
5071 		}
5072 
5073 		ASSERT(copied_len <= copy_size);
5074 
5075 		D2(chid, "ldc_mem_copy: copied=0x%llx\n", copied_len);
5076 		export_poff += copied_len;
5077 		local_poff += copied_len;
5078 		export_psize -= copied_len;
5079 		local_psize -= copied_len;
5080 		cookie_size -= copied_len;
5081 
5082 		total_bal -= copied_len;
5083 
5084 		if (copy_size != copied_len)
5085 			continue;
5086 
5087 		if (export_psize == 0 && total_bal != 0) {
5088 
5089 			if (cookie_size == 0) {
5090 				idx++;
5091 				cookie_addr = cookies[idx].addr;
5092 				cookie_size = cookies[idx].size;
5093 
5094 				export_caddr = cookie_addr & ~(pg_size - 1);
5095 				export_poff = cookie_addr & (pg_size - 1);
5096 				export_psize =
5097 				    min(cookie_size, (pg_size-export_poff));
5098 			} else {
5099 				export_caddr += pg_size;
5100 				export_poff = 0;
5101 				export_psize = min(cookie_size, pg_size);
5102 			}
5103 		}
5104 
5105 		if (local_psize == 0 && total_bal != 0) {
5106 			local_valign += pg_size;
5107 			local_ra = va_to_pa((void *)local_valign);
5108 			local_poff = 0;
5109 			local_psize = min(pg_size, len);
5110 			len -= local_psize;
5111 		}
5112 
5113 		/* check if we are all done */
5114 		if (total_bal == 0)
5115 			break;
5116 	}
5117 
5118 
5119 	D1(chid,
5120 	    "ldc_mem_copy: (0x%llx) done copying sz=0x%llx\n",
5121 	    chid, *size);
5122 
5123 	return (0);
5124 }
5125 
5126 /*
5127  * Copy data either from or to the client specified virtual address
5128  * space to or from HV physical memory.
5129  *
5130  * The direction argument determines whether the data is read from or
5131  * written to HV memory. direction values are LDC_COPY_IN/OUT similar
5132  * to the ldc_mem_copy interface
5133  */
5134 int
5135 ldc_mem_rdwr_cookie(ldc_handle_t handle, caddr_t vaddr, size_t *size,
5136     caddr_t paddr, uint8_t direction)
5137 {
5138 	ldc_chan_t 	*ldcp;
5139 	uint64_t	local_voff, local_valign;
5140 	uint64_t	pg_shift, pg_size, pg_size_code;
5141 	uint64_t 	target_pa, target_poff, target_psize, target_size;
5142 	uint64_t	local_ra, local_poff, local_psize;
5143 	uint64_t	copy_size, copied_len = 0;
5144 	pgcnt_t		npages;
5145 	size_t		len = *size;
5146 	int 		rv = 0;
5147 
5148 	if (handle == NULL) {
5149 		DWARN(DBG_ALL_LDCS,
5150 		    "ldc_mem_rdwr_cookie: invalid channel handle\n");
5151 		return (EINVAL);
5152 	}
5153 	ldcp = (ldc_chan_t *)handle;
5154 
5155 	mutex_enter(&ldcp->lock);
5156 
5157 	/* check to see if channel is UP */
5158 	if (ldcp->tstate != TS_UP) {
5159 		DWARN(ldcp->id,
5160 		    "ldc_mem_rdwr_cookie: (0x%llx) channel is not UP\n",
5161 		    ldcp->id);
5162 		mutex_exit(&ldcp->lock);
5163 		return (ECONNRESET);
5164 	}
5165 
5166 	/* Force address and size to be 8-byte aligned */
5167 	if ((((uintptr_t)vaddr | len) & 0x7) != 0) {
5168 		DWARN(ldcp->id,
5169 		    "ldc_mem_rdwr_cookie: addr/size is not 8-byte aligned\n");
5170 		mutex_exit(&ldcp->lock);
5171 		return (EINVAL);
5172 	}
5173 
5174 	target_size = *size;
5175 
5176 	/* FUTURE: get the page size, pgsz code, and shift */
5177 	pg_size = MMU_PAGESIZE;
5178 	pg_size_code = page_szc(pg_size);
5179 	pg_shift = page_get_shift(pg_size_code);
5180 
5181 	D1(ldcp->id, "ldc_mem_rdwr_cookie: copying data "
5182 	    "(0x%llx) va 0x%llx pgsz=0x%llx, pgszc=0x%llx, pg_shift=0x%llx\n",
5183 	    ldcp->id, vaddr, pg_size, pg_size_code, pg_shift);
5184 
5185 	/* aligned VA and its offset */
5186 	local_valign = ((uintptr_t)vaddr) & ~(pg_size - 1);
5187 	local_voff = ((uintptr_t)vaddr) & (pg_size - 1);
5188 
5189 	npages = (len + local_voff) / pg_size;
5190 	npages = ((len + local_voff) % pg_size == 0) ? npages : npages+1;
5191 
5192 	D1(ldcp->id, "ldc_mem_rdwr_cookie: (0x%llx) v=0x%llx, "
5193 	    "val=0x%llx,off=0x%x,pgs=0x%x\n",
5194 	    ldcp->id, vaddr, local_valign, local_voff, npages);
5195 
5196 	local_ra = va_to_pa((void *)local_valign);
5197 	local_poff = local_voff;
5198 	local_psize = min(len, (pg_size - local_voff));
5199 
5200 	len -= local_psize;
5201 
5202 	target_pa = ((uintptr_t)paddr) & ~(pg_size - 1);
5203 	target_poff = ((uintptr_t)paddr) & (pg_size - 1);
5204 	target_psize = pg_size - target_poff;
5205 
5206 	for (;;) {
5207 
5208 		copy_size = min(target_psize, local_psize);
5209 
5210 		D1(ldcp->id,
5211 		    "ldc_mem_rdwr_cookie: (0x%llx) dir=0x%x, tar_pa=0x%llx,"
5212 		    " loc_ra=0x%llx, tar_poff=0x%llx, loc_poff=0x%llx,"
5213 		    " tar_psz=0x%llx, loc_psz=0x%llx, copy_sz=0x%llx,"
5214 		    " total_bal=0x%llx\n",
5215 		    ldcp->id, direction, target_pa, local_ra, target_poff,
5216 		    local_poff, target_psize, local_psize, copy_size,
5217 		    target_size);
5218 
5219 		rv = hv_ldc_copy(ldcp->id, direction,
5220 		    (target_pa + target_poff), (local_ra + local_poff),
5221 		    copy_size, &copied_len);
5222 
5223 		if (rv != 0) {
5224 			DWARN(DBG_ALL_LDCS,
5225 			    "ldc_mem_rdwr_cookie: (0x%lx) err %d during copy\n",
5226 			    ldcp->id, rv);
5227 			DWARN(DBG_ALL_LDCS,
5228 			    "ldc_mem_rdwr_cookie: (0x%llx) dir=%lld, "
5229 			    "tar_pa=0x%llx, loc_ra=0x%llx, tar_poff=0x%llx, "
5230 			    "loc_poff=0x%llx, tar_psz=0x%llx, loc_psz=0x%llx, "
5231 			    "copy_sz=0x%llx, total_bal=0x%llx\n",
5232 			    ldcp->id, direction, target_pa, local_ra,
5233 			    target_poff, local_poff, target_psize, local_psize,
5234 			    copy_size, target_size);
5235 
5236 			*size = *size - target_size;
5237 			mutex_exit(&ldcp->lock);
5238 			return (i_ldc_h2v_error(rv));
5239 		}
5240 
5241 		D2(ldcp->id, "ldc_mem_rdwr_cookie: copied=0x%llx\n",
5242 		    copied_len);
5243 		target_poff += copied_len;
5244 		local_poff += copied_len;
5245 		target_psize -= copied_len;
5246 		local_psize -= copied_len;
5247 
5248 		target_size -= copied_len;
5249 
5250 		if (copy_size != copied_len)
5251 			continue;
5252 
5253 		if (target_psize == 0 && target_size != 0) {
5254 			target_pa += pg_size;
5255 			target_poff = 0;
5256 			target_psize = min(pg_size, target_size);
5257 		}
5258 
5259 		if (local_psize == 0 && target_size != 0) {
5260 			local_valign += pg_size;
5261 			local_ra = va_to_pa((void *)local_valign);
5262 			local_poff = 0;
5263 			local_psize = min(pg_size, len);
5264 			len -= local_psize;
5265 		}
5266 
5267 		/* check if we are all done */
5268 		if (target_size == 0)
5269 			break;
5270 	}
5271 
5272 	mutex_exit(&ldcp->lock);
5273 
5274 	D1(ldcp->id, "ldc_mem_rdwr_cookie: (0x%llx) done copying sz=0x%llx\n",
5275 	    ldcp->id, *size);
5276 
5277 	return (0);
5278 }
5279 
5280 /*
5281  * Map an exported memory segment into the local address space. If the
5282  * memory range was exported for direct map access, a HV call is made
5283  * to allocate a RA range. If the map is done via a shadow copy, local
5284  * shadow memory is allocated and the base VA is returned in 'vaddr'. If
5285  * the mapping is a direct map then the RA is returned in 'raddr'.
5286  */
5287 int
5288 ldc_mem_map(ldc_mem_handle_t mhandle, ldc_mem_cookie_t *cookie, uint32_t ccount,
5289     uint8_t mtype, uint8_t perm, caddr_t *vaddr, caddr_t *raddr)
5290 {
5291 	int		i, j, idx, rv, retries;
5292 	ldc_chan_t 	*ldcp;
5293 	ldc_mhdl_t	*mhdl;
5294 	ldc_memseg_t	*memseg;
5295 	caddr_t		tmpaddr;
5296 	uint64_t	map_perm = perm;
5297 	uint64_t	pg_size, pg_shift, pg_size_code, pg_mask;
5298 	uint64_t	exp_size = 0, base_off, map_size, npages;
5299 	uint64_t	cookie_addr, cookie_off, cookie_size;
5300 	tte_t		ldc_tte;
5301 
5302 	if (mhandle == NULL) {
5303 		DWARN(DBG_ALL_LDCS, "ldc_mem_map: invalid memory handle\n");
5304 		return (EINVAL);
5305 	}
5306 	mhdl = (ldc_mhdl_t *)mhandle;
5307 
5308 	mutex_enter(&mhdl->lock);
5309 
5310 	if (mhdl->status == LDC_BOUND || mhdl->status == LDC_MAPPED ||
5311 	    mhdl->memseg != NULL) {
5312 		DWARN(DBG_ALL_LDCS,
5313 		    "ldc_mem_map: (0x%llx) handle bound/mapped\n", mhandle);
5314 		mutex_exit(&mhdl->lock);
5315 		return (EINVAL);
5316 	}
5317 
5318 	ldcp = mhdl->ldcp;
5319 
5320 	mutex_enter(&ldcp->lock);
5321 
5322 	if (ldcp->tstate != TS_UP) {
5323 		DWARN(ldcp->id,
5324 		    "ldc_mem_dring_map: (0x%llx) channel is not UP\n",
5325 		    ldcp->id);
5326 		mutex_exit(&ldcp->lock);
5327 		mutex_exit(&mhdl->lock);
5328 		return (ECONNRESET);
5329 	}
5330 
5331 	if ((mtype & (LDC_SHADOW_MAP|LDC_DIRECT_MAP|LDC_IO_MAP)) == 0) {
5332 		DWARN(ldcp->id, "ldc_mem_map: invalid map type\n");
5333 		mutex_exit(&ldcp->lock);
5334 		mutex_exit(&mhdl->lock);
5335 		return (EINVAL);
5336 	}
5337 
5338 	D1(ldcp->id, "ldc_mem_map: (0x%llx) cookie = 0x%llx,0x%llx\n",
5339 	    ldcp->id, cookie->addr, cookie->size);
5340 
5341 	/* FUTURE: get the page size, pgsz code, and shift */
5342 	pg_size = MMU_PAGESIZE;
5343 	pg_size_code = page_szc(pg_size);
5344 	pg_shift = page_get_shift(pg_size_code);
5345 	pg_mask = ~(pg_size - 1);
5346 
5347 	/* calculate the number of pages in the exported cookie */
5348 	base_off = cookie[0].addr & (pg_size - 1);
5349 	for (idx = 0; idx < ccount; idx++)
5350 		exp_size += cookie[idx].size;
5351 	map_size = P2ROUNDUP((exp_size + base_off), pg_size);
5352 	npages = (map_size >> pg_shift);
5353 
5354 	/* Allocate memseg structure */
5355 	memseg = mhdl->memseg =
5356 	    kmem_cache_alloc(ldcssp->memseg_cache, KM_SLEEP);
5357 
5358 	/* Allocate memory to store all pages and cookies */
5359 	memseg->pages =	kmem_zalloc((sizeof (ldc_page_t) * npages), KM_SLEEP);
5360 	memseg->cookies =
5361 	    kmem_zalloc((sizeof (ldc_mem_cookie_t) * ccount), KM_SLEEP);
5362 
5363 	D2(ldcp->id, "ldc_mem_map: (0x%llx) exp_size=0x%llx, map_size=0x%llx,"
5364 	    "pages=0x%llx\n", ldcp->id, exp_size, map_size, npages);
5365 
5366 	/*
5367 	 * Check if direct map over shared memory is enabled, if not change
5368 	 * the mapping type to SHADOW_MAP.
5369 	 */
5370 	if (ldc_shmem_enabled == 0)
5371 		mtype = LDC_SHADOW_MAP;
5372 
5373 	/*
5374 	 * Check to see if the client is requesting direct or shadow map
5375 	 * If direct map is requested, try to map remote memory first,
5376 	 * and if that fails, revert to shadow map
5377 	 */
5378 	if (mtype == LDC_DIRECT_MAP) {
5379 
5380 		/* Allocate kernel virtual space for mapping */
5381 		memseg->vaddr = vmem_xalloc(heap_arena, map_size,
5382 		    pg_size, 0, 0, NULL, NULL, VM_NOSLEEP);
5383 		if (memseg->vaddr == NULL) {
5384 			cmn_err(CE_WARN,
5385 			    "ldc_mem_map: (0x%lx) memory map failed\n",
5386 			    ldcp->id);
5387 			kmem_free(memseg->cookies,
5388 			    (sizeof (ldc_mem_cookie_t) * ccount));
5389 			kmem_free(memseg->pages,
5390 			    (sizeof (ldc_page_t) * npages));
5391 			kmem_cache_free(ldcssp->memseg_cache, memseg);
5392 
5393 			mutex_exit(&ldcp->lock);
5394 			mutex_exit(&mhdl->lock);
5395 			return (ENOMEM);
5396 		}
5397 
5398 		/* Unload previous mapping */
5399 		hat_unload(kas.a_hat, memseg->vaddr, map_size,
5400 		    HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK);
5401 
5402 		/* for each cookie passed in - map into address space */
5403 		idx = 0;
5404 		cookie_size = 0;
5405 		tmpaddr = memseg->vaddr;
5406 
5407 		for (i = 0; i < npages; i++) {
5408 
5409 			if (cookie_size == 0) {
5410 				ASSERT(idx < ccount);
5411 				cookie_addr = cookie[idx].addr & pg_mask;
5412 				cookie_off = cookie[idx].addr & (pg_size - 1);
5413 				cookie_size =
5414 				    P2ROUNDUP((cookie_off + cookie[idx].size),
5415 				    pg_size);
5416 				idx++;
5417 			}
5418 
5419 			D1(ldcp->id, "ldc_mem_map: (0x%llx) mapping "
5420 			    "cookie 0x%llx, bal=0x%llx\n", ldcp->id,
5421 			    cookie_addr, cookie_size);
5422 
5423 			/* map the cookie into address space */
5424 			for (retries = 0; retries < ldc_max_retries;
5425 			    retries++) {
5426 
5427 				rv = hv_ldc_mapin(ldcp->id, cookie_addr,
5428 				    &memseg->pages[i].raddr, &map_perm);
5429 				if (rv != H_EWOULDBLOCK && rv != H_ETOOMANY)
5430 					break;
5431 
5432 				drv_usecwait(ldc_delay);
5433 			}
5434 
5435 			if (rv || memseg->pages[i].raddr == 0) {
5436 				DWARN(ldcp->id,
5437 				    "ldc_mem_map: (0x%llx) hv mapin err %d\n",
5438 				    ldcp->id, rv);
5439 
5440 				/* remove previous mapins */
5441 				hat_unload(kas.a_hat, memseg->vaddr, map_size,
5442 				    HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK);
5443 				for (j = 0; j < i; j++) {
5444 					rv = hv_ldc_unmap(
5445 					    memseg->pages[j].raddr);
5446 					if (rv) {
5447 						DWARN(ldcp->id,
5448 						    "ldc_mem_map: (0x%llx) "
5449 						    "cannot unmap ra=0x%llx\n",
5450 						    ldcp->id,
5451 						    memseg->pages[j].raddr);
5452 					}
5453 				}
5454 
5455 				/* free kernel virtual space */
5456 				vmem_free(heap_arena, (void *)memseg->vaddr,
5457 				    map_size);
5458 
5459 				/* direct map failed - revert to shadow map */
5460 				mtype = LDC_SHADOW_MAP;
5461 				break;
5462 
5463 			} else {
5464 
5465 				D1(ldcp->id,
5466 				    "ldc_mem_map: (0x%llx) vtop map 0x%llx -> "
5467 				    "0x%llx, cookie=0x%llx, perm=0x%llx\n",
5468 				    ldcp->id, tmpaddr, memseg->pages[i].raddr,
5469 				    cookie_addr, perm);
5470 
5471 				/*
5472 				 * NOTE: Calling hat_devload directly, causes it
5473 				 * to look for page_t using the pfn. Since this
5474 				 * addr is greater than the memlist, it treates
5475 				 * it as non-memory
5476 				 */
5477 				sfmmu_memtte(&ldc_tte,
5478 				    (pfn_t)(memseg->pages[i].raddr >> pg_shift),
5479 				    PROT_READ | PROT_WRITE | HAT_NOSYNC, TTE8K);
5480 
5481 				D1(ldcp->id,
5482 				    "ldc_mem_map: (0x%llx) ra 0x%llx -> "
5483 				    "tte 0x%llx\n", ldcp->id,
5484 				    memseg->pages[i].raddr, ldc_tte);
5485 
5486 				sfmmu_tteload(kas.a_hat, &ldc_tte, tmpaddr,
5487 				    NULL, HAT_LOAD_LOCK);
5488 
5489 				cookie_size -= pg_size;
5490 				cookie_addr += pg_size;
5491 				tmpaddr += pg_size;
5492 			}
5493 		}
5494 	}
5495 
5496 	if (mtype == LDC_SHADOW_MAP) {
5497 		if (*vaddr == NULL) {
5498 			memseg->vaddr = kmem_zalloc(exp_size, KM_SLEEP);
5499 			mhdl->myshadow = B_TRUE;
5500 
5501 			D1(ldcp->id, "ldc_mem_map: (0x%llx) allocated "
5502 			    "shadow page va=0x%llx\n", ldcp->id, memseg->vaddr);
5503 		} else {
5504 			/*
5505 			 * Use client supplied memory for memseg->vaddr
5506 			 * WARNING: assuming that client mem is >= exp_size
5507 			 */
5508 			memseg->vaddr = *vaddr;
5509 		}
5510 
5511 		/* Save all page and cookie information */
5512 		for (i = 0, tmpaddr = memseg->vaddr; i < npages; i++) {
5513 			memseg->pages[i].raddr = va_to_pa(tmpaddr);
5514 			memseg->pages[i].size = pg_size;
5515 			tmpaddr += pg_size;
5516 		}
5517 
5518 	}
5519 
5520 	/* save all cookies */
5521 	bcopy(cookie, memseg->cookies, ccount * sizeof (ldc_mem_cookie_t));
5522 
5523 	/* update memseg_t */
5524 	memseg->raddr = memseg->pages[0].raddr;
5525 	memseg->size = (mtype == LDC_SHADOW_MAP) ? exp_size : map_size;
5526 	memseg->npages = npages;
5527 	memseg->ncookies = ccount;
5528 	memseg->next_cookie = 0;
5529 
5530 	/* memory handle = mapped */
5531 	mhdl->mtype = mtype;
5532 	mhdl->perm = perm;
5533 	mhdl->status = LDC_MAPPED;
5534 
5535 	D1(ldcp->id, "ldc_mem_map: (0x%llx) mapped 0x%llx, ra=0x%llx, "
5536 	    "va=0x%llx, pgs=0x%llx cookies=0x%llx\n",
5537 	    ldcp->id, mhdl, memseg->raddr, memseg->vaddr,
5538 	    memseg->npages, memseg->ncookies);
5539 
5540 	if (mtype == LDC_SHADOW_MAP)
5541 		base_off = 0;
5542 	if (raddr)
5543 		*raddr = (caddr_t)(memseg->raddr | base_off);
5544 	if (vaddr)
5545 		*vaddr = (caddr_t)((uintptr_t)memseg->vaddr | base_off);
5546 
5547 	mutex_exit(&ldcp->lock);
5548 	mutex_exit(&mhdl->lock);
5549 	return (0);
5550 }
5551 
5552 /*
5553  * Unmap a memory segment. Free shadow memory (if any).
5554  */
5555 int
5556 ldc_mem_unmap(ldc_mem_handle_t mhandle)
5557 {
5558 	int		i, rv;
5559 	ldc_mhdl_t	*mhdl = (ldc_mhdl_t *)mhandle;
5560 	ldc_chan_t 	*ldcp;
5561 	ldc_memseg_t	*memseg;
5562 
5563 	if (mhdl == 0 || mhdl->status != LDC_MAPPED) {
5564 		DWARN(DBG_ALL_LDCS,
5565 		    "ldc_mem_unmap: (0x%llx) handle is not mapped\n",
5566 		    mhandle);
5567 		return (EINVAL);
5568 	}
5569 
5570 	mutex_enter(&mhdl->lock);
5571 
5572 	ldcp = mhdl->ldcp;
5573 	memseg = mhdl->memseg;
5574 
5575 	D1(ldcp->id, "ldc_mem_unmap: (0x%llx) unmapping handle 0x%llx\n",
5576 	    ldcp->id, mhdl);
5577 
5578 	/* if we allocated shadow memory - free it */
5579 	if (mhdl->mtype == LDC_SHADOW_MAP && mhdl->myshadow) {
5580 		kmem_free(memseg->vaddr, memseg->size);
5581 	} else if (mhdl->mtype == LDC_DIRECT_MAP) {
5582 
5583 		/* unmap in the case of DIRECT_MAP */
5584 		hat_unload(kas.a_hat, memseg->vaddr, memseg->size,
5585 		    HAT_UNLOAD_UNLOCK);
5586 
5587 		for (i = 0; i < memseg->npages; i++) {
5588 			rv = hv_ldc_unmap(memseg->pages[i].raddr);
5589 			if (rv) {
5590 				cmn_err(CE_WARN,
5591 				    "ldc_mem_map: (0x%lx) hv unmap err %d\n",
5592 				    ldcp->id, rv);
5593 			}
5594 		}
5595 
5596 		vmem_free(heap_arena, (void *)memseg->vaddr, memseg->size);
5597 	}
5598 
5599 	/* free the allocated memseg and page structures */
5600 	kmem_free(memseg->pages, (sizeof (ldc_page_t) * memseg->npages));
5601 	kmem_free(memseg->cookies,
5602 	    (sizeof (ldc_mem_cookie_t) * memseg->ncookies));
5603 	kmem_cache_free(ldcssp->memseg_cache, memseg);
5604 
5605 	/* uninitialize the memory handle */
5606 	mhdl->memseg = NULL;
5607 	mhdl->status = LDC_UNBOUND;
5608 
5609 	D1(ldcp->id, "ldc_mem_unmap: (0x%llx) unmapped handle 0x%llx\n",
5610 	    ldcp->id, mhdl);
5611 
5612 	mutex_exit(&mhdl->lock);
5613 	return (0);
5614 }
5615 
5616 /*
5617  * Internal entry point for LDC mapped memory entry consistency
5618  * semantics. Acquire copies the contents of the remote memory
5619  * into the local shadow copy. The release operation copies the local
5620  * contents into the remote memory. The offset and size specify the
5621  * bounds for the memory range being synchronized.
5622  */
5623 static int
5624 i_ldc_mem_acquire_release(ldc_mem_handle_t mhandle, uint8_t direction,
5625     uint64_t offset, size_t size)
5626 {
5627 	int 		err;
5628 	ldc_mhdl_t	*mhdl;
5629 	ldc_chan_t	*ldcp;
5630 	ldc_memseg_t	*memseg;
5631 	caddr_t		local_vaddr;
5632 	size_t		copy_size;
5633 
5634 	if (mhandle == NULL) {
5635 		DWARN(DBG_ALL_LDCS,
5636 		    "i_ldc_mem_acquire_release: invalid memory handle\n");
5637 		return (EINVAL);
5638 	}
5639 	mhdl = (ldc_mhdl_t *)mhandle;
5640 
5641 	mutex_enter(&mhdl->lock);
5642 
5643 	if (mhdl->status != LDC_MAPPED || mhdl->ldcp == NULL) {
5644 		DWARN(DBG_ALL_LDCS,
5645 		    "i_ldc_mem_acquire_release: not mapped memory\n");
5646 		mutex_exit(&mhdl->lock);
5647 		return (EINVAL);
5648 	}
5649 
5650 	/* do nothing for direct map */
5651 	if (mhdl->mtype == LDC_DIRECT_MAP) {
5652 		mutex_exit(&mhdl->lock);
5653 		return (0);
5654 	}
5655 
5656 	/* do nothing if COPY_IN+MEM_W and COPY_OUT+MEM_R */
5657 	if ((direction == LDC_COPY_IN && (mhdl->perm & LDC_MEM_R) == 0) ||
5658 	    (direction == LDC_COPY_OUT && (mhdl->perm & LDC_MEM_W) == 0)) {
5659 		mutex_exit(&mhdl->lock);
5660 		return (0);
5661 	}
5662 
5663 	if (offset >= mhdl->memseg->size ||
5664 	    (offset + size) > mhdl->memseg->size) {
5665 		DWARN(DBG_ALL_LDCS,
5666 		    "i_ldc_mem_acquire_release: memory out of range\n");
5667 		mutex_exit(&mhdl->lock);
5668 		return (EINVAL);
5669 	}
5670 
5671 	/* get the channel handle and memory segment */
5672 	ldcp = mhdl->ldcp;
5673 	memseg = mhdl->memseg;
5674 
5675 	if (mhdl->mtype == LDC_SHADOW_MAP) {
5676 
5677 		local_vaddr = memseg->vaddr + offset;
5678 		copy_size = size;
5679 
5680 		/* copy to/from remote from/to local memory */
5681 		err = ldc_mem_copy((ldc_handle_t)ldcp, local_vaddr, offset,
5682 		    &copy_size, memseg->cookies, memseg->ncookies,
5683 		    direction);
5684 		if (err || copy_size != size) {
5685 			DWARN(ldcp->id,
5686 			    "i_ldc_mem_acquire_release: copy failed\n");
5687 			mutex_exit(&mhdl->lock);
5688 			return (err);
5689 		}
5690 	}
5691 
5692 	mutex_exit(&mhdl->lock);
5693 
5694 	return (0);
5695 }
5696 
5697 /*
5698  * Ensure that the contents in the remote memory seg are consistent
5699  * with the contents if of local segment
5700  */
5701 int
5702 ldc_mem_acquire(ldc_mem_handle_t mhandle, uint64_t offset, uint64_t size)
5703 {
5704 	return (i_ldc_mem_acquire_release(mhandle, LDC_COPY_IN, offset, size));
5705 }
5706 
5707 
5708 /*
5709  * Ensure that the contents in the local memory seg are consistent
5710  * with the contents if of remote segment
5711  */
5712 int
5713 ldc_mem_release(ldc_mem_handle_t mhandle, uint64_t offset, uint64_t size)
5714 {
5715 	return (i_ldc_mem_acquire_release(mhandle, LDC_COPY_OUT, offset, size));
5716 }
5717 
5718 /*
5719  * Allocate a descriptor ring. The size of each each descriptor
5720  * must be 8-byte aligned and the entire ring should be a multiple
5721  * of MMU_PAGESIZE.
5722  */
5723 int
5724 ldc_mem_dring_create(uint32_t len, uint32_t dsize, ldc_dring_handle_t *dhandle)
5725 {
5726 	ldc_dring_t *dringp;
5727 	size_t size = (dsize * len);
5728 
5729 	D1(DBG_ALL_LDCS, "ldc_mem_dring_create: len=0x%x, size=0x%x\n",
5730 	    len, dsize);
5731 
5732 	if (dhandle == NULL) {
5733 		DWARN(DBG_ALL_LDCS, "ldc_mem_dring_create: invalid dhandle\n");
5734 		return (EINVAL);
5735 	}
5736 
5737 	if (len == 0) {
5738 		DWARN(DBG_ALL_LDCS, "ldc_mem_dring_create: invalid length\n");
5739 		return (EINVAL);
5740 	}
5741 
5742 	/* descriptor size should be 8-byte aligned */
5743 	if (dsize == 0 || (dsize & 0x7)) {
5744 		DWARN(DBG_ALL_LDCS, "ldc_mem_dring_create: invalid size\n");
5745 		return (EINVAL);
5746 	}
5747 
5748 	*dhandle = 0;
5749 
5750 	/* Allocate a desc ring structure */
5751 	dringp = kmem_zalloc(sizeof (ldc_dring_t), KM_SLEEP);
5752 
5753 	/* Initialize dring */
5754 	dringp->length = len;
5755 	dringp->dsize = dsize;
5756 
5757 	/* round off to multiple of pagesize */
5758 	dringp->size = (size & MMU_PAGEMASK);
5759 	if (size & MMU_PAGEOFFSET)
5760 		dringp->size += MMU_PAGESIZE;
5761 
5762 	dringp->status = LDC_UNBOUND;
5763 
5764 	/* allocate descriptor ring memory */
5765 	dringp->base = kmem_zalloc(dringp->size, KM_SLEEP);
5766 
5767 	/* initialize the desc ring lock */
5768 	mutex_init(&dringp->lock, NULL, MUTEX_DRIVER, NULL);
5769 
5770 	/* Add descriptor ring to the head of global list */
5771 	mutex_enter(&ldcssp->lock);
5772 	dringp->next = ldcssp->dring_list;
5773 	ldcssp->dring_list = dringp;
5774 	mutex_exit(&ldcssp->lock);
5775 
5776 	*dhandle = (ldc_dring_handle_t)dringp;
5777 
5778 	D1(DBG_ALL_LDCS, "ldc_mem_dring_create: dring allocated\n");
5779 
5780 	return (0);
5781 }
5782 
5783 
5784 /*
5785  * Destroy a descriptor ring.
5786  */
5787 int
5788 ldc_mem_dring_destroy(ldc_dring_handle_t dhandle)
5789 {
5790 	ldc_dring_t *dringp;
5791 	ldc_dring_t *tmp_dringp;
5792 
5793 	D1(DBG_ALL_LDCS, "ldc_mem_dring_destroy: entered\n");
5794 
5795 	if (dhandle == NULL) {
5796 		DWARN(DBG_ALL_LDCS,
5797 		    "ldc_mem_dring_destroy: invalid desc ring handle\n");
5798 		return (EINVAL);
5799 	}
5800 	dringp = (ldc_dring_t *)dhandle;
5801 
5802 	if (dringp->status == LDC_BOUND) {
5803 		DWARN(DBG_ALL_LDCS,
5804 		    "ldc_mem_dring_destroy: desc ring is bound\n");
5805 		return (EACCES);
5806 	}
5807 
5808 	mutex_enter(&dringp->lock);
5809 	mutex_enter(&ldcssp->lock);
5810 
5811 	/* remove from linked list - if not bound */
5812 	tmp_dringp = ldcssp->dring_list;
5813 	if (tmp_dringp == dringp) {
5814 		ldcssp->dring_list = dringp->next;
5815 		dringp->next = NULL;
5816 
5817 	} else {
5818 		while (tmp_dringp != NULL) {
5819 			if (tmp_dringp->next == dringp) {
5820 				tmp_dringp->next = dringp->next;
5821 				dringp->next = NULL;
5822 				break;
5823 			}
5824 			tmp_dringp = tmp_dringp->next;
5825 		}
5826 		if (tmp_dringp == NULL) {
5827 			DWARN(DBG_ALL_LDCS,
5828 			    "ldc_mem_dring_destroy: invalid descriptor\n");
5829 			mutex_exit(&ldcssp->lock);
5830 			mutex_exit(&dringp->lock);
5831 			return (EINVAL);
5832 		}
5833 	}
5834 
5835 	mutex_exit(&ldcssp->lock);
5836 
5837 	/* free the descriptor ring */
5838 	kmem_free(dringp->base, dringp->size);
5839 
5840 	mutex_exit(&dringp->lock);
5841 
5842 	/* destroy dring lock */
5843 	mutex_destroy(&dringp->lock);
5844 
5845 	/* free desc ring object */
5846 	kmem_free(dringp, sizeof (ldc_dring_t));
5847 
5848 	return (0);
5849 }
5850 
5851 /*
5852  * Bind a previously allocated dring to a channel. The channel should
5853  * be OPEN in order to bind the ring to the channel. Returns back a
5854  * descriptor ring cookie. The descriptor ring is exported for remote
5855  * access by the client at the other end of the channel. An entry for
5856  * dring pages is stored in map table (via call to ldc_mem_bind_handle).
5857  */
5858 int
5859 ldc_mem_dring_bind(ldc_handle_t handle, ldc_dring_handle_t dhandle,
5860     uint8_t mtype, uint8_t perm, ldc_mem_cookie_t *cookie, uint32_t *ccount)
5861 {
5862 	int		err;
5863 	ldc_chan_t 	*ldcp;
5864 	ldc_dring_t	*dringp;
5865 	ldc_mem_handle_t mhandle;
5866 
5867 	/* check to see if channel is initalized */
5868 	if (handle == NULL) {
5869 		DWARN(DBG_ALL_LDCS,
5870 		    "ldc_mem_dring_bind: invalid channel handle\n");
5871 		return (EINVAL);
5872 	}
5873 	ldcp = (ldc_chan_t *)handle;
5874 
5875 	if (dhandle == NULL) {
5876 		DWARN(DBG_ALL_LDCS,
5877 		    "ldc_mem_dring_bind: invalid desc ring handle\n");
5878 		return (EINVAL);
5879 	}
5880 	dringp = (ldc_dring_t *)dhandle;
5881 
5882 	if (cookie == NULL) {
5883 		DWARN(ldcp->id,
5884 		    "ldc_mem_dring_bind: invalid cookie arg\n");
5885 		return (EINVAL);
5886 	}
5887 
5888 	mutex_enter(&dringp->lock);
5889 
5890 	if (dringp->status == LDC_BOUND) {
5891 		DWARN(DBG_ALL_LDCS,
5892 		    "ldc_mem_dring_bind: (0x%llx) descriptor ring is bound\n",
5893 		    ldcp->id);
5894 		mutex_exit(&dringp->lock);
5895 		return (EINVAL);
5896 	}
5897 
5898 	if ((perm & LDC_MEM_RW) == 0) {
5899 		DWARN(DBG_ALL_LDCS,
5900 		    "ldc_mem_dring_bind: invalid permissions\n");
5901 		mutex_exit(&dringp->lock);
5902 		return (EINVAL);
5903 	}
5904 
5905 	if ((mtype & (LDC_SHADOW_MAP|LDC_DIRECT_MAP|LDC_IO_MAP)) == 0) {
5906 		DWARN(DBG_ALL_LDCS, "ldc_mem_dring_bind: invalid type\n");
5907 		mutex_exit(&dringp->lock);
5908 		return (EINVAL);
5909 	}
5910 
5911 	dringp->ldcp = ldcp;
5912 
5913 	/* create an memory handle */
5914 	err = ldc_mem_alloc_handle(handle, &mhandle);
5915 	if (err || mhandle == NULL) {
5916 		DWARN(DBG_ALL_LDCS,
5917 		    "ldc_mem_dring_bind: (0x%llx) error allocating mhandle\n",
5918 		    ldcp->id);
5919 		mutex_exit(&dringp->lock);
5920 		return (err);
5921 	}
5922 	dringp->mhdl = mhandle;
5923 
5924 	/* bind the descriptor ring to channel */
5925 	err = ldc_mem_bind_handle(mhandle, dringp->base, dringp->size,
5926 	    mtype, perm, cookie, ccount);
5927 	if (err) {
5928 		DWARN(ldcp->id,
5929 		    "ldc_mem_dring_bind: (0x%llx) error binding mhandle\n",
5930 		    ldcp->id);
5931 		mutex_exit(&dringp->lock);
5932 		return (err);
5933 	}
5934 
5935 	/*
5936 	 * For now return error if we get more than one cookie
5937 	 * FUTURE: Return multiple cookies ..
5938 	 */
5939 	if (*ccount > 1) {
5940 		(void) ldc_mem_unbind_handle(mhandle);
5941 		(void) ldc_mem_free_handle(mhandle);
5942 
5943 		dringp->ldcp = NULL;
5944 		dringp->mhdl = NULL;
5945 		*ccount = 0;
5946 
5947 		mutex_exit(&dringp->lock);
5948 		return (EAGAIN);
5949 	}
5950 
5951 	/* Add descriptor ring to channel's exported dring list */
5952 	mutex_enter(&ldcp->exp_dlist_lock);
5953 	dringp->ch_next = ldcp->exp_dring_list;
5954 	ldcp->exp_dring_list = dringp;
5955 	mutex_exit(&ldcp->exp_dlist_lock);
5956 
5957 	dringp->status = LDC_BOUND;
5958 
5959 	mutex_exit(&dringp->lock);
5960 
5961 	return (0);
5962 }
5963 
5964 /*
5965  * Return the next cookie associated with the specified dring handle
5966  */
5967 int
5968 ldc_mem_dring_nextcookie(ldc_dring_handle_t dhandle, ldc_mem_cookie_t *cookie)
5969 {
5970 	int		rv = 0;
5971 	ldc_dring_t 	*dringp;
5972 	ldc_chan_t	*ldcp;
5973 
5974 	if (dhandle == NULL) {
5975 		DWARN(DBG_ALL_LDCS,
5976 		    "ldc_mem_dring_nextcookie: invalid desc ring handle\n");
5977 		return (EINVAL);
5978 	}
5979 	dringp = (ldc_dring_t *)dhandle;
5980 	mutex_enter(&dringp->lock);
5981 
5982 	if (dringp->status != LDC_BOUND) {
5983 		DWARN(DBG_ALL_LDCS,
5984 		    "ldc_mem_dring_nextcookie: descriptor ring 0x%llx "
5985 		    "is not bound\n", dringp);
5986 		mutex_exit(&dringp->lock);
5987 		return (EINVAL);
5988 	}
5989 
5990 	ldcp = dringp->ldcp;
5991 
5992 	if (cookie == NULL) {
5993 		DWARN(ldcp->id,
5994 		    "ldc_mem_dring_nextcookie:(0x%llx) invalid cookie arg\n",
5995 		    ldcp->id);
5996 		mutex_exit(&dringp->lock);
5997 		return (EINVAL);
5998 	}
5999 
6000 	rv = ldc_mem_nextcookie((ldc_mem_handle_t)dringp->mhdl, cookie);
6001 	mutex_exit(&dringp->lock);
6002 
6003 	return (rv);
6004 }
6005 /*
6006  * Unbind a previously bound dring from a channel.
6007  */
6008 int
6009 ldc_mem_dring_unbind(ldc_dring_handle_t dhandle)
6010 {
6011 	ldc_dring_t 	*dringp;
6012 	ldc_dring_t	*tmp_dringp;
6013 	ldc_chan_t	*ldcp;
6014 
6015 	if (dhandle == NULL) {
6016 		DWARN(DBG_ALL_LDCS,
6017 		    "ldc_mem_dring_unbind: invalid desc ring handle\n");
6018 		return (EINVAL);
6019 	}
6020 	dringp = (ldc_dring_t *)dhandle;
6021 
6022 	mutex_enter(&dringp->lock);
6023 
6024 	if (dringp->status == LDC_UNBOUND) {
6025 		DWARN(DBG_ALL_LDCS,
6026 		    "ldc_mem_dring_bind: descriptor ring 0x%llx is unbound\n",
6027 		    dringp);
6028 		mutex_exit(&dringp->lock);
6029 		return (EINVAL);
6030 	}
6031 	ldcp = dringp->ldcp;
6032 
6033 	mutex_enter(&ldcp->exp_dlist_lock);
6034 
6035 	tmp_dringp = ldcp->exp_dring_list;
6036 	if (tmp_dringp == dringp) {
6037 		ldcp->exp_dring_list = dringp->ch_next;
6038 		dringp->ch_next = NULL;
6039 
6040 	} else {
6041 		while (tmp_dringp != NULL) {
6042 			if (tmp_dringp->ch_next == dringp) {
6043 				tmp_dringp->ch_next = dringp->ch_next;
6044 				dringp->ch_next = NULL;
6045 				break;
6046 			}
6047 			tmp_dringp = tmp_dringp->ch_next;
6048 		}
6049 		if (tmp_dringp == NULL) {
6050 			DWARN(DBG_ALL_LDCS,
6051 			    "ldc_mem_dring_unbind: invalid descriptor\n");
6052 			mutex_exit(&ldcp->exp_dlist_lock);
6053 			mutex_exit(&dringp->lock);
6054 			return (EINVAL);
6055 		}
6056 	}
6057 
6058 	mutex_exit(&ldcp->exp_dlist_lock);
6059 
6060 	(void) ldc_mem_unbind_handle((ldc_mem_handle_t)dringp->mhdl);
6061 	(void) ldc_mem_free_handle((ldc_mem_handle_t)dringp->mhdl);
6062 
6063 	dringp->ldcp = NULL;
6064 	dringp->mhdl = NULL;
6065 	dringp->status = LDC_UNBOUND;
6066 
6067 	mutex_exit(&dringp->lock);
6068 
6069 	return (0);
6070 }
6071 
6072 /*
6073  * Get information about the dring. The base address of the descriptor
6074  * ring along with the type and permission are returned back.
6075  */
6076 int
6077 ldc_mem_dring_info(ldc_dring_handle_t dhandle, ldc_mem_info_t *minfo)
6078 {
6079 	ldc_dring_t	*dringp;
6080 	int		rv;
6081 
6082 	if (dhandle == NULL) {
6083 		DWARN(DBG_ALL_LDCS,
6084 		    "ldc_mem_dring_info: invalid desc ring handle\n");
6085 		return (EINVAL);
6086 	}
6087 	dringp = (ldc_dring_t *)dhandle;
6088 
6089 	mutex_enter(&dringp->lock);
6090 
6091 	if (dringp->mhdl) {
6092 		rv = ldc_mem_info(dringp->mhdl, minfo);
6093 		if (rv) {
6094 			DWARN(DBG_ALL_LDCS,
6095 			    "ldc_mem_dring_info: error reading mem info\n");
6096 			mutex_exit(&dringp->lock);
6097 			return (rv);
6098 		}
6099 	} else {
6100 		minfo->vaddr = dringp->base;
6101 		minfo->raddr = NULL;
6102 		minfo->status = dringp->status;
6103 	}
6104 
6105 	mutex_exit(&dringp->lock);
6106 
6107 	return (0);
6108 }
6109 
6110 /*
6111  * Map an exported descriptor ring into the local address space. If the
6112  * descriptor ring was exported for direct map access, a HV call is made
6113  * to allocate a RA range. If the map is done via a shadow copy, local
6114  * shadow memory is allocated.
6115  */
6116 int
6117 ldc_mem_dring_map(ldc_handle_t handle, ldc_mem_cookie_t *cookie,
6118     uint32_t ccount, uint32_t len, uint32_t dsize, uint8_t mtype,
6119     ldc_dring_handle_t *dhandle)
6120 {
6121 	int		err;
6122 	ldc_chan_t 	*ldcp = (ldc_chan_t *)handle;
6123 	ldc_mem_handle_t mhandle;
6124 	ldc_dring_t	*dringp;
6125 	size_t		dring_size;
6126 
6127 	if (dhandle == NULL) {
6128 		DWARN(DBG_ALL_LDCS,
6129 		    "ldc_mem_dring_map: invalid dhandle\n");
6130 		return (EINVAL);
6131 	}
6132 
6133 	/* check to see if channel is initalized */
6134 	if (handle == NULL) {
6135 		DWARN(DBG_ALL_LDCS,
6136 		    "ldc_mem_dring_map: invalid channel handle\n");
6137 		return (EINVAL);
6138 	}
6139 	ldcp = (ldc_chan_t *)handle;
6140 
6141 	if (cookie == NULL) {
6142 		DWARN(ldcp->id,
6143 		    "ldc_mem_dring_map: (0x%llx) invalid cookie\n",
6144 		    ldcp->id);
6145 		return (EINVAL);
6146 	}
6147 
6148 	/* FUTURE: For now we support only one cookie per dring */
6149 	ASSERT(ccount == 1);
6150 
6151 	if (cookie->size < (dsize * len)) {
6152 		DWARN(ldcp->id,
6153 		    "ldc_mem_dring_map: (0x%llx) invalid dsize/len\n",
6154 		    ldcp->id);
6155 		return (EINVAL);
6156 	}
6157 
6158 	*dhandle = 0;
6159 
6160 	/* Allocate an dring structure */
6161 	dringp = kmem_zalloc(sizeof (ldc_dring_t), KM_SLEEP);
6162 
6163 	D1(ldcp->id,
6164 	    "ldc_mem_dring_map: 0x%x,0x%x,0x%x,0x%llx,0x%llx\n",
6165 	    mtype, len, dsize, cookie->addr, cookie->size);
6166 
6167 	/* Initialize dring */
6168 	dringp->length = len;
6169 	dringp->dsize = dsize;
6170 
6171 	/* round of to multiple of page size */
6172 	dring_size = len * dsize;
6173 	dringp->size = (dring_size & MMU_PAGEMASK);
6174 	if (dring_size & MMU_PAGEOFFSET)
6175 		dringp->size += MMU_PAGESIZE;
6176 
6177 	dringp->ldcp = ldcp;
6178 
6179 	/* create an memory handle */
6180 	err = ldc_mem_alloc_handle(handle, &mhandle);
6181 	if (err || mhandle == NULL) {
6182 		DWARN(DBG_ALL_LDCS,
6183 		    "ldc_mem_dring_map: cannot alloc hdl err=%d\n",
6184 		    err);
6185 		kmem_free(dringp, sizeof (ldc_dring_t));
6186 		return (ENOMEM);
6187 	}
6188 
6189 	dringp->mhdl = mhandle;
6190 	dringp->base = NULL;
6191 
6192 	/* map the dring into local memory */
6193 	err = ldc_mem_map(mhandle, cookie, ccount, mtype, LDC_MEM_RW,
6194 	    &(dringp->base), NULL);
6195 	if (err || dringp->base == NULL) {
6196 		cmn_err(CE_WARN,
6197 		    "ldc_mem_dring_map: cannot map desc ring err=%d\n", err);
6198 		(void) ldc_mem_free_handle(mhandle);
6199 		kmem_free(dringp, sizeof (ldc_dring_t));
6200 		return (ENOMEM);
6201 	}
6202 
6203 	/* initialize the desc ring lock */
6204 	mutex_init(&dringp->lock, NULL, MUTEX_DRIVER, NULL);
6205 
6206 	/* Add descriptor ring to channel's imported dring list */
6207 	mutex_enter(&ldcp->imp_dlist_lock);
6208 	dringp->ch_next = ldcp->imp_dring_list;
6209 	ldcp->imp_dring_list = dringp;
6210 	mutex_exit(&ldcp->imp_dlist_lock);
6211 
6212 	dringp->status = LDC_MAPPED;
6213 
6214 	*dhandle = (ldc_dring_handle_t)dringp;
6215 
6216 	return (0);
6217 }
6218 
6219 /*
6220  * Unmap a descriptor ring. Free shadow memory (if any).
6221  */
6222 int
6223 ldc_mem_dring_unmap(ldc_dring_handle_t dhandle)
6224 {
6225 	ldc_dring_t 	*dringp;
6226 	ldc_dring_t	*tmp_dringp;
6227 	ldc_chan_t	*ldcp;
6228 
6229 	if (dhandle == NULL) {
6230 		DWARN(DBG_ALL_LDCS,
6231 		    "ldc_mem_dring_unmap: invalid desc ring handle\n");
6232 		return (EINVAL);
6233 	}
6234 	dringp = (ldc_dring_t *)dhandle;
6235 
6236 	if (dringp->status != LDC_MAPPED) {
6237 		DWARN(DBG_ALL_LDCS,
6238 		    "ldc_mem_dring_unmap: not a mapped desc ring\n");
6239 		return (EINVAL);
6240 	}
6241 
6242 	mutex_enter(&dringp->lock);
6243 
6244 	ldcp = dringp->ldcp;
6245 
6246 	mutex_enter(&ldcp->imp_dlist_lock);
6247 
6248 	/* find and unlink the desc ring from channel import list */
6249 	tmp_dringp = ldcp->imp_dring_list;
6250 	if (tmp_dringp == dringp) {
6251 		ldcp->imp_dring_list = dringp->ch_next;
6252 		dringp->ch_next = NULL;
6253 
6254 	} else {
6255 		while (tmp_dringp != NULL) {
6256 			if (tmp_dringp->ch_next == dringp) {
6257 				tmp_dringp->ch_next = dringp->ch_next;
6258 				dringp->ch_next = NULL;
6259 				break;
6260 			}
6261 			tmp_dringp = tmp_dringp->ch_next;
6262 		}
6263 		if (tmp_dringp == NULL) {
6264 			DWARN(DBG_ALL_LDCS,
6265 			    "ldc_mem_dring_unmap: invalid descriptor\n");
6266 			mutex_exit(&ldcp->imp_dlist_lock);
6267 			mutex_exit(&dringp->lock);
6268 			return (EINVAL);
6269 		}
6270 	}
6271 
6272 	mutex_exit(&ldcp->imp_dlist_lock);
6273 
6274 	/* do a LDC memory handle unmap and free */
6275 	(void) ldc_mem_unmap(dringp->mhdl);
6276 	(void) ldc_mem_free_handle((ldc_mem_handle_t)dringp->mhdl);
6277 
6278 	dringp->status = 0;
6279 	dringp->ldcp = NULL;
6280 
6281 	mutex_exit(&dringp->lock);
6282 
6283 	/* destroy dring lock */
6284 	mutex_destroy(&dringp->lock);
6285 
6286 	/* free desc ring object */
6287 	kmem_free(dringp, sizeof (ldc_dring_t));
6288 
6289 	return (0);
6290 }
6291 
6292 /*
6293  * Internal entry point for descriptor ring access entry consistency
6294  * semantics. Acquire copies the contents of the remote descriptor ring
6295  * into the local shadow copy. The release operation copies the local
6296  * contents into the remote dring. The start and end locations specify
6297  * bounds for the entries being synchronized.
6298  */
6299 static int
6300 i_ldc_dring_acquire_release(ldc_dring_handle_t dhandle,
6301     uint8_t direction, uint64_t start, uint64_t end)
6302 {
6303 	int 			err;
6304 	ldc_dring_t		*dringp;
6305 	ldc_chan_t		*ldcp;
6306 	uint64_t		soff;
6307 	size_t			copy_size;
6308 
6309 	if (dhandle == NULL) {
6310 		DWARN(DBG_ALL_LDCS,
6311 		    "i_ldc_dring_acquire_release: invalid desc ring handle\n");
6312 		return (EINVAL);
6313 	}
6314 	dringp = (ldc_dring_t *)dhandle;
6315 	mutex_enter(&dringp->lock);
6316 
6317 	if (dringp->status != LDC_MAPPED || dringp->ldcp == NULL) {
6318 		DWARN(DBG_ALL_LDCS,
6319 		    "i_ldc_dring_acquire_release: not a mapped desc ring\n");
6320 		mutex_exit(&dringp->lock);
6321 		return (EINVAL);
6322 	}
6323 
6324 	if (start >= dringp->length || end >= dringp->length) {
6325 		DWARN(DBG_ALL_LDCS,
6326 		    "i_ldc_dring_acquire_release: index out of range\n");
6327 		mutex_exit(&dringp->lock);
6328 		return (EINVAL);
6329 	}
6330 
6331 	/* get the channel handle */
6332 	ldcp = dringp->ldcp;
6333 
6334 	copy_size = (start <= end) ? (((end - start) + 1) * dringp->dsize) :
6335 	    ((dringp->length - start) * dringp->dsize);
6336 
6337 	/* Calculate the relative offset for the first desc */
6338 	soff = (start * dringp->dsize);
6339 
6340 	/* copy to/from remote from/to local memory */
6341 	D1(ldcp->id, "i_ldc_dring_acquire_release: c1 off=0x%llx sz=0x%llx\n",
6342 	    soff, copy_size);
6343 	err = i_ldc_mem_acquire_release((ldc_mem_handle_t)dringp->mhdl,
6344 	    direction, soff, copy_size);
6345 	if (err) {
6346 		DWARN(ldcp->id,
6347 		    "i_ldc_dring_acquire_release: copy failed\n");
6348 		mutex_exit(&dringp->lock);
6349 		return (err);
6350 	}
6351 
6352 	/* do the balance */
6353 	if (start > end) {
6354 		copy_size = ((end + 1) * dringp->dsize);
6355 		soff = 0;
6356 
6357 		/* copy to/from remote from/to local memory */
6358 		D1(ldcp->id, "i_ldc_dring_acquire_release: c2 "
6359 		    "off=0x%llx sz=0x%llx\n", soff, copy_size);
6360 		err = i_ldc_mem_acquire_release((ldc_mem_handle_t)dringp->mhdl,
6361 		    direction, soff, copy_size);
6362 		if (err) {
6363 			DWARN(ldcp->id,
6364 			    "i_ldc_dring_acquire_release: copy failed\n");
6365 			mutex_exit(&dringp->lock);
6366 			return (err);
6367 		}
6368 	}
6369 
6370 	mutex_exit(&dringp->lock);
6371 
6372 	return (0);
6373 }
6374 
6375 /*
6376  * Ensure that the contents in the local dring are consistent
6377  * with the contents if of remote dring
6378  */
6379 int
6380 ldc_mem_dring_acquire(ldc_dring_handle_t dhandle, uint64_t start, uint64_t end)
6381 {
6382 	return (i_ldc_dring_acquire_release(dhandle, LDC_COPY_IN, start, end));
6383 }
6384 
6385 /*
6386  * Ensure that the contents in the remote dring are consistent
6387  * with the contents if of local dring
6388  */
6389 int
6390 ldc_mem_dring_release(ldc_dring_handle_t dhandle, uint64_t start, uint64_t end)
6391 {
6392 	return (i_ldc_dring_acquire_release(dhandle, LDC_COPY_OUT, start, end));
6393 }
6394 
6395 
6396 /* ------------------------------------------------------------------------- */
6397