xref: /titanic_51/usr/src/uts/sun4v/io/ldc.c (revision 342440ec94087b8c751c580ab9ed6c693d31d418)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * sun4v LDC Link Layer
31  */
32 #include <sys/types.h>
33 #include <sys/file.h>
34 #include <sys/errno.h>
35 #include <sys/open.h>
36 #include <sys/cred.h>
37 #include <sys/kmem.h>
38 #include <sys/conf.h>
39 #include <sys/cmn_err.h>
40 #include <sys/ksynch.h>
41 #include <sys/modctl.h>
42 #include <sys/stat.h> /* needed for S_IFBLK and S_IFCHR */
43 #include <sys/debug.h>
44 #include <sys/cred.h>
45 #include <sys/promif.h>
46 #include <sys/ddi.h>
47 #include <sys/sunddi.h>
48 #include <sys/cyclic.h>
49 #include <sys/machsystm.h>
50 #include <sys/vm.h>
51 #include <sys/cpu.h>
52 #include <sys/intreg.h>
53 #include <sys/machcpuvar.h>
54 #include <sys/mmu.h>
55 #include <sys/pte.h>
56 #include <vm/hat.h>
57 #include <vm/as.h>
58 #include <vm/hat_sfmmu.h>
59 #include <sys/vm_machparam.h>
60 #include <vm/seg_kmem.h>
61 #include <vm/seg_kpm.h>
62 #include <sys/note.h>
63 #include <sys/ivintr.h>
64 #include <sys/hypervisor_api.h>
65 #include <sys/ldc.h>
66 #include <sys/ldc_impl.h>
67 #include <sys/cnex.h>
68 #include <sys/hsvc.h>
69 #include <sys/sdt.h>
70 
71 /* Core internal functions */
72 int i_ldc_h2v_error(int h_error);
73 void i_ldc_reset(ldc_chan_t *ldcp, boolean_t force_reset);
74 
75 static int i_ldc_txq_reconf(ldc_chan_t *ldcp);
76 static int i_ldc_rxq_reconf(ldc_chan_t *ldcp, boolean_t force_reset);
77 static int i_ldc_rxq_drain(ldc_chan_t *ldcp);
78 static void i_ldc_reset_state(ldc_chan_t *ldcp);
79 
80 static int i_ldc_get_tx_tail(ldc_chan_t *ldcp, uint64_t *tail);
81 static void i_ldc_get_tx_head(ldc_chan_t *ldcp, uint64_t *head);
82 static int i_ldc_set_tx_tail(ldc_chan_t *ldcp, uint64_t tail);
83 static int i_ldc_set_rx_head(ldc_chan_t *ldcp, uint64_t head);
84 static int i_ldc_send_pkt(ldc_chan_t *ldcp, uint8_t pkttype, uint8_t subtype,
85     uint8_t ctrlmsg);
86 
87 static int  i_ldc_set_rxdq_head(ldc_chan_t *ldcp, uint64_t head);
88 static void i_ldc_rxdq_copy(ldc_chan_t *ldcp, uint64_t *head);
89 static uint64_t i_ldc_dq_rx_get_state(ldc_chan_t *ldcp, uint64_t *head,
90     uint64_t *tail, uint64_t *link_state);
91 static uint64_t i_ldc_hvq_rx_get_state(ldc_chan_t *ldcp, uint64_t *head,
92     uint64_t *tail, uint64_t *link_state);
93 static int i_ldc_rx_ackpeek(ldc_chan_t *ldcp, uint64_t rx_head,
94     uint64_t rx_tail);
95 static uint_t i_ldc_chkq(ldc_chan_t *ldcp);
96 
97 /* Interrupt handling functions */
98 static uint_t i_ldc_tx_hdlr(caddr_t arg1, caddr_t arg2);
99 static uint_t i_ldc_rx_hdlr(caddr_t arg1, caddr_t arg2);
100 static uint_t i_ldc_rx_process_hvq(ldc_chan_t *ldcp, boolean_t *notify_client,
101     uint64_t *notify_event);
102 static void i_ldc_clear_intr(ldc_chan_t *ldcp, cnex_intrtype_t itype);
103 
104 /* Read method functions */
105 static int i_ldc_read_raw(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep);
106 static int i_ldc_read_packet(ldc_chan_t *ldcp, caddr_t target_bufp,
107 	size_t *sizep);
108 static int i_ldc_read_stream(ldc_chan_t *ldcp, caddr_t target_bufp,
109 	size_t *sizep);
110 
111 /* Write method functions */
112 static int i_ldc_write_raw(ldc_chan_t *ldcp, caddr_t target_bufp,
113 	size_t *sizep);
114 static int i_ldc_write_packet(ldc_chan_t *ldcp, caddr_t target_bufp,
115 	size_t *sizep);
116 static int i_ldc_write_stream(ldc_chan_t *ldcp, caddr_t target_bufp,
117 	size_t *sizep);
118 
119 /* Pkt processing internal functions */
120 static int i_ldc_check_seqid(ldc_chan_t *ldcp, ldc_msg_t *ldcmsg);
121 static int i_ldc_ctrlmsg(ldc_chan_t *ldcp, ldc_msg_t *ldcmsg);
122 static int i_ldc_process_VER(ldc_chan_t *ldcp, ldc_msg_t *msg);
123 static int i_ldc_process_RTS(ldc_chan_t *ldcp, ldc_msg_t *msg);
124 static int i_ldc_process_RTR(ldc_chan_t *ldcp, ldc_msg_t *msg);
125 static int i_ldc_process_RDX(ldc_chan_t *ldcp, ldc_msg_t *msg);
126 static int i_ldc_process_data_ACK(ldc_chan_t *ldcp, ldc_msg_t *msg);
127 
128 /* LDC Version */
129 static ldc_ver_t ldc_versions[] = { {1, 0} };
130 
131 /* number of supported versions */
132 #define	LDC_NUM_VERS	(sizeof (ldc_versions) / sizeof (ldc_versions[0]))
133 
134 /* Invalid value for the ldc_chan_t rx_ack_head field */
135 #define	ACKPEEK_HEAD_INVALID	((uint64_t)-1)
136 
137 
138 /* Module State Pointer */
139 ldc_soft_state_t *ldcssp;
140 
141 static struct modldrv md = {
142 	&mod_miscops,			/* This is a misc module */
143 	"sun4v LDC module v%I%",	/* Name of the module */
144 };
145 
146 static struct modlinkage ml = {
147 	MODREV_1,
148 	&md,
149 	NULL
150 };
151 
152 static uint64_t ldc_sup_minor;		/* Supported minor number */
153 static hsvc_info_t ldc_hsvc = {
154 	HSVC_REV_1, NULL, HSVC_GROUP_LDC, 1, 1, "ldc"
155 };
156 
157 /*
158  * The no. of MTU size messages that can be stored in
159  * the LDC Tx queue. The number of Tx queue entries is
160  * then computed as (mtu * mtu_msgs)/sizeof(queue_entry)
161  */
162 uint64_t ldc_mtu_msgs = LDC_MTU_MSGS;
163 
164 /*
165  * The minimum queue length. This is the size of the smallest
166  * LDC queue. If the computed value is less than this default,
167  * the queue length is rounded up to 'ldc_queue_entries'.
168  */
169 uint64_t ldc_queue_entries = LDC_QUEUE_ENTRIES;
170 
171 /*
172  * The length of the reliable-mode data queue in terms of the LDC
173  * receive queue length. i.e., the number of times larger than the
174  * LDC receive queue that the data queue should be. The HV receive
175  * queue is required to be a power of 2 and this implementation
176  * assumes the data queue will also be a power of 2. By making the
177  * multiplier a power of 2, we ensure the data queue will be a
178  * power of 2. We use a multiplier because the receive queue is
179  * sized to be sane relative to the MTU and the same is needed for
180  * the data queue.
181  */
182 uint64_t ldc_rxdq_multiplier = LDC_RXDQ_MULTIPLIER;
183 
184 /*
185  * LDC retry count and delay - when the HV returns EWOULDBLOCK
186  * the operation is retried 'ldc_max_retries' times with a
187  * wait of 'ldc_delay' usecs between each retry.
188  */
189 int ldc_max_retries = LDC_MAX_RETRIES;
190 clock_t ldc_delay = LDC_DELAY;
191 
192 /*
193  * delay between each retry of channel unregistration in
194  * ldc_close(), to wait for pending interrupts to complete.
195  */
196 clock_t ldc_close_delay = LDC_CLOSE_DELAY;
197 
198 #ifdef DEBUG
199 
200 /*
201  * Print debug messages
202  *
203  * set ldcdbg to 0x7 for enabling all msgs
204  * 0x4 - Warnings
205  * 0x2 - All debug messages
206  * 0x1 - Minimal debug messages
207  *
208  * set ldcdbgchan to the channel number you want to debug
209  * setting it to -1 prints debug messages for all channels
210  * NOTE: ldcdbgchan has no effect on error messages
211  */
212 
213 int ldcdbg = 0x0;
214 int64_t ldcdbgchan = DBG_ALL_LDCS;
215 uint64_t ldc_inject_err_flag = 0;
216 
217 void
218 ldcdebug(int64_t id, const char *fmt, ...)
219 {
220 	char buf[512];
221 	va_list ap;
222 
223 	/*
224 	 * Do not return if,
225 	 * caller wants to print it anyway - (id == DBG_ALL_LDCS)
226 	 * debug channel is set to all LDCs - (ldcdbgchan == DBG_ALL_LDCS)
227 	 * debug channel = caller specified channel
228 	 */
229 	if ((id != DBG_ALL_LDCS) &&
230 	    (ldcdbgchan != DBG_ALL_LDCS) &&
231 	    (ldcdbgchan != id)) {
232 		return;
233 	}
234 
235 	va_start(ap, fmt);
236 	(void) vsprintf(buf, fmt, ap);
237 	va_end(ap);
238 
239 	cmn_err(CE_CONT, "?%s", buf);
240 }
241 
242 #define	LDC_ERR_RESET		0x1
243 #define	LDC_ERR_PKTLOSS		0x2
244 #define	LDC_ERR_DQFULL		0x4
245 #define	LDC_ERR_DRNGCLEAR	0x8
246 
247 static boolean_t
248 ldc_inject_error(ldc_chan_t *ldcp, uint64_t error)
249 {
250 	if ((ldcdbgchan != DBG_ALL_LDCS) && (ldcdbgchan != ldcp->id))
251 		return (B_FALSE);
252 
253 	if ((ldc_inject_err_flag & error) == 0)
254 		return (B_FALSE);
255 
256 	/* clear the injection state */
257 	ldc_inject_err_flag &= ~error;
258 
259 	return (B_TRUE);
260 }
261 
262 #define	D1		\
263 if (ldcdbg & 0x01)	\
264 	ldcdebug
265 
266 #define	D2		\
267 if (ldcdbg & 0x02)	\
268 	ldcdebug
269 
270 #define	DWARN		\
271 if (ldcdbg & 0x04)	\
272 	ldcdebug
273 
274 #define	DUMP_PAYLOAD(id, addr)						\
275 {									\
276 	char buf[65*3];							\
277 	int i;								\
278 	uint8_t *src = (uint8_t *)addr;					\
279 	for (i = 0; i < 64; i++, src++)					\
280 		(void) sprintf(&buf[i * 3], "|%02x", *src);		\
281 	(void) sprintf(&buf[i * 3], "|\n");				\
282 	D2((id), "payload: %s", buf);					\
283 }
284 
285 #define	DUMP_LDC_PKT(c, s, addr)					\
286 {									\
287 	ldc_msg_t *msg = (ldc_msg_t *)(addr);				\
288 	uint32_t mid = ((c)->mode != LDC_MODE_RAW) ? msg->seqid : 0;	\
289 	if (msg->type == LDC_DATA) {                                    \
290 	    D2((c)->id, "%s: msg%d (/%x/%x/%x/,env[%c%c,sz=%d])",	\
291 	    (s), mid, msg->type, msg->stype, msg->ctrl,			\
292 	    (msg->env & LDC_FRAG_START) ? 'B' : ' ',                    \
293 	    (msg->env & LDC_FRAG_STOP) ? 'E' : ' ',                     \
294 	    (msg->env & LDC_LEN_MASK));					\
295 	} else { 							\
296 	    D2((c)->id, "%s: msg%d (/%x/%x/%x/,env=%x)", (s),		\
297 	    mid, msg->type, msg->stype, msg->ctrl, msg->env);		\
298 	} 								\
299 }
300 
301 #define	LDC_INJECT_RESET(_ldcp)	ldc_inject_error(_ldcp, LDC_ERR_RESET)
302 #define	LDC_INJECT_PKTLOSS(_ldcp) ldc_inject_error(_ldcp, LDC_ERR_PKTLOSS)
303 #define	LDC_INJECT_DQFULL(_ldcp) ldc_inject_error(_ldcp, LDC_ERR_DQFULL)
304 #define	LDC_INJECT_DRNGCLEAR(_ldcp) ldc_inject_error(_ldcp, LDC_ERR_DRNGCLEAR)
305 extern void i_ldc_mem_inject_dring_clear(ldc_chan_t *ldcp);
306 
307 #else
308 
309 #define	DBG_ALL_LDCS -1
310 
311 #define	D1
312 #define	D2
313 #define	DWARN
314 
315 #define	DUMP_PAYLOAD(id, addr)
316 #define	DUMP_LDC_PKT(c, s, addr)
317 
318 #define	LDC_INJECT_RESET(_ldcp)	(B_FALSE)
319 #define	LDC_INJECT_PKTLOSS(_ldcp) (B_FALSE)
320 #define	LDC_INJECT_DQFULL(_ldcp) (B_FALSE)
321 #define	LDC_INJECT_DRNGCLEAR(_ldcp) (B_FALSE)
322 
323 #endif
324 
325 /*
326  * dtrace SDT probes to ease tracing of the rx data queue and HV queue
327  * lengths. Just pass the head, tail, and entries values so that the
328  * length can be calculated in a dtrace script when the probe is enabled.
329  */
330 #define	TRACE_RXDQ_LENGTH(ldcp)						\
331 	DTRACE_PROBE4(rxdq__size,					\
332 	uint64_t, ldcp->id,						\
333 	uint64_t, ldcp->rx_dq_head,					\
334 	uint64_t, ldcp->rx_dq_tail,					\
335 	uint64_t, ldcp->rx_dq_entries)
336 
337 #define	TRACE_RXHVQ_LENGTH(ldcp, head, tail)				\
338 	DTRACE_PROBE4(rxhvq__size,					\
339 	uint64_t, ldcp->id,						\
340 	uint64_t, head,							\
341 	uint64_t, tail,							\
342 	uint64_t, ldcp->rx_q_entries)
343 
344 /* A dtrace SDT probe to ease tracing of data queue copy operations */
345 #define	TRACE_RXDQ_COPY(ldcp, bytes)					\
346 	DTRACE_PROBE2(rxdq__copy, uint64_t, ldcp->id, uint64_t, bytes)	\
347 
348 /* The amount of contiguous space at the tail of the queue */
349 #define	Q_CONTIG_SPACE(head, tail, size)				\
350 	((head) <= (tail) ? ((size) - (tail)) :				\
351 	((head) - (tail) - LDC_PACKET_SIZE))
352 
353 #define	ZERO_PKT(p)			\
354 	bzero((p), sizeof (ldc_msg_t));
355 
356 #define	IDX2COOKIE(idx, pg_szc, pg_shift)				\
357 	(((pg_szc) << LDC_COOKIE_PGSZC_SHIFT) | ((idx) << (pg_shift)))
358 
359 int
360 _init(void)
361 {
362 	int status;
363 	extern void i_ldc_mem_set_hsvc_vers(uint64_t major, uint64_t minor);
364 
365 	status = hsvc_register(&ldc_hsvc, &ldc_sup_minor);
366 	if (status != 0) {
367 		cmn_err(CE_NOTE, "!%s: cannot negotiate hypervisor LDC services"
368 		    " group: 0x%lx major: %ld minor: %ld errno: %d",
369 		    ldc_hsvc.hsvc_modname, ldc_hsvc.hsvc_group,
370 		    ldc_hsvc.hsvc_major, ldc_hsvc.hsvc_minor, status);
371 		return (-1);
372 	}
373 
374 	/* Initialize shared memory HV API version checking */
375 	i_ldc_mem_set_hsvc_vers(ldc_hsvc.hsvc_major, ldc_sup_minor);
376 
377 	/* allocate soft state structure */
378 	ldcssp = kmem_zalloc(sizeof (ldc_soft_state_t), KM_SLEEP);
379 
380 	/* Link the module into the system */
381 	status = mod_install(&ml);
382 	if (status != 0) {
383 		kmem_free(ldcssp, sizeof (ldc_soft_state_t));
384 		return (status);
385 	}
386 
387 	/* Initialize the LDC state structure */
388 	mutex_init(&ldcssp->lock, NULL, MUTEX_DRIVER, NULL);
389 
390 	mutex_enter(&ldcssp->lock);
391 
392 	/* Create a cache for memory handles */
393 	ldcssp->memhdl_cache = kmem_cache_create("ldc_memhdl_cache",
394 	    sizeof (ldc_mhdl_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
395 	if (ldcssp->memhdl_cache == NULL) {
396 		DWARN(DBG_ALL_LDCS, "_init: ldc_memhdl cache create failed\n");
397 		mutex_exit(&ldcssp->lock);
398 		return (-1);
399 	}
400 
401 	/* Create cache for memory segment structures */
402 	ldcssp->memseg_cache = kmem_cache_create("ldc_memseg_cache",
403 	    sizeof (ldc_memseg_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
404 	if (ldcssp->memseg_cache == NULL) {
405 		DWARN(DBG_ALL_LDCS, "_init: ldc_memseg cache create failed\n");
406 		mutex_exit(&ldcssp->lock);
407 		return (-1);
408 	}
409 
410 
411 	ldcssp->channel_count = 0;
412 	ldcssp->channels_open = 0;
413 	ldcssp->chan_list = NULL;
414 	ldcssp->dring_list = NULL;
415 
416 	mutex_exit(&ldcssp->lock);
417 
418 	return (0);
419 }
420 
421 int
422 _info(struct modinfo *modinfop)
423 {
424 	/* Report status of the dynamically loadable driver module */
425 	return (mod_info(&ml, modinfop));
426 }
427 
/*
 * _fini(9E): unload the module.
 * Returns EIO if mod_remove() reports the module is still busy;
 * otherwise tears down any remaining descriptor rings and channels,
 * destroys the kmem caches and soft state, and unregisters the
 * negotiated hypervisor service group.
 */
int
_fini(void)
{
	int 		rv, status;
	ldc_chan_t 	*tmp_ldcp, *ldcp;
	ldc_dring_t 	*tmp_dringp, *dringp;
	ldc_mem_info_t 	minfo;

	/* Unlink the driver module from the system */
	status = mod_remove(&ml);
	if (status) {
		DWARN(DBG_ALL_LDCS, "_fini: mod_remove failed\n");
		return (EIO);
	}

	/*
	 * Free descriptor rings. The 'next' pointer is saved before each
	 * ring is destroyed, since destroy invalidates the list element.
	 */
	dringp = ldcssp->dring_list;
	while (dringp != NULL) {
		tmp_dringp = dringp->next;

		/* unbind/unmap a ring that is still in use before destroy */
		rv = ldc_mem_dring_info((ldc_dring_handle_t)dringp, &minfo);
		if (rv == 0 && minfo.status != LDC_UNBOUND) {
			if (minfo.status == LDC_BOUND) {
				(void) ldc_mem_dring_unbind(
				    (ldc_dring_handle_t)dringp);
			}
			if (minfo.status == LDC_MAPPED) {
				(void) ldc_mem_dring_unmap(
				    (ldc_dring_handle_t)dringp);
			}
		}

		(void) ldc_mem_dring_destroy((ldc_dring_handle_t)dringp);
		dringp = tmp_dringp;
	}
	ldcssp->dring_list = NULL;

	/* close and finalize channels (best-effort: errors are ignored) */
	ldcp = ldcssp->chan_list;
	while (ldcp != NULL) {
		tmp_ldcp = ldcp->next;

		(void) ldc_close((ldc_handle_t)ldcp);
		(void) ldc_fini((ldc_handle_t)ldcp);

		ldcp = tmp_ldcp;
	}
	ldcssp->chan_list = NULL;

	/* Destroy kmem caches */
	kmem_cache_destroy(ldcssp->memhdl_cache);
	kmem_cache_destroy(ldcssp->memseg_cache);

	/*
	 * We have successfully "removed" the driver.
	 * Destroying soft states
	 */
	mutex_destroy(&ldcssp->lock);
	kmem_free(ldcssp, sizeof (ldc_soft_state_t));

	(void) hsvc_unregister(&ldc_hsvc);

	/* status is 0 here: mod_remove() succeeded above */
	return (status);
}
492 
493 /* -------------------------------------------------------------------------- */
494 
495 /*
496  * LDC Link Layer Internal Functions
497  */
498 
499 /*
500  * Translate HV Errors to sun4v error codes
501  */
502 int
503 i_ldc_h2v_error(int h_error)
504 {
505 	switch (h_error) {
506 
507 	case	H_EOK:
508 		return (0);
509 
510 	case	H_ENORADDR:
511 		return (EFAULT);
512 
513 	case	H_EBADPGSZ:
514 	case	H_EINVAL:
515 		return (EINVAL);
516 
517 	case	H_EWOULDBLOCK:
518 		return (EWOULDBLOCK);
519 
520 	case	H_ENOACCESS:
521 	case	H_ENOMAP:
522 		return (EACCES);
523 
524 	case	H_EIO:
525 	case	H_ECPUERROR:
526 		return (EIO);
527 
528 	case	H_ENOTSUPPORTED:
529 		return (ENOTSUP);
530 
531 	case 	H_ETOOMANY:
532 		return (ENOSPC);
533 
534 	case	H_ECHANNEL:
535 		return (ECHRNG);
536 	default:
537 		break;
538 	}
539 
540 	return (EIO);
541 }
542 
543 /*
544  * Reconfigure the transmit queue
545  */
static int
i_ldc_txq_reconf(ldc_chan_t *ldcp)
{
	int rv;

	/* caller must hold both locks: we touch Tx queue state */
	ASSERT(MUTEX_HELD(&ldcp->lock));
	ASSERT(MUTEX_HELD(&ldcp->tx_lock));

	/*
	 * Re-register the Tx queue with the hypervisor using its existing
	 * base real address and size (a "reconfigure" of the same queue).
	 */
	rv = hv_ldc_tx_qconf(ldcp->id, ldcp->tx_q_ra, ldcp->tx_q_entries);
	if (rv) {
		cmn_err(CE_WARN,
		    "i_ldc_txq_reconf: (0x%lx) cannot set qconf", ldcp->id);
		return (EIO);
	}
	/* refresh the cached head/tail/link-state to match the HV's view */
	rv = hv_ldc_tx_get_state(ldcp->id, &(ldcp->tx_head),
	    &(ldcp->tx_tail), &(ldcp->link_state));
	if (rv) {
		cmn_err(CE_WARN,
		    "i_ldc_txq_reconf: (0x%lx) cannot get qptrs", ldcp->id);
		return (EIO);
	}
	D1(ldcp->id, "i_ldc_txq_reconf: (0x%llx) h=0x%llx,t=0x%llx,"
	    "s=0x%llx\n", ldcp->id, ldcp->tx_head, ldcp->tx_tail,
	    ldcp->link_state);

	return (0);
}
573 
574 /*
575  * Reconfigure the receive queue
576  */
static int
i_ldc_rxq_reconf(ldc_chan_t *ldcp, boolean_t force_reset)
{
	int rv;
	uint64_t rx_head, rx_tail;

	ASSERT(MUTEX_HELD(&ldcp->lock));
	/*
	 * Query the queue state; the head/tail values themselves are not
	 * used here -- the call's purpose is to refresh the cached
	 * link_state, which i_ldc_reset_state() consults afterwards.
	 */
	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
	    &(ldcp->link_state));
	if (rv) {
		cmn_err(CE_WARN,
		    "i_ldc_rxq_reconf: (0x%lx) cannot get state",
		    ldcp->id);
		return (EIO);
	}

	/*
	 * Reprogram the Rx queue only when explicitly forced, or when the
	 * channel is fully up (masking off the transient TS_IN_RESET bit).
	 */
	if (force_reset || (ldcp->tstate & ~TS_IN_RESET) == TS_UP) {
		rv = hv_ldc_rx_qconf(ldcp->id, ldcp->rx_q_ra,
		    ldcp->rx_q_entries);
		if (rv) {
			cmn_err(CE_WARN,
			    "i_ldc_rxq_reconf: (0x%lx) cannot set qconf",
			    ldcp->id);
			return (EIO);
		}
		D1(ldcp->id, "i_ldc_rxq_reconf: (0x%llx) completed q reconf",
		    ldcp->id);
	}

	return (0);
}
608 
609 
610 /*
611  * Drain the contents of the receive queue
612  */
613 static int
614 i_ldc_rxq_drain(ldc_chan_t *ldcp)
615 {
616 	int rv;
617 	uint64_t rx_head, rx_tail;
618 
619 	ASSERT(MUTEX_HELD(&ldcp->lock));
620 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
621 	    &(ldcp->link_state));
622 	if (rv) {
623 		cmn_err(CE_WARN, "i_ldc_rxq_drain: (0x%lx) cannot get state",
624 		    ldcp->id);
625 		return (EIO);
626 	}
627 
628 	/* flush contents by setting the head = tail */
629 	return (i_ldc_set_rx_head(ldcp, rx_tail));
630 }
631 
632 
633 /*
634  * Reset LDC state structure and its contents
635  */
static void
i_ldc_reset_state(ldc_chan_t *ldcp)
{
	ASSERT(MUTEX_HELD(&ldcp->lock));
	/* restart sequence numbering from its initial value */
	ldcp->last_msg_snt = LDC_INIT_SEQID;
	ldcp->last_ack_rcd = 0;
	ldcp->last_msg_rcd = 0;
	/* nothing is in flight any more: treat everything sent as ACKd */
	ldcp->tx_ackd_head = ldcp->tx_head;
	/* stream_remains: leftover stream-mode data, discarded on reset */
	ldcp->stream_remains = 0;
	/* restart version negotiation and the handshake state machine */
	ldcp->next_vidx = 0;
	ldcp->hstate = 0;
	ldcp->tstate = TS_OPEN;
	ldcp->status = LDC_OPEN;
	/* invalidate the ACK-peek marker and empty the rx data queue */
	ldcp->rx_ack_head = ACKPEEK_HEAD_INVALID;
	ldcp->rx_dq_head = 0;
	ldcp->rx_dq_tail = 0;

	/*
	 * If the underlying link is still up (or merely reset rather than
	 * down), advance beyond plain TS_OPEN: RAW mode performs no
	 * handshake and goes straight to UP; all other modes only record
	 * that the link is ready for (re)negotiation.
	 */
	if (ldcp->link_state == LDC_CHANNEL_UP ||
	    ldcp->link_state == LDC_CHANNEL_RESET) {

		if (ldcp->mode == LDC_MODE_RAW) {
			ldcp->status = LDC_UP;
			ldcp->tstate = TS_UP;
		} else {
			ldcp->status = LDC_READY;
			ldcp->tstate |= TS_LINK_READY;
		}
	}
}
665 
666 /*
667  * Reset a LDC channel
668  */
void
i_ldc_reset(ldc_chan_t *ldcp, boolean_t force_reset)
{
	DWARN(ldcp->id, "i_ldc_reset: (0x%llx) channel reset\n", ldcp->id);

	/* both locks required: the reset touches Tx and Rx channel state */
	ASSERT(MUTEX_HELD(&ldcp->lock));
	ASSERT(MUTEX_HELD(&ldcp->tx_lock));

	/*
	 * reconfig Tx and Rx queues; errors are deliberately ignored --
	 * this is a best-effort recovery path and each helper already
	 * logs its own failures.
	 */
	(void) i_ldc_txq_reconf(ldcp);
	(void) i_ldc_rxq_reconf(ldcp, force_reset);

	/* Clear Tx and Rx interrupts */
	(void) i_ldc_clear_intr(ldcp, CNEX_TX_INTR);
	(void) i_ldc_clear_intr(ldcp, CNEX_RX_INTR);

	/* Reset channel state */
	i_ldc_reset_state(ldcp);

	/* Mark channel in reset (cleared elsewhere once recovery completes) */
	ldcp->tstate |= TS_IN_RESET;
}
691 
692 
693 /*
694  * Clear pending interrupts
695  */
static void
i_ldc_clear_intr(ldc_chan_t *ldcp, cnex_intrtype_t itype)
{
	ldc_cnex_t *cinfo = &ldcssp->cinfo;

	ASSERT(MUTEX_HELD(&ldcp->lock));
	/* the cnex interrupt ops vector must have been registered */
	ASSERT(cinfo->dip != NULL);

	switch (itype) {
	case CNEX_TX_INTR:
		/* check Tx interrupt; nothing to clear if none outstanding */
		if (ldcp->tx_intr_state)
			ldcp->tx_intr_state = LDC_INTR_NONE;
		else
			return;
		break;

	case CNEX_RX_INTR:
		/* check Rx interrupt; nothing to clear if none outstanding */
		if (ldcp->rx_intr_state)
			ldcp->rx_intr_state = LDC_INTR_NONE;
		else
			return;
		break;
	}

	/* ask the channel nexus driver to clear the interrupt for us */
	(void) cinfo->clr_intr(cinfo->dip, ldcp->id, itype);
	D2(ldcp->id,
	    "i_ldc_clear_intr: (0x%llx) cleared 0x%x intr\n",
	    ldcp->id, itype);
}
727 
728 /*
729  * Set the receive queue head
730  * Resets connection and returns an error if it fails.
731  */
static int
i_ldc_set_rx_head(ldc_chan_t *ldcp, uint64_t head)
{
	int 	rv;
	int 	retries;

	ASSERT(MUTEX_HELD(&ldcp->lock));
	/* retry only on transient (H_EWOULDBLOCK) failures */
	for (retries = 0; retries < ldc_max_retries; retries++) {

		if ((rv = hv_ldc_rx_set_qhead(ldcp->id, head)) == 0)
			return (0);

		if (rv != H_EWOULDBLOCK)
			break;

		/* wait for ldc_delay usecs */
		drv_usecwait(ldc_delay);
	}

	/*
	 * The HV would not accept the new head: the channel is in an
	 * indeterminate state, so force a full reset. The tx_lock is
	 * taken here because i_ldc_reset() requires both locks held.
	 */
	cmn_err(CE_WARN, "ldc_rx_set_qhead: (0x%lx) cannot set qhead 0x%lx",
	    ldcp->id, head);
	mutex_enter(&ldcp->tx_lock);
	i_ldc_reset(ldcp, B_TRUE);
	mutex_exit(&ldcp->tx_lock);

	return (ECONNRESET);
}
759 
760 /*
761  * Returns the tx_head to be used for transfer
762  */
static void
i_ldc_get_tx_head(ldc_chan_t *ldcp, uint64_t *head)
{
	ldc_msg_t 	*pkt;

	ASSERT(MUTEX_HELD(&ldcp->tx_lock));

	/* get current Tx head */
	*head = ldcp->tx_head;

	/*
	 * Reliable mode will use the ACKd head instead of the regular tx_head.
	 * Also in Reliable mode, advance ackd_head for all non DATA/INFO pkts,
	 * up to the current location of tx_head. This needs to be done
	 * as the peer will only ACK DATA/INFO pkts.
	 */
	if (ldcp->mode == LDC_MODE_RELIABLE) {
		while (ldcp->tx_ackd_head != ldcp->tx_head) {
			pkt = (ldc_msg_t *)(ldcp->tx_q_va + ldcp->tx_ackd_head);
			/* stop at the first DATA/INFO pkt: it awaits an ACK */
			if ((pkt->type & LDC_DATA) && (pkt->stype & LDC_INFO)) {
				break;
			}
			/* advance ACKd head (modulo the queue size in bytes) */
			ldcp->tx_ackd_head =
			    (ldcp->tx_ackd_head + LDC_PACKET_SIZE) %
			    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
		}
		*head = ldcp->tx_ackd_head;
	}
}
793 
/*
 * Returns the tx_tail to be used for transfer.
 * Re-reads the TX queue head and tail from the hypervisor, and fails
 * with EWOULDBLOCK if advancing the tail would run into the current
 * head (i.e. the queue is full).
 */
static int
i_ldc_get_tx_tail(ldc_chan_t *ldcp, uint64_t *tail)
{
	int 		rv;
	uint64_t 	current_head, new_tail;

	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
	/* Read the head and tail ptrs from HV (also refreshes link_state) */
	rv = hv_ldc_tx_get_state(ldcp->id,
	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
	if (rv) {
		cmn_err(CE_WARN,
		    "i_ldc_get_tx_tail: (0x%lx) cannot read qptrs\n",
		    ldcp->id);
		return (EIO);
	}
	if (ldcp->link_state == LDC_CHANNEL_DOWN) {
		D1(ldcp->id, "i_ldc_get_tx_tail: (0x%llx) channel not ready\n",
		    ldcp->id);
		return (ECONNRESET);
	}

	/* in RELIABLE mode this yields the ACKd head, else the regular head */
	i_ldc_get_tx_head(ldcp, &current_head);

	/* increment the tail */
	new_tail = (ldcp->tx_tail + LDC_PACKET_SIZE) %
	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);

	/* a tail that would catch up with the head means the queue is full */
	if (new_tail == current_head) {
		DWARN(ldcp->id,
		    "i_ldc_get_tx_tail: (0x%llx) TX queue is full\n",
		    ldcp->id);
		return (EWOULDBLOCK);
	}

	D2(ldcp->id, "i_ldc_get_tx_tail: (0x%llx) head=0x%llx, tail=0x%llx\n",
	    ldcp->id, ldcp->tx_head, ldcp->tx_tail);

	*tail = ldcp->tx_tail;
	return (0);
}
840 
841 /*
842  * Set the tail pointer. If HV returns EWOULDBLOCK, it will back off
843  * and retry ldc_max_retries times before returning an error.
844  * Returns 0, EWOULDBLOCK or EIO
845  */
846 static int
847 i_ldc_set_tx_tail(ldc_chan_t *ldcp, uint64_t tail)
848 {
849 	int		rv, retval = EWOULDBLOCK;
850 	int 		retries;
851 
852 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
853 	for (retries = 0; retries < ldc_max_retries; retries++) {
854 
855 		if ((rv = hv_ldc_tx_set_qtail(ldcp->id, tail)) == 0) {
856 			retval = 0;
857 			break;
858 		}
859 		if (rv != H_EWOULDBLOCK) {
860 			DWARN(ldcp->id, "i_ldc_set_tx_tail: (0x%llx) set "
861 			    "qtail=0x%llx failed, rv=%d\n", ldcp->id, tail, rv);
862 			retval = EIO;
863 			break;
864 		}
865 
866 		/* wait for ldc_delay usecs */
867 		drv_usecwait(ldc_delay);
868 	}
869 	return (retval);
870 }
871 
872 /*
873  * Copy a data packet from the HV receive queue to the data queue.
874  * Caller must ensure that the data queue is not already full.
875  *
876  * The *head argument represents the current head pointer for the HV
877  * receive queue. After copying a packet from the HV receive queue,
878  * the *head pointer will be updated. This allows the caller to update
879  * the head pointer in HV using the returned *head value.
880  */
881 void
882 i_ldc_rxdq_copy(ldc_chan_t *ldcp, uint64_t *head)
883 {
884 	uint64_t	q_size, dq_size;
885 
886 	ASSERT(MUTEX_HELD(&ldcp->lock));
887 
888 	q_size  = ldcp->rx_q_entries << LDC_PACKET_SHIFT;
889 	dq_size = ldcp->rx_dq_entries << LDC_PACKET_SHIFT;
890 
891 	ASSERT(Q_CONTIG_SPACE(ldcp->rx_dq_head, ldcp->rx_dq_tail,
892 	    dq_size) >= LDC_PACKET_SIZE);
893 
894 	bcopy((void *)(ldcp->rx_q_va + *head),
895 	    (void *)(ldcp->rx_dq_va + ldcp->rx_dq_tail), LDC_PACKET_SIZE);
896 	TRACE_RXDQ_COPY(ldcp, LDC_PACKET_SIZE);
897 
898 	/* Update rx head */
899 	*head = (*head + LDC_PACKET_SIZE) % q_size;
900 
901 	/* Update dq tail */
902 	ldcp->rx_dq_tail = (ldcp->rx_dq_tail + LDC_PACKET_SIZE) % dq_size;
903 }
904 
905 /*
906  * Update the Rx data queue head pointer
907  */
908 static int
909 i_ldc_set_rxdq_head(ldc_chan_t *ldcp, uint64_t head)
910 {
911 	ldcp->rx_dq_head = head;
912 	return (0);
913 }
914 
915 /*
916  * Get the Rx data queue head and tail pointers
917  */
918 static uint64_t
919 i_ldc_dq_rx_get_state(ldc_chan_t *ldcp, uint64_t *head, uint64_t *tail,
920     uint64_t *link_state)
921 {
922 	_NOTE(ARGUNUSED(link_state))
923 	*head = ldcp->rx_dq_head;
924 	*tail = ldcp->rx_dq_tail;
925 	return (0);
926 }
927 
/*
 * Wrapper for the Rx HV queue get state function. Giving the
 * data queue and HV queue get state functions the same type.
 */
932 static uint64_t
933 i_ldc_hvq_rx_get_state(ldc_chan_t *ldcp, uint64_t *head, uint64_t *tail,
934     uint64_t *link_state)
935 {
936 	return (i_ldc_h2v_error(hv_ldc_rx_get_state(ldcp->id, head, tail,
937 	    link_state)));
938 }
939 
940 /*
941  * LDC receive interrupt handler
942  *    triggered for channel with data pending to read
943  *    i.e. Rx queue content changes
944  */
static uint_t
i_ldc_rx_hdlr(caddr_t arg1, caddr_t arg2)
{
	_NOTE(ARGUNUSED(arg2))

	ldc_chan_t	*ldcp;
	boolean_t	notify;
	uint64_t	event;
	int		rv, status;

	/* Get the channel for which interrupt was received */
	if (arg1 == NULL) {
		cmn_err(CE_WARN, "i_ldc_rx_hdlr: invalid arg\n");
		return (DDI_INTR_UNCLAIMED);
	}

	ldcp = (ldc_chan_t *)arg1;

	D1(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) Received intr, ldcp=0x%p\n",
	    ldcp->id, ldcp);
	D1(ldcp->id, "i_ldc_rx_hdlr: (%llx) USR%lx/TS%lx/HS%lx, LSTATE=%lx\n",
	    ldcp->id, ldcp->status, ldcp->tstate, ldcp->hstate,
	    ldcp->link_state);

	/* Lock channel */
	mutex_enter(&ldcp->lock);

	/* Mark the interrupt as being actively handled */
	ldcp->rx_intr_state = LDC_INTR_ACTIVE;

	/*
	 * Process the HV receive queue: handles CTRL packets and, in
	 * RELIABLE mode, copies DATA packets to the secondary data queue.
	 */
	status = i_ldc_rx_process_hvq(ldcp, &notify, &event);

	if (ldcp->mode != LDC_MODE_RELIABLE) {
		/*
		 * If there are no data packets on the queue, clear
		 * the interrupt. Otherwise, the ldc_read will clear
		 * interrupts after draining the queue. To indicate the
		 * interrupt has not yet been cleared, it is marked
		 * as pending.
		 */
		if ((event & LDC_EVT_READ) == 0) {
			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
		} else {
			ldcp->rx_intr_state = LDC_INTR_PEND;
		}
	}

	/* if callbacks are disabled, do not notify */
	if (notify && ldcp->cb_enabled) {
		/*
		 * The channel lock is dropped across the client callback;
		 * cb_inprogress records that the callback is running while
		 * the lock is not held (NOTE(review): presumably consulted
		 * by callback (un)registration -- confirm against those
		 * paths).
		 */
		ldcp->cb_inprogress = B_TRUE;
		mutex_exit(&ldcp->lock);
		rv = ldcp->cb(event, ldcp->cb_arg);
		if (rv) {
			DWARN(ldcp->id,
			    "i_ldc_rx_hdlr: (0x%llx) callback failure",
			    ldcp->id);
		}
		mutex_enter(&ldcp->lock);
		ldcp->cb_inprogress = B_FALSE;
	}

	if (ldcp->mode == LDC_MODE_RELIABLE) {
		if (status == ENOSPC) {
			/*
			 * Here, ENOSPC indicates the secondary data
			 * queue is full and the Rx queue is non-empty.
			 * Much like how reliable and raw modes are
			 * handled above, since the Rx queue is non-
			 * empty, we mark the interrupt as pending to
			 * indicate it has not yet been cleared.
			 */
			ldcp->rx_intr_state = LDC_INTR_PEND;
		} else {
			/*
			 * We have processed all CTRL packets and
			 * copied all DATA packets to the secondary
			 * queue. Clear the interrupt.
			 */
			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
		}
	}

	mutex_exit(&ldcp->lock);

	D1(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) exiting handler", ldcp->id);

	return (DDI_INTR_CLAIMED);
}
1033 
1034 /*
1035  * Wrapper for the Rx HV queue processing function to be used when
1036  * checking the Rx HV queue for data packets. Unlike the interrupt
1037  * handler code flow, the Rx interrupt is not cleared here and
1038  * callbacks are not made.
1039  */
1040 static uint_t
1041 i_ldc_chkq(ldc_chan_t *ldcp)
1042 {
1043 	boolean_t	notify;
1044 	uint64_t	event;
1045 
1046 	return (i_ldc_rx_process_hvq(ldcp, &notify, &event));
1047 }
1048 
1049 /*
1050  * Send a LDC message
1051  */
static int
i_ldc_send_pkt(ldc_chan_t *ldcp, uint8_t pkttype, uint8_t subtype,
    uint8_t ctrlmsg)
{
	int		rv;
	ldc_msg_t 	*pkt;
	uint64_t	tx_tail;
	uint32_t	curr_seqid;

	/* Obtain Tx lock */
	mutex_enter(&ldcp->tx_lock);

	curr_seqid = ldcp->last_msg_snt;

	/* get the current tail for the message */
	rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
	if (rv) {
		DWARN(ldcp->id,
		    "i_ldc_send_pkt: (0x%llx) error sending pkt, "
		    "type=0x%x,subtype=0x%x,ctrl=0x%x\n",
		    ldcp->id, pkttype, subtype, ctrlmsg);
		mutex_exit(&ldcp->tx_lock);
		return (rv);
	}

	/* build the packet in place in the Tx queue entry at the tail */
	pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
	ZERO_PKT(pkt);

	/* Initialize the packet */
	pkt->type = pkttype;
	pkt->stype = subtype;
	pkt->ctrl = ctrlmsg;

	/*
	 * Bump the seqid for everything except RTS/RTR messages (those
	 * carry the initial seqid -- see i_ldc_check_seqid). The
	 * seqid/ackid fields are only filled in for non-RAW modes.
	 */
	if (((ctrlmsg & LDC_CTRL_MASK) != LDC_RTS) &&
	    ((ctrlmsg & LDC_CTRL_MASK) != LDC_RTR)) {
		curr_seqid++;
		if (ldcp->mode != LDC_MODE_RAW) {
			pkt->seqid = curr_seqid;
			pkt->ackid = ldcp->last_msg_rcd;
		}
	}
	DUMP_LDC_PKT(ldcp, "i_ldc_send_pkt", (uint64_t)pkt);

	/* initiate the send by calling into HV and set the new tail */
	tx_tail = (tx_tail + LDC_PACKET_SIZE) %
	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);

	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
	if (rv) {
		DWARN(ldcp->id,
		    "i_ldc_send_pkt:(0x%llx) error sending pkt, "
		    "type=0x%x,stype=0x%x,ctrl=0x%x\n",
		    ldcp->id, pkttype, subtype, ctrlmsg);
		mutex_exit(&ldcp->tx_lock);
		return (EIO);
	}

	/* commit the seqid and cached tail only after a successful send */
	ldcp->last_msg_snt = curr_seqid;
	ldcp->tx_tail = tx_tail;

	mutex_exit(&ldcp->tx_lock);
	return (0);
}
1116 
1117 /*
1118  * Checks if packet was received in right order
1119  * in the case of a reliable link.
1120  * Returns 0 if in order, else EIO
1121  */
1122 static int
1123 i_ldc_check_seqid(ldc_chan_t *ldcp, ldc_msg_t *msg)
1124 {
1125 	/* No seqid checking for RAW mode */
1126 	if (ldcp->mode == LDC_MODE_RAW)
1127 		return (0);
1128 
1129 	/* No seqid checking for version, RTS, RTR message */
1130 	if (msg->ctrl == LDC_VER ||
1131 	    msg->ctrl == LDC_RTS ||
1132 	    msg->ctrl == LDC_RTR)
1133 		return (0);
1134 
1135 	/* Initial seqid to use is sent in RTS/RTR and saved in last_msg_rcd */
1136 	if (msg->seqid != (ldcp->last_msg_rcd + 1)) {
1137 		DWARN(ldcp->id,
1138 		    "i_ldc_check_seqid: (0x%llx) out-of-order pkt, got 0x%x, "
1139 		    "expecting 0x%x\n", ldcp->id, msg->seqid,
1140 		    (ldcp->last_msg_rcd + 1));
1141 		return (EIO);
1142 	}
1143 
1144 #ifdef DEBUG
1145 	if (LDC_INJECT_PKTLOSS(ldcp)) {
1146 		DWARN(ldcp->id,
1147 		    "i_ldc_check_seqid: (0x%llx) inject pkt loss\n", ldcp->id);
1148 		return (EIO);
1149 	}
1150 #endif
1151 
1152 	return (0);
1153 }
1154 
1155 
1156 /*
1157  * Process an incoming version ctrl message
1158  */
1159 static int
1160 i_ldc_process_VER(ldc_chan_t *ldcp, ldc_msg_t *msg)
1161 {
1162 	int 		rv = 0, idx = ldcp->next_vidx;
1163 	ldc_msg_t 	*pkt;
1164 	uint64_t	tx_tail;
1165 	ldc_ver_t	*rcvd_ver;
1166 
1167 	/* get the received version */
1168 	rcvd_ver = (ldc_ver_t *)((uint64_t)msg + LDC_PAYLOAD_VER_OFF);
1169 
1170 	D2(ldcp->id, "i_ldc_process_VER: (0x%llx) received VER v%u.%u\n",
1171 	    ldcp->id, rcvd_ver->major, rcvd_ver->minor);
1172 
1173 	/* Obtain Tx lock */
1174 	mutex_enter(&ldcp->tx_lock);
1175 
1176 	switch (msg->stype) {
1177 	case LDC_INFO:
1178 
1179 		if ((ldcp->tstate & ~TS_IN_RESET) == TS_VREADY) {
1180 			(void) i_ldc_txq_reconf(ldcp);
1181 			i_ldc_reset_state(ldcp);
1182 			mutex_exit(&ldcp->tx_lock);
1183 			return (EAGAIN);
1184 		}
1185 
1186 		/* get the current tail and pkt for the response */
1187 		rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
1188 		if (rv != 0) {
1189 			DWARN(ldcp->id,
1190 			    "i_ldc_process_VER: (0x%llx) err sending "
1191 			    "version ACK/NACK\n", ldcp->id);
1192 			i_ldc_reset(ldcp, B_TRUE);
1193 			mutex_exit(&ldcp->tx_lock);
1194 			return (ECONNRESET);
1195 		}
1196 
1197 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
1198 		ZERO_PKT(pkt);
1199 
1200 		/* initialize the packet */
1201 		pkt->type = LDC_CTRL;
1202 		pkt->ctrl = LDC_VER;
1203 
1204 		for (;;) {
1205 
1206 			D1(ldcp->id, "i_ldc_process_VER: got %u.%u chk %u.%u\n",
1207 			    rcvd_ver->major, rcvd_ver->minor,
1208 			    ldc_versions[idx].major, ldc_versions[idx].minor);
1209 
1210 			if (rcvd_ver->major == ldc_versions[idx].major) {
1211 				/* major version match - ACK version */
1212 				pkt->stype = LDC_ACK;
1213 
1214 				/*
1215 				 * lower minor version to the one this endpt
1216 				 * supports, if necessary
1217 				 */
1218 				if (rcvd_ver->minor > ldc_versions[idx].minor)
1219 					rcvd_ver->minor =
1220 					    ldc_versions[idx].minor;
1221 				bcopy(rcvd_ver, pkt->udata, sizeof (*rcvd_ver));
1222 
1223 				break;
1224 			}
1225 
1226 			if (rcvd_ver->major > ldc_versions[idx].major) {
1227 
1228 				D1(ldcp->id, "i_ldc_process_VER: using next"
1229 				    " lower idx=%d, v%u.%u\n", idx,
1230 				    ldc_versions[idx].major,
1231 				    ldc_versions[idx].minor);
1232 
1233 				/* nack with next lower version */
1234 				pkt->stype = LDC_NACK;
1235 				bcopy(&ldc_versions[idx], pkt->udata,
1236 				    sizeof (ldc_versions[idx]));
1237 				ldcp->next_vidx = idx;
1238 				break;
1239 			}
1240 
1241 			/* next major version */
1242 			idx++;
1243 
1244 			D1(ldcp->id, "i_ldc_process_VER: inc idx %x\n", idx);
1245 
1246 			if (idx == LDC_NUM_VERS) {
1247 				/* no version match - send NACK */
1248 				pkt->stype = LDC_NACK;
1249 				bzero(pkt->udata, sizeof (ldc_ver_t));
1250 				ldcp->next_vidx = 0;
1251 				break;
1252 			}
1253 		}
1254 
1255 		/* initiate the send by calling into HV and set the new tail */
1256 		tx_tail = (tx_tail + LDC_PACKET_SIZE) %
1257 		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1258 
1259 		rv = i_ldc_set_tx_tail(ldcp, tx_tail);
1260 		if (rv == 0) {
1261 			ldcp->tx_tail = tx_tail;
1262 			if (pkt->stype == LDC_ACK) {
1263 				D2(ldcp->id, "i_ldc_process_VER: (0x%llx) sent"
1264 				    " version ACK\n", ldcp->id);
1265 				/* Save the ACK'd version */
1266 				ldcp->version.major = rcvd_ver->major;
1267 				ldcp->version.minor = rcvd_ver->minor;
1268 				ldcp->hstate |= TS_RCVD_VER;
1269 				ldcp->tstate |= TS_VER_DONE;
1270 				D1(DBG_ALL_LDCS,
1271 				    "(0x%llx) Sent ACK, "
1272 				    "Agreed on version v%u.%u\n",
1273 				    ldcp->id, rcvd_ver->major, rcvd_ver->minor);
1274 			}
1275 		} else {
1276 			DWARN(ldcp->id,
1277 			    "i_ldc_process_VER: (0x%llx) error sending "
1278 			    "ACK/NACK\n", ldcp->id);
1279 			i_ldc_reset(ldcp, B_TRUE);
1280 			mutex_exit(&ldcp->tx_lock);
1281 			return (ECONNRESET);
1282 		}
1283 
1284 		break;
1285 
1286 	case LDC_ACK:
1287 		if ((ldcp->tstate & ~TS_IN_RESET) == TS_VREADY) {
1288 			if (ldcp->version.major != rcvd_ver->major ||
1289 			    ldcp->version.minor != rcvd_ver->minor) {
1290 
1291 				/* mismatched version - reset connection */
1292 				DWARN(ldcp->id,
1293 				    "i_ldc_process_VER: (0x%llx) recvd"
1294 				    " ACK ver != sent ACK ver\n", ldcp->id);
1295 				i_ldc_reset(ldcp, B_TRUE);
1296 				mutex_exit(&ldcp->tx_lock);
1297 				return (ECONNRESET);
1298 			}
1299 		} else {
1300 			/* SUCCESS - we have agreed on a version */
1301 			ldcp->version.major = rcvd_ver->major;
1302 			ldcp->version.minor = rcvd_ver->minor;
1303 			ldcp->tstate |= TS_VER_DONE;
1304 		}
1305 
1306 		D1(ldcp->id, "(0x%llx) Got ACK, Agreed on version v%u.%u\n",
1307 		    ldcp->id, rcvd_ver->major, rcvd_ver->minor);
1308 
1309 		/* initiate RTS-RTR-RDX handshake */
1310 		rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
1311 		if (rv) {
1312 			DWARN(ldcp->id,
1313 		    "i_ldc_process_VER: (0x%llx) cannot send RTS\n",
1314 			    ldcp->id);
1315 			i_ldc_reset(ldcp, B_TRUE);
1316 			mutex_exit(&ldcp->tx_lock);
1317 			return (ECONNRESET);
1318 		}
1319 
1320 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
1321 		ZERO_PKT(pkt);
1322 
1323 		pkt->type = LDC_CTRL;
1324 		pkt->stype = LDC_INFO;
1325 		pkt->ctrl = LDC_RTS;
1326 		pkt->env = ldcp->mode;
1327 		if (ldcp->mode != LDC_MODE_RAW)
1328 			pkt->seqid = LDC_INIT_SEQID;
1329 
1330 		ldcp->last_msg_rcd = LDC_INIT_SEQID;
1331 
1332 		DUMP_LDC_PKT(ldcp, "i_ldc_process_VER snd rts", (uint64_t)pkt);
1333 
1334 		/* initiate the send by calling into HV and set the new tail */
1335 		tx_tail = (tx_tail + LDC_PACKET_SIZE) %
1336 		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1337 
1338 		rv = i_ldc_set_tx_tail(ldcp, tx_tail);
1339 		if (rv) {
1340 			D2(ldcp->id,
1341 			    "i_ldc_process_VER: (0x%llx) no listener\n",
1342 			    ldcp->id);
1343 			i_ldc_reset(ldcp, B_TRUE);
1344 			mutex_exit(&ldcp->tx_lock);
1345 			return (ECONNRESET);
1346 		}
1347 
1348 		ldcp->tx_tail = tx_tail;
1349 		ldcp->hstate |= TS_SENT_RTS;
1350 
1351 		break;
1352 
1353 	case LDC_NACK:
1354 		/* check if version in NACK is zero */
1355 		if (rcvd_ver->major == 0 && rcvd_ver->minor == 0) {
1356 			/* version handshake failure */
1357 			DWARN(DBG_ALL_LDCS,
1358 			    "i_ldc_process_VER: (0x%llx) no version match\n",
1359 			    ldcp->id);
1360 			i_ldc_reset(ldcp, B_TRUE);
1361 			mutex_exit(&ldcp->tx_lock);
1362 			return (ECONNRESET);
1363 		}
1364 
1365 		/* get the current tail and pkt for the response */
1366 		rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
1367 		if (rv != 0) {
1368 			cmn_err(CE_NOTE,
1369 			    "i_ldc_process_VER: (0x%lx) err sending "
1370 			    "version ACK/NACK\n", ldcp->id);
1371 			i_ldc_reset(ldcp, B_TRUE);
1372 			mutex_exit(&ldcp->tx_lock);
1373 			return (ECONNRESET);
1374 		}
1375 
1376 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
1377 		ZERO_PKT(pkt);
1378 
1379 		/* initialize the packet */
1380 		pkt->type = LDC_CTRL;
1381 		pkt->ctrl = LDC_VER;
1382 		pkt->stype = LDC_INFO;
1383 
1384 		/* check ver in NACK msg has a match */
1385 		for (;;) {
1386 			if (rcvd_ver->major == ldc_versions[idx].major) {
1387 				/*
1388 				 * major version match - resubmit request
1389 				 * if lower minor version to the one this endpt
1390 				 * supports, if necessary
1391 				 */
1392 				if (rcvd_ver->minor > ldc_versions[idx].minor)
1393 					rcvd_ver->minor =
1394 					    ldc_versions[idx].minor;
1395 				bcopy(rcvd_ver, pkt->udata, sizeof (*rcvd_ver));
1396 				break;
1397 			}
1398 
1399 			if (rcvd_ver->major > ldc_versions[idx].major) {
1400 
1401 				D1(ldcp->id, "i_ldc_process_VER: using next"
1402 				    " lower idx=%d, v%u.%u\n", idx,
1403 				    ldc_versions[idx].major,
1404 				    ldc_versions[idx].minor);
1405 
1406 				/* send next lower version */
1407 				bcopy(&ldc_versions[idx], pkt->udata,
1408 				    sizeof (ldc_versions[idx]));
1409 				ldcp->next_vidx = idx;
1410 				break;
1411 			}
1412 
1413 			/* next version */
1414 			idx++;
1415 
1416 			D1(ldcp->id, "i_ldc_process_VER: inc idx %x\n", idx);
1417 
1418 			if (idx == LDC_NUM_VERS) {
1419 				/* no version match - terminate */
1420 				ldcp->next_vidx = 0;
1421 				mutex_exit(&ldcp->tx_lock);
1422 				return (ECONNRESET);
1423 			}
1424 		}
1425 
1426 		/* initiate the send by calling into HV and set the new tail */
1427 		tx_tail = (tx_tail + LDC_PACKET_SIZE) %
1428 		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1429 
1430 		rv = i_ldc_set_tx_tail(ldcp, tx_tail);
1431 		if (rv == 0) {
1432 			D2(ldcp->id, "i_ldc_process_VER: (0x%llx) sent version"
1433 			    "INFO v%u.%u\n", ldcp->id, ldc_versions[idx].major,
1434 			    ldc_versions[idx].minor);
1435 			ldcp->tx_tail = tx_tail;
1436 		} else {
1437 			cmn_err(CE_NOTE,
1438 			    "i_ldc_process_VER: (0x%lx) error sending version"
1439 			    "INFO\n", ldcp->id);
1440 			i_ldc_reset(ldcp, B_TRUE);
1441 			mutex_exit(&ldcp->tx_lock);
1442 			return (ECONNRESET);
1443 		}
1444 
1445 		break;
1446 	}
1447 
1448 	mutex_exit(&ldcp->tx_lock);
1449 	return (rv);
1450 }
1451 
1452 
1453 /*
1454  * Process an incoming RTS ctrl message
1455  */
1456 static int
1457 i_ldc_process_RTS(ldc_chan_t *ldcp, ldc_msg_t *msg)
1458 {
1459 	int 		rv = 0;
1460 	ldc_msg_t 	*pkt;
1461 	uint64_t	tx_tail;
1462 	boolean_t	sent_NACK = B_FALSE;
1463 
1464 	D2(ldcp->id, "i_ldc_process_RTS: (0x%llx) received RTS\n", ldcp->id);
1465 
1466 	switch (msg->stype) {
1467 	case LDC_NACK:
1468 		DWARN(ldcp->id,
1469 		    "i_ldc_process_RTS: (0x%llx) RTS NACK received\n",
1470 		    ldcp->id);
1471 
1472 		/* Reset the channel -- as we cannot continue */
1473 		mutex_enter(&ldcp->tx_lock);
1474 		i_ldc_reset(ldcp, B_TRUE);
1475 		mutex_exit(&ldcp->tx_lock);
1476 		rv = ECONNRESET;
1477 		break;
1478 
1479 	case LDC_INFO:
1480 
1481 		/* check mode */
1482 		if (ldcp->mode != (ldc_mode_t)msg->env) {
1483 			cmn_err(CE_NOTE,
1484 			    "i_ldc_process_RTS: (0x%lx) mode mismatch\n",
1485 			    ldcp->id);
1486 			/*
1487 			 * send NACK in response to MODE message
1488 			 * get the current tail for the response
1489 			 */
1490 			rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_NACK, LDC_RTS);
1491 			if (rv) {
1492 				/* if cannot send NACK - reset channel */
1493 				mutex_enter(&ldcp->tx_lock);
1494 				i_ldc_reset(ldcp, B_TRUE);
1495 				mutex_exit(&ldcp->tx_lock);
1496 				rv = ECONNRESET;
1497 				break;
1498 			}
1499 			sent_NACK = B_TRUE;
1500 		}
1501 		break;
1502 	default:
1503 		DWARN(ldcp->id, "i_ldc_process_RTS: (0x%llx) unexp ACK\n",
1504 		    ldcp->id);
1505 		mutex_enter(&ldcp->tx_lock);
1506 		i_ldc_reset(ldcp, B_TRUE);
1507 		mutex_exit(&ldcp->tx_lock);
1508 		rv = ECONNRESET;
1509 		break;
1510 	}
1511 
1512 	/*
1513 	 * If either the connection was reset (when rv != 0) or
1514 	 * a NACK was sent, we return. In the case of a NACK
1515 	 * we dont want to consume the packet that came in but
1516 	 * not record that we received the RTS
1517 	 */
1518 	if (rv || sent_NACK)
1519 		return (rv);
1520 
1521 	/* record RTS received */
1522 	ldcp->hstate |= TS_RCVD_RTS;
1523 
1524 	/* store initial SEQID info */
1525 	ldcp->last_msg_snt = msg->seqid;
1526 
1527 	/* Obtain Tx lock */
1528 	mutex_enter(&ldcp->tx_lock);
1529 
1530 	/* get the current tail for the response */
1531 	rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
1532 	if (rv != 0) {
1533 		cmn_err(CE_NOTE,
1534 		    "i_ldc_process_RTS: (0x%lx) err sending RTR\n",
1535 		    ldcp->id);
1536 		i_ldc_reset(ldcp, B_TRUE);
1537 		mutex_exit(&ldcp->tx_lock);
1538 		return (ECONNRESET);
1539 	}
1540 
1541 	pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
1542 	ZERO_PKT(pkt);
1543 
1544 	/* initialize the packet */
1545 	pkt->type = LDC_CTRL;
1546 	pkt->stype = LDC_INFO;
1547 	pkt->ctrl = LDC_RTR;
1548 	pkt->env = ldcp->mode;
1549 	if (ldcp->mode != LDC_MODE_RAW)
1550 		pkt->seqid = LDC_INIT_SEQID;
1551 
1552 	ldcp->last_msg_rcd = msg->seqid;
1553 
1554 	/* initiate the send by calling into HV and set the new tail */
1555 	tx_tail = (tx_tail + LDC_PACKET_SIZE) %
1556 	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1557 
1558 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
1559 	if (rv == 0) {
1560 		D2(ldcp->id,
1561 		    "i_ldc_process_RTS: (0x%llx) sent RTR\n", ldcp->id);
1562 		DUMP_LDC_PKT(ldcp, "i_ldc_process_RTS sent rtr", (uint64_t)pkt);
1563 
1564 		ldcp->tx_tail = tx_tail;
1565 		ldcp->hstate |= TS_SENT_RTR;
1566 
1567 	} else {
1568 		cmn_err(CE_NOTE,
1569 		    "i_ldc_process_RTS: (0x%lx) error sending RTR\n",
1570 		    ldcp->id);
1571 		i_ldc_reset(ldcp, B_TRUE);
1572 		mutex_exit(&ldcp->tx_lock);
1573 		return (ECONNRESET);
1574 	}
1575 
1576 	mutex_exit(&ldcp->tx_lock);
1577 	return (0);
1578 }
1579 
1580 /*
1581  * Process an incoming RTR ctrl message
1582  */
1583 static int
1584 i_ldc_process_RTR(ldc_chan_t *ldcp, ldc_msg_t *msg)
1585 {
1586 	int 		rv = 0;
1587 	boolean_t	sent_NACK = B_FALSE;
1588 
1589 	D2(ldcp->id, "i_ldc_process_RTR: (0x%llx) received RTR\n", ldcp->id);
1590 
1591 	switch (msg->stype) {
1592 	case LDC_NACK:
1593 		/* RTR NACK received */
1594 		DWARN(ldcp->id,
1595 		    "i_ldc_process_RTR: (0x%llx) RTR NACK received\n",
1596 		    ldcp->id);
1597 
1598 		/* Reset the channel -- as we cannot continue */
1599 		mutex_enter(&ldcp->tx_lock);
1600 		i_ldc_reset(ldcp, B_TRUE);
1601 		mutex_exit(&ldcp->tx_lock);
1602 		rv = ECONNRESET;
1603 
1604 		break;
1605 
1606 	case LDC_INFO:
1607 
1608 		/* check mode */
1609 		if (ldcp->mode != (ldc_mode_t)msg->env) {
1610 			DWARN(ldcp->id,
1611 			    "i_ldc_process_RTR: (0x%llx) mode mismatch, "
1612 			    "expecting 0x%x, got 0x%x\n",
1613 			    ldcp->id, ldcp->mode, (ldc_mode_t)msg->env);
1614 			/*
1615 			 * send NACK in response to MODE message
1616 			 * get the current tail for the response
1617 			 */
1618 			rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_NACK, LDC_RTR);
1619 			if (rv) {
1620 				/* if cannot send NACK - reset channel */
1621 				mutex_enter(&ldcp->tx_lock);
1622 				i_ldc_reset(ldcp, B_TRUE);
1623 				mutex_exit(&ldcp->tx_lock);
1624 				rv = ECONNRESET;
1625 				break;
1626 			}
1627 			sent_NACK = B_TRUE;
1628 		}
1629 		break;
1630 
1631 	default:
1632 		DWARN(ldcp->id, "i_ldc_process_RTR: (0x%llx) unexp ACK\n",
1633 		    ldcp->id);
1634 
1635 		/* Reset the channel -- as we cannot continue */
1636 		mutex_enter(&ldcp->tx_lock);
1637 		i_ldc_reset(ldcp, B_TRUE);
1638 		mutex_exit(&ldcp->tx_lock);
1639 		rv = ECONNRESET;
1640 		break;
1641 	}
1642 
1643 	/*
1644 	 * If either the connection was reset (when rv != 0) or
1645 	 * a NACK was sent, we return. In the case of a NACK
1646 	 * we dont want to consume the packet that came in but
1647 	 * not record that we received the RTR
1648 	 */
1649 	if (rv || sent_NACK)
1650 		return (rv);
1651 
1652 	ldcp->last_msg_snt = msg->seqid;
1653 	ldcp->hstate |= TS_RCVD_RTR;
1654 
1655 	rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_INFO, LDC_RDX);
1656 	if (rv) {
1657 		cmn_err(CE_NOTE,
1658 		    "i_ldc_process_RTR: (0x%lx) cannot send RDX\n",
1659 		    ldcp->id);
1660 		mutex_enter(&ldcp->tx_lock);
1661 		i_ldc_reset(ldcp, B_TRUE);
1662 		mutex_exit(&ldcp->tx_lock);
1663 		return (ECONNRESET);
1664 	}
1665 	D2(ldcp->id,
1666 	    "i_ldc_process_RTR: (0x%llx) sent RDX\n", ldcp->id);
1667 
1668 	ldcp->hstate |= TS_SENT_RDX;
1669 	ldcp->tstate |= TS_HSHAKE_DONE;
1670 	if ((ldcp->tstate & TS_IN_RESET) == 0)
1671 		ldcp->status = LDC_UP;
1672 
1673 	D1(ldcp->id, "(0x%llx) Handshake Complete\n", ldcp->id);
1674 
1675 	return (0);
1676 }
1677 
1678 
1679 /*
1680  * Process an incoming RDX ctrl message
1681  */
1682 static int
1683 i_ldc_process_RDX(ldc_chan_t *ldcp, ldc_msg_t *msg)
1684 {
1685 	int	rv = 0;
1686 
1687 	D2(ldcp->id, "i_ldc_process_RDX: (0x%llx) received RDX\n", ldcp->id);
1688 
1689 	switch (msg->stype) {
1690 	case LDC_NACK:
1691 		/* RDX NACK received */
1692 		DWARN(ldcp->id,
1693 		    "i_ldc_process_RDX: (0x%llx) RDX NACK received\n",
1694 		    ldcp->id);
1695 
1696 		/* Reset the channel -- as we cannot continue */
1697 		mutex_enter(&ldcp->tx_lock);
1698 		i_ldc_reset(ldcp, B_TRUE);
1699 		mutex_exit(&ldcp->tx_lock);
1700 		rv = ECONNRESET;
1701 
1702 		break;
1703 
1704 	case LDC_INFO:
1705 
1706 		/*
1707 		 * if channel is UP and a RDX received after data transmission
1708 		 * has commenced it is an error
1709 		 */
1710 		if ((ldcp->tstate == TS_UP) && (ldcp->hstate & TS_RCVD_RDX)) {
1711 			DWARN(DBG_ALL_LDCS,
1712 			    "i_ldc_process_RDX: (0x%llx) unexpected RDX"
1713 			    " - LDC reset\n", ldcp->id);
1714 			mutex_enter(&ldcp->tx_lock);
1715 			i_ldc_reset(ldcp, B_TRUE);
1716 			mutex_exit(&ldcp->tx_lock);
1717 			return (ECONNRESET);
1718 		}
1719 
1720 		ldcp->hstate |= TS_RCVD_RDX;
1721 		ldcp->tstate |= TS_HSHAKE_DONE;
1722 		if ((ldcp->tstate & TS_IN_RESET) == 0)
1723 			ldcp->status = LDC_UP;
1724 
1725 		D1(DBG_ALL_LDCS, "(0x%llx) Handshake Complete\n", ldcp->id);
1726 		break;
1727 
1728 	default:
1729 		DWARN(ldcp->id, "i_ldc_process_RDX: (0x%llx) unexp ACK\n",
1730 		    ldcp->id);
1731 
1732 		/* Reset the channel -- as we cannot continue */
1733 		mutex_enter(&ldcp->tx_lock);
1734 		i_ldc_reset(ldcp, B_TRUE);
1735 		mutex_exit(&ldcp->tx_lock);
1736 		rv = ECONNRESET;
1737 		break;
1738 	}
1739 
1740 	return (rv);
1741 }
1742 
1743 /*
1744  * Process an incoming ACK for a data packet
1745  */
1746 static int
1747 i_ldc_process_data_ACK(ldc_chan_t *ldcp, ldc_msg_t *msg)
1748 {
1749 	int		rv;
1750 	uint64_t 	tx_head;
1751 	ldc_msg_t	*pkt;
1752 
1753 	/* Obtain Tx lock */
1754 	mutex_enter(&ldcp->tx_lock);
1755 
1756 	/*
1757 	 * Read the current Tx head and tail
1758 	 */
1759 	rv = hv_ldc_tx_get_state(ldcp->id,
1760 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
1761 	if (rv != 0) {
1762 		cmn_err(CE_WARN,
1763 		    "i_ldc_process_data_ACK: (0x%lx) cannot read qptrs\n",
1764 		    ldcp->id);
1765 
1766 		/* Reset the channel -- as we cannot continue */
1767 		i_ldc_reset(ldcp, B_TRUE);
1768 		mutex_exit(&ldcp->tx_lock);
1769 		return (ECONNRESET);
1770 	}
1771 
1772 	/*
1773 	 * loop from where the previous ACK location was to the
1774 	 * current head location. This is how far the HV has
1775 	 * actually send pkts. Pkts between head and tail are
1776 	 * yet to be sent by HV.
1777 	 */
1778 	tx_head = ldcp->tx_ackd_head;
1779 	for (;;) {
1780 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_head);
1781 		tx_head = (tx_head + LDC_PACKET_SIZE) %
1782 		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1783 
1784 		if (pkt->seqid == msg->ackid) {
1785 			D2(ldcp->id,
1786 			    "i_ldc_process_data_ACK: (0x%llx) found packet\n",
1787 			    ldcp->id);
1788 			ldcp->last_ack_rcd = msg->ackid;
1789 			ldcp->tx_ackd_head = tx_head;
1790 			break;
1791 		}
1792 		if (tx_head == ldcp->tx_head) {
1793 			/* could not find packet */
1794 			DWARN(ldcp->id,
1795 			    "i_ldc_process_data_ACK: (0x%llx) invalid ACKid\n",
1796 			    ldcp->id);
1797 
1798 			/* Reset the channel -- as we cannot continue */
1799 			i_ldc_reset(ldcp, B_TRUE);
1800 			mutex_exit(&ldcp->tx_lock);
1801 			return (ECONNRESET);
1802 		}
1803 	}
1804 
1805 	mutex_exit(&ldcp->tx_lock);
1806 	return (0);
1807 }
1808 
1809 /*
1810  * Process incoming control message
1811  * Return 0 - session can continue
1812  *        EAGAIN - reprocess packet - state was changed
1813  *	  ECONNRESET - channel was reset
1814  */
1815 static int
1816 i_ldc_ctrlmsg(ldc_chan_t *ldcp, ldc_msg_t *msg)
1817 {
1818 	int 		rv = 0;
1819 
1820 	D1(ldcp->id, "i_ldc_ctrlmsg: (%llx) tstate = %lx, hstate = %lx\n",
1821 	    ldcp->id, ldcp->tstate, ldcp->hstate);
1822 
1823 	switch (ldcp->tstate & ~TS_IN_RESET) {
1824 
1825 	case TS_OPEN:
1826 	case TS_READY:
1827 
1828 		switch (msg->ctrl & LDC_CTRL_MASK) {
1829 		case LDC_VER:
1830 			/* process version message */
1831 			rv = i_ldc_process_VER(ldcp, msg);
1832 			break;
1833 		default:
1834 			DWARN(ldcp->id,
1835 			    "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
1836 			    "tstate=0x%x\n", ldcp->id,
1837 			    (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate);
1838 			break;
1839 		}
1840 
1841 		break;
1842 
1843 	case TS_VREADY:
1844 
1845 		switch (msg->ctrl & LDC_CTRL_MASK) {
1846 		case LDC_VER:
1847 			/* process version message */
1848 			rv = i_ldc_process_VER(ldcp, msg);
1849 			break;
1850 		case LDC_RTS:
1851 			/* process RTS message */
1852 			rv = i_ldc_process_RTS(ldcp, msg);
1853 			break;
1854 		case LDC_RTR:
1855 			/* process RTR message */
1856 			rv = i_ldc_process_RTR(ldcp, msg);
1857 			break;
1858 		case LDC_RDX:
1859 			/* process RDX message */
1860 			rv = i_ldc_process_RDX(ldcp, msg);
1861 			break;
1862 		default:
1863 			DWARN(ldcp->id,
1864 			    "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
1865 			    "tstate=0x%x\n", ldcp->id,
1866 			    (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate);
1867 			break;
1868 		}
1869 
1870 		break;
1871 
1872 	case TS_UP:
1873 
1874 		switch (msg->ctrl & LDC_CTRL_MASK) {
1875 		case LDC_VER:
1876 			DWARN(ldcp->id,
1877 			    "i_ldc_ctrlmsg: (0x%llx) unexpected VER "
1878 			    "- LDC reset\n", ldcp->id);
1879 			/* peer is redoing version negotiation */
1880 			mutex_enter(&ldcp->tx_lock);
1881 			(void) i_ldc_txq_reconf(ldcp);
1882 			i_ldc_reset_state(ldcp);
1883 			mutex_exit(&ldcp->tx_lock);
1884 			rv = EAGAIN;
1885 			break;
1886 
1887 		case LDC_RDX:
1888 			/* process RDX message */
1889 			rv = i_ldc_process_RDX(ldcp, msg);
1890 			break;
1891 
1892 		default:
1893 			DWARN(ldcp->id,
1894 			    "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
1895 			    "tstate=0x%x\n", ldcp->id,
1896 			    (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate);
1897 			break;
1898 		}
1899 	}
1900 
1901 	return (rv);
1902 }
1903 
1904 /*
1905  * Register channel with the channel nexus
1906  */
1907 static int
1908 i_ldc_register_channel(ldc_chan_t *ldcp)
1909 {
1910 	int		rv = 0;
1911 	ldc_cnex_t	*cinfo = &ldcssp->cinfo;
1912 
1913 	if (cinfo->dip == NULL) {
1914 		DWARN(ldcp->id,
1915 		    "i_ldc_register_channel: cnex has not registered\n");
1916 		return (EAGAIN);
1917 	}
1918 
1919 	rv = cinfo->reg_chan(cinfo->dip, ldcp->id, ldcp->devclass);
1920 	if (rv) {
1921 		DWARN(ldcp->id,
1922 		    "i_ldc_register_channel: cannot register channel\n");
1923 		return (rv);
1924 	}
1925 
1926 	rv = cinfo->add_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR,
1927 	    i_ldc_tx_hdlr, ldcp, NULL);
1928 	if (rv) {
1929 		DWARN(ldcp->id,
1930 		    "i_ldc_register_channel: cannot add Tx interrupt\n");
1931 		(void) cinfo->unreg_chan(cinfo->dip, ldcp->id);
1932 		return (rv);
1933 	}
1934 
1935 	rv = cinfo->add_intr(cinfo->dip, ldcp->id, CNEX_RX_INTR,
1936 	    i_ldc_rx_hdlr, ldcp, NULL);
1937 	if (rv) {
1938 		DWARN(ldcp->id,
1939 		    "i_ldc_register_channel: cannot add Rx interrupt\n");
1940 		(void) cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR);
1941 		(void) cinfo->unreg_chan(cinfo->dip, ldcp->id);
1942 		return (rv);
1943 	}
1944 
1945 	ldcp->tstate |= TS_CNEX_RDY;
1946 
1947 	return (0);
1948 }
1949 
1950 /*
1951  * Unregister a channel with the channel nexus
1952  */
1953 static int
1954 i_ldc_unregister_channel(ldc_chan_t *ldcp)
1955 {
1956 	int		rv = 0;
1957 	ldc_cnex_t	*cinfo = &ldcssp->cinfo;
1958 
1959 	if (cinfo->dip == NULL) {
1960 		DWARN(ldcp->id,
1961 		    "i_ldc_unregister_channel: cnex has not registered\n");
1962 		return (EAGAIN);
1963 	}
1964 
1965 	if (ldcp->tstate & TS_CNEX_RDY) {
1966 
1967 		/* Remove the Rx interrupt */
1968 		rv = cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_RX_INTR);
1969 		if (rv) {
1970 			if (rv != EAGAIN) {
1971 				DWARN(ldcp->id,
1972 				    "i_ldc_unregister_channel: err removing "
1973 				    "Rx intr\n");
1974 				return (rv);
1975 			}
1976 
1977 			/*
1978 			 * If interrupts are pending and handler has
1979 			 * finished running, clear interrupt and try
1980 			 * again
1981 			 */
1982 			if (ldcp->rx_intr_state != LDC_INTR_PEND)
1983 				return (rv);
1984 
1985 			(void) i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
1986 			rv = cinfo->rem_intr(cinfo->dip, ldcp->id,
1987 			    CNEX_RX_INTR);
1988 			if (rv) {
1989 				DWARN(ldcp->id, "i_ldc_unregister_channel: "
1990 				    "err removing Rx interrupt\n");
1991 				return (rv);
1992 			}
1993 		}
1994 
1995 		/* Remove the Tx interrupt */
1996 		rv = cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR);
1997 		if (rv) {
1998 			DWARN(ldcp->id,
1999 			    "i_ldc_unregister_channel: err removing Tx intr\n");
2000 			return (rv);
2001 		}
2002 
2003 		/* Unregister the channel */
2004 		rv = cinfo->unreg_chan(ldcssp->cinfo.dip, ldcp->id);
2005 		if (rv) {
2006 			DWARN(ldcp->id,
2007 			    "i_ldc_unregister_channel: cannot unreg channel\n");
2008 			return (rv);
2009 		}
2010 
2011 		ldcp->tstate &= ~TS_CNEX_RDY;
2012 	}
2013 
2014 	return (0);
2015 }
2016 
2017 
2018 /*
2019  * LDC transmit interrupt handler
2020  *    triggered for chanel up/down/reset events
2021  *    and Tx queue content changes
2022  */
2023 static uint_t
2024 i_ldc_tx_hdlr(caddr_t arg1, caddr_t arg2)
2025 {
2026 	_NOTE(ARGUNUSED(arg2))
2027 
2028 	int 		rv;
2029 	ldc_chan_t 	*ldcp;
2030 	boolean_t 	notify_client = B_FALSE;
2031 	uint64_t	notify_event = 0, link_state;
2032 
2033 	/* Get the channel for which interrupt was received */
2034 	ASSERT(arg1 != NULL);
2035 	ldcp = (ldc_chan_t *)arg1;
2036 
2037 	D1(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) Received intr, ldcp=0x%p\n",
2038 	    ldcp->id, ldcp);
2039 
2040 	/* Lock channel */
2041 	mutex_enter(&ldcp->lock);
2042 
2043 	/* Obtain Tx lock */
2044 	mutex_enter(&ldcp->tx_lock);
2045 
2046 	/* mark interrupt as pending */
2047 	ldcp->tx_intr_state = LDC_INTR_ACTIVE;
2048 
2049 	/* save current link state */
2050 	link_state = ldcp->link_state;
2051 
2052 	rv = hv_ldc_tx_get_state(ldcp->id, &ldcp->tx_head, &ldcp->tx_tail,
2053 	    &ldcp->link_state);
2054 	if (rv) {
2055 		cmn_err(CE_WARN,
2056 		    "i_ldc_tx_hdlr: (0x%lx) cannot read queue ptrs rv=0x%d\n",
2057 		    ldcp->id, rv);
2058 		i_ldc_clear_intr(ldcp, CNEX_TX_INTR);
2059 		mutex_exit(&ldcp->tx_lock);
2060 		mutex_exit(&ldcp->lock);
2061 		return (DDI_INTR_CLAIMED);
2062 	}
2063 
2064 	/*
2065 	 * reset the channel state if the channel went down
2066 	 * (other side unconfigured queue) or channel was reset
2067 	 * (other side reconfigured its queue)
2068 	 */
2069 	if (link_state != ldcp->link_state &&
2070 	    ldcp->link_state == LDC_CHANNEL_DOWN) {
2071 		D1(ldcp->id, "i_ldc_tx_hdlr: channel link down\n", ldcp->id);
2072 		i_ldc_reset(ldcp, B_FALSE);
2073 		notify_client = B_TRUE;
2074 		notify_event = LDC_EVT_DOWN;
2075 	}
2076 
2077 	if (link_state != ldcp->link_state &&
2078 	    ldcp->link_state == LDC_CHANNEL_RESET) {
2079 		D1(ldcp->id, "i_ldc_tx_hdlr: channel link reset\n", ldcp->id);
2080 		i_ldc_reset(ldcp, B_FALSE);
2081 		notify_client = B_TRUE;
2082 		notify_event = LDC_EVT_RESET;
2083 	}
2084 
2085 	if (link_state != ldcp->link_state &&
2086 	    (ldcp->tstate & ~TS_IN_RESET) == TS_OPEN &&
2087 	    ldcp->link_state == LDC_CHANNEL_UP) {
2088 		D1(ldcp->id, "i_ldc_tx_hdlr: channel link up\n", ldcp->id);
2089 		notify_client = B_TRUE;
2090 		notify_event = LDC_EVT_RESET;
2091 		ldcp->tstate |= TS_LINK_READY;
2092 		ldcp->status = LDC_READY;
2093 	}
2094 
2095 	/* if callbacks are disabled, do not notify */
2096 	if (!ldcp->cb_enabled)
2097 		notify_client = B_FALSE;
2098 
2099 	i_ldc_clear_intr(ldcp, CNEX_TX_INTR);
2100 	mutex_exit(&ldcp->tx_lock);
2101 
2102 	if (notify_client) {
2103 		ldcp->cb_inprogress = B_TRUE;
2104 		mutex_exit(&ldcp->lock);
2105 		rv = ldcp->cb(notify_event, ldcp->cb_arg);
2106 		if (rv) {
2107 			DWARN(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) callback "
2108 			    "failure", ldcp->id);
2109 		}
2110 		mutex_enter(&ldcp->lock);
2111 		ldcp->cb_inprogress = B_FALSE;
2112 	}
2113 
2114 	mutex_exit(&ldcp->lock);
2115 
2116 	D1(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) exiting handler", ldcp->id);
2117 
2118 	return (DDI_INTR_CLAIMED);
2119 }
2120 
2121 /*
2122  * Process the Rx HV queue.
2123  *
2124  * Returns 0 if data packets were found and no errors were encountered,
2125  * otherwise returns an error. In either case, the *notify argument is
2126  * set to indicate whether or not the client callback function should
2127  * be invoked. The *event argument is set to contain the callback event.
2128  *
2129  * Depending on the channel mode, packets are handled differently:
2130  *
2131  * RAW MODE
2132  * For raw mode channels, when a data packet is encountered,
2133  * processing stops and all packets are left on the queue to be removed
2134  * and processed by the ldc_read code path.
2135  *
2136  * UNRELIABLE MODE
2137  * For unreliable mode, when a data packet is encountered, processing
2138  * stops, and all packets are left on the queue to be removed and
2139  * processed by the ldc_read code path. Control packets are processed
2140  * inline if they are encountered before any data packets.
2141  *
2142  * RELIABLE MODE
2143  * For reliable mode channels, all packets on the receive queue
2144  * are processed: data packets are copied to the data queue and
2145  * control packets are processed inline. Packets are only left on
2146  * the receive queue when the data queue is full.
2147  */
static uint_t
i_ldc_rx_process_hvq(ldc_chan_t *ldcp, boolean_t *notify_client,
    uint64_t *notify_event)
{
	int		rv;
	uint64_t 	rx_head, rx_tail;
	ldc_msg_t 	*msg;
	uint64_t	link_state, first_fragment = 0;
	boolean_t	trace_length = B_TRUE;

	/* the caller must hold the channel lock for the entire scan */
	ASSERT(MUTEX_HELD(&ldcp->lock));
	*notify_client = B_FALSE;
	*notify_event = 0;

	/*
	 * Read packet(s) from the queue
	 */
	for (;;) {

		/*
		 * Snapshot the previous link state so a transition can be
		 * detected across the hv_ldc_rx_get_state() call below,
		 * which refreshes ldcp->link_state as a side effect.
		 */
		link_state = ldcp->link_state;
		rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
		    &ldcp->link_state);
		if (rv) {
			cmn_err(CE_WARN,
			    "i_ldc_rx_process_hvq: (0x%lx) cannot read "
			    "queue ptrs, rv=0x%d\n", ldcp->id, rv);
			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
			return (EIO);
		}

		/*
		 * reset the channel state if the channel went down
		 * (other side unconfigured queue) or channel was reset
		 * (other side reconfigured its queue)
		 */

		if (link_state != ldcp->link_state) {

			switch (ldcp->link_state) {
			case LDC_CHANNEL_DOWN:
				D1(ldcp->id, "i_ldc_rx_process_hvq: channel "
				    "link down\n", ldcp->id);
				mutex_enter(&ldcp->tx_lock);
				i_ldc_reset(ldcp, B_FALSE);
				mutex_exit(&ldcp->tx_lock);
				*notify_client = B_TRUE;
				*notify_event = LDC_EVT_DOWN;
				goto loop_exit;

			case LDC_CHANNEL_UP:
				D1(ldcp->id, "i_ldc_rx_process_hvq: "
				    "channel link up\n", ldcp->id);

				/*
				 * A link-up seen while the channel is only
				 * OPEN means the peer (re)configured its
				 * queue: report it as a reset so the client
				 * restarts its handshake.
				 */
				if ((ldcp->tstate & ~TS_IN_RESET) == TS_OPEN) {
					*notify_client = B_TRUE;
					*notify_event = LDC_EVT_RESET;
					ldcp->tstate |= TS_LINK_READY;
					ldcp->status = LDC_READY;
				}
				break;

			case LDC_CHANNEL_RESET:
			default:
#ifdef DEBUG
force_reset:
#endif
				D1(ldcp->id, "i_ldc_rx_process_hvq: channel "
				    "link reset\n", ldcp->id);
				mutex_enter(&ldcp->tx_lock);
				i_ldc_reset(ldcp, B_FALSE);
				mutex_exit(&ldcp->tx_lock);
				*notify_client = B_TRUE;
				*notify_event = LDC_EVT_RESET;
				break;
			}
		}

#ifdef DEBUG
		/* fault-injection hooks, present in DEBUG kernels only */
		if (LDC_INJECT_RESET(ldcp))
			goto force_reset;
		if (LDC_INJECT_DRNGCLEAR(ldcp))
			i_ldc_mem_inject_dring_clear(ldcp);
#endif
		/* record the HV queue depth once per invocation */
		if (trace_length) {
			TRACE_RXHVQ_LENGTH(ldcp, rx_head, rx_tail);
			trace_length = B_FALSE;
		}

		if (rx_head == rx_tail) {
			D2(ldcp->id, "i_ldc_rx_process_hvq: (0x%llx) "
			    "No packets\n", ldcp->id);
			break;
		}

		D2(ldcp->id, "i_ldc_rx_process_hvq: head=0x%llx, "
		    "tail=0x%llx\n", rx_head, rx_tail);
		DUMP_LDC_PKT(ldcp, "i_ldc_rx_process_hvq rcd",
		    ldcp->rx_q_va + rx_head);

		/* get the message */
		msg = (ldc_msg_t *)(ldcp->rx_q_va + rx_head);

		/* if channel is in RAW mode or data pkt, notify and return */
		if (ldcp->mode == LDC_MODE_RAW) {
			*notify_client = B_TRUE;
			*notify_event |= LDC_EVT_READ;
			break;
		}

		if ((msg->type & LDC_DATA) && (msg->stype & LDC_INFO)) {

			/* discard packet if channel is not up */
			if ((ldcp->tstate & ~TS_IN_RESET) != TS_UP) {

				/* move the head one position */
				rx_head = (rx_head + LDC_PACKET_SIZE) %
				    (ldcp->rx_q_entries << LDC_PACKET_SHIFT);

				if (rv = i_ldc_set_rx_head(ldcp, rx_head))
					break;

				continue;
			} else {
				uint64_t dq_head, dq_tail;

				/* process only RELIABLE mode data packets */
				if (ldcp->mode != LDC_MODE_RELIABLE) {
					if ((ldcp->tstate & TS_IN_RESET) == 0)
						*notify_client = B_TRUE;
					*notify_event |= LDC_EVT_READ;
					break;
				}

				/*
				 * don't process packet if queue full; probe
				 * by advancing a copy of the tail and seeing
				 * whether it would collide with the head.
				 */
				(void) i_ldc_dq_rx_get_state(ldcp, &dq_head,
				    &dq_tail, NULL);
				dq_tail = (dq_tail + LDC_PACKET_SIZE) %
				    (ldcp->rx_dq_entries << LDC_PACKET_SHIFT);
				if (dq_tail == dq_head ||
				    LDC_INJECT_DQFULL(ldcp)) {
					rv = ENOSPC;
					break;
				}
			}
		}

		/* Check the sequence ID for the message received */
		rv = i_ldc_check_seqid(ldcp, msg);
		if (rv != 0) {

			DWARN(ldcp->id, "i_ldc_rx_process_hvq: (0x%llx) "
			    "seqid error, q_ptrs=0x%lx,0x%lx", ldcp->id,
			    rx_head, rx_tail);

			/* Reset last_msg_rcd to start of message */
			/*
			 * NOTE(review): first_fragment is never assigned a
			 * non-zero value in this function, so this branch
			 * appears unreachable here — confirm whether it is
			 * a leftover from an earlier read-path design.
			 */
			if (first_fragment != 0) {
				ldcp->last_msg_rcd = first_fragment - 1;
				first_fragment = 0;
			}

			/*
			 * Send a NACK due to seqid mismatch
			 */
			rv = i_ldc_send_pkt(ldcp, msg->type, LDC_NACK,
			    (msg->ctrl & LDC_CTRL_MASK));

			if (rv) {
				cmn_err(CE_NOTE, "i_ldc_rx_process_hvq: "
				    "(0x%lx) err sending CTRL/DATA NACK msg\n",
				    ldcp->id);

				/* if cannot send NACK - reset channel */
				mutex_enter(&ldcp->tx_lock);
				i_ldc_reset(ldcp, B_TRUE);
				mutex_exit(&ldcp->tx_lock);

				*notify_client = B_TRUE;
				*notify_event = LDC_EVT_RESET;
				break;
			}

			/* purge receive queue */
			(void) i_ldc_set_rx_head(ldcp, rx_tail);
			break;
		}

		/* record the message ID */
		ldcp->last_msg_rcd = msg->seqid;

		/* process control messages */
		if (msg->type & LDC_CTRL) {
			/* save current internal state */
			uint64_t tstate = ldcp->tstate;

			rv = i_ldc_ctrlmsg(ldcp, msg);
			if (rv == EAGAIN) {
				/* re-process pkt - state was adjusted */
				continue;
			}
			if (rv == ECONNRESET) {
				*notify_client = B_TRUE;
				*notify_event = LDC_EVT_RESET;
				break;
			}

			/*
			 * control message processing was successful
			 * channel transitioned to ready for communication
			 */
			if (rv == 0 && ldcp->tstate == TS_UP &&
			    (tstate & ~TS_IN_RESET) !=
			    (ldcp->tstate & ~TS_IN_RESET)) {
				*notify_client = B_TRUE;
				*notify_event = LDC_EVT_UP;
			}
		}

		/* process data NACKs */
		if ((msg->type & LDC_DATA) && (msg->stype & LDC_NACK)) {
			DWARN(ldcp->id,
			    "i_ldc_rx_process_hvq: (0x%llx) received DATA/NACK",
			    ldcp->id);
			mutex_enter(&ldcp->tx_lock);
			i_ldc_reset(ldcp, B_TRUE);
			mutex_exit(&ldcp->tx_lock);
			*notify_client = B_TRUE;
			*notify_event = LDC_EVT_RESET;
			break;
		}

		/* process data ACKs */
		if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) {
			if (rv = i_ldc_process_data_ACK(ldcp, msg)) {
				*notify_client = B_TRUE;
				*notify_event = LDC_EVT_RESET;
				break;
			}
		}

		if ((msg->type & LDC_DATA) && (msg->stype & LDC_INFO)) {
			/* only RELIABLE mode reaches here with DATA/INFO */
			ASSERT(ldcp->mode == LDC_MODE_RELIABLE);

			/*
			 * Copy the data packet to the data queue. Note
			 * that the copy routine updates the rx_head pointer.
			 */
			i_ldc_rxdq_copy(ldcp, &rx_head);

			if ((ldcp->tstate & TS_IN_RESET) == 0)
				*notify_client = B_TRUE;
			*notify_event |= LDC_EVT_READ;
		} else {
			rx_head = (rx_head + LDC_PACKET_SIZE) %
			    (ldcp->rx_q_entries << LDC_PACKET_SHIFT);
		}

		/* move the head one position */
		if (rv = i_ldc_set_rx_head(ldcp, rx_head)) {
			*notify_client = B_TRUE;
			*notify_event = LDC_EVT_RESET;
			break;
		}

	} /* for */

loop_exit:

	if (ldcp->mode == LDC_MODE_RELIABLE) {
		/* ACK data packets */
		if ((*notify_event &
		    (LDC_EVT_READ | LDC_EVT_RESET)) == LDC_EVT_READ) {
			int ack_rv;
			ack_rv = i_ldc_send_pkt(ldcp, LDC_DATA, LDC_ACK, 0);
			if (ack_rv && ack_rv != EWOULDBLOCK) {
				cmn_err(CE_NOTE,
				    "i_ldc_rx_process_hvq: (0x%lx) cannot "
				    "send ACK\n", ldcp->id);

				mutex_enter(&ldcp->tx_lock);
				i_ldc_reset(ldcp, B_FALSE);
				mutex_exit(&ldcp->tx_lock);

				*notify_client = B_TRUE;
				*notify_event = LDC_EVT_RESET;
				goto skip_ackpeek;
			}
		}

		/*
		 * If we have no more space on the data queue, make sure
		 * there are no ACKs on the rx queue waiting to be processed.
		 */
		if (rv == ENOSPC) {
			if (i_ldc_rx_ackpeek(ldcp, rx_head, rx_tail) != 0) {
				ldcp->rx_ack_head = ACKPEEK_HEAD_INVALID;
				*notify_client = B_TRUE;
				*notify_event = LDC_EVT_RESET;
			}
			/* return ENOSPC so the caller retries later */
			return (rv);
		} else {
			/* no partial ack-scan outstanding */
			ldcp->rx_ack_head = ACKPEEK_HEAD_INVALID;
		}
	}

skip_ackpeek:

	/* Return, indicating whether or not data packets were found */
	if ((*notify_event & (LDC_EVT_READ | LDC_EVT_RESET)) == LDC_EVT_READ)
		return (0);

	return (ENOMSG);
}
2460 
2461 /*
2462  * Process any ACK packets on the HV receive queue.
2463  *
2464  * This function is only used by RELIABLE mode channels when the
2465  * secondary data queue fills up and there are packets remaining on
2466  * the HV receive queue.
2467  */
2468 int
2469 i_ldc_rx_ackpeek(ldc_chan_t *ldcp, uint64_t rx_head, uint64_t rx_tail)
2470 {
2471 	int		rv = 0;
2472 	ldc_msg_t	*msg;
2473 
2474 	if (ldcp->rx_ack_head == ACKPEEK_HEAD_INVALID)
2475 		ldcp->rx_ack_head = rx_head;
2476 
2477 	while (ldcp->rx_ack_head != rx_tail) {
2478 		msg = (ldc_msg_t *)(ldcp->rx_q_va + ldcp->rx_ack_head);
2479 
2480 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) {
2481 			if (rv = i_ldc_process_data_ACK(ldcp, msg))
2482 				break;
2483 			msg->stype &= ~LDC_ACK;
2484 		}
2485 
2486 		ldcp->rx_ack_head =
2487 		    (ldcp->rx_ack_head + LDC_PACKET_SIZE) %
2488 		    (ldcp->rx_q_entries << LDC_PACKET_SHIFT);
2489 	}
2490 	return (rv);
2491 }
2492 
2493 /* -------------------------------------------------------------------------- */
2494 
2495 /*
2496  * LDC API functions
2497  */
2498 
2499 /*
2500  * Initialize the channel. Allocate internal structure and memory for
2501  * TX/RX queues, and initialize locks.
2502  */
2503 int
2504 ldc_init(uint64_t id, ldc_attr_t *attr, ldc_handle_t *handle)
2505 {
2506 	ldc_chan_t 	*ldcp;
2507 	int		rv, exit_val;
2508 	uint64_t	ra_base, nentries;
2509 	uint64_t	qlen;
2510 
2511 	exit_val = EINVAL;	/* guarantee an error if exit on failure */
2512 
2513 	if (attr == NULL) {
2514 		DWARN(id, "ldc_init: (0x%llx) invalid attr\n", id);
2515 		return (EINVAL);
2516 	}
2517 	if (handle == NULL) {
2518 		DWARN(id, "ldc_init: (0x%llx) invalid handle\n", id);
2519 		return (EINVAL);
2520 	}
2521 
2522 	/* check if channel is valid */
2523 	rv = hv_ldc_tx_qinfo(id, &ra_base, &nentries);
2524 	if (rv == H_ECHANNEL) {
2525 		DWARN(id, "ldc_init: (0x%llx) invalid channel id\n", id);
2526 		return (EINVAL);
2527 	}
2528 
2529 	/* check if the channel has already been initialized */
2530 	mutex_enter(&ldcssp->lock);
2531 	ldcp = ldcssp->chan_list;
2532 	while (ldcp != NULL) {
2533 		if (ldcp->id == id) {
2534 			DWARN(id, "ldc_init: (0x%llx) already initialized\n",
2535 			    id);
2536 			mutex_exit(&ldcssp->lock);
2537 			return (EADDRINUSE);
2538 		}
2539 		ldcp = ldcp->next;
2540 	}
2541 	mutex_exit(&ldcssp->lock);
2542 
2543 	ASSERT(ldcp == NULL);
2544 
2545 	*handle = 0;
2546 
2547 	/* Allocate an ldcp structure */
2548 	ldcp = kmem_zalloc(sizeof (ldc_chan_t), KM_SLEEP);
2549 
2550 	/*
2551 	 * Initialize the channel and Tx lock
2552 	 *
2553 	 * The channel 'lock' protects the entire channel and
2554 	 * should be acquired before initializing, resetting,
2555 	 * destroying or reading from a channel.
2556 	 *
2557 	 * The 'tx_lock' should be acquired prior to transmitting
2558 	 * data over the channel. The lock should also be acquired
2559 	 * prior to channel reconfiguration (in order to prevent
2560 	 * concurrent writes).
2561 	 *
2562 	 * ORDERING: When both locks are being acquired, to prevent
2563 	 * deadlocks, the channel lock should be always acquired prior
2564 	 * to the tx_lock.
2565 	 */
2566 	mutex_init(&ldcp->lock, NULL, MUTEX_DRIVER, NULL);
2567 	mutex_init(&ldcp->tx_lock, NULL, MUTEX_DRIVER, NULL);
2568 
2569 	/* Initialize the channel */
2570 	ldcp->id = id;
2571 	ldcp->cb = NULL;
2572 	ldcp->cb_arg = NULL;
2573 	ldcp->cb_inprogress = B_FALSE;
2574 	ldcp->cb_enabled = B_FALSE;
2575 	ldcp->next = NULL;
2576 
2577 	/* Read attributes */
2578 	ldcp->mode = attr->mode;
2579 	ldcp->devclass = attr->devclass;
2580 	ldcp->devinst = attr->instance;
2581 	ldcp->mtu = (attr->mtu > 0) ? attr->mtu : LDC_DEFAULT_MTU;
2582 
2583 	D1(ldcp->id,
2584 	    "ldc_init: (0x%llx) channel attributes, class=0x%x, "
2585 	    "instance=0x%llx, mode=%d, mtu=%d\n",
2586 	    ldcp->id, ldcp->devclass, ldcp->devinst, ldcp->mode, ldcp->mtu);
2587 
2588 	ldcp->next_vidx = 0;
2589 	ldcp->tstate = TS_IN_RESET;
2590 	ldcp->hstate = 0;
2591 	ldcp->last_msg_snt = LDC_INIT_SEQID;
2592 	ldcp->last_ack_rcd = 0;
2593 	ldcp->last_msg_rcd = 0;
2594 	ldcp->rx_ack_head = ACKPEEK_HEAD_INVALID;
2595 
2596 	ldcp->stream_bufferp = NULL;
2597 	ldcp->exp_dring_list = NULL;
2598 	ldcp->imp_dring_list = NULL;
2599 	ldcp->mhdl_list = NULL;
2600 
2601 	ldcp->tx_intr_state = LDC_INTR_NONE;
2602 	ldcp->rx_intr_state = LDC_INTR_NONE;
2603 
2604 	/* Initialize payload size depending on whether channel is reliable */
2605 	switch (ldcp->mode) {
2606 	case LDC_MODE_RAW:
2607 		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_RAW;
2608 		ldcp->read_p = i_ldc_read_raw;
2609 		ldcp->write_p = i_ldc_write_raw;
2610 		break;
2611 	case LDC_MODE_UNRELIABLE:
2612 		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_UNRELIABLE;
2613 		ldcp->read_p = i_ldc_read_packet;
2614 		ldcp->write_p = i_ldc_write_packet;
2615 		break;
2616 	case LDC_MODE_RELIABLE:
2617 		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_RELIABLE;
2618 
2619 		ldcp->stream_remains = 0;
2620 		ldcp->stream_offset = 0;
2621 		ldcp->stream_bufferp = kmem_alloc(ldcp->mtu, KM_SLEEP);
2622 		ldcp->read_p = i_ldc_read_stream;
2623 		ldcp->write_p = i_ldc_write_stream;
2624 		break;
2625 	default:
2626 		exit_val = EINVAL;
2627 		goto cleanup_on_exit;
2628 	}
2629 
2630 	/*
2631 	 * qlen is (mtu * ldc_mtu_msgs) / pkt_payload. If this
2632 	 * value is smaller than default length of ldc_queue_entries,
2633 	 * qlen is set to ldc_queue_entries. Ensure that computed
2634 	 * length is a power-of-two value.
2635 	 */
2636 	qlen = (ldcp->mtu * ldc_mtu_msgs) / ldcp->pkt_payload;
2637 	if (!ISP2(qlen)) {
2638 		uint64_t	tmp = 1;
2639 		while (qlen) {
2640 			qlen >>= 1; tmp <<= 1;
2641 		}
2642 		qlen = tmp;
2643 	}
2644 
2645 	ldcp->rx_q_entries =
2646 	    (qlen < ldc_queue_entries) ? ldc_queue_entries : qlen;
2647 	ldcp->tx_q_entries = ldcp->rx_q_entries;
2648 
2649 	D1(ldcp->id, "ldc_init: queue length = 0x%llx\n", ldcp->rx_q_entries);
2650 
2651 	/* Create a transmit queue */
2652 	ldcp->tx_q_va = (uint64_t)
2653 	    contig_mem_alloc(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
2654 	if (ldcp->tx_q_va == NULL) {
2655 		cmn_err(CE_WARN,
2656 		    "ldc_init: (0x%lx) TX queue allocation failed\n",
2657 		    ldcp->id);
2658 		exit_val = ENOMEM;
2659 		goto cleanup_on_exit;
2660 	}
2661 	ldcp->tx_q_ra = va_to_pa((caddr_t)ldcp->tx_q_va);
2662 
2663 	D2(ldcp->id, "ldc_init: txq_va=0x%llx, txq_ra=0x%llx, entries=0x%llx\n",
2664 	    ldcp->tx_q_va, ldcp->tx_q_ra, ldcp->tx_q_entries);
2665 
2666 	ldcp->tstate |= TS_TXQ_RDY;
2667 
2668 	/* Create a receive queue */
2669 	ldcp->rx_q_va = (uint64_t)
2670 	    contig_mem_alloc(ldcp->rx_q_entries << LDC_PACKET_SHIFT);
2671 	if (ldcp->rx_q_va == NULL) {
2672 		cmn_err(CE_WARN,
2673 		    "ldc_init: (0x%lx) RX queue allocation failed\n",
2674 		    ldcp->id);
2675 		exit_val = ENOMEM;
2676 		goto cleanup_on_exit;
2677 	}
2678 	ldcp->rx_q_ra = va_to_pa((caddr_t)ldcp->rx_q_va);
2679 
2680 	D2(ldcp->id, "ldc_init: rxq_va=0x%llx, rxq_ra=0x%llx, entries=0x%llx\n",
2681 	    ldcp->rx_q_va, ldcp->rx_q_ra, ldcp->rx_q_entries);
2682 
2683 	ldcp->tstate |= TS_RXQ_RDY;
2684 
2685 	/* Setup a separate read data queue */
2686 	if (ldcp->mode == LDC_MODE_RELIABLE) {
2687 		ldcp->readq_get_state = i_ldc_dq_rx_get_state;
2688 		ldcp->readq_set_head  = i_ldc_set_rxdq_head;
2689 
2690 		/* Make sure the data queue multiplier is a power of 2 */
2691 		if (!ISP2(ldc_rxdq_multiplier)) {
2692 			D1(ldcp->id, "ldc_init: (0x%llx) ldc_rxdq_multiplier "
2693 			    "not a power of 2, resetting", ldcp->id);
2694 			ldc_rxdq_multiplier = LDC_RXDQ_MULTIPLIER;
2695 		}
2696 
2697 		ldcp->rx_dq_entries = ldc_rxdq_multiplier * ldcp->rx_q_entries;
2698 		ldcp->rx_dq_va = (uint64_t)
2699 		    kmem_alloc(ldcp->rx_dq_entries << LDC_PACKET_SHIFT,
2700 		    KM_SLEEP);
2701 		if (ldcp->rx_dq_va == NULL) {
2702 			cmn_err(CE_WARN,
2703 			    "ldc_init: (0x%lx) RX data queue "
2704 			    "allocation failed\n", ldcp->id);
2705 			exit_val = ENOMEM;
2706 			goto cleanup_on_exit;
2707 		}
2708 
2709 		ldcp->rx_dq_head = ldcp->rx_dq_tail = 0;
2710 
2711 		D2(ldcp->id, "ldc_init: rx_dq_va=0x%llx, "
2712 		    "rx_dq_entries=0x%llx\n", ldcp->rx_dq_va,
2713 		    ldcp->rx_dq_entries);
2714 	} else {
2715 		ldcp->readq_get_state = i_ldc_hvq_rx_get_state;
2716 		ldcp->readq_set_head  = i_ldc_set_rx_head;
2717 	}
2718 
2719 	/* Init descriptor ring and memory handle list lock */
2720 	mutex_init(&ldcp->exp_dlist_lock, NULL, MUTEX_DRIVER, NULL);
2721 	mutex_init(&ldcp->imp_dlist_lock, NULL, MUTEX_DRIVER, NULL);
2722 	mutex_init(&ldcp->mlist_lock, NULL, MUTEX_DRIVER, NULL);
2723 
2724 	/* mark status as INITialized */
2725 	ldcp->status = LDC_INIT;
2726 
2727 	/* Add to channel list */
2728 	mutex_enter(&ldcssp->lock);
2729 	ldcp->next = ldcssp->chan_list;
2730 	ldcssp->chan_list = ldcp;
2731 	ldcssp->channel_count++;
2732 	mutex_exit(&ldcssp->lock);
2733 
2734 	/* set the handle */
2735 	*handle = (ldc_handle_t)ldcp;
2736 
2737 	D1(ldcp->id, "ldc_init: (0x%llx) channel initialized\n", ldcp->id);
2738 
2739 	return (0);
2740 
2741 cleanup_on_exit:
2742 
2743 	if (ldcp->mode == LDC_MODE_RELIABLE && ldcp->stream_bufferp)
2744 		kmem_free(ldcp->stream_bufferp, ldcp->mtu);
2745 
2746 	if (ldcp->tstate & TS_TXQ_RDY)
2747 		contig_mem_free((caddr_t)ldcp->tx_q_va,
2748 		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT));
2749 
2750 	if (ldcp->tstate & TS_RXQ_RDY)
2751 		contig_mem_free((caddr_t)ldcp->rx_q_va,
2752 		    (ldcp->rx_q_entries << LDC_PACKET_SHIFT));
2753 
2754 	mutex_destroy(&ldcp->tx_lock);
2755 	mutex_destroy(&ldcp->lock);
2756 
2757 	if (ldcp)
2758 		kmem_free(ldcp, sizeof (ldc_chan_t));
2759 
2760 	return (exit_val);
2761 }
2762 
2763 /*
2764  * Finalizes the LDC connection. It will return EBUSY if the
2765  * channel is open. A ldc_close() has to be done prior to
2766  * a ldc_fini operation. It frees TX/RX queues, associated
2767  * with the channel
2768  */
2769 int
2770 ldc_fini(ldc_handle_t handle)
2771 {
2772 	ldc_chan_t 	*ldcp;
2773 	ldc_chan_t 	*tmp_ldcp;
2774 	uint64_t 	id;
2775 
2776 	if (handle == NULL) {
2777 		DWARN(DBG_ALL_LDCS, "ldc_fini: invalid channel handle\n");
2778 		return (EINVAL);
2779 	}
2780 	ldcp = (ldc_chan_t *)handle;
2781 	id = ldcp->id;
2782 
2783 	mutex_enter(&ldcp->lock);
2784 
2785 	if ((ldcp->tstate & ~TS_IN_RESET) > TS_INIT) {
2786 		DWARN(ldcp->id, "ldc_fini: (0x%llx) channel is open\n",
2787 		    ldcp->id);
2788 		mutex_exit(&ldcp->lock);
2789 		return (EBUSY);
2790 	}
2791 
2792 	/* Remove from the channel list */
2793 	mutex_enter(&ldcssp->lock);
2794 	tmp_ldcp = ldcssp->chan_list;
2795 	if (tmp_ldcp == ldcp) {
2796 		ldcssp->chan_list = ldcp->next;
2797 		ldcp->next = NULL;
2798 	} else {
2799 		while (tmp_ldcp != NULL) {
2800 			if (tmp_ldcp->next == ldcp) {
2801 				tmp_ldcp->next = ldcp->next;
2802 				ldcp->next = NULL;
2803 				break;
2804 			}
2805 			tmp_ldcp = tmp_ldcp->next;
2806 		}
2807 		if (tmp_ldcp == NULL) {
2808 			DWARN(DBG_ALL_LDCS, "ldc_fini: invalid channel hdl\n");
2809 			mutex_exit(&ldcssp->lock);
2810 			mutex_exit(&ldcp->lock);
2811 			return (EINVAL);
2812 		}
2813 	}
2814 
2815 	ldcssp->channel_count--;
2816 
2817 	mutex_exit(&ldcssp->lock);
2818 
2819 	/* Free the map table for this channel */
2820 	if (ldcp->mtbl) {
2821 		(void) hv_ldc_set_map_table(ldcp->id, NULL, NULL);
2822 		if (ldcp->mtbl->contigmem)
2823 			contig_mem_free(ldcp->mtbl->table, ldcp->mtbl->size);
2824 		else
2825 			kmem_free(ldcp->mtbl->table, ldcp->mtbl->size);
2826 		mutex_destroy(&ldcp->mtbl->lock);
2827 		kmem_free(ldcp->mtbl, sizeof (ldc_mtbl_t));
2828 	}
2829 
2830 	/* Destroy descriptor ring and memory handle list lock */
2831 	mutex_destroy(&ldcp->exp_dlist_lock);
2832 	mutex_destroy(&ldcp->imp_dlist_lock);
2833 	mutex_destroy(&ldcp->mlist_lock);
2834 
2835 	/* Free the stream buffer for RELIABLE_MODE */
2836 	if (ldcp->mode == LDC_MODE_RELIABLE && ldcp->stream_bufferp)
2837 		kmem_free(ldcp->stream_bufferp, ldcp->mtu);
2838 
2839 	/* Free the RX queue */
2840 	contig_mem_free((caddr_t)ldcp->rx_q_va,
2841 	    (ldcp->rx_q_entries << LDC_PACKET_SHIFT));
2842 	ldcp->tstate &= ~TS_RXQ_RDY;
2843 
2844 	/* Free the RX data queue */
2845 	if (ldcp->mode == LDC_MODE_RELIABLE) {
2846 		kmem_free((caddr_t)ldcp->rx_dq_va,
2847 		    (ldcp->rx_dq_entries << LDC_PACKET_SHIFT));
2848 	}
2849 
2850 	/* Free the TX queue */
2851 	contig_mem_free((caddr_t)ldcp->tx_q_va,
2852 	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT));
2853 	ldcp->tstate &= ~TS_TXQ_RDY;
2854 
2855 	mutex_exit(&ldcp->lock);
2856 
2857 	/* Destroy mutex */
2858 	mutex_destroy(&ldcp->tx_lock);
2859 	mutex_destroy(&ldcp->lock);
2860 
2861 	/* free channel structure */
2862 	kmem_free(ldcp, sizeof (ldc_chan_t));
2863 
2864 	D1(id, "ldc_fini: (0x%llx) channel finalized\n", id);
2865 
2866 	return (0);
2867 }
2868 
2869 /*
2870  * Open the LDC channel for use. It registers the TX/RX queues
2871  * with the Hypervisor. It also specifies the interrupt number
2872  * and target CPU for this channel
2873  */
2874 int
2875 ldc_open(ldc_handle_t handle)
2876 {
2877 	ldc_chan_t 	*ldcp;
2878 	int 		rv;
2879 
2880 	if (handle == NULL) {
2881 		DWARN(DBG_ALL_LDCS, "ldc_open: invalid channel handle\n");
2882 		return (EINVAL);
2883 	}
2884 
2885 	ldcp = (ldc_chan_t *)handle;
2886 
2887 	mutex_enter(&ldcp->lock);
2888 
2889 	if (ldcp->tstate < TS_INIT) {
2890 		DWARN(ldcp->id,
2891 		    "ldc_open: (0x%llx) channel not initialized\n", ldcp->id);
2892 		mutex_exit(&ldcp->lock);
2893 		return (EFAULT);
2894 	}
2895 	if ((ldcp->tstate & ~TS_IN_RESET) >= TS_OPEN) {
2896 		DWARN(ldcp->id,
2897 		    "ldc_open: (0x%llx) channel is already open\n", ldcp->id);
2898 		mutex_exit(&ldcp->lock);
2899 		return (EFAULT);
2900 	}
2901 
2902 	/*
2903 	 * Unregister/Register the tx queue with the hypervisor
2904 	 */
2905 	rv = hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
2906 	if (rv) {
2907 		cmn_err(CE_WARN,
2908 		    "ldc_open: (0x%lx) channel tx queue unconf failed\n",
2909 		    ldcp->id);
2910 		mutex_exit(&ldcp->lock);
2911 		return (EIO);
2912 	}
2913 
2914 	rv = hv_ldc_tx_qconf(ldcp->id, ldcp->tx_q_ra, ldcp->tx_q_entries);
2915 	if (rv) {
2916 		cmn_err(CE_WARN,
2917 		    "ldc_open: (0x%lx) channel tx queue conf failed\n",
2918 		    ldcp->id);
2919 		mutex_exit(&ldcp->lock);
2920 		return (EIO);
2921 	}
2922 
2923 	D2(ldcp->id, "ldc_open: (0x%llx) registered tx queue with LDC\n",
2924 	    ldcp->id);
2925 
2926 	/*
2927 	 * Unregister/Register the rx queue with the hypervisor
2928 	 */
2929 	rv = hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
2930 	if (rv) {
2931 		cmn_err(CE_WARN,
2932 		    "ldc_open: (0x%lx) channel rx queue unconf failed\n",
2933 		    ldcp->id);
2934 		mutex_exit(&ldcp->lock);
2935 		return (EIO);
2936 	}
2937 
2938 	rv = hv_ldc_rx_qconf(ldcp->id, ldcp->rx_q_ra, ldcp->rx_q_entries);
2939 	if (rv) {
2940 		cmn_err(CE_WARN,
2941 		    "ldc_open: (0x%lx) channel rx queue conf failed\n",
2942 		    ldcp->id);
2943 		mutex_exit(&ldcp->lock);
2944 		return (EIO);
2945 	}
2946 
2947 	D2(ldcp->id, "ldc_open: (0x%llx) registered rx queue with LDC\n",
2948 	    ldcp->id);
2949 
2950 	ldcp->tstate |= TS_QCONF_RDY;
2951 
2952 	/* Register the channel with the channel nexus */
2953 	rv = i_ldc_register_channel(ldcp);
2954 	if (rv && rv != EAGAIN) {
2955 		cmn_err(CE_WARN,
2956 		    "ldc_open: (0x%lx) channel register failed\n", ldcp->id);
2957 		(void) hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
2958 		(void) hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
2959 		mutex_exit(&ldcp->lock);
2960 		return (EIO);
2961 	}
2962 
2963 	/* mark channel in OPEN state */
2964 	ldcp->status = LDC_OPEN;
2965 
2966 	/* Read channel state */
2967 	rv = hv_ldc_tx_get_state(ldcp->id,
2968 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
2969 	if (rv) {
2970 		cmn_err(CE_WARN,
2971 		    "ldc_open: (0x%lx) cannot read channel state\n",
2972 		    ldcp->id);
2973 		(void) i_ldc_unregister_channel(ldcp);
2974 		(void) hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
2975 		(void) hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
2976 		mutex_exit(&ldcp->lock);
2977 		return (EIO);
2978 	}
2979 
2980 	/*
2981 	 * set the ACKd head to current head location for reliable
2982 	 */
2983 	ldcp->tx_ackd_head = ldcp->tx_head;
2984 
2985 	/* mark channel ready if HV report link is UP (peer alloc'd Rx queue) */
2986 	if (ldcp->link_state == LDC_CHANNEL_UP ||
2987 	    ldcp->link_state == LDC_CHANNEL_RESET) {
2988 		ldcp->tstate |= TS_LINK_READY;
2989 		ldcp->status = LDC_READY;
2990 	}
2991 
2992 	/*
2993 	 * if channel is being opened in RAW mode - no handshake is needed
2994 	 * switch the channel READY and UP state
2995 	 */
2996 	if (ldcp->mode == LDC_MODE_RAW) {
2997 		ldcp->tstate = TS_UP;	/* set bits associated with LDC UP */
2998 		ldcp->status = LDC_UP;
2999 	}
3000 
3001 	mutex_exit(&ldcp->lock);
3002 
3003 	/*
3004 	 * Increment number of open channels
3005 	 */
3006 	mutex_enter(&ldcssp->lock);
3007 	ldcssp->channels_open++;
3008 	mutex_exit(&ldcssp->lock);
3009 
3010 	D1(ldcp->id,
3011 	    "ldc_open: (0x%llx) channel (0x%p) open for use "
3012 	    "(tstate=0x%x, status=0x%x)\n",
3013 	    ldcp->id, ldcp, ldcp->tstate, ldcp->status);
3014 
3015 	return (0);
3016 }
3017 
3018 /*
3019  * Close the LDC connection. It will return EBUSY if there
3020  * are memory segments or descriptor rings either bound to or
3021  * mapped over the channel
3022  */
3023 int
3024 ldc_close(ldc_handle_t handle)
3025 {
3026 	ldc_chan_t 	*ldcp;
3027 	int		rv = 0, retries = 0;
3028 	boolean_t	chk_done = B_FALSE;
3029 
3030 	if (handle == NULL) {
3031 		DWARN(DBG_ALL_LDCS, "ldc_close: invalid channel handle\n");
3032 		return (EINVAL);
3033 	}
3034 	ldcp = (ldc_chan_t *)handle;
3035 
3036 	mutex_enter(&ldcp->lock);
3037 
3038 	/* return error if channel is not open */
3039 	if ((ldcp->tstate & ~TS_IN_RESET) < TS_OPEN) {
3040 		DWARN(ldcp->id,
3041 		    "ldc_close: (0x%llx) channel is not open\n", ldcp->id);
3042 		mutex_exit(&ldcp->lock);
3043 		return (EFAULT);
3044 	}
3045 
3046 	/* if any memory handles, drings, are bound or mapped cannot close */
3047 	if (ldcp->mhdl_list != NULL) {
3048 		DWARN(ldcp->id,
3049 		    "ldc_close: (0x%llx) channel has bound memory handles\n",
3050 		    ldcp->id);
3051 		mutex_exit(&ldcp->lock);
3052 		return (EBUSY);
3053 	}
3054 	if (ldcp->exp_dring_list != NULL) {
3055 		DWARN(ldcp->id,
3056 		    "ldc_close: (0x%llx) channel has bound descriptor rings\n",
3057 		    ldcp->id);
3058 		mutex_exit(&ldcp->lock);
3059 		return (EBUSY);
3060 	}
3061 	if (ldcp->imp_dring_list != NULL) {
3062 		DWARN(ldcp->id,
3063 		    "ldc_close: (0x%llx) channel has mapped descriptor rings\n",
3064 		    ldcp->id);
3065 		mutex_exit(&ldcp->lock);
3066 		return (EBUSY);
3067 	}
3068 
3069 	if (ldcp->cb_inprogress) {
3070 		DWARN(ldcp->id, "ldc_close: (0x%llx) callback active\n",
3071 		    ldcp->id);
3072 		mutex_exit(&ldcp->lock);
3073 		return (EWOULDBLOCK);
3074 	}
3075 
3076 	/* Obtain Tx lock */
3077 	mutex_enter(&ldcp->tx_lock);
3078 
3079 	/*
3080 	 * Wait for pending transmits to complete i.e Tx queue to drain
3081 	 * if there are pending pkts - wait 1 ms and retry again
3082 	 */
3083 	for (;;) {
3084 
3085 		rv = hv_ldc_tx_get_state(ldcp->id,
3086 		    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
3087 		if (rv) {
3088 			cmn_err(CE_WARN,
3089 			    "ldc_close: (0x%lx) cannot read qptrs\n", ldcp->id);
3090 			mutex_exit(&ldcp->tx_lock);
3091 			mutex_exit(&ldcp->lock);
3092 			return (EIO);
3093 		}
3094 
3095 		if (ldcp->tx_head == ldcp->tx_tail ||
3096 		    ldcp->link_state != LDC_CHANNEL_UP) {
3097 			break;
3098 		}
3099 
3100 		if (chk_done) {
3101 			DWARN(ldcp->id,
3102 			    "ldc_close: (0x%llx) Tx queue drain timeout\n",
3103 			    ldcp->id);
3104 			break;
3105 		}
3106 
3107 		/* wait for one ms and try again */
3108 		delay(drv_usectohz(1000));
3109 		chk_done = B_TRUE;
3110 	}
3111 
3112 	/*
3113 	 * Drain the Tx and Rx queues as we are closing the
3114 	 * channel. We dont care about any pending packets.
3115 	 * We have to also drain the queue prior to clearing
3116 	 * pending interrupts, otherwise the HV will trigger
3117 	 * an interrupt the moment the interrupt state is
3118 	 * cleared.
3119 	 */
3120 	(void) i_ldc_txq_reconf(ldcp);
3121 	(void) i_ldc_rxq_drain(ldcp);
3122 
3123 	/*
3124 	 * Unregister the channel with the nexus
3125 	 */
3126 	while ((rv = i_ldc_unregister_channel(ldcp)) != 0) {
3127 
3128 		mutex_exit(&ldcp->tx_lock);
3129 		mutex_exit(&ldcp->lock);
3130 
3131 		/* if any error other than EAGAIN return back */
3132 		if (rv != EAGAIN || retries >= ldc_max_retries) {
3133 			cmn_err(CE_WARN,
3134 			    "ldc_close: (0x%lx) unregister failed, %d\n",
3135 			    ldcp->id, rv);
3136 			return (rv);
3137 		}
3138 
3139 		/*
3140 		 * As there could be pending interrupts we need
3141 		 * to wait and try again
3142 		 */
3143 		drv_usecwait(ldc_close_delay);
3144 		mutex_enter(&ldcp->lock);
3145 		mutex_enter(&ldcp->tx_lock);
3146 		retries++;
3147 	}
3148 
3149 	/*
3150 	 * Unregister queues
3151 	 */
3152 	rv = hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
3153 	if (rv) {
3154 		cmn_err(CE_WARN,
3155 		    "ldc_close: (0x%lx) channel TX queue unconf failed\n",
3156 		    ldcp->id);
3157 		mutex_exit(&ldcp->tx_lock);
3158 		mutex_exit(&ldcp->lock);
3159 		return (EIO);
3160 	}
3161 	rv = hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
3162 	if (rv) {
3163 		cmn_err(CE_WARN,
3164 		    "ldc_close: (0x%lx) channel RX queue unconf failed\n",
3165 		    ldcp->id);
3166 		mutex_exit(&ldcp->tx_lock);
3167 		mutex_exit(&ldcp->lock);
3168 		return (EIO);
3169 	}
3170 
3171 	ldcp->tstate &= ~TS_QCONF_RDY;
3172 
3173 	/* Reset channel state information */
3174 	i_ldc_reset_state(ldcp);
3175 
3176 	/* Mark channel as down and in initialized state */
3177 	ldcp->tx_ackd_head = 0;
3178 	ldcp->tx_head = 0;
3179 	ldcp->tstate = TS_IN_RESET|TS_INIT;
3180 	ldcp->status = LDC_INIT;
3181 
3182 	mutex_exit(&ldcp->tx_lock);
3183 	mutex_exit(&ldcp->lock);
3184 
3185 	/* Decrement number of open channels */
3186 	mutex_enter(&ldcssp->lock);
3187 	ldcssp->channels_open--;
3188 	mutex_exit(&ldcssp->lock);
3189 
3190 	D1(ldcp->id, "ldc_close: (0x%llx) channel closed\n", ldcp->id);
3191 
3192 	return (0);
3193 }
3194 
3195 /*
3196  * Register channel callback
3197  */
3198 int
3199 ldc_reg_callback(ldc_handle_t handle,
3200     uint_t(*cb)(uint64_t event, caddr_t arg), caddr_t arg)
3201 {
3202 	ldc_chan_t *ldcp;
3203 
3204 	if (handle == NULL) {
3205 		DWARN(DBG_ALL_LDCS,
3206 		    "ldc_reg_callback: invalid channel handle\n");
3207 		return (EINVAL);
3208 	}
3209 	if (((uint64_t)cb) < KERNELBASE) {
3210 		DWARN(DBG_ALL_LDCS, "ldc_reg_callback: invalid callback\n");
3211 		return (EINVAL);
3212 	}
3213 	ldcp = (ldc_chan_t *)handle;
3214 
3215 	mutex_enter(&ldcp->lock);
3216 
3217 	if (ldcp->cb) {
3218 		DWARN(ldcp->id, "ldc_reg_callback: (0x%llx) callback exists\n",
3219 		    ldcp->id);
3220 		mutex_exit(&ldcp->lock);
3221 		return (EIO);
3222 	}
3223 	if (ldcp->cb_inprogress) {
3224 		DWARN(ldcp->id, "ldc_reg_callback: (0x%llx) callback active\n",
3225 		    ldcp->id);
3226 		mutex_exit(&ldcp->lock);
3227 		return (EWOULDBLOCK);
3228 	}
3229 
3230 	ldcp->cb = cb;
3231 	ldcp->cb_arg = arg;
3232 	ldcp->cb_enabled = B_TRUE;
3233 
3234 	D1(ldcp->id,
3235 	    "ldc_reg_callback: (0x%llx) registered callback for channel\n",
3236 	    ldcp->id);
3237 
3238 	mutex_exit(&ldcp->lock);
3239 
3240 	return (0);
3241 }
3242 
3243 /*
3244  * Unregister channel callback
3245  */
3246 int
3247 ldc_unreg_callback(ldc_handle_t handle)
3248 {
3249 	ldc_chan_t *ldcp;
3250 
3251 	if (handle == NULL) {
3252 		DWARN(DBG_ALL_LDCS,
3253 		    "ldc_unreg_callback: invalid channel handle\n");
3254 		return (EINVAL);
3255 	}
3256 	ldcp = (ldc_chan_t *)handle;
3257 
3258 	mutex_enter(&ldcp->lock);
3259 
3260 	if (ldcp->cb == NULL) {
3261 		DWARN(ldcp->id,
3262 		    "ldc_unreg_callback: (0x%llx) no callback exists\n",
3263 		    ldcp->id);
3264 		mutex_exit(&ldcp->lock);
3265 		return (EIO);
3266 	}
3267 	if (ldcp->cb_inprogress) {
3268 		DWARN(ldcp->id,
3269 		    "ldc_unreg_callback: (0x%llx) callback active\n",
3270 		    ldcp->id);
3271 		mutex_exit(&ldcp->lock);
3272 		return (EWOULDBLOCK);
3273 	}
3274 
3275 	ldcp->cb = NULL;
3276 	ldcp->cb_arg = NULL;
3277 	ldcp->cb_enabled = B_FALSE;
3278 
3279 	D1(ldcp->id,
3280 	    "ldc_unreg_callback: (0x%llx) unregistered callback for channel\n",
3281 	    ldcp->id);
3282 
3283 	mutex_exit(&ldcp->lock);
3284 
3285 	return (0);
3286 }
3287 
3288 
3289 /*
3290  * Bring a channel up by initiating a handshake with the peer
3291  * This call is asynchronous. It will complete at a later point
3292  * in time when the peer responds back with an RTR.
3293  */
3294 int
3295 ldc_up(ldc_handle_t handle)
3296 {
3297 	int 		rv;
3298 	ldc_chan_t 	*ldcp;
3299 	ldc_msg_t 	*ldcmsg;
3300 	uint64_t 	tx_tail, tstate, link_state;
3301 
3302 	if (handle == NULL) {
3303 		DWARN(DBG_ALL_LDCS, "ldc_up: invalid channel handle\n");
3304 		return (EINVAL);
3305 	}
3306 	ldcp = (ldc_chan_t *)handle;
3307 
3308 	mutex_enter(&ldcp->lock);
3309 
3310 	D1(ldcp->id, "ldc_up: (0x%llx) doing channel UP\n", ldcp->id);
3311 
3312 	/* clear the reset state */
3313 	tstate = ldcp->tstate;
3314 	ldcp->tstate &= ~TS_IN_RESET;
3315 
3316 	if (ldcp->tstate == TS_UP) {
3317 		DWARN(ldcp->id,
3318 		    "ldc_up: (0x%llx) channel is already in UP state\n",
3319 		    ldcp->id);
3320 
3321 		/* mark channel as up */
3322 		ldcp->status = LDC_UP;
3323 
3324 		/*
3325 		 * if channel was in reset state and there was
3326 		 * pending data clear interrupt state. this will
3327 		 * trigger an interrupt, causing the RX handler to
3328 		 * to invoke the client's callback
3329 		 */
3330 		if ((tstate & TS_IN_RESET) &&
3331 		    ldcp->rx_intr_state == LDC_INTR_PEND) {
3332 			D1(ldcp->id,
3333 			    "ldc_up: (0x%llx) channel has pending data, "
3334 			    "clearing interrupt\n", ldcp->id);
3335 			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
3336 		}
3337 
3338 		mutex_exit(&ldcp->lock);
3339 		return (0);
3340 	}
3341 
3342 	/* if the channel is in RAW mode - mark it as UP, if READY */
3343 	if (ldcp->mode == LDC_MODE_RAW && ldcp->tstate >= TS_READY) {
3344 		ldcp->tstate = TS_UP;
3345 		mutex_exit(&ldcp->lock);
3346 		return (0);
3347 	}
3348 
3349 	/* Don't start another handshake if there is one in progress */
3350 	if (ldcp->hstate) {
3351 		D1(ldcp->id,
3352 		    "ldc_up: (0x%llx) channel handshake in progress\n",
3353 		    ldcp->id);
3354 		mutex_exit(&ldcp->lock);
3355 		return (0);
3356 	}
3357 
3358 	mutex_enter(&ldcp->tx_lock);
3359 
3360 	/* save current link state */
3361 	link_state = ldcp->link_state;
3362 
3363 	/* get the current tail for the LDC msg */
3364 	rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
3365 	if (rv) {
3366 		D1(ldcp->id, "ldc_up: (0x%llx) cannot initiate handshake\n",
3367 		    ldcp->id);
3368 		mutex_exit(&ldcp->tx_lock);
3369 		mutex_exit(&ldcp->lock);
3370 		return (ECONNREFUSED);
3371 	}
3372 
3373 	/*
3374 	 * If i_ldc_get_tx_tail() changed link_state to either RESET or UP,
3375 	 * from a previous state of DOWN, then mark the channel as
3376 	 * being ready for handshake.
3377 	 */
3378 	if ((link_state == LDC_CHANNEL_DOWN) &&
3379 	    (link_state != ldcp->link_state)) {
3380 
3381 		ASSERT((ldcp->link_state == LDC_CHANNEL_RESET) ||
3382 		    (ldcp->link_state == LDC_CHANNEL_UP));
3383 
3384 		if (ldcp->mode == LDC_MODE_RAW) {
3385 			ldcp->status = LDC_UP;
3386 			ldcp->tstate = TS_UP;
3387 			mutex_exit(&ldcp->tx_lock);
3388 			mutex_exit(&ldcp->lock);
3389 			return (0);
3390 		} else {
3391 			ldcp->status = LDC_READY;
3392 			ldcp->tstate |= TS_LINK_READY;
3393 		}
3394 
3395 	}
3396 
3397 	ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
3398 	ZERO_PKT(ldcmsg);
3399 
3400 	ldcmsg->type = LDC_CTRL;
3401 	ldcmsg->stype = LDC_INFO;
3402 	ldcmsg->ctrl = LDC_VER;
3403 	ldcp->next_vidx = 0;
3404 	bcopy(&ldc_versions[0], ldcmsg->udata, sizeof (ldc_versions[0]));
3405 
3406 	DUMP_LDC_PKT(ldcp, "ldc_up snd ver", (uint64_t)ldcmsg);
3407 
3408 	/* initiate the send by calling into HV and set the new tail */
3409 	tx_tail = (tx_tail + LDC_PACKET_SIZE) %
3410 	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
3411 
3412 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
3413 	if (rv) {
3414 		DWARN(ldcp->id,
3415 		    "ldc_up: (0x%llx) cannot initiate handshake rv=%d\n",
3416 		    ldcp->id, rv);
3417 		mutex_exit(&ldcp->tx_lock);
3418 		mutex_exit(&ldcp->lock);
3419 		return (rv);
3420 	}
3421 
3422 	ldcp->hstate |= TS_SENT_VER;
3423 	ldcp->tx_tail = tx_tail;
3424 	D1(ldcp->id, "ldc_up: (0x%llx) channel up initiated\n", ldcp->id);
3425 
3426 	mutex_exit(&ldcp->tx_lock);
3427 	mutex_exit(&ldcp->lock);
3428 
3429 	return (rv);
3430 }
3431 
3432 
3433 /*
3434  * Bring a channel down by resetting its state and queues
3435  */
3436 int
3437 ldc_down(ldc_handle_t handle)
3438 {
3439 	ldc_chan_t 	*ldcp;
3440 
3441 	if (handle == NULL) {
3442 		DWARN(DBG_ALL_LDCS, "ldc_down: invalid channel handle\n");
3443 		return (EINVAL);
3444 	}
3445 	ldcp = (ldc_chan_t *)handle;
3446 	mutex_enter(&ldcp->lock);
3447 	mutex_enter(&ldcp->tx_lock);
3448 	i_ldc_reset(ldcp, B_TRUE);
3449 	mutex_exit(&ldcp->tx_lock);
3450 	mutex_exit(&ldcp->lock);
3451 
3452 	return (0);
3453 }
3454 
3455 /*
3456  * Get the current channel status
3457  */
3458 int
3459 ldc_status(ldc_handle_t handle, ldc_status_t *status)
3460 {
3461 	ldc_chan_t *ldcp;
3462 
3463 	if (handle == NULL || status == NULL) {
3464 		DWARN(DBG_ALL_LDCS, "ldc_status: invalid argument\n");
3465 		return (EINVAL);
3466 	}
3467 	ldcp = (ldc_chan_t *)handle;
3468 
3469 	*status = ((ldc_chan_t *)handle)->status;
3470 
3471 	D1(ldcp->id,
3472 	    "ldc_status: (0x%llx) returned status %d\n", ldcp->id, *status);
3473 	return (0);
3474 }
3475 
3476 
3477 /*
3478  * Set the channel's callback mode - enable/disable callbacks
3479  */
3480 int
3481 ldc_set_cb_mode(ldc_handle_t handle, ldc_cb_mode_t cmode)
3482 {
3483 	ldc_chan_t 	*ldcp;
3484 
3485 	if (handle == NULL) {
3486 		DWARN(DBG_ALL_LDCS,
3487 		    "ldc_set_intr_mode: invalid channel handle\n");
3488 		return (EINVAL);
3489 	}
3490 	ldcp = (ldc_chan_t *)handle;
3491 
3492 	/*
3493 	 * Record no callbacks should be invoked
3494 	 */
3495 	mutex_enter(&ldcp->lock);
3496 
3497 	switch (cmode) {
3498 	case LDC_CB_DISABLE:
3499 		if (!ldcp->cb_enabled) {
3500 			DWARN(ldcp->id,
3501 			    "ldc_set_cb_mode: (0x%llx) callbacks disabled\n",
3502 			    ldcp->id);
3503 			break;
3504 		}
3505 		ldcp->cb_enabled = B_FALSE;
3506 
3507 		D1(ldcp->id, "ldc_set_cb_mode: (0x%llx) disabled callbacks\n",
3508 		    ldcp->id);
3509 		break;
3510 
3511 	case LDC_CB_ENABLE:
3512 		if (ldcp->cb_enabled) {
3513 			DWARN(ldcp->id,
3514 			    "ldc_set_cb_mode: (0x%llx) callbacks enabled\n",
3515 			    ldcp->id);
3516 			break;
3517 		}
3518 		ldcp->cb_enabled = B_TRUE;
3519 
3520 		D1(ldcp->id, "ldc_set_cb_mode: (0x%llx) enabled callbacks\n",
3521 		    ldcp->id);
3522 		break;
3523 	}
3524 
3525 	mutex_exit(&ldcp->lock);
3526 
3527 	return (0);
3528 }
3529 
3530 /*
3531  * Check to see if there are packets on the incoming queue
3532  * Will return hasdata = B_FALSE if there are no packets
3533  */
3534 int
3535 ldc_chkq(ldc_handle_t handle, boolean_t *hasdata)
3536 {
3537 	int 		rv;
3538 	uint64_t 	rx_head, rx_tail;
3539 	ldc_chan_t 	*ldcp;
3540 
3541 	if (handle == NULL) {
3542 		DWARN(DBG_ALL_LDCS, "ldc_chkq: invalid channel handle\n");
3543 		return (EINVAL);
3544 	}
3545 	ldcp = (ldc_chan_t *)handle;
3546 
3547 	*hasdata = B_FALSE;
3548 
3549 	mutex_enter(&ldcp->lock);
3550 
3551 	if (ldcp->tstate != TS_UP) {
3552 		D1(ldcp->id,
3553 		    "ldc_chkq: (0x%llx) channel is not up\n", ldcp->id);
3554 		mutex_exit(&ldcp->lock);
3555 		return (ECONNRESET);
3556 	}
3557 
3558 	/* Read packet(s) from the queue */
3559 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
3560 	    &ldcp->link_state);
3561 	if (rv != 0) {
3562 		cmn_err(CE_WARN,
3563 		    "ldc_chkq: (0x%lx) unable to read queue ptrs", ldcp->id);
3564 		mutex_exit(&ldcp->lock);
3565 		return (EIO);
3566 	}
3567 
3568 	/* reset the channel state if the channel went down */
3569 	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
3570 	    ldcp->link_state == LDC_CHANNEL_RESET) {
3571 		mutex_enter(&ldcp->tx_lock);
3572 		i_ldc_reset(ldcp, B_FALSE);
3573 		mutex_exit(&ldcp->tx_lock);
3574 		mutex_exit(&ldcp->lock);
3575 		return (ECONNRESET);
3576 	}
3577 
3578 	switch (ldcp->mode) {
3579 	case LDC_MODE_RAW:
3580 		/*
3581 		 * In raw mode, there are no ctrl packets, so checking
3582 		 * if the queue is non-empty is sufficient.
3583 		 */
3584 		*hasdata = (rx_head != rx_tail);
3585 		break;
3586 
3587 	case LDC_MODE_UNRELIABLE:
3588 		/*
3589 		 * In unreliable mode, if the queue is non-empty, we need
3590 		 * to check if it actually contains unread data packets.
3591 		 * The queue may just contain ctrl packets.
3592 		 */
3593 		if (rx_head != rx_tail) {
3594 			*hasdata = (i_ldc_chkq(ldcp) == 0);
3595 			/*
3596 			 * If no data packets were found on the queue,
3597 			 * all packets must have been control packets
3598 			 * which will now have been processed, leaving
3599 			 * the queue empty. If the interrupt state
3600 			 * is pending, we need to clear the interrupt
3601 			 * here.
3602 			 */
3603 			if (*hasdata == B_FALSE &&
3604 			    ldcp->rx_intr_state == LDC_INTR_PEND) {
3605 				i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
3606 			}
3607 		}
3608 		break;
3609 
3610 	case LDC_MODE_RELIABLE:
3611 		/*
3612 		 * In reliable mode, first check for 'stream_remains' > 0.
3613 		 * Otherwise, if the data queue head and tail pointers
3614 		 * differ, there must be data to read.
3615 		 */
3616 		if (ldcp->stream_remains > 0)
3617 			*hasdata = B_TRUE;
3618 		else
3619 			*hasdata = (ldcp->rx_dq_head != ldcp->rx_dq_tail);
3620 		break;
3621 
3622 	default:
3623 		cmn_err(CE_WARN, "ldc_chkq: (0x%lx) unexpected channel mode "
3624 		    "(0x%x)", ldcp->id, ldcp->mode);
3625 		mutex_exit(&ldcp->lock);
3626 		return (EIO);
3627 	}
3628 
3629 	mutex_exit(&ldcp->lock);
3630 
3631 	return (0);
3632 }
3633 
3634 
3635 /*
3636  * Read 'size' amount of bytes or less. If incoming buffer
3637  * is more than 'size', ENOBUFS is returned.
3638  *
3639  * On return, size contains the number of bytes read.
3640  */
3641 int
3642 ldc_read(ldc_handle_t handle, caddr_t bufp, size_t *sizep)
3643 {
3644 	ldc_chan_t 	*ldcp;
3645 	uint64_t 	rx_head = 0, rx_tail = 0;
3646 	int		rv = 0, exit_val;
3647 
3648 	if (handle == NULL) {
3649 		DWARN(DBG_ALL_LDCS, "ldc_read: invalid channel handle\n");
3650 		return (EINVAL);
3651 	}
3652 
3653 	ldcp = (ldc_chan_t *)handle;
3654 
3655 	/* channel lock */
3656 	mutex_enter(&ldcp->lock);
3657 
3658 	if (ldcp->tstate != TS_UP) {
3659 		DWARN(ldcp->id,
3660 		    "ldc_read: (0x%llx) channel is not in UP state\n",
3661 		    ldcp->id);
3662 		exit_val = ECONNRESET;
3663 	} else if (ldcp->mode == LDC_MODE_RELIABLE) {
3664 		TRACE_RXDQ_LENGTH(ldcp);
3665 		exit_val = ldcp->read_p(ldcp, bufp, sizep);
3666 
3667 		/*
3668 		 * For reliable mode channels, the interrupt
3669 		 * state is only set to pending during
3670 		 * interrupt handling when the secondary data
3671 		 * queue became full, leaving unprocessed
3672 		 * packets on the Rx queue. If the interrupt
3673 		 * state is pending and space is now available
3674 		 * on the data queue, clear the interrupt.
3675 		 */
3676 		if (ldcp->rx_intr_state == LDC_INTR_PEND &&
3677 		    Q_CONTIG_SPACE(ldcp->rx_dq_head, ldcp->rx_dq_tail,
3678 		    ldcp->rx_dq_entries << LDC_PACKET_SHIFT) >=
3679 		    LDC_PACKET_SIZE) {
3680 			/* data queue is not full */
3681 			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
3682 		}
3683 
3684 		mutex_exit(&ldcp->lock);
3685 		return (exit_val);
3686 	} else {
3687 		exit_val = ldcp->read_p(ldcp, bufp, sizep);
3688 	}
3689 
3690 	/*
3691 	 * if queue has been drained - clear interrupt
3692 	 */
3693 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
3694 	    &ldcp->link_state);
3695 	if (rv != 0) {
3696 		cmn_err(CE_WARN, "ldc_read: (0x%lx) unable to read queue ptrs",
3697 		    ldcp->id);
3698 		mutex_enter(&ldcp->tx_lock);
3699 		i_ldc_reset(ldcp, B_TRUE);
3700 		mutex_exit(&ldcp->tx_lock);
3701 		mutex_exit(&ldcp->lock);
3702 		return (ECONNRESET);
3703 	}
3704 
3705 	if (exit_val == 0) {
3706 		if (ldcp->link_state == LDC_CHANNEL_DOWN ||
3707 		    ldcp->link_state == LDC_CHANNEL_RESET) {
3708 			mutex_enter(&ldcp->tx_lock);
3709 			i_ldc_reset(ldcp, B_FALSE);
3710 			exit_val = ECONNRESET;
3711 			mutex_exit(&ldcp->tx_lock);
3712 		}
3713 		if ((rv == 0) &&
3714 		    (ldcp->rx_intr_state == LDC_INTR_PEND) &&
3715 		    (rx_head == rx_tail)) {
3716 			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
3717 		}
3718 	}
3719 
3720 	mutex_exit(&ldcp->lock);
3721 	return (exit_val);
3722 }
3723 
3724 /*
3725  * Basic raw mondo read -
3726  * no interpretation of mondo contents at all.
3727  *
3728  * Enter and exit with ldcp->lock held by caller
3729  */
3730 static int
3731 i_ldc_read_raw(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep)
3732 {
3733 	uint64_t 	q_size_mask;
3734 	ldc_msg_t 	*msgp;
3735 	uint8_t		*msgbufp;
3736 	int		rv = 0, space;
3737 	uint64_t 	rx_head, rx_tail;
3738 
3739 	space = *sizep;
3740 
3741 	if (space < LDC_PAYLOAD_SIZE_RAW)
3742 		return (ENOBUFS);
3743 
3744 	ASSERT(mutex_owned(&ldcp->lock));
3745 
3746 	/* compute mask for increment */
3747 	q_size_mask = (ldcp->rx_q_entries-1)<<LDC_PACKET_SHIFT;
3748 
3749 	/*
3750 	 * Read packet(s) from the queue
3751 	 */
3752 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
3753 	    &ldcp->link_state);
3754 	if (rv != 0) {
3755 		cmn_err(CE_WARN,
3756 		    "ldc_read_raw: (0x%lx) unable to read queue ptrs",
3757 		    ldcp->id);
3758 		return (EIO);
3759 	}
3760 	D1(ldcp->id, "ldc_read_raw: (0x%llx) rxh=0x%llx,"
3761 	    " rxt=0x%llx, st=0x%llx\n",
3762 	    ldcp->id, rx_head, rx_tail, ldcp->link_state);
3763 
3764 	/* reset the channel state if the channel went down */
3765 	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
3766 	    ldcp->link_state == LDC_CHANNEL_RESET) {
3767 		mutex_enter(&ldcp->tx_lock);
3768 		i_ldc_reset(ldcp, B_FALSE);
3769 		mutex_exit(&ldcp->tx_lock);
3770 		return (ECONNRESET);
3771 	}
3772 
3773 	/*
3774 	 * Check for empty queue
3775 	 */
3776 	if (rx_head == rx_tail) {
3777 		*sizep = 0;
3778 		return (0);
3779 	}
3780 
3781 	/* get the message */
3782 	msgp = (ldc_msg_t *)(ldcp->rx_q_va + rx_head);
3783 
3784 	/* if channel is in RAW mode, copy data and return */
3785 	msgbufp = (uint8_t *)&(msgp->raw[0]);
3786 
3787 	bcopy(msgbufp, target_bufp, LDC_PAYLOAD_SIZE_RAW);
3788 
3789 	DUMP_PAYLOAD(ldcp->id, msgbufp);
3790 
3791 	*sizep = LDC_PAYLOAD_SIZE_RAW;
3792 
3793 	rx_head = (rx_head + LDC_PACKET_SIZE) & q_size_mask;
3794 	rv = i_ldc_set_rx_head(ldcp, rx_head);
3795 
3796 	return (rv);
3797 }
3798 
3799 /*
3800  * Process LDC mondos to build larger packets
3801  * with either un-reliable or reliable delivery.
3802  *
3803  * Enter and exit with ldcp->lock held by caller
3804  */
3805 static int
3806 i_ldc_read_packet(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep)
3807 {
3808 	int		rv = 0;
3809 	uint64_t 	rx_head = 0, rx_tail = 0;
3810 	uint64_t 	curr_head = 0;
3811 	ldc_msg_t 	*msg;
3812 	caddr_t 	target;
3813 	size_t 		len = 0, bytes_read = 0;
3814 	int 		retries = 0;
3815 	uint64_t 	q_va, q_size_mask;
3816 	uint64_t	first_fragment = 0;
3817 
3818 	target = target_bufp;
3819 
3820 	ASSERT(mutex_owned(&ldcp->lock));
3821 
3822 	/* check if the buffer and size are valid */
3823 	if (target_bufp == NULL || *sizep == 0) {
3824 		DWARN(ldcp->id, "ldc_read: (0x%llx) invalid buffer/size\n",
3825 		    ldcp->id);
3826 		return (EINVAL);
3827 	}
3828 
3829 	/* Set q_va and compute increment mask for the appropriate queue */
3830 	if (ldcp->mode == LDC_MODE_RELIABLE) {
3831 		q_va	    = ldcp->rx_dq_va;
3832 		q_size_mask = (ldcp->rx_dq_entries-1)<<LDC_PACKET_SHIFT;
3833 	} else {
3834 		q_va	    = ldcp->rx_q_va;
3835 		q_size_mask = (ldcp->rx_q_entries-1)<<LDC_PACKET_SHIFT;
3836 	}
3837 
3838 	/*
3839 	 * Read packet(s) from the queue
3840 	 */
3841 	rv = ldcp->readq_get_state(ldcp, &curr_head, &rx_tail,
3842 	    &ldcp->link_state);
3843 	if (rv != 0) {
3844 		cmn_err(CE_WARN, "ldc_read: (0x%lx) unable to read queue ptrs",
3845 		    ldcp->id);
3846 		mutex_enter(&ldcp->tx_lock);
3847 		i_ldc_reset(ldcp, B_TRUE);
3848 		mutex_exit(&ldcp->tx_lock);
3849 		return (ECONNRESET);
3850 	}
3851 	D1(ldcp->id, "ldc_read: (0x%llx) chd=0x%llx, tl=0x%llx, st=0x%llx\n",
3852 	    ldcp->id, curr_head, rx_tail, ldcp->link_state);
3853 
3854 	/* reset the channel state if the channel went down */
3855 	if (ldcp->link_state != LDC_CHANNEL_UP)
3856 		goto channel_is_reset;
3857 
3858 	for (;;) {
3859 
3860 		if (curr_head == rx_tail) {
3861 			/*
3862 			 * If a data queue is being used, check the Rx HV
3863 			 * queue. This will copy over any new data packets
3864 			 * that have arrived.
3865 			 */
3866 			if (ldcp->mode == LDC_MODE_RELIABLE)
3867 				(void) i_ldc_chkq(ldcp);
3868 
3869 			rv = ldcp->readq_get_state(ldcp,
3870 			    &rx_head, &rx_tail, &ldcp->link_state);
3871 			if (rv != 0) {
3872 				cmn_err(CE_WARN,
3873 				    "ldc_read: (0x%lx) cannot read queue ptrs",
3874 				    ldcp->id);
3875 				mutex_enter(&ldcp->tx_lock);
3876 				i_ldc_reset(ldcp, B_TRUE);
3877 				mutex_exit(&ldcp->tx_lock);
3878 				return (ECONNRESET);
3879 			}
3880 
3881 			if (ldcp->link_state != LDC_CHANNEL_UP)
3882 				goto channel_is_reset;
3883 
3884 			if (curr_head == rx_tail) {
3885 
3886 				/* If in the middle of a fragmented xfer */
3887 				if (first_fragment != 0) {
3888 
3889 					/* wait for ldc_delay usecs */
3890 					drv_usecwait(ldc_delay);
3891 
3892 					if (++retries < ldc_max_retries)
3893 						continue;
3894 
3895 					*sizep = 0;
3896 					if (ldcp->mode != LDC_MODE_RELIABLE)
3897 						ldcp->last_msg_rcd =
3898 						    first_fragment - 1;
3899 					DWARN(DBG_ALL_LDCS, "ldc_read: "
3900 					    "(0x%llx) read timeout", ldcp->id);
3901 					return (EAGAIN);
3902 				}
3903 				*sizep = 0;
3904 				break;
3905 			}
3906 		}
3907 		retries = 0;
3908 
3909 		D2(ldcp->id,
3910 		    "ldc_read: (0x%llx) chd=0x%llx, rxhd=0x%llx, rxtl=0x%llx\n",
3911 		    ldcp->id, curr_head, rx_head, rx_tail);
3912 
3913 		/* get the message */
3914 		msg = (ldc_msg_t *)(q_va + curr_head);
3915 
3916 		DUMP_LDC_PKT(ldcp, "ldc_read received pkt",
3917 		    ldcp->rx_q_va + curr_head);
3918 
3919 		/* Check the message ID for the message received */
3920 		if (ldcp->mode != LDC_MODE_RELIABLE) {
3921 			if ((rv = i_ldc_check_seqid(ldcp, msg)) != 0) {
3922 
3923 				DWARN(ldcp->id, "ldc_read: (0x%llx) seqid "
3924 				    "error, q_ptrs=0x%lx,0x%lx",
3925 				    ldcp->id, rx_head, rx_tail);
3926 
3927 				/* throw away data */
3928 				bytes_read = 0;
3929 
3930 				/* Reset last_msg_rcd to start of message */
3931 				if (first_fragment != 0) {
3932 					ldcp->last_msg_rcd = first_fragment - 1;
3933 					first_fragment = 0;
3934 				}
3935 				/*
3936 				 * Send a NACK -- invalid seqid
3937 				 * get the current tail for the response
3938 				 */
3939 				rv = i_ldc_send_pkt(ldcp, msg->type, LDC_NACK,
3940 				    (msg->ctrl & LDC_CTRL_MASK));
3941 				if (rv) {
3942 					cmn_err(CE_NOTE,
3943 					    "ldc_read: (0x%lx) err sending "
3944 					    "NACK msg\n", ldcp->id);
3945 
3946 					/* if cannot send NACK - reset chan */
3947 					mutex_enter(&ldcp->tx_lock);
3948 					i_ldc_reset(ldcp, B_FALSE);
3949 					mutex_exit(&ldcp->tx_lock);
3950 					rv = ECONNRESET;
3951 					break;
3952 				}
3953 
3954 				/* purge receive queue */
3955 				rv = i_ldc_set_rx_head(ldcp, rx_tail);
3956 
3957 				break;
3958 			}
3959 
3960 			/*
3961 			 * Process any messages of type CTRL messages
3962 			 * Future implementations should try to pass these
3963 			 * to LDC link by resetting the intr state.
3964 			 *
3965 			 * NOTE: not done as a switch() as type can be
3966 			 * both ctrl+data
3967 			 */
3968 			if (msg->type & LDC_CTRL) {
3969 				if (rv = i_ldc_ctrlmsg(ldcp, msg)) {
3970 					if (rv == EAGAIN)
3971 						continue;
3972 					rv = i_ldc_set_rx_head(ldcp, rx_tail);
3973 					*sizep = 0;
3974 					bytes_read = 0;
3975 					break;
3976 				}
3977 			}
3978 
3979 			/* process data ACKs */
3980 			if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) {
3981 				if (rv = i_ldc_process_data_ACK(ldcp, msg)) {
3982 					*sizep = 0;
3983 					bytes_read = 0;
3984 					break;
3985 				}
3986 			}
3987 
3988 			/* process data NACKs */
3989 			if ((msg->type & LDC_DATA) && (msg->stype & LDC_NACK)) {
3990 				DWARN(ldcp->id,
3991 				    "ldc_read: (0x%llx) received DATA/NACK",
3992 				    ldcp->id);
3993 				mutex_enter(&ldcp->tx_lock);
3994 				i_ldc_reset(ldcp, B_TRUE);
3995 				mutex_exit(&ldcp->tx_lock);
3996 				return (ECONNRESET);
3997 			}
3998 		}
3999 
4000 		/* process data messages */
4001 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_INFO)) {
4002 
4003 			uint8_t *msgbuf = (uint8_t *)(
4004 			    (ldcp->mode == LDC_MODE_RELIABLE) ?
4005 			    msg->rdata : msg->udata);
4006 
4007 			D2(ldcp->id,
4008 			    "ldc_read: (0x%llx) received data msg\n", ldcp->id);
4009 
4010 			/* get the packet length */
4011 			len = (msg->env & LDC_LEN_MASK);
4012 
4013 				/*
4014 				 * FUTURE OPTIMIZATION:
4015 				 * dont need to set q head for every
4016 				 * packet we read just need to do this when
4017 				 * we are done or need to wait for more
4018 				 * mondos to make a full packet - this is
4019 				 * currently expensive.
4020 				 */
4021 
4022 			if (first_fragment == 0) {
4023 
4024 				/*
4025 				 * first packets should always have the start
4026 				 * bit set (even for a single packet). If not
4027 				 * throw away the packet
4028 				 */
4029 				if (!(msg->env & LDC_FRAG_START)) {
4030 
4031 					DWARN(DBG_ALL_LDCS,
4032 					    "ldc_read: (0x%llx) not start - "
4033 					    "frag=%x\n", ldcp->id,
4034 					    (msg->env) & LDC_FRAG_MASK);
4035 
4036 					/* toss pkt, inc head, cont reading */
4037 					bytes_read = 0;
4038 					target = target_bufp;
4039 					curr_head =
4040 					    (curr_head + LDC_PACKET_SIZE)
4041 					    & q_size_mask;
4042 					if (rv = ldcp->readq_set_head(ldcp,
4043 					    curr_head))
4044 						break;
4045 
4046 					continue;
4047 				}
4048 
4049 				first_fragment = msg->seqid;
4050 			} else {
4051 				/* check to see if this is a pkt w/ START bit */
4052 				if (msg->env & LDC_FRAG_START) {
4053 					DWARN(DBG_ALL_LDCS,
4054 					    "ldc_read:(0x%llx) unexpected pkt"
4055 					    " env=0x%x discarding %d bytes,"
4056 					    " lastmsg=%d, currentmsg=%d\n",
4057 					    ldcp->id, msg->env&LDC_FRAG_MASK,
4058 					    bytes_read, ldcp->last_msg_rcd,
4059 					    msg->seqid);
4060 
4061 					/* throw data we have read so far */
4062 					bytes_read = 0;
4063 					target = target_bufp;
4064 					first_fragment = msg->seqid;
4065 
4066 					if (rv = ldcp->readq_set_head(ldcp,
4067 					    curr_head))
4068 						break;
4069 				}
4070 			}
4071 
4072 			/* copy (next) pkt into buffer */
4073 			if (len <= (*sizep - bytes_read)) {
4074 				bcopy(msgbuf, target, len);
4075 				target += len;
4076 				bytes_read += len;
4077 			} else {
4078 				/*
4079 				 * there is not enough space in the buffer to
4080 				 * read this pkt. throw message away & continue
4081 				 * reading data from queue
4082 				 */
4083 				DWARN(DBG_ALL_LDCS,
4084 				    "ldc_read: (0x%llx) buffer too small, "
4085 				    "head=0x%lx, expect=%d, got=%d\n", ldcp->id,
4086 				    curr_head, *sizep, bytes_read+len);
4087 
4088 				first_fragment = 0;
4089 				target = target_bufp;
4090 				bytes_read = 0;
4091 
4092 				/* throw away everything received so far */
4093 				if (rv = ldcp->readq_set_head(ldcp, curr_head))
4094 					break;
4095 
4096 				/* continue reading remaining pkts */
4097 				continue;
4098 			}
4099 		}
4100 
4101 		/* set the message id */
4102 		if (ldcp->mode != LDC_MODE_RELIABLE)
4103 			ldcp->last_msg_rcd = msg->seqid;
4104 
4105 		/* move the head one position */
4106 		curr_head = (curr_head + LDC_PACKET_SIZE) & q_size_mask;
4107 
4108 		if (msg->env & LDC_FRAG_STOP) {
4109 
4110 			/*
4111 			 * All pkts that are part of this fragmented transfer
4112 			 * have been read or this was a single pkt read
4113 			 * or there was an error
4114 			 */
4115 
4116 			/* set the queue head */
4117 			if (rv = ldcp->readq_set_head(ldcp, curr_head))
4118 				bytes_read = 0;
4119 
4120 			*sizep = bytes_read;
4121 
4122 			break;
4123 		}
4124 
4125 		/* advance head if it is a CTRL packet or a DATA ACK packet */
4126 		if ((msg->type & LDC_CTRL) ||
4127 		    ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK))) {
4128 
4129 			/* set the queue head */
4130 			if (rv = ldcp->readq_set_head(ldcp, curr_head)) {
4131 				bytes_read = 0;
4132 				break;
4133 			}
4134 
4135 			D2(ldcp->id, "ldc_read: (0x%llx) set ACK qhead 0x%llx",
4136 			    ldcp->id, curr_head);
4137 		}
4138 
4139 	} /* for (;;) */
4140 
4141 	D2(ldcp->id, "ldc_read: (0x%llx) end size=%d", ldcp->id, *sizep);
4142 
4143 	return (rv);
4144 
4145 channel_is_reset:
4146 	mutex_enter(&ldcp->tx_lock);
4147 	i_ldc_reset(ldcp, B_FALSE);
4148 	mutex_exit(&ldcp->tx_lock);
4149 	return (ECONNRESET);
4150 }
4151 
4152 /*
4153  * Fetch and buffer incoming packets so we can hand them back as
4154  * a basic byte stream.
4155  *
4156  * Enter and exit with ldcp->lock held by caller
4157  */
4158 static int
4159 i_ldc_read_stream(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep)
4160 {
4161 	int	rv;
4162 	size_t	size;
4163 
4164 	ASSERT(mutex_owned(&ldcp->lock));
4165 
4166 	D2(ldcp->id, "i_ldc_read_stream: (0x%llx) buffer size=%d",
4167 	    ldcp->id, *sizep);
4168 
4169 	if (ldcp->stream_remains == 0) {
4170 		size = ldcp->mtu;
4171 		rv = i_ldc_read_packet(ldcp,
4172 		    (caddr_t)ldcp->stream_bufferp, &size);
4173 		D2(ldcp->id, "i_ldc_read_stream: read packet (0x%llx) size=%d",
4174 		    ldcp->id, size);
4175 
4176 		if (rv != 0)
4177 			return (rv);
4178 
4179 		ldcp->stream_remains = size;
4180 		ldcp->stream_offset = 0;
4181 	}
4182 
4183 	size = MIN(ldcp->stream_remains, *sizep);
4184 
4185 	bcopy(ldcp->stream_bufferp + ldcp->stream_offset, target_bufp, size);
4186 	ldcp->stream_offset += size;
4187 	ldcp->stream_remains -= size;
4188 
4189 	D2(ldcp->id, "i_ldc_read_stream: (0x%llx) fill from buffer size=%d",
4190 	    ldcp->id, size);
4191 
4192 	*sizep = size;
4193 	return (0);
4194 }
4195 
4196 /*
4197  * Write specified amount of bytes to the channel
4198  * in multiple pkts of pkt_payload size. Each
4199  * packet is tagged with an unique packet ID in
4200  * the case of a reliable link.
4201  *
4202  * On return, size contains the number of bytes written.
4203  */
4204 int
4205 ldc_write(ldc_handle_t handle, caddr_t buf, size_t *sizep)
4206 {
4207 	ldc_chan_t	*ldcp;
4208 	int		rv = 0;
4209 
4210 	if (handle == NULL) {
4211 		DWARN(DBG_ALL_LDCS, "ldc_write: invalid channel handle\n");
4212 		return (EINVAL);
4213 	}
4214 	ldcp = (ldc_chan_t *)handle;
4215 
4216 	/* check if writes can occur */
4217 	if (!mutex_tryenter(&ldcp->tx_lock)) {
4218 		/*
4219 		 * Could not get the lock - channel could
4220 		 * be in the process of being unconfigured
4221 		 * or reader has encountered an error
4222 		 */
4223 		return (EAGAIN);
4224 	}
4225 
4226 	/* check if non-zero data to write */
4227 	if (buf == NULL || sizep == NULL) {
4228 		DWARN(ldcp->id, "ldc_write: (0x%llx) invalid data write\n",
4229 		    ldcp->id);
4230 		mutex_exit(&ldcp->tx_lock);
4231 		return (EINVAL);
4232 	}
4233 
4234 	if (*sizep == 0) {
4235 		DWARN(ldcp->id, "ldc_write: (0x%llx) write size of zero\n",
4236 		    ldcp->id);
4237 		mutex_exit(&ldcp->tx_lock);
4238 		return (0);
4239 	}
4240 
4241 	/* Check if channel is UP for data exchange */
4242 	if (ldcp->tstate != TS_UP) {
4243 		DWARN(ldcp->id,
4244 		    "ldc_write: (0x%llx) channel is not in UP state\n",
4245 		    ldcp->id);
4246 		*sizep = 0;
4247 		rv = ECONNRESET;
4248 	} else {
4249 		rv = ldcp->write_p(ldcp, buf, sizep);
4250 	}
4251 
4252 	mutex_exit(&ldcp->tx_lock);
4253 
4254 	return (rv);
4255 }
4256 
4257 /*
4258  * Write a raw packet to the channel
4259  * On return, size contains the number of bytes written.
4260  */
4261 static int
4262 i_ldc_write_raw(ldc_chan_t *ldcp, caddr_t buf, size_t *sizep)
4263 {
4264 	ldc_msg_t 	*ldcmsg;
4265 	uint64_t 	tx_head, tx_tail, new_tail;
4266 	int		rv = 0;
4267 	size_t		size;
4268 
4269 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
4270 	ASSERT(ldcp->mode == LDC_MODE_RAW);
4271 
4272 	size = *sizep;
4273 
4274 	/*
4275 	 * Check to see if the packet size is less than or
4276 	 * equal to packet size support in raw mode
4277 	 */
4278 	if (size > ldcp->pkt_payload) {
4279 		DWARN(ldcp->id,
4280 		    "ldc_write: (0x%llx) invalid size (0x%llx) for RAW mode\n",
4281 		    ldcp->id, *sizep);
4282 		*sizep = 0;
4283 		return (EMSGSIZE);
4284 	}
4285 
4286 	/* get the qptrs for the tx queue */
4287 	rv = hv_ldc_tx_get_state(ldcp->id,
4288 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
4289 	if (rv != 0) {
4290 		cmn_err(CE_WARN,
4291 		    "ldc_write: (0x%lx) cannot read queue ptrs\n", ldcp->id);
4292 		*sizep = 0;
4293 		return (EIO);
4294 	}
4295 
4296 	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
4297 	    ldcp->link_state == LDC_CHANNEL_RESET) {
4298 		DWARN(ldcp->id,
4299 		    "ldc_write: (0x%llx) channel down/reset\n", ldcp->id);
4300 
4301 		*sizep = 0;
4302 		if (mutex_tryenter(&ldcp->lock)) {
4303 			i_ldc_reset(ldcp, B_FALSE);
4304 			mutex_exit(&ldcp->lock);
4305 		} else {
4306 			/*
4307 			 * Release Tx lock, and then reacquire channel
4308 			 * and Tx lock in correct order
4309 			 */
4310 			mutex_exit(&ldcp->tx_lock);
4311 			mutex_enter(&ldcp->lock);
4312 			mutex_enter(&ldcp->tx_lock);
4313 			i_ldc_reset(ldcp, B_FALSE);
4314 			mutex_exit(&ldcp->lock);
4315 		}
4316 		return (ECONNRESET);
4317 	}
4318 
4319 	tx_tail = ldcp->tx_tail;
4320 	tx_head = ldcp->tx_head;
4321 	new_tail = (tx_tail + LDC_PACKET_SIZE) &
4322 	    ((ldcp->tx_q_entries-1) << LDC_PACKET_SHIFT);
4323 
4324 	if (new_tail == tx_head) {
4325 		DWARN(DBG_ALL_LDCS,
4326 		    "ldc_write: (0x%llx) TX queue is full\n", ldcp->id);
4327 		*sizep = 0;
4328 		return (EWOULDBLOCK);
4329 	}
4330 
4331 	D2(ldcp->id, "ldc_write: (0x%llx) start xfer size=%d",
4332 	    ldcp->id, size);
4333 
4334 	/* Send the data now */
4335 	ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
4336 
4337 	/* copy the data into pkt */
4338 	bcopy((uint8_t *)buf, ldcmsg, size);
4339 
4340 	/* increment tail */
4341 	tx_tail = new_tail;
4342 
4343 	/*
4344 	 * All packets have been copied into the TX queue
4345 	 * update the tail ptr in the HV
4346 	 */
4347 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
4348 	if (rv) {
4349 		if (rv == EWOULDBLOCK) {
4350 			DWARN(ldcp->id, "ldc_write: (0x%llx) write timed out\n",
4351 			    ldcp->id);
4352 			*sizep = 0;
4353 			return (EWOULDBLOCK);
4354 		}
4355 
4356 		*sizep = 0;
4357 		if (mutex_tryenter(&ldcp->lock)) {
4358 			i_ldc_reset(ldcp, B_FALSE);
4359 			mutex_exit(&ldcp->lock);
4360 		} else {
4361 			/*
4362 			 * Release Tx lock, and then reacquire channel
4363 			 * and Tx lock in correct order
4364 			 */
4365 			mutex_exit(&ldcp->tx_lock);
4366 			mutex_enter(&ldcp->lock);
4367 			mutex_enter(&ldcp->tx_lock);
4368 			i_ldc_reset(ldcp, B_FALSE);
4369 			mutex_exit(&ldcp->lock);
4370 		}
4371 		return (ECONNRESET);
4372 	}
4373 
4374 	ldcp->tx_tail = tx_tail;
4375 	*sizep = size;
4376 
4377 	D2(ldcp->id, "ldc_write: (0x%llx) end xfer size=%d", ldcp->id, size);
4378 
4379 	return (rv);
4380 }
4381 
4382 
4383 /*
4384  * Write specified amount of bytes to the channel
4385  * in multiple pkts of pkt_payload size. Each
4386  * packet is tagged with an unique packet ID in
4387  * the case of a reliable link.
4388  *
4389  * On return, size contains the number of bytes written.
4390  * This function needs to ensure that the write size is < MTU size
4391  */
4392 static int
4393 i_ldc_write_packet(ldc_chan_t *ldcp, caddr_t buf, size_t *size)
4394 {
4395 	ldc_msg_t 	*ldcmsg;
4396 	uint64_t 	tx_head, tx_tail, new_tail, start;
4397 	uint64_t	txq_size_mask, numavail;
4398 	uint8_t 	*msgbuf, *source = (uint8_t *)buf;
4399 	size_t 		len, bytes_written = 0, remaining;
4400 	int		rv;
4401 	uint32_t	curr_seqid;
4402 
4403 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
4404 
4405 	ASSERT(ldcp->mode == LDC_MODE_RELIABLE ||
4406 	    ldcp->mode == LDC_MODE_UNRELIABLE);
4407 
4408 	/* compute mask for increment */
4409 	txq_size_mask = (ldcp->tx_q_entries - 1) << LDC_PACKET_SHIFT;
4410 
4411 	/* get the qptrs for the tx queue */
4412 	rv = hv_ldc_tx_get_state(ldcp->id,
4413 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
4414 	if (rv != 0) {
4415 		cmn_err(CE_WARN,
4416 		    "ldc_write: (0x%lx) cannot read queue ptrs\n", ldcp->id);
4417 		*size = 0;
4418 		return (EIO);
4419 	}
4420 
4421 	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
4422 	    ldcp->link_state == LDC_CHANNEL_RESET) {
4423 		DWARN(ldcp->id,
4424 		    "ldc_write: (0x%llx) channel down/reset\n", ldcp->id);
4425 		*size = 0;
4426 		if (mutex_tryenter(&ldcp->lock)) {
4427 			i_ldc_reset(ldcp, B_FALSE);
4428 			mutex_exit(&ldcp->lock);
4429 		} else {
4430 			/*
4431 			 * Release Tx lock, and then reacquire channel
4432 			 * and Tx lock in correct order
4433 			 */
4434 			mutex_exit(&ldcp->tx_lock);
4435 			mutex_enter(&ldcp->lock);
4436 			mutex_enter(&ldcp->tx_lock);
4437 			i_ldc_reset(ldcp, B_FALSE);
4438 			mutex_exit(&ldcp->lock);
4439 		}
4440 		return (ECONNRESET);
4441 	}
4442 
4443 	tx_tail = ldcp->tx_tail;
4444 	new_tail = (tx_tail + LDC_PACKET_SIZE) %
4445 	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
4446 
4447 	/*
4448 	 * Check to see if the queue is full. The check is done using
4449 	 * the appropriate head based on the link mode.
4450 	 */
4451 	i_ldc_get_tx_head(ldcp, &tx_head);
4452 
4453 	if (new_tail == tx_head) {
4454 		DWARN(DBG_ALL_LDCS,
4455 		    "ldc_write: (0x%llx) TX queue is full\n", ldcp->id);
4456 		*size = 0;
4457 		return (EWOULDBLOCK);
4458 	}
4459 
4460 	/*
4461 	 * Make sure that the LDC Tx queue has enough space
4462 	 */
4463 	numavail = (tx_head >> LDC_PACKET_SHIFT) - (tx_tail >> LDC_PACKET_SHIFT)
4464 	    + ldcp->tx_q_entries - 1;
4465 	numavail %= ldcp->tx_q_entries;
4466 
4467 	if (*size > (numavail * ldcp->pkt_payload)) {
4468 		DWARN(DBG_ALL_LDCS,
4469 		    "ldc_write: (0x%llx) TX queue has no space\n", ldcp->id);
4470 		return (EWOULDBLOCK);
4471 	}
4472 
4473 	D2(ldcp->id, "ldc_write: (0x%llx) start xfer size=%d",
4474 	    ldcp->id, *size);
4475 
4476 	/* Send the data now */
4477 	bytes_written = 0;
4478 	curr_seqid = ldcp->last_msg_snt;
4479 	start = tx_tail;
4480 
4481 	while (*size > bytes_written) {
4482 
4483 		ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
4484 
4485 		msgbuf = (uint8_t *)((ldcp->mode == LDC_MODE_RELIABLE) ?
4486 		    ldcmsg->rdata : ldcmsg->udata);
4487 
4488 		ldcmsg->type = LDC_DATA;
4489 		ldcmsg->stype = LDC_INFO;
4490 		ldcmsg->ctrl = 0;
4491 
4492 		remaining = *size - bytes_written;
4493 		len = min(ldcp->pkt_payload, remaining);
4494 		ldcmsg->env = (uint8_t)len;
4495 
4496 		curr_seqid++;
4497 		ldcmsg->seqid = curr_seqid;
4498 
4499 		/* copy the data into pkt */
4500 		bcopy(source, msgbuf, len);
4501 
4502 		source += len;
4503 		bytes_written += len;
4504 
4505 		/* increment tail */
4506 		tx_tail = (tx_tail + LDC_PACKET_SIZE) & txq_size_mask;
4507 
4508 		ASSERT(tx_tail != tx_head);
4509 	}
4510 
4511 	/* Set the start and stop bits */
4512 	ldcmsg->env |= LDC_FRAG_STOP;
4513 	ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + start);
4514 	ldcmsg->env |= LDC_FRAG_START;
4515 
4516 	/*
4517 	 * All packets have been copied into the TX queue
4518 	 * update the tail ptr in the HV
4519 	 */
4520 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
4521 	if (rv == 0) {
4522 		ldcp->tx_tail = tx_tail;
4523 		ldcp->last_msg_snt = curr_seqid;
4524 		*size = bytes_written;
4525 	} else {
4526 		int rv2;
4527 
4528 		if (rv != EWOULDBLOCK) {
4529 			*size = 0;
4530 			if (mutex_tryenter(&ldcp->lock)) {
4531 				i_ldc_reset(ldcp, B_FALSE);
4532 				mutex_exit(&ldcp->lock);
4533 			} else {
4534 				/*
4535 				 * Release Tx lock, and then reacquire channel
4536 				 * and Tx lock in correct order
4537 				 */
4538 				mutex_exit(&ldcp->tx_lock);
4539 				mutex_enter(&ldcp->lock);
4540 				mutex_enter(&ldcp->tx_lock);
4541 				i_ldc_reset(ldcp, B_FALSE);
4542 				mutex_exit(&ldcp->lock);
4543 			}
4544 			return (ECONNRESET);
4545 		}
4546 
4547 		D1(ldcp->id, "hv_tx_set_tail returns 0x%x (head 0x%x, "
4548 		    "old tail 0x%x, new tail 0x%x, qsize=0x%x)\n",
4549 		    rv, ldcp->tx_head, ldcp->tx_tail, tx_tail,
4550 		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT));
4551 
4552 		rv2 = hv_ldc_tx_get_state(ldcp->id,
4553 		    &tx_head, &tx_tail, &ldcp->link_state);
4554 
4555 		D1(ldcp->id, "hv_ldc_tx_get_state returns 0x%x "
4556 		    "(head 0x%x, tail 0x%x state 0x%x)\n",
4557 		    rv2, tx_head, tx_tail, ldcp->link_state);
4558 
4559 		*size = 0;
4560 	}
4561 
4562 	D2(ldcp->id, "ldc_write: (0x%llx) end xfer size=%d", ldcp->id, *size);
4563 
4564 	return (rv);
4565 }
4566 
4567 /*
4568  * Write specified amount of bytes to the channel
4569  * in multiple pkts of pkt_payload size. Each
4570  * packet is tagged with an unique packet ID in
4571  * the case of a reliable link.
4572  *
4573  * On return, size contains the number of bytes written.
4574  * This function needs to ensure that the write size is < MTU size
4575  */
4576 static int
4577 i_ldc_write_stream(ldc_chan_t *ldcp, caddr_t buf, size_t *sizep)
4578 {
4579 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
4580 	ASSERT(ldcp->mode == LDC_MODE_RELIABLE);
4581 
4582 	/* Truncate packet to max of MTU size */
4583 	if (*sizep > ldcp->mtu) *sizep = ldcp->mtu;
4584 	return (i_ldc_write_packet(ldcp, buf, sizep));
4585 }
4586 
4587 
4588 /*
4589  * Interfaces for channel nexus to register/unregister with LDC module
4590  * The nexus will register functions to be used to register individual
4591  * channels with the nexus and enable interrupts for the channels
4592  */
4593 int
4594 ldc_register(ldc_cnex_t *cinfo)
4595 {
4596 	ldc_chan_t	*ldcp;
4597 
4598 	if (cinfo == NULL || cinfo->dip == NULL ||
4599 	    cinfo->reg_chan == NULL || cinfo->unreg_chan == NULL ||
4600 	    cinfo->add_intr == NULL || cinfo->rem_intr == NULL ||
4601 	    cinfo->clr_intr == NULL) {
4602 
4603 		DWARN(DBG_ALL_LDCS, "ldc_register: invalid nexus info\n");
4604 		return (EINVAL);
4605 	}
4606 
4607 	mutex_enter(&ldcssp->lock);
4608 
4609 	/* nexus registration */
4610 	ldcssp->cinfo.dip = cinfo->dip;
4611 	ldcssp->cinfo.reg_chan = cinfo->reg_chan;
4612 	ldcssp->cinfo.unreg_chan = cinfo->unreg_chan;
4613 	ldcssp->cinfo.add_intr = cinfo->add_intr;
4614 	ldcssp->cinfo.rem_intr = cinfo->rem_intr;
4615 	ldcssp->cinfo.clr_intr = cinfo->clr_intr;
4616 
4617 	/* register any channels that might have been previously initialized */
4618 	ldcp = ldcssp->chan_list;
4619 	while (ldcp) {
4620 		if ((ldcp->tstate & TS_QCONF_RDY) &&
4621 		    (ldcp->tstate & TS_CNEX_RDY) == 0)
4622 			(void) i_ldc_register_channel(ldcp);
4623 
4624 		ldcp = ldcp->next;
4625 	}
4626 
4627 	mutex_exit(&ldcssp->lock);
4628 
4629 	return (0);
4630 }
4631 
4632 int
4633 ldc_unregister(ldc_cnex_t *cinfo)
4634 {
4635 	if (cinfo == NULL || cinfo->dip == NULL) {
4636 		DWARN(DBG_ALL_LDCS, "ldc_unregister: invalid nexus info\n");
4637 		return (EINVAL);
4638 	}
4639 
4640 	mutex_enter(&ldcssp->lock);
4641 
4642 	if (cinfo->dip != ldcssp->cinfo.dip) {
4643 		DWARN(DBG_ALL_LDCS, "ldc_unregister: invalid dip\n");
4644 		mutex_exit(&ldcssp->lock);
4645 		return (EINVAL);
4646 	}
4647 
4648 	/* nexus unregister */
4649 	ldcssp->cinfo.dip = NULL;
4650 	ldcssp->cinfo.reg_chan = NULL;
4651 	ldcssp->cinfo.unreg_chan = NULL;
4652 	ldcssp->cinfo.add_intr = NULL;
4653 	ldcssp->cinfo.rem_intr = NULL;
4654 	ldcssp->cinfo.clr_intr = NULL;
4655 
4656 	mutex_exit(&ldcssp->lock);
4657 
4658 	return (0);
4659 }
4660