xref: /titanic_50/usr/src/uts/sun4v/io/ldc.c (revision 112116d842e816e29d26a8fe28ed25d201063169)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * sun4v LDC Link Layer
31  */
32 #include <sys/types.h>
33 #include <sys/file.h>
34 #include <sys/errno.h>
35 #include <sys/open.h>
36 #include <sys/cred.h>
37 #include <sys/kmem.h>
38 #include <sys/conf.h>
39 #include <sys/cmn_err.h>
40 #include <sys/ksynch.h>
41 #include <sys/modctl.h>
42 #include <sys/stat.h> /* needed for S_IFBLK and S_IFCHR */
43 #include <sys/debug.h>
44 #include <sys/cred.h>
45 #include <sys/promif.h>
46 #include <sys/ddi.h>
47 #include <sys/sunddi.h>
48 #include <sys/cyclic.h>
49 #include <sys/machsystm.h>
50 #include <sys/vm.h>
51 #include <sys/cpu.h>
52 #include <sys/intreg.h>
53 #include <sys/machcpuvar.h>
54 #include <sys/mmu.h>
55 #include <sys/pte.h>
56 #include <vm/hat.h>
57 #include <vm/as.h>
58 #include <vm/hat_sfmmu.h>
59 #include <sys/vm_machparam.h>
60 #include <vm/seg_kmem.h>
61 #include <vm/seg_kpm.h>
62 #include <sys/note.h>
63 #include <sys/ivintr.h>
64 #include <sys/hypervisor_api.h>
65 #include <sys/ldc.h>
66 #include <sys/ldc_impl.h>
67 #include <sys/cnex.h>
68 #include <sys/hsvc.h>
69 #include <sys/sdt.h>
70 
71 /* Core internal functions */
72 int i_ldc_h2v_error(int h_error);
73 void i_ldc_reset(ldc_chan_t *ldcp, boolean_t force_reset);
74 
75 static int i_ldc_txq_reconf(ldc_chan_t *ldcp);
76 static int i_ldc_rxq_reconf(ldc_chan_t *ldcp, boolean_t force_reset);
77 static int i_ldc_rxq_drain(ldc_chan_t *ldcp);
78 static void i_ldc_reset_state(ldc_chan_t *ldcp);
79 
80 static int i_ldc_get_tx_tail(ldc_chan_t *ldcp, uint64_t *tail);
81 static void i_ldc_get_tx_head(ldc_chan_t *ldcp, uint64_t *head);
82 static int i_ldc_set_tx_tail(ldc_chan_t *ldcp, uint64_t tail);
83 static int i_ldc_set_rx_head(ldc_chan_t *ldcp, uint64_t head);
84 static int i_ldc_send_pkt(ldc_chan_t *ldcp, uint8_t pkttype, uint8_t subtype,
85     uint8_t ctrlmsg);
86 
87 static int  i_ldc_set_rxdq_head(ldc_chan_t *ldcp, uint64_t head);
88 static void i_ldc_rxdq_copy(ldc_chan_t *ldcp, uint64_t *head);
89 static uint64_t i_ldc_dq_rx_get_state(ldc_chan_t *ldcp, uint64_t *head,
90     uint64_t *tail, uint64_t *link_state);
91 static uint64_t i_ldc_hvq_rx_get_state(ldc_chan_t *ldcp, uint64_t *head,
92     uint64_t *tail, uint64_t *link_state);
93 static int i_ldc_rx_ackpeek(ldc_chan_t *ldcp, uint64_t rx_head,
94     uint64_t rx_tail);
95 static uint_t i_ldc_chkq(ldc_chan_t *ldcp);
96 
97 /* Interrupt handling functions */
98 static uint_t i_ldc_tx_hdlr(caddr_t arg1, caddr_t arg2);
99 static uint_t i_ldc_rx_hdlr(caddr_t arg1, caddr_t arg2);
100 static uint_t i_ldc_rx_process_hvq(ldc_chan_t *ldcp, boolean_t *notify_client,
101     uint64_t *notify_event);
102 static void i_ldc_clear_intr(ldc_chan_t *ldcp, cnex_intrtype_t itype);
103 
104 /* Read method functions */
105 static int i_ldc_read_raw(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep);
106 static int i_ldc_read_packet(ldc_chan_t *ldcp, caddr_t target_bufp,
107 	size_t *sizep);
108 static int i_ldc_read_stream(ldc_chan_t *ldcp, caddr_t target_bufp,
109 	size_t *sizep);
110 
111 /* Write method functions */
112 static int i_ldc_write_raw(ldc_chan_t *ldcp, caddr_t target_bufp,
113 	size_t *sizep);
114 static int i_ldc_write_packet(ldc_chan_t *ldcp, caddr_t target_bufp,
115 	size_t *sizep);
116 static int i_ldc_write_stream(ldc_chan_t *ldcp, caddr_t target_bufp,
117 	size_t *sizep);
118 
119 /* Pkt processing internal functions */
120 static int i_ldc_check_seqid(ldc_chan_t *ldcp, ldc_msg_t *ldcmsg);
121 static int i_ldc_ctrlmsg(ldc_chan_t *ldcp, ldc_msg_t *ldcmsg);
122 static int i_ldc_process_VER(ldc_chan_t *ldcp, ldc_msg_t *msg);
123 static int i_ldc_process_RTS(ldc_chan_t *ldcp, ldc_msg_t *msg);
124 static int i_ldc_process_RTR(ldc_chan_t *ldcp, ldc_msg_t *msg);
125 static int i_ldc_process_RDX(ldc_chan_t *ldcp, ldc_msg_t *msg);
126 static int i_ldc_process_data_ACK(ldc_chan_t *ldcp, ldc_msg_t *msg);
127 
128 /* LDC Version */
129 static ldc_ver_t ldc_versions[] = { {1, 0} };
130 
131 /* number of supported versions */
132 #define	LDC_NUM_VERS	(sizeof (ldc_versions) / sizeof (ldc_versions[0]))
133 
134 /* Invalid value for the ldc_chan_t rx_ack_head field */
135 #define	ACKPEEK_HEAD_INVALID	((uint64_t)-1)
136 
137 
138 /* Module State Pointer */
139 ldc_soft_state_t *ldcssp;
140 
141 static struct modldrv md = {
142 	&mod_miscops,			/* This is a misc module */
143 	"sun4v LDC module v%I%",	/* Name of the module */
144 };
145 
146 static struct modlinkage ml = {
147 	MODREV_1,
148 	&md,
149 	NULL
150 };
151 
152 static uint64_t ldc_sup_minor;		/* Supported minor number */
153 static hsvc_info_t ldc_hsvc = {
154 	HSVC_REV_1, NULL, HSVC_GROUP_LDC, 1, 1, "ldc"
155 };
156 
157 /*
158  * The no. of MTU size messages that can be stored in
159  * the LDC Tx queue. The number of Tx queue entries is
160  * then computed as (mtu * mtu_msgs)/sizeof(queue_entry)
161  */
162 uint64_t ldc_mtu_msgs = LDC_MTU_MSGS;
163 
164 /*
165  * The minimum queue length. This is the size of the smallest
166  * LDC queue. If the computed value is less than this default,
167  * the queue length is rounded up to 'ldc_queue_entries'.
168  */
169 uint64_t ldc_queue_entries = LDC_QUEUE_ENTRIES;
170 
171 /*
172  * The length of the reliable-mode data queue in terms of the LDC
173  * receive queue length. i.e., the number of times larger than the
174  * LDC receive queue that the data queue should be. The HV receive
175  * queue is required to be a power of 2 and this implementation
176  * assumes the data queue will also be a power of 2. By making the
177  * multiplier a power of 2, we ensure the data queue will be a
178  * power of 2. We use a multiplier because the receive queue is
179  * sized to be sane relative to the MTU and the same is needed for
180  * the data queue.
181  */
182 uint64_t ldc_rxdq_multiplier = LDC_RXDQ_MULTIPLIER;
183 
184 /*
185  * LDC retry count and delay - when the HV returns EWOULDBLOCK
186  * the operation is retried 'ldc_max_retries' times with a
187  * wait of 'ldc_delay' usecs between each retry.
188  */
189 int ldc_max_retries = LDC_MAX_RETRIES;
190 clock_t ldc_delay = LDC_DELAY;
191 
192 /*
193  * delay between each retry of channel unregistration in
194  * ldc_close(), to wait for pending interrupts to complete.
195  */
196 clock_t ldc_close_delay = LDC_CLOSE_DELAY;
197 
198 #ifdef DEBUG
199 
200 /*
201  * Print debug messages
202  *
203  * set ldcdbg to 0x7 for enabling all msgs
204  * 0x4 - Warnings
205  * 0x2 - All debug messages
206  * 0x1 - Minimal debug messages
207  *
208  * set ldcdbgchan to the channel number you want to debug
209  * setting it to -1 prints debug messages for all channels
210  * NOTE: ldcdbgchan has no effect on error messages
211  */
212 
213 int ldcdbg = 0x0;
214 int64_t ldcdbgchan = DBG_ALL_LDCS;
215 uint64_t ldc_inject_err_flag = 0;
216 
217 void
218 ldcdebug(int64_t id, const char *fmt, ...)
219 {
220 	char buf[512];
221 	va_list ap;
222 
223 	/*
224 	 * Do not return if,
225 	 * caller wants to print it anyway - (id == DBG_ALL_LDCS)
226 	 * debug channel is set to all LDCs - (ldcdbgchan == DBG_ALL_LDCS)
227 	 * debug channel = caller specified channel
228 	 */
229 	if ((id != DBG_ALL_LDCS) &&
230 	    (ldcdbgchan != DBG_ALL_LDCS) &&
231 	    (ldcdbgchan != id)) {
232 		return;
233 	}
234 
235 	va_start(ap, fmt);
236 	(void) vsprintf(buf, fmt, ap);
237 	va_end(ap);
238 
239 	cmn_err(CE_CONT, "?%s", buf);
240 }
241 
242 #define	LDC_ERR_RESET		0x1
243 #define	LDC_ERR_PKTLOSS		0x2
244 #define	LDC_ERR_DQFULL		0x4
245 #define	LDC_ERR_DRNGCLEAR	0x8
246 
247 static boolean_t
248 ldc_inject_error(ldc_chan_t *ldcp, uint64_t error)
249 {
250 	if ((ldcdbgchan != DBG_ALL_LDCS) && (ldcdbgchan != ldcp->id))
251 		return (B_FALSE);
252 
253 	if ((ldc_inject_err_flag & error) == 0)
254 		return (B_FALSE);
255 
256 	/* clear the injection state */
257 	ldc_inject_err_flag &= ~error;
258 
259 	return (B_TRUE);
260 }
261 
262 #define	D1		\
263 if (ldcdbg & 0x01)	\
264 	ldcdebug
265 
266 #define	D2		\
267 if (ldcdbg & 0x02)	\
268 	ldcdebug
269 
270 #define	DWARN		\
271 if (ldcdbg & 0x04)	\
272 	ldcdebug
273 
/*
 * Hex-dump the 64 bytes starting at 'addr' through D2 for channel 'id'.
 * Each byte is rendered as "|xx" (3 chars); the buffer holds 64 of those
 * plus the closing "|\n" and the terminating NUL.
 */
#define	DUMP_PAYLOAD(id, addr)						\
{									\
	char buf[65*3];							\
	int i;								\
	uint8_t *src = (uint8_t *)(addr);				\
	for (i = 0; i < 64; i++, src++)					\
		(void) sprintf(&buf[i * 3], "|%02x", *src);		\
	(void) sprintf(&buf[i * 3], "|\n");				\
	D2((id), "payload: %s", buf);					\
}
284 
285 #define	DUMP_LDC_PKT(c, s, addr)					\
286 {									\
287 	ldc_msg_t *msg = (ldc_msg_t *)(addr);				\
288 	uint32_t mid = ((c)->mode != LDC_MODE_RAW) ? msg->seqid : 0;	\
289 	if (msg->type == LDC_DATA) {                                    \
290 	    D2((c)->id, "%s: msg%d (/%x/%x/%x/,env[%c%c,sz=%d])",	\
291 	    (s), mid, msg->type, msg->stype, msg->ctrl,			\
292 	    (msg->env & LDC_FRAG_START) ? 'B' : ' ',                    \
293 	    (msg->env & LDC_FRAG_STOP) ? 'E' : ' ',                     \
294 	    (msg->env & LDC_LEN_MASK));					\
295 	} else { 							\
296 	    D2((c)->id, "%s: msg%d (/%x/%x/%x/,env=%x)", (s),		\
297 	    mid, msg->type, msg->stype, msg->ctrl, msg->env);		\
298 	} 								\
299 }
300 
301 #define	LDC_INJECT_RESET(_ldcp)	ldc_inject_error(_ldcp, LDC_ERR_RESET)
302 #define	LDC_INJECT_PKTLOSS(_ldcp) ldc_inject_error(_ldcp, LDC_ERR_PKTLOSS)
303 #define	LDC_INJECT_DQFULL(_ldcp) ldc_inject_error(_ldcp, LDC_ERR_DQFULL)
304 #define	LDC_INJECT_DRNGCLEAR(_ldcp) ldc_inject_error(_ldcp, LDC_ERR_DRNGCLEAR)
305 extern void i_ldc_mem_inject_dring_clear(ldc_chan_t *ldcp);
306 
307 #else
308 
309 #define	DBG_ALL_LDCS -1
310 
311 #define	D1
312 #define	D2
313 #define	DWARN
314 
315 #define	DUMP_PAYLOAD(id, addr)
316 #define	DUMP_LDC_PKT(c, s, addr)
317 
318 #define	LDC_INJECT_RESET(_ldcp)	(B_FALSE)
319 #define	LDC_INJECT_PKTLOSS(_ldcp) (B_FALSE)
320 #define	LDC_INJECT_DQFULL(_ldcp) (B_FALSE)
321 #define	LDC_INJECT_DRNGCLEAR(_ldcp) (B_FALSE)
322 
323 #endif
324 
325 /*
326  * dtrace SDT probes to ease tracing of the rx data queue and HV queue
327  * lengths. Just pass the head, tail, and entries values so that the
328  * length can be calculated in a dtrace script when the probe is enabled.
329  */
330 #define	TRACE_RXDQ_LENGTH(ldcp)						\
331 	DTRACE_PROBE4(rxdq__size,					\
332 	uint64_t, ldcp->id,						\
333 	uint64_t, ldcp->rx_dq_head,					\
334 	uint64_t, ldcp->rx_dq_tail,					\
335 	uint64_t, ldcp->rx_dq_entries)
336 
337 #define	TRACE_RXHVQ_LENGTH(ldcp, head, tail)				\
338 	DTRACE_PROBE4(rxhvq__size,					\
339 	uint64_t, ldcp->id,						\
340 	uint64_t, head,							\
341 	uint64_t, tail,							\
342 	uint64_t, ldcp->rx_q_entries)
343 
/*
 * A dtrace SDT probe to ease tracing of data queue copy operations.
 * Passes the channel id and the number of bytes copied.
 */
#define	TRACE_RXDQ_COPY(ldcp, bytes)					\
	DTRACE_PROBE2(rxdq__copy, uint64_t, (ldcp)->id, uint64_t, (bytes))
347 
348 /* The amount of contiguous space at the tail of the queue */
349 #define	Q_CONTIG_SPACE(head, tail, size)				\
350 	((head) <= (tail) ? ((size) - (tail)) :				\
351 	((head) - (tail) - LDC_PACKET_SIZE))
352 
353 #define	ZERO_PKT(p)			\
354 	bzero((p), sizeof (ldc_msg_t));
355 
356 #define	IDX2COOKIE(idx, pg_szc, pg_shift)				\
357 	(((pg_szc) << LDC_COOKIE_PGSZC_SHIFT) | ((idx) << (pg_shift)))
358 
359 int
360 _init(void)
361 {
362 	int status;
363 	extern void i_ldc_mem_set_hsvc_vers(uint64_t major, uint64_t minor);
364 
365 	status = hsvc_register(&ldc_hsvc, &ldc_sup_minor);
366 	if (status != 0) {
367 		cmn_err(CE_NOTE, "!%s: cannot negotiate hypervisor LDC services"
368 		    " group: 0x%lx major: %ld minor: %ld errno: %d",
369 		    ldc_hsvc.hsvc_modname, ldc_hsvc.hsvc_group,
370 		    ldc_hsvc.hsvc_major, ldc_hsvc.hsvc_minor, status);
371 		return (-1);
372 	}
373 
374 	/* Initialize shared memory HV API version checking */
375 	i_ldc_mem_set_hsvc_vers(ldc_hsvc.hsvc_major, ldc_sup_minor);
376 
377 	/* allocate soft state structure */
378 	ldcssp = kmem_zalloc(sizeof (ldc_soft_state_t), KM_SLEEP);
379 
380 	/* Link the module into the system */
381 	status = mod_install(&ml);
382 	if (status != 0) {
383 		kmem_free(ldcssp, sizeof (ldc_soft_state_t));
384 		return (status);
385 	}
386 
387 	/* Initialize the LDC state structure */
388 	mutex_init(&ldcssp->lock, NULL, MUTEX_DRIVER, NULL);
389 
390 	mutex_enter(&ldcssp->lock);
391 
392 	/* Create a cache for memory handles */
393 	ldcssp->memhdl_cache = kmem_cache_create("ldc_memhdl_cache",
394 	    sizeof (ldc_mhdl_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
395 	if (ldcssp->memhdl_cache == NULL) {
396 		DWARN(DBG_ALL_LDCS, "_init: ldc_memhdl cache create failed\n");
397 		mutex_exit(&ldcssp->lock);
398 		return (-1);
399 	}
400 
401 	/* Create cache for memory segment structures */
402 	ldcssp->memseg_cache = kmem_cache_create("ldc_memseg_cache",
403 	    sizeof (ldc_memseg_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
404 	if (ldcssp->memseg_cache == NULL) {
405 		DWARN(DBG_ALL_LDCS, "_init: ldc_memseg cache create failed\n");
406 		mutex_exit(&ldcssp->lock);
407 		return (-1);
408 	}
409 
410 
411 	ldcssp->channel_count = 0;
412 	ldcssp->channels_open = 0;
413 	ldcssp->chan_list = NULL;
414 	ldcssp->dring_list = NULL;
415 
416 	mutex_exit(&ldcssp->lock);
417 
418 	return (0);
419 }
420 
421 int
422 _info(struct modinfo *modinfop)
423 {
424 	/* Report status of the dynamically loadable driver module */
425 	return (mod_info(&ml, modinfop));
426 }
427 
428 int
429 _fini(void)
430 {
431 	int 		rv, status;
432 	ldc_chan_t 	*tmp_ldcp, *ldcp;
433 	ldc_dring_t 	*tmp_dringp, *dringp;
434 	ldc_mem_info_t 	minfo;
435 
436 	/* Unlink the driver module from the system */
437 	status = mod_remove(&ml);
438 	if (status) {
439 		DWARN(DBG_ALL_LDCS, "_fini: mod_remove failed\n");
440 		return (EIO);
441 	}
442 
443 	/* Free descriptor rings */
444 	dringp = ldcssp->dring_list;
445 	while (dringp != NULL) {
446 		tmp_dringp = dringp->next;
447 
448 		rv = ldc_mem_dring_info((ldc_dring_handle_t)dringp, &minfo);
449 		if (rv == 0 && minfo.status != LDC_UNBOUND) {
450 			if (minfo.status == LDC_BOUND) {
451 				(void) ldc_mem_dring_unbind(
452 				    (ldc_dring_handle_t)dringp);
453 			}
454 			if (minfo.status == LDC_MAPPED) {
455 				(void) ldc_mem_dring_unmap(
456 				    (ldc_dring_handle_t)dringp);
457 			}
458 		}
459 
460 		(void) ldc_mem_dring_destroy((ldc_dring_handle_t)dringp);
461 		dringp = tmp_dringp;
462 	}
463 	ldcssp->dring_list = NULL;
464 
465 	/* close and finalize channels */
466 	ldcp = ldcssp->chan_list;
467 	while (ldcp != NULL) {
468 		tmp_ldcp = ldcp->next;
469 
470 		(void) ldc_close((ldc_handle_t)ldcp);
471 		(void) ldc_fini((ldc_handle_t)ldcp);
472 
473 		ldcp = tmp_ldcp;
474 	}
475 	ldcssp->chan_list = NULL;
476 
477 	/* Destroy kmem caches */
478 	kmem_cache_destroy(ldcssp->memhdl_cache);
479 	kmem_cache_destroy(ldcssp->memseg_cache);
480 
481 	/*
482 	 * We have successfully "removed" the driver.
483 	 * Destroying soft states
484 	 */
485 	mutex_destroy(&ldcssp->lock);
486 	kmem_free(ldcssp, sizeof (ldc_soft_state_t));
487 
488 	(void) hsvc_unregister(&ldc_hsvc);
489 
490 	return (status);
491 }
492 
493 /* -------------------------------------------------------------------------- */
494 
495 /*
496  * LDC Link Layer Internal Functions
497  */
498 
499 /*
500  * Translate HV Errors to sun4v error codes
501  */
502 int
503 i_ldc_h2v_error(int h_error)
504 {
505 	switch (h_error) {
506 
507 	case	H_EOK:
508 		return (0);
509 
510 	case	H_ENORADDR:
511 		return (EFAULT);
512 
513 	case	H_EBADPGSZ:
514 	case	H_EINVAL:
515 		return (EINVAL);
516 
517 	case	H_EWOULDBLOCK:
518 		return (EWOULDBLOCK);
519 
520 	case	H_ENOACCESS:
521 	case	H_ENOMAP:
522 		return (EACCES);
523 
524 	case	H_EIO:
525 	case	H_ECPUERROR:
526 		return (EIO);
527 
528 	case	H_ENOTSUPPORTED:
529 		return (ENOTSUP);
530 
531 	case 	H_ETOOMANY:
532 		return (ENOSPC);
533 
534 	case	H_ECHANNEL:
535 		return (ECHRNG);
536 	default:
537 		break;
538 	}
539 
540 	return (EIO);
541 }
542 
543 /*
544  * Reconfigure the transmit queue
545  */
546 static int
547 i_ldc_txq_reconf(ldc_chan_t *ldcp)
548 {
549 	int rv;
550 
551 	ASSERT(MUTEX_HELD(&ldcp->lock));
552 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
553 
554 	rv = hv_ldc_tx_qconf(ldcp->id, ldcp->tx_q_ra, ldcp->tx_q_entries);
555 	if (rv) {
556 		cmn_err(CE_WARN,
557 		    "i_ldc_txq_reconf: (0x%lx) cannot set qconf", ldcp->id);
558 		return (EIO);
559 	}
560 	rv = hv_ldc_tx_get_state(ldcp->id, &(ldcp->tx_head),
561 	    &(ldcp->tx_tail), &(ldcp->link_state));
562 	if (rv) {
563 		cmn_err(CE_WARN,
564 		    "i_ldc_txq_reconf: (0x%lx) cannot get qptrs", ldcp->id);
565 		return (EIO);
566 	}
567 	D1(ldcp->id, "i_ldc_txq_reconf: (0x%llx) h=0x%llx,t=0x%llx,"
568 	    "s=0x%llx\n", ldcp->id, ldcp->tx_head, ldcp->tx_tail,
569 	    ldcp->link_state);
570 
571 	return (0);
572 }
573 
574 /*
575  * Reconfigure the receive queue
576  */
577 static int
578 i_ldc_rxq_reconf(ldc_chan_t *ldcp, boolean_t force_reset)
579 {
580 	int rv;
581 	uint64_t rx_head, rx_tail;
582 
583 	ASSERT(MUTEX_HELD(&ldcp->lock));
584 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
585 	    &(ldcp->link_state));
586 	if (rv) {
587 		cmn_err(CE_WARN,
588 		    "i_ldc_rxq_reconf: (0x%lx) cannot get state",
589 		    ldcp->id);
590 		return (EIO);
591 	}
592 
593 	if (force_reset || (ldcp->tstate & ~TS_IN_RESET) == TS_UP) {
594 		rv = hv_ldc_rx_qconf(ldcp->id, ldcp->rx_q_ra,
595 		    ldcp->rx_q_entries);
596 		if (rv) {
597 			cmn_err(CE_WARN,
598 			    "i_ldc_rxq_reconf: (0x%lx) cannot set qconf",
599 			    ldcp->id);
600 			return (EIO);
601 		}
602 		D1(ldcp->id, "i_ldc_rxq_reconf: (0x%llx) completed q reconf",
603 		    ldcp->id);
604 	}
605 
606 	return (0);
607 }
608 
609 
610 /*
611  * Drain the contents of the receive queue
612  */
613 static int
614 i_ldc_rxq_drain(ldc_chan_t *ldcp)
615 {
616 	int rv;
617 	uint64_t rx_head, rx_tail;
618 
619 	ASSERT(MUTEX_HELD(&ldcp->lock));
620 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
621 	    &(ldcp->link_state));
622 	if (rv) {
623 		cmn_err(CE_WARN, "i_ldc_rxq_drain: (0x%lx) cannot get state",
624 		    ldcp->id);
625 		return (EIO);
626 	}
627 
628 	/* flush contents by setting the head = tail */
629 	return (i_ldc_set_rx_head(ldcp, rx_tail));
630 }
631 
632 
633 /*
634  * Reset LDC state structure and its contents
635  */
636 static void
637 i_ldc_reset_state(ldc_chan_t *ldcp)
638 {
639 	ASSERT(MUTEX_HELD(&ldcp->lock));
640 	ldcp->last_msg_snt = LDC_INIT_SEQID;
641 	ldcp->last_ack_rcd = 0;
642 	ldcp->last_msg_rcd = 0;
643 	ldcp->tx_ackd_head = ldcp->tx_head;
644 	ldcp->stream_remains = 0;
645 	ldcp->next_vidx = 0;
646 	ldcp->hstate = 0;
647 	ldcp->tstate = TS_OPEN;
648 	ldcp->status = LDC_OPEN;
649 	ldcp->rx_ack_head = ACKPEEK_HEAD_INVALID;
650 	ldcp->rx_dq_head = 0;
651 	ldcp->rx_dq_tail = 0;
652 
653 	if (ldcp->link_state == LDC_CHANNEL_UP ||
654 	    ldcp->link_state == LDC_CHANNEL_RESET) {
655 
656 		if (ldcp->mode == LDC_MODE_RAW) {
657 			ldcp->status = LDC_UP;
658 			ldcp->tstate = TS_UP;
659 		} else {
660 			ldcp->status = LDC_READY;
661 			ldcp->tstate |= TS_LINK_READY;
662 		}
663 	}
664 }
665 
666 /*
667  * Reset a LDC channel
668  */
669 void
670 i_ldc_reset(ldc_chan_t *ldcp, boolean_t force_reset)
671 {
672 	DWARN(ldcp->id, "i_ldc_reset: (0x%llx) channel reset\n", ldcp->id);
673 
674 	ASSERT(MUTEX_HELD(&ldcp->lock));
675 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
676 
677 	/* reconfig Tx and Rx queues */
678 	(void) i_ldc_txq_reconf(ldcp);
679 	(void) i_ldc_rxq_reconf(ldcp, force_reset);
680 
681 	/* Clear Tx and Rx interrupts */
682 	(void) i_ldc_clear_intr(ldcp, CNEX_TX_INTR);
683 	(void) i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
684 
685 	/* Reset channel state */
686 	i_ldc_reset_state(ldcp);
687 
688 	/* Mark channel in reset */
689 	ldcp->tstate |= TS_IN_RESET;
690 }
691 
692 
693 /*
694  * Clear pending interrupts
695  */
696 static void
697 i_ldc_clear_intr(ldc_chan_t *ldcp, cnex_intrtype_t itype)
698 {
699 	ldc_cnex_t *cinfo = &ldcssp->cinfo;
700 
701 	ASSERT(MUTEX_HELD(&ldcp->lock));
702 	ASSERT(cinfo->dip != NULL);
703 
704 	switch (itype) {
705 	case CNEX_TX_INTR:
706 		/* check Tx interrupt */
707 		if (ldcp->tx_intr_state)
708 			ldcp->tx_intr_state = LDC_INTR_NONE;
709 		else
710 			return;
711 		break;
712 
713 	case CNEX_RX_INTR:
714 		/* check Rx interrupt */
715 		if (ldcp->rx_intr_state)
716 			ldcp->rx_intr_state = LDC_INTR_NONE;
717 		else
718 			return;
719 		break;
720 	}
721 
722 	(void) cinfo->clr_intr(cinfo->dip, ldcp->id, itype);
723 	D2(ldcp->id,
724 	    "i_ldc_clear_intr: (0x%llx) cleared 0x%x intr\n",
725 	    ldcp->id, itype);
726 }
727 
728 /*
729  * Set the receive queue head
730  * Resets connection and returns an error if it fails.
731  */
732 static int
733 i_ldc_set_rx_head(ldc_chan_t *ldcp, uint64_t head)
734 {
735 	int 	rv;
736 	int 	retries;
737 
738 	ASSERT(MUTEX_HELD(&ldcp->lock));
739 	for (retries = 0; retries < ldc_max_retries; retries++) {
740 
741 		if ((rv = hv_ldc_rx_set_qhead(ldcp->id, head)) == 0)
742 			return (0);
743 
744 		if (rv != H_EWOULDBLOCK)
745 			break;
746 
747 		/* wait for ldc_delay usecs */
748 		drv_usecwait(ldc_delay);
749 	}
750 
751 	cmn_err(CE_WARN, "ldc_rx_set_qhead: (0x%lx) cannot set qhead 0x%lx",
752 	    ldcp->id, head);
753 	mutex_enter(&ldcp->tx_lock);
754 	i_ldc_reset(ldcp, B_TRUE);
755 	mutex_exit(&ldcp->tx_lock);
756 
757 	return (ECONNRESET);
758 }
759 
760 /*
761  * Returns the tx_head to be used for transfer
762  */
763 static void
764 i_ldc_get_tx_head(ldc_chan_t *ldcp, uint64_t *head)
765 {
766 	ldc_msg_t 	*pkt;
767 
768 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
769 
770 	/* get current Tx head */
771 	*head = ldcp->tx_head;
772 
773 	/*
774 	 * Reliable mode will use the ACKd head instead of the regular tx_head.
775 	 * Also in Reliable mode, advance ackd_head for all non DATA/INFO pkts,
776 	 * up to the current location of tx_head. This needs to be done
777 	 * as the peer will only ACK DATA/INFO pkts.
778 	 */
779 	if (ldcp->mode == LDC_MODE_RELIABLE) {
780 		while (ldcp->tx_ackd_head != ldcp->tx_head) {
781 			pkt = (ldc_msg_t *)(ldcp->tx_q_va + ldcp->tx_ackd_head);
782 			if ((pkt->type & LDC_DATA) && (pkt->stype & LDC_INFO)) {
783 				break;
784 			}
785 			/* advance ACKd head */
786 			ldcp->tx_ackd_head =
787 			    (ldcp->tx_ackd_head + LDC_PACKET_SIZE) %
788 			    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
789 		}
790 		*head = ldcp->tx_ackd_head;
791 	}
792 }
793 
794 /*
795  * Returns the tx_tail to be used for transfer
796  * Re-reads the TX queue ptrs if and only if the
797  * the cached head and tail are equal (queue is full)
798  */
799 static int
800 i_ldc_get_tx_tail(ldc_chan_t *ldcp, uint64_t *tail)
801 {
802 	int 		rv;
803 	uint64_t 	current_head, new_tail;
804 
805 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
806 	/* Read the head and tail ptrs from HV */
807 	rv = hv_ldc_tx_get_state(ldcp->id,
808 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
809 	if (rv) {
810 		cmn_err(CE_WARN,
811 		    "i_ldc_get_tx_tail: (0x%lx) cannot read qptrs\n",
812 		    ldcp->id);
813 		return (EIO);
814 	}
815 	if (ldcp->link_state == LDC_CHANNEL_DOWN) {
816 		D1(ldcp->id, "i_ldc_get_tx_tail: (0x%llx) channel not ready\n",
817 		    ldcp->id);
818 		return (ECONNRESET);
819 	}
820 
821 	i_ldc_get_tx_head(ldcp, &current_head);
822 
823 	/* increment the tail */
824 	new_tail = (ldcp->tx_tail + LDC_PACKET_SIZE) %
825 	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
826 
827 	if (new_tail == current_head) {
828 		DWARN(ldcp->id,
829 		    "i_ldc_get_tx_tail: (0x%llx) TX queue is full\n",
830 		    ldcp->id);
831 		return (EWOULDBLOCK);
832 	}
833 
834 	D2(ldcp->id, "i_ldc_get_tx_tail: (0x%llx) head=0x%llx, tail=0x%llx\n",
835 	    ldcp->id, ldcp->tx_head, ldcp->tx_tail);
836 
837 	*tail = ldcp->tx_tail;
838 	return (0);
839 }
840 
841 /*
842  * Set the tail pointer. If HV returns EWOULDBLOCK, it will back off
843  * and retry ldc_max_retries times before returning an error.
844  * Returns 0, EWOULDBLOCK or EIO
845  */
846 static int
847 i_ldc_set_tx_tail(ldc_chan_t *ldcp, uint64_t tail)
848 {
849 	int		rv, retval = EWOULDBLOCK;
850 	int 		retries;
851 
852 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
853 	for (retries = 0; retries < ldc_max_retries; retries++) {
854 
855 		if ((rv = hv_ldc_tx_set_qtail(ldcp->id, tail)) == 0) {
856 			retval = 0;
857 			break;
858 		}
859 		if (rv != H_EWOULDBLOCK) {
860 			DWARN(ldcp->id, "i_ldc_set_tx_tail: (0x%llx) set "
861 			    "qtail=0x%llx failed, rv=%d\n", ldcp->id, tail, rv);
862 			retval = EIO;
863 			break;
864 		}
865 
866 		/* wait for ldc_delay usecs */
867 		drv_usecwait(ldc_delay);
868 	}
869 	return (retval);
870 }
871 
872 /*
873  * Copy a data packet from the HV receive queue to the data queue.
874  * Caller must ensure that the data queue is not already full.
875  *
876  * The *head argument represents the current head pointer for the HV
877  * receive queue. After copying a packet from the HV receive queue,
878  * the *head pointer will be updated. This allows the caller to update
879  * the head pointer in HV using the returned *head value.
880  */
881 void
882 i_ldc_rxdq_copy(ldc_chan_t *ldcp, uint64_t *head)
883 {
884 	uint64_t	q_size, dq_size;
885 
886 	ASSERT(MUTEX_HELD(&ldcp->lock));
887 
888 	q_size  = ldcp->rx_q_entries << LDC_PACKET_SHIFT;
889 	dq_size = ldcp->rx_dq_entries << LDC_PACKET_SHIFT;
890 
891 	ASSERT(Q_CONTIG_SPACE(ldcp->rx_dq_head, ldcp->rx_dq_tail,
892 	    dq_size) >= LDC_PACKET_SIZE);
893 
894 	bcopy((void *)(ldcp->rx_q_va + *head),
895 	    (void *)(ldcp->rx_dq_va + ldcp->rx_dq_tail), LDC_PACKET_SIZE);
896 	TRACE_RXDQ_COPY(ldcp, LDC_PACKET_SIZE);
897 
898 	/* Update rx head */
899 	*head = (*head + LDC_PACKET_SIZE) % q_size;
900 
901 	/* Update dq tail */
902 	ldcp->rx_dq_tail = (ldcp->rx_dq_tail + LDC_PACKET_SIZE) % dq_size;
903 }
904 
905 /*
906  * Update the Rx data queue head pointer
907  */
908 static int
909 i_ldc_set_rxdq_head(ldc_chan_t *ldcp, uint64_t head)
910 {
911 	ldcp->rx_dq_head = head;
912 	return (0);
913 }
914 
915 /*
916  * Get the Rx data queue head and tail pointers
917  */
918 static uint64_t
919 i_ldc_dq_rx_get_state(ldc_chan_t *ldcp, uint64_t *head, uint64_t *tail,
920     uint64_t *link_state)
921 {
922 	_NOTE(ARGUNUSED(link_state))
923 	*head = ldcp->rx_dq_head;
924 	*tail = ldcp->rx_dq_tail;
925 	return (0);
926 }
927 
928 /*
929  * Wrapper for the Rx HV queue set head function. Giving the
930  * data queue and HV queue set head functions the same type.
931  */
932 static uint64_t
933 i_ldc_hvq_rx_get_state(ldc_chan_t *ldcp, uint64_t *head, uint64_t *tail,
934     uint64_t *link_state)
935 {
936 	return (i_ldc_h2v_error(hv_ldc_rx_get_state(ldcp->id, head, tail,
937 	    link_state)));
938 }
939 
940 /*
941  * LDC receive interrupt handler
942  *    triggered for channel with data pending to read
943  *    i.e. Rx queue content changes
944  */
945 static uint_t
946 i_ldc_rx_hdlr(caddr_t arg1, caddr_t arg2)
947 {
948 	_NOTE(ARGUNUSED(arg2))
949 
950 	ldc_chan_t	*ldcp;
951 	boolean_t	notify;
952 	uint64_t	event;
953 	int		rv;
954 
955 	/* Get the channel for which interrupt was received */
956 	if (arg1 == NULL) {
957 		cmn_err(CE_WARN, "i_ldc_rx_hdlr: invalid arg\n");
958 		return (DDI_INTR_UNCLAIMED);
959 	}
960 
961 	ldcp = (ldc_chan_t *)arg1;
962 
963 	D1(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) Received intr, ldcp=0x%p\n",
964 	    ldcp->id, ldcp);
965 	D1(ldcp->id, "i_ldc_rx_hdlr: (%llx) USR%lx/TS%lx/HS%lx, LSTATE=%lx\n",
966 	    ldcp->id, ldcp->status, ldcp->tstate, ldcp->hstate,
967 	    ldcp->link_state);
968 
969 	/* Lock channel */
970 	mutex_enter(&ldcp->lock);
971 
972 	/* Mark the interrupt as being actively handled */
973 	ldcp->rx_intr_state = LDC_INTR_ACTIVE;
974 
975 	(void) i_ldc_rx_process_hvq(ldcp, &notify, &event);
976 
977 	if (ldcp->mode != LDC_MODE_RELIABLE) {
978 		/*
979 		 * If there are no data packets on the queue, clear
980 		 * the interrupt. Otherwise, the ldc_read will clear
981 		 * interrupts after draining the queue. To indicate the
982 		 * interrupt has not yet been cleared, it is marked
983 		 * as pending.
984 		 */
985 		if ((event & LDC_EVT_READ) == 0) {
986 			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
987 		} else {
988 			ldcp->rx_intr_state = LDC_INTR_PEND;
989 		}
990 	}
991 
992 	/* if callbacks are disabled, do not notify */
993 	if (notify && ldcp->cb_enabled) {
994 		ldcp->cb_inprogress = B_TRUE;
995 		mutex_exit(&ldcp->lock);
996 		rv = ldcp->cb(event, ldcp->cb_arg);
997 		if (rv) {
998 			DWARN(ldcp->id,
999 			    "i_ldc_rx_hdlr: (0x%llx) callback failure",
1000 			    ldcp->id);
1001 		}
1002 		mutex_enter(&ldcp->lock);
1003 		ldcp->cb_inprogress = B_FALSE;
1004 	}
1005 
1006 	if (ldcp->mode == LDC_MODE_RELIABLE) {
1007 		/*
1008 		 * If we are using a secondary data queue, clear the
1009 		 * interrupt. We should have processed all CTRL packets
1010 		 * and copied all DATA packets to the secondary queue.
1011 		 * Even if secondary queue filled up, clear the interrupts,
1012 		 * this will trigger another interrupt and force the
1013 		 * handler to copy more data.
1014 		 */
1015 		i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
1016 	}
1017 
1018 	mutex_exit(&ldcp->lock);
1019 
1020 	D1(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) exiting handler", ldcp->id);
1021 
1022 	return (DDI_INTR_CLAIMED);
1023 }
1024 
1025 /*
1026  * Wrapper for the Rx HV queue processing function to be used when
1027  * checking the Rx HV queue for data packets. Unlike the interrupt
1028  * handler code flow, the Rx interrupt is not cleared here and
1029  * callbacks are not made.
1030  */
1031 static uint_t
1032 i_ldc_chkq(ldc_chan_t *ldcp)
1033 {
1034 	boolean_t	notify;
1035 	uint64_t	event;
1036 
1037 	return (i_ldc_rx_process_hvq(ldcp, &notify, &event));
1038 }
1039 
1040 /*
1041  * Send a LDC message
1042  */
1043 static int
1044 i_ldc_send_pkt(ldc_chan_t *ldcp, uint8_t pkttype, uint8_t subtype,
1045     uint8_t ctrlmsg)
1046 {
1047 	int		rv;
1048 	ldc_msg_t 	*pkt;
1049 	uint64_t	tx_tail;
1050 	uint32_t	curr_seqid;
1051 
1052 	/* Obtain Tx lock */
1053 	mutex_enter(&ldcp->tx_lock);
1054 
1055 	curr_seqid = ldcp->last_msg_snt;
1056 
1057 	/* get the current tail for the message */
1058 	rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
1059 	if (rv) {
1060 		DWARN(ldcp->id,
1061 		    "i_ldc_send_pkt: (0x%llx) error sending pkt, "
1062 		    "type=0x%x,subtype=0x%x,ctrl=0x%x\n",
1063 		    ldcp->id, pkttype, subtype, ctrlmsg);
1064 		mutex_exit(&ldcp->tx_lock);
1065 		return (rv);
1066 	}
1067 
1068 	pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
1069 	ZERO_PKT(pkt);
1070 
1071 	/* Initialize the packet */
1072 	pkt->type = pkttype;
1073 	pkt->stype = subtype;
1074 	pkt->ctrl = ctrlmsg;
1075 
1076 	/* Store ackid/seqid iff it is RELIABLE mode & not a RTS/RTR message */
1077 	if (((ctrlmsg & LDC_CTRL_MASK) != LDC_RTS) &&
1078 	    ((ctrlmsg & LDC_CTRL_MASK) != LDC_RTR)) {
1079 		curr_seqid++;
1080 		if (ldcp->mode != LDC_MODE_RAW) {
1081 			pkt->seqid = curr_seqid;
1082 			pkt->ackid = ldcp->last_msg_rcd;
1083 		}
1084 	}
1085 	DUMP_LDC_PKT(ldcp, "i_ldc_send_pkt", (uint64_t)pkt);
1086 
1087 	/* initiate the send by calling into HV and set the new tail */
1088 	tx_tail = (tx_tail + LDC_PACKET_SIZE) %
1089 	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1090 
1091 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
1092 	if (rv) {
1093 		DWARN(ldcp->id,
1094 		    "i_ldc_send_pkt:(0x%llx) error sending pkt, "
1095 		    "type=0x%x,stype=0x%x,ctrl=0x%x\n",
1096 		    ldcp->id, pkttype, subtype, ctrlmsg);
1097 		mutex_exit(&ldcp->tx_lock);
1098 		return (EIO);
1099 	}
1100 
1101 	ldcp->last_msg_snt = curr_seqid;
1102 	ldcp->tx_tail = tx_tail;
1103 
1104 	mutex_exit(&ldcp->tx_lock);
1105 	return (0);
1106 }
1107 
1108 /*
1109  * Checks if packet was received in right order
1110  * in the case of a reliable link.
1111  * Returns 0 if in order, else EIO
1112  */
1113 static int
1114 i_ldc_check_seqid(ldc_chan_t *ldcp, ldc_msg_t *msg)
1115 {
1116 	/* No seqid checking for RAW mode */
1117 	if (ldcp->mode == LDC_MODE_RAW)
1118 		return (0);
1119 
1120 	/* No seqid checking for version, RTS, RTR message */
1121 	if (msg->ctrl == LDC_VER ||
1122 	    msg->ctrl == LDC_RTS ||
1123 	    msg->ctrl == LDC_RTR)
1124 		return (0);
1125 
1126 	/* Initial seqid to use is sent in RTS/RTR and saved in last_msg_rcd */
1127 	if (msg->seqid != (ldcp->last_msg_rcd + 1)) {
1128 		DWARN(ldcp->id,
1129 		    "i_ldc_check_seqid: (0x%llx) out-of-order pkt, got 0x%x, "
1130 		    "expecting 0x%x\n", ldcp->id, msg->seqid,
1131 		    (ldcp->last_msg_rcd + 1));
1132 		return (EIO);
1133 	}
1134 
1135 #ifdef DEBUG
1136 	if (LDC_INJECT_PKTLOSS(ldcp)) {
1137 		DWARN(ldcp->id,
1138 		    "i_ldc_check_seqid: (0x%llx) inject pkt loss\n", ldcp->id);
1139 		return (EIO);
1140 	}
1141 #endif
1142 
1143 	return (0);
1144 }
1145 
1146 
1147 /*
1148  * Process an incoming version ctrl message
1149  */
1150 static int
1151 i_ldc_process_VER(ldc_chan_t *ldcp, ldc_msg_t *msg)
1152 {
1153 	int 		rv = 0, idx = ldcp->next_vidx;
1154 	ldc_msg_t 	*pkt;
1155 	uint64_t	tx_tail;
1156 	ldc_ver_t	*rcvd_ver;
1157 
1158 	/* get the received version */
1159 	rcvd_ver = (ldc_ver_t *)((uint64_t)msg + LDC_PAYLOAD_VER_OFF);
1160 
1161 	D2(ldcp->id, "i_ldc_process_VER: (0x%llx) received VER v%u.%u\n",
1162 	    ldcp->id, rcvd_ver->major, rcvd_ver->minor);
1163 
1164 	/* Obtain Tx lock */
1165 	mutex_enter(&ldcp->tx_lock);
1166 
1167 	switch (msg->stype) {
1168 	case LDC_INFO:
1169 
1170 		if ((ldcp->tstate & ~TS_IN_RESET) == TS_VREADY) {
1171 			(void) i_ldc_txq_reconf(ldcp);
1172 			i_ldc_reset_state(ldcp);
1173 			mutex_exit(&ldcp->tx_lock);
1174 			return (EAGAIN);
1175 		}
1176 
1177 		/* get the current tail and pkt for the response */
1178 		rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
1179 		if (rv != 0) {
1180 			DWARN(ldcp->id,
1181 			    "i_ldc_process_VER: (0x%llx) err sending "
1182 			    "version ACK/NACK\n", ldcp->id);
1183 			i_ldc_reset(ldcp, B_TRUE);
1184 			mutex_exit(&ldcp->tx_lock);
1185 			return (ECONNRESET);
1186 		}
1187 
1188 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
1189 		ZERO_PKT(pkt);
1190 
1191 		/* initialize the packet */
1192 		pkt->type = LDC_CTRL;
1193 		pkt->ctrl = LDC_VER;
1194 
1195 		for (;;) {
1196 
1197 			D1(ldcp->id, "i_ldc_process_VER: got %u.%u chk %u.%u\n",
1198 			    rcvd_ver->major, rcvd_ver->minor,
1199 			    ldc_versions[idx].major, ldc_versions[idx].minor);
1200 
1201 			if (rcvd_ver->major == ldc_versions[idx].major) {
1202 				/* major version match - ACK version */
1203 				pkt->stype = LDC_ACK;
1204 
1205 				/*
1206 				 * lower minor version to the one this endpt
1207 				 * supports, if necessary
1208 				 */
1209 				if (rcvd_ver->minor > ldc_versions[idx].minor)
1210 					rcvd_ver->minor =
1211 					    ldc_versions[idx].minor;
1212 				bcopy(rcvd_ver, pkt->udata, sizeof (*rcvd_ver));
1213 
1214 				break;
1215 			}
1216 
1217 			if (rcvd_ver->major > ldc_versions[idx].major) {
1218 
1219 				D1(ldcp->id, "i_ldc_process_VER: using next"
1220 				    " lower idx=%d, v%u.%u\n", idx,
1221 				    ldc_versions[idx].major,
1222 				    ldc_versions[idx].minor);
1223 
1224 				/* nack with next lower version */
1225 				pkt->stype = LDC_NACK;
1226 				bcopy(&ldc_versions[idx], pkt->udata,
1227 				    sizeof (ldc_versions[idx]));
1228 				ldcp->next_vidx = idx;
1229 				break;
1230 			}
1231 
1232 			/* next major version */
1233 			idx++;
1234 
1235 			D1(ldcp->id, "i_ldc_process_VER: inc idx %x\n", idx);
1236 
1237 			if (idx == LDC_NUM_VERS) {
1238 				/* no version match - send NACK */
1239 				pkt->stype = LDC_NACK;
1240 				bzero(pkt->udata, sizeof (ldc_ver_t));
1241 				ldcp->next_vidx = 0;
1242 				break;
1243 			}
1244 		}
1245 
1246 		/* initiate the send by calling into HV and set the new tail */
1247 		tx_tail = (tx_tail + LDC_PACKET_SIZE) %
1248 		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1249 
1250 		rv = i_ldc_set_tx_tail(ldcp, tx_tail);
1251 		if (rv == 0) {
1252 			ldcp->tx_tail = tx_tail;
1253 			if (pkt->stype == LDC_ACK) {
1254 				D2(ldcp->id, "i_ldc_process_VER: (0x%llx) sent"
1255 				    " version ACK\n", ldcp->id);
1256 				/* Save the ACK'd version */
1257 				ldcp->version.major = rcvd_ver->major;
1258 				ldcp->version.minor = rcvd_ver->minor;
1259 				ldcp->hstate |= TS_RCVD_VER;
1260 				ldcp->tstate |= TS_VER_DONE;
1261 				D1(DBG_ALL_LDCS,
1262 				    "(0x%llx) Sent ACK, "
1263 				    "Agreed on version v%u.%u\n",
1264 				    ldcp->id, rcvd_ver->major, rcvd_ver->minor);
1265 			}
1266 		} else {
1267 			DWARN(ldcp->id,
1268 			    "i_ldc_process_VER: (0x%llx) error sending "
1269 			    "ACK/NACK\n", ldcp->id);
1270 			i_ldc_reset(ldcp, B_TRUE);
1271 			mutex_exit(&ldcp->tx_lock);
1272 			return (ECONNRESET);
1273 		}
1274 
1275 		break;
1276 
1277 	case LDC_ACK:
1278 		if ((ldcp->tstate & ~TS_IN_RESET) == TS_VREADY) {
1279 			if (ldcp->version.major != rcvd_ver->major ||
1280 			    ldcp->version.minor != rcvd_ver->minor) {
1281 
1282 				/* mismatched version - reset connection */
1283 				DWARN(ldcp->id,
1284 				    "i_ldc_process_VER: (0x%llx) recvd"
1285 				    " ACK ver != sent ACK ver\n", ldcp->id);
1286 				i_ldc_reset(ldcp, B_TRUE);
1287 				mutex_exit(&ldcp->tx_lock);
1288 				return (ECONNRESET);
1289 			}
1290 		} else {
1291 			/* SUCCESS - we have agreed on a version */
1292 			ldcp->version.major = rcvd_ver->major;
1293 			ldcp->version.minor = rcvd_ver->minor;
1294 			ldcp->tstate |= TS_VER_DONE;
1295 		}
1296 
1297 		D1(ldcp->id, "(0x%llx) Got ACK, Agreed on version v%u.%u\n",
1298 		    ldcp->id, rcvd_ver->major, rcvd_ver->minor);
1299 
1300 		/* initiate RTS-RTR-RDX handshake */
1301 		rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
1302 		if (rv) {
1303 			DWARN(ldcp->id,
1304 		    "i_ldc_process_VER: (0x%llx) cannot send RTS\n",
1305 			    ldcp->id);
1306 			i_ldc_reset(ldcp, B_TRUE);
1307 			mutex_exit(&ldcp->tx_lock);
1308 			return (ECONNRESET);
1309 		}
1310 
1311 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
1312 		ZERO_PKT(pkt);
1313 
1314 		pkt->type = LDC_CTRL;
1315 		pkt->stype = LDC_INFO;
1316 		pkt->ctrl = LDC_RTS;
1317 		pkt->env = ldcp->mode;
1318 		if (ldcp->mode != LDC_MODE_RAW)
1319 			pkt->seqid = LDC_INIT_SEQID;
1320 
1321 		ldcp->last_msg_rcd = LDC_INIT_SEQID;
1322 
1323 		DUMP_LDC_PKT(ldcp, "i_ldc_process_VER snd rts", (uint64_t)pkt);
1324 
1325 		/* initiate the send by calling into HV and set the new tail */
1326 		tx_tail = (tx_tail + LDC_PACKET_SIZE) %
1327 		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1328 
1329 		rv = i_ldc_set_tx_tail(ldcp, tx_tail);
1330 		if (rv) {
1331 			D2(ldcp->id,
1332 			    "i_ldc_process_VER: (0x%llx) no listener\n",
1333 			    ldcp->id);
1334 			i_ldc_reset(ldcp, B_TRUE);
1335 			mutex_exit(&ldcp->tx_lock);
1336 			return (ECONNRESET);
1337 		}
1338 
1339 		ldcp->tx_tail = tx_tail;
1340 		ldcp->hstate |= TS_SENT_RTS;
1341 
1342 		break;
1343 
1344 	case LDC_NACK:
1345 		/* check if version in NACK is zero */
1346 		if (rcvd_ver->major == 0 && rcvd_ver->minor == 0) {
1347 			/* version handshake failure */
1348 			DWARN(DBG_ALL_LDCS,
1349 			    "i_ldc_process_VER: (0x%llx) no version match\n",
1350 			    ldcp->id);
1351 			i_ldc_reset(ldcp, B_TRUE);
1352 			mutex_exit(&ldcp->tx_lock);
1353 			return (ECONNRESET);
1354 		}
1355 
1356 		/* get the current tail and pkt for the response */
1357 		rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
1358 		if (rv != 0) {
1359 			cmn_err(CE_NOTE,
1360 			    "i_ldc_process_VER: (0x%lx) err sending "
1361 			    "version ACK/NACK\n", ldcp->id);
1362 			i_ldc_reset(ldcp, B_TRUE);
1363 			mutex_exit(&ldcp->tx_lock);
1364 			return (ECONNRESET);
1365 		}
1366 
1367 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
1368 		ZERO_PKT(pkt);
1369 
1370 		/* initialize the packet */
1371 		pkt->type = LDC_CTRL;
1372 		pkt->ctrl = LDC_VER;
1373 		pkt->stype = LDC_INFO;
1374 
1375 		/* check ver in NACK msg has a match */
1376 		for (;;) {
1377 			if (rcvd_ver->major == ldc_versions[idx].major) {
1378 				/*
1379 				 * major version match - resubmit request
1380 				 * if lower minor version to the one this endpt
1381 				 * supports, if necessary
1382 				 */
1383 				if (rcvd_ver->minor > ldc_versions[idx].minor)
1384 					rcvd_ver->minor =
1385 					    ldc_versions[idx].minor;
1386 				bcopy(rcvd_ver, pkt->udata, sizeof (*rcvd_ver));
1387 				break;
1388 			}
1389 
1390 			if (rcvd_ver->major > ldc_versions[idx].major) {
1391 
1392 				D1(ldcp->id, "i_ldc_process_VER: using next"
1393 				    " lower idx=%d, v%u.%u\n", idx,
1394 				    ldc_versions[idx].major,
1395 				    ldc_versions[idx].minor);
1396 
1397 				/* send next lower version */
1398 				bcopy(&ldc_versions[idx], pkt->udata,
1399 				    sizeof (ldc_versions[idx]));
1400 				ldcp->next_vidx = idx;
1401 				break;
1402 			}
1403 
1404 			/* next version */
1405 			idx++;
1406 
1407 			D1(ldcp->id, "i_ldc_process_VER: inc idx %x\n", idx);
1408 
1409 			if (idx == LDC_NUM_VERS) {
1410 				/* no version match - terminate */
1411 				ldcp->next_vidx = 0;
1412 				mutex_exit(&ldcp->tx_lock);
1413 				return (ECONNRESET);
1414 			}
1415 		}
1416 
1417 		/* initiate the send by calling into HV and set the new tail */
1418 		tx_tail = (tx_tail + LDC_PACKET_SIZE) %
1419 		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1420 
1421 		rv = i_ldc_set_tx_tail(ldcp, tx_tail);
1422 		if (rv == 0) {
1423 			D2(ldcp->id, "i_ldc_process_VER: (0x%llx) sent version"
1424 			    "INFO v%u.%u\n", ldcp->id, ldc_versions[idx].major,
1425 			    ldc_versions[idx].minor);
1426 			ldcp->tx_tail = tx_tail;
1427 		} else {
1428 			cmn_err(CE_NOTE,
1429 			    "i_ldc_process_VER: (0x%lx) error sending version"
1430 			    "INFO\n", ldcp->id);
1431 			i_ldc_reset(ldcp, B_TRUE);
1432 			mutex_exit(&ldcp->tx_lock);
1433 			return (ECONNRESET);
1434 		}
1435 
1436 		break;
1437 	}
1438 
1439 	mutex_exit(&ldcp->tx_lock);
1440 	return (rv);
1441 }
1442 
1443 
1444 /*
1445  * Process an incoming RTS ctrl message
1446  */
1447 static int
1448 i_ldc_process_RTS(ldc_chan_t *ldcp, ldc_msg_t *msg)
1449 {
1450 	int 		rv = 0;
1451 	ldc_msg_t 	*pkt;
1452 	uint64_t	tx_tail;
1453 	boolean_t	sent_NACK = B_FALSE;
1454 
1455 	D2(ldcp->id, "i_ldc_process_RTS: (0x%llx) received RTS\n", ldcp->id);
1456 
1457 	switch (msg->stype) {
1458 	case LDC_NACK:
1459 		DWARN(ldcp->id,
1460 		    "i_ldc_process_RTS: (0x%llx) RTS NACK received\n",
1461 		    ldcp->id);
1462 
1463 		/* Reset the channel -- as we cannot continue */
1464 		mutex_enter(&ldcp->tx_lock);
1465 		i_ldc_reset(ldcp, B_TRUE);
1466 		mutex_exit(&ldcp->tx_lock);
1467 		rv = ECONNRESET;
1468 		break;
1469 
1470 	case LDC_INFO:
1471 
1472 		/* check mode */
1473 		if (ldcp->mode != (ldc_mode_t)msg->env) {
1474 			cmn_err(CE_NOTE,
1475 			    "i_ldc_process_RTS: (0x%lx) mode mismatch\n",
1476 			    ldcp->id);
1477 			/*
1478 			 * send NACK in response to MODE message
1479 			 * get the current tail for the response
1480 			 */
1481 			rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_NACK, LDC_RTS);
1482 			if (rv) {
1483 				/* if cannot send NACK - reset channel */
1484 				mutex_enter(&ldcp->tx_lock);
1485 				i_ldc_reset(ldcp, B_TRUE);
1486 				mutex_exit(&ldcp->tx_lock);
1487 				rv = ECONNRESET;
1488 				break;
1489 			}
1490 			sent_NACK = B_TRUE;
1491 		}
1492 		break;
1493 	default:
1494 		DWARN(ldcp->id, "i_ldc_process_RTS: (0x%llx) unexp ACK\n",
1495 		    ldcp->id);
1496 		mutex_enter(&ldcp->tx_lock);
1497 		i_ldc_reset(ldcp, B_TRUE);
1498 		mutex_exit(&ldcp->tx_lock);
1499 		rv = ECONNRESET;
1500 		break;
1501 	}
1502 
1503 	/*
1504 	 * If either the connection was reset (when rv != 0) or
1505 	 * a NACK was sent, we return. In the case of a NACK
1506 	 * we dont want to consume the packet that came in but
1507 	 * not record that we received the RTS
1508 	 */
1509 	if (rv || sent_NACK)
1510 		return (rv);
1511 
1512 	/* record RTS received */
1513 	ldcp->hstate |= TS_RCVD_RTS;
1514 
1515 	/* store initial SEQID info */
1516 	ldcp->last_msg_snt = msg->seqid;
1517 
1518 	/* Obtain Tx lock */
1519 	mutex_enter(&ldcp->tx_lock);
1520 
1521 	/* get the current tail for the response */
1522 	rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
1523 	if (rv != 0) {
1524 		cmn_err(CE_NOTE,
1525 		    "i_ldc_process_RTS: (0x%lx) err sending RTR\n",
1526 		    ldcp->id);
1527 		i_ldc_reset(ldcp, B_TRUE);
1528 		mutex_exit(&ldcp->tx_lock);
1529 		return (ECONNRESET);
1530 	}
1531 
1532 	pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
1533 	ZERO_PKT(pkt);
1534 
1535 	/* initialize the packet */
1536 	pkt->type = LDC_CTRL;
1537 	pkt->stype = LDC_INFO;
1538 	pkt->ctrl = LDC_RTR;
1539 	pkt->env = ldcp->mode;
1540 	if (ldcp->mode != LDC_MODE_RAW)
1541 		pkt->seqid = LDC_INIT_SEQID;
1542 
1543 	ldcp->last_msg_rcd = msg->seqid;
1544 
1545 	/* initiate the send by calling into HV and set the new tail */
1546 	tx_tail = (tx_tail + LDC_PACKET_SIZE) %
1547 	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1548 
1549 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
1550 	if (rv == 0) {
1551 		D2(ldcp->id,
1552 		    "i_ldc_process_RTS: (0x%llx) sent RTR\n", ldcp->id);
1553 		DUMP_LDC_PKT(ldcp, "i_ldc_process_RTS sent rtr", (uint64_t)pkt);
1554 
1555 		ldcp->tx_tail = tx_tail;
1556 		ldcp->hstate |= TS_SENT_RTR;
1557 
1558 	} else {
1559 		cmn_err(CE_NOTE,
1560 		    "i_ldc_process_RTS: (0x%lx) error sending RTR\n",
1561 		    ldcp->id);
1562 		i_ldc_reset(ldcp, B_TRUE);
1563 		mutex_exit(&ldcp->tx_lock);
1564 		return (ECONNRESET);
1565 	}
1566 
1567 	mutex_exit(&ldcp->tx_lock);
1568 	return (0);
1569 }
1570 
1571 /*
1572  * Process an incoming RTR ctrl message
1573  */
1574 static int
1575 i_ldc_process_RTR(ldc_chan_t *ldcp, ldc_msg_t *msg)
1576 {
1577 	int 		rv = 0;
1578 	boolean_t	sent_NACK = B_FALSE;
1579 
1580 	D2(ldcp->id, "i_ldc_process_RTR: (0x%llx) received RTR\n", ldcp->id);
1581 
1582 	switch (msg->stype) {
1583 	case LDC_NACK:
1584 		/* RTR NACK received */
1585 		DWARN(ldcp->id,
1586 		    "i_ldc_process_RTR: (0x%llx) RTR NACK received\n",
1587 		    ldcp->id);
1588 
1589 		/* Reset the channel -- as we cannot continue */
1590 		mutex_enter(&ldcp->tx_lock);
1591 		i_ldc_reset(ldcp, B_TRUE);
1592 		mutex_exit(&ldcp->tx_lock);
1593 		rv = ECONNRESET;
1594 
1595 		break;
1596 
1597 	case LDC_INFO:
1598 
1599 		/* check mode */
1600 		if (ldcp->mode != (ldc_mode_t)msg->env) {
1601 			DWARN(ldcp->id,
1602 			    "i_ldc_process_RTR: (0x%llx) mode mismatch, "
1603 			    "expecting 0x%x, got 0x%x\n",
1604 			    ldcp->id, ldcp->mode, (ldc_mode_t)msg->env);
1605 			/*
1606 			 * send NACK in response to MODE message
1607 			 * get the current tail for the response
1608 			 */
1609 			rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_NACK, LDC_RTR);
1610 			if (rv) {
1611 				/* if cannot send NACK - reset channel */
1612 				mutex_enter(&ldcp->tx_lock);
1613 				i_ldc_reset(ldcp, B_TRUE);
1614 				mutex_exit(&ldcp->tx_lock);
1615 				rv = ECONNRESET;
1616 				break;
1617 			}
1618 			sent_NACK = B_TRUE;
1619 		}
1620 		break;
1621 
1622 	default:
1623 		DWARN(ldcp->id, "i_ldc_process_RTR: (0x%llx) unexp ACK\n",
1624 		    ldcp->id);
1625 
1626 		/* Reset the channel -- as we cannot continue */
1627 		mutex_enter(&ldcp->tx_lock);
1628 		i_ldc_reset(ldcp, B_TRUE);
1629 		mutex_exit(&ldcp->tx_lock);
1630 		rv = ECONNRESET;
1631 		break;
1632 	}
1633 
1634 	/*
1635 	 * If either the connection was reset (when rv != 0) or
1636 	 * a NACK was sent, we return. In the case of a NACK
1637 	 * we dont want to consume the packet that came in but
1638 	 * not record that we received the RTR
1639 	 */
1640 	if (rv || sent_NACK)
1641 		return (rv);
1642 
1643 	ldcp->last_msg_snt = msg->seqid;
1644 	ldcp->hstate |= TS_RCVD_RTR;
1645 
1646 	rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_INFO, LDC_RDX);
1647 	if (rv) {
1648 		cmn_err(CE_NOTE,
1649 		    "i_ldc_process_RTR: (0x%lx) cannot send RDX\n",
1650 		    ldcp->id);
1651 		mutex_enter(&ldcp->tx_lock);
1652 		i_ldc_reset(ldcp, B_TRUE);
1653 		mutex_exit(&ldcp->tx_lock);
1654 		return (ECONNRESET);
1655 	}
1656 	D2(ldcp->id,
1657 	    "i_ldc_process_RTR: (0x%llx) sent RDX\n", ldcp->id);
1658 
1659 	ldcp->hstate |= TS_SENT_RDX;
1660 	ldcp->tstate |= TS_HSHAKE_DONE;
1661 	if ((ldcp->tstate & TS_IN_RESET) == 0)
1662 		ldcp->status = LDC_UP;
1663 
1664 	D1(ldcp->id, "(0x%llx) Handshake Complete\n", ldcp->id);
1665 
1666 	return (0);
1667 }
1668 
1669 
1670 /*
1671  * Process an incoming RDX ctrl message
1672  */
1673 static int
1674 i_ldc_process_RDX(ldc_chan_t *ldcp, ldc_msg_t *msg)
1675 {
1676 	int	rv = 0;
1677 
1678 	D2(ldcp->id, "i_ldc_process_RDX: (0x%llx) received RDX\n", ldcp->id);
1679 
1680 	switch (msg->stype) {
1681 	case LDC_NACK:
1682 		/* RDX NACK received */
1683 		DWARN(ldcp->id,
1684 		    "i_ldc_process_RDX: (0x%llx) RDX NACK received\n",
1685 		    ldcp->id);
1686 
1687 		/* Reset the channel -- as we cannot continue */
1688 		mutex_enter(&ldcp->tx_lock);
1689 		i_ldc_reset(ldcp, B_TRUE);
1690 		mutex_exit(&ldcp->tx_lock);
1691 		rv = ECONNRESET;
1692 
1693 		break;
1694 
1695 	case LDC_INFO:
1696 
1697 		/*
1698 		 * if channel is UP and a RDX received after data transmission
1699 		 * has commenced it is an error
1700 		 */
1701 		if ((ldcp->tstate == TS_UP) && (ldcp->hstate & TS_RCVD_RDX)) {
1702 			DWARN(DBG_ALL_LDCS,
1703 			    "i_ldc_process_RDX: (0x%llx) unexpected RDX"
1704 			    " - LDC reset\n", ldcp->id);
1705 			mutex_enter(&ldcp->tx_lock);
1706 			i_ldc_reset(ldcp, B_TRUE);
1707 			mutex_exit(&ldcp->tx_lock);
1708 			return (ECONNRESET);
1709 		}
1710 
1711 		ldcp->hstate |= TS_RCVD_RDX;
1712 		ldcp->tstate |= TS_HSHAKE_DONE;
1713 		if ((ldcp->tstate & TS_IN_RESET) == 0)
1714 			ldcp->status = LDC_UP;
1715 
1716 		D1(DBG_ALL_LDCS, "(0x%llx) Handshake Complete\n", ldcp->id);
1717 		break;
1718 
1719 	default:
1720 		DWARN(ldcp->id, "i_ldc_process_RDX: (0x%llx) unexp ACK\n",
1721 		    ldcp->id);
1722 
1723 		/* Reset the channel -- as we cannot continue */
1724 		mutex_enter(&ldcp->tx_lock);
1725 		i_ldc_reset(ldcp, B_TRUE);
1726 		mutex_exit(&ldcp->tx_lock);
1727 		rv = ECONNRESET;
1728 		break;
1729 	}
1730 
1731 	return (rv);
1732 }
1733 
1734 /*
1735  * Process an incoming ACK for a data packet
1736  */
1737 static int
1738 i_ldc_process_data_ACK(ldc_chan_t *ldcp, ldc_msg_t *msg)
1739 {
1740 	int		rv;
1741 	uint64_t 	tx_head;
1742 	ldc_msg_t	*pkt;
1743 
1744 	/* Obtain Tx lock */
1745 	mutex_enter(&ldcp->tx_lock);
1746 
1747 	/*
1748 	 * Read the current Tx head and tail
1749 	 */
1750 	rv = hv_ldc_tx_get_state(ldcp->id,
1751 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
1752 	if (rv != 0) {
1753 		cmn_err(CE_WARN,
1754 		    "i_ldc_process_data_ACK: (0x%lx) cannot read qptrs\n",
1755 		    ldcp->id);
1756 
1757 		/* Reset the channel -- as we cannot continue */
1758 		i_ldc_reset(ldcp, B_TRUE);
1759 		mutex_exit(&ldcp->tx_lock);
1760 		return (ECONNRESET);
1761 	}
1762 
1763 	/*
1764 	 * loop from where the previous ACK location was to the
1765 	 * current head location. This is how far the HV has
1766 	 * actually send pkts. Pkts between head and tail are
1767 	 * yet to be sent by HV.
1768 	 */
1769 	tx_head = ldcp->tx_ackd_head;
1770 	for (;;) {
1771 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_head);
1772 		tx_head = (tx_head + LDC_PACKET_SIZE) %
1773 		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1774 
1775 		if (pkt->seqid == msg->ackid) {
1776 			D2(ldcp->id,
1777 			    "i_ldc_process_data_ACK: (0x%llx) found packet\n",
1778 			    ldcp->id);
1779 			ldcp->last_ack_rcd = msg->ackid;
1780 			ldcp->tx_ackd_head = tx_head;
1781 			break;
1782 		}
1783 		if (tx_head == ldcp->tx_head) {
1784 			/* could not find packet */
1785 			DWARN(ldcp->id,
1786 			    "i_ldc_process_data_ACK: (0x%llx) invalid ACKid\n",
1787 			    ldcp->id);
1788 
1789 			/* Reset the channel -- as we cannot continue */
1790 			i_ldc_reset(ldcp, B_TRUE);
1791 			mutex_exit(&ldcp->tx_lock);
1792 			return (ECONNRESET);
1793 		}
1794 	}
1795 
1796 	mutex_exit(&ldcp->tx_lock);
1797 	return (0);
1798 }
1799 
1800 /*
1801  * Process incoming control message
1802  * Return 0 - session can continue
1803  *        EAGAIN - reprocess packet - state was changed
1804  *	  ECONNRESET - channel was reset
1805  */
1806 static int
1807 i_ldc_ctrlmsg(ldc_chan_t *ldcp, ldc_msg_t *msg)
1808 {
1809 	int 		rv = 0;
1810 
1811 	D1(ldcp->id, "i_ldc_ctrlmsg: (%llx) tstate = %lx, hstate = %lx\n",
1812 	    ldcp->id, ldcp->tstate, ldcp->hstate);
1813 
1814 	switch (ldcp->tstate & ~TS_IN_RESET) {
1815 
1816 	case TS_OPEN:
1817 	case TS_READY:
1818 
1819 		switch (msg->ctrl & LDC_CTRL_MASK) {
1820 		case LDC_VER:
1821 			/* process version message */
1822 			rv = i_ldc_process_VER(ldcp, msg);
1823 			break;
1824 		default:
1825 			DWARN(ldcp->id,
1826 			    "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
1827 			    "tstate=0x%x\n", ldcp->id,
1828 			    (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate);
1829 			break;
1830 		}
1831 
1832 		break;
1833 
1834 	case TS_VREADY:
1835 
1836 		switch (msg->ctrl & LDC_CTRL_MASK) {
1837 		case LDC_VER:
1838 			/* process version message */
1839 			rv = i_ldc_process_VER(ldcp, msg);
1840 			break;
1841 		case LDC_RTS:
1842 			/* process RTS message */
1843 			rv = i_ldc_process_RTS(ldcp, msg);
1844 			break;
1845 		case LDC_RTR:
1846 			/* process RTR message */
1847 			rv = i_ldc_process_RTR(ldcp, msg);
1848 			break;
1849 		case LDC_RDX:
1850 			/* process RDX message */
1851 			rv = i_ldc_process_RDX(ldcp, msg);
1852 			break;
1853 		default:
1854 			DWARN(ldcp->id,
1855 			    "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
1856 			    "tstate=0x%x\n", ldcp->id,
1857 			    (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate);
1858 			break;
1859 		}
1860 
1861 		break;
1862 
1863 	case TS_UP:
1864 
1865 		switch (msg->ctrl & LDC_CTRL_MASK) {
1866 		case LDC_VER:
1867 			DWARN(ldcp->id,
1868 			    "i_ldc_ctrlmsg: (0x%llx) unexpected VER "
1869 			    "- LDC reset\n", ldcp->id);
1870 			/* peer is redoing version negotiation */
1871 			mutex_enter(&ldcp->tx_lock);
1872 			(void) i_ldc_txq_reconf(ldcp);
1873 			i_ldc_reset_state(ldcp);
1874 			mutex_exit(&ldcp->tx_lock);
1875 			rv = EAGAIN;
1876 			break;
1877 
1878 		case LDC_RDX:
1879 			/* process RDX message */
1880 			rv = i_ldc_process_RDX(ldcp, msg);
1881 			break;
1882 
1883 		default:
1884 			DWARN(ldcp->id,
1885 			    "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
1886 			    "tstate=0x%x\n", ldcp->id,
1887 			    (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate);
1888 			break;
1889 		}
1890 	}
1891 
1892 	return (rv);
1893 }
1894 
1895 /*
1896  * Register channel with the channel nexus
1897  */
1898 static int
1899 i_ldc_register_channel(ldc_chan_t *ldcp)
1900 {
1901 	int		rv = 0;
1902 	ldc_cnex_t	*cinfo = &ldcssp->cinfo;
1903 
1904 	if (cinfo->dip == NULL) {
1905 		DWARN(ldcp->id,
1906 		    "i_ldc_register_channel: cnex has not registered\n");
1907 		return (EAGAIN);
1908 	}
1909 
1910 	rv = cinfo->reg_chan(cinfo->dip, ldcp->id, ldcp->devclass);
1911 	if (rv) {
1912 		DWARN(ldcp->id,
1913 		    "i_ldc_register_channel: cannot register channel\n");
1914 		return (rv);
1915 	}
1916 
1917 	rv = cinfo->add_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR,
1918 	    i_ldc_tx_hdlr, ldcp, NULL);
1919 	if (rv) {
1920 		DWARN(ldcp->id,
1921 		    "i_ldc_register_channel: cannot add Tx interrupt\n");
1922 		(void) cinfo->unreg_chan(cinfo->dip, ldcp->id);
1923 		return (rv);
1924 	}
1925 
1926 	rv = cinfo->add_intr(cinfo->dip, ldcp->id, CNEX_RX_INTR,
1927 	    i_ldc_rx_hdlr, ldcp, NULL);
1928 	if (rv) {
1929 		DWARN(ldcp->id,
1930 		    "i_ldc_register_channel: cannot add Rx interrupt\n");
1931 		(void) cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR);
1932 		(void) cinfo->unreg_chan(cinfo->dip, ldcp->id);
1933 		return (rv);
1934 	}
1935 
1936 	ldcp->tstate |= TS_CNEX_RDY;
1937 
1938 	return (0);
1939 }
1940 
1941 /*
1942  * Unregister a channel with the channel nexus
1943  */
1944 static int
1945 i_ldc_unregister_channel(ldc_chan_t *ldcp)
1946 {
1947 	int		rv = 0;
1948 	ldc_cnex_t	*cinfo = &ldcssp->cinfo;
1949 
1950 	if (cinfo->dip == NULL) {
1951 		DWARN(ldcp->id,
1952 		    "i_ldc_unregister_channel: cnex has not registered\n");
1953 		return (EAGAIN);
1954 	}
1955 
1956 	if (ldcp->tstate & TS_CNEX_RDY) {
1957 
1958 		/* Remove the Rx interrupt */
1959 		rv = cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_RX_INTR);
1960 		if (rv) {
1961 			if (rv != EAGAIN) {
1962 				DWARN(ldcp->id,
1963 				    "i_ldc_unregister_channel: err removing "
1964 				    "Rx intr\n");
1965 				return (rv);
1966 			}
1967 
1968 			/*
1969 			 * If interrupts are pending and handler has
1970 			 * finished running, clear interrupt and try
1971 			 * again
1972 			 */
1973 			if (ldcp->rx_intr_state != LDC_INTR_PEND)
1974 				return (rv);
1975 
1976 			(void) i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
1977 			rv = cinfo->rem_intr(cinfo->dip, ldcp->id,
1978 			    CNEX_RX_INTR);
1979 			if (rv) {
1980 				DWARN(ldcp->id, "i_ldc_unregister_channel: "
1981 				    "err removing Rx interrupt\n");
1982 				return (rv);
1983 			}
1984 		}
1985 
1986 		/* Remove the Tx interrupt */
1987 		rv = cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR);
1988 		if (rv) {
1989 			DWARN(ldcp->id,
1990 			    "i_ldc_unregister_channel: err removing Tx intr\n");
1991 			return (rv);
1992 		}
1993 
1994 		/* Unregister the channel */
1995 		rv = cinfo->unreg_chan(ldcssp->cinfo.dip, ldcp->id);
1996 		if (rv) {
1997 			DWARN(ldcp->id,
1998 			    "i_ldc_unregister_channel: cannot unreg channel\n");
1999 			return (rv);
2000 		}
2001 
2002 		ldcp->tstate &= ~TS_CNEX_RDY;
2003 	}
2004 
2005 	return (0);
2006 }
2007 
2008 
2009 /*
2010  * LDC transmit interrupt handler
2011  *    triggered for chanel up/down/reset events
2012  *    and Tx queue content changes
2013  */
2014 static uint_t
2015 i_ldc_tx_hdlr(caddr_t arg1, caddr_t arg2)
2016 {
2017 	_NOTE(ARGUNUSED(arg2))
2018 
2019 	int 		rv;
2020 	ldc_chan_t 	*ldcp;
2021 	boolean_t 	notify_client = B_FALSE;
2022 	uint64_t	notify_event = 0, link_state;
2023 
2024 	/* Get the channel for which interrupt was received */
2025 	ASSERT(arg1 != NULL);
2026 	ldcp = (ldc_chan_t *)arg1;
2027 
2028 	D1(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) Received intr, ldcp=0x%p\n",
2029 	    ldcp->id, ldcp);
2030 
2031 	/* Lock channel */
2032 	mutex_enter(&ldcp->lock);
2033 
2034 	/* Obtain Tx lock */
2035 	mutex_enter(&ldcp->tx_lock);
2036 
2037 	/* mark interrupt as pending */
2038 	ldcp->tx_intr_state = LDC_INTR_ACTIVE;
2039 
2040 	/* save current link state */
2041 	link_state = ldcp->link_state;
2042 
2043 	rv = hv_ldc_tx_get_state(ldcp->id, &ldcp->tx_head, &ldcp->tx_tail,
2044 	    &ldcp->link_state);
2045 	if (rv) {
2046 		cmn_err(CE_WARN,
2047 		    "i_ldc_tx_hdlr: (0x%lx) cannot read queue ptrs rv=0x%d\n",
2048 		    ldcp->id, rv);
2049 		i_ldc_clear_intr(ldcp, CNEX_TX_INTR);
2050 		mutex_exit(&ldcp->tx_lock);
2051 		mutex_exit(&ldcp->lock);
2052 		return (DDI_INTR_CLAIMED);
2053 	}
2054 
2055 	/*
2056 	 * reset the channel state if the channel went down
2057 	 * (other side unconfigured queue) or channel was reset
2058 	 * (other side reconfigured its queue)
2059 	 */
2060 	if (link_state != ldcp->link_state &&
2061 	    ldcp->link_state == LDC_CHANNEL_DOWN) {
2062 		D1(ldcp->id, "i_ldc_tx_hdlr: channel link down\n", ldcp->id);
2063 		i_ldc_reset(ldcp, B_FALSE);
2064 		notify_client = B_TRUE;
2065 		notify_event = LDC_EVT_DOWN;
2066 	}
2067 
2068 	if (link_state != ldcp->link_state &&
2069 	    ldcp->link_state == LDC_CHANNEL_RESET) {
2070 		D1(ldcp->id, "i_ldc_tx_hdlr: channel link reset\n", ldcp->id);
2071 		i_ldc_reset(ldcp, B_FALSE);
2072 		notify_client = B_TRUE;
2073 		notify_event = LDC_EVT_RESET;
2074 	}
2075 
2076 	if (link_state != ldcp->link_state &&
2077 	    (ldcp->tstate & ~TS_IN_RESET) == TS_OPEN &&
2078 	    ldcp->link_state == LDC_CHANNEL_UP) {
2079 		D1(ldcp->id, "i_ldc_tx_hdlr: channel link up\n", ldcp->id);
2080 		notify_client = B_TRUE;
2081 		notify_event = LDC_EVT_RESET;
2082 		ldcp->tstate |= TS_LINK_READY;
2083 		ldcp->status = LDC_READY;
2084 	}
2085 
2086 	/* if callbacks are disabled, do not notify */
2087 	if (!ldcp->cb_enabled)
2088 		notify_client = B_FALSE;
2089 
2090 	i_ldc_clear_intr(ldcp, CNEX_TX_INTR);
2091 	mutex_exit(&ldcp->tx_lock);
2092 
2093 	if (notify_client) {
2094 		ldcp->cb_inprogress = B_TRUE;
2095 		mutex_exit(&ldcp->lock);
2096 		rv = ldcp->cb(notify_event, ldcp->cb_arg);
2097 		if (rv) {
2098 			DWARN(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) callback "
2099 			    "failure", ldcp->id);
2100 		}
2101 		mutex_enter(&ldcp->lock);
2102 		ldcp->cb_inprogress = B_FALSE;
2103 	}
2104 
2105 	mutex_exit(&ldcp->lock);
2106 
2107 	D1(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) exiting handler", ldcp->id);
2108 
2109 	return (DDI_INTR_CLAIMED);
2110 }
2111 
2112 /*
2113  * Process the Rx HV queue.
2114  *
2115  * Returns 0 if data packets were found and no errors were encountered,
2116  * otherwise returns an error. In either case, the *notify argument is
2117  * set to indicate whether or not the client callback function should
2118  * be invoked. The *event argument is set to contain the callback event.
2119  *
2120  * Depending on the channel mode, packets are handled differently:
2121  *
2122  * RAW MODE
2123  * For raw mode channels, when a data packet is encountered,
2124  * processing stops and all packets are left on the queue to be removed
2125  * and processed by the ldc_read code path.
2126  *
2127  * UNRELIABLE MODE
2128  * For unreliable mode, when a data packet is encountered, processing
2129  * stops, and all packets are left on the queue to be removed and
2130  * processed by the ldc_read code path. Control packets are processed
2131  * inline if they are encountered before any data packets.
2132  *
2133  * RELIABLE MODE
2134  * For reliable mode channels, all packets on the receive queue
2135  * are processed: data packets are copied to the data queue and
2136  * control packets are processed inline. Packets are only left on
2137  * the receive queue when the data queue is full.
2138  */
2139 static uint_t
2140 i_ldc_rx_process_hvq(ldc_chan_t *ldcp, boolean_t *notify_client,
2141     uint64_t *notify_event)
2142 {
2143 	int		rv;
2144 	uint64_t 	rx_head, rx_tail;
2145 	ldc_msg_t 	*msg;
2146 	uint64_t	link_state, first_fragment = 0;
2147 	boolean_t	trace_length = B_TRUE;
2148 
2149 	ASSERT(MUTEX_HELD(&ldcp->lock));
2150 	*notify_client = B_FALSE;
2151 	*notify_event = 0;
2152 
2153 	/*
2154 	 * Read packet(s) from the queue
2155 	 */
2156 	for (;;) {
2157 
2158 		link_state = ldcp->link_state;
2159 		rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
2160 		    &ldcp->link_state);
2161 		if (rv) {
2162 			cmn_err(CE_WARN,
2163 			    "i_ldc_rx_process_hvq: (0x%lx) cannot read "
2164 			    "queue ptrs, rv=0x%d\n", ldcp->id, rv);
2165 			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
2166 			return (EIO);
2167 		}
2168 
2169 		/*
2170 		 * reset the channel state if the channel went down
2171 		 * (other side unconfigured queue) or channel was reset
2172 		 * (other side reconfigured its queue)
2173 		 */
2174 
2175 		if (link_state != ldcp->link_state) {
2176 
2177 			switch (ldcp->link_state) {
2178 			case LDC_CHANNEL_DOWN:
2179 				D1(ldcp->id, "i_ldc_rx_process_hvq: channel "
2180 				    "link down\n", ldcp->id);
2181 				mutex_enter(&ldcp->tx_lock);
2182 				i_ldc_reset(ldcp, B_FALSE);
2183 				mutex_exit(&ldcp->tx_lock);
2184 				*notify_client = B_TRUE;
2185 				*notify_event = LDC_EVT_DOWN;
2186 				goto loop_exit;
2187 
2188 			case LDC_CHANNEL_UP:
2189 				D1(ldcp->id, "i_ldc_rx_process_hvq: "
2190 				    "channel link up\n", ldcp->id);
2191 
2192 				if ((ldcp->tstate & ~TS_IN_RESET) == TS_OPEN) {
2193 					*notify_client = B_TRUE;
2194 					*notify_event = LDC_EVT_RESET;
2195 					ldcp->tstate |= TS_LINK_READY;
2196 					ldcp->status = LDC_READY;
2197 				}
2198 				break;
2199 
2200 			case LDC_CHANNEL_RESET:
2201 			default:
2202 #ifdef DEBUG
2203 force_reset:
2204 #endif
2205 				D1(ldcp->id, "i_ldc_rx_process_hvq: channel "
2206 				    "link reset\n", ldcp->id);
2207 				mutex_enter(&ldcp->tx_lock);
2208 				i_ldc_reset(ldcp, B_FALSE);
2209 				mutex_exit(&ldcp->tx_lock);
2210 				*notify_client = B_TRUE;
2211 				*notify_event = LDC_EVT_RESET;
2212 				break;
2213 			}
2214 		}
2215 
2216 #ifdef DEBUG
2217 		if (LDC_INJECT_RESET(ldcp))
2218 			goto force_reset;
2219 		if (LDC_INJECT_DRNGCLEAR(ldcp))
2220 			i_ldc_mem_inject_dring_clear(ldcp);
2221 #endif
2222 		if (trace_length) {
2223 			TRACE_RXHVQ_LENGTH(ldcp, rx_head, rx_tail);
2224 			trace_length = B_FALSE;
2225 		}
2226 
2227 		if (rx_head == rx_tail) {
2228 			D2(ldcp->id, "i_ldc_rx_process_hvq: (0x%llx) "
2229 			    "No packets\n", ldcp->id);
2230 			break;
2231 		}
2232 
2233 		D2(ldcp->id, "i_ldc_rx_process_hvq: head=0x%llx, "
2234 		    "tail=0x%llx\n", rx_head, rx_tail);
2235 		DUMP_LDC_PKT(ldcp, "i_ldc_rx_process_hvq rcd",
2236 		    ldcp->rx_q_va + rx_head);
2237 
2238 		/* get the message */
2239 		msg = (ldc_msg_t *)(ldcp->rx_q_va + rx_head);
2240 
2241 		/* if channel is in RAW mode or data pkt, notify and return */
2242 		if (ldcp->mode == LDC_MODE_RAW) {
2243 			*notify_client = B_TRUE;
2244 			*notify_event |= LDC_EVT_READ;
2245 			break;
2246 		}
2247 
2248 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_INFO)) {
2249 
2250 			/* discard packet if channel is not up */
2251 			if ((ldcp->tstate & ~TS_IN_RESET) != TS_UP) {
2252 
2253 				/* move the head one position */
2254 				rx_head = (rx_head + LDC_PACKET_SIZE) %
2255 				    (ldcp->rx_q_entries << LDC_PACKET_SHIFT);
2256 
2257 				if (rv = i_ldc_set_rx_head(ldcp, rx_head))
2258 					break;
2259 
2260 				continue;
2261 			} else {
2262 				uint64_t dq_head, dq_tail;
2263 
2264 				/* process only RELIABLE mode data packets */
2265 				if (ldcp->mode != LDC_MODE_RELIABLE) {
2266 					if ((ldcp->tstate & TS_IN_RESET) == 0)
2267 						*notify_client = B_TRUE;
2268 					*notify_event |= LDC_EVT_READ;
2269 					break;
2270 				}
2271 
2272 				/* don't process packet if queue full */
2273 				(void) i_ldc_dq_rx_get_state(ldcp, &dq_head,
2274 				    &dq_tail, NULL);
2275 				dq_tail = (dq_tail + LDC_PACKET_SIZE) %
2276 				    (ldcp->rx_dq_entries << LDC_PACKET_SHIFT);
2277 				if (dq_tail == dq_head ||
2278 				    LDC_INJECT_DQFULL(ldcp)) {
2279 					rv = ENOSPC;
2280 					break;
2281 				}
2282 			}
2283 		}
2284 
2285 		/* Check the sequence ID for the message received */
2286 		rv = i_ldc_check_seqid(ldcp, msg);
2287 		if (rv != 0) {
2288 
2289 			DWARN(ldcp->id, "i_ldc_rx_process_hvq: (0x%llx) "
2290 			    "seqid error, q_ptrs=0x%lx,0x%lx", ldcp->id,
2291 			    rx_head, rx_tail);
2292 
2293 			/* Reset last_msg_rcd to start of message */
2294 			if (first_fragment != 0) {
2295 				ldcp->last_msg_rcd = first_fragment - 1;
2296 				first_fragment = 0;
2297 			}
2298 
2299 			/*
2300 			 * Send a NACK due to seqid mismatch
2301 			 */
2302 			rv = i_ldc_send_pkt(ldcp, msg->type, LDC_NACK,
2303 			    (msg->ctrl & LDC_CTRL_MASK));
2304 
2305 			if (rv) {
2306 				cmn_err(CE_NOTE, "i_ldc_rx_process_hvq: "
2307 				    "(0x%lx) err sending CTRL/DATA NACK msg\n",
2308 				    ldcp->id);
2309 
2310 				/* if cannot send NACK - reset channel */
2311 				mutex_enter(&ldcp->tx_lock);
2312 				i_ldc_reset(ldcp, B_TRUE);
2313 				mutex_exit(&ldcp->tx_lock);
2314 
2315 				*notify_client = B_TRUE;
2316 				*notify_event = LDC_EVT_RESET;
2317 				break;
2318 			}
2319 
2320 			/* purge receive queue */
2321 			(void) i_ldc_set_rx_head(ldcp, rx_tail);
2322 			break;
2323 		}
2324 
2325 		/* record the message ID */
2326 		ldcp->last_msg_rcd = msg->seqid;
2327 
2328 		/* process control messages */
2329 		if (msg->type & LDC_CTRL) {
2330 			/* save current internal state */
2331 			uint64_t tstate = ldcp->tstate;
2332 
2333 			rv = i_ldc_ctrlmsg(ldcp, msg);
2334 			if (rv == EAGAIN) {
2335 				/* re-process pkt - state was adjusted */
2336 				continue;
2337 			}
2338 			if (rv == ECONNRESET) {
2339 				*notify_client = B_TRUE;
2340 				*notify_event = LDC_EVT_RESET;
2341 				break;
2342 			}
2343 
2344 			/*
2345 			 * control message processing was successful
2346 			 * channel transitioned to ready for communication
2347 			 */
2348 			if (rv == 0 && ldcp->tstate == TS_UP &&
2349 			    (tstate & ~TS_IN_RESET) !=
2350 			    (ldcp->tstate & ~TS_IN_RESET)) {
2351 				*notify_client = B_TRUE;
2352 				*notify_event = LDC_EVT_UP;
2353 			}
2354 		}
2355 
2356 		/* process data NACKs */
2357 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_NACK)) {
2358 			DWARN(ldcp->id,
2359 			    "i_ldc_rx_process_hvq: (0x%llx) received DATA/NACK",
2360 			    ldcp->id);
2361 			mutex_enter(&ldcp->tx_lock);
2362 			i_ldc_reset(ldcp, B_TRUE);
2363 			mutex_exit(&ldcp->tx_lock);
2364 			*notify_client = B_TRUE;
2365 			*notify_event = LDC_EVT_RESET;
2366 			break;
2367 		}
2368 
2369 		/* process data ACKs */
2370 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) {
2371 			if (rv = i_ldc_process_data_ACK(ldcp, msg)) {
2372 				*notify_client = B_TRUE;
2373 				*notify_event = LDC_EVT_RESET;
2374 				break;
2375 			}
2376 		}
2377 
2378 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_INFO)) {
2379 			ASSERT(ldcp->mode == LDC_MODE_RELIABLE);
2380 
2381 			/*
2382 			 * Copy the data packet to the data queue. Note
2383 			 * that the copy routine updates the rx_head pointer.
2384 			 */
2385 			i_ldc_rxdq_copy(ldcp, &rx_head);
2386 
2387 			if ((ldcp->tstate & TS_IN_RESET) == 0)
2388 				*notify_client = B_TRUE;
2389 			*notify_event |= LDC_EVT_READ;
2390 		} else {
2391 			rx_head = (rx_head + LDC_PACKET_SIZE) %
2392 			    (ldcp->rx_q_entries << LDC_PACKET_SHIFT);
2393 		}
2394 
2395 		/* move the head one position */
2396 		if (rv = i_ldc_set_rx_head(ldcp, rx_head)) {
2397 			*notify_client = B_TRUE;
2398 			*notify_event = LDC_EVT_RESET;
2399 			break;
2400 		}
2401 
2402 	} /* for */
2403 
2404 loop_exit:
2405 
2406 	if (ldcp->mode == LDC_MODE_RELIABLE) {
2407 		/* ACK data packets */
2408 		if ((*notify_event &
2409 		    (LDC_EVT_READ | LDC_EVT_RESET)) == LDC_EVT_READ) {
2410 			int ack_rv;
2411 			ack_rv = i_ldc_send_pkt(ldcp, LDC_DATA, LDC_ACK, 0);
2412 			if (ack_rv && ack_rv != EWOULDBLOCK) {
2413 				cmn_err(CE_NOTE,
2414 				    "i_ldc_rx_process_hvq: (0x%lx) cannot "
2415 				    "send ACK\n", ldcp->id);
2416 
2417 				mutex_enter(&ldcp->tx_lock);
2418 				i_ldc_reset(ldcp, B_FALSE);
2419 				mutex_exit(&ldcp->tx_lock);
2420 
2421 				*notify_client = B_TRUE;
2422 				*notify_event = LDC_EVT_RESET;
2423 				goto skip_ackpeek;
2424 			}
2425 		}
2426 
2427 		/*
2428 		 * If we have no more space on the data queue, make sure
2429 		 * there are no ACKs on the rx queue waiting to be processed.
2430 		 */
2431 		if (rv == ENOSPC) {
2432 			if (i_ldc_rx_ackpeek(ldcp, rx_head, rx_tail) != 0) {
2433 				ldcp->rx_ack_head = ACKPEEK_HEAD_INVALID;
2434 				*notify_client = B_TRUE;
2435 				*notify_event = LDC_EVT_RESET;
2436 			}
2437 		} else {
2438 			ldcp->rx_ack_head = ACKPEEK_HEAD_INVALID;
2439 		}
2440 	}
2441 
2442 skip_ackpeek:
2443 
2444 	/* Return, indicating whether or not data packets were found */
2445 	if ((*notify_event & (LDC_EVT_READ | LDC_EVT_RESET)) == LDC_EVT_READ)
2446 		return (0);
2447 
2448 	return (ENOMSG);
2449 }
2450 
2451 /*
2452  * Process any ACK packets on the HV receive queue.
2453  *
2454  * This function is only used by RELIABLE mode channels when the
2455  * secondary data queue fills up and there are packets remaining on
2456  * the HV receive queue.
2457  */
2458 int
2459 i_ldc_rx_ackpeek(ldc_chan_t *ldcp, uint64_t rx_head, uint64_t rx_tail)
2460 {
2461 	int		rv = 0;
2462 	ldc_msg_t	*msg;
2463 
2464 	if (ldcp->rx_ack_head == ACKPEEK_HEAD_INVALID)
2465 		ldcp->rx_ack_head = rx_head;
2466 
2467 	while (ldcp->rx_ack_head != rx_tail) {
2468 		msg = (ldc_msg_t *)(ldcp->rx_q_va + ldcp->rx_ack_head);
2469 
2470 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) {
2471 			if (rv = i_ldc_process_data_ACK(ldcp, msg))
2472 				break;
2473 			msg->stype &= ~LDC_ACK;
2474 		}
2475 
2476 		ldcp->rx_ack_head =
2477 		    (ldcp->rx_ack_head + LDC_PACKET_SIZE) %
2478 		    (ldcp->rx_q_entries << LDC_PACKET_SHIFT);
2479 	}
2480 	return (rv);
2481 }
2482 
2483 /* -------------------------------------------------------------------------- */
2484 
2485 /*
2486  * LDC API functions
2487  */
2488 
2489 /*
2490  * Initialize the channel. Allocate internal structure and memory for
2491  * TX/RX queues, and initialize locks.
2492  */
2493 int
2494 ldc_init(uint64_t id, ldc_attr_t *attr, ldc_handle_t *handle)
2495 {
2496 	ldc_chan_t 	*ldcp;
2497 	int		rv, exit_val;
2498 	uint64_t	ra_base, nentries;
2499 	uint64_t	qlen;
2500 
2501 	exit_val = EINVAL;	/* guarantee an error if exit on failure */
2502 
2503 	if (attr == NULL) {
2504 		DWARN(id, "ldc_init: (0x%llx) invalid attr\n", id);
2505 		return (EINVAL);
2506 	}
2507 	if (handle == NULL) {
2508 		DWARN(id, "ldc_init: (0x%llx) invalid handle\n", id);
2509 		return (EINVAL);
2510 	}
2511 
2512 	/* check if channel is valid */
2513 	rv = hv_ldc_tx_qinfo(id, &ra_base, &nentries);
2514 	if (rv == H_ECHANNEL) {
2515 		DWARN(id, "ldc_init: (0x%llx) invalid channel id\n", id);
2516 		return (EINVAL);
2517 	}
2518 
2519 	/* check if the channel has already been initialized */
2520 	mutex_enter(&ldcssp->lock);
2521 	ldcp = ldcssp->chan_list;
2522 	while (ldcp != NULL) {
2523 		if (ldcp->id == id) {
2524 			DWARN(id, "ldc_init: (0x%llx) already initialized\n",
2525 			    id);
2526 			mutex_exit(&ldcssp->lock);
2527 			return (EADDRINUSE);
2528 		}
2529 		ldcp = ldcp->next;
2530 	}
2531 	mutex_exit(&ldcssp->lock);
2532 
2533 	ASSERT(ldcp == NULL);
2534 
2535 	*handle = 0;
2536 
2537 	/* Allocate an ldcp structure */
2538 	ldcp = kmem_zalloc(sizeof (ldc_chan_t), KM_SLEEP);
2539 
2540 	/*
2541 	 * Initialize the channel and Tx lock
2542 	 *
2543 	 * The channel 'lock' protects the entire channel and
2544 	 * should be acquired before initializing, resetting,
2545 	 * destroying or reading from a channel.
2546 	 *
2547 	 * The 'tx_lock' should be acquired prior to transmitting
2548 	 * data over the channel. The lock should also be acquired
2549 	 * prior to channel reconfiguration (in order to prevent
2550 	 * concurrent writes).
2551 	 *
2552 	 * ORDERING: When both locks are being acquired, to prevent
2553 	 * deadlocks, the channel lock should be always acquired prior
2554 	 * to the tx_lock.
2555 	 */
2556 	mutex_init(&ldcp->lock, NULL, MUTEX_DRIVER, NULL);
2557 	mutex_init(&ldcp->tx_lock, NULL, MUTEX_DRIVER, NULL);
2558 
2559 	/* Initialize the channel */
2560 	ldcp->id = id;
2561 	ldcp->cb = NULL;
2562 	ldcp->cb_arg = NULL;
2563 	ldcp->cb_inprogress = B_FALSE;
2564 	ldcp->cb_enabled = B_FALSE;
2565 	ldcp->next = NULL;
2566 
2567 	/* Read attributes */
2568 	ldcp->mode = attr->mode;
2569 	ldcp->devclass = attr->devclass;
2570 	ldcp->devinst = attr->instance;
2571 	ldcp->mtu = (attr->mtu > 0) ? attr->mtu : LDC_DEFAULT_MTU;
2572 
2573 	D1(ldcp->id,
2574 	    "ldc_init: (0x%llx) channel attributes, class=0x%x, "
2575 	    "instance=0x%llx, mode=%d, mtu=%d\n",
2576 	    ldcp->id, ldcp->devclass, ldcp->devinst, ldcp->mode, ldcp->mtu);
2577 
2578 	ldcp->next_vidx = 0;
2579 	ldcp->tstate = TS_IN_RESET;
2580 	ldcp->hstate = 0;
2581 	ldcp->last_msg_snt = LDC_INIT_SEQID;
2582 	ldcp->last_ack_rcd = 0;
2583 	ldcp->last_msg_rcd = 0;
2584 	ldcp->rx_ack_head = ACKPEEK_HEAD_INVALID;
2585 
2586 	ldcp->stream_bufferp = NULL;
2587 	ldcp->exp_dring_list = NULL;
2588 	ldcp->imp_dring_list = NULL;
2589 	ldcp->mhdl_list = NULL;
2590 
2591 	ldcp->tx_intr_state = LDC_INTR_NONE;
2592 	ldcp->rx_intr_state = LDC_INTR_NONE;
2593 
2594 	/* Initialize payload size depending on whether channel is reliable */
2595 	switch (ldcp->mode) {
2596 	case LDC_MODE_RAW:
2597 		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_RAW;
2598 		ldcp->read_p = i_ldc_read_raw;
2599 		ldcp->write_p = i_ldc_write_raw;
2600 		break;
2601 	case LDC_MODE_UNRELIABLE:
2602 		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_UNRELIABLE;
2603 		ldcp->read_p = i_ldc_read_packet;
2604 		ldcp->write_p = i_ldc_write_packet;
2605 		break;
2606 	case LDC_MODE_RELIABLE:
2607 		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_RELIABLE;
2608 
2609 		ldcp->stream_remains = 0;
2610 		ldcp->stream_offset = 0;
2611 		ldcp->stream_bufferp = kmem_alloc(ldcp->mtu, KM_SLEEP);
2612 		ldcp->read_p = i_ldc_read_stream;
2613 		ldcp->write_p = i_ldc_write_stream;
2614 		break;
2615 	default:
2616 		exit_val = EINVAL;
2617 		goto cleanup_on_exit;
2618 	}
2619 
2620 	/*
2621 	 * qlen is (mtu * ldc_mtu_msgs) / pkt_payload. If this
2622 	 * value is smaller than default length of ldc_queue_entries,
2623 	 * qlen is set to ldc_queue_entries. Ensure that computed
2624 	 * length is a power-of-two value.
2625 	 */
2626 	qlen = (ldcp->mtu * ldc_mtu_msgs) / ldcp->pkt_payload;
2627 	if (!ISP2(qlen)) {
2628 		uint64_t	tmp = 1;
2629 		while (qlen) {
2630 			qlen >>= 1; tmp <<= 1;
2631 		}
2632 		qlen = tmp;
2633 	}
2634 
2635 	ldcp->rx_q_entries =
2636 	    (qlen < ldc_queue_entries) ? ldc_queue_entries : qlen;
2637 	ldcp->tx_q_entries = ldcp->rx_q_entries;
2638 
2639 	D1(ldcp->id, "ldc_init: queue length = 0x%llx\n", ldcp->rx_q_entries);
2640 
2641 	/* Create a transmit queue */
2642 	ldcp->tx_q_va = (uint64_t)
2643 	    contig_mem_alloc(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
2644 	if (ldcp->tx_q_va == NULL) {
2645 		cmn_err(CE_WARN,
2646 		    "ldc_init: (0x%lx) TX queue allocation failed\n",
2647 		    ldcp->id);
2648 		exit_val = ENOMEM;
2649 		goto cleanup_on_exit;
2650 	}
2651 	ldcp->tx_q_ra = va_to_pa((caddr_t)ldcp->tx_q_va);
2652 
2653 	D2(ldcp->id, "ldc_init: txq_va=0x%llx, txq_ra=0x%llx, entries=0x%llx\n",
2654 	    ldcp->tx_q_va, ldcp->tx_q_ra, ldcp->tx_q_entries);
2655 
2656 	ldcp->tstate |= TS_TXQ_RDY;
2657 
2658 	/* Create a receive queue */
2659 	ldcp->rx_q_va = (uint64_t)
2660 	    contig_mem_alloc(ldcp->rx_q_entries << LDC_PACKET_SHIFT);
2661 	if (ldcp->rx_q_va == NULL) {
2662 		cmn_err(CE_WARN,
2663 		    "ldc_init: (0x%lx) RX queue allocation failed\n",
2664 		    ldcp->id);
2665 		exit_val = ENOMEM;
2666 		goto cleanup_on_exit;
2667 	}
2668 	ldcp->rx_q_ra = va_to_pa((caddr_t)ldcp->rx_q_va);
2669 
2670 	D2(ldcp->id, "ldc_init: rxq_va=0x%llx, rxq_ra=0x%llx, entries=0x%llx\n",
2671 	    ldcp->rx_q_va, ldcp->rx_q_ra, ldcp->rx_q_entries);
2672 
2673 	ldcp->tstate |= TS_RXQ_RDY;
2674 
2675 	/* Setup a separate read data queue */
2676 	if (ldcp->mode == LDC_MODE_RELIABLE) {
2677 		ldcp->readq_get_state = i_ldc_dq_rx_get_state;
2678 		ldcp->readq_set_head  = i_ldc_set_rxdq_head;
2679 
2680 		/* Make sure the data queue multiplier is a power of 2 */
2681 		if (!ISP2(ldc_rxdq_multiplier)) {
2682 			D1(ldcp->id, "ldc_init: (0x%llx) ldc_rxdq_multiplier "
2683 			    "not a power of 2, resetting", ldcp->id);
2684 			ldc_rxdq_multiplier = LDC_RXDQ_MULTIPLIER;
2685 		}
2686 
2687 		ldcp->rx_dq_entries = ldc_rxdq_multiplier * ldcp->rx_q_entries;
2688 		ldcp->rx_dq_va = (uint64_t)
2689 		    kmem_alloc(ldcp->rx_dq_entries << LDC_PACKET_SHIFT,
2690 		    KM_SLEEP);
2691 		if (ldcp->rx_dq_va == NULL) {
2692 			cmn_err(CE_WARN,
2693 			    "ldc_init: (0x%lx) RX data queue "
2694 			    "allocation failed\n", ldcp->id);
2695 			exit_val = ENOMEM;
2696 			goto cleanup_on_exit;
2697 		}
2698 
2699 		ldcp->rx_dq_head = ldcp->rx_dq_tail = 0;
2700 
2701 		D2(ldcp->id, "ldc_init: rx_dq_va=0x%llx, "
2702 		    "rx_dq_entries=0x%llx\n", ldcp->rx_dq_va,
2703 		    ldcp->rx_dq_entries);
2704 	} else {
2705 		ldcp->readq_get_state = i_ldc_hvq_rx_get_state;
2706 		ldcp->readq_set_head  = i_ldc_set_rx_head;
2707 	}
2708 
2709 	/* Init descriptor ring and memory handle list lock */
2710 	mutex_init(&ldcp->exp_dlist_lock, NULL, MUTEX_DRIVER, NULL);
2711 	mutex_init(&ldcp->imp_dlist_lock, NULL, MUTEX_DRIVER, NULL);
2712 	mutex_init(&ldcp->mlist_lock, NULL, MUTEX_DRIVER, NULL);
2713 
2714 	/* mark status as INITialized */
2715 	ldcp->status = LDC_INIT;
2716 
2717 	/* Add to channel list */
2718 	mutex_enter(&ldcssp->lock);
2719 	ldcp->next = ldcssp->chan_list;
2720 	ldcssp->chan_list = ldcp;
2721 	ldcssp->channel_count++;
2722 	mutex_exit(&ldcssp->lock);
2723 
2724 	/* set the handle */
2725 	*handle = (ldc_handle_t)ldcp;
2726 
2727 	D1(ldcp->id, "ldc_init: (0x%llx) channel initialized\n", ldcp->id);
2728 
2729 	return (0);
2730 
2731 cleanup_on_exit:
2732 
2733 	if (ldcp->mode == LDC_MODE_RELIABLE && ldcp->stream_bufferp)
2734 		kmem_free(ldcp->stream_bufferp, ldcp->mtu);
2735 
2736 	if (ldcp->tstate & TS_TXQ_RDY)
2737 		contig_mem_free((caddr_t)ldcp->tx_q_va,
2738 		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT));
2739 
2740 	if (ldcp->tstate & TS_RXQ_RDY)
2741 		contig_mem_free((caddr_t)ldcp->rx_q_va,
2742 		    (ldcp->rx_q_entries << LDC_PACKET_SHIFT));
2743 
2744 	mutex_destroy(&ldcp->tx_lock);
2745 	mutex_destroy(&ldcp->lock);
2746 
2747 	if (ldcp)
2748 		kmem_free(ldcp, sizeof (ldc_chan_t));
2749 
2750 	return (exit_val);
2751 }
2752 
2753 /*
2754  * Finalizes the LDC connection. It will return EBUSY if the
2755  * channel is open. A ldc_close() has to be done prior to
2756  * a ldc_fini operation. It frees TX/RX queues, associated
2757  * with the channel
2758  */
2759 int
2760 ldc_fini(ldc_handle_t handle)
2761 {
2762 	ldc_chan_t 	*ldcp;
2763 	ldc_chan_t 	*tmp_ldcp;
2764 	uint64_t 	id;
2765 
2766 	if (handle == NULL) {
2767 		DWARN(DBG_ALL_LDCS, "ldc_fini: invalid channel handle\n");
2768 		return (EINVAL);
2769 	}
2770 	ldcp = (ldc_chan_t *)handle;
2771 	id = ldcp->id;
2772 
2773 	mutex_enter(&ldcp->lock);
2774 
2775 	if ((ldcp->tstate & ~TS_IN_RESET) > TS_INIT) {
2776 		DWARN(ldcp->id, "ldc_fini: (0x%llx) channel is open\n",
2777 		    ldcp->id);
2778 		mutex_exit(&ldcp->lock);
2779 		return (EBUSY);
2780 	}
2781 
2782 	/* Remove from the channel list */
2783 	mutex_enter(&ldcssp->lock);
2784 	tmp_ldcp = ldcssp->chan_list;
2785 	if (tmp_ldcp == ldcp) {
2786 		ldcssp->chan_list = ldcp->next;
2787 		ldcp->next = NULL;
2788 	} else {
2789 		while (tmp_ldcp != NULL) {
2790 			if (tmp_ldcp->next == ldcp) {
2791 				tmp_ldcp->next = ldcp->next;
2792 				ldcp->next = NULL;
2793 				break;
2794 			}
2795 			tmp_ldcp = tmp_ldcp->next;
2796 		}
2797 		if (tmp_ldcp == NULL) {
2798 			DWARN(DBG_ALL_LDCS, "ldc_fini: invalid channel hdl\n");
2799 			mutex_exit(&ldcssp->lock);
2800 			mutex_exit(&ldcp->lock);
2801 			return (EINVAL);
2802 		}
2803 	}
2804 
2805 	ldcssp->channel_count--;
2806 
2807 	mutex_exit(&ldcssp->lock);
2808 
2809 	/* Free the map table for this channel */
2810 	if (ldcp->mtbl) {
2811 		(void) hv_ldc_set_map_table(ldcp->id, NULL, NULL);
2812 		if (ldcp->mtbl->contigmem)
2813 			contig_mem_free(ldcp->mtbl->table, ldcp->mtbl->size);
2814 		else
2815 			kmem_free(ldcp->mtbl->table, ldcp->mtbl->size);
2816 		mutex_destroy(&ldcp->mtbl->lock);
2817 		kmem_free(ldcp->mtbl, sizeof (ldc_mtbl_t));
2818 	}
2819 
2820 	/* Destroy descriptor ring and memory handle list lock */
2821 	mutex_destroy(&ldcp->exp_dlist_lock);
2822 	mutex_destroy(&ldcp->imp_dlist_lock);
2823 	mutex_destroy(&ldcp->mlist_lock);
2824 
2825 	/* Free the stream buffer for RELIABLE_MODE */
2826 	if (ldcp->mode == LDC_MODE_RELIABLE && ldcp->stream_bufferp)
2827 		kmem_free(ldcp->stream_bufferp, ldcp->mtu);
2828 
2829 	/* Free the RX queue */
2830 	contig_mem_free((caddr_t)ldcp->rx_q_va,
2831 	    (ldcp->rx_q_entries << LDC_PACKET_SHIFT));
2832 	ldcp->tstate &= ~TS_RXQ_RDY;
2833 
2834 	/* Free the RX data queue */
2835 	if (ldcp->mode == LDC_MODE_RELIABLE) {
2836 		kmem_free((caddr_t)ldcp->rx_dq_va,
2837 		    (ldcp->rx_dq_entries << LDC_PACKET_SHIFT));
2838 	}
2839 
2840 	/* Free the TX queue */
2841 	contig_mem_free((caddr_t)ldcp->tx_q_va,
2842 	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT));
2843 	ldcp->tstate &= ~TS_TXQ_RDY;
2844 
2845 	mutex_exit(&ldcp->lock);
2846 
2847 	/* Destroy mutex */
2848 	mutex_destroy(&ldcp->tx_lock);
2849 	mutex_destroy(&ldcp->lock);
2850 
2851 	/* free channel structure */
2852 	kmem_free(ldcp, sizeof (ldc_chan_t));
2853 
2854 	D1(id, "ldc_fini: (0x%llx) channel finalized\n", id);
2855 
2856 	return (0);
2857 }
2858 
2859 /*
2860  * Open the LDC channel for use. It registers the TX/RX queues
2861  * with the Hypervisor. It also specifies the interrupt number
2862  * and target CPU for this channel
2863  */
2864 int
2865 ldc_open(ldc_handle_t handle)
2866 {
2867 	ldc_chan_t 	*ldcp;
2868 	int 		rv;
2869 
2870 	if (handle == NULL) {
2871 		DWARN(DBG_ALL_LDCS, "ldc_open: invalid channel handle\n");
2872 		return (EINVAL);
2873 	}
2874 
2875 	ldcp = (ldc_chan_t *)handle;
2876 
2877 	mutex_enter(&ldcp->lock);
2878 
2879 	if (ldcp->tstate < TS_INIT) {
2880 		DWARN(ldcp->id,
2881 		    "ldc_open: (0x%llx) channel not initialized\n", ldcp->id);
2882 		mutex_exit(&ldcp->lock);
2883 		return (EFAULT);
2884 	}
2885 	if ((ldcp->tstate & ~TS_IN_RESET) >= TS_OPEN) {
2886 		DWARN(ldcp->id,
2887 		    "ldc_open: (0x%llx) channel is already open\n", ldcp->id);
2888 		mutex_exit(&ldcp->lock);
2889 		return (EFAULT);
2890 	}
2891 
2892 	/*
2893 	 * Unregister/Register the tx queue with the hypervisor
2894 	 */
2895 	rv = hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
2896 	if (rv) {
2897 		cmn_err(CE_WARN,
2898 		    "ldc_open: (0x%lx) channel tx queue unconf failed\n",
2899 		    ldcp->id);
2900 		mutex_exit(&ldcp->lock);
2901 		return (EIO);
2902 	}
2903 
2904 	rv = hv_ldc_tx_qconf(ldcp->id, ldcp->tx_q_ra, ldcp->tx_q_entries);
2905 	if (rv) {
2906 		cmn_err(CE_WARN,
2907 		    "ldc_open: (0x%lx) channel tx queue conf failed\n",
2908 		    ldcp->id);
2909 		mutex_exit(&ldcp->lock);
2910 		return (EIO);
2911 	}
2912 
2913 	D2(ldcp->id, "ldc_open: (0x%llx) registered tx queue with LDC\n",
2914 	    ldcp->id);
2915 
2916 	/*
2917 	 * Unregister/Register the rx queue with the hypervisor
2918 	 */
2919 	rv = hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
2920 	if (rv) {
2921 		cmn_err(CE_WARN,
2922 		    "ldc_open: (0x%lx) channel rx queue unconf failed\n",
2923 		    ldcp->id);
2924 		mutex_exit(&ldcp->lock);
2925 		return (EIO);
2926 	}
2927 
2928 	rv = hv_ldc_rx_qconf(ldcp->id, ldcp->rx_q_ra, ldcp->rx_q_entries);
2929 	if (rv) {
2930 		cmn_err(CE_WARN,
2931 		    "ldc_open: (0x%lx) channel rx queue conf failed\n",
2932 		    ldcp->id);
2933 		mutex_exit(&ldcp->lock);
2934 		return (EIO);
2935 	}
2936 
2937 	D2(ldcp->id, "ldc_open: (0x%llx) registered rx queue with LDC\n",
2938 	    ldcp->id);
2939 
2940 	ldcp->tstate |= TS_QCONF_RDY;
2941 
2942 	/* Register the channel with the channel nexus */
2943 	rv = i_ldc_register_channel(ldcp);
2944 	if (rv && rv != EAGAIN) {
2945 		cmn_err(CE_WARN,
2946 		    "ldc_open: (0x%lx) channel register failed\n", ldcp->id);
2947 		(void) hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
2948 		(void) hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
2949 		mutex_exit(&ldcp->lock);
2950 		return (EIO);
2951 	}
2952 
2953 	/* mark channel in OPEN state */
2954 	ldcp->status = LDC_OPEN;
2955 
2956 	/* Read channel state */
2957 	rv = hv_ldc_tx_get_state(ldcp->id,
2958 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
2959 	if (rv) {
2960 		cmn_err(CE_WARN,
2961 		    "ldc_open: (0x%lx) cannot read channel state\n",
2962 		    ldcp->id);
2963 		(void) i_ldc_unregister_channel(ldcp);
2964 		(void) hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
2965 		(void) hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
2966 		mutex_exit(&ldcp->lock);
2967 		return (EIO);
2968 	}
2969 
2970 	/*
2971 	 * set the ACKd head to current head location for reliable
2972 	 */
2973 	ldcp->tx_ackd_head = ldcp->tx_head;
2974 
2975 	/* mark channel ready if HV report link is UP (peer alloc'd Rx queue) */
2976 	if (ldcp->link_state == LDC_CHANNEL_UP ||
2977 	    ldcp->link_state == LDC_CHANNEL_RESET) {
2978 		ldcp->tstate |= TS_LINK_READY;
2979 		ldcp->status = LDC_READY;
2980 	}
2981 
2982 	/*
2983 	 * if channel is being opened in RAW mode - no handshake is needed
2984 	 * switch the channel READY and UP state
2985 	 */
2986 	if (ldcp->mode == LDC_MODE_RAW) {
2987 		ldcp->tstate = TS_UP;	/* set bits associated with LDC UP */
2988 		ldcp->status = LDC_UP;
2989 	}
2990 
2991 	mutex_exit(&ldcp->lock);
2992 
2993 	/*
2994 	 * Increment number of open channels
2995 	 */
2996 	mutex_enter(&ldcssp->lock);
2997 	ldcssp->channels_open++;
2998 	mutex_exit(&ldcssp->lock);
2999 
3000 	D1(ldcp->id,
3001 	    "ldc_open: (0x%llx) channel (0x%p) open for use "
3002 	    "(tstate=0x%x, status=0x%x)\n",
3003 	    ldcp->id, ldcp, ldcp->tstate, ldcp->status);
3004 
3005 	return (0);
3006 }
3007 
3008 /*
3009  * Close the LDC connection. It will return EBUSY if there
3010  * are memory segments or descriptor rings either bound to or
3011  * mapped over the channel
3012  */
3013 int
3014 ldc_close(ldc_handle_t handle)
3015 {
3016 	ldc_chan_t 	*ldcp;
3017 	int		rv = 0, retries = 0;
3018 	boolean_t	chk_done = B_FALSE;
3019 
3020 	if (handle == NULL) {
3021 		DWARN(DBG_ALL_LDCS, "ldc_close: invalid channel handle\n");
3022 		return (EINVAL);
3023 	}
3024 	ldcp = (ldc_chan_t *)handle;
3025 
3026 	mutex_enter(&ldcp->lock);
3027 
3028 	/* return error if channel is not open */
3029 	if ((ldcp->tstate & ~TS_IN_RESET) < TS_OPEN) {
3030 		DWARN(ldcp->id,
3031 		    "ldc_close: (0x%llx) channel is not open\n", ldcp->id);
3032 		mutex_exit(&ldcp->lock);
3033 		return (EFAULT);
3034 	}
3035 
3036 	/* if any memory handles, drings, are bound or mapped cannot close */
3037 	if (ldcp->mhdl_list != NULL) {
3038 		DWARN(ldcp->id,
3039 		    "ldc_close: (0x%llx) channel has bound memory handles\n",
3040 		    ldcp->id);
3041 		mutex_exit(&ldcp->lock);
3042 		return (EBUSY);
3043 	}
3044 	if (ldcp->exp_dring_list != NULL) {
3045 		DWARN(ldcp->id,
3046 		    "ldc_close: (0x%llx) channel has bound descriptor rings\n",
3047 		    ldcp->id);
3048 		mutex_exit(&ldcp->lock);
3049 		return (EBUSY);
3050 	}
3051 	if (ldcp->imp_dring_list != NULL) {
3052 		DWARN(ldcp->id,
3053 		    "ldc_close: (0x%llx) channel has mapped descriptor rings\n",
3054 		    ldcp->id);
3055 		mutex_exit(&ldcp->lock);
3056 		return (EBUSY);
3057 	}
3058 
3059 	if (ldcp->cb_inprogress) {
3060 		DWARN(ldcp->id, "ldc_close: (0x%llx) callback active\n",
3061 		    ldcp->id);
3062 		mutex_exit(&ldcp->lock);
3063 		return (EWOULDBLOCK);
3064 	}
3065 
3066 	/* Obtain Tx lock */
3067 	mutex_enter(&ldcp->tx_lock);
3068 
3069 	/*
3070 	 * Wait for pending transmits to complete i.e Tx queue to drain
3071 	 * if there are pending pkts - wait 1 ms and retry again
3072 	 */
3073 	for (;;) {
3074 
3075 		rv = hv_ldc_tx_get_state(ldcp->id,
3076 		    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
3077 		if (rv) {
3078 			cmn_err(CE_WARN,
3079 			    "ldc_close: (0x%lx) cannot read qptrs\n", ldcp->id);
3080 			mutex_exit(&ldcp->tx_lock);
3081 			mutex_exit(&ldcp->lock);
3082 			return (EIO);
3083 		}
3084 
3085 		if (ldcp->tx_head == ldcp->tx_tail ||
3086 		    ldcp->link_state != LDC_CHANNEL_UP) {
3087 			break;
3088 		}
3089 
3090 		if (chk_done) {
3091 			DWARN(ldcp->id,
3092 			    "ldc_close: (0x%llx) Tx queue drain timeout\n",
3093 			    ldcp->id);
3094 			break;
3095 		}
3096 
3097 		/* wait for one ms and try again */
3098 		delay(drv_usectohz(1000));
3099 		chk_done = B_TRUE;
3100 	}
3101 
3102 	/*
3103 	 * Drain the Tx and Rx queues as we are closing the
3104 	 * channel. We dont care about any pending packets.
3105 	 * We have to also drain the queue prior to clearing
3106 	 * pending interrupts, otherwise the HV will trigger
3107 	 * an interrupt the moment the interrupt state is
3108 	 * cleared.
3109 	 */
3110 	(void) i_ldc_txq_reconf(ldcp);
3111 	(void) i_ldc_rxq_drain(ldcp);
3112 
3113 	/*
3114 	 * Unregister the channel with the nexus
3115 	 */
3116 	while ((rv = i_ldc_unregister_channel(ldcp)) != 0) {
3117 
3118 		mutex_exit(&ldcp->tx_lock);
3119 		mutex_exit(&ldcp->lock);
3120 
3121 		/* if any error other than EAGAIN return back */
3122 		if (rv != EAGAIN || retries >= ldc_max_retries) {
3123 			cmn_err(CE_WARN,
3124 			    "ldc_close: (0x%lx) unregister failed, %d\n",
3125 			    ldcp->id, rv);
3126 			return (rv);
3127 		}
3128 
3129 		/*
3130 		 * As there could be pending interrupts we need
3131 		 * to wait and try again
3132 		 */
3133 		drv_usecwait(ldc_close_delay);
3134 		mutex_enter(&ldcp->lock);
3135 		mutex_enter(&ldcp->tx_lock);
3136 		retries++;
3137 	}
3138 
3139 	/*
3140 	 * Unregister queues
3141 	 */
3142 	rv = hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
3143 	if (rv) {
3144 		cmn_err(CE_WARN,
3145 		    "ldc_close: (0x%lx) channel TX queue unconf failed\n",
3146 		    ldcp->id);
3147 		mutex_exit(&ldcp->tx_lock);
3148 		mutex_exit(&ldcp->lock);
3149 		return (EIO);
3150 	}
3151 	rv = hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
3152 	if (rv) {
3153 		cmn_err(CE_WARN,
3154 		    "ldc_close: (0x%lx) channel RX queue unconf failed\n",
3155 		    ldcp->id);
3156 		mutex_exit(&ldcp->tx_lock);
3157 		mutex_exit(&ldcp->lock);
3158 		return (EIO);
3159 	}
3160 
3161 	ldcp->tstate &= ~TS_QCONF_RDY;
3162 
3163 	/* Reset channel state information */
3164 	i_ldc_reset_state(ldcp);
3165 
3166 	/* Mark channel as down and in initialized state */
3167 	ldcp->tx_ackd_head = 0;
3168 	ldcp->tx_head = 0;
3169 	ldcp->tstate = TS_IN_RESET|TS_INIT;
3170 	ldcp->status = LDC_INIT;
3171 
3172 	mutex_exit(&ldcp->tx_lock);
3173 	mutex_exit(&ldcp->lock);
3174 
3175 	/* Decrement number of open channels */
3176 	mutex_enter(&ldcssp->lock);
3177 	ldcssp->channels_open--;
3178 	mutex_exit(&ldcssp->lock);
3179 
3180 	D1(ldcp->id, "ldc_close: (0x%llx) channel closed\n", ldcp->id);
3181 
3182 	return (0);
3183 }
3184 
3185 /*
3186  * Register channel callback
3187  */
3188 int
3189 ldc_reg_callback(ldc_handle_t handle,
3190     uint_t(*cb)(uint64_t event, caddr_t arg), caddr_t arg)
3191 {
3192 	ldc_chan_t *ldcp;
3193 
3194 	if (handle == NULL) {
3195 		DWARN(DBG_ALL_LDCS,
3196 		    "ldc_reg_callback: invalid channel handle\n");
3197 		return (EINVAL);
3198 	}
3199 	if (((uint64_t)cb) < KERNELBASE) {
3200 		DWARN(DBG_ALL_LDCS, "ldc_reg_callback: invalid callback\n");
3201 		return (EINVAL);
3202 	}
3203 	ldcp = (ldc_chan_t *)handle;
3204 
3205 	mutex_enter(&ldcp->lock);
3206 
3207 	if (ldcp->cb) {
3208 		DWARN(ldcp->id, "ldc_reg_callback: (0x%llx) callback exists\n",
3209 		    ldcp->id);
3210 		mutex_exit(&ldcp->lock);
3211 		return (EIO);
3212 	}
3213 	if (ldcp->cb_inprogress) {
3214 		DWARN(ldcp->id, "ldc_reg_callback: (0x%llx) callback active\n",
3215 		    ldcp->id);
3216 		mutex_exit(&ldcp->lock);
3217 		return (EWOULDBLOCK);
3218 	}
3219 
3220 	ldcp->cb = cb;
3221 	ldcp->cb_arg = arg;
3222 	ldcp->cb_enabled = B_TRUE;
3223 
3224 	D1(ldcp->id,
3225 	    "ldc_reg_callback: (0x%llx) registered callback for channel\n",
3226 	    ldcp->id);
3227 
3228 	mutex_exit(&ldcp->lock);
3229 
3230 	return (0);
3231 }
3232 
3233 /*
3234  * Unregister channel callback
3235  */
3236 int
3237 ldc_unreg_callback(ldc_handle_t handle)
3238 {
3239 	ldc_chan_t *ldcp;
3240 
3241 	if (handle == NULL) {
3242 		DWARN(DBG_ALL_LDCS,
3243 		    "ldc_unreg_callback: invalid channel handle\n");
3244 		return (EINVAL);
3245 	}
3246 	ldcp = (ldc_chan_t *)handle;
3247 
3248 	mutex_enter(&ldcp->lock);
3249 
3250 	if (ldcp->cb == NULL) {
3251 		DWARN(ldcp->id,
3252 		    "ldc_unreg_callback: (0x%llx) no callback exists\n",
3253 		    ldcp->id);
3254 		mutex_exit(&ldcp->lock);
3255 		return (EIO);
3256 	}
3257 	if (ldcp->cb_inprogress) {
3258 		DWARN(ldcp->id,
3259 		    "ldc_unreg_callback: (0x%llx) callback active\n",
3260 		    ldcp->id);
3261 		mutex_exit(&ldcp->lock);
3262 		return (EWOULDBLOCK);
3263 	}
3264 
3265 	ldcp->cb = NULL;
3266 	ldcp->cb_arg = NULL;
3267 	ldcp->cb_enabled = B_FALSE;
3268 
3269 	D1(ldcp->id,
3270 	    "ldc_unreg_callback: (0x%llx) unregistered callback for channel\n",
3271 	    ldcp->id);
3272 
3273 	mutex_exit(&ldcp->lock);
3274 
3275 	return (0);
3276 }
3277 
3278 
3279 /*
3280  * Bring a channel up by initiating a handshake with the peer
3281  * This call is asynchronous. It will complete at a later point
3282  * in time when the peer responds back with an RTR.
3283  */
3284 int
3285 ldc_up(ldc_handle_t handle)
3286 {
3287 	int 		rv;
3288 	ldc_chan_t 	*ldcp;
3289 	ldc_msg_t 	*ldcmsg;
3290 	uint64_t 	tx_tail, tstate, link_state;
3291 
3292 	if (handle == NULL) {
3293 		DWARN(DBG_ALL_LDCS, "ldc_up: invalid channel handle\n");
3294 		return (EINVAL);
3295 	}
3296 	ldcp = (ldc_chan_t *)handle;
3297 
3298 	mutex_enter(&ldcp->lock);
3299 
3300 	D1(ldcp->id, "ldc_up: (0x%llx) doing channel UP\n", ldcp->id);
3301 
3302 	/* clear the reset state */
3303 	tstate = ldcp->tstate;
3304 	ldcp->tstate &= ~TS_IN_RESET;
3305 
3306 	if (ldcp->tstate == TS_UP) {
3307 		DWARN(ldcp->id,
3308 		    "ldc_up: (0x%llx) channel is already in UP state\n",
3309 		    ldcp->id);
3310 
3311 		/* mark channel as up */
3312 		ldcp->status = LDC_UP;
3313 
3314 		/*
3315 		 * if channel was in reset state and there was
3316 		 * pending data clear interrupt state. this will
3317 		 * trigger an interrupt, causing the RX handler to
3318 		 * to invoke the client's callback
3319 		 */
3320 		if ((tstate & TS_IN_RESET) &&
3321 		    ldcp->rx_intr_state == LDC_INTR_PEND) {
3322 			D1(ldcp->id,
3323 			    "ldc_up: (0x%llx) channel has pending data, "
3324 			    "clearing interrupt\n", ldcp->id);
3325 			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
3326 		}
3327 
3328 		mutex_exit(&ldcp->lock);
3329 		return (0);
3330 	}
3331 
3332 	/* if the channel is in RAW mode - mark it as UP, if READY */
3333 	if (ldcp->mode == LDC_MODE_RAW && ldcp->tstate >= TS_READY) {
3334 		ldcp->tstate = TS_UP;
3335 		mutex_exit(&ldcp->lock);
3336 		return (0);
3337 	}
3338 
3339 	/* Don't start another handshake if there is one in progress */
3340 	if (ldcp->hstate) {
3341 		D1(ldcp->id,
3342 		    "ldc_up: (0x%llx) channel handshake in progress\n",
3343 		    ldcp->id);
3344 		mutex_exit(&ldcp->lock);
3345 		return (0);
3346 	}
3347 
3348 	mutex_enter(&ldcp->tx_lock);
3349 
3350 	/* save current link state */
3351 	link_state = ldcp->link_state;
3352 
3353 	/* get the current tail for the LDC msg */
3354 	rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
3355 	if (rv) {
3356 		D1(ldcp->id, "ldc_up: (0x%llx) cannot initiate handshake\n",
3357 		    ldcp->id);
3358 		mutex_exit(&ldcp->tx_lock);
3359 		mutex_exit(&ldcp->lock);
3360 		return (ECONNREFUSED);
3361 	}
3362 
3363 	/*
3364 	 * If i_ldc_get_tx_tail() changed link_state to either RESET or UP,
3365 	 * from a previous state of DOWN, then mark the channel as
3366 	 * being ready for handshake.
3367 	 */
3368 	if ((link_state == LDC_CHANNEL_DOWN) &&
3369 	    (link_state != ldcp->link_state)) {
3370 
3371 		ASSERT((ldcp->link_state == LDC_CHANNEL_RESET) ||
3372 		    (ldcp->link_state == LDC_CHANNEL_UP));
3373 
3374 		if (ldcp->mode == LDC_MODE_RAW) {
3375 			ldcp->status = LDC_UP;
3376 			ldcp->tstate = TS_UP;
3377 			mutex_exit(&ldcp->tx_lock);
3378 			mutex_exit(&ldcp->lock);
3379 			return (0);
3380 		} else {
3381 			ldcp->status = LDC_READY;
3382 			ldcp->tstate |= TS_LINK_READY;
3383 		}
3384 
3385 	}
3386 
3387 	ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
3388 	ZERO_PKT(ldcmsg);
3389 
3390 	ldcmsg->type = LDC_CTRL;
3391 	ldcmsg->stype = LDC_INFO;
3392 	ldcmsg->ctrl = LDC_VER;
3393 	ldcp->next_vidx = 0;
3394 	bcopy(&ldc_versions[0], ldcmsg->udata, sizeof (ldc_versions[0]));
3395 
3396 	DUMP_LDC_PKT(ldcp, "ldc_up snd ver", (uint64_t)ldcmsg);
3397 
3398 	/* initiate the send by calling into HV and set the new tail */
3399 	tx_tail = (tx_tail + LDC_PACKET_SIZE) %
3400 	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
3401 
3402 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
3403 	if (rv) {
3404 		DWARN(ldcp->id,
3405 		    "ldc_up: (0x%llx) cannot initiate handshake rv=%d\n",
3406 		    ldcp->id, rv);
3407 		mutex_exit(&ldcp->tx_lock);
3408 		mutex_exit(&ldcp->lock);
3409 		return (rv);
3410 	}
3411 
3412 	ldcp->hstate |= TS_SENT_VER;
3413 	ldcp->tx_tail = tx_tail;
3414 	D1(ldcp->id, "ldc_up: (0x%llx) channel up initiated\n", ldcp->id);
3415 
3416 	mutex_exit(&ldcp->tx_lock);
3417 	mutex_exit(&ldcp->lock);
3418 
3419 	return (rv);
3420 }
3421 
3422 
3423 /*
3424  * Bring a channel down by resetting its state and queues
3425  */
3426 int
3427 ldc_down(ldc_handle_t handle)
3428 {
3429 	ldc_chan_t 	*ldcp;
3430 
3431 	if (handle == NULL) {
3432 		DWARN(DBG_ALL_LDCS, "ldc_down: invalid channel handle\n");
3433 		return (EINVAL);
3434 	}
3435 	ldcp = (ldc_chan_t *)handle;
3436 	mutex_enter(&ldcp->lock);
3437 	mutex_enter(&ldcp->tx_lock);
3438 	i_ldc_reset(ldcp, B_TRUE);
3439 	mutex_exit(&ldcp->tx_lock);
3440 	mutex_exit(&ldcp->lock);
3441 
3442 	return (0);
3443 }
3444 
3445 /*
3446  * Get the current channel status
3447  */
3448 int
3449 ldc_status(ldc_handle_t handle, ldc_status_t *status)
3450 {
3451 	ldc_chan_t *ldcp;
3452 
3453 	if (handle == NULL || status == NULL) {
3454 		DWARN(DBG_ALL_LDCS, "ldc_status: invalid argument\n");
3455 		return (EINVAL);
3456 	}
3457 	ldcp = (ldc_chan_t *)handle;
3458 
3459 	*status = ((ldc_chan_t *)handle)->status;
3460 
3461 	D1(ldcp->id,
3462 	    "ldc_status: (0x%llx) returned status %d\n", ldcp->id, *status);
3463 	return (0);
3464 }
3465 
3466 
3467 /*
3468  * Set the channel's callback mode - enable/disable callbacks
3469  */
3470 int
3471 ldc_set_cb_mode(ldc_handle_t handle, ldc_cb_mode_t cmode)
3472 {
3473 	ldc_chan_t 	*ldcp;
3474 
3475 	if (handle == NULL) {
3476 		DWARN(DBG_ALL_LDCS,
3477 		    "ldc_set_intr_mode: invalid channel handle\n");
3478 		return (EINVAL);
3479 	}
3480 	ldcp = (ldc_chan_t *)handle;
3481 
3482 	/*
3483 	 * Record no callbacks should be invoked
3484 	 */
3485 	mutex_enter(&ldcp->lock);
3486 
3487 	switch (cmode) {
3488 	case LDC_CB_DISABLE:
3489 		if (!ldcp->cb_enabled) {
3490 			DWARN(ldcp->id,
3491 			    "ldc_set_cb_mode: (0x%llx) callbacks disabled\n",
3492 			    ldcp->id);
3493 			break;
3494 		}
3495 		ldcp->cb_enabled = B_FALSE;
3496 
3497 		D1(ldcp->id, "ldc_set_cb_mode: (0x%llx) disabled callbacks\n",
3498 		    ldcp->id);
3499 		break;
3500 
3501 	case LDC_CB_ENABLE:
3502 		if (ldcp->cb_enabled) {
3503 			DWARN(ldcp->id,
3504 			    "ldc_set_cb_mode: (0x%llx) callbacks enabled\n",
3505 			    ldcp->id);
3506 			break;
3507 		}
3508 		ldcp->cb_enabled = B_TRUE;
3509 
3510 		D1(ldcp->id, "ldc_set_cb_mode: (0x%llx) enabled callbacks\n",
3511 		    ldcp->id);
3512 		break;
3513 	}
3514 
3515 	mutex_exit(&ldcp->lock);
3516 
3517 	return (0);
3518 }
3519 
3520 /*
3521  * Check to see if there are packets on the incoming queue
3522  * Will return hasdata = B_FALSE if there are no packets
3523  */
3524 int
3525 ldc_chkq(ldc_handle_t handle, boolean_t *hasdata)
3526 {
3527 	int 		rv;
3528 	uint64_t 	rx_head, rx_tail;
3529 	ldc_chan_t 	*ldcp;
3530 
3531 	if (handle == NULL) {
3532 		DWARN(DBG_ALL_LDCS, "ldc_chkq: invalid channel handle\n");
3533 		return (EINVAL);
3534 	}
3535 	ldcp = (ldc_chan_t *)handle;
3536 
3537 	*hasdata = B_FALSE;
3538 
3539 	mutex_enter(&ldcp->lock);
3540 
3541 	if (ldcp->tstate != TS_UP) {
3542 		D1(ldcp->id,
3543 		    "ldc_chkq: (0x%llx) channel is not up\n", ldcp->id);
3544 		mutex_exit(&ldcp->lock);
3545 		return (ECONNRESET);
3546 	}
3547 
3548 	/* Read packet(s) from the queue */
3549 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
3550 	    &ldcp->link_state);
3551 	if (rv != 0) {
3552 		cmn_err(CE_WARN,
3553 		    "ldc_chkq: (0x%lx) unable to read queue ptrs", ldcp->id);
3554 		mutex_exit(&ldcp->lock);
3555 		return (EIO);
3556 	}
3557 
3558 	/* reset the channel state if the channel went down */
3559 	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
3560 	    ldcp->link_state == LDC_CHANNEL_RESET) {
3561 		mutex_enter(&ldcp->tx_lock);
3562 		i_ldc_reset(ldcp, B_FALSE);
3563 		mutex_exit(&ldcp->tx_lock);
3564 		mutex_exit(&ldcp->lock);
3565 		return (ECONNRESET);
3566 	}
3567 
3568 	switch (ldcp->mode) {
3569 	case LDC_MODE_RAW:
3570 		/*
3571 		 * In raw mode, there are no ctrl packets, so checking
3572 		 * if the queue is non-empty is sufficient.
3573 		 */
3574 		*hasdata = (rx_head != rx_tail);
3575 		break;
3576 
3577 	case LDC_MODE_UNRELIABLE:
3578 		/*
3579 		 * In unreliable mode, if the queue is non-empty, we need
3580 		 * to check if it actually contains unread data packets.
3581 		 * The queue may just contain ctrl packets.
3582 		 */
3583 		if (rx_head != rx_tail) {
3584 			*hasdata = (i_ldc_chkq(ldcp) == 0);
3585 			/*
3586 			 * If no data packets were found on the queue,
3587 			 * all packets must have been control packets
3588 			 * which will now have been processed, leaving
3589 			 * the queue empty. If the interrupt state
3590 			 * is pending, we need to clear the interrupt
3591 			 * here.
3592 			 */
3593 			if (*hasdata == B_FALSE &&
3594 			    ldcp->rx_intr_state == LDC_INTR_PEND) {
3595 				i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
3596 			}
3597 		}
3598 		break;
3599 
3600 	case LDC_MODE_RELIABLE:
3601 		/*
3602 		 * In reliable mode, first check for 'stream_remains' > 0.
3603 		 * Otherwise, if the data queue head and tail pointers
3604 		 * differ, there must be data to read.
3605 		 */
3606 		if (ldcp->stream_remains > 0)
3607 			*hasdata = B_TRUE;
3608 		else
3609 			*hasdata = (ldcp->rx_dq_head != ldcp->rx_dq_tail);
3610 		break;
3611 
3612 	default:
3613 		cmn_err(CE_WARN, "ldc_chkq: (0x%lx) unexpected channel mode "
3614 		    "(0x%x)", ldcp->id, ldcp->mode);
3615 		mutex_exit(&ldcp->lock);
3616 		return (EIO);
3617 	}
3618 
3619 	mutex_exit(&ldcp->lock);
3620 
3621 	return (0);
3622 }
3623 
3624 
3625 /*
3626  * Read 'size' amount of bytes or less. If incoming buffer
3627  * is more than 'size', ENOBUFS is returned.
3628  *
3629  * On return, size contains the number of bytes read.
3630  */
3631 int
3632 ldc_read(ldc_handle_t handle, caddr_t bufp, size_t *sizep)
3633 {
3634 	ldc_chan_t 	*ldcp;
3635 	uint64_t 	rx_head = 0, rx_tail = 0;
3636 	int		rv = 0, exit_val;
3637 
3638 	if (handle == NULL) {
3639 		DWARN(DBG_ALL_LDCS, "ldc_read: invalid channel handle\n");
3640 		return (EINVAL);
3641 	}
3642 
3643 	ldcp = (ldc_chan_t *)handle;
3644 
3645 	/* channel lock */
3646 	mutex_enter(&ldcp->lock);
3647 
3648 	if (ldcp->tstate != TS_UP) {
3649 		DWARN(ldcp->id,
3650 		    "ldc_read: (0x%llx) channel is not in UP state\n",
3651 		    ldcp->id);
3652 		exit_val = ECONNRESET;
3653 	} else if (ldcp->mode == LDC_MODE_RELIABLE) {
3654 		TRACE_RXDQ_LENGTH(ldcp);
3655 		exit_val = ldcp->read_p(ldcp, bufp, sizep);
3656 		mutex_exit(&ldcp->lock);
3657 		return (exit_val);
3658 	} else {
3659 		exit_val = ldcp->read_p(ldcp, bufp, sizep);
3660 	}
3661 
3662 	/*
3663 	 * if queue has been drained - clear interrupt
3664 	 */
3665 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
3666 	    &ldcp->link_state);
3667 	if (rv != 0) {
3668 		cmn_err(CE_WARN, "ldc_read: (0x%lx) unable to read queue ptrs",
3669 		    ldcp->id);
3670 		mutex_enter(&ldcp->tx_lock);
3671 		i_ldc_reset(ldcp, B_TRUE);
3672 		mutex_exit(&ldcp->tx_lock);
3673 		mutex_exit(&ldcp->lock);
3674 		return (ECONNRESET);
3675 	}
3676 
3677 	if (exit_val == 0) {
3678 		if (ldcp->link_state == LDC_CHANNEL_DOWN ||
3679 		    ldcp->link_state == LDC_CHANNEL_RESET) {
3680 			mutex_enter(&ldcp->tx_lock);
3681 			i_ldc_reset(ldcp, B_FALSE);
3682 			exit_val = ECONNRESET;
3683 			mutex_exit(&ldcp->tx_lock);
3684 		}
3685 		if ((rv == 0) &&
3686 		    (ldcp->rx_intr_state == LDC_INTR_PEND) &&
3687 		    (rx_head == rx_tail)) {
3688 			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
3689 		}
3690 	}
3691 
3692 	mutex_exit(&ldcp->lock);
3693 	return (exit_val);
3694 }
3695 
3696 /*
3697  * Basic raw mondo read -
3698  * no interpretation of mondo contents at all.
3699  *
3700  * Enter and exit with ldcp->lock held by caller
3701  */
3702 static int
3703 i_ldc_read_raw(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep)
3704 {
3705 	uint64_t 	q_size_mask;
3706 	ldc_msg_t 	*msgp;
3707 	uint8_t		*msgbufp;
3708 	int		rv = 0, space;
3709 	uint64_t 	rx_head, rx_tail;
3710 
3711 	space = *sizep;
3712 
3713 	if (space < LDC_PAYLOAD_SIZE_RAW)
3714 		return (ENOBUFS);
3715 
3716 	ASSERT(mutex_owned(&ldcp->lock));
3717 
3718 	/* compute mask for increment */
3719 	q_size_mask = (ldcp->rx_q_entries-1)<<LDC_PACKET_SHIFT;
3720 
3721 	/*
3722 	 * Read packet(s) from the queue
3723 	 */
3724 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
3725 	    &ldcp->link_state);
3726 	if (rv != 0) {
3727 		cmn_err(CE_WARN,
3728 		    "ldc_read_raw: (0x%lx) unable to read queue ptrs",
3729 		    ldcp->id);
3730 		return (EIO);
3731 	}
3732 	D1(ldcp->id, "ldc_read_raw: (0x%llx) rxh=0x%llx,"
3733 	    " rxt=0x%llx, st=0x%llx\n",
3734 	    ldcp->id, rx_head, rx_tail, ldcp->link_state);
3735 
3736 	/* reset the channel state if the channel went down */
3737 	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
3738 	    ldcp->link_state == LDC_CHANNEL_RESET) {
3739 		mutex_enter(&ldcp->tx_lock);
3740 		i_ldc_reset(ldcp, B_FALSE);
3741 		mutex_exit(&ldcp->tx_lock);
3742 		return (ECONNRESET);
3743 	}
3744 
3745 	/*
3746 	 * Check for empty queue
3747 	 */
3748 	if (rx_head == rx_tail) {
3749 		*sizep = 0;
3750 		return (0);
3751 	}
3752 
3753 	/* get the message */
3754 	msgp = (ldc_msg_t *)(ldcp->rx_q_va + rx_head);
3755 
3756 	/* if channel is in RAW mode, copy data and return */
3757 	msgbufp = (uint8_t *)&(msgp->raw[0]);
3758 
3759 	bcopy(msgbufp, target_bufp, LDC_PAYLOAD_SIZE_RAW);
3760 
3761 	DUMP_PAYLOAD(ldcp->id, msgbufp);
3762 
3763 	*sizep = LDC_PAYLOAD_SIZE_RAW;
3764 
3765 	rx_head = (rx_head + LDC_PACKET_SIZE) & q_size_mask;
3766 	rv = i_ldc_set_rx_head(ldcp, rx_head);
3767 
3768 	return (rv);
3769 }
3770 
3771 /*
3772  * Process LDC mondos to build larger packets
3773  * with either un-reliable or reliable delivery.
3774  *
3775  * Enter and exit with ldcp->lock held by caller
3776  */
3777 static int
3778 i_ldc_read_packet(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep)
3779 {
3780 	int		rv = 0;
3781 	uint64_t 	rx_head = 0, rx_tail = 0;
3782 	uint64_t 	curr_head = 0;
3783 	ldc_msg_t 	*msg;
3784 	caddr_t 	target;
3785 	size_t 		len = 0, bytes_read = 0;
3786 	int 		retries = 0;
3787 	uint64_t 	q_va, q_size_mask;
3788 	uint64_t	first_fragment = 0;
3789 
3790 	target = target_bufp;
3791 
3792 	ASSERT(mutex_owned(&ldcp->lock));
3793 
3794 	/* check if the buffer and size are valid */
3795 	if (target_bufp == NULL || *sizep == 0) {
3796 		DWARN(ldcp->id, "ldc_read: (0x%llx) invalid buffer/size\n",
3797 		    ldcp->id);
3798 		return (EINVAL);
3799 	}
3800 
3801 	/* Set q_va and compute increment mask for the appropriate queue */
3802 	if (ldcp->mode == LDC_MODE_RELIABLE) {
3803 		q_va	    = ldcp->rx_dq_va;
3804 		q_size_mask = (ldcp->rx_dq_entries-1)<<LDC_PACKET_SHIFT;
3805 	} else {
3806 		q_va	    = ldcp->rx_q_va;
3807 		q_size_mask = (ldcp->rx_q_entries-1)<<LDC_PACKET_SHIFT;
3808 	}
3809 
3810 	/*
3811 	 * Read packet(s) from the queue
3812 	 */
3813 	rv = ldcp->readq_get_state(ldcp, &curr_head, &rx_tail,
3814 	    &ldcp->link_state);
3815 	if (rv != 0) {
3816 		cmn_err(CE_WARN, "ldc_read: (0x%lx) unable to read queue ptrs",
3817 		    ldcp->id);
3818 		mutex_enter(&ldcp->tx_lock);
3819 		i_ldc_reset(ldcp, B_TRUE);
3820 		mutex_exit(&ldcp->tx_lock);
3821 		return (ECONNRESET);
3822 	}
3823 	D1(ldcp->id, "ldc_read: (0x%llx) chd=0x%llx, tl=0x%llx, st=0x%llx\n",
3824 	    ldcp->id, curr_head, rx_tail, ldcp->link_state);
3825 
3826 	/* reset the channel state if the channel went down */
3827 	if (ldcp->link_state != LDC_CHANNEL_UP)
3828 		goto channel_is_reset;
3829 
3830 	for (;;) {
3831 
3832 		if (curr_head == rx_tail) {
3833 			/*
3834 			 * If a data queue is being used, check the Rx HV
3835 			 * queue. This will copy over any new data packets
3836 			 * that have arrived.
3837 			 */
3838 			if (ldcp->mode == LDC_MODE_RELIABLE)
3839 				(void) i_ldc_chkq(ldcp);
3840 
3841 			rv = ldcp->readq_get_state(ldcp,
3842 			    &rx_head, &rx_tail, &ldcp->link_state);
3843 			if (rv != 0) {
3844 				cmn_err(CE_WARN,
3845 				    "ldc_read: (0x%lx) cannot read queue ptrs",
3846 				    ldcp->id);
3847 				mutex_enter(&ldcp->tx_lock);
3848 				i_ldc_reset(ldcp, B_TRUE);
3849 				mutex_exit(&ldcp->tx_lock);
3850 				return (ECONNRESET);
3851 			}
3852 
3853 			if (ldcp->link_state != LDC_CHANNEL_UP)
3854 				goto channel_is_reset;
3855 
3856 			if (curr_head == rx_tail) {
3857 
3858 				/* If in the middle of a fragmented xfer */
3859 				if (first_fragment != 0) {
3860 
3861 					/* wait for ldc_delay usecs */
3862 					drv_usecwait(ldc_delay);
3863 
3864 					if (++retries < ldc_max_retries)
3865 						continue;
3866 
3867 					*sizep = 0;
3868 					if (ldcp->mode != LDC_MODE_RELIABLE)
3869 						ldcp->last_msg_rcd =
3870 						    first_fragment - 1;
3871 					DWARN(DBG_ALL_LDCS, "ldc_read: "
3872 					    "(0x%llx) read timeout", ldcp->id);
3873 					return (EAGAIN);
3874 				}
3875 				*sizep = 0;
3876 				break;
3877 			}
3878 		}
3879 		retries = 0;
3880 
3881 		D2(ldcp->id,
3882 		    "ldc_read: (0x%llx) chd=0x%llx, rxhd=0x%llx, rxtl=0x%llx\n",
3883 		    ldcp->id, curr_head, rx_head, rx_tail);
3884 
3885 		/* get the message */
3886 		msg = (ldc_msg_t *)(q_va + curr_head);
3887 
3888 		DUMP_LDC_PKT(ldcp, "ldc_read received pkt",
3889 		    ldcp->rx_q_va + curr_head);
3890 
3891 		/* Check the message ID for the message received */
3892 		if (ldcp->mode != LDC_MODE_RELIABLE) {
3893 			if ((rv = i_ldc_check_seqid(ldcp, msg)) != 0) {
3894 
3895 				DWARN(ldcp->id, "ldc_read: (0x%llx) seqid "
3896 				    "error, q_ptrs=0x%lx,0x%lx",
3897 				    ldcp->id, rx_head, rx_tail);
3898 
3899 				/* throw away data */
3900 				bytes_read = 0;
3901 
3902 				/* Reset last_msg_rcd to start of message */
3903 				if (first_fragment != 0) {
3904 					ldcp->last_msg_rcd = first_fragment - 1;
3905 					first_fragment = 0;
3906 				}
3907 				/*
3908 				 * Send a NACK -- invalid seqid
3909 				 * get the current tail for the response
3910 				 */
3911 				rv = i_ldc_send_pkt(ldcp, msg->type, LDC_NACK,
3912 				    (msg->ctrl & LDC_CTRL_MASK));
3913 				if (rv) {
3914 					cmn_err(CE_NOTE,
3915 					    "ldc_read: (0x%lx) err sending "
3916 					    "NACK msg\n", ldcp->id);
3917 
3918 					/* if cannot send NACK - reset chan */
3919 					mutex_enter(&ldcp->tx_lock);
3920 					i_ldc_reset(ldcp, B_FALSE);
3921 					mutex_exit(&ldcp->tx_lock);
3922 					rv = ECONNRESET;
3923 					break;
3924 				}
3925 
3926 				/* purge receive queue */
3927 				rv = i_ldc_set_rx_head(ldcp, rx_tail);
3928 
3929 				break;
3930 			}
3931 
3932 			/*
3933 			 * Process any messages of type CTRL messages
3934 			 * Future implementations should try to pass these
3935 			 * to LDC link by resetting the intr state.
3936 			 *
3937 			 * NOTE: not done as a switch() as type can be
3938 			 * both ctrl+data
3939 			 */
3940 			if (msg->type & LDC_CTRL) {
3941 				if (rv = i_ldc_ctrlmsg(ldcp, msg)) {
3942 					if (rv == EAGAIN)
3943 						continue;
3944 					rv = i_ldc_set_rx_head(ldcp, rx_tail);
3945 					*sizep = 0;
3946 					bytes_read = 0;
3947 					break;
3948 				}
3949 			}
3950 
3951 			/* process data ACKs */
3952 			if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) {
3953 				if (rv = i_ldc_process_data_ACK(ldcp, msg)) {
3954 					*sizep = 0;
3955 					bytes_read = 0;
3956 					break;
3957 				}
3958 			}
3959 
3960 			/* process data NACKs */
3961 			if ((msg->type & LDC_DATA) && (msg->stype & LDC_NACK)) {
3962 				DWARN(ldcp->id,
3963 				    "ldc_read: (0x%llx) received DATA/NACK",
3964 				    ldcp->id);
3965 				mutex_enter(&ldcp->tx_lock);
3966 				i_ldc_reset(ldcp, B_TRUE);
3967 				mutex_exit(&ldcp->tx_lock);
3968 				return (ECONNRESET);
3969 			}
3970 		}
3971 
3972 		/* process data messages */
3973 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_INFO)) {
3974 
3975 			uint8_t *msgbuf = (uint8_t *)(
3976 			    (ldcp->mode == LDC_MODE_RELIABLE) ?
3977 			    msg->rdata : msg->udata);
3978 
3979 			D2(ldcp->id,
3980 			    "ldc_read: (0x%llx) received data msg\n", ldcp->id);
3981 
3982 			/* get the packet length */
3983 			len = (msg->env & LDC_LEN_MASK);
3984 
3985 				/*
3986 				 * FUTURE OPTIMIZATION:
3987 				 * dont need to set q head for every
3988 				 * packet we read just need to do this when
3989 				 * we are done or need to wait for more
3990 				 * mondos to make a full packet - this is
3991 				 * currently expensive.
3992 				 */
3993 
3994 			if (first_fragment == 0) {
3995 
3996 				/*
3997 				 * first packets should always have the start
3998 				 * bit set (even for a single packet). If not
3999 				 * throw away the packet
4000 				 */
4001 				if (!(msg->env & LDC_FRAG_START)) {
4002 
4003 					DWARN(DBG_ALL_LDCS,
4004 					    "ldc_read: (0x%llx) not start - "
4005 					    "frag=%x\n", ldcp->id,
4006 					    (msg->env) & LDC_FRAG_MASK);
4007 
4008 					/* toss pkt, inc head, cont reading */
4009 					bytes_read = 0;
4010 					target = target_bufp;
4011 					curr_head =
4012 					    (curr_head + LDC_PACKET_SIZE)
4013 					    & q_size_mask;
4014 					if (rv = ldcp->readq_set_head(ldcp,
4015 					    curr_head))
4016 						break;
4017 
4018 					continue;
4019 				}
4020 
4021 				first_fragment = msg->seqid;
4022 			} else {
4023 				/* check to see if this is a pkt w/ START bit */
4024 				if (msg->env & LDC_FRAG_START) {
4025 					DWARN(DBG_ALL_LDCS,
4026 					    "ldc_read:(0x%llx) unexpected pkt"
4027 					    " env=0x%x discarding %d bytes,"
4028 					    " lastmsg=%d, currentmsg=%d\n",
4029 					    ldcp->id, msg->env&LDC_FRAG_MASK,
4030 					    bytes_read, ldcp->last_msg_rcd,
4031 					    msg->seqid);
4032 
4033 					/* throw data we have read so far */
4034 					bytes_read = 0;
4035 					target = target_bufp;
4036 					first_fragment = msg->seqid;
4037 
4038 					if (rv = ldcp->readq_set_head(ldcp,
4039 					    curr_head))
4040 						break;
4041 				}
4042 			}
4043 
4044 			/* copy (next) pkt into buffer */
4045 			if (len <= (*sizep - bytes_read)) {
4046 				bcopy(msgbuf, target, len);
4047 				target += len;
4048 				bytes_read += len;
4049 			} else {
4050 				/*
4051 				 * there is not enough space in the buffer to
4052 				 * read this pkt. throw message away & continue
4053 				 * reading data from queue
4054 				 */
4055 				DWARN(DBG_ALL_LDCS,
4056 				    "ldc_read: (0x%llx) buffer too small, "
4057 				    "head=0x%lx, expect=%d, got=%d\n", ldcp->id,
4058 				    curr_head, *sizep, bytes_read+len);
4059 
4060 				first_fragment = 0;
4061 				target = target_bufp;
4062 				bytes_read = 0;
4063 
4064 				/* throw away everything received so far */
4065 				if (rv = ldcp->readq_set_head(ldcp, curr_head))
4066 					break;
4067 
4068 				/* continue reading remaining pkts */
4069 				continue;
4070 			}
4071 		}
4072 
4073 		/* set the message id */
4074 		if (ldcp->mode != LDC_MODE_RELIABLE)
4075 			ldcp->last_msg_rcd = msg->seqid;
4076 
4077 		/* move the head one position */
4078 		curr_head = (curr_head + LDC_PACKET_SIZE) & q_size_mask;
4079 
4080 		if (msg->env & LDC_FRAG_STOP) {
4081 
4082 			/*
4083 			 * All pkts that are part of this fragmented transfer
4084 			 * have been read or this was a single pkt read
4085 			 * or there was an error
4086 			 */
4087 
4088 			/* set the queue head */
4089 			if (rv = ldcp->readq_set_head(ldcp, curr_head))
4090 				bytes_read = 0;
4091 
4092 			*sizep = bytes_read;
4093 
4094 			break;
4095 		}
4096 
4097 		/* advance head if it is a CTRL packet or a DATA ACK packet */
4098 		if ((msg->type & LDC_CTRL) ||
4099 		    ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK))) {
4100 
4101 			/* set the queue head */
4102 			if (rv = ldcp->readq_set_head(ldcp, curr_head)) {
4103 				bytes_read = 0;
4104 				break;
4105 			}
4106 
4107 			D2(ldcp->id, "ldc_read: (0x%llx) set ACK qhead 0x%llx",
4108 			    ldcp->id, curr_head);
4109 		}
4110 
4111 	} /* for (;;) */
4112 
4113 	D2(ldcp->id, "ldc_read: (0x%llx) end size=%d", ldcp->id, *sizep);
4114 
4115 	return (rv);
4116 
4117 channel_is_reset:
4118 	mutex_enter(&ldcp->tx_lock);
4119 	i_ldc_reset(ldcp, B_FALSE);
4120 	mutex_exit(&ldcp->tx_lock);
4121 	return (ECONNRESET);
4122 }
4123 
4124 /*
4125  * Fetch and buffer incoming packets so we can hand them back as
4126  * a basic byte stream.
4127  *
4128  * Enter and exit with ldcp->lock held by caller
4129  */
4130 static int
4131 i_ldc_read_stream(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep)
4132 {
4133 	int	rv;
4134 	size_t	size;
4135 
4136 	ASSERT(mutex_owned(&ldcp->lock));
4137 
4138 	D2(ldcp->id, "i_ldc_read_stream: (0x%llx) buffer size=%d",
4139 	    ldcp->id, *sizep);
4140 
4141 	if (ldcp->stream_remains == 0) {
4142 		size = ldcp->mtu;
4143 		rv = i_ldc_read_packet(ldcp,
4144 		    (caddr_t)ldcp->stream_bufferp, &size);
4145 		D2(ldcp->id, "i_ldc_read_stream: read packet (0x%llx) size=%d",
4146 		    ldcp->id, size);
4147 
4148 		if (rv != 0)
4149 			return (rv);
4150 
4151 		ldcp->stream_remains = size;
4152 		ldcp->stream_offset = 0;
4153 	}
4154 
4155 	size = MIN(ldcp->stream_remains, *sizep);
4156 
4157 	bcopy(ldcp->stream_bufferp + ldcp->stream_offset, target_bufp, size);
4158 	ldcp->stream_offset += size;
4159 	ldcp->stream_remains -= size;
4160 
4161 	D2(ldcp->id, "i_ldc_read_stream: (0x%llx) fill from buffer size=%d",
4162 	    ldcp->id, size);
4163 
4164 	*sizep = size;
4165 	return (0);
4166 }
4167 
4168 /*
4169  * Write specified amount of bytes to the channel
4170  * in multiple pkts of pkt_payload size. Each
4171  * packet is tagged with an unique packet ID in
4172  * the case of a reliable link.
4173  *
4174  * On return, size contains the number of bytes written.
4175  */
4176 int
4177 ldc_write(ldc_handle_t handle, caddr_t buf, size_t *sizep)
4178 {
4179 	ldc_chan_t	*ldcp;
4180 	int		rv = 0;
4181 
4182 	if (handle == NULL) {
4183 		DWARN(DBG_ALL_LDCS, "ldc_write: invalid channel handle\n");
4184 		return (EINVAL);
4185 	}
4186 	ldcp = (ldc_chan_t *)handle;
4187 
4188 	/* check if writes can occur */
4189 	if (!mutex_tryenter(&ldcp->tx_lock)) {
4190 		/*
4191 		 * Could not get the lock - channel could
4192 		 * be in the process of being unconfigured
4193 		 * or reader has encountered an error
4194 		 */
4195 		return (EAGAIN);
4196 	}
4197 
4198 	/* check if non-zero data to write */
4199 	if (buf == NULL || sizep == NULL) {
4200 		DWARN(ldcp->id, "ldc_write: (0x%llx) invalid data write\n",
4201 		    ldcp->id);
4202 		mutex_exit(&ldcp->tx_lock);
4203 		return (EINVAL);
4204 	}
4205 
4206 	if (*sizep == 0) {
4207 		DWARN(ldcp->id, "ldc_write: (0x%llx) write size of zero\n",
4208 		    ldcp->id);
4209 		mutex_exit(&ldcp->tx_lock);
4210 		return (0);
4211 	}
4212 
4213 	/* Check if channel is UP for data exchange */
4214 	if (ldcp->tstate != TS_UP) {
4215 		DWARN(ldcp->id,
4216 		    "ldc_write: (0x%llx) channel is not in UP state\n",
4217 		    ldcp->id);
4218 		*sizep = 0;
4219 		rv = ECONNRESET;
4220 	} else {
4221 		rv = ldcp->write_p(ldcp, buf, sizep);
4222 	}
4223 
4224 	mutex_exit(&ldcp->tx_lock);
4225 
4226 	return (rv);
4227 }
4228 
4229 /*
4230  * Write a raw packet to the channel
4231  * On return, size contains the number of bytes written.
4232  */
4233 static int
4234 i_ldc_write_raw(ldc_chan_t *ldcp, caddr_t buf, size_t *sizep)
4235 {
4236 	ldc_msg_t 	*ldcmsg;
4237 	uint64_t 	tx_head, tx_tail, new_tail;
4238 	int		rv = 0;
4239 	size_t		size;
4240 
4241 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
4242 	ASSERT(ldcp->mode == LDC_MODE_RAW);
4243 
4244 	size = *sizep;
4245 
4246 	/*
4247 	 * Check to see if the packet size is less than or
4248 	 * equal to packet size support in raw mode
4249 	 */
4250 	if (size > ldcp->pkt_payload) {
4251 		DWARN(ldcp->id,
4252 		    "ldc_write: (0x%llx) invalid size (0x%llx) for RAW mode\n",
4253 		    ldcp->id, *sizep);
4254 		*sizep = 0;
4255 		return (EMSGSIZE);
4256 	}
4257 
4258 	/* get the qptrs for the tx queue */
4259 	rv = hv_ldc_tx_get_state(ldcp->id,
4260 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
4261 	if (rv != 0) {
4262 		cmn_err(CE_WARN,
4263 		    "ldc_write: (0x%lx) cannot read queue ptrs\n", ldcp->id);
4264 		*sizep = 0;
4265 		return (EIO);
4266 	}
4267 
4268 	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
4269 	    ldcp->link_state == LDC_CHANNEL_RESET) {
4270 		DWARN(ldcp->id,
4271 		    "ldc_write: (0x%llx) channel down/reset\n", ldcp->id);
4272 
4273 		*sizep = 0;
4274 		if (mutex_tryenter(&ldcp->lock)) {
4275 			i_ldc_reset(ldcp, B_FALSE);
4276 			mutex_exit(&ldcp->lock);
4277 		} else {
4278 			/*
4279 			 * Release Tx lock, and then reacquire channel
4280 			 * and Tx lock in correct order
4281 			 */
4282 			mutex_exit(&ldcp->tx_lock);
4283 			mutex_enter(&ldcp->lock);
4284 			mutex_enter(&ldcp->tx_lock);
4285 			i_ldc_reset(ldcp, B_FALSE);
4286 			mutex_exit(&ldcp->lock);
4287 		}
4288 		return (ECONNRESET);
4289 	}
4290 
4291 	tx_tail = ldcp->tx_tail;
4292 	tx_head = ldcp->tx_head;
4293 	new_tail = (tx_tail + LDC_PACKET_SIZE) &
4294 	    ((ldcp->tx_q_entries-1) << LDC_PACKET_SHIFT);
4295 
4296 	if (new_tail == tx_head) {
4297 		DWARN(DBG_ALL_LDCS,
4298 		    "ldc_write: (0x%llx) TX queue is full\n", ldcp->id);
4299 		*sizep = 0;
4300 		return (EWOULDBLOCK);
4301 	}
4302 
4303 	D2(ldcp->id, "ldc_write: (0x%llx) start xfer size=%d",
4304 	    ldcp->id, size);
4305 
4306 	/* Send the data now */
4307 	ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
4308 
4309 	/* copy the data into pkt */
4310 	bcopy((uint8_t *)buf, ldcmsg, size);
4311 
4312 	/* increment tail */
4313 	tx_tail = new_tail;
4314 
4315 	/*
4316 	 * All packets have been copied into the TX queue
4317 	 * update the tail ptr in the HV
4318 	 */
4319 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
4320 	if (rv) {
4321 		if (rv == EWOULDBLOCK) {
4322 			DWARN(ldcp->id, "ldc_write: (0x%llx) write timed out\n",
4323 			    ldcp->id);
4324 			*sizep = 0;
4325 			return (EWOULDBLOCK);
4326 		}
4327 
4328 		*sizep = 0;
4329 		if (mutex_tryenter(&ldcp->lock)) {
4330 			i_ldc_reset(ldcp, B_FALSE);
4331 			mutex_exit(&ldcp->lock);
4332 		} else {
4333 			/*
4334 			 * Release Tx lock, and then reacquire channel
4335 			 * and Tx lock in correct order
4336 			 */
4337 			mutex_exit(&ldcp->tx_lock);
4338 			mutex_enter(&ldcp->lock);
4339 			mutex_enter(&ldcp->tx_lock);
4340 			i_ldc_reset(ldcp, B_FALSE);
4341 			mutex_exit(&ldcp->lock);
4342 		}
4343 		return (ECONNRESET);
4344 	}
4345 
4346 	ldcp->tx_tail = tx_tail;
4347 	*sizep = size;
4348 
4349 	D2(ldcp->id, "ldc_write: (0x%llx) end xfer size=%d", ldcp->id, size);
4350 
4351 	return (rv);
4352 }
4353 
4354 
4355 /*
4356  * Write specified amount of bytes to the channel
4357  * in multiple pkts of pkt_payload size. Each
4358  * packet is tagged with an unique packet ID in
4359  * the case of a reliable link.
4360  *
4361  * On return, size contains the number of bytes written.
4362  * This function needs to ensure that the write size is < MTU size
4363  */
4364 static int
4365 i_ldc_write_packet(ldc_chan_t *ldcp, caddr_t buf, size_t *size)
4366 {
4367 	ldc_msg_t 	*ldcmsg;
4368 	uint64_t 	tx_head, tx_tail, new_tail, start;
4369 	uint64_t	txq_size_mask, numavail;
4370 	uint8_t 	*msgbuf, *source = (uint8_t *)buf;
4371 	size_t 		len, bytes_written = 0, remaining;
4372 	int		rv;
4373 	uint32_t	curr_seqid;
4374 
4375 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
4376 
4377 	ASSERT(ldcp->mode == LDC_MODE_RELIABLE ||
4378 	    ldcp->mode == LDC_MODE_UNRELIABLE);
4379 
4380 	/* compute mask for increment */
4381 	txq_size_mask = (ldcp->tx_q_entries - 1) << LDC_PACKET_SHIFT;
4382 
4383 	/* get the qptrs for the tx queue */
4384 	rv = hv_ldc_tx_get_state(ldcp->id,
4385 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
4386 	if (rv != 0) {
4387 		cmn_err(CE_WARN,
4388 		    "ldc_write: (0x%lx) cannot read queue ptrs\n", ldcp->id);
4389 		*size = 0;
4390 		return (EIO);
4391 	}
4392 
4393 	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
4394 	    ldcp->link_state == LDC_CHANNEL_RESET) {
4395 		DWARN(ldcp->id,
4396 		    "ldc_write: (0x%llx) channel down/reset\n", ldcp->id);
4397 		*size = 0;
4398 		if (mutex_tryenter(&ldcp->lock)) {
4399 			i_ldc_reset(ldcp, B_FALSE);
4400 			mutex_exit(&ldcp->lock);
4401 		} else {
4402 			/*
4403 			 * Release Tx lock, and then reacquire channel
4404 			 * and Tx lock in correct order
4405 			 */
4406 			mutex_exit(&ldcp->tx_lock);
4407 			mutex_enter(&ldcp->lock);
4408 			mutex_enter(&ldcp->tx_lock);
4409 			i_ldc_reset(ldcp, B_FALSE);
4410 			mutex_exit(&ldcp->lock);
4411 		}
4412 		return (ECONNRESET);
4413 	}
4414 
4415 	tx_tail = ldcp->tx_tail;
4416 	new_tail = (tx_tail + LDC_PACKET_SIZE) %
4417 	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
4418 
4419 	/*
4420 	 * Check to see if the queue is full. The check is done using
4421 	 * the appropriate head based on the link mode.
4422 	 */
4423 	i_ldc_get_tx_head(ldcp, &tx_head);
4424 
4425 	if (new_tail == tx_head) {
4426 		DWARN(DBG_ALL_LDCS,
4427 		    "ldc_write: (0x%llx) TX queue is full\n", ldcp->id);
4428 		*size = 0;
4429 		return (EWOULDBLOCK);
4430 	}
4431 
4432 	/*
4433 	 * Make sure that the LDC Tx queue has enough space
4434 	 */
4435 	numavail = (tx_head >> LDC_PACKET_SHIFT) - (tx_tail >> LDC_PACKET_SHIFT)
4436 	    + ldcp->tx_q_entries - 1;
4437 	numavail %= ldcp->tx_q_entries;
4438 
4439 	if (*size > (numavail * ldcp->pkt_payload)) {
4440 		DWARN(DBG_ALL_LDCS,
4441 		    "ldc_write: (0x%llx) TX queue has no space\n", ldcp->id);
4442 		return (EWOULDBLOCK);
4443 	}
4444 
4445 	D2(ldcp->id, "ldc_write: (0x%llx) start xfer size=%d",
4446 	    ldcp->id, *size);
4447 
4448 	/* Send the data now */
4449 	bytes_written = 0;
4450 	curr_seqid = ldcp->last_msg_snt;
4451 	start = tx_tail;
4452 
4453 	while (*size > bytes_written) {
4454 
4455 		ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
4456 
4457 		msgbuf = (uint8_t *)((ldcp->mode == LDC_MODE_RELIABLE) ?
4458 		    ldcmsg->rdata : ldcmsg->udata);
4459 
4460 		ldcmsg->type = LDC_DATA;
4461 		ldcmsg->stype = LDC_INFO;
4462 		ldcmsg->ctrl = 0;
4463 
4464 		remaining = *size - bytes_written;
4465 		len = min(ldcp->pkt_payload, remaining);
4466 		ldcmsg->env = (uint8_t)len;
4467 
4468 		curr_seqid++;
4469 		ldcmsg->seqid = curr_seqid;
4470 
4471 		/* copy the data into pkt */
4472 		bcopy(source, msgbuf, len);
4473 
4474 		source += len;
4475 		bytes_written += len;
4476 
4477 		/* increment tail */
4478 		tx_tail = (tx_tail + LDC_PACKET_SIZE) & txq_size_mask;
4479 
4480 		ASSERT(tx_tail != tx_head);
4481 	}
4482 
4483 	/* Set the start and stop bits */
4484 	ldcmsg->env |= LDC_FRAG_STOP;
4485 	ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + start);
4486 	ldcmsg->env |= LDC_FRAG_START;
4487 
4488 	/*
4489 	 * All packets have been copied into the TX queue
4490 	 * update the tail ptr in the HV
4491 	 */
4492 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
4493 	if (rv == 0) {
4494 		ldcp->tx_tail = tx_tail;
4495 		ldcp->last_msg_snt = curr_seqid;
4496 		*size = bytes_written;
4497 	} else {
4498 		int rv2;
4499 
4500 		if (rv != EWOULDBLOCK) {
4501 			*size = 0;
4502 			if (mutex_tryenter(&ldcp->lock)) {
4503 				i_ldc_reset(ldcp, B_FALSE);
4504 				mutex_exit(&ldcp->lock);
4505 			} else {
4506 				/*
4507 				 * Release Tx lock, and then reacquire channel
4508 				 * and Tx lock in correct order
4509 				 */
4510 				mutex_exit(&ldcp->tx_lock);
4511 				mutex_enter(&ldcp->lock);
4512 				mutex_enter(&ldcp->tx_lock);
4513 				i_ldc_reset(ldcp, B_FALSE);
4514 				mutex_exit(&ldcp->lock);
4515 			}
4516 			return (ECONNRESET);
4517 		}
4518 
4519 		D1(ldcp->id, "hv_tx_set_tail returns 0x%x (head 0x%x, "
4520 		    "old tail 0x%x, new tail 0x%x, qsize=0x%x)\n",
4521 		    rv, ldcp->tx_head, ldcp->tx_tail, tx_tail,
4522 		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT));
4523 
4524 		rv2 = hv_ldc_tx_get_state(ldcp->id,
4525 		    &tx_head, &tx_tail, &ldcp->link_state);
4526 
4527 		D1(ldcp->id, "hv_ldc_tx_get_state returns 0x%x "
4528 		    "(head 0x%x, tail 0x%x state 0x%x)\n",
4529 		    rv2, tx_head, tx_tail, ldcp->link_state);
4530 
4531 		*size = 0;
4532 	}
4533 
4534 	D2(ldcp->id, "ldc_write: (0x%llx) end xfer size=%d", ldcp->id, *size);
4535 
4536 	return (rv);
4537 }
4538 
4539 /*
4540  * Write specified amount of bytes to the channel
4541  * in multiple pkts of pkt_payload size. Each
4542  * packet is tagged with an unique packet ID in
4543  * the case of a reliable link.
4544  *
4545  * On return, size contains the number of bytes written.
4546  * This function needs to ensure that the write size is < MTU size
4547  */
4548 static int
4549 i_ldc_write_stream(ldc_chan_t *ldcp, caddr_t buf, size_t *sizep)
4550 {
4551 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
4552 	ASSERT(ldcp->mode == LDC_MODE_RELIABLE);
4553 
4554 	/* Truncate packet to max of MTU size */
4555 	if (*sizep > ldcp->mtu) *sizep = ldcp->mtu;
4556 	return (i_ldc_write_packet(ldcp, buf, sizep));
4557 }
4558 
4559 
4560 /*
4561  * Interfaces for channel nexus to register/unregister with LDC module
4562  * The nexus will register functions to be used to register individual
4563  * channels with the nexus and enable interrupts for the channels
4564  */
4565 int
4566 ldc_register(ldc_cnex_t *cinfo)
4567 {
4568 	ldc_chan_t	*ldcp;
4569 
4570 	if (cinfo == NULL || cinfo->dip == NULL ||
4571 	    cinfo->reg_chan == NULL || cinfo->unreg_chan == NULL ||
4572 	    cinfo->add_intr == NULL || cinfo->rem_intr == NULL ||
4573 	    cinfo->clr_intr == NULL) {
4574 
4575 		DWARN(DBG_ALL_LDCS, "ldc_register: invalid nexus info\n");
4576 		return (EINVAL);
4577 	}
4578 
4579 	mutex_enter(&ldcssp->lock);
4580 
4581 	/* nexus registration */
4582 	ldcssp->cinfo.dip = cinfo->dip;
4583 	ldcssp->cinfo.reg_chan = cinfo->reg_chan;
4584 	ldcssp->cinfo.unreg_chan = cinfo->unreg_chan;
4585 	ldcssp->cinfo.add_intr = cinfo->add_intr;
4586 	ldcssp->cinfo.rem_intr = cinfo->rem_intr;
4587 	ldcssp->cinfo.clr_intr = cinfo->clr_intr;
4588 
4589 	/* register any channels that might have been previously initialized */
4590 	ldcp = ldcssp->chan_list;
4591 	while (ldcp) {
4592 		if ((ldcp->tstate & TS_QCONF_RDY) &&
4593 		    (ldcp->tstate & TS_CNEX_RDY) == 0)
4594 			(void) i_ldc_register_channel(ldcp);
4595 
4596 		ldcp = ldcp->next;
4597 	}
4598 
4599 	mutex_exit(&ldcssp->lock);
4600 
4601 	return (0);
4602 }
4603 
4604 int
4605 ldc_unregister(ldc_cnex_t *cinfo)
4606 {
4607 	if (cinfo == NULL || cinfo->dip == NULL) {
4608 		DWARN(DBG_ALL_LDCS, "ldc_unregister: invalid nexus info\n");
4609 		return (EINVAL);
4610 	}
4611 
4612 	mutex_enter(&ldcssp->lock);
4613 
4614 	if (cinfo->dip != ldcssp->cinfo.dip) {
4615 		DWARN(DBG_ALL_LDCS, "ldc_unregister: invalid dip\n");
4616 		mutex_exit(&ldcssp->lock);
4617 		return (EINVAL);
4618 	}
4619 
4620 	/* nexus unregister */
4621 	ldcssp->cinfo.dip = NULL;
4622 	ldcssp->cinfo.reg_chan = NULL;
4623 	ldcssp->cinfo.unreg_chan = NULL;
4624 	ldcssp->cinfo.add_intr = NULL;
4625 	ldcssp->cinfo.rem_intr = NULL;
4626 	ldcssp->cinfo.clr_intr = NULL;
4627 
4628 	mutex_exit(&ldcssp->lock);
4629 
4630 	return (0);
4631 }
4632