xref: /titanic_50/usr/src/uts/sun4v/io/ldc.c (revision 0616c1c344750b61fbfd80b1185254b28a9fe60d)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * sun4v LDC Link Layer
29  */
30 #include <sys/types.h>
31 #include <sys/file.h>
32 #include <sys/errno.h>
33 #include <sys/open.h>
34 #include <sys/cred.h>
35 #include <sys/kmem.h>
36 #include <sys/conf.h>
37 #include <sys/cmn_err.h>
38 #include <sys/ksynch.h>
39 #include <sys/modctl.h>
40 #include <sys/stat.h> /* needed for S_IFBLK and S_IFCHR */
41 #include <sys/debug.h>
42 #include <sys/cred.h>
43 #include <sys/promif.h>
44 #include <sys/ddi.h>
45 #include <sys/sunddi.h>
46 #include <sys/cyclic.h>
47 #include <sys/machsystm.h>
48 #include <sys/vm.h>
49 #include <sys/cpu.h>
50 #include <sys/intreg.h>
51 #include <sys/machcpuvar.h>
52 #include <sys/mmu.h>
53 #include <sys/pte.h>
54 #include <vm/hat.h>
55 #include <vm/as.h>
56 #include <vm/hat_sfmmu.h>
57 #include <sys/vm_machparam.h>
58 #include <vm/seg_kmem.h>
59 #include <vm/seg_kpm.h>
60 #include <sys/note.h>
61 #include <sys/ivintr.h>
62 #include <sys/hypervisor_api.h>
63 #include <sys/ldc.h>
64 #include <sys/ldc_impl.h>
65 #include <sys/cnex.h>
66 #include <sys/hsvc.h>
67 #include <sys/sdt.h>
68 
69 /* Core internal functions */
70 int i_ldc_h2v_error(int h_error);
71 void i_ldc_reset(ldc_chan_t *ldcp, boolean_t force_reset);
72 
73 static int i_ldc_txq_reconf(ldc_chan_t *ldcp);
74 static int i_ldc_rxq_reconf(ldc_chan_t *ldcp, boolean_t force_reset);
75 static int i_ldc_rxq_drain(ldc_chan_t *ldcp);
76 static void i_ldc_reset_state(ldc_chan_t *ldcp);
77 
78 static int i_ldc_get_tx_tail(ldc_chan_t *ldcp, uint64_t *tail);
79 static void i_ldc_get_tx_head(ldc_chan_t *ldcp, uint64_t *head);
80 static int i_ldc_set_tx_tail(ldc_chan_t *ldcp, uint64_t tail);
81 static int i_ldc_set_rx_head(ldc_chan_t *ldcp, uint64_t head);
82 static int i_ldc_send_pkt(ldc_chan_t *ldcp, uint8_t pkttype, uint8_t subtype,
83     uint8_t ctrlmsg);
84 
85 static int  i_ldc_set_rxdq_head(ldc_chan_t *ldcp, uint64_t head);
86 static void i_ldc_rxdq_copy(ldc_chan_t *ldcp, uint64_t *head);
87 static uint64_t i_ldc_dq_rx_get_state(ldc_chan_t *ldcp, uint64_t *head,
88     uint64_t *tail, uint64_t *link_state);
89 static uint64_t i_ldc_hvq_rx_get_state(ldc_chan_t *ldcp, uint64_t *head,
90     uint64_t *tail, uint64_t *link_state);
91 static int i_ldc_rx_ackpeek(ldc_chan_t *ldcp, uint64_t rx_head,
92     uint64_t rx_tail);
93 static uint_t i_ldc_chkq(ldc_chan_t *ldcp);
94 
95 /* Interrupt handling functions */
96 static uint_t i_ldc_tx_hdlr(caddr_t arg1, caddr_t arg2);
97 static uint_t i_ldc_rx_hdlr(caddr_t arg1, caddr_t arg2);
98 static uint_t i_ldc_rx_process_hvq(ldc_chan_t *ldcp, boolean_t *notify_client,
99     uint64_t *notify_event);
100 static void i_ldc_clear_intr(ldc_chan_t *ldcp, cnex_intrtype_t itype);
101 
102 /* Read method functions */
103 static int i_ldc_read_raw(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep);
104 static int i_ldc_read_packet(ldc_chan_t *ldcp, caddr_t target_bufp,
105 	size_t *sizep);
106 static int i_ldc_read_stream(ldc_chan_t *ldcp, caddr_t target_bufp,
107 	size_t *sizep);
108 
109 /* Write method functions */
110 static int i_ldc_write_raw(ldc_chan_t *ldcp, caddr_t target_bufp,
111 	size_t *sizep);
112 static int i_ldc_write_packet(ldc_chan_t *ldcp, caddr_t target_bufp,
113 	size_t *sizep);
114 static int i_ldc_write_stream(ldc_chan_t *ldcp, caddr_t target_bufp,
115 	size_t *sizep);
116 
117 /* Pkt processing internal functions */
118 static int i_ldc_check_seqid(ldc_chan_t *ldcp, ldc_msg_t *ldcmsg);
119 static int i_ldc_ctrlmsg(ldc_chan_t *ldcp, ldc_msg_t *ldcmsg);
120 static int i_ldc_process_VER(ldc_chan_t *ldcp, ldc_msg_t *msg);
121 static int i_ldc_process_RTS(ldc_chan_t *ldcp, ldc_msg_t *msg);
122 static int i_ldc_process_RTR(ldc_chan_t *ldcp, ldc_msg_t *msg);
123 static int i_ldc_process_RDX(ldc_chan_t *ldcp, ldc_msg_t *msg);
124 static int i_ldc_process_data_ACK(ldc_chan_t *ldcp, ldc_msg_t *msg);
125 
126 /* LDC Version */
127 static ldc_ver_t ldc_versions[] = { {1, 0} };
128 
129 /* number of supported versions */
130 #define	LDC_NUM_VERS	(sizeof (ldc_versions) / sizeof (ldc_versions[0]))
131 
132 /* Invalid value for the ldc_chan_t rx_ack_head field */
133 #define	ACKPEEK_HEAD_INVALID	((uint64_t)-1)
134 
135 
136 /* Module State Pointer */
137 ldc_soft_state_t *ldcssp;
138 
139 static struct modldrv md = {
140 	&mod_miscops,			/* This is a misc module */
141 	"sun4v LDC module",		/* Name of the module */
142 };
143 
144 static struct modlinkage ml = {
145 	MODREV_1,
146 	&md,
147 	NULL
148 };
149 
150 static uint64_t ldc_sup_minor;		/* Supported minor number */
151 static hsvc_info_t ldc_hsvc = {
152 	HSVC_REV_1, NULL, HSVC_GROUP_LDC, 1, 1, "ldc"
153 };
154 
155 /*
156  * The no. of MTU size messages that can be stored in
157  * the LDC Tx queue. The number of Tx queue entries is
158  * then computed as (mtu * mtu_msgs)/sizeof(queue_entry)
159  */
160 uint64_t ldc_mtu_msgs = LDC_MTU_MSGS;
161 
162 /*
163  * The minimum queue length. This is the size of the smallest
164  * LDC queue. If the computed value is less than this default,
165  * the queue length is rounded up to 'ldc_queue_entries'.
166  */
167 uint64_t ldc_queue_entries = LDC_QUEUE_ENTRIES;
168 
169 /*
170  * The length of the reliable-mode data queue in terms of the LDC
171  * receive queue length. i.e., the number of times larger than the
172  * LDC receive queue that the data queue should be. The HV receive
173  * queue is required to be a power of 2 and this implementation
174  * assumes the data queue will also be a power of 2. By making the
175  * multiplier a power of 2, we ensure the data queue will be a
176  * power of 2. We use a multiplier because the receive queue is
177  * sized to be sane relative to the MTU and the same is needed for
178  * the data queue.
179  */
180 uint64_t ldc_rxdq_multiplier = LDC_RXDQ_MULTIPLIER;
181 
182 /*
183  * LDC retry count and delay - when the HV returns EWOULDBLOCK
184  * the operation is retried 'ldc_max_retries' times with a
185  * wait of 'ldc_delay' usecs between each retry.
186  */
187 int ldc_max_retries = LDC_MAX_RETRIES;
188 clock_t ldc_delay = LDC_DELAY;
189 
190 /*
191  * delay between each retry of channel unregistration in
192  * ldc_close(), to wait for pending interrupts to complete.
193  */
194 clock_t ldc_close_delay = LDC_CLOSE_DELAY;
195 
196 #ifdef DEBUG
197 
198 /*
199  * Print debug messages
200  *
201  * set ldcdbg to 0x7 for enabling all msgs
202  * 0x4 - Warnings
203  * 0x2 - All debug messages
204  * 0x1 - Minimal debug messages
205  *
206  * set ldcdbgchan to the channel number you want to debug
207  * setting it to -1 prints debug messages for all channels
208  * NOTE: ldcdbgchan has no effect on error messages
209  */
210 
211 int ldcdbg = 0x0;
212 int64_t ldcdbgchan = DBG_ALL_LDCS;
213 uint64_t ldc_inject_err_flag = 0;
214 
215 void
216 ldcdebug(int64_t id, const char *fmt, ...)
217 {
218 	char buf[512];
219 	va_list ap;
220 
221 	/*
222 	 * Do not return if,
223 	 * caller wants to print it anyway - (id == DBG_ALL_LDCS)
224 	 * debug channel is set to all LDCs - (ldcdbgchan == DBG_ALL_LDCS)
225 	 * debug channel = caller specified channel
226 	 */
227 	if ((id != DBG_ALL_LDCS) &&
228 	    (ldcdbgchan != DBG_ALL_LDCS) &&
229 	    (ldcdbgchan != id)) {
230 		return;
231 	}
232 
233 	va_start(ap, fmt);
234 	(void) vsprintf(buf, fmt, ap);
235 	va_end(ap);
236 
237 	cmn_err(CE_CONT, "?%s", buf);
238 }
239 
240 #define	LDC_ERR_RESET		0x1
241 #define	LDC_ERR_PKTLOSS		0x2
242 #define	LDC_ERR_DQFULL		0x4
243 #define	LDC_ERR_DRNGCLEAR	0x8
244 
245 static boolean_t
246 ldc_inject_error(ldc_chan_t *ldcp, uint64_t error)
247 {
248 	if ((ldcdbgchan != DBG_ALL_LDCS) && (ldcdbgchan != ldcp->id))
249 		return (B_FALSE);
250 
251 	if ((ldc_inject_err_flag & error) == 0)
252 		return (B_FALSE);
253 
254 	/* clear the injection state */
255 	ldc_inject_err_flag &= ~error;
256 
257 	return (B_TRUE);
258 }
259 
260 #define	D1		\
261 if (ldcdbg & 0x01)	\
262 	ldcdebug
263 
264 #define	D2		\
265 if (ldcdbg & 0x02)	\
266 	ldcdebug
267 
268 #define	DWARN		\
269 if (ldcdbg & 0x04)	\
270 	ldcdebug
271 
272 #define	DUMP_PAYLOAD(id, addr)						\
273 {									\
274 	char buf[65*3];							\
275 	int i;								\
276 	uint8_t *src = (uint8_t *)addr;					\
277 	for (i = 0; i < 64; i++, src++)					\
278 		(void) sprintf(&buf[i * 3], "|%02x", *src);		\
279 	(void) sprintf(&buf[i * 3], "|\n");				\
280 	D2((id), "payload: %s", buf);					\
281 }
282 
283 #define	DUMP_LDC_PKT(c, s, addr)					\
284 {									\
285 	ldc_msg_t *msg = (ldc_msg_t *)(addr);				\
286 	uint32_t mid = ((c)->mode != LDC_MODE_RAW) ? msg->seqid : 0;	\
287 	if (msg->type == LDC_DATA) {                                    \
288 	    D2((c)->id, "%s: msg%d (/%x/%x/%x/,env[%c%c,sz=%d])",	\
289 	    (s), mid, msg->type, msg->stype, msg->ctrl,			\
290 	    (msg->env & LDC_FRAG_START) ? 'B' : ' ',                    \
291 	    (msg->env & LDC_FRAG_STOP) ? 'E' : ' ',                     \
292 	    (msg->env & LDC_LEN_MASK));					\
293 	} else { 							\
294 	    D2((c)->id, "%s: msg%d (/%x/%x/%x/,env=%x)", (s),		\
295 	    mid, msg->type, msg->stype, msg->ctrl, msg->env);		\
296 	} 								\
297 }
298 
299 #define	LDC_INJECT_RESET(_ldcp)	ldc_inject_error(_ldcp, LDC_ERR_RESET)
300 #define	LDC_INJECT_PKTLOSS(_ldcp) ldc_inject_error(_ldcp, LDC_ERR_PKTLOSS)
301 #define	LDC_INJECT_DQFULL(_ldcp) ldc_inject_error(_ldcp, LDC_ERR_DQFULL)
302 #define	LDC_INJECT_DRNGCLEAR(_ldcp) ldc_inject_error(_ldcp, LDC_ERR_DRNGCLEAR)
303 extern void i_ldc_mem_inject_dring_clear(ldc_chan_t *ldcp);
304 
305 #else
306 
307 #define	DBG_ALL_LDCS -1
308 
309 #define	D1
310 #define	D2
311 #define	DWARN
312 
313 #define	DUMP_PAYLOAD(id, addr)
314 #define	DUMP_LDC_PKT(c, s, addr)
315 
316 #define	LDC_INJECT_RESET(_ldcp)	(B_FALSE)
317 #define	LDC_INJECT_PKTLOSS(_ldcp) (B_FALSE)
318 #define	LDC_INJECT_DQFULL(_ldcp) (B_FALSE)
319 #define	LDC_INJECT_DRNGCLEAR(_ldcp) (B_FALSE)
320 
321 #endif
322 
323 /*
324  * dtrace SDT probes to ease tracing of the rx data queue and HV queue
325  * lengths. Just pass the head, tail, and entries values so that the
326  * length can be calculated in a dtrace script when the probe is enabled.
327  */
328 #define	TRACE_RXDQ_LENGTH(ldcp)						\
329 	DTRACE_PROBE4(rxdq__size,					\
330 	uint64_t, ldcp->id,						\
331 	uint64_t, ldcp->rx_dq_head,					\
332 	uint64_t, ldcp->rx_dq_tail,					\
333 	uint64_t, ldcp->rx_dq_entries)
334 
335 #define	TRACE_RXHVQ_LENGTH(ldcp, head, tail)				\
336 	DTRACE_PROBE4(rxhvq__size,					\
337 	uint64_t, ldcp->id,						\
338 	uint64_t, head,							\
339 	uint64_t, tail,							\
340 	uint64_t, ldcp->rx_q_entries)
341 
/*
 * A dtrace SDT probe to ease tracing of data queue copy operations.
 * Passes the channel id and the number of bytes copied.
 *
 * The last line of the definition deliberately has no line continuation,
 * so the macro ends here and cannot absorb whatever follows it.
 */
#define	TRACE_RXDQ_COPY(ldcp, bytes)					\
	DTRACE_PROBE2(rxdq__copy, uint64_t, ldcp->id, uint64_t, bytes)
345 
346 /* The amount of contiguous space at the tail of the queue */
347 #define	Q_CONTIG_SPACE(head, tail, size)				\
348 	((head) <= (tail) ? ((size) - (tail)) :				\
349 	((head) - (tail) - LDC_PACKET_SIZE))
350 
351 #define	ZERO_PKT(p)			\
352 	bzero((p), sizeof (ldc_msg_t));
353 
354 #define	IDX2COOKIE(idx, pg_szc, pg_shift)				\
355 	(((pg_szc) << LDC_COOKIE_PGSZC_SHIFT) | ((idx) << (pg_shift)))
356 
357 int
358 _init(void)
359 {
360 	int status;
361 	extern void i_ldc_mem_set_hsvc_vers(uint64_t major, uint64_t minor);
362 
363 	status = hsvc_register(&ldc_hsvc, &ldc_sup_minor);
364 	if (status != 0) {
365 		cmn_err(CE_NOTE, "!%s: cannot negotiate hypervisor LDC services"
366 		    " group: 0x%lx major: %ld minor: %ld errno: %d",
367 		    ldc_hsvc.hsvc_modname, ldc_hsvc.hsvc_group,
368 		    ldc_hsvc.hsvc_major, ldc_hsvc.hsvc_minor, status);
369 		return (-1);
370 	}
371 
372 	/* Initialize shared memory HV API version checking */
373 	i_ldc_mem_set_hsvc_vers(ldc_hsvc.hsvc_major, ldc_sup_minor);
374 
375 	/* allocate soft state structure */
376 	ldcssp = kmem_zalloc(sizeof (ldc_soft_state_t), KM_SLEEP);
377 
378 	/* Link the module into the system */
379 	status = mod_install(&ml);
380 	if (status != 0) {
381 		kmem_free(ldcssp, sizeof (ldc_soft_state_t));
382 		return (status);
383 	}
384 
385 	/* Initialize the LDC state structure */
386 	mutex_init(&ldcssp->lock, NULL, MUTEX_DRIVER, NULL);
387 
388 	mutex_enter(&ldcssp->lock);
389 
390 	/* Create a cache for memory handles */
391 	ldcssp->memhdl_cache = kmem_cache_create("ldc_memhdl_cache",
392 	    sizeof (ldc_mhdl_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
393 	if (ldcssp->memhdl_cache == NULL) {
394 		DWARN(DBG_ALL_LDCS, "_init: ldc_memhdl cache create failed\n");
395 		mutex_exit(&ldcssp->lock);
396 		return (-1);
397 	}
398 
399 	/* Create cache for memory segment structures */
400 	ldcssp->memseg_cache = kmem_cache_create("ldc_memseg_cache",
401 	    sizeof (ldc_memseg_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
402 	if (ldcssp->memseg_cache == NULL) {
403 		DWARN(DBG_ALL_LDCS, "_init: ldc_memseg cache create failed\n");
404 		mutex_exit(&ldcssp->lock);
405 		return (-1);
406 	}
407 
408 
409 	ldcssp->channel_count = 0;
410 	ldcssp->channels_open = 0;
411 	ldcssp->chan_list = NULL;
412 	ldcssp->dring_list = NULL;
413 
414 	mutex_exit(&ldcssp->lock);
415 
416 	return (0);
417 }
418 
419 int
420 _info(struct modinfo *modinfop)
421 {
422 	/* Report status of the dynamically loadable driver module */
423 	return (mod_info(&ml, modinfop));
424 }
425 
426 int
427 _fini(void)
428 {
429 	int 		rv, status;
430 	ldc_chan_t 	*tmp_ldcp, *ldcp;
431 	ldc_dring_t 	*tmp_dringp, *dringp;
432 	ldc_mem_info_t 	minfo;
433 
434 	/* Unlink the driver module from the system */
435 	status = mod_remove(&ml);
436 	if (status) {
437 		DWARN(DBG_ALL_LDCS, "_fini: mod_remove failed\n");
438 		return (EIO);
439 	}
440 
441 	/* Free descriptor rings */
442 	dringp = ldcssp->dring_list;
443 	while (dringp != NULL) {
444 		tmp_dringp = dringp->next;
445 
446 		rv = ldc_mem_dring_info((ldc_dring_handle_t)dringp, &minfo);
447 		if (rv == 0 && minfo.status != LDC_UNBOUND) {
448 			if (minfo.status == LDC_BOUND) {
449 				(void) ldc_mem_dring_unbind(
450 				    (ldc_dring_handle_t)dringp);
451 			}
452 			if (minfo.status == LDC_MAPPED) {
453 				(void) ldc_mem_dring_unmap(
454 				    (ldc_dring_handle_t)dringp);
455 			}
456 		}
457 
458 		(void) ldc_mem_dring_destroy((ldc_dring_handle_t)dringp);
459 		dringp = tmp_dringp;
460 	}
461 	ldcssp->dring_list = NULL;
462 
463 	/* close and finalize channels */
464 	ldcp = ldcssp->chan_list;
465 	while (ldcp != NULL) {
466 		tmp_ldcp = ldcp->next;
467 
468 		(void) ldc_close((ldc_handle_t)ldcp);
469 		(void) ldc_fini((ldc_handle_t)ldcp);
470 
471 		ldcp = tmp_ldcp;
472 	}
473 	ldcssp->chan_list = NULL;
474 
475 	/* Destroy kmem caches */
476 	kmem_cache_destroy(ldcssp->memhdl_cache);
477 	kmem_cache_destroy(ldcssp->memseg_cache);
478 
479 	/*
480 	 * We have successfully "removed" the driver.
481 	 * Destroying soft states
482 	 */
483 	mutex_destroy(&ldcssp->lock);
484 	kmem_free(ldcssp, sizeof (ldc_soft_state_t));
485 
486 	(void) hsvc_unregister(&ldc_hsvc);
487 
488 	return (status);
489 }
490 
491 /* -------------------------------------------------------------------------- */
492 
493 /*
494  * LDC Link Layer Internal Functions
495  */
496 
497 /*
498  * Translate HV Errors to sun4v error codes
499  */
500 int
501 i_ldc_h2v_error(int h_error)
502 {
503 	switch (h_error) {
504 
505 	case	H_EOK:
506 		return (0);
507 
508 	case	H_ENORADDR:
509 		return (EFAULT);
510 
511 	case	H_EBADPGSZ:
512 	case	H_EINVAL:
513 		return (EINVAL);
514 
515 	case	H_EWOULDBLOCK:
516 		return (EWOULDBLOCK);
517 
518 	case	H_ENOACCESS:
519 	case	H_ENOMAP:
520 		return (EACCES);
521 
522 	case	H_EIO:
523 	case	H_ECPUERROR:
524 		return (EIO);
525 
526 	case	H_ENOTSUPPORTED:
527 		return (ENOTSUP);
528 
529 	case 	H_ETOOMANY:
530 		return (ENOSPC);
531 
532 	case	H_ECHANNEL:
533 		return (ECHRNG);
534 	default:
535 		break;
536 	}
537 
538 	return (EIO);
539 }
540 
541 /*
542  * Reconfigure the transmit queue
543  */
544 static int
545 i_ldc_txq_reconf(ldc_chan_t *ldcp)
546 {
547 	int rv;
548 
549 	ASSERT(MUTEX_HELD(&ldcp->lock));
550 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
551 
552 	rv = hv_ldc_tx_qconf(ldcp->id, ldcp->tx_q_ra, ldcp->tx_q_entries);
553 	if (rv) {
554 		cmn_err(CE_WARN,
555 		    "i_ldc_txq_reconf: (0x%lx) cannot set qconf", ldcp->id);
556 		return (EIO);
557 	}
558 	rv = hv_ldc_tx_get_state(ldcp->id, &(ldcp->tx_head),
559 	    &(ldcp->tx_tail), &(ldcp->link_state));
560 	if (rv) {
561 		cmn_err(CE_WARN,
562 		    "i_ldc_txq_reconf: (0x%lx) cannot get qptrs", ldcp->id);
563 		return (EIO);
564 	}
565 	D1(ldcp->id, "i_ldc_txq_reconf: (0x%llx) h=0x%llx,t=0x%llx,"
566 	    "s=0x%llx\n", ldcp->id, ldcp->tx_head, ldcp->tx_tail,
567 	    ldcp->link_state);
568 
569 	return (0);
570 }
571 
572 /*
573  * Reconfigure the receive queue
574  */
575 static int
576 i_ldc_rxq_reconf(ldc_chan_t *ldcp, boolean_t force_reset)
577 {
578 	int rv;
579 	uint64_t rx_head, rx_tail;
580 
581 	ASSERT(MUTEX_HELD(&ldcp->lock));
582 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
583 	    &(ldcp->link_state));
584 	if (rv) {
585 		cmn_err(CE_WARN,
586 		    "i_ldc_rxq_reconf: (0x%lx) cannot get state",
587 		    ldcp->id);
588 		return (EIO);
589 	}
590 
591 	if (force_reset || (ldcp->tstate & ~TS_IN_RESET) == TS_UP) {
592 		rv = hv_ldc_rx_qconf(ldcp->id, ldcp->rx_q_ra,
593 		    ldcp->rx_q_entries);
594 		if (rv) {
595 			cmn_err(CE_WARN,
596 			    "i_ldc_rxq_reconf: (0x%lx) cannot set qconf",
597 			    ldcp->id);
598 			return (EIO);
599 		}
600 		D1(ldcp->id, "i_ldc_rxq_reconf: (0x%llx) completed q reconf",
601 		    ldcp->id);
602 	}
603 
604 	return (0);
605 }
606 
607 
608 /*
609  * Drain the contents of the receive queue
610  */
611 static int
612 i_ldc_rxq_drain(ldc_chan_t *ldcp)
613 {
614 	int rv;
615 	uint64_t rx_head, rx_tail;
616 
617 	ASSERT(MUTEX_HELD(&ldcp->lock));
618 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
619 	    &(ldcp->link_state));
620 	if (rv) {
621 		cmn_err(CE_WARN, "i_ldc_rxq_drain: (0x%lx) cannot get state",
622 		    ldcp->id);
623 		return (EIO);
624 	}
625 
626 	/* flush contents by setting the head = tail */
627 	return (i_ldc_set_rx_head(ldcp, rx_tail));
628 }
629 
630 
631 /*
632  * Reset LDC state structure and its contents
633  */
634 static void
635 i_ldc_reset_state(ldc_chan_t *ldcp)
636 {
637 	ASSERT(MUTEX_HELD(&ldcp->lock));
638 	ldcp->last_msg_snt = LDC_INIT_SEQID;
639 	ldcp->last_ack_rcd = 0;
640 	ldcp->last_msg_rcd = 0;
641 	ldcp->tx_ackd_head = ldcp->tx_head;
642 	ldcp->stream_remains = 0;
643 	ldcp->next_vidx = 0;
644 	ldcp->hstate = 0;
645 	ldcp->tstate = TS_OPEN;
646 	ldcp->status = LDC_OPEN;
647 	ldcp->rx_ack_head = ACKPEEK_HEAD_INVALID;
648 	ldcp->rx_dq_head = 0;
649 	ldcp->rx_dq_tail = 0;
650 
651 	if (ldcp->link_state == LDC_CHANNEL_UP ||
652 	    ldcp->link_state == LDC_CHANNEL_RESET) {
653 
654 		if (ldcp->mode == LDC_MODE_RAW) {
655 			ldcp->status = LDC_UP;
656 			ldcp->tstate = TS_UP;
657 		} else {
658 			ldcp->status = LDC_READY;
659 			ldcp->tstate |= TS_LINK_READY;
660 		}
661 	}
662 }
663 
664 /*
665  * Reset a LDC channel
666  */
667 void
668 i_ldc_reset(ldc_chan_t *ldcp, boolean_t force_reset)
669 {
670 	DWARN(ldcp->id, "i_ldc_reset: (0x%llx) channel reset\n", ldcp->id);
671 
672 	ASSERT(MUTEX_HELD(&ldcp->lock));
673 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
674 
675 	/* reconfig Tx and Rx queues */
676 	(void) i_ldc_txq_reconf(ldcp);
677 	(void) i_ldc_rxq_reconf(ldcp, force_reset);
678 
679 	/* Clear Tx and Rx interrupts */
680 	(void) i_ldc_clear_intr(ldcp, CNEX_TX_INTR);
681 	(void) i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
682 
683 	/* Reset channel state */
684 	i_ldc_reset_state(ldcp);
685 
686 	/* Mark channel in reset */
687 	ldcp->tstate |= TS_IN_RESET;
688 }
689 
690 
691 /*
692  * Clear pending interrupts
693  */
694 static void
695 i_ldc_clear_intr(ldc_chan_t *ldcp, cnex_intrtype_t itype)
696 {
697 	ldc_cnex_t *cinfo = &ldcssp->cinfo;
698 
699 	ASSERT(MUTEX_HELD(&ldcp->lock));
700 	ASSERT(cinfo->dip != NULL);
701 
702 	switch (itype) {
703 	case CNEX_TX_INTR:
704 		/* check Tx interrupt */
705 		if (ldcp->tx_intr_state)
706 			ldcp->tx_intr_state = LDC_INTR_NONE;
707 		else
708 			return;
709 		break;
710 
711 	case CNEX_RX_INTR:
712 		/* check Rx interrupt */
713 		if (ldcp->rx_intr_state)
714 			ldcp->rx_intr_state = LDC_INTR_NONE;
715 		else
716 			return;
717 		break;
718 	}
719 
720 	(void) cinfo->clr_intr(cinfo->dip, ldcp->id, itype);
721 	D2(ldcp->id,
722 	    "i_ldc_clear_intr: (0x%llx) cleared 0x%x intr\n",
723 	    ldcp->id, itype);
724 }
725 
726 /*
727  * Set the receive queue head
728  * Resets connection and returns an error if it fails.
729  */
730 static int
731 i_ldc_set_rx_head(ldc_chan_t *ldcp, uint64_t head)
732 {
733 	int 	rv;
734 	int 	retries;
735 
736 	ASSERT(MUTEX_HELD(&ldcp->lock));
737 	for (retries = 0; retries < ldc_max_retries; retries++) {
738 
739 		if ((rv = hv_ldc_rx_set_qhead(ldcp->id, head)) == 0)
740 			return (0);
741 
742 		if (rv != H_EWOULDBLOCK)
743 			break;
744 
745 		/* wait for ldc_delay usecs */
746 		drv_usecwait(ldc_delay);
747 	}
748 
749 	cmn_err(CE_WARN, "ldc_rx_set_qhead: (0x%lx) cannot set qhead 0x%lx",
750 	    ldcp->id, head);
751 	mutex_enter(&ldcp->tx_lock);
752 	i_ldc_reset(ldcp, B_TRUE);
753 	mutex_exit(&ldcp->tx_lock);
754 
755 	return (ECONNRESET);
756 }
757 
758 /*
759  * Returns the tx_head to be used for transfer
760  */
761 static void
762 i_ldc_get_tx_head(ldc_chan_t *ldcp, uint64_t *head)
763 {
764 	ldc_msg_t 	*pkt;
765 
766 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
767 
768 	/* get current Tx head */
769 	*head = ldcp->tx_head;
770 
771 	/*
772 	 * Reliable mode will use the ACKd head instead of the regular tx_head.
773 	 * Also in Reliable mode, advance ackd_head for all non DATA/INFO pkts,
774 	 * up to the current location of tx_head. This needs to be done
775 	 * as the peer will only ACK DATA/INFO pkts.
776 	 */
777 	if (ldcp->mode == LDC_MODE_RELIABLE) {
778 		while (ldcp->tx_ackd_head != ldcp->tx_head) {
779 			pkt = (ldc_msg_t *)(ldcp->tx_q_va + ldcp->tx_ackd_head);
780 			if ((pkt->type & LDC_DATA) && (pkt->stype & LDC_INFO)) {
781 				break;
782 			}
783 			/* advance ACKd head */
784 			ldcp->tx_ackd_head =
785 			    (ldcp->tx_ackd_head + LDC_PACKET_SIZE) %
786 			    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
787 		}
788 		*head = ldcp->tx_ackd_head;
789 	}
790 }
791 
792 /*
793  * Returns the tx_tail to be used for transfer
794  * Re-reads the TX queue ptrs if and only if the
795  * the cached head and tail are equal (queue is full)
796  */
797 static int
798 i_ldc_get_tx_tail(ldc_chan_t *ldcp, uint64_t *tail)
799 {
800 	int 		rv;
801 	uint64_t 	current_head, new_tail;
802 
803 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
804 	/* Read the head and tail ptrs from HV */
805 	rv = hv_ldc_tx_get_state(ldcp->id,
806 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
807 	if (rv) {
808 		cmn_err(CE_WARN,
809 		    "i_ldc_get_tx_tail: (0x%lx) cannot read qptrs\n",
810 		    ldcp->id);
811 		return (EIO);
812 	}
813 	if (ldcp->link_state == LDC_CHANNEL_DOWN) {
814 		D1(ldcp->id, "i_ldc_get_tx_tail: (0x%llx) channel not ready\n",
815 		    ldcp->id);
816 		return (ECONNRESET);
817 	}
818 
819 	i_ldc_get_tx_head(ldcp, &current_head);
820 
821 	/* increment the tail */
822 	new_tail = (ldcp->tx_tail + LDC_PACKET_SIZE) %
823 	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
824 
825 	if (new_tail == current_head) {
826 		DWARN(ldcp->id,
827 		    "i_ldc_get_tx_tail: (0x%llx) TX queue is full\n",
828 		    ldcp->id);
829 		return (EWOULDBLOCK);
830 	}
831 
832 	D2(ldcp->id, "i_ldc_get_tx_tail: (0x%llx) head=0x%llx, tail=0x%llx\n",
833 	    ldcp->id, ldcp->tx_head, ldcp->tx_tail);
834 
835 	*tail = ldcp->tx_tail;
836 	return (0);
837 }
838 
839 /*
840  * Set the tail pointer. If HV returns EWOULDBLOCK, it will back off
841  * and retry ldc_max_retries times before returning an error.
842  * Returns 0, EWOULDBLOCK or EIO
843  */
844 static int
845 i_ldc_set_tx_tail(ldc_chan_t *ldcp, uint64_t tail)
846 {
847 	int		rv, retval = EWOULDBLOCK;
848 	int 		retries;
849 
850 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
851 	for (retries = 0; retries < ldc_max_retries; retries++) {
852 
853 		if ((rv = hv_ldc_tx_set_qtail(ldcp->id, tail)) == 0) {
854 			retval = 0;
855 			break;
856 		}
857 		if (rv != H_EWOULDBLOCK) {
858 			DWARN(ldcp->id, "i_ldc_set_tx_tail: (0x%llx) set "
859 			    "qtail=0x%llx failed, rv=%d\n", ldcp->id, tail, rv);
860 			retval = EIO;
861 			break;
862 		}
863 
864 		/* wait for ldc_delay usecs */
865 		drv_usecwait(ldc_delay);
866 	}
867 	return (retval);
868 }
869 
870 /*
871  * Copy a data packet from the HV receive queue to the data queue.
872  * Caller must ensure that the data queue is not already full.
873  *
874  * The *head argument represents the current head pointer for the HV
875  * receive queue. After copying a packet from the HV receive queue,
876  * the *head pointer will be updated. This allows the caller to update
877  * the head pointer in HV using the returned *head value.
878  */
879 void
880 i_ldc_rxdq_copy(ldc_chan_t *ldcp, uint64_t *head)
881 {
882 	uint64_t	q_size, dq_size;
883 
884 	ASSERT(MUTEX_HELD(&ldcp->lock));
885 
886 	q_size  = ldcp->rx_q_entries << LDC_PACKET_SHIFT;
887 	dq_size = ldcp->rx_dq_entries << LDC_PACKET_SHIFT;
888 
889 	ASSERT(Q_CONTIG_SPACE(ldcp->rx_dq_head, ldcp->rx_dq_tail,
890 	    dq_size) >= LDC_PACKET_SIZE);
891 
892 	bcopy((void *)(ldcp->rx_q_va + *head),
893 	    (void *)(ldcp->rx_dq_va + ldcp->rx_dq_tail), LDC_PACKET_SIZE);
894 	TRACE_RXDQ_COPY(ldcp, LDC_PACKET_SIZE);
895 
896 	/* Update rx head */
897 	*head = (*head + LDC_PACKET_SIZE) % q_size;
898 
899 	/* Update dq tail */
900 	ldcp->rx_dq_tail = (ldcp->rx_dq_tail + LDC_PACKET_SIZE) % dq_size;
901 }
902 
903 /*
904  * Update the Rx data queue head pointer
905  */
906 static int
907 i_ldc_set_rxdq_head(ldc_chan_t *ldcp, uint64_t head)
908 {
909 	ldcp->rx_dq_head = head;
910 	return (0);
911 }
912 
913 /*
914  * Get the Rx data queue head and tail pointers
915  */
916 static uint64_t
917 i_ldc_dq_rx_get_state(ldc_chan_t *ldcp, uint64_t *head, uint64_t *tail,
918     uint64_t *link_state)
919 {
920 	_NOTE(ARGUNUSED(link_state))
921 	*head = ldcp->rx_dq_head;
922 	*tail = ldcp->rx_dq_tail;
923 	return (0);
924 }
925 
926 /*
927  * Wrapper for the Rx HV queue set head function. Giving the
928  * data queue and HV queue set head functions the same type.
929  */
930 static uint64_t
931 i_ldc_hvq_rx_get_state(ldc_chan_t *ldcp, uint64_t *head, uint64_t *tail,
932     uint64_t *link_state)
933 {
934 	return (i_ldc_h2v_error(hv_ldc_rx_get_state(ldcp->id, head, tail,
935 	    link_state)));
936 }
937 
938 /*
939  * LDC receive interrupt handler
940  *    triggered for channel with data pending to read
941  *    i.e. Rx queue content changes
942  */
943 static uint_t
944 i_ldc_rx_hdlr(caddr_t arg1, caddr_t arg2)
945 {
946 	_NOTE(ARGUNUSED(arg2))
947 
948 	ldc_chan_t	*ldcp;
949 	boolean_t	notify;
950 	uint64_t	event;
951 	int		rv, status;
952 
953 	/* Get the channel for which interrupt was received */
954 	if (arg1 == NULL) {
955 		cmn_err(CE_WARN, "i_ldc_rx_hdlr: invalid arg\n");
956 		return (DDI_INTR_UNCLAIMED);
957 	}
958 
959 	ldcp = (ldc_chan_t *)arg1;
960 
961 	D1(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) Received intr, ldcp=0x%p\n",
962 	    ldcp->id, ldcp);
963 	D1(ldcp->id, "i_ldc_rx_hdlr: (%llx) USR%lx/TS%lx/HS%lx, LSTATE=%lx\n",
964 	    ldcp->id, ldcp->status, ldcp->tstate, ldcp->hstate,
965 	    ldcp->link_state);
966 
967 	/* Lock channel */
968 	mutex_enter(&ldcp->lock);
969 
970 	/* Mark the interrupt as being actively handled */
971 	ldcp->rx_intr_state = LDC_INTR_ACTIVE;
972 
973 	status = i_ldc_rx_process_hvq(ldcp, &notify, &event);
974 
975 	if (ldcp->mode != LDC_MODE_RELIABLE) {
976 		/*
977 		 * If there are no data packets on the queue, clear
978 		 * the interrupt. Otherwise, the ldc_read will clear
979 		 * interrupts after draining the queue. To indicate the
980 		 * interrupt has not yet been cleared, it is marked
981 		 * as pending.
982 		 */
983 		if ((event & LDC_EVT_READ) == 0) {
984 			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
985 		} else {
986 			ldcp->rx_intr_state = LDC_INTR_PEND;
987 		}
988 	}
989 
990 	/* if callbacks are disabled, do not notify */
991 	if (notify && ldcp->cb_enabled) {
992 		ldcp->cb_inprogress = B_TRUE;
993 		mutex_exit(&ldcp->lock);
994 		rv = ldcp->cb(event, ldcp->cb_arg);
995 		if (rv) {
996 			DWARN(ldcp->id,
997 			    "i_ldc_rx_hdlr: (0x%llx) callback failure",
998 			    ldcp->id);
999 		}
1000 		mutex_enter(&ldcp->lock);
1001 		ldcp->cb_inprogress = B_FALSE;
1002 	}
1003 
1004 	if (ldcp->mode == LDC_MODE_RELIABLE) {
1005 		if (status == ENOSPC) {
1006 			/*
1007 			 * Here, ENOSPC indicates the secondary data
1008 			 * queue is full and the Rx queue is non-empty.
1009 			 * Much like how reliable and raw modes are
1010 			 * handled above, since the Rx queue is non-
1011 			 * empty, we mark the interrupt as pending to
1012 			 * indicate it has not yet been cleared.
1013 			 */
1014 			ldcp->rx_intr_state = LDC_INTR_PEND;
1015 		} else {
1016 			/*
1017 			 * We have processed all CTRL packets and
1018 			 * copied all DATA packets to the secondary
1019 			 * queue. Clear the interrupt.
1020 			 */
1021 			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
1022 		}
1023 	}
1024 
1025 	mutex_exit(&ldcp->lock);
1026 
1027 	D1(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) exiting handler", ldcp->id);
1028 
1029 	return (DDI_INTR_CLAIMED);
1030 }
1031 
1032 /*
1033  * Wrapper for the Rx HV queue processing function to be used when
1034  * checking the Rx HV queue for data packets. Unlike the interrupt
1035  * handler code flow, the Rx interrupt is not cleared here and
1036  * callbacks are not made.
1037  */
1038 static uint_t
1039 i_ldc_chkq(ldc_chan_t *ldcp)
1040 {
1041 	boolean_t	notify;
1042 	uint64_t	event;
1043 
1044 	return (i_ldc_rx_process_hvq(ldcp, &notify, &event));
1045 }
1046 
1047 /*
1048  * Send a LDC message
1049  */
1050 static int
1051 i_ldc_send_pkt(ldc_chan_t *ldcp, uint8_t pkttype, uint8_t subtype,
1052     uint8_t ctrlmsg)
1053 {
1054 	int		rv;
1055 	ldc_msg_t 	*pkt;
1056 	uint64_t	tx_tail;
1057 	uint32_t	curr_seqid;
1058 
1059 	/* Obtain Tx lock */
1060 	mutex_enter(&ldcp->tx_lock);
1061 
1062 	curr_seqid = ldcp->last_msg_snt;
1063 
1064 	/* get the current tail for the message */
1065 	rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
1066 	if (rv) {
1067 		DWARN(ldcp->id,
1068 		    "i_ldc_send_pkt: (0x%llx) error sending pkt, "
1069 		    "type=0x%x,subtype=0x%x,ctrl=0x%x\n",
1070 		    ldcp->id, pkttype, subtype, ctrlmsg);
1071 		mutex_exit(&ldcp->tx_lock);
1072 		return (rv);
1073 	}
1074 
1075 	pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
1076 	ZERO_PKT(pkt);
1077 
1078 	/* Initialize the packet */
1079 	pkt->type = pkttype;
1080 	pkt->stype = subtype;
1081 	pkt->ctrl = ctrlmsg;
1082 
1083 	/* Store ackid/seqid iff it is RELIABLE mode & not a RTS/RTR message */
1084 	if (((ctrlmsg & LDC_CTRL_MASK) != LDC_RTS) &&
1085 	    ((ctrlmsg & LDC_CTRL_MASK) != LDC_RTR)) {
1086 		curr_seqid++;
1087 		if (ldcp->mode != LDC_MODE_RAW) {
1088 			pkt->seqid = curr_seqid;
1089 			pkt->ackid = ldcp->last_msg_rcd;
1090 		}
1091 	}
1092 	DUMP_LDC_PKT(ldcp, "i_ldc_send_pkt", (uint64_t)pkt);
1093 
1094 	/* initiate the send by calling into HV and set the new tail */
1095 	tx_tail = (tx_tail + LDC_PACKET_SIZE) %
1096 	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1097 
1098 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
1099 	if (rv) {
1100 		DWARN(ldcp->id,
1101 		    "i_ldc_send_pkt:(0x%llx) error sending pkt, "
1102 		    "type=0x%x,stype=0x%x,ctrl=0x%x\n",
1103 		    ldcp->id, pkttype, subtype, ctrlmsg);
1104 		mutex_exit(&ldcp->tx_lock);
1105 		return (EIO);
1106 	}
1107 
1108 	ldcp->last_msg_snt = curr_seqid;
1109 	ldcp->tx_tail = tx_tail;
1110 
1111 	mutex_exit(&ldcp->tx_lock);
1112 	return (0);
1113 }
1114 
1115 /*
1116  * Checks if packet was received in right order
1117  * in the case of a reliable link.
1118  * Returns 0 if in order, else EIO
1119  */
1120 static int
1121 i_ldc_check_seqid(ldc_chan_t *ldcp, ldc_msg_t *msg)
1122 {
1123 	/* No seqid checking for RAW mode */
1124 	if (ldcp->mode == LDC_MODE_RAW)
1125 		return (0);
1126 
1127 	/* No seqid checking for version, RTS, RTR message */
1128 	if (msg->ctrl == LDC_VER ||
1129 	    msg->ctrl == LDC_RTS ||
1130 	    msg->ctrl == LDC_RTR)
1131 		return (0);
1132 
1133 	/* Initial seqid to use is sent in RTS/RTR and saved in last_msg_rcd */
1134 	if (msg->seqid != (ldcp->last_msg_rcd + 1)) {
1135 		DWARN(ldcp->id,
1136 		    "i_ldc_check_seqid: (0x%llx) out-of-order pkt, got 0x%x, "
1137 		    "expecting 0x%x\n", ldcp->id, msg->seqid,
1138 		    (ldcp->last_msg_rcd + 1));
1139 		return (EIO);
1140 	}
1141 
1142 #ifdef DEBUG
1143 	if (LDC_INJECT_PKTLOSS(ldcp)) {
1144 		DWARN(ldcp->id,
1145 		    "i_ldc_check_seqid: (0x%llx) inject pkt loss\n", ldcp->id);
1146 		return (EIO);
1147 	}
1148 #endif
1149 
1150 	return (0);
1151 }
1152 
1153 
1154 /*
1155  * Process an incoming version ctrl message
1156  */
1157 static int
1158 i_ldc_process_VER(ldc_chan_t *ldcp, ldc_msg_t *msg)
1159 {
1160 	int 		rv = 0, idx = ldcp->next_vidx;
1161 	ldc_msg_t 	*pkt;
1162 	uint64_t	tx_tail;
1163 	ldc_ver_t	*rcvd_ver;
1164 
1165 	/* get the received version */
1166 	rcvd_ver = (ldc_ver_t *)((uint64_t)msg + LDC_PAYLOAD_VER_OFF);
1167 
1168 	D2(ldcp->id, "i_ldc_process_VER: (0x%llx) received VER v%u.%u\n",
1169 	    ldcp->id, rcvd_ver->major, rcvd_ver->minor);
1170 
1171 	/* Obtain Tx lock */
1172 	mutex_enter(&ldcp->tx_lock);
1173 
1174 	switch (msg->stype) {
1175 	case LDC_INFO:
1176 
1177 		if ((ldcp->tstate & ~TS_IN_RESET) == TS_VREADY) {
1178 			(void) i_ldc_txq_reconf(ldcp);
1179 			i_ldc_reset_state(ldcp);
1180 			mutex_exit(&ldcp->tx_lock);
1181 			return (EAGAIN);
1182 		}
1183 
1184 		/* get the current tail and pkt for the response */
1185 		rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
1186 		if (rv != 0) {
1187 			DWARN(ldcp->id,
1188 			    "i_ldc_process_VER: (0x%llx) err sending "
1189 			    "version ACK/NACK\n", ldcp->id);
1190 			i_ldc_reset(ldcp, B_TRUE);
1191 			mutex_exit(&ldcp->tx_lock);
1192 			return (ECONNRESET);
1193 		}
1194 
1195 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
1196 		ZERO_PKT(pkt);
1197 
1198 		/* initialize the packet */
1199 		pkt->type = LDC_CTRL;
1200 		pkt->ctrl = LDC_VER;
1201 
1202 		for (;;) {
1203 
1204 			D1(ldcp->id, "i_ldc_process_VER: got %u.%u chk %u.%u\n",
1205 			    rcvd_ver->major, rcvd_ver->minor,
1206 			    ldc_versions[idx].major, ldc_versions[idx].minor);
1207 
1208 			if (rcvd_ver->major == ldc_versions[idx].major) {
1209 				/* major version match - ACK version */
1210 				pkt->stype = LDC_ACK;
1211 
1212 				/*
1213 				 * lower minor version to the one this endpt
1214 				 * supports, if necessary
1215 				 */
1216 				if (rcvd_ver->minor > ldc_versions[idx].minor)
1217 					rcvd_ver->minor =
1218 					    ldc_versions[idx].minor;
1219 				bcopy(rcvd_ver, pkt->udata, sizeof (*rcvd_ver));
1220 
1221 				break;
1222 			}
1223 
1224 			if (rcvd_ver->major > ldc_versions[idx].major) {
1225 
1226 				D1(ldcp->id, "i_ldc_process_VER: using next"
1227 				    " lower idx=%d, v%u.%u\n", idx,
1228 				    ldc_versions[idx].major,
1229 				    ldc_versions[idx].minor);
1230 
1231 				/* nack with next lower version */
1232 				pkt->stype = LDC_NACK;
1233 				bcopy(&ldc_versions[idx], pkt->udata,
1234 				    sizeof (ldc_versions[idx]));
1235 				ldcp->next_vidx = idx;
1236 				break;
1237 			}
1238 
1239 			/* next major version */
1240 			idx++;
1241 
1242 			D1(ldcp->id, "i_ldc_process_VER: inc idx %x\n", idx);
1243 
1244 			if (idx == LDC_NUM_VERS) {
1245 				/* no version match - send NACK */
1246 				pkt->stype = LDC_NACK;
1247 				bzero(pkt->udata, sizeof (ldc_ver_t));
1248 				ldcp->next_vidx = 0;
1249 				break;
1250 			}
1251 		}
1252 
1253 		/* initiate the send by calling into HV and set the new tail */
1254 		tx_tail = (tx_tail + LDC_PACKET_SIZE) %
1255 		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1256 
1257 		rv = i_ldc_set_tx_tail(ldcp, tx_tail);
1258 		if (rv == 0) {
1259 			ldcp->tx_tail = tx_tail;
1260 			if (pkt->stype == LDC_ACK) {
1261 				D2(ldcp->id, "i_ldc_process_VER: (0x%llx) sent"
1262 				    " version ACK\n", ldcp->id);
1263 				/* Save the ACK'd version */
1264 				ldcp->version.major = rcvd_ver->major;
1265 				ldcp->version.minor = rcvd_ver->minor;
1266 				ldcp->hstate |= TS_RCVD_VER;
1267 				ldcp->tstate |= TS_VER_DONE;
1268 				D1(DBG_ALL_LDCS,
1269 				    "(0x%llx) Sent ACK, "
1270 				    "Agreed on version v%u.%u\n",
1271 				    ldcp->id, rcvd_ver->major, rcvd_ver->minor);
1272 			}
1273 		} else {
1274 			DWARN(ldcp->id,
1275 			    "i_ldc_process_VER: (0x%llx) error sending "
1276 			    "ACK/NACK\n", ldcp->id);
1277 			i_ldc_reset(ldcp, B_TRUE);
1278 			mutex_exit(&ldcp->tx_lock);
1279 			return (ECONNRESET);
1280 		}
1281 
1282 		break;
1283 
1284 	case LDC_ACK:
1285 		if ((ldcp->tstate & ~TS_IN_RESET) == TS_VREADY) {
1286 			if (ldcp->version.major != rcvd_ver->major ||
1287 			    ldcp->version.minor != rcvd_ver->minor) {
1288 
1289 				/* mismatched version - reset connection */
1290 				DWARN(ldcp->id,
1291 				    "i_ldc_process_VER: (0x%llx) recvd"
1292 				    " ACK ver != sent ACK ver\n", ldcp->id);
1293 				i_ldc_reset(ldcp, B_TRUE);
1294 				mutex_exit(&ldcp->tx_lock);
1295 				return (ECONNRESET);
1296 			}
1297 		} else {
1298 			/* SUCCESS - we have agreed on a version */
1299 			ldcp->version.major = rcvd_ver->major;
1300 			ldcp->version.minor = rcvd_ver->minor;
1301 			ldcp->tstate |= TS_VER_DONE;
1302 		}
1303 
1304 		D1(ldcp->id, "(0x%llx) Got ACK, Agreed on version v%u.%u\n",
1305 		    ldcp->id, rcvd_ver->major, rcvd_ver->minor);
1306 
1307 		/* initiate RTS-RTR-RDX handshake */
1308 		rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
1309 		if (rv) {
1310 			DWARN(ldcp->id,
1311 		    "i_ldc_process_VER: (0x%llx) cannot send RTS\n",
1312 			    ldcp->id);
1313 			i_ldc_reset(ldcp, B_TRUE);
1314 			mutex_exit(&ldcp->tx_lock);
1315 			return (ECONNRESET);
1316 		}
1317 
1318 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
1319 		ZERO_PKT(pkt);
1320 
1321 		pkt->type = LDC_CTRL;
1322 		pkt->stype = LDC_INFO;
1323 		pkt->ctrl = LDC_RTS;
1324 		pkt->env = ldcp->mode;
1325 		if (ldcp->mode != LDC_MODE_RAW)
1326 			pkt->seqid = LDC_INIT_SEQID;
1327 
1328 		ldcp->last_msg_rcd = LDC_INIT_SEQID;
1329 
1330 		DUMP_LDC_PKT(ldcp, "i_ldc_process_VER snd rts", (uint64_t)pkt);
1331 
1332 		/* initiate the send by calling into HV and set the new tail */
1333 		tx_tail = (tx_tail + LDC_PACKET_SIZE) %
1334 		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1335 
1336 		rv = i_ldc_set_tx_tail(ldcp, tx_tail);
1337 		if (rv) {
1338 			D2(ldcp->id,
1339 			    "i_ldc_process_VER: (0x%llx) no listener\n",
1340 			    ldcp->id);
1341 			i_ldc_reset(ldcp, B_TRUE);
1342 			mutex_exit(&ldcp->tx_lock);
1343 			return (ECONNRESET);
1344 		}
1345 
1346 		ldcp->tx_tail = tx_tail;
1347 		ldcp->hstate |= TS_SENT_RTS;
1348 
1349 		break;
1350 
1351 	case LDC_NACK:
1352 		/* check if version in NACK is zero */
1353 		if (rcvd_ver->major == 0 && rcvd_ver->minor == 0) {
1354 			/* version handshake failure */
1355 			DWARN(DBG_ALL_LDCS,
1356 			    "i_ldc_process_VER: (0x%llx) no version match\n",
1357 			    ldcp->id);
1358 			i_ldc_reset(ldcp, B_TRUE);
1359 			mutex_exit(&ldcp->tx_lock);
1360 			return (ECONNRESET);
1361 		}
1362 
1363 		/* get the current tail and pkt for the response */
1364 		rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
1365 		if (rv != 0) {
1366 			cmn_err(CE_NOTE,
1367 			    "i_ldc_process_VER: (0x%lx) err sending "
1368 			    "version ACK/NACK\n", ldcp->id);
1369 			i_ldc_reset(ldcp, B_TRUE);
1370 			mutex_exit(&ldcp->tx_lock);
1371 			return (ECONNRESET);
1372 		}
1373 
1374 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
1375 		ZERO_PKT(pkt);
1376 
1377 		/* initialize the packet */
1378 		pkt->type = LDC_CTRL;
1379 		pkt->ctrl = LDC_VER;
1380 		pkt->stype = LDC_INFO;
1381 
1382 		/* check ver in NACK msg has a match */
1383 		for (;;) {
1384 			if (rcvd_ver->major == ldc_versions[idx].major) {
1385 				/*
1386 				 * major version match - resubmit request
1387 				 * if lower minor version to the one this endpt
1388 				 * supports, if necessary
1389 				 */
1390 				if (rcvd_ver->minor > ldc_versions[idx].minor)
1391 					rcvd_ver->minor =
1392 					    ldc_versions[idx].minor;
1393 				bcopy(rcvd_ver, pkt->udata, sizeof (*rcvd_ver));
1394 				break;
1395 			}
1396 
1397 			if (rcvd_ver->major > ldc_versions[idx].major) {
1398 
1399 				D1(ldcp->id, "i_ldc_process_VER: using next"
1400 				    " lower idx=%d, v%u.%u\n", idx,
1401 				    ldc_versions[idx].major,
1402 				    ldc_versions[idx].minor);
1403 
1404 				/* send next lower version */
1405 				bcopy(&ldc_versions[idx], pkt->udata,
1406 				    sizeof (ldc_versions[idx]));
1407 				ldcp->next_vidx = idx;
1408 				break;
1409 			}
1410 
1411 			/* next version */
1412 			idx++;
1413 
1414 			D1(ldcp->id, "i_ldc_process_VER: inc idx %x\n", idx);
1415 
1416 			if (idx == LDC_NUM_VERS) {
1417 				/* no version match - terminate */
1418 				ldcp->next_vidx = 0;
1419 				mutex_exit(&ldcp->tx_lock);
1420 				return (ECONNRESET);
1421 			}
1422 		}
1423 
1424 		/* initiate the send by calling into HV and set the new tail */
1425 		tx_tail = (tx_tail + LDC_PACKET_SIZE) %
1426 		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1427 
1428 		rv = i_ldc_set_tx_tail(ldcp, tx_tail);
1429 		if (rv == 0) {
1430 			D2(ldcp->id, "i_ldc_process_VER: (0x%llx) sent version"
1431 			    "INFO v%u.%u\n", ldcp->id, ldc_versions[idx].major,
1432 			    ldc_versions[idx].minor);
1433 			ldcp->tx_tail = tx_tail;
1434 		} else {
1435 			cmn_err(CE_NOTE,
1436 			    "i_ldc_process_VER: (0x%lx) error sending version"
1437 			    "INFO\n", ldcp->id);
1438 			i_ldc_reset(ldcp, B_TRUE);
1439 			mutex_exit(&ldcp->tx_lock);
1440 			return (ECONNRESET);
1441 		}
1442 
1443 		break;
1444 	}
1445 
1446 	mutex_exit(&ldcp->tx_lock);
1447 	return (rv);
1448 }
1449 
1450 
1451 /*
1452  * Process an incoming RTS ctrl message
1453  */
1454 static int
1455 i_ldc_process_RTS(ldc_chan_t *ldcp, ldc_msg_t *msg)
1456 {
1457 	int 		rv = 0;
1458 	ldc_msg_t 	*pkt;
1459 	uint64_t	tx_tail;
1460 	boolean_t	sent_NACK = B_FALSE;
1461 
1462 	D2(ldcp->id, "i_ldc_process_RTS: (0x%llx) received RTS\n", ldcp->id);
1463 
1464 	switch (msg->stype) {
1465 	case LDC_NACK:
1466 		DWARN(ldcp->id,
1467 		    "i_ldc_process_RTS: (0x%llx) RTS NACK received\n",
1468 		    ldcp->id);
1469 
1470 		/* Reset the channel -- as we cannot continue */
1471 		mutex_enter(&ldcp->tx_lock);
1472 		i_ldc_reset(ldcp, B_TRUE);
1473 		mutex_exit(&ldcp->tx_lock);
1474 		rv = ECONNRESET;
1475 		break;
1476 
1477 	case LDC_INFO:
1478 
1479 		/* check mode */
1480 		if (ldcp->mode != (ldc_mode_t)msg->env) {
1481 			cmn_err(CE_NOTE,
1482 			    "i_ldc_process_RTS: (0x%lx) mode mismatch\n",
1483 			    ldcp->id);
1484 			/*
1485 			 * send NACK in response to MODE message
1486 			 * get the current tail for the response
1487 			 */
1488 			rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_NACK, LDC_RTS);
1489 			if (rv) {
1490 				/* if cannot send NACK - reset channel */
1491 				mutex_enter(&ldcp->tx_lock);
1492 				i_ldc_reset(ldcp, B_TRUE);
1493 				mutex_exit(&ldcp->tx_lock);
1494 				rv = ECONNRESET;
1495 				break;
1496 			}
1497 			sent_NACK = B_TRUE;
1498 		}
1499 		break;
1500 	default:
1501 		DWARN(ldcp->id, "i_ldc_process_RTS: (0x%llx) unexp ACK\n",
1502 		    ldcp->id);
1503 		mutex_enter(&ldcp->tx_lock);
1504 		i_ldc_reset(ldcp, B_TRUE);
1505 		mutex_exit(&ldcp->tx_lock);
1506 		rv = ECONNRESET;
1507 		break;
1508 	}
1509 
1510 	/*
1511 	 * If either the connection was reset (when rv != 0) or
1512 	 * a NACK was sent, we return. In the case of a NACK
1513 	 * we dont want to consume the packet that came in but
1514 	 * not record that we received the RTS
1515 	 */
1516 	if (rv || sent_NACK)
1517 		return (rv);
1518 
1519 	/* record RTS received */
1520 	ldcp->hstate |= TS_RCVD_RTS;
1521 
1522 	/* store initial SEQID info */
1523 	ldcp->last_msg_snt = msg->seqid;
1524 
1525 	/* Obtain Tx lock */
1526 	mutex_enter(&ldcp->tx_lock);
1527 
1528 	/* get the current tail for the response */
1529 	rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
1530 	if (rv != 0) {
1531 		cmn_err(CE_NOTE,
1532 		    "i_ldc_process_RTS: (0x%lx) err sending RTR\n",
1533 		    ldcp->id);
1534 		i_ldc_reset(ldcp, B_TRUE);
1535 		mutex_exit(&ldcp->tx_lock);
1536 		return (ECONNRESET);
1537 	}
1538 
1539 	pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
1540 	ZERO_PKT(pkt);
1541 
1542 	/* initialize the packet */
1543 	pkt->type = LDC_CTRL;
1544 	pkt->stype = LDC_INFO;
1545 	pkt->ctrl = LDC_RTR;
1546 	pkt->env = ldcp->mode;
1547 	if (ldcp->mode != LDC_MODE_RAW)
1548 		pkt->seqid = LDC_INIT_SEQID;
1549 
1550 	ldcp->last_msg_rcd = msg->seqid;
1551 
1552 	/* initiate the send by calling into HV and set the new tail */
1553 	tx_tail = (tx_tail + LDC_PACKET_SIZE) %
1554 	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1555 
1556 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
1557 	if (rv == 0) {
1558 		D2(ldcp->id,
1559 		    "i_ldc_process_RTS: (0x%llx) sent RTR\n", ldcp->id);
1560 		DUMP_LDC_PKT(ldcp, "i_ldc_process_RTS sent rtr", (uint64_t)pkt);
1561 
1562 		ldcp->tx_tail = tx_tail;
1563 		ldcp->hstate |= TS_SENT_RTR;
1564 
1565 	} else {
1566 		cmn_err(CE_NOTE,
1567 		    "i_ldc_process_RTS: (0x%lx) error sending RTR\n",
1568 		    ldcp->id);
1569 		i_ldc_reset(ldcp, B_TRUE);
1570 		mutex_exit(&ldcp->tx_lock);
1571 		return (ECONNRESET);
1572 	}
1573 
1574 	mutex_exit(&ldcp->tx_lock);
1575 	return (0);
1576 }
1577 
1578 /*
1579  * Process an incoming RTR ctrl message
1580  */
1581 static int
1582 i_ldc_process_RTR(ldc_chan_t *ldcp, ldc_msg_t *msg)
1583 {
1584 	int 		rv = 0;
1585 	boolean_t	sent_NACK = B_FALSE;
1586 
1587 	D2(ldcp->id, "i_ldc_process_RTR: (0x%llx) received RTR\n", ldcp->id);
1588 
1589 	switch (msg->stype) {
1590 	case LDC_NACK:
1591 		/* RTR NACK received */
1592 		DWARN(ldcp->id,
1593 		    "i_ldc_process_RTR: (0x%llx) RTR NACK received\n",
1594 		    ldcp->id);
1595 
1596 		/* Reset the channel -- as we cannot continue */
1597 		mutex_enter(&ldcp->tx_lock);
1598 		i_ldc_reset(ldcp, B_TRUE);
1599 		mutex_exit(&ldcp->tx_lock);
1600 		rv = ECONNRESET;
1601 
1602 		break;
1603 
1604 	case LDC_INFO:
1605 
1606 		/* check mode */
1607 		if (ldcp->mode != (ldc_mode_t)msg->env) {
1608 			DWARN(ldcp->id,
1609 			    "i_ldc_process_RTR: (0x%llx) mode mismatch, "
1610 			    "expecting 0x%x, got 0x%x\n",
1611 			    ldcp->id, ldcp->mode, (ldc_mode_t)msg->env);
1612 			/*
1613 			 * send NACK in response to MODE message
1614 			 * get the current tail for the response
1615 			 */
1616 			rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_NACK, LDC_RTR);
1617 			if (rv) {
1618 				/* if cannot send NACK - reset channel */
1619 				mutex_enter(&ldcp->tx_lock);
1620 				i_ldc_reset(ldcp, B_TRUE);
1621 				mutex_exit(&ldcp->tx_lock);
1622 				rv = ECONNRESET;
1623 				break;
1624 			}
1625 			sent_NACK = B_TRUE;
1626 		}
1627 		break;
1628 
1629 	default:
1630 		DWARN(ldcp->id, "i_ldc_process_RTR: (0x%llx) unexp ACK\n",
1631 		    ldcp->id);
1632 
1633 		/* Reset the channel -- as we cannot continue */
1634 		mutex_enter(&ldcp->tx_lock);
1635 		i_ldc_reset(ldcp, B_TRUE);
1636 		mutex_exit(&ldcp->tx_lock);
1637 		rv = ECONNRESET;
1638 		break;
1639 	}
1640 
1641 	/*
1642 	 * If either the connection was reset (when rv != 0) or
1643 	 * a NACK was sent, we return. In the case of a NACK
1644 	 * we dont want to consume the packet that came in but
1645 	 * not record that we received the RTR
1646 	 */
1647 	if (rv || sent_NACK)
1648 		return (rv);
1649 
1650 	ldcp->last_msg_snt = msg->seqid;
1651 	ldcp->hstate |= TS_RCVD_RTR;
1652 
1653 	rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_INFO, LDC_RDX);
1654 	if (rv) {
1655 		cmn_err(CE_NOTE,
1656 		    "i_ldc_process_RTR: (0x%lx) cannot send RDX\n",
1657 		    ldcp->id);
1658 		mutex_enter(&ldcp->tx_lock);
1659 		i_ldc_reset(ldcp, B_TRUE);
1660 		mutex_exit(&ldcp->tx_lock);
1661 		return (ECONNRESET);
1662 	}
1663 	D2(ldcp->id,
1664 	    "i_ldc_process_RTR: (0x%llx) sent RDX\n", ldcp->id);
1665 
1666 	ldcp->hstate |= TS_SENT_RDX;
1667 	ldcp->tstate |= TS_HSHAKE_DONE;
1668 	if ((ldcp->tstate & TS_IN_RESET) == 0)
1669 		ldcp->status = LDC_UP;
1670 
1671 	D1(ldcp->id, "(0x%llx) Handshake Complete\n", ldcp->id);
1672 
1673 	return (0);
1674 }
1675 
1676 
1677 /*
1678  * Process an incoming RDX ctrl message
1679  */
1680 static int
1681 i_ldc_process_RDX(ldc_chan_t *ldcp, ldc_msg_t *msg)
1682 {
1683 	int	rv = 0;
1684 
1685 	D2(ldcp->id, "i_ldc_process_RDX: (0x%llx) received RDX\n", ldcp->id);
1686 
1687 	switch (msg->stype) {
1688 	case LDC_NACK:
1689 		/* RDX NACK received */
1690 		DWARN(ldcp->id,
1691 		    "i_ldc_process_RDX: (0x%llx) RDX NACK received\n",
1692 		    ldcp->id);
1693 
1694 		/* Reset the channel -- as we cannot continue */
1695 		mutex_enter(&ldcp->tx_lock);
1696 		i_ldc_reset(ldcp, B_TRUE);
1697 		mutex_exit(&ldcp->tx_lock);
1698 		rv = ECONNRESET;
1699 
1700 		break;
1701 
1702 	case LDC_INFO:
1703 
1704 		/*
1705 		 * if channel is UP and a RDX received after data transmission
1706 		 * has commenced it is an error
1707 		 */
1708 		if ((ldcp->tstate == TS_UP) && (ldcp->hstate & TS_RCVD_RDX)) {
1709 			DWARN(DBG_ALL_LDCS,
1710 			    "i_ldc_process_RDX: (0x%llx) unexpected RDX"
1711 			    " - LDC reset\n", ldcp->id);
1712 			mutex_enter(&ldcp->tx_lock);
1713 			i_ldc_reset(ldcp, B_TRUE);
1714 			mutex_exit(&ldcp->tx_lock);
1715 			return (ECONNRESET);
1716 		}
1717 
1718 		ldcp->hstate |= TS_RCVD_RDX;
1719 		ldcp->tstate |= TS_HSHAKE_DONE;
1720 		if ((ldcp->tstate & TS_IN_RESET) == 0)
1721 			ldcp->status = LDC_UP;
1722 
1723 		D1(DBG_ALL_LDCS, "(0x%llx) Handshake Complete\n", ldcp->id);
1724 		break;
1725 
1726 	default:
1727 		DWARN(ldcp->id, "i_ldc_process_RDX: (0x%llx) unexp ACK\n",
1728 		    ldcp->id);
1729 
1730 		/* Reset the channel -- as we cannot continue */
1731 		mutex_enter(&ldcp->tx_lock);
1732 		i_ldc_reset(ldcp, B_TRUE);
1733 		mutex_exit(&ldcp->tx_lock);
1734 		rv = ECONNRESET;
1735 		break;
1736 	}
1737 
1738 	return (rv);
1739 }
1740 
1741 /*
1742  * Process an incoming ACK for a data packet
1743  */
1744 static int
1745 i_ldc_process_data_ACK(ldc_chan_t *ldcp, ldc_msg_t *msg)
1746 {
1747 	int		rv;
1748 	uint64_t 	tx_head;
1749 	ldc_msg_t	*pkt;
1750 
1751 	/* Obtain Tx lock */
1752 	mutex_enter(&ldcp->tx_lock);
1753 
1754 	/*
1755 	 * Read the current Tx head and tail
1756 	 */
1757 	rv = hv_ldc_tx_get_state(ldcp->id,
1758 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
1759 	if (rv != 0) {
1760 		cmn_err(CE_WARN,
1761 		    "i_ldc_process_data_ACK: (0x%lx) cannot read qptrs\n",
1762 		    ldcp->id);
1763 
1764 		/* Reset the channel -- as we cannot continue */
1765 		i_ldc_reset(ldcp, B_TRUE);
1766 		mutex_exit(&ldcp->tx_lock);
1767 		return (ECONNRESET);
1768 	}
1769 
1770 	/*
1771 	 * loop from where the previous ACK location was to the
1772 	 * current head location. This is how far the HV has
1773 	 * actually send pkts. Pkts between head and tail are
1774 	 * yet to be sent by HV.
1775 	 */
1776 	tx_head = ldcp->tx_ackd_head;
1777 	for (;;) {
1778 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_head);
1779 		tx_head = (tx_head + LDC_PACKET_SIZE) %
1780 		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1781 
1782 		if (pkt->seqid == msg->ackid) {
1783 			D2(ldcp->id,
1784 			    "i_ldc_process_data_ACK: (0x%llx) found packet\n",
1785 			    ldcp->id);
1786 			ldcp->last_ack_rcd = msg->ackid;
1787 			ldcp->tx_ackd_head = tx_head;
1788 			break;
1789 		}
1790 		if (tx_head == ldcp->tx_head) {
1791 			/* could not find packet */
1792 			DWARN(ldcp->id,
1793 			    "i_ldc_process_data_ACK: (0x%llx) invalid ACKid\n",
1794 			    ldcp->id);
1795 
1796 			/* Reset the channel -- as we cannot continue */
1797 			i_ldc_reset(ldcp, B_TRUE);
1798 			mutex_exit(&ldcp->tx_lock);
1799 			return (ECONNRESET);
1800 		}
1801 	}
1802 
1803 	mutex_exit(&ldcp->tx_lock);
1804 	return (0);
1805 }
1806 
1807 /*
1808  * Process incoming control message
1809  * Return 0 - session can continue
1810  *        EAGAIN - reprocess packet - state was changed
1811  *	  ECONNRESET - channel was reset
1812  */
1813 static int
1814 i_ldc_ctrlmsg(ldc_chan_t *ldcp, ldc_msg_t *msg)
1815 {
1816 	int 		rv = 0;
1817 
1818 	D1(ldcp->id, "i_ldc_ctrlmsg: (%llx) tstate = %lx, hstate = %lx\n",
1819 	    ldcp->id, ldcp->tstate, ldcp->hstate);
1820 
1821 	switch (ldcp->tstate & ~TS_IN_RESET) {
1822 
1823 	case TS_OPEN:
1824 	case TS_READY:
1825 
1826 		switch (msg->ctrl & LDC_CTRL_MASK) {
1827 		case LDC_VER:
1828 			/* process version message */
1829 			rv = i_ldc_process_VER(ldcp, msg);
1830 			break;
1831 		default:
1832 			DWARN(ldcp->id,
1833 			    "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
1834 			    "tstate=0x%x\n", ldcp->id,
1835 			    (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate);
1836 			break;
1837 		}
1838 
1839 		break;
1840 
1841 	case TS_VREADY:
1842 
1843 		switch (msg->ctrl & LDC_CTRL_MASK) {
1844 		case LDC_VER:
1845 			/* process version message */
1846 			rv = i_ldc_process_VER(ldcp, msg);
1847 			break;
1848 		case LDC_RTS:
1849 			/* process RTS message */
1850 			rv = i_ldc_process_RTS(ldcp, msg);
1851 			break;
1852 		case LDC_RTR:
1853 			/* process RTR message */
1854 			rv = i_ldc_process_RTR(ldcp, msg);
1855 			break;
1856 		case LDC_RDX:
1857 			/* process RDX message */
1858 			rv = i_ldc_process_RDX(ldcp, msg);
1859 			break;
1860 		default:
1861 			DWARN(ldcp->id,
1862 			    "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
1863 			    "tstate=0x%x\n", ldcp->id,
1864 			    (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate);
1865 			break;
1866 		}
1867 
1868 		break;
1869 
1870 	case TS_UP:
1871 
1872 		switch (msg->ctrl & LDC_CTRL_MASK) {
1873 		case LDC_VER:
1874 			DWARN(ldcp->id,
1875 			    "i_ldc_ctrlmsg: (0x%llx) unexpected VER "
1876 			    "- LDC reset\n", ldcp->id);
1877 			/* peer is redoing version negotiation */
1878 			mutex_enter(&ldcp->tx_lock);
1879 			(void) i_ldc_txq_reconf(ldcp);
1880 			i_ldc_reset_state(ldcp);
1881 			mutex_exit(&ldcp->tx_lock);
1882 			rv = EAGAIN;
1883 			break;
1884 
1885 		case LDC_RDX:
1886 			/* process RDX message */
1887 			rv = i_ldc_process_RDX(ldcp, msg);
1888 			break;
1889 
1890 		default:
1891 			DWARN(ldcp->id,
1892 			    "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
1893 			    "tstate=0x%x\n", ldcp->id,
1894 			    (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate);
1895 			break;
1896 		}
1897 	}
1898 
1899 	return (rv);
1900 }
1901 
1902 /*
1903  * Register channel with the channel nexus
1904  */
1905 static int
1906 i_ldc_register_channel(ldc_chan_t *ldcp)
1907 {
1908 	int		rv = 0;
1909 	ldc_cnex_t	*cinfo = &ldcssp->cinfo;
1910 
1911 	if (cinfo->dip == NULL) {
1912 		DWARN(ldcp->id,
1913 		    "i_ldc_register_channel: cnex has not registered\n");
1914 		return (EAGAIN);
1915 	}
1916 
1917 	rv = cinfo->reg_chan(cinfo->dip, ldcp->id, ldcp->devclass);
1918 	if (rv) {
1919 		DWARN(ldcp->id,
1920 		    "i_ldc_register_channel: cannot register channel\n");
1921 		return (rv);
1922 	}
1923 
1924 	rv = cinfo->add_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR,
1925 	    i_ldc_tx_hdlr, ldcp, NULL);
1926 	if (rv) {
1927 		DWARN(ldcp->id,
1928 		    "i_ldc_register_channel: cannot add Tx interrupt\n");
1929 		(void) cinfo->unreg_chan(cinfo->dip, ldcp->id);
1930 		return (rv);
1931 	}
1932 
1933 	rv = cinfo->add_intr(cinfo->dip, ldcp->id, CNEX_RX_INTR,
1934 	    i_ldc_rx_hdlr, ldcp, NULL);
1935 	if (rv) {
1936 		DWARN(ldcp->id,
1937 		    "i_ldc_register_channel: cannot add Rx interrupt\n");
1938 		(void) cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR);
1939 		(void) cinfo->unreg_chan(cinfo->dip, ldcp->id);
1940 		return (rv);
1941 	}
1942 
1943 	ldcp->tstate |= TS_CNEX_RDY;
1944 
1945 	return (0);
1946 }
1947 
1948 /*
1949  * Unregister a channel with the channel nexus
1950  */
1951 static int
1952 i_ldc_unregister_channel(ldc_chan_t *ldcp)
1953 {
1954 	int		rv = 0;
1955 	ldc_cnex_t	*cinfo = &ldcssp->cinfo;
1956 
1957 	if (cinfo->dip == NULL) {
1958 		DWARN(ldcp->id,
1959 		    "i_ldc_unregister_channel: cnex has not registered\n");
1960 		return (EAGAIN);
1961 	}
1962 
1963 	if (ldcp->tstate & TS_CNEX_RDY) {
1964 
1965 		/* Remove the Rx interrupt */
1966 		rv = cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_RX_INTR);
1967 		if (rv) {
1968 			if (rv != EAGAIN) {
1969 				DWARN(ldcp->id,
1970 				    "i_ldc_unregister_channel: err removing "
1971 				    "Rx intr\n");
1972 				return (rv);
1973 			}
1974 
1975 			/*
1976 			 * If interrupts are pending and handler has
1977 			 * finished running, clear interrupt and try
1978 			 * again
1979 			 */
1980 			if (ldcp->rx_intr_state != LDC_INTR_PEND)
1981 				return (rv);
1982 
1983 			(void) i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
1984 			rv = cinfo->rem_intr(cinfo->dip, ldcp->id,
1985 			    CNEX_RX_INTR);
1986 			if (rv) {
1987 				DWARN(ldcp->id, "i_ldc_unregister_channel: "
1988 				    "err removing Rx interrupt\n");
1989 				return (rv);
1990 			}
1991 		}
1992 
1993 		/* Remove the Tx interrupt */
1994 		rv = cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR);
1995 		if (rv) {
1996 			DWARN(ldcp->id,
1997 			    "i_ldc_unregister_channel: err removing Tx intr\n");
1998 			return (rv);
1999 		}
2000 
2001 		/* Unregister the channel */
2002 		rv = cinfo->unreg_chan(ldcssp->cinfo.dip, ldcp->id);
2003 		if (rv) {
2004 			DWARN(ldcp->id,
2005 			    "i_ldc_unregister_channel: cannot unreg channel\n");
2006 			return (rv);
2007 		}
2008 
2009 		ldcp->tstate &= ~TS_CNEX_RDY;
2010 	}
2011 
2012 	return (0);
2013 }
2014 
2015 
2016 /*
2017  * LDC transmit interrupt handler
2018  *    triggered for chanel up/down/reset events
2019  *    and Tx queue content changes
2020  */
2021 static uint_t
2022 i_ldc_tx_hdlr(caddr_t arg1, caddr_t arg2)
2023 {
2024 	_NOTE(ARGUNUSED(arg2))
2025 
2026 	int 		rv;
2027 	ldc_chan_t 	*ldcp;
2028 	boolean_t 	notify_client = B_FALSE;
2029 	uint64_t	notify_event = 0, link_state;
2030 
2031 	/* Get the channel for which interrupt was received */
2032 	ASSERT(arg1 != NULL);
2033 	ldcp = (ldc_chan_t *)arg1;
2034 
2035 	D1(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) Received intr, ldcp=0x%p\n",
2036 	    ldcp->id, ldcp);
2037 
2038 	/* Lock channel */
2039 	mutex_enter(&ldcp->lock);
2040 
2041 	/* Obtain Tx lock */
2042 	mutex_enter(&ldcp->tx_lock);
2043 
2044 	/* mark interrupt as pending */
2045 	ldcp->tx_intr_state = LDC_INTR_ACTIVE;
2046 
2047 	/* save current link state */
2048 	link_state = ldcp->link_state;
2049 
2050 	rv = hv_ldc_tx_get_state(ldcp->id, &ldcp->tx_head, &ldcp->tx_tail,
2051 	    &ldcp->link_state);
2052 	if (rv) {
2053 		cmn_err(CE_WARN,
2054 		    "i_ldc_tx_hdlr: (0x%lx) cannot read queue ptrs rv=0x%d\n",
2055 		    ldcp->id, rv);
2056 		i_ldc_clear_intr(ldcp, CNEX_TX_INTR);
2057 		mutex_exit(&ldcp->tx_lock);
2058 		mutex_exit(&ldcp->lock);
2059 		return (DDI_INTR_CLAIMED);
2060 	}
2061 
2062 	/*
2063 	 * reset the channel state if the channel went down
2064 	 * (other side unconfigured queue) or channel was reset
2065 	 * (other side reconfigured its queue)
2066 	 */
2067 	if (link_state != ldcp->link_state &&
2068 	    ldcp->link_state == LDC_CHANNEL_DOWN) {
2069 		D1(ldcp->id, "i_ldc_tx_hdlr: channel link down\n", ldcp->id);
2070 		i_ldc_reset(ldcp, B_FALSE);
2071 		notify_client = B_TRUE;
2072 		notify_event = LDC_EVT_DOWN;
2073 	}
2074 
2075 	if (link_state != ldcp->link_state &&
2076 	    ldcp->link_state == LDC_CHANNEL_RESET) {
2077 		D1(ldcp->id, "i_ldc_tx_hdlr: channel link reset\n", ldcp->id);
2078 		i_ldc_reset(ldcp, B_FALSE);
2079 		notify_client = B_TRUE;
2080 		notify_event = LDC_EVT_RESET;
2081 	}
2082 
2083 	if (link_state != ldcp->link_state &&
2084 	    (ldcp->tstate & ~TS_IN_RESET) == TS_OPEN &&
2085 	    ldcp->link_state == LDC_CHANNEL_UP) {
2086 		D1(ldcp->id, "i_ldc_tx_hdlr: channel link up\n", ldcp->id);
2087 		notify_client = B_TRUE;
2088 		notify_event = LDC_EVT_RESET;
2089 		ldcp->tstate |= TS_LINK_READY;
2090 		ldcp->status = LDC_READY;
2091 	}
2092 
2093 	/* if callbacks are disabled, do not notify */
2094 	if (!ldcp->cb_enabled)
2095 		notify_client = B_FALSE;
2096 
2097 	i_ldc_clear_intr(ldcp, CNEX_TX_INTR);
2098 	mutex_exit(&ldcp->tx_lock);
2099 
2100 	if (notify_client) {
2101 		ldcp->cb_inprogress = B_TRUE;
2102 		mutex_exit(&ldcp->lock);
2103 		rv = ldcp->cb(notify_event, ldcp->cb_arg);
2104 		if (rv) {
2105 			DWARN(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) callback "
2106 			    "failure", ldcp->id);
2107 		}
2108 		mutex_enter(&ldcp->lock);
2109 		ldcp->cb_inprogress = B_FALSE;
2110 	}
2111 
2112 	mutex_exit(&ldcp->lock);
2113 
2114 	D1(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) exiting handler", ldcp->id);
2115 
2116 	return (DDI_INTR_CLAIMED);
2117 }
2118 
2119 /*
2120  * Process the Rx HV queue.
2121  *
2122  * Returns 0 if data packets were found and no errors were encountered,
2123  * otherwise returns an error. In either case, the *notify argument is
2124  * set to indicate whether or not the client callback function should
2125  * be invoked. The *event argument is set to contain the callback event.
2126  *
2127  * Depending on the channel mode, packets are handled differently:
2128  *
2129  * RAW MODE
2130  * For raw mode channels, when a data packet is encountered,
2131  * processing stops and all packets are left on the queue to be removed
2132  * and processed by the ldc_read code path.
2133  *
2134  * UNRELIABLE MODE
2135  * For unreliable mode, when a data packet is encountered, processing
2136  * stops, and all packets are left on the queue to be removed and
2137  * processed by the ldc_read code path. Control packets are processed
2138  * inline if they are encountered before any data packets.
2139  *
2140  * RELIABLE MODE
2141  * For reliable mode channels, all packets on the receive queue
2142  * are processed: data packets are copied to the data queue and
2143  * control packets are processed inline. Packets are only left on
2144  * the receive queue when the data queue is full.
2145  */
2146 static uint_t
2147 i_ldc_rx_process_hvq(ldc_chan_t *ldcp, boolean_t *notify_client,
2148     uint64_t *notify_event)
2149 {
2150 	int		rv;
2151 	uint64_t 	rx_head, rx_tail;
2152 	ldc_msg_t 	*msg;
2153 	uint64_t	link_state, first_fragment = 0;
2154 	boolean_t	trace_length = B_TRUE;
2155 
2156 	ASSERT(MUTEX_HELD(&ldcp->lock));
2157 	*notify_client = B_FALSE;
2158 	*notify_event = 0;
2159 
2160 	/*
2161 	 * Read packet(s) from the queue
2162 	 */
2163 	for (;;) {
2164 
2165 		link_state = ldcp->link_state;
2166 		rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
2167 		    &ldcp->link_state);
2168 		if (rv) {
2169 			cmn_err(CE_WARN,
2170 			    "i_ldc_rx_process_hvq: (0x%lx) cannot read "
2171 			    "queue ptrs, rv=0x%d\n", ldcp->id, rv);
2172 			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
2173 			return (EIO);
2174 		}
2175 
2176 		/*
2177 		 * reset the channel state if the channel went down
2178 		 * (other side unconfigured queue) or channel was reset
2179 		 * (other side reconfigured its queue)
2180 		 */
2181 
2182 		if (link_state != ldcp->link_state) {
2183 
2184 			switch (ldcp->link_state) {
2185 			case LDC_CHANNEL_DOWN:
2186 				D1(ldcp->id, "i_ldc_rx_process_hvq: channel "
2187 				    "link down\n", ldcp->id);
2188 				mutex_enter(&ldcp->tx_lock);
2189 				i_ldc_reset(ldcp, B_FALSE);
2190 				mutex_exit(&ldcp->tx_lock);
2191 				*notify_client = B_TRUE;
2192 				*notify_event = LDC_EVT_DOWN;
2193 				goto loop_exit;
2194 
2195 			case LDC_CHANNEL_UP:
2196 				D1(ldcp->id, "i_ldc_rx_process_hvq: "
2197 				    "channel link up\n", ldcp->id);
2198 
2199 				if ((ldcp->tstate & ~TS_IN_RESET) == TS_OPEN) {
2200 					*notify_client = B_TRUE;
2201 					*notify_event = LDC_EVT_RESET;
2202 					ldcp->tstate |= TS_LINK_READY;
2203 					ldcp->status = LDC_READY;
2204 				}
2205 				break;
2206 
2207 			case LDC_CHANNEL_RESET:
2208 			default:
2209 #ifdef DEBUG
2210 force_reset:
2211 #endif
2212 				D1(ldcp->id, "i_ldc_rx_process_hvq: channel "
2213 				    "link reset\n", ldcp->id);
2214 				mutex_enter(&ldcp->tx_lock);
2215 				i_ldc_reset(ldcp, B_FALSE);
2216 				mutex_exit(&ldcp->tx_lock);
2217 				*notify_client = B_TRUE;
2218 				*notify_event = LDC_EVT_RESET;
2219 				break;
2220 			}
2221 		}
2222 
2223 #ifdef DEBUG
2224 		if (LDC_INJECT_RESET(ldcp))
2225 			goto force_reset;
2226 		if (LDC_INJECT_DRNGCLEAR(ldcp))
2227 			i_ldc_mem_inject_dring_clear(ldcp);
2228 #endif
2229 		if (trace_length) {
2230 			TRACE_RXHVQ_LENGTH(ldcp, rx_head, rx_tail);
2231 			trace_length = B_FALSE;
2232 		}
2233 
2234 		if (rx_head == rx_tail) {
2235 			D2(ldcp->id, "i_ldc_rx_process_hvq: (0x%llx) "
2236 			    "No packets\n", ldcp->id);
2237 			break;
2238 		}
2239 
2240 		D2(ldcp->id, "i_ldc_rx_process_hvq: head=0x%llx, "
2241 		    "tail=0x%llx\n", rx_head, rx_tail);
2242 		DUMP_LDC_PKT(ldcp, "i_ldc_rx_process_hvq rcd",
2243 		    ldcp->rx_q_va + rx_head);
2244 
2245 		/* get the message */
2246 		msg = (ldc_msg_t *)(ldcp->rx_q_va + rx_head);
2247 
2248 		/* if channel is in RAW mode or data pkt, notify and return */
2249 		if (ldcp->mode == LDC_MODE_RAW) {
2250 			*notify_client = B_TRUE;
2251 			*notify_event |= LDC_EVT_READ;
2252 			break;
2253 		}
2254 
2255 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_INFO)) {
2256 
2257 			/* discard packet if channel is not up */
2258 			if ((ldcp->tstate & ~TS_IN_RESET) != TS_UP) {
2259 
2260 				/* move the head one position */
2261 				rx_head = (rx_head + LDC_PACKET_SIZE) %
2262 				    (ldcp->rx_q_entries << LDC_PACKET_SHIFT);
2263 
2264 				if (rv = i_ldc_set_rx_head(ldcp, rx_head))
2265 					break;
2266 
2267 				continue;
2268 			} else {
2269 				uint64_t dq_head, dq_tail;
2270 
2271 				/* process only RELIABLE mode data packets */
2272 				if (ldcp->mode != LDC_MODE_RELIABLE) {
2273 					if ((ldcp->tstate & TS_IN_RESET) == 0)
2274 						*notify_client = B_TRUE;
2275 					*notify_event |= LDC_EVT_READ;
2276 					break;
2277 				}
2278 
2279 				/* don't process packet if queue full */
2280 				(void) i_ldc_dq_rx_get_state(ldcp, &dq_head,
2281 				    &dq_tail, NULL);
2282 				dq_tail = (dq_tail + LDC_PACKET_SIZE) %
2283 				    (ldcp->rx_dq_entries << LDC_PACKET_SHIFT);
2284 				if (dq_tail == dq_head ||
2285 				    LDC_INJECT_DQFULL(ldcp)) {
2286 					rv = ENOSPC;
2287 					break;
2288 				}
2289 			}
2290 		}
2291 
2292 		/* Check the sequence ID for the message received */
2293 		rv = i_ldc_check_seqid(ldcp, msg);
2294 		if (rv != 0) {
2295 
2296 			DWARN(ldcp->id, "i_ldc_rx_process_hvq: (0x%llx) "
2297 			    "seqid error, q_ptrs=0x%lx,0x%lx", ldcp->id,
2298 			    rx_head, rx_tail);
2299 
2300 			/* Reset last_msg_rcd to start of message */
2301 			if (first_fragment != 0) {
2302 				ldcp->last_msg_rcd = first_fragment - 1;
2303 				first_fragment = 0;
2304 			}
2305 
2306 			/*
2307 			 * Send a NACK due to seqid mismatch
2308 			 */
2309 			rv = i_ldc_send_pkt(ldcp, msg->type, LDC_NACK,
2310 			    (msg->ctrl & LDC_CTRL_MASK));
2311 
2312 			if (rv) {
2313 				cmn_err(CE_NOTE, "i_ldc_rx_process_hvq: "
2314 				    "(0x%lx) err sending CTRL/DATA NACK msg\n",
2315 				    ldcp->id);
2316 
2317 				/* if cannot send NACK - reset channel */
2318 				mutex_enter(&ldcp->tx_lock);
2319 				i_ldc_reset(ldcp, B_TRUE);
2320 				mutex_exit(&ldcp->tx_lock);
2321 
2322 				*notify_client = B_TRUE;
2323 				*notify_event = LDC_EVT_RESET;
2324 				break;
2325 			}
2326 
2327 			/* purge receive queue */
2328 			(void) i_ldc_set_rx_head(ldcp, rx_tail);
2329 			break;
2330 		}
2331 
2332 		/* record the message ID */
2333 		ldcp->last_msg_rcd = msg->seqid;
2334 
2335 		/* process control messages */
2336 		if (msg->type & LDC_CTRL) {
2337 			/* save current internal state */
2338 			uint64_t tstate = ldcp->tstate;
2339 
2340 			rv = i_ldc_ctrlmsg(ldcp, msg);
2341 			if (rv == EAGAIN) {
2342 				/* re-process pkt - state was adjusted */
2343 				continue;
2344 			}
2345 			if (rv == ECONNRESET) {
2346 				*notify_client = B_TRUE;
2347 				*notify_event = LDC_EVT_RESET;
2348 				break;
2349 			}
2350 
2351 			/*
2352 			 * control message processing was successful
2353 			 * channel transitioned to ready for communication
2354 			 */
2355 			if (rv == 0 && ldcp->tstate == TS_UP &&
2356 			    (tstate & ~TS_IN_RESET) !=
2357 			    (ldcp->tstate & ~TS_IN_RESET)) {
2358 				*notify_client = B_TRUE;
2359 				*notify_event = LDC_EVT_UP;
2360 			}
2361 		}
2362 
2363 		/* process data NACKs */
2364 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_NACK)) {
2365 			DWARN(ldcp->id,
2366 			    "i_ldc_rx_process_hvq: (0x%llx) received DATA/NACK",
2367 			    ldcp->id);
2368 			mutex_enter(&ldcp->tx_lock);
2369 			i_ldc_reset(ldcp, B_TRUE);
2370 			mutex_exit(&ldcp->tx_lock);
2371 			*notify_client = B_TRUE;
2372 			*notify_event = LDC_EVT_RESET;
2373 			break;
2374 		}
2375 
2376 		/* process data ACKs */
2377 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) {
2378 			if (rv = i_ldc_process_data_ACK(ldcp, msg)) {
2379 				*notify_client = B_TRUE;
2380 				*notify_event = LDC_EVT_RESET;
2381 				break;
2382 			}
2383 		}
2384 
2385 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_INFO)) {
2386 			ASSERT(ldcp->mode == LDC_MODE_RELIABLE);
2387 
2388 			/*
2389 			 * Copy the data packet to the data queue. Note
2390 			 * that the copy routine updates the rx_head pointer.
2391 			 */
2392 			i_ldc_rxdq_copy(ldcp, &rx_head);
2393 
2394 			if ((ldcp->tstate & TS_IN_RESET) == 0)
2395 				*notify_client = B_TRUE;
2396 			*notify_event |= LDC_EVT_READ;
2397 		} else {
2398 			rx_head = (rx_head + LDC_PACKET_SIZE) %
2399 			    (ldcp->rx_q_entries << LDC_PACKET_SHIFT);
2400 		}
2401 
2402 		/* move the head one position */
2403 		if (rv = i_ldc_set_rx_head(ldcp, rx_head)) {
2404 			*notify_client = B_TRUE;
2405 			*notify_event = LDC_EVT_RESET;
2406 			break;
2407 		}
2408 
2409 	} /* for */
2410 
2411 loop_exit:
2412 
2413 	if (ldcp->mode == LDC_MODE_RELIABLE) {
2414 		/* ACK data packets */
2415 		if ((*notify_event &
2416 		    (LDC_EVT_READ | LDC_EVT_RESET)) == LDC_EVT_READ) {
2417 			int ack_rv;
2418 			ack_rv = i_ldc_send_pkt(ldcp, LDC_DATA, LDC_ACK, 0);
2419 			if (ack_rv && ack_rv != EWOULDBLOCK) {
2420 				cmn_err(CE_NOTE,
2421 				    "i_ldc_rx_process_hvq: (0x%lx) cannot "
2422 				    "send ACK\n", ldcp->id);
2423 
2424 				mutex_enter(&ldcp->tx_lock);
2425 				i_ldc_reset(ldcp, B_FALSE);
2426 				mutex_exit(&ldcp->tx_lock);
2427 
2428 				*notify_client = B_TRUE;
2429 				*notify_event = LDC_EVT_RESET;
2430 				goto skip_ackpeek;
2431 			}
2432 		}
2433 
2434 		/*
2435 		 * If we have no more space on the data queue, make sure
2436 		 * there are no ACKs on the rx queue waiting to be processed.
2437 		 */
2438 		if (rv == ENOSPC) {
2439 			if (i_ldc_rx_ackpeek(ldcp, rx_head, rx_tail) != 0) {
2440 				ldcp->rx_ack_head = ACKPEEK_HEAD_INVALID;
2441 				*notify_client = B_TRUE;
2442 				*notify_event = LDC_EVT_RESET;
2443 			}
2444 			return (rv);
2445 		} else {
2446 			ldcp->rx_ack_head = ACKPEEK_HEAD_INVALID;
2447 		}
2448 	}
2449 
2450 skip_ackpeek:
2451 
2452 	/* Return, indicating whether or not data packets were found */
2453 	if ((*notify_event & (LDC_EVT_READ | LDC_EVT_RESET)) == LDC_EVT_READ)
2454 		return (0);
2455 
2456 	return (ENOMSG);
2457 }
2458 
2459 /*
2460  * Process any ACK packets on the HV receive queue.
2461  *
2462  * This function is only used by RELIABLE mode channels when the
2463  * secondary data queue fills up and there are packets remaining on
2464  * the HV receive queue.
2465  */
2466 int
2467 i_ldc_rx_ackpeek(ldc_chan_t *ldcp, uint64_t rx_head, uint64_t rx_tail)
2468 {
2469 	int		rv = 0;
2470 	ldc_msg_t	*msg;
2471 
2472 	if (ldcp->rx_ack_head == ACKPEEK_HEAD_INVALID)
2473 		ldcp->rx_ack_head = rx_head;
2474 
2475 	while (ldcp->rx_ack_head != rx_tail) {
2476 		msg = (ldc_msg_t *)(ldcp->rx_q_va + ldcp->rx_ack_head);
2477 
2478 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) {
2479 			if (rv = i_ldc_process_data_ACK(ldcp, msg))
2480 				break;
2481 			msg->stype &= ~LDC_ACK;
2482 		}
2483 
2484 		ldcp->rx_ack_head =
2485 		    (ldcp->rx_ack_head + LDC_PACKET_SIZE) %
2486 		    (ldcp->rx_q_entries << LDC_PACKET_SHIFT);
2487 	}
2488 	return (rv);
2489 }
2490 
2491 /* -------------------------------------------------------------------------- */
2492 
2493 /*
2494  * LDC API functions
2495  */
2496 
2497 /*
2498  * Initialize the channel. Allocate internal structure and memory for
2499  * TX/RX queues, and initialize locks.
2500  */
2501 int
2502 ldc_init(uint64_t id, ldc_attr_t *attr, ldc_handle_t *handle)
2503 {
2504 	ldc_chan_t 	*ldcp;
2505 	int		rv, exit_val;
2506 	uint64_t	ra_base, nentries;
2507 	uint64_t	qlen;
2508 
2509 	exit_val = EINVAL;	/* guarantee an error if exit on failure */
2510 
2511 	if (attr == NULL) {
2512 		DWARN(id, "ldc_init: (0x%llx) invalid attr\n", id);
2513 		return (EINVAL);
2514 	}
2515 	if (handle == NULL) {
2516 		DWARN(id, "ldc_init: (0x%llx) invalid handle\n", id);
2517 		return (EINVAL);
2518 	}
2519 
2520 	/* check if channel is valid */
2521 	rv = hv_ldc_tx_qinfo(id, &ra_base, &nentries);
2522 	if (rv == H_ECHANNEL) {
2523 		DWARN(id, "ldc_init: (0x%llx) invalid channel id\n", id);
2524 		return (EINVAL);
2525 	}
2526 
2527 	/* check if the channel has already been initialized */
2528 	mutex_enter(&ldcssp->lock);
2529 	ldcp = ldcssp->chan_list;
2530 	while (ldcp != NULL) {
2531 		if (ldcp->id == id) {
2532 			DWARN(id, "ldc_init: (0x%llx) already initialized\n",
2533 			    id);
2534 			mutex_exit(&ldcssp->lock);
2535 			return (EADDRINUSE);
2536 		}
2537 		ldcp = ldcp->next;
2538 	}
2539 	mutex_exit(&ldcssp->lock);
2540 
2541 	ASSERT(ldcp == NULL);
2542 
2543 	*handle = 0;
2544 
2545 	/* Allocate an ldcp structure */
2546 	ldcp = kmem_zalloc(sizeof (ldc_chan_t), KM_SLEEP);
2547 
2548 	/*
2549 	 * Initialize the channel and Tx lock
2550 	 *
2551 	 * The channel 'lock' protects the entire channel and
2552 	 * should be acquired before initializing, resetting,
2553 	 * destroying or reading from a channel.
2554 	 *
2555 	 * The 'tx_lock' should be acquired prior to transmitting
2556 	 * data over the channel. The lock should also be acquired
2557 	 * prior to channel reconfiguration (in order to prevent
2558 	 * concurrent writes).
2559 	 *
2560 	 * ORDERING: When both locks are being acquired, to prevent
2561 	 * deadlocks, the channel lock should be always acquired prior
2562 	 * to the tx_lock.
2563 	 */
2564 	mutex_init(&ldcp->lock, NULL, MUTEX_DRIVER, NULL);
2565 	mutex_init(&ldcp->tx_lock, NULL, MUTEX_DRIVER, NULL);
2566 
2567 	/* Initialize the channel */
2568 	ldcp->id = id;
2569 	ldcp->cb = NULL;
2570 	ldcp->cb_arg = NULL;
2571 	ldcp->cb_inprogress = B_FALSE;
2572 	ldcp->cb_enabled = B_FALSE;
2573 	ldcp->next = NULL;
2574 
2575 	/* Read attributes */
2576 	ldcp->mode = attr->mode;
2577 	ldcp->devclass = attr->devclass;
2578 	ldcp->devinst = attr->instance;
2579 	ldcp->mtu = (attr->mtu > 0) ? attr->mtu : LDC_DEFAULT_MTU;
2580 
2581 	D1(ldcp->id,
2582 	    "ldc_init: (0x%llx) channel attributes, class=0x%x, "
2583 	    "instance=0x%llx, mode=%d, mtu=%d\n",
2584 	    ldcp->id, ldcp->devclass, ldcp->devinst, ldcp->mode, ldcp->mtu);
2585 
2586 	ldcp->next_vidx = 0;
2587 	ldcp->tstate = TS_IN_RESET;
2588 	ldcp->hstate = 0;
2589 	ldcp->last_msg_snt = LDC_INIT_SEQID;
2590 	ldcp->last_ack_rcd = 0;
2591 	ldcp->last_msg_rcd = 0;
2592 	ldcp->rx_ack_head = ACKPEEK_HEAD_INVALID;
2593 
2594 	ldcp->stream_bufferp = NULL;
2595 	ldcp->exp_dring_list = NULL;
2596 	ldcp->imp_dring_list = NULL;
2597 	ldcp->mhdl_list = NULL;
2598 
2599 	ldcp->tx_intr_state = LDC_INTR_NONE;
2600 	ldcp->rx_intr_state = LDC_INTR_NONE;
2601 
2602 	/* Initialize payload size depending on whether channel is reliable */
2603 	switch (ldcp->mode) {
2604 	case LDC_MODE_RAW:
2605 		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_RAW;
2606 		ldcp->read_p = i_ldc_read_raw;
2607 		ldcp->write_p = i_ldc_write_raw;
2608 		break;
2609 	case LDC_MODE_UNRELIABLE:
2610 		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_UNRELIABLE;
2611 		ldcp->read_p = i_ldc_read_packet;
2612 		ldcp->write_p = i_ldc_write_packet;
2613 		break;
2614 	case LDC_MODE_RELIABLE:
2615 		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_RELIABLE;
2616 
2617 		ldcp->stream_remains = 0;
2618 		ldcp->stream_offset = 0;
2619 		ldcp->stream_bufferp = kmem_alloc(ldcp->mtu, KM_SLEEP);
2620 		ldcp->read_p = i_ldc_read_stream;
2621 		ldcp->write_p = i_ldc_write_stream;
2622 		break;
2623 	default:
2624 		exit_val = EINVAL;
2625 		goto cleanup_on_exit;
2626 	}
2627 
2628 	/*
2629 	 * qlen is (mtu * ldc_mtu_msgs) / pkt_payload. If this
2630 	 * value is smaller than default length of ldc_queue_entries,
2631 	 * qlen is set to ldc_queue_entries. Ensure that computed
2632 	 * length is a power-of-two value.
2633 	 */
2634 	qlen = (ldcp->mtu * ldc_mtu_msgs) / ldcp->pkt_payload;
2635 	if (!ISP2(qlen)) {
2636 		uint64_t	tmp = 1;
2637 		while (qlen) {
2638 			qlen >>= 1; tmp <<= 1;
2639 		}
2640 		qlen = tmp;
2641 	}
2642 
2643 	ldcp->rx_q_entries =
2644 	    (qlen < ldc_queue_entries) ? ldc_queue_entries : qlen;
2645 	ldcp->tx_q_entries = ldcp->rx_q_entries;
2646 
2647 	D1(ldcp->id, "ldc_init: queue length = 0x%llx\n", ldcp->rx_q_entries);
2648 
2649 	/* Create a transmit queue */
2650 	ldcp->tx_q_va = (uint64_t)
2651 	    contig_mem_alloc(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
2652 	if (ldcp->tx_q_va == NULL) {
2653 		cmn_err(CE_WARN,
2654 		    "ldc_init: (0x%lx) TX queue allocation failed\n",
2655 		    ldcp->id);
2656 		exit_val = ENOMEM;
2657 		goto cleanup_on_exit;
2658 	}
2659 	ldcp->tx_q_ra = va_to_pa((caddr_t)ldcp->tx_q_va);
2660 
2661 	D2(ldcp->id, "ldc_init: txq_va=0x%llx, txq_ra=0x%llx, entries=0x%llx\n",
2662 	    ldcp->tx_q_va, ldcp->tx_q_ra, ldcp->tx_q_entries);
2663 
2664 	ldcp->tstate |= TS_TXQ_RDY;
2665 
2666 	/* Create a receive queue */
2667 	ldcp->rx_q_va = (uint64_t)
2668 	    contig_mem_alloc(ldcp->rx_q_entries << LDC_PACKET_SHIFT);
2669 	if (ldcp->rx_q_va == NULL) {
2670 		cmn_err(CE_WARN,
2671 		    "ldc_init: (0x%lx) RX queue allocation failed\n",
2672 		    ldcp->id);
2673 		exit_val = ENOMEM;
2674 		goto cleanup_on_exit;
2675 	}
2676 	ldcp->rx_q_ra = va_to_pa((caddr_t)ldcp->rx_q_va);
2677 
2678 	D2(ldcp->id, "ldc_init: rxq_va=0x%llx, rxq_ra=0x%llx, entries=0x%llx\n",
2679 	    ldcp->rx_q_va, ldcp->rx_q_ra, ldcp->rx_q_entries);
2680 
2681 	ldcp->tstate |= TS_RXQ_RDY;
2682 
2683 	/* Setup a separate read data queue */
2684 	if (ldcp->mode == LDC_MODE_RELIABLE) {
2685 		ldcp->readq_get_state = i_ldc_dq_rx_get_state;
2686 		ldcp->readq_set_head  = i_ldc_set_rxdq_head;
2687 
2688 		/* Make sure the data queue multiplier is a power of 2 */
2689 		if (!ISP2(ldc_rxdq_multiplier)) {
2690 			D1(ldcp->id, "ldc_init: (0x%llx) ldc_rxdq_multiplier "
2691 			    "not a power of 2, resetting", ldcp->id);
2692 			ldc_rxdq_multiplier = LDC_RXDQ_MULTIPLIER;
2693 		}
2694 
2695 		ldcp->rx_dq_entries = ldc_rxdq_multiplier * ldcp->rx_q_entries;
2696 		ldcp->rx_dq_va = (uint64_t)
2697 		    kmem_alloc(ldcp->rx_dq_entries << LDC_PACKET_SHIFT,
2698 		    KM_SLEEP);
2699 		if (ldcp->rx_dq_va == NULL) {
2700 			cmn_err(CE_WARN,
2701 			    "ldc_init: (0x%lx) RX data queue "
2702 			    "allocation failed\n", ldcp->id);
2703 			exit_val = ENOMEM;
2704 			goto cleanup_on_exit;
2705 		}
2706 
2707 		ldcp->rx_dq_head = ldcp->rx_dq_tail = 0;
2708 
2709 		D2(ldcp->id, "ldc_init: rx_dq_va=0x%llx, "
2710 		    "rx_dq_entries=0x%llx\n", ldcp->rx_dq_va,
2711 		    ldcp->rx_dq_entries);
2712 	} else {
2713 		ldcp->readq_get_state = i_ldc_hvq_rx_get_state;
2714 		ldcp->readq_set_head  = i_ldc_set_rx_head;
2715 	}
2716 
2717 	/* Init descriptor ring and memory handle list lock */
2718 	mutex_init(&ldcp->exp_dlist_lock, NULL, MUTEX_DRIVER, NULL);
2719 	mutex_init(&ldcp->imp_dlist_lock, NULL, MUTEX_DRIVER, NULL);
2720 	mutex_init(&ldcp->mlist_lock, NULL, MUTEX_DRIVER, NULL);
2721 
2722 	/* mark status as INITialized */
2723 	ldcp->status = LDC_INIT;
2724 
2725 	/* Add to channel list */
2726 	mutex_enter(&ldcssp->lock);
2727 	ldcp->next = ldcssp->chan_list;
2728 	ldcssp->chan_list = ldcp;
2729 	ldcssp->channel_count++;
2730 	mutex_exit(&ldcssp->lock);
2731 
2732 	/* set the handle */
2733 	*handle = (ldc_handle_t)ldcp;
2734 
2735 	D1(ldcp->id, "ldc_init: (0x%llx) channel initialized\n", ldcp->id);
2736 
2737 	return (0);
2738 
2739 cleanup_on_exit:
2740 
2741 	if (ldcp->mode == LDC_MODE_RELIABLE && ldcp->stream_bufferp)
2742 		kmem_free(ldcp->stream_bufferp, ldcp->mtu);
2743 
2744 	if (ldcp->tstate & TS_TXQ_RDY)
2745 		contig_mem_free((caddr_t)ldcp->tx_q_va,
2746 		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT));
2747 
2748 	if (ldcp->tstate & TS_RXQ_RDY)
2749 		contig_mem_free((caddr_t)ldcp->rx_q_va,
2750 		    (ldcp->rx_q_entries << LDC_PACKET_SHIFT));
2751 
2752 	mutex_destroy(&ldcp->tx_lock);
2753 	mutex_destroy(&ldcp->lock);
2754 
2755 	if (ldcp)
2756 		kmem_free(ldcp, sizeof (ldc_chan_t));
2757 
2758 	return (exit_val);
2759 }
2760 
2761 /*
2762  * Finalizes the LDC connection. It will return EBUSY if the
2763  * channel is open. A ldc_close() has to be done prior to
2764  * a ldc_fini operation. It frees TX/RX queues, associated
2765  * with the channel
2766  */
2767 int
2768 ldc_fini(ldc_handle_t handle)
2769 {
2770 	ldc_chan_t 	*ldcp;
2771 	ldc_chan_t 	*tmp_ldcp;
2772 	uint64_t 	id;
2773 
2774 	if (handle == NULL) {
2775 		DWARN(DBG_ALL_LDCS, "ldc_fini: invalid channel handle\n");
2776 		return (EINVAL);
2777 	}
2778 	ldcp = (ldc_chan_t *)handle;
2779 	id = ldcp->id;
2780 
2781 	mutex_enter(&ldcp->lock);
2782 
2783 	if ((ldcp->tstate & ~TS_IN_RESET) > TS_INIT) {
2784 		DWARN(ldcp->id, "ldc_fini: (0x%llx) channel is open\n",
2785 		    ldcp->id);
2786 		mutex_exit(&ldcp->lock);
2787 		return (EBUSY);
2788 	}
2789 
2790 	/* Remove from the channel list */
2791 	mutex_enter(&ldcssp->lock);
2792 	tmp_ldcp = ldcssp->chan_list;
2793 	if (tmp_ldcp == ldcp) {
2794 		ldcssp->chan_list = ldcp->next;
2795 		ldcp->next = NULL;
2796 	} else {
2797 		while (tmp_ldcp != NULL) {
2798 			if (tmp_ldcp->next == ldcp) {
2799 				tmp_ldcp->next = ldcp->next;
2800 				ldcp->next = NULL;
2801 				break;
2802 			}
2803 			tmp_ldcp = tmp_ldcp->next;
2804 		}
2805 		if (tmp_ldcp == NULL) {
2806 			DWARN(DBG_ALL_LDCS, "ldc_fini: invalid channel hdl\n");
2807 			mutex_exit(&ldcssp->lock);
2808 			mutex_exit(&ldcp->lock);
2809 			return (EINVAL);
2810 		}
2811 	}
2812 
2813 	ldcssp->channel_count--;
2814 
2815 	mutex_exit(&ldcssp->lock);
2816 
2817 	/* Free the map table for this channel */
2818 	if (ldcp->mtbl) {
2819 		(void) hv_ldc_set_map_table(ldcp->id, NULL, NULL);
2820 		if (ldcp->mtbl->contigmem)
2821 			contig_mem_free(ldcp->mtbl->table, ldcp->mtbl->size);
2822 		else
2823 			kmem_free(ldcp->mtbl->table, ldcp->mtbl->size);
2824 		mutex_destroy(&ldcp->mtbl->lock);
2825 		kmem_free(ldcp->mtbl, sizeof (ldc_mtbl_t));
2826 	}
2827 
2828 	/* Destroy descriptor ring and memory handle list lock */
2829 	mutex_destroy(&ldcp->exp_dlist_lock);
2830 	mutex_destroy(&ldcp->imp_dlist_lock);
2831 	mutex_destroy(&ldcp->mlist_lock);
2832 
2833 	/* Free the stream buffer for RELIABLE_MODE */
2834 	if (ldcp->mode == LDC_MODE_RELIABLE && ldcp->stream_bufferp)
2835 		kmem_free(ldcp->stream_bufferp, ldcp->mtu);
2836 
2837 	/* Free the RX queue */
2838 	contig_mem_free((caddr_t)ldcp->rx_q_va,
2839 	    (ldcp->rx_q_entries << LDC_PACKET_SHIFT));
2840 	ldcp->tstate &= ~TS_RXQ_RDY;
2841 
2842 	/* Free the RX data queue */
2843 	if (ldcp->mode == LDC_MODE_RELIABLE) {
2844 		kmem_free((caddr_t)ldcp->rx_dq_va,
2845 		    (ldcp->rx_dq_entries << LDC_PACKET_SHIFT));
2846 	}
2847 
2848 	/* Free the TX queue */
2849 	contig_mem_free((caddr_t)ldcp->tx_q_va,
2850 	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT));
2851 	ldcp->tstate &= ~TS_TXQ_RDY;
2852 
2853 	mutex_exit(&ldcp->lock);
2854 
2855 	/* Destroy mutex */
2856 	mutex_destroy(&ldcp->tx_lock);
2857 	mutex_destroy(&ldcp->lock);
2858 
2859 	/* free channel structure */
2860 	kmem_free(ldcp, sizeof (ldc_chan_t));
2861 
2862 	D1(id, "ldc_fini: (0x%llx) channel finalized\n", id);
2863 
2864 	return (0);
2865 }
2866 
2867 /*
2868  * Open the LDC channel for use. It registers the TX/RX queues
2869  * with the Hypervisor. It also specifies the interrupt number
2870  * and target CPU for this channel
2871  */
2872 int
2873 ldc_open(ldc_handle_t handle)
2874 {
2875 	ldc_chan_t 	*ldcp;
2876 	int 		rv;
2877 
2878 	if (handle == NULL) {
2879 		DWARN(DBG_ALL_LDCS, "ldc_open: invalid channel handle\n");
2880 		return (EINVAL);
2881 	}
2882 
2883 	ldcp = (ldc_chan_t *)handle;
2884 
2885 	mutex_enter(&ldcp->lock);
2886 
2887 	if (ldcp->tstate < TS_INIT) {
2888 		DWARN(ldcp->id,
2889 		    "ldc_open: (0x%llx) channel not initialized\n", ldcp->id);
2890 		mutex_exit(&ldcp->lock);
2891 		return (EFAULT);
2892 	}
2893 	if ((ldcp->tstate & ~TS_IN_RESET) >= TS_OPEN) {
2894 		DWARN(ldcp->id,
2895 		    "ldc_open: (0x%llx) channel is already open\n", ldcp->id);
2896 		mutex_exit(&ldcp->lock);
2897 		return (EFAULT);
2898 	}
2899 
2900 	/*
2901 	 * Unregister/Register the tx queue with the hypervisor
2902 	 */
2903 	rv = hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
2904 	if (rv) {
2905 		cmn_err(CE_WARN,
2906 		    "ldc_open: (0x%lx) channel tx queue unconf failed\n",
2907 		    ldcp->id);
2908 		mutex_exit(&ldcp->lock);
2909 		return (EIO);
2910 	}
2911 
2912 	rv = hv_ldc_tx_qconf(ldcp->id, ldcp->tx_q_ra, ldcp->tx_q_entries);
2913 	if (rv) {
2914 		cmn_err(CE_WARN,
2915 		    "ldc_open: (0x%lx) channel tx queue conf failed\n",
2916 		    ldcp->id);
2917 		mutex_exit(&ldcp->lock);
2918 		return (EIO);
2919 	}
2920 
2921 	D2(ldcp->id, "ldc_open: (0x%llx) registered tx queue with LDC\n",
2922 	    ldcp->id);
2923 
2924 	/*
2925 	 * Unregister/Register the rx queue with the hypervisor
2926 	 */
2927 	rv = hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
2928 	if (rv) {
2929 		cmn_err(CE_WARN,
2930 		    "ldc_open: (0x%lx) channel rx queue unconf failed\n",
2931 		    ldcp->id);
2932 		mutex_exit(&ldcp->lock);
2933 		return (EIO);
2934 	}
2935 
2936 	rv = hv_ldc_rx_qconf(ldcp->id, ldcp->rx_q_ra, ldcp->rx_q_entries);
2937 	if (rv) {
2938 		cmn_err(CE_WARN,
2939 		    "ldc_open: (0x%lx) channel rx queue conf failed\n",
2940 		    ldcp->id);
2941 		mutex_exit(&ldcp->lock);
2942 		return (EIO);
2943 	}
2944 
2945 	D2(ldcp->id, "ldc_open: (0x%llx) registered rx queue with LDC\n",
2946 	    ldcp->id);
2947 
2948 	ldcp->tstate |= TS_QCONF_RDY;
2949 
2950 	/* Register the channel with the channel nexus */
2951 	rv = i_ldc_register_channel(ldcp);
2952 	if (rv && rv != EAGAIN) {
2953 		cmn_err(CE_WARN,
2954 		    "ldc_open: (0x%lx) channel register failed\n", ldcp->id);
2955 		(void) hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
2956 		(void) hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
2957 		mutex_exit(&ldcp->lock);
2958 		return (EIO);
2959 	}
2960 
2961 	/* mark channel in OPEN state */
2962 	ldcp->status = LDC_OPEN;
2963 
2964 	/* Read channel state */
2965 	rv = hv_ldc_tx_get_state(ldcp->id,
2966 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
2967 	if (rv) {
2968 		cmn_err(CE_WARN,
2969 		    "ldc_open: (0x%lx) cannot read channel state\n",
2970 		    ldcp->id);
2971 		(void) i_ldc_unregister_channel(ldcp);
2972 		(void) hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
2973 		(void) hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
2974 		mutex_exit(&ldcp->lock);
2975 		return (EIO);
2976 	}
2977 
2978 	/*
2979 	 * set the ACKd head to current head location for reliable
2980 	 */
2981 	ldcp->tx_ackd_head = ldcp->tx_head;
2982 
2983 	/* mark channel ready if HV report link is UP (peer alloc'd Rx queue) */
2984 	if (ldcp->link_state == LDC_CHANNEL_UP ||
2985 	    ldcp->link_state == LDC_CHANNEL_RESET) {
2986 		ldcp->tstate |= TS_LINK_READY;
2987 		ldcp->status = LDC_READY;
2988 	}
2989 
2990 	/*
2991 	 * if channel is being opened in RAW mode - no handshake is needed
2992 	 * switch the channel READY and UP state
2993 	 */
2994 	if (ldcp->mode == LDC_MODE_RAW) {
2995 		ldcp->tstate = TS_UP;	/* set bits associated with LDC UP */
2996 		ldcp->status = LDC_UP;
2997 	}
2998 
2999 	mutex_exit(&ldcp->lock);
3000 
3001 	/*
3002 	 * Increment number of open channels
3003 	 */
3004 	mutex_enter(&ldcssp->lock);
3005 	ldcssp->channels_open++;
3006 	mutex_exit(&ldcssp->lock);
3007 
3008 	D1(ldcp->id,
3009 	    "ldc_open: (0x%llx) channel (0x%p) open for use "
3010 	    "(tstate=0x%x, status=0x%x)\n",
3011 	    ldcp->id, ldcp, ldcp->tstate, ldcp->status);
3012 
3013 	return (0);
3014 }
3015 
3016 /*
3017  * Close the LDC connection. It will return EBUSY if there
3018  * are memory segments or descriptor rings either bound to or
3019  * mapped over the channel
3020  */
3021 int
3022 ldc_close(ldc_handle_t handle)
3023 {
3024 	ldc_chan_t 	*ldcp;
3025 	int		rv = 0, retries = 0;
3026 	boolean_t	chk_done = B_FALSE;
3027 
3028 	if (handle == NULL) {
3029 		DWARN(DBG_ALL_LDCS, "ldc_close: invalid channel handle\n");
3030 		return (EINVAL);
3031 	}
3032 	ldcp = (ldc_chan_t *)handle;
3033 
3034 	mutex_enter(&ldcp->lock);
3035 
3036 	/* return error if channel is not open */
3037 	if ((ldcp->tstate & ~TS_IN_RESET) < TS_OPEN) {
3038 		DWARN(ldcp->id,
3039 		    "ldc_close: (0x%llx) channel is not open\n", ldcp->id);
3040 		mutex_exit(&ldcp->lock);
3041 		return (EFAULT);
3042 	}
3043 
3044 	/* if any memory handles, drings, are bound or mapped cannot close */
3045 	if (ldcp->mhdl_list != NULL) {
3046 		DWARN(ldcp->id,
3047 		    "ldc_close: (0x%llx) channel has bound memory handles\n",
3048 		    ldcp->id);
3049 		mutex_exit(&ldcp->lock);
3050 		return (EBUSY);
3051 	}
3052 	if (ldcp->exp_dring_list != NULL) {
3053 		DWARN(ldcp->id,
3054 		    "ldc_close: (0x%llx) channel has bound descriptor rings\n",
3055 		    ldcp->id);
3056 		mutex_exit(&ldcp->lock);
3057 		return (EBUSY);
3058 	}
3059 	if (ldcp->imp_dring_list != NULL) {
3060 		DWARN(ldcp->id,
3061 		    "ldc_close: (0x%llx) channel has mapped descriptor rings\n",
3062 		    ldcp->id);
3063 		mutex_exit(&ldcp->lock);
3064 		return (EBUSY);
3065 	}
3066 
3067 	if (ldcp->cb_inprogress) {
3068 		DWARN(ldcp->id, "ldc_close: (0x%llx) callback active\n",
3069 		    ldcp->id);
3070 		mutex_exit(&ldcp->lock);
3071 		return (EWOULDBLOCK);
3072 	}
3073 
3074 	/* Obtain Tx lock */
3075 	mutex_enter(&ldcp->tx_lock);
3076 
3077 	/*
3078 	 * Wait for pending transmits to complete i.e Tx queue to drain
3079 	 * if there are pending pkts - wait 1 ms and retry again
3080 	 */
3081 	for (;;) {
3082 
3083 		rv = hv_ldc_tx_get_state(ldcp->id,
3084 		    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
3085 		if (rv) {
3086 			cmn_err(CE_WARN,
3087 			    "ldc_close: (0x%lx) cannot read qptrs\n", ldcp->id);
3088 			mutex_exit(&ldcp->tx_lock);
3089 			mutex_exit(&ldcp->lock);
3090 			return (EIO);
3091 		}
3092 
3093 		if (ldcp->tx_head == ldcp->tx_tail ||
3094 		    ldcp->link_state != LDC_CHANNEL_UP) {
3095 			break;
3096 		}
3097 
3098 		if (chk_done) {
3099 			DWARN(ldcp->id,
3100 			    "ldc_close: (0x%llx) Tx queue drain timeout\n",
3101 			    ldcp->id);
3102 			break;
3103 		}
3104 
3105 		/* wait for one ms and try again */
3106 		delay(drv_usectohz(1000));
3107 		chk_done = B_TRUE;
3108 	}
3109 
3110 	/*
3111 	 * Drain the Tx and Rx queues as we are closing the
3112 	 * channel. We dont care about any pending packets.
3113 	 * We have to also drain the queue prior to clearing
3114 	 * pending interrupts, otherwise the HV will trigger
3115 	 * an interrupt the moment the interrupt state is
3116 	 * cleared.
3117 	 */
3118 	(void) i_ldc_txq_reconf(ldcp);
3119 	(void) i_ldc_rxq_drain(ldcp);
3120 
3121 	/*
3122 	 * Unregister the channel with the nexus
3123 	 */
3124 	while ((rv = i_ldc_unregister_channel(ldcp)) != 0) {
3125 
3126 		mutex_exit(&ldcp->tx_lock);
3127 		mutex_exit(&ldcp->lock);
3128 
3129 		/* if any error other than EAGAIN return back */
3130 		if (rv != EAGAIN || retries >= ldc_max_retries) {
3131 			cmn_err(CE_WARN,
3132 			    "ldc_close: (0x%lx) unregister failed, %d\n",
3133 			    ldcp->id, rv);
3134 			return (rv);
3135 		}
3136 
3137 		/*
3138 		 * As there could be pending interrupts we need
3139 		 * to wait and try again
3140 		 */
3141 		drv_usecwait(ldc_close_delay);
3142 		mutex_enter(&ldcp->lock);
3143 		mutex_enter(&ldcp->tx_lock);
3144 		retries++;
3145 	}
3146 
3147 	/*
3148 	 * Unregister queues
3149 	 */
3150 	rv = hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
3151 	if (rv) {
3152 		cmn_err(CE_WARN,
3153 		    "ldc_close: (0x%lx) channel TX queue unconf failed\n",
3154 		    ldcp->id);
3155 		mutex_exit(&ldcp->tx_lock);
3156 		mutex_exit(&ldcp->lock);
3157 		return (EIO);
3158 	}
3159 	rv = hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
3160 	if (rv) {
3161 		cmn_err(CE_WARN,
3162 		    "ldc_close: (0x%lx) channel RX queue unconf failed\n",
3163 		    ldcp->id);
3164 		mutex_exit(&ldcp->tx_lock);
3165 		mutex_exit(&ldcp->lock);
3166 		return (EIO);
3167 	}
3168 
3169 	ldcp->tstate &= ~TS_QCONF_RDY;
3170 
3171 	/* Reset channel state information */
3172 	i_ldc_reset_state(ldcp);
3173 
3174 	/* Mark channel as down and in initialized state */
3175 	ldcp->tx_ackd_head = 0;
3176 	ldcp->tx_head = 0;
3177 	ldcp->tstate = TS_IN_RESET|TS_INIT;
3178 	ldcp->status = LDC_INIT;
3179 
3180 	mutex_exit(&ldcp->tx_lock);
3181 	mutex_exit(&ldcp->lock);
3182 
3183 	/* Decrement number of open channels */
3184 	mutex_enter(&ldcssp->lock);
3185 	ldcssp->channels_open--;
3186 	mutex_exit(&ldcssp->lock);
3187 
3188 	D1(ldcp->id, "ldc_close: (0x%llx) channel closed\n", ldcp->id);
3189 
3190 	return (0);
3191 }
3192 
3193 /*
3194  * Register channel callback
3195  */
3196 int
3197 ldc_reg_callback(ldc_handle_t handle,
3198     uint_t(*cb)(uint64_t event, caddr_t arg), caddr_t arg)
3199 {
3200 	ldc_chan_t *ldcp;
3201 
3202 	if (handle == NULL) {
3203 		DWARN(DBG_ALL_LDCS,
3204 		    "ldc_reg_callback: invalid channel handle\n");
3205 		return (EINVAL);
3206 	}
3207 	if (((uint64_t)cb) < KERNELBASE) {
3208 		DWARN(DBG_ALL_LDCS, "ldc_reg_callback: invalid callback\n");
3209 		return (EINVAL);
3210 	}
3211 	ldcp = (ldc_chan_t *)handle;
3212 
3213 	mutex_enter(&ldcp->lock);
3214 
3215 	if (ldcp->cb) {
3216 		DWARN(ldcp->id, "ldc_reg_callback: (0x%llx) callback exists\n",
3217 		    ldcp->id);
3218 		mutex_exit(&ldcp->lock);
3219 		return (EIO);
3220 	}
3221 	if (ldcp->cb_inprogress) {
3222 		DWARN(ldcp->id, "ldc_reg_callback: (0x%llx) callback active\n",
3223 		    ldcp->id);
3224 		mutex_exit(&ldcp->lock);
3225 		return (EWOULDBLOCK);
3226 	}
3227 
3228 	ldcp->cb = cb;
3229 	ldcp->cb_arg = arg;
3230 	ldcp->cb_enabled = B_TRUE;
3231 
3232 	D1(ldcp->id,
3233 	    "ldc_reg_callback: (0x%llx) registered callback for channel\n",
3234 	    ldcp->id);
3235 
3236 	mutex_exit(&ldcp->lock);
3237 
3238 	return (0);
3239 }
3240 
3241 /*
3242  * Unregister channel callback
3243  */
3244 int
3245 ldc_unreg_callback(ldc_handle_t handle)
3246 {
3247 	ldc_chan_t *ldcp;
3248 
3249 	if (handle == NULL) {
3250 		DWARN(DBG_ALL_LDCS,
3251 		    "ldc_unreg_callback: invalid channel handle\n");
3252 		return (EINVAL);
3253 	}
3254 	ldcp = (ldc_chan_t *)handle;
3255 
3256 	mutex_enter(&ldcp->lock);
3257 
3258 	if (ldcp->cb == NULL) {
3259 		DWARN(ldcp->id,
3260 		    "ldc_unreg_callback: (0x%llx) no callback exists\n",
3261 		    ldcp->id);
3262 		mutex_exit(&ldcp->lock);
3263 		return (EIO);
3264 	}
3265 	if (ldcp->cb_inprogress) {
3266 		DWARN(ldcp->id,
3267 		    "ldc_unreg_callback: (0x%llx) callback active\n",
3268 		    ldcp->id);
3269 		mutex_exit(&ldcp->lock);
3270 		return (EWOULDBLOCK);
3271 	}
3272 
3273 	ldcp->cb = NULL;
3274 	ldcp->cb_arg = NULL;
3275 	ldcp->cb_enabled = B_FALSE;
3276 
3277 	D1(ldcp->id,
3278 	    "ldc_unreg_callback: (0x%llx) unregistered callback for channel\n",
3279 	    ldcp->id);
3280 
3281 	mutex_exit(&ldcp->lock);
3282 
3283 	return (0);
3284 }
3285 
3286 
3287 /*
3288  * Bring a channel up by initiating a handshake with the peer
3289  * This call is asynchronous. It will complete at a later point
3290  * in time when the peer responds back with an RTR.
3291  */
3292 int
3293 ldc_up(ldc_handle_t handle)
3294 {
3295 	int 		rv;
3296 	ldc_chan_t 	*ldcp;
3297 	ldc_msg_t 	*ldcmsg;
3298 	uint64_t 	tx_tail, tstate, link_state;
3299 
3300 	if (handle == NULL) {
3301 		DWARN(DBG_ALL_LDCS, "ldc_up: invalid channel handle\n");
3302 		return (EINVAL);
3303 	}
3304 	ldcp = (ldc_chan_t *)handle;
3305 
3306 	mutex_enter(&ldcp->lock);
3307 
3308 	D1(ldcp->id, "ldc_up: (0x%llx) doing channel UP\n", ldcp->id);
3309 
3310 	/* clear the reset state */
3311 	tstate = ldcp->tstate;
3312 	ldcp->tstate &= ~TS_IN_RESET;
3313 
3314 	if (ldcp->tstate == TS_UP) {
3315 		DWARN(ldcp->id,
3316 		    "ldc_up: (0x%llx) channel is already in UP state\n",
3317 		    ldcp->id);
3318 
3319 		/* mark channel as up */
3320 		ldcp->status = LDC_UP;
3321 
3322 		/*
3323 		 * if channel was in reset state and there was
3324 		 * pending data clear interrupt state. this will
3325 		 * trigger an interrupt, causing the RX handler to
3326 		 * to invoke the client's callback
3327 		 */
3328 		if ((tstate & TS_IN_RESET) &&
3329 		    ldcp->rx_intr_state == LDC_INTR_PEND) {
3330 			D1(ldcp->id,
3331 			    "ldc_up: (0x%llx) channel has pending data, "
3332 			    "clearing interrupt\n", ldcp->id);
3333 			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
3334 		}
3335 
3336 		mutex_exit(&ldcp->lock);
3337 		return (0);
3338 	}
3339 
3340 	/* if the channel is in RAW mode - mark it as UP, if READY */
3341 	if (ldcp->mode == LDC_MODE_RAW && ldcp->tstate >= TS_READY) {
3342 		ldcp->tstate = TS_UP;
3343 		mutex_exit(&ldcp->lock);
3344 		return (0);
3345 	}
3346 
3347 	/* Don't start another handshake if there is one in progress */
3348 	if (ldcp->hstate) {
3349 		D1(ldcp->id,
3350 		    "ldc_up: (0x%llx) channel handshake in progress\n",
3351 		    ldcp->id);
3352 		mutex_exit(&ldcp->lock);
3353 		return (0);
3354 	}
3355 
3356 	mutex_enter(&ldcp->tx_lock);
3357 
3358 	/* save current link state */
3359 	link_state = ldcp->link_state;
3360 
3361 	/* get the current tail for the LDC msg */
3362 	rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
3363 	if (rv) {
3364 		D1(ldcp->id, "ldc_up: (0x%llx) cannot initiate handshake\n",
3365 		    ldcp->id);
3366 		mutex_exit(&ldcp->tx_lock);
3367 		mutex_exit(&ldcp->lock);
3368 		return (ECONNREFUSED);
3369 	}
3370 
3371 	/*
3372 	 * If i_ldc_get_tx_tail() changed link_state to either RESET or UP,
3373 	 * from a previous state of DOWN, then mark the channel as
3374 	 * being ready for handshake.
3375 	 */
3376 	if ((link_state == LDC_CHANNEL_DOWN) &&
3377 	    (link_state != ldcp->link_state)) {
3378 
3379 		ASSERT((ldcp->link_state == LDC_CHANNEL_RESET) ||
3380 		    (ldcp->link_state == LDC_CHANNEL_UP));
3381 
3382 		if (ldcp->mode == LDC_MODE_RAW) {
3383 			ldcp->status = LDC_UP;
3384 			ldcp->tstate = TS_UP;
3385 			mutex_exit(&ldcp->tx_lock);
3386 			mutex_exit(&ldcp->lock);
3387 			return (0);
3388 		} else {
3389 			ldcp->status = LDC_READY;
3390 			ldcp->tstate |= TS_LINK_READY;
3391 		}
3392 
3393 	}
3394 
3395 	ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
3396 	ZERO_PKT(ldcmsg);
3397 
3398 	ldcmsg->type = LDC_CTRL;
3399 	ldcmsg->stype = LDC_INFO;
3400 	ldcmsg->ctrl = LDC_VER;
3401 	ldcp->next_vidx = 0;
3402 	bcopy(&ldc_versions[0], ldcmsg->udata, sizeof (ldc_versions[0]));
3403 
3404 	DUMP_LDC_PKT(ldcp, "ldc_up snd ver", (uint64_t)ldcmsg);
3405 
3406 	/* initiate the send by calling into HV and set the new tail */
3407 	tx_tail = (tx_tail + LDC_PACKET_SIZE) %
3408 	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
3409 
3410 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
3411 	if (rv) {
3412 		DWARN(ldcp->id,
3413 		    "ldc_up: (0x%llx) cannot initiate handshake rv=%d\n",
3414 		    ldcp->id, rv);
3415 		mutex_exit(&ldcp->tx_lock);
3416 		mutex_exit(&ldcp->lock);
3417 		return (rv);
3418 	}
3419 
3420 	ldcp->hstate |= TS_SENT_VER;
3421 	ldcp->tx_tail = tx_tail;
3422 	D1(ldcp->id, "ldc_up: (0x%llx) channel up initiated\n", ldcp->id);
3423 
3424 	mutex_exit(&ldcp->tx_lock);
3425 	mutex_exit(&ldcp->lock);
3426 
3427 	return (rv);
3428 }
3429 
3430 
3431 /*
3432  * Bring a channel down by resetting its state and queues
3433  */
3434 int
3435 ldc_down(ldc_handle_t handle)
3436 {
3437 	ldc_chan_t 	*ldcp;
3438 
3439 	if (handle == NULL) {
3440 		DWARN(DBG_ALL_LDCS, "ldc_down: invalid channel handle\n");
3441 		return (EINVAL);
3442 	}
3443 	ldcp = (ldc_chan_t *)handle;
3444 	mutex_enter(&ldcp->lock);
3445 	mutex_enter(&ldcp->tx_lock);
3446 	i_ldc_reset(ldcp, B_TRUE);
3447 	mutex_exit(&ldcp->tx_lock);
3448 	mutex_exit(&ldcp->lock);
3449 
3450 	return (0);
3451 }
3452 
3453 /*
3454  * Get the current channel status
3455  */
3456 int
3457 ldc_status(ldc_handle_t handle, ldc_status_t *status)
3458 {
3459 	ldc_chan_t *ldcp;
3460 
3461 	if (handle == NULL || status == NULL) {
3462 		DWARN(DBG_ALL_LDCS, "ldc_status: invalid argument\n");
3463 		return (EINVAL);
3464 	}
3465 	ldcp = (ldc_chan_t *)handle;
3466 
3467 	*status = ((ldc_chan_t *)handle)->status;
3468 
3469 	D1(ldcp->id,
3470 	    "ldc_status: (0x%llx) returned status %d\n", ldcp->id, *status);
3471 	return (0);
3472 }
3473 
3474 
3475 /*
3476  * Set the channel's callback mode - enable/disable callbacks
3477  */
3478 int
3479 ldc_set_cb_mode(ldc_handle_t handle, ldc_cb_mode_t cmode)
3480 {
3481 	ldc_chan_t 	*ldcp;
3482 
3483 	if (handle == NULL) {
3484 		DWARN(DBG_ALL_LDCS,
3485 		    "ldc_set_intr_mode: invalid channel handle\n");
3486 		return (EINVAL);
3487 	}
3488 	ldcp = (ldc_chan_t *)handle;
3489 
3490 	/*
3491 	 * Record no callbacks should be invoked
3492 	 */
3493 	mutex_enter(&ldcp->lock);
3494 
3495 	switch (cmode) {
3496 	case LDC_CB_DISABLE:
3497 		if (!ldcp->cb_enabled) {
3498 			DWARN(ldcp->id,
3499 			    "ldc_set_cb_mode: (0x%llx) callbacks disabled\n",
3500 			    ldcp->id);
3501 			break;
3502 		}
3503 		ldcp->cb_enabled = B_FALSE;
3504 
3505 		D1(ldcp->id, "ldc_set_cb_mode: (0x%llx) disabled callbacks\n",
3506 		    ldcp->id);
3507 		break;
3508 
3509 	case LDC_CB_ENABLE:
3510 		if (ldcp->cb_enabled) {
3511 			DWARN(ldcp->id,
3512 			    "ldc_set_cb_mode: (0x%llx) callbacks enabled\n",
3513 			    ldcp->id);
3514 			break;
3515 		}
3516 		ldcp->cb_enabled = B_TRUE;
3517 
3518 		D1(ldcp->id, "ldc_set_cb_mode: (0x%llx) enabled callbacks\n",
3519 		    ldcp->id);
3520 		break;
3521 	}
3522 
3523 	mutex_exit(&ldcp->lock);
3524 
3525 	return (0);
3526 }
3527 
3528 /*
3529  * Check to see if there are packets on the incoming queue
3530  * Will return hasdata = B_FALSE if there are no packets
3531  */
3532 int
3533 ldc_chkq(ldc_handle_t handle, boolean_t *hasdata)
3534 {
3535 	int 		rv;
3536 	uint64_t 	rx_head, rx_tail;
3537 	ldc_chan_t 	*ldcp;
3538 
3539 	if (handle == NULL) {
3540 		DWARN(DBG_ALL_LDCS, "ldc_chkq: invalid channel handle\n");
3541 		return (EINVAL);
3542 	}
3543 	ldcp = (ldc_chan_t *)handle;
3544 
3545 	*hasdata = B_FALSE;
3546 
3547 	mutex_enter(&ldcp->lock);
3548 
3549 	if (ldcp->tstate != TS_UP) {
3550 		D1(ldcp->id,
3551 		    "ldc_chkq: (0x%llx) channel is not up\n", ldcp->id);
3552 		mutex_exit(&ldcp->lock);
3553 		return (ECONNRESET);
3554 	}
3555 
3556 	/* Read packet(s) from the queue */
3557 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
3558 	    &ldcp->link_state);
3559 	if (rv != 0) {
3560 		cmn_err(CE_WARN,
3561 		    "ldc_chkq: (0x%lx) unable to read queue ptrs", ldcp->id);
3562 		mutex_exit(&ldcp->lock);
3563 		return (EIO);
3564 	}
3565 
3566 	/* reset the channel state if the channel went down */
3567 	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
3568 	    ldcp->link_state == LDC_CHANNEL_RESET) {
3569 		mutex_enter(&ldcp->tx_lock);
3570 		i_ldc_reset(ldcp, B_FALSE);
3571 		mutex_exit(&ldcp->tx_lock);
3572 		mutex_exit(&ldcp->lock);
3573 		return (ECONNRESET);
3574 	}
3575 
3576 	switch (ldcp->mode) {
3577 	case LDC_MODE_RAW:
3578 		/*
3579 		 * In raw mode, there are no ctrl packets, so checking
3580 		 * if the queue is non-empty is sufficient.
3581 		 */
3582 		*hasdata = (rx_head != rx_tail);
3583 		break;
3584 
3585 	case LDC_MODE_UNRELIABLE:
3586 		/*
3587 		 * In unreliable mode, if the queue is non-empty, we need
3588 		 * to check if it actually contains unread data packets.
3589 		 * The queue may just contain ctrl packets.
3590 		 */
3591 		if (rx_head != rx_tail) {
3592 			*hasdata = (i_ldc_chkq(ldcp) == 0);
3593 			/*
3594 			 * If no data packets were found on the queue,
3595 			 * all packets must have been control packets
3596 			 * which will now have been processed, leaving
3597 			 * the queue empty. If the interrupt state
3598 			 * is pending, we need to clear the interrupt
3599 			 * here.
3600 			 */
3601 			if (*hasdata == B_FALSE &&
3602 			    ldcp->rx_intr_state == LDC_INTR_PEND) {
3603 				i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
3604 			}
3605 		}
3606 		break;
3607 
3608 	case LDC_MODE_RELIABLE:
3609 		/*
3610 		 * In reliable mode, first check for 'stream_remains' > 0.
3611 		 * Otherwise, if the data queue head and tail pointers
3612 		 * differ, there must be data to read.
3613 		 */
3614 		if (ldcp->stream_remains > 0)
3615 			*hasdata = B_TRUE;
3616 		else
3617 			*hasdata = (ldcp->rx_dq_head != ldcp->rx_dq_tail);
3618 		break;
3619 
3620 	default:
3621 		cmn_err(CE_WARN, "ldc_chkq: (0x%lx) unexpected channel mode "
3622 		    "(0x%x)", ldcp->id, ldcp->mode);
3623 		mutex_exit(&ldcp->lock);
3624 		return (EIO);
3625 	}
3626 
3627 	mutex_exit(&ldcp->lock);
3628 
3629 	return (0);
3630 }
3631 
3632 
3633 /*
3634  * Read 'size' amount of bytes or less. If incoming buffer
3635  * is more than 'size', ENOBUFS is returned.
3636  *
3637  * On return, size contains the number of bytes read.
3638  */
3639 int
3640 ldc_read(ldc_handle_t handle, caddr_t bufp, size_t *sizep)
3641 {
3642 	ldc_chan_t 	*ldcp;
3643 	uint64_t 	rx_head = 0, rx_tail = 0;
3644 	int		rv = 0, exit_val;
3645 
3646 	if (handle == NULL) {
3647 		DWARN(DBG_ALL_LDCS, "ldc_read: invalid channel handle\n");
3648 		return (EINVAL);
3649 	}
3650 
3651 	ldcp = (ldc_chan_t *)handle;
3652 
3653 	/* channel lock */
3654 	mutex_enter(&ldcp->lock);
3655 
3656 	if (ldcp->tstate != TS_UP) {
3657 		DWARN(ldcp->id,
3658 		    "ldc_read: (0x%llx) channel is not in UP state\n",
3659 		    ldcp->id);
3660 		exit_val = ECONNRESET;
3661 	} else if (ldcp->mode == LDC_MODE_RELIABLE) {
3662 		TRACE_RXDQ_LENGTH(ldcp);
3663 		exit_val = ldcp->read_p(ldcp, bufp, sizep);
3664 
3665 		/*
3666 		 * For reliable mode channels, the interrupt
3667 		 * state is only set to pending during
3668 		 * interrupt handling when the secondary data
3669 		 * queue became full, leaving unprocessed
3670 		 * packets on the Rx queue. If the interrupt
3671 		 * state is pending and space is now available
3672 		 * on the data queue, clear the interrupt.
3673 		 */
3674 		if (ldcp->rx_intr_state == LDC_INTR_PEND &&
3675 		    Q_CONTIG_SPACE(ldcp->rx_dq_head, ldcp->rx_dq_tail,
3676 		    ldcp->rx_dq_entries << LDC_PACKET_SHIFT) >=
3677 		    LDC_PACKET_SIZE) {
3678 			/* data queue is not full */
3679 			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
3680 		}
3681 
3682 		mutex_exit(&ldcp->lock);
3683 		return (exit_val);
3684 	} else {
3685 		exit_val = ldcp->read_p(ldcp, bufp, sizep);
3686 	}
3687 
3688 	/*
3689 	 * if queue has been drained - clear interrupt
3690 	 */
3691 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
3692 	    &ldcp->link_state);
3693 	if (rv != 0) {
3694 		cmn_err(CE_WARN, "ldc_read: (0x%lx) unable to read queue ptrs",
3695 		    ldcp->id);
3696 		mutex_enter(&ldcp->tx_lock);
3697 		i_ldc_reset(ldcp, B_TRUE);
3698 		mutex_exit(&ldcp->tx_lock);
3699 		mutex_exit(&ldcp->lock);
3700 		return (ECONNRESET);
3701 	}
3702 
3703 	if (exit_val == 0) {
3704 		if (ldcp->link_state == LDC_CHANNEL_DOWN ||
3705 		    ldcp->link_state == LDC_CHANNEL_RESET) {
3706 			mutex_enter(&ldcp->tx_lock);
3707 			i_ldc_reset(ldcp, B_FALSE);
3708 			exit_val = ECONNRESET;
3709 			mutex_exit(&ldcp->tx_lock);
3710 		}
3711 		if ((rv == 0) &&
3712 		    (ldcp->rx_intr_state == LDC_INTR_PEND) &&
3713 		    (rx_head == rx_tail)) {
3714 			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
3715 		}
3716 	}
3717 
3718 	mutex_exit(&ldcp->lock);
3719 	return (exit_val);
3720 }
3721 
3722 /*
3723  * Basic raw mondo read -
3724  * no interpretation of mondo contents at all.
3725  *
3726  * Enter and exit with ldcp->lock held by caller
3727  */
3728 static int
3729 i_ldc_read_raw(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep)
3730 {
3731 	uint64_t 	q_size_mask;
3732 	ldc_msg_t 	*msgp;
3733 	uint8_t		*msgbufp;
3734 	int		rv = 0, space;
3735 	uint64_t 	rx_head, rx_tail;
3736 
3737 	space = *sizep;
3738 
3739 	if (space < LDC_PAYLOAD_SIZE_RAW)
3740 		return (ENOBUFS);
3741 
3742 	ASSERT(mutex_owned(&ldcp->lock));
3743 
3744 	/* compute mask for increment */
3745 	q_size_mask = (ldcp->rx_q_entries-1)<<LDC_PACKET_SHIFT;
3746 
3747 	/*
3748 	 * Read packet(s) from the queue
3749 	 */
3750 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
3751 	    &ldcp->link_state);
3752 	if (rv != 0) {
3753 		cmn_err(CE_WARN,
3754 		    "ldc_read_raw: (0x%lx) unable to read queue ptrs",
3755 		    ldcp->id);
3756 		return (EIO);
3757 	}
3758 	D1(ldcp->id, "ldc_read_raw: (0x%llx) rxh=0x%llx,"
3759 	    " rxt=0x%llx, st=0x%llx\n",
3760 	    ldcp->id, rx_head, rx_tail, ldcp->link_state);
3761 
3762 	/* reset the channel state if the channel went down */
3763 	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
3764 	    ldcp->link_state == LDC_CHANNEL_RESET) {
3765 		mutex_enter(&ldcp->tx_lock);
3766 		i_ldc_reset(ldcp, B_FALSE);
3767 		mutex_exit(&ldcp->tx_lock);
3768 		return (ECONNRESET);
3769 	}
3770 
3771 	/*
3772 	 * Check for empty queue
3773 	 */
3774 	if (rx_head == rx_tail) {
3775 		*sizep = 0;
3776 		return (0);
3777 	}
3778 
3779 	/* get the message */
3780 	msgp = (ldc_msg_t *)(ldcp->rx_q_va + rx_head);
3781 
3782 	/* if channel is in RAW mode, copy data and return */
3783 	msgbufp = (uint8_t *)&(msgp->raw[0]);
3784 
3785 	bcopy(msgbufp, target_bufp, LDC_PAYLOAD_SIZE_RAW);
3786 
3787 	DUMP_PAYLOAD(ldcp->id, msgbufp);
3788 
3789 	*sizep = LDC_PAYLOAD_SIZE_RAW;
3790 
3791 	rx_head = (rx_head + LDC_PACKET_SIZE) & q_size_mask;
3792 	rv = i_ldc_set_rx_head(ldcp, rx_head);
3793 
3794 	return (rv);
3795 }
3796 
3797 /*
3798  * Process LDC mondos to build larger packets
3799  * with either un-reliable or reliable delivery.
3800  *
3801  * Enter and exit with ldcp->lock held by caller
3802  */
3803 static int
3804 i_ldc_read_packet(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep)
3805 {
3806 	int		rv = 0;
3807 	uint64_t 	rx_head = 0, rx_tail = 0;
3808 	uint64_t 	curr_head = 0;
3809 	ldc_msg_t 	*msg;
3810 	caddr_t 	target;
3811 	size_t 		len = 0, bytes_read = 0;
3812 	int 		retries = 0;
3813 	uint64_t 	q_va, q_size_mask;
3814 	uint64_t	first_fragment = 0;
3815 
3816 	target = target_bufp;
3817 
3818 	ASSERT(mutex_owned(&ldcp->lock));
3819 
3820 	/* check if the buffer and size are valid */
3821 	if (target_bufp == NULL || *sizep == 0) {
3822 		DWARN(ldcp->id, "ldc_read: (0x%llx) invalid buffer/size\n",
3823 		    ldcp->id);
3824 		return (EINVAL);
3825 	}
3826 
3827 	/* Set q_va and compute increment mask for the appropriate queue */
3828 	if (ldcp->mode == LDC_MODE_RELIABLE) {
3829 		q_va	    = ldcp->rx_dq_va;
3830 		q_size_mask = (ldcp->rx_dq_entries-1)<<LDC_PACKET_SHIFT;
3831 	} else {
3832 		q_va	    = ldcp->rx_q_va;
3833 		q_size_mask = (ldcp->rx_q_entries-1)<<LDC_PACKET_SHIFT;
3834 	}
3835 
3836 	/*
3837 	 * Read packet(s) from the queue
3838 	 */
3839 	rv = ldcp->readq_get_state(ldcp, &curr_head, &rx_tail,
3840 	    &ldcp->link_state);
3841 	if (rv != 0) {
3842 		cmn_err(CE_WARN, "ldc_read: (0x%lx) unable to read queue ptrs",
3843 		    ldcp->id);
3844 		mutex_enter(&ldcp->tx_lock);
3845 		i_ldc_reset(ldcp, B_TRUE);
3846 		mutex_exit(&ldcp->tx_lock);
3847 		return (ECONNRESET);
3848 	}
3849 	D1(ldcp->id, "ldc_read: (0x%llx) chd=0x%llx, tl=0x%llx, st=0x%llx\n",
3850 	    ldcp->id, curr_head, rx_tail, ldcp->link_state);
3851 
3852 	/* reset the channel state if the channel went down */
3853 	if (ldcp->link_state != LDC_CHANNEL_UP)
3854 		goto channel_is_reset;
3855 
3856 	for (;;) {
3857 
3858 		if (curr_head == rx_tail) {
3859 			/*
3860 			 * If a data queue is being used, check the Rx HV
3861 			 * queue. This will copy over any new data packets
3862 			 * that have arrived.
3863 			 */
3864 			if (ldcp->mode == LDC_MODE_RELIABLE)
3865 				(void) i_ldc_chkq(ldcp);
3866 
3867 			rv = ldcp->readq_get_state(ldcp,
3868 			    &rx_head, &rx_tail, &ldcp->link_state);
3869 			if (rv != 0) {
3870 				cmn_err(CE_WARN,
3871 				    "ldc_read: (0x%lx) cannot read queue ptrs",
3872 				    ldcp->id);
3873 				mutex_enter(&ldcp->tx_lock);
3874 				i_ldc_reset(ldcp, B_TRUE);
3875 				mutex_exit(&ldcp->tx_lock);
3876 				return (ECONNRESET);
3877 			}
3878 
3879 			if (ldcp->link_state != LDC_CHANNEL_UP)
3880 				goto channel_is_reset;
3881 
3882 			if (curr_head == rx_tail) {
3883 
3884 				/* If in the middle of a fragmented xfer */
3885 				if (first_fragment != 0) {
3886 
3887 					/* wait for ldc_delay usecs */
3888 					drv_usecwait(ldc_delay);
3889 
3890 					if (++retries < ldc_max_retries)
3891 						continue;
3892 
3893 					*sizep = 0;
3894 					if (ldcp->mode != LDC_MODE_RELIABLE)
3895 						ldcp->last_msg_rcd =
3896 						    first_fragment - 1;
3897 					DWARN(DBG_ALL_LDCS, "ldc_read: "
3898 					    "(0x%llx) read timeout", ldcp->id);
3899 					return (EAGAIN);
3900 				}
3901 				*sizep = 0;
3902 				break;
3903 			}
3904 		}
3905 		retries = 0;
3906 
3907 		D2(ldcp->id,
3908 		    "ldc_read: (0x%llx) chd=0x%llx, rxhd=0x%llx, rxtl=0x%llx\n",
3909 		    ldcp->id, curr_head, rx_head, rx_tail);
3910 
3911 		/* get the message */
3912 		msg = (ldc_msg_t *)(q_va + curr_head);
3913 
3914 		DUMP_LDC_PKT(ldcp, "ldc_read received pkt",
3915 		    ldcp->rx_q_va + curr_head);
3916 
3917 		/* Check the message ID for the message received */
3918 		if (ldcp->mode != LDC_MODE_RELIABLE) {
3919 			if ((rv = i_ldc_check_seqid(ldcp, msg)) != 0) {
3920 
3921 				DWARN(ldcp->id, "ldc_read: (0x%llx) seqid "
3922 				    "error, q_ptrs=0x%lx,0x%lx",
3923 				    ldcp->id, rx_head, rx_tail);
3924 
3925 				/* throw away data */
3926 				bytes_read = 0;
3927 
3928 				/* Reset last_msg_rcd to start of message */
3929 				if (first_fragment != 0) {
3930 					ldcp->last_msg_rcd = first_fragment - 1;
3931 					first_fragment = 0;
3932 				}
3933 				/*
3934 				 * Send a NACK -- invalid seqid
3935 				 * get the current tail for the response
3936 				 */
3937 				rv = i_ldc_send_pkt(ldcp, msg->type, LDC_NACK,
3938 				    (msg->ctrl & LDC_CTRL_MASK));
3939 				if (rv) {
3940 					cmn_err(CE_NOTE,
3941 					    "ldc_read: (0x%lx) err sending "
3942 					    "NACK msg\n", ldcp->id);
3943 
3944 					/* if cannot send NACK - reset chan */
3945 					mutex_enter(&ldcp->tx_lock);
3946 					i_ldc_reset(ldcp, B_FALSE);
3947 					mutex_exit(&ldcp->tx_lock);
3948 					rv = ECONNRESET;
3949 					break;
3950 				}
3951 
3952 				/* purge receive queue */
3953 				rv = i_ldc_set_rx_head(ldcp, rx_tail);
3954 
3955 				break;
3956 			}
3957 
3958 			/*
3959 			 * Process any messages of type CTRL messages
3960 			 * Future implementations should try to pass these
3961 			 * to LDC link by resetting the intr state.
3962 			 *
3963 			 * NOTE: not done as a switch() as type can be
3964 			 * both ctrl+data
3965 			 */
3966 			if (msg->type & LDC_CTRL) {
3967 				if (rv = i_ldc_ctrlmsg(ldcp, msg)) {
3968 					if (rv == EAGAIN)
3969 						continue;
3970 					rv = i_ldc_set_rx_head(ldcp, rx_tail);
3971 					*sizep = 0;
3972 					bytes_read = 0;
3973 					break;
3974 				}
3975 			}
3976 
3977 			/* process data ACKs */
3978 			if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) {
3979 				if (rv = i_ldc_process_data_ACK(ldcp, msg)) {
3980 					*sizep = 0;
3981 					bytes_read = 0;
3982 					break;
3983 				}
3984 			}
3985 
3986 			/* process data NACKs */
3987 			if ((msg->type & LDC_DATA) && (msg->stype & LDC_NACK)) {
3988 				DWARN(ldcp->id,
3989 				    "ldc_read: (0x%llx) received DATA/NACK",
3990 				    ldcp->id);
3991 				mutex_enter(&ldcp->tx_lock);
3992 				i_ldc_reset(ldcp, B_TRUE);
3993 				mutex_exit(&ldcp->tx_lock);
3994 				return (ECONNRESET);
3995 			}
3996 		}
3997 
3998 		/* process data messages */
3999 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_INFO)) {
4000 
4001 			uint8_t *msgbuf = (uint8_t *)(
4002 			    (ldcp->mode == LDC_MODE_RELIABLE) ?
4003 			    msg->rdata : msg->udata);
4004 
4005 			D2(ldcp->id,
4006 			    "ldc_read: (0x%llx) received data msg\n", ldcp->id);
4007 
4008 			/* get the packet length */
4009 			len = (msg->env & LDC_LEN_MASK);
4010 
4011 				/*
4012 				 * FUTURE OPTIMIZATION:
4013 				 * dont need to set q head for every
4014 				 * packet we read just need to do this when
4015 				 * we are done or need to wait for more
4016 				 * mondos to make a full packet - this is
4017 				 * currently expensive.
4018 				 */
4019 
4020 			if (first_fragment == 0) {
4021 
4022 				/*
4023 				 * first packets should always have the start
4024 				 * bit set (even for a single packet). If not
4025 				 * throw away the packet
4026 				 */
4027 				if (!(msg->env & LDC_FRAG_START)) {
4028 
4029 					DWARN(DBG_ALL_LDCS,
4030 					    "ldc_read: (0x%llx) not start - "
4031 					    "frag=%x\n", ldcp->id,
4032 					    (msg->env) & LDC_FRAG_MASK);
4033 
4034 					/* toss pkt, inc head, cont reading */
4035 					bytes_read = 0;
4036 					target = target_bufp;
4037 					curr_head =
4038 					    (curr_head + LDC_PACKET_SIZE)
4039 					    & q_size_mask;
4040 					if (rv = ldcp->readq_set_head(ldcp,
4041 					    curr_head))
4042 						break;
4043 
4044 					continue;
4045 				}
4046 
4047 				first_fragment = msg->seqid;
4048 			} else {
4049 				/* check to see if this is a pkt w/ START bit */
4050 				if (msg->env & LDC_FRAG_START) {
4051 					DWARN(DBG_ALL_LDCS,
4052 					    "ldc_read:(0x%llx) unexpected pkt"
4053 					    " env=0x%x discarding %d bytes,"
4054 					    " lastmsg=%d, currentmsg=%d\n",
4055 					    ldcp->id, msg->env&LDC_FRAG_MASK,
4056 					    bytes_read, ldcp->last_msg_rcd,
4057 					    msg->seqid);
4058 
4059 					/* throw data we have read so far */
4060 					bytes_read = 0;
4061 					target = target_bufp;
4062 					first_fragment = msg->seqid;
4063 
4064 					if (rv = ldcp->readq_set_head(ldcp,
4065 					    curr_head))
4066 						break;
4067 				}
4068 			}
4069 
4070 			/* copy (next) pkt into buffer */
4071 			if (len <= (*sizep - bytes_read)) {
4072 				bcopy(msgbuf, target, len);
4073 				target += len;
4074 				bytes_read += len;
4075 			} else {
4076 				/*
4077 				 * there is not enough space in the buffer to
4078 				 * read this pkt. throw message away & continue
4079 				 * reading data from queue
4080 				 */
4081 				DWARN(DBG_ALL_LDCS,
4082 				    "ldc_read: (0x%llx) buffer too small, "
4083 				    "head=0x%lx, expect=%d, got=%d\n", ldcp->id,
4084 				    curr_head, *sizep, bytes_read+len);
4085 
4086 				first_fragment = 0;
4087 				target = target_bufp;
4088 				bytes_read = 0;
4089 
4090 				/* throw away everything received so far */
4091 				if (rv = ldcp->readq_set_head(ldcp, curr_head))
4092 					break;
4093 
4094 				/* continue reading remaining pkts */
4095 				continue;
4096 			}
4097 		}
4098 
4099 		/* set the message id */
4100 		if (ldcp->mode != LDC_MODE_RELIABLE)
4101 			ldcp->last_msg_rcd = msg->seqid;
4102 
4103 		/* move the head one position */
4104 		curr_head = (curr_head + LDC_PACKET_SIZE) & q_size_mask;
4105 
4106 		if (msg->env & LDC_FRAG_STOP) {
4107 
4108 			/*
4109 			 * All pkts that are part of this fragmented transfer
4110 			 * have been read or this was a single pkt read
4111 			 * or there was an error
4112 			 */
4113 
4114 			/* set the queue head */
4115 			if (rv = ldcp->readq_set_head(ldcp, curr_head))
4116 				bytes_read = 0;
4117 
4118 			*sizep = bytes_read;
4119 
4120 			break;
4121 		}
4122 
4123 		/* advance head if it is a CTRL packet or a DATA ACK packet */
4124 		if ((msg->type & LDC_CTRL) ||
4125 		    ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK))) {
4126 
4127 			/* set the queue head */
4128 			if (rv = ldcp->readq_set_head(ldcp, curr_head)) {
4129 				bytes_read = 0;
4130 				break;
4131 			}
4132 
4133 			D2(ldcp->id, "ldc_read: (0x%llx) set ACK qhead 0x%llx",
4134 			    ldcp->id, curr_head);
4135 		}
4136 
4137 	} /* for (;;) */
4138 
4139 	D2(ldcp->id, "ldc_read: (0x%llx) end size=%d", ldcp->id, *sizep);
4140 
4141 	return (rv);
4142 
4143 channel_is_reset:
4144 	mutex_enter(&ldcp->tx_lock);
4145 	i_ldc_reset(ldcp, B_FALSE);
4146 	mutex_exit(&ldcp->tx_lock);
4147 	return (ECONNRESET);
4148 }
4149 
4150 /*
4151  * Fetch and buffer incoming packets so we can hand them back as
4152  * a basic byte stream.
4153  *
4154  * Enter and exit with ldcp->lock held by caller
4155  */
4156 static int
4157 i_ldc_read_stream(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep)
4158 {
4159 	int	rv;
4160 	size_t	size;
4161 
4162 	ASSERT(mutex_owned(&ldcp->lock));
4163 
4164 	D2(ldcp->id, "i_ldc_read_stream: (0x%llx) buffer size=%d",
4165 	    ldcp->id, *sizep);
4166 
4167 	if (ldcp->stream_remains == 0) {
4168 		size = ldcp->mtu;
4169 		rv = i_ldc_read_packet(ldcp,
4170 		    (caddr_t)ldcp->stream_bufferp, &size);
4171 		D2(ldcp->id, "i_ldc_read_stream: read packet (0x%llx) size=%d",
4172 		    ldcp->id, size);
4173 
4174 		if (rv != 0)
4175 			return (rv);
4176 
4177 		ldcp->stream_remains = size;
4178 		ldcp->stream_offset = 0;
4179 	}
4180 
4181 	size = MIN(ldcp->stream_remains, *sizep);
4182 
4183 	bcopy(ldcp->stream_bufferp + ldcp->stream_offset, target_bufp, size);
4184 	ldcp->stream_offset += size;
4185 	ldcp->stream_remains -= size;
4186 
4187 	D2(ldcp->id, "i_ldc_read_stream: (0x%llx) fill from buffer size=%d",
4188 	    ldcp->id, size);
4189 
4190 	*sizep = size;
4191 	return (0);
4192 }
4193 
4194 /*
4195  * Write specified amount of bytes to the channel
4196  * in multiple pkts of pkt_payload size. Each
4197  * packet is tagged with an unique packet ID in
4198  * the case of a reliable link.
4199  *
4200  * On return, size contains the number of bytes written.
4201  */
4202 int
4203 ldc_write(ldc_handle_t handle, caddr_t buf, size_t *sizep)
4204 {
4205 	ldc_chan_t	*ldcp;
4206 	int		rv = 0;
4207 
4208 	if (handle == NULL) {
4209 		DWARN(DBG_ALL_LDCS, "ldc_write: invalid channel handle\n");
4210 		return (EINVAL);
4211 	}
4212 	ldcp = (ldc_chan_t *)handle;
4213 
4214 	/* check if writes can occur */
4215 	if (!mutex_tryenter(&ldcp->tx_lock)) {
4216 		/*
4217 		 * Could not get the lock - channel could
4218 		 * be in the process of being unconfigured
4219 		 * or reader has encountered an error
4220 		 */
4221 		return (EAGAIN);
4222 	}
4223 
4224 	/* check if non-zero data to write */
4225 	if (buf == NULL || sizep == NULL) {
4226 		DWARN(ldcp->id, "ldc_write: (0x%llx) invalid data write\n",
4227 		    ldcp->id);
4228 		mutex_exit(&ldcp->tx_lock);
4229 		return (EINVAL);
4230 	}
4231 
4232 	if (*sizep == 0) {
4233 		DWARN(ldcp->id, "ldc_write: (0x%llx) write size of zero\n",
4234 		    ldcp->id);
4235 		mutex_exit(&ldcp->tx_lock);
4236 		return (0);
4237 	}
4238 
4239 	/* Check if channel is UP for data exchange */
4240 	if (ldcp->tstate != TS_UP) {
4241 		DWARN(ldcp->id,
4242 		    "ldc_write: (0x%llx) channel is not in UP state\n",
4243 		    ldcp->id);
4244 		*sizep = 0;
4245 		rv = ECONNRESET;
4246 	} else {
4247 		rv = ldcp->write_p(ldcp, buf, sizep);
4248 	}
4249 
4250 	mutex_exit(&ldcp->tx_lock);
4251 
4252 	return (rv);
4253 }
4254 
4255 /*
4256  * Write a raw packet to the channel
4257  * On return, size contains the number of bytes written.
4258  */
4259 static int
4260 i_ldc_write_raw(ldc_chan_t *ldcp, caddr_t buf, size_t *sizep)
4261 {
4262 	ldc_msg_t 	*ldcmsg;
4263 	uint64_t 	tx_head, tx_tail, new_tail;
4264 	int		rv = 0;
4265 	size_t		size;
4266 
4267 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
4268 	ASSERT(ldcp->mode == LDC_MODE_RAW);
4269 
4270 	size = *sizep;
4271 
4272 	/*
4273 	 * Check to see if the packet size is less than or
4274 	 * equal to packet size support in raw mode
4275 	 */
4276 	if (size > ldcp->pkt_payload) {
4277 		DWARN(ldcp->id,
4278 		    "ldc_write: (0x%llx) invalid size (0x%llx) for RAW mode\n",
4279 		    ldcp->id, *sizep);
4280 		*sizep = 0;
4281 		return (EMSGSIZE);
4282 	}
4283 
4284 	/* get the qptrs for the tx queue */
4285 	rv = hv_ldc_tx_get_state(ldcp->id,
4286 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
4287 	if (rv != 0) {
4288 		cmn_err(CE_WARN,
4289 		    "ldc_write: (0x%lx) cannot read queue ptrs\n", ldcp->id);
4290 		*sizep = 0;
4291 		return (EIO);
4292 	}
4293 
4294 	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
4295 	    ldcp->link_state == LDC_CHANNEL_RESET) {
4296 		DWARN(ldcp->id,
4297 		    "ldc_write: (0x%llx) channel down/reset\n", ldcp->id);
4298 
4299 		*sizep = 0;
4300 		if (mutex_tryenter(&ldcp->lock)) {
4301 			i_ldc_reset(ldcp, B_FALSE);
4302 			mutex_exit(&ldcp->lock);
4303 		} else {
4304 			/*
4305 			 * Release Tx lock, and then reacquire channel
4306 			 * and Tx lock in correct order
4307 			 */
4308 			mutex_exit(&ldcp->tx_lock);
4309 			mutex_enter(&ldcp->lock);
4310 			mutex_enter(&ldcp->tx_lock);
4311 			i_ldc_reset(ldcp, B_FALSE);
4312 			mutex_exit(&ldcp->lock);
4313 		}
4314 		return (ECONNRESET);
4315 	}
4316 
4317 	tx_tail = ldcp->tx_tail;
4318 	tx_head = ldcp->tx_head;
4319 	new_tail = (tx_tail + LDC_PACKET_SIZE) &
4320 	    ((ldcp->tx_q_entries-1) << LDC_PACKET_SHIFT);
4321 
4322 	if (new_tail == tx_head) {
4323 		DWARN(DBG_ALL_LDCS,
4324 		    "ldc_write: (0x%llx) TX queue is full\n", ldcp->id);
4325 		*sizep = 0;
4326 		return (EWOULDBLOCK);
4327 	}
4328 
4329 	D2(ldcp->id, "ldc_write: (0x%llx) start xfer size=%d",
4330 	    ldcp->id, size);
4331 
4332 	/* Send the data now */
4333 	ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
4334 
4335 	/* copy the data into pkt */
4336 	bcopy((uint8_t *)buf, ldcmsg, size);
4337 
4338 	/* increment tail */
4339 	tx_tail = new_tail;
4340 
4341 	/*
4342 	 * All packets have been copied into the TX queue
4343 	 * update the tail ptr in the HV
4344 	 */
4345 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
4346 	if (rv) {
4347 		if (rv == EWOULDBLOCK) {
4348 			DWARN(ldcp->id, "ldc_write: (0x%llx) write timed out\n",
4349 			    ldcp->id);
4350 			*sizep = 0;
4351 			return (EWOULDBLOCK);
4352 		}
4353 
4354 		*sizep = 0;
4355 		if (mutex_tryenter(&ldcp->lock)) {
4356 			i_ldc_reset(ldcp, B_FALSE);
4357 			mutex_exit(&ldcp->lock);
4358 		} else {
4359 			/*
4360 			 * Release Tx lock, and then reacquire channel
4361 			 * and Tx lock in correct order
4362 			 */
4363 			mutex_exit(&ldcp->tx_lock);
4364 			mutex_enter(&ldcp->lock);
4365 			mutex_enter(&ldcp->tx_lock);
4366 			i_ldc_reset(ldcp, B_FALSE);
4367 			mutex_exit(&ldcp->lock);
4368 		}
4369 		return (ECONNRESET);
4370 	}
4371 
4372 	ldcp->tx_tail = tx_tail;
4373 	*sizep = size;
4374 
4375 	D2(ldcp->id, "ldc_write: (0x%llx) end xfer size=%d", ldcp->id, size);
4376 
4377 	return (rv);
4378 }
4379 
4380 
4381 /*
4382  * Write specified amount of bytes to the channel
4383  * in multiple pkts of pkt_payload size. Each
4384  * packet is tagged with an unique packet ID in
4385  * the case of a reliable link.
4386  *
4387  * On return, size contains the number of bytes written.
4388  * This function needs to ensure that the write size is < MTU size
4389  */
4390 static int
4391 i_ldc_write_packet(ldc_chan_t *ldcp, caddr_t buf, size_t *size)
4392 {
4393 	ldc_msg_t 	*ldcmsg;
4394 	uint64_t 	tx_head, tx_tail, new_tail, start;
4395 	uint64_t	txq_size_mask, numavail;
4396 	uint8_t 	*msgbuf, *source = (uint8_t *)buf;
4397 	size_t 		len, bytes_written = 0, remaining;
4398 	int		rv;
4399 	uint32_t	curr_seqid;
4400 
4401 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
4402 
4403 	ASSERT(ldcp->mode == LDC_MODE_RELIABLE ||
4404 	    ldcp->mode == LDC_MODE_UNRELIABLE);
4405 
4406 	/* compute mask for increment */
4407 	txq_size_mask = (ldcp->tx_q_entries - 1) << LDC_PACKET_SHIFT;
4408 
4409 	/* get the qptrs for the tx queue */
4410 	rv = hv_ldc_tx_get_state(ldcp->id,
4411 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
4412 	if (rv != 0) {
4413 		cmn_err(CE_WARN,
4414 		    "ldc_write: (0x%lx) cannot read queue ptrs\n", ldcp->id);
4415 		*size = 0;
4416 		return (EIO);
4417 	}
4418 
4419 	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
4420 	    ldcp->link_state == LDC_CHANNEL_RESET) {
4421 		DWARN(ldcp->id,
4422 		    "ldc_write: (0x%llx) channel down/reset\n", ldcp->id);
4423 		*size = 0;
4424 		if (mutex_tryenter(&ldcp->lock)) {
4425 			i_ldc_reset(ldcp, B_FALSE);
4426 			mutex_exit(&ldcp->lock);
4427 		} else {
4428 			/*
4429 			 * Release Tx lock, and then reacquire channel
4430 			 * and Tx lock in correct order
4431 			 */
4432 			mutex_exit(&ldcp->tx_lock);
4433 			mutex_enter(&ldcp->lock);
4434 			mutex_enter(&ldcp->tx_lock);
4435 			i_ldc_reset(ldcp, B_FALSE);
4436 			mutex_exit(&ldcp->lock);
4437 		}
4438 		return (ECONNRESET);
4439 	}
4440 
4441 	tx_tail = ldcp->tx_tail;
4442 	new_tail = (tx_tail + LDC_PACKET_SIZE) %
4443 	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
4444 
4445 	/*
4446 	 * Check to see if the queue is full. The check is done using
4447 	 * the appropriate head based on the link mode.
4448 	 */
4449 	i_ldc_get_tx_head(ldcp, &tx_head);
4450 
4451 	if (new_tail == tx_head) {
4452 		DWARN(DBG_ALL_LDCS,
4453 		    "ldc_write: (0x%llx) TX queue is full\n", ldcp->id);
4454 		*size = 0;
4455 		return (EWOULDBLOCK);
4456 	}
4457 
4458 	/*
4459 	 * Make sure that the LDC Tx queue has enough space
4460 	 */
4461 	numavail = (tx_head >> LDC_PACKET_SHIFT) - (tx_tail >> LDC_PACKET_SHIFT)
4462 	    + ldcp->tx_q_entries - 1;
4463 	numavail %= ldcp->tx_q_entries;
4464 
4465 	if (*size > (numavail * ldcp->pkt_payload)) {
4466 		DWARN(DBG_ALL_LDCS,
4467 		    "ldc_write: (0x%llx) TX queue has no space\n", ldcp->id);
4468 		return (EWOULDBLOCK);
4469 	}
4470 
4471 	D2(ldcp->id, "ldc_write: (0x%llx) start xfer size=%d",
4472 	    ldcp->id, *size);
4473 
4474 	/* Send the data now */
4475 	bytes_written = 0;
4476 	curr_seqid = ldcp->last_msg_snt;
4477 	start = tx_tail;
4478 
4479 	while (*size > bytes_written) {
4480 
4481 		ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
4482 
4483 		msgbuf = (uint8_t *)((ldcp->mode == LDC_MODE_RELIABLE) ?
4484 		    ldcmsg->rdata : ldcmsg->udata);
4485 
4486 		ldcmsg->type = LDC_DATA;
4487 		ldcmsg->stype = LDC_INFO;
4488 		ldcmsg->ctrl = 0;
4489 
4490 		remaining = *size - bytes_written;
4491 		len = min(ldcp->pkt_payload, remaining);
4492 		ldcmsg->env = (uint8_t)len;
4493 
4494 		curr_seqid++;
4495 		ldcmsg->seqid = curr_seqid;
4496 
4497 		/* copy the data into pkt */
4498 		bcopy(source, msgbuf, len);
4499 
4500 		source += len;
4501 		bytes_written += len;
4502 
4503 		/* increment tail */
4504 		tx_tail = (tx_tail + LDC_PACKET_SIZE) & txq_size_mask;
4505 
4506 		ASSERT(tx_tail != tx_head);
4507 	}
4508 
4509 	/* Set the start and stop bits */
4510 	ldcmsg->env |= LDC_FRAG_STOP;
4511 	ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + start);
4512 	ldcmsg->env |= LDC_FRAG_START;
4513 
4514 	/*
4515 	 * All packets have been copied into the TX queue
4516 	 * update the tail ptr in the HV
4517 	 */
4518 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
4519 	if (rv == 0) {
4520 		ldcp->tx_tail = tx_tail;
4521 		ldcp->last_msg_snt = curr_seqid;
4522 		*size = bytes_written;
4523 	} else {
4524 		int rv2;
4525 
4526 		if (rv != EWOULDBLOCK) {
4527 			*size = 0;
4528 			if (mutex_tryenter(&ldcp->lock)) {
4529 				i_ldc_reset(ldcp, B_FALSE);
4530 				mutex_exit(&ldcp->lock);
4531 			} else {
4532 				/*
4533 				 * Release Tx lock, and then reacquire channel
4534 				 * and Tx lock in correct order
4535 				 */
4536 				mutex_exit(&ldcp->tx_lock);
4537 				mutex_enter(&ldcp->lock);
4538 				mutex_enter(&ldcp->tx_lock);
4539 				i_ldc_reset(ldcp, B_FALSE);
4540 				mutex_exit(&ldcp->lock);
4541 			}
4542 			return (ECONNRESET);
4543 		}
4544 
4545 		D1(ldcp->id, "hv_tx_set_tail returns 0x%x (head 0x%x, "
4546 		    "old tail 0x%x, new tail 0x%x, qsize=0x%x)\n",
4547 		    rv, ldcp->tx_head, ldcp->tx_tail, tx_tail,
4548 		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT));
4549 
4550 		rv2 = hv_ldc_tx_get_state(ldcp->id,
4551 		    &tx_head, &tx_tail, &ldcp->link_state);
4552 
4553 		D1(ldcp->id, "hv_ldc_tx_get_state returns 0x%x "
4554 		    "(head 0x%x, tail 0x%x state 0x%x)\n",
4555 		    rv2, tx_head, tx_tail, ldcp->link_state);
4556 
4557 		*size = 0;
4558 	}
4559 
4560 	D2(ldcp->id, "ldc_write: (0x%llx) end xfer size=%d", ldcp->id, *size);
4561 
4562 	return (rv);
4563 }
4564 
4565 /*
4566  * Write specified amount of bytes to the channel
4567  * in multiple pkts of pkt_payload size. Each
4568  * packet is tagged with an unique packet ID in
4569  * the case of a reliable link.
4570  *
4571  * On return, size contains the number of bytes written.
4572  * This function needs to ensure that the write size is < MTU size
4573  */
4574 static int
4575 i_ldc_write_stream(ldc_chan_t *ldcp, caddr_t buf, size_t *sizep)
4576 {
4577 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
4578 	ASSERT(ldcp->mode == LDC_MODE_RELIABLE);
4579 
4580 	/* Truncate packet to max of MTU size */
4581 	if (*sizep > ldcp->mtu) *sizep = ldcp->mtu;
4582 	return (i_ldc_write_packet(ldcp, buf, sizep));
4583 }
4584 
4585 
4586 /*
4587  * Interfaces for channel nexus to register/unregister with LDC module
4588  * The nexus will register functions to be used to register individual
4589  * channels with the nexus and enable interrupts for the channels
4590  */
4591 int
4592 ldc_register(ldc_cnex_t *cinfo)
4593 {
4594 	ldc_chan_t	*ldcp;
4595 
4596 	if (cinfo == NULL || cinfo->dip == NULL ||
4597 	    cinfo->reg_chan == NULL || cinfo->unreg_chan == NULL ||
4598 	    cinfo->add_intr == NULL || cinfo->rem_intr == NULL ||
4599 	    cinfo->clr_intr == NULL) {
4600 
4601 		DWARN(DBG_ALL_LDCS, "ldc_register: invalid nexus info\n");
4602 		return (EINVAL);
4603 	}
4604 
4605 	mutex_enter(&ldcssp->lock);
4606 
4607 	/* nexus registration */
4608 	ldcssp->cinfo.dip = cinfo->dip;
4609 	ldcssp->cinfo.reg_chan = cinfo->reg_chan;
4610 	ldcssp->cinfo.unreg_chan = cinfo->unreg_chan;
4611 	ldcssp->cinfo.add_intr = cinfo->add_intr;
4612 	ldcssp->cinfo.rem_intr = cinfo->rem_intr;
4613 	ldcssp->cinfo.clr_intr = cinfo->clr_intr;
4614 
4615 	/* register any channels that might have been previously initialized */
4616 	ldcp = ldcssp->chan_list;
4617 	while (ldcp) {
4618 		if ((ldcp->tstate & TS_QCONF_RDY) &&
4619 		    (ldcp->tstate & TS_CNEX_RDY) == 0)
4620 			(void) i_ldc_register_channel(ldcp);
4621 
4622 		ldcp = ldcp->next;
4623 	}
4624 
4625 	mutex_exit(&ldcssp->lock);
4626 
4627 	return (0);
4628 }
4629 
4630 int
4631 ldc_unregister(ldc_cnex_t *cinfo)
4632 {
4633 	if (cinfo == NULL || cinfo->dip == NULL) {
4634 		DWARN(DBG_ALL_LDCS, "ldc_unregister: invalid nexus info\n");
4635 		return (EINVAL);
4636 	}
4637 
4638 	mutex_enter(&ldcssp->lock);
4639 
4640 	if (cinfo->dip != ldcssp->cinfo.dip) {
4641 		DWARN(DBG_ALL_LDCS, "ldc_unregister: invalid dip\n");
4642 		mutex_exit(&ldcssp->lock);
4643 		return (EINVAL);
4644 	}
4645 
4646 	/* nexus unregister */
4647 	ldcssp->cinfo.dip = NULL;
4648 	ldcssp->cinfo.reg_chan = NULL;
4649 	ldcssp->cinfo.unreg_chan = NULL;
4650 	ldcssp->cinfo.add_intr = NULL;
4651 	ldcssp->cinfo.rem_intr = NULL;
4652 	ldcssp->cinfo.clr_intr = NULL;
4653 
4654 	mutex_exit(&ldcssp->lock);
4655 
4656 	return (0);
4657 }
4658