xref: /titanic_44/usr/src/uts/sun4v/io/ldc.c (revision 7c64d3750da7fda7e450b8f9b0b963905ded6379)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * sun4v LDC Link Layer
31  */
32 #include <sys/types.h>
33 #include <sys/file.h>
34 #include <sys/errno.h>
35 #include <sys/open.h>
36 #include <sys/cred.h>
37 #include <sys/kmem.h>
38 #include <sys/conf.h>
39 #include <sys/cmn_err.h>
40 #include <sys/ksynch.h>
41 #include <sys/modctl.h>
42 #include <sys/stat.h> /* needed for S_IFBLK and S_IFCHR */
43 #include <sys/debug.h>
44 #include <sys/cred.h>
45 #include <sys/promif.h>
46 #include <sys/ddi.h>
47 #include <sys/sunddi.h>
48 #include <sys/cyclic.h>
49 #include <sys/machsystm.h>
50 #include <sys/vm.h>
51 #include <sys/cpu.h>
52 #include <sys/intreg.h>
53 #include <sys/machcpuvar.h>
54 #include <sys/mmu.h>
55 #include <sys/pte.h>
56 #include <vm/hat.h>
57 #include <vm/as.h>
58 #include <vm/hat_sfmmu.h>
59 #include <sys/vm_machparam.h>
60 #include <vm/seg_kmem.h>
61 #include <vm/seg_kpm.h>
62 #include <sys/note.h>
63 #include <sys/ivintr.h>
64 #include <sys/hypervisor_api.h>
65 #include <sys/ldc.h>
66 #include <sys/ldc_impl.h>
67 #include <sys/cnex.h>
68 #include <sys/hsvc.h>
69 #include <sys/sdt.h>
70 
71 /* Core internal functions */
72 int i_ldc_h2v_error(int h_error);
73 void i_ldc_reset(ldc_chan_t *ldcp, boolean_t force_reset);
74 
75 static int i_ldc_txq_reconf(ldc_chan_t *ldcp);
76 static int i_ldc_rxq_reconf(ldc_chan_t *ldcp, boolean_t force_reset);
77 static int i_ldc_rxq_drain(ldc_chan_t *ldcp);
78 static void i_ldc_reset_state(ldc_chan_t *ldcp);
79 
80 static int i_ldc_get_tx_tail(ldc_chan_t *ldcp, uint64_t *tail);
81 static void i_ldc_get_tx_head(ldc_chan_t *ldcp, uint64_t *head);
82 static int i_ldc_set_tx_tail(ldc_chan_t *ldcp, uint64_t tail);
83 static int i_ldc_set_rx_head(ldc_chan_t *ldcp, uint64_t head);
84 static int i_ldc_send_pkt(ldc_chan_t *ldcp, uint8_t pkttype, uint8_t subtype,
85     uint8_t ctrlmsg);
86 
87 static int  i_ldc_set_rxdq_head(ldc_chan_t *ldcp, uint64_t head);
88 static void i_ldc_rxdq_copy(ldc_chan_t *ldcp, uint64_t *head);
89 static uint64_t i_ldc_dq_rx_get_state(ldc_chan_t *ldcp, uint64_t *head,
90     uint64_t *tail, uint64_t *link_state);
91 static uint64_t i_ldc_hvq_rx_get_state(ldc_chan_t *ldcp, uint64_t *head,
92     uint64_t *tail, uint64_t *link_state);
93 static int i_ldc_rx_ackpeek(ldc_chan_t *ldcp, uint64_t rx_head,
94     uint64_t rx_tail);
95 static uint_t i_ldc_chkq(ldc_chan_t *ldcp);
96 
97 /* Interrupt handling functions */
98 static uint_t i_ldc_tx_hdlr(caddr_t arg1, caddr_t arg2);
99 static uint_t i_ldc_rx_hdlr(caddr_t arg1, caddr_t arg2);
100 static uint_t i_ldc_rx_process_hvq(ldc_chan_t *ldcp, boolean_t *notify_client,
101     uint64_t *notify_event);
102 static void i_ldc_clear_intr(ldc_chan_t *ldcp, cnex_intrtype_t itype);
103 
104 /* Read method functions */
105 static int i_ldc_read_raw(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep);
106 static int i_ldc_read_packet(ldc_chan_t *ldcp, caddr_t target_bufp,
107 	size_t *sizep);
108 static int i_ldc_read_stream(ldc_chan_t *ldcp, caddr_t target_bufp,
109 	size_t *sizep);
110 
111 /* Write method functions */
112 static int i_ldc_write_raw(ldc_chan_t *ldcp, caddr_t target_bufp,
113 	size_t *sizep);
114 static int i_ldc_write_packet(ldc_chan_t *ldcp, caddr_t target_bufp,
115 	size_t *sizep);
116 static int i_ldc_write_stream(ldc_chan_t *ldcp, caddr_t target_bufp,
117 	size_t *sizep);
118 
119 /* Pkt processing internal functions */
120 static int i_ldc_check_seqid(ldc_chan_t *ldcp, ldc_msg_t *ldcmsg);
121 static int i_ldc_ctrlmsg(ldc_chan_t *ldcp, ldc_msg_t *ldcmsg);
122 static int i_ldc_process_VER(ldc_chan_t *ldcp, ldc_msg_t *msg);
123 static int i_ldc_process_RTS(ldc_chan_t *ldcp, ldc_msg_t *msg);
124 static int i_ldc_process_RTR(ldc_chan_t *ldcp, ldc_msg_t *msg);
125 static int i_ldc_process_RDX(ldc_chan_t *ldcp, ldc_msg_t *msg);
126 static int i_ldc_process_data_ACK(ldc_chan_t *ldcp, ldc_msg_t *msg);
127 
128 /* LDC Version */
129 static ldc_ver_t ldc_versions[] = { {1, 0} };
130 
131 /* number of supported versions */
132 #define	LDC_NUM_VERS	(sizeof (ldc_versions) / sizeof (ldc_versions[0]))
133 
134 /* Invalid value for the ldc_chan_t rx_ack_head field */
135 #define	ACKPEEK_HEAD_INVALID	((uint64_t)-1)
136 
137 
138 /* Module State Pointer */
139 ldc_soft_state_t *ldcssp;
140 
141 static struct modldrv md = {
142 	&mod_miscops,			/* This is a misc module */
143 	"sun4v LDC module v%I%",	/* Name of the module */
144 };
145 
146 static struct modlinkage ml = {
147 	MODREV_1,
148 	&md,
149 	NULL
150 };
151 
152 static uint64_t ldc_sup_minor;		/* Supported minor number */
153 static hsvc_info_t ldc_hsvc = {
154 	HSVC_REV_1, NULL, HSVC_GROUP_LDC, 1, 0, "ldc"
155 };
156 
157 /*
158  * The no. of MTU size messages that can be stored in
159  * the LDC Tx queue. The number of Tx queue entries is
160  * then computed as (mtu * mtu_msgs)/sizeof(queue_entry)
161  */
162 uint64_t ldc_mtu_msgs = LDC_MTU_MSGS;
163 
164 /*
165  * The minimum queue length. This is the size of the smallest
166  * LDC queue. If the computed value is less than this default,
167  * the queue length is rounded up to 'ldc_queue_entries'.
168  */
169 uint64_t ldc_queue_entries = LDC_QUEUE_ENTRIES;
170 
171 /*
172  * The length of the reliable-mode data queue in terms of the LDC
173  * receive queue length. i.e., the number of times larger than the
174  * LDC receive queue that the data queue should be. The HV receive
175  * queue is required to be a power of 2 and this implementation
176  * assumes the data queue will also be a power of 2. By making the
177  * multiplier a power of 2, we ensure the data queue will be a
178  * power of 2. We use a multiplier because the receive queue is
179  * sized to be sane relative to the MTU and the same is needed for
180  * the data queue.
181  */
182 uint64_t ldc_rxdq_multiplier = LDC_RXDQ_MULTIPLIER;
183 
184 /*
185  * LDC retry count and delay - when the HV returns EWOULDBLOCK
186  * the operation is retried 'ldc_max_retries' times with a
187  * wait of 'ldc_delay' usecs between each retry.
188  */
189 int ldc_max_retries = LDC_MAX_RETRIES;
190 clock_t ldc_delay = LDC_DELAY;
191 
192 /*
193  * delay between each retry of channel unregistration in
194  * ldc_close(), to wait for pending interrupts to complete.
195  */
196 clock_t ldc_close_delay = LDC_CLOSE_DELAY;
197 
198 #ifdef DEBUG
199 
200 /*
201  * Print debug messages
202  *
203  * set ldcdbg to 0x7 for enabling all msgs
204  * 0x4 - Warnings
205  * 0x2 - All debug messages
206  * 0x1 - Minimal debug messages
207  *
208  * set ldcdbgchan to the channel number you want to debug
209  * setting it to -1 prints debug messages for all channels
210  * NOTE: ldcdbgchan has no effect on error messages
211  */
212 
213 int ldcdbg = 0x0;
214 int64_t ldcdbgchan = DBG_ALL_LDCS;
215 uint64_t ldc_inject_err_flag = 0;
216 
217 void
218 ldcdebug(int64_t id, const char *fmt, ...)
219 {
220 	char buf[512];
221 	va_list ap;
222 
223 	/*
224 	 * Do not return if,
225 	 * caller wants to print it anyway - (id == DBG_ALL_LDCS)
226 	 * debug channel is set to all LDCs - (ldcdbgchan == DBG_ALL_LDCS)
227 	 * debug channel = caller specified channel
228 	 */
229 	if ((id != DBG_ALL_LDCS) &&
230 	    (ldcdbgchan != DBG_ALL_LDCS) &&
231 	    (ldcdbgchan != id)) {
232 		return;
233 	}
234 
235 	va_start(ap, fmt);
236 	(void) vsprintf(buf, fmt, ap);
237 	va_end(ap);
238 
239 	cmn_err(CE_CONT, "?%s", buf);
240 }
241 
242 #define	LDC_ERR_RESET	0x1
243 #define	LDC_ERR_PKTLOSS	0x2
244 #define	LDC_ERR_DQFULL	0x4
245 
246 static boolean_t
247 ldc_inject_error(ldc_chan_t *ldcp, uint64_t error)
248 {
249 	if ((ldcdbgchan != DBG_ALL_LDCS) && (ldcdbgchan != ldcp->id))
250 		return (B_FALSE);
251 
252 	if ((ldc_inject_err_flag & error) == 0)
253 		return (B_FALSE);
254 
255 	/* clear the injection state */
256 	ldc_inject_err_flag &= ~error;
257 
258 	return (B_TRUE);
259 }
260 
261 #define	D1		\
262 if (ldcdbg & 0x01)	\
263 	ldcdebug
264 
265 #define	D2		\
266 if (ldcdbg & 0x02)	\
267 	ldcdebug
268 
269 #define	DWARN		\
270 if (ldcdbg & 0x04)	\
271 	ldcdebug
272 
273 #define	DUMP_PAYLOAD(id, addr)						\
274 {									\
275 	char buf[65*3];							\
276 	int i;								\
277 	uint8_t *src = (uint8_t *)addr;					\
278 	for (i = 0; i < 64; i++, src++)					\
279 		(void) sprintf(&buf[i * 3], "|%02x", *src);		\
280 	(void) sprintf(&buf[i * 3], "|\n");				\
281 	D2((id), "payload: %s", buf);					\
282 }
283 
284 #define	DUMP_LDC_PKT(c, s, addr)					\
285 {									\
286 	ldc_msg_t *msg = (ldc_msg_t *)(addr);				\
287 	uint32_t mid = ((c)->mode != LDC_MODE_RAW) ? msg->seqid : 0;	\
288 	if (msg->type == LDC_DATA) {                                    \
289 	    D2((c)->id, "%s: msg%d (/%x/%x/%x/,env[%c%c,sz=%d])",	\
290 	    (s), mid, msg->type, msg->stype, msg->ctrl,			\
291 	    (msg->env & LDC_FRAG_START) ? 'B' : ' ',                    \
292 	    (msg->env & LDC_FRAG_STOP) ? 'E' : ' ',                     \
293 	    (msg->env & LDC_LEN_MASK));					\
294 	} else { 							\
295 	    D2((c)->id, "%s: msg%d (/%x/%x/%x/,env=%x)", (s),		\
296 	    mid, msg->type, msg->stype, msg->ctrl, msg->env);		\
297 	} 								\
298 }
299 
300 #define	LDC_INJECT_RESET(_ldcp)	ldc_inject_error(_ldcp, LDC_ERR_RESET)
301 #define	LDC_INJECT_PKTLOSS(_ldcp) ldc_inject_error(_ldcp, LDC_ERR_PKTLOSS)
302 #define	LDC_INJECT_DQFULL(_ldcp) ldc_inject_error(_ldcp, LDC_ERR_DQFULL)
303 
304 #else
305 
306 #define	DBG_ALL_LDCS -1
307 
308 #define	D1
309 #define	D2
310 #define	DWARN
311 
312 #define	DUMP_PAYLOAD(id, addr)
313 #define	DUMP_LDC_PKT(c, s, addr)
314 
315 #define	LDC_INJECT_RESET(_ldcp)	(B_FALSE)
316 #define	LDC_INJECT_PKTLOSS(_ldcp) (B_FALSE)
317 #define	LDC_INJECT_DQFULL(_ldcp) (B_FALSE)
318 
319 #endif
320 
/*
 * dtrace SDT probes to ease tracing of the rx data queue and HV queue
 * lengths. Only the head, tail, and entries values are passed; the
 * length can be calculated in a dtrace script when the probe is enabled.
 */
#define	TRACE_RXDQ_LENGTH(ldcp)						\
	DTRACE_PROBE4(rxdq__size,					\
	uint64_t, (ldcp)->id,						\
	uint64_t, (ldcp)->rx_dq_head,					\
	uint64_t, (ldcp)->rx_dq_tail,					\
	uint64_t, (ldcp)->rx_dq_entries)

#define	TRACE_RXHVQ_LENGTH(ldcp, head, tail)				\
	DTRACE_PROBE4(rxhvq__size,					\
	uint64_t, (ldcp)->id,						\
	uint64_t, (head),						\
	uint64_t, (tail),						\
	uint64_t, (ldcp)->rx_q_entries)

/* A dtrace SDT probe to ease tracing of data queue copy operations */
#define	TRACE_RXDQ_COPY(ldcp, bytes)					\
	DTRACE_PROBE2(rxdq__copy, uint64_t, (ldcp)->id, uint64_t, (bytes))

/*
 * The amount of contiguous space at the tail of the queue.
 * When head <= tail this is the distance from tail to the end of the
 * queue; otherwise it is the gap between tail and head less one packet.
 */
#define	Q_CONTIG_SPACE(head, tail, size)				\
	((head) <= (tail) ? ((size) - (tail)) :				\
	((head) - (tail) - LDC_PACKET_SIZE))

/*
 * Zero one ldc_msg_t worth of memory at 'p'.
 * NOTE: the expansion already ends in ';'. That is kept as-is for
 * compatibility with existing call sites, so do not use this macro
 * as the unbraced body of an if/else.
 */
#define	ZERO_PKT(p)			\
	bzero((p), sizeof (ldc_msg_t));

/* Build a cookie value from a page size code and a page index */
#define	IDX2COOKIE(idx, pg_szc, pg_shift)				\
	(((pg_szc) << LDC_COOKIE_PGSZC_SHIFT) | ((idx) << (pg_shift)))
354 
355 int
356 _init(void)
357 {
358 	int status;
359 
360 	status = hsvc_register(&ldc_hsvc, &ldc_sup_minor);
361 	if (status != 0) {
362 		cmn_err(CE_NOTE, "!%s: cannot negotiate hypervisor LDC services"
363 		    " group: 0x%lx major: %ld minor: %ld errno: %d",
364 		    ldc_hsvc.hsvc_modname, ldc_hsvc.hsvc_group,
365 		    ldc_hsvc.hsvc_major, ldc_hsvc.hsvc_minor, status);
366 		return (-1);
367 	}
368 
369 	/* allocate soft state structure */
370 	ldcssp = kmem_zalloc(sizeof (ldc_soft_state_t), KM_SLEEP);
371 
372 	/* Link the module into the system */
373 	status = mod_install(&ml);
374 	if (status != 0) {
375 		kmem_free(ldcssp, sizeof (ldc_soft_state_t));
376 		return (status);
377 	}
378 
379 	/* Initialize the LDC state structure */
380 	mutex_init(&ldcssp->lock, NULL, MUTEX_DRIVER, NULL);
381 
382 	mutex_enter(&ldcssp->lock);
383 
384 	/* Create a cache for memory handles */
385 	ldcssp->memhdl_cache = kmem_cache_create("ldc_memhdl_cache",
386 	    sizeof (ldc_mhdl_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
387 	if (ldcssp->memhdl_cache == NULL) {
388 		DWARN(DBG_ALL_LDCS, "_init: ldc_memhdl cache create failed\n");
389 		mutex_exit(&ldcssp->lock);
390 		return (-1);
391 	}
392 
393 	/* Create cache for memory segment structures */
394 	ldcssp->memseg_cache = kmem_cache_create("ldc_memseg_cache",
395 	    sizeof (ldc_memseg_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
396 	if (ldcssp->memseg_cache == NULL) {
397 		DWARN(DBG_ALL_LDCS, "_init: ldc_memseg cache create failed\n");
398 		mutex_exit(&ldcssp->lock);
399 		return (-1);
400 	}
401 
402 
403 	ldcssp->channel_count = 0;
404 	ldcssp->channels_open = 0;
405 	ldcssp->chan_list = NULL;
406 	ldcssp->dring_list = NULL;
407 
408 	mutex_exit(&ldcssp->lock);
409 
410 	return (0);
411 }
412 
413 int
414 _info(struct modinfo *modinfop)
415 {
416 	/* Report status of the dynamically loadable driver module */
417 	return (mod_info(&ml, modinfop));
418 }
419 
420 int
421 _fini(void)
422 {
423 	int 		rv, status;
424 	ldc_chan_t 	*tmp_ldcp, *ldcp;
425 	ldc_dring_t 	*tmp_dringp, *dringp;
426 	ldc_mem_info_t 	minfo;
427 
428 	/* Unlink the driver module from the system */
429 	status = mod_remove(&ml);
430 	if (status) {
431 		DWARN(DBG_ALL_LDCS, "_fini: mod_remove failed\n");
432 		return (EIO);
433 	}
434 
435 	/* Free descriptor rings */
436 	dringp = ldcssp->dring_list;
437 	while (dringp != NULL) {
438 		tmp_dringp = dringp->next;
439 
440 		rv = ldc_mem_dring_info((ldc_dring_handle_t)dringp, &minfo);
441 		if (rv == 0 && minfo.status != LDC_UNBOUND) {
442 			if (minfo.status == LDC_BOUND) {
443 				(void) ldc_mem_dring_unbind(
444 				    (ldc_dring_handle_t)dringp);
445 			}
446 			if (minfo.status == LDC_MAPPED) {
447 				(void) ldc_mem_dring_unmap(
448 				    (ldc_dring_handle_t)dringp);
449 			}
450 		}
451 
452 		(void) ldc_mem_dring_destroy((ldc_dring_handle_t)dringp);
453 		dringp = tmp_dringp;
454 	}
455 	ldcssp->dring_list = NULL;
456 
457 	/* close and finalize channels */
458 	ldcp = ldcssp->chan_list;
459 	while (ldcp != NULL) {
460 		tmp_ldcp = ldcp->next;
461 
462 		(void) ldc_close((ldc_handle_t)ldcp);
463 		(void) ldc_fini((ldc_handle_t)ldcp);
464 
465 		ldcp = tmp_ldcp;
466 	}
467 	ldcssp->chan_list = NULL;
468 
469 	/* Destroy kmem caches */
470 	kmem_cache_destroy(ldcssp->memhdl_cache);
471 	kmem_cache_destroy(ldcssp->memseg_cache);
472 
473 	/*
474 	 * We have successfully "removed" the driver.
475 	 * Destroying soft states
476 	 */
477 	mutex_destroy(&ldcssp->lock);
478 	kmem_free(ldcssp, sizeof (ldc_soft_state_t));
479 
480 	(void) hsvc_unregister(&ldc_hsvc);
481 
482 	return (status);
483 }
484 
485 /* -------------------------------------------------------------------------- */
486 
487 /*
488  * LDC Link Layer Internal Functions
489  */
490 
491 /*
492  * Translate HV Errors to sun4v error codes
493  */
494 int
495 i_ldc_h2v_error(int h_error)
496 {
497 	switch (h_error) {
498 
499 	case	H_EOK:
500 		return (0);
501 
502 	case	H_ENORADDR:
503 		return (EFAULT);
504 
505 	case	H_EBADPGSZ:
506 	case	H_EINVAL:
507 		return (EINVAL);
508 
509 	case	H_EWOULDBLOCK:
510 		return (EWOULDBLOCK);
511 
512 	case	H_ENOACCESS:
513 	case	H_ENOMAP:
514 		return (EACCES);
515 
516 	case	H_EIO:
517 	case	H_ECPUERROR:
518 		return (EIO);
519 
520 	case	H_ENOTSUPPORTED:
521 		return (ENOTSUP);
522 
523 	case 	H_ETOOMANY:
524 		return (ENOSPC);
525 
526 	case	H_ECHANNEL:
527 		return (ECHRNG);
528 	default:
529 		break;
530 	}
531 
532 	return (EIO);
533 }
534 
535 /*
536  * Reconfigure the transmit queue
537  */
538 static int
539 i_ldc_txq_reconf(ldc_chan_t *ldcp)
540 {
541 	int rv;
542 
543 	ASSERT(MUTEX_HELD(&ldcp->lock));
544 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
545 
546 	rv = hv_ldc_tx_qconf(ldcp->id, ldcp->tx_q_ra, ldcp->tx_q_entries);
547 	if (rv) {
548 		cmn_err(CE_WARN,
549 		    "i_ldc_txq_reconf: (0x%lx) cannot set qconf", ldcp->id);
550 		return (EIO);
551 	}
552 	rv = hv_ldc_tx_get_state(ldcp->id, &(ldcp->tx_head),
553 	    &(ldcp->tx_tail), &(ldcp->link_state));
554 	if (rv) {
555 		cmn_err(CE_WARN,
556 		    "i_ldc_txq_reconf: (0x%lx) cannot get qptrs", ldcp->id);
557 		return (EIO);
558 	}
559 	D1(ldcp->id, "i_ldc_txq_reconf: (0x%llx) h=0x%llx,t=0x%llx,"
560 	    "s=0x%llx\n", ldcp->id, ldcp->tx_head, ldcp->tx_tail,
561 	    ldcp->link_state);
562 
563 	return (0);
564 }
565 
566 /*
567  * Reconfigure the receive queue
568  */
569 static int
570 i_ldc_rxq_reconf(ldc_chan_t *ldcp, boolean_t force_reset)
571 {
572 	int rv;
573 	uint64_t rx_head, rx_tail;
574 
575 	ASSERT(MUTEX_HELD(&ldcp->lock));
576 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
577 	    &(ldcp->link_state));
578 	if (rv) {
579 		cmn_err(CE_WARN,
580 		    "i_ldc_rxq_reconf: (0x%lx) cannot get state",
581 		    ldcp->id);
582 		return (EIO);
583 	}
584 
585 	if (force_reset || (ldcp->tstate & ~TS_IN_RESET) == TS_UP) {
586 		rv = hv_ldc_rx_qconf(ldcp->id, ldcp->rx_q_ra,
587 		    ldcp->rx_q_entries);
588 		if (rv) {
589 			cmn_err(CE_WARN,
590 			    "i_ldc_rxq_reconf: (0x%lx) cannot set qconf",
591 			    ldcp->id);
592 			return (EIO);
593 		}
594 		D1(ldcp->id, "i_ldc_rxq_reconf: (0x%llx) completed q reconf",
595 		    ldcp->id);
596 	}
597 
598 	return (0);
599 }
600 
601 
602 /*
603  * Drain the contents of the receive queue
604  */
605 static int
606 i_ldc_rxq_drain(ldc_chan_t *ldcp)
607 {
608 	int rv;
609 	uint64_t rx_head, rx_tail;
610 
611 	ASSERT(MUTEX_HELD(&ldcp->lock));
612 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
613 	    &(ldcp->link_state));
614 	if (rv) {
615 		cmn_err(CE_WARN, "i_ldc_rxq_drain: (0x%lx) cannot get state",
616 		    ldcp->id);
617 		return (EIO);
618 	}
619 
620 	/* flush contents by setting the head = tail */
621 	return (i_ldc_set_rx_head(ldcp, rx_tail));
622 }
623 
624 
625 /*
626  * Reset LDC state structure and its contents
627  */
628 static void
629 i_ldc_reset_state(ldc_chan_t *ldcp)
630 {
631 	ASSERT(MUTEX_HELD(&ldcp->lock));
632 	ldcp->last_msg_snt = LDC_INIT_SEQID;
633 	ldcp->last_ack_rcd = 0;
634 	ldcp->last_msg_rcd = 0;
635 	ldcp->tx_ackd_head = ldcp->tx_head;
636 	ldcp->stream_remains = 0;
637 	ldcp->next_vidx = 0;
638 	ldcp->hstate = 0;
639 	ldcp->tstate = TS_OPEN;
640 	ldcp->status = LDC_OPEN;
641 	ldcp->rx_ack_head = ACKPEEK_HEAD_INVALID;
642 	ldcp->rx_dq_head = 0;
643 	ldcp->rx_dq_tail = 0;
644 
645 	if (ldcp->link_state == LDC_CHANNEL_UP ||
646 	    ldcp->link_state == LDC_CHANNEL_RESET) {
647 
648 		if (ldcp->mode == LDC_MODE_RAW) {
649 			ldcp->status = LDC_UP;
650 			ldcp->tstate = TS_UP;
651 		} else {
652 			ldcp->status = LDC_READY;
653 			ldcp->tstate |= TS_LINK_READY;
654 		}
655 	}
656 }
657 
658 /*
659  * Reset a LDC channel
660  */
661 void
662 i_ldc_reset(ldc_chan_t *ldcp, boolean_t force_reset)
663 {
664 	DWARN(ldcp->id, "i_ldc_reset: (0x%llx) channel reset\n", ldcp->id);
665 
666 	ASSERT(MUTEX_HELD(&ldcp->lock));
667 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
668 
669 	/* reconfig Tx and Rx queues */
670 	(void) i_ldc_txq_reconf(ldcp);
671 	(void) i_ldc_rxq_reconf(ldcp, force_reset);
672 
673 	/* Clear Tx and Rx interrupts */
674 	(void) i_ldc_clear_intr(ldcp, CNEX_TX_INTR);
675 	(void) i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
676 
677 	/* Reset channel state */
678 	i_ldc_reset_state(ldcp);
679 
680 	/* Mark channel in reset */
681 	ldcp->tstate |= TS_IN_RESET;
682 }
683 
684 
685 /*
686  * Clear pending interrupts
687  */
688 static void
689 i_ldc_clear_intr(ldc_chan_t *ldcp, cnex_intrtype_t itype)
690 {
691 	ldc_cnex_t *cinfo = &ldcssp->cinfo;
692 
693 	ASSERT(MUTEX_HELD(&ldcp->lock));
694 	ASSERT(cinfo->dip != NULL);
695 
696 	switch (itype) {
697 	case CNEX_TX_INTR:
698 		/* check Tx interrupt */
699 		if (ldcp->tx_intr_state)
700 			ldcp->tx_intr_state = LDC_INTR_NONE;
701 		else
702 			return;
703 		break;
704 
705 	case CNEX_RX_INTR:
706 		/* check Rx interrupt */
707 		if (ldcp->rx_intr_state)
708 			ldcp->rx_intr_state = LDC_INTR_NONE;
709 		else
710 			return;
711 		break;
712 	}
713 
714 	(void) cinfo->clr_intr(cinfo->dip, ldcp->id, itype);
715 	D2(ldcp->id,
716 	    "i_ldc_clear_intr: (0x%llx) cleared 0x%x intr\n",
717 	    ldcp->id, itype);
718 }
719 
720 /*
721  * Set the receive queue head
722  * Resets connection and returns an error if it fails.
723  */
724 static int
725 i_ldc_set_rx_head(ldc_chan_t *ldcp, uint64_t head)
726 {
727 	int 	rv;
728 	int 	retries;
729 
730 	ASSERT(MUTEX_HELD(&ldcp->lock));
731 	for (retries = 0; retries < ldc_max_retries; retries++) {
732 
733 		if ((rv = hv_ldc_rx_set_qhead(ldcp->id, head)) == 0)
734 			return (0);
735 
736 		if (rv != H_EWOULDBLOCK)
737 			break;
738 
739 		/* wait for ldc_delay usecs */
740 		drv_usecwait(ldc_delay);
741 	}
742 
743 	cmn_err(CE_WARN, "ldc_rx_set_qhead: (0x%lx) cannot set qhead 0x%lx",
744 	    ldcp->id, head);
745 	mutex_enter(&ldcp->tx_lock);
746 	i_ldc_reset(ldcp, B_TRUE);
747 	mutex_exit(&ldcp->tx_lock);
748 
749 	return (ECONNRESET);
750 }
751 
752 /*
753  * Returns the tx_head to be used for transfer
754  */
755 static void
756 i_ldc_get_tx_head(ldc_chan_t *ldcp, uint64_t *head)
757 {
758 	ldc_msg_t 	*pkt;
759 
760 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
761 
762 	/* get current Tx head */
763 	*head = ldcp->tx_head;
764 
765 	/*
766 	 * Reliable mode will use the ACKd head instead of the regular tx_head.
767 	 * Also in Reliable mode, advance ackd_head for all non DATA/INFO pkts,
768 	 * up to the current location of tx_head. This needs to be done
769 	 * as the peer will only ACK DATA/INFO pkts.
770 	 */
771 	if (ldcp->mode == LDC_MODE_RELIABLE) {
772 		while (ldcp->tx_ackd_head != ldcp->tx_head) {
773 			pkt = (ldc_msg_t *)(ldcp->tx_q_va + ldcp->tx_ackd_head);
774 			if ((pkt->type & LDC_DATA) && (pkt->stype & LDC_INFO)) {
775 				break;
776 			}
777 			/* advance ACKd head */
778 			ldcp->tx_ackd_head =
779 			    (ldcp->tx_ackd_head + LDC_PACKET_SIZE) %
780 			    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
781 		}
782 		*head = ldcp->tx_ackd_head;
783 	}
784 }
785 
786 /*
787  * Returns the tx_tail to be used for transfer
788  * Re-reads the TX queue ptrs if and only if the
789  * the cached head and tail are equal (queue is full)
790  */
791 static int
792 i_ldc_get_tx_tail(ldc_chan_t *ldcp, uint64_t *tail)
793 {
794 	int 		rv;
795 	uint64_t 	current_head, new_tail;
796 
797 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
798 	/* Read the head and tail ptrs from HV */
799 	rv = hv_ldc_tx_get_state(ldcp->id,
800 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
801 	if (rv) {
802 		cmn_err(CE_WARN,
803 		    "i_ldc_get_tx_tail: (0x%lx) cannot read qptrs\n",
804 		    ldcp->id);
805 		return (EIO);
806 	}
807 	if (ldcp->link_state == LDC_CHANNEL_DOWN) {
808 		D1(ldcp->id, "i_ldc_get_tx_tail: (0x%llx) channel not ready\n",
809 		    ldcp->id);
810 		return (ECONNRESET);
811 	}
812 
813 	i_ldc_get_tx_head(ldcp, &current_head);
814 
815 	/* increment the tail */
816 	new_tail = (ldcp->tx_tail + LDC_PACKET_SIZE) %
817 	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
818 
819 	if (new_tail == current_head) {
820 		DWARN(ldcp->id,
821 		    "i_ldc_get_tx_tail: (0x%llx) TX queue is full\n",
822 		    ldcp->id);
823 		return (EWOULDBLOCK);
824 	}
825 
826 	D2(ldcp->id, "i_ldc_get_tx_tail: (0x%llx) head=0x%llx, tail=0x%llx\n",
827 	    ldcp->id, ldcp->tx_head, ldcp->tx_tail);
828 
829 	*tail = ldcp->tx_tail;
830 	return (0);
831 }
832 
833 /*
834  * Set the tail pointer. If HV returns EWOULDBLOCK, it will back off
835  * and retry ldc_max_retries times before returning an error.
836  * Returns 0, EWOULDBLOCK or EIO
837  */
838 static int
839 i_ldc_set_tx_tail(ldc_chan_t *ldcp, uint64_t tail)
840 {
841 	int		rv, retval = EWOULDBLOCK;
842 	int 		retries;
843 
844 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
845 	for (retries = 0; retries < ldc_max_retries; retries++) {
846 
847 		if ((rv = hv_ldc_tx_set_qtail(ldcp->id, tail)) == 0) {
848 			retval = 0;
849 			break;
850 		}
851 		if (rv != H_EWOULDBLOCK) {
852 			DWARN(ldcp->id, "i_ldc_set_tx_tail: (0x%llx) set "
853 			    "qtail=0x%llx failed, rv=%d\n", ldcp->id, tail, rv);
854 			retval = EIO;
855 			break;
856 		}
857 
858 		/* wait for ldc_delay usecs */
859 		drv_usecwait(ldc_delay);
860 	}
861 	return (retval);
862 }
863 
864 /*
865  * Copy a data packet from the HV receive queue to the data queue.
866  * Caller must ensure that the data queue is not already full.
867  *
868  * The *head argument represents the current head pointer for the HV
869  * receive queue. After copying a packet from the HV receive queue,
870  * the *head pointer will be updated. This allows the caller to update
871  * the head pointer in HV using the returned *head value.
872  */
873 void
874 i_ldc_rxdq_copy(ldc_chan_t *ldcp, uint64_t *head)
875 {
876 	uint64_t	q_size, dq_size;
877 
878 	ASSERT(MUTEX_HELD(&ldcp->lock));
879 
880 	q_size  = ldcp->rx_q_entries << LDC_PACKET_SHIFT;
881 	dq_size = ldcp->rx_dq_entries << LDC_PACKET_SHIFT;
882 
883 	ASSERT(Q_CONTIG_SPACE(ldcp->rx_dq_head, ldcp->rx_dq_tail,
884 	    dq_size) >= LDC_PACKET_SIZE);
885 
886 	bcopy((void *)(ldcp->rx_q_va + *head),
887 	    (void *)(ldcp->rx_dq_va + ldcp->rx_dq_tail), LDC_PACKET_SIZE);
888 	TRACE_RXDQ_COPY(ldcp, LDC_PACKET_SIZE);
889 
890 	/* Update rx head */
891 	*head = (*head + LDC_PACKET_SIZE) % q_size;
892 
893 	/* Update dq tail */
894 	ldcp->rx_dq_tail = (ldcp->rx_dq_tail + LDC_PACKET_SIZE) % dq_size;
895 }
896 
897 /*
898  * Update the Rx data queue head pointer
899  */
900 static int
901 i_ldc_set_rxdq_head(ldc_chan_t *ldcp, uint64_t head)
902 {
903 	ldcp->rx_dq_head = head;
904 	return (0);
905 }
906 
907 /*
908  * Get the Rx data queue head and tail pointers
909  */
910 static uint64_t
911 i_ldc_dq_rx_get_state(ldc_chan_t *ldcp, uint64_t *head, uint64_t *tail,
912     uint64_t *link_state)
913 {
914 	_NOTE(ARGUNUSED(link_state))
915 	*head = ldcp->rx_dq_head;
916 	*tail = ldcp->rx_dq_tail;
917 	return (0);
918 }
919 
920 /*
921  * Wrapper for the Rx HV queue set head function. Giving the
922  * data queue and HV queue set head functions the same type.
923  */
924 static uint64_t
925 i_ldc_hvq_rx_get_state(ldc_chan_t *ldcp, uint64_t *head, uint64_t *tail,
926     uint64_t *link_state)
927 {
928 	return (i_ldc_h2v_error(hv_ldc_rx_get_state(ldcp->id, head, tail,
929 	    link_state)));
930 }
931 
932 /*
933  * LDC receive interrupt handler
934  *    triggered for channel with data pending to read
935  *    i.e. Rx queue content changes
936  */
937 static uint_t
938 i_ldc_rx_hdlr(caddr_t arg1, caddr_t arg2)
939 {
940 	_NOTE(ARGUNUSED(arg2))
941 
942 	ldc_chan_t	*ldcp;
943 	boolean_t	notify;
944 	uint64_t	event;
945 	int		rv;
946 
947 	/* Get the channel for which interrupt was received */
948 	if (arg1 == NULL) {
949 		cmn_err(CE_WARN, "i_ldc_rx_hdlr: invalid arg\n");
950 		return (DDI_INTR_UNCLAIMED);
951 	}
952 
953 	ldcp = (ldc_chan_t *)arg1;
954 
955 	D1(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) Received intr, ldcp=0x%p\n",
956 	    ldcp->id, ldcp);
957 	D1(ldcp->id, "i_ldc_rx_hdlr: (%llx) USR%lx/TS%lx/HS%lx, LSTATE=%lx\n",
958 	    ldcp->id, ldcp->status, ldcp->tstate, ldcp->hstate,
959 	    ldcp->link_state);
960 
961 	/* Lock channel */
962 	mutex_enter(&ldcp->lock);
963 
964 	/* Mark the interrupt as being actively handled */
965 	ldcp->rx_intr_state = LDC_INTR_ACTIVE;
966 
967 	(void) i_ldc_rx_process_hvq(ldcp, &notify, &event);
968 
969 	if (ldcp->mode != LDC_MODE_RELIABLE) {
970 		/*
971 		 * If there are no data packets on the queue, clear
972 		 * the interrupt. Otherwise, the ldc_read will clear
973 		 * interrupts after draining the queue. To indicate the
974 		 * interrupt has not yet been cleared, it is marked
975 		 * as pending.
976 		 */
977 		if ((event & LDC_EVT_READ) == 0) {
978 			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
979 		} else {
980 			ldcp->rx_intr_state = LDC_INTR_PEND;
981 		}
982 	}
983 
984 	/* if callbacks are disabled, do not notify */
985 	if (notify && ldcp->cb_enabled) {
986 		ldcp->cb_inprogress = B_TRUE;
987 		mutex_exit(&ldcp->lock);
988 		rv = ldcp->cb(event, ldcp->cb_arg);
989 		if (rv) {
990 			DWARN(ldcp->id,
991 			    "i_ldc_rx_hdlr: (0x%llx) callback failure",
992 			    ldcp->id);
993 		}
994 		mutex_enter(&ldcp->lock);
995 		ldcp->cb_inprogress = B_FALSE;
996 	}
997 
998 	if (ldcp->mode == LDC_MODE_RELIABLE) {
999 		/*
1000 		 * If we are using a secondary data queue, clear the
1001 		 * interrupt. We should have processed all CTRL packets
1002 		 * and copied all DATA packets to the secondary queue.
1003 		 * Even if secondary queue filled up, clear the interrupts,
1004 		 * this will trigger another interrupt and force the
1005 		 * handler to copy more data.
1006 		 */
1007 		i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
1008 	}
1009 
1010 	mutex_exit(&ldcp->lock);
1011 
1012 	D1(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) exiting handler", ldcp->id);
1013 
1014 	return (DDI_INTR_CLAIMED);
1015 }
1016 
1017 /*
1018  * Wrapper for the Rx HV queue processing function to be used when
1019  * checking the Rx HV queue for data packets. Unlike the interrupt
1020  * handler code flow, the Rx interrupt is not cleared here and
1021  * callbacks are not made.
1022  */
1023 static uint_t
1024 i_ldc_chkq(ldc_chan_t *ldcp)
1025 {
1026 	boolean_t	notify;
1027 	uint64_t	event;
1028 
1029 	return (i_ldc_rx_process_hvq(ldcp, &notify, &event));
1030 }
1031 
1032 /*
1033  * Send a LDC message
1034  */
1035 static int
1036 i_ldc_send_pkt(ldc_chan_t *ldcp, uint8_t pkttype, uint8_t subtype,
1037     uint8_t ctrlmsg)
1038 {
1039 	int		rv;
1040 	ldc_msg_t 	*pkt;
1041 	uint64_t	tx_tail;
1042 	uint32_t	curr_seqid;
1043 
1044 	/* Obtain Tx lock */
1045 	mutex_enter(&ldcp->tx_lock);
1046 
1047 	curr_seqid = ldcp->last_msg_snt;
1048 
1049 	/* get the current tail for the message */
1050 	rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
1051 	if (rv) {
1052 		DWARN(ldcp->id,
1053 		    "i_ldc_send_pkt: (0x%llx) error sending pkt, "
1054 		    "type=0x%x,subtype=0x%x,ctrl=0x%x\n",
1055 		    ldcp->id, pkttype, subtype, ctrlmsg);
1056 		mutex_exit(&ldcp->tx_lock);
1057 		return (rv);
1058 	}
1059 
1060 	pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
1061 	ZERO_PKT(pkt);
1062 
1063 	/* Initialize the packet */
1064 	pkt->type = pkttype;
1065 	pkt->stype = subtype;
1066 	pkt->ctrl = ctrlmsg;
1067 
1068 	/* Store ackid/seqid iff it is RELIABLE mode & not a RTS/RTR message */
1069 	if (((ctrlmsg & LDC_CTRL_MASK) != LDC_RTS) &&
1070 	    ((ctrlmsg & LDC_CTRL_MASK) != LDC_RTR)) {
1071 		curr_seqid++;
1072 		if (ldcp->mode != LDC_MODE_RAW) {
1073 			pkt->seqid = curr_seqid;
1074 			pkt->ackid = ldcp->last_msg_rcd;
1075 		}
1076 	}
1077 	DUMP_LDC_PKT(ldcp, "i_ldc_send_pkt", (uint64_t)pkt);
1078 
1079 	/* initiate the send by calling into HV and set the new tail */
1080 	tx_tail = (tx_tail + LDC_PACKET_SIZE) %
1081 	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1082 
1083 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
1084 	if (rv) {
1085 		DWARN(ldcp->id,
1086 		    "i_ldc_send_pkt:(0x%llx) error sending pkt, "
1087 		    "type=0x%x,stype=0x%x,ctrl=0x%x\n",
1088 		    ldcp->id, pkttype, subtype, ctrlmsg);
1089 		mutex_exit(&ldcp->tx_lock);
1090 		return (EIO);
1091 	}
1092 
1093 	ldcp->last_msg_snt = curr_seqid;
1094 	ldcp->tx_tail = tx_tail;
1095 
1096 	mutex_exit(&ldcp->tx_lock);
1097 	return (0);
1098 }
1099 
1100 /*
1101  * Checks if packet was received in right order
1102  * in the case of a reliable link.
1103  * Returns 0 if in order, else EIO
1104  */
1105 static int
1106 i_ldc_check_seqid(ldc_chan_t *ldcp, ldc_msg_t *msg)
1107 {
1108 	/* No seqid checking for RAW mode */
1109 	if (ldcp->mode == LDC_MODE_RAW)
1110 		return (0);
1111 
1112 	/* No seqid checking for version, RTS, RTR message */
1113 	if (msg->ctrl == LDC_VER ||
1114 	    msg->ctrl == LDC_RTS ||
1115 	    msg->ctrl == LDC_RTR)
1116 		return (0);
1117 
1118 	/* Initial seqid to use is sent in RTS/RTR and saved in last_msg_rcd */
1119 	if (msg->seqid != (ldcp->last_msg_rcd + 1)) {
1120 		DWARN(ldcp->id,
1121 		    "i_ldc_check_seqid: (0x%llx) out-of-order pkt, got 0x%x, "
1122 		    "expecting 0x%x\n", ldcp->id, msg->seqid,
1123 		    (ldcp->last_msg_rcd + 1));
1124 		return (EIO);
1125 	}
1126 
1127 #ifdef DEBUG
1128 	if (LDC_INJECT_PKTLOSS(ldcp)) {
1129 		DWARN(ldcp->id,
1130 		    "i_ldc_check_seqid: (0x%llx) inject pkt loss\n", ldcp->id);
1131 		return (EIO);
1132 	}
1133 #endif
1134 
1135 	return (0);
1136 }
1137 
1138 
1139 /*
1140  * Process an incoming version ctrl message
1141  */
1142 static int
1143 i_ldc_process_VER(ldc_chan_t *ldcp, ldc_msg_t *msg)
1144 {
1145 	int 		rv = 0, idx = ldcp->next_vidx;
1146 	ldc_msg_t 	*pkt;
1147 	uint64_t	tx_tail;
1148 	ldc_ver_t	*rcvd_ver;
1149 
1150 	/* get the received version */
1151 	rcvd_ver = (ldc_ver_t *)((uint64_t)msg + LDC_PAYLOAD_VER_OFF);
1152 
1153 	D2(ldcp->id, "i_ldc_process_VER: (0x%llx) received VER v%u.%u\n",
1154 	    ldcp->id, rcvd_ver->major, rcvd_ver->minor);
1155 
1156 	/* Obtain Tx lock */
1157 	mutex_enter(&ldcp->tx_lock);
1158 
1159 	switch (msg->stype) {
1160 	case LDC_INFO:
1161 
1162 		if ((ldcp->tstate & ~TS_IN_RESET) == TS_VREADY) {
1163 			(void) i_ldc_txq_reconf(ldcp);
1164 			i_ldc_reset_state(ldcp);
1165 			mutex_exit(&ldcp->tx_lock);
1166 			return (EAGAIN);
1167 		}
1168 
1169 		/* get the current tail and pkt for the response */
1170 		rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
1171 		if (rv != 0) {
1172 			DWARN(ldcp->id,
1173 			    "i_ldc_process_VER: (0x%llx) err sending "
1174 			    "version ACK/NACK\n", ldcp->id);
1175 			i_ldc_reset(ldcp, B_TRUE);
1176 			mutex_exit(&ldcp->tx_lock);
1177 			return (ECONNRESET);
1178 		}
1179 
1180 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
1181 		ZERO_PKT(pkt);
1182 
1183 		/* initialize the packet */
1184 		pkt->type = LDC_CTRL;
1185 		pkt->ctrl = LDC_VER;
1186 
1187 		for (;;) {
1188 
1189 			D1(ldcp->id, "i_ldc_process_VER: got %u.%u chk %u.%u\n",
1190 			    rcvd_ver->major, rcvd_ver->minor,
1191 			    ldc_versions[idx].major, ldc_versions[idx].minor);
1192 
1193 			if (rcvd_ver->major == ldc_versions[idx].major) {
1194 				/* major version match - ACK version */
1195 				pkt->stype = LDC_ACK;
1196 
1197 				/*
1198 				 * lower minor version to the one this endpt
1199 				 * supports, if necessary
1200 				 */
1201 				if (rcvd_ver->minor > ldc_versions[idx].minor)
1202 					rcvd_ver->minor =
1203 					    ldc_versions[idx].minor;
1204 				bcopy(rcvd_ver, pkt->udata, sizeof (*rcvd_ver));
1205 
1206 				break;
1207 			}
1208 
1209 			if (rcvd_ver->major > ldc_versions[idx].major) {
1210 
1211 				D1(ldcp->id, "i_ldc_process_VER: using next"
1212 				    " lower idx=%d, v%u.%u\n", idx,
1213 				    ldc_versions[idx].major,
1214 				    ldc_versions[idx].minor);
1215 
1216 				/* nack with next lower version */
1217 				pkt->stype = LDC_NACK;
1218 				bcopy(&ldc_versions[idx], pkt->udata,
1219 				    sizeof (ldc_versions[idx]));
1220 				ldcp->next_vidx = idx;
1221 				break;
1222 			}
1223 
1224 			/* next major version */
1225 			idx++;
1226 
1227 			D1(ldcp->id, "i_ldc_process_VER: inc idx %x\n", idx);
1228 
1229 			if (idx == LDC_NUM_VERS) {
1230 				/* no version match - send NACK */
1231 				pkt->stype = LDC_NACK;
1232 				bzero(pkt->udata, sizeof (ldc_ver_t));
1233 				ldcp->next_vidx = 0;
1234 				break;
1235 			}
1236 		}
1237 
1238 		/* initiate the send by calling into HV and set the new tail */
1239 		tx_tail = (tx_tail + LDC_PACKET_SIZE) %
1240 		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1241 
1242 		rv = i_ldc_set_tx_tail(ldcp, tx_tail);
1243 		if (rv == 0) {
1244 			ldcp->tx_tail = tx_tail;
1245 			if (pkt->stype == LDC_ACK) {
1246 				D2(ldcp->id, "i_ldc_process_VER: (0x%llx) sent"
1247 				    " version ACK\n", ldcp->id);
1248 				/* Save the ACK'd version */
1249 				ldcp->version.major = rcvd_ver->major;
1250 				ldcp->version.minor = rcvd_ver->minor;
1251 				ldcp->hstate |= TS_RCVD_VER;
1252 				ldcp->tstate |= TS_VER_DONE;
1253 				D1(DBG_ALL_LDCS,
1254 				    "(0x%llx) Sent ACK, "
1255 				    "Agreed on version v%u.%u\n",
1256 				    ldcp->id, rcvd_ver->major, rcvd_ver->minor);
1257 			}
1258 		} else {
1259 			DWARN(ldcp->id,
1260 			    "i_ldc_process_VER: (0x%llx) error sending "
1261 			    "ACK/NACK\n", ldcp->id);
1262 			i_ldc_reset(ldcp, B_TRUE);
1263 			mutex_exit(&ldcp->tx_lock);
1264 			return (ECONNRESET);
1265 		}
1266 
1267 		break;
1268 
1269 	case LDC_ACK:
1270 		if ((ldcp->tstate & ~TS_IN_RESET) == TS_VREADY) {
1271 			if (ldcp->version.major != rcvd_ver->major ||
1272 			    ldcp->version.minor != rcvd_ver->minor) {
1273 
1274 				/* mismatched version - reset connection */
1275 				DWARN(ldcp->id,
1276 				    "i_ldc_process_VER: (0x%llx) recvd"
1277 				    " ACK ver != sent ACK ver\n", ldcp->id);
1278 				i_ldc_reset(ldcp, B_TRUE);
1279 				mutex_exit(&ldcp->tx_lock);
1280 				return (ECONNRESET);
1281 			}
1282 		} else {
1283 			/* SUCCESS - we have agreed on a version */
1284 			ldcp->version.major = rcvd_ver->major;
1285 			ldcp->version.minor = rcvd_ver->minor;
1286 			ldcp->tstate |= TS_VER_DONE;
1287 		}
1288 
1289 		D1(ldcp->id, "(0x%llx) Got ACK, Agreed on version v%u.%u\n",
1290 		    ldcp->id, rcvd_ver->major, rcvd_ver->minor);
1291 
1292 		/* initiate RTS-RTR-RDX handshake */
1293 		rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
1294 		if (rv) {
1295 			DWARN(ldcp->id,
1296 		    "i_ldc_process_VER: (0x%llx) cannot send RTS\n",
1297 			    ldcp->id);
1298 			i_ldc_reset(ldcp, B_TRUE);
1299 			mutex_exit(&ldcp->tx_lock);
1300 			return (ECONNRESET);
1301 		}
1302 
1303 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
1304 		ZERO_PKT(pkt);
1305 
1306 		pkt->type = LDC_CTRL;
1307 		pkt->stype = LDC_INFO;
1308 		pkt->ctrl = LDC_RTS;
1309 		pkt->env = ldcp->mode;
1310 		if (ldcp->mode != LDC_MODE_RAW)
1311 			pkt->seqid = LDC_INIT_SEQID;
1312 
1313 		ldcp->last_msg_rcd = LDC_INIT_SEQID;
1314 
1315 		DUMP_LDC_PKT(ldcp, "i_ldc_process_VER snd rts", (uint64_t)pkt);
1316 
1317 		/* initiate the send by calling into HV and set the new tail */
1318 		tx_tail = (tx_tail + LDC_PACKET_SIZE) %
1319 		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1320 
1321 		rv = i_ldc_set_tx_tail(ldcp, tx_tail);
1322 		if (rv) {
1323 			D2(ldcp->id,
1324 			    "i_ldc_process_VER: (0x%llx) no listener\n",
1325 			    ldcp->id);
1326 			i_ldc_reset(ldcp, B_TRUE);
1327 			mutex_exit(&ldcp->tx_lock);
1328 			return (ECONNRESET);
1329 		}
1330 
1331 		ldcp->tx_tail = tx_tail;
1332 		ldcp->hstate |= TS_SENT_RTS;
1333 
1334 		break;
1335 
1336 	case LDC_NACK:
1337 		/* check if version in NACK is zero */
1338 		if (rcvd_ver->major == 0 && rcvd_ver->minor == 0) {
1339 			/* version handshake failure */
1340 			DWARN(DBG_ALL_LDCS,
1341 			    "i_ldc_process_VER: (0x%llx) no version match\n",
1342 			    ldcp->id);
1343 			i_ldc_reset(ldcp, B_TRUE);
1344 			mutex_exit(&ldcp->tx_lock);
1345 			return (ECONNRESET);
1346 		}
1347 
1348 		/* get the current tail and pkt for the response */
1349 		rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
1350 		if (rv != 0) {
1351 			cmn_err(CE_NOTE,
1352 			    "i_ldc_process_VER: (0x%lx) err sending "
1353 			    "version ACK/NACK\n", ldcp->id);
1354 			i_ldc_reset(ldcp, B_TRUE);
1355 			mutex_exit(&ldcp->tx_lock);
1356 			return (ECONNRESET);
1357 		}
1358 
1359 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
1360 		ZERO_PKT(pkt);
1361 
1362 		/* initialize the packet */
1363 		pkt->type = LDC_CTRL;
1364 		pkt->ctrl = LDC_VER;
1365 		pkt->stype = LDC_INFO;
1366 
1367 		/* check ver in NACK msg has a match */
1368 		for (;;) {
1369 			if (rcvd_ver->major == ldc_versions[idx].major) {
1370 				/*
1371 				 * major version match - resubmit request
1372 				 * if lower minor version to the one this endpt
1373 				 * supports, if necessary
1374 				 */
1375 				if (rcvd_ver->minor > ldc_versions[idx].minor)
1376 					rcvd_ver->minor =
1377 					    ldc_versions[idx].minor;
1378 				bcopy(rcvd_ver, pkt->udata, sizeof (*rcvd_ver));
1379 				break;
1380 			}
1381 
1382 			if (rcvd_ver->major > ldc_versions[idx].major) {
1383 
1384 				D1(ldcp->id, "i_ldc_process_VER: using next"
1385 				    " lower idx=%d, v%u.%u\n", idx,
1386 				    ldc_versions[idx].major,
1387 				    ldc_versions[idx].minor);
1388 
1389 				/* send next lower version */
1390 				bcopy(&ldc_versions[idx], pkt->udata,
1391 				    sizeof (ldc_versions[idx]));
1392 				ldcp->next_vidx = idx;
1393 				break;
1394 			}
1395 
1396 			/* next version */
1397 			idx++;
1398 
1399 			D1(ldcp->id, "i_ldc_process_VER: inc idx %x\n", idx);
1400 
1401 			if (idx == LDC_NUM_VERS) {
1402 				/* no version match - terminate */
1403 				ldcp->next_vidx = 0;
1404 				mutex_exit(&ldcp->tx_lock);
1405 				return (ECONNRESET);
1406 			}
1407 		}
1408 
1409 		/* initiate the send by calling into HV and set the new tail */
1410 		tx_tail = (tx_tail + LDC_PACKET_SIZE) %
1411 		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1412 
1413 		rv = i_ldc_set_tx_tail(ldcp, tx_tail);
1414 		if (rv == 0) {
1415 			D2(ldcp->id, "i_ldc_process_VER: (0x%llx) sent version"
1416 			    "INFO v%u.%u\n", ldcp->id, ldc_versions[idx].major,
1417 			    ldc_versions[idx].minor);
1418 			ldcp->tx_tail = tx_tail;
1419 		} else {
1420 			cmn_err(CE_NOTE,
1421 			    "i_ldc_process_VER: (0x%lx) error sending version"
1422 			    "INFO\n", ldcp->id);
1423 			i_ldc_reset(ldcp, B_TRUE);
1424 			mutex_exit(&ldcp->tx_lock);
1425 			return (ECONNRESET);
1426 		}
1427 
1428 		break;
1429 	}
1430 
1431 	mutex_exit(&ldcp->tx_lock);
1432 	return (rv);
1433 }
1434 
1435 
1436 /*
1437  * Process an incoming RTS ctrl message
1438  */
1439 static int
1440 i_ldc_process_RTS(ldc_chan_t *ldcp, ldc_msg_t *msg)
1441 {
1442 	int 		rv = 0;
1443 	ldc_msg_t 	*pkt;
1444 	uint64_t	tx_tail;
1445 	boolean_t	sent_NACK = B_FALSE;
1446 
1447 	D2(ldcp->id, "i_ldc_process_RTS: (0x%llx) received RTS\n", ldcp->id);
1448 
1449 	switch (msg->stype) {
1450 	case LDC_NACK:
1451 		DWARN(ldcp->id,
1452 		    "i_ldc_process_RTS: (0x%llx) RTS NACK received\n",
1453 		    ldcp->id);
1454 
1455 		/* Reset the channel -- as we cannot continue */
1456 		mutex_enter(&ldcp->tx_lock);
1457 		i_ldc_reset(ldcp, B_TRUE);
1458 		mutex_exit(&ldcp->tx_lock);
1459 		rv = ECONNRESET;
1460 		break;
1461 
1462 	case LDC_INFO:
1463 
1464 		/* check mode */
1465 		if (ldcp->mode != (ldc_mode_t)msg->env) {
1466 			cmn_err(CE_NOTE,
1467 			    "i_ldc_process_RTS: (0x%lx) mode mismatch\n",
1468 			    ldcp->id);
1469 			/*
1470 			 * send NACK in response to MODE message
1471 			 * get the current tail for the response
1472 			 */
1473 			rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_NACK, LDC_RTS);
1474 			if (rv) {
1475 				/* if cannot send NACK - reset channel */
1476 				mutex_enter(&ldcp->tx_lock);
1477 				i_ldc_reset(ldcp, B_TRUE);
1478 				mutex_exit(&ldcp->tx_lock);
1479 				rv = ECONNRESET;
1480 				break;
1481 			}
1482 			sent_NACK = B_TRUE;
1483 		}
1484 		break;
1485 	default:
1486 		DWARN(ldcp->id, "i_ldc_process_RTS: (0x%llx) unexp ACK\n",
1487 		    ldcp->id);
1488 		mutex_enter(&ldcp->tx_lock);
1489 		i_ldc_reset(ldcp, B_TRUE);
1490 		mutex_exit(&ldcp->tx_lock);
1491 		rv = ECONNRESET;
1492 		break;
1493 	}
1494 
1495 	/*
1496 	 * If either the connection was reset (when rv != 0) or
1497 	 * a NACK was sent, we return. In the case of a NACK
1498 	 * we dont want to consume the packet that came in but
1499 	 * not record that we received the RTS
1500 	 */
1501 	if (rv || sent_NACK)
1502 		return (rv);
1503 
1504 	/* record RTS received */
1505 	ldcp->hstate |= TS_RCVD_RTS;
1506 
1507 	/* store initial SEQID info */
1508 	ldcp->last_msg_snt = msg->seqid;
1509 
1510 	/* Obtain Tx lock */
1511 	mutex_enter(&ldcp->tx_lock);
1512 
1513 	/* get the current tail for the response */
1514 	rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
1515 	if (rv != 0) {
1516 		cmn_err(CE_NOTE,
1517 		    "i_ldc_process_RTS: (0x%lx) err sending RTR\n",
1518 		    ldcp->id);
1519 		i_ldc_reset(ldcp, B_TRUE);
1520 		mutex_exit(&ldcp->tx_lock);
1521 		return (ECONNRESET);
1522 	}
1523 
1524 	pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
1525 	ZERO_PKT(pkt);
1526 
1527 	/* initialize the packet */
1528 	pkt->type = LDC_CTRL;
1529 	pkt->stype = LDC_INFO;
1530 	pkt->ctrl = LDC_RTR;
1531 	pkt->env = ldcp->mode;
1532 	if (ldcp->mode != LDC_MODE_RAW)
1533 		pkt->seqid = LDC_INIT_SEQID;
1534 
1535 	ldcp->last_msg_rcd = msg->seqid;
1536 
1537 	/* initiate the send by calling into HV and set the new tail */
1538 	tx_tail = (tx_tail + LDC_PACKET_SIZE) %
1539 	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1540 
1541 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
1542 	if (rv == 0) {
1543 		D2(ldcp->id,
1544 		    "i_ldc_process_RTS: (0x%llx) sent RTR\n", ldcp->id);
1545 		DUMP_LDC_PKT(ldcp, "i_ldc_process_RTS sent rtr", (uint64_t)pkt);
1546 
1547 		ldcp->tx_tail = tx_tail;
1548 		ldcp->hstate |= TS_SENT_RTR;
1549 
1550 	} else {
1551 		cmn_err(CE_NOTE,
1552 		    "i_ldc_process_RTS: (0x%lx) error sending RTR\n",
1553 		    ldcp->id);
1554 		i_ldc_reset(ldcp, B_TRUE);
1555 		mutex_exit(&ldcp->tx_lock);
1556 		return (ECONNRESET);
1557 	}
1558 
1559 	mutex_exit(&ldcp->tx_lock);
1560 	return (0);
1561 }
1562 
1563 /*
1564  * Process an incoming RTR ctrl message
1565  */
1566 static int
1567 i_ldc_process_RTR(ldc_chan_t *ldcp, ldc_msg_t *msg)
1568 {
1569 	int 		rv = 0;
1570 	boolean_t	sent_NACK = B_FALSE;
1571 
1572 	D2(ldcp->id, "i_ldc_process_RTR: (0x%llx) received RTR\n", ldcp->id);
1573 
1574 	switch (msg->stype) {
1575 	case LDC_NACK:
1576 		/* RTR NACK received */
1577 		DWARN(ldcp->id,
1578 		    "i_ldc_process_RTR: (0x%llx) RTR NACK received\n",
1579 		    ldcp->id);
1580 
1581 		/* Reset the channel -- as we cannot continue */
1582 		mutex_enter(&ldcp->tx_lock);
1583 		i_ldc_reset(ldcp, B_TRUE);
1584 		mutex_exit(&ldcp->tx_lock);
1585 		rv = ECONNRESET;
1586 
1587 		break;
1588 
1589 	case LDC_INFO:
1590 
1591 		/* check mode */
1592 		if (ldcp->mode != (ldc_mode_t)msg->env) {
1593 			DWARN(ldcp->id,
1594 			    "i_ldc_process_RTR: (0x%llx) mode mismatch, "
1595 			    "expecting 0x%x, got 0x%x\n",
1596 			    ldcp->id, ldcp->mode, (ldc_mode_t)msg->env);
1597 			/*
1598 			 * send NACK in response to MODE message
1599 			 * get the current tail for the response
1600 			 */
1601 			rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_NACK, LDC_RTR);
1602 			if (rv) {
1603 				/* if cannot send NACK - reset channel */
1604 				mutex_enter(&ldcp->tx_lock);
1605 				i_ldc_reset(ldcp, B_TRUE);
1606 				mutex_exit(&ldcp->tx_lock);
1607 				rv = ECONNRESET;
1608 				break;
1609 			}
1610 			sent_NACK = B_TRUE;
1611 		}
1612 		break;
1613 
1614 	default:
1615 		DWARN(ldcp->id, "i_ldc_process_RTR: (0x%llx) unexp ACK\n",
1616 		    ldcp->id);
1617 
1618 		/* Reset the channel -- as we cannot continue */
1619 		mutex_enter(&ldcp->tx_lock);
1620 		i_ldc_reset(ldcp, B_TRUE);
1621 		mutex_exit(&ldcp->tx_lock);
1622 		rv = ECONNRESET;
1623 		break;
1624 	}
1625 
1626 	/*
1627 	 * If either the connection was reset (when rv != 0) or
1628 	 * a NACK was sent, we return. In the case of a NACK
1629 	 * we dont want to consume the packet that came in but
1630 	 * not record that we received the RTR
1631 	 */
1632 	if (rv || sent_NACK)
1633 		return (rv);
1634 
1635 	ldcp->last_msg_snt = msg->seqid;
1636 	ldcp->hstate |= TS_RCVD_RTR;
1637 
1638 	rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_INFO, LDC_RDX);
1639 	if (rv) {
1640 		cmn_err(CE_NOTE,
1641 		    "i_ldc_process_RTR: (0x%lx) cannot send RDX\n",
1642 		    ldcp->id);
1643 		mutex_enter(&ldcp->tx_lock);
1644 		i_ldc_reset(ldcp, B_TRUE);
1645 		mutex_exit(&ldcp->tx_lock);
1646 		return (ECONNRESET);
1647 	}
1648 	D2(ldcp->id,
1649 	    "i_ldc_process_RTR: (0x%llx) sent RDX\n", ldcp->id);
1650 
1651 	ldcp->hstate |= TS_SENT_RDX;
1652 	ldcp->tstate |= TS_HSHAKE_DONE;
1653 	if ((ldcp->tstate & TS_IN_RESET) == 0)
1654 		ldcp->status = LDC_UP;
1655 
1656 	D1(ldcp->id, "(0x%llx) Handshake Complete\n", ldcp->id);
1657 
1658 	return (0);
1659 }
1660 
1661 
1662 /*
1663  * Process an incoming RDX ctrl message
1664  */
1665 static int
1666 i_ldc_process_RDX(ldc_chan_t *ldcp, ldc_msg_t *msg)
1667 {
1668 	int	rv = 0;
1669 
1670 	D2(ldcp->id, "i_ldc_process_RDX: (0x%llx) received RDX\n", ldcp->id);
1671 
1672 	switch (msg->stype) {
1673 	case LDC_NACK:
1674 		/* RDX NACK received */
1675 		DWARN(ldcp->id,
1676 		    "i_ldc_process_RDX: (0x%llx) RDX NACK received\n",
1677 		    ldcp->id);
1678 
1679 		/* Reset the channel -- as we cannot continue */
1680 		mutex_enter(&ldcp->tx_lock);
1681 		i_ldc_reset(ldcp, B_TRUE);
1682 		mutex_exit(&ldcp->tx_lock);
1683 		rv = ECONNRESET;
1684 
1685 		break;
1686 
1687 	case LDC_INFO:
1688 
1689 		/*
1690 		 * if channel is UP and a RDX received after data transmission
1691 		 * has commenced it is an error
1692 		 */
1693 		if ((ldcp->tstate == TS_UP) && (ldcp->hstate & TS_RCVD_RDX)) {
1694 			DWARN(DBG_ALL_LDCS,
1695 			    "i_ldc_process_RDX: (0x%llx) unexpected RDX"
1696 			    " - LDC reset\n", ldcp->id);
1697 			mutex_enter(&ldcp->tx_lock);
1698 			i_ldc_reset(ldcp, B_TRUE);
1699 			mutex_exit(&ldcp->tx_lock);
1700 			return (ECONNRESET);
1701 		}
1702 
1703 		ldcp->hstate |= TS_RCVD_RDX;
1704 		ldcp->tstate |= TS_HSHAKE_DONE;
1705 		if ((ldcp->tstate & TS_IN_RESET) == 0)
1706 			ldcp->status = LDC_UP;
1707 
1708 		D1(DBG_ALL_LDCS, "(0x%llx) Handshake Complete\n", ldcp->id);
1709 		break;
1710 
1711 	default:
1712 		DWARN(ldcp->id, "i_ldc_process_RDX: (0x%llx) unexp ACK\n",
1713 		    ldcp->id);
1714 
1715 		/* Reset the channel -- as we cannot continue */
1716 		mutex_enter(&ldcp->tx_lock);
1717 		i_ldc_reset(ldcp, B_TRUE);
1718 		mutex_exit(&ldcp->tx_lock);
1719 		rv = ECONNRESET;
1720 		break;
1721 	}
1722 
1723 	return (rv);
1724 }
1725 
1726 /*
1727  * Process an incoming ACK for a data packet
1728  */
1729 static int
1730 i_ldc_process_data_ACK(ldc_chan_t *ldcp, ldc_msg_t *msg)
1731 {
1732 	int		rv;
1733 	uint64_t 	tx_head;
1734 	ldc_msg_t	*pkt;
1735 
1736 	/* Obtain Tx lock */
1737 	mutex_enter(&ldcp->tx_lock);
1738 
1739 	/*
1740 	 * Read the current Tx head and tail
1741 	 */
1742 	rv = hv_ldc_tx_get_state(ldcp->id,
1743 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
1744 	if (rv != 0) {
1745 		cmn_err(CE_WARN,
1746 		    "i_ldc_process_data_ACK: (0x%lx) cannot read qptrs\n",
1747 		    ldcp->id);
1748 
1749 		/* Reset the channel -- as we cannot continue */
1750 		i_ldc_reset(ldcp, B_TRUE);
1751 		mutex_exit(&ldcp->tx_lock);
1752 		return (ECONNRESET);
1753 	}
1754 
1755 	/*
1756 	 * loop from where the previous ACK location was to the
1757 	 * current head location. This is how far the HV has
1758 	 * actually send pkts. Pkts between head and tail are
1759 	 * yet to be sent by HV.
1760 	 */
1761 	tx_head = ldcp->tx_ackd_head;
1762 	for (;;) {
1763 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_head);
1764 		tx_head = (tx_head + LDC_PACKET_SIZE) %
1765 		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1766 
1767 		if (pkt->seqid == msg->ackid) {
1768 			D2(ldcp->id,
1769 			    "i_ldc_process_data_ACK: (0x%llx) found packet\n",
1770 			    ldcp->id);
1771 			ldcp->last_ack_rcd = msg->ackid;
1772 			ldcp->tx_ackd_head = tx_head;
1773 			break;
1774 		}
1775 		if (tx_head == ldcp->tx_head) {
1776 			/* could not find packet */
1777 			DWARN(ldcp->id,
1778 			    "i_ldc_process_data_ACK: (0x%llx) invalid ACKid\n",
1779 			    ldcp->id);
1780 
1781 			/* Reset the channel -- as we cannot continue */
1782 			i_ldc_reset(ldcp, B_TRUE);
1783 			mutex_exit(&ldcp->tx_lock);
1784 			return (ECONNRESET);
1785 		}
1786 	}
1787 
1788 	mutex_exit(&ldcp->tx_lock);
1789 	return (0);
1790 }
1791 
1792 /*
1793  * Process incoming control message
1794  * Return 0 - session can continue
1795  *        EAGAIN - reprocess packet - state was changed
1796  *	  ECONNRESET - channel was reset
1797  */
1798 static int
1799 i_ldc_ctrlmsg(ldc_chan_t *ldcp, ldc_msg_t *msg)
1800 {
1801 	int 		rv = 0;
1802 
1803 	D1(ldcp->id, "i_ldc_ctrlmsg: (%llx) tstate = %lx, hstate = %lx\n",
1804 	    ldcp->id, ldcp->tstate, ldcp->hstate);
1805 
1806 	switch (ldcp->tstate & ~TS_IN_RESET) {
1807 
1808 	case TS_OPEN:
1809 	case TS_READY:
1810 
1811 		switch (msg->ctrl & LDC_CTRL_MASK) {
1812 		case LDC_VER:
1813 			/* process version message */
1814 			rv = i_ldc_process_VER(ldcp, msg);
1815 			break;
1816 		default:
1817 			DWARN(ldcp->id,
1818 			    "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
1819 			    "tstate=0x%x\n", ldcp->id,
1820 			    (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate);
1821 			break;
1822 		}
1823 
1824 		break;
1825 
1826 	case TS_VREADY:
1827 
1828 		switch (msg->ctrl & LDC_CTRL_MASK) {
1829 		case LDC_VER:
1830 			/* process version message */
1831 			rv = i_ldc_process_VER(ldcp, msg);
1832 			break;
1833 		case LDC_RTS:
1834 			/* process RTS message */
1835 			rv = i_ldc_process_RTS(ldcp, msg);
1836 			break;
1837 		case LDC_RTR:
1838 			/* process RTR message */
1839 			rv = i_ldc_process_RTR(ldcp, msg);
1840 			break;
1841 		case LDC_RDX:
1842 			/* process RDX message */
1843 			rv = i_ldc_process_RDX(ldcp, msg);
1844 			break;
1845 		default:
1846 			DWARN(ldcp->id,
1847 			    "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
1848 			    "tstate=0x%x\n", ldcp->id,
1849 			    (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate);
1850 			break;
1851 		}
1852 
1853 		break;
1854 
1855 	case TS_UP:
1856 
1857 		switch (msg->ctrl & LDC_CTRL_MASK) {
1858 		case LDC_VER:
1859 			DWARN(ldcp->id,
1860 			    "i_ldc_ctrlmsg: (0x%llx) unexpected VER "
1861 			    "- LDC reset\n", ldcp->id);
1862 			/* peer is redoing version negotiation */
1863 			mutex_enter(&ldcp->tx_lock);
1864 			(void) i_ldc_txq_reconf(ldcp);
1865 			i_ldc_reset_state(ldcp);
1866 			mutex_exit(&ldcp->tx_lock);
1867 			rv = EAGAIN;
1868 			break;
1869 
1870 		case LDC_RDX:
1871 			/* process RDX message */
1872 			rv = i_ldc_process_RDX(ldcp, msg);
1873 			break;
1874 
1875 		default:
1876 			DWARN(ldcp->id,
1877 			    "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
1878 			    "tstate=0x%x\n", ldcp->id,
1879 			    (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate);
1880 			break;
1881 		}
1882 	}
1883 
1884 	return (rv);
1885 }
1886 
1887 /*
1888  * Register channel with the channel nexus
1889  */
1890 static int
1891 i_ldc_register_channel(ldc_chan_t *ldcp)
1892 {
1893 	int		rv = 0;
1894 	ldc_cnex_t	*cinfo = &ldcssp->cinfo;
1895 
1896 	if (cinfo->dip == NULL) {
1897 		DWARN(ldcp->id,
1898 		    "i_ldc_register_channel: cnex has not registered\n");
1899 		return (EAGAIN);
1900 	}
1901 
1902 	rv = cinfo->reg_chan(cinfo->dip, ldcp->id, ldcp->devclass);
1903 	if (rv) {
1904 		DWARN(ldcp->id,
1905 		    "i_ldc_register_channel: cannot register channel\n");
1906 		return (rv);
1907 	}
1908 
1909 	rv = cinfo->add_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR,
1910 	    i_ldc_tx_hdlr, ldcp, NULL);
1911 	if (rv) {
1912 		DWARN(ldcp->id,
1913 		    "i_ldc_register_channel: cannot add Tx interrupt\n");
1914 		(void) cinfo->unreg_chan(cinfo->dip, ldcp->id);
1915 		return (rv);
1916 	}
1917 
1918 	rv = cinfo->add_intr(cinfo->dip, ldcp->id, CNEX_RX_INTR,
1919 	    i_ldc_rx_hdlr, ldcp, NULL);
1920 	if (rv) {
1921 		DWARN(ldcp->id,
1922 		    "i_ldc_register_channel: cannot add Rx interrupt\n");
1923 		(void) cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR);
1924 		(void) cinfo->unreg_chan(cinfo->dip, ldcp->id);
1925 		return (rv);
1926 	}
1927 
1928 	ldcp->tstate |= TS_CNEX_RDY;
1929 
1930 	return (0);
1931 }
1932 
1933 /*
1934  * Unregister a channel with the channel nexus
1935  */
1936 static int
1937 i_ldc_unregister_channel(ldc_chan_t *ldcp)
1938 {
1939 	int		rv = 0;
1940 	ldc_cnex_t	*cinfo = &ldcssp->cinfo;
1941 
1942 	if (cinfo->dip == NULL) {
1943 		DWARN(ldcp->id,
1944 		    "i_ldc_unregister_channel: cnex has not registered\n");
1945 		return (EAGAIN);
1946 	}
1947 
1948 	if (ldcp->tstate & TS_CNEX_RDY) {
1949 
1950 		/* Remove the Rx interrupt */
1951 		rv = cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_RX_INTR);
1952 		if (rv) {
1953 			if (rv != EAGAIN) {
1954 				DWARN(ldcp->id,
1955 				    "i_ldc_unregister_channel: err removing "
1956 				    "Rx intr\n");
1957 				return (rv);
1958 			}
1959 
1960 			/*
1961 			 * If interrupts are pending and handler has
1962 			 * finished running, clear interrupt and try
1963 			 * again
1964 			 */
1965 			if (ldcp->rx_intr_state != LDC_INTR_PEND)
1966 				return (rv);
1967 
1968 			(void) i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
1969 			rv = cinfo->rem_intr(cinfo->dip, ldcp->id,
1970 			    CNEX_RX_INTR);
1971 			if (rv) {
1972 				DWARN(ldcp->id, "i_ldc_unregister_channel: "
1973 				    "err removing Rx interrupt\n");
1974 				return (rv);
1975 			}
1976 		}
1977 
1978 		/* Remove the Tx interrupt */
1979 		rv = cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR);
1980 		if (rv) {
1981 			DWARN(ldcp->id,
1982 			    "i_ldc_unregister_channel: err removing Tx intr\n");
1983 			return (rv);
1984 		}
1985 
1986 		/* Unregister the channel */
1987 		rv = cinfo->unreg_chan(ldcssp->cinfo.dip, ldcp->id);
1988 		if (rv) {
1989 			DWARN(ldcp->id,
1990 			    "i_ldc_unregister_channel: cannot unreg channel\n");
1991 			return (rv);
1992 		}
1993 
1994 		ldcp->tstate &= ~TS_CNEX_RDY;
1995 	}
1996 
1997 	return (0);
1998 }
1999 
2000 
2001 /*
2002  * LDC transmit interrupt handler
2003  *    triggered for chanel up/down/reset events
2004  *    and Tx queue content changes
2005  */
2006 static uint_t
2007 i_ldc_tx_hdlr(caddr_t arg1, caddr_t arg2)
2008 {
2009 	_NOTE(ARGUNUSED(arg2))
2010 
2011 	int 		rv;
2012 	ldc_chan_t 	*ldcp;
2013 	boolean_t 	notify_client = B_FALSE;
2014 	uint64_t	notify_event = 0, link_state;
2015 
2016 	/* Get the channel for which interrupt was received */
2017 	ASSERT(arg1 != NULL);
2018 	ldcp = (ldc_chan_t *)arg1;
2019 
2020 	D1(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) Received intr, ldcp=0x%p\n",
2021 	    ldcp->id, ldcp);
2022 
2023 	/* Lock channel */
2024 	mutex_enter(&ldcp->lock);
2025 
2026 	/* Obtain Tx lock */
2027 	mutex_enter(&ldcp->tx_lock);
2028 
2029 	/* mark interrupt as pending */
2030 	ldcp->tx_intr_state = LDC_INTR_ACTIVE;
2031 
2032 	/* save current link state */
2033 	link_state = ldcp->link_state;
2034 
2035 	rv = hv_ldc_tx_get_state(ldcp->id, &ldcp->tx_head, &ldcp->tx_tail,
2036 	    &ldcp->link_state);
2037 	if (rv) {
2038 		cmn_err(CE_WARN,
2039 		    "i_ldc_tx_hdlr: (0x%lx) cannot read queue ptrs rv=0x%d\n",
2040 		    ldcp->id, rv);
2041 		i_ldc_clear_intr(ldcp, CNEX_TX_INTR);
2042 		mutex_exit(&ldcp->tx_lock);
2043 		mutex_exit(&ldcp->lock);
2044 		return (DDI_INTR_CLAIMED);
2045 	}
2046 
2047 	/*
2048 	 * reset the channel state if the channel went down
2049 	 * (other side unconfigured queue) or channel was reset
2050 	 * (other side reconfigured its queue)
2051 	 */
2052 	if (link_state != ldcp->link_state &&
2053 	    ldcp->link_state == LDC_CHANNEL_DOWN) {
2054 		D1(ldcp->id, "i_ldc_tx_hdlr: channel link down\n", ldcp->id);
2055 		i_ldc_reset(ldcp, B_FALSE);
2056 		notify_client = B_TRUE;
2057 		notify_event = LDC_EVT_DOWN;
2058 	}
2059 
2060 	if (link_state != ldcp->link_state &&
2061 	    ldcp->link_state == LDC_CHANNEL_RESET) {
2062 		D1(ldcp->id, "i_ldc_tx_hdlr: channel link reset\n", ldcp->id);
2063 		i_ldc_reset(ldcp, B_FALSE);
2064 		notify_client = B_TRUE;
2065 		notify_event = LDC_EVT_RESET;
2066 	}
2067 
2068 	if (link_state != ldcp->link_state &&
2069 	    (ldcp->tstate & ~TS_IN_RESET) == TS_OPEN &&
2070 	    ldcp->link_state == LDC_CHANNEL_UP) {
2071 		D1(ldcp->id, "i_ldc_tx_hdlr: channel link up\n", ldcp->id);
2072 		notify_client = B_TRUE;
2073 		notify_event = LDC_EVT_RESET;
2074 		ldcp->tstate |= TS_LINK_READY;
2075 		ldcp->status = LDC_READY;
2076 	}
2077 
2078 	/* if callbacks are disabled, do not notify */
2079 	if (!ldcp->cb_enabled)
2080 		notify_client = B_FALSE;
2081 
2082 	i_ldc_clear_intr(ldcp, CNEX_TX_INTR);
2083 	mutex_exit(&ldcp->tx_lock);
2084 
2085 	if (notify_client) {
2086 		ldcp->cb_inprogress = B_TRUE;
2087 		mutex_exit(&ldcp->lock);
2088 		rv = ldcp->cb(notify_event, ldcp->cb_arg);
2089 		if (rv) {
2090 			DWARN(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) callback "
2091 			    "failure", ldcp->id);
2092 		}
2093 		mutex_enter(&ldcp->lock);
2094 		ldcp->cb_inprogress = B_FALSE;
2095 	}
2096 
2097 	mutex_exit(&ldcp->lock);
2098 
2099 	D1(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) exiting handler", ldcp->id);
2100 
2101 	return (DDI_INTR_CLAIMED);
2102 }
2103 
2104 /*
2105  * Process the Rx HV queue.
2106  *
2107  * Returns 0 if data packets were found and no errors were encountered,
2108  * otherwise returns an error. In either case, the *notify argument is
2109  * set to indicate whether or not the client callback function should
2110  * be invoked. The *event argument is set to contain the callback event.
2111  *
2112  * Depending on the channel mode, packets are handled differently:
2113  *
2114  * RAW MODE
2115  * For raw mode channels, when a data packet is encountered,
2116  * processing stops and all packets are left on the queue to be removed
2117  * and processed by the ldc_read code path.
2118  *
2119  * UNRELIABLE MODE
2120  * For unreliable mode, when a data packet is encountered, processing
2121  * stops, and all packets are left on the queue to be removed and
2122  * processed by the ldc_read code path. Control packets are processed
2123  * inline if they are encountered before any data packets.
2124  *
2125  * RELIABLE MODE
2126  * For reliable mode channels, all packets on the receive queue
2127  * are processed: data packets are copied to the data queue and
2128  * control packets are processed inline. Packets are only left on
2129  * the receive queue when the data queue is full.
2130  */
2131 static uint_t
2132 i_ldc_rx_process_hvq(ldc_chan_t *ldcp, boolean_t *notify_client,
2133     uint64_t *notify_event)
2134 {
2135 	int		rv;
2136 	uint64_t 	rx_head, rx_tail;
2137 	ldc_msg_t 	*msg;
2138 	uint64_t	link_state, first_fragment = 0;
2139 	boolean_t	trace_length = B_TRUE;
2140 
2141 	ASSERT(MUTEX_HELD(&ldcp->lock));
2142 	*notify_client = B_FALSE;
2143 	*notify_event = 0;
2144 
2145 	/*
2146 	 * Read packet(s) from the queue
2147 	 */
2148 	for (;;) {
2149 
2150 		link_state = ldcp->link_state;
2151 		rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
2152 		    &ldcp->link_state);
2153 		if (rv) {
2154 			cmn_err(CE_WARN,
2155 			    "i_ldc_rx_process_hvq: (0x%lx) cannot read "
2156 			    "queue ptrs, rv=0x%d\n", ldcp->id, rv);
2157 			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
2158 			return (EIO);
2159 		}
2160 
2161 		/*
2162 		 * reset the channel state if the channel went down
2163 		 * (other side unconfigured queue) or channel was reset
2164 		 * (other side reconfigured its queue)
2165 		 */
2166 
2167 		if (link_state != ldcp->link_state) {
2168 
2169 			switch (ldcp->link_state) {
2170 			case LDC_CHANNEL_DOWN:
2171 				D1(ldcp->id, "i_ldc_rx_process_hvq: channel "
2172 				    "link down\n", ldcp->id);
2173 				mutex_enter(&ldcp->tx_lock);
2174 				i_ldc_reset(ldcp, B_FALSE);
2175 				mutex_exit(&ldcp->tx_lock);
2176 				*notify_client = B_TRUE;
2177 				*notify_event = LDC_EVT_DOWN;
2178 				goto loop_exit;
2179 
2180 			case LDC_CHANNEL_UP:
2181 				D1(ldcp->id, "i_ldc_rx_process_hvq: "
2182 				    "channel link up\n", ldcp->id);
2183 
2184 				if ((ldcp->tstate & ~TS_IN_RESET) == TS_OPEN) {
2185 					*notify_client = B_TRUE;
2186 					*notify_event = LDC_EVT_RESET;
2187 					ldcp->tstate |= TS_LINK_READY;
2188 					ldcp->status = LDC_READY;
2189 				}
2190 				break;
2191 
2192 			case LDC_CHANNEL_RESET:
2193 			default:
2194 #ifdef DEBUG
2195 force_reset:
2196 #endif
2197 				D1(ldcp->id, "i_ldc_rx_process_hvq: channel "
2198 				    "link reset\n", ldcp->id);
2199 				mutex_enter(&ldcp->tx_lock);
2200 				i_ldc_reset(ldcp, B_FALSE);
2201 				mutex_exit(&ldcp->tx_lock);
2202 				*notify_client = B_TRUE;
2203 				*notify_event = LDC_EVT_RESET;
2204 				break;
2205 			}
2206 		}
2207 
2208 #ifdef DEBUG
2209 		if (LDC_INJECT_RESET(ldcp))
2210 			goto force_reset;
2211 #endif
2212 		if (trace_length) {
2213 			TRACE_RXHVQ_LENGTH(ldcp, rx_head, rx_tail);
2214 			trace_length = B_FALSE;
2215 		}
2216 
2217 		if (rx_head == rx_tail) {
2218 			D2(ldcp->id, "i_ldc_rx_process_hvq: (0x%llx) "
2219 			    "No packets\n", ldcp->id);
2220 			break;
2221 		}
2222 
2223 		D2(ldcp->id, "i_ldc_rx_process_hvq: head=0x%llx, "
2224 		    "tail=0x%llx\n", rx_head, rx_tail);
2225 		DUMP_LDC_PKT(ldcp, "i_ldc_rx_process_hvq rcd",
2226 		    ldcp->rx_q_va + rx_head);
2227 
2228 		/* get the message */
2229 		msg = (ldc_msg_t *)(ldcp->rx_q_va + rx_head);
2230 
2231 		/* if channel is in RAW mode or data pkt, notify and return */
2232 		if (ldcp->mode == LDC_MODE_RAW) {
2233 			*notify_client = B_TRUE;
2234 			*notify_event |= LDC_EVT_READ;
2235 			break;
2236 		}
2237 
2238 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_INFO)) {
2239 
2240 			/* discard packet if channel is not up */
2241 			if ((ldcp->tstate & ~TS_IN_RESET) != TS_UP) {
2242 
2243 				/* move the head one position */
2244 				rx_head = (rx_head + LDC_PACKET_SIZE) %
2245 				    (ldcp->rx_q_entries << LDC_PACKET_SHIFT);
2246 
2247 				if (rv = i_ldc_set_rx_head(ldcp, rx_head))
2248 					break;
2249 
2250 				continue;
2251 			} else {
2252 				uint64_t dq_head, dq_tail;
2253 
2254 				/* process only RELIABLE mode data packets */
2255 				if (ldcp->mode != LDC_MODE_RELIABLE) {
2256 					if ((ldcp->tstate & TS_IN_RESET) == 0)
2257 						*notify_client = B_TRUE;
2258 					*notify_event |= LDC_EVT_READ;
2259 					break;
2260 				}
2261 
2262 				/* don't process packet if queue full */
2263 				(void) i_ldc_dq_rx_get_state(ldcp, &dq_head,
2264 				    &dq_tail, NULL);
2265 				dq_tail = (dq_tail + LDC_PACKET_SIZE) %
2266 				    (ldcp->rx_dq_entries << LDC_PACKET_SHIFT);
2267 				if (dq_tail == dq_head ||
2268 				    LDC_INJECT_DQFULL(ldcp)) {
2269 					rv = ENOSPC;
2270 					break;
2271 				}
2272 			}
2273 		}
2274 
2275 		/* Check the sequence ID for the message received */
2276 		rv = i_ldc_check_seqid(ldcp, msg);
2277 		if (rv != 0) {
2278 
2279 			DWARN(ldcp->id, "i_ldc_rx_process_hvq: (0x%llx) "
2280 			    "seqid error, q_ptrs=0x%lx,0x%lx", ldcp->id,
2281 			    rx_head, rx_tail);
2282 
2283 			/* Reset last_msg_rcd to start of message */
2284 			if (first_fragment != 0) {
2285 				ldcp->last_msg_rcd = first_fragment - 1;
2286 				first_fragment = 0;
2287 			}
2288 
2289 			/*
2290 			 * Send a NACK due to seqid mismatch
2291 			 */
2292 			rv = i_ldc_send_pkt(ldcp, msg->type, LDC_NACK,
2293 			    (msg->ctrl & LDC_CTRL_MASK));
2294 
2295 			if (rv) {
2296 				cmn_err(CE_NOTE, "i_ldc_rx_process_hvq: "
2297 				    "(0x%lx) err sending CTRL/DATA NACK msg\n",
2298 				    ldcp->id);
2299 
2300 				/* if cannot send NACK - reset channel */
2301 				mutex_enter(&ldcp->tx_lock);
2302 				i_ldc_reset(ldcp, B_TRUE);
2303 				mutex_exit(&ldcp->tx_lock);
2304 
2305 				*notify_client = B_TRUE;
2306 				*notify_event = LDC_EVT_RESET;
2307 				break;
2308 			}
2309 
2310 			/* purge receive queue */
2311 			(void) i_ldc_set_rx_head(ldcp, rx_tail);
2312 			break;
2313 		}
2314 
2315 		/* record the message ID */
2316 		ldcp->last_msg_rcd = msg->seqid;
2317 
2318 		/* process control messages */
2319 		if (msg->type & LDC_CTRL) {
2320 			/* save current internal state */
2321 			uint64_t tstate = ldcp->tstate;
2322 
2323 			rv = i_ldc_ctrlmsg(ldcp, msg);
2324 			if (rv == EAGAIN) {
2325 				/* re-process pkt - state was adjusted */
2326 				continue;
2327 			}
2328 			if (rv == ECONNRESET) {
2329 				*notify_client = B_TRUE;
2330 				*notify_event = LDC_EVT_RESET;
2331 				break;
2332 			}
2333 
2334 			/*
2335 			 * control message processing was successful
2336 			 * channel transitioned to ready for communication
2337 			 */
2338 			if (rv == 0 && ldcp->tstate == TS_UP &&
2339 			    (tstate & ~TS_IN_RESET) !=
2340 			    (ldcp->tstate & ~TS_IN_RESET)) {
2341 				*notify_client = B_TRUE;
2342 				*notify_event = LDC_EVT_UP;
2343 			}
2344 		}
2345 
2346 		/* process data NACKs */
2347 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_NACK)) {
2348 			DWARN(ldcp->id,
2349 			    "i_ldc_rx_process_hvq: (0x%llx) received DATA/NACK",
2350 			    ldcp->id);
2351 			mutex_enter(&ldcp->tx_lock);
2352 			i_ldc_reset(ldcp, B_TRUE);
2353 			mutex_exit(&ldcp->tx_lock);
2354 			*notify_client = B_TRUE;
2355 			*notify_event = LDC_EVT_RESET;
2356 			break;
2357 		}
2358 
2359 		/* process data ACKs */
2360 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) {
2361 			if (rv = i_ldc_process_data_ACK(ldcp, msg)) {
2362 				*notify_client = B_TRUE;
2363 				*notify_event = LDC_EVT_RESET;
2364 				break;
2365 			}
2366 		}
2367 
2368 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_INFO)) {
2369 			ASSERT(ldcp->mode == LDC_MODE_RELIABLE);
2370 
2371 			/*
2372 			 * Copy the data packet to the data queue. Note
2373 			 * that the copy routine updates the rx_head pointer.
2374 			 */
2375 			i_ldc_rxdq_copy(ldcp, &rx_head);
2376 
2377 			if ((ldcp->tstate & TS_IN_RESET) == 0)
2378 				*notify_client = B_TRUE;
2379 			*notify_event |= LDC_EVT_READ;
2380 		} else {
2381 			rx_head = (rx_head + LDC_PACKET_SIZE) %
2382 			    (ldcp->rx_q_entries << LDC_PACKET_SHIFT);
2383 		}
2384 
2385 		/* move the head one position */
2386 		if (rv = i_ldc_set_rx_head(ldcp, rx_head)) {
2387 			*notify_client = B_TRUE;
2388 			*notify_event = LDC_EVT_RESET;
2389 			break;
2390 		}
2391 
2392 	} /* for */
2393 
2394 loop_exit:
2395 
2396 	if (ldcp->mode == LDC_MODE_RELIABLE) {
2397 		/* ACK data packets */
2398 		if ((*notify_event &
2399 		    (LDC_EVT_READ | LDC_EVT_RESET)) == LDC_EVT_READ) {
2400 			int ack_rv;
2401 			ack_rv = i_ldc_send_pkt(ldcp, LDC_DATA, LDC_ACK, 0);
2402 			if (ack_rv && ack_rv != EWOULDBLOCK) {
2403 				cmn_err(CE_NOTE,
2404 				    "i_ldc_rx_process_hvq: (0x%lx) cannot "
2405 				    "send ACK\n", ldcp->id);
2406 
2407 				mutex_enter(&ldcp->tx_lock);
2408 				i_ldc_reset(ldcp, B_FALSE);
2409 				mutex_exit(&ldcp->tx_lock);
2410 
2411 				*notify_client = B_TRUE;
2412 				*notify_event = LDC_EVT_RESET;
2413 				goto skip_ackpeek;
2414 			}
2415 		}
2416 
2417 		/*
2418 		 * If we have no more space on the data queue, make sure
2419 		 * there are no ACKs on the rx queue waiting to be processed.
2420 		 */
2421 		if (rv == ENOSPC) {
2422 			if (i_ldc_rx_ackpeek(ldcp, rx_head, rx_tail) != 0) {
2423 				ldcp->rx_ack_head = ACKPEEK_HEAD_INVALID;
2424 				*notify_client = B_TRUE;
2425 				*notify_event = LDC_EVT_RESET;
2426 			}
2427 		} else {
2428 			ldcp->rx_ack_head = ACKPEEK_HEAD_INVALID;
2429 		}
2430 	}
2431 
2432 skip_ackpeek:
2433 
2434 	/* Return, indicating whether or not data packets were found */
2435 	if ((*notify_event & (LDC_EVT_READ | LDC_EVT_RESET)) == LDC_EVT_READ)
2436 		return (0);
2437 
2438 	return (ENOMSG);
2439 }
2440 
2441 /*
2442  * Process any ACK packets on the HV receive queue.
2443  *
2444  * This function is only used by RELIABLE mode channels when the
2445  * secondary data queue fills up and there are packets remaining on
2446  * the HV receive queue.
2447  */
2448 int
2449 i_ldc_rx_ackpeek(ldc_chan_t *ldcp, uint64_t rx_head, uint64_t rx_tail)
2450 {
2451 	int		rv = 0;
2452 	ldc_msg_t	*msg;
2453 
2454 	if (ldcp->rx_ack_head == ACKPEEK_HEAD_INVALID)
2455 		ldcp->rx_ack_head = rx_head;
2456 
2457 	while (ldcp->rx_ack_head != rx_tail) {
2458 		msg = (ldc_msg_t *)(ldcp->rx_q_va + ldcp->rx_ack_head);
2459 
2460 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) {
2461 			if (rv = i_ldc_process_data_ACK(ldcp, msg))
2462 				break;
2463 			msg->stype &= ~LDC_ACK;
2464 		}
2465 
2466 		ldcp->rx_ack_head =
2467 		    (ldcp->rx_ack_head + LDC_PACKET_SIZE) %
2468 		    (ldcp->rx_q_entries << LDC_PACKET_SHIFT);
2469 	}
2470 	return (rv);
2471 }
2472 
2473 /* -------------------------------------------------------------------------- */
2474 
2475 /*
2476  * LDC API functions
2477  */
2478 
2479 /*
2480  * Initialize the channel. Allocate internal structure and memory for
2481  * TX/RX queues, and initialize locks.
2482  */
2483 int
2484 ldc_init(uint64_t id, ldc_attr_t *attr, ldc_handle_t *handle)
2485 {
2486 	ldc_chan_t 	*ldcp;
2487 	int		rv, exit_val;
2488 	uint64_t	ra_base, nentries;
2489 	uint64_t	qlen;
2490 
2491 	exit_val = EINVAL;	/* guarantee an error if exit on failure */
2492 
2493 	if (attr == NULL) {
2494 		DWARN(id, "ldc_init: (0x%llx) invalid attr\n", id);
2495 		return (EINVAL);
2496 	}
2497 	if (handle == NULL) {
2498 		DWARN(id, "ldc_init: (0x%llx) invalid handle\n", id);
2499 		return (EINVAL);
2500 	}
2501 
2502 	/* check if channel is valid */
2503 	rv = hv_ldc_tx_qinfo(id, &ra_base, &nentries);
2504 	if (rv == H_ECHANNEL) {
2505 		DWARN(id, "ldc_init: (0x%llx) invalid channel id\n", id);
2506 		return (EINVAL);
2507 	}
2508 
2509 	/* check if the channel has already been initialized */
2510 	mutex_enter(&ldcssp->lock);
2511 	ldcp = ldcssp->chan_list;
2512 	while (ldcp != NULL) {
2513 		if (ldcp->id == id) {
2514 			DWARN(id, "ldc_init: (0x%llx) already initialized\n",
2515 			    id);
2516 			mutex_exit(&ldcssp->lock);
2517 			return (EADDRINUSE);
2518 		}
2519 		ldcp = ldcp->next;
2520 	}
2521 	mutex_exit(&ldcssp->lock);
2522 
2523 	ASSERT(ldcp == NULL);
2524 
2525 	*handle = 0;
2526 
2527 	/* Allocate an ldcp structure */
2528 	ldcp = kmem_zalloc(sizeof (ldc_chan_t), KM_SLEEP);
2529 
2530 	/*
2531 	 * Initialize the channel and Tx lock
2532 	 *
2533 	 * The channel 'lock' protects the entire channel and
2534 	 * should be acquired before initializing, resetting,
2535 	 * destroying or reading from a channel.
2536 	 *
2537 	 * The 'tx_lock' should be acquired prior to transmitting
2538 	 * data over the channel. The lock should also be acquired
2539 	 * prior to channel reconfiguration (in order to prevent
2540 	 * concurrent writes).
2541 	 *
2542 	 * ORDERING: When both locks are being acquired, to prevent
2543 	 * deadlocks, the channel lock should be always acquired prior
2544 	 * to the tx_lock.
2545 	 */
2546 	mutex_init(&ldcp->lock, NULL, MUTEX_DRIVER, NULL);
2547 	mutex_init(&ldcp->tx_lock, NULL, MUTEX_DRIVER, NULL);
2548 
2549 	/* Initialize the channel */
2550 	ldcp->id = id;
2551 	ldcp->cb = NULL;
2552 	ldcp->cb_arg = NULL;
2553 	ldcp->cb_inprogress = B_FALSE;
2554 	ldcp->cb_enabled = B_FALSE;
2555 	ldcp->next = NULL;
2556 
2557 	/* Read attributes */
2558 	ldcp->mode = attr->mode;
2559 	ldcp->devclass = attr->devclass;
2560 	ldcp->devinst = attr->instance;
2561 	ldcp->mtu = (attr->mtu > 0) ? attr->mtu : LDC_DEFAULT_MTU;
2562 
2563 	D1(ldcp->id,
2564 	    "ldc_init: (0x%llx) channel attributes, class=0x%x, "
2565 	    "instance=0x%llx, mode=%d, mtu=%d\n",
2566 	    ldcp->id, ldcp->devclass, ldcp->devinst, ldcp->mode, ldcp->mtu);
2567 
2568 	ldcp->next_vidx = 0;
2569 	ldcp->tstate = TS_IN_RESET;
2570 	ldcp->hstate = 0;
2571 	ldcp->last_msg_snt = LDC_INIT_SEQID;
2572 	ldcp->last_ack_rcd = 0;
2573 	ldcp->last_msg_rcd = 0;
2574 	ldcp->rx_ack_head = ACKPEEK_HEAD_INVALID;
2575 
2576 	ldcp->stream_bufferp = NULL;
2577 	ldcp->exp_dring_list = NULL;
2578 	ldcp->imp_dring_list = NULL;
2579 	ldcp->mhdl_list = NULL;
2580 
2581 	ldcp->tx_intr_state = LDC_INTR_NONE;
2582 	ldcp->rx_intr_state = LDC_INTR_NONE;
2583 
2584 	/* Initialize payload size depending on whether channel is reliable */
2585 	switch (ldcp->mode) {
2586 	case LDC_MODE_RAW:
2587 		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_RAW;
2588 		ldcp->read_p = i_ldc_read_raw;
2589 		ldcp->write_p = i_ldc_write_raw;
2590 		break;
2591 	case LDC_MODE_UNRELIABLE:
2592 		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_UNRELIABLE;
2593 		ldcp->read_p = i_ldc_read_packet;
2594 		ldcp->write_p = i_ldc_write_packet;
2595 		break;
2596 	case LDC_MODE_RELIABLE:
2597 		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_RELIABLE;
2598 
2599 		ldcp->stream_remains = 0;
2600 		ldcp->stream_offset = 0;
2601 		ldcp->stream_bufferp = kmem_alloc(ldcp->mtu, KM_SLEEP);
2602 		ldcp->read_p = i_ldc_read_stream;
2603 		ldcp->write_p = i_ldc_write_stream;
2604 		break;
2605 	default:
2606 		exit_val = EINVAL;
2607 		goto cleanup_on_exit;
2608 	}
2609 
2610 	/*
2611 	 * qlen is (mtu * ldc_mtu_msgs) / pkt_payload. If this
2612 	 * value is smaller than default length of ldc_queue_entries,
2613 	 * qlen is set to ldc_queue_entries. Ensure that computed
2614 	 * length is a power-of-two value.
2615 	 */
2616 	qlen = (ldcp->mtu * ldc_mtu_msgs) / ldcp->pkt_payload;
2617 	if (!ISP2(qlen)) {
2618 		uint64_t	tmp = 1;
2619 		while (qlen) {
2620 			qlen >>= 1; tmp <<= 1;
2621 		}
2622 		qlen = tmp;
2623 	}
2624 
2625 	ldcp->rx_q_entries =
2626 	    (qlen < ldc_queue_entries) ? ldc_queue_entries : qlen;
2627 	ldcp->tx_q_entries = ldcp->rx_q_entries;
2628 
2629 	D1(ldcp->id, "ldc_init: queue length = 0x%llx\n", ldcp->rx_q_entries);
2630 
2631 	/* Create a transmit queue */
2632 	ldcp->tx_q_va = (uint64_t)
2633 	    contig_mem_alloc(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
2634 	if (ldcp->tx_q_va == NULL) {
2635 		cmn_err(CE_WARN,
2636 		    "ldc_init: (0x%lx) TX queue allocation failed\n",
2637 		    ldcp->id);
2638 		exit_val = ENOMEM;
2639 		goto cleanup_on_exit;
2640 	}
2641 	ldcp->tx_q_ra = va_to_pa((caddr_t)ldcp->tx_q_va);
2642 
2643 	D2(ldcp->id, "ldc_init: txq_va=0x%llx, txq_ra=0x%llx, entries=0x%llx\n",
2644 	    ldcp->tx_q_va, ldcp->tx_q_ra, ldcp->tx_q_entries);
2645 
2646 	ldcp->tstate |= TS_TXQ_RDY;
2647 
2648 	/* Create a receive queue */
2649 	ldcp->rx_q_va = (uint64_t)
2650 	    contig_mem_alloc(ldcp->rx_q_entries << LDC_PACKET_SHIFT);
2651 	if (ldcp->rx_q_va == NULL) {
2652 		cmn_err(CE_WARN,
2653 		    "ldc_init: (0x%lx) RX queue allocation failed\n",
2654 		    ldcp->id);
2655 		exit_val = ENOMEM;
2656 		goto cleanup_on_exit;
2657 	}
2658 	ldcp->rx_q_ra = va_to_pa((caddr_t)ldcp->rx_q_va);
2659 
2660 	D2(ldcp->id, "ldc_init: rxq_va=0x%llx, rxq_ra=0x%llx, entries=0x%llx\n",
2661 	    ldcp->rx_q_va, ldcp->rx_q_ra, ldcp->rx_q_entries);
2662 
2663 	ldcp->tstate |= TS_RXQ_RDY;
2664 
2665 	/* Setup a separate read data queue */
2666 	if (ldcp->mode == LDC_MODE_RELIABLE) {
2667 		ldcp->readq_get_state = i_ldc_dq_rx_get_state;
2668 		ldcp->readq_set_head  = i_ldc_set_rxdq_head;
2669 
2670 		/* Make sure the data queue multiplier is a power of 2 */
2671 		if (!ISP2(ldc_rxdq_multiplier)) {
2672 			D1(ldcp->id, "ldc_init: (0x%llx) ldc_rxdq_multiplier "
2673 			    "not a power of 2, resetting", ldcp->id);
2674 			ldc_rxdq_multiplier = LDC_RXDQ_MULTIPLIER;
2675 		}
2676 
2677 		ldcp->rx_dq_entries = ldc_rxdq_multiplier * ldcp->rx_q_entries;
2678 		ldcp->rx_dq_va = (uint64_t)
2679 		    kmem_alloc(ldcp->rx_dq_entries << LDC_PACKET_SHIFT,
2680 		    KM_SLEEP);
2681 		if (ldcp->rx_dq_va == NULL) {
2682 			cmn_err(CE_WARN,
2683 			    "ldc_init: (0x%lx) RX data queue "
2684 			    "allocation failed\n", ldcp->id);
2685 			exit_val = ENOMEM;
2686 			goto cleanup_on_exit;
2687 		}
2688 
2689 		ldcp->rx_dq_head = ldcp->rx_dq_tail = 0;
2690 
2691 		D2(ldcp->id, "ldc_init: rx_dq_va=0x%llx, "
2692 		    "rx_dq_entries=0x%llx\n", ldcp->rx_dq_va,
2693 		    ldcp->rx_dq_entries);
2694 	} else {
2695 		ldcp->readq_get_state = i_ldc_hvq_rx_get_state;
2696 		ldcp->readq_set_head  = i_ldc_set_rx_head;
2697 	}
2698 
2699 	/* Init descriptor ring and memory handle list lock */
2700 	mutex_init(&ldcp->exp_dlist_lock, NULL, MUTEX_DRIVER, NULL);
2701 	mutex_init(&ldcp->imp_dlist_lock, NULL, MUTEX_DRIVER, NULL);
2702 	mutex_init(&ldcp->mlist_lock, NULL, MUTEX_DRIVER, NULL);
2703 
2704 	/* mark status as INITialized */
2705 	ldcp->status = LDC_INIT;
2706 
2707 	/* Add to channel list */
2708 	mutex_enter(&ldcssp->lock);
2709 	ldcp->next = ldcssp->chan_list;
2710 	ldcssp->chan_list = ldcp;
2711 	ldcssp->channel_count++;
2712 	mutex_exit(&ldcssp->lock);
2713 
2714 	/* set the handle */
2715 	*handle = (ldc_handle_t)ldcp;
2716 
2717 	D1(ldcp->id, "ldc_init: (0x%llx) channel initialized\n", ldcp->id);
2718 
2719 	return (0);
2720 
2721 cleanup_on_exit:
2722 
2723 	if (ldcp->mode == LDC_MODE_RELIABLE && ldcp->stream_bufferp)
2724 		kmem_free(ldcp->stream_bufferp, ldcp->mtu);
2725 
2726 	if (ldcp->tstate & TS_TXQ_RDY)
2727 		contig_mem_free((caddr_t)ldcp->tx_q_va,
2728 		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT));
2729 
2730 	if (ldcp->tstate & TS_RXQ_RDY)
2731 		contig_mem_free((caddr_t)ldcp->rx_q_va,
2732 		    (ldcp->rx_q_entries << LDC_PACKET_SHIFT));
2733 
2734 	mutex_destroy(&ldcp->tx_lock);
2735 	mutex_destroy(&ldcp->lock);
2736 
2737 	if (ldcp)
2738 		kmem_free(ldcp, sizeof (ldc_chan_t));
2739 
2740 	return (exit_val);
2741 }
2742 
2743 /*
2744  * Finalizes the LDC connection. It will return EBUSY if the
2745  * channel is open. A ldc_close() has to be done prior to
2746  * a ldc_fini operation. It frees TX/RX queues, associated
2747  * with the channel
2748  */
2749 int
2750 ldc_fini(ldc_handle_t handle)
2751 {
2752 	ldc_chan_t 	*ldcp;
2753 	ldc_chan_t 	*tmp_ldcp;
2754 	uint64_t 	id;
2755 
2756 	if (handle == NULL) {
2757 		DWARN(DBG_ALL_LDCS, "ldc_fini: invalid channel handle\n");
2758 		return (EINVAL);
2759 	}
2760 	ldcp = (ldc_chan_t *)handle;
2761 	id = ldcp->id;
2762 
2763 	mutex_enter(&ldcp->lock);
2764 
2765 	if ((ldcp->tstate & ~TS_IN_RESET) > TS_INIT) {
2766 		DWARN(ldcp->id, "ldc_fini: (0x%llx) channel is open\n",
2767 		    ldcp->id);
2768 		mutex_exit(&ldcp->lock);
2769 		return (EBUSY);
2770 	}
2771 
2772 	/* Remove from the channel list */
2773 	mutex_enter(&ldcssp->lock);
2774 	tmp_ldcp = ldcssp->chan_list;
2775 	if (tmp_ldcp == ldcp) {
2776 		ldcssp->chan_list = ldcp->next;
2777 		ldcp->next = NULL;
2778 	} else {
2779 		while (tmp_ldcp != NULL) {
2780 			if (tmp_ldcp->next == ldcp) {
2781 				tmp_ldcp->next = ldcp->next;
2782 				ldcp->next = NULL;
2783 				break;
2784 			}
2785 			tmp_ldcp = tmp_ldcp->next;
2786 		}
2787 		if (tmp_ldcp == NULL) {
2788 			DWARN(DBG_ALL_LDCS, "ldc_fini: invalid channel hdl\n");
2789 			mutex_exit(&ldcssp->lock);
2790 			mutex_exit(&ldcp->lock);
2791 			return (EINVAL);
2792 		}
2793 	}
2794 
2795 	ldcssp->channel_count--;
2796 
2797 	mutex_exit(&ldcssp->lock);
2798 
2799 	/* Free the map table for this channel */
2800 	if (ldcp->mtbl) {
2801 		(void) hv_ldc_set_map_table(ldcp->id, NULL, NULL);
2802 		if (ldcp->mtbl->contigmem)
2803 			contig_mem_free(ldcp->mtbl->table, ldcp->mtbl->size);
2804 		else
2805 			kmem_free(ldcp->mtbl->table, ldcp->mtbl->size);
2806 		mutex_destroy(&ldcp->mtbl->lock);
2807 		kmem_free(ldcp->mtbl, sizeof (ldc_mtbl_t));
2808 	}
2809 
2810 	/* Destroy descriptor ring and memory handle list lock */
2811 	mutex_destroy(&ldcp->exp_dlist_lock);
2812 	mutex_destroy(&ldcp->imp_dlist_lock);
2813 	mutex_destroy(&ldcp->mlist_lock);
2814 
2815 	/* Free the stream buffer for RELIABLE_MODE */
2816 	if (ldcp->mode == LDC_MODE_RELIABLE && ldcp->stream_bufferp)
2817 		kmem_free(ldcp->stream_bufferp, ldcp->mtu);
2818 
2819 	/* Free the RX queue */
2820 	contig_mem_free((caddr_t)ldcp->rx_q_va,
2821 	    (ldcp->rx_q_entries << LDC_PACKET_SHIFT));
2822 	ldcp->tstate &= ~TS_RXQ_RDY;
2823 
2824 	/* Free the RX data queue */
2825 	if (ldcp->mode == LDC_MODE_RELIABLE) {
2826 		kmem_free((caddr_t)ldcp->rx_dq_va,
2827 		    (ldcp->rx_dq_entries << LDC_PACKET_SHIFT));
2828 	}
2829 
2830 	/* Free the TX queue */
2831 	contig_mem_free((caddr_t)ldcp->tx_q_va,
2832 	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT));
2833 	ldcp->tstate &= ~TS_TXQ_RDY;
2834 
2835 	mutex_exit(&ldcp->lock);
2836 
2837 	/* Destroy mutex */
2838 	mutex_destroy(&ldcp->tx_lock);
2839 	mutex_destroy(&ldcp->lock);
2840 
2841 	/* free channel structure */
2842 	kmem_free(ldcp, sizeof (ldc_chan_t));
2843 
2844 	D1(id, "ldc_fini: (0x%llx) channel finalized\n", id);
2845 
2846 	return (0);
2847 }
2848 
2849 /*
2850  * Open the LDC channel for use. It registers the TX/RX queues
2851  * with the Hypervisor. It also specifies the interrupt number
2852  * and target CPU for this channel
2853  */
2854 int
2855 ldc_open(ldc_handle_t handle)
2856 {
2857 	ldc_chan_t 	*ldcp;
2858 	int 		rv;
2859 
2860 	if (handle == NULL) {
2861 		DWARN(DBG_ALL_LDCS, "ldc_open: invalid channel handle\n");
2862 		return (EINVAL);
2863 	}
2864 
2865 	ldcp = (ldc_chan_t *)handle;
2866 
2867 	mutex_enter(&ldcp->lock);
2868 
2869 	if (ldcp->tstate < TS_INIT) {
2870 		DWARN(ldcp->id,
2871 		    "ldc_open: (0x%llx) channel not initialized\n", ldcp->id);
2872 		mutex_exit(&ldcp->lock);
2873 		return (EFAULT);
2874 	}
2875 	if ((ldcp->tstate & ~TS_IN_RESET) >= TS_OPEN) {
2876 		DWARN(ldcp->id,
2877 		    "ldc_open: (0x%llx) channel is already open\n", ldcp->id);
2878 		mutex_exit(&ldcp->lock);
2879 		return (EFAULT);
2880 	}
2881 
2882 	/*
2883 	 * Unregister/Register the tx queue with the hypervisor
2884 	 */
2885 	rv = hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
2886 	if (rv) {
2887 		cmn_err(CE_WARN,
2888 		    "ldc_open: (0x%lx) channel tx queue unconf failed\n",
2889 		    ldcp->id);
2890 		mutex_exit(&ldcp->lock);
2891 		return (EIO);
2892 	}
2893 
2894 	rv = hv_ldc_tx_qconf(ldcp->id, ldcp->tx_q_ra, ldcp->tx_q_entries);
2895 	if (rv) {
2896 		cmn_err(CE_WARN,
2897 		    "ldc_open: (0x%lx) channel tx queue conf failed\n",
2898 		    ldcp->id);
2899 		mutex_exit(&ldcp->lock);
2900 		return (EIO);
2901 	}
2902 
2903 	D2(ldcp->id, "ldc_open: (0x%llx) registered tx queue with LDC\n",
2904 	    ldcp->id);
2905 
2906 	/*
2907 	 * Unregister/Register the rx queue with the hypervisor
2908 	 */
2909 	rv = hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
2910 	if (rv) {
2911 		cmn_err(CE_WARN,
2912 		    "ldc_open: (0x%lx) channel rx queue unconf failed\n",
2913 		    ldcp->id);
2914 		mutex_exit(&ldcp->lock);
2915 		return (EIO);
2916 	}
2917 
2918 	rv = hv_ldc_rx_qconf(ldcp->id, ldcp->rx_q_ra, ldcp->rx_q_entries);
2919 	if (rv) {
2920 		cmn_err(CE_WARN,
2921 		    "ldc_open: (0x%lx) channel rx queue conf failed\n",
2922 		    ldcp->id);
2923 		mutex_exit(&ldcp->lock);
2924 		return (EIO);
2925 	}
2926 
2927 	D2(ldcp->id, "ldc_open: (0x%llx) registered rx queue with LDC\n",
2928 	    ldcp->id);
2929 
2930 	ldcp->tstate |= TS_QCONF_RDY;
2931 
2932 	/* Register the channel with the channel nexus */
2933 	rv = i_ldc_register_channel(ldcp);
2934 	if (rv && rv != EAGAIN) {
2935 		cmn_err(CE_WARN,
2936 		    "ldc_open: (0x%lx) channel register failed\n", ldcp->id);
2937 		(void) hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
2938 		(void) hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
2939 		mutex_exit(&ldcp->lock);
2940 		return (EIO);
2941 	}
2942 
2943 	/* mark channel in OPEN state */
2944 	ldcp->status = LDC_OPEN;
2945 
2946 	/* Read channel state */
2947 	rv = hv_ldc_tx_get_state(ldcp->id,
2948 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
2949 	if (rv) {
2950 		cmn_err(CE_WARN,
2951 		    "ldc_open: (0x%lx) cannot read channel state\n",
2952 		    ldcp->id);
2953 		(void) i_ldc_unregister_channel(ldcp);
2954 		(void) hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
2955 		(void) hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
2956 		mutex_exit(&ldcp->lock);
2957 		return (EIO);
2958 	}
2959 
2960 	/*
2961 	 * set the ACKd head to current head location for reliable
2962 	 */
2963 	ldcp->tx_ackd_head = ldcp->tx_head;
2964 
2965 	/* mark channel ready if HV report link is UP (peer alloc'd Rx queue) */
2966 	if (ldcp->link_state == LDC_CHANNEL_UP ||
2967 	    ldcp->link_state == LDC_CHANNEL_RESET) {
2968 		ldcp->tstate |= TS_LINK_READY;
2969 		ldcp->status = LDC_READY;
2970 	}
2971 
2972 	/*
2973 	 * if channel is being opened in RAW mode - no handshake is needed
2974 	 * switch the channel READY and UP state
2975 	 */
2976 	if (ldcp->mode == LDC_MODE_RAW) {
2977 		ldcp->tstate = TS_UP;	/* set bits associated with LDC UP */
2978 		ldcp->status = LDC_UP;
2979 	}
2980 
2981 	mutex_exit(&ldcp->lock);
2982 
2983 	/*
2984 	 * Increment number of open channels
2985 	 */
2986 	mutex_enter(&ldcssp->lock);
2987 	ldcssp->channels_open++;
2988 	mutex_exit(&ldcssp->lock);
2989 
2990 	D1(ldcp->id,
2991 	    "ldc_open: (0x%llx) channel (0x%p) open for use "
2992 	    "(tstate=0x%x, status=0x%x)\n",
2993 	    ldcp->id, ldcp, ldcp->tstate, ldcp->status);
2994 
2995 	return (0);
2996 }
2997 
/*
 * Close the LDC connection. It will return EBUSY if there
 * are memory segments or descriptor rings either bound to or
 * mapped over the channel
 *
 * Returns:
 *	0		channel closed; state is back to TS_IN_RESET|TS_INIT
 *	EINVAL		handle is NULL
 *	EFAULT		channel is not open
 *	EBUSY		memory handles or descriptor rings still attached
 *	EWOULDBLOCK	a client callback is in progress
 *	EIO		a hypervisor queue operation failed
 *	other		the value returned by i_ldc_unregister_channel()
 *			when it fails with something other than EAGAIN or
 *			the retry limit is reached
 */
int
ldc_close(ldc_handle_t handle)
{
	ldc_chan_t 	*ldcp;
	int		rv = 0, retries = 0;
	boolean_t	chk_done = B_FALSE;

	if (handle == NULL) {
		DWARN(DBG_ALL_LDCS, "ldc_close: invalid channel handle\n");
		return (EINVAL);
	}
	ldcp = (ldc_chan_t *)handle;

	mutex_enter(&ldcp->lock);

	/* return error if channel is not open */
	if ((ldcp->tstate & ~TS_IN_RESET) < TS_OPEN) {
		DWARN(ldcp->id,
		    "ldc_close: (0x%llx) channel is not open\n", ldcp->id);
		mutex_exit(&ldcp->lock);
		return (EFAULT);
	}

	/* if any memory handles, drings, are bound or mapped cannot close */
	if (ldcp->mhdl_list != NULL) {
		DWARN(ldcp->id,
		    "ldc_close: (0x%llx) channel has bound memory handles\n",
		    ldcp->id);
		mutex_exit(&ldcp->lock);
		return (EBUSY);
	}
	if (ldcp->exp_dring_list != NULL) {
		DWARN(ldcp->id,
		    "ldc_close: (0x%llx) channel has bound descriptor rings\n",
		    ldcp->id);
		mutex_exit(&ldcp->lock);
		return (EBUSY);
	}
	if (ldcp->imp_dring_list != NULL) {
		DWARN(ldcp->id,
		    "ldc_close: (0x%llx) channel has mapped descriptor rings\n",
		    ldcp->id);
		mutex_exit(&ldcp->lock);
		return (EBUSY);
	}

	/* refuse to close while a client callback is running */
	if (ldcp->cb_inprogress) {
		DWARN(ldcp->id, "ldc_close: (0x%llx) callback active\n",
		    ldcp->id);
		mutex_exit(&ldcp->lock);
		return (EWOULDBLOCK);
	}

	/* Obtain Tx lock (taken after the channel lock) */
	mutex_enter(&ldcp->tx_lock);

	/*
	 * Wait for pending transmits to complete i.e Tx queue to drain
	 * if there are pending pkts - wait 1 ms and retry again
	 */
	for (;;) {

		rv = hv_ldc_tx_get_state(ldcp->id,
		    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
		if (rv) {
			cmn_err(CE_WARN,
			    "ldc_close: (0x%lx) cannot read qptrs\n", ldcp->id);
			mutex_exit(&ldcp->tx_lock);
			mutex_exit(&ldcp->lock);
			return (EIO);
		}

		/* done if the queue is empty or the link is not up */
		if (ldcp->tx_head == ldcp->tx_tail ||
		    ldcp->link_state != LDC_CHANNEL_UP) {
			break;
		}

		/* chk_done limits the wait to a single delay */
		if (chk_done) {
			DWARN(ldcp->id,
			    "ldc_close: (0x%llx) Tx queue drain timeout\n",
			    ldcp->id);
			break;
		}

		/* wait for one ms and try again */
		delay(drv_usectohz(1000));
		chk_done = B_TRUE;
	}

	/*
	 * Drain the Tx and Rx queues as we are closing the
	 * channel. We dont care about any pending packets.
	 * We have to also drain the queue prior to clearing
	 * pending interrupts, otherwise the HV will trigger
	 * an interrupt the moment the interrupt state is
	 * cleared.
	 */
	(void) i_ldc_txq_reconf(ldcp);
	(void) i_ldc_rxq_drain(ldcp);

	/*
	 * Unregister the channel with the nexus
	 */
	while ((rv = i_ldc_unregister_channel(ldcp)) != 0) {

		/* both locks are dropped before returning or waiting */
		mutex_exit(&ldcp->tx_lock);
		mutex_exit(&ldcp->lock);

		/* if any error other than EAGAIN return back */
		if (rv != EAGAIN || retries >= ldc_max_retries) {
			cmn_err(CE_WARN,
			    "ldc_close: (0x%lx) unregister failed, %d\n",
			    ldcp->id, rv);
			return (rv);
		}

		/*
		 * As there could be pending interrupts we need
		 * to wait and try again
		 */
		drv_usecwait(ldc_close_delay);
		/* re-take the locks in order: channel lock, then tx_lock */
		mutex_enter(&ldcp->lock);
		mutex_enter(&ldcp->tx_lock);
		retries++;
	}

	/*
	 * Unregister queues
	 */
	rv = hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
	if (rv) {
		cmn_err(CE_WARN,
		    "ldc_close: (0x%lx) channel TX queue unconf failed\n",
		    ldcp->id);
		mutex_exit(&ldcp->tx_lock);
		mutex_exit(&ldcp->lock);
		return (EIO);
	}
	rv = hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
	if (rv) {
		cmn_err(CE_WARN,
		    "ldc_close: (0x%lx) channel RX queue unconf failed\n",
		    ldcp->id);
		mutex_exit(&ldcp->tx_lock);
		mutex_exit(&ldcp->lock);
		return (EIO);
	}

	ldcp->tstate &= ~TS_QCONF_RDY;

	/* Reset channel state information */
	i_ldc_reset_state(ldcp);

	/* Mark channel as down and in initialized state */
	ldcp->tx_ackd_head = 0;
	ldcp->tx_head = 0;
	ldcp->tstate = TS_IN_RESET|TS_INIT;
	ldcp->status = LDC_INIT;

	mutex_exit(&ldcp->tx_lock);
	mutex_exit(&ldcp->lock);

	/* Decrement number of open channels */
	mutex_enter(&ldcssp->lock);
	ldcssp->channels_open--;
	mutex_exit(&ldcssp->lock);

	D1(ldcp->id, "ldc_close: (0x%llx) channel closed\n", ldcp->id);

	return (0);
}
3174 
3175 /*
3176  * Register channel callback
3177  */
3178 int
3179 ldc_reg_callback(ldc_handle_t handle,
3180     uint_t(*cb)(uint64_t event, caddr_t arg), caddr_t arg)
3181 {
3182 	ldc_chan_t *ldcp;
3183 
3184 	if (handle == NULL) {
3185 		DWARN(DBG_ALL_LDCS,
3186 		    "ldc_reg_callback: invalid channel handle\n");
3187 		return (EINVAL);
3188 	}
3189 	if (((uint64_t)cb) < KERNELBASE) {
3190 		DWARN(DBG_ALL_LDCS, "ldc_reg_callback: invalid callback\n");
3191 		return (EINVAL);
3192 	}
3193 	ldcp = (ldc_chan_t *)handle;
3194 
3195 	mutex_enter(&ldcp->lock);
3196 
3197 	if (ldcp->cb) {
3198 		DWARN(ldcp->id, "ldc_reg_callback: (0x%llx) callback exists\n",
3199 		    ldcp->id);
3200 		mutex_exit(&ldcp->lock);
3201 		return (EIO);
3202 	}
3203 	if (ldcp->cb_inprogress) {
3204 		DWARN(ldcp->id, "ldc_reg_callback: (0x%llx) callback active\n",
3205 		    ldcp->id);
3206 		mutex_exit(&ldcp->lock);
3207 		return (EWOULDBLOCK);
3208 	}
3209 
3210 	ldcp->cb = cb;
3211 	ldcp->cb_arg = arg;
3212 	ldcp->cb_enabled = B_TRUE;
3213 
3214 	D1(ldcp->id,
3215 	    "ldc_reg_callback: (0x%llx) registered callback for channel\n",
3216 	    ldcp->id);
3217 
3218 	mutex_exit(&ldcp->lock);
3219 
3220 	return (0);
3221 }
3222 
3223 /*
3224  * Unregister channel callback
3225  */
3226 int
3227 ldc_unreg_callback(ldc_handle_t handle)
3228 {
3229 	ldc_chan_t *ldcp;
3230 
3231 	if (handle == NULL) {
3232 		DWARN(DBG_ALL_LDCS,
3233 		    "ldc_unreg_callback: invalid channel handle\n");
3234 		return (EINVAL);
3235 	}
3236 	ldcp = (ldc_chan_t *)handle;
3237 
3238 	mutex_enter(&ldcp->lock);
3239 
3240 	if (ldcp->cb == NULL) {
3241 		DWARN(ldcp->id,
3242 		    "ldc_unreg_callback: (0x%llx) no callback exists\n",
3243 		    ldcp->id);
3244 		mutex_exit(&ldcp->lock);
3245 		return (EIO);
3246 	}
3247 	if (ldcp->cb_inprogress) {
3248 		DWARN(ldcp->id,
3249 		    "ldc_unreg_callback: (0x%llx) callback active\n",
3250 		    ldcp->id);
3251 		mutex_exit(&ldcp->lock);
3252 		return (EWOULDBLOCK);
3253 	}
3254 
3255 	ldcp->cb = NULL;
3256 	ldcp->cb_arg = NULL;
3257 	ldcp->cb_enabled = B_FALSE;
3258 
3259 	D1(ldcp->id,
3260 	    "ldc_unreg_callback: (0x%llx) unregistered callback for channel\n",
3261 	    ldcp->id);
3262 
3263 	mutex_exit(&ldcp->lock);
3264 
3265 	return (0);
3266 }
3267 
3268 
3269 /*
3270  * Bring a channel up by initiating a handshake with the peer
3271  * This call is asynchronous. It will complete at a later point
3272  * in time when the peer responds back with an RTR.
3273  */
3274 int
3275 ldc_up(ldc_handle_t handle)
3276 {
3277 	int 		rv;
3278 	ldc_chan_t 	*ldcp;
3279 	ldc_msg_t 	*ldcmsg;
3280 	uint64_t 	tx_tail, tstate, link_state;
3281 
3282 	if (handle == NULL) {
3283 		DWARN(DBG_ALL_LDCS, "ldc_up: invalid channel handle\n");
3284 		return (EINVAL);
3285 	}
3286 	ldcp = (ldc_chan_t *)handle;
3287 
3288 	mutex_enter(&ldcp->lock);
3289 
3290 	D1(ldcp->id, "ldc_up: (0x%llx) doing channel UP\n", ldcp->id);
3291 
3292 	/* clear the reset state */
3293 	tstate = ldcp->tstate;
3294 	ldcp->tstate &= ~TS_IN_RESET;
3295 
3296 	if (ldcp->tstate == TS_UP) {
3297 		DWARN(ldcp->id,
3298 		    "ldc_up: (0x%llx) channel is already in UP state\n",
3299 		    ldcp->id);
3300 
3301 		/* mark channel as up */
3302 		ldcp->status = LDC_UP;
3303 
3304 		/*
3305 		 * if channel was in reset state and there was
3306 		 * pending data clear interrupt state. this will
3307 		 * trigger an interrupt, causing the RX handler to
3308 		 * to invoke the client's callback
3309 		 */
3310 		if ((tstate & TS_IN_RESET) &&
3311 		    ldcp->rx_intr_state == LDC_INTR_PEND) {
3312 			D1(ldcp->id,
3313 			    "ldc_up: (0x%llx) channel has pending data, "
3314 			    "clearing interrupt\n", ldcp->id);
3315 			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
3316 		}
3317 
3318 		mutex_exit(&ldcp->lock);
3319 		return (0);
3320 	}
3321 
3322 	/* if the channel is in RAW mode - mark it as UP, if READY */
3323 	if (ldcp->mode == LDC_MODE_RAW && ldcp->tstate >= TS_READY) {
3324 		ldcp->tstate = TS_UP;
3325 		mutex_exit(&ldcp->lock);
3326 		return (0);
3327 	}
3328 
3329 	/* Don't start another handshake if there is one in progress */
3330 	if (ldcp->hstate) {
3331 		D1(ldcp->id,
3332 		    "ldc_up: (0x%llx) channel handshake in progress\n",
3333 		    ldcp->id);
3334 		mutex_exit(&ldcp->lock);
3335 		return (0);
3336 	}
3337 
3338 	mutex_enter(&ldcp->tx_lock);
3339 
3340 	/* save current link state */
3341 	link_state = ldcp->link_state;
3342 
3343 	/* get the current tail for the LDC msg */
3344 	rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
3345 	if (rv) {
3346 		D1(ldcp->id, "ldc_up: (0x%llx) cannot initiate handshake\n",
3347 		    ldcp->id);
3348 		mutex_exit(&ldcp->tx_lock);
3349 		mutex_exit(&ldcp->lock);
3350 		return (ECONNREFUSED);
3351 	}
3352 
3353 	/*
3354 	 * If i_ldc_get_tx_tail() changed link_state to either RESET or UP,
3355 	 * from a previous state of DOWN, then mark the channel as
3356 	 * being ready for handshake.
3357 	 */
3358 	if ((link_state == LDC_CHANNEL_DOWN) &&
3359 	    (link_state != ldcp->link_state)) {
3360 
3361 		ASSERT((ldcp->link_state == LDC_CHANNEL_RESET) ||
3362 		    (ldcp->link_state == LDC_CHANNEL_UP));
3363 
3364 		if (ldcp->mode == LDC_MODE_RAW) {
3365 			ldcp->status = LDC_UP;
3366 			ldcp->tstate = TS_UP;
3367 			mutex_exit(&ldcp->tx_lock);
3368 			mutex_exit(&ldcp->lock);
3369 			return (0);
3370 		} else {
3371 			ldcp->status = LDC_READY;
3372 			ldcp->tstate |= TS_LINK_READY;
3373 		}
3374 
3375 	}
3376 
3377 	ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
3378 	ZERO_PKT(ldcmsg);
3379 
3380 	ldcmsg->type = LDC_CTRL;
3381 	ldcmsg->stype = LDC_INFO;
3382 	ldcmsg->ctrl = LDC_VER;
3383 	ldcp->next_vidx = 0;
3384 	bcopy(&ldc_versions[0], ldcmsg->udata, sizeof (ldc_versions[0]));
3385 
3386 	DUMP_LDC_PKT(ldcp, "ldc_up snd ver", (uint64_t)ldcmsg);
3387 
3388 	/* initiate the send by calling into HV and set the new tail */
3389 	tx_tail = (tx_tail + LDC_PACKET_SIZE) %
3390 	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
3391 
3392 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
3393 	if (rv) {
3394 		DWARN(ldcp->id,
3395 		    "ldc_up: (0x%llx) cannot initiate handshake rv=%d\n",
3396 		    ldcp->id, rv);
3397 		mutex_exit(&ldcp->tx_lock);
3398 		mutex_exit(&ldcp->lock);
3399 		return (rv);
3400 	}
3401 
3402 	ldcp->hstate |= TS_SENT_VER;
3403 	ldcp->tx_tail = tx_tail;
3404 	D1(ldcp->id, "ldc_up: (0x%llx) channel up initiated\n", ldcp->id);
3405 
3406 	mutex_exit(&ldcp->tx_lock);
3407 	mutex_exit(&ldcp->lock);
3408 
3409 	return (rv);
3410 }
3411 
3412 
3413 /*
3414  * Bring a channel down by resetting its state and queues
3415  */
3416 int
3417 ldc_down(ldc_handle_t handle)
3418 {
3419 	ldc_chan_t 	*ldcp;
3420 
3421 	if (handle == NULL) {
3422 		DWARN(DBG_ALL_LDCS, "ldc_down: invalid channel handle\n");
3423 		return (EINVAL);
3424 	}
3425 	ldcp = (ldc_chan_t *)handle;
3426 	mutex_enter(&ldcp->lock);
3427 	mutex_enter(&ldcp->tx_lock);
3428 	i_ldc_reset(ldcp, B_TRUE);
3429 	mutex_exit(&ldcp->tx_lock);
3430 	mutex_exit(&ldcp->lock);
3431 
3432 	return (0);
3433 }
3434 
3435 /*
3436  * Get the current channel status
3437  */
3438 int
3439 ldc_status(ldc_handle_t handle, ldc_status_t *status)
3440 {
3441 	ldc_chan_t *ldcp;
3442 
3443 	if (handle == NULL || status == NULL) {
3444 		DWARN(DBG_ALL_LDCS, "ldc_status: invalid argument\n");
3445 		return (EINVAL);
3446 	}
3447 	ldcp = (ldc_chan_t *)handle;
3448 
3449 	*status = ((ldc_chan_t *)handle)->status;
3450 
3451 	D1(ldcp->id,
3452 	    "ldc_status: (0x%llx) returned status %d\n", ldcp->id, *status);
3453 	return (0);
3454 }
3455 
3456 
3457 /*
3458  * Set the channel's callback mode - enable/disable callbacks
3459  */
3460 int
3461 ldc_set_cb_mode(ldc_handle_t handle, ldc_cb_mode_t cmode)
3462 {
3463 	ldc_chan_t 	*ldcp;
3464 
3465 	if (handle == NULL) {
3466 		DWARN(DBG_ALL_LDCS,
3467 		    "ldc_set_intr_mode: invalid channel handle\n");
3468 		return (EINVAL);
3469 	}
3470 	ldcp = (ldc_chan_t *)handle;
3471 
3472 	/*
3473 	 * Record no callbacks should be invoked
3474 	 */
3475 	mutex_enter(&ldcp->lock);
3476 
3477 	switch (cmode) {
3478 	case LDC_CB_DISABLE:
3479 		if (!ldcp->cb_enabled) {
3480 			DWARN(ldcp->id,
3481 			    "ldc_set_cb_mode: (0x%llx) callbacks disabled\n",
3482 			    ldcp->id);
3483 			break;
3484 		}
3485 		ldcp->cb_enabled = B_FALSE;
3486 
3487 		D1(ldcp->id, "ldc_set_cb_mode: (0x%llx) disabled callbacks\n",
3488 		    ldcp->id);
3489 		break;
3490 
3491 	case LDC_CB_ENABLE:
3492 		if (ldcp->cb_enabled) {
3493 			DWARN(ldcp->id,
3494 			    "ldc_set_cb_mode: (0x%llx) callbacks enabled\n",
3495 			    ldcp->id);
3496 			break;
3497 		}
3498 		ldcp->cb_enabled = B_TRUE;
3499 
3500 		D1(ldcp->id, "ldc_set_cb_mode: (0x%llx) enabled callbacks\n",
3501 		    ldcp->id);
3502 		break;
3503 	}
3504 
3505 	mutex_exit(&ldcp->lock);
3506 
3507 	return (0);
3508 }
3509 
3510 /*
3511  * Check to see if there are packets on the incoming queue
3512  * Will return hasdata = B_FALSE if there are no packets
3513  */
3514 int
3515 ldc_chkq(ldc_handle_t handle, boolean_t *hasdata)
3516 {
3517 	int 		rv;
3518 	uint64_t 	rx_head, rx_tail;
3519 	ldc_chan_t 	*ldcp;
3520 
3521 	if (handle == NULL) {
3522 		DWARN(DBG_ALL_LDCS, "ldc_chkq: invalid channel handle\n");
3523 		return (EINVAL);
3524 	}
3525 	ldcp = (ldc_chan_t *)handle;
3526 
3527 	*hasdata = B_FALSE;
3528 
3529 	mutex_enter(&ldcp->lock);
3530 
3531 	if (ldcp->tstate != TS_UP) {
3532 		D1(ldcp->id,
3533 		    "ldc_chkq: (0x%llx) channel is not up\n", ldcp->id);
3534 		mutex_exit(&ldcp->lock);
3535 		return (ECONNRESET);
3536 	}
3537 
3538 	/* Read packet(s) from the queue */
3539 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
3540 	    &ldcp->link_state);
3541 	if (rv != 0) {
3542 		cmn_err(CE_WARN,
3543 		    "ldc_chkq: (0x%lx) unable to read queue ptrs", ldcp->id);
3544 		mutex_exit(&ldcp->lock);
3545 		return (EIO);
3546 	}
3547 
3548 	/* reset the channel state if the channel went down */
3549 	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
3550 	    ldcp->link_state == LDC_CHANNEL_RESET) {
3551 		mutex_enter(&ldcp->tx_lock);
3552 		i_ldc_reset(ldcp, B_FALSE);
3553 		mutex_exit(&ldcp->tx_lock);
3554 		mutex_exit(&ldcp->lock);
3555 		return (ECONNRESET);
3556 	}
3557 
3558 	switch (ldcp->mode) {
3559 	case LDC_MODE_RAW:
3560 		/*
3561 		 * In raw mode, there are no ctrl packets, so checking
3562 		 * if the queue is non-empty is sufficient.
3563 		 */
3564 		*hasdata = (rx_head != rx_tail);
3565 		break;
3566 
3567 	case LDC_MODE_UNRELIABLE:
3568 		/*
3569 		 * In unreliable mode, if the queue is non-empty, we need
3570 		 * to check if it actually contains unread data packets.
3571 		 * The queue may just contain ctrl packets.
3572 		 */
3573 		if (rx_head != rx_tail)
3574 			*hasdata = (i_ldc_chkq(ldcp) == 0);
3575 		break;
3576 
3577 	case LDC_MODE_RELIABLE:
3578 		/*
3579 		 * In reliable mode, first check for 'stream_remains' > 0.
3580 		 * Otherwise, if the data queue head and tail pointers
3581 		 * differ, there must be data to read.
3582 		 */
3583 		if (ldcp->stream_remains > 0)
3584 			*hasdata = B_TRUE;
3585 		else
3586 			*hasdata = (ldcp->rx_dq_head != ldcp->rx_dq_tail);
3587 		break;
3588 
3589 	default:
3590 		cmn_err(CE_WARN, "ldc_chkq: (0x%lx) unexpected channel mode "
3591 		    "(0x%x)", ldcp->id, ldcp->mode);
3592 		mutex_exit(&ldcp->lock);
3593 		return (EIO);
3594 	}
3595 
3596 	mutex_exit(&ldcp->lock);
3597 
3598 	return (0);
3599 }
3600 
3601 
3602 /*
3603  * Read 'size' amount of bytes or less. If incoming buffer
3604  * is more than 'size', ENOBUFS is returned.
3605  *
3606  * On return, size contains the number of bytes read.
3607  */
3608 int
3609 ldc_read(ldc_handle_t handle, caddr_t bufp, size_t *sizep)
3610 {
3611 	ldc_chan_t 	*ldcp;
3612 	uint64_t 	rx_head = 0, rx_tail = 0;
3613 	int		rv = 0, exit_val;
3614 
3615 	if (handle == NULL) {
3616 		DWARN(DBG_ALL_LDCS, "ldc_read: invalid channel handle\n");
3617 		return (EINVAL);
3618 	}
3619 
3620 	ldcp = (ldc_chan_t *)handle;
3621 
3622 	/* channel lock */
3623 	mutex_enter(&ldcp->lock);
3624 
3625 	if (ldcp->tstate != TS_UP) {
3626 		DWARN(ldcp->id,
3627 		    "ldc_read: (0x%llx) channel is not in UP state\n",
3628 		    ldcp->id);
3629 		exit_val = ECONNRESET;
3630 	} else if (ldcp->mode == LDC_MODE_RELIABLE) {
3631 		TRACE_RXDQ_LENGTH(ldcp);
3632 		exit_val = ldcp->read_p(ldcp, bufp, sizep);
3633 		mutex_exit(&ldcp->lock);
3634 		return (exit_val);
3635 	} else {
3636 		exit_val = ldcp->read_p(ldcp, bufp, sizep);
3637 	}
3638 
3639 	/*
3640 	 * if queue has been drained - clear interrupt
3641 	 */
3642 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
3643 	    &ldcp->link_state);
3644 	if (rv != 0) {
3645 		cmn_err(CE_WARN, "ldc_read: (0x%lx) unable to read queue ptrs",
3646 		    ldcp->id);
3647 		mutex_enter(&ldcp->tx_lock);
3648 		i_ldc_reset(ldcp, B_TRUE);
3649 		mutex_exit(&ldcp->tx_lock);
3650 		mutex_exit(&ldcp->lock);
3651 		return (ECONNRESET);
3652 	}
3653 
3654 	if (exit_val == 0) {
3655 		if (ldcp->link_state == LDC_CHANNEL_DOWN ||
3656 		    ldcp->link_state == LDC_CHANNEL_RESET) {
3657 			mutex_enter(&ldcp->tx_lock);
3658 			i_ldc_reset(ldcp, B_FALSE);
3659 			exit_val = ECONNRESET;
3660 			mutex_exit(&ldcp->tx_lock);
3661 		}
3662 		if ((rv == 0) &&
3663 		    (ldcp->rx_intr_state == LDC_INTR_PEND) &&
3664 		    (rx_head == rx_tail)) {
3665 			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
3666 		}
3667 	}
3668 
3669 	mutex_exit(&ldcp->lock);
3670 	return (exit_val);
3671 }
3672 
3673 /*
3674  * Basic raw mondo read -
3675  * no interpretation of mondo contents at all.
3676  *
3677  * Enter and exit with ldcp->lock held by caller
3678  */
3679 static int
3680 i_ldc_read_raw(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep)
3681 {
3682 	uint64_t 	q_size_mask;
3683 	ldc_msg_t 	*msgp;
3684 	uint8_t		*msgbufp;
3685 	int		rv = 0, space;
3686 	uint64_t 	rx_head, rx_tail;
3687 
3688 	space = *sizep;
3689 
3690 	if (space < LDC_PAYLOAD_SIZE_RAW)
3691 		return (ENOBUFS);
3692 
3693 	ASSERT(mutex_owned(&ldcp->lock));
3694 
3695 	/* compute mask for increment */
3696 	q_size_mask = (ldcp->rx_q_entries-1)<<LDC_PACKET_SHIFT;
3697 
3698 	/*
3699 	 * Read packet(s) from the queue
3700 	 */
3701 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
3702 	    &ldcp->link_state);
3703 	if (rv != 0) {
3704 		cmn_err(CE_WARN,
3705 		    "ldc_read_raw: (0x%lx) unable to read queue ptrs",
3706 		    ldcp->id);
3707 		return (EIO);
3708 	}
3709 	D1(ldcp->id, "ldc_read_raw: (0x%llx) rxh=0x%llx,"
3710 	    " rxt=0x%llx, st=0x%llx\n",
3711 	    ldcp->id, rx_head, rx_tail, ldcp->link_state);
3712 
3713 	/* reset the channel state if the channel went down */
3714 	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
3715 	    ldcp->link_state == LDC_CHANNEL_RESET) {
3716 		mutex_enter(&ldcp->tx_lock);
3717 		i_ldc_reset(ldcp, B_FALSE);
3718 		mutex_exit(&ldcp->tx_lock);
3719 		return (ECONNRESET);
3720 	}
3721 
3722 	/*
3723 	 * Check for empty queue
3724 	 */
3725 	if (rx_head == rx_tail) {
3726 		*sizep = 0;
3727 		return (0);
3728 	}
3729 
3730 	/* get the message */
3731 	msgp = (ldc_msg_t *)(ldcp->rx_q_va + rx_head);
3732 
3733 	/* if channel is in RAW mode, copy data and return */
3734 	msgbufp = (uint8_t *)&(msgp->raw[0]);
3735 
3736 	bcopy(msgbufp, target_bufp, LDC_PAYLOAD_SIZE_RAW);
3737 
3738 	DUMP_PAYLOAD(ldcp->id, msgbufp);
3739 
3740 	*sizep = LDC_PAYLOAD_SIZE_RAW;
3741 
3742 	rx_head = (rx_head + LDC_PACKET_SIZE) & q_size_mask;
3743 	rv = i_ldc_set_rx_head(ldcp, rx_head);
3744 
3745 	return (rv);
3746 }
3747 
3748 /*
3749  * Process LDC mondos to build larger packets
3750  * with either un-reliable or reliable delivery.
3751  *
3752  * Enter and exit with ldcp->lock held by caller
3753  */
3754 static int
3755 i_ldc_read_packet(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep)
3756 {
3757 	int		rv = 0;
3758 	uint64_t 	rx_head = 0, rx_tail = 0;
3759 	uint64_t 	curr_head = 0;
3760 	ldc_msg_t 	*msg;
3761 	caddr_t 	target;
3762 	size_t 		len = 0, bytes_read = 0;
3763 	int 		retries = 0;
3764 	uint64_t 	q_va, q_size_mask;
3765 	uint64_t	first_fragment = 0;
3766 
3767 	target = target_bufp;
3768 
3769 	ASSERT(mutex_owned(&ldcp->lock));
3770 
3771 	/* check if the buffer and size are valid */
3772 	if (target_bufp == NULL || *sizep == 0) {
3773 		DWARN(ldcp->id, "ldc_read: (0x%llx) invalid buffer/size\n",
3774 		    ldcp->id);
3775 		return (EINVAL);
3776 	}
3777 
3778 	/* Set q_va and compute increment mask for the appropriate queue */
3779 	if (ldcp->mode == LDC_MODE_RELIABLE) {
3780 		q_va	    = ldcp->rx_dq_va;
3781 		q_size_mask = (ldcp->rx_dq_entries-1)<<LDC_PACKET_SHIFT;
3782 	} else {
3783 		q_va	    = ldcp->rx_q_va;
3784 		q_size_mask = (ldcp->rx_q_entries-1)<<LDC_PACKET_SHIFT;
3785 	}
3786 
3787 	/*
3788 	 * Read packet(s) from the queue
3789 	 */
3790 	rv = ldcp->readq_get_state(ldcp, &curr_head, &rx_tail,
3791 	    &ldcp->link_state);
3792 	if (rv != 0) {
3793 		cmn_err(CE_WARN, "ldc_read: (0x%lx) unable to read queue ptrs",
3794 		    ldcp->id);
3795 		mutex_enter(&ldcp->tx_lock);
3796 		i_ldc_reset(ldcp, B_TRUE);
3797 		mutex_exit(&ldcp->tx_lock);
3798 		return (ECONNRESET);
3799 	}
3800 	D1(ldcp->id, "ldc_read: (0x%llx) chd=0x%llx, tl=0x%llx, st=0x%llx\n",
3801 	    ldcp->id, curr_head, rx_tail, ldcp->link_state);
3802 
3803 	/* reset the channel state if the channel went down */
3804 	if (ldcp->link_state != LDC_CHANNEL_UP)
3805 		goto channel_is_reset;
3806 
3807 	for (;;) {
3808 
3809 		if (curr_head == rx_tail) {
3810 			/*
3811 			 * If a data queue is being used, check the Rx HV
3812 			 * queue. This will copy over any new data packets
3813 			 * that have arrived.
3814 			 */
3815 			if (ldcp->mode == LDC_MODE_RELIABLE)
3816 				(void) i_ldc_chkq(ldcp);
3817 
3818 			rv = ldcp->readq_get_state(ldcp,
3819 			    &rx_head, &rx_tail, &ldcp->link_state);
3820 			if (rv != 0) {
3821 				cmn_err(CE_WARN,
3822 				    "ldc_read: (0x%lx) cannot read queue ptrs",
3823 				    ldcp->id);
3824 				mutex_enter(&ldcp->tx_lock);
3825 				i_ldc_reset(ldcp, B_TRUE);
3826 				mutex_exit(&ldcp->tx_lock);
3827 				return (ECONNRESET);
3828 			}
3829 
3830 			if (ldcp->link_state != LDC_CHANNEL_UP)
3831 				goto channel_is_reset;
3832 
3833 			if (curr_head == rx_tail) {
3834 
3835 				/* If in the middle of a fragmented xfer */
3836 				if (first_fragment != 0) {
3837 
3838 					/* wait for ldc_delay usecs */
3839 					drv_usecwait(ldc_delay);
3840 
3841 					if (++retries < ldc_max_retries)
3842 						continue;
3843 
3844 					*sizep = 0;
3845 					if (ldcp->mode != LDC_MODE_RELIABLE)
3846 						ldcp->last_msg_rcd =
3847 						    first_fragment - 1;
3848 					DWARN(DBG_ALL_LDCS, "ldc_read: "
3849 					    "(0x%llx) read timeout", ldcp->id);
3850 					return (EAGAIN);
3851 				}
3852 				*sizep = 0;
3853 				break;
3854 			}
3855 		}
3856 		retries = 0;
3857 
3858 		D2(ldcp->id,
3859 		    "ldc_read: (0x%llx) chd=0x%llx, rxhd=0x%llx, rxtl=0x%llx\n",
3860 		    ldcp->id, curr_head, rx_head, rx_tail);
3861 
3862 		/* get the message */
3863 		msg = (ldc_msg_t *)(q_va + curr_head);
3864 
3865 		DUMP_LDC_PKT(ldcp, "ldc_read received pkt",
3866 		    ldcp->rx_q_va + curr_head);
3867 
3868 		/* Check the message ID for the message received */
3869 		if (ldcp->mode != LDC_MODE_RELIABLE) {
3870 			if ((rv = i_ldc_check_seqid(ldcp, msg)) != 0) {
3871 
3872 				DWARN(ldcp->id, "ldc_read: (0x%llx) seqid "
3873 				    "error, q_ptrs=0x%lx,0x%lx",
3874 				    ldcp->id, rx_head, rx_tail);
3875 
3876 				/* throw away data */
3877 				bytes_read = 0;
3878 
3879 				/* Reset last_msg_rcd to start of message */
3880 				if (first_fragment != 0) {
3881 					ldcp->last_msg_rcd = first_fragment - 1;
3882 					first_fragment = 0;
3883 				}
3884 				/*
3885 				 * Send a NACK -- invalid seqid
3886 				 * get the current tail for the response
3887 				 */
3888 				rv = i_ldc_send_pkt(ldcp, msg->type, LDC_NACK,
3889 				    (msg->ctrl & LDC_CTRL_MASK));
3890 				if (rv) {
3891 					cmn_err(CE_NOTE,
3892 					    "ldc_read: (0x%lx) err sending "
3893 					    "NACK msg\n", ldcp->id);
3894 
3895 					/* if cannot send NACK - reset chan */
3896 					mutex_enter(&ldcp->tx_lock);
3897 					i_ldc_reset(ldcp, B_FALSE);
3898 					mutex_exit(&ldcp->tx_lock);
3899 					rv = ECONNRESET;
3900 					break;
3901 				}
3902 
3903 				/* purge receive queue */
3904 				rv = i_ldc_set_rx_head(ldcp, rx_tail);
3905 
3906 				break;
3907 			}
3908 
3909 			/*
3910 			 * Process any messages of type CTRL messages
3911 			 * Future implementations should try to pass these
3912 			 * to LDC link by resetting the intr state.
3913 			 *
3914 			 * NOTE: not done as a switch() as type can be
3915 			 * both ctrl+data
3916 			 */
3917 			if (msg->type & LDC_CTRL) {
3918 				if (rv = i_ldc_ctrlmsg(ldcp, msg)) {
3919 					if (rv == EAGAIN)
3920 						continue;
3921 					rv = i_ldc_set_rx_head(ldcp, rx_tail);
3922 					*sizep = 0;
3923 					bytes_read = 0;
3924 					break;
3925 				}
3926 			}
3927 
3928 			/* process data ACKs */
3929 			if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) {
3930 				if (rv = i_ldc_process_data_ACK(ldcp, msg)) {
3931 					*sizep = 0;
3932 					bytes_read = 0;
3933 					break;
3934 				}
3935 			}
3936 
3937 			/* process data NACKs */
3938 			if ((msg->type & LDC_DATA) && (msg->stype & LDC_NACK)) {
3939 				DWARN(ldcp->id,
3940 				    "ldc_read: (0x%llx) received DATA/NACK",
3941 				    ldcp->id);
3942 				mutex_enter(&ldcp->tx_lock);
3943 				i_ldc_reset(ldcp, B_TRUE);
3944 				mutex_exit(&ldcp->tx_lock);
3945 				return (ECONNRESET);
3946 			}
3947 		}
3948 
3949 		/* process data messages */
3950 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_INFO)) {
3951 
3952 			uint8_t *msgbuf = (uint8_t *)(
3953 			    (ldcp->mode == LDC_MODE_RELIABLE) ?
3954 			    msg->rdata : msg->udata);
3955 
3956 			D2(ldcp->id,
3957 			    "ldc_read: (0x%llx) received data msg\n", ldcp->id);
3958 
3959 			/* get the packet length */
3960 			len = (msg->env & LDC_LEN_MASK);
3961 
3962 				/*
3963 				 * FUTURE OPTIMIZATION:
3964 				 * dont need to set q head for every
3965 				 * packet we read just need to do this when
3966 				 * we are done or need to wait for more
3967 				 * mondos to make a full packet - this is
3968 				 * currently expensive.
3969 				 */
3970 
3971 			if (first_fragment == 0) {
3972 
3973 				/*
3974 				 * first packets should always have the start
3975 				 * bit set (even for a single packet). If not
3976 				 * throw away the packet
3977 				 */
3978 				if (!(msg->env & LDC_FRAG_START)) {
3979 
3980 					DWARN(DBG_ALL_LDCS,
3981 					    "ldc_read: (0x%llx) not start - "
3982 					    "frag=%x\n", ldcp->id,
3983 					    (msg->env) & LDC_FRAG_MASK);
3984 
3985 					/* toss pkt, inc head, cont reading */
3986 					bytes_read = 0;
3987 					target = target_bufp;
3988 					curr_head =
3989 					    (curr_head + LDC_PACKET_SIZE)
3990 					    & q_size_mask;
3991 					if (rv = ldcp->readq_set_head(ldcp,
3992 					    curr_head))
3993 						break;
3994 
3995 					continue;
3996 				}
3997 
3998 				first_fragment = msg->seqid;
3999 			} else {
4000 				/* check to see if this is a pkt w/ START bit */
4001 				if (msg->env & LDC_FRAG_START) {
4002 					DWARN(DBG_ALL_LDCS,
4003 					    "ldc_read:(0x%llx) unexpected pkt"
4004 					    " env=0x%x discarding %d bytes,"
4005 					    " lastmsg=%d, currentmsg=%d\n",
4006 					    ldcp->id, msg->env&LDC_FRAG_MASK,
4007 					    bytes_read, ldcp->last_msg_rcd,
4008 					    msg->seqid);
4009 
4010 					/* throw data we have read so far */
4011 					bytes_read = 0;
4012 					target = target_bufp;
4013 					first_fragment = msg->seqid;
4014 
4015 					if (rv = ldcp->readq_set_head(ldcp,
4016 					    curr_head))
4017 						break;
4018 				}
4019 			}
4020 
4021 			/* copy (next) pkt into buffer */
4022 			if (len <= (*sizep - bytes_read)) {
4023 				bcopy(msgbuf, target, len);
4024 				target += len;
4025 				bytes_read += len;
4026 			} else {
4027 				/*
4028 				 * there is not enough space in the buffer to
4029 				 * read this pkt. throw message away & continue
4030 				 * reading data from queue
4031 				 */
4032 				DWARN(DBG_ALL_LDCS,
4033 				    "ldc_read: (0x%llx) buffer too small, "
4034 				    "head=0x%lx, expect=%d, got=%d\n", ldcp->id,
4035 				    curr_head, *sizep, bytes_read+len);
4036 
4037 				first_fragment = 0;
4038 				target = target_bufp;
4039 				bytes_read = 0;
4040 
4041 				/* throw away everything received so far */
4042 				if (rv = ldcp->readq_set_head(ldcp, curr_head))
4043 					break;
4044 
4045 				/* continue reading remaining pkts */
4046 				continue;
4047 			}
4048 		}
4049 
4050 		/* set the message id */
4051 		if (ldcp->mode != LDC_MODE_RELIABLE)
4052 			ldcp->last_msg_rcd = msg->seqid;
4053 
4054 		/* move the head one position */
4055 		curr_head = (curr_head + LDC_PACKET_SIZE) & q_size_mask;
4056 
4057 		if (msg->env & LDC_FRAG_STOP) {
4058 
4059 			/*
4060 			 * All pkts that are part of this fragmented transfer
4061 			 * have been read or this was a single pkt read
4062 			 * or there was an error
4063 			 */
4064 
4065 			/* set the queue head */
4066 			if (rv = ldcp->readq_set_head(ldcp, curr_head))
4067 				bytes_read = 0;
4068 
4069 			*sizep = bytes_read;
4070 
4071 			break;
4072 		}
4073 
4074 		/* advance head if it is a CTRL packet or a DATA ACK packet */
4075 		if ((msg->type & LDC_CTRL) ||
4076 		    ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK))) {
4077 
4078 			/* set the queue head */
4079 			if (rv = ldcp->readq_set_head(ldcp, curr_head)) {
4080 				bytes_read = 0;
4081 				break;
4082 			}
4083 
4084 			D2(ldcp->id, "ldc_read: (0x%llx) set ACK qhead 0x%llx",
4085 			    ldcp->id, curr_head);
4086 		}
4087 
4088 	} /* for (;;) */
4089 
4090 	D2(ldcp->id, "ldc_read: (0x%llx) end size=%d", ldcp->id, *sizep);
4091 
4092 	return (rv);
4093 
4094 channel_is_reset:
4095 	mutex_enter(&ldcp->tx_lock);
4096 	i_ldc_reset(ldcp, B_FALSE);
4097 	mutex_exit(&ldcp->tx_lock);
4098 	return (ECONNRESET);
4099 }
4100 
4101 /*
4102  * Fetch and buffer incoming packets so we can hand them back as
4103  * a basic byte stream.
4104  *
4105  * Enter and exit with ldcp->lock held by caller
4106  */
4107 static int
4108 i_ldc_read_stream(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep)
4109 {
4110 	int	rv;
4111 	size_t	size;
4112 
4113 	ASSERT(mutex_owned(&ldcp->lock));
4114 
4115 	D2(ldcp->id, "i_ldc_read_stream: (0x%llx) buffer size=%d",
4116 	    ldcp->id, *sizep);
4117 
4118 	if (ldcp->stream_remains == 0) {
4119 		size = ldcp->mtu;
4120 		rv = i_ldc_read_packet(ldcp,
4121 		    (caddr_t)ldcp->stream_bufferp, &size);
4122 		D2(ldcp->id, "i_ldc_read_stream: read packet (0x%llx) size=%d",
4123 		    ldcp->id, size);
4124 
4125 		if (rv != 0)
4126 			return (rv);
4127 
4128 		ldcp->stream_remains = size;
4129 		ldcp->stream_offset = 0;
4130 	}
4131 
4132 	size = MIN(ldcp->stream_remains, *sizep);
4133 
4134 	bcopy(ldcp->stream_bufferp + ldcp->stream_offset, target_bufp, size);
4135 	ldcp->stream_offset += size;
4136 	ldcp->stream_remains -= size;
4137 
4138 	D2(ldcp->id, "i_ldc_read_stream: (0x%llx) fill from buffer size=%d",
4139 	    ldcp->id, size);
4140 
4141 	*sizep = size;
4142 	return (0);
4143 }
4144 
4145 /*
4146  * Write specified amount of bytes to the channel
4147  * in multiple pkts of pkt_payload size. Each
4148  * packet is tagged with an unique packet ID in
4149  * the case of a reliable link.
4150  *
4151  * On return, size contains the number of bytes written.
4152  */
4153 int
4154 ldc_write(ldc_handle_t handle, caddr_t buf, size_t *sizep)
4155 {
4156 	ldc_chan_t	*ldcp;
4157 	int		rv = 0;
4158 
4159 	if (handle == NULL) {
4160 		DWARN(DBG_ALL_LDCS, "ldc_write: invalid channel handle\n");
4161 		return (EINVAL);
4162 	}
4163 	ldcp = (ldc_chan_t *)handle;
4164 
4165 	/* check if writes can occur */
4166 	if (!mutex_tryenter(&ldcp->tx_lock)) {
4167 		/*
4168 		 * Could not get the lock - channel could
4169 		 * be in the process of being unconfigured
4170 		 * or reader has encountered an error
4171 		 */
4172 		return (EAGAIN);
4173 	}
4174 
4175 	/* check if non-zero data to write */
4176 	if (buf == NULL || sizep == NULL) {
4177 		DWARN(ldcp->id, "ldc_write: (0x%llx) invalid data write\n",
4178 		    ldcp->id);
4179 		mutex_exit(&ldcp->tx_lock);
4180 		return (EINVAL);
4181 	}
4182 
4183 	if (*sizep == 0) {
4184 		DWARN(ldcp->id, "ldc_write: (0x%llx) write size of zero\n",
4185 		    ldcp->id);
4186 		mutex_exit(&ldcp->tx_lock);
4187 		return (0);
4188 	}
4189 
4190 	/* Check if channel is UP for data exchange */
4191 	if (ldcp->tstate != TS_UP) {
4192 		DWARN(ldcp->id,
4193 		    "ldc_write: (0x%llx) channel is not in UP state\n",
4194 		    ldcp->id);
4195 		*sizep = 0;
4196 		rv = ECONNRESET;
4197 	} else {
4198 		rv = ldcp->write_p(ldcp, buf, sizep);
4199 	}
4200 
4201 	mutex_exit(&ldcp->tx_lock);
4202 
4203 	return (rv);
4204 }
4205 
4206 /*
4207  * Write a raw packet to the channel
4208  * On return, size contains the number of bytes written.
4209  */
4210 static int
4211 i_ldc_write_raw(ldc_chan_t *ldcp, caddr_t buf, size_t *sizep)
4212 {
4213 	ldc_msg_t 	*ldcmsg;
4214 	uint64_t 	tx_head, tx_tail, new_tail;
4215 	int		rv = 0;
4216 	size_t		size;
4217 
4218 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
4219 	ASSERT(ldcp->mode == LDC_MODE_RAW);
4220 
4221 	size = *sizep;
4222 
4223 	/*
4224 	 * Check to see if the packet size is less than or
4225 	 * equal to packet size support in raw mode
4226 	 */
4227 	if (size > ldcp->pkt_payload) {
4228 		DWARN(ldcp->id,
4229 		    "ldc_write: (0x%llx) invalid size (0x%llx) for RAW mode\n",
4230 		    ldcp->id, *sizep);
4231 		*sizep = 0;
4232 		return (EMSGSIZE);
4233 	}
4234 
4235 	/* get the qptrs for the tx queue */
4236 	rv = hv_ldc_tx_get_state(ldcp->id,
4237 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
4238 	if (rv != 0) {
4239 		cmn_err(CE_WARN,
4240 		    "ldc_write: (0x%lx) cannot read queue ptrs\n", ldcp->id);
4241 		*sizep = 0;
4242 		return (EIO);
4243 	}
4244 
4245 	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
4246 	    ldcp->link_state == LDC_CHANNEL_RESET) {
4247 		DWARN(ldcp->id,
4248 		    "ldc_write: (0x%llx) channel down/reset\n", ldcp->id);
4249 
4250 		*sizep = 0;
4251 		if (mutex_tryenter(&ldcp->lock)) {
4252 			i_ldc_reset(ldcp, B_FALSE);
4253 			mutex_exit(&ldcp->lock);
4254 		} else {
4255 			/*
4256 			 * Release Tx lock, and then reacquire channel
4257 			 * and Tx lock in correct order
4258 			 */
4259 			mutex_exit(&ldcp->tx_lock);
4260 			mutex_enter(&ldcp->lock);
4261 			mutex_enter(&ldcp->tx_lock);
4262 			i_ldc_reset(ldcp, B_FALSE);
4263 			mutex_exit(&ldcp->lock);
4264 		}
4265 		return (ECONNRESET);
4266 	}
4267 
4268 	tx_tail = ldcp->tx_tail;
4269 	tx_head = ldcp->tx_head;
4270 	new_tail = (tx_tail + LDC_PACKET_SIZE) &
4271 	    ((ldcp->tx_q_entries-1) << LDC_PACKET_SHIFT);
4272 
4273 	if (new_tail == tx_head) {
4274 		DWARN(DBG_ALL_LDCS,
4275 		    "ldc_write: (0x%llx) TX queue is full\n", ldcp->id);
4276 		*sizep = 0;
4277 		return (EWOULDBLOCK);
4278 	}
4279 
4280 	D2(ldcp->id, "ldc_write: (0x%llx) start xfer size=%d",
4281 	    ldcp->id, size);
4282 
4283 	/* Send the data now */
4284 	ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
4285 
4286 	/* copy the data into pkt */
4287 	bcopy((uint8_t *)buf, ldcmsg, size);
4288 
4289 	/* increment tail */
4290 	tx_tail = new_tail;
4291 
4292 	/*
4293 	 * All packets have been copied into the TX queue
4294 	 * update the tail ptr in the HV
4295 	 */
4296 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
4297 	if (rv) {
4298 		if (rv == EWOULDBLOCK) {
4299 			DWARN(ldcp->id, "ldc_write: (0x%llx) write timed out\n",
4300 			    ldcp->id);
4301 			*sizep = 0;
4302 			return (EWOULDBLOCK);
4303 		}
4304 
4305 		*sizep = 0;
4306 		if (mutex_tryenter(&ldcp->lock)) {
4307 			i_ldc_reset(ldcp, B_FALSE);
4308 			mutex_exit(&ldcp->lock);
4309 		} else {
4310 			/*
4311 			 * Release Tx lock, and then reacquire channel
4312 			 * and Tx lock in correct order
4313 			 */
4314 			mutex_exit(&ldcp->tx_lock);
4315 			mutex_enter(&ldcp->lock);
4316 			mutex_enter(&ldcp->tx_lock);
4317 			i_ldc_reset(ldcp, B_FALSE);
4318 			mutex_exit(&ldcp->lock);
4319 		}
4320 		return (ECONNRESET);
4321 	}
4322 
4323 	ldcp->tx_tail = tx_tail;
4324 	*sizep = size;
4325 
4326 	D2(ldcp->id, "ldc_write: (0x%llx) end xfer size=%d", ldcp->id, size);
4327 
4328 	return (rv);
4329 }
4330 
4331 
4332 /*
4333  * Write specified amount of bytes to the channel
4334  * in multiple pkts of pkt_payload size. Each
4335  * packet is tagged with an unique packet ID in
4336  * the case of a reliable link.
4337  *
4338  * On return, size contains the number of bytes written.
4339  * This function needs to ensure that the write size is < MTU size
4340  */
4341 static int
4342 i_ldc_write_packet(ldc_chan_t *ldcp, caddr_t buf, size_t *size)
4343 {
4344 	ldc_msg_t 	*ldcmsg;
4345 	uint64_t 	tx_head, tx_tail, new_tail, start;
4346 	uint64_t	txq_size_mask, numavail;
4347 	uint8_t 	*msgbuf, *source = (uint8_t *)buf;
4348 	size_t 		len, bytes_written = 0, remaining;
4349 	int		rv;
4350 	uint32_t	curr_seqid;
4351 
4352 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
4353 
4354 	ASSERT(ldcp->mode == LDC_MODE_RELIABLE ||
4355 	    ldcp->mode == LDC_MODE_UNRELIABLE);
4356 
4357 	/* compute mask for increment */
4358 	txq_size_mask = (ldcp->tx_q_entries - 1) << LDC_PACKET_SHIFT;
4359 
4360 	/* get the qptrs for the tx queue */
4361 	rv = hv_ldc_tx_get_state(ldcp->id,
4362 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
4363 	if (rv != 0) {
4364 		cmn_err(CE_WARN,
4365 		    "ldc_write: (0x%lx) cannot read queue ptrs\n", ldcp->id);
4366 		*size = 0;
4367 		return (EIO);
4368 	}
4369 
4370 	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
4371 	    ldcp->link_state == LDC_CHANNEL_RESET) {
4372 		DWARN(ldcp->id,
4373 		    "ldc_write: (0x%llx) channel down/reset\n", ldcp->id);
4374 		*size = 0;
4375 		if (mutex_tryenter(&ldcp->lock)) {
4376 			i_ldc_reset(ldcp, B_FALSE);
4377 			mutex_exit(&ldcp->lock);
4378 		} else {
4379 			/*
4380 			 * Release Tx lock, and then reacquire channel
4381 			 * and Tx lock in correct order
4382 			 */
4383 			mutex_exit(&ldcp->tx_lock);
4384 			mutex_enter(&ldcp->lock);
4385 			mutex_enter(&ldcp->tx_lock);
4386 			i_ldc_reset(ldcp, B_FALSE);
4387 			mutex_exit(&ldcp->lock);
4388 		}
4389 		return (ECONNRESET);
4390 	}
4391 
4392 	tx_tail = ldcp->tx_tail;
4393 	new_tail = (tx_tail + LDC_PACKET_SIZE) %
4394 	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
4395 
4396 	/*
4397 	 * Check to see if the queue is full. The check is done using
4398 	 * the appropriate head based on the link mode.
4399 	 */
4400 	i_ldc_get_tx_head(ldcp, &tx_head);
4401 
4402 	if (new_tail == tx_head) {
4403 		DWARN(DBG_ALL_LDCS,
4404 		    "ldc_write: (0x%llx) TX queue is full\n", ldcp->id);
4405 		*size = 0;
4406 		return (EWOULDBLOCK);
4407 	}
4408 
4409 	/*
4410 	 * Make sure that the LDC Tx queue has enough space
4411 	 */
4412 	numavail = (tx_head >> LDC_PACKET_SHIFT) - (tx_tail >> LDC_PACKET_SHIFT)
4413 	    + ldcp->tx_q_entries - 1;
4414 	numavail %= ldcp->tx_q_entries;
4415 
4416 	if (*size > (numavail * ldcp->pkt_payload)) {
4417 		DWARN(DBG_ALL_LDCS,
4418 		    "ldc_write: (0x%llx) TX queue has no space\n", ldcp->id);
4419 		return (EWOULDBLOCK);
4420 	}
4421 
4422 	D2(ldcp->id, "ldc_write: (0x%llx) start xfer size=%d",
4423 	    ldcp->id, *size);
4424 
4425 	/* Send the data now */
4426 	bytes_written = 0;
4427 	curr_seqid = ldcp->last_msg_snt;
4428 	start = tx_tail;
4429 
4430 	while (*size > bytes_written) {
4431 
4432 		ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
4433 
4434 		msgbuf = (uint8_t *)((ldcp->mode == LDC_MODE_RELIABLE) ?
4435 		    ldcmsg->rdata : ldcmsg->udata);
4436 
4437 		ldcmsg->type = LDC_DATA;
4438 		ldcmsg->stype = LDC_INFO;
4439 		ldcmsg->ctrl = 0;
4440 
4441 		remaining = *size - bytes_written;
4442 		len = min(ldcp->pkt_payload, remaining);
4443 		ldcmsg->env = (uint8_t)len;
4444 
4445 		curr_seqid++;
4446 		ldcmsg->seqid = curr_seqid;
4447 
4448 		/* copy the data into pkt */
4449 		bcopy(source, msgbuf, len);
4450 
4451 		source += len;
4452 		bytes_written += len;
4453 
4454 		/* increment tail */
4455 		tx_tail = (tx_tail + LDC_PACKET_SIZE) & txq_size_mask;
4456 
4457 		ASSERT(tx_tail != tx_head);
4458 	}
4459 
4460 	/* Set the start and stop bits */
4461 	ldcmsg->env |= LDC_FRAG_STOP;
4462 	ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + start);
4463 	ldcmsg->env |= LDC_FRAG_START;
4464 
4465 	/*
4466 	 * All packets have been copied into the TX queue
4467 	 * update the tail ptr in the HV
4468 	 */
4469 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
4470 	if (rv == 0) {
4471 		ldcp->tx_tail = tx_tail;
4472 		ldcp->last_msg_snt = curr_seqid;
4473 		*size = bytes_written;
4474 	} else {
4475 		int rv2;
4476 
4477 		if (rv != EWOULDBLOCK) {
4478 			*size = 0;
4479 			if (mutex_tryenter(&ldcp->lock)) {
4480 				i_ldc_reset(ldcp, B_FALSE);
4481 				mutex_exit(&ldcp->lock);
4482 			} else {
4483 				/*
4484 				 * Release Tx lock, and then reacquire channel
4485 				 * and Tx lock in correct order
4486 				 */
4487 				mutex_exit(&ldcp->tx_lock);
4488 				mutex_enter(&ldcp->lock);
4489 				mutex_enter(&ldcp->tx_lock);
4490 				i_ldc_reset(ldcp, B_FALSE);
4491 				mutex_exit(&ldcp->lock);
4492 			}
4493 			return (ECONNRESET);
4494 		}
4495 
4496 		D1(ldcp->id, "hv_tx_set_tail returns 0x%x (head 0x%x, "
4497 		    "old tail 0x%x, new tail 0x%x, qsize=0x%x)\n",
4498 		    rv, ldcp->tx_head, ldcp->tx_tail, tx_tail,
4499 		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT));
4500 
4501 		rv2 = hv_ldc_tx_get_state(ldcp->id,
4502 		    &tx_head, &tx_tail, &ldcp->link_state);
4503 
4504 		D1(ldcp->id, "hv_ldc_tx_get_state returns 0x%x "
4505 		    "(head 0x%x, tail 0x%x state 0x%x)\n",
4506 		    rv2, tx_head, tx_tail, ldcp->link_state);
4507 
4508 		*size = 0;
4509 	}
4510 
4511 	D2(ldcp->id, "ldc_write: (0x%llx) end xfer size=%d", ldcp->id, *size);
4512 
4513 	return (rv);
4514 }
4515 
4516 /*
4517  * Write specified amount of bytes to the channel
4518  * in multiple pkts of pkt_payload size. Each
4519  * packet is tagged with an unique packet ID in
4520  * the case of a reliable link.
4521  *
4522  * On return, size contains the number of bytes written.
4523  * This function needs to ensure that the write size is < MTU size
4524  */
4525 static int
4526 i_ldc_write_stream(ldc_chan_t *ldcp, caddr_t buf, size_t *sizep)
4527 {
4528 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
4529 	ASSERT(ldcp->mode == LDC_MODE_RELIABLE);
4530 
4531 	/* Truncate packet to max of MTU size */
4532 	if (*sizep > ldcp->mtu) *sizep = ldcp->mtu;
4533 	return (i_ldc_write_packet(ldcp, buf, sizep));
4534 }
4535 
4536 
4537 /*
4538  * Interfaces for channel nexus to register/unregister with LDC module
4539  * The nexus will register functions to be used to register individual
4540  * channels with the nexus and enable interrupts for the channels
4541  */
4542 int
4543 ldc_register(ldc_cnex_t *cinfo)
4544 {
4545 	ldc_chan_t	*ldcp;
4546 
4547 	if (cinfo == NULL || cinfo->dip == NULL ||
4548 	    cinfo->reg_chan == NULL || cinfo->unreg_chan == NULL ||
4549 	    cinfo->add_intr == NULL || cinfo->rem_intr == NULL ||
4550 	    cinfo->clr_intr == NULL) {
4551 
4552 		DWARN(DBG_ALL_LDCS, "ldc_register: invalid nexus info\n");
4553 		return (EINVAL);
4554 	}
4555 
4556 	mutex_enter(&ldcssp->lock);
4557 
4558 	/* nexus registration */
4559 	ldcssp->cinfo.dip = cinfo->dip;
4560 	ldcssp->cinfo.reg_chan = cinfo->reg_chan;
4561 	ldcssp->cinfo.unreg_chan = cinfo->unreg_chan;
4562 	ldcssp->cinfo.add_intr = cinfo->add_intr;
4563 	ldcssp->cinfo.rem_intr = cinfo->rem_intr;
4564 	ldcssp->cinfo.clr_intr = cinfo->clr_intr;
4565 
4566 	/* register any channels that might have been previously initialized */
4567 	ldcp = ldcssp->chan_list;
4568 	while (ldcp) {
4569 		if ((ldcp->tstate & TS_QCONF_RDY) &&
4570 		    (ldcp->tstate & TS_CNEX_RDY) == 0)
4571 			(void) i_ldc_register_channel(ldcp);
4572 
4573 		ldcp = ldcp->next;
4574 	}
4575 
4576 	mutex_exit(&ldcssp->lock);
4577 
4578 	return (0);
4579 }
4580 
4581 int
4582 ldc_unregister(ldc_cnex_t *cinfo)
4583 {
4584 	if (cinfo == NULL || cinfo->dip == NULL) {
4585 		DWARN(DBG_ALL_LDCS, "ldc_unregister: invalid nexus info\n");
4586 		return (EINVAL);
4587 	}
4588 
4589 	mutex_enter(&ldcssp->lock);
4590 
4591 	if (cinfo->dip != ldcssp->cinfo.dip) {
4592 		DWARN(DBG_ALL_LDCS, "ldc_unregister: invalid dip\n");
4593 		mutex_exit(&ldcssp->lock);
4594 		return (EINVAL);
4595 	}
4596 
4597 	/* nexus unregister */
4598 	ldcssp->cinfo.dip = NULL;
4599 	ldcssp->cinfo.reg_chan = NULL;
4600 	ldcssp->cinfo.unreg_chan = NULL;
4601 	ldcssp->cinfo.add_intr = NULL;
4602 	ldcssp->cinfo.rem_intr = NULL;
4603 	ldcssp->cinfo.clr_intr = NULL;
4604 
4605 	mutex_exit(&ldcssp->lock);
4606 
4607 	return (0);
4608 }
4609