xref: /illumos-gate/usr/src/uts/sun4v/io/ldc.c (revision 1de082f7b7fd4b6629e14b0f9b8f94f6c0bda3c2)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * sun4v LDC Link Layer
29  */
30 #include <sys/types.h>
31 #include <sys/file.h>
32 #include <sys/errno.h>
33 #include <sys/open.h>
34 #include <sys/cred.h>
35 #include <sys/kmem.h>
36 #include <sys/conf.h>
37 #include <sys/cmn_err.h>
38 #include <sys/ksynch.h>
39 #include <sys/modctl.h>
40 #include <sys/stat.h> /* needed for S_IFBLK and S_IFCHR */
41 #include <sys/debug.h>
42 #include <sys/cred.h>
43 #include <sys/promif.h>
44 #include <sys/ddi.h>
45 #include <sys/sunddi.h>
46 #include <sys/cyclic.h>
47 #include <sys/machsystm.h>
48 #include <sys/vm.h>
49 #include <sys/cpu.h>
50 #include <sys/intreg.h>
51 #include <sys/machcpuvar.h>
52 #include <sys/mmu.h>
53 #include <sys/pte.h>
54 #include <vm/hat.h>
55 #include <vm/as.h>
56 #include <vm/hat_sfmmu.h>
57 #include <sys/vm_machparam.h>
58 #include <vm/seg_kmem.h>
59 #include <vm/seg_kpm.h>
60 #include <sys/note.h>
61 #include <sys/ivintr.h>
62 #include <sys/hypervisor_api.h>
63 #include <sys/ldc.h>
64 #include <sys/ldc_impl.h>
65 #include <sys/cnex.h>
66 #include <sys/hsvc.h>
67 #include <sys/sdt.h>
68 #include <sys/kldc.h>
69 
70 /* Core internal functions */
71 int i_ldc_h2v_error(int h_error);
72 void i_ldc_reset(ldc_chan_t *ldcp, boolean_t force_reset);
73 
74 static int i_ldc_txq_reconf(ldc_chan_t *ldcp);
75 static int i_ldc_rxq_reconf(ldc_chan_t *ldcp, boolean_t force_reset);
76 static int i_ldc_rxq_drain(ldc_chan_t *ldcp);
77 static void i_ldc_reset_state(ldc_chan_t *ldcp);
78 static void i_ldc_debug_enter(void);
79 
80 static int i_ldc_get_tx_tail(ldc_chan_t *ldcp, uint64_t *tail);
81 static void i_ldc_get_tx_head(ldc_chan_t *ldcp, uint64_t *head);
82 static int i_ldc_set_tx_tail(ldc_chan_t *ldcp, uint64_t tail);
83 static int i_ldc_set_rx_head(ldc_chan_t *ldcp, uint64_t head);
84 static int i_ldc_send_pkt(ldc_chan_t *ldcp, uint8_t pkttype, uint8_t subtype,
85     uint8_t ctrlmsg);
86 
87 static int  i_ldc_set_rxdq_head(ldc_chan_t *ldcp, uint64_t head);
88 static void i_ldc_rxdq_copy(ldc_chan_t *ldcp, uint64_t *head);
89 static uint64_t i_ldc_dq_rx_get_state(ldc_chan_t *ldcp, uint64_t *head,
90     uint64_t *tail, uint64_t *link_state);
91 static uint64_t i_ldc_hvq_rx_get_state(ldc_chan_t *ldcp, uint64_t *head,
92     uint64_t *tail, uint64_t *link_state);
93 static int i_ldc_rx_ackpeek(ldc_chan_t *ldcp, uint64_t rx_head,
94     uint64_t rx_tail);
95 static uint_t i_ldc_chkq(ldc_chan_t *ldcp);
96 
97 /* Interrupt handling functions */
98 static uint_t i_ldc_tx_hdlr(caddr_t arg1, caddr_t arg2);
99 static uint_t i_ldc_rx_hdlr(caddr_t arg1, caddr_t arg2);
100 static uint_t i_ldc_rx_process_hvq(ldc_chan_t *ldcp, boolean_t *notify_client,
101     uint64_t *notify_event);
102 static void i_ldc_clear_intr(ldc_chan_t *ldcp, cnex_intrtype_t itype);
103 
104 /* Read method functions */
105 static int i_ldc_read_raw(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep);
106 static int i_ldc_read_packet(ldc_chan_t *ldcp, caddr_t target_bufp,
107 	size_t *sizep);
108 static int i_ldc_read_stream(ldc_chan_t *ldcp, caddr_t target_bufp,
109 	size_t *sizep);
110 
111 /* Write method functions */
112 static int i_ldc_write_raw(ldc_chan_t *ldcp, caddr_t target_bufp,
113 	size_t *sizep);
114 static int i_ldc_write_packet(ldc_chan_t *ldcp, caddr_t target_bufp,
115 	size_t *sizep);
116 static int i_ldc_write_stream(ldc_chan_t *ldcp, caddr_t target_bufp,
117 	size_t *sizep);
118 
119 /* Pkt processing internal functions */
120 static int i_ldc_check_seqid(ldc_chan_t *ldcp, ldc_msg_t *ldcmsg);
121 static int i_ldc_ctrlmsg(ldc_chan_t *ldcp, ldc_msg_t *ldcmsg);
122 static int i_ldc_process_VER(ldc_chan_t *ldcp, ldc_msg_t *msg);
123 static int i_ldc_process_RTS(ldc_chan_t *ldcp, ldc_msg_t *msg);
124 static int i_ldc_process_RTR(ldc_chan_t *ldcp, ldc_msg_t *msg);
125 static int i_ldc_process_RDX(ldc_chan_t *ldcp, ldc_msg_t *msg);
126 static int i_ldc_process_data_ACK(ldc_chan_t *ldcp, ldc_msg_t *msg);
127 
128 /* LDC Version */
129 static ldc_ver_t ldc_versions[] = { {1, 0} };
130 
131 /* number of supported versions */
132 #define	LDC_NUM_VERS	(sizeof (ldc_versions) / sizeof (ldc_versions[0]))
133 
134 /* Invalid value for the ldc_chan_t rx_ack_head field */
135 #define	ACKPEEK_HEAD_INVALID	((uint64_t)-1)
136 
137 
138 /* Module State Pointer */
139 ldc_soft_state_t *ldcssp;
140 
141 static struct modldrv md = {
142 	&mod_miscops,			/* This is a misc module */
143 	"sun4v LDC module",		/* Name of the module */
144 };
145 
146 static struct modlinkage ml = {
147 	MODREV_1,
148 	&md,
149 	NULL
150 };
151 
152 static uint64_t ldc_sup_minor;		/* Supported minor number */
153 static hsvc_info_t ldc_hsvc = {
154 	HSVC_REV_1, NULL, HSVC_GROUP_LDC, 1, 1, "ldc"
155 };
156 
157 /*
158  * The no. of MTU size messages that can be stored in
159  * the LDC Tx queue. The number of Tx queue entries is
160  * then computed as (mtu * mtu_msgs)/sizeof(queue_entry)
161  */
162 uint64_t ldc_mtu_msgs = LDC_MTU_MSGS;
163 
164 /*
165  * The minimum queue length. This is the size of the smallest
166  * LDC queue. If the computed value is less than this default,
167  * the queue length is rounded up to 'ldc_queue_entries'.
168  */
169 uint64_t ldc_queue_entries = LDC_QUEUE_ENTRIES;
170 
171 /*
172  * The length of the reliable-mode data queue in terms of the LDC
173  * receive queue length. i.e., the number of times larger than the
174  * LDC receive queue that the data queue should be. The HV receive
175  * queue is required to be a power of 2 and this implementation
176  * assumes the data queue will also be a power of 2. By making the
177  * multiplier a power of 2, we ensure the data queue will be a
178  * power of 2. We use a multiplier because the receive queue is
179  * sized to be sane relative to the MTU and the same is needed for
180  * the data queue.
181  */
182 uint64_t ldc_rxdq_multiplier = LDC_RXDQ_MULTIPLIER;
183 
184 /*
185  * LDC retry count and delay - when the HV returns EWOULDBLOCK
186  * the operation is retried 'ldc_max_retries' times with a
187  * wait of 'ldc_delay' usecs between each retry.
188  */
189 int ldc_max_retries = LDC_MAX_RETRIES;
190 clock_t ldc_delay = LDC_DELAY;
191 
192 /*
193  * Channels which have a devclass satisfying the following
194  * will be reset when entering the prom or kmdb.
195  *
196  *   LDC_DEVCLASS_PROM_RESET(devclass) != 0
197  *
198  * By default, only block device service channels are reset.
199  */
/*
 * The reset mask is 64 bits wide, so widen the constant before shifting;
 * a plain int shift would overflow for devclass values of 31 or more.
 */
#define	LDC_DEVCLASS_BIT(dc)		((uint64_t)0x1 << (dc))
#define	LDC_DEVCLASS_PROM_RESET(dc)	\
	(LDC_DEVCLASS_BIT(dc) & ldc_debug_reset_mask)
203 static uint64_t ldc_debug_reset_mask = LDC_DEVCLASS_BIT(LDC_DEV_BLK_SVC);
204 
205 /*
206  * delay between each retry of channel unregistration in
207  * ldc_close(), to wait for pending interrupts to complete.
208  */
209 clock_t ldc_close_delay = LDC_CLOSE_DELAY;
210 
211 #ifdef DEBUG
212 
213 /*
214  * Print debug messages
215  *
216  * set ldcdbg to 0x7 for enabling all msgs
217  * 0x4 - Warnings
218  * 0x2 - All debug messages
219  * 0x1 - Minimal debug messages
220  *
221  * set ldcdbgchan to the channel number you want to debug
222  * setting it to -1 prints debug messages for all channels
223  * NOTE: ldcdbgchan has no effect on error messages
224  */
225 
226 int ldcdbg = 0x0;
227 int64_t ldcdbgchan = DBG_ALL_LDCS;
228 uint64_t ldc_inject_err_flag = 0;
229 
230 void
231 ldcdebug(int64_t id, const char *fmt, ...)
232 {
233 	char buf[512];
234 	va_list ap;
235 
236 	/*
237 	 * Do not return if,
238 	 * caller wants to print it anyway - (id == DBG_ALL_LDCS)
239 	 * debug channel is set to all LDCs - (ldcdbgchan == DBG_ALL_LDCS)
240 	 * debug channel = caller specified channel
241 	 */
242 	if ((id != DBG_ALL_LDCS) &&
243 	    (ldcdbgchan != DBG_ALL_LDCS) &&
244 	    (ldcdbgchan != id)) {
245 		return;
246 	}
247 
248 	va_start(ap, fmt);
249 	(void) vsprintf(buf, fmt, ap);
250 	va_end(ap);
251 
252 	cmn_err(CE_CONT, "?%s", buf);
253 }
254 
255 #define	LDC_ERR_RESET		0x1
256 #define	LDC_ERR_PKTLOSS		0x2
257 #define	LDC_ERR_DQFULL		0x4
258 #define	LDC_ERR_DRNGCLEAR	0x8
259 
260 static boolean_t
261 ldc_inject_error(ldc_chan_t *ldcp, uint64_t error)
262 {
263 	if ((ldcdbgchan != DBG_ALL_LDCS) && (ldcdbgchan != ldcp->id))
264 		return (B_FALSE);
265 
266 	if ((ldc_inject_err_flag & error) == 0)
267 		return (B_FALSE);
268 
269 	/* clear the injection state */
270 	ldc_inject_err_flag &= ~error;
271 
272 	return (B_TRUE);
273 }
274 
275 #define	D1		\
276 if (ldcdbg & 0x01)	\
277 	ldcdebug
278 
279 #define	D2		\
280 if (ldcdbg & 0x02)	\
281 	ldcdebug
282 
283 #define	DWARN		\
284 if (ldcdbg & 0x04)	\
285 	ldcdebug
286 
287 #define	DUMP_PAYLOAD(id, addr)						\
288 {									\
289 	char buf[65*3];							\
290 	int i;								\
291 	uint8_t *src = (uint8_t *)addr;					\
292 	for (i = 0; i < 64; i++, src++)					\
293 		(void) sprintf(&buf[i * 3], "|%02x", *src);		\
294 	(void) sprintf(&buf[i * 3], "|\n");				\
295 	D2((id), "payload: %s", buf);					\
296 }
297 
298 #define	DUMP_LDC_PKT(c, s, addr)					\
299 {									\
300 	ldc_msg_t *msg = (ldc_msg_t *)(addr);				\
301 	uint32_t mid = ((c)->mode != LDC_MODE_RAW) ? msg->seqid : 0;	\
302 	if (msg->type == LDC_DATA) {                                    \
303 	    D2((c)->id, "%s: msg%d (/%x/%x/%x/,env[%c%c,sz=%d])",	\
304 	    (s), mid, msg->type, msg->stype, msg->ctrl,			\
305 	    (msg->env & LDC_FRAG_START) ? 'B' : ' ',                    \
306 	    (msg->env & LDC_FRAG_STOP) ? 'E' : ' ',                     \
307 	    (msg->env & LDC_LEN_MASK));					\
308 	} else { 							\
309 	    D2((c)->id, "%s: msg%d (/%x/%x/%x/,env=%x)", (s),		\
310 	    mid, msg->type, msg->stype, msg->ctrl, msg->env);		\
311 	} 								\
312 }
313 
314 #define	LDC_INJECT_RESET(_ldcp)	ldc_inject_error(_ldcp, LDC_ERR_RESET)
315 #define	LDC_INJECT_PKTLOSS(_ldcp) ldc_inject_error(_ldcp, LDC_ERR_PKTLOSS)
316 #define	LDC_INJECT_DQFULL(_ldcp) ldc_inject_error(_ldcp, LDC_ERR_DQFULL)
317 #define	LDC_INJECT_DRNGCLEAR(_ldcp) ldc_inject_error(_ldcp, LDC_ERR_DRNGCLEAR)
318 extern void i_ldc_mem_inject_dring_clear(ldc_chan_t *ldcp);
319 
320 #else
321 
322 #define	DBG_ALL_LDCS -1
323 
324 #define	D1
325 #define	D2
326 #define	DWARN
327 
328 #define	DUMP_PAYLOAD(id, addr)
329 #define	DUMP_LDC_PKT(c, s, addr)
330 
331 #define	LDC_INJECT_RESET(_ldcp)	(B_FALSE)
332 #define	LDC_INJECT_PKTLOSS(_ldcp) (B_FALSE)
333 #define	LDC_INJECT_DQFULL(_ldcp) (B_FALSE)
334 #define	LDC_INJECT_DRNGCLEAR(_ldcp) (B_FALSE)
335 
336 #endif
337 
338 /*
339  * dtrace SDT probes to ease tracing of the rx data queue and HV queue
340  * lengths. Just pass the head, tail, and entries values so that the
341  * length can be calculated in a dtrace script when the probe is enabled.
342  */
343 #define	TRACE_RXDQ_LENGTH(ldcp)						\
344 	DTRACE_PROBE4(rxdq__size,					\
345 	uint64_t, ldcp->id,						\
346 	uint64_t, ldcp->rx_dq_head,					\
347 	uint64_t, ldcp->rx_dq_tail,					\
348 	uint64_t, ldcp->rx_dq_entries)
349 
350 #define	TRACE_RXHVQ_LENGTH(ldcp, head, tail)				\
351 	DTRACE_PROBE4(rxhvq__size,					\
352 	uint64_t, ldcp->id,						\
353 	uint64_t, head,							\
354 	uint64_t, tail,							\
355 	uint64_t, ldcp->rx_q_entries)
356 
/*
 * A dtrace SDT probe to ease tracing of data queue copy operations.
 * Passes the channel id and the number of bytes copied.
 */
#define	TRACE_RXDQ_COPY(ldcp, bytes)					\
	DTRACE_PROBE2(rxdq__copy, uint64_t, (ldcp)->id, uint64_t, (bytes))
360 
361 /* The amount of contiguous space at the tail of the queue */
362 #define	Q_CONTIG_SPACE(head, tail, size)				\
363 	((head) <= (tail) ? ((size) - (tail)) :				\
364 	((head) - (tail) - LDC_PACKET_SIZE))
365 
366 #define	ZERO_PKT(p)			\
367 	bzero((p), sizeof (ldc_msg_t));
368 
369 #define	IDX2COOKIE(idx, pg_szc, pg_shift)				\
370 	(((pg_szc) << LDC_COOKIE_PGSZC_SHIFT) | ((idx) << (pg_shift)))
371 
372 int
373 _init(void)
374 {
375 	int status;
376 	extern void i_ldc_mem_set_hsvc_vers(uint64_t major, uint64_t minor);
377 
378 	status = hsvc_register(&ldc_hsvc, &ldc_sup_minor);
379 	if (status != 0) {
380 		cmn_err(CE_NOTE, "!%s: cannot negotiate hypervisor LDC services"
381 		    " group: 0x%lx major: %ld minor: %ld errno: %d",
382 		    ldc_hsvc.hsvc_modname, ldc_hsvc.hsvc_group,
383 		    ldc_hsvc.hsvc_major, ldc_hsvc.hsvc_minor, status);
384 		return (-1);
385 	}
386 
387 	/* Initialize shared memory HV API version checking */
388 	i_ldc_mem_set_hsvc_vers(ldc_hsvc.hsvc_major, ldc_sup_minor);
389 
390 	/* allocate soft state structure */
391 	ldcssp = kmem_zalloc(sizeof (ldc_soft_state_t), KM_SLEEP);
392 
393 	/* Link the module into the system */
394 	status = mod_install(&ml);
395 	if (status != 0) {
396 		kmem_free(ldcssp, sizeof (ldc_soft_state_t));
397 		return (status);
398 	}
399 
400 	/* Initialize the LDC state structure */
401 	mutex_init(&ldcssp->lock, NULL, MUTEX_DRIVER, NULL);
402 
403 	mutex_enter(&ldcssp->lock);
404 
405 	/* Create a cache for memory handles */
406 	ldcssp->memhdl_cache = kmem_cache_create("ldc_memhdl_cache",
407 	    sizeof (ldc_mhdl_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
408 	if (ldcssp->memhdl_cache == NULL) {
409 		DWARN(DBG_ALL_LDCS, "_init: ldc_memhdl cache create failed\n");
410 		mutex_exit(&ldcssp->lock);
411 		return (-1);
412 	}
413 
414 	/* Create cache for memory segment structures */
415 	ldcssp->memseg_cache = kmem_cache_create("ldc_memseg_cache",
416 	    sizeof (ldc_memseg_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
417 	if (ldcssp->memseg_cache == NULL) {
418 		DWARN(DBG_ALL_LDCS, "_init: ldc_memseg cache create failed\n");
419 		mutex_exit(&ldcssp->lock);
420 		return (-1);
421 	}
422 
423 
424 	ldcssp->channel_count = 0;
425 	ldcssp->channels_open = 0;
426 	ldcssp->chan_list = NULL;
427 	ldcssp->dring_list = NULL;
428 
429 	/* Register debug_enter callback */
430 	kldc_set_debug_cb(&i_ldc_debug_enter);
431 
432 	mutex_exit(&ldcssp->lock);
433 
434 	return (0);
435 }
436 
437 int
438 _info(struct modinfo *modinfop)
439 {
440 	/* Report status of the dynamically loadable driver module */
441 	return (mod_info(&ml, modinfop));
442 }
443 
444 int
445 _fini(void)
446 {
447 	int 		rv, status;
448 	ldc_chan_t 	*tmp_ldcp, *ldcp;
449 	ldc_dring_t 	*tmp_dringp, *dringp;
450 	ldc_mem_info_t 	minfo;
451 
452 	/* Unlink the driver module from the system */
453 	status = mod_remove(&ml);
454 	if (status) {
455 		DWARN(DBG_ALL_LDCS, "_fini: mod_remove failed\n");
456 		return (EIO);
457 	}
458 
459 	/* Unregister debug_enter callback */
460 	kldc_set_debug_cb(NULL);
461 
462 	/* Free descriptor rings */
463 	dringp = ldcssp->dring_list;
464 	while (dringp != NULL) {
465 		tmp_dringp = dringp->next;
466 
467 		rv = ldc_mem_dring_info((ldc_dring_handle_t)dringp, &minfo);
468 		if (rv == 0 && minfo.status != LDC_UNBOUND) {
469 			if (minfo.status == LDC_BOUND) {
470 				(void) ldc_mem_dring_unbind(
471 				    (ldc_dring_handle_t)dringp);
472 			}
473 			if (minfo.status == LDC_MAPPED) {
474 				(void) ldc_mem_dring_unmap(
475 				    (ldc_dring_handle_t)dringp);
476 			}
477 		}
478 
479 		(void) ldc_mem_dring_destroy((ldc_dring_handle_t)dringp);
480 		dringp = tmp_dringp;
481 	}
482 	ldcssp->dring_list = NULL;
483 
484 	/* close and finalize channels */
485 	ldcp = ldcssp->chan_list;
486 	while (ldcp != NULL) {
487 		tmp_ldcp = ldcp->next;
488 
489 		(void) ldc_close((ldc_handle_t)ldcp);
490 		(void) ldc_fini((ldc_handle_t)ldcp);
491 
492 		ldcp = tmp_ldcp;
493 	}
494 	ldcssp->chan_list = NULL;
495 
496 	/* Destroy kmem caches */
497 	kmem_cache_destroy(ldcssp->memhdl_cache);
498 	kmem_cache_destroy(ldcssp->memseg_cache);
499 
500 	/*
501 	 * We have successfully "removed" the driver.
502 	 * Destroying soft states
503 	 */
504 	mutex_destroy(&ldcssp->lock);
505 	kmem_free(ldcssp, sizeof (ldc_soft_state_t));
506 
507 	(void) hsvc_unregister(&ldc_hsvc);
508 
509 	return (status);
510 }
511 
512 /* -------------------------------------------------------------------------- */
513 
514 /*
515  * LDC Link Layer Internal Functions
516  */
517 
518 /*
519  * Translate HV Errors to sun4v error codes
520  */
521 int
522 i_ldc_h2v_error(int h_error)
523 {
524 	switch (h_error) {
525 
526 	case	H_EOK:
527 		return (0);
528 
529 	case	H_ENORADDR:
530 		return (EFAULT);
531 
532 	case	H_EBADPGSZ:
533 	case	H_EINVAL:
534 		return (EINVAL);
535 
536 	case	H_EWOULDBLOCK:
537 		return (EWOULDBLOCK);
538 
539 	case	H_ENOACCESS:
540 	case	H_ENOMAP:
541 		return (EACCES);
542 
543 	case	H_EIO:
544 	case	H_ECPUERROR:
545 		return (EIO);
546 
547 	case	H_ENOTSUPPORTED:
548 		return (ENOTSUP);
549 
550 	case 	H_ETOOMANY:
551 		return (ENOSPC);
552 
553 	case	H_ECHANNEL:
554 		return (ECHRNG);
555 	default:
556 		break;
557 	}
558 
559 	return (EIO);
560 }
561 
562 /*
563  * Reconfigure the transmit queue
564  */
565 static int
566 i_ldc_txq_reconf(ldc_chan_t *ldcp)
567 {
568 	int rv;
569 
570 	ASSERT(MUTEX_HELD(&ldcp->lock));
571 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
572 
573 	rv = hv_ldc_tx_qconf(ldcp->id, ldcp->tx_q_ra, ldcp->tx_q_entries);
574 	if (rv) {
575 		cmn_err(CE_WARN,
576 		    "i_ldc_txq_reconf: (0x%lx) cannot set qconf", ldcp->id);
577 		return (EIO);
578 	}
579 	rv = hv_ldc_tx_get_state(ldcp->id, &(ldcp->tx_head),
580 	    &(ldcp->tx_tail), &(ldcp->link_state));
581 	if (rv) {
582 		cmn_err(CE_WARN,
583 		    "i_ldc_txq_reconf: (0x%lx) cannot get qptrs", ldcp->id);
584 		return (EIO);
585 	}
586 	D1(ldcp->id, "i_ldc_txq_reconf: (0x%llx) h=0x%llx,t=0x%llx,"
587 	    "s=0x%llx\n", ldcp->id, ldcp->tx_head, ldcp->tx_tail,
588 	    ldcp->link_state);
589 
590 	return (0);
591 }
592 
593 /*
594  * Reconfigure the receive queue
595  */
596 static int
597 i_ldc_rxq_reconf(ldc_chan_t *ldcp, boolean_t force_reset)
598 {
599 	int rv;
600 	uint64_t rx_head, rx_tail;
601 
602 	ASSERT(MUTEX_HELD(&ldcp->lock));
603 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
604 	    &(ldcp->link_state));
605 	if (rv) {
606 		cmn_err(CE_WARN,
607 		    "i_ldc_rxq_reconf: (0x%lx) cannot get state",
608 		    ldcp->id);
609 		return (EIO);
610 	}
611 
612 	if (force_reset || (ldcp->tstate & ~TS_IN_RESET) == TS_UP) {
613 		rv = hv_ldc_rx_qconf(ldcp->id, ldcp->rx_q_ra,
614 		    ldcp->rx_q_entries);
615 		if (rv) {
616 			cmn_err(CE_WARN,
617 			    "i_ldc_rxq_reconf: (0x%lx) cannot set qconf",
618 			    ldcp->id);
619 			return (EIO);
620 		}
621 		D1(ldcp->id, "i_ldc_rxq_reconf: (0x%llx) completed q reconf",
622 		    ldcp->id);
623 	}
624 
625 	return (0);
626 }
627 
628 
629 /*
630  * Drain the contents of the receive queue
631  */
632 static int
633 i_ldc_rxq_drain(ldc_chan_t *ldcp)
634 {
635 	int rv;
636 	uint64_t rx_head, rx_tail;
637 
638 	ASSERT(MUTEX_HELD(&ldcp->lock));
639 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
640 	    &(ldcp->link_state));
641 	if (rv) {
642 		cmn_err(CE_WARN, "i_ldc_rxq_drain: (0x%lx) cannot get state",
643 		    ldcp->id);
644 		return (EIO);
645 	}
646 
647 	/* If the queue is already empty just return success. */
648 	if (rx_head == rx_tail)
649 		return (0);
650 
651 	/* flush contents by setting the head = tail */
652 	return (i_ldc_set_rx_head(ldcp, rx_tail));
653 }
654 
655 
656 /*
657  * Reset LDC state structure and its contents
658  */
659 static void
660 i_ldc_reset_state(ldc_chan_t *ldcp)
661 {
662 	ASSERT(MUTEX_HELD(&ldcp->lock));
663 	ldcp->last_msg_snt = LDC_INIT_SEQID;
664 	ldcp->last_ack_rcd = 0;
665 	ldcp->last_msg_rcd = 0;
666 	ldcp->tx_ackd_head = ldcp->tx_head;
667 	ldcp->stream_remains = 0;
668 	ldcp->next_vidx = 0;
669 	ldcp->hstate = 0;
670 	ldcp->tstate = TS_OPEN;
671 	ldcp->status = LDC_OPEN;
672 	ldcp->rx_ack_head = ACKPEEK_HEAD_INVALID;
673 	ldcp->rx_dq_head = 0;
674 	ldcp->rx_dq_tail = 0;
675 
676 	if (ldcp->link_state == LDC_CHANNEL_UP ||
677 	    ldcp->link_state == LDC_CHANNEL_RESET) {
678 
679 		if (ldcp->mode == LDC_MODE_RAW) {
680 			ldcp->status = LDC_UP;
681 			ldcp->tstate = TS_UP;
682 		} else {
683 			ldcp->status = LDC_READY;
684 			ldcp->tstate |= TS_LINK_READY;
685 		}
686 	}
687 }
688 
689 /*
690  * Reset a LDC channel
691  */
692 void
693 i_ldc_reset(ldc_chan_t *ldcp, boolean_t force_reset)
694 {
695 	DWARN(ldcp->id, "i_ldc_reset: (0x%llx) channel reset\n", ldcp->id);
696 
697 	ASSERT(MUTEX_HELD(&ldcp->lock));
698 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
699 
700 	/* reconfig Tx and Rx queues */
701 	(void) i_ldc_txq_reconf(ldcp);
702 	(void) i_ldc_rxq_reconf(ldcp, force_reset);
703 
704 	/* Clear Tx and Rx interrupts */
705 	(void) i_ldc_clear_intr(ldcp, CNEX_TX_INTR);
706 	(void) i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
707 
708 	/* Reset channel state */
709 	i_ldc_reset_state(ldcp);
710 
711 	/* Mark channel in reset */
712 	ldcp->tstate |= TS_IN_RESET;
713 }
714 
715 /*
716  * Walk the channel list and reset channels if they are of the right
717  * devclass and their Rx queues have been configured. No locks are
718  * taken because the function is only invoked by the kernel just before
719  * entering the prom or debugger when the system is single-threaded.
720  */
721 static void
722 i_ldc_debug_enter(void)
723 {
724 	ldc_chan_t *ldcp;
725 
726 	ldcp = ldcssp->chan_list;
727 	while (ldcp != NULL) {
728 		if (((ldcp->tstate & TS_QCONF_RDY) == TS_QCONF_RDY) &&
729 		    (LDC_DEVCLASS_PROM_RESET(ldcp->devclass) != 0)) {
730 			(void) hv_ldc_rx_qconf(ldcp->id, ldcp->rx_q_ra,
731 			    ldcp->rx_q_entries);
732 		}
733 		ldcp = ldcp->next;
734 	}
735 }
736 
737 /*
738  * Clear pending interrupts
739  */
740 static void
741 i_ldc_clear_intr(ldc_chan_t *ldcp, cnex_intrtype_t itype)
742 {
743 	ldc_cnex_t *cinfo = &ldcssp->cinfo;
744 
745 	ASSERT(MUTEX_HELD(&ldcp->lock));
746 	ASSERT(cinfo->dip != NULL);
747 
748 	switch (itype) {
749 	case CNEX_TX_INTR:
750 		/* check Tx interrupt */
751 		if (ldcp->tx_intr_state)
752 			ldcp->tx_intr_state = LDC_INTR_NONE;
753 		else
754 			return;
755 		break;
756 
757 	case CNEX_RX_INTR:
758 		/* check Rx interrupt */
759 		if (ldcp->rx_intr_state)
760 			ldcp->rx_intr_state = LDC_INTR_NONE;
761 		else
762 			return;
763 		break;
764 	}
765 
766 	(void) cinfo->clr_intr(cinfo->dip, ldcp->id, itype);
767 	D2(ldcp->id,
768 	    "i_ldc_clear_intr: (0x%llx) cleared 0x%x intr\n",
769 	    ldcp->id, itype);
770 }
771 
772 /*
773  * Set the receive queue head
774  * Resets connection and returns an error if it fails.
775  */
776 static int
777 i_ldc_set_rx_head(ldc_chan_t *ldcp, uint64_t head)
778 {
779 	int 	rv;
780 	int 	retries;
781 
782 	ASSERT(MUTEX_HELD(&ldcp->lock));
783 	for (retries = 0; retries < ldc_max_retries; retries++) {
784 
785 		if ((rv = hv_ldc_rx_set_qhead(ldcp->id, head)) == 0)
786 			return (0);
787 
788 		if (rv != H_EWOULDBLOCK)
789 			break;
790 
791 		/* wait for ldc_delay usecs */
792 		drv_usecwait(ldc_delay);
793 	}
794 
795 	cmn_err(CE_WARN, "ldc_set_rx_qhead: (0x%lx) cannot set qhead 0x%lx",
796 	    ldcp->id, head);
797 	mutex_enter(&ldcp->tx_lock);
798 	i_ldc_reset(ldcp, B_TRUE);
799 	mutex_exit(&ldcp->tx_lock);
800 
801 	return (ECONNRESET);
802 }
803 
804 /*
805  * Returns the tx_head to be used for transfer
806  */
807 static void
808 i_ldc_get_tx_head(ldc_chan_t *ldcp, uint64_t *head)
809 {
810 	ldc_msg_t 	*pkt;
811 
812 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
813 
814 	/* get current Tx head */
815 	*head = ldcp->tx_head;
816 
817 	/*
818 	 * Reliable mode will use the ACKd head instead of the regular tx_head.
819 	 * Also in Reliable mode, advance ackd_head for all non DATA/INFO pkts,
820 	 * up to the current location of tx_head. This needs to be done
821 	 * as the peer will only ACK DATA/INFO pkts.
822 	 */
823 	if (ldcp->mode == LDC_MODE_RELIABLE) {
824 		while (ldcp->tx_ackd_head != ldcp->tx_head) {
825 			pkt = (ldc_msg_t *)(ldcp->tx_q_va + ldcp->tx_ackd_head);
826 			if ((pkt->type & LDC_DATA) && (pkt->stype & LDC_INFO)) {
827 				break;
828 			}
829 			/* advance ACKd head */
830 			ldcp->tx_ackd_head =
831 			    (ldcp->tx_ackd_head + LDC_PACKET_SIZE) %
832 			    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
833 		}
834 		*head = ldcp->tx_ackd_head;
835 	}
836 }
837 
838 /*
839  * Returns the tx_tail to be used for transfer
840  * Re-reads the TX queue ptrs if and only if the
841  * the cached head and tail are equal (queue is full)
842  */
843 static int
844 i_ldc_get_tx_tail(ldc_chan_t *ldcp, uint64_t *tail)
845 {
846 	int 		rv;
847 	uint64_t 	current_head, new_tail;
848 
849 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
850 	/* Read the head and tail ptrs from HV */
851 	rv = hv_ldc_tx_get_state(ldcp->id,
852 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
853 	if (rv) {
854 		cmn_err(CE_WARN,
855 		    "i_ldc_get_tx_tail: (0x%lx) cannot read qptrs\n",
856 		    ldcp->id);
857 		return (EIO);
858 	}
859 	if (ldcp->link_state == LDC_CHANNEL_DOWN) {
860 		D1(ldcp->id, "i_ldc_get_tx_tail: (0x%llx) channel not ready\n",
861 		    ldcp->id);
862 		return (ECONNRESET);
863 	}
864 
865 	i_ldc_get_tx_head(ldcp, &current_head);
866 
867 	/* increment the tail */
868 	new_tail = (ldcp->tx_tail + LDC_PACKET_SIZE) %
869 	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
870 
871 	if (new_tail == current_head) {
872 		DWARN(ldcp->id,
873 		    "i_ldc_get_tx_tail: (0x%llx) TX queue is full\n",
874 		    ldcp->id);
875 		return (EWOULDBLOCK);
876 	}
877 
878 	D2(ldcp->id, "i_ldc_get_tx_tail: (0x%llx) head=0x%llx, tail=0x%llx\n",
879 	    ldcp->id, ldcp->tx_head, ldcp->tx_tail);
880 
881 	*tail = ldcp->tx_tail;
882 	return (0);
883 }
884 
885 /*
886  * Set the tail pointer. If HV returns EWOULDBLOCK, it will back off
887  * and retry ldc_max_retries times before returning an error.
888  * Returns 0, EWOULDBLOCK or EIO
889  */
890 static int
891 i_ldc_set_tx_tail(ldc_chan_t *ldcp, uint64_t tail)
892 {
893 	int		rv, retval = EWOULDBLOCK;
894 	int 		retries;
895 
896 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
897 	for (retries = 0; retries < ldc_max_retries; retries++) {
898 
899 		if ((rv = hv_ldc_tx_set_qtail(ldcp->id, tail)) == 0) {
900 			retval = 0;
901 			break;
902 		}
903 		if (rv != H_EWOULDBLOCK) {
904 			DWARN(ldcp->id, "i_ldc_set_tx_tail: (0x%llx) set "
905 			    "qtail=0x%llx failed, rv=%d\n", ldcp->id, tail, rv);
906 			retval = EIO;
907 			break;
908 		}
909 
910 		/* wait for ldc_delay usecs */
911 		drv_usecwait(ldc_delay);
912 	}
913 	return (retval);
914 }
915 
916 /*
917  * Copy a data packet from the HV receive queue to the data queue.
918  * Caller must ensure that the data queue is not already full.
919  *
920  * The *head argument represents the current head pointer for the HV
921  * receive queue. After copying a packet from the HV receive queue,
922  * the *head pointer will be updated. This allows the caller to update
923  * the head pointer in HV using the returned *head value.
924  */
925 void
926 i_ldc_rxdq_copy(ldc_chan_t *ldcp, uint64_t *head)
927 {
928 	uint64_t	q_size, dq_size;
929 
930 	ASSERT(MUTEX_HELD(&ldcp->lock));
931 
932 	q_size  = ldcp->rx_q_entries << LDC_PACKET_SHIFT;
933 	dq_size = ldcp->rx_dq_entries << LDC_PACKET_SHIFT;
934 
935 	ASSERT(Q_CONTIG_SPACE(ldcp->rx_dq_head, ldcp->rx_dq_tail,
936 	    dq_size) >= LDC_PACKET_SIZE);
937 
938 	bcopy((void *)(ldcp->rx_q_va + *head),
939 	    (void *)(ldcp->rx_dq_va + ldcp->rx_dq_tail), LDC_PACKET_SIZE);
940 	TRACE_RXDQ_COPY(ldcp, LDC_PACKET_SIZE);
941 
942 	/* Update rx head */
943 	*head = (*head + LDC_PACKET_SIZE) % q_size;
944 
945 	/* Update dq tail */
946 	ldcp->rx_dq_tail = (ldcp->rx_dq_tail + LDC_PACKET_SIZE) % dq_size;
947 }
948 
949 /*
950  * Update the Rx data queue head pointer
951  */
952 static int
953 i_ldc_set_rxdq_head(ldc_chan_t *ldcp, uint64_t head)
954 {
955 	ldcp->rx_dq_head = head;
956 	return (0);
957 }
958 
959 /*
960  * Get the Rx data queue head and tail pointers
961  */
962 static uint64_t
963 i_ldc_dq_rx_get_state(ldc_chan_t *ldcp, uint64_t *head, uint64_t *tail,
964     uint64_t *link_state)
965 {
966 	_NOTE(ARGUNUSED(link_state))
967 	*head = ldcp->rx_dq_head;
968 	*tail = ldcp->rx_dq_tail;
969 	return (0);
970 }
971 
972 /*
973  * Wrapper for the Rx HV queue set head function. Giving the
974  * data queue and HV queue set head functions the same type.
975  */
976 static uint64_t
977 i_ldc_hvq_rx_get_state(ldc_chan_t *ldcp, uint64_t *head, uint64_t *tail,
978     uint64_t *link_state)
979 {
980 	return (i_ldc_h2v_error(hv_ldc_rx_get_state(ldcp->id, head, tail,
981 	    link_state)));
982 }
983 
984 /*
985  * LDC receive interrupt handler
986  *    triggered for channel with data pending to read
987  *    i.e. Rx queue content changes
988  */
989 static uint_t
990 i_ldc_rx_hdlr(caddr_t arg1, caddr_t arg2)
991 {
992 	_NOTE(ARGUNUSED(arg2))
993 
994 	ldc_chan_t	*ldcp;
995 	boolean_t	notify;
996 	uint64_t	event;
997 	int		rv, status;
998 
999 	/* Get the channel for which interrupt was received */
1000 	if (arg1 == NULL) {
1001 		cmn_err(CE_WARN, "i_ldc_rx_hdlr: invalid arg\n");
1002 		return (DDI_INTR_UNCLAIMED);
1003 	}
1004 
1005 	ldcp = (ldc_chan_t *)arg1;
1006 
1007 	D1(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) Received intr, ldcp=0x%p\n",
1008 	    ldcp->id, ldcp);
1009 	D1(ldcp->id, "i_ldc_rx_hdlr: (%llx) USR%lx/TS%lx/HS%lx, LSTATE=%lx\n",
1010 	    ldcp->id, ldcp->status, ldcp->tstate, ldcp->hstate,
1011 	    ldcp->link_state);
1012 
1013 	/* Lock channel */
1014 	mutex_enter(&ldcp->lock);
1015 
1016 	/* Mark the interrupt as being actively handled */
1017 	ldcp->rx_intr_state = LDC_INTR_ACTIVE;
1018 
1019 	status = i_ldc_rx_process_hvq(ldcp, &notify, &event);
1020 
1021 	if (ldcp->mode != LDC_MODE_RELIABLE) {
1022 		/*
1023 		 * If there are no data packets on the queue, clear
1024 		 * the interrupt. Otherwise, the ldc_read will clear
1025 		 * interrupts after draining the queue. To indicate the
1026 		 * interrupt has not yet been cleared, it is marked
1027 		 * as pending.
1028 		 */
1029 		if ((event & LDC_EVT_READ) == 0) {
1030 			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
1031 		} else {
1032 			ldcp->rx_intr_state = LDC_INTR_PEND;
1033 		}
1034 	}
1035 
1036 	/* if callbacks are disabled, do not notify */
1037 	if (notify && ldcp->cb_enabled) {
1038 		ldcp->cb_inprogress = B_TRUE;
1039 		mutex_exit(&ldcp->lock);
1040 		rv = ldcp->cb(event, ldcp->cb_arg);
1041 		if (rv) {
1042 			DWARN(ldcp->id,
1043 			    "i_ldc_rx_hdlr: (0x%llx) callback failure",
1044 			    ldcp->id);
1045 		}
1046 		mutex_enter(&ldcp->lock);
1047 		ldcp->cb_inprogress = B_FALSE;
1048 	}
1049 
1050 	if (ldcp->mode == LDC_MODE_RELIABLE) {
1051 		if (status == ENOSPC) {
1052 			/*
1053 			 * Here, ENOSPC indicates the secondary data
1054 			 * queue is full and the Rx queue is non-empty.
1055 			 * Much like how reliable and raw modes are
1056 			 * handled above, since the Rx queue is non-
1057 			 * empty, we mark the interrupt as pending to
1058 			 * indicate it has not yet been cleared.
1059 			 */
1060 			ldcp->rx_intr_state = LDC_INTR_PEND;
1061 		} else {
1062 			/*
1063 			 * We have processed all CTRL packets and
1064 			 * copied all DATA packets to the secondary
1065 			 * queue. Clear the interrupt.
1066 			 */
1067 			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
1068 		}
1069 	}
1070 
1071 	mutex_exit(&ldcp->lock);
1072 
1073 	D1(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) exiting handler", ldcp->id);
1074 
1075 	return (DDI_INTR_CLAIMED);
1076 }
1077 
1078 /*
1079  * Wrapper for the Rx HV queue processing function to be used when
1080  * checking the Rx HV queue for data packets. Unlike the interrupt
1081  * handler code flow, the Rx interrupt is not cleared here and
1082  * callbacks are not made.
1083  */
1084 static uint_t
1085 i_ldc_chkq(ldc_chan_t *ldcp)
1086 {
1087 	boolean_t	notify;
1088 	uint64_t	event;
1089 
1090 	return (i_ldc_rx_process_hvq(ldcp, &notify, &event));
1091 }
1092 
1093 /*
1094  * Send a LDC message
1095  */
1096 static int
1097 i_ldc_send_pkt(ldc_chan_t *ldcp, uint8_t pkttype, uint8_t subtype,
1098     uint8_t ctrlmsg)
1099 {
1100 	int		rv;
1101 	ldc_msg_t 	*pkt;
1102 	uint64_t	tx_tail;
1103 	uint32_t	curr_seqid;
1104 
1105 	/* Obtain Tx lock */
1106 	mutex_enter(&ldcp->tx_lock);
1107 
1108 	curr_seqid = ldcp->last_msg_snt;
1109 
1110 	/* get the current tail for the message */
1111 	rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
1112 	if (rv) {
1113 		DWARN(ldcp->id,
1114 		    "i_ldc_send_pkt: (0x%llx) error sending pkt, "
1115 		    "type=0x%x,subtype=0x%x,ctrl=0x%x\n",
1116 		    ldcp->id, pkttype, subtype, ctrlmsg);
1117 		mutex_exit(&ldcp->tx_lock);
1118 		return (rv);
1119 	}
1120 
1121 	pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
1122 	ZERO_PKT(pkt);
1123 
1124 	/* Initialize the packet */
1125 	pkt->type = pkttype;
1126 	pkt->stype = subtype;
1127 	pkt->ctrl = ctrlmsg;
1128 
1129 	/* Store ackid/seqid iff it is RELIABLE mode & not a RTS/RTR message */
1130 	if (((ctrlmsg & LDC_CTRL_MASK) != LDC_RTS) &&
1131 	    ((ctrlmsg & LDC_CTRL_MASK) != LDC_RTR)) {
1132 		curr_seqid++;
1133 		if (ldcp->mode != LDC_MODE_RAW) {
1134 			pkt->seqid = curr_seqid;
1135 			pkt->ackid = ldcp->last_msg_rcd;
1136 		}
1137 	}
1138 	DUMP_LDC_PKT(ldcp, "i_ldc_send_pkt", (uint64_t)pkt);
1139 
1140 	/* initiate the send by calling into HV and set the new tail */
1141 	tx_tail = (tx_tail + LDC_PACKET_SIZE) %
1142 	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1143 
1144 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
1145 	if (rv) {
1146 		DWARN(ldcp->id,
1147 		    "i_ldc_send_pkt:(0x%llx) error sending pkt, "
1148 		    "type=0x%x,stype=0x%x,ctrl=0x%x\n",
1149 		    ldcp->id, pkttype, subtype, ctrlmsg);
1150 		mutex_exit(&ldcp->tx_lock);
1151 		return (EIO);
1152 	}
1153 
1154 	ldcp->last_msg_snt = curr_seqid;
1155 	ldcp->tx_tail = tx_tail;
1156 
1157 	mutex_exit(&ldcp->tx_lock);
1158 	return (0);
1159 }
1160 
1161 /*
1162  * Checks if packet was received in right order
1163  * in the case of a reliable link.
1164  * Returns 0 if in order, else EIO
1165  */
1166 static int
1167 i_ldc_check_seqid(ldc_chan_t *ldcp, ldc_msg_t *msg)
1168 {
1169 	/* No seqid checking for RAW mode */
1170 	if (ldcp->mode == LDC_MODE_RAW)
1171 		return (0);
1172 
1173 	/* No seqid checking for version, RTS, RTR message */
1174 	if (msg->ctrl == LDC_VER ||
1175 	    msg->ctrl == LDC_RTS ||
1176 	    msg->ctrl == LDC_RTR)
1177 		return (0);
1178 
1179 	/* Initial seqid to use is sent in RTS/RTR and saved in last_msg_rcd */
1180 	if (msg->seqid != (ldcp->last_msg_rcd + 1)) {
1181 		DWARN(ldcp->id,
1182 		    "i_ldc_check_seqid: (0x%llx) out-of-order pkt, got 0x%x, "
1183 		    "expecting 0x%x\n", ldcp->id, msg->seqid,
1184 		    (ldcp->last_msg_rcd + 1));
1185 		return (EIO);
1186 	}
1187 
1188 #ifdef DEBUG
1189 	if (LDC_INJECT_PKTLOSS(ldcp)) {
1190 		DWARN(ldcp->id,
1191 		    "i_ldc_check_seqid: (0x%llx) inject pkt loss\n", ldcp->id);
1192 		return (EIO);
1193 	}
1194 #endif
1195 
1196 	return (0);
1197 }
1198 
1199 
1200 /*
1201  * Process an incoming version ctrl message
1202  */
1203 static int
1204 i_ldc_process_VER(ldc_chan_t *ldcp, ldc_msg_t *msg)
1205 {
1206 	int 		rv = 0, idx = ldcp->next_vidx;
1207 	ldc_msg_t 	*pkt;
1208 	uint64_t	tx_tail;
1209 	ldc_ver_t	*rcvd_ver;
1210 
1211 	/* get the received version */
1212 	rcvd_ver = (ldc_ver_t *)((uint64_t)msg + LDC_PAYLOAD_VER_OFF);
1213 
1214 	D2(ldcp->id, "i_ldc_process_VER: (0x%llx) received VER v%u.%u\n",
1215 	    ldcp->id, rcvd_ver->major, rcvd_ver->minor);
1216 
1217 	/* Obtain Tx lock */
1218 	mutex_enter(&ldcp->tx_lock);
1219 
1220 	switch (msg->stype) {
1221 	case LDC_INFO:
1222 
1223 		if ((ldcp->tstate & ~TS_IN_RESET) == TS_VREADY) {
1224 			(void) i_ldc_txq_reconf(ldcp);
1225 			i_ldc_reset_state(ldcp);
1226 			mutex_exit(&ldcp->tx_lock);
1227 			return (EAGAIN);
1228 		}
1229 
1230 		/* get the current tail and pkt for the response */
1231 		rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
1232 		if (rv != 0) {
1233 			DWARN(ldcp->id,
1234 			    "i_ldc_process_VER: (0x%llx) err sending "
1235 			    "version ACK/NACK\n", ldcp->id);
1236 			i_ldc_reset(ldcp, B_TRUE);
1237 			mutex_exit(&ldcp->tx_lock);
1238 			return (ECONNRESET);
1239 		}
1240 
1241 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
1242 		ZERO_PKT(pkt);
1243 
1244 		/* initialize the packet */
1245 		pkt->type = LDC_CTRL;
1246 		pkt->ctrl = LDC_VER;
1247 
1248 		for (;;) {
1249 
1250 			D1(ldcp->id, "i_ldc_process_VER: got %u.%u chk %u.%u\n",
1251 			    rcvd_ver->major, rcvd_ver->minor,
1252 			    ldc_versions[idx].major, ldc_versions[idx].minor);
1253 
1254 			if (rcvd_ver->major == ldc_versions[idx].major) {
1255 				/* major version match - ACK version */
1256 				pkt->stype = LDC_ACK;
1257 
1258 				/*
1259 				 * lower minor version to the one this endpt
1260 				 * supports, if necessary
1261 				 */
1262 				if (rcvd_ver->minor > ldc_versions[idx].minor)
1263 					rcvd_ver->minor =
1264 					    ldc_versions[idx].minor;
1265 				bcopy(rcvd_ver, pkt->udata, sizeof (*rcvd_ver));
1266 
1267 				break;
1268 			}
1269 
1270 			if (rcvd_ver->major > ldc_versions[idx].major) {
1271 
1272 				D1(ldcp->id, "i_ldc_process_VER: using next"
1273 				    " lower idx=%d, v%u.%u\n", idx,
1274 				    ldc_versions[idx].major,
1275 				    ldc_versions[idx].minor);
1276 
1277 				/* nack with next lower version */
1278 				pkt->stype = LDC_NACK;
1279 				bcopy(&ldc_versions[idx], pkt->udata,
1280 				    sizeof (ldc_versions[idx]));
1281 				ldcp->next_vidx = idx;
1282 				break;
1283 			}
1284 
1285 			/* next major version */
1286 			idx++;
1287 
1288 			D1(ldcp->id, "i_ldc_process_VER: inc idx %x\n", idx);
1289 
1290 			if (idx == LDC_NUM_VERS) {
1291 				/* no version match - send NACK */
1292 				pkt->stype = LDC_NACK;
1293 				bzero(pkt->udata, sizeof (ldc_ver_t));
1294 				ldcp->next_vidx = 0;
1295 				break;
1296 			}
1297 		}
1298 
1299 		/* initiate the send by calling into HV and set the new tail */
1300 		tx_tail = (tx_tail + LDC_PACKET_SIZE) %
1301 		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1302 
1303 		rv = i_ldc_set_tx_tail(ldcp, tx_tail);
1304 		if (rv == 0) {
1305 			ldcp->tx_tail = tx_tail;
1306 			if (pkt->stype == LDC_ACK) {
1307 				D2(ldcp->id, "i_ldc_process_VER: (0x%llx) sent"
1308 				    " version ACK\n", ldcp->id);
1309 				/* Save the ACK'd version */
1310 				ldcp->version.major = rcvd_ver->major;
1311 				ldcp->version.minor = rcvd_ver->minor;
1312 				ldcp->hstate |= TS_RCVD_VER;
1313 				ldcp->tstate |= TS_VER_DONE;
1314 				D1(DBG_ALL_LDCS,
1315 				    "(0x%llx) Sent ACK, "
1316 				    "Agreed on version v%u.%u\n",
1317 				    ldcp->id, rcvd_ver->major, rcvd_ver->minor);
1318 			}
1319 		} else {
1320 			DWARN(ldcp->id,
1321 			    "i_ldc_process_VER: (0x%llx) error sending "
1322 			    "ACK/NACK\n", ldcp->id);
1323 			i_ldc_reset(ldcp, B_TRUE);
1324 			mutex_exit(&ldcp->tx_lock);
1325 			return (ECONNRESET);
1326 		}
1327 
1328 		break;
1329 
1330 	case LDC_ACK:
1331 		if ((ldcp->tstate & ~TS_IN_RESET) == TS_VREADY) {
1332 			if (ldcp->version.major != rcvd_ver->major ||
1333 			    ldcp->version.minor != rcvd_ver->minor) {
1334 
1335 				/* mismatched version - reset connection */
1336 				DWARN(ldcp->id,
1337 				    "i_ldc_process_VER: (0x%llx) recvd"
1338 				    " ACK ver != sent ACK ver\n", ldcp->id);
1339 				i_ldc_reset(ldcp, B_TRUE);
1340 				mutex_exit(&ldcp->tx_lock);
1341 				return (ECONNRESET);
1342 			}
1343 		} else {
1344 			/* SUCCESS - we have agreed on a version */
1345 			ldcp->version.major = rcvd_ver->major;
1346 			ldcp->version.minor = rcvd_ver->minor;
1347 			ldcp->tstate |= TS_VER_DONE;
1348 		}
1349 
1350 		D1(ldcp->id, "(0x%llx) Got ACK, Agreed on version v%u.%u\n",
1351 		    ldcp->id, rcvd_ver->major, rcvd_ver->minor);
1352 
1353 		/* initiate RTS-RTR-RDX handshake */
1354 		rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
1355 		if (rv) {
1356 			DWARN(ldcp->id,
1357 		    "i_ldc_process_VER: (0x%llx) cannot send RTS\n",
1358 			    ldcp->id);
1359 			i_ldc_reset(ldcp, B_TRUE);
1360 			mutex_exit(&ldcp->tx_lock);
1361 			return (ECONNRESET);
1362 		}
1363 
1364 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
1365 		ZERO_PKT(pkt);
1366 
1367 		pkt->type = LDC_CTRL;
1368 		pkt->stype = LDC_INFO;
1369 		pkt->ctrl = LDC_RTS;
1370 		pkt->env = ldcp->mode;
1371 		if (ldcp->mode != LDC_MODE_RAW)
1372 			pkt->seqid = LDC_INIT_SEQID;
1373 
1374 		ldcp->last_msg_rcd = LDC_INIT_SEQID;
1375 
1376 		DUMP_LDC_PKT(ldcp, "i_ldc_process_VER snd rts", (uint64_t)pkt);
1377 
1378 		/* initiate the send by calling into HV and set the new tail */
1379 		tx_tail = (tx_tail + LDC_PACKET_SIZE) %
1380 		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1381 
1382 		rv = i_ldc_set_tx_tail(ldcp, tx_tail);
1383 		if (rv) {
1384 			D2(ldcp->id,
1385 			    "i_ldc_process_VER: (0x%llx) no listener\n",
1386 			    ldcp->id);
1387 			i_ldc_reset(ldcp, B_TRUE);
1388 			mutex_exit(&ldcp->tx_lock);
1389 			return (ECONNRESET);
1390 		}
1391 
1392 		ldcp->tx_tail = tx_tail;
1393 		ldcp->hstate |= TS_SENT_RTS;
1394 
1395 		break;
1396 
1397 	case LDC_NACK:
1398 		/* check if version in NACK is zero */
1399 		if (rcvd_ver->major == 0 && rcvd_ver->minor == 0) {
1400 			/* version handshake failure */
1401 			DWARN(DBG_ALL_LDCS,
1402 			    "i_ldc_process_VER: (0x%llx) no version match\n",
1403 			    ldcp->id);
1404 			i_ldc_reset(ldcp, B_TRUE);
1405 			mutex_exit(&ldcp->tx_lock);
1406 			return (ECONNRESET);
1407 		}
1408 
1409 		/* get the current tail and pkt for the response */
1410 		rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
1411 		if (rv != 0) {
1412 			cmn_err(CE_NOTE,
1413 			    "i_ldc_process_VER: (0x%lx) err sending "
1414 			    "version ACK/NACK\n", ldcp->id);
1415 			i_ldc_reset(ldcp, B_TRUE);
1416 			mutex_exit(&ldcp->tx_lock);
1417 			return (ECONNRESET);
1418 		}
1419 
1420 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
1421 		ZERO_PKT(pkt);
1422 
1423 		/* initialize the packet */
1424 		pkt->type = LDC_CTRL;
1425 		pkt->ctrl = LDC_VER;
1426 		pkt->stype = LDC_INFO;
1427 
1428 		/* check ver in NACK msg has a match */
1429 		for (;;) {
1430 			if (rcvd_ver->major == ldc_versions[idx].major) {
1431 				/*
1432 				 * major version match - resubmit request
1433 				 * if lower minor version to the one this endpt
1434 				 * supports, if necessary
1435 				 */
1436 				if (rcvd_ver->minor > ldc_versions[idx].minor)
1437 					rcvd_ver->minor =
1438 					    ldc_versions[idx].minor;
1439 				bcopy(rcvd_ver, pkt->udata, sizeof (*rcvd_ver));
1440 				break;
1441 			}
1442 
1443 			if (rcvd_ver->major > ldc_versions[idx].major) {
1444 
1445 				D1(ldcp->id, "i_ldc_process_VER: using next"
1446 				    " lower idx=%d, v%u.%u\n", idx,
1447 				    ldc_versions[idx].major,
1448 				    ldc_versions[idx].minor);
1449 
1450 				/* send next lower version */
1451 				bcopy(&ldc_versions[idx], pkt->udata,
1452 				    sizeof (ldc_versions[idx]));
1453 				ldcp->next_vidx = idx;
1454 				break;
1455 			}
1456 
1457 			/* next version */
1458 			idx++;
1459 
1460 			D1(ldcp->id, "i_ldc_process_VER: inc idx %x\n", idx);
1461 
1462 			if (idx == LDC_NUM_VERS) {
1463 				/* no version match - terminate */
1464 				ldcp->next_vidx = 0;
1465 				mutex_exit(&ldcp->tx_lock);
1466 				return (ECONNRESET);
1467 			}
1468 		}
1469 
1470 		/* initiate the send by calling into HV and set the new tail */
1471 		tx_tail = (tx_tail + LDC_PACKET_SIZE) %
1472 		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1473 
1474 		rv = i_ldc_set_tx_tail(ldcp, tx_tail);
1475 		if (rv == 0) {
1476 			D2(ldcp->id, "i_ldc_process_VER: (0x%llx) sent version"
1477 			    "INFO v%u.%u\n", ldcp->id, ldc_versions[idx].major,
1478 			    ldc_versions[idx].minor);
1479 			ldcp->tx_tail = tx_tail;
1480 		} else {
1481 			cmn_err(CE_NOTE,
1482 			    "i_ldc_process_VER: (0x%lx) error sending version"
1483 			    "INFO\n", ldcp->id);
1484 			i_ldc_reset(ldcp, B_TRUE);
1485 			mutex_exit(&ldcp->tx_lock);
1486 			return (ECONNRESET);
1487 		}
1488 
1489 		break;
1490 	}
1491 
1492 	mutex_exit(&ldcp->tx_lock);
1493 	return (rv);
1494 }
1495 
1496 
1497 /*
1498  * Process an incoming RTS ctrl message
1499  */
1500 static int
1501 i_ldc_process_RTS(ldc_chan_t *ldcp, ldc_msg_t *msg)
1502 {
1503 	int 		rv = 0;
1504 	ldc_msg_t 	*pkt;
1505 	uint64_t	tx_tail;
1506 	boolean_t	sent_NACK = B_FALSE;
1507 
1508 	D2(ldcp->id, "i_ldc_process_RTS: (0x%llx) received RTS\n", ldcp->id);
1509 
1510 	switch (msg->stype) {
1511 	case LDC_NACK:
1512 		DWARN(ldcp->id,
1513 		    "i_ldc_process_RTS: (0x%llx) RTS NACK received\n",
1514 		    ldcp->id);
1515 
1516 		/* Reset the channel -- as we cannot continue */
1517 		mutex_enter(&ldcp->tx_lock);
1518 		i_ldc_reset(ldcp, B_TRUE);
1519 		mutex_exit(&ldcp->tx_lock);
1520 		rv = ECONNRESET;
1521 		break;
1522 
1523 	case LDC_INFO:
1524 
1525 		/* check mode */
1526 		if (ldcp->mode != (ldc_mode_t)msg->env) {
1527 			cmn_err(CE_NOTE,
1528 			    "i_ldc_process_RTS: (0x%lx) mode mismatch\n",
1529 			    ldcp->id);
1530 			/*
1531 			 * send NACK in response to MODE message
1532 			 * get the current tail for the response
1533 			 */
1534 			rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_NACK, LDC_RTS);
1535 			if (rv) {
1536 				/* if cannot send NACK - reset channel */
1537 				mutex_enter(&ldcp->tx_lock);
1538 				i_ldc_reset(ldcp, B_TRUE);
1539 				mutex_exit(&ldcp->tx_lock);
1540 				rv = ECONNRESET;
1541 				break;
1542 			}
1543 			sent_NACK = B_TRUE;
1544 		}
1545 		break;
1546 	default:
1547 		DWARN(ldcp->id, "i_ldc_process_RTS: (0x%llx) unexp ACK\n",
1548 		    ldcp->id);
1549 		mutex_enter(&ldcp->tx_lock);
1550 		i_ldc_reset(ldcp, B_TRUE);
1551 		mutex_exit(&ldcp->tx_lock);
1552 		rv = ECONNRESET;
1553 		break;
1554 	}
1555 
1556 	/*
1557 	 * If either the connection was reset (when rv != 0) or
1558 	 * a NACK was sent, we return. In the case of a NACK
1559 	 * we dont want to consume the packet that came in but
1560 	 * not record that we received the RTS
1561 	 */
1562 	if (rv || sent_NACK)
1563 		return (rv);
1564 
1565 	/* record RTS received */
1566 	ldcp->hstate |= TS_RCVD_RTS;
1567 
1568 	/* store initial SEQID info */
1569 	ldcp->last_msg_snt = msg->seqid;
1570 
1571 	/* Obtain Tx lock */
1572 	mutex_enter(&ldcp->tx_lock);
1573 
1574 	/* get the current tail for the response */
1575 	rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
1576 	if (rv != 0) {
1577 		cmn_err(CE_NOTE,
1578 		    "i_ldc_process_RTS: (0x%lx) err sending RTR\n",
1579 		    ldcp->id);
1580 		i_ldc_reset(ldcp, B_TRUE);
1581 		mutex_exit(&ldcp->tx_lock);
1582 		return (ECONNRESET);
1583 	}
1584 
1585 	pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
1586 	ZERO_PKT(pkt);
1587 
1588 	/* initialize the packet */
1589 	pkt->type = LDC_CTRL;
1590 	pkt->stype = LDC_INFO;
1591 	pkt->ctrl = LDC_RTR;
1592 	pkt->env = ldcp->mode;
1593 	if (ldcp->mode != LDC_MODE_RAW)
1594 		pkt->seqid = LDC_INIT_SEQID;
1595 
1596 	ldcp->last_msg_rcd = msg->seqid;
1597 
1598 	/* initiate the send by calling into HV and set the new tail */
1599 	tx_tail = (tx_tail + LDC_PACKET_SIZE) %
1600 	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1601 
1602 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
1603 	if (rv == 0) {
1604 		D2(ldcp->id,
1605 		    "i_ldc_process_RTS: (0x%llx) sent RTR\n", ldcp->id);
1606 		DUMP_LDC_PKT(ldcp, "i_ldc_process_RTS sent rtr", (uint64_t)pkt);
1607 
1608 		ldcp->tx_tail = tx_tail;
1609 		ldcp->hstate |= TS_SENT_RTR;
1610 
1611 	} else {
1612 		cmn_err(CE_NOTE,
1613 		    "i_ldc_process_RTS: (0x%lx) error sending RTR\n",
1614 		    ldcp->id);
1615 		i_ldc_reset(ldcp, B_TRUE);
1616 		mutex_exit(&ldcp->tx_lock);
1617 		return (ECONNRESET);
1618 	}
1619 
1620 	mutex_exit(&ldcp->tx_lock);
1621 	return (0);
1622 }
1623 
1624 /*
1625  * Process an incoming RTR ctrl message
1626  */
1627 static int
1628 i_ldc_process_RTR(ldc_chan_t *ldcp, ldc_msg_t *msg)
1629 {
1630 	int 		rv = 0;
1631 	boolean_t	sent_NACK = B_FALSE;
1632 
1633 	D2(ldcp->id, "i_ldc_process_RTR: (0x%llx) received RTR\n", ldcp->id);
1634 
1635 	switch (msg->stype) {
1636 	case LDC_NACK:
1637 		/* RTR NACK received */
1638 		DWARN(ldcp->id,
1639 		    "i_ldc_process_RTR: (0x%llx) RTR NACK received\n",
1640 		    ldcp->id);
1641 
1642 		/* Reset the channel -- as we cannot continue */
1643 		mutex_enter(&ldcp->tx_lock);
1644 		i_ldc_reset(ldcp, B_TRUE);
1645 		mutex_exit(&ldcp->tx_lock);
1646 		rv = ECONNRESET;
1647 
1648 		break;
1649 
1650 	case LDC_INFO:
1651 
1652 		/* check mode */
1653 		if (ldcp->mode != (ldc_mode_t)msg->env) {
1654 			DWARN(ldcp->id,
1655 			    "i_ldc_process_RTR: (0x%llx) mode mismatch, "
1656 			    "expecting 0x%x, got 0x%x\n",
1657 			    ldcp->id, ldcp->mode, (ldc_mode_t)msg->env);
1658 			/*
1659 			 * send NACK in response to MODE message
1660 			 * get the current tail for the response
1661 			 */
1662 			rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_NACK, LDC_RTR);
1663 			if (rv) {
1664 				/* if cannot send NACK - reset channel */
1665 				mutex_enter(&ldcp->tx_lock);
1666 				i_ldc_reset(ldcp, B_TRUE);
1667 				mutex_exit(&ldcp->tx_lock);
1668 				rv = ECONNRESET;
1669 				break;
1670 			}
1671 			sent_NACK = B_TRUE;
1672 		}
1673 		break;
1674 
1675 	default:
1676 		DWARN(ldcp->id, "i_ldc_process_RTR: (0x%llx) unexp ACK\n",
1677 		    ldcp->id);
1678 
1679 		/* Reset the channel -- as we cannot continue */
1680 		mutex_enter(&ldcp->tx_lock);
1681 		i_ldc_reset(ldcp, B_TRUE);
1682 		mutex_exit(&ldcp->tx_lock);
1683 		rv = ECONNRESET;
1684 		break;
1685 	}
1686 
1687 	/*
1688 	 * If either the connection was reset (when rv != 0) or
1689 	 * a NACK was sent, we return. In the case of a NACK
1690 	 * we dont want to consume the packet that came in but
1691 	 * not record that we received the RTR
1692 	 */
1693 	if (rv || sent_NACK)
1694 		return (rv);
1695 
1696 	ldcp->last_msg_snt = msg->seqid;
1697 	ldcp->hstate |= TS_RCVD_RTR;
1698 
1699 	rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_INFO, LDC_RDX);
1700 	if (rv) {
1701 		cmn_err(CE_NOTE,
1702 		    "i_ldc_process_RTR: (0x%lx) cannot send RDX\n",
1703 		    ldcp->id);
1704 		mutex_enter(&ldcp->tx_lock);
1705 		i_ldc_reset(ldcp, B_TRUE);
1706 		mutex_exit(&ldcp->tx_lock);
1707 		return (ECONNRESET);
1708 	}
1709 	D2(ldcp->id,
1710 	    "i_ldc_process_RTR: (0x%llx) sent RDX\n", ldcp->id);
1711 
1712 	ldcp->hstate |= TS_SENT_RDX;
1713 	ldcp->tstate |= TS_HSHAKE_DONE;
1714 	if ((ldcp->tstate & TS_IN_RESET) == 0)
1715 		ldcp->status = LDC_UP;
1716 
1717 	D1(ldcp->id, "(0x%llx) Handshake Complete\n", ldcp->id);
1718 
1719 	return (0);
1720 }
1721 
1722 
1723 /*
1724  * Process an incoming RDX ctrl message
1725  */
1726 static int
1727 i_ldc_process_RDX(ldc_chan_t *ldcp, ldc_msg_t *msg)
1728 {
1729 	int	rv = 0;
1730 
1731 	D2(ldcp->id, "i_ldc_process_RDX: (0x%llx) received RDX\n", ldcp->id);
1732 
1733 	switch (msg->stype) {
1734 	case LDC_NACK:
1735 		/* RDX NACK received */
1736 		DWARN(ldcp->id,
1737 		    "i_ldc_process_RDX: (0x%llx) RDX NACK received\n",
1738 		    ldcp->id);
1739 
1740 		/* Reset the channel -- as we cannot continue */
1741 		mutex_enter(&ldcp->tx_lock);
1742 		i_ldc_reset(ldcp, B_TRUE);
1743 		mutex_exit(&ldcp->tx_lock);
1744 		rv = ECONNRESET;
1745 
1746 		break;
1747 
1748 	case LDC_INFO:
1749 
1750 		/*
1751 		 * if channel is UP and a RDX received after data transmission
1752 		 * has commenced it is an error
1753 		 */
1754 		if ((ldcp->tstate == TS_UP) && (ldcp->hstate & TS_RCVD_RDX)) {
1755 			DWARN(DBG_ALL_LDCS,
1756 			    "i_ldc_process_RDX: (0x%llx) unexpected RDX"
1757 			    " - LDC reset\n", ldcp->id);
1758 			mutex_enter(&ldcp->tx_lock);
1759 			i_ldc_reset(ldcp, B_TRUE);
1760 			mutex_exit(&ldcp->tx_lock);
1761 			return (ECONNRESET);
1762 		}
1763 
1764 		ldcp->hstate |= TS_RCVD_RDX;
1765 		ldcp->tstate |= TS_HSHAKE_DONE;
1766 		if ((ldcp->tstate & TS_IN_RESET) == 0)
1767 			ldcp->status = LDC_UP;
1768 
1769 		D1(DBG_ALL_LDCS, "(0x%llx) Handshake Complete\n", ldcp->id);
1770 		break;
1771 
1772 	default:
1773 		DWARN(ldcp->id, "i_ldc_process_RDX: (0x%llx) unexp ACK\n",
1774 		    ldcp->id);
1775 
1776 		/* Reset the channel -- as we cannot continue */
1777 		mutex_enter(&ldcp->tx_lock);
1778 		i_ldc_reset(ldcp, B_TRUE);
1779 		mutex_exit(&ldcp->tx_lock);
1780 		rv = ECONNRESET;
1781 		break;
1782 	}
1783 
1784 	return (rv);
1785 }
1786 
1787 /*
1788  * Process an incoming ACK for a data packet
1789  */
1790 static int
1791 i_ldc_process_data_ACK(ldc_chan_t *ldcp, ldc_msg_t *msg)
1792 {
1793 	int		rv;
1794 	uint64_t 	tx_head;
1795 	ldc_msg_t	*pkt;
1796 
1797 	/* Obtain Tx lock */
1798 	mutex_enter(&ldcp->tx_lock);
1799 
1800 	/*
1801 	 * Read the current Tx head and tail
1802 	 */
1803 	rv = hv_ldc_tx_get_state(ldcp->id,
1804 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
1805 	if (rv != 0) {
1806 		cmn_err(CE_WARN,
1807 		    "i_ldc_process_data_ACK: (0x%lx) cannot read qptrs\n",
1808 		    ldcp->id);
1809 
1810 		/* Reset the channel -- as we cannot continue */
1811 		i_ldc_reset(ldcp, B_TRUE);
1812 		mutex_exit(&ldcp->tx_lock);
1813 		return (ECONNRESET);
1814 	}
1815 
1816 	/*
1817 	 * loop from where the previous ACK location was to the
1818 	 * current head location. This is how far the HV has
1819 	 * actually send pkts. Pkts between head and tail are
1820 	 * yet to be sent by HV.
1821 	 */
1822 	tx_head = ldcp->tx_ackd_head;
1823 	for (;;) {
1824 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_head);
1825 		tx_head = (tx_head + LDC_PACKET_SIZE) %
1826 		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1827 
1828 		if (pkt->seqid == msg->ackid) {
1829 			D2(ldcp->id,
1830 			    "i_ldc_process_data_ACK: (0x%llx) found packet\n",
1831 			    ldcp->id);
1832 			ldcp->last_ack_rcd = msg->ackid;
1833 			ldcp->tx_ackd_head = tx_head;
1834 			break;
1835 		}
1836 		if (tx_head == ldcp->tx_head) {
1837 			/* could not find packet */
1838 			DWARN(ldcp->id,
1839 			    "i_ldc_process_data_ACK: (0x%llx) invalid ACKid\n",
1840 			    ldcp->id);
1841 
1842 			/* Reset the channel -- as we cannot continue */
1843 			i_ldc_reset(ldcp, B_TRUE);
1844 			mutex_exit(&ldcp->tx_lock);
1845 			return (ECONNRESET);
1846 		}
1847 	}
1848 
1849 	mutex_exit(&ldcp->tx_lock);
1850 	return (0);
1851 }
1852 
1853 /*
1854  * Process incoming control message
1855  * Return 0 - session can continue
1856  *        EAGAIN - reprocess packet - state was changed
1857  *	  ECONNRESET - channel was reset
1858  */
1859 static int
1860 i_ldc_ctrlmsg(ldc_chan_t *ldcp, ldc_msg_t *msg)
1861 {
1862 	int 		rv = 0;
1863 
1864 	D1(ldcp->id, "i_ldc_ctrlmsg: (%llx) tstate = %lx, hstate = %lx\n",
1865 	    ldcp->id, ldcp->tstate, ldcp->hstate);
1866 
1867 	switch (ldcp->tstate & ~TS_IN_RESET) {
1868 
1869 	case TS_OPEN:
1870 	case TS_READY:
1871 
1872 		switch (msg->ctrl & LDC_CTRL_MASK) {
1873 		case LDC_VER:
1874 			/* process version message */
1875 			rv = i_ldc_process_VER(ldcp, msg);
1876 			break;
1877 		default:
1878 			DWARN(ldcp->id,
1879 			    "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
1880 			    "tstate=0x%x\n", ldcp->id,
1881 			    (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate);
1882 			break;
1883 		}
1884 
1885 		break;
1886 
1887 	case TS_VREADY:
1888 
1889 		switch (msg->ctrl & LDC_CTRL_MASK) {
1890 		case LDC_VER:
1891 			/* process version message */
1892 			rv = i_ldc_process_VER(ldcp, msg);
1893 			break;
1894 		case LDC_RTS:
1895 			/* process RTS message */
1896 			rv = i_ldc_process_RTS(ldcp, msg);
1897 			break;
1898 		case LDC_RTR:
1899 			/* process RTR message */
1900 			rv = i_ldc_process_RTR(ldcp, msg);
1901 			break;
1902 		case LDC_RDX:
1903 			/* process RDX message */
1904 			rv = i_ldc_process_RDX(ldcp, msg);
1905 			break;
1906 		default:
1907 			DWARN(ldcp->id,
1908 			    "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
1909 			    "tstate=0x%x\n", ldcp->id,
1910 			    (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate);
1911 			break;
1912 		}
1913 
1914 		break;
1915 
1916 	case TS_UP:
1917 
1918 		switch (msg->ctrl & LDC_CTRL_MASK) {
1919 		case LDC_VER:
1920 			DWARN(ldcp->id,
1921 			    "i_ldc_ctrlmsg: (0x%llx) unexpected VER "
1922 			    "- LDC reset\n", ldcp->id);
1923 			/* peer is redoing version negotiation */
1924 			mutex_enter(&ldcp->tx_lock);
1925 			(void) i_ldc_txq_reconf(ldcp);
1926 			i_ldc_reset_state(ldcp);
1927 			mutex_exit(&ldcp->tx_lock);
1928 			rv = EAGAIN;
1929 			break;
1930 
1931 		case LDC_RDX:
1932 			/* process RDX message */
1933 			rv = i_ldc_process_RDX(ldcp, msg);
1934 			break;
1935 
1936 		default:
1937 			DWARN(ldcp->id,
1938 			    "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
1939 			    "tstate=0x%x\n", ldcp->id,
1940 			    (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate);
1941 			break;
1942 		}
1943 	}
1944 
1945 	return (rv);
1946 }
1947 
1948 /*
1949  * Register channel with the channel nexus
1950  */
1951 static int
1952 i_ldc_register_channel(ldc_chan_t *ldcp)
1953 {
1954 	int		rv = 0;
1955 	ldc_cnex_t	*cinfo = &ldcssp->cinfo;
1956 
1957 	if (cinfo->dip == NULL) {
1958 		DWARN(ldcp->id,
1959 		    "i_ldc_register_channel: cnex has not registered\n");
1960 		return (EAGAIN);
1961 	}
1962 
1963 	rv = cinfo->reg_chan(cinfo->dip, ldcp->id, ldcp->devclass);
1964 	if (rv) {
1965 		DWARN(ldcp->id,
1966 		    "i_ldc_register_channel: cannot register channel\n");
1967 		return (rv);
1968 	}
1969 
1970 	rv = cinfo->add_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR,
1971 	    i_ldc_tx_hdlr, ldcp, NULL);
1972 	if (rv) {
1973 		DWARN(ldcp->id,
1974 		    "i_ldc_register_channel: cannot add Tx interrupt\n");
1975 		(void) cinfo->unreg_chan(cinfo->dip, ldcp->id);
1976 		return (rv);
1977 	}
1978 
1979 	rv = cinfo->add_intr(cinfo->dip, ldcp->id, CNEX_RX_INTR,
1980 	    i_ldc_rx_hdlr, ldcp, NULL);
1981 	if (rv) {
1982 		DWARN(ldcp->id,
1983 		    "i_ldc_register_channel: cannot add Rx interrupt\n");
1984 		(void) cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR);
1985 		(void) cinfo->unreg_chan(cinfo->dip, ldcp->id);
1986 		return (rv);
1987 	}
1988 
1989 	ldcp->tstate |= TS_CNEX_RDY;
1990 
1991 	return (0);
1992 }
1993 
1994 /*
1995  * Unregister a channel with the channel nexus
1996  */
1997 static int
1998 i_ldc_unregister_channel(ldc_chan_t *ldcp)
1999 {
2000 	int		rv = 0;
2001 	ldc_cnex_t	*cinfo = &ldcssp->cinfo;
2002 
2003 	if (cinfo->dip == NULL) {
2004 		DWARN(ldcp->id,
2005 		    "i_ldc_unregister_channel: cnex has not registered\n");
2006 		return (EAGAIN);
2007 	}
2008 
2009 	if (ldcp->tstate & TS_CNEX_RDY) {
2010 
2011 		/* Remove the Rx interrupt */
2012 		rv = cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_RX_INTR);
2013 		if (rv) {
2014 			if (rv != EAGAIN) {
2015 				DWARN(ldcp->id,
2016 				    "i_ldc_unregister_channel: err removing "
2017 				    "Rx intr\n");
2018 				return (rv);
2019 			}
2020 
2021 			/*
2022 			 * If interrupts are pending and handler has
2023 			 * finished running, clear interrupt and try
2024 			 * again
2025 			 */
2026 			if (ldcp->rx_intr_state != LDC_INTR_PEND)
2027 				return (rv);
2028 
2029 			(void) i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
2030 			rv = cinfo->rem_intr(cinfo->dip, ldcp->id,
2031 			    CNEX_RX_INTR);
2032 			if (rv) {
2033 				DWARN(ldcp->id, "i_ldc_unregister_channel: "
2034 				    "err removing Rx interrupt\n");
2035 				return (rv);
2036 			}
2037 		}
2038 
2039 		/* Remove the Tx interrupt */
2040 		rv = cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR);
2041 		if (rv) {
2042 			DWARN(ldcp->id,
2043 			    "i_ldc_unregister_channel: err removing Tx intr\n");
2044 			return (rv);
2045 		}
2046 
2047 		/* Unregister the channel */
2048 		rv = cinfo->unreg_chan(ldcssp->cinfo.dip, ldcp->id);
2049 		if (rv) {
2050 			DWARN(ldcp->id,
2051 			    "i_ldc_unregister_channel: cannot unreg channel\n");
2052 			return (rv);
2053 		}
2054 
2055 		ldcp->tstate &= ~TS_CNEX_RDY;
2056 	}
2057 
2058 	return (0);
2059 }
2060 
2061 
2062 /*
2063  * LDC transmit interrupt handler
2064  *    triggered for chanel up/down/reset events
2065  *    and Tx queue content changes
2066  */
2067 static uint_t
2068 i_ldc_tx_hdlr(caddr_t arg1, caddr_t arg2)
2069 {
2070 	_NOTE(ARGUNUSED(arg2))
2071 
2072 	int 		rv;
2073 	ldc_chan_t 	*ldcp;
2074 	boolean_t 	notify_client = B_FALSE;
2075 	uint64_t	notify_event = 0, link_state;
2076 
2077 	/* Get the channel for which interrupt was received */
2078 	ASSERT(arg1 != NULL);
2079 	ldcp = (ldc_chan_t *)arg1;
2080 
2081 	D1(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) Received intr, ldcp=0x%p\n",
2082 	    ldcp->id, ldcp);
2083 
2084 	/* Lock channel */
2085 	mutex_enter(&ldcp->lock);
2086 
2087 	/* Obtain Tx lock */
2088 	mutex_enter(&ldcp->tx_lock);
2089 
2090 	/* mark interrupt as pending */
2091 	ldcp->tx_intr_state = LDC_INTR_ACTIVE;
2092 
2093 	/* save current link state */
2094 	link_state = ldcp->link_state;
2095 
2096 	rv = hv_ldc_tx_get_state(ldcp->id, &ldcp->tx_head, &ldcp->tx_tail,
2097 	    &ldcp->link_state);
2098 	if (rv) {
2099 		cmn_err(CE_WARN,
2100 		    "i_ldc_tx_hdlr: (0x%lx) cannot read queue ptrs rv=0x%d\n",
2101 		    ldcp->id, rv);
2102 		i_ldc_clear_intr(ldcp, CNEX_TX_INTR);
2103 		mutex_exit(&ldcp->tx_lock);
2104 		mutex_exit(&ldcp->lock);
2105 		return (DDI_INTR_CLAIMED);
2106 	}
2107 
2108 	/*
2109 	 * reset the channel state if the channel went down
2110 	 * (other side unconfigured queue) or channel was reset
2111 	 * (other side reconfigured its queue)
2112 	 */
2113 	if (link_state != ldcp->link_state &&
2114 	    ldcp->link_state == LDC_CHANNEL_DOWN) {
2115 		D1(ldcp->id, "i_ldc_tx_hdlr: channel link down\n", ldcp->id);
2116 		i_ldc_reset(ldcp, B_FALSE);
2117 		notify_client = B_TRUE;
2118 		notify_event = LDC_EVT_DOWN;
2119 	}
2120 
2121 	if (link_state != ldcp->link_state &&
2122 	    ldcp->link_state == LDC_CHANNEL_RESET) {
2123 		D1(ldcp->id, "i_ldc_tx_hdlr: channel link reset\n", ldcp->id);
2124 		i_ldc_reset(ldcp, B_FALSE);
2125 		notify_client = B_TRUE;
2126 		notify_event = LDC_EVT_RESET;
2127 	}
2128 
2129 	if (link_state != ldcp->link_state &&
2130 	    (ldcp->tstate & ~TS_IN_RESET) == TS_OPEN &&
2131 	    ldcp->link_state == LDC_CHANNEL_UP) {
2132 		D1(ldcp->id, "i_ldc_tx_hdlr: channel link up\n", ldcp->id);
2133 		notify_client = B_TRUE;
2134 		notify_event = LDC_EVT_RESET;
2135 		ldcp->tstate |= TS_LINK_READY;
2136 		ldcp->status = LDC_READY;
2137 	}
2138 
2139 	/* if callbacks are disabled, do not notify */
2140 	if (!ldcp->cb_enabled)
2141 		notify_client = B_FALSE;
2142 
2143 	i_ldc_clear_intr(ldcp, CNEX_TX_INTR);
2144 	mutex_exit(&ldcp->tx_lock);
2145 
2146 	if (notify_client) {
2147 		ldcp->cb_inprogress = B_TRUE;
2148 		mutex_exit(&ldcp->lock);
2149 		rv = ldcp->cb(notify_event, ldcp->cb_arg);
2150 		if (rv) {
2151 			DWARN(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) callback "
2152 			    "failure", ldcp->id);
2153 		}
2154 		mutex_enter(&ldcp->lock);
2155 		ldcp->cb_inprogress = B_FALSE;
2156 	}
2157 
2158 	mutex_exit(&ldcp->lock);
2159 
2160 	D1(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) exiting handler", ldcp->id);
2161 
2162 	return (DDI_INTR_CLAIMED);
2163 }
2164 
2165 /*
2166  * Process the Rx HV queue.
2167  *
2168  * Returns 0 if data packets were found and no errors were encountered,
2169  * otherwise returns an error. In either case, the *notify argument is
2170  * set to indicate whether or not the client callback function should
2171  * be invoked. The *event argument is set to contain the callback event.
2172  *
2173  * Depending on the channel mode, packets are handled differently:
2174  *
2175  * RAW MODE
2176  * For raw mode channels, when a data packet is encountered,
2177  * processing stops and all packets are left on the queue to be removed
2178  * and processed by the ldc_read code path.
2179  *
2180  * UNRELIABLE MODE
2181  * For unreliable mode, when a data packet is encountered, processing
2182  * stops, and all packets are left on the queue to be removed and
2183  * processed by the ldc_read code path. Control packets are processed
2184  * inline if they are encountered before any data packets.
2185  *
2186  * RELIABLE MODE
2187  * For reliable mode channels, all packets on the receive queue
2188  * are processed: data packets are copied to the data queue and
2189  * control packets are processed inline. Packets are only left on
2190  * the receive queue when the data queue is full.
2191  */
2192 static uint_t
2193 i_ldc_rx_process_hvq(ldc_chan_t *ldcp, boolean_t *notify_client,
2194     uint64_t *notify_event)
2195 {
2196 	int		rv;
2197 	uint64_t 	rx_head, rx_tail;
2198 	ldc_msg_t 	*msg;
2199 	uint64_t	link_state, first_fragment = 0;
2200 	boolean_t	trace_length = B_TRUE;
2201 
2202 	ASSERT(MUTEX_HELD(&ldcp->lock));
2203 	*notify_client = B_FALSE;
2204 	*notify_event = 0;
2205 
2206 	/*
2207 	 * Read packet(s) from the queue
2208 	 */
2209 	for (;;) {
2210 
2211 		link_state = ldcp->link_state;
2212 		rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
2213 		    &ldcp->link_state);
2214 		if (rv) {
2215 			cmn_err(CE_WARN,
2216 			    "i_ldc_rx_process_hvq: (0x%lx) cannot read "
2217 			    "queue ptrs, rv=0x%d\n", ldcp->id, rv);
2218 			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
2219 			return (EIO);
2220 		}
2221 
2222 		/*
2223 		 * reset the channel state if the channel went down
2224 		 * (other side unconfigured queue) or channel was reset
2225 		 * (other side reconfigured its queue)
2226 		 */
2227 
2228 		if (link_state != ldcp->link_state) {
2229 
2230 			switch (ldcp->link_state) {
2231 			case LDC_CHANNEL_DOWN:
2232 				D1(ldcp->id, "i_ldc_rx_process_hvq: channel "
2233 				    "link down\n", ldcp->id);
2234 				mutex_enter(&ldcp->tx_lock);
2235 				i_ldc_reset(ldcp, B_FALSE);
2236 				mutex_exit(&ldcp->tx_lock);
2237 				*notify_client = B_TRUE;
2238 				*notify_event = LDC_EVT_DOWN;
2239 				goto loop_exit;
2240 
2241 			case LDC_CHANNEL_UP:
2242 				D1(ldcp->id, "i_ldc_rx_process_hvq: "
2243 				    "channel link up\n", ldcp->id);
2244 
2245 				if ((ldcp->tstate & ~TS_IN_RESET) == TS_OPEN) {
2246 					*notify_client = B_TRUE;
2247 					*notify_event = LDC_EVT_RESET;
2248 					ldcp->tstate |= TS_LINK_READY;
2249 					ldcp->status = LDC_READY;
2250 				}
2251 				break;
2252 
2253 			case LDC_CHANNEL_RESET:
2254 			default:
2255 #ifdef DEBUG
2256 force_reset:
2257 #endif
2258 				D1(ldcp->id, "i_ldc_rx_process_hvq: channel "
2259 				    "link reset\n", ldcp->id);
2260 				mutex_enter(&ldcp->tx_lock);
2261 				i_ldc_reset(ldcp, B_FALSE);
2262 				mutex_exit(&ldcp->tx_lock);
2263 				*notify_client = B_TRUE;
2264 				*notify_event = LDC_EVT_RESET;
2265 				break;
2266 			}
2267 		}
2268 
2269 #ifdef DEBUG
2270 		if (LDC_INJECT_RESET(ldcp))
2271 			goto force_reset;
2272 		if (LDC_INJECT_DRNGCLEAR(ldcp))
2273 			i_ldc_mem_inject_dring_clear(ldcp);
2274 #endif
2275 		if (trace_length) {
2276 			TRACE_RXHVQ_LENGTH(ldcp, rx_head, rx_tail);
2277 			trace_length = B_FALSE;
2278 		}
2279 
2280 		if (rx_head == rx_tail) {
2281 			D2(ldcp->id, "i_ldc_rx_process_hvq: (0x%llx) "
2282 			    "No packets\n", ldcp->id);
2283 			break;
2284 		}
2285 
2286 		D2(ldcp->id, "i_ldc_rx_process_hvq: head=0x%llx, "
2287 		    "tail=0x%llx\n", rx_head, rx_tail);
2288 		DUMP_LDC_PKT(ldcp, "i_ldc_rx_process_hvq rcd",
2289 		    ldcp->rx_q_va + rx_head);
2290 
2291 		/* get the message */
2292 		msg = (ldc_msg_t *)(ldcp->rx_q_va + rx_head);
2293 
2294 		/* if channel is in RAW mode or data pkt, notify and return */
2295 		if (ldcp->mode == LDC_MODE_RAW) {
2296 			*notify_client = B_TRUE;
2297 			*notify_event |= LDC_EVT_READ;
2298 			break;
2299 		}
2300 
2301 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_INFO)) {
2302 
2303 			/* discard packet if channel is not up */
2304 			if ((ldcp->tstate & ~TS_IN_RESET) != TS_UP) {
2305 
2306 				/* move the head one position */
2307 				rx_head = (rx_head + LDC_PACKET_SIZE) %
2308 				    (ldcp->rx_q_entries << LDC_PACKET_SHIFT);
2309 
2310 				if (rv = i_ldc_set_rx_head(ldcp, rx_head))
2311 					break;
2312 
2313 				continue;
2314 			} else {
2315 				uint64_t dq_head, dq_tail;
2316 
2317 				/* process only RELIABLE mode data packets */
2318 				if (ldcp->mode != LDC_MODE_RELIABLE) {
2319 					if ((ldcp->tstate & TS_IN_RESET) == 0)
2320 						*notify_client = B_TRUE;
2321 					*notify_event |= LDC_EVT_READ;
2322 					break;
2323 				}
2324 
2325 				/* don't process packet if queue full */
2326 				(void) i_ldc_dq_rx_get_state(ldcp, &dq_head,
2327 				    &dq_tail, NULL);
2328 				dq_tail = (dq_tail + LDC_PACKET_SIZE) %
2329 				    (ldcp->rx_dq_entries << LDC_PACKET_SHIFT);
2330 				if (dq_tail == dq_head ||
2331 				    LDC_INJECT_DQFULL(ldcp)) {
2332 					rv = ENOSPC;
2333 					break;
2334 				}
2335 			}
2336 		}
2337 
2338 		/* Check the sequence ID for the message received */
2339 		rv = i_ldc_check_seqid(ldcp, msg);
2340 		if (rv != 0) {
2341 
2342 			DWARN(ldcp->id, "i_ldc_rx_process_hvq: (0x%llx) "
2343 			    "seqid error, q_ptrs=0x%lx,0x%lx", ldcp->id,
2344 			    rx_head, rx_tail);
2345 
2346 			/* Reset last_msg_rcd to start of message */
2347 			if (first_fragment != 0) {
2348 				ldcp->last_msg_rcd = first_fragment - 1;
2349 				first_fragment = 0;
2350 			}
2351 
2352 			/*
2353 			 * Send a NACK due to seqid mismatch
2354 			 */
2355 			rv = i_ldc_send_pkt(ldcp, msg->type, LDC_NACK,
2356 			    (msg->ctrl & LDC_CTRL_MASK));
2357 
2358 			if (rv) {
2359 				cmn_err(CE_NOTE, "i_ldc_rx_process_hvq: "
2360 				    "(0x%lx) err sending CTRL/DATA NACK msg\n",
2361 				    ldcp->id);
2362 
2363 				/* if cannot send NACK - reset channel */
2364 				mutex_enter(&ldcp->tx_lock);
2365 				i_ldc_reset(ldcp, B_TRUE);
2366 				mutex_exit(&ldcp->tx_lock);
2367 
2368 				*notify_client = B_TRUE;
2369 				*notify_event = LDC_EVT_RESET;
2370 				break;
2371 			}
2372 
2373 			/* purge receive queue */
2374 			(void) i_ldc_set_rx_head(ldcp, rx_tail);
2375 			break;
2376 		}
2377 
2378 		/* record the message ID */
2379 		ldcp->last_msg_rcd = msg->seqid;
2380 
2381 		/* process control messages */
2382 		if (msg->type & LDC_CTRL) {
2383 			/* save current internal state */
2384 			uint64_t tstate = ldcp->tstate;
2385 
2386 			rv = i_ldc_ctrlmsg(ldcp, msg);
2387 			if (rv == EAGAIN) {
2388 				/* re-process pkt - state was adjusted */
2389 				continue;
2390 			}
2391 			if (rv == ECONNRESET) {
2392 				*notify_client = B_TRUE;
2393 				*notify_event = LDC_EVT_RESET;
2394 				break;
2395 			}
2396 
2397 			/*
2398 			 * control message processing was successful
2399 			 * channel transitioned to ready for communication
2400 			 */
2401 			if (rv == 0 && ldcp->tstate == TS_UP &&
2402 			    (tstate & ~TS_IN_RESET) !=
2403 			    (ldcp->tstate & ~TS_IN_RESET)) {
2404 				*notify_client = B_TRUE;
2405 				*notify_event = LDC_EVT_UP;
2406 			}
2407 		}
2408 
2409 		/* process data NACKs */
2410 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_NACK)) {
2411 			DWARN(ldcp->id,
2412 			    "i_ldc_rx_process_hvq: (0x%llx) received DATA/NACK",
2413 			    ldcp->id);
2414 			mutex_enter(&ldcp->tx_lock);
2415 			i_ldc_reset(ldcp, B_TRUE);
2416 			mutex_exit(&ldcp->tx_lock);
2417 			*notify_client = B_TRUE;
2418 			*notify_event = LDC_EVT_RESET;
2419 			break;
2420 		}
2421 
2422 		/* process data ACKs */
2423 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) {
2424 			if (rv = i_ldc_process_data_ACK(ldcp, msg)) {
2425 				*notify_client = B_TRUE;
2426 				*notify_event = LDC_EVT_RESET;
2427 				break;
2428 			}
2429 		}
2430 
2431 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_INFO)) {
2432 			ASSERT(ldcp->mode == LDC_MODE_RELIABLE);
2433 
2434 			/*
2435 			 * Copy the data packet to the data queue. Note
2436 			 * that the copy routine updates the rx_head pointer.
2437 			 */
2438 			i_ldc_rxdq_copy(ldcp, &rx_head);
2439 
2440 			if ((ldcp->tstate & TS_IN_RESET) == 0)
2441 				*notify_client = B_TRUE;
2442 			*notify_event |= LDC_EVT_READ;
2443 		} else {
2444 			rx_head = (rx_head + LDC_PACKET_SIZE) %
2445 			    (ldcp->rx_q_entries << LDC_PACKET_SHIFT);
2446 		}
2447 
2448 		/* move the head one position */
2449 		if (rv = i_ldc_set_rx_head(ldcp, rx_head)) {
2450 			*notify_client = B_TRUE;
2451 			*notify_event = LDC_EVT_RESET;
2452 			break;
2453 		}
2454 
2455 	} /* for */
2456 
2457 loop_exit:
2458 
2459 	if (ldcp->mode == LDC_MODE_RELIABLE) {
2460 		/* ACK data packets */
2461 		if ((*notify_event &
2462 		    (LDC_EVT_READ | LDC_EVT_RESET)) == LDC_EVT_READ) {
2463 			int ack_rv;
2464 			ack_rv = i_ldc_send_pkt(ldcp, LDC_DATA, LDC_ACK, 0);
2465 			if (ack_rv && ack_rv != EWOULDBLOCK) {
2466 				cmn_err(CE_NOTE,
2467 				    "i_ldc_rx_process_hvq: (0x%lx) cannot "
2468 				    "send ACK\n", ldcp->id);
2469 
2470 				mutex_enter(&ldcp->tx_lock);
2471 				i_ldc_reset(ldcp, B_FALSE);
2472 				mutex_exit(&ldcp->tx_lock);
2473 
2474 				*notify_client = B_TRUE;
2475 				*notify_event = LDC_EVT_RESET;
2476 				goto skip_ackpeek;
2477 			}
2478 		}
2479 
2480 		/*
2481 		 * If we have no more space on the data queue, make sure
2482 		 * there are no ACKs on the rx queue waiting to be processed.
2483 		 */
2484 		if (rv == ENOSPC) {
2485 			if (i_ldc_rx_ackpeek(ldcp, rx_head, rx_tail) != 0) {
2486 				ldcp->rx_ack_head = ACKPEEK_HEAD_INVALID;
2487 				*notify_client = B_TRUE;
2488 				*notify_event = LDC_EVT_RESET;
2489 			}
2490 			return (rv);
2491 		} else {
2492 			ldcp->rx_ack_head = ACKPEEK_HEAD_INVALID;
2493 		}
2494 	}
2495 
2496 skip_ackpeek:
2497 
2498 	/* Return, indicating whether or not data packets were found */
2499 	if ((*notify_event & (LDC_EVT_READ | LDC_EVT_RESET)) == LDC_EVT_READ)
2500 		return (0);
2501 
2502 	return (ENOMSG);
2503 }
2504 
2505 /*
2506  * Process any ACK packets on the HV receive queue.
2507  *
2508  * This function is only used by RELIABLE mode channels when the
2509  * secondary data queue fills up and there are packets remaining on
2510  * the HV receive queue.
2511  */
2512 int
2513 i_ldc_rx_ackpeek(ldc_chan_t *ldcp, uint64_t rx_head, uint64_t rx_tail)
2514 {
2515 	int		rv = 0;
2516 	ldc_msg_t	*msg;
2517 
2518 	if (ldcp->rx_ack_head == ACKPEEK_HEAD_INVALID)
2519 		ldcp->rx_ack_head = rx_head;
2520 
2521 	while (ldcp->rx_ack_head != rx_tail) {
2522 		msg = (ldc_msg_t *)(ldcp->rx_q_va + ldcp->rx_ack_head);
2523 
2524 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) {
2525 			if (rv = i_ldc_process_data_ACK(ldcp, msg))
2526 				break;
2527 			msg->stype &= ~LDC_ACK;
2528 		}
2529 
2530 		ldcp->rx_ack_head =
2531 		    (ldcp->rx_ack_head + LDC_PACKET_SIZE) %
2532 		    (ldcp->rx_q_entries << LDC_PACKET_SHIFT);
2533 	}
2534 	return (rv);
2535 }
2536 
2537 /* -------------------------------------------------------------------------- */
2538 
2539 /*
2540  * LDC API functions
2541  */
2542 
2543 /*
2544  * Initialize the channel. Allocate internal structure and memory for
2545  * TX/RX queues, and initialize locks.
2546  */
2547 int
2548 ldc_init(uint64_t id, ldc_attr_t *attr, ldc_handle_t *handle)
2549 {
2550 	ldc_chan_t 	*ldcp;
2551 	int		rv, exit_val;
2552 	uint64_t	ra_base, nentries;
2553 	uint64_t	qlen;
2554 
2555 	exit_val = EINVAL;	/* guarantee an error if exit on failure */
2556 
2557 	if (attr == NULL) {
2558 		DWARN(id, "ldc_init: (0x%llx) invalid attr\n", id);
2559 		return (EINVAL);
2560 	}
2561 	if (handle == NULL) {
2562 		DWARN(id, "ldc_init: (0x%llx) invalid handle\n", id);
2563 		return (EINVAL);
2564 	}
2565 
2566 	/* check if channel is valid */
2567 	rv = hv_ldc_tx_qinfo(id, &ra_base, &nentries);
2568 	if (rv == H_ECHANNEL) {
2569 		DWARN(id, "ldc_init: (0x%llx) invalid channel id\n", id);
2570 		return (EINVAL);
2571 	}
2572 
2573 	/* check if the channel has already been initialized */
2574 	mutex_enter(&ldcssp->lock);
2575 	ldcp = ldcssp->chan_list;
2576 	while (ldcp != NULL) {
2577 		if (ldcp->id == id) {
2578 			DWARN(id, "ldc_init: (0x%llx) already initialized\n",
2579 			    id);
2580 			mutex_exit(&ldcssp->lock);
2581 			return (EADDRINUSE);
2582 		}
2583 		ldcp = ldcp->next;
2584 	}
2585 	mutex_exit(&ldcssp->lock);
2586 
2587 	ASSERT(ldcp == NULL);
2588 
2589 	*handle = 0;
2590 
2591 	/* Allocate an ldcp structure */
2592 	ldcp = kmem_zalloc(sizeof (ldc_chan_t), KM_SLEEP);
2593 
2594 	/*
2595 	 * Initialize the channel and Tx lock
2596 	 *
2597 	 * The channel 'lock' protects the entire channel and
2598 	 * should be acquired before initializing, resetting,
2599 	 * destroying or reading from a channel.
2600 	 *
2601 	 * The 'tx_lock' should be acquired prior to transmitting
2602 	 * data over the channel. The lock should also be acquired
2603 	 * prior to channel reconfiguration (in order to prevent
2604 	 * concurrent writes).
2605 	 *
2606 	 * ORDERING: When both locks are being acquired, to prevent
2607 	 * deadlocks, the channel lock should be always acquired prior
2608 	 * to the tx_lock.
2609 	 */
2610 	mutex_init(&ldcp->lock, NULL, MUTEX_DRIVER, NULL);
2611 	mutex_init(&ldcp->tx_lock, NULL, MUTEX_DRIVER, NULL);
2612 
2613 	/* Initialize the channel */
2614 	ldcp->id = id;
2615 	ldcp->cb = NULL;
2616 	ldcp->cb_arg = NULL;
2617 	ldcp->cb_inprogress = B_FALSE;
2618 	ldcp->cb_enabled = B_FALSE;
2619 	ldcp->next = NULL;
2620 
2621 	/* Read attributes */
2622 	ldcp->mode = attr->mode;
2623 	ldcp->devclass = attr->devclass;
2624 	ldcp->devinst = attr->instance;
2625 	ldcp->mtu = (attr->mtu > 0) ? attr->mtu : LDC_DEFAULT_MTU;
2626 
2627 	D1(ldcp->id,
2628 	    "ldc_init: (0x%llx) channel attributes, class=0x%x, "
2629 	    "instance=0x%llx, mode=%d, mtu=%d\n",
2630 	    ldcp->id, ldcp->devclass, ldcp->devinst, ldcp->mode, ldcp->mtu);
2631 
2632 	ldcp->next_vidx = 0;
2633 	ldcp->tstate = TS_IN_RESET;
2634 	ldcp->hstate = 0;
2635 	ldcp->last_msg_snt = LDC_INIT_SEQID;
2636 	ldcp->last_ack_rcd = 0;
2637 	ldcp->last_msg_rcd = 0;
2638 	ldcp->rx_ack_head = ACKPEEK_HEAD_INVALID;
2639 
2640 	ldcp->stream_bufferp = NULL;
2641 	ldcp->exp_dring_list = NULL;
2642 	ldcp->imp_dring_list = NULL;
2643 	ldcp->mhdl_list = NULL;
2644 
2645 	ldcp->tx_intr_state = LDC_INTR_NONE;
2646 	ldcp->rx_intr_state = LDC_INTR_NONE;
2647 
2648 	/* Initialize payload size depending on whether channel is reliable */
2649 	switch (ldcp->mode) {
2650 	case LDC_MODE_RAW:
2651 		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_RAW;
2652 		ldcp->read_p = i_ldc_read_raw;
2653 		ldcp->write_p = i_ldc_write_raw;
2654 		break;
2655 	case LDC_MODE_UNRELIABLE:
2656 		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_UNRELIABLE;
2657 		ldcp->read_p = i_ldc_read_packet;
2658 		ldcp->write_p = i_ldc_write_packet;
2659 		break;
2660 	case LDC_MODE_RELIABLE:
2661 		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_RELIABLE;
2662 
2663 		ldcp->stream_remains = 0;
2664 		ldcp->stream_offset = 0;
2665 		ldcp->stream_bufferp = kmem_alloc(ldcp->mtu, KM_SLEEP);
2666 		ldcp->read_p = i_ldc_read_stream;
2667 		ldcp->write_p = i_ldc_write_stream;
2668 		break;
2669 	default:
2670 		exit_val = EINVAL;
2671 		goto cleanup_on_exit;
2672 	}
2673 
2674 	/*
2675 	 * qlen is (mtu * ldc_mtu_msgs) / pkt_payload. If this
2676 	 * value is smaller than default length of ldc_queue_entries,
2677 	 * qlen is set to ldc_queue_entries. Ensure that computed
2678 	 * length is a power-of-two value.
2679 	 */
2680 	qlen = (ldcp->mtu * ldc_mtu_msgs) / ldcp->pkt_payload;
2681 	if (!ISP2(qlen)) {
2682 		uint64_t	tmp = 1;
2683 		while (qlen) {
2684 			qlen >>= 1; tmp <<= 1;
2685 		}
2686 		qlen = tmp;
2687 	}
2688 
2689 	ldcp->rx_q_entries =
2690 	    (qlen < ldc_queue_entries) ? ldc_queue_entries : qlen;
2691 	ldcp->tx_q_entries = ldcp->rx_q_entries;
2692 
2693 	D1(ldcp->id, "ldc_init: queue length = 0x%llx\n", ldcp->rx_q_entries);
2694 
2695 	/* Create a transmit queue */
2696 	ldcp->tx_q_va = (uint64_t)
2697 	    contig_mem_alloc(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
2698 	if (ldcp->tx_q_va == NULL) {
2699 		cmn_err(CE_WARN,
2700 		    "ldc_init: (0x%lx) TX queue allocation failed\n",
2701 		    ldcp->id);
2702 		exit_val = ENOMEM;
2703 		goto cleanup_on_exit;
2704 	}
2705 	ldcp->tx_q_ra = va_to_pa((caddr_t)ldcp->tx_q_va);
2706 
2707 	D2(ldcp->id, "ldc_init: txq_va=0x%llx, txq_ra=0x%llx, entries=0x%llx\n",
2708 	    ldcp->tx_q_va, ldcp->tx_q_ra, ldcp->tx_q_entries);
2709 
2710 	ldcp->tstate |= TS_TXQ_RDY;
2711 
2712 	/* Create a receive queue */
2713 	ldcp->rx_q_va = (uint64_t)
2714 	    contig_mem_alloc(ldcp->rx_q_entries << LDC_PACKET_SHIFT);
2715 	if (ldcp->rx_q_va == NULL) {
2716 		cmn_err(CE_WARN,
2717 		    "ldc_init: (0x%lx) RX queue allocation failed\n",
2718 		    ldcp->id);
2719 		exit_val = ENOMEM;
2720 		goto cleanup_on_exit;
2721 	}
2722 	ldcp->rx_q_ra = va_to_pa((caddr_t)ldcp->rx_q_va);
2723 
2724 	D2(ldcp->id, "ldc_init: rxq_va=0x%llx, rxq_ra=0x%llx, entries=0x%llx\n",
2725 	    ldcp->rx_q_va, ldcp->rx_q_ra, ldcp->rx_q_entries);
2726 
2727 	ldcp->tstate |= TS_RXQ_RDY;
2728 
2729 	/* Setup a separate read data queue */
2730 	if (ldcp->mode == LDC_MODE_RELIABLE) {
2731 		ldcp->readq_get_state = i_ldc_dq_rx_get_state;
2732 		ldcp->readq_set_head  = i_ldc_set_rxdq_head;
2733 
2734 		/* Make sure the data queue multiplier is a power of 2 */
2735 		if (!ISP2(ldc_rxdq_multiplier)) {
2736 			D1(ldcp->id, "ldc_init: (0x%llx) ldc_rxdq_multiplier "
2737 			    "not a power of 2, resetting", ldcp->id);
2738 			ldc_rxdq_multiplier = LDC_RXDQ_MULTIPLIER;
2739 		}
2740 
2741 		ldcp->rx_dq_entries = ldc_rxdq_multiplier * ldcp->rx_q_entries;
2742 		ldcp->rx_dq_va = (uint64_t)
2743 		    kmem_alloc(ldcp->rx_dq_entries << LDC_PACKET_SHIFT,
2744 		    KM_SLEEP);
2745 		if (ldcp->rx_dq_va == NULL) {
2746 			cmn_err(CE_WARN,
2747 			    "ldc_init: (0x%lx) RX data queue "
2748 			    "allocation failed\n", ldcp->id);
2749 			exit_val = ENOMEM;
2750 			goto cleanup_on_exit;
2751 		}
2752 
2753 		ldcp->rx_dq_head = ldcp->rx_dq_tail = 0;
2754 
2755 		D2(ldcp->id, "ldc_init: rx_dq_va=0x%llx, "
2756 		    "rx_dq_entries=0x%llx\n", ldcp->rx_dq_va,
2757 		    ldcp->rx_dq_entries);
2758 	} else {
2759 		ldcp->readq_get_state = i_ldc_hvq_rx_get_state;
2760 		ldcp->readq_set_head  = i_ldc_set_rx_head;
2761 	}
2762 
2763 	/* Init descriptor ring and memory handle list lock */
2764 	mutex_init(&ldcp->exp_dlist_lock, NULL, MUTEX_DRIVER, NULL);
2765 	mutex_init(&ldcp->imp_dlist_lock, NULL, MUTEX_DRIVER, NULL);
2766 	mutex_init(&ldcp->mlist_lock, NULL, MUTEX_DRIVER, NULL);
2767 
2768 	/* mark status as INITialized */
2769 	ldcp->status = LDC_INIT;
2770 
2771 	/* Add to channel list */
2772 	mutex_enter(&ldcssp->lock);
2773 	ldcp->next = ldcssp->chan_list;
2774 	ldcssp->chan_list = ldcp;
2775 	ldcssp->channel_count++;
2776 	mutex_exit(&ldcssp->lock);
2777 
2778 	/* set the handle */
2779 	*handle = (ldc_handle_t)ldcp;
2780 
2781 	D1(ldcp->id, "ldc_init: (0x%llx) channel initialized\n", ldcp->id);
2782 
2783 	return (0);
2784 
2785 cleanup_on_exit:
2786 
2787 	if (ldcp->mode == LDC_MODE_RELIABLE && ldcp->stream_bufferp)
2788 		kmem_free(ldcp->stream_bufferp, ldcp->mtu);
2789 
2790 	if (ldcp->tstate & TS_TXQ_RDY)
2791 		contig_mem_free((caddr_t)ldcp->tx_q_va,
2792 		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT));
2793 
2794 	if (ldcp->tstate & TS_RXQ_RDY)
2795 		contig_mem_free((caddr_t)ldcp->rx_q_va,
2796 		    (ldcp->rx_q_entries << LDC_PACKET_SHIFT));
2797 
2798 	mutex_destroy(&ldcp->tx_lock);
2799 	mutex_destroy(&ldcp->lock);
2800 
2801 	if (ldcp)
2802 		kmem_free(ldcp, sizeof (ldc_chan_t));
2803 
2804 	return (exit_val);
2805 }
2806 
2807 /*
2808  * Finalizes the LDC connection. It will return EBUSY if the
2809  * channel is open. A ldc_close() has to be done prior to
2810  * a ldc_fini operation. It frees TX/RX queues, associated
2811  * with the channel
2812  */
2813 int
2814 ldc_fini(ldc_handle_t handle)
2815 {
2816 	ldc_chan_t 	*ldcp;
2817 	ldc_chan_t 	*tmp_ldcp;
2818 	uint64_t 	id;
2819 
2820 	if (handle == NULL) {
2821 		DWARN(DBG_ALL_LDCS, "ldc_fini: invalid channel handle\n");
2822 		return (EINVAL);
2823 	}
2824 	ldcp = (ldc_chan_t *)handle;
2825 	id = ldcp->id;
2826 
2827 	mutex_enter(&ldcp->lock);
2828 
2829 	if ((ldcp->tstate & ~TS_IN_RESET) > TS_INIT) {
2830 		DWARN(ldcp->id, "ldc_fini: (0x%llx) channel is open\n",
2831 		    ldcp->id);
2832 		mutex_exit(&ldcp->lock);
2833 		return (EBUSY);
2834 	}
2835 
2836 	/* Remove from the channel list */
2837 	mutex_enter(&ldcssp->lock);
2838 	tmp_ldcp = ldcssp->chan_list;
2839 	if (tmp_ldcp == ldcp) {
2840 		ldcssp->chan_list = ldcp->next;
2841 		ldcp->next = NULL;
2842 	} else {
2843 		while (tmp_ldcp != NULL) {
2844 			if (tmp_ldcp->next == ldcp) {
2845 				tmp_ldcp->next = ldcp->next;
2846 				ldcp->next = NULL;
2847 				break;
2848 			}
2849 			tmp_ldcp = tmp_ldcp->next;
2850 		}
2851 		if (tmp_ldcp == NULL) {
2852 			DWARN(DBG_ALL_LDCS, "ldc_fini: invalid channel hdl\n");
2853 			mutex_exit(&ldcssp->lock);
2854 			mutex_exit(&ldcp->lock);
2855 			return (EINVAL);
2856 		}
2857 	}
2858 
2859 	ldcssp->channel_count--;
2860 
2861 	mutex_exit(&ldcssp->lock);
2862 
2863 	/* Free the map table for this channel */
2864 	if (ldcp->mtbl) {
2865 		(void) hv_ldc_set_map_table(ldcp->id, NULL, NULL);
2866 		if (ldcp->mtbl->contigmem)
2867 			contig_mem_free(ldcp->mtbl->table, ldcp->mtbl->size);
2868 		else
2869 			kmem_free(ldcp->mtbl->table, ldcp->mtbl->size);
2870 		mutex_destroy(&ldcp->mtbl->lock);
2871 		kmem_free(ldcp->mtbl, sizeof (ldc_mtbl_t));
2872 	}
2873 
2874 	/* Destroy descriptor ring and memory handle list lock */
2875 	mutex_destroy(&ldcp->exp_dlist_lock);
2876 	mutex_destroy(&ldcp->imp_dlist_lock);
2877 	mutex_destroy(&ldcp->mlist_lock);
2878 
2879 	/* Free the stream buffer for RELIABLE_MODE */
2880 	if (ldcp->mode == LDC_MODE_RELIABLE && ldcp->stream_bufferp)
2881 		kmem_free(ldcp->stream_bufferp, ldcp->mtu);
2882 
2883 	/* Free the RX queue */
2884 	contig_mem_free((caddr_t)ldcp->rx_q_va,
2885 	    (ldcp->rx_q_entries << LDC_PACKET_SHIFT));
2886 	ldcp->tstate &= ~TS_RXQ_RDY;
2887 
2888 	/* Free the RX data queue */
2889 	if (ldcp->mode == LDC_MODE_RELIABLE) {
2890 		kmem_free((caddr_t)ldcp->rx_dq_va,
2891 		    (ldcp->rx_dq_entries << LDC_PACKET_SHIFT));
2892 	}
2893 
2894 	/* Free the TX queue */
2895 	contig_mem_free((caddr_t)ldcp->tx_q_va,
2896 	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT));
2897 	ldcp->tstate &= ~TS_TXQ_RDY;
2898 
2899 	mutex_exit(&ldcp->lock);
2900 
2901 	/* Destroy mutex */
2902 	mutex_destroy(&ldcp->tx_lock);
2903 	mutex_destroy(&ldcp->lock);
2904 
2905 	/* free channel structure */
2906 	kmem_free(ldcp, sizeof (ldc_chan_t));
2907 
2908 	D1(id, "ldc_fini: (0x%llx) channel finalized\n", id);
2909 
2910 	return (0);
2911 }
2912 
2913 /*
2914  * Open the LDC channel for use. It registers the TX/RX queues
2915  * with the Hypervisor. It also specifies the interrupt number
2916  * and target CPU for this channel
2917  */
2918 int
2919 ldc_open(ldc_handle_t handle)
2920 {
2921 	ldc_chan_t 	*ldcp;
2922 	int 		rv;
2923 
2924 	if (handle == NULL) {
2925 		DWARN(DBG_ALL_LDCS, "ldc_open: invalid channel handle\n");
2926 		return (EINVAL);
2927 	}
2928 
2929 	ldcp = (ldc_chan_t *)handle;
2930 
2931 	mutex_enter(&ldcp->lock);
2932 
2933 	if (ldcp->tstate < TS_INIT) {
2934 		DWARN(ldcp->id,
2935 		    "ldc_open: (0x%llx) channel not initialized\n", ldcp->id);
2936 		mutex_exit(&ldcp->lock);
2937 		return (EFAULT);
2938 	}
2939 	if ((ldcp->tstate & ~TS_IN_RESET) >= TS_OPEN) {
2940 		DWARN(ldcp->id,
2941 		    "ldc_open: (0x%llx) channel is already open\n", ldcp->id);
2942 		mutex_exit(&ldcp->lock);
2943 		return (EFAULT);
2944 	}
2945 
2946 	/*
2947 	 * Unregister/Register the tx queue with the hypervisor
2948 	 */
2949 	rv = hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
2950 	if (rv) {
2951 		cmn_err(CE_WARN,
2952 		    "ldc_open: (0x%lx) channel tx queue unconf failed\n",
2953 		    ldcp->id);
2954 		mutex_exit(&ldcp->lock);
2955 		return (EIO);
2956 	}
2957 
2958 	rv = hv_ldc_tx_qconf(ldcp->id, ldcp->tx_q_ra, ldcp->tx_q_entries);
2959 	if (rv) {
2960 		cmn_err(CE_WARN,
2961 		    "ldc_open: (0x%lx) channel tx queue conf failed\n",
2962 		    ldcp->id);
2963 		mutex_exit(&ldcp->lock);
2964 		return (EIO);
2965 	}
2966 
2967 	D2(ldcp->id, "ldc_open: (0x%llx) registered tx queue with LDC\n",
2968 	    ldcp->id);
2969 
2970 	/*
2971 	 * Unregister/Register the rx queue with the hypervisor
2972 	 */
2973 	rv = hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
2974 	if (rv) {
2975 		cmn_err(CE_WARN,
2976 		    "ldc_open: (0x%lx) channel rx queue unconf failed\n",
2977 		    ldcp->id);
2978 		mutex_exit(&ldcp->lock);
2979 		return (EIO);
2980 	}
2981 
2982 	rv = hv_ldc_rx_qconf(ldcp->id, ldcp->rx_q_ra, ldcp->rx_q_entries);
2983 	if (rv) {
2984 		cmn_err(CE_WARN,
2985 		    "ldc_open: (0x%lx) channel rx queue conf failed\n",
2986 		    ldcp->id);
2987 		mutex_exit(&ldcp->lock);
2988 		return (EIO);
2989 	}
2990 
2991 	D2(ldcp->id, "ldc_open: (0x%llx) registered rx queue with LDC\n",
2992 	    ldcp->id);
2993 
2994 	ldcp->tstate |= TS_QCONF_RDY;
2995 
2996 	/* Register the channel with the channel nexus */
2997 	rv = i_ldc_register_channel(ldcp);
2998 	if (rv && rv != EAGAIN) {
2999 		cmn_err(CE_WARN,
3000 		    "ldc_open: (0x%lx) channel register failed\n", ldcp->id);
3001 		ldcp->tstate &= ~TS_QCONF_RDY;
3002 		(void) hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
3003 		(void) hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
3004 		mutex_exit(&ldcp->lock);
3005 		return (EIO);
3006 	}
3007 
3008 	/* mark channel in OPEN state */
3009 	ldcp->status = LDC_OPEN;
3010 
3011 	/* Read channel state */
3012 	rv = hv_ldc_tx_get_state(ldcp->id,
3013 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
3014 	if (rv) {
3015 		cmn_err(CE_WARN,
3016 		    "ldc_open: (0x%lx) cannot read channel state\n",
3017 		    ldcp->id);
3018 		(void) i_ldc_unregister_channel(ldcp);
3019 		ldcp->tstate &= ~TS_QCONF_RDY;
3020 		(void) hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
3021 		(void) hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
3022 		mutex_exit(&ldcp->lock);
3023 		return (EIO);
3024 	}
3025 
3026 	/*
3027 	 * set the ACKd head to current head location for reliable
3028 	 */
3029 	ldcp->tx_ackd_head = ldcp->tx_head;
3030 
3031 	/* mark channel ready if HV report link is UP (peer alloc'd Rx queue) */
3032 	if (ldcp->link_state == LDC_CHANNEL_UP ||
3033 	    ldcp->link_state == LDC_CHANNEL_RESET) {
3034 		ldcp->tstate |= TS_LINK_READY;
3035 		ldcp->status = LDC_READY;
3036 	}
3037 
3038 	/*
3039 	 * if channel is being opened in RAW mode - no handshake is needed
3040 	 * switch the channel READY and UP state
3041 	 */
3042 	if (ldcp->mode == LDC_MODE_RAW) {
3043 		ldcp->tstate = TS_UP;	/* set bits associated with LDC UP */
3044 		ldcp->status = LDC_UP;
3045 	}
3046 
3047 	mutex_exit(&ldcp->lock);
3048 
3049 	/*
3050 	 * Increment number of open channels
3051 	 */
3052 	mutex_enter(&ldcssp->lock);
3053 	ldcssp->channels_open++;
3054 	mutex_exit(&ldcssp->lock);
3055 
3056 	D1(ldcp->id,
3057 	    "ldc_open: (0x%llx) channel (0x%p) open for use "
3058 	    "(tstate=0x%x, status=0x%x)\n",
3059 	    ldcp->id, ldcp, ldcp->tstate, ldcp->status);
3060 
3061 	return (0);
3062 }
3063 
3064 /*
3065  * Close the LDC connection. It will return EBUSY if there
3066  * are memory segments or descriptor rings either bound to or
3067  * mapped over the channel
3068  */
3069 int
3070 ldc_close(ldc_handle_t handle)
3071 {
3072 	ldc_chan_t 	*ldcp;
3073 	int		rv = 0, retries = 0;
3074 	boolean_t	chk_done = B_FALSE;
3075 
3076 	if (handle == NULL) {
3077 		DWARN(DBG_ALL_LDCS, "ldc_close: invalid channel handle\n");
3078 		return (EINVAL);
3079 	}
3080 	ldcp = (ldc_chan_t *)handle;
3081 
3082 	mutex_enter(&ldcp->lock);
3083 
3084 	/* return error if channel is not open */
3085 	if ((ldcp->tstate & ~TS_IN_RESET) < TS_OPEN) {
3086 		DWARN(ldcp->id,
3087 		    "ldc_close: (0x%llx) channel is not open\n", ldcp->id);
3088 		mutex_exit(&ldcp->lock);
3089 		return (EFAULT);
3090 	}
3091 
3092 	/* if any memory handles, drings, are bound or mapped cannot close */
3093 	if (ldcp->mhdl_list != NULL) {
3094 		DWARN(ldcp->id,
3095 		    "ldc_close: (0x%llx) channel has bound memory handles\n",
3096 		    ldcp->id);
3097 		mutex_exit(&ldcp->lock);
3098 		return (EBUSY);
3099 	}
3100 	if (ldcp->exp_dring_list != NULL) {
3101 		DWARN(ldcp->id,
3102 		    "ldc_close: (0x%llx) channel has bound descriptor rings\n",
3103 		    ldcp->id);
3104 		mutex_exit(&ldcp->lock);
3105 		return (EBUSY);
3106 	}
3107 	if (ldcp->imp_dring_list != NULL) {
3108 		DWARN(ldcp->id,
3109 		    "ldc_close: (0x%llx) channel has mapped descriptor rings\n",
3110 		    ldcp->id);
3111 		mutex_exit(&ldcp->lock);
3112 		return (EBUSY);
3113 	}
3114 
3115 	if (ldcp->cb_inprogress) {
3116 		DWARN(ldcp->id, "ldc_close: (0x%llx) callback active\n",
3117 		    ldcp->id);
3118 		mutex_exit(&ldcp->lock);
3119 		return (EWOULDBLOCK);
3120 	}
3121 
3122 	/* Obtain Tx lock */
3123 	mutex_enter(&ldcp->tx_lock);
3124 
3125 	/*
3126 	 * Wait for pending transmits to complete i.e Tx queue to drain
3127 	 * if there are pending pkts - wait 1 ms and retry again
3128 	 */
3129 	for (;;) {
3130 
3131 		rv = hv_ldc_tx_get_state(ldcp->id,
3132 		    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
3133 		if (rv) {
3134 			cmn_err(CE_WARN,
3135 			    "ldc_close: (0x%lx) cannot read qptrs\n", ldcp->id);
3136 			mutex_exit(&ldcp->tx_lock);
3137 			mutex_exit(&ldcp->lock);
3138 			return (EIO);
3139 		}
3140 
3141 		if (ldcp->tx_head == ldcp->tx_tail ||
3142 		    ldcp->link_state != LDC_CHANNEL_UP) {
3143 			break;
3144 		}
3145 
3146 		if (chk_done) {
3147 			DWARN(ldcp->id,
3148 			    "ldc_close: (0x%llx) Tx queue drain timeout\n",
3149 			    ldcp->id);
3150 			break;
3151 		}
3152 
3153 		/* wait for one ms and try again */
3154 		delay(drv_usectohz(1000));
3155 		chk_done = B_TRUE;
3156 	}
3157 
3158 	/*
3159 	 * Drain the Tx and Rx queues as we are closing the
3160 	 * channel. We dont care about any pending packets.
3161 	 * We have to also drain the queue prior to clearing
3162 	 * pending interrupts, otherwise the HV will trigger
3163 	 * an interrupt the moment the interrupt state is
3164 	 * cleared.
3165 	 */
3166 	(void) i_ldc_txq_reconf(ldcp);
3167 	(void) i_ldc_rxq_drain(ldcp);
3168 
3169 	/*
3170 	 * Unregister the channel with the nexus
3171 	 */
3172 	while ((rv = i_ldc_unregister_channel(ldcp)) != 0) {
3173 
3174 		mutex_exit(&ldcp->tx_lock);
3175 		mutex_exit(&ldcp->lock);
3176 
3177 		/* if any error other than EAGAIN return back */
3178 		if (rv != EAGAIN || retries >= ldc_max_retries) {
3179 			cmn_err(CE_WARN,
3180 			    "ldc_close: (0x%lx) unregister failed, %d\n",
3181 			    ldcp->id, rv);
3182 			return (rv);
3183 		}
3184 
3185 		/*
3186 		 * As there could be pending interrupts we need
3187 		 * to wait and try again
3188 		 */
3189 		drv_usecwait(ldc_close_delay);
3190 		mutex_enter(&ldcp->lock);
3191 		mutex_enter(&ldcp->tx_lock);
3192 		retries++;
3193 	}
3194 
3195 	ldcp->tstate &= ~TS_QCONF_RDY;
3196 
3197 	/*
3198 	 * Unregister queues
3199 	 */
3200 	rv = hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
3201 	if (rv) {
3202 		cmn_err(CE_WARN,
3203 		    "ldc_close: (0x%lx) channel TX queue unconf failed\n",
3204 		    ldcp->id);
3205 		mutex_exit(&ldcp->tx_lock);
3206 		mutex_exit(&ldcp->lock);
3207 		return (EIO);
3208 	}
3209 	rv = hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
3210 	if (rv) {
3211 		cmn_err(CE_WARN,
3212 		    "ldc_close: (0x%lx) channel RX queue unconf failed\n",
3213 		    ldcp->id);
3214 		mutex_exit(&ldcp->tx_lock);
3215 		mutex_exit(&ldcp->lock);
3216 		return (EIO);
3217 	}
3218 
3219 	/* Reset channel state information */
3220 	i_ldc_reset_state(ldcp);
3221 
3222 	/* Mark channel as down and in initialized state */
3223 	ldcp->tx_ackd_head = 0;
3224 	ldcp->tx_head = 0;
3225 	ldcp->tstate = TS_IN_RESET|TS_INIT;
3226 	ldcp->status = LDC_INIT;
3227 
3228 	mutex_exit(&ldcp->tx_lock);
3229 	mutex_exit(&ldcp->lock);
3230 
3231 	/* Decrement number of open channels */
3232 	mutex_enter(&ldcssp->lock);
3233 	ldcssp->channels_open--;
3234 	mutex_exit(&ldcssp->lock);
3235 
3236 	D1(ldcp->id, "ldc_close: (0x%llx) channel closed\n", ldcp->id);
3237 
3238 	return (0);
3239 }
3240 
3241 /*
3242  * Register channel callback
3243  */
3244 int
3245 ldc_reg_callback(ldc_handle_t handle,
3246     uint_t(*cb)(uint64_t event, caddr_t arg), caddr_t arg)
3247 {
3248 	ldc_chan_t *ldcp;
3249 
3250 	if (handle == NULL) {
3251 		DWARN(DBG_ALL_LDCS,
3252 		    "ldc_reg_callback: invalid channel handle\n");
3253 		return (EINVAL);
3254 	}
3255 	if (((uint64_t)cb) < KERNELBASE) {
3256 		DWARN(DBG_ALL_LDCS, "ldc_reg_callback: invalid callback\n");
3257 		return (EINVAL);
3258 	}
3259 	ldcp = (ldc_chan_t *)handle;
3260 
3261 	mutex_enter(&ldcp->lock);
3262 
3263 	if (ldcp->cb) {
3264 		DWARN(ldcp->id, "ldc_reg_callback: (0x%llx) callback exists\n",
3265 		    ldcp->id);
3266 		mutex_exit(&ldcp->lock);
3267 		return (EIO);
3268 	}
3269 	if (ldcp->cb_inprogress) {
3270 		DWARN(ldcp->id, "ldc_reg_callback: (0x%llx) callback active\n",
3271 		    ldcp->id);
3272 		mutex_exit(&ldcp->lock);
3273 		return (EWOULDBLOCK);
3274 	}
3275 
3276 	ldcp->cb = cb;
3277 	ldcp->cb_arg = arg;
3278 	ldcp->cb_enabled = B_TRUE;
3279 
3280 	D1(ldcp->id,
3281 	    "ldc_reg_callback: (0x%llx) registered callback for channel\n",
3282 	    ldcp->id);
3283 
3284 	mutex_exit(&ldcp->lock);
3285 
3286 	return (0);
3287 }
3288 
3289 /*
3290  * Unregister channel callback
3291  */
3292 int
3293 ldc_unreg_callback(ldc_handle_t handle)
3294 {
3295 	ldc_chan_t *ldcp;
3296 
3297 	if (handle == NULL) {
3298 		DWARN(DBG_ALL_LDCS,
3299 		    "ldc_unreg_callback: invalid channel handle\n");
3300 		return (EINVAL);
3301 	}
3302 	ldcp = (ldc_chan_t *)handle;
3303 
3304 	mutex_enter(&ldcp->lock);
3305 
3306 	if (ldcp->cb == NULL) {
3307 		DWARN(ldcp->id,
3308 		    "ldc_unreg_callback: (0x%llx) no callback exists\n",
3309 		    ldcp->id);
3310 		mutex_exit(&ldcp->lock);
3311 		return (EIO);
3312 	}
3313 	if (ldcp->cb_inprogress) {
3314 		DWARN(ldcp->id,
3315 		    "ldc_unreg_callback: (0x%llx) callback active\n",
3316 		    ldcp->id);
3317 		mutex_exit(&ldcp->lock);
3318 		return (EWOULDBLOCK);
3319 	}
3320 
3321 	ldcp->cb = NULL;
3322 	ldcp->cb_arg = NULL;
3323 	ldcp->cb_enabled = B_FALSE;
3324 
3325 	D1(ldcp->id,
3326 	    "ldc_unreg_callback: (0x%llx) unregistered callback for channel\n",
3327 	    ldcp->id);
3328 
3329 	mutex_exit(&ldcp->lock);
3330 
3331 	return (0);
3332 }
3333 
3334 
3335 /*
3336  * Bring a channel up by initiating a handshake with the peer
3337  * This call is asynchronous. It will complete at a later point
3338  * in time when the peer responds back with an RTR.
3339  */
3340 int
3341 ldc_up(ldc_handle_t handle)
3342 {
3343 	int 		rv;
3344 	ldc_chan_t 	*ldcp;
3345 	ldc_msg_t 	*ldcmsg;
3346 	uint64_t 	tx_tail, tstate, link_state;
3347 
3348 	if (handle == NULL) {
3349 		DWARN(DBG_ALL_LDCS, "ldc_up: invalid channel handle\n");
3350 		return (EINVAL);
3351 	}
3352 	ldcp = (ldc_chan_t *)handle;
3353 
3354 	mutex_enter(&ldcp->lock);
3355 
3356 	D1(ldcp->id, "ldc_up: (0x%llx) doing channel UP\n", ldcp->id);
3357 
3358 	/* clear the reset state */
3359 	tstate = ldcp->tstate;
3360 	ldcp->tstate &= ~TS_IN_RESET;
3361 
3362 	if (ldcp->tstate == TS_UP) {
3363 		DWARN(ldcp->id,
3364 		    "ldc_up: (0x%llx) channel is already in UP state\n",
3365 		    ldcp->id);
3366 
3367 		/* mark channel as up */
3368 		ldcp->status = LDC_UP;
3369 
3370 		/*
3371 		 * if channel was in reset state and there was
3372 		 * pending data clear interrupt state. this will
3373 		 * trigger an interrupt, causing the RX handler to
3374 		 * to invoke the client's callback
3375 		 */
3376 		if ((tstate & TS_IN_RESET) &&
3377 		    ldcp->rx_intr_state == LDC_INTR_PEND) {
3378 			D1(ldcp->id,
3379 			    "ldc_up: (0x%llx) channel has pending data, "
3380 			    "clearing interrupt\n", ldcp->id);
3381 			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
3382 		}
3383 
3384 		mutex_exit(&ldcp->lock);
3385 		return (0);
3386 	}
3387 
3388 	/* if the channel is in RAW mode - mark it as UP, if READY */
3389 	if (ldcp->mode == LDC_MODE_RAW && ldcp->tstate >= TS_READY) {
3390 		ldcp->tstate = TS_UP;
3391 		mutex_exit(&ldcp->lock);
3392 		return (0);
3393 	}
3394 
3395 	/* Don't start another handshake if there is one in progress */
3396 	if (ldcp->hstate) {
3397 		D1(ldcp->id,
3398 		    "ldc_up: (0x%llx) channel handshake in progress\n",
3399 		    ldcp->id);
3400 		mutex_exit(&ldcp->lock);
3401 		return (0);
3402 	}
3403 
3404 	mutex_enter(&ldcp->tx_lock);
3405 
3406 	/* save current link state */
3407 	link_state = ldcp->link_state;
3408 
3409 	/* get the current tail for the LDC msg */
3410 	rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
3411 	if (rv) {
3412 		D1(ldcp->id, "ldc_up: (0x%llx) cannot initiate handshake\n",
3413 		    ldcp->id);
3414 		mutex_exit(&ldcp->tx_lock);
3415 		mutex_exit(&ldcp->lock);
3416 		return (ECONNREFUSED);
3417 	}
3418 
3419 	/*
3420 	 * If i_ldc_get_tx_tail() changed link_state to either RESET or UP,
3421 	 * from a previous state of DOWN, then mark the channel as
3422 	 * being ready for handshake.
3423 	 */
3424 	if ((link_state == LDC_CHANNEL_DOWN) &&
3425 	    (link_state != ldcp->link_state)) {
3426 
3427 		ASSERT((ldcp->link_state == LDC_CHANNEL_RESET) ||
3428 		    (ldcp->link_state == LDC_CHANNEL_UP));
3429 
3430 		if (ldcp->mode == LDC_MODE_RAW) {
3431 			ldcp->status = LDC_UP;
3432 			ldcp->tstate = TS_UP;
3433 			mutex_exit(&ldcp->tx_lock);
3434 			mutex_exit(&ldcp->lock);
3435 			return (0);
3436 		} else {
3437 			ldcp->status = LDC_READY;
3438 			ldcp->tstate |= TS_LINK_READY;
3439 		}
3440 
3441 	}
3442 
3443 	ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
3444 	ZERO_PKT(ldcmsg);
3445 
3446 	ldcmsg->type = LDC_CTRL;
3447 	ldcmsg->stype = LDC_INFO;
3448 	ldcmsg->ctrl = LDC_VER;
3449 	ldcp->next_vidx = 0;
3450 	bcopy(&ldc_versions[0], ldcmsg->udata, sizeof (ldc_versions[0]));
3451 
3452 	DUMP_LDC_PKT(ldcp, "ldc_up snd ver", (uint64_t)ldcmsg);
3453 
3454 	/* initiate the send by calling into HV and set the new tail */
3455 	tx_tail = (tx_tail + LDC_PACKET_SIZE) %
3456 	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
3457 
3458 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
3459 	if (rv) {
3460 		DWARN(ldcp->id,
3461 		    "ldc_up: (0x%llx) cannot initiate handshake rv=%d\n",
3462 		    ldcp->id, rv);
3463 		mutex_exit(&ldcp->tx_lock);
3464 		mutex_exit(&ldcp->lock);
3465 		return (rv);
3466 	}
3467 
3468 	ldcp->hstate |= TS_SENT_VER;
3469 	ldcp->tx_tail = tx_tail;
3470 	D1(ldcp->id, "ldc_up: (0x%llx) channel up initiated\n", ldcp->id);
3471 
3472 	mutex_exit(&ldcp->tx_lock);
3473 	mutex_exit(&ldcp->lock);
3474 
3475 	return (rv);
3476 }
3477 
3478 
3479 /*
3480  * Bring a channel down by resetting its state and queues
3481  */
3482 int
3483 ldc_down(ldc_handle_t handle)
3484 {
3485 	ldc_chan_t 	*ldcp;
3486 
3487 	if (handle == NULL) {
3488 		DWARN(DBG_ALL_LDCS, "ldc_down: invalid channel handle\n");
3489 		return (EINVAL);
3490 	}
3491 	ldcp = (ldc_chan_t *)handle;
3492 	mutex_enter(&ldcp->lock);
3493 	mutex_enter(&ldcp->tx_lock);
3494 	i_ldc_reset(ldcp, B_TRUE);
3495 	mutex_exit(&ldcp->tx_lock);
3496 	mutex_exit(&ldcp->lock);
3497 
3498 	return (0);
3499 }
3500 
3501 /*
3502  * Get the current channel status
3503  */
3504 int
3505 ldc_status(ldc_handle_t handle, ldc_status_t *status)
3506 {
3507 	ldc_chan_t *ldcp;
3508 
3509 	if (handle == NULL || status == NULL) {
3510 		DWARN(DBG_ALL_LDCS, "ldc_status: invalid argument\n");
3511 		return (EINVAL);
3512 	}
3513 	ldcp = (ldc_chan_t *)handle;
3514 
3515 	*status = ((ldc_chan_t *)handle)->status;
3516 
3517 	D1(ldcp->id,
3518 	    "ldc_status: (0x%llx) returned status %d\n", ldcp->id, *status);
3519 	return (0);
3520 }
3521 
3522 
3523 /*
3524  * Set the channel's callback mode - enable/disable callbacks
3525  */
3526 int
3527 ldc_set_cb_mode(ldc_handle_t handle, ldc_cb_mode_t cmode)
3528 {
3529 	ldc_chan_t 	*ldcp;
3530 
3531 	if (handle == NULL) {
3532 		DWARN(DBG_ALL_LDCS,
3533 		    "ldc_set_intr_mode: invalid channel handle\n");
3534 		return (EINVAL);
3535 	}
3536 	ldcp = (ldc_chan_t *)handle;
3537 
3538 	/*
3539 	 * Record no callbacks should be invoked
3540 	 */
3541 	mutex_enter(&ldcp->lock);
3542 
3543 	switch (cmode) {
3544 	case LDC_CB_DISABLE:
3545 		if (!ldcp->cb_enabled) {
3546 			DWARN(ldcp->id,
3547 			    "ldc_set_cb_mode: (0x%llx) callbacks disabled\n",
3548 			    ldcp->id);
3549 			break;
3550 		}
3551 		ldcp->cb_enabled = B_FALSE;
3552 
3553 		D1(ldcp->id, "ldc_set_cb_mode: (0x%llx) disabled callbacks\n",
3554 		    ldcp->id);
3555 		break;
3556 
3557 	case LDC_CB_ENABLE:
3558 		if (ldcp->cb_enabled) {
3559 			DWARN(ldcp->id,
3560 			    "ldc_set_cb_mode: (0x%llx) callbacks enabled\n",
3561 			    ldcp->id);
3562 			break;
3563 		}
3564 		ldcp->cb_enabled = B_TRUE;
3565 
3566 		D1(ldcp->id, "ldc_set_cb_mode: (0x%llx) enabled callbacks\n",
3567 		    ldcp->id);
3568 		break;
3569 	}
3570 
3571 	mutex_exit(&ldcp->lock);
3572 
3573 	return (0);
3574 }
3575 
3576 /*
3577  * Check to see if there are packets on the incoming queue
3578  * Will return hasdata = B_FALSE if there are no packets
3579  */
3580 int
3581 ldc_chkq(ldc_handle_t handle, boolean_t *hasdata)
3582 {
3583 	int 		rv;
3584 	uint64_t 	rx_head, rx_tail;
3585 	ldc_chan_t 	*ldcp;
3586 
3587 	if (handle == NULL) {
3588 		DWARN(DBG_ALL_LDCS, "ldc_chkq: invalid channel handle\n");
3589 		return (EINVAL);
3590 	}
3591 	ldcp = (ldc_chan_t *)handle;
3592 
3593 	*hasdata = B_FALSE;
3594 
3595 	mutex_enter(&ldcp->lock);
3596 
3597 	if (ldcp->tstate != TS_UP) {
3598 		D1(ldcp->id,
3599 		    "ldc_chkq: (0x%llx) channel is not up\n", ldcp->id);
3600 		mutex_exit(&ldcp->lock);
3601 		return (ECONNRESET);
3602 	}
3603 
3604 	/* Read packet(s) from the queue */
3605 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
3606 	    &ldcp->link_state);
3607 	if (rv != 0) {
3608 		cmn_err(CE_WARN,
3609 		    "ldc_chkq: (0x%lx) unable to read queue ptrs", ldcp->id);
3610 		mutex_exit(&ldcp->lock);
3611 		return (EIO);
3612 	}
3613 
3614 	/* reset the channel state if the channel went down */
3615 	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
3616 	    ldcp->link_state == LDC_CHANNEL_RESET) {
3617 		mutex_enter(&ldcp->tx_lock);
3618 		i_ldc_reset(ldcp, B_FALSE);
3619 		mutex_exit(&ldcp->tx_lock);
3620 		mutex_exit(&ldcp->lock);
3621 		return (ECONNRESET);
3622 	}
3623 
3624 	switch (ldcp->mode) {
3625 	case LDC_MODE_RAW:
3626 		/*
3627 		 * In raw mode, there are no ctrl packets, so checking
3628 		 * if the queue is non-empty is sufficient.
3629 		 */
3630 		*hasdata = (rx_head != rx_tail);
3631 		break;
3632 
3633 	case LDC_MODE_UNRELIABLE:
3634 		/*
3635 		 * In unreliable mode, if the queue is non-empty, we need
3636 		 * to check if it actually contains unread data packets.
3637 		 * The queue may just contain ctrl packets.
3638 		 */
3639 		if (rx_head != rx_tail) {
3640 			*hasdata = (i_ldc_chkq(ldcp) == 0);
3641 			/*
3642 			 * If no data packets were found on the queue,
3643 			 * all packets must have been control packets
3644 			 * which will now have been processed, leaving
3645 			 * the queue empty. If the interrupt state
3646 			 * is pending, we need to clear the interrupt
3647 			 * here.
3648 			 */
3649 			if (*hasdata == B_FALSE &&
3650 			    ldcp->rx_intr_state == LDC_INTR_PEND) {
3651 				i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
3652 			}
3653 		}
3654 		break;
3655 
3656 	case LDC_MODE_RELIABLE:
3657 		/*
3658 		 * In reliable mode, first check for 'stream_remains' > 0.
3659 		 * Otherwise, if the data queue head and tail pointers
3660 		 * differ, there must be data to read.
3661 		 */
3662 		if (ldcp->stream_remains > 0)
3663 			*hasdata = B_TRUE;
3664 		else
3665 			*hasdata = (ldcp->rx_dq_head != ldcp->rx_dq_tail);
3666 		break;
3667 
3668 	default:
3669 		cmn_err(CE_WARN, "ldc_chkq: (0x%lx) unexpected channel mode "
3670 		    "(0x%x)", ldcp->id, ldcp->mode);
3671 		mutex_exit(&ldcp->lock);
3672 		return (EIO);
3673 	}
3674 
3675 	mutex_exit(&ldcp->lock);
3676 
3677 	return (0);
3678 }
3679 
3680 
3681 /*
3682  * Read 'size' amount of bytes or less. If incoming buffer
3683  * is more than 'size', ENOBUFS is returned.
3684  *
3685  * On return, size contains the number of bytes read.
3686  */
3687 int
3688 ldc_read(ldc_handle_t handle, caddr_t bufp, size_t *sizep)
3689 {
3690 	ldc_chan_t 	*ldcp;
3691 	uint64_t 	rx_head = 0, rx_tail = 0;
3692 	int		rv = 0, exit_val;
3693 
3694 	if (handle == NULL) {
3695 		DWARN(DBG_ALL_LDCS, "ldc_read: invalid channel handle\n");
3696 		return (EINVAL);
3697 	}
3698 
3699 	ldcp = (ldc_chan_t *)handle;
3700 
3701 	/* channel lock */
3702 	mutex_enter(&ldcp->lock);
3703 
3704 	if (ldcp->tstate != TS_UP) {
3705 		DWARN(ldcp->id,
3706 		    "ldc_read: (0x%llx) channel is not in UP state\n",
3707 		    ldcp->id);
3708 		exit_val = ECONNRESET;
3709 	} else if (ldcp->mode == LDC_MODE_RELIABLE) {
3710 		TRACE_RXDQ_LENGTH(ldcp);
3711 		exit_val = ldcp->read_p(ldcp, bufp, sizep);
3712 
3713 		/*
3714 		 * For reliable mode channels, the interrupt
3715 		 * state is only set to pending during
3716 		 * interrupt handling when the secondary data
3717 		 * queue became full, leaving unprocessed
3718 		 * packets on the Rx queue. If the interrupt
3719 		 * state is pending and space is now available
3720 		 * on the data queue, clear the interrupt.
3721 		 */
3722 		if (ldcp->rx_intr_state == LDC_INTR_PEND &&
3723 		    Q_CONTIG_SPACE(ldcp->rx_dq_head, ldcp->rx_dq_tail,
3724 		    ldcp->rx_dq_entries << LDC_PACKET_SHIFT) >=
3725 		    LDC_PACKET_SIZE) {
3726 			/* data queue is not full */
3727 			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
3728 		}
3729 
3730 		mutex_exit(&ldcp->lock);
3731 		return (exit_val);
3732 	} else {
3733 		exit_val = ldcp->read_p(ldcp, bufp, sizep);
3734 	}
3735 
3736 	/*
3737 	 * if queue has been drained - clear interrupt
3738 	 */
3739 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
3740 	    &ldcp->link_state);
3741 	if (rv != 0) {
3742 		cmn_err(CE_WARN, "ldc_read: (0x%lx) unable to read queue ptrs",
3743 		    ldcp->id);
3744 		mutex_enter(&ldcp->tx_lock);
3745 		i_ldc_reset(ldcp, B_TRUE);
3746 		mutex_exit(&ldcp->tx_lock);
3747 		mutex_exit(&ldcp->lock);
3748 		return (ECONNRESET);
3749 	}
3750 
3751 	if (exit_val == 0) {
3752 		if (ldcp->link_state == LDC_CHANNEL_DOWN ||
3753 		    ldcp->link_state == LDC_CHANNEL_RESET) {
3754 			mutex_enter(&ldcp->tx_lock);
3755 			i_ldc_reset(ldcp, B_FALSE);
3756 			exit_val = ECONNRESET;
3757 			mutex_exit(&ldcp->tx_lock);
3758 		}
3759 		if ((rv == 0) &&
3760 		    (ldcp->rx_intr_state == LDC_INTR_PEND) &&
3761 		    (rx_head == rx_tail)) {
3762 			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
3763 		}
3764 	}
3765 
3766 	mutex_exit(&ldcp->lock);
3767 	return (exit_val);
3768 }
3769 
3770 /*
3771  * Basic raw mondo read -
3772  * no interpretation of mondo contents at all.
3773  *
3774  * Enter and exit with ldcp->lock held by caller
3775  */
3776 static int
3777 i_ldc_read_raw(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep)
3778 {
3779 	uint64_t 	q_size_mask;
3780 	ldc_msg_t 	*msgp;
3781 	uint8_t		*msgbufp;
3782 	int		rv = 0, space;
3783 	uint64_t 	rx_head, rx_tail;
3784 
3785 	space = *sizep;
3786 
3787 	if (space < LDC_PAYLOAD_SIZE_RAW)
3788 		return (ENOBUFS);
3789 
3790 	ASSERT(mutex_owned(&ldcp->lock));
3791 
3792 	/* compute mask for increment */
3793 	q_size_mask = (ldcp->rx_q_entries-1)<<LDC_PACKET_SHIFT;
3794 
3795 	/*
3796 	 * Read packet(s) from the queue
3797 	 */
3798 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
3799 	    &ldcp->link_state);
3800 	if (rv != 0) {
3801 		cmn_err(CE_WARN,
3802 		    "ldc_read_raw: (0x%lx) unable to read queue ptrs",
3803 		    ldcp->id);
3804 		return (EIO);
3805 	}
3806 	D1(ldcp->id, "ldc_read_raw: (0x%llx) rxh=0x%llx,"
3807 	    " rxt=0x%llx, st=0x%llx\n",
3808 	    ldcp->id, rx_head, rx_tail, ldcp->link_state);
3809 
3810 	/* reset the channel state if the channel went down */
3811 	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
3812 	    ldcp->link_state == LDC_CHANNEL_RESET) {
3813 		mutex_enter(&ldcp->tx_lock);
3814 		i_ldc_reset(ldcp, B_FALSE);
3815 		mutex_exit(&ldcp->tx_lock);
3816 		return (ECONNRESET);
3817 	}
3818 
3819 	/*
3820 	 * Check for empty queue
3821 	 */
3822 	if (rx_head == rx_tail) {
3823 		*sizep = 0;
3824 		return (0);
3825 	}
3826 
3827 	/* get the message */
3828 	msgp = (ldc_msg_t *)(ldcp->rx_q_va + rx_head);
3829 
3830 	/* if channel is in RAW mode, copy data and return */
3831 	msgbufp = (uint8_t *)&(msgp->raw[0]);
3832 
3833 	bcopy(msgbufp, target_bufp, LDC_PAYLOAD_SIZE_RAW);
3834 
3835 	DUMP_PAYLOAD(ldcp->id, msgbufp);
3836 
3837 	*sizep = LDC_PAYLOAD_SIZE_RAW;
3838 
3839 	rx_head = (rx_head + LDC_PACKET_SIZE) & q_size_mask;
3840 	rv = i_ldc_set_rx_head(ldcp, rx_head);
3841 
3842 	return (rv);
3843 }
3844 
3845 /*
3846  * Process LDC mondos to build larger packets
3847  * with either un-reliable or reliable delivery.
3848  *
3849  * Enter and exit with ldcp->lock held by caller
3850  */
3851 static int
3852 i_ldc_read_packet(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep)
3853 {
3854 	int		rv = 0;
3855 	uint64_t 	rx_head = 0, rx_tail = 0;
3856 	uint64_t 	curr_head = 0;
3857 	ldc_msg_t 	*msg;
3858 	caddr_t 	target;
3859 	size_t 		len = 0, bytes_read = 0;
3860 	int 		retries = 0;
3861 	uint64_t 	q_va, q_size_mask;
3862 	uint64_t	first_fragment = 0;
3863 
3864 	target = target_bufp;
3865 
3866 	ASSERT(mutex_owned(&ldcp->lock));
3867 
3868 	/* check if the buffer and size are valid */
3869 	if (target_bufp == NULL || *sizep == 0) {
3870 		DWARN(ldcp->id, "ldc_read: (0x%llx) invalid buffer/size\n",
3871 		    ldcp->id);
3872 		return (EINVAL);
3873 	}
3874 
3875 	/* Set q_va and compute increment mask for the appropriate queue */
3876 	if (ldcp->mode == LDC_MODE_RELIABLE) {
3877 		q_va	    = ldcp->rx_dq_va;
3878 		q_size_mask = (ldcp->rx_dq_entries-1)<<LDC_PACKET_SHIFT;
3879 	} else {
3880 		q_va	    = ldcp->rx_q_va;
3881 		q_size_mask = (ldcp->rx_q_entries-1)<<LDC_PACKET_SHIFT;
3882 	}
3883 
3884 	/*
3885 	 * Read packet(s) from the queue
3886 	 */
3887 	rv = ldcp->readq_get_state(ldcp, &curr_head, &rx_tail,
3888 	    &ldcp->link_state);
3889 	if (rv != 0) {
3890 		cmn_err(CE_WARN, "ldc_read: (0x%lx) unable to read queue ptrs",
3891 		    ldcp->id);
3892 		mutex_enter(&ldcp->tx_lock);
3893 		i_ldc_reset(ldcp, B_TRUE);
3894 		mutex_exit(&ldcp->tx_lock);
3895 		return (ECONNRESET);
3896 	}
3897 	D1(ldcp->id, "ldc_read: (0x%llx) chd=0x%llx, tl=0x%llx, st=0x%llx\n",
3898 	    ldcp->id, curr_head, rx_tail, ldcp->link_state);
3899 
3900 	/* reset the channel state if the channel went down */
3901 	if (ldcp->link_state != LDC_CHANNEL_UP)
3902 		goto channel_is_reset;
3903 
3904 	for (;;) {
3905 
3906 		if (curr_head == rx_tail) {
3907 			/*
3908 			 * If a data queue is being used, check the Rx HV
3909 			 * queue. This will copy over any new data packets
3910 			 * that have arrived.
3911 			 */
3912 			if (ldcp->mode == LDC_MODE_RELIABLE)
3913 				(void) i_ldc_chkq(ldcp);
3914 
3915 			rv = ldcp->readq_get_state(ldcp,
3916 			    &rx_head, &rx_tail, &ldcp->link_state);
3917 			if (rv != 0) {
3918 				cmn_err(CE_WARN,
3919 				    "ldc_read: (0x%lx) cannot read queue ptrs",
3920 				    ldcp->id);
3921 				mutex_enter(&ldcp->tx_lock);
3922 				i_ldc_reset(ldcp, B_TRUE);
3923 				mutex_exit(&ldcp->tx_lock);
3924 				return (ECONNRESET);
3925 			}
3926 
3927 			if (ldcp->link_state != LDC_CHANNEL_UP)
3928 				goto channel_is_reset;
3929 
3930 			if (curr_head == rx_tail) {
3931 
3932 				/* If in the middle of a fragmented xfer */
3933 				if (first_fragment != 0) {
3934 
3935 					/* wait for ldc_delay usecs */
3936 					drv_usecwait(ldc_delay);
3937 
3938 					if (++retries < ldc_max_retries)
3939 						continue;
3940 
3941 					*sizep = 0;
3942 					if (ldcp->mode != LDC_MODE_RELIABLE)
3943 						ldcp->last_msg_rcd =
3944 						    first_fragment - 1;
3945 					DWARN(DBG_ALL_LDCS, "ldc_read: "
3946 					    "(0x%llx) read timeout", ldcp->id);
3947 					return (EAGAIN);
3948 				}
3949 				*sizep = 0;
3950 				break;
3951 			}
3952 		}
3953 		retries = 0;
3954 
3955 		D2(ldcp->id,
3956 		    "ldc_read: (0x%llx) chd=0x%llx, rxhd=0x%llx, rxtl=0x%llx\n",
3957 		    ldcp->id, curr_head, rx_head, rx_tail);
3958 
3959 		/* get the message */
3960 		msg = (ldc_msg_t *)(q_va + curr_head);
3961 
3962 		DUMP_LDC_PKT(ldcp, "ldc_read received pkt",
3963 		    ldcp->rx_q_va + curr_head);
3964 
3965 		/* Check the message ID for the message received */
3966 		if (ldcp->mode != LDC_MODE_RELIABLE) {
3967 			if ((rv = i_ldc_check_seqid(ldcp, msg)) != 0) {
3968 
3969 				DWARN(ldcp->id, "ldc_read: (0x%llx) seqid "
3970 				    "error, q_ptrs=0x%lx,0x%lx",
3971 				    ldcp->id, rx_head, rx_tail);
3972 
3973 				/* throw away data */
3974 				bytes_read = 0;
3975 
3976 				/* Reset last_msg_rcd to start of message */
3977 				if (first_fragment != 0) {
3978 					ldcp->last_msg_rcd = first_fragment - 1;
3979 					first_fragment = 0;
3980 				}
3981 				/*
3982 				 * Send a NACK -- invalid seqid
3983 				 * get the current tail for the response
3984 				 */
3985 				rv = i_ldc_send_pkt(ldcp, msg->type, LDC_NACK,
3986 				    (msg->ctrl & LDC_CTRL_MASK));
3987 				if (rv) {
3988 					cmn_err(CE_NOTE,
3989 					    "ldc_read: (0x%lx) err sending "
3990 					    "NACK msg\n", ldcp->id);
3991 
3992 					/* if cannot send NACK - reset chan */
3993 					mutex_enter(&ldcp->tx_lock);
3994 					i_ldc_reset(ldcp, B_FALSE);
3995 					mutex_exit(&ldcp->tx_lock);
3996 					rv = ECONNRESET;
3997 					break;
3998 				}
3999 
4000 				/* purge receive queue */
4001 				rv = i_ldc_set_rx_head(ldcp, rx_tail);
4002 
4003 				break;
4004 			}
4005 
4006 			/*
4007 			 * Process any messages of type CTRL messages
4008 			 * Future implementations should try to pass these
4009 			 * to LDC link by resetting the intr state.
4010 			 *
4011 			 * NOTE: not done as a switch() as type can be
4012 			 * both ctrl+data
4013 			 */
4014 			if (msg->type & LDC_CTRL) {
4015 				if (rv = i_ldc_ctrlmsg(ldcp, msg)) {
4016 					if (rv == EAGAIN)
4017 						continue;
4018 					rv = i_ldc_set_rx_head(ldcp, rx_tail);
4019 					*sizep = 0;
4020 					bytes_read = 0;
4021 					break;
4022 				}
4023 			}
4024 
4025 			/* process data ACKs */
4026 			if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) {
4027 				if (rv = i_ldc_process_data_ACK(ldcp, msg)) {
4028 					*sizep = 0;
4029 					bytes_read = 0;
4030 					break;
4031 				}
4032 			}
4033 
4034 			/* process data NACKs */
4035 			if ((msg->type & LDC_DATA) && (msg->stype & LDC_NACK)) {
4036 				DWARN(ldcp->id,
4037 				    "ldc_read: (0x%llx) received DATA/NACK",
4038 				    ldcp->id);
4039 				mutex_enter(&ldcp->tx_lock);
4040 				i_ldc_reset(ldcp, B_TRUE);
4041 				mutex_exit(&ldcp->tx_lock);
4042 				return (ECONNRESET);
4043 			}
4044 		}
4045 
4046 		/* process data messages */
4047 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_INFO)) {
4048 
4049 			uint8_t *msgbuf = (uint8_t *)(
4050 			    (ldcp->mode == LDC_MODE_RELIABLE) ?
4051 			    msg->rdata : msg->udata);
4052 
4053 			D2(ldcp->id,
4054 			    "ldc_read: (0x%llx) received data msg\n", ldcp->id);
4055 
4056 			/* get the packet length */
4057 			len = (msg->env & LDC_LEN_MASK);
4058 
4059 				/*
4060 				 * FUTURE OPTIMIZATION:
4061 				 * dont need to set q head for every
4062 				 * packet we read just need to do this when
4063 				 * we are done or need to wait for more
4064 				 * mondos to make a full packet - this is
4065 				 * currently expensive.
4066 				 */
4067 
4068 			if (first_fragment == 0) {
4069 
4070 				/*
4071 				 * first packets should always have the start
4072 				 * bit set (even for a single packet). If not
4073 				 * throw away the packet
4074 				 */
4075 				if (!(msg->env & LDC_FRAG_START)) {
4076 
4077 					DWARN(DBG_ALL_LDCS,
4078 					    "ldc_read: (0x%llx) not start - "
4079 					    "frag=%x\n", ldcp->id,
4080 					    (msg->env) & LDC_FRAG_MASK);
4081 
4082 					/* toss pkt, inc head, cont reading */
4083 					bytes_read = 0;
4084 					target = target_bufp;
4085 					curr_head =
4086 					    (curr_head + LDC_PACKET_SIZE)
4087 					    & q_size_mask;
4088 					if (rv = ldcp->readq_set_head(ldcp,
4089 					    curr_head))
4090 						break;
4091 
4092 					continue;
4093 				}
4094 
4095 				first_fragment = msg->seqid;
4096 			} else {
4097 				/* check to see if this is a pkt w/ START bit */
4098 				if (msg->env & LDC_FRAG_START) {
4099 					DWARN(DBG_ALL_LDCS,
4100 					    "ldc_read:(0x%llx) unexpected pkt"
4101 					    " env=0x%x discarding %d bytes,"
4102 					    " lastmsg=%d, currentmsg=%d\n",
4103 					    ldcp->id, msg->env&LDC_FRAG_MASK,
4104 					    bytes_read, ldcp->last_msg_rcd,
4105 					    msg->seqid);
4106 
4107 					/* throw data we have read so far */
4108 					bytes_read = 0;
4109 					target = target_bufp;
4110 					first_fragment = msg->seqid;
4111 
4112 					if (rv = ldcp->readq_set_head(ldcp,
4113 					    curr_head))
4114 						break;
4115 				}
4116 			}
4117 
4118 			/* copy (next) pkt into buffer */
4119 			if (len <= (*sizep - bytes_read)) {
4120 				bcopy(msgbuf, target, len);
4121 				target += len;
4122 				bytes_read += len;
4123 			} else {
4124 				/*
4125 				 * there is not enough space in the buffer to
4126 				 * read this pkt. throw message away & continue
4127 				 * reading data from queue
4128 				 */
4129 				DWARN(DBG_ALL_LDCS,
4130 				    "ldc_read: (0x%llx) buffer too small, "
4131 				    "head=0x%lx, expect=%d, got=%d\n", ldcp->id,
4132 				    curr_head, *sizep, bytes_read+len);
4133 
4134 				first_fragment = 0;
4135 				target = target_bufp;
4136 				bytes_read = 0;
4137 
4138 				/* throw away everything received so far */
4139 				if (rv = ldcp->readq_set_head(ldcp, curr_head))
4140 					break;
4141 
4142 				/* continue reading remaining pkts */
4143 				continue;
4144 			}
4145 		}
4146 
4147 		/* set the message id */
4148 		if (ldcp->mode != LDC_MODE_RELIABLE)
4149 			ldcp->last_msg_rcd = msg->seqid;
4150 
4151 		/* move the head one position */
4152 		curr_head = (curr_head + LDC_PACKET_SIZE) & q_size_mask;
4153 
4154 		if (msg->env & LDC_FRAG_STOP) {
4155 
4156 			/*
4157 			 * All pkts that are part of this fragmented transfer
4158 			 * have been read or this was a single pkt read
4159 			 * or there was an error
4160 			 */
4161 
4162 			/* set the queue head */
4163 			if (rv = ldcp->readq_set_head(ldcp, curr_head))
4164 				bytes_read = 0;
4165 
4166 			*sizep = bytes_read;
4167 
4168 			break;
4169 		}
4170 
4171 		/* advance head if it is a CTRL packet or a DATA ACK packet */
4172 		if ((msg->type & LDC_CTRL) ||
4173 		    ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK))) {
4174 
4175 			/* set the queue head */
4176 			if (rv = ldcp->readq_set_head(ldcp, curr_head)) {
4177 				bytes_read = 0;
4178 				break;
4179 			}
4180 
4181 			D2(ldcp->id, "ldc_read: (0x%llx) set ACK qhead 0x%llx",
4182 			    ldcp->id, curr_head);
4183 		}
4184 
4185 	} /* for (;;) */
4186 
4187 	D2(ldcp->id, "ldc_read: (0x%llx) end size=%d", ldcp->id, *sizep);
4188 
4189 	return (rv);
4190 
4191 channel_is_reset:
4192 	mutex_enter(&ldcp->tx_lock);
4193 	i_ldc_reset(ldcp, B_FALSE);
4194 	mutex_exit(&ldcp->tx_lock);
4195 	return (ECONNRESET);
4196 }
4197 
4198 /*
4199  * Fetch and buffer incoming packets so we can hand them back as
4200  * a basic byte stream.
4201  *
4202  * Enter and exit with ldcp->lock held by caller
4203  */
4204 static int
4205 i_ldc_read_stream(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep)
4206 {
4207 	int	rv;
4208 	size_t	size;
4209 
4210 	ASSERT(mutex_owned(&ldcp->lock));
4211 
4212 	D2(ldcp->id, "i_ldc_read_stream: (0x%llx) buffer size=%d",
4213 	    ldcp->id, *sizep);
4214 
4215 	if (ldcp->stream_remains == 0) {
4216 		size = ldcp->mtu;
4217 		rv = i_ldc_read_packet(ldcp,
4218 		    (caddr_t)ldcp->stream_bufferp, &size);
4219 		D2(ldcp->id, "i_ldc_read_stream: read packet (0x%llx) size=%d",
4220 		    ldcp->id, size);
4221 
4222 		if (rv != 0)
4223 			return (rv);
4224 
4225 		ldcp->stream_remains = size;
4226 		ldcp->stream_offset = 0;
4227 	}
4228 
4229 	size = MIN(ldcp->stream_remains, *sizep);
4230 
4231 	bcopy(ldcp->stream_bufferp + ldcp->stream_offset, target_bufp, size);
4232 	ldcp->stream_offset += size;
4233 	ldcp->stream_remains -= size;
4234 
4235 	D2(ldcp->id, "i_ldc_read_stream: (0x%llx) fill from buffer size=%d",
4236 	    ldcp->id, size);
4237 
4238 	*sizep = size;
4239 	return (0);
4240 }
4241 
4242 /*
4243  * Write specified amount of bytes to the channel
4244  * in multiple pkts of pkt_payload size. Each
4245  * packet is tagged with an unique packet ID in
4246  * the case of a reliable link.
4247  *
4248  * On return, size contains the number of bytes written.
4249  */
4250 int
4251 ldc_write(ldc_handle_t handle, caddr_t buf, size_t *sizep)
4252 {
4253 	ldc_chan_t	*ldcp;
4254 	int		rv = 0;
4255 
4256 	if (handle == NULL) {
4257 		DWARN(DBG_ALL_LDCS, "ldc_write: invalid channel handle\n");
4258 		return (EINVAL);
4259 	}
4260 	ldcp = (ldc_chan_t *)handle;
4261 
4262 	/* check if writes can occur */
4263 	if (!mutex_tryenter(&ldcp->tx_lock)) {
4264 		/*
4265 		 * Could not get the lock - channel could
4266 		 * be in the process of being unconfigured
4267 		 * or reader has encountered an error
4268 		 */
4269 		return (EAGAIN);
4270 	}
4271 
4272 	/* check if non-zero data to write */
4273 	if (buf == NULL || sizep == NULL) {
4274 		DWARN(ldcp->id, "ldc_write: (0x%llx) invalid data write\n",
4275 		    ldcp->id);
4276 		mutex_exit(&ldcp->tx_lock);
4277 		return (EINVAL);
4278 	}
4279 
4280 	if (*sizep == 0) {
4281 		DWARN(ldcp->id, "ldc_write: (0x%llx) write size of zero\n",
4282 		    ldcp->id);
4283 		mutex_exit(&ldcp->tx_lock);
4284 		return (0);
4285 	}
4286 
4287 	/* Check if channel is UP for data exchange */
4288 	if (ldcp->tstate != TS_UP) {
4289 		DWARN(ldcp->id,
4290 		    "ldc_write: (0x%llx) channel is not in UP state\n",
4291 		    ldcp->id);
4292 		*sizep = 0;
4293 		rv = ECONNRESET;
4294 	} else {
4295 		rv = ldcp->write_p(ldcp, buf, sizep);
4296 	}
4297 
4298 	mutex_exit(&ldcp->tx_lock);
4299 
4300 	return (rv);
4301 }
4302 
4303 /*
4304  * Write a raw packet to the channel
4305  * On return, size contains the number of bytes written.
4306  */
4307 static int
4308 i_ldc_write_raw(ldc_chan_t *ldcp, caddr_t buf, size_t *sizep)
4309 {
4310 	ldc_msg_t 	*ldcmsg;
4311 	uint64_t 	tx_head, tx_tail, new_tail;
4312 	int		rv = 0;
4313 	size_t		size;
4314 
4315 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
4316 	ASSERT(ldcp->mode == LDC_MODE_RAW);
4317 
4318 	size = *sizep;
4319 
4320 	/*
4321 	 * Check to see if the packet size is less than or
4322 	 * equal to packet size support in raw mode
4323 	 */
4324 	if (size > ldcp->pkt_payload) {
4325 		DWARN(ldcp->id,
4326 		    "ldc_write: (0x%llx) invalid size (0x%llx) for RAW mode\n",
4327 		    ldcp->id, *sizep);
4328 		*sizep = 0;
4329 		return (EMSGSIZE);
4330 	}
4331 
4332 	/* get the qptrs for the tx queue */
4333 	rv = hv_ldc_tx_get_state(ldcp->id,
4334 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
4335 	if (rv != 0) {
4336 		cmn_err(CE_WARN,
4337 		    "ldc_write: (0x%lx) cannot read queue ptrs\n", ldcp->id);
4338 		*sizep = 0;
4339 		return (EIO);
4340 	}
4341 
4342 	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
4343 	    ldcp->link_state == LDC_CHANNEL_RESET) {
4344 		DWARN(ldcp->id,
4345 		    "ldc_write: (0x%llx) channel down/reset\n", ldcp->id);
4346 
4347 		*sizep = 0;
4348 		if (mutex_tryenter(&ldcp->lock)) {
4349 			i_ldc_reset(ldcp, B_FALSE);
4350 			mutex_exit(&ldcp->lock);
4351 		} else {
4352 			/*
4353 			 * Release Tx lock, and then reacquire channel
4354 			 * and Tx lock in correct order
4355 			 */
4356 			mutex_exit(&ldcp->tx_lock);
4357 			mutex_enter(&ldcp->lock);
4358 			mutex_enter(&ldcp->tx_lock);
4359 			i_ldc_reset(ldcp, B_FALSE);
4360 			mutex_exit(&ldcp->lock);
4361 		}
4362 		return (ECONNRESET);
4363 	}
4364 
4365 	tx_tail = ldcp->tx_tail;
4366 	tx_head = ldcp->tx_head;
4367 	new_tail = (tx_tail + LDC_PACKET_SIZE) &
4368 	    ((ldcp->tx_q_entries-1) << LDC_PACKET_SHIFT);
4369 
4370 	if (new_tail == tx_head) {
4371 		DWARN(DBG_ALL_LDCS,
4372 		    "ldc_write: (0x%llx) TX queue is full\n", ldcp->id);
4373 		*sizep = 0;
4374 		return (EWOULDBLOCK);
4375 	}
4376 
4377 	D2(ldcp->id, "ldc_write: (0x%llx) start xfer size=%d",
4378 	    ldcp->id, size);
4379 
4380 	/* Send the data now */
4381 	ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
4382 
4383 	/* copy the data into pkt */
4384 	bcopy((uint8_t *)buf, ldcmsg, size);
4385 
4386 	/* increment tail */
4387 	tx_tail = new_tail;
4388 
4389 	/*
4390 	 * All packets have been copied into the TX queue
4391 	 * update the tail ptr in the HV
4392 	 */
4393 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
4394 	if (rv) {
4395 		if (rv == EWOULDBLOCK) {
4396 			DWARN(ldcp->id, "ldc_write: (0x%llx) write timed out\n",
4397 			    ldcp->id);
4398 			*sizep = 0;
4399 			return (EWOULDBLOCK);
4400 		}
4401 
4402 		*sizep = 0;
4403 		if (mutex_tryenter(&ldcp->lock)) {
4404 			i_ldc_reset(ldcp, B_FALSE);
4405 			mutex_exit(&ldcp->lock);
4406 		} else {
4407 			/*
4408 			 * Release Tx lock, and then reacquire channel
4409 			 * and Tx lock in correct order
4410 			 */
4411 			mutex_exit(&ldcp->tx_lock);
4412 			mutex_enter(&ldcp->lock);
4413 			mutex_enter(&ldcp->tx_lock);
4414 			i_ldc_reset(ldcp, B_FALSE);
4415 			mutex_exit(&ldcp->lock);
4416 		}
4417 		return (ECONNRESET);
4418 	}
4419 
4420 	ldcp->tx_tail = tx_tail;
4421 	*sizep = size;
4422 
4423 	D2(ldcp->id, "ldc_write: (0x%llx) end xfer size=%d", ldcp->id, size);
4424 
4425 	return (rv);
4426 }
4427 
4428 
4429 /*
4430  * Write specified amount of bytes to the channel
4431  * in multiple pkts of pkt_payload size. Each
4432  * packet is tagged with an unique packet ID in
4433  * the case of a reliable link.
4434  *
4435  * On return, size contains the number of bytes written.
4436  * This function needs to ensure that the write size is < MTU size
4437  */
4438 static int
4439 i_ldc_write_packet(ldc_chan_t *ldcp, caddr_t buf, size_t *size)
4440 {
4441 	ldc_msg_t 	*ldcmsg;
4442 	uint64_t 	tx_head, tx_tail, new_tail, start;
4443 	uint64_t	txq_size_mask, numavail;
4444 	uint8_t 	*msgbuf, *source = (uint8_t *)buf;
4445 	size_t 		len, bytes_written = 0, remaining;
4446 	int		rv;
4447 	uint32_t	curr_seqid;
4448 
4449 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
4450 
4451 	ASSERT(ldcp->mode == LDC_MODE_RELIABLE ||
4452 	    ldcp->mode == LDC_MODE_UNRELIABLE);
4453 
4454 	/* compute mask for increment */
4455 	txq_size_mask = (ldcp->tx_q_entries - 1) << LDC_PACKET_SHIFT;
4456 
4457 	/* get the qptrs for the tx queue */
4458 	rv = hv_ldc_tx_get_state(ldcp->id,
4459 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
4460 	if (rv != 0) {
4461 		cmn_err(CE_WARN,
4462 		    "ldc_write: (0x%lx) cannot read queue ptrs\n", ldcp->id);
4463 		*size = 0;
4464 		return (EIO);
4465 	}
4466 
4467 	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
4468 	    ldcp->link_state == LDC_CHANNEL_RESET) {
4469 		DWARN(ldcp->id,
4470 		    "ldc_write: (0x%llx) channel down/reset\n", ldcp->id);
4471 		*size = 0;
4472 		if (mutex_tryenter(&ldcp->lock)) {
4473 			i_ldc_reset(ldcp, B_FALSE);
4474 			mutex_exit(&ldcp->lock);
4475 		} else {
4476 			/*
4477 			 * Release Tx lock, and then reacquire channel
4478 			 * and Tx lock in correct order
4479 			 */
4480 			mutex_exit(&ldcp->tx_lock);
4481 			mutex_enter(&ldcp->lock);
4482 			mutex_enter(&ldcp->tx_lock);
4483 			i_ldc_reset(ldcp, B_FALSE);
4484 			mutex_exit(&ldcp->lock);
4485 		}
4486 		return (ECONNRESET);
4487 	}
4488 
4489 	tx_tail = ldcp->tx_tail;
4490 	new_tail = (tx_tail + LDC_PACKET_SIZE) %
4491 	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
4492 
4493 	/*
4494 	 * Check to see if the queue is full. The check is done using
4495 	 * the appropriate head based on the link mode.
4496 	 */
4497 	i_ldc_get_tx_head(ldcp, &tx_head);
4498 
4499 	if (new_tail == tx_head) {
4500 		DWARN(DBG_ALL_LDCS,
4501 		    "ldc_write: (0x%llx) TX queue is full\n", ldcp->id);
4502 		*size = 0;
4503 		return (EWOULDBLOCK);
4504 	}
4505 
4506 	/*
4507 	 * Make sure that the LDC Tx queue has enough space
4508 	 */
4509 	numavail = (tx_head >> LDC_PACKET_SHIFT) - (tx_tail >> LDC_PACKET_SHIFT)
4510 	    + ldcp->tx_q_entries - 1;
4511 	numavail %= ldcp->tx_q_entries;
4512 
4513 	if (*size > (numavail * ldcp->pkt_payload)) {
4514 		DWARN(DBG_ALL_LDCS,
4515 		    "ldc_write: (0x%llx) TX queue has no space\n", ldcp->id);
4516 		return (EWOULDBLOCK);
4517 	}
4518 
4519 	D2(ldcp->id, "ldc_write: (0x%llx) start xfer size=%d",
4520 	    ldcp->id, *size);
4521 
4522 	/* Send the data now */
4523 	bytes_written = 0;
4524 	curr_seqid = ldcp->last_msg_snt;
4525 	start = tx_tail;
4526 
4527 	while (*size > bytes_written) {
4528 
4529 		ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
4530 
4531 		msgbuf = (uint8_t *)((ldcp->mode == LDC_MODE_RELIABLE) ?
4532 		    ldcmsg->rdata : ldcmsg->udata);
4533 
4534 		ldcmsg->type = LDC_DATA;
4535 		ldcmsg->stype = LDC_INFO;
4536 		ldcmsg->ctrl = 0;
4537 
4538 		remaining = *size - bytes_written;
4539 		len = min(ldcp->pkt_payload, remaining);
4540 		ldcmsg->env = (uint8_t)len;
4541 
4542 		curr_seqid++;
4543 		ldcmsg->seqid = curr_seqid;
4544 
4545 		/* copy the data into pkt */
4546 		bcopy(source, msgbuf, len);
4547 
4548 		source += len;
4549 		bytes_written += len;
4550 
4551 		/* increment tail */
4552 		tx_tail = (tx_tail + LDC_PACKET_SIZE) & txq_size_mask;
4553 
4554 		ASSERT(tx_tail != tx_head);
4555 	}
4556 
4557 	/* Set the start and stop bits */
4558 	ldcmsg->env |= LDC_FRAG_STOP;
4559 	ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + start);
4560 	ldcmsg->env |= LDC_FRAG_START;
4561 
4562 	/*
4563 	 * All packets have been copied into the TX queue
4564 	 * update the tail ptr in the HV
4565 	 */
4566 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
4567 	if (rv == 0) {
4568 		ldcp->tx_tail = tx_tail;
4569 		ldcp->last_msg_snt = curr_seqid;
4570 		*size = bytes_written;
4571 	} else {
4572 		int rv2;
4573 
4574 		if (rv != EWOULDBLOCK) {
4575 			*size = 0;
4576 			if (mutex_tryenter(&ldcp->lock)) {
4577 				i_ldc_reset(ldcp, B_FALSE);
4578 				mutex_exit(&ldcp->lock);
4579 			} else {
4580 				/*
4581 				 * Release Tx lock, and then reacquire channel
4582 				 * and Tx lock in correct order
4583 				 */
4584 				mutex_exit(&ldcp->tx_lock);
4585 				mutex_enter(&ldcp->lock);
4586 				mutex_enter(&ldcp->tx_lock);
4587 				i_ldc_reset(ldcp, B_FALSE);
4588 				mutex_exit(&ldcp->lock);
4589 			}
4590 			return (ECONNRESET);
4591 		}
4592 
4593 		D1(ldcp->id, "hv_tx_set_tail returns 0x%x (head 0x%x, "
4594 		    "old tail 0x%x, new tail 0x%x, qsize=0x%x)\n",
4595 		    rv, ldcp->tx_head, ldcp->tx_tail, tx_tail,
4596 		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT));
4597 
4598 		rv2 = hv_ldc_tx_get_state(ldcp->id,
4599 		    &tx_head, &tx_tail, &ldcp->link_state);
4600 
4601 		D1(ldcp->id, "hv_ldc_tx_get_state returns 0x%x "
4602 		    "(head 0x%x, tail 0x%x state 0x%x)\n",
4603 		    rv2, tx_head, tx_tail, ldcp->link_state);
4604 
4605 		*size = 0;
4606 	}
4607 
4608 	D2(ldcp->id, "ldc_write: (0x%llx) end xfer size=%d", ldcp->id, *size);
4609 
4610 	return (rv);
4611 }
4612 
4613 /*
4614  * Write specified amount of bytes to the channel
4615  * in multiple pkts of pkt_payload size. Each
4616  * packet is tagged with an unique packet ID in
4617  * the case of a reliable link.
4618  *
4619  * On return, size contains the number of bytes written.
4620  * This function needs to ensure that the write size is < MTU size
4621  */
4622 static int
4623 i_ldc_write_stream(ldc_chan_t *ldcp, caddr_t buf, size_t *sizep)
4624 {
4625 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
4626 	ASSERT(ldcp->mode == LDC_MODE_RELIABLE);
4627 
4628 	/* Truncate packet to max of MTU size */
4629 	if (*sizep > ldcp->mtu) *sizep = ldcp->mtu;
4630 	return (i_ldc_write_packet(ldcp, buf, sizep));
4631 }
4632 
4633 
4634 /*
4635  * Interfaces for channel nexus to register/unregister with LDC module
4636  * The nexus will register functions to be used to register individual
4637  * channels with the nexus and enable interrupts for the channels
4638  */
4639 int
4640 ldc_register(ldc_cnex_t *cinfo)
4641 {
4642 	ldc_chan_t	*ldcp;
4643 
4644 	if (cinfo == NULL || cinfo->dip == NULL ||
4645 	    cinfo->reg_chan == NULL || cinfo->unreg_chan == NULL ||
4646 	    cinfo->add_intr == NULL || cinfo->rem_intr == NULL ||
4647 	    cinfo->clr_intr == NULL) {
4648 
4649 		DWARN(DBG_ALL_LDCS, "ldc_register: invalid nexus info\n");
4650 		return (EINVAL);
4651 	}
4652 
4653 	mutex_enter(&ldcssp->lock);
4654 
4655 	/* nexus registration */
4656 	ldcssp->cinfo.dip = cinfo->dip;
4657 	ldcssp->cinfo.reg_chan = cinfo->reg_chan;
4658 	ldcssp->cinfo.unreg_chan = cinfo->unreg_chan;
4659 	ldcssp->cinfo.add_intr = cinfo->add_intr;
4660 	ldcssp->cinfo.rem_intr = cinfo->rem_intr;
4661 	ldcssp->cinfo.clr_intr = cinfo->clr_intr;
4662 
4663 	/* register any channels that might have been previously initialized */
4664 	ldcp = ldcssp->chan_list;
4665 	while (ldcp) {
4666 		if ((ldcp->tstate & TS_QCONF_RDY) &&
4667 		    (ldcp->tstate & TS_CNEX_RDY) == 0)
4668 			(void) i_ldc_register_channel(ldcp);
4669 
4670 		ldcp = ldcp->next;
4671 	}
4672 
4673 	mutex_exit(&ldcssp->lock);
4674 
4675 	return (0);
4676 }
4677 
4678 int
4679 ldc_unregister(ldc_cnex_t *cinfo)
4680 {
4681 	if (cinfo == NULL || cinfo->dip == NULL) {
4682 		DWARN(DBG_ALL_LDCS, "ldc_unregister: invalid nexus info\n");
4683 		return (EINVAL);
4684 	}
4685 
4686 	mutex_enter(&ldcssp->lock);
4687 
4688 	if (cinfo->dip != ldcssp->cinfo.dip) {
4689 		DWARN(DBG_ALL_LDCS, "ldc_unregister: invalid dip\n");
4690 		mutex_exit(&ldcssp->lock);
4691 		return (EINVAL);
4692 	}
4693 
4694 	/* nexus unregister */
4695 	ldcssp->cinfo.dip = NULL;
4696 	ldcssp->cinfo.reg_chan = NULL;
4697 	ldcssp->cinfo.unreg_chan = NULL;
4698 	ldcssp->cinfo.add_intr = NULL;
4699 	ldcssp->cinfo.rem_intr = NULL;
4700 	ldcssp->cinfo.clr_intr = NULL;
4701 
4702 	mutex_exit(&ldcssp->lock);
4703 
4704 	return (0);
4705 }
4706