xref: /illumos-gate/usr/src/uts/sun4v/io/ldc.c (revision b3619796d92b4472acfed6b7c813f83cef335013)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 /*
27  * sun4v LDC Link Layer
28  */
29 #include <sys/types.h>
30 #include <sys/file.h>
31 #include <sys/errno.h>
32 #include <sys/open.h>
33 #include <sys/cred.h>
34 #include <sys/kmem.h>
35 #include <sys/conf.h>
36 #include <sys/cmn_err.h>
37 #include <sys/ksynch.h>
38 #include <sys/modctl.h>
39 #include <sys/stat.h> /* needed for S_IFBLK and S_IFCHR */
40 #include <sys/debug.h>
41 #include <sys/cred.h>
42 #include <sys/promif.h>
43 #include <sys/ddi.h>
44 #include <sys/sunddi.h>
45 #include <sys/cyclic.h>
46 #include <sys/machsystm.h>
47 #include <sys/vm.h>
48 #include <sys/cpu.h>
49 #include <sys/intreg.h>
50 #include <sys/machcpuvar.h>
51 #include <sys/mmu.h>
52 #include <sys/pte.h>
53 #include <vm/hat.h>
54 #include <vm/as.h>
55 #include <vm/hat_sfmmu.h>
56 #include <sys/vm_machparam.h>
57 #include <vm/seg_kmem.h>
58 #include <vm/seg_kpm.h>
59 #include <sys/note.h>
60 #include <sys/ivintr.h>
61 #include <sys/hypervisor_api.h>
62 #include <sys/ldc.h>
63 #include <sys/ldc_impl.h>
64 #include <sys/cnex.h>
65 #include <sys/hsvc.h>
66 #include <sys/sdt.h>
67 #include <sys/kldc.h>
68 
69 /* Core internal functions */
70 int i_ldc_h2v_error(int h_error);
71 void i_ldc_reset(ldc_chan_t *ldcp, boolean_t force_reset);
72 
73 static int i_ldc_txq_reconf(ldc_chan_t *ldcp);
74 static int i_ldc_rxq_reconf(ldc_chan_t *ldcp, boolean_t force_reset);
75 static void i_ldc_rxq_drain(ldc_chan_t *ldcp);
76 static void i_ldc_reset_state(ldc_chan_t *ldcp);
77 static void i_ldc_debug_enter(void);
78 
79 static int i_ldc_get_tx_tail(ldc_chan_t *ldcp, uint64_t *tail);
80 static void i_ldc_get_tx_head(ldc_chan_t *ldcp, uint64_t *head);
81 static int i_ldc_set_tx_tail(ldc_chan_t *ldcp, uint64_t tail);
82 static int i_ldc_set_rx_head(ldc_chan_t *ldcp, uint64_t head);
83 static int i_ldc_send_pkt(ldc_chan_t *ldcp, uint8_t pkttype, uint8_t subtype,
84     uint8_t ctrlmsg);
85 
86 static int  i_ldc_set_rxdq_head(ldc_chan_t *ldcp, uint64_t head);
87 static void i_ldc_rxdq_copy(ldc_chan_t *ldcp, uint64_t *head);
88 static uint64_t i_ldc_dq_rx_get_state(ldc_chan_t *ldcp, uint64_t *head,
89     uint64_t *tail, uint64_t *link_state);
90 static uint64_t i_ldc_hvq_rx_get_state(ldc_chan_t *ldcp, uint64_t *head,
91     uint64_t *tail, uint64_t *link_state);
92 static int i_ldc_rx_ackpeek(ldc_chan_t *ldcp, uint64_t rx_head,
93     uint64_t rx_tail);
94 static uint_t i_ldc_chkq(ldc_chan_t *ldcp);
95 
96 /* Interrupt handling functions */
97 static uint_t i_ldc_tx_hdlr(caddr_t arg1, caddr_t arg2);
98 static uint_t i_ldc_rx_hdlr(caddr_t arg1, caddr_t arg2);
99 static uint_t i_ldc_rx_process_hvq(ldc_chan_t *ldcp, boolean_t *notify_client,
100     uint64_t *notify_event);
101 static void i_ldc_clear_intr(ldc_chan_t *ldcp, cnex_intrtype_t itype);
102 
103 /* Read method functions */
104 static int i_ldc_read_raw(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep);
105 static int i_ldc_read_packet(ldc_chan_t *ldcp, caddr_t target_bufp,
106 	size_t *sizep);
107 static int i_ldc_read_stream(ldc_chan_t *ldcp, caddr_t target_bufp,
108 	size_t *sizep);
109 
110 /* Write method functions */
111 static int i_ldc_write_raw(ldc_chan_t *ldcp, caddr_t target_bufp,
112 	size_t *sizep);
113 static int i_ldc_write_packet(ldc_chan_t *ldcp, caddr_t target_bufp,
114 	size_t *sizep);
115 static int i_ldc_write_stream(ldc_chan_t *ldcp, caddr_t target_bufp,
116 	size_t *sizep);
117 
118 /* Pkt processing internal functions */
119 static int i_ldc_check_seqid(ldc_chan_t *ldcp, ldc_msg_t *ldcmsg);
120 static int i_ldc_ctrlmsg(ldc_chan_t *ldcp, ldc_msg_t *ldcmsg);
121 static int i_ldc_process_VER(ldc_chan_t *ldcp, ldc_msg_t *msg);
122 static int i_ldc_process_RTS(ldc_chan_t *ldcp, ldc_msg_t *msg);
123 static int i_ldc_process_RTR(ldc_chan_t *ldcp, ldc_msg_t *msg);
124 static int i_ldc_process_RDX(ldc_chan_t *ldcp, ldc_msg_t *msg);
125 static int i_ldc_process_data_ACK(ldc_chan_t *ldcp, ldc_msg_t *msg);
126 
127 /* Imported functions */
128 extern void i_ldc_mem_set_hsvc_vers(uint64_t major, uint64_t minor);
129 extern void i_ldc_init_mapin(ldc_soft_state_t *ldcssp, uint64_t major,
130 	uint64_t minor);
131 
132 /* LDC Version */
133 static ldc_ver_t ldc_versions[] = { {1, 0} };
134 
135 /* number of supported versions */
136 #define	LDC_NUM_VERS	(sizeof (ldc_versions) / sizeof (ldc_versions[0]))
137 
138 /* Invalid value for the ldc_chan_t rx_ack_head field */
139 #define	ACKPEEK_HEAD_INVALID	((uint64_t)-1)
140 
141 
142 /* Module State Pointer */
143 ldc_soft_state_t *ldcssp;
144 
145 static struct modldrv md = {
146 	&mod_miscops,			/* This is a misc module */
147 	"sun4v LDC module",		/* Name of the module */
148 };
149 
150 static struct modlinkage ml = {
151 	MODREV_1,
152 	&md,
153 	NULL
154 };
155 
156 static uint64_t ldc_sup_minor;		/* Supported minor number */
157 static hsvc_info_t ldc_hsvc = {
158 	HSVC_REV_1, NULL, HSVC_GROUP_LDC, 1, 2, "ldc"
159 };
160 
161 /*
162  * The no. of MTU size messages that can be stored in
163  * the LDC Tx queue. The number of Tx queue entries is
164  * then computed as (mtu * mtu_msgs)/sizeof(queue_entry)
165  */
166 uint64_t ldc_mtu_msgs = LDC_MTU_MSGS;
167 
168 /*
169  * The minimum queue length. This is the size of the smallest
170  * LDC queue. If the computed value is less than this default,
171  * the queue length is rounded up to 'ldc_queue_entries'.
172  */
173 uint64_t ldc_queue_entries = LDC_QUEUE_ENTRIES;
174 
175 /*
176  * The length of the reliable-mode data queue in terms of the LDC
177  * receive queue length. i.e., the number of times larger than the
178  * LDC receive queue that the data queue should be. The HV receive
179  * queue is required to be a power of 2 and this implementation
180  * assumes the data queue will also be a power of 2. By making the
181  * multiplier a power of 2, we ensure the data queue will be a
182  * power of 2. We use a multiplier because the receive queue is
183  * sized to be sane relative to the MTU and the same is needed for
184  * the data queue.
185  */
186 uint64_t ldc_rxdq_multiplier = LDC_RXDQ_MULTIPLIER;
187 
188 /*
189  * LDC retry count and delay - when the HV returns EWOULDBLOCK
190  * the operation is retried 'ldc_max_retries' times with a
191  * wait of 'ldc_delay' usecs between each retry.
192  */
193 int ldc_max_retries = LDC_MAX_RETRIES;
194 clock_t ldc_delay = LDC_DELAY;
195 
196 /*
197  * Channels which have a devclass satisfying the following
198  * will be reset when entering the prom or kmdb.
199  *
200  *   LDC_DEVCLASS_PROM_RESET(devclass) != 0
201  *
202  * By default, only block device service channels are reset.
203  */
/* Shift in 64 bits: the result is combined with a uint64_t mask. */
#define	LDC_DEVCLASS_BIT(dc)		((uint64_t)0x1 << (dc))
205 #define	LDC_DEVCLASS_PROM_RESET(dc)	\
206 	(LDC_DEVCLASS_BIT(dc) & ldc_debug_reset_mask)
207 static uint64_t ldc_debug_reset_mask = LDC_DEVCLASS_BIT(LDC_DEV_BLK_SVC) |
208     LDC_DEVCLASS_BIT(LDC_DEV_GENERIC);
209 
210 /*
211  * delay between each retry of channel unregistration in
212  * ldc_close(), to wait for pending interrupts to complete.
213  */
214 clock_t ldc_close_delay = LDC_CLOSE_DELAY;
215 
216 
217 /*
218  * Reserved mapin space for descriptor rings.
219  */
220 uint64_t ldc_dring_direct_map_rsvd = LDC_DIRECT_MAP_SIZE_DEFAULT;
221 
222 /*
223  * Maximum direct map space allowed per channel.
224  */
225 uint64_t	ldc_direct_map_size_max = (16 * 1024 * 1024);	/* 16 MB */
226 
227 #ifdef DEBUG
228 
229 /*
230  * Print debug messages
231  *
232  * set ldcdbg to 0x7 for enabling all msgs
233  * 0x4 - Warnings
234  * 0x2 - All debug messages
235  * 0x1 - Minimal debug messages
236  *
237  * set ldcdbgchan to the channel number you want to debug
238  * setting it to -1 prints debug messages for all channels
239  * NOTE: ldcdbgchan has no effect on error messages
240  */
241 
242 int ldcdbg = 0x0;
243 int64_t ldcdbgchan = DBG_ALL_LDCS;
244 uint64_t ldc_inject_err_flag = 0;
245 
246 void
247 ldcdebug(int64_t id, const char *fmt, ...)
248 {
249 	char buf[512];
250 	va_list ap;
251 
252 	/*
253 	 * Do not return if,
254 	 * caller wants to print it anyway - (id == DBG_ALL_LDCS)
255 	 * debug channel is set to all LDCs - (ldcdbgchan == DBG_ALL_LDCS)
256 	 * debug channel = caller specified channel
257 	 */
258 	if ((id != DBG_ALL_LDCS) &&
259 	    (ldcdbgchan != DBG_ALL_LDCS) &&
260 	    (ldcdbgchan != id)) {
261 		return;
262 	}
263 
264 	va_start(ap, fmt);
265 	(void) vsprintf(buf, fmt, ap);
266 	va_end(ap);
267 
268 	cmn_err(CE_CONT, "?%s", buf);
269 }
270 
271 #define	LDC_ERR_RESET		0x1
272 #define	LDC_ERR_PKTLOSS		0x2
273 #define	LDC_ERR_DQFULL		0x4
274 #define	LDC_ERR_DRNGCLEAR	0x8
275 
276 static boolean_t
277 ldc_inject_error(ldc_chan_t *ldcp, uint64_t error)
278 {
279 	if ((ldcdbgchan != DBG_ALL_LDCS) && (ldcdbgchan != ldcp->id))
280 		return (B_FALSE);
281 
282 	if ((ldc_inject_err_flag & error) == 0)
283 		return (B_FALSE);
284 
285 	/* clear the injection state */
286 	ldc_inject_err_flag &= ~error;
287 
288 	return (B_TRUE);
289 }
290 
291 #define	DUMP_PAYLOAD(id, addr)						\
292 {									\
293 	char buf[65*3];							\
294 	int i;								\
295 	uint8_t *src = (uint8_t *)addr;					\
296 	for (i = 0; i < 64; i++, src++)					\
297 		(void) sprintf(&buf[i * 3], "|%02x", *src);		\
298 	(void) sprintf(&buf[i * 3], "|\n");				\
299 	D2((id), "payload: %s", buf);					\
300 }
301 
302 #define	DUMP_LDC_PKT(c, s, addr)					\
303 {									\
304 	ldc_msg_t *msg = (ldc_msg_t *)(addr);				\
305 	uint32_t mid = ((c)->mode != LDC_MODE_RAW) ? msg->seqid : 0;	\
306 	if (msg->type == LDC_DATA) {                                    \
307 	    D2((c)->id, "%s: msg%d (/%x/%x/%x/,env[%c%c,sz=%d])",	\
308 	    (s), mid, msg->type, msg->stype, msg->ctrl,			\
309 	    (msg->env & LDC_FRAG_START) ? 'B' : ' ',                    \
310 	    (msg->env & LDC_FRAG_STOP) ? 'E' : ' ',                     \
311 	    (msg->env & LDC_LEN_MASK));					\
312 	} else {							\
313 	    D2((c)->id, "%s: msg%d (/%x/%x/%x/,env=%x)", (s),		\
314 	    mid, msg->type, msg->stype, msg->ctrl, msg->env);		\
315 	}								\
316 }
317 
318 #define	LDC_INJECT_RESET(_ldcp)	ldc_inject_error(_ldcp, LDC_ERR_RESET)
319 #define	LDC_INJECT_PKTLOSS(_ldcp) ldc_inject_error(_ldcp, LDC_ERR_PKTLOSS)
320 #define	LDC_INJECT_DQFULL(_ldcp) ldc_inject_error(_ldcp, LDC_ERR_DQFULL)
321 #define	LDC_INJECT_DRNGCLEAR(_ldcp) ldc_inject_error(_ldcp, LDC_ERR_DRNGCLEAR)
322 extern void i_ldc_mem_inject_dring_clear(ldc_chan_t *ldcp);
323 
324 #else
325 
326 #define	DBG_ALL_LDCS -1
327 
328 #define	DUMP_PAYLOAD(id, addr)
329 #define	DUMP_LDC_PKT(c, s, addr)
330 
331 #define	LDC_INJECT_RESET(_ldcp)	(B_FALSE)
332 #define	LDC_INJECT_PKTLOSS(_ldcp) (B_FALSE)
333 #define	LDC_INJECT_DQFULL(_ldcp) (B_FALSE)
334 #define	LDC_INJECT_DRNGCLEAR(_ldcp) (B_FALSE)
335 
336 #endif
337 
338 /*
339  * dtrace SDT probes to ease tracing of the rx data queue and HV queue
340  * lengths. Just pass the head, tail, and entries values so that the
341  * length can be calculated in a dtrace script when the probe is enabled.
342  */
343 #define	TRACE_RXDQ_LENGTH(ldcp)						\
344 	DTRACE_PROBE4(rxdq__size,					\
345 	uint64_t, ldcp->id,						\
346 	uint64_t, ldcp->rx_dq_head,					\
347 	uint64_t, ldcp->rx_dq_tail,					\
348 	uint64_t, ldcp->rx_dq_entries)
349 
350 #define	TRACE_RXHVQ_LENGTH(ldcp, head, tail)				\
351 	DTRACE_PROBE4(rxhvq__size,					\
352 	uint64_t, ldcp->id,						\
353 	uint64_t, head,							\
354 	uint64_t, tail,							\
355 	uint64_t, ldcp->rx_q_entries)
356 
357 /* A dtrace SDT probe to ease tracing of data queue copy operations */
#define	TRACE_RXDQ_COPY(ldcp, bytes)					\
	DTRACE_PROBE2(rxdq__copy, uint64_t, ldcp->id, uint64_t, bytes)
360 
361 /* The amount of contiguous space at the tail of the queue */
362 #define	Q_CONTIG_SPACE(head, tail, size)				\
363 	((head) <= (tail) ? ((size) - (tail)) :				\
364 	((head) - (tail) - LDC_PACKET_SIZE))
365 
366 #define	ZERO_PKT(p)			\
367 	bzero((p), sizeof (ldc_msg_t));
368 
369 #define	IDX2COOKIE(idx, pg_szc, pg_shift)				\
370 	(((pg_szc) << LDC_COOKIE_PGSZC_SHIFT) | ((idx) << (pg_shift)))
371 
372 int
373 _init(void)
374 {
375 	int status;
376 
377 	status = hsvc_register(&ldc_hsvc, &ldc_sup_minor);
378 	if (status != 0) {
379 		cmn_err(CE_NOTE, "!%s: cannot negotiate hypervisor LDC services"
380 		    " group: 0x%lx major: %ld minor: %ld errno: %d",
381 		    ldc_hsvc.hsvc_modname, ldc_hsvc.hsvc_group,
382 		    ldc_hsvc.hsvc_major, ldc_hsvc.hsvc_minor, status);
383 		return (-1);
384 	}
385 
386 	/* Initialize shared memory HV API version checking */
387 	i_ldc_mem_set_hsvc_vers(ldc_hsvc.hsvc_major, ldc_sup_minor);
388 
389 	/* allocate soft state structure */
390 	ldcssp = kmem_zalloc(sizeof (ldc_soft_state_t), KM_SLEEP);
391 
392 	i_ldc_init_mapin(ldcssp, ldc_hsvc.hsvc_major, ldc_sup_minor);
393 
394 	/* Link the module into the system */
395 	status = mod_install(&ml);
396 	if (status != 0) {
397 		kmem_free(ldcssp, sizeof (ldc_soft_state_t));
398 		return (status);
399 	}
400 
401 	/* Initialize the LDC state structure */
402 	mutex_init(&ldcssp->lock, NULL, MUTEX_DRIVER, NULL);
403 
404 	mutex_enter(&ldcssp->lock);
405 
406 	/* Create a cache for memory handles */
407 	ldcssp->memhdl_cache = kmem_cache_create("ldc_memhdl_cache",
408 	    sizeof (ldc_mhdl_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
409 	if (ldcssp->memhdl_cache == NULL) {
410 		DWARN(DBG_ALL_LDCS, "_init: ldc_memhdl cache create failed\n");
411 		mutex_exit(&ldcssp->lock);
412 		return (-1);
413 	}
414 
415 	/* Create cache for memory segment structures */
416 	ldcssp->memseg_cache = kmem_cache_create("ldc_memseg_cache",
417 	    sizeof (ldc_memseg_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
418 	if (ldcssp->memseg_cache == NULL) {
419 		DWARN(DBG_ALL_LDCS, "_init: ldc_memseg cache create failed\n");
420 		mutex_exit(&ldcssp->lock);
421 		return (-1);
422 	}
423 
424 
425 	ldcssp->channel_count = 0;
426 	ldcssp->channels_open = 0;
427 	ldcssp->chan_list = NULL;
428 	ldcssp->dring_list = NULL;
429 
430 	/* Register debug_enter callback */
431 	kldc_set_debug_cb(&i_ldc_debug_enter);
432 
433 	mutex_exit(&ldcssp->lock);
434 
435 	return (0);
436 }
437 
438 int
439 _info(struct modinfo *modinfop)
440 {
441 	/* Report status of the dynamically loadable driver module */
442 	return (mod_info(&ml, modinfop));
443 }
444 
445 int
446 _fini(void)
447 {
448 	int		rv, status;
449 	ldc_chan_t	*tmp_ldcp, *ldcp;
450 	ldc_dring_t	*tmp_dringp, *dringp;
451 	ldc_mem_info_t	minfo;
452 
453 	/* Unlink the driver module from the system */
454 	status = mod_remove(&ml);
455 	if (status) {
456 		DWARN(DBG_ALL_LDCS, "_fini: mod_remove failed\n");
457 		return (EIO);
458 	}
459 
460 	/* Unregister debug_enter callback */
461 	kldc_set_debug_cb(NULL);
462 
463 	/* Free descriptor rings */
464 	dringp = ldcssp->dring_list;
465 	while (dringp != NULL) {
466 		tmp_dringp = dringp->next;
467 
468 		rv = ldc_mem_dring_info((ldc_dring_handle_t)dringp, &minfo);
469 		if (rv == 0 && minfo.status != LDC_UNBOUND) {
470 			if (minfo.status == LDC_BOUND) {
471 				(void) ldc_mem_dring_unbind(
472 				    (ldc_dring_handle_t)dringp);
473 			}
474 			if (minfo.status == LDC_MAPPED) {
475 				(void) ldc_mem_dring_unmap(
476 				    (ldc_dring_handle_t)dringp);
477 			}
478 		}
479 
480 		(void) ldc_mem_dring_destroy((ldc_dring_handle_t)dringp);
481 		dringp = tmp_dringp;
482 	}
483 	ldcssp->dring_list = NULL;
484 
485 	/* close and finalize channels */
486 	ldcp = ldcssp->chan_list;
487 	while (ldcp != NULL) {
488 		tmp_ldcp = ldcp->next;
489 
490 		(void) ldc_close((ldc_handle_t)ldcp);
491 		(void) ldc_fini((ldc_handle_t)ldcp);
492 
493 		ldcp = tmp_ldcp;
494 	}
495 	ldcssp->chan_list = NULL;
496 
497 	/* Destroy kmem caches */
498 	kmem_cache_destroy(ldcssp->memhdl_cache);
499 	kmem_cache_destroy(ldcssp->memseg_cache);
500 
501 	/*
502 	 * We have successfully "removed" the driver.
503 	 * Destroying soft states
504 	 */
505 	mutex_destroy(&ldcssp->lock);
506 	kmem_free(ldcssp, sizeof (ldc_soft_state_t));
507 
508 	(void) hsvc_unregister(&ldc_hsvc);
509 
510 	return (status);
511 }
512 
513 /* -------------------------------------------------------------------------- */
514 
515 /*
516  * LDC Link Layer Internal Functions
517  */
518 
519 /*
520  * Translate HV Errors to sun4v error codes
521  */
522 int
523 i_ldc_h2v_error(int h_error)
524 {
525 	switch (h_error) {
526 
527 	case	H_EOK:
528 		return (0);
529 
530 	case	H_ENORADDR:
531 		return (EFAULT);
532 
533 	case	H_EBADPGSZ:
534 	case	H_EINVAL:
535 		return (EINVAL);
536 
537 	case	H_EWOULDBLOCK:
538 		return (EWOULDBLOCK);
539 
540 	case	H_ENOACCESS:
541 	case	H_ENOMAP:
542 		return (EACCES);
543 
544 	case	H_EIO:
545 	case	H_ECPUERROR:
546 		return (EIO);
547 
548 	case	H_ENOTSUPPORTED:
549 		return (ENOTSUP);
550 
551 	case	H_ETOOMANY:
552 		return (ENOSPC);
553 
554 	case	H_ECHANNEL:
555 		return (ECHRNG);
556 	default:
557 		break;
558 	}
559 
560 	return (EIO);
561 }
562 
563 /*
564  * Reconfigure the transmit queue
565  */
566 static int
567 i_ldc_txq_reconf(ldc_chan_t *ldcp)
568 {
569 	int rv;
570 
571 	ASSERT(MUTEX_HELD(&ldcp->lock));
572 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
573 
574 	rv = hv_ldc_tx_qconf(ldcp->id, ldcp->tx_q_ra, ldcp->tx_q_entries);
575 	if (rv) {
576 		cmn_err(CE_WARN,
577 		    "i_ldc_txq_reconf: (0x%lx) cannot set qconf", ldcp->id);
578 		return (EIO);
579 	}
580 	rv = hv_ldc_tx_get_state(ldcp->id, &(ldcp->tx_head),
581 	    &(ldcp->tx_tail), &(ldcp->link_state));
582 	if (rv) {
583 		cmn_err(CE_WARN,
584 		    "i_ldc_txq_reconf: (0x%lx) cannot get qptrs", ldcp->id);
585 		return (EIO);
586 	}
587 	D1(ldcp->id, "i_ldc_txq_reconf: (0x%llx) h=0x%llx,t=0x%llx,"
588 	    "s=0x%llx\n", ldcp->id, ldcp->tx_head, ldcp->tx_tail,
589 	    ldcp->link_state);
590 
591 	return (0);
592 }
593 
594 /*
595  * Reconfigure the receive queue
596  */
597 static int
598 i_ldc_rxq_reconf(ldc_chan_t *ldcp, boolean_t force_reset)
599 {
600 	int rv;
601 	uint64_t rx_head, rx_tail;
602 
603 	ASSERT(MUTEX_HELD(&ldcp->lock));
604 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
605 	    &(ldcp->link_state));
606 	if (rv) {
607 		cmn_err(CE_WARN,
608 		    "i_ldc_rxq_reconf: (0x%lx) cannot get state",
609 		    ldcp->id);
610 		return (EIO);
611 	}
612 
613 	if (force_reset || (ldcp->tstate & ~TS_IN_RESET) == TS_UP) {
614 		rv = hv_ldc_rx_qconf(ldcp->id, ldcp->rx_q_ra,
615 		    ldcp->rx_q_entries);
616 		if (rv) {
617 			cmn_err(CE_WARN,
618 			    "i_ldc_rxq_reconf: (0x%lx) cannot set qconf",
619 			    ldcp->id);
620 			return (EIO);
621 		}
622 		D1(ldcp->id, "i_ldc_rxq_reconf: (0x%llx) completed q reconf",
623 		    ldcp->id);
624 	}
625 
626 	return (0);
627 }
628 
629 
630 /*
631  * Drain the contents of the receive queue
632  */
633 static void
634 i_ldc_rxq_drain(ldc_chan_t *ldcp)
635 {
636 	int rv;
637 	uint64_t rx_head, rx_tail;
638 	int retries = 0;
639 
640 	ASSERT(MUTEX_HELD(&ldcp->lock));
641 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
642 	    &(ldcp->link_state));
643 	if (rv) {
644 		cmn_err(CE_WARN, "i_ldc_rxq_drain: (0x%lx) cannot get state, "
645 		    "rv = 0x%x", ldcp->id, rv);
646 		return;
647 	}
648 
649 	/* If the queue is already empty just return success. */
650 	if (rx_head == rx_tail)
651 		return;
652 
653 	/*
654 	 * We are draining the queue in order to close the channel.
655 	 * Call hv_ldc_rx_set_qhead directly instead of i_ldc_set_rx_head
656 	 * because we do not need to reset the channel if the set
657 	 * qhead fails.
658 	 */
659 	if ((rv = hv_ldc_rx_set_qhead(ldcp->id, rx_tail)) == 0)
660 		return;
661 
662 	while ((rv == H_EWOULDBLOCK) && (retries++ < ldc_max_retries)) {
663 		drv_usecwait(ldc_delay);
664 		if ((rv = hv_ldc_rx_set_qhead(ldcp->id, rx_tail)) == 0)
665 			return;
666 	}
667 
668 	cmn_err(CE_WARN, "i_ldc_rxq_drain: (0x%lx) cannot set qhead 0x%lx, "
669 	    "rv = 0x%x", ldcp->id, rx_tail, rv);
670 }
671 
672 
673 /*
674  * Reset LDC state structure and its contents
675  */
676 static void
677 i_ldc_reset_state(ldc_chan_t *ldcp)
678 {
679 	ASSERT(MUTEX_HELD(&ldcp->lock));
680 	ldcp->last_msg_snt = LDC_INIT_SEQID;
681 	ldcp->last_ack_rcd = 0;
682 	ldcp->last_msg_rcd = 0;
683 	ldcp->tx_ackd_head = ldcp->tx_head;
684 	ldcp->stream_remains = 0;
685 	ldcp->next_vidx = 0;
686 	ldcp->hstate = 0;
687 	ldcp->tstate = TS_OPEN;
688 	ldcp->status = LDC_OPEN;
689 	ldcp->rx_ack_head = ACKPEEK_HEAD_INVALID;
690 	ldcp->rx_dq_head = 0;
691 	ldcp->rx_dq_tail = 0;
692 
693 	if (ldcp->link_state == LDC_CHANNEL_UP ||
694 	    ldcp->link_state == LDC_CHANNEL_RESET) {
695 
696 		if (ldcp->mode == LDC_MODE_RAW) {
697 			ldcp->status = LDC_UP;
698 			ldcp->tstate = TS_UP;
699 		} else {
700 			ldcp->status = LDC_READY;
701 			ldcp->tstate |= TS_LINK_READY;
702 		}
703 	}
704 }
705 
706 /*
707  * Reset a LDC channel
708  */
709 void
710 i_ldc_reset(ldc_chan_t *ldcp, boolean_t force_reset)
711 {
712 	DWARN(ldcp->id, "i_ldc_reset: (0x%llx) channel reset\n", ldcp->id);
713 
714 	ASSERT(MUTEX_HELD(&ldcp->lock));
715 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
716 
717 	/* reconfig Tx and Rx queues */
718 	(void) i_ldc_txq_reconf(ldcp);
719 	(void) i_ldc_rxq_reconf(ldcp, force_reset);
720 
721 	/* Clear Tx and Rx interrupts */
722 	(void) i_ldc_clear_intr(ldcp, CNEX_TX_INTR);
723 	(void) i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
724 
725 	/* Reset channel state */
726 	i_ldc_reset_state(ldcp);
727 
728 	/* Mark channel in reset */
729 	ldcp->tstate |= TS_IN_RESET;
730 }
731 
732 /*
733  * Walk the channel list and reset channels if they are of the right
734  * devclass and their Rx queues have been configured. No locks are
735  * taken because the function is only invoked by the kernel just before
736  * entering the prom or debugger when the system is single-threaded.
737  */
738 static void
739 i_ldc_debug_enter(void)
740 {
741 	ldc_chan_t *ldcp;
742 
743 	ldcp = ldcssp->chan_list;
744 	while (ldcp != NULL) {
745 		if (((ldcp->tstate & TS_QCONF_RDY) == TS_QCONF_RDY) &&
746 		    (LDC_DEVCLASS_PROM_RESET(ldcp->devclass) != 0)) {
747 			(void) hv_ldc_rx_qconf(ldcp->id, ldcp->rx_q_ra,
748 			    ldcp->rx_q_entries);
749 		}
750 		ldcp = ldcp->next;
751 	}
752 }
753 
754 /*
755  * Clear pending interrupts
756  */
757 static void
758 i_ldc_clear_intr(ldc_chan_t *ldcp, cnex_intrtype_t itype)
759 {
760 	ldc_cnex_t *cinfo = &ldcssp->cinfo;
761 
762 	ASSERT(MUTEX_HELD(&ldcp->lock));
763 	ASSERT(cinfo->dip != NULL);
764 
765 	switch (itype) {
766 	case CNEX_TX_INTR:
767 		/* check Tx interrupt */
768 		if (ldcp->tx_intr_state)
769 			ldcp->tx_intr_state = LDC_INTR_NONE;
770 		else
771 			return;
772 		break;
773 
774 	case CNEX_RX_INTR:
775 		/* check Rx interrupt */
776 		if (ldcp->rx_intr_state)
777 			ldcp->rx_intr_state = LDC_INTR_NONE;
778 		else
779 			return;
780 		break;
781 	}
782 
783 	(void) cinfo->clr_intr(cinfo->dip, ldcp->id, itype);
784 	D2(ldcp->id,
785 	    "i_ldc_clear_intr: (0x%llx) cleared 0x%x intr\n",
786 	    ldcp->id, itype);
787 }
788 
789 /*
790  * Set the receive queue head
791  * Resets connection and returns an error if it fails.
792  */
793 static int
794 i_ldc_set_rx_head(ldc_chan_t *ldcp, uint64_t head)
795 {
796 	int	rv;
797 	int	retries;
798 
799 	ASSERT(MUTEX_HELD(&ldcp->lock));
800 	for (retries = 0; retries < ldc_max_retries; retries++) {
801 
802 		if ((rv = hv_ldc_rx_set_qhead(ldcp->id, head)) == 0)
803 			return (0);
804 
805 		if (rv != H_EWOULDBLOCK)
806 			break;
807 
808 		/* wait for ldc_delay usecs */
809 		drv_usecwait(ldc_delay);
810 	}
811 
812 	cmn_err(CE_WARN, "ldc_set_rx_qhead: (0x%lx) cannot set qhead 0x%lx, "
813 	    "rv = 0x%x", ldcp->id, head, rv);
814 	mutex_enter(&ldcp->tx_lock);
815 	i_ldc_reset(ldcp, B_TRUE);
816 	mutex_exit(&ldcp->tx_lock);
817 
818 	return (ECONNRESET);
819 }
820 
821 /*
822  * Returns the tx_head to be used for transfer
823  */
824 static void
825 i_ldc_get_tx_head(ldc_chan_t *ldcp, uint64_t *head)
826 {
827 	ldc_msg_t	*pkt;
828 
829 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
830 
831 	/* get current Tx head */
832 	*head = ldcp->tx_head;
833 
834 	/*
835 	 * Reliable mode will use the ACKd head instead of the regular tx_head.
836 	 * Also in Reliable mode, advance ackd_head for all non DATA/INFO pkts,
837 	 * up to the current location of tx_head. This needs to be done
838 	 * as the peer will only ACK DATA/INFO pkts.
839 	 */
840 	if (ldcp->mode == LDC_MODE_RELIABLE) {
841 		while (ldcp->tx_ackd_head != ldcp->tx_head) {
842 			pkt = (ldc_msg_t *)(ldcp->tx_q_va + ldcp->tx_ackd_head);
843 			if ((pkt->type & LDC_DATA) && (pkt->stype & LDC_INFO)) {
844 				break;
845 			}
846 			/* advance ACKd head */
847 			ldcp->tx_ackd_head =
848 			    (ldcp->tx_ackd_head + LDC_PACKET_SIZE) %
849 			    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
850 		}
851 		*head = ldcp->tx_ackd_head;
852 	}
853 }
854 
855 /*
856  * Returns the tx_tail to be used for transfer
857  * Re-reads the TX queue ptrs if and only if the
858  * the cached head and tail are equal (queue is full)
859  */
860 static int
861 i_ldc_get_tx_tail(ldc_chan_t *ldcp, uint64_t *tail)
862 {
863 	int		rv;
864 	uint64_t	current_head, new_tail;
865 
866 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
867 	/* Read the head and tail ptrs from HV */
868 	rv = hv_ldc_tx_get_state(ldcp->id,
869 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
870 	if (rv) {
871 		cmn_err(CE_WARN,
872 		    "i_ldc_get_tx_tail: (0x%lx) cannot read qptrs\n",
873 		    ldcp->id);
874 		return (EIO);
875 	}
876 	if (ldcp->link_state == LDC_CHANNEL_DOWN) {
877 		D1(ldcp->id, "i_ldc_get_tx_tail: (0x%llx) channel not ready\n",
878 		    ldcp->id);
879 		return (ECONNRESET);
880 	}
881 
882 	i_ldc_get_tx_head(ldcp, &current_head);
883 
884 	/* increment the tail */
885 	new_tail = (ldcp->tx_tail + LDC_PACKET_SIZE) %
886 	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
887 
888 	if (new_tail == current_head) {
889 		DWARN(ldcp->id,
890 		    "i_ldc_get_tx_tail: (0x%llx) TX queue is full\n",
891 		    ldcp->id);
892 		return (EWOULDBLOCK);
893 	}
894 
895 	D2(ldcp->id, "i_ldc_get_tx_tail: (0x%llx) head=0x%llx, tail=0x%llx\n",
896 	    ldcp->id, ldcp->tx_head, ldcp->tx_tail);
897 
898 	*tail = ldcp->tx_tail;
899 	return (0);
900 }
901 
902 /*
903  * Set the tail pointer. If HV returns EWOULDBLOCK, it will back off
904  * and retry ldc_max_retries times before returning an error.
905  * Returns 0, EWOULDBLOCK or EIO
906  */
907 static int
908 i_ldc_set_tx_tail(ldc_chan_t *ldcp, uint64_t tail)
909 {
910 	int		rv, retval = EWOULDBLOCK;
911 	int		retries;
912 
913 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
914 	for (retries = 0; retries < ldc_max_retries; retries++) {
915 
916 		if ((rv = hv_ldc_tx_set_qtail(ldcp->id, tail)) == 0) {
917 			retval = 0;
918 			break;
919 		}
920 		if (rv != H_EWOULDBLOCK) {
921 			DWARN(ldcp->id, "i_ldc_set_tx_tail: (0x%llx) set "
922 			    "qtail=0x%llx failed, rv=%d\n", ldcp->id, tail, rv);
923 			retval = EIO;
924 			break;
925 		}
926 
927 		/* wait for ldc_delay usecs */
928 		drv_usecwait(ldc_delay);
929 	}
930 	return (retval);
931 }
932 
933 /*
934  * Copy a data packet from the HV receive queue to the data queue.
935  * Caller must ensure that the data queue is not already full.
936  *
937  * The *head argument represents the current head pointer for the HV
938  * receive queue. After copying a packet from the HV receive queue,
939  * the *head pointer will be updated. This allows the caller to update
940  * the head pointer in HV using the returned *head value.
941  */
942 void
943 i_ldc_rxdq_copy(ldc_chan_t *ldcp, uint64_t *head)
944 {
945 	uint64_t	q_size, dq_size;
946 
947 	ASSERT(MUTEX_HELD(&ldcp->lock));
948 
949 	q_size  = ldcp->rx_q_entries << LDC_PACKET_SHIFT;
950 	dq_size = ldcp->rx_dq_entries << LDC_PACKET_SHIFT;
951 
952 	ASSERT(Q_CONTIG_SPACE(ldcp->rx_dq_head, ldcp->rx_dq_tail,
953 	    dq_size) >= LDC_PACKET_SIZE);
954 
955 	bcopy((void *)(ldcp->rx_q_va + *head),
956 	    (void *)(ldcp->rx_dq_va + ldcp->rx_dq_tail), LDC_PACKET_SIZE);
957 	TRACE_RXDQ_COPY(ldcp, LDC_PACKET_SIZE);
958 
959 	/* Update rx head */
960 	*head = (*head + LDC_PACKET_SIZE) % q_size;
961 
962 	/* Update dq tail */
963 	ldcp->rx_dq_tail = (ldcp->rx_dq_tail + LDC_PACKET_SIZE) % dq_size;
964 }
965 
966 /*
967  * Update the Rx data queue head pointer
968  */
969 static int
970 i_ldc_set_rxdq_head(ldc_chan_t *ldcp, uint64_t head)
971 {
972 	ldcp->rx_dq_head = head;
973 	return (0);
974 }
975 
976 /*
977  * Get the Rx data queue head and tail pointers
978  */
979 static uint64_t
980 i_ldc_dq_rx_get_state(ldc_chan_t *ldcp, uint64_t *head, uint64_t *tail,
981     uint64_t *link_state)
982 {
983 	_NOTE(ARGUNUSED(link_state))
984 	*head = ldcp->rx_dq_head;
985 	*tail = ldcp->rx_dq_tail;
986 	return (0);
987 }
988 
989 /*
990  * Wrapper for the Rx HV queue set head function. Giving the
991  * data queue and HV queue set head functions the same type.
992  */
993 static uint64_t
994 i_ldc_hvq_rx_get_state(ldc_chan_t *ldcp, uint64_t *head, uint64_t *tail,
995     uint64_t *link_state)
996 {
997 	return (i_ldc_h2v_error(hv_ldc_rx_get_state(ldcp->id, head, tail,
998 	    link_state)));
999 }
1000 
1001 /*
1002  * LDC receive interrupt handler
1003  *    triggered for channel with data pending to read
1004  *    i.e. Rx queue content changes
1005  */
1006 static uint_t
1007 i_ldc_rx_hdlr(caddr_t arg1, caddr_t arg2)
1008 {
1009 	_NOTE(ARGUNUSED(arg2))
1010 
1011 	ldc_chan_t	*ldcp;
1012 	boolean_t	notify;
1013 	uint64_t	event;
1014 	int		rv, status;
1015 
1016 	/* Get the channel for which interrupt was received */
1017 	if (arg1 == NULL) {
1018 		cmn_err(CE_WARN, "i_ldc_rx_hdlr: invalid arg\n");
1019 		return (DDI_INTR_UNCLAIMED);
1020 	}
1021 
1022 	ldcp = (ldc_chan_t *)arg1;
1023 
1024 	D1(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) Received intr, ldcp=0x%p\n",
1025 	    ldcp->id, ldcp);
1026 	D1(ldcp->id, "i_ldc_rx_hdlr: (%llx) USR%lx/TS%lx/HS%lx, LSTATE=%lx\n",
1027 	    ldcp->id, ldcp->status, ldcp->tstate, ldcp->hstate,
1028 	    ldcp->link_state);
1029 
1030 	/* Lock channel */
1031 	mutex_enter(&ldcp->lock);
1032 
1033 	/* Mark the interrupt as being actively handled */
1034 	ldcp->rx_intr_state = LDC_INTR_ACTIVE;
1035 
1036 	status = i_ldc_rx_process_hvq(ldcp, &notify, &event);
1037 
1038 	if (ldcp->mode != LDC_MODE_RELIABLE) {
1039 		/*
1040 		 * If there are no data packets on the queue, clear
1041 		 * the interrupt. Otherwise, the ldc_read will clear
1042 		 * interrupts after draining the queue. To indicate the
1043 		 * interrupt has not yet been cleared, it is marked
1044 		 * as pending.
1045 		 */
1046 		if ((event & LDC_EVT_READ) == 0) {
1047 			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
1048 		} else {
1049 			ldcp->rx_intr_state = LDC_INTR_PEND;
1050 		}
1051 	}
1052 
1053 	/* if callbacks are disabled, do not notify */
1054 	if (notify && ldcp->cb_enabled) {
1055 		ldcp->cb_inprogress = B_TRUE;
1056 		mutex_exit(&ldcp->lock);
1057 		rv = ldcp->cb(event, ldcp->cb_arg);
1058 		if (rv) {
1059 			DWARN(ldcp->id,
1060 			    "i_ldc_rx_hdlr: (0x%llx) callback failure",
1061 			    ldcp->id);
1062 		}
1063 		mutex_enter(&ldcp->lock);
1064 		ldcp->cb_inprogress = B_FALSE;
1065 	}
1066 
1067 	if (ldcp->mode == LDC_MODE_RELIABLE) {
1068 		if (status == ENOSPC) {
1069 			/*
1070 			 * Here, ENOSPC indicates the secondary data
1071 			 * queue is full and the Rx queue is non-empty.
1072 			 * Much like how reliable and raw modes are
1073 			 * handled above, since the Rx queue is non-
1074 			 * empty, we mark the interrupt as pending to
1075 			 * indicate it has not yet been cleared.
1076 			 */
1077 			ldcp->rx_intr_state = LDC_INTR_PEND;
1078 		} else {
1079 			/*
1080 			 * We have processed all CTRL packets and
1081 			 * copied all DATA packets to the secondary
1082 			 * queue. Clear the interrupt.
1083 			 */
1084 			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
1085 		}
1086 	}
1087 
1088 	mutex_exit(&ldcp->lock);
1089 
1090 	D1(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) exiting handler", ldcp->id);
1091 
1092 	return (DDI_INTR_CLAIMED);
1093 }
1094 
1095 /*
1096  * Wrapper for the Rx HV queue processing function to be used when
1097  * checking the Rx HV queue for data packets. Unlike the interrupt
1098  * handler code flow, the Rx interrupt is not cleared here and
1099  * callbacks are not made.
1100  */
1101 static uint_t
1102 i_ldc_chkq(ldc_chan_t *ldcp)
1103 {
1104 	boolean_t	notify;
1105 	uint64_t	event;
1106 
1107 	return (i_ldc_rx_process_hvq(ldcp, &notify, &event));
1108 }
1109 
1110 /*
1111  * Send a LDC message
1112  */
1113 static int
1114 i_ldc_send_pkt(ldc_chan_t *ldcp, uint8_t pkttype, uint8_t subtype,
1115     uint8_t ctrlmsg)
1116 {
1117 	int		rv;
1118 	ldc_msg_t	*pkt;
1119 	uint64_t	tx_tail;
1120 	uint32_t	curr_seqid;
1121 
1122 	/* Obtain Tx lock */
1123 	mutex_enter(&ldcp->tx_lock);
1124 
1125 	curr_seqid = ldcp->last_msg_snt;
1126 
1127 	/* get the current tail for the message */
1128 	rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
1129 	if (rv) {
1130 		DWARN(ldcp->id,
1131 		    "i_ldc_send_pkt: (0x%llx) error sending pkt, "
1132 		    "type=0x%x,subtype=0x%x,ctrl=0x%x\n",
1133 		    ldcp->id, pkttype, subtype, ctrlmsg);
1134 		mutex_exit(&ldcp->tx_lock);
1135 		return (rv);
1136 	}
1137 
1138 	pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
1139 	ZERO_PKT(pkt);
1140 
1141 	/* Initialize the packet */
1142 	pkt->type = pkttype;
1143 	pkt->stype = subtype;
1144 	pkt->ctrl = ctrlmsg;
1145 
1146 	/* Store ackid/seqid iff it is RELIABLE mode & not a RTS/RTR message */
1147 	if (((ctrlmsg & LDC_CTRL_MASK) != LDC_RTS) &&
1148 	    ((ctrlmsg & LDC_CTRL_MASK) != LDC_RTR)) {
1149 		curr_seqid++;
1150 		if (ldcp->mode != LDC_MODE_RAW) {
1151 			pkt->seqid = curr_seqid;
1152 			pkt->ackid = ldcp->last_msg_rcd;
1153 		}
1154 	}
1155 	DUMP_LDC_PKT(ldcp, "i_ldc_send_pkt", (uint64_t)pkt);
1156 
1157 	/* initiate the send by calling into HV and set the new tail */
1158 	tx_tail = (tx_tail + LDC_PACKET_SIZE) %
1159 	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1160 
1161 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
1162 	if (rv) {
1163 		DWARN(ldcp->id,
1164 		    "i_ldc_send_pkt:(0x%llx) error sending pkt, "
1165 		    "type=0x%x,stype=0x%x,ctrl=0x%x\n",
1166 		    ldcp->id, pkttype, subtype, ctrlmsg);
1167 		mutex_exit(&ldcp->tx_lock);
1168 		return (EIO);
1169 	}
1170 
1171 	ldcp->last_msg_snt = curr_seqid;
1172 	ldcp->tx_tail = tx_tail;
1173 
1174 	mutex_exit(&ldcp->tx_lock);
1175 	return (0);
1176 }
1177 
1178 /*
1179  * Checks if packet was received in right order
1180  * in the case of a reliable link.
1181  * Returns 0 if in order, else EIO
1182  */
1183 static int
1184 i_ldc_check_seqid(ldc_chan_t *ldcp, ldc_msg_t *msg)
1185 {
1186 	/* No seqid checking for RAW mode */
1187 	if (ldcp->mode == LDC_MODE_RAW)
1188 		return (0);
1189 
1190 	/* No seqid checking for version, RTS, RTR message */
1191 	if (msg->ctrl == LDC_VER ||
1192 	    msg->ctrl == LDC_RTS ||
1193 	    msg->ctrl == LDC_RTR)
1194 		return (0);
1195 
1196 	/* Initial seqid to use is sent in RTS/RTR and saved in last_msg_rcd */
1197 	if (msg->seqid != (ldcp->last_msg_rcd + 1)) {
1198 		DWARN(ldcp->id,
1199 		    "i_ldc_check_seqid: (0x%llx) out-of-order pkt, got 0x%x, "
1200 		    "expecting 0x%x\n", ldcp->id, msg->seqid,
1201 		    (ldcp->last_msg_rcd + 1));
1202 		return (EIO);
1203 	}
1204 
1205 #ifdef DEBUG
1206 	if (LDC_INJECT_PKTLOSS(ldcp)) {
1207 		DWARN(ldcp->id,
1208 		    "i_ldc_check_seqid: (0x%llx) inject pkt loss\n", ldcp->id);
1209 		return (EIO);
1210 	}
1211 #endif
1212 
1213 	return (0);
1214 }
1215 
1216 
1217 /*
1218  * Process an incoming version ctrl message
1219  */
1220 static int
1221 i_ldc_process_VER(ldc_chan_t *ldcp, ldc_msg_t *msg)
1222 {
1223 	int		rv = 0, idx = ldcp->next_vidx;
1224 	ldc_msg_t	*pkt;
1225 	uint64_t	tx_tail;
1226 	ldc_ver_t	*rcvd_ver;
1227 
1228 	/* get the received version */
1229 	rcvd_ver = (ldc_ver_t *)((uint64_t)msg + LDC_PAYLOAD_VER_OFF);
1230 
1231 	D2(ldcp->id, "i_ldc_process_VER: (0x%llx) received VER v%u.%u\n",
1232 	    ldcp->id, rcvd_ver->major, rcvd_ver->minor);
1233 
1234 	/* Obtain Tx lock */
1235 	mutex_enter(&ldcp->tx_lock);
1236 
1237 	switch (msg->stype) {
1238 	case LDC_INFO:
1239 
1240 		if ((ldcp->tstate & ~TS_IN_RESET) == TS_VREADY) {
1241 			(void) i_ldc_txq_reconf(ldcp);
1242 			i_ldc_reset_state(ldcp);
1243 			mutex_exit(&ldcp->tx_lock);
1244 			return (EAGAIN);
1245 		}
1246 
1247 		/* get the current tail and pkt for the response */
1248 		rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
1249 		if (rv != 0) {
1250 			DWARN(ldcp->id,
1251 			    "i_ldc_process_VER: (0x%llx) err sending "
1252 			    "version ACK/NACK\n", ldcp->id);
1253 			i_ldc_reset(ldcp, B_TRUE);
1254 			mutex_exit(&ldcp->tx_lock);
1255 			return (ECONNRESET);
1256 		}
1257 
1258 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
1259 		ZERO_PKT(pkt);
1260 
1261 		/* initialize the packet */
1262 		pkt->type = LDC_CTRL;
1263 		pkt->ctrl = LDC_VER;
1264 
1265 		for (;;) {
1266 
1267 			D1(ldcp->id, "i_ldc_process_VER: got %u.%u chk %u.%u\n",
1268 			    rcvd_ver->major, rcvd_ver->minor,
1269 			    ldc_versions[idx].major, ldc_versions[idx].minor);
1270 
1271 			if (rcvd_ver->major == ldc_versions[idx].major) {
1272 				/* major version match - ACK version */
1273 				pkt->stype = LDC_ACK;
1274 
1275 				/*
1276 				 * lower minor version to the one this endpt
1277 				 * supports, if necessary
1278 				 */
1279 				if (rcvd_ver->minor > ldc_versions[idx].minor)
1280 					rcvd_ver->minor =
1281 					    ldc_versions[idx].minor;
1282 				bcopy(rcvd_ver, pkt->udata, sizeof (*rcvd_ver));
1283 
1284 				break;
1285 			}
1286 
1287 			if (rcvd_ver->major > ldc_versions[idx].major) {
1288 
1289 				D1(ldcp->id, "i_ldc_process_VER: using next"
1290 				    " lower idx=%d, v%u.%u\n", idx,
1291 				    ldc_versions[idx].major,
1292 				    ldc_versions[idx].minor);
1293 
1294 				/* nack with next lower version */
1295 				pkt->stype = LDC_NACK;
1296 				bcopy(&ldc_versions[idx], pkt->udata,
1297 				    sizeof (ldc_versions[idx]));
1298 				ldcp->next_vidx = idx;
1299 				break;
1300 			}
1301 
1302 			/* next major version */
1303 			idx++;
1304 
1305 			D1(ldcp->id, "i_ldc_process_VER: inc idx %x\n", idx);
1306 
1307 			if (idx == LDC_NUM_VERS) {
1308 				/* no version match - send NACK */
1309 				pkt->stype = LDC_NACK;
1310 				bzero(pkt->udata, sizeof (ldc_ver_t));
1311 				ldcp->next_vidx = 0;
1312 				break;
1313 			}
1314 		}
1315 
1316 		/* initiate the send by calling into HV and set the new tail */
1317 		tx_tail = (tx_tail + LDC_PACKET_SIZE) %
1318 		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1319 
1320 		rv = i_ldc_set_tx_tail(ldcp, tx_tail);
1321 		if (rv == 0) {
1322 			ldcp->tx_tail = tx_tail;
1323 			if (pkt->stype == LDC_ACK) {
1324 				D2(ldcp->id, "i_ldc_process_VER: (0x%llx) sent"
1325 				    " version ACK\n", ldcp->id);
1326 				/* Save the ACK'd version */
1327 				ldcp->version.major = rcvd_ver->major;
1328 				ldcp->version.minor = rcvd_ver->minor;
1329 				ldcp->hstate |= TS_RCVD_VER;
1330 				ldcp->tstate |= TS_VER_DONE;
1331 				D1(DBG_ALL_LDCS,
1332 				    "(0x%llx) Sent ACK, "
1333 				    "Agreed on version v%u.%u\n",
1334 				    ldcp->id, rcvd_ver->major, rcvd_ver->minor);
1335 			}
1336 		} else {
1337 			DWARN(ldcp->id,
1338 			    "i_ldc_process_VER: (0x%llx) error sending "
1339 			    "ACK/NACK\n", ldcp->id);
1340 			i_ldc_reset(ldcp, B_TRUE);
1341 			mutex_exit(&ldcp->tx_lock);
1342 			return (ECONNRESET);
1343 		}
1344 
1345 		break;
1346 
1347 	case LDC_ACK:
1348 		if ((ldcp->tstate & ~TS_IN_RESET) == TS_VREADY) {
1349 			if (ldcp->version.major != rcvd_ver->major ||
1350 			    ldcp->version.minor != rcvd_ver->minor) {
1351 
1352 				/* mismatched version - reset connection */
1353 				DWARN(ldcp->id,
1354 				    "i_ldc_process_VER: (0x%llx) recvd"
1355 				    " ACK ver != sent ACK ver\n", ldcp->id);
1356 				i_ldc_reset(ldcp, B_TRUE);
1357 				mutex_exit(&ldcp->tx_lock);
1358 				return (ECONNRESET);
1359 			}
1360 		} else {
1361 			/* SUCCESS - we have agreed on a version */
1362 			ldcp->version.major = rcvd_ver->major;
1363 			ldcp->version.minor = rcvd_ver->minor;
1364 			ldcp->tstate |= TS_VER_DONE;
1365 		}
1366 
1367 		D1(ldcp->id, "(0x%llx) Got ACK, Agreed on version v%u.%u\n",
1368 		    ldcp->id, rcvd_ver->major, rcvd_ver->minor);
1369 
1370 		/* initiate RTS-RTR-RDX handshake */
1371 		rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
1372 		if (rv) {
1373 			DWARN(ldcp->id,
1374 		    "i_ldc_process_VER: (0x%llx) cannot send RTS\n",
1375 			    ldcp->id);
1376 			i_ldc_reset(ldcp, B_TRUE);
1377 			mutex_exit(&ldcp->tx_lock);
1378 			return (ECONNRESET);
1379 		}
1380 
1381 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
1382 		ZERO_PKT(pkt);
1383 
1384 		pkt->type = LDC_CTRL;
1385 		pkt->stype = LDC_INFO;
1386 		pkt->ctrl = LDC_RTS;
1387 		pkt->env = ldcp->mode;
1388 		if (ldcp->mode != LDC_MODE_RAW)
1389 			pkt->seqid = LDC_INIT_SEQID;
1390 
1391 		ldcp->last_msg_rcd = LDC_INIT_SEQID;
1392 
1393 		DUMP_LDC_PKT(ldcp, "i_ldc_process_VER snd rts", (uint64_t)pkt);
1394 
1395 		/* initiate the send by calling into HV and set the new tail */
1396 		tx_tail = (tx_tail + LDC_PACKET_SIZE) %
1397 		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1398 
1399 		rv = i_ldc_set_tx_tail(ldcp, tx_tail);
1400 		if (rv) {
1401 			D2(ldcp->id,
1402 			    "i_ldc_process_VER: (0x%llx) no listener\n",
1403 			    ldcp->id);
1404 			i_ldc_reset(ldcp, B_TRUE);
1405 			mutex_exit(&ldcp->tx_lock);
1406 			return (ECONNRESET);
1407 		}
1408 
1409 		ldcp->tx_tail = tx_tail;
1410 		ldcp->hstate |= TS_SENT_RTS;
1411 
1412 		break;
1413 
1414 	case LDC_NACK:
1415 		/* check if version in NACK is zero */
1416 		if (rcvd_ver->major == 0 && rcvd_ver->minor == 0) {
1417 			/* version handshake failure */
1418 			DWARN(DBG_ALL_LDCS,
1419 			    "i_ldc_process_VER: (0x%llx) no version match\n",
1420 			    ldcp->id);
1421 			i_ldc_reset(ldcp, B_TRUE);
1422 			mutex_exit(&ldcp->tx_lock);
1423 			return (ECONNRESET);
1424 		}
1425 
1426 		/* get the current tail and pkt for the response */
1427 		rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
1428 		if (rv != 0) {
1429 			cmn_err(CE_NOTE,
1430 			    "i_ldc_process_VER: (0x%lx) err sending "
1431 			    "version ACK/NACK\n", ldcp->id);
1432 			i_ldc_reset(ldcp, B_TRUE);
1433 			mutex_exit(&ldcp->tx_lock);
1434 			return (ECONNRESET);
1435 		}
1436 
1437 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
1438 		ZERO_PKT(pkt);
1439 
1440 		/* initialize the packet */
1441 		pkt->type = LDC_CTRL;
1442 		pkt->ctrl = LDC_VER;
1443 		pkt->stype = LDC_INFO;
1444 
1445 		/* check ver in NACK msg has a match */
1446 		for (;;) {
1447 			if (rcvd_ver->major == ldc_versions[idx].major) {
1448 				/*
1449 				 * major version match - resubmit request
1450 				 * if lower minor version to the one this endpt
1451 				 * supports, if necessary
1452 				 */
1453 				if (rcvd_ver->minor > ldc_versions[idx].minor)
1454 					rcvd_ver->minor =
1455 					    ldc_versions[idx].minor;
1456 				bcopy(rcvd_ver, pkt->udata, sizeof (*rcvd_ver));
1457 				break;
1458 			}
1459 
1460 			if (rcvd_ver->major > ldc_versions[idx].major) {
1461 
1462 				D1(ldcp->id, "i_ldc_process_VER: using next"
1463 				    " lower idx=%d, v%u.%u\n", idx,
1464 				    ldc_versions[idx].major,
1465 				    ldc_versions[idx].minor);
1466 
1467 				/* send next lower version */
1468 				bcopy(&ldc_versions[idx], pkt->udata,
1469 				    sizeof (ldc_versions[idx]));
1470 				ldcp->next_vidx = idx;
1471 				break;
1472 			}
1473 
1474 			/* next version */
1475 			idx++;
1476 
1477 			D1(ldcp->id, "i_ldc_process_VER: inc idx %x\n", idx);
1478 
1479 			if (idx == LDC_NUM_VERS) {
1480 				/* no version match - terminate */
1481 				ldcp->next_vidx = 0;
1482 				mutex_exit(&ldcp->tx_lock);
1483 				return (ECONNRESET);
1484 			}
1485 		}
1486 
1487 		/* initiate the send by calling into HV and set the new tail */
1488 		tx_tail = (tx_tail + LDC_PACKET_SIZE) %
1489 		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1490 
1491 		rv = i_ldc_set_tx_tail(ldcp, tx_tail);
1492 		if (rv == 0) {
1493 			D2(ldcp->id, "i_ldc_process_VER: (0x%llx) sent version"
1494 			    "INFO v%u.%u\n", ldcp->id, ldc_versions[idx].major,
1495 			    ldc_versions[idx].minor);
1496 			ldcp->tx_tail = tx_tail;
1497 		} else {
1498 			cmn_err(CE_NOTE,
1499 			    "i_ldc_process_VER: (0x%lx) error sending version"
1500 			    "INFO\n", ldcp->id);
1501 			i_ldc_reset(ldcp, B_TRUE);
1502 			mutex_exit(&ldcp->tx_lock);
1503 			return (ECONNRESET);
1504 		}
1505 
1506 		break;
1507 	}
1508 
1509 	mutex_exit(&ldcp->tx_lock);
1510 	return (rv);
1511 }
1512 
1513 
1514 /*
1515  * Process an incoming RTS ctrl message
1516  */
1517 static int
1518 i_ldc_process_RTS(ldc_chan_t *ldcp, ldc_msg_t *msg)
1519 {
1520 	int		rv = 0;
1521 	ldc_msg_t	*pkt;
1522 	uint64_t	tx_tail;
1523 	boolean_t	sent_NACK = B_FALSE;
1524 
1525 	D2(ldcp->id, "i_ldc_process_RTS: (0x%llx) received RTS\n", ldcp->id);
1526 
1527 	switch (msg->stype) {
1528 	case LDC_NACK:
1529 		DWARN(ldcp->id,
1530 		    "i_ldc_process_RTS: (0x%llx) RTS NACK received\n",
1531 		    ldcp->id);
1532 
1533 		/* Reset the channel -- as we cannot continue */
1534 		mutex_enter(&ldcp->tx_lock);
1535 		i_ldc_reset(ldcp, B_TRUE);
1536 		mutex_exit(&ldcp->tx_lock);
1537 		rv = ECONNRESET;
1538 		break;
1539 
1540 	case LDC_INFO:
1541 
1542 		/* check mode */
1543 		if (ldcp->mode != (ldc_mode_t)msg->env) {
1544 			cmn_err(CE_NOTE,
1545 			    "i_ldc_process_RTS: (0x%lx) mode mismatch\n",
1546 			    ldcp->id);
1547 			/*
1548 			 * send NACK in response to MODE message
1549 			 * get the current tail for the response
1550 			 */
1551 			rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_NACK, LDC_RTS);
1552 			if (rv) {
1553 				/* if cannot send NACK - reset channel */
1554 				mutex_enter(&ldcp->tx_lock);
1555 				i_ldc_reset(ldcp, B_TRUE);
1556 				mutex_exit(&ldcp->tx_lock);
1557 				rv = ECONNRESET;
1558 				break;
1559 			}
1560 			sent_NACK = B_TRUE;
1561 		}
1562 		break;
1563 	default:
1564 		DWARN(ldcp->id, "i_ldc_process_RTS: (0x%llx) unexp ACK\n",
1565 		    ldcp->id);
1566 		mutex_enter(&ldcp->tx_lock);
1567 		i_ldc_reset(ldcp, B_TRUE);
1568 		mutex_exit(&ldcp->tx_lock);
1569 		rv = ECONNRESET;
1570 		break;
1571 	}
1572 
1573 	/*
1574 	 * If either the connection was reset (when rv != 0) or
1575 	 * a NACK was sent, we return. In the case of a NACK
1576 	 * we dont want to consume the packet that came in but
1577 	 * not record that we received the RTS
1578 	 */
1579 	if (rv || sent_NACK)
1580 		return (rv);
1581 
1582 	/* record RTS received */
1583 	ldcp->hstate |= TS_RCVD_RTS;
1584 
1585 	/* store initial SEQID info */
1586 	ldcp->last_msg_snt = msg->seqid;
1587 
1588 	/* Obtain Tx lock */
1589 	mutex_enter(&ldcp->tx_lock);
1590 
1591 	/* get the current tail for the response */
1592 	rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
1593 	if (rv != 0) {
1594 		cmn_err(CE_NOTE,
1595 		    "i_ldc_process_RTS: (0x%lx) err sending RTR\n",
1596 		    ldcp->id);
1597 		i_ldc_reset(ldcp, B_TRUE);
1598 		mutex_exit(&ldcp->tx_lock);
1599 		return (ECONNRESET);
1600 	}
1601 
1602 	pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
1603 	ZERO_PKT(pkt);
1604 
1605 	/* initialize the packet */
1606 	pkt->type = LDC_CTRL;
1607 	pkt->stype = LDC_INFO;
1608 	pkt->ctrl = LDC_RTR;
1609 	pkt->env = ldcp->mode;
1610 	if (ldcp->mode != LDC_MODE_RAW)
1611 		pkt->seqid = LDC_INIT_SEQID;
1612 
1613 	ldcp->last_msg_rcd = msg->seqid;
1614 
1615 	/* initiate the send by calling into HV and set the new tail */
1616 	tx_tail = (tx_tail + LDC_PACKET_SIZE) %
1617 	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1618 
1619 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
1620 	if (rv == 0) {
1621 		D2(ldcp->id,
1622 		    "i_ldc_process_RTS: (0x%llx) sent RTR\n", ldcp->id);
1623 		DUMP_LDC_PKT(ldcp, "i_ldc_process_RTS sent rtr", (uint64_t)pkt);
1624 
1625 		ldcp->tx_tail = tx_tail;
1626 		ldcp->hstate |= TS_SENT_RTR;
1627 
1628 	} else {
1629 		cmn_err(CE_NOTE,
1630 		    "i_ldc_process_RTS: (0x%lx) error sending RTR\n",
1631 		    ldcp->id);
1632 		i_ldc_reset(ldcp, B_TRUE);
1633 		mutex_exit(&ldcp->tx_lock);
1634 		return (ECONNRESET);
1635 	}
1636 
1637 	mutex_exit(&ldcp->tx_lock);
1638 	return (0);
1639 }
1640 
1641 /*
1642  * Process an incoming RTR ctrl message
1643  */
1644 static int
1645 i_ldc_process_RTR(ldc_chan_t *ldcp, ldc_msg_t *msg)
1646 {
1647 	int		rv = 0;
1648 	boolean_t	sent_NACK = B_FALSE;
1649 
1650 	D2(ldcp->id, "i_ldc_process_RTR: (0x%llx) received RTR\n", ldcp->id);
1651 
1652 	switch (msg->stype) {
1653 	case LDC_NACK:
1654 		/* RTR NACK received */
1655 		DWARN(ldcp->id,
1656 		    "i_ldc_process_RTR: (0x%llx) RTR NACK received\n",
1657 		    ldcp->id);
1658 
1659 		/* Reset the channel -- as we cannot continue */
1660 		mutex_enter(&ldcp->tx_lock);
1661 		i_ldc_reset(ldcp, B_TRUE);
1662 		mutex_exit(&ldcp->tx_lock);
1663 		rv = ECONNRESET;
1664 
1665 		break;
1666 
1667 	case LDC_INFO:
1668 
1669 		/* check mode */
1670 		if (ldcp->mode != (ldc_mode_t)msg->env) {
1671 			DWARN(ldcp->id,
1672 			    "i_ldc_process_RTR: (0x%llx) mode mismatch, "
1673 			    "expecting 0x%x, got 0x%x\n",
1674 			    ldcp->id, ldcp->mode, (ldc_mode_t)msg->env);
1675 			/*
1676 			 * send NACK in response to MODE message
1677 			 * get the current tail for the response
1678 			 */
1679 			rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_NACK, LDC_RTR);
1680 			if (rv) {
1681 				/* if cannot send NACK - reset channel */
1682 				mutex_enter(&ldcp->tx_lock);
1683 				i_ldc_reset(ldcp, B_TRUE);
1684 				mutex_exit(&ldcp->tx_lock);
1685 				rv = ECONNRESET;
1686 				break;
1687 			}
1688 			sent_NACK = B_TRUE;
1689 		}
1690 		break;
1691 
1692 	default:
1693 		DWARN(ldcp->id, "i_ldc_process_RTR: (0x%llx) unexp ACK\n",
1694 		    ldcp->id);
1695 
1696 		/* Reset the channel -- as we cannot continue */
1697 		mutex_enter(&ldcp->tx_lock);
1698 		i_ldc_reset(ldcp, B_TRUE);
1699 		mutex_exit(&ldcp->tx_lock);
1700 		rv = ECONNRESET;
1701 		break;
1702 	}
1703 
1704 	/*
1705 	 * If either the connection was reset (when rv != 0) or
1706 	 * a NACK was sent, we return. In the case of a NACK
1707 	 * we dont want to consume the packet that came in but
1708 	 * not record that we received the RTR
1709 	 */
1710 	if (rv || sent_NACK)
1711 		return (rv);
1712 
1713 	ldcp->last_msg_snt = msg->seqid;
1714 	ldcp->hstate |= TS_RCVD_RTR;
1715 
1716 	rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_INFO, LDC_RDX);
1717 	if (rv) {
1718 		cmn_err(CE_NOTE,
1719 		    "i_ldc_process_RTR: (0x%lx) cannot send RDX\n",
1720 		    ldcp->id);
1721 		mutex_enter(&ldcp->tx_lock);
1722 		i_ldc_reset(ldcp, B_TRUE);
1723 		mutex_exit(&ldcp->tx_lock);
1724 		return (ECONNRESET);
1725 	}
1726 	D2(ldcp->id,
1727 	    "i_ldc_process_RTR: (0x%llx) sent RDX\n", ldcp->id);
1728 
1729 	ldcp->hstate |= TS_SENT_RDX;
1730 	ldcp->tstate |= TS_HSHAKE_DONE;
1731 	if ((ldcp->tstate & TS_IN_RESET) == 0)
1732 		ldcp->status = LDC_UP;
1733 
1734 	D1(ldcp->id, "(0x%llx) Handshake Complete\n", ldcp->id);
1735 
1736 	return (0);
1737 }
1738 
1739 
1740 /*
1741  * Process an incoming RDX ctrl message
1742  */
1743 static int
1744 i_ldc_process_RDX(ldc_chan_t *ldcp, ldc_msg_t *msg)
1745 {
1746 	int	rv = 0;
1747 
1748 	D2(ldcp->id, "i_ldc_process_RDX: (0x%llx) received RDX\n", ldcp->id);
1749 
1750 	switch (msg->stype) {
1751 	case LDC_NACK:
1752 		/* RDX NACK received */
1753 		DWARN(ldcp->id,
1754 		    "i_ldc_process_RDX: (0x%llx) RDX NACK received\n",
1755 		    ldcp->id);
1756 
1757 		/* Reset the channel -- as we cannot continue */
1758 		mutex_enter(&ldcp->tx_lock);
1759 		i_ldc_reset(ldcp, B_TRUE);
1760 		mutex_exit(&ldcp->tx_lock);
1761 		rv = ECONNRESET;
1762 
1763 		break;
1764 
1765 	case LDC_INFO:
1766 
1767 		/*
1768 		 * if channel is UP and a RDX received after data transmission
1769 		 * has commenced it is an error
1770 		 */
1771 		if ((ldcp->tstate == TS_UP) && (ldcp->hstate & TS_RCVD_RDX)) {
1772 			DWARN(DBG_ALL_LDCS,
1773 			    "i_ldc_process_RDX: (0x%llx) unexpected RDX"
1774 			    " - LDC reset\n", ldcp->id);
1775 			mutex_enter(&ldcp->tx_lock);
1776 			i_ldc_reset(ldcp, B_TRUE);
1777 			mutex_exit(&ldcp->tx_lock);
1778 			return (ECONNRESET);
1779 		}
1780 
1781 		ldcp->hstate |= TS_RCVD_RDX;
1782 		ldcp->tstate |= TS_HSHAKE_DONE;
1783 		if ((ldcp->tstate & TS_IN_RESET) == 0)
1784 			ldcp->status = LDC_UP;
1785 
1786 		D1(DBG_ALL_LDCS, "(0x%llx) Handshake Complete\n", ldcp->id);
1787 		break;
1788 
1789 	default:
1790 		DWARN(ldcp->id, "i_ldc_process_RDX: (0x%llx) unexp ACK\n",
1791 		    ldcp->id);
1792 
1793 		/* Reset the channel -- as we cannot continue */
1794 		mutex_enter(&ldcp->tx_lock);
1795 		i_ldc_reset(ldcp, B_TRUE);
1796 		mutex_exit(&ldcp->tx_lock);
1797 		rv = ECONNRESET;
1798 		break;
1799 	}
1800 
1801 	return (rv);
1802 }
1803 
1804 /*
1805  * Process an incoming ACK for a data packet
1806  */
1807 static int
1808 i_ldc_process_data_ACK(ldc_chan_t *ldcp, ldc_msg_t *msg)
1809 {
1810 	int		rv;
1811 	uint64_t	tx_head;
1812 	ldc_msg_t	*pkt;
1813 
1814 	/* Obtain Tx lock */
1815 	mutex_enter(&ldcp->tx_lock);
1816 
1817 	/*
1818 	 * Read the current Tx head and tail
1819 	 */
1820 	rv = hv_ldc_tx_get_state(ldcp->id,
1821 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
1822 	if (rv != 0) {
1823 		cmn_err(CE_WARN,
1824 		    "i_ldc_process_data_ACK: (0x%lx) cannot read qptrs\n",
1825 		    ldcp->id);
1826 
1827 		/* Reset the channel -- as we cannot continue */
1828 		i_ldc_reset(ldcp, B_TRUE);
1829 		mutex_exit(&ldcp->tx_lock);
1830 		return (ECONNRESET);
1831 	}
1832 
1833 	/*
1834 	 * loop from where the previous ACK location was to the
1835 	 * current head location. This is how far the HV has
1836 	 * actually send pkts. Pkts between head and tail are
1837 	 * yet to be sent by HV.
1838 	 */
1839 	tx_head = ldcp->tx_ackd_head;
1840 	for (;;) {
1841 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_head);
1842 		tx_head = (tx_head + LDC_PACKET_SIZE) %
1843 		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1844 
1845 		if (pkt->seqid == msg->ackid) {
1846 			D2(ldcp->id,
1847 			    "i_ldc_process_data_ACK: (0x%llx) found packet\n",
1848 			    ldcp->id);
1849 			ldcp->last_ack_rcd = msg->ackid;
1850 			ldcp->tx_ackd_head = tx_head;
1851 			break;
1852 		}
1853 		if (tx_head == ldcp->tx_head) {
1854 			/* could not find packet */
1855 			DWARN(ldcp->id,
1856 			    "i_ldc_process_data_ACK: (0x%llx) invalid ACKid\n",
1857 			    ldcp->id);
1858 
1859 			/* Reset the channel -- as we cannot continue */
1860 			i_ldc_reset(ldcp, B_TRUE);
1861 			mutex_exit(&ldcp->tx_lock);
1862 			return (ECONNRESET);
1863 		}
1864 	}
1865 
1866 	mutex_exit(&ldcp->tx_lock);
1867 	return (0);
1868 }
1869 
1870 /*
1871  * Process incoming control message
1872  * Return 0 - session can continue
1873  *        EAGAIN - reprocess packet - state was changed
1874  *	  ECONNRESET - channel was reset
1875  */
1876 static int
1877 i_ldc_ctrlmsg(ldc_chan_t *ldcp, ldc_msg_t *msg)
1878 {
1879 	int		rv = 0;
1880 
1881 	D1(ldcp->id, "i_ldc_ctrlmsg: (%llx) tstate = %lx, hstate = %lx\n",
1882 	    ldcp->id, ldcp->tstate, ldcp->hstate);
1883 
1884 	switch (ldcp->tstate & ~TS_IN_RESET) {
1885 
1886 	case TS_OPEN:
1887 	case TS_READY:
1888 
1889 		switch (msg->ctrl & LDC_CTRL_MASK) {
1890 		case LDC_VER:
1891 			/* process version message */
1892 			rv = i_ldc_process_VER(ldcp, msg);
1893 			break;
1894 		default:
1895 			DWARN(ldcp->id,
1896 			    "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
1897 			    "tstate=0x%x\n", ldcp->id,
1898 			    (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate);
1899 			break;
1900 		}
1901 
1902 		break;
1903 
1904 	case TS_VREADY:
1905 
1906 		switch (msg->ctrl & LDC_CTRL_MASK) {
1907 		case LDC_VER:
1908 			/* process version message */
1909 			rv = i_ldc_process_VER(ldcp, msg);
1910 			break;
1911 		case LDC_RTS:
1912 			/* process RTS message */
1913 			rv = i_ldc_process_RTS(ldcp, msg);
1914 			break;
1915 		case LDC_RTR:
1916 			/* process RTR message */
1917 			rv = i_ldc_process_RTR(ldcp, msg);
1918 			break;
1919 		case LDC_RDX:
1920 			/* process RDX message */
1921 			rv = i_ldc_process_RDX(ldcp, msg);
1922 			break;
1923 		default:
1924 			DWARN(ldcp->id,
1925 			    "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
1926 			    "tstate=0x%x\n", ldcp->id,
1927 			    (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate);
1928 			break;
1929 		}
1930 
1931 		break;
1932 
1933 	case TS_UP:
1934 
1935 		switch (msg->ctrl & LDC_CTRL_MASK) {
1936 		case LDC_VER:
1937 			DWARN(ldcp->id,
1938 			    "i_ldc_ctrlmsg: (0x%llx) unexpected VER "
1939 			    "- LDC reset\n", ldcp->id);
1940 			/* peer is redoing version negotiation */
1941 			mutex_enter(&ldcp->tx_lock);
1942 			(void) i_ldc_txq_reconf(ldcp);
1943 			i_ldc_reset_state(ldcp);
1944 			mutex_exit(&ldcp->tx_lock);
1945 			rv = EAGAIN;
1946 			break;
1947 
1948 		case LDC_RDX:
1949 			/* process RDX message */
1950 			rv = i_ldc_process_RDX(ldcp, msg);
1951 			break;
1952 
1953 		default:
1954 			DWARN(ldcp->id,
1955 			    "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
1956 			    "tstate=0x%x\n", ldcp->id,
1957 			    (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate);
1958 			break;
1959 		}
1960 	}
1961 
1962 	return (rv);
1963 }
1964 
1965 /*
1966  * Register channel with the channel nexus
1967  */
1968 static int
1969 i_ldc_register_channel(ldc_chan_t *ldcp)
1970 {
1971 	int		rv = 0;
1972 	ldc_cnex_t	*cinfo = &ldcssp->cinfo;
1973 
1974 	if (cinfo->dip == NULL) {
1975 		DWARN(ldcp->id,
1976 		    "i_ldc_register_channel: cnex has not registered\n");
1977 		return (EAGAIN);
1978 	}
1979 
1980 	rv = cinfo->reg_chan(cinfo->dip, ldcp->id, ldcp->devclass);
1981 	if (rv) {
1982 		DWARN(ldcp->id,
1983 		    "i_ldc_register_channel: cannot register channel\n");
1984 		return (rv);
1985 	}
1986 
1987 	rv = cinfo->add_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR,
1988 	    i_ldc_tx_hdlr, ldcp, NULL);
1989 	if (rv) {
1990 		DWARN(ldcp->id,
1991 		    "i_ldc_register_channel: cannot add Tx interrupt\n");
1992 		(void) cinfo->unreg_chan(cinfo->dip, ldcp->id);
1993 		return (rv);
1994 	}
1995 
1996 	rv = cinfo->add_intr(cinfo->dip, ldcp->id, CNEX_RX_INTR,
1997 	    i_ldc_rx_hdlr, ldcp, NULL);
1998 	if (rv) {
1999 		DWARN(ldcp->id,
2000 		    "i_ldc_register_channel: cannot add Rx interrupt\n");
2001 		(void) cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR);
2002 		(void) cinfo->unreg_chan(cinfo->dip, ldcp->id);
2003 		return (rv);
2004 	}
2005 
2006 	ldcp->tstate |= TS_CNEX_RDY;
2007 
2008 	return (0);
2009 }
2010 
2011 /*
2012  * Unregister a channel with the channel nexus
2013  */
2014 static int
2015 i_ldc_unregister_channel(ldc_chan_t *ldcp)
2016 {
2017 	int		rv = 0;
2018 	ldc_cnex_t	*cinfo = &ldcssp->cinfo;
2019 
2020 	if (cinfo->dip == NULL) {
2021 		DWARN(ldcp->id,
2022 		    "i_ldc_unregister_channel: cnex has not registered\n");
2023 		return (EAGAIN);
2024 	}
2025 
2026 	if (ldcp->tstate & TS_CNEX_RDY) {
2027 
2028 		/* Remove the Rx interrupt */
2029 		rv = cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_RX_INTR);
2030 		if (rv) {
2031 			if (rv != EAGAIN) {
2032 				DWARN(ldcp->id,
2033 				    "i_ldc_unregister_channel: err removing "
2034 				    "Rx intr\n");
2035 				return (rv);
2036 			}
2037 
2038 			/*
2039 			 * If interrupts are pending and handler has
2040 			 * finished running, clear interrupt and try
2041 			 * again
2042 			 */
2043 			if (ldcp->rx_intr_state != LDC_INTR_PEND)
2044 				return (rv);
2045 
2046 			(void) i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
2047 			rv = cinfo->rem_intr(cinfo->dip, ldcp->id,
2048 			    CNEX_RX_INTR);
2049 			if (rv) {
2050 				DWARN(ldcp->id, "i_ldc_unregister_channel: "
2051 				    "err removing Rx interrupt\n");
2052 				return (rv);
2053 			}
2054 		}
2055 
2056 		/* Remove the Tx interrupt */
2057 		rv = cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR);
2058 		if (rv) {
2059 			DWARN(ldcp->id,
2060 			    "i_ldc_unregister_channel: err removing Tx intr\n");
2061 			return (rv);
2062 		}
2063 
2064 		/* Unregister the channel */
2065 		rv = cinfo->unreg_chan(ldcssp->cinfo.dip, ldcp->id);
2066 		if (rv) {
2067 			DWARN(ldcp->id,
2068 			    "i_ldc_unregister_channel: cannot unreg channel\n");
2069 			return (rv);
2070 		}
2071 
2072 		ldcp->tstate &= ~TS_CNEX_RDY;
2073 	}
2074 
2075 	return (0);
2076 }
2077 
2078 
2079 /*
2080  * LDC transmit interrupt handler
2081  *    triggered for chanel up/down/reset events
2082  *    and Tx queue content changes
2083  */
2084 static uint_t
2085 i_ldc_tx_hdlr(caddr_t arg1, caddr_t arg2)
2086 {
2087 	_NOTE(ARGUNUSED(arg2))
2088 
2089 	int		rv;
2090 	ldc_chan_t	*ldcp;
2091 	boolean_t	notify_client = B_FALSE;
2092 	uint64_t	notify_event = 0, link_state;
2093 
2094 	/* Get the channel for which interrupt was received */
2095 	ASSERT(arg1 != NULL);
2096 	ldcp = (ldc_chan_t *)arg1;
2097 
2098 	D1(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) Received intr, ldcp=0x%p\n",
2099 	    ldcp->id, ldcp);
2100 
2101 	/* Lock channel */
2102 	mutex_enter(&ldcp->lock);
2103 
2104 	/* Obtain Tx lock */
2105 	mutex_enter(&ldcp->tx_lock);
2106 
2107 	/* mark interrupt as pending */
2108 	ldcp->tx_intr_state = LDC_INTR_ACTIVE;
2109 
2110 	/* save current link state */
2111 	link_state = ldcp->link_state;
2112 
2113 	rv = hv_ldc_tx_get_state(ldcp->id, &ldcp->tx_head, &ldcp->tx_tail,
2114 	    &ldcp->link_state);
2115 	if (rv) {
2116 		cmn_err(CE_WARN,
2117 		    "i_ldc_tx_hdlr: (0x%lx) cannot read queue ptrs rv=0x%d\n",
2118 		    ldcp->id, rv);
2119 		i_ldc_clear_intr(ldcp, CNEX_TX_INTR);
2120 		mutex_exit(&ldcp->tx_lock);
2121 		mutex_exit(&ldcp->lock);
2122 		return (DDI_INTR_CLAIMED);
2123 	}
2124 
2125 	/*
2126 	 * reset the channel state if the channel went down
2127 	 * (other side unconfigured queue) or channel was reset
2128 	 * (other side reconfigured its queue)
2129 	 */
2130 	if (link_state != ldcp->link_state &&
2131 	    ldcp->link_state == LDC_CHANNEL_DOWN) {
2132 		D1(ldcp->id, "i_ldc_tx_hdlr: channel link down\n", ldcp->id);
2133 		i_ldc_reset(ldcp, B_FALSE);
2134 		notify_client = B_TRUE;
2135 		notify_event = LDC_EVT_DOWN;
2136 	}
2137 
2138 	if (link_state != ldcp->link_state &&
2139 	    ldcp->link_state == LDC_CHANNEL_RESET) {
2140 		D1(ldcp->id, "i_ldc_tx_hdlr: channel link reset\n", ldcp->id);
2141 		i_ldc_reset(ldcp, B_FALSE);
2142 		notify_client = B_TRUE;
2143 		notify_event = LDC_EVT_RESET;
2144 	}
2145 
2146 	if (link_state != ldcp->link_state &&
2147 	    (ldcp->tstate & ~TS_IN_RESET) == TS_OPEN &&
2148 	    ldcp->link_state == LDC_CHANNEL_UP) {
2149 		D1(ldcp->id, "i_ldc_tx_hdlr: channel link up\n", ldcp->id);
2150 		notify_client = B_TRUE;
2151 		notify_event = LDC_EVT_RESET;
2152 		ldcp->tstate |= TS_LINK_READY;
2153 		ldcp->status = LDC_READY;
2154 	}
2155 
2156 	/* if callbacks are disabled, do not notify */
2157 	if (!ldcp->cb_enabled)
2158 		notify_client = B_FALSE;
2159 
2160 	i_ldc_clear_intr(ldcp, CNEX_TX_INTR);
2161 	mutex_exit(&ldcp->tx_lock);
2162 
2163 	if (notify_client) {
2164 		ldcp->cb_inprogress = B_TRUE;
2165 		mutex_exit(&ldcp->lock);
2166 		rv = ldcp->cb(notify_event, ldcp->cb_arg);
2167 		if (rv) {
2168 			DWARN(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) callback "
2169 			    "failure", ldcp->id);
2170 		}
2171 		mutex_enter(&ldcp->lock);
2172 		ldcp->cb_inprogress = B_FALSE;
2173 	}
2174 
2175 	mutex_exit(&ldcp->lock);
2176 
2177 	D1(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) exiting handler", ldcp->id);
2178 
2179 	return (DDI_INTR_CLAIMED);
2180 }
2181 
2182 /*
2183  * Process the Rx HV queue.
2184  *
2185  * Returns 0 if data packets were found and no errors were encountered,
2186  * otherwise returns an error. In either case, the *notify argument is
2187  * set to indicate whether or not the client callback function should
2188  * be invoked. The *event argument is set to contain the callback event.
2189  *
2190  * Depending on the channel mode, packets are handled differently:
2191  *
2192  * RAW MODE
2193  * For raw mode channels, when a data packet is encountered,
2194  * processing stops and all packets are left on the queue to be removed
2195  * and processed by the ldc_read code path.
2196  *
2197  * UNRELIABLE MODE
2198  * For unreliable mode, when a data packet is encountered, processing
2199  * stops, and all packets are left on the queue to be removed and
2200  * processed by the ldc_read code path. Control packets are processed
2201  * inline if they are encountered before any data packets.
2202  *
2203  * RELIABLE MODE
2204  * For reliable mode channels, all packets on the receive queue
2205  * are processed: data packets are copied to the data queue and
2206  * control packets are processed inline. Packets are only left on
2207  * the receive queue when the data queue is full.
2208  */
2209 static uint_t
2210 i_ldc_rx_process_hvq(ldc_chan_t *ldcp, boolean_t *notify_client,
2211     uint64_t *notify_event)
2212 {
2213 	int		rv;
2214 	uint64_t	rx_head, rx_tail;
2215 	ldc_msg_t	*msg;
2216 	uint64_t	link_state, first_fragment = 0;
2217 	boolean_t	trace_length = B_TRUE;
2218 
2219 	ASSERT(MUTEX_HELD(&ldcp->lock));
2220 	*notify_client = B_FALSE;
2221 	*notify_event = 0;
2222 
2223 	/*
2224 	 * Read packet(s) from the queue
2225 	 */
2226 	for (;;) {
2227 
2228 		link_state = ldcp->link_state;
2229 		rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
2230 		    &ldcp->link_state);
2231 		if (rv) {
2232 			cmn_err(CE_WARN,
2233 			    "i_ldc_rx_process_hvq: (0x%lx) cannot read "
2234 			    "queue ptrs, rv=0x%d\n", ldcp->id, rv);
2235 			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
2236 			return (EIO);
2237 		}
2238 
2239 		/*
2240 		 * reset the channel state if the channel went down
2241 		 * (other side unconfigured queue) or channel was reset
2242 		 * (other side reconfigured its queue)
2243 		 */
2244 
2245 		if (link_state != ldcp->link_state) {
2246 
2247 			switch (ldcp->link_state) {
2248 			case LDC_CHANNEL_DOWN:
2249 				D1(ldcp->id, "i_ldc_rx_process_hvq: channel "
2250 				    "link down\n", ldcp->id);
2251 				mutex_enter(&ldcp->tx_lock);
2252 				i_ldc_reset(ldcp, B_FALSE);
2253 				mutex_exit(&ldcp->tx_lock);
2254 				*notify_client = B_TRUE;
2255 				*notify_event = LDC_EVT_DOWN;
2256 				goto loop_exit;
2257 
2258 			case LDC_CHANNEL_UP:
2259 				D1(ldcp->id, "i_ldc_rx_process_hvq: "
2260 				    "channel link up\n", ldcp->id);
2261 
2262 				if ((ldcp->tstate & ~TS_IN_RESET) == TS_OPEN) {
2263 					*notify_client = B_TRUE;
2264 					*notify_event = LDC_EVT_RESET;
2265 					ldcp->tstate |= TS_LINK_READY;
2266 					ldcp->status = LDC_READY;
2267 				}
2268 				break;
2269 
2270 			case LDC_CHANNEL_RESET:
2271 			default:
2272 #ifdef DEBUG
2273 force_reset:
2274 #endif
2275 				D1(ldcp->id, "i_ldc_rx_process_hvq: channel "
2276 				    "link reset\n", ldcp->id);
2277 				mutex_enter(&ldcp->tx_lock);
2278 				i_ldc_reset(ldcp, B_FALSE);
2279 				mutex_exit(&ldcp->tx_lock);
2280 				*notify_client = B_TRUE;
2281 				*notify_event = LDC_EVT_RESET;
2282 				break;
2283 			}
2284 		}
2285 
2286 #ifdef DEBUG
2287 		if (LDC_INJECT_RESET(ldcp))
2288 			goto force_reset;
2289 		if (LDC_INJECT_DRNGCLEAR(ldcp))
2290 			i_ldc_mem_inject_dring_clear(ldcp);
2291 #endif
2292 		if (trace_length) {
2293 			TRACE_RXHVQ_LENGTH(ldcp, rx_head, rx_tail);
2294 			trace_length = B_FALSE;
2295 		}
2296 
2297 		if (rx_head == rx_tail) {
2298 			D2(ldcp->id, "i_ldc_rx_process_hvq: (0x%llx) "
2299 			    "No packets\n", ldcp->id);
2300 			break;
2301 		}
2302 
2303 		D2(ldcp->id, "i_ldc_rx_process_hvq: head=0x%llx, "
2304 		    "tail=0x%llx\n", rx_head, rx_tail);
2305 		DUMP_LDC_PKT(ldcp, "i_ldc_rx_process_hvq rcd",
2306 		    ldcp->rx_q_va + rx_head);
2307 
2308 		/* get the message */
2309 		msg = (ldc_msg_t *)(ldcp->rx_q_va + rx_head);
2310 
2311 		/* if channel is in RAW mode or data pkt, notify and return */
2312 		if (ldcp->mode == LDC_MODE_RAW) {
2313 			*notify_client = B_TRUE;
2314 			*notify_event |= LDC_EVT_READ;
2315 			break;
2316 		}
2317 
2318 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_INFO)) {
2319 
2320 			/* discard packet if channel is not up */
2321 			if ((ldcp->tstate & ~TS_IN_RESET) != TS_UP) {
2322 
2323 				/* move the head one position */
2324 				rx_head = (rx_head + LDC_PACKET_SIZE) %
2325 				    (ldcp->rx_q_entries << LDC_PACKET_SHIFT);
2326 
2327 				if (rv = i_ldc_set_rx_head(ldcp, rx_head))
2328 					break;
2329 
2330 				continue;
2331 			} else {
2332 				uint64_t dq_head, dq_tail;
2333 
2334 				/* process only RELIABLE mode data packets */
2335 				if (ldcp->mode != LDC_MODE_RELIABLE) {
2336 					if ((ldcp->tstate & TS_IN_RESET) == 0)
2337 						*notify_client = B_TRUE;
2338 					*notify_event |= LDC_EVT_READ;
2339 					break;
2340 				}
2341 
2342 				/* don't process packet if queue full */
2343 				(void) i_ldc_dq_rx_get_state(ldcp, &dq_head,
2344 				    &dq_tail, NULL);
2345 				dq_tail = (dq_tail + LDC_PACKET_SIZE) %
2346 				    (ldcp->rx_dq_entries << LDC_PACKET_SHIFT);
2347 				if (dq_tail == dq_head ||
2348 				    LDC_INJECT_DQFULL(ldcp)) {
2349 					rv = ENOSPC;
2350 					break;
2351 				}
2352 			}
2353 		}
2354 
2355 		/* Check the sequence ID for the message received */
2356 		rv = i_ldc_check_seqid(ldcp, msg);
2357 		if (rv != 0) {
2358 
2359 			DWARN(ldcp->id, "i_ldc_rx_process_hvq: (0x%llx) "
2360 			    "seqid error, q_ptrs=0x%lx,0x%lx", ldcp->id,
2361 			    rx_head, rx_tail);
2362 
2363 			/* Reset last_msg_rcd to start of message */
2364 			if (first_fragment != 0) {
2365 				ldcp->last_msg_rcd = first_fragment - 1;
2366 				first_fragment = 0;
2367 			}
2368 
2369 			/*
2370 			 * Send a NACK due to seqid mismatch
2371 			 */
2372 			rv = i_ldc_send_pkt(ldcp, msg->type, LDC_NACK,
2373 			    (msg->ctrl & LDC_CTRL_MASK));
2374 
2375 			if (rv) {
2376 				cmn_err(CE_NOTE, "i_ldc_rx_process_hvq: "
2377 				    "(0x%lx) err sending CTRL/DATA NACK msg\n",
2378 				    ldcp->id);
2379 
2380 				/* if cannot send NACK - reset channel */
2381 				mutex_enter(&ldcp->tx_lock);
2382 				i_ldc_reset(ldcp, B_TRUE);
2383 				mutex_exit(&ldcp->tx_lock);
2384 
2385 				*notify_client = B_TRUE;
2386 				*notify_event = LDC_EVT_RESET;
2387 				break;
2388 			}
2389 
2390 			/* purge receive queue */
2391 			(void) i_ldc_set_rx_head(ldcp, rx_tail);
2392 			break;
2393 		}
2394 
2395 		/* record the message ID */
2396 		ldcp->last_msg_rcd = msg->seqid;
2397 
2398 		/* process control messages */
2399 		if (msg->type & LDC_CTRL) {
2400 			/* save current internal state */
2401 			uint64_t tstate = ldcp->tstate;
2402 
2403 			rv = i_ldc_ctrlmsg(ldcp, msg);
2404 			if (rv == EAGAIN) {
2405 				/* re-process pkt - state was adjusted */
2406 				continue;
2407 			}
2408 			if (rv == ECONNRESET) {
2409 				*notify_client = B_TRUE;
2410 				*notify_event = LDC_EVT_RESET;
2411 				break;
2412 			}
2413 
2414 			/*
2415 			 * control message processing was successful
2416 			 * channel transitioned to ready for communication
2417 			 */
2418 			if (rv == 0 && ldcp->tstate == TS_UP &&
2419 			    (tstate & ~TS_IN_RESET) !=
2420 			    (ldcp->tstate & ~TS_IN_RESET)) {
2421 				*notify_client = B_TRUE;
2422 				*notify_event = LDC_EVT_UP;
2423 			}
2424 		}
2425 
2426 		/* process data NACKs */
2427 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_NACK)) {
2428 			DWARN(ldcp->id,
2429 			    "i_ldc_rx_process_hvq: (0x%llx) received DATA/NACK",
2430 			    ldcp->id);
2431 			mutex_enter(&ldcp->tx_lock);
2432 			i_ldc_reset(ldcp, B_TRUE);
2433 			mutex_exit(&ldcp->tx_lock);
2434 			*notify_client = B_TRUE;
2435 			*notify_event = LDC_EVT_RESET;
2436 			break;
2437 		}
2438 
2439 		/* process data ACKs */
2440 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) {
2441 			if (rv = i_ldc_process_data_ACK(ldcp, msg)) {
2442 				*notify_client = B_TRUE;
2443 				*notify_event = LDC_EVT_RESET;
2444 				break;
2445 			}
2446 		}
2447 
2448 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_INFO)) {
2449 			ASSERT(ldcp->mode == LDC_MODE_RELIABLE);
2450 
2451 			/*
2452 			 * Copy the data packet to the data queue. Note
2453 			 * that the copy routine updates the rx_head pointer.
2454 			 */
2455 			i_ldc_rxdq_copy(ldcp, &rx_head);
2456 
2457 			if ((ldcp->tstate & TS_IN_RESET) == 0)
2458 				*notify_client = B_TRUE;
2459 			*notify_event |= LDC_EVT_READ;
2460 		} else {
2461 			rx_head = (rx_head + LDC_PACKET_SIZE) %
2462 			    (ldcp->rx_q_entries << LDC_PACKET_SHIFT);
2463 		}
2464 
2465 		/* move the head one position */
2466 		if (rv = i_ldc_set_rx_head(ldcp, rx_head)) {
2467 			*notify_client = B_TRUE;
2468 			*notify_event = LDC_EVT_RESET;
2469 			break;
2470 		}
2471 
2472 	} /* for */
2473 
2474 loop_exit:
2475 
2476 	if (ldcp->mode == LDC_MODE_RELIABLE) {
2477 		/* ACK data packets */
2478 		if ((*notify_event &
2479 		    (LDC_EVT_READ | LDC_EVT_RESET)) == LDC_EVT_READ) {
2480 			int ack_rv;
2481 			ack_rv = i_ldc_send_pkt(ldcp, LDC_DATA, LDC_ACK, 0);
2482 			if (ack_rv && ack_rv != EWOULDBLOCK) {
2483 				cmn_err(CE_NOTE,
2484 				    "i_ldc_rx_process_hvq: (0x%lx) cannot "
2485 				    "send ACK\n", ldcp->id);
2486 
2487 				mutex_enter(&ldcp->tx_lock);
2488 				i_ldc_reset(ldcp, B_FALSE);
2489 				mutex_exit(&ldcp->tx_lock);
2490 
2491 				*notify_client = B_TRUE;
2492 				*notify_event = LDC_EVT_RESET;
2493 				goto skip_ackpeek;
2494 			}
2495 		}
2496 
2497 		/*
2498 		 * If we have no more space on the data queue, make sure
2499 		 * there are no ACKs on the rx queue waiting to be processed.
2500 		 */
2501 		if (rv == ENOSPC) {
2502 			if (i_ldc_rx_ackpeek(ldcp, rx_head, rx_tail) != 0) {
2503 				ldcp->rx_ack_head = ACKPEEK_HEAD_INVALID;
2504 				*notify_client = B_TRUE;
2505 				*notify_event = LDC_EVT_RESET;
2506 			}
2507 			return (rv);
2508 		} else {
2509 			ldcp->rx_ack_head = ACKPEEK_HEAD_INVALID;
2510 		}
2511 	}
2512 
2513 skip_ackpeek:
2514 
2515 	/* Return, indicating whether or not data packets were found */
2516 	if ((*notify_event & (LDC_EVT_READ | LDC_EVT_RESET)) == LDC_EVT_READ)
2517 		return (0);
2518 
2519 	return (ENOMSG);
2520 }
2521 
2522 /*
2523  * Process any ACK packets on the HV receive queue.
2524  *
2525  * This function is only used by RELIABLE mode channels when the
2526  * secondary data queue fills up and there are packets remaining on
2527  * the HV receive queue.
2528  */
2529 int
2530 i_ldc_rx_ackpeek(ldc_chan_t *ldcp, uint64_t rx_head, uint64_t rx_tail)
2531 {
2532 	int		rv = 0;
2533 	ldc_msg_t	*msg;
2534 
2535 	if (ldcp->rx_ack_head == ACKPEEK_HEAD_INVALID)
2536 		ldcp->rx_ack_head = rx_head;
2537 
2538 	while (ldcp->rx_ack_head != rx_tail) {
2539 		msg = (ldc_msg_t *)(ldcp->rx_q_va + ldcp->rx_ack_head);
2540 
2541 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) {
2542 			if (rv = i_ldc_process_data_ACK(ldcp, msg))
2543 				break;
2544 			msg->stype &= ~LDC_ACK;
2545 		}
2546 
2547 		ldcp->rx_ack_head =
2548 		    (ldcp->rx_ack_head + LDC_PACKET_SIZE) %
2549 		    (ldcp->rx_q_entries << LDC_PACKET_SHIFT);
2550 	}
2551 	return (rv);
2552 }
2553 
2554 /* -------------------------------------------------------------------------- */
2555 
2556 /*
2557  * LDC API functions
2558  */
2559 
2560 /*
2561  * Initialize the channel. Allocate internal structure and memory for
2562  * TX/RX queues, and initialize locks.
2563  */
2564 int
2565 ldc_init(uint64_t id, ldc_attr_t *attr, ldc_handle_t *handle)
2566 {
2567 	ldc_chan_t	*ldcp;
2568 	int		rv, exit_val;
2569 	uint64_t	ra_base, nentries;
2570 	uint64_t	qlen;
2571 
2572 	exit_val = EINVAL;	/* guarantee an error if exit on failure */
2573 
2574 	if (attr == NULL) {
2575 		DWARN(id, "ldc_init: (0x%llx) invalid attr\n", id);
2576 		return (EINVAL);
2577 	}
2578 	if (handle == NULL) {
2579 		DWARN(id, "ldc_init: (0x%llx) invalid handle\n", id);
2580 		return (EINVAL);
2581 	}
2582 
2583 	/* check if channel is valid */
2584 	rv = hv_ldc_tx_qinfo(id, &ra_base, &nentries);
2585 	if (rv == H_ECHANNEL) {
2586 		DWARN(id, "ldc_init: (0x%llx) invalid channel id\n", id);
2587 		return (EINVAL);
2588 	}
2589 
2590 	/* check if the channel has already been initialized */
2591 	mutex_enter(&ldcssp->lock);
2592 	ldcp = ldcssp->chan_list;
2593 	while (ldcp != NULL) {
2594 		if (ldcp->id == id) {
2595 			DWARN(id, "ldc_init: (0x%llx) already initialized\n",
2596 			    id);
2597 			mutex_exit(&ldcssp->lock);
2598 			return (EADDRINUSE);
2599 		}
2600 		ldcp = ldcp->next;
2601 	}
2602 	mutex_exit(&ldcssp->lock);
2603 
2604 	ASSERT(ldcp == NULL);
2605 
2606 	*handle = 0;
2607 
2608 	/* Allocate an ldcp structure */
2609 	ldcp = kmem_zalloc(sizeof (ldc_chan_t), KM_SLEEP);
2610 
2611 	/*
2612 	 * Initialize the channel and Tx lock
2613 	 *
2614 	 * The channel 'lock' protects the entire channel and
2615 	 * should be acquired before initializing, resetting,
2616 	 * destroying or reading from a channel.
2617 	 *
2618 	 * The 'tx_lock' should be acquired prior to transmitting
2619 	 * data over the channel. The lock should also be acquired
2620 	 * prior to channel reconfiguration (in order to prevent
2621 	 * concurrent writes).
2622 	 *
2623 	 * ORDERING: When both locks are being acquired, to prevent
2624 	 * deadlocks, the channel lock should be always acquired prior
2625 	 * to the tx_lock.
2626 	 */
2627 	mutex_init(&ldcp->lock, NULL, MUTEX_DRIVER, NULL);
2628 	mutex_init(&ldcp->tx_lock, NULL, MUTEX_DRIVER, NULL);
2629 
2630 	/* Initialize the channel */
2631 	ldcp->id = id;
2632 	ldcp->cb = NULL;
2633 	ldcp->cb_arg = NULL;
2634 	ldcp->cb_inprogress = B_FALSE;
2635 	ldcp->cb_enabled = B_FALSE;
2636 	ldcp->next = NULL;
2637 
2638 	/* Read attributes */
2639 	ldcp->mode = attr->mode;
2640 	ldcp->devclass = attr->devclass;
2641 	ldcp->devinst = attr->instance;
2642 	ldcp->mtu = (attr->mtu > 0) ? attr->mtu : LDC_DEFAULT_MTU;
2643 
2644 	D1(ldcp->id,
2645 	    "ldc_init: (0x%llx) channel attributes, class=0x%x, "
2646 	    "instance=0x%llx, mode=%d, mtu=%d\n",
2647 	    ldcp->id, ldcp->devclass, ldcp->devinst, ldcp->mode, ldcp->mtu);
2648 
2649 	ldcp->next_vidx = 0;
2650 	ldcp->tstate = TS_IN_RESET;
2651 	ldcp->hstate = 0;
2652 	ldcp->last_msg_snt = LDC_INIT_SEQID;
2653 	ldcp->last_ack_rcd = 0;
2654 	ldcp->last_msg_rcd = 0;
2655 	ldcp->rx_ack_head = ACKPEEK_HEAD_INVALID;
2656 
2657 	ldcp->stream_bufferp = NULL;
2658 	ldcp->exp_dring_list = NULL;
2659 	ldcp->imp_dring_list = NULL;
2660 	ldcp->mhdl_list = NULL;
2661 
2662 	ldcp->tx_intr_state = LDC_INTR_NONE;
2663 	ldcp->rx_intr_state = LDC_INTR_NONE;
2664 
2665 	/* Initialize payload size depending on whether channel is reliable */
2666 	switch (ldcp->mode) {
2667 	case LDC_MODE_RAW:
2668 		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_RAW;
2669 		ldcp->read_p = i_ldc_read_raw;
2670 		ldcp->write_p = i_ldc_write_raw;
2671 		break;
2672 	case LDC_MODE_UNRELIABLE:
2673 		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_UNRELIABLE;
2674 		ldcp->read_p = i_ldc_read_packet;
2675 		ldcp->write_p = i_ldc_write_packet;
2676 		break;
2677 	case LDC_MODE_RELIABLE:
2678 		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_RELIABLE;
2679 
2680 		ldcp->stream_remains = 0;
2681 		ldcp->stream_offset = 0;
2682 		ldcp->stream_bufferp = kmem_alloc(ldcp->mtu, KM_SLEEP);
2683 		ldcp->read_p = i_ldc_read_stream;
2684 		ldcp->write_p = i_ldc_write_stream;
2685 		break;
2686 	default:
2687 		exit_val = EINVAL;
2688 		goto cleanup_on_exit;
2689 	}
2690 
2691 	/*
2692 	 * qlen is (mtu * ldc_mtu_msgs) / pkt_payload. If this
2693 	 * value is smaller than default length of ldc_queue_entries,
2694 	 * qlen is set to ldc_queue_entries. Ensure that computed
2695 	 * length is a power-of-two value.
2696 	 */
2697 	qlen = (ldcp->mtu * ldc_mtu_msgs) / ldcp->pkt_payload;
2698 	if (!ISP2(qlen)) {
2699 		uint64_t	tmp = 1;
2700 		while (qlen) {
2701 			qlen >>= 1; tmp <<= 1;
2702 		}
2703 		qlen = tmp;
2704 	}
2705 
2706 	ldcp->rx_q_entries =
2707 	    (qlen < ldc_queue_entries) ? ldc_queue_entries : qlen;
2708 	ldcp->tx_q_entries = ldcp->rx_q_entries;
2709 
2710 	D1(ldcp->id, "ldc_init: queue length = 0x%llx\n", ldcp->rx_q_entries);
2711 
2712 	/* Create a transmit queue */
2713 	ldcp->tx_q_va = (uint64_t)
2714 	    contig_mem_alloc(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
2715 	if (ldcp->tx_q_va == 0) {
2716 		cmn_err(CE_WARN,
2717 		    "ldc_init: (0x%lx) TX queue allocation failed\n",
2718 		    ldcp->id);
2719 		exit_val = ENOMEM;
2720 		goto cleanup_on_exit;
2721 	}
2722 	ldcp->tx_q_ra = va_to_pa((caddr_t)ldcp->tx_q_va);
2723 
2724 	D2(ldcp->id, "ldc_init: txq_va=0x%llx, txq_ra=0x%llx, entries=0x%llx\n",
2725 	    ldcp->tx_q_va, ldcp->tx_q_ra, ldcp->tx_q_entries);
2726 
2727 	ldcp->tstate |= TS_TXQ_RDY;
2728 
2729 	/* Create a receive queue */
2730 	ldcp->rx_q_va = (uint64_t)
2731 	    contig_mem_alloc(ldcp->rx_q_entries << LDC_PACKET_SHIFT);
2732 	if (ldcp->rx_q_va == 0) {
2733 		cmn_err(CE_WARN,
2734 		    "ldc_init: (0x%lx) RX queue allocation failed\n",
2735 		    ldcp->id);
2736 		exit_val = ENOMEM;
2737 		goto cleanup_on_exit;
2738 	}
2739 	ldcp->rx_q_ra = va_to_pa((caddr_t)ldcp->rx_q_va);
2740 
2741 	D2(ldcp->id, "ldc_init: rxq_va=0x%llx, rxq_ra=0x%llx, entries=0x%llx\n",
2742 	    ldcp->rx_q_va, ldcp->rx_q_ra, ldcp->rx_q_entries);
2743 
2744 	ldcp->tstate |= TS_RXQ_RDY;
2745 
2746 	/* Setup a separate read data queue */
2747 	if (ldcp->mode == LDC_MODE_RELIABLE) {
2748 		ldcp->readq_get_state = i_ldc_dq_rx_get_state;
2749 		ldcp->readq_set_head  = i_ldc_set_rxdq_head;
2750 
2751 		/* Make sure the data queue multiplier is a power of 2 */
2752 		if (!ISP2(ldc_rxdq_multiplier)) {
2753 			D1(ldcp->id, "ldc_init: (0x%llx) ldc_rxdq_multiplier "
2754 			    "not a power of 2, resetting", ldcp->id);
2755 			ldc_rxdq_multiplier = LDC_RXDQ_MULTIPLIER;
2756 		}
2757 
2758 		ldcp->rx_dq_entries = ldc_rxdq_multiplier * ldcp->rx_q_entries;
2759 		ldcp->rx_dq_va = (uint64_t)
2760 		    kmem_alloc(ldcp->rx_dq_entries << LDC_PACKET_SHIFT,
2761 		    KM_SLEEP);
2762 		if (ldcp->rx_dq_va == 0) {
2763 			cmn_err(CE_WARN,
2764 			    "ldc_init: (0x%lx) RX data queue "
2765 			    "allocation failed\n", ldcp->id);
2766 			exit_val = ENOMEM;
2767 			goto cleanup_on_exit;
2768 		}
2769 
2770 		ldcp->rx_dq_head = ldcp->rx_dq_tail = 0;
2771 
2772 		D2(ldcp->id, "ldc_init: rx_dq_va=0x%llx, "
2773 		    "rx_dq_entries=0x%llx\n", ldcp->rx_dq_va,
2774 		    ldcp->rx_dq_entries);
2775 	} else {
2776 		ldcp->readq_get_state = i_ldc_hvq_rx_get_state;
2777 		ldcp->readq_set_head  = i_ldc_set_rx_head;
2778 	}
2779 
2780 	/* Init descriptor ring and memory handle list lock */
2781 	mutex_init(&ldcp->exp_dlist_lock, NULL, MUTEX_DRIVER, NULL);
2782 	mutex_init(&ldcp->imp_dlist_lock, NULL, MUTEX_DRIVER, NULL);
2783 	mutex_init(&ldcp->mlist_lock, NULL, MUTEX_DRIVER, NULL);
2784 
2785 	/* mark status as INITialized */
2786 	ldcp->status = LDC_INIT;
2787 
2788 	/* Add to channel list */
2789 	mutex_enter(&ldcssp->lock);
2790 	ldcp->next = ldcssp->chan_list;
2791 	ldcssp->chan_list = ldcp;
2792 	ldcssp->channel_count++;
2793 	mutex_exit(&ldcssp->lock);
2794 
2795 	/* set the handle */
2796 	*handle = (ldc_handle_t)ldcp;
2797 
2798 	D1(ldcp->id, "ldc_init: (0x%llx) channel initialized\n", ldcp->id);
2799 
2800 	return (0);
2801 
2802 cleanup_on_exit:
2803 
2804 	if (ldcp->mode == LDC_MODE_RELIABLE && ldcp->stream_bufferp)
2805 		kmem_free(ldcp->stream_bufferp, ldcp->mtu);
2806 
2807 	if (ldcp->tstate & TS_TXQ_RDY)
2808 		contig_mem_free((caddr_t)ldcp->tx_q_va,
2809 		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT));
2810 
2811 	if (ldcp->tstate & TS_RXQ_RDY)
2812 		contig_mem_free((caddr_t)ldcp->rx_q_va,
2813 		    (ldcp->rx_q_entries << LDC_PACKET_SHIFT));
2814 
2815 	mutex_destroy(&ldcp->tx_lock);
2816 	mutex_destroy(&ldcp->lock);
2817 
2818 	kmem_free(ldcp, sizeof (ldc_chan_t));
2819 
2820 	return (exit_val);
2821 }
2822 
2823 /*
2824  * Finalizes the LDC connection. It will return EBUSY if the
2825  * channel is open. A ldc_close() has to be done prior to
2826  * a ldc_fini operation. It frees TX/RX queues, associated
2827  * with the channel
2828  */
2829 int
2830 ldc_fini(ldc_handle_t handle)
2831 {
2832 	ldc_chan_t	*ldcp;
2833 	ldc_chan_t	*tmp_ldcp;
2834 	uint64_t	id;
2835 
2836 	if (handle == 0) {
2837 		DWARN(DBG_ALL_LDCS, "ldc_fini: invalid channel handle\n");
2838 		return (EINVAL);
2839 	}
2840 	ldcp = (ldc_chan_t *)handle;
2841 	id = ldcp->id;
2842 
2843 	mutex_enter(&ldcp->lock);
2844 
2845 	if ((ldcp->tstate & ~TS_IN_RESET) > TS_INIT) {
2846 		DWARN(ldcp->id, "ldc_fini: (0x%llx) channel is open\n",
2847 		    ldcp->id);
2848 		mutex_exit(&ldcp->lock);
2849 		return (EBUSY);
2850 	}
2851 
2852 	/* Remove from the channel list */
2853 	mutex_enter(&ldcssp->lock);
2854 	tmp_ldcp = ldcssp->chan_list;
2855 	if (tmp_ldcp == ldcp) {
2856 		ldcssp->chan_list = ldcp->next;
2857 		ldcp->next = NULL;
2858 	} else {
2859 		while (tmp_ldcp != NULL) {
2860 			if (tmp_ldcp->next == ldcp) {
2861 				tmp_ldcp->next = ldcp->next;
2862 				ldcp->next = NULL;
2863 				break;
2864 			}
2865 			tmp_ldcp = tmp_ldcp->next;
2866 		}
2867 		if (tmp_ldcp == NULL) {
2868 			DWARN(DBG_ALL_LDCS, "ldc_fini: invalid channel hdl\n");
2869 			mutex_exit(&ldcssp->lock);
2870 			mutex_exit(&ldcp->lock);
2871 			return (EINVAL);
2872 		}
2873 	}
2874 
2875 	ldcssp->channel_count--;
2876 
2877 	mutex_exit(&ldcssp->lock);
2878 
2879 	/* Free the map table for this channel */
2880 	if (ldcp->mtbl) {
2881 		(void) hv_ldc_set_map_table(ldcp->id, 0, 0);
2882 		if (ldcp->mtbl->contigmem)
2883 			contig_mem_free(ldcp->mtbl->table, ldcp->mtbl->size);
2884 		else
2885 			kmem_free(ldcp->mtbl->table, ldcp->mtbl->size);
2886 		mutex_destroy(&ldcp->mtbl->lock);
2887 		kmem_free(ldcp->mtbl, sizeof (ldc_mtbl_t));
2888 	}
2889 
2890 	/* Destroy descriptor ring and memory handle list lock */
2891 	mutex_destroy(&ldcp->exp_dlist_lock);
2892 	mutex_destroy(&ldcp->imp_dlist_lock);
2893 	mutex_destroy(&ldcp->mlist_lock);
2894 
2895 	/* Free the stream buffer for RELIABLE_MODE */
2896 	if (ldcp->mode == LDC_MODE_RELIABLE && ldcp->stream_bufferp)
2897 		kmem_free(ldcp->stream_bufferp, ldcp->mtu);
2898 
2899 	/* Free the RX queue */
2900 	contig_mem_free((caddr_t)ldcp->rx_q_va,
2901 	    (ldcp->rx_q_entries << LDC_PACKET_SHIFT));
2902 	ldcp->tstate &= ~TS_RXQ_RDY;
2903 
2904 	/* Free the RX data queue */
2905 	if (ldcp->mode == LDC_MODE_RELIABLE) {
2906 		kmem_free((caddr_t)ldcp->rx_dq_va,
2907 		    (ldcp->rx_dq_entries << LDC_PACKET_SHIFT));
2908 	}
2909 
2910 	/* Free the TX queue */
2911 	contig_mem_free((caddr_t)ldcp->tx_q_va,
2912 	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT));
2913 	ldcp->tstate &= ~TS_TXQ_RDY;
2914 
2915 	mutex_exit(&ldcp->lock);
2916 
2917 	/* Destroy mutex */
2918 	mutex_destroy(&ldcp->tx_lock);
2919 	mutex_destroy(&ldcp->lock);
2920 
2921 	/* free channel structure */
2922 	kmem_free(ldcp, sizeof (ldc_chan_t));
2923 
2924 	D1(id, "ldc_fini: (0x%llx) channel finalized\n", id);
2925 
2926 	return (0);
2927 }
2928 
2929 /*
2930  * Open the LDC channel for use. It registers the TX/RX queues
2931  * with the Hypervisor. It also specifies the interrupt number
2932  * and target CPU for this channel
2933  */
2934 int
2935 ldc_open(ldc_handle_t handle)
2936 {
2937 	ldc_chan_t	*ldcp;
2938 	int		rv;
2939 
2940 	if (handle == 0) {
2941 		DWARN(DBG_ALL_LDCS, "ldc_open: invalid channel handle\n");
2942 		return (EINVAL);
2943 	}
2944 
2945 	ldcp = (ldc_chan_t *)handle;
2946 
2947 	mutex_enter(&ldcp->lock);
2948 
2949 	if (ldcp->tstate < TS_INIT) {
2950 		DWARN(ldcp->id,
2951 		    "ldc_open: (0x%llx) channel not initialized\n", ldcp->id);
2952 		mutex_exit(&ldcp->lock);
2953 		return (EFAULT);
2954 	}
2955 	if ((ldcp->tstate & ~TS_IN_RESET) >= TS_OPEN) {
2956 		DWARN(ldcp->id,
2957 		    "ldc_open: (0x%llx) channel is already open\n", ldcp->id);
2958 		mutex_exit(&ldcp->lock);
2959 		return (EFAULT);
2960 	}
2961 
2962 	/*
2963 	 * Unregister/Register the tx queue with the hypervisor
2964 	 */
2965 	rv = hv_ldc_tx_qconf(ldcp->id, 0, 0);
2966 	if (rv) {
2967 		cmn_err(CE_WARN,
2968 		    "ldc_open: (0x%lx) channel tx queue unconf failed\n",
2969 		    ldcp->id);
2970 		mutex_exit(&ldcp->lock);
2971 		return (EIO);
2972 	}
2973 
2974 	rv = hv_ldc_tx_qconf(ldcp->id, ldcp->tx_q_ra, ldcp->tx_q_entries);
2975 	if (rv) {
2976 		cmn_err(CE_WARN,
2977 		    "ldc_open: (0x%lx) channel tx queue conf failed\n",
2978 		    ldcp->id);
2979 		mutex_exit(&ldcp->lock);
2980 		return (EIO);
2981 	}
2982 
2983 	D2(ldcp->id, "ldc_open: (0x%llx) registered tx queue with LDC\n",
2984 	    ldcp->id);
2985 
2986 	/*
2987 	 * Unregister/Register the rx queue with the hypervisor
2988 	 */
2989 	rv = hv_ldc_rx_qconf(ldcp->id, 0, 0);
2990 	if (rv) {
2991 		cmn_err(CE_WARN,
2992 		    "ldc_open: (0x%lx) channel rx queue unconf failed\n",
2993 		    ldcp->id);
2994 		mutex_exit(&ldcp->lock);
2995 		return (EIO);
2996 	}
2997 
2998 	rv = hv_ldc_rx_qconf(ldcp->id, ldcp->rx_q_ra, ldcp->rx_q_entries);
2999 	if (rv) {
3000 		cmn_err(CE_WARN,
3001 		    "ldc_open: (0x%lx) channel rx queue conf failed\n",
3002 		    ldcp->id);
3003 		mutex_exit(&ldcp->lock);
3004 		return (EIO);
3005 	}
3006 
3007 	D2(ldcp->id, "ldc_open: (0x%llx) registered rx queue with LDC\n",
3008 	    ldcp->id);
3009 
3010 	ldcp->tstate |= TS_QCONF_RDY;
3011 
3012 	/* Register the channel with the channel nexus */
3013 	rv = i_ldc_register_channel(ldcp);
3014 	if (rv && rv != EAGAIN) {
3015 		cmn_err(CE_WARN,
3016 		    "ldc_open: (0x%lx) channel register failed\n", ldcp->id);
3017 		ldcp->tstate &= ~TS_QCONF_RDY;
3018 		(void) hv_ldc_tx_qconf(ldcp->id, 0, 0);
3019 		(void) hv_ldc_rx_qconf(ldcp->id, 0, 0);
3020 		mutex_exit(&ldcp->lock);
3021 		return (EIO);
3022 	}
3023 
3024 	/* mark channel in OPEN state */
3025 	ldcp->status = LDC_OPEN;
3026 
3027 	/* Read channel state */
3028 	rv = hv_ldc_tx_get_state(ldcp->id,
3029 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
3030 	if (rv) {
3031 		cmn_err(CE_WARN,
3032 		    "ldc_open: (0x%lx) cannot read channel state\n",
3033 		    ldcp->id);
3034 		(void) i_ldc_unregister_channel(ldcp);
3035 		ldcp->tstate &= ~TS_QCONF_RDY;
3036 		(void) hv_ldc_tx_qconf(ldcp->id, 0, 0);
3037 		(void) hv_ldc_rx_qconf(ldcp->id, 0, 0);
3038 		mutex_exit(&ldcp->lock);
3039 		return (EIO);
3040 	}
3041 
3042 	/*
3043 	 * set the ACKd head to current head location for reliable
3044 	 */
3045 	ldcp->tx_ackd_head = ldcp->tx_head;
3046 
3047 	/* mark channel ready if HV report link is UP (peer alloc'd Rx queue) */
3048 	if (ldcp->link_state == LDC_CHANNEL_UP ||
3049 	    ldcp->link_state == LDC_CHANNEL_RESET) {
3050 		ldcp->tstate |= TS_LINK_READY;
3051 		ldcp->status = LDC_READY;
3052 	}
3053 
3054 	/*
3055 	 * if channel is being opened in RAW mode - no handshake is needed
3056 	 * switch the channel READY and UP state
3057 	 */
3058 	if (ldcp->mode == LDC_MODE_RAW) {
3059 		ldcp->tstate = TS_UP;	/* set bits associated with LDC UP */
3060 		ldcp->status = LDC_UP;
3061 	}
3062 
3063 	mutex_exit(&ldcp->lock);
3064 
3065 	/*
3066 	 * Increment number of open channels
3067 	 */
3068 	mutex_enter(&ldcssp->lock);
3069 	ldcssp->channels_open++;
3070 	mutex_exit(&ldcssp->lock);
3071 
3072 	D1(ldcp->id,
3073 	    "ldc_open: (0x%llx) channel (0x%p) open for use "
3074 	    "(tstate=0x%x, status=0x%x)\n",
3075 	    ldcp->id, ldcp, ldcp->tstate, ldcp->status);
3076 
3077 	return (0);
3078 }
3079 
3080 /*
3081  * Close the LDC connection. It will return EBUSY if there
3082  * are memory segments or descriptor rings either bound to or
3083  * mapped over the channel
3084  */
3085 int
3086 ldc_close(ldc_handle_t handle)
3087 {
3088 	ldc_chan_t	*ldcp;
3089 	int		rv = 0, retries = 0;
3090 	boolean_t	chk_done = B_FALSE;
3091 
3092 	if (handle == 0) {
3093 		DWARN(DBG_ALL_LDCS, "ldc_close: invalid channel handle\n");
3094 		return (EINVAL);
3095 	}
3096 	ldcp = (ldc_chan_t *)handle;
3097 
3098 	mutex_enter(&ldcp->lock);
3099 
3100 	/* return error if channel is not open */
3101 	if ((ldcp->tstate & ~TS_IN_RESET) < TS_OPEN) {
3102 		DWARN(ldcp->id,
3103 		    "ldc_close: (0x%llx) channel is not open\n", ldcp->id);
3104 		mutex_exit(&ldcp->lock);
3105 		return (EFAULT);
3106 	}
3107 
3108 	/* if any memory handles, drings, are bound or mapped cannot close */
3109 	if (ldcp->mhdl_list != NULL) {
3110 		DWARN(ldcp->id,
3111 		    "ldc_close: (0x%llx) channel has bound memory handles\n",
3112 		    ldcp->id);
3113 		mutex_exit(&ldcp->lock);
3114 		return (EBUSY);
3115 	}
3116 	if (ldcp->exp_dring_list != NULL) {
3117 		DWARN(ldcp->id,
3118 		    "ldc_close: (0x%llx) channel has bound descriptor rings\n",
3119 		    ldcp->id);
3120 		mutex_exit(&ldcp->lock);
3121 		return (EBUSY);
3122 	}
3123 	if (ldcp->imp_dring_list != NULL) {
3124 		DWARN(ldcp->id,
3125 		    "ldc_close: (0x%llx) channel has mapped descriptor rings\n",
3126 		    ldcp->id);
3127 		mutex_exit(&ldcp->lock);
3128 		return (EBUSY);
3129 	}
3130 
3131 	if (ldcp->cb_inprogress) {
3132 		DWARN(ldcp->id, "ldc_close: (0x%llx) callback active\n",
3133 		    ldcp->id);
3134 		mutex_exit(&ldcp->lock);
3135 		return (EWOULDBLOCK);
3136 	}
3137 
3138 	/* Obtain Tx lock */
3139 	mutex_enter(&ldcp->tx_lock);
3140 
3141 	/*
3142 	 * Wait for pending transmits to complete i.e Tx queue to drain
3143 	 * if there are pending pkts - wait 1 ms and retry again
3144 	 */
3145 	for (;;) {
3146 
3147 		rv = hv_ldc_tx_get_state(ldcp->id,
3148 		    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
3149 		if (rv) {
3150 			cmn_err(CE_WARN,
3151 			    "ldc_close: (0x%lx) cannot read qptrs\n", ldcp->id);
3152 			mutex_exit(&ldcp->tx_lock);
3153 			mutex_exit(&ldcp->lock);
3154 			return (EIO);
3155 		}
3156 
3157 		if (ldcp->tx_head == ldcp->tx_tail ||
3158 		    ldcp->link_state != LDC_CHANNEL_UP) {
3159 			break;
3160 		}
3161 
3162 		if (chk_done) {
3163 			DWARN(ldcp->id,
3164 			    "ldc_close: (0x%llx) Tx queue drain timeout\n",
3165 			    ldcp->id);
3166 			break;
3167 		}
3168 
3169 		/* wait for one ms and try again */
3170 		delay(drv_usectohz(1000));
3171 		chk_done = B_TRUE;
3172 	}
3173 
3174 	/*
3175 	 * Drain the Tx and Rx queues as we are closing the
3176 	 * channel. We dont care about any pending packets.
3177 	 * We have to also drain the queue prior to clearing
3178 	 * pending interrupts, otherwise the HV will trigger
3179 	 * an interrupt the moment the interrupt state is
3180 	 * cleared.
3181 	 */
3182 	(void) i_ldc_txq_reconf(ldcp);
3183 	i_ldc_rxq_drain(ldcp);
3184 
3185 	/*
3186 	 * Unregister the channel with the nexus
3187 	 */
3188 	while ((rv = i_ldc_unregister_channel(ldcp)) != 0) {
3189 
3190 		mutex_exit(&ldcp->tx_lock);
3191 		mutex_exit(&ldcp->lock);
3192 
3193 		/* if any error other than EAGAIN return back */
3194 		if (rv != EAGAIN || retries >= ldc_max_retries) {
3195 			cmn_err(CE_WARN,
3196 			    "ldc_close: (0x%lx) unregister failed, %d\n",
3197 			    ldcp->id, rv);
3198 			return (rv);
3199 		}
3200 
3201 		/*
3202 		 * As there could be pending interrupts we need
3203 		 * to wait and try again
3204 		 */
3205 		drv_usecwait(ldc_close_delay);
3206 		mutex_enter(&ldcp->lock);
3207 		mutex_enter(&ldcp->tx_lock);
3208 		retries++;
3209 	}
3210 
3211 	ldcp->tstate &= ~TS_QCONF_RDY;
3212 
3213 	/*
3214 	 * Unregister queues
3215 	 */
3216 	rv = hv_ldc_tx_qconf(ldcp->id, 0, 0);
3217 	if (rv) {
3218 		cmn_err(CE_WARN,
3219 		    "ldc_close: (0x%lx) channel TX queue unconf failed\n",
3220 		    ldcp->id);
3221 		mutex_exit(&ldcp->tx_lock);
3222 		mutex_exit(&ldcp->lock);
3223 		return (EIO);
3224 	}
3225 	rv = hv_ldc_rx_qconf(ldcp->id, 0, 0);
3226 	if (rv) {
3227 		cmn_err(CE_WARN,
3228 		    "ldc_close: (0x%lx) channel RX queue unconf failed\n",
3229 		    ldcp->id);
3230 		mutex_exit(&ldcp->tx_lock);
3231 		mutex_exit(&ldcp->lock);
3232 		return (EIO);
3233 	}
3234 
3235 	/* Reset channel state information */
3236 	i_ldc_reset_state(ldcp);
3237 
3238 	/* Mark channel as down and in initialized state */
3239 	ldcp->tx_ackd_head = 0;
3240 	ldcp->tx_head = 0;
3241 	ldcp->tstate = TS_IN_RESET|TS_INIT;
3242 	ldcp->status = LDC_INIT;
3243 
3244 	mutex_exit(&ldcp->tx_lock);
3245 	mutex_exit(&ldcp->lock);
3246 
3247 	/* Decrement number of open channels */
3248 	mutex_enter(&ldcssp->lock);
3249 	ldcssp->channels_open--;
3250 	mutex_exit(&ldcssp->lock);
3251 
3252 	D1(ldcp->id, "ldc_close: (0x%llx) channel closed\n", ldcp->id);
3253 
3254 	return (0);
3255 }
3256 
3257 /*
3258  * Register channel callback
3259  */
3260 int
3261 ldc_reg_callback(ldc_handle_t handle,
3262     uint_t(*cb)(uint64_t event, caddr_t arg), caddr_t arg)
3263 {
3264 	ldc_chan_t *ldcp;
3265 
3266 	if (handle == 0) {
3267 		DWARN(DBG_ALL_LDCS,
3268 		    "ldc_reg_callback: invalid channel handle\n");
3269 		return (EINVAL);
3270 	}
3271 	if (((uint64_t)cb) < KERNELBASE) {
3272 		DWARN(DBG_ALL_LDCS, "ldc_reg_callback: invalid callback\n");
3273 		return (EINVAL);
3274 	}
3275 	ldcp = (ldc_chan_t *)handle;
3276 
3277 	mutex_enter(&ldcp->lock);
3278 
3279 	if (ldcp->cb) {
3280 		DWARN(ldcp->id, "ldc_reg_callback: (0x%llx) callback exists\n",
3281 		    ldcp->id);
3282 		mutex_exit(&ldcp->lock);
3283 		return (EIO);
3284 	}
3285 	if (ldcp->cb_inprogress) {
3286 		DWARN(ldcp->id, "ldc_reg_callback: (0x%llx) callback active\n",
3287 		    ldcp->id);
3288 		mutex_exit(&ldcp->lock);
3289 		return (EWOULDBLOCK);
3290 	}
3291 
3292 	ldcp->cb = cb;
3293 	ldcp->cb_arg = arg;
3294 	ldcp->cb_enabled = B_TRUE;
3295 
3296 	D1(ldcp->id,
3297 	    "ldc_reg_callback: (0x%llx) registered callback for channel\n",
3298 	    ldcp->id);
3299 
3300 	mutex_exit(&ldcp->lock);
3301 
3302 	return (0);
3303 }
3304 
3305 /*
3306  * Unregister channel callback
3307  */
3308 int
3309 ldc_unreg_callback(ldc_handle_t handle)
3310 {
3311 	ldc_chan_t *ldcp;
3312 
3313 	if (handle == 0) {
3314 		DWARN(DBG_ALL_LDCS,
3315 		    "ldc_unreg_callback: invalid channel handle\n");
3316 		return (EINVAL);
3317 	}
3318 	ldcp = (ldc_chan_t *)handle;
3319 
3320 	mutex_enter(&ldcp->lock);
3321 
3322 	if (ldcp->cb == NULL) {
3323 		DWARN(ldcp->id,
3324 		    "ldc_unreg_callback: (0x%llx) no callback exists\n",
3325 		    ldcp->id);
3326 		mutex_exit(&ldcp->lock);
3327 		return (EIO);
3328 	}
3329 	if (ldcp->cb_inprogress) {
3330 		DWARN(ldcp->id,
3331 		    "ldc_unreg_callback: (0x%llx) callback active\n",
3332 		    ldcp->id);
3333 		mutex_exit(&ldcp->lock);
3334 		return (EWOULDBLOCK);
3335 	}
3336 
3337 	ldcp->cb = NULL;
3338 	ldcp->cb_arg = NULL;
3339 	ldcp->cb_enabled = B_FALSE;
3340 
3341 	D1(ldcp->id,
3342 	    "ldc_unreg_callback: (0x%llx) unregistered callback for channel\n",
3343 	    ldcp->id);
3344 
3345 	mutex_exit(&ldcp->lock);
3346 
3347 	return (0);
3348 }
3349 
3350 
3351 /*
3352  * Bring a channel up by initiating a handshake with the peer
3353  * This call is asynchronous. It will complete at a later point
3354  * in time when the peer responds back with an RTR.
3355  */
3356 int
3357 ldc_up(ldc_handle_t handle)
3358 {
3359 	int		rv;
3360 	ldc_chan_t	*ldcp;
3361 	ldc_msg_t	*ldcmsg;
3362 	uint64_t	tx_tail, tstate, link_state;
3363 
3364 	if (handle == 0) {
3365 		DWARN(DBG_ALL_LDCS, "ldc_up: invalid channel handle\n");
3366 		return (EINVAL);
3367 	}
3368 	ldcp = (ldc_chan_t *)handle;
3369 
3370 	mutex_enter(&ldcp->lock);
3371 
3372 	D1(ldcp->id, "ldc_up: (0x%llx) doing channel UP\n", ldcp->id);
3373 
3374 	/* clear the reset state */
3375 	tstate = ldcp->tstate;
3376 	ldcp->tstate &= ~TS_IN_RESET;
3377 
3378 	if (ldcp->tstate == TS_UP) {
3379 		DWARN(ldcp->id,
3380 		    "ldc_up: (0x%llx) channel is already in UP state\n",
3381 		    ldcp->id);
3382 
3383 		/* mark channel as up */
3384 		ldcp->status = LDC_UP;
3385 
3386 		/*
3387 		 * if channel was in reset state and there was
3388 		 * pending data clear interrupt state. this will
3389 		 * trigger an interrupt, causing the RX handler to
3390 		 * to invoke the client's callback
3391 		 */
3392 		if ((tstate & TS_IN_RESET) &&
3393 		    ldcp->rx_intr_state == LDC_INTR_PEND) {
3394 			D1(ldcp->id,
3395 			    "ldc_up: (0x%llx) channel has pending data, "
3396 			    "clearing interrupt\n", ldcp->id);
3397 			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
3398 		}
3399 
3400 		mutex_exit(&ldcp->lock);
3401 		return (0);
3402 	}
3403 
3404 	/* if the channel is in RAW mode - mark it as UP, if READY */
3405 	if (ldcp->mode == LDC_MODE_RAW && ldcp->tstate >= TS_READY) {
3406 		ldcp->tstate = TS_UP;
3407 		mutex_exit(&ldcp->lock);
3408 		return (0);
3409 	}
3410 
3411 	/* Don't start another handshake if there is one in progress */
3412 	if (ldcp->hstate) {
3413 		D1(ldcp->id,
3414 		    "ldc_up: (0x%llx) channel handshake in progress\n",
3415 		    ldcp->id);
3416 		mutex_exit(&ldcp->lock);
3417 		return (0);
3418 	}
3419 
3420 	mutex_enter(&ldcp->tx_lock);
3421 
3422 	/* save current link state */
3423 	link_state = ldcp->link_state;
3424 
3425 	/* get the current tail for the LDC msg */
3426 	rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
3427 	if (rv) {
3428 		D1(ldcp->id, "ldc_up: (0x%llx) cannot initiate handshake\n",
3429 		    ldcp->id);
3430 		mutex_exit(&ldcp->tx_lock);
3431 		mutex_exit(&ldcp->lock);
3432 		return (ECONNREFUSED);
3433 	}
3434 
3435 	/*
3436 	 * If i_ldc_get_tx_tail() changed link_state to either RESET or UP,
3437 	 * from a previous state of DOWN, then mark the channel as
3438 	 * being ready for handshake.
3439 	 */
3440 	if ((link_state == LDC_CHANNEL_DOWN) &&
3441 	    (link_state != ldcp->link_state)) {
3442 
3443 		ASSERT((ldcp->link_state == LDC_CHANNEL_RESET) ||
3444 		    (ldcp->link_state == LDC_CHANNEL_UP));
3445 
3446 		if (ldcp->mode == LDC_MODE_RAW) {
3447 			ldcp->status = LDC_UP;
3448 			ldcp->tstate = TS_UP;
3449 			mutex_exit(&ldcp->tx_lock);
3450 			mutex_exit(&ldcp->lock);
3451 			return (0);
3452 		} else {
3453 			ldcp->status = LDC_READY;
3454 			ldcp->tstate |= TS_LINK_READY;
3455 		}
3456 
3457 	}
3458 
3459 	ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
3460 	ZERO_PKT(ldcmsg);
3461 
3462 	ldcmsg->type = LDC_CTRL;
3463 	ldcmsg->stype = LDC_INFO;
3464 	ldcmsg->ctrl = LDC_VER;
3465 	ldcp->next_vidx = 0;
3466 	bcopy(&ldc_versions[0], ldcmsg->udata, sizeof (ldc_versions[0]));
3467 
3468 	DUMP_LDC_PKT(ldcp, "ldc_up snd ver", (uint64_t)ldcmsg);
3469 
3470 	/* initiate the send by calling into HV and set the new tail */
3471 	tx_tail = (tx_tail + LDC_PACKET_SIZE) %
3472 	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
3473 
3474 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
3475 	if (rv) {
3476 		DWARN(ldcp->id,
3477 		    "ldc_up: (0x%llx) cannot initiate handshake rv=%d\n",
3478 		    ldcp->id, rv);
3479 		mutex_exit(&ldcp->tx_lock);
3480 		mutex_exit(&ldcp->lock);
3481 		return (rv);
3482 	}
3483 
3484 	ldcp->hstate |= TS_SENT_VER;
3485 	ldcp->tx_tail = tx_tail;
3486 	D1(ldcp->id, "ldc_up: (0x%llx) channel up initiated\n", ldcp->id);
3487 
3488 	mutex_exit(&ldcp->tx_lock);
3489 	mutex_exit(&ldcp->lock);
3490 
3491 	return (rv);
3492 }
3493 
3494 
3495 /*
3496  * Bring a channel down by resetting its state and queues
3497  */
3498 int
3499 ldc_down(ldc_handle_t handle)
3500 {
3501 	ldc_chan_t	*ldcp;
3502 
3503 	if (handle == 0) {
3504 		DWARN(DBG_ALL_LDCS, "ldc_down: invalid channel handle\n");
3505 		return (EINVAL);
3506 	}
3507 	ldcp = (ldc_chan_t *)handle;
3508 	mutex_enter(&ldcp->lock);
3509 	mutex_enter(&ldcp->tx_lock);
3510 	i_ldc_reset(ldcp, B_TRUE);
3511 	mutex_exit(&ldcp->tx_lock);
3512 	mutex_exit(&ldcp->lock);
3513 
3514 	return (0);
3515 }
3516 
3517 /*
3518  * Get the current channel status
3519  */
3520 int
3521 ldc_status(ldc_handle_t handle, ldc_status_t *status)
3522 {
3523 	ldc_chan_t *ldcp;
3524 
3525 	if (handle == 0 || status == NULL) {
3526 		DWARN(DBG_ALL_LDCS, "ldc_status: invalid argument\n");
3527 		return (EINVAL);
3528 	}
3529 	ldcp = (ldc_chan_t *)handle;
3530 
3531 	*status = ((ldc_chan_t *)handle)->status;
3532 
3533 	D1(ldcp->id,
3534 	    "ldc_status: (0x%llx) returned status %d\n", ldcp->id, *status);
3535 	return (0);
3536 }
3537 
3538 
3539 /*
3540  * Set the channel's callback mode - enable/disable callbacks
3541  */
3542 int
3543 ldc_set_cb_mode(ldc_handle_t handle, ldc_cb_mode_t cmode)
3544 {
3545 	ldc_chan_t	*ldcp;
3546 
3547 	if (handle == 0) {
3548 		DWARN(DBG_ALL_LDCS,
3549 		    "ldc_set_intr_mode: invalid channel handle\n");
3550 		return (EINVAL);
3551 	}
3552 	ldcp = (ldc_chan_t *)handle;
3553 
3554 	/*
3555 	 * Record no callbacks should be invoked
3556 	 */
3557 	mutex_enter(&ldcp->lock);
3558 
3559 	switch (cmode) {
3560 	case LDC_CB_DISABLE:
3561 		if (!ldcp->cb_enabled) {
3562 			DWARN(ldcp->id,
3563 			    "ldc_set_cb_mode: (0x%llx) callbacks disabled\n",
3564 			    ldcp->id);
3565 			break;
3566 		}
3567 		ldcp->cb_enabled = B_FALSE;
3568 
3569 		D1(ldcp->id, "ldc_set_cb_mode: (0x%llx) disabled callbacks\n",
3570 		    ldcp->id);
3571 		break;
3572 
3573 	case LDC_CB_ENABLE:
3574 		if (ldcp->cb_enabled) {
3575 			DWARN(ldcp->id,
3576 			    "ldc_set_cb_mode: (0x%llx) callbacks enabled\n",
3577 			    ldcp->id);
3578 			break;
3579 		}
3580 		ldcp->cb_enabled = B_TRUE;
3581 
3582 		D1(ldcp->id, "ldc_set_cb_mode: (0x%llx) enabled callbacks\n",
3583 		    ldcp->id);
3584 		break;
3585 	}
3586 
3587 	mutex_exit(&ldcp->lock);
3588 
3589 	return (0);
3590 }
3591 
3592 /*
3593  * Check to see if there are packets on the incoming queue
3594  * Will return hasdata = B_FALSE if there are no packets
3595  */
3596 int
3597 ldc_chkq(ldc_handle_t handle, boolean_t *hasdata)
3598 {
3599 	int		rv;
3600 	uint64_t	rx_head, rx_tail;
3601 	ldc_chan_t	*ldcp;
3602 
3603 	if (handle == 0) {
3604 		DWARN(DBG_ALL_LDCS, "ldc_chkq: invalid channel handle\n");
3605 		return (EINVAL);
3606 	}
3607 	ldcp = (ldc_chan_t *)handle;
3608 
3609 	*hasdata = B_FALSE;
3610 
3611 	mutex_enter(&ldcp->lock);
3612 
3613 	if (ldcp->tstate != TS_UP) {
3614 		D1(ldcp->id,
3615 		    "ldc_chkq: (0x%llx) channel is not up\n", ldcp->id);
3616 		mutex_exit(&ldcp->lock);
3617 		return (ECONNRESET);
3618 	}
3619 
3620 	/* Read packet(s) from the queue */
3621 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
3622 	    &ldcp->link_state);
3623 	if (rv != 0) {
3624 		cmn_err(CE_WARN,
3625 		    "ldc_chkq: (0x%lx) unable to read queue ptrs", ldcp->id);
3626 		mutex_exit(&ldcp->lock);
3627 		return (EIO);
3628 	}
3629 
3630 	/* reset the channel state if the channel went down */
3631 	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
3632 	    ldcp->link_state == LDC_CHANNEL_RESET) {
3633 		mutex_enter(&ldcp->tx_lock);
3634 		i_ldc_reset(ldcp, B_FALSE);
3635 		mutex_exit(&ldcp->tx_lock);
3636 		mutex_exit(&ldcp->lock);
3637 		return (ECONNRESET);
3638 	}
3639 
3640 	switch (ldcp->mode) {
3641 	case LDC_MODE_RAW:
3642 		/*
3643 		 * In raw mode, there are no ctrl packets, so checking
3644 		 * if the queue is non-empty is sufficient.
3645 		 */
3646 		*hasdata = (rx_head != rx_tail);
3647 		break;
3648 
3649 	case LDC_MODE_UNRELIABLE:
3650 		/*
3651 		 * In unreliable mode, if the queue is non-empty, we need
3652 		 * to check if it actually contains unread data packets.
3653 		 * The queue may just contain ctrl packets.
3654 		 */
3655 		if (rx_head != rx_tail) {
3656 			*hasdata = (i_ldc_chkq(ldcp) == 0);
3657 			/*
3658 			 * If no data packets were found on the queue,
3659 			 * all packets must have been control packets
3660 			 * which will now have been processed, leaving
3661 			 * the queue empty. If the interrupt state
3662 			 * is pending, we need to clear the interrupt
3663 			 * here.
3664 			 */
3665 			if (*hasdata == B_FALSE &&
3666 			    ldcp->rx_intr_state == LDC_INTR_PEND) {
3667 				i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
3668 			}
3669 		}
3670 		break;
3671 
3672 	case LDC_MODE_RELIABLE:
3673 		/*
3674 		 * In reliable mode, first check for 'stream_remains' > 0.
3675 		 * Otherwise, if the data queue head and tail pointers
3676 		 * differ, there must be data to read.
3677 		 */
3678 		if (ldcp->stream_remains > 0)
3679 			*hasdata = B_TRUE;
3680 		else
3681 			*hasdata = (ldcp->rx_dq_head != ldcp->rx_dq_tail);
3682 		break;
3683 
3684 	default:
3685 		cmn_err(CE_WARN, "ldc_chkq: (0x%lx) unexpected channel mode "
3686 		    "(0x%x)", ldcp->id, ldcp->mode);
3687 		mutex_exit(&ldcp->lock);
3688 		return (EIO);
3689 	}
3690 
3691 	mutex_exit(&ldcp->lock);
3692 
3693 	return (0);
3694 }
3695 
3696 
3697 /*
3698  * Read 'size' amount of bytes or less. If incoming buffer
3699  * is more than 'size', ENOBUFS is returned.
3700  *
3701  * On return, size contains the number of bytes read.
3702  */
3703 int
3704 ldc_read(ldc_handle_t handle, caddr_t bufp, size_t *sizep)
3705 {
3706 	ldc_chan_t	*ldcp;
3707 	uint64_t	rx_head = 0, rx_tail = 0;
3708 	int		rv = 0, exit_val;
3709 
3710 	if (handle == 0) {
3711 		DWARN(DBG_ALL_LDCS, "ldc_read: invalid channel handle\n");
3712 		return (EINVAL);
3713 	}
3714 
3715 	ldcp = (ldc_chan_t *)handle;
3716 
3717 	/* channel lock */
3718 	mutex_enter(&ldcp->lock);
3719 
3720 	if (ldcp->tstate != TS_UP) {
3721 		DWARN(ldcp->id,
3722 		    "ldc_read: (0x%llx) channel is not in UP state\n",
3723 		    ldcp->id);
3724 		exit_val = ECONNRESET;
3725 	} else if (ldcp->mode == LDC_MODE_RELIABLE) {
3726 		TRACE_RXDQ_LENGTH(ldcp);
3727 		exit_val = ldcp->read_p(ldcp, bufp, sizep);
3728 
3729 		/*
3730 		 * For reliable mode channels, the interrupt
3731 		 * state is only set to pending during
3732 		 * interrupt handling when the secondary data
3733 		 * queue became full, leaving unprocessed
3734 		 * packets on the Rx queue. If the interrupt
3735 		 * state is pending and space is now available
3736 		 * on the data queue, clear the interrupt.
3737 		 */
3738 		if (ldcp->rx_intr_state == LDC_INTR_PEND &&
3739 		    Q_CONTIG_SPACE(ldcp->rx_dq_head, ldcp->rx_dq_tail,
3740 		    ldcp->rx_dq_entries << LDC_PACKET_SHIFT) >=
3741 		    LDC_PACKET_SIZE) {
3742 			/* data queue is not full */
3743 			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
3744 		}
3745 
3746 		mutex_exit(&ldcp->lock);
3747 		return (exit_val);
3748 	} else {
3749 		exit_val = ldcp->read_p(ldcp, bufp, sizep);
3750 	}
3751 
3752 	/*
3753 	 * if queue has been drained - clear interrupt
3754 	 */
3755 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
3756 	    &ldcp->link_state);
3757 	if (rv != 0) {
3758 		cmn_err(CE_WARN, "ldc_read: (0x%lx) unable to read queue ptrs",
3759 		    ldcp->id);
3760 		mutex_enter(&ldcp->tx_lock);
3761 		i_ldc_reset(ldcp, B_TRUE);
3762 		mutex_exit(&ldcp->tx_lock);
3763 		mutex_exit(&ldcp->lock);
3764 		return (ECONNRESET);
3765 	}
3766 
3767 	if (exit_val == 0) {
3768 		if (ldcp->link_state == LDC_CHANNEL_DOWN ||
3769 		    ldcp->link_state == LDC_CHANNEL_RESET) {
3770 			mutex_enter(&ldcp->tx_lock);
3771 			i_ldc_reset(ldcp, B_FALSE);
3772 			exit_val = ECONNRESET;
3773 			mutex_exit(&ldcp->tx_lock);
3774 		}
3775 		if ((rv == 0) &&
3776 		    (ldcp->rx_intr_state == LDC_INTR_PEND) &&
3777 		    (rx_head == rx_tail)) {
3778 			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
3779 		}
3780 	}
3781 
3782 	mutex_exit(&ldcp->lock);
3783 	return (exit_val);
3784 }
3785 
3786 /*
3787  * Basic raw mondo read -
3788  * no interpretation of mondo contents at all.
3789  *
3790  * Enter and exit with ldcp->lock held by caller
3791  */
3792 static int
3793 i_ldc_read_raw(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep)
3794 {
3795 	uint64_t	q_size_mask;
3796 	ldc_msg_t	*msgp;
3797 	uint8_t		*msgbufp;
3798 	int		rv = 0, space;
3799 	uint64_t	rx_head, rx_tail;
3800 
3801 	space = *sizep;
3802 
3803 	if (space < LDC_PAYLOAD_SIZE_RAW)
3804 		return (ENOBUFS);
3805 
3806 	ASSERT(mutex_owned(&ldcp->lock));
3807 
3808 	/* compute mask for increment */
3809 	q_size_mask = (ldcp->rx_q_entries-1)<<LDC_PACKET_SHIFT;
3810 
3811 	/*
3812 	 * Read packet(s) from the queue
3813 	 */
3814 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
3815 	    &ldcp->link_state);
3816 	if (rv != 0) {
3817 		cmn_err(CE_WARN,
3818 		    "ldc_read_raw: (0x%lx) unable to read queue ptrs",
3819 		    ldcp->id);
3820 		return (EIO);
3821 	}
3822 	D1(ldcp->id, "ldc_read_raw: (0x%llx) rxh=0x%llx,"
3823 	    " rxt=0x%llx, st=0x%llx\n",
3824 	    ldcp->id, rx_head, rx_tail, ldcp->link_state);
3825 
3826 	/* reset the channel state if the channel went down */
3827 	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
3828 	    ldcp->link_state == LDC_CHANNEL_RESET) {
3829 		mutex_enter(&ldcp->tx_lock);
3830 		i_ldc_reset(ldcp, B_FALSE);
3831 		mutex_exit(&ldcp->tx_lock);
3832 		return (ECONNRESET);
3833 	}
3834 
3835 	/*
3836 	 * Check for empty queue
3837 	 */
3838 	if (rx_head == rx_tail) {
3839 		*sizep = 0;
3840 		return (0);
3841 	}
3842 
3843 	/* get the message */
3844 	msgp = (ldc_msg_t *)(ldcp->rx_q_va + rx_head);
3845 
3846 	/* if channel is in RAW mode, copy data and return */
3847 	msgbufp = (uint8_t *)&(msgp->raw[0]);
3848 
3849 	bcopy(msgbufp, target_bufp, LDC_PAYLOAD_SIZE_RAW);
3850 
3851 	DUMP_PAYLOAD(ldcp->id, msgbufp);
3852 
3853 	*sizep = LDC_PAYLOAD_SIZE_RAW;
3854 
3855 	rx_head = (rx_head + LDC_PACKET_SIZE) & q_size_mask;
3856 	rv = i_ldc_set_rx_head(ldcp, rx_head);
3857 
3858 	return (rv);
3859 }
3860 
3861 /*
3862  * Process LDC mondos to build larger packets
3863  * with either un-reliable or reliable delivery.
3864  *
3865  * Enter and exit with ldcp->lock held by caller
3866  */
3867 static int
3868 i_ldc_read_packet(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep)
3869 {
3870 	int		rv = 0;
3871 	uint64_t	rx_head = 0, rx_tail = 0;
3872 	uint64_t	curr_head = 0;
3873 	ldc_msg_t	*msg;
3874 	caddr_t		target;
3875 	size_t		len = 0, bytes_read = 0;
3876 	int		retries = 0;
3877 	uint64_t	q_va, q_size_mask;
3878 	uint64_t	first_fragment = 0;
3879 
3880 	target = target_bufp;
3881 
3882 	ASSERT(mutex_owned(&ldcp->lock));
3883 
3884 	/* check if the buffer and size are valid */
3885 	if (target_bufp == NULL || *sizep == 0) {
3886 		DWARN(ldcp->id, "ldc_read: (0x%llx) invalid buffer/size\n",
3887 		    ldcp->id);
3888 		return (EINVAL);
3889 	}
3890 
3891 	/* Set q_va and compute increment mask for the appropriate queue */
3892 	if (ldcp->mode == LDC_MODE_RELIABLE) {
3893 		q_va	    = ldcp->rx_dq_va;
3894 		q_size_mask = (ldcp->rx_dq_entries-1)<<LDC_PACKET_SHIFT;
3895 	} else {
3896 		q_va	    = ldcp->rx_q_va;
3897 		q_size_mask = (ldcp->rx_q_entries-1)<<LDC_PACKET_SHIFT;
3898 	}
3899 
3900 	/*
3901 	 * Read packet(s) from the queue
3902 	 */
3903 	rv = ldcp->readq_get_state(ldcp, &curr_head, &rx_tail,
3904 	    &ldcp->link_state);
3905 	if (rv != 0) {
3906 		cmn_err(CE_WARN, "ldc_read: (0x%lx) unable to read queue ptrs",
3907 		    ldcp->id);
3908 		mutex_enter(&ldcp->tx_lock);
3909 		i_ldc_reset(ldcp, B_TRUE);
3910 		mutex_exit(&ldcp->tx_lock);
3911 		return (ECONNRESET);
3912 	}
3913 	D1(ldcp->id, "ldc_read: (0x%llx) chd=0x%llx, tl=0x%llx, st=0x%llx\n",
3914 	    ldcp->id, curr_head, rx_tail, ldcp->link_state);
3915 
3916 	/* reset the channel state if the channel went down */
3917 	if (ldcp->link_state != LDC_CHANNEL_UP)
3918 		goto channel_is_reset;
3919 
3920 	for (;;) {
3921 
3922 		if (curr_head == rx_tail) {
3923 			/*
3924 			 * If a data queue is being used, check the Rx HV
3925 			 * queue. This will copy over any new data packets
3926 			 * that have arrived.
3927 			 */
3928 			if (ldcp->mode == LDC_MODE_RELIABLE)
3929 				(void) i_ldc_chkq(ldcp);
3930 
3931 			rv = ldcp->readq_get_state(ldcp,
3932 			    &rx_head, &rx_tail, &ldcp->link_state);
3933 			if (rv != 0) {
3934 				cmn_err(CE_WARN,
3935 				    "ldc_read: (0x%lx) cannot read queue ptrs",
3936 				    ldcp->id);
3937 				mutex_enter(&ldcp->tx_lock);
3938 				i_ldc_reset(ldcp, B_TRUE);
3939 				mutex_exit(&ldcp->tx_lock);
3940 				return (ECONNRESET);
3941 			}
3942 
3943 			if (ldcp->link_state != LDC_CHANNEL_UP)
3944 				goto channel_is_reset;
3945 
3946 			if (curr_head == rx_tail) {
3947 
3948 				/* If in the middle of a fragmented xfer */
3949 				if (first_fragment != 0) {
3950 
3951 					/* wait for ldc_delay usecs */
3952 					drv_usecwait(ldc_delay);
3953 
3954 					if (++retries < ldc_max_retries)
3955 						continue;
3956 
3957 					*sizep = 0;
3958 					if (ldcp->mode != LDC_MODE_RELIABLE)
3959 						ldcp->last_msg_rcd =
3960 						    first_fragment - 1;
3961 					DWARN(DBG_ALL_LDCS, "ldc_read: "
3962 					    "(0x%llx) read timeout", ldcp->id);
3963 					return (EAGAIN);
3964 				}
3965 				*sizep = 0;
3966 				break;
3967 			}
3968 		}
3969 		retries = 0;
3970 
3971 		D2(ldcp->id,
3972 		    "ldc_read: (0x%llx) chd=0x%llx, rxhd=0x%llx, rxtl=0x%llx\n",
3973 		    ldcp->id, curr_head, rx_head, rx_tail);
3974 
3975 		/* get the message */
3976 		msg = (ldc_msg_t *)(q_va + curr_head);
3977 
3978 		DUMP_LDC_PKT(ldcp, "ldc_read received pkt",
3979 		    ldcp->rx_q_va + curr_head);
3980 
3981 		/* Check the message ID for the message received */
3982 		if (ldcp->mode != LDC_MODE_RELIABLE) {
3983 			if ((rv = i_ldc_check_seqid(ldcp, msg)) != 0) {
3984 
3985 				DWARN(ldcp->id, "ldc_read: (0x%llx) seqid "
3986 				    "error, q_ptrs=0x%lx,0x%lx",
3987 				    ldcp->id, rx_head, rx_tail);
3988 
3989 				/* throw away data */
3990 				bytes_read = 0;
3991 
3992 				/* Reset last_msg_rcd to start of message */
3993 				if (first_fragment != 0) {
3994 					ldcp->last_msg_rcd = first_fragment - 1;
3995 					first_fragment = 0;
3996 				}
3997 				/*
3998 				 * Send a NACK -- invalid seqid
3999 				 * get the current tail for the response
4000 				 */
4001 				rv = i_ldc_send_pkt(ldcp, msg->type, LDC_NACK,
4002 				    (msg->ctrl & LDC_CTRL_MASK));
4003 				if (rv) {
4004 					cmn_err(CE_NOTE,
4005 					    "ldc_read: (0x%lx) err sending "
4006 					    "NACK msg\n", ldcp->id);
4007 
4008 					/* if cannot send NACK - reset chan */
4009 					mutex_enter(&ldcp->tx_lock);
4010 					i_ldc_reset(ldcp, B_FALSE);
4011 					mutex_exit(&ldcp->tx_lock);
4012 					rv = ECONNRESET;
4013 					break;
4014 				}
4015 
4016 				/* purge receive queue */
4017 				rv = i_ldc_set_rx_head(ldcp, rx_tail);
4018 
4019 				break;
4020 			}
4021 
4022 			/*
4023 			 * Process any messages of type CTRL messages
4024 			 * Future implementations should try to pass these
4025 			 * to LDC link by resetting the intr state.
4026 			 *
4027 			 * NOTE: not done as a switch() as type can be
4028 			 * both ctrl+data
4029 			 */
4030 			if (msg->type & LDC_CTRL) {
4031 				if (rv = i_ldc_ctrlmsg(ldcp, msg)) {
4032 					if (rv == EAGAIN)
4033 						continue;
4034 					rv = i_ldc_set_rx_head(ldcp, rx_tail);
4035 					*sizep = 0;
4036 					bytes_read = 0;
4037 					break;
4038 				}
4039 			}
4040 
4041 			/* process data ACKs */
4042 			if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) {
4043 				if (rv = i_ldc_process_data_ACK(ldcp, msg)) {
4044 					*sizep = 0;
4045 					bytes_read = 0;
4046 					break;
4047 				}
4048 			}
4049 
4050 			/* process data NACKs */
4051 			if ((msg->type & LDC_DATA) && (msg->stype & LDC_NACK)) {
4052 				DWARN(ldcp->id,
4053 				    "ldc_read: (0x%llx) received DATA/NACK",
4054 				    ldcp->id);
4055 				mutex_enter(&ldcp->tx_lock);
4056 				i_ldc_reset(ldcp, B_TRUE);
4057 				mutex_exit(&ldcp->tx_lock);
4058 				return (ECONNRESET);
4059 			}
4060 		}
4061 
4062 		/* process data messages */
4063 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_INFO)) {
4064 
4065 			uint8_t *msgbuf = (uint8_t *)(
4066 			    (ldcp->mode == LDC_MODE_RELIABLE) ?
4067 			    msg->rdata : msg->udata);
4068 
4069 			D2(ldcp->id,
4070 			    "ldc_read: (0x%llx) received data msg\n", ldcp->id);
4071 
4072 			/* get the packet length */
4073 			len = (msg->env & LDC_LEN_MASK);
4074 
4075 				/*
4076 				 * FUTURE OPTIMIZATION:
4077 				 * dont need to set q head for every
4078 				 * packet we read just need to do this when
4079 				 * we are done or need to wait for more
4080 				 * mondos to make a full packet - this is
4081 				 * currently expensive.
4082 				 */
4083 
4084 			if (first_fragment == 0) {
4085 
4086 				/*
4087 				 * first packets should always have the start
4088 				 * bit set (even for a single packet). If not
4089 				 * throw away the packet
4090 				 */
4091 				if (!(msg->env & LDC_FRAG_START)) {
4092 
4093 					DWARN(DBG_ALL_LDCS,
4094 					    "ldc_read: (0x%llx) not start - "
4095 					    "frag=%x\n", ldcp->id,
4096 					    (msg->env) & LDC_FRAG_MASK);
4097 
4098 					/* toss pkt, inc head, cont reading */
4099 					bytes_read = 0;
4100 					target = target_bufp;
4101 					curr_head =
4102 					    (curr_head + LDC_PACKET_SIZE)
4103 					    & q_size_mask;
4104 					if (rv = ldcp->readq_set_head(ldcp,
4105 					    curr_head))
4106 						break;
4107 
4108 					continue;
4109 				}
4110 
4111 				first_fragment = msg->seqid;
4112 			} else {
4113 				/* check to see if this is a pkt w/ START bit */
4114 				if (msg->env & LDC_FRAG_START) {
4115 					DWARN(DBG_ALL_LDCS,
4116 					    "ldc_read:(0x%llx) unexpected pkt"
4117 					    " env=0x%x discarding %d bytes,"
4118 					    " lastmsg=%d, currentmsg=%d\n",
4119 					    ldcp->id, msg->env&LDC_FRAG_MASK,
4120 					    bytes_read, ldcp->last_msg_rcd,
4121 					    msg->seqid);
4122 
4123 					/* throw data we have read so far */
4124 					bytes_read = 0;
4125 					target = target_bufp;
4126 					first_fragment = msg->seqid;
4127 
4128 					if (rv = ldcp->readq_set_head(ldcp,
4129 					    curr_head))
4130 						break;
4131 				}
4132 			}
4133 
4134 			/* copy (next) pkt into buffer */
4135 			if (len <= (*sizep - bytes_read)) {
4136 				bcopy(msgbuf, target, len);
4137 				target += len;
4138 				bytes_read += len;
4139 			} else {
4140 				/*
4141 				 * there is not enough space in the buffer to
4142 				 * read this pkt. throw message away & continue
4143 				 * reading data from queue
4144 				 */
4145 				DWARN(DBG_ALL_LDCS,
4146 				    "ldc_read: (0x%llx) buffer too small, "
4147 				    "head=0x%lx, expect=%d, got=%d\n", ldcp->id,
4148 				    curr_head, *sizep, bytes_read+len);
4149 
4150 				first_fragment = 0;
4151 				target = target_bufp;
4152 				bytes_read = 0;
4153 
4154 				/* throw away everything received so far */
4155 				if (rv = ldcp->readq_set_head(ldcp, curr_head))
4156 					break;
4157 
4158 				/* continue reading remaining pkts */
4159 				continue;
4160 			}
4161 		}
4162 
4163 		/* set the message id */
4164 		if (ldcp->mode != LDC_MODE_RELIABLE)
4165 			ldcp->last_msg_rcd = msg->seqid;
4166 
4167 		/* move the head one position */
4168 		curr_head = (curr_head + LDC_PACKET_SIZE) & q_size_mask;
4169 
4170 		if (msg->env & LDC_FRAG_STOP) {
4171 
4172 			/*
4173 			 * All pkts that are part of this fragmented transfer
4174 			 * have been read or this was a single pkt read
4175 			 * or there was an error
4176 			 */
4177 
4178 			/* set the queue head */
4179 			if (rv = ldcp->readq_set_head(ldcp, curr_head))
4180 				bytes_read = 0;
4181 
4182 			*sizep = bytes_read;
4183 
4184 			break;
4185 		}
4186 
4187 		/* advance head if it is a CTRL packet or a DATA ACK packet */
4188 		if ((msg->type & LDC_CTRL) ||
4189 		    ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK))) {
4190 
4191 			/* set the queue head */
4192 			if (rv = ldcp->readq_set_head(ldcp, curr_head)) {
4193 				bytes_read = 0;
4194 				break;
4195 			}
4196 
4197 			D2(ldcp->id, "ldc_read: (0x%llx) set ACK qhead 0x%llx",
4198 			    ldcp->id, curr_head);
4199 		}
4200 
4201 	} /* for (;;) */
4202 
4203 	D2(ldcp->id, "ldc_read: (0x%llx) end size=%d", ldcp->id, *sizep);
4204 
4205 	return (rv);
4206 
4207 channel_is_reset:
4208 	mutex_enter(&ldcp->tx_lock);
4209 	i_ldc_reset(ldcp, B_FALSE);
4210 	mutex_exit(&ldcp->tx_lock);
4211 	return (ECONNRESET);
4212 }
4213 
4214 /*
4215  * Fetch and buffer incoming packets so we can hand them back as
4216  * a basic byte stream.
4217  *
4218  * Enter and exit with ldcp->lock held by caller
4219  */
4220 static int
4221 i_ldc_read_stream(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep)
4222 {
4223 	int	rv;
4224 	size_t	size;
4225 
4226 	ASSERT(mutex_owned(&ldcp->lock));
4227 
4228 	D2(ldcp->id, "i_ldc_read_stream: (0x%llx) buffer size=%d",
4229 	    ldcp->id, *sizep);
4230 
4231 	if (ldcp->stream_remains == 0) {
4232 		size = ldcp->mtu;
4233 		rv = i_ldc_read_packet(ldcp,
4234 		    (caddr_t)ldcp->stream_bufferp, &size);
4235 		D2(ldcp->id, "i_ldc_read_stream: read packet (0x%llx) size=%d",
4236 		    ldcp->id, size);
4237 
4238 		if (rv != 0)
4239 			return (rv);
4240 
4241 		ldcp->stream_remains = size;
4242 		ldcp->stream_offset = 0;
4243 	}
4244 
4245 	size = MIN(ldcp->stream_remains, *sizep);
4246 
4247 	bcopy(ldcp->stream_bufferp + ldcp->stream_offset, target_bufp, size);
4248 	ldcp->stream_offset += size;
4249 	ldcp->stream_remains -= size;
4250 
4251 	D2(ldcp->id, "i_ldc_read_stream: (0x%llx) fill from buffer size=%d",
4252 	    ldcp->id, size);
4253 
4254 	*sizep = size;
4255 	return (0);
4256 }
4257 
4258 /*
4259  * Write specified amount of bytes to the channel
4260  * in multiple pkts of pkt_payload size. Each
4261  * packet is tagged with an unique packet ID in
4262  * the case of a reliable link.
4263  *
4264  * On return, size contains the number of bytes written.
4265  */
4266 int
4267 ldc_write(ldc_handle_t handle, caddr_t buf, size_t *sizep)
4268 {
4269 	ldc_chan_t	*ldcp;
4270 	int		rv = 0;
4271 
4272 	if (handle == 0) {
4273 		DWARN(DBG_ALL_LDCS, "ldc_write: invalid channel handle\n");
4274 		return (EINVAL);
4275 	}
4276 	ldcp = (ldc_chan_t *)handle;
4277 
4278 	mutex_enter(&ldcp->tx_lock);
4279 
4280 	/* check if non-zero data to write */
4281 	if (buf == NULL || sizep == NULL) {
4282 		DWARN(ldcp->id, "ldc_write: (0x%llx) invalid data write\n",
4283 		    ldcp->id);
4284 		mutex_exit(&ldcp->tx_lock);
4285 		return (EINVAL);
4286 	}
4287 
4288 	if (*sizep == 0) {
4289 		DWARN(ldcp->id, "ldc_write: (0x%llx) write size of zero\n",
4290 		    ldcp->id);
4291 		mutex_exit(&ldcp->tx_lock);
4292 		return (0);
4293 	}
4294 
4295 	/* Check if channel is UP for data exchange */
4296 	if (ldcp->tstate != TS_UP) {
4297 		DWARN(ldcp->id,
4298 		    "ldc_write: (0x%llx) channel is not in UP state\n",
4299 		    ldcp->id);
4300 		*sizep = 0;
4301 		rv = ECONNRESET;
4302 	} else {
4303 		rv = ldcp->write_p(ldcp, buf, sizep);
4304 	}
4305 
4306 	mutex_exit(&ldcp->tx_lock);
4307 
4308 	return (rv);
4309 }
4310 
4311 /*
4312  * Write a raw packet to the channel
4313  * On return, size contains the number of bytes written.
4314  */
4315 static int
4316 i_ldc_write_raw(ldc_chan_t *ldcp, caddr_t buf, size_t *sizep)
4317 {
4318 	ldc_msg_t	*ldcmsg;
4319 	uint64_t	tx_head, tx_tail, new_tail;
4320 	int		rv = 0;
4321 	size_t		size;
4322 
4323 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
4324 	ASSERT(ldcp->mode == LDC_MODE_RAW);
4325 
4326 	size = *sizep;
4327 
4328 	/*
4329 	 * Check to see if the packet size is less than or
4330 	 * equal to packet size support in raw mode
4331 	 */
4332 	if (size > ldcp->pkt_payload) {
4333 		DWARN(ldcp->id,
4334 		    "ldc_write: (0x%llx) invalid size (0x%llx) for RAW mode\n",
4335 		    ldcp->id, *sizep);
4336 		*sizep = 0;
4337 		return (EMSGSIZE);
4338 	}
4339 
4340 	/* get the qptrs for the tx queue */
4341 	rv = hv_ldc_tx_get_state(ldcp->id,
4342 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
4343 	if (rv != 0) {
4344 		cmn_err(CE_WARN,
4345 		    "ldc_write: (0x%lx) cannot read queue ptrs\n", ldcp->id);
4346 		*sizep = 0;
4347 		return (EIO);
4348 	}
4349 
4350 	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
4351 	    ldcp->link_state == LDC_CHANNEL_RESET) {
4352 		DWARN(ldcp->id,
4353 		    "ldc_write: (0x%llx) channel down/reset\n", ldcp->id);
4354 
4355 		*sizep = 0;
4356 		if (mutex_tryenter(&ldcp->lock)) {
4357 			i_ldc_reset(ldcp, B_FALSE);
4358 			mutex_exit(&ldcp->lock);
4359 		} else {
4360 			/*
4361 			 * Release Tx lock, and then reacquire channel
4362 			 * and Tx lock in correct order
4363 			 */
4364 			mutex_exit(&ldcp->tx_lock);
4365 			mutex_enter(&ldcp->lock);
4366 			mutex_enter(&ldcp->tx_lock);
4367 			i_ldc_reset(ldcp, B_FALSE);
4368 			mutex_exit(&ldcp->lock);
4369 		}
4370 		return (ECONNRESET);
4371 	}
4372 
4373 	tx_tail = ldcp->tx_tail;
4374 	tx_head = ldcp->tx_head;
4375 	new_tail = (tx_tail + LDC_PACKET_SIZE) &
4376 	    ((ldcp->tx_q_entries-1) << LDC_PACKET_SHIFT);
4377 
4378 	if (new_tail == tx_head) {
4379 		DWARN(DBG_ALL_LDCS,
4380 		    "ldc_write: (0x%llx) TX queue is full\n", ldcp->id);
4381 		*sizep = 0;
4382 		return (EWOULDBLOCK);
4383 	}
4384 
4385 	D2(ldcp->id, "ldc_write: (0x%llx) start xfer size=%d",
4386 	    ldcp->id, size);
4387 
4388 	/* Send the data now */
4389 	ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
4390 
4391 	/* copy the data into pkt */
4392 	bcopy((uint8_t *)buf, ldcmsg, size);
4393 
4394 	/* increment tail */
4395 	tx_tail = new_tail;
4396 
4397 	/*
4398 	 * All packets have been copied into the TX queue
4399 	 * update the tail ptr in the HV
4400 	 */
4401 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
4402 	if (rv) {
4403 		if (rv == EWOULDBLOCK) {
4404 			DWARN(ldcp->id, "ldc_write: (0x%llx) write timed out\n",
4405 			    ldcp->id);
4406 			*sizep = 0;
4407 			return (EWOULDBLOCK);
4408 		}
4409 
4410 		*sizep = 0;
4411 		if (mutex_tryenter(&ldcp->lock)) {
4412 			i_ldc_reset(ldcp, B_FALSE);
4413 			mutex_exit(&ldcp->lock);
4414 		} else {
4415 			/*
4416 			 * Release Tx lock, and then reacquire channel
4417 			 * and Tx lock in correct order
4418 			 */
4419 			mutex_exit(&ldcp->tx_lock);
4420 			mutex_enter(&ldcp->lock);
4421 			mutex_enter(&ldcp->tx_lock);
4422 			i_ldc_reset(ldcp, B_FALSE);
4423 			mutex_exit(&ldcp->lock);
4424 		}
4425 		return (ECONNRESET);
4426 	}
4427 
4428 	ldcp->tx_tail = tx_tail;
4429 	*sizep = size;
4430 
4431 	D2(ldcp->id, "ldc_write: (0x%llx) end xfer size=%d", ldcp->id, size);
4432 
4433 	return (rv);
4434 }
4435 
4436 
4437 /*
4438  * Write specified amount of bytes to the channel
4439  * in multiple pkts of pkt_payload size. Each
4440  * packet is tagged with an unique packet ID in
4441  * the case of a reliable link.
4442  *
4443  * On return, size contains the number of bytes written.
4444  * This function needs to ensure that the write size is < MTU size
4445  */
4446 static int
4447 i_ldc_write_packet(ldc_chan_t *ldcp, caddr_t buf, size_t *size)
4448 {
4449 	ldc_msg_t	*ldcmsg;
4450 	uint64_t	tx_head, tx_tail, new_tail, start;
4451 	uint64_t	txq_size_mask, numavail;
4452 	uint8_t		*msgbuf, *source = (uint8_t *)buf;
4453 	size_t		len, bytes_written = 0, remaining;
4454 	int		rv;
4455 	uint32_t	curr_seqid;
4456 
4457 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
4458 
4459 	ASSERT(ldcp->mode == LDC_MODE_RELIABLE ||
4460 	    ldcp->mode == LDC_MODE_UNRELIABLE);
4461 
4462 	/* compute mask for increment */
4463 	txq_size_mask = (ldcp->tx_q_entries - 1) << LDC_PACKET_SHIFT;
4464 
4465 	/* get the qptrs for the tx queue */
4466 	rv = hv_ldc_tx_get_state(ldcp->id,
4467 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
4468 	if (rv != 0) {
4469 		cmn_err(CE_WARN,
4470 		    "ldc_write: (0x%lx) cannot read queue ptrs\n", ldcp->id);
4471 		*size = 0;
4472 		return (EIO);
4473 	}
4474 
4475 	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
4476 	    ldcp->link_state == LDC_CHANNEL_RESET) {
4477 		DWARN(ldcp->id,
4478 		    "ldc_write: (0x%llx) channel down/reset\n", ldcp->id);
4479 		*size = 0;
4480 		if (mutex_tryenter(&ldcp->lock)) {
4481 			i_ldc_reset(ldcp, B_FALSE);
4482 			mutex_exit(&ldcp->lock);
4483 		} else {
4484 			/*
4485 			 * Release Tx lock, and then reacquire channel
4486 			 * and Tx lock in correct order
4487 			 */
4488 			mutex_exit(&ldcp->tx_lock);
4489 			mutex_enter(&ldcp->lock);
4490 			mutex_enter(&ldcp->tx_lock);
4491 			i_ldc_reset(ldcp, B_FALSE);
4492 			mutex_exit(&ldcp->lock);
4493 		}
4494 		return (ECONNRESET);
4495 	}
4496 
4497 	tx_tail = ldcp->tx_tail;
4498 	new_tail = (tx_tail + LDC_PACKET_SIZE) %
4499 	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
4500 
4501 	/*
4502 	 * Check to see if the queue is full. The check is done using
4503 	 * the appropriate head based on the link mode.
4504 	 */
4505 	i_ldc_get_tx_head(ldcp, &tx_head);
4506 
4507 	if (new_tail == tx_head) {
4508 		DWARN(DBG_ALL_LDCS,
4509 		    "ldc_write: (0x%llx) TX queue is full\n", ldcp->id);
4510 		*size = 0;
4511 		return (EWOULDBLOCK);
4512 	}
4513 
4514 	/*
4515 	 * Make sure that the LDC Tx queue has enough space
4516 	 */
4517 	numavail = (tx_head >> LDC_PACKET_SHIFT) - (tx_tail >> LDC_PACKET_SHIFT)
4518 	    + ldcp->tx_q_entries - 1;
4519 	numavail %= ldcp->tx_q_entries;
4520 
4521 	if (*size > (numavail * ldcp->pkt_payload)) {
4522 		DWARN(DBG_ALL_LDCS,
4523 		    "ldc_write: (0x%llx) TX queue has no space\n", ldcp->id);
4524 		return (EWOULDBLOCK);
4525 	}
4526 
4527 	D2(ldcp->id, "ldc_write: (0x%llx) start xfer size=%d",
4528 	    ldcp->id, *size);
4529 
4530 	/* Send the data now */
4531 	bytes_written = 0;
4532 	curr_seqid = ldcp->last_msg_snt;
4533 	start = tx_tail;
4534 
4535 	while (*size > bytes_written) {
4536 
4537 		ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
4538 
4539 		msgbuf = (uint8_t *)((ldcp->mode == LDC_MODE_RELIABLE) ?
4540 		    ldcmsg->rdata : ldcmsg->udata);
4541 
4542 		ldcmsg->type = LDC_DATA;
4543 		ldcmsg->stype = LDC_INFO;
4544 		ldcmsg->ctrl = 0;
4545 
4546 		remaining = *size - bytes_written;
4547 		len = min(ldcp->pkt_payload, remaining);
4548 		ldcmsg->env = (uint8_t)len;
4549 
4550 		curr_seqid++;
4551 		ldcmsg->seqid = curr_seqid;
4552 
4553 		/* copy the data into pkt */
4554 		bcopy(source, msgbuf, len);
4555 
4556 		source += len;
4557 		bytes_written += len;
4558 
4559 		/* increment tail */
4560 		tx_tail = (tx_tail + LDC_PACKET_SIZE) & txq_size_mask;
4561 
4562 		ASSERT(tx_tail != tx_head);
4563 	}
4564 
4565 	/* Set the start and stop bits */
4566 	ldcmsg->env |= LDC_FRAG_STOP;
4567 	ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + start);
4568 	ldcmsg->env |= LDC_FRAG_START;
4569 
4570 	/*
4571 	 * All packets have been copied into the TX queue
4572 	 * update the tail ptr in the HV
4573 	 */
4574 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
4575 	if (rv == 0) {
4576 		ldcp->tx_tail = tx_tail;
4577 		ldcp->last_msg_snt = curr_seqid;
4578 		*size = bytes_written;
4579 	} else {
4580 		int rv2;
4581 
4582 		if (rv != EWOULDBLOCK) {
4583 			*size = 0;
4584 			if (mutex_tryenter(&ldcp->lock)) {
4585 				i_ldc_reset(ldcp, B_FALSE);
4586 				mutex_exit(&ldcp->lock);
4587 			} else {
4588 				/*
4589 				 * Release Tx lock, and then reacquire channel
4590 				 * and Tx lock in correct order
4591 				 */
4592 				mutex_exit(&ldcp->tx_lock);
4593 				mutex_enter(&ldcp->lock);
4594 				mutex_enter(&ldcp->tx_lock);
4595 				i_ldc_reset(ldcp, B_FALSE);
4596 				mutex_exit(&ldcp->lock);
4597 			}
4598 			return (ECONNRESET);
4599 		}
4600 
4601 		D1(ldcp->id, "hv_tx_set_tail returns 0x%x (head 0x%x, "
4602 		    "old tail 0x%x, new tail 0x%x, qsize=0x%x)\n",
4603 		    rv, ldcp->tx_head, ldcp->tx_tail, tx_tail,
4604 		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT));
4605 
4606 		rv2 = hv_ldc_tx_get_state(ldcp->id,
4607 		    &tx_head, &tx_tail, &ldcp->link_state);
4608 
4609 		D1(ldcp->id, "hv_ldc_tx_get_state returns 0x%x "
4610 		    "(head 0x%x, tail 0x%x state 0x%x)\n",
4611 		    rv2, tx_head, tx_tail, ldcp->link_state);
4612 
4613 		*size = 0;
4614 	}
4615 
4616 	D2(ldcp->id, "ldc_write: (0x%llx) end xfer size=%d", ldcp->id, *size);
4617 
4618 	return (rv);
4619 }
4620 
4621 /*
4622  * Write specified amount of bytes to the channel
4623  * in multiple pkts of pkt_payload size. Each
4624  * packet is tagged with an unique packet ID in
4625  * the case of a reliable link.
4626  *
4627  * On return, size contains the number of bytes written.
4628  * This function needs to ensure that the write size is < MTU size
4629  */
4630 static int
4631 i_ldc_write_stream(ldc_chan_t *ldcp, caddr_t buf, size_t *sizep)
4632 {
4633 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
4634 	ASSERT(ldcp->mode == LDC_MODE_RELIABLE);
4635 
4636 	/* Truncate packet to max of MTU size */
4637 	if (*sizep > ldcp->mtu) *sizep = ldcp->mtu;
4638 	return (i_ldc_write_packet(ldcp, buf, sizep));
4639 }
4640 
4641 
4642 /*
4643  * Interfaces for channel nexus to register/unregister with LDC module
4644  * The nexus will register functions to be used to register individual
4645  * channels with the nexus and enable interrupts for the channels
4646  */
4647 int
4648 ldc_register(ldc_cnex_t *cinfo)
4649 {
4650 	ldc_chan_t	*ldcp;
4651 
4652 	if (cinfo == NULL || cinfo->dip == NULL ||
4653 	    cinfo->reg_chan == NULL || cinfo->unreg_chan == NULL ||
4654 	    cinfo->add_intr == NULL || cinfo->rem_intr == NULL ||
4655 	    cinfo->clr_intr == NULL) {
4656 
4657 		DWARN(DBG_ALL_LDCS, "ldc_register: invalid nexus info\n");
4658 		return (EINVAL);
4659 	}
4660 
4661 	mutex_enter(&ldcssp->lock);
4662 
4663 	/* nexus registration */
4664 	ldcssp->cinfo.dip = cinfo->dip;
4665 	ldcssp->cinfo.reg_chan = cinfo->reg_chan;
4666 	ldcssp->cinfo.unreg_chan = cinfo->unreg_chan;
4667 	ldcssp->cinfo.add_intr = cinfo->add_intr;
4668 	ldcssp->cinfo.rem_intr = cinfo->rem_intr;
4669 	ldcssp->cinfo.clr_intr = cinfo->clr_intr;
4670 
4671 	/* register any channels that might have been previously initialized */
4672 	ldcp = ldcssp->chan_list;
4673 	while (ldcp) {
4674 		if ((ldcp->tstate & TS_QCONF_RDY) &&
4675 		    (ldcp->tstate & TS_CNEX_RDY) == 0)
4676 			(void) i_ldc_register_channel(ldcp);
4677 
4678 		ldcp = ldcp->next;
4679 	}
4680 
4681 	mutex_exit(&ldcssp->lock);
4682 
4683 	return (0);
4684 }
4685 
4686 int
4687 ldc_unregister(ldc_cnex_t *cinfo)
4688 {
4689 	if (cinfo == NULL || cinfo->dip == NULL) {
4690 		DWARN(DBG_ALL_LDCS, "ldc_unregister: invalid nexus info\n");
4691 		return (EINVAL);
4692 	}
4693 
4694 	mutex_enter(&ldcssp->lock);
4695 
4696 	if (cinfo->dip != ldcssp->cinfo.dip) {
4697 		DWARN(DBG_ALL_LDCS, "ldc_unregister: invalid dip\n");
4698 		mutex_exit(&ldcssp->lock);
4699 		return (EINVAL);
4700 	}
4701 
4702 	/* nexus unregister */
4703 	ldcssp->cinfo.dip = NULL;
4704 	ldcssp->cinfo.reg_chan = NULL;
4705 	ldcssp->cinfo.unreg_chan = NULL;
4706 	ldcssp->cinfo.add_intr = NULL;
4707 	ldcssp->cinfo.rem_intr = NULL;
4708 	ldcssp->cinfo.clr_intr = NULL;
4709 
4710 	mutex_exit(&ldcssp->lock);
4711 
4712 	return (0);
4713 }
4714 
4715 int
4716 ldc_info(ldc_handle_t handle, ldc_info_t *info)
4717 {
4718 	ldc_chan_t	*ldcp;
4719 	uint64_t	avail;
4720 
4721 	if (handle == 0 || info == NULL) {
4722 		DWARN(DBG_ALL_LDCS, "ldc_get_info: invalid args\n");
4723 		return (EINVAL);
4724 	}
4725 
4726 	ldcp = (ldc_chan_t *)handle;
4727 
4728 	mutex_enter(&ldcp->lock);
4729 
4730 	/* check to see if channel is initalized */
4731 	if ((ldcp->tstate & ~TS_IN_RESET) < TS_INIT) {
4732 		DWARN(ldcp->id,
4733 		    "ldc_get_info: (0x%llx) channel not initialized\n",
4734 		    ldcp->id);
4735 		mutex_exit(&ldcp->lock);
4736 		return (EINVAL);
4737 	}
4738 
4739 	mutex_exit(&ldcp->lock);
4740 
4741 	/*
4742 	 * ldcssp->mapin_size is the max amount of shared memory supported by
4743 	 * the Hypervisor per guest. e.g, legacy HV supports 64MB; latest HV
4744 	 * support 1GB. This size is read during ldc module initialization.
4745 	 *
4746 	 * ldc_dring_direct_map_rsvd is the amount of memory reserved for
4747 	 * mapping in descriptor rings. In the initial implementation, we use a
4748 	 * simple approach to determine the amount of mapin space available per
4749 	 * channel. In future, we may implement strict accounting of the actual
4750 	 * memory consumed to determine the exact amount available per channel.
4751 	 */
4752 	if (ldcssp->mapin_size <= ldc_dring_direct_map_rsvd) {
4753 		info->direct_map_size_max = 0;
4754 		return (0);
4755 	}
4756 
4757 	avail = ldcssp->mapin_size - ldc_dring_direct_map_rsvd;
4758 	if (avail >= ldc_direct_map_size_max) {
4759 		info->direct_map_size_max = ldc_direct_map_size_max;
4760 	} else {
4761 		info->direct_map_size_max = 0;
4762 	}
4763 
4764 	return (0);
4765 }
4766