xref: /illumos-gate/usr/src/uts/sun4v/io/ldc.c (revision c0dd49bdd68c0d758a67d56f07826f3b45cfc664)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * sun4v LDC Link Layer
29  */
30 #include <sys/types.h>
31 #include <sys/file.h>
32 #include <sys/errno.h>
33 #include <sys/open.h>
34 #include <sys/cred.h>
35 #include <sys/kmem.h>
36 #include <sys/conf.h>
37 #include <sys/cmn_err.h>
38 #include <sys/ksynch.h>
39 #include <sys/modctl.h>
40 #include <sys/stat.h> /* needed for S_IFBLK and S_IFCHR */
41 #include <sys/debug.h>
42 #include <sys/cred.h>
43 #include <sys/promif.h>
44 #include <sys/ddi.h>
45 #include <sys/sunddi.h>
46 #include <sys/cyclic.h>
47 #include <sys/machsystm.h>
48 #include <sys/vm.h>
49 #include <sys/cpu.h>
50 #include <sys/intreg.h>
51 #include <sys/machcpuvar.h>
52 #include <sys/mmu.h>
53 #include <sys/pte.h>
54 #include <vm/hat.h>
55 #include <vm/as.h>
56 #include <vm/hat_sfmmu.h>
57 #include <sys/vm_machparam.h>
58 #include <vm/seg_kmem.h>
59 #include <vm/seg_kpm.h>
60 #include <sys/note.h>
61 #include <sys/ivintr.h>
62 #include <sys/hypervisor_api.h>
63 #include <sys/ldc.h>
64 #include <sys/ldc_impl.h>
65 #include <sys/cnex.h>
66 #include <sys/hsvc.h>
67 #include <sys/sdt.h>
68 #include <sys/kldc.h>
69 
70 /* Core internal functions */
71 int i_ldc_h2v_error(int h_error);
72 void i_ldc_reset(ldc_chan_t *ldcp, boolean_t force_reset);
73 
74 static int i_ldc_txq_reconf(ldc_chan_t *ldcp);
75 static int i_ldc_rxq_reconf(ldc_chan_t *ldcp, boolean_t force_reset);
76 static void i_ldc_rxq_drain(ldc_chan_t *ldcp);
77 static void i_ldc_reset_state(ldc_chan_t *ldcp);
78 static void i_ldc_debug_enter(void);
79 
80 static int i_ldc_get_tx_tail(ldc_chan_t *ldcp, uint64_t *tail);
81 static void i_ldc_get_tx_head(ldc_chan_t *ldcp, uint64_t *head);
82 static int i_ldc_set_tx_tail(ldc_chan_t *ldcp, uint64_t tail);
83 static int i_ldc_set_rx_head(ldc_chan_t *ldcp, uint64_t head);
84 static int i_ldc_send_pkt(ldc_chan_t *ldcp, uint8_t pkttype, uint8_t subtype,
85     uint8_t ctrlmsg);
86 
87 static int  i_ldc_set_rxdq_head(ldc_chan_t *ldcp, uint64_t head);
88 static void i_ldc_rxdq_copy(ldc_chan_t *ldcp, uint64_t *head);
89 static uint64_t i_ldc_dq_rx_get_state(ldc_chan_t *ldcp, uint64_t *head,
90     uint64_t *tail, uint64_t *link_state);
91 static uint64_t i_ldc_hvq_rx_get_state(ldc_chan_t *ldcp, uint64_t *head,
92     uint64_t *tail, uint64_t *link_state);
93 static int i_ldc_rx_ackpeek(ldc_chan_t *ldcp, uint64_t rx_head,
94     uint64_t rx_tail);
95 static uint_t i_ldc_chkq(ldc_chan_t *ldcp);
96 
97 /* Interrupt handling functions */
98 static uint_t i_ldc_tx_hdlr(caddr_t arg1, caddr_t arg2);
99 static uint_t i_ldc_rx_hdlr(caddr_t arg1, caddr_t arg2);
100 static uint_t i_ldc_rx_process_hvq(ldc_chan_t *ldcp, boolean_t *notify_client,
101     uint64_t *notify_event);
102 static void i_ldc_clear_intr(ldc_chan_t *ldcp, cnex_intrtype_t itype);
103 
104 /* Read method functions */
105 static int i_ldc_read_raw(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep);
106 static int i_ldc_read_packet(ldc_chan_t *ldcp, caddr_t target_bufp,
107 	size_t *sizep);
108 static int i_ldc_read_stream(ldc_chan_t *ldcp, caddr_t target_bufp,
109 	size_t *sizep);
110 
111 /* Write method functions */
112 static int i_ldc_write_raw(ldc_chan_t *ldcp, caddr_t target_bufp,
113 	size_t *sizep);
114 static int i_ldc_write_packet(ldc_chan_t *ldcp, caddr_t target_bufp,
115 	size_t *sizep);
116 static int i_ldc_write_stream(ldc_chan_t *ldcp, caddr_t target_bufp,
117 	size_t *sizep);
118 
119 /* Pkt processing internal functions */
120 static int i_ldc_check_seqid(ldc_chan_t *ldcp, ldc_msg_t *ldcmsg);
121 static int i_ldc_ctrlmsg(ldc_chan_t *ldcp, ldc_msg_t *ldcmsg);
122 static int i_ldc_process_VER(ldc_chan_t *ldcp, ldc_msg_t *msg);
123 static int i_ldc_process_RTS(ldc_chan_t *ldcp, ldc_msg_t *msg);
124 static int i_ldc_process_RTR(ldc_chan_t *ldcp, ldc_msg_t *msg);
125 static int i_ldc_process_RDX(ldc_chan_t *ldcp, ldc_msg_t *msg);
126 static int i_ldc_process_data_ACK(ldc_chan_t *ldcp, ldc_msg_t *msg);
127 
128 /* LDC Version */
129 static ldc_ver_t ldc_versions[] = { {1, 0} };
130 
131 /* number of supported versions */
132 #define	LDC_NUM_VERS	(sizeof (ldc_versions) / sizeof (ldc_versions[0]))
133 
134 /* Invalid value for the ldc_chan_t rx_ack_head field */
135 #define	ACKPEEK_HEAD_INVALID	((uint64_t)-1)
136 
137 
138 /* Module State Pointer */
139 ldc_soft_state_t *ldcssp;
140 
141 static struct modldrv md = {
142 	&mod_miscops,			/* This is a misc module */
143 	"sun4v LDC module",		/* Name of the module */
144 };
145 
146 static struct modlinkage ml = {
147 	MODREV_1,
148 	&md,
149 	NULL
150 };
151 
152 static uint64_t ldc_sup_minor;		/* Supported minor number */
153 static hsvc_info_t ldc_hsvc = {
154 	HSVC_REV_1, NULL, HSVC_GROUP_LDC, 1, 1, "ldc"
155 };
156 
157 /*
158  * The no. of MTU size messages that can be stored in
159  * the LDC Tx queue. The number of Tx queue entries is
160  * then computed as (mtu * mtu_msgs)/sizeof(queue_entry)
161  */
162 uint64_t ldc_mtu_msgs = LDC_MTU_MSGS;
163 
164 /*
165  * The minimum queue length. This is the size of the smallest
166  * LDC queue. If the computed value is less than this default,
167  * the queue length is rounded up to 'ldc_queue_entries'.
168  */
169 uint64_t ldc_queue_entries = LDC_QUEUE_ENTRIES;
170 
171 /*
172  * The length of the reliable-mode data queue in terms of the LDC
173  * receive queue length. i.e., the number of times larger than the
174  * LDC receive queue that the data queue should be. The HV receive
175  * queue is required to be a power of 2 and this implementation
176  * assumes the data queue will also be a power of 2. By making the
177  * multiplier a power of 2, we ensure the data queue will be a
178  * power of 2. We use a multiplier because the receive queue is
179  * sized to be sane relative to the MTU and the same is needed for
180  * the data queue.
181  */
182 uint64_t ldc_rxdq_multiplier = LDC_RXDQ_MULTIPLIER;
183 
184 /*
185  * LDC retry count and delay - when the HV returns EWOULDBLOCK
186  * the operation is retried 'ldc_max_retries' times with a
187  * wait of 'ldc_delay' usecs between each retry.
188  */
189 int ldc_max_retries = LDC_MAX_RETRIES;
190 clock_t ldc_delay = LDC_DELAY;
191 
192 /*
193  * Channels which have a devclass satisfying the following
194  * will be reset when entering the prom or kmdb.
195  *
196  *   LDC_DEVCLASS_PROM_RESET(devclass) != 0
197  *
 * By default, block device service and generic channels are reset.
199  */
/*
 * The bit is built as a 64-bit value so that it has the same width as
 * ldc_debug_reset_mask; shifting a plain int is undefined once the
 * devclass value reaches the width of int.
 */
#define	LDC_DEVCLASS_BIT(dc)		((uint64_t)0x1 << (dc))
#define	LDC_DEVCLASS_PROM_RESET(dc)	\
	(LDC_DEVCLASS_BIT(dc) & ldc_debug_reset_mask)
203 static uint64_t ldc_debug_reset_mask = LDC_DEVCLASS_BIT(LDC_DEV_BLK_SVC) |
204     LDC_DEVCLASS_BIT(LDC_DEV_GENERIC);
205 
206 /*
207  * delay between each retry of channel unregistration in
208  * ldc_close(), to wait for pending interrupts to complete.
209  */
210 clock_t ldc_close_delay = LDC_CLOSE_DELAY;
211 
212 #ifdef DEBUG
213 
214 /*
215  * Print debug messages
216  *
217  * set ldcdbg to 0x7 for enabling all msgs
218  * 0x4 - Warnings
219  * 0x2 - All debug messages
220  * 0x1 - Minimal debug messages
221  *
222  * set ldcdbgchan to the channel number you want to debug
223  * setting it to -1 prints debug messages for all channels
224  * NOTE: ldcdbgchan has no effect on error messages
225  */
226 
227 int ldcdbg = 0x0;
228 int64_t ldcdbgchan = DBG_ALL_LDCS;
229 uint64_t ldc_inject_err_flag = 0;
230 
231 void
232 ldcdebug(int64_t id, const char *fmt, ...)
233 {
234 	char buf[512];
235 	va_list ap;
236 
237 	/*
238 	 * Do not return if,
239 	 * caller wants to print it anyway - (id == DBG_ALL_LDCS)
240 	 * debug channel is set to all LDCs - (ldcdbgchan == DBG_ALL_LDCS)
241 	 * debug channel = caller specified channel
242 	 */
243 	if ((id != DBG_ALL_LDCS) &&
244 	    (ldcdbgchan != DBG_ALL_LDCS) &&
245 	    (ldcdbgchan != id)) {
246 		return;
247 	}
248 
249 	va_start(ap, fmt);
250 	(void) vsprintf(buf, fmt, ap);
251 	va_end(ap);
252 
253 	cmn_err(CE_CONT, "?%s", buf);
254 }
255 
256 #define	LDC_ERR_RESET		0x1
257 #define	LDC_ERR_PKTLOSS		0x2
258 #define	LDC_ERR_DQFULL		0x4
259 #define	LDC_ERR_DRNGCLEAR	0x8
260 
261 static boolean_t
262 ldc_inject_error(ldc_chan_t *ldcp, uint64_t error)
263 {
264 	if ((ldcdbgchan != DBG_ALL_LDCS) && (ldcdbgchan != ldcp->id))
265 		return (B_FALSE);
266 
267 	if ((ldc_inject_err_flag & error) == 0)
268 		return (B_FALSE);
269 
270 	/* clear the injection state */
271 	ldc_inject_err_flag &= ~error;
272 
273 	return (B_TRUE);
274 }
275 
276 #define	D1		\
277 if (ldcdbg & 0x01)	\
278 	ldcdebug
279 
280 #define	D2		\
281 if (ldcdbg & 0x02)	\
282 	ldcdebug
283 
284 #define	DWARN		\
285 if (ldcdbg & 0x04)	\
286 	ldcdebug
287 
288 #define	DUMP_PAYLOAD(id, addr)						\
289 {									\
290 	char buf[65*3];							\
291 	int i;								\
292 	uint8_t *src = (uint8_t *)addr;					\
293 	for (i = 0; i < 64; i++, src++)					\
294 		(void) sprintf(&buf[i * 3], "|%02x", *src);		\
295 	(void) sprintf(&buf[i * 3], "|\n");				\
296 	D2((id), "payload: %s", buf);					\
297 }
298 
299 #define	DUMP_LDC_PKT(c, s, addr)					\
300 {									\
301 	ldc_msg_t *msg = (ldc_msg_t *)(addr);				\
302 	uint32_t mid = ((c)->mode != LDC_MODE_RAW) ? msg->seqid : 0;	\
303 	if (msg->type == LDC_DATA) {                                    \
304 	    D2((c)->id, "%s: msg%d (/%x/%x/%x/,env[%c%c,sz=%d])",	\
305 	    (s), mid, msg->type, msg->stype, msg->ctrl,			\
306 	    (msg->env & LDC_FRAG_START) ? 'B' : ' ',                    \
307 	    (msg->env & LDC_FRAG_STOP) ? 'E' : ' ',                     \
308 	    (msg->env & LDC_LEN_MASK));					\
309 	} else { 							\
310 	    D2((c)->id, "%s: msg%d (/%x/%x/%x/,env=%x)", (s),		\
311 	    mid, msg->type, msg->stype, msg->ctrl, msg->env);		\
312 	} 								\
313 }
314 
315 #define	LDC_INJECT_RESET(_ldcp)	ldc_inject_error(_ldcp, LDC_ERR_RESET)
316 #define	LDC_INJECT_PKTLOSS(_ldcp) ldc_inject_error(_ldcp, LDC_ERR_PKTLOSS)
317 #define	LDC_INJECT_DQFULL(_ldcp) ldc_inject_error(_ldcp, LDC_ERR_DQFULL)
318 #define	LDC_INJECT_DRNGCLEAR(_ldcp) ldc_inject_error(_ldcp, LDC_ERR_DRNGCLEAR)
319 extern void i_ldc_mem_inject_dring_clear(ldc_chan_t *ldcp);
320 
321 #else
322 
323 #define	DBG_ALL_LDCS -1
324 
325 #define	D1
326 #define	D2
327 #define	DWARN
328 
329 #define	DUMP_PAYLOAD(id, addr)
330 #define	DUMP_LDC_PKT(c, s, addr)
331 
332 #define	LDC_INJECT_RESET(_ldcp)	(B_FALSE)
333 #define	LDC_INJECT_PKTLOSS(_ldcp) (B_FALSE)
334 #define	LDC_INJECT_DQFULL(_ldcp) (B_FALSE)
335 #define	LDC_INJECT_DRNGCLEAR(_ldcp) (B_FALSE)
336 
337 #endif
338 
339 /*
340  * dtrace SDT probes to ease tracing of the rx data queue and HV queue
341  * lengths. Just pass the head, tail, and entries values so that the
342  * length can be calculated in a dtrace script when the probe is enabled.
343  */
344 #define	TRACE_RXDQ_LENGTH(ldcp)						\
345 	DTRACE_PROBE4(rxdq__size,					\
346 	uint64_t, ldcp->id,						\
347 	uint64_t, ldcp->rx_dq_head,					\
348 	uint64_t, ldcp->rx_dq_tail,					\
349 	uint64_t, ldcp->rx_dq_entries)
350 
351 #define	TRACE_RXHVQ_LENGTH(ldcp, head, tail)				\
352 	DTRACE_PROBE4(rxhvq__size,					\
353 	uint64_t, ldcp->id,						\
354 	uint64_t, head,							\
355 	uint64_t, tail,							\
356 	uint64_t, ldcp->rx_q_entries)
357 
358 /* A dtrace SDT probe to ease tracing of data queue copy operations */
359 #define	TRACE_RXDQ_COPY(ldcp, bytes)					\
360 	DTRACE_PROBE2(rxdq__copy, uint64_t, ldcp->id, uint64_t, bytes)	\
361 
/*
 * Contiguous space available at the tail of a circular queue.
 *
 * If the head is at or behind the tail, the space runs from the tail to
 * the end of the buffer (size - tail). If the head is ahead of the tail,
 * it is the gap between them less one packet.
 */
#define	Q_CONTIG_SPACE(head, tail, size)				\
	(((head) > (tail)) ?						\
	((head) - (tail) - LDC_PACKET_SIZE) : ((size) - (tail)))
366 
367 #define	ZERO_PKT(p)			\
368 	bzero((p), sizeof (ldc_msg_t));
369 
370 #define	IDX2COOKIE(idx, pg_szc, pg_shift)				\
371 	(((pg_szc) << LDC_COOKIE_PGSZC_SHIFT) | ((idx) << (pg_shift)))
372 
373 int
374 _init(void)
375 {
376 	int status;
377 	extern void i_ldc_mem_set_hsvc_vers(uint64_t major, uint64_t minor);
378 
379 	status = hsvc_register(&ldc_hsvc, &ldc_sup_minor);
380 	if (status != 0) {
381 		cmn_err(CE_NOTE, "!%s: cannot negotiate hypervisor LDC services"
382 		    " group: 0x%lx major: %ld minor: %ld errno: %d",
383 		    ldc_hsvc.hsvc_modname, ldc_hsvc.hsvc_group,
384 		    ldc_hsvc.hsvc_major, ldc_hsvc.hsvc_minor, status);
385 		return (-1);
386 	}
387 
388 	/* Initialize shared memory HV API version checking */
389 	i_ldc_mem_set_hsvc_vers(ldc_hsvc.hsvc_major, ldc_sup_minor);
390 
391 	/* allocate soft state structure */
392 	ldcssp = kmem_zalloc(sizeof (ldc_soft_state_t), KM_SLEEP);
393 
394 	/* Link the module into the system */
395 	status = mod_install(&ml);
396 	if (status != 0) {
397 		kmem_free(ldcssp, sizeof (ldc_soft_state_t));
398 		return (status);
399 	}
400 
401 	/* Initialize the LDC state structure */
402 	mutex_init(&ldcssp->lock, NULL, MUTEX_DRIVER, NULL);
403 
404 	mutex_enter(&ldcssp->lock);
405 
406 	/* Create a cache for memory handles */
407 	ldcssp->memhdl_cache = kmem_cache_create("ldc_memhdl_cache",
408 	    sizeof (ldc_mhdl_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
409 	if (ldcssp->memhdl_cache == NULL) {
410 		DWARN(DBG_ALL_LDCS, "_init: ldc_memhdl cache create failed\n");
411 		mutex_exit(&ldcssp->lock);
412 		return (-1);
413 	}
414 
415 	/* Create cache for memory segment structures */
416 	ldcssp->memseg_cache = kmem_cache_create("ldc_memseg_cache",
417 	    sizeof (ldc_memseg_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
418 	if (ldcssp->memseg_cache == NULL) {
419 		DWARN(DBG_ALL_LDCS, "_init: ldc_memseg cache create failed\n");
420 		mutex_exit(&ldcssp->lock);
421 		return (-1);
422 	}
423 
424 
425 	ldcssp->channel_count = 0;
426 	ldcssp->channels_open = 0;
427 	ldcssp->chan_list = NULL;
428 	ldcssp->dring_list = NULL;
429 
430 	/* Register debug_enter callback */
431 	kldc_set_debug_cb(&i_ldc_debug_enter);
432 
433 	mutex_exit(&ldcssp->lock);
434 
435 	return (0);
436 }
437 
438 int
439 _info(struct modinfo *modinfop)
440 {
441 	/* Report status of the dynamically loadable driver module */
442 	return (mod_info(&ml, modinfop));
443 }
444 
445 int
446 _fini(void)
447 {
448 	int 		rv, status;
449 	ldc_chan_t 	*tmp_ldcp, *ldcp;
450 	ldc_dring_t 	*tmp_dringp, *dringp;
451 	ldc_mem_info_t 	minfo;
452 
453 	/* Unlink the driver module from the system */
454 	status = mod_remove(&ml);
455 	if (status) {
456 		DWARN(DBG_ALL_LDCS, "_fini: mod_remove failed\n");
457 		return (EIO);
458 	}
459 
460 	/* Unregister debug_enter callback */
461 	kldc_set_debug_cb(NULL);
462 
463 	/* Free descriptor rings */
464 	dringp = ldcssp->dring_list;
465 	while (dringp != NULL) {
466 		tmp_dringp = dringp->next;
467 
468 		rv = ldc_mem_dring_info((ldc_dring_handle_t)dringp, &minfo);
469 		if (rv == 0 && minfo.status != LDC_UNBOUND) {
470 			if (minfo.status == LDC_BOUND) {
471 				(void) ldc_mem_dring_unbind(
472 				    (ldc_dring_handle_t)dringp);
473 			}
474 			if (minfo.status == LDC_MAPPED) {
475 				(void) ldc_mem_dring_unmap(
476 				    (ldc_dring_handle_t)dringp);
477 			}
478 		}
479 
480 		(void) ldc_mem_dring_destroy((ldc_dring_handle_t)dringp);
481 		dringp = tmp_dringp;
482 	}
483 	ldcssp->dring_list = NULL;
484 
485 	/* close and finalize channels */
486 	ldcp = ldcssp->chan_list;
487 	while (ldcp != NULL) {
488 		tmp_ldcp = ldcp->next;
489 
490 		(void) ldc_close((ldc_handle_t)ldcp);
491 		(void) ldc_fini((ldc_handle_t)ldcp);
492 
493 		ldcp = tmp_ldcp;
494 	}
495 	ldcssp->chan_list = NULL;
496 
497 	/* Destroy kmem caches */
498 	kmem_cache_destroy(ldcssp->memhdl_cache);
499 	kmem_cache_destroy(ldcssp->memseg_cache);
500 
501 	/*
502 	 * We have successfully "removed" the driver.
503 	 * Destroying soft states
504 	 */
505 	mutex_destroy(&ldcssp->lock);
506 	kmem_free(ldcssp, sizeof (ldc_soft_state_t));
507 
508 	(void) hsvc_unregister(&ldc_hsvc);
509 
510 	return (status);
511 }
512 
513 /* -------------------------------------------------------------------------- */
514 
515 /*
516  * LDC Link Layer Internal Functions
517  */
518 
519 /*
520  * Translate HV Errors to sun4v error codes
521  */
522 int
523 i_ldc_h2v_error(int h_error)
524 {
525 	switch (h_error) {
526 
527 	case	H_EOK:
528 		return (0);
529 
530 	case	H_ENORADDR:
531 		return (EFAULT);
532 
533 	case	H_EBADPGSZ:
534 	case	H_EINVAL:
535 		return (EINVAL);
536 
537 	case	H_EWOULDBLOCK:
538 		return (EWOULDBLOCK);
539 
540 	case	H_ENOACCESS:
541 	case	H_ENOMAP:
542 		return (EACCES);
543 
544 	case	H_EIO:
545 	case	H_ECPUERROR:
546 		return (EIO);
547 
548 	case	H_ENOTSUPPORTED:
549 		return (ENOTSUP);
550 
551 	case 	H_ETOOMANY:
552 		return (ENOSPC);
553 
554 	case	H_ECHANNEL:
555 		return (ECHRNG);
556 	default:
557 		break;
558 	}
559 
560 	return (EIO);
561 }
562 
563 /*
564  * Reconfigure the transmit queue
565  */
566 static int
567 i_ldc_txq_reconf(ldc_chan_t *ldcp)
568 {
569 	int rv;
570 
571 	ASSERT(MUTEX_HELD(&ldcp->lock));
572 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
573 
574 	rv = hv_ldc_tx_qconf(ldcp->id, ldcp->tx_q_ra, ldcp->tx_q_entries);
575 	if (rv) {
576 		cmn_err(CE_WARN,
577 		    "i_ldc_txq_reconf: (0x%lx) cannot set qconf", ldcp->id);
578 		return (EIO);
579 	}
580 	rv = hv_ldc_tx_get_state(ldcp->id, &(ldcp->tx_head),
581 	    &(ldcp->tx_tail), &(ldcp->link_state));
582 	if (rv) {
583 		cmn_err(CE_WARN,
584 		    "i_ldc_txq_reconf: (0x%lx) cannot get qptrs", ldcp->id);
585 		return (EIO);
586 	}
587 	D1(ldcp->id, "i_ldc_txq_reconf: (0x%llx) h=0x%llx,t=0x%llx,"
588 	    "s=0x%llx\n", ldcp->id, ldcp->tx_head, ldcp->tx_tail,
589 	    ldcp->link_state);
590 
591 	return (0);
592 }
593 
594 /*
595  * Reconfigure the receive queue
596  */
597 static int
598 i_ldc_rxq_reconf(ldc_chan_t *ldcp, boolean_t force_reset)
599 {
600 	int rv;
601 	uint64_t rx_head, rx_tail;
602 
603 	ASSERT(MUTEX_HELD(&ldcp->lock));
604 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
605 	    &(ldcp->link_state));
606 	if (rv) {
607 		cmn_err(CE_WARN,
608 		    "i_ldc_rxq_reconf: (0x%lx) cannot get state",
609 		    ldcp->id);
610 		return (EIO);
611 	}
612 
613 	if (force_reset || (ldcp->tstate & ~TS_IN_RESET) == TS_UP) {
614 		rv = hv_ldc_rx_qconf(ldcp->id, ldcp->rx_q_ra,
615 		    ldcp->rx_q_entries);
616 		if (rv) {
617 			cmn_err(CE_WARN,
618 			    "i_ldc_rxq_reconf: (0x%lx) cannot set qconf",
619 			    ldcp->id);
620 			return (EIO);
621 		}
622 		D1(ldcp->id, "i_ldc_rxq_reconf: (0x%llx) completed q reconf",
623 		    ldcp->id);
624 	}
625 
626 	return (0);
627 }
628 
629 
630 /*
631  * Drain the contents of the receive queue
632  */
633 static void
634 i_ldc_rxq_drain(ldc_chan_t *ldcp)
635 {
636 	int rv;
637 	uint64_t rx_head, rx_tail;
638 	int retries = 0;
639 
640 	ASSERT(MUTEX_HELD(&ldcp->lock));
641 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
642 	    &(ldcp->link_state));
643 	if (rv) {
644 		cmn_err(CE_WARN, "i_ldc_rxq_drain: (0x%lx) cannot get state, "
645 		    "rv = 0x%x", ldcp->id, rv);
646 		return;
647 	}
648 
649 	/* If the queue is already empty just return success. */
650 	if (rx_head == rx_tail)
651 		return;
652 
653 	/*
654 	 * We are draining the queue in order to close the channel.
655 	 * Call hv_ldc_rx_set_qhead directly instead of i_ldc_set_rx_head
656 	 * because we do not need to reset the channel if the set
657 	 * qhead fails.
658 	 */
659 	if ((rv = hv_ldc_rx_set_qhead(ldcp->id, rx_tail)) == 0)
660 		return;
661 
662 	while ((rv == H_EWOULDBLOCK) && (retries++ < ldc_max_retries)) {
663 		drv_usecwait(ldc_delay);
664 		if ((rv = hv_ldc_rx_set_qhead(ldcp->id, rx_tail)) == 0)
665 			return;
666 	}
667 
668 	cmn_err(CE_WARN, "i_ldc_rxq_drain: (0x%lx) cannot set qhead 0x%lx, "
669 	    "rv = 0x%x", ldcp->id, rx_tail, rv);
670 }
671 
672 
673 /*
674  * Reset LDC state structure and its contents
675  */
676 static void
677 i_ldc_reset_state(ldc_chan_t *ldcp)
678 {
679 	ASSERT(MUTEX_HELD(&ldcp->lock));
680 	ldcp->last_msg_snt = LDC_INIT_SEQID;
681 	ldcp->last_ack_rcd = 0;
682 	ldcp->last_msg_rcd = 0;
683 	ldcp->tx_ackd_head = ldcp->tx_head;
684 	ldcp->stream_remains = 0;
685 	ldcp->next_vidx = 0;
686 	ldcp->hstate = 0;
687 	ldcp->tstate = TS_OPEN;
688 	ldcp->status = LDC_OPEN;
689 	ldcp->rx_ack_head = ACKPEEK_HEAD_INVALID;
690 	ldcp->rx_dq_head = 0;
691 	ldcp->rx_dq_tail = 0;
692 
693 	if (ldcp->link_state == LDC_CHANNEL_UP ||
694 	    ldcp->link_state == LDC_CHANNEL_RESET) {
695 
696 		if (ldcp->mode == LDC_MODE_RAW) {
697 			ldcp->status = LDC_UP;
698 			ldcp->tstate = TS_UP;
699 		} else {
700 			ldcp->status = LDC_READY;
701 			ldcp->tstate |= TS_LINK_READY;
702 		}
703 	}
704 }
705 
706 /*
707  * Reset a LDC channel
708  */
709 void
710 i_ldc_reset(ldc_chan_t *ldcp, boolean_t force_reset)
711 {
712 	DWARN(ldcp->id, "i_ldc_reset: (0x%llx) channel reset\n", ldcp->id);
713 
714 	ASSERT(MUTEX_HELD(&ldcp->lock));
715 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
716 
717 	/* reconfig Tx and Rx queues */
718 	(void) i_ldc_txq_reconf(ldcp);
719 	(void) i_ldc_rxq_reconf(ldcp, force_reset);
720 
721 	/* Clear Tx and Rx interrupts */
722 	(void) i_ldc_clear_intr(ldcp, CNEX_TX_INTR);
723 	(void) i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
724 
725 	/* Reset channel state */
726 	i_ldc_reset_state(ldcp);
727 
728 	/* Mark channel in reset */
729 	ldcp->tstate |= TS_IN_RESET;
730 }
731 
732 /*
733  * Walk the channel list and reset channels if they are of the right
734  * devclass and their Rx queues have been configured. No locks are
735  * taken because the function is only invoked by the kernel just before
736  * entering the prom or debugger when the system is single-threaded.
737  */
738 static void
739 i_ldc_debug_enter(void)
740 {
741 	ldc_chan_t *ldcp;
742 
743 	ldcp = ldcssp->chan_list;
744 	while (ldcp != NULL) {
745 		if (((ldcp->tstate & TS_QCONF_RDY) == TS_QCONF_RDY) &&
746 		    (LDC_DEVCLASS_PROM_RESET(ldcp->devclass) != 0)) {
747 			(void) hv_ldc_rx_qconf(ldcp->id, ldcp->rx_q_ra,
748 			    ldcp->rx_q_entries);
749 		}
750 		ldcp = ldcp->next;
751 	}
752 }
753 
754 /*
755  * Clear pending interrupts
756  */
757 static void
758 i_ldc_clear_intr(ldc_chan_t *ldcp, cnex_intrtype_t itype)
759 {
760 	ldc_cnex_t *cinfo = &ldcssp->cinfo;
761 
762 	ASSERT(MUTEX_HELD(&ldcp->lock));
763 	ASSERT(cinfo->dip != NULL);
764 
765 	switch (itype) {
766 	case CNEX_TX_INTR:
767 		/* check Tx interrupt */
768 		if (ldcp->tx_intr_state)
769 			ldcp->tx_intr_state = LDC_INTR_NONE;
770 		else
771 			return;
772 		break;
773 
774 	case CNEX_RX_INTR:
775 		/* check Rx interrupt */
776 		if (ldcp->rx_intr_state)
777 			ldcp->rx_intr_state = LDC_INTR_NONE;
778 		else
779 			return;
780 		break;
781 	}
782 
783 	(void) cinfo->clr_intr(cinfo->dip, ldcp->id, itype);
784 	D2(ldcp->id,
785 	    "i_ldc_clear_intr: (0x%llx) cleared 0x%x intr\n",
786 	    ldcp->id, itype);
787 }
788 
789 /*
790  * Set the receive queue head
791  * Resets connection and returns an error if it fails.
792  */
793 static int
794 i_ldc_set_rx_head(ldc_chan_t *ldcp, uint64_t head)
795 {
796 	int 	rv;
797 	int 	retries;
798 
799 	ASSERT(MUTEX_HELD(&ldcp->lock));
800 	for (retries = 0; retries < ldc_max_retries; retries++) {
801 
802 		if ((rv = hv_ldc_rx_set_qhead(ldcp->id, head)) == 0)
803 			return (0);
804 
805 		if (rv != H_EWOULDBLOCK)
806 			break;
807 
808 		/* wait for ldc_delay usecs */
809 		drv_usecwait(ldc_delay);
810 	}
811 
812 	cmn_err(CE_WARN, "ldc_set_rx_qhead: (0x%lx) cannot set qhead 0x%lx, "
813 	    "rv = 0x%x", ldcp->id, head, rv);
814 	mutex_enter(&ldcp->tx_lock);
815 	i_ldc_reset(ldcp, B_TRUE);
816 	mutex_exit(&ldcp->tx_lock);
817 
818 	return (ECONNRESET);
819 }
820 
821 /*
822  * Returns the tx_head to be used for transfer
823  */
824 static void
825 i_ldc_get_tx_head(ldc_chan_t *ldcp, uint64_t *head)
826 {
827 	ldc_msg_t 	*pkt;
828 
829 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
830 
831 	/* get current Tx head */
832 	*head = ldcp->tx_head;
833 
834 	/*
835 	 * Reliable mode will use the ACKd head instead of the regular tx_head.
836 	 * Also in Reliable mode, advance ackd_head for all non DATA/INFO pkts,
837 	 * up to the current location of tx_head. This needs to be done
838 	 * as the peer will only ACK DATA/INFO pkts.
839 	 */
840 	if (ldcp->mode == LDC_MODE_RELIABLE) {
841 		while (ldcp->tx_ackd_head != ldcp->tx_head) {
842 			pkt = (ldc_msg_t *)(ldcp->tx_q_va + ldcp->tx_ackd_head);
843 			if ((pkt->type & LDC_DATA) && (pkt->stype & LDC_INFO)) {
844 				break;
845 			}
846 			/* advance ACKd head */
847 			ldcp->tx_ackd_head =
848 			    (ldcp->tx_ackd_head + LDC_PACKET_SIZE) %
849 			    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
850 		}
851 		*head = ldcp->tx_ackd_head;
852 	}
853 }
854 
855 /*
856  * Returns the tx_tail to be used for transfer
857  * Re-reads the TX queue ptrs if and only if the
858  * the cached head and tail are equal (queue is full)
859  */
860 static int
861 i_ldc_get_tx_tail(ldc_chan_t *ldcp, uint64_t *tail)
862 {
863 	int 		rv;
864 	uint64_t 	current_head, new_tail;
865 
866 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
867 	/* Read the head and tail ptrs from HV */
868 	rv = hv_ldc_tx_get_state(ldcp->id,
869 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
870 	if (rv) {
871 		cmn_err(CE_WARN,
872 		    "i_ldc_get_tx_tail: (0x%lx) cannot read qptrs\n",
873 		    ldcp->id);
874 		return (EIO);
875 	}
876 	if (ldcp->link_state == LDC_CHANNEL_DOWN) {
877 		D1(ldcp->id, "i_ldc_get_tx_tail: (0x%llx) channel not ready\n",
878 		    ldcp->id);
879 		return (ECONNRESET);
880 	}
881 
882 	i_ldc_get_tx_head(ldcp, &current_head);
883 
884 	/* increment the tail */
885 	new_tail = (ldcp->tx_tail + LDC_PACKET_SIZE) %
886 	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
887 
888 	if (new_tail == current_head) {
889 		DWARN(ldcp->id,
890 		    "i_ldc_get_tx_tail: (0x%llx) TX queue is full\n",
891 		    ldcp->id);
892 		return (EWOULDBLOCK);
893 	}
894 
895 	D2(ldcp->id, "i_ldc_get_tx_tail: (0x%llx) head=0x%llx, tail=0x%llx\n",
896 	    ldcp->id, ldcp->tx_head, ldcp->tx_tail);
897 
898 	*tail = ldcp->tx_tail;
899 	return (0);
900 }
901 
902 /*
903  * Set the tail pointer. If HV returns EWOULDBLOCK, it will back off
904  * and retry ldc_max_retries times before returning an error.
905  * Returns 0, EWOULDBLOCK or EIO
906  */
907 static int
908 i_ldc_set_tx_tail(ldc_chan_t *ldcp, uint64_t tail)
909 {
910 	int		rv, retval = EWOULDBLOCK;
911 	int 		retries;
912 
913 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
914 	for (retries = 0; retries < ldc_max_retries; retries++) {
915 
916 		if ((rv = hv_ldc_tx_set_qtail(ldcp->id, tail)) == 0) {
917 			retval = 0;
918 			break;
919 		}
920 		if (rv != H_EWOULDBLOCK) {
921 			DWARN(ldcp->id, "i_ldc_set_tx_tail: (0x%llx) set "
922 			    "qtail=0x%llx failed, rv=%d\n", ldcp->id, tail, rv);
923 			retval = EIO;
924 			break;
925 		}
926 
927 		/* wait for ldc_delay usecs */
928 		drv_usecwait(ldc_delay);
929 	}
930 	return (retval);
931 }
932 
933 /*
934  * Copy a data packet from the HV receive queue to the data queue.
935  * Caller must ensure that the data queue is not already full.
936  *
937  * The *head argument represents the current head pointer for the HV
938  * receive queue. After copying a packet from the HV receive queue,
939  * the *head pointer will be updated. This allows the caller to update
940  * the head pointer in HV using the returned *head value.
941  */
942 void
943 i_ldc_rxdq_copy(ldc_chan_t *ldcp, uint64_t *head)
944 {
945 	uint64_t	q_size, dq_size;
946 
947 	ASSERT(MUTEX_HELD(&ldcp->lock));
948 
949 	q_size  = ldcp->rx_q_entries << LDC_PACKET_SHIFT;
950 	dq_size = ldcp->rx_dq_entries << LDC_PACKET_SHIFT;
951 
952 	ASSERT(Q_CONTIG_SPACE(ldcp->rx_dq_head, ldcp->rx_dq_tail,
953 	    dq_size) >= LDC_PACKET_SIZE);
954 
955 	bcopy((void *)(ldcp->rx_q_va + *head),
956 	    (void *)(ldcp->rx_dq_va + ldcp->rx_dq_tail), LDC_PACKET_SIZE);
957 	TRACE_RXDQ_COPY(ldcp, LDC_PACKET_SIZE);
958 
959 	/* Update rx head */
960 	*head = (*head + LDC_PACKET_SIZE) % q_size;
961 
962 	/* Update dq tail */
963 	ldcp->rx_dq_tail = (ldcp->rx_dq_tail + LDC_PACKET_SIZE) % dq_size;
964 }
965 
966 /*
967  * Update the Rx data queue head pointer
968  */
969 static int
970 i_ldc_set_rxdq_head(ldc_chan_t *ldcp, uint64_t head)
971 {
972 	ldcp->rx_dq_head = head;
973 	return (0);
974 }
975 
976 /*
977  * Get the Rx data queue head and tail pointers
978  */
979 static uint64_t
980 i_ldc_dq_rx_get_state(ldc_chan_t *ldcp, uint64_t *head, uint64_t *tail,
981     uint64_t *link_state)
982 {
983 	_NOTE(ARGUNUSED(link_state))
984 	*head = ldcp->rx_dq_head;
985 	*tail = ldcp->rx_dq_tail;
986 	return (0);
987 }
988 
989 /*
990  * Wrapper for the Rx HV queue set head function. Giving the
991  * data queue and HV queue set head functions the same type.
992  */
993 static uint64_t
994 i_ldc_hvq_rx_get_state(ldc_chan_t *ldcp, uint64_t *head, uint64_t *tail,
995     uint64_t *link_state)
996 {
997 	return (i_ldc_h2v_error(hv_ldc_rx_get_state(ldcp->id, head, tail,
998 	    link_state)));
999 }
1000 
1001 /*
1002  * LDC receive interrupt handler
1003  *    triggered for channel with data pending to read
1004  *    i.e. Rx queue content changes
1005  */
1006 static uint_t
1007 i_ldc_rx_hdlr(caddr_t arg1, caddr_t arg2)
1008 {
1009 	_NOTE(ARGUNUSED(arg2))
1010 
1011 	ldc_chan_t	*ldcp;
1012 	boolean_t	notify;
1013 	uint64_t	event;
1014 	int		rv, status;
1015 
1016 	/* Get the channel for which interrupt was received */
1017 	if (arg1 == NULL) {
1018 		cmn_err(CE_WARN, "i_ldc_rx_hdlr: invalid arg\n");
1019 		return (DDI_INTR_UNCLAIMED);
1020 	}
1021 
1022 	ldcp = (ldc_chan_t *)arg1;
1023 
1024 	D1(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) Received intr, ldcp=0x%p\n",
1025 	    ldcp->id, ldcp);
1026 	D1(ldcp->id, "i_ldc_rx_hdlr: (%llx) USR%lx/TS%lx/HS%lx, LSTATE=%lx\n",
1027 	    ldcp->id, ldcp->status, ldcp->tstate, ldcp->hstate,
1028 	    ldcp->link_state);
1029 
1030 	/* Lock channel */
1031 	mutex_enter(&ldcp->lock);
1032 
1033 	/* Mark the interrupt as being actively handled */
1034 	ldcp->rx_intr_state = LDC_INTR_ACTIVE;
1035 
1036 	status = i_ldc_rx_process_hvq(ldcp, &notify, &event);
1037 
1038 	if (ldcp->mode != LDC_MODE_RELIABLE) {
1039 		/*
1040 		 * If there are no data packets on the queue, clear
1041 		 * the interrupt. Otherwise, the ldc_read will clear
1042 		 * interrupts after draining the queue. To indicate the
1043 		 * interrupt has not yet been cleared, it is marked
1044 		 * as pending.
1045 		 */
1046 		if ((event & LDC_EVT_READ) == 0) {
1047 			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
1048 		} else {
1049 			ldcp->rx_intr_state = LDC_INTR_PEND;
1050 		}
1051 	}
1052 
1053 	/* if callbacks are disabled, do not notify */
1054 	if (notify && ldcp->cb_enabled) {
1055 		ldcp->cb_inprogress = B_TRUE;
1056 		mutex_exit(&ldcp->lock);
1057 		rv = ldcp->cb(event, ldcp->cb_arg);
1058 		if (rv) {
1059 			DWARN(ldcp->id,
1060 			    "i_ldc_rx_hdlr: (0x%llx) callback failure",
1061 			    ldcp->id);
1062 		}
1063 		mutex_enter(&ldcp->lock);
1064 		ldcp->cb_inprogress = B_FALSE;
1065 	}
1066 
1067 	if (ldcp->mode == LDC_MODE_RELIABLE) {
1068 		if (status == ENOSPC) {
1069 			/*
1070 			 * Here, ENOSPC indicates the secondary data
1071 			 * queue is full and the Rx queue is non-empty.
1072 			 * Much like how reliable and raw modes are
1073 			 * handled above, since the Rx queue is non-
1074 			 * empty, we mark the interrupt as pending to
1075 			 * indicate it has not yet been cleared.
1076 			 */
1077 			ldcp->rx_intr_state = LDC_INTR_PEND;
1078 		} else {
1079 			/*
1080 			 * We have processed all CTRL packets and
1081 			 * copied all DATA packets to the secondary
1082 			 * queue. Clear the interrupt.
1083 			 */
1084 			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
1085 		}
1086 	}
1087 
1088 	mutex_exit(&ldcp->lock);
1089 
1090 	D1(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) exiting handler", ldcp->id);
1091 
1092 	return (DDI_INTR_CLAIMED);
1093 }
1094 
1095 /*
1096  * Wrapper for the Rx HV queue processing function to be used when
1097  * checking the Rx HV queue for data packets. Unlike the interrupt
1098  * handler code flow, the Rx interrupt is not cleared here and
1099  * callbacks are not made.
1100  */
1101 static uint_t
1102 i_ldc_chkq(ldc_chan_t *ldcp)
1103 {
1104 	boolean_t	notify;
1105 	uint64_t	event;
1106 
1107 	return (i_ldc_rx_process_hvq(ldcp, &notify, &event));
1108 }
1109 
1110 /*
1111  * Send a LDC message
1112  */
1113 static int
1114 i_ldc_send_pkt(ldc_chan_t *ldcp, uint8_t pkttype, uint8_t subtype,
1115     uint8_t ctrlmsg)
1116 {
1117 	int		rv;
1118 	ldc_msg_t 	*pkt;
1119 	uint64_t	tx_tail;
1120 	uint32_t	curr_seqid;
1121 
1122 	/* Obtain Tx lock */
1123 	mutex_enter(&ldcp->tx_lock);
1124 
1125 	curr_seqid = ldcp->last_msg_snt;
1126 
1127 	/* get the current tail for the message */
1128 	rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
1129 	if (rv) {
1130 		DWARN(ldcp->id,
1131 		    "i_ldc_send_pkt: (0x%llx) error sending pkt, "
1132 		    "type=0x%x,subtype=0x%x,ctrl=0x%x\n",
1133 		    ldcp->id, pkttype, subtype, ctrlmsg);
1134 		mutex_exit(&ldcp->tx_lock);
1135 		return (rv);
1136 	}
1137 
1138 	pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
1139 	ZERO_PKT(pkt);
1140 
1141 	/* Initialize the packet */
1142 	pkt->type = pkttype;
1143 	pkt->stype = subtype;
1144 	pkt->ctrl = ctrlmsg;
1145 
1146 	/* Store ackid/seqid iff it is RELIABLE mode & not a RTS/RTR message */
1147 	if (((ctrlmsg & LDC_CTRL_MASK) != LDC_RTS) &&
1148 	    ((ctrlmsg & LDC_CTRL_MASK) != LDC_RTR)) {
1149 		curr_seqid++;
1150 		if (ldcp->mode != LDC_MODE_RAW) {
1151 			pkt->seqid = curr_seqid;
1152 			pkt->ackid = ldcp->last_msg_rcd;
1153 		}
1154 	}
1155 	DUMP_LDC_PKT(ldcp, "i_ldc_send_pkt", (uint64_t)pkt);
1156 
1157 	/* initiate the send by calling into HV and set the new tail */
1158 	tx_tail = (tx_tail + LDC_PACKET_SIZE) %
1159 	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1160 
1161 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
1162 	if (rv) {
1163 		DWARN(ldcp->id,
1164 		    "i_ldc_send_pkt:(0x%llx) error sending pkt, "
1165 		    "type=0x%x,stype=0x%x,ctrl=0x%x\n",
1166 		    ldcp->id, pkttype, subtype, ctrlmsg);
1167 		mutex_exit(&ldcp->tx_lock);
1168 		return (EIO);
1169 	}
1170 
1171 	ldcp->last_msg_snt = curr_seqid;
1172 	ldcp->tx_tail = tx_tail;
1173 
1174 	mutex_exit(&ldcp->tx_lock);
1175 	return (0);
1176 }
1177 
1178 /*
1179  * Checks if packet was received in right order
1180  * in the case of a reliable link.
1181  * Returns 0 if in order, else EIO
1182  */
1183 static int
1184 i_ldc_check_seqid(ldc_chan_t *ldcp, ldc_msg_t *msg)
1185 {
1186 	/* No seqid checking for RAW mode */
1187 	if (ldcp->mode == LDC_MODE_RAW)
1188 		return (0);
1189 
1190 	/* No seqid checking for version, RTS, RTR message */
1191 	if (msg->ctrl == LDC_VER ||
1192 	    msg->ctrl == LDC_RTS ||
1193 	    msg->ctrl == LDC_RTR)
1194 		return (0);
1195 
1196 	/* Initial seqid to use is sent in RTS/RTR and saved in last_msg_rcd */
1197 	if (msg->seqid != (ldcp->last_msg_rcd + 1)) {
1198 		DWARN(ldcp->id,
1199 		    "i_ldc_check_seqid: (0x%llx) out-of-order pkt, got 0x%x, "
1200 		    "expecting 0x%x\n", ldcp->id, msg->seqid,
1201 		    (ldcp->last_msg_rcd + 1));
1202 		return (EIO);
1203 	}
1204 
1205 #ifdef DEBUG
1206 	if (LDC_INJECT_PKTLOSS(ldcp)) {
1207 		DWARN(ldcp->id,
1208 		    "i_ldc_check_seqid: (0x%llx) inject pkt loss\n", ldcp->id);
1209 		return (EIO);
1210 	}
1211 #endif
1212 
1213 	return (0);
1214 }
1215 
1216 
1217 /*
1218  * Process an incoming version ctrl message
1219  */
1220 static int
1221 i_ldc_process_VER(ldc_chan_t *ldcp, ldc_msg_t *msg)
1222 {
1223 	int 		rv = 0, idx = ldcp->next_vidx;
1224 	ldc_msg_t 	*pkt;
1225 	uint64_t	tx_tail;
1226 	ldc_ver_t	*rcvd_ver;
1227 
1228 	/* get the received version */
1229 	rcvd_ver = (ldc_ver_t *)((uint64_t)msg + LDC_PAYLOAD_VER_OFF);
1230 
1231 	D2(ldcp->id, "i_ldc_process_VER: (0x%llx) received VER v%u.%u\n",
1232 	    ldcp->id, rcvd_ver->major, rcvd_ver->minor);
1233 
1234 	/* Obtain Tx lock */
1235 	mutex_enter(&ldcp->tx_lock);
1236 
1237 	switch (msg->stype) {
1238 	case LDC_INFO:
1239 
1240 		if ((ldcp->tstate & ~TS_IN_RESET) == TS_VREADY) {
1241 			(void) i_ldc_txq_reconf(ldcp);
1242 			i_ldc_reset_state(ldcp);
1243 			mutex_exit(&ldcp->tx_lock);
1244 			return (EAGAIN);
1245 		}
1246 
1247 		/* get the current tail and pkt for the response */
1248 		rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
1249 		if (rv != 0) {
1250 			DWARN(ldcp->id,
1251 			    "i_ldc_process_VER: (0x%llx) err sending "
1252 			    "version ACK/NACK\n", ldcp->id);
1253 			i_ldc_reset(ldcp, B_TRUE);
1254 			mutex_exit(&ldcp->tx_lock);
1255 			return (ECONNRESET);
1256 		}
1257 
1258 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
1259 		ZERO_PKT(pkt);
1260 
1261 		/* initialize the packet */
1262 		pkt->type = LDC_CTRL;
1263 		pkt->ctrl = LDC_VER;
1264 
1265 		for (;;) {
1266 
1267 			D1(ldcp->id, "i_ldc_process_VER: got %u.%u chk %u.%u\n",
1268 			    rcvd_ver->major, rcvd_ver->minor,
1269 			    ldc_versions[idx].major, ldc_versions[idx].minor);
1270 
1271 			if (rcvd_ver->major == ldc_versions[idx].major) {
1272 				/* major version match - ACK version */
1273 				pkt->stype = LDC_ACK;
1274 
1275 				/*
1276 				 * lower minor version to the one this endpt
1277 				 * supports, if necessary
1278 				 */
1279 				if (rcvd_ver->minor > ldc_versions[idx].minor)
1280 					rcvd_ver->minor =
1281 					    ldc_versions[idx].minor;
1282 				bcopy(rcvd_ver, pkt->udata, sizeof (*rcvd_ver));
1283 
1284 				break;
1285 			}
1286 
1287 			if (rcvd_ver->major > ldc_versions[idx].major) {
1288 
1289 				D1(ldcp->id, "i_ldc_process_VER: using next"
1290 				    " lower idx=%d, v%u.%u\n", idx,
1291 				    ldc_versions[idx].major,
1292 				    ldc_versions[idx].minor);
1293 
1294 				/* nack with next lower version */
1295 				pkt->stype = LDC_NACK;
1296 				bcopy(&ldc_versions[idx], pkt->udata,
1297 				    sizeof (ldc_versions[idx]));
1298 				ldcp->next_vidx = idx;
1299 				break;
1300 			}
1301 
1302 			/* next major version */
1303 			idx++;
1304 
1305 			D1(ldcp->id, "i_ldc_process_VER: inc idx %x\n", idx);
1306 
1307 			if (idx == LDC_NUM_VERS) {
1308 				/* no version match - send NACK */
1309 				pkt->stype = LDC_NACK;
1310 				bzero(pkt->udata, sizeof (ldc_ver_t));
1311 				ldcp->next_vidx = 0;
1312 				break;
1313 			}
1314 		}
1315 
1316 		/* initiate the send by calling into HV and set the new tail */
1317 		tx_tail = (tx_tail + LDC_PACKET_SIZE) %
1318 		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1319 
1320 		rv = i_ldc_set_tx_tail(ldcp, tx_tail);
1321 		if (rv == 0) {
1322 			ldcp->tx_tail = tx_tail;
1323 			if (pkt->stype == LDC_ACK) {
1324 				D2(ldcp->id, "i_ldc_process_VER: (0x%llx) sent"
1325 				    " version ACK\n", ldcp->id);
1326 				/* Save the ACK'd version */
1327 				ldcp->version.major = rcvd_ver->major;
1328 				ldcp->version.minor = rcvd_ver->minor;
1329 				ldcp->hstate |= TS_RCVD_VER;
1330 				ldcp->tstate |= TS_VER_DONE;
1331 				D1(DBG_ALL_LDCS,
1332 				    "(0x%llx) Sent ACK, "
1333 				    "Agreed on version v%u.%u\n",
1334 				    ldcp->id, rcvd_ver->major, rcvd_ver->minor);
1335 			}
1336 		} else {
1337 			DWARN(ldcp->id,
1338 			    "i_ldc_process_VER: (0x%llx) error sending "
1339 			    "ACK/NACK\n", ldcp->id);
1340 			i_ldc_reset(ldcp, B_TRUE);
1341 			mutex_exit(&ldcp->tx_lock);
1342 			return (ECONNRESET);
1343 		}
1344 
1345 		break;
1346 
1347 	case LDC_ACK:
1348 		if ((ldcp->tstate & ~TS_IN_RESET) == TS_VREADY) {
1349 			if (ldcp->version.major != rcvd_ver->major ||
1350 			    ldcp->version.minor != rcvd_ver->minor) {
1351 
1352 				/* mismatched version - reset connection */
1353 				DWARN(ldcp->id,
1354 				    "i_ldc_process_VER: (0x%llx) recvd"
1355 				    " ACK ver != sent ACK ver\n", ldcp->id);
1356 				i_ldc_reset(ldcp, B_TRUE);
1357 				mutex_exit(&ldcp->tx_lock);
1358 				return (ECONNRESET);
1359 			}
1360 		} else {
1361 			/* SUCCESS - we have agreed on a version */
1362 			ldcp->version.major = rcvd_ver->major;
1363 			ldcp->version.minor = rcvd_ver->minor;
1364 			ldcp->tstate |= TS_VER_DONE;
1365 		}
1366 
1367 		D1(ldcp->id, "(0x%llx) Got ACK, Agreed on version v%u.%u\n",
1368 		    ldcp->id, rcvd_ver->major, rcvd_ver->minor);
1369 
1370 		/* initiate RTS-RTR-RDX handshake */
1371 		rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
1372 		if (rv) {
1373 			DWARN(ldcp->id,
1374 		    "i_ldc_process_VER: (0x%llx) cannot send RTS\n",
1375 			    ldcp->id);
1376 			i_ldc_reset(ldcp, B_TRUE);
1377 			mutex_exit(&ldcp->tx_lock);
1378 			return (ECONNRESET);
1379 		}
1380 
1381 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
1382 		ZERO_PKT(pkt);
1383 
1384 		pkt->type = LDC_CTRL;
1385 		pkt->stype = LDC_INFO;
1386 		pkt->ctrl = LDC_RTS;
1387 		pkt->env = ldcp->mode;
1388 		if (ldcp->mode != LDC_MODE_RAW)
1389 			pkt->seqid = LDC_INIT_SEQID;
1390 
1391 		ldcp->last_msg_rcd = LDC_INIT_SEQID;
1392 
1393 		DUMP_LDC_PKT(ldcp, "i_ldc_process_VER snd rts", (uint64_t)pkt);
1394 
1395 		/* initiate the send by calling into HV and set the new tail */
1396 		tx_tail = (tx_tail + LDC_PACKET_SIZE) %
1397 		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1398 
1399 		rv = i_ldc_set_tx_tail(ldcp, tx_tail);
1400 		if (rv) {
1401 			D2(ldcp->id,
1402 			    "i_ldc_process_VER: (0x%llx) no listener\n",
1403 			    ldcp->id);
1404 			i_ldc_reset(ldcp, B_TRUE);
1405 			mutex_exit(&ldcp->tx_lock);
1406 			return (ECONNRESET);
1407 		}
1408 
1409 		ldcp->tx_tail = tx_tail;
1410 		ldcp->hstate |= TS_SENT_RTS;
1411 
1412 		break;
1413 
1414 	case LDC_NACK:
1415 		/* check if version in NACK is zero */
1416 		if (rcvd_ver->major == 0 && rcvd_ver->minor == 0) {
1417 			/* version handshake failure */
1418 			DWARN(DBG_ALL_LDCS,
1419 			    "i_ldc_process_VER: (0x%llx) no version match\n",
1420 			    ldcp->id);
1421 			i_ldc_reset(ldcp, B_TRUE);
1422 			mutex_exit(&ldcp->tx_lock);
1423 			return (ECONNRESET);
1424 		}
1425 
1426 		/* get the current tail and pkt for the response */
1427 		rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
1428 		if (rv != 0) {
1429 			cmn_err(CE_NOTE,
1430 			    "i_ldc_process_VER: (0x%lx) err sending "
1431 			    "version ACK/NACK\n", ldcp->id);
1432 			i_ldc_reset(ldcp, B_TRUE);
1433 			mutex_exit(&ldcp->tx_lock);
1434 			return (ECONNRESET);
1435 		}
1436 
1437 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
1438 		ZERO_PKT(pkt);
1439 
1440 		/* initialize the packet */
1441 		pkt->type = LDC_CTRL;
1442 		pkt->ctrl = LDC_VER;
1443 		pkt->stype = LDC_INFO;
1444 
1445 		/* check ver in NACK msg has a match */
1446 		for (;;) {
1447 			if (rcvd_ver->major == ldc_versions[idx].major) {
1448 				/*
1449 				 * major version match - resubmit request
1450 				 * if lower minor version to the one this endpt
1451 				 * supports, if necessary
1452 				 */
1453 				if (rcvd_ver->minor > ldc_versions[idx].minor)
1454 					rcvd_ver->minor =
1455 					    ldc_versions[idx].minor;
1456 				bcopy(rcvd_ver, pkt->udata, sizeof (*rcvd_ver));
1457 				break;
1458 			}
1459 
1460 			if (rcvd_ver->major > ldc_versions[idx].major) {
1461 
1462 				D1(ldcp->id, "i_ldc_process_VER: using next"
1463 				    " lower idx=%d, v%u.%u\n", idx,
1464 				    ldc_versions[idx].major,
1465 				    ldc_versions[idx].minor);
1466 
1467 				/* send next lower version */
1468 				bcopy(&ldc_versions[idx], pkt->udata,
1469 				    sizeof (ldc_versions[idx]));
1470 				ldcp->next_vidx = idx;
1471 				break;
1472 			}
1473 
1474 			/* next version */
1475 			idx++;
1476 
1477 			D1(ldcp->id, "i_ldc_process_VER: inc idx %x\n", idx);
1478 
1479 			if (idx == LDC_NUM_VERS) {
1480 				/* no version match - terminate */
1481 				ldcp->next_vidx = 0;
1482 				mutex_exit(&ldcp->tx_lock);
1483 				return (ECONNRESET);
1484 			}
1485 		}
1486 
1487 		/* initiate the send by calling into HV and set the new tail */
1488 		tx_tail = (tx_tail + LDC_PACKET_SIZE) %
1489 		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1490 
1491 		rv = i_ldc_set_tx_tail(ldcp, tx_tail);
1492 		if (rv == 0) {
1493 			D2(ldcp->id, "i_ldc_process_VER: (0x%llx) sent version"
1494 			    "INFO v%u.%u\n", ldcp->id, ldc_versions[idx].major,
1495 			    ldc_versions[idx].minor);
1496 			ldcp->tx_tail = tx_tail;
1497 		} else {
1498 			cmn_err(CE_NOTE,
1499 			    "i_ldc_process_VER: (0x%lx) error sending version"
1500 			    "INFO\n", ldcp->id);
1501 			i_ldc_reset(ldcp, B_TRUE);
1502 			mutex_exit(&ldcp->tx_lock);
1503 			return (ECONNRESET);
1504 		}
1505 
1506 		break;
1507 	}
1508 
1509 	mutex_exit(&ldcp->tx_lock);
1510 	return (rv);
1511 }
1512 
1513 
1514 /*
1515  * Process an incoming RTS ctrl message
1516  */
1517 static int
1518 i_ldc_process_RTS(ldc_chan_t *ldcp, ldc_msg_t *msg)
1519 {
1520 	int 		rv = 0;
1521 	ldc_msg_t 	*pkt;
1522 	uint64_t	tx_tail;
1523 	boolean_t	sent_NACK = B_FALSE;
1524 
1525 	D2(ldcp->id, "i_ldc_process_RTS: (0x%llx) received RTS\n", ldcp->id);
1526 
1527 	switch (msg->stype) {
1528 	case LDC_NACK:
1529 		DWARN(ldcp->id,
1530 		    "i_ldc_process_RTS: (0x%llx) RTS NACK received\n",
1531 		    ldcp->id);
1532 
1533 		/* Reset the channel -- as we cannot continue */
1534 		mutex_enter(&ldcp->tx_lock);
1535 		i_ldc_reset(ldcp, B_TRUE);
1536 		mutex_exit(&ldcp->tx_lock);
1537 		rv = ECONNRESET;
1538 		break;
1539 
1540 	case LDC_INFO:
1541 
1542 		/* check mode */
1543 		if (ldcp->mode != (ldc_mode_t)msg->env) {
1544 			cmn_err(CE_NOTE,
1545 			    "i_ldc_process_RTS: (0x%lx) mode mismatch\n",
1546 			    ldcp->id);
1547 			/*
1548 			 * send NACK in response to MODE message
1549 			 * get the current tail for the response
1550 			 */
1551 			rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_NACK, LDC_RTS);
1552 			if (rv) {
1553 				/* if cannot send NACK - reset channel */
1554 				mutex_enter(&ldcp->tx_lock);
1555 				i_ldc_reset(ldcp, B_TRUE);
1556 				mutex_exit(&ldcp->tx_lock);
1557 				rv = ECONNRESET;
1558 				break;
1559 			}
1560 			sent_NACK = B_TRUE;
1561 		}
1562 		break;
1563 	default:
1564 		DWARN(ldcp->id, "i_ldc_process_RTS: (0x%llx) unexp ACK\n",
1565 		    ldcp->id);
1566 		mutex_enter(&ldcp->tx_lock);
1567 		i_ldc_reset(ldcp, B_TRUE);
1568 		mutex_exit(&ldcp->tx_lock);
1569 		rv = ECONNRESET;
1570 		break;
1571 	}
1572 
1573 	/*
1574 	 * If either the connection was reset (when rv != 0) or
1575 	 * a NACK was sent, we return. In the case of a NACK
1576 	 * we dont want to consume the packet that came in but
1577 	 * not record that we received the RTS
1578 	 */
1579 	if (rv || sent_NACK)
1580 		return (rv);
1581 
1582 	/* record RTS received */
1583 	ldcp->hstate |= TS_RCVD_RTS;
1584 
1585 	/* store initial SEQID info */
1586 	ldcp->last_msg_snt = msg->seqid;
1587 
1588 	/* Obtain Tx lock */
1589 	mutex_enter(&ldcp->tx_lock);
1590 
1591 	/* get the current tail for the response */
1592 	rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
1593 	if (rv != 0) {
1594 		cmn_err(CE_NOTE,
1595 		    "i_ldc_process_RTS: (0x%lx) err sending RTR\n",
1596 		    ldcp->id);
1597 		i_ldc_reset(ldcp, B_TRUE);
1598 		mutex_exit(&ldcp->tx_lock);
1599 		return (ECONNRESET);
1600 	}
1601 
1602 	pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
1603 	ZERO_PKT(pkt);
1604 
1605 	/* initialize the packet */
1606 	pkt->type = LDC_CTRL;
1607 	pkt->stype = LDC_INFO;
1608 	pkt->ctrl = LDC_RTR;
1609 	pkt->env = ldcp->mode;
1610 	if (ldcp->mode != LDC_MODE_RAW)
1611 		pkt->seqid = LDC_INIT_SEQID;
1612 
1613 	ldcp->last_msg_rcd = msg->seqid;
1614 
1615 	/* initiate the send by calling into HV and set the new tail */
1616 	tx_tail = (tx_tail + LDC_PACKET_SIZE) %
1617 	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1618 
1619 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
1620 	if (rv == 0) {
1621 		D2(ldcp->id,
1622 		    "i_ldc_process_RTS: (0x%llx) sent RTR\n", ldcp->id);
1623 		DUMP_LDC_PKT(ldcp, "i_ldc_process_RTS sent rtr", (uint64_t)pkt);
1624 
1625 		ldcp->tx_tail = tx_tail;
1626 		ldcp->hstate |= TS_SENT_RTR;
1627 
1628 	} else {
1629 		cmn_err(CE_NOTE,
1630 		    "i_ldc_process_RTS: (0x%lx) error sending RTR\n",
1631 		    ldcp->id);
1632 		i_ldc_reset(ldcp, B_TRUE);
1633 		mutex_exit(&ldcp->tx_lock);
1634 		return (ECONNRESET);
1635 	}
1636 
1637 	mutex_exit(&ldcp->tx_lock);
1638 	return (0);
1639 }
1640 
1641 /*
1642  * Process an incoming RTR ctrl message
1643  */
1644 static int
1645 i_ldc_process_RTR(ldc_chan_t *ldcp, ldc_msg_t *msg)
1646 {
1647 	int 		rv = 0;
1648 	boolean_t	sent_NACK = B_FALSE;
1649 
1650 	D2(ldcp->id, "i_ldc_process_RTR: (0x%llx) received RTR\n", ldcp->id);
1651 
1652 	switch (msg->stype) {
1653 	case LDC_NACK:
1654 		/* RTR NACK received */
1655 		DWARN(ldcp->id,
1656 		    "i_ldc_process_RTR: (0x%llx) RTR NACK received\n",
1657 		    ldcp->id);
1658 
1659 		/* Reset the channel -- as we cannot continue */
1660 		mutex_enter(&ldcp->tx_lock);
1661 		i_ldc_reset(ldcp, B_TRUE);
1662 		mutex_exit(&ldcp->tx_lock);
1663 		rv = ECONNRESET;
1664 
1665 		break;
1666 
1667 	case LDC_INFO:
1668 
1669 		/* check mode */
1670 		if (ldcp->mode != (ldc_mode_t)msg->env) {
1671 			DWARN(ldcp->id,
1672 			    "i_ldc_process_RTR: (0x%llx) mode mismatch, "
1673 			    "expecting 0x%x, got 0x%x\n",
1674 			    ldcp->id, ldcp->mode, (ldc_mode_t)msg->env);
1675 			/*
1676 			 * send NACK in response to MODE message
1677 			 * get the current tail for the response
1678 			 */
1679 			rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_NACK, LDC_RTR);
1680 			if (rv) {
1681 				/* if cannot send NACK - reset channel */
1682 				mutex_enter(&ldcp->tx_lock);
1683 				i_ldc_reset(ldcp, B_TRUE);
1684 				mutex_exit(&ldcp->tx_lock);
1685 				rv = ECONNRESET;
1686 				break;
1687 			}
1688 			sent_NACK = B_TRUE;
1689 		}
1690 		break;
1691 
1692 	default:
1693 		DWARN(ldcp->id, "i_ldc_process_RTR: (0x%llx) unexp ACK\n",
1694 		    ldcp->id);
1695 
1696 		/* Reset the channel -- as we cannot continue */
1697 		mutex_enter(&ldcp->tx_lock);
1698 		i_ldc_reset(ldcp, B_TRUE);
1699 		mutex_exit(&ldcp->tx_lock);
1700 		rv = ECONNRESET;
1701 		break;
1702 	}
1703 
1704 	/*
1705 	 * If either the connection was reset (when rv != 0) or
1706 	 * a NACK was sent, we return. In the case of a NACK
1707 	 * we dont want to consume the packet that came in but
1708 	 * not record that we received the RTR
1709 	 */
1710 	if (rv || sent_NACK)
1711 		return (rv);
1712 
1713 	ldcp->last_msg_snt = msg->seqid;
1714 	ldcp->hstate |= TS_RCVD_RTR;
1715 
1716 	rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_INFO, LDC_RDX);
1717 	if (rv) {
1718 		cmn_err(CE_NOTE,
1719 		    "i_ldc_process_RTR: (0x%lx) cannot send RDX\n",
1720 		    ldcp->id);
1721 		mutex_enter(&ldcp->tx_lock);
1722 		i_ldc_reset(ldcp, B_TRUE);
1723 		mutex_exit(&ldcp->tx_lock);
1724 		return (ECONNRESET);
1725 	}
1726 	D2(ldcp->id,
1727 	    "i_ldc_process_RTR: (0x%llx) sent RDX\n", ldcp->id);
1728 
1729 	ldcp->hstate |= TS_SENT_RDX;
1730 	ldcp->tstate |= TS_HSHAKE_DONE;
1731 	if ((ldcp->tstate & TS_IN_RESET) == 0)
1732 		ldcp->status = LDC_UP;
1733 
1734 	D1(ldcp->id, "(0x%llx) Handshake Complete\n", ldcp->id);
1735 
1736 	return (0);
1737 }
1738 
1739 
1740 /*
1741  * Process an incoming RDX ctrl message
1742  */
1743 static int
1744 i_ldc_process_RDX(ldc_chan_t *ldcp, ldc_msg_t *msg)
1745 {
1746 	int	rv = 0;
1747 
1748 	D2(ldcp->id, "i_ldc_process_RDX: (0x%llx) received RDX\n", ldcp->id);
1749 
1750 	switch (msg->stype) {
1751 	case LDC_NACK:
1752 		/* RDX NACK received */
1753 		DWARN(ldcp->id,
1754 		    "i_ldc_process_RDX: (0x%llx) RDX NACK received\n",
1755 		    ldcp->id);
1756 
1757 		/* Reset the channel -- as we cannot continue */
1758 		mutex_enter(&ldcp->tx_lock);
1759 		i_ldc_reset(ldcp, B_TRUE);
1760 		mutex_exit(&ldcp->tx_lock);
1761 		rv = ECONNRESET;
1762 
1763 		break;
1764 
1765 	case LDC_INFO:
1766 
1767 		/*
1768 		 * if channel is UP and a RDX received after data transmission
1769 		 * has commenced it is an error
1770 		 */
1771 		if ((ldcp->tstate == TS_UP) && (ldcp->hstate & TS_RCVD_RDX)) {
1772 			DWARN(DBG_ALL_LDCS,
1773 			    "i_ldc_process_RDX: (0x%llx) unexpected RDX"
1774 			    " - LDC reset\n", ldcp->id);
1775 			mutex_enter(&ldcp->tx_lock);
1776 			i_ldc_reset(ldcp, B_TRUE);
1777 			mutex_exit(&ldcp->tx_lock);
1778 			return (ECONNRESET);
1779 		}
1780 
1781 		ldcp->hstate |= TS_RCVD_RDX;
1782 		ldcp->tstate |= TS_HSHAKE_DONE;
1783 		if ((ldcp->tstate & TS_IN_RESET) == 0)
1784 			ldcp->status = LDC_UP;
1785 
1786 		D1(DBG_ALL_LDCS, "(0x%llx) Handshake Complete\n", ldcp->id);
1787 		break;
1788 
1789 	default:
1790 		DWARN(ldcp->id, "i_ldc_process_RDX: (0x%llx) unexp ACK\n",
1791 		    ldcp->id);
1792 
1793 		/* Reset the channel -- as we cannot continue */
1794 		mutex_enter(&ldcp->tx_lock);
1795 		i_ldc_reset(ldcp, B_TRUE);
1796 		mutex_exit(&ldcp->tx_lock);
1797 		rv = ECONNRESET;
1798 		break;
1799 	}
1800 
1801 	return (rv);
1802 }
1803 
1804 /*
1805  * Process an incoming ACK for a data packet
1806  */
1807 static int
1808 i_ldc_process_data_ACK(ldc_chan_t *ldcp, ldc_msg_t *msg)
1809 {
1810 	int		rv;
1811 	uint64_t 	tx_head;
1812 	ldc_msg_t	*pkt;
1813 
1814 	/* Obtain Tx lock */
1815 	mutex_enter(&ldcp->tx_lock);
1816 
1817 	/*
1818 	 * Read the current Tx head and tail
1819 	 */
1820 	rv = hv_ldc_tx_get_state(ldcp->id,
1821 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
1822 	if (rv != 0) {
1823 		cmn_err(CE_WARN,
1824 		    "i_ldc_process_data_ACK: (0x%lx) cannot read qptrs\n",
1825 		    ldcp->id);
1826 
1827 		/* Reset the channel -- as we cannot continue */
1828 		i_ldc_reset(ldcp, B_TRUE);
1829 		mutex_exit(&ldcp->tx_lock);
1830 		return (ECONNRESET);
1831 	}
1832 
1833 	/*
1834 	 * loop from where the previous ACK location was to the
1835 	 * current head location. This is how far the HV has
1836 	 * actually send pkts. Pkts between head and tail are
1837 	 * yet to be sent by HV.
1838 	 */
1839 	tx_head = ldcp->tx_ackd_head;
1840 	for (;;) {
1841 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_head);
1842 		tx_head = (tx_head + LDC_PACKET_SIZE) %
1843 		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1844 
1845 		if (pkt->seqid == msg->ackid) {
1846 			D2(ldcp->id,
1847 			    "i_ldc_process_data_ACK: (0x%llx) found packet\n",
1848 			    ldcp->id);
1849 			ldcp->last_ack_rcd = msg->ackid;
1850 			ldcp->tx_ackd_head = tx_head;
1851 			break;
1852 		}
1853 		if (tx_head == ldcp->tx_head) {
1854 			/* could not find packet */
1855 			DWARN(ldcp->id,
1856 			    "i_ldc_process_data_ACK: (0x%llx) invalid ACKid\n",
1857 			    ldcp->id);
1858 
1859 			/* Reset the channel -- as we cannot continue */
1860 			i_ldc_reset(ldcp, B_TRUE);
1861 			mutex_exit(&ldcp->tx_lock);
1862 			return (ECONNRESET);
1863 		}
1864 	}
1865 
1866 	mutex_exit(&ldcp->tx_lock);
1867 	return (0);
1868 }
1869 
1870 /*
1871  * Process incoming control message
1872  * Return 0 - session can continue
1873  *        EAGAIN - reprocess packet - state was changed
1874  *	  ECONNRESET - channel was reset
1875  */
1876 static int
1877 i_ldc_ctrlmsg(ldc_chan_t *ldcp, ldc_msg_t *msg)
1878 {
1879 	int 		rv = 0;
1880 
1881 	D1(ldcp->id, "i_ldc_ctrlmsg: (%llx) tstate = %lx, hstate = %lx\n",
1882 	    ldcp->id, ldcp->tstate, ldcp->hstate);
1883 
1884 	switch (ldcp->tstate & ~TS_IN_RESET) {
1885 
1886 	case TS_OPEN:
1887 	case TS_READY:
1888 
1889 		switch (msg->ctrl & LDC_CTRL_MASK) {
1890 		case LDC_VER:
1891 			/* process version message */
1892 			rv = i_ldc_process_VER(ldcp, msg);
1893 			break;
1894 		default:
1895 			DWARN(ldcp->id,
1896 			    "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
1897 			    "tstate=0x%x\n", ldcp->id,
1898 			    (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate);
1899 			break;
1900 		}
1901 
1902 		break;
1903 
1904 	case TS_VREADY:
1905 
1906 		switch (msg->ctrl & LDC_CTRL_MASK) {
1907 		case LDC_VER:
1908 			/* process version message */
1909 			rv = i_ldc_process_VER(ldcp, msg);
1910 			break;
1911 		case LDC_RTS:
1912 			/* process RTS message */
1913 			rv = i_ldc_process_RTS(ldcp, msg);
1914 			break;
1915 		case LDC_RTR:
1916 			/* process RTR message */
1917 			rv = i_ldc_process_RTR(ldcp, msg);
1918 			break;
1919 		case LDC_RDX:
1920 			/* process RDX message */
1921 			rv = i_ldc_process_RDX(ldcp, msg);
1922 			break;
1923 		default:
1924 			DWARN(ldcp->id,
1925 			    "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
1926 			    "tstate=0x%x\n", ldcp->id,
1927 			    (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate);
1928 			break;
1929 		}
1930 
1931 		break;
1932 
1933 	case TS_UP:
1934 
1935 		switch (msg->ctrl & LDC_CTRL_MASK) {
1936 		case LDC_VER:
1937 			DWARN(ldcp->id,
1938 			    "i_ldc_ctrlmsg: (0x%llx) unexpected VER "
1939 			    "- LDC reset\n", ldcp->id);
1940 			/* peer is redoing version negotiation */
1941 			mutex_enter(&ldcp->tx_lock);
1942 			(void) i_ldc_txq_reconf(ldcp);
1943 			i_ldc_reset_state(ldcp);
1944 			mutex_exit(&ldcp->tx_lock);
1945 			rv = EAGAIN;
1946 			break;
1947 
1948 		case LDC_RDX:
1949 			/* process RDX message */
1950 			rv = i_ldc_process_RDX(ldcp, msg);
1951 			break;
1952 
1953 		default:
1954 			DWARN(ldcp->id,
1955 			    "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
1956 			    "tstate=0x%x\n", ldcp->id,
1957 			    (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate);
1958 			break;
1959 		}
1960 	}
1961 
1962 	return (rv);
1963 }
1964 
1965 /*
1966  * Register channel with the channel nexus
1967  */
1968 static int
1969 i_ldc_register_channel(ldc_chan_t *ldcp)
1970 {
1971 	int		rv = 0;
1972 	ldc_cnex_t	*cinfo = &ldcssp->cinfo;
1973 
1974 	if (cinfo->dip == NULL) {
1975 		DWARN(ldcp->id,
1976 		    "i_ldc_register_channel: cnex has not registered\n");
1977 		return (EAGAIN);
1978 	}
1979 
1980 	rv = cinfo->reg_chan(cinfo->dip, ldcp->id, ldcp->devclass);
1981 	if (rv) {
1982 		DWARN(ldcp->id,
1983 		    "i_ldc_register_channel: cannot register channel\n");
1984 		return (rv);
1985 	}
1986 
1987 	rv = cinfo->add_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR,
1988 	    i_ldc_tx_hdlr, ldcp, NULL);
1989 	if (rv) {
1990 		DWARN(ldcp->id,
1991 		    "i_ldc_register_channel: cannot add Tx interrupt\n");
1992 		(void) cinfo->unreg_chan(cinfo->dip, ldcp->id);
1993 		return (rv);
1994 	}
1995 
1996 	rv = cinfo->add_intr(cinfo->dip, ldcp->id, CNEX_RX_INTR,
1997 	    i_ldc_rx_hdlr, ldcp, NULL);
1998 	if (rv) {
1999 		DWARN(ldcp->id,
2000 		    "i_ldc_register_channel: cannot add Rx interrupt\n");
2001 		(void) cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR);
2002 		(void) cinfo->unreg_chan(cinfo->dip, ldcp->id);
2003 		return (rv);
2004 	}
2005 
2006 	ldcp->tstate |= TS_CNEX_RDY;
2007 
2008 	return (0);
2009 }
2010 
2011 /*
2012  * Unregister a channel with the channel nexus
2013  */
2014 static int
2015 i_ldc_unregister_channel(ldc_chan_t *ldcp)
2016 {
2017 	int		rv = 0;
2018 	ldc_cnex_t	*cinfo = &ldcssp->cinfo;
2019 
2020 	if (cinfo->dip == NULL) {
2021 		DWARN(ldcp->id,
2022 		    "i_ldc_unregister_channel: cnex has not registered\n");
2023 		return (EAGAIN);
2024 	}
2025 
2026 	if (ldcp->tstate & TS_CNEX_RDY) {
2027 
2028 		/* Remove the Rx interrupt */
2029 		rv = cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_RX_INTR);
2030 		if (rv) {
2031 			if (rv != EAGAIN) {
2032 				DWARN(ldcp->id,
2033 				    "i_ldc_unregister_channel: err removing "
2034 				    "Rx intr\n");
2035 				return (rv);
2036 			}
2037 
2038 			/*
2039 			 * If interrupts are pending and handler has
2040 			 * finished running, clear interrupt and try
2041 			 * again
2042 			 */
2043 			if (ldcp->rx_intr_state != LDC_INTR_PEND)
2044 				return (rv);
2045 
2046 			(void) i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
2047 			rv = cinfo->rem_intr(cinfo->dip, ldcp->id,
2048 			    CNEX_RX_INTR);
2049 			if (rv) {
2050 				DWARN(ldcp->id, "i_ldc_unregister_channel: "
2051 				    "err removing Rx interrupt\n");
2052 				return (rv);
2053 			}
2054 		}
2055 
2056 		/* Remove the Tx interrupt */
2057 		rv = cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR);
2058 		if (rv) {
2059 			DWARN(ldcp->id,
2060 			    "i_ldc_unregister_channel: err removing Tx intr\n");
2061 			return (rv);
2062 		}
2063 
2064 		/* Unregister the channel */
2065 		rv = cinfo->unreg_chan(ldcssp->cinfo.dip, ldcp->id);
2066 		if (rv) {
2067 			DWARN(ldcp->id,
2068 			    "i_ldc_unregister_channel: cannot unreg channel\n");
2069 			return (rv);
2070 		}
2071 
2072 		ldcp->tstate &= ~TS_CNEX_RDY;
2073 	}
2074 
2075 	return (0);
2076 }
2077 
2078 
2079 /*
2080  * LDC transmit interrupt handler
2081  *    triggered for chanel up/down/reset events
2082  *    and Tx queue content changes
2083  */
2084 static uint_t
2085 i_ldc_tx_hdlr(caddr_t arg1, caddr_t arg2)
2086 {
2087 	_NOTE(ARGUNUSED(arg2))
2088 
2089 	int 		rv;
2090 	ldc_chan_t 	*ldcp;
2091 	boolean_t 	notify_client = B_FALSE;
2092 	uint64_t	notify_event = 0, link_state;
2093 
2094 	/* Get the channel for which interrupt was received */
2095 	ASSERT(arg1 != NULL);
2096 	ldcp = (ldc_chan_t *)arg1;
2097 
2098 	D1(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) Received intr, ldcp=0x%p\n",
2099 	    ldcp->id, ldcp);
2100 
2101 	/* Lock channel */
2102 	mutex_enter(&ldcp->lock);
2103 
2104 	/* Obtain Tx lock */
2105 	mutex_enter(&ldcp->tx_lock);
2106 
2107 	/* mark interrupt as pending */
2108 	ldcp->tx_intr_state = LDC_INTR_ACTIVE;
2109 
2110 	/* save current link state */
2111 	link_state = ldcp->link_state;
2112 
2113 	rv = hv_ldc_tx_get_state(ldcp->id, &ldcp->tx_head, &ldcp->tx_tail,
2114 	    &ldcp->link_state);
2115 	if (rv) {
2116 		cmn_err(CE_WARN,
2117 		    "i_ldc_tx_hdlr: (0x%lx) cannot read queue ptrs rv=0x%d\n",
2118 		    ldcp->id, rv);
2119 		i_ldc_clear_intr(ldcp, CNEX_TX_INTR);
2120 		mutex_exit(&ldcp->tx_lock);
2121 		mutex_exit(&ldcp->lock);
2122 		return (DDI_INTR_CLAIMED);
2123 	}
2124 
2125 	/*
2126 	 * reset the channel state if the channel went down
2127 	 * (other side unconfigured queue) or channel was reset
2128 	 * (other side reconfigured its queue)
2129 	 */
2130 	if (link_state != ldcp->link_state &&
2131 	    ldcp->link_state == LDC_CHANNEL_DOWN) {
2132 		D1(ldcp->id, "i_ldc_tx_hdlr: channel link down\n", ldcp->id);
2133 		i_ldc_reset(ldcp, B_FALSE);
2134 		notify_client = B_TRUE;
2135 		notify_event = LDC_EVT_DOWN;
2136 	}
2137 
2138 	if (link_state != ldcp->link_state &&
2139 	    ldcp->link_state == LDC_CHANNEL_RESET) {
2140 		D1(ldcp->id, "i_ldc_tx_hdlr: channel link reset\n", ldcp->id);
2141 		i_ldc_reset(ldcp, B_FALSE);
2142 		notify_client = B_TRUE;
2143 		notify_event = LDC_EVT_RESET;
2144 	}
2145 
2146 	if (link_state != ldcp->link_state &&
2147 	    (ldcp->tstate & ~TS_IN_RESET) == TS_OPEN &&
2148 	    ldcp->link_state == LDC_CHANNEL_UP) {
2149 		D1(ldcp->id, "i_ldc_tx_hdlr: channel link up\n", ldcp->id);
2150 		notify_client = B_TRUE;
2151 		notify_event = LDC_EVT_RESET;
2152 		ldcp->tstate |= TS_LINK_READY;
2153 		ldcp->status = LDC_READY;
2154 	}
2155 
2156 	/* if callbacks are disabled, do not notify */
2157 	if (!ldcp->cb_enabled)
2158 		notify_client = B_FALSE;
2159 
2160 	i_ldc_clear_intr(ldcp, CNEX_TX_INTR);
2161 	mutex_exit(&ldcp->tx_lock);
2162 
2163 	if (notify_client) {
2164 		ldcp->cb_inprogress = B_TRUE;
2165 		mutex_exit(&ldcp->lock);
2166 		rv = ldcp->cb(notify_event, ldcp->cb_arg);
2167 		if (rv) {
2168 			DWARN(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) callback "
2169 			    "failure", ldcp->id);
2170 		}
2171 		mutex_enter(&ldcp->lock);
2172 		ldcp->cb_inprogress = B_FALSE;
2173 	}
2174 
2175 	mutex_exit(&ldcp->lock);
2176 
2177 	D1(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) exiting handler", ldcp->id);
2178 
2179 	return (DDI_INTR_CLAIMED);
2180 }
2181 
2182 /*
2183  * Process the Rx HV queue.
2184  *
2185  * Returns 0 if data packets were found and no errors were encountered,
2186  * otherwise returns an error. In either case, the *notify argument is
2187  * set to indicate whether or not the client callback function should
2188  * be invoked. The *event argument is set to contain the callback event.
2189  *
2190  * Depending on the channel mode, packets are handled differently:
2191  *
2192  * RAW MODE
2193  * For raw mode channels, when a data packet is encountered,
2194  * processing stops and all packets are left on the queue to be removed
2195  * and processed by the ldc_read code path.
2196  *
2197  * UNRELIABLE MODE
2198  * For unreliable mode, when a data packet is encountered, processing
2199  * stops, and all packets are left on the queue to be removed and
2200  * processed by the ldc_read code path. Control packets are processed
2201  * inline if they are encountered before any data packets.
2202  *
2203  * RELIABLE MODE
2204  * For reliable mode channels, all packets on the receive queue
2205  * are processed: data packets are copied to the data queue and
2206  * control packets are processed inline. Packets are only left on
2207  * the receive queue when the data queue is full.
2208  */
2209 static uint_t
2210 i_ldc_rx_process_hvq(ldc_chan_t *ldcp, boolean_t *notify_client,
2211     uint64_t *notify_event)
2212 {
2213 	int		rv;
2214 	uint64_t 	rx_head, rx_tail;
2215 	ldc_msg_t 	*msg;
2216 	uint64_t	link_state, first_fragment = 0;
2217 	boolean_t	trace_length = B_TRUE;
2218 
2219 	ASSERT(MUTEX_HELD(&ldcp->lock));
2220 	*notify_client = B_FALSE;
2221 	*notify_event = 0;
2222 
2223 	/*
2224 	 * Read packet(s) from the queue
2225 	 */
2226 	for (;;) {
2227 
2228 		link_state = ldcp->link_state;
2229 		rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
2230 		    &ldcp->link_state);
2231 		if (rv) {
2232 			cmn_err(CE_WARN,
2233 			    "i_ldc_rx_process_hvq: (0x%lx) cannot read "
2234 			    "queue ptrs, rv=0x%d\n", ldcp->id, rv);
2235 			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
2236 			return (EIO);
2237 		}
2238 
2239 		/*
2240 		 * reset the channel state if the channel went down
2241 		 * (other side unconfigured queue) or channel was reset
2242 		 * (other side reconfigured its queue)
2243 		 */
2244 
2245 		if (link_state != ldcp->link_state) {
2246 
2247 			switch (ldcp->link_state) {
2248 			case LDC_CHANNEL_DOWN:
2249 				D1(ldcp->id, "i_ldc_rx_process_hvq: channel "
2250 				    "link down\n", ldcp->id);
2251 				mutex_enter(&ldcp->tx_lock);
2252 				i_ldc_reset(ldcp, B_FALSE);
2253 				mutex_exit(&ldcp->tx_lock);
2254 				*notify_client = B_TRUE;
2255 				*notify_event = LDC_EVT_DOWN;
2256 				goto loop_exit;
2257 
2258 			case LDC_CHANNEL_UP:
2259 				D1(ldcp->id, "i_ldc_rx_process_hvq: "
2260 				    "channel link up\n", ldcp->id);
2261 
2262 				if ((ldcp->tstate & ~TS_IN_RESET) == TS_OPEN) {
2263 					*notify_client = B_TRUE;
2264 					*notify_event = LDC_EVT_RESET;
2265 					ldcp->tstate |= TS_LINK_READY;
2266 					ldcp->status = LDC_READY;
2267 				}
2268 				break;
2269 
2270 			case LDC_CHANNEL_RESET:
2271 			default:
2272 #ifdef DEBUG
2273 force_reset:
2274 #endif
2275 				D1(ldcp->id, "i_ldc_rx_process_hvq: channel "
2276 				    "link reset\n", ldcp->id);
2277 				mutex_enter(&ldcp->tx_lock);
2278 				i_ldc_reset(ldcp, B_FALSE);
2279 				mutex_exit(&ldcp->tx_lock);
2280 				*notify_client = B_TRUE;
2281 				*notify_event = LDC_EVT_RESET;
2282 				break;
2283 			}
2284 		}
2285 
2286 #ifdef DEBUG
2287 		if (LDC_INJECT_RESET(ldcp))
2288 			goto force_reset;
2289 		if (LDC_INJECT_DRNGCLEAR(ldcp))
2290 			i_ldc_mem_inject_dring_clear(ldcp);
2291 #endif
2292 		if (trace_length) {
2293 			TRACE_RXHVQ_LENGTH(ldcp, rx_head, rx_tail);
2294 			trace_length = B_FALSE;
2295 		}
2296 
2297 		if (rx_head == rx_tail) {
2298 			D2(ldcp->id, "i_ldc_rx_process_hvq: (0x%llx) "
2299 			    "No packets\n", ldcp->id);
2300 			break;
2301 		}
2302 
2303 		D2(ldcp->id, "i_ldc_rx_process_hvq: head=0x%llx, "
2304 		    "tail=0x%llx\n", rx_head, rx_tail);
2305 		DUMP_LDC_PKT(ldcp, "i_ldc_rx_process_hvq rcd",
2306 		    ldcp->rx_q_va + rx_head);
2307 
2308 		/* get the message */
2309 		msg = (ldc_msg_t *)(ldcp->rx_q_va + rx_head);
2310 
2311 		/* if channel is in RAW mode or data pkt, notify and return */
2312 		if (ldcp->mode == LDC_MODE_RAW) {
2313 			*notify_client = B_TRUE;
2314 			*notify_event |= LDC_EVT_READ;
2315 			break;
2316 		}
2317 
2318 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_INFO)) {
2319 
2320 			/* discard packet if channel is not up */
2321 			if ((ldcp->tstate & ~TS_IN_RESET) != TS_UP) {
2322 
2323 				/* move the head one position */
2324 				rx_head = (rx_head + LDC_PACKET_SIZE) %
2325 				    (ldcp->rx_q_entries << LDC_PACKET_SHIFT);
2326 
2327 				if (rv = i_ldc_set_rx_head(ldcp, rx_head))
2328 					break;
2329 
2330 				continue;
2331 			} else {
2332 				uint64_t dq_head, dq_tail;
2333 
2334 				/* process only RELIABLE mode data packets */
2335 				if (ldcp->mode != LDC_MODE_RELIABLE) {
2336 					if ((ldcp->tstate & TS_IN_RESET) == 0)
2337 						*notify_client = B_TRUE;
2338 					*notify_event |= LDC_EVT_READ;
2339 					break;
2340 				}
2341 
2342 				/* don't process packet if queue full */
2343 				(void) i_ldc_dq_rx_get_state(ldcp, &dq_head,
2344 				    &dq_tail, NULL);
2345 				dq_tail = (dq_tail + LDC_PACKET_SIZE) %
2346 				    (ldcp->rx_dq_entries << LDC_PACKET_SHIFT);
2347 				if (dq_tail == dq_head ||
2348 				    LDC_INJECT_DQFULL(ldcp)) {
2349 					rv = ENOSPC;
2350 					break;
2351 				}
2352 			}
2353 		}
2354 
2355 		/* Check the sequence ID for the message received */
2356 		rv = i_ldc_check_seqid(ldcp, msg);
2357 		if (rv != 0) {
2358 
2359 			DWARN(ldcp->id, "i_ldc_rx_process_hvq: (0x%llx) "
2360 			    "seqid error, q_ptrs=0x%lx,0x%lx", ldcp->id,
2361 			    rx_head, rx_tail);
2362 
2363 			/* Reset last_msg_rcd to start of message */
2364 			if (first_fragment != 0) {
2365 				ldcp->last_msg_rcd = first_fragment - 1;
2366 				first_fragment = 0;
2367 			}
2368 
2369 			/*
2370 			 * Send a NACK due to seqid mismatch
2371 			 */
2372 			rv = i_ldc_send_pkt(ldcp, msg->type, LDC_NACK,
2373 			    (msg->ctrl & LDC_CTRL_MASK));
2374 
2375 			if (rv) {
2376 				cmn_err(CE_NOTE, "i_ldc_rx_process_hvq: "
2377 				    "(0x%lx) err sending CTRL/DATA NACK msg\n",
2378 				    ldcp->id);
2379 
2380 				/* if cannot send NACK - reset channel */
2381 				mutex_enter(&ldcp->tx_lock);
2382 				i_ldc_reset(ldcp, B_TRUE);
2383 				mutex_exit(&ldcp->tx_lock);
2384 
2385 				*notify_client = B_TRUE;
2386 				*notify_event = LDC_EVT_RESET;
2387 				break;
2388 			}
2389 
2390 			/* purge receive queue */
2391 			(void) i_ldc_set_rx_head(ldcp, rx_tail);
2392 			break;
2393 		}
2394 
2395 		/* record the message ID */
2396 		ldcp->last_msg_rcd = msg->seqid;
2397 
2398 		/* process control messages */
2399 		if (msg->type & LDC_CTRL) {
2400 			/* save current internal state */
2401 			uint64_t tstate = ldcp->tstate;
2402 
2403 			rv = i_ldc_ctrlmsg(ldcp, msg);
2404 			if (rv == EAGAIN) {
2405 				/* re-process pkt - state was adjusted */
2406 				continue;
2407 			}
2408 			if (rv == ECONNRESET) {
2409 				*notify_client = B_TRUE;
2410 				*notify_event = LDC_EVT_RESET;
2411 				break;
2412 			}
2413 
2414 			/*
2415 			 * control message processing was successful
2416 			 * channel transitioned to ready for communication
2417 			 */
2418 			if (rv == 0 && ldcp->tstate == TS_UP &&
2419 			    (tstate & ~TS_IN_RESET) !=
2420 			    (ldcp->tstate & ~TS_IN_RESET)) {
2421 				*notify_client = B_TRUE;
2422 				*notify_event = LDC_EVT_UP;
2423 			}
2424 		}
2425 
2426 		/* process data NACKs */
2427 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_NACK)) {
2428 			DWARN(ldcp->id,
2429 			    "i_ldc_rx_process_hvq: (0x%llx) received DATA/NACK",
2430 			    ldcp->id);
2431 			mutex_enter(&ldcp->tx_lock);
2432 			i_ldc_reset(ldcp, B_TRUE);
2433 			mutex_exit(&ldcp->tx_lock);
2434 			*notify_client = B_TRUE;
2435 			*notify_event = LDC_EVT_RESET;
2436 			break;
2437 		}
2438 
2439 		/* process data ACKs */
2440 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) {
2441 			if (rv = i_ldc_process_data_ACK(ldcp, msg)) {
2442 				*notify_client = B_TRUE;
2443 				*notify_event = LDC_EVT_RESET;
2444 				break;
2445 			}
2446 		}
2447 
2448 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_INFO)) {
2449 			ASSERT(ldcp->mode == LDC_MODE_RELIABLE);
2450 
2451 			/*
2452 			 * Copy the data packet to the data queue. Note
2453 			 * that the copy routine updates the rx_head pointer.
2454 			 */
2455 			i_ldc_rxdq_copy(ldcp, &rx_head);
2456 
2457 			if ((ldcp->tstate & TS_IN_RESET) == 0)
2458 				*notify_client = B_TRUE;
2459 			*notify_event |= LDC_EVT_READ;
2460 		} else {
2461 			rx_head = (rx_head + LDC_PACKET_SIZE) %
2462 			    (ldcp->rx_q_entries << LDC_PACKET_SHIFT);
2463 		}
2464 
2465 		/* move the head one position */
2466 		if (rv = i_ldc_set_rx_head(ldcp, rx_head)) {
2467 			*notify_client = B_TRUE;
2468 			*notify_event = LDC_EVT_RESET;
2469 			break;
2470 		}
2471 
2472 	} /* for */
2473 
2474 loop_exit:
2475 
2476 	if (ldcp->mode == LDC_MODE_RELIABLE) {
2477 		/* ACK data packets */
2478 		if ((*notify_event &
2479 		    (LDC_EVT_READ | LDC_EVT_RESET)) == LDC_EVT_READ) {
2480 			int ack_rv;
2481 			ack_rv = i_ldc_send_pkt(ldcp, LDC_DATA, LDC_ACK, 0);
2482 			if (ack_rv && ack_rv != EWOULDBLOCK) {
2483 				cmn_err(CE_NOTE,
2484 				    "i_ldc_rx_process_hvq: (0x%lx) cannot "
2485 				    "send ACK\n", ldcp->id);
2486 
2487 				mutex_enter(&ldcp->tx_lock);
2488 				i_ldc_reset(ldcp, B_FALSE);
2489 				mutex_exit(&ldcp->tx_lock);
2490 
2491 				*notify_client = B_TRUE;
2492 				*notify_event = LDC_EVT_RESET;
2493 				goto skip_ackpeek;
2494 			}
2495 		}
2496 
2497 		/*
2498 		 * If we have no more space on the data queue, make sure
2499 		 * there are no ACKs on the rx queue waiting to be processed.
2500 		 */
2501 		if (rv == ENOSPC) {
2502 			if (i_ldc_rx_ackpeek(ldcp, rx_head, rx_tail) != 0) {
2503 				ldcp->rx_ack_head = ACKPEEK_HEAD_INVALID;
2504 				*notify_client = B_TRUE;
2505 				*notify_event = LDC_EVT_RESET;
2506 			}
2507 			return (rv);
2508 		} else {
2509 			ldcp->rx_ack_head = ACKPEEK_HEAD_INVALID;
2510 		}
2511 	}
2512 
2513 skip_ackpeek:
2514 
2515 	/* Return, indicating whether or not data packets were found */
2516 	if ((*notify_event & (LDC_EVT_READ | LDC_EVT_RESET)) == LDC_EVT_READ)
2517 		return (0);
2518 
2519 	return (ENOMSG);
2520 }
2521 
2522 /*
2523  * Process any ACK packets on the HV receive queue.
2524  *
2525  * This function is only used by RELIABLE mode channels when the
2526  * secondary data queue fills up and there are packets remaining on
2527  * the HV receive queue.
2528  */
2529 int
2530 i_ldc_rx_ackpeek(ldc_chan_t *ldcp, uint64_t rx_head, uint64_t rx_tail)
2531 {
2532 	int		rv = 0;
2533 	ldc_msg_t	*msg;
2534 
2535 	if (ldcp->rx_ack_head == ACKPEEK_HEAD_INVALID)
2536 		ldcp->rx_ack_head = rx_head;
2537 
2538 	while (ldcp->rx_ack_head != rx_tail) {
2539 		msg = (ldc_msg_t *)(ldcp->rx_q_va + ldcp->rx_ack_head);
2540 
2541 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) {
2542 			if (rv = i_ldc_process_data_ACK(ldcp, msg))
2543 				break;
2544 			msg->stype &= ~LDC_ACK;
2545 		}
2546 
2547 		ldcp->rx_ack_head =
2548 		    (ldcp->rx_ack_head + LDC_PACKET_SIZE) %
2549 		    (ldcp->rx_q_entries << LDC_PACKET_SHIFT);
2550 	}
2551 	return (rv);
2552 }
2553 
2554 /* -------------------------------------------------------------------------- */
2555 
2556 /*
2557  * LDC API functions
2558  */
2559 
2560 /*
2561  * Initialize the channel. Allocate internal structure and memory for
2562  * TX/RX queues, and initialize locks.
2563  */
2564 int
2565 ldc_init(uint64_t id, ldc_attr_t *attr, ldc_handle_t *handle)
2566 {
2567 	ldc_chan_t 	*ldcp;
2568 	int		rv, exit_val;
2569 	uint64_t	ra_base, nentries;
2570 	uint64_t	qlen;
2571 
2572 	exit_val = EINVAL;	/* guarantee an error if exit on failure */
2573 
2574 	if (attr == NULL) {
2575 		DWARN(id, "ldc_init: (0x%llx) invalid attr\n", id);
2576 		return (EINVAL);
2577 	}
2578 	if (handle == NULL) {
2579 		DWARN(id, "ldc_init: (0x%llx) invalid handle\n", id);
2580 		return (EINVAL);
2581 	}
2582 
2583 	/* check if channel is valid */
2584 	rv = hv_ldc_tx_qinfo(id, &ra_base, &nentries);
2585 	if (rv == H_ECHANNEL) {
2586 		DWARN(id, "ldc_init: (0x%llx) invalid channel id\n", id);
2587 		return (EINVAL);
2588 	}
2589 
2590 	/* check if the channel has already been initialized */
2591 	mutex_enter(&ldcssp->lock);
2592 	ldcp = ldcssp->chan_list;
2593 	while (ldcp != NULL) {
2594 		if (ldcp->id == id) {
2595 			DWARN(id, "ldc_init: (0x%llx) already initialized\n",
2596 			    id);
2597 			mutex_exit(&ldcssp->lock);
2598 			return (EADDRINUSE);
2599 		}
2600 		ldcp = ldcp->next;
2601 	}
2602 	mutex_exit(&ldcssp->lock);
2603 
2604 	ASSERT(ldcp == NULL);
2605 
2606 	*handle = 0;
2607 
2608 	/* Allocate an ldcp structure */
2609 	ldcp = kmem_zalloc(sizeof (ldc_chan_t), KM_SLEEP);
2610 
2611 	/*
2612 	 * Initialize the channel and Tx lock
2613 	 *
2614 	 * The channel 'lock' protects the entire channel and
2615 	 * should be acquired before initializing, resetting,
2616 	 * destroying or reading from a channel.
2617 	 *
2618 	 * The 'tx_lock' should be acquired prior to transmitting
2619 	 * data over the channel. The lock should also be acquired
2620 	 * prior to channel reconfiguration (in order to prevent
2621 	 * concurrent writes).
2622 	 *
2623 	 * ORDERING: When both locks are being acquired, to prevent
2624 	 * deadlocks, the channel lock should be always acquired prior
2625 	 * to the tx_lock.
2626 	 */
2627 	mutex_init(&ldcp->lock, NULL, MUTEX_DRIVER, NULL);
2628 	mutex_init(&ldcp->tx_lock, NULL, MUTEX_DRIVER, NULL);
2629 
2630 	/* Initialize the channel */
2631 	ldcp->id = id;
2632 	ldcp->cb = NULL;
2633 	ldcp->cb_arg = NULL;
2634 	ldcp->cb_inprogress = B_FALSE;
2635 	ldcp->cb_enabled = B_FALSE;
2636 	ldcp->next = NULL;
2637 
2638 	/* Read attributes */
2639 	ldcp->mode = attr->mode;
2640 	ldcp->devclass = attr->devclass;
2641 	ldcp->devinst = attr->instance;
2642 	ldcp->mtu = (attr->mtu > 0) ? attr->mtu : LDC_DEFAULT_MTU;
2643 
2644 	D1(ldcp->id,
2645 	    "ldc_init: (0x%llx) channel attributes, class=0x%x, "
2646 	    "instance=0x%llx, mode=%d, mtu=%d\n",
2647 	    ldcp->id, ldcp->devclass, ldcp->devinst, ldcp->mode, ldcp->mtu);
2648 
2649 	ldcp->next_vidx = 0;
2650 	ldcp->tstate = TS_IN_RESET;
2651 	ldcp->hstate = 0;
2652 	ldcp->last_msg_snt = LDC_INIT_SEQID;
2653 	ldcp->last_ack_rcd = 0;
2654 	ldcp->last_msg_rcd = 0;
2655 	ldcp->rx_ack_head = ACKPEEK_HEAD_INVALID;
2656 
2657 	ldcp->stream_bufferp = NULL;
2658 	ldcp->exp_dring_list = NULL;
2659 	ldcp->imp_dring_list = NULL;
2660 	ldcp->mhdl_list = NULL;
2661 
2662 	ldcp->tx_intr_state = LDC_INTR_NONE;
2663 	ldcp->rx_intr_state = LDC_INTR_NONE;
2664 
2665 	/* Initialize payload size depending on whether channel is reliable */
2666 	switch (ldcp->mode) {
2667 	case LDC_MODE_RAW:
2668 		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_RAW;
2669 		ldcp->read_p = i_ldc_read_raw;
2670 		ldcp->write_p = i_ldc_write_raw;
2671 		break;
2672 	case LDC_MODE_UNRELIABLE:
2673 		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_UNRELIABLE;
2674 		ldcp->read_p = i_ldc_read_packet;
2675 		ldcp->write_p = i_ldc_write_packet;
2676 		break;
2677 	case LDC_MODE_RELIABLE:
2678 		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_RELIABLE;
2679 
2680 		ldcp->stream_remains = 0;
2681 		ldcp->stream_offset = 0;
2682 		ldcp->stream_bufferp = kmem_alloc(ldcp->mtu, KM_SLEEP);
2683 		ldcp->read_p = i_ldc_read_stream;
2684 		ldcp->write_p = i_ldc_write_stream;
2685 		break;
2686 	default:
2687 		exit_val = EINVAL;
2688 		goto cleanup_on_exit;
2689 	}
2690 
2691 	/*
2692 	 * qlen is (mtu * ldc_mtu_msgs) / pkt_payload. If this
2693 	 * value is smaller than default length of ldc_queue_entries,
2694 	 * qlen is set to ldc_queue_entries. Ensure that computed
2695 	 * length is a power-of-two value.
2696 	 */
2697 	qlen = (ldcp->mtu * ldc_mtu_msgs) / ldcp->pkt_payload;
2698 	if (!ISP2(qlen)) {
2699 		uint64_t	tmp = 1;
2700 		while (qlen) {
2701 			qlen >>= 1; tmp <<= 1;
2702 		}
2703 		qlen = tmp;
2704 	}
2705 
2706 	ldcp->rx_q_entries =
2707 	    (qlen < ldc_queue_entries) ? ldc_queue_entries : qlen;
2708 	ldcp->tx_q_entries = ldcp->rx_q_entries;
2709 
2710 	D1(ldcp->id, "ldc_init: queue length = 0x%llx\n", ldcp->rx_q_entries);
2711 
2712 	/* Create a transmit queue */
2713 	ldcp->tx_q_va = (uint64_t)
2714 	    contig_mem_alloc(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
2715 	if (ldcp->tx_q_va == NULL) {
2716 		cmn_err(CE_WARN,
2717 		    "ldc_init: (0x%lx) TX queue allocation failed\n",
2718 		    ldcp->id);
2719 		exit_val = ENOMEM;
2720 		goto cleanup_on_exit;
2721 	}
2722 	ldcp->tx_q_ra = va_to_pa((caddr_t)ldcp->tx_q_va);
2723 
2724 	D2(ldcp->id, "ldc_init: txq_va=0x%llx, txq_ra=0x%llx, entries=0x%llx\n",
2725 	    ldcp->tx_q_va, ldcp->tx_q_ra, ldcp->tx_q_entries);
2726 
2727 	ldcp->tstate |= TS_TXQ_RDY;
2728 
2729 	/* Create a receive queue */
2730 	ldcp->rx_q_va = (uint64_t)
2731 	    contig_mem_alloc(ldcp->rx_q_entries << LDC_PACKET_SHIFT);
2732 	if (ldcp->rx_q_va == NULL) {
2733 		cmn_err(CE_WARN,
2734 		    "ldc_init: (0x%lx) RX queue allocation failed\n",
2735 		    ldcp->id);
2736 		exit_val = ENOMEM;
2737 		goto cleanup_on_exit;
2738 	}
2739 	ldcp->rx_q_ra = va_to_pa((caddr_t)ldcp->rx_q_va);
2740 
2741 	D2(ldcp->id, "ldc_init: rxq_va=0x%llx, rxq_ra=0x%llx, entries=0x%llx\n",
2742 	    ldcp->rx_q_va, ldcp->rx_q_ra, ldcp->rx_q_entries);
2743 
2744 	ldcp->tstate |= TS_RXQ_RDY;
2745 
2746 	/* Setup a separate read data queue */
2747 	if (ldcp->mode == LDC_MODE_RELIABLE) {
2748 		ldcp->readq_get_state = i_ldc_dq_rx_get_state;
2749 		ldcp->readq_set_head  = i_ldc_set_rxdq_head;
2750 
2751 		/* Make sure the data queue multiplier is a power of 2 */
2752 		if (!ISP2(ldc_rxdq_multiplier)) {
2753 			D1(ldcp->id, "ldc_init: (0x%llx) ldc_rxdq_multiplier "
2754 			    "not a power of 2, resetting", ldcp->id);
2755 			ldc_rxdq_multiplier = LDC_RXDQ_MULTIPLIER;
2756 		}
2757 
2758 		ldcp->rx_dq_entries = ldc_rxdq_multiplier * ldcp->rx_q_entries;
2759 		ldcp->rx_dq_va = (uint64_t)
2760 		    kmem_alloc(ldcp->rx_dq_entries << LDC_PACKET_SHIFT,
2761 		    KM_SLEEP);
2762 		if (ldcp->rx_dq_va == NULL) {
2763 			cmn_err(CE_WARN,
2764 			    "ldc_init: (0x%lx) RX data queue "
2765 			    "allocation failed\n", ldcp->id);
2766 			exit_val = ENOMEM;
2767 			goto cleanup_on_exit;
2768 		}
2769 
2770 		ldcp->rx_dq_head = ldcp->rx_dq_tail = 0;
2771 
2772 		D2(ldcp->id, "ldc_init: rx_dq_va=0x%llx, "
2773 		    "rx_dq_entries=0x%llx\n", ldcp->rx_dq_va,
2774 		    ldcp->rx_dq_entries);
2775 	} else {
2776 		ldcp->readq_get_state = i_ldc_hvq_rx_get_state;
2777 		ldcp->readq_set_head  = i_ldc_set_rx_head;
2778 	}
2779 
2780 	/* Init descriptor ring and memory handle list lock */
2781 	mutex_init(&ldcp->exp_dlist_lock, NULL, MUTEX_DRIVER, NULL);
2782 	mutex_init(&ldcp->imp_dlist_lock, NULL, MUTEX_DRIVER, NULL);
2783 	mutex_init(&ldcp->mlist_lock, NULL, MUTEX_DRIVER, NULL);
2784 
2785 	/* mark status as INITialized */
2786 	ldcp->status = LDC_INIT;
2787 
2788 	/* Add to channel list */
2789 	mutex_enter(&ldcssp->lock);
2790 	ldcp->next = ldcssp->chan_list;
2791 	ldcssp->chan_list = ldcp;
2792 	ldcssp->channel_count++;
2793 	mutex_exit(&ldcssp->lock);
2794 
2795 	/* set the handle */
2796 	*handle = (ldc_handle_t)ldcp;
2797 
2798 	D1(ldcp->id, "ldc_init: (0x%llx) channel initialized\n", ldcp->id);
2799 
2800 	return (0);
2801 
2802 cleanup_on_exit:
2803 
2804 	if (ldcp->mode == LDC_MODE_RELIABLE && ldcp->stream_bufferp)
2805 		kmem_free(ldcp->stream_bufferp, ldcp->mtu);
2806 
2807 	if (ldcp->tstate & TS_TXQ_RDY)
2808 		contig_mem_free((caddr_t)ldcp->tx_q_va,
2809 		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT));
2810 
2811 	if (ldcp->tstate & TS_RXQ_RDY)
2812 		contig_mem_free((caddr_t)ldcp->rx_q_va,
2813 		    (ldcp->rx_q_entries << LDC_PACKET_SHIFT));
2814 
2815 	mutex_destroy(&ldcp->tx_lock);
2816 	mutex_destroy(&ldcp->lock);
2817 
2818 	if (ldcp)
2819 		kmem_free(ldcp, sizeof (ldc_chan_t));
2820 
2821 	return (exit_val);
2822 }
2823 
2824 /*
2825  * Finalizes the LDC connection. It will return EBUSY if the
2826  * channel is open. A ldc_close() has to be done prior to
2827  * a ldc_fini operation. It frees TX/RX queues, associated
2828  * with the channel
2829  */
2830 int
2831 ldc_fini(ldc_handle_t handle)
2832 {
2833 	ldc_chan_t 	*ldcp;
2834 	ldc_chan_t 	*tmp_ldcp;
2835 	uint64_t 	id;
2836 
2837 	if (handle == NULL) {
2838 		DWARN(DBG_ALL_LDCS, "ldc_fini: invalid channel handle\n");
2839 		return (EINVAL);
2840 	}
2841 	ldcp = (ldc_chan_t *)handle;
2842 	id = ldcp->id;
2843 
2844 	mutex_enter(&ldcp->lock);
2845 
2846 	if ((ldcp->tstate & ~TS_IN_RESET) > TS_INIT) {
2847 		DWARN(ldcp->id, "ldc_fini: (0x%llx) channel is open\n",
2848 		    ldcp->id);
2849 		mutex_exit(&ldcp->lock);
2850 		return (EBUSY);
2851 	}
2852 
2853 	/* Remove from the channel list */
2854 	mutex_enter(&ldcssp->lock);
2855 	tmp_ldcp = ldcssp->chan_list;
2856 	if (tmp_ldcp == ldcp) {
2857 		ldcssp->chan_list = ldcp->next;
2858 		ldcp->next = NULL;
2859 	} else {
2860 		while (tmp_ldcp != NULL) {
2861 			if (tmp_ldcp->next == ldcp) {
2862 				tmp_ldcp->next = ldcp->next;
2863 				ldcp->next = NULL;
2864 				break;
2865 			}
2866 			tmp_ldcp = tmp_ldcp->next;
2867 		}
2868 		if (tmp_ldcp == NULL) {
2869 			DWARN(DBG_ALL_LDCS, "ldc_fini: invalid channel hdl\n");
2870 			mutex_exit(&ldcssp->lock);
2871 			mutex_exit(&ldcp->lock);
2872 			return (EINVAL);
2873 		}
2874 	}
2875 
2876 	ldcssp->channel_count--;
2877 
2878 	mutex_exit(&ldcssp->lock);
2879 
2880 	/* Free the map table for this channel */
2881 	if (ldcp->mtbl) {
2882 		(void) hv_ldc_set_map_table(ldcp->id, NULL, NULL);
2883 		if (ldcp->mtbl->contigmem)
2884 			contig_mem_free(ldcp->mtbl->table, ldcp->mtbl->size);
2885 		else
2886 			kmem_free(ldcp->mtbl->table, ldcp->mtbl->size);
2887 		mutex_destroy(&ldcp->mtbl->lock);
2888 		kmem_free(ldcp->mtbl, sizeof (ldc_mtbl_t));
2889 	}
2890 
2891 	/* Destroy descriptor ring and memory handle list lock */
2892 	mutex_destroy(&ldcp->exp_dlist_lock);
2893 	mutex_destroy(&ldcp->imp_dlist_lock);
2894 	mutex_destroy(&ldcp->mlist_lock);
2895 
2896 	/* Free the stream buffer for RELIABLE_MODE */
2897 	if (ldcp->mode == LDC_MODE_RELIABLE && ldcp->stream_bufferp)
2898 		kmem_free(ldcp->stream_bufferp, ldcp->mtu);
2899 
2900 	/* Free the RX queue */
2901 	contig_mem_free((caddr_t)ldcp->rx_q_va,
2902 	    (ldcp->rx_q_entries << LDC_PACKET_SHIFT));
2903 	ldcp->tstate &= ~TS_RXQ_RDY;
2904 
2905 	/* Free the RX data queue */
2906 	if (ldcp->mode == LDC_MODE_RELIABLE) {
2907 		kmem_free((caddr_t)ldcp->rx_dq_va,
2908 		    (ldcp->rx_dq_entries << LDC_PACKET_SHIFT));
2909 	}
2910 
2911 	/* Free the TX queue */
2912 	contig_mem_free((caddr_t)ldcp->tx_q_va,
2913 	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT));
2914 	ldcp->tstate &= ~TS_TXQ_RDY;
2915 
2916 	mutex_exit(&ldcp->lock);
2917 
2918 	/* Destroy mutex */
2919 	mutex_destroy(&ldcp->tx_lock);
2920 	mutex_destroy(&ldcp->lock);
2921 
2922 	/* free channel structure */
2923 	kmem_free(ldcp, sizeof (ldc_chan_t));
2924 
2925 	D1(id, "ldc_fini: (0x%llx) channel finalized\n", id);
2926 
2927 	return (0);
2928 }
2929 
2930 /*
2931  * Open the LDC channel for use. It registers the TX/RX queues
2932  * with the Hypervisor. It also specifies the interrupt number
2933  * and target CPU for this channel
2934  */
2935 int
2936 ldc_open(ldc_handle_t handle)
2937 {
2938 	ldc_chan_t 	*ldcp;
2939 	int 		rv;
2940 
2941 	if (handle == NULL) {
2942 		DWARN(DBG_ALL_LDCS, "ldc_open: invalid channel handle\n");
2943 		return (EINVAL);
2944 	}
2945 
2946 	ldcp = (ldc_chan_t *)handle;
2947 
2948 	mutex_enter(&ldcp->lock);
2949 
2950 	if (ldcp->tstate < TS_INIT) {
2951 		DWARN(ldcp->id,
2952 		    "ldc_open: (0x%llx) channel not initialized\n", ldcp->id);
2953 		mutex_exit(&ldcp->lock);
2954 		return (EFAULT);
2955 	}
2956 	if ((ldcp->tstate & ~TS_IN_RESET) >= TS_OPEN) {
2957 		DWARN(ldcp->id,
2958 		    "ldc_open: (0x%llx) channel is already open\n", ldcp->id);
2959 		mutex_exit(&ldcp->lock);
2960 		return (EFAULT);
2961 	}
2962 
2963 	/*
2964 	 * Unregister/Register the tx queue with the hypervisor
2965 	 */
2966 	rv = hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
2967 	if (rv) {
2968 		cmn_err(CE_WARN,
2969 		    "ldc_open: (0x%lx) channel tx queue unconf failed\n",
2970 		    ldcp->id);
2971 		mutex_exit(&ldcp->lock);
2972 		return (EIO);
2973 	}
2974 
2975 	rv = hv_ldc_tx_qconf(ldcp->id, ldcp->tx_q_ra, ldcp->tx_q_entries);
2976 	if (rv) {
2977 		cmn_err(CE_WARN,
2978 		    "ldc_open: (0x%lx) channel tx queue conf failed\n",
2979 		    ldcp->id);
2980 		mutex_exit(&ldcp->lock);
2981 		return (EIO);
2982 	}
2983 
2984 	D2(ldcp->id, "ldc_open: (0x%llx) registered tx queue with LDC\n",
2985 	    ldcp->id);
2986 
2987 	/*
2988 	 * Unregister/Register the rx queue with the hypervisor
2989 	 */
2990 	rv = hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
2991 	if (rv) {
2992 		cmn_err(CE_WARN,
2993 		    "ldc_open: (0x%lx) channel rx queue unconf failed\n",
2994 		    ldcp->id);
2995 		mutex_exit(&ldcp->lock);
2996 		return (EIO);
2997 	}
2998 
2999 	rv = hv_ldc_rx_qconf(ldcp->id, ldcp->rx_q_ra, ldcp->rx_q_entries);
3000 	if (rv) {
3001 		cmn_err(CE_WARN,
3002 		    "ldc_open: (0x%lx) channel rx queue conf failed\n",
3003 		    ldcp->id);
3004 		mutex_exit(&ldcp->lock);
3005 		return (EIO);
3006 	}
3007 
3008 	D2(ldcp->id, "ldc_open: (0x%llx) registered rx queue with LDC\n",
3009 	    ldcp->id);
3010 
3011 	ldcp->tstate |= TS_QCONF_RDY;
3012 
3013 	/* Register the channel with the channel nexus */
3014 	rv = i_ldc_register_channel(ldcp);
3015 	if (rv && rv != EAGAIN) {
3016 		cmn_err(CE_WARN,
3017 		    "ldc_open: (0x%lx) channel register failed\n", ldcp->id);
3018 		ldcp->tstate &= ~TS_QCONF_RDY;
3019 		(void) hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
3020 		(void) hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
3021 		mutex_exit(&ldcp->lock);
3022 		return (EIO);
3023 	}
3024 
3025 	/* mark channel in OPEN state */
3026 	ldcp->status = LDC_OPEN;
3027 
3028 	/* Read channel state */
3029 	rv = hv_ldc_tx_get_state(ldcp->id,
3030 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
3031 	if (rv) {
3032 		cmn_err(CE_WARN,
3033 		    "ldc_open: (0x%lx) cannot read channel state\n",
3034 		    ldcp->id);
3035 		(void) i_ldc_unregister_channel(ldcp);
3036 		ldcp->tstate &= ~TS_QCONF_RDY;
3037 		(void) hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
3038 		(void) hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
3039 		mutex_exit(&ldcp->lock);
3040 		return (EIO);
3041 	}
3042 
3043 	/*
3044 	 * set the ACKd head to current head location for reliable
3045 	 */
3046 	ldcp->tx_ackd_head = ldcp->tx_head;
3047 
3048 	/* mark channel ready if HV report link is UP (peer alloc'd Rx queue) */
3049 	if (ldcp->link_state == LDC_CHANNEL_UP ||
3050 	    ldcp->link_state == LDC_CHANNEL_RESET) {
3051 		ldcp->tstate |= TS_LINK_READY;
3052 		ldcp->status = LDC_READY;
3053 	}
3054 
3055 	/*
3056 	 * if channel is being opened in RAW mode - no handshake is needed
3057 	 * switch the channel READY and UP state
3058 	 */
3059 	if (ldcp->mode == LDC_MODE_RAW) {
3060 		ldcp->tstate = TS_UP;	/* set bits associated with LDC UP */
3061 		ldcp->status = LDC_UP;
3062 	}
3063 
3064 	mutex_exit(&ldcp->lock);
3065 
3066 	/*
3067 	 * Increment number of open channels
3068 	 */
3069 	mutex_enter(&ldcssp->lock);
3070 	ldcssp->channels_open++;
3071 	mutex_exit(&ldcssp->lock);
3072 
3073 	D1(ldcp->id,
3074 	    "ldc_open: (0x%llx) channel (0x%p) open for use "
3075 	    "(tstate=0x%x, status=0x%x)\n",
3076 	    ldcp->id, ldcp, ldcp->tstate, ldcp->status);
3077 
3078 	return (0);
3079 }
3080 
3081 /*
3082  * Close the LDC connection. It will return EBUSY if there
3083  * are memory segments or descriptor rings either bound to or
3084  * mapped over the channel
3085  */
3086 int
3087 ldc_close(ldc_handle_t handle)
3088 {
3089 	ldc_chan_t 	*ldcp;
3090 	int		rv = 0, retries = 0;
3091 	boolean_t	chk_done = B_FALSE;
3092 
3093 	if (handle == NULL) {
3094 		DWARN(DBG_ALL_LDCS, "ldc_close: invalid channel handle\n");
3095 		return (EINVAL);
3096 	}
3097 	ldcp = (ldc_chan_t *)handle;
3098 
3099 	mutex_enter(&ldcp->lock);
3100 
3101 	/* return error if channel is not open */
3102 	if ((ldcp->tstate & ~TS_IN_RESET) < TS_OPEN) {
3103 		DWARN(ldcp->id,
3104 		    "ldc_close: (0x%llx) channel is not open\n", ldcp->id);
3105 		mutex_exit(&ldcp->lock);
3106 		return (EFAULT);
3107 	}
3108 
3109 	/* if any memory handles, drings, are bound or mapped cannot close */
3110 	if (ldcp->mhdl_list != NULL) {
3111 		DWARN(ldcp->id,
3112 		    "ldc_close: (0x%llx) channel has bound memory handles\n",
3113 		    ldcp->id);
3114 		mutex_exit(&ldcp->lock);
3115 		return (EBUSY);
3116 	}
3117 	if (ldcp->exp_dring_list != NULL) {
3118 		DWARN(ldcp->id,
3119 		    "ldc_close: (0x%llx) channel has bound descriptor rings\n",
3120 		    ldcp->id);
3121 		mutex_exit(&ldcp->lock);
3122 		return (EBUSY);
3123 	}
3124 	if (ldcp->imp_dring_list != NULL) {
3125 		DWARN(ldcp->id,
3126 		    "ldc_close: (0x%llx) channel has mapped descriptor rings\n",
3127 		    ldcp->id);
3128 		mutex_exit(&ldcp->lock);
3129 		return (EBUSY);
3130 	}
3131 
3132 	if (ldcp->cb_inprogress) {
3133 		DWARN(ldcp->id, "ldc_close: (0x%llx) callback active\n",
3134 		    ldcp->id);
3135 		mutex_exit(&ldcp->lock);
3136 		return (EWOULDBLOCK);
3137 	}
3138 
3139 	/* Obtain Tx lock */
3140 	mutex_enter(&ldcp->tx_lock);
3141 
3142 	/*
3143 	 * Wait for pending transmits to complete i.e Tx queue to drain
3144 	 * if there are pending pkts - wait 1 ms and retry again
3145 	 */
3146 	for (;;) {
3147 
3148 		rv = hv_ldc_tx_get_state(ldcp->id,
3149 		    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
3150 		if (rv) {
3151 			cmn_err(CE_WARN,
3152 			    "ldc_close: (0x%lx) cannot read qptrs\n", ldcp->id);
3153 			mutex_exit(&ldcp->tx_lock);
3154 			mutex_exit(&ldcp->lock);
3155 			return (EIO);
3156 		}
3157 
3158 		if (ldcp->tx_head == ldcp->tx_tail ||
3159 		    ldcp->link_state != LDC_CHANNEL_UP) {
3160 			break;
3161 		}
3162 
3163 		if (chk_done) {
3164 			DWARN(ldcp->id,
3165 			    "ldc_close: (0x%llx) Tx queue drain timeout\n",
3166 			    ldcp->id);
3167 			break;
3168 		}
3169 
3170 		/* wait for one ms and try again */
3171 		delay(drv_usectohz(1000));
3172 		chk_done = B_TRUE;
3173 	}
3174 
3175 	/*
3176 	 * Drain the Tx and Rx queues as we are closing the
3177 	 * channel. We dont care about any pending packets.
3178 	 * We have to also drain the queue prior to clearing
3179 	 * pending interrupts, otherwise the HV will trigger
3180 	 * an interrupt the moment the interrupt state is
3181 	 * cleared.
3182 	 */
3183 	(void) i_ldc_txq_reconf(ldcp);
3184 	i_ldc_rxq_drain(ldcp);
3185 
3186 	/*
3187 	 * Unregister the channel with the nexus
3188 	 */
3189 	while ((rv = i_ldc_unregister_channel(ldcp)) != 0) {
3190 
3191 		mutex_exit(&ldcp->tx_lock);
3192 		mutex_exit(&ldcp->lock);
3193 
3194 		/* if any error other than EAGAIN return back */
3195 		if (rv != EAGAIN || retries >= ldc_max_retries) {
3196 			cmn_err(CE_WARN,
3197 			    "ldc_close: (0x%lx) unregister failed, %d\n",
3198 			    ldcp->id, rv);
3199 			return (rv);
3200 		}
3201 
3202 		/*
3203 		 * As there could be pending interrupts we need
3204 		 * to wait and try again
3205 		 */
3206 		drv_usecwait(ldc_close_delay);
3207 		mutex_enter(&ldcp->lock);
3208 		mutex_enter(&ldcp->tx_lock);
3209 		retries++;
3210 	}
3211 
3212 	ldcp->tstate &= ~TS_QCONF_RDY;
3213 
3214 	/*
3215 	 * Unregister queues
3216 	 */
3217 	rv = hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
3218 	if (rv) {
3219 		cmn_err(CE_WARN,
3220 		    "ldc_close: (0x%lx) channel TX queue unconf failed\n",
3221 		    ldcp->id);
3222 		mutex_exit(&ldcp->tx_lock);
3223 		mutex_exit(&ldcp->lock);
3224 		return (EIO);
3225 	}
3226 	rv = hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
3227 	if (rv) {
3228 		cmn_err(CE_WARN,
3229 		    "ldc_close: (0x%lx) channel RX queue unconf failed\n",
3230 		    ldcp->id);
3231 		mutex_exit(&ldcp->tx_lock);
3232 		mutex_exit(&ldcp->lock);
3233 		return (EIO);
3234 	}
3235 
3236 	/* Reset channel state information */
3237 	i_ldc_reset_state(ldcp);
3238 
3239 	/* Mark channel as down and in initialized state */
3240 	ldcp->tx_ackd_head = 0;
3241 	ldcp->tx_head = 0;
3242 	ldcp->tstate = TS_IN_RESET|TS_INIT;
3243 	ldcp->status = LDC_INIT;
3244 
3245 	mutex_exit(&ldcp->tx_lock);
3246 	mutex_exit(&ldcp->lock);
3247 
3248 	/* Decrement number of open channels */
3249 	mutex_enter(&ldcssp->lock);
3250 	ldcssp->channels_open--;
3251 	mutex_exit(&ldcssp->lock);
3252 
3253 	D1(ldcp->id, "ldc_close: (0x%llx) channel closed\n", ldcp->id);
3254 
3255 	return (0);
3256 }
3257 
3258 /*
3259  * Register channel callback
3260  */
3261 int
3262 ldc_reg_callback(ldc_handle_t handle,
3263     uint_t(*cb)(uint64_t event, caddr_t arg), caddr_t arg)
3264 {
3265 	ldc_chan_t *ldcp;
3266 
3267 	if (handle == NULL) {
3268 		DWARN(DBG_ALL_LDCS,
3269 		    "ldc_reg_callback: invalid channel handle\n");
3270 		return (EINVAL);
3271 	}
3272 	if (((uint64_t)cb) < KERNELBASE) {
3273 		DWARN(DBG_ALL_LDCS, "ldc_reg_callback: invalid callback\n");
3274 		return (EINVAL);
3275 	}
3276 	ldcp = (ldc_chan_t *)handle;
3277 
3278 	mutex_enter(&ldcp->lock);
3279 
3280 	if (ldcp->cb) {
3281 		DWARN(ldcp->id, "ldc_reg_callback: (0x%llx) callback exists\n",
3282 		    ldcp->id);
3283 		mutex_exit(&ldcp->lock);
3284 		return (EIO);
3285 	}
3286 	if (ldcp->cb_inprogress) {
3287 		DWARN(ldcp->id, "ldc_reg_callback: (0x%llx) callback active\n",
3288 		    ldcp->id);
3289 		mutex_exit(&ldcp->lock);
3290 		return (EWOULDBLOCK);
3291 	}
3292 
3293 	ldcp->cb = cb;
3294 	ldcp->cb_arg = arg;
3295 	ldcp->cb_enabled = B_TRUE;
3296 
3297 	D1(ldcp->id,
3298 	    "ldc_reg_callback: (0x%llx) registered callback for channel\n",
3299 	    ldcp->id);
3300 
3301 	mutex_exit(&ldcp->lock);
3302 
3303 	return (0);
3304 }
3305 
3306 /*
3307  * Unregister channel callback
3308  */
3309 int
3310 ldc_unreg_callback(ldc_handle_t handle)
3311 {
3312 	ldc_chan_t *ldcp;
3313 
3314 	if (handle == NULL) {
3315 		DWARN(DBG_ALL_LDCS,
3316 		    "ldc_unreg_callback: invalid channel handle\n");
3317 		return (EINVAL);
3318 	}
3319 	ldcp = (ldc_chan_t *)handle;
3320 
3321 	mutex_enter(&ldcp->lock);
3322 
3323 	if (ldcp->cb == NULL) {
3324 		DWARN(ldcp->id,
3325 		    "ldc_unreg_callback: (0x%llx) no callback exists\n",
3326 		    ldcp->id);
3327 		mutex_exit(&ldcp->lock);
3328 		return (EIO);
3329 	}
3330 	if (ldcp->cb_inprogress) {
3331 		DWARN(ldcp->id,
3332 		    "ldc_unreg_callback: (0x%llx) callback active\n",
3333 		    ldcp->id);
3334 		mutex_exit(&ldcp->lock);
3335 		return (EWOULDBLOCK);
3336 	}
3337 
3338 	ldcp->cb = NULL;
3339 	ldcp->cb_arg = NULL;
3340 	ldcp->cb_enabled = B_FALSE;
3341 
3342 	D1(ldcp->id,
3343 	    "ldc_unreg_callback: (0x%llx) unregistered callback for channel\n",
3344 	    ldcp->id);
3345 
3346 	mutex_exit(&ldcp->lock);
3347 
3348 	return (0);
3349 }
3350 
3351 
3352 /*
3353  * Bring a channel up by initiating a handshake with the peer
3354  * This call is asynchronous. It will complete at a later point
3355  * in time when the peer responds back with an RTR.
3356  */
3357 int
3358 ldc_up(ldc_handle_t handle)
3359 {
3360 	int 		rv;
3361 	ldc_chan_t 	*ldcp;
3362 	ldc_msg_t 	*ldcmsg;
3363 	uint64_t 	tx_tail, tstate, link_state;
3364 
3365 	if (handle == NULL) {
3366 		DWARN(DBG_ALL_LDCS, "ldc_up: invalid channel handle\n");
3367 		return (EINVAL);
3368 	}
3369 	ldcp = (ldc_chan_t *)handle;
3370 
3371 	mutex_enter(&ldcp->lock);
3372 
3373 	D1(ldcp->id, "ldc_up: (0x%llx) doing channel UP\n", ldcp->id);
3374 
3375 	/* clear the reset state */
3376 	tstate = ldcp->tstate;
3377 	ldcp->tstate &= ~TS_IN_RESET;
3378 
3379 	if (ldcp->tstate == TS_UP) {
3380 		DWARN(ldcp->id,
3381 		    "ldc_up: (0x%llx) channel is already in UP state\n",
3382 		    ldcp->id);
3383 
3384 		/* mark channel as up */
3385 		ldcp->status = LDC_UP;
3386 
3387 		/*
3388 		 * if channel was in reset state and there was
3389 		 * pending data clear interrupt state. this will
3390 		 * trigger an interrupt, causing the RX handler to
3391 		 * to invoke the client's callback
3392 		 */
3393 		if ((tstate & TS_IN_RESET) &&
3394 		    ldcp->rx_intr_state == LDC_INTR_PEND) {
3395 			D1(ldcp->id,
3396 			    "ldc_up: (0x%llx) channel has pending data, "
3397 			    "clearing interrupt\n", ldcp->id);
3398 			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
3399 		}
3400 
3401 		mutex_exit(&ldcp->lock);
3402 		return (0);
3403 	}
3404 
3405 	/* if the channel is in RAW mode - mark it as UP, if READY */
3406 	if (ldcp->mode == LDC_MODE_RAW && ldcp->tstate >= TS_READY) {
3407 		ldcp->tstate = TS_UP;
3408 		mutex_exit(&ldcp->lock);
3409 		return (0);
3410 	}
3411 
3412 	/* Don't start another handshake if there is one in progress */
3413 	if (ldcp->hstate) {
3414 		D1(ldcp->id,
3415 		    "ldc_up: (0x%llx) channel handshake in progress\n",
3416 		    ldcp->id);
3417 		mutex_exit(&ldcp->lock);
3418 		return (0);
3419 	}
3420 
3421 	mutex_enter(&ldcp->tx_lock);
3422 
3423 	/* save current link state */
3424 	link_state = ldcp->link_state;
3425 
3426 	/* get the current tail for the LDC msg */
3427 	rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
3428 	if (rv) {
3429 		D1(ldcp->id, "ldc_up: (0x%llx) cannot initiate handshake\n",
3430 		    ldcp->id);
3431 		mutex_exit(&ldcp->tx_lock);
3432 		mutex_exit(&ldcp->lock);
3433 		return (ECONNREFUSED);
3434 	}
3435 
3436 	/*
3437 	 * If i_ldc_get_tx_tail() changed link_state to either RESET or UP,
3438 	 * from a previous state of DOWN, then mark the channel as
3439 	 * being ready for handshake.
3440 	 */
3441 	if ((link_state == LDC_CHANNEL_DOWN) &&
3442 	    (link_state != ldcp->link_state)) {
3443 
3444 		ASSERT((ldcp->link_state == LDC_CHANNEL_RESET) ||
3445 		    (ldcp->link_state == LDC_CHANNEL_UP));
3446 
3447 		if (ldcp->mode == LDC_MODE_RAW) {
3448 			ldcp->status = LDC_UP;
3449 			ldcp->tstate = TS_UP;
3450 			mutex_exit(&ldcp->tx_lock);
3451 			mutex_exit(&ldcp->lock);
3452 			return (0);
3453 		} else {
3454 			ldcp->status = LDC_READY;
3455 			ldcp->tstate |= TS_LINK_READY;
3456 		}
3457 
3458 	}
3459 
3460 	ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
3461 	ZERO_PKT(ldcmsg);
3462 
3463 	ldcmsg->type = LDC_CTRL;
3464 	ldcmsg->stype = LDC_INFO;
3465 	ldcmsg->ctrl = LDC_VER;
3466 	ldcp->next_vidx = 0;
3467 	bcopy(&ldc_versions[0], ldcmsg->udata, sizeof (ldc_versions[0]));
3468 
3469 	DUMP_LDC_PKT(ldcp, "ldc_up snd ver", (uint64_t)ldcmsg);
3470 
3471 	/* initiate the send by calling into HV and set the new tail */
3472 	tx_tail = (tx_tail + LDC_PACKET_SIZE) %
3473 	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
3474 
3475 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
3476 	if (rv) {
3477 		DWARN(ldcp->id,
3478 		    "ldc_up: (0x%llx) cannot initiate handshake rv=%d\n",
3479 		    ldcp->id, rv);
3480 		mutex_exit(&ldcp->tx_lock);
3481 		mutex_exit(&ldcp->lock);
3482 		return (rv);
3483 	}
3484 
3485 	ldcp->hstate |= TS_SENT_VER;
3486 	ldcp->tx_tail = tx_tail;
3487 	D1(ldcp->id, "ldc_up: (0x%llx) channel up initiated\n", ldcp->id);
3488 
3489 	mutex_exit(&ldcp->tx_lock);
3490 	mutex_exit(&ldcp->lock);
3491 
3492 	return (rv);
3493 }
3494 
3495 
3496 /*
3497  * Bring a channel down by resetting its state and queues
3498  */
3499 int
3500 ldc_down(ldc_handle_t handle)
3501 {
3502 	ldc_chan_t 	*ldcp;
3503 
3504 	if (handle == NULL) {
3505 		DWARN(DBG_ALL_LDCS, "ldc_down: invalid channel handle\n");
3506 		return (EINVAL);
3507 	}
3508 	ldcp = (ldc_chan_t *)handle;
3509 	mutex_enter(&ldcp->lock);
3510 	mutex_enter(&ldcp->tx_lock);
3511 	i_ldc_reset(ldcp, B_TRUE);
3512 	mutex_exit(&ldcp->tx_lock);
3513 	mutex_exit(&ldcp->lock);
3514 
3515 	return (0);
3516 }
3517 
3518 /*
3519  * Get the current channel status
3520  */
3521 int
3522 ldc_status(ldc_handle_t handle, ldc_status_t *status)
3523 {
3524 	ldc_chan_t *ldcp;
3525 
3526 	if (handle == NULL || status == NULL) {
3527 		DWARN(DBG_ALL_LDCS, "ldc_status: invalid argument\n");
3528 		return (EINVAL);
3529 	}
3530 	ldcp = (ldc_chan_t *)handle;
3531 
3532 	*status = ((ldc_chan_t *)handle)->status;
3533 
3534 	D1(ldcp->id,
3535 	    "ldc_status: (0x%llx) returned status %d\n", ldcp->id, *status);
3536 	return (0);
3537 }
3538 
3539 
3540 /*
3541  * Set the channel's callback mode - enable/disable callbacks
3542  */
3543 int
3544 ldc_set_cb_mode(ldc_handle_t handle, ldc_cb_mode_t cmode)
3545 {
3546 	ldc_chan_t 	*ldcp;
3547 
3548 	if (handle == NULL) {
3549 		DWARN(DBG_ALL_LDCS,
3550 		    "ldc_set_intr_mode: invalid channel handle\n");
3551 		return (EINVAL);
3552 	}
3553 	ldcp = (ldc_chan_t *)handle;
3554 
3555 	/*
3556 	 * Record no callbacks should be invoked
3557 	 */
3558 	mutex_enter(&ldcp->lock);
3559 
3560 	switch (cmode) {
3561 	case LDC_CB_DISABLE:
3562 		if (!ldcp->cb_enabled) {
3563 			DWARN(ldcp->id,
3564 			    "ldc_set_cb_mode: (0x%llx) callbacks disabled\n",
3565 			    ldcp->id);
3566 			break;
3567 		}
3568 		ldcp->cb_enabled = B_FALSE;
3569 
3570 		D1(ldcp->id, "ldc_set_cb_mode: (0x%llx) disabled callbacks\n",
3571 		    ldcp->id);
3572 		break;
3573 
3574 	case LDC_CB_ENABLE:
3575 		if (ldcp->cb_enabled) {
3576 			DWARN(ldcp->id,
3577 			    "ldc_set_cb_mode: (0x%llx) callbacks enabled\n",
3578 			    ldcp->id);
3579 			break;
3580 		}
3581 		ldcp->cb_enabled = B_TRUE;
3582 
3583 		D1(ldcp->id, "ldc_set_cb_mode: (0x%llx) enabled callbacks\n",
3584 		    ldcp->id);
3585 		break;
3586 	}
3587 
3588 	mutex_exit(&ldcp->lock);
3589 
3590 	return (0);
3591 }
3592 
3593 /*
3594  * Check to see if there are packets on the incoming queue
3595  * Will return hasdata = B_FALSE if there are no packets
3596  */
3597 int
3598 ldc_chkq(ldc_handle_t handle, boolean_t *hasdata)
3599 {
3600 	int 		rv;
3601 	uint64_t 	rx_head, rx_tail;
3602 	ldc_chan_t 	*ldcp;
3603 
3604 	if (handle == NULL) {
3605 		DWARN(DBG_ALL_LDCS, "ldc_chkq: invalid channel handle\n");
3606 		return (EINVAL);
3607 	}
3608 	ldcp = (ldc_chan_t *)handle;
3609 
3610 	*hasdata = B_FALSE;
3611 
3612 	mutex_enter(&ldcp->lock);
3613 
3614 	if (ldcp->tstate != TS_UP) {
3615 		D1(ldcp->id,
3616 		    "ldc_chkq: (0x%llx) channel is not up\n", ldcp->id);
3617 		mutex_exit(&ldcp->lock);
3618 		return (ECONNRESET);
3619 	}
3620 
3621 	/* Read packet(s) from the queue */
3622 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
3623 	    &ldcp->link_state);
3624 	if (rv != 0) {
3625 		cmn_err(CE_WARN,
3626 		    "ldc_chkq: (0x%lx) unable to read queue ptrs", ldcp->id);
3627 		mutex_exit(&ldcp->lock);
3628 		return (EIO);
3629 	}
3630 
3631 	/* reset the channel state if the channel went down */
3632 	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
3633 	    ldcp->link_state == LDC_CHANNEL_RESET) {
3634 		mutex_enter(&ldcp->tx_lock);
3635 		i_ldc_reset(ldcp, B_FALSE);
3636 		mutex_exit(&ldcp->tx_lock);
3637 		mutex_exit(&ldcp->lock);
3638 		return (ECONNRESET);
3639 	}
3640 
3641 	switch (ldcp->mode) {
3642 	case LDC_MODE_RAW:
3643 		/*
3644 		 * In raw mode, there are no ctrl packets, so checking
3645 		 * if the queue is non-empty is sufficient.
3646 		 */
3647 		*hasdata = (rx_head != rx_tail);
3648 		break;
3649 
3650 	case LDC_MODE_UNRELIABLE:
3651 		/*
3652 		 * In unreliable mode, if the queue is non-empty, we need
3653 		 * to check if it actually contains unread data packets.
3654 		 * The queue may just contain ctrl packets.
3655 		 */
3656 		if (rx_head != rx_tail) {
3657 			*hasdata = (i_ldc_chkq(ldcp) == 0);
3658 			/*
3659 			 * If no data packets were found on the queue,
3660 			 * all packets must have been control packets
3661 			 * which will now have been processed, leaving
3662 			 * the queue empty. If the interrupt state
3663 			 * is pending, we need to clear the interrupt
3664 			 * here.
3665 			 */
3666 			if (*hasdata == B_FALSE &&
3667 			    ldcp->rx_intr_state == LDC_INTR_PEND) {
3668 				i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
3669 			}
3670 		}
3671 		break;
3672 
3673 	case LDC_MODE_RELIABLE:
3674 		/*
3675 		 * In reliable mode, first check for 'stream_remains' > 0.
3676 		 * Otherwise, if the data queue head and tail pointers
3677 		 * differ, there must be data to read.
3678 		 */
3679 		if (ldcp->stream_remains > 0)
3680 			*hasdata = B_TRUE;
3681 		else
3682 			*hasdata = (ldcp->rx_dq_head != ldcp->rx_dq_tail);
3683 		break;
3684 
3685 	default:
3686 		cmn_err(CE_WARN, "ldc_chkq: (0x%lx) unexpected channel mode "
3687 		    "(0x%x)", ldcp->id, ldcp->mode);
3688 		mutex_exit(&ldcp->lock);
3689 		return (EIO);
3690 	}
3691 
3692 	mutex_exit(&ldcp->lock);
3693 
3694 	return (0);
3695 }
3696 
3697 
3698 /*
3699  * Read 'size' amount of bytes or less. If incoming buffer
3700  * is more than 'size', ENOBUFS is returned.
3701  *
3702  * On return, size contains the number of bytes read.
3703  */
3704 int
3705 ldc_read(ldc_handle_t handle, caddr_t bufp, size_t *sizep)
3706 {
3707 	ldc_chan_t 	*ldcp;
3708 	uint64_t 	rx_head = 0, rx_tail = 0;
3709 	int		rv = 0, exit_val;
3710 
3711 	if (handle == NULL) {
3712 		DWARN(DBG_ALL_LDCS, "ldc_read: invalid channel handle\n");
3713 		return (EINVAL);
3714 	}
3715 
3716 	ldcp = (ldc_chan_t *)handle;
3717 
3718 	/* channel lock */
3719 	mutex_enter(&ldcp->lock);
3720 
3721 	if (ldcp->tstate != TS_UP) {
3722 		DWARN(ldcp->id,
3723 		    "ldc_read: (0x%llx) channel is not in UP state\n",
3724 		    ldcp->id);
3725 		exit_val = ECONNRESET;
3726 	} else if (ldcp->mode == LDC_MODE_RELIABLE) {
3727 		TRACE_RXDQ_LENGTH(ldcp);
3728 		exit_val = ldcp->read_p(ldcp, bufp, sizep);
3729 
3730 		/*
3731 		 * For reliable mode channels, the interrupt
3732 		 * state is only set to pending during
3733 		 * interrupt handling when the secondary data
3734 		 * queue became full, leaving unprocessed
3735 		 * packets on the Rx queue. If the interrupt
3736 		 * state is pending and space is now available
3737 		 * on the data queue, clear the interrupt.
3738 		 */
3739 		if (ldcp->rx_intr_state == LDC_INTR_PEND &&
3740 		    Q_CONTIG_SPACE(ldcp->rx_dq_head, ldcp->rx_dq_tail,
3741 		    ldcp->rx_dq_entries << LDC_PACKET_SHIFT) >=
3742 		    LDC_PACKET_SIZE) {
3743 			/* data queue is not full */
3744 			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
3745 		}
3746 
3747 		mutex_exit(&ldcp->lock);
3748 		return (exit_val);
3749 	} else {
3750 		exit_val = ldcp->read_p(ldcp, bufp, sizep);
3751 	}
3752 
3753 	/*
3754 	 * if queue has been drained - clear interrupt
3755 	 */
3756 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
3757 	    &ldcp->link_state);
3758 	if (rv != 0) {
3759 		cmn_err(CE_WARN, "ldc_read: (0x%lx) unable to read queue ptrs",
3760 		    ldcp->id);
3761 		mutex_enter(&ldcp->tx_lock);
3762 		i_ldc_reset(ldcp, B_TRUE);
3763 		mutex_exit(&ldcp->tx_lock);
3764 		mutex_exit(&ldcp->lock);
3765 		return (ECONNRESET);
3766 	}
3767 
3768 	if (exit_val == 0) {
3769 		if (ldcp->link_state == LDC_CHANNEL_DOWN ||
3770 		    ldcp->link_state == LDC_CHANNEL_RESET) {
3771 			mutex_enter(&ldcp->tx_lock);
3772 			i_ldc_reset(ldcp, B_FALSE);
3773 			exit_val = ECONNRESET;
3774 			mutex_exit(&ldcp->tx_lock);
3775 		}
3776 		if ((rv == 0) &&
3777 		    (ldcp->rx_intr_state == LDC_INTR_PEND) &&
3778 		    (rx_head == rx_tail)) {
3779 			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
3780 		}
3781 	}
3782 
3783 	mutex_exit(&ldcp->lock);
3784 	return (exit_val);
3785 }
3786 
3787 /*
3788  * Basic raw mondo read -
3789  * no interpretation of mondo contents at all.
3790  *
3791  * Enter and exit with ldcp->lock held by caller
3792  */
3793 static int
3794 i_ldc_read_raw(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep)
3795 {
3796 	uint64_t 	q_size_mask;
3797 	ldc_msg_t 	*msgp;
3798 	uint8_t		*msgbufp;
3799 	int		rv = 0, space;
3800 	uint64_t 	rx_head, rx_tail;
3801 
3802 	space = *sizep;
3803 
3804 	if (space < LDC_PAYLOAD_SIZE_RAW)
3805 		return (ENOBUFS);
3806 
3807 	ASSERT(mutex_owned(&ldcp->lock));
3808 
3809 	/* compute mask for increment */
3810 	q_size_mask = (ldcp->rx_q_entries-1)<<LDC_PACKET_SHIFT;
3811 
3812 	/*
3813 	 * Read packet(s) from the queue
3814 	 */
3815 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
3816 	    &ldcp->link_state);
3817 	if (rv != 0) {
3818 		cmn_err(CE_WARN,
3819 		    "ldc_read_raw: (0x%lx) unable to read queue ptrs",
3820 		    ldcp->id);
3821 		return (EIO);
3822 	}
3823 	D1(ldcp->id, "ldc_read_raw: (0x%llx) rxh=0x%llx,"
3824 	    " rxt=0x%llx, st=0x%llx\n",
3825 	    ldcp->id, rx_head, rx_tail, ldcp->link_state);
3826 
3827 	/* reset the channel state if the channel went down */
3828 	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
3829 	    ldcp->link_state == LDC_CHANNEL_RESET) {
3830 		mutex_enter(&ldcp->tx_lock);
3831 		i_ldc_reset(ldcp, B_FALSE);
3832 		mutex_exit(&ldcp->tx_lock);
3833 		return (ECONNRESET);
3834 	}
3835 
3836 	/*
3837 	 * Check for empty queue
3838 	 */
3839 	if (rx_head == rx_tail) {
3840 		*sizep = 0;
3841 		return (0);
3842 	}
3843 
3844 	/* get the message */
3845 	msgp = (ldc_msg_t *)(ldcp->rx_q_va + rx_head);
3846 
3847 	/* if channel is in RAW mode, copy data and return */
3848 	msgbufp = (uint8_t *)&(msgp->raw[0]);
3849 
3850 	bcopy(msgbufp, target_bufp, LDC_PAYLOAD_SIZE_RAW);
3851 
3852 	DUMP_PAYLOAD(ldcp->id, msgbufp);
3853 
3854 	*sizep = LDC_PAYLOAD_SIZE_RAW;
3855 
3856 	rx_head = (rx_head + LDC_PACKET_SIZE) & q_size_mask;
3857 	rv = i_ldc_set_rx_head(ldcp, rx_head);
3858 
3859 	return (rv);
3860 }
3861 
3862 /*
3863  * Process LDC mondos to build larger packets
3864  * with either un-reliable or reliable delivery.
3865  *
3866  * Enter and exit with ldcp->lock held by caller
3867  */
3868 static int
3869 i_ldc_read_packet(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep)
3870 {
3871 	int		rv = 0;
3872 	uint64_t 	rx_head = 0, rx_tail = 0;
3873 	uint64_t 	curr_head = 0;
3874 	ldc_msg_t 	*msg;
3875 	caddr_t 	target;
3876 	size_t 		len = 0, bytes_read = 0;
3877 	int 		retries = 0;
3878 	uint64_t 	q_va, q_size_mask;
3879 	uint64_t	first_fragment = 0;
3880 
3881 	target = target_bufp;
3882 
3883 	ASSERT(mutex_owned(&ldcp->lock));
3884 
3885 	/* check if the buffer and size are valid */
3886 	if (target_bufp == NULL || *sizep == 0) {
3887 		DWARN(ldcp->id, "ldc_read: (0x%llx) invalid buffer/size\n",
3888 		    ldcp->id);
3889 		return (EINVAL);
3890 	}
3891 
3892 	/* Set q_va and compute increment mask for the appropriate queue */
3893 	if (ldcp->mode == LDC_MODE_RELIABLE) {
3894 		q_va	    = ldcp->rx_dq_va;
3895 		q_size_mask = (ldcp->rx_dq_entries-1)<<LDC_PACKET_SHIFT;
3896 	} else {
3897 		q_va	    = ldcp->rx_q_va;
3898 		q_size_mask = (ldcp->rx_q_entries-1)<<LDC_PACKET_SHIFT;
3899 	}
3900 
3901 	/*
3902 	 * Read packet(s) from the queue
3903 	 */
3904 	rv = ldcp->readq_get_state(ldcp, &curr_head, &rx_tail,
3905 	    &ldcp->link_state);
3906 	if (rv != 0) {
3907 		cmn_err(CE_WARN, "ldc_read: (0x%lx) unable to read queue ptrs",
3908 		    ldcp->id);
3909 		mutex_enter(&ldcp->tx_lock);
3910 		i_ldc_reset(ldcp, B_TRUE);
3911 		mutex_exit(&ldcp->tx_lock);
3912 		return (ECONNRESET);
3913 	}
3914 	D1(ldcp->id, "ldc_read: (0x%llx) chd=0x%llx, tl=0x%llx, st=0x%llx\n",
3915 	    ldcp->id, curr_head, rx_tail, ldcp->link_state);
3916 
3917 	/* reset the channel state if the channel went down */
3918 	if (ldcp->link_state != LDC_CHANNEL_UP)
3919 		goto channel_is_reset;
3920 
3921 	for (;;) {
3922 
3923 		if (curr_head == rx_tail) {
3924 			/*
3925 			 * If a data queue is being used, check the Rx HV
3926 			 * queue. This will copy over any new data packets
3927 			 * that have arrived.
3928 			 */
3929 			if (ldcp->mode == LDC_MODE_RELIABLE)
3930 				(void) i_ldc_chkq(ldcp);
3931 
3932 			rv = ldcp->readq_get_state(ldcp,
3933 			    &rx_head, &rx_tail, &ldcp->link_state);
3934 			if (rv != 0) {
3935 				cmn_err(CE_WARN,
3936 				    "ldc_read: (0x%lx) cannot read queue ptrs",
3937 				    ldcp->id);
3938 				mutex_enter(&ldcp->tx_lock);
3939 				i_ldc_reset(ldcp, B_TRUE);
3940 				mutex_exit(&ldcp->tx_lock);
3941 				return (ECONNRESET);
3942 			}
3943 
3944 			if (ldcp->link_state != LDC_CHANNEL_UP)
3945 				goto channel_is_reset;
3946 
3947 			if (curr_head == rx_tail) {
3948 
3949 				/* If in the middle of a fragmented xfer */
3950 				if (first_fragment != 0) {
3951 
3952 					/* wait for ldc_delay usecs */
3953 					drv_usecwait(ldc_delay);
3954 
3955 					if (++retries < ldc_max_retries)
3956 						continue;
3957 
3958 					*sizep = 0;
3959 					if (ldcp->mode != LDC_MODE_RELIABLE)
3960 						ldcp->last_msg_rcd =
3961 						    first_fragment - 1;
3962 					DWARN(DBG_ALL_LDCS, "ldc_read: "
3963 					    "(0x%llx) read timeout", ldcp->id);
3964 					return (EAGAIN);
3965 				}
3966 				*sizep = 0;
3967 				break;
3968 			}
3969 		}
3970 		retries = 0;
3971 
3972 		D2(ldcp->id,
3973 		    "ldc_read: (0x%llx) chd=0x%llx, rxhd=0x%llx, rxtl=0x%llx\n",
3974 		    ldcp->id, curr_head, rx_head, rx_tail);
3975 
3976 		/* get the message */
3977 		msg = (ldc_msg_t *)(q_va + curr_head);
3978 
3979 		DUMP_LDC_PKT(ldcp, "ldc_read received pkt",
3980 		    ldcp->rx_q_va + curr_head);
3981 
3982 		/* Check the message ID for the message received */
3983 		if (ldcp->mode != LDC_MODE_RELIABLE) {
3984 			if ((rv = i_ldc_check_seqid(ldcp, msg)) != 0) {
3985 
3986 				DWARN(ldcp->id, "ldc_read: (0x%llx) seqid "
3987 				    "error, q_ptrs=0x%lx,0x%lx",
3988 				    ldcp->id, rx_head, rx_tail);
3989 
3990 				/* throw away data */
3991 				bytes_read = 0;
3992 
3993 				/* Reset last_msg_rcd to start of message */
3994 				if (first_fragment != 0) {
3995 					ldcp->last_msg_rcd = first_fragment - 1;
3996 					first_fragment = 0;
3997 				}
3998 				/*
3999 				 * Send a NACK -- invalid seqid
4000 				 * get the current tail for the response
4001 				 */
4002 				rv = i_ldc_send_pkt(ldcp, msg->type, LDC_NACK,
4003 				    (msg->ctrl & LDC_CTRL_MASK));
4004 				if (rv) {
4005 					cmn_err(CE_NOTE,
4006 					    "ldc_read: (0x%lx) err sending "
4007 					    "NACK msg\n", ldcp->id);
4008 
4009 					/* if cannot send NACK - reset chan */
4010 					mutex_enter(&ldcp->tx_lock);
4011 					i_ldc_reset(ldcp, B_FALSE);
4012 					mutex_exit(&ldcp->tx_lock);
4013 					rv = ECONNRESET;
4014 					break;
4015 				}
4016 
4017 				/* purge receive queue */
4018 				rv = i_ldc_set_rx_head(ldcp, rx_tail);
4019 
4020 				break;
4021 			}
4022 
4023 			/*
4024 			 * Process any messages of type CTRL messages
4025 			 * Future implementations should try to pass these
4026 			 * to LDC link by resetting the intr state.
4027 			 *
4028 			 * NOTE: not done as a switch() as type can be
4029 			 * both ctrl+data
4030 			 */
4031 			if (msg->type & LDC_CTRL) {
4032 				if (rv = i_ldc_ctrlmsg(ldcp, msg)) {
4033 					if (rv == EAGAIN)
4034 						continue;
4035 					rv = i_ldc_set_rx_head(ldcp, rx_tail);
4036 					*sizep = 0;
4037 					bytes_read = 0;
4038 					break;
4039 				}
4040 			}
4041 
4042 			/* process data ACKs */
4043 			if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) {
4044 				if (rv = i_ldc_process_data_ACK(ldcp, msg)) {
4045 					*sizep = 0;
4046 					bytes_read = 0;
4047 					break;
4048 				}
4049 			}
4050 
4051 			/* process data NACKs */
4052 			if ((msg->type & LDC_DATA) && (msg->stype & LDC_NACK)) {
4053 				DWARN(ldcp->id,
4054 				    "ldc_read: (0x%llx) received DATA/NACK",
4055 				    ldcp->id);
4056 				mutex_enter(&ldcp->tx_lock);
4057 				i_ldc_reset(ldcp, B_TRUE);
4058 				mutex_exit(&ldcp->tx_lock);
4059 				return (ECONNRESET);
4060 			}
4061 		}
4062 
4063 		/* process data messages */
4064 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_INFO)) {
4065 
4066 			uint8_t *msgbuf = (uint8_t *)(
4067 			    (ldcp->mode == LDC_MODE_RELIABLE) ?
4068 			    msg->rdata : msg->udata);
4069 
4070 			D2(ldcp->id,
4071 			    "ldc_read: (0x%llx) received data msg\n", ldcp->id);
4072 
4073 			/* get the packet length */
4074 			len = (msg->env & LDC_LEN_MASK);
4075 
4076 				/*
4077 				 * FUTURE OPTIMIZATION:
4078 				 * dont need to set q head for every
4079 				 * packet we read just need to do this when
4080 				 * we are done or need to wait for more
4081 				 * mondos to make a full packet - this is
4082 				 * currently expensive.
4083 				 */
4084 
4085 			if (first_fragment == 0) {
4086 
4087 				/*
4088 				 * first packets should always have the start
4089 				 * bit set (even for a single packet). If not
4090 				 * throw away the packet
4091 				 */
4092 				if (!(msg->env & LDC_FRAG_START)) {
4093 
4094 					DWARN(DBG_ALL_LDCS,
4095 					    "ldc_read: (0x%llx) not start - "
4096 					    "frag=%x\n", ldcp->id,
4097 					    (msg->env) & LDC_FRAG_MASK);
4098 
4099 					/* toss pkt, inc head, cont reading */
4100 					bytes_read = 0;
4101 					target = target_bufp;
4102 					curr_head =
4103 					    (curr_head + LDC_PACKET_SIZE)
4104 					    & q_size_mask;
4105 					if (rv = ldcp->readq_set_head(ldcp,
4106 					    curr_head))
4107 						break;
4108 
4109 					continue;
4110 				}
4111 
4112 				first_fragment = msg->seqid;
4113 			} else {
4114 				/* check to see if this is a pkt w/ START bit */
4115 				if (msg->env & LDC_FRAG_START) {
4116 					DWARN(DBG_ALL_LDCS,
4117 					    "ldc_read:(0x%llx) unexpected pkt"
4118 					    " env=0x%x discarding %d bytes,"
4119 					    " lastmsg=%d, currentmsg=%d\n",
4120 					    ldcp->id, msg->env&LDC_FRAG_MASK,
4121 					    bytes_read, ldcp->last_msg_rcd,
4122 					    msg->seqid);
4123 
4124 					/* throw data we have read so far */
4125 					bytes_read = 0;
4126 					target = target_bufp;
4127 					first_fragment = msg->seqid;
4128 
4129 					if (rv = ldcp->readq_set_head(ldcp,
4130 					    curr_head))
4131 						break;
4132 				}
4133 			}
4134 
4135 			/* copy (next) pkt into buffer */
4136 			if (len <= (*sizep - bytes_read)) {
4137 				bcopy(msgbuf, target, len);
4138 				target += len;
4139 				bytes_read += len;
4140 			} else {
4141 				/*
4142 				 * there is not enough space in the buffer to
4143 				 * read this pkt. throw message away & continue
4144 				 * reading data from queue
4145 				 */
4146 				DWARN(DBG_ALL_LDCS,
4147 				    "ldc_read: (0x%llx) buffer too small, "
4148 				    "head=0x%lx, expect=%d, got=%d\n", ldcp->id,
4149 				    curr_head, *sizep, bytes_read+len);
4150 
4151 				first_fragment = 0;
4152 				target = target_bufp;
4153 				bytes_read = 0;
4154 
4155 				/* throw away everything received so far */
4156 				if (rv = ldcp->readq_set_head(ldcp, curr_head))
4157 					break;
4158 
4159 				/* continue reading remaining pkts */
4160 				continue;
4161 			}
4162 		}
4163 
4164 		/* set the message id */
4165 		if (ldcp->mode != LDC_MODE_RELIABLE)
4166 			ldcp->last_msg_rcd = msg->seqid;
4167 
4168 		/* move the head one position */
4169 		curr_head = (curr_head + LDC_PACKET_SIZE) & q_size_mask;
4170 
4171 		if (msg->env & LDC_FRAG_STOP) {
4172 
4173 			/*
4174 			 * All pkts that are part of this fragmented transfer
4175 			 * have been read or this was a single pkt read
4176 			 * or there was an error
4177 			 */
4178 
4179 			/* set the queue head */
4180 			if (rv = ldcp->readq_set_head(ldcp, curr_head))
4181 				bytes_read = 0;
4182 
4183 			*sizep = bytes_read;
4184 
4185 			break;
4186 		}
4187 
4188 		/* advance head if it is a CTRL packet or a DATA ACK packet */
4189 		if ((msg->type & LDC_CTRL) ||
4190 		    ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK))) {
4191 
4192 			/* set the queue head */
4193 			if (rv = ldcp->readq_set_head(ldcp, curr_head)) {
4194 				bytes_read = 0;
4195 				break;
4196 			}
4197 
4198 			D2(ldcp->id, "ldc_read: (0x%llx) set ACK qhead 0x%llx",
4199 			    ldcp->id, curr_head);
4200 		}
4201 
4202 	} /* for (;;) */
4203 
4204 	D2(ldcp->id, "ldc_read: (0x%llx) end size=%d", ldcp->id, *sizep);
4205 
4206 	return (rv);
4207 
4208 channel_is_reset:
4209 	mutex_enter(&ldcp->tx_lock);
4210 	i_ldc_reset(ldcp, B_FALSE);
4211 	mutex_exit(&ldcp->tx_lock);
4212 	return (ECONNRESET);
4213 }
4214 
4215 /*
4216  * Fetch and buffer incoming packets so we can hand them back as
4217  * a basic byte stream.
4218  *
4219  * Enter and exit with ldcp->lock held by caller
4220  */
4221 static int
4222 i_ldc_read_stream(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep)
4223 {
4224 	int	rv;
4225 	size_t	size;
4226 
4227 	ASSERT(mutex_owned(&ldcp->lock));
4228 
4229 	D2(ldcp->id, "i_ldc_read_stream: (0x%llx) buffer size=%d",
4230 	    ldcp->id, *sizep);
4231 
4232 	if (ldcp->stream_remains == 0) {
4233 		size = ldcp->mtu;
4234 		rv = i_ldc_read_packet(ldcp,
4235 		    (caddr_t)ldcp->stream_bufferp, &size);
4236 		D2(ldcp->id, "i_ldc_read_stream: read packet (0x%llx) size=%d",
4237 		    ldcp->id, size);
4238 
4239 		if (rv != 0)
4240 			return (rv);
4241 
4242 		ldcp->stream_remains = size;
4243 		ldcp->stream_offset = 0;
4244 	}
4245 
4246 	size = MIN(ldcp->stream_remains, *sizep);
4247 
4248 	bcopy(ldcp->stream_bufferp + ldcp->stream_offset, target_bufp, size);
4249 	ldcp->stream_offset += size;
4250 	ldcp->stream_remains -= size;
4251 
4252 	D2(ldcp->id, "i_ldc_read_stream: (0x%llx) fill from buffer size=%d",
4253 	    ldcp->id, size);
4254 
4255 	*sizep = size;
4256 	return (0);
4257 }
4258 
4259 /*
4260  * Write specified amount of bytes to the channel
4261  * in multiple pkts of pkt_payload size. Each
4262  * packet is tagged with an unique packet ID in
4263  * the case of a reliable link.
4264  *
4265  * On return, size contains the number of bytes written.
4266  */
4267 int
4268 ldc_write(ldc_handle_t handle, caddr_t buf, size_t *sizep)
4269 {
4270 	ldc_chan_t	*ldcp;
4271 	int		rv = 0;
4272 
4273 	if (handle == NULL) {
4274 		DWARN(DBG_ALL_LDCS, "ldc_write: invalid channel handle\n");
4275 		return (EINVAL);
4276 	}
4277 	ldcp = (ldc_chan_t *)handle;
4278 
4279 	mutex_enter(&ldcp->tx_lock);
4280 
4281 	/* check if non-zero data to write */
4282 	if (buf == NULL || sizep == NULL) {
4283 		DWARN(ldcp->id, "ldc_write: (0x%llx) invalid data write\n",
4284 		    ldcp->id);
4285 		mutex_exit(&ldcp->tx_lock);
4286 		return (EINVAL);
4287 	}
4288 
4289 	if (*sizep == 0) {
4290 		DWARN(ldcp->id, "ldc_write: (0x%llx) write size of zero\n",
4291 		    ldcp->id);
4292 		mutex_exit(&ldcp->tx_lock);
4293 		return (0);
4294 	}
4295 
4296 	/* Check if channel is UP for data exchange */
4297 	if (ldcp->tstate != TS_UP) {
4298 		DWARN(ldcp->id,
4299 		    "ldc_write: (0x%llx) channel is not in UP state\n",
4300 		    ldcp->id);
4301 		*sizep = 0;
4302 		rv = ECONNRESET;
4303 	} else {
4304 		rv = ldcp->write_p(ldcp, buf, sizep);
4305 	}
4306 
4307 	mutex_exit(&ldcp->tx_lock);
4308 
4309 	return (rv);
4310 }
4311 
4312 /*
4313  * Write a raw packet to the channel
4314  * On return, size contains the number of bytes written.
4315  */
4316 static int
4317 i_ldc_write_raw(ldc_chan_t *ldcp, caddr_t buf, size_t *sizep)
4318 {
4319 	ldc_msg_t 	*ldcmsg;
4320 	uint64_t 	tx_head, tx_tail, new_tail;
4321 	int		rv = 0;
4322 	size_t		size;
4323 
4324 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
4325 	ASSERT(ldcp->mode == LDC_MODE_RAW);
4326 
4327 	size = *sizep;
4328 
4329 	/*
4330 	 * Check to see if the packet size is less than or
4331 	 * equal to packet size support in raw mode
4332 	 */
4333 	if (size > ldcp->pkt_payload) {
4334 		DWARN(ldcp->id,
4335 		    "ldc_write: (0x%llx) invalid size (0x%llx) for RAW mode\n",
4336 		    ldcp->id, *sizep);
4337 		*sizep = 0;
4338 		return (EMSGSIZE);
4339 	}
4340 
4341 	/* get the qptrs for the tx queue */
4342 	rv = hv_ldc_tx_get_state(ldcp->id,
4343 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
4344 	if (rv != 0) {
4345 		cmn_err(CE_WARN,
4346 		    "ldc_write: (0x%lx) cannot read queue ptrs\n", ldcp->id);
4347 		*sizep = 0;
4348 		return (EIO);
4349 	}
4350 
4351 	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
4352 	    ldcp->link_state == LDC_CHANNEL_RESET) {
4353 		DWARN(ldcp->id,
4354 		    "ldc_write: (0x%llx) channel down/reset\n", ldcp->id);
4355 
4356 		*sizep = 0;
4357 		if (mutex_tryenter(&ldcp->lock)) {
4358 			i_ldc_reset(ldcp, B_FALSE);
4359 			mutex_exit(&ldcp->lock);
4360 		} else {
4361 			/*
4362 			 * Release Tx lock, and then reacquire channel
4363 			 * and Tx lock in correct order
4364 			 */
4365 			mutex_exit(&ldcp->tx_lock);
4366 			mutex_enter(&ldcp->lock);
4367 			mutex_enter(&ldcp->tx_lock);
4368 			i_ldc_reset(ldcp, B_FALSE);
4369 			mutex_exit(&ldcp->lock);
4370 		}
4371 		return (ECONNRESET);
4372 	}
4373 
4374 	tx_tail = ldcp->tx_tail;
4375 	tx_head = ldcp->tx_head;
4376 	new_tail = (tx_tail + LDC_PACKET_SIZE) &
4377 	    ((ldcp->tx_q_entries-1) << LDC_PACKET_SHIFT);
4378 
4379 	if (new_tail == tx_head) {
4380 		DWARN(DBG_ALL_LDCS,
4381 		    "ldc_write: (0x%llx) TX queue is full\n", ldcp->id);
4382 		*sizep = 0;
4383 		return (EWOULDBLOCK);
4384 	}
4385 
4386 	D2(ldcp->id, "ldc_write: (0x%llx) start xfer size=%d",
4387 	    ldcp->id, size);
4388 
4389 	/* Send the data now */
4390 	ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
4391 
4392 	/* copy the data into pkt */
4393 	bcopy((uint8_t *)buf, ldcmsg, size);
4394 
4395 	/* increment tail */
4396 	tx_tail = new_tail;
4397 
4398 	/*
4399 	 * All packets have been copied into the TX queue
4400 	 * update the tail ptr in the HV
4401 	 */
4402 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
4403 	if (rv) {
4404 		if (rv == EWOULDBLOCK) {
4405 			DWARN(ldcp->id, "ldc_write: (0x%llx) write timed out\n",
4406 			    ldcp->id);
4407 			*sizep = 0;
4408 			return (EWOULDBLOCK);
4409 		}
4410 
4411 		*sizep = 0;
4412 		if (mutex_tryenter(&ldcp->lock)) {
4413 			i_ldc_reset(ldcp, B_FALSE);
4414 			mutex_exit(&ldcp->lock);
4415 		} else {
4416 			/*
4417 			 * Release Tx lock, and then reacquire channel
4418 			 * and Tx lock in correct order
4419 			 */
4420 			mutex_exit(&ldcp->tx_lock);
4421 			mutex_enter(&ldcp->lock);
4422 			mutex_enter(&ldcp->tx_lock);
4423 			i_ldc_reset(ldcp, B_FALSE);
4424 			mutex_exit(&ldcp->lock);
4425 		}
4426 		return (ECONNRESET);
4427 	}
4428 
4429 	ldcp->tx_tail = tx_tail;
4430 	*sizep = size;
4431 
4432 	D2(ldcp->id, "ldc_write: (0x%llx) end xfer size=%d", ldcp->id, size);
4433 
4434 	return (rv);
4435 }
4436 
4437 
4438 /*
4439  * Write specified amount of bytes to the channel
4440  * in multiple pkts of pkt_payload size. Each
4441  * packet is tagged with an unique packet ID in
4442  * the case of a reliable link.
4443  *
4444  * On return, size contains the number of bytes written.
4445  * This function needs to ensure that the write size is < MTU size
4446  */
4447 static int
4448 i_ldc_write_packet(ldc_chan_t *ldcp, caddr_t buf, size_t *size)
4449 {
4450 	ldc_msg_t 	*ldcmsg;
4451 	uint64_t 	tx_head, tx_tail, new_tail, start;
4452 	uint64_t	txq_size_mask, numavail;
4453 	uint8_t 	*msgbuf, *source = (uint8_t *)buf;
4454 	size_t 		len, bytes_written = 0, remaining;
4455 	int		rv;
4456 	uint32_t	curr_seqid;
4457 
4458 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
4459 
4460 	ASSERT(ldcp->mode == LDC_MODE_RELIABLE ||
4461 	    ldcp->mode == LDC_MODE_UNRELIABLE);
4462 
4463 	/* compute mask for increment */
4464 	txq_size_mask = (ldcp->tx_q_entries - 1) << LDC_PACKET_SHIFT;
4465 
4466 	/* get the qptrs for the tx queue */
4467 	rv = hv_ldc_tx_get_state(ldcp->id,
4468 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
4469 	if (rv != 0) {
4470 		cmn_err(CE_WARN,
4471 		    "ldc_write: (0x%lx) cannot read queue ptrs\n", ldcp->id);
4472 		*size = 0;
4473 		return (EIO);
4474 	}
4475 
4476 	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
4477 	    ldcp->link_state == LDC_CHANNEL_RESET) {
4478 		DWARN(ldcp->id,
4479 		    "ldc_write: (0x%llx) channel down/reset\n", ldcp->id);
4480 		*size = 0;
4481 		if (mutex_tryenter(&ldcp->lock)) {
4482 			i_ldc_reset(ldcp, B_FALSE);
4483 			mutex_exit(&ldcp->lock);
4484 		} else {
4485 			/*
4486 			 * Release Tx lock, and then reacquire channel
4487 			 * and Tx lock in correct order
4488 			 */
4489 			mutex_exit(&ldcp->tx_lock);
4490 			mutex_enter(&ldcp->lock);
4491 			mutex_enter(&ldcp->tx_lock);
4492 			i_ldc_reset(ldcp, B_FALSE);
4493 			mutex_exit(&ldcp->lock);
4494 		}
4495 		return (ECONNRESET);
4496 	}
4497 
4498 	tx_tail = ldcp->tx_tail;
4499 	new_tail = (tx_tail + LDC_PACKET_SIZE) %
4500 	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
4501 
4502 	/*
4503 	 * Check to see if the queue is full. The check is done using
4504 	 * the appropriate head based on the link mode.
4505 	 */
4506 	i_ldc_get_tx_head(ldcp, &tx_head);
4507 
4508 	if (new_tail == tx_head) {
4509 		DWARN(DBG_ALL_LDCS,
4510 		    "ldc_write: (0x%llx) TX queue is full\n", ldcp->id);
4511 		*size = 0;
4512 		return (EWOULDBLOCK);
4513 	}
4514 
4515 	/*
4516 	 * Make sure that the LDC Tx queue has enough space
4517 	 */
4518 	numavail = (tx_head >> LDC_PACKET_SHIFT) - (tx_tail >> LDC_PACKET_SHIFT)
4519 	    + ldcp->tx_q_entries - 1;
4520 	numavail %= ldcp->tx_q_entries;
4521 
4522 	if (*size > (numavail * ldcp->pkt_payload)) {
4523 		DWARN(DBG_ALL_LDCS,
4524 		    "ldc_write: (0x%llx) TX queue has no space\n", ldcp->id);
4525 		return (EWOULDBLOCK);
4526 	}
4527 
4528 	D2(ldcp->id, "ldc_write: (0x%llx) start xfer size=%d",
4529 	    ldcp->id, *size);
4530 
4531 	/* Send the data now */
4532 	bytes_written = 0;
4533 	curr_seqid = ldcp->last_msg_snt;
4534 	start = tx_tail;
4535 
4536 	while (*size > bytes_written) {
4537 
4538 		ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
4539 
4540 		msgbuf = (uint8_t *)((ldcp->mode == LDC_MODE_RELIABLE) ?
4541 		    ldcmsg->rdata : ldcmsg->udata);
4542 
4543 		ldcmsg->type = LDC_DATA;
4544 		ldcmsg->stype = LDC_INFO;
4545 		ldcmsg->ctrl = 0;
4546 
4547 		remaining = *size - bytes_written;
4548 		len = min(ldcp->pkt_payload, remaining);
4549 		ldcmsg->env = (uint8_t)len;
4550 
4551 		curr_seqid++;
4552 		ldcmsg->seqid = curr_seqid;
4553 
4554 		/* copy the data into pkt */
4555 		bcopy(source, msgbuf, len);
4556 
4557 		source += len;
4558 		bytes_written += len;
4559 
4560 		/* increment tail */
4561 		tx_tail = (tx_tail + LDC_PACKET_SIZE) & txq_size_mask;
4562 
4563 		ASSERT(tx_tail != tx_head);
4564 	}
4565 
4566 	/* Set the start and stop bits */
4567 	ldcmsg->env |= LDC_FRAG_STOP;
4568 	ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + start);
4569 	ldcmsg->env |= LDC_FRAG_START;
4570 
4571 	/*
4572 	 * All packets have been copied into the TX queue
4573 	 * update the tail ptr in the HV
4574 	 */
4575 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
4576 	if (rv == 0) {
4577 		ldcp->tx_tail = tx_tail;
4578 		ldcp->last_msg_snt = curr_seqid;
4579 		*size = bytes_written;
4580 	} else {
4581 		int rv2;
4582 
4583 		if (rv != EWOULDBLOCK) {
4584 			*size = 0;
4585 			if (mutex_tryenter(&ldcp->lock)) {
4586 				i_ldc_reset(ldcp, B_FALSE);
4587 				mutex_exit(&ldcp->lock);
4588 			} else {
4589 				/*
4590 				 * Release Tx lock, and then reacquire channel
4591 				 * and Tx lock in correct order
4592 				 */
4593 				mutex_exit(&ldcp->tx_lock);
4594 				mutex_enter(&ldcp->lock);
4595 				mutex_enter(&ldcp->tx_lock);
4596 				i_ldc_reset(ldcp, B_FALSE);
4597 				mutex_exit(&ldcp->lock);
4598 			}
4599 			return (ECONNRESET);
4600 		}
4601 
4602 		D1(ldcp->id, "hv_tx_set_tail returns 0x%x (head 0x%x, "
4603 		    "old tail 0x%x, new tail 0x%x, qsize=0x%x)\n",
4604 		    rv, ldcp->tx_head, ldcp->tx_tail, tx_tail,
4605 		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT));
4606 
4607 		rv2 = hv_ldc_tx_get_state(ldcp->id,
4608 		    &tx_head, &tx_tail, &ldcp->link_state);
4609 
4610 		D1(ldcp->id, "hv_ldc_tx_get_state returns 0x%x "
4611 		    "(head 0x%x, tail 0x%x state 0x%x)\n",
4612 		    rv2, tx_head, tx_tail, ldcp->link_state);
4613 
4614 		*size = 0;
4615 	}
4616 
4617 	D2(ldcp->id, "ldc_write: (0x%llx) end xfer size=%d", ldcp->id, *size);
4618 
4619 	return (rv);
4620 }
4621 
4622 /*
4623  * Write specified amount of bytes to the channel
4624  * in multiple pkts of pkt_payload size. Each
4625  * packet is tagged with an unique packet ID in
4626  * the case of a reliable link.
4627  *
4628  * On return, size contains the number of bytes written.
4629  * This function needs to ensure that the write size is < MTU size
4630  */
4631 static int
4632 i_ldc_write_stream(ldc_chan_t *ldcp, caddr_t buf, size_t *sizep)
4633 {
4634 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
4635 	ASSERT(ldcp->mode == LDC_MODE_RELIABLE);
4636 
4637 	/* Truncate packet to max of MTU size */
4638 	if (*sizep > ldcp->mtu) *sizep = ldcp->mtu;
4639 	return (i_ldc_write_packet(ldcp, buf, sizep));
4640 }
4641 
4642 
4643 /*
4644  * Interfaces for channel nexus to register/unregister with LDC module
4645  * The nexus will register functions to be used to register individual
4646  * channels with the nexus and enable interrupts for the channels
4647  */
4648 int
4649 ldc_register(ldc_cnex_t *cinfo)
4650 {
4651 	ldc_chan_t	*ldcp;
4652 
4653 	if (cinfo == NULL || cinfo->dip == NULL ||
4654 	    cinfo->reg_chan == NULL || cinfo->unreg_chan == NULL ||
4655 	    cinfo->add_intr == NULL || cinfo->rem_intr == NULL ||
4656 	    cinfo->clr_intr == NULL) {
4657 
4658 		DWARN(DBG_ALL_LDCS, "ldc_register: invalid nexus info\n");
4659 		return (EINVAL);
4660 	}
4661 
4662 	mutex_enter(&ldcssp->lock);
4663 
4664 	/* nexus registration */
4665 	ldcssp->cinfo.dip = cinfo->dip;
4666 	ldcssp->cinfo.reg_chan = cinfo->reg_chan;
4667 	ldcssp->cinfo.unreg_chan = cinfo->unreg_chan;
4668 	ldcssp->cinfo.add_intr = cinfo->add_intr;
4669 	ldcssp->cinfo.rem_intr = cinfo->rem_intr;
4670 	ldcssp->cinfo.clr_intr = cinfo->clr_intr;
4671 
4672 	/* register any channels that might have been previously initialized */
4673 	ldcp = ldcssp->chan_list;
4674 	while (ldcp) {
4675 		if ((ldcp->tstate & TS_QCONF_RDY) &&
4676 		    (ldcp->tstate & TS_CNEX_RDY) == 0)
4677 			(void) i_ldc_register_channel(ldcp);
4678 
4679 		ldcp = ldcp->next;
4680 	}
4681 
4682 	mutex_exit(&ldcssp->lock);
4683 
4684 	return (0);
4685 }
4686 
4687 int
4688 ldc_unregister(ldc_cnex_t *cinfo)
4689 {
4690 	if (cinfo == NULL || cinfo->dip == NULL) {
4691 		DWARN(DBG_ALL_LDCS, "ldc_unregister: invalid nexus info\n");
4692 		return (EINVAL);
4693 	}
4694 
4695 	mutex_enter(&ldcssp->lock);
4696 
4697 	if (cinfo->dip != ldcssp->cinfo.dip) {
4698 		DWARN(DBG_ALL_LDCS, "ldc_unregister: invalid dip\n");
4699 		mutex_exit(&ldcssp->lock);
4700 		return (EINVAL);
4701 	}
4702 
4703 	/* nexus unregister */
4704 	ldcssp->cinfo.dip = NULL;
4705 	ldcssp->cinfo.reg_chan = NULL;
4706 	ldcssp->cinfo.unreg_chan = NULL;
4707 	ldcssp->cinfo.add_intr = NULL;
4708 	ldcssp->cinfo.rem_intr = NULL;
4709 	ldcssp->cinfo.clr_intr = NULL;
4710 
4711 	mutex_exit(&ldcssp->lock);
4712 
4713 	return (0);
4714 }
4715