xref: /titanic_52/usr/src/uts/sun4v/io/ldc.c (revision 2b24ab6b3865caeede9eeb9db6b83e1d89dcd1ea)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * sun4v LDC Link Layer
29  */
30 #include <sys/types.h>
31 #include <sys/file.h>
32 #include <sys/errno.h>
33 #include <sys/open.h>
34 #include <sys/cred.h>
35 #include <sys/kmem.h>
36 #include <sys/conf.h>
37 #include <sys/cmn_err.h>
38 #include <sys/ksynch.h>
39 #include <sys/modctl.h>
40 #include <sys/stat.h> /* needed for S_IFBLK and S_IFCHR */
41 #include <sys/debug.h>
42 #include <sys/cred.h>
43 #include <sys/promif.h>
44 #include <sys/ddi.h>
45 #include <sys/sunddi.h>
46 #include <sys/cyclic.h>
47 #include <sys/machsystm.h>
48 #include <sys/vm.h>
49 #include <sys/cpu.h>
50 #include <sys/intreg.h>
51 #include <sys/machcpuvar.h>
52 #include <sys/mmu.h>
53 #include <sys/pte.h>
54 #include <vm/hat.h>
55 #include <vm/as.h>
56 #include <vm/hat_sfmmu.h>
57 #include <sys/vm_machparam.h>
58 #include <vm/seg_kmem.h>
59 #include <vm/seg_kpm.h>
60 #include <sys/note.h>
61 #include <sys/ivintr.h>
62 #include <sys/hypervisor_api.h>
63 #include <sys/ldc.h>
64 #include <sys/ldc_impl.h>
65 #include <sys/cnex.h>
66 #include <sys/hsvc.h>
67 #include <sys/sdt.h>
68 #include <sys/kldc.h>
69 
70 /* Core internal functions */
71 int i_ldc_h2v_error(int h_error);
72 void i_ldc_reset(ldc_chan_t *ldcp, boolean_t force_reset);
73 
74 static int i_ldc_txq_reconf(ldc_chan_t *ldcp);
75 static int i_ldc_rxq_reconf(ldc_chan_t *ldcp, boolean_t force_reset);
76 static void i_ldc_rxq_drain(ldc_chan_t *ldcp);
77 static void i_ldc_reset_state(ldc_chan_t *ldcp);
78 static void i_ldc_debug_enter(void);
79 
80 static int i_ldc_get_tx_tail(ldc_chan_t *ldcp, uint64_t *tail);
81 static void i_ldc_get_tx_head(ldc_chan_t *ldcp, uint64_t *head);
82 static int i_ldc_set_tx_tail(ldc_chan_t *ldcp, uint64_t tail);
83 static int i_ldc_set_rx_head(ldc_chan_t *ldcp, uint64_t head);
84 static int i_ldc_send_pkt(ldc_chan_t *ldcp, uint8_t pkttype, uint8_t subtype,
85     uint8_t ctrlmsg);
86 
87 static int  i_ldc_set_rxdq_head(ldc_chan_t *ldcp, uint64_t head);
88 static void i_ldc_rxdq_copy(ldc_chan_t *ldcp, uint64_t *head);
89 static uint64_t i_ldc_dq_rx_get_state(ldc_chan_t *ldcp, uint64_t *head,
90     uint64_t *tail, uint64_t *link_state);
91 static uint64_t i_ldc_hvq_rx_get_state(ldc_chan_t *ldcp, uint64_t *head,
92     uint64_t *tail, uint64_t *link_state);
93 static int i_ldc_rx_ackpeek(ldc_chan_t *ldcp, uint64_t rx_head,
94     uint64_t rx_tail);
95 static uint_t i_ldc_chkq(ldc_chan_t *ldcp);
96 
97 /* Interrupt handling functions */
98 static uint_t i_ldc_tx_hdlr(caddr_t arg1, caddr_t arg2);
99 static uint_t i_ldc_rx_hdlr(caddr_t arg1, caddr_t arg2);
100 static uint_t i_ldc_rx_process_hvq(ldc_chan_t *ldcp, boolean_t *notify_client,
101     uint64_t *notify_event);
102 static void i_ldc_clear_intr(ldc_chan_t *ldcp, cnex_intrtype_t itype);
103 
104 /* Read method functions */
105 static int i_ldc_read_raw(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep);
106 static int i_ldc_read_packet(ldc_chan_t *ldcp, caddr_t target_bufp,
107 	size_t *sizep);
108 static int i_ldc_read_stream(ldc_chan_t *ldcp, caddr_t target_bufp,
109 	size_t *sizep);
110 
111 /* Write method functions */
112 static int i_ldc_write_raw(ldc_chan_t *ldcp, caddr_t target_bufp,
113 	size_t *sizep);
114 static int i_ldc_write_packet(ldc_chan_t *ldcp, caddr_t target_bufp,
115 	size_t *sizep);
116 static int i_ldc_write_stream(ldc_chan_t *ldcp, caddr_t target_bufp,
117 	size_t *sizep);
118 
119 /* Pkt processing internal functions */
120 static int i_ldc_check_seqid(ldc_chan_t *ldcp, ldc_msg_t *ldcmsg);
121 static int i_ldc_ctrlmsg(ldc_chan_t *ldcp, ldc_msg_t *ldcmsg);
122 static int i_ldc_process_VER(ldc_chan_t *ldcp, ldc_msg_t *msg);
123 static int i_ldc_process_RTS(ldc_chan_t *ldcp, ldc_msg_t *msg);
124 static int i_ldc_process_RTR(ldc_chan_t *ldcp, ldc_msg_t *msg);
125 static int i_ldc_process_RDX(ldc_chan_t *ldcp, ldc_msg_t *msg);
126 static int i_ldc_process_data_ACK(ldc_chan_t *ldcp, ldc_msg_t *msg);
127 
128 /* LDC Version */
129 static ldc_ver_t ldc_versions[] = { {1, 0} };
130 
131 /* number of supported versions */
132 #define	LDC_NUM_VERS	(sizeof (ldc_versions) / sizeof (ldc_versions[0]))
133 
134 /* Invalid value for the ldc_chan_t rx_ack_head field */
135 #define	ACKPEEK_HEAD_INVALID	((uint64_t)-1)
136 
137 
138 /* Module State Pointer */
139 ldc_soft_state_t *ldcssp;
140 
/*
 * Loadable-module description: the LDC framework is installed as a
 * miscellaneous module (mod_miscops).
 */
141 static struct modldrv md = {
142 	&mod_miscops,			/* This is a misc module */
143 	"sun4v LDC module",		/* Name of the module */
144 };
145 
/*
 * Module linkage wrapping 'md'. This is the structure handed to
 * mod_install(), mod_info() and mod_remove() by _init(), _info() and
 * _fini() respectively.
 */
146 static struct modlinkage ml = {
147 	MODREV_1,
148 	&md,
149 	NULL
150 };
151 
/*
 * Hypervisor service descriptor passed to hsvc_register() by _init(),
 * together with the address of ldc_sup_minor, and to hsvc_unregister()
 * by _fini().
 * NOTE(review): the positional initializers are presumably revision,
 * private data, group, major, minor and module name -- confirm against
 * the hsvc_info_t declaration.
 */
152 static uint64_t ldc_sup_minor;		/* Supported minor number */
153 static hsvc_info_t ldc_hsvc = {
154 	HSVC_REV_1, NULL, HSVC_GROUP_LDC, 1, 1, "ldc"
155 };
156 
157 /*
158  * The no. of MTU size messages that can be stored in
159  * the LDC Tx queue. The number of Tx queue entries is
160  * then computed as (mtu * mtu_msgs)/sizeof(queue_entry)
161  */
162 uint64_t ldc_mtu_msgs = LDC_MTU_MSGS;
163 
164 /*
165  * The minimum queue length. This is the size of the smallest
166  * LDC queue. If the computed value is less than this default,
167  * the queue length is rounded up to 'ldc_queue_entries'.
168  */
169 uint64_t ldc_queue_entries = LDC_QUEUE_ENTRIES;
170 
171 /*
172  * The length of the reliable-mode data queue in terms of the LDC
173  * receive queue length. i.e., the number of times larger than the
174  * LDC receive queue that the data queue should be. The HV receive
175  * queue is required to be a power of 2 and this implementation
176  * assumes the data queue will also be a power of 2. By making the
177  * multiplier a power of 2, we ensure the data queue will be a
178  * power of 2. We use a multiplier because the receive queue is
179  * sized to be sane relative to the MTU and the same is needed for
180  * the data queue.
181  */
182 uint64_t ldc_rxdq_multiplier = LDC_RXDQ_MULTIPLIER;
183 
184 /*
185  * LDC retry count and delay - when the HV returns EWOULDBLOCK
186  * the operation is retried 'ldc_max_retries' times with a
187  * wait of 'ldc_delay' usecs between each retry.
188  */
189 int ldc_max_retries = LDC_MAX_RETRIES;
190 clock_t ldc_delay = LDC_DELAY;
191 
192 /*
193  * Channels which have a devclass satisfying the following
194  * will be reset when entering the prom or kmdb.
195  *
196  *   LDC_DEVCLASS_PROM_RESET(devclass) != 0
197  *
198  * By default, only block device service channels are reset.
199  */
/*
 * The bit is built as a 64-bit value so that it matches the width of
 * ldc_debug_reset_mask; shifting a plain int would be undefined for
 * devclass values of 31 or more.
 */
#define	LDC_DEVCLASS_BIT(dc)		((uint64_t)0x1 << (dc))
#define	LDC_DEVCLASS_PROM_RESET(dc)	\
	(LDC_DEVCLASS_BIT(dc) & ldc_debug_reset_mask)
203 static uint64_t ldc_debug_reset_mask = LDC_DEVCLASS_BIT(LDC_DEV_BLK_SVC);
204 
205 /*
206  * delay between each retry of channel unregistration in
207  * ldc_close(), to wait for pending interrupts to complete.
208  */
209 clock_t ldc_close_delay = LDC_CLOSE_DELAY;
210 
211 #ifdef DEBUG
212 
213 /*
214  * Print debug messages
215  *
216  * set ldcdbg to 0x7 for enabling all msgs
217  * 0x4 - Warnings
218  * 0x2 - All debug messages
219  * 0x1 - Minimal debug messages
220  *
221  * set ldcdbgchan to the channel number you want to debug
222  * setting it to -1 prints debug messages for all channels
223  * NOTE: ldcdbgchan has no effect on error messages
224  */
225 
226 int ldcdbg = 0x0;
227 int64_t ldcdbgchan = DBG_ALL_LDCS;
228 uint64_t ldc_inject_err_flag = 0;
229 
230 void
231 ldcdebug(int64_t id, const char *fmt, ...)
232 {
233 	char buf[512];
234 	va_list ap;
235 
236 	/*
237 	 * Do not return if,
238 	 * caller wants to print it anyway - (id == DBG_ALL_LDCS)
239 	 * debug channel is set to all LDCs - (ldcdbgchan == DBG_ALL_LDCS)
240 	 * debug channel = caller specified channel
241 	 */
242 	if ((id != DBG_ALL_LDCS) &&
243 	    (ldcdbgchan != DBG_ALL_LDCS) &&
244 	    (ldcdbgchan != id)) {
245 		return;
246 	}
247 
248 	va_start(ap, fmt);
249 	(void) vsprintf(buf, fmt, ap);
250 	va_end(ap);
251 
252 	cmn_err(CE_CONT, "?%s", buf);
253 }
254 
255 #define	LDC_ERR_RESET		0x1
256 #define	LDC_ERR_PKTLOSS		0x2
257 #define	LDC_ERR_DQFULL		0x4
258 #define	LDC_ERR_DRNGCLEAR	0x8
259 
260 static boolean_t
261 ldc_inject_error(ldc_chan_t *ldcp, uint64_t error)
262 {
263 	if ((ldcdbgchan != DBG_ALL_LDCS) && (ldcdbgchan != ldcp->id))
264 		return (B_FALSE);
265 
266 	if ((ldc_inject_err_flag & error) == 0)
267 		return (B_FALSE);
268 
269 	/* clear the injection state */
270 	ldc_inject_err_flag &= ~error;
271 
272 	return (B_TRUE);
273 }
274 
/*
 * Conditional trace macros: D1 (ldcdbg bit 0x01), D2 (bit 0x02) and
 * DWARN (bit 0x04) forward their argument list to ldcdebug() when the
 * corresponding bit is set in ldcdbg.
 *
 * They are written as "if (!enabled) { } else ldcdebug" rather than
 * "if (enabled) ldcdebug" so that a use such as
 *
 *	if (cond)
 *		D1(...);
 *	else
 *		other();
 *
 * keeps the else attached to the caller's if, exactly as it is in
 * non-DEBUG builds where the macros expand to nothing.
 */
#define	D1			\
if (!(ldcdbg & 0x01)) {		\
} else				\
	ldcdebug

#define	D2			\
if (!(ldcdbg & 0x02)) {		\
} else				\
	ldcdebug

#define	DWARN			\
if (!(ldcdbg & 0x04)) {		\
} else				\
	ldcdebug

/*
 * Dump the 64 bytes starting at 'addr' as "|xx" hex pairs through D2.
 * The buffer holds 64 three-character cells plus the closing "|\n" and
 * the terminating NUL (65 * 3 bytes). 'addr' is parenthesized before
 * the cast so that pointer expressions are converted as a whole.
 */
#define	DUMP_PAYLOAD(id, addr)						\
{									\
	char ldc_dp_buf[65*3];						\
	int ldc_dp_i;							\
	uint8_t *ldc_dp_src = (uint8_t *)(addr);			\
	for (ldc_dp_i = 0; ldc_dp_i < 64; ldc_dp_i++, ldc_dp_src++)	\
		(void) sprintf(&ldc_dp_buf[ldc_dp_i * 3], "|%02x",	\
		    *ldc_dp_src);					\
	(void) sprintf(&ldc_dp_buf[ldc_dp_i * 3], "|\n");		\
	D2((id), "payload: %s", ldc_dp_buf);				\
}

/*
 * Trace the header of the LDC packet at 'addr' for channel 'c', tagged
 * with the string 's'. The sequence id is reported as 0 for RAW mode
 * channels. DATA packets have their envelope decoded into the
 * fragment start/stop markers and the length field; all other packets
 * print the raw envelope value. Locals carry an ldc_dl_ prefix so they
 * cannot collide with the caller's own variable names.
 */
#define	DUMP_LDC_PKT(c, s, addr)					\
{									\
	ldc_msg_t *ldc_dl_msg = (ldc_msg_t *)(addr);			\
	uint32_t ldc_dl_mid =						\
	    ((c)->mode != LDC_MODE_RAW) ? ldc_dl_msg->seqid : 0;	\
	if (ldc_dl_msg->type == LDC_DATA) {				\
		D2((c)->id, "%s: msg%d (/%x/%x/%x/,env[%c%c,sz=%d])",	\
		    (s), ldc_dl_mid, ldc_dl_msg->type,			\
		    ldc_dl_msg->stype, ldc_dl_msg->ctrl,		\
		    (ldc_dl_msg->env & LDC_FRAG_START) ? 'B' : ' ',	\
		    (ldc_dl_msg->env & LDC_FRAG_STOP) ? 'E' : ' ',	\
		    (ldc_dl_msg->env & LDC_LEN_MASK));			\
	} else {							\
		D2((c)->id, "%s: msg%d (/%x/%x/%x/,env=%x)", (s),	\
		    ldc_dl_mid, ldc_dl_msg->type, ldc_dl_msg->stype,	\
		    ldc_dl_msg->ctrl, ldc_dl_msg->env);			\
	}								\
}
313 
314 #define	LDC_INJECT_RESET(_ldcp)	ldc_inject_error(_ldcp, LDC_ERR_RESET)
315 #define	LDC_INJECT_PKTLOSS(_ldcp) ldc_inject_error(_ldcp, LDC_ERR_PKTLOSS)
316 #define	LDC_INJECT_DQFULL(_ldcp) ldc_inject_error(_ldcp, LDC_ERR_DQFULL)
317 #define	LDC_INJECT_DRNGCLEAR(_ldcp) ldc_inject_error(_ldcp, LDC_ERR_DRNGCLEAR)
318 extern void i_ldc_mem_inject_dring_clear(ldc_chan_t *ldcp);
319 
320 #else
321 
322 #define	DBG_ALL_LDCS -1
323 
324 #define	D1
325 #define	D2
326 #define	DWARN
327 
328 #define	DUMP_PAYLOAD(id, addr)
329 #define	DUMP_LDC_PKT(c, s, addr)
330 
331 #define	LDC_INJECT_RESET(_ldcp)	(B_FALSE)
332 #define	LDC_INJECT_PKTLOSS(_ldcp) (B_FALSE)
333 #define	LDC_INJECT_DQFULL(_ldcp) (B_FALSE)
334 #define	LDC_INJECT_DRNGCLEAR(_ldcp) (B_FALSE)
335 
336 #endif
337 
/*
 * dtrace SDT probes to ease tracing of the rx data queue and HV queue
 * lengths. Only the head, tail and entry count are passed; the length
 * itself is left for the dtrace script to compute when the probe is
 * enabled.
 */
#define	TRACE_RXDQ_LENGTH(ldcp)						\
	DTRACE_PROBE4(rxdq__size,					\
	uint64_t, (ldcp)->id,						\
	uint64_t, (ldcp)->rx_dq_head,					\
	uint64_t, (ldcp)->rx_dq_tail,					\
	uint64_t, (ldcp)->rx_dq_entries)

#define	TRACE_RXHVQ_LENGTH(ldcp, head, tail)				\
	DTRACE_PROBE4(rxhvq__size,					\
	uint64_t, (ldcp)->id,						\
	uint64_t, (head),						\
	uint64_t, (tail),						\
	uint64_t, (ldcp)->rx_q_entries)

/* A dtrace SDT probe to ease tracing of data queue copy operations */
#define	TRACE_RXDQ_COPY(ldcp, bytes)					\
	DTRACE_PROBE2(rxdq__copy, uint64_t, (ldcp)->id, uint64_t, (bytes))

/*
 * The amount of contiguous space at the tail of the queue. When the
 * head is at or before the tail this is the distance from the tail to
 * the end of the queue; otherwise it is the gap between tail and head
 * less one packet.
 */
#define	Q_CONTIG_SPACE(head, tail, size)				\
	((head) <= (tail) ? ((size) - (tail)) :				\
	((head) - (tail) - LDC_PACKET_SIZE))

/*
 * Clear one LDC packet. The macro deliberately has no trailing
 * semicolon so that "ZERO_PKT(p);" is a single statement and can be
 * used as the body of an if/else.
 */
#define	ZERO_PKT(p)			\
	bzero((p), sizeof (ldc_msg_t))

/* Build a cookie value from a table index and a page size code */
#define	IDX2COOKIE(idx, pg_szc, pg_shift)				\
	(((pg_szc) << LDC_COOKIE_PGSZC_SHIFT) | ((idx) << (pg_shift)))
371 
372 int
373 _init(void)
374 {
375 	int status;
376 	extern void i_ldc_mem_set_hsvc_vers(uint64_t major, uint64_t minor);
377 
378 	status = hsvc_register(&ldc_hsvc, &ldc_sup_minor);
379 	if (status != 0) {
380 		cmn_err(CE_NOTE, "!%s: cannot negotiate hypervisor LDC services"
381 		    " group: 0x%lx major: %ld minor: %ld errno: %d",
382 		    ldc_hsvc.hsvc_modname, ldc_hsvc.hsvc_group,
383 		    ldc_hsvc.hsvc_major, ldc_hsvc.hsvc_minor, status);
384 		return (-1);
385 	}
386 
387 	/* Initialize shared memory HV API version checking */
388 	i_ldc_mem_set_hsvc_vers(ldc_hsvc.hsvc_major, ldc_sup_minor);
389 
390 	/* allocate soft state structure */
391 	ldcssp = kmem_zalloc(sizeof (ldc_soft_state_t), KM_SLEEP);
392 
393 	/* Link the module into the system */
394 	status = mod_install(&ml);
395 	if (status != 0) {
396 		kmem_free(ldcssp, sizeof (ldc_soft_state_t));
397 		return (status);
398 	}
399 
400 	/* Initialize the LDC state structure */
401 	mutex_init(&ldcssp->lock, NULL, MUTEX_DRIVER, NULL);
402 
403 	mutex_enter(&ldcssp->lock);
404 
405 	/* Create a cache for memory handles */
406 	ldcssp->memhdl_cache = kmem_cache_create("ldc_memhdl_cache",
407 	    sizeof (ldc_mhdl_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
408 	if (ldcssp->memhdl_cache == NULL) {
409 		DWARN(DBG_ALL_LDCS, "_init: ldc_memhdl cache create failed\n");
410 		mutex_exit(&ldcssp->lock);
411 		return (-1);
412 	}
413 
414 	/* Create cache for memory segment structures */
415 	ldcssp->memseg_cache = kmem_cache_create("ldc_memseg_cache",
416 	    sizeof (ldc_memseg_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
417 	if (ldcssp->memseg_cache == NULL) {
418 		DWARN(DBG_ALL_LDCS, "_init: ldc_memseg cache create failed\n");
419 		mutex_exit(&ldcssp->lock);
420 		return (-1);
421 	}
422 
423 
424 	ldcssp->channel_count = 0;
425 	ldcssp->channels_open = 0;
426 	ldcssp->chan_list = NULL;
427 	ldcssp->dring_list = NULL;
428 
429 	/* Register debug_enter callback */
430 	kldc_set_debug_cb(&i_ldc_debug_enter);
431 
432 	mutex_exit(&ldcssp->lock);
433 
434 	return (0);
435 }
436 
437 int
438 _info(struct modinfo *modinfop)
439 {
440 	/* Report status of the dynamically loadable driver module */
441 	return (mod_info(&ml, modinfop));
442 }
443 
444 int
445 _fini(void)
446 {
447 	int 		rv, status;
448 	ldc_chan_t 	*tmp_ldcp, *ldcp;
449 	ldc_dring_t 	*tmp_dringp, *dringp;
450 	ldc_mem_info_t 	minfo;
451 
452 	/* Unlink the driver module from the system */
453 	status = mod_remove(&ml);
454 	if (status) {
455 		DWARN(DBG_ALL_LDCS, "_fini: mod_remove failed\n");
456 		return (EIO);
457 	}
458 
459 	/* Unregister debug_enter callback */
460 	kldc_set_debug_cb(NULL);
461 
462 	/* Free descriptor rings */
463 	dringp = ldcssp->dring_list;
464 	while (dringp != NULL) {
465 		tmp_dringp = dringp->next;
466 
467 		rv = ldc_mem_dring_info((ldc_dring_handle_t)dringp, &minfo);
468 		if (rv == 0 && minfo.status != LDC_UNBOUND) {
469 			if (minfo.status == LDC_BOUND) {
470 				(void) ldc_mem_dring_unbind(
471 				    (ldc_dring_handle_t)dringp);
472 			}
473 			if (minfo.status == LDC_MAPPED) {
474 				(void) ldc_mem_dring_unmap(
475 				    (ldc_dring_handle_t)dringp);
476 			}
477 		}
478 
479 		(void) ldc_mem_dring_destroy((ldc_dring_handle_t)dringp);
480 		dringp = tmp_dringp;
481 	}
482 	ldcssp->dring_list = NULL;
483 
484 	/* close and finalize channels */
485 	ldcp = ldcssp->chan_list;
486 	while (ldcp != NULL) {
487 		tmp_ldcp = ldcp->next;
488 
489 		(void) ldc_close((ldc_handle_t)ldcp);
490 		(void) ldc_fini((ldc_handle_t)ldcp);
491 
492 		ldcp = tmp_ldcp;
493 	}
494 	ldcssp->chan_list = NULL;
495 
496 	/* Destroy kmem caches */
497 	kmem_cache_destroy(ldcssp->memhdl_cache);
498 	kmem_cache_destroy(ldcssp->memseg_cache);
499 
500 	/*
501 	 * We have successfully "removed" the driver.
502 	 * Destroying soft states
503 	 */
504 	mutex_destroy(&ldcssp->lock);
505 	kmem_free(ldcssp, sizeof (ldc_soft_state_t));
506 
507 	(void) hsvc_unregister(&ldc_hsvc);
508 
509 	return (status);
510 }
511 
512 /* -------------------------------------------------------------------------- */
513 
514 /*
515  * LDC Link Layer Internal Functions
516  */
517 
518 /*
519  * Translate HV Errors to sun4v error codes
520  */
521 int
522 i_ldc_h2v_error(int h_error)
523 {
524 	switch (h_error) {
525 
526 	case	H_EOK:
527 		return (0);
528 
529 	case	H_ENORADDR:
530 		return (EFAULT);
531 
532 	case	H_EBADPGSZ:
533 	case	H_EINVAL:
534 		return (EINVAL);
535 
536 	case	H_EWOULDBLOCK:
537 		return (EWOULDBLOCK);
538 
539 	case	H_ENOACCESS:
540 	case	H_ENOMAP:
541 		return (EACCES);
542 
543 	case	H_EIO:
544 	case	H_ECPUERROR:
545 		return (EIO);
546 
547 	case	H_ENOTSUPPORTED:
548 		return (ENOTSUP);
549 
550 	case 	H_ETOOMANY:
551 		return (ENOSPC);
552 
553 	case	H_ECHANNEL:
554 		return (ECHRNG);
555 	default:
556 		break;
557 	}
558 
559 	return (EIO);
560 }
561 
562 /*
563  * Reconfigure the transmit queue
564  */
565 static int
566 i_ldc_txq_reconf(ldc_chan_t *ldcp)
567 {
568 	int rv;
569 
570 	ASSERT(MUTEX_HELD(&ldcp->lock));
571 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
572 
573 	rv = hv_ldc_tx_qconf(ldcp->id, ldcp->tx_q_ra, ldcp->tx_q_entries);
574 	if (rv) {
575 		cmn_err(CE_WARN,
576 		    "i_ldc_txq_reconf: (0x%lx) cannot set qconf", ldcp->id);
577 		return (EIO);
578 	}
579 	rv = hv_ldc_tx_get_state(ldcp->id, &(ldcp->tx_head),
580 	    &(ldcp->tx_tail), &(ldcp->link_state));
581 	if (rv) {
582 		cmn_err(CE_WARN,
583 		    "i_ldc_txq_reconf: (0x%lx) cannot get qptrs", ldcp->id);
584 		return (EIO);
585 	}
586 	D1(ldcp->id, "i_ldc_txq_reconf: (0x%llx) h=0x%llx,t=0x%llx,"
587 	    "s=0x%llx\n", ldcp->id, ldcp->tx_head, ldcp->tx_tail,
588 	    ldcp->link_state);
589 
590 	return (0);
591 }
592 
593 /*
594  * Reconfigure the receive queue
595  */
596 static int
597 i_ldc_rxq_reconf(ldc_chan_t *ldcp, boolean_t force_reset)
598 {
599 	int rv;
600 	uint64_t rx_head, rx_tail;
601 
602 	ASSERT(MUTEX_HELD(&ldcp->lock));
603 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
604 	    &(ldcp->link_state));
605 	if (rv) {
606 		cmn_err(CE_WARN,
607 		    "i_ldc_rxq_reconf: (0x%lx) cannot get state",
608 		    ldcp->id);
609 		return (EIO);
610 	}
611 
612 	if (force_reset || (ldcp->tstate & ~TS_IN_RESET) == TS_UP) {
613 		rv = hv_ldc_rx_qconf(ldcp->id, ldcp->rx_q_ra,
614 		    ldcp->rx_q_entries);
615 		if (rv) {
616 			cmn_err(CE_WARN,
617 			    "i_ldc_rxq_reconf: (0x%lx) cannot set qconf",
618 			    ldcp->id);
619 			return (EIO);
620 		}
621 		D1(ldcp->id, "i_ldc_rxq_reconf: (0x%llx) completed q reconf",
622 		    ldcp->id);
623 	}
624 
625 	return (0);
626 }
627 
628 
629 /*
630  * Drain the contents of the receive queue
631  */
632 static void
633 i_ldc_rxq_drain(ldc_chan_t *ldcp)
634 {
635 	int rv;
636 	uint64_t rx_head, rx_tail;
637 	int retries = 0;
638 
639 	ASSERT(MUTEX_HELD(&ldcp->lock));
640 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
641 	    &(ldcp->link_state));
642 	if (rv) {
643 		cmn_err(CE_WARN, "i_ldc_rxq_drain: (0x%lx) cannot get state, "
644 		    "rv = 0x%x", ldcp->id, rv);
645 		return;
646 	}
647 
648 	/* If the queue is already empty just return success. */
649 	if (rx_head == rx_tail)
650 		return;
651 
652 	/*
653 	 * We are draining the queue in order to close the channel.
654 	 * Call hv_ldc_rx_set_qhead directly instead of i_ldc_set_rx_head
655 	 * because we do not need to reset the channel if the set
656 	 * qhead fails.
657 	 */
658 	if ((rv = hv_ldc_rx_set_qhead(ldcp->id, rx_tail)) == 0)
659 		return;
660 
661 	while ((rv == H_EWOULDBLOCK) && (retries++ < ldc_max_retries)) {
662 		drv_usecwait(ldc_delay);
663 		if ((rv = hv_ldc_rx_set_qhead(ldcp->id, rx_tail)) == 0)
664 			return;
665 	}
666 
667 	cmn_err(CE_WARN, "i_ldc_rxq_drain: (0x%lx) cannot set qhead 0x%lx, "
668 	    "rv = 0x%x", ldcp->id, rx_tail, rv);
669 }
670 
671 
672 /*
673  * Reset LDC state structure and its contents
674  */
675 static void
676 i_ldc_reset_state(ldc_chan_t *ldcp)
677 {
678 	ASSERT(MUTEX_HELD(&ldcp->lock));
679 	ldcp->last_msg_snt = LDC_INIT_SEQID;
680 	ldcp->last_ack_rcd = 0;
681 	ldcp->last_msg_rcd = 0;
682 	ldcp->tx_ackd_head = ldcp->tx_head;
683 	ldcp->stream_remains = 0;
684 	ldcp->next_vidx = 0;
685 	ldcp->hstate = 0;
686 	ldcp->tstate = TS_OPEN;
687 	ldcp->status = LDC_OPEN;
688 	ldcp->rx_ack_head = ACKPEEK_HEAD_INVALID;
689 	ldcp->rx_dq_head = 0;
690 	ldcp->rx_dq_tail = 0;
691 
692 	if (ldcp->link_state == LDC_CHANNEL_UP ||
693 	    ldcp->link_state == LDC_CHANNEL_RESET) {
694 
695 		if (ldcp->mode == LDC_MODE_RAW) {
696 			ldcp->status = LDC_UP;
697 			ldcp->tstate = TS_UP;
698 		} else {
699 			ldcp->status = LDC_READY;
700 			ldcp->tstate |= TS_LINK_READY;
701 		}
702 	}
703 }
704 
705 /*
706  * Reset a LDC channel
707  */
708 void
709 i_ldc_reset(ldc_chan_t *ldcp, boolean_t force_reset)
710 {
711 	DWARN(ldcp->id, "i_ldc_reset: (0x%llx) channel reset\n", ldcp->id);
712 
713 	ASSERT(MUTEX_HELD(&ldcp->lock));
714 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
715 
716 	/* reconfig Tx and Rx queues */
717 	(void) i_ldc_txq_reconf(ldcp);
718 	(void) i_ldc_rxq_reconf(ldcp, force_reset);
719 
720 	/* Clear Tx and Rx interrupts */
721 	(void) i_ldc_clear_intr(ldcp, CNEX_TX_INTR);
722 	(void) i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
723 
724 	/* Reset channel state */
725 	i_ldc_reset_state(ldcp);
726 
727 	/* Mark channel in reset */
728 	ldcp->tstate |= TS_IN_RESET;
729 }
730 
731 /*
732  * Walk the channel list and reset channels if they are of the right
733  * devclass and their Rx queues have been configured. No locks are
734  * taken because the function is only invoked by the kernel just before
735  * entering the prom or debugger when the system is single-threaded.
736  */
737 static void
738 i_ldc_debug_enter(void)
739 {
740 	ldc_chan_t *ldcp;
741 
742 	ldcp = ldcssp->chan_list;
743 	while (ldcp != NULL) {
744 		if (((ldcp->tstate & TS_QCONF_RDY) == TS_QCONF_RDY) &&
745 		    (LDC_DEVCLASS_PROM_RESET(ldcp->devclass) != 0)) {
746 			(void) hv_ldc_rx_qconf(ldcp->id, ldcp->rx_q_ra,
747 			    ldcp->rx_q_entries);
748 		}
749 		ldcp = ldcp->next;
750 	}
751 }
752 
753 /*
754  * Clear pending interrupts
755  */
756 static void
757 i_ldc_clear_intr(ldc_chan_t *ldcp, cnex_intrtype_t itype)
758 {
759 	ldc_cnex_t *cinfo = &ldcssp->cinfo;
760 
761 	ASSERT(MUTEX_HELD(&ldcp->lock));
762 	ASSERT(cinfo->dip != NULL);
763 
764 	switch (itype) {
765 	case CNEX_TX_INTR:
766 		/* check Tx interrupt */
767 		if (ldcp->tx_intr_state)
768 			ldcp->tx_intr_state = LDC_INTR_NONE;
769 		else
770 			return;
771 		break;
772 
773 	case CNEX_RX_INTR:
774 		/* check Rx interrupt */
775 		if (ldcp->rx_intr_state)
776 			ldcp->rx_intr_state = LDC_INTR_NONE;
777 		else
778 			return;
779 		break;
780 	}
781 
782 	(void) cinfo->clr_intr(cinfo->dip, ldcp->id, itype);
783 	D2(ldcp->id,
784 	    "i_ldc_clear_intr: (0x%llx) cleared 0x%x intr\n",
785 	    ldcp->id, itype);
786 }
787 
788 /*
789  * Set the receive queue head
790  * Resets connection and returns an error if it fails.
791  */
792 static int
793 i_ldc_set_rx_head(ldc_chan_t *ldcp, uint64_t head)
794 {
795 	int 	rv;
796 	int 	retries;
797 
798 	ASSERT(MUTEX_HELD(&ldcp->lock));
799 	for (retries = 0; retries < ldc_max_retries; retries++) {
800 
801 		if ((rv = hv_ldc_rx_set_qhead(ldcp->id, head)) == 0)
802 			return (0);
803 
804 		if (rv != H_EWOULDBLOCK)
805 			break;
806 
807 		/* wait for ldc_delay usecs */
808 		drv_usecwait(ldc_delay);
809 	}
810 
811 	cmn_err(CE_WARN, "ldc_set_rx_qhead: (0x%lx) cannot set qhead 0x%lx, "
812 	    "rv = 0x%x", ldcp->id, head, rv);
813 	mutex_enter(&ldcp->tx_lock);
814 	i_ldc_reset(ldcp, B_TRUE);
815 	mutex_exit(&ldcp->tx_lock);
816 
817 	return (ECONNRESET);
818 }
819 
820 /*
821  * Returns the tx_head to be used for transfer
822  */
823 static void
824 i_ldc_get_tx_head(ldc_chan_t *ldcp, uint64_t *head)
825 {
826 	ldc_msg_t 	*pkt;
827 
828 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
829 
830 	/* get current Tx head */
831 	*head = ldcp->tx_head;
832 
833 	/*
834 	 * Reliable mode will use the ACKd head instead of the regular tx_head.
835 	 * Also in Reliable mode, advance ackd_head for all non DATA/INFO pkts,
836 	 * up to the current location of tx_head. This needs to be done
837 	 * as the peer will only ACK DATA/INFO pkts.
838 	 */
839 	if (ldcp->mode == LDC_MODE_RELIABLE) {
840 		while (ldcp->tx_ackd_head != ldcp->tx_head) {
841 			pkt = (ldc_msg_t *)(ldcp->tx_q_va + ldcp->tx_ackd_head);
842 			if ((pkt->type & LDC_DATA) && (pkt->stype & LDC_INFO)) {
843 				break;
844 			}
845 			/* advance ACKd head */
846 			ldcp->tx_ackd_head =
847 			    (ldcp->tx_ackd_head + LDC_PACKET_SIZE) %
848 			    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
849 		}
850 		*head = ldcp->tx_ackd_head;
851 	}
852 }
853 
854 /*
855  * Returns the tx_tail to be used for transfer
856  * Re-reads the TX queue ptrs if and only if the
857  * the cached head and tail are equal (queue is full)
858  */
859 static int
860 i_ldc_get_tx_tail(ldc_chan_t *ldcp, uint64_t *tail)
861 {
862 	int 		rv;
863 	uint64_t 	current_head, new_tail;
864 
865 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
866 	/* Read the head and tail ptrs from HV */
867 	rv = hv_ldc_tx_get_state(ldcp->id,
868 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
869 	if (rv) {
870 		cmn_err(CE_WARN,
871 		    "i_ldc_get_tx_tail: (0x%lx) cannot read qptrs\n",
872 		    ldcp->id);
873 		return (EIO);
874 	}
875 	if (ldcp->link_state == LDC_CHANNEL_DOWN) {
876 		D1(ldcp->id, "i_ldc_get_tx_tail: (0x%llx) channel not ready\n",
877 		    ldcp->id);
878 		return (ECONNRESET);
879 	}
880 
881 	i_ldc_get_tx_head(ldcp, &current_head);
882 
883 	/* increment the tail */
884 	new_tail = (ldcp->tx_tail + LDC_PACKET_SIZE) %
885 	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
886 
887 	if (new_tail == current_head) {
888 		DWARN(ldcp->id,
889 		    "i_ldc_get_tx_tail: (0x%llx) TX queue is full\n",
890 		    ldcp->id);
891 		return (EWOULDBLOCK);
892 	}
893 
894 	D2(ldcp->id, "i_ldc_get_tx_tail: (0x%llx) head=0x%llx, tail=0x%llx\n",
895 	    ldcp->id, ldcp->tx_head, ldcp->tx_tail);
896 
897 	*tail = ldcp->tx_tail;
898 	return (0);
899 }
900 
901 /*
902  * Set the tail pointer. If HV returns EWOULDBLOCK, it will back off
903  * and retry ldc_max_retries times before returning an error.
904  * Returns 0, EWOULDBLOCK or EIO
905  */
906 static int
907 i_ldc_set_tx_tail(ldc_chan_t *ldcp, uint64_t tail)
908 {
909 	int		rv, retval = EWOULDBLOCK;
910 	int 		retries;
911 
912 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
913 	for (retries = 0; retries < ldc_max_retries; retries++) {
914 
915 		if ((rv = hv_ldc_tx_set_qtail(ldcp->id, tail)) == 0) {
916 			retval = 0;
917 			break;
918 		}
919 		if (rv != H_EWOULDBLOCK) {
920 			DWARN(ldcp->id, "i_ldc_set_tx_tail: (0x%llx) set "
921 			    "qtail=0x%llx failed, rv=%d\n", ldcp->id, tail, rv);
922 			retval = EIO;
923 			break;
924 		}
925 
926 		/* wait for ldc_delay usecs */
927 		drv_usecwait(ldc_delay);
928 	}
929 	return (retval);
930 }
931 
932 /*
933  * Copy a data packet from the HV receive queue to the data queue.
934  * Caller must ensure that the data queue is not already full.
935  *
936  * The *head argument represents the current head pointer for the HV
937  * receive queue. After copying a packet from the HV receive queue,
938  * the *head pointer will be updated. This allows the caller to update
939  * the head pointer in HV using the returned *head value.
940  */
941 void
942 i_ldc_rxdq_copy(ldc_chan_t *ldcp, uint64_t *head)
943 {
944 	uint64_t	q_size, dq_size;
945 
946 	ASSERT(MUTEX_HELD(&ldcp->lock));
947 
948 	q_size  = ldcp->rx_q_entries << LDC_PACKET_SHIFT;
949 	dq_size = ldcp->rx_dq_entries << LDC_PACKET_SHIFT;
950 
951 	ASSERT(Q_CONTIG_SPACE(ldcp->rx_dq_head, ldcp->rx_dq_tail,
952 	    dq_size) >= LDC_PACKET_SIZE);
953 
954 	bcopy((void *)(ldcp->rx_q_va + *head),
955 	    (void *)(ldcp->rx_dq_va + ldcp->rx_dq_tail), LDC_PACKET_SIZE);
956 	TRACE_RXDQ_COPY(ldcp, LDC_PACKET_SIZE);
957 
958 	/* Update rx head */
959 	*head = (*head + LDC_PACKET_SIZE) % q_size;
960 
961 	/* Update dq tail */
962 	ldcp->rx_dq_tail = (ldcp->rx_dq_tail + LDC_PACKET_SIZE) % dq_size;
963 }
964 
965 /*
966  * Update the Rx data queue head pointer
967  */
968 static int
969 i_ldc_set_rxdq_head(ldc_chan_t *ldcp, uint64_t head)
970 {
971 	ldcp->rx_dq_head = head;
972 	return (0);
973 }
974 
975 /*
976  * Get the Rx data queue head and tail pointers
977  */
978 static uint64_t
979 i_ldc_dq_rx_get_state(ldc_chan_t *ldcp, uint64_t *head, uint64_t *tail,
980     uint64_t *link_state)
981 {
982 	_NOTE(ARGUNUSED(link_state))
983 	*head = ldcp->rx_dq_head;
984 	*tail = ldcp->rx_dq_tail;
985 	return (0);
986 }
987 
988 /*
989  * Wrapper for the Rx HV queue set head function. Giving the
990  * data queue and HV queue set head functions the same type.
991  */
992 static uint64_t
993 i_ldc_hvq_rx_get_state(ldc_chan_t *ldcp, uint64_t *head, uint64_t *tail,
994     uint64_t *link_state)
995 {
996 	return (i_ldc_h2v_error(hv_ldc_rx_get_state(ldcp->id, head, tail,
997 	    link_state)));
998 }
999 
1000 /*
1001  * LDC receive interrupt handler
1002  *    triggered for channel with data pending to read
1003  *    i.e. Rx queue content changes
1004  */
1005 static uint_t
1006 i_ldc_rx_hdlr(caddr_t arg1, caddr_t arg2)
1007 {
1008 	_NOTE(ARGUNUSED(arg2))
1009 
1010 	ldc_chan_t	*ldcp;
1011 	boolean_t	notify;
1012 	uint64_t	event;
1013 	int		rv, status;
1014 
1015 	/* Get the channel for which interrupt was received */
1016 	if (arg1 == NULL) {
1017 		cmn_err(CE_WARN, "i_ldc_rx_hdlr: invalid arg\n");
1018 		return (DDI_INTR_UNCLAIMED);
1019 	}
1020 
1021 	ldcp = (ldc_chan_t *)arg1;
1022 
1023 	D1(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) Received intr, ldcp=0x%p\n",
1024 	    ldcp->id, ldcp);
1025 	D1(ldcp->id, "i_ldc_rx_hdlr: (%llx) USR%lx/TS%lx/HS%lx, LSTATE=%lx\n",
1026 	    ldcp->id, ldcp->status, ldcp->tstate, ldcp->hstate,
1027 	    ldcp->link_state);
1028 
1029 	/* Lock channel */
1030 	mutex_enter(&ldcp->lock);
1031 
1032 	/* Mark the interrupt as being actively handled */
1033 	ldcp->rx_intr_state = LDC_INTR_ACTIVE;
1034 
1035 	status = i_ldc_rx_process_hvq(ldcp, &notify, &event);
1036 
1037 	if (ldcp->mode != LDC_MODE_RELIABLE) {
1038 		/*
1039 		 * If there are no data packets on the queue, clear
1040 		 * the interrupt. Otherwise, the ldc_read will clear
1041 		 * interrupts after draining the queue. To indicate the
1042 		 * interrupt has not yet been cleared, it is marked
1043 		 * as pending.
1044 		 */
1045 		if ((event & LDC_EVT_READ) == 0) {
1046 			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
1047 		} else {
1048 			ldcp->rx_intr_state = LDC_INTR_PEND;
1049 		}
1050 	}
1051 
1052 	/* if callbacks are disabled, do not notify */
1053 	if (notify && ldcp->cb_enabled) {
1054 		ldcp->cb_inprogress = B_TRUE;
1055 		mutex_exit(&ldcp->lock);
1056 		rv = ldcp->cb(event, ldcp->cb_arg);
1057 		if (rv) {
1058 			DWARN(ldcp->id,
1059 			    "i_ldc_rx_hdlr: (0x%llx) callback failure",
1060 			    ldcp->id);
1061 		}
1062 		mutex_enter(&ldcp->lock);
1063 		ldcp->cb_inprogress = B_FALSE;
1064 	}
1065 
1066 	if (ldcp->mode == LDC_MODE_RELIABLE) {
1067 		if (status == ENOSPC) {
1068 			/*
1069 			 * Here, ENOSPC indicates the secondary data
1070 			 * queue is full and the Rx queue is non-empty.
1071 			 * Much like how reliable and raw modes are
1072 			 * handled above, since the Rx queue is non-
1073 			 * empty, we mark the interrupt as pending to
1074 			 * indicate it has not yet been cleared.
1075 			 */
1076 			ldcp->rx_intr_state = LDC_INTR_PEND;
1077 		} else {
1078 			/*
1079 			 * We have processed all CTRL packets and
1080 			 * copied all DATA packets to the secondary
1081 			 * queue. Clear the interrupt.
1082 			 */
1083 			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
1084 		}
1085 	}
1086 
1087 	mutex_exit(&ldcp->lock);
1088 
1089 	D1(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) exiting handler", ldcp->id);
1090 
1091 	return (DDI_INTR_CLAIMED);
1092 }
1093 
1094 /*
1095  * Wrapper for the Rx HV queue processing function to be used when
1096  * checking the Rx HV queue for data packets. Unlike the interrupt
1097  * handler code flow, the Rx interrupt is not cleared here and
1098  * callbacks are not made.
1099  */
1100 static uint_t
1101 i_ldc_chkq(ldc_chan_t *ldcp)
1102 {
1103 	boolean_t	notify;
1104 	uint64_t	event;
1105 
1106 	return (i_ldc_rx_process_hvq(ldcp, &notify, &event));
1107 }
1108 
1109 /*
1110  * Send a LDC message
1111  */
1112 static int
1113 i_ldc_send_pkt(ldc_chan_t *ldcp, uint8_t pkttype, uint8_t subtype,
1114     uint8_t ctrlmsg)
1115 {
1116 	int		rv;
1117 	ldc_msg_t 	*pkt;
1118 	uint64_t	tx_tail;
1119 	uint32_t	curr_seqid;
1120 
1121 	/* Obtain Tx lock */
1122 	mutex_enter(&ldcp->tx_lock);
1123 
1124 	curr_seqid = ldcp->last_msg_snt;
1125 
1126 	/* get the current tail for the message */
1127 	rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
1128 	if (rv) {
1129 		DWARN(ldcp->id,
1130 		    "i_ldc_send_pkt: (0x%llx) error sending pkt, "
1131 		    "type=0x%x,subtype=0x%x,ctrl=0x%x\n",
1132 		    ldcp->id, pkttype, subtype, ctrlmsg);
1133 		mutex_exit(&ldcp->tx_lock);
1134 		return (rv);
1135 	}
1136 
1137 	pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
1138 	ZERO_PKT(pkt);
1139 
1140 	/* Initialize the packet */
1141 	pkt->type = pkttype;
1142 	pkt->stype = subtype;
1143 	pkt->ctrl = ctrlmsg;
1144 
1145 	/* Store ackid/seqid iff it is RELIABLE mode & not a RTS/RTR message */
1146 	if (((ctrlmsg & LDC_CTRL_MASK) != LDC_RTS) &&
1147 	    ((ctrlmsg & LDC_CTRL_MASK) != LDC_RTR)) {
1148 		curr_seqid++;
1149 		if (ldcp->mode != LDC_MODE_RAW) {
1150 			pkt->seqid = curr_seqid;
1151 			pkt->ackid = ldcp->last_msg_rcd;
1152 		}
1153 	}
1154 	DUMP_LDC_PKT(ldcp, "i_ldc_send_pkt", (uint64_t)pkt);
1155 
1156 	/* initiate the send by calling into HV and set the new tail */
1157 	tx_tail = (tx_tail + LDC_PACKET_SIZE) %
1158 	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1159 
1160 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
1161 	if (rv) {
1162 		DWARN(ldcp->id,
1163 		    "i_ldc_send_pkt:(0x%llx) error sending pkt, "
1164 		    "type=0x%x,stype=0x%x,ctrl=0x%x\n",
1165 		    ldcp->id, pkttype, subtype, ctrlmsg);
1166 		mutex_exit(&ldcp->tx_lock);
1167 		return (EIO);
1168 	}
1169 
1170 	ldcp->last_msg_snt = curr_seqid;
1171 	ldcp->tx_tail = tx_tail;
1172 
1173 	mutex_exit(&ldcp->tx_lock);
1174 	return (0);
1175 }
1176 
1177 /*
1178  * Checks if packet was received in right order
1179  * in the case of a reliable link.
1180  * Returns 0 if in order, else EIO
1181  */
1182 static int
1183 i_ldc_check_seqid(ldc_chan_t *ldcp, ldc_msg_t *msg)
1184 {
1185 	/* No seqid checking for RAW mode */
1186 	if (ldcp->mode == LDC_MODE_RAW)
1187 		return (0);
1188 
1189 	/* No seqid checking for version, RTS, RTR message */
1190 	if (msg->ctrl == LDC_VER ||
1191 	    msg->ctrl == LDC_RTS ||
1192 	    msg->ctrl == LDC_RTR)
1193 		return (0);
1194 
1195 	/* Initial seqid to use is sent in RTS/RTR and saved in last_msg_rcd */
1196 	if (msg->seqid != (ldcp->last_msg_rcd + 1)) {
1197 		DWARN(ldcp->id,
1198 		    "i_ldc_check_seqid: (0x%llx) out-of-order pkt, got 0x%x, "
1199 		    "expecting 0x%x\n", ldcp->id, msg->seqid,
1200 		    (ldcp->last_msg_rcd + 1));
1201 		return (EIO);
1202 	}
1203 
1204 #ifdef DEBUG
1205 	if (LDC_INJECT_PKTLOSS(ldcp)) {
1206 		DWARN(ldcp->id,
1207 		    "i_ldc_check_seqid: (0x%llx) inject pkt loss\n", ldcp->id);
1208 		return (EIO);
1209 	}
1210 #endif
1211 
1212 	return (0);
1213 }
1214 
1215 
1216 /*
1217  * Process an incoming version ctrl message
1218  */
1219 static int
1220 i_ldc_process_VER(ldc_chan_t *ldcp, ldc_msg_t *msg)
1221 {
1222 	int 		rv = 0, idx = ldcp->next_vidx;
1223 	ldc_msg_t 	*pkt;
1224 	uint64_t	tx_tail;
1225 	ldc_ver_t	*rcvd_ver;
1226 
1227 	/* get the received version */
1228 	rcvd_ver = (ldc_ver_t *)((uint64_t)msg + LDC_PAYLOAD_VER_OFF);
1229 
1230 	D2(ldcp->id, "i_ldc_process_VER: (0x%llx) received VER v%u.%u\n",
1231 	    ldcp->id, rcvd_ver->major, rcvd_ver->minor);
1232 
1233 	/* Obtain Tx lock */
1234 	mutex_enter(&ldcp->tx_lock);
1235 
1236 	switch (msg->stype) {
1237 	case LDC_INFO:
1238 
1239 		if ((ldcp->tstate & ~TS_IN_RESET) == TS_VREADY) {
1240 			(void) i_ldc_txq_reconf(ldcp);
1241 			i_ldc_reset_state(ldcp);
1242 			mutex_exit(&ldcp->tx_lock);
1243 			return (EAGAIN);
1244 		}
1245 
1246 		/* get the current tail and pkt for the response */
1247 		rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
1248 		if (rv != 0) {
1249 			DWARN(ldcp->id,
1250 			    "i_ldc_process_VER: (0x%llx) err sending "
1251 			    "version ACK/NACK\n", ldcp->id);
1252 			i_ldc_reset(ldcp, B_TRUE);
1253 			mutex_exit(&ldcp->tx_lock);
1254 			return (ECONNRESET);
1255 		}
1256 
1257 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
1258 		ZERO_PKT(pkt);
1259 
1260 		/* initialize the packet */
1261 		pkt->type = LDC_CTRL;
1262 		pkt->ctrl = LDC_VER;
1263 
1264 		for (;;) {
1265 
1266 			D1(ldcp->id, "i_ldc_process_VER: got %u.%u chk %u.%u\n",
1267 			    rcvd_ver->major, rcvd_ver->minor,
1268 			    ldc_versions[idx].major, ldc_versions[idx].minor);
1269 
1270 			if (rcvd_ver->major == ldc_versions[idx].major) {
1271 				/* major version match - ACK version */
1272 				pkt->stype = LDC_ACK;
1273 
1274 				/*
1275 				 * lower minor version to the one this endpt
1276 				 * supports, if necessary
1277 				 */
1278 				if (rcvd_ver->minor > ldc_versions[idx].minor)
1279 					rcvd_ver->minor =
1280 					    ldc_versions[idx].minor;
1281 				bcopy(rcvd_ver, pkt->udata, sizeof (*rcvd_ver));
1282 
1283 				break;
1284 			}
1285 
1286 			if (rcvd_ver->major > ldc_versions[idx].major) {
1287 
1288 				D1(ldcp->id, "i_ldc_process_VER: using next"
1289 				    " lower idx=%d, v%u.%u\n", idx,
1290 				    ldc_versions[idx].major,
1291 				    ldc_versions[idx].minor);
1292 
1293 				/* nack with next lower version */
1294 				pkt->stype = LDC_NACK;
1295 				bcopy(&ldc_versions[idx], pkt->udata,
1296 				    sizeof (ldc_versions[idx]));
1297 				ldcp->next_vidx = idx;
1298 				break;
1299 			}
1300 
1301 			/* next major version */
1302 			idx++;
1303 
1304 			D1(ldcp->id, "i_ldc_process_VER: inc idx %x\n", idx);
1305 
1306 			if (idx == LDC_NUM_VERS) {
1307 				/* no version match - send NACK */
1308 				pkt->stype = LDC_NACK;
1309 				bzero(pkt->udata, sizeof (ldc_ver_t));
1310 				ldcp->next_vidx = 0;
1311 				break;
1312 			}
1313 		}
1314 
1315 		/* initiate the send by calling into HV and set the new tail */
1316 		tx_tail = (tx_tail + LDC_PACKET_SIZE) %
1317 		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1318 
1319 		rv = i_ldc_set_tx_tail(ldcp, tx_tail);
1320 		if (rv == 0) {
1321 			ldcp->tx_tail = tx_tail;
1322 			if (pkt->stype == LDC_ACK) {
1323 				D2(ldcp->id, "i_ldc_process_VER: (0x%llx) sent"
1324 				    " version ACK\n", ldcp->id);
1325 				/* Save the ACK'd version */
1326 				ldcp->version.major = rcvd_ver->major;
1327 				ldcp->version.minor = rcvd_ver->minor;
1328 				ldcp->hstate |= TS_RCVD_VER;
1329 				ldcp->tstate |= TS_VER_DONE;
1330 				D1(DBG_ALL_LDCS,
1331 				    "(0x%llx) Sent ACK, "
1332 				    "Agreed on version v%u.%u\n",
1333 				    ldcp->id, rcvd_ver->major, rcvd_ver->minor);
1334 			}
1335 		} else {
1336 			DWARN(ldcp->id,
1337 			    "i_ldc_process_VER: (0x%llx) error sending "
1338 			    "ACK/NACK\n", ldcp->id);
1339 			i_ldc_reset(ldcp, B_TRUE);
1340 			mutex_exit(&ldcp->tx_lock);
1341 			return (ECONNRESET);
1342 		}
1343 
1344 		break;
1345 
1346 	case LDC_ACK:
1347 		if ((ldcp->tstate & ~TS_IN_RESET) == TS_VREADY) {
1348 			if (ldcp->version.major != rcvd_ver->major ||
1349 			    ldcp->version.minor != rcvd_ver->minor) {
1350 
1351 				/* mismatched version - reset connection */
1352 				DWARN(ldcp->id,
1353 				    "i_ldc_process_VER: (0x%llx) recvd"
1354 				    " ACK ver != sent ACK ver\n", ldcp->id);
1355 				i_ldc_reset(ldcp, B_TRUE);
1356 				mutex_exit(&ldcp->tx_lock);
1357 				return (ECONNRESET);
1358 			}
1359 		} else {
1360 			/* SUCCESS - we have agreed on a version */
1361 			ldcp->version.major = rcvd_ver->major;
1362 			ldcp->version.minor = rcvd_ver->minor;
1363 			ldcp->tstate |= TS_VER_DONE;
1364 		}
1365 
1366 		D1(ldcp->id, "(0x%llx) Got ACK, Agreed on version v%u.%u\n",
1367 		    ldcp->id, rcvd_ver->major, rcvd_ver->minor);
1368 
1369 		/* initiate RTS-RTR-RDX handshake */
1370 		rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
1371 		if (rv) {
1372 			DWARN(ldcp->id,
1373 		    "i_ldc_process_VER: (0x%llx) cannot send RTS\n",
1374 			    ldcp->id);
1375 			i_ldc_reset(ldcp, B_TRUE);
1376 			mutex_exit(&ldcp->tx_lock);
1377 			return (ECONNRESET);
1378 		}
1379 
1380 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
1381 		ZERO_PKT(pkt);
1382 
1383 		pkt->type = LDC_CTRL;
1384 		pkt->stype = LDC_INFO;
1385 		pkt->ctrl = LDC_RTS;
1386 		pkt->env = ldcp->mode;
1387 		if (ldcp->mode != LDC_MODE_RAW)
1388 			pkt->seqid = LDC_INIT_SEQID;
1389 
1390 		ldcp->last_msg_rcd = LDC_INIT_SEQID;
1391 
1392 		DUMP_LDC_PKT(ldcp, "i_ldc_process_VER snd rts", (uint64_t)pkt);
1393 
1394 		/* initiate the send by calling into HV and set the new tail */
1395 		tx_tail = (tx_tail + LDC_PACKET_SIZE) %
1396 		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1397 
1398 		rv = i_ldc_set_tx_tail(ldcp, tx_tail);
1399 		if (rv) {
1400 			D2(ldcp->id,
1401 			    "i_ldc_process_VER: (0x%llx) no listener\n",
1402 			    ldcp->id);
1403 			i_ldc_reset(ldcp, B_TRUE);
1404 			mutex_exit(&ldcp->tx_lock);
1405 			return (ECONNRESET);
1406 		}
1407 
1408 		ldcp->tx_tail = tx_tail;
1409 		ldcp->hstate |= TS_SENT_RTS;
1410 
1411 		break;
1412 
1413 	case LDC_NACK:
1414 		/* check if version in NACK is zero */
1415 		if (rcvd_ver->major == 0 && rcvd_ver->minor == 0) {
1416 			/* version handshake failure */
1417 			DWARN(DBG_ALL_LDCS,
1418 			    "i_ldc_process_VER: (0x%llx) no version match\n",
1419 			    ldcp->id);
1420 			i_ldc_reset(ldcp, B_TRUE);
1421 			mutex_exit(&ldcp->tx_lock);
1422 			return (ECONNRESET);
1423 		}
1424 
1425 		/* get the current tail and pkt for the response */
1426 		rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
1427 		if (rv != 0) {
1428 			cmn_err(CE_NOTE,
1429 			    "i_ldc_process_VER: (0x%lx) err sending "
1430 			    "version ACK/NACK\n", ldcp->id);
1431 			i_ldc_reset(ldcp, B_TRUE);
1432 			mutex_exit(&ldcp->tx_lock);
1433 			return (ECONNRESET);
1434 		}
1435 
1436 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
1437 		ZERO_PKT(pkt);
1438 
1439 		/* initialize the packet */
1440 		pkt->type = LDC_CTRL;
1441 		pkt->ctrl = LDC_VER;
1442 		pkt->stype = LDC_INFO;
1443 
1444 		/* check ver in NACK msg has a match */
1445 		for (;;) {
1446 			if (rcvd_ver->major == ldc_versions[idx].major) {
1447 				/*
1448 				 * major version match - resubmit request
1449 				 * if lower minor version to the one this endpt
1450 				 * supports, if necessary
1451 				 */
1452 				if (rcvd_ver->minor > ldc_versions[idx].minor)
1453 					rcvd_ver->minor =
1454 					    ldc_versions[idx].minor;
1455 				bcopy(rcvd_ver, pkt->udata, sizeof (*rcvd_ver));
1456 				break;
1457 			}
1458 
1459 			if (rcvd_ver->major > ldc_versions[idx].major) {
1460 
1461 				D1(ldcp->id, "i_ldc_process_VER: using next"
1462 				    " lower idx=%d, v%u.%u\n", idx,
1463 				    ldc_versions[idx].major,
1464 				    ldc_versions[idx].minor);
1465 
1466 				/* send next lower version */
1467 				bcopy(&ldc_versions[idx], pkt->udata,
1468 				    sizeof (ldc_versions[idx]));
1469 				ldcp->next_vidx = idx;
1470 				break;
1471 			}
1472 
1473 			/* next version */
1474 			idx++;
1475 
1476 			D1(ldcp->id, "i_ldc_process_VER: inc idx %x\n", idx);
1477 
1478 			if (idx == LDC_NUM_VERS) {
1479 				/* no version match - terminate */
1480 				ldcp->next_vidx = 0;
1481 				mutex_exit(&ldcp->tx_lock);
1482 				return (ECONNRESET);
1483 			}
1484 		}
1485 
1486 		/* initiate the send by calling into HV and set the new tail */
1487 		tx_tail = (tx_tail + LDC_PACKET_SIZE) %
1488 		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1489 
1490 		rv = i_ldc_set_tx_tail(ldcp, tx_tail);
1491 		if (rv == 0) {
1492 			D2(ldcp->id, "i_ldc_process_VER: (0x%llx) sent version"
1493 			    "INFO v%u.%u\n", ldcp->id, ldc_versions[idx].major,
1494 			    ldc_versions[idx].minor);
1495 			ldcp->tx_tail = tx_tail;
1496 		} else {
1497 			cmn_err(CE_NOTE,
1498 			    "i_ldc_process_VER: (0x%lx) error sending version"
1499 			    "INFO\n", ldcp->id);
1500 			i_ldc_reset(ldcp, B_TRUE);
1501 			mutex_exit(&ldcp->tx_lock);
1502 			return (ECONNRESET);
1503 		}
1504 
1505 		break;
1506 	}
1507 
1508 	mutex_exit(&ldcp->tx_lock);
1509 	return (rv);
1510 }
1511 
1512 
1513 /*
1514  * Process an incoming RTS ctrl message
1515  */
1516 static int
1517 i_ldc_process_RTS(ldc_chan_t *ldcp, ldc_msg_t *msg)
1518 {
1519 	int 		rv = 0;
1520 	ldc_msg_t 	*pkt;
1521 	uint64_t	tx_tail;
1522 	boolean_t	sent_NACK = B_FALSE;
1523 
1524 	D2(ldcp->id, "i_ldc_process_RTS: (0x%llx) received RTS\n", ldcp->id);
1525 
1526 	switch (msg->stype) {
1527 	case LDC_NACK:
1528 		DWARN(ldcp->id,
1529 		    "i_ldc_process_RTS: (0x%llx) RTS NACK received\n",
1530 		    ldcp->id);
1531 
1532 		/* Reset the channel -- as we cannot continue */
1533 		mutex_enter(&ldcp->tx_lock);
1534 		i_ldc_reset(ldcp, B_TRUE);
1535 		mutex_exit(&ldcp->tx_lock);
1536 		rv = ECONNRESET;
1537 		break;
1538 
1539 	case LDC_INFO:
1540 
1541 		/* check mode */
1542 		if (ldcp->mode != (ldc_mode_t)msg->env) {
1543 			cmn_err(CE_NOTE,
1544 			    "i_ldc_process_RTS: (0x%lx) mode mismatch\n",
1545 			    ldcp->id);
1546 			/*
1547 			 * send NACK in response to MODE message
1548 			 * get the current tail for the response
1549 			 */
1550 			rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_NACK, LDC_RTS);
1551 			if (rv) {
1552 				/* if cannot send NACK - reset channel */
1553 				mutex_enter(&ldcp->tx_lock);
1554 				i_ldc_reset(ldcp, B_TRUE);
1555 				mutex_exit(&ldcp->tx_lock);
1556 				rv = ECONNRESET;
1557 				break;
1558 			}
1559 			sent_NACK = B_TRUE;
1560 		}
1561 		break;
1562 	default:
1563 		DWARN(ldcp->id, "i_ldc_process_RTS: (0x%llx) unexp ACK\n",
1564 		    ldcp->id);
1565 		mutex_enter(&ldcp->tx_lock);
1566 		i_ldc_reset(ldcp, B_TRUE);
1567 		mutex_exit(&ldcp->tx_lock);
1568 		rv = ECONNRESET;
1569 		break;
1570 	}
1571 
1572 	/*
1573 	 * If either the connection was reset (when rv != 0) or
1574 	 * a NACK was sent, we return. In the case of a NACK
1575 	 * we dont want to consume the packet that came in but
1576 	 * not record that we received the RTS
1577 	 */
1578 	if (rv || sent_NACK)
1579 		return (rv);
1580 
1581 	/* record RTS received */
1582 	ldcp->hstate |= TS_RCVD_RTS;
1583 
1584 	/* store initial SEQID info */
1585 	ldcp->last_msg_snt = msg->seqid;
1586 
1587 	/* Obtain Tx lock */
1588 	mutex_enter(&ldcp->tx_lock);
1589 
1590 	/* get the current tail for the response */
1591 	rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
1592 	if (rv != 0) {
1593 		cmn_err(CE_NOTE,
1594 		    "i_ldc_process_RTS: (0x%lx) err sending RTR\n",
1595 		    ldcp->id);
1596 		i_ldc_reset(ldcp, B_TRUE);
1597 		mutex_exit(&ldcp->tx_lock);
1598 		return (ECONNRESET);
1599 	}
1600 
1601 	pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
1602 	ZERO_PKT(pkt);
1603 
1604 	/* initialize the packet */
1605 	pkt->type = LDC_CTRL;
1606 	pkt->stype = LDC_INFO;
1607 	pkt->ctrl = LDC_RTR;
1608 	pkt->env = ldcp->mode;
1609 	if (ldcp->mode != LDC_MODE_RAW)
1610 		pkt->seqid = LDC_INIT_SEQID;
1611 
1612 	ldcp->last_msg_rcd = msg->seqid;
1613 
1614 	/* initiate the send by calling into HV and set the new tail */
1615 	tx_tail = (tx_tail + LDC_PACKET_SIZE) %
1616 	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1617 
1618 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
1619 	if (rv == 0) {
1620 		D2(ldcp->id,
1621 		    "i_ldc_process_RTS: (0x%llx) sent RTR\n", ldcp->id);
1622 		DUMP_LDC_PKT(ldcp, "i_ldc_process_RTS sent rtr", (uint64_t)pkt);
1623 
1624 		ldcp->tx_tail = tx_tail;
1625 		ldcp->hstate |= TS_SENT_RTR;
1626 
1627 	} else {
1628 		cmn_err(CE_NOTE,
1629 		    "i_ldc_process_RTS: (0x%lx) error sending RTR\n",
1630 		    ldcp->id);
1631 		i_ldc_reset(ldcp, B_TRUE);
1632 		mutex_exit(&ldcp->tx_lock);
1633 		return (ECONNRESET);
1634 	}
1635 
1636 	mutex_exit(&ldcp->tx_lock);
1637 	return (0);
1638 }
1639 
1640 /*
1641  * Process an incoming RTR ctrl message
1642  */
1643 static int
1644 i_ldc_process_RTR(ldc_chan_t *ldcp, ldc_msg_t *msg)
1645 {
1646 	int 		rv = 0;
1647 	boolean_t	sent_NACK = B_FALSE;
1648 
1649 	D2(ldcp->id, "i_ldc_process_RTR: (0x%llx) received RTR\n", ldcp->id);
1650 
1651 	switch (msg->stype) {
1652 	case LDC_NACK:
1653 		/* RTR NACK received */
1654 		DWARN(ldcp->id,
1655 		    "i_ldc_process_RTR: (0x%llx) RTR NACK received\n",
1656 		    ldcp->id);
1657 
1658 		/* Reset the channel -- as we cannot continue */
1659 		mutex_enter(&ldcp->tx_lock);
1660 		i_ldc_reset(ldcp, B_TRUE);
1661 		mutex_exit(&ldcp->tx_lock);
1662 		rv = ECONNRESET;
1663 
1664 		break;
1665 
1666 	case LDC_INFO:
1667 
1668 		/* check mode */
1669 		if (ldcp->mode != (ldc_mode_t)msg->env) {
1670 			DWARN(ldcp->id,
1671 			    "i_ldc_process_RTR: (0x%llx) mode mismatch, "
1672 			    "expecting 0x%x, got 0x%x\n",
1673 			    ldcp->id, ldcp->mode, (ldc_mode_t)msg->env);
1674 			/*
1675 			 * send NACK in response to MODE message
1676 			 * get the current tail for the response
1677 			 */
1678 			rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_NACK, LDC_RTR);
1679 			if (rv) {
1680 				/* if cannot send NACK - reset channel */
1681 				mutex_enter(&ldcp->tx_lock);
1682 				i_ldc_reset(ldcp, B_TRUE);
1683 				mutex_exit(&ldcp->tx_lock);
1684 				rv = ECONNRESET;
1685 				break;
1686 			}
1687 			sent_NACK = B_TRUE;
1688 		}
1689 		break;
1690 
1691 	default:
1692 		DWARN(ldcp->id, "i_ldc_process_RTR: (0x%llx) unexp ACK\n",
1693 		    ldcp->id);
1694 
1695 		/* Reset the channel -- as we cannot continue */
1696 		mutex_enter(&ldcp->tx_lock);
1697 		i_ldc_reset(ldcp, B_TRUE);
1698 		mutex_exit(&ldcp->tx_lock);
1699 		rv = ECONNRESET;
1700 		break;
1701 	}
1702 
1703 	/*
1704 	 * If either the connection was reset (when rv != 0) or
1705 	 * a NACK was sent, we return. In the case of a NACK
1706 	 * we dont want to consume the packet that came in but
1707 	 * not record that we received the RTR
1708 	 */
1709 	if (rv || sent_NACK)
1710 		return (rv);
1711 
1712 	ldcp->last_msg_snt = msg->seqid;
1713 	ldcp->hstate |= TS_RCVD_RTR;
1714 
1715 	rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_INFO, LDC_RDX);
1716 	if (rv) {
1717 		cmn_err(CE_NOTE,
1718 		    "i_ldc_process_RTR: (0x%lx) cannot send RDX\n",
1719 		    ldcp->id);
1720 		mutex_enter(&ldcp->tx_lock);
1721 		i_ldc_reset(ldcp, B_TRUE);
1722 		mutex_exit(&ldcp->tx_lock);
1723 		return (ECONNRESET);
1724 	}
1725 	D2(ldcp->id,
1726 	    "i_ldc_process_RTR: (0x%llx) sent RDX\n", ldcp->id);
1727 
1728 	ldcp->hstate |= TS_SENT_RDX;
1729 	ldcp->tstate |= TS_HSHAKE_DONE;
1730 	if ((ldcp->tstate & TS_IN_RESET) == 0)
1731 		ldcp->status = LDC_UP;
1732 
1733 	D1(ldcp->id, "(0x%llx) Handshake Complete\n", ldcp->id);
1734 
1735 	return (0);
1736 }
1737 
1738 
1739 /*
1740  * Process an incoming RDX ctrl message
1741  */
1742 static int
1743 i_ldc_process_RDX(ldc_chan_t *ldcp, ldc_msg_t *msg)
1744 {
1745 	int	rv = 0;
1746 
1747 	D2(ldcp->id, "i_ldc_process_RDX: (0x%llx) received RDX\n", ldcp->id);
1748 
1749 	switch (msg->stype) {
1750 	case LDC_NACK:
1751 		/* RDX NACK received */
1752 		DWARN(ldcp->id,
1753 		    "i_ldc_process_RDX: (0x%llx) RDX NACK received\n",
1754 		    ldcp->id);
1755 
1756 		/* Reset the channel -- as we cannot continue */
1757 		mutex_enter(&ldcp->tx_lock);
1758 		i_ldc_reset(ldcp, B_TRUE);
1759 		mutex_exit(&ldcp->tx_lock);
1760 		rv = ECONNRESET;
1761 
1762 		break;
1763 
1764 	case LDC_INFO:
1765 
1766 		/*
1767 		 * if channel is UP and a RDX received after data transmission
1768 		 * has commenced it is an error
1769 		 */
1770 		if ((ldcp->tstate == TS_UP) && (ldcp->hstate & TS_RCVD_RDX)) {
1771 			DWARN(DBG_ALL_LDCS,
1772 			    "i_ldc_process_RDX: (0x%llx) unexpected RDX"
1773 			    " - LDC reset\n", ldcp->id);
1774 			mutex_enter(&ldcp->tx_lock);
1775 			i_ldc_reset(ldcp, B_TRUE);
1776 			mutex_exit(&ldcp->tx_lock);
1777 			return (ECONNRESET);
1778 		}
1779 
1780 		ldcp->hstate |= TS_RCVD_RDX;
1781 		ldcp->tstate |= TS_HSHAKE_DONE;
1782 		if ((ldcp->tstate & TS_IN_RESET) == 0)
1783 			ldcp->status = LDC_UP;
1784 
1785 		D1(DBG_ALL_LDCS, "(0x%llx) Handshake Complete\n", ldcp->id);
1786 		break;
1787 
1788 	default:
1789 		DWARN(ldcp->id, "i_ldc_process_RDX: (0x%llx) unexp ACK\n",
1790 		    ldcp->id);
1791 
1792 		/* Reset the channel -- as we cannot continue */
1793 		mutex_enter(&ldcp->tx_lock);
1794 		i_ldc_reset(ldcp, B_TRUE);
1795 		mutex_exit(&ldcp->tx_lock);
1796 		rv = ECONNRESET;
1797 		break;
1798 	}
1799 
1800 	return (rv);
1801 }
1802 
1803 /*
1804  * Process an incoming ACK for a data packet
1805  */
1806 static int
1807 i_ldc_process_data_ACK(ldc_chan_t *ldcp, ldc_msg_t *msg)
1808 {
1809 	int		rv;
1810 	uint64_t 	tx_head;
1811 	ldc_msg_t	*pkt;
1812 
1813 	/* Obtain Tx lock */
1814 	mutex_enter(&ldcp->tx_lock);
1815 
1816 	/*
1817 	 * Read the current Tx head and tail
1818 	 */
1819 	rv = hv_ldc_tx_get_state(ldcp->id,
1820 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
1821 	if (rv != 0) {
1822 		cmn_err(CE_WARN,
1823 		    "i_ldc_process_data_ACK: (0x%lx) cannot read qptrs\n",
1824 		    ldcp->id);
1825 
1826 		/* Reset the channel -- as we cannot continue */
1827 		i_ldc_reset(ldcp, B_TRUE);
1828 		mutex_exit(&ldcp->tx_lock);
1829 		return (ECONNRESET);
1830 	}
1831 
1832 	/*
1833 	 * loop from where the previous ACK location was to the
1834 	 * current head location. This is how far the HV has
1835 	 * actually send pkts. Pkts between head and tail are
1836 	 * yet to be sent by HV.
1837 	 */
1838 	tx_head = ldcp->tx_ackd_head;
1839 	for (;;) {
1840 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_head);
1841 		tx_head = (tx_head + LDC_PACKET_SIZE) %
1842 		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1843 
1844 		if (pkt->seqid == msg->ackid) {
1845 			D2(ldcp->id,
1846 			    "i_ldc_process_data_ACK: (0x%llx) found packet\n",
1847 			    ldcp->id);
1848 			ldcp->last_ack_rcd = msg->ackid;
1849 			ldcp->tx_ackd_head = tx_head;
1850 			break;
1851 		}
1852 		if (tx_head == ldcp->tx_head) {
1853 			/* could not find packet */
1854 			DWARN(ldcp->id,
1855 			    "i_ldc_process_data_ACK: (0x%llx) invalid ACKid\n",
1856 			    ldcp->id);
1857 
1858 			/* Reset the channel -- as we cannot continue */
1859 			i_ldc_reset(ldcp, B_TRUE);
1860 			mutex_exit(&ldcp->tx_lock);
1861 			return (ECONNRESET);
1862 		}
1863 	}
1864 
1865 	mutex_exit(&ldcp->tx_lock);
1866 	return (0);
1867 }
1868 
1869 /*
1870  * Process incoming control message
1871  * Return 0 - session can continue
1872  *        EAGAIN - reprocess packet - state was changed
1873  *	  ECONNRESET - channel was reset
1874  */
1875 static int
1876 i_ldc_ctrlmsg(ldc_chan_t *ldcp, ldc_msg_t *msg)
1877 {
1878 	int 		rv = 0;
1879 
1880 	D1(ldcp->id, "i_ldc_ctrlmsg: (%llx) tstate = %lx, hstate = %lx\n",
1881 	    ldcp->id, ldcp->tstate, ldcp->hstate);
1882 
1883 	switch (ldcp->tstate & ~TS_IN_RESET) {
1884 
1885 	case TS_OPEN:
1886 	case TS_READY:
1887 
1888 		switch (msg->ctrl & LDC_CTRL_MASK) {
1889 		case LDC_VER:
1890 			/* process version message */
1891 			rv = i_ldc_process_VER(ldcp, msg);
1892 			break;
1893 		default:
1894 			DWARN(ldcp->id,
1895 			    "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
1896 			    "tstate=0x%x\n", ldcp->id,
1897 			    (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate);
1898 			break;
1899 		}
1900 
1901 		break;
1902 
1903 	case TS_VREADY:
1904 
1905 		switch (msg->ctrl & LDC_CTRL_MASK) {
1906 		case LDC_VER:
1907 			/* process version message */
1908 			rv = i_ldc_process_VER(ldcp, msg);
1909 			break;
1910 		case LDC_RTS:
1911 			/* process RTS message */
1912 			rv = i_ldc_process_RTS(ldcp, msg);
1913 			break;
1914 		case LDC_RTR:
1915 			/* process RTR message */
1916 			rv = i_ldc_process_RTR(ldcp, msg);
1917 			break;
1918 		case LDC_RDX:
1919 			/* process RDX message */
1920 			rv = i_ldc_process_RDX(ldcp, msg);
1921 			break;
1922 		default:
1923 			DWARN(ldcp->id,
1924 			    "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
1925 			    "tstate=0x%x\n", ldcp->id,
1926 			    (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate);
1927 			break;
1928 		}
1929 
1930 		break;
1931 
1932 	case TS_UP:
1933 
1934 		switch (msg->ctrl & LDC_CTRL_MASK) {
1935 		case LDC_VER:
1936 			DWARN(ldcp->id,
1937 			    "i_ldc_ctrlmsg: (0x%llx) unexpected VER "
1938 			    "- LDC reset\n", ldcp->id);
1939 			/* peer is redoing version negotiation */
1940 			mutex_enter(&ldcp->tx_lock);
1941 			(void) i_ldc_txq_reconf(ldcp);
1942 			i_ldc_reset_state(ldcp);
1943 			mutex_exit(&ldcp->tx_lock);
1944 			rv = EAGAIN;
1945 			break;
1946 
1947 		case LDC_RDX:
1948 			/* process RDX message */
1949 			rv = i_ldc_process_RDX(ldcp, msg);
1950 			break;
1951 
1952 		default:
1953 			DWARN(ldcp->id,
1954 			    "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
1955 			    "tstate=0x%x\n", ldcp->id,
1956 			    (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate);
1957 			break;
1958 		}
1959 	}
1960 
1961 	return (rv);
1962 }
1963 
1964 /*
1965  * Register channel with the channel nexus
1966  */
1967 static int
1968 i_ldc_register_channel(ldc_chan_t *ldcp)
1969 {
1970 	int		rv = 0;
1971 	ldc_cnex_t	*cinfo = &ldcssp->cinfo;
1972 
1973 	if (cinfo->dip == NULL) {
1974 		DWARN(ldcp->id,
1975 		    "i_ldc_register_channel: cnex has not registered\n");
1976 		return (EAGAIN);
1977 	}
1978 
1979 	rv = cinfo->reg_chan(cinfo->dip, ldcp->id, ldcp->devclass);
1980 	if (rv) {
1981 		DWARN(ldcp->id,
1982 		    "i_ldc_register_channel: cannot register channel\n");
1983 		return (rv);
1984 	}
1985 
1986 	rv = cinfo->add_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR,
1987 	    i_ldc_tx_hdlr, ldcp, NULL);
1988 	if (rv) {
1989 		DWARN(ldcp->id,
1990 		    "i_ldc_register_channel: cannot add Tx interrupt\n");
1991 		(void) cinfo->unreg_chan(cinfo->dip, ldcp->id);
1992 		return (rv);
1993 	}
1994 
1995 	rv = cinfo->add_intr(cinfo->dip, ldcp->id, CNEX_RX_INTR,
1996 	    i_ldc_rx_hdlr, ldcp, NULL);
1997 	if (rv) {
1998 		DWARN(ldcp->id,
1999 		    "i_ldc_register_channel: cannot add Rx interrupt\n");
2000 		(void) cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR);
2001 		(void) cinfo->unreg_chan(cinfo->dip, ldcp->id);
2002 		return (rv);
2003 	}
2004 
2005 	ldcp->tstate |= TS_CNEX_RDY;
2006 
2007 	return (0);
2008 }
2009 
2010 /*
2011  * Unregister a channel with the channel nexus
2012  */
2013 static int
2014 i_ldc_unregister_channel(ldc_chan_t *ldcp)
2015 {
2016 	int		rv = 0;
2017 	ldc_cnex_t	*cinfo = &ldcssp->cinfo;
2018 
2019 	if (cinfo->dip == NULL) {
2020 		DWARN(ldcp->id,
2021 		    "i_ldc_unregister_channel: cnex has not registered\n");
2022 		return (EAGAIN);
2023 	}
2024 
2025 	if (ldcp->tstate & TS_CNEX_RDY) {
2026 
2027 		/* Remove the Rx interrupt */
2028 		rv = cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_RX_INTR);
2029 		if (rv) {
2030 			if (rv != EAGAIN) {
2031 				DWARN(ldcp->id,
2032 				    "i_ldc_unregister_channel: err removing "
2033 				    "Rx intr\n");
2034 				return (rv);
2035 			}
2036 
2037 			/*
2038 			 * If interrupts are pending and handler has
2039 			 * finished running, clear interrupt and try
2040 			 * again
2041 			 */
2042 			if (ldcp->rx_intr_state != LDC_INTR_PEND)
2043 				return (rv);
2044 
2045 			(void) i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
2046 			rv = cinfo->rem_intr(cinfo->dip, ldcp->id,
2047 			    CNEX_RX_INTR);
2048 			if (rv) {
2049 				DWARN(ldcp->id, "i_ldc_unregister_channel: "
2050 				    "err removing Rx interrupt\n");
2051 				return (rv);
2052 			}
2053 		}
2054 
2055 		/* Remove the Tx interrupt */
2056 		rv = cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR);
2057 		if (rv) {
2058 			DWARN(ldcp->id,
2059 			    "i_ldc_unregister_channel: err removing Tx intr\n");
2060 			return (rv);
2061 		}
2062 
2063 		/* Unregister the channel */
2064 		rv = cinfo->unreg_chan(ldcssp->cinfo.dip, ldcp->id);
2065 		if (rv) {
2066 			DWARN(ldcp->id,
2067 			    "i_ldc_unregister_channel: cannot unreg channel\n");
2068 			return (rv);
2069 		}
2070 
2071 		ldcp->tstate &= ~TS_CNEX_RDY;
2072 	}
2073 
2074 	return (0);
2075 }
2076 
2077 
2078 /*
2079  * LDC transmit interrupt handler
2080  *    triggered for chanel up/down/reset events
2081  *    and Tx queue content changes
2082  */
2083 static uint_t
2084 i_ldc_tx_hdlr(caddr_t arg1, caddr_t arg2)
2085 {
2086 	_NOTE(ARGUNUSED(arg2))
2087 
2088 	int 		rv;
2089 	ldc_chan_t 	*ldcp;
2090 	boolean_t 	notify_client = B_FALSE;
2091 	uint64_t	notify_event = 0, link_state;
2092 
2093 	/* Get the channel for which interrupt was received */
2094 	ASSERT(arg1 != NULL);
2095 	ldcp = (ldc_chan_t *)arg1;
2096 
2097 	D1(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) Received intr, ldcp=0x%p\n",
2098 	    ldcp->id, ldcp);
2099 
2100 	/* Lock channel */
2101 	mutex_enter(&ldcp->lock);
2102 
2103 	/* Obtain Tx lock */
2104 	mutex_enter(&ldcp->tx_lock);
2105 
2106 	/* mark interrupt as pending */
2107 	ldcp->tx_intr_state = LDC_INTR_ACTIVE;
2108 
2109 	/* save current link state */
2110 	link_state = ldcp->link_state;
2111 
2112 	rv = hv_ldc_tx_get_state(ldcp->id, &ldcp->tx_head, &ldcp->tx_tail,
2113 	    &ldcp->link_state);
2114 	if (rv) {
2115 		cmn_err(CE_WARN,
2116 		    "i_ldc_tx_hdlr: (0x%lx) cannot read queue ptrs rv=0x%d\n",
2117 		    ldcp->id, rv);
2118 		i_ldc_clear_intr(ldcp, CNEX_TX_INTR);
2119 		mutex_exit(&ldcp->tx_lock);
2120 		mutex_exit(&ldcp->lock);
2121 		return (DDI_INTR_CLAIMED);
2122 	}
2123 
2124 	/*
2125 	 * reset the channel state if the channel went down
2126 	 * (other side unconfigured queue) or channel was reset
2127 	 * (other side reconfigured its queue)
2128 	 */
2129 	if (link_state != ldcp->link_state &&
2130 	    ldcp->link_state == LDC_CHANNEL_DOWN) {
2131 		D1(ldcp->id, "i_ldc_tx_hdlr: channel link down\n", ldcp->id);
2132 		i_ldc_reset(ldcp, B_FALSE);
2133 		notify_client = B_TRUE;
2134 		notify_event = LDC_EVT_DOWN;
2135 	}
2136 
2137 	if (link_state != ldcp->link_state &&
2138 	    ldcp->link_state == LDC_CHANNEL_RESET) {
2139 		D1(ldcp->id, "i_ldc_tx_hdlr: channel link reset\n", ldcp->id);
2140 		i_ldc_reset(ldcp, B_FALSE);
2141 		notify_client = B_TRUE;
2142 		notify_event = LDC_EVT_RESET;
2143 	}
2144 
2145 	if (link_state != ldcp->link_state &&
2146 	    (ldcp->tstate & ~TS_IN_RESET) == TS_OPEN &&
2147 	    ldcp->link_state == LDC_CHANNEL_UP) {
2148 		D1(ldcp->id, "i_ldc_tx_hdlr: channel link up\n", ldcp->id);
2149 		notify_client = B_TRUE;
2150 		notify_event = LDC_EVT_RESET;
2151 		ldcp->tstate |= TS_LINK_READY;
2152 		ldcp->status = LDC_READY;
2153 	}
2154 
2155 	/* if callbacks are disabled, do not notify */
2156 	if (!ldcp->cb_enabled)
2157 		notify_client = B_FALSE;
2158 
2159 	i_ldc_clear_intr(ldcp, CNEX_TX_INTR);
2160 	mutex_exit(&ldcp->tx_lock);
2161 
2162 	if (notify_client) {
2163 		ldcp->cb_inprogress = B_TRUE;
2164 		mutex_exit(&ldcp->lock);
2165 		rv = ldcp->cb(notify_event, ldcp->cb_arg);
2166 		if (rv) {
2167 			DWARN(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) callback "
2168 			    "failure", ldcp->id);
2169 		}
2170 		mutex_enter(&ldcp->lock);
2171 		ldcp->cb_inprogress = B_FALSE;
2172 	}
2173 
2174 	mutex_exit(&ldcp->lock);
2175 
2176 	D1(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) exiting handler", ldcp->id);
2177 
2178 	return (DDI_INTR_CLAIMED);
2179 }
2180 
2181 /*
2182  * Process the Rx HV queue.
2183  *
2184  * Returns 0 if data packets were found and no errors were encountered,
2185  * otherwise returns an error. In either case, the *notify argument is
2186  * set to indicate whether or not the client callback function should
2187  * be invoked. The *event argument is set to contain the callback event.
2188  *
2189  * Depending on the channel mode, packets are handled differently:
2190  *
2191  * RAW MODE
2192  * For raw mode channels, when a data packet is encountered,
2193  * processing stops and all packets are left on the queue to be removed
2194  * and processed by the ldc_read code path.
2195  *
2196  * UNRELIABLE MODE
2197  * For unreliable mode, when a data packet is encountered, processing
2198  * stops, and all packets are left on the queue to be removed and
2199  * processed by the ldc_read code path. Control packets are processed
2200  * inline if they are encountered before any data packets.
2201  *
2202  * RELIABLE MODE
2203  * For reliable mode channels, all packets on the receive queue
2204  * are processed: data packets are copied to the data queue and
2205  * control packets are processed inline. Packets are only left on
2206  * the receive queue when the data queue is full.
2207  */
2208 static uint_t
2209 i_ldc_rx_process_hvq(ldc_chan_t *ldcp, boolean_t *notify_client,
2210     uint64_t *notify_event)
2211 {
2212 	int		rv;
2213 	uint64_t 	rx_head, rx_tail;
2214 	ldc_msg_t 	*msg;
2215 	uint64_t	link_state, first_fragment = 0;
2216 	boolean_t	trace_length = B_TRUE;
2217 
2218 	ASSERT(MUTEX_HELD(&ldcp->lock));
2219 	*notify_client = B_FALSE;
2220 	*notify_event = 0;
2221 
2222 	/*
2223 	 * Read packet(s) from the queue
2224 	 */
2225 	for (;;) {
2226 
2227 		link_state = ldcp->link_state;
2228 		rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
2229 		    &ldcp->link_state);
2230 		if (rv) {
2231 			cmn_err(CE_WARN,
2232 			    "i_ldc_rx_process_hvq: (0x%lx) cannot read "
2233 			    "queue ptrs, rv=0x%d\n", ldcp->id, rv);
2234 			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
2235 			return (EIO);
2236 		}
2237 
2238 		/*
2239 		 * reset the channel state if the channel went down
2240 		 * (other side unconfigured queue) or channel was reset
2241 		 * (other side reconfigured its queue)
2242 		 */
2243 
2244 		if (link_state != ldcp->link_state) {
2245 
2246 			switch (ldcp->link_state) {
2247 			case LDC_CHANNEL_DOWN:
2248 				D1(ldcp->id, "i_ldc_rx_process_hvq: channel "
2249 				    "link down\n", ldcp->id);
2250 				mutex_enter(&ldcp->tx_lock);
2251 				i_ldc_reset(ldcp, B_FALSE);
2252 				mutex_exit(&ldcp->tx_lock);
2253 				*notify_client = B_TRUE;
2254 				*notify_event = LDC_EVT_DOWN;
2255 				goto loop_exit;
2256 
2257 			case LDC_CHANNEL_UP:
2258 				D1(ldcp->id, "i_ldc_rx_process_hvq: "
2259 				    "channel link up\n", ldcp->id);
2260 
2261 				if ((ldcp->tstate & ~TS_IN_RESET) == TS_OPEN) {
2262 					*notify_client = B_TRUE;
2263 					*notify_event = LDC_EVT_RESET;
2264 					ldcp->tstate |= TS_LINK_READY;
2265 					ldcp->status = LDC_READY;
2266 				}
2267 				break;
2268 
2269 			case LDC_CHANNEL_RESET:
2270 			default:
2271 #ifdef DEBUG
2272 force_reset:
2273 #endif
2274 				D1(ldcp->id, "i_ldc_rx_process_hvq: channel "
2275 				    "link reset\n", ldcp->id);
2276 				mutex_enter(&ldcp->tx_lock);
2277 				i_ldc_reset(ldcp, B_FALSE);
2278 				mutex_exit(&ldcp->tx_lock);
2279 				*notify_client = B_TRUE;
2280 				*notify_event = LDC_EVT_RESET;
2281 				break;
2282 			}
2283 		}
2284 
2285 #ifdef DEBUG
2286 		if (LDC_INJECT_RESET(ldcp))
2287 			goto force_reset;
2288 		if (LDC_INJECT_DRNGCLEAR(ldcp))
2289 			i_ldc_mem_inject_dring_clear(ldcp);
2290 #endif
2291 		if (trace_length) {
2292 			TRACE_RXHVQ_LENGTH(ldcp, rx_head, rx_tail);
2293 			trace_length = B_FALSE;
2294 		}
2295 
2296 		if (rx_head == rx_tail) {
2297 			D2(ldcp->id, "i_ldc_rx_process_hvq: (0x%llx) "
2298 			    "No packets\n", ldcp->id);
2299 			break;
2300 		}
2301 
2302 		D2(ldcp->id, "i_ldc_rx_process_hvq: head=0x%llx, "
2303 		    "tail=0x%llx\n", rx_head, rx_tail);
2304 		DUMP_LDC_PKT(ldcp, "i_ldc_rx_process_hvq rcd",
2305 		    ldcp->rx_q_va + rx_head);
2306 
2307 		/* get the message */
2308 		msg = (ldc_msg_t *)(ldcp->rx_q_va + rx_head);
2309 
2310 		/* if channel is in RAW mode or data pkt, notify and return */
2311 		if (ldcp->mode == LDC_MODE_RAW) {
2312 			*notify_client = B_TRUE;
2313 			*notify_event |= LDC_EVT_READ;
2314 			break;
2315 		}
2316 
2317 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_INFO)) {
2318 
2319 			/* discard packet if channel is not up */
2320 			if ((ldcp->tstate & ~TS_IN_RESET) != TS_UP) {
2321 
2322 				/* move the head one position */
2323 				rx_head = (rx_head + LDC_PACKET_SIZE) %
2324 				    (ldcp->rx_q_entries << LDC_PACKET_SHIFT);
2325 
2326 				if (rv = i_ldc_set_rx_head(ldcp, rx_head))
2327 					break;
2328 
2329 				continue;
2330 			} else {
2331 				uint64_t dq_head, dq_tail;
2332 
2333 				/* process only RELIABLE mode data packets */
2334 				if (ldcp->mode != LDC_MODE_RELIABLE) {
2335 					if ((ldcp->tstate & TS_IN_RESET) == 0)
2336 						*notify_client = B_TRUE;
2337 					*notify_event |= LDC_EVT_READ;
2338 					break;
2339 				}
2340 
2341 				/* don't process packet if queue full */
2342 				(void) i_ldc_dq_rx_get_state(ldcp, &dq_head,
2343 				    &dq_tail, NULL);
2344 				dq_tail = (dq_tail + LDC_PACKET_SIZE) %
2345 				    (ldcp->rx_dq_entries << LDC_PACKET_SHIFT);
2346 				if (dq_tail == dq_head ||
2347 				    LDC_INJECT_DQFULL(ldcp)) {
2348 					rv = ENOSPC;
2349 					break;
2350 				}
2351 			}
2352 		}
2353 
2354 		/* Check the sequence ID for the message received */
2355 		rv = i_ldc_check_seqid(ldcp, msg);
2356 		if (rv != 0) {
2357 
2358 			DWARN(ldcp->id, "i_ldc_rx_process_hvq: (0x%llx) "
2359 			    "seqid error, q_ptrs=0x%lx,0x%lx", ldcp->id,
2360 			    rx_head, rx_tail);
2361 
2362 			/* Reset last_msg_rcd to start of message */
2363 			if (first_fragment != 0) {
2364 				ldcp->last_msg_rcd = first_fragment - 1;
2365 				first_fragment = 0;
2366 			}
2367 
2368 			/*
2369 			 * Send a NACK due to seqid mismatch
2370 			 */
2371 			rv = i_ldc_send_pkt(ldcp, msg->type, LDC_NACK,
2372 			    (msg->ctrl & LDC_CTRL_MASK));
2373 
2374 			if (rv) {
2375 				cmn_err(CE_NOTE, "i_ldc_rx_process_hvq: "
2376 				    "(0x%lx) err sending CTRL/DATA NACK msg\n",
2377 				    ldcp->id);
2378 
2379 				/* if cannot send NACK - reset channel */
2380 				mutex_enter(&ldcp->tx_lock);
2381 				i_ldc_reset(ldcp, B_TRUE);
2382 				mutex_exit(&ldcp->tx_lock);
2383 
2384 				*notify_client = B_TRUE;
2385 				*notify_event = LDC_EVT_RESET;
2386 				break;
2387 			}
2388 
2389 			/* purge receive queue */
2390 			(void) i_ldc_set_rx_head(ldcp, rx_tail);
2391 			break;
2392 		}
2393 
2394 		/* record the message ID */
2395 		ldcp->last_msg_rcd = msg->seqid;
2396 
2397 		/* process control messages */
2398 		if (msg->type & LDC_CTRL) {
2399 			/* save current internal state */
2400 			uint64_t tstate = ldcp->tstate;
2401 
2402 			rv = i_ldc_ctrlmsg(ldcp, msg);
2403 			if (rv == EAGAIN) {
2404 				/* re-process pkt - state was adjusted */
2405 				continue;
2406 			}
2407 			if (rv == ECONNRESET) {
2408 				*notify_client = B_TRUE;
2409 				*notify_event = LDC_EVT_RESET;
2410 				break;
2411 			}
2412 
2413 			/*
2414 			 * control message processing was successful
2415 			 * channel transitioned to ready for communication
2416 			 */
2417 			if (rv == 0 && ldcp->tstate == TS_UP &&
2418 			    (tstate & ~TS_IN_RESET) !=
2419 			    (ldcp->tstate & ~TS_IN_RESET)) {
2420 				*notify_client = B_TRUE;
2421 				*notify_event = LDC_EVT_UP;
2422 			}
2423 		}
2424 
2425 		/* process data NACKs */
2426 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_NACK)) {
2427 			DWARN(ldcp->id,
2428 			    "i_ldc_rx_process_hvq: (0x%llx) received DATA/NACK",
2429 			    ldcp->id);
2430 			mutex_enter(&ldcp->tx_lock);
2431 			i_ldc_reset(ldcp, B_TRUE);
2432 			mutex_exit(&ldcp->tx_lock);
2433 			*notify_client = B_TRUE;
2434 			*notify_event = LDC_EVT_RESET;
2435 			break;
2436 		}
2437 
2438 		/* process data ACKs */
2439 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) {
2440 			if (rv = i_ldc_process_data_ACK(ldcp, msg)) {
2441 				*notify_client = B_TRUE;
2442 				*notify_event = LDC_EVT_RESET;
2443 				break;
2444 			}
2445 		}
2446 
2447 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_INFO)) {
2448 			ASSERT(ldcp->mode == LDC_MODE_RELIABLE);
2449 
2450 			/*
2451 			 * Copy the data packet to the data queue. Note
2452 			 * that the copy routine updates the rx_head pointer.
2453 			 */
2454 			i_ldc_rxdq_copy(ldcp, &rx_head);
2455 
2456 			if ((ldcp->tstate & TS_IN_RESET) == 0)
2457 				*notify_client = B_TRUE;
2458 			*notify_event |= LDC_EVT_READ;
2459 		} else {
2460 			rx_head = (rx_head + LDC_PACKET_SIZE) %
2461 			    (ldcp->rx_q_entries << LDC_PACKET_SHIFT);
2462 		}
2463 
2464 		/* move the head one position */
2465 		if (rv = i_ldc_set_rx_head(ldcp, rx_head)) {
2466 			*notify_client = B_TRUE;
2467 			*notify_event = LDC_EVT_RESET;
2468 			break;
2469 		}
2470 
2471 	} /* for */
2472 
2473 loop_exit:
2474 
2475 	if (ldcp->mode == LDC_MODE_RELIABLE) {
2476 		/* ACK data packets */
2477 		if ((*notify_event &
2478 		    (LDC_EVT_READ | LDC_EVT_RESET)) == LDC_EVT_READ) {
2479 			int ack_rv;
2480 			ack_rv = i_ldc_send_pkt(ldcp, LDC_DATA, LDC_ACK, 0);
2481 			if (ack_rv && ack_rv != EWOULDBLOCK) {
2482 				cmn_err(CE_NOTE,
2483 				    "i_ldc_rx_process_hvq: (0x%lx) cannot "
2484 				    "send ACK\n", ldcp->id);
2485 
2486 				mutex_enter(&ldcp->tx_lock);
2487 				i_ldc_reset(ldcp, B_FALSE);
2488 				mutex_exit(&ldcp->tx_lock);
2489 
2490 				*notify_client = B_TRUE;
2491 				*notify_event = LDC_EVT_RESET;
2492 				goto skip_ackpeek;
2493 			}
2494 		}
2495 
2496 		/*
2497 		 * If we have no more space on the data queue, make sure
2498 		 * there are no ACKs on the rx queue waiting to be processed.
2499 		 */
2500 		if (rv == ENOSPC) {
2501 			if (i_ldc_rx_ackpeek(ldcp, rx_head, rx_tail) != 0) {
2502 				ldcp->rx_ack_head = ACKPEEK_HEAD_INVALID;
2503 				*notify_client = B_TRUE;
2504 				*notify_event = LDC_EVT_RESET;
2505 			}
2506 			return (rv);
2507 		} else {
2508 			ldcp->rx_ack_head = ACKPEEK_HEAD_INVALID;
2509 		}
2510 	}
2511 
2512 skip_ackpeek:
2513 
2514 	/* Return, indicating whether or not data packets were found */
2515 	if ((*notify_event & (LDC_EVT_READ | LDC_EVT_RESET)) == LDC_EVT_READ)
2516 		return (0);
2517 
2518 	return (ENOMSG);
2519 }
2520 
2521 /*
2522  * Process any ACK packets on the HV receive queue.
2523  *
2524  * This function is only used by RELIABLE mode channels when the
2525  * secondary data queue fills up and there are packets remaining on
2526  * the HV receive queue.
2527  */
2528 int
2529 i_ldc_rx_ackpeek(ldc_chan_t *ldcp, uint64_t rx_head, uint64_t rx_tail)
2530 {
2531 	int		rv = 0;
2532 	ldc_msg_t	*msg;
2533 
2534 	if (ldcp->rx_ack_head == ACKPEEK_HEAD_INVALID)
2535 		ldcp->rx_ack_head = rx_head;
2536 
2537 	while (ldcp->rx_ack_head != rx_tail) {
2538 		msg = (ldc_msg_t *)(ldcp->rx_q_va + ldcp->rx_ack_head);
2539 
2540 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) {
2541 			if (rv = i_ldc_process_data_ACK(ldcp, msg))
2542 				break;
2543 			msg->stype &= ~LDC_ACK;
2544 		}
2545 
2546 		ldcp->rx_ack_head =
2547 		    (ldcp->rx_ack_head + LDC_PACKET_SIZE) %
2548 		    (ldcp->rx_q_entries << LDC_PACKET_SHIFT);
2549 	}
2550 	return (rv);
2551 }
2552 
2553 /* -------------------------------------------------------------------------- */
2554 
2555 /*
2556  * LDC API functions
2557  */
2558 
2559 /*
2560  * Initialize the channel. Allocate internal structure and memory for
2561  * TX/RX queues, and initialize locks.
2562  */
2563 int
2564 ldc_init(uint64_t id, ldc_attr_t *attr, ldc_handle_t *handle)
2565 {
2566 	ldc_chan_t 	*ldcp;
2567 	int		rv, exit_val;
2568 	uint64_t	ra_base, nentries;
2569 	uint64_t	qlen;
2570 
2571 	exit_val = EINVAL;	/* guarantee an error if exit on failure */
2572 
2573 	if (attr == NULL) {
2574 		DWARN(id, "ldc_init: (0x%llx) invalid attr\n", id);
2575 		return (EINVAL);
2576 	}
2577 	if (handle == NULL) {
2578 		DWARN(id, "ldc_init: (0x%llx) invalid handle\n", id);
2579 		return (EINVAL);
2580 	}
2581 
2582 	/* check if channel is valid */
2583 	rv = hv_ldc_tx_qinfo(id, &ra_base, &nentries);
2584 	if (rv == H_ECHANNEL) {
2585 		DWARN(id, "ldc_init: (0x%llx) invalid channel id\n", id);
2586 		return (EINVAL);
2587 	}
2588 
2589 	/* check if the channel has already been initialized */
2590 	mutex_enter(&ldcssp->lock);
2591 	ldcp = ldcssp->chan_list;
2592 	while (ldcp != NULL) {
2593 		if (ldcp->id == id) {
2594 			DWARN(id, "ldc_init: (0x%llx) already initialized\n",
2595 			    id);
2596 			mutex_exit(&ldcssp->lock);
2597 			return (EADDRINUSE);
2598 		}
2599 		ldcp = ldcp->next;
2600 	}
2601 	mutex_exit(&ldcssp->lock);
2602 
2603 	ASSERT(ldcp == NULL);
2604 
2605 	*handle = 0;
2606 
2607 	/* Allocate an ldcp structure */
2608 	ldcp = kmem_zalloc(sizeof (ldc_chan_t), KM_SLEEP);
2609 
2610 	/*
2611 	 * Initialize the channel and Tx lock
2612 	 *
2613 	 * The channel 'lock' protects the entire channel and
2614 	 * should be acquired before initializing, resetting,
2615 	 * destroying or reading from a channel.
2616 	 *
2617 	 * The 'tx_lock' should be acquired prior to transmitting
2618 	 * data over the channel. The lock should also be acquired
2619 	 * prior to channel reconfiguration (in order to prevent
2620 	 * concurrent writes).
2621 	 *
2622 	 * ORDERING: When both locks are being acquired, to prevent
2623 	 * deadlocks, the channel lock should be always acquired prior
2624 	 * to the tx_lock.
2625 	 */
2626 	mutex_init(&ldcp->lock, NULL, MUTEX_DRIVER, NULL);
2627 	mutex_init(&ldcp->tx_lock, NULL, MUTEX_DRIVER, NULL);
2628 
2629 	/* Initialize the channel */
2630 	ldcp->id = id;
2631 	ldcp->cb = NULL;
2632 	ldcp->cb_arg = NULL;
2633 	ldcp->cb_inprogress = B_FALSE;
2634 	ldcp->cb_enabled = B_FALSE;
2635 	ldcp->next = NULL;
2636 
2637 	/* Read attributes */
2638 	ldcp->mode = attr->mode;
2639 	ldcp->devclass = attr->devclass;
2640 	ldcp->devinst = attr->instance;
2641 	ldcp->mtu = (attr->mtu > 0) ? attr->mtu : LDC_DEFAULT_MTU;
2642 
2643 	D1(ldcp->id,
2644 	    "ldc_init: (0x%llx) channel attributes, class=0x%x, "
2645 	    "instance=0x%llx, mode=%d, mtu=%d\n",
2646 	    ldcp->id, ldcp->devclass, ldcp->devinst, ldcp->mode, ldcp->mtu);
2647 
2648 	ldcp->next_vidx = 0;
2649 	ldcp->tstate = TS_IN_RESET;
2650 	ldcp->hstate = 0;
2651 	ldcp->last_msg_snt = LDC_INIT_SEQID;
2652 	ldcp->last_ack_rcd = 0;
2653 	ldcp->last_msg_rcd = 0;
2654 	ldcp->rx_ack_head = ACKPEEK_HEAD_INVALID;
2655 
2656 	ldcp->stream_bufferp = NULL;
2657 	ldcp->exp_dring_list = NULL;
2658 	ldcp->imp_dring_list = NULL;
2659 	ldcp->mhdl_list = NULL;
2660 
2661 	ldcp->tx_intr_state = LDC_INTR_NONE;
2662 	ldcp->rx_intr_state = LDC_INTR_NONE;
2663 
2664 	/* Initialize payload size depending on whether channel is reliable */
2665 	switch (ldcp->mode) {
2666 	case LDC_MODE_RAW:
2667 		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_RAW;
2668 		ldcp->read_p = i_ldc_read_raw;
2669 		ldcp->write_p = i_ldc_write_raw;
2670 		break;
2671 	case LDC_MODE_UNRELIABLE:
2672 		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_UNRELIABLE;
2673 		ldcp->read_p = i_ldc_read_packet;
2674 		ldcp->write_p = i_ldc_write_packet;
2675 		break;
2676 	case LDC_MODE_RELIABLE:
2677 		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_RELIABLE;
2678 
2679 		ldcp->stream_remains = 0;
2680 		ldcp->stream_offset = 0;
2681 		ldcp->stream_bufferp = kmem_alloc(ldcp->mtu, KM_SLEEP);
2682 		ldcp->read_p = i_ldc_read_stream;
2683 		ldcp->write_p = i_ldc_write_stream;
2684 		break;
2685 	default:
2686 		exit_val = EINVAL;
2687 		goto cleanup_on_exit;
2688 	}
2689 
2690 	/*
2691 	 * qlen is (mtu * ldc_mtu_msgs) / pkt_payload. If this
2692 	 * value is smaller than default length of ldc_queue_entries,
2693 	 * qlen is set to ldc_queue_entries. Ensure that computed
2694 	 * length is a power-of-two value.
2695 	 */
2696 	qlen = (ldcp->mtu * ldc_mtu_msgs) / ldcp->pkt_payload;
2697 	if (!ISP2(qlen)) {
2698 		uint64_t	tmp = 1;
2699 		while (qlen) {
2700 			qlen >>= 1; tmp <<= 1;
2701 		}
2702 		qlen = tmp;
2703 	}
2704 
2705 	ldcp->rx_q_entries =
2706 	    (qlen < ldc_queue_entries) ? ldc_queue_entries : qlen;
2707 	ldcp->tx_q_entries = ldcp->rx_q_entries;
2708 
2709 	D1(ldcp->id, "ldc_init: queue length = 0x%llx\n", ldcp->rx_q_entries);
2710 
2711 	/* Create a transmit queue */
2712 	ldcp->tx_q_va = (uint64_t)
2713 	    contig_mem_alloc(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
2714 	if (ldcp->tx_q_va == NULL) {
2715 		cmn_err(CE_WARN,
2716 		    "ldc_init: (0x%lx) TX queue allocation failed\n",
2717 		    ldcp->id);
2718 		exit_val = ENOMEM;
2719 		goto cleanup_on_exit;
2720 	}
2721 	ldcp->tx_q_ra = va_to_pa((caddr_t)ldcp->tx_q_va);
2722 
2723 	D2(ldcp->id, "ldc_init: txq_va=0x%llx, txq_ra=0x%llx, entries=0x%llx\n",
2724 	    ldcp->tx_q_va, ldcp->tx_q_ra, ldcp->tx_q_entries);
2725 
2726 	ldcp->tstate |= TS_TXQ_RDY;
2727 
2728 	/* Create a receive queue */
2729 	ldcp->rx_q_va = (uint64_t)
2730 	    contig_mem_alloc(ldcp->rx_q_entries << LDC_PACKET_SHIFT);
2731 	if (ldcp->rx_q_va == NULL) {
2732 		cmn_err(CE_WARN,
2733 		    "ldc_init: (0x%lx) RX queue allocation failed\n",
2734 		    ldcp->id);
2735 		exit_val = ENOMEM;
2736 		goto cleanup_on_exit;
2737 	}
2738 	ldcp->rx_q_ra = va_to_pa((caddr_t)ldcp->rx_q_va);
2739 
2740 	D2(ldcp->id, "ldc_init: rxq_va=0x%llx, rxq_ra=0x%llx, entries=0x%llx\n",
2741 	    ldcp->rx_q_va, ldcp->rx_q_ra, ldcp->rx_q_entries);
2742 
2743 	ldcp->tstate |= TS_RXQ_RDY;
2744 
2745 	/* Setup a separate read data queue */
2746 	if (ldcp->mode == LDC_MODE_RELIABLE) {
2747 		ldcp->readq_get_state = i_ldc_dq_rx_get_state;
2748 		ldcp->readq_set_head  = i_ldc_set_rxdq_head;
2749 
2750 		/* Make sure the data queue multiplier is a power of 2 */
2751 		if (!ISP2(ldc_rxdq_multiplier)) {
2752 			D1(ldcp->id, "ldc_init: (0x%llx) ldc_rxdq_multiplier "
2753 			    "not a power of 2, resetting", ldcp->id);
2754 			ldc_rxdq_multiplier = LDC_RXDQ_MULTIPLIER;
2755 		}
2756 
2757 		ldcp->rx_dq_entries = ldc_rxdq_multiplier * ldcp->rx_q_entries;
2758 		ldcp->rx_dq_va = (uint64_t)
2759 		    kmem_alloc(ldcp->rx_dq_entries << LDC_PACKET_SHIFT,
2760 		    KM_SLEEP);
2761 		if (ldcp->rx_dq_va == NULL) {
2762 			cmn_err(CE_WARN,
2763 			    "ldc_init: (0x%lx) RX data queue "
2764 			    "allocation failed\n", ldcp->id);
2765 			exit_val = ENOMEM;
2766 			goto cleanup_on_exit;
2767 		}
2768 
2769 		ldcp->rx_dq_head = ldcp->rx_dq_tail = 0;
2770 
2771 		D2(ldcp->id, "ldc_init: rx_dq_va=0x%llx, "
2772 		    "rx_dq_entries=0x%llx\n", ldcp->rx_dq_va,
2773 		    ldcp->rx_dq_entries);
2774 	} else {
2775 		ldcp->readq_get_state = i_ldc_hvq_rx_get_state;
2776 		ldcp->readq_set_head  = i_ldc_set_rx_head;
2777 	}
2778 
2779 	/* Init descriptor ring and memory handle list lock */
2780 	mutex_init(&ldcp->exp_dlist_lock, NULL, MUTEX_DRIVER, NULL);
2781 	mutex_init(&ldcp->imp_dlist_lock, NULL, MUTEX_DRIVER, NULL);
2782 	mutex_init(&ldcp->mlist_lock, NULL, MUTEX_DRIVER, NULL);
2783 
2784 	/* mark status as INITialized */
2785 	ldcp->status = LDC_INIT;
2786 
2787 	/* Add to channel list */
2788 	mutex_enter(&ldcssp->lock);
2789 	ldcp->next = ldcssp->chan_list;
2790 	ldcssp->chan_list = ldcp;
2791 	ldcssp->channel_count++;
2792 	mutex_exit(&ldcssp->lock);
2793 
2794 	/* set the handle */
2795 	*handle = (ldc_handle_t)ldcp;
2796 
2797 	D1(ldcp->id, "ldc_init: (0x%llx) channel initialized\n", ldcp->id);
2798 
2799 	return (0);
2800 
2801 cleanup_on_exit:
2802 
2803 	if (ldcp->mode == LDC_MODE_RELIABLE && ldcp->stream_bufferp)
2804 		kmem_free(ldcp->stream_bufferp, ldcp->mtu);
2805 
2806 	if (ldcp->tstate & TS_TXQ_RDY)
2807 		contig_mem_free((caddr_t)ldcp->tx_q_va,
2808 		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT));
2809 
2810 	if (ldcp->tstate & TS_RXQ_RDY)
2811 		contig_mem_free((caddr_t)ldcp->rx_q_va,
2812 		    (ldcp->rx_q_entries << LDC_PACKET_SHIFT));
2813 
2814 	mutex_destroy(&ldcp->tx_lock);
2815 	mutex_destroy(&ldcp->lock);
2816 
2817 	if (ldcp)
2818 		kmem_free(ldcp, sizeof (ldc_chan_t));
2819 
2820 	return (exit_val);
2821 }
2822 
2823 /*
2824  * Finalizes the LDC connection. It will return EBUSY if the
2825  * channel is open. A ldc_close() has to be done prior to
2826  * a ldc_fini operation. It frees TX/RX queues, associated
2827  * with the channel
2828  */
2829 int
2830 ldc_fini(ldc_handle_t handle)
2831 {
2832 	ldc_chan_t 	*ldcp;
2833 	ldc_chan_t 	*tmp_ldcp;
2834 	uint64_t 	id;
2835 
2836 	if (handle == NULL) {
2837 		DWARN(DBG_ALL_LDCS, "ldc_fini: invalid channel handle\n");
2838 		return (EINVAL);
2839 	}
2840 	ldcp = (ldc_chan_t *)handle;
2841 	id = ldcp->id;
2842 
2843 	mutex_enter(&ldcp->lock);
2844 
2845 	if ((ldcp->tstate & ~TS_IN_RESET) > TS_INIT) {
2846 		DWARN(ldcp->id, "ldc_fini: (0x%llx) channel is open\n",
2847 		    ldcp->id);
2848 		mutex_exit(&ldcp->lock);
2849 		return (EBUSY);
2850 	}
2851 
2852 	/* Remove from the channel list */
2853 	mutex_enter(&ldcssp->lock);
2854 	tmp_ldcp = ldcssp->chan_list;
2855 	if (tmp_ldcp == ldcp) {
2856 		ldcssp->chan_list = ldcp->next;
2857 		ldcp->next = NULL;
2858 	} else {
2859 		while (tmp_ldcp != NULL) {
2860 			if (tmp_ldcp->next == ldcp) {
2861 				tmp_ldcp->next = ldcp->next;
2862 				ldcp->next = NULL;
2863 				break;
2864 			}
2865 			tmp_ldcp = tmp_ldcp->next;
2866 		}
2867 		if (tmp_ldcp == NULL) {
2868 			DWARN(DBG_ALL_LDCS, "ldc_fini: invalid channel hdl\n");
2869 			mutex_exit(&ldcssp->lock);
2870 			mutex_exit(&ldcp->lock);
2871 			return (EINVAL);
2872 		}
2873 	}
2874 
2875 	ldcssp->channel_count--;
2876 
2877 	mutex_exit(&ldcssp->lock);
2878 
2879 	/* Free the map table for this channel */
2880 	if (ldcp->mtbl) {
2881 		(void) hv_ldc_set_map_table(ldcp->id, NULL, NULL);
2882 		if (ldcp->mtbl->contigmem)
2883 			contig_mem_free(ldcp->mtbl->table, ldcp->mtbl->size);
2884 		else
2885 			kmem_free(ldcp->mtbl->table, ldcp->mtbl->size);
2886 		mutex_destroy(&ldcp->mtbl->lock);
2887 		kmem_free(ldcp->mtbl, sizeof (ldc_mtbl_t));
2888 	}
2889 
2890 	/* Destroy descriptor ring and memory handle list lock */
2891 	mutex_destroy(&ldcp->exp_dlist_lock);
2892 	mutex_destroy(&ldcp->imp_dlist_lock);
2893 	mutex_destroy(&ldcp->mlist_lock);
2894 
2895 	/* Free the stream buffer for RELIABLE_MODE */
2896 	if (ldcp->mode == LDC_MODE_RELIABLE && ldcp->stream_bufferp)
2897 		kmem_free(ldcp->stream_bufferp, ldcp->mtu);
2898 
2899 	/* Free the RX queue */
2900 	contig_mem_free((caddr_t)ldcp->rx_q_va,
2901 	    (ldcp->rx_q_entries << LDC_PACKET_SHIFT));
2902 	ldcp->tstate &= ~TS_RXQ_RDY;
2903 
2904 	/* Free the RX data queue */
2905 	if (ldcp->mode == LDC_MODE_RELIABLE) {
2906 		kmem_free((caddr_t)ldcp->rx_dq_va,
2907 		    (ldcp->rx_dq_entries << LDC_PACKET_SHIFT));
2908 	}
2909 
2910 	/* Free the TX queue */
2911 	contig_mem_free((caddr_t)ldcp->tx_q_va,
2912 	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT));
2913 	ldcp->tstate &= ~TS_TXQ_RDY;
2914 
2915 	mutex_exit(&ldcp->lock);
2916 
2917 	/* Destroy mutex */
2918 	mutex_destroy(&ldcp->tx_lock);
2919 	mutex_destroy(&ldcp->lock);
2920 
2921 	/* free channel structure */
2922 	kmem_free(ldcp, sizeof (ldc_chan_t));
2923 
2924 	D1(id, "ldc_fini: (0x%llx) channel finalized\n", id);
2925 
2926 	return (0);
2927 }
2928 
2929 /*
2930  * Open the LDC channel for use. It registers the TX/RX queues
2931  * with the Hypervisor. It also specifies the interrupt number
2932  * and target CPU for this channel
2933  */
2934 int
2935 ldc_open(ldc_handle_t handle)
2936 {
2937 	ldc_chan_t 	*ldcp;
2938 	int 		rv;
2939 
2940 	if (handle == NULL) {
2941 		DWARN(DBG_ALL_LDCS, "ldc_open: invalid channel handle\n");
2942 		return (EINVAL);
2943 	}
2944 
2945 	ldcp = (ldc_chan_t *)handle;
2946 
2947 	mutex_enter(&ldcp->lock);
2948 
2949 	if (ldcp->tstate < TS_INIT) {
2950 		DWARN(ldcp->id,
2951 		    "ldc_open: (0x%llx) channel not initialized\n", ldcp->id);
2952 		mutex_exit(&ldcp->lock);
2953 		return (EFAULT);
2954 	}
2955 	if ((ldcp->tstate & ~TS_IN_RESET) >= TS_OPEN) {
2956 		DWARN(ldcp->id,
2957 		    "ldc_open: (0x%llx) channel is already open\n", ldcp->id);
2958 		mutex_exit(&ldcp->lock);
2959 		return (EFAULT);
2960 	}
2961 
2962 	/*
2963 	 * Unregister/Register the tx queue with the hypervisor
2964 	 */
2965 	rv = hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
2966 	if (rv) {
2967 		cmn_err(CE_WARN,
2968 		    "ldc_open: (0x%lx) channel tx queue unconf failed\n",
2969 		    ldcp->id);
2970 		mutex_exit(&ldcp->lock);
2971 		return (EIO);
2972 	}
2973 
2974 	rv = hv_ldc_tx_qconf(ldcp->id, ldcp->tx_q_ra, ldcp->tx_q_entries);
2975 	if (rv) {
2976 		cmn_err(CE_WARN,
2977 		    "ldc_open: (0x%lx) channel tx queue conf failed\n",
2978 		    ldcp->id);
2979 		mutex_exit(&ldcp->lock);
2980 		return (EIO);
2981 	}
2982 
2983 	D2(ldcp->id, "ldc_open: (0x%llx) registered tx queue with LDC\n",
2984 	    ldcp->id);
2985 
2986 	/*
2987 	 * Unregister/Register the rx queue with the hypervisor
2988 	 */
2989 	rv = hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
2990 	if (rv) {
2991 		cmn_err(CE_WARN,
2992 		    "ldc_open: (0x%lx) channel rx queue unconf failed\n",
2993 		    ldcp->id);
2994 		mutex_exit(&ldcp->lock);
2995 		return (EIO);
2996 	}
2997 
2998 	rv = hv_ldc_rx_qconf(ldcp->id, ldcp->rx_q_ra, ldcp->rx_q_entries);
2999 	if (rv) {
3000 		cmn_err(CE_WARN,
3001 		    "ldc_open: (0x%lx) channel rx queue conf failed\n",
3002 		    ldcp->id);
3003 		mutex_exit(&ldcp->lock);
3004 		return (EIO);
3005 	}
3006 
3007 	D2(ldcp->id, "ldc_open: (0x%llx) registered rx queue with LDC\n",
3008 	    ldcp->id);
3009 
3010 	ldcp->tstate |= TS_QCONF_RDY;
3011 
3012 	/* Register the channel with the channel nexus */
3013 	rv = i_ldc_register_channel(ldcp);
3014 	if (rv && rv != EAGAIN) {
3015 		cmn_err(CE_WARN,
3016 		    "ldc_open: (0x%lx) channel register failed\n", ldcp->id);
3017 		ldcp->tstate &= ~TS_QCONF_RDY;
3018 		(void) hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
3019 		(void) hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
3020 		mutex_exit(&ldcp->lock);
3021 		return (EIO);
3022 	}
3023 
3024 	/* mark channel in OPEN state */
3025 	ldcp->status = LDC_OPEN;
3026 
3027 	/* Read channel state */
3028 	rv = hv_ldc_tx_get_state(ldcp->id,
3029 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
3030 	if (rv) {
3031 		cmn_err(CE_WARN,
3032 		    "ldc_open: (0x%lx) cannot read channel state\n",
3033 		    ldcp->id);
3034 		(void) i_ldc_unregister_channel(ldcp);
3035 		ldcp->tstate &= ~TS_QCONF_RDY;
3036 		(void) hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
3037 		(void) hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
3038 		mutex_exit(&ldcp->lock);
3039 		return (EIO);
3040 	}
3041 
3042 	/*
3043 	 * set the ACKd head to current head location for reliable
3044 	 */
3045 	ldcp->tx_ackd_head = ldcp->tx_head;
3046 
3047 	/* mark channel ready if HV report link is UP (peer alloc'd Rx queue) */
3048 	if (ldcp->link_state == LDC_CHANNEL_UP ||
3049 	    ldcp->link_state == LDC_CHANNEL_RESET) {
3050 		ldcp->tstate |= TS_LINK_READY;
3051 		ldcp->status = LDC_READY;
3052 	}
3053 
3054 	/*
3055 	 * if channel is being opened in RAW mode - no handshake is needed
3056 	 * switch the channel READY and UP state
3057 	 */
3058 	if (ldcp->mode == LDC_MODE_RAW) {
3059 		ldcp->tstate = TS_UP;	/* set bits associated with LDC UP */
3060 		ldcp->status = LDC_UP;
3061 	}
3062 
3063 	mutex_exit(&ldcp->lock);
3064 
3065 	/*
3066 	 * Increment number of open channels
3067 	 */
3068 	mutex_enter(&ldcssp->lock);
3069 	ldcssp->channels_open++;
3070 	mutex_exit(&ldcssp->lock);
3071 
3072 	D1(ldcp->id,
3073 	    "ldc_open: (0x%llx) channel (0x%p) open for use "
3074 	    "(tstate=0x%x, status=0x%x)\n",
3075 	    ldcp->id, ldcp, ldcp->tstate, ldcp->status);
3076 
3077 	return (0);
3078 }
3079 
3080 /*
3081  * Close the LDC connection. It will return EBUSY if there
3082  * are memory segments or descriptor rings either bound to or
3083  * mapped over the channel
3084  */
3085 int
3086 ldc_close(ldc_handle_t handle)
3087 {
3088 	ldc_chan_t 	*ldcp;
3089 	int		rv = 0, retries = 0;
3090 	boolean_t	chk_done = B_FALSE;
3091 
3092 	if (handle == NULL) {
3093 		DWARN(DBG_ALL_LDCS, "ldc_close: invalid channel handle\n");
3094 		return (EINVAL);
3095 	}
3096 	ldcp = (ldc_chan_t *)handle;
3097 
3098 	mutex_enter(&ldcp->lock);
3099 
3100 	/* return error if channel is not open */
3101 	if ((ldcp->tstate & ~TS_IN_RESET) < TS_OPEN) {
3102 		DWARN(ldcp->id,
3103 		    "ldc_close: (0x%llx) channel is not open\n", ldcp->id);
3104 		mutex_exit(&ldcp->lock);
3105 		return (EFAULT);
3106 	}
3107 
3108 	/* if any memory handles, drings, are bound or mapped cannot close */
3109 	if (ldcp->mhdl_list != NULL) {
3110 		DWARN(ldcp->id,
3111 		    "ldc_close: (0x%llx) channel has bound memory handles\n",
3112 		    ldcp->id);
3113 		mutex_exit(&ldcp->lock);
3114 		return (EBUSY);
3115 	}
3116 	if (ldcp->exp_dring_list != NULL) {
3117 		DWARN(ldcp->id,
3118 		    "ldc_close: (0x%llx) channel has bound descriptor rings\n",
3119 		    ldcp->id);
3120 		mutex_exit(&ldcp->lock);
3121 		return (EBUSY);
3122 	}
3123 	if (ldcp->imp_dring_list != NULL) {
3124 		DWARN(ldcp->id,
3125 		    "ldc_close: (0x%llx) channel has mapped descriptor rings\n",
3126 		    ldcp->id);
3127 		mutex_exit(&ldcp->lock);
3128 		return (EBUSY);
3129 	}
3130 
3131 	if (ldcp->cb_inprogress) {
3132 		DWARN(ldcp->id, "ldc_close: (0x%llx) callback active\n",
3133 		    ldcp->id);
3134 		mutex_exit(&ldcp->lock);
3135 		return (EWOULDBLOCK);
3136 	}
3137 
3138 	/* Obtain Tx lock */
3139 	mutex_enter(&ldcp->tx_lock);
3140 
3141 	/*
3142 	 * Wait for pending transmits to complete i.e Tx queue to drain
3143 	 * if there are pending pkts - wait 1 ms and retry again
3144 	 */
3145 	for (;;) {
3146 
3147 		rv = hv_ldc_tx_get_state(ldcp->id,
3148 		    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
3149 		if (rv) {
3150 			cmn_err(CE_WARN,
3151 			    "ldc_close: (0x%lx) cannot read qptrs\n", ldcp->id);
3152 			mutex_exit(&ldcp->tx_lock);
3153 			mutex_exit(&ldcp->lock);
3154 			return (EIO);
3155 		}
3156 
3157 		if (ldcp->tx_head == ldcp->tx_tail ||
3158 		    ldcp->link_state != LDC_CHANNEL_UP) {
3159 			break;
3160 		}
3161 
3162 		if (chk_done) {
3163 			DWARN(ldcp->id,
3164 			    "ldc_close: (0x%llx) Tx queue drain timeout\n",
3165 			    ldcp->id);
3166 			break;
3167 		}
3168 
3169 		/* wait for one ms and try again */
3170 		delay(drv_usectohz(1000));
3171 		chk_done = B_TRUE;
3172 	}
3173 
3174 	/*
3175 	 * Drain the Tx and Rx queues as we are closing the
3176 	 * channel. We dont care about any pending packets.
3177 	 * We have to also drain the queue prior to clearing
3178 	 * pending interrupts, otherwise the HV will trigger
3179 	 * an interrupt the moment the interrupt state is
3180 	 * cleared.
3181 	 */
3182 	(void) i_ldc_txq_reconf(ldcp);
3183 	i_ldc_rxq_drain(ldcp);
3184 
3185 	/*
3186 	 * Unregister the channel with the nexus
3187 	 */
3188 	while ((rv = i_ldc_unregister_channel(ldcp)) != 0) {
3189 
3190 		mutex_exit(&ldcp->tx_lock);
3191 		mutex_exit(&ldcp->lock);
3192 
3193 		/* if any error other than EAGAIN return back */
3194 		if (rv != EAGAIN || retries >= ldc_max_retries) {
3195 			cmn_err(CE_WARN,
3196 			    "ldc_close: (0x%lx) unregister failed, %d\n",
3197 			    ldcp->id, rv);
3198 			return (rv);
3199 		}
3200 
3201 		/*
3202 		 * As there could be pending interrupts we need
3203 		 * to wait and try again
3204 		 */
3205 		drv_usecwait(ldc_close_delay);
3206 		mutex_enter(&ldcp->lock);
3207 		mutex_enter(&ldcp->tx_lock);
3208 		retries++;
3209 	}
3210 
3211 	ldcp->tstate &= ~TS_QCONF_RDY;
3212 
3213 	/*
3214 	 * Unregister queues
3215 	 */
3216 	rv = hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
3217 	if (rv) {
3218 		cmn_err(CE_WARN,
3219 		    "ldc_close: (0x%lx) channel TX queue unconf failed\n",
3220 		    ldcp->id);
3221 		mutex_exit(&ldcp->tx_lock);
3222 		mutex_exit(&ldcp->lock);
3223 		return (EIO);
3224 	}
3225 	rv = hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
3226 	if (rv) {
3227 		cmn_err(CE_WARN,
3228 		    "ldc_close: (0x%lx) channel RX queue unconf failed\n",
3229 		    ldcp->id);
3230 		mutex_exit(&ldcp->tx_lock);
3231 		mutex_exit(&ldcp->lock);
3232 		return (EIO);
3233 	}
3234 
3235 	/* Reset channel state information */
3236 	i_ldc_reset_state(ldcp);
3237 
3238 	/* Mark channel as down and in initialized state */
3239 	ldcp->tx_ackd_head = 0;
3240 	ldcp->tx_head = 0;
3241 	ldcp->tstate = TS_IN_RESET|TS_INIT;
3242 	ldcp->status = LDC_INIT;
3243 
3244 	mutex_exit(&ldcp->tx_lock);
3245 	mutex_exit(&ldcp->lock);
3246 
3247 	/* Decrement number of open channels */
3248 	mutex_enter(&ldcssp->lock);
3249 	ldcssp->channels_open--;
3250 	mutex_exit(&ldcssp->lock);
3251 
3252 	D1(ldcp->id, "ldc_close: (0x%llx) channel closed\n", ldcp->id);
3253 
3254 	return (0);
3255 }
3256 
3257 /*
3258  * Register channel callback
3259  */
3260 int
3261 ldc_reg_callback(ldc_handle_t handle,
3262     uint_t(*cb)(uint64_t event, caddr_t arg), caddr_t arg)
3263 {
3264 	ldc_chan_t *ldcp;
3265 
3266 	if (handle == NULL) {
3267 		DWARN(DBG_ALL_LDCS,
3268 		    "ldc_reg_callback: invalid channel handle\n");
3269 		return (EINVAL);
3270 	}
3271 	if (((uint64_t)cb) < KERNELBASE) {
3272 		DWARN(DBG_ALL_LDCS, "ldc_reg_callback: invalid callback\n");
3273 		return (EINVAL);
3274 	}
3275 	ldcp = (ldc_chan_t *)handle;
3276 
3277 	mutex_enter(&ldcp->lock);
3278 
3279 	if (ldcp->cb) {
3280 		DWARN(ldcp->id, "ldc_reg_callback: (0x%llx) callback exists\n",
3281 		    ldcp->id);
3282 		mutex_exit(&ldcp->lock);
3283 		return (EIO);
3284 	}
3285 	if (ldcp->cb_inprogress) {
3286 		DWARN(ldcp->id, "ldc_reg_callback: (0x%llx) callback active\n",
3287 		    ldcp->id);
3288 		mutex_exit(&ldcp->lock);
3289 		return (EWOULDBLOCK);
3290 	}
3291 
3292 	ldcp->cb = cb;
3293 	ldcp->cb_arg = arg;
3294 	ldcp->cb_enabled = B_TRUE;
3295 
3296 	D1(ldcp->id,
3297 	    "ldc_reg_callback: (0x%llx) registered callback for channel\n",
3298 	    ldcp->id);
3299 
3300 	mutex_exit(&ldcp->lock);
3301 
3302 	return (0);
3303 }
3304 
3305 /*
3306  * Unregister channel callback
3307  */
3308 int
3309 ldc_unreg_callback(ldc_handle_t handle)
3310 {
3311 	ldc_chan_t *ldcp;
3312 
3313 	if (handle == NULL) {
3314 		DWARN(DBG_ALL_LDCS,
3315 		    "ldc_unreg_callback: invalid channel handle\n");
3316 		return (EINVAL);
3317 	}
3318 	ldcp = (ldc_chan_t *)handle;
3319 
3320 	mutex_enter(&ldcp->lock);
3321 
3322 	if (ldcp->cb == NULL) {
3323 		DWARN(ldcp->id,
3324 		    "ldc_unreg_callback: (0x%llx) no callback exists\n",
3325 		    ldcp->id);
3326 		mutex_exit(&ldcp->lock);
3327 		return (EIO);
3328 	}
3329 	if (ldcp->cb_inprogress) {
3330 		DWARN(ldcp->id,
3331 		    "ldc_unreg_callback: (0x%llx) callback active\n",
3332 		    ldcp->id);
3333 		mutex_exit(&ldcp->lock);
3334 		return (EWOULDBLOCK);
3335 	}
3336 
3337 	ldcp->cb = NULL;
3338 	ldcp->cb_arg = NULL;
3339 	ldcp->cb_enabled = B_FALSE;
3340 
3341 	D1(ldcp->id,
3342 	    "ldc_unreg_callback: (0x%llx) unregistered callback for channel\n",
3343 	    ldcp->id);
3344 
3345 	mutex_exit(&ldcp->lock);
3346 
3347 	return (0);
3348 }
3349 
3350 
3351 /*
3352  * Bring a channel up by initiating a handshake with the peer
3353  * This call is asynchronous. It will complete at a later point
3354  * in time when the peer responds back with an RTR.
3355  */
3356 int
3357 ldc_up(ldc_handle_t handle)
3358 {
3359 	int 		rv;
3360 	ldc_chan_t 	*ldcp;
3361 	ldc_msg_t 	*ldcmsg;
3362 	uint64_t 	tx_tail, tstate, link_state;
3363 
3364 	if (handle == NULL) {
3365 		DWARN(DBG_ALL_LDCS, "ldc_up: invalid channel handle\n");
3366 		return (EINVAL);
3367 	}
3368 	ldcp = (ldc_chan_t *)handle;
3369 
3370 	mutex_enter(&ldcp->lock);
3371 
3372 	D1(ldcp->id, "ldc_up: (0x%llx) doing channel UP\n", ldcp->id);
3373 
3374 	/* clear the reset state */
3375 	tstate = ldcp->tstate;
3376 	ldcp->tstate &= ~TS_IN_RESET;
3377 
3378 	if (ldcp->tstate == TS_UP) {
3379 		DWARN(ldcp->id,
3380 		    "ldc_up: (0x%llx) channel is already in UP state\n",
3381 		    ldcp->id);
3382 
3383 		/* mark channel as up */
3384 		ldcp->status = LDC_UP;
3385 
3386 		/*
3387 		 * if channel was in reset state and there was
3388 		 * pending data clear interrupt state. this will
3389 		 * trigger an interrupt, causing the RX handler to
3390 		 * to invoke the client's callback
3391 		 */
3392 		if ((tstate & TS_IN_RESET) &&
3393 		    ldcp->rx_intr_state == LDC_INTR_PEND) {
3394 			D1(ldcp->id,
3395 			    "ldc_up: (0x%llx) channel has pending data, "
3396 			    "clearing interrupt\n", ldcp->id);
3397 			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
3398 		}
3399 
3400 		mutex_exit(&ldcp->lock);
3401 		return (0);
3402 	}
3403 
3404 	/* if the channel is in RAW mode - mark it as UP, if READY */
3405 	if (ldcp->mode == LDC_MODE_RAW && ldcp->tstate >= TS_READY) {
3406 		ldcp->tstate = TS_UP;
3407 		mutex_exit(&ldcp->lock);
3408 		return (0);
3409 	}
3410 
3411 	/* Don't start another handshake if there is one in progress */
3412 	if (ldcp->hstate) {
3413 		D1(ldcp->id,
3414 		    "ldc_up: (0x%llx) channel handshake in progress\n",
3415 		    ldcp->id);
3416 		mutex_exit(&ldcp->lock);
3417 		return (0);
3418 	}
3419 
3420 	mutex_enter(&ldcp->tx_lock);
3421 
3422 	/* save current link state */
3423 	link_state = ldcp->link_state;
3424 
3425 	/* get the current tail for the LDC msg */
3426 	rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
3427 	if (rv) {
3428 		D1(ldcp->id, "ldc_up: (0x%llx) cannot initiate handshake\n",
3429 		    ldcp->id);
3430 		mutex_exit(&ldcp->tx_lock);
3431 		mutex_exit(&ldcp->lock);
3432 		return (ECONNREFUSED);
3433 	}
3434 
3435 	/*
3436 	 * If i_ldc_get_tx_tail() changed link_state to either RESET or UP,
3437 	 * from a previous state of DOWN, then mark the channel as
3438 	 * being ready for handshake.
3439 	 */
3440 	if ((link_state == LDC_CHANNEL_DOWN) &&
3441 	    (link_state != ldcp->link_state)) {
3442 
3443 		ASSERT((ldcp->link_state == LDC_CHANNEL_RESET) ||
3444 		    (ldcp->link_state == LDC_CHANNEL_UP));
3445 
3446 		if (ldcp->mode == LDC_MODE_RAW) {
3447 			ldcp->status = LDC_UP;
3448 			ldcp->tstate = TS_UP;
3449 			mutex_exit(&ldcp->tx_lock);
3450 			mutex_exit(&ldcp->lock);
3451 			return (0);
3452 		} else {
3453 			ldcp->status = LDC_READY;
3454 			ldcp->tstate |= TS_LINK_READY;
3455 		}
3456 
3457 	}
3458 
3459 	ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
3460 	ZERO_PKT(ldcmsg);
3461 
3462 	ldcmsg->type = LDC_CTRL;
3463 	ldcmsg->stype = LDC_INFO;
3464 	ldcmsg->ctrl = LDC_VER;
3465 	ldcp->next_vidx = 0;
3466 	bcopy(&ldc_versions[0], ldcmsg->udata, sizeof (ldc_versions[0]));
3467 
3468 	DUMP_LDC_PKT(ldcp, "ldc_up snd ver", (uint64_t)ldcmsg);
3469 
3470 	/* initiate the send by calling into HV and set the new tail */
3471 	tx_tail = (tx_tail + LDC_PACKET_SIZE) %
3472 	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
3473 
3474 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
3475 	if (rv) {
3476 		DWARN(ldcp->id,
3477 		    "ldc_up: (0x%llx) cannot initiate handshake rv=%d\n",
3478 		    ldcp->id, rv);
3479 		mutex_exit(&ldcp->tx_lock);
3480 		mutex_exit(&ldcp->lock);
3481 		return (rv);
3482 	}
3483 
3484 	ldcp->hstate |= TS_SENT_VER;
3485 	ldcp->tx_tail = tx_tail;
3486 	D1(ldcp->id, "ldc_up: (0x%llx) channel up initiated\n", ldcp->id);
3487 
3488 	mutex_exit(&ldcp->tx_lock);
3489 	mutex_exit(&ldcp->lock);
3490 
3491 	return (rv);
3492 }
3493 
3494 
3495 /*
3496  * Bring a channel down by resetting its state and queues
3497  */
3498 int
3499 ldc_down(ldc_handle_t handle)
3500 {
3501 	ldc_chan_t 	*ldcp;
3502 
3503 	if (handle == NULL) {
3504 		DWARN(DBG_ALL_LDCS, "ldc_down: invalid channel handle\n");
3505 		return (EINVAL);
3506 	}
3507 	ldcp = (ldc_chan_t *)handle;
3508 	mutex_enter(&ldcp->lock);
3509 	mutex_enter(&ldcp->tx_lock);
3510 	i_ldc_reset(ldcp, B_TRUE);
3511 	mutex_exit(&ldcp->tx_lock);
3512 	mutex_exit(&ldcp->lock);
3513 
3514 	return (0);
3515 }
3516 
3517 /*
3518  * Get the current channel status
3519  */
3520 int
3521 ldc_status(ldc_handle_t handle, ldc_status_t *status)
3522 {
3523 	ldc_chan_t *ldcp;
3524 
3525 	if (handle == NULL || status == NULL) {
3526 		DWARN(DBG_ALL_LDCS, "ldc_status: invalid argument\n");
3527 		return (EINVAL);
3528 	}
3529 	ldcp = (ldc_chan_t *)handle;
3530 
3531 	*status = ((ldc_chan_t *)handle)->status;
3532 
3533 	D1(ldcp->id,
3534 	    "ldc_status: (0x%llx) returned status %d\n", ldcp->id, *status);
3535 	return (0);
3536 }
3537 
3538 
3539 /*
3540  * Set the channel's callback mode - enable/disable callbacks
3541  */
3542 int
3543 ldc_set_cb_mode(ldc_handle_t handle, ldc_cb_mode_t cmode)
3544 {
3545 	ldc_chan_t 	*ldcp;
3546 
3547 	if (handle == NULL) {
3548 		DWARN(DBG_ALL_LDCS,
3549 		    "ldc_set_intr_mode: invalid channel handle\n");
3550 		return (EINVAL);
3551 	}
3552 	ldcp = (ldc_chan_t *)handle;
3553 
3554 	/*
3555 	 * Record no callbacks should be invoked
3556 	 */
3557 	mutex_enter(&ldcp->lock);
3558 
3559 	switch (cmode) {
3560 	case LDC_CB_DISABLE:
3561 		if (!ldcp->cb_enabled) {
3562 			DWARN(ldcp->id,
3563 			    "ldc_set_cb_mode: (0x%llx) callbacks disabled\n",
3564 			    ldcp->id);
3565 			break;
3566 		}
3567 		ldcp->cb_enabled = B_FALSE;
3568 
3569 		D1(ldcp->id, "ldc_set_cb_mode: (0x%llx) disabled callbacks\n",
3570 		    ldcp->id);
3571 		break;
3572 
3573 	case LDC_CB_ENABLE:
3574 		if (ldcp->cb_enabled) {
3575 			DWARN(ldcp->id,
3576 			    "ldc_set_cb_mode: (0x%llx) callbacks enabled\n",
3577 			    ldcp->id);
3578 			break;
3579 		}
3580 		ldcp->cb_enabled = B_TRUE;
3581 
3582 		D1(ldcp->id, "ldc_set_cb_mode: (0x%llx) enabled callbacks\n",
3583 		    ldcp->id);
3584 		break;
3585 	}
3586 
3587 	mutex_exit(&ldcp->lock);
3588 
3589 	return (0);
3590 }
3591 
3592 /*
3593  * Check to see if there are packets on the incoming queue
3594  * Will return hasdata = B_FALSE if there are no packets
3595  */
3596 int
3597 ldc_chkq(ldc_handle_t handle, boolean_t *hasdata)
3598 {
3599 	int 		rv;
3600 	uint64_t 	rx_head, rx_tail;
3601 	ldc_chan_t 	*ldcp;
3602 
3603 	if (handle == NULL) {
3604 		DWARN(DBG_ALL_LDCS, "ldc_chkq: invalid channel handle\n");
3605 		return (EINVAL);
3606 	}
3607 	ldcp = (ldc_chan_t *)handle;
3608 
3609 	*hasdata = B_FALSE;
3610 
3611 	mutex_enter(&ldcp->lock);
3612 
3613 	if (ldcp->tstate != TS_UP) {
3614 		D1(ldcp->id,
3615 		    "ldc_chkq: (0x%llx) channel is not up\n", ldcp->id);
3616 		mutex_exit(&ldcp->lock);
3617 		return (ECONNRESET);
3618 	}
3619 
3620 	/* Read packet(s) from the queue */
3621 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
3622 	    &ldcp->link_state);
3623 	if (rv != 0) {
3624 		cmn_err(CE_WARN,
3625 		    "ldc_chkq: (0x%lx) unable to read queue ptrs", ldcp->id);
3626 		mutex_exit(&ldcp->lock);
3627 		return (EIO);
3628 	}
3629 
3630 	/* reset the channel state if the channel went down */
3631 	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
3632 	    ldcp->link_state == LDC_CHANNEL_RESET) {
3633 		mutex_enter(&ldcp->tx_lock);
3634 		i_ldc_reset(ldcp, B_FALSE);
3635 		mutex_exit(&ldcp->tx_lock);
3636 		mutex_exit(&ldcp->lock);
3637 		return (ECONNRESET);
3638 	}
3639 
3640 	switch (ldcp->mode) {
3641 	case LDC_MODE_RAW:
3642 		/*
3643 		 * In raw mode, there are no ctrl packets, so checking
3644 		 * if the queue is non-empty is sufficient.
3645 		 */
3646 		*hasdata = (rx_head != rx_tail);
3647 		break;
3648 
3649 	case LDC_MODE_UNRELIABLE:
3650 		/*
3651 		 * In unreliable mode, if the queue is non-empty, we need
3652 		 * to check if it actually contains unread data packets.
3653 		 * The queue may just contain ctrl packets.
3654 		 */
3655 		if (rx_head != rx_tail) {
3656 			*hasdata = (i_ldc_chkq(ldcp) == 0);
3657 			/*
3658 			 * If no data packets were found on the queue,
3659 			 * all packets must have been control packets
3660 			 * which will now have been processed, leaving
3661 			 * the queue empty. If the interrupt state
3662 			 * is pending, we need to clear the interrupt
3663 			 * here.
3664 			 */
3665 			if (*hasdata == B_FALSE &&
3666 			    ldcp->rx_intr_state == LDC_INTR_PEND) {
3667 				i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
3668 			}
3669 		}
3670 		break;
3671 
3672 	case LDC_MODE_RELIABLE:
3673 		/*
3674 		 * In reliable mode, first check for 'stream_remains' > 0.
3675 		 * Otherwise, if the data queue head and tail pointers
3676 		 * differ, there must be data to read.
3677 		 */
3678 		if (ldcp->stream_remains > 0)
3679 			*hasdata = B_TRUE;
3680 		else
3681 			*hasdata = (ldcp->rx_dq_head != ldcp->rx_dq_tail);
3682 		break;
3683 
3684 	default:
3685 		cmn_err(CE_WARN, "ldc_chkq: (0x%lx) unexpected channel mode "
3686 		    "(0x%x)", ldcp->id, ldcp->mode);
3687 		mutex_exit(&ldcp->lock);
3688 		return (EIO);
3689 	}
3690 
3691 	mutex_exit(&ldcp->lock);
3692 
3693 	return (0);
3694 }
3695 
3696 
3697 /*
3698  * Read 'size' amount of bytes or less. If incoming buffer
3699  * is more than 'size', ENOBUFS is returned.
3700  *
3701  * On return, size contains the number of bytes read.
3702  */
3703 int
3704 ldc_read(ldc_handle_t handle, caddr_t bufp, size_t *sizep)
3705 {
3706 	ldc_chan_t 	*ldcp;
3707 	uint64_t 	rx_head = 0, rx_tail = 0;
3708 	int		rv = 0, exit_val;
3709 
3710 	if (handle == NULL) {
3711 		DWARN(DBG_ALL_LDCS, "ldc_read: invalid channel handle\n");
3712 		return (EINVAL);
3713 	}
3714 
3715 	ldcp = (ldc_chan_t *)handle;
3716 
3717 	/* channel lock */
3718 	mutex_enter(&ldcp->lock);
3719 
3720 	if (ldcp->tstate != TS_UP) {
3721 		DWARN(ldcp->id,
3722 		    "ldc_read: (0x%llx) channel is not in UP state\n",
3723 		    ldcp->id);
3724 		exit_val = ECONNRESET;
3725 	} else if (ldcp->mode == LDC_MODE_RELIABLE) {
3726 		TRACE_RXDQ_LENGTH(ldcp);
3727 		exit_val = ldcp->read_p(ldcp, bufp, sizep);
3728 
3729 		/*
3730 		 * For reliable mode channels, the interrupt
3731 		 * state is only set to pending during
3732 		 * interrupt handling when the secondary data
3733 		 * queue became full, leaving unprocessed
3734 		 * packets on the Rx queue. If the interrupt
3735 		 * state is pending and space is now available
3736 		 * on the data queue, clear the interrupt.
3737 		 */
3738 		if (ldcp->rx_intr_state == LDC_INTR_PEND &&
3739 		    Q_CONTIG_SPACE(ldcp->rx_dq_head, ldcp->rx_dq_tail,
3740 		    ldcp->rx_dq_entries << LDC_PACKET_SHIFT) >=
3741 		    LDC_PACKET_SIZE) {
3742 			/* data queue is not full */
3743 			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
3744 		}
3745 
3746 		mutex_exit(&ldcp->lock);
3747 		return (exit_val);
3748 	} else {
3749 		exit_val = ldcp->read_p(ldcp, bufp, sizep);
3750 	}
3751 
3752 	/*
3753 	 * if queue has been drained - clear interrupt
3754 	 */
3755 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
3756 	    &ldcp->link_state);
3757 	if (rv != 0) {
3758 		cmn_err(CE_WARN, "ldc_read: (0x%lx) unable to read queue ptrs",
3759 		    ldcp->id);
3760 		mutex_enter(&ldcp->tx_lock);
3761 		i_ldc_reset(ldcp, B_TRUE);
3762 		mutex_exit(&ldcp->tx_lock);
3763 		mutex_exit(&ldcp->lock);
3764 		return (ECONNRESET);
3765 	}
3766 
3767 	if (exit_val == 0) {
3768 		if (ldcp->link_state == LDC_CHANNEL_DOWN ||
3769 		    ldcp->link_state == LDC_CHANNEL_RESET) {
3770 			mutex_enter(&ldcp->tx_lock);
3771 			i_ldc_reset(ldcp, B_FALSE);
3772 			exit_val = ECONNRESET;
3773 			mutex_exit(&ldcp->tx_lock);
3774 		}
3775 		if ((rv == 0) &&
3776 		    (ldcp->rx_intr_state == LDC_INTR_PEND) &&
3777 		    (rx_head == rx_tail)) {
3778 			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
3779 		}
3780 	}
3781 
3782 	mutex_exit(&ldcp->lock);
3783 	return (exit_val);
3784 }
3785 
3786 /*
3787  * Basic raw mondo read -
3788  * no interpretation of mondo contents at all.
3789  *
3790  * Enter and exit with ldcp->lock held by caller
3791  */
3792 static int
3793 i_ldc_read_raw(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep)
3794 {
3795 	uint64_t 	q_size_mask;
3796 	ldc_msg_t 	*msgp;
3797 	uint8_t		*msgbufp;
3798 	int		rv = 0, space;
3799 	uint64_t 	rx_head, rx_tail;
3800 
3801 	space = *sizep;
3802 
3803 	if (space < LDC_PAYLOAD_SIZE_RAW)
3804 		return (ENOBUFS);
3805 
3806 	ASSERT(mutex_owned(&ldcp->lock));
3807 
3808 	/* compute mask for increment */
3809 	q_size_mask = (ldcp->rx_q_entries-1)<<LDC_PACKET_SHIFT;
3810 
3811 	/*
3812 	 * Read packet(s) from the queue
3813 	 */
3814 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
3815 	    &ldcp->link_state);
3816 	if (rv != 0) {
3817 		cmn_err(CE_WARN,
3818 		    "ldc_read_raw: (0x%lx) unable to read queue ptrs",
3819 		    ldcp->id);
3820 		return (EIO);
3821 	}
3822 	D1(ldcp->id, "ldc_read_raw: (0x%llx) rxh=0x%llx,"
3823 	    " rxt=0x%llx, st=0x%llx\n",
3824 	    ldcp->id, rx_head, rx_tail, ldcp->link_state);
3825 
3826 	/* reset the channel state if the channel went down */
3827 	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
3828 	    ldcp->link_state == LDC_CHANNEL_RESET) {
3829 		mutex_enter(&ldcp->tx_lock);
3830 		i_ldc_reset(ldcp, B_FALSE);
3831 		mutex_exit(&ldcp->tx_lock);
3832 		return (ECONNRESET);
3833 	}
3834 
3835 	/*
3836 	 * Check for empty queue
3837 	 */
3838 	if (rx_head == rx_tail) {
3839 		*sizep = 0;
3840 		return (0);
3841 	}
3842 
3843 	/* get the message */
3844 	msgp = (ldc_msg_t *)(ldcp->rx_q_va + rx_head);
3845 
3846 	/* if channel is in RAW mode, copy data and return */
3847 	msgbufp = (uint8_t *)&(msgp->raw[0]);
3848 
3849 	bcopy(msgbufp, target_bufp, LDC_PAYLOAD_SIZE_RAW);
3850 
3851 	DUMP_PAYLOAD(ldcp->id, msgbufp);
3852 
3853 	*sizep = LDC_PAYLOAD_SIZE_RAW;
3854 
3855 	rx_head = (rx_head + LDC_PACKET_SIZE) & q_size_mask;
3856 	rv = i_ldc_set_rx_head(ldcp, rx_head);
3857 
3858 	return (rv);
3859 }
3860 
3861 /*
3862  * Process LDC mondos to build larger packets
3863  * with either un-reliable or reliable delivery.
3864  *
3865  * Enter and exit with ldcp->lock held by caller
3866  */
3867 static int
3868 i_ldc_read_packet(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep)
3869 {
3870 	int		rv = 0;
3871 	uint64_t 	rx_head = 0, rx_tail = 0;
3872 	uint64_t 	curr_head = 0;
3873 	ldc_msg_t 	*msg;
3874 	caddr_t 	target;
3875 	size_t 		len = 0, bytes_read = 0;
3876 	int 		retries = 0;
3877 	uint64_t 	q_va, q_size_mask;
3878 	uint64_t	first_fragment = 0;
3879 
3880 	target = target_bufp;
3881 
3882 	ASSERT(mutex_owned(&ldcp->lock));
3883 
3884 	/* check if the buffer and size are valid */
3885 	if (target_bufp == NULL || *sizep == 0) {
3886 		DWARN(ldcp->id, "ldc_read: (0x%llx) invalid buffer/size\n",
3887 		    ldcp->id);
3888 		return (EINVAL);
3889 	}
3890 
3891 	/* Set q_va and compute increment mask for the appropriate queue */
3892 	if (ldcp->mode == LDC_MODE_RELIABLE) {
3893 		q_va	    = ldcp->rx_dq_va;
3894 		q_size_mask = (ldcp->rx_dq_entries-1)<<LDC_PACKET_SHIFT;
3895 	} else {
3896 		q_va	    = ldcp->rx_q_va;
3897 		q_size_mask = (ldcp->rx_q_entries-1)<<LDC_PACKET_SHIFT;
3898 	}
3899 
3900 	/*
3901 	 * Read packet(s) from the queue
3902 	 */
3903 	rv = ldcp->readq_get_state(ldcp, &curr_head, &rx_tail,
3904 	    &ldcp->link_state);
3905 	if (rv != 0) {
3906 		cmn_err(CE_WARN, "ldc_read: (0x%lx) unable to read queue ptrs",
3907 		    ldcp->id);
3908 		mutex_enter(&ldcp->tx_lock);
3909 		i_ldc_reset(ldcp, B_TRUE);
3910 		mutex_exit(&ldcp->tx_lock);
3911 		return (ECONNRESET);
3912 	}
3913 	D1(ldcp->id, "ldc_read: (0x%llx) chd=0x%llx, tl=0x%llx, st=0x%llx\n",
3914 	    ldcp->id, curr_head, rx_tail, ldcp->link_state);
3915 
3916 	/* reset the channel state if the channel went down */
3917 	if (ldcp->link_state != LDC_CHANNEL_UP)
3918 		goto channel_is_reset;
3919 
3920 	for (;;) {
3921 
3922 		if (curr_head == rx_tail) {
3923 			/*
3924 			 * If a data queue is being used, check the Rx HV
3925 			 * queue. This will copy over any new data packets
3926 			 * that have arrived.
3927 			 */
3928 			if (ldcp->mode == LDC_MODE_RELIABLE)
3929 				(void) i_ldc_chkq(ldcp);
3930 
3931 			rv = ldcp->readq_get_state(ldcp,
3932 			    &rx_head, &rx_tail, &ldcp->link_state);
3933 			if (rv != 0) {
3934 				cmn_err(CE_WARN,
3935 				    "ldc_read: (0x%lx) cannot read queue ptrs",
3936 				    ldcp->id);
3937 				mutex_enter(&ldcp->tx_lock);
3938 				i_ldc_reset(ldcp, B_TRUE);
3939 				mutex_exit(&ldcp->tx_lock);
3940 				return (ECONNRESET);
3941 			}
3942 
3943 			if (ldcp->link_state != LDC_CHANNEL_UP)
3944 				goto channel_is_reset;
3945 
3946 			if (curr_head == rx_tail) {
3947 
3948 				/* If in the middle of a fragmented xfer */
3949 				if (first_fragment != 0) {
3950 
3951 					/* wait for ldc_delay usecs */
3952 					drv_usecwait(ldc_delay);
3953 
3954 					if (++retries < ldc_max_retries)
3955 						continue;
3956 
3957 					*sizep = 0;
3958 					if (ldcp->mode != LDC_MODE_RELIABLE)
3959 						ldcp->last_msg_rcd =
3960 						    first_fragment - 1;
3961 					DWARN(DBG_ALL_LDCS, "ldc_read: "
3962 					    "(0x%llx) read timeout", ldcp->id);
3963 					return (EAGAIN);
3964 				}
3965 				*sizep = 0;
3966 				break;
3967 			}
3968 		}
3969 		retries = 0;
3970 
3971 		D2(ldcp->id,
3972 		    "ldc_read: (0x%llx) chd=0x%llx, rxhd=0x%llx, rxtl=0x%llx\n",
3973 		    ldcp->id, curr_head, rx_head, rx_tail);
3974 
3975 		/* get the message */
3976 		msg = (ldc_msg_t *)(q_va + curr_head);
3977 
3978 		DUMP_LDC_PKT(ldcp, "ldc_read received pkt",
3979 		    ldcp->rx_q_va + curr_head);
3980 
3981 		/* Check the message ID for the message received */
3982 		if (ldcp->mode != LDC_MODE_RELIABLE) {
3983 			if ((rv = i_ldc_check_seqid(ldcp, msg)) != 0) {
3984 
3985 				DWARN(ldcp->id, "ldc_read: (0x%llx) seqid "
3986 				    "error, q_ptrs=0x%lx,0x%lx",
3987 				    ldcp->id, rx_head, rx_tail);
3988 
3989 				/* throw away data */
3990 				bytes_read = 0;
3991 
3992 				/* Reset last_msg_rcd to start of message */
3993 				if (first_fragment != 0) {
3994 					ldcp->last_msg_rcd = first_fragment - 1;
3995 					first_fragment = 0;
3996 				}
3997 				/*
3998 				 * Send a NACK -- invalid seqid
3999 				 * get the current tail for the response
4000 				 */
4001 				rv = i_ldc_send_pkt(ldcp, msg->type, LDC_NACK,
4002 				    (msg->ctrl & LDC_CTRL_MASK));
4003 				if (rv) {
4004 					cmn_err(CE_NOTE,
4005 					    "ldc_read: (0x%lx) err sending "
4006 					    "NACK msg\n", ldcp->id);
4007 
4008 					/* if cannot send NACK - reset chan */
4009 					mutex_enter(&ldcp->tx_lock);
4010 					i_ldc_reset(ldcp, B_FALSE);
4011 					mutex_exit(&ldcp->tx_lock);
4012 					rv = ECONNRESET;
4013 					break;
4014 				}
4015 
4016 				/* purge receive queue */
4017 				rv = i_ldc_set_rx_head(ldcp, rx_tail);
4018 
4019 				break;
4020 			}
4021 
4022 			/*
4023 			 * Process any messages of type CTRL messages
4024 			 * Future implementations should try to pass these
4025 			 * to LDC link by resetting the intr state.
4026 			 *
4027 			 * NOTE: not done as a switch() as type can be
4028 			 * both ctrl+data
4029 			 */
4030 			if (msg->type & LDC_CTRL) {
4031 				if (rv = i_ldc_ctrlmsg(ldcp, msg)) {
4032 					if (rv == EAGAIN)
4033 						continue;
4034 					rv = i_ldc_set_rx_head(ldcp, rx_tail);
4035 					*sizep = 0;
4036 					bytes_read = 0;
4037 					break;
4038 				}
4039 			}
4040 
4041 			/* process data ACKs */
4042 			if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) {
4043 				if (rv = i_ldc_process_data_ACK(ldcp, msg)) {
4044 					*sizep = 0;
4045 					bytes_read = 0;
4046 					break;
4047 				}
4048 			}
4049 
4050 			/* process data NACKs */
4051 			if ((msg->type & LDC_DATA) && (msg->stype & LDC_NACK)) {
4052 				DWARN(ldcp->id,
4053 				    "ldc_read: (0x%llx) received DATA/NACK",
4054 				    ldcp->id);
4055 				mutex_enter(&ldcp->tx_lock);
4056 				i_ldc_reset(ldcp, B_TRUE);
4057 				mutex_exit(&ldcp->tx_lock);
4058 				return (ECONNRESET);
4059 			}
4060 		}
4061 
4062 		/* process data messages */
4063 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_INFO)) {
4064 
4065 			uint8_t *msgbuf = (uint8_t *)(
4066 			    (ldcp->mode == LDC_MODE_RELIABLE) ?
4067 			    msg->rdata : msg->udata);
4068 
4069 			D2(ldcp->id,
4070 			    "ldc_read: (0x%llx) received data msg\n", ldcp->id);
4071 
4072 			/* get the packet length */
4073 			len = (msg->env & LDC_LEN_MASK);
4074 
4075 				/*
4076 				 * FUTURE OPTIMIZATION:
4077 				 * dont need to set q head for every
4078 				 * packet we read just need to do this when
4079 				 * we are done or need to wait for more
4080 				 * mondos to make a full packet - this is
4081 				 * currently expensive.
4082 				 */
4083 
4084 			if (first_fragment == 0) {
4085 
4086 				/*
4087 				 * first packets should always have the start
4088 				 * bit set (even for a single packet). If not
4089 				 * throw away the packet
4090 				 */
4091 				if (!(msg->env & LDC_FRAG_START)) {
4092 
4093 					DWARN(DBG_ALL_LDCS,
4094 					    "ldc_read: (0x%llx) not start - "
4095 					    "frag=%x\n", ldcp->id,
4096 					    (msg->env) & LDC_FRAG_MASK);
4097 
4098 					/* toss pkt, inc head, cont reading */
4099 					bytes_read = 0;
4100 					target = target_bufp;
4101 					curr_head =
4102 					    (curr_head + LDC_PACKET_SIZE)
4103 					    & q_size_mask;
4104 					if (rv = ldcp->readq_set_head(ldcp,
4105 					    curr_head))
4106 						break;
4107 
4108 					continue;
4109 				}
4110 
4111 				first_fragment = msg->seqid;
4112 			} else {
4113 				/* check to see if this is a pkt w/ START bit */
4114 				if (msg->env & LDC_FRAG_START) {
4115 					DWARN(DBG_ALL_LDCS,
4116 					    "ldc_read:(0x%llx) unexpected pkt"
4117 					    " env=0x%x discarding %d bytes,"
4118 					    " lastmsg=%d, currentmsg=%d\n",
4119 					    ldcp->id, msg->env&LDC_FRAG_MASK,
4120 					    bytes_read, ldcp->last_msg_rcd,
4121 					    msg->seqid);
4122 
4123 					/* throw data we have read so far */
4124 					bytes_read = 0;
4125 					target = target_bufp;
4126 					first_fragment = msg->seqid;
4127 
4128 					if (rv = ldcp->readq_set_head(ldcp,
4129 					    curr_head))
4130 						break;
4131 				}
4132 			}
4133 
4134 			/* copy (next) pkt into buffer */
4135 			if (len <= (*sizep - bytes_read)) {
4136 				bcopy(msgbuf, target, len);
4137 				target += len;
4138 				bytes_read += len;
4139 			} else {
4140 				/*
4141 				 * there is not enough space in the buffer to
4142 				 * read this pkt. throw message away & continue
4143 				 * reading data from queue
4144 				 */
4145 				DWARN(DBG_ALL_LDCS,
4146 				    "ldc_read: (0x%llx) buffer too small, "
4147 				    "head=0x%lx, expect=%d, got=%d\n", ldcp->id,
4148 				    curr_head, *sizep, bytes_read+len);
4149 
4150 				first_fragment = 0;
4151 				target = target_bufp;
4152 				bytes_read = 0;
4153 
4154 				/* throw away everything received so far */
4155 				if (rv = ldcp->readq_set_head(ldcp, curr_head))
4156 					break;
4157 
4158 				/* continue reading remaining pkts */
4159 				continue;
4160 			}
4161 		}
4162 
4163 		/* set the message id */
4164 		if (ldcp->mode != LDC_MODE_RELIABLE)
4165 			ldcp->last_msg_rcd = msg->seqid;
4166 
4167 		/* move the head one position */
4168 		curr_head = (curr_head + LDC_PACKET_SIZE) & q_size_mask;
4169 
4170 		if (msg->env & LDC_FRAG_STOP) {
4171 
4172 			/*
4173 			 * All pkts that are part of this fragmented transfer
4174 			 * have been read or this was a single pkt read
4175 			 * or there was an error
4176 			 */
4177 
4178 			/* set the queue head */
4179 			if (rv = ldcp->readq_set_head(ldcp, curr_head))
4180 				bytes_read = 0;
4181 
4182 			*sizep = bytes_read;
4183 
4184 			break;
4185 		}
4186 
4187 		/* advance head if it is a CTRL packet or a DATA ACK packet */
4188 		if ((msg->type & LDC_CTRL) ||
4189 		    ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK))) {
4190 
4191 			/* set the queue head */
4192 			if (rv = ldcp->readq_set_head(ldcp, curr_head)) {
4193 				bytes_read = 0;
4194 				break;
4195 			}
4196 
4197 			D2(ldcp->id, "ldc_read: (0x%llx) set ACK qhead 0x%llx",
4198 			    ldcp->id, curr_head);
4199 		}
4200 
4201 	} /* for (;;) */
4202 
4203 	D2(ldcp->id, "ldc_read: (0x%llx) end size=%d", ldcp->id, *sizep);
4204 
4205 	return (rv);
4206 
4207 channel_is_reset:
4208 	mutex_enter(&ldcp->tx_lock);
4209 	i_ldc_reset(ldcp, B_FALSE);
4210 	mutex_exit(&ldcp->tx_lock);
4211 	return (ECONNRESET);
4212 }
4213 
4214 /*
4215  * Fetch and buffer incoming packets so we can hand them back as
4216  * a basic byte stream.
4217  *
4218  * Enter and exit with ldcp->lock held by caller
4219  */
4220 static int
4221 i_ldc_read_stream(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep)
4222 {
4223 	int	rv;
4224 	size_t	size;
4225 
4226 	ASSERT(mutex_owned(&ldcp->lock));
4227 
4228 	D2(ldcp->id, "i_ldc_read_stream: (0x%llx) buffer size=%d",
4229 	    ldcp->id, *sizep);
4230 
4231 	if (ldcp->stream_remains == 0) {
4232 		size = ldcp->mtu;
4233 		rv = i_ldc_read_packet(ldcp,
4234 		    (caddr_t)ldcp->stream_bufferp, &size);
4235 		D2(ldcp->id, "i_ldc_read_stream: read packet (0x%llx) size=%d",
4236 		    ldcp->id, size);
4237 
4238 		if (rv != 0)
4239 			return (rv);
4240 
4241 		ldcp->stream_remains = size;
4242 		ldcp->stream_offset = 0;
4243 	}
4244 
4245 	size = MIN(ldcp->stream_remains, *sizep);
4246 
4247 	bcopy(ldcp->stream_bufferp + ldcp->stream_offset, target_bufp, size);
4248 	ldcp->stream_offset += size;
4249 	ldcp->stream_remains -= size;
4250 
4251 	D2(ldcp->id, "i_ldc_read_stream: (0x%llx) fill from buffer size=%d",
4252 	    ldcp->id, size);
4253 
4254 	*sizep = size;
4255 	return (0);
4256 }
4257 
4258 /*
4259  * Write specified amount of bytes to the channel
4260  * in multiple pkts of pkt_payload size. Each
4261  * packet is tagged with an unique packet ID in
4262  * the case of a reliable link.
4263  *
4264  * On return, size contains the number of bytes written.
4265  */
4266 int
4267 ldc_write(ldc_handle_t handle, caddr_t buf, size_t *sizep)
4268 {
4269 	ldc_chan_t	*ldcp;
4270 	int		rv = 0;
4271 
4272 	if (handle == NULL) {
4273 		DWARN(DBG_ALL_LDCS, "ldc_write: invalid channel handle\n");
4274 		return (EINVAL);
4275 	}
4276 	ldcp = (ldc_chan_t *)handle;
4277 
4278 	/* check if writes can occur */
4279 	if (!mutex_tryenter(&ldcp->tx_lock)) {
4280 		/*
4281 		 * Could not get the lock - channel could
4282 		 * be in the process of being unconfigured
4283 		 * or reader has encountered an error
4284 		 */
4285 		return (EAGAIN);
4286 	}
4287 
4288 	/* check if non-zero data to write */
4289 	if (buf == NULL || sizep == NULL) {
4290 		DWARN(ldcp->id, "ldc_write: (0x%llx) invalid data write\n",
4291 		    ldcp->id);
4292 		mutex_exit(&ldcp->tx_lock);
4293 		return (EINVAL);
4294 	}
4295 
4296 	if (*sizep == 0) {
4297 		DWARN(ldcp->id, "ldc_write: (0x%llx) write size of zero\n",
4298 		    ldcp->id);
4299 		mutex_exit(&ldcp->tx_lock);
4300 		return (0);
4301 	}
4302 
4303 	/* Check if channel is UP for data exchange */
4304 	if (ldcp->tstate != TS_UP) {
4305 		DWARN(ldcp->id,
4306 		    "ldc_write: (0x%llx) channel is not in UP state\n",
4307 		    ldcp->id);
4308 		*sizep = 0;
4309 		rv = ECONNRESET;
4310 	} else {
4311 		rv = ldcp->write_p(ldcp, buf, sizep);
4312 	}
4313 
4314 	mutex_exit(&ldcp->tx_lock);
4315 
4316 	return (rv);
4317 }
4318 
4319 /*
4320  * Write a raw packet to the channel
4321  * On return, size contains the number of bytes written.
4322  */
4323 static int
4324 i_ldc_write_raw(ldc_chan_t *ldcp, caddr_t buf, size_t *sizep)
4325 {
4326 	ldc_msg_t 	*ldcmsg;
4327 	uint64_t 	tx_head, tx_tail, new_tail;
4328 	int		rv = 0;
4329 	size_t		size;
4330 
4331 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
4332 	ASSERT(ldcp->mode == LDC_MODE_RAW);
4333 
4334 	size = *sizep;
4335 
4336 	/*
4337 	 * Check to see if the packet size is less than or
4338 	 * equal to packet size support in raw mode
4339 	 */
4340 	if (size > ldcp->pkt_payload) {
4341 		DWARN(ldcp->id,
4342 		    "ldc_write: (0x%llx) invalid size (0x%llx) for RAW mode\n",
4343 		    ldcp->id, *sizep);
4344 		*sizep = 0;
4345 		return (EMSGSIZE);
4346 	}
4347 
4348 	/* get the qptrs for the tx queue */
4349 	rv = hv_ldc_tx_get_state(ldcp->id,
4350 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
4351 	if (rv != 0) {
4352 		cmn_err(CE_WARN,
4353 		    "ldc_write: (0x%lx) cannot read queue ptrs\n", ldcp->id);
4354 		*sizep = 0;
4355 		return (EIO);
4356 	}
4357 
4358 	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
4359 	    ldcp->link_state == LDC_CHANNEL_RESET) {
4360 		DWARN(ldcp->id,
4361 		    "ldc_write: (0x%llx) channel down/reset\n", ldcp->id);
4362 
4363 		*sizep = 0;
4364 		if (mutex_tryenter(&ldcp->lock)) {
4365 			i_ldc_reset(ldcp, B_FALSE);
4366 			mutex_exit(&ldcp->lock);
4367 		} else {
4368 			/*
4369 			 * Release Tx lock, and then reacquire channel
4370 			 * and Tx lock in correct order
4371 			 */
4372 			mutex_exit(&ldcp->tx_lock);
4373 			mutex_enter(&ldcp->lock);
4374 			mutex_enter(&ldcp->tx_lock);
4375 			i_ldc_reset(ldcp, B_FALSE);
4376 			mutex_exit(&ldcp->lock);
4377 		}
4378 		return (ECONNRESET);
4379 	}
4380 
4381 	tx_tail = ldcp->tx_tail;
4382 	tx_head = ldcp->tx_head;
4383 	new_tail = (tx_tail + LDC_PACKET_SIZE) &
4384 	    ((ldcp->tx_q_entries-1) << LDC_PACKET_SHIFT);
4385 
4386 	if (new_tail == tx_head) {
4387 		DWARN(DBG_ALL_LDCS,
4388 		    "ldc_write: (0x%llx) TX queue is full\n", ldcp->id);
4389 		*sizep = 0;
4390 		return (EWOULDBLOCK);
4391 	}
4392 
4393 	D2(ldcp->id, "ldc_write: (0x%llx) start xfer size=%d",
4394 	    ldcp->id, size);
4395 
4396 	/* Send the data now */
4397 	ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
4398 
4399 	/* copy the data into pkt */
4400 	bcopy((uint8_t *)buf, ldcmsg, size);
4401 
4402 	/* increment tail */
4403 	tx_tail = new_tail;
4404 
4405 	/*
4406 	 * All packets have been copied into the TX queue
4407 	 * update the tail ptr in the HV
4408 	 */
4409 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
4410 	if (rv) {
4411 		if (rv == EWOULDBLOCK) {
4412 			DWARN(ldcp->id, "ldc_write: (0x%llx) write timed out\n",
4413 			    ldcp->id);
4414 			*sizep = 0;
4415 			return (EWOULDBLOCK);
4416 		}
4417 
4418 		*sizep = 0;
4419 		if (mutex_tryenter(&ldcp->lock)) {
4420 			i_ldc_reset(ldcp, B_FALSE);
4421 			mutex_exit(&ldcp->lock);
4422 		} else {
4423 			/*
4424 			 * Release Tx lock, and then reacquire channel
4425 			 * and Tx lock in correct order
4426 			 */
4427 			mutex_exit(&ldcp->tx_lock);
4428 			mutex_enter(&ldcp->lock);
4429 			mutex_enter(&ldcp->tx_lock);
4430 			i_ldc_reset(ldcp, B_FALSE);
4431 			mutex_exit(&ldcp->lock);
4432 		}
4433 		return (ECONNRESET);
4434 	}
4435 
4436 	ldcp->tx_tail = tx_tail;
4437 	*sizep = size;
4438 
4439 	D2(ldcp->id, "ldc_write: (0x%llx) end xfer size=%d", ldcp->id, size);
4440 
4441 	return (rv);
4442 }
4443 
4444 
4445 /*
4446  * Write specified amount of bytes to the channel
4447  * in multiple pkts of pkt_payload size. Each
4448  * packet is tagged with an unique packet ID in
4449  * the case of a reliable link.
4450  *
4451  * On return, size contains the number of bytes written.
4452  * This function needs to ensure that the write size is < MTU size
4453  */
4454 static int
4455 i_ldc_write_packet(ldc_chan_t *ldcp, caddr_t buf, size_t *size)
4456 {
4457 	ldc_msg_t 	*ldcmsg;
4458 	uint64_t 	tx_head, tx_tail, new_tail, start;
4459 	uint64_t	txq_size_mask, numavail;
4460 	uint8_t 	*msgbuf, *source = (uint8_t *)buf;
4461 	size_t 		len, bytes_written = 0, remaining;
4462 	int		rv;
4463 	uint32_t	curr_seqid;
4464 
4465 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
4466 
4467 	ASSERT(ldcp->mode == LDC_MODE_RELIABLE ||
4468 	    ldcp->mode == LDC_MODE_UNRELIABLE);
4469 
4470 	/* compute mask for increment */
4471 	txq_size_mask = (ldcp->tx_q_entries - 1) << LDC_PACKET_SHIFT;
4472 
4473 	/* get the qptrs for the tx queue */
4474 	rv = hv_ldc_tx_get_state(ldcp->id,
4475 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
4476 	if (rv != 0) {
4477 		cmn_err(CE_WARN,
4478 		    "ldc_write: (0x%lx) cannot read queue ptrs\n", ldcp->id);
4479 		*size = 0;
4480 		return (EIO);
4481 	}
4482 
4483 	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
4484 	    ldcp->link_state == LDC_CHANNEL_RESET) {
4485 		DWARN(ldcp->id,
4486 		    "ldc_write: (0x%llx) channel down/reset\n", ldcp->id);
4487 		*size = 0;
4488 		if (mutex_tryenter(&ldcp->lock)) {
4489 			i_ldc_reset(ldcp, B_FALSE);
4490 			mutex_exit(&ldcp->lock);
4491 		} else {
4492 			/*
4493 			 * Release Tx lock, and then reacquire channel
4494 			 * and Tx lock in correct order
4495 			 */
4496 			mutex_exit(&ldcp->tx_lock);
4497 			mutex_enter(&ldcp->lock);
4498 			mutex_enter(&ldcp->tx_lock);
4499 			i_ldc_reset(ldcp, B_FALSE);
4500 			mutex_exit(&ldcp->lock);
4501 		}
4502 		return (ECONNRESET);
4503 	}
4504 
4505 	tx_tail = ldcp->tx_tail;
4506 	new_tail = (tx_tail + LDC_PACKET_SIZE) %
4507 	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
4508 
4509 	/*
4510 	 * Check to see if the queue is full. The check is done using
4511 	 * the appropriate head based on the link mode.
4512 	 */
4513 	i_ldc_get_tx_head(ldcp, &tx_head);
4514 
4515 	if (new_tail == tx_head) {
4516 		DWARN(DBG_ALL_LDCS,
4517 		    "ldc_write: (0x%llx) TX queue is full\n", ldcp->id);
4518 		*size = 0;
4519 		return (EWOULDBLOCK);
4520 	}
4521 
4522 	/*
4523 	 * Make sure that the LDC Tx queue has enough space
4524 	 */
4525 	numavail = (tx_head >> LDC_PACKET_SHIFT) - (tx_tail >> LDC_PACKET_SHIFT)
4526 	    + ldcp->tx_q_entries - 1;
4527 	numavail %= ldcp->tx_q_entries;
4528 
4529 	if (*size > (numavail * ldcp->pkt_payload)) {
4530 		DWARN(DBG_ALL_LDCS,
4531 		    "ldc_write: (0x%llx) TX queue has no space\n", ldcp->id);
4532 		return (EWOULDBLOCK);
4533 	}
4534 
4535 	D2(ldcp->id, "ldc_write: (0x%llx) start xfer size=%d",
4536 	    ldcp->id, *size);
4537 
4538 	/* Send the data now */
4539 	bytes_written = 0;
4540 	curr_seqid = ldcp->last_msg_snt;
4541 	start = tx_tail;
4542 
4543 	while (*size > bytes_written) {
4544 
4545 		ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
4546 
4547 		msgbuf = (uint8_t *)((ldcp->mode == LDC_MODE_RELIABLE) ?
4548 		    ldcmsg->rdata : ldcmsg->udata);
4549 
4550 		ldcmsg->type = LDC_DATA;
4551 		ldcmsg->stype = LDC_INFO;
4552 		ldcmsg->ctrl = 0;
4553 
4554 		remaining = *size - bytes_written;
4555 		len = min(ldcp->pkt_payload, remaining);
4556 		ldcmsg->env = (uint8_t)len;
4557 
4558 		curr_seqid++;
4559 		ldcmsg->seqid = curr_seqid;
4560 
4561 		/* copy the data into pkt */
4562 		bcopy(source, msgbuf, len);
4563 
4564 		source += len;
4565 		bytes_written += len;
4566 
4567 		/* increment tail */
4568 		tx_tail = (tx_tail + LDC_PACKET_SIZE) & txq_size_mask;
4569 
4570 		ASSERT(tx_tail != tx_head);
4571 	}
4572 
4573 	/* Set the start and stop bits */
4574 	ldcmsg->env |= LDC_FRAG_STOP;
4575 	ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + start);
4576 	ldcmsg->env |= LDC_FRAG_START;
4577 
4578 	/*
4579 	 * All packets have been copied into the TX queue
4580 	 * update the tail ptr in the HV
4581 	 */
4582 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
4583 	if (rv == 0) {
4584 		ldcp->tx_tail = tx_tail;
4585 		ldcp->last_msg_snt = curr_seqid;
4586 		*size = bytes_written;
4587 	} else {
4588 		int rv2;
4589 
4590 		if (rv != EWOULDBLOCK) {
4591 			*size = 0;
4592 			if (mutex_tryenter(&ldcp->lock)) {
4593 				i_ldc_reset(ldcp, B_FALSE);
4594 				mutex_exit(&ldcp->lock);
4595 			} else {
4596 				/*
4597 				 * Release Tx lock, and then reacquire channel
4598 				 * and Tx lock in correct order
4599 				 */
4600 				mutex_exit(&ldcp->tx_lock);
4601 				mutex_enter(&ldcp->lock);
4602 				mutex_enter(&ldcp->tx_lock);
4603 				i_ldc_reset(ldcp, B_FALSE);
4604 				mutex_exit(&ldcp->lock);
4605 			}
4606 			return (ECONNRESET);
4607 		}
4608 
4609 		D1(ldcp->id, "hv_tx_set_tail returns 0x%x (head 0x%x, "
4610 		    "old tail 0x%x, new tail 0x%x, qsize=0x%x)\n",
4611 		    rv, ldcp->tx_head, ldcp->tx_tail, tx_tail,
4612 		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT));
4613 
4614 		rv2 = hv_ldc_tx_get_state(ldcp->id,
4615 		    &tx_head, &tx_tail, &ldcp->link_state);
4616 
4617 		D1(ldcp->id, "hv_ldc_tx_get_state returns 0x%x "
4618 		    "(head 0x%x, tail 0x%x state 0x%x)\n",
4619 		    rv2, tx_head, tx_tail, ldcp->link_state);
4620 
4621 		*size = 0;
4622 	}
4623 
4624 	D2(ldcp->id, "ldc_write: (0x%llx) end xfer size=%d", ldcp->id, *size);
4625 
4626 	return (rv);
4627 }
4628 
4629 /*
4630  * Write specified amount of bytes to the channel
4631  * in multiple pkts of pkt_payload size. Each
4632  * packet is tagged with an unique packet ID in
4633  * the case of a reliable link.
4634  *
4635  * On return, size contains the number of bytes written.
4636  * This function needs to ensure that the write size is < MTU size
4637  */
4638 static int
4639 i_ldc_write_stream(ldc_chan_t *ldcp, caddr_t buf, size_t *sizep)
4640 {
4641 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
4642 	ASSERT(ldcp->mode == LDC_MODE_RELIABLE);
4643 
4644 	/* Truncate packet to max of MTU size */
4645 	if (*sizep > ldcp->mtu) *sizep = ldcp->mtu;
4646 	return (i_ldc_write_packet(ldcp, buf, sizep));
4647 }
4648 
4649 
4650 /*
4651  * Interfaces for channel nexus to register/unregister with LDC module
4652  * The nexus will register functions to be used to register individual
4653  * channels with the nexus and enable interrupts for the channels
4654  */
4655 int
4656 ldc_register(ldc_cnex_t *cinfo)
4657 {
4658 	ldc_chan_t	*ldcp;
4659 
4660 	if (cinfo == NULL || cinfo->dip == NULL ||
4661 	    cinfo->reg_chan == NULL || cinfo->unreg_chan == NULL ||
4662 	    cinfo->add_intr == NULL || cinfo->rem_intr == NULL ||
4663 	    cinfo->clr_intr == NULL) {
4664 
4665 		DWARN(DBG_ALL_LDCS, "ldc_register: invalid nexus info\n");
4666 		return (EINVAL);
4667 	}
4668 
4669 	mutex_enter(&ldcssp->lock);
4670 
4671 	/* nexus registration */
4672 	ldcssp->cinfo.dip = cinfo->dip;
4673 	ldcssp->cinfo.reg_chan = cinfo->reg_chan;
4674 	ldcssp->cinfo.unreg_chan = cinfo->unreg_chan;
4675 	ldcssp->cinfo.add_intr = cinfo->add_intr;
4676 	ldcssp->cinfo.rem_intr = cinfo->rem_intr;
4677 	ldcssp->cinfo.clr_intr = cinfo->clr_intr;
4678 
4679 	/* register any channels that might have been previously initialized */
4680 	ldcp = ldcssp->chan_list;
4681 	while (ldcp) {
4682 		if ((ldcp->tstate & TS_QCONF_RDY) &&
4683 		    (ldcp->tstate & TS_CNEX_RDY) == 0)
4684 			(void) i_ldc_register_channel(ldcp);
4685 
4686 		ldcp = ldcp->next;
4687 	}
4688 
4689 	mutex_exit(&ldcssp->lock);
4690 
4691 	return (0);
4692 }
4693 
4694 int
4695 ldc_unregister(ldc_cnex_t *cinfo)
4696 {
4697 	if (cinfo == NULL || cinfo->dip == NULL) {
4698 		DWARN(DBG_ALL_LDCS, "ldc_unregister: invalid nexus info\n");
4699 		return (EINVAL);
4700 	}
4701 
4702 	mutex_enter(&ldcssp->lock);
4703 
4704 	if (cinfo->dip != ldcssp->cinfo.dip) {
4705 		DWARN(DBG_ALL_LDCS, "ldc_unregister: invalid dip\n");
4706 		mutex_exit(&ldcssp->lock);
4707 		return (EINVAL);
4708 	}
4709 
4710 	/* nexus unregister */
4711 	ldcssp->cinfo.dip = NULL;
4712 	ldcssp->cinfo.reg_chan = NULL;
4713 	ldcssp->cinfo.unreg_chan = NULL;
4714 	ldcssp->cinfo.add_intr = NULL;
4715 	ldcssp->cinfo.rem_intr = NULL;
4716 	ldcssp->cinfo.clr_intr = NULL;
4717 
4718 	mutex_exit(&ldcssp->lock);
4719 
4720 	return (0);
4721 }
4722