xref: /titanic_51/usr/src/uts/sun4v/io/ldc.c (revision c2765d203a42aaeda144370182c6cda62904d860)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * sun4v LDC Link Layer
29  */
30 #include <sys/types.h>
31 #include <sys/file.h>
32 #include <sys/errno.h>
33 #include <sys/open.h>
34 #include <sys/cred.h>
35 #include <sys/kmem.h>
36 #include <sys/conf.h>
37 #include <sys/cmn_err.h>
38 #include <sys/ksynch.h>
39 #include <sys/modctl.h>
40 #include <sys/stat.h> /* needed for S_IFBLK and S_IFCHR */
41 #include <sys/debug.h>
42 #include <sys/cred.h>
43 #include <sys/promif.h>
44 #include <sys/ddi.h>
45 #include <sys/sunddi.h>
46 #include <sys/cyclic.h>
47 #include <sys/machsystm.h>
48 #include <sys/vm.h>
49 #include <sys/cpu.h>
50 #include <sys/intreg.h>
51 #include <sys/machcpuvar.h>
52 #include <sys/mmu.h>
53 #include <sys/pte.h>
54 #include <vm/hat.h>
55 #include <vm/as.h>
56 #include <vm/hat_sfmmu.h>
57 #include <sys/vm_machparam.h>
58 #include <vm/seg_kmem.h>
59 #include <vm/seg_kpm.h>
60 #include <sys/note.h>
61 #include <sys/ivintr.h>
62 #include <sys/hypervisor_api.h>
63 #include <sys/ldc.h>
64 #include <sys/ldc_impl.h>
65 #include <sys/cnex.h>
66 #include <sys/hsvc.h>
67 #include <sys/sdt.h>
68 #include <sys/kldc.h>
69 
70 /* Core internal functions */
71 int i_ldc_h2v_error(int h_error);
72 void i_ldc_reset(ldc_chan_t *ldcp, boolean_t force_reset);
73 
74 static int i_ldc_txq_reconf(ldc_chan_t *ldcp);
75 static int i_ldc_rxq_reconf(ldc_chan_t *ldcp, boolean_t force_reset);
76 static int i_ldc_rxq_drain(ldc_chan_t *ldcp);
77 static void i_ldc_reset_state(ldc_chan_t *ldcp);
78 static void i_ldc_debug_enter(void);
79 
80 static int i_ldc_get_tx_tail(ldc_chan_t *ldcp, uint64_t *tail);
81 static void i_ldc_get_tx_head(ldc_chan_t *ldcp, uint64_t *head);
82 static int i_ldc_set_tx_tail(ldc_chan_t *ldcp, uint64_t tail);
83 static int i_ldc_set_rx_head(ldc_chan_t *ldcp, uint64_t head);
84 static int i_ldc_send_pkt(ldc_chan_t *ldcp, uint8_t pkttype, uint8_t subtype,
85     uint8_t ctrlmsg);
86 
87 static int  i_ldc_set_rxdq_head(ldc_chan_t *ldcp, uint64_t head);
88 static void i_ldc_rxdq_copy(ldc_chan_t *ldcp, uint64_t *head);
89 static uint64_t i_ldc_dq_rx_get_state(ldc_chan_t *ldcp, uint64_t *head,
90     uint64_t *tail, uint64_t *link_state);
91 static uint64_t i_ldc_hvq_rx_get_state(ldc_chan_t *ldcp, uint64_t *head,
92     uint64_t *tail, uint64_t *link_state);
93 static int i_ldc_rx_ackpeek(ldc_chan_t *ldcp, uint64_t rx_head,
94     uint64_t rx_tail);
95 static uint_t i_ldc_chkq(ldc_chan_t *ldcp);
96 
97 /* Interrupt handling functions */
98 static uint_t i_ldc_tx_hdlr(caddr_t arg1, caddr_t arg2);
99 static uint_t i_ldc_rx_hdlr(caddr_t arg1, caddr_t arg2);
100 static uint_t i_ldc_rx_process_hvq(ldc_chan_t *ldcp, boolean_t *notify_client,
101     uint64_t *notify_event);
102 static void i_ldc_clear_intr(ldc_chan_t *ldcp, cnex_intrtype_t itype);
103 
104 /* Read method functions */
105 static int i_ldc_read_raw(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep);
106 static int i_ldc_read_packet(ldc_chan_t *ldcp, caddr_t target_bufp,
107 	size_t *sizep);
108 static int i_ldc_read_stream(ldc_chan_t *ldcp, caddr_t target_bufp,
109 	size_t *sizep);
110 
111 /* Write method functions */
112 static int i_ldc_write_raw(ldc_chan_t *ldcp, caddr_t target_bufp,
113 	size_t *sizep);
114 static int i_ldc_write_packet(ldc_chan_t *ldcp, caddr_t target_bufp,
115 	size_t *sizep);
116 static int i_ldc_write_stream(ldc_chan_t *ldcp, caddr_t target_bufp,
117 	size_t *sizep);
118 
119 /* Pkt processing internal functions */
120 static int i_ldc_check_seqid(ldc_chan_t *ldcp, ldc_msg_t *ldcmsg);
121 static int i_ldc_ctrlmsg(ldc_chan_t *ldcp, ldc_msg_t *ldcmsg);
122 static int i_ldc_process_VER(ldc_chan_t *ldcp, ldc_msg_t *msg);
123 static int i_ldc_process_RTS(ldc_chan_t *ldcp, ldc_msg_t *msg);
124 static int i_ldc_process_RTR(ldc_chan_t *ldcp, ldc_msg_t *msg);
125 static int i_ldc_process_RDX(ldc_chan_t *ldcp, ldc_msg_t *msg);
126 static int i_ldc_process_data_ACK(ldc_chan_t *ldcp, ldc_msg_t *msg);
127 
128 /* LDC Version */
129 static ldc_ver_t ldc_versions[] = { {1, 0} };
130 
131 /* number of supported versions */
132 #define	LDC_NUM_VERS	(sizeof (ldc_versions) / sizeof (ldc_versions[0]))
133 
134 /* Invalid value for the ldc_chan_t rx_ack_head field */
135 #define	ACKPEEK_HEAD_INVALID	((uint64_t)-1)
136 
137 
138 /* Module State Pointer */
139 ldc_soft_state_t *ldcssp;
140 
141 static struct modldrv md = {
142 	&mod_miscops,			/* This is a misc module */
143 	"sun4v LDC module",		/* Name of the module */
144 };
145 
146 static struct modlinkage ml = {
147 	MODREV_1,
148 	&md,
149 	NULL
150 };
151 
152 static uint64_t ldc_sup_minor;		/* Supported minor number */
153 static hsvc_info_t ldc_hsvc = {
154 	HSVC_REV_1, NULL, HSVC_GROUP_LDC, 1, 1, "ldc"
155 };
156 
157 /*
158  * The no. of MTU size messages that can be stored in
159  * the LDC Tx queue. The number of Tx queue entries is
160  * then computed as (mtu * mtu_msgs)/sizeof(queue_entry)
161  */
162 uint64_t ldc_mtu_msgs = LDC_MTU_MSGS;
163 
164 /*
165  * The minimum queue length. This is the size of the smallest
166  * LDC queue. If the computed value is less than this default,
167  * the queue length is rounded up to 'ldc_queue_entries'.
168  */
169 uint64_t ldc_queue_entries = LDC_QUEUE_ENTRIES;
170 
171 /*
172  * The length of the reliable-mode data queue in terms of the LDC
173  * receive queue length. i.e., the number of times larger than the
174  * LDC receive queue that the data queue should be. The HV receive
175  * queue is required to be a power of 2 and this implementation
176  * assumes the data queue will also be a power of 2. By making the
177  * multiplier a power of 2, we ensure the data queue will be a
178  * power of 2. We use a multiplier because the receive queue is
179  * sized to be sane relative to the MTU and the same is needed for
180  * the data queue.
181  */
182 uint64_t ldc_rxdq_multiplier = LDC_RXDQ_MULTIPLIER;
183 
184 /*
185  * LDC retry count and delay - when the HV returns EWOULDBLOCK
186  * the operation is retried 'ldc_max_retries' times with a
187  * wait of 'ldc_delay' usecs between each retry.
188  */
189 int ldc_max_retries = LDC_MAX_RETRIES;
190 clock_t ldc_delay = LDC_DELAY;
191 
/*
 * Channels which have a devclass satisfying the following
 * will be reset when entering the prom or kmdb.
 *
 *   LDC_DEVCLASS_PROM_RESET(devclass) != 0
 *
 * By default, only block device service channels are reset.
 *
 * The bit is computed in 64-bit arithmetic so that it has the same
 * width as the 64-bit ldc_debug_reset_mask it is tested against; a
 * plain int shift would overflow for devclass values of 31 or more.
 */
#define	LDC_DEVCLASS_BIT(dc)		((uint64_t)0x1 << (dc))
#define	LDC_DEVCLASS_PROM_RESET(dc)	\
	(LDC_DEVCLASS_BIT(dc) & ldc_debug_reset_mask)
203 static uint64_t ldc_debug_reset_mask = LDC_DEVCLASS_BIT(LDC_DEV_BLK_SVC);
204 
205 /*
206  * delay between each retry of channel unregistration in
207  * ldc_close(), to wait for pending interrupts to complete.
208  */
209 clock_t ldc_close_delay = LDC_CLOSE_DELAY;
210 
211 #ifdef DEBUG
212 
213 /*
214  * Print debug messages
215  *
216  * set ldcdbg to 0x7 for enabling all msgs
217  * 0x4 - Warnings
218  * 0x2 - All debug messages
219  * 0x1 - Minimal debug messages
220  *
221  * set ldcdbgchan to the channel number you want to debug
222  * setting it to -1 prints debug messages for all channels
223  * NOTE: ldcdbgchan has no effect on error messages
224  */
225 
226 int ldcdbg = 0x0;
227 int64_t ldcdbgchan = DBG_ALL_LDCS;
228 uint64_t ldc_inject_err_flag = 0;
229 
230 void
231 ldcdebug(int64_t id, const char *fmt, ...)
232 {
233 	char buf[512];
234 	va_list ap;
235 
236 	/*
237 	 * Do not return if,
238 	 * caller wants to print it anyway - (id == DBG_ALL_LDCS)
239 	 * debug channel is set to all LDCs - (ldcdbgchan == DBG_ALL_LDCS)
240 	 * debug channel = caller specified channel
241 	 */
242 	if ((id != DBG_ALL_LDCS) &&
243 	    (ldcdbgchan != DBG_ALL_LDCS) &&
244 	    (ldcdbgchan != id)) {
245 		return;
246 	}
247 
248 	va_start(ap, fmt);
249 	(void) vsprintf(buf, fmt, ap);
250 	va_end(ap);
251 
252 	cmn_err(CE_CONT, "?%s", buf);
253 }
254 
255 #define	LDC_ERR_RESET		0x1
256 #define	LDC_ERR_PKTLOSS		0x2
257 #define	LDC_ERR_DQFULL		0x4
258 #define	LDC_ERR_DRNGCLEAR	0x8
259 
260 static boolean_t
261 ldc_inject_error(ldc_chan_t *ldcp, uint64_t error)
262 {
263 	if ((ldcdbgchan != DBG_ALL_LDCS) && (ldcdbgchan != ldcp->id))
264 		return (B_FALSE);
265 
266 	if ((ldc_inject_err_flag & error) == 0)
267 		return (B_FALSE);
268 
269 	/* clear the injection state */
270 	ldc_inject_err_flag &= ~error;
271 
272 	return (B_TRUE);
273 }
274 
275 #define	D1		\
276 if (ldcdbg & 0x01)	\
277 	ldcdebug
278 
279 #define	D2		\
280 if (ldcdbg & 0x02)	\
281 	ldcdebug
282 
283 #define	DWARN		\
284 if (ldcdbg & 0x04)	\
285 	ldcdebug
286 
287 #define	DUMP_PAYLOAD(id, addr)						\
288 {									\
289 	char buf[65*3];							\
290 	int i;								\
291 	uint8_t *src = (uint8_t *)addr;					\
292 	for (i = 0; i < 64; i++, src++)					\
293 		(void) sprintf(&buf[i * 3], "|%02x", *src);		\
294 	(void) sprintf(&buf[i * 3], "|\n");				\
295 	D2((id), "payload: %s", buf);					\
296 }
297 
298 #define	DUMP_LDC_PKT(c, s, addr)					\
299 {									\
300 	ldc_msg_t *msg = (ldc_msg_t *)(addr);				\
301 	uint32_t mid = ((c)->mode != LDC_MODE_RAW) ? msg->seqid : 0;	\
302 	if (msg->type == LDC_DATA) {                                    \
303 	    D2((c)->id, "%s: msg%d (/%x/%x/%x/,env[%c%c,sz=%d])",	\
304 	    (s), mid, msg->type, msg->stype, msg->ctrl,			\
305 	    (msg->env & LDC_FRAG_START) ? 'B' : ' ',                    \
306 	    (msg->env & LDC_FRAG_STOP) ? 'E' : ' ',                     \
307 	    (msg->env & LDC_LEN_MASK));					\
308 	} else { 							\
309 	    D2((c)->id, "%s: msg%d (/%x/%x/%x/,env=%x)", (s),		\
310 	    mid, msg->type, msg->stype, msg->ctrl, msg->env);		\
311 	} 								\
312 }
313 
314 #define	LDC_INJECT_RESET(_ldcp)	ldc_inject_error(_ldcp, LDC_ERR_RESET)
315 #define	LDC_INJECT_PKTLOSS(_ldcp) ldc_inject_error(_ldcp, LDC_ERR_PKTLOSS)
316 #define	LDC_INJECT_DQFULL(_ldcp) ldc_inject_error(_ldcp, LDC_ERR_DQFULL)
317 #define	LDC_INJECT_DRNGCLEAR(_ldcp) ldc_inject_error(_ldcp, LDC_ERR_DRNGCLEAR)
318 extern void i_ldc_mem_inject_dring_clear(ldc_chan_t *ldcp);
319 
320 #else
321 
322 #define	DBG_ALL_LDCS -1
323 
324 #define	D1
325 #define	D2
326 #define	DWARN
327 
328 #define	DUMP_PAYLOAD(id, addr)
329 #define	DUMP_LDC_PKT(c, s, addr)
330 
331 #define	LDC_INJECT_RESET(_ldcp)	(B_FALSE)
332 #define	LDC_INJECT_PKTLOSS(_ldcp) (B_FALSE)
333 #define	LDC_INJECT_DQFULL(_ldcp) (B_FALSE)
334 #define	LDC_INJECT_DRNGCLEAR(_ldcp) (B_FALSE)
335 
336 #endif
337 
338 /*
339  * dtrace SDT probes to ease tracing of the rx data queue and HV queue
340  * lengths. Just pass the head, tail, and entries values so that the
341  * length can be calculated in a dtrace script when the probe is enabled.
342  */
343 #define	TRACE_RXDQ_LENGTH(ldcp)						\
344 	DTRACE_PROBE4(rxdq__size,					\
345 	uint64_t, ldcp->id,						\
346 	uint64_t, ldcp->rx_dq_head,					\
347 	uint64_t, ldcp->rx_dq_tail,					\
348 	uint64_t, ldcp->rx_dq_entries)
349 
350 #define	TRACE_RXHVQ_LENGTH(ldcp, head, tail)				\
351 	DTRACE_PROBE4(rxhvq__size,					\
352 	uint64_t, ldcp->id,						\
353 	uint64_t, head,							\
354 	uint64_t, tail,							\
355 	uint64_t, ldcp->rx_q_entries)
356 
357 /* A dtrace SDT probe to ease tracing of data queue copy operations */
358 #define	TRACE_RXDQ_COPY(ldcp, bytes)					\
359 	DTRACE_PROBE2(rxdq__copy, uint64_t, ldcp->id, uint64_t, bytes)	\
360 
361 /* The amount of contiguous space at the tail of the queue */
362 #define	Q_CONTIG_SPACE(head, tail, size)				\
363 	((head) <= (tail) ? ((size) - (tail)) :				\
364 	((head) - (tail) - LDC_PACKET_SIZE))
365 
366 #define	ZERO_PKT(p)			\
367 	bzero((p), sizeof (ldc_msg_t));
368 
369 #define	IDX2COOKIE(idx, pg_szc, pg_shift)				\
370 	(((pg_szc) << LDC_COOKIE_PGSZC_SHIFT) | ((idx) << (pg_shift)))
371 
372 int
373 _init(void)
374 {
375 	int status;
376 	extern void i_ldc_mem_set_hsvc_vers(uint64_t major, uint64_t minor);
377 
378 	status = hsvc_register(&ldc_hsvc, &ldc_sup_minor);
379 	if (status != 0) {
380 		cmn_err(CE_NOTE, "!%s: cannot negotiate hypervisor LDC services"
381 		    " group: 0x%lx major: %ld minor: %ld errno: %d",
382 		    ldc_hsvc.hsvc_modname, ldc_hsvc.hsvc_group,
383 		    ldc_hsvc.hsvc_major, ldc_hsvc.hsvc_minor, status);
384 		return (-1);
385 	}
386 
387 	/* Initialize shared memory HV API version checking */
388 	i_ldc_mem_set_hsvc_vers(ldc_hsvc.hsvc_major, ldc_sup_minor);
389 
390 	/* allocate soft state structure */
391 	ldcssp = kmem_zalloc(sizeof (ldc_soft_state_t), KM_SLEEP);
392 
393 	/* Link the module into the system */
394 	status = mod_install(&ml);
395 	if (status != 0) {
396 		kmem_free(ldcssp, sizeof (ldc_soft_state_t));
397 		return (status);
398 	}
399 
400 	/* Initialize the LDC state structure */
401 	mutex_init(&ldcssp->lock, NULL, MUTEX_DRIVER, NULL);
402 
403 	mutex_enter(&ldcssp->lock);
404 
405 	/* Create a cache for memory handles */
406 	ldcssp->memhdl_cache = kmem_cache_create("ldc_memhdl_cache",
407 	    sizeof (ldc_mhdl_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
408 	if (ldcssp->memhdl_cache == NULL) {
409 		DWARN(DBG_ALL_LDCS, "_init: ldc_memhdl cache create failed\n");
410 		mutex_exit(&ldcssp->lock);
411 		return (-1);
412 	}
413 
414 	/* Create cache for memory segment structures */
415 	ldcssp->memseg_cache = kmem_cache_create("ldc_memseg_cache",
416 	    sizeof (ldc_memseg_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
417 	if (ldcssp->memseg_cache == NULL) {
418 		DWARN(DBG_ALL_LDCS, "_init: ldc_memseg cache create failed\n");
419 		mutex_exit(&ldcssp->lock);
420 		return (-1);
421 	}
422 
423 
424 	ldcssp->channel_count = 0;
425 	ldcssp->channels_open = 0;
426 	ldcssp->chan_list = NULL;
427 	ldcssp->dring_list = NULL;
428 
429 	/* Register debug_enter callback */
430 	kldc_set_debug_cb(&i_ldc_debug_enter);
431 
432 	mutex_exit(&ldcssp->lock);
433 
434 	return (0);
435 }
436 
/*
 * Module information entry point: hands the module linkage 'ml' and the
 * caller supplied 'modinfop' to mod_info() and returns its result.
 */
int
_info(struct modinfo *modinfop)
{
	/* Report status of the dynamically loadable driver module */
	return (mod_info(&ml, modinfop));
}
443 
444 int
445 _fini(void)
446 {
447 	int 		rv, status;
448 	ldc_chan_t 	*tmp_ldcp, *ldcp;
449 	ldc_dring_t 	*tmp_dringp, *dringp;
450 	ldc_mem_info_t 	minfo;
451 
452 	/* Unlink the driver module from the system */
453 	status = mod_remove(&ml);
454 	if (status) {
455 		DWARN(DBG_ALL_LDCS, "_fini: mod_remove failed\n");
456 		return (EIO);
457 	}
458 
459 	/* Unregister debug_enter callback */
460 	kldc_set_debug_cb(NULL);
461 
462 	/* Free descriptor rings */
463 	dringp = ldcssp->dring_list;
464 	while (dringp != NULL) {
465 		tmp_dringp = dringp->next;
466 
467 		rv = ldc_mem_dring_info((ldc_dring_handle_t)dringp, &minfo);
468 		if (rv == 0 && minfo.status != LDC_UNBOUND) {
469 			if (minfo.status == LDC_BOUND) {
470 				(void) ldc_mem_dring_unbind(
471 				    (ldc_dring_handle_t)dringp);
472 			}
473 			if (minfo.status == LDC_MAPPED) {
474 				(void) ldc_mem_dring_unmap(
475 				    (ldc_dring_handle_t)dringp);
476 			}
477 		}
478 
479 		(void) ldc_mem_dring_destroy((ldc_dring_handle_t)dringp);
480 		dringp = tmp_dringp;
481 	}
482 	ldcssp->dring_list = NULL;
483 
484 	/* close and finalize channels */
485 	ldcp = ldcssp->chan_list;
486 	while (ldcp != NULL) {
487 		tmp_ldcp = ldcp->next;
488 
489 		(void) ldc_close((ldc_handle_t)ldcp);
490 		(void) ldc_fini((ldc_handle_t)ldcp);
491 
492 		ldcp = tmp_ldcp;
493 	}
494 	ldcssp->chan_list = NULL;
495 
496 	/* Destroy kmem caches */
497 	kmem_cache_destroy(ldcssp->memhdl_cache);
498 	kmem_cache_destroy(ldcssp->memseg_cache);
499 
500 	/*
501 	 * We have successfully "removed" the driver.
502 	 * Destroying soft states
503 	 */
504 	mutex_destroy(&ldcssp->lock);
505 	kmem_free(ldcssp, sizeof (ldc_soft_state_t));
506 
507 	(void) hsvc_unregister(&ldc_hsvc);
508 
509 	return (status);
510 }
511 
512 /* -------------------------------------------------------------------------- */
513 
514 /*
515  * LDC Link Layer Internal Functions
516  */
517 
518 /*
519  * Translate HV Errors to sun4v error codes
520  */
521 int
522 i_ldc_h2v_error(int h_error)
523 {
524 	switch (h_error) {
525 
526 	case	H_EOK:
527 		return (0);
528 
529 	case	H_ENORADDR:
530 		return (EFAULT);
531 
532 	case	H_EBADPGSZ:
533 	case	H_EINVAL:
534 		return (EINVAL);
535 
536 	case	H_EWOULDBLOCK:
537 		return (EWOULDBLOCK);
538 
539 	case	H_ENOACCESS:
540 	case	H_ENOMAP:
541 		return (EACCES);
542 
543 	case	H_EIO:
544 	case	H_ECPUERROR:
545 		return (EIO);
546 
547 	case	H_ENOTSUPPORTED:
548 		return (ENOTSUP);
549 
550 	case 	H_ETOOMANY:
551 		return (ENOSPC);
552 
553 	case	H_ECHANNEL:
554 		return (ECHRNG);
555 	default:
556 		break;
557 	}
558 
559 	return (EIO);
560 }
561 
562 /*
563  * Reconfigure the transmit queue
564  */
565 static int
566 i_ldc_txq_reconf(ldc_chan_t *ldcp)
567 {
568 	int rv;
569 
570 	ASSERT(MUTEX_HELD(&ldcp->lock));
571 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
572 
573 	rv = hv_ldc_tx_qconf(ldcp->id, ldcp->tx_q_ra, ldcp->tx_q_entries);
574 	if (rv) {
575 		cmn_err(CE_WARN,
576 		    "i_ldc_txq_reconf: (0x%lx) cannot set qconf", ldcp->id);
577 		return (EIO);
578 	}
579 	rv = hv_ldc_tx_get_state(ldcp->id, &(ldcp->tx_head),
580 	    &(ldcp->tx_tail), &(ldcp->link_state));
581 	if (rv) {
582 		cmn_err(CE_WARN,
583 		    "i_ldc_txq_reconf: (0x%lx) cannot get qptrs", ldcp->id);
584 		return (EIO);
585 	}
586 	D1(ldcp->id, "i_ldc_txq_reconf: (0x%llx) h=0x%llx,t=0x%llx,"
587 	    "s=0x%llx\n", ldcp->id, ldcp->tx_head, ldcp->tx_tail,
588 	    ldcp->link_state);
589 
590 	return (0);
591 }
592 
593 /*
594  * Reconfigure the receive queue
595  */
596 static int
597 i_ldc_rxq_reconf(ldc_chan_t *ldcp, boolean_t force_reset)
598 {
599 	int rv;
600 	uint64_t rx_head, rx_tail;
601 
602 	ASSERT(MUTEX_HELD(&ldcp->lock));
603 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
604 	    &(ldcp->link_state));
605 	if (rv) {
606 		cmn_err(CE_WARN,
607 		    "i_ldc_rxq_reconf: (0x%lx) cannot get state",
608 		    ldcp->id);
609 		return (EIO);
610 	}
611 
612 	if (force_reset || (ldcp->tstate & ~TS_IN_RESET) == TS_UP) {
613 		rv = hv_ldc_rx_qconf(ldcp->id, ldcp->rx_q_ra,
614 		    ldcp->rx_q_entries);
615 		if (rv) {
616 			cmn_err(CE_WARN,
617 			    "i_ldc_rxq_reconf: (0x%lx) cannot set qconf",
618 			    ldcp->id);
619 			return (EIO);
620 		}
621 		D1(ldcp->id, "i_ldc_rxq_reconf: (0x%llx) completed q reconf",
622 		    ldcp->id);
623 	}
624 
625 	return (0);
626 }
627 
628 
629 /*
630  * Drain the contents of the receive queue
631  */
632 static int
633 i_ldc_rxq_drain(ldc_chan_t *ldcp)
634 {
635 	int rv;
636 	uint64_t rx_head, rx_tail;
637 
638 	ASSERT(MUTEX_HELD(&ldcp->lock));
639 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
640 	    &(ldcp->link_state));
641 	if (rv) {
642 		cmn_err(CE_WARN, "i_ldc_rxq_drain: (0x%lx) cannot get state",
643 		    ldcp->id);
644 		return (EIO);
645 	}
646 
647 	/* flush contents by setting the head = tail */
648 	return (i_ldc_set_rx_head(ldcp, rx_tail));
649 }
650 
651 
652 /*
653  * Reset LDC state structure and its contents
654  */
655 static void
656 i_ldc_reset_state(ldc_chan_t *ldcp)
657 {
658 	ASSERT(MUTEX_HELD(&ldcp->lock));
659 	ldcp->last_msg_snt = LDC_INIT_SEQID;
660 	ldcp->last_ack_rcd = 0;
661 	ldcp->last_msg_rcd = 0;
662 	ldcp->tx_ackd_head = ldcp->tx_head;
663 	ldcp->stream_remains = 0;
664 	ldcp->next_vidx = 0;
665 	ldcp->hstate = 0;
666 	ldcp->tstate = TS_OPEN;
667 	ldcp->status = LDC_OPEN;
668 	ldcp->rx_ack_head = ACKPEEK_HEAD_INVALID;
669 	ldcp->rx_dq_head = 0;
670 	ldcp->rx_dq_tail = 0;
671 
672 	if (ldcp->link_state == LDC_CHANNEL_UP ||
673 	    ldcp->link_state == LDC_CHANNEL_RESET) {
674 
675 		if (ldcp->mode == LDC_MODE_RAW) {
676 			ldcp->status = LDC_UP;
677 			ldcp->tstate = TS_UP;
678 		} else {
679 			ldcp->status = LDC_READY;
680 			ldcp->tstate |= TS_LINK_READY;
681 		}
682 	}
683 }
684 
/*
 * Reset a LDC channel
 *
 * Reconfigures the Tx and Rx queues, clears the Tx and Rx interrupt
 * state, resets the channel state and finally marks the channel as
 * being in reset (TS_IN_RESET). 'force_reset' is passed through to
 * i_ldc_rxq_reconf(), which uses it to decide whether the Rx queue is
 * reconfigured even when the channel is not up.
 *
 * Caller must hold both ldcp->lock and ldcp->tx_lock.
 */
void
i_ldc_reset(ldc_chan_t *ldcp, boolean_t force_reset)
{
	DWARN(ldcp->id, "i_ldc_reset: (0x%llx) channel reset\n", ldcp->id);

	ASSERT(MUTEX_HELD(&ldcp->lock));
	ASSERT(MUTEX_HELD(&ldcp->tx_lock));

	/* reconfig Tx and Rx queues (return values are ignored) */
	(void) i_ldc_txq_reconf(ldcp);
	(void) i_ldc_rxq_reconf(ldcp, force_reset);

	/* Clear Tx and Rx interrupts */
	(void) i_ldc_clear_intr(ldcp, CNEX_TX_INTR);
	(void) i_ldc_clear_intr(ldcp, CNEX_RX_INTR);

	/* Reset channel state */
	i_ldc_reset_state(ldcp);

	/*
	 * Mark channel in reset. This must follow i_ldc_reset_state(),
	 * which overwrites tstate.
	 */
	ldcp->tstate |= TS_IN_RESET;
}
710 
711 /*
712  * Walk the channel list and reset channels if they are of the right
713  * devclass and their Rx queues have been configured. No locks are
714  * taken because the function is only invoked by the kernel just before
715  * entering the prom or debugger when the system is single-threaded.
716  */
717 static void
718 i_ldc_debug_enter(void)
719 {
720 	ldc_chan_t *ldcp;
721 
722 	ldcp = ldcssp->chan_list;
723 	while (ldcp != NULL) {
724 		if (((ldcp->tstate & TS_QCONF_RDY) == TS_QCONF_RDY) &&
725 		    (LDC_DEVCLASS_PROM_RESET(ldcp->devclass) != 0)) {
726 			(void) hv_ldc_rx_qconf(ldcp->id, ldcp->rx_q_ra,
727 			    ldcp->rx_q_entries);
728 		}
729 		ldcp = ldcp->next;
730 	}
731 }
732 
733 /*
734  * Clear pending interrupts
735  */
736 static void
737 i_ldc_clear_intr(ldc_chan_t *ldcp, cnex_intrtype_t itype)
738 {
739 	ldc_cnex_t *cinfo = &ldcssp->cinfo;
740 
741 	ASSERT(MUTEX_HELD(&ldcp->lock));
742 	ASSERT(cinfo->dip != NULL);
743 
744 	switch (itype) {
745 	case CNEX_TX_INTR:
746 		/* check Tx interrupt */
747 		if (ldcp->tx_intr_state)
748 			ldcp->tx_intr_state = LDC_INTR_NONE;
749 		else
750 			return;
751 		break;
752 
753 	case CNEX_RX_INTR:
754 		/* check Rx interrupt */
755 		if (ldcp->rx_intr_state)
756 			ldcp->rx_intr_state = LDC_INTR_NONE;
757 		else
758 			return;
759 		break;
760 	}
761 
762 	(void) cinfo->clr_intr(cinfo->dip, ldcp->id, itype);
763 	D2(ldcp->id,
764 	    "i_ldc_clear_intr: (0x%llx) cleared 0x%x intr\n",
765 	    ldcp->id, itype);
766 }
767 
768 /*
769  * Set the receive queue head
770  * Resets connection and returns an error if it fails.
771  */
772 static int
773 i_ldc_set_rx_head(ldc_chan_t *ldcp, uint64_t head)
774 {
775 	int 	rv;
776 	int 	retries;
777 
778 	ASSERT(MUTEX_HELD(&ldcp->lock));
779 	for (retries = 0; retries < ldc_max_retries; retries++) {
780 
781 		if ((rv = hv_ldc_rx_set_qhead(ldcp->id, head)) == 0)
782 			return (0);
783 
784 		if (rv != H_EWOULDBLOCK)
785 			break;
786 
787 		/* wait for ldc_delay usecs */
788 		drv_usecwait(ldc_delay);
789 	}
790 
791 	cmn_err(CE_WARN, "ldc_rx_set_qhead: (0x%lx) cannot set qhead 0x%lx",
792 	    ldcp->id, head);
793 	mutex_enter(&ldcp->tx_lock);
794 	i_ldc_reset(ldcp, B_TRUE);
795 	mutex_exit(&ldcp->tx_lock);
796 
797 	return (ECONNRESET);
798 }
799 
800 /*
801  * Returns the tx_head to be used for transfer
802  */
803 static void
804 i_ldc_get_tx_head(ldc_chan_t *ldcp, uint64_t *head)
805 {
806 	ldc_msg_t 	*pkt;
807 
808 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
809 
810 	/* get current Tx head */
811 	*head = ldcp->tx_head;
812 
813 	/*
814 	 * Reliable mode will use the ACKd head instead of the regular tx_head.
815 	 * Also in Reliable mode, advance ackd_head for all non DATA/INFO pkts,
816 	 * up to the current location of tx_head. This needs to be done
817 	 * as the peer will only ACK DATA/INFO pkts.
818 	 */
819 	if (ldcp->mode == LDC_MODE_RELIABLE) {
820 		while (ldcp->tx_ackd_head != ldcp->tx_head) {
821 			pkt = (ldc_msg_t *)(ldcp->tx_q_va + ldcp->tx_ackd_head);
822 			if ((pkt->type & LDC_DATA) && (pkt->stype & LDC_INFO)) {
823 				break;
824 			}
825 			/* advance ACKd head */
826 			ldcp->tx_ackd_head =
827 			    (ldcp->tx_ackd_head + LDC_PACKET_SIZE) %
828 			    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
829 		}
830 		*head = ldcp->tx_ackd_head;
831 	}
832 }
833 
834 /*
835  * Returns the tx_tail to be used for transfer
836  * Re-reads the TX queue ptrs if and only if the
837  * the cached head and tail are equal (queue is full)
838  */
839 static int
840 i_ldc_get_tx_tail(ldc_chan_t *ldcp, uint64_t *tail)
841 {
842 	int 		rv;
843 	uint64_t 	current_head, new_tail;
844 
845 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
846 	/* Read the head and tail ptrs from HV */
847 	rv = hv_ldc_tx_get_state(ldcp->id,
848 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
849 	if (rv) {
850 		cmn_err(CE_WARN,
851 		    "i_ldc_get_tx_tail: (0x%lx) cannot read qptrs\n",
852 		    ldcp->id);
853 		return (EIO);
854 	}
855 	if (ldcp->link_state == LDC_CHANNEL_DOWN) {
856 		D1(ldcp->id, "i_ldc_get_tx_tail: (0x%llx) channel not ready\n",
857 		    ldcp->id);
858 		return (ECONNRESET);
859 	}
860 
861 	i_ldc_get_tx_head(ldcp, &current_head);
862 
863 	/* increment the tail */
864 	new_tail = (ldcp->tx_tail + LDC_PACKET_SIZE) %
865 	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
866 
867 	if (new_tail == current_head) {
868 		DWARN(ldcp->id,
869 		    "i_ldc_get_tx_tail: (0x%llx) TX queue is full\n",
870 		    ldcp->id);
871 		return (EWOULDBLOCK);
872 	}
873 
874 	D2(ldcp->id, "i_ldc_get_tx_tail: (0x%llx) head=0x%llx, tail=0x%llx\n",
875 	    ldcp->id, ldcp->tx_head, ldcp->tx_tail);
876 
877 	*tail = ldcp->tx_tail;
878 	return (0);
879 }
880 
881 /*
882  * Set the tail pointer. If HV returns EWOULDBLOCK, it will back off
883  * and retry ldc_max_retries times before returning an error.
884  * Returns 0, EWOULDBLOCK or EIO
885  */
886 static int
887 i_ldc_set_tx_tail(ldc_chan_t *ldcp, uint64_t tail)
888 {
889 	int		rv, retval = EWOULDBLOCK;
890 	int 		retries;
891 
892 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
893 	for (retries = 0; retries < ldc_max_retries; retries++) {
894 
895 		if ((rv = hv_ldc_tx_set_qtail(ldcp->id, tail)) == 0) {
896 			retval = 0;
897 			break;
898 		}
899 		if (rv != H_EWOULDBLOCK) {
900 			DWARN(ldcp->id, "i_ldc_set_tx_tail: (0x%llx) set "
901 			    "qtail=0x%llx failed, rv=%d\n", ldcp->id, tail, rv);
902 			retval = EIO;
903 			break;
904 		}
905 
906 		/* wait for ldc_delay usecs */
907 		drv_usecwait(ldc_delay);
908 	}
909 	return (retval);
910 }
911 
912 /*
913  * Copy a data packet from the HV receive queue to the data queue.
914  * Caller must ensure that the data queue is not already full.
915  *
916  * The *head argument represents the current head pointer for the HV
917  * receive queue. After copying a packet from the HV receive queue,
918  * the *head pointer will be updated. This allows the caller to update
919  * the head pointer in HV using the returned *head value.
920  */
921 void
922 i_ldc_rxdq_copy(ldc_chan_t *ldcp, uint64_t *head)
923 {
924 	uint64_t	q_size, dq_size;
925 
926 	ASSERT(MUTEX_HELD(&ldcp->lock));
927 
928 	q_size  = ldcp->rx_q_entries << LDC_PACKET_SHIFT;
929 	dq_size = ldcp->rx_dq_entries << LDC_PACKET_SHIFT;
930 
931 	ASSERT(Q_CONTIG_SPACE(ldcp->rx_dq_head, ldcp->rx_dq_tail,
932 	    dq_size) >= LDC_PACKET_SIZE);
933 
934 	bcopy((void *)(ldcp->rx_q_va + *head),
935 	    (void *)(ldcp->rx_dq_va + ldcp->rx_dq_tail), LDC_PACKET_SIZE);
936 	TRACE_RXDQ_COPY(ldcp, LDC_PACKET_SIZE);
937 
938 	/* Update rx head */
939 	*head = (*head + LDC_PACKET_SIZE) % q_size;
940 
941 	/* Update dq tail */
942 	ldcp->rx_dq_tail = (ldcp->rx_dq_tail + LDC_PACKET_SIZE) % dq_size;
943 }
944 
/*
 * Update the Rx data queue head pointer
 *
 * Stores 'head' in ldcp->rx_dq_head. Always returns 0.
 */
static int
i_ldc_set_rxdq_head(ldc_chan_t *ldcp, uint64_t head)
{
	ldcp->rx_dq_head = head;
	return (0);
}
954 
/*
 * Get the Rx data queue head and tail pointers
 *
 * Copies ldcp->rx_dq_head and ldcp->rx_dq_tail into *head and *tail.
 * 'link_state' is not used and is left untouched. Always returns 0.
 */
static uint64_t
i_ldc_dq_rx_get_state(ldc_chan_t *ldcp, uint64_t *head, uint64_t *tail,
    uint64_t *link_state)
{
	_NOTE(ARGUNUSED(link_state))
	*head = ldcp->rx_dq_head;
	*tail = ldcp->rx_dq_tail;
	return (0);
}
967 
/*
 * Wrapper for the Rx HV queue get state function. It gives the data
 * queue and HV queue get state functions the same type, and returns
 * the HV result translated by i_ldc_h2v_error().
 */
static uint64_t
i_ldc_hvq_rx_get_state(ldc_chan_t *ldcp, uint64_t *head, uint64_t *tail,
    uint64_t *link_state)
{
	return (i_ldc_h2v_error(hv_ldc_rx_get_state(ldcp->id, head, tail,
	    link_state)));
}
979 
/*
 * LDC receive interrupt handler
 *    triggered for channel with data pending to read
 *    i.e. Rx queue content changes
 *
 * arg1 is the channel (ldc_chan_t *) the interrupt was received for;
 * arg2 is unused.
 *
 * The HV Rx queue is processed under ldcp->lock. The lock is dropped
 * around the client callback (cb_inprogress is set for the duration)
 * and re-acquired afterwards.
 *
 * Returns DDI_INTR_UNCLAIMED if arg1 is NULL, DDI_INTR_CLAIMED otherwise.
 */
static uint_t
i_ldc_rx_hdlr(caddr_t arg1, caddr_t arg2)
{
	_NOTE(ARGUNUSED(arg2))

	ldc_chan_t	*ldcp;
	boolean_t	notify;
	uint64_t	event;
	int		rv, status;

	/* Get the channel for which interrupt was received */
	if (arg1 == NULL) {
		cmn_err(CE_WARN, "i_ldc_rx_hdlr: invalid arg\n");
		return (DDI_INTR_UNCLAIMED);
	}

	ldcp = (ldc_chan_t *)arg1;

	D1(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) Received intr, ldcp=0x%p\n",
	    ldcp->id, ldcp);
	D1(ldcp->id, "i_ldc_rx_hdlr: (%llx) USR%lx/TS%lx/HS%lx, LSTATE=%lx\n",
	    ldcp->id, ldcp->status, ldcp->tstate, ldcp->hstate,
	    ldcp->link_state);

	/* Lock channel */
	mutex_enter(&ldcp->lock);

	/* Mark the interrupt as being actively handled */
	ldcp->rx_intr_state = LDC_INTR_ACTIVE;

	/*
	 * NOTE(review): notify and event are not initialized here; this
	 * relies on i_ldc_rx_process_hvq() setting both on every path --
	 * confirm against that function.
	 */
	status = i_ldc_rx_process_hvq(ldcp, &notify, &event);

	if (ldcp->mode != LDC_MODE_RELIABLE) {
		/*
		 * If there are no data packets on the queue, clear
		 * the interrupt. Otherwise, the ldc_read will clear
		 * interrupts after draining the queue. To indicate the
		 * interrupt has not yet been cleared, it is marked
		 * as pending.
		 */
		if ((event & LDC_EVT_READ) == 0) {
			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
		} else {
			ldcp->rx_intr_state = LDC_INTR_PEND;
		}
	}

	/* if callbacks are disabled, do not notify */
	if (notify && ldcp->cb_enabled) {
		ldcp->cb_inprogress = B_TRUE;
		/* the channel lock is not held across the client callback */
		mutex_exit(&ldcp->lock);
		rv = ldcp->cb(event, ldcp->cb_arg);
		if (rv) {
			DWARN(ldcp->id,
			    "i_ldc_rx_hdlr: (0x%llx) callback failure",
			    ldcp->id);
		}
		mutex_enter(&ldcp->lock);
		ldcp->cb_inprogress = B_FALSE;
	}

	if (ldcp->mode == LDC_MODE_RELIABLE) {
		if (status == ENOSPC) {
			/*
			 * Here, ENOSPC indicates the secondary data
			 * queue is full and the Rx queue is non-empty.
			 * Much like how the non-reliable modes are
			 * handled above, since the Rx queue is non-
			 * empty, we mark the interrupt as pending to
			 * indicate it has not yet been cleared.
			 */
			ldcp->rx_intr_state = LDC_INTR_PEND;
		} else {
			/*
			 * We have processed all CTRL packets and
			 * copied all DATA packets to the secondary
			 * queue. Clear the interrupt.
			 */
			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
		}
	}

	mutex_exit(&ldcp->lock);

	D1(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) exiting handler", ldcp->id);

	return (DDI_INTR_CLAIMED);
}
1073 
1074 /*
1075  * Wrapper for the Rx HV queue processing function to be used when
1076  * checking the Rx HV queue for data packets. Unlike the interrupt
1077  * handler code flow, the Rx interrupt is not cleared here and
1078  * callbacks are not made.
1079  */
1080 static uint_t
1081 i_ldc_chkq(ldc_chan_t *ldcp)
1082 {
1083 	boolean_t	notify;
1084 	uint64_t	event;
1085 
1086 	return (i_ldc_rx_process_hvq(ldcp, &notify, &event));
1087 }
1088 
1089 /*
1090  * Send a LDC message
1091  */
1092 static int
1093 i_ldc_send_pkt(ldc_chan_t *ldcp, uint8_t pkttype, uint8_t subtype,
1094     uint8_t ctrlmsg)
1095 {
1096 	int		rv;
1097 	ldc_msg_t 	*pkt;
1098 	uint64_t	tx_tail;
1099 	uint32_t	curr_seqid;
1100 
1101 	/* Obtain Tx lock */
1102 	mutex_enter(&ldcp->tx_lock);
1103 
1104 	curr_seqid = ldcp->last_msg_snt;
1105 
1106 	/* get the current tail for the message */
1107 	rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
1108 	if (rv) {
1109 		DWARN(ldcp->id,
1110 		    "i_ldc_send_pkt: (0x%llx) error sending pkt, "
1111 		    "type=0x%x,subtype=0x%x,ctrl=0x%x\n",
1112 		    ldcp->id, pkttype, subtype, ctrlmsg);
1113 		mutex_exit(&ldcp->tx_lock);
1114 		return (rv);
1115 	}
1116 
1117 	pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
1118 	ZERO_PKT(pkt);
1119 
1120 	/* Initialize the packet */
1121 	pkt->type = pkttype;
1122 	pkt->stype = subtype;
1123 	pkt->ctrl = ctrlmsg;
1124 
1125 	/* Store ackid/seqid iff it is RELIABLE mode & not a RTS/RTR message */
1126 	if (((ctrlmsg & LDC_CTRL_MASK) != LDC_RTS) &&
1127 	    ((ctrlmsg & LDC_CTRL_MASK) != LDC_RTR)) {
1128 		curr_seqid++;
1129 		if (ldcp->mode != LDC_MODE_RAW) {
1130 			pkt->seqid = curr_seqid;
1131 			pkt->ackid = ldcp->last_msg_rcd;
1132 		}
1133 	}
1134 	DUMP_LDC_PKT(ldcp, "i_ldc_send_pkt", (uint64_t)pkt);
1135 
1136 	/* initiate the send by calling into HV and set the new tail */
1137 	tx_tail = (tx_tail + LDC_PACKET_SIZE) %
1138 	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1139 
1140 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
1141 	if (rv) {
1142 		DWARN(ldcp->id,
1143 		    "i_ldc_send_pkt:(0x%llx) error sending pkt, "
1144 		    "type=0x%x,stype=0x%x,ctrl=0x%x\n",
1145 		    ldcp->id, pkttype, subtype, ctrlmsg);
1146 		mutex_exit(&ldcp->tx_lock);
1147 		return (EIO);
1148 	}
1149 
1150 	ldcp->last_msg_snt = curr_seqid;
1151 	ldcp->tx_tail = tx_tail;
1152 
1153 	mutex_exit(&ldcp->tx_lock);
1154 	return (0);
1155 }
1156 
1157 /*
1158  * Checks if packet was received in right order
1159  * in the case of a reliable link.
1160  * Returns 0 if in order, else EIO
1161  */
1162 static int
1163 i_ldc_check_seqid(ldc_chan_t *ldcp, ldc_msg_t *msg)
1164 {
1165 	/* No seqid checking for RAW mode */
1166 	if (ldcp->mode == LDC_MODE_RAW)
1167 		return (0);
1168 
1169 	/* No seqid checking for version, RTS, RTR message */
1170 	if (msg->ctrl == LDC_VER ||
1171 	    msg->ctrl == LDC_RTS ||
1172 	    msg->ctrl == LDC_RTR)
1173 		return (0);
1174 
1175 	/* Initial seqid to use is sent in RTS/RTR and saved in last_msg_rcd */
1176 	if (msg->seqid != (ldcp->last_msg_rcd + 1)) {
1177 		DWARN(ldcp->id,
1178 		    "i_ldc_check_seqid: (0x%llx) out-of-order pkt, got 0x%x, "
1179 		    "expecting 0x%x\n", ldcp->id, msg->seqid,
1180 		    (ldcp->last_msg_rcd + 1));
1181 		return (EIO);
1182 	}
1183 
1184 #ifdef DEBUG
1185 	if (LDC_INJECT_PKTLOSS(ldcp)) {
1186 		DWARN(ldcp->id,
1187 		    "i_ldc_check_seqid: (0x%llx) inject pkt loss\n", ldcp->id);
1188 		return (EIO);
1189 	}
1190 #endif
1191 
1192 	return (0);
1193 }
1194 
1195 
1196 /*
1197  * Process an incoming version ctrl message
1198  */
1199 static int
1200 i_ldc_process_VER(ldc_chan_t *ldcp, ldc_msg_t *msg)
1201 {
1202 	int 		rv = 0, idx = ldcp->next_vidx;
1203 	ldc_msg_t 	*pkt;
1204 	uint64_t	tx_tail;
1205 	ldc_ver_t	*rcvd_ver;
1206 
1207 	/* get the received version */
1208 	rcvd_ver = (ldc_ver_t *)((uint64_t)msg + LDC_PAYLOAD_VER_OFF);
1209 
1210 	D2(ldcp->id, "i_ldc_process_VER: (0x%llx) received VER v%u.%u\n",
1211 	    ldcp->id, rcvd_ver->major, rcvd_ver->minor);
1212 
1213 	/* Obtain Tx lock */
1214 	mutex_enter(&ldcp->tx_lock);
1215 
1216 	switch (msg->stype) {
1217 	case LDC_INFO:
1218 
1219 		if ((ldcp->tstate & ~TS_IN_RESET) == TS_VREADY) {
1220 			(void) i_ldc_txq_reconf(ldcp);
1221 			i_ldc_reset_state(ldcp);
1222 			mutex_exit(&ldcp->tx_lock);
1223 			return (EAGAIN);
1224 		}
1225 
1226 		/* get the current tail and pkt for the response */
1227 		rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
1228 		if (rv != 0) {
1229 			DWARN(ldcp->id,
1230 			    "i_ldc_process_VER: (0x%llx) err sending "
1231 			    "version ACK/NACK\n", ldcp->id);
1232 			i_ldc_reset(ldcp, B_TRUE);
1233 			mutex_exit(&ldcp->tx_lock);
1234 			return (ECONNRESET);
1235 		}
1236 
1237 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
1238 		ZERO_PKT(pkt);
1239 
1240 		/* initialize the packet */
1241 		pkt->type = LDC_CTRL;
1242 		pkt->ctrl = LDC_VER;
1243 
1244 		for (;;) {
1245 
1246 			D1(ldcp->id, "i_ldc_process_VER: got %u.%u chk %u.%u\n",
1247 			    rcvd_ver->major, rcvd_ver->minor,
1248 			    ldc_versions[idx].major, ldc_versions[idx].minor);
1249 
1250 			if (rcvd_ver->major == ldc_versions[idx].major) {
1251 				/* major version match - ACK version */
1252 				pkt->stype = LDC_ACK;
1253 
1254 				/*
1255 				 * lower minor version to the one this endpt
1256 				 * supports, if necessary
1257 				 */
1258 				if (rcvd_ver->minor > ldc_versions[idx].minor)
1259 					rcvd_ver->minor =
1260 					    ldc_versions[idx].minor;
1261 				bcopy(rcvd_ver, pkt->udata, sizeof (*rcvd_ver));
1262 
1263 				break;
1264 			}
1265 
1266 			if (rcvd_ver->major > ldc_versions[idx].major) {
1267 
1268 				D1(ldcp->id, "i_ldc_process_VER: using next"
1269 				    " lower idx=%d, v%u.%u\n", idx,
1270 				    ldc_versions[idx].major,
1271 				    ldc_versions[idx].minor);
1272 
1273 				/* nack with next lower version */
1274 				pkt->stype = LDC_NACK;
1275 				bcopy(&ldc_versions[idx], pkt->udata,
1276 				    sizeof (ldc_versions[idx]));
1277 				ldcp->next_vidx = idx;
1278 				break;
1279 			}
1280 
1281 			/* next major version */
1282 			idx++;
1283 
1284 			D1(ldcp->id, "i_ldc_process_VER: inc idx %x\n", idx);
1285 
1286 			if (idx == LDC_NUM_VERS) {
1287 				/* no version match - send NACK */
1288 				pkt->stype = LDC_NACK;
1289 				bzero(pkt->udata, sizeof (ldc_ver_t));
1290 				ldcp->next_vidx = 0;
1291 				break;
1292 			}
1293 		}
1294 
1295 		/* initiate the send by calling into HV and set the new tail */
1296 		tx_tail = (tx_tail + LDC_PACKET_SIZE) %
1297 		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1298 
1299 		rv = i_ldc_set_tx_tail(ldcp, tx_tail);
1300 		if (rv == 0) {
1301 			ldcp->tx_tail = tx_tail;
1302 			if (pkt->stype == LDC_ACK) {
1303 				D2(ldcp->id, "i_ldc_process_VER: (0x%llx) sent"
1304 				    " version ACK\n", ldcp->id);
1305 				/* Save the ACK'd version */
1306 				ldcp->version.major = rcvd_ver->major;
1307 				ldcp->version.minor = rcvd_ver->minor;
1308 				ldcp->hstate |= TS_RCVD_VER;
1309 				ldcp->tstate |= TS_VER_DONE;
1310 				D1(DBG_ALL_LDCS,
1311 				    "(0x%llx) Sent ACK, "
1312 				    "Agreed on version v%u.%u\n",
1313 				    ldcp->id, rcvd_ver->major, rcvd_ver->minor);
1314 			}
1315 		} else {
1316 			DWARN(ldcp->id,
1317 			    "i_ldc_process_VER: (0x%llx) error sending "
1318 			    "ACK/NACK\n", ldcp->id);
1319 			i_ldc_reset(ldcp, B_TRUE);
1320 			mutex_exit(&ldcp->tx_lock);
1321 			return (ECONNRESET);
1322 		}
1323 
1324 		break;
1325 
1326 	case LDC_ACK:
1327 		if ((ldcp->tstate & ~TS_IN_RESET) == TS_VREADY) {
1328 			if (ldcp->version.major != rcvd_ver->major ||
1329 			    ldcp->version.minor != rcvd_ver->minor) {
1330 
1331 				/* mismatched version - reset connection */
1332 				DWARN(ldcp->id,
1333 				    "i_ldc_process_VER: (0x%llx) recvd"
1334 				    " ACK ver != sent ACK ver\n", ldcp->id);
1335 				i_ldc_reset(ldcp, B_TRUE);
1336 				mutex_exit(&ldcp->tx_lock);
1337 				return (ECONNRESET);
1338 			}
1339 		} else {
1340 			/* SUCCESS - we have agreed on a version */
1341 			ldcp->version.major = rcvd_ver->major;
1342 			ldcp->version.minor = rcvd_ver->minor;
1343 			ldcp->tstate |= TS_VER_DONE;
1344 		}
1345 
1346 		D1(ldcp->id, "(0x%llx) Got ACK, Agreed on version v%u.%u\n",
1347 		    ldcp->id, rcvd_ver->major, rcvd_ver->minor);
1348 
1349 		/* initiate RTS-RTR-RDX handshake */
1350 		rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
1351 		if (rv) {
1352 			DWARN(ldcp->id,
1353 		    "i_ldc_process_VER: (0x%llx) cannot send RTS\n",
1354 			    ldcp->id);
1355 			i_ldc_reset(ldcp, B_TRUE);
1356 			mutex_exit(&ldcp->tx_lock);
1357 			return (ECONNRESET);
1358 		}
1359 
1360 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
1361 		ZERO_PKT(pkt);
1362 
1363 		pkt->type = LDC_CTRL;
1364 		pkt->stype = LDC_INFO;
1365 		pkt->ctrl = LDC_RTS;
1366 		pkt->env = ldcp->mode;
1367 		if (ldcp->mode != LDC_MODE_RAW)
1368 			pkt->seqid = LDC_INIT_SEQID;
1369 
1370 		ldcp->last_msg_rcd = LDC_INIT_SEQID;
1371 
1372 		DUMP_LDC_PKT(ldcp, "i_ldc_process_VER snd rts", (uint64_t)pkt);
1373 
1374 		/* initiate the send by calling into HV and set the new tail */
1375 		tx_tail = (tx_tail + LDC_PACKET_SIZE) %
1376 		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1377 
1378 		rv = i_ldc_set_tx_tail(ldcp, tx_tail);
1379 		if (rv) {
1380 			D2(ldcp->id,
1381 			    "i_ldc_process_VER: (0x%llx) no listener\n",
1382 			    ldcp->id);
1383 			i_ldc_reset(ldcp, B_TRUE);
1384 			mutex_exit(&ldcp->tx_lock);
1385 			return (ECONNRESET);
1386 		}
1387 
1388 		ldcp->tx_tail = tx_tail;
1389 		ldcp->hstate |= TS_SENT_RTS;
1390 
1391 		break;
1392 
1393 	case LDC_NACK:
1394 		/* check if version in NACK is zero */
1395 		if (rcvd_ver->major == 0 && rcvd_ver->minor == 0) {
1396 			/* version handshake failure */
1397 			DWARN(DBG_ALL_LDCS,
1398 			    "i_ldc_process_VER: (0x%llx) no version match\n",
1399 			    ldcp->id);
1400 			i_ldc_reset(ldcp, B_TRUE);
1401 			mutex_exit(&ldcp->tx_lock);
1402 			return (ECONNRESET);
1403 		}
1404 
1405 		/* get the current tail and pkt for the response */
1406 		rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
1407 		if (rv != 0) {
1408 			cmn_err(CE_NOTE,
1409 			    "i_ldc_process_VER: (0x%lx) err sending "
1410 			    "version ACK/NACK\n", ldcp->id);
1411 			i_ldc_reset(ldcp, B_TRUE);
1412 			mutex_exit(&ldcp->tx_lock);
1413 			return (ECONNRESET);
1414 		}
1415 
1416 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
1417 		ZERO_PKT(pkt);
1418 
1419 		/* initialize the packet */
1420 		pkt->type = LDC_CTRL;
1421 		pkt->ctrl = LDC_VER;
1422 		pkt->stype = LDC_INFO;
1423 
1424 		/* check ver in NACK msg has a match */
1425 		for (;;) {
1426 			if (rcvd_ver->major == ldc_versions[idx].major) {
1427 				/*
1428 				 * major version match - resubmit request
1429 				 * if lower minor version to the one this endpt
1430 				 * supports, if necessary
1431 				 */
1432 				if (rcvd_ver->minor > ldc_versions[idx].minor)
1433 					rcvd_ver->minor =
1434 					    ldc_versions[idx].minor;
1435 				bcopy(rcvd_ver, pkt->udata, sizeof (*rcvd_ver));
1436 				break;
1437 			}
1438 
1439 			if (rcvd_ver->major > ldc_versions[idx].major) {
1440 
1441 				D1(ldcp->id, "i_ldc_process_VER: using next"
1442 				    " lower idx=%d, v%u.%u\n", idx,
1443 				    ldc_versions[idx].major,
1444 				    ldc_versions[idx].minor);
1445 
1446 				/* send next lower version */
1447 				bcopy(&ldc_versions[idx], pkt->udata,
1448 				    sizeof (ldc_versions[idx]));
1449 				ldcp->next_vidx = idx;
1450 				break;
1451 			}
1452 
1453 			/* next version */
1454 			idx++;
1455 
1456 			D1(ldcp->id, "i_ldc_process_VER: inc idx %x\n", idx);
1457 
1458 			if (idx == LDC_NUM_VERS) {
1459 				/* no version match - terminate */
1460 				ldcp->next_vidx = 0;
1461 				mutex_exit(&ldcp->tx_lock);
1462 				return (ECONNRESET);
1463 			}
1464 		}
1465 
1466 		/* initiate the send by calling into HV and set the new tail */
1467 		tx_tail = (tx_tail + LDC_PACKET_SIZE) %
1468 		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1469 
1470 		rv = i_ldc_set_tx_tail(ldcp, tx_tail);
1471 		if (rv == 0) {
1472 			D2(ldcp->id, "i_ldc_process_VER: (0x%llx) sent version"
1473 			    "INFO v%u.%u\n", ldcp->id, ldc_versions[idx].major,
1474 			    ldc_versions[idx].minor);
1475 			ldcp->tx_tail = tx_tail;
1476 		} else {
1477 			cmn_err(CE_NOTE,
1478 			    "i_ldc_process_VER: (0x%lx) error sending version"
1479 			    "INFO\n", ldcp->id);
1480 			i_ldc_reset(ldcp, B_TRUE);
1481 			mutex_exit(&ldcp->tx_lock);
1482 			return (ECONNRESET);
1483 		}
1484 
1485 		break;
1486 	}
1487 
1488 	mutex_exit(&ldcp->tx_lock);
1489 	return (rv);
1490 }
1491 
1492 
1493 /*
1494  * Process an incoming RTS ctrl message
1495  */
1496 static int
1497 i_ldc_process_RTS(ldc_chan_t *ldcp, ldc_msg_t *msg)
1498 {
1499 	int 		rv = 0;
1500 	ldc_msg_t 	*pkt;
1501 	uint64_t	tx_tail;
1502 	boolean_t	sent_NACK = B_FALSE;
1503 
1504 	D2(ldcp->id, "i_ldc_process_RTS: (0x%llx) received RTS\n", ldcp->id);
1505 
1506 	switch (msg->stype) {
1507 	case LDC_NACK:
1508 		DWARN(ldcp->id,
1509 		    "i_ldc_process_RTS: (0x%llx) RTS NACK received\n",
1510 		    ldcp->id);
1511 
1512 		/* Reset the channel -- as we cannot continue */
1513 		mutex_enter(&ldcp->tx_lock);
1514 		i_ldc_reset(ldcp, B_TRUE);
1515 		mutex_exit(&ldcp->tx_lock);
1516 		rv = ECONNRESET;
1517 		break;
1518 
1519 	case LDC_INFO:
1520 
1521 		/* check mode */
1522 		if (ldcp->mode != (ldc_mode_t)msg->env) {
1523 			cmn_err(CE_NOTE,
1524 			    "i_ldc_process_RTS: (0x%lx) mode mismatch\n",
1525 			    ldcp->id);
1526 			/*
1527 			 * send NACK in response to MODE message
1528 			 * get the current tail for the response
1529 			 */
1530 			rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_NACK, LDC_RTS);
1531 			if (rv) {
1532 				/* if cannot send NACK - reset channel */
1533 				mutex_enter(&ldcp->tx_lock);
1534 				i_ldc_reset(ldcp, B_TRUE);
1535 				mutex_exit(&ldcp->tx_lock);
1536 				rv = ECONNRESET;
1537 				break;
1538 			}
1539 			sent_NACK = B_TRUE;
1540 		}
1541 		break;
1542 	default:
1543 		DWARN(ldcp->id, "i_ldc_process_RTS: (0x%llx) unexp ACK\n",
1544 		    ldcp->id);
1545 		mutex_enter(&ldcp->tx_lock);
1546 		i_ldc_reset(ldcp, B_TRUE);
1547 		mutex_exit(&ldcp->tx_lock);
1548 		rv = ECONNRESET;
1549 		break;
1550 	}
1551 
1552 	/*
1553 	 * If either the connection was reset (when rv != 0) or
1554 	 * a NACK was sent, we return. In the case of a NACK
1555 	 * we dont want to consume the packet that came in but
1556 	 * not record that we received the RTS
1557 	 */
1558 	if (rv || sent_NACK)
1559 		return (rv);
1560 
1561 	/* record RTS received */
1562 	ldcp->hstate |= TS_RCVD_RTS;
1563 
1564 	/* store initial SEQID info */
1565 	ldcp->last_msg_snt = msg->seqid;
1566 
1567 	/* Obtain Tx lock */
1568 	mutex_enter(&ldcp->tx_lock);
1569 
1570 	/* get the current tail for the response */
1571 	rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
1572 	if (rv != 0) {
1573 		cmn_err(CE_NOTE,
1574 		    "i_ldc_process_RTS: (0x%lx) err sending RTR\n",
1575 		    ldcp->id);
1576 		i_ldc_reset(ldcp, B_TRUE);
1577 		mutex_exit(&ldcp->tx_lock);
1578 		return (ECONNRESET);
1579 	}
1580 
1581 	pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
1582 	ZERO_PKT(pkt);
1583 
1584 	/* initialize the packet */
1585 	pkt->type = LDC_CTRL;
1586 	pkt->stype = LDC_INFO;
1587 	pkt->ctrl = LDC_RTR;
1588 	pkt->env = ldcp->mode;
1589 	if (ldcp->mode != LDC_MODE_RAW)
1590 		pkt->seqid = LDC_INIT_SEQID;
1591 
1592 	ldcp->last_msg_rcd = msg->seqid;
1593 
1594 	/* initiate the send by calling into HV and set the new tail */
1595 	tx_tail = (tx_tail + LDC_PACKET_SIZE) %
1596 	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1597 
1598 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
1599 	if (rv == 0) {
1600 		D2(ldcp->id,
1601 		    "i_ldc_process_RTS: (0x%llx) sent RTR\n", ldcp->id);
1602 		DUMP_LDC_PKT(ldcp, "i_ldc_process_RTS sent rtr", (uint64_t)pkt);
1603 
1604 		ldcp->tx_tail = tx_tail;
1605 		ldcp->hstate |= TS_SENT_RTR;
1606 
1607 	} else {
1608 		cmn_err(CE_NOTE,
1609 		    "i_ldc_process_RTS: (0x%lx) error sending RTR\n",
1610 		    ldcp->id);
1611 		i_ldc_reset(ldcp, B_TRUE);
1612 		mutex_exit(&ldcp->tx_lock);
1613 		return (ECONNRESET);
1614 	}
1615 
1616 	mutex_exit(&ldcp->tx_lock);
1617 	return (0);
1618 }
1619 
1620 /*
1621  * Process an incoming RTR ctrl message
1622  */
1623 static int
1624 i_ldc_process_RTR(ldc_chan_t *ldcp, ldc_msg_t *msg)
1625 {
1626 	int 		rv = 0;
1627 	boolean_t	sent_NACK = B_FALSE;
1628 
1629 	D2(ldcp->id, "i_ldc_process_RTR: (0x%llx) received RTR\n", ldcp->id);
1630 
1631 	switch (msg->stype) {
1632 	case LDC_NACK:
1633 		/* RTR NACK received */
1634 		DWARN(ldcp->id,
1635 		    "i_ldc_process_RTR: (0x%llx) RTR NACK received\n",
1636 		    ldcp->id);
1637 
1638 		/* Reset the channel -- as we cannot continue */
1639 		mutex_enter(&ldcp->tx_lock);
1640 		i_ldc_reset(ldcp, B_TRUE);
1641 		mutex_exit(&ldcp->tx_lock);
1642 		rv = ECONNRESET;
1643 
1644 		break;
1645 
1646 	case LDC_INFO:
1647 
1648 		/* check mode */
1649 		if (ldcp->mode != (ldc_mode_t)msg->env) {
1650 			DWARN(ldcp->id,
1651 			    "i_ldc_process_RTR: (0x%llx) mode mismatch, "
1652 			    "expecting 0x%x, got 0x%x\n",
1653 			    ldcp->id, ldcp->mode, (ldc_mode_t)msg->env);
1654 			/*
1655 			 * send NACK in response to MODE message
1656 			 * get the current tail for the response
1657 			 */
1658 			rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_NACK, LDC_RTR);
1659 			if (rv) {
1660 				/* if cannot send NACK - reset channel */
1661 				mutex_enter(&ldcp->tx_lock);
1662 				i_ldc_reset(ldcp, B_TRUE);
1663 				mutex_exit(&ldcp->tx_lock);
1664 				rv = ECONNRESET;
1665 				break;
1666 			}
1667 			sent_NACK = B_TRUE;
1668 		}
1669 		break;
1670 
1671 	default:
1672 		DWARN(ldcp->id, "i_ldc_process_RTR: (0x%llx) unexp ACK\n",
1673 		    ldcp->id);
1674 
1675 		/* Reset the channel -- as we cannot continue */
1676 		mutex_enter(&ldcp->tx_lock);
1677 		i_ldc_reset(ldcp, B_TRUE);
1678 		mutex_exit(&ldcp->tx_lock);
1679 		rv = ECONNRESET;
1680 		break;
1681 	}
1682 
1683 	/*
1684 	 * If either the connection was reset (when rv != 0) or
1685 	 * a NACK was sent, we return. In the case of a NACK
1686 	 * we dont want to consume the packet that came in but
1687 	 * not record that we received the RTR
1688 	 */
1689 	if (rv || sent_NACK)
1690 		return (rv);
1691 
1692 	ldcp->last_msg_snt = msg->seqid;
1693 	ldcp->hstate |= TS_RCVD_RTR;
1694 
1695 	rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_INFO, LDC_RDX);
1696 	if (rv) {
1697 		cmn_err(CE_NOTE,
1698 		    "i_ldc_process_RTR: (0x%lx) cannot send RDX\n",
1699 		    ldcp->id);
1700 		mutex_enter(&ldcp->tx_lock);
1701 		i_ldc_reset(ldcp, B_TRUE);
1702 		mutex_exit(&ldcp->tx_lock);
1703 		return (ECONNRESET);
1704 	}
1705 	D2(ldcp->id,
1706 	    "i_ldc_process_RTR: (0x%llx) sent RDX\n", ldcp->id);
1707 
1708 	ldcp->hstate |= TS_SENT_RDX;
1709 	ldcp->tstate |= TS_HSHAKE_DONE;
1710 	if ((ldcp->tstate & TS_IN_RESET) == 0)
1711 		ldcp->status = LDC_UP;
1712 
1713 	D1(ldcp->id, "(0x%llx) Handshake Complete\n", ldcp->id);
1714 
1715 	return (0);
1716 }
1717 
1718 
1719 /*
1720  * Process an incoming RDX ctrl message
1721  */
1722 static int
1723 i_ldc_process_RDX(ldc_chan_t *ldcp, ldc_msg_t *msg)
1724 {
1725 	int	rv = 0;
1726 
1727 	D2(ldcp->id, "i_ldc_process_RDX: (0x%llx) received RDX\n", ldcp->id);
1728 
1729 	switch (msg->stype) {
1730 	case LDC_NACK:
1731 		/* RDX NACK received */
1732 		DWARN(ldcp->id,
1733 		    "i_ldc_process_RDX: (0x%llx) RDX NACK received\n",
1734 		    ldcp->id);
1735 
1736 		/* Reset the channel -- as we cannot continue */
1737 		mutex_enter(&ldcp->tx_lock);
1738 		i_ldc_reset(ldcp, B_TRUE);
1739 		mutex_exit(&ldcp->tx_lock);
1740 		rv = ECONNRESET;
1741 
1742 		break;
1743 
1744 	case LDC_INFO:
1745 
1746 		/*
1747 		 * if channel is UP and a RDX received after data transmission
1748 		 * has commenced it is an error
1749 		 */
1750 		if ((ldcp->tstate == TS_UP) && (ldcp->hstate & TS_RCVD_RDX)) {
1751 			DWARN(DBG_ALL_LDCS,
1752 			    "i_ldc_process_RDX: (0x%llx) unexpected RDX"
1753 			    " - LDC reset\n", ldcp->id);
1754 			mutex_enter(&ldcp->tx_lock);
1755 			i_ldc_reset(ldcp, B_TRUE);
1756 			mutex_exit(&ldcp->tx_lock);
1757 			return (ECONNRESET);
1758 		}
1759 
1760 		ldcp->hstate |= TS_RCVD_RDX;
1761 		ldcp->tstate |= TS_HSHAKE_DONE;
1762 		if ((ldcp->tstate & TS_IN_RESET) == 0)
1763 			ldcp->status = LDC_UP;
1764 
1765 		D1(DBG_ALL_LDCS, "(0x%llx) Handshake Complete\n", ldcp->id);
1766 		break;
1767 
1768 	default:
1769 		DWARN(ldcp->id, "i_ldc_process_RDX: (0x%llx) unexp ACK\n",
1770 		    ldcp->id);
1771 
1772 		/* Reset the channel -- as we cannot continue */
1773 		mutex_enter(&ldcp->tx_lock);
1774 		i_ldc_reset(ldcp, B_TRUE);
1775 		mutex_exit(&ldcp->tx_lock);
1776 		rv = ECONNRESET;
1777 		break;
1778 	}
1779 
1780 	return (rv);
1781 }
1782 
1783 /*
1784  * Process an incoming ACK for a data packet
1785  */
1786 static int
1787 i_ldc_process_data_ACK(ldc_chan_t *ldcp, ldc_msg_t *msg)
1788 {
1789 	int		rv;
1790 	uint64_t 	tx_head;
1791 	ldc_msg_t	*pkt;
1792 
1793 	/* Obtain Tx lock */
1794 	mutex_enter(&ldcp->tx_lock);
1795 
1796 	/*
1797 	 * Read the current Tx head and tail
1798 	 */
1799 	rv = hv_ldc_tx_get_state(ldcp->id,
1800 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
1801 	if (rv != 0) {
1802 		cmn_err(CE_WARN,
1803 		    "i_ldc_process_data_ACK: (0x%lx) cannot read qptrs\n",
1804 		    ldcp->id);
1805 
1806 		/* Reset the channel -- as we cannot continue */
1807 		i_ldc_reset(ldcp, B_TRUE);
1808 		mutex_exit(&ldcp->tx_lock);
1809 		return (ECONNRESET);
1810 	}
1811 
1812 	/*
1813 	 * loop from where the previous ACK location was to the
1814 	 * current head location. This is how far the HV has
1815 	 * actually send pkts. Pkts between head and tail are
1816 	 * yet to be sent by HV.
1817 	 */
1818 	tx_head = ldcp->tx_ackd_head;
1819 	for (;;) {
1820 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_head);
1821 		tx_head = (tx_head + LDC_PACKET_SIZE) %
1822 		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1823 
1824 		if (pkt->seqid == msg->ackid) {
1825 			D2(ldcp->id,
1826 			    "i_ldc_process_data_ACK: (0x%llx) found packet\n",
1827 			    ldcp->id);
1828 			ldcp->last_ack_rcd = msg->ackid;
1829 			ldcp->tx_ackd_head = tx_head;
1830 			break;
1831 		}
1832 		if (tx_head == ldcp->tx_head) {
1833 			/* could not find packet */
1834 			DWARN(ldcp->id,
1835 			    "i_ldc_process_data_ACK: (0x%llx) invalid ACKid\n",
1836 			    ldcp->id);
1837 
1838 			/* Reset the channel -- as we cannot continue */
1839 			i_ldc_reset(ldcp, B_TRUE);
1840 			mutex_exit(&ldcp->tx_lock);
1841 			return (ECONNRESET);
1842 		}
1843 	}
1844 
1845 	mutex_exit(&ldcp->tx_lock);
1846 	return (0);
1847 }
1848 
1849 /*
1850  * Process incoming control message
1851  * Return 0 - session can continue
1852  *        EAGAIN - reprocess packet - state was changed
1853  *	  ECONNRESET - channel was reset
1854  */
1855 static int
1856 i_ldc_ctrlmsg(ldc_chan_t *ldcp, ldc_msg_t *msg)
1857 {
1858 	int 		rv = 0;
1859 
1860 	D1(ldcp->id, "i_ldc_ctrlmsg: (%llx) tstate = %lx, hstate = %lx\n",
1861 	    ldcp->id, ldcp->tstate, ldcp->hstate);
1862 
1863 	switch (ldcp->tstate & ~TS_IN_RESET) {
1864 
1865 	case TS_OPEN:
1866 	case TS_READY:
1867 
1868 		switch (msg->ctrl & LDC_CTRL_MASK) {
1869 		case LDC_VER:
1870 			/* process version message */
1871 			rv = i_ldc_process_VER(ldcp, msg);
1872 			break;
1873 		default:
1874 			DWARN(ldcp->id,
1875 			    "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
1876 			    "tstate=0x%x\n", ldcp->id,
1877 			    (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate);
1878 			break;
1879 		}
1880 
1881 		break;
1882 
1883 	case TS_VREADY:
1884 
1885 		switch (msg->ctrl & LDC_CTRL_MASK) {
1886 		case LDC_VER:
1887 			/* process version message */
1888 			rv = i_ldc_process_VER(ldcp, msg);
1889 			break;
1890 		case LDC_RTS:
1891 			/* process RTS message */
1892 			rv = i_ldc_process_RTS(ldcp, msg);
1893 			break;
1894 		case LDC_RTR:
1895 			/* process RTR message */
1896 			rv = i_ldc_process_RTR(ldcp, msg);
1897 			break;
1898 		case LDC_RDX:
1899 			/* process RDX message */
1900 			rv = i_ldc_process_RDX(ldcp, msg);
1901 			break;
1902 		default:
1903 			DWARN(ldcp->id,
1904 			    "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
1905 			    "tstate=0x%x\n", ldcp->id,
1906 			    (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate);
1907 			break;
1908 		}
1909 
1910 		break;
1911 
1912 	case TS_UP:
1913 
1914 		switch (msg->ctrl & LDC_CTRL_MASK) {
1915 		case LDC_VER:
1916 			DWARN(ldcp->id,
1917 			    "i_ldc_ctrlmsg: (0x%llx) unexpected VER "
1918 			    "- LDC reset\n", ldcp->id);
1919 			/* peer is redoing version negotiation */
1920 			mutex_enter(&ldcp->tx_lock);
1921 			(void) i_ldc_txq_reconf(ldcp);
1922 			i_ldc_reset_state(ldcp);
1923 			mutex_exit(&ldcp->tx_lock);
1924 			rv = EAGAIN;
1925 			break;
1926 
1927 		case LDC_RDX:
1928 			/* process RDX message */
1929 			rv = i_ldc_process_RDX(ldcp, msg);
1930 			break;
1931 
1932 		default:
1933 			DWARN(ldcp->id,
1934 			    "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
1935 			    "tstate=0x%x\n", ldcp->id,
1936 			    (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate);
1937 			break;
1938 		}
1939 	}
1940 
1941 	return (rv);
1942 }
1943 
1944 /*
1945  * Register channel with the channel nexus
1946  */
1947 static int
1948 i_ldc_register_channel(ldc_chan_t *ldcp)
1949 {
1950 	int		rv = 0;
1951 	ldc_cnex_t	*cinfo = &ldcssp->cinfo;
1952 
1953 	if (cinfo->dip == NULL) {
1954 		DWARN(ldcp->id,
1955 		    "i_ldc_register_channel: cnex has not registered\n");
1956 		return (EAGAIN);
1957 	}
1958 
1959 	rv = cinfo->reg_chan(cinfo->dip, ldcp->id, ldcp->devclass);
1960 	if (rv) {
1961 		DWARN(ldcp->id,
1962 		    "i_ldc_register_channel: cannot register channel\n");
1963 		return (rv);
1964 	}
1965 
1966 	rv = cinfo->add_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR,
1967 	    i_ldc_tx_hdlr, ldcp, NULL);
1968 	if (rv) {
1969 		DWARN(ldcp->id,
1970 		    "i_ldc_register_channel: cannot add Tx interrupt\n");
1971 		(void) cinfo->unreg_chan(cinfo->dip, ldcp->id);
1972 		return (rv);
1973 	}
1974 
1975 	rv = cinfo->add_intr(cinfo->dip, ldcp->id, CNEX_RX_INTR,
1976 	    i_ldc_rx_hdlr, ldcp, NULL);
1977 	if (rv) {
1978 		DWARN(ldcp->id,
1979 		    "i_ldc_register_channel: cannot add Rx interrupt\n");
1980 		(void) cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR);
1981 		(void) cinfo->unreg_chan(cinfo->dip, ldcp->id);
1982 		return (rv);
1983 	}
1984 
1985 	ldcp->tstate |= TS_CNEX_RDY;
1986 
1987 	return (0);
1988 }
1989 
1990 /*
1991  * Unregister a channel with the channel nexus
1992  */
1993 static int
1994 i_ldc_unregister_channel(ldc_chan_t *ldcp)
1995 {
1996 	int		rv = 0;
1997 	ldc_cnex_t	*cinfo = &ldcssp->cinfo;
1998 
1999 	if (cinfo->dip == NULL) {
2000 		DWARN(ldcp->id,
2001 		    "i_ldc_unregister_channel: cnex has not registered\n");
2002 		return (EAGAIN);
2003 	}
2004 
2005 	if (ldcp->tstate & TS_CNEX_RDY) {
2006 
2007 		/* Remove the Rx interrupt */
2008 		rv = cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_RX_INTR);
2009 		if (rv) {
2010 			if (rv != EAGAIN) {
2011 				DWARN(ldcp->id,
2012 				    "i_ldc_unregister_channel: err removing "
2013 				    "Rx intr\n");
2014 				return (rv);
2015 			}
2016 
2017 			/*
2018 			 * If interrupts are pending and handler has
2019 			 * finished running, clear interrupt and try
2020 			 * again
2021 			 */
2022 			if (ldcp->rx_intr_state != LDC_INTR_PEND)
2023 				return (rv);
2024 
2025 			(void) i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
2026 			rv = cinfo->rem_intr(cinfo->dip, ldcp->id,
2027 			    CNEX_RX_INTR);
2028 			if (rv) {
2029 				DWARN(ldcp->id, "i_ldc_unregister_channel: "
2030 				    "err removing Rx interrupt\n");
2031 				return (rv);
2032 			}
2033 		}
2034 
2035 		/* Remove the Tx interrupt */
2036 		rv = cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR);
2037 		if (rv) {
2038 			DWARN(ldcp->id,
2039 			    "i_ldc_unregister_channel: err removing Tx intr\n");
2040 			return (rv);
2041 		}
2042 
2043 		/* Unregister the channel */
2044 		rv = cinfo->unreg_chan(ldcssp->cinfo.dip, ldcp->id);
2045 		if (rv) {
2046 			DWARN(ldcp->id,
2047 			    "i_ldc_unregister_channel: cannot unreg channel\n");
2048 			return (rv);
2049 		}
2050 
2051 		ldcp->tstate &= ~TS_CNEX_RDY;
2052 	}
2053 
2054 	return (0);
2055 }
2056 
2057 
2058 /*
2059  * LDC transmit interrupt handler
2060  *    triggered for chanel up/down/reset events
2061  *    and Tx queue content changes
2062  */
2063 static uint_t
2064 i_ldc_tx_hdlr(caddr_t arg1, caddr_t arg2)
2065 {
2066 	_NOTE(ARGUNUSED(arg2))
2067 
2068 	int 		rv;
2069 	ldc_chan_t 	*ldcp;
2070 	boolean_t 	notify_client = B_FALSE;
2071 	uint64_t	notify_event = 0, link_state;
2072 
2073 	/* Get the channel for which interrupt was received */
2074 	ASSERT(arg1 != NULL);
2075 	ldcp = (ldc_chan_t *)arg1;
2076 
2077 	D1(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) Received intr, ldcp=0x%p\n",
2078 	    ldcp->id, ldcp);
2079 
2080 	/* Lock channel */
2081 	mutex_enter(&ldcp->lock);
2082 
2083 	/* Obtain Tx lock */
2084 	mutex_enter(&ldcp->tx_lock);
2085 
2086 	/* mark interrupt as pending */
2087 	ldcp->tx_intr_state = LDC_INTR_ACTIVE;
2088 
2089 	/* save current link state */
2090 	link_state = ldcp->link_state;
2091 
2092 	rv = hv_ldc_tx_get_state(ldcp->id, &ldcp->tx_head, &ldcp->tx_tail,
2093 	    &ldcp->link_state);
2094 	if (rv) {
2095 		cmn_err(CE_WARN,
2096 		    "i_ldc_tx_hdlr: (0x%lx) cannot read queue ptrs rv=0x%d\n",
2097 		    ldcp->id, rv);
2098 		i_ldc_clear_intr(ldcp, CNEX_TX_INTR);
2099 		mutex_exit(&ldcp->tx_lock);
2100 		mutex_exit(&ldcp->lock);
2101 		return (DDI_INTR_CLAIMED);
2102 	}
2103 
2104 	/*
2105 	 * reset the channel state if the channel went down
2106 	 * (other side unconfigured queue) or channel was reset
2107 	 * (other side reconfigured its queue)
2108 	 */
2109 	if (link_state != ldcp->link_state &&
2110 	    ldcp->link_state == LDC_CHANNEL_DOWN) {
2111 		D1(ldcp->id, "i_ldc_tx_hdlr: channel link down\n", ldcp->id);
2112 		i_ldc_reset(ldcp, B_FALSE);
2113 		notify_client = B_TRUE;
2114 		notify_event = LDC_EVT_DOWN;
2115 	}
2116 
2117 	if (link_state != ldcp->link_state &&
2118 	    ldcp->link_state == LDC_CHANNEL_RESET) {
2119 		D1(ldcp->id, "i_ldc_tx_hdlr: channel link reset\n", ldcp->id);
2120 		i_ldc_reset(ldcp, B_FALSE);
2121 		notify_client = B_TRUE;
2122 		notify_event = LDC_EVT_RESET;
2123 	}
2124 
2125 	if (link_state != ldcp->link_state &&
2126 	    (ldcp->tstate & ~TS_IN_RESET) == TS_OPEN &&
2127 	    ldcp->link_state == LDC_CHANNEL_UP) {
2128 		D1(ldcp->id, "i_ldc_tx_hdlr: channel link up\n", ldcp->id);
2129 		notify_client = B_TRUE;
2130 		notify_event = LDC_EVT_RESET;
2131 		ldcp->tstate |= TS_LINK_READY;
2132 		ldcp->status = LDC_READY;
2133 	}
2134 
2135 	/* if callbacks are disabled, do not notify */
2136 	if (!ldcp->cb_enabled)
2137 		notify_client = B_FALSE;
2138 
2139 	i_ldc_clear_intr(ldcp, CNEX_TX_INTR);
2140 	mutex_exit(&ldcp->tx_lock);
2141 
2142 	if (notify_client) {
2143 		ldcp->cb_inprogress = B_TRUE;
2144 		mutex_exit(&ldcp->lock);
2145 		rv = ldcp->cb(notify_event, ldcp->cb_arg);
2146 		if (rv) {
2147 			DWARN(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) callback "
2148 			    "failure", ldcp->id);
2149 		}
2150 		mutex_enter(&ldcp->lock);
2151 		ldcp->cb_inprogress = B_FALSE;
2152 	}
2153 
2154 	mutex_exit(&ldcp->lock);
2155 
2156 	D1(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) exiting handler", ldcp->id);
2157 
2158 	return (DDI_INTR_CLAIMED);
2159 }
2160 
2161 /*
2162  * Process the Rx HV queue.
2163  *
2164  * Returns 0 if data packets were found and no errors were encountered,
2165  * otherwise returns an error. In either case, the *notify argument is
2166  * set to indicate whether or not the client callback function should
2167  * be invoked. The *event argument is set to contain the callback event.
2168  *
2169  * Depending on the channel mode, packets are handled differently:
2170  *
2171  * RAW MODE
2172  * For raw mode channels, when a data packet is encountered,
2173  * processing stops and all packets are left on the queue to be removed
2174  * and processed by the ldc_read code path.
2175  *
2176  * UNRELIABLE MODE
2177  * For unreliable mode, when a data packet is encountered, processing
2178  * stops, and all packets are left on the queue to be removed and
2179  * processed by the ldc_read code path. Control packets are processed
2180  * inline if they are encountered before any data packets.
2181  *
2182  * RELIABLE MODE
2183  * For reliable mode channels, all packets on the receive queue
2184  * are processed: data packets are copied to the data queue and
2185  * control packets are processed inline. Packets are only left on
2186  * the receive queue when the data queue is full.
 *
 * Locking: the caller must hold ldcp->lock (asserted on entry). The
 * tx_lock is acquired and released here around every i_ldc_reset() call.
 *
 * Return values produced by this function:
 *	0	LDC_EVT_READ is set in *notify_event and LDC_EVT_RESET is not
 *	EIO	hv_ldc_rx_get_state() failed
 *	ENOSPC	the secondary data queue is full (only reached on the
 *		RELIABLE mode path)
 *	ENOMSG	every other case
2187  */
2188 static uint_t
2189 i_ldc_rx_process_hvq(ldc_chan_t *ldcp, boolean_t *notify_client,
2190     uint64_t *notify_event)
2191 {
2192 	int		rv;
2193 	uint64_t 	rx_head, rx_tail;
2194 	ldc_msg_t 	*msg;
	/*
	 * NOTE(review): first_fragment is initialized to 0 and is never
	 * assigned a non-zero value anywhere in this function.
	 */
2195 	uint64_t	link_state, first_fragment = 0;
	/* emit the queue-length trace only on the first loop iteration */
2196 	boolean_t	trace_length = B_TRUE;
2197 
2198 	ASSERT(MUTEX_HELD(&ldcp->lock));
2199 	*notify_client = B_FALSE;
2200 	*notify_event = 0;
2201 
2202 	/*
2203 	 * Read packet(s) from the queue
2204 	 */
2205 	for (;;) {
2206 
		/*
		 * Remember the previous link state, then refresh the
		 * head/tail pointers and link state from the hypervisor.
		 */
2207 		link_state = ldcp->link_state;
2208 		rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
2209 		    &ldcp->link_state);
2210 		if (rv) {
			/*
			 * NOTE(review): "rv=0x%d" prints a decimal value
			 * after a "0x" prefix.
			 */
2211 			cmn_err(CE_WARN,
2212 			    "i_ldc_rx_process_hvq: (0x%lx) cannot read "
2213 			    "queue ptrs, rv=0x%d\n", ldcp->id, rv);
2214 			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
2215 			return (EIO);
2216 		}
2217 
2218 		/*
2219 		 * reset the channel state if the channel went down
2220 		 * (other side unconfigured queue) or channel was reset
2221 		 * (other side reconfigured its queue)
2222 		 */
2223 
2224 		if (link_state != ldcp->link_state) {
2225 
2226 			switch (ldcp->link_state) {
2227 			case LDC_CHANNEL_DOWN:
2228 				D1(ldcp->id, "i_ldc_rx_process_hvq: channel "
2229 				    "link down\n", ldcp->id);
2230 				mutex_enter(&ldcp->tx_lock);
2231 				i_ldc_reset(ldcp, B_FALSE);
2232 				mutex_exit(&ldcp->tx_lock);
2233 				*notify_client = B_TRUE;
2234 				*notify_event = LDC_EVT_DOWN;
				/* link is down: stop processing altogether */
2235 				goto loop_exit;
2236 
2237 			case LDC_CHANNEL_UP:
2238 				D1(ldcp->id, "i_ldc_rx_process_hvq: "
2239 				    "channel link up\n", ldcp->id);
2240 
2241 				if ((ldcp->tstate & ~TS_IN_RESET) == TS_OPEN) {
2242 					*notify_client = B_TRUE;
2243 					*notify_event = LDC_EVT_RESET;
2244 					ldcp->tstate |= TS_LINK_READY;
2245 					ldcp->status = LDC_READY;
2246 				}
				/* leaves the switch only; packets are still processed */
2247 				break;
2248 
2249 			case LDC_CHANNEL_RESET:
2250 			default:
2251 #ifdef DEBUG
				/* target of the DEBUG-only reset injection below */
2252 force_reset:
2253 #endif
2254 				D1(ldcp->id, "i_ldc_rx_process_hvq: channel "
2255 				    "link reset\n", ldcp->id);
2256 				mutex_enter(&ldcp->tx_lock);
2257 				i_ldc_reset(ldcp, B_FALSE);
2258 				mutex_exit(&ldcp->tx_lock);
2259 				*notify_client = B_TRUE;
2260 				*notify_event = LDC_EVT_RESET;
				/* leaves the switch only; packets are still processed */
2261 				break;
2262 			}
2263 		}
2264 
2265 #ifdef DEBUG
2266 		if (LDC_INJECT_RESET(ldcp))
2267 			goto force_reset;
2268 		if (LDC_INJECT_DRNGCLEAR(ldcp))
2269 			i_ldc_mem_inject_dring_clear(ldcp);
2270 #endif
2271 		if (trace_length) {
2272 			TRACE_RXHVQ_LENGTH(ldcp, rx_head, rx_tail);
2273 			trace_length = B_FALSE;
2274 		}
2275 
		/* head == tail: nothing (left) on the HV receive queue */
2276 		if (rx_head == rx_tail) {
2277 			D2(ldcp->id, "i_ldc_rx_process_hvq: (0x%llx) "
2278 			    "No packets\n", ldcp->id);
2279 			break;
2280 		}
2281 
2282 		D2(ldcp->id, "i_ldc_rx_process_hvq: head=0x%llx, "
2283 		    "tail=0x%llx\n", rx_head, rx_tail);
2284 		DUMP_LDC_PKT(ldcp, "i_ldc_rx_process_hvq rcd",
2285 		    ldcp->rx_q_va + rx_head);
2286 
2287 		/* get the message */
2288 		msg = (ldc_msg_t *)(ldcp->rx_q_va + rx_head);
2289 
2290 		/* if channel is in RAW mode or data pkt, notify and return */
2291 		if (ldcp->mode == LDC_MODE_RAW) {
2292 			*notify_client = B_TRUE;
2293 			*notify_event |= LDC_EVT_READ;
2294 			break;
2295 		}
2296 
2297 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_INFO)) {
2298 
2299 			/* discard packet if channel is not up */
2300 			if ((ldcp->tstate & ~TS_IN_RESET) != TS_UP) {
2301 
2302 				/* move the head one position */
2303 				rx_head = (rx_head + LDC_PACKET_SIZE) %
2304 				    (ldcp->rx_q_entries << LDC_PACKET_SHIFT);
2305 
				/* a failure to update the head ends the loop */
2306 				if (rv = i_ldc_set_rx_head(ldcp, rx_head))
2307 					break;
2308 
2309 				continue;
2310 			} else {
2311 				uint64_t dq_head, dq_tail;
2312 
2313 				/* process only RELIABLE mode data packets */
2314 				if (ldcp->mode != LDC_MODE_RELIABLE) {
					/* the packet stays on the queue for the read path */
2315 					if ((ldcp->tstate & TS_IN_RESET) == 0)
2316 						*notify_client = B_TRUE;
2317 					*notify_event |= LDC_EVT_READ;
2318 					break;
2319 				}
2320 
2321 				/* don't process packet if queue full */
2322 				(void) i_ldc_dq_rx_get_state(ldcp, &dq_head,
2323 				    &dq_tail, NULL);
				/*
				 * Full means that advancing the data queue
				 * tail by one packet would reach its head.
				 */
2324 				dq_tail = (dq_tail + LDC_PACKET_SIZE) %
2325 				    (ldcp->rx_dq_entries << LDC_PACKET_SHIFT);
2326 				if (dq_tail == dq_head ||
2327 				    LDC_INJECT_DQFULL(ldcp)) {
					/* rv is examined again after loop_exit */
2328 					rv = ENOSPC;
2329 					break;
2330 				}
2331 			}
2332 		}
2333 
2334 		/* Check the sequence ID for the message received */
2335 		rv = i_ldc_check_seqid(ldcp, msg);
2336 		if (rv != 0) {
2337 
2338 			DWARN(ldcp->id, "i_ldc_rx_process_hvq: (0x%llx) "
2339 			    "seqid error, q_ptrs=0x%lx,0x%lx", ldcp->id,
2340 			    rx_head, rx_tail);
2341 
2342 			/* Reset last_msg_rcd to start of message */
			/*
			 * NOTE(review): not taken as written, because
			 * first_fragment is always 0 in this function.
			 */
2343 			if (first_fragment != 0) {
2344 				ldcp->last_msg_rcd = first_fragment - 1;
2345 				first_fragment = 0;
2346 			}
2347 
2348 			/*
2349 			 * Send a NACK due to seqid mismatch
2350 			 */
2351 			rv = i_ldc_send_pkt(ldcp, msg->type, LDC_NACK,
2352 			    (msg->ctrl & LDC_CTRL_MASK));
2353 
2354 			if (rv) {
2355 				cmn_err(CE_NOTE, "i_ldc_rx_process_hvq: "
2356 				    "(0x%lx) err sending CTRL/DATA NACK msg\n",
2357 				    ldcp->id);
2358 
2359 				/* if cannot send NACK - reset channel */
2360 				mutex_enter(&ldcp->tx_lock);
2361 				i_ldc_reset(ldcp, B_TRUE);
2362 				mutex_exit(&ldcp->tx_lock);
2363 
2364 				*notify_client = B_TRUE;
2365 				*notify_event = LDC_EVT_RESET;
2366 				break;
2367 			}
2368 
2369 			/* purge receive queue */
			/* moving the head to the tail drops every queued packet */
2370 			(void) i_ldc_set_rx_head(ldcp, rx_tail);
2371 			break;
2372 		}
2373 
2374 		/* record the message ID */
2375 		ldcp->last_msg_rcd = msg->seqid;
2376 
2377 		/* process control messages */
2378 		if (msg->type & LDC_CTRL) {
2379 			/* save current internal state */
2380 			uint64_t tstate = ldcp->tstate;
2381 
2382 			rv = i_ldc_ctrlmsg(ldcp, msg);
2383 			if (rv == EAGAIN) {
2384 				/* re-process pkt - state was adjusted */
				/* the head was not advanced, so the same packet is read again */
2385 				continue;
2386 			}
2387 			if (rv == ECONNRESET) {
2388 				*notify_client = B_TRUE;
2389 				*notify_event = LDC_EVT_RESET;
2390 				break;
2391 			}
2392 
2393 			/*
2394 			 * control message processing was successful
2395 			 * channel transitioned to ready for communication
2396 			 */
2397 			if (rv == 0 && ldcp->tstate == TS_UP &&
2398 			    (tstate & ~TS_IN_RESET) !=
2399 			    (ldcp->tstate & ~TS_IN_RESET)) {
2400 				*notify_client = B_TRUE;
2401 				*notify_event = LDC_EVT_UP;
2402 			}
2403 		}
2404 
2405 		/* process data NACKs */
2406 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_NACK)) {
2407 			DWARN(ldcp->id,
2408 			    "i_ldc_rx_process_hvq: (0x%llx) received DATA/NACK",
2409 			    ldcp->id);
2410 			mutex_enter(&ldcp->tx_lock);
2411 			i_ldc_reset(ldcp, B_TRUE);
2412 			mutex_exit(&ldcp->tx_lock);
2413 			*notify_client = B_TRUE;
2414 			*notify_event = LDC_EVT_RESET;
2415 			break;
2416 		}
2417 
2418 		/* process data ACKs */
2419 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) {
			/* any failure while processing the ACK is reported as a reset */
2420 			if (rv = i_ldc_process_data_ACK(ldcp, msg)) {
2421 				*notify_client = B_TRUE;
2422 				*notify_event = LDC_EVT_RESET;
2423 				break;
2424 			}
2425 		}
2426 
		/*
		 * Data/INFO packets only get this far on RELIABLE mode
		 * channels; the other modes left the loop above.
		 */
2427 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_INFO)) {
2428 			ASSERT(ldcp->mode == LDC_MODE_RELIABLE);
2429 
2430 			/*
2431 			 * Copy the data packet to the data queue. Note
2432 			 * that the copy routine updates the rx_head pointer.
2433 			 */
2434 			i_ldc_rxdq_copy(ldcp, &rx_head);
2435 
2436 			if ((ldcp->tstate & TS_IN_RESET) == 0)
2437 				*notify_client = B_TRUE;
2438 			*notify_event |= LDC_EVT_READ;
2439 		} else {
			/* non-data packet consumed: step over it */
2440 			rx_head = (rx_head + LDC_PACKET_SIZE) %
2441 			    (ldcp->rx_q_entries << LDC_PACKET_SHIFT);
2442 		}
2443 
2444 		/* move the head one position */
2445 		if (rv = i_ldc_set_rx_head(ldcp, rx_head)) {
2446 			*notify_client = B_TRUE;
2447 			*notify_event = LDC_EVT_RESET;
2448 			break;
2449 		}
2450 
2451 	} /* for */
2452 
2453 loop_exit:
2454 
2455 	if (ldcp->mode == LDC_MODE_RELIABLE) {
2456 		/* ACK data packets */
		/* only when data was read and no reset is being reported */
2457 		if ((*notify_event &
2458 		    (LDC_EVT_READ | LDC_EVT_RESET)) == LDC_EVT_READ) {
2459 			int ack_rv;
2460 			ack_rv = i_ldc_send_pkt(ldcp, LDC_DATA, LDC_ACK, 0);
			/* EWOULDBLOCK from the send is tolerated */
2461 			if (ack_rv && ack_rv != EWOULDBLOCK) {
2462 				cmn_err(CE_NOTE,
2463 				    "i_ldc_rx_process_hvq: (0x%lx) cannot "
2464 				    "send ACK\n", ldcp->id);
2465 
2466 				mutex_enter(&ldcp->tx_lock);
2467 				i_ldc_reset(ldcp, B_FALSE);
2468 				mutex_exit(&ldcp->tx_lock);
2469 
2470 				*notify_client = B_TRUE;
2471 				*notify_event = LDC_EVT_RESET;
2472 				goto skip_ackpeek;
2473 			}
2474 		}
2475 
2476 		/*
2477 		 * If we have no more space on the data queue, make sure
2478 		 * there are no ACKs on the rx queue waiting to be processed.
2479 		 */
		/* rv still holds the value last assigned inside the loop */
2480 		if (rv == ENOSPC) {
2481 			if (i_ldc_rx_ackpeek(ldcp, rx_head, rx_tail) != 0) {
2482 				ldcp->rx_ack_head = ACKPEEK_HEAD_INVALID;
2483 				*notify_client = B_TRUE;
2484 				*notify_event = LDC_EVT_RESET;
2485 			}
2486 			return (rv);
2487 		} else {
			/* forget the ack-peek position when the data queue is not full */
2488 			ldcp->rx_ack_head = ACKPEEK_HEAD_INVALID;
2489 		}
2490 	}
2491 
2492 skip_ackpeek:
2493 
2494 	/* Return, indicating whether or not data packets were found */
2495 	if ((*notify_event & (LDC_EVT_READ | LDC_EVT_RESET)) == LDC_EVT_READ)
2496 		return (0);
2497 
2498 	return (ENOMSG);
2499 }
2500 
2501 /*
2502  * Process any ACK packets on the HV receive queue.
2503  *
2504  * This function is only used by RELIABLE mode channels when the
2505  * secondary data queue fills up and there are packets remaining on
2506  * the HV receive queue.
2507  */
2508 int
2509 i_ldc_rx_ackpeek(ldc_chan_t *ldcp, uint64_t rx_head, uint64_t rx_tail)
2510 {
2511 	int		rv = 0;
2512 	ldc_msg_t	*msg;
2513 
2514 	if (ldcp->rx_ack_head == ACKPEEK_HEAD_INVALID)
2515 		ldcp->rx_ack_head = rx_head;
2516 
2517 	while (ldcp->rx_ack_head != rx_tail) {
2518 		msg = (ldc_msg_t *)(ldcp->rx_q_va + ldcp->rx_ack_head);
2519 
2520 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) {
2521 			if (rv = i_ldc_process_data_ACK(ldcp, msg))
2522 				break;
2523 			msg->stype &= ~LDC_ACK;
2524 		}
2525 
2526 		ldcp->rx_ack_head =
2527 		    (ldcp->rx_ack_head + LDC_PACKET_SIZE) %
2528 		    (ldcp->rx_q_entries << LDC_PACKET_SHIFT);
2529 	}
2530 	return (rv);
2531 }
2532 
2533 /* -------------------------------------------------------------------------- */
2534 
2535 /*
2536  * LDC API functions
2537  */
2538 
2539 /*
2540  * Initialize the channel. Allocate internal structure and memory for
2541  * TX/RX queues, and initialize locks.
 *
 * Arguments:
 *	id	channel id
 *	attr	channel attributes; mode, devclass, instance and mtu are read
 *		(an mtu of 0 selects LDC_DEFAULT_MTU)
 *	handle	out: set to the new channel on success. It is set to 0 once
 *		the argument and duplicate checks have passed.
 *
 * Return values:
 *	0		channel created and added to the global channel list
 *	EINVAL		attr or handle is NULL, hv_ldc_tx_qinfo() reported
 *			H_ECHANNEL, or attr->mode is not a known mode
 *	EADDRINUSE	a channel with this id is already on the channel list
 *	ENOMEM		a queue allocation returned NULL
 *
 * NOTE(review): the duplicate-id scan and the list insertion at the end
 * take ldcssp->lock separately, so the list is not locked in between.
2542  */
2543 int
2544 ldc_init(uint64_t id, ldc_attr_t *attr, ldc_handle_t *handle)
2545 {
2546 	ldc_chan_t 	*ldcp;
2547 	int		rv, exit_val;
2548 	uint64_t	ra_base, nentries;
2549 	uint64_t	qlen;
2550 
2551 	exit_val = EINVAL;	/* guarantee an error if exit on failure */
2552 
2553 	if (attr == NULL) {
2554 		DWARN(id, "ldc_init: (0x%llx) invalid attr\n", id);
2555 		return (EINVAL);
2556 	}
2557 	if (handle == NULL) {
2558 		DWARN(id, "ldc_init: (0x%llx) invalid handle\n", id);
2559 		return (EINVAL);
2560 	}
2561 
2562 	/* check if channel is valid */
	/*
	 * Only H_ECHANNEL is acted upon; ra_base and nentries are not
	 * used again in this function.
	 */
2563 	rv = hv_ldc_tx_qinfo(id, &ra_base, &nentries);
2564 	if (rv == H_ECHANNEL) {
2565 		DWARN(id, "ldc_init: (0x%llx) invalid channel id\n", id);
2566 		return (EINVAL);
2567 	}
2568 
2569 	/* check if the channel has already been initialized */
2570 	mutex_enter(&ldcssp->lock);
2571 	ldcp = ldcssp->chan_list;
2572 	while (ldcp != NULL) {
2573 		if (ldcp->id == id) {
2574 			DWARN(id, "ldc_init: (0x%llx) already initialized\n",
2575 			    id);
2576 			mutex_exit(&ldcssp->lock);
2577 			return (EADDRINUSE);
2578 		}
2579 		ldcp = ldcp->next;
2580 	}
2581 	mutex_exit(&ldcssp->lock);
2582 
	/* the scan above ran off the end of the list without a match */
2583 	ASSERT(ldcp == NULL);
2584 
2585 	*handle = 0;
2586 
2587 	/* Allocate an ldcp structure */
2588 	ldcp = kmem_zalloc(sizeof (ldc_chan_t), KM_SLEEP);
2589 
2590 	/*
2591 	 * Initialize the channel and Tx lock
2592 	 *
2593 	 * The channel 'lock' protects the entire channel and
2594 	 * should be acquired before initializing, resetting,
2595 	 * destroying or reading from a channel.
2596 	 *
2597 	 * The 'tx_lock' should be acquired prior to transmitting
2598 	 * data over the channel. The lock should also be acquired
2599 	 * prior to channel reconfiguration (in order to prevent
2600 	 * concurrent writes).
2601 	 *
2602 	 * ORDERING: When both locks are being acquired, to prevent
2603 	 * deadlocks, the channel lock should be always acquired prior
2604 	 * to the tx_lock.
2605 	 */
2606 	mutex_init(&ldcp->lock, NULL, MUTEX_DRIVER, NULL);
2607 	mutex_init(&ldcp->tx_lock, NULL, MUTEX_DRIVER, NULL);
2608 
2609 	/* Initialize the channel */
2610 	ldcp->id = id;
2611 	ldcp->cb = NULL;
2612 	ldcp->cb_arg = NULL;
2613 	ldcp->cb_inprogress = B_FALSE;
2614 	ldcp->cb_enabled = B_FALSE;
2615 	ldcp->next = NULL;
2616 
2617 	/* Read attributes */
2618 	ldcp->mode = attr->mode;
2619 	ldcp->devclass = attr->devclass;
2620 	ldcp->devinst = attr->instance;
2621 	ldcp->mtu = (attr->mtu > 0) ? attr->mtu : LDC_DEFAULT_MTU;
2622 
2623 	D1(ldcp->id,
2624 	    "ldc_init: (0x%llx) channel attributes, class=0x%x, "
2625 	    "instance=0x%llx, mode=%d, mtu=%d\n",
2626 	    ldcp->id, ldcp->devclass, ldcp->devinst, ldcp->mode, ldcp->mtu);
2627 
2628 	ldcp->next_vidx = 0;
	/*
	 * Only TS_IN_RESET is set so far; TS_TXQ_RDY and TS_RXQ_RDY are
	 * OR-ed in below as each queue is allocated.
	 */
2629 	ldcp->tstate = TS_IN_RESET;
2630 	ldcp->hstate = 0;
2631 	ldcp->last_msg_snt = LDC_INIT_SEQID;
2632 	ldcp->last_ack_rcd = 0;
2633 	ldcp->last_msg_rcd = 0;
2634 	ldcp->rx_ack_head = ACKPEEK_HEAD_INVALID;
2635 
2636 	ldcp->stream_bufferp = NULL;
2637 	ldcp->exp_dring_list = NULL;
2638 	ldcp->imp_dring_list = NULL;
2639 	ldcp->mhdl_list = NULL;
2640 
2641 	ldcp->tx_intr_state = LDC_INTR_NONE;
2642 	ldcp->rx_intr_state = LDC_INTR_NONE;
2643 
2644 	/* Initialize payload size depending on whether channel is reliable */
	/* the mode also selects the read/write routines stored in the channel */
2645 	switch (ldcp->mode) {
2646 	case LDC_MODE_RAW:
2647 		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_RAW;
2648 		ldcp->read_p = i_ldc_read_raw;
2649 		ldcp->write_p = i_ldc_write_raw;
2650 		break;
2651 	case LDC_MODE_UNRELIABLE:
2652 		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_UNRELIABLE;
2653 		ldcp->read_p = i_ldc_read_packet;
2654 		ldcp->write_p = i_ldc_write_packet;
2655 		break;
2656 	case LDC_MODE_RELIABLE:
2657 		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_RELIABLE;
2658 
2659 		ldcp->stream_remains = 0;
2660 		ldcp->stream_offset = 0;
		/* mtu-sized buffer; released with the same size on teardown */
2661 		ldcp->stream_bufferp = kmem_alloc(ldcp->mtu, KM_SLEEP);
2662 		ldcp->read_p = i_ldc_read_stream;
2663 		ldcp->write_p = i_ldc_write_stream;
2664 		break;
2665 	default:
		/* unknown mode: nothing beyond ldcp and its two locks exists yet */
2666 		exit_val = EINVAL;
2667 		goto cleanup_on_exit;
2668 	}
2669 
2670 	/*
2671 	 * qlen is (mtu * ldc_mtu_msgs) / pkt_payload. If this
2672 	 * value is smaller than default length of ldc_queue_entries,
2673 	 * qlen is set to ldc_queue_entries. Ensure that computed
2674 	 * length is a power-of-two value.
2675 	 */
2676 	qlen = (ldcp->mtu * ldc_mtu_msgs) / ldcp->pkt_payload;
2677 	if (!ISP2(qlen)) {
		/*
		 * Shift qlen down to zero while doubling tmp; tmp ends up
		 * as the smallest power of two greater than qlen.
		 */
2678 		uint64_t	tmp = 1;
2679 		while (qlen) {
2680 			qlen >>= 1; tmp <<= 1;
2681 		}
2682 		qlen = tmp;
2683 	}
2684 
2685 	ldcp->rx_q_entries =
2686 	    (qlen < ldc_queue_entries) ? ldc_queue_entries : qlen;
	/* the TX queue is given the same number of entries as the RX queue */
2687 	ldcp->tx_q_entries = ldcp->rx_q_entries;
2688 
2689 	D1(ldcp->id, "ldc_init: queue length = 0x%llx\n", ldcp->rx_q_entries);
2690 
2691 	/* Create a transmit queue */
2692 	ldcp->tx_q_va = (uint64_t)
2693 	    contig_mem_alloc(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
2694 	if (ldcp->tx_q_va == NULL) {
2695 		cmn_err(CE_WARN,
2696 		    "ldc_init: (0x%lx) TX queue allocation failed\n",
2697 		    ldcp->id);
2698 		exit_val = ENOMEM;
2699 		goto cleanup_on_exit;
2700 	}
2701 	ldcp->tx_q_ra = va_to_pa((caddr_t)ldcp->tx_q_va);
2702 
2703 	D2(ldcp->id, "ldc_init: txq_va=0x%llx, txq_ra=0x%llx, entries=0x%llx\n",
2704 	    ldcp->tx_q_va, ldcp->tx_q_ra, ldcp->tx_q_entries);
2705 
	/* tells cleanup_on_exit that the TX queue must be freed */
2706 	ldcp->tstate |= TS_TXQ_RDY;
2707 
2708 	/* Create a receive queue */
2709 	ldcp->rx_q_va = (uint64_t)
2710 	    contig_mem_alloc(ldcp->rx_q_entries << LDC_PACKET_SHIFT);
2711 	if (ldcp->rx_q_va == NULL) {
2712 		cmn_err(CE_WARN,
2713 		    "ldc_init: (0x%lx) RX queue allocation failed\n",
2714 		    ldcp->id);
2715 		exit_val = ENOMEM;
2716 		goto cleanup_on_exit;
2717 	}
2718 	ldcp->rx_q_ra = va_to_pa((caddr_t)ldcp->rx_q_va);
2719 
2720 	D2(ldcp->id, "ldc_init: rxq_va=0x%llx, rxq_ra=0x%llx, entries=0x%llx\n",
2721 	    ldcp->rx_q_va, ldcp->rx_q_ra, ldcp->rx_q_entries);
2722 
	/* tells cleanup_on_exit that the RX queue must be freed */
2723 	ldcp->tstate |= TS_RXQ_RDY;
2724 
2725 	/* Setup a separate read data queue */
2726 	if (ldcp->mode == LDC_MODE_RELIABLE) {
2727 		ldcp->readq_get_state = i_ldc_dq_rx_get_state;
2728 		ldcp->readq_set_head  = i_ldc_set_rxdq_head;
2729 
2730 		/* Make sure the data queue multiplier is a power of 2 */
		/* note that this rewrites the ldc_rxdq_multiplier variable itself */
2731 		if (!ISP2(ldc_rxdq_multiplier)) {
2732 			D1(ldcp->id, "ldc_init: (0x%llx) ldc_rxdq_multiplier "
2733 			    "not a power of 2, resetting", ldcp->id);
2734 			ldc_rxdq_multiplier = LDC_RXDQ_MULTIPLIER;
2735 		}
2736 
2737 		ldcp->rx_dq_entries = ldc_rxdq_multiplier * ldcp->rx_q_entries;
2738 		ldcp->rx_dq_va = (uint64_t)
2739 		    kmem_alloc(ldcp->rx_dq_entries << LDC_PACKET_SHIFT,
2740 		    KM_SLEEP);
		/*
		 * NOTE(review): presumably unreachable, since a KM_SLEEP
		 * allocation is expected to block rather than return
		 * NULL - TODO confirm.
		 */
2741 		if (ldcp->rx_dq_va == NULL) {
2742 			cmn_err(CE_WARN,
2743 			    "ldc_init: (0x%lx) RX data queue "
2744 			    "allocation failed\n", ldcp->id);
2745 			exit_val = ENOMEM;
2746 			goto cleanup_on_exit;
2747 		}
2748 
2749 		ldcp->rx_dq_head = ldcp->rx_dq_tail = 0;
2750 
2751 		D2(ldcp->id, "ldc_init: rx_dq_va=0x%llx, "
2752 		    "rx_dq_entries=0x%llx\n", ldcp->rx_dq_va,
2753 		    ldcp->rx_dq_entries);
2754 	} else {
		/* the other modes read straight from the HV receive queue */
2755 		ldcp->readq_get_state = i_ldc_hvq_rx_get_state;
2756 		ldcp->readq_set_head  = i_ldc_set_rx_head;
2757 	}
2758 
2759 	/* Init descriptor ring and memory handle list lock */
2760 	mutex_init(&ldcp->exp_dlist_lock, NULL, MUTEX_DRIVER, NULL);
2761 	mutex_init(&ldcp->imp_dlist_lock, NULL, MUTEX_DRIVER, NULL);
2762 	mutex_init(&ldcp->mlist_lock, NULL, MUTEX_DRIVER, NULL);
2763 
2764 	/* mark status as INITialized */
2765 	ldcp->status = LDC_INIT;
2766 
2767 	/* Add to channel list */
	/* inserted at the head of the list */
2768 	mutex_enter(&ldcssp->lock);
2769 	ldcp->next = ldcssp->chan_list;
2770 	ldcssp->chan_list = ldcp;
2771 	ldcssp->channel_count++;
2772 	mutex_exit(&ldcssp->lock);
2773 
2774 	/* set the handle */
2775 	*handle = (ldc_handle_t)ldcp;
2776 
2777 	D1(ldcp->id, "ldc_init: (0x%llx) channel initialized\n", ldcp->id);
2778 
2779 	return (0);
2780 
	/*
	 * Failure path. Releases the stream buffer, whichever HV queues the
	 * tstate bits say were allocated, the two channel locks and the
	 * channel structure. The data queue and the three list locks are
	 * not handled here; every goto above is taken before they exist.
	 */
2781 cleanup_on_exit:
2782 
2783 	if (ldcp->mode == LDC_MODE_RELIABLE && ldcp->stream_bufferp)
2784 		kmem_free(ldcp->stream_bufferp, ldcp->mtu);
2785 
2786 	if (ldcp->tstate & TS_TXQ_RDY)
2787 		contig_mem_free((caddr_t)ldcp->tx_q_va,
2788 		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT));
2789 
2790 	if (ldcp->tstate & TS_RXQ_RDY)
2791 		contig_mem_free((caddr_t)ldcp->rx_q_va,
2792 		    (ldcp->rx_q_entries << LDC_PACKET_SHIFT));
2793 
2794 	mutex_destroy(&ldcp->tx_lock);
2795 	mutex_destroy(&ldcp->lock);
2796 
	/* NOTE(review): ldcp was already dereferenced above, so this check cannot fail */
2797 	if (ldcp)
2798 		kmem_free(ldcp, sizeof (ldc_chan_t));
2799 
2800 	return (exit_val);
2801 }
2802 
2803 /*
2804  * Finalizes the LDC connection. It will return EBUSY if the
2805  * channel is open. A ldc_close() has to be done prior to
2806  * a ldc_fini operation. It frees TX/RX queues, associated
2807  * with the channel
2808  */
2809 int
2810 ldc_fini(ldc_handle_t handle)
2811 {
2812 	ldc_chan_t 	*ldcp;
2813 	ldc_chan_t 	*tmp_ldcp;
2814 	uint64_t 	id;
2815 
2816 	if (handle == NULL) {
2817 		DWARN(DBG_ALL_LDCS, "ldc_fini: invalid channel handle\n");
2818 		return (EINVAL);
2819 	}
2820 	ldcp = (ldc_chan_t *)handle;
2821 	id = ldcp->id;
2822 
2823 	mutex_enter(&ldcp->lock);
2824 
2825 	if ((ldcp->tstate & ~TS_IN_RESET) > TS_INIT) {
2826 		DWARN(ldcp->id, "ldc_fini: (0x%llx) channel is open\n",
2827 		    ldcp->id);
2828 		mutex_exit(&ldcp->lock);
2829 		return (EBUSY);
2830 	}
2831 
2832 	/* Remove from the channel list */
2833 	mutex_enter(&ldcssp->lock);
2834 	tmp_ldcp = ldcssp->chan_list;
2835 	if (tmp_ldcp == ldcp) {
2836 		ldcssp->chan_list = ldcp->next;
2837 		ldcp->next = NULL;
2838 	} else {
2839 		while (tmp_ldcp != NULL) {
2840 			if (tmp_ldcp->next == ldcp) {
2841 				tmp_ldcp->next = ldcp->next;
2842 				ldcp->next = NULL;
2843 				break;
2844 			}
2845 			tmp_ldcp = tmp_ldcp->next;
2846 		}
2847 		if (tmp_ldcp == NULL) {
2848 			DWARN(DBG_ALL_LDCS, "ldc_fini: invalid channel hdl\n");
2849 			mutex_exit(&ldcssp->lock);
2850 			mutex_exit(&ldcp->lock);
2851 			return (EINVAL);
2852 		}
2853 	}
2854 
2855 	ldcssp->channel_count--;
2856 
2857 	mutex_exit(&ldcssp->lock);
2858 
2859 	/* Free the map table for this channel */
2860 	if (ldcp->mtbl) {
2861 		(void) hv_ldc_set_map_table(ldcp->id, NULL, NULL);
2862 		if (ldcp->mtbl->contigmem)
2863 			contig_mem_free(ldcp->mtbl->table, ldcp->mtbl->size);
2864 		else
2865 			kmem_free(ldcp->mtbl->table, ldcp->mtbl->size);
2866 		mutex_destroy(&ldcp->mtbl->lock);
2867 		kmem_free(ldcp->mtbl, sizeof (ldc_mtbl_t));
2868 	}
2869 
2870 	/* Destroy descriptor ring and memory handle list lock */
2871 	mutex_destroy(&ldcp->exp_dlist_lock);
2872 	mutex_destroy(&ldcp->imp_dlist_lock);
2873 	mutex_destroy(&ldcp->mlist_lock);
2874 
2875 	/* Free the stream buffer for RELIABLE_MODE */
2876 	if (ldcp->mode == LDC_MODE_RELIABLE && ldcp->stream_bufferp)
2877 		kmem_free(ldcp->stream_bufferp, ldcp->mtu);
2878 
2879 	/* Free the RX queue */
2880 	contig_mem_free((caddr_t)ldcp->rx_q_va,
2881 	    (ldcp->rx_q_entries << LDC_PACKET_SHIFT));
2882 	ldcp->tstate &= ~TS_RXQ_RDY;
2883 
2884 	/* Free the RX data queue */
2885 	if (ldcp->mode == LDC_MODE_RELIABLE) {
2886 		kmem_free((caddr_t)ldcp->rx_dq_va,
2887 		    (ldcp->rx_dq_entries << LDC_PACKET_SHIFT));
2888 	}
2889 
2890 	/* Free the TX queue */
2891 	contig_mem_free((caddr_t)ldcp->tx_q_va,
2892 	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT));
2893 	ldcp->tstate &= ~TS_TXQ_RDY;
2894 
2895 	mutex_exit(&ldcp->lock);
2896 
2897 	/* Destroy mutex */
2898 	mutex_destroy(&ldcp->tx_lock);
2899 	mutex_destroy(&ldcp->lock);
2900 
2901 	/* free channel structure */
2902 	kmem_free(ldcp, sizeof (ldc_chan_t));
2903 
2904 	D1(id, "ldc_fini: (0x%llx) channel finalized\n", id);
2905 
2906 	return (0);
2907 }
2908 
2909 /*
2910  * Open the LDC channel for use. It registers the TX/RX queues
2911  * with the Hypervisor. It also specifies the interrupt number
2912  * and target CPU for this channel
2913  */
2914 int
2915 ldc_open(ldc_handle_t handle)
2916 {
2917 	ldc_chan_t 	*ldcp;
2918 	int 		rv;
2919 
2920 	if (handle == NULL) {
2921 		DWARN(DBG_ALL_LDCS, "ldc_open: invalid channel handle\n");
2922 		return (EINVAL);
2923 	}
2924 
2925 	ldcp = (ldc_chan_t *)handle;
2926 
2927 	mutex_enter(&ldcp->lock);
2928 
2929 	if (ldcp->tstate < TS_INIT) {
2930 		DWARN(ldcp->id,
2931 		    "ldc_open: (0x%llx) channel not initialized\n", ldcp->id);
2932 		mutex_exit(&ldcp->lock);
2933 		return (EFAULT);
2934 	}
2935 	if ((ldcp->tstate & ~TS_IN_RESET) >= TS_OPEN) {
2936 		DWARN(ldcp->id,
2937 		    "ldc_open: (0x%llx) channel is already open\n", ldcp->id);
2938 		mutex_exit(&ldcp->lock);
2939 		return (EFAULT);
2940 	}
2941 
2942 	/*
2943 	 * Unregister/Register the tx queue with the hypervisor
2944 	 */
2945 	rv = hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
2946 	if (rv) {
2947 		cmn_err(CE_WARN,
2948 		    "ldc_open: (0x%lx) channel tx queue unconf failed\n",
2949 		    ldcp->id);
2950 		mutex_exit(&ldcp->lock);
2951 		return (EIO);
2952 	}
2953 
2954 	rv = hv_ldc_tx_qconf(ldcp->id, ldcp->tx_q_ra, ldcp->tx_q_entries);
2955 	if (rv) {
2956 		cmn_err(CE_WARN,
2957 		    "ldc_open: (0x%lx) channel tx queue conf failed\n",
2958 		    ldcp->id);
2959 		mutex_exit(&ldcp->lock);
2960 		return (EIO);
2961 	}
2962 
2963 	D2(ldcp->id, "ldc_open: (0x%llx) registered tx queue with LDC\n",
2964 	    ldcp->id);
2965 
2966 	/*
2967 	 * Unregister/Register the rx queue with the hypervisor
2968 	 */
2969 	rv = hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
2970 	if (rv) {
2971 		cmn_err(CE_WARN,
2972 		    "ldc_open: (0x%lx) channel rx queue unconf failed\n",
2973 		    ldcp->id);
2974 		mutex_exit(&ldcp->lock);
2975 		return (EIO);
2976 	}
2977 
2978 	rv = hv_ldc_rx_qconf(ldcp->id, ldcp->rx_q_ra, ldcp->rx_q_entries);
2979 	if (rv) {
2980 		cmn_err(CE_WARN,
2981 		    "ldc_open: (0x%lx) channel rx queue conf failed\n",
2982 		    ldcp->id);
2983 		mutex_exit(&ldcp->lock);
2984 		return (EIO);
2985 	}
2986 
2987 	D2(ldcp->id, "ldc_open: (0x%llx) registered rx queue with LDC\n",
2988 	    ldcp->id);
2989 
2990 	ldcp->tstate |= TS_QCONF_RDY;
2991 
2992 	/* Register the channel with the channel nexus */
2993 	rv = i_ldc_register_channel(ldcp);
2994 	if (rv && rv != EAGAIN) {
2995 		cmn_err(CE_WARN,
2996 		    "ldc_open: (0x%lx) channel register failed\n", ldcp->id);
2997 		ldcp->tstate &= ~TS_QCONF_RDY;
2998 		(void) hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
2999 		(void) hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
3000 		mutex_exit(&ldcp->lock);
3001 		return (EIO);
3002 	}
3003 
3004 	/* mark channel in OPEN state */
3005 	ldcp->status = LDC_OPEN;
3006 
3007 	/* Read channel state */
3008 	rv = hv_ldc_tx_get_state(ldcp->id,
3009 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
3010 	if (rv) {
3011 		cmn_err(CE_WARN,
3012 		    "ldc_open: (0x%lx) cannot read channel state\n",
3013 		    ldcp->id);
3014 		(void) i_ldc_unregister_channel(ldcp);
3015 		ldcp->tstate &= ~TS_QCONF_RDY;
3016 		(void) hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
3017 		(void) hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
3018 		mutex_exit(&ldcp->lock);
3019 		return (EIO);
3020 	}
3021 
3022 	/*
3023 	 * set the ACKd head to current head location for reliable
3024 	 */
3025 	ldcp->tx_ackd_head = ldcp->tx_head;
3026 
3027 	/* mark channel ready if HV report link is UP (peer alloc'd Rx queue) */
3028 	if (ldcp->link_state == LDC_CHANNEL_UP ||
3029 	    ldcp->link_state == LDC_CHANNEL_RESET) {
3030 		ldcp->tstate |= TS_LINK_READY;
3031 		ldcp->status = LDC_READY;
3032 	}
3033 
3034 	/*
3035 	 * if channel is being opened in RAW mode - no handshake is needed
3036 	 * switch the channel READY and UP state
3037 	 */
3038 	if (ldcp->mode == LDC_MODE_RAW) {
3039 		ldcp->tstate = TS_UP;	/* set bits associated with LDC UP */
3040 		ldcp->status = LDC_UP;
3041 	}
3042 
3043 	mutex_exit(&ldcp->lock);
3044 
3045 	/*
3046 	 * Increment number of open channels
3047 	 */
3048 	mutex_enter(&ldcssp->lock);
3049 	ldcssp->channels_open++;
3050 	mutex_exit(&ldcssp->lock);
3051 
3052 	D1(ldcp->id,
3053 	    "ldc_open: (0x%llx) channel (0x%p) open for use "
3054 	    "(tstate=0x%x, status=0x%x)\n",
3055 	    ldcp->id, ldcp, ldcp->tstate, ldcp->status);
3056 
3057 	return (0);
3058 }
3059 
3060 /*
3061  * Close the LDC connection. It will return EBUSY if there
3062  * are memory segments or descriptor rings either bound to or
3063  * mapped over the channel
 *
 * Return values:
 *	0		channel closed; tstate is TS_IN_RESET|TS_INIT and
 *			status is LDC_INIT
 *	EINVAL		handle is NULL
 *	EFAULT		channel is not open
 *	EBUSY		memory handles or descriptor rings are still
 *			bound or mapped
 *	EWOULDBLOCK	a client callback is in progress
 *	EIO		the queue pointers could not be read, or a queue
 *			could not be unconfigured
 *	other		the error from i_ldc_unregister_channel() when it
 *			is not EAGAIN, or EAGAIN itself once
 *			ldc_max_retries retries have been used up
 *
 * Both ldcp->lock and ldcp->tx_lock are taken here (in that order) and
 * are released before every return.
3064  */
3065 int
3066 ldc_close(ldc_handle_t handle)
3067 {
3068 	ldc_chan_t 	*ldcp;
3069 	int		rv = 0, retries = 0;
	/* set once the single 1 ms drain wait has been spent */
3070 	boolean_t	chk_done = B_FALSE;
3071 
3072 	if (handle == NULL) {
3073 		DWARN(DBG_ALL_LDCS, "ldc_close: invalid channel handle\n");
3074 		return (EINVAL);
3075 	}
3076 	ldcp = (ldc_chan_t *)handle;
3077 
3078 	mutex_enter(&ldcp->lock);
3079 
3080 	/* return error if channel is not open */
3081 	if ((ldcp->tstate & ~TS_IN_RESET) < TS_OPEN) {
3082 		DWARN(ldcp->id,
3083 		    "ldc_close: (0x%llx) channel is not open\n", ldcp->id);
3084 		mutex_exit(&ldcp->lock);
3085 		return (EFAULT);
3086 	}
3087 
3088 	/* if any memory handles, drings, are bound or mapped cannot close */
3089 	if (ldcp->mhdl_list != NULL) {
3090 		DWARN(ldcp->id,
3091 		    "ldc_close: (0x%llx) channel has bound memory handles\n",
3092 		    ldcp->id);
3093 		mutex_exit(&ldcp->lock);
3094 		return (EBUSY);
3095 	}
3096 	if (ldcp->exp_dring_list != NULL) {
3097 		DWARN(ldcp->id,
3098 		    "ldc_close: (0x%llx) channel has bound descriptor rings\n",
3099 		    ldcp->id);
3100 		mutex_exit(&ldcp->lock);
3101 		return (EBUSY);
3102 	}
3103 	if (ldcp->imp_dring_list != NULL) {
3104 		DWARN(ldcp->id,
3105 		    "ldc_close: (0x%llx) channel has mapped descriptor rings\n",
3106 		    ldcp->id);
3107 		mutex_exit(&ldcp->lock);
3108 		return (EBUSY);
3109 	}
3110 
	/* refuse to close while a client callback is running */
3111 	if (ldcp->cb_inprogress) {
3112 		DWARN(ldcp->id, "ldc_close: (0x%llx) callback active\n",
3113 		    ldcp->id);
3114 		mutex_exit(&ldcp->lock);
3115 		return (EWOULDBLOCK);
3116 	}
3117 
3118 	/* Obtain Tx lock */
3119 	mutex_enter(&ldcp->tx_lock);
3120 
3121 	/*
3122 	 * Wait for pending transmits to complete i.e Tx queue to drain
3123 	 * if there are pending pkts - wait 1 ms and retry again
	 *
	 * The wait happens at most once (see chk_done), and both channel
	 * locks remain held across the delay.
3124 	 */
3125 	for (;;) {
3126 
3127 		rv = hv_ldc_tx_get_state(ldcp->id,
3128 		    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
3129 		if (rv) {
3130 			cmn_err(CE_WARN,
3131 			    "ldc_close: (0x%lx) cannot read qptrs\n", ldcp->id);
3132 			mutex_exit(&ldcp->tx_lock);
3133 			mutex_exit(&ldcp->lock);
3134 			return (EIO);
3135 		}
3136 
		/* done when the queue is empty or the link is not up */
3137 		if (ldcp->tx_head == ldcp->tx_tail ||
3138 		    ldcp->link_state != LDC_CHANNEL_UP) {
3139 			break;
3140 		}
3141 
3142 		if (chk_done) {
3143 			DWARN(ldcp->id,
3144 			    "ldc_close: (0x%llx) Tx queue drain timeout\n",
3145 			    ldcp->id);
3146 			break;
3147 		}
3148 
3149 		/* wait for one ms and try again */
3150 		delay(drv_usectohz(1000));
3151 		chk_done = B_TRUE;
3152 	}
3153 
3154 	/*
3155 	 * Drain the Tx and Rx queues as we are closing the
3156 	 * channel. We don't care about any pending packets.
3157 	 * We have to also drain the queue prior to clearing
3158 	 * pending interrupts, otherwise the HV will trigger
3159 	 * an interrupt the moment the interrupt state is
3160 	 * cleared.
3161 	 */
3162 	(void) i_ldc_txq_reconf(ldcp);
3163 	(void) i_ldc_rxq_drain(ldcp);
3164 
3165 	/*
3166 	 * Unregister the channel with the nexus
	 *
	 * On each failure both locks are dropped first; they are taken
	 * again, channel lock before tx_lock, only if another attempt
	 * is going to be made.
3167 	 */
3168 	while ((rv = i_ldc_unregister_channel(ldcp)) != 0) {
3169 
3170 		mutex_exit(&ldcp->tx_lock);
3171 		mutex_exit(&ldcp->lock);
3172 
3173 		/* if any error other than EAGAIN return back */
		/* EAGAIN is also returned once ldc_max_retries is reached */
3174 		if (rv != EAGAIN || retries >= ldc_max_retries) {
3175 			cmn_err(CE_WARN,
3176 			    "ldc_close: (0x%lx) unregister failed, %d\n",
3177 			    ldcp->id, rv);
3178 			return (rv);
3179 		}
3180 
3181 		/*
3182 		 * As there could be pending interrupts we need
3183 		 * to wait and try again
3184 		 */
3185 		drv_usecwait(ldc_close_delay);
3186 		mutex_enter(&ldcp->lock);
3187 		mutex_enter(&ldcp->tx_lock);
3188 		retries++;
3189 	}
3190 
3191 	ldcp->tstate &= ~TS_QCONF_RDY;
3192 
3193 	/*
3194 	 * Unregister queues
	 *
	 * NOTE(review): TS_QCONF_RDY has already been cleared above, so
	 * the EIO returns below leave the flag cleared.
3195 	 */
3196 	rv = hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
3197 	if (rv) {
3198 		cmn_err(CE_WARN,
3199 		    "ldc_close: (0x%lx) channel TX queue unconf failed\n",
3200 		    ldcp->id);
3201 		mutex_exit(&ldcp->tx_lock);
3202 		mutex_exit(&ldcp->lock);
3203 		return (EIO);
3204 	}
3205 	rv = hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
3206 	if (rv) {
3207 		cmn_err(CE_WARN,
3208 		    "ldc_close: (0x%lx) channel RX queue unconf failed\n",
3209 		    ldcp->id);
3210 		mutex_exit(&ldcp->tx_lock);
3211 		mutex_exit(&ldcp->lock);
3212 		return (EIO);
3213 	}
3214 
3215 	/* Reset channel state information */
3216 	i_ldc_reset_state(ldcp);
3217 
3218 	/* Mark channel as down and in initialized state */
3219 	ldcp->tx_ackd_head = 0;
3220 	ldcp->tx_head = 0;
3221 	ldcp->tstate = TS_IN_RESET|TS_INIT;
3222 	ldcp->status = LDC_INIT;
3223 
3224 	mutex_exit(&ldcp->tx_lock);
3225 	mutex_exit(&ldcp->lock);
3226 
3227 	/* Decrement number of open channels */
3228 	mutex_enter(&ldcssp->lock);
3229 	ldcssp->channels_open--;
3230 	mutex_exit(&ldcssp->lock);
3231 
3232 	D1(ldcp->id, "ldc_close: (0x%llx) channel closed\n", ldcp->id);
3233 
3234 	return (0);
3235 }
3236 
3237 /*
3238  * Register channel callback
3239  */
3240 int
3241 ldc_reg_callback(ldc_handle_t handle,
3242     uint_t(*cb)(uint64_t event, caddr_t arg), caddr_t arg)
3243 {
3244 	ldc_chan_t *ldcp;
3245 
3246 	if (handle == NULL) {
3247 		DWARN(DBG_ALL_LDCS,
3248 		    "ldc_reg_callback: invalid channel handle\n");
3249 		return (EINVAL);
3250 	}
3251 	if (((uint64_t)cb) < KERNELBASE) {
3252 		DWARN(DBG_ALL_LDCS, "ldc_reg_callback: invalid callback\n");
3253 		return (EINVAL);
3254 	}
3255 	ldcp = (ldc_chan_t *)handle;
3256 
3257 	mutex_enter(&ldcp->lock);
3258 
3259 	if (ldcp->cb) {
3260 		DWARN(ldcp->id, "ldc_reg_callback: (0x%llx) callback exists\n",
3261 		    ldcp->id);
3262 		mutex_exit(&ldcp->lock);
3263 		return (EIO);
3264 	}
3265 	if (ldcp->cb_inprogress) {
3266 		DWARN(ldcp->id, "ldc_reg_callback: (0x%llx) callback active\n",
3267 		    ldcp->id);
3268 		mutex_exit(&ldcp->lock);
3269 		return (EWOULDBLOCK);
3270 	}
3271 
3272 	ldcp->cb = cb;
3273 	ldcp->cb_arg = arg;
3274 	ldcp->cb_enabled = B_TRUE;
3275 
3276 	D1(ldcp->id,
3277 	    "ldc_reg_callback: (0x%llx) registered callback for channel\n",
3278 	    ldcp->id);
3279 
3280 	mutex_exit(&ldcp->lock);
3281 
3282 	return (0);
3283 }
3284 
3285 /*
3286  * Unregister channel callback
3287  */
3288 int
3289 ldc_unreg_callback(ldc_handle_t handle)
3290 {
3291 	ldc_chan_t *ldcp;
3292 
3293 	if (handle == NULL) {
3294 		DWARN(DBG_ALL_LDCS,
3295 		    "ldc_unreg_callback: invalid channel handle\n");
3296 		return (EINVAL);
3297 	}
3298 	ldcp = (ldc_chan_t *)handle;
3299 
3300 	mutex_enter(&ldcp->lock);
3301 
3302 	if (ldcp->cb == NULL) {
3303 		DWARN(ldcp->id,
3304 		    "ldc_unreg_callback: (0x%llx) no callback exists\n",
3305 		    ldcp->id);
3306 		mutex_exit(&ldcp->lock);
3307 		return (EIO);
3308 	}
3309 	if (ldcp->cb_inprogress) {
3310 		DWARN(ldcp->id,
3311 		    "ldc_unreg_callback: (0x%llx) callback active\n",
3312 		    ldcp->id);
3313 		mutex_exit(&ldcp->lock);
3314 		return (EWOULDBLOCK);
3315 	}
3316 
3317 	ldcp->cb = NULL;
3318 	ldcp->cb_arg = NULL;
3319 	ldcp->cb_enabled = B_FALSE;
3320 
3321 	D1(ldcp->id,
3322 	    "ldc_unreg_callback: (0x%llx) unregistered callback for channel\n",
3323 	    ldcp->id);
3324 
3325 	mutex_exit(&ldcp->lock);
3326 
3327 	return (0);
3328 }
3329 
3330 
3331 /*
3332  * Bring a channel up by initiating a handshake with the peer
3333  * This call is asynchronous. It will complete at a later point
3334  * in time when the peer responds back with an RTR.
3335  */
3336 int
3337 ldc_up(ldc_handle_t handle)
3338 {
3339 	int 		rv;
3340 	ldc_chan_t 	*ldcp;
3341 	ldc_msg_t 	*ldcmsg;
3342 	uint64_t 	tx_tail, tstate, link_state;
3343 
3344 	if (handle == NULL) {
3345 		DWARN(DBG_ALL_LDCS, "ldc_up: invalid channel handle\n");
3346 		return (EINVAL);
3347 	}
3348 	ldcp = (ldc_chan_t *)handle;
3349 
3350 	mutex_enter(&ldcp->lock);
3351 
3352 	D1(ldcp->id, "ldc_up: (0x%llx) doing channel UP\n", ldcp->id);
3353 
3354 	/* clear the reset state */
3355 	tstate = ldcp->tstate;
3356 	ldcp->tstate &= ~TS_IN_RESET;
3357 
3358 	if (ldcp->tstate == TS_UP) {
3359 		DWARN(ldcp->id,
3360 		    "ldc_up: (0x%llx) channel is already in UP state\n",
3361 		    ldcp->id);
3362 
3363 		/* mark channel as up */
3364 		ldcp->status = LDC_UP;
3365 
3366 		/*
3367 		 * if channel was in reset state and there was
3368 		 * pending data clear interrupt state. this will
3369 		 * trigger an interrupt, causing the RX handler to
3370 		 * to invoke the client's callback
3371 		 */
3372 		if ((tstate & TS_IN_RESET) &&
3373 		    ldcp->rx_intr_state == LDC_INTR_PEND) {
3374 			D1(ldcp->id,
3375 			    "ldc_up: (0x%llx) channel has pending data, "
3376 			    "clearing interrupt\n", ldcp->id);
3377 			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
3378 		}
3379 
3380 		mutex_exit(&ldcp->lock);
3381 		return (0);
3382 	}
3383 
3384 	/* if the channel is in RAW mode - mark it as UP, if READY */
3385 	if (ldcp->mode == LDC_MODE_RAW && ldcp->tstate >= TS_READY) {
3386 		ldcp->tstate = TS_UP;
3387 		mutex_exit(&ldcp->lock);
3388 		return (0);
3389 	}
3390 
3391 	/* Don't start another handshake if there is one in progress */
3392 	if (ldcp->hstate) {
3393 		D1(ldcp->id,
3394 		    "ldc_up: (0x%llx) channel handshake in progress\n",
3395 		    ldcp->id);
3396 		mutex_exit(&ldcp->lock);
3397 		return (0);
3398 	}
3399 
3400 	mutex_enter(&ldcp->tx_lock);
3401 
3402 	/* save current link state */
3403 	link_state = ldcp->link_state;
3404 
3405 	/* get the current tail for the LDC msg */
3406 	rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
3407 	if (rv) {
3408 		D1(ldcp->id, "ldc_up: (0x%llx) cannot initiate handshake\n",
3409 		    ldcp->id);
3410 		mutex_exit(&ldcp->tx_lock);
3411 		mutex_exit(&ldcp->lock);
3412 		return (ECONNREFUSED);
3413 	}
3414 
3415 	/*
3416 	 * If i_ldc_get_tx_tail() changed link_state to either RESET or UP,
3417 	 * from a previous state of DOWN, then mark the channel as
3418 	 * being ready for handshake.
3419 	 */
3420 	if ((link_state == LDC_CHANNEL_DOWN) &&
3421 	    (link_state != ldcp->link_state)) {
3422 
3423 		ASSERT((ldcp->link_state == LDC_CHANNEL_RESET) ||
3424 		    (ldcp->link_state == LDC_CHANNEL_UP));
3425 
3426 		if (ldcp->mode == LDC_MODE_RAW) {
3427 			ldcp->status = LDC_UP;
3428 			ldcp->tstate = TS_UP;
3429 			mutex_exit(&ldcp->tx_lock);
3430 			mutex_exit(&ldcp->lock);
3431 			return (0);
3432 		} else {
3433 			ldcp->status = LDC_READY;
3434 			ldcp->tstate |= TS_LINK_READY;
3435 		}
3436 
3437 	}
3438 
3439 	ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
3440 	ZERO_PKT(ldcmsg);
3441 
3442 	ldcmsg->type = LDC_CTRL;
3443 	ldcmsg->stype = LDC_INFO;
3444 	ldcmsg->ctrl = LDC_VER;
3445 	ldcp->next_vidx = 0;
3446 	bcopy(&ldc_versions[0], ldcmsg->udata, sizeof (ldc_versions[0]));
3447 
3448 	DUMP_LDC_PKT(ldcp, "ldc_up snd ver", (uint64_t)ldcmsg);
3449 
3450 	/* initiate the send by calling into HV and set the new tail */
3451 	tx_tail = (tx_tail + LDC_PACKET_SIZE) %
3452 	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
3453 
3454 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
3455 	if (rv) {
3456 		DWARN(ldcp->id,
3457 		    "ldc_up: (0x%llx) cannot initiate handshake rv=%d\n",
3458 		    ldcp->id, rv);
3459 		mutex_exit(&ldcp->tx_lock);
3460 		mutex_exit(&ldcp->lock);
3461 		return (rv);
3462 	}
3463 
3464 	ldcp->hstate |= TS_SENT_VER;
3465 	ldcp->tx_tail = tx_tail;
3466 	D1(ldcp->id, "ldc_up: (0x%llx) channel up initiated\n", ldcp->id);
3467 
3468 	mutex_exit(&ldcp->tx_lock);
3469 	mutex_exit(&ldcp->lock);
3470 
3471 	return (rv);
3472 }
3473 
3474 
3475 /*
3476  * Bring a channel down by resetting its state and queues
3477  */
3478 int
3479 ldc_down(ldc_handle_t handle)
3480 {
3481 	ldc_chan_t 	*ldcp;
3482 
3483 	if (handle == NULL) {
3484 		DWARN(DBG_ALL_LDCS, "ldc_down: invalid channel handle\n");
3485 		return (EINVAL);
3486 	}
3487 	ldcp = (ldc_chan_t *)handle;
3488 	mutex_enter(&ldcp->lock);
3489 	mutex_enter(&ldcp->tx_lock);
3490 	i_ldc_reset(ldcp, B_TRUE);
3491 	mutex_exit(&ldcp->tx_lock);
3492 	mutex_exit(&ldcp->lock);
3493 
3494 	return (0);
3495 }
3496 
3497 /*
3498  * Get the current channel status
3499  */
3500 int
3501 ldc_status(ldc_handle_t handle, ldc_status_t *status)
3502 {
3503 	ldc_chan_t *ldcp;
3504 
3505 	if (handle == NULL || status == NULL) {
3506 		DWARN(DBG_ALL_LDCS, "ldc_status: invalid argument\n");
3507 		return (EINVAL);
3508 	}
3509 	ldcp = (ldc_chan_t *)handle;
3510 
3511 	*status = ((ldc_chan_t *)handle)->status;
3512 
3513 	D1(ldcp->id,
3514 	    "ldc_status: (0x%llx) returned status %d\n", ldcp->id, *status);
3515 	return (0);
3516 }
3517 
3518 
3519 /*
3520  * Set the channel's callback mode - enable/disable callbacks
3521  */
3522 int
3523 ldc_set_cb_mode(ldc_handle_t handle, ldc_cb_mode_t cmode)
3524 {
3525 	ldc_chan_t 	*ldcp;
3526 
3527 	if (handle == NULL) {
3528 		DWARN(DBG_ALL_LDCS,
3529 		    "ldc_set_intr_mode: invalid channel handle\n");
3530 		return (EINVAL);
3531 	}
3532 	ldcp = (ldc_chan_t *)handle;
3533 
3534 	/*
3535 	 * Record no callbacks should be invoked
3536 	 */
3537 	mutex_enter(&ldcp->lock);
3538 
3539 	switch (cmode) {
3540 	case LDC_CB_DISABLE:
3541 		if (!ldcp->cb_enabled) {
3542 			DWARN(ldcp->id,
3543 			    "ldc_set_cb_mode: (0x%llx) callbacks disabled\n",
3544 			    ldcp->id);
3545 			break;
3546 		}
3547 		ldcp->cb_enabled = B_FALSE;
3548 
3549 		D1(ldcp->id, "ldc_set_cb_mode: (0x%llx) disabled callbacks\n",
3550 		    ldcp->id);
3551 		break;
3552 
3553 	case LDC_CB_ENABLE:
3554 		if (ldcp->cb_enabled) {
3555 			DWARN(ldcp->id,
3556 			    "ldc_set_cb_mode: (0x%llx) callbacks enabled\n",
3557 			    ldcp->id);
3558 			break;
3559 		}
3560 		ldcp->cb_enabled = B_TRUE;
3561 
3562 		D1(ldcp->id, "ldc_set_cb_mode: (0x%llx) enabled callbacks\n",
3563 		    ldcp->id);
3564 		break;
3565 	}
3566 
3567 	mutex_exit(&ldcp->lock);
3568 
3569 	return (0);
3570 }
3571 
3572 /*
3573  * Check to see if there are packets on the incoming queue
3574  * Will return hasdata = B_FALSE if there are no packets
3575  */
3576 int
3577 ldc_chkq(ldc_handle_t handle, boolean_t *hasdata)
3578 {
3579 	int 		rv;
3580 	uint64_t 	rx_head, rx_tail;
3581 	ldc_chan_t 	*ldcp;
3582 
3583 	if (handle == NULL) {
3584 		DWARN(DBG_ALL_LDCS, "ldc_chkq: invalid channel handle\n");
3585 		return (EINVAL);
3586 	}
3587 	ldcp = (ldc_chan_t *)handle;
3588 
3589 	*hasdata = B_FALSE;
3590 
3591 	mutex_enter(&ldcp->lock);
3592 
3593 	if (ldcp->tstate != TS_UP) {
3594 		D1(ldcp->id,
3595 		    "ldc_chkq: (0x%llx) channel is not up\n", ldcp->id);
3596 		mutex_exit(&ldcp->lock);
3597 		return (ECONNRESET);
3598 	}
3599 
3600 	/* Read packet(s) from the queue */
3601 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
3602 	    &ldcp->link_state);
3603 	if (rv != 0) {
3604 		cmn_err(CE_WARN,
3605 		    "ldc_chkq: (0x%lx) unable to read queue ptrs", ldcp->id);
3606 		mutex_exit(&ldcp->lock);
3607 		return (EIO);
3608 	}
3609 
3610 	/* reset the channel state if the channel went down */
3611 	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
3612 	    ldcp->link_state == LDC_CHANNEL_RESET) {
3613 		mutex_enter(&ldcp->tx_lock);
3614 		i_ldc_reset(ldcp, B_FALSE);
3615 		mutex_exit(&ldcp->tx_lock);
3616 		mutex_exit(&ldcp->lock);
3617 		return (ECONNRESET);
3618 	}
3619 
3620 	switch (ldcp->mode) {
3621 	case LDC_MODE_RAW:
3622 		/*
3623 		 * In raw mode, there are no ctrl packets, so checking
3624 		 * if the queue is non-empty is sufficient.
3625 		 */
3626 		*hasdata = (rx_head != rx_tail);
3627 		break;
3628 
3629 	case LDC_MODE_UNRELIABLE:
3630 		/*
3631 		 * In unreliable mode, if the queue is non-empty, we need
3632 		 * to check if it actually contains unread data packets.
3633 		 * The queue may just contain ctrl packets.
3634 		 */
3635 		if (rx_head != rx_tail) {
3636 			*hasdata = (i_ldc_chkq(ldcp) == 0);
3637 			/*
3638 			 * If no data packets were found on the queue,
3639 			 * all packets must have been control packets
3640 			 * which will now have been processed, leaving
3641 			 * the queue empty. If the interrupt state
3642 			 * is pending, we need to clear the interrupt
3643 			 * here.
3644 			 */
3645 			if (*hasdata == B_FALSE &&
3646 			    ldcp->rx_intr_state == LDC_INTR_PEND) {
3647 				i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
3648 			}
3649 		}
3650 		break;
3651 
3652 	case LDC_MODE_RELIABLE:
3653 		/*
3654 		 * In reliable mode, first check for 'stream_remains' > 0.
3655 		 * Otherwise, if the data queue head and tail pointers
3656 		 * differ, there must be data to read.
3657 		 */
3658 		if (ldcp->stream_remains > 0)
3659 			*hasdata = B_TRUE;
3660 		else
3661 			*hasdata = (ldcp->rx_dq_head != ldcp->rx_dq_tail);
3662 		break;
3663 
3664 	default:
3665 		cmn_err(CE_WARN, "ldc_chkq: (0x%lx) unexpected channel mode "
3666 		    "(0x%x)", ldcp->id, ldcp->mode);
3667 		mutex_exit(&ldcp->lock);
3668 		return (EIO);
3669 	}
3670 
3671 	mutex_exit(&ldcp->lock);
3672 
3673 	return (0);
3674 }
3675 
3676 
3677 /*
3678  * Read 'size' amount of bytes or less. If incoming buffer
3679  * is more than 'size', ENOBUFS is returned.
3680  *
3681  * On return, size contains the number of bytes read.
3682  */
3683 int
3684 ldc_read(ldc_handle_t handle, caddr_t bufp, size_t *sizep)
3685 {
3686 	ldc_chan_t 	*ldcp;
3687 	uint64_t 	rx_head = 0, rx_tail = 0;
3688 	int		rv = 0, exit_val;
3689 
3690 	if (handle == NULL) {
3691 		DWARN(DBG_ALL_LDCS, "ldc_read: invalid channel handle\n");
3692 		return (EINVAL);
3693 	}
3694 
3695 	ldcp = (ldc_chan_t *)handle;
3696 
3697 	/* channel lock */
3698 	mutex_enter(&ldcp->lock);
3699 
3700 	if (ldcp->tstate != TS_UP) {
3701 		DWARN(ldcp->id,
3702 		    "ldc_read: (0x%llx) channel is not in UP state\n",
3703 		    ldcp->id);
3704 		exit_val = ECONNRESET;
3705 	} else if (ldcp->mode == LDC_MODE_RELIABLE) {
3706 		TRACE_RXDQ_LENGTH(ldcp);
3707 		exit_val = ldcp->read_p(ldcp, bufp, sizep);
3708 
3709 		/*
3710 		 * For reliable mode channels, the interrupt
3711 		 * state is only set to pending during
3712 		 * interrupt handling when the secondary data
3713 		 * queue became full, leaving unprocessed
3714 		 * packets on the Rx queue. If the interrupt
3715 		 * state is pending and space is now available
3716 		 * on the data queue, clear the interrupt.
3717 		 */
3718 		if (ldcp->rx_intr_state == LDC_INTR_PEND &&
3719 		    Q_CONTIG_SPACE(ldcp->rx_dq_head, ldcp->rx_dq_tail,
3720 		    ldcp->rx_dq_entries << LDC_PACKET_SHIFT) >=
3721 		    LDC_PACKET_SIZE) {
3722 			/* data queue is not full */
3723 			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
3724 		}
3725 
3726 		mutex_exit(&ldcp->lock);
3727 		return (exit_val);
3728 	} else {
3729 		exit_val = ldcp->read_p(ldcp, bufp, sizep);
3730 	}
3731 
3732 	/*
3733 	 * if queue has been drained - clear interrupt
3734 	 */
3735 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
3736 	    &ldcp->link_state);
3737 	if (rv != 0) {
3738 		cmn_err(CE_WARN, "ldc_read: (0x%lx) unable to read queue ptrs",
3739 		    ldcp->id);
3740 		mutex_enter(&ldcp->tx_lock);
3741 		i_ldc_reset(ldcp, B_TRUE);
3742 		mutex_exit(&ldcp->tx_lock);
3743 		mutex_exit(&ldcp->lock);
3744 		return (ECONNRESET);
3745 	}
3746 
3747 	if (exit_val == 0) {
3748 		if (ldcp->link_state == LDC_CHANNEL_DOWN ||
3749 		    ldcp->link_state == LDC_CHANNEL_RESET) {
3750 			mutex_enter(&ldcp->tx_lock);
3751 			i_ldc_reset(ldcp, B_FALSE);
3752 			exit_val = ECONNRESET;
3753 			mutex_exit(&ldcp->tx_lock);
3754 		}
3755 		if ((rv == 0) &&
3756 		    (ldcp->rx_intr_state == LDC_INTR_PEND) &&
3757 		    (rx_head == rx_tail)) {
3758 			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
3759 		}
3760 	}
3761 
3762 	mutex_exit(&ldcp->lock);
3763 	return (exit_val);
3764 }
3765 
3766 /*
3767  * Basic raw mondo read -
3768  * no interpretation of mondo contents at all.
3769  *
3770  * Enter and exit with ldcp->lock held by caller
3771  */
3772 static int
3773 i_ldc_read_raw(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep)
3774 {
3775 	uint64_t 	q_size_mask;
3776 	ldc_msg_t 	*msgp;
3777 	uint8_t		*msgbufp;
3778 	int		rv = 0, space;
3779 	uint64_t 	rx_head, rx_tail;
3780 
3781 	space = *sizep;
3782 
3783 	if (space < LDC_PAYLOAD_SIZE_RAW)
3784 		return (ENOBUFS);
3785 
3786 	ASSERT(mutex_owned(&ldcp->lock));
3787 
3788 	/* compute mask for increment */
3789 	q_size_mask = (ldcp->rx_q_entries-1)<<LDC_PACKET_SHIFT;
3790 
3791 	/*
3792 	 * Read packet(s) from the queue
3793 	 */
3794 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
3795 	    &ldcp->link_state);
3796 	if (rv != 0) {
3797 		cmn_err(CE_WARN,
3798 		    "ldc_read_raw: (0x%lx) unable to read queue ptrs",
3799 		    ldcp->id);
3800 		return (EIO);
3801 	}
3802 	D1(ldcp->id, "ldc_read_raw: (0x%llx) rxh=0x%llx,"
3803 	    " rxt=0x%llx, st=0x%llx\n",
3804 	    ldcp->id, rx_head, rx_tail, ldcp->link_state);
3805 
3806 	/* reset the channel state if the channel went down */
3807 	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
3808 	    ldcp->link_state == LDC_CHANNEL_RESET) {
3809 		mutex_enter(&ldcp->tx_lock);
3810 		i_ldc_reset(ldcp, B_FALSE);
3811 		mutex_exit(&ldcp->tx_lock);
3812 		return (ECONNRESET);
3813 	}
3814 
3815 	/*
3816 	 * Check for empty queue
3817 	 */
3818 	if (rx_head == rx_tail) {
3819 		*sizep = 0;
3820 		return (0);
3821 	}
3822 
3823 	/* get the message */
3824 	msgp = (ldc_msg_t *)(ldcp->rx_q_va + rx_head);
3825 
3826 	/* if channel is in RAW mode, copy data and return */
3827 	msgbufp = (uint8_t *)&(msgp->raw[0]);
3828 
3829 	bcopy(msgbufp, target_bufp, LDC_PAYLOAD_SIZE_RAW);
3830 
3831 	DUMP_PAYLOAD(ldcp->id, msgbufp);
3832 
3833 	*sizep = LDC_PAYLOAD_SIZE_RAW;
3834 
3835 	rx_head = (rx_head + LDC_PACKET_SIZE) & q_size_mask;
3836 	rv = i_ldc_set_rx_head(ldcp, rx_head);
3837 
3838 	return (rv);
3839 }
3840 
3841 /*
3842  * Process LDC mondos to build larger packets
3843  * with either un-reliable or reliable delivery.
3844  *
3845  * Enter and exit with ldcp->lock held by caller
3846  */
3847 static int
3848 i_ldc_read_packet(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep)
3849 {
3850 	int		rv = 0;
3851 	uint64_t 	rx_head = 0, rx_tail = 0;
3852 	uint64_t 	curr_head = 0;
3853 	ldc_msg_t 	*msg;
3854 	caddr_t 	target;
3855 	size_t 		len = 0, bytes_read = 0;
3856 	int 		retries = 0;
3857 	uint64_t 	q_va, q_size_mask;
3858 	uint64_t	first_fragment = 0;
3859 
3860 	target = target_bufp;
3861 
3862 	ASSERT(mutex_owned(&ldcp->lock));
3863 
3864 	/* check if the buffer and size are valid */
3865 	if (target_bufp == NULL || *sizep == 0) {
3866 		DWARN(ldcp->id, "ldc_read: (0x%llx) invalid buffer/size\n",
3867 		    ldcp->id);
3868 		return (EINVAL);
3869 	}
3870 
3871 	/* Set q_va and compute increment mask for the appropriate queue */
3872 	if (ldcp->mode == LDC_MODE_RELIABLE) {
3873 		q_va	    = ldcp->rx_dq_va;
3874 		q_size_mask = (ldcp->rx_dq_entries-1)<<LDC_PACKET_SHIFT;
3875 	} else {
3876 		q_va	    = ldcp->rx_q_va;
3877 		q_size_mask = (ldcp->rx_q_entries-1)<<LDC_PACKET_SHIFT;
3878 	}
3879 
3880 	/*
3881 	 * Read packet(s) from the queue
3882 	 */
3883 	rv = ldcp->readq_get_state(ldcp, &curr_head, &rx_tail,
3884 	    &ldcp->link_state);
3885 	if (rv != 0) {
3886 		cmn_err(CE_WARN, "ldc_read: (0x%lx) unable to read queue ptrs",
3887 		    ldcp->id);
3888 		mutex_enter(&ldcp->tx_lock);
3889 		i_ldc_reset(ldcp, B_TRUE);
3890 		mutex_exit(&ldcp->tx_lock);
3891 		return (ECONNRESET);
3892 	}
3893 	D1(ldcp->id, "ldc_read: (0x%llx) chd=0x%llx, tl=0x%llx, st=0x%llx\n",
3894 	    ldcp->id, curr_head, rx_tail, ldcp->link_state);
3895 
3896 	/* reset the channel state if the channel went down */
3897 	if (ldcp->link_state != LDC_CHANNEL_UP)
3898 		goto channel_is_reset;
3899 
3900 	for (;;) {
3901 
3902 		if (curr_head == rx_tail) {
3903 			/*
3904 			 * If a data queue is being used, check the Rx HV
3905 			 * queue. This will copy over any new data packets
3906 			 * that have arrived.
3907 			 */
3908 			if (ldcp->mode == LDC_MODE_RELIABLE)
3909 				(void) i_ldc_chkq(ldcp);
3910 
3911 			rv = ldcp->readq_get_state(ldcp,
3912 			    &rx_head, &rx_tail, &ldcp->link_state);
3913 			if (rv != 0) {
3914 				cmn_err(CE_WARN,
3915 				    "ldc_read: (0x%lx) cannot read queue ptrs",
3916 				    ldcp->id);
3917 				mutex_enter(&ldcp->tx_lock);
3918 				i_ldc_reset(ldcp, B_TRUE);
3919 				mutex_exit(&ldcp->tx_lock);
3920 				return (ECONNRESET);
3921 			}
3922 
3923 			if (ldcp->link_state != LDC_CHANNEL_UP)
3924 				goto channel_is_reset;
3925 
3926 			if (curr_head == rx_tail) {
3927 
3928 				/* If in the middle of a fragmented xfer */
3929 				if (first_fragment != 0) {
3930 
3931 					/* wait for ldc_delay usecs */
3932 					drv_usecwait(ldc_delay);
3933 
3934 					if (++retries < ldc_max_retries)
3935 						continue;
3936 
3937 					*sizep = 0;
3938 					if (ldcp->mode != LDC_MODE_RELIABLE)
3939 						ldcp->last_msg_rcd =
3940 						    first_fragment - 1;
3941 					DWARN(DBG_ALL_LDCS, "ldc_read: "
3942 					    "(0x%llx) read timeout", ldcp->id);
3943 					return (EAGAIN);
3944 				}
3945 				*sizep = 0;
3946 				break;
3947 			}
3948 		}
3949 		retries = 0;
3950 
3951 		D2(ldcp->id,
3952 		    "ldc_read: (0x%llx) chd=0x%llx, rxhd=0x%llx, rxtl=0x%llx\n",
3953 		    ldcp->id, curr_head, rx_head, rx_tail);
3954 
3955 		/* get the message */
3956 		msg = (ldc_msg_t *)(q_va + curr_head);
3957 
3958 		DUMP_LDC_PKT(ldcp, "ldc_read received pkt",
3959 		    ldcp->rx_q_va + curr_head);
3960 
3961 		/* Check the message ID for the message received */
3962 		if (ldcp->mode != LDC_MODE_RELIABLE) {
3963 			if ((rv = i_ldc_check_seqid(ldcp, msg)) != 0) {
3964 
3965 				DWARN(ldcp->id, "ldc_read: (0x%llx) seqid "
3966 				    "error, q_ptrs=0x%lx,0x%lx",
3967 				    ldcp->id, rx_head, rx_tail);
3968 
3969 				/* throw away data */
3970 				bytes_read = 0;
3971 
3972 				/* Reset last_msg_rcd to start of message */
3973 				if (first_fragment != 0) {
3974 					ldcp->last_msg_rcd = first_fragment - 1;
3975 					first_fragment = 0;
3976 				}
3977 				/*
3978 				 * Send a NACK -- invalid seqid
3979 				 * get the current tail for the response
3980 				 */
3981 				rv = i_ldc_send_pkt(ldcp, msg->type, LDC_NACK,
3982 				    (msg->ctrl & LDC_CTRL_MASK));
3983 				if (rv) {
3984 					cmn_err(CE_NOTE,
3985 					    "ldc_read: (0x%lx) err sending "
3986 					    "NACK msg\n", ldcp->id);
3987 
3988 					/* if cannot send NACK - reset chan */
3989 					mutex_enter(&ldcp->tx_lock);
3990 					i_ldc_reset(ldcp, B_FALSE);
3991 					mutex_exit(&ldcp->tx_lock);
3992 					rv = ECONNRESET;
3993 					break;
3994 				}
3995 
3996 				/* purge receive queue */
3997 				rv = i_ldc_set_rx_head(ldcp, rx_tail);
3998 
3999 				break;
4000 			}
4001 
4002 			/*
4003 			 * Process any messages of type CTRL messages
4004 			 * Future implementations should try to pass these
4005 			 * to LDC link by resetting the intr state.
4006 			 *
4007 			 * NOTE: not done as a switch() as type can be
4008 			 * both ctrl+data
4009 			 */
4010 			if (msg->type & LDC_CTRL) {
4011 				if (rv = i_ldc_ctrlmsg(ldcp, msg)) {
4012 					if (rv == EAGAIN)
4013 						continue;
4014 					rv = i_ldc_set_rx_head(ldcp, rx_tail);
4015 					*sizep = 0;
4016 					bytes_read = 0;
4017 					break;
4018 				}
4019 			}
4020 
4021 			/* process data ACKs */
4022 			if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) {
4023 				if (rv = i_ldc_process_data_ACK(ldcp, msg)) {
4024 					*sizep = 0;
4025 					bytes_read = 0;
4026 					break;
4027 				}
4028 			}
4029 
4030 			/* process data NACKs */
4031 			if ((msg->type & LDC_DATA) && (msg->stype & LDC_NACK)) {
4032 				DWARN(ldcp->id,
4033 				    "ldc_read: (0x%llx) received DATA/NACK",
4034 				    ldcp->id);
4035 				mutex_enter(&ldcp->tx_lock);
4036 				i_ldc_reset(ldcp, B_TRUE);
4037 				mutex_exit(&ldcp->tx_lock);
4038 				return (ECONNRESET);
4039 			}
4040 		}
4041 
4042 		/* process data messages */
4043 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_INFO)) {
4044 
4045 			uint8_t *msgbuf = (uint8_t *)(
4046 			    (ldcp->mode == LDC_MODE_RELIABLE) ?
4047 			    msg->rdata : msg->udata);
4048 
4049 			D2(ldcp->id,
4050 			    "ldc_read: (0x%llx) received data msg\n", ldcp->id);
4051 
4052 			/* get the packet length */
4053 			len = (msg->env & LDC_LEN_MASK);
4054 
4055 				/*
4056 				 * FUTURE OPTIMIZATION:
4057 				 * dont need to set q head for every
4058 				 * packet we read just need to do this when
4059 				 * we are done or need to wait for more
4060 				 * mondos to make a full packet - this is
4061 				 * currently expensive.
4062 				 */
4063 
4064 			if (first_fragment == 0) {
4065 
4066 				/*
4067 				 * first packets should always have the start
4068 				 * bit set (even for a single packet). If not
4069 				 * throw away the packet
4070 				 */
4071 				if (!(msg->env & LDC_FRAG_START)) {
4072 
4073 					DWARN(DBG_ALL_LDCS,
4074 					    "ldc_read: (0x%llx) not start - "
4075 					    "frag=%x\n", ldcp->id,
4076 					    (msg->env) & LDC_FRAG_MASK);
4077 
4078 					/* toss pkt, inc head, cont reading */
4079 					bytes_read = 0;
4080 					target = target_bufp;
4081 					curr_head =
4082 					    (curr_head + LDC_PACKET_SIZE)
4083 					    & q_size_mask;
4084 					if (rv = ldcp->readq_set_head(ldcp,
4085 					    curr_head))
4086 						break;
4087 
4088 					continue;
4089 				}
4090 
4091 				first_fragment = msg->seqid;
4092 			} else {
4093 				/* check to see if this is a pkt w/ START bit */
4094 				if (msg->env & LDC_FRAG_START) {
4095 					DWARN(DBG_ALL_LDCS,
4096 					    "ldc_read:(0x%llx) unexpected pkt"
4097 					    " env=0x%x discarding %d bytes,"
4098 					    " lastmsg=%d, currentmsg=%d\n",
4099 					    ldcp->id, msg->env&LDC_FRAG_MASK,
4100 					    bytes_read, ldcp->last_msg_rcd,
4101 					    msg->seqid);
4102 
4103 					/* throw data we have read so far */
4104 					bytes_read = 0;
4105 					target = target_bufp;
4106 					first_fragment = msg->seqid;
4107 
4108 					if (rv = ldcp->readq_set_head(ldcp,
4109 					    curr_head))
4110 						break;
4111 				}
4112 			}
4113 
4114 			/* copy (next) pkt into buffer */
4115 			if (len <= (*sizep - bytes_read)) {
4116 				bcopy(msgbuf, target, len);
4117 				target += len;
4118 				bytes_read += len;
4119 			} else {
4120 				/*
4121 				 * there is not enough space in the buffer to
4122 				 * read this pkt. throw message away & continue
4123 				 * reading data from queue
4124 				 */
4125 				DWARN(DBG_ALL_LDCS,
4126 				    "ldc_read: (0x%llx) buffer too small, "
4127 				    "head=0x%lx, expect=%d, got=%d\n", ldcp->id,
4128 				    curr_head, *sizep, bytes_read+len);
4129 
4130 				first_fragment = 0;
4131 				target = target_bufp;
4132 				bytes_read = 0;
4133 
4134 				/* throw away everything received so far */
4135 				if (rv = ldcp->readq_set_head(ldcp, curr_head))
4136 					break;
4137 
4138 				/* continue reading remaining pkts */
4139 				continue;
4140 			}
4141 		}
4142 
4143 		/* set the message id */
4144 		if (ldcp->mode != LDC_MODE_RELIABLE)
4145 			ldcp->last_msg_rcd = msg->seqid;
4146 
4147 		/* move the head one position */
4148 		curr_head = (curr_head + LDC_PACKET_SIZE) & q_size_mask;
4149 
4150 		if (msg->env & LDC_FRAG_STOP) {
4151 
4152 			/*
4153 			 * All pkts that are part of this fragmented transfer
4154 			 * have been read or this was a single pkt read
4155 			 * or there was an error
4156 			 */
4157 
4158 			/* set the queue head */
4159 			if (rv = ldcp->readq_set_head(ldcp, curr_head))
4160 				bytes_read = 0;
4161 
4162 			*sizep = bytes_read;
4163 
4164 			break;
4165 		}
4166 
4167 		/* advance head if it is a CTRL packet or a DATA ACK packet */
4168 		if ((msg->type & LDC_CTRL) ||
4169 		    ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK))) {
4170 
4171 			/* set the queue head */
4172 			if (rv = ldcp->readq_set_head(ldcp, curr_head)) {
4173 				bytes_read = 0;
4174 				break;
4175 			}
4176 
4177 			D2(ldcp->id, "ldc_read: (0x%llx) set ACK qhead 0x%llx",
4178 			    ldcp->id, curr_head);
4179 		}
4180 
4181 	} /* for (;;) */
4182 
4183 	D2(ldcp->id, "ldc_read: (0x%llx) end size=%d", ldcp->id, *sizep);
4184 
4185 	return (rv);
4186 
4187 channel_is_reset:
4188 	mutex_enter(&ldcp->tx_lock);
4189 	i_ldc_reset(ldcp, B_FALSE);
4190 	mutex_exit(&ldcp->tx_lock);
4191 	return (ECONNRESET);
4192 }
4193 
4194 /*
4195  * Fetch and buffer incoming packets so we can hand them back as
4196  * a basic byte stream.
4197  *
4198  * Enter and exit with ldcp->lock held by caller
4199  */
4200 static int
4201 i_ldc_read_stream(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep)
4202 {
4203 	int	rv;
4204 	size_t	size;
4205 
4206 	ASSERT(mutex_owned(&ldcp->lock));
4207 
4208 	D2(ldcp->id, "i_ldc_read_stream: (0x%llx) buffer size=%d",
4209 	    ldcp->id, *sizep);
4210 
4211 	if (ldcp->stream_remains == 0) {
4212 		size = ldcp->mtu;
4213 		rv = i_ldc_read_packet(ldcp,
4214 		    (caddr_t)ldcp->stream_bufferp, &size);
4215 		D2(ldcp->id, "i_ldc_read_stream: read packet (0x%llx) size=%d",
4216 		    ldcp->id, size);
4217 
4218 		if (rv != 0)
4219 			return (rv);
4220 
4221 		ldcp->stream_remains = size;
4222 		ldcp->stream_offset = 0;
4223 	}
4224 
4225 	size = MIN(ldcp->stream_remains, *sizep);
4226 
4227 	bcopy(ldcp->stream_bufferp + ldcp->stream_offset, target_bufp, size);
4228 	ldcp->stream_offset += size;
4229 	ldcp->stream_remains -= size;
4230 
4231 	D2(ldcp->id, "i_ldc_read_stream: (0x%llx) fill from buffer size=%d",
4232 	    ldcp->id, size);
4233 
4234 	*sizep = size;
4235 	return (0);
4236 }
4237 
4238 /*
4239  * Write specified amount of bytes to the channel
4240  * in multiple pkts of pkt_payload size. Each
4241  * packet is tagged with an unique packet ID in
4242  * the case of a reliable link.
4243  *
4244  * On return, size contains the number of bytes written.
4245  */
4246 int
4247 ldc_write(ldc_handle_t handle, caddr_t buf, size_t *sizep)
4248 {
4249 	ldc_chan_t	*ldcp;
4250 	int		rv = 0;
4251 
4252 	if (handle == NULL) {
4253 		DWARN(DBG_ALL_LDCS, "ldc_write: invalid channel handle\n");
4254 		return (EINVAL);
4255 	}
4256 	ldcp = (ldc_chan_t *)handle;
4257 
4258 	/* check if writes can occur */
4259 	if (!mutex_tryenter(&ldcp->tx_lock)) {
4260 		/*
4261 		 * Could not get the lock - channel could
4262 		 * be in the process of being unconfigured
4263 		 * or reader has encountered an error
4264 		 */
4265 		return (EAGAIN);
4266 	}
4267 
4268 	/* check if non-zero data to write */
4269 	if (buf == NULL || sizep == NULL) {
4270 		DWARN(ldcp->id, "ldc_write: (0x%llx) invalid data write\n",
4271 		    ldcp->id);
4272 		mutex_exit(&ldcp->tx_lock);
4273 		return (EINVAL);
4274 	}
4275 
4276 	if (*sizep == 0) {
4277 		DWARN(ldcp->id, "ldc_write: (0x%llx) write size of zero\n",
4278 		    ldcp->id);
4279 		mutex_exit(&ldcp->tx_lock);
4280 		return (0);
4281 	}
4282 
4283 	/* Check if channel is UP for data exchange */
4284 	if (ldcp->tstate != TS_UP) {
4285 		DWARN(ldcp->id,
4286 		    "ldc_write: (0x%llx) channel is not in UP state\n",
4287 		    ldcp->id);
4288 		*sizep = 0;
4289 		rv = ECONNRESET;
4290 	} else {
4291 		rv = ldcp->write_p(ldcp, buf, sizep);
4292 	}
4293 
4294 	mutex_exit(&ldcp->tx_lock);
4295 
4296 	return (rv);
4297 }
4298 
4299 /*
4300  * Write a raw packet to the channel
4301  * On return, size contains the number of bytes written.
4302  */
4303 static int
4304 i_ldc_write_raw(ldc_chan_t *ldcp, caddr_t buf, size_t *sizep)
4305 {
4306 	ldc_msg_t 	*ldcmsg;
4307 	uint64_t 	tx_head, tx_tail, new_tail;
4308 	int		rv = 0;
4309 	size_t		size;
4310 
4311 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
4312 	ASSERT(ldcp->mode == LDC_MODE_RAW);
4313 
4314 	size = *sizep;
4315 
4316 	/*
4317 	 * Check to see if the packet size is less than or
4318 	 * equal to packet size support in raw mode
4319 	 */
4320 	if (size > ldcp->pkt_payload) {
4321 		DWARN(ldcp->id,
4322 		    "ldc_write: (0x%llx) invalid size (0x%llx) for RAW mode\n",
4323 		    ldcp->id, *sizep);
4324 		*sizep = 0;
4325 		return (EMSGSIZE);
4326 	}
4327 
4328 	/* get the qptrs for the tx queue */
4329 	rv = hv_ldc_tx_get_state(ldcp->id,
4330 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
4331 	if (rv != 0) {
4332 		cmn_err(CE_WARN,
4333 		    "ldc_write: (0x%lx) cannot read queue ptrs\n", ldcp->id);
4334 		*sizep = 0;
4335 		return (EIO);
4336 	}
4337 
4338 	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
4339 	    ldcp->link_state == LDC_CHANNEL_RESET) {
4340 		DWARN(ldcp->id,
4341 		    "ldc_write: (0x%llx) channel down/reset\n", ldcp->id);
4342 
4343 		*sizep = 0;
4344 		if (mutex_tryenter(&ldcp->lock)) {
4345 			i_ldc_reset(ldcp, B_FALSE);
4346 			mutex_exit(&ldcp->lock);
4347 		} else {
4348 			/*
4349 			 * Release Tx lock, and then reacquire channel
4350 			 * and Tx lock in correct order
4351 			 */
4352 			mutex_exit(&ldcp->tx_lock);
4353 			mutex_enter(&ldcp->lock);
4354 			mutex_enter(&ldcp->tx_lock);
4355 			i_ldc_reset(ldcp, B_FALSE);
4356 			mutex_exit(&ldcp->lock);
4357 		}
4358 		return (ECONNRESET);
4359 	}
4360 
4361 	tx_tail = ldcp->tx_tail;
4362 	tx_head = ldcp->tx_head;
4363 	new_tail = (tx_tail + LDC_PACKET_SIZE) &
4364 	    ((ldcp->tx_q_entries-1) << LDC_PACKET_SHIFT);
4365 
4366 	if (new_tail == tx_head) {
4367 		DWARN(DBG_ALL_LDCS,
4368 		    "ldc_write: (0x%llx) TX queue is full\n", ldcp->id);
4369 		*sizep = 0;
4370 		return (EWOULDBLOCK);
4371 	}
4372 
4373 	D2(ldcp->id, "ldc_write: (0x%llx) start xfer size=%d",
4374 	    ldcp->id, size);
4375 
4376 	/* Send the data now */
4377 	ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
4378 
4379 	/* copy the data into pkt */
4380 	bcopy((uint8_t *)buf, ldcmsg, size);
4381 
4382 	/* increment tail */
4383 	tx_tail = new_tail;
4384 
4385 	/*
4386 	 * All packets have been copied into the TX queue
4387 	 * update the tail ptr in the HV
4388 	 */
4389 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
4390 	if (rv) {
4391 		if (rv == EWOULDBLOCK) {
4392 			DWARN(ldcp->id, "ldc_write: (0x%llx) write timed out\n",
4393 			    ldcp->id);
4394 			*sizep = 0;
4395 			return (EWOULDBLOCK);
4396 		}
4397 
4398 		*sizep = 0;
4399 		if (mutex_tryenter(&ldcp->lock)) {
4400 			i_ldc_reset(ldcp, B_FALSE);
4401 			mutex_exit(&ldcp->lock);
4402 		} else {
4403 			/*
4404 			 * Release Tx lock, and then reacquire channel
4405 			 * and Tx lock in correct order
4406 			 */
4407 			mutex_exit(&ldcp->tx_lock);
4408 			mutex_enter(&ldcp->lock);
4409 			mutex_enter(&ldcp->tx_lock);
4410 			i_ldc_reset(ldcp, B_FALSE);
4411 			mutex_exit(&ldcp->lock);
4412 		}
4413 		return (ECONNRESET);
4414 	}
4415 
4416 	ldcp->tx_tail = tx_tail;
4417 	*sizep = size;
4418 
4419 	D2(ldcp->id, "ldc_write: (0x%llx) end xfer size=%d", ldcp->id, size);
4420 
4421 	return (rv);
4422 }
4423 
4424 
4425 /*
4426  * Write specified amount of bytes to the channel
4427  * in multiple pkts of pkt_payload size. Each
4428  * packet is tagged with an unique packet ID in
4429  * the case of a reliable link.
4430  *
4431  * On return, size contains the number of bytes written.
4432  * This function needs to ensure that the write size is < MTU size
4433  */
4434 static int
4435 i_ldc_write_packet(ldc_chan_t *ldcp, caddr_t buf, size_t *size)
4436 {
4437 	ldc_msg_t 	*ldcmsg;
4438 	uint64_t 	tx_head, tx_tail, new_tail, start;
4439 	uint64_t	txq_size_mask, numavail;
4440 	uint8_t 	*msgbuf, *source = (uint8_t *)buf;
4441 	size_t 		len, bytes_written = 0, remaining;
4442 	int		rv;
4443 	uint32_t	curr_seqid;
4444 
4445 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
4446 
4447 	ASSERT(ldcp->mode == LDC_MODE_RELIABLE ||
4448 	    ldcp->mode == LDC_MODE_UNRELIABLE);
4449 
4450 	/* compute mask for increment */
4451 	txq_size_mask = (ldcp->tx_q_entries - 1) << LDC_PACKET_SHIFT;
4452 
4453 	/* get the qptrs for the tx queue */
4454 	rv = hv_ldc_tx_get_state(ldcp->id,
4455 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
4456 	if (rv != 0) {
4457 		cmn_err(CE_WARN,
4458 		    "ldc_write: (0x%lx) cannot read queue ptrs\n", ldcp->id);
4459 		*size = 0;
4460 		return (EIO);
4461 	}
4462 
4463 	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
4464 	    ldcp->link_state == LDC_CHANNEL_RESET) {
4465 		DWARN(ldcp->id,
4466 		    "ldc_write: (0x%llx) channel down/reset\n", ldcp->id);
4467 		*size = 0;
4468 		if (mutex_tryenter(&ldcp->lock)) {
4469 			i_ldc_reset(ldcp, B_FALSE);
4470 			mutex_exit(&ldcp->lock);
4471 		} else {
4472 			/*
4473 			 * Release Tx lock, and then reacquire channel
4474 			 * and Tx lock in correct order
4475 			 */
4476 			mutex_exit(&ldcp->tx_lock);
4477 			mutex_enter(&ldcp->lock);
4478 			mutex_enter(&ldcp->tx_lock);
4479 			i_ldc_reset(ldcp, B_FALSE);
4480 			mutex_exit(&ldcp->lock);
4481 		}
4482 		return (ECONNRESET);
4483 	}
4484 
4485 	tx_tail = ldcp->tx_tail;
4486 	new_tail = (tx_tail + LDC_PACKET_SIZE) %
4487 	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
4488 
4489 	/*
4490 	 * Check to see if the queue is full. The check is done using
4491 	 * the appropriate head based on the link mode.
4492 	 */
4493 	i_ldc_get_tx_head(ldcp, &tx_head);
4494 
4495 	if (new_tail == tx_head) {
4496 		DWARN(DBG_ALL_LDCS,
4497 		    "ldc_write: (0x%llx) TX queue is full\n", ldcp->id);
4498 		*size = 0;
4499 		return (EWOULDBLOCK);
4500 	}
4501 
4502 	/*
4503 	 * Make sure that the LDC Tx queue has enough space
4504 	 */
4505 	numavail = (tx_head >> LDC_PACKET_SHIFT) - (tx_tail >> LDC_PACKET_SHIFT)
4506 	    + ldcp->tx_q_entries - 1;
4507 	numavail %= ldcp->tx_q_entries;
4508 
4509 	if (*size > (numavail * ldcp->pkt_payload)) {
4510 		DWARN(DBG_ALL_LDCS,
4511 		    "ldc_write: (0x%llx) TX queue has no space\n", ldcp->id);
4512 		return (EWOULDBLOCK);
4513 	}
4514 
4515 	D2(ldcp->id, "ldc_write: (0x%llx) start xfer size=%d",
4516 	    ldcp->id, *size);
4517 
4518 	/* Send the data now */
4519 	bytes_written = 0;
4520 	curr_seqid = ldcp->last_msg_snt;
4521 	start = tx_tail;
4522 
4523 	while (*size > bytes_written) {
4524 
4525 		ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
4526 
4527 		msgbuf = (uint8_t *)((ldcp->mode == LDC_MODE_RELIABLE) ?
4528 		    ldcmsg->rdata : ldcmsg->udata);
4529 
4530 		ldcmsg->type = LDC_DATA;
4531 		ldcmsg->stype = LDC_INFO;
4532 		ldcmsg->ctrl = 0;
4533 
4534 		remaining = *size - bytes_written;
4535 		len = min(ldcp->pkt_payload, remaining);
4536 		ldcmsg->env = (uint8_t)len;
4537 
4538 		curr_seqid++;
4539 		ldcmsg->seqid = curr_seqid;
4540 
4541 		/* copy the data into pkt */
4542 		bcopy(source, msgbuf, len);
4543 
4544 		source += len;
4545 		bytes_written += len;
4546 
4547 		/* increment tail */
4548 		tx_tail = (tx_tail + LDC_PACKET_SIZE) & txq_size_mask;
4549 
4550 		ASSERT(tx_tail != tx_head);
4551 	}
4552 
4553 	/* Set the start and stop bits */
4554 	ldcmsg->env |= LDC_FRAG_STOP;
4555 	ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + start);
4556 	ldcmsg->env |= LDC_FRAG_START;
4557 
4558 	/*
4559 	 * All packets have been copied into the TX queue
4560 	 * update the tail ptr in the HV
4561 	 */
4562 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
4563 	if (rv == 0) {
4564 		ldcp->tx_tail = tx_tail;
4565 		ldcp->last_msg_snt = curr_seqid;
4566 		*size = bytes_written;
4567 	} else {
4568 		int rv2;
4569 
4570 		if (rv != EWOULDBLOCK) {
4571 			*size = 0;
4572 			if (mutex_tryenter(&ldcp->lock)) {
4573 				i_ldc_reset(ldcp, B_FALSE);
4574 				mutex_exit(&ldcp->lock);
4575 			} else {
4576 				/*
4577 				 * Release Tx lock, and then reacquire channel
4578 				 * and Tx lock in correct order
4579 				 */
4580 				mutex_exit(&ldcp->tx_lock);
4581 				mutex_enter(&ldcp->lock);
4582 				mutex_enter(&ldcp->tx_lock);
4583 				i_ldc_reset(ldcp, B_FALSE);
4584 				mutex_exit(&ldcp->lock);
4585 			}
4586 			return (ECONNRESET);
4587 		}
4588 
4589 		D1(ldcp->id, "hv_tx_set_tail returns 0x%x (head 0x%x, "
4590 		    "old tail 0x%x, new tail 0x%x, qsize=0x%x)\n",
4591 		    rv, ldcp->tx_head, ldcp->tx_tail, tx_tail,
4592 		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT));
4593 
4594 		rv2 = hv_ldc_tx_get_state(ldcp->id,
4595 		    &tx_head, &tx_tail, &ldcp->link_state);
4596 
4597 		D1(ldcp->id, "hv_ldc_tx_get_state returns 0x%x "
4598 		    "(head 0x%x, tail 0x%x state 0x%x)\n",
4599 		    rv2, tx_head, tx_tail, ldcp->link_state);
4600 
4601 		*size = 0;
4602 	}
4603 
4604 	D2(ldcp->id, "ldc_write: (0x%llx) end xfer size=%d", ldcp->id, *size);
4605 
4606 	return (rv);
4607 }
4608 
4609 /*
4610  * Write specified amount of bytes to the channel
4611  * in multiple pkts of pkt_payload size. Each
4612  * packet is tagged with an unique packet ID in
4613  * the case of a reliable link.
4614  *
4615  * On return, size contains the number of bytes written.
4616  * This function needs to ensure that the write size is < MTU size
4617  */
4618 static int
4619 i_ldc_write_stream(ldc_chan_t *ldcp, caddr_t buf, size_t *sizep)
4620 {
4621 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
4622 	ASSERT(ldcp->mode == LDC_MODE_RELIABLE);
4623 
4624 	/* Truncate packet to max of MTU size */
4625 	if (*sizep > ldcp->mtu) *sizep = ldcp->mtu;
4626 	return (i_ldc_write_packet(ldcp, buf, sizep));
4627 }
4628 
4629 
4630 /*
4631  * Interfaces for channel nexus to register/unregister with LDC module
4632  * The nexus will register functions to be used to register individual
4633  * channels with the nexus and enable interrupts for the channels
4634  */
4635 int
4636 ldc_register(ldc_cnex_t *cinfo)
4637 {
4638 	ldc_chan_t	*ldcp;
4639 
4640 	if (cinfo == NULL || cinfo->dip == NULL ||
4641 	    cinfo->reg_chan == NULL || cinfo->unreg_chan == NULL ||
4642 	    cinfo->add_intr == NULL || cinfo->rem_intr == NULL ||
4643 	    cinfo->clr_intr == NULL) {
4644 
4645 		DWARN(DBG_ALL_LDCS, "ldc_register: invalid nexus info\n");
4646 		return (EINVAL);
4647 	}
4648 
4649 	mutex_enter(&ldcssp->lock);
4650 
4651 	/* nexus registration */
4652 	ldcssp->cinfo.dip = cinfo->dip;
4653 	ldcssp->cinfo.reg_chan = cinfo->reg_chan;
4654 	ldcssp->cinfo.unreg_chan = cinfo->unreg_chan;
4655 	ldcssp->cinfo.add_intr = cinfo->add_intr;
4656 	ldcssp->cinfo.rem_intr = cinfo->rem_intr;
4657 	ldcssp->cinfo.clr_intr = cinfo->clr_intr;
4658 
4659 	/* register any channels that might have been previously initialized */
4660 	ldcp = ldcssp->chan_list;
4661 	while (ldcp) {
4662 		if ((ldcp->tstate & TS_QCONF_RDY) &&
4663 		    (ldcp->tstate & TS_CNEX_RDY) == 0)
4664 			(void) i_ldc_register_channel(ldcp);
4665 
4666 		ldcp = ldcp->next;
4667 	}
4668 
4669 	mutex_exit(&ldcssp->lock);
4670 
4671 	return (0);
4672 }
4673 
4674 int
4675 ldc_unregister(ldc_cnex_t *cinfo)
4676 {
4677 	if (cinfo == NULL || cinfo->dip == NULL) {
4678 		DWARN(DBG_ALL_LDCS, "ldc_unregister: invalid nexus info\n");
4679 		return (EINVAL);
4680 	}
4681 
4682 	mutex_enter(&ldcssp->lock);
4683 
4684 	if (cinfo->dip != ldcssp->cinfo.dip) {
4685 		DWARN(DBG_ALL_LDCS, "ldc_unregister: invalid dip\n");
4686 		mutex_exit(&ldcssp->lock);
4687 		return (EINVAL);
4688 	}
4689 
4690 	/* nexus unregister */
4691 	ldcssp->cinfo.dip = NULL;
4692 	ldcssp->cinfo.reg_chan = NULL;
4693 	ldcssp->cinfo.unreg_chan = NULL;
4694 	ldcssp->cinfo.add_intr = NULL;
4695 	ldcssp->cinfo.rem_intr = NULL;
4696 	ldcssp->cinfo.clr_intr = NULL;
4697 
4698 	mutex_exit(&ldcssp->lock);
4699 
4700 	return (0);
4701 }
4702