xref: /titanic_50/usr/src/uts/sun4v/io/ldc.c (revision 64e4e50ab4bc3670a29e5691e3dd935c94f0a5d7)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 /*
27  * sun4v LDC Link Layer
28  */
29 #include <sys/types.h>
30 #include <sys/file.h>
31 #include <sys/errno.h>
32 #include <sys/open.h>
33 #include <sys/cred.h>
34 #include <sys/kmem.h>
35 #include <sys/conf.h>
36 #include <sys/cmn_err.h>
37 #include <sys/ksynch.h>
38 #include <sys/modctl.h>
39 #include <sys/stat.h> /* needed for S_IFBLK and S_IFCHR */
40 #include <sys/debug.h>
41 #include <sys/cred.h>
42 #include <sys/promif.h>
43 #include <sys/ddi.h>
44 #include <sys/sunddi.h>
45 #include <sys/cyclic.h>
46 #include <sys/machsystm.h>
47 #include <sys/vm.h>
48 #include <sys/cpu.h>
49 #include <sys/intreg.h>
50 #include <sys/machcpuvar.h>
51 #include <sys/mmu.h>
52 #include <sys/pte.h>
53 #include <vm/hat.h>
54 #include <vm/as.h>
55 #include <vm/hat_sfmmu.h>
56 #include <sys/vm_machparam.h>
57 #include <vm/seg_kmem.h>
58 #include <vm/seg_kpm.h>
59 #include <sys/note.h>
60 #include <sys/ivintr.h>
61 #include <sys/hypervisor_api.h>
62 #include <sys/ldc.h>
63 #include <sys/ldc_impl.h>
64 #include <sys/cnex.h>
65 #include <sys/hsvc.h>
66 #include <sys/sdt.h>
67 #include <sys/kldc.h>
68 
69 /* Core internal functions */
70 int i_ldc_h2v_error(int h_error);
71 void i_ldc_reset(ldc_chan_t *ldcp, boolean_t force_reset);
72 
73 static int i_ldc_txq_reconf(ldc_chan_t *ldcp);
74 static int i_ldc_rxq_reconf(ldc_chan_t *ldcp, boolean_t force_reset);
75 static void i_ldc_rxq_drain(ldc_chan_t *ldcp);
76 static void i_ldc_reset_state(ldc_chan_t *ldcp);
77 static void i_ldc_debug_enter(void);
78 
79 static int i_ldc_get_tx_tail(ldc_chan_t *ldcp, uint64_t *tail);
80 static void i_ldc_get_tx_head(ldc_chan_t *ldcp, uint64_t *head);
81 static int i_ldc_set_tx_tail(ldc_chan_t *ldcp, uint64_t tail);
82 static int i_ldc_set_rx_head(ldc_chan_t *ldcp, uint64_t head);
83 static int i_ldc_send_pkt(ldc_chan_t *ldcp, uint8_t pkttype, uint8_t subtype,
84     uint8_t ctrlmsg);
85 
86 static int  i_ldc_set_rxdq_head(ldc_chan_t *ldcp, uint64_t head);
87 static void i_ldc_rxdq_copy(ldc_chan_t *ldcp, uint64_t *head);
88 static uint64_t i_ldc_dq_rx_get_state(ldc_chan_t *ldcp, uint64_t *head,
89     uint64_t *tail, uint64_t *link_state);
90 static uint64_t i_ldc_hvq_rx_get_state(ldc_chan_t *ldcp, uint64_t *head,
91     uint64_t *tail, uint64_t *link_state);
92 static int i_ldc_rx_ackpeek(ldc_chan_t *ldcp, uint64_t rx_head,
93     uint64_t rx_tail);
94 static uint_t i_ldc_chkq(ldc_chan_t *ldcp);
95 
96 /* Interrupt handling functions */
97 static uint_t i_ldc_tx_hdlr(caddr_t arg1, caddr_t arg2);
98 static uint_t i_ldc_rx_hdlr(caddr_t arg1, caddr_t arg2);
99 static uint_t i_ldc_rx_process_hvq(ldc_chan_t *ldcp, boolean_t *notify_client,
100     uint64_t *notify_event);
101 static void i_ldc_clear_intr(ldc_chan_t *ldcp, cnex_intrtype_t itype);
102 
103 /* Read method functions */
104 static int i_ldc_read_raw(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep);
105 static int i_ldc_read_packet(ldc_chan_t *ldcp, caddr_t target_bufp,
106 	size_t *sizep);
107 static int i_ldc_read_stream(ldc_chan_t *ldcp, caddr_t target_bufp,
108 	size_t *sizep);
109 
110 /* Write method functions */
111 static int i_ldc_write_raw(ldc_chan_t *ldcp, caddr_t target_bufp,
112 	size_t *sizep);
113 static int i_ldc_write_packet(ldc_chan_t *ldcp, caddr_t target_bufp,
114 	size_t *sizep);
115 static int i_ldc_write_stream(ldc_chan_t *ldcp, caddr_t target_bufp,
116 	size_t *sizep);
117 
118 /* Pkt processing internal functions */
119 static int i_ldc_check_seqid(ldc_chan_t *ldcp, ldc_msg_t *ldcmsg);
120 static int i_ldc_ctrlmsg(ldc_chan_t *ldcp, ldc_msg_t *ldcmsg);
121 static int i_ldc_process_VER(ldc_chan_t *ldcp, ldc_msg_t *msg);
122 static int i_ldc_process_RTS(ldc_chan_t *ldcp, ldc_msg_t *msg);
123 static int i_ldc_process_RTR(ldc_chan_t *ldcp, ldc_msg_t *msg);
124 static int i_ldc_process_RDX(ldc_chan_t *ldcp, ldc_msg_t *msg);
125 static int i_ldc_process_data_ACK(ldc_chan_t *ldcp, ldc_msg_t *msg);
126 
127 /* Imported functions */
128 extern void i_ldc_mem_set_hsvc_vers(uint64_t major, uint64_t minor);
129 extern void i_ldc_init_mapin(ldc_soft_state_t *ldcssp, uint64_t major,
130 	uint64_t minor);
131 
132 /* LDC Version */
133 static ldc_ver_t ldc_versions[] = { {1, 0} };
134 
135 /* number of supported versions */
136 #define	LDC_NUM_VERS	(sizeof (ldc_versions) / sizeof (ldc_versions[0]))
137 
138 /* Invalid value for the ldc_chan_t rx_ack_head field */
139 #define	ACKPEEK_HEAD_INVALID	((uint64_t)-1)
140 
141 
142 /* Module State Pointer */
143 ldc_soft_state_t *ldcssp;
144 
145 static struct modldrv md = {
146 	&mod_miscops,			/* This is a misc module */
147 	"sun4v LDC module",		/* Name of the module */
148 };
149 
150 static struct modlinkage ml = {
151 	MODREV_1,
152 	&md,
153 	NULL
154 };
155 
156 static uint64_t ldc_sup_minor;		/* Supported minor number */
157 static hsvc_info_t ldc_hsvc = {
158 	HSVC_REV_1, NULL, HSVC_GROUP_LDC, 1, 2, "ldc"
159 };
160 
161 /*
162  * The no. of MTU size messages that can be stored in
163  * the LDC Tx queue. The number of Tx queue entries is
164  * then computed as (mtu * mtu_msgs)/sizeof(queue_entry)
165  */
166 uint64_t ldc_mtu_msgs = LDC_MTU_MSGS;
167 
168 /*
169  * The minimum queue length. This is the size of the smallest
170  * LDC queue. If the computed value is less than this default,
171  * the queue length is rounded up to 'ldc_queue_entries'.
172  */
173 uint64_t ldc_queue_entries = LDC_QUEUE_ENTRIES;
174 
175 /*
176  * The length of the reliable-mode data queue in terms of the LDC
177  * receive queue length. i.e., the number of times larger than the
178  * LDC receive queue that the data queue should be. The HV receive
179  * queue is required to be a power of 2 and this implementation
180  * assumes the data queue will also be a power of 2. By making the
181  * multiplier a power of 2, we ensure the data queue will be a
182  * power of 2. We use a multiplier because the receive queue is
183  * sized to be sane relative to the MTU and the same is needed for
184  * the data queue.
185  */
186 uint64_t ldc_rxdq_multiplier = LDC_RXDQ_MULTIPLIER;
187 
188 /*
189  * LDC retry count and delay - when the HV returns EWOULDBLOCK
190  * the operation is retried 'ldc_max_retries' times with a
191  * wait of 'ldc_delay' usecs between each retry.
192  */
193 int ldc_max_retries = LDC_MAX_RETRIES;
194 clock_t ldc_delay = LDC_DELAY;
195 
196 /*
197  * Channels which have a devclass satisfying the following
198  * will be reset when entering the prom or kmdb.
199  *
200  *   LDC_DEVCLASS_PROM_RESET(devclass) != 0
201  *
202  * By default, only block device service channels are reset.
203  */
204 #define	LDC_DEVCLASS_BIT(dc)		(0x1 << (dc))
205 #define	LDC_DEVCLASS_PROM_RESET(dc)	\
206 	(LDC_DEVCLASS_BIT(dc) & ldc_debug_reset_mask)
207 static uint64_t ldc_debug_reset_mask = LDC_DEVCLASS_BIT(LDC_DEV_BLK_SVC) |
208     LDC_DEVCLASS_BIT(LDC_DEV_GENERIC);
209 
210 /*
211  * delay between each retry of channel unregistration in
212  * ldc_close(), to wait for pending interrupts to complete.
213  */
214 clock_t ldc_close_delay = LDC_CLOSE_DELAY;
215 
216 
217 /*
218  * Reserved mapin space for descriptor rings.
219  */
220 uint64_t ldc_dring_direct_map_rsvd = LDC_DIRECT_MAP_SIZE_DEFAULT;
221 
222 /*
223  * Maximum direct map space allowed per channel.
224  */
225 uint64_t	ldc_direct_map_size_max = (16 * 1024 * 1024);	/* 16 MB */
226 
227 #ifdef DEBUG
228 
229 /*
230  * Print debug messages
231  *
232  * set ldcdbg to 0x7 for enabling all msgs
233  * 0x4 - Warnings
234  * 0x2 - All debug messages
235  * 0x1 - Minimal debug messages
236  *
237  * set ldcdbgchan to the channel number you want to debug
238  * setting it to -1 prints debug messages for all channels
239  * NOTE: ldcdbgchan has no effect on error messages
240  */
241 
242 int ldcdbg = 0x0;
243 int64_t ldcdbgchan = DBG_ALL_LDCS;
244 uint64_t ldc_inject_err_flag = 0;
245 
246 void
247 ldcdebug(int64_t id, const char *fmt, ...)
248 {
249 	char buf[512];
250 	va_list ap;
251 
252 	/*
253 	 * Do not return if,
254 	 * caller wants to print it anyway - (id == DBG_ALL_LDCS)
255 	 * debug channel is set to all LDCs - (ldcdbgchan == DBG_ALL_LDCS)
256 	 * debug channel = caller specified channel
257 	 */
258 	if ((id != DBG_ALL_LDCS) &&
259 	    (ldcdbgchan != DBG_ALL_LDCS) &&
260 	    (ldcdbgchan != id)) {
261 		return;
262 	}
263 
264 	va_start(ap, fmt);
265 	(void) vsprintf(buf, fmt, ap);
266 	va_end(ap);
267 
268 	cmn_err(CE_CONT, "?%s", buf);
269 }
270 
271 #define	LDC_ERR_RESET		0x1
272 #define	LDC_ERR_PKTLOSS		0x2
273 #define	LDC_ERR_DQFULL		0x4
274 #define	LDC_ERR_DRNGCLEAR	0x8
275 
276 static boolean_t
277 ldc_inject_error(ldc_chan_t *ldcp, uint64_t error)
278 {
279 	if ((ldcdbgchan != DBG_ALL_LDCS) && (ldcdbgchan != ldcp->id))
280 		return (B_FALSE);
281 
282 	if ((ldc_inject_err_flag & error) == 0)
283 		return (B_FALSE);
284 
285 	/* clear the injection state */
286 	ldc_inject_err_flag &= ~error;
287 
288 	return (B_TRUE);
289 }
290 
/*
 * Conditional debug print macros, selected by bits of 'ldcdbg'
 * (0x01 - D1, 0x02 - D2, 0x04 - DWARN).
 *
 * Each macro expands to an unbraced "if (ldcdbg & <bit>) ldcdebug", so
 * the argument list written after the macro name becomes the call to
 * ldcdebug().
 *
 * NOTE: because the expansion is a bare 'if', do not use these macros
 * as the unbraced body of an if/else; a following 'else' would bind to
 * the 'if' hidden inside the macro.
 */
#define	D1		\
if (ldcdbg & 0x01)	\
	ldcdebug

#define	D2		\
if (ldcdbg & 0x02)	\
	ldcdebug

#define	DWARN		\
if (ldcdbg & 0x04)	\
	ldcdebug
302 
303 #define	DUMP_PAYLOAD(id, addr)						\
304 {									\
305 	char buf[65*3];							\
306 	int i;								\
307 	uint8_t *src = (uint8_t *)addr;					\
308 	for (i = 0; i < 64; i++, src++)					\
309 		(void) sprintf(&buf[i * 3], "|%02x", *src);		\
310 	(void) sprintf(&buf[i * 3], "|\n");				\
311 	D2((id), "payload: %s", buf);					\
312 }
313 
314 #define	DUMP_LDC_PKT(c, s, addr)					\
315 {									\
316 	ldc_msg_t *msg = (ldc_msg_t *)(addr);				\
317 	uint32_t mid = ((c)->mode != LDC_MODE_RAW) ? msg->seqid : 0;	\
318 	if (msg->type == LDC_DATA) {                                    \
319 	    D2((c)->id, "%s: msg%d (/%x/%x/%x/,env[%c%c,sz=%d])",	\
320 	    (s), mid, msg->type, msg->stype, msg->ctrl,			\
321 	    (msg->env & LDC_FRAG_START) ? 'B' : ' ',                    \
322 	    (msg->env & LDC_FRAG_STOP) ? 'E' : ' ',                     \
323 	    (msg->env & LDC_LEN_MASK));					\
324 	} else { 							\
325 	    D2((c)->id, "%s: msg%d (/%x/%x/%x/,env=%x)", (s),		\
326 	    mid, msg->type, msg->stype, msg->ctrl, msg->env);		\
327 	} 								\
328 }
329 
330 #define	LDC_INJECT_RESET(_ldcp)	ldc_inject_error(_ldcp, LDC_ERR_RESET)
331 #define	LDC_INJECT_PKTLOSS(_ldcp) ldc_inject_error(_ldcp, LDC_ERR_PKTLOSS)
332 #define	LDC_INJECT_DQFULL(_ldcp) ldc_inject_error(_ldcp, LDC_ERR_DQFULL)
333 #define	LDC_INJECT_DRNGCLEAR(_ldcp) ldc_inject_error(_ldcp, LDC_ERR_DRNGCLEAR)
334 extern void i_ldc_mem_inject_dring_clear(ldc_chan_t *ldcp);
335 
336 #else
337 
338 #define	DBG_ALL_LDCS -1
339 
340 #define	D1
341 #define	D2
342 #define	DWARN
343 
344 #define	DUMP_PAYLOAD(id, addr)
345 #define	DUMP_LDC_PKT(c, s, addr)
346 
347 #define	LDC_INJECT_RESET(_ldcp)	(B_FALSE)
348 #define	LDC_INJECT_PKTLOSS(_ldcp) (B_FALSE)
349 #define	LDC_INJECT_DQFULL(_ldcp) (B_FALSE)
350 #define	LDC_INJECT_DRNGCLEAR(_ldcp) (B_FALSE)
351 
352 #endif
353 
354 /*
355  * dtrace SDT probes to ease tracing of the rx data queue and HV queue
356  * lengths. Just pass the head, tail, and entries values so that the
357  * length can be calculated in a dtrace script when the probe is enabled.
358  */
359 #define	TRACE_RXDQ_LENGTH(ldcp)						\
360 	DTRACE_PROBE4(rxdq__size,					\
361 	uint64_t, ldcp->id,						\
362 	uint64_t, ldcp->rx_dq_head,					\
363 	uint64_t, ldcp->rx_dq_tail,					\
364 	uint64_t, ldcp->rx_dq_entries)
365 
366 #define	TRACE_RXHVQ_LENGTH(ldcp, head, tail)				\
367 	DTRACE_PROBE4(rxhvq__size,					\
368 	uint64_t, ldcp->id,						\
369 	uint64_t, head,							\
370 	uint64_t, tail,							\
371 	uint64_t, ldcp->rx_q_entries)
372 
/*
 * A dtrace SDT probe to ease tracing of data queue copy operations.
 * Passes the channel id and the number of bytes copied.
 */
#define	TRACE_RXDQ_COPY(ldcp, bytes)					\
	DTRACE_PROBE2(rxdq__copy, uint64_t, ldcp->id, uint64_t, bytes)
376 
377 /* The amount of contiguous space at the tail of the queue */
378 #define	Q_CONTIG_SPACE(head, tail, size)				\
379 	((head) <= (tail) ? ((size) - (tail)) :				\
380 	((head) - (tail) - LDC_PACKET_SIZE))
381 
382 #define	ZERO_PKT(p)			\
383 	bzero((p), sizeof (ldc_msg_t));
384 
385 #define	IDX2COOKIE(idx, pg_szc, pg_shift)				\
386 	(((pg_szc) << LDC_COOKIE_PGSZC_SHIFT) | ((idx) << (pg_shift)))
387 
388 int
389 _init(void)
390 {
391 	int status;
392 
393 	status = hsvc_register(&ldc_hsvc, &ldc_sup_minor);
394 	if (status != 0) {
395 		cmn_err(CE_NOTE, "!%s: cannot negotiate hypervisor LDC services"
396 		    " group: 0x%lx major: %ld minor: %ld errno: %d",
397 		    ldc_hsvc.hsvc_modname, ldc_hsvc.hsvc_group,
398 		    ldc_hsvc.hsvc_major, ldc_hsvc.hsvc_minor, status);
399 		return (-1);
400 	}
401 
402 	/* Initialize shared memory HV API version checking */
403 	i_ldc_mem_set_hsvc_vers(ldc_hsvc.hsvc_major, ldc_sup_minor);
404 
405 	/* allocate soft state structure */
406 	ldcssp = kmem_zalloc(sizeof (ldc_soft_state_t), KM_SLEEP);
407 
408 	i_ldc_init_mapin(ldcssp, ldc_hsvc.hsvc_major, ldc_sup_minor);
409 
410 	/* Link the module into the system */
411 	status = mod_install(&ml);
412 	if (status != 0) {
413 		kmem_free(ldcssp, sizeof (ldc_soft_state_t));
414 		return (status);
415 	}
416 
417 	/* Initialize the LDC state structure */
418 	mutex_init(&ldcssp->lock, NULL, MUTEX_DRIVER, NULL);
419 
420 	mutex_enter(&ldcssp->lock);
421 
422 	/* Create a cache for memory handles */
423 	ldcssp->memhdl_cache = kmem_cache_create("ldc_memhdl_cache",
424 	    sizeof (ldc_mhdl_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
425 	if (ldcssp->memhdl_cache == NULL) {
426 		DWARN(DBG_ALL_LDCS, "_init: ldc_memhdl cache create failed\n");
427 		mutex_exit(&ldcssp->lock);
428 		return (-1);
429 	}
430 
431 	/* Create cache for memory segment structures */
432 	ldcssp->memseg_cache = kmem_cache_create("ldc_memseg_cache",
433 	    sizeof (ldc_memseg_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
434 	if (ldcssp->memseg_cache == NULL) {
435 		DWARN(DBG_ALL_LDCS, "_init: ldc_memseg cache create failed\n");
436 		mutex_exit(&ldcssp->lock);
437 		return (-1);
438 	}
439 
440 
441 	ldcssp->channel_count = 0;
442 	ldcssp->channels_open = 0;
443 	ldcssp->chan_list = NULL;
444 	ldcssp->dring_list = NULL;
445 
446 	/* Register debug_enter callback */
447 	kldc_set_debug_cb(&i_ldc_debug_enter);
448 
449 	mutex_exit(&ldcssp->lock);
450 
451 	return (0);
452 }
453 
454 int
455 _info(struct modinfo *modinfop)
456 {
457 	/* Report status of the dynamically loadable driver module */
458 	return (mod_info(&ml, modinfop));
459 }
460 
461 int
462 _fini(void)
463 {
464 	int 		rv, status;
465 	ldc_chan_t 	*tmp_ldcp, *ldcp;
466 	ldc_dring_t 	*tmp_dringp, *dringp;
467 	ldc_mem_info_t 	minfo;
468 
469 	/* Unlink the driver module from the system */
470 	status = mod_remove(&ml);
471 	if (status) {
472 		DWARN(DBG_ALL_LDCS, "_fini: mod_remove failed\n");
473 		return (EIO);
474 	}
475 
476 	/* Unregister debug_enter callback */
477 	kldc_set_debug_cb(NULL);
478 
479 	/* Free descriptor rings */
480 	dringp = ldcssp->dring_list;
481 	while (dringp != NULL) {
482 		tmp_dringp = dringp->next;
483 
484 		rv = ldc_mem_dring_info((ldc_dring_handle_t)dringp, &minfo);
485 		if (rv == 0 && minfo.status != LDC_UNBOUND) {
486 			if (minfo.status == LDC_BOUND) {
487 				(void) ldc_mem_dring_unbind(
488 				    (ldc_dring_handle_t)dringp);
489 			}
490 			if (minfo.status == LDC_MAPPED) {
491 				(void) ldc_mem_dring_unmap(
492 				    (ldc_dring_handle_t)dringp);
493 			}
494 		}
495 
496 		(void) ldc_mem_dring_destroy((ldc_dring_handle_t)dringp);
497 		dringp = tmp_dringp;
498 	}
499 	ldcssp->dring_list = NULL;
500 
501 	/* close and finalize channels */
502 	ldcp = ldcssp->chan_list;
503 	while (ldcp != NULL) {
504 		tmp_ldcp = ldcp->next;
505 
506 		(void) ldc_close((ldc_handle_t)ldcp);
507 		(void) ldc_fini((ldc_handle_t)ldcp);
508 
509 		ldcp = tmp_ldcp;
510 	}
511 	ldcssp->chan_list = NULL;
512 
513 	/* Destroy kmem caches */
514 	kmem_cache_destroy(ldcssp->memhdl_cache);
515 	kmem_cache_destroy(ldcssp->memseg_cache);
516 
517 	/*
518 	 * We have successfully "removed" the driver.
519 	 * Destroying soft states
520 	 */
521 	mutex_destroy(&ldcssp->lock);
522 	kmem_free(ldcssp, sizeof (ldc_soft_state_t));
523 
524 	(void) hsvc_unregister(&ldc_hsvc);
525 
526 	return (status);
527 }
528 
529 /* -------------------------------------------------------------------------- */
530 
531 /*
532  * LDC Link Layer Internal Functions
533  */
534 
535 /*
536  * Translate HV Errors to sun4v error codes
537  */
538 int
539 i_ldc_h2v_error(int h_error)
540 {
541 	switch (h_error) {
542 
543 	case	H_EOK:
544 		return (0);
545 
546 	case	H_ENORADDR:
547 		return (EFAULT);
548 
549 	case	H_EBADPGSZ:
550 	case	H_EINVAL:
551 		return (EINVAL);
552 
553 	case	H_EWOULDBLOCK:
554 		return (EWOULDBLOCK);
555 
556 	case	H_ENOACCESS:
557 	case	H_ENOMAP:
558 		return (EACCES);
559 
560 	case	H_EIO:
561 	case	H_ECPUERROR:
562 		return (EIO);
563 
564 	case	H_ENOTSUPPORTED:
565 		return (ENOTSUP);
566 
567 	case 	H_ETOOMANY:
568 		return (ENOSPC);
569 
570 	case	H_ECHANNEL:
571 		return (ECHRNG);
572 	default:
573 		break;
574 	}
575 
576 	return (EIO);
577 }
578 
579 /*
580  * Reconfigure the transmit queue
581  */
582 static int
583 i_ldc_txq_reconf(ldc_chan_t *ldcp)
584 {
585 	int rv;
586 
587 	ASSERT(MUTEX_HELD(&ldcp->lock));
588 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
589 
590 	rv = hv_ldc_tx_qconf(ldcp->id, ldcp->tx_q_ra, ldcp->tx_q_entries);
591 	if (rv) {
592 		cmn_err(CE_WARN,
593 		    "i_ldc_txq_reconf: (0x%lx) cannot set qconf", ldcp->id);
594 		return (EIO);
595 	}
596 	rv = hv_ldc_tx_get_state(ldcp->id, &(ldcp->tx_head),
597 	    &(ldcp->tx_tail), &(ldcp->link_state));
598 	if (rv) {
599 		cmn_err(CE_WARN,
600 		    "i_ldc_txq_reconf: (0x%lx) cannot get qptrs", ldcp->id);
601 		return (EIO);
602 	}
603 	D1(ldcp->id, "i_ldc_txq_reconf: (0x%llx) h=0x%llx,t=0x%llx,"
604 	    "s=0x%llx\n", ldcp->id, ldcp->tx_head, ldcp->tx_tail,
605 	    ldcp->link_state);
606 
607 	return (0);
608 }
609 
610 /*
611  * Reconfigure the receive queue
612  */
613 static int
614 i_ldc_rxq_reconf(ldc_chan_t *ldcp, boolean_t force_reset)
615 {
616 	int rv;
617 	uint64_t rx_head, rx_tail;
618 
619 	ASSERT(MUTEX_HELD(&ldcp->lock));
620 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
621 	    &(ldcp->link_state));
622 	if (rv) {
623 		cmn_err(CE_WARN,
624 		    "i_ldc_rxq_reconf: (0x%lx) cannot get state",
625 		    ldcp->id);
626 		return (EIO);
627 	}
628 
629 	if (force_reset || (ldcp->tstate & ~TS_IN_RESET) == TS_UP) {
630 		rv = hv_ldc_rx_qconf(ldcp->id, ldcp->rx_q_ra,
631 		    ldcp->rx_q_entries);
632 		if (rv) {
633 			cmn_err(CE_WARN,
634 			    "i_ldc_rxq_reconf: (0x%lx) cannot set qconf",
635 			    ldcp->id);
636 			return (EIO);
637 		}
638 		D1(ldcp->id, "i_ldc_rxq_reconf: (0x%llx) completed q reconf",
639 		    ldcp->id);
640 	}
641 
642 	return (0);
643 }
644 
645 
646 /*
647  * Drain the contents of the receive queue
648  */
649 static void
650 i_ldc_rxq_drain(ldc_chan_t *ldcp)
651 {
652 	int rv;
653 	uint64_t rx_head, rx_tail;
654 	int retries = 0;
655 
656 	ASSERT(MUTEX_HELD(&ldcp->lock));
657 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
658 	    &(ldcp->link_state));
659 	if (rv) {
660 		cmn_err(CE_WARN, "i_ldc_rxq_drain: (0x%lx) cannot get state, "
661 		    "rv = 0x%x", ldcp->id, rv);
662 		return;
663 	}
664 
665 	/* If the queue is already empty just return success. */
666 	if (rx_head == rx_tail)
667 		return;
668 
669 	/*
670 	 * We are draining the queue in order to close the channel.
671 	 * Call hv_ldc_rx_set_qhead directly instead of i_ldc_set_rx_head
672 	 * because we do not need to reset the channel if the set
673 	 * qhead fails.
674 	 */
675 	if ((rv = hv_ldc_rx_set_qhead(ldcp->id, rx_tail)) == 0)
676 		return;
677 
678 	while ((rv == H_EWOULDBLOCK) && (retries++ < ldc_max_retries)) {
679 		drv_usecwait(ldc_delay);
680 		if ((rv = hv_ldc_rx_set_qhead(ldcp->id, rx_tail)) == 0)
681 			return;
682 	}
683 
684 	cmn_err(CE_WARN, "i_ldc_rxq_drain: (0x%lx) cannot set qhead 0x%lx, "
685 	    "rv = 0x%x", ldcp->id, rx_tail, rv);
686 }
687 
688 
689 /*
690  * Reset LDC state structure and its contents
691  */
692 static void
693 i_ldc_reset_state(ldc_chan_t *ldcp)
694 {
695 	ASSERT(MUTEX_HELD(&ldcp->lock));
696 	ldcp->last_msg_snt = LDC_INIT_SEQID;
697 	ldcp->last_ack_rcd = 0;
698 	ldcp->last_msg_rcd = 0;
699 	ldcp->tx_ackd_head = ldcp->tx_head;
700 	ldcp->stream_remains = 0;
701 	ldcp->next_vidx = 0;
702 	ldcp->hstate = 0;
703 	ldcp->tstate = TS_OPEN;
704 	ldcp->status = LDC_OPEN;
705 	ldcp->rx_ack_head = ACKPEEK_HEAD_INVALID;
706 	ldcp->rx_dq_head = 0;
707 	ldcp->rx_dq_tail = 0;
708 
709 	if (ldcp->link_state == LDC_CHANNEL_UP ||
710 	    ldcp->link_state == LDC_CHANNEL_RESET) {
711 
712 		if (ldcp->mode == LDC_MODE_RAW) {
713 			ldcp->status = LDC_UP;
714 			ldcp->tstate = TS_UP;
715 		} else {
716 			ldcp->status = LDC_READY;
717 			ldcp->tstate |= TS_LINK_READY;
718 		}
719 	}
720 }
721 
722 /*
723  * Reset a LDC channel
724  */
725 void
726 i_ldc_reset(ldc_chan_t *ldcp, boolean_t force_reset)
727 {
728 	DWARN(ldcp->id, "i_ldc_reset: (0x%llx) channel reset\n", ldcp->id);
729 
730 	ASSERT(MUTEX_HELD(&ldcp->lock));
731 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
732 
733 	/* reconfig Tx and Rx queues */
734 	(void) i_ldc_txq_reconf(ldcp);
735 	(void) i_ldc_rxq_reconf(ldcp, force_reset);
736 
737 	/* Clear Tx and Rx interrupts */
738 	(void) i_ldc_clear_intr(ldcp, CNEX_TX_INTR);
739 	(void) i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
740 
741 	/* Reset channel state */
742 	i_ldc_reset_state(ldcp);
743 
744 	/* Mark channel in reset */
745 	ldcp->tstate |= TS_IN_RESET;
746 }
747 
748 /*
749  * Walk the channel list and reset channels if they are of the right
750  * devclass and their Rx queues have been configured. No locks are
751  * taken because the function is only invoked by the kernel just before
752  * entering the prom or debugger when the system is single-threaded.
753  */
754 static void
755 i_ldc_debug_enter(void)
756 {
757 	ldc_chan_t *ldcp;
758 
759 	ldcp = ldcssp->chan_list;
760 	while (ldcp != NULL) {
761 		if (((ldcp->tstate & TS_QCONF_RDY) == TS_QCONF_RDY) &&
762 		    (LDC_DEVCLASS_PROM_RESET(ldcp->devclass) != 0)) {
763 			(void) hv_ldc_rx_qconf(ldcp->id, ldcp->rx_q_ra,
764 			    ldcp->rx_q_entries);
765 		}
766 		ldcp = ldcp->next;
767 	}
768 }
769 
770 /*
771  * Clear pending interrupts
772  */
773 static void
774 i_ldc_clear_intr(ldc_chan_t *ldcp, cnex_intrtype_t itype)
775 {
776 	ldc_cnex_t *cinfo = &ldcssp->cinfo;
777 
778 	ASSERT(MUTEX_HELD(&ldcp->lock));
779 	ASSERT(cinfo->dip != NULL);
780 
781 	switch (itype) {
782 	case CNEX_TX_INTR:
783 		/* check Tx interrupt */
784 		if (ldcp->tx_intr_state)
785 			ldcp->tx_intr_state = LDC_INTR_NONE;
786 		else
787 			return;
788 		break;
789 
790 	case CNEX_RX_INTR:
791 		/* check Rx interrupt */
792 		if (ldcp->rx_intr_state)
793 			ldcp->rx_intr_state = LDC_INTR_NONE;
794 		else
795 			return;
796 		break;
797 	}
798 
799 	(void) cinfo->clr_intr(cinfo->dip, ldcp->id, itype);
800 	D2(ldcp->id,
801 	    "i_ldc_clear_intr: (0x%llx) cleared 0x%x intr\n",
802 	    ldcp->id, itype);
803 }
804 
805 /*
806  * Set the receive queue head
807  * Resets connection and returns an error if it fails.
808  */
809 static int
810 i_ldc_set_rx_head(ldc_chan_t *ldcp, uint64_t head)
811 {
812 	int 	rv;
813 	int 	retries;
814 
815 	ASSERT(MUTEX_HELD(&ldcp->lock));
816 	for (retries = 0; retries < ldc_max_retries; retries++) {
817 
818 		if ((rv = hv_ldc_rx_set_qhead(ldcp->id, head)) == 0)
819 			return (0);
820 
821 		if (rv != H_EWOULDBLOCK)
822 			break;
823 
824 		/* wait for ldc_delay usecs */
825 		drv_usecwait(ldc_delay);
826 	}
827 
828 	cmn_err(CE_WARN, "ldc_set_rx_qhead: (0x%lx) cannot set qhead 0x%lx, "
829 	    "rv = 0x%x", ldcp->id, head, rv);
830 	mutex_enter(&ldcp->tx_lock);
831 	i_ldc_reset(ldcp, B_TRUE);
832 	mutex_exit(&ldcp->tx_lock);
833 
834 	return (ECONNRESET);
835 }
836 
837 /*
838  * Returns the tx_head to be used for transfer
839  */
840 static void
841 i_ldc_get_tx_head(ldc_chan_t *ldcp, uint64_t *head)
842 {
843 	ldc_msg_t 	*pkt;
844 
845 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
846 
847 	/* get current Tx head */
848 	*head = ldcp->tx_head;
849 
850 	/*
851 	 * Reliable mode will use the ACKd head instead of the regular tx_head.
852 	 * Also in Reliable mode, advance ackd_head for all non DATA/INFO pkts,
853 	 * up to the current location of tx_head. This needs to be done
854 	 * as the peer will only ACK DATA/INFO pkts.
855 	 */
856 	if (ldcp->mode == LDC_MODE_RELIABLE) {
857 		while (ldcp->tx_ackd_head != ldcp->tx_head) {
858 			pkt = (ldc_msg_t *)(ldcp->tx_q_va + ldcp->tx_ackd_head);
859 			if ((pkt->type & LDC_DATA) && (pkt->stype & LDC_INFO)) {
860 				break;
861 			}
862 			/* advance ACKd head */
863 			ldcp->tx_ackd_head =
864 			    (ldcp->tx_ackd_head + LDC_PACKET_SIZE) %
865 			    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
866 		}
867 		*head = ldcp->tx_ackd_head;
868 	}
869 }
870 
871 /*
872  * Returns the tx_tail to be used for transfer
873  * Re-reads the TX queue ptrs if and only if the
874  * the cached head and tail are equal (queue is full)
875  */
876 static int
877 i_ldc_get_tx_tail(ldc_chan_t *ldcp, uint64_t *tail)
878 {
879 	int 		rv;
880 	uint64_t 	current_head, new_tail;
881 
882 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
883 	/* Read the head and tail ptrs from HV */
884 	rv = hv_ldc_tx_get_state(ldcp->id,
885 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
886 	if (rv) {
887 		cmn_err(CE_WARN,
888 		    "i_ldc_get_tx_tail: (0x%lx) cannot read qptrs\n",
889 		    ldcp->id);
890 		return (EIO);
891 	}
892 	if (ldcp->link_state == LDC_CHANNEL_DOWN) {
893 		D1(ldcp->id, "i_ldc_get_tx_tail: (0x%llx) channel not ready\n",
894 		    ldcp->id);
895 		return (ECONNRESET);
896 	}
897 
898 	i_ldc_get_tx_head(ldcp, &current_head);
899 
900 	/* increment the tail */
901 	new_tail = (ldcp->tx_tail + LDC_PACKET_SIZE) %
902 	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
903 
904 	if (new_tail == current_head) {
905 		DWARN(ldcp->id,
906 		    "i_ldc_get_tx_tail: (0x%llx) TX queue is full\n",
907 		    ldcp->id);
908 		return (EWOULDBLOCK);
909 	}
910 
911 	D2(ldcp->id, "i_ldc_get_tx_tail: (0x%llx) head=0x%llx, tail=0x%llx\n",
912 	    ldcp->id, ldcp->tx_head, ldcp->tx_tail);
913 
914 	*tail = ldcp->tx_tail;
915 	return (0);
916 }
917 
918 /*
919  * Set the tail pointer. If HV returns EWOULDBLOCK, it will back off
920  * and retry ldc_max_retries times before returning an error.
921  * Returns 0, EWOULDBLOCK or EIO
922  */
923 static int
924 i_ldc_set_tx_tail(ldc_chan_t *ldcp, uint64_t tail)
925 {
926 	int		rv, retval = EWOULDBLOCK;
927 	int 		retries;
928 
929 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
930 	for (retries = 0; retries < ldc_max_retries; retries++) {
931 
932 		if ((rv = hv_ldc_tx_set_qtail(ldcp->id, tail)) == 0) {
933 			retval = 0;
934 			break;
935 		}
936 		if (rv != H_EWOULDBLOCK) {
937 			DWARN(ldcp->id, "i_ldc_set_tx_tail: (0x%llx) set "
938 			    "qtail=0x%llx failed, rv=%d\n", ldcp->id, tail, rv);
939 			retval = EIO;
940 			break;
941 		}
942 
943 		/* wait for ldc_delay usecs */
944 		drv_usecwait(ldc_delay);
945 	}
946 	return (retval);
947 }
948 
949 /*
950  * Copy a data packet from the HV receive queue to the data queue.
951  * Caller must ensure that the data queue is not already full.
952  *
953  * The *head argument represents the current head pointer for the HV
954  * receive queue. After copying a packet from the HV receive queue,
955  * the *head pointer will be updated. This allows the caller to update
956  * the head pointer in HV using the returned *head value.
957  */
958 void
959 i_ldc_rxdq_copy(ldc_chan_t *ldcp, uint64_t *head)
960 {
961 	uint64_t	q_size, dq_size;
962 
963 	ASSERT(MUTEX_HELD(&ldcp->lock));
964 
965 	q_size  = ldcp->rx_q_entries << LDC_PACKET_SHIFT;
966 	dq_size = ldcp->rx_dq_entries << LDC_PACKET_SHIFT;
967 
968 	ASSERT(Q_CONTIG_SPACE(ldcp->rx_dq_head, ldcp->rx_dq_tail,
969 	    dq_size) >= LDC_PACKET_SIZE);
970 
971 	bcopy((void *)(ldcp->rx_q_va + *head),
972 	    (void *)(ldcp->rx_dq_va + ldcp->rx_dq_tail), LDC_PACKET_SIZE);
973 	TRACE_RXDQ_COPY(ldcp, LDC_PACKET_SIZE);
974 
975 	/* Update rx head */
976 	*head = (*head + LDC_PACKET_SIZE) % q_size;
977 
978 	/* Update dq tail */
979 	ldcp->rx_dq_tail = (ldcp->rx_dq_tail + LDC_PACKET_SIZE) % dq_size;
980 }
981 
/*
 * Update the Rx data queue head pointer
 *
 * Stores 'head' as the new head of the channel's Rx data queue.
 * Always returns 0.
 */
static int
i_ldc_set_rxdq_head(ldc_chan_t *ldcp, uint64_t head)
{
	ldcp->rx_dq_head = head;
	return (0);
}
991 
/*
 * Get the Rx data queue head and tail pointers
 *
 * Copies the channel's cached data queue head and tail into *head and
 * *tail. The 'link_state' argument is accepted but not used.
 * Always returns 0.
 */
static uint64_t
i_ldc_dq_rx_get_state(ldc_chan_t *ldcp, uint64_t *head, uint64_t *tail,
    uint64_t *link_state)
{
	_NOTE(ARGUNUSED(link_state))
	*head = ldcp->rx_dq_head;
	*tail = ldcp->rx_dq_tail;
	return (0);
}
1004 
/*
 * Wrapper for the Rx HV queue get state function, giving the data
 * queue and HV queue get state functions the same type.
 *
 * Fetches the head, tail and link state of the channel's Rx queue from
 * the hypervisor and returns the hypervisor status translated by
 * i_ldc_h2v_error().
 */
static uint64_t
i_ldc_hvq_rx_get_state(ldc_chan_t *ldcp, uint64_t *head, uint64_t *tail,
    uint64_t *link_state)
{
	return (i_ldc_h2v_error(hv_ldc_rx_get_state(ldcp->id, head, tail,
	    link_state)));
}
1016 
/*
 * LDC receive interrupt handler
 *    triggered for channel with data pending to read
 *    i.e. Rx queue content changes
 *
 * arg1 is the channel (ldc_chan_t *) the interrupt was raised for;
 * arg2 is unused.
 *
 * Returns DDI_INTR_UNCLAIMED when arg1 is NULL, DDI_INTR_CLAIMED
 * otherwise.
 */
static uint_t
i_ldc_rx_hdlr(caddr_t arg1, caddr_t arg2)
{
	_NOTE(ARGUNUSED(arg2))

	ldc_chan_t	*ldcp;
	boolean_t	notify;
	uint64_t	event;
	int		rv, status;

	/* Get the channel for which interrupt was received */
	if (arg1 == NULL) {
		cmn_err(CE_WARN, "i_ldc_rx_hdlr: invalid arg\n");
		return (DDI_INTR_UNCLAIMED);
	}

	ldcp = (ldc_chan_t *)arg1;

	D1(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) Received intr, ldcp=0x%p\n",
	    ldcp->id, ldcp);
	D1(ldcp->id, "i_ldc_rx_hdlr: (%llx) USR%lx/TS%lx/HS%lx, LSTATE=%lx\n",
	    ldcp->id, ldcp->status, ldcp->tstate, ldcp->hstate,
	    ldcp->link_state);

	/* Lock channel */
	mutex_enter(&ldcp->lock);

	/* Mark the interrupt as being actively handled */
	ldcp->rx_intr_state = LDC_INTR_ACTIVE;

	/*
	 * 'notify' and 'event' are outputs of the queue processing;
	 * presumably always set by i_ldc_rx_process_hvq() - TODO confirm.
	 */
	status = i_ldc_rx_process_hvq(ldcp, &notify, &event);

	if (ldcp->mode != LDC_MODE_RELIABLE) {
		/*
		 * If there are no data packets on the queue, clear
		 * the interrupt. Otherwise, the ldc_read will clear
		 * interrupts after draining the queue. To indicate the
		 * interrupt has not yet been cleared, it is marked
		 * as pending.
		 */
		if ((event & LDC_EVT_READ) == 0) {
			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
		} else {
			ldcp->rx_intr_state = LDC_INTR_PEND;
		}
	}

	/* if callbacks are disabled, do not notify */
	if (notify && ldcp->cb_enabled) {
		/* the channel lock is dropped while the client callback runs */
		ldcp->cb_inprogress = B_TRUE;
		mutex_exit(&ldcp->lock);
		rv = ldcp->cb(event, ldcp->cb_arg);
		if (rv) {
			DWARN(ldcp->id,
			    "i_ldc_rx_hdlr: (0x%llx) callback failure",
			    ldcp->id);
		}
		mutex_enter(&ldcp->lock);
		ldcp->cb_inprogress = B_FALSE;
	}

	if (ldcp->mode == LDC_MODE_RELIABLE) {
		if (status == ENOSPC) {
			/*
			 * Here, ENOSPC indicates the secondary data
			 * queue is full and the Rx queue is non-empty.
			 * Much like how the non-reliable modes are
			 * handled above, since the Rx queue is non-
			 * empty, we mark the interrupt as pending to
			 * indicate it has not yet been cleared.
			 */
			ldcp->rx_intr_state = LDC_INTR_PEND;
		} else {
			/*
			 * We have processed all CTRL packets and
			 * copied all DATA packets to the secondary
			 * queue. Clear the interrupt.
			 */
			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
		}
	}

	mutex_exit(&ldcp->lock);

	D1(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) exiting handler", ldcp->id);

	return (DDI_INTR_CLAIMED);
}
1110 
1111 /*
1112  * Wrapper for the Rx HV queue processing function to be used when
1113  * checking the Rx HV queue for data packets. Unlike the interrupt
1114  * handler code flow, the Rx interrupt is not cleared here and
1115  * callbacks are not made.
1116  */
1117 static uint_t
1118 i_ldc_chkq(ldc_chan_t *ldcp)
1119 {
1120 	boolean_t	notify;
1121 	uint64_t	event;
1122 
1123 	return (i_ldc_rx_process_hvq(ldcp, &notify, &event));
1124 }
1125 
1126 /*
1127  * Send a LDC message
1128  */
1129 static int
1130 i_ldc_send_pkt(ldc_chan_t *ldcp, uint8_t pkttype, uint8_t subtype,
1131     uint8_t ctrlmsg)
1132 {
1133 	int		rv;
1134 	ldc_msg_t 	*pkt;
1135 	uint64_t	tx_tail;
1136 	uint32_t	curr_seqid;
1137 
1138 	/* Obtain Tx lock */
1139 	mutex_enter(&ldcp->tx_lock);
1140 
1141 	curr_seqid = ldcp->last_msg_snt;
1142 
1143 	/* get the current tail for the message */
1144 	rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
1145 	if (rv) {
1146 		DWARN(ldcp->id,
1147 		    "i_ldc_send_pkt: (0x%llx) error sending pkt, "
1148 		    "type=0x%x,subtype=0x%x,ctrl=0x%x\n",
1149 		    ldcp->id, pkttype, subtype, ctrlmsg);
1150 		mutex_exit(&ldcp->tx_lock);
1151 		return (rv);
1152 	}
1153 
1154 	pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
1155 	ZERO_PKT(pkt);
1156 
1157 	/* Initialize the packet */
1158 	pkt->type = pkttype;
1159 	pkt->stype = subtype;
1160 	pkt->ctrl = ctrlmsg;
1161 
1162 	/* Store ackid/seqid iff it is RELIABLE mode & not a RTS/RTR message */
1163 	if (((ctrlmsg & LDC_CTRL_MASK) != LDC_RTS) &&
1164 	    ((ctrlmsg & LDC_CTRL_MASK) != LDC_RTR)) {
1165 		curr_seqid++;
1166 		if (ldcp->mode != LDC_MODE_RAW) {
1167 			pkt->seqid = curr_seqid;
1168 			pkt->ackid = ldcp->last_msg_rcd;
1169 		}
1170 	}
1171 	DUMP_LDC_PKT(ldcp, "i_ldc_send_pkt", (uint64_t)pkt);
1172 
1173 	/* initiate the send by calling into HV and set the new tail */
1174 	tx_tail = (tx_tail + LDC_PACKET_SIZE) %
1175 	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1176 
1177 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
1178 	if (rv) {
1179 		DWARN(ldcp->id,
1180 		    "i_ldc_send_pkt:(0x%llx) error sending pkt, "
1181 		    "type=0x%x,stype=0x%x,ctrl=0x%x\n",
1182 		    ldcp->id, pkttype, subtype, ctrlmsg);
1183 		mutex_exit(&ldcp->tx_lock);
1184 		return (EIO);
1185 	}
1186 
1187 	ldcp->last_msg_snt = curr_seqid;
1188 	ldcp->tx_tail = tx_tail;
1189 
1190 	mutex_exit(&ldcp->tx_lock);
1191 	return (0);
1192 }
1193 
1194 /*
1195  * Checks if packet was received in right order
1196  * in the case of a reliable link.
1197  * Returns 0 if in order, else EIO
1198  */
1199 static int
1200 i_ldc_check_seqid(ldc_chan_t *ldcp, ldc_msg_t *msg)
1201 {
1202 	/* No seqid checking for RAW mode */
1203 	if (ldcp->mode == LDC_MODE_RAW)
1204 		return (0);
1205 
1206 	/* No seqid checking for version, RTS, RTR message */
1207 	if (msg->ctrl == LDC_VER ||
1208 	    msg->ctrl == LDC_RTS ||
1209 	    msg->ctrl == LDC_RTR)
1210 		return (0);
1211 
1212 	/* Initial seqid to use is sent in RTS/RTR and saved in last_msg_rcd */
1213 	if (msg->seqid != (ldcp->last_msg_rcd + 1)) {
1214 		DWARN(ldcp->id,
1215 		    "i_ldc_check_seqid: (0x%llx) out-of-order pkt, got 0x%x, "
1216 		    "expecting 0x%x\n", ldcp->id, msg->seqid,
1217 		    (ldcp->last_msg_rcd + 1));
1218 		return (EIO);
1219 	}
1220 
1221 #ifdef DEBUG
1222 	if (LDC_INJECT_PKTLOSS(ldcp)) {
1223 		DWARN(ldcp->id,
1224 		    "i_ldc_check_seqid: (0x%llx) inject pkt loss\n", ldcp->id);
1225 		return (EIO);
1226 	}
1227 #endif
1228 
1229 	return (0);
1230 }
1231 
1232 
1233 /*
1234  * Process an incoming version ctrl message
1235  */
1236 static int
1237 i_ldc_process_VER(ldc_chan_t *ldcp, ldc_msg_t *msg)
1238 {
1239 	int 		rv = 0, idx = ldcp->next_vidx;
1240 	ldc_msg_t 	*pkt;
1241 	uint64_t	tx_tail;
1242 	ldc_ver_t	*rcvd_ver;
1243 
1244 	/* get the received version */
1245 	rcvd_ver = (ldc_ver_t *)((uint64_t)msg + LDC_PAYLOAD_VER_OFF);
1246 
1247 	D2(ldcp->id, "i_ldc_process_VER: (0x%llx) received VER v%u.%u\n",
1248 	    ldcp->id, rcvd_ver->major, rcvd_ver->minor);
1249 
1250 	/* Obtain Tx lock */
1251 	mutex_enter(&ldcp->tx_lock);
1252 
1253 	switch (msg->stype) {
1254 	case LDC_INFO:
1255 
1256 		if ((ldcp->tstate & ~TS_IN_RESET) == TS_VREADY) {
1257 			(void) i_ldc_txq_reconf(ldcp);
1258 			i_ldc_reset_state(ldcp);
1259 			mutex_exit(&ldcp->tx_lock);
1260 			return (EAGAIN);
1261 		}
1262 
1263 		/* get the current tail and pkt for the response */
1264 		rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
1265 		if (rv != 0) {
1266 			DWARN(ldcp->id,
1267 			    "i_ldc_process_VER: (0x%llx) err sending "
1268 			    "version ACK/NACK\n", ldcp->id);
1269 			i_ldc_reset(ldcp, B_TRUE);
1270 			mutex_exit(&ldcp->tx_lock);
1271 			return (ECONNRESET);
1272 		}
1273 
1274 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
1275 		ZERO_PKT(pkt);
1276 
1277 		/* initialize the packet */
1278 		pkt->type = LDC_CTRL;
1279 		pkt->ctrl = LDC_VER;
1280 
1281 		for (;;) {
1282 
1283 			D1(ldcp->id, "i_ldc_process_VER: got %u.%u chk %u.%u\n",
1284 			    rcvd_ver->major, rcvd_ver->minor,
1285 			    ldc_versions[idx].major, ldc_versions[idx].minor);
1286 
1287 			if (rcvd_ver->major == ldc_versions[idx].major) {
1288 				/* major version match - ACK version */
1289 				pkt->stype = LDC_ACK;
1290 
1291 				/*
1292 				 * lower minor version to the one this endpt
1293 				 * supports, if necessary
1294 				 */
1295 				if (rcvd_ver->minor > ldc_versions[idx].minor)
1296 					rcvd_ver->minor =
1297 					    ldc_versions[idx].minor;
1298 				bcopy(rcvd_ver, pkt->udata, sizeof (*rcvd_ver));
1299 
1300 				break;
1301 			}
1302 
1303 			if (rcvd_ver->major > ldc_versions[idx].major) {
1304 
1305 				D1(ldcp->id, "i_ldc_process_VER: using next"
1306 				    " lower idx=%d, v%u.%u\n", idx,
1307 				    ldc_versions[idx].major,
1308 				    ldc_versions[idx].minor);
1309 
1310 				/* nack with next lower version */
1311 				pkt->stype = LDC_NACK;
1312 				bcopy(&ldc_versions[idx], pkt->udata,
1313 				    sizeof (ldc_versions[idx]));
1314 				ldcp->next_vidx = idx;
1315 				break;
1316 			}
1317 
1318 			/* next major version */
1319 			idx++;
1320 
1321 			D1(ldcp->id, "i_ldc_process_VER: inc idx %x\n", idx);
1322 
1323 			if (idx == LDC_NUM_VERS) {
1324 				/* no version match - send NACK */
1325 				pkt->stype = LDC_NACK;
1326 				bzero(pkt->udata, sizeof (ldc_ver_t));
1327 				ldcp->next_vidx = 0;
1328 				break;
1329 			}
1330 		}
1331 
1332 		/* initiate the send by calling into HV and set the new tail */
1333 		tx_tail = (tx_tail + LDC_PACKET_SIZE) %
1334 		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1335 
1336 		rv = i_ldc_set_tx_tail(ldcp, tx_tail);
1337 		if (rv == 0) {
1338 			ldcp->tx_tail = tx_tail;
1339 			if (pkt->stype == LDC_ACK) {
1340 				D2(ldcp->id, "i_ldc_process_VER: (0x%llx) sent"
1341 				    " version ACK\n", ldcp->id);
1342 				/* Save the ACK'd version */
1343 				ldcp->version.major = rcvd_ver->major;
1344 				ldcp->version.minor = rcvd_ver->minor;
1345 				ldcp->hstate |= TS_RCVD_VER;
1346 				ldcp->tstate |= TS_VER_DONE;
1347 				D1(DBG_ALL_LDCS,
1348 				    "(0x%llx) Sent ACK, "
1349 				    "Agreed on version v%u.%u\n",
1350 				    ldcp->id, rcvd_ver->major, rcvd_ver->minor);
1351 			}
1352 		} else {
1353 			DWARN(ldcp->id,
1354 			    "i_ldc_process_VER: (0x%llx) error sending "
1355 			    "ACK/NACK\n", ldcp->id);
1356 			i_ldc_reset(ldcp, B_TRUE);
1357 			mutex_exit(&ldcp->tx_lock);
1358 			return (ECONNRESET);
1359 		}
1360 
1361 		break;
1362 
1363 	case LDC_ACK:
1364 		if ((ldcp->tstate & ~TS_IN_RESET) == TS_VREADY) {
1365 			if (ldcp->version.major != rcvd_ver->major ||
1366 			    ldcp->version.minor != rcvd_ver->minor) {
1367 
1368 				/* mismatched version - reset connection */
1369 				DWARN(ldcp->id,
1370 				    "i_ldc_process_VER: (0x%llx) recvd"
1371 				    " ACK ver != sent ACK ver\n", ldcp->id);
1372 				i_ldc_reset(ldcp, B_TRUE);
1373 				mutex_exit(&ldcp->tx_lock);
1374 				return (ECONNRESET);
1375 			}
1376 		} else {
1377 			/* SUCCESS - we have agreed on a version */
1378 			ldcp->version.major = rcvd_ver->major;
1379 			ldcp->version.minor = rcvd_ver->minor;
1380 			ldcp->tstate |= TS_VER_DONE;
1381 		}
1382 
1383 		D1(ldcp->id, "(0x%llx) Got ACK, Agreed on version v%u.%u\n",
1384 		    ldcp->id, rcvd_ver->major, rcvd_ver->minor);
1385 
1386 		/* initiate RTS-RTR-RDX handshake */
1387 		rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
1388 		if (rv) {
1389 			DWARN(ldcp->id,
1390 		    "i_ldc_process_VER: (0x%llx) cannot send RTS\n",
1391 			    ldcp->id);
1392 			i_ldc_reset(ldcp, B_TRUE);
1393 			mutex_exit(&ldcp->tx_lock);
1394 			return (ECONNRESET);
1395 		}
1396 
1397 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
1398 		ZERO_PKT(pkt);
1399 
1400 		pkt->type = LDC_CTRL;
1401 		pkt->stype = LDC_INFO;
1402 		pkt->ctrl = LDC_RTS;
1403 		pkt->env = ldcp->mode;
1404 		if (ldcp->mode != LDC_MODE_RAW)
1405 			pkt->seqid = LDC_INIT_SEQID;
1406 
1407 		ldcp->last_msg_rcd = LDC_INIT_SEQID;
1408 
1409 		DUMP_LDC_PKT(ldcp, "i_ldc_process_VER snd rts", (uint64_t)pkt);
1410 
1411 		/* initiate the send by calling into HV and set the new tail */
1412 		tx_tail = (tx_tail + LDC_PACKET_SIZE) %
1413 		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1414 
1415 		rv = i_ldc_set_tx_tail(ldcp, tx_tail);
1416 		if (rv) {
1417 			D2(ldcp->id,
1418 			    "i_ldc_process_VER: (0x%llx) no listener\n",
1419 			    ldcp->id);
1420 			i_ldc_reset(ldcp, B_TRUE);
1421 			mutex_exit(&ldcp->tx_lock);
1422 			return (ECONNRESET);
1423 		}
1424 
1425 		ldcp->tx_tail = tx_tail;
1426 		ldcp->hstate |= TS_SENT_RTS;
1427 
1428 		break;
1429 
1430 	case LDC_NACK:
1431 		/* check if version in NACK is zero */
1432 		if (rcvd_ver->major == 0 && rcvd_ver->minor == 0) {
1433 			/* version handshake failure */
1434 			DWARN(DBG_ALL_LDCS,
1435 			    "i_ldc_process_VER: (0x%llx) no version match\n",
1436 			    ldcp->id);
1437 			i_ldc_reset(ldcp, B_TRUE);
1438 			mutex_exit(&ldcp->tx_lock);
1439 			return (ECONNRESET);
1440 		}
1441 
1442 		/* get the current tail and pkt for the response */
1443 		rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
1444 		if (rv != 0) {
1445 			cmn_err(CE_NOTE,
1446 			    "i_ldc_process_VER: (0x%lx) err sending "
1447 			    "version ACK/NACK\n", ldcp->id);
1448 			i_ldc_reset(ldcp, B_TRUE);
1449 			mutex_exit(&ldcp->tx_lock);
1450 			return (ECONNRESET);
1451 		}
1452 
1453 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
1454 		ZERO_PKT(pkt);
1455 
1456 		/* initialize the packet */
1457 		pkt->type = LDC_CTRL;
1458 		pkt->ctrl = LDC_VER;
1459 		pkt->stype = LDC_INFO;
1460 
1461 		/* check ver in NACK msg has a match */
1462 		for (;;) {
1463 			if (rcvd_ver->major == ldc_versions[idx].major) {
1464 				/*
1465 				 * major version match - resubmit request
1466 				 * if lower minor version to the one this endpt
1467 				 * supports, if necessary
1468 				 */
1469 				if (rcvd_ver->minor > ldc_versions[idx].minor)
1470 					rcvd_ver->minor =
1471 					    ldc_versions[idx].minor;
1472 				bcopy(rcvd_ver, pkt->udata, sizeof (*rcvd_ver));
1473 				break;
1474 			}
1475 
1476 			if (rcvd_ver->major > ldc_versions[idx].major) {
1477 
1478 				D1(ldcp->id, "i_ldc_process_VER: using next"
1479 				    " lower idx=%d, v%u.%u\n", idx,
1480 				    ldc_versions[idx].major,
1481 				    ldc_versions[idx].minor);
1482 
1483 				/* send next lower version */
1484 				bcopy(&ldc_versions[idx], pkt->udata,
1485 				    sizeof (ldc_versions[idx]));
1486 				ldcp->next_vidx = idx;
1487 				break;
1488 			}
1489 
1490 			/* next version */
1491 			idx++;
1492 
1493 			D1(ldcp->id, "i_ldc_process_VER: inc idx %x\n", idx);
1494 
1495 			if (idx == LDC_NUM_VERS) {
1496 				/* no version match - terminate */
1497 				ldcp->next_vidx = 0;
1498 				mutex_exit(&ldcp->tx_lock);
1499 				return (ECONNRESET);
1500 			}
1501 		}
1502 
1503 		/* initiate the send by calling into HV and set the new tail */
1504 		tx_tail = (tx_tail + LDC_PACKET_SIZE) %
1505 		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1506 
1507 		rv = i_ldc_set_tx_tail(ldcp, tx_tail);
1508 		if (rv == 0) {
1509 			D2(ldcp->id, "i_ldc_process_VER: (0x%llx) sent version"
1510 			    "INFO v%u.%u\n", ldcp->id, ldc_versions[idx].major,
1511 			    ldc_versions[idx].minor);
1512 			ldcp->tx_tail = tx_tail;
1513 		} else {
1514 			cmn_err(CE_NOTE,
1515 			    "i_ldc_process_VER: (0x%lx) error sending version"
1516 			    "INFO\n", ldcp->id);
1517 			i_ldc_reset(ldcp, B_TRUE);
1518 			mutex_exit(&ldcp->tx_lock);
1519 			return (ECONNRESET);
1520 		}
1521 
1522 		break;
1523 	}
1524 
1525 	mutex_exit(&ldcp->tx_lock);
1526 	return (rv);
1527 }
1528 
1529 
1530 /*
1531  * Process an incoming RTS ctrl message
1532  */
1533 static int
1534 i_ldc_process_RTS(ldc_chan_t *ldcp, ldc_msg_t *msg)
1535 {
1536 	int 		rv = 0;
1537 	ldc_msg_t 	*pkt;
1538 	uint64_t	tx_tail;
1539 	boolean_t	sent_NACK = B_FALSE;
1540 
1541 	D2(ldcp->id, "i_ldc_process_RTS: (0x%llx) received RTS\n", ldcp->id);
1542 
1543 	switch (msg->stype) {
1544 	case LDC_NACK:
1545 		DWARN(ldcp->id,
1546 		    "i_ldc_process_RTS: (0x%llx) RTS NACK received\n",
1547 		    ldcp->id);
1548 
1549 		/* Reset the channel -- as we cannot continue */
1550 		mutex_enter(&ldcp->tx_lock);
1551 		i_ldc_reset(ldcp, B_TRUE);
1552 		mutex_exit(&ldcp->tx_lock);
1553 		rv = ECONNRESET;
1554 		break;
1555 
1556 	case LDC_INFO:
1557 
1558 		/* check mode */
1559 		if (ldcp->mode != (ldc_mode_t)msg->env) {
1560 			cmn_err(CE_NOTE,
1561 			    "i_ldc_process_RTS: (0x%lx) mode mismatch\n",
1562 			    ldcp->id);
1563 			/*
1564 			 * send NACK in response to MODE message
1565 			 * get the current tail for the response
1566 			 */
1567 			rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_NACK, LDC_RTS);
1568 			if (rv) {
1569 				/* if cannot send NACK - reset channel */
1570 				mutex_enter(&ldcp->tx_lock);
1571 				i_ldc_reset(ldcp, B_TRUE);
1572 				mutex_exit(&ldcp->tx_lock);
1573 				rv = ECONNRESET;
1574 				break;
1575 			}
1576 			sent_NACK = B_TRUE;
1577 		}
1578 		break;
1579 	default:
1580 		DWARN(ldcp->id, "i_ldc_process_RTS: (0x%llx) unexp ACK\n",
1581 		    ldcp->id);
1582 		mutex_enter(&ldcp->tx_lock);
1583 		i_ldc_reset(ldcp, B_TRUE);
1584 		mutex_exit(&ldcp->tx_lock);
1585 		rv = ECONNRESET;
1586 		break;
1587 	}
1588 
1589 	/*
1590 	 * If either the connection was reset (when rv != 0) or
1591 	 * a NACK was sent, we return. In the case of a NACK
1592 	 * we dont want to consume the packet that came in but
1593 	 * not record that we received the RTS
1594 	 */
1595 	if (rv || sent_NACK)
1596 		return (rv);
1597 
1598 	/* record RTS received */
1599 	ldcp->hstate |= TS_RCVD_RTS;
1600 
1601 	/* store initial SEQID info */
1602 	ldcp->last_msg_snt = msg->seqid;
1603 
1604 	/* Obtain Tx lock */
1605 	mutex_enter(&ldcp->tx_lock);
1606 
1607 	/* get the current tail for the response */
1608 	rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
1609 	if (rv != 0) {
1610 		cmn_err(CE_NOTE,
1611 		    "i_ldc_process_RTS: (0x%lx) err sending RTR\n",
1612 		    ldcp->id);
1613 		i_ldc_reset(ldcp, B_TRUE);
1614 		mutex_exit(&ldcp->tx_lock);
1615 		return (ECONNRESET);
1616 	}
1617 
1618 	pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
1619 	ZERO_PKT(pkt);
1620 
1621 	/* initialize the packet */
1622 	pkt->type = LDC_CTRL;
1623 	pkt->stype = LDC_INFO;
1624 	pkt->ctrl = LDC_RTR;
1625 	pkt->env = ldcp->mode;
1626 	if (ldcp->mode != LDC_MODE_RAW)
1627 		pkt->seqid = LDC_INIT_SEQID;
1628 
1629 	ldcp->last_msg_rcd = msg->seqid;
1630 
1631 	/* initiate the send by calling into HV and set the new tail */
1632 	tx_tail = (tx_tail + LDC_PACKET_SIZE) %
1633 	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1634 
1635 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
1636 	if (rv == 0) {
1637 		D2(ldcp->id,
1638 		    "i_ldc_process_RTS: (0x%llx) sent RTR\n", ldcp->id);
1639 		DUMP_LDC_PKT(ldcp, "i_ldc_process_RTS sent rtr", (uint64_t)pkt);
1640 
1641 		ldcp->tx_tail = tx_tail;
1642 		ldcp->hstate |= TS_SENT_RTR;
1643 
1644 	} else {
1645 		cmn_err(CE_NOTE,
1646 		    "i_ldc_process_RTS: (0x%lx) error sending RTR\n",
1647 		    ldcp->id);
1648 		i_ldc_reset(ldcp, B_TRUE);
1649 		mutex_exit(&ldcp->tx_lock);
1650 		return (ECONNRESET);
1651 	}
1652 
1653 	mutex_exit(&ldcp->tx_lock);
1654 	return (0);
1655 }
1656 
1657 /*
1658  * Process an incoming RTR ctrl message
1659  */
1660 static int
1661 i_ldc_process_RTR(ldc_chan_t *ldcp, ldc_msg_t *msg)
1662 {
1663 	int 		rv = 0;
1664 	boolean_t	sent_NACK = B_FALSE;
1665 
1666 	D2(ldcp->id, "i_ldc_process_RTR: (0x%llx) received RTR\n", ldcp->id);
1667 
1668 	switch (msg->stype) {
1669 	case LDC_NACK:
1670 		/* RTR NACK received */
1671 		DWARN(ldcp->id,
1672 		    "i_ldc_process_RTR: (0x%llx) RTR NACK received\n",
1673 		    ldcp->id);
1674 
1675 		/* Reset the channel -- as we cannot continue */
1676 		mutex_enter(&ldcp->tx_lock);
1677 		i_ldc_reset(ldcp, B_TRUE);
1678 		mutex_exit(&ldcp->tx_lock);
1679 		rv = ECONNRESET;
1680 
1681 		break;
1682 
1683 	case LDC_INFO:
1684 
1685 		/* check mode */
1686 		if (ldcp->mode != (ldc_mode_t)msg->env) {
1687 			DWARN(ldcp->id,
1688 			    "i_ldc_process_RTR: (0x%llx) mode mismatch, "
1689 			    "expecting 0x%x, got 0x%x\n",
1690 			    ldcp->id, ldcp->mode, (ldc_mode_t)msg->env);
1691 			/*
1692 			 * send NACK in response to MODE message
1693 			 * get the current tail for the response
1694 			 */
1695 			rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_NACK, LDC_RTR);
1696 			if (rv) {
1697 				/* if cannot send NACK - reset channel */
1698 				mutex_enter(&ldcp->tx_lock);
1699 				i_ldc_reset(ldcp, B_TRUE);
1700 				mutex_exit(&ldcp->tx_lock);
1701 				rv = ECONNRESET;
1702 				break;
1703 			}
1704 			sent_NACK = B_TRUE;
1705 		}
1706 		break;
1707 
1708 	default:
1709 		DWARN(ldcp->id, "i_ldc_process_RTR: (0x%llx) unexp ACK\n",
1710 		    ldcp->id);
1711 
1712 		/* Reset the channel -- as we cannot continue */
1713 		mutex_enter(&ldcp->tx_lock);
1714 		i_ldc_reset(ldcp, B_TRUE);
1715 		mutex_exit(&ldcp->tx_lock);
1716 		rv = ECONNRESET;
1717 		break;
1718 	}
1719 
1720 	/*
1721 	 * If either the connection was reset (when rv != 0) or
1722 	 * a NACK was sent, we return. In the case of a NACK
1723 	 * we dont want to consume the packet that came in but
1724 	 * not record that we received the RTR
1725 	 */
1726 	if (rv || sent_NACK)
1727 		return (rv);
1728 
1729 	ldcp->last_msg_snt = msg->seqid;
1730 	ldcp->hstate |= TS_RCVD_RTR;
1731 
1732 	rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_INFO, LDC_RDX);
1733 	if (rv) {
1734 		cmn_err(CE_NOTE,
1735 		    "i_ldc_process_RTR: (0x%lx) cannot send RDX\n",
1736 		    ldcp->id);
1737 		mutex_enter(&ldcp->tx_lock);
1738 		i_ldc_reset(ldcp, B_TRUE);
1739 		mutex_exit(&ldcp->tx_lock);
1740 		return (ECONNRESET);
1741 	}
1742 	D2(ldcp->id,
1743 	    "i_ldc_process_RTR: (0x%llx) sent RDX\n", ldcp->id);
1744 
1745 	ldcp->hstate |= TS_SENT_RDX;
1746 	ldcp->tstate |= TS_HSHAKE_DONE;
1747 	if ((ldcp->tstate & TS_IN_RESET) == 0)
1748 		ldcp->status = LDC_UP;
1749 
1750 	D1(ldcp->id, "(0x%llx) Handshake Complete\n", ldcp->id);
1751 
1752 	return (0);
1753 }
1754 
1755 
1756 /*
1757  * Process an incoming RDX ctrl message
1758  */
1759 static int
1760 i_ldc_process_RDX(ldc_chan_t *ldcp, ldc_msg_t *msg)
1761 {
1762 	int	rv = 0;
1763 
1764 	D2(ldcp->id, "i_ldc_process_RDX: (0x%llx) received RDX\n", ldcp->id);
1765 
1766 	switch (msg->stype) {
1767 	case LDC_NACK:
1768 		/* RDX NACK received */
1769 		DWARN(ldcp->id,
1770 		    "i_ldc_process_RDX: (0x%llx) RDX NACK received\n",
1771 		    ldcp->id);
1772 
1773 		/* Reset the channel -- as we cannot continue */
1774 		mutex_enter(&ldcp->tx_lock);
1775 		i_ldc_reset(ldcp, B_TRUE);
1776 		mutex_exit(&ldcp->tx_lock);
1777 		rv = ECONNRESET;
1778 
1779 		break;
1780 
1781 	case LDC_INFO:
1782 
1783 		/*
1784 		 * if channel is UP and a RDX received after data transmission
1785 		 * has commenced it is an error
1786 		 */
1787 		if ((ldcp->tstate == TS_UP) && (ldcp->hstate & TS_RCVD_RDX)) {
1788 			DWARN(DBG_ALL_LDCS,
1789 			    "i_ldc_process_RDX: (0x%llx) unexpected RDX"
1790 			    " - LDC reset\n", ldcp->id);
1791 			mutex_enter(&ldcp->tx_lock);
1792 			i_ldc_reset(ldcp, B_TRUE);
1793 			mutex_exit(&ldcp->tx_lock);
1794 			return (ECONNRESET);
1795 		}
1796 
1797 		ldcp->hstate |= TS_RCVD_RDX;
1798 		ldcp->tstate |= TS_HSHAKE_DONE;
1799 		if ((ldcp->tstate & TS_IN_RESET) == 0)
1800 			ldcp->status = LDC_UP;
1801 
1802 		D1(DBG_ALL_LDCS, "(0x%llx) Handshake Complete\n", ldcp->id);
1803 		break;
1804 
1805 	default:
1806 		DWARN(ldcp->id, "i_ldc_process_RDX: (0x%llx) unexp ACK\n",
1807 		    ldcp->id);
1808 
1809 		/* Reset the channel -- as we cannot continue */
1810 		mutex_enter(&ldcp->tx_lock);
1811 		i_ldc_reset(ldcp, B_TRUE);
1812 		mutex_exit(&ldcp->tx_lock);
1813 		rv = ECONNRESET;
1814 		break;
1815 	}
1816 
1817 	return (rv);
1818 }
1819 
1820 /*
1821  * Process an incoming ACK for a data packet
1822  */
1823 static int
1824 i_ldc_process_data_ACK(ldc_chan_t *ldcp, ldc_msg_t *msg)
1825 {
1826 	int		rv;
1827 	uint64_t 	tx_head;
1828 	ldc_msg_t	*pkt;
1829 
1830 	/* Obtain Tx lock */
1831 	mutex_enter(&ldcp->tx_lock);
1832 
1833 	/*
1834 	 * Read the current Tx head and tail
1835 	 */
1836 	rv = hv_ldc_tx_get_state(ldcp->id,
1837 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
1838 	if (rv != 0) {
1839 		cmn_err(CE_WARN,
1840 		    "i_ldc_process_data_ACK: (0x%lx) cannot read qptrs\n",
1841 		    ldcp->id);
1842 
1843 		/* Reset the channel -- as we cannot continue */
1844 		i_ldc_reset(ldcp, B_TRUE);
1845 		mutex_exit(&ldcp->tx_lock);
1846 		return (ECONNRESET);
1847 	}
1848 
1849 	/*
1850 	 * loop from where the previous ACK location was to the
1851 	 * current head location. This is how far the HV has
1852 	 * actually send pkts. Pkts between head and tail are
1853 	 * yet to be sent by HV.
1854 	 */
1855 	tx_head = ldcp->tx_ackd_head;
1856 	for (;;) {
1857 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_head);
1858 		tx_head = (tx_head + LDC_PACKET_SIZE) %
1859 		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1860 
1861 		if (pkt->seqid == msg->ackid) {
1862 			D2(ldcp->id,
1863 			    "i_ldc_process_data_ACK: (0x%llx) found packet\n",
1864 			    ldcp->id);
1865 			ldcp->last_ack_rcd = msg->ackid;
1866 			ldcp->tx_ackd_head = tx_head;
1867 			break;
1868 		}
1869 		if (tx_head == ldcp->tx_head) {
1870 			/* could not find packet */
1871 			DWARN(ldcp->id,
1872 			    "i_ldc_process_data_ACK: (0x%llx) invalid ACKid\n",
1873 			    ldcp->id);
1874 
1875 			/* Reset the channel -- as we cannot continue */
1876 			i_ldc_reset(ldcp, B_TRUE);
1877 			mutex_exit(&ldcp->tx_lock);
1878 			return (ECONNRESET);
1879 		}
1880 	}
1881 
1882 	mutex_exit(&ldcp->tx_lock);
1883 	return (0);
1884 }
1885 
1886 /*
1887  * Process incoming control message
1888  * Return 0 - session can continue
1889  *        EAGAIN - reprocess packet - state was changed
1890  *	  ECONNRESET - channel was reset
1891  */
1892 static int
1893 i_ldc_ctrlmsg(ldc_chan_t *ldcp, ldc_msg_t *msg)
1894 {
1895 	int 		rv = 0;
1896 
1897 	D1(ldcp->id, "i_ldc_ctrlmsg: (%llx) tstate = %lx, hstate = %lx\n",
1898 	    ldcp->id, ldcp->tstate, ldcp->hstate);
1899 
1900 	switch (ldcp->tstate & ~TS_IN_RESET) {
1901 
1902 	case TS_OPEN:
1903 	case TS_READY:
1904 
1905 		switch (msg->ctrl & LDC_CTRL_MASK) {
1906 		case LDC_VER:
1907 			/* process version message */
1908 			rv = i_ldc_process_VER(ldcp, msg);
1909 			break;
1910 		default:
1911 			DWARN(ldcp->id,
1912 			    "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
1913 			    "tstate=0x%x\n", ldcp->id,
1914 			    (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate);
1915 			break;
1916 		}
1917 
1918 		break;
1919 
1920 	case TS_VREADY:
1921 
1922 		switch (msg->ctrl & LDC_CTRL_MASK) {
1923 		case LDC_VER:
1924 			/* process version message */
1925 			rv = i_ldc_process_VER(ldcp, msg);
1926 			break;
1927 		case LDC_RTS:
1928 			/* process RTS message */
1929 			rv = i_ldc_process_RTS(ldcp, msg);
1930 			break;
1931 		case LDC_RTR:
1932 			/* process RTR message */
1933 			rv = i_ldc_process_RTR(ldcp, msg);
1934 			break;
1935 		case LDC_RDX:
1936 			/* process RDX message */
1937 			rv = i_ldc_process_RDX(ldcp, msg);
1938 			break;
1939 		default:
1940 			DWARN(ldcp->id,
1941 			    "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
1942 			    "tstate=0x%x\n", ldcp->id,
1943 			    (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate);
1944 			break;
1945 		}
1946 
1947 		break;
1948 
1949 	case TS_UP:
1950 
1951 		switch (msg->ctrl & LDC_CTRL_MASK) {
1952 		case LDC_VER:
1953 			DWARN(ldcp->id,
1954 			    "i_ldc_ctrlmsg: (0x%llx) unexpected VER "
1955 			    "- LDC reset\n", ldcp->id);
1956 			/* peer is redoing version negotiation */
1957 			mutex_enter(&ldcp->tx_lock);
1958 			(void) i_ldc_txq_reconf(ldcp);
1959 			i_ldc_reset_state(ldcp);
1960 			mutex_exit(&ldcp->tx_lock);
1961 			rv = EAGAIN;
1962 			break;
1963 
1964 		case LDC_RDX:
1965 			/* process RDX message */
1966 			rv = i_ldc_process_RDX(ldcp, msg);
1967 			break;
1968 
1969 		default:
1970 			DWARN(ldcp->id,
1971 			    "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
1972 			    "tstate=0x%x\n", ldcp->id,
1973 			    (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate);
1974 			break;
1975 		}
1976 	}
1977 
1978 	return (rv);
1979 }
1980 
1981 /*
1982  * Register channel with the channel nexus
1983  */
1984 static int
1985 i_ldc_register_channel(ldc_chan_t *ldcp)
1986 {
1987 	int		rv = 0;
1988 	ldc_cnex_t	*cinfo = &ldcssp->cinfo;
1989 
1990 	if (cinfo->dip == NULL) {
1991 		DWARN(ldcp->id,
1992 		    "i_ldc_register_channel: cnex has not registered\n");
1993 		return (EAGAIN);
1994 	}
1995 
1996 	rv = cinfo->reg_chan(cinfo->dip, ldcp->id, ldcp->devclass);
1997 	if (rv) {
1998 		DWARN(ldcp->id,
1999 		    "i_ldc_register_channel: cannot register channel\n");
2000 		return (rv);
2001 	}
2002 
2003 	rv = cinfo->add_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR,
2004 	    i_ldc_tx_hdlr, ldcp, NULL);
2005 	if (rv) {
2006 		DWARN(ldcp->id,
2007 		    "i_ldc_register_channel: cannot add Tx interrupt\n");
2008 		(void) cinfo->unreg_chan(cinfo->dip, ldcp->id);
2009 		return (rv);
2010 	}
2011 
2012 	rv = cinfo->add_intr(cinfo->dip, ldcp->id, CNEX_RX_INTR,
2013 	    i_ldc_rx_hdlr, ldcp, NULL);
2014 	if (rv) {
2015 		DWARN(ldcp->id,
2016 		    "i_ldc_register_channel: cannot add Rx interrupt\n");
2017 		(void) cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR);
2018 		(void) cinfo->unreg_chan(cinfo->dip, ldcp->id);
2019 		return (rv);
2020 	}
2021 
2022 	ldcp->tstate |= TS_CNEX_RDY;
2023 
2024 	return (0);
2025 }
2026 
2027 /*
2028  * Unregister a channel with the channel nexus
2029  */
2030 static int
2031 i_ldc_unregister_channel(ldc_chan_t *ldcp)
2032 {
2033 	int		rv = 0;
2034 	ldc_cnex_t	*cinfo = &ldcssp->cinfo;
2035 
2036 	if (cinfo->dip == NULL) {
2037 		DWARN(ldcp->id,
2038 		    "i_ldc_unregister_channel: cnex has not registered\n");
2039 		return (EAGAIN);
2040 	}
2041 
2042 	if (ldcp->tstate & TS_CNEX_RDY) {
2043 
2044 		/* Remove the Rx interrupt */
2045 		rv = cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_RX_INTR);
2046 		if (rv) {
2047 			if (rv != EAGAIN) {
2048 				DWARN(ldcp->id,
2049 				    "i_ldc_unregister_channel: err removing "
2050 				    "Rx intr\n");
2051 				return (rv);
2052 			}
2053 
2054 			/*
2055 			 * If interrupts are pending and handler has
2056 			 * finished running, clear interrupt and try
2057 			 * again
2058 			 */
2059 			if (ldcp->rx_intr_state != LDC_INTR_PEND)
2060 				return (rv);
2061 
2062 			(void) i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
2063 			rv = cinfo->rem_intr(cinfo->dip, ldcp->id,
2064 			    CNEX_RX_INTR);
2065 			if (rv) {
2066 				DWARN(ldcp->id, "i_ldc_unregister_channel: "
2067 				    "err removing Rx interrupt\n");
2068 				return (rv);
2069 			}
2070 		}
2071 
2072 		/* Remove the Tx interrupt */
2073 		rv = cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR);
2074 		if (rv) {
2075 			DWARN(ldcp->id,
2076 			    "i_ldc_unregister_channel: err removing Tx intr\n");
2077 			return (rv);
2078 		}
2079 
2080 		/* Unregister the channel */
2081 		rv = cinfo->unreg_chan(ldcssp->cinfo.dip, ldcp->id);
2082 		if (rv) {
2083 			DWARN(ldcp->id,
2084 			    "i_ldc_unregister_channel: cannot unreg channel\n");
2085 			return (rv);
2086 		}
2087 
2088 		ldcp->tstate &= ~TS_CNEX_RDY;
2089 	}
2090 
2091 	return (0);
2092 }
2093 
2094 
2095 /*
2096  * LDC transmit interrupt handler
2097  *    triggered for chanel up/down/reset events
2098  *    and Tx queue content changes
2099  */
2100 static uint_t
2101 i_ldc_tx_hdlr(caddr_t arg1, caddr_t arg2)
2102 {
2103 	_NOTE(ARGUNUSED(arg2))
2104 
2105 	int 		rv;
2106 	ldc_chan_t 	*ldcp;
2107 	boolean_t 	notify_client = B_FALSE;
2108 	uint64_t	notify_event = 0, link_state;
2109 
2110 	/* Get the channel for which interrupt was received */
2111 	ASSERT(arg1 != NULL);
2112 	ldcp = (ldc_chan_t *)arg1;
2113 
2114 	D1(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) Received intr, ldcp=0x%p\n",
2115 	    ldcp->id, ldcp);
2116 
2117 	/* Lock channel */
2118 	mutex_enter(&ldcp->lock);
2119 
2120 	/* Obtain Tx lock */
2121 	mutex_enter(&ldcp->tx_lock);
2122 
2123 	/* mark interrupt as pending */
2124 	ldcp->tx_intr_state = LDC_INTR_ACTIVE;
2125 
2126 	/* save current link state */
2127 	link_state = ldcp->link_state;
2128 
2129 	rv = hv_ldc_tx_get_state(ldcp->id, &ldcp->tx_head, &ldcp->tx_tail,
2130 	    &ldcp->link_state);
2131 	if (rv) {
2132 		cmn_err(CE_WARN,
2133 		    "i_ldc_tx_hdlr: (0x%lx) cannot read queue ptrs rv=0x%d\n",
2134 		    ldcp->id, rv);
2135 		i_ldc_clear_intr(ldcp, CNEX_TX_INTR);
2136 		mutex_exit(&ldcp->tx_lock);
2137 		mutex_exit(&ldcp->lock);
2138 		return (DDI_INTR_CLAIMED);
2139 	}
2140 
2141 	/*
2142 	 * reset the channel state if the channel went down
2143 	 * (other side unconfigured queue) or channel was reset
2144 	 * (other side reconfigured its queue)
2145 	 */
2146 	if (link_state != ldcp->link_state &&
2147 	    ldcp->link_state == LDC_CHANNEL_DOWN) {
2148 		D1(ldcp->id, "i_ldc_tx_hdlr: channel link down\n", ldcp->id);
2149 		i_ldc_reset(ldcp, B_FALSE);
2150 		notify_client = B_TRUE;
2151 		notify_event = LDC_EVT_DOWN;
2152 	}
2153 
2154 	if (link_state != ldcp->link_state &&
2155 	    ldcp->link_state == LDC_CHANNEL_RESET) {
2156 		D1(ldcp->id, "i_ldc_tx_hdlr: channel link reset\n", ldcp->id);
2157 		i_ldc_reset(ldcp, B_FALSE);
2158 		notify_client = B_TRUE;
2159 		notify_event = LDC_EVT_RESET;
2160 	}
2161 
2162 	if (link_state != ldcp->link_state &&
2163 	    (ldcp->tstate & ~TS_IN_RESET) == TS_OPEN &&
2164 	    ldcp->link_state == LDC_CHANNEL_UP) {
2165 		D1(ldcp->id, "i_ldc_tx_hdlr: channel link up\n", ldcp->id);
2166 		notify_client = B_TRUE;
2167 		notify_event = LDC_EVT_RESET;
2168 		ldcp->tstate |= TS_LINK_READY;
2169 		ldcp->status = LDC_READY;
2170 	}
2171 
2172 	/* if callbacks are disabled, do not notify */
2173 	if (!ldcp->cb_enabled)
2174 		notify_client = B_FALSE;
2175 
2176 	i_ldc_clear_intr(ldcp, CNEX_TX_INTR);
2177 	mutex_exit(&ldcp->tx_lock);
2178 
2179 	if (notify_client) {
2180 		ldcp->cb_inprogress = B_TRUE;
2181 		mutex_exit(&ldcp->lock);
2182 		rv = ldcp->cb(notify_event, ldcp->cb_arg);
2183 		if (rv) {
2184 			DWARN(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) callback "
2185 			    "failure", ldcp->id);
2186 		}
2187 		mutex_enter(&ldcp->lock);
2188 		ldcp->cb_inprogress = B_FALSE;
2189 	}
2190 
2191 	mutex_exit(&ldcp->lock);
2192 
2193 	D1(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) exiting handler", ldcp->id);
2194 
2195 	return (DDI_INTR_CLAIMED);
2196 }
2197 
2198 /*
2199  * Process the Rx HV queue.
2200  *
2201  * Returns 0 if data packets were found and no errors were encountered,
2202  * otherwise returns an error. In either case, the *notify argument is
2203  * set to indicate whether or not the client callback function should
2204  * be invoked. The *event argument is set to contain the callback event.
2205  *
2206  * Depending on the channel mode, packets are handled differently:
2207  *
2208  * RAW MODE
2209  * For raw mode channels, when a data packet is encountered,
2210  * processing stops and all packets are left on the queue to be removed
2211  * and processed by the ldc_read code path.
2212  *
2213  * UNRELIABLE MODE
2214  * For unreliable mode, when a data packet is encountered, processing
2215  * stops, and all packets are left on the queue to be removed and
2216  * processed by the ldc_read code path. Control packets are processed
2217  * inline if they are encountered before any data packets.
2218  *
2219  * RELIABLE MODE
2220  * For reliable mode channels, all packets on the receive queue
2221  * are processed: data packets are copied to the data queue and
2222  * control packets are processed inline. Packets are only left on
2223  * the receive queue when the data queue is full.
2224  */
2225 static uint_t
2226 i_ldc_rx_process_hvq(ldc_chan_t *ldcp, boolean_t *notify_client,
2227     uint64_t *notify_event)
2228 {
2229 	int		rv;
2230 	uint64_t 	rx_head, rx_tail;
2231 	ldc_msg_t 	*msg;
2232 	uint64_t	link_state, first_fragment = 0;
2233 	boolean_t	trace_length = B_TRUE;
2234 
2235 	ASSERT(MUTEX_HELD(&ldcp->lock));
2236 	*notify_client = B_FALSE;
2237 	*notify_event = 0;
2238 
2239 	/*
2240 	 * Read packet(s) from the queue
2241 	 */
2242 	for (;;) {
2243 
2244 		link_state = ldcp->link_state;
2245 		rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
2246 		    &ldcp->link_state);
2247 		if (rv) {
2248 			cmn_err(CE_WARN,
2249 			    "i_ldc_rx_process_hvq: (0x%lx) cannot read "
2250 			    "queue ptrs, rv=0x%d\n", ldcp->id, rv);
2251 			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
2252 			return (EIO);
2253 		}
2254 
2255 		/*
2256 		 * reset the channel state if the channel went down
2257 		 * (other side unconfigured queue) or channel was reset
2258 		 * (other side reconfigured its queue)
2259 		 */
2260 
2261 		if (link_state != ldcp->link_state) {
2262 
2263 			switch (ldcp->link_state) {
2264 			case LDC_CHANNEL_DOWN:
2265 				D1(ldcp->id, "i_ldc_rx_process_hvq: channel "
2266 				    "link down\n", ldcp->id);
2267 				mutex_enter(&ldcp->tx_lock);
2268 				i_ldc_reset(ldcp, B_FALSE);
2269 				mutex_exit(&ldcp->tx_lock);
2270 				*notify_client = B_TRUE;
2271 				*notify_event = LDC_EVT_DOWN;
2272 				goto loop_exit;
2273 
2274 			case LDC_CHANNEL_UP:
2275 				D1(ldcp->id, "i_ldc_rx_process_hvq: "
2276 				    "channel link up\n", ldcp->id);
2277 
2278 				if ((ldcp->tstate & ~TS_IN_RESET) == TS_OPEN) {
2279 					*notify_client = B_TRUE;
2280 					*notify_event = LDC_EVT_RESET;
2281 					ldcp->tstate |= TS_LINK_READY;
2282 					ldcp->status = LDC_READY;
2283 				}
2284 				break;
2285 
2286 			case LDC_CHANNEL_RESET:
2287 			default:
2288 #ifdef DEBUG
2289 force_reset:
2290 #endif
2291 				D1(ldcp->id, "i_ldc_rx_process_hvq: channel "
2292 				    "link reset\n", ldcp->id);
2293 				mutex_enter(&ldcp->tx_lock);
2294 				i_ldc_reset(ldcp, B_FALSE);
2295 				mutex_exit(&ldcp->tx_lock);
2296 				*notify_client = B_TRUE;
2297 				*notify_event = LDC_EVT_RESET;
2298 				break;
2299 			}
2300 		}
2301 
2302 #ifdef DEBUG
2303 		if (LDC_INJECT_RESET(ldcp))
2304 			goto force_reset;
2305 		if (LDC_INJECT_DRNGCLEAR(ldcp))
2306 			i_ldc_mem_inject_dring_clear(ldcp);
2307 #endif
2308 		if (trace_length) {
2309 			TRACE_RXHVQ_LENGTH(ldcp, rx_head, rx_tail);
2310 			trace_length = B_FALSE;
2311 		}
2312 
2313 		if (rx_head == rx_tail) {
2314 			D2(ldcp->id, "i_ldc_rx_process_hvq: (0x%llx) "
2315 			    "No packets\n", ldcp->id);
2316 			break;
2317 		}
2318 
2319 		D2(ldcp->id, "i_ldc_rx_process_hvq: head=0x%llx, "
2320 		    "tail=0x%llx\n", rx_head, rx_tail);
2321 		DUMP_LDC_PKT(ldcp, "i_ldc_rx_process_hvq rcd",
2322 		    ldcp->rx_q_va + rx_head);
2323 
2324 		/* get the message */
2325 		msg = (ldc_msg_t *)(ldcp->rx_q_va + rx_head);
2326 
2327 		/* if channel is in RAW mode or data pkt, notify and return */
2328 		if (ldcp->mode == LDC_MODE_RAW) {
2329 			*notify_client = B_TRUE;
2330 			*notify_event |= LDC_EVT_READ;
2331 			break;
2332 		}
2333 
2334 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_INFO)) {
2335 
2336 			/* discard packet if channel is not up */
2337 			if ((ldcp->tstate & ~TS_IN_RESET) != TS_UP) {
2338 
2339 				/* move the head one position */
2340 				rx_head = (rx_head + LDC_PACKET_SIZE) %
2341 				    (ldcp->rx_q_entries << LDC_PACKET_SHIFT);
2342 
2343 				if (rv = i_ldc_set_rx_head(ldcp, rx_head))
2344 					break;
2345 
2346 				continue;
2347 			} else {
2348 				uint64_t dq_head, dq_tail;
2349 
2350 				/* process only RELIABLE mode data packets */
2351 				if (ldcp->mode != LDC_MODE_RELIABLE) {
2352 					if ((ldcp->tstate & TS_IN_RESET) == 0)
2353 						*notify_client = B_TRUE;
2354 					*notify_event |= LDC_EVT_READ;
2355 					break;
2356 				}
2357 
2358 				/* don't process packet if queue full */
2359 				(void) i_ldc_dq_rx_get_state(ldcp, &dq_head,
2360 				    &dq_tail, NULL);
2361 				dq_tail = (dq_tail + LDC_PACKET_SIZE) %
2362 				    (ldcp->rx_dq_entries << LDC_PACKET_SHIFT);
2363 				if (dq_tail == dq_head ||
2364 				    LDC_INJECT_DQFULL(ldcp)) {
2365 					rv = ENOSPC;
2366 					break;
2367 				}
2368 			}
2369 		}
2370 
2371 		/* Check the sequence ID for the message received */
2372 		rv = i_ldc_check_seqid(ldcp, msg);
2373 		if (rv != 0) {
2374 
2375 			DWARN(ldcp->id, "i_ldc_rx_process_hvq: (0x%llx) "
2376 			    "seqid error, q_ptrs=0x%lx,0x%lx", ldcp->id,
2377 			    rx_head, rx_tail);
2378 
2379 			/* Reset last_msg_rcd to start of message */
2380 			if (first_fragment != 0) {
2381 				ldcp->last_msg_rcd = first_fragment - 1;
2382 				first_fragment = 0;
2383 			}
2384 
2385 			/*
2386 			 * Send a NACK due to seqid mismatch
2387 			 */
2388 			rv = i_ldc_send_pkt(ldcp, msg->type, LDC_NACK,
2389 			    (msg->ctrl & LDC_CTRL_MASK));
2390 
2391 			if (rv) {
2392 				cmn_err(CE_NOTE, "i_ldc_rx_process_hvq: "
2393 				    "(0x%lx) err sending CTRL/DATA NACK msg\n",
2394 				    ldcp->id);
2395 
2396 				/* if cannot send NACK - reset channel */
2397 				mutex_enter(&ldcp->tx_lock);
2398 				i_ldc_reset(ldcp, B_TRUE);
2399 				mutex_exit(&ldcp->tx_lock);
2400 
2401 				*notify_client = B_TRUE;
2402 				*notify_event = LDC_EVT_RESET;
2403 				break;
2404 			}
2405 
2406 			/* purge receive queue */
2407 			(void) i_ldc_set_rx_head(ldcp, rx_tail);
2408 			break;
2409 		}
2410 
2411 		/* record the message ID */
2412 		ldcp->last_msg_rcd = msg->seqid;
2413 
2414 		/* process control messages */
2415 		if (msg->type & LDC_CTRL) {
2416 			/* save current internal state */
2417 			uint64_t tstate = ldcp->tstate;
2418 
2419 			rv = i_ldc_ctrlmsg(ldcp, msg);
2420 			if (rv == EAGAIN) {
2421 				/* re-process pkt - state was adjusted */
2422 				continue;
2423 			}
2424 			if (rv == ECONNRESET) {
2425 				*notify_client = B_TRUE;
2426 				*notify_event = LDC_EVT_RESET;
2427 				break;
2428 			}
2429 
2430 			/*
2431 			 * control message processing was successful
2432 			 * channel transitioned to ready for communication
2433 			 */
2434 			if (rv == 0 && ldcp->tstate == TS_UP &&
2435 			    (tstate & ~TS_IN_RESET) !=
2436 			    (ldcp->tstate & ~TS_IN_RESET)) {
2437 				*notify_client = B_TRUE;
2438 				*notify_event = LDC_EVT_UP;
2439 			}
2440 		}
2441 
2442 		/* process data NACKs */
2443 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_NACK)) {
2444 			DWARN(ldcp->id,
2445 			    "i_ldc_rx_process_hvq: (0x%llx) received DATA/NACK",
2446 			    ldcp->id);
2447 			mutex_enter(&ldcp->tx_lock);
2448 			i_ldc_reset(ldcp, B_TRUE);
2449 			mutex_exit(&ldcp->tx_lock);
2450 			*notify_client = B_TRUE;
2451 			*notify_event = LDC_EVT_RESET;
2452 			break;
2453 		}
2454 
2455 		/* process data ACKs */
2456 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) {
2457 			if (rv = i_ldc_process_data_ACK(ldcp, msg)) {
2458 				*notify_client = B_TRUE;
2459 				*notify_event = LDC_EVT_RESET;
2460 				break;
2461 			}
2462 		}
2463 
2464 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_INFO)) {
2465 			ASSERT(ldcp->mode == LDC_MODE_RELIABLE);
2466 
2467 			/*
2468 			 * Copy the data packet to the data queue. Note
2469 			 * that the copy routine updates the rx_head pointer.
2470 			 */
2471 			i_ldc_rxdq_copy(ldcp, &rx_head);
2472 
2473 			if ((ldcp->tstate & TS_IN_RESET) == 0)
2474 				*notify_client = B_TRUE;
2475 			*notify_event |= LDC_EVT_READ;
2476 		} else {
2477 			rx_head = (rx_head + LDC_PACKET_SIZE) %
2478 			    (ldcp->rx_q_entries << LDC_PACKET_SHIFT);
2479 		}
2480 
2481 		/* move the head one position */
2482 		if (rv = i_ldc_set_rx_head(ldcp, rx_head)) {
2483 			*notify_client = B_TRUE;
2484 			*notify_event = LDC_EVT_RESET;
2485 			break;
2486 		}
2487 
2488 	} /* for */
2489 
2490 loop_exit:
2491 
2492 	if (ldcp->mode == LDC_MODE_RELIABLE) {
2493 		/* ACK data packets */
2494 		if ((*notify_event &
2495 		    (LDC_EVT_READ | LDC_EVT_RESET)) == LDC_EVT_READ) {
2496 			int ack_rv;
2497 			ack_rv = i_ldc_send_pkt(ldcp, LDC_DATA, LDC_ACK, 0);
2498 			if (ack_rv && ack_rv != EWOULDBLOCK) {
2499 				cmn_err(CE_NOTE,
2500 				    "i_ldc_rx_process_hvq: (0x%lx) cannot "
2501 				    "send ACK\n", ldcp->id);
2502 
2503 				mutex_enter(&ldcp->tx_lock);
2504 				i_ldc_reset(ldcp, B_FALSE);
2505 				mutex_exit(&ldcp->tx_lock);
2506 
2507 				*notify_client = B_TRUE;
2508 				*notify_event = LDC_EVT_RESET;
2509 				goto skip_ackpeek;
2510 			}
2511 		}
2512 
2513 		/*
2514 		 * If we have no more space on the data queue, make sure
2515 		 * there are no ACKs on the rx queue waiting to be processed.
2516 		 */
2517 		if (rv == ENOSPC) {
2518 			if (i_ldc_rx_ackpeek(ldcp, rx_head, rx_tail) != 0) {
2519 				ldcp->rx_ack_head = ACKPEEK_HEAD_INVALID;
2520 				*notify_client = B_TRUE;
2521 				*notify_event = LDC_EVT_RESET;
2522 			}
2523 			return (rv);
2524 		} else {
2525 			ldcp->rx_ack_head = ACKPEEK_HEAD_INVALID;
2526 		}
2527 	}
2528 
2529 skip_ackpeek:
2530 
2531 	/* Return, indicating whether or not data packets were found */
2532 	if ((*notify_event & (LDC_EVT_READ | LDC_EVT_RESET)) == LDC_EVT_READ)
2533 		return (0);
2534 
2535 	return (ENOMSG);
2536 }
2537 
2538 /*
2539  * Process any ACK packets on the HV receive queue.
2540  *
2541  * This function is only used by RELIABLE mode channels when the
2542  * secondary data queue fills up and there are packets remaining on
2543  * the HV receive queue.
2544  */
2545 int
2546 i_ldc_rx_ackpeek(ldc_chan_t *ldcp, uint64_t rx_head, uint64_t rx_tail)
2547 {
2548 	int		rv = 0;
2549 	ldc_msg_t	*msg;
2550 
2551 	if (ldcp->rx_ack_head == ACKPEEK_HEAD_INVALID)
2552 		ldcp->rx_ack_head = rx_head;
2553 
2554 	while (ldcp->rx_ack_head != rx_tail) {
2555 		msg = (ldc_msg_t *)(ldcp->rx_q_va + ldcp->rx_ack_head);
2556 
2557 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) {
2558 			if (rv = i_ldc_process_data_ACK(ldcp, msg))
2559 				break;
2560 			msg->stype &= ~LDC_ACK;
2561 		}
2562 
2563 		ldcp->rx_ack_head =
2564 		    (ldcp->rx_ack_head + LDC_PACKET_SIZE) %
2565 		    (ldcp->rx_q_entries << LDC_PACKET_SHIFT);
2566 	}
2567 	return (rv);
2568 }
2569 
2570 /* -------------------------------------------------------------------------- */
2571 
2572 /*
2573  * LDC API functions
2574  */
2575 
2576 /*
2577  * Initialize the channel. Allocate internal structure and memory for
2578  * TX/RX queues, and initialize locks.
2579  */
2580 int
2581 ldc_init(uint64_t id, ldc_attr_t *attr, ldc_handle_t *handle)
2582 {
2583 	ldc_chan_t 	*ldcp;
2584 	int		rv, exit_val;
2585 	uint64_t	ra_base, nentries;
2586 	uint64_t	qlen;
2587 
2588 	exit_val = EINVAL;	/* guarantee an error if exit on failure */
2589 
2590 	if (attr == NULL) {
2591 		DWARN(id, "ldc_init: (0x%llx) invalid attr\n", id);
2592 		return (EINVAL);
2593 	}
2594 	if (handle == NULL) {
2595 		DWARN(id, "ldc_init: (0x%llx) invalid handle\n", id);
2596 		return (EINVAL);
2597 	}
2598 
2599 	/* check if channel is valid */
2600 	rv = hv_ldc_tx_qinfo(id, &ra_base, &nentries);
2601 	if (rv == H_ECHANNEL) {
2602 		DWARN(id, "ldc_init: (0x%llx) invalid channel id\n", id);
2603 		return (EINVAL);
2604 	}
2605 
2606 	/* check if the channel has already been initialized */
2607 	mutex_enter(&ldcssp->lock);
2608 	ldcp = ldcssp->chan_list;
2609 	while (ldcp != NULL) {
2610 		if (ldcp->id == id) {
2611 			DWARN(id, "ldc_init: (0x%llx) already initialized\n",
2612 			    id);
2613 			mutex_exit(&ldcssp->lock);
2614 			return (EADDRINUSE);
2615 		}
2616 		ldcp = ldcp->next;
2617 	}
2618 	mutex_exit(&ldcssp->lock);
2619 
2620 	ASSERT(ldcp == NULL);
2621 
2622 	*handle = 0;
2623 
2624 	/* Allocate an ldcp structure */
2625 	ldcp = kmem_zalloc(sizeof (ldc_chan_t), KM_SLEEP);
2626 
2627 	/*
2628 	 * Initialize the channel and Tx lock
2629 	 *
2630 	 * The channel 'lock' protects the entire channel and
2631 	 * should be acquired before initializing, resetting,
2632 	 * destroying or reading from a channel.
2633 	 *
2634 	 * The 'tx_lock' should be acquired prior to transmitting
2635 	 * data over the channel. The lock should also be acquired
2636 	 * prior to channel reconfiguration (in order to prevent
2637 	 * concurrent writes).
2638 	 *
2639 	 * ORDERING: When both locks are being acquired, to prevent
2640 	 * deadlocks, the channel lock should be always acquired prior
2641 	 * to the tx_lock.
2642 	 */
2643 	mutex_init(&ldcp->lock, NULL, MUTEX_DRIVER, NULL);
2644 	mutex_init(&ldcp->tx_lock, NULL, MUTEX_DRIVER, NULL);
2645 
2646 	/* Initialize the channel */
2647 	ldcp->id = id;
2648 	ldcp->cb = NULL;
2649 	ldcp->cb_arg = NULL;
2650 	ldcp->cb_inprogress = B_FALSE;
2651 	ldcp->cb_enabled = B_FALSE;
2652 	ldcp->next = NULL;
2653 
2654 	/* Read attributes */
2655 	ldcp->mode = attr->mode;
2656 	ldcp->devclass = attr->devclass;
2657 	ldcp->devinst = attr->instance;
2658 	ldcp->mtu = (attr->mtu > 0) ? attr->mtu : LDC_DEFAULT_MTU;
2659 
2660 	D1(ldcp->id,
2661 	    "ldc_init: (0x%llx) channel attributes, class=0x%x, "
2662 	    "instance=0x%llx, mode=%d, mtu=%d\n",
2663 	    ldcp->id, ldcp->devclass, ldcp->devinst, ldcp->mode, ldcp->mtu);
2664 
2665 	ldcp->next_vidx = 0;
2666 	ldcp->tstate = TS_IN_RESET;
2667 	ldcp->hstate = 0;
2668 	ldcp->last_msg_snt = LDC_INIT_SEQID;
2669 	ldcp->last_ack_rcd = 0;
2670 	ldcp->last_msg_rcd = 0;
2671 	ldcp->rx_ack_head = ACKPEEK_HEAD_INVALID;
2672 
2673 	ldcp->stream_bufferp = NULL;
2674 	ldcp->exp_dring_list = NULL;
2675 	ldcp->imp_dring_list = NULL;
2676 	ldcp->mhdl_list = NULL;
2677 
2678 	ldcp->tx_intr_state = LDC_INTR_NONE;
2679 	ldcp->rx_intr_state = LDC_INTR_NONE;
2680 
2681 	/* Initialize payload size depending on whether channel is reliable */
2682 	switch (ldcp->mode) {
2683 	case LDC_MODE_RAW:
2684 		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_RAW;
2685 		ldcp->read_p = i_ldc_read_raw;
2686 		ldcp->write_p = i_ldc_write_raw;
2687 		break;
2688 	case LDC_MODE_UNRELIABLE:
2689 		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_UNRELIABLE;
2690 		ldcp->read_p = i_ldc_read_packet;
2691 		ldcp->write_p = i_ldc_write_packet;
2692 		break;
2693 	case LDC_MODE_RELIABLE:
2694 		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_RELIABLE;
2695 
2696 		ldcp->stream_remains = 0;
2697 		ldcp->stream_offset = 0;
2698 		ldcp->stream_bufferp = kmem_alloc(ldcp->mtu, KM_SLEEP);
2699 		ldcp->read_p = i_ldc_read_stream;
2700 		ldcp->write_p = i_ldc_write_stream;
2701 		break;
2702 	default:
2703 		exit_val = EINVAL;
2704 		goto cleanup_on_exit;
2705 	}
2706 
2707 	/*
2708 	 * qlen is (mtu * ldc_mtu_msgs) / pkt_payload. If this
2709 	 * value is smaller than default length of ldc_queue_entries,
2710 	 * qlen is set to ldc_queue_entries. Ensure that computed
2711 	 * length is a power-of-two value.
2712 	 */
2713 	qlen = (ldcp->mtu * ldc_mtu_msgs) / ldcp->pkt_payload;
2714 	if (!ISP2(qlen)) {
2715 		uint64_t	tmp = 1;
2716 		while (qlen) {
2717 			qlen >>= 1; tmp <<= 1;
2718 		}
2719 		qlen = tmp;
2720 	}
2721 
2722 	ldcp->rx_q_entries =
2723 	    (qlen < ldc_queue_entries) ? ldc_queue_entries : qlen;
2724 	ldcp->tx_q_entries = ldcp->rx_q_entries;
2725 
2726 	D1(ldcp->id, "ldc_init: queue length = 0x%llx\n", ldcp->rx_q_entries);
2727 
2728 	/* Create a transmit queue */
2729 	ldcp->tx_q_va = (uint64_t)
2730 	    contig_mem_alloc(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
2731 	if (ldcp->tx_q_va == NULL) {
2732 		cmn_err(CE_WARN,
2733 		    "ldc_init: (0x%lx) TX queue allocation failed\n",
2734 		    ldcp->id);
2735 		exit_val = ENOMEM;
2736 		goto cleanup_on_exit;
2737 	}
2738 	ldcp->tx_q_ra = va_to_pa((caddr_t)ldcp->tx_q_va);
2739 
2740 	D2(ldcp->id, "ldc_init: txq_va=0x%llx, txq_ra=0x%llx, entries=0x%llx\n",
2741 	    ldcp->tx_q_va, ldcp->tx_q_ra, ldcp->tx_q_entries);
2742 
2743 	ldcp->tstate |= TS_TXQ_RDY;
2744 
2745 	/* Create a receive queue */
2746 	ldcp->rx_q_va = (uint64_t)
2747 	    contig_mem_alloc(ldcp->rx_q_entries << LDC_PACKET_SHIFT);
2748 	if (ldcp->rx_q_va == NULL) {
2749 		cmn_err(CE_WARN,
2750 		    "ldc_init: (0x%lx) RX queue allocation failed\n",
2751 		    ldcp->id);
2752 		exit_val = ENOMEM;
2753 		goto cleanup_on_exit;
2754 	}
2755 	ldcp->rx_q_ra = va_to_pa((caddr_t)ldcp->rx_q_va);
2756 
2757 	D2(ldcp->id, "ldc_init: rxq_va=0x%llx, rxq_ra=0x%llx, entries=0x%llx\n",
2758 	    ldcp->rx_q_va, ldcp->rx_q_ra, ldcp->rx_q_entries);
2759 
2760 	ldcp->tstate |= TS_RXQ_RDY;
2761 
2762 	/* Setup a separate read data queue */
2763 	if (ldcp->mode == LDC_MODE_RELIABLE) {
2764 		ldcp->readq_get_state = i_ldc_dq_rx_get_state;
2765 		ldcp->readq_set_head  = i_ldc_set_rxdq_head;
2766 
2767 		/* Make sure the data queue multiplier is a power of 2 */
2768 		if (!ISP2(ldc_rxdq_multiplier)) {
2769 			D1(ldcp->id, "ldc_init: (0x%llx) ldc_rxdq_multiplier "
2770 			    "not a power of 2, resetting", ldcp->id);
2771 			ldc_rxdq_multiplier = LDC_RXDQ_MULTIPLIER;
2772 		}
2773 
2774 		ldcp->rx_dq_entries = ldc_rxdq_multiplier * ldcp->rx_q_entries;
2775 		ldcp->rx_dq_va = (uint64_t)
2776 		    kmem_alloc(ldcp->rx_dq_entries << LDC_PACKET_SHIFT,
2777 		    KM_SLEEP);
2778 		if (ldcp->rx_dq_va == NULL) {
2779 			cmn_err(CE_WARN,
2780 			    "ldc_init: (0x%lx) RX data queue "
2781 			    "allocation failed\n", ldcp->id);
2782 			exit_val = ENOMEM;
2783 			goto cleanup_on_exit;
2784 		}
2785 
2786 		ldcp->rx_dq_head = ldcp->rx_dq_tail = 0;
2787 
2788 		D2(ldcp->id, "ldc_init: rx_dq_va=0x%llx, "
2789 		    "rx_dq_entries=0x%llx\n", ldcp->rx_dq_va,
2790 		    ldcp->rx_dq_entries);
2791 	} else {
2792 		ldcp->readq_get_state = i_ldc_hvq_rx_get_state;
2793 		ldcp->readq_set_head  = i_ldc_set_rx_head;
2794 	}
2795 
2796 	/* Init descriptor ring and memory handle list lock */
2797 	mutex_init(&ldcp->exp_dlist_lock, NULL, MUTEX_DRIVER, NULL);
2798 	mutex_init(&ldcp->imp_dlist_lock, NULL, MUTEX_DRIVER, NULL);
2799 	mutex_init(&ldcp->mlist_lock, NULL, MUTEX_DRIVER, NULL);
2800 
2801 	/* mark status as INITialized */
2802 	ldcp->status = LDC_INIT;
2803 
2804 	/* Add to channel list */
2805 	mutex_enter(&ldcssp->lock);
2806 	ldcp->next = ldcssp->chan_list;
2807 	ldcssp->chan_list = ldcp;
2808 	ldcssp->channel_count++;
2809 	mutex_exit(&ldcssp->lock);
2810 
2811 	/* set the handle */
2812 	*handle = (ldc_handle_t)ldcp;
2813 
2814 	D1(ldcp->id, "ldc_init: (0x%llx) channel initialized\n", ldcp->id);
2815 
2816 	return (0);
2817 
2818 cleanup_on_exit:
2819 
2820 	if (ldcp->mode == LDC_MODE_RELIABLE && ldcp->stream_bufferp)
2821 		kmem_free(ldcp->stream_bufferp, ldcp->mtu);
2822 
2823 	if (ldcp->tstate & TS_TXQ_RDY)
2824 		contig_mem_free((caddr_t)ldcp->tx_q_va,
2825 		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT));
2826 
2827 	if (ldcp->tstate & TS_RXQ_RDY)
2828 		contig_mem_free((caddr_t)ldcp->rx_q_va,
2829 		    (ldcp->rx_q_entries << LDC_PACKET_SHIFT));
2830 
2831 	mutex_destroy(&ldcp->tx_lock);
2832 	mutex_destroy(&ldcp->lock);
2833 
2834 	if (ldcp)
2835 		kmem_free(ldcp, sizeof (ldc_chan_t));
2836 
2837 	return (exit_val);
2838 }
2839 
2840 /*
2841  * Finalizes the LDC connection. It will return EBUSY if the
2842  * channel is open. A ldc_close() has to be done prior to
2843  * a ldc_fini operation. It frees TX/RX queues, associated
2844  * with the channel
2845  */
2846 int
2847 ldc_fini(ldc_handle_t handle)
2848 {
2849 	ldc_chan_t 	*ldcp;
2850 	ldc_chan_t 	*tmp_ldcp;
2851 	uint64_t 	id;
2852 
2853 	if (handle == NULL) {
2854 		DWARN(DBG_ALL_LDCS, "ldc_fini: invalid channel handle\n");
2855 		return (EINVAL);
2856 	}
2857 	ldcp = (ldc_chan_t *)handle;
2858 	id = ldcp->id;
2859 
2860 	mutex_enter(&ldcp->lock);
2861 
2862 	if ((ldcp->tstate & ~TS_IN_RESET) > TS_INIT) {
2863 		DWARN(ldcp->id, "ldc_fini: (0x%llx) channel is open\n",
2864 		    ldcp->id);
2865 		mutex_exit(&ldcp->lock);
2866 		return (EBUSY);
2867 	}
2868 
2869 	/* Remove from the channel list */
2870 	mutex_enter(&ldcssp->lock);
2871 	tmp_ldcp = ldcssp->chan_list;
2872 	if (tmp_ldcp == ldcp) {
2873 		ldcssp->chan_list = ldcp->next;
2874 		ldcp->next = NULL;
2875 	} else {
2876 		while (tmp_ldcp != NULL) {
2877 			if (tmp_ldcp->next == ldcp) {
2878 				tmp_ldcp->next = ldcp->next;
2879 				ldcp->next = NULL;
2880 				break;
2881 			}
2882 			tmp_ldcp = tmp_ldcp->next;
2883 		}
2884 		if (tmp_ldcp == NULL) {
2885 			DWARN(DBG_ALL_LDCS, "ldc_fini: invalid channel hdl\n");
2886 			mutex_exit(&ldcssp->lock);
2887 			mutex_exit(&ldcp->lock);
2888 			return (EINVAL);
2889 		}
2890 	}
2891 
2892 	ldcssp->channel_count--;
2893 
2894 	mutex_exit(&ldcssp->lock);
2895 
2896 	/* Free the map table for this channel */
2897 	if (ldcp->mtbl) {
2898 		(void) hv_ldc_set_map_table(ldcp->id, NULL, NULL);
2899 		if (ldcp->mtbl->contigmem)
2900 			contig_mem_free(ldcp->mtbl->table, ldcp->mtbl->size);
2901 		else
2902 			kmem_free(ldcp->mtbl->table, ldcp->mtbl->size);
2903 		mutex_destroy(&ldcp->mtbl->lock);
2904 		kmem_free(ldcp->mtbl, sizeof (ldc_mtbl_t));
2905 	}
2906 
2907 	/* Destroy descriptor ring and memory handle list lock */
2908 	mutex_destroy(&ldcp->exp_dlist_lock);
2909 	mutex_destroy(&ldcp->imp_dlist_lock);
2910 	mutex_destroy(&ldcp->mlist_lock);
2911 
2912 	/* Free the stream buffer for RELIABLE_MODE */
2913 	if (ldcp->mode == LDC_MODE_RELIABLE && ldcp->stream_bufferp)
2914 		kmem_free(ldcp->stream_bufferp, ldcp->mtu);
2915 
2916 	/* Free the RX queue */
2917 	contig_mem_free((caddr_t)ldcp->rx_q_va,
2918 	    (ldcp->rx_q_entries << LDC_PACKET_SHIFT));
2919 	ldcp->tstate &= ~TS_RXQ_RDY;
2920 
2921 	/* Free the RX data queue */
2922 	if (ldcp->mode == LDC_MODE_RELIABLE) {
2923 		kmem_free((caddr_t)ldcp->rx_dq_va,
2924 		    (ldcp->rx_dq_entries << LDC_PACKET_SHIFT));
2925 	}
2926 
2927 	/* Free the TX queue */
2928 	contig_mem_free((caddr_t)ldcp->tx_q_va,
2929 	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT));
2930 	ldcp->tstate &= ~TS_TXQ_RDY;
2931 
2932 	mutex_exit(&ldcp->lock);
2933 
2934 	/* Destroy mutex */
2935 	mutex_destroy(&ldcp->tx_lock);
2936 	mutex_destroy(&ldcp->lock);
2937 
2938 	/* free channel structure */
2939 	kmem_free(ldcp, sizeof (ldc_chan_t));
2940 
2941 	D1(id, "ldc_fini: (0x%llx) channel finalized\n", id);
2942 
2943 	return (0);
2944 }
2945 
2946 /*
2947  * Open the LDC channel for use. It registers the TX/RX queues
2948  * with the Hypervisor. It also specifies the interrupt number
2949  * and target CPU for this channel
2950  */
2951 int
2952 ldc_open(ldc_handle_t handle)
2953 {
2954 	ldc_chan_t 	*ldcp;
2955 	int 		rv;
2956 
2957 	if (handle == NULL) {
2958 		DWARN(DBG_ALL_LDCS, "ldc_open: invalid channel handle\n");
2959 		return (EINVAL);
2960 	}
2961 
2962 	ldcp = (ldc_chan_t *)handle;
2963 
2964 	mutex_enter(&ldcp->lock);
2965 
2966 	if (ldcp->tstate < TS_INIT) {
2967 		DWARN(ldcp->id,
2968 		    "ldc_open: (0x%llx) channel not initialized\n", ldcp->id);
2969 		mutex_exit(&ldcp->lock);
2970 		return (EFAULT);
2971 	}
2972 	if ((ldcp->tstate & ~TS_IN_RESET) >= TS_OPEN) {
2973 		DWARN(ldcp->id,
2974 		    "ldc_open: (0x%llx) channel is already open\n", ldcp->id);
2975 		mutex_exit(&ldcp->lock);
2976 		return (EFAULT);
2977 	}
2978 
2979 	/*
2980 	 * Unregister/Register the tx queue with the hypervisor
2981 	 */
2982 	rv = hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
2983 	if (rv) {
2984 		cmn_err(CE_WARN,
2985 		    "ldc_open: (0x%lx) channel tx queue unconf failed\n",
2986 		    ldcp->id);
2987 		mutex_exit(&ldcp->lock);
2988 		return (EIO);
2989 	}
2990 
2991 	rv = hv_ldc_tx_qconf(ldcp->id, ldcp->tx_q_ra, ldcp->tx_q_entries);
2992 	if (rv) {
2993 		cmn_err(CE_WARN,
2994 		    "ldc_open: (0x%lx) channel tx queue conf failed\n",
2995 		    ldcp->id);
2996 		mutex_exit(&ldcp->lock);
2997 		return (EIO);
2998 	}
2999 
3000 	D2(ldcp->id, "ldc_open: (0x%llx) registered tx queue with LDC\n",
3001 	    ldcp->id);
3002 
3003 	/*
3004 	 * Unregister/Register the rx queue with the hypervisor
3005 	 */
3006 	rv = hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
3007 	if (rv) {
3008 		cmn_err(CE_WARN,
3009 		    "ldc_open: (0x%lx) channel rx queue unconf failed\n",
3010 		    ldcp->id);
3011 		mutex_exit(&ldcp->lock);
3012 		return (EIO);
3013 	}
3014 
3015 	rv = hv_ldc_rx_qconf(ldcp->id, ldcp->rx_q_ra, ldcp->rx_q_entries);
3016 	if (rv) {
3017 		cmn_err(CE_WARN,
3018 		    "ldc_open: (0x%lx) channel rx queue conf failed\n",
3019 		    ldcp->id);
3020 		mutex_exit(&ldcp->lock);
3021 		return (EIO);
3022 	}
3023 
3024 	D2(ldcp->id, "ldc_open: (0x%llx) registered rx queue with LDC\n",
3025 	    ldcp->id);
3026 
3027 	ldcp->tstate |= TS_QCONF_RDY;
3028 
3029 	/* Register the channel with the channel nexus */
3030 	rv = i_ldc_register_channel(ldcp);
3031 	if (rv && rv != EAGAIN) {
3032 		cmn_err(CE_WARN,
3033 		    "ldc_open: (0x%lx) channel register failed\n", ldcp->id);
3034 		ldcp->tstate &= ~TS_QCONF_RDY;
3035 		(void) hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
3036 		(void) hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
3037 		mutex_exit(&ldcp->lock);
3038 		return (EIO);
3039 	}
3040 
3041 	/* mark channel in OPEN state */
3042 	ldcp->status = LDC_OPEN;
3043 
3044 	/* Read channel state */
3045 	rv = hv_ldc_tx_get_state(ldcp->id,
3046 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
3047 	if (rv) {
3048 		cmn_err(CE_WARN,
3049 		    "ldc_open: (0x%lx) cannot read channel state\n",
3050 		    ldcp->id);
3051 		(void) i_ldc_unregister_channel(ldcp);
3052 		ldcp->tstate &= ~TS_QCONF_RDY;
3053 		(void) hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
3054 		(void) hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
3055 		mutex_exit(&ldcp->lock);
3056 		return (EIO);
3057 	}
3058 
3059 	/*
3060 	 * set the ACKd head to current head location for reliable
3061 	 */
3062 	ldcp->tx_ackd_head = ldcp->tx_head;
3063 
3064 	/* mark channel ready if HV report link is UP (peer alloc'd Rx queue) */
3065 	if (ldcp->link_state == LDC_CHANNEL_UP ||
3066 	    ldcp->link_state == LDC_CHANNEL_RESET) {
3067 		ldcp->tstate |= TS_LINK_READY;
3068 		ldcp->status = LDC_READY;
3069 	}
3070 
3071 	/*
3072 	 * if channel is being opened in RAW mode - no handshake is needed
3073 	 * switch the channel READY and UP state
3074 	 */
3075 	if (ldcp->mode == LDC_MODE_RAW) {
3076 		ldcp->tstate = TS_UP;	/* set bits associated with LDC UP */
3077 		ldcp->status = LDC_UP;
3078 	}
3079 
3080 	mutex_exit(&ldcp->lock);
3081 
3082 	/*
3083 	 * Increment number of open channels
3084 	 */
3085 	mutex_enter(&ldcssp->lock);
3086 	ldcssp->channels_open++;
3087 	mutex_exit(&ldcssp->lock);
3088 
3089 	D1(ldcp->id,
3090 	    "ldc_open: (0x%llx) channel (0x%p) open for use "
3091 	    "(tstate=0x%x, status=0x%x)\n",
3092 	    ldcp->id, ldcp, ldcp->tstate, ldcp->status);
3093 
3094 	return (0);
3095 }
3096 
3097 /*
3098  * Close the LDC connection. It will return EBUSY if there
3099  * are memory segments or descriptor rings either bound to or
3100  * mapped over the channel
3101  */
3102 int
3103 ldc_close(ldc_handle_t handle)
3104 {
3105 	ldc_chan_t 	*ldcp;
3106 	int		rv = 0, retries = 0;
3107 	boolean_t	chk_done = B_FALSE;
3108 
3109 	if (handle == NULL) {
3110 		DWARN(DBG_ALL_LDCS, "ldc_close: invalid channel handle\n");
3111 		return (EINVAL);
3112 	}
3113 	ldcp = (ldc_chan_t *)handle;
3114 
3115 	mutex_enter(&ldcp->lock);
3116 
3117 	/* return error if channel is not open */
3118 	if ((ldcp->tstate & ~TS_IN_RESET) < TS_OPEN) {
3119 		DWARN(ldcp->id,
3120 		    "ldc_close: (0x%llx) channel is not open\n", ldcp->id);
3121 		mutex_exit(&ldcp->lock);
3122 		return (EFAULT);
3123 	}
3124 
3125 	/* if any memory handles, drings, are bound or mapped cannot close */
3126 	if (ldcp->mhdl_list != NULL) {
3127 		DWARN(ldcp->id,
3128 		    "ldc_close: (0x%llx) channel has bound memory handles\n",
3129 		    ldcp->id);
3130 		mutex_exit(&ldcp->lock);
3131 		return (EBUSY);
3132 	}
3133 	if (ldcp->exp_dring_list != NULL) {
3134 		DWARN(ldcp->id,
3135 		    "ldc_close: (0x%llx) channel has bound descriptor rings\n",
3136 		    ldcp->id);
3137 		mutex_exit(&ldcp->lock);
3138 		return (EBUSY);
3139 	}
3140 	if (ldcp->imp_dring_list != NULL) {
3141 		DWARN(ldcp->id,
3142 		    "ldc_close: (0x%llx) channel has mapped descriptor rings\n",
3143 		    ldcp->id);
3144 		mutex_exit(&ldcp->lock);
3145 		return (EBUSY);
3146 	}
3147 
3148 	if (ldcp->cb_inprogress) {
3149 		DWARN(ldcp->id, "ldc_close: (0x%llx) callback active\n",
3150 		    ldcp->id);
3151 		mutex_exit(&ldcp->lock);
3152 		return (EWOULDBLOCK);
3153 	}
3154 
3155 	/* Obtain Tx lock */
3156 	mutex_enter(&ldcp->tx_lock);
3157 
3158 	/*
3159 	 * Wait for pending transmits to complete i.e Tx queue to drain
3160 	 * if there are pending pkts - wait 1 ms and retry again
3161 	 */
3162 	for (;;) {
3163 
3164 		rv = hv_ldc_tx_get_state(ldcp->id,
3165 		    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
3166 		if (rv) {
3167 			cmn_err(CE_WARN,
3168 			    "ldc_close: (0x%lx) cannot read qptrs\n", ldcp->id);
3169 			mutex_exit(&ldcp->tx_lock);
3170 			mutex_exit(&ldcp->lock);
3171 			return (EIO);
3172 		}
3173 
3174 		if (ldcp->tx_head == ldcp->tx_tail ||
3175 		    ldcp->link_state != LDC_CHANNEL_UP) {
3176 			break;
3177 		}
3178 
3179 		if (chk_done) {
3180 			DWARN(ldcp->id,
3181 			    "ldc_close: (0x%llx) Tx queue drain timeout\n",
3182 			    ldcp->id);
3183 			break;
3184 		}
3185 
3186 		/* wait for one ms and try again */
3187 		delay(drv_usectohz(1000));
3188 		chk_done = B_TRUE;
3189 	}
3190 
3191 	/*
3192 	 * Drain the Tx and Rx queues as we are closing the
3193 	 * channel. We dont care about any pending packets.
3194 	 * We have to also drain the queue prior to clearing
3195 	 * pending interrupts, otherwise the HV will trigger
3196 	 * an interrupt the moment the interrupt state is
3197 	 * cleared.
3198 	 */
3199 	(void) i_ldc_txq_reconf(ldcp);
3200 	i_ldc_rxq_drain(ldcp);
3201 
3202 	/*
3203 	 * Unregister the channel with the nexus
3204 	 */
3205 	while ((rv = i_ldc_unregister_channel(ldcp)) != 0) {
3206 
3207 		mutex_exit(&ldcp->tx_lock);
3208 		mutex_exit(&ldcp->lock);
3209 
3210 		/* if any error other than EAGAIN return back */
3211 		if (rv != EAGAIN || retries >= ldc_max_retries) {
3212 			cmn_err(CE_WARN,
3213 			    "ldc_close: (0x%lx) unregister failed, %d\n",
3214 			    ldcp->id, rv);
3215 			return (rv);
3216 		}
3217 
3218 		/*
3219 		 * As there could be pending interrupts we need
3220 		 * to wait and try again
3221 		 */
3222 		drv_usecwait(ldc_close_delay);
3223 		mutex_enter(&ldcp->lock);
3224 		mutex_enter(&ldcp->tx_lock);
3225 		retries++;
3226 	}
3227 
3228 	ldcp->tstate &= ~TS_QCONF_RDY;
3229 
3230 	/*
3231 	 * Unregister queues
3232 	 */
3233 	rv = hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
3234 	if (rv) {
3235 		cmn_err(CE_WARN,
3236 		    "ldc_close: (0x%lx) channel TX queue unconf failed\n",
3237 		    ldcp->id);
3238 		mutex_exit(&ldcp->tx_lock);
3239 		mutex_exit(&ldcp->lock);
3240 		return (EIO);
3241 	}
3242 	rv = hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
3243 	if (rv) {
3244 		cmn_err(CE_WARN,
3245 		    "ldc_close: (0x%lx) channel RX queue unconf failed\n",
3246 		    ldcp->id);
3247 		mutex_exit(&ldcp->tx_lock);
3248 		mutex_exit(&ldcp->lock);
3249 		return (EIO);
3250 	}
3251 
3252 	/* Reset channel state information */
3253 	i_ldc_reset_state(ldcp);
3254 
3255 	/* Mark channel as down and in initialized state */
3256 	ldcp->tx_ackd_head = 0;
3257 	ldcp->tx_head = 0;
3258 	ldcp->tstate = TS_IN_RESET|TS_INIT;
3259 	ldcp->status = LDC_INIT;
3260 
3261 	mutex_exit(&ldcp->tx_lock);
3262 	mutex_exit(&ldcp->lock);
3263 
3264 	/* Decrement number of open channels */
3265 	mutex_enter(&ldcssp->lock);
3266 	ldcssp->channels_open--;
3267 	mutex_exit(&ldcssp->lock);
3268 
3269 	D1(ldcp->id, "ldc_close: (0x%llx) channel closed\n", ldcp->id);
3270 
3271 	return (0);
3272 }
3273 
3274 /*
3275  * Register channel callback
3276  */
3277 int
3278 ldc_reg_callback(ldc_handle_t handle,
3279     uint_t(*cb)(uint64_t event, caddr_t arg), caddr_t arg)
3280 {
3281 	ldc_chan_t *ldcp;
3282 
3283 	if (handle == NULL) {
3284 		DWARN(DBG_ALL_LDCS,
3285 		    "ldc_reg_callback: invalid channel handle\n");
3286 		return (EINVAL);
3287 	}
3288 	if (((uint64_t)cb) < KERNELBASE) {
3289 		DWARN(DBG_ALL_LDCS, "ldc_reg_callback: invalid callback\n");
3290 		return (EINVAL);
3291 	}
3292 	ldcp = (ldc_chan_t *)handle;
3293 
3294 	mutex_enter(&ldcp->lock);
3295 
3296 	if (ldcp->cb) {
3297 		DWARN(ldcp->id, "ldc_reg_callback: (0x%llx) callback exists\n",
3298 		    ldcp->id);
3299 		mutex_exit(&ldcp->lock);
3300 		return (EIO);
3301 	}
3302 	if (ldcp->cb_inprogress) {
3303 		DWARN(ldcp->id, "ldc_reg_callback: (0x%llx) callback active\n",
3304 		    ldcp->id);
3305 		mutex_exit(&ldcp->lock);
3306 		return (EWOULDBLOCK);
3307 	}
3308 
3309 	ldcp->cb = cb;
3310 	ldcp->cb_arg = arg;
3311 	ldcp->cb_enabled = B_TRUE;
3312 
3313 	D1(ldcp->id,
3314 	    "ldc_reg_callback: (0x%llx) registered callback for channel\n",
3315 	    ldcp->id);
3316 
3317 	mutex_exit(&ldcp->lock);
3318 
3319 	return (0);
3320 }
3321 
3322 /*
3323  * Unregister channel callback
3324  */
3325 int
3326 ldc_unreg_callback(ldc_handle_t handle)
3327 {
3328 	ldc_chan_t *ldcp;
3329 
3330 	if (handle == NULL) {
3331 		DWARN(DBG_ALL_LDCS,
3332 		    "ldc_unreg_callback: invalid channel handle\n");
3333 		return (EINVAL);
3334 	}
3335 	ldcp = (ldc_chan_t *)handle;
3336 
3337 	mutex_enter(&ldcp->lock);
3338 
3339 	if (ldcp->cb == NULL) {
3340 		DWARN(ldcp->id,
3341 		    "ldc_unreg_callback: (0x%llx) no callback exists\n",
3342 		    ldcp->id);
3343 		mutex_exit(&ldcp->lock);
3344 		return (EIO);
3345 	}
3346 	if (ldcp->cb_inprogress) {
3347 		DWARN(ldcp->id,
3348 		    "ldc_unreg_callback: (0x%llx) callback active\n",
3349 		    ldcp->id);
3350 		mutex_exit(&ldcp->lock);
3351 		return (EWOULDBLOCK);
3352 	}
3353 
3354 	ldcp->cb = NULL;
3355 	ldcp->cb_arg = NULL;
3356 	ldcp->cb_enabled = B_FALSE;
3357 
3358 	D1(ldcp->id,
3359 	    "ldc_unreg_callback: (0x%llx) unregistered callback for channel\n",
3360 	    ldcp->id);
3361 
3362 	mutex_exit(&ldcp->lock);
3363 
3364 	return (0);
3365 }
3366 
3367 
3368 /*
3369  * Bring a channel up by initiating a handshake with the peer
3370  * This call is asynchronous. It will complete at a later point
3371  * in time when the peer responds back with an RTR.
3372  */
3373 int
3374 ldc_up(ldc_handle_t handle)
3375 {
3376 	int 		rv;
3377 	ldc_chan_t 	*ldcp;
3378 	ldc_msg_t 	*ldcmsg;
3379 	uint64_t 	tx_tail, tstate, link_state;
3380 
3381 	if (handle == NULL) {
3382 		DWARN(DBG_ALL_LDCS, "ldc_up: invalid channel handle\n");
3383 		return (EINVAL);
3384 	}
3385 	ldcp = (ldc_chan_t *)handle;
3386 
3387 	mutex_enter(&ldcp->lock);
3388 
3389 	D1(ldcp->id, "ldc_up: (0x%llx) doing channel UP\n", ldcp->id);
3390 
3391 	/* clear the reset state */
3392 	tstate = ldcp->tstate;
3393 	ldcp->tstate &= ~TS_IN_RESET;
3394 
3395 	if (ldcp->tstate == TS_UP) {
3396 		DWARN(ldcp->id,
3397 		    "ldc_up: (0x%llx) channel is already in UP state\n",
3398 		    ldcp->id);
3399 
3400 		/* mark channel as up */
3401 		ldcp->status = LDC_UP;
3402 
3403 		/*
3404 		 * if channel was in reset state and there was
3405 		 * pending data clear interrupt state. this will
3406 		 * trigger an interrupt, causing the RX handler to
3407 		 * to invoke the client's callback
3408 		 */
3409 		if ((tstate & TS_IN_RESET) &&
3410 		    ldcp->rx_intr_state == LDC_INTR_PEND) {
3411 			D1(ldcp->id,
3412 			    "ldc_up: (0x%llx) channel has pending data, "
3413 			    "clearing interrupt\n", ldcp->id);
3414 			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
3415 		}
3416 
3417 		mutex_exit(&ldcp->lock);
3418 		return (0);
3419 	}
3420 
3421 	/* if the channel is in RAW mode - mark it as UP, if READY */
3422 	if (ldcp->mode == LDC_MODE_RAW && ldcp->tstate >= TS_READY) {
3423 		ldcp->tstate = TS_UP;
3424 		mutex_exit(&ldcp->lock);
3425 		return (0);
3426 	}
3427 
3428 	/* Don't start another handshake if there is one in progress */
3429 	if (ldcp->hstate) {
3430 		D1(ldcp->id,
3431 		    "ldc_up: (0x%llx) channel handshake in progress\n",
3432 		    ldcp->id);
3433 		mutex_exit(&ldcp->lock);
3434 		return (0);
3435 	}
3436 
3437 	mutex_enter(&ldcp->tx_lock);
3438 
3439 	/* save current link state */
3440 	link_state = ldcp->link_state;
3441 
3442 	/* get the current tail for the LDC msg */
3443 	rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
3444 	if (rv) {
3445 		D1(ldcp->id, "ldc_up: (0x%llx) cannot initiate handshake\n",
3446 		    ldcp->id);
3447 		mutex_exit(&ldcp->tx_lock);
3448 		mutex_exit(&ldcp->lock);
3449 		return (ECONNREFUSED);
3450 	}
3451 
3452 	/*
3453 	 * If i_ldc_get_tx_tail() changed link_state to either RESET or UP,
3454 	 * from a previous state of DOWN, then mark the channel as
3455 	 * being ready for handshake.
3456 	 */
3457 	if ((link_state == LDC_CHANNEL_DOWN) &&
3458 	    (link_state != ldcp->link_state)) {
3459 
3460 		ASSERT((ldcp->link_state == LDC_CHANNEL_RESET) ||
3461 		    (ldcp->link_state == LDC_CHANNEL_UP));
3462 
3463 		if (ldcp->mode == LDC_MODE_RAW) {
3464 			ldcp->status = LDC_UP;
3465 			ldcp->tstate = TS_UP;
3466 			mutex_exit(&ldcp->tx_lock);
3467 			mutex_exit(&ldcp->lock);
3468 			return (0);
3469 		} else {
3470 			ldcp->status = LDC_READY;
3471 			ldcp->tstate |= TS_LINK_READY;
3472 		}
3473 
3474 	}
3475 
3476 	ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
3477 	ZERO_PKT(ldcmsg);
3478 
3479 	ldcmsg->type = LDC_CTRL;
3480 	ldcmsg->stype = LDC_INFO;
3481 	ldcmsg->ctrl = LDC_VER;
3482 	ldcp->next_vidx = 0;
3483 	bcopy(&ldc_versions[0], ldcmsg->udata, sizeof (ldc_versions[0]));
3484 
3485 	DUMP_LDC_PKT(ldcp, "ldc_up snd ver", (uint64_t)ldcmsg);
3486 
3487 	/* initiate the send by calling into HV and set the new tail */
3488 	tx_tail = (tx_tail + LDC_PACKET_SIZE) %
3489 	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
3490 
3491 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
3492 	if (rv) {
3493 		DWARN(ldcp->id,
3494 		    "ldc_up: (0x%llx) cannot initiate handshake rv=%d\n",
3495 		    ldcp->id, rv);
3496 		mutex_exit(&ldcp->tx_lock);
3497 		mutex_exit(&ldcp->lock);
3498 		return (rv);
3499 	}
3500 
3501 	ldcp->hstate |= TS_SENT_VER;
3502 	ldcp->tx_tail = tx_tail;
3503 	D1(ldcp->id, "ldc_up: (0x%llx) channel up initiated\n", ldcp->id);
3504 
3505 	mutex_exit(&ldcp->tx_lock);
3506 	mutex_exit(&ldcp->lock);
3507 
3508 	return (rv);
3509 }
3510 
3511 
3512 /*
3513  * Bring a channel down by resetting its state and queues
3514  */
3515 int
3516 ldc_down(ldc_handle_t handle)
3517 {
3518 	ldc_chan_t 	*ldcp;
3519 
3520 	if (handle == NULL) {
3521 		DWARN(DBG_ALL_LDCS, "ldc_down: invalid channel handle\n");
3522 		return (EINVAL);
3523 	}
3524 	ldcp = (ldc_chan_t *)handle;
3525 	mutex_enter(&ldcp->lock);
3526 	mutex_enter(&ldcp->tx_lock);
3527 	i_ldc_reset(ldcp, B_TRUE);
3528 	mutex_exit(&ldcp->tx_lock);
3529 	mutex_exit(&ldcp->lock);
3530 
3531 	return (0);
3532 }
3533 
3534 /*
3535  * Get the current channel status
3536  */
3537 int
3538 ldc_status(ldc_handle_t handle, ldc_status_t *status)
3539 {
3540 	ldc_chan_t *ldcp;
3541 
3542 	if (handle == NULL || status == NULL) {
3543 		DWARN(DBG_ALL_LDCS, "ldc_status: invalid argument\n");
3544 		return (EINVAL);
3545 	}
3546 	ldcp = (ldc_chan_t *)handle;
3547 
3548 	*status = ((ldc_chan_t *)handle)->status;
3549 
3550 	D1(ldcp->id,
3551 	    "ldc_status: (0x%llx) returned status %d\n", ldcp->id, *status);
3552 	return (0);
3553 }
3554 
3555 
3556 /*
3557  * Set the channel's callback mode - enable/disable callbacks
3558  */
3559 int
3560 ldc_set_cb_mode(ldc_handle_t handle, ldc_cb_mode_t cmode)
3561 {
3562 	ldc_chan_t 	*ldcp;
3563 
3564 	if (handle == NULL) {
3565 		DWARN(DBG_ALL_LDCS,
3566 		    "ldc_set_intr_mode: invalid channel handle\n");
3567 		return (EINVAL);
3568 	}
3569 	ldcp = (ldc_chan_t *)handle;
3570 
3571 	/*
3572 	 * Record no callbacks should be invoked
3573 	 */
3574 	mutex_enter(&ldcp->lock);
3575 
3576 	switch (cmode) {
3577 	case LDC_CB_DISABLE:
3578 		if (!ldcp->cb_enabled) {
3579 			DWARN(ldcp->id,
3580 			    "ldc_set_cb_mode: (0x%llx) callbacks disabled\n",
3581 			    ldcp->id);
3582 			break;
3583 		}
3584 		ldcp->cb_enabled = B_FALSE;
3585 
3586 		D1(ldcp->id, "ldc_set_cb_mode: (0x%llx) disabled callbacks\n",
3587 		    ldcp->id);
3588 		break;
3589 
3590 	case LDC_CB_ENABLE:
3591 		if (ldcp->cb_enabled) {
3592 			DWARN(ldcp->id,
3593 			    "ldc_set_cb_mode: (0x%llx) callbacks enabled\n",
3594 			    ldcp->id);
3595 			break;
3596 		}
3597 		ldcp->cb_enabled = B_TRUE;
3598 
3599 		D1(ldcp->id, "ldc_set_cb_mode: (0x%llx) enabled callbacks\n",
3600 		    ldcp->id);
3601 		break;
3602 	}
3603 
3604 	mutex_exit(&ldcp->lock);
3605 
3606 	return (0);
3607 }
3608 
3609 /*
3610  * Check to see if there are packets on the incoming queue
3611  * Will return hasdata = B_FALSE if there are no packets
3612  */
3613 int
3614 ldc_chkq(ldc_handle_t handle, boolean_t *hasdata)
3615 {
3616 	int 		rv;
3617 	uint64_t 	rx_head, rx_tail;
3618 	ldc_chan_t 	*ldcp;
3619 
3620 	if (handle == NULL) {
3621 		DWARN(DBG_ALL_LDCS, "ldc_chkq: invalid channel handle\n");
3622 		return (EINVAL);
3623 	}
3624 	ldcp = (ldc_chan_t *)handle;
3625 
3626 	*hasdata = B_FALSE;
3627 
3628 	mutex_enter(&ldcp->lock);
3629 
3630 	if (ldcp->tstate != TS_UP) {
3631 		D1(ldcp->id,
3632 		    "ldc_chkq: (0x%llx) channel is not up\n", ldcp->id);
3633 		mutex_exit(&ldcp->lock);
3634 		return (ECONNRESET);
3635 	}
3636 
3637 	/* Read packet(s) from the queue */
3638 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
3639 	    &ldcp->link_state);
3640 	if (rv != 0) {
3641 		cmn_err(CE_WARN,
3642 		    "ldc_chkq: (0x%lx) unable to read queue ptrs", ldcp->id);
3643 		mutex_exit(&ldcp->lock);
3644 		return (EIO);
3645 	}
3646 
3647 	/* reset the channel state if the channel went down */
3648 	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
3649 	    ldcp->link_state == LDC_CHANNEL_RESET) {
3650 		mutex_enter(&ldcp->tx_lock);
3651 		i_ldc_reset(ldcp, B_FALSE);
3652 		mutex_exit(&ldcp->tx_lock);
3653 		mutex_exit(&ldcp->lock);
3654 		return (ECONNRESET);
3655 	}
3656 
3657 	switch (ldcp->mode) {
3658 	case LDC_MODE_RAW:
3659 		/*
3660 		 * In raw mode, there are no ctrl packets, so checking
3661 		 * if the queue is non-empty is sufficient.
3662 		 */
3663 		*hasdata = (rx_head != rx_tail);
3664 		break;
3665 
3666 	case LDC_MODE_UNRELIABLE:
3667 		/*
3668 		 * In unreliable mode, if the queue is non-empty, we need
3669 		 * to check if it actually contains unread data packets.
3670 		 * The queue may just contain ctrl packets.
3671 		 */
3672 		if (rx_head != rx_tail) {
3673 			*hasdata = (i_ldc_chkq(ldcp) == 0);
3674 			/*
3675 			 * If no data packets were found on the queue,
3676 			 * all packets must have been control packets
3677 			 * which will now have been processed, leaving
3678 			 * the queue empty. If the interrupt state
3679 			 * is pending, we need to clear the interrupt
3680 			 * here.
3681 			 */
3682 			if (*hasdata == B_FALSE &&
3683 			    ldcp->rx_intr_state == LDC_INTR_PEND) {
3684 				i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
3685 			}
3686 		}
3687 		break;
3688 
3689 	case LDC_MODE_RELIABLE:
3690 		/*
3691 		 * In reliable mode, first check for 'stream_remains' > 0.
3692 		 * Otherwise, if the data queue head and tail pointers
3693 		 * differ, there must be data to read.
3694 		 */
3695 		if (ldcp->stream_remains > 0)
3696 			*hasdata = B_TRUE;
3697 		else
3698 			*hasdata = (ldcp->rx_dq_head != ldcp->rx_dq_tail);
3699 		break;
3700 
3701 	default:
3702 		cmn_err(CE_WARN, "ldc_chkq: (0x%lx) unexpected channel mode "
3703 		    "(0x%x)", ldcp->id, ldcp->mode);
3704 		mutex_exit(&ldcp->lock);
3705 		return (EIO);
3706 	}
3707 
3708 	mutex_exit(&ldcp->lock);
3709 
3710 	return (0);
3711 }
3712 
3713 
3714 /*
3715  * Read 'size' amount of bytes or less. If incoming buffer
3716  * is more than 'size', ENOBUFS is returned.
3717  *
3718  * On return, size contains the number of bytes read.
3719  */
3720 int
3721 ldc_read(ldc_handle_t handle, caddr_t bufp, size_t *sizep)
3722 {
3723 	ldc_chan_t 	*ldcp;
3724 	uint64_t 	rx_head = 0, rx_tail = 0;
3725 	int		rv = 0, exit_val;
3726 
3727 	if (handle == NULL) {
3728 		DWARN(DBG_ALL_LDCS, "ldc_read: invalid channel handle\n");
3729 		return (EINVAL);
3730 	}
3731 
3732 	ldcp = (ldc_chan_t *)handle;
3733 
3734 	/* channel lock */
3735 	mutex_enter(&ldcp->lock);
3736 
3737 	if (ldcp->tstate != TS_UP) {
3738 		DWARN(ldcp->id,
3739 		    "ldc_read: (0x%llx) channel is not in UP state\n",
3740 		    ldcp->id);
3741 		exit_val = ECONNRESET;
3742 	} else if (ldcp->mode == LDC_MODE_RELIABLE) {
3743 		TRACE_RXDQ_LENGTH(ldcp);
3744 		exit_val = ldcp->read_p(ldcp, bufp, sizep);
3745 
3746 		/*
3747 		 * For reliable mode channels, the interrupt
3748 		 * state is only set to pending during
3749 		 * interrupt handling when the secondary data
3750 		 * queue became full, leaving unprocessed
3751 		 * packets on the Rx queue. If the interrupt
3752 		 * state is pending and space is now available
3753 		 * on the data queue, clear the interrupt.
3754 		 */
3755 		if (ldcp->rx_intr_state == LDC_INTR_PEND &&
3756 		    Q_CONTIG_SPACE(ldcp->rx_dq_head, ldcp->rx_dq_tail,
3757 		    ldcp->rx_dq_entries << LDC_PACKET_SHIFT) >=
3758 		    LDC_PACKET_SIZE) {
3759 			/* data queue is not full */
3760 			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
3761 		}
3762 
3763 		mutex_exit(&ldcp->lock);
3764 		return (exit_val);
3765 	} else {
3766 		exit_val = ldcp->read_p(ldcp, bufp, sizep);
3767 	}
3768 
3769 	/*
3770 	 * if queue has been drained - clear interrupt
3771 	 */
3772 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
3773 	    &ldcp->link_state);
3774 	if (rv != 0) {
3775 		cmn_err(CE_WARN, "ldc_read: (0x%lx) unable to read queue ptrs",
3776 		    ldcp->id);
3777 		mutex_enter(&ldcp->tx_lock);
3778 		i_ldc_reset(ldcp, B_TRUE);
3779 		mutex_exit(&ldcp->tx_lock);
3780 		mutex_exit(&ldcp->lock);
3781 		return (ECONNRESET);
3782 	}
3783 
3784 	if (exit_val == 0) {
3785 		if (ldcp->link_state == LDC_CHANNEL_DOWN ||
3786 		    ldcp->link_state == LDC_CHANNEL_RESET) {
3787 			mutex_enter(&ldcp->tx_lock);
3788 			i_ldc_reset(ldcp, B_FALSE);
3789 			exit_val = ECONNRESET;
3790 			mutex_exit(&ldcp->tx_lock);
3791 		}
3792 		if ((rv == 0) &&
3793 		    (ldcp->rx_intr_state == LDC_INTR_PEND) &&
3794 		    (rx_head == rx_tail)) {
3795 			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
3796 		}
3797 	}
3798 
3799 	mutex_exit(&ldcp->lock);
3800 	return (exit_val);
3801 }
3802 
3803 /*
3804  * Basic raw mondo read -
3805  * no interpretation of mondo contents at all.
3806  *
3807  * Enter and exit with ldcp->lock held by caller
3808  */
3809 static int
3810 i_ldc_read_raw(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep)
3811 {
3812 	uint64_t 	q_size_mask;
3813 	ldc_msg_t 	*msgp;
3814 	uint8_t		*msgbufp;
3815 	int		rv = 0, space;
3816 	uint64_t 	rx_head, rx_tail;
3817 
3818 	space = *sizep;
3819 
3820 	if (space < LDC_PAYLOAD_SIZE_RAW)
3821 		return (ENOBUFS);
3822 
3823 	ASSERT(mutex_owned(&ldcp->lock));
3824 
3825 	/* compute mask for increment */
3826 	q_size_mask = (ldcp->rx_q_entries-1)<<LDC_PACKET_SHIFT;
3827 
3828 	/*
3829 	 * Read packet(s) from the queue
3830 	 */
3831 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
3832 	    &ldcp->link_state);
3833 	if (rv != 0) {
3834 		cmn_err(CE_WARN,
3835 		    "ldc_read_raw: (0x%lx) unable to read queue ptrs",
3836 		    ldcp->id);
3837 		return (EIO);
3838 	}
3839 	D1(ldcp->id, "ldc_read_raw: (0x%llx) rxh=0x%llx,"
3840 	    " rxt=0x%llx, st=0x%llx\n",
3841 	    ldcp->id, rx_head, rx_tail, ldcp->link_state);
3842 
3843 	/* reset the channel state if the channel went down */
3844 	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
3845 	    ldcp->link_state == LDC_CHANNEL_RESET) {
3846 		mutex_enter(&ldcp->tx_lock);
3847 		i_ldc_reset(ldcp, B_FALSE);
3848 		mutex_exit(&ldcp->tx_lock);
3849 		return (ECONNRESET);
3850 	}
3851 
3852 	/*
3853 	 * Check for empty queue
3854 	 */
3855 	if (rx_head == rx_tail) {
3856 		*sizep = 0;
3857 		return (0);
3858 	}
3859 
3860 	/* get the message */
3861 	msgp = (ldc_msg_t *)(ldcp->rx_q_va + rx_head);
3862 
3863 	/* if channel is in RAW mode, copy data and return */
3864 	msgbufp = (uint8_t *)&(msgp->raw[0]);
3865 
3866 	bcopy(msgbufp, target_bufp, LDC_PAYLOAD_SIZE_RAW);
3867 
3868 	DUMP_PAYLOAD(ldcp->id, msgbufp);
3869 
3870 	*sizep = LDC_PAYLOAD_SIZE_RAW;
3871 
3872 	rx_head = (rx_head + LDC_PACKET_SIZE) & q_size_mask;
3873 	rv = i_ldc_set_rx_head(ldcp, rx_head);
3874 
3875 	return (rv);
3876 }
3877 
3878 /*
3879  * Process LDC mondos to build larger packets
3880  * with either un-reliable or reliable delivery.
3881  *
3882  * Enter and exit with ldcp->lock held by caller
3883  */
3884 static int
3885 i_ldc_read_packet(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep)
3886 {
3887 	int		rv = 0;
3888 	uint64_t 	rx_head = 0, rx_tail = 0;
3889 	uint64_t 	curr_head = 0;
3890 	ldc_msg_t 	*msg;
3891 	caddr_t 	target;
3892 	size_t 		len = 0, bytes_read = 0;
3893 	int 		retries = 0;
3894 	uint64_t 	q_va, q_size_mask;
3895 	uint64_t	first_fragment = 0;
3896 
3897 	target = target_bufp;
3898 
3899 	ASSERT(mutex_owned(&ldcp->lock));
3900 
3901 	/* check if the buffer and size are valid */
3902 	if (target_bufp == NULL || *sizep == 0) {
3903 		DWARN(ldcp->id, "ldc_read: (0x%llx) invalid buffer/size\n",
3904 		    ldcp->id);
3905 		return (EINVAL);
3906 	}
3907 
3908 	/* Set q_va and compute increment mask for the appropriate queue */
3909 	if (ldcp->mode == LDC_MODE_RELIABLE) {
3910 		q_va	    = ldcp->rx_dq_va;
3911 		q_size_mask = (ldcp->rx_dq_entries-1)<<LDC_PACKET_SHIFT;
3912 	} else {
3913 		q_va	    = ldcp->rx_q_va;
3914 		q_size_mask = (ldcp->rx_q_entries-1)<<LDC_PACKET_SHIFT;
3915 	}
3916 
3917 	/*
3918 	 * Read packet(s) from the queue
3919 	 */
3920 	rv = ldcp->readq_get_state(ldcp, &curr_head, &rx_tail,
3921 	    &ldcp->link_state);
3922 	if (rv != 0) {
3923 		cmn_err(CE_WARN, "ldc_read: (0x%lx) unable to read queue ptrs",
3924 		    ldcp->id);
3925 		mutex_enter(&ldcp->tx_lock);
3926 		i_ldc_reset(ldcp, B_TRUE);
3927 		mutex_exit(&ldcp->tx_lock);
3928 		return (ECONNRESET);
3929 	}
3930 	D1(ldcp->id, "ldc_read: (0x%llx) chd=0x%llx, tl=0x%llx, st=0x%llx\n",
3931 	    ldcp->id, curr_head, rx_tail, ldcp->link_state);
3932 
3933 	/* reset the channel state if the channel went down */
3934 	if (ldcp->link_state != LDC_CHANNEL_UP)
3935 		goto channel_is_reset;
3936 
3937 	for (;;) {
3938 
3939 		if (curr_head == rx_tail) {
3940 			/*
3941 			 * If a data queue is being used, check the Rx HV
3942 			 * queue. This will copy over any new data packets
3943 			 * that have arrived.
3944 			 */
3945 			if (ldcp->mode == LDC_MODE_RELIABLE)
3946 				(void) i_ldc_chkq(ldcp);
3947 
3948 			rv = ldcp->readq_get_state(ldcp,
3949 			    &rx_head, &rx_tail, &ldcp->link_state);
3950 			if (rv != 0) {
3951 				cmn_err(CE_WARN,
3952 				    "ldc_read: (0x%lx) cannot read queue ptrs",
3953 				    ldcp->id);
3954 				mutex_enter(&ldcp->tx_lock);
3955 				i_ldc_reset(ldcp, B_TRUE);
3956 				mutex_exit(&ldcp->tx_lock);
3957 				return (ECONNRESET);
3958 			}
3959 
3960 			if (ldcp->link_state != LDC_CHANNEL_UP)
3961 				goto channel_is_reset;
3962 
3963 			if (curr_head == rx_tail) {
3964 
3965 				/* If in the middle of a fragmented xfer */
3966 				if (first_fragment != 0) {
3967 
3968 					/* wait for ldc_delay usecs */
3969 					drv_usecwait(ldc_delay);
3970 
3971 					if (++retries < ldc_max_retries)
3972 						continue;
3973 
3974 					*sizep = 0;
3975 					if (ldcp->mode != LDC_MODE_RELIABLE)
3976 						ldcp->last_msg_rcd =
3977 						    first_fragment - 1;
3978 					DWARN(DBG_ALL_LDCS, "ldc_read: "
3979 					    "(0x%llx) read timeout", ldcp->id);
3980 					return (EAGAIN);
3981 				}
3982 				*sizep = 0;
3983 				break;
3984 			}
3985 		}
3986 		retries = 0;
3987 
3988 		D2(ldcp->id,
3989 		    "ldc_read: (0x%llx) chd=0x%llx, rxhd=0x%llx, rxtl=0x%llx\n",
3990 		    ldcp->id, curr_head, rx_head, rx_tail);
3991 
3992 		/* get the message */
3993 		msg = (ldc_msg_t *)(q_va + curr_head);
3994 
3995 		DUMP_LDC_PKT(ldcp, "ldc_read received pkt",
3996 		    ldcp->rx_q_va + curr_head);
3997 
3998 		/* Check the message ID for the message received */
3999 		if (ldcp->mode != LDC_MODE_RELIABLE) {
4000 			if ((rv = i_ldc_check_seqid(ldcp, msg)) != 0) {
4001 
4002 				DWARN(ldcp->id, "ldc_read: (0x%llx) seqid "
4003 				    "error, q_ptrs=0x%lx,0x%lx",
4004 				    ldcp->id, rx_head, rx_tail);
4005 
4006 				/* throw away data */
4007 				bytes_read = 0;
4008 
4009 				/* Reset last_msg_rcd to start of message */
4010 				if (first_fragment != 0) {
4011 					ldcp->last_msg_rcd = first_fragment - 1;
4012 					first_fragment = 0;
4013 				}
4014 				/*
4015 				 * Send a NACK -- invalid seqid
4016 				 * get the current tail for the response
4017 				 */
4018 				rv = i_ldc_send_pkt(ldcp, msg->type, LDC_NACK,
4019 				    (msg->ctrl & LDC_CTRL_MASK));
4020 				if (rv) {
4021 					cmn_err(CE_NOTE,
4022 					    "ldc_read: (0x%lx) err sending "
4023 					    "NACK msg\n", ldcp->id);
4024 
4025 					/* if cannot send NACK - reset chan */
4026 					mutex_enter(&ldcp->tx_lock);
4027 					i_ldc_reset(ldcp, B_FALSE);
4028 					mutex_exit(&ldcp->tx_lock);
4029 					rv = ECONNRESET;
4030 					break;
4031 				}
4032 
4033 				/* purge receive queue */
4034 				rv = i_ldc_set_rx_head(ldcp, rx_tail);
4035 
4036 				break;
4037 			}
4038 
4039 			/*
4040 			 * Process any messages of type CTRL messages
4041 			 * Future implementations should try to pass these
4042 			 * to LDC link by resetting the intr state.
4043 			 *
4044 			 * NOTE: not done as a switch() as type can be
4045 			 * both ctrl+data
4046 			 */
4047 			if (msg->type & LDC_CTRL) {
4048 				if (rv = i_ldc_ctrlmsg(ldcp, msg)) {
4049 					if (rv == EAGAIN)
4050 						continue;
4051 					rv = i_ldc_set_rx_head(ldcp, rx_tail);
4052 					*sizep = 0;
4053 					bytes_read = 0;
4054 					break;
4055 				}
4056 			}
4057 
4058 			/* process data ACKs */
4059 			if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) {
4060 				if (rv = i_ldc_process_data_ACK(ldcp, msg)) {
4061 					*sizep = 0;
4062 					bytes_read = 0;
4063 					break;
4064 				}
4065 			}
4066 
4067 			/* process data NACKs */
4068 			if ((msg->type & LDC_DATA) && (msg->stype & LDC_NACK)) {
4069 				DWARN(ldcp->id,
4070 				    "ldc_read: (0x%llx) received DATA/NACK",
4071 				    ldcp->id);
4072 				mutex_enter(&ldcp->tx_lock);
4073 				i_ldc_reset(ldcp, B_TRUE);
4074 				mutex_exit(&ldcp->tx_lock);
4075 				return (ECONNRESET);
4076 			}
4077 		}
4078 
4079 		/* process data messages */
4080 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_INFO)) {
4081 
4082 			uint8_t *msgbuf = (uint8_t *)(
4083 			    (ldcp->mode == LDC_MODE_RELIABLE) ?
4084 			    msg->rdata : msg->udata);
4085 
4086 			D2(ldcp->id,
4087 			    "ldc_read: (0x%llx) received data msg\n", ldcp->id);
4088 
4089 			/* get the packet length */
4090 			len = (msg->env & LDC_LEN_MASK);
4091 
4092 				/*
4093 				 * FUTURE OPTIMIZATION:
4094 				 * dont need to set q head for every
4095 				 * packet we read just need to do this when
4096 				 * we are done or need to wait for more
4097 				 * mondos to make a full packet - this is
4098 				 * currently expensive.
4099 				 */
4100 
4101 			if (first_fragment == 0) {
4102 
4103 				/*
4104 				 * first packets should always have the start
4105 				 * bit set (even for a single packet). If not
4106 				 * throw away the packet
4107 				 */
4108 				if (!(msg->env & LDC_FRAG_START)) {
4109 
4110 					DWARN(DBG_ALL_LDCS,
4111 					    "ldc_read: (0x%llx) not start - "
4112 					    "frag=%x\n", ldcp->id,
4113 					    (msg->env) & LDC_FRAG_MASK);
4114 
4115 					/* toss pkt, inc head, cont reading */
4116 					bytes_read = 0;
4117 					target = target_bufp;
4118 					curr_head =
4119 					    (curr_head + LDC_PACKET_SIZE)
4120 					    & q_size_mask;
4121 					if (rv = ldcp->readq_set_head(ldcp,
4122 					    curr_head))
4123 						break;
4124 
4125 					continue;
4126 				}
4127 
4128 				first_fragment = msg->seqid;
4129 			} else {
4130 				/* check to see if this is a pkt w/ START bit */
4131 				if (msg->env & LDC_FRAG_START) {
4132 					DWARN(DBG_ALL_LDCS,
4133 					    "ldc_read:(0x%llx) unexpected pkt"
4134 					    " env=0x%x discarding %d bytes,"
4135 					    " lastmsg=%d, currentmsg=%d\n",
4136 					    ldcp->id, msg->env&LDC_FRAG_MASK,
4137 					    bytes_read, ldcp->last_msg_rcd,
4138 					    msg->seqid);
4139 
4140 					/* throw data we have read so far */
4141 					bytes_read = 0;
4142 					target = target_bufp;
4143 					first_fragment = msg->seqid;
4144 
4145 					if (rv = ldcp->readq_set_head(ldcp,
4146 					    curr_head))
4147 						break;
4148 				}
4149 			}
4150 
4151 			/* copy (next) pkt into buffer */
4152 			if (len <= (*sizep - bytes_read)) {
4153 				bcopy(msgbuf, target, len);
4154 				target += len;
4155 				bytes_read += len;
4156 			} else {
4157 				/*
4158 				 * there is not enough space in the buffer to
4159 				 * read this pkt. throw message away & continue
4160 				 * reading data from queue
4161 				 */
4162 				DWARN(DBG_ALL_LDCS,
4163 				    "ldc_read: (0x%llx) buffer too small, "
4164 				    "head=0x%lx, expect=%d, got=%d\n", ldcp->id,
4165 				    curr_head, *sizep, bytes_read+len);
4166 
4167 				first_fragment = 0;
4168 				target = target_bufp;
4169 				bytes_read = 0;
4170 
4171 				/* throw away everything received so far */
4172 				if (rv = ldcp->readq_set_head(ldcp, curr_head))
4173 					break;
4174 
4175 				/* continue reading remaining pkts */
4176 				continue;
4177 			}
4178 		}
4179 
4180 		/* set the message id */
4181 		if (ldcp->mode != LDC_MODE_RELIABLE)
4182 			ldcp->last_msg_rcd = msg->seqid;
4183 
4184 		/* move the head one position */
4185 		curr_head = (curr_head + LDC_PACKET_SIZE) & q_size_mask;
4186 
4187 		if (msg->env & LDC_FRAG_STOP) {
4188 
4189 			/*
4190 			 * All pkts that are part of this fragmented transfer
4191 			 * have been read or this was a single pkt read
4192 			 * or there was an error
4193 			 */
4194 
4195 			/* set the queue head */
4196 			if (rv = ldcp->readq_set_head(ldcp, curr_head))
4197 				bytes_read = 0;
4198 
4199 			*sizep = bytes_read;
4200 
4201 			break;
4202 		}
4203 
4204 		/* advance head if it is a CTRL packet or a DATA ACK packet */
4205 		if ((msg->type & LDC_CTRL) ||
4206 		    ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK))) {
4207 
4208 			/* set the queue head */
4209 			if (rv = ldcp->readq_set_head(ldcp, curr_head)) {
4210 				bytes_read = 0;
4211 				break;
4212 			}
4213 
4214 			D2(ldcp->id, "ldc_read: (0x%llx) set ACK qhead 0x%llx",
4215 			    ldcp->id, curr_head);
4216 		}
4217 
4218 	} /* for (;;) */
4219 
4220 	D2(ldcp->id, "ldc_read: (0x%llx) end size=%d", ldcp->id, *sizep);
4221 
4222 	return (rv);
4223 
4224 channel_is_reset:
4225 	mutex_enter(&ldcp->tx_lock);
4226 	i_ldc_reset(ldcp, B_FALSE);
4227 	mutex_exit(&ldcp->tx_lock);
4228 	return (ECONNRESET);
4229 }
4230 
4231 /*
4232  * Fetch and buffer incoming packets so we can hand them back as
4233  * a basic byte stream.
4234  *
4235  * Enter and exit with ldcp->lock held by caller
4236  */
4237 static int
4238 i_ldc_read_stream(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep)
4239 {
4240 	int	rv;
4241 	size_t	size;
4242 
4243 	ASSERT(mutex_owned(&ldcp->lock));
4244 
4245 	D2(ldcp->id, "i_ldc_read_stream: (0x%llx) buffer size=%d",
4246 	    ldcp->id, *sizep);
4247 
4248 	if (ldcp->stream_remains == 0) {
4249 		size = ldcp->mtu;
4250 		rv = i_ldc_read_packet(ldcp,
4251 		    (caddr_t)ldcp->stream_bufferp, &size);
4252 		D2(ldcp->id, "i_ldc_read_stream: read packet (0x%llx) size=%d",
4253 		    ldcp->id, size);
4254 
4255 		if (rv != 0)
4256 			return (rv);
4257 
4258 		ldcp->stream_remains = size;
4259 		ldcp->stream_offset = 0;
4260 	}
4261 
4262 	size = MIN(ldcp->stream_remains, *sizep);
4263 
4264 	bcopy(ldcp->stream_bufferp + ldcp->stream_offset, target_bufp, size);
4265 	ldcp->stream_offset += size;
4266 	ldcp->stream_remains -= size;
4267 
4268 	D2(ldcp->id, "i_ldc_read_stream: (0x%llx) fill from buffer size=%d",
4269 	    ldcp->id, size);
4270 
4271 	*sizep = size;
4272 	return (0);
4273 }
4274 
4275 /*
4276  * Write specified amount of bytes to the channel
4277  * in multiple pkts of pkt_payload size. Each
4278  * packet is tagged with an unique packet ID in
4279  * the case of a reliable link.
4280  *
4281  * On return, size contains the number of bytes written.
4282  */
4283 int
4284 ldc_write(ldc_handle_t handle, caddr_t buf, size_t *sizep)
4285 {
4286 	ldc_chan_t	*ldcp;
4287 	int		rv = 0;
4288 
4289 	if (handle == NULL) {
4290 		DWARN(DBG_ALL_LDCS, "ldc_write: invalid channel handle\n");
4291 		return (EINVAL);
4292 	}
4293 	ldcp = (ldc_chan_t *)handle;
4294 
4295 	mutex_enter(&ldcp->tx_lock);
4296 
4297 	/* check if non-zero data to write */
4298 	if (buf == NULL || sizep == NULL) {
4299 		DWARN(ldcp->id, "ldc_write: (0x%llx) invalid data write\n",
4300 		    ldcp->id);
4301 		mutex_exit(&ldcp->tx_lock);
4302 		return (EINVAL);
4303 	}
4304 
4305 	if (*sizep == 0) {
4306 		DWARN(ldcp->id, "ldc_write: (0x%llx) write size of zero\n",
4307 		    ldcp->id);
4308 		mutex_exit(&ldcp->tx_lock);
4309 		return (0);
4310 	}
4311 
4312 	/* Check if channel is UP for data exchange */
4313 	if (ldcp->tstate != TS_UP) {
4314 		DWARN(ldcp->id,
4315 		    "ldc_write: (0x%llx) channel is not in UP state\n",
4316 		    ldcp->id);
4317 		*sizep = 0;
4318 		rv = ECONNRESET;
4319 	} else {
4320 		rv = ldcp->write_p(ldcp, buf, sizep);
4321 	}
4322 
4323 	mutex_exit(&ldcp->tx_lock);
4324 
4325 	return (rv);
4326 }
4327 
4328 /*
4329  * Write a raw packet to the channel
4330  * On return, size contains the number of bytes written.
4331  */
4332 static int
4333 i_ldc_write_raw(ldc_chan_t *ldcp, caddr_t buf, size_t *sizep)
4334 {
4335 	ldc_msg_t 	*ldcmsg;
4336 	uint64_t 	tx_head, tx_tail, new_tail;
4337 	int		rv = 0;
4338 	size_t		size;
4339 
4340 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
4341 	ASSERT(ldcp->mode == LDC_MODE_RAW);
4342 
4343 	size = *sizep;
4344 
4345 	/*
4346 	 * Check to see if the packet size is less than or
4347 	 * equal to packet size support in raw mode
4348 	 */
4349 	if (size > ldcp->pkt_payload) {
4350 		DWARN(ldcp->id,
4351 		    "ldc_write: (0x%llx) invalid size (0x%llx) for RAW mode\n",
4352 		    ldcp->id, *sizep);
4353 		*sizep = 0;
4354 		return (EMSGSIZE);
4355 	}
4356 
4357 	/* get the qptrs for the tx queue */
4358 	rv = hv_ldc_tx_get_state(ldcp->id,
4359 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
4360 	if (rv != 0) {
4361 		cmn_err(CE_WARN,
4362 		    "ldc_write: (0x%lx) cannot read queue ptrs\n", ldcp->id);
4363 		*sizep = 0;
4364 		return (EIO);
4365 	}
4366 
4367 	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
4368 	    ldcp->link_state == LDC_CHANNEL_RESET) {
4369 		DWARN(ldcp->id,
4370 		    "ldc_write: (0x%llx) channel down/reset\n", ldcp->id);
4371 
4372 		*sizep = 0;
4373 		if (mutex_tryenter(&ldcp->lock)) {
4374 			i_ldc_reset(ldcp, B_FALSE);
4375 			mutex_exit(&ldcp->lock);
4376 		} else {
4377 			/*
4378 			 * Release Tx lock, and then reacquire channel
4379 			 * and Tx lock in correct order
4380 			 */
4381 			mutex_exit(&ldcp->tx_lock);
4382 			mutex_enter(&ldcp->lock);
4383 			mutex_enter(&ldcp->tx_lock);
4384 			i_ldc_reset(ldcp, B_FALSE);
4385 			mutex_exit(&ldcp->lock);
4386 		}
4387 		return (ECONNRESET);
4388 	}
4389 
4390 	tx_tail = ldcp->tx_tail;
4391 	tx_head = ldcp->tx_head;
4392 	new_tail = (tx_tail + LDC_PACKET_SIZE) &
4393 	    ((ldcp->tx_q_entries-1) << LDC_PACKET_SHIFT);
4394 
4395 	if (new_tail == tx_head) {
4396 		DWARN(DBG_ALL_LDCS,
4397 		    "ldc_write: (0x%llx) TX queue is full\n", ldcp->id);
4398 		*sizep = 0;
4399 		return (EWOULDBLOCK);
4400 	}
4401 
4402 	D2(ldcp->id, "ldc_write: (0x%llx) start xfer size=%d",
4403 	    ldcp->id, size);
4404 
4405 	/* Send the data now */
4406 	ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
4407 
4408 	/* copy the data into pkt */
4409 	bcopy((uint8_t *)buf, ldcmsg, size);
4410 
4411 	/* increment tail */
4412 	tx_tail = new_tail;
4413 
4414 	/*
4415 	 * All packets have been copied into the TX queue
4416 	 * update the tail ptr in the HV
4417 	 */
4418 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
4419 	if (rv) {
4420 		if (rv == EWOULDBLOCK) {
4421 			DWARN(ldcp->id, "ldc_write: (0x%llx) write timed out\n",
4422 			    ldcp->id);
4423 			*sizep = 0;
4424 			return (EWOULDBLOCK);
4425 		}
4426 
4427 		*sizep = 0;
4428 		if (mutex_tryenter(&ldcp->lock)) {
4429 			i_ldc_reset(ldcp, B_FALSE);
4430 			mutex_exit(&ldcp->lock);
4431 		} else {
4432 			/*
4433 			 * Release Tx lock, and then reacquire channel
4434 			 * and Tx lock in correct order
4435 			 */
4436 			mutex_exit(&ldcp->tx_lock);
4437 			mutex_enter(&ldcp->lock);
4438 			mutex_enter(&ldcp->tx_lock);
4439 			i_ldc_reset(ldcp, B_FALSE);
4440 			mutex_exit(&ldcp->lock);
4441 		}
4442 		return (ECONNRESET);
4443 	}
4444 
4445 	ldcp->tx_tail = tx_tail;
4446 	*sizep = size;
4447 
4448 	D2(ldcp->id, "ldc_write: (0x%llx) end xfer size=%d", ldcp->id, size);
4449 
4450 	return (rv);
4451 }
4452 
4453 
4454 /*
4455  * Write specified amount of bytes to the channel
4456  * in multiple pkts of pkt_payload size. Each
4457  * packet is tagged with an unique packet ID in
4458  * the case of a reliable link.
4459  *
4460  * On return, size contains the number of bytes written.
4461  * This function needs to ensure that the write size is < MTU size
4462  */
4463 static int
4464 i_ldc_write_packet(ldc_chan_t *ldcp, caddr_t buf, size_t *size)
4465 {
4466 	ldc_msg_t 	*ldcmsg;
4467 	uint64_t 	tx_head, tx_tail, new_tail, start;
4468 	uint64_t	txq_size_mask, numavail;
4469 	uint8_t 	*msgbuf, *source = (uint8_t *)buf;
4470 	size_t 		len, bytes_written = 0, remaining;
4471 	int		rv;
4472 	uint32_t	curr_seqid;
4473 
4474 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
4475 
4476 	ASSERT(ldcp->mode == LDC_MODE_RELIABLE ||
4477 	    ldcp->mode == LDC_MODE_UNRELIABLE);
4478 
4479 	/* compute mask for increment */
4480 	txq_size_mask = (ldcp->tx_q_entries - 1) << LDC_PACKET_SHIFT;
4481 
4482 	/* get the qptrs for the tx queue */
4483 	rv = hv_ldc_tx_get_state(ldcp->id,
4484 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
4485 	if (rv != 0) {
4486 		cmn_err(CE_WARN,
4487 		    "ldc_write: (0x%lx) cannot read queue ptrs\n", ldcp->id);
4488 		*size = 0;
4489 		return (EIO);
4490 	}
4491 
4492 	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
4493 	    ldcp->link_state == LDC_CHANNEL_RESET) {
4494 		DWARN(ldcp->id,
4495 		    "ldc_write: (0x%llx) channel down/reset\n", ldcp->id);
4496 		*size = 0;
4497 		if (mutex_tryenter(&ldcp->lock)) {
4498 			i_ldc_reset(ldcp, B_FALSE);
4499 			mutex_exit(&ldcp->lock);
4500 		} else {
4501 			/*
4502 			 * Release Tx lock, and then reacquire channel
4503 			 * and Tx lock in correct order
4504 			 */
4505 			mutex_exit(&ldcp->tx_lock);
4506 			mutex_enter(&ldcp->lock);
4507 			mutex_enter(&ldcp->tx_lock);
4508 			i_ldc_reset(ldcp, B_FALSE);
4509 			mutex_exit(&ldcp->lock);
4510 		}
4511 		return (ECONNRESET);
4512 	}
4513 
4514 	tx_tail = ldcp->tx_tail;
4515 	new_tail = (tx_tail + LDC_PACKET_SIZE) %
4516 	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
4517 
4518 	/*
4519 	 * Check to see if the queue is full. The check is done using
4520 	 * the appropriate head based on the link mode.
4521 	 */
4522 	i_ldc_get_tx_head(ldcp, &tx_head);
4523 
4524 	if (new_tail == tx_head) {
4525 		DWARN(DBG_ALL_LDCS,
4526 		    "ldc_write: (0x%llx) TX queue is full\n", ldcp->id);
4527 		*size = 0;
4528 		return (EWOULDBLOCK);
4529 	}
4530 
4531 	/*
4532 	 * Make sure that the LDC Tx queue has enough space
4533 	 */
4534 	numavail = (tx_head >> LDC_PACKET_SHIFT) - (tx_tail >> LDC_PACKET_SHIFT)
4535 	    + ldcp->tx_q_entries - 1;
4536 	numavail %= ldcp->tx_q_entries;
4537 
4538 	if (*size > (numavail * ldcp->pkt_payload)) {
4539 		DWARN(DBG_ALL_LDCS,
4540 		    "ldc_write: (0x%llx) TX queue has no space\n", ldcp->id);
4541 		return (EWOULDBLOCK);
4542 	}
4543 
4544 	D2(ldcp->id, "ldc_write: (0x%llx) start xfer size=%d",
4545 	    ldcp->id, *size);
4546 
4547 	/* Send the data now */
4548 	bytes_written = 0;
4549 	curr_seqid = ldcp->last_msg_snt;
4550 	start = tx_tail;
4551 
4552 	while (*size > bytes_written) {
4553 
4554 		ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
4555 
4556 		msgbuf = (uint8_t *)((ldcp->mode == LDC_MODE_RELIABLE) ?
4557 		    ldcmsg->rdata : ldcmsg->udata);
4558 
4559 		ldcmsg->type = LDC_DATA;
4560 		ldcmsg->stype = LDC_INFO;
4561 		ldcmsg->ctrl = 0;
4562 
4563 		remaining = *size - bytes_written;
4564 		len = min(ldcp->pkt_payload, remaining);
4565 		ldcmsg->env = (uint8_t)len;
4566 
4567 		curr_seqid++;
4568 		ldcmsg->seqid = curr_seqid;
4569 
4570 		/* copy the data into pkt */
4571 		bcopy(source, msgbuf, len);
4572 
4573 		source += len;
4574 		bytes_written += len;
4575 
4576 		/* increment tail */
4577 		tx_tail = (tx_tail + LDC_PACKET_SIZE) & txq_size_mask;
4578 
4579 		ASSERT(tx_tail != tx_head);
4580 	}
4581 
4582 	/* Set the start and stop bits */
4583 	ldcmsg->env |= LDC_FRAG_STOP;
4584 	ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + start);
4585 	ldcmsg->env |= LDC_FRAG_START;
4586 
4587 	/*
4588 	 * All packets have been copied into the TX queue
4589 	 * update the tail ptr in the HV
4590 	 */
4591 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
4592 	if (rv == 0) {
4593 		ldcp->tx_tail = tx_tail;
4594 		ldcp->last_msg_snt = curr_seqid;
4595 		*size = bytes_written;
4596 	} else {
4597 		int rv2;
4598 
4599 		if (rv != EWOULDBLOCK) {
4600 			*size = 0;
4601 			if (mutex_tryenter(&ldcp->lock)) {
4602 				i_ldc_reset(ldcp, B_FALSE);
4603 				mutex_exit(&ldcp->lock);
4604 			} else {
4605 				/*
4606 				 * Release Tx lock, and then reacquire channel
4607 				 * and Tx lock in correct order
4608 				 */
4609 				mutex_exit(&ldcp->tx_lock);
4610 				mutex_enter(&ldcp->lock);
4611 				mutex_enter(&ldcp->tx_lock);
4612 				i_ldc_reset(ldcp, B_FALSE);
4613 				mutex_exit(&ldcp->lock);
4614 			}
4615 			return (ECONNRESET);
4616 		}
4617 
4618 		D1(ldcp->id, "hv_tx_set_tail returns 0x%x (head 0x%x, "
4619 		    "old tail 0x%x, new tail 0x%x, qsize=0x%x)\n",
4620 		    rv, ldcp->tx_head, ldcp->tx_tail, tx_tail,
4621 		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT));
4622 
4623 		rv2 = hv_ldc_tx_get_state(ldcp->id,
4624 		    &tx_head, &tx_tail, &ldcp->link_state);
4625 
4626 		D1(ldcp->id, "hv_ldc_tx_get_state returns 0x%x "
4627 		    "(head 0x%x, tail 0x%x state 0x%x)\n",
4628 		    rv2, tx_head, tx_tail, ldcp->link_state);
4629 
4630 		*size = 0;
4631 	}
4632 
4633 	D2(ldcp->id, "ldc_write: (0x%llx) end xfer size=%d", ldcp->id, *size);
4634 
4635 	return (rv);
4636 }
4637 
4638 /*
4639  * Write specified amount of bytes to the channel
4640  * in multiple pkts of pkt_payload size. Each
4641  * packet is tagged with an unique packet ID in
4642  * the case of a reliable link.
4643  *
4644  * On return, size contains the number of bytes written.
4645  * This function needs to ensure that the write size is < MTU size
4646  */
4647 static int
4648 i_ldc_write_stream(ldc_chan_t *ldcp, caddr_t buf, size_t *sizep)
4649 {
4650 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
4651 	ASSERT(ldcp->mode == LDC_MODE_RELIABLE);
4652 
4653 	/* Truncate packet to max of MTU size */
4654 	if (*sizep > ldcp->mtu) *sizep = ldcp->mtu;
4655 	return (i_ldc_write_packet(ldcp, buf, sizep));
4656 }
4657 
4658 
4659 /*
4660  * Interfaces for channel nexus to register/unregister with LDC module
4661  * The nexus will register functions to be used to register individual
4662  * channels with the nexus and enable interrupts for the channels
4663  */
4664 int
4665 ldc_register(ldc_cnex_t *cinfo)
4666 {
4667 	ldc_chan_t	*ldcp;
4668 
4669 	if (cinfo == NULL || cinfo->dip == NULL ||
4670 	    cinfo->reg_chan == NULL || cinfo->unreg_chan == NULL ||
4671 	    cinfo->add_intr == NULL || cinfo->rem_intr == NULL ||
4672 	    cinfo->clr_intr == NULL) {
4673 
4674 		DWARN(DBG_ALL_LDCS, "ldc_register: invalid nexus info\n");
4675 		return (EINVAL);
4676 	}
4677 
4678 	mutex_enter(&ldcssp->lock);
4679 
4680 	/* nexus registration */
4681 	ldcssp->cinfo.dip = cinfo->dip;
4682 	ldcssp->cinfo.reg_chan = cinfo->reg_chan;
4683 	ldcssp->cinfo.unreg_chan = cinfo->unreg_chan;
4684 	ldcssp->cinfo.add_intr = cinfo->add_intr;
4685 	ldcssp->cinfo.rem_intr = cinfo->rem_intr;
4686 	ldcssp->cinfo.clr_intr = cinfo->clr_intr;
4687 
4688 	/* register any channels that might have been previously initialized */
4689 	ldcp = ldcssp->chan_list;
4690 	while (ldcp) {
4691 		if ((ldcp->tstate & TS_QCONF_RDY) &&
4692 		    (ldcp->tstate & TS_CNEX_RDY) == 0)
4693 			(void) i_ldc_register_channel(ldcp);
4694 
4695 		ldcp = ldcp->next;
4696 	}
4697 
4698 	mutex_exit(&ldcssp->lock);
4699 
4700 	return (0);
4701 }
4702 
4703 int
4704 ldc_unregister(ldc_cnex_t *cinfo)
4705 {
4706 	if (cinfo == NULL || cinfo->dip == NULL) {
4707 		DWARN(DBG_ALL_LDCS, "ldc_unregister: invalid nexus info\n");
4708 		return (EINVAL);
4709 	}
4710 
4711 	mutex_enter(&ldcssp->lock);
4712 
4713 	if (cinfo->dip != ldcssp->cinfo.dip) {
4714 		DWARN(DBG_ALL_LDCS, "ldc_unregister: invalid dip\n");
4715 		mutex_exit(&ldcssp->lock);
4716 		return (EINVAL);
4717 	}
4718 
4719 	/* nexus unregister */
4720 	ldcssp->cinfo.dip = NULL;
4721 	ldcssp->cinfo.reg_chan = NULL;
4722 	ldcssp->cinfo.unreg_chan = NULL;
4723 	ldcssp->cinfo.add_intr = NULL;
4724 	ldcssp->cinfo.rem_intr = NULL;
4725 	ldcssp->cinfo.clr_intr = NULL;
4726 
4727 	mutex_exit(&ldcssp->lock);
4728 
4729 	return (0);
4730 }
4731 
4732 int
4733 ldc_info(ldc_handle_t handle, ldc_info_t *info)
4734 {
4735 	ldc_chan_t	*ldcp;
4736 	uint64_t	avail;
4737 
4738 	if (handle == NULL || info == NULL) {
4739 		DWARN(DBG_ALL_LDCS, "ldc_get_info: invalid args\n");
4740 		return (EINVAL);
4741 	}
4742 
4743 	ldcp = (ldc_chan_t *)handle;
4744 
4745 	mutex_enter(&ldcp->lock);
4746 
4747 	/* check to see if channel is initalized */
4748 	if ((ldcp->tstate & ~TS_IN_RESET) < TS_INIT) {
4749 		DWARN(ldcp->id,
4750 		    "ldc_get_info: (0x%llx) channel not initialized\n",
4751 		    ldcp->id);
4752 		mutex_exit(&ldcp->lock);
4753 		return (EINVAL);
4754 	}
4755 
4756 	mutex_exit(&ldcp->lock);
4757 
4758 	/*
4759 	 * ldcssp->mapin_size is the max amount of shared memory supported by
4760 	 * the Hypervisor per guest. e.g, legacy HV supports 64MB; latest HV
4761 	 * support 1GB. This size is read during ldc module initialization.
4762 	 *
4763 	 * ldc_dring_direct_map_rsvd is the amount of memory reserved for
4764 	 * mapping in descriptor rings. In the initial implementation, we use a
4765 	 * simple approach to determine the amount of mapin space available per
4766 	 * channel. In future, we may implement strict accounting of the actual
4767 	 * memory consumed to determine the exact amount available per channel.
4768 	 */
4769 	if (ldcssp->mapin_size <= ldc_dring_direct_map_rsvd) {
4770 		info->direct_map_size_max = 0;
4771 		return (0);
4772 	}
4773 
4774 	avail = ldcssp->mapin_size - ldc_dring_direct_map_rsvd;
4775 	if (avail >= ldc_direct_map_size_max) {
4776 		info->direct_map_size_max = ldc_direct_map_size_max;
4777 	} else {
4778 		info->direct_map_size_max = 0;
4779 	}
4780 
4781 	return (0);
4782 }
4783