xref: /illumos-gate/usr/src/uts/sun4v/io/vnet_gen.c (revision 2850d85b7b93f31e578520dc3b3feb24db609c62)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/errno.h>
31 #include <sys/param.h>
32 #include <sys/stream.h>
33 #include <sys/strsubr.h>
34 #include <sys/kmem.h>
35 #include <sys/conf.h>
36 #include <sys/devops.h>
37 #include <sys/ksynch.h>
38 #include <sys/stat.h>
39 #include <sys/modctl.h>
40 #include <sys/debug.h>
41 #include <sys/ethernet.h>
42 #include <sys/ddi.h>
43 #include <sys/sunddi.h>
44 #include <sys/strsun.h>
45 #include <sys/note.h>
46 #include <sys/mac.h>
47 #include <sys/mac_ether.h>
48 #include <sys/ldc.h>
49 #include <sys/mach_descrip.h>
50 #include <sys/mdeg.h>
51 #include <net/if.h>
52 #include <sys/vnet.h>
53 #include <sys/vio_mailbox.h>
54 #include <sys/vio_common.h>
55 #include <sys/vnet_common.h>
56 #include <sys/vnet_mailbox.h>
57 #include <sys/vio_util.h>
58 #include <sys/vnet_gen.h>
59 #include <sys/callb.h>
60 #include <sys/sdt.h>
61 #include <sys/intr.h>
62 #include <sys/pattr.h>
63 
64 /*
65  * Implementation of the mac functionality for vnet using the
66  * generic(default) transport layer of sun4v Logical Domain Channels(LDC).
67  */
68 
69 /*
70  * Function prototypes.
71  */
72 /* vgen proxy entry points */
73 int vgen_init(void *vnetp, dev_info_t *vnetdip, const uint8_t *macaddr,
74 	mac_register_t **vgenmacp);
75 int vgen_uninit(void *arg);
76 static int vgen_start(void *arg);
77 static void vgen_stop(void *arg);
78 static mblk_t *vgen_tx(void *arg, mblk_t *mp);
79 static int vgen_multicst(void *arg, boolean_t add,
80 	const uint8_t *mca);
81 static int vgen_promisc(void *arg, boolean_t on);
82 static int vgen_unicst(void *arg, const uint8_t *mca);
83 static int vgen_stat(void *arg, uint_t stat, uint64_t *val);
84 static void vgen_ioctl(void *arg, queue_t *wq, mblk_t *mp);
85 
86 /* externs - functions provided by vnet to add/remove/modify entries in fdb */
87 void vnet_add_fdb(void *arg, uint8_t *macaddr, mac_tx_t m_tx, void *txarg);
88 void vnet_del_fdb(void *arg, uint8_t *macaddr);
89 void vnet_modify_fdb(void *arg, uint8_t *macaddr, mac_tx_t m_tx,
90 	void *txarg, boolean_t upgrade);
91 void vnet_add_def_rte(void *arg, mac_tx_t m_tx, void *txarg);
92 void vnet_del_def_rte(void *arg);
93 void vnet_rx(void *arg, mac_resource_handle_t mrh, mblk_t *mp);
94 void vnet_tx_update(void *arg);
95 
96 /* vgen internal functions */
97 static void vgen_detach_ports(vgen_t *vgenp);
98 static void vgen_port_detach(vgen_port_t *portp);
99 static void vgen_port_list_insert(vgen_port_t *portp);
100 static void vgen_port_list_remove(vgen_port_t *portp);
101 static vgen_port_t *vgen_port_lookup(vgen_portlist_t *plistp,
102 	int port_num);
103 static int vgen_mdeg_reg(vgen_t *vgenp);
104 static void vgen_mdeg_unreg(vgen_t *vgenp);
105 static int vgen_mdeg_cb(void *cb_argp, mdeg_result_t *resp);
106 static int vgen_add_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
107 static int vgen_remove_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
108 static int vgen_port_attach_mdeg(vgen_t *vgenp, int port_num, uint64_t *ldcids,
109 	int num_ids, struct ether_addr *macaddr, boolean_t vsw_port);
110 static void vgen_port_detach_mdeg(vgen_port_t *portp);
111 static int vgen_update_port(vgen_t *vgenp, md_t *curr_mdp,
112 	mde_cookie_t curr_mdex, md_t *prev_mdp, mde_cookie_t prev_mdex);
113 static uint64_t	vgen_port_stat(vgen_port_t *portp, uint_t stat);
114 
115 static int vgen_ldc_attach(vgen_port_t *portp, uint64_t ldc_id);
116 static void vgen_ldc_detach(vgen_ldc_t *ldcp);
117 static int vgen_alloc_tx_ring(vgen_ldc_t *ldcp);
118 static void vgen_free_tx_ring(vgen_ldc_t *ldcp);
119 static void vgen_init_ports(vgen_t *vgenp);
120 static void vgen_port_init(vgen_port_t *portp);
121 static void vgen_uninit_ports(vgen_t *vgenp);
122 static void vgen_port_uninit(vgen_port_t *portp);
123 static void vgen_init_ldcs(vgen_port_t *portp);
124 static void vgen_uninit_ldcs(vgen_port_t *portp);
125 static int vgen_ldc_init(vgen_ldc_t *ldcp);
126 static void vgen_ldc_uninit(vgen_ldc_t *ldcp);
127 static int vgen_init_tbufs(vgen_ldc_t *ldcp);
128 static void vgen_uninit_tbufs(vgen_ldc_t *ldcp);
129 static void vgen_clobber_tbufs(vgen_ldc_t *ldcp);
130 static void vgen_clobber_rxds(vgen_ldc_t *ldcp);
131 static uint64_t	vgen_ldc_stat(vgen_ldc_t *ldcp, uint_t stat);
132 static uint_t vgen_ldc_cb(uint64_t event, caddr_t arg);
133 static int vgen_portsend(vgen_port_t *portp, mblk_t *mp);
134 static int vgen_ldcsend(vgen_ldc_t *ldcp, mblk_t *mp);
135 static void vgen_reclaim(vgen_ldc_t *ldcp);
136 static void vgen_reclaim_dring(vgen_ldc_t *ldcp);
137 static int vgen_num_txpending(vgen_ldc_t *ldcp);
138 static int vgen_tx_dring_full(vgen_ldc_t *ldcp);
139 static int vgen_ldc_txtimeout(vgen_ldc_t *ldcp);
140 static void vgen_ldc_watchdog(void *arg);
141 
142 /* vgen handshake functions */
143 static vgen_ldc_t *vh_nextphase(vgen_ldc_t *ldcp);
144 static int vgen_supported_version(vgen_ldc_t *ldcp, uint16_t ver_major,
145 	uint16_t ver_minor);
146 static int vgen_next_version(vgen_ldc_t *ldcp, vgen_ver_t *verp);
147 static int vgen_sendmsg(vgen_ldc_t *ldcp, caddr_t msg,  size_t msglen,
148 	boolean_t caller_holds_lock);
149 static int vgen_send_version_negotiate(vgen_ldc_t *ldcp);
150 static int vgen_send_attr_info(vgen_ldc_t *ldcp);
151 static int vgen_send_dring_reg(vgen_ldc_t *ldcp);
152 static int vgen_send_rdx_info(vgen_ldc_t *ldcp);
153 static int vgen_send_dring_data(vgen_ldc_t *ldcp, uint32_t start, int32_t end);
154 static int vgen_send_mcast_info(vgen_ldc_t *ldcp);
155 static int vgen_handshake_phase2(vgen_ldc_t *ldcp);
156 static void vgen_handshake_reset(vgen_ldc_t *ldcp);
157 static void vgen_reset_hphase(vgen_ldc_t *ldcp);
158 static void vgen_handshake(vgen_ldc_t *ldcp);
159 static int vgen_handshake_done(vgen_ldc_t *ldcp);
160 static void vgen_handshake_retry(vgen_ldc_t *ldcp);
161 static int vgen_handle_version_negotiate(vgen_ldc_t *ldcp,
162 	vio_msg_tag_t *tagp);
163 static int vgen_handle_attr_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
164 static int vgen_handle_dring_reg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
165 static int vgen_handle_rdx_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
166 static int vgen_handle_mcast_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
167 static int vgen_handle_ctrlmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
168 static int vgen_handle_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
169 static int vgen_handle_dring_data_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
170 static int vgen_process_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
171 static int vgen_handle_dring_data_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
172 static int vgen_handle_dring_data_nack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
173 static int vgen_send_dring_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp,
174 	uint32_t start, int32_t end, uint8_t pstate);
175 static int vgen_handle_datamsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
176 static void vgen_handle_errmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
177 static void vgen_handle_evt_up(vgen_ldc_t *ldcp, boolean_t flag);
178 static void vgen_handle_evt_reset(vgen_ldc_t *ldcp, boolean_t flag);
179 static int vgen_check_sid(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
180 static caddr_t vgen_print_ethaddr(uint8_t *a, char *ebuf);
181 static void vgen_hwatchdog(void *arg);
182 static void vgen_print_attr_info(vgen_ldc_t *ldcp, int endpoint);
183 static void vgen_print_hparams(vgen_hparams_t *hp);
184 static void vgen_print_ldcinfo(vgen_ldc_t *ldcp);
185 static uint_t vgen_ldc_rcv_softintr(caddr_t arg1, caddr_t arg2);
186 static void vgen_stop_rcv_thread(vgen_ldc_t *ldcp);
187 static void vgen_ldc_rcv_worker(void *arg);
188 static void vgen_handle_evt_read(vgen_ldc_t *ldcp);
189 static void vgen_ldc_queue_data(vgen_ldc_t *ldcp,
190 	mblk_t *rhead, mblk_t *rtail);
191 
192 /*
193  * The handshake process consists of 5 phases defined below, with VH_PHASE0
194  * being the pre-handshake phase and VH_DONE is the phase to indicate
195  * successful completion of all phases.
196  * Each phase may have one to several handshake states which are required
197  * to complete successfully to move to the next phase.
198  * Refer to the functions vgen_handshake() and vgen_handshake_done() for
199  * more details.
200  */
201 /* handshake phases */
202 enum {	VH_PHASE0, VH_PHASE1, VH_PHASE2, VH_PHASE3, VH_DONE = 0x80 };
203 
204 /* handshake states */
205 enum {
206 
207 	VER_INFO_SENT	=	0x1,
208 	VER_ACK_RCVD	=	0x2,
209 	VER_INFO_RCVD	=	0x4,
210 	VER_ACK_SENT	=	0x8,
211 	VER_NEGOTIATED	=	(VER_ACK_RCVD | VER_ACK_SENT),
212 
213 	ATTR_INFO_SENT	=	0x10,
214 	ATTR_ACK_RCVD	=	0x20,
215 	ATTR_INFO_RCVD	=	0x40,
216 	ATTR_ACK_SENT	=	0x80,
217 	ATTR_INFO_EXCHANGED	=	(ATTR_ACK_RCVD | ATTR_ACK_SENT),
218 
219 	DRING_INFO_SENT	=	0x100,
220 	DRING_ACK_RCVD	=	0x200,
221 	DRING_INFO_RCVD	=	0x400,
222 	DRING_ACK_SENT	=	0x800,
223 	DRING_INFO_EXCHANGED	=	(DRING_ACK_RCVD | DRING_ACK_SENT),
224 
225 	RDX_INFO_SENT	=	0x1000,
226 	RDX_ACK_RCVD	=	0x2000,
227 	RDX_INFO_RCVD	=	0x4000,
228 	RDX_ACK_SENT	=	0x8000,
229 	RDX_EXCHANGED	=	(RDX_ACK_RCVD | RDX_ACK_SENT)
230 
231 };
232 
/*
 * Acquire (LDC_LOCK) or release (LDC_UNLOCK) all five per-channel mutexes.
 * The locks are taken in the order cblock, rxlock, wrlock, txlock, tclock
 * and dropped in the reverse order.
 *
 * Each macro expands to a single statement (do { } while (0)) so that it is
 * safe inside an unbraced if/else; invoke it with a trailing semicolon.
 * Note that the argument is evaluated once per mutex.
 */
#define	LDC_LOCK(ldcp)	\
	do {							\
		mutex_enter(&((ldcp)->cblock));			\
		mutex_enter(&((ldcp)->rxlock));			\
		mutex_enter(&((ldcp)->wrlock));			\
		mutex_enter(&((ldcp)->txlock));			\
		mutex_enter(&((ldcp)->tclock));			\
	} while (0)
#define	LDC_UNLOCK(ldcp)	\
	do {							\
		mutex_exit(&((ldcp)->tclock));			\
		mutex_exit(&((ldcp)->txlock));			\
		mutex_exit(&((ldcp)->wrlock));			\
		mutex_exit(&((ldcp)->rxlock));			\
		mutex_exit(&((ldcp)->cblock));			\
	} while (0)
245 
246 static struct ether_addr etherbroadcastaddr = {
247 	0xff, 0xff, 0xff, 0xff, 0xff, 0xff
248 };
/*
 * MIB II broadcast/multicast packets
 *
 * Both macros take a pointer to an ethernet header. IS_BROADCAST compares
 * the destination address against etherbroadcastaddr; IS_MULTICAST tests
 * the low-order bit of the first destination octet (which is also set for
 * the broadcast address). The argument is parenthesized so that pointer
 * expressions such as (p + 1) are handled correctly.
 */
#define	IS_BROADCAST(ehp) \
		(ether_cmp(&(ehp)->ether_dhost, &etherbroadcastaddr) == 0)
#define	IS_MULTICAST(ehp) \
		(((ehp)->ether_dhost.ether_addr_octet[0] & 01) == 1)
256 
257 /*
258  * Property names
259  */
260 static char macaddr_propname[] = "mac-address";
261 static char rmacaddr_propname[] = "remote-mac-address";
262 static char channel_propname[] = "channel-endpoint";
263 static char reg_propname[] = "reg";
264 static char port_propname[] = "port";
265 static char swport_propname[] = "switch-port";
266 static char id_propname[] = "id";
267 
268 /* versions supported - in decreasing order */
269 static vgen_ver_t vgen_versions[VGEN_NUM_VER] = { {1, 0} };
270 
271 /* Tunables */
272 uint32_t vgen_hwd_interval = 5;		/* handshake watchdog freq in sec */
273 uint32_t vgen_max_hretries = VNET_NUM_HANDSHAKES; /* # of handshake retries */
274 uint32_t vgen_ldcwr_retries = 10;	/* max # of ldc_write() retries */
275 uint32_t vgen_ldcup_retries = 5;	/* max # of ldc_up() retries */
276 uint32_t vgen_recv_delay = 1;		/* delay when rx descr not ready */
277 uint32_t vgen_recv_retries = 10;	/* retry when rx descr not ready */
278 uint32_t vgen_tx_retries = 0x4;		/* retry when tx descr not available */
279 uint32_t vgen_tx_delay = 0x30;		/* delay when tx descr not available */
280 
int vgen_rcv_thread_enabled = 1;	/* Enable Receive thread */
282 
283 /*
 * max # of packets accumulated prior to sending them up. It is best
 * to keep this at 60% of the number of receive buffers.
286  */
287 uint32_t vgen_chain_len = (VGEN_NRBUFS * 0.6);
288 
289 /*
290  * Tunables for each receive buffer size and number of buffers for
291  * each buffer size.
292  */
293 uint32_t vgen_rbufsz1 = VGEN_DBLK_SZ_128;
294 uint32_t vgen_rbufsz2 = VGEN_DBLK_SZ_256;
295 uint32_t vgen_rbufsz3 = VGEN_DBLK_SZ_2048;
296 
297 uint32_t vgen_nrbufs1 = VGEN_NRBUFS;
298 uint32_t vgen_nrbufs2 = VGEN_NRBUFS;
299 uint32_t vgen_nrbufs3 = VGEN_NRBUFS;
300 
301 #ifdef DEBUG
302 /* flags to simulate error conditions for debugging */
303 int vgen_trigger_txtimeout = 0;
304 int vgen_trigger_rxlost = 0;
305 #endif
306 
307 /* MD update matching structure */
308 static md_prop_match_t	vport_prop_match[] = {
309 	{ MDET_PROP_VAL,	"id" },
310 	{ MDET_LIST_END,	NULL }
311 };
312 
313 static mdeg_node_match_t vport_match = { "virtual-device-port",
314 					vport_prop_match };
315 
316 /* template for matching a particular vnet instance */
317 static mdeg_prop_spec_t vgen_prop_template[] = {
318 	{ MDET_PROP_STR,	"name",		"network" },
319 	{ MDET_PROP_VAL,	"cfg-handle",	NULL },
320 	{ MDET_LIST_END,	NULL,		NULL }
321 };
322 
323 #define	VGEN_SET_MDEG_PROP_INST(specp, val)	(specp)[1].ps_val = (val)
324 
325 static int vgen_mdeg_cb(void *cb_argp, mdeg_result_t *resp);
326 
327 static mac_callbacks_t vgen_m_callbacks = {
328 	0,
329 	vgen_stat,
330 	vgen_start,
331 	vgen_stop,
332 	vgen_promisc,
333 	vgen_multicst,
334 	vgen_unicst,
335 	vgen_tx,
336 	NULL,
337 	NULL,
338 	NULL
339 };
340 
341 /* externs */
342 extern pri_t	maxclsyspri;
343 extern proc_t	p0;
344 extern uint32_t vnet_ntxds;
345 extern uint32_t vnet_ldcwd_interval;
346 extern uint32_t vnet_ldcwd_txtimeout;
347 extern uint32_t vnet_ldc_mtu;
348 extern uint32_t vnet_nrbufs;
349 
350 
351 #ifdef DEBUG
352 
353 extern int vnet_dbglevel;
354 static void debug_printf(const char *fname, vgen_t *vgenp,
355 	vgen_ldc_t *ldcp, const char *fmt, ...);
356 
357 /* -1 for all LDCs info, or ldc_id for a specific LDC info */
358 int vgendbg_ldcid = -1;
359 
360 /* simulate handshake error conditions for debug */
361 uint32_t vgen_hdbg;
362 #define	HDBG_VERSION	0x1
363 #define	HDBG_TIMEOUT	0x2
364 #define	HDBG_BAD_SID	0x4
365 #define	HDBG_OUT_STATE	0x8
366 
367 #endif
368 
369 
370 
371 /*
372  * vgen_init() is called by an instance of vnet driver to initialize the
373  * corresponding generic proxy transport layer. The arguments passed by vnet
374  * are - an opaque pointer to the vnet instance, pointers to dev_info_t and
375  * the mac address of the vnet device, and a pointer to mac_register_t of
376  * the generic transport is returned in the last argument.
377  */
378 int
379 vgen_init(void *vnetp, dev_info_t *vnetdip, const uint8_t *macaddr,
380     mac_register_t **vgenmacp)
381 {
382 	vgen_t *vgenp;
383 	mac_register_t *macp;
384 	int instance;
385 
386 	if ((vnetp == NULL) || (vnetdip == NULL))
387 		return (DDI_FAILURE);
388 
389 	instance = ddi_get_instance(vnetdip);
390 
391 	DBG1(NULL, NULL, "vnet(%d): enter\n", instance);
392 
393 	vgenp = kmem_zalloc(sizeof (vgen_t), KM_SLEEP);
394 
395 	vgenp->vnetp = vnetp;
396 	vgenp->vnetdip = vnetdip;
397 	bcopy(macaddr, &(vgenp->macaddr), ETHERADDRL);
398 
399 	if ((macp = mac_alloc(MAC_VERSION)) == NULL) {
400 		KMEM_FREE(vgenp);
401 		return (DDI_FAILURE);
402 	}
403 	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
404 	macp->m_driver = vgenp;
405 	macp->m_dip = vnetdip;
406 	macp->m_src_addr = (uint8_t *)&(vgenp->macaddr);
407 	macp->m_callbacks = &vgen_m_callbacks;
408 	macp->m_min_sdu = 0;
409 	macp->m_max_sdu = ETHERMTU;
410 	vgenp->macp = macp;
411 
412 	/* allocate multicast table */
413 	vgenp->mctab = kmem_zalloc(VGEN_INIT_MCTAB_SIZE *
414 	    sizeof (struct ether_addr), KM_SLEEP);
415 	vgenp->mccount = 0;
416 	vgenp->mcsize = VGEN_INIT_MCTAB_SIZE;
417 
418 	mutex_init(&vgenp->lock, NULL, MUTEX_DRIVER, NULL);
419 	rw_init(&vgenp->vgenports.rwlock, NULL, RW_DRIVER, NULL);
420 
421 	/* register with MD event generator */
422 	if (vgen_mdeg_reg(vgenp) != DDI_SUCCESS) {
423 		rw_destroy(&vgenp->vgenports.rwlock);
424 		mutex_destroy(&vgenp->lock);
425 		kmem_free(vgenp->mctab, VGEN_INIT_MCTAB_SIZE *
426 		    sizeof (struct ether_addr));
427 		mac_free(vgenp->macp);
428 		KMEM_FREE(vgenp);
429 		return (DDI_FAILURE);
430 	}
431 
432 	/* register macp of this vgen_t with vnet */
433 	*vgenmacp = vgenp->macp;
434 
435 	DBG1(NULL, NULL, "vnet(%d): exit\n", instance);
436 	return (DDI_SUCCESS);
437 }
438 
439 /*
440  * Called by vnet to undo the initializations done by vgen_init().
441  * The handle provided by generic transport during vgen_init() is the argument.
442  */
443 int
444 vgen_uninit(void *arg)
445 {
446 	vgen_t	*vgenp = (vgen_t *)arg;
447 	vio_mblk_pool_t *rp, *nrp;
448 
449 	if (vgenp == NULL) {
450 		return (DDI_FAILURE);
451 	}
452 
453 	DBG1(vgenp, NULL, "enter\n");
454 
455 	/* unregister with MD event generator */
456 	vgen_mdeg_unreg(vgenp);
457 
458 	mutex_enter(&vgenp->lock);
459 
460 	/* detach all ports from the device */
461 	vgen_detach_ports(vgenp);
462 
463 	/*
464 	 * free any pending rx mblk pools,
465 	 * that couldn't be freed previously during channel detach.
466 	 */
467 	rp = vgenp->rmp;
468 	while (rp != NULL) {
469 		nrp = vgenp->rmp = rp->nextp;
470 		if (vio_destroy_mblks(rp)) {
471 			vgenp->rmp = rp;
472 			mutex_exit(&vgenp->lock);
473 			return (DDI_FAILURE);
474 		}
475 		rp = nrp;
476 	}
477 
478 	/* free multicast table */
479 	kmem_free(vgenp->mctab, vgenp->mcsize * sizeof (struct ether_addr));
480 
481 	mac_free(vgenp->macp);
482 
483 	mutex_exit(&vgenp->lock);
484 
485 	rw_destroy(&vgenp->vgenports.rwlock);
486 	mutex_destroy(&vgenp->lock);
487 
488 	KMEM_FREE(vgenp);
489 
490 	DBG1(vgenp, NULL, "exit\n");
491 
492 	return (DDI_SUCCESS);
493 }
494 
495 /* enable transmit/receive for the device */
496 int
497 vgen_start(void *arg)
498 {
499 	vgen_t		*vgenp = (vgen_t *)arg;
500 
501 	DBG1(vgenp, NULL, "enter\n");
502 
503 	mutex_enter(&vgenp->lock);
504 	vgen_init_ports(vgenp);
505 	vgenp->flags |= VGEN_STARTED;
506 	mutex_exit(&vgenp->lock);
507 
508 	DBG1(vgenp, NULL, "exit\n");
509 	return (DDI_SUCCESS);
510 }
511 
512 /* stop transmit/receive */
513 void
514 vgen_stop(void *arg)
515 {
516 	vgen_t		*vgenp = (vgen_t *)arg;
517 
518 	DBG1(vgenp, NULL, "enter\n");
519 
520 	mutex_enter(&vgenp->lock);
521 	vgen_uninit_ports(vgenp);
522 	vgenp->flags &= ~(VGEN_STARTED);
523 	mutex_exit(&vgenp->lock);
524 
525 	DBG1(vgenp, NULL, "exit\n");
526 }
527 
528 /* vgen transmit function */
529 static mblk_t *
530 vgen_tx(void *arg, mblk_t *mp)
531 {
532 	int i;
533 	vgen_port_t *portp;
534 	int status = VGEN_FAILURE;
535 
536 	portp = (vgen_port_t *)arg;
537 	/*
538 	 * Retry so that we avoid reporting a failure
539 	 * to the upper layer. Returning a failure may cause the
540 	 * upper layer to go into single threaded mode there by
541 	 * causing performance degradation, especially for a large
542 	 * number of connections.
543 	 */
544 	for (i = 0; i < vgen_tx_retries; ) {
545 		status = vgen_portsend(portp, mp);
546 		if (status == VGEN_SUCCESS) {
547 			break;
548 		}
549 		if (++i < vgen_tx_retries)
550 			delay(drv_usectohz(vgen_tx_delay));
551 	}
552 	if (status != VGEN_SUCCESS) {
553 		/* failure */
554 		return (mp);
555 	}
556 	/* success */
557 	return (NULL);
558 }
559 
560 /* transmit packets over the given port */
561 static int
562 vgen_portsend(vgen_port_t *portp, mblk_t *mp)
563 {
564 	vgen_ldclist_t	*ldclp;
565 	vgen_ldc_t *ldcp;
566 	int status;
567 	int rv = VGEN_SUCCESS;
568 
569 	ldclp = &portp->ldclist;
570 	READ_ENTER(&ldclp->rwlock);
571 	/*
572 	 * NOTE: for now, we will assume we have a single channel.
573 	 */
574 	if (ldclp->headp == NULL) {
575 		RW_EXIT(&ldclp->rwlock);
576 		return (VGEN_FAILURE);
577 	}
578 	ldcp = ldclp->headp;
579 
580 	status  = vgen_ldcsend(ldcp, mp);
581 
582 	RW_EXIT(&ldclp->rwlock);
583 
584 	if (status != VGEN_TX_SUCCESS) {
585 		rv = VGEN_FAILURE;
586 	}
587 	return (rv);
588 }
589 
590 /* channel transmit function */
591 static int
592 vgen_ldcsend(vgen_ldc_t *ldcp, mblk_t *mp)
593 {
594 	vgen_private_desc_t	*tbufp;
595 	vgen_private_desc_t	*rtbufp;
596 	vnet_public_desc_t	*rtxdp;
597 	vgen_private_desc_t	*ntbufp;
598 	vnet_public_desc_t	*txdp;
599 	vio_dring_entry_hdr_t	*hdrp;
600 	vgen_stats_t		*statsp;
601 	struct ether_header	*ehp;
602 	boolean_t	is_bcast = B_FALSE;
603 	boolean_t	is_mcast = B_FALSE;
604 	size_t		mblksz;
605 	caddr_t		dst;
606 	mblk_t		*bp;
607 	size_t		size;
608 	int		rv = 0;
609 	ldc_status_t	istatus;
610 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
611 
612 	statsp = &ldcp->stats;
613 	size = msgsize(mp);
614 
615 	DBG1(vgenp, ldcp, "enter\n");
616 
617 	if (ldcp->ldc_status != LDC_UP) {
618 		DWARN(vgenp, ldcp, "status(%d), dropping packet\n",
619 		    ldcp->ldc_status);
620 		/* retry ldc_up() if needed */
621 		if (ldcp->flags & CHANNEL_STARTED)
622 			(void) ldc_up(ldcp->ldc_handle);
623 		goto vgen_tx_exit;
624 	}
625 
626 	/* drop the packet if ldc is not up or handshake is not done */
627 	if (ldcp->hphase != VH_DONE) {
628 		DWARN(vgenp, ldcp, "hphase(%x), dropping packet\n",
629 		    ldcp->hphase);
630 		goto vgen_tx_exit;
631 	}
632 
633 	if (size > (size_t)ETHERMAX) {
634 		DWARN(vgenp, ldcp, "invalid size(%d)\n", size);
635 		goto vgen_tx_exit;
636 	}
637 	if (size < ETHERMIN)
638 		size = ETHERMIN;
639 
640 	ehp = (struct ether_header *)mp->b_rptr;
641 	is_bcast = IS_BROADCAST(ehp);
642 	is_mcast = IS_MULTICAST(ehp);
643 
644 	mutex_enter(&ldcp->txlock);
645 	/*
646 	 * allocate a descriptor
647 	 */
648 	tbufp = ldcp->next_tbufp;
649 	ntbufp = NEXTTBUF(ldcp, tbufp);
650 	if (ntbufp == ldcp->cur_tbufp) { /* out of tbufs/txds */
651 
652 		mutex_enter(&ldcp->tclock);
653 		/* Try reclaiming now */
654 		vgen_reclaim_dring(ldcp);
655 		ldcp->reclaim_lbolt = ddi_get_lbolt();
656 
657 		if (ntbufp == ldcp->cur_tbufp) {
658 			/* Now we are really out of tbuf/txds */
659 			ldcp->need_resched = B_TRUE;
660 			mutex_exit(&ldcp->tclock);
661 
662 			statsp->tx_no_desc++;
663 			mutex_exit(&ldcp->txlock);
664 
665 			return (VGEN_TX_NORESOURCES);
666 		}
667 		mutex_exit(&ldcp->tclock);
668 	}
669 	/* update next available tbuf in the ring and update tx index */
670 	ldcp->next_tbufp = ntbufp;
671 	INCR_TXI(ldcp->next_txi, ldcp);
672 
673 	/* Mark the buffer busy before releasing the lock */
674 	tbufp->flags = VGEN_PRIV_DESC_BUSY;
675 	mutex_exit(&ldcp->txlock);
676 
677 	/* copy data into pre-allocated transmit buffer */
678 	dst = tbufp->datap + VNET_IPALIGN;
679 	for (bp = mp; bp != NULL; bp = bp->b_cont) {
680 		mblksz = MBLKL(bp);
681 		bcopy(bp->b_rptr, dst, mblksz);
682 		dst += mblksz;
683 	}
684 
685 	tbufp->datalen = size;
686 
687 	/* initialize the corresponding public descriptor (txd) */
688 	txdp = tbufp->descp;
689 	hdrp = &txdp->hdr;
690 	txdp->nbytes = size;
691 	txdp->ncookies = tbufp->ncookies;
692 	bcopy((tbufp->memcookie), (txdp->memcookie),
693 	    tbufp->ncookies * sizeof (ldc_mem_cookie_t));
694 
695 	mutex_enter(&ldcp->wrlock);
696 	/*
697 	 * If the flags not set to BUSY, it implies that the clobber
698 	 * was done while we were copying the data. In such case,
699 	 * discard the packet and return.
700 	 */
701 	if (tbufp->flags != VGEN_PRIV_DESC_BUSY) {
702 		statsp->oerrors++;
703 		mutex_exit(&ldcp->wrlock);
704 		goto vgen_tx_exit;
705 	}
706 	hdrp->dstate = VIO_DESC_READY;
707 
708 	/* update stats */
709 	statsp->opackets++;
710 	statsp->obytes += size;
711 	if (is_bcast)
712 		statsp->brdcstxmt++;
713 	else if (is_mcast)
714 		statsp->multixmt++;
715 
716 	/* send dring datamsg to the peer */
717 	if (ldcp->resched_peer) {
718 
719 		rtbufp = &ldcp->tbufp[ldcp->resched_peer_txi];
720 		rtxdp = rtbufp->descp;
721 
722 		if (rtxdp->hdr.dstate == VIO_DESC_READY) {
723 
724 			rv = vgen_send_dring_data(ldcp,
725 			    (uint32_t)ldcp->resched_peer_txi, -1);
726 			if (rv != 0) {
727 				/* error: drop the packet */
728 				DWARN(vgenp, ldcp, "vgen_send_dring_data "
729 				    "failed: rv(%d) len(%d)\n",
730 				    ldcp->ldc_id, rv, size);
731 				statsp->oerrors++;
732 			} else {
733 				ldcp->resched_peer = B_FALSE;
734 			}
735 
736 		}
737 
738 	}
739 
740 	mutex_exit(&ldcp->wrlock);
741 
742 vgen_tx_exit:
743 	if (rv == ECONNRESET) {
744 		/*
745 		 * Check if either callback thread or another tx thread is
746 		 * already running. Calling mutex_enter() will result in a
747 		 * deadlock if the other thread already holds cblock and is
748 		 * blocked in vnet_modify_fdb() (which is called from
749 		 * vgen_handle_evt_reset()) waiting for write access on rwlock,
750 		 * as this transmit thread already holds that lock as a reader
751 		 * in vnet_m_tx(). See comments in vnet_modify_fdb() in vnet.c.
752 		 */
753 		if (mutex_tryenter(&ldcp->cblock)) {
754 			if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
755 				DWARN(vgenp, ldcp, "ldc_status() error\n");
756 			} else {
757 				ldcp->ldc_status = istatus;
758 			}
759 			if (ldcp->ldc_status != LDC_UP) {
760 				/*
761 				 * Second arg is TRUE, as we know that
762 				 * the caller of this function - vnet_m_tx(),
763 				 * already holds fdb-rwlock as a reader.
764 				 */
765 				vgen_handle_evt_reset(ldcp, B_TRUE);
766 			}
767 			mutex_exit(&ldcp->cblock);
768 		}
769 	}
770 	freemsg(mp);
771 	DBG1(vgenp, ldcp, "exit\n");
772 	return (VGEN_TX_SUCCESS);
773 }
774 
775 /* enable/disable a multicast address */
776 int
777 vgen_multicst(void *arg, boolean_t add, const uint8_t *mca)
778 {
779 	vgen_t			*vgenp;
780 	vnet_mcast_msg_t	mcastmsg;
781 	vio_msg_tag_t		*tagp;
782 	vgen_port_t		*portp;
783 	vgen_portlist_t		*plistp;
784 	vgen_ldc_t		*ldcp;
785 	vgen_ldclist_t		*ldclp;
786 	struct ether_addr	*addrp;
787 	int			rv = DDI_FAILURE;
788 	uint32_t		i;
789 
790 	vgenp = (vgen_t *)arg;
791 	addrp = (struct ether_addr *)mca;
792 	tagp = &mcastmsg.tag;
793 	bzero(&mcastmsg, sizeof (mcastmsg));
794 
795 	mutex_enter(&vgenp->lock);
796 
797 	plistp = &(vgenp->vgenports);
798 
799 	READ_ENTER(&plistp->rwlock);
800 
801 	portp = vgenp->vsw_portp;
802 	if (portp == NULL) {
803 		RW_EXIT(&plistp->rwlock);
804 		mutex_exit(&vgenp->lock);
805 		return (rv);
806 	}
807 	ldclp = &portp->ldclist;
808 
809 	READ_ENTER(&ldclp->rwlock);
810 
811 	ldcp = ldclp->headp;
812 	if (ldcp == NULL)
813 		goto vgen_mcast_exit;
814 
815 	mutex_enter(&ldcp->cblock);
816 
817 	if (ldcp->hphase == VH_DONE) {
818 		/*
819 		 * If handshake is done, send a msg to vsw to add/remove
820 		 * the multicast address. Otherwise, we just update this
821 		 * mcast address in our table and the table will be sync'd
822 		 * with vsw when handshake completes.
823 		 */
824 		tagp->vio_msgtype = VIO_TYPE_CTRL;
825 		tagp->vio_subtype = VIO_SUBTYPE_INFO;
826 		tagp->vio_subtype_env = VNET_MCAST_INFO;
827 		tagp->vio_sid = ldcp->local_sid;
828 		bcopy(mca, &(mcastmsg.mca), ETHERADDRL);
829 		mcastmsg.set = add;
830 		mcastmsg.count = 1;
831 		if (vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (mcastmsg),
832 		    B_FALSE) != VGEN_SUCCESS) {
833 			DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
834 			mutex_exit(&ldcp->cblock);
835 			goto vgen_mcast_exit;
836 		}
837 	}
838 
839 	mutex_exit(&ldcp->cblock);
840 
841 	if (add) {
842 
843 		/* expand multicast table if necessary */
844 		if (vgenp->mccount >= vgenp->mcsize) {
845 			struct ether_addr	*newtab;
846 			uint32_t		newsize;
847 
848 
849 			newsize = vgenp->mcsize * 2;
850 
851 			newtab = kmem_zalloc(newsize *
852 			    sizeof (struct ether_addr), KM_NOSLEEP);
853 			if (newtab == NULL)
854 				goto vgen_mcast_exit;
855 			bcopy(vgenp->mctab, newtab, vgenp->mcsize *
856 			    sizeof (struct ether_addr));
857 			kmem_free(vgenp->mctab,
858 			    vgenp->mcsize * sizeof (struct ether_addr));
859 
860 			vgenp->mctab = newtab;
861 			vgenp->mcsize = newsize;
862 		}
863 
864 		/* add address to the table */
865 		vgenp->mctab[vgenp->mccount++] = *addrp;
866 
867 	} else {
868 
869 		/* delete address from the table */
870 		for (i = 0; i < vgenp->mccount; i++) {
871 			if (ether_cmp(addrp, &(vgenp->mctab[i])) == 0) {
872 
873 				/*
874 				 * If there's more than one address in this
875 				 * table, delete the unwanted one by moving
876 				 * the last one in the list over top of it;
877 				 * otherwise, just remove it.
878 				 */
879 				if (vgenp->mccount > 1) {
880 					vgenp->mctab[i] =
881 					    vgenp->mctab[vgenp->mccount-1];
882 				}
883 				vgenp->mccount--;
884 				break;
885 			}
886 		}
887 	}
888 
889 	rv = DDI_SUCCESS;
890 
891 vgen_mcast_exit:
892 	RW_EXIT(&ldclp->rwlock);
893 	RW_EXIT(&plistp->rwlock);
894 
895 	mutex_exit(&vgenp->lock);
896 	return (rv);
897 }
898 
899 /* set or clear promiscuous mode on the device */
900 static int
901 vgen_promisc(void *arg, boolean_t on)
902 {
903 	_NOTE(ARGUNUSED(arg, on))
904 	return (DDI_SUCCESS);
905 }
906 
907 /* set the unicast mac address of the device */
908 static int
909 vgen_unicst(void *arg, const uint8_t *mca)
910 {
911 	_NOTE(ARGUNUSED(arg, mca))
912 	return (DDI_SUCCESS);
913 }
914 
915 /* get device statistics */
916 int
917 vgen_stat(void *arg, uint_t stat, uint64_t *val)
918 {
919 	vgen_t		*vgenp = (vgen_t *)arg;
920 	vgen_port_t	*portp;
921 	vgen_portlist_t	*plistp;
922 
923 	*val = 0;
924 
925 	plistp = &(vgenp->vgenports);
926 	READ_ENTER(&plistp->rwlock);
927 
928 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
929 		*val += vgen_port_stat(portp, stat);
930 	}
931 
932 	RW_EXIT(&plistp->rwlock);
933 
934 	return (0);
935 }
936 
937 static void
938 vgen_ioctl(void *arg, queue_t *wq, mblk_t *mp)
939 {
940 	 _NOTE(ARGUNUSED(arg, wq, mp))
941 }
942 
943 /* vgen internal functions */
944 /* detach all ports from the device */
945 static void
946 vgen_detach_ports(vgen_t *vgenp)
947 {
948 	vgen_port_t	*portp;
949 	vgen_portlist_t	*plistp;
950 
951 	plistp = &(vgenp->vgenports);
952 	WRITE_ENTER(&plistp->rwlock);
953 
954 	while ((portp = plistp->headp) != NULL) {
955 		vgen_port_detach(portp);
956 	}
957 
958 	RW_EXIT(&plistp->rwlock);
959 }
960 
961 /*
962  * detach the given port.
963  */
964 static void
965 vgen_port_detach(vgen_port_t *portp)
966 {
967 	vgen_t		*vgenp;
968 	vgen_ldclist_t	*ldclp;
969 	int		port_num;
970 
971 	vgenp = portp->vgenp;
972 	port_num = portp->port_num;
973 
974 	DBG1(vgenp, NULL, "port(%d):enter\n", port_num);
975 
976 	/* remove it from port list */
977 	vgen_port_list_remove(portp);
978 
979 	/* detach channels from this port */
980 	ldclp = &portp->ldclist;
981 	WRITE_ENTER(&ldclp->rwlock);
982 	while (ldclp->headp) {
983 		vgen_ldc_detach(ldclp->headp);
984 	}
985 	RW_EXIT(&ldclp->rwlock);
986 	rw_destroy(&ldclp->rwlock);
987 
988 	if (vgenp->vsw_portp == portp) {
989 		vgenp->vsw_portp = NULL;
990 	}
991 	KMEM_FREE(portp);
992 
993 	DBG1(vgenp, NULL, "port(%d):exit\n", port_num);
994 }
995 
996 /* add a port to port list */
997 static void
998 vgen_port_list_insert(vgen_port_t *portp)
999 {
1000 	vgen_portlist_t *plistp;
1001 	vgen_t *vgenp;
1002 
1003 	vgenp = portp->vgenp;
1004 	plistp = &(vgenp->vgenports);
1005 
1006 	if (plistp->headp == NULL) {
1007 		plistp->headp = portp;
1008 	} else {
1009 		plistp->tailp->nextp = portp;
1010 	}
1011 	plistp->tailp = portp;
1012 	portp->nextp = NULL;
1013 }
1014 
1015 /* remove a port from port list */
1016 static void
1017 vgen_port_list_remove(vgen_port_t *portp)
1018 {
1019 	vgen_port_t *prevp;
1020 	vgen_port_t *nextp;
1021 	vgen_portlist_t *plistp;
1022 	vgen_t *vgenp;
1023 
1024 	vgenp = portp->vgenp;
1025 
1026 	plistp = &(vgenp->vgenports);
1027 
1028 	if (plistp->headp == NULL)
1029 		return;
1030 
1031 	if (portp == plistp->headp) {
1032 		plistp->headp = portp->nextp;
1033 		if (portp == plistp->tailp)
1034 			plistp->tailp = plistp->headp;
1035 	} else {
1036 		for (prevp = plistp->headp;
1037 		    ((nextp = prevp->nextp) != NULL) && (nextp != portp);
1038 		    prevp = nextp)
1039 			;
1040 		if (nextp == portp) {
1041 			prevp->nextp = portp->nextp;
1042 		}
1043 		if (portp == plistp->tailp)
1044 			plistp->tailp = prevp;
1045 	}
1046 }
1047 
1048 /* lookup a port in the list based on port_num */
1049 static vgen_port_t *
1050 vgen_port_lookup(vgen_portlist_t *plistp, int port_num)
1051 {
1052 	vgen_port_t *portp = NULL;
1053 
1054 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
1055 		if (portp->port_num == port_num) {
1056 			break;
1057 		}
1058 	}
1059 
1060 	return (portp);
1061 }
1062 
1063 /* enable ports for transmit/receive */
1064 static void
1065 vgen_init_ports(vgen_t *vgenp)
1066 {
1067 	vgen_port_t	*portp;
1068 	vgen_portlist_t	*plistp;
1069 
1070 	plistp = &(vgenp->vgenports);
1071 	READ_ENTER(&plistp->rwlock);
1072 
1073 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
1074 		vgen_port_init(portp);
1075 	}
1076 
1077 	RW_EXIT(&plistp->rwlock);
1078 }
1079 
1080 static void
1081 vgen_port_init(vgen_port_t *portp)
1082 {
1083 	vgen_t *vgenp;
1084 
1085 	vgenp = portp->vgenp;
1086 	/*
1087 	 * Create fdb entry in vnet, corresponding to the mac
1088 	 * address of this port. Note that the port specified
1089 	 * is vsw-port. This is done so that vsw-port acts
1090 	 * as the route to reach this macaddr, until the
1091 	 * channel for this port comes up (LDC_UP) and
1092 	 * handshake is done successfully.
1093 	 * eg, if the peer is OBP-vnet, it may not bring the
1094 	 * channel up for this port and may communicate via
1095 	 * vsw to reach this port.
1096 	 * Later, when Solaris-vnet comes up at the other end
1097 	 * of the channel for this port and brings up the channel,
1098 	 * it is an indication that peer vnet is capable of
1099 	 * distributed switching, so the direct route through this
1100 	 * port is specified in fdb, using vnet_modify_fdb(macaddr);
1101 	 */
1102 	vnet_add_fdb(vgenp->vnetp, (uint8_t *)&portp->macaddr,
1103 	    vgen_tx, vgenp->vsw_portp);
1104 
1105 	if (portp == vgenp->vsw_portp) {
1106 		/*
1107 		 * create the default route entry in vnet's fdb.
1108 		 * This is the entry used by vnet to reach
1109 		 * unknown destinations, which basically goes
1110 		 * through vsw on domain0 and out through the
1111 		 * physical device bound to vsw.
1112 		 */
1113 		vnet_add_def_rte(vgenp->vnetp, vgen_tx, portp);
1114 	}
1115 
1116 	/* Bring up the channels of this port */
1117 	vgen_init_ldcs(portp);
1118 }
1119 
1120 /* disable transmit/receive on ports */
1121 static void
1122 vgen_uninit_ports(vgen_t *vgenp)
1123 {
1124 	vgen_port_t	*portp;
1125 	vgen_portlist_t	*plistp;
1126 
1127 	plistp = &(vgenp->vgenports);
1128 	READ_ENTER(&plistp->rwlock);
1129 
1130 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
1131 		vgen_port_uninit(portp);
1132 	}
1133 
1134 	RW_EXIT(&plistp->rwlock);
1135 }
1136 
1137 static void
1138 vgen_port_uninit(vgen_port_t *portp)
1139 {
1140 	vgen_t *vgenp;
1141 
1142 	vgenp = portp->vgenp;
1143 
1144 	vgen_uninit_ldcs(portp);
1145 	/* delete the entry in vnet's fdb for this port */
1146 	vnet_del_fdb(vgenp->vnetp, (uint8_t *)&portp->macaddr);
1147 	if (portp == vgenp->vsw_portp) {
1148 		/*
1149 		 * if this is vsw-port, then delete the default
1150 		 * route entry in vnet's fdb.
1151 		 */
1152 		vnet_del_def_rte(vgenp->vnetp);
1153 	}
1154 }
1155 
1156 /* register with MD event generator */
1157 static int
1158 vgen_mdeg_reg(vgen_t *vgenp)
1159 {
1160 	mdeg_prop_spec_t	*pspecp;
1161 	mdeg_node_spec_t	*parentp;
1162 	uint_t			templatesz;
1163 	int			rv;
1164 	mdeg_handle_t		hdl;
1165 	int			i;
1166 
1167 	i = ddi_prop_get_int(DDI_DEV_T_ANY, vgenp->vnetdip,
1168 	    DDI_PROP_DONTPASS, reg_propname, -1);
1169 	if (i == -1) {
1170 		return (DDI_FAILURE);
1171 	}
1172 	templatesz = sizeof (vgen_prop_template);
1173 	pspecp = kmem_zalloc(templatesz, KM_NOSLEEP);
1174 	if (pspecp == NULL) {
1175 		return (DDI_FAILURE);
1176 	}
1177 	parentp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_NOSLEEP);
1178 	if (parentp == NULL) {
1179 		kmem_free(pspecp, templatesz);
1180 		return (DDI_FAILURE);
1181 	}
1182 
1183 	bcopy(vgen_prop_template, pspecp, templatesz);
1184 
1185 	/*
1186 	 * NOTE: The instance here refers to the value of "reg" property and
1187 	 * not the dev_info instance (ddi_get_instance()) of vnet.
1188 	 */
1189 	VGEN_SET_MDEG_PROP_INST(pspecp, i);
1190 
1191 	parentp->namep = "virtual-device";
1192 	parentp->specp = pspecp;
1193 
1194 	/* save parentp in vgen_t */
1195 	vgenp->mdeg_parentp = parentp;
1196 
1197 	rv = mdeg_register(parentp, &vport_match, vgen_mdeg_cb, vgenp, &hdl);
1198 	if (rv != MDEG_SUCCESS) {
1199 		DERR(vgenp, NULL, "mdeg_register failed\n");
1200 		KMEM_FREE(parentp);
1201 		kmem_free(pspecp, templatesz);
1202 		vgenp->mdeg_parentp = NULL;
1203 		return (DDI_FAILURE);
1204 	}
1205 
1206 	/* save mdeg handle in vgen_t */
1207 	vgenp->mdeg_hdl = hdl;
1208 
1209 	return (DDI_SUCCESS);
1210 }
1211 
1212 /* unregister with MD event generator */
1213 static void
1214 vgen_mdeg_unreg(vgen_t *vgenp)
1215 {
1216 	(void) mdeg_unregister(vgenp->mdeg_hdl);
1217 	kmem_free(vgenp->mdeg_parentp->specp, sizeof (vgen_prop_template));
1218 	KMEM_FREE(vgenp->mdeg_parentp);
1219 	vgenp->mdeg_parentp = NULL;
1220 	vgenp->mdeg_hdl = NULL;
1221 }
1222 
1223 /* callback function registered with MD event generator */
1224 static int
1225 vgen_mdeg_cb(void *cb_argp, mdeg_result_t *resp)
1226 {
1227 	int idx;
1228 	int vsw_idx = -1;
1229 	uint64_t val;
1230 	vgen_t *vgenp;
1231 
1232 	if ((resp == NULL) || (cb_argp == NULL)) {
1233 		return (MDEG_FAILURE);
1234 	}
1235 
1236 	vgenp = (vgen_t *)cb_argp;
1237 	DBG1(vgenp, NULL, "enter\n");
1238 
1239 	mutex_enter(&vgenp->lock);
1240 
1241 	DBG1(vgenp, NULL, "ports: removed(%x), "
1242 	"added(%x), updated(%x)\n", resp->removed.nelem,
1243 	    resp->added.nelem, resp->match_curr.nelem);
1244 
1245 	for (idx = 0; idx < resp->removed.nelem; idx++) {
1246 		(void) vgen_remove_port(vgenp, resp->removed.mdp,
1247 		    resp->removed.mdep[idx]);
1248 	}
1249 
1250 	if (vgenp->vsw_portp == NULL) {
1251 		/*
1252 		 * find vsw_port and add it first, because other ports need
1253 		 * this when adding fdb entry (see vgen_port_init()).
1254 		 */
1255 		for (idx = 0; idx < resp->added.nelem; idx++) {
1256 			if (!(md_get_prop_val(resp->added.mdp,
1257 			    resp->added.mdep[idx], swport_propname, &val))) {
1258 				if (val == 0) {
1259 					/*
1260 					 * This port is connected to the
1261 					 * vsw on dom0.
1262 					 */
1263 					vsw_idx = idx;
1264 					if (vgen_add_port(vgenp,
1265 					    resp->added.mdp,
1266 					    resp->added.mdep[idx]) !=
1267 					    DDI_SUCCESS) {
1268 						cmn_err(CE_NOTE, "vnet%d Could "
1269 						    "not initialize virtual "
1270 						    "switch port.",
1271 						    ddi_get_instance(vgenp->
1272 						    vnetdip));
1273 						mutex_exit(&vgenp->lock);
1274 						return (MDEG_FAILURE);
1275 					}
1276 					break;
1277 				}
1278 			}
1279 		}
1280 		if (vsw_idx == -1) {
1281 			DWARN(vgenp, NULL, "can't find vsw_port\n");
1282 			mutex_exit(&vgenp->lock);
1283 			return (MDEG_FAILURE);
1284 		}
1285 	}
1286 
1287 	for (idx = 0; idx < resp->added.nelem; idx++) {
1288 		if ((vsw_idx != -1) && (vsw_idx == idx)) /* skip vsw_port */
1289 			continue;
1290 
1291 		/* If this port can't be added just skip it. */
1292 		(void) vgen_add_port(vgenp, resp->added.mdp,
1293 		    resp->added.mdep[idx]);
1294 	}
1295 
1296 	for (idx = 0; idx < resp->match_curr.nelem; idx++) {
1297 		(void) vgen_update_port(vgenp, resp->match_curr.mdp,
1298 		    resp->match_curr.mdep[idx],
1299 		    resp->match_prev.mdp,
1300 		    resp->match_prev.mdep[idx]);
1301 	}
1302 
1303 	mutex_exit(&vgenp->lock);
1304 	DBG1(vgenp, NULL, "exit\n");
1305 	return (MDEG_SUCCESS);
1306 }
1307 
1308 /* add a new port to the device */
1309 static int
1310 vgen_add_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
1311 {
1312 	uint64_t	port_num;
1313 	uint64_t	*ldc_ids;
1314 	uint64_t	macaddr;
1315 	uint64_t	val;
1316 	int		num_ldcs;
1317 	int		vsw_port = B_FALSE;
1318 	int		i;
1319 	int		addrsz;
1320 	int		num_nodes = 0;
1321 	int		listsz = 0;
1322 	int		rv = DDI_SUCCESS;
1323 	mde_cookie_t	*listp = NULL;
1324 	uint8_t		*addrp;
1325 	struct ether_addr	ea;
1326 
1327 	/* read "id" property to get the port number */
1328 	if (md_get_prop_val(mdp, mdex, id_propname, &port_num)) {
1329 		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
1330 		return (DDI_FAILURE);
1331 	}
1332 
1333 	/*
1334 	 * Find the channel endpoint node(s) under this port node.
1335 	 */
1336 	if ((num_nodes = md_node_count(mdp)) <= 0) {
1337 		DWARN(vgenp, NULL, "invalid number of nodes found (%d)",
1338 		    num_nodes);
1339 		return (DDI_FAILURE);
1340 	}
1341 
1342 	/* allocate space for node list */
1343 	listsz = num_nodes * sizeof (mde_cookie_t);
1344 	listp = kmem_zalloc(listsz, KM_NOSLEEP);
1345 	if (listp == NULL)
1346 		return (DDI_FAILURE);
1347 
1348 	num_ldcs = md_scan_dag(mdp, mdex,
1349 	    md_find_name(mdp, channel_propname),
1350 	    md_find_name(mdp, "fwd"), listp);
1351 
1352 	if (num_ldcs <= 0) {
1353 		DWARN(vgenp, NULL, "can't find %s nodes", channel_propname);
1354 		kmem_free(listp, listsz);
1355 		return (DDI_FAILURE);
1356 	}
1357 
1358 	DBG2(vgenp, NULL, "num_ldcs %d", num_ldcs);
1359 
1360 	ldc_ids = kmem_zalloc(num_ldcs * sizeof (uint64_t), KM_NOSLEEP);
1361 	if (ldc_ids == NULL) {
1362 		kmem_free(listp, listsz);
1363 		return (DDI_FAILURE);
1364 	}
1365 
1366 	for (i = 0; i < num_ldcs; i++) {
1367 		/* read channel ids */
1368 		if (md_get_prop_val(mdp, listp[i], id_propname, &ldc_ids[i])) {
1369 			DWARN(vgenp, NULL, "prop(%s) not found\n",
1370 			    id_propname);
1371 			kmem_free(listp, listsz);
1372 			kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
1373 			return (DDI_FAILURE);
1374 		}
1375 		DBG2(vgenp, NULL, "ldc_id 0x%llx", ldc_ids[i]);
1376 	}
1377 
1378 	kmem_free(listp, listsz);
1379 
1380 	if (md_get_prop_data(mdp, mdex, rmacaddr_propname, &addrp,
1381 	    &addrsz)) {
1382 		DWARN(vgenp, NULL, "prop(%s) not found\n", rmacaddr_propname);
1383 		kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
1384 		return (DDI_FAILURE);
1385 	}
1386 
1387 	if (addrsz < ETHERADDRL) {
1388 		DWARN(vgenp, NULL, "invalid address size (%d)\n", addrsz);
1389 		kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
1390 		return (DDI_FAILURE);
1391 	}
1392 
1393 	macaddr = *((uint64_t *)addrp);
1394 
1395 	DBG2(vgenp, NULL, "remote mac address 0x%llx\n", macaddr);
1396 
1397 	for (i = ETHERADDRL - 1; i >= 0; i--) {
1398 		ea.ether_addr_octet[i] = macaddr & 0xFF;
1399 		macaddr >>= 8;
1400 	}
1401 
1402 	if (vgenp->vsw_portp == NULL) {
1403 		if (!(md_get_prop_val(mdp, mdex, swport_propname, &val))) {
1404 			if (val == 0) {
1405 				/* This port is connected to the vsw on dom0 */
1406 				vsw_port = B_TRUE;
1407 			}
1408 		}
1409 	}
1410 	if (vgen_port_attach_mdeg(vgenp, (int)port_num, ldc_ids, num_ldcs,
1411 	    &ea, vsw_port) != DDI_SUCCESS) {
1412 		cmn_err(CE_NOTE, "vnet%d failed to attach port %d remote MAC "
1413 		    "address %s", ddi_get_instance(vgenp->vnetdip),
1414 		    (int)port_num, ether_sprintf(&ea));
1415 		rv = DDI_FAILURE;
1416 	}
1417 
1418 	kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
1419 
1420 	return (rv);
1421 }
1422 
1423 /* remove a port from the device */
1424 static int
1425 vgen_remove_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
1426 {
1427 	uint64_t	port_num;
1428 	vgen_port_t	*portp;
1429 	vgen_portlist_t	*plistp;
1430 
1431 	/* read "id" property to get the port number */
1432 	if (md_get_prop_val(mdp, mdex, id_propname, &port_num)) {
1433 		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
1434 		return (DDI_FAILURE);
1435 	}
1436 
1437 	plistp = &(vgenp->vgenports);
1438 
1439 	WRITE_ENTER(&plistp->rwlock);
1440 	portp = vgen_port_lookup(plistp, (int)port_num);
1441 	if (portp == NULL) {
1442 		DWARN(vgenp, NULL, "can't find port(%lx)\n", port_num);
1443 		RW_EXIT(&plistp->rwlock);
1444 		return (DDI_FAILURE);
1445 	}
1446 
1447 	vgen_port_detach_mdeg(portp);
1448 	RW_EXIT(&plistp->rwlock);
1449 
1450 	return (DDI_SUCCESS);
1451 }
1452 
1453 /* attach a port to the device based on mdeg data */
1454 static int
1455 vgen_port_attach_mdeg(vgen_t *vgenp, int port_num, uint64_t *ldcids,
1456 	int num_ids, struct ether_addr *macaddr, boolean_t vsw_port)
1457 {
1458 	vgen_port_t		*portp;
1459 	vgen_portlist_t		*plistp;
1460 	int			i;
1461 
1462 	portp = kmem_zalloc(sizeof (vgen_port_t), KM_NOSLEEP);
1463 	if (portp == NULL) {
1464 		return (DDI_FAILURE);
1465 	}
1466 	portp->vgenp = vgenp;
1467 	portp->port_num = port_num;
1468 
1469 	DBG1(vgenp, NULL, "port_num(%d)\n", portp->port_num);
1470 
1471 	portp->ldclist.num_ldcs = 0;
1472 	portp->ldclist.headp = NULL;
1473 	rw_init(&portp->ldclist.rwlock, NULL, RW_DRIVER, NULL);
1474 
1475 	ether_copy(macaddr, &portp->macaddr);
1476 	for (i = 0; i < num_ids; i++) {
1477 		DBG2(vgenp, NULL, "ldcid (%lx)\n", ldcids[i]);
1478 		if (vgen_ldc_attach(portp, ldcids[i]) == DDI_FAILURE) {
1479 			vgen_port_detach(portp);
1480 			return (DDI_FAILURE);
1481 		}
1482 	}
1483 
1484 	/* link it into the list of ports */
1485 	plistp = &(vgenp->vgenports);
1486 	WRITE_ENTER(&plistp->rwlock);
1487 	vgen_port_list_insert(portp);
1488 	RW_EXIT(&plistp->rwlock);
1489 
1490 	/* This port is connected to the vsw on domain0 */
1491 	if (vsw_port)
1492 		vgenp->vsw_portp = portp;
1493 
1494 	if (vgenp->flags & VGEN_STARTED) {	/* interface is configured */
1495 		vgen_port_init(portp);
1496 	}
1497 
1498 	DBG1(vgenp, NULL, "exit: port_num(%d)\n", portp->port_num);
1499 	return (DDI_SUCCESS);
1500 }
1501 
1502 /* detach a port from the device based on mdeg data */
1503 static void
1504 vgen_port_detach_mdeg(vgen_port_t *portp)
1505 {
1506 	vgen_t *vgenp = portp->vgenp;
1507 
1508 	DBG1(vgenp, NULL, "enter: port_num(%d)\n", portp->port_num);
1509 	/* stop the port if needed */
1510 	if (vgenp->flags & VGEN_STARTED) {
1511 		vgen_port_uninit(portp);
1512 	}
1513 	vgen_port_detach(portp);
1514 
1515 	DBG1(vgenp, NULL, "exit: port_num(%d)\n", portp->port_num);
1516 }
1517 
1518 static int
1519 vgen_update_port(vgen_t *vgenp, md_t *curr_mdp, mde_cookie_t curr_mdex,
1520 	md_t *prev_mdp, mde_cookie_t prev_mdex)
1521 {
1522 	 _NOTE(ARGUNUSED(vgenp, curr_mdp, curr_mdex, prev_mdp, prev_mdex))
1523 
1524 	/* NOTE: TBD */
1525 	return (DDI_SUCCESS);
1526 }
1527 
1528 static uint64_t
1529 vgen_port_stat(vgen_port_t *portp, uint_t stat)
1530 {
1531 	vgen_ldclist_t	*ldclp;
1532 	vgen_ldc_t *ldcp;
1533 	uint64_t	val;
1534 
1535 	val = 0;
1536 	ldclp = &portp->ldclist;
1537 
1538 	READ_ENTER(&ldclp->rwlock);
1539 	for (ldcp = ldclp->headp; ldcp != NULL; ldcp = ldcp->nextp) {
1540 		val += vgen_ldc_stat(ldcp, stat);
1541 	}
1542 	RW_EXIT(&ldclp->rwlock);
1543 
1544 	return (val);
1545 }
1546 
1547 /* attach the channel corresponding to the given ldc_id to the port */
1548 static int
1549 vgen_ldc_attach(vgen_port_t *portp, uint64_t ldc_id)
1550 {
1551 	vgen_t 		*vgenp;
1552 	vgen_ldclist_t	*ldclp;
1553 	vgen_ldc_t 	*ldcp, **prev_ldcp;
1554 	ldc_attr_t 	attr;
1555 	int 		status;
1556 	ldc_status_t	istatus;
1557 	char		kname[MAXNAMELEN];
1558 	int		instance;
1559 	enum	{AST_init = 0x0, AST_ldc_alloc = 0x1,
1560 		AST_mutex_init = 0x2, AST_ldc_init = 0x4,
1561 		AST_ldc_reg_cb = 0x8, AST_alloc_tx_ring = 0x10,
1562 		AST_create_rxmblks = 0x20, AST_add_softintr = 0x40,
1563 		AST_create_rcv_thread = 0x80} attach_state;
1564 
1565 	attach_state = AST_init;
1566 	vgenp = portp->vgenp;
1567 	ldclp = &portp->ldclist;
1568 
1569 	ldcp = kmem_zalloc(sizeof (vgen_ldc_t), KM_NOSLEEP);
1570 	if (ldcp == NULL) {
1571 		goto ldc_attach_failed;
1572 	}
1573 	ldcp->ldc_id = ldc_id;
1574 	ldcp->portp = portp;
1575 
1576 	attach_state |= AST_ldc_alloc;
1577 
1578 	mutex_init(&ldcp->txlock, NULL, MUTEX_DRIVER, NULL);
1579 	mutex_init(&ldcp->cblock, NULL, MUTEX_DRIVER, NULL);
1580 	mutex_init(&ldcp->tclock, NULL, MUTEX_DRIVER, NULL);
1581 	mutex_init(&ldcp->wrlock, NULL, MUTEX_DRIVER, NULL);
1582 	mutex_init(&ldcp->rxlock, NULL, MUTEX_DRIVER, NULL);
1583 
1584 	attach_state |= AST_mutex_init;
1585 
1586 	attr.devclass = LDC_DEV_NT;
1587 	attr.instance = ddi_get_instance(vgenp->vnetdip);
1588 	attr.mode = LDC_MODE_UNRELIABLE;
1589 	attr.mtu = vnet_ldc_mtu;
1590 	status = ldc_init(ldc_id, &attr, &ldcp->ldc_handle);
1591 	if (status != 0) {
1592 		DWARN(vgenp, ldcp, "ldc_init failed,rv (%d)\n", status);
1593 		goto ldc_attach_failed;
1594 	}
1595 	attach_state |= AST_ldc_init;
1596 
1597 	if (vgen_rcv_thread_enabled) {
1598 		ldcp->rcv_thr_flags = 0;
1599 		ldcp->rcv_mhead = ldcp->rcv_mtail = NULL;
1600 		ldcp->soft_pri = PIL_6;
1601 
1602 		status = ddi_intr_add_softint(vgenp->vnetdip,
1603 		    &ldcp->soft_handle, ldcp->soft_pri,
1604 		    vgen_ldc_rcv_softintr, (void *)ldcp);
1605 		if (status != DDI_SUCCESS) {
1606 			DWARN(vgenp, ldcp, "add_softint failed, rv (%d)\n",
1607 			    status);
1608 			goto ldc_attach_failed;
1609 		}
1610 
1611 		/*
1612 		 * Initialize the soft_lock with the same priority as
1613 		 * the soft interrupt to protect from the soft interrupt.
1614 		 */
1615 		mutex_init(&ldcp->soft_lock, NULL, MUTEX_DRIVER,
1616 		    DDI_INTR_PRI(ldcp->soft_pri));
1617 		attach_state |= AST_add_softintr;
1618 
1619 		mutex_init(&ldcp->rcv_thr_lock, NULL, MUTEX_DRIVER, NULL);
1620 		cv_init(&ldcp->rcv_thr_cv, NULL, CV_DRIVER, NULL);
1621 		ldcp->rcv_thread = thread_create(NULL, 2 * DEFAULTSTKSZ,
1622 		    vgen_ldc_rcv_worker, ldcp, 0, &p0, TS_RUN, maxclsyspri);
1623 
1624 		attach_state |= AST_create_rcv_thread;
1625 		if (ldcp->rcv_thread == NULL) {
1626 			DWARN(vgenp, ldcp, "Failed to create worker thread");
1627 			goto ldc_attach_failed;
1628 		}
1629 	}
1630 
1631 	status = ldc_reg_callback(ldcp->ldc_handle, vgen_ldc_cb, (caddr_t)ldcp);
1632 	if (status != 0) {
1633 		DWARN(vgenp, ldcp, "ldc_reg_callback failed, rv (%d)\n",
1634 		    status);
1635 		goto ldc_attach_failed;
1636 	}
1637 	attach_state |= AST_ldc_reg_cb;
1638 
1639 	(void) ldc_status(ldcp->ldc_handle, &istatus);
1640 	ASSERT(istatus == LDC_INIT);
1641 	ldcp->ldc_status = istatus;
1642 
1643 	/* allocate transmit resources */
1644 	status = vgen_alloc_tx_ring(ldcp);
1645 	if (status != 0) {
1646 		goto ldc_attach_failed;
1647 	}
1648 	attach_state |= AST_alloc_tx_ring;
1649 
1650 	/* allocate receive resources */
1651 	status = vio_init_multipools(&ldcp->vmp, VGEN_NUM_VMPOOLS,
1652 	    vgen_rbufsz1, vgen_rbufsz2, vgen_rbufsz3,
1653 	    vgen_nrbufs1, vgen_nrbufs2, vgen_nrbufs3);
1654 	if (status != 0) {
1655 		goto ldc_attach_failed;
1656 	}
1657 	attach_state |= AST_create_rxmblks;
1658 
1659 	/* Setup kstats for the channel */
1660 	instance = ddi_get_instance(vgenp->vnetdip);
1661 	(void) sprintf(kname, "vnetldc0x%lx", ldcp->ldc_id);
1662 	ldcp->ksp = vgen_setup_kstats("vnet", instance, kname, &ldcp->stats);
1663 	if (ldcp->ksp == NULL) {
1664 		goto ldc_attach_failed;
1665 	}
1666 
1667 	/* initialize vgen_versions supported */
1668 	bcopy(vgen_versions, ldcp->vgen_versions, sizeof (ldcp->vgen_versions));
1669 
1670 	/* link it into the list of channels for this port */
1671 	WRITE_ENTER(&ldclp->rwlock);
1672 	prev_ldcp = (vgen_ldc_t **)(&ldclp->headp);
1673 	ldcp->nextp = *prev_ldcp;
1674 	*prev_ldcp = ldcp;
1675 	ldclp->num_ldcs++;
1676 	RW_EXIT(&ldclp->rwlock);
1677 
1678 	ldcp->flags |= CHANNEL_ATTACHED;
1679 	return (DDI_SUCCESS);
1680 
1681 ldc_attach_failed:
1682 	if (attach_state & AST_ldc_reg_cb) {
1683 		(void) ldc_unreg_callback(ldcp->ldc_handle);
1684 	}
1685 	if (attach_state & AST_add_softintr) {
1686 		(void) ddi_intr_remove_softint(ldcp->soft_handle);
1687 		mutex_destroy(&ldcp->soft_lock);
1688 	}
1689 	if (attach_state & AST_create_rcv_thread) {
1690 		if (ldcp->rcv_thread != NULL) {
1691 			vgen_stop_rcv_thread(ldcp);
1692 		}
1693 		mutex_destroy(&ldcp->rcv_thr_lock);
1694 		cv_destroy(&ldcp->rcv_thr_cv);
1695 	}
1696 	if (attach_state & AST_create_rxmblks) {
1697 		vio_mblk_pool_t *fvmp = NULL;
1698 
1699 		vio_destroy_multipools(&ldcp->vmp, &fvmp);
1700 		ASSERT(fvmp == NULL);
1701 	}
1702 	if (attach_state & AST_alloc_tx_ring) {
1703 		vgen_free_tx_ring(ldcp);
1704 	}
1705 	if (attach_state & AST_ldc_init) {
1706 		(void) ldc_fini(ldcp->ldc_handle);
1707 	}
1708 	if (attach_state & AST_mutex_init) {
1709 		mutex_destroy(&ldcp->tclock);
1710 		mutex_destroy(&ldcp->txlock);
1711 		mutex_destroy(&ldcp->cblock);
1712 		mutex_destroy(&ldcp->wrlock);
1713 		mutex_destroy(&ldcp->rxlock);
1714 	}
1715 	if (attach_state & AST_ldc_alloc) {
1716 		KMEM_FREE(ldcp);
1717 	}
1718 	return (DDI_FAILURE);
1719 }
1720 
1721 /* detach a channel from the port */
1722 static void
1723 vgen_ldc_detach(vgen_ldc_t *ldcp)
1724 {
1725 	vgen_port_t	*portp;
1726 	vgen_t 		*vgenp;
1727 	vgen_ldc_t 	*pldcp;
1728 	vgen_ldc_t	**prev_ldcp;
1729 	vgen_ldclist_t	*ldclp;
1730 
1731 	portp = ldcp->portp;
1732 	vgenp = portp->vgenp;
1733 	ldclp = &portp->ldclist;
1734 
1735 	prev_ldcp =  (vgen_ldc_t **)&ldclp->headp;
1736 	for (; (pldcp = *prev_ldcp) != NULL; prev_ldcp = &pldcp->nextp) {
1737 		if (pldcp == ldcp) {
1738 			break;
1739 		}
1740 	}
1741 
1742 	if (pldcp == NULL) {
1743 		/* invalid ldcp? */
1744 		return;
1745 	}
1746 
1747 	if (ldcp->ldc_status != LDC_INIT) {
1748 		DWARN(vgenp, ldcp, "ldc_status is not INIT\n");
1749 	}
1750 
1751 	if (ldcp->flags & CHANNEL_ATTACHED) {
1752 		ldcp->flags &= ~(CHANNEL_ATTACHED);
1753 
1754 		(void) ldc_unreg_callback(ldcp->ldc_handle);
1755 		if (ldcp->rcv_thread != NULL) {
1756 			/* First stop the receive thread */
1757 			vgen_stop_rcv_thread(ldcp);
1758 			(void) ddi_intr_remove_softint(ldcp->soft_handle);
1759 			mutex_destroy(&ldcp->soft_lock);
1760 			mutex_destroy(&ldcp->rcv_thr_lock);
1761 			cv_destroy(&ldcp->rcv_thr_cv);
1762 		}
1763 		/* Free any queued messages */
1764 		if (ldcp->rcv_mhead != NULL) {
1765 			freemsgchain(ldcp->rcv_mhead);
1766 			ldcp->rcv_mhead = NULL;
1767 		}
1768 
1769 		vgen_destroy_kstats(ldcp->ksp);
1770 		ldcp->ksp = NULL;
1771 
1772 		/*
1773 		 * if we cannot reclaim all mblks, put this
1774 		 * on the list of pools(vgenp->rmp) to be reclaimed when the
1775 		 * device gets detached (see vgen_uninit()).
1776 		 */
1777 		vio_destroy_multipools(&ldcp->vmp, &vgenp->rmp);
1778 
1779 		/* free transmit resources */
1780 		vgen_free_tx_ring(ldcp);
1781 
1782 		(void) ldc_fini(ldcp->ldc_handle);
1783 		mutex_destroy(&ldcp->tclock);
1784 		mutex_destroy(&ldcp->txlock);
1785 		mutex_destroy(&ldcp->cblock);
1786 		mutex_destroy(&ldcp->wrlock);
1787 		mutex_destroy(&ldcp->rxlock);
1788 
1789 		/* unlink it from the list */
1790 		*prev_ldcp = ldcp->nextp;
1791 		ldclp->num_ldcs--;
1792 		KMEM_FREE(ldcp);
1793 	}
1794 }
1795 
1796 /*
1797  * This function allocates transmit resources for the channel.
1798  * The resources consist of a transmit descriptor ring and an associated
1799  * transmit buffer ring.
1800  */
1801 static int
1802 vgen_alloc_tx_ring(vgen_ldc_t *ldcp)
1803 {
1804 	void *tbufp;
1805 	ldc_mem_info_t minfo;
1806 	uint32_t txdsize;
1807 	uint32_t tbufsize;
1808 	int status;
1809 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
1810 
1811 	ldcp->num_txds = vnet_ntxds;
1812 	txdsize = sizeof (vnet_public_desc_t);
1813 	tbufsize = sizeof (vgen_private_desc_t);
1814 
1815 	/* allocate transmit buffer ring */
1816 	tbufp = kmem_zalloc(ldcp->num_txds * tbufsize, KM_NOSLEEP);
1817 	if (tbufp == NULL) {
1818 		return (DDI_FAILURE);
1819 	}
1820 
1821 	/* create transmit descriptor ring */
1822 	status = ldc_mem_dring_create(ldcp->num_txds, txdsize,
1823 	    &ldcp->tx_dhandle);
1824 	if (status) {
1825 		DWARN(vgenp, ldcp, "ldc_mem_dring_create() failed\n");
1826 		kmem_free(tbufp, ldcp->num_txds * tbufsize);
1827 		return (DDI_FAILURE);
1828 	}
1829 
1830 	/* get the addr of descripror ring */
1831 	status = ldc_mem_dring_info(ldcp->tx_dhandle, &minfo);
1832 	if (status) {
1833 		DWARN(vgenp, ldcp, "ldc_mem_dring_info() failed\n");
1834 		kmem_free(tbufp, ldcp->num_txds * tbufsize);
1835 		(void) ldc_mem_dring_destroy(ldcp->tx_dhandle);
1836 		ldcp->tbufp = NULL;
1837 		return (DDI_FAILURE);
1838 	}
1839 	ldcp->txdp = (vnet_public_desc_t *)(minfo.vaddr);
1840 	ldcp->tbufp = tbufp;
1841 
1842 	ldcp->txdendp = &((ldcp->txdp)[ldcp->num_txds]);
1843 	ldcp->tbufendp = &((ldcp->tbufp)[ldcp->num_txds]);
1844 
1845 	return (DDI_SUCCESS);
1846 }
1847 
1848 /* Free transmit resources for the channel */
1849 static void
1850 vgen_free_tx_ring(vgen_ldc_t *ldcp)
1851 {
1852 	int tbufsize = sizeof (vgen_private_desc_t);
1853 
1854 	/* free transmit descriptor ring */
1855 	(void) ldc_mem_dring_destroy(ldcp->tx_dhandle);
1856 
1857 	/* free transmit buffer ring */
1858 	kmem_free(ldcp->tbufp, ldcp->num_txds * tbufsize);
1859 	ldcp->txdp = ldcp->txdendp = NULL;
1860 	ldcp->tbufp = ldcp->tbufendp = NULL;
1861 }
1862 
1863 /* enable transmit/receive on the channels for the port */
1864 static void
1865 vgen_init_ldcs(vgen_port_t *portp)
1866 {
1867 	vgen_ldclist_t	*ldclp = &portp->ldclist;
1868 	vgen_ldc_t	*ldcp;
1869 
1870 	READ_ENTER(&ldclp->rwlock);
1871 	ldcp =  ldclp->headp;
1872 	for (; ldcp  != NULL; ldcp = ldcp->nextp) {
1873 		(void) vgen_ldc_init(ldcp);
1874 	}
1875 	RW_EXIT(&ldclp->rwlock);
1876 }
1877 
1878 /* stop transmit/receive on the channels for the port */
1879 static void
1880 vgen_uninit_ldcs(vgen_port_t *portp)
1881 {
1882 	vgen_ldclist_t	*ldclp = &portp->ldclist;
1883 	vgen_ldc_t	*ldcp;
1884 
1885 	READ_ENTER(&ldclp->rwlock);
1886 	ldcp =  ldclp->headp;
1887 	for (; ldcp  != NULL; ldcp = ldcp->nextp) {
1888 		vgen_ldc_uninit(ldcp);
1889 	}
1890 	RW_EXIT(&ldclp->rwlock);
1891 }
1892 
1893 /* enable transmit/receive on the channel */
1894 static int
1895 vgen_ldc_init(vgen_ldc_t *ldcp)
1896 {
1897 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
1898 	ldc_status_t	istatus;
1899 	int		rv;
1900 	uint32_t	retries = 0;
1901 	enum	{ ST_init = 0x0, ST_ldc_open = 0x1,
1902 		ST_init_tbufs = 0x2, ST_cb_enable = 0x4} init_state;
1903 	init_state = ST_init;
1904 
1905 	DBG1(vgenp, ldcp, "enter\n");
1906 	LDC_LOCK(ldcp);
1907 
1908 	rv = ldc_open(ldcp->ldc_handle);
1909 	if (rv != 0) {
1910 		DWARN(vgenp, ldcp, "ldc_open failed: rv(%d)\n", rv);
1911 		goto ldcinit_failed;
1912 	}
1913 	init_state |= ST_ldc_open;
1914 
1915 	(void) ldc_status(ldcp->ldc_handle, &istatus);
1916 	if (istatus != LDC_OPEN && istatus != LDC_READY) {
1917 		DWARN(vgenp, ldcp, "status(%d) is not OPEN/READY\n", istatus);
1918 		goto ldcinit_failed;
1919 	}
1920 	ldcp->ldc_status = istatus;
1921 
1922 	rv = vgen_init_tbufs(ldcp);
1923 	if (rv != 0) {
1924 		DWARN(vgenp, ldcp, "vgen_init_tbufs() failed\n");
1925 		goto ldcinit_failed;
1926 	}
1927 	init_state |= ST_init_tbufs;
1928 
1929 	rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_ENABLE);
1930 	if (rv != 0) {
1931 		DWARN(vgenp, ldcp, "ldc_set_cb_mode failed: rv(%d)\n", rv);
1932 		goto ldcinit_failed;
1933 	}
1934 
1935 	init_state |= ST_cb_enable;
1936 
1937 	do {
1938 		rv = ldc_up(ldcp->ldc_handle);
1939 		if ((rv != 0) && (rv == EWOULDBLOCK)) {
1940 			DBG2(vgenp, ldcp, "ldc_up err rv(%d)\n", rv);
1941 			drv_usecwait(VGEN_LDC_UP_DELAY);
1942 		}
1943 		if (retries++ >= vgen_ldcup_retries)
1944 			break;
1945 	} while (rv == EWOULDBLOCK);
1946 
1947 	(void) ldc_status(ldcp->ldc_handle, &istatus);
1948 	if (istatus == LDC_UP) {
1949 		DWARN(vgenp, ldcp, "status(%d) is UP\n", istatus);
1950 	}
1951 
1952 	ldcp->ldc_status = istatus;
1953 
1954 	/* initialize transmit watchdog timeout */
1955 	ldcp->wd_tid = timeout(vgen_ldc_watchdog, (caddr_t)ldcp,
1956 	    drv_usectohz(vnet_ldcwd_interval * 1000));
1957 
1958 	ldcp->hphase = -1;
1959 	ldcp->flags |= CHANNEL_STARTED;
1960 
1961 	/* if channel is already UP - start handshake */
1962 	if (istatus == LDC_UP) {
1963 		vgen_t *vgenp = LDC_TO_VGEN(ldcp);
1964 		if (ldcp->portp != vgenp->vsw_portp) {
1965 			/*
1966 			 * modify fdb entry to use this port as the
1967 			 * channel is up, instead of going through the
1968 			 * vsw-port (see comments in vgen_port_init())
1969 			 */
1970 			vnet_modify_fdb(vgenp->vnetp,
1971 			    (uint8_t *)&ldcp->portp->macaddr,
1972 			    vgen_tx, ldcp->portp, B_FALSE);
1973 		}
1974 
1975 		/* Initialize local session id */
1976 		ldcp->local_sid = ddi_get_lbolt();
1977 
1978 		/* clear peer session id */
1979 		ldcp->peer_sid = 0;
1980 		ldcp->hretries = 0;
1981 
1982 		/* Initiate Handshake process with peer ldc endpoint */
1983 		vgen_reset_hphase(ldcp);
1984 
1985 		mutex_exit(&ldcp->tclock);
1986 		mutex_exit(&ldcp->txlock);
1987 		mutex_exit(&ldcp->wrlock);
1988 		mutex_exit(&ldcp->rxlock);
1989 		vgen_handshake(vh_nextphase(ldcp));
1990 		mutex_exit(&ldcp->cblock);
1991 	} else {
1992 		LDC_UNLOCK(ldcp);
1993 	}
1994 
1995 	return (DDI_SUCCESS);
1996 
1997 ldcinit_failed:
1998 	if (init_state & ST_cb_enable) {
1999 		(void) ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE);
2000 	}
2001 	if (init_state & ST_init_tbufs) {
2002 		vgen_uninit_tbufs(ldcp);
2003 	}
2004 	if (init_state & ST_ldc_open) {
2005 		(void) ldc_close(ldcp->ldc_handle);
2006 	}
2007 	LDC_UNLOCK(ldcp);
2008 	DBG1(vgenp, ldcp, "exit\n");
2009 	return (DDI_FAILURE);
2010 }
2011 
2012 /* stop transmit/receive on the channel */
2013 static void
2014 vgen_ldc_uninit(vgen_ldc_t *ldcp)
2015 {
2016 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
2017 	int	rv;
2018 
2019 	DBG1(vgenp, ldcp, "enter\n");
2020 	LDC_LOCK(ldcp);
2021 
2022 	if ((ldcp->flags & CHANNEL_STARTED) == 0) {
2023 		LDC_UNLOCK(ldcp);
2024 		DWARN(vgenp, ldcp, "CHANNEL_STARTED flag is not set\n");
2025 		return;
2026 	}
2027 
2028 	/* disable further callbacks */
2029 	rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE);
2030 	if (rv != 0) {
2031 		DWARN(vgenp, ldcp, "ldc_set_cb_mode failed\n");
2032 	}
2033 
2034 	/*
2035 	 * clear handshake done bit and wait for pending tx and cb to finish.
2036 	 * release locks before untimeout(9F) is invoked to cancel timeouts.
2037 	 */
2038 	ldcp->hphase &= ~(VH_DONE);
2039 	LDC_UNLOCK(ldcp);
2040 
2041 	/* cancel handshake watchdog timeout */
2042 	if (ldcp->htid) {
2043 		(void) untimeout(ldcp->htid);
2044 		ldcp->htid = 0;
2045 	}
2046 
2047 	/* cancel transmit watchdog timeout */
2048 	if (ldcp->wd_tid) {
2049 		(void) untimeout(ldcp->wd_tid);
2050 		ldcp->wd_tid = 0;
2051 	}
2052 
2053 	drv_usecwait(1000);
2054 
2055 	/* acquire locks again; any pending transmits and callbacks are done */
2056 	LDC_LOCK(ldcp);
2057 
2058 	vgen_reset_hphase(ldcp);
2059 
2060 	vgen_uninit_tbufs(ldcp);
2061 
2062 	rv = ldc_close(ldcp->ldc_handle);
2063 	if (rv != 0) {
2064 		DWARN(vgenp, ldcp, "ldc_close err\n");
2065 	}
2066 	ldcp->ldc_status = LDC_INIT;
2067 	ldcp->flags &= ~(CHANNEL_STARTED);
2068 
2069 	LDC_UNLOCK(ldcp);
2070 
2071 	DBG1(vgenp, ldcp, "exit\n");
2072 }
2073 
2074 /* Initialize the transmit buffer ring for the channel */
2075 static int
2076 vgen_init_tbufs(vgen_ldc_t *ldcp)
2077 {
2078 	vgen_private_desc_t	*tbufp;
2079 	vnet_public_desc_t	*txdp;
2080 	vio_dring_entry_hdr_t		*hdrp;
2081 	int 			i;
2082 	int 			rv;
2083 	caddr_t			datap = NULL;
2084 	int			ci;
2085 	uint32_t		ncookies;
2086 
2087 	bzero(ldcp->tbufp, sizeof (*tbufp) * (ldcp->num_txds));
2088 	bzero(ldcp->txdp, sizeof (*txdp) * (ldcp->num_txds));
2089 
2090 	datap = kmem_zalloc(ldcp->num_txds * VGEN_TXDBLK_SZ, KM_SLEEP);
2091 	ldcp->tx_datap = datap;
2092 
2093 	/*
2094 	 * for each private descriptor, allocate a ldc mem_handle which is
2095 	 * required to map the data during transmit, set the flags
2096 	 * to free (available for use by transmit routine).
2097 	 */
2098 
2099 	for (i = 0; i < ldcp->num_txds; i++) {
2100 
2101 		tbufp = &(ldcp->tbufp[i]);
2102 		rv = ldc_mem_alloc_handle(ldcp->ldc_handle,
2103 		    &(tbufp->memhandle));
2104 		if (rv) {
2105 			tbufp->memhandle = 0;
2106 			goto init_tbufs_failed;
2107 		}
2108 
2109 		/*
2110 		 * bind ldc memhandle to the corresponding transmit buffer.
2111 		 */
2112 		ci = ncookies = 0;
2113 		rv = ldc_mem_bind_handle(tbufp->memhandle,
2114 		    (caddr_t)datap, VGEN_TXDBLK_SZ, LDC_SHADOW_MAP,
2115 		    LDC_MEM_R, &(tbufp->memcookie[ci]), &ncookies);
2116 		if (rv != 0) {
2117 			goto init_tbufs_failed;
2118 		}
2119 
2120 		/*
2121 		 * successful in binding the handle to tx data buffer.
2122 		 * set datap in the private descr to this buffer.
2123 		 */
2124 		tbufp->datap = datap;
2125 
2126 		if ((ncookies == 0) ||
2127 		    (ncookies > MAX_COOKIES)) {
2128 			goto init_tbufs_failed;
2129 		}
2130 
2131 		for (ci = 1; ci < ncookies; ci++) {
2132 			rv = ldc_mem_nextcookie(tbufp->memhandle,
2133 			    &(tbufp->memcookie[ci]));
2134 			if (rv != 0) {
2135 				goto init_tbufs_failed;
2136 			}
2137 		}
2138 
2139 		tbufp->ncookies = ncookies;
2140 		datap += VGEN_TXDBLK_SZ;
2141 
2142 		tbufp->flags = VGEN_PRIV_DESC_FREE;
2143 		txdp = &(ldcp->txdp[i]);
2144 		hdrp = &txdp->hdr;
2145 		hdrp->dstate = VIO_DESC_FREE;
2146 		hdrp->ack = B_FALSE;
2147 		tbufp->descp = txdp;
2148 
2149 	}
2150 
2151 	/* reset tbuf walking pointers */
2152 	ldcp->next_tbufp = ldcp->tbufp;
2153 	ldcp->cur_tbufp = ldcp->tbufp;
2154 
2155 	/* initialize tx seqnum and index */
2156 	ldcp->next_txseq = VNET_ISS;
2157 	ldcp->next_txi = 0;
2158 
2159 	ldcp->resched_peer = B_TRUE;
2160 	ldcp->resched_peer_txi = 0;
2161 
2162 	return (DDI_SUCCESS);
2163 
2164 init_tbufs_failed:;
2165 	vgen_uninit_tbufs(ldcp);
2166 	return (DDI_FAILURE);
2167 }
2168 
2169 /* Uninitialize transmit buffer ring for the channel */
2170 static void
2171 vgen_uninit_tbufs(vgen_ldc_t *ldcp)
2172 {
2173 	vgen_private_desc_t	*tbufp = ldcp->tbufp;
2174 	int 			i;
2175 
2176 	/* for each tbuf (priv_desc), free ldc mem_handle */
2177 	for (i = 0; i < ldcp->num_txds; i++) {
2178 
2179 		tbufp = &(ldcp->tbufp[i]);
2180 
2181 		if (tbufp->datap) { /* if bound to a ldc memhandle */
2182 			(void) ldc_mem_unbind_handle(tbufp->memhandle);
2183 			tbufp->datap = NULL;
2184 		}
2185 		if (tbufp->memhandle) {
2186 			(void) ldc_mem_free_handle(tbufp->memhandle);
2187 			tbufp->memhandle = 0;
2188 		}
2189 	}
2190 
2191 	if (ldcp->tx_datap) {
2192 		/* prealloc'd tx data buffer */
2193 		kmem_free(ldcp->tx_datap, ldcp->num_txds * VGEN_TXDBLK_SZ);
2194 		ldcp->tx_datap = NULL;
2195 	}
2196 
2197 	bzero(ldcp->tbufp, sizeof (vgen_private_desc_t) * (ldcp->num_txds));
2198 	bzero(ldcp->txdp, sizeof (vnet_public_desc_t) * (ldcp->num_txds));
2199 }
2200 
2201 /* clobber tx descriptor ring */
2202 static void
2203 vgen_clobber_tbufs(vgen_ldc_t *ldcp)
2204 {
2205 	vnet_public_desc_t	*txdp;
2206 	vgen_private_desc_t	*tbufp;
2207 	vio_dring_entry_hdr_t	*hdrp;
2208 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
2209 	int i;
2210 #ifdef DEBUG
2211 	int ndone = 0;
2212 #endif
2213 
2214 	for (i = 0; i < ldcp->num_txds; i++) {
2215 
2216 		tbufp = &(ldcp->tbufp[i]);
2217 		txdp = tbufp->descp;
2218 		hdrp = &txdp->hdr;
2219 
2220 		if (tbufp->flags & VGEN_PRIV_DESC_BUSY) {
2221 			tbufp->flags = VGEN_PRIV_DESC_FREE;
2222 #ifdef DEBUG
2223 			if (hdrp->dstate == VIO_DESC_DONE)
2224 				ndone++;
2225 #endif
2226 			hdrp->dstate = VIO_DESC_FREE;
2227 			hdrp->ack = B_FALSE;
2228 		}
2229 	}
2230 	/* reset tbuf walking pointers */
2231 	ldcp->next_tbufp = ldcp->tbufp;
2232 	ldcp->cur_tbufp = ldcp->tbufp;
2233 
2234 	/* reset tx seqnum and index */
2235 	ldcp->next_txseq = VNET_ISS;
2236 	ldcp->next_txi = 0;
2237 
2238 	ldcp->resched_peer = B_TRUE;
2239 	ldcp->resched_peer_txi = 0;
2240 
2241 	DBG2(vgenp, ldcp, "num descrs done (%d)\n", ndone);
2242 }
2243 
2244 /* clobber receive descriptor ring */
2245 static void
2246 vgen_clobber_rxds(vgen_ldc_t *ldcp)
2247 {
2248 	ldcp->rx_dhandle = 0;
2249 	bzero(&ldcp->rx_dcookie, sizeof (ldcp->rx_dcookie));
2250 	ldcp->rxdp = NULL;
2251 	ldcp->next_rxi = 0;
2252 	ldcp->num_rxds = 0;
2253 	ldcp->next_rxseq = VNET_ISS;
2254 }
2255 
2256 /* initialize receive descriptor ring */
2257 static int
2258 vgen_init_rxds(vgen_ldc_t *ldcp, uint32_t num_desc, uint32_t desc_size,
2259 	ldc_mem_cookie_t *dcookie, uint32_t ncookies)
2260 {
2261 	int rv;
2262 	ldc_mem_info_t minfo;
2263 
2264 	rv = ldc_mem_dring_map(ldcp->ldc_handle, dcookie, ncookies, num_desc,
2265 	    desc_size, LDC_SHADOW_MAP, &(ldcp->rx_dhandle));
2266 	if (rv != 0) {
2267 		return (DDI_FAILURE);
2268 	}
2269 
2270 	/*
2271 	 * sucessfully mapped, now try to
2272 	 * get info about the mapped dring
2273 	 */
2274 	rv = ldc_mem_dring_info(ldcp->rx_dhandle, &minfo);
2275 	if (rv != 0) {
2276 		(void) ldc_mem_dring_unmap(ldcp->rx_dhandle);
2277 		return (DDI_FAILURE);
2278 	}
2279 
2280 	/*
2281 	 * save ring address, number of descriptors.
2282 	 */
2283 	ldcp->rxdp = (vnet_public_desc_t *)(minfo.vaddr);
2284 	bcopy(dcookie, &(ldcp->rx_dcookie), sizeof (*dcookie));
2285 	ldcp->num_rxdcookies = ncookies;
2286 	ldcp->num_rxds = num_desc;
2287 	ldcp->next_rxi = 0;
2288 	ldcp->next_rxseq = VNET_ISS;
2289 
2290 	return (DDI_SUCCESS);
2291 }
2292 
2293 /* get channel statistics */
2294 static uint64_t
2295 vgen_ldc_stat(vgen_ldc_t *ldcp, uint_t stat)
2296 {
2297 	vgen_stats_t *statsp;
2298 	uint64_t val;
2299 
2300 	val = 0;
2301 	statsp = &ldcp->stats;
2302 	switch (stat) {
2303 
2304 	case MAC_STAT_MULTIRCV:
2305 		val = statsp->multircv;
2306 		break;
2307 
2308 	case MAC_STAT_BRDCSTRCV:
2309 		val = statsp->brdcstrcv;
2310 		break;
2311 
2312 	case MAC_STAT_MULTIXMT:
2313 		val = statsp->multixmt;
2314 		break;
2315 
2316 	case MAC_STAT_BRDCSTXMT:
2317 		val = statsp->brdcstxmt;
2318 		break;
2319 
2320 	case MAC_STAT_NORCVBUF:
2321 		val = statsp->norcvbuf;
2322 		break;
2323 
2324 	case MAC_STAT_IERRORS:
2325 		val = statsp->ierrors;
2326 		break;
2327 
2328 	case MAC_STAT_NOXMTBUF:
2329 		val = statsp->noxmtbuf;
2330 		break;
2331 
2332 	case MAC_STAT_OERRORS:
2333 		val = statsp->oerrors;
2334 		break;
2335 
2336 	case MAC_STAT_COLLISIONS:
2337 		break;
2338 
2339 	case MAC_STAT_RBYTES:
2340 		val = statsp->rbytes;
2341 		break;
2342 
2343 	case MAC_STAT_IPACKETS:
2344 		val = statsp->ipackets;
2345 		break;
2346 
2347 	case MAC_STAT_OBYTES:
2348 		val = statsp->obytes;
2349 		break;
2350 
2351 	case MAC_STAT_OPACKETS:
2352 		val = statsp->opackets;
2353 		break;
2354 
2355 	/* stats not relevant to ldc, return 0 */
2356 	case MAC_STAT_IFSPEED:
2357 	case ETHER_STAT_ALIGN_ERRORS:
2358 	case ETHER_STAT_FCS_ERRORS:
2359 	case ETHER_STAT_FIRST_COLLISIONS:
2360 	case ETHER_STAT_MULTI_COLLISIONS:
2361 	case ETHER_STAT_DEFER_XMTS:
2362 	case ETHER_STAT_TX_LATE_COLLISIONS:
2363 	case ETHER_STAT_EX_COLLISIONS:
2364 	case ETHER_STAT_MACXMT_ERRORS:
2365 	case ETHER_STAT_CARRIER_ERRORS:
2366 	case ETHER_STAT_TOOLONG_ERRORS:
2367 	case ETHER_STAT_XCVR_ADDR:
2368 	case ETHER_STAT_XCVR_ID:
2369 	case ETHER_STAT_XCVR_INUSE:
2370 	case ETHER_STAT_CAP_1000FDX:
2371 	case ETHER_STAT_CAP_1000HDX:
2372 	case ETHER_STAT_CAP_100FDX:
2373 	case ETHER_STAT_CAP_100HDX:
2374 	case ETHER_STAT_CAP_10FDX:
2375 	case ETHER_STAT_CAP_10HDX:
2376 	case ETHER_STAT_CAP_ASMPAUSE:
2377 	case ETHER_STAT_CAP_PAUSE:
2378 	case ETHER_STAT_CAP_AUTONEG:
2379 	case ETHER_STAT_ADV_CAP_1000FDX:
2380 	case ETHER_STAT_ADV_CAP_1000HDX:
2381 	case ETHER_STAT_ADV_CAP_100FDX:
2382 	case ETHER_STAT_ADV_CAP_100HDX:
2383 	case ETHER_STAT_ADV_CAP_10FDX:
2384 	case ETHER_STAT_ADV_CAP_10HDX:
2385 	case ETHER_STAT_ADV_CAP_ASMPAUSE:
2386 	case ETHER_STAT_ADV_CAP_PAUSE:
2387 	case ETHER_STAT_ADV_CAP_AUTONEG:
2388 	case ETHER_STAT_LP_CAP_1000FDX:
2389 	case ETHER_STAT_LP_CAP_1000HDX:
2390 	case ETHER_STAT_LP_CAP_100FDX:
2391 	case ETHER_STAT_LP_CAP_100HDX:
2392 	case ETHER_STAT_LP_CAP_10FDX:
2393 	case ETHER_STAT_LP_CAP_10HDX:
2394 	case ETHER_STAT_LP_CAP_ASMPAUSE:
2395 	case ETHER_STAT_LP_CAP_PAUSE:
2396 	case ETHER_STAT_LP_CAP_AUTONEG:
2397 	case ETHER_STAT_LINK_ASMPAUSE:
2398 	case ETHER_STAT_LINK_PAUSE:
2399 	case ETHER_STAT_LINK_AUTONEG:
2400 	case ETHER_STAT_LINK_DUPLEX:
2401 	default:
2402 		val = 0;
2403 		break;
2404 
2405 	}
2406 	return (val);
2407 }
2408 
2409 /*
2410  * LDC channel is UP, start handshake process with peer.
2411  * Flag tells vnet_modify_fdb() about the context: set to B_TRUE if this
2412  * function is being called from transmit routine, otherwise B_FALSE.
2413  */
2414 static void
2415 vgen_handle_evt_up(vgen_ldc_t *ldcp, boolean_t flag)
2416 {
2417 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
2418 
2419 	DBG1(vgenp, ldcp, "enter\n");
2420 
2421 	ASSERT(MUTEX_HELD(&ldcp->cblock));
2422 
2423 	if (ldcp->portp != vgenp->vsw_portp) {
2424 		/*
2425 		 * modify fdb entry to use this port as the
2426 		 * channel is up, instead of going through the
2427 		 * vsw-port (see comments in vgen_port_init())
2428 		 */
2429 		vnet_modify_fdb(vgenp->vnetp, (uint8_t *)&ldcp->portp->macaddr,
2430 		    vgen_tx, ldcp->portp, flag);
2431 	}
2432 
2433 	/* Initialize local session id */
2434 	ldcp->local_sid = ddi_get_lbolt();
2435 
2436 	/* clear peer session id */
2437 	ldcp->peer_sid = 0;
2438 	ldcp->hretries = 0;
2439 
2440 	if (ldcp->hphase != VH_PHASE0) {
2441 		vgen_handshake_reset(ldcp);
2442 	}
2443 
2444 	/* Initiate Handshake process with peer ldc endpoint */
2445 	vgen_handshake(vh_nextphase(ldcp));
2446 
2447 	DBG1(vgenp, ldcp, "exit\n");
2448 }
2449 
2450 /*
2451  * LDC channel is Reset, terminate connection with peer and try to
2452  * bring the channel up again.
2453  * Flag tells vnet_modify_fdb() about the context: set to B_TRUE if this
2454  * function is being called from transmit routine, otherwise B_FALSE.
2455  */
2456 static void
2457 vgen_handle_evt_reset(vgen_ldc_t *ldcp, boolean_t flag)
2458 {
2459 	ldc_status_t istatus;
2460 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
2461 	int	rv;
2462 
2463 	DBG1(vgenp, ldcp, "enter\n");
2464 
2465 	ASSERT(MUTEX_HELD(&ldcp->cblock));
2466 
2467 	if ((ldcp->portp != vgenp->vsw_portp) &&
2468 	    (vgenp->vsw_portp != NULL)) {
2469 		/*
2470 		 * modify fdb entry to use vsw-port  as the
2471 		 * channel is reset and we don't have a direct
2472 		 * link to the destination (see comments
2473 		 * in vgen_port_init()).
2474 		 */
2475 		vnet_modify_fdb(vgenp->vnetp, (uint8_t *)&ldcp->portp->macaddr,
2476 		    vgen_tx, vgenp->vsw_portp, flag);
2477 	}
2478 
2479 	if (ldcp->hphase != VH_PHASE0) {
2480 		vgen_handshake_reset(ldcp);
2481 	}
2482 
2483 	/* try to bring the channel up */
2484 	rv = ldc_up(ldcp->ldc_handle);
2485 	if (rv != 0) {
2486 		DWARN(vgenp, ldcp, "ldc_up err rv(%d)\n", rv);
2487 	}
2488 
2489 	if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
2490 		DWARN(vgenp, ldcp, "ldc_status err\n");
2491 	} else {
2492 		ldcp->ldc_status = istatus;
2493 	}
2494 
2495 	/* if channel is already UP - restart handshake */
2496 	if (ldcp->ldc_status == LDC_UP) {
2497 		vgen_handle_evt_up(ldcp, flag);
2498 	}
2499 
2500 	DBG1(vgenp, ldcp, "exit\n");
2501 }
2502 
2503 /* Interrupt handler for the channel */
2504 static uint_t
2505 vgen_ldc_cb(uint64_t event, caddr_t arg)
2506 {
2507 	_NOTE(ARGUNUSED(event))
2508 	vgen_ldc_t	*ldcp;
2509 	vgen_t		*vgenp;
2510 	ldc_status_t 	istatus;
2511 	mblk_t		*bp = NULL;
2512 	vgen_stats_t	*statsp;
2513 
2514 	ldcp = (vgen_ldc_t *)arg;
2515 	vgenp = LDC_TO_VGEN(ldcp);
2516 	statsp = &ldcp->stats;
2517 
2518 	DBG1(vgenp, ldcp, "enter\n");
2519 
2520 	mutex_enter(&ldcp->cblock);
2521 	statsp->callbacks++;
2522 	if ((ldcp->ldc_status == LDC_INIT) || (ldcp->ldc_handle == NULL)) {
2523 		DWARN(vgenp, ldcp, "status(%d) is LDC_INIT\n",
2524 		    ldcp->ldc_status);
2525 		mutex_exit(&ldcp->cblock);
2526 		return (LDC_SUCCESS);
2527 	}
2528 
2529 	/*
2530 	 * NOTE: not using switch() as event could be triggered by
2531 	 * a state change and a read request. Also the ordering	of the
2532 	 * check for the event types is deliberate.
2533 	 */
2534 	if (event & LDC_EVT_UP) {
2535 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
2536 			DWARN(vgenp, ldcp, "ldc_status err\n");
2537 			/* status couldn't be determined */
2538 			mutex_exit(&ldcp->cblock);
2539 			return (LDC_FAILURE);
2540 		}
2541 		ldcp->ldc_status = istatus;
2542 		if (ldcp->ldc_status != LDC_UP) {
2543 			DWARN(vgenp, ldcp, "LDC_EVT_UP received "
2544 			    " but ldc status is not UP(0x%x)\n",
2545 			    ldcp->ldc_status);
2546 			/* spurious interrupt, return success */
2547 			mutex_exit(&ldcp->cblock);
2548 			return (LDC_SUCCESS);
2549 		}
2550 		DWARN(vgenp, ldcp, "event(%lx) UP, status(%d)\n",
2551 		    event, ldcp->ldc_status);
2552 
2553 		vgen_handle_evt_up(ldcp, B_FALSE);
2554 
2555 		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);
2556 	}
2557 
2558 	/* Handle RESET/DOWN before READ event */
2559 	if (event & (LDC_EVT_RESET | LDC_EVT_DOWN)) {
2560 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
2561 			DWARN(vgenp, ldcp, "ldc_status error\n");
2562 			/* status couldn't be determined */
2563 			mutex_exit(&ldcp->cblock);
2564 			return (LDC_FAILURE);
2565 		}
2566 		ldcp->ldc_status = istatus;
2567 		DWARN(vgenp, ldcp, "event(%lx) RESET/DOWN, status(%d)\n",
2568 		    event, ldcp->ldc_status);
2569 
2570 		vgen_handle_evt_reset(ldcp, B_FALSE);
2571 
2572 		/*
2573 		 * As the channel is down/reset, ignore READ event
2574 		 * but print a debug warning message.
2575 		 */
2576 		if (event & LDC_EVT_READ) {
2577 			DWARN(vgenp, ldcp,
2578 			    "LDC_EVT_READ set along with RESET/DOWN\n");
2579 			event &= ~LDC_EVT_READ;
2580 		}
2581 	}
2582 
2583 	if (event & LDC_EVT_READ) {
2584 		DBG2(vgenp, ldcp, "event(%lx) READ, status(%d)\n",
2585 		    event, ldcp->ldc_status);
2586 
2587 		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);
2588 
2589 		if (ldcp->rcv_thread != NULL) {
2590 			/*
2591 			 * If the receive thread is enabled, then
2592 			 * wakeup the receive thread to process the
2593 			 * LDC messages.
2594 			 */
2595 			mutex_exit(&ldcp->cblock);
2596 			mutex_enter(&ldcp->rcv_thr_lock);
2597 			if (!(ldcp->rcv_thr_flags & VGEN_WTHR_DATARCVD)) {
2598 				ldcp->rcv_thr_flags |= VGEN_WTHR_DATARCVD;
2599 				cv_signal(&ldcp->rcv_thr_cv);
2600 			}
2601 			mutex_exit(&ldcp->rcv_thr_lock);
2602 			mutex_enter(&ldcp->cblock);
2603 		} else  {
2604 			vgen_handle_evt_read(ldcp);
2605 			bp = ldcp->rcv_mhead;
2606 			ldcp->rcv_mhead = ldcp->rcv_mtail = NULL;
2607 		}
2608 	}
2609 	mutex_exit(&ldcp->cblock);
2610 
2611 	/* send up the received packets to MAC layer */
2612 	if (bp != NULL) {
2613 		vnet_rx(vgenp->vnetp, NULL, bp);
2614 	}
2615 
2616 	if (ldcp->cancel_htid) {
2617 		/*
2618 		 * Cancel handshake timer.
2619 		 * untimeout(9F) will not return until the pending callback is
2620 		 * cancelled or has run. No problems will result from calling
2621 		 * untimeout if the handler has already completed.
2622 		 * If the timeout handler did run, then it would just
2623 		 * return as cancel_htid is set.
2624 		 */
2625 		(void) untimeout(ldcp->cancel_htid);
2626 		ldcp->cancel_htid = 0;
2627 	}
2628 	DBG1(vgenp, ldcp, "exit\n");
2629 
2630 	return (LDC_SUCCESS);
2631 }
2632 
2633 static void
2634 vgen_handle_evt_read(vgen_ldc_t *ldcp)
2635 {
2636 	int		rv;
2637 	uint64_t	ldcmsg[7];
2638 	size_t		msglen;
2639 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
2640 	vio_msg_tag_t	*tagp;
2641 	ldc_status_t 	istatus;
2642 	boolean_t 	has_data;
2643 
2644 	DBG1(vgenp, ldcp, "enter\n");
2645 
2646 	/*
2647 	 * If the receive thread is enabled, then the cblock
2648 	 * need to be acquired here. If not, the vgen_ldc_cb()
2649 	 * calls this function with cblock held already.
2650 	 */
2651 	if (ldcp->rcv_thread != NULL) {
2652 		mutex_enter(&ldcp->cblock);
2653 	} else {
2654 		ASSERT(MUTEX_HELD(&ldcp->cblock));
2655 	}
2656 
2657 vgen_evt_read:
2658 	do {
2659 		msglen = sizeof (ldcmsg);
2660 		rv = ldc_read(ldcp->ldc_handle, (caddr_t)&ldcmsg, &msglen);
2661 
2662 		if (rv != 0) {
2663 			DWARN(vgenp, ldcp, "err rv(%d) len(%d)\n",
2664 			    rv, msglen);
2665 			if (rv == ECONNRESET)
2666 				goto vgen_evtread_error;
2667 			break;
2668 		}
2669 		if (msglen == 0) {
2670 			DBG2(vgenp, ldcp, "ldc_read NODATA");
2671 			break;
2672 		}
2673 		DBG2(vgenp, ldcp, "ldc_read msglen(%d)", msglen);
2674 
2675 		tagp = (vio_msg_tag_t *)ldcmsg;
2676 
2677 		if (ldcp->peer_sid) {
2678 			/*
2679 			 * check sid only after we have received peer's sid
2680 			 * in the version negotiate msg.
2681 			 */
2682 #ifdef DEBUG
2683 			if (vgen_hdbg & HDBG_BAD_SID) {
2684 				/* simulate bad sid condition */
2685 				tagp->vio_sid = 0;
2686 				vgen_hdbg &= ~(HDBG_BAD_SID);
2687 			}
2688 #endif
2689 			rv = vgen_check_sid(ldcp, tagp);
2690 			if (rv != VGEN_SUCCESS) {
2691 				/*
2692 				 * If sid mismatch is detected,
2693 				 * reset the channel.
2694 				 */
2695 				ldcp->need_ldc_reset = B_TRUE;
2696 				goto vgen_evtread_error;
2697 			}
2698 		}
2699 
2700 		switch (tagp->vio_msgtype) {
2701 		case VIO_TYPE_CTRL:
2702 			rv = vgen_handle_ctrlmsg(ldcp, tagp);
2703 			break;
2704 
2705 		case VIO_TYPE_DATA:
2706 			rv = vgen_handle_datamsg(ldcp, tagp);
2707 			break;
2708 
2709 		case VIO_TYPE_ERR:
2710 			vgen_handle_errmsg(ldcp, tagp);
2711 			break;
2712 
2713 		default:
2714 			DWARN(vgenp, ldcp, "Unknown VIO_TYPE(%x)\n",
2715 			    tagp->vio_msgtype);
2716 			break;
2717 		}
2718 
2719 		/*
2720 		 * If an error is encountered, stop processing and
2721 		 * handle the error.
2722 		 */
2723 		if (rv != 0) {
2724 			goto vgen_evtread_error;
2725 		}
2726 
2727 	} while (msglen);
2728 
2729 	/* check once more before exiting */
2730 	rv = ldc_chkq(ldcp->ldc_handle, &has_data);
2731 	if ((rv == 0) && (has_data == B_TRUE)) {
2732 		DTRACE_PROBE(vgen_chkq);
2733 		goto vgen_evt_read;
2734 	}
2735 
2736 vgen_evtread_error:
2737 	if (rv == ECONNRESET) {
2738 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
2739 			DWARN(vgenp, ldcp, "ldc_status err\n");
2740 		} else {
2741 			ldcp->ldc_status = istatus;
2742 		}
2743 		vgen_handle_evt_reset(ldcp, B_FALSE);
2744 	} else if (rv) {
2745 		vgen_handshake_retry(ldcp);
2746 	}
2747 
2748 	/*
2749 	 * If the receive thread is not enabled, then cancel the
2750 	 * handshake timeout here.
2751 	 */
2752 	if (ldcp->rcv_thread != NULL) {
2753 		mutex_exit(&ldcp->cblock);
2754 		if (ldcp->cancel_htid) {
2755 			/*
2756 			 * Cancel handshake timer. untimeout(9F) will
2757 			 * not return until the pending callback is cancelled
2758 			 * or has run. No problems will result from calling
2759 			 * untimeout if the handler has already completed.
2760 			 * If the timeout handler did run, then it would just
2761 			 * return as cancel_htid is set.
2762 			 */
2763 			(void) untimeout(ldcp->cancel_htid);
2764 			ldcp->cancel_htid = 0;
2765 		}
2766 	}
2767 
2768 	DBG1(vgenp, ldcp, "exit\n");
2769 }
2770 
2771 /* vgen handshake functions */
2772 
2773 /* change the hphase for the channel to the next phase */
2774 static vgen_ldc_t *
2775 vh_nextphase(vgen_ldc_t *ldcp)
2776 {
2777 	if (ldcp->hphase == VH_PHASE3) {
2778 		ldcp->hphase = VH_DONE;
2779 	} else {
2780 		ldcp->hphase++;
2781 	}
2782 	return (ldcp);
2783 }
2784 
2785 /*
2786  * Check whether the given version is supported or not and
2787  * return VGEN_SUCCESS if supported.
2788  */
2789 static int
2790 vgen_supported_version(vgen_ldc_t *ldcp, uint16_t ver_major,
2791 uint16_t ver_minor)
2792 {
2793 	vgen_ver_t	*versions = ldcp->vgen_versions;
2794 	int		i = 0;
2795 
2796 	while (i < VGEN_NUM_VER) {
2797 		if ((versions[i].ver_major == 0) &&
2798 		    (versions[i].ver_minor == 0)) {
2799 			break;
2800 		}
2801 		if ((versions[i].ver_major == ver_major) &&
2802 		    (versions[i].ver_minor == ver_minor)) {
2803 			return (VGEN_SUCCESS);
2804 		}
2805 		i++;
2806 	}
2807 	return (VGEN_FAILURE);
2808 }
2809 
2810 /*
2811  * Given a version, return VGEN_SUCCESS if a lower version is supported.
2812  */
2813 static int
2814 vgen_next_version(vgen_ldc_t *ldcp, vgen_ver_t *verp)
2815 {
2816 	vgen_ver_t	*versions = ldcp->vgen_versions;
2817 	int		i = 0;
2818 
2819 	while (i < VGEN_NUM_VER) {
2820 		if ((versions[i].ver_major == 0) &&
2821 		    (versions[i].ver_minor == 0)) {
2822 			break;
2823 		}
2824 		/*
2825 		 * if we support a lower minor version within the same major
2826 		 * version, or if we support a lower major version,
2827 		 * update the verp parameter with this lower version and
2828 		 * return success.
2829 		 */
2830 		if (((versions[i].ver_major == verp->ver_major) &&
2831 		    (versions[i].ver_minor < verp->ver_minor)) ||
2832 		    (versions[i].ver_major < verp->ver_major)) {
2833 			verp->ver_major = versions[i].ver_major;
2834 			verp->ver_minor = versions[i].ver_minor;
2835 			return (VGEN_SUCCESS);
2836 		}
2837 		i++;
2838 	}
2839 
2840 	return (VGEN_FAILURE);
2841 }
2842 
2843 /*
2844  * wrapper routine to send the given message over ldc using ldc_write().
2845  */
2846 static int
2847 vgen_sendmsg(vgen_ldc_t *ldcp, caddr_t msg,  size_t msglen,
2848     boolean_t caller_holds_lock)
2849 {
2850 	int	rv;
2851 	size_t	len;
2852 	uint32_t retries = 0;
2853 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
2854 
2855 	len = msglen;
2856 	if ((len == 0) || (msg == NULL))
2857 		return (VGEN_FAILURE);
2858 
2859 	if (!caller_holds_lock) {
2860 		mutex_enter(&ldcp->wrlock);
2861 	}
2862 
2863 	do {
2864 		len = msglen;
2865 		rv = ldc_write(ldcp->ldc_handle, (caddr_t)msg, &len);
2866 		if (retries++ >= vgen_ldcwr_retries)
2867 			break;
2868 	} while (rv == EWOULDBLOCK);
2869 
2870 	if (!caller_holds_lock) {
2871 		mutex_exit(&ldcp->wrlock);
2872 	}
2873 
2874 	if (rv != 0) {
2875 		DWARN(vgenp, ldcp, "ldc_write failed: rv(%d)\n",
2876 		    rv, msglen);
2877 		return (rv);
2878 	}
2879 
2880 	if (len != msglen) {
2881 		DWARN(vgenp, ldcp, "ldc_write failed: rv(%d) msglen (%d)\n",
2882 		    rv, msglen);
2883 		return (VGEN_FAILURE);
2884 	}
2885 
2886 	return (VGEN_SUCCESS);
2887 }
2888 
2889 /* send version negotiate message to the peer over ldc */
2890 static int
2891 vgen_send_version_negotiate(vgen_ldc_t *ldcp)
2892 {
2893 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
2894 	vio_ver_msg_t	vermsg;
2895 	vio_msg_tag_t	*tagp = &vermsg.tag;
2896 	int		rv;
2897 
2898 	bzero(&vermsg, sizeof (vermsg));
2899 
2900 	tagp->vio_msgtype = VIO_TYPE_CTRL;
2901 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
2902 	tagp->vio_subtype_env = VIO_VER_INFO;
2903 	tagp->vio_sid = ldcp->local_sid;
2904 
2905 	/* get version msg payload from ldcp->local */
2906 	vermsg.ver_major = ldcp->local_hparams.ver_major;
2907 	vermsg.ver_minor = ldcp->local_hparams.ver_minor;
2908 	vermsg.dev_class = ldcp->local_hparams.dev_class;
2909 
2910 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (vermsg), B_FALSE);
2911 	if (rv != VGEN_SUCCESS) {
2912 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
2913 		return (rv);
2914 	}
2915 
2916 	ldcp->hstate |= VER_INFO_SENT;
2917 	DBG2(vgenp, ldcp, "VER_INFO_SENT ver(%d,%d)\n",
2918 	    vermsg.ver_major, vermsg.ver_minor);
2919 
2920 	return (VGEN_SUCCESS);
2921 }
2922 
2923 /* send attr info message to the peer over ldc */
2924 static int
2925 vgen_send_attr_info(vgen_ldc_t *ldcp)
2926 {
2927 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
2928 	vnet_attr_msg_t	attrmsg;
2929 	vio_msg_tag_t	*tagp = &attrmsg.tag;
2930 	int		rv;
2931 
2932 	bzero(&attrmsg, sizeof (attrmsg));
2933 
2934 	tagp->vio_msgtype = VIO_TYPE_CTRL;
2935 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
2936 	tagp->vio_subtype_env = VIO_ATTR_INFO;
2937 	tagp->vio_sid = ldcp->local_sid;
2938 
2939 	/* get attr msg payload from ldcp->local */
2940 	attrmsg.mtu = ldcp->local_hparams.mtu;
2941 	attrmsg.addr = ldcp->local_hparams.addr;
2942 	attrmsg.addr_type = ldcp->local_hparams.addr_type;
2943 	attrmsg.xfer_mode = ldcp->local_hparams.xfer_mode;
2944 	attrmsg.ack_freq = ldcp->local_hparams.ack_freq;
2945 
2946 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (attrmsg), B_FALSE);
2947 	if (rv != VGEN_SUCCESS) {
2948 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
2949 		return (rv);
2950 	}
2951 
2952 	ldcp->hstate |= ATTR_INFO_SENT;
2953 	DBG2(vgenp, ldcp, "ATTR_INFO_SENT\n");
2954 
2955 	return (VGEN_SUCCESS);
2956 }
2957 
2958 /* send descriptor ring register message to the peer over ldc */
2959 static int
2960 vgen_send_dring_reg(vgen_ldc_t *ldcp)
2961 {
2962 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
2963 	vio_dring_reg_msg_t	msg;
2964 	vio_msg_tag_t		*tagp = &msg.tag;
2965 	int		rv;
2966 
2967 	bzero(&msg, sizeof (msg));
2968 
2969 	tagp->vio_msgtype = VIO_TYPE_CTRL;
2970 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
2971 	tagp->vio_subtype_env = VIO_DRING_REG;
2972 	tagp->vio_sid = ldcp->local_sid;
2973 
2974 	/* get dring info msg payload from ldcp->local */
2975 	bcopy(&(ldcp->local_hparams.dring_cookie), (msg.cookie),
2976 	    sizeof (ldc_mem_cookie_t));
2977 	msg.ncookies = ldcp->local_hparams.num_dcookies;
2978 	msg.num_descriptors = ldcp->local_hparams.num_desc;
2979 	msg.descriptor_size = ldcp->local_hparams.desc_size;
2980 
2981 	/*
2982 	 * dring_ident is set to 0. After mapping the dring, peer sets this
2983 	 * value and sends it in the ack, which is saved in
2984 	 * vgen_handle_dring_reg().
2985 	 */
2986 	msg.dring_ident = 0;
2987 
2988 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (msg), B_FALSE);
2989 	if (rv != VGEN_SUCCESS) {
2990 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
2991 		return (rv);
2992 	}
2993 
2994 	ldcp->hstate |= DRING_INFO_SENT;
2995 	DBG2(vgenp, ldcp, "DRING_INFO_SENT \n");
2996 
2997 	return (VGEN_SUCCESS);
2998 }
2999 
3000 static int
3001 vgen_send_rdx_info(vgen_ldc_t *ldcp)
3002 {
3003 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
3004 	vio_rdx_msg_t	rdxmsg;
3005 	vio_msg_tag_t	*tagp = &rdxmsg.tag;
3006 	int		rv;
3007 
3008 	bzero(&rdxmsg, sizeof (rdxmsg));
3009 
3010 	tagp->vio_msgtype = VIO_TYPE_CTRL;
3011 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
3012 	tagp->vio_subtype_env = VIO_RDX;
3013 	tagp->vio_sid = ldcp->local_sid;
3014 
3015 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (rdxmsg), B_FALSE);
3016 	if (rv != VGEN_SUCCESS) {
3017 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
3018 		return (rv);
3019 	}
3020 
3021 	ldcp->hstate |= RDX_INFO_SENT;
3022 	DBG2(vgenp, ldcp, "RDX_INFO_SENT\n");
3023 
3024 	return (VGEN_SUCCESS);
3025 }
3026 
3027 /* send descriptor ring data message to the peer over ldc */
3028 static int
3029 vgen_send_dring_data(vgen_ldc_t *ldcp, uint32_t start, int32_t end)
3030 {
3031 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
3032 	vio_dring_msg_t	dringmsg, *msgp = &dringmsg;
3033 	vio_msg_tag_t	*tagp = &msgp->tag;
3034 	vgen_stats_t	*statsp = &ldcp->stats;
3035 	int		rv;
3036 
3037 	bzero(msgp, sizeof (*msgp));
3038 
3039 	tagp->vio_msgtype = VIO_TYPE_DATA;
3040 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
3041 	tagp->vio_subtype_env = VIO_DRING_DATA;
3042 	tagp->vio_sid = ldcp->local_sid;
3043 
3044 	msgp->seq_num = ldcp->next_txseq;
3045 	msgp->dring_ident = ldcp->local_hparams.dring_ident;
3046 	msgp->start_idx = start;
3047 	msgp->end_idx = end;
3048 
3049 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (dringmsg), B_TRUE);
3050 	if (rv != VGEN_SUCCESS) {
3051 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
3052 		return (rv);
3053 	}
3054 
3055 	ldcp->next_txseq++;
3056 	statsp->dring_data_msgs++;
3057 
3058 	DBG2(vgenp, ldcp, "DRING_DATA_SENT \n");
3059 
3060 	return (VGEN_SUCCESS);
3061 }
3062 
3063 /* send multicast addr info message to vsw */
3064 static int
3065 vgen_send_mcast_info(vgen_ldc_t *ldcp)
3066 {
3067 	vnet_mcast_msg_t	mcastmsg;
3068 	vnet_mcast_msg_t	*msgp;
3069 	vio_msg_tag_t		*tagp;
3070 	vgen_t			*vgenp;
3071 	struct ether_addr	*mca;
3072 	int			rv;
3073 	int			i;
3074 	uint32_t		size;
3075 	uint32_t		mccount;
3076 	uint32_t		n;
3077 
3078 	msgp = &mcastmsg;
3079 	tagp = &msgp->tag;
3080 	vgenp = LDC_TO_VGEN(ldcp);
3081 
3082 	mccount = vgenp->mccount;
3083 	i = 0;
3084 
3085 	do {
3086 		tagp->vio_msgtype = VIO_TYPE_CTRL;
3087 		tagp->vio_subtype = VIO_SUBTYPE_INFO;
3088 		tagp->vio_subtype_env = VNET_MCAST_INFO;
3089 		tagp->vio_sid = ldcp->local_sid;
3090 
3091 		n = ((mccount >= VNET_NUM_MCAST) ? VNET_NUM_MCAST : mccount);
3092 		size = n * sizeof (struct ether_addr);
3093 
3094 		mca = &(vgenp->mctab[i]);
3095 		bcopy(mca, (msgp->mca), size);
3096 		msgp->set = B_TRUE;
3097 		msgp->count = n;
3098 
3099 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msgp),
3100 		    B_FALSE);
3101 		if (rv != VGEN_SUCCESS) {
3102 			DWARN(vgenp, ldcp, "vgen_sendmsg err(%d)\n", rv);
3103 			return (rv);
3104 		}
3105 
3106 		mccount -= n;
3107 		i += n;
3108 
3109 	} while (mccount);
3110 
3111 	return (VGEN_SUCCESS);
3112 }
3113 
3114 /* Initiate Phase 2 of handshake */
3115 static int
3116 vgen_handshake_phase2(vgen_ldc_t *ldcp)
3117 {
3118 	int rv;
3119 	uint32_t ncookies = 0;
3120 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3121 
3122 #ifdef DEBUG
3123 	if (vgen_hdbg & HDBG_OUT_STATE) {
3124 		/* simulate out of state condition */
3125 		vgen_hdbg &= ~(HDBG_OUT_STATE);
3126 		rv = vgen_send_rdx_info(ldcp);
3127 		return (rv);
3128 	}
3129 	if (vgen_hdbg & HDBG_TIMEOUT) {
3130 		/* simulate timeout condition */
3131 		vgen_hdbg &= ~(HDBG_TIMEOUT);
3132 		return (VGEN_SUCCESS);
3133 	}
3134 #endif
3135 	rv = vgen_send_attr_info(ldcp);
3136 	if (rv != VGEN_SUCCESS) {
3137 		return (rv);
3138 	}
3139 
3140 	/* Bind descriptor ring to the channel */
3141 	if (ldcp->num_txdcookies == 0) {
3142 		rv = ldc_mem_dring_bind(ldcp->ldc_handle, ldcp->tx_dhandle,
3143 		    LDC_SHADOW_MAP, LDC_MEM_RW, &ldcp->tx_dcookie, &ncookies);
3144 		if (rv != 0) {
3145 			DWARN(vgenp, ldcp, "ldc_mem_dring_bind failed "
3146 			    "rv(%x)\n", rv);
3147 			return (rv);
3148 		}
3149 		ASSERT(ncookies == 1);
3150 		ldcp->num_txdcookies = ncookies;
3151 	}
3152 
3153 	/* update local dring_info params */
3154 	bcopy(&(ldcp->tx_dcookie), &(ldcp->local_hparams.dring_cookie),
3155 	    sizeof (ldc_mem_cookie_t));
3156 	ldcp->local_hparams.num_dcookies = ldcp->num_txdcookies;
3157 	ldcp->local_hparams.num_desc = ldcp->num_txds;
3158 	ldcp->local_hparams.desc_size = sizeof (vnet_public_desc_t);
3159 
3160 	rv = vgen_send_dring_reg(ldcp);
3161 	if (rv != VGEN_SUCCESS) {
3162 		return (rv);
3163 	}
3164 
3165 	return (VGEN_SUCCESS);
3166 }
3167 
3168 /*
3169  * This function resets the handshake phase to VH_PHASE0(pre-handshake phase).
3170  * This can happen after a channel comes up (status: LDC_UP) or
3171  * when handshake gets terminated due to various conditions.
3172  */
3173 static void
3174 vgen_reset_hphase(vgen_ldc_t *ldcp)
3175 {
3176 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3177 	ldc_status_t istatus;
3178 	int rv;
3179 
3180 	DBG1(vgenp, ldcp, "enter\n");
3181 	/* reset hstate and hphase */
3182 	ldcp->hstate = 0;
3183 	ldcp->hphase = VH_PHASE0;
3184 
3185 	/*
3186 	 * Save the id of pending handshake timer in cancel_htid.
3187 	 * This will be checked in vgen_ldc_cb() and the handshake timer will
3188 	 * be cancelled after releasing cblock.
3189 	 */
3190 	if (ldcp->htid) {
3191 		ldcp->cancel_htid = ldcp->htid;
3192 		ldcp->htid = 0;
3193 	}
3194 
3195 	if (ldcp->local_hparams.dring_ready) {
3196 		ldcp->local_hparams.dring_ready = B_FALSE;
3197 	}
3198 
3199 	/* Unbind tx descriptor ring from the channel */
3200 	if (ldcp->num_txdcookies) {
3201 		rv = ldc_mem_dring_unbind(ldcp->tx_dhandle);
3202 		if (rv != 0) {
3203 			DWARN(vgenp, ldcp, "ldc_mem_dring_unbind failed\n");
3204 		}
3205 		ldcp->num_txdcookies = 0;
3206 	}
3207 
3208 	if (ldcp->peer_hparams.dring_ready) {
3209 		ldcp->peer_hparams.dring_ready = B_FALSE;
3210 		/* Unmap peer's dring */
3211 		(void) ldc_mem_dring_unmap(ldcp->rx_dhandle);
3212 		vgen_clobber_rxds(ldcp);
3213 	}
3214 
3215 	vgen_clobber_tbufs(ldcp);
3216 
3217 	/*
3218 	 * clear local handshake params and initialize.
3219 	 */
3220 	bzero(&(ldcp->local_hparams), sizeof (ldcp->local_hparams));
3221 
3222 	/* set version to the highest version supported */
3223 	ldcp->local_hparams.ver_major =
3224 	    ldcp->vgen_versions[0].ver_major;
3225 	ldcp->local_hparams.ver_minor =
3226 	    ldcp->vgen_versions[0].ver_minor;
3227 	ldcp->local_hparams.dev_class = VDEV_NETWORK;
3228 
3229 	/* set attr_info params */
3230 	ldcp->local_hparams.mtu = ETHERMAX;
3231 	ldcp->local_hparams.addr =
3232 	    vnet_macaddr_strtoul(vgenp->macaddr);
3233 	ldcp->local_hparams.addr_type = ADDR_TYPE_MAC;
3234 	ldcp->local_hparams.xfer_mode = VIO_DRING_MODE;
3235 	ldcp->local_hparams.ack_freq = 0;	/* don't need acks */
3236 
3237 	/*
3238 	 * Note: dring is created, but not bound yet.
3239 	 * local dring_info params will be updated when we bind the dring in
3240 	 * vgen_handshake_phase2().
3241 	 * dring_ident is set to 0. After mapping the dring, peer sets this
3242 	 * value and sends it in the ack, which is saved in
3243 	 * vgen_handle_dring_reg().
3244 	 */
3245 	ldcp->local_hparams.dring_ident = 0;
3246 
3247 	/* clear peer_hparams */
3248 	bzero(&(ldcp->peer_hparams), sizeof (ldcp->peer_hparams));
3249 
3250 	/* reset the channel if required */
3251 	if (ldcp->need_ldc_reset) {
3252 		DWARN(vgenp, ldcp, "Doing Channel Reset...\n");
3253 		ldcp->need_ldc_reset = B_FALSE;
3254 		(void) ldc_down(ldcp->ldc_handle);
3255 		(void) ldc_status(ldcp->ldc_handle, &istatus);
3256 		DBG2(vgenp, ldcp, "Reset Done,ldc_status(%x)\n", istatus);
3257 		ldcp->ldc_status = istatus;
3258 
3259 		/* clear sids */
3260 		ldcp->local_sid = 0;
3261 		ldcp->peer_sid = 0;
3262 
3263 		/* try to bring the channel up */
3264 		rv = ldc_up(ldcp->ldc_handle);
3265 		if (rv != 0) {
3266 			DWARN(vgenp, ldcp, "ldc_up err rv(%d)\n", rv);
3267 		}
3268 
3269 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
3270 			DWARN(vgenp, ldcp, "ldc_status err\n");
3271 		} else {
3272 			ldcp->ldc_status = istatus;
3273 		}
3274 	}
3275 }
3276 
3277 /* wrapper function for vgen_reset_hphase */
3278 static void
3279 vgen_handshake_reset(vgen_ldc_t *ldcp)
3280 {
3281 	ASSERT(MUTEX_HELD(&ldcp->cblock));
3282 	mutex_enter(&ldcp->rxlock);
3283 	mutex_enter(&ldcp->wrlock);
3284 	mutex_enter(&ldcp->txlock);
3285 	mutex_enter(&ldcp->tclock);
3286 
3287 	vgen_reset_hphase(ldcp);
3288 
3289 	mutex_exit(&ldcp->tclock);
3290 	mutex_exit(&ldcp->txlock);
3291 	mutex_exit(&ldcp->wrlock);
3292 	mutex_exit(&ldcp->rxlock);
3293 }
3294 
3295 /*
3296  * Initiate handshake with the peer by sending various messages
3297  * based on the handshake-phase that the channel is currently in.
3298  */
3299 static void
3300 vgen_handshake(vgen_ldc_t *ldcp)
3301 {
3302 	uint32_t hphase = ldcp->hphase;
3303 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
3304 	ldc_status_t	istatus;
3305 	int	rv = 0;
3306 
3307 	switch (hphase) {
3308 
3309 	case VH_PHASE1:
3310 
3311 		/*
3312 		 * start timer, for entire handshake process, turn this timer
3313 		 * off if all phases of handshake complete successfully and
3314 		 * hphase goes to VH_DONE(below) or
3315 		 * vgen_reset_hphase() gets called or
3316 		 * channel is reset due to errors or
3317 		 * vgen_ldc_uninit() is invoked(vgen_stop).
3318 		 */
3319 		ldcp->htid = timeout(vgen_hwatchdog, (caddr_t)ldcp,
3320 		    drv_usectohz(vgen_hwd_interval * MICROSEC));
3321 
3322 		/* Phase 1 involves negotiating the version */
3323 		rv = vgen_send_version_negotiate(ldcp);
3324 		break;
3325 
3326 	case VH_PHASE2:
3327 		rv = vgen_handshake_phase2(ldcp);
3328 		break;
3329 
3330 	case VH_PHASE3:
3331 		rv = vgen_send_rdx_info(ldcp);
3332 		break;
3333 
3334 	case VH_DONE:
3335 		/*
3336 		 * Save the id of pending handshake timer in cancel_htid.
3337 		 * This will be checked in vgen_ldc_cb() and the handshake
3338 		 * timer will be cancelled after releasing cblock.
3339 		 */
3340 		if (ldcp->htid) {
3341 			ldcp->cancel_htid = ldcp->htid;
3342 			ldcp->htid = 0;
3343 		}
3344 		ldcp->hretries = 0;
3345 		DBG1(vgenp, ldcp, "Handshake Done\n");
3346 
3347 		if (ldcp->portp == vgenp->vsw_portp) {
3348 			/*
3349 			 * If this channel(port) is connected to vsw,
3350 			 * need to sync multicast table with vsw.
3351 			 */
3352 			mutex_exit(&ldcp->cblock);
3353 
3354 			mutex_enter(&vgenp->lock);
3355 			rv = vgen_send_mcast_info(ldcp);
3356 			mutex_exit(&vgenp->lock);
3357 
3358 			mutex_enter(&ldcp->cblock);
3359 			if (rv != VGEN_SUCCESS)
3360 				break;
3361 		}
3362 
3363 		/*
3364 		 * Check if mac layer should be notified to restart
3365 		 * transmissions. This can happen if the channel got
3366 		 * reset and vgen_clobber_tbufs() is called, while
3367 		 * need_resched is set.
3368 		 */
3369 		mutex_enter(&ldcp->tclock);
3370 		if (ldcp->need_resched) {
3371 			ldcp->need_resched = B_FALSE;
3372 			vnet_tx_update(vgenp->vnetp);
3373 		}
3374 		mutex_exit(&ldcp->tclock);
3375 
3376 		break;
3377 
3378 	default:
3379 		break;
3380 	}
3381 
3382 	if (rv == ECONNRESET) {
3383 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
3384 			DWARN(vgenp, ldcp, "ldc_status err\n");
3385 		} else {
3386 			ldcp->ldc_status = istatus;
3387 		}
3388 		vgen_handle_evt_reset(ldcp, B_FALSE);
3389 	} else if (rv) {
3390 		vgen_handshake_reset(ldcp);
3391 	}
3392 }
3393 
3394 /*
3395  * Check if the current handshake phase has completed successfully and
3396  * return the status.
3397  */
3398 static int
3399 vgen_handshake_done(vgen_ldc_t *ldcp)
3400 {
3401 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
3402 	uint32_t	hphase = ldcp->hphase;
3403 	int 		status = 0;
3404 
3405 	switch (hphase) {
3406 
3407 	case VH_PHASE1:
3408 		/*
3409 		 * Phase1 is done, if version negotiation
3410 		 * completed successfully.
3411 		 */
3412 		status = ((ldcp->hstate & VER_NEGOTIATED) ==
3413 		    VER_NEGOTIATED);
3414 		break;
3415 
3416 	case VH_PHASE2:
3417 		/*
3418 		 * Phase 2 is done, if attr info and dring info
3419 		 * have been exchanged successfully.
3420 		 */
3421 		status = (((ldcp->hstate & ATTR_INFO_EXCHANGED) ==
3422 		    ATTR_INFO_EXCHANGED) &&
3423 		    ((ldcp->hstate & DRING_INFO_EXCHANGED) ==
3424 		    DRING_INFO_EXCHANGED));
3425 		break;
3426 
3427 	case VH_PHASE3:
3428 		/* Phase 3 is done, if rdx msg has been exchanged */
3429 		status = ((ldcp->hstate & RDX_EXCHANGED) ==
3430 		    RDX_EXCHANGED);
3431 		break;
3432 
3433 	default:
3434 		break;
3435 	}
3436 
3437 	if (status == 0) {
3438 		return (VGEN_FAILURE);
3439 	}
3440 	DBG2(vgenp, ldcp, "PHASE(%d)\n", hphase);
3441 	return (VGEN_SUCCESS);
3442 }
3443 
3444 /* retry handshake on failure */
3445 static void
3446 vgen_handshake_retry(vgen_ldc_t *ldcp)
3447 {
3448 	/* reset handshake phase */
3449 	vgen_handshake_reset(ldcp);
3450 
3451 	/* handshake retry is specified and the channel is UP */
3452 	if (vgen_max_hretries && (ldcp->ldc_status == LDC_UP)) {
3453 		if (ldcp->hretries++ < vgen_max_hretries) {
3454 			ldcp->local_sid = ddi_get_lbolt();
3455 			vgen_handshake(vh_nextphase(ldcp));
3456 		}
3457 	}
3458 }
3459 
3460 /*
3461  * Handle a version info msg from the peer or an ACK/NACK from the peer
3462  * to a version info msg that we sent.
3463  */
3464 static int
3465 vgen_handle_version_negotiate(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
3466 {
3467 	vgen_t		*vgenp;
3468 	vio_ver_msg_t	*vermsg = (vio_ver_msg_t *)tagp;
3469 	int		ack = 0;
3470 	int		failed = 0;
3471 	int		idx;
3472 	vgen_ver_t	*versions = ldcp->vgen_versions;
3473 	int		rv = 0;
3474 
3475 	vgenp = LDC_TO_VGEN(ldcp);
3476 	DBG1(vgenp, ldcp, "enter\n");
3477 	switch (tagp->vio_subtype) {
3478 	case VIO_SUBTYPE_INFO:
3479 
3480 		/*  Cache sid of peer if this is the first time */
3481 		if (ldcp->peer_sid == 0) {
3482 			DBG2(vgenp, ldcp, "Caching peer_sid(%x)\n",
3483 			    tagp->vio_sid);
3484 			ldcp->peer_sid = tagp->vio_sid;
3485 		}
3486 
3487 		if (ldcp->hphase != VH_PHASE1) {
3488 			/*
3489 			 * If we are not already in VH_PHASE1, reset to
3490 			 * pre-handshake state, and initiate handshake
3491 			 * to the peer too.
3492 			 */
3493 			vgen_handshake_reset(ldcp);
3494 			vgen_handshake(vh_nextphase(ldcp));
3495 		}
3496 		ldcp->hstate |= VER_INFO_RCVD;
3497 
3498 		/* save peer's requested values */
3499 		ldcp->peer_hparams.ver_major = vermsg->ver_major;
3500 		ldcp->peer_hparams.ver_minor = vermsg->ver_minor;
3501 		ldcp->peer_hparams.dev_class = vermsg->dev_class;
3502 
3503 		if ((vermsg->dev_class != VDEV_NETWORK) &&
3504 		    (vermsg->dev_class != VDEV_NETWORK_SWITCH)) {
3505 			/* unsupported dev_class, send NACK */
3506 
3507 			DWARN(vgenp, ldcp, "Version Negotiation Failed\n");
3508 
3509 			tagp->vio_subtype = VIO_SUBTYPE_NACK;
3510 			tagp->vio_sid = ldcp->local_sid;
3511 			/* send reply msg back to peer */
3512 			rv = vgen_sendmsg(ldcp, (caddr_t)tagp,
3513 			    sizeof (*vermsg), B_FALSE);
3514 			if (rv != VGEN_SUCCESS) {
3515 				return (rv);
3516 			}
3517 			return (VGEN_FAILURE);
3518 		}
3519 
3520 		DBG2(vgenp, ldcp, "VER_INFO_RCVD, ver(%d,%d)\n",
3521 		    vermsg->ver_major,  vermsg->ver_minor);
3522 
3523 		idx = 0;
3524 
3525 		for (;;) {
3526 
3527 			if (vermsg->ver_major > versions[idx].ver_major) {
3528 
3529 				/* nack with next lower version */
3530 				tagp->vio_subtype = VIO_SUBTYPE_NACK;
3531 				vermsg->ver_major = versions[idx].ver_major;
3532 				vermsg->ver_minor = versions[idx].ver_minor;
3533 				break;
3534 			}
3535 
3536 			if (vermsg->ver_major == versions[idx].ver_major) {
3537 
3538 				/* major version match - ACK version */
3539 				tagp->vio_subtype = VIO_SUBTYPE_ACK;
3540 				ack = 1;
3541 
3542 				/*
3543 				 * lower minor version to the one this endpt
3544 				 * supports, if necessary
3545 				 */
3546 				if (vermsg->ver_minor >
3547 				    versions[idx].ver_minor) {
3548 					vermsg->ver_minor =
3549 					    versions[idx].ver_minor;
3550 					ldcp->peer_hparams.ver_minor =
3551 					    versions[idx].ver_minor;
3552 				}
3553 				break;
3554 			}
3555 
3556 			idx++;
3557 
3558 			if (idx == VGEN_NUM_VER) {
3559 
3560 				/* no version match - send NACK */
3561 				tagp->vio_subtype = VIO_SUBTYPE_NACK;
3562 				vermsg->ver_major = 0;
3563 				vermsg->ver_minor = 0;
3564 				failed = 1;
3565 				break;
3566 			}
3567 
3568 		}
3569 
3570 		tagp->vio_sid = ldcp->local_sid;
3571 
3572 		/* send reply msg back to peer */
3573 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*vermsg),
3574 		    B_FALSE);
3575 		if (rv != VGEN_SUCCESS) {
3576 			return (rv);
3577 		}
3578 
3579 		if (ack) {
3580 			ldcp->hstate |= VER_ACK_SENT;
3581 			DBG2(vgenp, ldcp, "VER_ACK_SENT, ver(%d,%d) \n",
3582 			    vermsg->ver_major, vermsg->ver_minor);
3583 		}
3584 		if (failed) {
3585 			DWARN(vgenp, ldcp, "Negotiation Failed\n");
3586 			return (VGEN_FAILURE);
3587 		}
3588 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
3589 
3590 			/*  VER_ACK_SENT and VER_ACK_RCVD */
3591 
3592 			/* local and peer versions match? */
3593 			ASSERT((ldcp->local_hparams.ver_major ==
3594 			    ldcp->peer_hparams.ver_major) &&
3595 			    (ldcp->local_hparams.ver_minor ==
3596 			    ldcp->peer_hparams.ver_minor));
3597 
3598 			/* move to the next phase */
3599 			vgen_handshake(vh_nextphase(ldcp));
3600 		}
3601 
3602 		break;
3603 
3604 	case VIO_SUBTYPE_ACK:
3605 
3606 		if (ldcp->hphase != VH_PHASE1) {
3607 			/*  This should not happen. */
3608 			DWARN(vgenp, ldcp, "Invalid Phase(%u)\n", ldcp->hphase);
3609 			return (VGEN_FAILURE);
3610 		}
3611 
3612 		/* SUCCESS - we have agreed on a version */
3613 		ldcp->local_hparams.ver_major = vermsg->ver_major;
3614 		ldcp->local_hparams.ver_minor = vermsg->ver_minor;
3615 		ldcp->hstate |= VER_ACK_RCVD;
3616 
3617 		DBG2(vgenp, ldcp, "VER_ACK_RCVD, ver(%d,%d) \n",
3618 		    vermsg->ver_major,  vermsg->ver_minor);
3619 
3620 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
3621 
3622 			/*  VER_ACK_SENT and VER_ACK_RCVD */
3623 
3624 			/* local and peer versions match? */
3625 			ASSERT((ldcp->local_hparams.ver_major ==
3626 			    ldcp->peer_hparams.ver_major) &&
3627 			    (ldcp->local_hparams.ver_minor ==
3628 			    ldcp->peer_hparams.ver_minor));
3629 
3630 			/* move to the next phase */
3631 			vgen_handshake(vh_nextphase(ldcp));
3632 		}
3633 		break;
3634 
3635 	case VIO_SUBTYPE_NACK:
3636 
3637 		if (ldcp->hphase != VH_PHASE1) {
3638 			/*  This should not happen.  */
3639 			DWARN(vgenp, ldcp, "VER_NACK_RCVD Invalid "
3640 			"Phase(%u)\n", ldcp->hphase);
3641 			return (VGEN_FAILURE);
3642 		}
3643 
3644 		DBG2(vgenp, ldcp, "VER_NACK_RCVD next ver(%d,%d)\n",
3645 		    vermsg->ver_major, vermsg->ver_minor);
3646 
3647 		/* check if version in NACK is zero */
3648 		if (vermsg->ver_major == 0 && vermsg->ver_minor == 0) {
3649 			/*
3650 			 * Version Negotiation has failed.
3651 			 */
3652 			DWARN(vgenp, ldcp, "Version Negotiation Failed\n");
3653 			return (VGEN_FAILURE);
3654 		}
3655 
3656 		idx = 0;
3657 
3658 		for (;;) {
3659 
3660 			if (vermsg->ver_major > versions[idx].ver_major) {
3661 				/* select next lower version */
3662 
3663 				ldcp->local_hparams.ver_major =
3664 				    versions[idx].ver_major;
3665 				ldcp->local_hparams.ver_minor =
3666 				    versions[idx].ver_minor;
3667 				break;
3668 			}
3669 
3670 			if (vermsg->ver_major == versions[idx].ver_major) {
3671 				/* major version match */
3672 
3673 				ldcp->local_hparams.ver_major =
3674 				    versions[idx].ver_major;
3675 
3676 				ldcp->local_hparams.ver_minor =
3677 				    versions[idx].ver_minor;
3678 				break;
3679 			}
3680 
3681 			idx++;
3682 
3683 			if (idx == VGEN_NUM_VER) {
3684 				/*
3685 				 * no version match.
3686 				 * Version Negotiation has failed.
3687 				 */
3688 				DWARN(vgenp, ldcp,
3689 				    "Version Negotiation Failed\n");
3690 				return (VGEN_FAILURE);
3691 			}
3692 
3693 		}
3694 
3695 		rv = vgen_send_version_negotiate(ldcp);
3696 		if (rv != VGEN_SUCCESS) {
3697 			return (rv);
3698 		}
3699 
3700 		break;
3701 	}
3702 
3703 	DBG1(vgenp, ldcp, "exit\n");
3704 	return (VGEN_SUCCESS);
3705 }
3706 
3707 /* Check if the attributes are supported */
3708 static int
3709 vgen_check_attr_info(vgen_ldc_t *ldcp, vnet_attr_msg_t *msg)
3710 {
3711 	_NOTE(ARGUNUSED(ldcp))
3712 
3713 	/*
3714 	 * currently, we support these attr values:
3715 	 * mtu of ethernet, addr_type of mac, xfer_mode of
3716 	 * ldc shared memory, ack_freq of 0 (data is acked if
3717 	 * the ack bit is set in the descriptor) and the address should
3718 	 * match the address in the port node.
3719 	 */
3720 	if ((msg->mtu != ETHERMAX) ||
3721 	    (msg->addr_type != ADDR_TYPE_MAC) ||
3722 	    (msg->xfer_mode != VIO_DRING_MODE) ||
3723 	    (msg->ack_freq > 64)) {
3724 		return (VGEN_FAILURE);
3725 	}
3726 
3727 	return (VGEN_SUCCESS);
3728 }
3729 
3730 /*
3731  * Handle an attribute info msg from the peer or an ACK/NACK from the peer
3732  * to an attr info msg that we sent.
3733  */
3734 static int
3735 vgen_handle_attr_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
3736 {
3737 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3738 	vnet_attr_msg_t *attrmsg = (vnet_attr_msg_t *)tagp;
3739 	int		ack = 0;
3740 	int		rv = 0;
3741 
3742 	DBG1(vgenp, ldcp, "enter\n");
3743 	if (ldcp->hphase != VH_PHASE2) {
3744 		DWARN(vgenp, ldcp, "Rcvd ATTR_INFO subtype(%d),"
3745 		" Invalid Phase(%u)\n",
3746 		    tagp->vio_subtype, ldcp->hphase);
3747 		return (VGEN_FAILURE);
3748 	}
3749 	switch (tagp->vio_subtype) {
3750 	case VIO_SUBTYPE_INFO:
3751 
3752 		DBG2(vgenp, ldcp, "ATTR_INFO_RCVD \n");
3753 		ldcp->hstate |= ATTR_INFO_RCVD;
3754 
3755 		/* save peer's values */
3756 		ldcp->peer_hparams.mtu = attrmsg->mtu;
3757 		ldcp->peer_hparams.addr = attrmsg->addr;
3758 		ldcp->peer_hparams.addr_type = attrmsg->addr_type;
3759 		ldcp->peer_hparams.xfer_mode = attrmsg->xfer_mode;
3760 		ldcp->peer_hparams.ack_freq = attrmsg->ack_freq;
3761 
3762 		if (vgen_check_attr_info(ldcp, attrmsg) == VGEN_FAILURE) {
3763 			/* unsupported attr, send NACK */
3764 			tagp->vio_subtype = VIO_SUBTYPE_NACK;
3765 		} else {
3766 			ack = 1;
3767 			tagp->vio_subtype = VIO_SUBTYPE_ACK;
3768 		}
3769 		tagp->vio_sid = ldcp->local_sid;
3770 
3771 		/* send reply msg back to peer */
3772 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*attrmsg),
3773 		    B_FALSE);
3774 		if (rv != VGEN_SUCCESS) {
3775 			return (rv);
3776 		}
3777 
3778 		if (ack) {
3779 			ldcp->hstate |= ATTR_ACK_SENT;
3780 			DBG2(vgenp, ldcp, "ATTR_ACK_SENT \n");
3781 		} else {
3782 			/* failed */
3783 			DWARN(vgenp, ldcp, "ATTR_NACK_SENT \n");
3784 			return (VGEN_FAILURE);
3785 		}
3786 
3787 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
3788 			vgen_handshake(vh_nextphase(ldcp));
3789 		}
3790 
3791 		break;
3792 
3793 	case VIO_SUBTYPE_ACK:
3794 
3795 		ldcp->hstate |= ATTR_ACK_RCVD;
3796 
3797 		DBG2(vgenp, ldcp, "ATTR_ACK_RCVD \n");
3798 
3799 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
3800 			vgen_handshake(vh_nextphase(ldcp));
3801 		}
3802 		break;
3803 
3804 	case VIO_SUBTYPE_NACK:
3805 
3806 		DBG2(vgenp, ldcp, "ATTR_NACK_RCVD \n");
3807 		return (VGEN_FAILURE);
3808 	}
3809 	DBG1(vgenp, ldcp, "exit\n");
3810 	return (VGEN_SUCCESS);
3811 }
3812 
3813 /* Check if the dring info msg is ok */
3814 static int
3815 vgen_check_dring_reg(vio_dring_reg_msg_t *msg)
3816 {
3817 	/* check if msg contents are ok */
3818 	if ((msg->num_descriptors < 128) || (msg->descriptor_size <
3819 	    sizeof (vnet_public_desc_t))) {
3820 		return (VGEN_FAILURE);
3821 	}
3822 	return (VGEN_SUCCESS);
3823 }
3824 
3825 /*
3826  * Handle a descriptor ring register msg from the peer or an ACK/NACK from
3827  * the peer to a dring register msg that we sent.
3828  */
3829 static int
3830 vgen_handle_dring_reg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
3831 {
3832 	vio_dring_reg_msg_t *msg = (vio_dring_reg_msg_t *)tagp;
3833 	ldc_mem_cookie_t dcookie;
3834 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3835 	int ack = 0;
3836 	int rv = 0;
3837 
3838 	DBG1(vgenp, ldcp, "enter\n");
3839 	if (ldcp->hphase < VH_PHASE2) {
3840 		/* dring_info can be rcvd in any of the phases after Phase1 */
3841 		DWARN(vgenp, ldcp,
3842 		    "Rcvd DRING_INFO Subtype (%d), Invalid Phase(%u)\n",
3843 		    tagp->vio_subtype, ldcp->hphase);
3844 		return (VGEN_FAILURE);
3845 	}
3846 	switch (tagp->vio_subtype) {
3847 	case VIO_SUBTYPE_INFO:
3848 
3849 		DBG2(vgenp, ldcp, "DRING_INFO_RCVD \n");
3850 		ldcp->hstate |= DRING_INFO_RCVD;
3851 		bcopy((msg->cookie), &dcookie, sizeof (dcookie));
3852 
3853 		ASSERT(msg->ncookies == 1);
3854 
3855 		if (vgen_check_dring_reg(msg) == VGEN_SUCCESS) {
3856 			/*
3857 			 * verified dring info msg to be ok,
3858 			 * now try to map the remote dring.
3859 			 */
3860 			rv = vgen_init_rxds(ldcp, msg->num_descriptors,
3861 			    msg->descriptor_size, &dcookie,
3862 			    msg->ncookies);
3863 			if (rv == DDI_SUCCESS) {
3864 				/* now we can ack the peer */
3865 				ack = 1;
3866 			}
3867 		}
3868 		if (ack == 0) {
3869 			/* failed, send NACK */
3870 			tagp->vio_subtype = VIO_SUBTYPE_NACK;
3871 		} else {
3872 			if (!(ldcp->peer_hparams.dring_ready)) {
3873 
3874 				/* save peer's dring_info values */
3875 				bcopy(&dcookie,
3876 				    &(ldcp->peer_hparams.dring_cookie),
3877 				    sizeof (dcookie));
3878 				ldcp->peer_hparams.num_desc =
3879 				    msg->num_descriptors;
3880 				ldcp->peer_hparams.desc_size =
3881 				    msg->descriptor_size;
3882 				ldcp->peer_hparams.num_dcookies =
3883 				    msg->ncookies;
3884 
3885 				/* set dring_ident for the peer */
3886 				ldcp->peer_hparams.dring_ident =
3887 				    (uint64_t)ldcp->rxdp;
3888 				/* return the dring_ident in ack msg */
3889 				msg->dring_ident =
3890 				    (uint64_t)ldcp->rxdp;
3891 
3892 				ldcp->peer_hparams.dring_ready = B_TRUE;
3893 			}
3894 			tagp->vio_subtype = VIO_SUBTYPE_ACK;
3895 		}
3896 		tagp->vio_sid = ldcp->local_sid;
3897 		/* send reply msg back to peer */
3898 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msg),
3899 		    B_FALSE);
3900 		if (rv != VGEN_SUCCESS) {
3901 			return (rv);
3902 		}
3903 
3904 		if (ack) {
3905 			ldcp->hstate |= DRING_ACK_SENT;
3906 			DBG2(vgenp, ldcp, "DRING_ACK_SENT");
3907 		} else {
3908 			DWARN(vgenp, ldcp, "DRING_NACK_SENT");
3909 			return (VGEN_FAILURE);
3910 		}
3911 
3912 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
3913 			vgen_handshake(vh_nextphase(ldcp));
3914 		}
3915 
3916 		break;
3917 
3918 	case VIO_SUBTYPE_ACK:
3919 
3920 		ldcp->hstate |= DRING_ACK_RCVD;
3921 
3922 		DBG2(vgenp, ldcp, "DRING_ACK_RCVD");
3923 
3924 		if (!(ldcp->local_hparams.dring_ready)) {
3925 			/* local dring is now ready */
3926 			ldcp->local_hparams.dring_ready = B_TRUE;
3927 
3928 			/* save dring_ident acked by peer */
3929 			ldcp->local_hparams.dring_ident =
3930 			    msg->dring_ident;
3931 		}
3932 
3933 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
3934 			vgen_handshake(vh_nextphase(ldcp));
3935 		}
3936 
3937 		break;
3938 
3939 	case VIO_SUBTYPE_NACK:
3940 
3941 		DBG2(vgenp, ldcp, "DRING_NACK_RCVD");
3942 		return (VGEN_FAILURE);
3943 	}
3944 	DBG1(vgenp, ldcp, "exit\n");
3945 	return (VGEN_SUCCESS);
3946 }
3947 
3948 /*
3949  * Handle a rdx info msg from the peer or an ACK/NACK
3950  * from the peer to a rdx info msg that we sent.
3951  */
3952 static int
3953 vgen_handle_rdx_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
3954 {
3955 	int rv = 0;
3956 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
3957 
3958 	DBG1(vgenp, ldcp, "enter\n");
3959 	if (ldcp->hphase != VH_PHASE3) {
3960 		DWARN(vgenp, ldcp,
3961 		    "Rcvd RDX_INFO Subtype (%d), Invalid Phase(%u)\n",
3962 		    tagp->vio_subtype, ldcp->hphase);
3963 		return (VGEN_FAILURE);
3964 	}
3965 	switch (tagp->vio_subtype) {
3966 	case VIO_SUBTYPE_INFO:
3967 
3968 		DBG2(vgenp, ldcp, "RDX_INFO_RCVD \n");
3969 		ldcp->hstate |= RDX_INFO_RCVD;
3970 
3971 		tagp->vio_subtype = VIO_SUBTYPE_ACK;
3972 		tagp->vio_sid = ldcp->local_sid;
3973 		/* send reply msg back to peer */
3974 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (vio_rdx_msg_t),
3975 		    B_FALSE);
3976 		if (rv != VGEN_SUCCESS) {
3977 			return (rv);
3978 		}
3979 
3980 		ldcp->hstate |= RDX_ACK_SENT;
3981 		DBG2(vgenp, ldcp, "RDX_ACK_SENT \n");
3982 
3983 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
3984 			vgen_handshake(vh_nextphase(ldcp));
3985 		}
3986 
3987 		break;
3988 
3989 	case VIO_SUBTYPE_ACK:
3990 
3991 		ldcp->hstate |= RDX_ACK_RCVD;
3992 
3993 		DBG2(vgenp, ldcp, "RDX_ACK_RCVD \n");
3994 
3995 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
3996 			vgen_handshake(vh_nextphase(ldcp));
3997 		}
3998 		break;
3999 
4000 	case VIO_SUBTYPE_NACK:
4001 
4002 		DBG2(vgenp, ldcp, "RDX_NACK_RCVD \n");
4003 		return (VGEN_FAILURE);
4004 	}
4005 	DBG1(vgenp, ldcp, "exit\n");
4006 	return (VGEN_SUCCESS);
4007 }
4008 
4009 /* Handle ACK/NACK from vsw to a set multicast msg that we sent */
4010 static int
4011 vgen_handle_mcast_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4012 {
4013 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4014 	vnet_mcast_msg_t *msgp = (vnet_mcast_msg_t *)tagp;
4015 	struct ether_addr *addrp;
4016 	int count;
4017 	int i;
4018 
4019 	DBG1(vgenp, ldcp, "enter\n");
4020 	switch (tagp->vio_subtype) {
4021 
4022 	case VIO_SUBTYPE_INFO:
4023 
4024 		/* vnet shouldn't recv set mcast msg, only vsw handles it */
4025 		DWARN(vgenp, ldcp, "rcvd SET_MCAST_INFO \n");
4026 		break;
4027 
4028 	case VIO_SUBTYPE_ACK:
4029 
4030 		/* success adding/removing multicast addr */
4031 		DBG1(vgenp, ldcp, "rcvd SET_MCAST_ACK \n");
4032 		break;
4033 
4034 	case VIO_SUBTYPE_NACK:
4035 
4036 		DWARN(vgenp, ldcp, "rcvd SET_MCAST_NACK \n");
4037 		if (!(msgp->set)) {
4038 			/* multicast remove request failed */
4039 			break;
4040 		}
4041 
4042 		/* multicast add request failed */
4043 		for (count = 0; count < msgp->count; count++) {
4044 			addrp = &(msgp->mca[count]);
4045 
4046 			/* delete address from the table */
4047 			for (i = 0; i < vgenp->mccount; i++) {
4048 				if (ether_cmp(addrp,
4049 				    &(vgenp->mctab[i])) == 0) {
4050 					if (vgenp->mccount > 1) {
4051 						int t = vgenp->mccount - 1;
4052 						vgenp->mctab[i] =
4053 						    vgenp->mctab[t];
4054 					}
4055 					vgenp->mccount--;
4056 					break;
4057 				}
4058 			}
4059 		}
4060 		break;
4061 
4062 	}
4063 	DBG1(vgenp, ldcp, "exit\n");
4064 
4065 	return (VGEN_SUCCESS);
4066 }
4067 
4068 /* handler for control messages received from the peer ldc end-point */
4069 static int
4070 vgen_handle_ctrlmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4071 {
4072 	int rv = 0;
4073 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4074 
4075 	DBG1(vgenp, ldcp, "enter\n");
4076 	switch (tagp->vio_subtype_env) {
4077 
4078 	case VIO_VER_INFO:
4079 		rv = vgen_handle_version_negotiate(ldcp, tagp);
4080 		break;
4081 
4082 	case VIO_ATTR_INFO:
4083 		rv = vgen_handle_attr_info(ldcp, tagp);
4084 		break;
4085 
4086 	case VIO_DRING_REG:
4087 		rv = vgen_handle_dring_reg(ldcp, tagp);
4088 		break;
4089 
4090 	case VIO_RDX:
4091 		rv = vgen_handle_rdx_info(ldcp, tagp);
4092 		break;
4093 
4094 	case VNET_MCAST_INFO:
4095 		rv = vgen_handle_mcast_info(ldcp, tagp);
4096 		break;
4097 
4098 	}
4099 
4100 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
4101 	return (rv);
4102 }
4103 
4104 /* handler for data messages received from the peer ldc end-point */
4105 static int
4106 vgen_handle_datamsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4107 {
4108 	int rv = 0;
4109 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4110 
4111 	DBG1(vgenp, ldcp, "enter\n");
4112 
4113 	if (ldcp->hphase != VH_DONE)
4114 		return (rv);
4115 	switch (tagp->vio_subtype_env) {
4116 	case VIO_DRING_DATA:
4117 		rv = vgen_handle_dring_data(ldcp, tagp);
4118 		break;
4119 	default:
4120 		break;
4121 	}
4122 
4123 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
4124 	return (rv);
4125 }
4126 
4127 static int
4128 vgen_send_dring_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp, uint32_t start,
4129     int32_t end, uint8_t pstate)
4130 {
4131 	int rv = 0;
4132 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4133 	vio_dring_msg_t *msgp = (vio_dring_msg_t *)tagp;
4134 
4135 	tagp->vio_subtype = VIO_SUBTYPE_ACK;
4136 	tagp->vio_sid = ldcp->local_sid;
4137 	msgp->start_idx = start;
4138 	msgp->end_idx = end;
4139 	msgp->dring_process_state = pstate;
4140 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msgp), B_FALSE);
4141 	if (rv != VGEN_SUCCESS) {
4142 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
4143 	}
4144 	return (rv);
4145 }
4146 
4147 static int
4148 vgen_handle_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4149 {
4150 	int rv = 0;
4151 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4152 
4153 
4154 	DBG1(vgenp, ldcp, "enter\n");
4155 	switch (tagp->vio_subtype) {
4156 
4157 	case VIO_SUBTYPE_INFO:
4158 		/*
4159 		 * To reduce the locking contention, release the
4160 		 * cblock here and re-acquire it once we are done
4161 		 * receiving packets.
4162 		 */
4163 		mutex_exit(&ldcp->cblock);
4164 		mutex_enter(&ldcp->rxlock);
4165 		rv = vgen_handle_dring_data_info(ldcp, tagp);
4166 		mutex_exit(&ldcp->rxlock);
4167 		mutex_enter(&ldcp->cblock);
4168 		break;
4169 
4170 	case VIO_SUBTYPE_ACK:
4171 		rv = vgen_handle_dring_data_ack(ldcp, tagp);
4172 		break;
4173 
4174 	case VIO_SUBTYPE_NACK:
4175 		rv = vgen_handle_dring_data_nack(ldcp, tagp);
4176 		break;
4177 	}
4178 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
4179 	return (rv);
4180 }
4181 
4182 static int
4183 vgen_handle_dring_data_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4184 {
4185 	uint32_t start;
4186 	int32_t end;
4187 	int rv = 0;
4188 	vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;
4189 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4190 #ifdef VGEN_HANDLE_LOST_PKTS
4191 	vgen_stats_t *statsp = &ldcp->stats;
4192 	uint32_t rxi;
4193 	int n;
4194 #endif
4195 
4196 	DBG1(vgenp, ldcp, "enter\n");
4197 
4198 	start = dringmsg->start_idx;
4199 	end = dringmsg->end_idx;
4200 	/*
4201 	 * received a data msg, which contains the start and end
4202 	 * indices of the descriptors within the rx ring holding data,
4203 	 * the seq_num of data packet corresponding to the start index,
4204 	 * and the dring_ident.
4205 	 * We can now read the contents of each of these descriptors
4206 	 * and gather data from it.
4207 	 */
4208 	DBG1(vgenp, ldcp, "INFO: start(%d), end(%d)\n",
4209 	    start, end);
4210 
4211 	/* validate rx start and end indeces */
4212 	if (!(CHECK_RXI(start, ldcp)) || ((end != -1) &&
4213 	    !(CHECK_RXI(end, ldcp)))) {
4214 		DWARN(vgenp, ldcp, "Invalid Rx start(%d) or end(%d)\n",
4215 		    start, end);
4216 		/* drop the message if invalid index */
4217 		return (rv);
4218 	}
4219 
4220 	/* validate dring_ident */
4221 	if (dringmsg->dring_ident != ldcp->peer_hparams.dring_ident) {
4222 		DWARN(vgenp, ldcp, "Invalid dring ident 0x%x\n",
4223 		    dringmsg->dring_ident);
4224 		/* invalid dring_ident, drop the msg */
4225 		return (rv);
4226 	}
4227 #ifdef DEBUG
4228 	if (vgen_trigger_rxlost) {
4229 		/* drop this msg to simulate lost pkts for debugging */
4230 		vgen_trigger_rxlost = 0;
4231 		return (rv);
4232 	}
4233 #endif
4234 
4235 #ifdef	VGEN_HANDLE_LOST_PKTS
4236 
4237 	/* receive start index doesn't match expected index */
4238 	if (ldcp->next_rxi != start) {
4239 		DWARN(vgenp, ldcp, "next_rxi(%d) != start(%d)\n",
4240 		    ldcp->next_rxi, start);
4241 
4242 		/* calculate the number of pkts lost */
4243 		if (start >= ldcp->next_rxi) {
4244 			n = start - ldcp->next_rxi;
4245 		} else  {
4246 			n = ldcp->num_rxds - (ldcp->next_rxi - start);
4247 		}
4248 
4249 		/*
4250 		 * sequence number of dring data message
4251 		 * is less than the next sequence number that
4252 		 * is expected:
4253 		 *
4254 		 * drop the message and the corresponding packets.
4255 		 */
4256 		if (ldcp->next_rxseq > dringmsg->seq_num) {
4257 			DWARN(vgenp, ldcp, "dropping pkts, expected "
4258 			"rxseq(0x%lx) > recvd(0x%lx)\n",
4259 			    ldcp->next_rxseq, dringmsg->seq_num);
4260 			/*
4261 			 * duplicate/multiple retransmissions from
4262 			 * sender?? drop this msg.
4263 			 */
4264 			return (rv);
4265 		}
4266 
4267 		/*
4268 		 * sequence number of dring data message
4269 		 * is greater than the next expected sequence number
4270 		 *
4271 		 * send a NACK back to the peer to indicate lost
4272 		 * packets.
4273 		 */
4274 		if (dringmsg->seq_num > ldcp->next_rxseq) {
4275 			statsp->rx_lost_pkts += n;
4276 			tagp->vio_subtype = VIO_SUBTYPE_NACK;
4277 			tagp->vio_sid = ldcp->local_sid;
4278 			/* indicate the range of lost descriptors */
4279 			dringmsg->start_idx = ldcp->next_rxi;
4280 			rxi = start;
4281 			DECR_RXI(rxi, ldcp);
4282 			dringmsg->end_idx = rxi;
4283 			/* dring ident is left unchanged */
4284 			rv = vgen_sendmsg(ldcp, (caddr_t)tagp,
4285 			    sizeof (*dringmsg), B_FALSE);
4286 			if (rv != VGEN_SUCCESS) {
4287 				DWARN(vgenp, ldcp,
4288 				    "vgen_sendmsg failed, stype:NACK\n");
4289 				return (rv);
4290 			}
4291 #ifdef VGEN_REXMIT
4292 			/*
4293 			 * stop further processing until peer
4294 			 * retransmits with the right index.
4295 			 * update next_rxseq expected.
4296 			 */
4297 			ldcp->next_rxseq += 1;
4298 			return (rv);
4299 #else	/* VGEN_REXMIT */
4300 			/*
4301 			 * treat this range of descrs/pkts as dropped
4302 			 * and set the new expected values for next_rxi
4303 			 * and next_rxseq. continue(below) to process
4304 			 * from the new start index.
4305 			 */
4306 			ldcp->next_rxi = start;
4307 			ldcp->next_rxseq += 1;
4308 #endif	/* VGEN_REXMIT */
4309 
4310 		} else if (dringmsg->seq_num == ldcp->next_rxseq) {
4311 			/*
4312 			 * expected and received seqnums match, but
4313 			 * the descriptor indeces don't?
4314 			 *
4315 			 * restart handshake with peer.
4316 			 */
4317 			DWARN(vgenp, ldcp, "next_rxseq(0x%lx)=="
4318 			    "seq_num(0x%lx)\n", ldcp->next_rxseq,
4319 			    dringmsg->seq_num);
4320 
4321 		}
4322 
4323 	} else {
4324 		/* expected and start dring indeces match */
4325 
4326 		if (dringmsg->seq_num != ldcp->next_rxseq) {
4327 
4328 			/* seqnums don't match */
4329 
4330 			DWARN(vgenp, ldcp,
4331 			    "next_rxseq(0x%lx) != seq_num(0x%lx)\n",
4332 			    ldcp->next_rxseq, dringmsg->seq_num);
4333 		}
4334 	}
4335 
4336 #endif	/* VGEN_HANDLE_LOST_PKTS */
4337 
4338 	/* Now receive messages */
4339 	rv = vgen_process_dring_data(ldcp, tagp);
4340 
4341 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
4342 	return (rv);
4343 }
4344 
4345 static int
4346 vgen_process_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4347 {
4348 	boolean_t set_ack_start = B_FALSE;
4349 	uint32_t start;
4350 	uint32_t ack_end;
4351 	uint32_t next_rxi;
4352 	uint32_t rxi;
4353 	int count = 0;
4354 	int rv = 0;
4355 	uint32_t retries = 0;
4356 	vgen_stats_t *statsp;
4357 	vnet_public_desc_t *rxdp;
4358 	vio_dring_entry_hdr_t *hdrp;
4359 	mblk_t *bp = NULL;
4360 	mblk_t *bpt = NULL;
4361 	uint32_t ack_start;
4362 	uint32_t datalen;
4363 	uint32_t ncookies;
4364 	boolean_t rxd_err = B_FALSE;
4365 	mblk_t *mp = NULL;
4366 	size_t nbytes;
4367 	boolean_t ack_needed = B_FALSE;
4368 	size_t nread;
4369 	uint64_t off = 0;
4370 	struct ether_header *ehp;
4371 	vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;
4372 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4373 
4374 	DBG1(vgenp, ldcp, "enter\n");
4375 
4376 	statsp = &ldcp->stats;
4377 	start = dringmsg->start_idx;
4378 
4379 	/*
4380 	 * start processing the descriptors from the specified
4381 	 * start index, up to the index a descriptor is not ready
4382 	 * to be processed or we process the entire descriptor ring
4383 	 * and wrap around upto the start index.
4384 	 */
4385 
4386 	/* need to set the start index of descriptors to be ack'd */
4387 	set_ack_start = B_TRUE;
4388 
4389 	/* index upto which we have ack'd */
4390 	ack_end = start;
4391 	DECR_RXI(ack_end, ldcp);
4392 
4393 	next_rxi = rxi =  start;
4394 	do {
4395 vgen_recv_retry:
4396 		rv = ldc_mem_dring_acquire(ldcp->rx_dhandle, rxi, rxi);
4397 		if (rv != 0) {
4398 			DWARN(vgenp, ldcp, "ldc_mem_dring_acquire() failed"
4399 			    " rv(%d)\n", rv);
4400 			statsp->ierrors++;
4401 			return (rv);
4402 		}
4403 
4404 		rxdp = &(ldcp->rxdp[rxi]);
4405 		hdrp = &rxdp->hdr;
4406 
4407 		if (hdrp->dstate != VIO_DESC_READY) {
4408 			/*
4409 			 * Before waiting and retry here, queue
4410 			 * the messages that are received already.
4411 			 * This will help the soft interrupt to
4412 			 * send them up with less latency.
4413 			 */
4414 			if (bp != NULL) {
4415 				DTRACE_PROBE1(vgen_rcv_msgs, int, count);
4416 				vgen_ldc_queue_data(ldcp, bp, bpt);
4417 				count = 0;
4418 				bp = bpt = NULL;
4419 			}
4420 			/*
4421 			 * descriptor is not ready.
4422 			 * retry descriptor acquire, stop processing
4423 			 * after max # retries.
4424 			 */
4425 			if (retries == vgen_recv_retries)
4426 				break;
4427 			retries++;
4428 			drv_usecwait(vgen_recv_delay);
4429 			goto vgen_recv_retry;
4430 		}
4431 		retries = 0;
4432 
4433 		if (set_ack_start) {
4434 			/*
4435 			 * initialize the start index of the range
4436 			 * of descriptors to be ack'd.
4437 			 */
4438 			ack_start = rxi;
4439 			set_ack_start = B_FALSE;
4440 		}
4441 
4442 		datalen = rxdp->nbytes;
4443 		ncookies = rxdp->ncookies;
4444 		if ((datalen < ETHERMIN) ||
4445 		    (ncookies == 0) ||
4446 		    (ncookies > MAX_COOKIES)) {
4447 			rxd_err = B_TRUE;
4448 		} else {
4449 			/*
4450 			 * Try to allocate an mblk from the free pool
4451 			 * of recv mblks for the channel.
4452 			 * If this fails, use allocb().
4453 			 */
4454 			nbytes = (VNET_IPALIGN + datalen + 7) & ~7;
4455 			mp = vio_multipool_allocb(&ldcp->vmp, nbytes);
4456 			if (!mp) {
4457 				/*
4458 				 * The data buffer returned by
4459 				 * allocb(9F) is 8byte aligned. We
4460 				 * allocate extra 8 bytes to ensure
4461 				 * size is multiple of 8 bytes for
4462 				 * ldc_mem_copy().
4463 				 */
4464 				statsp->rx_vio_allocb_fail++;
4465 				mp = allocb(VNET_IPALIGN + datalen + 8,
4466 				    BPRI_MED);
4467 			}
4468 		}
4469 		if ((rxd_err) || (mp == NULL)) {
4470 			/*
4471 			 * rxd_err or allocb() failure,
4472 			 * drop this packet, get next.
4473 			 */
4474 			if (rxd_err) {
4475 				statsp->ierrors++;
4476 				rxd_err = B_FALSE;
4477 			} else {
4478 				statsp->rx_allocb_fail++;
4479 			}
4480 
4481 			ack_needed = hdrp->ack;
4482 
4483 			/* set descriptor done bit */
4484 			hdrp->dstate = VIO_DESC_DONE;
4485 
4486 			rv = ldc_mem_dring_release(ldcp->rx_dhandle,
4487 			    rxi, rxi);
4488 			if (rv != 0) {
4489 				DWARN(vgenp, ldcp,
4490 				    "ldc_mem_dring_release err rv(%d)\n", rv);
4491 				return (rv);
4492 			}
4493 
4494 			if (ack_needed) {
4495 				ack_needed = B_FALSE;
4496 				/*
4497 				 * sender needs ack for this packet,
4498 				 * ack pkts upto this index.
4499 				 */
4500 				ack_end = rxi;
4501 
4502 				rv = vgen_send_dring_ack(ldcp, tagp,
4503 				    ack_start, ack_end,
4504 				    VIO_DP_ACTIVE);
4505 				if (rv != VGEN_SUCCESS) {
4506 					goto error_ret;
4507 				}
4508 
4509 				/* need to set new ack start index */
4510 				set_ack_start = B_TRUE;
4511 			}
4512 			goto vgen_next_rxi;
4513 		}
4514 
4515 		nread = nbytes;
4516 		rv = ldc_mem_copy(ldcp->ldc_handle,
4517 		    (caddr_t)mp->b_rptr, off, &nread,
4518 		    rxdp->memcookie, ncookies, LDC_COPY_IN);
4519 
4520 		/* if ldc_mem_copy() failed */
4521 		if (rv) {
4522 			DWARN(vgenp, ldcp, "ldc_mem_copy err rv(%d)\n", rv);
4523 			statsp->ierrors++;
4524 			freemsg(mp);
4525 			goto error_ret;
4526 		}
4527 
4528 		ack_needed = hdrp->ack;
4529 		hdrp->dstate = VIO_DESC_DONE;
4530 
4531 		rv = ldc_mem_dring_release(ldcp->rx_dhandle, rxi, rxi);
4532 		if (rv != 0) {
4533 			DWARN(vgenp, ldcp,
4534 			    "ldc_mem_dring_release err rv(%d)\n", rv);
4535 			goto error_ret;
4536 		}
4537 
4538 		mp->b_rptr += VNET_IPALIGN;
4539 
4540 		if (ack_needed) {
4541 			ack_needed = B_FALSE;
4542 			/*
4543 			 * sender needs ack for this packet,
4544 			 * ack pkts upto this index.
4545 			 */
4546 			ack_end = rxi;
4547 
4548 			rv = vgen_send_dring_ack(ldcp, tagp,
4549 			    ack_start, ack_end, VIO_DP_ACTIVE);
4550 			if (rv != VGEN_SUCCESS) {
4551 				goto error_ret;
4552 			}
4553 
4554 			/* need to set new ack start index */
4555 			set_ack_start = B_TRUE;
4556 		}
4557 
4558 		if (nread != nbytes) {
4559 			DWARN(vgenp, ldcp,
4560 			    "ldc_mem_copy nread(%lx), nbytes(%lx)\n",
4561 			    nread, nbytes);
4562 			statsp->ierrors++;
4563 			freemsg(mp);
4564 			goto vgen_next_rxi;
4565 		}
4566 
4567 		/* point to the actual end of data */
4568 		mp->b_wptr = mp->b_rptr + datalen;
4569 
4570 		/* update stats */
4571 		statsp->ipackets++;
4572 		statsp->rbytes += datalen;
4573 		ehp = (struct ether_header *)mp->b_rptr;
4574 		if (IS_BROADCAST(ehp))
4575 			statsp->brdcstrcv++;
4576 		else if (IS_MULTICAST(ehp))
4577 			statsp->multircv++;
4578 
4579 		/* build a chain of received packets */
4580 		if (bp == NULL) {
4581 			/* first pkt */
4582 			bp = mp;
4583 			bpt = bp;
4584 			bpt->b_next = NULL;
4585 		} else {
4586 			mp->b_next = NULL;
4587 			bpt->b_next = mp;
4588 			bpt = mp;
4589 		}
4590 
4591 		if (count++ > vgen_chain_len) {
4592 			DTRACE_PROBE1(vgen_rcv_msgs, int, count);
4593 			vgen_ldc_queue_data(ldcp, bp, bpt);
4594 			count = 0;
4595 			bp = bpt = NULL;
4596 		}
4597 
4598 vgen_next_rxi:
4599 		/* update end index of range of descrs to be ack'd */
4600 		ack_end = rxi;
4601 
4602 		/* update the next index to be processed */
4603 		INCR_RXI(next_rxi, ldcp);
4604 		if (next_rxi == start) {
4605 			/*
4606 			 * processed the entire descriptor ring upto
4607 			 * the index at which we started.
4608 			 */
4609 			break;
4610 		}
4611 
4612 		rxi = next_rxi;
4613 
4614 	_NOTE(CONSTCOND)
4615 	} while (1);
4616 
4617 	/*
4618 	 * send an ack message to peer indicating that we have stopped
4619 	 * processing descriptors.
4620 	 */
4621 	if (set_ack_start) {
4622 		/*
4623 		 * We have ack'd upto some index and we have not
4624 		 * processed any descriptors beyond that index.
4625 		 * Use the last ack'd index as both the start and
4626 		 * end of range of descrs being ack'd.
4627 		 * Note: This results in acking the last index twice
4628 		 * and should be harmless.
4629 		 */
4630 		ack_start = ack_end;
4631 	}
4632 
4633 	rv = vgen_send_dring_ack(ldcp, tagp, ack_start, ack_end,
4634 	    VIO_DP_STOPPED);
4635 	if (rv != VGEN_SUCCESS) {
4636 		goto error_ret;
4637 	}
4638 
4639 	/* save new recv index and expected seqnum of next dring msg */
4640 	ldcp->next_rxi = next_rxi;
4641 	ldcp->next_rxseq += 1;
4642 
4643 error_ret:
4644 	/* queue the packets received so far */
4645 	if (bp != NULL) {
4646 		DTRACE_PROBE1(vgen_rcv_msgs, int, count);
4647 		vgen_ldc_queue_data(ldcp, bp, bpt);
4648 		bp = bpt = NULL;
4649 	}
4650 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
4651 	return (rv);
4652 
4653 }
4654 
4655 static int
4656 vgen_handle_dring_data_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4657 {
4658 	int rv = 0;
4659 	uint32_t start;
4660 	int32_t end;
4661 	uint32_t txi;
4662 	boolean_t ready_txd = B_FALSE;
4663 	vgen_stats_t *statsp;
4664 	vgen_private_desc_t *tbufp;
4665 	vnet_public_desc_t *txdp;
4666 	vio_dring_entry_hdr_t *hdrp;
4667 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4668 	vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;
4669 
4670 	DBG1(vgenp, ldcp, "enter\n");
4671 	start = dringmsg->start_idx;
4672 	end = dringmsg->end_idx;
4673 	statsp = &ldcp->stats;
4674 
4675 	/*
4676 	 * received an ack corresponding to a specific descriptor for
4677 	 * which we had set the ACK bit in the descriptor (during
4678 	 * transmit). This enables us to reclaim descriptors.
4679 	 */
4680 
4681 	DBG2(vgenp, ldcp, "ACK:  start(%d), end(%d)\n", start, end);
4682 
4683 	/* validate start and end indeces in the tx ack msg */
4684 	if (!(CHECK_TXI(start, ldcp)) || !(CHECK_TXI(end, ldcp))) {
4685 		/* drop the message if invalid index */
4686 		DWARN(vgenp, ldcp, "Invalid Tx ack start(%d) or end(%d)\n",
4687 		    start, end);
4688 		return (rv);
4689 	}
4690 	/* validate dring_ident */
4691 	if (dringmsg->dring_ident != ldcp->local_hparams.dring_ident) {
4692 		/* invalid dring_ident, drop the msg */
4693 		DWARN(vgenp, ldcp, "Invalid dring ident 0x%x\n",
4694 		    dringmsg->dring_ident);
4695 		return (rv);
4696 	}
4697 	statsp->dring_data_acks++;
4698 
4699 	/* reclaim descriptors that are done */
4700 	vgen_reclaim(ldcp);
4701 
4702 	if (dringmsg->dring_process_state != VIO_DP_STOPPED) {
4703 		/*
4704 		 * receiver continued processing descriptors after
4705 		 * sending us the ack.
4706 		 */
4707 		return (rv);
4708 	}
4709 
4710 	statsp->dring_stopped_acks++;
4711 
4712 	/* receiver stopped processing descriptors */
4713 	mutex_enter(&ldcp->wrlock);
4714 	mutex_enter(&ldcp->tclock);
4715 
4716 	/*
4717 	 * determine if there are any pending tx descriptors
4718 	 * ready to be processed by the receiver(peer) and if so,
4719 	 * send a message to the peer to restart receiving.
4720 	 */
4721 	ready_txd = B_FALSE;
4722 
4723 	/*
4724 	 * using the end index of the descriptor range for which
4725 	 * we received the ack, check if the next descriptor is
4726 	 * ready.
4727 	 */
4728 	txi = end;
4729 	INCR_TXI(txi, ldcp);
4730 	tbufp = &ldcp->tbufp[txi];
4731 	txdp = tbufp->descp;
4732 	hdrp = &txdp->hdr;
4733 	if (hdrp->dstate == VIO_DESC_READY) {
4734 		ready_txd = B_TRUE;
4735 	} else {
4736 		/*
4737 		 * descr next to the end of ack'd descr range is not
4738 		 * ready.
4739 		 * starting from the current reclaim index, check
4740 		 * if any descriptor is ready.
4741 		 */
4742 
4743 		txi = ldcp->cur_tbufp - ldcp->tbufp;
4744 		tbufp = &ldcp->tbufp[txi];
4745 
4746 		txdp = tbufp->descp;
4747 		hdrp = &txdp->hdr;
4748 		if (hdrp->dstate == VIO_DESC_READY) {
4749 			ready_txd = B_TRUE;
4750 		}
4751 
4752 	}
4753 
4754 	if (ready_txd) {
4755 		/*
4756 		 * we have tx descriptor(s) ready to be
4757 		 * processed by the receiver.
4758 		 * send a message to the peer with the start index
4759 		 * of ready descriptors.
4760 		 */
4761 		rv = vgen_send_dring_data(ldcp, txi, -1);
4762 		if (rv != VGEN_SUCCESS) {
4763 			ldcp->resched_peer = B_TRUE;
4764 			ldcp->resched_peer_txi = txi;
4765 			mutex_exit(&ldcp->tclock);
4766 			mutex_exit(&ldcp->wrlock);
4767 			return (rv);
4768 		}
4769 	} else {
4770 		/*
4771 		 * no ready tx descriptors. set the flag to send a
4772 		 * message to peer when tx descriptors are ready in
4773 		 * transmit routine.
4774 		 */
4775 		ldcp->resched_peer = B_TRUE;
4776 		ldcp->resched_peer_txi = ldcp->cur_tbufp - ldcp->tbufp;
4777 	}
4778 
4779 	mutex_exit(&ldcp->tclock);
4780 	mutex_exit(&ldcp->wrlock);
4781 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
4782 	return (rv);
4783 }
4784 
4785 static int
4786 vgen_handle_dring_data_nack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4787 {
4788 	int rv = 0;
4789 	uint32_t start;
4790 	int32_t end;
4791 	uint32_t txi;
4792 	vnet_public_desc_t *txdp;
4793 	vio_dring_entry_hdr_t *hdrp;
4794 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4795 	vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;
4796 #ifdef VGEN_REXMIT
4797 	vgen_stats_t *statsp = &ldcp->stats;
4798 #endif
4799 
4800 	DBG1(vgenp, ldcp, "enter\n");
4801 	start = dringmsg->start_idx;
4802 	end = dringmsg->end_idx;
4803 
4804 	/*
4805 	 * peer sent a NACK msg to indicate lost packets.
4806 	 * The start and end correspond to the range of descriptors
4807 	 * for which the peer didn't receive a dring data msg and so
4808 	 * didn't receive the corresponding data.
4809 	 */
4810 	DWARN(vgenp, ldcp, "NACK: start(%d), end(%d)\n", start, end);
4811 
4812 	/* validate start and end indeces in the tx nack msg */
4813 	if (!(CHECK_TXI(start, ldcp)) || !(CHECK_TXI(end, ldcp))) {
4814 		/* drop the message if invalid index */
4815 		DWARN(vgenp, ldcp, "Invalid Tx nack start(%d) or end(%d)\n",
4816 		    start, end);
4817 		return (rv);
4818 	}
4819 	/* validate dring_ident */
4820 	if (dringmsg->dring_ident != ldcp->local_hparams.dring_ident) {
4821 		/* invalid dring_ident, drop the msg */
4822 		DWARN(vgenp, ldcp, "Invalid dring ident 0x%x\n",
4823 		    dringmsg->dring_ident);
4824 		return (rv);
4825 	}
4826 	mutex_enter(&ldcp->txlock);
4827 	mutex_enter(&ldcp->tclock);
4828 
4829 	if (ldcp->next_tbufp == ldcp->cur_tbufp) {
4830 		/* no busy descriptors, bogus nack ? */
4831 		mutex_exit(&ldcp->tclock);
4832 		mutex_exit(&ldcp->txlock);
4833 		return (rv);
4834 	}
4835 
4836 #ifdef VGEN_REXMIT
4837 	/* send a new dring data msg including the lost descrs */
4838 	end = ldcp->next_tbufp - ldcp->tbufp;
4839 	DECR_TXI(end, ldcp);
4840 	rv = vgen_send_dring_data(ldcp, start, end);
4841 	if (rv != 0) {
4842 		/*
4843 		 * vgen_send_dring_data() error: drop all packets
4844 		 * in this descr range
4845 		 */
4846 		DWARN(vgenp, ldcp, "vgen_send_dring_data failed: rv(%d)\n", rv);
4847 		for (txi = start; txi <= end; ) {
4848 			tbufp = &(ldcp->tbufp[txi]);
4849 			txdp = tbufp->descp;
4850 			hdrp = &txdp->hdr;
4851 			tbufp->flags = VGEN_PRIV_DESC_FREE;
4852 			hdrp->dstate = VIO_DESC_FREE;
4853 			hdrp->ack = B_FALSE;
4854 			statsp->oerrors++;
4855 		}
4856 
4857 		/* update next pointer */
4858 		ldcp->next_tbufp = &(ldcp->tbufp[start]);
4859 		ldcp->next_txi = start;
4860 	}
4861 	DBG2(vgenp, ldcp, "rexmit: start(%d) end(%d)\n", start, end);
4862 #else	/* VGEN_REXMIT */
4863 	/* we just mark the descrs as done so they can be reclaimed */
4864 	for (txi = start; txi <= end; ) {
4865 		txdp = &(ldcp->txdp[txi]);
4866 		hdrp = &txdp->hdr;
4867 		if (hdrp->dstate == VIO_DESC_READY)
4868 			hdrp->dstate = VIO_DESC_DONE;
4869 		INCR_TXI(txi, ldcp);
4870 	}
4871 #endif	/* VGEN_REXMIT */
4872 	mutex_exit(&ldcp->tclock);
4873 	mutex_exit(&ldcp->txlock);
4874 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
4875 	return (rv);
4876 }
4877 
/*
 * Reclaim transmit descriptors for the channel: with tclock held, run
 * vgen_reclaim_dring() and record the time of this reclaim pass.
 */
static void
vgen_reclaim(vgen_ldc_t *ldcp)
{
	mutex_enter(&ldcp->tclock);

	vgen_reclaim_dring(ldcp);
	/* timestamp consulted by vgen_ldc_txtimeout() */
	ldcp->reclaim_lbolt = ddi_get_lbolt();

	mutex_exit(&ldcp->tclock);
}
4888 
4889 /*
4890  * transmit reclaim function. starting from the current reclaim index
4891  * look for descriptors marked DONE and reclaim the descriptor and the
4892  * corresponding buffers (tbuf).
4893  */
4894 static void
4895 vgen_reclaim_dring(vgen_ldc_t *ldcp)
4896 {
4897 	int count = 0;
4898 	vnet_public_desc_t *txdp;
4899 	vgen_private_desc_t *tbufp;
4900 	vio_dring_entry_hdr_t	*hdrp;
4901 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
4902 
4903 #ifdef DEBUG
4904 	if (vgen_trigger_txtimeout)
4905 		return;
4906 #endif
4907 
4908 	tbufp = ldcp->cur_tbufp;
4909 	txdp = tbufp->descp;
4910 	hdrp = &txdp->hdr;
4911 
4912 	while ((hdrp->dstate == VIO_DESC_DONE) &&
4913 	    (tbufp != ldcp->next_tbufp)) {
4914 		tbufp->flags = VGEN_PRIV_DESC_FREE;
4915 		hdrp->dstate = VIO_DESC_FREE;
4916 		hdrp->ack = B_FALSE;
4917 
4918 		tbufp = NEXTTBUF(ldcp, tbufp);
4919 		txdp = tbufp->descp;
4920 		hdrp = &txdp->hdr;
4921 		count++;
4922 	}
4923 
4924 	ldcp->cur_tbufp = tbufp;
4925 
4926 	/*
4927 	 * Check if mac layer should be notified to restart transmissions
4928 	 */
4929 	if ((ldcp->need_resched) && (count > 0)) {
4930 		ldcp->need_resched = B_FALSE;
4931 		vnet_tx_update(vgenp->vnetp);
4932 	}
4933 }
4934 
4935 /* return the number of pending transmits for the channel */
4936 static int
4937 vgen_num_txpending(vgen_ldc_t *ldcp)
4938 {
4939 	int n;
4940 
4941 	if (ldcp->next_tbufp >= ldcp->cur_tbufp) {
4942 		n = ldcp->next_tbufp - ldcp->cur_tbufp;
4943 	} else  {
4944 		/* cur_tbufp > next_tbufp */
4945 		n = ldcp->num_txds - (ldcp->cur_tbufp - ldcp->next_tbufp);
4946 	}
4947 
4948 	return (n);
4949 }
4950 
4951 /* determine if the transmit descriptor ring is full */
4952 static int
4953 vgen_tx_dring_full(vgen_ldc_t *ldcp)
4954 {
4955 	vgen_private_desc_t	*tbufp;
4956 	vgen_private_desc_t	*ntbufp;
4957 
4958 	tbufp = ldcp->next_tbufp;
4959 	ntbufp = NEXTTBUF(ldcp, tbufp);
4960 	if (ntbufp == ldcp->cur_tbufp) { /* out of tbufs/txds */
4961 		return (VGEN_SUCCESS);
4962 	}
4963 	return (VGEN_FAILURE);
4964 }
4965 
4966 /* determine if timeout condition has occured */
4967 static int
4968 vgen_ldc_txtimeout(vgen_ldc_t *ldcp)
4969 {
4970 	if (((ddi_get_lbolt() - ldcp->reclaim_lbolt) >
4971 	    drv_usectohz(vnet_ldcwd_txtimeout * 1000)) &&
4972 	    (vnet_ldcwd_txtimeout) &&
4973 	    (vgen_tx_dring_full(ldcp) == VGEN_SUCCESS)) {
4974 		return (VGEN_SUCCESS);
4975 	} else {
4976 		return (VGEN_FAILURE);
4977 	}
4978 }
4979 
4980 /* transmit watchdog timeout handler */
4981 static void
4982 vgen_ldc_watchdog(void *arg)
4983 {
4984 	vgen_ldc_t *ldcp;
4985 	vgen_t *vgenp;
4986 	int rv;
4987 
4988 	ldcp = (vgen_ldc_t *)arg;
4989 	vgenp = LDC_TO_VGEN(ldcp);
4990 
4991 	rv = vgen_ldc_txtimeout(ldcp);
4992 	if (rv == VGEN_SUCCESS) {
4993 		DWARN(vgenp, ldcp, "transmit timeout\n");
4994 #ifdef DEBUG
4995 		if (vgen_trigger_txtimeout) {
4996 			/* tx timeout triggered for debugging */
4997 			vgen_trigger_txtimeout = 0;
4998 		}
4999 #endif
5000 		mutex_enter(&ldcp->cblock);
5001 		ldcp->need_ldc_reset = B_TRUE;
5002 		vgen_handshake_retry(ldcp);
5003 		mutex_exit(&ldcp->cblock);
5004 		if (ldcp->need_resched) {
5005 			ldcp->need_resched = B_FALSE;
5006 			vnet_tx_update(vgenp->vnetp);
5007 		}
5008 	}
5009 
5010 	ldcp->wd_tid = timeout(vgen_ldc_watchdog, (caddr_t)ldcp,
5011 	    drv_usectohz(vnet_ldcwd_interval * 1000));
5012 }
5013 
/*
 * handler for error messages received from the peer ldc end-point.
 * Currently a no-op: both arguments are unused.
 */
static void
vgen_handle_errmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
{
	_NOTE(ARGUNUSED(ldcp, tagp))
}
5020 
5021 /* Check if the session id in the received message is valid */
5022 static int
5023 vgen_check_sid(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5024 {
5025 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5026 
5027 	if (tagp->vio_sid != ldcp->peer_sid) {
5028 		DWARN(vgenp, ldcp, "sid mismatch: expected(%x), rcvd(%x)\n",
5029 		    ldcp->peer_sid, tagp->vio_sid);
5030 		return (VGEN_FAILURE);
5031 	}
5032 	else
5033 		return (VGEN_SUCCESS);
5034 }
5035 
/*
 * Format the six octets at 'a' into 'ebuf' as colon-separated hex
 * (no zero padding, e.g. "0:14:4f:fa:b:ff") and return ebuf.
 * The caller must supply a buffer large enough for the result: up to
 * 17 characters plus the terminating NUL.
 */
static caddr_t
vgen_print_ethaddr(uint8_t *a, char *ebuf)
{
	const uint8_t *octet = a;

	(void) sprintf(ebuf, "%x:%x:%x:%x:%x:%x",
	    octet[0], octet[1], octet[2],
	    octet[3], octet[4], octet[5]);

	return (ebuf);
}
5043 
5044 /* Handshake watchdog timeout handler */
5045 static void
5046 vgen_hwatchdog(void *arg)
5047 {
5048 	vgen_ldc_t *ldcp = (vgen_ldc_t *)arg;
5049 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5050 
5051 	DWARN(vgenp, ldcp,
5052 	    "handshake timeout ldc(%lx) phase(%x) state(%x)\n",
5053 	    ldcp->hphase, ldcp->hstate);
5054 
5055 	mutex_enter(&ldcp->cblock);
5056 	if (ldcp->cancel_htid) {
5057 		ldcp->cancel_htid = 0;
5058 		mutex_exit(&ldcp->cblock);
5059 		return;
5060 	}
5061 	ldcp->htid = 0;
5062 	ldcp->need_ldc_reset = B_TRUE;
5063 	vgen_handshake_retry(ldcp);
5064 	mutex_exit(&ldcp->cblock);
5065 }
5066 
5067 static void
5068 vgen_print_hparams(vgen_hparams_t *hp)
5069 {
5070 	uint8_t	addr[6];
5071 	char	ea[6];
5072 	ldc_mem_cookie_t *dc;
5073 
5074 	cmn_err(CE_CONT, "version_info:\n");
5075 	cmn_err(CE_CONT,
5076 	    "\tver_major: %d, ver_minor: %d, dev_class: %d\n",
5077 	    hp->ver_major, hp->ver_minor, hp->dev_class);
5078 
5079 	vnet_macaddr_ultostr(hp->addr, addr);
5080 	cmn_err(CE_CONT, "attr_info:\n");
5081 	cmn_err(CE_CONT, "\tMTU: %lx, addr: %s\n", hp->mtu,
5082 	    vgen_print_ethaddr(addr, ea));
5083 	cmn_err(CE_CONT,
5084 	    "\taddr_type: %x, xfer_mode: %x, ack_freq: %x\n",
5085 	    hp->addr_type, hp->xfer_mode, hp->ack_freq);
5086 
5087 	dc = &hp->dring_cookie;
5088 	cmn_err(CE_CONT, "dring_info:\n");
5089 	cmn_err(CE_CONT,
5090 	    "\tlength: %d, dsize: %d\n", hp->num_desc, hp->desc_size);
5091 	cmn_err(CE_CONT,
5092 	    "\tldc_addr: 0x%lx, ldc_size: %ld\n",
5093 	    dc->addr, dc->size);
5094 	cmn_err(CE_CONT, "\tdring_ident: 0x%lx\n", hp->dring_ident);
5095 }
5096 
5097 static void
5098 vgen_print_ldcinfo(vgen_ldc_t *ldcp)
5099 {
5100 	vgen_hparams_t *hp;
5101 
5102 	cmn_err(CE_CONT, "Channel Information:\n");
5103 	cmn_err(CE_CONT,
5104 	    "\tldc_id: 0x%lx, ldc_status: 0x%x\n",
5105 	    ldcp->ldc_id, ldcp->ldc_status);
5106 	cmn_err(CE_CONT,
5107 	    "\tlocal_sid: 0x%x, peer_sid: 0x%x\n",
5108 	    ldcp->local_sid, ldcp->peer_sid);
5109 	cmn_err(CE_CONT,
5110 	    "\thphase: 0x%x, hstate: 0x%x\n",
5111 	    ldcp->hphase, ldcp->hstate);
5112 
5113 	cmn_err(CE_CONT, "Local handshake params:\n");
5114 	hp = &ldcp->local_hparams;
5115 	vgen_print_hparams(hp);
5116 
5117 	cmn_err(CE_CONT, "Peer handshake params:\n");
5118 	hp = &ldcp->peer_hparams;
5119 	vgen_print_hparams(hp);
5120 }
5121 
5122 /*
5123  * vgen_ldc_queue_data -- Queue data in the LDC.
5124  */
5125 static void
5126 vgen_ldc_queue_data(vgen_ldc_t *ldcp, mblk_t *rhead, mblk_t *rtail)
5127 {
5128 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5129 
5130 	DBG1(vgenp, ldcp, "enter\n");
5131 	/*
5132 	 * If the receive thread is enabled, then the queue
5133 	 * is protected by the soft_lock. After queuing, trigger
5134 	 * the soft interrupt so that the interrupt handler sends these
5135 	 * messages up the stack.
5136 	 *
5137 	 * If the receive thread is not enabled, then the list is
5138 	 * automatically protected by the cblock lock, so no need
5139 	 * to hold any additional locks.
5140 	 */
5141 	if (ldcp->rcv_thread != NULL) {
5142 		mutex_enter(&ldcp->soft_lock);
5143 	}
5144 	if (ldcp->rcv_mhead == NULL) {
5145 		ldcp->rcv_mhead = rhead;
5146 		ldcp->rcv_mtail = rtail;
5147 	} else {
5148 		ldcp->rcv_mtail->b_next = rhead;
5149 		ldcp->rcv_mtail = rtail;
5150 	}
5151 	if (ldcp->rcv_thread != NULL) {
5152 		mutex_exit(&ldcp->soft_lock);
5153 		(void) ddi_intr_trigger_softint(ldcp->soft_handle, NULL);
5154 	}
5155 	DBG1(vgenp, ldcp, "exit\n");
5156 }
5157 
5158 /*
5159  * vgen_ldc_rcv_worker -- A per LDC worker thread to receive data.
5160  * This thread is woken up by the LDC interrupt handler to process
5161  * LDC packets and receive data.
5162  */
5163 static void
5164 vgen_ldc_rcv_worker(void *arg)
5165 {
5166 	callb_cpr_t	cprinfo;
5167 	vgen_ldc_t *ldcp = (vgen_ldc_t *)arg;
5168 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5169 
5170 	DBG1(vgenp, ldcp, "enter\n");
5171 	CALLB_CPR_INIT(&cprinfo, &ldcp->rcv_thr_lock, callb_generic_cpr,
5172 	    "vnet_rcv_thread");
5173 	mutex_enter(&ldcp->rcv_thr_lock);
5174 	ldcp->rcv_thr_flags |= VGEN_WTHR_RUNNING;
5175 	while (!(ldcp->rcv_thr_flags & VGEN_WTHR_STOP)) {
5176 
5177 		CALLB_CPR_SAFE_BEGIN(&cprinfo);
5178 		/*
5179 		 * Wait until the data is received or a stop
5180 		 * request is received.
5181 		 */
5182 		while (!(ldcp->rcv_thr_flags &
5183 		    (VGEN_WTHR_DATARCVD | VGEN_WTHR_STOP))) {
5184 			cv_wait(&ldcp->rcv_thr_cv, &ldcp->rcv_thr_lock);
5185 		}
5186 		CALLB_CPR_SAFE_END(&cprinfo, &ldcp->rcv_thr_lock)
5187 
5188 		/*
5189 		 * First process the stop request.
5190 		 */
5191 		if (ldcp->rcv_thr_flags & VGEN_WTHR_STOP) {
5192 			DBG2(vgenp, ldcp, "stopped\n");
5193 			break;
5194 		}
5195 		ldcp->rcv_thr_flags &= ~VGEN_WTHR_DATARCVD;
5196 		mutex_exit(&ldcp->rcv_thr_lock);
5197 		DBG2(vgenp, ldcp, "calling vgen_handle_evt_read\n");
5198 		vgen_handle_evt_read(ldcp);
5199 		mutex_enter(&ldcp->rcv_thr_lock);
5200 	}
5201 
5202 	/*
5203 	 * Update the run status and wakeup the thread that
5204 	 * has sent the stop request.
5205 	 */
5206 	ldcp->rcv_thr_flags &= ~VGEN_WTHR_RUNNING;
5207 	cv_signal(&ldcp->rcv_thr_cv);
5208 	CALLB_CPR_EXIT(&cprinfo);
5209 	thread_exit();
5210 	DBG1(vgenp, ldcp, "exit\n");
5211 }
5212 
5213 /* vgen_stop_rcv_thread -- Co-ordinate with receive thread to stop it */
5214 static void
5215 vgen_stop_rcv_thread(vgen_ldc_t *ldcp)
5216 {
5217 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5218 
5219 	DBG1(vgenp, ldcp, "enter\n");
5220 	/*
5221 	 * Send a stop request by setting the stop flag and
5222 	 * wait until the receive thread stops.
5223 	 */
5224 	mutex_enter(&ldcp->rcv_thr_lock);
5225 	if (ldcp->rcv_thr_flags & VGEN_WTHR_RUNNING) {
5226 		ldcp->rcv_thr_flags |= VGEN_WTHR_STOP;
5227 		cv_signal(&ldcp->rcv_thr_cv);
5228 		DBG2(vgenp, ldcp, "waiting...");
5229 		while (ldcp->rcv_thr_flags & VGEN_WTHR_RUNNING) {
5230 			cv_wait(&ldcp->rcv_thr_cv, &ldcp->rcv_thr_lock);
5231 		}
5232 	}
5233 	mutex_exit(&ldcp->rcv_thr_lock);
5234 	ldcp->rcv_thread = NULL;
5235 	DBG1(vgenp, ldcp, "exit\n");
5236 }
5237 
5238 /*
5239  * vgen_ldc_rcv_softintr -- LDC Soft interrupt handler function.
5240  * Its job is to pickup the recieved packets that are queued in the
5241  * LDC and send them up.
5242  *
5243  * NOTE: An interrupt handler is being used to handle the upper
5244  * layer(s) requirement to send up only at interrupt context.
5245  */
5246 /* ARGSUSED */
5247 static uint_t
5248 vgen_ldc_rcv_softintr(caddr_t arg1, caddr_t arg2)
5249 {
5250 	mblk_t *mp;
5251 	vgen_ldc_t *ldcp = (vgen_ldc_t *)arg1;
5252 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5253 
5254 	DBG1(vgenp, ldcp, "enter\n");
5255 	DTRACE_PROBE1(vgen_soft_intr, uint64_t, ldcp->ldc_id);
5256 	mutex_enter(&ldcp->soft_lock);
5257 	mp = ldcp->rcv_mhead;
5258 	ldcp->rcv_mhead = ldcp->rcv_mtail = NULL;
5259 	mutex_exit(&ldcp->soft_lock);
5260 	if (mp != NULL) {
5261 		vnet_rx(vgenp->vnetp, NULL, mp);
5262 	}
5263 	DBG1(vgenp, ldcp, "exit\n");
5264 	return (DDI_INTR_CLAIMED);
5265 }
5266 
5267 #if DEBUG
5268 
5269 /*
5270  * Print debug messages - set to 0xf to enable all msgs
5271  */
5272 static void
5273 debug_printf(const char *fname, vgen_t *vgenp,
5274     vgen_ldc_t *ldcp, const char *fmt, ...)
5275 {
5276 	char    buf[256];
5277 	char    *bufp = buf;
5278 	va_list ap;
5279 
5280 	if ((vgenp != NULL) && (vgenp->vnetp != NULL)) {
5281 		(void) sprintf(bufp, "vnet%d:",
5282 		    ((vnet_t *)(vgenp->vnetp))->instance);
5283 		bufp += strlen(bufp);
5284 	}
5285 	if (ldcp != NULL) {
5286 		(void) sprintf(bufp, "ldc(%ld):", ldcp->ldc_id);
5287 		bufp += strlen(bufp);
5288 	}
5289 	(void) sprintf(bufp, "%s: ", fname);
5290 	bufp += strlen(bufp);
5291 
5292 	va_start(ap, fmt);
5293 	(void) vsprintf(bufp, fmt, ap);
5294 	va_end(ap);
5295 
5296 	if ((ldcp == NULL) ||(vgendbg_ldcid == -1) ||
5297 	    (vgendbg_ldcid == ldcp->ldc_id)) {
5298 		cmn_err(CE_CONT, "%s\n", buf);
5299 	}
5300 }
5301 #endif
5302