xref: /titanic_51/usr/src/uts/sun4v/io/vnet_gen.c (revision 11021a81a7bbd25468cdfeca85665606b71488f6)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/errno.h>
31 #include <sys/param.h>
32 #include <sys/stream.h>
33 #include <sys/strsubr.h>
34 #include <sys/kmem.h>
35 #include <sys/conf.h>
36 #include <sys/devops.h>
37 #include <sys/ksynch.h>
38 #include <sys/stat.h>
39 #include <sys/modctl.h>
40 #include <sys/debug.h>
41 #include <sys/ethernet.h>
42 #include <sys/ddi.h>
43 #include <sys/sunddi.h>
44 #include <sys/strsun.h>
45 #include <sys/note.h>
46 #include <sys/mac.h>
47 #include <sys/mac_ether.h>
48 #include <sys/ldc.h>
49 #include <sys/mach_descrip.h>
50 #include <sys/mdeg.h>
51 #include <net/if.h>
52 #include <sys/vnet.h>
53 #include <sys/vio_mailbox.h>
54 #include <sys/vio_common.h>
55 #include <sys/vnet_common.h>
56 #include <sys/vnet_mailbox.h>
57 #include <sys/vio_util.h>
58 #include <sys/vnet_gen.h>
59 #include <sys/callb.h>
60 #include <sys/sdt.h>
61 #include <sys/intr.h>
62 #include <sys/pattr.h>
63 
64 /*
65  * Implementation of the mac functionality for vnet using the
66  * generic(default) transport layer of sun4v Logical Domain Channels(LDC).
67  */
68 
69 /*
70  * Function prototypes.
71  */
72 /* vgen proxy entry points */
73 int vgen_init(void *vnetp, dev_info_t *vnetdip, const uint8_t *macaddr,
74 	mac_register_t **vgenmacp);
75 int vgen_uninit(void *arg);
76 static int vgen_start(void *arg);
77 static void vgen_stop(void *arg);
78 static mblk_t *vgen_tx(void *arg, mblk_t *mp);
79 static int vgen_multicst(void *arg, boolean_t add,
80 	const uint8_t *mca);
81 static int vgen_promisc(void *arg, boolean_t on);
82 static int vgen_unicst(void *arg, const uint8_t *mca);
83 static int vgen_stat(void *arg, uint_t stat, uint64_t *val);
84 static void vgen_ioctl(void *arg, queue_t *wq, mblk_t *mp);
85 
86 /* externs - functions provided by vnet to add/remove/modify entries in fdb */
87 void vnet_add_fdb(void *arg, uint8_t *macaddr, mac_tx_t m_tx, void *txarg);
88 void vnet_del_fdb(void *arg, uint8_t *macaddr);
89 void vnet_modify_fdb(void *arg, uint8_t *macaddr, mac_tx_t m_tx,
90 	void *txarg, boolean_t upgrade);
91 void vnet_add_def_rte(void *arg, mac_tx_t m_tx, void *txarg);
92 void vnet_del_def_rte(void *arg);
93 void vnet_rx(void *arg, mac_resource_handle_t mrh, mblk_t *mp);
94 void vnet_tx_update(void *arg);
95 
96 /* vgen internal functions */
97 static void vgen_detach_ports(vgen_t *vgenp);
98 static void vgen_port_detach(vgen_port_t *portp);
99 static void vgen_port_list_insert(vgen_port_t *portp);
100 static void vgen_port_list_remove(vgen_port_t *portp);
101 static vgen_port_t *vgen_port_lookup(vgen_portlist_t *plistp,
102 	int port_num);
103 static int vgen_mdeg_reg(vgen_t *vgenp);
104 static void vgen_mdeg_unreg(vgen_t *vgenp);
105 static int vgen_mdeg_cb(void *cb_argp, mdeg_result_t *resp);
106 static int vgen_add_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
107 static int vgen_remove_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
108 static int vgen_port_attach_mdeg(vgen_t *vgenp, int port_num, uint64_t *ldcids,
109 	int num_ids, struct ether_addr *macaddr, boolean_t vsw_port);
110 static void vgen_port_detach_mdeg(vgen_port_t *portp);
111 static int vgen_update_port(vgen_t *vgenp, md_t *curr_mdp,
112 	mde_cookie_t curr_mdex, md_t *prev_mdp, mde_cookie_t prev_mdex);
113 static uint64_t	vgen_port_stat(vgen_port_t *portp, uint_t stat);
114 
115 static int vgen_ldc_attach(vgen_port_t *portp, uint64_t ldc_id);
116 static void vgen_ldc_detach(vgen_ldc_t *ldcp);
117 static int vgen_alloc_tx_ring(vgen_ldc_t *ldcp);
118 static void vgen_free_tx_ring(vgen_ldc_t *ldcp);
119 static void vgen_init_ports(vgen_t *vgenp);
120 static void vgen_port_init(vgen_port_t *portp);
121 static void vgen_uninit_ports(vgen_t *vgenp);
122 static void vgen_port_uninit(vgen_port_t *portp);
123 static void vgen_init_ldcs(vgen_port_t *portp);
124 static void vgen_uninit_ldcs(vgen_port_t *portp);
125 static int vgen_ldc_init(vgen_ldc_t *ldcp);
126 static void vgen_ldc_uninit(vgen_ldc_t *ldcp);
127 static int vgen_init_tbufs(vgen_ldc_t *ldcp);
128 static void vgen_uninit_tbufs(vgen_ldc_t *ldcp);
129 static void vgen_clobber_tbufs(vgen_ldc_t *ldcp);
130 static void vgen_clobber_rxds(vgen_ldc_t *ldcp);
131 static uint64_t	vgen_ldc_stat(vgen_ldc_t *ldcp, uint_t stat);
132 static uint_t vgen_ldc_cb(uint64_t event, caddr_t arg);
133 static int vgen_portsend(vgen_port_t *portp, mblk_t *mp);
134 static int vgen_ldcsend(vgen_ldc_t *ldcp, mblk_t *mp);
135 static void vgen_reclaim(vgen_ldc_t *ldcp);
136 static void vgen_reclaim_dring(vgen_ldc_t *ldcp);
137 static int vgen_num_txpending(vgen_ldc_t *ldcp);
138 static int vgen_tx_dring_full(vgen_ldc_t *ldcp);
139 static int vgen_ldc_txtimeout(vgen_ldc_t *ldcp);
140 static void vgen_ldc_watchdog(void *arg);
141 
142 /* vgen handshake functions */
143 static vgen_ldc_t *vh_nextphase(vgen_ldc_t *ldcp);
144 static int vgen_supported_version(vgen_ldc_t *ldcp, uint16_t ver_major,
145 	uint16_t ver_minor);
146 static int vgen_next_version(vgen_ldc_t *ldcp, vgen_ver_t *verp);
147 static int vgen_sendmsg(vgen_ldc_t *ldcp, caddr_t msg,  size_t msglen,
148 	boolean_t caller_holds_lock);
149 static int vgen_send_version_negotiate(vgen_ldc_t *ldcp);
150 static int vgen_send_attr_info(vgen_ldc_t *ldcp);
151 static int vgen_send_dring_reg(vgen_ldc_t *ldcp);
152 static int vgen_send_rdx_info(vgen_ldc_t *ldcp);
153 static int vgen_send_dring_data(vgen_ldc_t *ldcp, uint32_t start, int32_t end);
154 static int vgen_send_mcast_info(vgen_ldc_t *ldcp);
155 static int vgen_handshake_phase2(vgen_ldc_t *ldcp);
156 static void vgen_handshake_reset(vgen_ldc_t *ldcp);
157 static void vgen_reset_hphase(vgen_ldc_t *ldcp);
158 static void vgen_handshake(vgen_ldc_t *ldcp);
159 static int vgen_handshake_done(vgen_ldc_t *ldcp);
160 static void vgen_handshake_retry(vgen_ldc_t *ldcp);
161 static int vgen_handle_version_negotiate(vgen_ldc_t *ldcp,
162 	vio_msg_tag_t *tagp);
163 static int vgen_handle_attr_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
164 static int vgen_handle_dring_reg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
165 static int vgen_handle_rdx_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
166 static int vgen_handle_mcast_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
167 static int vgen_handle_ctrlmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
168 static int vgen_handle_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
169 static int vgen_handle_dring_data_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
170 static int vgen_process_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
171 static int vgen_handle_dring_data_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
172 static int vgen_handle_dring_data_nack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
173 static int vgen_send_dring_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp,
174 	uint32_t start, int32_t end, uint8_t pstate);
175 static int vgen_handle_datamsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
176 static void vgen_handle_errmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
177 static void vgen_handle_evt_up(vgen_ldc_t *ldcp, boolean_t flag);
178 static void vgen_handle_evt_reset(vgen_ldc_t *ldcp, boolean_t flag);
179 static int vgen_check_sid(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
180 static uint64_t	vgen_macaddr_strtoul(const uint8_t *macaddr);
181 static int vgen_macaddr_ultostr(uint64_t value, uint8_t *macaddr);
182 static caddr_t vgen_print_ethaddr(uint8_t *a, char *ebuf);
183 static void vgen_hwatchdog(void *arg);
184 static void vgen_print_attr_info(vgen_ldc_t *ldcp, int endpoint);
185 static void vgen_print_hparams(vgen_hparams_t *hp);
186 static void vgen_print_ldcinfo(vgen_ldc_t *ldcp);
187 static uint_t vgen_ldc_rcv_softintr(caddr_t arg1, caddr_t arg2);
188 static void vgen_stop_rcv_thread(vgen_ldc_t *ldcp);
189 static void vgen_ldc_rcv_worker(void *arg);
190 static void vgen_handle_evt_read(vgen_ldc_t *ldcp);
191 static void vgen_ldc_queue_data(vgen_ldc_t *ldcp,
192 	mblk_t *rhead, mblk_t *rtail);
193 
/*
 * The handshake process consists of 5 phases defined below: VH_PHASE0 is
 * the pre-handshake phase and VH_DONE indicates successful completion of
 * all phases.
 * Each phase may have one to several handshake states, all of which must
 * complete successfully before moving to the next phase.
 * Refer to the functions vgen_handshake() and vgen_handshake_done() for
 * more details.
 */
203 /* handshake phases */
enum {
	VH_PHASE0 = 0,		/* pre-handshake phase */
	VH_PHASE1 = 1,
	VH_PHASE2 = 2,
	VH_PHASE3 = 3,
	VH_DONE = 0x80		/* all phases completed successfully */
};
205 
206 /* handshake states */
/*
 * Each exchange (version, attributes, descriptor ring, RDX) owns one nibble
 * of the state word, with one bit each for "info sent", "ack received",
 * "info received" and "ack sent". An exchange is complete once both ack
 * bits of its nibble are set.
 */
enum {
	/* version negotiation: bits 0-3 */
	VER_INFO_SENT		= (1 << 0),
	VER_ACK_RCVD		= (1 << 1),
	VER_INFO_RCVD		= (1 << 2),
	VER_ACK_SENT		= (1 << 3),
	VER_NEGOTIATED		= (VER_ACK_RCVD | VER_ACK_SENT),

	/* attribute exchange: bits 4-7 */
	ATTR_INFO_SENT		= (1 << 4),
	ATTR_ACK_RCVD		= (1 << 5),
	ATTR_INFO_RCVD		= (1 << 6),
	ATTR_ACK_SENT		= (1 << 7),
	ATTR_INFO_EXCHANGED	= (ATTR_ACK_RCVD | ATTR_ACK_SENT),

	/* descriptor ring registration: bits 8-11 */
	DRING_INFO_SENT		= (1 << 8),
	DRING_ACK_RCVD		= (1 << 9),
	DRING_INFO_RCVD		= (1 << 10),
	DRING_ACK_SENT		= (1 << 11),
	DRING_INFO_EXCHANGED	= (DRING_ACK_RCVD | DRING_ACK_SENT),

	/* RDX exchange: bits 12-15 */
	RDX_INFO_SENT		= (1 << 12),
	RDX_ACK_RCVD		= (1 << 13),
	RDX_INFO_RCVD		= (1 << 14),
	RDX_ACK_SENT		= (1 << 15),
	RDX_EXCHANGED		= (RDX_ACK_RCVD | RDX_ACK_SENT)
};
234 
/*
 * Acquire/release all of the per-channel locks.
 * LDC_LOCK takes them in the order cblock, rxlock, wrlock, txlock, tclock;
 * LDC_UNLOCK drops them in the reverse order.
 * Each macro expands to exactly one statement, so it behaves correctly as
 * the body of an unbraced if/else. Invoke it with a trailing semicolon.
 */
#define	LDC_LOCK(ldcp)	\
	do {						\
		mutex_enter(&((ldcp)->cblock));		\
		mutex_enter(&((ldcp)->rxlock));		\
		mutex_enter(&((ldcp)->wrlock));		\
		mutex_enter(&((ldcp)->txlock));		\
		mutex_enter(&((ldcp)->tclock));		\
	} while (0)
#define	LDC_UNLOCK(ldcp)	\
	do {						\
		mutex_exit(&((ldcp)->tclock));		\
		mutex_exit(&((ldcp)->txlock));		\
		mutex_exit(&((ldcp)->wrlock));		\
		mutex_exit(&((ldcp)->rxlock));		\
		mutex_exit(&((ldcp)->cblock));		\
	} while (0)
247 
248 static struct ether_addr etherbroadcastaddr = {
249 	0xff, 0xff, 0xff, 0xff, 0xff, 0xff
250 };
251 /*
252  * MIB II broadcast/multicast packets
253  */
/*
 * ehp is a pointer to a struct ether_header; it may be any pointer-valued
 * expression (the argument is parenthesized before being dereferenced).
 * IS_BROADCAST: destination equals the all-ones broadcast address.
 * IS_MULTICAST: low-order bit of the first destination octet is set
 * (this is also true for the broadcast address).
 */
#define	IS_BROADCAST(ehp) \
		(ether_cmp(&(ehp)->ether_dhost, &etherbroadcastaddr) == 0)
#define	IS_MULTICAST(ehp) \
		(((ehp)->ether_dhost.ether_addr_octet[0] & 01) == 1)
258 
259 /*
260  * Property names
261  */
/* mac address properties */
static char	macaddr_propname[]	= "mac-address";
static char	rmacaddr_propname[]	= "remote-mac-address";

/* channel and port related properties */
static char	channel_propname[]	= "channel-endpoint";
static char	reg_propname[]		= "reg";
static char	port_propname[]		= "port";
static char	swport_propname[]	= "switch-port";
static char	id_propname[]		= "id";
269 
270 /* versions supported - in decreasing order */
271 static vgen_ver_t vgen_versions[VGEN_NUM_VER] = { {1, 0} };
272 
273 /* Tunables */
274 uint32_t vgen_hwd_interval = 5;		/* handshake watchdog freq in sec */
275 uint32_t vgen_max_hretries = VNET_NUM_HANDSHAKES; /* # of handshake retries */
276 uint32_t vgen_ldcwr_retries = 10;	/* max # of ldc_write() retries */
277 uint32_t vgen_ldcup_retries = 5;	/* max # of ldc_up() retries */
278 uint32_t vgen_recv_delay = 1;		/* delay when rx descr not ready */
279 uint32_t vgen_recv_retries = 10;	/* retry when rx descr not ready */
280 uint32_t vgen_tx_retries = 0x4;		/* retry when tx descr not available */
281 uint32_t vgen_tx_delay = 0x30;		/* delay when tx descr not available */
282 
283 int vgen_rcv_thread_enabled = 1;	/* Enable Recieve thread */
284 
285 /*
286  * max # of packets accumulated prior to sending them up. It is best
287  * to keep this at 60% of the number of recieve buffers.
288  */
289 uint32_t vgen_chain_len = (VGEN_NRBUFS * 0.6);
290 
291 /*
292  * Tunables for each receive buffer size and number of buffers for
293  * each buffer size.
294  */
295 uint32_t vgen_rbufsz1 = VGEN_DBLK_SZ_128;
296 uint32_t vgen_rbufsz2 = VGEN_DBLK_SZ_256;
297 uint32_t vgen_rbufsz3 = VGEN_DBLK_SZ_2048;
298 
299 uint32_t vgen_nrbufs1 = VGEN_NRBUFS;
300 uint32_t vgen_nrbufs2 = VGEN_NRBUFS;
301 uint32_t vgen_nrbufs3 = VGEN_NRBUFS;
302 
303 #ifdef DEBUG
304 /* flags to simulate error conditions for debugging */
305 int vgen_trigger_txtimeout = 0;
306 int vgen_trigger_rxlost = 0;
307 #endif
308 
309 /* MD update matching structure */
310 static md_prop_match_t	vport_prop_match[] = {
311 	{ MDET_PROP_VAL,	"id" },
312 	{ MDET_LIST_END,	NULL }
313 };
314 
315 static mdeg_node_match_t vport_match = { "virtual-device-port",
316 					vport_prop_match };
317 
318 /* template for matching a particular vnet instance */
319 static mdeg_prop_spec_t vgen_prop_template[] = {
320 	{ MDET_PROP_STR,	"name",		"network" },
321 	{ MDET_PROP_VAL,	"cfg-handle",	NULL },
322 	{ MDET_LIST_END,	NULL,		NULL }
323 };
324 
/*
 * Store val in the ps_val field of entry [1] of a property spec array
 * (the "cfg-handle" entry when specp is a copy of vgen_prop_template).
 * The expansion is fully parenthesized so that it stays a single
 * expression wherever it is used.
 */
#define	VGEN_SET_MDEG_PROP_INST(specp, val)	((specp)[1].ps_val = (val))
326 
327 static int vgen_mdeg_cb(void *cb_argp, mdeg_result_t *resp);
328 
329 static mac_callbacks_t vgen_m_callbacks = {
330 	0,
331 	vgen_stat,
332 	vgen_start,
333 	vgen_stop,
334 	vgen_promisc,
335 	vgen_multicst,
336 	vgen_unicst,
337 	vgen_tx,
338 	NULL,
339 	NULL,
340 	NULL
341 };
342 
343 /* externs */
344 extern pri_t	maxclsyspri;
345 extern proc_t	p0;
346 extern uint32_t vnet_ntxds;
347 extern uint32_t vnet_ldcwd_interval;
348 extern uint32_t vnet_ldcwd_txtimeout;
349 extern uint32_t vnet_ldc_mtu;
350 extern uint32_t vnet_nrbufs;
351 
352 
#ifdef DEBUG

extern int vnet_dbglevel;

static void debug_printf(const char *fname, vgen_t *vgenp,
	vgen_ldc_t *ldcp, const char *fmt, ...);

/* ldc_id of the single LDC to report on, or -1 for info on all LDCs */
int vgendbg_ldcid = -1;

/*
 * Bit mask used to simulate handshake error conditions for debug;
 * built from the HDBG_* values below.
 */
uint32_t vgen_hdbg;

#define	HDBG_VERSION	0x1
#define	HDBG_TIMEOUT	0x2
#define	HDBG_BAD_SID	0x4
#define	HDBG_OUT_STATE	0x8

#endif
370 
371 
372 
373 /*
374  * vgen_init() is called by an instance of vnet driver to initialize the
375  * corresponding generic proxy transport layer. The arguments passed by vnet
376  * are - an opaque pointer to the vnet instance, pointers to dev_info_t and
377  * the mac address of the vnet device, and a pointer to mac_register_t of
378  * the generic transport is returned in the last argument.
379  */
380 int
381 vgen_init(void *vnetp, dev_info_t *vnetdip, const uint8_t *macaddr,
382     mac_register_t **vgenmacp)
383 {
384 	vgen_t *vgenp;
385 	mac_register_t *macp;
386 	int instance;
387 
388 	if ((vnetp == NULL) || (vnetdip == NULL))
389 		return (DDI_FAILURE);
390 
391 	instance = ddi_get_instance(vnetdip);
392 
393 	DBG1(NULL, NULL, "vnet(%d): enter\n", instance);
394 
395 	vgenp = kmem_zalloc(sizeof (vgen_t), KM_SLEEP);
396 
397 	vgenp->vnetp = vnetp;
398 	vgenp->vnetdip = vnetdip;
399 	bcopy(macaddr, &(vgenp->macaddr), ETHERADDRL);
400 
401 	if ((macp = mac_alloc(MAC_VERSION)) == NULL) {
402 		KMEM_FREE(vgenp);
403 		return (DDI_FAILURE);
404 	}
405 	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
406 	macp->m_driver = vgenp;
407 	macp->m_dip = vnetdip;
408 	macp->m_src_addr = (uint8_t *)&(vgenp->macaddr);
409 	macp->m_callbacks = &vgen_m_callbacks;
410 	macp->m_min_sdu = 0;
411 	macp->m_max_sdu = ETHERMTU;
412 	vgenp->macp = macp;
413 
414 	/* allocate multicast table */
415 	vgenp->mctab = kmem_zalloc(VGEN_INIT_MCTAB_SIZE *
416 	    sizeof (struct ether_addr), KM_SLEEP);
417 	vgenp->mccount = 0;
418 	vgenp->mcsize = VGEN_INIT_MCTAB_SIZE;
419 
420 	mutex_init(&vgenp->lock, NULL, MUTEX_DRIVER, NULL);
421 
422 	/* register with MD event generator */
423 	if (vgen_mdeg_reg(vgenp) != DDI_SUCCESS) {
424 		mutex_destroy(&vgenp->lock);
425 		kmem_free(vgenp->mctab, VGEN_INIT_MCTAB_SIZE *
426 		    sizeof (struct ether_addr));
427 		mac_free(vgenp->macp);
428 		KMEM_FREE(vgenp);
429 		return (DDI_FAILURE);
430 	}
431 
432 	/* register macp of this vgen_t with vnet */
433 	*vgenmacp = vgenp->macp;
434 
435 	DBG1(NULL, NULL, "vnet(%d): exit\n", instance);
436 	return (DDI_SUCCESS);
437 }
438 
439 /*
440  * Called by vnet to undo the initializations done by vgen_init().
441  * The handle provided by generic transport during vgen_init() is the argument.
442  */
443 int
444 vgen_uninit(void *arg)
445 {
446 	vgen_t	*vgenp = (vgen_t *)arg;
447 	vio_mblk_pool_t *rp, *nrp;
448 
449 	if (vgenp == NULL) {
450 		return (DDI_FAILURE);
451 	}
452 
453 	DBG1(vgenp, NULL, "enter\n");
454 
455 	/* unregister with MD event generator */
456 	vgen_mdeg_unreg(vgenp);
457 
458 	mutex_enter(&vgenp->lock);
459 
460 	/* detach all ports from the device */
461 	vgen_detach_ports(vgenp);
462 
463 	/*
464 	 * free any pending rx mblk pools,
465 	 * that couldn't be freed previously during channel detach.
466 	 */
467 	rp = vgenp->rmp;
468 	while (rp != NULL) {
469 		nrp = vgenp->rmp = rp->nextp;
470 		if (vio_destroy_mblks(rp)) {
471 			vgenp->rmp = rp;
472 			mutex_exit(&vgenp->lock);
473 			return (DDI_FAILURE);
474 		}
475 		rp = nrp;
476 	}
477 
478 	/* free multicast table */
479 	kmem_free(vgenp->mctab, vgenp->mcsize * sizeof (struct ether_addr));
480 
481 	mac_free(vgenp->macp);
482 
483 	mutex_exit(&vgenp->lock);
484 
485 	mutex_destroy(&vgenp->lock);
486 
487 	KMEM_FREE(vgenp);
488 
489 	DBG1(vgenp, NULL, "exit\n");
490 
491 	return (DDI_SUCCESS);
492 }
493 
494 /* enable transmit/receive for the device */
495 int
496 vgen_start(void *arg)
497 {
498 	vgen_t		*vgenp = (vgen_t *)arg;
499 
500 	DBG1(vgenp, NULL, "enter\n");
501 
502 	mutex_enter(&vgenp->lock);
503 	vgen_init_ports(vgenp);
504 	vgenp->flags |= VGEN_STARTED;
505 	mutex_exit(&vgenp->lock);
506 
507 	DBG1(vgenp, NULL, "exit\n");
508 	return (DDI_SUCCESS);
509 }
510 
511 /* stop transmit/receive */
512 void
513 vgen_stop(void *arg)
514 {
515 	vgen_t		*vgenp = (vgen_t *)arg;
516 
517 	DBG1(vgenp, NULL, "enter\n");
518 
519 	mutex_enter(&vgenp->lock);
520 	vgen_uninit_ports(vgenp);
521 	vgenp->flags &= ~(VGEN_STARTED);
522 	mutex_exit(&vgenp->lock);
523 
524 	DBG1(vgenp, NULL, "exit\n");
525 }
526 
527 /* vgen transmit function */
528 static mblk_t *
529 vgen_tx(void *arg, mblk_t *mp)
530 {
531 	int i;
532 	vgen_port_t *portp;
533 	int status = VGEN_FAILURE;
534 
535 	portp = (vgen_port_t *)arg;
536 	/*
537 	 * Retry so that we avoid reporting a failure
538 	 * to the upper layer. Returning a failure may cause the
539 	 * upper layer to go into single threaded mode there by
540 	 * causing performance degradation, especially for a large
541 	 * number of connections.
542 	 */
543 	for (i = 0; i < vgen_tx_retries; ) {
544 		status = vgen_portsend(portp, mp);
545 		if (status == VGEN_SUCCESS) {
546 			break;
547 		}
548 		if (++i < vgen_tx_retries)
549 			delay(drv_usectohz(vgen_tx_delay));
550 	}
551 	if (status != VGEN_SUCCESS) {
552 		/* failure */
553 		return (mp);
554 	}
555 	/* success */
556 	return (NULL);
557 }
558 
559 /* transmit packets over the given port */
560 static int
561 vgen_portsend(vgen_port_t *portp, mblk_t *mp)
562 {
563 	vgen_ldclist_t	*ldclp;
564 	vgen_ldc_t *ldcp;
565 	int status;
566 	int rv = VGEN_SUCCESS;
567 
568 	ldclp = &portp->ldclist;
569 	READ_ENTER(&ldclp->rwlock);
570 	/*
571 	 * NOTE: for now, we will assume we have a single channel.
572 	 */
573 	if (ldclp->headp == NULL) {
574 		RW_EXIT(&ldclp->rwlock);
575 		return (VGEN_FAILURE);
576 	}
577 	ldcp = ldclp->headp;
578 
579 	status  = vgen_ldcsend(ldcp, mp);
580 
581 	RW_EXIT(&ldclp->rwlock);
582 
583 	if (status != VGEN_TX_SUCCESS) {
584 		rv = VGEN_FAILURE;
585 	}
586 	return (rv);
587 }
588 
589 /* channel transmit function */
590 static int
591 vgen_ldcsend(vgen_ldc_t *ldcp, mblk_t *mp)
592 {
593 	vgen_private_desc_t	*tbufp;
594 	vgen_private_desc_t	*rtbufp;
595 	vnet_public_desc_t	*rtxdp;
596 	vgen_private_desc_t	*ntbufp;
597 	vnet_public_desc_t	*txdp;
598 	vio_dring_entry_hdr_t	*hdrp;
599 	vgen_stats_t		*statsp;
600 	struct ether_header	*ehp;
601 	boolean_t	is_bcast = B_FALSE;
602 	boolean_t	is_mcast = B_FALSE;
603 	size_t		mblksz;
604 	caddr_t		dst;
605 	mblk_t		*bp;
606 	size_t		size;
607 	int		rv = 0;
608 	ldc_status_t	istatus;
609 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
610 
611 	statsp = &ldcp->stats;
612 	size = msgsize(mp);
613 
614 	DBG1(vgenp, ldcp, "enter\n");
615 
616 	if (ldcp->ldc_status != LDC_UP) {
617 		DWARN(vgenp, ldcp, "status(%d), dropping packet\n",
618 		    ldcp->ldc_status);
619 		/* retry ldc_up() if needed */
620 		if (ldcp->flags & CHANNEL_STARTED)
621 			(void) ldc_up(ldcp->ldc_handle);
622 		goto vgen_tx_exit;
623 	}
624 
625 	/* drop the packet if ldc is not up or handshake is not done */
626 	if (ldcp->hphase != VH_DONE) {
627 		DWARN(vgenp, ldcp, "hphase(%x), dropping packet\n",
628 		    ldcp->hphase);
629 		goto vgen_tx_exit;
630 	}
631 
632 	if (size > (size_t)ETHERMAX) {
633 		DWARN(vgenp, ldcp, "invalid size(%d)\n", size);
634 		goto vgen_tx_exit;
635 	}
636 	if (size < ETHERMIN)
637 		size = ETHERMIN;
638 
639 	ehp = (struct ether_header *)mp->b_rptr;
640 	is_bcast = IS_BROADCAST(ehp);
641 	is_mcast = IS_MULTICAST(ehp);
642 
643 	mutex_enter(&ldcp->txlock);
644 	/*
645 	 * allocate a descriptor
646 	 */
647 	tbufp = ldcp->next_tbufp;
648 	ntbufp = NEXTTBUF(ldcp, tbufp);
649 	if (ntbufp == ldcp->cur_tbufp) { /* out of tbufs/txds */
650 
651 		mutex_enter(&ldcp->tclock);
652 		/* Try reclaiming now */
653 		vgen_reclaim_dring(ldcp);
654 		ldcp->reclaim_lbolt = ddi_get_lbolt();
655 
656 		if (ntbufp == ldcp->cur_tbufp) {
657 			/* Now we are really out of tbuf/txds */
658 			ldcp->need_resched = B_TRUE;
659 			mutex_exit(&ldcp->tclock);
660 
661 			statsp->tx_no_desc++;
662 			mutex_exit(&ldcp->txlock);
663 
664 			return (VGEN_TX_NORESOURCES);
665 		}
666 		mutex_exit(&ldcp->tclock);
667 	}
668 	/* update next available tbuf in the ring and update tx index */
669 	ldcp->next_tbufp = ntbufp;
670 	INCR_TXI(ldcp->next_txi, ldcp);
671 
672 	/* Mark the buffer busy before releasing the lock */
673 	tbufp->flags = VGEN_PRIV_DESC_BUSY;
674 	mutex_exit(&ldcp->txlock);
675 
676 	/* copy data into pre-allocated transmit buffer */
677 	dst = tbufp->datap + VNET_IPALIGN;
678 	for (bp = mp; bp != NULL; bp = bp->b_cont) {
679 		mblksz = MBLKL(bp);
680 		bcopy(bp->b_rptr, dst, mblksz);
681 		dst += mblksz;
682 	}
683 
684 	tbufp->datalen = size;
685 
686 	/* initialize the corresponding public descriptor (txd) */
687 	txdp = tbufp->descp;
688 	hdrp = &txdp->hdr;
689 	txdp->nbytes = size;
690 	txdp->ncookies = tbufp->ncookies;
691 	bcopy((tbufp->memcookie), (txdp->memcookie),
692 	    tbufp->ncookies * sizeof (ldc_mem_cookie_t));
693 
694 	mutex_enter(&ldcp->wrlock);
695 	/*
696 	 * If the flags not set to BUSY, it implies that the clobber
697 	 * was done while we were copying the data. In such case,
698 	 * discard the packet and return.
699 	 */
700 	if (tbufp->flags != VGEN_PRIV_DESC_BUSY) {
701 		statsp->oerrors++;
702 		mutex_exit(&ldcp->wrlock);
703 		goto vgen_tx_exit;
704 	}
705 	hdrp->dstate = VIO_DESC_READY;
706 
707 	/* update stats */
708 	statsp->opackets++;
709 	statsp->obytes += size;
710 	if (is_bcast)
711 		statsp->brdcstxmt++;
712 	else if (is_mcast)
713 		statsp->multixmt++;
714 
715 	/* send dring datamsg to the peer */
716 	if (ldcp->resched_peer) {
717 
718 		rtbufp = &ldcp->tbufp[ldcp->resched_peer_txi];
719 		rtxdp = rtbufp->descp;
720 
721 		if (rtxdp->hdr.dstate == VIO_DESC_READY) {
722 
723 			rv = vgen_send_dring_data(ldcp,
724 			    (uint32_t)ldcp->resched_peer_txi, -1);
725 			if (rv != 0) {
726 				/* error: drop the packet */
727 				DWARN(vgenp, ldcp, "vgen_send_dring_data "
728 				    "failed: rv(%d) len(%d)\n",
729 				    ldcp->ldc_id, rv, size);
730 				statsp->oerrors++;
731 			} else {
732 				ldcp->resched_peer = B_FALSE;
733 			}
734 
735 		}
736 
737 	}
738 
739 	mutex_exit(&ldcp->wrlock);
740 
741 vgen_tx_exit:
742 	if (rv == ECONNRESET) {
743 		/*
744 		 * Check if either callback thread or another tx thread is
745 		 * already running. Calling mutex_enter() will result in a
746 		 * deadlock if the other thread already holds cblock and is
747 		 * blocked in vnet_modify_fdb() (which is called from
748 		 * vgen_handle_evt_reset()) waiting for write access on rwlock,
749 		 * as this transmit thread already holds that lock as a reader
750 		 * in vnet_m_tx(). See comments in vnet_modify_fdb() in vnet.c.
751 		 */
752 		if (mutex_tryenter(&ldcp->cblock)) {
753 			if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
754 				DWARN(vgenp, ldcp, "ldc_status() error\n");
755 			} else {
756 				ldcp->ldc_status = istatus;
757 			}
758 			if (ldcp->ldc_status != LDC_UP) {
759 				/*
760 				 * Second arg is TRUE, as we know that
761 				 * the caller of this function - vnet_m_tx(),
762 				 * already holds fdb-rwlock as a reader.
763 				 */
764 				vgen_handle_evt_reset(ldcp, B_TRUE);
765 			}
766 			mutex_exit(&ldcp->cblock);
767 		}
768 	}
769 	freemsg(mp);
770 	DBG1(vgenp, ldcp, "exit\n");
771 	return (VGEN_TX_SUCCESS);
772 }
773 
774 /* enable/disable a multicast address */
775 int
776 vgen_multicst(void *arg, boolean_t add, const uint8_t *mca)
777 {
778 	vgen_t			*vgenp;
779 	vnet_mcast_msg_t	mcastmsg;
780 	vio_msg_tag_t		*tagp;
781 	vgen_port_t		*portp;
782 	vgen_portlist_t		*plistp;
783 	vgen_ldc_t		*ldcp;
784 	vgen_ldclist_t		*ldclp;
785 	struct ether_addr	*addrp;
786 	int			rv = DDI_FAILURE;
787 	uint32_t		i;
788 
789 	vgenp = (vgen_t *)arg;
790 	addrp = (struct ether_addr *)mca;
791 	tagp = &mcastmsg.tag;
792 	bzero(&mcastmsg, sizeof (mcastmsg));
793 
794 	mutex_enter(&vgenp->lock);
795 
796 	plistp = &(vgenp->vgenports);
797 
798 	READ_ENTER(&plistp->rwlock);
799 
800 	portp = vgenp->vsw_portp;
801 	if (portp == NULL) {
802 		RW_EXIT(&plistp->rwlock);
803 		mutex_exit(&vgenp->lock);
804 		return (rv);
805 	}
806 	ldclp = &portp->ldclist;
807 
808 	READ_ENTER(&ldclp->rwlock);
809 
810 	ldcp = ldclp->headp;
811 	if (ldcp == NULL)
812 		goto vgen_mcast_exit;
813 
814 	mutex_enter(&ldcp->cblock);
815 
816 	if (ldcp->hphase == VH_DONE) {
817 		/*
818 		 * If handshake is done, send a msg to vsw to add/remove
819 		 * the multicast address. Otherwise, we just update this
820 		 * mcast address in our table and the table will be sync'd
821 		 * with vsw when handshake completes.
822 		 */
823 		tagp->vio_msgtype = VIO_TYPE_CTRL;
824 		tagp->vio_subtype = VIO_SUBTYPE_INFO;
825 		tagp->vio_subtype_env = VNET_MCAST_INFO;
826 		tagp->vio_sid = ldcp->local_sid;
827 		bcopy(mca, &(mcastmsg.mca), ETHERADDRL);
828 		mcastmsg.set = add;
829 		mcastmsg.count = 1;
830 		if (vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (mcastmsg),
831 		    B_FALSE) != VGEN_SUCCESS) {
832 			DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
833 			mutex_exit(&ldcp->cblock);
834 			goto vgen_mcast_exit;
835 		}
836 	}
837 
838 	mutex_exit(&ldcp->cblock);
839 
840 	if (add) {
841 
842 		/* expand multicast table if necessary */
843 		if (vgenp->mccount >= vgenp->mcsize) {
844 			struct ether_addr	*newtab;
845 			uint32_t		newsize;
846 
847 
848 			newsize = vgenp->mcsize * 2;
849 
850 			newtab = kmem_zalloc(newsize *
851 			    sizeof (struct ether_addr), KM_NOSLEEP);
852 			if (newtab == NULL)
853 				goto vgen_mcast_exit;
854 			bcopy(vgenp->mctab, newtab, vgenp->mcsize *
855 			    sizeof (struct ether_addr));
856 			kmem_free(vgenp->mctab,
857 			    vgenp->mcsize * sizeof (struct ether_addr));
858 
859 			vgenp->mctab = newtab;
860 			vgenp->mcsize = newsize;
861 		}
862 
863 		/* add address to the table */
864 		vgenp->mctab[vgenp->mccount++] = *addrp;
865 
866 	} else {
867 
868 		/* delete address from the table */
869 		for (i = 0; i < vgenp->mccount; i++) {
870 			if (ether_cmp(addrp, &(vgenp->mctab[i])) == 0) {
871 
872 				/*
873 				 * If there's more than one address in this
874 				 * table, delete the unwanted one by moving
875 				 * the last one in the list over top of it;
876 				 * otherwise, just remove it.
877 				 */
878 				if (vgenp->mccount > 1) {
879 					vgenp->mctab[i] =
880 					    vgenp->mctab[vgenp->mccount-1];
881 				}
882 				vgenp->mccount--;
883 				break;
884 			}
885 		}
886 	}
887 
888 	rv = DDI_SUCCESS;
889 
890 vgen_mcast_exit:
891 	RW_EXIT(&ldclp->rwlock);
892 	RW_EXIT(&plistp->rwlock);
893 
894 	mutex_exit(&vgenp->lock);
895 	return (rv);
896 }
897 
898 /* set or clear promiscuous mode on the device */
899 static int
900 vgen_promisc(void *arg, boolean_t on)
901 {
902 	_NOTE(ARGUNUSED(arg, on))
903 	return (DDI_SUCCESS);
904 }
905 
906 /* set the unicast mac address of the device */
907 static int
908 vgen_unicst(void *arg, const uint8_t *mca)
909 {
910 	_NOTE(ARGUNUSED(arg, mca))
911 	return (DDI_SUCCESS);
912 }
913 
914 /* get device statistics */
915 int
916 vgen_stat(void *arg, uint_t stat, uint64_t *val)
917 {
918 	vgen_t		*vgenp = (vgen_t *)arg;
919 	vgen_port_t	*portp;
920 	vgen_portlist_t	*plistp;
921 
922 	*val = 0;
923 
924 	plistp = &(vgenp->vgenports);
925 	READ_ENTER(&plistp->rwlock);
926 
927 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
928 		*val += vgen_port_stat(portp, stat);
929 	}
930 
931 	RW_EXIT(&plistp->rwlock);
932 
933 	return (0);
934 }
935 
936 static void
937 vgen_ioctl(void *arg, queue_t *wq, mblk_t *mp)
938 {
939 	 _NOTE(ARGUNUSED(arg, wq, mp))
940 }
941 
942 /* vgen internal functions */
943 /* detach all ports from the device */
944 static void
945 vgen_detach_ports(vgen_t *vgenp)
946 {
947 	vgen_port_t	*portp;
948 	vgen_portlist_t	*plistp;
949 
950 	plistp = &(vgenp->vgenports);
951 	WRITE_ENTER(&plistp->rwlock);
952 
953 	while ((portp = plistp->headp) != NULL) {
954 		vgen_port_detach(portp);
955 	}
956 
957 	RW_EXIT(&plistp->rwlock);
958 }
959 
960 /*
961  * detach the given port.
962  */
963 static void
964 vgen_port_detach(vgen_port_t *portp)
965 {
966 	vgen_t		*vgenp;
967 	vgen_ldclist_t	*ldclp;
968 	int		port_num;
969 
970 	vgenp = portp->vgenp;
971 	port_num = portp->port_num;
972 
973 	DBG1(vgenp, NULL, "port(%d):enter\n", port_num);
974 
975 	/* remove it from port list */
976 	vgen_port_list_remove(portp);
977 
978 	/* detach channels from this port */
979 	ldclp = &portp->ldclist;
980 	WRITE_ENTER(&ldclp->rwlock);
981 	while (ldclp->headp) {
982 		vgen_ldc_detach(ldclp->headp);
983 	}
984 	RW_EXIT(&ldclp->rwlock);
985 
986 	if (vgenp->vsw_portp == portp) {
987 		vgenp->vsw_portp = NULL;
988 	}
989 	KMEM_FREE(portp);
990 
991 	DBG1(vgenp, NULL, "port(%d):exit\n", port_num);
992 }
993 
994 /* add a port to port list */
995 static void
996 vgen_port_list_insert(vgen_port_t *portp)
997 {
998 	vgen_portlist_t *plistp;
999 	vgen_t *vgenp;
1000 
1001 	vgenp = portp->vgenp;
1002 	plistp = &(vgenp->vgenports);
1003 
1004 	if (plistp->headp == NULL) {
1005 		plistp->headp = portp;
1006 	} else {
1007 		plistp->tailp->nextp = portp;
1008 	}
1009 	plistp->tailp = portp;
1010 	portp->nextp = NULL;
1011 }
1012 
1013 /* remove a port from port list */
1014 static void
1015 vgen_port_list_remove(vgen_port_t *portp)
1016 {
1017 	vgen_port_t *prevp;
1018 	vgen_port_t *nextp;
1019 	vgen_portlist_t *plistp;
1020 	vgen_t *vgenp;
1021 
1022 	vgenp = portp->vgenp;
1023 
1024 	plistp = &(vgenp->vgenports);
1025 
1026 	if (plistp->headp == NULL)
1027 		return;
1028 
1029 	if (portp == plistp->headp) {
1030 		plistp->headp = portp->nextp;
1031 		if (portp == plistp->tailp)
1032 			plistp->tailp = plistp->headp;
1033 	} else {
1034 		for (prevp = plistp->headp;
1035 		    ((nextp = prevp->nextp) != NULL) && (nextp != portp);
1036 		    prevp = nextp)
1037 			;
1038 		if (nextp == portp) {
1039 			prevp->nextp = portp->nextp;
1040 		}
1041 		if (portp == plistp->tailp)
1042 			plistp->tailp = prevp;
1043 	}
1044 }
1045 
1046 /* lookup a port in the list based on port_num */
1047 static vgen_port_t *
1048 vgen_port_lookup(vgen_portlist_t *plistp, int port_num)
1049 {
1050 	vgen_port_t *portp = NULL;
1051 
1052 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
1053 		if (portp->port_num == port_num) {
1054 			break;
1055 		}
1056 	}
1057 
1058 	return (portp);
1059 }
1060 
1061 /* enable ports for transmit/receive */
1062 static void
1063 vgen_init_ports(vgen_t *vgenp)
1064 {
1065 	vgen_port_t	*portp;
1066 	vgen_portlist_t	*plistp;
1067 
1068 	plistp = &(vgenp->vgenports);
1069 	READ_ENTER(&plistp->rwlock);
1070 
1071 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
1072 		vgen_port_init(portp);
1073 	}
1074 
1075 	RW_EXIT(&plistp->rwlock);
1076 }
1077 
1078 static void
1079 vgen_port_init(vgen_port_t *portp)
1080 {
1081 	vgen_t *vgenp;
1082 
1083 	vgenp = portp->vgenp;
1084 	/*
1085 	 * Create fdb entry in vnet, corresponding to the mac
1086 	 * address of this port. Note that the port specified
1087 	 * is vsw-port. This is done so that vsw-port acts
1088 	 * as the route to reach this macaddr, until the
1089 	 * channel for this port comes up (LDC_UP) and
1090 	 * handshake is done successfully.
1091 	 * eg, if the peer is OBP-vnet, it may not bring the
1092 	 * channel up for this port and may communicate via
1093 	 * vsw to reach this port.
1094 	 * Later, when Solaris-vnet comes up at the other end
1095 	 * of the channel for this port and brings up the channel,
1096 	 * it is an indication that peer vnet is capable of
1097 	 * distributed switching, so the direct route through this
1098 	 * port is specified in fdb, using vnet_modify_fdb(macaddr);
1099 	 */
1100 	vnet_add_fdb(vgenp->vnetp, (uint8_t *)&portp->macaddr,
1101 	    vgen_tx, vgenp->vsw_portp);
1102 
1103 	if (portp == vgenp->vsw_portp) {
1104 		/*
1105 		 * create the default route entry in vnet's fdb.
1106 		 * This is the entry used by vnet to reach
1107 		 * unknown destinations, which basically goes
1108 		 * through vsw on domain0 and out through the
1109 		 * physical device bound to vsw.
1110 		 */
1111 		vnet_add_def_rte(vgenp->vnetp, vgen_tx, portp);
1112 	}
1113 
1114 	/* Bring up the channels of this port */
1115 	vgen_init_ldcs(portp);
1116 }
1117 
1118 /* disable transmit/receive on ports */
1119 static void
1120 vgen_uninit_ports(vgen_t *vgenp)
1121 {
1122 	vgen_port_t	*portp;
1123 	vgen_portlist_t	*plistp;
1124 
1125 	plistp = &(vgenp->vgenports);
1126 	READ_ENTER(&plistp->rwlock);
1127 
1128 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
1129 		vgen_port_uninit(portp);
1130 	}
1131 
1132 	RW_EXIT(&plistp->rwlock);
1133 }
1134 
1135 static void
1136 vgen_port_uninit(vgen_port_t *portp)
1137 {
1138 	vgen_t *vgenp;
1139 
1140 	vgenp = portp->vgenp;
1141 
1142 	vgen_uninit_ldcs(portp);
1143 	/* delete the entry in vnet's fdb for this port */
1144 	vnet_del_fdb(vgenp->vnetp, (uint8_t *)&portp->macaddr);
1145 	if (portp == vgenp->vsw_portp) {
1146 		/*
1147 		 * if this is vsw-port, then delete the default
1148 		 * route entry in vnet's fdb.
1149 		 */
1150 		vnet_del_def_rte(vgenp->vnetp);
1151 	}
1152 }
1153 
1154 /* register with MD event generator */
1155 static int
1156 vgen_mdeg_reg(vgen_t *vgenp)
1157 {
1158 	mdeg_prop_spec_t	*pspecp;
1159 	mdeg_node_spec_t	*parentp;
1160 	uint_t			templatesz;
1161 	int			rv;
1162 	mdeg_handle_t		hdl;
1163 	int			i;
1164 
1165 	i = ddi_prop_get_int(DDI_DEV_T_ANY, vgenp->vnetdip,
1166 	    DDI_PROP_DONTPASS, reg_propname, -1);
1167 	if (i == -1) {
1168 		return (DDI_FAILURE);
1169 	}
1170 	templatesz = sizeof (vgen_prop_template);
1171 	pspecp = kmem_zalloc(templatesz, KM_NOSLEEP);
1172 	if (pspecp == NULL) {
1173 		return (DDI_FAILURE);
1174 	}
1175 	parentp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_NOSLEEP);
1176 	if (parentp == NULL) {
1177 		kmem_free(pspecp, templatesz);
1178 		return (DDI_FAILURE);
1179 	}
1180 
1181 	bcopy(vgen_prop_template, pspecp, templatesz);
1182 
1183 	/*
1184 	 * NOTE: The instance here refers to the value of "reg" property and
1185 	 * not the dev_info instance (ddi_get_instance()) of vnet.
1186 	 */
1187 	VGEN_SET_MDEG_PROP_INST(pspecp, i);
1188 
1189 	parentp->namep = "virtual-device";
1190 	parentp->specp = pspecp;
1191 
1192 	/* save parentp in vgen_t */
1193 	vgenp->mdeg_parentp = parentp;
1194 
1195 	rv = mdeg_register(parentp, &vport_match, vgen_mdeg_cb, vgenp, &hdl);
1196 	if (rv != MDEG_SUCCESS) {
1197 		DERR(vgenp, NULL, "mdeg_register failed\n");
1198 		KMEM_FREE(parentp);
1199 		kmem_free(pspecp, templatesz);
1200 		vgenp->mdeg_parentp = NULL;
1201 		return (DDI_FAILURE);
1202 	}
1203 
1204 	/* save mdeg handle in vgen_t */
1205 	vgenp->mdeg_hdl = hdl;
1206 
1207 	return (DDI_SUCCESS);
1208 }
1209 
1210 /* unregister with MD event generator */
1211 static void
1212 vgen_mdeg_unreg(vgen_t *vgenp)
1213 {
1214 	(void) mdeg_unregister(vgenp->mdeg_hdl);
1215 	kmem_free(vgenp->mdeg_parentp->specp, sizeof (vgen_prop_template));
1216 	KMEM_FREE(vgenp->mdeg_parentp);
1217 	vgenp->mdeg_parentp = NULL;
1218 	vgenp->mdeg_hdl = NULL;
1219 }
1220 
1221 /* callback function registered with MD event generator */
1222 static int
1223 vgen_mdeg_cb(void *cb_argp, mdeg_result_t *resp)
1224 {
1225 	int idx;
1226 	int vsw_idx = -1;
1227 	uint64_t val;
1228 	vgen_t *vgenp;
1229 
1230 	if ((resp == NULL) || (cb_argp == NULL)) {
1231 		return (MDEG_FAILURE);
1232 	}
1233 
1234 	vgenp = (vgen_t *)cb_argp;
1235 	DBG1(vgenp, NULL, "enter\n");
1236 
1237 	mutex_enter(&vgenp->lock);
1238 
1239 	DBG1(vgenp, NULL, "ports: removed(%x), "
1240 	"added(%x), updated(%x)\n", resp->removed.nelem,
1241 	    resp->added.nelem, resp->match_curr.nelem);
1242 
1243 	for (idx = 0; idx < resp->removed.nelem; idx++) {
1244 		(void) vgen_remove_port(vgenp, resp->removed.mdp,
1245 		    resp->removed.mdep[idx]);
1246 	}
1247 
1248 	if (vgenp->vsw_portp == NULL) {
1249 		/*
1250 		 * find vsw_port and add it first, because other ports need
1251 		 * this when adding fdb entry (see vgen_port_init()).
1252 		 */
1253 		for (idx = 0; idx < resp->added.nelem; idx++) {
1254 			if (!(md_get_prop_val(resp->added.mdp,
1255 			    resp->added.mdep[idx], swport_propname, &val))) {
1256 				if (val == 0) {
1257 					/*
1258 					 * This port is connected to the
1259 					 * vsw on dom0.
1260 					 */
1261 					vsw_idx = idx;
1262 					if (vgen_add_port(vgenp,
1263 					    resp->added.mdp,
1264 					    resp->added.mdep[idx]) !=
1265 					    DDI_SUCCESS) {
1266 						cmn_err(CE_NOTE, "vnet%d Could "
1267 						    "not initialize virtual "
1268 						    "switch port.",
1269 						    ddi_get_instance(vgenp->
1270 						    vnetdip));
1271 						mutex_exit(&vgenp->lock);
1272 						return (MDEG_FAILURE);
1273 					}
1274 					break;
1275 				}
1276 			}
1277 		}
1278 		if (vsw_idx == -1) {
1279 			DWARN(vgenp, NULL, "can't find vsw_port\n");
1280 			mutex_exit(&vgenp->lock);
1281 			return (MDEG_FAILURE);
1282 		}
1283 	}
1284 
1285 	for (idx = 0; idx < resp->added.nelem; idx++) {
1286 		if ((vsw_idx != -1) && (vsw_idx == idx)) /* skip vsw_port */
1287 			continue;
1288 
1289 		/* If this port can't be added just skip it. */
1290 		(void) vgen_add_port(vgenp, resp->added.mdp,
1291 		    resp->added.mdep[idx]);
1292 	}
1293 
1294 	for (idx = 0; idx < resp->match_curr.nelem; idx++) {
1295 		(void) vgen_update_port(vgenp, resp->match_curr.mdp,
1296 		    resp->match_curr.mdep[idx],
1297 		    resp->match_prev.mdp,
1298 		    resp->match_prev.mdep[idx]);
1299 	}
1300 
1301 	mutex_exit(&vgenp->lock);
1302 	DBG1(vgenp, NULL, "exit\n");
1303 	return (MDEG_SUCCESS);
1304 }
1305 
1306 /* add a new port to the device */
1307 static int
1308 vgen_add_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
1309 {
1310 	uint64_t	port_num;
1311 	uint64_t	*ldc_ids;
1312 	uint64_t	macaddr;
1313 	uint64_t	val;
1314 	int		num_ldcs;
1315 	int		vsw_port = B_FALSE;
1316 	int		i;
1317 	int		addrsz;
1318 	int		num_nodes = 0;
1319 	int		listsz = 0;
1320 	int		rv = DDI_SUCCESS;
1321 	mde_cookie_t	*listp = NULL;
1322 	uint8_t		*addrp;
1323 	struct ether_addr	ea;
1324 
1325 	/* read "id" property to get the port number */
1326 	if (md_get_prop_val(mdp, mdex, id_propname, &port_num)) {
1327 		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
1328 		return (DDI_FAILURE);
1329 	}
1330 
1331 	/*
1332 	 * Find the channel endpoint node(s) under this port node.
1333 	 */
1334 	if ((num_nodes = md_node_count(mdp)) <= 0) {
1335 		DWARN(vgenp, NULL, "invalid number of nodes found (%d)",
1336 		    num_nodes);
1337 		return (DDI_FAILURE);
1338 	}
1339 
1340 	/* allocate space for node list */
1341 	listsz = num_nodes * sizeof (mde_cookie_t);
1342 	listp = kmem_zalloc(listsz, KM_NOSLEEP);
1343 	if (listp == NULL)
1344 		return (DDI_FAILURE);
1345 
1346 	num_ldcs = md_scan_dag(mdp, mdex,
1347 	    md_find_name(mdp, channel_propname),
1348 	    md_find_name(mdp, "fwd"), listp);
1349 
1350 	if (num_ldcs <= 0) {
1351 		DWARN(vgenp, NULL, "can't find %s nodes", channel_propname);
1352 		kmem_free(listp, listsz);
1353 		return (DDI_FAILURE);
1354 	}
1355 
1356 	DBG2(vgenp, NULL, "num_ldcs %d", num_ldcs);
1357 
1358 	ldc_ids = kmem_zalloc(num_ldcs * sizeof (uint64_t), KM_NOSLEEP);
1359 	if (ldc_ids == NULL) {
1360 		kmem_free(listp, listsz);
1361 		return (DDI_FAILURE);
1362 	}
1363 
1364 	for (i = 0; i < num_ldcs; i++) {
1365 		/* read channel ids */
1366 		if (md_get_prop_val(mdp, listp[i], id_propname, &ldc_ids[i])) {
1367 			DWARN(vgenp, NULL, "prop(%s) not found\n",
1368 			    id_propname);
1369 			kmem_free(listp, listsz);
1370 			kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
1371 			return (DDI_FAILURE);
1372 		}
1373 		DBG2(vgenp, NULL, "ldc_id 0x%llx", ldc_ids[i]);
1374 	}
1375 
1376 	kmem_free(listp, listsz);
1377 
1378 	if (md_get_prop_data(mdp, mdex, rmacaddr_propname, &addrp,
1379 	    &addrsz)) {
1380 		DWARN(vgenp, NULL, "prop(%s) not found\n", rmacaddr_propname);
1381 		kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
1382 		return (DDI_FAILURE);
1383 	}
1384 
1385 	if (addrsz < ETHERADDRL) {
1386 		DWARN(vgenp, NULL, "invalid address size (%d)\n", addrsz);
1387 		kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
1388 		return (DDI_FAILURE);
1389 	}
1390 
1391 	macaddr = *((uint64_t *)addrp);
1392 
1393 	DBG2(vgenp, NULL, "remote mac address 0x%llx\n", macaddr);
1394 
1395 	for (i = ETHERADDRL - 1; i >= 0; i--) {
1396 		ea.ether_addr_octet[i] = macaddr & 0xFF;
1397 		macaddr >>= 8;
1398 	}
1399 
1400 	if (vgenp->vsw_portp == NULL) {
1401 		if (!(md_get_prop_val(mdp, mdex, swport_propname, &val))) {
1402 			if (val == 0) {
1403 				/* This port is connected to the vsw on dom0 */
1404 				vsw_port = B_TRUE;
1405 			}
1406 		}
1407 	}
1408 	if (vgen_port_attach_mdeg(vgenp, (int)port_num, ldc_ids, num_ldcs,
1409 	    &ea, vsw_port) != DDI_SUCCESS) {
1410 		cmn_err(CE_NOTE, "vnet%d failed to attach port %d remote MAC "
1411 		    "address %s", ddi_get_instance(vgenp->vnetdip),
1412 		    (int)port_num, ether_sprintf(&ea));
1413 		rv = DDI_FAILURE;
1414 	}
1415 
1416 	kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
1417 
1418 	return (rv);
1419 }
1420 
1421 /* remove a port from the device */
1422 static int
1423 vgen_remove_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
1424 {
1425 	uint64_t	port_num;
1426 	vgen_port_t	*portp;
1427 	vgen_portlist_t	*plistp;
1428 
1429 	/* read "id" property to get the port number */
1430 	if (md_get_prop_val(mdp, mdex, id_propname, &port_num)) {
1431 		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
1432 		return (DDI_FAILURE);
1433 	}
1434 
1435 	plistp = &(vgenp->vgenports);
1436 
1437 	WRITE_ENTER(&plistp->rwlock);
1438 	portp = vgen_port_lookup(plistp, (int)port_num);
1439 	if (portp == NULL) {
1440 		DWARN(vgenp, NULL, "can't find port(%lx)\n", port_num);
1441 		RW_EXIT(&plistp->rwlock);
1442 		return (DDI_FAILURE);
1443 	}
1444 
1445 	vgen_port_detach_mdeg(portp);
1446 	RW_EXIT(&plistp->rwlock);
1447 
1448 	return (DDI_SUCCESS);
1449 }
1450 
1451 /* attach a port to the device based on mdeg data */
1452 static int
1453 vgen_port_attach_mdeg(vgen_t *vgenp, int port_num, uint64_t *ldcids,
1454 	int num_ids, struct ether_addr *macaddr, boolean_t vsw_port)
1455 {
1456 	vgen_port_t		*portp;
1457 	vgen_portlist_t		*plistp;
1458 	int			i;
1459 
1460 	portp = kmem_zalloc(sizeof (vgen_port_t), KM_NOSLEEP);
1461 	if (portp == NULL) {
1462 		return (DDI_FAILURE);
1463 	}
1464 	portp->vgenp = vgenp;
1465 	portp->port_num = port_num;
1466 
1467 	DBG1(vgenp, NULL, "port_num(%d)\n", portp->port_num);
1468 
1469 	portp->ldclist.num_ldcs = 0;
1470 	portp->ldclist.headp = NULL;
1471 	rw_init(&portp->ldclist.rwlock, NULL, RW_DRIVER, NULL);
1472 
1473 	ether_copy(macaddr, &portp->macaddr);
1474 	for (i = 0; i < num_ids; i++) {
1475 		DBG2(vgenp, NULL, "ldcid (%lx)\n", ldcids[i]);
1476 		if (vgen_ldc_attach(portp, ldcids[i]) == DDI_FAILURE) {
1477 			rw_destroy(&portp->ldclist.rwlock);
1478 			vgen_port_detach(portp);
1479 			return (DDI_FAILURE);
1480 		}
1481 	}
1482 
1483 	/* link it into the list of ports */
1484 	plistp = &(vgenp->vgenports);
1485 	WRITE_ENTER(&plistp->rwlock);
1486 	vgen_port_list_insert(portp);
1487 	RW_EXIT(&plistp->rwlock);
1488 
1489 	/* This port is connected to the vsw on domain0 */
1490 	if (vsw_port)
1491 		vgenp->vsw_portp = portp;
1492 
1493 	if (vgenp->flags & VGEN_STARTED) {	/* interface is configured */
1494 		vgen_port_init(portp);
1495 	}
1496 
1497 	DBG1(vgenp, NULL, "exit: port_num(%d)\n", portp->port_num);
1498 	return (DDI_SUCCESS);
1499 }
1500 
1501 /* detach a port from the device based on mdeg data */
1502 static void
1503 vgen_port_detach_mdeg(vgen_port_t *portp)
1504 {
1505 	vgen_t *vgenp = portp->vgenp;
1506 
1507 	DBG1(vgenp, NULL, "enter: port_num(%d)\n", portp->port_num);
1508 	/* stop the port if needed */
1509 	if (vgenp->flags & VGEN_STARTED) {
1510 		vgen_port_uninit(portp);
1511 	}
1512 	vgen_port_detach(portp);
1513 
1514 	DBG1(vgenp, NULL, "exit: port_num(%d)\n", portp->port_num);
1515 }
1516 
/*
 * Handle a port node that is present in both the previous and the current
 * machine description. This is currently a placeholder: every argument is
 * unused and DDI_SUCCESS is always returned.
 */
static int
vgen_update_port(vgen_t *vgenp, md_t *curr_mdp, mde_cookie_t curr_mdex,
	md_t *prev_mdp, mde_cookie_t prev_mdex)
{
	 _NOTE(ARGUNUSED(vgenp, curr_mdp, curr_mdex, prev_mdp, prev_mdex))

	/* NOTE: TBD */
	return (DDI_SUCCESS);
}
1526 
1527 static uint64_t
1528 vgen_port_stat(vgen_port_t *portp, uint_t stat)
1529 {
1530 	vgen_ldclist_t	*ldclp;
1531 	vgen_ldc_t *ldcp;
1532 	uint64_t	val;
1533 
1534 	val = 0;
1535 	ldclp = &portp->ldclist;
1536 
1537 	READ_ENTER(&ldclp->rwlock);
1538 	for (ldcp = ldclp->headp; ldcp != NULL; ldcp = ldcp->nextp) {
1539 		val += vgen_ldc_stat(ldcp, stat);
1540 	}
1541 	RW_EXIT(&ldclp->rwlock);
1542 
1543 	return (val);
1544 }
1545 
/*
 * Attach the channel corresponding to the given ldc_id to the port.
 *
 * A vgen_ldc_t is allocated and set up step by step: locks, LDC handle,
 * optional receive soft interrupt and worker thread, LDC callback,
 * transmit ring, receive mblk pools and kstats. It is then linked at the
 * head of the port's channel list and marked CHANNEL_ATTACHED.
 *
 * Each completed step sets a bit in attach_state; on failure the code at
 * ldc_attach_failed undoes exactly the steps whose bits are set.
 *
 * Returns DDI_SUCCESS, or DDI_FAILURE if any step fails.
 */
static int
vgen_ldc_attach(vgen_port_t *portp, uint64_t ldc_id)
{
	vgen_t 		*vgenp;
	vgen_ldclist_t	*ldclp;
	vgen_ldc_t 	*ldcp, **prev_ldcp;
	ldc_attr_t 	attr;
	int 		status;
	ldc_status_t	istatus;
	char		kname[MAXNAMELEN];
	int		instance;
	/* one bit per completed attach step, consumed by the unwind code */
	enum	{AST_init = 0x0, AST_ldc_alloc = 0x1,
		AST_mutex_init = 0x2, AST_ldc_init = 0x4,
		AST_ldc_reg_cb = 0x8, AST_alloc_tx_ring = 0x10,
		AST_create_rxmblks = 0x20, AST_add_softintr = 0x40,
		AST_create_rcv_thread = 0x80} attach_state;

	attach_state = AST_init;
	vgenp = portp->vgenp;
	ldclp = &portp->ldclist;

	ldcp = kmem_zalloc(sizeof (vgen_ldc_t), KM_NOSLEEP);
	if (ldcp == NULL) {
		/* no state bits are set yet, so the unwind is a no-op */
		goto ldc_attach_failed;
	}
	ldcp->ldc_id = ldc_id;
	ldcp->portp = portp;

	attach_state |= AST_ldc_alloc;

	mutex_init(&ldcp->txlock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&ldcp->cblock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&ldcp->tclock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&ldcp->wrlock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&ldcp->rxlock, NULL, MUTEX_DRIVER, NULL);

	attach_state |= AST_mutex_init;

	/* set up the LDC channel attributes and get a handle for it */
	attr.devclass = LDC_DEV_NT;
	attr.instance = ddi_get_instance(vgenp->vnetdip);
	attr.mode = LDC_MODE_UNRELIABLE;
	attr.mtu = vnet_ldc_mtu;
	status = ldc_init(ldc_id, &attr, &ldcp->ldc_handle);
	if (status != 0) {
		DWARN(vgenp, ldcp, "ldc_init failed,rv (%d)\n", status);
		goto ldc_attach_failed;
	}
	attach_state |= AST_ldc_init;

	/*
	 * The soft interrupt and the receive worker thread are only
	 * created when vgen_rcv_thread_enabled is set.
	 */
	if (vgen_rcv_thread_enabled) {
		ldcp->rcv_thr_flags = 0;
		ldcp->rcv_mhead = ldcp->rcv_mtail = NULL;
		ldcp->soft_pri = PIL_6;

		status = ddi_intr_add_softint(vgenp->vnetdip,
		    &ldcp->soft_handle, ldcp->soft_pri,
		    vgen_ldc_rcv_softintr, (void *)ldcp);
		if (status != DDI_SUCCESS) {
			DWARN(vgenp, ldcp, "add_softint failed, rv (%d)\n",
			    status);
			goto ldc_attach_failed;
		}

		/*
		 * Initialize the soft_lock with the same priority as
		 * the soft interrupt to protect from the soft interrupt.
		 */
		mutex_init(&ldcp->soft_lock, NULL, MUTEX_DRIVER,
		    DDI_INTR_PRI(ldcp->soft_pri));
		attach_state |= AST_add_softintr;

		mutex_init(&ldcp->rcv_thr_lock, NULL, MUTEX_DRIVER, NULL);
		cv_init(&ldcp->rcv_thr_cv, NULL, CV_DRIVER, NULL);
		ldcp->rcv_thread = thread_create(NULL, 2 * DEFAULTSTKSZ,
		    vgen_ldc_rcv_worker, ldcp, 0, &p0, TS_RUN, maxclsyspri);

		/*
		 * The state bit is set before the NULL check so that the
		 * lock and cv initialized above are destroyed by the unwind
		 * code even if no thread was created.
		 */
		attach_state |= AST_create_rcv_thread;
		if (ldcp->rcv_thread == NULL) {
			DWARN(vgenp, ldcp, "Failed to create worker thread");
			goto ldc_attach_failed;
		}
	}

	status = ldc_reg_callback(ldcp->ldc_handle, vgen_ldc_cb, (caddr_t)ldcp);
	if (status != 0) {
		DWARN(vgenp, ldcp, "ldc_reg_callback failed, rv (%d)\n",
		    status);
		goto ldc_attach_failed;
	}
	attach_state |= AST_ldc_reg_cb;

	/* record the channel status; it is expected to be LDC_INIT here */
	(void) ldc_status(ldcp->ldc_handle, &istatus);
	ASSERT(istatus == LDC_INIT);
	ldcp->ldc_status = istatus;

	/* allocate transmit resources */
	status = vgen_alloc_tx_ring(ldcp);
	if (status != 0) {
		goto ldc_attach_failed;
	}
	attach_state |= AST_alloc_tx_ring;

	/* allocate receive resources */
	status = vio_init_multipools(&ldcp->vmp, VGEN_NUM_VMPOOLS,
	    vgen_rbufsz1, vgen_rbufsz2, vgen_rbufsz3,
	    vgen_nrbufs1, vgen_nrbufs2, vgen_nrbufs3);
	if (status != 0) {
		goto ldc_attach_failed;
	}
	attach_state |= AST_create_rxmblks;

	/* Setup kstats for the channel */
	instance = ddi_get_instance(vgenp->vnetdip);
	(void) sprintf(kname, "vnetldc0x%lx", ldcp->ldc_id);
	ldcp->ksp = vgen_setup_kstats("vnet", instance, kname, &ldcp->stats);
	if (ldcp->ksp == NULL) {
		goto ldc_attach_failed;
	}

	/* initialize vgen_versions supported */
	bcopy(vgen_versions, ldcp->vgen_versions, sizeof (ldcp->vgen_versions));

	/* link it into the list of channels for this port */
	WRITE_ENTER(&ldclp->rwlock);
	prev_ldcp = (vgen_ldc_t **)(&ldclp->headp);
	ldcp->nextp = *prev_ldcp;
	*prev_ldcp = ldcp;
	ldclp->num_ldcs++;
	RW_EXIT(&ldclp->rwlock);

	/* vgen_ldc_detach() only tears down channels carrying this flag */
	ldcp->flags |= CHANNEL_ATTACHED;
	return (DDI_SUCCESS);

ldc_attach_failed:
	/* undo only the steps recorded in attach_state */
	if (attach_state & AST_ldc_reg_cb) {
		(void) ldc_unreg_callback(ldcp->ldc_handle);
	}
	if (attach_state & AST_add_softintr) {
		(void) ddi_intr_remove_softint(ldcp->soft_handle);
		mutex_destroy(&ldcp->soft_lock);
	}
	if (attach_state & AST_create_rcv_thread) {
		if (ldcp->rcv_thread != NULL) {
			vgen_stop_rcv_thread(ldcp);
		}
		mutex_destroy(&ldcp->rcv_thr_lock);
		cv_destroy(&ldcp->rcv_thr_cv);
	}
	if (attach_state & AST_create_rxmblks) {
		vio_mblk_pool_t *fvmp = NULL;

		/* the pools were never used, so none should be left over */
		vio_destroy_multipools(&ldcp->vmp, &fvmp);
		ASSERT(fvmp == NULL);
	}
	if (attach_state & AST_alloc_tx_ring) {
		vgen_free_tx_ring(ldcp);
	}
	if (attach_state & AST_ldc_init) {
		(void) ldc_fini(ldcp->ldc_handle);
	}
	if (attach_state & AST_mutex_init) {
		mutex_destroy(&ldcp->tclock);
		mutex_destroy(&ldcp->txlock);
		mutex_destroy(&ldcp->cblock);
		mutex_destroy(&ldcp->wrlock);
		mutex_destroy(&ldcp->rxlock);
	}
	if (attach_state & AST_ldc_alloc) {
		KMEM_FREE(ldcp);
	}
	return (DDI_FAILURE);
}
1719 
1720 /* detach a channel from the port */
1721 static void
1722 vgen_ldc_detach(vgen_ldc_t *ldcp)
1723 {
1724 	vgen_port_t	*portp;
1725 	vgen_t 		*vgenp;
1726 	vgen_ldc_t 	*pldcp;
1727 	vgen_ldc_t	**prev_ldcp;
1728 	vgen_ldclist_t	*ldclp;
1729 
1730 	portp = ldcp->portp;
1731 	vgenp = portp->vgenp;
1732 	ldclp = &portp->ldclist;
1733 
1734 	prev_ldcp =  (vgen_ldc_t **)&ldclp->headp;
1735 	for (; (pldcp = *prev_ldcp) != NULL; prev_ldcp = &pldcp->nextp) {
1736 		if (pldcp == ldcp) {
1737 			break;
1738 		}
1739 	}
1740 
1741 	if (pldcp == NULL) {
1742 		/* invalid ldcp? */
1743 		return;
1744 	}
1745 
1746 	if (ldcp->ldc_status != LDC_INIT) {
1747 		DWARN(vgenp, ldcp, "ldc_status is not INIT\n");
1748 	}
1749 
1750 	if (ldcp->flags & CHANNEL_ATTACHED) {
1751 		ldcp->flags &= ~(CHANNEL_ATTACHED);
1752 
1753 		(void) ldc_unreg_callback(ldcp->ldc_handle);
1754 		if (ldcp->rcv_thread != NULL) {
1755 			/* First stop the receive thread */
1756 			vgen_stop_rcv_thread(ldcp);
1757 			(void) ddi_intr_remove_softint(ldcp->soft_handle);
1758 			mutex_destroy(&ldcp->soft_lock);
1759 			mutex_destroy(&ldcp->rcv_thr_lock);
1760 			cv_destroy(&ldcp->rcv_thr_cv);
1761 		}
1762 		/* Free any queued messages */
1763 		if (ldcp->rcv_mhead != NULL) {
1764 			freemsgchain(ldcp->rcv_mhead);
1765 			ldcp->rcv_mhead = NULL;
1766 		}
1767 
1768 		vgen_destroy_kstats(ldcp->ksp);
1769 		ldcp->ksp = NULL;
1770 
1771 		/*
1772 		 * if we cannot reclaim all mblks, put this
1773 		 * on the list of pools(vgenp->rmp) to be reclaimed when the
1774 		 * device gets detached (see vgen_uninit()).
1775 		 */
1776 		vio_destroy_multipools(&ldcp->vmp, &vgenp->rmp);
1777 
1778 		/* free transmit resources */
1779 		vgen_free_tx_ring(ldcp);
1780 
1781 		(void) ldc_fini(ldcp->ldc_handle);
1782 		mutex_destroy(&ldcp->tclock);
1783 		mutex_destroy(&ldcp->txlock);
1784 		mutex_destroy(&ldcp->cblock);
1785 		mutex_destroy(&ldcp->wrlock);
1786 		mutex_destroy(&ldcp->rxlock);
1787 
1788 		/* unlink it from the list */
1789 		*prev_ldcp = ldcp->nextp;
1790 		ldclp->num_ldcs--;
1791 		KMEM_FREE(ldcp);
1792 	}
1793 }
1794 
1795 /*
1796  * This function allocates transmit resources for the channel.
1797  * The resources consist of a transmit descriptor ring and an associated
1798  * transmit buffer ring.
1799  */
1800 static int
1801 vgen_alloc_tx_ring(vgen_ldc_t *ldcp)
1802 {
1803 	void *tbufp;
1804 	ldc_mem_info_t minfo;
1805 	uint32_t txdsize;
1806 	uint32_t tbufsize;
1807 	int status;
1808 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
1809 
1810 	ldcp->num_txds = vnet_ntxds;
1811 	txdsize = sizeof (vnet_public_desc_t);
1812 	tbufsize = sizeof (vgen_private_desc_t);
1813 
1814 	/* allocate transmit buffer ring */
1815 	tbufp = kmem_zalloc(ldcp->num_txds * tbufsize, KM_NOSLEEP);
1816 	if (tbufp == NULL) {
1817 		return (DDI_FAILURE);
1818 	}
1819 
1820 	/* create transmit descriptor ring */
1821 	status = ldc_mem_dring_create(ldcp->num_txds, txdsize,
1822 	    &ldcp->tx_dhandle);
1823 	if (status) {
1824 		DWARN(vgenp, ldcp, "ldc_mem_dring_create() failed\n");
1825 		kmem_free(tbufp, ldcp->num_txds * tbufsize);
1826 		return (DDI_FAILURE);
1827 	}
1828 
1829 	/* get the addr of descripror ring */
1830 	status = ldc_mem_dring_info(ldcp->tx_dhandle, &minfo);
1831 	if (status) {
1832 		DWARN(vgenp, ldcp, "ldc_mem_dring_info() failed\n");
1833 		kmem_free(tbufp, ldcp->num_txds * tbufsize);
1834 		(void) ldc_mem_dring_destroy(ldcp->tx_dhandle);
1835 		ldcp->tbufp = NULL;
1836 		return (DDI_FAILURE);
1837 	}
1838 	ldcp->txdp = (vnet_public_desc_t *)(minfo.vaddr);
1839 	ldcp->tbufp = tbufp;
1840 
1841 	ldcp->txdendp = &((ldcp->txdp)[ldcp->num_txds]);
1842 	ldcp->tbufendp = &((ldcp->tbufp)[ldcp->num_txds]);
1843 
1844 	return (DDI_SUCCESS);
1845 }
1846 
1847 /* Free transmit resources for the channel */
1848 static void
1849 vgen_free_tx_ring(vgen_ldc_t *ldcp)
1850 {
1851 	int tbufsize = sizeof (vgen_private_desc_t);
1852 
1853 	/* free transmit descriptor ring */
1854 	(void) ldc_mem_dring_destroy(ldcp->tx_dhandle);
1855 
1856 	/* free transmit buffer ring */
1857 	kmem_free(ldcp->tbufp, ldcp->num_txds * tbufsize);
1858 	ldcp->txdp = ldcp->txdendp = NULL;
1859 	ldcp->tbufp = ldcp->tbufendp = NULL;
1860 }
1861 
1862 /* enable transmit/receive on the channels for the port */
1863 static void
1864 vgen_init_ldcs(vgen_port_t *portp)
1865 {
1866 	vgen_ldclist_t	*ldclp = &portp->ldclist;
1867 	vgen_ldc_t	*ldcp;
1868 
1869 	READ_ENTER(&ldclp->rwlock);
1870 	ldcp =  ldclp->headp;
1871 	for (; ldcp  != NULL; ldcp = ldcp->nextp) {
1872 		(void) vgen_ldc_init(ldcp);
1873 	}
1874 	RW_EXIT(&ldclp->rwlock);
1875 }
1876 
1877 /* stop transmit/receive on the channels for the port */
1878 static void
1879 vgen_uninit_ldcs(vgen_port_t *portp)
1880 {
1881 	vgen_ldclist_t	*ldclp = &portp->ldclist;
1882 	vgen_ldc_t	*ldcp;
1883 
1884 	READ_ENTER(&ldclp->rwlock);
1885 	ldcp =  ldclp->headp;
1886 	for (; ldcp  != NULL; ldcp = ldcp->nextp) {
1887 		vgen_ldc_uninit(ldcp);
1888 	}
1889 	RW_EXIT(&ldclp->rwlock);
1890 }
1891 
/*
 * Enable transmit/receive on the channel.
 *
 * With the channel locks held (LDC_LOCK), the LDC channel is opened, the
 * transmit buffers are initialized, callbacks are enabled and the channel
 * is brought up. The transmit watchdog is started and the channel is
 * marked CHANNEL_STARTED. If the channel is already UP, the handshake
 * with the peer is started before returning.
 *
 * init_state records the steps completed so far, so that the failure path
 * undoes only those.
 *
 * Returns DDI_SUCCESS, or DDI_FAILURE if opening the channel, initializing
 * the transmit buffers or enabling callbacks fails. A channel that does
 * not reach LDC_UP here is not treated as a failure.
 */
static int
vgen_ldc_init(vgen_ldc_t *ldcp)
{
	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
	ldc_status_t	istatus;
	int		rv;
	uint32_t	retries = 0;
	enum	{ ST_init = 0x0, ST_ldc_open = 0x1,
		ST_init_tbufs = 0x2, ST_cb_enable = 0x4} init_state;
	init_state = ST_init;

	DBG1(vgenp, ldcp, "enter\n");
	LDC_LOCK(ldcp);

	rv = ldc_open(ldcp->ldc_handle);
	if (rv != 0) {
		DWARN(vgenp, ldcp, "ldc_open failed: rv(%d)\n", rv);
		goto ldcinit_failed;
	}
	init_state |= ST_ldc_open;

	/* after a successful open the channel must be OPEN or READY */
	(void) ldc_status(ldcp->ldc_handle, &istatus);
	if (istatus != LDC_OPEN && istatus != LDC_READY) {
		DWARN(vgenp, ldcp, "status(%d) is not OPEN/READY\n", istatus);
		goto ldcinit_failed;
	}
	ldcp->ldc_status = istatus;

	rv = vgen_init_tbufs(ldcp);
	if (rv != 0) {
		DWARN(vgenp, ldcp, "vgen_init_tbufs() failed\n");
		goto ldcinit_failed;
	}
	init_state |= ST_init_tbufs;

	rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_ENABLE);
	if (rv != 0) {
		DWARN(vgenp, ldcp, "ldc_set_cb_mode failed: rv(%d)\n", rv);
		goto ldcinit_failed;
	}

	init_state |= ST_cb_enable;

	/*
	 * Try to bring the channel up. While ldc_up() returns EWOULDBLOCK
	 * the call is retried after a VGEN_LDC_UP_DELAY wait, until the
	 * retry count reaches vgen_ldcup_retries. Any other return value
	 * ends the loop and is not treated as an error.
	 */
	do {
		rv = ldc_up(ldcp->ldc_handle);
		if ((rv != 0) && (rv == EWOULDBLOCK)) {
			DBG2(vgenp, ldcp, "ldc_up err rv(%d)\n", rv);
			drv_usecwait(VGEN_LDC_UP_DELAY);
		}
		if (retries++ >= vgen_ldcup_retries)
			break;
	} while (rv == EWOULDBLOCK);

	/*
	 * NOTE(review): this warns when the channel IS up, which reads as
	 * the opposite of what one would expect to warn about - confirm
	 * the intended condition and message level.
	 */
	(void) ldc_status(ldcp->ldc_handle, &istatus);
	if (istatus == LDC_UP) {
		DWARN(vgenp, ldcp, "status(%d) is UP\n", istatus);
	}

	ldcp->ldc_status = istatus;

	/* initialize transmit watchdog timeout */
	ldcp->wd_tid = timeout(vgen_ldc_watchdog, (caddr_t)ldcp,
	    drv_usectohz(vnet_ldcwd_interval * 1000));

	ldcp->hphase = -1;
	ldcp->flags |= CHANNEL_STARTED;

	/* if channel is already UP - start handshake */
	if (istatus == LDC_UP) {
		vgen_t *vgenp = LDC_TO_VGEN(ldcp);
		if (ldcp->portp != vgenp->vsw_portp) {
			/*
			 * modify fdb entry to use this port as the
			 * channel is up, instead of going through the
			 * vsw-port (see comments in vgen_port_init())
			 */
			vnet_modify_fdb(vgenp->vnetp,
			    (uint8_t *)&ldcp->portp->macaddr,
			    vgen_tx, ldcp->portp, B_FALSE);
		}

		/* Initialize local session id */
		ldcp->local_sid = ddi_get_lbolt();

		/* clear peer session id */
		ldcp->peer_sid = 0;
		ldcp->hretries = 0;

		/* Initiate Handshake process with peer ldc endpoint */
		vgen_reset_hphase(ldcp);

		/*
		 * The transmit-side locks (tclock, txlock, wrlock) are
		 * dropped before the handshake is started; rxlock and
		 * cblock stay held across vgen_handshake() and are dropped
		 * after it. Presumably these five are the locks taken by
		 * LDC_LOCK() - verify against the macro definition.
		 */
		mutex_exit(&ldcp->tclock);
		mutex_exit(&ldcp->txlock);
		mutex_exit(&ldcp->wrlock);
		vgen_handshake(vh_nextphase(ldcp));
		mutex_exit(&ldcp->rxlock);
		mutex_exit(&ldcp->cblock);
	} else {
		LDC_UNLOCK(ldcp);
	}

	return (DDI_SUCCESS);

ldcinit_failed:
	/* undo the completed steps, in reverse order */
	if (init_state & ST_cb_enable) {
		(void) ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE);
	}
	if (init_state & ST_init_tbufs) {
		vgen_uninit_tbufs(ldcp);
	}
	if (init_state & ST_ldc_open) {
		(void) ldc_close(ldcp->ldc_handle);
	}
	LDC_UNLOCK(ldcp);
	DBG1(vgenp, ldcp, "exit\n");
	return (DDI_FAILURE);
}
2010 
/*
 * Stop transmit/receive on the channel.
 *
 * Nothing is done (beyond a warning) if the channel is not marked
 * CHANNEL_STARTED. Otherwise callbacks are disabled, the handshake and
 * transmit watchdog timeouts are cancelled with the channel locks dropped,
 * and then, with the locks held again, the handshake state is reset, the
 * transmit buffers are released and the LDC channel is closed. The channel
 * ends up with status LDC_INIT and CHANNEL_STARTED cleared.
 */
static void
vgen_ldc_uninit(vgen_ldc_t *ldcp)
{
	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
	int	rv;

	DBG1(vgenp, ldcp, "enter\n");
	LDC_LOCK(ldcp);

	if ((ldcp->flags & CHANNEL_STARTED) == 0) {
		LDC_UNLOCK(ldcp);
		DWARN(vgenp, ldcp, "CHANNEL_STARTED flag is not set\n");
		return;
	}

	/* disable further callbacks */
	rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE);
	if (rv != 0) {
		/* a failure here is only logged; the shutdown continues */
		DWARN(vgenp, ldcp, "ldc_set_cb_mode failed\n");
	}

	/*
	 * clear handshake done bit and wait for pending tx and cb to finish.
	 * release locks before untimeout(9F) is invoked to cancel timeouts.
	 */
	ldcp->hphase &= ~(VH_DONE);
	LDC_UNLOCK(ldcp);

	/* cancel handshake watchdog timeout */
	if (ldcp->htid) {
		(void) untimeout(ldcp->htid);
		ldcp->htid = 0;
	}

	/* cancel transmit watchdog timeout */
	if (ldcp->wd_tid) {
		(void) untimeout(ldcp->wd_tid);
		ldcp->wd_tid = 0;
	}

	/* short busy-wait before the locks are taken again */
	drv_usecwait(1000);

	/* acquire locks again; any pending transmits and callbacks are done */
	LDC_LOCK(ldcp);

	vgen_reset_hphase(ldcp);

	vgen_uninit_tbufs(ldcp);

	rv = ldc_close(ldcp->ldc_handle);
	if (rv != 0) {
		DWARN(vgenp, ldcp, "ldc_close err\n");
	}
	/* the channel is recorded as LDC_INIT even if ldc_close() failed */
	ldcp->ldc_status = LDC_INIT;
	ldcp->flags &= ~(CHANNEL_STARTED);

	LDC_UNLOCK(ldcp);

	DBG1(vgenp, ldcp, "exit\n");
}
2072 
/*
 * Initialize the transmit buffer ring for the channel.
 *
 * Both rings are zeroed and one data area of num_txds * VGEN_TXDBLK_SZ
 * bytes is allocated (ldcp->tx_datap). Each private descriptor gets an LDC
 * memory handle bound to its own VGEN_TXDBLK_SZ slice of that area, has
 * its cookies recorded and is marked free together with its public
 * descriptor. Finally the ring walking pointers, the transmit sequence
 * number and index, and the peer reschedule state are reset.
 *
 * Returns DDI_SUCCESS, or DDI_FAILURE if a handle cannot be allocated or
 * bound, or if a bind yields zero or more than MAX_COOKIES cookies. On
 * failure vgen_uninit_tbufs() is called to clean up what was set up.
 */
static int
vgen_init_tbufs(vgen_ldc_t *ldcp)
{
	vgen_private_desc_t	*tbufp;
	vnet_public_desc_t	*txdp;
	vio_dring_entry_hdr_t		*hdrp;
	int 			i;
	int 			rv;
	caddr_t			datap = NULL;
	int			ci;
	uint32_t		ncookies;

	bzero(ldcp->tbufp, sizeof (*tbufp) * (ldcp->num_txds));
	bzero(ldcp->txdp, sizeof (*txdp) * (ldcp->num_txds));

	/* one contiguous data area, carved into per-descriptor blocks */
	datap = kmem_zalloc(ldcp->num_txds * VGEN_TXDBLK_SZ, KM_SLEEP);
	ldcp->tx_datap = datap;

	/*
	 * for each private descriptor, allocate a ldc mem_handle which is
	 * required to map the data during transmit, set the flags
	 * to free (available for use by transmit routine).
	 */

	for (i = 0; i < ldcp->num_txds; i++) {

		tbufp = &(ldcp->tbufp[i]);
		rv = ldc_mem_alloc_handle(ldcp->ldc_handle,
		    &(tbufp->memhandle));
		if (rv) {
			/* make sure no stale handle value is left behind */
			tbufp->memhandle = 0;
			goto init_tbufs_failed;
		}

		/*
		 * bind ldc memhandle to the corresponding transmit buffer.
		 */
		ci = ncookies = 0;
		rv = ldc_mem_bind_handle(tbufp->memhandle,
		    (caddr_t)datap, VGEN_TXDBLK_SZ, LDC_SHADOW_MAP,
		    LDC_MEM_R, &(tbufp->memcookie[ci]), &ncookies);
		if (rv != 0) {
			goto init_tbufs_failed;
		}

		/*
		 * successful in binding the handle to tx data buffer.
		 * set datap in the private descr to this buffer.
		 */
		tbufp->datap = datap;

		if ((ncookies == 0) ||
		    (ncookies > MAX_COOKIES)) {
			goto init_tbufs_failed;
		}

		/* the bind returned the first cookie; fetch the others */
		for (ci = 1; ci < ncookies; ci++) {
			rv = ldc_mem_nextcookie(tbufp->memhandle,
			    &(tbufp->memcookie[ci]));
			if (rv != 0) {
				goto init_tbufs_failed;
			}
		}

		tbufp->ncookies = ncookies;
		/* advance to the data block of the next descriptor */
		datap += VGEN_TXDBLK_SZ;

		/* mark the private and the public descriptor as free */
		tbufp->flags = VGEN_PRIV_DESC_FREE;
		txdp = &(ldcp->txdp[i]);
		hdrp = &txdp->hdr;
		hdrp->dstate = VIO_DESC_FREE;
		hdrp->ack = B_FALSE;
		tbufp->descp = txdp;

	}

	/* reset tbuf walking pointers */
	ldcp->next_tbufp = ldcp->tbufp;
	ldcp->cur_tbufp = ldcp->tbufp;

	/* initialize tx seqnum and index */
	ldcp->next_txseq = VNET_ISS;
	ldcp->next_txi = 0;

	ldcp->resched_peer = B_TRUE;
	ldcp->resched_peer_txi = 0;

	return (DDI_SUCCESS);

init_tbufs_failed:;
	/* release whatever handles and data were set up before the failure */
	vgen_uninit_tbufs(ldcp);
	return (DDI_FAILURE);
}
2167 
2168 /* Uninitialize transmit buffer ring for the channel */
2169 static void
2170 vgen_uninit_tbufs(vgen_ldc_t *ldcp)
2171 {
2172 	vgen_private_desc_t	*tbufp = ldcp->tbufp;
2173 	int 			i;
2174 
2175 	/* for each tbuf (priv_desc), free ldc mem_handle */
2176 	for (i = 0; i < ldcp->num_txds; i++) {
2177 
2178 		tbufp = &(ldcp->tbufp[i]);
2179 
2180 		if (tbufp->datap) { /* if bound to a ldc memhandle */
2181 			(void) ldc_mem_unbind_handle(tbufp->memhandle);
2182 			tbufp->datap = NULL;
2183 		}
2184 		if (tbufp->memhandle) {
2185 			(void) ldc_mem_free_handle(tbufp->memhandle);
2186 			tbufp->memhandle = 0;
2187 		}
2188 	}
2189 
2190 	if (ldcp->tx_datap) {
2191 		/* prealloc'd tx data buffer */
2192 		kmem_free(ldcp->tx_datap, ldcp->num_txds * VGEN_TXDBLK_SZ);
2193 		ldcp->tx_datap = NULL;
2194 	}
2195 
2196 	bzero(ldcp->tbufp, sizeof (vgen_private_desc_t) * (ldcp->num_txds));
2197 	bzero(ldcp->txdp, sizeof (vnet_public_desc_t) * (ldcp->num_txds));
2198 }
2199 
2200 /* clobber tx descriptor ring */
2201 static void
2202 vgen_clobber_tbufs(vgen_ldc_t *ldcp)
2203 {
2204 	vnet_public_desc_t	*txdp;
2205 	vgen_private_desc_t	*tbufp;
2206 	vio_dring_entry_hdr_t	*hdrp;
2207 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
2208 	int i;
2209 #ifdef DEBUG
2210 	int ndone = 0;
2211 #endif
2212 
2213 	for (i = 0; i < ldcp->num_txds; i++) {
2214 
2215 		tbufp = &(ldcp->tbufp[i]);
2216 		txdp = tbufp->descp;
2217 		hdrp = &txdp->hdr;
2218 
2219 		if (tbufp->flags & VGEN_PRIV_DESC_BUSY) {
2220 			tbufp->flags = VGEN_PRIV_DESC_FREE;
2221 #ifdef DEBUG
2222 			if (hdrp->dstate == VIO_DESC_DONE)
2223 				ndone++;
2224 #endif
2225 			hdrp->dstate = VIO_DESC_FREE;
2226 			hdrp->ack = B_FALSE;
2227 		}
2228 	}
2229 	/* reset tbuf walking pointers */
2230 	ldcp->next_tbufp = ldcp->tbufp;
2231 	ldcp->cur_tbufp = ldcp->tbufp;
2232 
2233 	/* reset tx seqnum and index */
2234 	ldcp->next_txseq = VNET_ISS;
2235 	ldcp->next_txi = 0;
2236 
2237 	ldcp->resched_peer = B_TRUE;
2238 	ldcp->resched_peer_txi = 0;
2239 
2240 	DBG2(vgenp, ldcp, "num descrs done (%d)\n", ndone);
2241 }
2242 
2243 /* clobber receive descriptor ring */
2244 static void
2245 vgen_clobber_rxds(vgen_ldc_t *ldcp)
2246 {
2247 	ldcp->rx_dhandle = 0;
2248 	bzero(&ldcp->rx_dcookie, sizeof (ldcp->rx_dcookie));
2249 	ldcp->rxdp = NULL;
2250 	ldcp->next_rxi = 0;
2251 	ldcp->num_rxds = 0;
2252 	ldcp->next_rxseq = VNET_ISS;
2253 }
2254 
2255 /* initialize receive descriptor ring */
2256 static int
2257 vgen_init_rxds(vgen_ldc_t *ldcp, uint32_t num_desc, uint32_t desc_size,
2258 	ldc_mem_cookie_t *dcookie, uint32_t ncookies)
2259 {
2260 	int rv;
2261 	ldc_mem_info_t minfo;
2262 
2263 	rv = ldc_mem_dring_map(ldcp->ldc_handle, dcookie, ncookies, num_desc,
2264 	    desc_size, LDC_SHADOW_MAP, &(ldcp->rx_dhandle));
2265 	if (rv != 0) {
2266 		return (DDI_FAILURE);
2267 	}
2268 
2269 	/*
2270 	 * sucessfully mapped, now try to
2271 	 * get info about the mapped dring
2272 	 */
2273 	rv = ldc_mem_dring_info(ldcp->rx_dhandle, &minfo);
2274 	if (rv != 0) {
2275 		(void) ldc_mem_dring_unmap(ldcp->rx_dhandle);
2276 		return (DDI_FAILURE);
2277 	}
2278 
2279 	/*
2280 	 * save ring address, number of descriptors.
2281 	 */
2282 	ldcp->rxdp = (vnet_public_desc_t *)(minfo.vaddr);
2283 	bcopy(dcookie, &(ldcp->rx_dcookie), sizeof (*dcookie));
2284 	ldcp->num_rxdcookies = ncookies;
2285 	ldcp->num_rxds = num_desc;
2286 	ldcp->next_rxi = 0;
2287 	ldcp->next_rxseq = VNET_ISS;
2288 
2289 	return (DDI_SUCCESS);
2290 }
2291 
2292 /* get channel statistics */
2293 static uint64_t
2294 vgen_ldc_stat(vgen_ldc_t *ldcp, uint_t stat)
2295 {
2296 	vgen_stats_t *statsp;
2297 	uint64_t val;
2298 
2299 	val = 0;
2300 	statsp = &ldcp->stats;
2301 	switch (stat) {
2302 
2303 	case MAC_STAT_MULTIRCV:
2304 		val = statsp->multircv;
2305 		break;
2306 
2307 	case MAC_STAT_BRDCSTRCV:
2308 		val = statsp->brdcstrcv;
2309 		break;
2310 
2311 	case MAC_STAT_MULTIXMT:
2312 		val = statsp->multixmt;
2313 		break;
2314 
2315 	case MAC_STAT_BRDCSTXMT:
2316 		val = statsp->brdcstxmt;
2317 		break;
2318 
2319 	case MAC_STAT_NORCVBUF:
2320 		val = statsp->norcvbuf;
2321 		break;
2322 
2323 	case MAC_STAT_IERRORS:
2324 		val = statsp->ierrors;
2325 		break;
2326 
2327 	case MAC_STAT_NOXMTBUF:
2328 		val = statsp->noxmtbuf;
2329 		break;
2330 
2331 	case MAC_STAT_OERRORS:
2332 		val = statsp->oerrors;
2333 		break;
2334 
2335 	case MAC_STAT_COLLISIONS:
2336 		break;
2337 
2338 	case MAC_STAT_RBYTES:
2339 		val = statsp->rbytes;
2340 		break;
2341 
2342 	case MAC_STAT_IPACKETS:
2343 		val = statsp->ipackets;
2344 		break;
2345 
2346 	case MAC_STAT_OBYTES:
2347 		val = statsp->obytes;
2348 		break;
2349 
2350 	case MAC_STAT_OPACKETS:
2351 		val = statsp->opackets;
2352 		break;
2353 
2354 	/* stats not relevant to ldc, return 0 */
2355 	case MAC_STAT_IFSPEED:
2356 	case ETHER_STAT_ALIGN_ERRORS:
2357 	case ETHER_STAT_FCS_ERRORS:
2358 	case ETHER_STAT_FIRST_COLLISIONS:
2359 	case ETHER_STAT_MULTI_COLLISIONS:
2360 	case ETHER_STAT_DEFER_XMTS:
2361 	case ETHER_STAT_TX_LATE_COLLISIONS:
2362 	case ETHER_STAT_EX_COLLISIONS:
2363 	case ETHER_STAT_MACXMT_ERRORS:
2364 	case ETHER_STAT_CARRIER_ERRORS:
2365 	case ETHER_STAT_TOOLONG_ERRORS:
2366 	case ETHER_STAT_XCVR_ADDR:
2367 	case ETHER_STAT_XCVR_ID:
2368 	case ETHER_STAT_XCVR_INUSE:
2369 	case ETHER_STAT_CAP_1000FDX:
2370 	case ETHER_STAT_CAP_1000HDX:
2371 	case ETHER_STAT_CAP_100FDX:
2372 	case ETHER_STAT_CAP_100HDX:
2373 	case ETHER_STAT_CAP_10FDX:
2374 	case ETHER_STAT_CAP_10HDX:
2375 	case ETHER_STAT_CAP_ASMPAUSE:
2376 	case ETHER_STAT_CAP_PAUSE:
2377 	case ETHER_STAT_CAP_AUTONEG:
2378 	case ETHER_STAT_ADV_CAP_1000FDX:
2379 	case ETHER_STAT_ADV_CAP_1000HDX:
2380 	case ETHER_STAT_ADV_CAP_100FDX:
2381 	case ETHER_STAT_ADV_CAP_100HDX:
2382 	case ETHER_STAT_ADV_CAP_10FDX:
2383 	case ETHER_STAT_ADV_CAP_10HDX:
2384 	case ETHER_STAT_ADV_CAP_ASMPAUSE:
2385 	case ETHER_STAT_ADV_CAP_PAUSE:
2386 	case ETHER_STAT_ADV_CAP_AUTONEG:
2387 	case ETHER_STAT_LP_CAP_1000FDX:
2388 	case ETHER_STAT_LP_CAP_1000HDX:
2389 	case ETHER_STAT_LP_CAP_100FDX:
2390 	case ETHER_STAT_LP_CAP_100HDX:
2391 	case ETHER_STAT_LP_CAP_10FDX:
2392 	case ETHER_STAT_LP_CAP_10HDX:
2393 	case ETHER_STAT_LP_CAP_ASMPAUSE:
2394 	case ETHER_STAT_LP_CAP_PAUSE:
2395 	case ETHER_STAT_LP_CAP_AUTONEG:
2396 	case ETHER_STAT_LINK_ASMPAUSE:
2397 	case ETHER_STAT_LINK_PAUSE:
2398 	case ETHER_STAT_LINK_AUTONEG:
2399 	case ETHER_STAT_LINK_DUPLEX:
2400 	default:
2401 		val = 0;
2402 		break;
2403 
2404 	}
2405 	return (val);
2406 }
2407 
2408 /*
2409  * LDC channel is UP, start handshake process with peer.
2410  * Flag tells vnet_modify_fdb() about the context: set to B_TRUE if this
2411  * function is being called from transmit routine, otherwise B_FALSE.
2412  */
2413 static void
2414 vgen_handle_evt_up(vgen_ldc_t *ldcp, boolean_t flag)
2415 {
2416 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
2417 
2418 	DBG1(vgenp, ldcp, "enter\n");
2419 
2420 	ASSERT(MUTEX_HELD(&ldcp->cblock));
2421 
2422 	if (ldcp->portp != vgenp->vsw_portp) {
2423 		/*
2424 		 * modify fdb entry to use this port as the
2425 		 * channel is up, instead of going through the
2426 		 * vsw-port (see comments in vgen_port_init())
2427 		 */
2428 		vnet_modify_fdb(vgenp->vnetp, (uint8_t *)&ldcp->portp->macaddr,
2429 		    vgen_tx, ldcp->portp, flag);
2430 	}
2431 
2432 	/* Initialize local session id */
2433 	ldcp->local_sid = ddi_get_lbolt();
2434 
2435 	/* clear peer session id */
2436 	ldcp->peer_sid = 0;
2437 	ldcp->hretries = 0;
2438 
2439 	if (ldcp->hphase != VH_PHASE0) {
2440 		vgen_handshake_reset(ldcp);
2441 	}
2442 
2443 	/* Initiate Handshake process with peer ldc endpoint */
2444 	vgen_handshake(vh_nextphase(ldcp));
2445 
2446 	DBG1(vgenp, ldcp, "exit\n");
2447 }
2448 
2449 /*
2450  * LDC channel is Reset, terminate connection with peer and try to
2451  * bring the channel up again.
2452  * Flag tells vnet_modify_fdb() about the context: set to B_TRUE if this
2453  * function is being called from transmit routine, otherwise B_FALSE.
2454  */
2455 static void
2456 vgen_handle_evt_reset(vgen_ldc_t *ldcp, boolean_t flag)
2457 {
2458 	ldc_status_t istatus;
2459 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
2460 	int	rv;
2461 
2462 	DBG1(vgenp, ldcp, "enter\n");
2463 
2464 	ASSERT(MUTEX_HELD(&ldcp->cblock));
2465 
2466 	if ((ldcp->portp != vgenp->vsw_portp) &&
2467 	    (vgenp->vsw_portp != NULL)) {
2468 		/*
2469 		 * modify fdb entry to use vsw-port  as the
2470 		 * channel is reset and we don't have a direct
2471 		 * link to the destination (see comments
2472 		 * in vgen_port_init()).
2473 		 */
2474 		vnet_modify_fdb(vgenp->vnetp, (uint8_t *)&ldcp->portp->macaddr,
2475 		    vgen_tx, vgenp->vsw_portp, flag);
2476 	}
2477 
2478 	if (ldcp->hphase != VH_PHASE0) {
2479 		vgen_handshake_reset(ldcp);
2480 	}
2481 
2482 	/* try to bring the channel up */
2483 	rv = ldc_up(ldcp->ldc_handle);
2484 	if (rv != 0) {
2485 		DWARN(vgenp, ldcp, "ldc_up err rv(%d)\n", rv);
2486 	}
2487 
2488 	if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
2489 		DWARN(vgenp, ldcp, "ldc_status err\n");
2490 	} else {
2491 		ldcp->ldc_status = istatus;
2492 	}
2493 
2494 	/* if channel is already UP - restart handshake */
2495 	if (ldcp->ldc_status == LDC_UP) {
2496 		vgen_handle_evt_up(ldcp, flag);
2497 	}
2498 
2499 	DBG1(vgenp, ldcp, "exit\n");
2500 }
2501 
/*
 * Interrupt handler (LDC callback) for the channel.
 *
 *   event - bitmask of LDC_EVT_* bits; more than one may be set
 *   arg   - the vgen_ldc_t of the channel, as a caddr_t
 *
 * Events are processed under cblock in the order UP, READ, RESET/DOWN.
 * Received packets and any pending handshake timer cancellation are
 * dealt with only after cblock has been dropped.
 *
 * Always returns LDC_SUCCESS.
 */
static uint_t
vgen_ldc_cb(uint64_t event, caddr_t arg)
{
	/* NOTE(review): event is used below; this annotation looks stale */
	_NOTE(ARGUNUSED(event))
	vgen_ldc_t	*ldcp;
	vgen_t		*vgenp;
	ldc_status_t 	istatus;
	mblk_t		*bp = NULL;
	vgen_stats_t	*statsp;

	ldcp = (vgen_ldc_t *)arg;
	vgenp = LDC_TO_VGEN(ldcp);
	statsp = &ldcp->stats;

	DBG1(vgenp, ldcp, "enter\n");

	mutex_enter(&ldcp->cblock);
	statsp->callbacks++;
	/* nothing to do for a channel that is not open */
	if ((ldcp->ldc_status == LDC_INIT) || (ldcp->ldc_handle == NULL)) {
		DWARN(vgenp, ldcp, "status(%d) is LDC_INIT\n",
		    ldcp->ldc_status);
		mutex_exit(&ldcp->cblock);
		return (LDC_SUCCESS);
	}

	/*
	 * NOTE: not using switch() as event could be triggered by
	 * a state change and a read request. Also the ordering	of the
	 * check for the event types is deliberate.
	 */
	if (event & LDC_EVT_UP) {
		/* refresh the cached channel status before acting on it */
		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
			DWARN(vgenp, ldcp, "ldc_status err\n");
		} else {
			ldcp->ldc_status = istatus;
		}
		ASSERT(ldcp->ldc_status == LDC_UP);
		DWARN(vgenp, ldcp, "event(%lx) UP, status(%d)\n",
		    event, ldcp->ldc_status);

		vgen_handle_evt_up(ldcp, B_FALSE);

		/* UP is not expected together with RESET or DOWN */
		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);
	}

	if (event & LDC_EVT_READ) {
		DBG2(vgenp, ldcp, "event(%lx) READ, status(%d)\n",
		    event, ldcp->ldc_status);

		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);

		if (ldcp->rcv_thread != NULL) {
			/*
			 * If the receive thread is enabled, then
			 * wakeup the receive thread to process the
			 * LDC messages.
			 * cblock is dropped while rcv_thr_lock is held
			 * and re-acquired afterwards.
			 */
			mutex_exit(&ldcp->cblock);
			mutex_enter(&ldcp->rcv_thr_lock);
			if (!(ldcp->rcv_thr_flags & VGEN_WTHR_DATARCVD)) {
				ldcp->rcv_thr_flags |= VGEN_WTHR_DATARCVD;
				cv_signal(&ldcp->rcv_thr_cv);
			}
			mutex_exit(&ldcp->rcv_thr_lock);
			mutex_enter(&ldcp->cblock);
		} else  {
			/*
			 * No receive thread: read the messages here, then
			 * detach the chain of received packets so that it
			 * can be sent up once cblock has been released.
			 */
			vgen_handle_evt_read(ldcp);
			bp = ldcp->rcv_mhead;
			ldcp->rcv_mhead = ldcp->rcv_mtail = NULL;
		}
	}

	if (event & (LDC_EVT_RESET | LDC_EVT_DOWN)) {
		/* refresh the cached channel status before handling reset */
		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
			DWARN(vgenp, ldcp, "ldc_status error\n");
		} else {
			ldcp->ldc_status = istatus;
		}
		DWARN(vgenp, ldcp, "event(%lx) RESET/DOWN, status(%d)\n",
		    event, ldcp->ldc_status);

		vgen_handle_evt_reset(ldcp, B_FALSE);
	}
	mutex_exit(&ldcp->cblock);

	/* send up the received packets to MAC layer */
	if (bp != NULL) {
		vnet_rx(vgenp->vnetp, NULL, bp);
	}

	/* cancel_htid is examined here without cblock held */
	if (ldcp->cancel_htid) {
		/*
		 * Cancel handshake timer.
		 * untimeout(9F) will not return until the pending callback is
		 * cancelled or has run. No problems will result from calling
		 * untimeout if the handler has already completed.
		 * If the timeout handler did run, then it would just
		 * return as cancel_htid is set.
		 */
		(void) untimeout(ldcp->cancel_htid);
		ldcp->cancel_htid = 0;
	}
	DBG1(vgenp, ldcp, "exit\n");

	return (LDC_SUCCESS);
}
2609 
/*
 * Read and dispatch all messages currently queued on the channel.
 *
 * Each message is read with ldc_read(), has its session id checked once
 * the peer's sid is known, and is then handed to the control, data or
 * error message handler according to its VIO message type.  Processing
 * stops at the first error: ECONNRESET is treated as a channel reset,
 * any other non-zero result triggers a handshake retry.
 *
 * Locking: when a receive thread exists (rcv_thread != NULL) this
 * function takes and releases cblock itself; otherwise the caller
 * must already hold cblock.
 */
static void
vgen_handle_evt_read(vgen_ldc_t *ldcp)
{
	int		rv;
	uint64_t	ldcmsg[7];	/* buffer for one ldc message */
	size_t		msglen;
	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
	vio_msg_tag_t	*tagp;
	ldc_status_t 	istatus;
	boolean_t 	has_data;

	DBG1(vgenp, ldcp, "enter\n");

	/*
	 * If the receive thread is enabled, then the cblock
	 * need to be acquired here. If not, the vgen_ldc_cb()
	 * calls this function with cblock held already.
	 */
	if (ldcp->rcv_thread != NULL) {
		mutex_enter(&ldcp->cblock);
	} else {
		ASSERT(MUTEX_HELD(&ldcp->cblock));
	}

vgen_evt_read:
	do {
		/* msglen is in/out: buffer size in, bytes read out */
		msglen = sizeof (ldcmsg);
		rv = ldc_read(ldcp->ldc_handle, (caddr_t)&ldcmsg, &msglen);

		if (rv != 0) {
			DWARN(vgenp, ldcp, "err rv(%d) len(%d)\n",
			    rv, msglen);
			if (rv == ECONNRESET)
				goto vgen_evtread_error;
			break;
		}
		if (msglen == 0) {
			DBG2(vgenp, ldcp, "ldc_read NODATA");
			break;
		}
		DBG2(vgenp, ldcp, "ldc_read msglen(%d)", msglen);

		/* every message starts with a vio message tag */
		tagp = (vio_msg_tag_t *)ldcmsg;

		if (ldcp->peer_sid) {
			/*
			 * check sid only after we have received peer's sid
			 * in the version negotiate msg.
			 */
#ifdef DEBUG
			if (vgen_hdbg & HDBG_BAD_SID) {
				/* simulate bad sid condition */
				tagp->vio_sid = 0;
				vgen_hdbg &= ~(HDBG_BAD_SID);
			}
#endif
			rv = vgen_check_sid(ldcp, tagp);
			if (rv != VGEN_SUCCESS) {
				/*
				 * If sid mismatch is detected,
				 * reset the channel.
				 */
				ldcp->need_ldc_reset = B_TRUE;
				goto vgen_evtread_error;
			}
		}

		/*
		 * Only the ctrl and data handlers return a status; for
		 * the other cases rv keeps the value it already had.
		 */
		switch (tagp->vio_msgtype) {
		case VIO_TYPE_CTRL:
			rv = vgen_handle_ctrlmsg(ldcp, tagp);
			break;

		case VIO_TYPE_DATA:
			rv = vgen_handle_datamsg(ldcp, tagp);
			break;

		case VIO_TYPE_ERR:
			vgen_handle_errmsg(ldcp, tagp);
			break;

		default:
			DWARN(vgenp, ldcp, "Unknown VIO_TYPE(%x)\n",
			    tagp->vio_msgtype);
			break;
		}

		/*
		 * If an error is encountered, stop processing and
		 * handle the error.
		 */
		if (rv != 0) {
			goto vgen_evtread_error;
		}

	} while (msglen);

	/* check once more before exiting */
	rv = ldc_chkq(ldcp->ldc_handle, &has_data);
	if ((rv == 0) && (has_data == B_TRUE)) {
		DTRACE_PROBE(vgen_chkq);
		goto vgen_evt_read;
	}

	/* reached on errors and also on normal completion (rv == 0) */
vgen_evtread_error:
	if (rv == ECONNRESET) {
		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
			DWARN(vgenp, ldcp, "ldc_status err\n");
		} else {
			ldcp->ldc_status = istatus;
		}
		vgen_handle_evt_reset(ldcp, B_FALSE);
	} else if (rv) {
		vgen_handshake_retry(ldcp);
	}

	/*
	 * If the receive thread is enabled, then release the cblock
	 * taken above and cancel the handshake timeout here. When it
	 * is not enabled, vgen_ldc_cb() cancels the timeout itself
	 * after it has dropped cblock.
	 */
	if (ldcp->rcv_thread != NULL) {
		mutex_exit(&ldcp->cblock);
		if (ldcp->cancel_htid) {
			/*
			 * Cancel handshake timer. untimeout(9F) will
			 * not return until the pending callback is cancelled
			 * or has run. No problems will result from calling
			 * untimeout if the handler has already completed.
			 * If the timeout handler did run, then it would just
			 * return as cancel_htid is set.
			 */
			(void) untimeout(ldcp->cancel_htid);
			ldcp->cancel_htid = 0;
		}
	}

	DBG1(vgenp, ldcp, "exit\n");
}
2747 
2748 /* vgen handshake functions */
2749 
2750 /* change the hphase for the channel to the next phase */
2751 static vgen_ldc_t *
2752 vh_nextphase(vgen_ldc_t *ldcp)
2753 {
2754 	if (ldcp->hphase == VH_PHASE3) {
2755 		ldcp->hphase = VH_DONE;
2756 	} else {
2757 		ldcp->hphase++;
2758 	}
2759 	return (ldcp);
2760 }
2761 
2762 /*
2763  * Check whether the given version is supported or not and
2764  * return VGEN_SUCCESS if supported.
2765  */
2766 static int
2767 vgen_supported_version(vgen_ldc_t *ldcp, uint16_t ver_major,
2768 uint16_t ver_minor)
2769 {
2770 	vgen_ver_t	*versions = ldcp->vgen_versions;
2771 	int		i = 0;
2772 
2773 	while (i < VGEN_NUM_VER) {
2774 		if ((versions[i].ver_major == 0) &&
2775 		    (versions[i].ver_minor == 0)) {
2776 			break;
2777 		}
2778 		if ((versions[i].ver_major == ver_major) &&
2779 		    (versions[i].ver_minor == ver_minor)) {
2780 			return (VGEN_SUCCESS);
2781 		}
2782 		i++;
2783 	}
2784 	return (VGEN_FAILURE);
2785 }
2786 
2787 /*
2788  * Given a version, return VGEN_SUCCESS if a lower version is supported.
2789  */
2790 static int
2791 vgen_next_version(vgen_ldc_t *ldcp, vgen_ver_t *verp)
2792 {
2793 	vgen_ver_t	*versions = ldcp->vgen_versions;
2794 	int		i = 0;
2795 
2796 	while (i < VGEN_NUM_VER) {
2797 		if ((versions[i].ver_major == 0) &&
2798 		    (versions[i].ver_minor == 0)) {
2799 			break;
2800 		}
2801 		/*
2802 		 * if we support a lower minor version within the same major
2803 		 * version, or if we support a lower major version,
2804 		 * update the verp parameter with this lower version and
2805 		 * return success.
2806 		 */
2807 		if (((versions[i].ver_major == verp->ver_major) &&
2808 		    (versions[i].ver_minor < verp->ver_minor)) ||
2809 		    (versions[i].ver_major < verp->ver_major)) {
2810 			verp->ver_major = versions[i].ver_major;
2811 			verp->ver_minor = versions[i].ver_minor;
2812 			return (VGEN_SUCCESS);
2813 		}
2814 		i++;
2815 	}
2816 
2817 	return (VGEN_FAILURE);
2818 }
2819 
2820 /*
2821  * wrapper routine to send the given message over ldc using ldc_write().
2822  */
2823 static int
2824 vgen_sendmsg(vgen_ldc_t *ldcp, caddr_t msg,  size_t msglen,
2825     boolean_t caller_holds_lock)
2826 {
2827 	int	rv;
2828 	size_t	len;
2829 	uint32_t retries = 0;
2830 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
2831 
2832 	len = msglen;
2833 	if ((len == 0) || (msg == NULL))
2834 		return (VGEN_FAILURE);
2835 
2836 	if (!caller_holds_lock) {
2837 		mutex_enter(&ldcp->wrlock);
2838 	}
2839 
2840 	do {
2841 		len = msglen;
2842 		rv = ldc_write(ldcp->ldc_handle, (caddr_t)msg, &len);
2843 		if (retries++ >= vgen_ldcwr_retries)
2844 			break;
2845 	} while (rv == EWOULDBLOCK);
2846 
2847 	if (!caller_holds_lock) {
2848 		mutex_exit(&ldcp->wrlock);
2849 	}
2850 
2851 	if (rv != 0) {
2852 		DWARN(vgenp, ldcp, "ldc_write failed: rv(%d)\n",
2853 		    rv, msglen);
2854 		return (rv);
2855 	}
2856 
2857 	if (len != msglen) {
2858 		DWARN(vgenp, ldcp, "ldc_write failed: rv(%d) msglen (%d)\n",
2859 		    rv, msglen);
2860 		return (VGEN_FAILURE);
2861 	}
2862 
2863 	return (VGEN_SUCCESS);
2864 }
2865 
2866 /* send version negotiate message to the peer over ldc */
2867 static int
2868 vgen_send_version_negotiate(vgen_ldc_t *ldcp)
2869 {
2870 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
2871 	vio_ver_msg_t	vermsg;
2872 	vio_msg_tag_t	*tagp = &vermsg.tag;
2873 	int		rv;
2874 
2875 	bzero(&vermsg, sizeof (vermsg));
2876 
2877 	tagp->vio_msgtype = VIO_TYPE_CTRL;
2878 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
2879 	tagp->vio_subtype_env = VIO_VER_INFO;
2880 	tagp->vio_sid = ldcp->local_sid;
2881 
2882 	/* get version msg payload from ldcp->local */
2883 	vermsg.ver_major = ldcp->local_hparams.ver_major;
2884 	vermsg.ver_minor = ldcp->local_hparams.ver_minor;
2885 	vermsg.dev_class = ldcp->local_hparams.dev_class;
2886 
2887 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (vermsg), B_FALSE);
2888 	if (rv != VGEN_SUCCESS) {
2889 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
2890 		return (rv);
2891 	}
2892 
2893 	ldcp->hstate |= VER_INFO_SENT;
2894 	DBG2(vgenp, ldcp, "VER_INFO_SENT ver(%d,%d)\n",
2895 	    vermsg.ver_major, vermsg.ver_minor);
2896 
2897 	return (VGEN_SUCCESS);
2898 }
2899 
2900 /* send attr info message to the peer over ldc */
2901 static int
2902 vgen_send_attr_info(vgen_ldc_t *ldcp)
2903 {
2904 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
2905 	vnet_attr_msg_t	attrmsg;
2906 	vio_msg_tag_t	*tagp = &attrmsg.tag;
2907 	int		rv;
2908 
2909 	bzero(&attrmsg, sizeof (attrmsg));
2910 
2911 	tagp->vio_msgtype = VIO_TYPE_CTRL;
2912 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
2913 	tagp->vio_subtype_env = VIO_ATTR_INFO;
2914 	tagp->vio_sid = ldcp->local_sid;
2915 
2916 	/* get attr msg payload from ldcp->local */
2917 	attrmsg.mtu = ldcp->local_hparams.mtu;
2918 	attrmsg.addr = ldcp->local_hparams.addr;
2919 	attrmsg.addr_type = ldcp->local_hparams.addr_type;
2920 	attrmsg.xfer_mode = ldcp->local_hparams.xfer_mode;
2921 	attrmsg.ack_freq = ldcp->local_hparams.ack_freq;
2922 
2923 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (attrmsg), B_FALSE);
2924 	if (rv != VGEN_SUCCESS) {
2925 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
2926 		return (rv);
2927 	}
2928 
2929 	ldcp->hstate |= ATTR_INFO_SENT;
2930 	DBG2(vgenp, ldcp, "ATTR_INFO_SENT\n");
2931 
2932 	return (VGEN_SUCCESS);
2933 }
2934 
2935 /* send descriptor ring register message to the peer over ldc */
2936 static int
2937 vgen_send_dring_reg(vgen_ldc_t *ldcp)
2938 {
2939 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
2940 	vio_dring_reg_msg_t	msg;
2941 	vio_msg_tag_t		*tagp = &msg.tag;
2942 	int		rv;
2943 
2944 	bzero(&msg, sizeof (msg));
2945 
2946 	tagp->vio_msgtype = VIO_TYPE_CTRL;
2947 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
2948 	tagp->vio_subtype_env = VIO_DRING_REG;
2949 	tagp->vio_sid = ldcp->local_sid;
2950 
2951 	/* get dring info msg payload from ldcp->local */
2952 	bcopy(&(ldcp->local_hparams.dring_cookie), (msg.cookie),
2953 	    sizeof (ldc_mem_cookie_t));
2954 	msg.ncookies = ldcp->local_hparams.num_dcookies;
2955 	msg.num_descriptors = ldcp->local_hparams.num_desc;
2956 	msg.descriptor_size = ldcp->local_hparams.desc_size;
2957 
2958 	/*
2959 	 * dring_ident is set to 0. After mapping the dring, peer sets this
2960 	 * value and sends it in the ack, which is saved in
2961 	 * vgen_handle_dring_reg().
2962 	 */
2963 	msg.dring_ident = 0;
2964 
2965 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (msg), B_FALSE);
2966 	if (rv != VGEN_SUCCESS) {
2967 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
2968 		return (rv);
2969 	}
2970 
2971 	ldcp->hstate |= DRING_INFO_SENT;
2972 	DBG2(vgenp, ldcp, "DRING_INFO_SENT \n");
2973 
2974 	return (VGEN_SUCCESS);
2975 }
2976 
2977 static int
2978 vgen_send_rdx_info(vgen_ldc_t *ldcp)
2979 {
2980 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
2981 	vio_rdx_msg_t	rdxmsg;
2982 	vio_msg_tag_t	*tagp = &rdxmsg.tag;
2983 	int		rv;
2984 
2985 	bzero(&rdxmsg, sizeof (rdxmsg));
2986 
2987 	tagp->vio_msgtype = VIO_TYPE_CTRL;
2988 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
2989 	tagp->vio_subtype_env = VIO_RDX;
2990 	tagp->vio_sid = ldcp->local_sid;
2991 
2992 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (rdxmsg), B_FALSE);
2993 	if (rv != VGEN_SUCCESS) {
2994 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
2995 		return (rv);
2996 	}
2997 
2998 	ldcp->hstate |= RDX_INFO_SENT;
2999 	DBG2(vgenp, ldcp, "RDX_INFO_SENT\n");
3000 
3001 	return (VGEN_SUCCESS);
3002 }
3003 
3004 /* send descriptor ring data message to the peer over ldc */
3005 static int
3006 vgen_send_dring_data(vgen_ldc_t *ldcp, uint32_t start, int32_t end)
3007 {
3008 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
3009 	vio_dring_msg_t	dringmsg, *msgp = &dringmsg;
3010 	vio_msg_tag_t	*tagp = &msgp->tag;
3011 	vgen_stats_t	*statsp = &ldcp->stats;
3012 	int		rv;
3013 
3014 	bzero(msgp, sizeof (*msgp));
3015 
3016 	tagp->vio_msgtype = VIO_TYPE_DATA;
3017 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
3018 	tagp->vio_subtype_env = VIO_DRING_DATA;
3019 	tagp->vio_sid = ldcp->local_sid;
3020 
3021 	msgp->seq_num = ldcp->next_txseq;
3022 	msgp->dring_ident = ldcp->local_hparams.dring_ident;
3023 	msgp->start_idx = start;
3024 	msgp->end_idx = end;
3025 
3026 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (dringmsg), B_TRUE);
3027 	if (rv != VGEN_SUCCESS) {
3028 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
3029 		return (rv);
3030 	}
3031 
3032 	ldcp->next_txseq++;
3033 	statsp->dring_data_msgs++;
3034 
3035 	DBG2(vgenp, ldcp, "DRING_DATA_SENT \n");
3036 
3037 	return (VGEN_SUCCESS);
3038 }
3039 
3040 /* send multicast addr info message to vsw */
3041 static int
3042 vgen_send_mcast_info(vgen_ldc_t *ldcp)
3043 {
3044 	vnet_mcast_msg_t	mcastmsg;
3045 	vnet_mcast_msg_t	*msgp;
3046 	vio_msg_tag_t		*tagp;
3047 	vgen_t			*vgenp;
3048 	struct ether_addr	*mca;
3049 	int			rv;
3050 	int			i;
3051 	uint32_t		size;
3052 	uint32_t		mccount;
3053 	uint32_t		n;
3054 
3055 	msgp = &mcastmsg;
3056 	tagp = &msgp->tag;
3057 	vgenp = LDC_TO_VGEN(ldcp);
3058 
3059 	mccount = vgenp->mccount;
3060 	i = 0;
3061 
3062 	do {
3063 		tagp->vio_msgtype = VIO_TYPE_CTRL;
3064 		tagp->vio_subtype = VIO_SUBTYPE_INFO;
3065 		tagp->vio_subtype_env = VNET_MCAST_INFO;
3066 		tagp->vio_sid = ldcp->local_sid;
3067 
3068 		n = ((mccount >= VNET_NUM_MCAST) ? VNET_NUM_MCAST : mccount);
3069 		size = n * sizeof (struct ether_addr);
3070 
3071 		mca = &(vgenp->mctab[i]);
3072 		bcopy(mca, (msgp->mca), size);
3073 		msgp->set = B_TRUE;
3074 		msgp->count = n;
3075 
3076 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msgp),
3077 		    B_FALSE);
3078 		if (rv != VGEN_SUCCESS) {
3079 			DWARN(vgenp, ldcp, "vgen_sendmsg err(%d)\n", rv);
3080 			return (rv);
3081 		}
3082 
3083 		mccount -= n;
3084 		i += n;
3085 
3086 	} while (mccount);
3087 
3088 	return (VGEN_SUCCESS);
3089 }
3090 
3091 /* Initiate Phase 2 of handshake */
3092 static int
3093 vgen_handshake_phase2(vgen_ldc_t *ldcp)
3094 {
3095 	int rv;
3096 	uint32_t ncookies = 0;
3097 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3098 
3099 #ifdef DEBUG
3100 	if (vgen_hdbg & HDBG_OUT_STATE) {
3101 		/* simulate out of state condition */
3102 		vgen_hdbg &= ~(HDBG_OUT_STATE);
3103 		rv = vgen_send_rdx_info(ldcp);
3104 		return (rv);
3105 	}
3106 	if (vgen_hdbg & HDBG_TIMEOUT) {
3107 		/* simulate timeout condition */
3108 		vgen_hdbg &= ~(HDBG_TIMEOUT);
3109 		return (VGEN_SUCCESS);
3110 	}
3111 #endif
3112 	rv = vgen_send_attr_info(ldcp);
3113 	if (rv != VGEN_SUCCESS) {
3114 		return (rv);
3115 	}
3116 
3117 	/* Bind descriptor ring to the channel */
3118 	if (ldcp->num_txdcookies == 0) {
3119 		rv = ldc_mem_dring_bind(ldcp->ldc_handle, ldcp->tx_dhandle,
3120 		    LDC_SHADOW_MAP, LDC_MEM_RW, &ldcp->tx_dcookie, &ncookies);
3121 		if (rv != 0) {
3122 			DWARN(vgenp, ldcp, "ldc_mem_dring_bind failed "
3123 			    "rv(%x)\n", rv);
3124 			return (rv);
3125 		}
3126 		ASSERT(ncookies == 1);
3127 		ldcp->num_txdcookies = ncookies;
3128 	}
3129 
3130 	/* update local dring_info params */
3131 	bcopy(&(ldcp->tx_dcookie), &(ldcp->local_hparams.dring_cookie),
3132 	    sizeof (ldc_mem_cookie_t));
3133 	ldcp->local_hparams.num_dcookies = ldcp->num_txdcookies;
3134 	ldcp->local_hparams.num_desc = ldcp->num_txds;
3135 	ldcp->local_hparams.desc_size = sizeof (vnet_public_desc_t);
3136 
3137 	rv = vgen_send_dring_reg(ldcp);
3138 	if (rv != VGEN_SUCCESS) {
3139 		return (rv);
3140 	}
3141 
3142 	return (VGEN_SUCCESS);
3143 }
3144 
3145 /*
3146  * This function resets the handshake phase to VH_PHASE0(pre-handshake phase).
3147  * This can happen after a channel comes up (status: LDC_UP) or
3148  * when handshake gets terminated due to various conditions.
3149  */
3150 static void
3151 vgen_reset_hphase(vgen_ldc_t *ldcp)
3152 {
3153 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3154 	ldc_status_t istatus;
3155 	int rv;
3156 
3157 	DBG1(vgenp, ldcp, "enter\n");
3158 	/* reset hstate and hphase */
3159 	ldcp->hstate = 0;
3160 	ldcp->hphase = VH_PHASE0;
3161 
3162 	/*
3163 	 * Save the id of pending handshake timer in cancel_htid.
3164 	 * This will be checked in vgen_ldc_cb() and the handshake timer will
3165 	 * be cancelled after releasing cblock.
3166 	 */
3167 	if (ldcp->htid) {
3168 		ldcp->cancel_htid = ldcp->htid;
3169 		ldcp->htid = 0;
3170 	}
3171 
3172 	if (ldcp->local_hparams.dring_ready) {
3173 		ldcp->local_hparams.dring_ready = B_FALSE;
3174 	}
3175 
3176 	/* Unbind tx descriptor ring from the channel */
3177 	if (ldcp->num_txdcookies) {
3178 		rv = ldc_mem_dring_unbind(ldcp->tx_dhandle);
3179 		if (rv != 0) {
3180 			DWARN(vgenp, ldcp, "ldc_mem_dring_unbind failed\n");
3181 		}
3182 		ldcp->num_txdcookies = 0;
3183 	}
3184 
3185 	if (ldcp->peer_hparams.dring_ready) {
3186 		ldcp->peer_hparams.dring_ready = B_FALSE;
3187 		/* Unmap peer's dring */
3188 		(void) ldc_mem_dring_unmap(ldcp->rx_dhandle);
3189 		vgen_clobber_rxds(ldcp);
3190 	}
3191 
3192 	vgen_clobber_tbufs(ldcp);
3193 
3194 	/*
3195 	 * clear local handshake params and initialize.
3196 	 */
3197 	bzero(&(ldcp->local_hparams), sizeof (ldcp->local_hparams));
3198 
3199 	/* set version to the highest version supported */
3200 	ldcp->local_hparams.ver_major =
3201 	    ldcp->vgen_versions[0].ver_major;
3202 	ldcp->local_hparams.ver_minor =
3203 	    ldcp->vgen_versions[0].ver_minor;
3204 	ldcp->local_hparams.dev_class = VDEV_NETWORK;
3205 
3206 	/* set attr_info params */
3207 	ldcp->local_hparams.mtu = ETHERMAX;
3208 	ldcp->local_hparams.addr =
3209 	    vgen_macaddr_strtoul(vgenp->macaddr);
3210 	ldcp->local_hparams.addr_type = ADDR_TYPE_MAC;
3211 	ldcp->local_hparams.xfer_mode = VIO_DRING_MODE;
3212 	ldcp->local_hparams.ack_freq = 0;	/* don't need acks */
3213 
3214 	/*
3215 	 * Note: dring is created, but not bound yet.
3216 	 * local dring_info params will be updated when we bind the dring in
3217 	 * vgen_handshake_phase2().
3218 	 * dring_ident is set to 0. After mapping the dring, peer sets this
3219 	 * value and sends it in the ack, which is saved in
3220 	 * vgen_handle_dring_reg().
3221 	 */
3222 	ldcp->local_hparams.dring_ident = 0;
3223 
3224 	/* clear peer_hparams */
3225 	bzero(&(ldcp->peer_hparams), sizeof (ldcp->peer_hparams));
3226 
3227 	/* reset the channel if required */
3228 	if (ldcp->need_ldc_reset) {
3229 		DWARN(vgenp, ldcp, "Doing Channel Reset...\n");
3230 		ldcp->need_ldc_reset = B_FALSE;
3231 		(void) ldc_down(ldcp->ldc_handle);
3232 		(void) ldc_status(ldcp->ldc_handle, &istatus);
3233 		DBG2(vgenp, ldcp, "Reset Done,ldc_status(%x)\n", istatus);
3234 		ldcp->ldc_status = istatus;
3235 
3236 		/* clear sids */
3237 		ldcp->local_sid = 0;
3238 		ldcp->peer_sid = 0;
3239 
3240 		/* try to bring the channel up */
3241 		rv = ldc_up(ldcp->ldc_handle);
3242 		if (rv != 0) {
3243 			DWARN(vgenp, ldcp, "ldc_up err rv(%d)\n", rv);
3244 		}
3245 
3246 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
3247 			DWARN(vgenp, ldcp, "ldc_status err\n");
3248 		} else {
3249 			ldcp->ldc_status = istatus;
3250 		}
3251 	}
3252 }
3253 
3254 /* wrapper function for vgen_reset_hphase */
3255 static void
3256 vgen_handshake_reset(vgen_ldc_t *ldcp)
3257 {
3258 	ASSERT(MUTEX_HELD(&ldcp->cblock));
3259 	mutex_enter(&ldcp->rxlock);
3260 	mutex_enter(&ldcp->wrlock);
3261 	mutex_enter(&ldcp->txlock);
3262 	mutex_enter(&ldcp->tclock);
3263 
3264 	vgen_reset_hphase(ldcp);
3265 
3266 	mutex_exit(&ldcp->tclock);
3267 	mutex_exit(&ldcp->txlock);
3268 	mutex_exit(&ldcp->wrlock);
3269 	mutex_exit(&ldcp->rxlock);
3270 }
3271 
3272 /*
3273  * Initiate handshake with the peer by sending various messages
3274  * based on the handshake-phase that the channel is currently in.
3275  */
3276 static void
3277 vgen_handshake(vgen_ldc_t *ldcp)
3278 {
3279 	uint32_t hphase = ldcp->hphase;
3280 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
3281 	ldc_status_t	istatus;
3282 	int	rv = 0;
3283 
3284 	switch (hphase) {
3285 
3286 	case VH_PHASE1:
3287 
3288 		/*
3289 		 * start timer, for entire handshake process, turn this timer
3290 		 * off if all phases of handshake complete successfully and
3291 		 * hphase goes to VH_DONE(below) or
3292 		 * vgen_reset_hphase() gets called or
3293 		 * channel is reset due to errors or
3294 		 * vgen_ldc_uninit() is invoked(vgen_stop).
3295 		 */
3296 		ldcp->htid = timeout(vgen_hwatchdog, (caddr_t)ldcp,
3297 		    drv_usectohz(vgen_hwd_interval * MICROSEC));
3298 
3299 		/* Phase 1 involves negotiating the version */
3300 		rv = vgen_send_version_negotiate(ldcp);
3301 		break;
3302 
3303 	case VH_PHASE2:
3304 		rv = vgen_handshake_phase2(ldcp);
3305 		break;
3306 
3307 	case VH_PHASE3:
3308 		rv = vgen_send_rdx_info(ldcp);
3309 		break;
3310 
3311 	case VH_DONE:
3312 		/*
3313 		 * Save the id of pending handshake timer in cancel_htid.
3314 		 * This will be checked in vgen_ldc_cb() and the handshake
3315 		 * timer will be cancelled after releasing cblock.
3316 		 */
3317 		if (ldcp->htid) {
3318 			ldcp->cancel_htid = ldcp->htid;
3319 			ldcp->htid = 0;
3320 		}
3321 		ldcp->hretries = 0;
3322 		DBG1(vgenp, ldcp, "Handshake Done\n");
3323 
3324 		if (ldcp->portp == vgenp->vsw_portp) {
3325 			/*
3326 			 * If this channel(port) is connected to vsw,
3327 			 * need to sync multicast table with vsw.
3328 			 */
3329 			mutex_exit(&ldcp->cblock);
3330 
3331 			mutex_enter(&vgenp->lock);
3332 			rv = vgen_send_mcast_info(ldcp);
3333 			mutex_exit(&vgenp->lock);
3334 
3335 			mutex_enter(&ldcp->cblock);
3336 			if (rv != VGEN_SUCCESS)
3337 				break;
3338 		}
3339 
3340 		/*
3341 		 * Check if mac layer should be notified to restart
3342 		 * transmissions. This can happen if the channel got
3343 		 * reset and vgen_clobber_tbufs() is called, while
3344 		 * need_resched is set.
3345 		 */
3346 		mutex_enter(&ldcp->tclock);
3347 		if (ldcp->need_resched) {
3348 			ldcp->need_resched = B_FALSE;
3349 			vnet_tx_update(vgenp->vnetp);
3350 		}
3351 		mutex_exit(&ldcp->tclock);
3352 
3353 		break;
3354 
3355 	default:
3356 		break;
3357 	}
3358 
3359 	if (rv == ECONNRESET) {
3360 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
3361 			DWARN(vgenp, ldcp, "ldc_status err\n");
3362 		} else {
3363 			ldcp->ldc_status = istatus;
3364 		}
3365 		vgen_handle_evt_reset(ldcp, B_FALSE);
3366 	} else if (rv) {
3367 		vgen_handshake_reset(ldcp);
3368 	}
3369 }
3370 
3371 /*
3372  * Check if the current handshake phase has completed successfully and
3373  * return the status.
3374  */
3375 static int
3376 vgen_handshake_done(vgen_ldc_t *ldcp)
3377 {
3378 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
3379 	uint32_t	hphase = ldcp->hphase;
3380 	int 		status = 0;
3381 
3382 	switch (hphase) {
3383 
3384 	case VH_PHASE1:
3385 		/*
3386 		 * Phase1 is done, if version negotiation
3387 		 * completed successfully.
3388 		 */
3389 		status = ((ldcp->hstate & VER_NEGOTIATED) ==
3390 		    VER_NEGOTIATED);
3391 		break;
3392 
3393 	case VH_PHASE2:
3394 		/*
3395 		 * Phase 2 is done, if attr info and dring info
3396 		 * have been exchanged successfully.
3397 		 */
3398 		status = (((ldcp->hstate & ATTR_INFO_EXCHANGED) ==
3399 		    ATTR_INFO_EXCHANGED) &&
3400 		    ((ldcp->hstate & DRING_INFO_EXCHANGED) ==
3401 		    DRING_INFO_EXCHANGED));
3402 		break;
3403 
3404 	case VH_PHASE3:
3405 		/* Phase 3 is done, if rdx msg has been exchanged */
3406 		status = ((ldcp->hstate & RDX_EXCHANGED) ==
3407 		    RDX_EXCHANGED);
3408 		break;
3409 
3410 	default:
3411 		break;
3412 	}
3413 
3414 	if (status == 0) {
3415 		return (VGEN_FAILURE);
3416 	}
3417 	DBG2(vgenp, ldcp, "PHASE(%d)\n", hphase);
3418 	return (VGEN_SUCCESS);
3419 }
3420 
3421 /* retry handshake on failure */
3422 static void
3423 vgen_handshake_retry(vgen_ldc_t *ldcp)
3424 {
3425 	/* reset handshake phase */
3426 	vgen_handshake_reset(ldcp);
3427 
3428 	/* handshake retry is specified and the channel is UP */
3429 	if (vgen_max_hretries && (ldcp->ldc_status == LDC_UP)) {
3430 		if (ldcp->hretries++ < vgen_max_hretries) {
3431 			ldcp->local_sid = ddi_get_lbolt();
3432 			vgen_handshake(vh_nextphase(ldcp));
3433 		}
3434 	}
3435 }
3436 
3437 /*
3438  * Handle a version info msg from the peer or an ACK/NACK from the peer
3439  * to a version info msg that we sent.
3440  */
3441 static int
3442 vgen_handle_version_negotiate(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
3443 {
3444 	vgen_t		*vgenp;
3445 	vio_ver_msg_t	*vermsg = (vio_ver_msg_t *)tagp;
3446 	int		ack = 0;
3447 	int		failed = 0;
3448 	int		idx;
3449 	vgen_ver_t	*versions = ldcp->vgen_versions;
3450 	int		rv = 0;
3451 
3452 	vgenp = LDC_TO_VGEN(ldcp);
3453 	DBG1(vgenp, ldcp, "enter\n");
3454 	switch (tagp->vio_subtype) {
3455 	case VIO_SUBTYPE_INFO:
3456 
3457 		/*  Cache sid of peer if this is the first time */
3458 		if (ldcp->peer_sid == 0) {
3459 			DBG2(vgenp, ldcp, "Caching peer_sid(%x)\n",
3460 			    tagp->vio_sid);
3461 			ldcp->peer_sid = tagp->vio_sid;
3462 		}
3463 
3464 		if (ldcp->hphase != VH_PHASE1) {
3465 			/*
3466 			 * If we are not already in VH_PHASE1, reset to
3467 			 * pre-handshake state, and initiate handshake
3468 			 * to the peer too.
3469 			 */
3470 			vgen_handshake_reset(ldcp);
3471 			vgen_handshake(vh_nextphase(ldcp));
3472 		}
3473 		ldcp->hstate |= VER_INFO_RCVD;
3474 
3475 		/* save peer's requested values */
3476 		ldcp->peer_hparams.ver_major = vermsg->ver_major;
3477 		ldcp->peer_hparams.ver_minor = vermsg->ver_minor;
3478 		ldcp->peer_hparams.dev_class = vermsg->dev_class;
3479 
3480 		if ((vermsg->dev_class != VDEV_NETWORK) &&
3481 		    (vermsg->dev_class != VDEV_NETWORK_SWITCH)) {
3482 			/* unsupported dev_class, send NACK */
3483 
3484 			DWARN(vgenp, ldcp, "Version Negotiation Failed\n");
3485 
3486 			tagp->vio_subtype = VIO_SUBTYPE_NACK;
3487 			tagp->vio_sid = ldcp->local_sid;
3488 			/* send reply msg back to peer */
3489 			rv = vgen_sendmsg(ldcp, (caddr_t)tagp,
3490 			    sizeof (*vermsg), B_FALSE);
3491 			if (rv != VGEN_SUCCESS) {
3492 				return (rv);
3493 			}
3494 			return (VGEN_FAILURE);
3495 		}
3496 
3497 		DBG2(vgenp, ldcp, "VER_INFO_RCVD, ver(%d,%d)\n",
3498 		    vermsg->ver_major,  vermsg->ver_minor);
3499 
3500 		idx = 0;
3501 
3502 		for (;;) {
3503 
3504 			if (vermsg->ver_major > versions[idx].ver_major) {
3505 
3506 				/* nack with next lower version */
3507 				tagp->vio_subtype = VIO_SUBTYPE_NACK;
3508 				vermsg->ver_major = versions[idx].ver_major;
3509 				vermsg->ver_minor = versions[idx].ver_minor;
3510 				break;
3511 			}
3512 
3513 			if (vermsg->ver_major == versions[idx].ver_major) {
3514 
3515 				/* major version match - ACK version */
3516 				tagp->vio_subtype = VIO_SUBTYPE_ACK;
3517 				ack = 1;
3518 
3519 				/*
3520 				 * lower minor version to the one this endpt
3521 				 * supports, if necessary
3522 				 */
3523 				if (vermsg->ver_minor >
3524 				    versions[idx].ver_minor) {
3525 					vermsg->ver_minor =
3526 					    versions[idx].ver_minor;
3527 					ldcp->peer_hparams.ver_minor =
3528 					    versions[idx].ver_minor;
3529 				}
3530 				break;
3531 			}
3532 
3533 			idx++;
3534 
3535 			if (idx == VGEN_NUM_VER) {
3536 
3537 				/* no version match - send NACK */
3538 				tagp->vio_subtype = VIO_SUBTYPE_NACK;
3539 				vermsg->ver_major = 0;
3540 				vermsg->ver_minor = 0;
3541 				failed = 1;
3542 				break;
3543 			}
3544 
3545 		}
3546 
3547 		tagp->vio_sid = ldcp->local_sid;
3548 
3549 		/* send reply msg back to peer */
3550 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*vermsg),
3551 		    B_FALSE);
3552 		if (rv != VGEN_SUCCESS) {
3553 			return (rv);
3554 		}
3555 
3556 		if (ack) {
3557 			ldcp->hstate |= VER_ACK_SENT;
3558 			DBG2(vgenp, ldcp, "VER_ACK_SENT, ver(%d,%d) \n",
3559 			    vermsg->ver_major, vermsg->ver_minor);
3560 		}
3561 		if (failed) {
3562 			DWARN(vgenp, ldcp, "Negotiation Failed\n");
3563 			return (VGEN_FAILURE);
3564 		}
3565 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
3566 
3567 			/*  VER_ACK_SENT and VER_ACK_RCVD */
3568 
3569 			/* local and peer versions match? */
3570 			ASSERT((ldcp->local_hparams.ver_major ==
3571 			    ldcp->peer_hparams.ver_major) &&
3572 			    (ldcp->local_hparams.ver_minor ==
3573 			    ldcp->peer_hparams.ver_minor));
3574 
3575 			/* move to the next phase */
3576 			vgen_handshake(vh_nextphase(ldcp));
3577 		}
3578 
3579 		break;
3580 
3581 	case VIO_SUBTYPE_ACK:
3582 
3583 		if (ldcp->hphase != VH_PHASE1) {
3584 			/*  This should not happen. */
3585 			DWARN(vgenp, ldcp, "Invalid Phase(%u)\n", ldcp->hphase);
3586 			return (VGEN_FAILURE);
3587 		}
3588 
3589 		/* SUCCESS - we have agreed on a version */
3590 		ldcp->local_hparams.ver_major = vermsg->ver_major;
3591 		ldcp->local_hparams.ver_minor = vermsg->ver_minor;
3592 		ldcp->hstate |= VER_ACK_RCVD;
3593 
3594 		DBG2(vgenp, ldcp, "VER_ACK_RCVD, ver(%d,%d) \n",
3595 		    vermsg->ver_major,  vermsg->ver_minor);
3596 
3597 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
3598 
3599 			/*  VER_ACK_SENT and VER_ACK_RCVD */
3600 
3601 			/* local and peer versions match? */
3602 			ASSERT((ldcp->local_hparams.ver_major ==
3603 			    ldcp->peer_hparams.ver_major) &&
3604 			    (ldcp->local_hparams.ver_minor ==
3605 			    ldcp->peer_hparams.ver_minor));
3606 
3607 			/* move to the next phase */
3608 			vgen_handshake(vh_nextphase(ldcp));
3609 		}
3610 		break;
3611 
3612 	case VIO_SUBTYPE_NACK:
3613 
3614 		if (ldcp->hphase != VH_PHASE1) {
3615 			/*  This should not happen.  */
3616 			DWARN(vgenp, ldcp, "VER_NACK_RCVD Invalid "
3617 			"Phase(%u)\n", ldcp->hphase);
3618 			return (VGEN_FAILURE);
3619 		}
3620 
3621 		DBG2(vgenp, ldcp, "VER_NACK_RCVD next ver(%d,%d)\n",
3622 		    vermsg->ver_major, vermsg->ver_minor);
3623 
3624 		/* check if version in NACK is zero */
3625 		if (vermsg->ver_major == 0 && vermsg->ver_minor == 0) {
3626 			/*
3627 			 * Version Negotiation has failed.
3628 			 */
3629 			DWARN(vgenp, ldcp, "Version Negotiation Failed\n");
3630 			return (VGEN_FAILURE);
3631 		}
3632 
3633 		idx = 0;
3634 
3635 		for (;;) {
3636 
3637 			if (vermsg->ver_major > versions[idx].ver_major) {
3638 				/* select next lower version */
3639 
3640 				ldcp->local_hparams.ver_major =
3641 				    versions[idx].ver_major;
3642 				ldcp->local_hparams.ver_minor =
3643 				    versions[idx].ver_minor;
3644 				break;
3645 			}
3646 
3647 			if (vermsg->ver_major == versions[idx].ver_major) {
3648 				/* major version match */
3649 
3650 				ldcp->local_hparams.ver_major =
3651 				    versions[idx].ver_major;
3652 
3653 				ldcp->local_hparams.ver_minor =
3654 				    versions[idx].ver_minor;
3655 				break;
3656 			}
3657 
3658 			idx++;
3659 
3660 			if (idx == VGEN_NUM_VER) {
3661 				/*
3662 				 * no version match.
3663 				 * Version Negotiation has failed.
3664 				 */
3665 				DWARN(vgenp, ldcp,
3666 				    "Version Negotiation Failed\n");
3667 				return (VGEN_FAILURE);
3668 			}
3669 
3670 		}
3671 
3672 		rv = vgen_send_version_negotiate(ldcp);
3673 		if (rv != VGEN_SUCCESS) {
3674 			return (rv);
3675 		}
3676 
3677 		break;
3678 	}
3679 
3680 	DBG1(vgenp, ldcp, "exit\n");
3681 	return (VGEN_SUCCESS);
3682 }
3683 
3684 /* Check if the attributes are supported */
3685 static int
3686 vgen_check_attr_info(vgen_ldc_t *ldcp, vnet_attr_msg_t *msg)
3687 {
3688 	_NOTE(ARGUNUSED(ldcp))
3689 
3690 	/*
3691 	 * currently, we support these attr values:
3692 	 * mtu of ethernet, addr_type of mac, xfer_mode of
3693 	 * ldc shared memory, ack_freq of 0 (data is acked if
3694 	 * the ack bit is set in the descriptor) and the address should
3695 	 * match the address in the port node.
3696 	 */
3697 	if ((msg->mtu != ETHERMAX) ||
3698 	    (msg->addr_type != ADDR_TYPE_MAC) ||
3699 	    (msg->xfer_mode != VIO_DRING_MODE) ||
3700 	    (msg->ack_freq > 64)) {
3701 		return (VGEN_FAILURE);
3702 	}
3703 
3704 	return (VGEN_SUCCESS);
3705 }
3706 
3707 /*
3708  * Handle an attribute info msg from the peer or an ACK/NACK from the peer
3709  * to an attr info msg that we sent.
3710  */
3711 static int
3712 vgen_handle_attr_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
3713 {
3714 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3715 	vnet_attr_msg_t *attrmsg = (vnet_attr_msg_t *)tagp;
3716 	int		ack = 0;
3717 	int		rv = 0;
3718 
3719 	DBG1(vgenp, ldcp, "enter\n");
3720 	if (ldcp->hphase != VH_PHASE2) {
3721 		DWARN(vgenp, ldcp, "Rcvd ATTR_INFO subtype(%d),"
3722 		" Invalid Phase(%u)\n",
3723 		    tagp->vio_subtype, ldcp->hphase);
3724 		return (VGEN_FAILURE);
3725 	}
3726 	switch (tagp->vio_subtype) {
3727 	case VIO_SUBTYPE_INFO:
3728 
3729 		DBG2(vgenp, ldcp, "ATTR_INFO_RCVD \n");
3730 		ldcp->hstate |= ATTR_INFO_RCVD;
3731 
3732 		/* save peer's values */
3733 		ldcp->peer_hparams.mtu = attrmsg->mtu;
3734 		ldcp->peer_hparams.addr = attrmsg->addr;
3735 		ldcp->peer_hparams.addr_type = attrmsg->addr_type;
3736 		ldcp->peer_hparams.xfer_mode = attrmsg->xfer_mode;
3737 		ldcp->peer_hparams.ack_freq = attrmsg->ack_freq;
3738 
3739 		if (vgen_check_attr_info(ldcp, attrmsg) == VGEN_FAILURE) {
3740 			/* unsupported attr, send NACK */
3741 			tagp->vio_subtype = VIO_SUBTYPE_NACK;
3742 		} else {
3743 			ack = 1;
3744 			tagp->vio_subtype = VIO_SUBTYPE_ACK;
3745 		}
3746 		tagp->vio_sid = ldcp->local_sid;
3747 
3748 		/* send reply msg back to peer */
3749 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*attrmsg),
3750 		    B_FALSE);
3751 		if (rv != VGEN_SUCCESS) {
3752 			return (rv);
3753 		}
3754 
3755 		if (ack) {
3756 			ldcp->hstate |= ATTR_ACK_SENT;
3757 			DBG2(vgenp, ldcp, "ATTR_ACK_SENT \n");
3758 		} else {
3759 			/* failed */
3760 			DWARN(vgenp, ldcp, "ATTR_NACK_SENT \n");
3761 			return (VGEN_FAILURE);
3762 		}
3763 
3764 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
3765 			vgen_handshake(vh_nextphase(ldcp));
3766 		}
3767 
3768 		break;
3769 
3770 	case VIO_SUBTYPE_ACK:
3771 
3772 		ldcp->hstate |= ATTR_ACK_RCVD;
3773 
3774 		DBG2(vgenp, ldcp, "ATTR_ACK_RCVD \n");
3775 
3776 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
3777 			vgen_handshake(vh_nextphase(ldcp));
3778 		}
3779 		break;
3780 
3781 	case VIO_SUBTYPE_NACK:
3782 
3783 		DBG2(vgenp, ldcp, "ATTR_NACK_RCVD \n");
3784 		return (VGEN_FAILURE);
3785 	}
3786 	DBG1(vgenp, ldcp, "exit\n");
3787 	return (VGEN_SUCCESS);
3788 }
3789 
3790 /* Check if the dring info msg is ok */
3791 static int
3792 vgen_check_dring_reg(vio_dring_reg_msg_t *msg)
3793 {
3794 	/* check if msg contents are ok */
3795 	if ((msg->num_descriptors < 128) || (msg->descriptor_size <
3796 	    sizeof (vnet_public_desc_t))) {
3797 		return (VGEN_FAILURE);
3798 	}
3799 	return (VGEN_SUCCESS);
3800 }
3801 
3802 /*
3803  * Handle a descriptor ring register msg from the peer or an ACK/NACK from
3804  * the peer to a dring register msg that we sent.
3805  */
3806 static int
3807 vgen_handle_dring_reg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
3808 {
3809 	vio_dring_reg_msg_t *msg = (vio_dring_reg_msg_t *)tagp;
3810 	ldc_mem_cookie_t dcookie;
3811 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3812 	int ack = 0;
3813 	int rv = 0;
3814 
3815 	DBG1(vgenp, ldcp, "enter\n");
3816 	if (ldcp->hphase < VH_PHASE2) {
3817 		/* dring_info can be rcvd in any of the phases after Phase1 */
3818 		DWARN(vgenp, ldcp,
3819 		    "Rcvd DRING_INFO Subtype (%d), Invalid Phase(%u)\n",
3820 		    tagp->vio_subtype, ldcp->hphase);
3821 		return (VGEN_FAILURE);
3822 	}
3823 	switch (tagp->vio_subtype) {
3824 	case VIO_SUBTYPE_INFO:
3825 
3826 		DBG2(vgenp, ldcp, "DRING_INFO_RCVD \n");
3827 		ldcp->hstate |= DRING_INFO_RCVD;
3828 		bcopy((msg->cookie), &dcookie, sizeof (dcookie));
3829 
3830 		ASSERT(msg->ncookies == 1);
3831 
3832 		if (vgen_check_dring_reg(msg) == VGEN_SUCCESS) {
3833 			/*
3834 			 * verified dring info msg to be ok,
3835 			 * now try to map the remote dring.
3836 			 */
3837 			rv = vgen_init_rxds(ldcp, msg->num_descriptors,
3838 			    msg->descriptor_size, &dcookie,
3839 			    msg->ncookies);
3840 			if (rv == DDI_SUCCESS) {
3841 				/* now we can ack the peer */
3842 				ack = 1;
3843 			}
3844 		}
3845 		if (ack == 0) {
3846 			/* failed, send NACK */
3847 			tagp->vio_subtype = VIO_SUBTYPE_NACK;
3848 		} else {
3849 			if (!(ldcp->peer_hparams.dring_ready)) {
3850 
3851 				/* save peer's dring_info values */
3852 				bcopy(&dcookie,
3853 				    &(ldcp->peer_hparams.dring_cookie),
3854 				    sizeof (dcookie));
3855 				ldcp->peer_hparams.num_desc =
3856 				    msg->num_descriptors;
3857 				ldcp->peer_hparams.desc_size =
3858 				    msg->descriptor_size;
3859 				ldcp->peer_hparams.num_dcookies =
3860 				    msg->ncookies;
3861 
3862 				/* set dring_ident for the peer */
3863 				ldcp->peer_hparams.dring_ident =
3864 				    (uint64_t)ldcp->rxdp;
3865 				/* return the dring_ident in ack msg */
3866 				msg->dring_ident =
3867 				    (uint64_t)ldcp->rxdp;
3868 
3869 				ldcp->peer_hparams.dring_ready = B_TRUE;
3870 			}
3871 			tagp->vio_subtype = VIO_SUBTYPE_ACK;
3872 		}
3873 		tagp->vio_sid = ldcp->local_sid;
3874 		/* send reply msg back to peer */
3875 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msg),
3876 		    B_FALSE);
3877 		if (rv != VGEN_SUCCESS) {
3878 			return (rv);
3879 		}
3880 
3881 		if (ack) {
3882 			ldcp->hstate |= DRING_ACK_SENT;
3883 			DBG2(vgenp, ldcp, "DRING_ACK_SENT");
3884 		} else {
3885 			DWARN(vgenp, ldcp, "DRING_NACK_SENT");
3886 			return (VGEN_FAILURE);
3887 		}
3888 
3889 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
3890 			vgen_handshake(vh_nextphase(ldcp));
3891 		}
3892 
3893 		break;
3894 
3895 	case VIO_SUBTYPE_ACK:
3896 
3897 		ldcp->hstate |= DRING_ACK_RCVD;
3898 
3899 		DBG2(vgenp, ldcp, "DRING_ACK_RCVD");
3900 
3901 		if (!(ldcp->local_hparams.dring_ready)) {
3902 			/* local dring is now ready */
3903 			ldcp->local_hparams.dring_ready = B_TRUE;
3904 
3905 			/* save dring_ident acked by peer */
3906 			ldcp->local_hparams.dring_ident =
3907 			    msg->dring_ident;
3908 		}
3909 
3910 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
3911 			vgen_handshake(vh_nextphase(ldcp));
3912 		}
3913 
3914 		break;
3915 
3916 	case VIO_SUBTYPE_NACK:
3917 
3918 		DBG2(vgenp, ldcp, "DRING_NACK_RCVD");
3919 		return (VGEN_FAILURE);
3920 	}
3921 	DBG1(vgenp, ldcp, "exit\n");
3922 	return (VGEN_SUCCESS);
3923 }
3924 
3925 /*
3926  * Handle a rdx info msg from the peer or an ACK/NACK
3927  * from the peer to a rdx info msg that we sent.
3928  */
3929 static int
3930 vgen_handle_rdx_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
3931 {
3932 	int rv = 0;
3933 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
3934 
3935 	DBG1(vgenp, ldcp, "enter\n");
3936 	if (ldcp->hphase != VH_PHASE3) {
3937 		DWARN(vgenp, ldcp,
3938 		    "Rcvd RDX_INFO Subtype (%d), Invalid Phase(%u)\n",
3939 		    tagp->vio_subtype, ldcp->hphase);
3940 		return (VGEN_FAILURE);
3941 	}
3942 	switch (tagp->vio_subtype) {
3943 	case VIO_SUBTYPE_INFO:
3944 
3945 		DBG2(vgenp, ldcp, "RDX_INFO_RCVD \n");
3946 		ldcp->hstate |= RDX_INFO_RCVD;
3947 
3948 		tagp->vio_subtype = VIO_SUBTYPE_ACK;
3949 		tagp->vio_sid = ldcp->local_sid;
3950 		/* send reply msg back to peer */
3951 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (vio_rdx_msg_t),
3952 		    B_FALSE);
3953 		if (rv != VGEN_SUCCESS) {
3954 			return (rv);
3955 		}
3956 
3957 		ldcp->hstate |= RDX_ACK_SENT;
3958 		DBG2(vgenp, ldcp, "RDX_ACK_SENT \n");
3959 
3960 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
3961 			vgen_handshake(vh_nextphase(ldcp));
3962 		}
3963 
3964 		break;
3965 
3966 	case VIO_SUBTYPE_ACK:
3967 
3968 		ldcp->hstate |= RDX_ACK_RCVD;
3969 
3970 		DBG2(vgenp, ldcp, "RDX_ACK_RCVD \n");
3971 
3972 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
3973 			vgen_handshake(vh_nextphase(ldcp));
3974 		}
3975 		break;
3976 
3977 	case VIO_SUBTYPE_NACK:
3978 
3979 		DBG2(vgenp, ldcp, "RDX_NACK_RCVD \n");
3980 		return (VGEN_FAILURE);
3981 	}
3982 	DBG1(vgenp, ldcp, "exit\n");
3983 	return (VGEN_SUCCESS);
3984 }
3985 
3986 /* Handle ACK/NACK from vsw to a set multicast msg that we sent */
3987 static int
3988 vgen_handle_mcast_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
3989 {
3990 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3991 	vnet_mcast_msg_t *msgp = (vnet_mcast_msg_t *)tagp;
3992 	struct ether_addr *addrp;
3993 	int count;
3994 	int i;
3995 
3996 	DBG1(vgenp, ldcp, "enter\n");
3997 	switch (tagp->vio_subtype) {
3998 
3999 	case VIO_SUBTYPE_INFO:
4000 
4001 		/* vnet shouldn't recv set mcast msg, only vsw handles it */
4002 		DWARN(vgenp, ldcp, "rcvd SET_MCAST_INFO \n");
4003 		break;
4004 
4005 	case VIO_SUBTYPE_ACK:
4006 
4007 		/* success adding/removing multicast addr */
4008 		DBG1(vgenp, ldcp, "rcvd SET_MCAST_ACK \n");
4009 		break;
4010 
4011 	case VIO_SUBTYPE_NACK:
4012 
4013 		DWARN(vgenp, ldcp, "rcvd SET_MCAST_NACK \n");
4014 		if (!(msgp->set)) {
4015 			/* multicast remove request failed */
4016 			break;
4017 		}
4018 
4019 		/* multicast add request failed */
4020 		for (count = 0; count < msgp->count; count++) {
4021 			addrp = &(msgp->mca[count]);
4022 
4023 			/* delete address from the table */
4024 			for (i = 0; i < vgenp->mccount; i++) {
4025 				if (ether_cmp(addrp,
4026 				    &(vgenp->mctab[i])) == 0) {
4027 					if (vgenp->mccount > 1) {
4028 						int t = vgenp->mccount - 1;
4029 						vgenp->mctab[i] =
4030 						    vgenp->mctab[t];
4031 					}
4032 					vgenp->mccount--;
4033 					break;
4034 				}
4035 			}
4036 		}
4037 		break;
4038 
4039 	}
4040 	DBG1(vgenp, ldcp, "exit\n");
4041 
4042 	return (VGEN_SUCCESS);
4043 }
4044 
4045 /* handler for control messages received from the peer ldc end-point */
4046 static int
4047 vgen_handle_ctrlmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4048 {
4049 	int rv = 0;
4050 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4051 
4052 	DBG1(vgenp, ldcp, "enter\n");
4053 	switch (tagp->vio_subtype_env) {
4054 
4055 	case VIO_VER_INFO:
4056 		rv = vgen_handle_version_negotiate(ldcp, tagp);
4057 		break;
4058 
4059 	case VIO_ATTR_INFO:
4060 		rv = vgen_handle_attr_info(ldcp, tagp);
4061 		break;
4062 
4063 	case VIO_DRING_REG:
4064 		rv = vgen_handle_dring_reg(ldcp, tagp);
4065 		break;
4066 
4067 	case VIO_RDX:
4068 		rv = vgen_handle_rdx_info(ldcp, tagp);
4069 		break;
4070 
4071 	case VNET_MCAST_INFO:
4072 		rv = vgen_handle_mcast_info(ldcp, tagp);
4073 		break;
4074 
4075 	}
4076 
4077 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
4078 	return (rv);
4079 }
4080 
4081 /* handler for data messages received from the peer ldc end-point */
4082 static int
4083 vgen_handle_datamsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4084 {
4085 	int rv = 0;
4086 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4087 
4088 	DBG1(vgenp, ldcp, "enter\n");
4089 
4090 	if (ldcp->hphase != VH_DONE)
4091 		return (rv);
4092 	switch (tagp->vio_subtype_env) {
4093 	case VIO_DRING_DATA:
4094 		rv = vgen_handle_dring_data(ldcp, tagp);
4095 		break;
4096 	default:
4097 		break;
4098 	}
4099 
4100 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
4101 	return (rv);
4102 }
4103 
4104 static int
4105 vgen_send_dring_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp, uint32_t start,
4106     int32_t end, uint8_t pstate)
4107 {
4108 	int rv = 0;
4109 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4110 	vio_dring_msg_t *msgp = (vio_dring_msg_t *)tagp;
4111 
4112 	tagp->vio_subtype = VIO_SUBTYPE_ACK;
4113 	tagp->vio_sid = ldcp->local_sid;
4114 	msgp->start_idx = start;
4115 	msgp->end_idx = end;
4116 	msgp->dring_process_state = pstate;
4117 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msgp), B_FALSE);
4118 	if (rv != VGEN_SUCCESS) {
4119 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
4120 	}
4121 	return (rv);
4122 }
4123 
4124 static int
4125 vgen_handle_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4126 {
4127 	int rv = 0;
4128 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4129 
4130 
4131 	DBG1(vgenp, ldcp, "enter\n");
4132 	switch (tagp->vio_subtype) {
4133 
4134 	case VIO_SUBTYPE_INFO:
4135 		/*
4136 		 * To reduce the locking contention, release the
4137 		 * cblock here and re-acquire it once we are done
4138 		 * receiving packets.
4139 		 */
4140 		mutex_exit(&ldcp->cblock);
4141 		mutex_enter(&ldcp->rxlock);
4142 		rv = vgen_handle_dring_data_info(ldcp, tagp);
4143 		mutex_exit(&ldcp->rxlock);
4144 		mutex_enter(&ldcp->cblock);
4145 		break;
4146 
4147 	case VIO_SUBTYPE_ACK:
4148 		rv = vgen_handle_dring_data_ack(ldcp, tagp);
4149 		break;
4150 
4151 	case VIO_SUBTYPE_NACK:
4152 		rv = vgen_handle_dring_data_nack(ldcp, tagp);
4153 		break;
4154 	}
4155 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
4156 	return (rv);
4157 }
4158 
4159 static int
4160 vgen_handle_dring_data_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4161 {
4162 	uint32_t start;
4163 	int32_t end;
4164 	int rv = 0;
4165 	vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;
4166 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4167 #ifdef VGEN_HANDLE_LOST_PKTS
4168 	vgen_stats_t *statsp = &ldcp->stats;
4169 	uint32_t rxi;
4170 	int n;
4171 #endif
4172 
4173 	DBG1(vgenp, ldcp, "enter\n");
4174 
4175 	start = dringmsg->start_idx;
4176 	end = dringmsg->end_idx;
4177 	/*
4178 	 * received a data msg, which contains the start and end
4179 	 * indices of the descriptors within the rx ring holding data,
4180 	 * the seq_num of data packet corresponding to the start index,
4181 	 * and the dring_ident.
4182 	 * We can now read the contents of each of these descriptors
4183 	 * and gather data from it.
4184 	 */
4185 	DBG1(vgenp, ldcp, "INFO: start(%d), end(%d)\n",
4186 	    start, end);
4187 
4188 	/* validate rx start and end indeces */
4189 	if (!(CHECK_RXI(start, ldcp)) || ((end != -1) &&
4190 	    !(CHECK_RXI(end, ldcp)))) {
4191 		DWARN(vgenp, ldcp, "Invalid Rx start(%d) or end(%d)\n",
4192 		    start, end);
4193 		/* drop the message if invalid index */
4194 		return (rv);
4195 	}
4196 
4197 	/* validate dring_ident */
4198 	if (dringmsg->dring_ident != ldcp->peer_hparams.dring_ident) {
4199 		DWARN(vgenp, ldcp, "Invalid dring ident 0x%x\n",
4200 		    dringmsg->dring_ident);
4201 		/* invalid dring_ident, drop the msg */
4202 		return (rv);
4203 	}
4204 #ifdef DEBUG
4205 	if (vgen_trigger_rxlost) {
4206 		/* drop this msg to simulate lost pkts for debugging */
4207 		vgen_trigger_rxlost = 0;
4208 		return (rv);
4209 	}
4210 #endif
4211 
4212 #ifdef	VGEN_HANDLE_LOST_PKTS
4213 
4214 	/* receive start index doesn't match expected index */
4215 	if (ldcp->next_rxi != start) {
4216 		DWARN(vgenp, ldcp, "next_rxi(%d) != start(%d)\n",
4217 		    ldcp->next_rxi, start);
4218 
4219 		/* calculate the number of pkts lost */
4220 		if (start >= ldcp->next_rxi) {
4221 			n = start - ldcp->next_rxi;
4222 		} else  {
4223 			n = ldcp->num_rxds - (ldcp->next_rxi - start);
4224 		}
4225 
4226 		/*
4227 		 * sequence number of dring data message
4228 		 * is less than the next sequence number that
4229 		 * is expected:
4230 		 *
4231 		 * drop the message and the corresponding packets.
4232 		 */
4233 		if (ldcp->next_rxseq > dringmsg->seq_num) {
4234 			DWARN(vgenp, ldcp, "dropping pkts, expected "
4235 			"rxseq(0x%lx) > recvd(0x%lx)\n",
4236 			    ldcp->next_rxseq, dringmsg->seq_num);
4237 			/*
4238 			 * duplicate/multiple retransmissions from
4239 			 * sender?? drop this msg.
4240 			 */
4241 			return (rv);
4242 		}
4243 
4244 		/*
4245 		 * sequence number of dring data message
4246 		 * is greater than the next expected sequence number
4247 		 *
4248 		 * send a NACK back to the peer to indicate lost
4249 		 * packets.
4250 		 */
4251 		if (dringmsg->seq_num > ldcp->next_rxseq) {
4252 			statsp->rx_lost_pkts += n;
4253 			tagp->vio_subtype = VIO_SUBTYPE_NACK;
4254 			tagp->vio_sid = ldcp->local_sid;
4255 			/* indicate the range of lost descriptors */
4256 			dringmsg->start_idx = ldcp->next_rxi;
4257 			rxi = start;
4258 			DECR_RXI(rxi, ldcp);
4259 			dringmsg->end_idx = rxi;
4260 			/* dring ident is left unchanged */
4261 			rv = vgen_sendmsg(ldcp, (caddr_t)tagp,
4262 			    sizeof (*dringmsg), B_FALSE);
4263 			if (rv != VGEN_SUCCESS) {
4264 				DWARN(vgenp, ldcp,
4265 				    "vgen_sendmsg failed, stype:NACK\n");
4266 				return (rv);
4267 			}
4268 #ifdef VGEN_REXMIT
4269 			/*
4270 			 * stop further processing until peer
4271 			 * retransmits with the right index.
4272 			 * update next_rxseq expected.
4273 			 */
4274 			ldcp->next_rxseq += 1;
4275 			return (rv);
4276 #else	/* VGEN_REXMIT */
4277 			/*
4278 			 * treat this range of descrs/pkts as dropped
4279 			 * and set the new expected values for next_rxi
4280 			 * and next_rxseq. continue(below) to process
4281 			 * from the new start index.
4282 			 */
4283 			ldcp->next_rxi = start;
4284 			ldcp->next_rxseq += 1;
4285 #endif	/* VGEN_REXMIT */
4286 
4287 		} else if (dringmsg->seq_num == ldcp->next_rxseq) {
4288 			/*
4289 			 * expected and received seqnums match, but
4290 			 * the descriptor indeces don't?
4291 			 *
4292 			 * restart handshake with peer.
4293 			 */
4294 			DWARN(vgenp, ldcp, "next_rxseq(0x%lx)=="
4295 			    "seq_num(0x%lx)\n", ldcp->next_rxseq,
4296 			    dringmsg->seq_num);
4297 
4298 		}
4299 
4300 	} else {
4301 		/* expected and start dring indeces match */
4302 
4303 		if (dringmsg->seq_num != ldcp->next_rxseq) {
4304 
4305 			/* seqnums don't match */
4306 
4307 			DWARN(vgenp, ldcp,
4308 			    "next_rxseq(0x%lx) != seq_num(0x%lx)\n",
4309 			    ldcp->next_rxseq, dringmsg->seq_num);
4310 		}
4311 	}
4312 
4313 #endif	/* VGEN_HANDLE_LOST_PKTS */
4314 
4315 	/* Now receive messages */
4316 	rv = vgen_process_dring_data(ldcp, tagp);
4317 
4318 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
4319 	return (rv);
4320 }
4321 
4322 static int
4323 vgen_process_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4324 {
4325 	boolean_t set_ack_start = B_FALSE;
4326 	uint32_t start;
4327 	uint32_t ack_end;
4328 	uint32_t next_rxi;
4329 	uint32_t rxi;
4330 	int count = 0;
4331 	int rv = 0;
4332 	uint32_t retries = 0;
4333 	vgen_stats_t *statsp;
4334 	vnet_public_desc_t *rxdp;
4335 	vio_dring_entry_hdr_t *hdrp;
4336 	mblk_t *bp = NULL;
4337 	mblk_t *bpt = NULL;
4338 	uint32_t ack_start;
4339 	uint32_t datalen;
4340 	uint32_t ncookies;
4341 	boolean_t rxd_err = B_FALSE;
4342 	mblk_t *mp = NULL;
4343 	size_t nbytes;
4344 	boolean_t ack_needed = B_FALSE;
4345 	size_t nread;
4346 	uint64_t off = 0;
4347 	struct ether_header *ehp;
4348 	vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;
4349 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4350 
4351 	DBG1(vgenp, ldcp, "enter\n");
4352 
4353 	statsp = &ldcp->stats;
4354 	start = dringmsg->start_idx;
4355 
4356 	/*
4357 	 * start processing the descriptors from the specified
4358 	 * start index, up to the index a descriptor is not ready
4359 	 * to be processed or we process the entire descriptor ring
4360 	 * and wrap around upto the start index.
4361 	 */
4362 
4363 	/* need to set the start index of descriptors to be ack'd */
4364 	set_ack_start = B_TRUE;
4365 
4366 	/* index upto which we have ack'd */
4367 	ack_end = start;
4368 	DECR_RXI(ack_end, ldcp);
4369 
4370 	next_rxi = rxi =  start;
4371 	do {
4372 vgen_recv_retry:
4373 		rv = ldc_mem_dring_acquire(ldcp->rx_dhandle, rxi, rxi);
4374 		if (rv != 0) {
4375 			DWARN(vgenp, ldcp, "ldc_mem_dring_acquire() failed"
4376 			    " rv(%d)\n", rv);
4377 			statsp->ierrors++;
4378 			return (rv);
4379 		}
4380 
4381 		rxdp = &(ldcp->rxdp[rxi]);
4382 		hdrp = &rxdp->hdr;
4383 
4384 		if (hdrp->dstate != VIO_DESC_READY) {
4385 			/*
4386 			 * Before waiting and retry here, queue
4387 			 * the messages that are received already.
4388 			 * This will help the soft interrupt to
4389 			 * send them up with less latency.
4390 			 */
4391 			if (bp != NULL) {
4392 				DTRACE_PROBE1(vgen_rcv_msgs, int, count);
4393 				vgen_ldc_queue_data(ldcp, bp, bpt);
4394 				count = 0;
4395 				bp = bpt = NULL;
4396 			}
4397 			/*
4398 			 * descriptor is not ready.
4399 			 * retry descriptor acquire, stop processing
4400 			 * after max # retries.
4401 			 */
4402 			if (retries == vgen_recv_retries)
4403 				break;
4404 			retries++;
4405 			drv_usecwait(vgen_recv_delay);
4406 			goto vgen_recv_retry;
4407 		}
4408 		retries = 0;
4409 
4410 		if (set_ack_start) {
4411 			/*
4412 			 * initialize the start index of the range
4413 			 * of descriptors to be ack'd.
4414 			 */
4415 			ack_start = rxi;
4416 			set_ack_start = B_FALSE;
4417 		}
4418 
4419 		datalen = rxdp->nbytes;
4420 		ncookies = rxdp->ncookies;
4421 		if ((datalen < ETHERMIN) ||
4422 		    (ncookies == 0) ||
4423 		    (ncookies > MAX_COOKIES)) {
4424 			rxd_err = B_TRUE;
4425 		} else {
4426 			/*
4427 			 * Try to allocate an mblk from the free pool
4428 			 * of recv mblks for the channel.
4429 			 * If this fails, use allocb().
4430 			 */
4431 			nbytes = (VNET_IPALIGN + datalen + 7) & ~7;
4432 			mp = vio_multipool_allocb(&ldcp->vmp, nbytes);
4433 			if (!mp) {
4434 				/*
4435 				 * The data buffer returned by
4436 				 * allocb(9F) is 8byte aligned. We
4437 				 * allocate extra 8 bytes to ensure
4438 				 * size is multiple of 8 bytes for
4439 				 * ldc_mem_copy().
4440 				 */
4441 				statsp->rx_vio_allocb_fail++;
4442 				mp = allocb(VNET_IPALIGN + datalen + 8,
4443 				    BPRI_MED);
4444 			}
4445 		}
4446 		if ((rxd_err) || (mp == NULL)) {
4447 			/*
4448 			 * rxd_err or allocb() failure,
4449 			 * drop this packet, get next.
4450 			 */
4451 			if (rxd_err) {
4452 				statsp->ierrors++;
4453 				rxd_err = B_FALSE;
4454 			} else {
4455 				statsp->rx_allocb_fail++;
4456 			}
4457 
4458 			ack_needed = hdrp->ack;
4459 
4460 			/* set descriptor done bit */
4461 			hdrp->dstate = VIO_DESC_DONE;
4462 
4463 			rv = ldc_mem_dring_release(ldcp->rx_dhandle,
4464 			    rxi, rxi);
4465 			if (rv != 0) {
4466 				DWARN(vgenp, ldcp,
4467 				    "ldc_mem_dring_release err rv(%d)\n", rv);
4468 				return (rv);
4469 			}
4470 
4471 			if (ack_needed) {
4472 				ack_needed = B_FALSE;
4473 				/*
4474 				 * sender needs ack for this packet,
4475 				 * ack pkts upto this index.
4476 				 */
4477 				ack_end = rxi;
4478 
4479 				rv = vgen_send_dring_ack(ldcp, tagp,
4480 				    ack_start, ack_end,
4481 				    VIO_DP_ACTIVE);
4482 				if (rv != VGEN_SUCCESS) {
4483 					goto error_ret;
4484 				}
4485 
4486 				/* need to set new ack start index */
4487 				set_ack_start = B_TRUE;
4488 			}
4489 			goto vgen_next_rxi;
4490 		}
4491 
4492 		nread = nbytes;
4493 		rv = ldc_mem_copy(ldcp->ldc_handle,
4494 		    (caddr_t)mp->b_rptr, off, &nread,
4495 		    rxdp->memcookie, ncookies, LDC_COPY_IN);
4496 
4497 		/* if ldc_mem_copy() failed */
4498 		if (rv) {
4499 			DWARN(vgenp, ldcp, "ldc_mem_copy err rv(%d)\n", rv);
4500 			statsp->ierrors++;
4501 			freemsg(mp);
4502 			goto error_ret;
4503 		}
4504 
4505 		ack_needed = hdrp->ack;
4506 		hdrp->dstate = VIO_DESC_DONE;
4507 
4508 		rv = ldc_mem_dring_release(ldcp->rx_dhandle, rxi, rxi);
4509 		if (rv != 0) {
4510 			DWARN(vgenp, ldcp,
4511 			    "ldc_mem_dring_release err rv(%d)\n", rv);
4512 			goto error_ret;
4513 		}
4514 
4515 		mp->b_rptr += VNET_IPALIGN;
4516 
4517 		if (ack_needed) {
4518 			ack_needed = B_FALSE;
4519 			/*
4520 			 * sender needs ack for this packet,
4521 			 * ack pkts upto this index.
4522 			 */
4523 			ack_end = rxi;
4524 
4525 			rv = vgen_send_dring_ack(ldcp, tagp,
4526 			    ack_start, ack_end, VIO_DP_ACTIVE);
4527 			if (rv != VGEN_SUCCESS) {
4528 				goto error_ret;
4529 			}
4530 
4531 			/* need to set new ack start index */
4532 			set_ack_start = B_TRUE;
4533 		}
4534 
4535 		if (nread != nbytes) {
4536 			DWARN(vgenp, ldcp,
4537 			    "ldc_mem_copy nread(%lx), nbytes(%lx)\n",
4538 			    nread, nbytes);
4539 			statsp->ierrors++;
4540 			freemsg(mp);
4541 			goto vgen_next_rxi;
4542 		}
4543 
4544 		/* point to the actual end of data */
4545 		mp->b_wptr = mp->b_rptr + datalen;
4546 
4547 		/* update stats */
4548 		statsp->ipackets++;
4549 		statsp->rbytes += datalen;
4550 		ehp = (struct ether_header *)mp->b_rptr;
4551 		if (IS_BROADCAST(ehp))
4552 			statsp->brdcstrcv++;
4553 		else if (IS_MULTICAST(ehp))
4554 			statsp->multircv++;
4555 
4556 		/* build a chain of received packets */
4557 		if (bp == NULL) {
4558 			/* first pkt */
4559 			bp = mp;
4560 			bpt = bp;
4561 			bpt->b_next = NULL;
4562 		} else {
4563 			mp->b_next = NULL;
4564 			bpt->b_next = mp;
4565 			bpt = mp;
4566 		}
4567 
4568 		if (count++ > vgen_chain_len) {
4569 			DTRACE_PROBE1(vgen_rcv_msgs, int, count);
4570 			vgen_ldc_queue_data(ldcp, bp, bpt);
4571 			count = 0;
4572 			bp = bpt = NULL;
4573 		}
4574 
4575 vgen_next_rxi:
4576 		/* update end index of range of descrs to be ack'd */
4577 		ack_end = rxi;
4578 
4579 		/* update the next index to be processed */
4580 		INCR_RXI(next_rxi, ldcp);
4581 		if (next_rxi == start) {
4582 			/*
4583 			 * processed the entire descriptor ring upto
4584 			 * the index at which we started.
4585 			 */
4586 			break;
4587 		}
4588 
4589 		rxi = next_rxi;
4590 
4591 	_NOTE(CONSTCOND)
4592 	} while (1);
4593 
4594 	/*
4595 	 * send an ack message to peer indicating that we have stopped
4596 	 * processing descriptors.
4597 	 */
4598 	if (set_ack_start) {
4599 		/*
4600 		 * We have ack'd upto some index and we have not
4601 		 * processed any descriptors beyond that index.
4602 		 * Use the last ack'd index as both the start and
4603 		 * end of range of descrs being ack'd.
4604 		 * Note: This results in acking the last index twice
4605 		 * and should be harmless.
4606 		 */
4607 		ack_start = ack_end;
4608 	}
4609 
4610 	rv = vgen_send_dring_ack(ldcp, tagp, ack_start, ack_end,
4611 	    VIO_DP_STOPPED);
4612 	if (rv != VGEN_SUCCESS) {
4613 		goto error_ret;
4614 	}
4615 
4616 	/* save new recv index and expected seqnum of next dring msg */
4617 	ldcp->next_rxi = next_rxi;
4618 	ldcp->next_rxseq += 1;
4619 
4620 error_ret:
4621 	/* queue the packets received so far */
4622 	if (bp != NULL) {
4623 		DTRACE_PROBE1(vgen_rcv_msgs, int, count);
4624 		vgen_ldc_queue_data(ldcp, bp, bpt);
4625 		bp = bpt = NULL;
4626 	}
4627 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
4628 	return (rv);
4629 
4630 }
4631 
4632 static int
4633 vgen_handle_dring_data_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4634 {
4635 	int rv = 0;
4636 	uint32_t start;
4637 	int32_t end;
4638 	uint32_t txi;
4639 	boolean_t ready_txd = B_FALSE;
4640 	vgen_stats_t *statsp;
4641 	vgen_private_desc_t *tbufp;
4642 	vnet_public_desc_t *txdp;
4643 	vio_dring_entry_hdr_t *hdrp;
4644 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4645 	vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;
4646 
4647 	DBG1(vgenp, ldcp, "enter\n");
4648 	start = dringmsg->start_idx;
4649 	end = dringmsg->end_idx;
4650 	statsp = &ldcp->stats;
4651 
4652 	/*
4653 	 * received an ack corresponding to a specific descriptor for
4654 	 * which we had set the ACK bit in the descriptor (during
4655 	 * transmit). This enables us to reclaim descriptors.
4656 	 */
4657 
4658 	DBG2(vgenp, ldcp, "ACK:  start(%d), end(%d)\n", start, end);
4659 
4660 	/* validate start and end indeces in the tx ack msg */
4661 	if (!(CHECK_TXI(start, ldcp)) || !(CHECK_TXI(end, ldcp))) {
4662 		/* drop the message if invalid index */
4663 		DWARN(vgenp, ldcp, "Invalid Tx ack start(%d) or end(%d)\n",
4664 		    start, end);
4665 		return (rv);
4666 	}
4667 	/* validate dring_ident */
4668 	if (dringmsg->dring_ident != ldcp->local_hparams.dring_ident) {
4669 		/* invalid dring_ident, drop the msg */
4670 		DWARN(vgenp, ldcp, "Invalid dring ident 0x%x\n",
4671 		    dringmsg->dring_ident);
4672 		return (rv);
4673 	}
4674 	statsp->dring_data_acks++;
4675 
4676 	/* reclaim descriptors that are done */
4677 	vgen_reclaim(ldcp);
4678 
4679 	if (dringmsg->dring_process_state != VIO_DP_STOPPED) {
4680 		/*
4681 		 * receiver continued processing descriptors after
4682 		 * sending us the ack.
4683 		 */
4684 		return (rv);
4685 	}
4686 
4687 	statsp->dring_stopped_acks++;
4688 
4689 	/* receiver stopped processing descriptors */
4690 	mutex_enter(&ldcp->wrlock);
4691 	mutex_enter(&ldcp->tclock);
4692 
4693 	/*
4694 	 * determine if there are any pending tx descriptors
4695 	 * ready to be processed by the receiver(peer) and if so,
4696 	 * send a message to the peer to restart receiving.
4697 	 */
4698 	ready_txd = B_FALSE;
4699 
4700 	/*
4701 	 * using the end index of the descriptor range for which
4702 	 * we received the ack, check if the next descriptor is
4703 	 * ready.
4704 	 */
4705 	txi = end;
4706 	INCR_TXI(txi, ldcp);
4707 	tbufp = &ldcp->tbufp[txi];
4708 	txdp = tbufp->descp;
4709 	hdrp = &txdp->hdr;
4710 	if (hdrp->dstate == VIO_DESC_READY) {
4711 		ready_txd = B_TRUE;
4712 	} else {
4713 		/*
4714 		 * descr next to the end of ack'd descr range is not
4715 		 * ready.
4716 		 * starting from the current reclaim index, check
4717 		 * if any descriptor is ready.
4718 		 */
4719 
4720 		txi = ldcp->cur_tbufp - ldcp->tbufp;
4721 		tbufp = &ldcp->tbufp[txi];
4722 
4723 		txdp = tbufp->descp;
4724 		hdrp = &txdp->hdr;
4725 		if (hdrp->dstate == VIO_DESC_READY) {
4726 			ready_txd = B_TRUE;
4727 		}
4728 
4729 	}
4730 
4731 	if (ready_txd) {
4732 		/*
4733 		 * we have tx descriptor(s) ready to be
4734 		 * processed by the receiver.
4735 		 * send a message to the peer with the start index
4736 		 * of ready descriptors.
4737 		 */
4738 		rv = vgen_send_dring_data(ldcp, txi, -1);
4739 		if (rv != VGEN_SUCCESS) {
4740 			ldcp->resched_peer = B_TRUE;
4741 			ldcp->resched_peer_txi = txi;
4742 			mutex_exit(&ldcp->tclock);
4743 			mutex_exit(&ldcp->wrlock);
4744 			return (rv);
4745 		}
4746 	} else {
4747 		/*
4748 		 * no ready tx descriptors. set the flag to send a
4749 		 * message to peer when tx descriptors are ready in
4750 		 * transmit routine.
4751 		 */
4752 		ldcp->resched_peer = B_TRUE;
4753 		ldcp->resched_peer_txi = ldcp->cur_tbufp - ldcp->tbufp;
4754 	}
4755 
4756 	mutex_exit(&ldcp->tclock);
4757 	mutex_exit(&ldcp->wrlock);
4758 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
4759 	return (rv);
4760 }
4761 
4762 static int
4763 vgen_handle_dring_data_nack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4764 {
4765 	int rv = 0;
4766 	uint32_t start;
4767 	int32_t end;
4768 	uint32_t txi;
4769 	vnet_public_desc_t *txdp;
4770 	vio_dring_entry_hdr_t *hdrp;
4771 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4772 	vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;
4773 #ifdef VGEN_REXMIT
4774 	vgen_stats_t *statsp = &ldcp->stats;
4775 #endif
4776 
4777 	DBG1(vgenp, ldcp, "enter\n");
4778 	start = dringmsg->start_idx;
4779 	end = dringmsg->end_idx;
4780 
4781 	/*
4782 	 * peer sent a NACK msg to indicate lost packets.
4783 	 * The start and end correspond to the range of descriptors
4784 	 * for which the peer didn't receive a dring data msg and so
4785 	 * didn't receive the corresponding data.
4786 	 */
4787 	DWARN(vgenp, ldcp, "NACK: start(%d), end(%d)\n", start, end);
4788 
4789 	/* validate start and end indeces in the tx nack msg */
4790 	if (!(CHECK_TXI(start, ldcp)) || !(CHECK_TXI(end, ldcp))) {
4791 		/* drop the message if invalid index */
4792 		DWARN(vgenp, ldcp, "Invalid Tx nack start(%d) or end(%d)\n",
4793 		    start, end);
4794 		return (rv);
4795 	}
4796 	/* validate dring_ident */
4797 	if (dringmsg->dring_ident != ldcp->local_hparams.dring_ident) {
4798 		/* invalid dring_ident, drop the msg */
4799 		DWARN(vgenp, ldcp, "Invalid dring ident 0x%x\n",
4800 		    dringmsg->dring_ident);
4801 		return (rv);
4802 	}
4803 	mutex_enter(&ldcp->txlock);
4804 	mutex_enter(&ldcp->tclock);
4805 
4806 	if (ldcp->next_tbufp == ldcp->cur_tbufp) {
4807 		/* no busy descriptors, bogus nack ? */
4808 		mutex_exit(&ldcp->tclock);
4809 		mutex_exit(&ldcp->txlock);
4810 		return (rv);
4811 	}
4812 
4813 #ifdef VGEN_REXMIT
4814 	/* send a new dring data msg including the lost descrs */
4815 	end = ldcp->next_tbufp - ldcp->tbufp;
4816 	DECR_TXI(end, ldcp);
4817 	rv = vgen_send_dring_data(ldcp, start, end);
4818 	if (rv != 0) {
4819 		/*
4820 		 * vgen_send_dring_data() error: drop all packets
4821 		 * in this descr range
4822 		 */
4823 		DWARN(vgenp, ldcp, "vgen_send_dring_data failed: rv(%d)\n", rv);
4824 		for (txi = start; txi <= end; ) {
4825 			tbufp = &(ldcp->tbufp[txi]);
4826 			txdp = tbufp->descp;
4827 			hdrp = &txdp->hdr;
4828 			tbufp->flags = VGEN_PRIV_DESC_FREE;
4829 			hdrp->dstate = VIO_DESC_FREE;
4830 			hdrp->ack = B_FALSE;
4831 			statsp->oerrors++;
4832 		}
4833 
4834 		/* update next pointer */
4835 		ldcp->next_tbufp = &(ldcp->tbufp[start]);
4836 		ldcp->next_txi = start;
4837 	}
4838 	DBG2(vgenp, ldcp, "rexmit: start(%d) end(%d)\n", start, end);
4839 #else	/* VGEN_REXMIT */
4840 	/* we just mark the descrs as done so they can be reclaimed */
4841 	for (txi = start; txi <= end; ) {
4842 		txdp = &(ldcp->txdp[txi]);
4843 		hdrp = &txdp->hdr;
4844 		if (hdrp->dstate == VIO_DESC_READY)
4845 			hdrp->dstate = VIO_DESC_DONE;
4846 		INCR_TXI(txi, ldcp);
4847 	}
4848 #endif	/* VGEN_REXMIT */
4849 	mutex_exit(&ldcp->tclock);
4850 	mutex_exit(&ldcp->txlock);
4851 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
4852 	return (rv);
4853 }
4854 
4855 static void
4856 vgen_reclaim(vgen_ldc_t *ldcp)
4857 {
4858 	mutex_enter(&ldcp->tclock);
4859 
4860 	vgen_reclaim_dring(ldcp);
4861 	ldcp->reclaim_lbolt = ddi_get_lbolt();
4862 
4863 	mutex_exit(&ldcp->tclock);
4864 }
4865 
4866 /*
4867  * transmit reclaim function. starting from the current reclaim index
4868  * look for descriptors marked DONE and reclaim the descriptor and the
4869  * corresponding buffers (tbuf).
4870  */
4871 static void
4872 vgen_reclaim_dring(vgen_ldc_t *ldcp)
4873 {
4874 	int count = 0;
4875 	vnet_public_desc_t *txdp;
4876 	vgen_private_desc_t *tbufp;
4877 	vio_dring_entry_hdr_t	*hdrp;
4878 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
4879 
4880 #ifdef DEBUG
4881 	if (vgen_trigger_txtimeout)
4882 		return;
4883 #endif
4884 
4885 	tbufp = ldcp->cur_tbufp;
4886 	txdp = tbufp->descp;
4887 	hdrp = &txdp->hdr;
4888 
4889 	while ((hdrp->dstate == VIO_DESC_DONE) &&
4890 	    (tbufp != ldcp->next_tbufp)) {
4891 		tbufp->flags = VGEN_PRIV_DESC_FREE;
4892 		hdrp->dstate = VIO_DESC_FREE;
4893 		hdrp->ack = B_FALSE;
4894 
4895 		tbufp = NEXTTBUF(ldcp, tbufp);
4896 		txdp = tbufp->descp;
4897 		hdrp = &txdp->hdr;
4898 		count++;
4899 	}
4900 
4901 	ldcp->cur_tbufp = tbufp;
4902 
4903 	/*
4904 	 * Check if mac layer should be notified to restart transmissions
4905 	 */
4906 	if ((ldcp->need_resched) && (count > 0)) {
4907 		ldcp->need_resched = B_FALSE;
4908 		vnet_tx_update(vgenp->vnetp);
4909 	}
4910 }
4911 
4912 /* return the number of pending transmits for the channel */
4913 static int
4914 vgen_num_txpending(vgen_ldc_t *ldcp)
4915 {
4916 	int n;
4917 
4918 	if (ldcp->next_tbufp >= ldcp->cur_tbufp) {
4919 		n = ldcp->next_tbufp - ldcp->cur_tbufp;
4920 	} else  {
4921 		/* cur_tbufp > next_tbufp */
4922 		n = ldcp->num_txds - (ldcp->cur_tbufp - ldcp->next_tbufp);
4923 	}
4924 
4925 	return (n);
4926 }
4927 
4928 /* determine if the transmit descriptor ring is full */
4929 static int
4930 vgen_tx_dring_full(vgen_ldc_t *ldcp)
4931 {
4932 	vgen_private_desc_t	*tbufp;
4933 	vgen_private_desc_t	*ntbufp;
4934 
4935 	tbufp = ldcp->next_tbufp;
4936 	ntbufp = NEXTTBUF(ldcp, tbufp);
4937 	if (ntbufp == ldcp->cur_tbufp) { /* out of tbufs/txds */
4938 		return (VGEN_SUCCESS);
4939 	}
4940 	return (VGEN_FAILURE);
4941 }
4942 
4943 /* determine if timeout condition has occured */
4944 static int
4945 vgen_ldc_txtimeout(vgen_ldc_t *ldcp)
4946 {
4947 	if (((ddi_get_lbolt() - ldcp->reclaim_lbolt) >
4948 	    drv_usectohz(vnet_ldcwd_txtimeout * 1000)) &&
4949 	    (vnet_ldcwd_txtimeout) &&
4950 	    (vgen_tx_dring_full(ldcp) == VGEN_SUCCESS)) {
4951 		return (VGEN_SUCCESS);
4952 	} else {
4953 		return (VGEN_FAILURE);
4954 	}
4955 }
4956 
4957 /* transmit watchdog timeout handler */
4958 static void
4959 vgen_ldc_watchdog(void *arg)
4960 {
4961 	vgen_ldc_t *ldcp;
4962 	vgen_t *vgenp;
4963 	int rv;
4964 
4965 	ldcp = (vgen_ldc_t *)arg;
4966 	vgenp = LDC_TO_VGEN(ldcp);
4967 
4968 	rv = vgen_ldc_txtimeout(ldcp);
4969 	if (rv == VGEN_SUCCESS) {
4970 		DWARN(vgenp, ldcp, "transmit timeout\n");
4971 #ifdef DEBUG
4972 		if (vgen_trigger_txtimeout) {
4973 			/* tx timeout triggered for debugging */
4974 			vgen_trigger_txtimeout = 0;
4975 		}
4976 #endif
4977 		mutex_enter(&ldcp->cblock);
4978 		ldcp->need_ldc_reset = B_TRUE;
4979 		vgen_handshake_retry(ldcp);
4980 		mutex_exit(&ldcp->cblock);
4981 		if (ldcp->need_resched) {
4982 			ldcp->need_resched = B_FALSE;
4983 			vnet_tx_update(vgenp->vnetp);
4984 		}
4985 	}
4986 
4987 	ldcp->wd_tid = timeout(vgen_ldc_watchdog, (caddr_t)ldcp,
4988 	    drv_usectohz(vnet_ldcwd_interval * 1000));
4989 }
4990 
4991 /* handler for error messages received from the peer ldc end-point */
4992 static void
4993 vgen_handle_errmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4994 {
4995 	_NOTE(ARGUNUSED(ldcp, tagp))
4996 }
4997 
4998 /* Check if the session id in the received message is valid */
4999 static int
5000 vgen_check_sid(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5001 {
5002 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5003 
5004 	if (tagp->vio_sid != ldcp->peer_sid) {
5005 		DWARN(vgenp, ldcp, "sid mismatch: expected(%x), rcvd(%x)\n",
5006 		    ldcp->peer_sid, tagp->vio_sid);
5007 		return (VGEN_FAILURE);
5008 	}
5009 	else
5010 		return (VGEN_SUCCESS);
5011 }
5012 
5013 /* convert mac address from string to uint64_t */
5014 static uint64_t
5015 vgen_macaddr_strtoul(const uint8_t *macaddr)
5016 {
5017 	uint64_t val = 0;
5018 	int i;
5019 
5020 	for (i = 0; i < ETHERADDRL; i++) {
5021 		val <<= 8;
5022 		val |= macaddr[i];
5023 	}
5024 
5025 	return (val);
5026 }
5027 
5028 /* convert mac address from uint64_t to string */
5029 static int
5030 vgen_macaddr_ultostr(uint64_t val, uint8_t *macaddr)
5031 {
5032 	int i;
5033 	uint64_t value;
5034 
5035 	value = val;
5036 	for (i = ETHERADDRL - 1; i >= 0; i--) {
5037 		macaddr[i] = value & 0xFF;
5038 		value >>= 8;
5039 	}
5040 	return (VGEN_SUCCESS);
5041 }
5042 
5043 static caddr_t
5044 vgen_print_ethaddr(uint8_t *a, char *ebuf)
5045 {
5046 	(void) sprintf(ebuf,
5047 	    "%x:%x:%x:%x:%x:%x", a[0], a[1], a[2], a[3], a[4], a[5]);
5048 	return (ebuf);
5049 }
5050 
5051 /* Handshake watchdog timeout handler */
5052 static void
5053 vgen_hwatchdog(void *arg)
5054 {
5055 	vgen_ldc_t *ldcp = (vgen_ldc_t *)arg;
5056 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5057 
5058 	DWARN(vgenp, ldcp,
5059 	    "handshake timeout ldc(%lx) phase(%x) state(%x)\n",
5060 	    ldcp->hphase, ldcp->hstate);
5061 
5062 	mutex_enter(&ldcp->cblock);
5063 	if (ldcp->cancel_htid) {
5064 		ldcp->cancel_htid = 0;
5065 		mutex_exit(&ldcp->cblock);
5066 		return;
5067 	}
5068 	ldcp->htid = 0;
5069 	ldcp->need_ldc_reset = B_TRUE;
5070 	vgen_handshake_retry(ldcp);
5071 	mutex_exit(&ldcp->cblock);
5072 }
5073 
5074 static void
5075 vgen_print_hparams(vgen_hparams_t *hp)
5076 {
5077 	uint8_t	addr[6];
5078 	char	ea[6];
5079 	ldc_mem_cookie_t *dc;
5080 
5081 	cmn_err(CE_CONT, "version_info:\n");
5082 	cmn_err(CE_CONT,
5083 	    "\tver_major: %d, ver_minor: %d, dev_class: %d\n",
5084 	    hp->ver_major, hp->ver_minor, hp->dev_class);
5085 
5086 	(void) vgen_macaddr_ultostr(hp->addr, addr);
5087 	cmn_err(CE_CONT, "attr_info:\n");
5088 	cmn_err(CE_CONT, "\tMTU: %lx, addr: %s\n", hp->mtu,
5089 	    vgen_print_ethaddr(addr, ea));
5090 	cmn_err(CE_CONT,
5091 	    "\taddr_type: %x, xfer_mode: %x, ack_freq: %x\n",
5092 	    hp->addr_type, hp->xfer_mode, hp->ack_freq);
5093 
5094 	dc = &hp->dring_cookie;
5095 	cmn_err(CE_CONT, "dring_info:\n");
5096 	cmn_err(CE_CONT,
5097 	    "\tlength: %d, dsize: %d\n", hp->num_desc, hp->desc_size);
5098 	cmn_err(CE_CONT,
5099 	    "\tldc_addr: 0x%lx, ldc_size: %ld\n",
5100 	    dc->addr, dc->size);
5101 	cmn_err(CE_CONT, "\tdring_ident: 0x%lx\n", hp->dring_ident);
5102 }
5103 
5104 static void
5105 vgen_print_ldcinfo(vgen_ldc_t *ldcp)
5106 {
5107 	vgen_hparams_t *hp;
5108 
5109 	cmn_err(CE_CONT, "Channel Information:\n");
5110 	cmn_err(CE_CONT,
5111 	    "\tldc_id: 0x%lx, ldc_status: 0x%x\n",
5112 	    ldcp->ldc_id, ldcp->ldc_status);
5113 	cmn_err(CE_CONT,
5114 	    "\tlocal_sid: 0x%x, peer_sid: 0x%x\n",
5115 	    ldcp->local_sid, ldcp->peer_sid);
5116 	cmn_err(CE_CONT,
5117 	    "\thphase: 0x%x, hstate: 0x%x\n",
5118 	    ldcp->hphase, ldcp->hstate);
5119 
5120 	cmn_err(CE_CONT, "Local handshake params:\n");
5121 	hp = &ldcp->local_hparams;
5122 	vgen_print_hparams(hp);
5123 
5124 	cmn_err(CE_CONT, "Peer handshake params:\n");
5125 	hp = &ldcp->peer_hparams;
5126 	vgen_print_hparams(hp);
5127 }
5128 
5129 /*
5130  * vgen_ldc_queue_data -- Queue data in the LDC.
5131  */
5132 static void
5133 vgen_ldc_queue_data(vgen_ldc_t *ldcp, mblk_t *rhead, mblk_t *rtail)
5134 {
5135 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5136 
5137 	DBG1(vgenp, ldcp, "enter\n");
5138 	/*
5139 	 * If the receive thread is enabled, then the queue
5140 	 * is protected by the soft_lock. After queuing, trigger
5141 	 * the soft interrupt so that the interrupt handler sends these
5142 	 * messages up the stack.
5143 	 *
5144 	 * If the receive thread is not enabled, then the list is
5145 	 * automatically protected by the cblock lock, so no need
5146 	 * to hold any additional locks.
5147 	 */
5148 	if (ldcp->rcv_thread != NULL) {
5149 		mutex_enter(&ldcp->soft_lock);
5150 	}
5151 	if (ldcp->rcv_mhead == NULL) {
5152 		ldcp->rcv_mhead = rhead;
5153 		ldcp->rcv_mtail = rtail;
5154 	} else {
5155 		ldcp->rcv_mtail->b_next = rhead;
5156 		ldcp->rcv_mtail = rtail;
5157 	}
5158 	if (ldcp->rcv_thread != NULL) {
5159 		mutex_exit(&ldcp->soft_lock);
5160 		(void) ddi_intr_trigger_softint(ldcp->soft_handle, NULL);
5161 	}
5162 	DBG1(vgenp, ldcp, "exit\n");
5163 }
5164 
5165 /*
5166  * vgen_ldc_rcv_worker -- A per LDC worker thread to receive data.
5167  * This thread is woken up by the LDC interrupt handler to process
5168  * LDC packets and receive data.
5169  */
5170 static void
5171 vgen_ldc_rcv_worker(void *arg)
5172 {
5173 	callb_cpr_t	cprinfo;
5174 	vgen_ldc_t *ldcp = (vgen_ldc_t *)arg;
5175 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5176 
5177 	DBG1(vgenp, ldcp, "enter\n");
5178 	CALLB_CPR_INIT(&cprinfo, &ldcp->rcv_thr_lock, callb_generic_cpr,
5179 	    "vnet_rcv_thread");
5180 	mutex_enter(&ldcp->rcv_thr_lock);
5181 	ldcp->rcv_thr_flags |= VGEN_WTHR_RUNNING;
5182 	while (!(ldcp->rcv_thr_flags & VGEN_WTHR_STOP)) {
5183 
5184 		CALLB_CPR_SAFE_BEGIN(&cprinfo);
5185 		/*
5186 		 * Wait until the data is received or a stop
5187 		 * request is received.
5188 		 */
5189 		while (!(ldcp->rcv_thr_flags &
5190 		    (VGEN_WTHR_DATARCVD | VGEN_WTHR_STOP))) {
5191 			cv_wait(&ldcp->rcv_thr_cv, &ldcp->rcv_thr_lock);
5192 		}
5193 		CALLB_CPR_SAFE_END(&cprinfo, &ldcp->rcv_thr_lock)
5194 
5195 		/*
5196 		 * First process the stop request.
5197 		 */
5198 		if (ldcp->rcv_thr_flags & VGEN_WTHR_STOP) {
5199 			DBG2(vgenp, ldcp, "stopped\n");
5200 			break;
5201 		}
5202 		ldcp->rcv_thr_flags &= ~VGEN_WTHR_DATARCVD;
5203 		mutex_exit(&ldcp->rcv_thr_lock);
5204 		DBG2(vgenp, ldcp, "calling vgen_handle_evt_read\n");
5205 		vgen_handle_evt_read(ldcp);
5206 		mutex_enter(&ldcp->rcv_thr_lock);
5207 	}
5208 
5209 	/*
5210 	 * Update the run status and wakeup the thread that
5211 	 * has sent the stop request.
5212 	 */
5213 	ldcp->rcv_thr_flags &= ~VGEN_WTHR_RUNNING;
5214 	cv_signal(&ldcp->rcv_thr_cv);
5215 	CALLB_CPR_EXIT(&cprinfo);
5216 	thread_exit();
5217 	DBG1(vgenp, ldcp, "exit\n");
5218 }
5219 
5220 /* vgen_stop_rcv_thread -- Co-ordinate with receive thread to stop it */
5221 static void
5222 vgen_stop_rcv_thread(vgen_ldc_t *ldcp)
5223 {
5224 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5225 
5226 	DBG1(vgenp, ldcp, "enter\n");
5227 	/*
5228 	 * Send a stop request by setting the stop flag and
5229 	 * wait until the receive thread stops.
5230 	 */
5231 	mutex_enter(&ldcp->rcv_thr_lock);
5232 	if (ldcp->rcv_thr_flags & VGEN_WTHR_RUNNING) {
5233 		ldcp->rcv_thr_flags |= VGEN_WTHR_STOP;
5234 		cv_signal(&ldcp->rcv_thr_cv);
5235 		DBG2(vgenp, ldcp, "waiting...");
5236 		while (ldcp->rcv_thr_flags & VGEN_WTHR_RUNNING) {
5237 			cv_wait(&ldcp->rcv_thr_cv, &ldcp->rcv_thr_lock);
5238 		}
5239 	}
5240 	mutex_exit(&ldcp->rcv_thr_lock);
5241 	ldcp->rcv_thread = NULL;
5242 	DBG1(vgenp, ldcp, "exit\n");
5243 }
5244 
5245 /*
5246  * vgen_ldc_rcv_softintr -- LDC Soft interrupt handler function.
5247  * Its job is to pickup the recieved packets that are queued in the
5248  * LDC and send them up.
5249  *
5250  * NOTE: An interrupt handler is being used to handle the upper
5251  * layer(s) requirement to send up only at interrupt context.
5252  */
5253 /* ARGSUSED */
5254 static uint_t
5255 vgen_ldc_rcv_softintr(caddr_t arg1, caddr_t arg2)
5256 {
5257 	mblk_t *mp;
5258 	vgen_ldc_t *ldcp = (vgen_ldc_t *)arg1;
5259 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5260 
5261 	DBG1(vgenp, ldcp, "enter\n");
5262 	DTRACE_PROBE1(vgen_soft_intr, uint64_t, ldcp->ldc_id);
5263 	mutex_enter(&ldcp->soft_lock);
5264 	mp = ldcp->rcv_mhead;
5265 	ldcp->rcv_mhead = ldcp->rcv_mtail = NULL;
5266 	mutex_exit(&ldcp->soft_lock);
5267 	if (mp != NULL) {
5268 		vnet_rx(vgenp->vnetp, NULL, mp);
5269 	}
5270 	DBG1(vgenp, ldcp, "exit\n");
5271 	return (DDI_INTR_CLAIMED);
5272 }
5273 
5274 #if DEBUG
5275 
5276 /*
5277  * Print debug messages - set to 0xf to enable all msgs
5278  */
5279 static void
5280 debug_printf(const char *fname, vgen_t *vgenp,
5281     vgen_ldc_t *ldcp, const char *fmt, ...)
5282 {
5283 	char    buf[256];
5284 	char    *bufp = buf;
5285 	va_list ap;
5286 
5287 	if ((vgenp != NULL) && (vgenp->vnetp != NULL)) {
5288 		(void) sprintf(bufp, "vnet%d:",
5289 		    ((vnet_t *)(vgenp->vnetp))->instance);
5290 		bufp += strlen(bufp);
5291 	}
5292 	if (ldcp != NULL) {
5293 		(void) sprintf(bufp, "ldc(%ld):", ldcp->ldc_id);
5294 		bufp += strlen(bufp);
5295 	}
5296 	(void) sprintf(bufp, "%s: ", fname);
5297 	bufp += strlen(bufp);
5298 
5299 	va_start(ap, fmt);
5300 	(void) vsprintf(bufp, fmt, ap);
5301 	va_end(ap);
5302 
5303 	if ((ldcp == NULL) ||(vgendbg_ldcid == -1) ||
5304 	    (vgendbg_ldcid == ldcp->ldc_id)) {
5305 		cmn_err(CE_CONT, "%s\n", buf);
5306 	}
5307 }
5308 #endif
5309