xref: /titanic_51/usr/src/uts/sun4v/io/vnet_gen.c (revision 1d925b368c0579a57acb90e1e8db63c3a5613790)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/errno.h>
31 #include <sys/param.h>
32 #include <sys/stream.h>
33 #include <sys/strsubr.h>
34 #include <sys/kmem.h>
35 #include <sys/conf.h>
36 #include <sys/devops.h>
37 #include <sys/ksynch.h>
38 #include <sys/stat.h>
39 #include <sys/modctl.h>
40 #include <sys/debug.h>
41 #include <sys/ethernet.h>
42 #include <sys/ddi.h>
43 #include <sys/sunddi.h>
44 #include <sys/strsun.h>
45 #include <sys/note.h>
46 #include <sys/mac.h>
47 #include <sys/mac_ether.h>
48 #include <sys/ldc.h>
49 #include <sys/mach_descrip.h>
50 #include <sys/mdeg.h>
51 #include <net/if.h>
52 #include <sys/vnet.h>
53 #include <sys/vio_mailbox.h>
54 #include <sys/vio_common.h>
55 #include <sys/vnet_common.h>
56 #include <sys/vnet_mailbox.h>
57 #include <sys/vio_util.h>
58 #include <sys/vnet_gen.h>
59 #include <sys/callb.h>
60 #include <sys/sdt.h>
61 #include <sys/intr.h>
62 #include <sys/pattr.h>
63 
64 /*
65  * Implementation of the mac functionality for vnet using the
66  * generic(default) transport layer of sun4v Logical Domain Channels(LDC).
67  */
68 
69 /*
70  * Function prototypes.
71  */
72 /* vgen proxy entry points */
73 int vgen_init(void *vnetp, dev_info_t *vnetdip, const uint8_t *macaddr,
74 	mac_register_t **vgenmacp);
75 int vgen_uninit(void *arg);
76 static int vgen_start(void *arg);
77 static void vgen_stop(void *arg);
78 static mblk_t *vgen_tx(void *arg, mblk_t *mp);
79 static int vgen_multicst(void *arg, boolean_t add,
80 	const uint8_t *mca);
81 static int vgen_promisc(void *arg, boolean_t on);
82 static int vgen_unicst(void *arg, const uint8_t *mca);
83 static int vgen_stat(void *arg, uint_t stat, uint64_t *val);
84 static void vgen_ioctl(void *arg, queue_t *wq, mblk_t *mp);
85 
86 /* externs - functions provided by vnet to add/remove/modify entries in fdb */
87 void vnet_add_fdb(void *arg, uint8_t *macaddr, mac_tx_t m_tx, void *txarg);
88 void vnet_del_fdb(void *arg, uint8_t *macaddr);
89 void vnet_modify_fdb(void *arg, uint8_t *macaddr, mac_tx_t m_tx,
90 	void *txarg, boolean_t upgrade);
91 void vnet_add_def_rte(void *arg, mac_tx_t m_tx, void *txarg);
92 void vnet_del_def_rte(void *arg);
93 void vnet_rx(void *arg, mac_resource_handle_t mrh, mblk_t *mp);
94 void vnet_tx_update(void *arg);
95 
96 /* vgen internal functions */
97 static void vgen_detach_ports(vgen_t *vgenp);
98 static void vgen_port_detach(vgen_port_t *portp);
99 static void vgen_port_list_insert(vgen_port_t *portp);
100 static void vgen_port_list_remove(vgen_port_t *portp);
101 static vgen_port_t *vgen_port_lookup(vgen_portlist_t *plistp,
102 	int port_num);
103 static int vgen_mdeg_reg(vgen_t *vgenp);
104 static void vgen_mdeg_unreg(vgen_t *vgenp);
105 static int vgen_mdeg_cb(void *cb_argp, mdeg_result_t *resp);
106 static int vgen_add_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
107 static int vgen_remove_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
108 static int vgen_port_attach_mdeg(vgen_t *vgenp, int port_num, uint64_t *ldcids,
109 	int num_ids, struct ether_addr *macaddr, boolean_t vsw_port);
110 static void vgen_port_detach_mdeg(vgen_port_t *portp);
111 static int vgen_update_port(vgen_t *vgenp, md_t *curr_mdp,
112 	mde_cookie_t curr_mdex, md_t *prev_mdp, mde_cookie_t prev_mdex);
113 static uint64_t	vgen_port_stat(vgen_port_t *portp, uint_t stat);
114 
115 static int vgen_ldc_attach(vgen_port_t *portp, uint64_t ldc_id);
116 static void vgen_ldc_detach(vgen_ldc_t *ldcp);
117 static int vgen_alloc_tx_ring(vgen_ldc_t *ldcp);
118 static void vgen_free_tx_ring(vgen_ldc_t *ldcp);
119 static void vgen_init_ports(vgen_t *vgenp);
120 static void vgen_port_init(vgen_port_t *portp);
121 static void vgen_uninit_ports(vgen_t *vgenp);
122 static void vgen_port_uninit(vgen_port_t *portp);
123 static void vgen_init_ldcs(vgen_port_t *portp);
124 static void vgen_uninit_ldcs(vgen_port_t *portp);
125 static int vgen_ldc_init(vgen_ldc_t *ldcp);
126 static void vgen_ldc_uninit(vgen_ldc_t *ldcp);
127 static int vgen_init_tbufs(vgen_ldc_t *ldcp);
128 static void vgen_uninit_tbufs(vgen_ldc_t *ldcp);
129 static void vgen_clobber_tbufs(vgen_ldc_t *ldcp);
130 static void vgen_clobber_rxds(vgen_ldc_t *ldcp);
131 static uint64_t	vgen_ldc_stat(vgen_ldc_t *ldcp, uint_t stat);
132 static uint_t vgen_ldc_cb(uint64_t event, caddr_t arg);
133 static int vgen_portsend(vgen_port_t *portp, mblk_t *mp);
134 static int vgen_ldcsend(vgen_ldc_t *ldcp, mblk_t *mp);
135 static void vgen_reclaim(vgen_ldc_t *ldcp);
136 static void vgen_reclaim_dring(vgen_ldc_t *ldcp);
137 static int vgen_num_txpending(vgen_ldc_t *ldcp);
138 static int vgen_tx_dring_full(vgen_ldc_t *ldcp);
139 static int vgen_ldc_txtimeout(vgen_ldc_t *ldcp);
140 static void vgen_ldc_watchdog(void *arg);
141 static int vgen_setup_kstats(vgen_ldc_t *ldcp);
142 static void vgen_destroy_kstats(vgen_ldc_t *ldcp);
143 static int vgen_kstat_update(kstat_t *ksp, int rw);
144 
145 /* vgen handshake functions */
146 static vgen_ldc_t *vh_nextphase(vgen_ldc_t *ldcp);
147 static int vgen_supported_version(vgen_ldc_t *ldcp, uint16_t ver_major,
148 	uint16_t ver_minor);
149 static int vgen_next_version(vgen_ldc_t *ldcp, vgen_ver_t *verp);
150 static int vgen_sendmsg(vgen_ldc_t *ldcp, caddr_t msg,  size_t msglen,
151 	boolean_t caller_holds_lock);
152 static int vgen_send_version_negotiate(vgen_ldc_t *ldcp);
153 static int vgen_send_attr_info(vgen_ldc_t *ldcp);
154 static int vgen_send_dring_reg(vgen_ldc_t *ldcp);
155 static int vgen_send_rdx_info(vgen_ldc_t *ldcp);
156 static int vgen_send_dring_data(vgen_ldc_t *ldcp, uint32_t start, int32_t end);
157 static int vgen_send_mcast_info(vgen_ldc_t *ldcp);
158 static int vgen_handshake_phase2(vgen_ldc_t *ldcp);
159 static void vgen_handshake_reset(vgen_ldc_t *ldcp);
160 static void vgen_reset_hphase(vgen_ldc_t *ldcp);
161 static void vgen_handshake(vgen_ldc_t *ldcp);
162 static int vgen_handshake_done(vgen_ldc_t *ldcp);
163 static void vgen_handshake_retry(vgen_ldc_t *ldcp);
164 static int vgen_handle_version_negotiate(vgen_ldc_t *ldcp,
165 	vio_msg_tag_t *tagp);
166 static int vgen_handle_attr_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
167 static int vgen_handle_dring_reg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
168 static int vgen_handle_rdx_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
169 static int vgen_handle_mcast_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
170 static int vgen_handle_ctrlmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
171 static int vgen_handle_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
172 static int vgen_handle_dring_data_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
173 static int vgen_process_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
174 static int vgen_handle_dring_data_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
175 static int vgen_handle_dring_data_nack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
176 static int vgen_send_dring_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp,
177 	uint32_t start, int32_t end, uint8_t pstate);
178 static int vgen_handle_datamsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
179 static void vgen_handle_errmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
180 static void vgen_handle_evt_up(vgen_ldc_t *ldcp, boolean_t flag);
181 static void vgen_handle_evt_reset(vgen_ldc_t *ldcp, boolean_t flag);
182 static int vgen_check_sid(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
183 static uint64_t	vgen_macaddr_strtoul(const uint8_t *macaddr);
184 static int vgen_macaddr_ultostr(uint64_t value, uint8_t *macaddr);
185 static caddr_t vgen_print_ethaddr(uint8_t *a, char *ebuf);
186 static void vgen_hwatchdog(void *arg);
187 static void vgen_print_attr_info(vgen_ldc_t *ldcp, int endpoint);
188 static void vgen_print_hparams(vgen_hparams_t *hp);
189 static void vgen_print_ldcinfo(vgen_ldc_t *ldcp);
190 static uint_t vgen_ldc_rcv_softintr(caddr_t arg1, caddr_t arg2);
191 static void vgen_stop_rcv_thread(vgen_ldc_t *ldcp);
192 static void vgen_ldc_rcv_worker(void *arg);
193 static void vgen_handle_evt_read(vgen_ldc_t *ldcp);
194 static void vgen_ldc_queue_data(vgen_ldc_t *ldcp,
195 	mblk_t *rhead, mblk_t *rtail);
196 
197 /*
198  * The handshake process consists of 5 phases defined below, with VH_PHASE0
199  * being the pre-handshake phase and VH_DONE is the phase to indicate
200  * successful completion of all phases.
201  * Each phase may have one to several handshake states which are required
202  * to complete successfully to move to the next phase.
203  * Refer to the functions vgen_handshake() and vgen_handshake_done() for
204  * more details.
205  */
/*
 * Handshake phases. The first four enumerators are consecutive starting
 * at zero; VH_DONE is a distinct marker value (0x80).
 */
enum {
	VH_PHASE0	=	0,
	VH_PHASE1	=	1,
	VH_PHASE2	=	2,
	VH_PHASE3	=	3,
	VH_DONE		=	0x80
};
208 
/*
 * Handshake states. Every individual state occupies its own bit so that
 * states can be OR-ed together; each composite value below is the OR of
 * the corresponding "ack received" and "ack sent" bits.
 */
enum {

	/* VER_* bits */
	VER_INFO_SENT	=	(1 << 0),
	VER_ACK_RCVD	=	(1 << 1),
	VER_INFO_RCVD	=	(1 << 2),
	VER_ACK_SENT	=	(1 << 3),
	VER_NEGOTIATED	=	(VER_ACK_RCVD | VER_ACK_SENT),

	/* ATTR_* bits */
	ATTR_INFO_SENT	=	(1 << 4),
	ATTR_ACK_RCVD	=	(1 << 5),
	ATTR_INFO_RCVD	=	(1 << 6),
	ATTR_ACK_SENT	=	(1 << 7),
	ATTR_INFO_EXCHANGED	=	(ATTR_ACK_RCVD | ATTR_ACK_SENT),

	/* DRING_* bits */
	DRING_INFO_SENT	=	(1 << 8),
	DRING_ACK_RCVD	=	(1 << 9),
	DRING_INFO_RCVD	=	(1 << 10),
	DRING_ACK_SENT	=	(1 << 11),
	DRING_INFO_EXCHANGED	=	(DRING_ACK_RCVD | DRING_ACK_SENT),

	/* RDX_* bits */
	RDX_INFO_SENT	=	(1 << 12),
	RDX_ACK_RCVD	=	(1 << 13),
	RDX_INFO_RCVD	=	(1 << 14),
	RDX_ACK_SENT	=	(1 << 15),
	RDX_EXCHANGED	=	(RDX_ACK_RCVD | RDX_ACK_SENT)

};
237 
/*
 * Acquire / release all five per-channel mutexes as a unit.
 * LDC_LOCK takes them in the order cblock, rxlock, wrlock, txlock, tclock;
 * LDC_UNLOCK drops them in exactly the reverse order.
 *
 * Both macros expand to a single statement (do { } while (0)) so that they
 * behave correctly as the body of an unbraced if/else/loop. Invoke them as
 * "LDC_LOCK(ldcp);" with a trailing semicolon.
 */
#define	LDC_LOCK(ldcp)	\
	do {	\
		mutex_enter(&((ldcp)->cblock));	\
		mutex_enter(&((ldcp)->rxlock));	\
		mutex_enter(&((ldcp)->wrlock));	\
		mutex_enter(&((ldcp)->txlock));	\
		mutex_enter(&((ldcp)->tclock));	\
	} while (0)
#define	LDC_UNLOCK(ldcp)	\
	do {	\
		mutex_exit(&((ldcp)->tclock));	\
		mutex_exit(&((ldcp)->txlock));	\
		mutex_exit(&((ldcp)->wrlock));	\
		mutex_exit(&((ldcp)->rxlock));	\
		mutex_exit(&((ldcp)->cblock));	\
	} while (0)
250 
251 static struct ether_addr etherbroadcastaddr = {
252 	0xff, 0xff, 0xff, 0xff, 0xff, 0xff
253 };
254 /*
255  * MIB II broadcast/multicast packets
256  */
/*
 * Destination-address classification of an ethernet header pointer.
 * The argument is parenthesized so that any pointer expression
 * (e.g. "hdrs + 1") can be passed safely.
 *   IS_BROADCAST: destination equals etherbroadcastaddr.
 *   IS_MULTICAST: low-order bit of the first destination octet is set.
 */
#define	IS_BROADCAST(ehp) \
		(ether_cmp(&(ehp)->ether_dhost, &etherbroadcastaddr) == 0)
#define	IS_MULTICAST(ehp) \
		(((ehp)->ether_dhost.ether_addr_octet[0] & 01) == 1)
261 
262 /*
263  * Property names
264  */
265 static char macaddr_propname[] = "mac-address";
266 static char rmacaddr_propname[] = "remote-mac-address";
267 static char channel_propname[] = "channel-endpoint";
268 static char reg_propname[] = "reg";
269 static char port_propname[] = "port";
270 static char swport_propname[] = "switch-port";
271 static char id_propname[] = "id";
272 
273 /* versions supported - in decreasing order */
274 static vgen_ver_t vgen_versions[VGEN_NUM_VER] = { {1, 0} };
275 
276 /* Tunables */
277 uint32_t vgen_hwd_interval = 5;		/* handshake watchdog freq in sec */
278 uint32_t vgen_max_hretries = VNET_NUM_HANDSHAKES; /* # of handshake retries */
279 uint32_t vgen_ldcwr_retries = 10;	/* max # of ldc_write() retries */
280 uint32_t vgen_ldcup_retries = 5;	/* max # of ldc_up() retries */
281 uint32_t vgen_recv_delay = 1;		/* delay when rx descr not ready */
282 uint32_t vgen_recv_retries = 10;	/* retry when rx descr not ready */
283 uint32_t vgen_tx_retries = 0x4;		/* retry when tx descr not available */
284 uint32_t vgen_tx_delay = 0x30;		/* delay when tx descr not available */
285 
286 int vgen_rcv_thread_enabled = 1;	/* Enable Recieve thread */
287 
/*
 * Maximum number of packets accumulated before they are sent up. It is
 * best to keep this at 60% of the number of receive buffers.
 */
292 uint32_t vgen_chain_len = (VGEN_NRBUFS * 0.6);
293 
294 /*
295  * Tunables for each receive buffer size and number of buffers for
296  * each buffer size.
297  */
298 uint32_t vgen_rbufsz1 = VGEN_DBLK_SZ_128;
299 uint32_t vgen_rbufsz2 = VGEN_DBLK_SZ_256;
300 uint32_t vgen_rbufsz3 = VGEN_DBLK_SZ_2048;
301 
302 uint32_t vgen_nrbufs1 = VGEN_NRBUFS;
303 uint32_t vgen_nrbufs2 = VGEN_NRBUFS;
304 uint32_t vgen_nrbufs3 = VGEN_NRBUFS;
305 
306 #ifdef DEBUG
307 /* flags to simulate error conditions for debugging */
308 int vgen_trigger_txtimeout = 0;
309 int vgen_trigger_rxlost = 0;
310 #endif
311 
312 /* MD update matching structure */
313 static md_prop_match_t	vport_prop_match[] = {
314 	{ MDET_PROP_VAL,	"id" },
315 	{ MDET_LIST_END,	NULL }
316 };
317 
318 static mdeg_node_match_t vport_match = { "virtual-device-port",
319 					vport_prop_match };
320 
321 /* template for matching a particular vnet instance */
322 static mdeg_prop_spec_t vgen_prop_template[] = {
323 	{ MDET_PROP_STR,	"name",		"network" },
324 	{ MDET_PROP_VAL,	"cfg-handle",	NULL },
325 	{ MDET_LIST_END,	NULL,		NULL }
326 };
327 
328 #define	VGEN_SET_MDEG_PROP_INST(specp, val)	(specp)[1].ps_val = (val)
329 
330 static int vgen_mdeg_cb(void *cb_argp, mdeg_result_t *resp);
331 
332 static mac_callbacks_t vgen_m_callbacks = {
333 	0,
334 	vgen_stat,
335 	vgen_start,
336 	vgen_stop,
337 	vgen_promisc,
338 	vgen_multicst,
339 	vgen_unicst,
340 	vgen_tx,
341 	NULL,
342 	NULL,
343 	NULL
344 };
345 
346 /* externs */
347 extern pri_t	maxclsyspri;
348 extern proc_t	p0;
349 extern uint32_t vnet_ntxds;
350 extern uint32_t vnet_ldcwd_interval;
351 extern uint32_t vnet_ldcwd_txtimeout;
352 extern uint32_t vnet_ldc_mtu;
353 extern uint32_t vnet_nrbufs;
354 
355 
356 #ifdef DEBUG
357 
358 extern int vnet_dbglevel;
359 static void debug_printf(const char *fname, vgen_t *vgenp,
360 	vgen_ldc_t *ldcp, const char *fmt, ...);
361 
362 /* -1 for all LDCs info, or ldc_id for a specific LDC info */
363 int vgendbg_ldcid = -1;
364 
365 /* simulate handshake error conditions for debug */
366 uint32_t vgen_hdbg;
367 #define	HDBG_VERSION	0x1
368 #define	HDBG_TIMEOUT	0x2
369 #define	HDBG_BAD_SID	0x4
370 #define	HDBG_OUT_STATE	0x8
371 
372 #endif
373 
374 
375 
376 /*
377  * vgen_init() is called by an instance of vnet driver to initialize the
378  * corresponding generic proxy transport layer. The arguments passed by vnet
379  * are - an opaque pointer to the vnet instance, pointers to dev_info_t and
380  * the mac address of the vnet device, and a pointer to mac_register_t of
381  * the generic transport is returned in the last argument.
382  */
383 int
384 vgen_init(void *vnetp, dev_info_t *vnetdip, const uint8_t *macaddr,
385     mac_register_t **vgenmacp)
386 {
387 	vgen_t *vgenp;
388 	mac_register_t *macp;
389 	int instance;
390 
391 	if ((vnetp == NULL) || (vnetdip == NULL))
392 		return (DDI_FAILURE);
393 
394 	instance = ddi_get_instance(vnetdip);
395 
396 	DBG1(NULL, NULL, "vnet(%d):%s: enter\n", instance);
397 
398 	vgenp = kmem_zalloc(sizeof (vgen_t), KM_SLEEP);
399 
400 	vgenp->vnetp = vnetp;
401 	vgenp->vnetdip = vnetdip;
402 	bcopy(macaddr, &(vgenp->macaddr), ETHERADDRL);
403 
404 	if ((macp = mac_alloc(MAC_VERSION)) == NULL) {
405 		KMEM_FREE(vgenp);
406 		return (DDI_FAILURE);
407 	}
408 	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
409 	macp->m_driver = vgenp;
410 	macp->m_dip = vnetdip;
411 	macp->m_src_addr = (uint8_t *)&(vgenp->macaddr);
412 	macp->m_callbacks = &vgen_m_callbacks;
413 	macp->m_min_sdu = 0;
414 	macp->m_max_sdu = ETHERMTU;
415 	vgenp->macp = macp;
416 
417 	/* allocate multicast table */
418 	vgenp->mctab = kmem_zalloc(VGEN_INIT_MCTAB_SIZE *
419 	    sizeof (struct ether_addr), KM_SLEEP);
420 	vgenp->mccount = 0;
421 	vgenp->mcsize = VGEN_INIT_MCTAB_SIZE;
422 
423 	mutex_init(&vgenp->lock, NULL, MUTEX_DRIVER, NULL);
424 
425 	/* register with MD event generator */
426 	if (vgen_mdeg_reg(vgenp) != DDI_SUCCESS) {
427 		mutex_destroy(&vgenp->lock);
428 		kmem_free(vgenp->mctab, VGEN_INIT_MCTAB_SIZE *
429 		    sizeof (struct ether_addr));
430 		mac_free(vgenp->macp);
431 		KMEM_FREE(vgenp);
432 		return (DDI_FAILURE);
433 	}
434 
435 	/* register macp of this vgen_t with vnet */
436 	*vgenmacp = vgenp->macp;
437 
438 	DBG1(NULL, NULL, "vnet(%d):%s: exit\n", instance);
439 	return (DDI_SUCCESS);
440 }
441 
442 /*
443  * Called by vnet to undo the initializations done by vgen_init().
444  * The handle provided by generic transport during vgen_init() is the argument.
445  */
446 int
447 vgen_uninit(void *arg)
448 {
449 	vgen_t	*vgenp = (vgen_t *)arg;
450 	vio_mblk_pool_t *rp, *nrp;
451 
452 	if (vgenp == NULL) {
453 		return (DDI_FAILURE);
454 	}
455 
456 	DBG1(vgenp, NULL, "enter\n");
457 
458 	/* unregister with MD event generator */
459 	vgen_mdeg_unreg(vgenp);
460 
461 	mutex_enter(&vgenp->lock);
462 
463 	/* detach all ports from the device */
464 	vgen_detach_ports(vgenp);
465 
466 	/*
467 	 * free any pending rx mblk pools,
468 	 * that couldn't be freed previously during channel detach.
469 	 */
470 	rp = vgenp->rmp;
471 	while (rp != NULL) {
472 		nrp = vgenp->rmp = rp->nextp;
473 		if (vio_destroy_mblks(rp)) {
474 			vgenp->rmp = rp;
475 			mutex_exit(&vgenp->lock);
476 			return (DDI_FAILURE);
477 		}
478 		rp = nrp;
479 	}
480 
481 	/* free multicast table */
482 	kmem_free(vgenp->mctab, vgenp->mcsize * sizeof (struct ether_addr));
483 
484 	mac_free(vgenp->macp);
485 
486 	mutex_exit(&vgenp->lock);
487 
488 	mutex_destroy(&vgenp->lock);
489 
490 	KMEM_FREE(vgenp);
491 
492 	DBG1(vgenp, NULL, "exit\n");
493 
494 	return (DDI_SUCCESS);
495 }
496 
497 /* enable transmit/receive for the device */
498 int
499 vgen_start(void *arg)
500 {
501 	vgen_t		*vgenp = (vgen_t *)arg;
502 
503 	DBG1(vgenp, NULL, "enter\n");
504 
505 	mutex_enter(&vgenp->lock);
506 	vgen_init_ports(vgenp);
507 	vgenp->flags |= VGEN_STARTED;
508 	mutex_exit(&vgenp->lock);
509 
510 	DBG1(vgenp, NULL, "exit\n");
511 	return (DDI_SUCCESS);
512 }
513 
514 /* stop transmit/receive */
515 void
516 vgen_stop(void *arg)
517 {
518 	vgen_t		*vgenp = (vgen_t *)arg;
519 
520 	DBG1(vgenp, NULL, "enter\n");
521 
522 	mutex_enter(&vgenp->lock);
523 	vgen_uninit_ports(vgenp);
524 	vgenp->flags &= ~(VGEN_STARTED);
525 	mutex_exit(&vgenp->lock);
526 
527 	DBG1(vgenp, NULL, "exit\n");
528 }
529 
530 /* vgen transmit function */
531 static mblk_t *
532 vgen_tx(void *arg, mblk_t *mp)
533 {
534 	int i;
535 	vgen_port_t *portp;
536 	int status = VGEN_FAILURE;
537 
538 	portp = (vgen_port_t *)arg;
539 	/*
540 	 * Retry so that we avoid reporting a failure
541 	 * to the upper layer. Returning a failure may cause the
542 	 * upper layer to go into single threaded mode there by
543 	 * causing performance degradation, especially for a large
544 	 * number of connections.
545 	 */
546 	for (i = 0; i < vgen_tx_retries; ) {
547 		status = vgen_portsend(portp, mp);
548 		if (status == VGEN_SUCCESS) {
549 			break;
550 		}
551 		if (++i < vgen_tx_retries)
552 			delay(drv_usectohz(vgen_tx_delay));
553 	}
554 	if (status != VGEN_SUCCESS) {
555 		/* failure */
556 		return (mp);
557 	}
558 	/* success */
559 	return (NULL);
560 }
561 
562 /* transmit packets over the given port */
563 static int
564 vgen_portsend(vgen_port_t *portp, mblk_t *mp)
565 {
566 	vgen_ldclist_t	*ldclp;
567 	vgen_ldc_t *ldcp;
568 	int status;
569 	int rv = VGEN_SUCCESS;
570 
571 	ldclp = &portp->ldclist;
572 	READ_ENTER(&ldclp->rwlock);
573 	/*
574 	 * NOTE: for now, we will assume we have a single channel.
575 	 */
576 	if (ldclp->headp == NULL) {
577 		RW_EXIT(&ldclp->rwlock);
578 		return (VGEN_FAILURE);
579 	}
580 	ldcp = ldclp->headp;
581 
582 	status  = vgen_ldcsend(ldcp, mp);
583 
584 	RW_EXIT(&ldclp->rwlock);
585 
586 	if (status != VGEN_TX_SUCCESS) {
587 		rv = VGEN_FAILURE;
588 	}
589 	return (rv);
590 }
591 
592 /* channel transmit function */
593 static int
594 vgen_ldcsend(vgen_ldc_t *ldcp, mblk_t *mp)
595 {
596 	vgen_private_desc_t	*tbufp;
597 	vgen_private_desc_t	*rtbufp;
598 	vnet_public_desc_t	*rtxdp;
599 	vgen_private_desc_t	*ntbufp;
600 	vnet_public_desc_t	*txdp;
601 	vio_dring_entry_hdr_t	*hdrp;
602 	vgen_stats_t		*statsp;
603 	struct ether_header	*ehp;
604 	boolean_t	is_bcast = B_FALSE;
605 	boolean_t	is_mcast = B_FALSE;
606 	size_t		mblksz;
607 	caddr_t		dst;
608 	mblk_t		*bp;
609 	size_t		size;
610 	int		rv = 0;
611 	ldc_status_t	istatus;
612 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
613 
614 	statsp = ldcp->statsp;
615 	size = msgsize(mp);
616 
617 	DBG1(vgenp, ldcp, "enter\n");
618 
619 	if (ldcp->ldc_status != LDC_UP) {
620 		DWARN(vgenp, ldcp, "status(%d), dropping packet\n",
621 		    ldcp->ldc_status);
622 		/* retry ldc_up() if needed */
623 		if (ldcp->flags & CHANNEL_STARTED)
624 			(void) ldc_up(ldcp->ldc_handle);
625 		goto vgen_tx_exit;
626 	}
627 
628 	/* drop the packet if ldc is not up or handshake is not done */
629 	if (ldcp->hphase != VH_DONE) {
630 		DWARN(vgenp, ldcp, "hphase(%x), dropping packet\n",
631 		    ldcp->hphase);
632 		goto vgen_tx_exit;
633 	}
634 
635 	if (size > (size_t)ETHERMAX) {
636 		DWARN(vgenp, ldcp, "invalid size(%d)\n", size);
637 		goto vgen_tx_exit;
638 	}
639 	if (size < ETHERMIN)
640 		size = ETHERMIN;
641 
642 	ehp = (struct ether_header *)mp->b_rptr;
643 	is_bcast = IS_BROADCAST(ehp);
644 	is_mcast = IS_MULTICAST(ehp);
645 
646 	mutex_enter(&ldcp->txlock);
647 	/*
648 	 * allocate a descriptor
649 	 */
650 	tbufp = ldcp->next_tbufp;
651 	ntbufp = NEXTTBUF(ldcp, tbufp);
652 	if (ntbufp == ldcp->cur_tbufp) { /* out of tbufs/txds */
653 
654 		mutex_enter(&ldcp->tclock);
655 		/* Try reclaiming now */
656 		vgen_reclaim_dring(ldcp);
657 		ldcp->reclaim_lbolt = ddi_get_lbolt();
658 
659 		if (ntbufp == ldcp->cur_tbufp) {
660 			/* Now we are really out of tbuf/txds */
661 			ldcp->need_resched = B_TRUE;
662 			mutex_exit(&ldcp->tclock);
663 
664 			statsp->tx_no_desc++;
665 			mutex_exit(&ldcp->txlock);
666 
667 			return (VGEN_TX_NORESOURCES);
668 		}
669 		mutex_exit(&ldcp->tclock);
670 	}
671 	/* update next available tbuf in the ring and update tx index */
672 	ldcp->next_tbufp = ntbufp;
673 	INCR_TXI(ldcp->next_txi, ldcp);
674 
675 	/* Mark the buffer busy before releasing the lock */
676 	tbufp->flags = VGEN_PRIV_DESC_BUSY;
677 	mutex_exit(&ldcp->txlock);
678 
679 	/* copy data into pre-allocated transmit buffer */
680 	dst = tbufp->datap + VNET_IPALIGN;
681 	for (bp = mp; bp != NULL; bp = bp->b_cont) {
682 		mblksz = MBLKL(bp);
683 		bcopy(bp->b_rptr, dst, mblksz);
684 		dst += mblksz;
685 	}
686 
687 	tbufp->datalen = size;
688 
689 	/* initialize the corresponding public descriptor (txd) */
690 	txdp = tbufp->descp;
691 	hdrp = &txdp->hdr;
692 	txdp->nbytes = size;
693 	txdp->ncookies = tbufp->ncookies;
694 	bcopy((tbufp->memcookie), (txdp->memcookie),
695 	    tbufp->ncookies * sizeof (ldc_mem_cookie_t));
696 
697 	mutex_enter(&ldcp->wrlock);
698 	/*
699 	 * If the flags not set to BUSY, it implies that the clobber
700 	 * was done while we were copying the data. In such case,
701 	 * discard the packet and return.
702 	 */
703 	if (tbufp->flags != VGEN_PRIV_DESC_BUSY) {
704 		statsp->oerrors++;
705 		mutex_exit(&ldcp->wrlock);
706 		goto vgen_tx_exit;
707 	}
708 	hdrp->dstate = VIO_DESC_READY;
709 
710 	/* update stats */
711 	statsp->opackets++;
712 	statsp->obytes += size;
713 	if (is_bcast)
714 		statsp->brdcstxmt++;
715 	else if (is_mcast)
716 		statsp->multixmt++;
717 
718 	/* send dring datamsg to the peer */
719 	if (ldcp->resched_peer) {
720 
721 		rtbufp = &ldcp->tbufp[ldcp->resched_peer_txi];
722 		rtxdp = rtbufp->descp;
723 
724 		if (rtxdp->hdr.dstate == VIO_DESC_READY) {
725 
726 			rv = vgen_send_dring_data(ldcp,
727 			    (uint32_t)ldcp->resched_peer_txi, -1);
728 			if (rv != 0) {
729 				/* error: drop the packet */
730 				DWARN(vgenp, ldcp, "vgen_send_dring_data "
731 				    "failed: rv(%d) len(%d)\n",
732 				    ldcp->ldc_id, rv, size);
733 				statsp->oerrors++;
734 			} else {
735 				ldcp->resched_peer = B_FALSE;
736 			}
737 
738 		}
739 
740 	}
741 
742 	mutex_exit(&ldcp->wrlock);
743 
744 vgen_tx_exit:
745 	if (rv == ECONNRESET) {
746 		/*
747 		 * Check if either callback thread or another tx thread is
748 		 * already running. Calling mutex_enter() will result in a
749 		 * deadlock if the other thread already holds cblock and is
750 		 * blocked in vnet_modify_fdb() (which is called from
751 		 * vgen_handle_evt_reset()) waiting for write access on rwlock,
752 		 * as this transmit thread already holds that lock as a reader
753 		 * in vnet_m_tx(). See comments in vnet_modify_fdb() in vnet.c.
754 		 */
755 		if (mutex_tryenter(&ldcp->cblock)) {
756 			if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
757 				DWARN(vgenp, ldcp, "ldc_status() error\n");
758 			} else {
759 				ldcp->ldc_status = istatus;
760 			}
761 			if (ldcp->ldc_status != LDC_UP) {
762 				/*
763 				 * Second arg is TRUE, as we know that
764 				 * the caller of this function - vnet_m_tx(),
765 				 * already holds fdb-rwlock as a reader.
766 				 */
767 				vgen_handle_evt_reset(ldcp, B_TRUE);
768 			}
769 			mutex_exit(&ldcp->cblock);
770 		}
771 	}
772 	freemsg(mp);
773 	DBG1(vgenp, ldcp, "exit\n");
774 	return (VGEN_TX_SUCCESS);
775 }
776 
777 /* enable/disable a multicast address */
778 int
779 vgen_multicst(void *arg, boolean_t add, const uint8_t *mca)
780 {
781 	vgen_t			*vgenp;
782 	vnet_mcast_msg_t	mcastmsg;
783 	vio_msg_tag_t		*tagp;
784 	vgen_port_t		*portp;
785 	vgen_portlist_t		*plistp;
786 	vgen_ldc_t		*ldcp;
787 	vgen_ldclist_t		*ldclp;
788 	struct ether_addr	*addrp;
789 	int			rv = DDI_FAILURE;
790 	uint32_t		i;
791 
792 	vgenp = (vgen_t *)arg;
793 	addrp = (struct ether_addr *)mca;
794 	tagp = &mcastmsg.tag;
795 	bzero(&mcastmsg, sizeof (mcastmsg));
796 
797 	mutex_enter(&vgenp->lock);
798 
799 	plistp = &(vgenp->vgenports);
800 
801 	READ_ENTER(&plistp->rwlock);
802 
803 	portp = vgenp->vsw_portp;
804 	if (portp == NULL) {
805 		RW_EXIT(&plistp->rwlock);
806 		mutex_exit(&vgenp->lock);
807 		return (rv);
808 	}
809 	ldclp = &portp->ldclist;
810 
811 	READ_ENTER(&ldclp->rwlock);
812 
813 	ldcp = ldclp->headp;
814 	if (ldcp == NULL)
815 		goto vgen_mcast_exit;
816 
817 	mutex_enter(&ldcp->cblock);
818 
819 	if (ldcp->hphase == VH_DONE) {
820 		/*
821 		 * If handshake is done, send a msg to vsw to add/remove
822 		 * the multicast address. Otherwise, we just update this
823 		 * mcast address in our table and the table will be sync'd
824 		 * with vsw when handshake completes.
825 		 */
826 		tagp->vio_msgtype = VIO_TYPE_CTRL;
827 		tagp->vio_subtype = VIO_SUBTYPE_INFO;
828 		tagp->vio_subtype_env = VNET_MCAST_INFO;
829 		tagp->vio_sid = ldcp->local_sid;
830 		bcopy(mca, &(mcastmsg.mca), ETHERADDRL);
831 		mcastmsg.set = add;
832 		mcastmsg.count = 1;
833 		if (vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (mcastmsg),
834 		    B_FALSE) != VGEN_SUCCESS) {
835 			DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
836 			mutex_exit(&ldcp->cblock);
837 			goto vgen_mcast_exit;
838 		}
839 	}
840 
841 	mutex_exit(&ldcp->cblock);
842 
843 	if (add) {
844 
845 		/* expand multicast table if necessary */
846 		if (vgenp->mccount >= vgenp->mcsize) {
847 			struct ether_addr	*newtab;
848 			uint32_t		newsize;
849 
850 
851 			newsize = vgenp->mcsize * 2;
852 
853 			newtab = kmem_zalloc(newsize *
854 			    sizeof (struct ether_addr), KM_NOSLEEP);
855 			if (newtab == NULL)
856 				goto vgen_mcast_exit;
857 			bcopy(vgenp->mctab, newtab, vgenp->mcsize *
858 			    sizeof (struct ether_addr));
859 			kmem_free(vgenp->mctab,
860 			    vgenp->mcsize * sizeof (struct ether_addr));
861 
862 			vgenp->mctab = newtab;
863 			vgenp->mcsize = newsize;
864 		}
865 
866 		/* add address to the table */
867 		vgenp->mctab[vgenp->mccount++] = *addrp;
868 
869 	} else {
870 
871 		/* delete address from the table */
872 		for (i = 0; i < vgenp->mccount; i++) {
873 			if (ether_cmp(addrp, &(vgenp->mctab[i])) == 0) {
874 
875 				/*
876 				 * If there's more than one address in this
877 				 * table, delete the unwanted one by moving
878 				 * the last one in the list over top of it;
879 				 * otherwise, just remove it.
880 				 */
881 				if (vgenp->mccount > 1) {
882 					vgenp->mctab[i] =
883 					    vgenp->mctab[vgenp->mccount-1];
884 				}
885 				vgenp->mccount--;
886 				break;
887 			}
888 		}
889 	}
890 
891 	rv = DDI_SUCCESS;
892 
893 vgen_mcast_exit:
894 	RW_EXIT(&ldclp->rwlock);
895 	RW_EXIT(&plistp->rwlock);
896 
897 	mutex_exit(&vgenp->lock);
898 	return (rv);
899 }
900 
901 /* set or clear promiscuous mode on the device */
902 static int
903 vgen_promisc(void *arg, boolean_t on)
904 {
905 	_NOTE(ARGUNUSED(arg, on))
906 	return (DDI_SUCCESS);
907 }
908 
909 /* set the unicast mac address of the device */
910 static int
911 vgen_unicst(void *arg, const uint8_t *mca)
912 {
913 	_NOTE(ARGUNUSED(arg, mca))
914 	return (DDI_SUCCESS);
915 }
916 
917 /* get device statistics */
918 int
919 vgen_stat(void *arg, uint_t stat, uint64_t *val)
920 {
921 	vgen_t		*vgenp = (vgen_t *)arg;
922 	vgen_port_t	*portp;
923 	vgen_portlist_t	*plistp;
924 
925 	*val = 0;
926 
927 	plistp = &(vgenp->vgenports);
928 	READ_ENTER(&plistp->rwlock);
929 
930 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
931 		*val += vgen_port_stat(portp, stat);
932 	}
933 
934 	RW_EXIT(&plistp->rwlock);
935 
936 	return (0);
937 }
938 
939 static void
940 vgen_ioctl(void *arg, queue_t *wq, mblk_t *mp)
941 {
942 	 _NOTE(ARGUNUSED(arg, wq, mp))
943 }
944 
945 /* vgen internal functions */
946 /* detach all ports from the device */
947 static void
948 vgen_detach_ports(vgen_t *vgenp)
949 {
950 	vgen_port_t	*portp;
951 	vgen_portlist_t	*plistp;
952 
953 	plistp = &(vgenp->vgenports);
954 	WRITE_ENTER(&plistp->rwlock);
955 
956 	while ((portp = plistp->headp) != NULL) {
957 		vgen_port_detach(portp);
958 	}
959 
960 	RW_EXIT(&plistp->rwlock);
961 }
962 
963 /*
964  * detach the given port.
965  */
966 static void
967 vgen_port_detach(vgen_port_t *portp)
968 {
969 	vgen_t		*vgenp;
970 	vgen_ldclist_t	*ldclp;
971 	int		port_num;
972 
973 	vgenp = portp->vgenp;
974 	port_num = portp->port_num;
975 
976 	DBG1(vgenp, NULL, "port(%d):enter\n", port_num);
977 
978 	/* remove it from port list */
979 	vgen_port_list_remove(portp);
980 
981 	/* detach channels from this port */
982 	ldclp = &portp->ldclist;
983 	WRITE_ENTER(&ldclp->rwlock);
984 	while (ldclp->headp) {
985 		vgen_ldc_detach(ldclp->headp);
986 	}
987 	RW_EXIT(&ldclp->rwlock);
988 
989 	if (vgenp->vsw_portp == portp) {
990 		vgenp->vsw_portp = NULL;
991 	}
992 	KMEM_FREE(portp);
993 
994 	DBG1(vgenp, NULL, "port(%d):exit\n", port_num);
995 }
996 
997 /* add a port to port list */
998 static void
999 vgen_port_list_insert(vgen_port_t *portp)
1000 {
1001 	vgen_portlist_t *plistp;
1002 	vgen_t *vgenp;
1003 
1004 	vgenp = portp->vgenp;
1005 	plistp = &(vgenp->vgenports);
1006 
1007 	if (plistp->headp == NULL) {
1008 		plistp->headp = portp;
1009 	} else {
1010 		plistp->tailp->nextp = portp;
1011 	}
1012 	plistp->tailp = portp;
1013 	portp->nextp = NULL;
1014 }
1015 
1016 /* remove a port from port list */
1017 static void
1018 vgen_port_list_remove(vgen_port_t *portp)
1019 {
1020 	vgen_port_t *prevp;
1021 	vgen_port_t *nextp;
1022 	vgen_portlist_t *plistp;
1023 	vgen_t *vgenp;
1024 
1025 	vgenp = portp->vgenp;
1026 
1027 	plistp = &(vgenp->vgenports);
1028 
1029 	if (plistp->headp == NULL)
1030 		return;
1031 
1032 	if (portp == plistp->headp) {
1033 		plistp->headp = portp->nextp;
1034 		if (portp == plistp->tailp)
1035 			plistp->tailp = plistp->headp;
1036 	} else {
1037 		for (prevp = plistp->headp;
1038 		    ((nextp = prevp->nextp) != NULL) && (nextp != portp);
1039 		    prevp = nextp)
1040 			;
1041 		if (nextp == portp) {
1042 			prevp->nextp = portp->nextp;
1043 		}
1044 		if (portp == plistp->tailp)
1045 			plistp->tailp = prevp;
1046 	}
1047 }
1048 
1049 /* lookup a port in the list based on port_num */
1050 static vgen_port_t *
1051 vgen_port_lookup(vgen_portlist_t *plistp, int port_num)
1052 {
1053 	vgen_port_t *portp = NULL;
1054 
1055 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
1056 		if (portp->port_num == port_num) {
1057 			break;
1058 		}
1059 	}
1060 
1061 	return (portp);
1062 }
1063 
1064 /* enable ports for transmit/receive */
1065 static void
1066 vgen_init_ports(vgen_t *vgenp)
1067 {
1068 	vgen_port_t	*portp;
1069 	vgen_portlist_t	*plistp;
1070 
1071 	plistp = &(vgenp->vgenports);
1072 	READ_ENTER(&plistp->rwlock);
1073 
1074 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
1075 		vgen_port_init(portp);
1076 	}
1077 
1078 	RW_EXIT(&plistp->rwlock);
1079 }
1080 
1081 static void
1082 vgen_port_init(vgen_port_t *portp)
1083 {
1084 	vgen_t *vgenp;
1085 
1086 	vgenp = portp->vgenp;
1087 	/*
1088 	 * Create fdb entry in vnet, corresponding to the mac
1089 	 * address of this port. Note that the port specified
1090 	 * is vsw-port. This is done so that vsw-port acts
1091 	 * as the route to reach this macaddr, until the
1092 	 * channel for this port comes up (LDC_UP) and
1093 	 * handshake is done successfully.
1094 	 * eg, if the peer is OBP-vnet, it may not bring the
1095 	 * channel up for this port and may communicate via
1096 	 * vsw to reach this port.
1097 	 * Later, when Solaris-vnet comes up at the other end
1098 	 * of the channel for this port and brings up the channel,
1099 	 * it is an indication that peer vnet is capable of
1100 	 * distributed switching, so the direct route through this
1101 	 * port is specified in fdb, using vnet_modify_fdb(macaddr);
1102 	 */
1103 	vnet_add_fdb(vgenp->vnetp, (uint8_t *)&portp->macaddr,
1104 	    vgen_tx, vgenp->vsw_portp);
1105 
1106 	if (portp == vgenp->vsw_portp) {
1107 		/*
1108 		 * create the default route entry in vnet's fdb.
1109 		 * This is the entry used by vnet to reach
1110 		 * unknown destinations, which basically goes
1111 		 * through vsw on domain0 and out through the
1112 		 * physical device bound to vsw.
1113 		 */
1114 		vnet_add_def_rte(vgenp->vnetp, vgen_tx, portp);
1115 	}
1116 
1117 	/* Bring up the channels of this port */
1118 	vgen_init_ldcs(portp);
1119 }
1120 
1121 /* disable transmit/receive on ports */
1122 static void
1123 vgen_uninit_ports(vgen_t *vgenp)
1124 {
1125 	vgen_port_t	*portp;
1126 	vgen_portlist_t	*plistp;
1127 
1128 	plistp = &(vgenp->vgenports);
1129 	READ_ENTER(&plistp->rwlock);
1130 
1131 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
1132 		vgen_port_uninit(portp);
1133 	}
1134 
1135 	RW_EXIT(&plistp->rwlock);
1136 }
1137 
1138 static void
1139 vgen_port_uninit(vgen_port_t *portp)
1140 {
1141 	vgen_t *vgenp;
1142 
1143 	vgenp = portp->vgenp;
1144 
1145 	vgen_uninit_ldcs(portp);
1146 	/* delete the entry in vnet's fdb for this port */
1147 	vnet_del_fdb(vgenp->vnetp, (uint8_t *)&portp->macaddr);
1148 	if (portp == vgenp->vsw_portp) {
1149 		/*
1150 		 * if this is vsw-port, then delete the default
1151 		 * route entry in vnet's fdb.
1152 		 */
1153 		vnet_del_def_rte(vgenp->vnetp);
1154 	}
1155 }
1156 
1157 /* register with MD event generator */
1158 static int
1159 vgen_mdeg_reg(vgen_t *vgenp)
1160 {
1161 	mdeg_prop_spec_t	*pspecp;
1162 	mdeg_node_spec_t	*parentp;
1163 	uint_t			templatesz;
1164 	int			rv;
1165 	mdeg_handle_t		hdl;
1166 	int			i;
1167 
1168 	i = ddi_prop_get_int(DDI_DEV_T_ANY, vgenp->vnetdip,
1169 	    DDI_PROP_DONTPASS, reg_propname, -1);
1170 	if (i == -1) {
1171 		return (DDI_FAILURE);
1172 	}
1173 	templatesz = sizeof (vgen_prop_template);
1174 	pspecp = kmem_zalloc(templatesz, KM_NOSLEEP);
1175 	if (pspecp == NULL) {
1176 		return (DDI_FAILURE);
1177 	}
1178 	parentp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_NOSLEEP);
1179 	if (parentp == NULL) {
1180 		kmem_free(pspecp, templatesz);
1181 		return (DDI_FAILURE);
1182 	}
1183 
1184 	bcopy(vgen_prop_template, pspecp, templatesz);
1185 
1186 	/*
1187 	 * NOTE: The instance here refers to the value of "reg" property and
1188 	 * not the dev_info instance (ddi_get_instance()) of vnet.
1189 	 */
1190 	VGEN_SET_MDEG_PROP_INST(pspecp, i);
1191 
1192 	parentp->namep = "virtual-device";
1193 	parentp->specp = pspecp;
1194 
1195 	/* save parentp in vgen_t */
1196 	vgenp->mdeg_parentp = parentp;
1197 
1198 	rv = mdeg_register(parentp, &vport_match, vgen_mdeg_cb, vgenp, &hdl);
1199 	if (rv != MDEG_SUCCESS) {
1200 		DERR(vgenp, NULL, "mdeg_register failed\n");
1201 		KMEM_FREE(parentp);
1202 		kmem_free(pspecp, templatesz);
1203 		vgenp->mdeg_parentp = NULL;
1204 		return (DDI_FAILURE);
1205 	}
1206 
1207 	/* save mdeg handle in vgen_t */
1208 	vgenp->mdeg_hdl = hdl;
1209 
1210 	return (DDI_SUCCESS);
1211 }
1212 
1213 /* unregister with MD event generator */
1214 static void
1215 vgen_mdeg_unreg(vgen_t *vgenp)
1216 {
1217 	(void) mdeg_unregister(vgenp->mdeg_hdl);
1218 	kmem_free(vgenp->mdeg_parentp->specp, sizeof (vgen_prop_template));
1219 	KMEM_FREE(vgenp->mdeg_parentp);
1220 	vgenp->mdeg_parentp = NULL;
1221 	vgenp->mdeg_hdl = NULL;
1222 }
1223 
1224 /* callback function registered with MD event generator */
1225 static int
1226 vgen_mdeg_cb(void *cb_argp, mdeg_result_t *resp)
1227 {
1228 	int idx;
1229 	int vsw_idx = -1;
1230 	uint64_t val;
1231 	vgen_t *vgenp;
1232 
1233 	if ((resp == NULL) || (cb_argp == NULL)) {
1234 		return (MDEG_FAILURE);
1235 	}
1236 
1237 	vgenp = (vgen_t *)cb_argp;
1238 	DBG1(vgenp, NULL, "enter\n");
1239 
1240 	mutex_enter(&vgenp->lock);
1241 
1242 	DBG1(vgenp, NULL, "ports: removed(%x), "
1243 	"added(%x), updated(%x)\n", resp->removed.nelem,
1244 	    resp->added.nelem, resp->match_curr.nelem);
1245 
1246 	for (idx = 0; idx < resp->removed.nelem; idx++) {
1247 		(void) vgen_remove_port(vgenp, resp->removed.mdp,
1248 		    resp->removed.mdep[idx]);
1249 	}
1250 
1251 	if (vgenp->vsw_portp == NULL) {
1252 		/*
1253 		 * find vsw_port and add it first, because other ports need
1254 		 * this when adding fdb entry (see vgen_port_init()).
1255 		 */
1256 		for (idx = 0; idx < resp->added.nelem; idx++) {
1257 			if (!(md_get_prop_val(resp->added.mdp,
1258 			    resp->added.mdep[idx], swport_propname, &val))) {
1259 				if (val == 0) {
1260 					/*
1261 					 * This port is connected to the
1262 					 * vsw on dom0.
1263 					 */
1264 					vsw_idx = idx;
1265 					if (vgen_add_port(vgenp,
1266 					    resp->added.mdp,
1267 					    resp->added.mdep[idx]) !=
1268 					    DDI_SUCCESS) {
1269 						cmn_err(CE_NOTE, "vnet%d Could "
1270 						    "not initialize virtual "
1271 						    "switch port.",
1272 						    ddi_get_instance(vgenp->
1273 						    vnetdip));
1274 						mutex_exit(&vgenp->lock);
1275 						return (MDEG_FAILURE);
1276 					}
1277 					break;
1278 				}
1279 			}
1280 		}
1281 		if (vsw_idx == -1) {
1282 			DWARN(vgenp, NULL, "can't find vsw_port\n");
1283 			mutex_exit(&vgenp->lock);
1284 			return (MDEG_FAILURE);
1285 		}
1286 	}
1287 
1288 	for (idx = 0; idx < resp->added.nelem; idx++) {
1289 		if ((vsw_idx != -1) && (vsw_idx == idx)) /* skip vsw_port */
1290 			continue;
1291 
1292 		/* If this port can't be added just skip it. */
1293 		(void) vgen_add_port(vgenp, resp->added.mdp,
1294 		    resp->added.mdep[idx]);
1295 	}
1296 
1297 	for (idx = 0; idx < resp->match_curr.nelem; idx++) {
1298 		(void) vgen_update_port(vgenp, resp->match_curr.mdp,
1299 		    resp->match_curr.mdep[idx],
1300 		    resp->match_prev.mdp,
1301 		    resp->match_prev.mdep[idx]);
1302 	}
1303 
1304 	mutex_exit(&vgenp->lock);
1305 	DBG1(vgenp, NULL, "exit\n");
1306 	return (MDEG_SUCCESS);
1307 }
1308 
1309 /* add a new port to the device */
1310 static int
1311 vgen_add_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
1312 {
1313 	uint64_t	port_num;
1314 	uint64_t	*ldc_ids;
1315 	uint64_t	macaddr;
1316 	uint64_t	val;
1317 	int		num_ldcs;
1318 	int		vsw_port = B_FALSE;
1319 	int		i;
1320 	int		addrsz;
1321 	int		num_nodes = 0;
1322 	int		listsz = 0;
1323 	int		rv = DDI_SUCCESS;
1324 	mde_cookie_t	*listp = NULL;
1325 	uint8_t		*addrp;
1326 	struct ether_addr	ea;
1327 
1328 	/* read "id" property to get the port number */
1329 	if (md_get_prop_val(mdp, mdex, id_propname, &port_num)) {
1330 		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
1331 		return (DDI_FAILURE);
1332 	}
1333 
1334 	/*
1335 	 * Find the channel endpoint node(s) under this port node.
1336 	 */
1337 	if ((num_nodes = md_node_count(mdp)) <= 0) {
1338 		DWARN(vgenp, NULL, "invalid number of nodes found (%d)",
1339 		    num_nodes);
1340 		return (DDI_FAILURE);
1341 	}
1342 
1343 	/* allocate space for node list */
1344 	listsz = num_nodes * sizeof (mde_cookie_t);
1345 	listp = kmem_zalloc(listsz, KM_NOSLEEP);
1346 	if (listp == NULL)
1347 		return (DDI_FAILURE);
1348 
1349 	num_ldcs = md_scan_dag(mdp, mdex,
1350 	    md_find_name(mdp, channel_propname),
1351 	    md_find_name(mdp, "fwd"), listp);
1352 
1353 	if (num_ldcs <= 0) {
1354 		DWARN(vgenp, NULL, "can't find %s nodes", channel_propname);
1355 		kmem_free(listp, listsz);
1356 		return (DDI_FAILURE);
1357 	}
1358 
1359 	DBG2(vgenp, NULL, "num_ldcs %d", num_ldcs);
1360 
1361 	ldc_ids = kmem_zalloc(num_ldcs * sizeof (uint64_t), KM_NOSLEEP);
1362 	if (ldc_ids == NULL) {
1363 		kmem_free(listp, listsz);
1364 		return (DDI_FAILURE);
1365 	}
1366 
1367 	for (i = 0; i < num_ldcs; i++) {
1368 		/* read channel ids */
1369 		if (md_get_prop_val(mdp, listp[i], id_propname, &ldc_ids[i])) {
1370 			DWARN(vgenp, NULL, "prop(%s) not found\n",
1371 			    id_propname);
1372 			kmem_free(listp, listsz);
1373 			kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
1374 			return (DDI_FAILURE);
1375 		}
1376 		DBG2(vgenp, NULL, "ldc_id 0x%llx", ldc_ids[i]);
1377 	}
1378 
1379 	kmem_free(listp, listsz);
1380 
1381 	if (md_get_prop_data(mdp, mdex, rmacaddr_propname, &addrp,
1382 	    &addrsz)) {
1383 		DWARN(vgenp, NULL, "prop(%s) not found\n", rmacaddr_propname);
1384 		kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
1385 		return (DDI_FAILURE);
1386 	}
1387 
1388 	if (addrsz < ETHERADDRL) {
1389 		DWARN(vgenp, NULL, "invalid address size (%d)\n", addrsz);
1390 		kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
1391 		return (DDI_FAILURE);
1392 	}
1393 
1394 	macaddr = *((uint64_t *)addrp);
1395 
1396 	DBG2(vgenp, NULL, "remote mac address 0x%llx\n", macaddr);
1397 
1398 	for (i = ETHERADDRL - 1; i >= 0; i--) {
1399 		ea.ether_addr_octet[i] = macaddr & 0xFF;
1400 		macaddr >>= 8;
1401 	}
1402 
1403 	if (vgenp->vsw_portp == NULL) {
1404 		if (!(md_get_prop_val(mdp, mdex, swport_propname, &val))) {
1405 			if (val == 0) {
1406 				/* This port is connected to the vsw on dom0 */
1407 				vsw_port = B_TRUE;
1408 			}
1409 		}
1410 	}
1411 	if (vgen_port_attach_mdeg(vgenp, (int)port_num, ldc_ids, num_ldcs,
1412 	    &ea, vsw_port) != DDI_SUCCESS) {
1413 		cmn_err(CE_NOTE, "vnet%d failed to attach port %d remote MAC "
1414 		    "address %s", ddi_get_instance(vgenp->vnetdip),
1415 		    (int)port_num, ether_sprintf(&ea));
1416 		rv = DDI_FAILURE;
1417 	}
1418 
1419 	kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
1420 
1421 	return (rv);
1422 }
1423 
1424 /* remove a port from the device */
1425 static int
1426 vgen_remove_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
1427 {
1428 	uint64_t	port_num;
1429 	vgen_port_t	*portp;
1430 	vgen_portlist_t	*plistp;
1431 
1432 	/* read "id" property to get the port number */
1433 	if (md_get_prop_val(mdp, mdex, id_propname, &port_num)) {
1434 		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
1435 		return (DDI_FAILURE);
1436 	}
1437 
1438 	plistp = &(vgenp->vgenports);
1439 
1440 	WRITE_ENTER(&plistp->rwlock);
1441 	portp = vgen_port_lookup(plistp, (int)port_num);
1442 	if (portp == NULL) {
1443 		DWARN(vgenp, NULL, "can't find port(%lx)\n", port_num);
1444 		RW_EXIT(&plistp->rwlock);
1445 		return (DDI_FAILURE);
1446 	}
1447 
1448 	vgen_port_detach_mdeg(portp);
1449 	RW_EXIT(&plistp->rwlock);
1450 
1451 	return (DDI_SUCCESS);
1452 }
1453 
1454 /* attach a port to the device based on mdeg data */
1455 static int
1456 vgen_port_attach_mdeg(vgen_t *vgenp, int port_num, uint64_t *ldcids,
1457 	int num_ids, struct ether_addr *macaddr, boolean_t vsw_port)
1458 {
1459 	vgen_port_t		*portp;
1460 	vgen_portlist_t		*plistp;
1461 	int			i;
1462 
1463 	portp = kmem_zalloc(sizeof (vgen_port_t), KM_NOSLEEP);
1464 	if (portp == NULL) {
1465 		return (DDI_FAILURE);
1466 	}
1467 	portp->vgenp = vgenp;
1468 	portp->port_num = port_num;
1469 
1470 	DBG1(vgenp, NULL, "port_num(%d)\n", portp->port_num);
1471 
1472 	portp->ldclist.num_ldcs = 0;
1473 	portp->ldclist.headp = NULL;
1474 	rw_init(&portp->ldclist.rwlock, NULL, RW_DRIVER, NULL);
1475 
1476 	ether_copy(macaddr, &portp->macaddr);
1477 	for (i = 0; i < num_ids; i++) {
1478 		DBG2(vgenp, NULL, "ldcid (%lx)\n", ldcids[i]);
1479 		if (vgen_ldc_attach(portp, ldcids[i]) == DDI_FAILURE) {
1480 			rw_destroy(&portp->ldclist.rwlock);
1481 			vgen_port_detach(portp);
1482 			return (DDI_FAILURE);
1483 		}
1484 	}
1485 
1486 	/* link it into the list of ports */
1487 	plistp = &(vgenp->vgenports);
1488 	WRITE_ENTER(&plistp->rwlock);
1489 	vgen_port_list_insert(portp);
1490 	RW_EXIT(&plistp->rwlock);
1491 
1492 	/* This port is connected to the vsw on domain0 */
1493 	if (vsw_port)
1494 		vgenp->vsw_portp = portp;
1495 
1496 	if (vgenp->flags & VGEN_STARTED) {	/* interface is configured */
1497 		vgen_port_init(portp);
1498 	}
1499 
1500 	DBG1(vgenp, NULL, "exit: port_num(%d)\n", portp->port_num);
1501 	return (DDI_SUCCESS);
1502 }
1503 
1504 /* detach a port from the device based on mdeg data */
1505 static void
1506 vgen_port_detach_mdeg(vgen_port_t *portp)
1507 {
1508 	vgen_t *vgenp = portp->vgenp;
1509 
1510 	DBG1(vgenp, NULL, "enter: port_num(%d)\n", portp->port_num);
1511 	/* stop the port if needed */
1512 	if (vgenp->flags & VGEN_STARTED) {
1513 		vgen_port_uninit(portp);
1514 	}
1515 	vgen_port_detach(portp);
1516 
1517 	DBG1(vgenp, NULL, "exit: port_num(%d)\n", portp->port_num);
1518 }
1519 
1520 static int
1521 vgen_update_port(vgen_t *vgenp, md_t *curr_mdp, mde_cookie_t curr_mdex,
1522 	md_t *prev_mdp, mde_cookie_t prev_mdex)
1523 {
1524 	 _NOTE(ARGUNUSED(vgenp, curr_mdp, curr_mdex, prev_mdp, prev_mdex))
1525 
1526 	/* NOTE: TBD */
1527 	return (DDI_SUCCESS);
1528 }
1529 
1530 static uint64_t
1531 vgen_port_stat(vgen_port_t *portp, uint_t stat)
1532 {
1533 	vgen_ldclist_t	*ldclp;
1534 	vgen_ldc_t *ldcp;
1535 	uint64_t	val;
1536 
1537 	val = 0;
1538 	ldclp = &portp->ldclist;
1539 
1540 	READ_ENTER(&ldclp->rwlock);
1541 	for (ldcp = ldclp->headp; ldcp != NULL; ldcp = ldcp->nextp) {
1542 		val += vgen_ldc_stat(ldcp, stat);
1543 	}
1544 	RW_EXIT(&ldclp->rwlock);
1545 
1546 	return (val);
1547 }
1548 
1549 /* attach the channel corresponding to the given ldc_id to the port */
1550 static int
1551 vgen_ldc_attach(vgen_port_t *portp, uint64_t ldc_id)
1552 {
1553 	vgen_t 		*vgenp;
1554 	vgen_ldclist_t	*ldclp;
1555 	vgen_ldc_t 	*ldcp, **prev_ldcp;
1556 	ldc_attr_t 	attr;
1557 	int 		status;
1558 	ldc_status_t	istatus;
1559 	enum	{AST_init = 0x0, AST_ldc_alloc = 0x1,
1560 		AST_mutex_init = 0x2, AST_ldc_init = 0x4,
1561 		AST_ldc_reg_cb = 0x8, AST_alloc_tx_ring = 0x10,
1562 		AST_create_rxmblks = 0x20, AST_add_softintr = 0x40,
1563 		AST_create_rcv_thread = 0x80} attach_state;
1564 
1565 	attach_state = AST_init;
1566 	vgenp = portp->vgenp;
1567 	ldclp = &portp->ldclist;
1568 
1569 	ldcp = kmem_zalloc(sizeof (vgen_ldc_t), KM_NOSLEEP);
1570 	if (ldcp == NULL) {
1571 		goto ldc_attach_failed;
1572 	}
1573 	ldcp->ldc_id = ldc_id;
1574 	ldcp->portp = portp;
1575 
1576 	attach_state |= AST_ldc_alloc;
1577 
1578 	mutex_init(&ldcp->txlock, NULL, MUTEX_DRIVER, NULL);
1579 	mutex_init(&ldcp->cblock, NULL, MUTEX_DRIVER, NULL);
1580 	mutex_init(&ldcp->tclock, NULL, MUTEX_DRIVER, NULL);
1581 	mutex_init(&ldcp->wrlock, NULL, MUTEX_DRIVER, NULL);
1582 	mutex_init(&ldcp->rxlock, NULL, MUTEX_DRIVER, NULL);
1583 
1584 	attach_state |= AST_mutex_init;
1585 
1586 	attr.devclass = LDC_DEV_NT;
1587 	attr.instance = ddi_get_instance(vgenp->vnetdip);
1588 	attr.mode = LDC_MODE_UNRELIABLE;
1589 	attr.mtu = vnet_ldc_mtu;
1590 	status = ldc_init(ldc_id, &attr, &ldcp->ldc_handle);
1591 	if (status != 0) {
1592 		DWARN(vgenp, ldcp, "ldc_init failed,rv (%d)\n", status);
1593 		goto ldc_attach_failed;
1594 	}
1595 	attach_state |= AST_ldc_init;
1596 
1597 	if (vgen_rcv_thread_enabled) {
1598 		ldcp->rcv_thr_flags = 0;
1599 		ldcp->rcv_mhead = ldcp->rcv_mtail = NULL;
1600 		ldcp->soft_pri = PIL_6;
1601 
1602 		status = ddi_intr_add_softint(vgenp->vnetdip,
1603 		    &ldcp->soft_handle, ldcp->soft_pri,
1604 		    vgen_ldc_rcv_softintr, (void *)ldcp);
1605 		if (status != DDI_SUCCESS) {
1606 			DWARN(vgenp, ldcp, "add_softint failed, rv (%d)\n",
1607 			    status);
1608 			goto ldc_attach_failed;
1609 		}
1610 
1611 		/*
1612 		 * Initialize the soft_lock with the same priority as
1613 		 * the soft interrupt to protect from the soft interrupt.
1614 		 */
1615 		mutex_init(&ldcp->soft_lock, NULL, MUTEX_DRIVER,
1616 		    DDI_INTR_PRI(ldcp->soft_pri));
1617 		attach_state |= AST_add_softintr;
1618 
1619 		mutex_init(&ldcp->rcv_thr_lock, NULL, MUTEX_DRIVER, NULL);
1620 		cv_init(&ldcp->rcv_thr_cv, NULL, CV_DRIVER, NULL);
1621 		ldcp->rcv_thread = thread_create(NULL, 2 * DEFAULTSTKSZ,
1622 		    vgen_ldc_rcv_worker, ldcp, 0, &p0, TS_RUN, maxclsyspri);
1623 
1624 		attach_state |= AST_create_rcv_thread;
1625 		if (ldcp->rcv_thread == NULL) {
1626 			DWARN(vgenp, ldcp, "Failed to create worker thread");
1627 			goto ldc_attach_failed;
1628 		}
1629 	}
1630 
1631 	status = ldc_reg_callback(ldcp->ldc_handle, vgen_ldc_cb, (caddr_t)ldcp);
1632 	if (status != 0) {
1633 		DWARN(vgenp, ldcp, "ldc_reg_callback failed, rv (%d)\n",
1634 		    status);
1635 		goto ldc_attach_failed;
1636 	}
1637 	attach_state |= AST_ldc_reg_cb;
1638 
1639 	(void) ldc_status(ldcp->ldc_handle, &istatus);
1640 	ASSERT(istatus == LDC_INIT);
1641 	ldcp->ldc_status = istatus;
1642 
1643 	/* allocate transmit resources */
1644 	status = vgen_alloc_tx_ring(ldcp);
1645 	if (status != 0) {
1646 		goto ldc_attach_failed;
1647 	}
1648 	attach_state |= AST_alloc_tx_ring;
1649 
1650 	/* allocate receive resources */
1651 	status = vio_init_multipools(&ldcp->vmp, VGEN_NUM_VMPOOLS,
1652 	    vgen_rbufsz1, vgen_rbufsz2, vgen_rbufsz3,
1653 	    vgen_nrbufs1, vgen_nrbufs2, vgen_nrbufs3);
1654 	if (status != 0) {
1655 		goto ldc_attach_failed;
1656 	}
1657 	attach_state |= AST_create_rxmblks;
1658 
1659 	/* Setup kstats for the channel */
1660 	status = vgen_setup_kstats(ldcp);
1661 	if (status != VGEN_SUCCESS) {
1662 		goto ldc_attach_failed;
1663 	}
1664 
1665 	/* initialize vgen_versions supported */
1666 	bcopy(vgen_versions, ldcp->vgen_versions, sizeof (ldcp->vgen_versions));
1667 
1668 	/* link it into the list of channels for this port */
1669 	WRITE_ENTER(&ldclp->rwlock);
1670 	prev_ldcp = (vgen_ldc_t **)(&ldclp->headp);
1671 	ldcp->nextp = *prev_ldcp;
1672 	*prev_ldcp = ldcp;
1673 	ldclp->num_ldcs++;
1674 	RW_EXIT(&ldclp->rwlock);
1675 
1676 	ldcp->flags |= CHANNEL_ATTACHED;
1677 	return (DDI_SUCCESS);
1678 
1679 ldc_attach_failed:
1680 	if (attach_state & AST_ldc_reg_cb) {
1681 		(void) ldc_unreg_callback(ldcp->ldc_handle);
1682 	}
1683 	if (attach_state & AST_add_softintr) {
1684 		(void) ddi_intr_remove_softint(ldcp->soft_handle);
1685 		mutex_destroy(&ldcp->soft_lock);
1686 	}
1687 	if (attach_state & AST_create_rcv_thread) {
1688 		if (ldcp->rcv_thread != NULL) {
1689 			vgen_stop_rcv_thread(ldcp);
1690 		}
1691 		mutex_destroy(&ldcp->rcv_thr_lock);
1692 		cv_destroy(&ldcp->rcv_thr_cv);
1693 	}
1694 	if (attach_state & AST_create_rxmblks) {
1695 		vio_mblk_pool_t *fvmp = NULL;
1696 
1697 		vio_destroy_multipools(&ldcp->vmp, &fvmp);
1698 		ASSERT(fvmp == NULL);
1699 	}
1700 	if (attach_state & AST_alloc_tx_ring) {
1701 		vgen_free_tx_ring(ldcp);
1702 	}
1703 	if (attach_state & AST_ldc_init) {
1704 		(void) ldc_fini(ldcp->ldc_handle);
1705 	}
1706 	if (attach_state & AST_mutex_init) {
1707 		mutex_destroy(&ldcp->tclock);
1708 		mutex_destroy(&ldcp->txlock);
1709 		mutex_destroy(&ldcp->cblock);
1710 		mutex_destroy(&ldcp->wrlock);
1711 		mutex_destroy(&ldcp->rxlock);
1712 	}
1713 	if (attach_state & AST_ldc_alloc) {
1714 		KMEM_FREE(ldcp);
1715 	}
1716 	return (DDI_FAILURE);
1717 }
1718 
1719 /* detach a channel from the port */
1720 static void
1721 vgen_ldc_detach(vgen_ldc_t *ldcp)
1722 {
1723 	vgen_port_t	*portp;
1724 	vgen_t 		*vgenp;
1725 	vgen_ldc_t 	*pldcp;
1726 	vgen_ldc_t	**prev_ldcp;
1727 	vgen_ldclist_t	*ldclp;
1728 
1729 	portp = ldcp->portp;
1730 	vgenp = portp->vgenp;
1731 	ldclp = &portp->ldclist;
1732 
1733 	prev_ldcp =  (vgen_ldc_t **)&ldclp->headp;
1734 	for (; (pldcp = *prev_ldcp) != NULL; prev_ldcp = &pldcp->nextp) {
1735 		if (pldcp == ldcp) {
1736 			break;
1737 		}
1738 	}
1739 
1740 	if (pldcp == NULL) {
1741 		/* invalid ldcp? */
1742 		return;
1743 	}
1744 
1745 	if (ldcp->ldc_status != LDC_INIT) {
1746 		DWARN(vgenp, ldcp, "ldc_status is not INIT\n");
1747 	}
1748 
1749 	if (ldcp->flags & CHANNEL_ATTACHED) {
1750 		ldcp->flags &= ~(CHANNEL_ATTACHED);
1751 
1752 		(void) ldc_unreg_callback(ldcp->ldc_handle);
1753 		if (ldcp->rcv_thread != NULL) {
1754 			/* First stop the receive thread */
1755 			vgen_stop_rcv_thread(ldcp);
1756 			(void) ddi_intr_remove_softint(ldcp->soft_handle);
1757 			mutex_destroy(&ldcp->soft_lock);
1758 			mutex_destroy(&ldcp->rcv_thr_lock);
1759 			cv_destroy(&ldcp->rcv_thr_cv);
1760 		}
1761 		/* Free any queued messages */
1762 		if (ldcp->rcv_mhead != NULL) {
1763 			freemsgchain(ldcp->rcv_mhead);
1764 			ldcp->rcv_mhead = NULL;
1765 		}
1766 
1767 		vgen_destroy_kstats(ldcp);
1768 		/*
1769 		 * if we cannot reclaim all mblks, put this
1770 		 * on the list of pools(vgenp->rmp) to be reclaimed when the
1771 		 * device gets detached (see vgen_uninit()).
1772 		 */
1773 		vio_destroy_multipools(&ldcp->vmp, &vgenp->rmp);
1774 
1775 		/* free transmit resources */
1776 		vgen_free_tx_ring(ldcp);
1777 
1778 		(void) ldc_fini(ldcp->ldc_handle);
1779 		mutex_destroy(&ldcp->tclock);
1780 		mutex_destroy(&ldcp->txlock);
1781 		mutex_destroy(&ldcp->cblock);
1782 		mutex_destroy(&ldcp->wrlock);
1783 		mutex_destroy(&ldcp->rxlock);
1784 
1785 		/* unlink it from the list */
1786 		*prev_ldcp = ldcp->nextp;
1787 		ldclp->num_ldcs--;
1788 		KMEM_FREE(ldcp);
1789 	}
1790 }
1791 
1792 /*
1793  * This function allocates transmit resources for the channel.
1794  * The resources consist of a transmit descriptor ring and an associated
1795  * transmit buffer ring.
1796  */
1797 static int
1798 vgen_alloc_tx_ring(vgen_ldc_t *ldcp)
1799 {
1800 	void *tbufp;
1801 	ldc_mem_info_t minfo;
1802 	uint32_t txdsize;
1803 	uint32_t tbufsize;
1804 	int status;
1805 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
1806 
1807 	ldcp->num_txds = vnet_ntxds;
1808 	txdsize = sizeof (vnet_public_desc_t);
1809 	tbufsize = sizeof (vgen_private_desc_t);
1810 
1811 	/* allocate transmit buffer ring */
1812 	tbufp = kmem_zalloc(ldcp->num_txds * tbufsize, KM_NOSLEEP);
1813 	if (tbufp == NULL) {
1814 		return (DDI_FAILURE);
1815 	}
1816 
1817 	/* create transmit descriptor ring */
1818 	status = ldc_mem_dring_create(ldcp->num_txds, txdsize,
1819 	    &ldcp->tx_dhandle);
1820 	if (status) {
1821 		DWARN(vgenp, ldcp, "ldc_mem_dring_create() failed\n");
1822 		kmem_free(tbufp, ldcp->num_txds * tbufsize);
1823 		return (DDI_FAILURE);
1824 	}
1825 
1826 	/* get the addr of descripror ring */
1827 	status = ldc_mem_dring_info(ldcp->tx_dhandle, &minfo);
1828 	if (status) {
1829 		DWARN(vgenp, ldcp, "ldc_mem_dring_info() failed\n");
1830 		kmem_free(tbufp, ldcp->num_txds * tbufsize);
1831 		(void) ldc_mem_dring_destroy(ldcp->tx_dhandle);
1832 		ldcp->tbufp = NULL;
1833 		return (DDI_FAILURE);
1834 	}
1835 	ldcp->txdp = (vnet_public_desc_t *)(minfo.vaddr);
1836 	ldcp->tbufp = tbufp;
1837 
1838 	ldcp->txdendp = &((ldcp->txdp)[ldcp->num_txds]);
1839 	ldcp->tbufendp = &((ldcp->tbufp)[ldcp->num_txds]);
1840 
1841 	return (DDI_SUCCESS);
1842 }
1843 
1844 /* Free transmit resources for the channel */
1845 static void
1846 vgen_free_tx_ring(vgen_ldc_t *ldcp)
1847 {
1848 	int tbufsize = sizeof (vgen_private_desc_t);
1849 
1850 	/* free transmit descriptor ring */
1851 	(void) ldc_mem_dring_destroy(ldcp->tx_dhandle);
1852 
1853 	/* free transmit buffer ring */
1854 	kmem_free(ldcp->tbufp, ldcp->num_txds * tbufsize);
1855 	ldcp->txdp = ldcp->txdendp = NULL;
1856 	ldcp->tbufp = ldcp->tbufendp = NULL;
1857 }
1858 
1859 /* enable transmit/receive on the channels for the port */
1860 static void
1861 vgen_init_ldcs(vgen_port_t *portp)
1862 {
1863 	vgen_ldclist_t	*ldclp = &portp->ldclist;
1864 	vgen_ldc_t	*ldcp;
1865 
1866 	READ_ENTER(&ldclp->rwlock);
1867 	ldcp =  ldclp->headp;
1868 	for (; ldcp  != NULL; ldcp = ldcp->nextp) {
1869 		(void) vgen_ldc_init(ldcp);
1870 	}
1871 	RW_EXIT(&ldclp->rwlock);
1872 }
1873 
1874 /* stop transmit/receive on the channels for the port */
1875 static void
1876 vgen_uninit_ldcs(vgen_port_t *portp)
1877 {
1878 	vgen_ldclist_t	*ldclp = &portp->ldclist;
1879 	vgen_ldc_t	*ldcp;
1880 
1881 	READ_ENTER(&ldclp->rwlock);
1882 	ldcp =  ldclp->headp;
1883 	for (; ldcp  != NULL; ldcp = ldcp->nextp) {
1884 		vgen_ldc_uninit(ldcp);
1885 	}
1886 	RW_EXIT(&ldclp->rwlock);
1887 }
1888 
1889 /* enable transmit/receive on the channel */
1890 static int
1891 vgen_ldc_init(vgen_ldc_t *ldcp)
1892 {
1893 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
1894 	ldc_status_t	istatus;
1895 	int		rv;
1896 	uint32_t	retries = 0;
1897 	enum	{ ST_init = 0x0, ST_ldc_open = 0x1,
1898 		ST_init_tbufs = 0x2, ST_cb_enable = 0x4} init_state;
1899 	init_state = ST_init;
1900 
1901 	DBG1(vgenp, ldcp, "enter\n");
1902 	LDC_LOCK(ldcp);
1903 
1904 	rv = ldc_open(ldcp->ldc_handle);
1905 	if (rv != 0) {
1906 		DWARN(vgenp, ldcp, "ldc_open failed: rv(%d)\n", rv);
1907 		goto ldcinit_failed;
1908 	}
1909 	init_state |= ST_ldc_open;
1910 
1911 	(void) ldc_status(ldcp->ldc_handle, &istatus);
1912 	if (istatus != LDC_OPEN && istatus != LDC_READY) {
1913 		DWARN(vgenp, ldcp, "status(%d) is not OPEN/READY\n", istatus);
1914 		goto ldcinit_failed;
1915 	}
1916 	ldcp->ldc_status = istatus;
1917 
1918 	rv = vgen_init_tbufs(ldcp);
1919 	if (rv != 0) {
1920 		DWARN(vgenp, ldcp, "vgen_init_tbufs() failed\n");
1921 		goto ldcinit_failed;
1922 	}
1923 	init_state |= ST_init_tbufs;
1924 
1925 	rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_ENABLE);
1926 	if (rv != 0) {
1927 		DWARN(vgenp, ldcp, "ldc_set_cb_mode failed: rv(%d)\n", rv);
1928 		goto ldcinit_failed;
1929 	}
1930 
1931 	init_state |= ST_cb_enable;
1932 
1933 	do {
1934 		rv = ldc_up(ldcp->ldc_handle);
1935 		if ((rv != 0) && (rv == EWOULDBLOCK)) {
1936 			DBG2(vgenp, ldcp, "ldc_up err rv(%d)\n", rv);
1937 			drv_usecwait(VGEN_LDC_UP_DELAY);
1938 		}
1939 		if (retries++ >= vgen_ldcup_retries)
1940 			break;
1941 	} while (rv == EWOULDBLOCK);
1942 
1943 	(void) ldc_status(ldcp->ldc_handle, &istatus);
1944 	if (istatus == LDC_UP) {
1945 		DWARN(vgenp, ldcp, "status(%d) is UP\n", istatus);
1946 	}
1947 
1948 	ldcp->ldc_status = istatus;
1949 
1950 	/* initialize transmit watchdog timeout */
1951 	ldcp->wd_tid = timeout(vgen_ldc_watchdog, (caddr_t)ldcp,
1952 	    drv_usectohz(vnet_ldcwd_interval * 1000));
1953 
1954 	ldcp->hphase = -1;
1955 	ldcp->flags |= CHANNEL_STARTED;
1956 
1957 	/* if channel is already UP - start handshake */
1958 	if (istatus == LDC_UP) {
1959 		vgen_t *vgenp = LDC_TO_VGEN(ldcp);
1960 		if (ldcp->portp != vgenp->vsw_portp) {
1961 			/*
1962 			 * modify fdb entry to use this port as the
1963 			 * channel is up, instead of going through the
1964 			 * vsw-port (see comments in vgen_port_init())
1965 			 */
1966 			vnet_modify_fdb(vgenp->vnetp,
1967 			    (uint8_t *)&ldcp->portp->macaddr,
1968 			    vgen_tx, ldcp->portp, B_FALSE);
1969 		}
1970 
1971 		/* Initialize local session id */
1972 		ldcp->local_sid = ddi_get_lbolt();
1973 
1974 		/* clear peer session id */
1975 		ldcp->peer_sid = 0;
1976 		ldcp->hretries = 0;
1977 
1978 		/* Initiate Handshake process with peer ldc endpoint */
1979 		vgen_reset_hphase(ldcp);
1980 
1981 		mutex_exit(&ldcp->tclock);
1982 		mutex_exit(&ldcp->txlock);
1983 		mutex_exit(&ldcp->wrlock);
1984 		vgen_handshake(vh_nextphase(ldcp));
1985 		mutex_exit(&ldcp->rxlock);
1986 		mutex_exit(&ldcp->cblock);
1987 	} else {
1988 		LDC_UNLOCK(ldcp);
1989 	}
1990 
1991 	return (DDI_SUCCESS);
1992 
1993 ldcinit_failed:
1994 	if (init_state & ST_cb_enable) {
1995 		(void) ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE);
1996 	}
1997 	if (init_state & ST_init_tbufs) {
1998 		vgen_uninit_tbufs(ldcp);
1999 	}
2000 	if (init_state & ST_ldc_open) {
2001 		(void) ldc_close(ldcp->ldc_handle);
2002 	}
2003 	LDC_UNLOCK(ldcp);
2004 	DBG1(vgenp, ldcp, "exit\n");
2005 	return (DDI_FAILURE);
2006 }
2007 
2008 /* stop transmit/receive on the channel */
2009 static void
2010 vgen_ldc_uninit(vgen_ldc_t *ldcp)
2011 {
2012 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
2013 	int	rv;
2014 
2015 	DBG1(vgenp, ldcp, "enter\n");
2016 	LDC_LOCK(ldcp);
2017 
2018 	if ((ldcp->flags & CHANNEL_STARTED) == 0) {
2019 		LDC_UNLOCK(ldcp);
2020 		DWARN(vgenp, ldcp, "CHANNEL_STARTED flag is not set\n");
2021 		return;
2022 	}
2023 
2024 	/* disable further callbacks */
2025 	rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE);
2026 	if (rv != 0) {
2027 		DWARN(vgenp, ldcp, "ldc_set_cb_mode failed\n");
2028 	}
2029 
2030 	/*
2031 	 * clear handshake done bit and wait for pending tx and cb to finish.
2032 	 * release locks before untimeout(9F) is invoked to cancel timeouts.
2033 	 */
2034 	ldcp->hphase &= ~(VH_DONE);
2035 	LDC_UNLOCK(ldcp);
2036 
2037 	/* cancel handshake watchdog timeout */
2038 	if (ldcp->htid) {
2039 		(void) untimeout(ldcp->htid);
2040 		ldcp->htid = 0;
2041 	}
2042 
2043 	/* cancel transmit watchdog timeout */
2044 	if (ldcp->wd_tid) {
2045 		(void) untimeout(ldcp->wd_tid);
2046 		ldcp->wd_tid = 0;
2047 	}
2048 
2049 	drv_usecwait(1000);
2050 
2051 	/* acquire locks again; any pending transmits and callbacks are done */
2052 	LDC_LOCK(ldcp);
2053 
2054 	vgen_reset_hphase(ldcp);
2055 
2056 	vgen_uninit_tbufs(ldcp);
2057 
2058 	rv = ldc_close(ldcp->ldc_handle);
2059 	if (rv != 0) {
2060 		DWARN(vgenp, ldcp, "ldc_close err\n");
2061 	}
2062 	ldcp->ldc_status = LDC_INIT;
2063 	ldcp->flags &= ~(CHANNEL_STARTED);
2064 
2065 	LDC_UNLOCK(ldcp);
2066 
2067 	DBG1(vgenp, ldcp, "exit\n");
2068 }
2069 
2070 /* Initialize the transmit buffer ring for the channel */
2071 static int
2072 vgen_init_tbufs(vgen_ldc_t *ldcp)
2073 {
2074 	vgen_private_desc_t	*tbufp;
2075 	vnet_public_desc_t	*txdp;
2076 	vio_dring_entry_hdr_t		*hdrp;
2077 	int 			i;
2078 	int 			rv;
2079 	caddr_t			datap = NULL;
2080 	int			ci;
2081 	uint32_t		ncookies;
2082 
2083 	bzero(ldcp->tbufp, sizeof (*tbufp) * (ldcp->num_txds));
2084 	bzero(ldcp->txdp, sizeof (*txdp) * (ldcp->num_txds));
2085 
2086 	datap = kmem_zalloc(ldcp->num_txds * VGEN_TXDBLK_SZ, KM_SLEEP);
2087 	ldcp->tx_datap = datap;
2088 
2089 	/*
2090 	 * for each private descriptor, allocate a ldc mem_handle which is
2091 	 * required to map the data during transmit, set the flags
2092 	 * to free (available for use by transmit routine).
2093 	 */
2094 
2095 	for (i = 0; i < ldcp->num_txds; i++) {
2096 
2097 		tbufp = &(ldcp->tbufp[i]);
2098 		rv = ldc_mem_alloc_handle(ldcp->ldc_handle,
2099 		    &(tbufp->memhandle));
2100 		if (rv) {
2101 			tbufp->memhandle = 0;
2102 			goto init_tbufs_failed;
2103 		}
2104 
2105 		/*
2106 		 * bind ldc memhandle to the corresponding transmit buffer.
2107 		 */
2108 		ci = ncookies = 0;
2109 		rv = ldc_mem_bind_handle(tbufp->memhandle,
2110 		    (caddr_t)datap, VGEN_TXDBLK_SZ, LDC_SHADOW_MAP,
2111 		    LDC_MEM_R, &(tbufp->memcookie[ci]), &ncookies);
2112 		if (rv != 0) {
2113 			goto init_tbufs_failed;
2114 		}
2115 
2116 		/*
2117 		 * successful in binding the handle to tx data buffer.
2118 		 * set datap in the private descr to this buffer.
2119 		 */
2120 		tbufp->datap = datap;
2121 
2122 		if ((ncookies == 0) ||
2123 		    (ncookies > MAX_COOKIES)) {
2124 			goto init_tbufs_failed;
2125 		}
2126 
2127 		for (ci = 1; ci < ncookies; ci++) {
2128 			rv = ldc_mem_nextcookie(tbufp->memhandle,
2129 			    &(tbufp->memcookie[ci]));
2130 			if (rv != 0) {
2131 				goto init_tbufs_failed;
2132 			}
2133 		}
2134 
2135 		tbufp->ncookies = ncookies;
2136 		datap += VGEN_TXDBLK_SZ;
2137 
2138 		tbufp->flags = VGEN_PRIV_DESC_FREE;
2139 		txdp = &(ldcp->txdp[i]);
2140 		hdrp = &txdp->hdr;
2141 		hdrp->dstate = VIO_DESC_FREE;
2142 		hdrp->ack = B_FALSE;
2143 		tbufp->descp = txdp;
2144 
2145 	}
2146 
2147 	/* reset tbuf walking pointers */
2148 	ldcp->next_tbufp = ldcp->tbufp;
2149 	ldcp->cur_tbufp = ldcp->tbufp;
2150 
2151 	/* initialize tx seqnum and index */
2152 	ldcp->next_txseq = VNET_ISS;
2153 	ldcp->next_txi = 0;
2154 
2155 	ldcp->resched_peer = B_TRUE;
2156 	ldcp->resched_peer_txi = 0;
2157 
2158 	return (DDI_SUCCESS);
2159 
2160 init_tbufs_failed:;
2161 	vgen_uninit_tbufs(ldcp);
2162 	return (DDI_FAILURE);
2163 }
2164 
2165 /* Uninitialize transmit buffer ring for the channel */
2166 static void
2167 vgen_uninit_tbufs(vgen_ldc_t *ldcp)
2168 {
2169 	vgen_private_desc_t	*tbufp = ldcp->tbufp;
2170 	int 			i;
2171 
2172 	/* for each tbuf (priv_desc), free ldc mem_handle */
2173 	for (i = 0; i < ldcp->num_txds; i++) {
2174 
2175 		tbufp = &(ldcp->tbufp[i]);
2176 
2177 		if (tbufp->datap) { /* if bound to a ldc memhandle */
2178 			(void) ldc_mem_unbind_handle(tbufp->memhandle);
2179 			tbufp->datap = NULL;
2180 		}
2181 		if (tbufp->memhandle) {
2182 			(void) ldc_mem_free_handle(tbufp->memhandle);
2183 			tbufp->memhandle = 0;
2184 		}
2185 	}
2186 
2187 	if (ldcp->tx_datap) {
2188 		/* prealloc'd tx data buffer */
2189 		kmem_free(ldcp->tx_datap, ldcp->num_txds * VGEN_TXDBLK_SZ);
2190 		ldcp->tx_datap = NULL;
2191 	}
2192 
2193 	bzero(ldcp->tbufp, sizeof (vgen_private_desc_t) * (ldcp->num_txds));
2194 	bzero(ldcp->txdp, sizeof (vnet_public_desc_t) * (ldcp->num_txds));
2195 }
2196 
2197 /* clobber tx descriptor ring */
2198 static void
2199 vgen_clobber_tbufs(vgen_ldc_t *ldcp)
2200 {
2201 	vnet_public_desc_t	*txdp;
2202 	vgen_private_desc_t	*tbufp;
2203 	vio_dring_entry_hdr_t	*hdrp;
2204 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
2205 	int i;
2206 #ifdef DEBUG
2207 	int ndone = 0;
2208 #endif
2209 
2210 	for (i = 0; i < ldcp->num_txds; i++) {
2211 
2212 		tbufp = &(ldcp->tbufp[i]);
2213 		txdp = tbufp->descp;
2214 		hdrp = &txdp->hdr;
2215 
2216 		if (tbufp->flags & VGEN_PRIV_DESC_BUSY) {
2217 			tbufp->flags = VGEN_PRIV_DESC_FREE;
2218 #ifdef DEBUG
2219 			if (hdrp->dstate == VIO_DESC_DONE)
2220 				ndone++;
2221 #endif
2222 			hdrp->dstate = VIO_DESC_FREE;
2223 			hdrp->ack = B_FALSE;
2224 		}
2225 	}
2226 	/* reset tbuf walking pointers */
2227 	ldcp->next_tbufp = ldcp->tbufp;
2228 	ldcp->cur_tbufp = ldcp->tbufp;
2229 
2230 	/* reset tx seqnum and index */
2231 	ldcp->next_txseq = VNET_ISS;
2232 	ldcp->next_txi = 0;
2233 
2234 	ldcp->resched_peer = B_TRUE;
2235 	ldcp->resched_peer_txi = 0;
2236 
2237 	DBG2(vgenp, ldcp, "num descrs done (%d)\n", ndone);
2238 }
2239 
2240 /* clobber receive descriptor ring */
2241 static void
2242 vgen_clobber_rxds(vgen_ldc_t *ldcp)
2243 {
2244 	ldcp->rx_dhandle = 0;
2245 	bzero(&ldcp->rx_dcookie, sizeof (ldcp->rx_dcookie));
2246 	ldcp->rxdp = NULL;
2247 	ldcp->next_rxi = 0;
2248 	ldcp->num_rxds = 0;
2249 	ldcp->next_rxseq = VNET_ISS;
2250 }
2251 
2252 /* initialize receive descriptor ring */
2253 static int
2254 vgen_init_rxds(vgen_ldc_t *ldcp, uint32_t num_desc, uint32_t desc_size,
2255 	ldc_mem_cookie_t *dcookie, uint32_t ncookies)
2256 {
2257 	int rv;
2258 	ldc_mem_info_t minfo;
2259 
2260 	rv = ldc_mem_dring_map(ldcp->ldc_handle, dcookie, ncookies, num_desc,
2261 	    desc_size, LDC_SHADOW_MAP, &(ldcp->rx_dhandle));
2262 	if (rv != 0) {
2263 		return (DDI_FAILURE);
2264 	}
2265 
2266 	/*
2267 	 * sucessfully mapped, now try to
2268 	 * get info about the mapped dring
2269 	 */
2270 	rv = ldc_mem_dring_info(ldcp->rx_dhandle, &minfo);
2271 	if (rv != 0) {
2272 		(void) ldc_mem_dring_unmap(ldcp->rx_dhandle);
2273 		return (DDI_FAILURE);
2274 	}
2275 
2276 	/*
2277 	 * save ring address, number of descriptors.
2278 	 */
2279 	ldcp->rxdp = (vnet_public_desc_t *)(minfo.vaddr);
2280 	bcopy(dcookie, &(ldcp->rx_dcookie), sizeof (*dcookie));
2281 	ldcp->num_rxdcookies = ncookies;
2282 	ldcp->num_rxds = num_desc;
2283 	ldcp->next_rxi = 0;
2284 	ldcp->next_rxseq = VNET_ISS;
2285 
2286 	return (DDI_SUCCESS);
2287 }
2288 
2289 /* get channel statistics */
2290 static uint64_t
2291 vgen_ldc_stat(vgen_ldc_t *ldcp, uint_t stat)
2292 {
2293 	vgen_stats_t *statsp;
2294 	uint64_t val;
2295 
2296 	val = 0;
2297 	statsp = ldcp->statsp;
2298 	switch (stat) {
2299 
2300 	case MAC_STAT_MULTIRCV:
2301 		val = statsp->multircv;
2302 		break;
2303 
2304 	case MAC_STAT_BRDCSTRCV:
2305 		val = statsp->brdcstrcv;
2306 		break;
2307 
2308 	case MAC_STAT_MULTIXMT:
2309 		val = statsp->multixmt;
2310 		break;
2311 
2312 	case MAC_STAT_BRDCSTXMT:
2313 		val = statsp->brdcstxmt;
2314 		break;
2315 
2316 	case MAC_STAT_NORCVBUF:
2317 		val = statsp->norcvbuf;
2318 		break;
2319 
2320 	case MAC_STAT_IERRORS:
2321 		val = statsp->ierrors;
2322 		break;
2323 
2324 	case MAC_STAT_NOXMTBUF:
2325 		val = statsp->noxmtbuf;
2326 		break;
2327 
2328 	case MAC_STAT_OERRORS:
2329 		val = statsp->oerrors;
2330 		break;
2331 
2332 	case MAC_STAT_COLLISIONS:
2333 		break;
2334 
2335 	case MAC_STAT_RBYTES:
2336 		val = statsp->rbytes;
2337 		break;
2338 
2339 	case MAC_STAT_IPACKETS:
2340 		val = statsp->ipackets;
2341 		break;
2342 
2343 	case MAC_STAT_OBYTES:
2344 		val = statsp->obytes;
2345 		break;
2346 
2347 	case MAC_STAT_OPACKETS:
2348 		val = statsp->opackets;
2349 		break;
2350 
2351 	/* stats not relevant to ldc, return 0 */
2352 	case MAC_STAT_IFSPEED:
2353 	case ETHER_STAT_ALIGN_ERRORS:
2354 	case ETHER_STAT_FCS_ERRORS:
2355 	case ETHER_STAT_FIRST_COLLISIONS:
2356 	case ETHER_STAT_MULTI_COLLISIONS:
2357 	case ETHER_STAT_DEFER_XMTS:
2358 	case ETHER_STAT_TX_LATE_COLLISIONS:
2359 	case ETHER_STAT_EX_COLLISIONS:
2360 	case ETHER_STAT_MACXMT_ERRORS:
2361 	case ETHER_STAT_CARRIER_ERRORS:
2362 	case ETHER_STAT_TOOLONG_ERRORS:
2363 	case ETHER_STAT_XCVR_ADDR:
2364 	case ETHER_STAT_XCVR_ID:
2365 	case ETHER_STAT_XCVR_INUSE:
2366 	case ETHER_STAT_CAP_1000FDX:
2367 	case ETHER_STAT_CAP_1000HDX:
2368 	case ETHER_STAT_CAP_100FDX:
2369 	case ETHER_STAT_CAP_100HDX:
2370 	case ETHER_STAT_CAP_10FDX:
2371 	case ETHER_STAT_CAP_10HDX:
2372 	case ETHER_STAT_CAP_ASMPAUSE:
2373 	case ETHER_STAT_CAP_PAUSE:
2374 	case ETHER_STAT_CAP_AUTONEG:
2375 	case ETHER_STAT_ADV_CAP_1000FDX:
2376 	case ETHER_STAT_ADV_CAP_1000HDX:
2377 	case ETHER_STAT_ADV_CAP_100FDX:
2378 	case ETHER_STAT_ADV_CAP_100HDX:
2379 	case ETHER_STAT_ADV_CAP_10FDX:
2380 	case ETHER_STAT_ADV_CAP_10HDX:
2381 	case ETHER_STAT_ADV_CAP_ASMPAUSE:
2382 	case ETHER_STAT_ADV_CAP_PAUSE:
2383 	case ETHER_STAT_ADV_CAP_AUTONEG:
2384 	case ETHER_STAT_LP_CAP_1000FDX:
2385 	case ETHER_STAT_LP_CAP_1000HDX:
2386 	case ETHER_STAT_LP_CAP_100FDX:
2387 	case ETHER_STAT_LP_CAP_100HDX:
2388 	case ETHER_STAT_LP_CAP_10FDX:
2389 	case ETHER_STAT_LP_CAP_10HDX:
2390 	case ETHER_STAT_LP_CAP_ASMPAUSE:
2391 	case ETHER_STAT_LP_CAP_PAUSE:
2392 	case ETHER_STAT_LP_CAP_AUTONEG:
2393 	case ETHER_STAT_LINK_ASMPAUSE:
2394 	case ETHER_STAT_LINK_PAUSE:
2395 	case ETHER_STAT_LINK_AUTONEG:
2396 	case ETHER_STAT_LINK_DUPLEX:
2397 	default:
2398 		val = 0;
2399 		break;
2400 
2401 	}
2402 	return (val);
2403 }
2404 
2405 /*
2406  * LDC channel is UP, start handshake process with peer.
2407  * Flag tells vnet_modify_fdb() about the context: set to B_TRUE if this
2408  * function is being called from transmit routine, otherwise B_FALSE.
2409  */
2410 static void
2411 vgen_handle_evt_up(vgen_ldc_t *ldcp, boolean_t flag)
2412 {
2413 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
2414 
2415 	DBG1(vgenp, ldcp, "enter\n");
2416 
2417 	ASSERT(MUTEX_HELD(&ldcp->cblock));
2418 
2419 	if (ldcp->portp != vgenp->vsw_portp) {
2420 		/*
2421 		 * modify fdb entry to use this port as the
2422 		 * channel is up, instead of going through the
2423 		 * vsw-port (see comments in vgen_port_init())
2424 		 */
2425 		vnet_modify_fdb(vgenp->vnetp, (uint8_t *)&ldcp->portp->macaddr,
2426 		    vgen_tx, ldcp->portp, flag);
2427 	}
2428 
2429 	/* Initialize local session id */
2430 	ldcp->local_sid = ddi_get_lbolt();
2431 
2432 	/* clear peer session id */
2433 	ldcp->peer_sid = 0;
2434 	ldcp->hretries = 0;
2435 
2436 	if (ldcp->hphase != VH_PHASE0) {
2437 		vgen_handshake_reset(ldcp);
2438 	}
2439 
2440 	/* Initiate Handshake process with peer ldc endpoint */
2441 	vgen_handshake(vh_nextphase(ldcp));
2442 
2443 	DBG1(vgenp, ldcp, "exit\n");
2444 }
2445 
2446 /*
2447  * LDC channel is Reset, terminate connection with peer and try to
2448  * bring the channel up again.
2449  * Flag tells vnet_modify_fdb() about the context: set to B_TRUE if this
2450  * function is being called from transmit routine, otherwise B_FALSE.
2451  */
2452 static void
2453 vgen_handle_evt_reset(vgen_ldc_t *ldcp, boolean_t flag)
2454 {
2455 	ldc_status_t istatus;
2456 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
2457 	int	rv;
2458 
2459 	DBG1(vgenp, ldcp, "enter\n");
2460 
2461 	ASSERT(MUTEX_HELD(&ldcp->cblock));
2462 
2463 	if ((ldcp->portp != vgenp->vsw_portp) &&
2464 	    (vgenp->vsw_portp != NULL)) {
2465 		/*
2466 		 * modify fdb entry to use vsw-port  as the
2467 		 * channel is reset and we don't have a direct
2468 		 * link to the destination (see comments
2469 		 * in vgen_port_init()).
2470 		 */
2471 		vnet_modify_fdb(vgenp->vnetp, (uint8_t *)&ldcp->portp->macaddr,
2472 		    vgen_tx, vgenp->vsw_portp, flag);
2473 	}
2474 
2475 	if (ldcp->hphase != VH_PHASE0) {
2476 		vgen_handshake_reset(ldcp);
2477 	}
2478 
2479 	/* try to bring the channel up */
2480 	rv = ldc_up(ldcp->ldc_handle);
2481 	if (rv != 0) {
2482 		DWARN(vgenp, ldcp, "ldc_up err rv(%d)\n", rv);
2483 	}
2484 
2485 	if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
2486 		DWARN(vgenp, ldcp, "ldc_status err\n");
2487 	} else {
2488 		ldcp->ldc_status = istatus;
2489 	}
2490 
2491 	/* if channel is already UP - restart handshake */
2492 	if (ldcp->ldc_status == LDC_UP) {
2493 		vgen_handle_evt_up(ldcp, flag);
2494 	}
2495 
2496 	DBG1(vgenp, ldcp, "exit\n");
2497 }
2498 
2499 /* Interrupt handler for the channel */
2500 static uint_t
2501 vgen_ldc_cb(uint64_t event, caddr_t arg)
2502 {
2503 	_NOTE(ARGUNUSED(event))
2504 	vgen_ldc_t	*ldcp;
2505 	vgen_t		*vgenp;
2506 	ldc_status_t 	istatus;
2507 	mblk_t		*bp = NULL;
2508 	vgen_stats_t	*statsp;
2509 
2510 	ldcp = (vgen_ldc_t *)arg;
2511 	vgenp = LDC_TO_VGEN(ldcp);
2512 	statsp = ldcp->statsp;
2513 
2514 	DBG1(vgenp, ldcp, "enter\n");
2515 
2516 	mutex_enter(&ldcp->cblock);
2517 	statsp->callbacks++;
2518 	if ((ldcp->ldc_status == LDC_INIT) || (ldcp->ldc_handle == NULL)) {
2519 		DWARN(vgenp, ldcp, "status(%d) is LDC_INIT\n",
2520 		    ldcp->ldc_status);
2521 		mutex_exit(&ldcp->cblock);
2522 		return (LDC_SUCCESS);
2523 	}
2524 
2525 	/*
2526 	 * NOTE: not using switch() as event could be triggered by
2527 	 * a state change and a read request. Also the ordering	of the
2528 	 * check for the event types is deliberate.
2529 	 */
2530 	if (event & LDC_EVT_UP) {
2531 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
2532 			DWARN(vgenp, ldcp, "ldc_status err\n");
2533 		} else {
2534 			ldcp->ldc_status = istatus;
2535 		}
2536 		ASSERT(ldcp->ldc_status == LDC_UP);
2537 		DWARN(vgenp, ldcp, "event(%lx) UP, status(%d)\n",
2538 		    event, ldcp->ldc_status);
2539 
2540 		vgen_handle_evt_up(ldcp, B_FALSE);
2541 
2542 		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);
2543 	}
2544 
2545 	if (event & LDC_EVT_READ) {
2546 		DBG2(vgenp, ldcp, "event(%lx) READ, status(%d)\n",
2547 		    event, ldcp->ldc_status);
2548 
2549 		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);
2550 
2551 		if (ldcp->rcv_thread != NULL) {
2552 			/*
2553 			 * If the receive thread is enabled, then
2554 			 * wakeup the receive thread to process the
2555 			 * LDC messages.
2556 			 */
2557 			mutex_exit(&ldcp->cblock);
2558 			mutex_enter(&ldcp->rcv_thr_lock);
2559 			if (!(ldcp->rcv_thr_flags & VGEN_WTHR_DATARCVD)) {
2560 				ldcp->rcv_thr_flags |= VGEN_WTHR_DATARCVD;
2561 				cv_signal(&ldcp->rcv_thr_cv);
2562 			}
2563 			mutex_exit(&ldcp->rcv_thr_lock);
2564 			mutex_enter(&ldcp->cblock);
2565 		} else  {
2566 			vgen_handle_evt_read(ldcp);
2567 			bp = ldcp->rcv_mhead;
2568 			ldcp->rcv_mhead = ldcp->rcv_mtail = NULL;
2569 		}
2570 	}
2571 
2572 	if (event & (LDC_EVT_RESET | LDC_EVT_DOWN)) {
2573 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
2574 			DWARN(vgenp, ldcp, "ldc_status error\n");
2575 		} else {
2576 			ldcp->ldc_status = istatus;
2577 		}
2578 		DWARN(vgenp, ldcp, "event(%lx) RESET/DOWN, status(%d)\n",
2579 		    event, ldcp->ldc_status);
2580 
2581 		vgen_handle_evt_reset(ldcp, B_FALSE);
2582 	}
2583 	mutex_exit(&ldcp->cblock);
2584 
2585 	/* send up the received packets to MAC layer */
2586 	if (bp != NULL) {
2587 		vnet_rx(vgenp->vnetp, NULL, bp);
2588 	}
2589 
2590 	if (ldcp->cancel_htid) {
2591 		/*
2592 		 * Cancel handshake timer.
2593 		 * untimeout(9F) will not return until the pending callback is
2594 		 * cancelled or has run. No problems will result from calling
2595 		 * untimeout if the handler has already completed.
2596 		 * If the timeout handler did run, then it would just
2597 		 * return as cancel_htid is set.
2598 		 */
2599 		(void) untimeout(ldcp->cancel_htid);
2600 		ldcp->cancel_htid = 0;
2601 	}
2602 	DBG1(vgenp, ldcp, "exit\n");
2603 
2604 	return (LDC_SUCCESS);
2605 }
2606 
2607 static void
2608 vgen_handle_evt_read(vgen_ldc_t *ldcp)
2609 {
2610 	int		rv;
2611 	uint64_t	ldcmsg[7];
2612 	size_t		msglen;
2613 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
2614 	vio_msg_tag_t	*tagp;
2615 	ldc_status_t 	istatus;
2616 	boolean_t 	has_data;
2617 
2618 	DBG1(vgenp, ldcp, "enter\n");
2619 
2620 	/*
2621 	 * If the receive thread is enabled, then the cblock
2622 	 * need to be acquired here. If not, the vgen_ldc_cb()
2623 	 * calls this function with cblock held already.
2624 	 */
2625 	if (ldcp->rcv_thread != NULL) {
2626 		mutex_enter(&ldcp->cblock);
2627 	} else {
2628 		ASSERT(MUTEX_HELD(&ldcp->cblock));
2629 	}
2630 
2631 vgen_evt_read:
2632 	do {
2633 		msglen = sizeof (ldcmsg);
2634 		rv = ldc_read(ldcp->ldc_handle, (caddr_t)&ldcmsg, &msglen);
2635 
2636 		if (rv != 0) {
2637 			DWARN(vgenp, ldcp, "err rv(%d) len(%d)\n",
2638 			    rv, msglen);
2639 			if (rv == ECONNRESET)
2640 				goto vgen_evtread_error;
2641 			break;
2642 		}
2643 		if (msglen == 0) {
2644 			DBG2(vgenp, ldcp, "ldc_read NODATA");
2645 			break;
2646 		}
2647 		DBG2(vgenp, ldcp, "ldc_read msglen(%d)", msglen);
2648 
2649 		tagp = (vio_msg_tag_t *)ldcmsg;
2650 
2651 		if (ldcp->peer_sid) {
2652 			/*
2653 			 * check sid only after we have received peer's sid
2654 			 * in the version negotiate msg.
2655 			 */
2656 #ifdef DEBUG
2657 			if (vgen_hdbg & HDBG_BAD_SID) {
2658 				/* simulate bad sid condition */
2659 				tagp->vio_sid = 0;
2660 				vgen_hdbg &= ~(HDBG_BAD_SID);
2661 			}
2662 #endif
2663 			rv = vgen_check_sid(ldcp, tagp);
2664 			if (rv != VGEN_SUCCESS) {
2665 				/*
2666 				 * If sid mismatch is detected,
2667 				 * reset the channel.
2668 				 */
2669 				ldcp->need_ldc_reset = B_TRUE;
2670 				goto vgen_evtread_error;
2671 			}
2672 		}
2673 
2674 		switch (tagp->vio_msgtype) {
2675 		case VIO_TYPE_CTRL:
2676 			rv = vgen_handle_ctrlmsg(ldcp, tagp);
2677 			break;
2678 
2679 		case VIO_TYPE_DATA:
2680 			rv = vgen_handle_datamsg(ldcp, tagp);
2681 			break;
2682 
2683 		case VIO_TYPE_ERR:
2684 			vgen_handle_errmsg(ldcp, tagp);
2685 			break;
2686 
2687 		default:
2688 			DWARN(vgenp, ldcp, "Unknown VIO_TYPE(%x)\n",
2689 			    tagp->vio_msgtype);
2690 			break;
2691 		}
2692 
2693 		/*
2694 		 * If an error is encountered, stop processing and
2695 		 * handle the error.
2696 		 */
2697 		if (rv != 0) {
2698 			goto vgen_evtread_error;
2699 		}
2700 
2701 	} while (msglen);
2702 
2703 	/* check once more before exiting */
2704 	rv = ldc_chkq(ldcp->ldc_handle, &has_data);
2705 	if ((rv == 0) && (has_data == B_TRUE)) {
2706 		DTRACE_PROBE(vgen_chkq);
2707 		goto vgen_evt_read;
2708 	}
2709 
2710 vgen_evtread_error:
2711 	if (rv == ECONNRESET) {
2712 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
2713 			DWARN(vgenp, ldcp, "ldc_status err\n");
2714 		} else {
2715 			ldcp->ldc_status = istatus;
2716 		}
2717 		vgen_handle_evt_reset(ldcp, B_FALSE);
2718 	} else if (rv) {
2719 		vgen_handshake_retry(ldcp);
2720 	}
2721 
2722 	/*
2723 	 * If the receive thread is not enabled, then cancel the
2724 	 * handshake timeout here.
2725 	 */
2726 	if (ldcp->rcv_thread != NULL) {
2727 		mutex_exit(&ldcp->cblock);
2728 		if (ldcp->cancel_htid) {
2729 			/*
2730 			 * Cancel handshake timer. untimeout(9F) will
2731 			 * not return until the pending callback is cancelled
2732 			 * or has run. No problems will result from calling
2733 			 * untimeout if the handler has already completed.
2734 			 * If the timeout handler did run, then it would just
2735 			 * return as cancel_htid is set.
2736 			 */
2737 			(void) untimeout(ldcp->cancel_htid);
2738 			ldcp->cancel_htid = 0;
2739 		}
2740 	}
2741 
2742 	DBG1(vgenp, ldcp, "exit\n");
2743 }
2744 
2745 /* vgen handshake functions */
2746 
2747 /* change the hphase for the channel to the next phase */
2748 static vgen_ldc_t *
2749 vh_nextphase(vgen_ldc_t *ldcp)
2750 {
2751 	if (ldcp->hphase == VH_PHASE3) {
2752 		ldcp->hphase = VH_DONE;
2753 	} else {
2754 		ldcp->hphase++;
2755 	}
2756 	return (ldcp);
2757 }
2758 
2759 /*
2760  * Check whether the given version is supported or not and
2761  * return VGEN_SUCCESS if supported.
2762  */
2763 static int
2764 vgen_supported_version(vgen_ldc_t *ldcp, uint16_t ver_major,
2765 uint16_t ver_minor)
2766 {
2767 	vgen_ver_t	*versions = ldcp->vgen_versions;
2768 	int		i = 0;
2769 
2770 	while (i < VGEN_NUM_VER) {
2771 		if ((versions[i].ver_major == 0) &&
2772 		    (versions[i].ver_minor == 0)) {
2773 			break;
2774 		}
2775 		if ((versions[i].ver_major == ver_major) &&
2776 		    (versions[i].ver_minor == ver_minor)) {
2777 			return (VGEN_SUCCESS);
2778 		}
2779 		i++;
2780 	}
2781 	return (VGEN_FAILURE);
2782 }
2783 
2784 /*
2785  * Given a version, return VGEN_SUCCESS if a lower version is supported.
2786  */
2787 static int
2788 vgen_next_version(vgen_ldc_t *ldcp, vgen_ver_t *verp)
2789 {
2790 	vgen_ver_t	*versions = ldcp->vgen_versions;
2791 	int		i = 0;
2792 
2793 	while (i < VGEN_NUM_VER) {
2794 		if ((versions[i].ver_major == 0) &&
2795 		    (versions[i].ver_minor == 0)) {
2796 			break;
2797 		}
2798 		/*
2799 		 * if we support a lower minor version within the same major
2800 		 * version, or if we support a lower major version,
2801 		 * update the verp parameter with this lower version and
2802 		 * return success.
2803 		 */
2804 		if (((versions[i].ver_major == verp->ver_major) &&
2805 		    (versions[i].ver_minor < verp->ver_minor)) ||
2806 		    (versions[i].ver_major < verp->ver_major)) {
2807 			verp->ver_major = versions[i].ver_major;
2808 			verp->ver_minor = versions[i].ver_minor;
2809 			return (VGEN_SUCCESS);
2810 		}
2811 		i++;
2812 	}
2813 
2814 	return (VGEN_FAILURE);
2815 }
2816 
2817 /*
2818  * wrapper routine to send the given message over ldc using ldc_write().
2819  */
2820 static int
2821 vgen_sendmsg(vgen_ldc_t *ldcp, caddr_t msg,  size_t msglen,
2822     boolean_t caller_holds_lock)
2823 {
2824 	int	rv;
2825 	size_t	len;
2826 	uint32_t retries = 0;
2827 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
2828 
2829 	len = msglen;
2830 	if ((len == 0) || (msg == NULL))
2831 		return (VGEN_FAILURE);
2832 
2833 	if (!caller_holds_lock) {
2834 		mutex_enter(&ldcp->wrlock);
2835 	}
2836 
2837 	do {
2838 		len = msglen;
2839 		rv = ldc_write(ldcp->ldc_handle, (caddr_t)msg, &len);
2840 		if (retries++ >= vgen_ldcwr_retries)
2841 			break;
2842 	} while (rv == EWOULDBLOCK);
2843 
2844 	if (!caller_holds_lock) {
2845 		mutex_exit(&ldcp->wrlock);
2846 	}
2847 
2848 	if (rv != 0) {
2849 		DWARN(vgenp, ldcp, "ldc_write failed: rv(%d)\n",
2850 		    rv, msglen);
2851 		return (rv);
2852 	}
2853 
2854 	if (len != msglen) {
2855 		DWARN(vgenp, ldcp, "ldc_write failed: rv(%d) msglen (%d)\n",
2856 		    rv, msglen);
2857 		return (VGEN_FAILURE);
2858 	}
2859 
2860 	return (VGEN_SUCCESS);
2861 }
2862 
2863 /* send version negotiate message to the peer over ldc */
2864 static int
2865 vgen_send_version_negotiate(vgen_ldc_t *ldcp)
2866 {
2867 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
2868 	vio_ver_msg_t	vermsg;
2869 	vio_msg_tag_t	*tagp = &vermsg.tag;
2870 	int		rv;
2871 
2872 	bzero(&vermsg, sizeof (vermsg));
2873 
2874 	tagp->vio_msgtype = VIO_TYPE_CTRL;
2875 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
2876 	tagp->vio_subtype_env = VIO_VER_INFO;
2877 	tagp->vio_sid = ldcp->local_sid;
2878 
2879 	/* get version msg payload from ldcp->local */
2880 	vermsg.ver_major = ldcp->local_hparams.ver_major;
2881 	vermsg.ver_minor = ldcp->local_hparams.ver_minor;
2882 	vermsg.dev_class = ldcp->local_hparams.dev_class;
2883 
2884 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (vermsg), B_FALSE);
2885 	if (rv != VGEN_SUCCESS) {
2886 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
2887 		return (rv);
2888 	}
2889 
2890 	ldcp->hstate |= VER_INFO_SENT;
2891 	DBG2(vgenp, ldcp, "VER_INFO_SENT ver(%d,%d)\n",
2892 	    vermsg.ver_major, vermsg.ver_minor);
2893 
2894 	return (VGEN_SUCCESS);
2895 }
2896 
2897 /* send attr info message to the peer over ldc */
2898 static int
2899 vgen_send_attr_info(vgen_ldc_t *ldcp)
2900 {
2901 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
2902 	vnet_attr_msg_t	attrmsg;
2903 	vio_msg_tag_t	*tagp = &attrmsg.tag;
2904 	int		rv;
2905 
2906 	bzero(&attrmsg, sizeof (attrmsg));
2907 
2908 	tagp->vio_msgtype = VIO_TYPE_CTRL;
2909 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
2910 	tagp->vio_subtype_env = VIO_ATTR_INFO;
2911 	tagp->vio_sid = ldcp->local_sid;
2912 
2913 	/* get attr msg payload from ldcp->local */
2914 	attrmsg.mtu = ldcp->local_hparams.mtu;
2915 	attrmsg.addr = ldcp->local_hparams.addr;
2916 	attrmsg.addr_type = ldcp->local_hparams.addr_type;
2917 	attrmsg.xfer_mode = ldcp->local_hparams.xfer_mode;
2918 	attrmsg.ack_freq = ldcp->local_hparams.ack_freq;
2919 
2920 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (attrmsg), B_FALSE);
2921 	if (rv != VGEN_SUCCESS) {
2922 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
2923 		return (rv);
2924 	}
2925 
2926 	ldcp->hstate |= ATTR_INFO_SENT;
2927 	DBG2(vgenp, ldcp, "ATTR_INFO_SENT\n");
2928 
2929 	return (VGEN_SUCCESS);
2930 }
2931 
2932 /* send descriptor ring register message to the peer over ldc */
2933 static int
2934 vgen_send_dring_reg(vgen_ldc_t *ldcp)
2935 {
2936 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
2937 	vio_dring_reg_msg_t	msg;
2938 	vio_msg_tag_t		*tagp = &msg.tag;
2939 	int		rv;
2940 
2941 	bzero(&msg, sizeof (msg));
2942 
2943 	tagp->vio_msgtype = VIO_TYPE_CTRL;
2944 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
2945 	tagp->vio_subtype_env = VIO_DRING_REG;
2946 	tagp->vio_sid = ldcp->local_sid;
2947 
2948 	/* get dring info msg payload from ldcp->local */
2949 	bcopy(&(ldcp->local_hparams.dring_cookie), (msg.cookie),
2950 	    sizeof (ldc_mem_cookie_t));
2951 	msg.ncookies = ldcp->local_hparams.num_dcookies;
2952 	msg.num_descriptors = ldcp->local_hparams.num_desc;
2953 	msg.descriptor_size = ldcp->local_hparams.desc_size;
2954 
2955 	/*
2956 	 * dring_ident is set to 0. After mapping the dring, peer sets this
2957 	 * value and sends it in the ack, which is saved in
2958 	 * vgen_handle_dring_reg().
2959 	 */
2960 	msg.dring_ident = 0;
2961 
2962 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (msg), B_FALSE);
2963 	if (rv != VGEN_SUCCESS) {
2964 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
2965 		return (rv);
2966 	}
2967 
2968 	ldcp->hstate |= DRING_INFO_SENT;
2969 	DBG2(vgenp, ldcp, "DRING_INFO_SENT \n");
2970 
2971 	return (VGEN_SUCCESS);
2972 }
2973 
2974 static int
2975 vgen_send_rdx_info(vgen_ldc_t *ldcp)
2976 {
2977 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
2978 	vio_rdx_msg_t	rdxmsg;
2979 	vio_msg_tag_t	*tagp = &rdxmsg.tag;
2980 	int		rv;
2981 
2982 	bzero(&rdxmsg, sizeof (rdxmsg));
2983 
2984 	tagp->vio_msgtype = VIO_TYPE_CTRL;
2985 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
2986 	tagp->vio_subtype_env = VIO_RDX;
2987 	tagp->vio_sid = ldcp->local_sid;
2988 
2989 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (rdxmsg), B_FALSE);
2990 	if (rv != VGEN_SUCCESS) {
2991 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
2992 		return (rv);
2993 	}
2994 
2995 	ldcp->hstate |= RDX_INFO_SENT;
2996 	DBG2(vgenp, ldcp, "RDX_INFO_SENT\n");
2997 
2998 	return (VGEN_SUCCESS);
2999 }
3000 
3001 /* send descriptor ring data message to the peer over ldc */
3002 static int
3003 vgen_send_dring_data(vgen_ldc_t *ldcp, uint32_t start, int32_t end)
3004 {
3005 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
3006 	vio_dring_msg_t	dringmsg, *msgp = &dringmsg;
3007 	vio_msg_tag_t	*tagp = &msgp->tag;
3008 	int		rv;
3009 
3010 	bzero(msgp, sizeof (*msgp));
3011 
3012 	tagp->vio_msgtype = VIO_TYPE_DATA;
3013 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
3014 	tagp->vio_subtype_env = VIO_DRING_DATA;
3015 	tagp->vio_sid = ldcp->local_sid;
3016 
3017 	msgp->seq_num = ldcp->next_txseq;
3018 	msgp->dring_ident = ldcp->local_hparams.dring_ident;
3019 	msgp->start_idx = start;
3020 	msgp->end_idx = end;
3021 
3022 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (dringmsg), B_TRUE);
3023 	if (rv != VGEN_SUCCESS) {
3024 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
3025 		return (rv);
3026 	}
3027 
3028 	ldcp->next_txseq++;
3029 	ldcp->statsp->dring_data_msgs++;
3030 
3031 	DBG2(vgenp, ldcp, "DRING_DATA_SENT \n");
3032 
3033 	return (VGEN_SUCCESS);
3034 }
3035 
3036 /* send multicast addr info message to vsw */
3037 static int
3038 vgen_send_mcast_info(vgen_ldc_t *ldcp)
3039 {
3040 	vnet_mcast_msg_t	mcastmsg;
3041 	vnet_mcast_msg_t	*msgp;
3042 	vio_msg_tag_t		*tagp;
3043 	vgen_t			*vgenp;
3044 	struct ether_addr	*mca;
3045 	int			rv;
3046 	int			i;
3047 	uint32_t		size;
3048 	uint32_t		mccount;
3049 	uint32_t		n;
3050 
3051 	msgp = &mcastmsg;
3052 	tagp = &msgp->tag;
3053 	vgenp = LDC_TO_VGEN(ldcp);
3054 
3055 	mccount = vgenp->mccount;
3056 	i = 0;
3057 
3058 	do {
3059 		tagp->vio_msgtype = VIO_TYPE_CTRL;
3060 		tagp->vio_subtype = VIO_SUBTYPE_INFO;
3061 		tagp->vio_subtype_env = VNET_MCAST_INFO;
3062 		tagp->vio_sid = ldcp->local_sid;
3063 
3064 		n = ((mccount >= VNET_NUM_MCAST) ? VNET_NUM_MCAST : mccount);
3065 		size = n * sizeof (struct ether_addr);
3066 
3067 		mca = &(vgenp->mctab[i]);
3068 		bcopy(mca, (msgp->mca), size);
3069 		msgp->set = B_TRUE;
3070 		msgp->count = n;
3071 
3072 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msgp),
3073 		    B_FALSE);
3074 		if (rv != VGEN_SUCCESS) {
3075 			DWARN(vgenp, ldcp, "vgen_sendmsg err(%d)\n", rv);
3076 			return (rv);
3077 		}
3078 
3079 		mccount -= n;
3080 		i += n;
3081 
3082 	} while (mccount);
3083 
3084 	return (VGEN_SUCCESS);
3085 }
3086 
3087 /* Initiate Phase 2 of handshake */
3088 static int
3089 vgen_handshake_phase2(vgen_ldc_t *ldcp)
3090 {
3091 	int rv;
3092 	uint32_t ncookies = 0;
3093 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3094 
3095 #ifdef DEBUG
3096 	if (vgen_hdbg & HDBG_OUT_STATE) {
3097 		/* simulate out of state condition */
3098 		vgen_hdbg &= ~(HDBG_OUT_STATE);
3099 		rv = vgen_send_rdx_info(ldcp);
3100 		return (rv);
3101 	}
3102 	if (vgen_hdbg & HDBG_TIMEOUT) {
3103 		/* simulate timeout condition */
3104 		vgen_hdbg &= ~(HDBG_TIMEOUT);
3105 		return (VGEN_SUCCESS);
3106 	}
3107 #endif
3108 	rv = vgen_send_attr_info(ldcp);
3109 	if (rv != VGEN_SUCCESS) {
3110 		return (rv);
3111 	}
3112 
3113 	/* Bind descriptor ring to the channel */
3114 	if (ldcp->num_txdcookies == 0) {
3115 		rv = ldc_mem_dring_bind(ldcp->ldc_handle, ldcp->tx_dhandle,
3116 		    LDC_SHADOW_MAP, LDC_MEM_RW, &ldcp->tx_dcookie, &ncookies);
3117 		if (rv != 0) {
3118 			DWARN(vgenp, ldcp, "ldc_mem_dring_bind failed "
3119 			    "rv(%x)\n", rv);
3120 			return (rv);
3121 		}
3122 		ASSERT(ncookies == 1);
3123 		ldcp->num_txdcookies = ncookies;
3124 	}
3125 
3126 	/* update local dring_info params */
3127 	bcopy(&(ldcp->tx_dcookie), &(ldcp->local_hparams.dring_cookie),
3128 	    sizeof (ldc_mem_cookie_t));
3129 	ldcp->local_hparams.num_dcookies = ldcp->num_txdcookies;
3130 	ldcp->local_hparams.num_desc = ldcp->num_txds;
3131 	ldcp->local_hparams.desc_size = sizeof (vnet_public_desc_t);
3132 
3133 	rv = vgen_send_dring_reg(ldcp);
3134 	if (rv != VGEN_SUCCESS) {
3135 		return (rv);
3136 	}
3137 
3138 	return (VGEN_SUCCESS);
3139 }
3140 
3141 /*
3142  * This function resets the handshake phase to VH_PHASE0(pre-handshake phase).
3143  * This can happen after a channel comes up (status: LDC_UP) or
3144  * when handshake gets terminated due to various conditions.
3145  */
3146 static void
3147 vgen_reset_hphase(vgen_ldc_t *ldcp)
3148 {
3149 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3150 	ldc_status_t istatus;
3151 	int rv;
3152 
3153 	DBG1(vgenp, ldcp, "enter\n");
3154 	/* reset hstate and hphase */
3155 	ldcp->hstate = 0;
3156 	ldcp->hphase = VH_PHASE0;
3157 
3158 	/*
3159 	 * Save the id of pending handshake timer in cancel_htid.
3160 	 * This will be checked in vgen_ldc_cb() and the handshake timer will
3161 	 * be cancelled after releasing cblock.
3162 	 */
3163 	if (ldcp->htid) {
3164 		ldcp->cancel_htid = ldcp->htid;
3165 		ldcp->htid = 0;
3166 	}
3167 
3168 	if (ldcp->local_hparams.dring_ready) {
3169 		ldcp->local_hparams.dring_ready = B_FALSE;
3170 	}
3171 
3172 	/* Unbind tx descriptor ring from the channel */
3173 	if (ldcp->num_txdcookies) {
3174 		rv = ldc_mem_dring_unbind(ldcp->tx_dhandle);
3175 		if (rv != 0) {
3176 			DWARN(vgenp, ldcp, "ldc_mem_dring_unbind failed\n");
3177 		}
3178 		ldcp->num_txdcookies = 0;
3179 	}
3180 
3181 	if (ldcp->peer_hparams.dring_ready) {
3182 		ldcp->peer_hparams.dring_ready = B_FALSE;
3183 		/* Unmap peer's dring */
3184 		(void) ldc_mem_dring_unmap(ldcp->rx_dhandle);
3185 		vgen_clobber_rxds(ldcp);
3186 	}
3187 
3188 	vgen_clobber_tbufs(ldcp);
3189 
3190 	/*
3191 	 * clear local handshake params and initialize.
3192 	 */
3193 	bzero(&(ldcp->local_hparams), sizeof (ldcp->local_hparams));
3194 
3195 	/* set version to the highest version supported */
3196 	ldcp->local_hparams.ver_major =
3197 	    ldcp->vgen_versions[0].ver_major;
3198 	ldcp->local_hparams.ver_minor =
3199 	    ldcp->vgen_versions[0].ver_minor;
3200 	ldcp->local_hparams.dev_class = VDEV_NETWORK;
3201 
3202 	/* set attr_info params */
3203 	ldcp->local_hparams.mtu = ETHERMAX;
3204 	ldcp->local_hparams.addr =
3205 	    vgen_macaddr_strtoul(vgenp->macaddr);
3206 	ldcp->local_hparams.addr_type = ADDR_TYPE_MAC;
3207 	ldcp->local_hparams.xfer_mode = VIO_DRING_MODE;
3208 	ldcp->local_hparams.ack_freq = 0;	/* don't need acks */
3209 
3210 	/*
3211 	 * Note: dring is created, but not bound yet.
3212 	 * local dring_info params will be updated when we bind the dring in
3213 	 * vgen_handshake_phase2().
3214 	 * dring_ident is set to 0. After mapping the dring, peer sets this
3215 	 * value and sends it in the ack, which is saved in
3216 	 * vgen_handle_dring_reg().
3217 	 */
3218 	ldcp->local_hparams.dring_ident = 0;
3219 
3220 	/* clear peer_hparams */
3221 	bzero(&(ldcp->peer_hparams), sizeof (ldcp->peer_hparams));
3222 
3223 	/* reset the channel if required */
3224 	if (ldcp->need_ldc_reset) {
3225 		DWARN(vgenp, ldcp, "Doing Channel Reset...\n");
3226 		ldcp->need_ldc_reset = B_FALSE;
3227 		(void) ldc_down(ldcp->ldc_handle);
3228 		(void) ldc_status(ldcp->ldc_handle, &istatus);
3229 		DBG2(vgenp, ldcp, "Reset Done,ldc_status(%x)\n", istatus);
3230 		ldcp->ldc_status = istatus;
3231 
3232 		/* clear sids */
3233 		ldcp->local_sid = 0;
3234 		ldcp->peer_sid = 0;
3235 
3236 		/* try to bring the channel up */
3237 		rv = ldc_up(ldcp->ldc_handle);
3238 		if (rv != 0) {
3239 			DWARN(vgenp, ldcp, "ldc_up err rv(%d)\n", rv);
3240 		}
3241 
3242 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
3243 			DWARN(vgenp, ldcp, "ldc_status err\n");
3244 		} else {
3245 			ldcp->ldc_status = istatus;
3246 		}
3247 	}
3248 }
3249 
/*
 * Wrapper function for vgen_reset_hphase().
 *
 * The caller must already hold ldcp->cblock (asserted below). This routine
 * additionally acquires rxlock, wrlock, txlock and tclock, in that order,
 * around the call to vgen_reset_hphase() and releases them in the reverse
 * order before returning. cblock is still held on return.
 */
static void
vgen_handshake_reset(vgen_ldc_t *ldcp)
{
	ASSERT(MUTEX_HELD(&ldcp->cblock));
	mutex_enter(&ldcp->rxlock);
	mutex_enter(&ldcp->wrlock);
	mutex_enter(&ldcp->txlock);
	mutex_enter(&ldcp->tclock);

	vgen_reset_hphase(ldcp);

	/* drop the locks in the reverse order of acquisition */
	mutex_exit(&ldcp->tclock);
	mutex_exit(&ldcp->txlock);
	mutex_exit(&ldcp->wrlock);
	mutex_exit(&ldcp->rxlock);
}
3267 
3268 /*
3269  * Initiate handshake with the peer by sending various messages
3270  * based on the handshake-phase that the channel is currently in.
3271  */
3272 static void
3273 vgen_handshake(vgen_ldc_t *ldcp)
3274 {
3275 	uint32_t hphase = ldcp->hphase;
3276 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
3277 	ldc_status_t	istatus;
3278 	int	rv = 0;
3279 
3280 	switch (hphase) {
3281 
3282 	case VH_PHASE1:
3283 
3284 		/*
3285 		 * start timer, for entire handshake process, turn this timer
3286 		 * off if all phases of handshake complete successfully and
3287 		 * hphase goes to VH_DONE(below) or
3288 		 * vgen_reset_hphase() gets called or
3289 		 * channel is reset due to errors or
3290 		 * vgen_ldc_uninit() is invoked(vgen_stop).
3291 		 */
3292 		ldcp->htid = timeout(vgen_hwatchdog, (caddr_t)ldcp,
3293 		    drv_usectohz(vgen_hwd_interval * MICROSEC));
3294 
3295 		/* Phase 1 involves negotiating the version */
3296 		rv = vgen_send_version_negotiate(ldcp);
3297 		break;
3298 
3299 	case VH_PHASE2:
3300 		rv = vgen_handshake_phase2(ldcp);
3301 		break;
3302 
3303 	case VH_PHASE3:
3304 		rv = vgen_send_rdx_info(ldcp);
3305 		break;
3306 
3307 	case VH_DONE:
3308 		/*
3309 		 * Save the id of pending handshake timer in cancel_htid.
3310 		 * This will be checked in vgen_ldc_cb() and the handshake
3311 		 * timer will be cancelled after releasing cblock.
3312 		 */
3313 		if (ldcp->htid) {
3314 			ldcp->cancel_htid = ldcp->htid;
3315 			ldcp->htid = 0;
3316 		}
3317 		ldcp->hretries = 0;
3318 		DBG1(vgenp, ldcp, "Handshake Done\n");
3319 
3320 		if (ldcp->portp == vgenp->vsw_portp) {
3321 			/*
3322 			 * If this channel(port) is connected to vsw,
3323 			 * need to sync multicast table with vsw.
3324 			 */
3325 			mutex_exit(&ldcp->cblock);
3326 
3327 			mutex_enter(&vgenp->lock);
3328 			rv = vgen_send_mcast_info(ldcp);
3329 			mutex_exit(&vgenp->lock);
3330 
3331 			mutex_enter(&ldcp->cblock);
3332 			if (rv != VGEN_SUCCESS)
3333 				break;
3334 		}
3335 
3336 		/*
3337 		 * Check if mac layer should be notified to restart
3338 		 * transmissions. This can happen if the channel got
3339 		 * reset and vgen_clobber_tbufs() is called, while
3340 		 * need_resched is set.
3341 		 */
3342 		mutex_enter(&ldcp->tclock);
3343 		if (ldcp->need_resched) {
3344 			ldcp->need_resched = B_FALSE;
3345 			vnet_tx_update(vgenp->vnetp);
3346 		}
3347 		mutex_exit(&ldcp->tclock);
3348 
3349 		break;
3350 
3351 	default:
3352 		break;
3353 	}
3354 
3355 	if (rv == ECONNRESET) {
3356 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
3357 			DWARN(vgenp, ldcp, "ldc_status err\n");
3358 		} else {
3359 			ldcp->ldc_status = istatus;
3360 		}
3361 		vgen_handle_evt_reset(ldcp, B_FALSE);
3362 	} else if (rv) {
3363 		vgen_handshake_reset(ldcp);
3364 	}
3365 }
3366 
3367 /*
3368  * Check if the current handshake phase has completed successfully and
3369  * return the status.
3370  */
3371 static int
3372 vgen_handshake_done(vgen_ldc_t *ldcp)
3373 {
3374 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
3375 	uint32_t	hphase = ldcp->hphase;
3376 	int 		status = 0;
3377 
3378 	switch (hphase) {
3379 
3380 	case VH_PHASE1:
3381 		/*
3382 		 * Phase1 is done, if version negotiation
3383 		 * completed successfully.
3384 		 */
3385 		status = ((ldcp->hstate & VER_NEGOTIATED) ==
3386 		    VER_NEGOTIATED);
3387 		break;
3388 
3389 	case VH_PHASE2:
3390 		/*
3391 		 * Phase 2 is done, if attr info and dring info
3392 		 * have been exchanged successfully.
3393 		 */
3394 		status = (((ldcp->hstate & ATTR_INFO_EXCHANGED) ==
3395 		    ATTR_INFO_EXCHANGED) &&
3396 		    ((ldcp->hstate & DRING_INFO_EXCHANGED) ==
3397 		    DRING_INFO_EXCHANGED));
3398 		break;
3399 
3400 	case VH_PHASE3:
3401 		/* Phase 3 is done, if rdx msg has been exchanged */
3402 		status = ((ldcp->hstate & RDX_EXCHANGED) ==
3403 		    RDX_EXCHANGED);
3404 		break;
3405 
3406 	default:
3407 		break;
3408 	}
3409 
3410 	if (status == 0) {
3411 		return (VGEN_FAILURE);
3412 	}
3413 	DBG2(vgenp, ldcp, "PHASE(%d)\n", hphase);
3414 	return (VGEN_SUCCESS);
3415 }
3416 
3417 /* retry handshake on failure */
3418 static void
3419 vgen_handshake_retry(vgen_ldc_t *ldcp)
3420 {
3421 	/* reset handshake phase */
3422 	vgen_handshake_reset(ldcp);
3423 
3424 	/* handshake retry is specified and the channel is UP */
3425 	if (vgen_max_hretries && (ldcp->ldc_status == LDC_UP)) {
3426 		if (ldcp->hretries++ < vgen_max_hretries) {
3427 			ldcp->local_sid = ddi_get_lbolt();
3428 			vgen_handshake(vh_nextphase(ldcp));
3429 		}
3430 	}
3431 }
3432 
/*
 * Handle a version info msg from the peer or an ACK/NACK from the peer
 * to a version info msg that we sent.
 *
 * tagp points at the received message; it is reinterpreted as a
 * vio_ver_msg_t and, for VIO_SUBTYPE_INFO, modified in place and sent back
 * to the peer as the ACK/NACK reply.
 *
 * Returns VGEN_SUCCESS when the message was handled, VGEN_FAILURE when
 * negotiation failed or the message arrived in the wrong phase, or the
 * non-success value returned by vgen_sendmsg() /
 * vgen_send_version_negotiate() when sending failed.
 */
static int
vgen_handle_version_negotiate(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
{
	vgen_t		*vgenp;
	vio_ver_msg_t	*vermsg = (vio_ver_msg_t *)tagp;
	int		ack = 0;
	int		failed = 0;
	int		idx;
	vgen_ver_t	*versions = ldcp->vgen_versions;
	int		rv = 0;

	vgenp = LDC_TO_VGEN(ldcp);
	DBG1(vgenp, ldcp, "enter\n");
	switch (tagp->vio_subtype) {
	case VIO_SUBTYPE_INFO:

		/*  Cache sid of peer if this is the first time */
		if (ldcp->peer_sid == 0) {
			DBG2(vgenp, ldcp, "Caching peer_sid(%x)\n",
			    tagp->vio_sid);
			ldcp->peer_sid = tagp->vio_sid;
		}

		if (ldcp->hphase != VH_PHASE1) {
			/*
			 * If we are not already in VH_PHASE1, reset to
			 * pre-handshake state, and initiate handshake
			 * to the peer too.
			 */
			vgen_handshake_reset(ldcp);
			vgen_handshake(vh_nextphase(ldcp));
		}
		ldcp->hstate |= VER_INFO_RCVD;

		/* save peer's requested values */
		ldcp->peer_hparams.ver_major = vermsg->ver_major;
		ldcp->peer_hparams.ver_minor = vermsg->ver_minor;
		ldcp->peer_hparams.dev_class = vermsg->dev_class;

		if ((vermsg->dev_class != VDEV_NETWORK) &&
		    (vermsg->dev_class != VDEV_NETWORK_SWITCH)) {
			/* unsupported dev_class, send NACK */

			DWARN(vgenp, ldcp, "Version Negotiation Failed\n");

			tagp->vio_subtype = VIO_SUBTYPE_NACK;
			tagp->vio_sid = ldcp->local_sid;
			/* send reply msg back to peer */
			rv = vgen_sendmsg(ldcp, (caddr_t)tagp,
			    sizeof (*vermsg), B_FALSE);
			if (rv != VGEN_SUCCESS) {
				return (rv);
			}
			return (VGEN_FAILURE);
		}

		DBG2(vgenp, ldcp, "VER_INFO_RCVD, ver(%d,%d)\n",
		    vermsg->ver_major,  vermsg->ver_minor);

		/*
		 * Walk our version table from index 0 looking for an entry
		 * to answer with. The loop always terminates through one of
		 * the three break statements below.
		 * NOTE(review): this presumes the table is ordered from the
		 * highest to the lowest version - confirm.
		 */
		idx = 0;

		for (;;) {

			if (vermsg->ver_major > versions[idx].ver_major) {

				/* nack with next lower version */
				tagp->vio_subtype = VIO_SUBTYPE_NACK;
				vermsg->ver_major = versions[idx].ver_major;
				vermsg->ver_minor = versions[idx].ver_minor;
				break;
			}

			if (vermsg->ver_major == versions[idx].ver_major) {

				/* major version match - ACK version */
				tagp->vio_subtype = VIO_SUBTYPE_ACK;
				ack = 1;

				/*
				 * lower minor version to the one this endpt
				 * supports, if necessary
				 */
				if (vermsg->ver_minor >
				    versions[idx].ver_minor) {
					vermsg->ver_minor =
					    versions[idx].ver_minor;
					ldcp->peer_hparams.ver_minor =
					    versions[idx].ver_minor;
				}
				break;
			}

			idx++;

			if (idx == VGEN_NUM_VER) {

				/* no version match - send NACK */
				tagp->vio_subtype = VIO_SUBTYPE_NACK;
				vermsg->ver_major = 0;
				vermsg->ver_minor = 0;
				failed = 1;
				break;
			}

		}

		tagp->vio_sid = ldcp->local_sid;

		/* send reply msg back to peer */
		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*vermsg),
		    B_FALSE);
		if (rv != VGEN_SUCCESS) {
			return (rv);
		}

		if (ack) {
			ldcp->hstate |= VER_ACK_SENT;
			DBG2(vgenp, ldcp, "VER_ACK_SENT, ver(%d,%d) \n",
			    vermsg->ver_major, vermsg->ver_minor);
		}
		if (failed) {
			DWARN(vgenp, ldcp, "Negotiation Failed\n");
			return (VGEN_FAILURE);
		}
		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {

			/*  VER_ACK_SENT and VER_ACK_RCVD */

			/* local and peer versions match? */
			ASSERT((ldcp->local_hparams.ver_major ==
			    ldcp->peer_hparams.ver_major) &&
			    (ldcp->local_hparams.ver_minor ==
			    ldcp->peer_hparams.ver_minor));

			/* move to the next phase */
			vgen_handshake(vh_nextphase(ldcp));
		}

		break;

	case VIO_SUBTYPE_ACK:

		if (ldcp->hphase != VH_PHASE1) {
			/*  This should not happen. */
			DWARN(vgenp, ldcp, "Invalid Phase(%u)\n", ldcp->hphase);
			return (VGEN_FAILURE);
		}

		/* SUCCESS - we have agreed on a version */
		ldcp->local_hparams.ver_major = vermsg->ver_major;
		ldcp->local_hparams.ver_minor = vermsg->ver_minor;
		ldcp->hstate |= VER_ACK_RCVD;

		DBG2(vgenp, ldcp, "VER_ACK_RCVD, ver(%d,%d) \n",
		    vermsg->ver_major,  vermsg->ver_minor);

		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {

			/*  VER_ACK_SENT and VER_ACK_RCVD */

			/* local and peer versions match? */
			ASSERT((ldcp->local_hparams.ver_major ==
			    ldcp->peer_hparams.ver_major) &&
			    (ldcp->local_hparams.ver_minor ==
			    ldcp->peer_hparams.ver_minor));

			/* move to the next phase */
			vgen_handshake(vh_nextphase(ldcp));
		}
		break;

	case VIO_SUBTYPE_NACK:

		if (ldcp->hphase != VH_PHASE1) {
			/*  This should not happen.  */
			DWARN(vgenp, ldcp, "VER_NACK_RCVD Invalid "
			"Phase(%u)\n", ldcp->hphase);
			return (VGEN_FAILURE);
		}

		DBG2(vgenp, ldcp, "VER_NACK_RCVD next ver(%d,%d)\n",
		    vermsg->ver_major, vermsg->ver_minor);

		/* check if version in NACK is zero */
		if (vermsg->ver_major == 0 && vermsg->ver_minor == 0) {
			/*
			 * Version Negotiation has failed.
			 */
			DWARN(vgenp, ldcp, "Version Negotiation Failed\n");
			return (VGEN_FAILURE);
		}

		/*
		 * Pick the version to propose next, based on the version the
		 * peer suggested in its NACK, and store it in local_hparams.
		 */
		idx = 0;

		for (;;) {

			if (vermsg->ver_major > versions[idx].ver_major) {
				/* select next lower version */

				ldcp->local_hparams.ver_major =
				    versions[idx].ver_major;
				ldcp->local_hparams.ver_minor =
				    versions[idx].ver_minor;
				break;
			}

			if (vermsg->ver_major == versions[idx].ver_major) {
				/* major version match */

				ldcp->local_hparams.ver_major =
				    versions[idx].ver_major;

				ldcp->local_hparams.ver_minor =
				    versions[idx].ver_minor;
				break;
			}

			idx++;

			if (idx == VGEN_NUM_VER) {
				/*
				 * no version match.
				 * Version Negotiation has failed.
				 */
				DWARN(vgenp, ldcp,
				    "Version Negotiation Failed\n");
				return (VGEN_FAILURE);
			}

		}

		/* propose the newly selected version to the peer */
		rv = vgen_send_version_negotiate(ldcp);
		if (rv != VGEN_SUCCESS) {
			return (rv);
		}

		break;
	}

	DBG1(vgenp, ldcp, "exit\n");
	return (VGEN_SUCCESS);
}
3679 
3680 /* Check if the attributes are supported */
3681 static int
3682 vgen_check_attr_info(vgen_ldc_t *ldcp, vnet_attr_msg_t *msg)
3683 {
3684 	_NOTE(ARGUNUSED(ldcp))
3685 
3686 	/*
3687 	 * currently, we support these attr values:
3688 	 * mtu of ethernet, addr_type of mac, xfer_mode of
3689 	 * ldc shared memory, ack_freq of 0 (data is acked if
3690 	 * the ack bit is set in the descriptor) and the address should
3691 	 * match the address in the port node.
3692 	 */
3693 	if ((msg->mtu != ETHERMAX) ||
3694 	    (msg->addr_type != ADDR_TYPE_MAC) ||
3695 	    (msg->xfer_mode != VIO_DRING_MODE) ||
3696 	    (msg->ack_freq > 64)) {
3697 		return (VGEN_FAILURE);
3698 	}
3699 
3700 	return (VGEN_SUCCESS);
3701 }
3702 
3703 /*
3704  * Handle an attribute info msg from the peer or an ACK/NACK from the peer
3705  * to an attr info msg that we sent.
3706  */
3707 static int
3708 vgen_handle_attr_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
3709 {
3710 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3711 	vnet_attr_msg_t *attrmsg = (vnet_attr_msg_t *)tagp;
3712 	int		ack = 0;
3713 	int		rv = 0;
3714 
3715 	DBG1(vgenp, ldcp, "enter\n");
3716 	if (ldcp->hphase != VH_PHASE2) {
3717 		DWARN(vgenp, ldcp, "Rcvd ATTR_INFO subtype(%d),"
3718 		" Invalid Phase(%u)\n",
3719 		    tagp->vio_subtype, ldcp->hphase);
3720 		return (VGEN_FAILURE);
3721 	}
3722 	switch (tagp->vio_subtype) {
3723 	case VIO_SUBTYPE_INFO:
3724 
3725 		DBG2(vgenp, ldcp, "ATTR_INFO_RCVD \n");
3726 		ldcp->hstate |= ATTR_INFO_RCVD;
3727 
3728 		/* save peer's values */
3729 		ldcp->peer_hparams.mtu = attrmsg->mtu;
3730 		ldcp->peer_hparams.addr = attrmsg->addr;
3731 		ldcp->peer_hparams.addr_type = attrmsg->addr_type;
3732 		ldcp->peer_hparams.xfer_mode = attrmsg->xfer_mode;
3733 		ldcp->peer_hparams.ack_freq = attrmsg->ack_freq;
3734 
3735 		if (vgen_check_attr_info(ldcp, attrmsg) == VGEN_FAILURE) {
3736 			/* unsupported attr, send NACK */
3737 			tagp->vio_subtype = VIO_SUBTYPE_NACK;
3738 		} else {
3739 			ack = 1;
3740 			tagp->vio_subtype = VIO_SUBTYPE_ACK;
3741 		}
3742 		tagp->vio_sid = ldcp->local_sid;
3743 
3744 		/* send reply msg back to peer */
3745 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*attrmsg),
3746 		    B_FALSE);
3747 		if (rv != VGEN_SUCCESS) {
3748 			return (rv);
3749 		}
3750 
3751 		if (ack) {
3752 			ldcp->hstate |= ATTR_ACK_SENT;
3753 			DBG2(vgenp, ldcp, "ATTR_ACK_SENT \n");
3754 		} else {
3755 			/* failed */
3756 			DWARN(vgenp, ldcp, "ATTR_NACK_SENT \n");
3757 			return (VGEN_FAILURE);
3758 		}
3759 
3760 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
3761 			vgen_handshake(vh_nextphase(ldcp));
3762 		}
3763 
3764 		break;
3765 
3766 	case VIO_SUBTYPE_ACK:
3767 
3768 		ldcp->hstate |= ATTR_ACK_RCVD;
3769 
3770 		DBG2(vgenp, ldcp, "ATTR_ACK_RCVD \n");
3771 
3772 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
3773 			vgen_handshake(vh_nextphase(ldcp));
3774 		}
3775 		break;
3776 
3777 	case VIO_SUBTYPE_NACK:
3778 
3779 		DBG2(vgenp, ldcp, "ATTR_NACK_RCVD \n");
3780 		return (VGEN_FAILURE);
3781 	}
3782 	DBG1(vgenp, ldcp, "exit\n");
3783 	return (VGEN_SUCCESS);
3784 }
3785 
3786 /* Check if the dring info msg is ok */
3787 static int
3788 vgen_check_dring_reg(vio_dring_reg_msg_t *msg)
3789 {
3790 	/* check if msg contents are ok */
3791 	if ((msg->num_descriptors < 128) || (msg->descriptor_size <
3792 	    sizeof (vnet_public_desc_t))) {
3793 		return (VGEN_FAILURE);
3794 	}
3795 	return (VGEN_SUCCESS);
3796 }
3797 
3798 /*
3799  * Handle a descriptor ring register msg from the peer or an ACK/NACK from
3800  * the peer to a dring register msg that we sent.
3801  */
3802 static int
3803 vgen_handle_dring_reg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
3804 {
3805 	vio_dring_reg_msg_t *msg = (vio_dring_reg_msg_t *)tagp;
3806 	ldc_mem_cookie_t dcookie;
3807 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3808 	int ack = 0;
3809 	int rv = 0;
3810 
3811 	DBG1(vgenp, ldcp, "enter\n");
3812 	if (ldcp->hphase < VH_PHASE2) {
3813 		/* dring_info can be rcvd in any of the phases after Phase1 */
3814 		DWARN(vgenp, ldcp,
3815 		    "Rcvd DRING_INFO Subtype (%d), Invalid Phase(%u)\n",
3816 		    tagp->vio_subtype, ldcp->hphase);
3817 		return (VGEN_FAILURE);
3818 	}
3819 	switch (tagp->vio_subtype) {
3820 	case VIO_SUBTYPE_INFO:
3821 
3822 		DBG2(vgenp, ldcp, "DRING_INFO_RCVD \n");
3823 		ldcp->hstate |= DRING_INFO_RCVD;
3824 		bcopy((msg->cookie), &dcookie, sizeof (dcookie));
3825 
3826 		ASSERT(msg->ncookies == 1);
3827 
3828 		if (vgen_check_dring_reg(msg) == VGEN_SUCCESS) {
3829 			/*
3830 			 * verified dring info msg to be ok,
3831 			 * now try to map the remote dring.
3832 			 */
3833 			rv = vgen_init_rxds(ldcp, msg->num_descriptors,
3834 			    msg->descriptor_size, &dcookie,
3835 			    msg->ncookies);
3836 			if (rv == DDI_SUCCESS) {
3837 				/* now we can ack the peer */
3838 				ack = 1;
3839 			}
3840 		}
3841 		if (ack == 0) {
3842 			/* failed, send NACK */
3843 			tagp->vio_subtype = VIO_SUBTYPE_NACK;
3844 		} else {
3845 			if (!(ldcp->peer_hparams.dring_ready)) {
3846 
3847 				/* save peer's dring_info values */
3848 				bcopy(&dcookie,
3849 				    &(ldcp->peer_hparams.dring_cookie),
3850 				    sizeof (dcookie));
3851 				ldcp->peer_hparams.num_desc =
3852 				    msg->num_descriptors;
3853 				ldcp->peer_hparams.desc_size =
3854 				    msg->descriptor_size;
3855 				ldcp->peer_hparams.num_dcookies =
3856 				    msg->ncookies;
3857 
3858 				/* set dring_ident for the peer */
3859 				ldcp->peer_hparams.dring_ident =
3860 				    (uint64_t)ldcp->rxdp;
3861 				/* return the dring_ident in ack msg */
3862 				msg->dring_ident =
3863 				    (uint64_t)ldcp->rxdp;
3864 
3865 				ldcp->peer_hparams.dring_ready = B_TRUE;
3866 			}
3867 			tagp->vio_subtype = VIO_SUBTYPE_ACK;
3868 		}
3869 		tagp->vio_sid = ldcp->local_sid;
3870 		/* send reply msg back to peer */
3871 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msg),
3872 		    B_FALSE);
3873 		if (rv != VGEN_SUCCESS) {
3874 			return (rv);
3875 		}
3876 
3877 		if (ack) {
3878 			ldcp->hstate |= DRING_ACK_SENT;
3879 			DBG2(vgenp, ldcp, "DRING_ACK_SENT");
3880 		} else {
3881 			DWARN(vgenp, ldcp, "DRING_NACK_SENT");
3882 			return (VGEN_FAILURE);
3883 		}
3884 
3885 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
3886 			vgen_handshake(vh_nextphase(ldcp));
3887 		}
3888 
3889 		break;
3890 
3891 	case VIO_SUBTYPE_ACK:
3892 
3893 		ldcp->hstate |= DRING_ACK_RCVD;
3894 
3895 		DBG2(vgenp, ldcp, "DRING_ACK_RCVD");
3896 
3897 		if (!(ldcp->local_hparams.dring_ready)) {
3898 			/* local dring is now ready */
3899 			ldcp->local_hparams.dring_ready = B_TRUE;
3900 
3901 			/* save dring_ident acked by peer */
3902 			ldcp->local_hparams.dring_ident =
3903 			    msg->dring_ident;
3904 		}
3905 
3906 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
3907 			vgen_handshake(vh_nextphase(ldcp));
3908 		}
3909 
3910 		break;
3911 
3912 	case VIO_SUBTYPE_NACK:
3913 
3914 		DBG2(vgenp, ldcp, "DRING_NACK_RCVD");
3915 		return (VGEN_FAILURE);
3916 	}
3917 	DBG1(vgenp, ldcp, "exit\n");
3918 	return (VGEN_SUCCESS);
3919 }
3920 
3921 /*
3922  * Handle a rdx info msg from the peer or an ACK/NACK
3923  * from the peer to a rdx info msg that we sent.
3924  */
3925 static int
3926 vgen_handle_rdx_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
3927 {
3928 	int rv = 0;
3929 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
3930 
3931 	DBG1(vgenp, ldcp, "enter\n");
3932 	if (ldcp->hphase != VH_PHASE3) {
3933 		DWARN(vgenp, ldcp,
3934 		    "Rcvd RDX_INFO Subtype (%d), Invalid Phase(%u)\n",
3935 		    tagp->vio_subtype, ldcp->hphase);
3936 		return (VGEN_FAILURE);
3937 	}
3938 	switch (tagp->vio_subtype) {
3939 	case VIO_SUBTYPE_INFO:
3940 
3941 		DBG2(vgenp, ldcp, "RDX_INFO_RCVD \n");
3942 		ldcp->hstate |= RDX_INFO_RCVD;
3943 
3944 		tagp->vio_subtype = VIO_SUBTYPE_ACK;
3945 		tagp->vio_sid = ldcp->local_sid;
3946 		/* send reply msg back to peer */
3947 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (vio_rdx_msg_t),
3948 		    B_FALSE);
3949 		if (rv != VGEN_SUCCESS) {
3950 			return (rv);
3951 		}
3952 
3953 		ldcp->hstate |= RDX_ACK_SENT;
3954 		DBG2(vgenp, ldcp, "RDX_ACK_SENT \n");
3955 
3956 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
3957 			vgen_handshake(vh_nextphase(ldcp));
3958 		}
3959 
3960 		break;
3961 
3962 	case VIO_SUBTYPE_ACK:
3963 
3964 		ldcp->hstate |= RDX_ACK_RCVD;
3965 
3966 		DBG2(vgenp, ldcp, "RDX_ACK_RCVD \n");
3967 
3968 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
3969 			vgen_handshake(vh_nextphase(ldcp));
3970 		}
3971 		break;
3972 
3973 	case VIO_SUBTYPE_NACK:
3974 
3975 		DBG2(vgenp, ldcp, "RDX_NACK_RCVD \n");
3976 		return (VGEN_FAILURE);
3977 	}
3978 	DBG1(vgenp, ldcp, "exit\n");
3979 	return (VGEN_SUCCESS);
3980 }
3981 
3982 /* Handle ACK/NACK from vsw to a set multicast msg that we sent */
3983 static int
3984 vgen_handle_mcast_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
3985 {
3986 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3987 	vnet_mcast_msg_t *msgp = (vnet_mcast_msg_t *)tagp;
3988 	struct ether_addr *addrp;
3989 	int count;
3990 	int i;
3991 
3992 	DBG1(vgenp, ldcp, "enter\n");
3993 	switch (tagp->vio_subtype) {
3994 
3995 	case VIO_SUBTYPE_INFO:
3996 
3997 		/* vnet shouldn't recv set mcast msg, only vsw handles it */
3998 		DWARN(vgenp, ldcp, "rcvd SET_MCAST_INFO \n");
3999 		break;
4000 
4001 	case VIO_SUBTYPE_ACK:
4002 
4003 		/* success adding/removing multicast addr */
4004 		DBG1(vgenp, ldcp, "rcvd SET_MCAST_ACK \n");
4005 		break;
4006 
4007 	case VIO_SUBTYPE_NACK:
4008 
4009 		DWARN(vgenp, ldcp, "rcvd SET_MCAST_NACK \n");
4010 		if (!(msgp->set)) {
4011 			/* multicast remove request failed */
4012 			break;
4013 		}
4014 
4015 		/* multicast add request failed */
4016 		for (count = 0; count < msgp->count; count++) {
4017 			addrp = &(msgp->mca[count]);
4018 
4019 			/* delete address from the table */
4020 			for (i = 0; i < vgenp->mccount; i++) {
4021 				if (ether_cmp(addrp,
4022 				    &(vgenp->mctab[i])) == 0) {
4023 					if (vgenp->mccount > 1) {
4024 						int t = vgenp->mccount - 1;
4025 						vgenp->mctab[i] =
4026 						    vgenp->mctab[t];
4027 					}
4028 					vgenp->mccount--;
4029 					break;
4030 				}
4031 			}
4032 		}
4033 		break;
4034 
4035 	}
4036 	DBG1(vgenp, ldcp, "exit\n");
4037 
4038 	return (VGEN_SUCCESS);
4039 }
4040 
4041 /* handler for control messages received from the peer ldc end-point */
4042 static int
4043 vgen_handle_ctrlmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4044 {
4045 	int rv = 0;
4046 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4047 
4048 	DBG1(vgenp, ldcp, "enter\n");
4049 	switch (tagp->vio_subtype_env) {
4050 
4051 	case VIO_VER_INFO:
4052 		rv = vgen_handle_version_negotiate(ldcp, tagp);
4053 		break;
4054 
4055 	case VIO_ATTR_INFO:
4056 		rv = vgen_handle_attr_info(ldcp, tagp);
4057 		break;
4058 
4059 	case VIO_DRING_REG:
4060 		rv = vgen_handle_dring_reg(ldcp, tagp);
4061 		break;
4062 
4063 	case VIO_RDX:
4064 		rv = vgen_handle_rdx_info(ldcp, tagp);
4065 		break;
4066 
4067 	case VNET_MCAST_INFO:
4068 		rv = vgen_handle_mcast_info(ldcp, tagp);
4069 		break;
4070 
4071 	}
4072 
4073 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
4074 	return (rv);
4075 }
4076 
4077 /* handler for data messages received from the peer ldc end-point */
4078 static int
4079 vgen_handle_datamsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4080 {
4081 	int rv = 0;
4082 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4083 
4084 	DBG1(vgenp, ldcp, "enter\n");
4085 
4086 	if (ldcp->hphase != VH_DONE)
4087 		return (rv);
4088 	switch (tagp->vio_subtype_env) {
4089 	case VIO_DRING_DATA:
4090 		rv = vgen_handle_dring_data(ldcp, tagp);
4091 		break;
4092 	default:
4093 		break;
4094 	}
4095 
4096 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
4097 	return (rv);
4098 }
4099 
4100 static int
4101 vgen_send_dring_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp, uint32_t start,
4102     int32_t end, uint8_t pstate)
4103 {
4104 	int rv = 0;
4105 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4106 	vio_dring_msg_t *msgp = (vio_dring_msg_t *)tagp;
4107 
4108 	tagp->vio_subtype = VIO_SUBTYPE_ACK;
4109 	tagp->vio_sid = ldcp->local_sid;
4110 	msgp->start_idx = start;
4111 	msgp->end_idx = end;
4112 	msgp->dring_process_state = pstate;
4113 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msgp), B_FALSE);
4114 	if (rv != VGEN_SUCCESS) {
4115 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
4116 	}
4117 	return (rv);
4118 }
4119 
4120 static int
4121 vgen_handle_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4122 {
4123 	int rv = 0;
4124 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4125 
4126 
4127 	DBG1(vgenp, ldcp, "enter\n");
4128 	switch (tagp->vio_subtype) {
4129 
4130 	case VIO_SUBTYPE_INFO:
4131 		/*
4132 		 * To reduce the locking contention, release the
4133 		 * cblock here and re-acquire it once we are done
4134 		 * receiving packets.
4135 		 */
4136 		mutex_exit(&ldcp->cblock);
4137 		mutex_enter(&ldcp->rxlock);
4138 		rv = vgen_handle_dring_data_info(ldcp, tagp);
4139 		mutex_exit(&ldcp->rxlock);
4140 		mutex_enter(&ldcp->cblock);
4141 		break;
4142 
4143 	case VIO_SUBTYPE_ACK:
4144 		rv = vgen_handle_dring_data_ack(ldcp, tagp);
4145 		break;
4146 
4147 	case VIO_SUBTYPE_NACK:
4148 		rv = vgen_handle_dring_data_nack(ldcp, tagp);
4149 		break;
4150 	}
4151 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
4152 	return (rv);
4153 }
4154 
4155 static int
4156 vgen_handle_dring_data_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4157 {
4158 	uint32_t start;
4159 	int32_t end;
4160 	int rv = 0;
4161 	vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;
4162 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4163 #ifdef VGEN_HANDLE_LOST_PKTS
4164 	vgen_stats_t *statsp = ldcp->statsp;
4165 	uint32_t rxi;
4166 	int n;
4167 #endif
4168 
4169 	DBG1(vgenp, ldcp, "enter\n");
4170 
4171 	start = dringmsg->start_idx;
4172 	end = dringmsg->end_idx;
4173 	/*
4174 	 * received a data msg, which contains the start and end
4175 	 * indices of the descriptors within the rx ring holding data,
4176 	 * the seq_num of data packet corresponding to the start index,
4177 	 * and the dring_ident.
4178 	 * We can now read the contents of each of these descriptors
4179 	 * and gather data from it.
4180 	 */
4181 	DBG1(vgenp, ldcp, "INFO: start(%d), end(%d)\n",
4182 	    start, end);
4183 
4184 	/* validate rx start and end indeces */
4185 	if (!(CHECK_RXI(start, ldcp)) || ((end != -1) &&
4186 	    !(CHECK_RXI(end, ldcp)))) {
4187 		DWARN(vgenp, ldcp, "Invalid Rx start(%d) or end(%d)\n",
4188 		    start, end);
4189 		/* drop the message if invalid index */
4190 		return (rv);
4191 	}
4192 
4193 	/* validate dring_ident */
4194 	if (dringmsg->dring_ident != ldcp->peer_hparams.dring_ident) {
4195 		DWARN(vgenp, ldcp, "Invalid dring ident 0x%x\n",
4196 		    dringmsg->dring_ident);
4197 		/* invalid dring_ident, drop the msg */
4198 		return (rv);
4199 	}
4200 #ifdef DEBUG
4201 	if (vgen_trigger_rxlost) {
4202 		/* drop this msg to simulate lost pkts for debugging */
4203 		vgen_trigger_rxlost = 0;
4204 		return (rv);
4205 	}
4206 #endif
4207 
4208 #ifdef	VGEN_HANDLE_LOST_PKTS
4209 
4210 	/* receive start index doesn't match expected index */
4211 	if (ldcp->next_rxi != start) {
4212 		DWARN(vgenp, ldcp, "next_rxi(%d) != start(%d)\n",
4213 		    ldcp->next_rxi, start);
4214 
4215 		/* calculate the number of pkts lost */
4216 		if (start >= ldcp->next_rxi) {
4217 			n = start - ldcp->next_rxi;
4218 		} else  {
4219 			n = ldcp->num_rxds - (ldcp->next_rxi - start);
4220 		}
4221 
4222 		/*
4223 		 * sequence number of dring data message
4224 		 * is less than the next sequence number that
4225 		 * is expected:
4226 		 *
4227 		 * drop the message and the corresponding packets.
4228 		 */
4229 		if (ldcp->next_rxseq > dringmsg->seq_num) {
4230 			DWARN(vgenp, ldcp, "dropping pkts, expected "
4231 			"rxseq(0x%lx) > recvd(0x%lx)\n",
4232 			    ldcp->next_rxseq, dringmsg->seq_num);
4233 			/*
4234 			 * duplicate/multiple retransmissions from
4235 			 * sender?? drop this msg.
4236 			 */
4237 			return (rv);
4238 		}
4239 
4240 		/*
4241 		 * sequence number of dring data message
4242 		 * is greater than the next expected sequence number
4243 		 *
4244 		 * send a NACK back to the peer to indicate lost
4245 		 * packets.
4246 		 */
4247 		if (dringmsg->seq_num > ldcp->next_rxseq) {
4248 			statsp->rx_lost_pkts += n;
4249 			tagp->vio_subtype = VIO_SUBTYPE_NACK;
4250 			tagp->vio_sid = ldcp->local_sid;
4251 			/* indicate the range of lost descriptors */
4252 			dringmsg->start_idx = ldcp->next_rxi;
4253 			rxi = start;
4254 			DECR_RXI(rxi, ldcp);
4255 			dringmsg->end_idx = rxi;
4256 			/* dring ident is left unchanged */
4257 			rv = vgen_sendmsg(ldcp, (caddr_t)tagp,
4258 			    sizeof (*dringmsg), B_FALSE);
4259 			if (rv != VGEN_SUCCESS) {
4260 				DWARN(vgenp, ldcp,
4261 				    "vgen_sendmsg failed, stype:NACK\n");
4262 				return (rv);
4263 			}
4264 #ifdef VGEN_REXMIT
4265 			/*
4266 			 * stop further processing until peer
4267 			 * retransmits with the right index.
4268 			 * update next_rxseq expected.
4269 			 */
4270 			ldcp->next_rxseq += 1;
4271 			return (rv);
4272 #else	/* VGEN_REXMIT */
4273 			/*
4274 			 * treat this range of descrs/pkts as dropped
4275 			 * and set the new expected values for next_rxi
4276 			 * and next_rxseq. continue(below) to process
4277 			 * from the new start index.
4278 			 */
4279 			ldcp->next_rxi = start;
4280 			ldcp->next_rxseq += 1;
4281 #endif	/* VGEN_REXMIT */
4282 
4283 		} else if (dringmsg->seq_num == ldcp->next_rxseq) {
4284 			/*
4285 			 * expected and received seqnums match, but
4286 			 * the descriptor indeces don't?
4287 			 *
4288 			 * restart handshake with peer.
4289 			 */
4290 			DWARN(vgenp, ldcp, "next_rxseq(0x%lx)=="
4291 			    "seq_num(0x%lx)\n", ldcp->next_rxseq,
4292 			    dringmsg->seq_num);
4293 
4294 		}
4295 
4296 	} else {
4297 		/* expected and start dring indeces match */
4298 
4299 		if (dringmsg->seq_num != ldcp->next_rxseq) {
4300 
4301 			/* seqnums don't match */
4302 
4303 			DWARN(vgenp, ldcp,
4304 			    "next_rxseq(0x%lx) != seq_num(0x%lx)\n",
4305 			    ldcp->next_rxseq, dringmsg->seq_num);
4306 		}
4307 	}
4308 
4309 #endif	/* VGEN_HANDLE_LOST_PKTS */
4310 
4311 	/* Now receive messages */
4312 	rv = vgen_process_dring_data(ldcp, tagp);
4313 
4314 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
4315 	return (rv);
4316 }
4317 
4318 static int
4319 vgen_process_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4320 {
4321 	boolean_t set_ack_start = B_FALSE;
4322 	uint32_t start;
4323 	uint32_t ack_end;
4324 	uint32_t next_rxi;
4325 	uint32_t rxi;
4326 	int count = 0;
4327 	int rv = 0;
4328 	uint32_t retries = 0;
4329 	vgen_stats_t *statsp;
4330 	vnet_public_desc_t *rxdp;
4331 	vio_dring_entry_hdr_t *hdrp;
4332 	mblk_t *bp = NULL;
4333 	mblk_t *bpt = NULL;
4334 	uint32_t ack_start;
4335 	uint32_t datalen;
4336 	uint32_t ncookies;
4337 	boolean_t rxd_err = B_FALSE;
4338 	mblk_t *mp = NULL;
4339 	size_t nbytes;
4340 	boolean_t ack_needed = B_FALSE;
4341 	size_t nread;
4342 	uint64_t off = 0;
4343 	struct ether_header *ehp;
4344 	vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;
4345 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4346 
4347 	DBG1(vgenp, ldcp, "enter\n");
4348 
4349 	statsp = ldcp->statsp;
4350 	start = dringmsg->start_idx;
4351 
4352 	/*
4353 	 * start processing the descriptors from the specified
4354 	 * start index, up to the index a descriptor is not ready
4355 	 * to be processed or we process the entire descriptor ring
4356 	 * and wrap around upto the start index.
4357 	 */
4358 
4359 	/* need to set the start index of descriptors to be ack'd */
4360 	set_ack_start = B_TRUE;
4361 
4362 	/* index upto which we have ack'd */
4363 	ack_end = start;
4364 	DECR_RXI(ack_end, ldcp);
4365 
4366 	next_rxi = rxi =  start;
4367 	do {
4368 vgen_recv_retry:
4369 		rv = ldc_mem_dring_acquire(ldcp->rx_dhandle, rxi, rxi);
4370 		if (rv != 0) {
4371 			DWARN(vgenp, ldcp, "ldc_mem_dring_acquire() failed"
4372 			    " rv(%d)\n", rv);
4373 			statsp->ierrors++;
4374 			return (rv);
4375 		}
4376 
4377 		rxdp = &(ldcp->rxdp[rxi]);
4378 		hdrp = &rxdp->hdr;
4379 
4380 		if (hdrp->dstate != VIO_DESC_READY) {
4381 			/*
4382 			 * Before waiting and retry here, queue
4383 			 * the messages that are received already.
4384 			 * This will help the soft interrupt to
4385 			 * send them up with less latency.
4386 			 */
4387 			if (bp != NULL) {
4388 				DTRACE_PROBE1(vgen_rcv_msgs, int, count);
4389 				vgen_ldc_queue_data(ldcp, bp, bpt);
4390 				count = 0;
4391 				bp = bpt = NULL;
4392 			}
4393 			/*
4394 			 * descriptor is not ready.
4395 			 * retry descriptor acquire, stop processing
4396 			 * after max # retries.
4397 			 */
4398 			if (retries == vgen_recv_retries)
4399 				break;
4400 			retries++;
4401 			drv_usecwait(vgen_recv_delay);
4402 			goto vgen_recv_retry;
4403 		}
4404 		retries = 0;
4405 
4406 		if (set_ack_start) {
4407 			/*
4408 			 * initialize the start index of the range
4409 			 * of descriptors to be ack'd.
4410 			 */
4411 			ack_start = rxi;
4412 			set_ack_start = B_FALSE;
4413 		}
4414 
4415 		datalen = rxdp->nbytes;
4416 		ncookies = rxdp->ncookies;
4417 		if ((datalen < ETHERMIN) ||
4418 		    (ncookies == 0) ||
4419 		    (ncookies > MAX_COOKIES)) {
4420 			rxd_err = B_TRUE;
4421 		} else {
4422 			/*
4423 			 * Try to allocate an mblk from the free pool
4424 			 * of recv mblks for the channel.
4425 			 * If this fails, use allocb().
4426 			 */
4427 			nbytes = (VNET_IPALIGN + datalen + 7) & ~7;
4428 			mp = vio_multipool_allocb(&ldcp->vmp, nbytes);
4429 			if (!mp) {
4430 				/*
4431 				 * The data buffer returned by
4432 				 * allocb(9F) is 8byte aligned. We
4433 				 * allocate extra 8 bytes to ensure
4434 				 * size is multiple of 8 bytes for
4435 				 * ldc_mem_copy().
4436 				 */
4437 				statsp->rx_vio_allocb_fail++;
4438 				mp = allocb(VNET_IPALIGN + datalen + 8,
4439 				    BPRI_MED);
4440 			}
4441 		}
4442 		if ((rxd_err) || (mp == NULL)) {
4443 			/*
4444 			 * rxd_err or allocb() failure,
4445 			 * drop this packet, get next.
4446 			 */
4447 			if (rxd_err) {
4448 				statsp->ierrors++;
4449 				rxd_err = B_FALSE;
4450 			} else {
4451 				statsp->rx_allocb_fail++;
4452 			}
4453 
4454 			ack_needed = hdrp->ack;
4455 
4456 			/* set descriptor done bit */
4457 			hdrp->dstate = VIO_DESC_DONE;
4458 
4459 			rv = ldc_mem_dring_release(ldcp->rx_dhandle,
4460 			    rxi, rxi);
4461 			if (rv != 0) {
4462 				DWARN(vgenp, ldcp,
4463 				    "ldc_mem_dring_release err rv(%d)\n", rv);
4464 				return (rv);
4465 			}
4466 
4467 			if (ack_needed) {
4468 				ack_needed = B_FALSE;
4469 				/*
4470 				 * sender needs ack for this packet,
4471 				 * ack pkts upto this index.
4472 				 */
4473 				ack_end = rxi;
4474 
4475 				rv = vgen_send_dring_ack(ldcp, tagp,
4476 				    ack_start, ack_end,
4477 				    VIO_DP_ACTIVE);
4478 				if (rv != VGEN_SUCCESS) {
4479 					goto error_ret;
4480 				}
4481 
4482 				/* need to set new ack start index */
4483 				set_ack_start = B_TRUE;
4484 			}
4485 			goto vgen_next_rxi;
4486 		}
4487 
4488 		nread = nbytes;
4489 		rv = ldc_mem_copy(ldcp->ldc_handle,
4490 		    (caddr_t)mp->b_rptr, off, &nread,
4491 		    rxdp->memcookie, ncookies, LDC_COPY_IN);
4492 
4493 		/* if ldc_mem_copy() failed */
4494 		if (rv) {
4495 			DWARN(vgenp, ldcp, "ldc_mem_copy err rv(%d)\n", rv);
4496 			statsp->ierrors++;
4497 			freemsg(mp);
4498 			goto error_ret;
4499 		}
4500 
4501 		ack_needed = hdrp->ack;
4502 		hdrp->dstate = VIO_DESC_DONE;
4503 
4504 		rv = ldc_mem_dring_release(ldcp->rx_dhandle, rxi, rxi);
4505 		if (rv != 0) {
4506 			DWARN(vgenp, ldcp,
4507 			    "ldc_mem_dring_release err rv(%d)\n", rv);
4508 			goto error_ret;
4509 		}
4510 
4511 		mp->b_rptr += VNET_IPALIGN;
4512 
4513 		if (ack_needed) {
4514 			ack_needed = B_FALSE;
4515 			/*
4516 			 * sender needs ack for this packet,
4517 			 * ack pkts upto this index.
4518 			 */
4519 			ack_end = rxi;
4520 
4521 			rv = vgen_send_dring_ack(ldcp, tagp,
4522 			    ack_start, ack_end, VIO_DP_ACTIVE);
4523 			if (rv != VGEN_SUCCESS) {
4524 				goto error_ret;
4525 			}
4526 
4527 			/* need to set new ack start index */
4528 			set_ack_start = B_TRUE;
4529 		}
4530 
4531 		if (nread != nbytes) {
4532 			DWARN(vgenp, ldcp,
4533 			    "ldc_mem_copy nread(%lx), nbytes(%lx)\n",
4534 			    nread, nbytes);
4535 			statsp->ierrors++;
4536 			freemsg(mp);
4537 			goto vgen_next_rxi;
4538 		}
4539 
4540 		/* point to the actual end of data */
4541 		mp->b_wptr = mp->b_rptr + datalen;
4542 
4543 		/* update stats */
4544 		statsp->ipackets++;
4545 		statsp->rbytes += datalen;
4546 		ehp = (struct ether_header *)mp->b_rptr;
4547 		if (IS_BROADCAST(ehp))
4548 			statsp->brdcstrcv++;
4549 		else if (IS_MULTICAST(ehp))
4550 			statsp->multircv++;
4551 
4552 		/* build a chain of received packets */
4553 		if (bp == NULL) {
4554 			/* first pkt */
4555 			bp = mp;
4556 			bpt = bp;
4557 			bpt->b_next = NULL;
4558 		} else {
4559 			mp->b_next = NULL;
4560 			bpt->b_next = mp;
4561 			bpt = mp;
4562 		}
4563 
4564 		if (count++ > vgen_chain_len) {
4565 			DTRACE_PROBE1(vgen_rcv_msgs, int, count);
4566 			vgen_ldc_queue_data(ldcp, bp, bpt);
4567 			count = 0;
4568 			bp = bpt = NULL;
4569 		}
4570 
4571 vgen_next_rxi:
4572 		/* update end index of range of descrs to be ack'd */
4573 		ack_end = rxi;
4574 
4575 		/* update the next index to be processed */
4576 		INCR_RXI(next_rxi, ldcp);
4577 		if (next_rxi == start) {
4578 			/*
4579 			 * processed the entire descriptor ring upto
4580 			 * the index at which we started.
4581 			 */
4582 			break;
4583 		}
4584 
4585 		rxi = next_rxi;
4586 
4587 	_NOTE(CONSTCOND)
4588 	} while (1);
4589 
4590 	/*
4591 	 * send an ack message to peer indicating that we have stopped
4592 	 * processing descriptors.
4593 	 */
4594 	if (set_ack_start) {
4595 		/*
4596 		 * We have ack'd upto some index and we have not
4597 		 * processed any descriptors beyond that index.
4598 		 * Use the last ack'd index as both the start and
4599 		 * end of range of descrs being ack'd.
4600 		 * Note: This results in acking the last index twice
4601 		 * and should be harmless.
4602 		 */
4603 		ack_start = ack_end;
4604 	}
4605 
4606 	rv = vgen_send_dring_ack(ldcp, tagp, ack_start, ack_end,
4607 	    VIO_DP_STOPPED);
4608 	if (rv != VGEN_SUCCESS) {
4609 		goto error_ret;
4610 	}
4611 
4612 	/* save new recv index and expected seqnum of next dring msg */
4613 	ldcp->next_rxi = next_rxi;
4614 	ldcp->next_rxseq += 1;
4615 
4616 error_ret:
4617 	/* queue the packets received so far */
4618 	if (bp != NULL) {
4619 		DTRACE_PROBE1(vgen_rcv_msgs, int, count);
4620 		vgen_ldc_queue_data(ldcp, bp, bpt);
4621 		bp = bpt = NULL;
4622 	}
4623 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
4624 	return (rv);
4625 
4626 }
4627 
4628 static int
4629 vgen_handle_dring_data_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4630 {
4631 	int rv = 0;
4632 	uint32_t start;
4633 	int32_t end;
4634 	uint32_t txi;
4635 	boolean_t ready_txd = B_FALSE;
4636 	vgen_stats_t *statsp;
4637 	vgen_private_desc_t *tbufp;
4638 	vnet_public_desc_t *txdp;
4639 	vio_dring_entry_hdr_t *hdrp;
4640 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4641 	vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;
4642 
4643 	DBG1(vgenp, ldcp, "enter\n");
4644 	start = dringmsg->start_idx;
4645 	end = dringmsg->end_idx;
4646 	statsp = ldcp->statsp;
4647 
4648 	/*
4649 	 * received an ack corresponding to a specific descriptor for
4650 	 * which we had set the ACK bit in the descriptor (during
4651 	 * transmit). This enables us to reclaim descriptors.
4652 	 */
4653 
4654 	DBG2(vgenp, ldcp, "ACK:  start(%d), end(%d)\n", start, end);
4655 
4656 	/* validate start and end indeces in the tx ack msg */
4657 	if (!(CHECK_TXI(start, ldcp)) || !(CHECK_TXI(end, ldcp))) {
4658 		/* drop the message if invalid index */
4659 		DWARN(vgenp, ldcp, "Invalid Tx ack start(%d) or end(%d)\n",
4660 		    start, end);
4661 		return (rv);
4662 	}
4663 	/* validate dring_ident */
4664 	if (dringmsg->dring_ident != ldcp->local_hparams.dring_ident) {
4665 		/* invalid dring_ident, drop the msg */
4666 		DWARN(vgenp, ldcp, "Invalid dring ident 0x%x\n",
4667 		    dringmsg->dring_ident);
4668 		return (rv);
4669 	}
4670 	statsp->dring_data_acks++;
4671 
4672 	/* reclaim descriptors that are done */
4673 	vgen_reclaim(ldcp);
4674 
4675 	if (dringmsg->dring_process_state != VIO_DP_STOPPED) {
4676 		/*
4677 		 * receiver continued processing descriptors after
4678 		 * sending us the ack.
4679 		 */
4680 		return (rv);
4681 	}
4682 
4683 	statsp->dring_stopped_acks++;
4684 
4685 	/* receiver stopped processing descriptors */
4686 	mutex_enter(&ldcp->wrlock);
4687 	mutex_enter(&ldcp->tclock);
4688 
4689 	/*
4690 	 * determine if there are any pending tx descriptors
4691 	 * ready to be processed by the receiver(peer) and if so,
4692 	 * send a message to the peer to restart receiving.
4693 	 */
4694 	ready_txd = B_FALSE;
4695 
4696 	/*
4697 	 * using the end index of the descriptor range for which
4698 	 * we received the ack, check if the next descriptor is
4699 	 * ready.
4700 	 */
4701 	txi = end;
4702 	INCR_TXI(txi, ldcp);
4703 	tbufp = &ldcp->tbufp[txi];
4704 	txdp = tbufp->descp;
4705 	hdrp = &txdp->hdr;
4706 	if (hdrp->dstate == VIO_DESC_READY) {
4707 		ready_txd = B_TRUE;
4708 	} else {
4709 		/*
4710 		 * descr next to the end of ack'd descr range is not
4711 		 * ready.
4712 		 * starting from the current reclaim index, check
4713 		 * if any descriptor is ready.
4714 		 */
4715 
4716 		txi = ldcp->cur_tbufp - ldcp->tbufp;
4717 		tbufp = &ldcp->tbufp[txi];
4718 
4719 		txdp = tbufp->descp;
4720 		hdrp = &txdp->hdr;
4721 		if (hdrp->dstate == VIO_DESC_READY) {
4722 			ready_txd = B_TRUE;
4723 		}
4724 
4725 	}
4726 
4727 	if (ready_txd) {
4728 		/*
4729 		 * we have tx descriptor(s) ready to be
4730 		 * processed by the receiver.
4731 		 * send a message to the peer with the start index
4732 		 * of ready descriptors.
4733 		 */
4734 		rv = vgen_send_dring_data(ldcp, txi, -1);
4735 		if (rv != VGEN_SUCCESS) {
4736 			ldcp->resched_peer = B_TRUE;
4737 			ldcp->resched_peer_txi = txi;
4738 			mutex_exit(&ldcp->tclock);
4739 			mutex_exit(&ldcp->wrlock);
4740 			return (rv);
4741 		}
4742 	} else {
4743 		/*
4744 		 * no ready tx descriptors. set the flag to send a
4745 		 * message to peer when tx descriptors are ready in
4746 		 * transmit routine.
4747 		 */
4748 		ldcp->resched_peer = B_TRUE;
4749 		ldcp->resched_peer_txi = ldcp->cur_tbufp - ldcp->tbufp;
4750 	}
4751 
4752 	mutex_exit(&ldcp->tclock);
4753 	mutex_exit(&ldcp->wrlock);
4754 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
4755 	return (rv);
4756 }
4757 
4758 static int
4759 vgen_handle_dring_data_nack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4760 {
4761 	int rv = 0;
4762 	uint32_t start;
4763 	int32_t end;
4764 	uint32_t txi;
4765 	vnet_public_desc_t *txdp;
4766 	vio_dring_entry_hdr_t *hdrp;
4767 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4768 	vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;
4769 #ifdef VGEN_REXMIT
4770 	vgen_stats_t *statsp = ldcp->statsp;
4771 #endif
4772 
4773 	DBG1(vgenp, ldcp, "enter\n");
4774 	start = dringmsg->start_idx;
4775 	end = dringmsg->end_idx;
4776 
4777 	/*
4778 	 * peer sent a NACK msg to indicate lost packets.
4779 	 * The start and end correspond to the range of descriptors
4780 	 * for which the peer didn't receive a dring data msg and so
4781 	 * didn't receive the corresponding data.
4782 	 */
4783 	DWARN(vgenp, ldcp, "NACK: start(%d), end(%d)\n", start, end);
4784 
4785 	/* validate start and end indeces in the tx nack msg */
4786 	if (!(CHECK_TXI(start, ldcp)) || !(CHECK_TXI(end, ldcp))) {
4787 		/* drop the message if invalid index */
4788 		DWARN(vgenp, ldcp, "Invalid Tx nack start(%d) or end(%d)\n",
4789 		    start, end);
4790 		return (rv);
4791 	}
4792 	/* validate dring_ident */
4793 	if (dringmsg->dring_ident != ldcp->local_hparams.dring_ident) {
4794 		/* invalid dring_ident, drop the msg */
4795 		DWARN(vgenp, ldcp, "Invalid dring ident 0x%x\n",
4796 		    dringmsg->dring_ident);
4797 		return (rv);
4798 	}
4799 	mutex_enter(&ldcp->txlock);
4800 	mutex_enter(&ldcp->tclock);
4801 
4802 	if (ldcp->next_tbufp == ldcp->cur_tbufp) {
4803 		/* no busy descriptors, bogus nack ? */
4804 		mutex_exit(&ldcp->tclock);
4805 		mutex_exit(&ldcp->txlock);
4806 		return (rv);
4807 	}
4808 
4809 #ifdef VGEN_REXMIT
4810 	/* send a new dring data msg including the lost descrs */
4811 	end = ldcp->next_tbufp - ldcp->tbufp;
4812 	DECR_TXI(end, ldcp);
4813 	rv = vgen_send_dring_data(ldcp, start, end);
4814 	if (rv != 0) {
4815 		/*
4816 		 * vgen_send_dring_data() error: drop all packets
4817 		 * in this descr range
4818 		 */
4819 		DWARN(vgenp, ldcp, "vgen_send_dring_data failed: rv(%d)\n", rv);
4820 		for (txi = start; txi <= end; ) {
4821 			tbufp = &(ldcp->tbufp[txi]);
4822 			txdp = tbufp->descp;
4823 			hdrp = &txdp->hdr;
4824 			tbufp->flags = VGEN_PRIV_DESC_FREE;
4825 			hdrp->dstate = VIO_DESC_FREE;
4826 			hdrp->ack = B_FALSE;
4827 			statsp->oerrors++;
4828 		}
4829 
4830 		/* update next pointer */
4831 		ldcp->next_tbufp = &(ldcp->tbufp[start]);
4832 		ldcp->next_txi = start;
4833 	}
4834 	DBG2(vgenp, ldcp, "rexmit: start(%d) end(%d)\n", start, end);
4835 #else	/* VGEN_REXMIT */
4836 	/* we just mark the descrs as done so they can be reclaimed */
4837 	for (txi = start; txi <= end; ) {
4838 		txdp = &(ldcp->txdp[txi]);
4839 		hdrp = &txdp->hdr;
4840 		if (hdrp->dstate == VIO_DESC_READY)
4841 			hdrp->dstate = VIO_DESC_DONE;
4842 		INCR_TXI(txi, ldcp);
4843 	}
4844 #endif	/* VGEN_REXMIT */
4845 	mutex_exit(&ldcp->tclock);
4846 	mutex_exit(&ldcp->txlock);
4847 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
4848 	return (rv);
4849 }
4850 
4851 static void
4852 vgen_reclaim(vgen_ldc_t *ldcp)
4853 {
4854 	mutex_enter(&ldcp->tclock);
4855 
4856 	vgen_reclaim_dring(ldcp);
4857 	ldcp->reclaim_lbolt = ddi_get_lbolt();
4858 
4859 	mutex_exit(&ldcp->tclock);
4860 }
4861 
4862 /*
4863  * transmit reclaim function. starting from the current reclaim index
4864  * look for descriptors marked DONE and reclaim the descriptor and the
4865  * corresponding buffers (tbuf).
4866  */
4867 static void
4868 vgen_reclaim_dring(vgen_ldc_t *ldcp)
4869 {
4870 	int count = 0;
4871 	vnet_public_desc_t *txdp;
4872 	vgen_private_desc_t *tbufp;
4873 	vio_dring_entry_hdr_t	*hdrp;
4874 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
4875 
4876 #ifdef DEBUG
4877 	if (vgen_trigger_txtimeout)
4878 		return;
4879 #endif
4880 
4881 	tbufp = ldcp->cur_tbufp;
4882 	txdp = tbufp->descp;
4883 	hdrp = &txdp->hdr;
4884 
4885 	while ((hdrp->dstate == VIO_DESC_DONE) &&
4886 	    (tbufp != ldcp->next_tbufp)) {
4887 		tbufp->flags = VGEN_PRIV_DESC_FREE;
4888 		hdrp->dstate = VIO_DESC_FREE;
4889 		hdrp->ack = B_FALSE;
4890 
4891 		tbufp = NEXTTBUF(ldcp, tbufp);
4892 		txdp = tbufp->descp;
4893 		hdrp = &txdp->hdr;
4894 		count++;
4895 	}
4896 
4897 	ldcp->cur_tbufp = tbufp;
4898 
4899 	/*
4900 	 * Check if mac layer should be notified to restart transmissions
4901 	 */
4902 	if ((ldcp->need_resched) && (count > 0)) {
4903 		ldcp->need_resched = B_FALSE;
4904 		vnet_tx_update(vgenp->vnetp);
4905 	}
4906 }
4907 
4908 /* return the number of pending transmits for the channel */
4909 static int
4910 vgen_num_txpending(vgen_ldc_t *ldcp)
4911 {
4912 	int n;
4913 
4914 	if (ldcp->next_tbufp >= ldcp->cur_tbufp) {
4915 		n = ldcp->next_tbufp - ldcp->cur_tbufp;
4916 	} else  {
4917 		/* cur_tbufp > next_tbufp */
4918 		n = ldcp->num_txds - (ldcp->cur_tbufp - ldcp->next_tbufp);
4919 	}
4920 
4921 	return (n);
4922 }
4923 
4924 /* determine if the transmit descriptor ring is full */
4925 static int
4926 vgen_tx_dring_full(vgen_ldc_t *ldcp)
4927 {
4928 	vgen_private_desc_t	*tbufp;
4929 	vgen_private_desc_t	*ntbufp;
4930 
4931 	tbufp = ldcp->next_tbufp;
4932 	ntbufp = NEXTTBUF(ldcp, tbufp);
4933 	if (ntbufp == ldcp->cur_tbufp) { /* out of tbufs/txds */
4934 		return (VGEN_SUCCESS);
4935 	}
4936 	return (VGEN_FAILURE);
4937 }
4938 
4939 /* determine if timeout condition has occured */
4940 static int
4941 vgen_ldc_txtimeout(vgen_ldc_t *ldcp)
4942 {
4943 	if (((ddi_get_lbolt() - ldcp->reclaim_lbolt) >
4944 	    drv_usectohz(vnet_ldcwd_txtimeout * 1000)) &&
4945 	    (vnet_ldcwd_txtimeout) &&
4946 	    (vgen_tx_dring_full(ldcp) == VGEN_SUCCESS)) {
4947 		return (VGEN_SUCCESS);
4948 	} else {
4949 		return (VGEN_FAILURE);
4950 	}
4951 }
4952 
4953 /* transmit watchdog timeout handler */
4954 static void
4955 vgen_ldc_watchdog(void *arg)
4956 {
4957 	vgen_ldc_t *ldcp;
4958 	vgen_t *vgenp;
4959 	int rv;
4960 
4961 	ldcp = (vgen_ldc_t *)arg;
4962 	vgenp = LDC_TO_VGEN(ldcp);
4963 
4964 	rv = vgen_ldc_txtimeout(ldcp);
4965 	if (rv == VGEN_SUCCESS) {
4966 		DWARN(vgenp, ldcp, "transmit timeout\n");
4967 #ifdef DEBUG
4968 		if (vgen_trigger_txtimeout) {
4969 			/* tx timeout triggered for debugging */
4970 			vgen_trigger_txtimeout = 0;
4971 		}
4972 #endif
4973 		mutex_enter(&ldcp->cblock);
4974 		ldcp->need_ldc_reset = B_TRUE;
4975 		vgen_handshake_retry(ldcp);
4976 		mutex_exit(&ldcp->cblock);
4977 		if (ldcp->need_resched) {
4978 			ldcp->need_resched = B_FALSE;
4979 			vnet_tx_update(vgenp->vnetp);
4980 		}
4981 	}
4982 
4983 	ldcp->wd_tid = timeout(vgen_ldc_watchdog, (caddr_t)ldcp,
4984 	    drv_usectohz(vnet_ldcwd_interval * 1000));
4985 }
4986 
4987 static int
4988 vgen_setup_kstats(vgen_ldc_t *ldcp)
4989 {
4990 	vgen_t *vgenp;
4991 	struct kstat *ksp;
4992 	vgen_stats_t *statsp;
4993 	vgen_kstats_t *ldckp;
4994 	int instance;
4995 	size_t size;
4996 	char name[MAXNAMELEN];
4997 
4998 	vgenp = LDC_TO_VGEN(ldcp);
4999 	instance = ddi_get_instance(vgenp->vnetdip);
5000 	(void) sprintf(name, "vnetldc0x%lx", ldcp->ldc_id);
5001 	statsp = kmem_zalloc(sizeof (vgen_stats_t), KM_SLEEP);
5002 	if (statsp == NULL) {
5003 		return (VGEN_FAILURE);
5004 	}
5005 	size = sizeof (vgen_kstats_t) / sizeof (kstat_named_t);
5006 	ksp = kstat_create("vnet", instance, name, "net", KSTAT_TYPE_NAMED,
5007 	    size, 0);
5008 	if (ksp == NULL) {
5009 		KMEM_FREE(statsp);
5010 		return (VGEN_FAILURE);
5011 	}
5012 
5013 	ldckp = (vgen_kstats_t *)ksp->ks_data;
5014 	kstat_named_init(&ldckp->ipackets,		"ipackets",
5015 	    KSTAT_DATA_ULONG);
5016 	kstat_named_init(&ldckp->ipackets64,		"ipackets64",
5017 	    KSTAT_DATA_ULONGLONG);
5018 	kstat_named_init(&ldckp->ierrors,		"ierrors",
5019 	    KSTAT_DATA_ULONG);
5020 	kstat_named_init(&ldckp->opackets,		"opackets",
5021 	    KSTAT_DATA_ULONG);
5022 	kstat_named_init(&ldckp->opackets64,		"opackets64",
5023 	    KSTAT_DATA_ULONGLONG);
5024 	kstat_named_init(&ldckp->oerrors,		"oerrors",
5025 	    KSTAT_DATA_ULONG);
5026 
5027 
5028 	/* MIB II kstat variables */
5029 	kstat_named_init(&ldckp->rbytes,		"rbytes",
5030 	    KSTAT_DATA_ULONG);
5031 	kstat_named_init(&ldckp->rbytes64,		"rbytes64",
5032 	    KSTAT_DATA_ULONGLONG);
5033 	kstat_named_init(&ldckp->obytes,		"obytes",
5034 	    KSTAT_DATA_ULONG);
5035 	kstat_named_init(&ldckp->obytes64,		"obytes64",
5036 	    KSTAT_DATA_ULONGLONG);
5037 	kstat_named_init(&ldckp->multircv,		"multircv",
5038 	    KSTAT_DATA_ULONG);
5039 	kstat_named_init(&ldckp->multixmt,		"multixmt",
5040 	    KSTAT_DATA_ULONG);
5041 	kstat_named_init(&ldckp->brdcstrcv,		"brdcstrcv",
5042 	    KSTAT_DATA_ULONG);
5043 	kstat_named_init(&ldckp->brdcstxmt,		"brdcstxmt",
5044 	    KSTAT_DATA_ULONG);
5045 	kstat_named_init(&ldckp->norcvbuf,		"norcvbuf",
5046 	    KSTAT_DATA_ULONG);
5047 	kstat_named_init(&ldckp->noxmtbuf,		"noxmtbuf",
5048 	    KSTAT_DATA_ULONG);
5049 
5050 	/* Tx stats */
5051 	kstat_named_init(&ldckp->tx_no_desc,		"tx_no_desc",
5052 	    KSTAT_DATA_ULONG);
5053 
5054 	/* Rx stats */
5055 	kstat_named_init(&ldckp->rx_allocb_fail,	"rx_allocb_fail",
5056 	    KSTAT_DATA_ULONG);
5057 	kstat_named_init(&ldckp->rx_vio_allocb_fail,	"rx_vio_allocb_fail",
5058 	    KSTAT_DATA_ULONG);
5059 	kstat_named_init(&ldckp->rx_lost_pkts,		"rx_lost_pkts",
5060 	    KSTAT_DATA_ULONG);
5061 
5062 	/* Interrupt stats */
5063 	kstat_named_init(&ldckp->callbacks,		"callbacks",
5064 	    KSTAT_DATA_ULONG);
5065 	kstat_named_init(&ldckp->dring_data_acks,	"dring_data_acks",
5066 	    KSTAT_DATA_ULONG);
5067 	kstat_named_init(&ldckp->dring_stopped_acks,	"dring_stopped_acks",
5068 	    KSTAT_DATA_ULONG);
5069 	kstat_named_init(&ldckp->dring_data_msgs,	"dring_data_msgs",
5070 	    KSTAT_DATA_ULONG);
5071 
5072 	ksp->ks_update = vgen_kstat_update;
5073 	ksp->ks_private = (void *)ldcp;
5074 	kstat_install(ksp);
5075 
5076 	ldcp->ksp = ksp;
5077 	ldcp->statsp = statsp;
5078 	return (VGEN_SUCCESS);
5079 }
5080 
5081 static void
5082 vgen_destroy_kstats(vgen_ldc_t *ldcp)
5083 {
5084 	if (ldcp->ksp)
5085 		kstat_delete(ldcp->ksp);
5086 	KMEM_FREE(ldcp->statsp);
5087 }
5088 
5089 static int
5090 vgen_kstat_update(kstat_t *ksp, int rw)
5091 {
5092 	vgen_ldc_t *ldcp;
5093 	vgen_stats_t *statsp;
5094 	vgen_kstats_t *ldckp;
5095 
5096 	ldcp = (vgen_ldc_t *)ksp->ks_private;
5097 	statsp = ldcp->statsp;
5098 	ldckp = (vgen_kstats_t *)ksp->ks_data;
5099 
5100 	if (rw == KSTAT_READ) {
5101 		ldckp->ipackets.value.ul	= (uint32_t)statsp->ipackets;
5102 		ldckp->ipackets64.value.ull	= statsp->ipackets;
5103 		ldckp->ierrors.value.ul		= statsp->ierrors;
5104 		ldckp->opackets.value.ul	= (uint32_t)statsp->opackets;
5105 		ldckp->opackets64.value.ull	= statsp->opackets;
5106 		ldckp->oerrors.value.ul		= statsp->oerrors;
5107 
5108 		/*
5109 		 * MIB II kstat variables
5110 		 */
5111 		ldckp->rbytes.value.ul		= (uint32_t)statsp->rbytes;
5112 		ldckp->rbytes64.value.ull	= statsp->rbytes;
5113 		ldckp->obytes.value.ul		= (uint32_t)statsp->obytes;
5114 		ldckp->obytes64.value.ull	= statsp->obytes;
5115 		ldckp->multircv.value.ul	= statsp->multircv;
5116 		ldckp->multixmt.value.ul	= statsp->multixmt;
5117 		ldckp->brdcstrcv.value.ul	= statsp->brdcstrcv;
5118 		ldckp->brdcstxmt.value.ul	= statsp->brdcstxmt;
5119 		ldckp->norcvbuf.value.ul	= statsp->norcvbuf;
5120 		ldckp->noxmtbuf.value.ul	= statsp->noxmtbuf;
5121 
5122 		ldckp->tx_no_desc.value.ul	= statsp->tx_no_desc;
5123 
5124 		ldckp->rx_allocb_fail.value.ul	= statsp->rx_allocb_fail;
5125 		ldckp->rx_vio_allocb_fail.value.ul = statsp->rx_vio_allocb_fail;
5126 		ldckp->rx_lost_pkts.value.ul	= statsp->rx_lost_pkts;
5127 
5128 		ldckp->callbacks.value.ul	= statsp->callbacks;
5129 		ldckp->dring_data_acks.value.ul	= statsp->dring_data_acks;
5130 		ldckp->dring_stopped_acks.value.ul = statsp->dring_stopped_acks;
5131 		ldckp->dring_data_msgs.value.ul	= statsp->dring_data_msgs;
5132 	} else {
5133 		statsp->ipackets	= ldckp->ipackets64.value.ull;
5134 		statsp->ierrors		= ldckp->ierrors.value.ul;
5135 		statsp->opackets	= ldckp->opackets64.value.ull;
5136 		statsp->oerrors		= ldckp->oerrors.value.ul;
5137 
5138 		/*
5139 		 * MIB II kstat variables
5140 		 */
5141 		statsp->rbytes		= ldckp->rbytes64.value.ull;
5142 		statsp->obytes		= ldckp->obytes64.value.ull;
5143 		statsp->multircv	= ldckp->multircv.value.ul;
5144 		statsp->multixmt	= ldckp->multixmt.value.ul;
5145 		statsp->brdcstrcv	= ldckp->brdcstrcv.value.ul;
5146 		statsp->brdcstxmt	= ldckp->brdcstxmt.value.ul;
5147 		statsp->norcvbuf	= ldckp->norcvbuf.value.ul;
5148 		statsp->noxmtbuf	= ldckp->noxmtbuf.value.ul;
5149 
5150 		statsp->tx_no_desc	= ldckp->tx_no_desc.value.ul;
5151 
5152 		statsp->rx_allocb_fail	= ldckp->rx_allocb_fail.value.ul;
5153 		statsp->rx_vio_allocb_fail = ldckp->rx_vio_allocb_fail.value.ul;
5154 		statsp->rx_lost_pkts	= ldckp->rx_lost_pkts.value.ul;
5155 
5156 		statsp->callbacks	= ldckp->callbacks.value.ul;
5157 		statsp->dring_data_acks	= ldckp->dring_data_acks.value.ul;
5158 		statsp->dring_stopped_acks = ldckp->dring_stopped_acks.value.ul;
5159 		statsp->dring_data_msgs	= ldckp->dring_data_msgs.value.ul;
5160 	}
5161 
5162 	return (VGEN_SUCCESS);
5163 }
5164 
/*
 * handler for error messages received from the peer ldc end-point.
 *
 * Currently a placeholder: the body is empty, so an error message is
 * accepted and ignored. Both arguments are marked unused for lint.
 */
static void
vgen_handle_errmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
{
	_NOTE(ARGUNUSED(ldcp, tagp))
}
5171 
5172 /* Check if the session id in the received message is valid */
5173 static int
5174 vgen_check_sid(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5175 {
5176 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5177 
5178 	if (tagp->vio_sid != ldcp->peer_sid) {
5179 		DWARN(vgenp, ldcp, "sid mismatch: expected(%x), rcvd(%x)\n",
5180 		    ldcp->peer_sid, tagp->vio_sid);
5181 		return (VGEN_FAILURE);
5182 	}
5183 	else
5184 		return (VGEN_SUCCESS);
5185 }
5186 
/*
 * Pack the ETHERADDRL bytes of a mac address into a uint64_t, with
 * macaddr[0] ending up in the most significant of the occupied bytes.
 */
static uint64_t
vgen_macaddr_strtoul(const uint8_t *macaddr)
{
	const uint8_t *bytep = macaddr;
	const uint8_t *endp = macaddr + ETHERADDRL;
	uint64_t packed = 0;

	while (bytep < endp) {
		packed = (packed << 8) | *bytep;
		bytep++;
	}

	return (packed);
}
5201 
5202 /* convert mac address from uint64_t to string */
5203 static int
5204 vgen_macaddr_ultostr(uint64_t val, uint8_t *macaddr)
5205 {
5206 	int i;
5207 	uint64_t value;
5208 
5209 	value = val;
5210 	for (i = ETHERADDRL - 1; i >= 0; i--) {
5211 		macaddr[i] = value & 0xFF;
5212 		value >>= 8;
5213 	}
5214 	return (VGEN_SUCCESS);
5215 }
5216 
5217 static caddr_t
5218 vgen_print_ethaddr(uint8_t *a, char *ebuf)
5219 {
5220 	(void) sprintf(ebuf,
5221 	    "%x:%x:%x:%x:%x:%x", a[0], a[1], a[2], a[3], a[4], a[5]);
5222 	return (ebuf);
5223 }
5224 
5225 /* Handshake watchdog timeout handler */
5226 static void
5227 vgen_hwatchdog(void *arg)
5228 {
5229 	vgen_ldc_t *ldcp = (vgen_ldc_t *)arg;
5230 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5231 
5232 	DWARN(vgenp, ldcp,
5233 	    "handshake timeout ldc(%lx) phase(%x) state(%x)\n",
5234 	    ldcp->hphase, ldcp->hstate);
5235 
5236 	mutex_enter(&ldcp->cblock);
5237 	if (ldcp->cancel_htid) {
5238 		ldcp->cancel_htid = 0;
5239 		mutex_exit(&ldcp->cblock);
5240 		return;
5241 	}
5242 	ldcp->htid = 0;
5243 	ldcp->need_ldc_reset = B_TRUE;
5244 	vgen_handshake_retry(ldcp);
5245 	mutex_exit(&ldcp->cblock);
5246 }
5247 
5248 static void
5249 vgen_print_hparams(vgen_hparams_t *hp)
5250 {
5251 	uint8_t	addr[6];
5252 	char	ea[6];
5253 	ldc_mem_cookie_t *dc;
5254 
5255 	cmn_err(CE_CONT, "version_info:\n");
5256 	cmn_err(CE_CONT,
5257 	    "\tver_major: %d, ver_minor: %d, dev_class: %d\n",
5258 	    hp->ver_major, hp->ver_minor, hp->dev_class);
5259 
5260 	(void) vgen_macaddr_ultostr(hp->addr, addr);
5261 	cmn_err(CE_CONT, "attr_info:\n");
5262 	cmn_err(CE_CONT, "\tMTU: %lx, addr: %s\n", hp->mtu,
5263 	    vgen_print_ethaddr(addr, ea));
5264 	cmn_err(CE_CONT,
5265 	    "\taddr_type: %x, xfer_mode: %x, ack_freq: %x\n",
5266 	    hp->addr_type, hp->xfer_mode, hp->ack_freq);
5267 
5268 	dc = &hp->dring_cookie;
5269 	cmn_err(CE_CONT, "dring_info:\n");
5270 	cmn_err(CE_CONT,
5271 	    "\tlength: %d, dsize: %d\n", hp->num_desc, hp->desc_size);
5272 	cmn_err(CE_CONT,
5273 	    "\tldc_addr: 0x%lx, ldc_size: %ld\n",
5274 	    dc->addr, dc->size);
5275 	cmn_err(CE_CONT, "\tdring_ident: 0x%lx\n", hp->dring_ident);
5276 }
5277 
5278 static void
5279 vgen_print_ldcinfo(vgen_ldc_t *ldcp)
5280 {
5281 	vgen_hparams_t *hp;
5282 
5283 	cmn_err(CE_CONT, "Channel Information:\n");
5284 	cmn_err(CE_CONT,
5285 	    "\tldc_id: 0x%lx, ldc_status: 0x%x\n",
5286 	    ldcp->ldc_id, ldcp->ldc_status);
5287 	cmn_err(CE_CONT,
5288 	    "\tlocal_sid: 0x%x, peer_sid: 0x%x\n",
5289 	    ldcp->local_sid, ldcp->peer_sid);
5290 	cmn_err(CE_CONT,
5291 	    "\thphase: 0x%x, hstate: 0x%x\n",
5292 	    ldcp->hphase, ldcp->hstate);
5293 
5294 	cmn_err(CE_CONT, "Local handshake params:\n");
5295 	hp = &ldcp->local_hparams;
5296 	vgen_print_hparams(hp);
5297 
5298 	cmn_err(CE_CONT, "Peer handshake params:\n");
5299 	hp = &ldcp->peer_hparams;
5300 	vgen_print_hparams(hp);
5301 }
5302 
5303 /*
5304  * vgen_ldc_queue_data -- Queue data in the LDC.
5305  */
5306 static void
5307 vgen_ldc_queue_data(vgen_ldc_t *ldcp, mblk_t *rhead, mblk_t *rtail)
5308 {
5309 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5310 
5311 	DBG1(vgenp, ldcp, "enter\n");
5312 	/*
5313 	 * If the receive thread is enabled, then the queue
5314 	 * is protected by the soft_lock. After queuing, trigger
5315 	 * the soft interrupt so that the interrupt handler sends these
5316 	 * messages up the stack.
5317 	 *
5318 	 * If the receive thread is not enabled, then the list is
5319 	 * automatically protected by the cblock lock, so no need
5320 	 * to hold any additional locks.
5321 	 */
5322 	if (ldcp->rcv_thread != NULL) {
5323 		mutex_enter(&ldcp->soft_lock);
5324 	}
5325 	if (ldcp->rcv_mhead == NULL) {
5326 		ldcp->rcv_mhead = rhead;
5327 		ldcp->rcv_mtail = rtail;
5328 	} else {
5329 		ldcp->rcv_mtail->b_next = rhead;
5330 		ldcp->rcv_mtail = rtail;
5331 	}
5332 	if (ldcp->rcv_thread != NULL) {
5333 		mutex_exit(&ldcp->soft_lock);
5334 		(void) ddi_intr_trigger_softint(ldcp->soft_handle, NULL);
5335 	}
5336 	DBG1(vgenp, ldcp, "exit\n");
5337 }
5338 
/*
 * vgen_ldc_rcv_worker -- A per LDC worker thread to receive data.
 * This thread is woken up by the LDC interrupt handler to process
 * LDC packets and receive data.
 *
 * arg is the vgen_ldc_t of the channel. The thread marks itself
 * VGEN_WTHR_RUNNING, then loops: it sleeps on rcv_thr_cv until either
 * VGEN_WTHR_DATARCVD or VGEN_WTHR_STOP is set in rcv_thr_flags; on
 * DATARCVD it clears the flag and calls vgen_handle_evt_read() with
 * rcv_thr_lock dropped; on STOP it leaves the loop, clears RUNNING,
 * signals rcv_thr_cv and exits.
 */
static void
vgen_ldc_rcv_worker(void *arg)
{
	callb_cpr_t	cprinfo;
	vgen_ldc_t *ldcp = (vgen_ldc_t *)arg;
	vgen_t *vgenp = LDC_TO_VGEN(ldcp);

	DBG1(vgenp, ldcp, "enter\n");
	/* CPR bookkeeping for this thread is tied to rcv_thr_lock */
	CALLB_CPR_INIT(&cprinfo, &ldcp->rcv_thr_lock, callb_generic_cpr,
	    "vnet_rcv_thread");
	mutex_enter(&ldcp->rcv_thr_lock);
	ldcp->rcv_thr_flags |= VGEN_WTHR_RUNNING;
	while (!(ldcp->rcv_thr_flags & VGEN_WTHR_STOP)) {

		/* the wait below is bracketed as CPR-safe */
		CALLB_CPR_SAFE_BEGIN(&cprinfo);
		/*
		 * Wait until the data is received or a stop
		 * request is received.
		 */
		while (!(ldcp->rcv_thr_flags &
		    (VGEN_WTHR_DATARCVD | VGEN_WTHR_STOP))) {
			cv_wait(&ldcp->rcv_thr_cv, &ldcp->rcv_thr_lock);
		}
		CALLB_CPR_SAFE_END(&cprinfo, &ldcp->rcv_thr_lock)

		/*
		 * First process the stop request.
		 */
		if (ldcp->rcv_thr_flags & VGEN_WTHR_STOP) {
			DBG2(vgenp, ldcp, "stopped\n");
			break;
		}
		/*
		 * Consume the data-received indication, then process the
		 * channel without holding rcv_thr_lock.
		 */
		ldcp->rcv_thr_flags &= ~VGEN_WTHR_DATARCVD;
		mutex_exit(&ldcp->rcv_thr_lock);
		DBG2(vgenp, ldcp, "calling vgen_handle_evt_read\n");
		vgen_handle_evt_read(ldcp);
		mutex_enter(&ldcp->rcv_thr_lock);
	}

	/*
	 * Update the run status and wakeup the thread that
	 * has sent the stop request.
	 */
	ldcp->rcv_thr_flags &= ~VGEN_WTHR_RUNNING;
	cv_signal(&ldcp->rcv_thr_cv);
	/*
	 * NOTE(review): rcv_thr_lock is still held here and is never
	 * released explicitly; presumably CALLB_CPR_EXIT() drops it --
	 * confirm against the callb(9) macro definitions.
	 */
	CALLB_CPR_EXIT(&cprinfo);
	thread_exit();
	/* NOTE(review): thread_exit() is not expected to return -- confirm */
	DBG1(vgenp, ldcp, "exit\n");
}
5393 
5394 /* vgen_stop_rcv_thread -- Co-ordinate with receive thread to stop it */
5395 static void
5396 vgen_stop_rcv_thread(vgen_ldc_t *ldcp)
5397 {
5398 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5399 
5400 	DBG1(vgenp, ldcp, "enter\n");
5401 	/*
5402 	 * Send a stop request by setting the stop flag and
5403 	 * wait until the receive thread stops.
5404 	 */
5405 	mutex_enter(&ldcp->rcv_thr_lock);
5406 	if (ldcp->rcv_thr_flags & VGEN_WTHR_RUNNING) {
5407 		ldcp->rcv_thr_flags |= VGEN_WTHR_STOP;
5408 		cv_signal(&ldcp->rcv_thr_cv);
5409 		DBG2(vgenp, ldcp, "waiting...");
5410 		while (ldcp->rcv_thr_flags & VGEN_WTHR_RUNNING) {
5411 			cv_wait(&ldcp->rcv_thr_cv, &ldcp->rcv_thr_lock);
5412 		}
5413 	}
5414 	mutex_exit(&ldcp->rcv_thr_lock);
5415 	ldcp->rcv_thread = NULL;
5416 	DBG1(vgenp, ldcp, "exit\n");
5417 }
5418 
5419 /*
5420  * vgen_ldc_rcv_softintr -- LDC Soft interrupt handler function.
5421  * Its job is to pickup the recieved packets that are queued in the
5422  * LDC and send them up.
5423  *
5424  * NOTE: An interrupt handler is being used to handle the upper
5425  * layer(s) requirement to send up only at interrupt context.
5426  */
5427 /* ARGSUSED */
5428 static uint_t
5429 vgen_ldc_rcv_softintr(caddr_t arg1, caddr_t arg2)
5430 {
5431 	mblk_t *mp;
5432 	vgen_ldc_t *ldcp = (vgen_ldc_t *)arg1;
5433 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5434 
5435 	DBG1(vgenp, ldcp, "enter\n");
5436 	DTRACE_PROBE1(vgen_soft_intr, uint64_t, ldcp->ldc_id);
5437 	mutex_enter(&ldcp->soft_lock);
5438 	mp = ldcp->rcv_mhead;
5439 	ldcp->rcv_mhead = ldcp->rcv_mtail = NULL;
5440 	mutex_exit(&ldcp->soft_lock);
5441 	if (mp != NULL) {
5442 		vnet_rx(vgenp->vnetp, NULL, mp);
5443 	}
5444 	DBG1(vgenp, ldcp, "exit\n");
5445 	return (DDI_INTR_CLAIMED);
5446 }
5447 
5448 #if DEBUG
5449 
5450 /*
5451  * Print debug messages - set to 0xf to enable all msgs
5452  */
5453 static void
5454 debug_printf(const char *fname, vgen_t *vgenp,
5455     vgen_ldc_t *ldcp, const char *fmt, ...)
5456 {
5457 	char    buf[256];
5458 	char    *bufp = buf;
5459 	va_list ap;
5460 
5461 	if ((vgenp != NULL) && (vgenp->vnetp != NULL)) {
5462 		(void) sprintf(bufp, "vnet%d:",
5463 		    ((vnet_t *)(vgenp->vnetp))->instance);
5464 		bufp += strlen(bufp);
5465 	}
5466 	if (ldcp != NULL) {
5467 		(void) sprintf(bufp, "ldc(%ld):", ldcp->ldc_id);
5468 		bufp += strlen(bufp);
5469 	}
5470 	(void) sprintf(bufp, "%s: ", fname);
5471 	bufp += strlen(bufp);
5472 
5473 	va_start(ap, fmt);
5474 	(void) vsprintf(bufp, fmt, ap);
5475 	va_end(ap);
5476 
5477 	if ((ldcp == NULL) ||(vgendbg_ldcid == -1) ||
5478 	    (vgendbg_ldcid == ldcp->ldc_id)) {
5479 		cmn_err(CE_CONT, "%s\n", buf);
5480 	}
5481 }
5482 #endif
5483