xref: /titanic_52/usr/src/uts/sun4v/io/vnet_gen.c (revision 1a578a15d3f76161f037cd99883a1f54a9eda785)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/errno.h>
31 #include <sys/param.h>
32 #include <sys/stream.h>
33 #include <sys/strsubr.h>
34 #include <sys/kmem.h>
35 #include <sys/conf.h>
36 #include <sys/devops.h>
37 #include <sys/ksynch.h>
38 #include <sys/stat.h>
39 #include <sys/modctl.h>
40 #include <sys/debug.h>
41 #include <sys/ethernet.h>
42 #include <sys/ddi.h>
43 #include <sys/sunddi.h>
44 #include <sys/strsun.h>
45 #include <sys/note.h>
46 #include <sys/mac.h>
47 #include <sys/mac_ether.h>
48 #include <sys/ldc.h>
49 #include <sys/mach_descrip.h>
50 #include <sys/mdeg.h>
51 #include <net/if.h>
52 #include <sys/vnet.h>
53 #include <sys/vio_mailbox.h>
54 #include <sys/vio_common.h>
55 #include <sys/vnet_common.h>
56 #include <sys/vnet_mailbox.h>
57 #include <sys/vio_util.h>
58 #include <sys/vnet_gen.h>
59 #include <sys/callb.h>
60 #include <sys/sdt.h>
61 #include <sys/intr.h>
62 #include <sys/pattr.h>
63 
64 /*
65  * Implementation of the mac functionality for vnet using the
66  * generic(default) transport layer of sun4v Logical Domain Channels(LDC).
67  */
68 
69 /*
70  * Function prototypes.
71  */
72 /* vgen proxy entry points */
73 int vgen_init(void *vnetp, dev_info_t *vnetdip, const uint8_t *macaddr,
74 	mac_register_t **vgenmacp);
75 int vgen_uninit(void *arg);
76 static int vgen_start(void *arg);
77 static void vgen_stop(void *arg);
78 static mblk_t *vgen_tx(void *arg, mblk_t *mp);
79 static int vgen_multicst(void *arg, boolean_t add,
80 	const uint8_t *mca);
81 static int vgen_promisc(void *arg, boolean_t on);
82 static int vgen_unicst(void *arg, const uint8_t *mca);
83 static int vgen_stat(void *arg, uint_t stat, uint64_t *val);
84 static void vgen_ioctl(void *arg, queue_t *wq, mblk_t *mp);
85 
86 /* externs - functions provided by vnet to add/remove/modify entries in fdb */
87 void vnet_add_fdb(void *arg, uint8_t *macaddr, mac_tx_t m_tx, void *txarg);
88 void vnet_del_fdb(void *arg, uint8_t *macaddr);
89 void vnet_modify_fdb(void *arg, uint8_t *macaddr, mac_tx_t m_tx,
90 	void *txarg, boolean_t upgrade);
91 void vnet_add_def_rte(void *arg, mac_tx_t m_tx, void *txarg);
92 void vnet_del_def_rte(void *arg);
93 void vnet_rx(void *arg, mac_resource_handle_t mrh, mblk_t *mp);
94 void vnet_tx_update(void *arg);
95 
96 /* vgen internal functions */
97 static void vgen_detach_ports(vgen_t *vgenp);
98 static void vgen_port_detach(vgen_port_t *portp);
99 static void vgen_port_list_insert(vgen_port_t *portp);
100 static void vgen_port_list_remove(vgen_port_t *portp);
101 static vgen_port_t *vgen_port_lookup(vgen_portlist_t *plistp,
102 	int port_num);
103 static int vgen_mdeg_reg(vgen_t *vgenp);
104 static void vgen_mdeg_unreg(vgen_t *vgenp);
105 static int vgen_mdeg_cb(void *cb_argp, mdeg_result_t *resp);
106 static int vgen_add_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
107 static int vgen_remove_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
108 static int vgen_port_attach_mdeg(vgen_t *vgenp, int port_num, uint64_t *ldcids,
109 	int num_ids, struct ether_addr *macaddr, boolean_t vsw_port);
110 static void vgen_port_detach_mdeg(vgen_port_t *portp);
111 static int vgen_update_port(vgen_t *vgenp, md_t *curr_mdp,
112 	mde_cookie_t curr_mdex, md_t *prev_mdp, mde_cookie_t prev_mdex);
113 static uint64_t	vgen_port_stat(vgen_port_t *portp, uint_t stat);
114 
115 static int vgen_ldc_attach(vgen_port_t *portp, uint64_t ldc_id);
116 static void vgen_ldc_detach(vgen_ldc_t *ldcp);
117 static int vgen_alloc_tx_ring(vgen_ldc_t *ldcp);
118 static void vgen_free_tx_ring(vgen_ldc_t *ldcp);
119 static void vgen_init_ports(vgen_t *vgenp);
120 static void vgen_port_init(vgen_port_t *portp);
121 static void vgen_uninit_ports(vgen_t *vgenp);
122 static void vgen_port_uninit(vgen_port_t *portp);
123 static void vgen_init_ldcs(vgen_port_t *portp);
124 static void vgen_uninit_ldcs(vgen_port_t *portp);
125 static int vgen_ldc_init(vgen_ldc_t *ldcp);
126 static void vgen_ldc_uninit(vgen_ldc_t *ldcp);
127 static int vgen_init_tbufs(vgen_ldc_t *ldcp);
128 static void vgen_uninit_tbufs(vgen_ldc_t *ldcp);
129 static void vgen_clobber_tbufs(vgen_ldc_t *ldcp);
130 static void vgen_clobber_rxds(vgen_ldc_t *ldcp);
131 static uint64_t	vgen_ldc_stat(vgen_ldc_t *ldcp, uint_t stat);
132 static uint_t vgen_ldc_cb(uint64_t event, caddr_t arg);
133 static int vgen_portsend(vgen_port_t *portp, mblk_t *mp);
134 static int vgen_ldcsend(vgen_ldc_t *ldcp, mblk_t *mp);
135 static void vgen_reclaim(vgen_ldc_t *ldcp);
136 static void vgen_reclaim_dring(vgen_ldc_t *ldcp);
137 static int vgen_num_txpending(vgen_ldc_t *ldcp);
138 static int vgen_tx_dring_full(vgen_ldc_t *ldcp);
139 static int vgen_ldc_txtimeout(vgen_ldc_t *ldcp);
140 static void vgen_ldc_watchdog(void *arg);
141 static int vgen_setup_kstats(vgen_ldc_t *ldcp);
142 static void vgen_destroy_kstats(vgen_ldc_t *ldcp);
143 static int vgen_kstat_update(kstat_t *ksp, int rw);
144 
145 /* vgen handshake functions */
146 static vgen_ldc_t *vh_nextphase(vgen_ldc_t *ldcp);
147 static int vgen_supported_version(vgen_ldc_t *ldcp, uint16_t ver_major,
148 	uint16_t ver_minor);
149 static int vgen_next_version(vgen_ldc_t *ldcp, vgen_ver_t *verp);
150 static int vgen_sendmsg(vgen_ldc_t *ldcp, caddr_t msg,  size_t msglen,
151 	boolean_t caller_holds_lock);
152 static int vgen_send_version_negotiate(vgen_ldc_t *ldcp);
153 static int vgen_send_attr_info(vgen_ldc_t *ldcp);
154 static int vgen_send_dring_reg(vgen_ldc_t *ldcp);
155 static int vgen_send_rdx_info(vgen_ldc_t *ldcp);
156 static int vgen_send_dring_data(vgen_ldc_t *ldcp, uint32_t start, int32_t end);
157 static int vgen_send_mcast_info(vgen_ldc_t *ldcp);
158 static int vgen_handshake_phase2(vgen_ldc_t *ldcp);
159 static void vgen_handshake_reset(vgen_ldc_t *ldcp);
160 static void vgen_reset_hphase(vgen_ldc_t *ldcp);
161 static void vgen_handshake(vgen_ldc_t *ldcp);
162 static int vgen_handshake_done(vgen_ldc_t *ldcp);
163 static void vgen_handshake_retry(vgen_ldc_t *ldcp);
164 static int vgen_handle_version_negotiate(vgen_ldc_t *ldcp,
165 	vio_msg_tag_t *tagp);
166 static int vgen_handle_attr_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
167 static int vgen_handle_dring_reg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
168 static int vgen_handle_rdx_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
169 static int vgen_handle_mcast_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
170 static int vgen_handle_ctrlmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
171 static int vgen_handle_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
172 static int vgen_handle_dring_data_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
173 static int vgen_process_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
174 static int vgen_handle_dring_data_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
175 static int vgen_handle_dring_data_nack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
176 static int vgen_send_dring_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp,
177 	uint32_t start, int32_t end, uint8_t pstate);
178 static int vgen_handle_datamsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
179 static void vgen_handle_errmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
180 static void vgen_handle_evt_up(vgen_ldc_t *ldcp, boolean_t flag);
181 static void vgen_handle_evt_reset(vgen_ldc_t *ldcp, boolean_t flag);
182 static int vgen_check_sid(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
183 static uint64_t	vgen_macaddr_strtoul(const uint8_t *macaddr);
184 static int vgen_macaddr_ultostr(uint64_t value, uint8_t *macaddr);
185 static caddr_t vgen_print_ethaddr(uint8_t *a, char *ebuf);
186 static void vgen_hwatchdog(void *arg);
187 static void vgen_print_attr_info(vgen_ldc_t *ldcp, int endpoint);
188 static void vgen_print_hparams(vgen_hparams_t *hp);
189 static void vgen_print_ldcinfo(vgen_ldc_t *ldcp);
190 static uint_t vgen_ldc_rcv_softintr(caddr_t arg1, caddr_t arg2);
191 static void vgen_stop_rcv_thread(vgen_ldc_t *ldcp);
192 static void vgen_ldc_rcv_worker(void *arg);
193 static void vgen_handle_evt_read(vgen_ldc_t *ldcp);
194 static void vgen_ldc_queue_data(vgen_ldc_t *ldcp,
195 	mblk_t *rhead, mblk_t *rtail);
196 
197 /*
198  * The handshake process consists of 5 phases defined below, with VH_PHASE0
199  * being the pre-handshake phase and VH_DONE is the phase to indicate
200  * successful completion of all phases.
201  * Each phase may have one to several handshake states which are required
202  * to complete successfully to move to the next phase.
203  * Refer to the functions vgen_handshake() and vgen_handshake_done() for
204  * more details.
205  */
/*
 * Handshake phases. VH_PHASE0..VH_PHASE3 are numbered consecutively from
 * zero; VH_DONE is a separate marker value (0x80).
 */
enum {
	VH_PHASE0 = 0,
	VH_PHASE1 = 1,
	VH_PHASE2 = 2,
	VH_PHASE3 = 3,
	VH_DONE = 0x80
};
208 
/*
 * Handshake states. Each event owns one bit; the *_NEGOTIATED / *_EXCHANGED
 * names are the combination "ack received from peer" | "ack sent to peer"
 * for the corresponding stage.
 */
enum {
	/* version negotiation */
	VER_INFO_SENT		= (1 << 0),
	VER_ACK_RCVD		= (1 << 1),
	VER_INFO_RCVD		= (1 << 2),
	VER_ACK_SENT		= (1 << 3),
	VER_NEGOTIATED		= ((1 << 1) | (1 << 3)),

	/* attribute exchange */
	ATTR_INFO_SENT		= (1 << 4),
	ATTR_ACK_RCVD		= (1 << 5),
	ATTR_INFO_RCVD		= (1 << 6),
	ATTR_ACK_SENT		= (1 << 7),
	ATTR_INFO_EXCHANGED	= ((1 << 5) | (1 << 7)),

	/* descriptor ring registration */
	DRING_INFO_SENT		= (1 << 8),
	DRING_ACK_RCVD		= (1 << 9),
	DRING_INFO_RCVD		= (1 << 10),
	DRING_ACK_SENT		= (1 << 11),
	DRING_INFO_EXCHANGED	= ((1 << 9) | (1 << 11)),

	/* RDX exchange */
	RDX_INFO_SENT		= (1 << 12),
	RDX_ACK_RCVD		= (1 << 13),
	RDX_INFO_RCVD		= (1 << 14),
	RDX_ACK_SENT		= (1 << 15),
	RDX_EXCHANGED		= ((1 << 13) | (1 << 15))
};
237 
/*
 * Acquire / release every per-channel lock of an ldc.
 *
 * LDC_LOCK takes the locks in the order cblock, rxlock, wrlock, txlock,
 * tclock; LDC_UNLOCK drops them in the reverse order.
 *
 * Both macros expand to a single statement (do { } while (0)) so they can
 * be used safely as the body of an unbraced if/else. The caller supplies
 * the terminating semicolon: LDC_LOCK(ldcp);
 */
#define	LDC_LOCK(ldcp)	\
	do {						\
		mutex_enter(&((ldcp)->cblock));		\
		mutex_enter(&((ldcp)->rxlock));		\
		mutex_enter(&((ldcp)->wrlock));		\
		mutex_enter(&((ldcp)->txlock));		\
		mutex_enter(&((ldcp)->tclock));		\
	} while (0)
#define	LDC_UNLOCK(ldcp)	\
	do {						\
		mutex_exit(&((ldcp)->tclock));		\
		mutex_exit(&((ldcp)->txlock));		\
		mutex_exit(&((ldcp)->wrlock));		\
		mutex_exit(&((ldcp)->rxlock));		\
		mutex_exit(&((ldcp)->cblock));		\
	} while (0)
250 
251 static struct ether_addr etherbroadcastaddr = {
252 	0xff, 0xff, 0xff, 0xff, 0xff, 0xff
253 };
/*
 * MIB II broadcast/multicast packets
 *
 * Both macros take a pointer to a struct ether_header. IS_BROADCAST compares
 * the destination address against etherbroadcastaddr; IS_MULTICAST tests the
 * low-order bit of the first destination octet. The argument is
 * parenthesized so that any pointer expression may be passed.
 */
#define	IS_BROADCAST(ehp) \
		(ether_cmp(&(ehp)->ether_dhost, &etherbroadcastaddr) == 0)
#define	IS_MULTICAST(ehp) \
		(((ehp)->ether_dhost.ether_addr_octet[0] & 01) == 1)
261 
262 /*
263  * Property names
264  */
265 static char macaddr_propname[] = "mac-address";
266 static char rmacaddr_propname[] = "remote-mac-address";
267 static char channel_propname[] = "channel-endpoint";
268 static char reg_propname[] = "reg";
269 static char port_propname[] = "port";
270 static char swport_propname[] = "switch-port";
271 static char id_propname[] = "id";
272 
273 /* versions supported - in decreasing order */
274 static vgen_ver_t vgen_versions[VGEN_NUM_VER] = { {1, 0} };
275 
276 /* Tunables */
277 uint32_t vgen_hwd_interval = 1000;	/* handshake watchdog freq in msec */
278 uint32_t vgen_max_hretries = VNET_NUM_HANDSHAKES; /* # of handshake retries */
279 uint32_t vgen_ldcwr_retries = 10;	/* max # of ldc_write() retries */
280 uint32_t vgen_ldcup_retries = 5;	/* max # of ldc_up() retries */
281 uint32_t vgen_recv_delay = 1;		/* delay when rx descr not ready */
282 uint32_t vgen_recv_retries = 10;	/* retry when rx descr not ready */
283 
284 int vgen_rcv_thread_enabled = 1;	/* Enable Receive thread */
285 
286 /*
287  * max # of packets accumulated prior to sending them up. It is best
288  * to keep this at 60% of the number of receive buffers.
289  */
290 uint32_t vgen_chain_len = (VGEN_NRBUFS * 0.6);
291 
292 /*
293  * Tunables for each receive buffer size and number of buffers for
294  * each buffer size.
295  */
296 uint32_t vgen_rbufsz1 = VGEN_DBLK_SZ_128;
297 uint32_t vgen_rbufsz2 = VGEN_DBLK_SZ_256;
298 uint32_t vgen_rbufsz3 = VGEN_DBLK_SZ_2048;
299 
300 uint32_t vgen_nrbufs1 = VGEN_NRBUFS;
301 uint32_t vgen_nrbufs2 = VGEN_NRBUFS;
302 uint32_t vgen_nrbufs3 = VGEN_NRBUFS;
303 
304 #ifdef DEBUG
305 /* flags to simulate error conditions for debugging */
306 int vgen_trigger_txtimeout = 0;
307 int vgen_trigger_rxlost = 0;
308 #endif
309 
310 /* MD update matching structure */
311 static md_prop_match_t	vport_prop_match[] = {
312 	{ MDET_PROP_VAL,	"id" },
313 	{ MDET_LIST_END,	NULL }
314 };
315 
316 static mdeg_node_match_t vport_match = { "virtual-device-port",
317 					vport_prop_match };
318 
319 /* template for matching a particular vnet instance */
320 static mdeg_prop_spec_t vgen_prop_template[] = {
321 	{ MDET_PROP_STR,	"name",		"network" },
322 	{ MDET_PROP_VAL,	"cfg-handle",	NULL },
323 	{ MDET_LIST_END,	NULL,		NULL }
324 };
325 
326 #define	VGEN_SET_MDEG_PROP_INST(specp, val)	(specp)[1].ps_val = (val)
327 
328 static int vgen_mdeg_cb(void *cb_argp, mdeg_result_t *resp);
329 
330 static mac_callbacks_t vgen_m_callbacks = {
331 	0,
332 	vgen_stat,
333 	vgen_start,
334 	vgen_stop,
335 	vgen_promisc,
336 	vgen_multicst,
337 	vgen_unicst,
338 	vgen_tx,
339 	NULL,
340 	NULL,
341 	NULL
342 };
343 
344 /* externs */
345 extern pri_t	maxclsyspri;
346 extern proc_t	p0;
347 extern uint32_t vnet_ntxds;
348 extern uint32_t vnet_ldcwd_interval;
349 extern uint32_t vnet_ldcwd_txtimeout;
350 extern uint32_t vnet_ldc_mtu;
351 extern uint32_t vnet_nrbufs;
352 
353 
354 #ifdef DEBUG
355 
356 extern int vnet_dbglevel;
357 static void debug_printf(const char *fname, vgen_t *vgenp,
358 	vgen_ldc_t *ldcp, const char *fmt, ...);
359 
360 /* -1 for all LDCs info, or ldc_id for a specific LDC info */
361 int vgendbg_ldcid = -1;
362 
363 /* simulate handshake error conditions for debug */
364 uint32_t vgen_hdbg;
365 #define	HDBG_VERSION	0x1
366 #define	HDBG_TIMEOUT	0x2
367 #define	HDBG_BAD_SID	0x4
368 #define	HDBG_OUT_STATE	0x8
369 
370 #endif
371 
372 
373 
374 /*
375  * vgen_init() is called by an instance of vnet driver to initialize the
376  * corresponding generic proxy transport layer. The arguments passed by vnet
377  * are - an opaque pointer to the vnet instance, pointers to dev_info_t and
378  * the mac address of the vnet device, and a pointer to mac_register_t of
379  * the generic transport is returned in the last argument.
380  */
381 int
382 vgen_init(void *vnetp, dev_info_t *vnetdip, const uint8_t *macaddr,
383     mac_register_t **vgenmacp)
384 {
385 	vgen_t *vgenp;
386 	mac_register_t *macp;
387 	int instance;
388 
389 	if ((vnetp == NULL) || (vnetdip == NULL))
390 		return (DDI_FAILURE);
391 
392 	instance = ddi_get_instance(vnetdip);
393 
394 	DBG1(NULL, NULL, "vnet(%d):%s: enter\n", instance);
395 
396 	vgenp = kmem_zalloc(sizeof (vgen_t), KM_SLEEP);
397 
398 	vgenp->vnetp = vnetp;
399 	vgenp->vnetdip = vnetdip;
400 	bcopy(macaddr, &(vgenp->macaddr), ETHERADDRL);
401 
402 	if ((macp = mac_alloc(MAC_VERSION)) == NULL) {
403 		KMEM_FREE(vgenp);
404 		return (DDI_FAILURE);
405 	}
406 	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
407 	macp->m_driver = vgenp;
408 	macp->m_dip = vnetdip;
409 	macp->m_src_addr = (uint8_t *)&(vgenp->macaddr);
410 	macp->m_callbacks = &vgen_m_callbacks;
411 	macp->m_min_sdu = 0;
412 	macp->m_max_sdu = ETHERMTU;
413 	vgenp->macp = macp;
414 
415 	/* allocate multicast table */
416 	vgenp->mctab = kmem_zalloc(VGEN_INIT_MCTAB_SIZE *
417 	    sizeof (struct ether_addr), KM_SLEEP);
418 	vgenp->mccount = 0;
419 	vgenp->mcsize = VGEN_INIT_MCTAB_SIZE;
420 
421 	mutex_init(&vgenp->lock, NULL, MUTEX_DRIVER, NULL);
422 
423 	/* register with MD event generator */
424 	if (vgen_mdeg_reg(vgenp) != DDI_SUCCESS) {
425 		mutex_destroy(&vgenp->lock);
426 		kmem_free(vgenp->mctab, VGEN_INIT_MCTAB_SIZE *
427 		    sizeof (struct ether_addr));
428 		mac_free(vgenp->macp);
429 		KMEM_FREE(vgenp);
430 		return (DDI_FAILURE);
431 	}
432 
433 	/* register macp of this vgen_t with vnet */
434 	*vgenmacp = vgenp->macp;
435 
436 	DBG1(NULL, NULL, "vnet(%d):%s: exit\n", instance);
437 	return (DDI_SUCCESS);
438 }
439 
440 /*
441  * Called by vnet to undo the initializations done by vgen_init().
442  * The handle provided by generic transport during vgen_init() is the argument.
443  */
444 int
445 vgen_uninit(void *arg)
446 {
447 	vgen_t	*vgenp = (vgen_t *)arg;
448 	vio_mblk_pool_t *rp, *nrp;
449 
450 	if (vgenp == NULL) {
451 		return (DDI_FAILURE);
452 	}
453 
454 	DBG1(vgenp, NULL, "enter\n");
455 
456 	/* unregister with MD event generator */
457 	vgen_mdeg_unreg(vgenp);
458 
459 	mutex_enter(&vgenp->lock);
460 
461 	/* detach all ports from the device */
462 	vgen_detach_ports(vgenp);
463 
464 	/*
465 	 * free any pending rx mblk pools,
466 	 * that couldn't be freed previously during channel detach.
467 	 */
468 	rp = vgenp->rmp;
469 	while (rp != NULL) {
470 		nrp = vgenp->rmp = rp->nextp;
471 		if (vio_destroy_mblks(rp)) {
472 			vgenp->rmp = rp;
473 			mutex_exit(&vgenp->lock);
474 			return (DDI_FAILURE);
475 		}
476 		rp = nrp;
477 	}
478 
479 	/* free multicast table */
480 	kmem_free(vgenp->mctab, vgenp->mcsize * sizeof (struct ether_addr));
481 
482 	mac_free(vgenp->macp);
483 
484 	mutex_exit(&vgenp->lock);
485 
486 	mutex_destroy(&vgenp->lock);
487 
488 	KMEM_FREE(vgenp);
489 
490 	DBG1(vgenp, NULL, "exit\n");
491 
492 	return (DDI_SUCCESS);
493 }
494 
495 /* enable transmit/receive for the device */
496 int
497 vgen_start(void *arg)
498 {
499 	vgen_t		*vgenp = (vgen_t *)arg;
500 
501 	DBG1(vgenp, NULL, "enter\n");
502 
503 	mutex_enter(&vgenp->lock);
504 	vgen_init_ports(vgenp);
505 	vgenp->flags |= VGEN_STARTED;
506 	mutex_exit(&vgenp->lock);
507 
508 	DBG1(vgenp, NULL, "exit\n");
509 	return (DDI_SUCCESS);
510 }
511 
512 /* stop transmit/receive */
513 void
514 vgen_stop(void *arg)
515 {
516 	vgen_t		*vgenp = (vgen_t *)arg;
517 
518 	DBG1(vgenp, NULL, "enter\n");
519 
520 	mutex_enter(&vgenp->lock);
521 	vgen_uninit_ports(vgenp);
522 	vgenp->flags &= ~(VGEN_STARTED);
523 	mutex_exit(&vgenp->lock);
524 
525 	DBG1(vgenp, NULL, "exit\n");
526 }
527 
528 /* vgen transmit function */
529 static mblk_t *
530 vgen_tx(void *arg, mblk_t *mp)
531 {
532 	vgen_port_t *portp;
533 	int status;
534 
535 	portp = (vgen_port_t *)arg;
536 	status = vgen_portsend(portp, mp);
537 	if (status != VGEN_SUCCESS) {
538 		/* failure */
539 		return (mp);
540 	}
541 	/* success */
542 	return (NULL);
543 }
544 
545 /* transmit packets over the given port */
546 static int
547 vgen_portsend(vgen_port_t *portp, mblk_t *mp)
548 {
549 	vgen_ldclist_t	*ldclp;
550 	vgen_ldc_t *ldcp;
551 	int status;
552 
553 	ldclp = &portp->ldclist;
554 	READ_ENTER(&ldclp->rwlock);
555 	/*
556 	 * NOTE: for now, we will assume we have a single channel.
557 	 */
558 	if (ldclp->headp == NULL) {
559 		RW_EXIT(&ldclp->rwlock);
560 		return (VGEN_FAILURE);
561 	}
562 	ldcp = ldclp->headp;
563 
564 	if (ldcp->need_resched) {
565 		/* out of tx resources, see vgen_ldcsend() for details. */
566 		mutex_enter(&ldcp->txlock);
567 		ldcp->statsp->tx_no_desc++;
568 		mutex_exit(&ldcp->txlock);
569 
570 		RW_EXIT(&ldclp->rwlock);
571 		return (VGEN_FAILURE);
572 	}
573 
574 	status  = vgen_ldcsend(ldcp, mp);
575 	RW_EXIT(&ldclp->rwlock);
576 
577 	if (status != VGEN_TX_SUCCESS)
578 		return (VGEN_FAILURE);
579 
580 	return (VGEN_SUCCESS);
581 }
582 
583 /* channel transmit function */
584 static int
585 vgen_ldcsend(vgen_ldc_t *ldcp, mblk_t *mp)
586 {
587 	vgen_private_desc_t	*tbufp;
588 	vgen_private_desc_t	*rtbufp;
589 	vnet_public_desc_t	*rtxdp;
590 	vgen_private_desc_t	*ntbufp;
591 	vnet_public_desc_t	*txdp;
592 	vio_dring_entry_hdr_t	*hdrp;
593 	vgen_stats_t		*statsp;
594 	struct ether_header	*ehp;
595 	boolean_t	is_bcast = B_FALSE;
596 	boolean_t	is_mcast = B_FALSE;
597 	size_t		mblksz;
598 	caddr_t		dst;
599 	mblk_t		*bp;
600 	size_t		size;
601 	int		rv = 0;
602 	ldc_status_t	istatus;
603 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
604 
605 	statsp = ldcp->statsp;
606 	size = msgsize(mp);
607 
608 	DBG1(vgenp, ldcp, "enter\n");
609 
610 	if (ldcp->ldc_status != LDC_UP) {
611 		DWARN(vgenp, ldcp, "status(%d), dropping packet\n",
612 		    ldcp->ldc_status);
613 		/* retry ldc_up() if needed */
614 		if (ldcp->flags & CHANNEL_STARTED)
615 			(void) ldc_up(ldcp->ldc_handle);
616 		goto vgen_tx_exit;
617 	}
618 
619 	/* drop the packet if ldc is not up or handshake is not done */
620 	if (ldcp->hphase != VH_DONE) {
621 		DWARN(vgenp, ldcp, "hphase(%x), dropping packet\n",
622 		    ldcp->hphase);
623 		goto vgen_tx_exit;
624 	}
625 
626 	if (size > (size_t)ETHERMAX) {
627 		DWARN(vgenp, ldcp, "invalid size(%d)\n", size);
628 		goto vgen_tx_exit;
629 	}
630 	if (size < ETHERMIN)
631 		size = ETHERMIN;
632 
633 	ehp = (struct ether_header *)mp->b_rptr;
634 	is_bcast = IS_BROADCAST(ehp);
635 	is_mcast = IS_MULTICAST(ehp);
636 
637 	mutex_enter(&ldcp->txlock);
638 	/*
639 	 * allocate a descriptor
640 	 */
641 	tbufp = ldcp->next_tbufp;
642 	ntbufp = NEXTTBUF(ldcp, tbufp);
643 	if (ntbufp == ldcp->cur_tbufp) { /* out of tbufs/txds */
644 
645 		mutex_enter(&ldcp->tclock);
646 		/* Try reclaiming now */
647 		vgen_reclaim_dring(ldcp);
648 		ldcp->reclaim_lbolt = ddi_get_lbolt();
649 
650 		if (ntbufp == ldcp->cur_tbufp) {
651 			/* Now we are really out of tbuf/txds */
652 			ldcp->need_resched = B_TRUE;
653 			mutex_exit(&ldcp->tclock);
654 
655 			statsp->tx_no_desc++;
656 			mutex_exit(&ldcp->txlock);
657 
658 			return (VGEN_TX_NORESOURCES);
659 		}
660 		mutex_exit(&ldcp->tclock);
661 	}
662 	/* update next available tbuf in the ring and update tx index */
663 	ldcp->next_tbufp = ntbufp;
664 	INCR_TXI(ldcp->next_txi, ldcp);
665 
666 	/* Mark the buffer busy before releasing the lock */
667 	tbufp->flags = VGEN_PRIV_DESC_BUSY;
668 	mutex_exit(&ldcp->txlock);
669 
670 	/* copy data into pre-allocated transmit buffer */
671 	dst = tbufp->datap + VNET_IPALIGN;
672 	for (bp = mp; bp != NULL; bp = bp->b_cont) {
673 		mblksz = MBLKL(bp);
674 		bcopy(bp->b_rptr, dst, mblksz);
675 		dst += mblksz;
676 	}
677 
678 	tbufp->datalen = size;
679 
680 	/* initialize the corresponding public descriptor (txd) */
681 	txdp = tbufp->descp;
682 	hdrp = &txdp->hdr;
683 	txdp->nbytes = size;
684 	txdp->ncookies = tbufp->ncookies;
685 	bcopy((tbufp->memcookie), (txdp->memcookie),
686 	    tbufp->ncookies * sizeof (ldc_mem_cookie_t));
687 
688 	mutex_enter(&ldcp->wrlock);
689 	/*
690 	 * If the flags not set to BUSY, it implies that the clobber
691 	 * was done while we were copying the data. In such case,
692 	 * discard the packet and return.
693 	 */
694 	if (tbufp->flags != VGEN_PRIV_DESC_BUSY) {
695 		statsp->oerrors++;
696 		mutex_exit(&ldcp->wrlock);
697 		goto vgen_tx_exit;
698 	}
699 	hdrp->dstate = VIO_DESC_READY;
700 
701 	/* update stats */
702 	statsp->opackets++;
703 	statsp->obytes += size;
704 	if (is_bcast)
705 		statsp->brdcstxmt++;
706 	else if (is_mcast)
707 		statsp->multixmt++;
708 
709 	/* send dring datamsg to the peer */
710 	if (ldcp->resched_peer) {
711 
712 		rtbufp = &ldcp->tbufp[ldcp->resched_peer_txi];
713 		rtxdp = rtbufp->descp;
714 
715 		if (rtxdp->hdr.dstate == VIO_DESC_READY) {
716 
717 			rv = vgen_send_dring_data(ldcp,
718 			    (uint32_t)ldcp->resched_peer_txi, -1);
719 			if (rv != 0) {
720 				/* error: drop the packet */
721 				DWARN(vgenp, ldcp, "vgen_send_dring_data "
722 				    "failed: rv(%d) len(%d)\n",
723 				    ldcp->ldc_id, rv, size);
724 				statsp->oerrors++;
725 			} else {
726 				ldcp->resched_peer = B_FALSE;
727 			}
728 
729 		}
730 
731 	}
732 
733 	mutex_exit(&ldcp->wrlock);
734 
735 vgen_tx_exit:
736 	if (rv == ECONNRESET) {
737 		/*
738 		 * Check if either callback thread or another tx thread is
739 		 * already running. Calling mutex_enter() will result in a
740 		 * deadlock if the other thread already holds cblock and is
741 		 * blocked in vnet_modify_fdb() (which is called from
742 		 * vgen_handle_evt_reset()) waiting for write access on rwlock,
743 		 * as this transmit thread already holds that lock as a reader
744 		 * in vnet_m_tx(). See comments in vnet_modify_fdb() in vnet.c.
745 		 */
746 		if (mutex_tryenter(&ldcp->cblock)) {
747 			if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
748 				DWARN(vgenp, ldcp, "ldc_status() error\n");
749 			} else {
750 				ldcp->ldc_status = istatus;
751 			}
752 			if (ldcp->ldc_status != LDC_UP) {
753 				/*
754 				 * Second arg is TRUE, as we know that
755 				 * the caller of this function - vnet_m_tx(),
756 				 * already holds fdb-rwlock as a reader.
757 				 */
758 				vgen_handle_evt_reset(ldcp, B_TRUE);
759 			}
760 			mutex_exit(&ldcp->cblock);
761 		}
762 	}
763 	freemsg(mp);
764 	DBG1(vgenp, ldcp, "exit\n");
765 	return (VGEN_TX_SUCCESS);
766 }
767 
768 /* enable/disable a multicast address */
769 int
770 vgen_multicst(void *arg, boolean_t add, const uint8_t *mca)
771 {
772 	vgen_t			*vgenp;
773 	vnet_mcast_msg_t	mcastmsg;
774 	vio_msg_tag_t		*tagp;
775 	vgen_port_t		*portp;
776 	vgen_portlist_t		*plistp;
777 	vgen_ldc_t		*ldcp;
778 	vgen_ldclist_t		*ldclp;
779 	struct ether_addr	*addrp;
780 	int			rv = DDI_FAILURE;
781 	uint32_t		i;
782 
783 	vgenp = (vgen_t *)arg;
784 	addrp = (struct ether_addr *)mca;
785 	tagp = &mcastmsg.tag;
786 	bzero(&mcastmsg, sizeof (mcastmsg));
787 
788 	mutex_enter(&vgenp->lock);
789 
790 	plistp = &(vgenp->vgenports);
791 
792 	READ_ENTER(&plistp->rwlock);
793 
794 	portp = vgenp->vsw_portp;
795 	if (portp == NULL) {
796 		RW_EXIT(&plistp->rwlock);
797 		mutex_exit(&vgenp->lock);
798 		return (rv);
799 	}
800 	ldclp = &portp->ldclist;
801 
802 	READ_ENTER(&ldclp->rwlock);
803 
804 	ldcp = ldclp->headp;
805 	if (ldcp == NULL)
806 		goto vgen_mcast_exit;
807 
808 	mutex_enter(&ldcp->cblock);
809 
810 	if (ldcp->hphase == VH_DONE) {
811 		/*
812 		 * If handshake is done, send a msg to vsw to add/remove
813 		 * the multicast address.
814 		 */
815 		tagp->vio_msgtype = VIO_TYPE_CTRL;
816 		tagp->vio_subtype = VIO_SUBTYPE_INFO;
817 		tagp->vio_subtype_env = VNET_MCAST_INFO;
818 		tagp->vio_sid = ldcp->local_sid;
819 		bcopy(mca, &(mcastmsg.mca), ETHERADDRL);
820 		mcastmsg.set = add;
821 		mcastmsg.count = 1;
822 		if (vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (mcastmsg),
823 		    B_FALSE) != VGEN_SUCCESS) {
824 			DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
825 			mutex_exit(&ldcp->cblock);
826 			goto vgen_mcast_exit;
827 		}
828 	} else {
829 		/* set the flag to send a msg to vsw after handshake is done */
830 		ldcp->need_mcast_sync = B_TRUE;
831 	}
832 
833 	mutex_exit(&ldcp->cblock);
834 
835 	if (add) {
836 
837 		/* expand multicast table if necessary */
838 		if (vgenp->mccount >= vgenp->mcsize) {
839 			struct ether_addr	*newtab;
840 			uint32_t		newsize;
841 
842 
843 			newsize = vgenp->mcsize * 2;
844 
845 			newtab = kmem_zalloc(newsize *
846 			    sizeof (struct ether_addr), KM_NOSLEEP);
847 			if (newtab == NULL)
848 				goto vgen_mcast_exit;
849 			bcopy(vgenp->mctab, newtab, vgenp->mcsize *
850 			    sizeof (struct ether_addr));
851 			kmem_free(vgenp->mctab,
852 			    vgenp->mcsize * sizeof (struct ether_addr));
853 
854 			vgenp->mctab = newtab;
855 			vgenp->mcsize = newsize;
856 		}
857 
858 		/* add address to the table */
859 		vgenp->mctab[vgenp->mccount++] = *addrp;
860 
861 	} else {
862 
863 		/* delete address from the table */
864 		for (i = 0; i < vgenp->mccount; i++) {
865 			if (ether_cmp(addrp, &(vgenp->mctab[i])) == 0) {
866 
867 				/*
868 				 * If there's more than one address in this
869 				 * table, delete the unwanted one by moving
870 				 * the last one in the list over top of it;
871 				 * otherwise, just remove it.
872 				 */
873 				if (vgenp->mccount > 1) {
874 					vgenp->mctab[i] =
875 					    vgenp->mctab[vgenp->mccount-1];
876 				}
877 				vgenp->mccount--;
878 				break;
879 			}
880 		}
881 	}
882 
883 	rv = DDI_SUCCESS;
884 
885 vgen_mcast_exit:
886 	RW_EXIT(&ldclp->rwlock);
887 	RW_EXIT(&plistp->rwlock);
888 
889 	mutex_exit(&vgenp->lock);
890 	return (rv);
891 }
892 
893 /* set or clear promiscuous mode on the device */
894 static int
895 vgen_promisc(void *arg, boolean_t on)
896 {
897 	_NOTE(ARGUNUSED(arg, on))
898 	return (DDI_SUCCESS);
899 }
900 
901 /* set the unicast mac address of the device */
902 static int
903 vgen_unicst(void *arg, const uint8_t *mca)
904 {
905 	_NOTE(ARGUNUSED(arg, mca))
906 	return (DDI_SUCCESS);
907 }
908 
909 /* get device statistics */
910 int
911 vgen_stat(void *arg, uint_t stat, uint64_t *val)
912 {
913 	vgen_t		*vgenp = (vgen_t *)arg;
914 	vgen_port_t	*portp;
915 	vgen_portlist_t	*plistp;
916 
917 	*val = 0;
918 
919 	plistp = &(vgenp->vgenports);
920 	READ_ENTER(&plistp->rwlock);
921 
922 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
923 		*val += vgen_port_stat(portp, stat);
924 	}
925 
926 	RW_EXIT(&plistp->rwlock);
927 
928 	return (0);
929 }
930 
931 static void
932 vgen_ioctl(void *arg, queue_t *wq, mblk_t *mp)
933 {
934 	 _NOTE(ARGUNUSED(arg, wq, mp))
935 }
936 
937 /* vgen internal functions */
938 /* detach all ports from the device */
939 static void
940 vgen_detach_ports(vgen_t *vgenp)
941 {
942 	vgen_port_t	*portp;
943 	vgen_portlist_t	*plistp;
944 
945 	plistp = &(vgenp->vgenports);
946 	WRITE_ENTER(&plistp->rwlock);
947 
948 	while ((portp = plistp->headp) != NULL) {
949 		vgen_port_detach(portp);
950 	}
951 
952 	RW_EXIT(&plistp->rwlock);
953 }
954 
955 /*
956  * detach the given port.
957  */
958 static void
959 vgen_port_detach(vgen_port_t *portp)
960 {
961 	vgen_t		*vgenp;
962 	vgen_ldclist_t	*ldclp;
963 	int		port_num;
964 
965 	vgenp = portp->vgenp;
966 	port_num = portp->port_num;
967 
968 	DBG1(vgenp, NULL, "port(%d):enter\n", port_num);
969 
970 	/* remove it from port list */
971 	vgen_port_list_remove(portp);
972 
973 	/* detach channels from this port */
974 	ldclp = &portp->ldclist;
975 	WRITE_ENTER(&ldclp->rwlock);
976 	while (ldclp->headp) {
977 		vgen_ldc_detach(ldclp->headp);
978 	}
979 	RW_EXIT(&ldclp->rwlock);
980 
981 	if (vgenp->vsw_portp == portp) {
982 		vgenp->vsw_portp = NULL;
983 	}
984 	KMEM_FREE(portp);
985 
986 	DBG1(vgenp, NULL, "port(%d):exit\n", port_num);
987 }
988 
989 /* add a port to port list */
990 static void
991 vgen_port_list_insert(vgen_port_t *portp)
992 {
993 	vgen_portlist_t *plistp;
994 	vgen_t *vgenp;
995 
996 	vgenp = portp->vgenp;
997 	plistp = &(vgenp->vgenports);
998 
999 	if (plistp->headp == NULL) {
1000 		plistp->headp = portp;
1001 	} else {
1002 		plistp->tailp->nextp = portp;
1003 	}
1004 	plistp->tailp = portp;
1005 	portp->nextp = NULL;
1006 }
1007 
1008 /* remove a port from port list */
1009 static void
1010 vgen_port_list_remove(vgen_port_t *portp)
1011 {
1012 	vgen_port_t *prevp;
1013 	vgen_port_t *nextp;
1014 	vgen_portlist_t *plistp;
1015 	vgen_t *vgenp;
1016 
1017 	vgenp = portp->vgenp;
1018 
1019 	plistp = &(vgenp->vgenports);
1020 
1021 	if (plistp->headp == NULL)
1022 		return;
1023 
1024 	if (portp == plistp->headp) {
1025 		plistp->headp = portp->nextp;
1026 		if (portp == plistp->tailp)
1027 			plistp->tailp = plistp->headp;
1028 	} else {
1029 		for (prevp = plistp->headp;
1030 		    ((nextp = prevp->nextp) != NULL) && (nextp != portp);
1031 		    prevp = nextp)
1032 			;
1033 		if (nextp == portp) {
1034 			prevp->nextp = portp->nextp;
1035 		}
1036 		if (portp == plistp->tailp)
1037 			plistp->tailp = prevp;
1038 	}
1039 }
1040 
1041 /* lookup a port in the list based on port_num */
1042 static vgen_port_t *
1043 vgen_port_lookup(vgen_portlist_t *plistp, int port_num)
1044 {
1045 	vgen_port_t *portp = NULL;
1046 
1047 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
1048 		if (portp->port_num == port_num) {
1049 			break;
1050 		}
1051 	}
1052 
1053 	return (portp);
1054 }
1055 
1056 /* enable ports for transmit/receive */
1057 static void
1058 vgen_init_ports(vgen_t *vgenp)
1059 {
1060 	vgen_port_t	*portp;
1061 	vgen_portlist_t	*plistp;
1062 
1063 	plistp = &(vgenp->vgenports);
1064 	READ_ENTER(&plistp->rwlock);
1065 
1066 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
1067 		vgen_port_init(portp);
1068 	}
1069 
1070 	RW_EXIT(&plistp->rwlock);
1071 }
1072 
1073 static void
1074 vgen_port_init(vgen_port_t *portp)
1075 {
1076 	vgen_t *vgenp;
1077 
1078 	vgenp = portp->vgenp;
1079 	/*
1080 	 * Create fdb entry in vnet, corresponding to the mac
1081 	 * address of this port. Note that the port specified
1082 	 * is vsw-port. This is done so that vsw-port acts
1083 	 * as the route to reach this macaddr, until the
1084 	 * channel for this port comes up (LDC_UP) and
1085 	 * handshake is done successfully.
1086 	 * eg, if the peer is OBP-vnet, it may not bring the
1087 	 * channel up for this port and may communicate via
1088 	 * vsw to reach this port.
1089 	 * Later, when Solaris-vnet comes up at the other end
1090 	 * of the channel for this port and brings up the channel,
1091 	 * it is an indication that peer vnet is capable of
1092 	 * distributed switching, so the direct route through this
1093 	 * port is specified in fdb, using vnet_modify_fdb(macaddr);
1094 	 */
1095 	vnet_add_fdb(vgenp->vnetp, (uint8_t *)&portp->macaddr,
1096 	    vgen_tx, vgenp->vsw_portp);
1097 
1098 	if (portp == vgenp->vsw_portp) {
1099 		/*
1100 		 * create the default route entry in vnet's fdb.
1101 		 * This is the entry used by vnet to reach
1102 		 * unknown destinations, which basically goes
1103 		 * through vsw on domain0 and out through the
1104 		 * physical device bound to vsw.
1105 		 */
1106 		vnet_add_def_rte(vgenp->vnetp, vgen_tx, portp);
1107 	}
1108 
1109 	/* Bring up the channels of this port */
1110 	vgen_init_ldcs(portp);
1111 }
1112 
1113 /* disable transmit/receive on ports */
1114 static void
1115 vgen_uninit_ports(vgen_t *vgenp)
1116 {
1117 	vgen_port_t	*portp;
1118 	vgen_portlist_t	*plistp;
1119 
1120 	plistp = &(vgenp->vgenports);
1121 	READ_ENTER(&plistp->rwlock);
1122 
1123 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
1124 		vgen_port_uninit(portp);
1125 	}
1126 
1127 	RW_EXIT(&plistp->rwlock);
1128 }
1129 
1130 static void
1131 vgen_port_uninit(vgen_port_t *portp)
1132 {
1133 	vgen_t *vgenp;
1134 
1135 	vgenp = portp->vgenp;
1136 
1137 	vgen_uninit_ldcs(portp);
1138 	/* delete the entry in vnet's fdb for this port */
1139 	vnet_del_fdb(vgenp->vnetp, (uint8_t *)&portp->macaddr);
1140 	if (portp == vgenp->vsw_portp) {
1141 		/*
1142 		 * if this is vsw-port, then delete the default
1143 		 * route entry in vnet's fdb.
1144 		 */
1145 		vnet_del_def_rte(vgenp->vnetp);
1146 	}
1147 }
1148 
1149 /* register with MD event generator */
1150 static int
1151 vgen_mdeg_reg(vgen_t *vgenp)
1152 {
1153 	mdeg_prop_spec_t	*pspecp;
1154 	mdeg_node_spec_t	*parentp;
1155 	uint_t			templatesz;
1156 	int			rv;
1157 	mdeg_handle_t		hdl;
1158 	int			i;
1159 
1160 	i = ddi_prop_get_int(DDI_DEV_T_ANY, vgenp->vnetdip,
1161 	    DDI_PROP_DONTPASS, reg_propname, -1);
1162 	if (i == -1) {
1163 		return (DDI_FAILURE);
1164 	}
1165 	templatesz = sizeof (vgen_prop_template);
1166 	pspecp = kmem_zalloc(templatesz, KM_NOSLEEP);
1167 	if (pspecp == NULL) {
1168 		return (DDI_FAILURE);
1169 	}
1170 	parentp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_NOSLEEP);
1171 	if (parentp == NULL) {
1172 		kmem_free(pspecp, templatesz);
1173 		return (DDI_FAILURE);
1174 	}
1175 
1176 	bcopy(vgen_prop_template, pspecp, templatesz);
1177 
1178 	/*
1179 	 * NOTE: The instance here refers to the value of "reg" property and
1180 	 * not the dev_info instance (ddi_get_instance()) of vnet.
1181 	 */
1182 	VGEN_SET_MDEG_PROP_INST(pspecp, i);
1183 
1184 	parentp->namep = "virtual-device";
1185 	parentp->specp = pspecp;
1186 
1187 	/* save parentp in vgen_t */
1188 	vgenp->mdeg_parentp = parentp;
1189 
1190 	rv = mdeg_register(parentp, &vport_match, vgen_mdeg_cb, vgenp, &hdl);
1191 	if (rv != MDEG_SUCCESS) {
1192 		DERR(vgenp, NULL, "mdeg_register failed\n");
1193 		KMEM_FREE(parentp);
1194 		kmem_free(pspecp, templatesz);
1195 		vgenp->mdeg_parentp = NULL;
1196 		return (DDI_FAILURE);
1197 	}
1198 
1199 	/* save mdeg handle in vgen_t */
1200 	vgenp->mdeg_hdl = hdl;
1201 
1202 	return (DDI_SUCCESS);
1203 }
1204 
1205 /* unregister with MD event generator */
1206 static void
1207 vgen_mdeg_unreg(vgen_t *vgenp)
1208 {
1209 	(void) mdeg_unregister(vgenp->mdeg_hdl);
1210 	kmem_free(vgenp->mdeg_parentp->specp, sizeof (vgen_prop_template));
1211 	KMEM_FREE(vgenp->mdeg_parentp);
1212 	vgenp->mdeg_parentp = NULL;
1213 	vgenp->mdeg_hdl = NULL;
1214 }
1215 
1216 /* callback function registered with MD event generator */
1217 static int
1218 vgen_mdeg_cb(void *cb_argp, mdeg_result_t *resp)
1219 {
1220 	int idx;
1221 	int vsw_idx = -1;
1222 	uint64_t val;
1223 	vgen_t *vgenp;
1224 
1225 	if ((resp == NULL) || (cb_argp == NULL)) {
1226 		return (MDEG_FAILURE);
1227 	}
1228 
1229 	vgenp = (vgen_t *)cb_argp;
1230 	DBG1(vgenp, NULL, "enter\n");
1231 
1232 	mutex_enter(&vgenp->lock);
1233 
1234 	DBG1(vgenp, NULL, "ports: removed(%x), "
1235 	"added(%x), updated(%x)\n", resp->removed.nelem,
1236 	    resp->added.nelem, resp->match_curr.nelem);
1237 
1238 	for (idx = 0; idx < resp->removed.nelem; idx++) {
1239 		(void) vgen_remove_port(vgenp, resp->removed.mdp,
1240 		    resp->removed.mdep[idx]);
1241 	}
1242 
1243 	if (vgenp->vsw_portp == NULL) {
1244 		/*
1245 		 * find vsw_port and add it first, because other ports need
1246 		 * this when adding fdb entry (see vgen_port_init()).
1247 		 */
1248 		for (idx = 0; idx < resp->added.nelem; idx++) {
1249 			if (!(md_get_prop_val(resp->added.mdp,
1250 			    resp->added.mdep[idx], swport_propname, &val))) {
1251 				if (val == 0) {
1252 					/*
1253 					 * This port is connected to the
1254 					 * vsw on dom0.
1255 					 */
1256 					vsw_idx = idx;
1257 					if (vgen_add_port(vgenp,
1258 					    resp->added.mdp,
1259 					    resp->added.mdep[idx]) !=
1260 					    DDI_SUCCESS) {
1261 						cmn_err(CE_NOTE, "vnet%d Could "
1262 						    "not initialize virtual "
1263 						    "switch port.",
1264 						    ddi_get_instance(vgenp->
1265 						    vnetdip));
1266 						mutex_exit(&vgenp->lock);
1267 						return (MDEG_FAILURE);
1268 					}
1269 					break;
1270 				}
1271 			}
1272 		}
1273 		if (vsw_idx == -1) {
1274 			DWARN(vgenp, NULL, "can't find vsw_port\n");
1275 			mutex_exit(&vgenp->lock);
1276 			return (MDEG_FAILURE);
1277 		}
1278 	}
1279 
1280 	for (idx = 0; idx < resp->added.nelem; idx++) {
1281 		if ((vsw_idx != -1) && (vsw_idx == idx)) /* skip vsw_port */
1282 			continue;
1283 
1284 		/* If this port can't be added just skip it. */
1285 		(void) vgen_add_port(vgenp, resp->added.mdp,
1286 		    resp->added.mdep[idx]);
1287 	}
1288 
1289 	for (idx = 0; idx < resp->match_curr.nelem; idx++) {
1290 		(void) vgen_update_port(vgenp, resp->match_curr.mdp,
1291 		    resp->match_curr.mdep[idx],
1292 		    resp->match_prev.mdp,
1293 		    resp->match_prev.mdep[idx]);
1294 	}
1295 
1296 	mutex_exit(&vgenp->lock);
1297 	DBG1(vgenp, NULL, "exit\n");
1298 	return (MDEG_SUCCESS);
1299 }
1300 
1301 /* add a new port to the device */
1302 static int
1303 vgen_add_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
1304 {
1305 	uint64_t	port_num;
1306 	uint64_t	*ldc_ids;
1307 	uint64_t	macaddr;
1308 	uint64_t	val;
1309 	int		num_ldcs;
1310 	int		vsw_port = B_FALSE;
1311 	int		i;
1312 	int		addrsz;
1313 	int		num_nodes = 0;
1314 	int		listsz = 0;
1315 	int		rv = DDI_SUCCESS;
1316 	mde_cookie_t	*listp = NULL;
1317 	uint8_t		*addrp;
1318 	struct ether_addr	ea;
1319 
1320 	/* read "id" property to get the port number */
1321 	if (md_get_prop_val(mdp, mdex, id_propname, &port_num)) {
1322 		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
1323 		return (DDI_FAILURE);
1324 	}
1325 
1326 	/*
1327 	 * Find the channel endpoint node(s) under this port node.
1328 	 */
1329 	if ((num_nodes = md_node_count(mdp)) <= 0) {
1330 		DWARN(vgenp, NULL, "invalid number of nodes found (%d)",
1331 		    num_nodes);
1332 		return (DDI_FAILURE);
1333 	}
1334 
1335 	/* allocate space for node list */
1336 	listsz = num_nodes * sizeof (mde_cookie_t);
1337 	listp = kmem_zalloc(listsz, KM_NOSLEEP);
1338 	if (listp == NULL)
1339 		return (DDI_FAILURE);
1340 
1341 	num_ldcs = md_scan_dag(mdp, mdex,
1342 	    md_find_name(mdp, channel_propname),
1343 	    md_find_name(mdp, "fwd"), listp);
1344 
1345 	if (num_ldcs <= 0) {
1346 		DWARN(vgenp, NULL, "can't find %s nodes", channel_propname);
1347 		kmem_free(listp, listsz);
1348 		return (DDI_FAILURE);
1349 	}
1350 
1351 	DBG2(vgenp, NULL, "num_ldcs %d", num_ldcs);
1352 
1353 	ldc_ids = kmem_zalloc(num_ldcs * sizeof (uint64_t), KM_NOSLEEP);
1354 	if (ldc_ids == NULL) {
1355 		kmem_free(listp, listsz);
1356 		return (DDI_FAILURE);
1357 	}
1358 
1359 	for (i = 0; i < num_ldcs; i++) {
1360 		/* read channel ids */
1361 		if (md_get_prop_val(mdp, listp[i], id_propname, &ldc_ids[i])) {
1362 			DWARN(vgenp, NULL, "prop(%s) not found\n",
1363 			    id_propname);
1364 			kmem_free(listp, listsz);
1365 			kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
1366 			return (DDI_FAILURE);
1367 		}
1368 		DBG2(vgenp, NULL, "ldc_id 0x%llx", ldc_ids[i]);
1369 	}
1370 
1371 	kmem_free(listp, listsz);
1372 
1373 	if (md_get_prop_data(mdp, mdex, rmacaddr_propname, &addrp,
1374 	    &addrsz)) {
1375 		DWARN(vgenp, NULL, "prop(%s) not found\n", rmacaddr_propname);
1376 		kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
1377 		return (DDI_FAILURE);
1378 	}
1379 
1380 	if (addrsz < ETHERADDRL) {
1381 		DWARN(vgenp, NULL, "invalid address size (%d)\n", addrsz);
1382 		kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
1383 		return (DDI_FAILURE);
1384 	}
1385 
1386 	macaddr = *((uint64_t *)addrp);
1387 
1388 	DBG2(vgenp, NULL, "remote mac address 0x%llx\n", macaddr);
1389 
1390 	for (i = ETHERADDRL - 1; i >= 0; i--) {
1391 		ea.ether_addr_octet[i] = macaddr & 0xFF;
1392 		macaddr >>= 8;
1393 	}
1394 
1395 	if (vgenp->vsw_portp == NULL) {
1396 		if (!(md_get_prop_val(mdp, mdex, swport_propname, &val))) {
1397 			if (val == 0) {
1398 				/* This port is connected to the vsw on dom0 */
1399 				vsw_port = B_TRUE;
1400 			}
1401 		}
1402 	}
1403 	if (vgen_port_attach_mdeg(vgenp, (int)port_num, ldc_ids, num_ldcs,
1404 	    &ea, vsw_port) != DDI_SUCCESS) {
1405 		cmn_err(CE_NOTE, "vnet%d failed to attach port %d remote MAC "
1406 		    "address %s", ddi_get_instance(vgenp->vnetdip),
1407 		    (int)port_num, ether_sprintf(&ea));
1408 		rv = DDI_FAILURE;
1409 	}
1410 
1411 	kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
1412 
1413 	return (rv);
1414 }
1415 
1416 /* remove a port from the device */
1417 static int
1418 vgen_remove_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
1419 {
1420 	uint64_t	port_num;
1421 	vgen_port_t	*portp;
1422 	vgen_portlist_t	*plistp;
1423 
1424 	/* read "id" property to get the port number */
1425 	if (md_get_prop_val(mdp, mdex, id_propname, &port_num)) {
1426 		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
1427 		return (DDI_FAILURE);
1428 	}
1429 
1430 	plistp = &(vgenp->vgenports);
1431 
1432 	WRITE_ENTER(&plistp->rwlock);
1433 	portp = vgen_port_lookup(plistp, (int)port_num);
1434 	if (portp == NULL) {
1435 		DWARN(vgenp, NULL, "can't find port(%lx)\n", port_num);
1436 		RW_EXIT(&plistp->rwlock);
1437 		return (DDI_FAILURE);
1438 	}
1439 
1440 	vgen_port_detach_mdeg(portp);
1441 	RW_EXIT(&plistp->rwlock);
1442 
1443 	return (DDI_SUCCESS);
1444 }
1445 
1446 /* attach a port to the device based on mdeg data */
1447 static int
1448 vgen_port_attach_mdeg(vgen_t *vgenp, int port_num, uint64_t *ldcids,
1449 	int num_ids, struct ether_addr *macaddr, boolean_t vsw_port)
1450 {
1451 	vgen_port_t		*portp;
1452 	vgen_portlist_t		*plistp;
1453 	int			i;
1454 
1455 	portp = kmem_zalloc(sizeof (vgen_port_t), KM_NOSLEEP);
1456 	if (portp == NULL) {
1457 		return (DDI_FAILURE);
1458 	}
1459 	portp->vgenp = vgenp;
1460 	portp->port_num = port_num;
1461 
1462 	DBG1(vgenp, NULL, "port_num(%d)\n", portp->port_num);
1463 
1464 	portp->ldclist.num_ldcs = 0;
1465 	portp->ldclist.headp = NULL;
1466 	rw_init(&portp->ldclist.rwlock, NULL, RW_DRIVER, NULL);
1467 
1468 	ether_copy(macaddr, &portp->macaddr);
1469 	for (i = 0; i < num_ids; i++) {
1470 		DBG2(vgenp, NULL, "ldcid (%lx)\n", ldcids[i]);
1471 		if (vgen_ldc_attach(portp, ldcids[i]) == DDI_FAILURE) {
1472 			rw_destroy(&portp->ldclist.rwlock);
1473 			vgen_port_detach(portp);
1474 			return (DDI_FAILURE);
1475 		}
1476 	}
1477 
1478 	/* link it into the list of ports */
1479 	plistp = &(vgenp->vgenports);
1480 	WRITE_ENTER(&plistp->rwlock);
1481 	vgen_port_list_insert(portp);
1482 	RW_EXIT(&plistp->rwlock);
1483 
1484 	/* This port is connected to the vsw on domain0 */
1485 	if (vsw_port)
1486 		vgenp->vsw_portp = portp;
1487 
1488 	if (vgenp->flags & VGEN_STARTED) {	/* interface is configured */
1489 		vgen_port_init(portp);
1490 	}
1491 
1492 	DBG1(vgenp, NULL, "exit: port_num(%d)\n", portp->port_num);
1493 	return (DDI_SUCCESS);
1494 }
1495 
1496 /* detach a port from the device based on mdeg data */
1497 static void
1498 vgen_port_detach_mdeg(vgen_port_t *portp)
1499 {
1500 	vgen_t *vgenp = portp->vgenp;
1501 
1502 	DBG1(vgenp, NULL, "enter: port_num(%d)\n", portp->port_num);
1503 	/* stop the port if needed */
1504 	if (vgenp->flags & VGEN_STARTED) {
1505 		vgen_port_uninit(portp);
1506 	}
1507 	vgen_port_detach(portp);
1508 
1509 	DBG1(vgenp, NULL, "exit: port_num(%d)\n", portp->port_num);
1510 }
1511 
1512 static int
1513 vgen_update_port(vgen_t *vgenp, md_t *curr_mdp, mde_cookie_t curr_mdex,
1514 	md_t *prev_mdp, mde_cookie_t prev_mdex)
1515 {
1516 	 _NOTE(ARGUNUSED(vgenp, curr_mdp, curr_mdex, prev_mdp, prev_mdex))
1517 
1518 	/* NOTE: TBD */
1519 	return (DDI_SUCCESS);
1520 }
1521 
1522 static uint64_t
1523 vgen_port_stat(vgen_port_t *portp, uint_t stat)
1524 {
1525 	vgen_ldclist_t	*ldclp;
1526 	vgen_ldc_t *ldcp;
1527 	uint64_t	val;
1528 
1529 	val = 0;
1530 	ldclp = &portp->ldclist;
1531 
1532 	READ_ENTER(&ldclp->rwlock);
1533 	for (ldcp = ldclp->headp; ldcp != NULL; ldcp = ldcp->nextp) {
1534 		val += vgen_ldc_stat(ldcp, stat);
1535 	}
1536 	RW_EXIT(&ldclp->rwlock);
1537 
1538 	return (val);
1539 }
1540 
1541 /* attach the channel corresponding to the given ldc_id to the port */
1542 static int
1543 vgen_ldc_attach(vgen_port_t *portp, uint64_t ldc_id)
1544 {
1545 	vgen_t 		*vgenp;
1546 	vgen_ldclist_t	*ldclp;
1547 	vgen_ldc_t 	*ldcp, **prev_ldcp;
1548 	ldc_attr_t 	attr;
1549 	int 		status;
1550 	ldc_status_t	istatus;
1551 	enum	{AST_init = 0x0, AST_ldc_alloc = 0x1,
1552 		AST_mutex_init = 0x2, AST_ldc_init = 0x4,
1553 		AST_ldc_reg_cb = 0x8, AST_alloc_tx_ring = 0x10,
1554 		AST_create_rxmblks = 0x20, AST_add_softintr = 0x40,
1555 		AST_create_rcv_thread = 0x80} attach_state;
1556 
1557 	attach_state = AST_init;
1558 	vgenp = portp->vgenp;
1559 	ldclp = &portp->ldclist;
1560 
1561 	ldcp = kmem_zalloc(sizeof (vgen_ldc_t), KM_NOSLEEP);
1562 	if (ldcp == NULL) {
1563 		goto ldc_attach_failed;
1564 	}
1565 	ldcp->ldc_id = ldc_id;
1566 	ldcp->portp = portp;
1567 
1568 	attach_state |= AST_ldc_alloc;
1569 
1570 	mutex_init(&ldcp->txlock, NULL, MUTEX_DRIVER, NULL);
1571 	mutex_init(&ldcp->cblock, NULL, MUTEX_DRIVER, NULL);
1572 	mutex_init(&ldcp->tclock, NULL, MUTEX_DRIVER, NULL);
1573 	mutex_init(&ldcp->wrlock, NULL, MUTEX_DRIVER, NULL);
1574 	mutex_init(&ldcp->rxlock, NULL, MUTEX_DRIVER, NULL);
1575 
1576 	attach_state |= AST_mutex_init;
1577 
1578 	attr.devclass = LDC_DEV_NT;
1579 	attr.instance = ddi_get_instance(vgenp->vnetdip);
1580 	attr.mode = LDC_MODE_UNRELIABLE;
1581 	attr.mtu = vnet_ldc_mtu;
1582 	status = ldc_init(ldc_id, &attr, &ldcp->ldc_handle);
1583 	if (status != 0) {
1584 		DWARN(vgenp, ldcp, "ldc_init failed,rv (%d)\n", status);
1585 		goto ldc_attach_failed;
1586 	}
1587 	attach_state |= AST_ldc_init;
1588 
1589 	if (vgen_rcv_thread_enabled) {
1590 		ldcp->rcv_thr_flags = 0;
1591 		ldcp->rcv_mhead = ldcp->rcv_mtail = NULL;
1592 		ldcp->soft_pri = PIL_6;
1593 
1594 		status = ddi_intr_add_softint(vgenp->vnetdip,
1595 		    &ldcp->soft_handle, ldcp->soft_pri,
1596 		    vgen_ldc_rcv_softintr, (void *)ldcp);
1597 		if (status != DDI_SUCCESS) {
1598 			DWARN(vgenp, ldcp, "add_softint failed, rv (%d)\n",
1599 			    status);
1600 			goto ldc_attach_failed;
1601 		}
1602 
1603 		/*
1604 		 * Initialize the soft_lock with the same priority as
1605 		 * the soft interrupt to protect from the soft interrupt.
1606 		 */
1607 		mutex_init(&ldcp->soft_lock, NULL, MUTEX_DRIVER,
1608 		    DDI_INTR_PRI(ldcp->soft_pri));
1609 		attach_state |= AST_add_softintr;
1610 
1611 		mutex_init(&ldcp->rcv_thr_lock, NULL, MUTEX_DRIVER, NULL);
1612 		cv_init(&ldcp->rcv_thr_cv, NULL, CV_DRIVER, NULL);
1613 		ldcp->rcv_thread = thread_create(NULL, 2 * DEFAULTSTKSZ,
1614 		    vgen_ldc_rcv_worker, ldcp, 0, &p0, TS_RUN, maxclsyspri);
1615 
1616 		attach_state |= AST_create_rcv_thread;
1617 		if (ldcp->rcv_thread == NULL) {
1618 			DWARN(vgenp, ldcp, "Failed to create worker thread");
1619 			goto ldc_attach_failed;
1620 		}
1621 	}
1622 
1623 	status = ldc_reg_callback(ldcp->ldc_handle, vgen_ldc_cb, (caddr_t)ldcp);
1624 	if (status != 0) {
1625 		DWARN(vgenp, ldcp, "ldc_reg_callback failed, rv (%d)\n",
1626 		    status);
1627 		goto ldc_attach_failed;
1628 	}
1629 	attach_state |= AST_ldc_reg_cb;
1630 
1631 	(void) ldc_status(ldcp->ldc_handle, &istatus);
1632 	ASSERT(istatus == LDC_INIT);
1633 	ldcp->ldc_status = istatus;
1634 
1635 	/* allocate transmit resources */
1636 	status = vgen_alloc_tx_ring(ldcp);
1637 	if (status != 0) {
1638 		goto ldc_attach_failed;
1639 	}
1640 	attach_state |= AST_alloc_tx_ring;
1641 
1642 	/* allocate receive resources */
1643 	status = vio_init_multipools(&ldcp->vmp, VGEN_NUM_VMPOOLS,
1644 	    vgen_rbufsz1, vgen_rbufsz2, vgen_rbufsz3,
1645 	    vgen_nrbufs1, vgen_nrbufs2, vgen_nrbufs3);
1646 	if (status != 0) {
1647 		goto ldc_attach_failed;
1648 	}
1649 	attach_state |= AST_create_rxmblks;
1650 
1651 	/* Setup kstats for the channel */
1652 	status = vgen_setup_kstats(ldcp);
1653 	if (status != VGEN_SUCCESS) {
1654 		goto ldc_attach_failed;
1655 	}
1656 
1657 	/* initialize vgen_versions supported */
1658 	bcopy(vgen_versions, ldcp->vgen_versions, sizeof (ldcp->vgen_versions));
1659 
1660 	/* link it into the list of channels for this port */
1661 	WRITE_ENTER(&ldclp->rwlock);
1662 	prev_ldcp = (vgen_ldc_t **)(&ldclp->headp);
1663 	ldcp->nextp = *prev_ldcp;
1664 	*prev_ldcp = ldcp;
1665 	ldclp->num_ldcs++;
1666 	RW_EXIT(&ldclp->rwlock);
1667 
1668 	ldcp->flags |= CHANNEL_ATTACHED;
1669 	return (DDI_SUCCESS);
1670 
1671 ldc_attach_failed:
1672 	if (attach_state & AST_ldc_reg_cb) {
1673 		(void) ldc_unreg_callback(ldcp->ldc_handle);
1674 	}
1675 	if (attach_state & AST_add_softintr) {
1676 		(void) ddi_intr_remove_softint(ldcp->soft_handle);
1677 		mutex_destroy(&ldcp->soft_lock);
1678 	}
1679 	if (attach_state & AST_create_rcv_thread) {
1680 		if (ldcp->rcv_thread != NULL) {
1681 			vgen_stop_rcv_thread(ldcp);
1682 		}
1683 		mutex_destroy(&ldcp->rcv_thr_lock);
1684 		cv_destroy(&ldcp->rcv_thr_cv);
1685 	}
1686 	if (attach_state & AST_create_rxmblks) {
1687 		vio_mblk_pool_t *fvmp = NULL;
1688 
1689 		vio_destroy_multipools(&ldcp->vmp, &fvmp);
1690 		ASSERT(fvmp == NULL);
1691 	}
1692 	if (attach_state & AST_alloc_tx_ring) {
1693 		vgen_free_tx_ring(ldcp);
1694 	}
1695 	if (attach_state & AST_ldc_init) {
1696 		(void) ldc_fini(ldcp->ldc_handle);
1697 	}
1698 	if (attach_state & AST_mutex_init) {
1699 		mutex_destroy(&ldcp->tclock);
1700 		mutex_destroy(&ldcp->txlock);
1701 		mutex_destroy(&ldcp->cblock);
1702 		mutex_destroy(&ldcp->wrlock);
1703 		mutex_destroy(&ldcp->rxlock);
1704 	}
1705 	if (attach_state & AST_ldc_alloc) {
1706 		KMEM_FREE(ldcp);
1707 	}
1708 	return (DDI_FAILURE);
1709 }
1710 
1711 /* detach a channel from the port */
1712 static void
1713 vgen_ldc_detach(vgen_ldc_t *ldcp)
1714 {
1715 	vgen_port_t	*portp;
1716 	vgen_t 		*vgenp;
1717 	vgen_ldc_t 	*pldcp;
1718 	vgen_ldc_t	**prev_ldcp;
1719 	vgen_ldclist_t	*ldclp;
1720 
1721 	portp = ldcp->portp;
1722 	vgenp = portp->vgenp;
1723 	ldclp = &portp->ldclist;
1724 
1725 	prev_ldcp =  (vgen_ldc_t **)&ldclp->headp;
1726 	for (; (pldcp = *prev_ldcp) != NULL; prev_ldcp = &pldcp->nextp) {
1727 		if (pldcp == ldcp) {
1728 			break;
1729 		}
1730 	}
1731 
1732 	if (pldcp == NULL) {
1733 		/* invalid ldcp? */
1734 		return;
1735 	}
1736 
1737 	if (ldcp->ldc_status != LDC_INIT) {
1738 		DWARN(vgenp, ldcp, "ldc_status is not INIT\n");
1739 	}
1740 
1741 	if (ldcp->flags & CHANNEL_ATTACHED) {
1742 		ldcp->flags &= ~(CHANNEL_ATTACHED);
1743 
1744 		(void) ldc_unreg_callback(ldcp->ldc_handle);
1745 		if (ldcp->rcv_thread != NULL) {
1746 			/* First stop the receive thread */
1747 			vgen_stop_rcv_thread(ldcp);
1748 			(void) ddi_intr_remove_softint(ldcp->soft_handle);
1749 			mutex_destroy(&ldcp->soft_lock);
1750 			mutex_destroy(&ldcp->rcv_thr_lock);
1751 			cv_destroy(&ldcp->rcv_thr_cv);
1752 		}
1753 		/* Free any queued messages */
1754 		if (ldcp->rcv_mhead != NULL) {
1755 			freemsgchain(ldcp->rcv_mhead);
1756 			ldcp->rcv_mhead = NULL;
1757 		}
1758 
1759 		vgen_destroy_kstats(ldcp);
1760 		/*
1761 		 * if we cannot reclaim all mblks, put this
1762 		 * on the list of pools(vgenp->rmp) to be reclaimed when the
1763 		 * device gets detached (see vgen_uninit()).
1764 		 */
1765 		vio_destroy_multipools(&ldcp->vmp, &vgenp->rmp);
1766 
1767 		/* free transmit resources */
1768 		vgen_free_tx_ring(ldcp);
1769 
1770 		(void) ldc_fini(ldcp->ldc_handle);
1771 		mutex_destroy(&ldcp->tclock);
1772 		mutex_destroy(&ldcp->txlock);
1773 		mutex_destroy(&ldcp->cblock);
1774 		mutex_destroy(&ldcp->wrlock);
1775 		mutex_destroy(&ldcp->rxlock);
1776 
1777 		/* unlink it from the list */
1778 		*prev_ldcp = ldcp->nextp;
1779 		ldclp->num_ldcs--;
1780 		KMEM_FREE(ldcp);
1781 	}
1782 }
1783 
1784 /*
1785  * This function allocates transmit resources for the channel.
1786  * The resources consist of a transmit descriptor ring and an associated
1787  * transmit buffer ring.
1788  */
1789 static int
1790 vgen_alloc_tx_ring(vgen_ldc_t *ldcp)
1791 {
1792 	void *tbufp;
1793 	ldc_mem_info_t minfo;
1794 	uint32_t txdsize;
1795 	uint32_t tbufsize;
1796 	int status;
1797 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
1798 
1799 	ldcp->num_txds = vnet_ntxds;
1800 	txdsize = sizeof (vnet_public_desc_t);
1801 	tbufsize = sizeof (vgen_private_desc_t);
1802 
1803 	/* allocate transmit buffer ring */
1804 	tbufp = kmem_zalloc(ldcp->num_txds * tbufsize, KM_NOSLEEP);
1805 	if (tbufp == NULL) {
1806 		return (DDI_FAILURE);
1807 	}
1808 
1809 	/* create transmit descriptor ring */
1810 	status = ldc_mem_dring_create(ldcp->num_txds, txdsize,
1811 	    &ldcp->tx_dhandle);
1812 	if (status) {
1813 		DWARN(vgenp, ldcp, "ldc_mem_dring_create() failed\n");
1814 		kmem_free(tbufp, ldcp->num_txds * tbufsize);
1815 		return (DDI_FAILURE);
1816 	}
1817 
1818 	/* get the addr of descripror ring */
1819 	status = ldc_mem_dring_info(ldcp->tx_dhandle, &minfo);
1820 	if (status) {
1821 		DWARN(vgenp, ldcp, "ldc_mem_dring_info() failed\n");
1822 		kmem_free(tbufp, ldcp->num_txds * tbufsize);
1823 		(void) ldc_mem_dring_destroy(ldcp->tx_dhandle);
1824 		ldcp->tbufp = NULL;
1825 		return (DDI_FAILURE);
1826 	}
1827 	ldcp->txdp = (vnet_public_desc_t *)(minfo.vaddr);
1828 	ldcp->tbufp = tbufp;
1829 
1830 	ldcp->txdendp = &((ldcp->txdp)[ldcp->num_txds]);
1831 	ldcp->tbufendp = &((ldcp->tbufp)[ldcp->num_txds]);
1832 
1833 	return (DDI_SUCCESS);
1834 }
1835 
1836 /* Free transmit resources for the channel */
1837 static void
1838 vgen_free_tx_ring(vgen_ldc_t *ldcp)
1839 {
1840 	int tbufsize = sizeof (vgen_private_desc_t);
1841 
1842 	/* free transmit descriptor ring */
1843 	(void) ldc_mem_dring_destroy(ldcp->tx_dhandle);
1844 
1845 	/* free transmit buffer ring */
1846 	kmem_free(ldcp->tbufp, ldcp->num_txds * tbufsize);
1847 	ldcp->txdp = ldcp->txdendp = NULL;
1848 	ldcp->tbufp = ldcp->tbufendp = NULL;
1849 }
1850 
1851 /* enable transmit/receive on the channels for the port */
1852 static void
1853 vgen_init_ldcs(vgen_port_t *portp)
1854 {
1855 	vgen_ldclist_t	*ldclp = &portp->ldclist;
1856 	vgen_ldc_t	*ldcp;
1857 
1858 	READ_ENTER(&ldclp->rwlock);
1859 	ldcp =  ldclp->headp;
1860 	for (; ldcp  != NULL; ldcp = ldcp->nextp) {
1861 		(void) vgen_ldc_init(ldcp);
1862 	}
1863 	RW_EXIT(&ldclp->rwlock);
1864 }
1865 
1866 /* stop transmit/receive on the channels for the port */
1867 static void
1868 vgen_uninit_ldcs(vgen_port_t *portp)
1869 {
1870 	vgen_ldclist_t	*ldclp = &portp->ldclist;
1871 	vgen_ldc_t	*ldcp;
1872 
1873 	READ_ENTER(&ldclp->rwlock);
1874 	ldcp =  ldclp->headp;
1875 	for (; ldcp  != NULL; ldcp = ldcp->nextp) {
1876 		vgen_ldc_uninit(ldcp);
1877 	}
1878 	RW_EXIT(&ldclp->rwlock);
1879 }
1880 
1881 /* enable transmit/receive on the channel */
1882 static int
1883 vgen_ldc_init(vgen_ldc_t *ldcp)
1884 {
1885 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
1886 	ldc_status_t	istatus;
1887 	int		rv;
1888 	uint32_t	retries = 0;
1889 	enum	{ ST_init = 0x0, ST_ldc_open = 0x1,
1890 		ST_init_tbufs = 0x2, ST_cb_enable = 0x4} init_state;
1891 	init_state = ST_init;
1892 
1893 	DBG1(vgenp, ldcp, "enter\n");
1894 	LDC_LOCK(ldcp);
1895 
1896 	rv = ldc_open(ldcp->ldc_handle);
1897 	if (rv != 0) {
1898 		DWARN(vgenp, ldcp, "ldc_open failed: rv(%d)\n", rv);
1899 		goto ldcinit_failed;
1900 	}
1901 	init_state |= ST_ldc_open;
1902 
1903 	(void) ldc_status(ldcp->ldc_handle, &istatus);
1904 	if (istatus != LDC_OPEN && istatus != LDC_READY) {
1905 		DWARN(vgenp, ldcp, "status(%d) is not OPEN/READY\n", istatus);
1906 		goto ldcinit_failed;
1907 	}
1908 	ldcp->ldc_status = istatus;
1909 
1910 	rv = vgen_init_tbufs(ldcp);
1911 	if (rv != 0) {
1912 		DWARN(vgenp, ldcp, "vgen_init_tbufs() failed\n");
1913 		goto ldcinit_failed;
1914 	}
1915 	init_state |= ST_init_tbufs;
1916 
1917 	rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_ENABLE);
1918 	if (rv != 0) {
1919 		DWARN(vgenp, ldcp, "ldc_set_cb_mode failed: rv(%d)\n", rv);
1920 		goto ldcinit_failed;
1921 	}
1922 
1923 	init_state |= ST_cb_enable;
1924 
1925 	do {
1926 		rv = ldc_up(ldcp->ldc_handle);
1927 		if ((rv != 0) && (rv == EWOULDBLOCK)) {
1928 			DBG2(vgenp, ldcp, "ldc_up err rv(%d)\n", rv);
1929 			drv_usecwait(VGEN_LDC_UP_DELAY);
1930 		}
1931 		if (retries++ >= vgen_ldcup_retries)
1932 			break;
1933 	} while (rv == EWOULDBLOCK);
1934 
1935 	(void) ldc_status(ldcp->ldc_handle, &istatus);
1936 	if (istatus == LDC_UP) {
1937 		DWARN(vgenp, ldcp, "status(%d) is UP\n", istatus);
1938 	}
1939 
1940 	ldcp->ldc_status = istatus;
1941 
1942 	/* initialize transmit watchdog timeout */
1943 	ldcp->wd_tid = timeout(vgen_ldc_watchdog, (caddr_t)ldcp,
1944 	    drv_usectohz(vnet_ldcwd_interval * 1000));
1945 
1946 	ldcp->hphase = -1;
1947 	ldcp->flags |= CHANNEL_STARTED;
1948 
1949 	/* if channel is already UP - start handshake */
1950 	if (istatus == LDC_UP) {
1951 		vgen_t *vgenp = LDC_TO_VGEN(ldcp);
1952 		if (ldcp->portp != vgenp->vsw_portp) {
1953 			/*
1954 			 * modify fdb entry to use this port as the
1955 			 * channel is up, instead of going through the
1956 			 * vsw-port (see comments in vgen_port_init())
1957 			 */
1958 			vnet_modify_fdb(vgenp->vnetp,
1959 			    (uint8_t *)&ldcp->portp->macaddr,
1960 			    vgen_tx, ldcp->portp, B_FALSE);
1961 		}
1962 
1963 		/* Initialize local session id */
1964 		ldcp->local_sid = ddi_get_lbolt();
1965 
1966 		/* clear peer session id */
1967 		ldcp->peer_sid = 0;
1968 		ldcp->hretries = 0;
1969 
1970 		/* Initiate Handshake process with peer ldc endpoint */
1971 		vgen_reset_hphase(ldcp);
1972 
1973 		mutex_exit(&ldcp->tclock);
1974 		mutex_exit(&ldcp->txlock);
1975 		mutex_exit(&ldcp->wrlock);
1976 		vgen_handshake(vh_nextphase(ldcp));
1977 		mutex_exit(&ldcp->rxlock);
1978 		mutex_exit(&ldcp->cblock);
1979 	} else {
1980 		LDC_UNLOCK(ldcp);
1981 	}
1982 
1983 	return (DDI_SUCCESS);
1984 
1985 ldcinit_failed:
1986 	if (init_state & ST_cb_enable) {
1987 		(void) ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE);
1988 	}
1989 	if (init_state & ST_init_tbufs) {
1990 		vgen_uninit_tbufs(ldcp);
1991 	}
1992 	if (init_state & ST_ldc_open) {
1993 		(void) ldc_close(ldcp->ldc_handle);
1994 	}
1995 	LDC_UNLOCK(ldcp);
1996 	DBG1(vgenp, ldcp, "exit\n");
1997 	return (DDI_FAILURE);
1998 }
1999 
2000 /* stop transmit/receive on the channel */
2001 static void
2002 vgen_ldc_uninit(vgen_ldc_t *ldcp)
2003 {
2004 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
2005 	int	rv;
2006 
2007 	DBG1(vgenp, ldcp, "enter\n");
2008 	LDC_LOCK(ldcp);
2009 
2010 	if ((ldcp->flags & CHANNEL_STARTED) == 0) {
2011 		LDC_UNLOCK(ldcp);
2012 		DWARN(vgenp, ldcp, "CHANNEL_STARTED flag is not set\n");
2013 		return;
2014 	}
2015 
2016 	/* disable further callbacks */
2017 	rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE);
2018 	if (rv != 0) {
2019 		DWARN(vgenp, ldcp, "ldc_set_cb_mode failed\n");
2020 	}
2021 
2022 	/*
2023 	 * clear handshake done bit and wait for pending tx and cb to finish.
2024 	 * release locks before untimeout(9F) is invoked to cancel timeouts.
2025 	 */
2026 	ldcp->hphase &= ~(VH_DONE);
2027 	LDC_UNLOCK(ldcp);
2028 
2029 	/* cancel handshake watchdog timeout */
2030 	if (ldcp->htid) {
2031 		(void) untimeout(ldcp->htid);
2032 		ldcp->htid = 0;
2033 	}
2034 
2035 	/* cancel transmit watchdog timeout */
2036 	if (ldcp->wd_tid) {
2037 		(void) untimeout(ldcp->wd_tid);
2038 		ldcp->wd_tid = 0;
2039 	}
2040 
2041 	drv_usecwait(1000);
2042 
2043 	/* acquire locks again; any pending transmits and callbacks are done */
2044 	LDC_LOCK(ldcp);
2045 
2046 	vgen_reset_hphase(ldcp);
2047 
2048 	vgen_uninit_tbufs(ldcp);
2049 
2050 	rv = ldc_close(ldcp->ldc_handle);
2051 	if (rv != 0) {
2052 		DWARN(vgenp, ldcp, "ldc_close err\n");
2053 	}
2054 	ldcp->ldc_status = LDC_INIT;
2055 	ldcp->flags &= ~(CHANNEL_STARTED);
2056 
2057 	LDC_UNLOCK(ldcp);
2058 
2059 	DBG1(vgenp, ldcp, "exit\n");
2060 }
2061 
2062 /* Initialize the transmit buffer ring for the channel */
2063 static int
2064 vgen_init_tbufs(vgen_ldc_t *ldcp)
2065 {
2066 	vgen_private_desc_t	*tbufp;
2067 	vnet_public_desc_t	*txdp;
2068 	vio_dring_entry_hdr_t		*hdrp;
2069 	int 			i;
2070 	int 			rv;
2071 	caddr_t			datap = NULL;
2072 	int			ci;
2073 	uint32_t		ncookies;
2074 
2075 	bzero(ldcp->tbufp, sizeof (*tbufp) * (ldcp->num_txds));
2076 	bzero(ldcp->txdp, sizeof (*txdp) * (ldcp->num_txds));
2077 
2078 	datap = kmem_zalloc(ldcp->num_txds * VGEN_TXDBLK_SZ, KM_SLEEP);
2079 	ldcp->tx_datap = datap;
2080 
2081 	/*
2082 	 * for each private descriptor, allocate a ldc mem_handle which is
2083 	 * required to map the data during transmit, set the flags
2084 	 * to free (available for use by transmit routine).
2085 	 */
2086 
2087 	for (i = 0; i < ldcp->num_txds; i++) {
2088 
2089 		tbufp = &(ldcp->tbufp[i]);
2090 		rv = ldc_mem_alloc_handle(ldcp->ldc_handle,
2091 		    &(tbufp->memhandle));
2092 		if (rv) {
2093 			tbufp->memhandle = 0;
2094 			goto init_tbufs_failed;
2095 		}
2096 
2097 		/*
2098 		 * bind ldc memhandle to the corresponding transmit buffer.
2099 		 */
2100 		ci = ncookies = 0;
2101 		rv = ldc_mem_bind_handle(tbufp->memhandle,
2102 		    (caddr_t)datap, VGEN_TXDBLK_SZ, LDC_SHADOW_MAP,
2103 		    LDC_MEM_R, &(tbufp->memcookie[ci]), &ncookies);
2104 		if (rv != 0) {
2105 			goto init_tbufs_failed;
2106 		}
2107 
2108 		/*
2109 		 * successful in binding the handle to tx data buffer.
2110 		 * set datap in the private descr to this buffer.
2111 		 */
2112 		tbufp->datap = datap;
2113 
2114 		if ((ncookies == 0) ||
2115 		    (ncookies > MAX_COOKIES)) {
2116 			goto init_tbufs_failed;
2117 		}
2118 
2119 		for (ci = 1; ci < ncookies; ci++) {
2120 			rv = ldc_mem_nextcookie(tbufp->memhandle,
2121 			    &(tbufp->memcookie[ci]));
2122 			if (rv != 0) {
2123 				goto init_tbufs_failed;
2124 			}
2125 		}
2126 
2127 		tbufp->ncookies = ncookies;
2128 		datap += VGEN_TXDBLK_SZ;
2129 
2130 		tbufp->flags = VGEN_PRIV_DESC_FREE;
2131 		txdp = &(ldcp->txdp[i]);
2132 		hdrp = &txdp->hdr;
2133 		hdrp->dstate = VIO_DESC_FREE;
2134 		hdrp->ack = B_FALSE;
2135 		tbufp->descp = txdp;
2136 
2137 	}
2138 
2139 	/* reset tbuf walking pointers */
2140 	ldcp->next_tbufp = ldcp->tbufp;
2141 	ldcp->cur_tbufp = ldcp->tbufp;
2142 
2143 	/* initialize tx seqnum and index */
2144 	ldcp->next_txseq = VNET_ISS;
2145 	ldcp->next_txi = 0;
2146 
2147 	ldcp->resched_peer = B_TRUE;
2148 	ldcp->resched_peer_txi = 0;
2149 
2150 	return (DDI_SUCCESS);
2151 
2152 init_tbufs_failed:;
2153 	vgen_uninit_tbufs(ldcp);
2154 	return (DDI_FAILURE);
2155 }
2156 
2157 /* Uninitialize transmit buffer ring for the channel */
2158 static void
2159 vgen_uninit_tbufs(vgen_ldc_t *ldcp)
2160 {
2161 	vgen_private_desc_t	*tbufp = ldcp->tbufp;
2162 	int 			i;
2163 
2164 	/* for each tbuf (priv_desc), free ldc mem_handle */
2165 	for (i = 0; i < ldcp->num_txds; i++) {
2166 
2167 		tbufp = &(ldcp->tbufp[i]);
2168 
2169 		if (tbufp->datap) { /* if bound to a ldc memhandle */
2170 			(void) ldc_mem_unbind_handle(tbufp->memhandle);
2171 			tbufp->datap = NULL;
2172 		}
2173 		if (tbufp->memhandle) {
2174 			(void) ldc_mem_free_handle(tbufp->memhandle);
2175 			tbufp->memhandle = 0;
2176 		}
2177 	}
2178 
2179 	if (ldcp->tx_datap) {
2180 		/* prealloc'd tx data buffer */
2181 		kmem_free(ldcp->tx_datap, ldcp->num_txds * VGEN_TXDBLK_SZ);
2182 		ldcp->tx_datap = NULL;
2183 	}
2184 
2185 	bzero(ldcp->tbufp, sizeof (vgen_private_desc_t) * (ldcp->num_txds));
2186 	bzero(ldcp->txdp, sizeof (vnet_public_desc_t) * (ldcp->num_txds));
2187 }
2188 
2189 /* clobber tx descriptor ring */
2190 static void
2191 vgen_clobber_tbufs(vgen_ldc_t *ldcp)
2192 {
2193 	vnet_public_desc_t	*txdp;
2194 	vgen_private_desc_t	*tbufp;
2195 	vio_dring_entry_hdr_t	*hdrp;
2196 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
2197 	int i;
2198 #ifdef DEBUG
2199 	int ndone = 0;
2200 #endif
2201 
2202 	for (i = 0; i < ldcp->num_txds; i++) {
2203 
2204 		tbufp = &(ldcp->tbufp[i]);
2205 		txdp = tbufp->descp;
2206 		hdrp = &txdp->hdr;
2207 
2208 		if (tbufp->flags & VGEN_PRIV_DESC_BUSY) {
2209 			tbufp->flags = VGEN_PRIV_DESC_FREE;
2210 #ifdef DEBUG
2211 			if (hdrp->dstate == VIO_DESC_DONE)
2212 				ndone++;
2213 #endif
2214 			hdrp->dstate = VIO_DESC_FREE;
2215 			hdrp->ack = B_FALSE;
2216 		}
2217 	}
2218 	/* reset tbuf walking pointers */
2219 	ldcp->next_tbufp = ldcp->tbufp;
2220 	ldcp->cur_tbufp = ldcp->tbufp;
2221 
2222 	/* reset tx seqnum and index */
2223 	ldcp->next_txseq = VNET_ISS;
2224 	ldcp->next_txi = 0;
2225 
2226 	ldcp->resched_peer = B_TRUE;
2227 	ldcp->resched_peer_txi = 0;
2228 
2229 	DBG2(vgenp, ldcp, "num descrs done (%d)\n", ndone);
2230 }
2231 
2232 /* clobber receive descriptor ring */
2233 static void
2234 vgen_clobber_rxds(vgen_ldc_t *ldcp)
2235 {
2236 	ldcp->rx_dhandle = 0;
2237 	bzero(&ldcp->rx_dcookie, sizeof (ldcp->rx_dcookie));
2238 	ldcp->rxdp = NULL;
2239 	ldcp->next_rxi = 0;
2240 	ldcp->num_rxds = 0;
2241 	ldcp->next_rxseq = VNET_ISS;
2242 }
2243 
2244 /* initialize receive descriptor ring */
2245 static int
2246 vgen_init_rxds(vgen_ldc_t *ldcp, uint32_t num_desc, uint32_t desc_size,
2247 	ldc_mem_cookie_t *dcookie, uint32_t ncookies)
2248 {
2249 	int rv;
2250 	ldc_mem_info_t minfo;
2251 
2252 	rv = ldc_mem_dring_map(ldcp->ldc_handle, dcookie, ncookies, num_desc,
2253 	    desc_size, LDC_SHADOW_MAP, &(ldcp->rx_dhandle));
2254 	if (rv != 0) {
2255 		return (DDI_FAILURE);
2256 	}
2257 
2258 	/*
2259 	 * sucessfully mapped, now try to
2260 	 * get info about the mapped dring
2261 	 */
2262 	rv = ldc_mem_dring_info(ldcp->rx_dhandle, &minfo);
2263 	if (rv != 0) {
2264 		(void) ldc_mem_dring_unmap(ldcp->rx_dhandle);
2265 		return (DDI_FAILURE);
2266 	}
2267 
2268 	/*
2269 	 * save ring address, number of descriptors.
2270 	 */
2271 	ldcp->rxdp = (vnet_public_desc_t *)(minfo.vaddr);
2272 	bcopy(dcookie, &(ldcp->rx_dcookie), sizeof (*dcookie));
2273 	ldcp->num_rxdcookies = ncookies;
2274 	ldcp->num_rxds = num_desc;
2275 	ldcp->next_rxi = 0;
2276 	ldcp->next_rxseq = VNET_ISS;
2277 
2278 	return (DDI_SUCCESS);
2279 }
2280 
2281 /* get channel statistics */
2282 static uint64_t
2283 vgen_ldc_stat(vgen_ldc_t *ldcp, uint_t stat)
2284 {
2285 	vgen_stats_t *statsp;
2286 	uint64_t val;
2287 
2288 	val = 0;
2289 	statsp = ldcp->statsp;
2290 	switch (stat) {
2291 
2292 	case MAC_STAT_MULTIRCV:
2293 		val = statsp->multircv;
2294 		break;
2295 
2296 	case MAC_STAT_BRDCSTRCV:
2297 		val = statsp->brdcstrcv;
2298 		break;
2299 
2300 	case MAC_STAT_MULTIXMT:
2301 		val = statsp->multixmt;
2302 		break;
2303 
2304 	case MAC_STAT_BRDCSTXMT:
2305 		val = statsp->brdcstxmt;
2306 		break;
2307 
2308 	case MAC_STAT_NORCVBUF:
2309 		val = statsp->norcvbuf;
2310 		break;
2311 
2312 	case MAC_STAT_IERRORS:
2313 		val = statsp->ierrors;
2314 		break;
2315 
2316 	case MAC_STAT_NOXMTBUF:
2317 		val = statsp->noxmtbuf;
2318 		break;
2319 
2320 	case MAC_STAT_OERRORS:
2321 		val = statsp->oerrors;
2322 		break;
2323 
2324 	case MAC_STAT_COLLISIONS:
2325 		break;
2326 
2327 	case MAC_STAT_RBYTES:
2328 		val = statsp->rbytes;
2329 		break;
2330 
2331 	case MAC_STAT_IPACKETS:
2332 		val = statsp->ipackets;
2333 		break;
2334 
2335 	case MAC_STAT_OBYTES:
2336 		val = statsp->obytes;
2337 		break;
2338 
2339 	case MAC_STAT_OPACKETS:
2340 		val = statsp->opackets;
2341 		break;
2342 
2343 	/* stats not relevant to ldc, return 0 */
2344 	case MAC_STAT_IFSPEED:
2345 	case ETHER_STAT_ALIGN_ERRORS:
2346 	case ETHER_STAT_FCS_ERRORS:
2347 	case ETHER_STAT_FIRST_COLLISIONS:
2348 	case ETHER_STAT_MULTI_COLLISIONS:
2349 	case ETHER_STAT_DEFER_XMTS:
2350 	case ETHER_STAT_TX_LATE_COLLISIONS:
2351 	case ETHER_STAT_EX_COLLISIONS:
2352 	case ETHER_STAT_MACXMT_ERRORS:
2353 	case ETHER_STAT_CARRIER_ERRORS:
2354 	case ETHER_STAT_TOOLONG_ERRORS:
2355 	case ETHER_STAT_XCVR_ADDR:
2356 	case ETHER_STAT_XCVR_ID:
2357 	case ETHER_STAT_XCVR_INUSE:
2358 	case ETHER_STAT_CAP_1000FDX:
2359 	case ETHER_STAT_CAP_1000HDX:
2360 	case ETHER_STAT_CAP_100FDX:
2361 	case ETHER_STAT_CAP_100HDX:
2362 	case ETHER_STAT_CAP_10FDX:
2363 	case ETHER_STAT_CAP_10HDX:
2364 	case ETHER_STAT_CAP_ASMPAUSE:
2365 	case ETHER_STAT_CAP_PAUSE:
2366 	case ETHER_STAT_CAP_AUTONEG:
2367 	case ETHER_STAT_ADV_CAP_1000FDX:
2368 	case ETHER_STAT_ADV_CAP_1000HDX:
2369 	case ETHER_STAT_ADV_CAP_100FDX:
2370 	case ETHER_STAT_ADV_CAP_100HDX:
2371 	case ETHER_STAT_ADV_CAP_10FDX:
2372 	case ETHER_STAT_ADV_CAP_10HDX:
2373 	case ETHER_STAT_ADV_CAP_ASMPAUSE:
2374 	case ETHER_STAT_ADV_CAP_PAUSE:
2375 	case ETHER_STAT_ADV_CAP_AUTONEG:
2376 	case ETHER_STAT_LP_CAP_1000FDX:
2377 	case ETHER_STAT_LP_CAP_1000HDX:
2378 	case ETHER_STAT_LP_CAP_100FDX:
2379 	case ETHER_STAT_LP_CAP_100HDX:
2380 	case ETHER_STAT_LP_CAP_10FDX:
2381 	case ETHER_STAT_LP_CAP_10HDX:
2382 	case ETHER_STAT_LP_CAP_ASMPAUSE:
2383 	case ETHER_STAT_LP_CAP_PAUSE:
2384 	case ETHER_STAT_LP_CAP_AUTONEG:
2385 	case ETHER_STAT_LINK_ASMPAUSE:
2386 	case ETHER_STAT_LINK_PAUSE:
2387 	case ETHER_STAT_LINK_AUTONEG:
2388 	case ETHER_STAT_LINK_DUPLEX:
2389 	default:
2390 		val = 0;
2391 		break;
2392 
2393 	}
2394 	return (val);
2395 }
2396 
2397 /*
2398  * LDC channel is UP, start handshake process with peer.
2399  * Flag tells vnet_modify_fdb() about the context: set to B_TRUE if this
2400  * function is being called from transmit routine, otherwise B_FALSE.
2401  */
2402 static void
2403 vgen_handle_evt_up(vgen_ldc_t *ldcp, boolean_t flag)
2404 {
2405 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
2406 
2407 	DBG1(vgenp, ldcp, "enter\n");
2408 
2409 	ASSERT(MUTEX_HELD(&ldcp->cblock));
2410 
2411 	if (ldcp->portp != vgenp->vsw_portp) {
2412 		/*
2413 		 * modify fdb entry to use this port as the
2414 		 * channel is up, instead of going through the
2415 		 * vsw-port (see comments in vgen_port_init())
2416 		 */
2417 		vnet_modify_fdb(vgenp->vnetp, (uint8_t *)&ldcp->portp->macaddr,
2418 		    vgen_tx, ldcp->portp, flag);
2419 	}
2420 
2421 	/* Initialize local session id */
2422 	ldcp->local_sid = ddi_get_lbolt();
2423 
2424 	/* clear peer session id */
2425 	ldcp->peer_sid = 0;
2426 	ldcp->hretries = 0;
2427 
2428 	if (ldcp->hphase != VH_PHASE0) {
2429 		vgen_handshake_reset(ldcp);
2430 	}
2431 
2432 	/* Initiate Handshake process with peer ldc endpoint */
2433 	vgen_handshake(vh_nextphase(ldcp));
2434 
2435 	DBG1(vgenp, ldcp, "exit\n");
2436 }
2437 
2438 /*
2439  * LDC channel is Reset, terminate connection with peer and try to
2440  * bring the channel up again.
2441  * Flag tells vnet_modify_fdb() about the context: set to B_TRUE if this
2442  * function is being called from transmit routine, otherwise B_FALSE.
2443  */
2444 static void
2445 vgen_handle_evt_reset(vgen_ldc_t *ldcp, boolean_t flag)
2446 {
2447 	ldc_status_t istatus;
2448 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
2449 	int	rv;
2450 
2451 	DBG1(vgenp, ldcp, "enter\n");
2452 
2453 	ASSERT(MUTEX_HELD(&ldcp->cblock));
2454 
2455 	if ((ldcp->portp != vgenp->vsw_portp) &&
2456 	    (vgenp->vsw_portp != NULL)) {
2457 		/*
2458 		 * modify fdb entry to use vsw-port  as the
2459 		 * channel is reset and we don't have a direct
2460 		 * link to the destination (see comments
2461 		 * in vgen_port_init()).
2462 		 */
2463 		vnet_modify_fdb(vgenp->vnetp, (uint8_t *)&ldcp->portp->macaddr,
2464 		    vgen_tx, vgenp->vsw_portp, flag);
2465 	}
2466 
2467 	if (ldcp->hphase != VH_PHASE0) {
2468 		vgen_handshake_reset(ldcp);
2469 	}
2470 
2471 	/* try to bring the channel up */
2472 	rv = ldc_up(ldcp->ldc_handle);
2473 	if (rv != 0) {
2474 		DWARN(vgenp, ldcp, "ldc_up err rv(%d)\n", rv);
2475 	}
2476 
2477 	if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
2478 		DWARN(vgenp, ldcp, "ldc_status err\n");
2479 	} else {
2480 		ldcp->ldc_status = istatus;
2481 	}
2482 
2483 	/* if channel is already UP - restart handshake */
2484 	if (ldcp->ldc_status == LDC_UP) {
2485 		vgen_handle_evt_up(ldcp, flag);
2486 	}
2487 
2488 	DBG1(vgenp, ldcp, "exit\n");
2489 }
2490 
/*
 * Interrupt handler (LDC callback) for the channel.
 *
 * 'event' is a bit mask of LDC_EVT_* values and 'arg' is the vgen_ldc_t of
 * the channel. UP, READ and RESET/DOWN events are handled in that order
 * under cblock. Packets collected by a direct read are passed to vnet_rx()
 * and a pending handshake timer is cancelled only after cblock has been
 * dropped. Always returns LDC_SUCCESS.
 */
static uint_t
vgen_ldc_cb(uint64_t event, caddr_t arg)
{
	/* NOTE(review): 'event' is tested below; ARGUNUSED looks stale */
	_NOTE(ARGUNUSED(event))
	vgen_ldc_t	*ldcp;
	vgen_t		*vgenp;
	ldc_status_t 	istatus;
	mblk_t		*bp = NULL;
	vgen_stats_t	*statsp;

	ldcp = (vgen_ldc_t *)arg;
	vgenp = LDC_TO_VGEN(ldcp);
	statsp = ldcp->statsp;

	DBG1(vgenp, ldcp, "enter\n");

	mutex_enter(&ldcp->cblock);
	statsp->callbacks++;
	/* nothing to do for a channel that is not open */
	if ((ldcp->ldc_status == LDC_INIT) || (ldcp->ldc_handle == NULL)) {
		DWARN(vgenp, ldcp, "status(%d) is LDC_INIT\n",
		    ldcp->ldc_status);
		mutex_exit(&ldcp->cblock);
		return (LDC_SUCCESS);
	}

	/*
	 * NOTE: not using switch() as event could be triggered by
	 * a state change and a read request. Also the ordering	of the
	 * check for the event types is deliberate.
	 */
	if (event & LDC_EVT_UP) {
		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
			DWARN(vgenp, ldcp, "ldc_status err\n");
		} else {
			ldcp->ldc_status = istatus;
		}
		ASSERT(ldcp->ldc_status == LDC_UP);
		DWARN(vgenp, ldcp, "event(%lx) UP, status(%d)\n",
		    event, ldcp->ldc_status);

		vgen_handle_evt_up(ldcp, B_FALSE);

		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);
	}

	if (event & LDC_EVT_READ) {
		DBG2(vgenp, ldcp, "event(%lx) READ, status(%d)\n",
		    event, ldcp->ldc_status);

		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);

		if (ldcp->rcv_thread != NULL) {
			/*
			 * If the receive thread is enabled, then
			 * wakeup the receive thread to process the
			 * LDC messages.
			 */
			/* cblock is dropped while rcv_thr_lock is held */
			mutex_exit(&ldcp->cblock);
			mutex_enter(&ldcp->rcv_thr_lock);
			if (!(ldcp->rcv_thr_flags & VGEN_WTHR_DATARCVD)) {
				ldcp->rcv_thr_flags |= VGEN_WTHR_DATARCVD;
				cv_signal(&ldcp->rcv_thr_cv);
			}
			mutex_exit(&ldcp->rcv_thr_lock);
			mutex_enter(&ldcp->cblock);
		} else  {
			/*
			 * No receive thread: read the messages right here
			 * and take over the chain of received packets.
			 */
			vgen_handle_evt_read(ldcp);
			bp = ldcp->rcv_mhead;
			ldcp->rcv_mhead = ldcp->rcv_mtail = NULL;
		}
	}

	if (event & (LDC_EVT_RESET | LDC_EVT_DOWN)) {
		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
			DWARN(vgenp, ldcp, "ldc_status error\n");
		} else {
			ldcp->ldc_status = istatus;
		}
		DWARN(vgenp, ldcp, "event(%lx) RESET/DOWN, status(%d)\n",
		    event, ldcp->ldc_status);

		vgen_handle_evt_reset(ldcp, B_FALSE);
	}
	mutex_exit(&ldcp->cblock);

	/* send up the received packets to MAC layer */
	if (bp != NULL) {
		vnet_rx(vgenp->vnetp, NULL, bp);
	}

	if (ldcp->cancel_htid) {
		/*
		 * Cancel handshake timer.
		 * untimeout(9F) will not return until the pending callback is
		 * cancelled or has run. No problems will result from calling
		 * untimeout if the handler has already completed.
		 * If the timeout handler did run, then it would just
		 * return as cancel_htid is set.
		 */
		(void) untimeout(ldcp->cancel_htid);
		ldcp->cancel_htid = 0;
	}
	DBG1(vgenp, ldcp, "exit\n");

	return (LDC_SUCCESS);
}
2598 
2599 static void
2600 vgen_handle_evt_read(vgen_ldc_t *ldcp)
2601 {
2602 	int		rv;
2603 	uint64_t	ldcmsg[7];
2604 	size_t		msglen;
2605 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
2606 	vio_msg_tag_t	*tagp;
2607 	ldc_status_t 	istatus;
2608 	boolean_t 	has_data;
2609 
2610 	DBG1(vgenp, ldcp, "enter\n");
2611 
2612 	/*
2613 	 * If the receive thread is enabled, then the cblock
2614 	 * need to be acquired here. If not, the vgen_ldc_cb()
2615 	 * calls this function with cblock held already.
2616 	 */
2617 	if (ldcp->rcv_thread != NULL) {
2618 		mutex_enter(&ldcp->cblock);
2619 	} else {
2620 		ASSERT(MUTEX_HELD(&ldcp->cblock));
2621 	}
2622 
2623 vgen_evt_read:
2624 	do {
2625 		msglen = sizeof (ldcmsg);
2626 		rv = ldc_read(ldcp->ldc_handle, (caddr_t)&ldcmsg, &msglen);
2627 
2628 		if (rv != 0) {
2629 			DWARN(vgenp, ldcp, "err rv(%d) len(%d)\n",
2630 			    rv, msglen);
2631 			if (rv == ECONNRESET)
2632 				goto vgen_evtread_error;
2633 			break;
2634 		}
2635 		if (msglen == 0) {
2636 			DBG2(vgenp, ldcp, "ldc_read NODATA");
2637 			break;
2638 		}
2639 		DBG2(vgenp, ldcp, "ldc_read msglen(%d)", msglen);
2640 
2641 		tagp = (vio_msg_tag_t *)ldcmsg;
2642 
2643 		if (ldcp->peer_sid) {
2644 			/*
2645 			 * check sid only after we have received peer's sid
2646 			 * in the version negotiate msg.
2647 			 */
2648 #ifdef DEBUG
2649 			if (vgen_hdbg & HDBG_BAD_SID) {
2650 				/* simulate bad sid condition */
2651 				tagp->vio_sid = 0;
2652 				vgen_hdbg &= ~(HDBG_BAD_SID);
2653 			}
2654 #endif
2655 			rv = vgen_check_sid(ldcp, tagp);
2656 			if (rv != VGEN_SUCCESS) {
2657 				/*
2658 				 * If sid mismatch is detected,
2659 				 * reset the channel.
2660 				 */
2661 				ldcp->need_ldc_reset = B_TRUE;
2662 				goto vgen_evtread_error;
2663 			}
2664 		}
2665 
2666 		switch (tagp->vio_msgtype) {
2667 		case VIO_TYPE_CTRL:
2668 			rv = vgen_handle_ctrlmsg(ldcp, tagp);
2669 			break;
2670 
2671 		case VIO_TYPE_DATA:
2672 			rv = vgen_handle_datamsg(ldcp, tagp);
2673 			break;
2674 
2675 		case VIO_TYPE_ERR:
2676 			vgen_handle_errmsg(ldcp, tagp);
2677 			break;
2678 
2679 		default:
2680 			DWARN(vgenp, ldcp, "Unknown VIO_TYPE(%x)\n",
2681 			    tagp->vio_msgtype);
2682 			break;
2683 		}
2684 
2685 		/*
2686 		 * If an error is encountered, stop processing and
2687 		 * handle the error.
2688 		 */
2689 		if (rv != 0) {
2690 			goto vgen_evtread_error;
2691 		}
2692 
2693 	} while (msglen);
2694 
2695 	/* check once more before exiting */
2696 	rv = ldc_chkq(ldcp->ldc_handle, &has_data);
2697 	if ((rv == 0) && (has_data == B_TRUE)) {
2698 		DTRACE_PROBE(vgen_chkq);
2699 		goto vgen_evt_read;
2700 	}
2701 
2702 vgen_evtread_error:
2703 	if (rv == ECONNRESET) {
2704 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
2705 			DWARN(vgenp, ldcp, "ldc_status err\n");
2706 		} else {
2707 			ldcp->ldc_status = istatus;
2708 		}
2709 		vgen_handle_evt_reset(ldcp, B_FALSE);
2710 	} else if (rv) {
2711 		vgen_handshake_retry(ldcp);
2712 	}
2713 
2714 	/*
2715 	 * If the receive thread is not enabled, then cancel the
2716 	 * handshake timeout here.
2717 	 */
2718 	if (ldcp->rcv_thread != NULL) {
2719 		mutex_exit(&ldcp->cblock);
2720 		if (ldcp->cancel_htid) {
2721 			/*
2722 			 * Cancel handshake timer. untimeout(9F) will
2723 			 * not return until the pending callback is cancelled
2724 			 * or has run. No problems will result from calling
2725 			 * untimeout if the handler has already completed.
2726 			 * If the timeout handler did run, then it would just
2727 			 * return as cancel_htid is set.
2728 			 */
2729 			(void) untimeout(ldcp->cancel_htid);
2730 			ldcp->cancel_htid = 0;
2731 		}
2732 	}
2733 
2734 	DBG1(vgenp, ldcp, "exit\n");
2735 }
2736 
2737 /* vgen handshake functions */
2738 
2739 /* change the hphase for the channel to the next phase */
2740 static vgen_ldc_t *
2741 vh_nextphase(vgen_ldc_t *ldcp)
2742 {
2743 	if (ldcp->hphase == VH_PHASE3) {
2744 		ldcp->hphase = VH_DONE;
2745 	} else {
2746 		ldcp->hphase++;
2747 	}
2748 	return (ldcp);
2749 }
2750 
2751 /*
2752  * Check whether the given version is supported or not and
2753  * return VGEN_SUCCESS if supported.
2754  */
2755 static int
2756 vgen_supported_version(vgen_ldc_t *ldcp, uint16_t ver_major,
2757 uint16_t ver_minor)
2758 {
2759 	vgen_ver_t	*versions = ldcp->vgen_versions;
2760 	int		i = 0;
2761 
2762 	while (i < VGEN_NUM_VER) {
2763 		if ((versions[i].ver_major == 0) &&
2764 		    (versions[i].ver_minor == 0)) {
2765 			break;
2766 		}
2767 		if ((versions[i].ver_major == ver_major) &&
2768 		    (versions[i].ver_minor == ver_minor)) {
2769 			return (VGEN_SUCCESS);
2770 		}
2771 		i++;
2772 	}
2773 	return (VGEN_FAILURE);
2774 }
2775 
2776 /*
2777  * Given a version, return VGEN_SUCCESS if a lower version is supported.
2778  */
2779 static int
2780 vgen_next_version(vgen_ldc_t *ldcp, vgen_ver_t *verp)
2781 {
2782 	vgen_ver_t	*versions = ldcp->vgen_versions;
2783 	int		i = 0;
2784 
2785 	while (i < VGEN_NUM_VER) {
2786 		if ((versions[i].ver_major == 0) &&
2787 		    (versions[i].ver_minor == 0)) {
2788 			break;
2789 		}
2790 		/*
2791 		 * if we support a lower minor version within the same major
2792 		 * version, or if we support a lower major version,
2793 		 * update the verp parameter with this lower version and
2794 		 * return success.
2795 		 */
2796 		if (((versions[i].ver_major == verp->ver_major) &&
2797 		    (versions[i].ver_minor < verp->ver_minor)) ||
2798 		    (versions[i].ver_major < verp->ver_major)) {
2799 			verp->ver_major = versions[i].ver_major;
2800 			verp->ver_minor = versions[i].ver_minor;
2801 			return (VGEN_SUCCESS);
2802 		}
2803 		i++;
2804 	}
2805 
2806 	return (VGEN_FAILURE);
2807 }
2808 
2809 /*
2810  * wrapper routine to send the given message over ldc using ldc_write().
2811  */
2812 static int
2813 vgen_sendmsg(vgen_ldc_t *ldcp, caddr_t msg,  size_t msglen,
2814     boolean_t caller_holds_lock)
2815 {
2816 	int	rv;
2817 	size_t	len;
2818 	uint32_t retries = 0;
2819 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
2820 
2821 	len = msglen;
2822 	if ((len == 0) || (msg == NULL))
2823 		return (VGEN_FAILURE);
2824 
2825 	if (!caller_holds_lock) {
2826 		mutex_enter(&ldcp->wrlock);
2827 	}
2828 
2829 	do {
2830 		len = msglen;
2831 		rv = ldc_write(ldcp->ldc_handle, (caddr_t)msg, &len);
2832 		if (retries++ >= vgen_ldcwr_retries)
2833 			break;
2834 	} while (rv == EWOULDBLOCK);
2835 
2836 	if (!caller_holds_lock) {
2837 		mutex_exit(&ldcp->wrlock);
2838 	}
2839 
2840 	if (rv != 0) {
2841 		DWARN(vgenp, ldcp, "ldc_write failed: rv(%d)\n",
2842 		    rv, msglen);
2843 		return (rv);
2844 	}
2845 
2846 	if (len != msglen) {
2847 		DWARN(vgenp, ldcp, "ldc_write failed: rv(%d) msglen (%d)\n",
2848 		    rv, msglen);
2849 		return (VGEN_FAILURE);
2850 	}
2851 
2852 	return (VGEN_SUCCESS);
2853 }
2854 
2855 /* send version negotiate message to the peer over ldc */
2856 static int
2857 vgen_send_version_negotiate(vgen_ldc_t *ldcp)
2858 {
2859 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
2860 	vio_ver_msg_t	vermsg;
2861 	vio_msg_tag_t	*tagp = &vermsg.tag;
2862 	int		rv;
2863 
2864 	bzero(&vermsg, sizeof (vermsg));
2865 
2866 	tagp->vio_msgtype = VIO_TYPE_CTRL;
2867 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
2868 	tagp->vio_subtype_env = VIO_VER_INFO;
2869 	tagp->vio_sid = ldcp->local_sid;
2870 
2871 	/* get version msg payload from ldcp->local */
2872 	vermsg.ver_major = ldcp->local_hparams.ver_major;
2873 	vermsg.ver_minor = ldcp->local_hparams.ver_minor;
2874 	vermsg.dev_class = ldcp->local_hparams.dev_class;
2875 
2876 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (vermsg), B_FALSE);
2877 	if (rv != VGEN_SUCCESS) {
2878 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
2879 		return (rv);
2880 	}
2881 
2882 	ldcp->hstate |= VER_INFO_SENT;
2883 	DBG2(vgenp, ldcp, "VER_INFO_SENT ver(%d,%d)\n",
2884 	    vermsg.ver_major, vermsg.ver_minor);
2885 
2886 	return (VGEN_SUCCESS);
2887 }
2888 
2889 /* send attr info message to the peer over ldc */
2890 static int
2891 vgen_send_attr_info(vgen_ldc_t *ldcp)
2892 {
2893 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
2894 	vnet_attr_msg_t	attrmsg;
2895 	vio_msg_tag_t	*tagp = &attrmsg.tag;
2896 	int		rv;
2897 
2898 	bzero(&attrmsg, sizeof (attrmsg));
2899 
2900 	tagp->vio_msgtype = VIO_TYPE_CTRL;
2901 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
2902 	tagp->vio_subtype_env = VIO_ATTR_INFO;
2903 	tagp->vio_sid = ldcp->local_sid;
2904 
2905 	/* get attr msg payload from ldcp->local */
2906 	attrmsg.mtu = ldcp->local_hparams.mtu;
2907 	attrmsg.addr = ldcp->local_hparams.addr;
2908 	attrmsg.addr_type = ldcp->local_hparams.addr_type;
2909 	attrmsg.xfer_mode = ldcp->local_hparams.xfer_mode;
2910 	attrmsg.ack_freq = ldcp->local_hparams.ack_freq;
2911 
2912 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (attrmsg), B_FALSE);
2913 	if (rv != VGEN_SUCCESS) {
2914 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
2915 		return (rv);
2916 	}
2917 
2918 	ldcp->hstate |= ATTR_INFO_SENT;
2919 	DBG2(vgenp, ldcp, "ATTR_INFO_SENT\n");
2920 
2921 	return (VGEN_SUCCESS);
2922 }
2923 
2924 /* send descriptor ring register message to the peer over ldc */
2925 static int
2926 vgen_send_dring_reg(vgen_ldc_t *ldcp)
2927 {
2928 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
2929 	vio_dring_reg_msg_t	msg;
2930 	vio_msg_tag_t		*tagp = &msg.tag;
2931 	int		rv;
2932 
2933 	bzero(&msg, sizeof (msg));
2934 
2935 	tagp->vio_msgtype = VIO_TYPE_CTRL;
2936 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
2937 	tagp->vio_subtype_env = VIO_DRING_REG;
2938 	tagp->vio_sid = ldcp->local_sid;
2939 
2940 	/* get dring info msg payload from ldcp->local */
2941 	bcopy(&(ldcp->local_hparams.dring_cookie), (msg.cookie),
2942 	    sizeof (ldc_mem_cookie_t));
2943 	msg.ncookies = ldcp->local_hparams.num_dcookies;
2944 	msg.num_descriptors = ldcp->local_hparams.num_desc;
2945 	msg.descriptor_size = ldcp->local_hparams.desc_size;
2946 
2947 	/*
2948 	 * dring_ident is set to 0. After mapping the dring, peer sets this
2949 	 * value and sends it in the ack, which is saved in
2950 	 * vgen_handle_dring_reg().
2951 	 */
2952 	msg.dring_ident = 0;
2953 
2954 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (msg), B_FALSE);
2955 	if (rv != VGEN_SUCCESS) {
2956 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
2957 		return (rv);
2958 	}
2959 
2960 	ldcp->hstate |= DRING_INFO_SENT;
2961 	DBG2(vgenp, ldcp, "DRING_INFO_SENT \n");
2962 
2963 	return (VGEN_SUCCESS);
2964 }
2965 
2966 static int
2967 vgen_send_rdx_info(vgen_ldc_t *ldcp)
2968 {
2969 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
2970 	vio_rdx_msg_t	rdxmsg;
2971 	vio_msg_tag_t	*tagp = &rdxmsg.tag;
2972 	int		rv;
2973 
2974 	bzero(&rdxmsg, sizeof (rdxmsg));
2975 
2976 	tagp->vio_msgtype = VIO_TYPE_CTRL;
2977 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
2978 	tagp->vio_subtype_env = VIO_RDX;
2979 	tagp->vio_sid = ldcp->local_sid;
2980 
2981 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (rdxmsg), B_FALSE);
2982 	if (rv != VGEN_SUCCESS) {
2983 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
2984 		return (rv);
2985 	}
2986 
2987 	ldcp->hstate |= RDX_INFO_SENT;
2988 	DBG2(vgenp, ldcp, "RDX_INFO_SENT\n");
2989 
2990 	return (VGEN_SUCCESS);
2991 }
2992 
2993 /* send descriptor ring data message to the peer over ldc */
2994 static int
2995 vgen_send_dring_data(vgen_ldc_t *ldcp, uint32_t start, int32_t end)
2996 {
2997 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
2998 	vio_dring_msg_t	dringmsg, *msgp = &dringmsg;
2999 	vio_msg_tag_t	*tagp = &msgp->tag;
3000 	int		rv;
3001 
3002 	bzero(msgp, sizeof (*msgp));
3003 
3004 	tagp->vio_msgtype = VIO_TYPE_DATA;
3005 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
3006 	tagp->vio_subtype_env = VIO_DRING_DATA;
3007 	tagp->vio_sid = ldcp->local_sid;
3008 
3009 	msgp->seq_num = ldcp->next_txseq;
3010 	msgp->dring_ident = ldcp->local_hparams.dring_ident;
3011 	msgp->start_idx = start;
3012 	msgp->end_idx = end;
3013 
3014 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (dringmsg), B_TRUE);
3015 	if (rv != VGEN_SUCCESS) {
3016 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
3017 		return (rv);
3018 	}
3019 
3020 	ldcp->next_txseq++;
3021 	ldcp->statsp->dring_data_msgs++;
3022 
3023 	DBG2(vgenp, ldcp, "DRING_DATA_SENT \n");
3024 
3025 	return (VGEN_SUCCESS);
3026 }
3027 
3028 /* send multicast addr info message to vsw */
3029 static int
3030 vgen_send_mcast_info(vgen_ldc_t *ldcp)
3031 {
3032 	vnet_mcast_msg_t	mcastmsg;
3033 	vnet_mcast_msg_t	*msgp;
3034 	vio_msg_tag_t		*tagp;
3035 	vgen_t			*vgenp;
3036 	struct ether_addr	*mca;
3037 	int			rv;
3038 	int			i;
3039 	uint32_t		size;
3040 	uint32_t		mccount;
3041 	uint32_t		n;
3042 
3043 	msgp = &mcastmsg;
3044 	tagp = &msgp->tag;
3045 	vgenp = LDC_TO_VGEN(ldcp);
3046 
3047 	mccount = vgenp->mccount;
3048 	i = 0;
3049 
3050 	do {
3051 		tagp->vio_msgtype = VIO_TYPE_CTRL;
3052 		tagp->vio_subtype = VIO_SUBTYPE_INFO;
3053 		tagp->vio_subtype_env = VNET_MCAST_INFO;
3054 		tagp->vio_sid = ldcp->local_sid;
3055 
3056 		n = ((mccount >= VNET_NUM_MCAST) ? VNET_NUM_MCAST : mccount);
3057 		size = n * sizeof (struct ether_addr);
3058 
3059 		mca = &(vgenp->mctab[i]);
3060 		bcopy(mca, (msgp->mca), size);
3061 		msgp->set = B_TRUE;
3062 		msgp->count = n;
3063 
3064 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msgp),
3065 		    B_FALSE);
3066 		if (rv != VGEN_SUCCESS) {
3067 			DWARN(vgenp, ldcp, "vgen_sendmsg err(%d)\n", rv);
3068 			return (rv);
3069 		}
3070 
3071 		mccount -= n;
3072 		i += n;
3073 
3074 	} while (mccount);
3075 
3076 	return (VGEN_SUCCESS);
3077 }
3078 
3079 /* Initiate Phase 2 of handshake */
3080 static int
3081 vgen_handshake_phase2(vgen_ldc_t *ldcp)
3082 {
3083 	int rv;
3084 	uint32_t ncookies = 0;
3085 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3086 
3087 #ifdef DEBUG
3088 	if (vgen_hdbg & HDBG_OUT_STATE) {
3089 		/* simulate out of state condition */
3090 		vgen_hdbg &= ~(HDBG_OUT_STATE);
3091 		rv = vgen_send_rdx_info(ldcp);
3092 		return (rv);
3093 	}
3094 	if (vgen_hdbg & HDBG_TIMEOUT) {
3095 		/* simulate timeout condition */
3096 		vgen_hdbg &= ~(HDBG_TIMEOUT);
3097 		return (VGEN_SUCCESS);
3098 	}
3099 #endif
3100 	rv = vgen_send_attr_info(ldcp);
3101 	if (rv != VGEN_SUCCESS) {
3102 		return (rv);
3103 	}
3104 
3105 	/* Bind descriptor ring to the channel */
3106 	if (ldcp->num_txdcookies == 0) {
3107 		rv = ldc_mem_dring_bind(ldcp->ldc_handle, ldcp->tx_dhandle,
3108 		    LDC_SHADOW_MAP, LDC_MEM_RW, &ldcp->tx_dcookie, &ncookies);
3109 		if (rv != 0) {
3110 			DWARN(vgenp, ldcp, "ldc_mem_dring_bind failed "
3111 			    "rv(%x)\n", rv);
3112 			return (rv);
3113 		}
3114 		ASSERT(ncookies == 1);
3115 		ldcp->num_txdcookies = ncookies;
3116 	}
3117 
3118 	/* update local dring_info params */
3119 	bcopy(&(ldcp->tx_dcookie), &(ldcp->local_hparams.dring_cookie),
3120 	    sizeof (ldc_mem_cookie_t));
3121 	ldcp->local_hparams.num_dcookies = ldcp->num_txdcookies;
3122 	ldcp->local_hparams.num_desc = ldcp->num_txds;
3123 	ldcp->local_hparams.desc_size = sizeof (vnet_public_desc_t);
3124 
3125 	rv = vgen_send_dring_reg(ldcp);
3126 	if (rv != VGEN_SUCCESS) {
3127 		return (rv);
3128 	}
3129 
3130 	return (VGEN_SUCCESS);
3131 }
3132 
3133 /*
3134  * This function resets the handshake phase to VH_PHASE0(pre-handshake phase).
3135  * This can happen after a channel comes up (status: LDC_UP) or
3136  * when handshake gets terminated due to various conditions.
3137  */
3138 static void
3139 vgen_reset_hphase(vgen_ldc_t *ldcp)
3140 {
3141 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3142 	ldc_status_t istatus;
3143 	int rv;
3144 
3145 	DBG1(vgenp, ldcp, "enter\n");
3146 	/* reset hstate and hphase */
3147 	ldcp->hstate = 0;
3148 	ldcp->hphase = VH_PHASE0;
3149 
3150 	/*
3151 	 * Save the id of pending handshake timer in cancel_htid.
3152 	 * This will be checked in vgen_ldc_cb() and the handshake timer will
3153 	 * be cancelled after releasing cblock.
3154 	 */
3155 	if (ldcp->htid) {
3156 		ldcp->cancel_htid = ldcp->htid;
3157 		ldcp->htid = 0;
3158 	}
3159 
3160 	if (ldcp->local_hparams.dring_ready) {
3161 		ldcp->local_hparams.dring_ready = B_FALSE;
3162 	}
3163 
3164 	/* Unbind tx descriptor ring from the channel */
3165 	if (ldcp->num_txdcookies) {
3166 		rv = ldc_mem_dring_unbind(ldcp->tx_dhandle);
3167 		if (rv != 0) {
3168 			DWARN(vgenp, ldcp, "ldc_mem_dring_unbind failed\n");
3169 		}
3170 		ldcp->num_txdcookies = 0;
3171 	}
3172 
3173 	if (ldcp->peer_hparams.dring_ready) {
3174 		ldcp->peer_hparams.dring_ready = B_FALSE;
3175 		/* Unmap peer's dring */
3176 		(void) ldc_mem_dring_unmap(ldcp->rx_dhandle);
3177 		vgen_clobber_rxds(ldcp);
3178 	}
3179 
3180 	vgen_clobber_tbufs(ldcp);
3181 
3182 	/*
3183 	 * clear local handshake params and initialize.
3184 	 */
3185 	bzero(&(ldcp->local_hparams), sizeof (ldcp->local_hparams));
3186 
3187 	/* set version to the highest version supported */
3188 	ldcp->local_hparams.ver_major =
3189 	    ldcp->vgen_versions[0].ver_major;
3190 	ldcp->local_hparams.ver_minor =
3191 	    ldcp->vgen_versions[0].ver_minor;
3192 	ldcp->local_hparams.dev_class = VDEV_NETWORK;
3193 
3194 	/* set attr_info params */
3195 	ldcp->local_hparams.mtu = ETHERMAX;
3196 	ldcp->local_hparams.addr =
3197 	    vgen_macaddr_strtoul(vgenp->macaddr);
3198 	ldcp->local_hparams.addr_type = ADDR_TYPE_MAC;
3199 	ldcp->local_hparams.xfer_mode = VIO_DRING_MODE;
3200 	ldcp->local_hparams.ack_freq = 0;	/* don't need acks */
3201 
3202 	/*
3203 	 * Note: dring is created, but not bound yet.
3204 	 * local dring_info params will be updated when we bind the dring in
3205 	 * vgen_handshake_phase2().
3206 	 * dring_ident is set to 0. After mapping the dring, peer sets this
3207 	 * value and sends it in the ack, which is saved in
3208 	 * vgen_handle_dring_reg().
3209 	 */
3210 	ldcp->local_hparams.dring_ident = 0;
3211 
3212 	/* clear peer_hparams */
3213 	bzero(&(ldcp->peer_hparams), sizeof (ldcp->peer_hparams));
3214 
3215 	/* reset the channel if required */
3216 	if (ldcp->need_ldc_reset) {
3217 		DWARN(vgenp, ldcp, "Doing Channel Reset...\n");
3218 		ldcp->need_ldc_reset = B_FALSE;
3219 		(void) ldc_down(ldcp->ldc_handle);
3220 		(void) ldc_status(ldcp->ldc_handle, &istatus);
3221 		DBG2(vgenp, ldcp, "Reset Done,ldc_status(%x)\n", istatus);
3222 		ldcp->ldc_status = istatus;
3223 
3224 		/* clear sids */
3225 		ldcp->local_sid = 0;
3226 		ldcp->peer_sid = 0;
3227 
3228 		/* try to bring the channel up */
3229 		rv = ldc_up(ldcp->ldc_handle);
3230 		if (rv != 0) {
3231 			DWARN(vgenp, ldcp, "ldc_up err rv(%d)\n", rv);
3232 		}
3233 
3234 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
3235 			DWARN(vgenp, ldcp, "ldc_status err\n");
3236 		} else {
3237 			ldcp->ldc_status = istatus;
3238 		}
3239 	}
3240 }
3241 
/*
 * Wrapper around vgen_reset_hphase() that takes the per-channel locks
 * before the handshake state is reset.
 *
 * The caller must already hold ldcp->cblock (asserted below). This routine
 * acquires rxlock, wrlock, txlock and tclock in that order, invokes
 * vgen_reset_hphase(), and then drops the four locks in the reverse order.
 * cblock is still held on return.
 */
static void
vgen_handshake_reset(vgen_ldc_t *ldcp)
{
	ASSERT(MUTEX_HELD(&ldcp->cblock));
	/* acquire order: rxlock -> wrlock -> txlock -> tclock */
	mutex_enter(&ldcp->rxlock);
	mutex_enter(&ldcp->wrlock);
	mutex_enter(&ldcp->txlock);
	mutex_enter(&ldcp->tclock);

	vgen_reset_hphase(ldcp);

	/* release in the reverse order of acquisition */
	mutex_exit(&ldcp->tclock);
	mutex_exit(&ldcp->txlock);
	mutex_exit(&ldcp->wrlock);
	mutex_exit(&ldcp->rxlock);
}
3259 
/*
 * Initiate handshake with the peer by sending various messages
 * based on the handshake-phase that the channel is currently in.
 *
 * The phase is read from ldcp->hphase on entry:
 *	VH_PHASE1 - start the handshake watchdog timer and send the
 *		    version negotiation msg.
 *	VH_PHASE2 - run vgen_handshake_phase2().
 *	VH_PHASE3 - send the rdx info msg.
 *	VH_DONE   - handshake finished: schedule cancellation of the
 *		    watchdog, clear the retry count, optionally sync the
 *		    multicast table and restart transmits if needed.
 *	other	  - nothing is done.
 *
 * If the step fails with ECONNRESET the cached channel status is refreshed
 * and vgen_handle_evt_reset() is called; any other non-zero result resets
 * the handshake through vgen_handshake_reset().
 *
 * NOTE(review): the VH_DONE path drops and re-takes ldcp->cblock, and the
 * failure path calls vgen_handshake_reset() which asserts cblock is held,
 * so the caller is presumably expected to hold cblock - confirm.
 */
static void
vgen_handshake(vgen_ldc_t *ldcp)
{
	uint32_t hphase = ldcp->hphase;
	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
	ldc_status_t	istatus;
	int	rv = 0;

	switch (hphase) {

	case VH_PHASE1:

		/*
		 * start timer, for entire handshake process, turn this timer
		 * off if all phases of handshake complete successfully and
		 * hphase goes to VH_DONE(below) or
		 * vgen_reset_hphase() gets called or
		 * channel is reset due to errors or
		 * vgen_ldc_uninit() is invoked(vgen_stop).
		 */
		ldcp->htid = timeout(vgen_hwatchdog, (caddr_t)ldcp,
		    drv_usectohz(vgen_hwd_interval * 1000));

		/* Phase 1 involves negotiating the version */
		rv = vgen_send_version_negotiate(ldcp);
		break;

	case VH_PHASE2:
		rv = vgen_handshake_phase2(ldcp);
		break;

	case VH_PHASE3:
		rv = vgen_send_rdx_info(ldcp);
		break;

	case VH_DONE:
		/*
		 * Save the id of pending handshake timer in cancel_htid.
		 * This will be checked in vgen_ldc_cb() and the handshake
		 * timer will be cancelled after releasing cblock.
		 */
		if (ldcp->htid) {
			ldcp->cancel_htid = ldcp->htid;
			ldcp->htid = 0;
		}
		ldcp->hretries = 0;
		DBG1(vgenp, ldcp, "Handshake Done\n");

		if (ldcp->need_mcast_sync) {
			/* need to sync multicast table with vsw */

			/*
			 * cblock is released before vgenp->lock is taken
			 * and re-acquired only after vgenp->lock has been
			 * dropped again.
			 */
			ldcp->need_mcast_sync = B_FALSE;
			mutex_exit(&ldcp->cblock);

			mutex_enter(&vgenp->lock);
			rv = vgen_send_mcast_info(ldcp);
			mutex_exit(&vgenp->lock);

			mutex_enter(&ldcp->cblock);
			/* on failure, skip the resched check; rv is handled below */
			if (rv != VGEN_SUCCESS)
				break;
		}

		/*
		 * Check if mac layer should be notified to restart
		 * transmissions. This can happen if the channel got
		 * reset and vgen_clobber_tbufs() is called, while
		 * need_resched is set.
		 */
		mutex_enter(&ldcp->tclock);
		if (ldcp->need_resched) {
			ldcp->need_resched = B_FALSE;
			vnet_tx_update(vgenp->vnetp);
		}
		mutex_exit(&ldcp->tclock);

		break;

	default:
		break;
	}

	if (rv == ECONNRESET) {
		/* refresh the cached channel status before handling the reset */
		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
			DWARN(vgenp, ldcp, "ldc_status err\n");
		} else {
			ldcp->ldc_status = istatus;
		}
		vgen_handle_evt_reset(ldcp, B_FALSE);
	} else if (rv) {
		/* any other failure: go back to the pre-handshake state */
		vgen_handshake_reset(ldcp);
	}
}
3357 
3358 /*
3359  * Check if the current handshake phase has completed successfully and
3360  * return the status.
3361  */
3362 static int
3363 vgen_handshake_done(vgen_ldc_t *ldcp)
3364 {
3365 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
3366 	uint32_t	hphase = ldcp->hphase;
3367 	int 		status = 0;
3368 
3369 	switch (hphase) {
3370 
3371 	case VH_PHASE1:
3372 		/*
3373 		 * Phase1 is done, if version negotiation
3374 		 * completed successfully.
3375 		 */
3376 		status = ((ldcp->hstate & VER_NEGOTIATED) ==
3377 		    VER_NEGOTIATED);
3378 		break;
3379 
3380 	case VH_PHASE2:
3381 		/*
3382 		 * Phase 2 is done, if attr info and dring info
3383 		 * have been exchanged successfully.
3384 		 */
3385 		status = (((ldcp->hstate & ATTR_INFO_EXCHANGED) ==
3386 		    ATTR_INFO_EXCHANGED) &&
3387 		    ((ldcp->hstate & DRING_INFO_EXCHANGED) ==
3388 		    DRING_INFO_EXCHANGED));
3389 		break;
3390 
3391 	case VH_PHASE3:
3392 		/* Phase 3 is done, if rdx msg has been exchanged */
3393 		status = ((ldcp->hstate & RDX_EXCHANGED) ==
3394 		    RDX_EXCHANGED);
3395 		break;
3396 
3397 	default:
3398 		break;
3399 	}
3400 
3401 	if (status == 0) {
3402 		return (VGEN_FAILURE);
3403 	}
3404 	DBG2(vgenp, ldcp, "PHASE(%d)\n", hphase);
3405 	return (VGEN_SUCCESS);
3406 }
3407 
3408 /* retry handshake on failure */
3409 static void
3410 vgen_handshake_retry(vgen_ldc_t *ldcp)
3411 {
3412 	/* reset handshake phase */
3413 	vgen_handshake_reset(ldcp);
3414 
3415 	/* handshake retry is specified and the channel is UP */
3416 	if (vgen_max_hretries && (ldcp->ldc_status == LDC_UP)) {
3417 		if (ldcp->hretries++ < vgen_max_hretries) {
3418 			ldcp->local_sid = ddi_get_lbolt();
3419 			vgen_handshake(vh_nextphase(ldcp));
3420 		}
3421 	}
3422 }
3423 
/*
 * Handle a version info msg from the peer or an ACK/NACK from the peer
 * to a version info msg that we sent.
 *
 * ldcp - channel on which the msg arrived.
 * tagp - received msg; it is viewed as a vio_ver_msg_t and is modified in
 *	  place to build the reply that is sent back for an INFO msg.
 *
 * VIO_SUBTYPE_INFO:
 *	The peer's session id is cached if none has been recorded yet. If we
 *	are not in VH_PHASE1 the handshake is reset and restarted. The peer's
 *	requested version and device class are saved. A device class other
 *	than VDEV_NETWORK / VDEV_NETWORK_SWITCH is NACKed. Otherwise the
 *	requested major version is compared against the entries of
 *	ldcp->vgen_versions[] in order, and the msg is ACKed (possibly with
 *	a lowered minor), NACKed with a lower version we support, or NACKed
 *	with version 0.0 if no entry is usable.
 * VIO_SUBTYPE_ACK:
 *	The acked version becomes our local version.
 * VIO_SUBTYPE_NACK:
 *	A NACK carrying version 0.0 means negotiation failed. Otherwise the
 *	next version to propose is chosen from vgen_versions[] and a new
 *	version negotiate msg is sent.
 *
 * Whenever vgen_handshake_done() reports the phase complete, the handshake
 * is advanced to the next phase.
 *
 * Returns VGEN_SUCCESS, VGEN_FAILURE on a negotiation or phase error, or the
 * non-success value returned by vgen_sendmsg() /
 * vgen_send_version_negotiate().
 */
static int
vgen_handle_version_negotiate(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
{
	vgen_t		*vgenp;
	vio_ver_msg_t	*vermsg = (vio_ver_msg_t *)tagp;
	int		ack = 0;	/* set when the reply is an ACK */
	int		failed = 0;	/* set when no version matched */
	int		idx;
	vgen_ver_t	*versions = ldcp->vgen_versions;
	int		rv = 0;

	vgenp = LDC_TO_VGEN(ldcp);
	DBG1(vgenp, ldcp, "enter\n");
	switch (tagp->vio_subtype) {
	case VIO_SUBTYPE_INFO:

		/* Cache sid of peer if this is the first time */
		if (ldcp->peer_sid == 0) {
			DBG2(vgenp, ldcp, "Caching peer_sid(%x)\n",
			    tagp->vio_sid);
			ldcp->peer_sid = tagp->vio_sid;
		}

		if (ldcp->hphase != VH_PHASE1) {
			/*
			 * If we are not already in VH_PHASE1, reset to
			 * pre-handshake state, and initiate handshake
			 * to the peer too.
			 */
			vgen_handshake_reset(ldcp);
			vgen_handshake(vh_nextphase(ldcp));
		}
		ldcp->hstate |= VER_INFO_RCVD;

		/* save peer's requested values */
		ldcp->peer_hparams.ver_major = vermsg->ver_major;
		ldcp->peer_hparams.ver_minor = vermsg->ver_minor;
		ldcp->peer_hparams.dev_class = vermsg->dev_class;

		if ((vermsg->dev_class != VDEV_NETWORK) &&
		    (vermsg->dev_class != VDEV_NETWORK_SWITCH)) {
			/* unsupported dev_class, send NACK */

			DWARN(vgenp, ldcp, "Version Negotiation Failed\n");

			tagp->vio_subtype = VIO_SUBTYPE_NACK;
			tagp->vio_sid = ldcp->local_sid;
			/* send reply msg back to peer */
			rv = vgen_sendmsg(ldcp, (caddr_t)tagp,
			    sizeof (*vermsg), B_FALSE);
			if (rv != VGEN_SUCCESS) {
				return (rv);
			}
			return (VGEN_FAILURE);
		}

		DBG2(vgenp, ldcp, "VER_INFO_RCVD, ver(%d,%d)\n",
		    vermsg->ver_major,  vermsg->ver_minor);

		idx = 0;

		/*
		 * Walk the table of versions we support, starting at entry 0,
		 * until the reply (ACK or NACK) has been decided.
		 * NOTE(review): the "next lower version" logic presumably
		 * relies on the table being ordered highest version first -
		 * confirm against the table definition.
		 */
		for (;;) {

			if (vermsg->ver_major > versions[idx].ver_major) {

				/* nack with next lower version */
				tagp->vio_subtype = VIO_SUBTYPE_NACK;
				vermsg->ver_major = versions[idx].ver_major;
				vermsg->ver_minor = versions[idx].ver_minor;
				break;
			}

			if (vermsg->ver_major == versions[idx].ver_major) {

				/* major version match - ACK version */
				tagp->vio_subtype = VIO_SUBTYPE_ACK;
				ack = 1;

				/*
				 * lower minor version to the one this endpt
				 * supports, if necessary
				 */
				if (vermsg->ver_minor >
				    versions[idx].ver_minor) {
					vermsg->ver_minor =
					    versions[idx].ver_minor;
					ldcp->peer_hparams.ver_minor =
					    versions[idx].ver_minor;
				}
				break;
			}

			idx++;

			if (idx == VGEN_NUM_VER) {

				/* no version match - send NACK */
				tagp->vio_subtype = VIO_SUBTYPE_NACK;
				vermsg->ver_major = 0;
				vermsg->ver_minor = 0;
				failed = 1;
				break;
			}

		}

		tagp->vio_sid = ldcp->local_sid;

		/* send reply msg back to peer */
		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*vermsg),
		    B_FALSE);
		if (rv != VGEN_SUCCESS) {
			return (rv);
		}

		if (ack) {
			ldcp->hstate |= VER_ACK_SENT;
			DBG2(vgenp, ldcp, "VER_ACK_SENT, ver(%d,%d) \n",
			    vermsg->ver_major, vermsg->ver_minor);
		}
		if (failed) {
			DWARN(vgenp, ldcp, "Negotiation Failed\n");
			return (VGEN_FAILURE);
		}
		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {

			/* VER_ACK_SENT and VER_ACK_RCVD */

			/* local and peer versions match? */
			ASSERT((ldcp->local_hparams.ver_major ==
			    ldcp->peer_hparams.ver_major) &&
			    (ldcp->local_hparams.ver_minor ==
			    ldcp->peer_hparams.ver_minor));

			/* move to the next phase */
			vgen_handshake(vh_nextphase(ldcp));
		}

		break;

	case VIO_SUBTYPE_ACK:

		if (ldcp->hphase != VH_PHASE1) {
			/* This should not happen. */
			DWARN(vgenp, ldcp, "Invalid Phase(%u)\n", ldcp->hphase);
			return (VGEN_FAILURE);
		}

		/* SUCCESS - we have agreed on a version */
		ldcp->local_hparams.ver_major = vermsg->ver_major;
		ldcp->local_hparams.ver_minor = vermsg->ver_minor;
		ldcp->hstate |= VER_ACK_RCVD;

		DBG2(vgenp, ldcp, "VER_ACK_RCVD, ver(%d,%d) \n",
		    vermsg->ver_major,  vermsg->ver_minor);

		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {

			/* VER_ACK_SENT and VER_ACK_RCVD */

			/* local and peer versions match? */
			ASSERT((ldcp->local_hparams.ver_major ==
			    ldcp->peer_hparams.ver_major) &&
			    (ldcp->local_hparams.ver_minor ==
			    ldcp->peer_hparams.ver_minor));

			/* move to the next phase */
			vgen_handshake(vh_nextphase(ldcp));
		}
		break;

	case VIO_SUBTYPE_NACK:

		if (ldcp->hphase != VH_PHASE1) {
			/* This should not happen. */
			DWARN(vgenp, ldcp, "VER_NACK_RCVD Invalid "
			"Phase(%u)\n", ldcp->hphase);
			return (VGEN_FAILURE);
		}

		DBG2(vgenp, ldcp, "VER_NACK_RCVD next ver(%d,%d)\n",
		    vermsg->ver_major, vermsg->ver_minor);

		/* check if version in NACK is zero */
		if (vermsg->ver_major == 0 && vermsg->ver_minor == 0) {
			/*
			 * Version Negotiation has failed.
			 */
			DWARN(vgenp, ldcp, "Version Negotiation Failed\n");
			return (VGEN_FAILURE);
		}

		idx = 0;

		/*
		 * Pick the version to propose next, based on the version
		 * the peer suggested in its NACK.
		 */
		for (;;) {

			if (vermsg->ver_major > versions[idx].ver_major) {
				/* select next lower version */

				ldcp->local_hparams.ver_major =
				    versions[idx].ver_major;
				ldcp->local_hparams.ver_minor =
				    versions[idx].ver_minor;
				break;
			}

			if (vermsg->ver_major == versions[idx].ver_major) {
				/* major version match */

				ldcp->local_hparams.ver_major =
				    versions[idx].ver_major;

				ldcp->local_hparams.ver_minor =
				    versions[idx].ver_minor;
				break;
			}

			idx++;

			if (idx == VGEN_NUM_VER) {
				/*
				 * no version match.
				 * Version Negotiation has failed.
				 */
				DWARN(vgenp, ldcp,
				    "Version Negotiation Failed\n");
				return (VGEN_FAILURE);
			}

		}

		/* propose the newly selected version to the peer */
		rv = vgen_send_version_negotiate(ldcp);
		if (rv != VGEN_SUCCESS) {
			return (rv);
		}

		break;
	}

	DBG1(vgenp, ldcp, "exit\n");
	return (VGEN_SUCCESS);
}
3670 
3671 /* Check if the attributes are supported */
3672 static int
3673 vgen_check_attr_info(vgen_ldc_t *ldcp, vnet_attr_msg_t *msg)
3674 {
3675 	_NOTE(ARGUNUSED(ldcp))
3676 
3677 	/*
3678 	 * currently, we support these attr values:
3679 	 * mtu of ethernet, addr_type of mac, xfer_mode of
3680 	 * ldc shared memory, ack_freq of 0 (data is acked if
3681 	 * the ack bit is set in the descriptor) and the address should
3682 	 * match the address in the port node.
3683 	 */
3684 	if ((msg->mtu != ETHERMAX) ||
3685 	    (msg->addr_type != ADDR_TYPE_MAC) ||
3686 	    (msg->xfer_mode != VIO_DRING_MODE) ||
3687 	    (msg->ack_freq > 64)) {
3688 		return (VGEN_FAILURE);
3689 	}
3690 
3691 	return (VGEN_SUCCESS);
3692 }
3693 
3694 /*
3695  * Handle an attribute info msg from the peer or an ACK/NACK from the peer
3696  * to an attr info msg that we sent.
3697  */
3698 static int
3699 vgen_handle_attr_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
3700 {
3701 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3702 	vnet_attr_msg_t *attrmsg = (vnet_attr_msg_t *)tagp;
3703 	int		ack = 0;
3704 	int		rv = 0;
3705 
3706 	DBG1(vgenp, ldcp, "enter\n");
3707 	if (ldcp->hphase != VH_PHASE2) {
3708 		DWARN(vgenp, ldcp, "Rcvd ATTR_INFO subtype(%d),"
3709 		" Invalid Phase(%u)\n",
3710 		    tagp->vio_subtype, ldcp->hphase);
3711 		return (VGEN_FAILURE);
3712 	}
3713 	switch (tagp->vio_subtype) {
3714 	case VIO_SUBTYPE_INFO:
3715 
3716 		DBG2(vgenp, ldcp, "ATTR_INFO_RCVD \n");
3717 		ldcp->hstate |= ATTR_INFO_RCVD;
3718 
3719 		/* save peer's values */
3720 		ldcp->peer_hparams.mtu = attrmsg->mtu;
3721 		ldcp->peer_hparams.addr = attrmsg->addr;
3722 		ldcp->peer_hparams.addr_type = attrmsg->addr_type;
3723 		ldcp->peer_hparams.xfer_mode = attrmsg->xfer_mode;
3724 		ldcp->peer_hparams.ack_freq = attrmsg->ack_freq;
3725 
3726 		if (vgen_check_attr_info(ldcp, attrmsg) == VGEN_FAILURE) {
3727 			/* unsupported attr, send NACK */
3728 			tagp->vio_subtype = VIO_SUBTYPE_NACK;
3729 		} else {
3730 			ack = 1;
3731 			tagp->vio_subtype = VIO_SUBTYPE_ACK;
3732 		}
3733 		tagp->vio_sid = ldcp->local_sid;
3734 
3735 		/* send reply msg back to peer */
3736 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*attrmsg),
3737 		    B_FALSE);
3738 		if (rv != VGEN_SUCCESS) {
3739 			return (rv);
3740 		}
3741 
3742 		if (ack) {
3743 			ldcp->hstate |= ATTR_ACK_SENT;
3744 			DBG2(vgenp, ldcp, "ATTR_ACK_SENT \n");
3745 		} else {
3746 			/* failed */
3747 			DWARN(vgenp, ldcp, "ATTR_NACK_SENT \n");
3748 			return (VGEN_FAILURE);
3749 		}
3750 
3751 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
3752 			vgen_handshake(vh_nextphase(ldcp));
3753 		}
3754 
3755 		break;
3756 
3757 	case VIO_SUBTYPE_ACK:
3758 
3759 		ldcp->hstate |= ATTR_ACK_RCVD;
3760 
3761 		DBG2(vgenp, ldcp, "ATTR_ACK_RCVD \n");
3762 
3763 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
3764 			vgen_handshake(vh_nextphase(ldcp));
3765 		}
3766 		break;
3767 
3768 	case VIO_SUBTYPE_NACK:
3769 
3770 		DBG2(vgenp, ldcp, "ATTR_NACK_RCVD \n");
3771 		return (VGEN_FAILURE);
3772 	}
3773 	DBG1(vgenp, ldcp, "exit\n");
3774 	return (VGEN_SUCCESS);
3775 }
3776 
3777 /* Check if the dring info msg is ok */
3778 static int
3779 vgen_check_dring_reg(vio_dring_reg_msg_t *msg)
3780 {
3781 	/* check if msg contents are ok */
3782 	if ((msg->num_descriptors < 128) || (msg->descriptor_size <
3783 	    sizeof (vnet_public_desc_t))) {
3784 		return (VGEN_FAILURE);
3785 	}
3786 	return (VGEN_SUCCESS);
3787 }
3788 
3789 /*
3790  * Handle a descriptor ring register msg from the peer or an ACK/NACK from
3791  * the peer to a dring register msg that we sent.
3792  */
3793 static int
3794 vgen_handle_dring_reg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
3795 {
3796 	vio_dring_reg_msg_t *msg = (vio_dring_reg_msg_t *)tagp;
3797 	ldc_mem_cookie_t dcookie;
3798 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3799 	int ack = 0;
3800 	int rv = 0;
3801 
3802 	DBG1(vgenp, ldcp, "enter\n");
3803 	if (ldcp->hphase < VH_PHASE2) {
3804 		/* dring_info can be rcvd in any of the phases after Phase1 */
3805 		DWARN(vgenp, ldcp,
3806 		    "Rcvd DRING_INFO Subtype (%d), Invalid Phase(%u)\n",
3807 		    tagp->vio_subtype, ldcp->hphase);
3808 		return (VGEN_FAILURE);
3809 	}
3810 	switch (tagp->vio_subtype) {
3811 	case VIO_SUBTYPE_INFO:
3812 
3813 		DBG2(vgenp, ldcp, "DRING_INFO_RCVD \n");
3814 		ldcp->hstate |= DRING_INFO_RCVD;
3815 		bcopy((msg->cookie), &dcookie, sizeof (dcookie));
3816 
3817 		ASSERT(msg->ncookies == 1);
3818 
3819 		if (vgen_check_dring_reg(msg) == VGEN_SUCCESS) {
3820 			/*
3821 			 * verified dring info msg to be ok,
3822 			 * now try to map the remote dring.
3823 			 */
3824 			rv = vgen_init_rxds(ldcp, msg->num_descriptors,
3825 			    msg->descriptor_size, &dcookie,
3826 			    msg->ncookies);
3827 			if (rv == DDI_SUCCESS) {
3828 				/* now we can ack the peer */
3829 				ack = 1;
3830 			}
3831 		}
3832 		if (ack == 0) {
3833 			/* failed, send NACK */
3834 			tagp->vio_subtype = VIO_SUBTYPE_NACK;
3835 		} else {
3836 			if (!(ldcp->peer_hparams.dring_ready)) {
3837 
3838 				/* save peer's dring_info values */
3839 				bcopy(&dcookie,
3840 				    &(ldcp->peer_hparams.dring_cookie),
3841 				    sizeof (dcookie));
3842 				ldcp->peer_hparams.num_desc =
3843 				    msg->num_descriptors;
3844 				ldcp->peer_hparams.desc_size =
3845 				    msg->descriptor_size;
3846 				ldcp->peer_hparams.num_dcookies =
3847 				    msg->ncookies;
3848 
3849 				/* set dring_ident for the peer */
3850 				ldcp->peer_hparams.dring_ident =
3851 				    (uint64_t)ldcp->rxdp;
3852 				/* return the dring_ident in ack msg */
3853 				msg->dring_ident =
3854 				    (uint64_t)ldcp->rxdp;
3855 
3856 				ldcp->peer_hparams.dring_ready = B_TRUE;
3857 			}
3858 			tagp->vio_subtype = VIO_SUBTYPE_ACK;
3859 		}
3860 		tagp->vio_sid = ldcp->local_sid;
3861 		/* send reply msg back to peer */
3862 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msg),
3863 		    B_FALSE);
3864 		if (rv != VGEN_SUCCESS) {
3865 			return (rv);
3866 		}
3867 
3868 		if (ack) {
3869 			ldcp->hstate |= DRING_ACK_SENT;
3870 			DBG2(vgenp, ldcp, "DRING_ACK_SENT");
3871 		} else {
3872 			DWARN(vgenp, ldcp, "DRING_NACK_SENT");
3873 			return (VGEN_FAILURE);
3874 		}
3875 
3876 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
3877 			vgen_handshake(vh_nextphase(ldcp));
3878 		}
3879 
3880 		break;
3881 
3882 	case VIO_SUBTYPE_ACK:
3883 
3884 		ldcp->hstate |= DRING_ACK_RCVD;
3885 
3886 		DBG2(vgenp, ldcp, "DRING_ACK_RCVD");
3887 
3888 		if (!(ldcp->local_hparams.dring_ready)) {
3889 			/* local dring is now ready */
3890 			ldcp->local_hparams.dring_ready = B_TRUE;
3891 
3892 			/* save dring_ident acked by peer */
3893 			ldcp->local_hparams.dring_ident =
3894 			    msg->dring_ident;
3895 		}
3896 
3897 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
3898 			vgen_handshake(vh_nextphase(ldcp));
3899 		}
3900 
3901 		break;
3902 
3903 	case VIO_SUBTYPE_NACK:
3904 
3905 		DBG2(vgenp, ldcp, "DRING_NACK_RCVD");
3906 		return (VGEN_FAILURE);
3907 	}
3908 	DBG1(vgenp, ldcp, "exit\n");
3909 	return (VGEN_SUCCESS);
3910 }
3911 
3912 /*
3913  * Handle a rdx info msg from the peer or an ACK/NACK
3914  * from the peer to a rdx info msg that we sent.
3915  */
3916 static int
3917 vgen_handle_rdx_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
3918 {
3919 	int rv = 0;
3920 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
3921 
3922 	DBG1(vgenp, ldcp, "enter\n");
3923 	if (ldcp->hphase != VH_PHASE3) {
3924 		DWARN(vgenp, ldcp,
3925 		    "Rcvd RDX_INFO Subtype (%d), Invalid Phase(%u)\n",
3926 		    tagp->vio_subtype, ldcp->hphase);
3927 		return (VGEN_FAILURE);
3928 	}
3929 	switch (tagp->vio_subtype) {
3930 	case VIO_SUBTYPE_INFO:
3931 
3932 		DBG2(vgenp, ldcp, "RDX_INFO_RCVD \n");
3933 		ldcp->hstate |= RDX_INFO_RCVD;
3934 
3935 		tagp->vio_subtype = VIO_SUBTYPE_ACK;
3936 		tagp->vio_sid = ldcp->local_sid;
3937 		/* send reply msg back to peer */
3938 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (vio_rdx_msg_t),
3939 		    B_FALSE);
3940 		if (rv != VGEN_SUCCESS) {
3941 			return (rv);
3942 		}
3943 
3944 		ldcp->hstate |= RDX_ACK_SENT;
3945 		DBG2(vgenp, ldcp, "RDX_ACK_SENT \n");
3946 
3947 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
3948 			vgen_handshake(vh_nextphase(ldcp));
3949 		}
3950 
3951 		break;
3952 
3953 	case VIO_SUBTYPE_ACK:
3954 
3955 		ldcp->hstate |= RDX_ACK_RCVD;
3956 
3957 		DBG2(vgenp, ldcp, "RDX_ACK_RCVD \n");
3958 
3959 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
3960 			vgen_handshake(vh_nextphase(ldcp));
3961 		}
3962 		break;
3963 
3964 	case VIO_SUBTYPE_NACK:
3965 
3966 		DBG2(vgenp, ldcp, "RDX_NACK_RCVD \n");
3967 		return (VGEN_FAILURE);
3968 	}
3969 	DBG1(vgenp, ldcp, "exit\n");
3970 	return (VGEN_SUCCESS);
3971 }
3972 
3973 /* Handle ACK/NACK from vsw to a set multicast msg that we sent */
3974 static int
3975 vgen_handle_mcast_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
3976 {
3977 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3978 	vnet_mcast_msg_t *msgp = (vnet_mcast_msg_t *)tagp;
3979 	struct ether_addr *addrp;
3980 	int count;
3981 	int i;
3982 
3983 	DBG1(vgenp, ldcp, "enter\n");
3984 	switch (tagp->vio_subtype) {
3985 
3986 	case VIO_SUBTYPE_INFO:
3987 
3988 		/* vnet shouldn't recv set mcast msg, only vsw handles it */
3989 		DWARN(vgenp, ldcp, "rcvd SET_MCAST_INFO \n");
3990 		break;
3991 
3992 	case VIO_SUBTYPE_ACK:
3993 
3994 		/* success adding/removing multicast addr */
3995 		DBG1(vgenp, ldcp, "rcvd SET_MCAST_ACK \n");
3996 		break;
3997 
3998 	case VIO_SUBTYPE_NACK:
3999 
4000 		DWARN(vgenp, ldcp, "rcvd SET_MCAST_NACK \n");
4001 		if (!(msgp->set)) {
4002 			/* multicast remove request failed */
4003 			break;
4004 		}
4005 
4006 		/* multicast add request failed */
4007 		for (count = 0; count < msgp->count; count++) {
4008 			addrp = &(msgp->mca[count]);
4009 
4010 			/* delete address from the table */
4011 			for (i = 0; i < vgenp->mccount; i++) {
4012 				if (ether_cmp(addrp,
4013 				    &(vgenp->mctab[i])) == 0) {
4014 					if (vgenp->mccount > 1) {
4015 						int t = vgenp->mccount - 1;
4016 						vgenp->mctab[i] =
4017 						    vgenp->mctab[t];
4018 					}
4019 					vgenp->mccount--;
4020 					break;
4021 				}
4022 			}
4023 		}
4024 		break;
4025 
4026 	}
4027 	DBG1(vgenp, ldcp, "exit\n");
4028 
4029 	return (VGEN_SUCCESS);
4030 }
4031 
4032 /* handler for control messages received from the peer ldc end-point */
4033 static int
4034 vgen_handle_ctrlmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4035 {
4036 	int rv = 0;
4037 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4038 
4039 	DBG1(vgenp, ldcp, "enter\n");
4040 	switch (tagp->vio_subtype_env) {
4041 
4042 	case VIO_VER_INFO:
4043 		rv = vgen_handle_version_negotiate(ldcp, tagp);
4044 		break;
4045 
4046 	case VIO_ATTR_INFO:
4047 		rv = vgen_handle_attr_info(ldcp, tagp);
4048 		break;
4049 
4050 	case VIO_DRING_REG:
4051 		rv = vgen_handle_dring_reg(ldcp, tagp);
4052 		break;
4053 
4054 	case VIO_RDX:
4055 		rv = vgen_handle_rdx_info(ldcp, tagp);
4056 		break;
4057 
4058 	case VNET_MCAST_INFO:
4059 		rv = vgen_handle_mcast_info(ldcp, tagp);
4060 		break;
4061 
4062 	}
4063 
4064 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
4065 	return (rv);
4066 }
4067 
4068 /* handler for data messages received from the peer ldc end-point */
4069 static int
4070 vgen_handle_datamsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4071 {
4072 	int rv = 0;
4073 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4074 
4075 	DBG1(vgenp, ldcp, "enter\n");
4076 
4077 	if (ldcp->hphase != VH_DONE)
4078 		return (rv);
4079 	switch (tagp->vio_subtype_env) {
4080 	case VIO_DRING_DATA:
4081 		rv = vgen_handle_dring_data(ldcp, tagp);
4082 		break;
4083 	default:
4084 		break;
4085 	}
4086 
4087 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
4088 	return (rv);
4089 }
4090 
4091 static int
4092 vgen_send_dring_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp, uint32_t start,
4093     int32_t end, uint8_t pstate)
4094 {
4095 	int rv = 0;
4096 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4097 	vio_dring_msg_t *msgp = (vio_dring_msg_t *)tagp;
4098 
4099 	tagp->vio_subtype = VIO_SUBTYPE_ACK;
4100 	tagp->vio_sid = ldcp->local_sid;
4101 	msgp->start_idx = start;
4102 	msgp->end_idx = end;
4103 	msgp->dring_process_state = pstate;
4104 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msgp), B_FALSE);
4105 	if (rv != VGEN_SUCCESS) {
4106 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
4107 	}
4108 	return (rv);
4109 }
4110 
/*
 * Dispatch a dring data msg according to its subtype (INFO, ACK or NACK)
 * and return the result of the corresponding handler. A msg with any other
 * subtype is ignored and 0 is returned.
 *
 * For INFO msgs, cblock is dropped and rxlock is held while
 * vgen_handle_dring_data_info() runs; cblock is re-acquired afterwards, so
 * the caller is expected to hold cblock on entry. ACK and NACK msgs are
 * handled without changing the locks held.
 */
static int
vgen_handle_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
{
	int rv = 0;
	vgen_t *vgenp = LDC_TO_VGEN(ldcp);


	DBG1(vgenp, ldcp, "enter\n");
	switch (tagp->vio_subtype) {

	case VIO_SUBTYPE_INFO:
		/*
		 * To reduce the locking contention, release the
		 * cblock here and re-acquire it once we are done
		 * receiving packets.
		 */
		mutex_exit(&ldcp->cblock);
		mutex_enter(&ldcp->rxlock);
		rv = vgen_handle_dring_data_info(ldcp, tagp);
		mutex_exit(&ldcp->rxlock);
		mutex_enter(&ldcp->cblock);
		break;

	case VIO_SUBTYPE_ACK:
		/* peer acknowledged descriptors that we sent */
		rv = vgen_handle_dring_data_ack(ldcp, tagp);
		break;

	case VIO_SUBTYPE_NACK:
		/* peer rejected a dring data msg that we sent */
		rv = vgen_handle_dring_data_nack(ldcp, tagp);
		break;
	}
	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
	return (rv);
}
4145 
4146 static int
4147 vgen_handle_dring_data_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4148 {
4149 	uint32_t start;
4150 	int32_t end;
4151 	int rv = 0;
4152 	vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;
4153 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4154 #ifdef VGEN_HANDLE_LOST_PKTS
4155 	vgen_stats_t *statsp = ldcp->statsp;
4156 	uint32_t rxi;
4157 	int n;
4158 #endif
4159 
4160 	DBG1(vgenp, ldcp, "enter\n");
4161 
4162 	start = dringmsg->start_idx;
4163 	end = dringmsg->end_idx;
4164 	/*
4165 	 * received a data msg, which contains the start and end
4166 	 * indices of the descriptors within the rx ring holding data,
4167 	 * the seq_num of data packet corresponding to the start index,
4168 	 * and the dring_ident.
4169 	 * We can now read the contents of each of these descriptors
4170 	 * and gather data from it.
4171 	 */
4172 	DBG1(vgenp, ldcp, "INFO: start(%d), end(%d)\n",
4173 	    start, end);
4174 
4175 	/* validate rx start and end indeces */
4176 	if (!(CHECK_RXI(start, ldcp)) || ((end != -1) &&
4177 	    !(CHECK_RXI(end, ldcp)))) {
4178 		DWARN(vgenp, ldcp, "Invalid Rx start(%d) or end(%d)\n",
4179 		    start, end);
4180 		/* drop the message if invalid index */
4181 		return (rv);
4182 	}
4183 
4184 	/* validate dring_ident */
4185 	if (dringmsg->dring_ident != ldcp->peer_hparams.dring_ident) {
4186 		DWARN(vgenp, ldcp, "Invalid dring ident 0x%x\n",
4187 		    dringmsg->dring_ident);
4188 		/* invalid dring_ident, drop the msg */
4189 		return (rv);
4190 	}
4191 #ifdef DEBUG
4192 	if (vgen_trigger_rxlost) {
4193 		/* drop this msg to simulate lost pkts for debugging */
4194 		vgen_trigger_rxlost = 0;
4195 		return (rv);
4196 	}
4197 #endif
4198 
4199 #ifdef	VGEN_HANDLE_LOST_PKTS
4200 
4201 	/* receive start index doesn't match expected index */
4202 	if (ldcp->next_rxi != start) {
4203 		DWARN(vgenp, ldcp, "next_rxi(%d) != start(%d)\n",
4204 		    ldcp->next_rxi, start);
4205 
4206 		/* calculate the number of pkts lost */
4207 		if (start >= ldcp->next_rxi) {
4208 			n = start - ldcp->next_rxi;
4209 		} else  {
4210 			n = ldcp->num_rxds - (ldcp->next_rxi - start);
4211 		}
4212 
4213 		/*
4214 		 * sequence number of dring data message
4215 		 * is less than the next sequence number that
4216 		 * is expected:
4217 		 *
4218 		 * drop the message and the corresponding packets.
4219 		 */
4220 		if (ldcp->next_rxseq > dringmsg->seq_num) {
4221 			DWARN(vgenp, ldcp, "dropping pkts, expected "
4222 			"rxseq(0x%lx) > recvd(0x%lx)\n",
4223 			    ldcp->next_rxseq, dringmsg->seq_num);
4224 			/*
4225 			 * duplicate/multiple retransmissions from
4226 			 * sender?? drop this msg.
4227 			 */
4228 			return (rv);
4229 		}
4230 
4231 		/*
4232 		 * sequence number of dring data message
4233 		 * is greater than the next expected sequence number
4234 		 *
4235 		 * send a NACK back to the peer to indicate lost
4236 		 * packets.
4237 		 */
4238 		if (dringmsg->seq_num > ldcp->next_rxseq) {
4239 			statsp->rx_lost_pkts += n;
4240 			tagp->vio_subtype = VIO_SUBTYPE_NACK;
4241 			tagp->vio_sid = ldcp->local_sid;
4242 			/* indicate the range of lost descriptors */
4243 			dringmsg->start_idx = ldcp->next_rxi;
4244 			rxi = start;
4245 			DECR_RXI(rxi, ldcp);
4246 			dringmsg->end_idx = rxi;
4247 			/* dring ident is left unchanged */
4248 			rv = vgen_sendmsg(ldcp, (caddr_t)tagp,
4249 			    sizeof (*dringmsg), B_FALSE);
4250 			if (rv != VGEN_SUCCESS) {
4251 				DWARN(vgenp, ldcp,
4252 				    "vgen_sendmsg failed, stype:NACK\n");
4253 				return (rv);
4254 			}
4255 #ifdef VGEN_REXMIT
4256 			/*
4257 			 * stop further processing until peer
4258 			 * retransmits with the right index.
4259 			 * update next_rxseq expected.
4260 			 */
4261 			ldcp->next_rxseq += 1;
4262 			return (rv);
4263 #else	/* VGEN_REXMIT */
4264 			/*
4265 			 * treat this range of descrs/pkts as dropped
4266 			 * and set the new expected values for next_rxi
4267 			 * and next_rxseq. continue(below) to process
4268 			 * from the new start index.
4269 			 */
4270 			ldcp->next_rxi = start;
4271 			ldcp->next_rxseq += 1;
4272 #endif	/* VGEN_REXMIT */
4273 
4274 		} else if (dringmsg->seq_num == ldcp->next_rxseq) {
4275 			/*
4276 			 * expected and received seqnums match, but
4277 			 * the descriptor indeces don't?
4278 			 *
4279 			 * restart handshake with peer.
4280 			 */
4281 			DWARN(vgenp, ldcp, "next_rxseq(0x%lx)=="
4282 			    "seq_num(0x%lx)\n", ldcp->next_rxseq,
4283 			    dringmsg->seq_num);
4284 
4285 		}
4286 
4287 	} else {
4288 		/* expected and start dring indeces match */
4289 
4290 		if (dringmsg->seq_num != ldcp->next_rxseq) {
4291 
4292 			/* seqnums don't match */
4293 
4294 			DWARN(vgenp, ldcp,
4295 			    "next_rxseq(0x%lx) != seq_num(0x%lx)\n",
4296 			    ldcp->next_rxseq, dringmsg->seq_num);
4297 		}
4298 	}
4299 
4300 #endif	/* VGEN_HANDLE_LOST_PKTS */
4301 
4302 	/* Now receive messages */
4303 	rv = vgen_process_dring_data(ldcp, tagp);
4304 
4305 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
4306 	return (rv);
4307 }
4308 
4309 static int
4310 vgen_process_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4311 {
4312 	boolean_t set_ack_start = B_FALSE;
4313 	uint32_t start;
4314 	uint32_t ack_end;
4315 	uint32_t next_rxi;
4316 	uint32_t rxi;
4317 	int count = 0;
4318 	int rv = 0;
4319 	uint32_t retries = 0;
4320 	vgen_stats_t *statsp;
4321 	vnet_public_desc_t *rxdp;
4322 	vio_dring_entry_hdr_t *hdrp;
4323 	mblk_t *bp = NULL;
4324 	mblk_t *bpt = NULL;
4325 	uint32_t ack_start;
4326 	uint32_t datalen;
4327 	uint32_t ncookies;
4328 	boolean_t rxd_err = B_FALSE;
4329 	mblk_t *mp = NULL;
4330 	size_t nbytes;
4331 	boolean_t ack_needed = B_FALSE;
4332 	size_t nread;
4333 	uint64_t off = 0;
4334 	struct ether_header *ehp;
4335 	vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;
4336 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4337 
4338 	DBG1(vgenp, ldcp, "enter\n");
4339 
4340 	statsp = ldcp->statsp;
4341 	start = dringmsg->start_idx;
4342 
4343 	/*
4344 	 * start processing the descriptors from the specified
4345 	 * start index, up to the index a descriptor is not ready
4346 	 * to be processed or we process the entire descriptor ring
4347 	 * and wrap around upto the start index.
4348 	 */
4349 
4350 	/* need to set the start index of descriptors to be ack'd */
4351 	set_ack_start = B_TRUE;
4352 
4353 	/* index upto which we have ack'd */
4354 	ack_end = start;
4355 	DECR_RXI(ack_end, ldcp);
4356 
4357 	next_rxi = rxi =  start;
4358 	do {
4359 vgen_recv_retry:
4360 		rv = ldc_mem_dring_acquire(ldcp->rx_dhandle, rxi, rxi);
4361 		if (rv != 0) {
4362 			DWARN(vgenp, ldcp, "ldc_mem_dring_acquire() failed"
4363 			    " rv(%d)\n", rv);
4364 			statsp->ierrors++;
4365 			return (rv);
4366 		}
4367 
4368 		rxdp = &(ldcp->rxdp[rxi]);
4369 		hdrp = &rxdp->hdr;
4370 
4371 		if (hdrp->dstate != VIO_DESC_READY) {
4372 			/*
4373 			 * Before waiting and retry here, queue
4374 			 * the messages that are received already.
4375 			 * This will help the soft interrupt to
4376 			 * send them up with less latency.
4377 			 */
4378 			if (bp != NULL) {
4379 				DTRACE_PROBE1(vgen_rcv_msgs, int, count);
4380 				vgen_ldc_queue_data(ldcp, bp, bpt);
4381 				count = 0;
4382 				bp = bpt = NULL;
4383 			}
4384 			/*
4385 			 * descriptor is not ready.
4386 			 * retry descriptor acquire, stop processing
4387 			 * after max # retries.
4388 			 */
4389 			if (retries == vgen_recv_retries)
4390 				break;
4391 			retries++;
4392 			drv_usecwait(vgen_recv_delay);
4393 			goto vgen_recv_retry;
4394 		}
4395 		retries = 0;
4396 
4397 		if (set_ack_start) {
4398 			/*
4399 			 * initialize the start index of the range
4400 			 * of descriptors to be ack'd.
4401 			 */
4402 			ack_start = rxi;
4403 			set_ack_start = B_FALSE;
4404 		}
4405 
4406 		datalen = rxdp->nbytes;
4407 		ncookies = rxdp->ncookies;
4408 		if ((datalen < ETHERMIN) ||
4409 		    (ncookies == 0) ||
4410 		    (ncookies > MAX_COOKIES)) {
4411 			rxd_err = B_TRUE;
4412 		} else {
4413 			/*
4414 			 * Try to allocate an mblk from the free pool
4415 			 * of recv mblks for the channel.
4416 			 * If this fails, use allocb().
4417 			 */
4418 			nbytes = (VNET_IPALIGN + datalen + 7) & ~7;
4419 			mp = vio_multipool_allocb(&ldcp->vmp, nbytes);
4420 			if (!mp) {
4421 				/*
4422 				 * The data buffer returned by
4423 				 * allocb(9F) is 8byte aligned. We
4424 				 * allocate extra 8 bytes to ensure
4425 				 * size is multiple of 8 bytes for
4426 				 * ldc_mem_copy().
4427 				 */
4428 				statsp->rx_vio_allocb_fail++;
4429 				mp = allocb(VNET_IPALIGN + datalen + 8,
4430 				    BPRI_MED);
4431 			}
4432 		}
4433 		if ((rxd_err) || (mp == NULL)) {
4434 			/*
4435 			 * rxd_err or allocb() failure,
4436 			 * drop this packet, get next.
4437 			 */
4438 			if (rxd_err) {
4439 				statsp->ierrors++;
4440 				rxd_err = B_FALSE;
4441 			} else {
4442 				statsp->rx_allocb_fail++;
4443 			}
4444 
4445 			ack_needed = hdrp->ack;
4446 
4447 			/* set descriptor done bit */
4448 			hdrp->dstate = VIO_DESC_DONE;
4449 
4450 			rv = ldc_mem_dring_release(ldcp->rx_dhandle,
4451 			    rxi, rxi);
4452 			if (rv != 0) {
4453 				DWARN(vgenp, ldcp,
4454 				    "ldc_mem_dring_release err rv(%d)\n", rv);
4455 				return (rv);
4456 			}
4457 
4458 			if (ack_needed) {
4459 				ack_needed = B_FALSE;
4460 				/*
4461 				 * sender needs ack for this packet,
4462 				 * ack pkts upto this index.
4463 				 */
4464 				ack_end = rxi;
4465 
4466 				rv = vgen_send_dring_ack(ldcp, tagp,
4467 				    ack_start, ack_end,
4468 				    VIO_DP_ACTIVE);
4469 				if (rv != VGEN_SUCCESS) {
4470 					goto error_ret;
4471 				}
4472 
4473 				/* need to set new ack start index */
4474 				set_ack_start = B_TRUE;
4475 			}
4476 			goto vgen_next_rxi;
4477 		}
4478 
4479 		nread = nbytes;
4480 		rv = ldc_mem_copy(ldcp->ldc_handle,
4481 		    (caddr_t)mp->b_rptr, off, &nread,
4482 		    rxdp->memcookie, ncookies, LDC_COPY_IN);
4483 
4484 		/* if ldc_mem_copy() failed */
4485 		if (rv) {
4486 			DWARN(vgenp, ldcp, "ldc_mem_copy err rv(%d)\n", rv);
4487 			statsp->ierrors++;
4488 			freemsg(mp);
4489 			goto error_ret;
4490 		}
4491 
4492 		ack_needed = hdrp->ack;
4493 		hdrp->dstate = VIO_DESC_DONE;
4494 
4495 		rv = ldc_mem_dring_release(ldcp->rx_dhandle, rxi, rxi);
4496 		if (rv != 0) {
4497 			DWARN(vgenp, ldcp,
4498 			    "ldc_mem_dring_release err rv(%d)\n", rv);
4499 			goto error_ret;
4500 		}
4501 
4502 		mp->b_rptr += VNET_IPALIGN;
4503 
4504 		if (ack_needed) {
4505 			ack_needed = B_FALSE;
4506 			/*
4507 			 * sender needs ack for this packet,
4508 			 * ack pkts upto this index.
4509 			 */
4510 			ack_end = rxi;
4511 
4512 			rv = vgen_send_dring_ack(ldcp, tagp,
4513 			    ack_start, ack_end, VIO_DP_ACTIVE);
4514 			if (rv != VGEN_SUCCESS) {
4515 				goto error_ret;
4516 			}
4517 
4518 			/* need to set new ack start index */
4519 			set_ack_start = B_TRUE;
4520 		}
4521 
4522 		if (nread != nbytes) {
4523 			DWARN(vgenp, ldcp,
4524 			    "ldc_mem_copy nread(%lx), nbytes(%lx)\n",
4525 			    nread, nbytes);
4526 			statsp->ierrors++;
4527 			freemsg(mp);
4528 			goto vgen_next_rxi;
4529 		}
4530 
4531 		/* point to the actual end of data */
4532 		mp->b_wptr = mp->b_rptr + datalen;
4533 
4534 		/* update stats */
4535 		statsp->ipackets++;
4536 		statsp->rbytes += datalen;
4537 		ehp = (struct ether_header *)mp->b_rptr;
4538 		if (IS_BROADCAST(ehp))
4539 			statsp->brdcstrcv++;
4540 		else if (IS_MULTICAST(ehp))
4541 			statsp->multircv++;
4542 
4543 		/* build a chain of received packets */
4544 		if (bp == NULL) {
4545 			/* first pkt */
4546 			bp = mp;
4547 			bpt = bp;
4548 			bpt->b_next = NULL;
4549 		} else {
4550 			mp->b_next = NULL;
4551 			bpt->b_next = mp;
4552 			bpt = mp;
4553 		}
4554 
4555 		if (count++ > vgen_chain_len) {
4556 			DTRACE_PROBE1(vgen_rcv_msgs, int, count);
4557 			vgen_ldc_queue_data(ldcp, bp, bpt);
4558 			count = 0;
4559 			bp = bpt = NULL;
4560 		}
4561 
4562 vgen_next_rxi:
4563 		/* update end index of range of descrs to be ack'd */
4564 		ack_end = rxi;
4565 
4566 		/* update the next index to be processed */
4567 		INCR_RXI(next_rxi, ldcp);
4568 		if (next_rxi == start) {
4569 			/*
4570 			 * processed the entire descriptor ring upto
4571 			 * the index at which we started.
4572 			 */
4573 			break;
4574 		}
4575 
4576 		rxi = next_rxi;
4577 
4578 	_NOTE(CONSTCOND)
4579 	} while (1);
4580 
4581 	/*
4582 	 * send an ack message to peer indicating that we have stopped
4583 	 * processing descriptors.
4584 	 */
4585 	if (set_ack_start) {
4586 		/*
4587 		 * We have ack'd upto some index and we have not
4588 		 * processed any descriptors beyond that index.
4589 		 * Use the last ack'd index as both the start and
4590 		 * end of range of descrs being ack'd.
4591 		 * Note: This results in acking the last index twice
4592 		 * and should be harmless.
4593 		 */
4594 		ack_start = ack_end;
4595 	}
4596 
4597 	rv = vgen_send_dring_ack(ldcp, tagp, ack_start, ack_end,
4598 	    VIO_DP_STOPPED);
4599 	if (rv != VGEN_SUCCESS) {
4600 		goto error_ret;
4601 	}
4602 
4603 	/* save new recv index and expected seqnum of next dring msg */
4604 	ldcp->next_rxi = next_rxi;
4605 	ldcp->next_rxseq += 1;
4606 
4607 error_ret:
4608 	/* queue the packets received so far */
4609 	if (bp != NULL) {
4610 		DTRACE_PROBE1(vgen_rcv_msgs, int, count);
4611 		vgen_ldc_queue_data(ldcp, bp, bpt);
4612 		bp = bpt = NULL;
4613 	}
4614 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
4615 	return (rv);
4616 
4617 }
4618 
4619 static int
4620 vgen_handle_dring_data_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4621 {
4622 	int rv = 0;
4623 	uint32_t start;
4624 	int32_t end;
4625 	uint32_t txi;
4626 	boolean_t ready_txd = B_FALSE;
4627 	vgen_stats_t *statsp;
4628 	vgen_private_desc_t *tbufp;
4629 	vnet_public_desc_t *txdp;
4630 	vio_dring_entry_hdr_t *hdrp;
4631 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4632 	vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;
4633 
4634 	DBG1(vgenp, ldcp, "enter\n");
4635 	start = dringmsg->start_idx;
4636 	end = dringmsg->end_idx;
4637 	statsp = ldcp->statsp;
4638 
4639 	/*
4640 	 * received an ack corresponding to a specific descriptor for
4641 	 * which we had set the ACK bit in the descriptor (during
4642 	 * transmit). This enables us to reclaim descriptors.
4643 	 */
4644 
4645 	DBG2(vgenp, ldcp, "ACK:  start(%d), end(%d)\n", start, end);
4646 
4647 	/* validate start and end indeces in the tx ack msg */
4648 	if (!(CHECK_TXI(start, ldcp)) || !(CHECK_TXI(end, ldcp))) {
4649 		/* drop the message if invalid index */
4650 		DWARN(vgenp, ldcp, "Invalid Tx ack start(%d) or end(%d)\n",
4651 		    start, end);
4652 		return (rv);
4653 	}
4654 	/* validate dring_ident */
4655 	if (dringmsg->dring_ident != ldcp->local_hparams.dring_ident) {
4656 		/* invalid dring_ident, drop the msg */
4657 		DWARN(vgenp, ldcp, "Invalid dring ident 0x%x\n",
4658 		    dringmsg->dring_ident);
4659 		return (rv);
4660 	}
4661 	statsp->dring_data_acks++;
4662 
4663 	/* reclaim descriptors that are done */
4664 	vgen_reclaim(ldcp);
4665 
4666 	if (dringmsg->dring_process_state != VIO_DP_STOPPED) {
4667 		/*
4668 		 * receiver continued processing descriptors after
4669 		 * sending us the ack.
4670 		 */
4671 		return (rv);
4672 	}
4673 
4674 	statsp->dring_stopped_acks++;
4675 
4676 	/* receiver stopped processing descriptors */
4677 	mutex_enter(&ldcp->wrlock);
4678 	mutex_enter(&ldcp->tclock);
4679 
4680 	/*
4681 	 * determine if there are any pending tx descriptors
4682 	 * ready to be processed by the receiver(peer) and if so,
4683 	 * send a message to the peer to restart receiving.
4684 	 */
4685 	ready_txd = B_FALSE;
4686 
4687 	/*
4688 	 * using the end index of the descriptor range for which
4689 	 * we received the ack, check if the next descriptor is
4690 	 * ready.
4691 	 */
4692 	txi = end;
4693 	INCR_TXI(txi, ldcp);
4694 	tbufp = &ldcp->tbufp[txi];
4695 	txdp = tbufp->descp;
4696 	hdrp = &txdp->hdr;
4697 	if (hdrp->dstate == VIO_DESC_READY) {
4698 		ready_txd = B_TRUE;
4699 	} else {
4700 		/*
4701 		 * descr next to the end of ack'd descr range is not
4702 		 * ready.
4703 		 * starting from the current reclaim index, check
4704 		 * if any descriptor is ready.
4705 		 */
4706 
4707 		txi = ldcp->cur_tbufp - ldcp->tbufp;
4708 		tbufp = &ldcp->tbufp[txi];
4709 
4710 		txdp = tbufp->descp;
4711 		hdrp = &txdp->hdr;
4712 		if (hdrp->dstate == VIO_DESC_READY) {
4713 			ready_txd = B_TRUE;
4714 		}
4715 
4716 	}
4717 
4718 	if (ready_txd) {
4719 		/*
4720 		 * we have tx descriptor(s) ready to be
4721 		 * processed by the receiver.
4722 		 * send a message to the peer with the start index
4723 		 * of ready descriptors.
4724 		 */
4725 		rv = vgen_send_dring_data(ldcp, txi, -1);
4726 		if (rv != VGEN_SUCCESS) {
4727 			ldcp->resched_peer = B_TRUE;
4728 			ldcp->resched_peer_txi = txi;
4729 			mutex_exit(&ldcp->tclock);
4730 			mutex_exit(&ldcp->wrlock);
4731 			return (rv);
4732 		}
4733 	} else {
4734 		/*
4735 		 * no ready tx descriptors. set the flag to send a
4736 		 * message to peer when tx descriptors are ready in
4737 		 * transmit routine.
4738 		 */
4739 		ldcp->resched_peer = B_TRUE;
4740 		ldcp->resched_peer_txi = ldcp->cur_tbufp - ldcp->tbufp;
4741 	}
4742 
4743 	mutex_exit(&ldcp->tclock);
4744 	mutex_exit(&ldcp->wrlock);
4745 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
4746 	return (rv);
4747 }
4748 
4749 static int
4750 vgen_handle_dring_data_nack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4751 {
4752 	int rv = 0;
4753 	uint32_t start;
4754 	int32_t end;
4755 	uint32_t txi;
4756 	vnet_public_desc_t *txdp;
4757 	vio_dring_entry_hdr_t *hdrp;
4758 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4759 	vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;
4760 #ifdef VGEN_REXMIT
4761 	vgen_stats_t *statsp = ldcp->statsp;
4762 #endif
4763 
4764 	DBG1(vgenp, ldcp, "enter\n");
4765 	start = dringmsg->start_idx;
4766 	end = dringmsg->end_idx;
4767 
4768 	/*
4769 	 * peer sent a NACK msg to indicate lost packets.
4770 	 * The start and end correspond to the range of descriptors
4771 	 * for which the peer didn't receive a dring data msg and so
4772 	 * didn't receive the corresponding data.
4773 	 */
4774 	DWARN(vgenp, ldcp, "NACK: start(%d), end(%d)\n", start, end);
4775 
4776 	/* validate start and end indeces in the tx nack msg */
4777 	if (!(CHECK_TXI(start, ldcp)) || !(CHECK_TXI(end, ldcp))) {
4778 		/* drop the message if invalid index */
4779 		DWARN(vgenp, ldcp, "Invalid Tx nack start(%d) or end(%d)\n",
4780 		    start, end);
4781 		return (rv);
4782 	}
4783 	/* validate dring_ident */
4784 	if (dringmsg->dring_ident != ldcp->local_hparams.dring_ident) {
4785 		/* invalid dring_ident, drop the msg */
4786 		DWARN(vgenp, ldcp, "Invalid dring ident 0x%x\n",
4787 		    dringmsg->dring_ident);
4788 		return (rv);
4789 	}
4790 	mutex_enter(&ldcp->txlock);
4791 	mutex_enter(&ldcp->tclock);
4792 
4793 	if (ldcp->next_tbufp == ldcp->cur_tbufp) {
4794 		/* no busy descriptors, bogus nack ? */
4795 		mutex_exit(&ldcp->tclock);
4796 		mutex_exit(&ldcp->txlock);
4797 		return (rv);
4798 	}
4799 
4800 #ifdef VGEN_REXMIT
4801 	/* send a new dring data msg including the lost descrs */
4802 	end = ldcp->next_tbufp - ldcp->tbufp;
4803 	DECR_TXI(end, ldcp);
4804 	rv = vgen_send_dring_data(ldcp, start, end);
4805 	if (rv != 0) {
4806 		/*
4807 		 * vgen_send_dring_data() error: drop all packets
4808 		 * in this descr range
4809 		 */
4810 		DWARN(vgenp, ldcp, "vgen_send_dring_data failed: rv(%d)\n", rv);
4811 		for (txi = start; txi <= end; ) {
4812 			tbufp = &(ldcp->tbufp[txi]);
4813 			txdp = tbufp->descp;
4814 			hdrp = &txdp->hdr;
4815 			tbufp->flags = VGEN_PRIV_DESC_FREE;
4816 			hdrp->dstate = VIO_DESC_FREE;
4817 			hdrp->ack = B_FALSE;
4818 			statsp->oerrors++;
4819 		}
4820 
4821 		/* update next pointer */
4822 		ldcp->next_tbufp = &(ldcp->tbufp[start]);
4823 		ldcp->next_txi = start;
4824 	}
4825 	DBG2(vgenp, ldcp, "rexmit: start(%d) end(%d)\n", start, end);
4826 #else	/* VGEN_REXMIT */
4827 	/* we just mark the descrs as done so they can be reclaimed */
4828 	for (txi = start; txi <= end; ) {
4829 		txdp = &(ldcp->txdp[txi]);
4830 		hdrp = &txdp->hdr;
4831 		if (hdrp->dstate == VIO_DESC_READY)
4832 			hdrp->dstate = VIO_DESC_DONE;
4833 		INCR_TXI(txi, ldcp);
4834 	}
4835 #endif	/* VGEN_REXMIT */
4836 	mutex_exit(&ldcp->tclock);
4837 	mutex_exit(&ldcp->txlock);
4838 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
4839 	return (rv);
4840 }
4841 
4842 static void
4843 vgen_reclaim(vgen_ldc_t *ldcp)
4844 {
4845 	mutex_enter(&ldcp->tclock);
4846 
4847 	vgen_reclaim_dring(ldcp);
4848 	ldcp->reclaim_lbolt = ddi_get_lbolt();
4849 
4850 	mutex_exit(&ldcp->tclock);
4851 }
4852 
4853 /*
4854  * transmit reclaim function. starting from the current reclaim index
4855  * look for descriptors marked DONE and reclaim the descriptor and the
4856  * corresponding buffers (tbuf).
4857  */
4858 static void
4859 vgen_reclaim_dring(vgen_ldc_t *ldcp)
4860 {
4861 	vnet_public_desc_t *txdp;
4862 	vgen_private_desc_t *tbufp;
4863 	vio_dring_entry_hdr_t	*hdrp;
4864 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
4865 
4866 #ifdef DEBUG
4867 	if (vgen_trigger_txtimeout)
4868 		return;
4869 #endif
4870 
4871 	tbufp = ldcp->cur_tbufp;
4872 	txdp = tbufp->descp;
4873 	hdrp = &txdp->hdr;
4874 
4875 	while ((hdrp->dstate == VIO_DESC_DONE) &&
4876 	    (tbufp != ldcp->next_tbufp)) {
4877 		tbufp->flags = VGEN_PRIV_DESC_FREE;
4878 		hdrp->dstate = VIO_DESC_FREE;
4879 		hdrp->ack = B_FALSE;
4880 
4881 		tbufp = NEXTTBUF(ldcp, tbufp);
4882 		txdp = tbufp->descp;
4883 		hdrp = &txdp->hdr;
4884 	}
4885 
4886 	ldcp->cur_tbufp = tbufp;
4887 
4888 	/*
4889 	 * Check if mac layer should be notified to restart transmissions
4890 	 */
4891 	if (ldcp->need_resched) {
4892 		ldcp->need_resched = B_FALSE;
4893 		vnet_tx_update(vgenp->vnetp);
4894 	}
4895 }
4896 
4897 /* return the number of pending transmits for the channel */
4898 static int
4899 vgen_num_txpending(vgen_ldc_t *ldcp)
4900 {
4901 	int n;
4902 
4903 	if (ldcp->next_tbufp >= ldcp->cur_tbufp) {
4904 		n = ldcp->next_tbufp - ldcp->cur_tbufp;
4905 	} else  {
4906 		/* cur_tbufp > next_tbufp */
4907 		n = ldcp->num_txds - (ldcp->cur_tbufp - ldcp->next_tbufp);
4908 	}
4909 
4910 	return (n);
4911 }
4912 
4913 /* determine if the transmit descriptor ring is full */
4914 static int
4915 vgen_tx_dring_full(vgen_ldc_t *ldcp)
4916 {
4917 	vgen_private_desc_t	*tbufp;
4918 	vgen_private_desc_t	*ntbufp;
4919 
4920 	tbufp = ldcp->next_tbufp;
4921 	ntbufp = NEXTTBUF(ldcp, tbufp);
4922 	if (ntbufp == ldcp->cur_tbufp) { /* out of tbufs/txds */
4923 		return (VGEN_SUCCESS);
4924 	}
4925 	return (VGEN_FAILURE);
4926 }
4927 
4928 /* determine if timeout condition has occured */
4929 static int
4930 vgen_ldc_txtimeout(vgen_ldc_t *ldcp)
4931 {
4932 	if (((ddi_get_lbolt() - ldcp->reclaim_lbolt) >
4933 	    drv_usectohz(vnet_ldcwd_txtimeout * 1000)) &&
4934 	    (vnet_ldcwd_txtimeout) &&
4935 	    (vgen_tx_dring_full(ldcp) == VGEN_SUCCESS)) {
4936 		return (VGEN_SUCCESS);
4937 	} else {
4938 		return (VGEN_FAILURE);
4939 	}
4940 }
4941 
4942 /* transmit watchdog timeout handler */
4943 static void
4944 vgen_ldc_watchdog(void *arg)
4945 {
4946 	vgen_ldc_t *ldcp;
4947 	vgen_t *vgenp;
4948 	int rv;
4949 
4950 	ldcp = (vgen_ldc_t *)arg;
4951 	vgenp = LDC_TO_VGEN(ldcp);
4952 
4953 	rv = vgen_ldc_txtimeout(ldcp);
4954 	if (rv == VGEN_SUCCESS) {
4955 		DWARN(vgenp, ldcp, "transmit timeout\n");
4956 #ifdef DEBUG
4957 		if (vgen_trigger_txtimeout) {
4958 			/* tx timeout triggered for debugging */
4959 			vgen_trigger_txtimeout = 0;
4960 		}
4961 #endif
4962 		mutex_enter(&ldcp->cblock);
4963 		ldcp->need_ldc_reset = B_TRUE;
4964 		vgen_handshake_retry(ldcp);
4965 		mutex_exit(&ldcp->cblock);
4966 		if (ldcp->need_resched) {
4967 			ldcp->need_resched = B_FALSE;
4968 			vnet_tx_update(vgenp->vnetp);
4969 		}
4970 	}
4971 
4972 	ldcp->wd_tid = timeout(vgen_ldc_watchdog, (caddr_t)ldcp,
4973 	    drv_usectohz(vnet_ldcwd_interval * 1000));
4974 }
4975 
4976 static int
4977 vgen_setup_kstats(vgen_ldc_t *ldcp)
4978 {
4979 	vgen_t *vgenp;
4980 	struct kstat *ksp;
4981 	vgen_stats_t *statsp;
4982 	vgen_kstats_t *ldckp;
4983 	int instance;
4984 	size_t size;
4985 	char name[MAXNAMELEN];
4986 
4987 	vgenp = LDC_TO_VGEN(ldcp);
4988 	instance = ddi_get_instance(vgenp->vnetdip);
4989 	(void) sprintf(name, "vnetldc0x%lx", ldcp->ldc_id);
4990 	statsp = kmem_zalloc(sizeof (vgen_stats_t), KM_SLEEP);
4991 	if (statsp == NULL) {
4992 		return (VGEN_FAILURE);
4993 	}
4994 	size = sizeof (vgen_kstats_t) / sizeof (kstat_named_t);
4995 	ksp = kstat_create("vnet", instance, name, "net", KSTAT_TYPE_NAMED,
4996 	    size, 0);
4997 	if (ksp == NULL) {
4998 		KMEM_FREE(statsp);
4999 		return (VGEN_FAILURE);
5000 	}
5001 
5002 	ldckp = (vgen_kstats_t *)ksp->ks_data;
5003 	kstat_named_init(&ldckp->ipackets,		"ipackets",
5004 	    KSTAT_DATA_ULONG);
5005 	kstat_named_init(&ldckp->ipackets64,		"ipackets64",
5006 	    KSTAT_DATA_ULONGLONG);
5007 	kstat_named_init(&ldckp->ierrors,		"ierrors",
5008 	    KSTAT_DATA_ULONG);
5009 	kstat_named_init(&ldckp->opackets,		"opackets",
5010 	    KSTAT_DATA_ULONG);
5011 	kstat_named_init(&ldckp->opackets64,		"opackets64",
5012 	    KSTAT_DATA_ULONGLONG);
5013 	kstat_named_init(&ldckp->oerrors,		"oerrors",
5014 	    KSTAT_DATA_ULONG);
5015 
5016 
5017 	/* MIB II kstat variables */
5018 	kstat_named_init(&ldckp->rbytes,		"rbytes",
5019 	    KSTAT_DATA_ULONG);
5020 	kstat_named_init(&ldckp->rbytes64,		"rbytes64",
5021 	    KSTAT_DATA_ULONGLONG);
5022 	kstat_named_init(&ldckp->obytes,		"obytes",
5023 	    KSTAT_DATA_ULONG);
5024 	kstat_named_init(&ldckp->obytes64,		"obytes64",
5025 	    KSTAT_DATA_ULONGLONG);
5026 	kstat_named_init(&ldckp->multircv,		"multircv",
5027 	    KSTAT_DATA_ULONG);
5028 	kstat_named_init(&ldckp->multixmt,		"multixmt",
5029 	    KSTAT_DATA_ULONG);
5030 	kstat_named_init(&ldckp->brdcstrcv,		"brdcstrcv",
5031 	    KSTAT_DATA_ULONG);
5032 	kstat_named_init(&ldckp->brdcstxmt,		"brdcstxmt",
5033 	    KSTAT_DATA_ULONG);
5034 	kstat_named_init(&ldckp->norcvbuf,		"norcvbuf",
5035 	    KSTAT_DATA_ULONG);
5036 	kstat_named_init(&ldckp->noxmtbuf,		"noxmtbuf",
5037 	    KSTAT_DATA_ULONG);
5038 
5039 	/* Tx stats */
5040 	kstat_named_init(&ldckp->tx_no_desc,		"tx_no_desc",
5041 	    KSTAT_DATA_ULONG);
5042 
5043 	/* Rx stats */
5044 	kstat_named_init(&ldckp->rx_allocb_fail,	"rx_allocb_fail",
5045 	    KSTAT_DATA_ULONG);
5046 	kstat_named_init(&ldckp->rx_vio_allocb_fail,	"rx_vio_allocb_fail",
5047 	    KSTAT_DATA_ULONG);
5048 	kstat_named_init(&ldckp->rx_lost_pkts,		"rx_lost_pkts",
5049 	    KSTAT_DATA_ULONG);
5050 
5051 	/* Interrupt stats */
5052 	kstat_named_init(&ldckp->callbacks,		"callbacks",
5053 	    KSTAT_DATA_ULONG);
5054 	kstat_named_init(&ldckp->dring_data_acks,	"dring_data_acks",
5055 	    KSTAT_DATA_ULONG);
5056 	kstat_named_init(&ldckp->dring_stopped_acks,	"dring_stopped_acks",
5057 	    KSTAT_DATA_ULONG);
5058 	kstat_named_init(&ldckp->dring_data_msgs,	"dring_data_msgs",
5059 	    KSTAT_DATA_ULONG);
5060 
5061 	ksp->ks_update = vgen_kstat_update;
5062 	ksp->ks_private = (void *)ldcp;
5063 	kstat_install(ksp);
5064 
5065 	ldcp->ksp = ksp;
5066 	ldcp->statsp = statsp;
5067 	return (VGEN_SUCCESS);
5068 }
5069 
5070 static void
5071 vgen_destroy_kstats(vgen_ldc_t *ldcp)
5072 {
5073 	if (ldcp->ksp)
5074 		kstat_delete(ldcp->ksp);
5075 	KMEM_FREE(ldcp->statsp);
5076 }
5077 
5078 static int
5079 vgen_kstat_update(kstat_t *ksp, int rw)
5080 {
5081 	vgen_ldc_t *ldcp;
5082 	vgen_stats_t *statsp;
5083 	vgen_kstats_t *ldckp;
5084 
5085 	ldcp = (vgen_ldc_t *)ksp->ks_private;
5086 	statsp = ldcp->statsp;
5087 	ldckp = (vgen_kstats_t *)ksp->ks_data;
5088 
5089 	if (rw == KSTAT_READ) {
5090 		ldckp->ipackets.value.ul	= (uint32_t)statsp->ipackets;
5091 		ldckp->ipackets64.value.ull	= statsp->ipackets;
5092 		ldckp->ierrors.value.ul		= statsp->ierrors;
5093 		ldckp->opackets.value.ul	= (uint32_t)statsp->opackets;
5094 		ldckp->opackets64.value.ull	= statsp->opackets;
5095 		ldckp->oerrors.value.ul		= statsp->oerrors;
5096 
5097 		/*
5098 		 * MIB II kstat variables
5099 		 */
5100 		ldckp->rbytes.value.ul		= (uint32_t)statsp->rbytes;
5101 		ldckp->rbytes64.value.ull	= statsp->rbytes;
5102 		ldckp->obytes.value.ul		= (uint32_t)statsp->obytes;
5103 		ldckp->obytes64.value.ull	= statsp->obytes;
5104 		ldckp->multircv.value.ul	= statsp->multircv;
5105 		ldckp->multixmt.value.ul	= statsp->multixmt;
5106 		ldckp->brdcstrcv.value.ul	= statsp->brdcstrcv;
5107 		ldckp->brdcstxmt.value.ul	= statsp->brdcstxmt;
5108 		ldckp->norcvbuf.value.ul	= statsp->norcvbuf;
5109 		ldckp->noxmtbuf.value.ul	= statsp->noxmtbuf;
5110 
5111 		ldckp->tx_no_desc.value.ul	= statsp->tx_no_desc;
5112 
5113 		ldckp->rx_allocb_fail.value.ul	= statsp->rx_allocb_fail;
5114 		ldckp->rx_vio_allocb_fail.value.ul = statsp->rx_vio_allocb_fail;
5115 		ldckp->rx_lost_pkts.value.ul	= statsp->rx_lost_pkts;
5116 
5117 		ldckp->callbacks.value.ul	= statsp->callbacks;
5118 		ldckp->dring_data_acks.value.ul	= statsp->dring_data_acks;
5119 		ldckp->dring_stopped_acks.value.ul = statsp->dring_stopped_acks;
5120 		ldckp->dring_data_msgs.value.ul	= statsp->dring_data_msgs;
5121 	} else {
5122 		statsp->ipackets	= ldckp->ipackets64.value.ull;
5123 		statsp->ierrors		= ldckp->ierrors.value.ul;
5124 		statsp->opackets	= ldckp->opackets64.value.ull;
5125 		statsp->oerrors		= ldckp->oerrors.value.ul;
5126 
5127 		/*
5128 		 * MIB II kstat variables
5129 		 */
5130 		statsp->rbytes		= ldckp->rbytes64.value.ull;
5131 		statsp->obytes		= ldckp->obytes64.value.ull;
5132 		statsp->multircv	= ldckp->multircv.value.ul;
5133 		statsp->multixmt	= ldckp->multixmt.value.ul;
5134 		statsp->brdcstrcv	= ldckp->brdcstrcv.value.ul;
5135 		statsp->brdcstxmt	= ldckp->brdcstxmt.value.ul;
5136 		statsp->norcvbuf	= ldckp->norcvbuf.value.ul;
5137 		statsp->noxmtbuf	= ldckp->noxmtbuf.value.ul;
5138 
5139 		statsp->tx_no_desc	= ldckp->tx_no_desc.value.ul;
5140 
5141 		statsp->rx_allocb_fail	= ldckp->rx_allocb_fail.value.ul;
5142 		statsp->rx_vio_allocb_fail = ldckp->rx_vio_allocb_fail.value.ul;
5143 		statsp->rx_lost_pkts	= ldckp->rx_lost_pkts.value.ul;
5144 
5145 		statsp->callbacks	= ldckp->callbacks.value.ul;
5146 		statsp->dring_data_acks	= ldckp->dring_data_acks.value.ul;
5147 		statsp->dring_stopped_acks = ldckp->dring_stopped_acks.value.ul;
5148 		statsp->dring_data_msgs	= ldckp->dring_data_msgs.value.ul;
5149 	}
5150 
5151 	return (VGEN_SUCCESS);
5152 }
5153 
5154 /* handler for error messages received from the peer ldc end-point */
5155 static void
5156 vgen_handle_errmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5157 {
5158 	_NOTE(ARGUNUSED(ldcp, tagp))
5159 }
5160 
5161 /* Check if the session id in the received message is valid */
5162 static int
5163 vgen_check_sid(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5164 {
5165 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5166 
5167 	if (tagp->vio_sid != ldcp->peer_sid) {
5168 		DWARN(vgenp, ldcp, "sid mismatch: expected(%x), rcvd(%x)\n",
5169 		    ldcp->peer_sid, tagp->vio_sid);
5170 		return (VGEN_FAILURE);
5171 	}
5172 	else
5173 		return (VGEN_SUCCESS);
5174 }
5175 
5176 /* convert mac address from string to uint64_t */
5177 static uint64_t
5178 vgen_macaddr_strtoul(const uint8_t *macaddr)
5179 {
5180 	uint64_t val = 0;
5181 	int i;
5182 
5183 	for (i = 0; i < ETHERADDRL; i++) {
5184 		val <<= 8;
5185 		val |= macaddr[i];
5186 	}
5187 
5188 	return (val);
5189 }
5190 
5191 /* convert mac address from uint64_t to string */
5192 static int
5193 vgen_macaddr_ultostr(uint64_t val, uint8_t *macaddr)
5194 {
5195 	int i;
5196 	uint64_t value;
5197 
5198 	value = val;
5199 	for (i = ETHERADDRL - 1; i >= 0; i--) {
5200 		macaddr[i] = value & 0xFF;
5201 		value >>= 8;
5202 	}
5203 	return (VGEN_SUCCESS);
5204 }
5205 
5206 static caddr_t
5207 vgen_print_ethaddr(uint8_t *a, char *ebuf)
5208 {
5209 	(void) sprintf(ebuf,
5210 	    "%x:%x:%x:%x:%x:%x", a[0], a[1], a[2], a[3], a[4], a[5]);
5211 	return (ebuf);
5212 }
5213 
5214 /* Handshake watchdog timeout handler */
5215 static void
5216 vgen_hwatchdog(void *arg)
5217 {
5218 	vgen_ldc_t *ldcp = (vgen_ldc_t *)arg;
5219 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5220 
5221 	DWARN(vgenp, ldcp,
5222 	    "handshake timeout ldc(%lx) phase(%x) state(%x)\n",
5223 	    ldcp->hphase, ldcp->hstate);
5224 
5225 	mutex_enter(&ldcp->cblock);
5226 	if (ldcp->cancel_htid) {
5227 		ldcp->cancel_htid = 0;
5228 		mutex_exit(&ldcp->cblock);
5229 		return;
5230 	}
5231 	ldcp->htid = 0;
5232 	ldcp->need_ldc_reset = B_TRUE;
5233 	vgen_handshake_retry(ldcp);
5234 	mutex_exit(&ldcp->cblock);
5235 }
5236 
5237 static void
5238 vgen_print_hparams(vgen_hparams_t *hp)
5239 {
5240 	uint8_t	addr[6];
5241 	char	ea[6];
5242 	ldc_mem_cookie_t *dc;
5243 
5244 	cmn_err(CE_CONT, "version_info:\n");
5245 	cmn_err(CE_CONT,
5246 	    "\tver_major: %d, ver_minor: %d, dev_class: %d\n",
5247 	    hp->ver_major, hp->ver_minor, hp->dev_class);
5248 
5249 	(void) vgen_macaddr_ultostr(hp->addr, addr);
5250 	cmn_err(CE_CONT, "attr_info:\n");
5251 	cmn_err(CE_CONT, "\tMTU: %lx, addr: %s\n", hp->mtu,
5252 	    vgen_print_ethaddr(addr, ea));
5253 	cmn_err(CE_CONT,
5254 	    "\taddr_type: %x, xfer_mode: %x, ack_freq: %x\n",
5255 	    hp->addr_type, hp->xfer_mode, hp->ack_freq);
5256 
5257 	dc = &hp->dring_cookie;
5258 	cmn_err(CE_CONT, "dring_info:\n");
5259 	cmn_err(CE_CONT,
5260 	    "\tlength: %d, dsize: %d\n", hp->num_desc, hp->desc_size);
5261 	cmn_err(CE_CONT,
5262 	    "\tldc_addr: 0x%lx, ldc_size: %ld\n",
5263 	    dc->addr, dc->size);
5264 	cmn_err(CE_CONT, "\tdring_ident: 0x%lx\n", hp->dring_ident);
5265 }
5266 
5267 static void
5268 vgen_print_ldcinfo(vgen_ldc_t *ldcp)
5269 {
5270 	vgen_hparams_t *hp;
5271 
5272 	cmn_err(CE_CONT, "Channel Information:\n");
5273 	cmn_err(CE_CONT,
5274 	    "\tldc_id: 0x%lx, ldc_status: 0x%x\n",
5275 	    ldcp->ldc_id, ldcp->ldc_status);
5276 	cmn_err(CE_CONT,
5277 	    "\tlocal_sid: 0x%x, peer_sid: 0x%x\n",
5278 	    ldcp->local_sid, ldcp->peer_sid);
5279 	cmn_err(CE_CONT,
5280 	    "\thphase: 0x%x, hstate: 0x%x\n",
5281 	    ldcp->hphase, ldcp->hstate);
5282 
5283 	cmn_err(CE_CONT, "Local handshake params:\n");
5284 	hp = &ldcp->local_hparams;
5285 	vgen_print_hparams(hp);
5286 
5287 	cmn_err(CE_CONT, "Peer handshake params:\n");
5288 	hp = &ldcp->peer_hparams;
5289 	vgen_print_hparams(hp);
5290 }
5291 
5292 /*
5293  * vgen_ldc_queue_data -- Queue data in the LDC.
5294  */
5295 static void
5296 vgen_ldc_queue_data(vgen_ldc_t *ldcp, mblk_t *rhead, mblk_t *rtail)
5297 {
5298 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5299 
5300 	DBG1(vgenp, ldcp, "enter\n");
5301 	/*
5302 	 * If the receive thread is enabled, then the queue
5303 	 * is protected by the soft_lock. After queuing, trigger
5304 	 * the soft interrupt so that the interrupt handler sends these
5305 	 * messages up the stack.
5306 	 *
5307 	 * If the receive thread is not enabled, then the list is
5308 	 * automatically protected by the cblock lock, so no need
5309 	 * to hold any additional locks.
5310 	 */
5311 	if (ldcp->rcv_thread != NULL) {
5312 		mutex_enter(&ldcp->soft_lock);
5313 	}
5314 	if (ldcp->rcv_mhead == NULL) {
5315 		ldcp->rcv_mhead = rhead;
5316 		ldcp->rcv_mtail = rtail;
5317 	} else {
5318 		ldcp->rcv_mtail->b_next = rhead;
5319 		ldcp->rcv_mtail = rtail;
5320 	}
5321 	if (ldcp->rcv_thread != NULL) {
5322 		mutex_exit(&ldcp->soft_lock);
5323 		(void) ddi_intr_trigger_softint(ldcp->soft_handle, NULL);
5324 	}
5325 	DBG1(vgenp, ldcp, "exit\n");
5326 }
5327 
5328 /*
5329  * vgen_ldc_rcv_worker -- A per LDC worker thread to receive data.
5330  * This thread is woken up by the LDC interrupt handler to process
5331  * LDC packets and receive data.
5332  */
5333 static void
5334 vgen_ldc_rcv_worker(void *arg)
5335 {
5336 	callb_cpr_t	cprinfo;
5337 	vgen_ldc_t *ldcp = (vgen_ldc_t *)arg;
5338 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5339 
5340 	DBG1(vgenp, ldcp, "enter\n");
5341 	CALLB_CPR_INIT(&cprinfo, &ldcp->rcv_thr_lock, callb_generic_cpr,
5342 	    "vnet_rcv_thread");
5343 	mutex_enter(&ldcp->rcv_thr_lock);
5344 	ldcp->rcv_thr_flags |= VGEN_WTHR_RUNNING;
5345 	while (!(ldcp->rcv_thr_flags & VGEN_WTHR_STOP)) {
5346 
5347 		CALLB_CPR_SAFE_BEGIN(&cprinfo);
5348 		/*
5349 		 * Wait until the data is received or a stop
5350 		 * request is received.
5351 		 */
5352 		while (!(ldcp->rcv_thr_flags &
5353 		    (VGEN_WTHR_DATARCVD | VGEN_WTHR_STOP))) {
5354 			cv_wait(&ldcp->rcv_thr_cv, &ldcp->rcv_thr_lock);
5355 		}
5356 		CALLB_CPR_SAFE_END(&cprinfo, &ldcp->rcv_thr_lock)
5357 
5358 		/*
5359 		 * First process the stop request.
5360 		 */
5361 		if (ldcp->rcv_thr_flags & VGEN_WTHR_STOP) {
5362 			DBG2(vgenp, ldcp, "stopped\n");
5363 			break;
5364 		}
5365 		ldcp->rcv_thr_flags &= ~VGEN_WTHR_DATARCVD;
5366 		mutex_exit(&ldcp->rcv_thr_lock);
5367 		DBG2(vgenp, ldcp, "calling vgen_handle_evt_read\n");
5368 		vgen_handle_evt_read(ldcp);
5369 		mutex_enter(&ldcp->rcv_thr_lock);
5370 	}
5371 
5372 	/*
5373 	 * Update the run status and wakeup the thread that
5374 	 * has sent the stop request.
5375 	 */
5376 	ldcp->rcv_thr_flags &= ~VGEN_WTHR_RUNNING;
5377 	cv_signal(&ldcp->rcv_thr_cv);
5378 	CALLB_CPR_EXIT(&cprinfo);
5379 	thread_exit();
5380 	DBG1(vgenp, ldcp, "exit\n");
5381 }
5382 
5383 /* vgen_stop_rcv_thread -- Co-ordinate with receive thread to stop it */
5384 static void
5385 vgen_stop_rcv_thread(vgen_ldc_t *ldcp)
5386 {
5387 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5388 
5389 	DBG1(vgenp, ldcp, "enter\n");
5390 	/*
5391 	 * Send a stop request by setting the stop flag and
5392 	 * wait until the receive thread stops.
5393 	 */
5394 	mutex_enter(&ldcp->rcv_thr_lock);
5395 	if (ldcp->rcv_thr_flags & VGEN_WTHR_RUNNING) {
5396 		ldcp->rcv_thr_flags |= VGEN_WTHR_STOP;
5397 		cv_signal(&ldcp->rcv_thr_cv);
5398 		DBG2(vgenp, ldcp, "waiting...");
5399 		while (ldcp->rcv_thr_flags & VGEN_WTHR_RUNNING) {
5400 			cv_wait(&ldcp->rcv_thr_cv, &ldcp->rcv_thr_lock);
5401 		}
5402 	}
5403 	mutex_exit(&ldcp->rcv_thr_lock);
5404 	ldcp->rcv_thread = NULL;
5405 	DBG1(vgenp, ldcp, "exit\n");
5406 }
5407 
5408 /*
5409  * vgen_ldc_rcv_softintr -- LDC Soft interrupt handler function.
5410  * Its job is to pickup the recieved packets that are queued in the
5411  * LDC and send them up.
5412  *
5413  * NOTE: An interrupt handler is being used to handle the upper
5414  * layer(s) requirement to send up only at interrupt context.
5415  */
5416 /* ARGSUSED */
5417 static uint_t
5418 vgen_ldc_rcv_softintr(caddr_t arg1, caddr_t arg2)
5419 {
5420 	mblk_t *mp;
5421 	vgen_ldc_t *ldcp = (vgen_ldc_t *)arg1;
5422 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5423 
5424 	DBG1(vgenp, ldcp, "enter\n");
5425 	DTRACE_PROBE1(vgen_soft_intr, uint64_t, ldcp->ldc_id);
5426 	mutex_enter(&ldcp->soft_lock);
5427 	mp = ldcp->rcv_mhead;
5428 	ldcp->rcv_mhead = ldcp->rcv_mtail = NULL;
5429 	mutex_exit(&ldcp->soft_lock);
5430 	if (mp != NULL) {
5431 		vnet_rx(vgenp->vnetp, NULL, mp);
5432 	}
5433 	DBG1(vgenp, ldcp, "exit\n");
5434 	return (DDI_INTR_CLAIMED);
5435 }
5436 
#if DEBUG

/*
 * Print debug messages - set to 0xf to enable all msgs
 *
 * Builds "vnet<instance>:ldc(<id>):<fname>: <formatted message>" in a
 * local buffer and prints it with cmn_err(). The message is printed
 * when no channel is given, when vgendbg_ldcid is -1, or when
 * vgendbg_ldcid matches the channel's id.
 *
 * Every formatting step is bounded by the space left in the buffer, so
 * a long function name or message is truncated instead of overrunning
 * the stack buffer.
 */
static void
debug_printf(const char *fname, vgen_t *vgenp,
    vgen_ldc_t *ldcp, const char *fmt, ...)
{
	char    buf[256];
	char    *bufp = buf;
	va_list ap;

	/*
	 * snprintf()/vsnprintf() NUL-terminate within the size given, so
	 * after each step bufp still points inside buf and at least one
	 * byte remains.
	 */
	if ((vgenp != NULL) && (vgenp->vnetp != NULL)) {
		(void) snprintf(bufp, sizeof (buf) - (bufp - buf), "vnet%d:",
		    ((vnet_t *)(vgenp->vnetp))->instance);
		bufp += strlen(bufp);
	}
	if (ldcp != NULL) {
		(void) snprintf(bufp, sizeof (buf) - (bufp - buf),
		    "ldc(%ld):", ldcp->ldc_id);
		bufp += strlen(bufp);
	}
	(void) snprintf(bufp, sizeof (buf) - (bufp - buf), "%s: ", fname);
	bufp += strlen(bufp);

	va_start(ap, fmt);
	(void) vsnprintf(bufp, sizeof (buf) - (bufp - buf), fmt, ap);
	va_end(ap);

	if ((ldcp == NULL) ||(vgendbg_ldcid == -1) ||
	    (vgendbg_ldcid == ldcp->ldc_id)) {
		cmn_err(CE_CONT, "%s\n", buf);
	}
}
#endif
5472