xref: /titanic_44/usr/src/uts/sun4v/io/vnet_gen.c (revision 46736d35df047bb400483364f76bfcb08cdcbb25)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/errno.h>
31 #include <sys/param.h>
32 #include <sys/stream.h>
33 #include <sys/kmem.h>
34 #include <sys/conf.h>
35 #include <sys/devops.h>
36 #include <sys/ksynch.h>
37 #include <sys/stat.h>
38 #include <sys/modctl.h>
39 #include <sys/debug.h>
40 #include <sys/ethernet.h>
41 #include <sys/ddi.h>
42 #include <sys/sunddi.h>
43 #include <sys/strsun.h>
44 #include <sys/note.h>
45 #include <sys/mac.h>
46 #include <sys/mac_ether.h>
47 #include <sys/ldc.h>
48 #include <sys/mach_descrip.h>
49 #include <sys/mdeg.h>
50 #include <sys/vio_mailbox.h>
51 #include <sys/vio_common.h>
52 #include <sys/vnet_common.h>
53 #include <sys/vnet_mailbox.h>
54 #include <sys/vio_util.h>
55 #include <sys/vnet_gen.h>
56 
57 /*
58  * Implementation of the mac functionality for vnet using the
59  * generic(default) transport layer of sun4v Logical Domain Channels(LDC).
60  */
61 
62 /*
63  * Function prototypes.
64  */
65 /* vgen proxy entry points */
66 int vgen_init(void *vnetp, dev_info_t *vnetdip, const uint8_t *macaddr,
67 	mac_register_t **vgenmacp);
68 int vgen_uninit(void *arg);
69 static int vgen_start(void *arg);
70 static void vgen_stop(void *arg);
71 static mblk_t *vgen_tx(void *arg, mblk_t *mp);
72 static int vgen_multicst(void *arg, boolean_t add,
73 	const uint8_t *mca);
74 static int vgen_promisc(void *arg, boolean_t on);
75 static int vgen_unicst(void *arg, const uint8_t *mca);
76 static int vgen_stat(void *arg, uint_t stat, uint64_t *val);
77 static void vgen_ioctl(void *arg, queue_t *wq, mblk_t *mp);
78 
79 /* externs - functions provided by vnet to add/remove/modify entries in fdb */
80 void vnet_add_fdb(void *arg, uint8_t *macaddr, mac_tx_t m_tx, void *txarg);
81 void vnet_del_fdb(void *arg, uint8_t *macaddr);
82 void vnet_modify_fdb(void *arg, uint8_t *macaddr, mac_tx_t m_tx,
83 	void *txarg, boolean_t upgrade);
84 void vnet_add_def_rte(void *arg, mac_tx_t m_tx, void *txarg);
85 void vnet_del_def_rte(void *arg);
86 void vnet_rx(void *arg, mac_resource_handle_t mrh, mblk_t *mp);
87 void vnet_tx_update(void *arg);
88 
89 /* vgen internal functions */
90 static void vgen_detach_ports(vgen_t *vgenp);
91 static void vgen_port_detach(vgen_port_t *portp);
92 static void vgen_port_list_insert(vgen_port_t *portp);
93 static void vgen_port_list_remove(vgen_port_t *portp);
94 static vgen_port_t *vgen_port_lookup(vgen_portlist_t *plistp,
95 	int port_num);
96 static int vgen_mdeg_reg(vgen_t *vgenp);
97 static void vgen_mdeg_unreg(vgen_t *vgenp);
98 static int vgen_mdeg_cb(void *cb_argp, mdeg_result_t *resp);
99 static int vgen_add_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
100 static int vgen_remove_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
101 static int vgen_port_attach_mdeg(vgen_t *vgenp, int port_num, uint64_t *ldcids,
102 	int num_ids, struct ether_addr *macaddr, boolean_t vsw_port);
103 static void vgen_port_detach_mdeg(vgen_port_t *portp);
104 static int vgen_update_port(vgen_t *vgenp, md_t *curr_mdp,
105 	mde_cookie_t curr_mdex, md_t *prev_mdp, mde_cookie_t prev_mdex);
106 static uint64_t	vgen_port_stat(vgen_port_t *portp, uint_t stat);
107 
108 static int vgen_ldc_attach(vgen_port_t *portp, uint64_t ldc_id);
109 static void vgen_ldc_detach(vgen_ldc_t *ldcp);
110 static int vgen_alloc_tx_ring(vgen_ldc_t *ldcp);
111 static void vgen_free_tx_ring(vgen_ldc_t *ldcp);
112 static void vgen_init_ports(vgen_t *vgenp);
113 static void vgen_port_init(vgen_port_t *portp);
114 static void vgen_uninit_ports(vgen_t *vgenp);
115 static void vgen_port_uninit(vgen_port_t *portp);
116 static void vgen_init_ldcs(vgen_port_t *portp);
117 static void vgen_uninit_ldcs(vgen_port_t *portp);
118 static int vgen_ldc_init(vgen_ldc_t *ldcp);
119 static void vgen_ldc_uninit(vgen_ldc_t *ldcp);
120 static int vgen_init_tbufs(vgen_ldc_t *ldcp);
121 static void vgen_uninit_tbufs(vgen_ldc_t *ldcp);
122 static void vgen_clobber_tbufs(vgen_ldc_t *ldcp);
123 static void vgen_clobber_rxds(vgen_ldc_t *ldcp);
124 static uint64_t	vgen_ldc_stat(vgen_ldc_t *ldcp, uint_t stat);
125 static uint_t vgen_ldc_cb(uint64_t event, caddr_t arg);
126 static int vgen_portsend(vgen_port_t *portp, mblk_t *mp);
127 static int vgen_ldcsend(vgen_ldc_t *ldcp, mblk_t *mp);
128 static void vgen_reclaim(vgen_ldc_t *ldcp);
129 static void vgen_reclaim_dring(vgen_ldc_t *ldcp);
130 static int vgen_num_txpending(vgen_ldc_t *ldcp);
131 static int vgen_tx_dring_full(vgen_ldc_t *ldcp);
132 static int vgen_ldc_txtimeout(vgen_ldc_t *ldcp);
133 static void vgen_ldc_watchdog(void *arg);
134 static int vgen_setup_kstats(vgen_ldc_t *ldcp);
135 static void vgen_destroy_kstats(vgen_ldc_t *ldcp);
136 static int vgen_kstat_update(kstat_t *ksp, int rw);
137 
138 /* vgen handshake functions */
139 static vgen_ldc_t *vh_nextphase(vgen_ldc_t *ldcp);
140 static int vgen_supported_version(vgen_ldc_t *ldcp, uint16_t ver_major,
141 	uint16_t ver_minor);
142 static int vgen_next_version(vgen_ldc_t *ldcp, vgen_ver_t *verp);
143 static int vgen_sendmsg(vgen_ldc_t *ldcp, caddr_t msg,  size_t msglen,
144 	boolean_t caller_holds_lock);
145 static int vgen_send_version_negotiate(vgen_ldc_t *ldcp);
146 static int vgen_send_attr_info(vgen_ldc_t *ldcp);
147 static int vgen_send_dring_reg(vgen_ldc_t *ldcp);
148 static int vgen_send_rdx_info(vgen_ldc_t *ldcp);
149 static int vgen_send_dring_data(vgen_ldc_t *ldcp, uint32_t start, int32_t end);
150 static int vgen_send_mcast_info(vgen_ldc_t *ldcp);
151 static int vgen_handshake_phase2(vgen_ldc_t *ldcp);
152 static void vgen_handshake_reset(vgen_ldc_t *ldcp);
153 static void vgen_reset_hphase(vgen_ldc_t *ldcp);
154 static void vgen_handshake(vgen_ldc_t *ldcp);
155 static int vgen_handshake_done(vgen_ldc_t *ldcp);
156 static void vgen_handshake_retry(vgen_ldc_t *ldcp);
157 static int vgen_handle_version_negotiate(vgen_ldc_t *ldcp,
158 	vio_msg_tag_t *tagp);
159 static int vgen_handle_attr_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
160 static int vgen_handle_dring_reg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
161 static int vgen_handle_rdx_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
162 static int vgen_handle_mcast_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
163 static int vgen_handle_ctrlmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
164 static int vgen_handle_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp,
165 	mblk_t **headp, mblk_t **tailp);
166 static int vgen_send_dring_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp,
167 	uint32_t start, int32_t end, uint8_t pstate);
168 static int vgen_handle_datamsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp,
169 	mblk_t **headp, mblk_t **tailp);
170 static void vgen_handle_errmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
171 static void vgen_handle_evt_up(vgen_ldc_t *ldcp, boolean_t flag);
172 static void vgen_handle_evt_reset(vgen_ldc_t *ldcp, boolean_t flag);
173 static int vgen_check_sid(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
174 static uint64_t	vgen_macaddr_strtoul(const uint8_t *macaddr);
175 static int vgen_macaddr_ultostr(uint64_t value, uint8_t *macaddr);
176 static caddr_t vgen_print_ethaddr(uint8_t *a, char *ebuf);
177 static void vgen_hwatchdog(void *arg);
178 static void vgen_print_attr_info(vgen_ldc_t *ldcp, int endpoint);
179 static void vgen_print_hparams(vgen_hparams_t *hp);
180 static void vgen_print_ldcinfo(vgen_ldc_t *ldcp);
181 
/*
 * The handshake process consists of 5 phases defined below, with VH_PHASE0
 * being the pre-handshake phase and VH_DONE is the phase to indicate
 * successful completion of all phases.
 * Each phase may have one to several handshake states which are required
 * to complete successfully to move to the next phase.
 * Refer to the functions vgen_handshake() and vgen_handshake_done() for
 * more details.
 */
/* handshake phases */
enum {	VH_PHASE0, VH_PHASE1, VH_PHASE2, VH_PHASE3, VH_DONE = 0x80 };

/*
 * handshake states
 *
 * Every state is a distinct bit, so any combination of them can be held
 * in a single mask. The states come in four groups (VER, ATTR, DRING, RDX)
 * of four bits each; the composite value that closes each group is the OR
 * of that group's two ACK bits (ACK_RCVD | ACK_SENT).
 * NOTE(review): judging by the names, *_INFO_SENT/*_ACK_RCVD track our own
 * request and the peer's reply, while *_INFO_RCVD/*_ACK_SENT track the
 * peer's request and our reply - confirm against the handshake code.
 */
enum {

	VER_INFO_SENT	=	0x1,
	VER_ACK_RCVD	=	0x2,
	VER_INFO_RCVD	=	0x4,
	VER_ACK_SENT	=	0x8,
	VER_NEGOTIATED	=	(VER_ACK_RCVD | VER_ACK_SENT),

	ATTR_INFO_SENT	=	0x10,
	ATTR_ACK_RCVD	=	0x20,
	ATTR_INFO_RCVD	=	0x40,
	ATTR_ACK_SENT	=	0x80,
	ATTR_INFO_EXCHANGED	=	(ATTR_ACK_RCVD | ATTR_ACK_SENT),

	DRING_INFO_SENT	=	0x100,
	DRING_ACK_RCVD	=	0x200,
	DRING_INFO_RCVD	=	0x400,
	DRING_ACK_SENT	=	0x800,
	DRING_INFO_EXCHANGED	=	(DRING_ACK_RCVD | DRING_ACK_SENT),

	RDX_INFO_SENT	=	0x1000,
	RDX_ACK_RCVD	=	0x2000,
	RDX_INFO_RCVD	=	0x4000,
	RDX_ACK_SENT	=	0x8000,
	RDX_EXCHANGED	=	(RDX_ACK_RCVD | RDX_ACK_SENT)

};
222 
/*
 * Acquire/release all three per-channel locks. The locks are taken in the
 * order cblock, txlock, tclock and released in the reverse order.
 * Each macro expands to exactly one statement, so it can safely be used as
 * the body of an unbraced if/else; invoke it with a trailing semicolon.
 */
#define	LDC_LOCK(ldcp)	do {						\
				mutex_enter(&((ldcp)->cblock));		\
				mutex_enter(&((ldcp)->txlock));		\
				mutex_enter(&((ldcp)->tclock));		\
			_NOTE(CONSTCOND) } while (0)
#define	LDC_UNLOCK(ldcp)	do {					\
				mutex_exit(&((ldcp)->tclock));		\
				mutex_exit(&((ldcp)->txlock));		\
				mutex_exit(&((ldcp)->cblock));		\
			_NOTE(CONSTCOND) } while (0)
231 
232 static struct ether_addr etherbroadcastaddr = {
233 	0xff, 0xff, 0xff, 0xff, 0xff, 0xff
234 };
/*
 * MIB II broadcast/multicast packets
 *
 * Both macros take a pointer to a struct ether_header and classify the
 * destination address: IS_BROADCAST compares it with etherbroadcastaddr,
 * IS_MULTICAST tests the low-order bit of its first octet. The argument
 * is parenthesized so that any pointer expression may be passed.
 */
#define	IS_BROADCAST(ehp) \
		(ether_cmp(&(ehp)->ether_dhost, &etherbroadcastaddr) == 0)
#define	IS_MULTICAST(ehp) \
		(((ehp)->ether_dhost.ether_addr_octet[0] & 01) == 1)
242 
243 /*
244  * Property names
245  */
246 static char macaddr_propname[] = "mac-address";
247 static char rmacaddr_propname[] = "remote-mac-address";
248 static char channel_propname[] = "channel-endpoint";
249 static char reg_propname[] = "reg";
250 static char port_propname[] = "port";
251 static char swport_propname[] = "switch-port";
252 static char id_propname[] = "id";
253 
254 /* versions supported - in decreasing order */
255 static vgen_ver_t vgen_versions[VGEN_NUM_VER] = { {1, 0} };
256 
257 /* Tunables */
258 uint32_t vgen_hwd_interval = 1000;	/* handshake watchdog freq in msec */
259 uint32_t vgen_max_hretries = VNET_NUM_HANDSHAKES; /* # of handshake retries */
260 uint32_t vgen_ldcwr_retries = 10;	/* max # of ldc_write() retries */
261 uint32_t vgen_ldcup_retries = 5;	/* max # of ldc_up() retries */
262 uint32_t vgen_recv_delay = 1;		/* delay when rx descr not ready */
263 uint32_t vgen_recv_retries = 10;	/* retry when rx descr not ready */
264 
265 #ifdef DEBUG
266 /* flags to simulate error conditions for debugging */
267 int vgen_trigger_txtimeout = 0;
268 int vgen_trigger_rxlost = 0;
269 #endif
270 
271 /* MD update matching structure */
272 static md_prop_match_t	vport_prop_match[] = {
273 	{ MDET_PROP_VAL,	"id" },
274 	{ MDET_LIST_END,	NULL }
275 };
276 
277 static mdeg_node_match_t vport_match = { "virtual-device-port",
278 					vport_prop_match };
279 
280 /* template for matching a particular vnet instance */
281 static mdeg_prop_spec_t vgen_prop_template[] = {
282 	{ MDET_PROP_STR,	"name",		"network" },
283 	{ MDET_PROP_VAL,	"cfg-handle",	NULL },
284 	{ MDET_LIST_END,	NULL,		NULL }
285 };
286 
/*
 * Store 'val' in the ps_val field of the second entry (index 1) of a
 * property spec array; in vgen_prop_template that entry is "cfg-handle".
 * The whole expansion is parenthesized so that the assignment cannot
 * absorb operators that follow the macro invocation.
 */
#define	VGEN_SET_MDEG_PROP_INST(specp, val)	((specp)[1].ps_val = (val))
288 
289 static int vgen_mdeg_cb(void *cb_argp, mdeg_result_t *resp);
290 
/*
 * Callback vector for the generic transport; vgen_init() hands it to the
 * mac layer through macp->m_callbacks. The initializer is positional, so
 * the order of the entries must match the member order of mac_callbacks_t.
 * NOTE(review): the leading 0 and the three trailing NULL entries are
 * presumably the optional-callback flags word and the optional callbacks
 * themselves - confirm against mac_callbacks_t. vgen_ioctl() is defined in
 * this file but is not referenced from this table.
 */
static mac_callbacks_t vgen_m_callbacks = {
	0,
	vgen_stat,
	vgen_start,
	vgen_stop,
	vgen_promisc,
	vgen_multicst,
	vgen_unicst,
	vgen_tx,
	NULL,
	NULL,
	NULL
};
304 
305 /* externs */
306 extern uint32_t vnet_ntxds;
307 extern uint32_t vnet_ldcwd_interval;
308 extern uint32_t vnet_ldcwd_txtimeout;
309 extern uint32_t vnet_ldc_mtu;
310 extern uint32_t vnet_nrbufs;
311 extern int _vnet_dbglevel;
312 extern void _vnetdebug_printf(void *vnetp, const char *fmt, ...);
313 
#ifdef DEBUG

/*
 * NOTE: definitions below need to be in sync with those in vnet.c
 */

/*
 * debug levels:
 * DBG_LEVEL1:	Function entry/exit tracing
 * DBG_LEVEL2:	Info messages
 * DBG_LEVEL3:	Warning messages
 * DBG_LEVEL4:	Error messages
 */

enum	{ DBG_LEVEL1 = 0x01, DBG_LEVEL2 = 0x02, DBG_LEVEL3 = 0x04,
	    DBG_LEVEL4 = 0x08 };

/*
 * Each macro takes one argument: the complete, parenthesized argument list
 * for _vnetdebug_printf(), e.g. DBG1((vnetp, "fmt %d\n", val));
 * The message is printed only if the matching bit is set in _vnet_dbglevel.
 */
#define	DBG1(_s)	do {						\
			    if ((_vnet_dbglevel & DBG_LEVEL1) != 0) {	\
					_vnetdebug_printf _s;		\
			    }						\
			_NOTE(CONSTCOND) } while (0)

#define	DBG2(_s)	do {						\
			    if ((_vnet_dbglevel & DBG_LEVEL2) != 0) {	\
					_vnetdebug_printf _s;		\
			    }						\
			_NOTE(CONSTCOND) } while (0)

#define	DWARN(_s)	do {						\
			    if ((_vnet_dbglevel & DBG_LEVEL3) != 0) {	\
					_vnetdebug_printf _s;		\
			    }						\
			_NOTE(CONSTCOND) } while (0)

#define	DERR(_s)	do {						\
			    if ((_vnet_dbglevel & DBG_LEVEL4) != 0) {	\
					_vnetdebug_printf _s;		\
			    }						\
			_NOTE(CONSTCOND) } while (0)

#else

/*
 * Non-DEBUG variants: the call is kept under "if (0)" so that the
 * arguments are still compiled but never evaluated. As in the DEBUG
 * variants, the do/while wrapper makes each macro a single statement;
 * a bare "if (0) ..." would capture an "else" that follows the macro
 * invocation.
 */
#define	DBG1(_s)	do {						\
			    if (0) {					\
					_vnetdebug_printf _s;		\
			    }						\
			_NOTE(CONSTCOND) } while (0)

#define	DBG2(_s)	do {						\
			    if (0) {					\
					_vnetdebug_printf _s;		\
			    }						\
			_NOTE(CONSTCOND) } while (0)

#define	DWARN(_s)	do {						\
			    if (0) {					\
					_vnetdebug_printf _s;		\
			    }						\
			_NOTE(CONSTCOND) } while (0)

#define	DERR(_s)	do {						\
			    if (0) {					\
					_vnetdebug_printf _s;		\
			    }						\
			_NOTE(CONSTCOND) } while (0)

#endif
363 
364 #ifdef DEBUG
365 
366 /* simulate handshake error conditions for debug */
367 uint32_t vgen_hdbg;
368 #define	HDBG_VERSION	0x1
369 #define	HDBG_TIMEOUT	0x2
370 #define	HDBG_BAD_SID	0x4
371 #define	HDBG_OUT_STATE	0x8
372 
373 #endif
374 
375 
376 
377 /*
378  * vgen_init() is called by an instance of vnet driver to initialize the
379  * corresponding generic proxy transport layer. The arguments passed by vnet
380  * are - an opaque pointer to the vnet instance, pointers to dev_info_t and
381  * the mac address of the vnet device, and a pointer to mac_register_t of
382  * the generic transport is returned in the last argument.
383  */
384 int
385 vgen_init(void *vnetp, dev_info_t *vnetdip, const uint8_t *macaddr,
386     mac_register_t **vgenmacp)
387 {
388 	vgen_t *vgenp;
389 	mac_register_t *macp;
390 	int instance;
391 
392 	if ((vnetp == NULL) || (vnetdip == NULL))
393 		return (DDI_FAILURE);
394 
395 	instance = ddi_get_instance(vnetdip);
396 
397 	DBG1((vnetp, "vgen_init: enter vnet_instance(%d)\n", instance));
398 
399 	vgenp = kmem_zalloc(sizeof (vgen_t), KM_SLEEP);
400 
401 	vgenp->vnetp = vnetp;
402 	vgenp->vnetdip = vnetdip;
403 	bcopy(macaddr, &(vgenp->macaddr), ETHERADDRL);
404 
405 	if ((macp = mac_alloc(MAC_VERSION)) == NULL) {
406 		KMEM_FREE(vgenp);
407 		return (DDI_FAILURE);
408 	}
409 	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
410 	macp->m_driver = vgenp;
411 	macp->m_dip = vnetdip;
412 	macp->m_src_addr = (uint8_t *)&(vgenp->macaddr);
413 	macp->m_callbacks = &vgen_m_callbacks;
414 	macp->m_min_sdu = 0;
415 	macp->m_max_sdu = ETHERMTU;
416 	vgenp->macp = macp;
417 
418 	/* allocate multicast table */
419 	vgenp->mctab = kmem_zalloc(VGEN_INIT_MCTAB_SIZE *
420 	    sizeof (struct ether_addr), KM_SLEEP);
421 	vgenp->mccount = 0;
422 	vgenp->mcsize = VGEN_INIT_MCTAB_SIZE;
423 
424 	mutex_init(&vgenp->lock, NULL, MUTEX_DRIVER, NULL);
425 
426 	/* register with MD event generator */
427 	if (vgen_mdeg_reg(vgenp) != DDI_SUCCESS) {
428 		mutex_destroy(&vgenp->lock);
429 		kmem_free(vgenp->mctab, VGEN_INIT_MCTAB_SIZE *
430 		    sizeof (struct ether_addr));
431 		mac_free(vgenp->macp);
432 		KMEM_FREE(vgenp);
433 		return (DDI_FAILURE);
434 	}
435 
436 	/* register macp of this vgen_t with vnet */
437 	*vgenmacp = vgenp->macp;
438 
439 	DBG1((vnetp, "vgen_init: exit vnet_instance(%d)\n", instance));
440 	return (DDI_SUCCESS);
441 }
442 
443 /*
444  * Called by vnet to undo the initializations done by vgen_init().
445  * The handle provided by generic transport during vgen_init() is the argument.
446  */
447 int
448 vgen_uninit(void *arg)
449 {
450 	vgen_t	*vgenp = (vgen_t *)arg;
451 	void	*vnetp;
452 	int instance;
453 	vio_mblk_pool_t *rp, *nrp;
454 
455 	if (vgenp == NULL) {
456 		return (DDI_FAILURE);
457 	}
458 
459 	instance = ddi_get_instance(vgenp->vnetdip);
460 	vnetp = vgenp->vnetp;
461 
462 	DBG1((vnetp, "vgen_uninit: enter vnet_instance(%d)\n", instance));
463 
464 	/* unregister with MD event generator */
465 	vgen_mdeg_unreg(vgenp);
466 
467 	mutex_enter(&vgenp->lock);
468 
469 	/* detach all ports from the device */
470 	vgen_detach_ports(vgenp);
471 
472 	/*
473 	 * free any pending rx mblk pools,
474 	 * that couldn't be freed previously during channel detach.
475 	 */
476 	rp = vgenp->rmp;
477 	while (rp != NULL) {
478 		nrp = vgenp->rmp = rp->nextp;
479 		if (vio_destroy_mblks(rp)) {
480 			vgenp->rmp = rp;
481 			mutex_exit(&vgenp->lock);
482 			return (DDI_FAILURE);
483 		}
484 		rp = nrp;
485 	}
486 
487 	/* free multicast table */
488 	kmem_free(vgenp->mctab, vgenp->mcsize * sizeof (struct ether_addr));
489 
490 	mac_free(vgenp->macp);
491 
492 	mutex_exit(&vgenp->lock);
493 
494 	mutex_destroy(&vgenp->lock);
495 
496 	KMEM_FREE(vgenp);
497 
498 	DBG1((vnetp, "vgen_uninit: exit vnet_instance(%d)\n", instance));
499 
500 	return (DDI_SUCCESS);
501 }
502 
503 /* enable transmit/receive for the device */
504 int
505 vgen_start(void *arg)
506 {
507 	vgen_t		*vgenp = (vgen_t *)arg;
508 
509 	DBG1((vgenp->vnetp, "vgen_start: enter\n"));
510 
511 	mutex_enter(&vgenp->lock);
512 	vgen_init_ports(vgenp);
513 	vgenp->flags |= VGEN_STARTED;
514 	mutex_exit(&vgenp->lock);
515 
516 	DBG1((vgenp->vnetp, "vgen_start: exit\n"));
517 	return (DDI_SUCCESS);
518 }
519 
520 /* stop transmit/receive */
521 void
522 vgen_stop(void *arg)
523 {
524 	vgen_t		*vgenp = (vgen_t *)arg;
525 
526 	DBG1((vgenp->vnetp, "vgen_stop: enter\n"));
527 
528 	mutex_enter(&vgenp->lock);
529 	vgen_uninit_ports(vgenp);
530 	vgenp->flags &= ~(VGEN_STARTED);
531 	mutex_exit(&vgenp->lock);
532 
533 	DBG1((vgenp->vnetp, "vgen_stop: exit\n"));
534 }
535 
536 /* vgen transmit function */
537 static mblk_t *
538 vgen_tx(void *arg, mblk_t *mp)
539 {
540 	vgen_port_t *portp;
541 	int status;
542 
543 	portp = (vgen_port_t *)arg;
544 	status = vgen_portsend(portp, mp);
545 	if (status != VGEN_SUCCESS) {
546 		/* failure */
547 		return (mp);
548 	}
549 	/* success */
550 	return (NULL);
551 }
552 
553 /* transmit packets over the given port */
554 static int
555 vgen_portsend(vgen_port_t *portp, mblk_t *mp)
556 {
557 	vgen_ldclist_t	*ldclp;
558 	vgen_ldc_t *ldcp;
559 	int status;
560 
561 	ldclp = &portp->ldclist;
562 	READ_ENTER(&ldclp->rwlock);
563 	/*
564 	 * NOTE: for now, we will assume we have a single channel.
565 	 */
566 	if (ldclp->headp == NULL) {
567 		RW_EXIT(&ldclp->rwlock);
568 		return (VGEN_FAILURE);
569 	}
570 	ldcp = ldclp->headp;
571 
572 	if (ldcp->need_resched) {
573 		/* out of tx resources, see vgen_ldcsend() for details. */
574 		mutex_enter(&ldcp->txlock);
575 		ldcp->statsp->tx_no_desc++;
576 		mutex_exit(&ldcp->txlock);
577 
578 		RW_EXIT(&ldclp->rwlock);
579 		return (VGEN_FAILURE);
580 	}
581 
582 	status  = vgen_ldcsend(ldcp, mp);
583 	RW_EXIT(&ldclp->rwlock);
584 
585 	if (status != VGEN_TX_SUCCESS)
586 		return (VGEN_FAILURE);
587 
588 	return (VGEN_SUCCESS);
589 }
590 
591 /* channel transmit function */
592 static int
593 vgen_ldcsend(vgen_ldc_t *ldcp, mblk_t *mp)
594 {
595 	void		*vnetp;
596 	size_t		size;
597 	int		rv = 0;
598 	uint64_t	tbuf_ix;
599 	vgen_private_desc_t	*tbufp;
600 	vgen_private_desc_t	*ntbufp;
601 	vnet_public_desc_t	*txdp;
602 	vio_dring_entry_hdr_t		*hdrp;
603 	vgen_stats_t		*statsp;
604 	struct ether_header	*ehp;
605 	boolean_t	is_bcast = B_FALSE;
606 	boolean_t	is_mcast = B_FALSE;
607 	size_t		mblksz;
608 	caddr_t		dst;
609 	mblk_t		*bp;
610 	ldc_status_t	istatus;
611 
612 	vnetp = LDC_TO_VNET(ldcp);
613 	statsp = ldcp->statsp;
614 	size = msgsize(mp);
615 
616 	DBG1((vnetp, "vgen_ldcsend: enter ldcid(%lx)\n", ldcp->ldc_id));
617 
618 	mutex_enter(&ldcp->txlock);
619 
620 	/* drop the packet if ldc is not up or handshake is not done */
621 	if (ldcp->ldc_status != LDC_UP) {
622 		DWARN((vnetp,
623 		    "vgen_ldcsend: id(%lx) status(%d), dropping packet\n",
624 		    ldcp->ldc_id, ldcp->ldc_status));
625 		/* retry ldc_up() if needed */
626 		if (ldcp->flags & CHANNEL_STARTED)
627 			(void) ldc_up(ldcp->ldc_handle);
628 		goto vgen_tx_exit;
629 	}
630 
631 	if (ldcp->hphase != VH_DONE) {
632 		DWARN((vnetp,
633 		    "vgen_ldcsend: id(%lx) hphase(%x), dropping packet\n",
634 		    ldcp->ldc_id, ldcp->hphase));
635 		goto vgen_tx_exit;
636 	}
637 
638 	if (size > (size_t)ETHERMAX) {
639 		DWARN((vnetp, "vgen_ldcsend: id(%lx) invalid size(%d)\n",
640 		    ldcp->ldc_id, size));
641 		goto vgen_tx_exit;
642 	}
643 
644 	/*
645 	 * allocate a descriptor
646 	 */
647 	tbufp = ldcp->next_tbufp;
648 	ntbufp = NEXTTBUF(ldcp, tbufp);
649 	if (ntbufp == ldcp->cur_tbufp) { /* out of tbufs/txds */
650 
651 		mutex_enter(&ldcp->tclock);
652 		if (ntbufp == ldcp->cur_tbufp) {
653 			ldcp->need_resched = B_TRUE;
654 			mutex_exit(&ldcp->tclock);
655 
656 			statsp->tx_no_desc++;
657 			mutex_exit(&ldcp->txlock);
658 
659 			return (VGEN_TX_NORESOURCES);
660 		}
661 		mutex_exit(&ldcp->tclock);
662 	}
663 
664 	if (size < ETHERMIN)
665 		size = ETHERMIN;
666 
667 	/* copy data into pre-allocated transmit buffer */
668 	dst = tbufp->datap + VNET_IPALIGN;
669 	for (bp = mp; bp != NULL; bp = bp->b_cont) {
670 		mblksz = MBLKL(bp);
671 		bcopy(bp->b_rptr, dst, mblksz);
672 		dst += mblksz;
673 	}
674 
675 	tbuf_ix = tbufp - ldcp->tbufp;
676 
677 	ehp = (struct ether_header *)tbufp->datap;
678 	is_bcast = IS_BROADCAST(ehp);
679 	is_mcast = IS_MULTICAST(ehp);
680 
681 	tbufp->flags = VGEN_PRIV_DESC_BUSY;
682 	tbufp->datalen = size;
683 
684 	/* initialize the corresponding public descriptor (txd) */
685 	txdp = tbufp->descp;
686 	hdrp = &txdp->hdr;
687 	txdp->nbytes = size;
688 	txdp->ncookies = tbufp->ncookies;
689 	bcopy((tbufp->memcookie), (txdp->memcookie),
690 		tbufp->ncookies * sizeof (ldc_mem_cookie_t));
691 	hdrp->dstate = VIO_DESC_READY;
692 
693 	/* send dring datamsg to the peer */
694 	if (ldcp->resched_peer) {
695 		rv = vgen_send_dring_data(ldcp, (uint32_t)tbuf_ix, -1);
696 		if (rv != 0) {
697 			/* vgen_send_dring_data() error: drop the packet */
698 			DWARN((vnetp,
699 			    "vgen_ldcsend: vgen_send_dring_data():  failed: "
700 			    "id(%lx) rv(%d) len (%d)\n",
701 			    ldcp->ldc_id, rv, size));
702 			tbufp->flags = VGEN_PRIV_DESC_FREE;	/* free tbuf */
703 			hdrp->dstate = VIO_DESC_FREE;	/* free txd */
704 			hdrp->ack = B_FALSE;
705 			statsp->oerrors++;
706 			goto vgen_tx_exit;
707 		}
708 		ldcp->resched_peer = B_FALSE;
709 	}
710 
711 	/* update next available tbuf in the ring */
712 	ldcp->next_tbufp = ntbufp;
713 
714 	/* update tx index */
715 	INCR_TXI(ldcp->next_txi, ldcp);
716 
717 	/* update stats */
718 	statsp->opackets++;
719 	statsp->obytes += size;
720 	if (is_bcast)
721 		statsp->brdcstxmt++;
722 	else if (is_mcast)
723 		statsp->multixmt++;
724 
725 vgen_tx_exit:
726 	mutex_exit(&ldcp->txlock);
727 
728 	if (rv == ECONNRESET) {
729 		/*
730 		 * Check if either callback thread or another tx thread is
731 		 * already running. Calling mutex_enter() will result in a
732 		 * deadlock if the other thread already holds cblock and is
733 		 * blocked in vnet_modify_fdb() (which is called from
734 		 * vgen_handle_evt_reset()) waiting for write access on rwlock,
735 		 * as this transmit thread already holds that lock as a reader
736 		 * in vnet_m_tx(). See comments in vnet_modify_fdb() in vnet.c.
737 		 */
738 		if (mutex_tryenter(&ldcp->cblock)) {
739 			if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
740 				DWARN((vnetp,
741 				    "vgen_ldcsend: ldc_status err id(%lx)\n"));
742 			} else {
743 				ldcp->ldc_status = istatus;
744 			}
745 			if (ldcp->ldc_status != LDC_UP) {
746 				/*
747 				 * Second arg is TRUE, as we know that
748 				 * the caller of this function - vnet_m_tx(),
749 				 * already holds fdb-rwlock as a reader.
750 				 */
751 				vgen_handle_evt_reset(ldcp, B_TRUE);
752 			}
753 			mutex_exit(&ldcp->cblock);
754 		}
755 	}
756 
757 	DBG1((vnetp, "vgen_ldcsend: exit: ldcid (%lx)\n", ldcp->ldc_id));
758 
759 	freemsg(mp);
760 	return (VGEN_TX_SUCCESS);
761 }
762 
763 /* enable/disable a multicast address */
764 int
765 vgen_multicst(void *arg, boolean_t add, const uint8_t *mca)
766 {
767 	vgen_t			*vgenp;
768 	vnet_mcast_msg_t	mcastmsg;
769 	vio_msg_tag_t		*tagp;
770 	vgen_port_t		*portp;
771 	vgen_portlist_t		*plistp;
772 	vgen_ldc_t		*ldcp;
773 	vgen_ldclist_t		*ldclp;
774 	void			*vnetp;
775 	struct ether_addr	*addrp;
776 	int			rv = DDI_FAILURE;
777 	uint32_t		i;
778 
779 	vgenp = (vgen_t *)arg;
780 	vnetp = vgenp->vnetp;
781 	addrp = (struct ether_addr *)mca;
782 	tagp = &mcastmsg.tag;
783 	bzero(&mcastmsg, sizeof (mcastmsg));
784 
785 	mutex_enter(&vgenp->lock);
786 
787 	plistp = &(vgenp->vgenports);
788 
789 	READ_ENTER(&plistp->rwlock);
790 
791 	portp = vgenp->vsw_portp;
792 	if (portp == NULL) {
793 		RW_EXIT(&plistp->rwlock);
794 		mutex_exit(&vgenp->lock);
795 		return (rv);
796 	}
797 	ldclp = &portp->ldclist;
798 
799 	READ_ENTER(&ldclp->rwlock);
800 
801 	ldcp = ldclp->headp;
802 	if (ldcp == NULL)
803 		goto vgen_mcast_exit;
804 
805 	mutex_enter(&ldcp->cblock);
806 
807 	if (ldcp->hphase == VH_DONE) {
808 		/*
809 		 * If handshake is done, send a msg to vsw to add/remove
810 		 * the multicast address.
811 		 */
812 		tagp->vio_msgtype = VIO_TYPE_CTRL;
813 		tagp->vio_subtype = VIO_SUBTYPE_INFO;
814 		tagp->vio_subtype_env = VNET_MCAST_INFO;
815 		tagp->vio_sid = ldcp->local_sid;
816 		bcopy(mca, &(mcastmsg.mca), ETHERADDRL);
817 		mcastmsg.set = add;
818 		mcastmsg.count = 1;
819 		if (vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (mcastmsg),
820 		    B_FALSE) != VGEN_SUCCESS) {
821 			DWARN((vnetp, "vgen_mutlicst: vgen_sendmsg failed"
822 			    "id (%lx)\n", ldcp->ldc_id));
823 			mutex_exit(&ldcp->cblock);
824 			goto vgen_mcast_exit;
825 		}
826 	} else {
827 		/* set the flag to send a msg to vsw after handshake is done */
828 		ldcp->need_mcast_sync = B_TRUE;
829 	}
830 
831 	mutex_exit(&ldcp->cblock);
832 
833 	if (add) {
834 
835 		/* expand multicast table if necessary */
836 		if (vgenp->mccount >= vgenp->mcsize) {
837 			struct ether_addr	*newtab;
838 			uint32_t		newsize;
839 
840 
841 			newsize = vgenp->mcsize * 2;
842 
843 			newtab = kmem_zalloc(newsize *
844 			    sizeof (struct ether_addr), KM_NOSLEEP);
845 			if (newtab == NULL)
846 				goto vgen_mcast_exit;
847 			bcopy(vgenp->mctab, newtab, vgenp->mcsize *
848 			    sizeof (struct ether_addr));
849 			kmem_free(vgenp->mctab,
850 			    vgenp->mcsize * sizeof (struct ether_addr));
851 
852 			vgenp->mctab = newtab;
853 			vgenp->mcsize = newsize;
854 		}
855 
856 		/* add address to the table */
857 		vgenp->mctab[vgenp->mccount++] = *addrp;
858 
859 	} else {
860 
861 		/* delete address from the table */
862 		for (i = 0; i < vgenp->mccount; i++) {
863 			if (ether_cmp(addrp, &(vgenp->mctab[i])) == 0) {
864 
865 				/*
866 				 * If there's more than one address in this
867 				 * table, delete the unwanted one by moving
868 				 * the last one in the list over top of it;
869 				 * otherwise, just remove it.
870 				 */
871 				if (vgenp->mccount > 1) {
872 					vgenp->mctab[i] =
873 						vgenp->mctab[vgenp->mccount-1];
874 				}
875 				vgenp->mccount--;
876 				break;
877 			}
878 		}
879 	}
880 
881 	rv = DDI_SUCCESS;
882 
883 vgen_mcast_exit:
884 	RW_EXIT(&ldclp->rwlock);
885 	RW_EXIT(&plistp->rwlock);
886 
887 	mutex_exit(&vgenp->lock);
888 	return (rv);
889 }
890 
/*
 * Set or clear promiscuous mode on the device.
 * This is a stub: both arguments are ignored and DDI_SUCCESS is always
 * returned.
 */
static int
vgen_promisc(void *arg, boolean_t on)
{
	_NOTE(ARGUNUSED(arg, on))
	return (DDI_SUCCESS);
}
898 
/*
 * Set the unicast mac address of the device.
 * This is a stub: both arguments are ignored and DDI_SUCCESS is always
 * returned.
 */
static int
vgen_unicst(void *arg, const uint8_t *mca)
{
	_NOTE(ARGUNUSED(arg, mca))
	return (DDI_SUCCESS);
}
906 
907 /* get device statistics */
908 int
909 vgen_stat(void *arg, uint_t stat, uint64_t *val)
910 {
911 	vgen_t		*vgenp = (vgen_t *)arg;
912 	vgen_port_t	*portp;
913 	vgen_portlist_t	*plistp;
914 
915 	*val = 0;
916 
917 	plistp = &(vgenp->vgenports);
918 	READ_ENTER(&plistp->rwlock);
919 
920 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
921 		*val += vgen_port_stat(portp, stat);
922 	}
923 
924 	RW_EXIT(&plistp->rwlock);
925 
926 	return (0);
927 }
928 
/*
 * ioctl entry point.
 * This is an empty stub: all arguments are ignored and nothing is done
 * with the message.
 */
static void
vgen_ioctl(void *arg, queue_t *wq, mblk_t *mp)
{
	 _NOTE(ARGUNUSED(arg, wq, mp))
}
934 
935 /* vgen internal functions */
936 /* detach all ports from the device */
937 static void
938 vgen_detach_ports(vgen_t *vgenp)
939 {
940 	vgen_port_t	*portp;
941 	vgen_portlist_t	*plistp;
942 
943 	plistp = &(vgenp->vgenports);
944 	WRITE_ENTER(&plistp->rwlock);
945 
946 	while ((portp = plistp->headp) != NULL) {
947 		vgen_port_detach(portp);
948 	}
949 
950 	RW_EXIT(&plistp->rwlock);
951 }
952 
953 /*
954  * detach the given port.
955  */
956 static void
957 vgen_port_detach(vgen_port_t *portp)
958 {
959 	vgen_t		*vgenp;
960 	vgen_ldclist_t	*ldclp;
961 	int		port_num;
962 
963 	vgenp = portp->vgenp;
964 	port_num = portp->port_num;
965 
966 	DBG1((vgenp->vnetp,
967 	    "vgen_port_detach: enter: port_num(%d)\n", port_num));
968 
969 	/* remove it from port list */
970 	vgen_port_list_remove(portp);
971 
972 	/* detach channels from this port */
973 	ldclp = &portp->ldclist;
974 	WRITE_ENTER(&ldclp->rwlock);
975 	while (ldclp->headp) {
976 		vgen_ldc_detach(ldclp->headp);
977 	}
978 	RW_EXIT(&ldclp->rwlock);
979 
980 	if (vgenp->vsw_portp == portp) {
981 		vgenp->vsw_portp = NULL;
982 	}
983 	KMEM_FREE(portp);
984 
985 	DBG1((vgenp->vnetp,
986 	    "vgen_port_detach: exit: port_num(%d)\n", port_num));
987 }
988 
989 /* add a port to port list */
990 static void
991 vgen_port_list_insert(vgen_port_t *portp)
992 {
993 	vgen_portlist_t *plistp;
994 	vgen_t *vgenp;
995 
996 	vgenp = portp->vgenp;
997 	plistp = &(vgenp->vgenports);
998 
999 	if (plistp->headp == NULL) {
1000 		plistp->headp = portp;
1001 	} else {
1002 		plistp->tailp->nextp = portp;
1003 	}
1004 	plistp->tailp = portp;
1005 	portp->nextp = NULL;
1006 }
1007 
1008 /* remove a port from port list */
1009 static void
1010 vgen_port_list_remove(vgen_port_t *portp)
1011 {
1012 	vgen_port_t *prevp;
1013 	vgen_port_t *nextp;
1014 	vgen_portlist_t *plistp;
1015 	vgen_t *vgenp;
1016 
1017 	vgenp = portp->vgenp;
1018 
1019 	plistp = &(vgenp->vgenports);
1020 
1021 	if (plistp->headp == NULL)
1022 		return;
1023 
1024 	if (portp == plistp->headp) {
1025 		plistp->headp = portp->nextp;
1026 		if (portp == plistp->tailp)
1027 			plistp->tailp = plistp->headp;
1028 	} else {
1029 		for (prevp = plistp->headp; ((nextp = prevp->nextp) != NULL) &&
1030 		    (nextp != portp); prevp = nextp);
1031 		if (nextp == portp) {
1032 			prevp->nextp = portp->nextp;
1033 		}
1034 		if (portp == plistp->tailp)
1035 			plistp->tailp = prevp;
1036 	}
1037 }
1038 
1039 /* lookup a port in the list based on port_num */
1040 static vgen_port_t *
1041 vgen_port_lookup(vgen_portlist_t *plistp, int port_num)
1042 {
1043 	vgen_port_t *portp = NULL;
1044 
1045 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
1046 		if (portp->port_num == port_num) {
1047 			break;
1048 		}
1049 	}
1050 
1051 	return (portp);
1052 }
1053 
1054 /* enable ports for transmit/receive */
1055 static void
1056 vgen_init_ports(vgen_t *vgenp)
1057 {
1058 	vgen_port_t	*portp;
1059 	vgen_portlist_t	*plistp;
1060 
1061 	plistp = &(vgenp->vgenports);
1062 	READ_ENTER(&plistp->rwlock);
1063 
1064 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
1065 		vgen_port_init(portp);
1066 	}
1067 
1068 	RW_EXIT(&plistp->rwlock);
1069 }
1070 
1071 static void
1072 vgen_port_init(vgen_port_t *portp)
1073 {
1074 	vgen_t *vgenp;
1075 
1076 	vgenp = portp->vgenp;
1077 	/*
1078 	 * Create fdb entry in vnet, corresponding to the mac
1079 	 * address of this port. Note that the port specified
1080 	 * is vsw-port. This is done so that vsw-port acts
1081 	 * as the route to reach this macaddr, until the
1082 	 * channel for this port comes up (LDC_UP) and
1083 	 * handshake is done successfully.
1084 	 * eg, if the peer is OBP-vnet, it may not bring the
1085 	 * channel up for this port and may communicate via
1086 	 * vsw to reach this port.
1087 	 * Later, when Solaris-vnet comes up at the other end
1088 	 * of the channel for this port and brings up the channel,
1089 	 * it is an indication that peer vnet is capable of
1090 	 * distributed switching, so the direct route through this
1091 	 * port is specified in fdb, using vnet_modify_fdb(macaddr);
1092 	 */
1093 	vnet_add_fdb(vgenp->vnetp, (uint8_t *)&portp->macaddr,
1094 	    vgen_tx, vgenp->vsw_portp);
1095 
1096 	if (portp == vgenp->vsw_portp) {
1097 		/*
1098 		 * create the default route entry in vnet's fdb.
1099 		 * This is the entry used by vnet to reach
1100 		 * unknown destinations, which basically goes
1101 		 * through vsw on domain0 and out through the
1102 		 * physical device bound to vsw.
1103 		 */
1104 		vnet_add_def_rte(vgenp->vnetp, vgen_tx, portp);
1105 	}
1106 
1107 	/* Bring up the channels of this port */
1108 	vgen_init_ldcs(portp);
1109 }
1110 
1111 /* disable transmit/receive on ports */
1112 static void
1113 vgen_uninit_ports(vgen_t *vgenp)
1114 {
1115 	vgen_port_t	*portp;
1116 	vgen_portlist_t	*plistp;
1117 
1118 	plistp = &(vgenp->vgenports);
1119 	READ_ENTER(&plistp->rwlock);
1120 
1121 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
1122 		vgen_port_uninit(portp);
1123 	}
1124 
1125 	RW_EXIT(&plistp->rwlock);
1126 }
1127 
1128 static void
1129 vgen_port_uninit(vgen_port_t *portp)
1130 {
1131 	vgen_t *vgenp;
1132 
1133 	vgenp = portp->vgenp;
1134 
1135 	vgen_uninit_ldcs(portp);
1136 	/* delete the entry in vnet's fdb for this port */
1137 	vnet_del_fdb(vgenp->vnetp, (uint8_t *)&portp->macaddr);
1138 	if (portp == vgenp->vsw_portp) {
1139 		/*
1140 		 * if this is vsw-port, then delete the default
1141 		 * route entry in vnet's fdb.
1142 		 */
1143 		vnet_del_def_rte(vgenp->vnetp);
1144 	}
1145 }
1146 
1147 /* register with MD event generator */
1148 static int
1149 vgen_mdeg_reg(vgen_t *vgenp)
1150 {
1151 	mdeg_prop_spec_t	*pspecp;
1152 	mdeg_node_spec_t	*parentp;
1153 	uint_t			templatesz;
1154 	int			rv;
1155 	mdeg_handle_t		hdl;
1156 	int			i;
1157 	void			*vnetp = vgenp->vnetp;
1158 
1159 	i = ddi_prop_get_int(DDI_DEV_T_ANY, vgenp->vnetdip,
1160 			DDI_PROP_DONTPASS, reg_propname, -1);
1161 	if (i == -1) {
1162 		return (DDI_FAILURE);
1163 	}
1164 	templatesz = sizeof (vgen_prop_template);
1165 	pspecp = kmem_zalloc(templatesz, KM_NOSLEEP);
1166 	if (pspecp == NULL) {
1167 		return (DDI_FAILURE);
1168 	}
1169 	parentp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_NOSLEEP);
1170 	if (parentp == NULL) {
1171 		kmem_free(pspecp, templatesz);
1172 		return (DDI_FAILURE);
1173 	}
1174 
1175 	bcopy(vgen_prop_template, pspecp, templatesz);
1176 
1177 	/*
1178 	 * NOTE: The instance here refers to the value of "reg" property and
1179 	 * not the dev_info instance (ddi_get_instance()) of vnet.
1180 	 */
1181 	VGEN_SET_MDEG_PROP_INST(pspecp, i);
1182 
1183 	parentp->namep = "virtual-device";
1184 	parentp->specp = pspecp;
1185 
1186 	/* save parentp in vgen_t */
1187 	vgenp->mdeg_parentp = parentp;
1188 
1189 	rv = mdeg_register(parentp, &vport_match, vgen_mdeg_cb, vgenp, &hdl);
1190 	if (rv != MDEG_SUCCESS) {
1191 		DERR((vnetp, "vgen_mdeg_reg: mdeg_register failed\n"));
1192 		KMEM_FREE(parentp);
1193 		kmem_free(pspecp, templatesz);
1194 		vgenp->mdeg_parentp = NULL;
1195 		return (DDI_FAILURE);
1196 	}
1197 
1198 	/* save mdeg handle in vgen_t */
1199 	vgenp->mdeg_hdl = hdl;
1200 
1201 	return (DDI_SUCCESS);
1202 }
1203 
1204 /* unregister with MD event generator */
1205 static void
1206 vgen_mdeg_unreg(vgen_t *vgenp)
1207 {
1208 	(void) mdeg_unregister(vgenp->mdeg_hdl);
1209 	kmem_free(vgenp->mdeg_parentp->specp, sizeof (vgen_prop_template));
1210 	KMEM_FREE(vgenp->mdeg_parentp);
1211 	vgenp->mdeg_parentp = NULL;
1212 	vgenp->mdeg_hdl = NULL;
1213 }
1214 
1215 /* callback function registered with MD event generator */
1216 static int
1217 vgen_mdeg_cb(void *cb_argp, mdeg_result_t *resp)
1218 {
1219 	int idx;
1220 	int vsw_idx = -1;
1221 	uint64_t val;
1222 	vgen_t *vgenp;
1223 
1224 	if ((resp == NULL) || (cb_argp == NULL)) {
1225 		return (MDEG_FAILURE);
1226 	}
1227 
1228 	vgenp = (vgen_t *)cb_argp;
1229 	DBG1((vgenp->vnetp, "vgen_mdeg_cb: enter\n"));
1230 
1231 	mutex_enter(&vgenp->lock);
1232 
1233 	DBG1((vgenp->vnetp,
1234 	    "vgen_mdeg_cb: ports: removed(%x), added(%x), updated(%x)\n",
1235 	    resp->removed.nelem, resp->added.nelem, resp->match_curr.nelem));
1236 
1237 	for (idx = 0; idx < resp->removed.nelem; idx++) {
1238 		(void) vgen_remove_port(vgenp, resp->removed.mdp,
1239 		    resp->removed.mdep[idx]);
1240 	}
1241 
1242 	if (vgenp->vsw_portp == NULL) {
1243 		/*
1244 		 * find vsw_port and add it first, because other ports need
1245 		 * this when adding fdb entry (see vgen_port_init()).
1246 		 */
1247 		for (idx = 0; idx < resp->added.nelem; idx++) {
1248 			if (!(md_get_prop_val(resp->added.mdp,
1249 			    resp->added.mdep[idx], swport_propname, &val))) {
1250 				if (val == 0) {
1251 					/*
1252 					 * This port is connected to the
1253 					 * vsw on dom0.
1254 					 */
1255 					vsw_idx = idx;
1256 					(void) vgen_add_port(vgenp,
1257 					    resp->added.mdp,
1258 					    resp->added.mdep[idx]);
1259 					break;
1260 				}
1261 			}
1262 		}
1263 		if (vsw_idx == -1) {
1264 			DWARN((vgenp->vnetp, "vgen_mdeg_cb: "
1265 			    "can't find vsw_port\n"));
1266 			return (MDEG_FAILURE);
1267 		}
1268 	}
1269 
1270 	for (idx = 0; idx < resp->added.nelem; idx++) {
1271 		if ((vsw_idx != -1) && (vsw_idx == idx)) /* skip vsw_port */
1272 			continue;
1273 		(void) vgen_add_port(vgenp, resp->added.mdp,
1274 		    resp->added.mdep[idx]);
1275 	}
1276 
1277 	for (idx = 0; idx < resp->match_curr.nelem; idx++) {
1278 		(void) vgen_update_port(vgenp, resp->match_curr.mdp,
1279 		    resp->match_curr.mdep[idx],
1280 		    resp->match_prev.mdp,
1281 		    resp->match_prev.mdep[idx]);
1282 	}
1283 
1284 	mutex_exit(&vgenp->lock);
1285 	DBG1((vgenp->vnetp, "vgen_mdeg_cb: exit\n"));
1286 	return (MDEG_SUCCESS);
1287 }
1288 
1289 /* add a new port to the device */
1290 static int
1291 vgen_add_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
1292 {
1293 	uint64_t	port_num;
1294 	uint64_t	*ldc_ids;
1295 	uint64_t	macaddr;
1296 	uint64_t	val;
1297 	int		num_ldcs;
1298 	int		vsw_port = B_FALSE;
1299 	int		i;
1300 	int		addrsz;
1301 	int		num_nodes = 0;
1302 	int		listsz = 0;
1303 	mde_cookie_t	*listp = NULL;
1304 	uint8_t		*addrp;
1305 	struct ether_addr	ea;
1306 
1307 	/* read "id" property to get the port number */
1308 	if (md_get_prop_val(mdp, mdex, id_propname, &port_num)) {
1309 		DWARN((vgenp->vnetp,
1310 		    "vgen_add_port: prop(%s) not found\n", id_propname));
1311 		return (DDI_FAILURE);
1312 	}
1313 
1314 	/*
1315 	 * Find the channel endpoint node(s) under this port node.
1316 	 */
1317 	if ((num_nodes = md_node_count(mdp)) <= 0) {
1318 		DWARN((vgenp->vnetp,
1319 		    "vgen_add_port: invalid number of nodes found (%d)",
1320 		    num_nodes));
1321 		return (DDI_FAILURE);
1322 	}
1323 
1324 	/* allocate space for node list */
1325 	listsz = num_nodes * sizeof (mde_cookie_t);
1326 	listp = kmem_zalloc(listsz, KM_NOSLEEP);
1327 	if (listp == NULL)
1328 		return (DDI_FAILURE);
1329 
1330 	num_ldcs = md_scan_dag(mdp, mdex,
1331 		md_find_name(mdp, channel_propname),
1332 		md_find_name(mdp, "fwd"), listp);
1333 
1334 	if (num_ldcs <= 0) {
1335 		DWARN((vgenp->vnetp,
1336 		    "vgen_add_port: can't find %s nodes", channel_propname));
1337 		kmem_free(listp, listsz);
1338 		return (DDI_FAILURE);
1339 	}
1340 
1341 	DBG2((vgenp->vnetp, "vgen_add_port: num_ldcs %d", num_ldcs));
1342 
1343 	ldc_ids = kmem_zalloc(num_ldcs * sizeof (uint64_t), KM_NOSLEEP);
1344 	if (ldc_ids == NULL) {
1345 		kmem_free(listp, listsz);
1346 		return (DDI_FAILURE);
1347 	}
1348 
1349 	for (i = 0; i < num_ldcs; i++) {
1350 		/* read channel ids */
1351 		if (md_get_prop_val(mdp, listp[i], id_propname, &ldc_ids[i])) {
1352 			DWARN((vgenp->vnetp,
1353 			    "vgen_add_port: prop(%s) not found\n",
1354 			    id_propname));
1355 			kmem_free(listp, listsz);
1356 			kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
1357 			return (DDI_FAILURE);
1358 		}
1359 		DBG2((vgenp->vnetp, "vgen_add_port: ldc_id 0x%llx",
1360 		    ldc_ids[i]));
1361 	}
1362 
1363 	kmem_free(listp, listsz);
1364 
1365 	if (md_get_prop_data(mdp, mdex, rmacaddr_propname, &addrp,
1366 	    &addrsz)) {
1367 		DWARN((vgenp->vnetp,
1368 		    "vgen_add_port: prop(%s) not found\n", rmacaddr_propname));
1369 		kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
1370 		return (DDI_FAILURE);
1371 	}
1372 
1373 	if (addrsz < ETHERADDRL) {
1374 		DWARN((vgenp->vnetp,
1375 		    "vgen_add_port: invalid address size (%d)\n", addrsz));
1376 		kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
1377 		return (DDI_FAILURE);
1378 	}
1379 
1380 	macaddr = *((uint64_t *)addrp);
1381 
1382 	DBG2((vgenp->vnetp, "vgen_add_port: remote mac address 0x%llx\n",
1383 	    macaddr));
1384 
1385 	for (i = ETHERADDRL - 1; i >= 0; i--) {
1386 		ea.ether_addr_octet[i] = macaddr & 0xFF;
1387 		macaddr >>= 8;
1388 	}
1389 
1390 	if (vgenp->vsw_portp == NULL) {
1391 		if (!(md_get_prop_val(mdp, mdex, swport_propname, &val))) {
1392 			if (val == 0) {
1393 				/* This port is connected to the vsw on dom0 */
1394 				vsw_port = B_TRUE;
1395 			}
1396 		}
1397 	}
1398 	(void) vgen_port_attach_mdeg(vgenp, (int)port_num, ldc_ids, num_ldcs,
1399 	    &ea, vsw_port);
1400 
1401 	kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
1402 
1403 	return (DDI_SUCCESS);
1404 }
1405 
1406 /* remove a port from the device */
1407 static int
1408 vgen_remove_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
1409 {
1410 	uint64_t	port_num;
1411 	vgen_port_t	*portp;
1412 	vgen_portlist_t	*plistp;
1413 
1414 	/* read "id" property to get the port number */
1415 	if (md_get_prop_val(mdp, mdex, id_propname, &port_num)) {
1416 		DWARN((vgenp->vnetp,
1417 		    "vgen_remove_port: prop(%s) not found\n", id_propname));
1418 		return (DDI_FAILURE);
1419 	}
1420 
1421 	plistp = &(vgenp->vgenports);
1422 
1423 	WRITE_ENTER(&plistp->rwlock);
1424 	portp = vgen_port_lookup(plistp, (int)port_num);
1425 	if (portp == NULL) {
1426 		DWARN((vgenp->vnetp, "vgen_remove_port: can't find port(%lx)\n",
1427 		    port_num));
1428 		RW_EXIT(&plistp->rwlock);
1429 		return (DDI_FAILURE);
1430 	}
1431 
1432 	vgen_port_detach_mdeg(portp);
1433 	RW_EXIT(&plistp->rwlock);
1434 
1435 	return (DDI_SUCCESS);
1436 }
1437 
1438 /* attach a port to the device based on mdeg data */
1439 static int
1440 vgen_port_attach_mdeg(vgen_t *vgenp, int port_num, uint64_t *ldcids,
1441 	int num_ids, struct ether_addr *macaddr, boolean_t vsw_port)
1442 {
1443 	vgen_port_t		*portp;
1444 	vgen_portlist_t		*plistp;
1445 	int			i;
1446 
1447 	portp = kmem_zalloc(sizeof (vgen_port_t), KM_NOSLEEP);
1448 	if (portp == NULL) {
1449 		return (DDI_FAILURE);
1450 	}
1451 	portp->vgenp = vgenp;
1452 	portp->port_num = port_num;
1453 
1454 	DBG1((vgenp->vnetp,
1455 	    "vgen_port_attach_mdeg: port_num(%d)\n", portp->port_num));
1456 
1457 	portp->ldclist.num_ldcs = 0;
1458 	portp->ldclist.headp = NULL;
1459 	rw_init(&portp->ldclist.rwlock, NULL, RW_DRIVER, NULL);
1460 
1461 	ether_copy(macaddr, &portp->macaddr);
1462 	for (i = 0; i < num_ids; i++) {
1463 		DBG2((vgenp->vnetp, "vgen_port_attach_mdeg: ldcid (%lx)\n",
1464 		    ldcids[i]));
1465 		(void) vgen_ldc_attach(portp, ldcids[i]);
1466 	}
1467 
1468 	/* link it into the list of ports */
1469 	plistp = &(vgenp->vgenports);
1470 	WRITE_ENTER(&plistp->rwlock);
1471 	vgen_port_list_insert(portp);
1472 	RW_EXIT(&plistp->rwlock);
1473 
1474 	/* This port is connected to the vsw on domain0 */
1475 	if (vsw_port)
1476 		vgenp->vsw_portp = portp;
1477 
1478 	if (vgenp->flags & VGEN_STARTED) {	/* interface is configured */
1479 		vgen_port_init(portp);
1480 	}
1481 
1482 	DBG1((vgenp->vnetp,
1483 	    "vgen_port_attach_mdeg: exit: port_num(%d)\n", portp->port_num));
1484 	return (DDI_SUCCESS);
1485 }
1486 
1487 /* detach a port from the device based on mdeg data */
1488 static void
1489 vgen_port_detach_mdeg(vgen_port_t *portp)
1490 {
1491 	vgen_t *vgenp = portp->vgenp;
1492 
1493 	DBG1((vgenp->vnetp,
1494 	    "vgen_port_detach_mdeg: enter: port_num(%d)\n", portp->port_num));
1495 	/* stop the port if needed */
1496 	if (vgenp->flags & VGEN_STARTED) {
1497 		vgen_port_uninit(portp);
1498 	}
1499 	vgen_port_detach(portp);
1500 
1501 	DBG1((vgenp->vnetp,
1502 	    "vgen_port_detach_mdeg: exit: port_num(%d)\n", portp->port_num));
1503 }
1504 
1505 static int
1506 vgen_update_port(vgen_t *vgenp, md_t *curr_mdp, mde_cookie_t curr_mdex,
1507 	md_t *prev_mdp, mde_cookie_t prev_mdex)
1508 {
1509 	 _NOTE(ARGUNUSED(vgenp, curr_mdp, curr_mdex, prev_mdp, prev_mdex))
1510 
1511 	/* NOTE: TBD */
1512 	return (DDI_SUCCESS);
1513 }
1514 
1515 static uint64_t
1516 vgen_port_stat(vgen_port_t *portp, uint_t stat)
1517 {
1518 	vgen_ldclist_t	*ldclp;
1519 	vgen_ldc_t *ldcp;
1520 	uint64_t	val;
1521 
1522 	val = 0;
1523 	ldclp = &portp->ldclist;
1524 
1525 	READ_ENTER(&ldclp->rwlock);
1526 	for (ldcp = ldclp->headp; ldcp != NULL; ldcp = ldcp->nextp) {
1527 		val += vgen_ldc_stat(ldcp, stat);
1528 	}
1529 	RW_EXIT(&ldclp->rwlock);
1530 
1531 	return (val);
1532 }
1533 
1534 /* attach the channel corresponding to the given ldc_id to the port */
1535 static int
1536 vgen_ldc_attach(vgen_port_t *portp, uint64_t ldc_id)
1537 {
1538 	vgen_t 		*vgenp;
1539 	vgen_ldclist_t	*ldclp;
1540 	vgen_ldc_t 	*ldcp, **prev_ldcp;
1541 	ldc_attr_t 	attr;
1542 	int 		status;
1543 	ldc_status_t	istatus;
1544 	enum		{AST_init = 0x0, AST_ldc_alloc = 0x1,
1545 			AST_mutex_init = 0x2, AST_ldc_init = 0x4,
1546 			AST_ldc_reg_cb = 0x8, AST_alloc_tx_ring = 0x10,
1547 			AST_create_rxmblks = 0x20}
1548 			attach_state;
1549 
1550 	attach_state = AST_init;
1551 	vgenp = portp->vgenp;
1552 	ldclp = &portp->ldclist;
1553 
1554 	ldcp = kmem_zalloc(sizeof (vgen_ldc_t), KM_NOSLEEP);
1555 	if (ldcp == NULL) {
1556 		goto ldc_attach_failed;
1557 	}
1558 	ldcp->ldc_id = ldc_id;
1559 	ldcp->portp = portp;
1560 
1561 	attach_state |= AST_ldc_alloc;
1562 
1563 	mutex_init(&ldcp->txlock, NULL, MUTEX_DRIVER, NULL);
1564 	mutex_init(&ldcp->cblock, NULL, MUTEX_DRIVER, NULL);
1565 	mutex_init(&ldcp->tclock, NULL, MUTEX_DRIVER, NULL);
1566 
1567 	attach_state |= AST_mutex_init;
1568 
1569 	attr.devclass = LDC_DEV_NT;
1570 	attr.instance = ddi_get_instance(vgenp->vnetdip);
1571 	attr.mode = LDC_MODE_UNRELIABLE;
1572 	attr.mtu = vnet_ldc_mtu;
1573 	status = ldc_init(ldc_id, &attr, &ldcp->ldc_handle);
1574 	if (status != 0) {
1575 		DWARN((vgenp->vnetp, "ldc_init failed, id (%lx) rv (%d)\n",
1576 		    ldc_id, status));
1577 		goto ldc_attach_failed;
1578 	}
1579 	attach_state |= AST_ldc_init;
1580 
1581 	status = ldc_reg_callback(ldcp->ldc_handle, vgen_ldc_cb, (caddr_t)ldcp);
1582 	if (status != 0) {
1583 		DWARN((vgenp->vnetp,
1584 		    "ldc_reg_callback failed, id (%lx) rv (%d)\n",
1585 		    ldc_id, status));
1586 		goto ldc_attach_failed;
1587 	}
1588 	attach_state |= AST_ldc_reg_cb;
1589 
1590 	(void) ldc_status(ldcp->ldc_handle, &istatus);
1591 	ASSERT(istatus == LDC_INIT);
1592 	ldcp->ldc_status = istatus;
1593 
1594 	/* allocate transmit resources */
1595 	status = vgen_alloc_tx_ring(ldcp);
1596 	if (status != 0) {
1597 		goto ldc_attach_failed;
1598 	}
1599 	attach_state |= AST_alloc_tx_ring;
1600 
1601 	/* allocate receive resources */
1602 	ldcp->num_rbufs = vnet_nrbufs;
1603 	ldcp->rmp = NULL;
1604 	status = vio_create_mblks(ldcp->num_rbufs, VGEN_DBLK_SZ,
1605 		&(ldcp->rmp));
1606 	if (status != 0) {
1607 		goto ldc_attach_failed;
1608 	}
1609 	attach_state |= AST_create_rxmblks;
1610 
1611 	/* Setup kstats for the channel */
1612 	status = vgen_setup_kstats(ldcp);
1613 	if (status != VGEN_SUCCESS) {
1614 		goto ldc_attach_failed;
1615 	}
1616 
1617 	/* initialize vgen_versions supported */
1618 	bcopy(vgen_versions, ldcp->vgen_versions, sizeof (ldcp->vgen_versions));
1619 
1620 	/* link it into the list of channels for this port */
1621 	WRITE_ENTER(&ldclp->rwlock);
1622 	prev_ldcp = (vgen_ldc_t **)(&ldclp->headp);
1623 	ldcp->nextp = *prev_ldcp;
1624 	*prev_ldcp = ldcp;
1625 	ldclp->num_ldcs++;
1626 	RW_EXIT(&ldclp->rwlock);
1627 
1628 	ldcp->flags |= CHANNEL_ATTACHED;
1629 	return (DDI_SUCCESS);
1630 
1631 ldc_attach_failed:
1632 	if (attach_state & AST_create_rxmblks) {
1633 		(void) vio_destroy_mblks(ldcp->rmp);
1634 	}
1635 	if (attach_state & AST_alloc_tx_ring) {
1636 		vgen_free_tx_ring(ldcp);
1637 	}
1638 	if (attach_state & AST_ldc_reg_cb) {
1639 		(void) ldc_unreg_callback(ldcp->ldc_handle);
1640 	}
1641 	if (attach_state & AST_ldc_init) {
1642 		(void) ldc_fini(ldcp->ldc_handle);
1643 	}
1644 	if (attach_state & AST_mutex_init) {
1645 		mutex_destroy(&ldcp->tclock);
1646 		mutex_destroy(&ldcp->txlock);
1647 		mutex_destroy(&ldcp->cblock);
1648 	}
1649 	if (attach_state & AST_ldc_alloc) {
1650 		KMEM_FREE(ldcp);
1651 	}
1652 	return (DDI_FAILURE);
1653 }
1654 
1655 /* detach a channel from the port */
1656 static void
1657 vgen_ldc_detach(vgen_ldc_t *ldcp)
1658 {
1659 	vgen_port_t	*portp;
1660 	vgen_t 		*vgenp;
1661 	vgen_ldc_t 	*pldcp;
1662 	vgen_ldc_t	**prev_ldcp;
1663 	vgen_ldclist_t	*ldclp;
1664 
1665 	portp = ldcp->portp;
1666 	vgenp = portp->vgenp;
1667 	ldclp = &portp->ldclist;
1668 
1669 	prev_ldcp =  (vgen_ldc_t **)&ldclp->headp;
1670 	for (; (pldcp = *prev_ldcp) != NULL; prev_ldcp = &pldcp->nextp) {
1671 		if (pldcp == ldcp) {
1672 			break;
1673 		}
1674 	}
1675 
1676 	if (pldcp == NULL) {
1677 		/* invalid ldcp? */
1678 		return;
1679 	}
1680 
1681 	if (ldcp->ldc_status != LDC_INIT) {
1682 		DWARN((vgenp->vnetp,
1683 		    "vgen_ldc_detach: ldc_status is not INIT id(%lx)\n",
1684 			    ldcp->ldc_id));
1685 	}
1686 
1687 	if (ldcp->flags & CHANNEL_ATTACHED) {
1688 		ldcp->flags &= ~(CHANNEL_ATTACHED);
1689 
1690 		vgen_destroy_kstats(ldcp);
1691 
1692 		/* free receive resources */
1693 		if (vio_destroy_mblks(ldcp->rmp)) {
1694 			/*
1695 			 * if we cannot reclaim all mblks, put this
1696 			 * on the list of pools to be reclaimed when the
1697 			 * device gets detached (see vgen_uninit()).
1698 			 */
1699 			ldcp->rmp->nextp =  vgenp->rmp;
1700 			vgenp->rmp = ldcp->rmp;
1701 		}
1702 
1703 		/* free transmit resources */
1704 		vgen_free_tx_ring(ldcp);
1705 
1706 		(void) ldc_unreg_callback(ldcp->ldc_handle);
1707 		(void) ldc_fini(ldcp->ldc_handle);
1708 		mutex_destroy(&ldcp->tclock);
1709 		mutex_destroy(&ldcp->txlock);
1710 		mutex_destroy(&ldcp->cblock);
1711 
1712 		/* unlink it from the list */
1713 		*prev_ldcp = ldcp->nextp;
1714 		ldclp->num_ldcs--;
1715 		KMEM_FREE(ldcp);
1716 	}
1717 }
1718 
1719 /*
1720  * This function allocates transmit resources for the channel.
1721  * The resources consist of a transmit descriptor ring and an associated
1722  * transmit buffer ring.
1723  */
1724 static int
1725 vgen_alloc_tx_ring(vgen_ldc_t *ldcp)
1726 {
1727 	void *tbufp;
1728 	ldc_mem_info_t minfo;
1729 	uint32_t txdsize;
1730 	uint32_t tbufsize;
1731 	int status;
1732 	void *vnetp = LDC_TO_VNET(ldcp);
1733 
1734 	ldcp->num_txds = vnet_ntxds;
1735 	txdsize = sizeof (vnet_public_desc_t);
1736 	tbufsize = sizeof (vgen_private_desc_t);
1737 
1738 	/* allocate transmit buffer ring */
1739 	tbufp = kmem_zalloc(ldcp->num_txds * tbufsize, KM_NOSLEEP);
1740 	if (tbufp == NULL) {
1741 		return (DDI_FAILURE);
1742 	}
1743 
1744 	/* create transmit descriptor ring */
1745 	status = ldc_mem_dring_create(ldcp->num_txds, txdsize,
1746 	    &ldcp->tx_dhandle);
1747 	if (status) {
1748 		DWARN((vnetp, "vgen_alloc_tx_ring: ldc_mem_dring_create() "
1749 		    "failed, id(%lx)\n", ldcp->ldc_id));
1750 		kmem_free(tbufp, ldcp->num_txds * tbufsize);
1751 		return (DDI_FAILURE);
1752 	}
1753 
1754 	/* get the addr of descripror ring */
1755 	status = ldc_mem_dring_info(ldcp->tx_dhandle, &minfo);
1756 	if (status) {
1757 		DWARN((vnetp, "vgen_alloc_tx_ring: ldc_mem_dring_info() "
1758 		    "failed, id(%lx)\n", ldcp->ldc_id));
1759 		kmem_free(tbufp, ldcp->num_txds * tbufsize);
1760 		(void) ldc_mem_dring_destroy(ldcp->tx_dhandle);
1761 		ldcp->tbufp = NULL;
1762 		return (DDI_FAILURE);
1763 	}
1764 	ldcp->txdp = (vnet_public_desc_t *)(minfo.vaddr);
1765 	ldcp->tbufp = tbufp;
1766 
1767 	ldcp->txdendp = &((ldcp->txdp)[ldcp->num_txds]);
1768 	ldcp->tbufendp = &((ldcp->tbufp)[ldcp->num_txds]);
1769 
1770 	return (DDI_SUCCESS);
1771 }
1772 
1773 /* Free transmit resources for the channel */
1774 static void
1775 vgen_free_tx_ring(vgen_ldc_t *ldcp)
1776 {
1777 	int tbufsize = sizeof (vgen_private_desc_t);
1778 
1779 	/* free transmit descriptor ring */
1780 	(void) ldc_mem_dring_destroy(ldcp->tx_dhandle);
1781 
1782 	/* free transmit buffer ring */
1783 	kmem_free(ldcp->tbufp, ldcp->num_txds * tbufsize);
1784 	ldcp->txdp = ldcp->txdendp = NULL;
1785 	ldcp->tbufp = ldcp->tbufendp = NULL;
1786 }
1787 
1788 /* enable transmit/receive on the channels for the port */
1789 static void
1790 vgen_init_ldcs(vgen_port_t *portp)
1791 {
1792 	vgen_ldclist_t	*ldclp = &portp->ldclist;
1793 	vgen_ldc_t	*ldcp;
1794 
1795 	READ_ENTER(&ldclp->rwlock);
1796 	ldcp =  ldclp->headp;
1797 	for (; ldcp  != NULL; ldcp = ldcp->nextp) {
1798 		(void) vgen_ldc_init(ldcp);
1799 	}
1800 	RW_EXIT(&ldclp->rwlock);
1801 }
1802 
1803 /* stop transmit/receive on the channels for the port */
1804 static void
1805 vgen_uninit_ldcs(vgen_port_t *portp)
1806 {
1807 	vgen_ldclist_t	*ldclp = &portp->ldclist;
1808 	vgen_ldc_t	*ldcp;
1809 
1810 	READ_ENTER(&ldclp->rwlock);
1811 	ldcp =  ldclp->headp;
1812 	for (; ldcp  != NULL; ldcp = ldcp->nextp) {
1813 		vgen_ldc_uninit(ldcp);
1814 	}
1815 	RW_EXIT(&ldclp->rwlock);
1816 }
1817 
1818 /* enable transmit/receive on the channel */
1819 static int
1820 vgen_ldc_init(vgen_ldc_t *ldcp)
1821 {
1822 	void *vnetp = LDC_TO_VNET(ldcp);
1823 	ldc_status_t	istatus;
1824 	int		rv;
1825 	enum		{ ST_init = 0x0, ST_ldc_open = 0x1,
1826 			    ST_init_tbufs = 0x2, ST_cb_enable = 0x4
1827 			    }
1828 			init_state;
1829 	uint32_t	retries = 0;
1830 
1831 	init_state = ST_init;
1832 
1833 	LDC_LOCK(ldcp);
1834 
1835 	rv = ldc_open(ldcp->ldc_handle);
1836 	if (rv != 0) {
1837 		DWARN((vnetp,
1838 		    "vgen_ldcinit: ldc_open failed: id<%lx> rv(%d)\n",
1839 		    ldcp->ldc_id, rv));
1840 		goto ldcinit_failed;
1841 	}
1842 	init_state |= ST_ldc_open;
1843 
1844 	(void) ldc_status(ldcp->ldc_handle, &istatus);
1845 	if (istatus != LDC_OPEN && istatus != LDC_READY) {
1846 		DWARN((vnetp,
1847 		    "vgen_ldcinit: id (%lx) status(%d) is not OPEN/READY\n",
1848 		    ldcp->ldc_id, istatus));
1849 		goto ldcinit_failed;
1850 	}
1851 	ldcp->ldc_status = istatus;
1852 
1853 	rv = vgen_init_tbufs(ldcp);
1854 	if (rv != 0) {
1855 		DWARN((vnetp,
1856 		    "vgen_ldcinit: vgen_init_tbufs() failed: id(%lx)\n",
1857 		    ldcp->ldc_id));
1858 		goto ldcinit_failed;
1859 	}
1860 	init_state |= ST_init_tbufs;
1861 
1862 	rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_ENABLE);
1863 	if (rv != 0) {
1864 		DWARN((vnetp, "vgen_ldc_init: ldc_set_cb_mode failed: id(%lx) "
1865 		    "rv(%d)\n", ldcp->ldc_id, rv));
1866 		goto ldcinit_failed;
1867 	}
1868 
1869 	init_state |= ST_cb_enable;
1870 
1871 	do {
1872 		rv = ldc_up(ldcp->ldc_handle);
1873 		if ((rv != 0) && (rv == EWOULDBLOCK)) {
1874 			DBG2((vnetp,
1875 			    "vgen_ldcinit: ldc_up err id(%lx) rv(%d)\n",
1876 			    ldcp->ldc_id, rv));
1877 			drv_usecwait(VGEN_LDC_UP_DELAY);
1878 		}
1879 		if (retries++ >= vgen_ldcup_retries)
1880 			break;
1881 	} while (rv == EWOULDBLOCK);
1882 
1883 	(void) ldc_status(ldcp->ldc_handle, &istatus);
1884 	if (istatus == LDC_UP) {
1885 		DWARN((vnetp, "vgen_ldc_init: id(%lx) status(%d) is UP\n",
1886 		    ldcp->ldc_id, istatus));
1887 	}
1888 
1889 	ldcp->ldc_status = istatus;
1890 
1891 	/* initialize transmit watchdog timeout */
1892 	ldcp->wd_tid = timeout(vgen_ldc_watchdog, (caddr_t)ldcp,
1893 	    drv_usectohz(vnet_ldcwd_interval * 1000));
1894 
1895 	ldcp->hphase = -1;
1896 	ldcp->flags |= CHANNEL_STARTED;
1897 
1898 	/* if channel is already UP - start handshake */
1899 	if (istatus == LDC_UP) {
1900 		vgen_t *vgenp = LDC_TO_VGEN(ldcp);
1901 		if (ldcp->portp != vgenp->vsw_portp) {
1902 			/*
1903 			 * modify fdb entry to use this port as the
1904 			 * channel is up, instead of going through the
1905 			 * vsw-port (see comments in vgen_port_init())
1906 			 */
1907 			vnet_modify_fdb(vnetp,
1908 			    (uint8_t *)&ldcp->portp->macaddr,
1909 			    vgen_tx, ldcp->portp, B_FALSE);
1910 		}
1911 
1912 		/* Initialize local session id */
1913 		ldcp->local_sid = ddi_get_lbolt();
1914 
1915 		/* clear peer session id */
1916 		ldcp->peer_sid = 0;
1917 		ldcp->hretries = 0;
1918 
1919 		/* Initiate Handshake process with peer ldc endpoint */
1920 		vgen_reset_hphase(ldcp);
1921 
1922 		mutex_exit(&ldcp->tclock);
1923 		mutex_exit(&ldcp->txlock);
1924 		vgen_handshake(vh_nextphase(ldcp));
1925 		mutex_exit(&ldcp->cblock);
1926 	} else {
1927 		LDC_UNLOCK(ldcp);
1928 	}
1929 
1930 	return (DDI_SUCCESS);
1931 
1932 ldcinit_failed:
1933 	if (init_state & ST_cb_enable) {
1934 		(void) ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE);
1935 	}
1936 	if (init_state & ST_init_tbufs) {
1937 		vgen_uninit_tbufs(ldcp);
1938 	}
1939 	if (init_state & ST_ldc_open) {
1940 		(void) ldc_close(ldcp->ldc_handle);
1941 	}
1942 	LDC_UNLOCK(ldcp);
1943 	return (DDI_FAILURE);
1944 }
1945 
1946 /* stop transmit/receive on the channel */
1947 static void
1948 vgen_ldc_uninit(vgen_ldc_t *ldcp)
1949 {
1950 	void *vnetp = LDC_TO_VNET(ldcp);
1951 	int	rv;
1952 
1953 	DBG1((vnetp, "vgen_ldc_uninit: enter: id(%lx)\n", ldcp->ldc_id));
1954 	LDC_LOCK(ldcp);
1955 
1956 	if ((ldcp->flags & CHANNEL_STARTED) == 0) {
1957 		LDC_UNLOCK(ldcp);
1958 		DWARN((vnetp, "vgen_ldc_uninit: id(%lx) CHANNEL_STARTED"
1959 		    " flag is not set\n", ldcp->ldc_id));
1960 		return;
1961 	}
1962 
1963 	/* disable further callbacks */
1964 	rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE);
1965 	if (rv != 0) {
1966 		DWARN((vnetp, "vgen_ldc_uninit: id (%lx) "
1967 		    "ldc_set_cb_mode failed\n", ldcp->ldc_id));
1968 	}
1969 
1970 	/*
1971 	 * clear handshake done bit and wait for pending tx and cb to finish.
1972 	 * release locks before untimeout(9F) is invoked to cancel timeouts.
1973 	 */
1974 	ldcp->hphase &= ~(VH_DONE);
1975 	LDC_UNLOCK(ldcp);
1976 
1977 	/* cancel handshake watchdog timeout */
1978 	if (ldcp->htid) {
1979 		(void) untimeout(ldcp->htid);
1980 		ldcp->htid = 0;
1981 	}
1982 
1983 	/* cancel transmit watchdog timeout */
1984 	if (ldcp->wd_tid) {
1985 		(void) untimeout(ldcp->wd_tid);
1986 		ldcp->wd_tid = 0;
1987 	}
1988 
1989 	drv_usecwait(1000);
1990 
1991 	/* acquire locks again; any pending transmits and callbacks are done */
1992 	LDC_LOCK(ldcp);
1993 
1994 	vgen_reset_hphase(ldcp);
1995 
1996 	vgen_uninit_tbufs(ldcp);
1997 
1998 	rv = ldc_close(ldcp->ldc_handle);
1999 	if (rv != 0) {
2000 		DWARN((vnetp, "vgen_ldcuninit: ldc_close err id(%lx)\n",
2001 		    ldcp->ldc_id));
2002 	}
2003 	ldcp->ldc_status = LDC_INIT;
2004 	ldcp->flags &= ~(CHANNEL_STARTED);
2005 
2006 	LDC_UNLOCK(ldcp);
2007 
2008 	DBG1((vnetp, "vgen_ldc_uninit: exit: id(%lx)\n", ldcp->ldc_id));
2009 }
2010 
2011 /* Initialize the transmit buffer ring for the channel */
2012 static int
2013 vgen_init_tbufs(vgen_ldc_t *ldcp)
2014 {
2015 	vgen_private_desc_t	*tbufp;
2016 	vnet_public_desc_t	*txdp;
2017 	vio_dring_entry_hdr_t		*hdrp;
2018 	int 			i;
2019 	int 			rv;
2020 	caddr_t			datap = NULL;
2021 	int			ci;
2022 	uint32_t		ncookies;
2023 
2024 	bzero(ldcp->tbufp, sizeof (*tbufp) * (ldcp->num_txds));
2025 	bzero(ldcp->txdp, sizeof (*txdp) * (ldcp->num_txds));
2026 
2027 	datap = kmem_zalloc(ldcp->num_txds * VGEN_DBLK_SZ, KM_SLEEP);
2028 	ldcp->tx_datap = datap;
2029 
2030 	/*
2031 	 * for each private descriptor, allocate a ldc mem_handle which is
2032 	 * required to map the data during transmit, set the flags
2033 	 * to free (available for use by transmit routine).
2034 	 */
2035 
2036 	for (i = 0; i < ldcp->num_txds; i++) {
2037 
2038 		tbufp = &(ldcp->tbufp[i]);
2039 		rv = ldc_mem_alloc_handle(ldcp->ldc_handle,
2040 			&(tbufp->memhandle));
2041 		if (rv) {
2042 			tbufp->memhandle = 0;
2043 			goto init_tbufs_failed;
2044 		}
2045 
2046 		/*
2047 		 * bind ldc memhandle to the corresponding transmit buffer.
2048 		 */
2049 		ci = ncookies = 0;
2050 		rv = ldc_mem_bind_handle(tbufp->memhandle,
2051 		    (caddr_t)datap, VGEN_DBLK_SZ, LDC_SHADOW_MAP,
2052 		    LDC_MEM_R, &(tbufp->memcookie[ci]), &ncookies);
2053 		if (rv != 0) {
2054 			goto init_tbufs_failed;
2055 		}
2056 
2057 		/*
2058 		 * successful in binding the handle to tx data buffer.
2059 		 * set datap in the private descr to this buffer.
2060 		 */
2061 		tbufp->datap = datap;
2062 
2063 		if ((ncookies == 0) ||
2064 			(ncookies > MAX_COOKIES)) {
2065 			goto init_tbufs_failed;
2066 		}
2067 
2068 		for (ci = 1; ci < ncookies; ci++) {
2069 			rv = ldc_mem_nextcookie(tbufp->memhandle,
2070 				&(tbufp->memcookie[ci]));
2071 			if (rv != 0) {
2072 				goto init_tbufs_failed;
2073 			}
2074 		}
2075 
2076 		tbufp->ncookies = ncookies;
2077 		datap += VGEN_DBLK_SZ;
2078 
2079 		tbufp->flags = VGEN_PRIV_DESC_FREE;
2080 		txdp = &(ldcp->txdp[i]);
2081 		hdrp = &txdp->hdr;
2082 		hdrp->dstate = VIO_DESC_FREE;
2083 		hdrp->ack = B_FALSE;
2084 		tbufp->descp = txdp;
2085 
2086 	}
2087 
2088 	/* reset tbuf walking pointers */
2089 	ldcp->next_tbufp = ldcp->tbufp;
2090 	ldcp->cur_tbufp = ldcp->tbufp;
2091 
2092 	/* initialize tx seqnum and index */
2093 	ldcp->next_txseq = VNET_ISS;
2094 	ldcp->next_txi = 0;
2095 
2096 	ldcp->resched_peer = B_TRUE;
2097 
2098 	return (DDI_SUCCESS);
2099 
2100 init_tbufs_failed:;
2101 	vgen_uninit_tbufs(ldcp);
2102 	return (DDI_FAILURE);
2103 }
2104 
2105 /* Uninitialize transmit buffer ring for the channel */
2106 static void
2107 vgen_uninit_tbufs(vgen_ldc_t *ldcp)
2108 {
2109 	vgen_private_desc_t	*tbufp = ldcp->tbufp;
2110 	int 			i;
2111 
2112 	/* for each tbuf (priv_desc), free ldc mem_handle */
2113 	for (i = 0; i < ldcp->num_txds; i++) {
2114 
2115 		tbufp = &(ldcp->tbufp[i]);
2116 
2117 		if (tbufp->datap) { /* if bound to a ldc memhandle */
2118 			(void) ldc_mem_unbind_handle(tbufp->memhandle);
2119 			tbufp->datap = NULL;
2120 		}
2121 		if (tbufp->memhandle) {
2122 			(void) ldc_mem_free_handle(tbufp->memhandle);
2123 			tbufp->memhandle = 0;
2124 		}
2125 	}
2126 
2127 	if (ldcp->tx_datap) {
2128 		/* prealloc'd tx data buffer */
2129 		kmem_free(ldcp->tx_datap, ldcp->num_txds * VGEN_DBLK_SZ);
2130 		ldcp->tx_datap = NULL;
2131 	}
2132 
2133 	bzero(ldcp->tbufp, sizeof (vgen_private_desc_t) * (ldcp->num_txds));
2134 	bzero(ldcp->txdp, sizeof (vnet_public_desc_t) * (ldcp->num_txds));
2135 }
2136 
2137 /* clobber tx descriptor ring */
2138 static void
2139 vgen_clobber_tbufs(vgen_ldc_t *ldcp)
2140 {
2141 	vnet_public_desc_t	*txdp;
2142 	vgen_private_desc_t	*tbufp;
2143 	vio_dring_entry_hdr_t		*hdrp;
2144 	void *vnetp = LDC_TO_VNET(ldcp);
2145 	int i;
2146 #ifdef DEBUG
2147 	int ndone = 0;
2148 #endif
2149 
2150 	for (i = 0; i < ldcp->num_txds; i++) {
2151 
2152 		tbufp = &(ldcp->tbufp[i]);
2153 		txdp = tbufp->descp;
2154 		hdrp = &txdp->hdr;
2155 
2156 		if (tbufp->flags & VGEN_PRIV_DESC_BUSY) {
2157 			tbufp->flags = VGEN_PRIV_DESC_FREE;
2158 #ifdef DEBUG
2159 			if (hdrp->dstate == VIO_DESC_DONE)
2160 				ndone++;
2161 #endif
2162 			hdrp->dstate = VIO_DESC_FREE;
2163 			hdrp->ack = B_FALSE;
2164 		}
2165 	}
2166 	/* reset tbuf walking pointers */
2167 	ldcp->next_tbufp = ldcp->tbufp;
2168 	ldcp->cur_tbufp = ldcp->tbufp;
2169 
2170 	/* reset tx seqnum and index */
2171 	ldcp->next_txseq = VNET_ISS;
2172 	ldcp->next_txi = 0;
2173 
2174 	ldcp->resched_peer = B_TRUE;
2175 
2176 #ifdef DEBUG
2177 	DBG2((vnetp,
2178 	    "vgen_clobber_tbufs: id(0x%lx) num descrs done (%d)\n",
2179 	    ldcp->ldc_id, ndone));
2180 #endif
2181 }
2182 
2183 /* clobber receive descriptor ring */
2184 static void
2185 vgen_clobber_rxds(vgen_ldc_t *ldcp)
2186 {
2187 	ldcp->rx_dhandle = 0;
2188 	bzero(&ldcp->rx_dcookie, sizeof (ldcp->rx_dcookie));
2189 	ldcp->rxdp = NULL;
2190 	ldcp->next_rxi = 0;
2191 	ldcp->num_rxds = 0;
2192 	ldcp->next_rxseq = VNET_ISS;
2193 }
2194 
2195 /* initialize receive descriptor ring */
2196 static int
2197 vgen_init_rxds(vgen_ldc_t *ldcp, uint32_t num_desc, uint32_t desc_size,
2198 	ldc_mem_cookie_t *dcookie, uint32_t ncookies)
2199 {
2200 	int rv;
2201 	ldc_mem_info_t minfo;
2202 
2203 	rv = ldc_mem_dring_map(ldcp->ldc_handle, dcookie, ncookies, num_desc,
2204 	    desc_size, LDC_SHADOW_MAP, &(ldcp->rx_dhandle));
2205 	if (rv != 0) {
2206 		return (DDI_FAILURE);
2207 	}
2208 
2209 	/*
2210 	 * sucessfully mapped, now try to
2211 	 * get info about the mapped dring
2212 	 */
2213 	rv = ldc_mem_dring_info(ldcp->rx_dhandle, &minfo);
2214 	if (rv != 0) {
2215 		(void) ldc_mem_dring_unmap(ldcp->rx_dhandle);
2216 		return (DDI_FAILURE);
2217 	}
2218 
2219 	/*
2220 	 * save ring address, number of descriptors.
2221 	 */
2222 	ldcp->rxdp = (vnet_public_desc_t *)(minfo.vaddr);
2223 	bcopy(dcookie, &(ldcp->rx_dcookie), sizeof (*dcookie));
2224 	ldcp->num_rxdcookies = ncookies;
2225 	ldcp->num_rxds = num_desc;
2226 	ldcp->next_rxi = 0;
2227 	ldcp->next_rxseq = VNET_ISS;
2228 
2229 	return (DDI_SUCCESS);
2230 }
2231 
2232 /* get channel statistics */
2233 static uint64_t
2234 vgen_ldc_stat(vgen_ldc_t *ldcp, uint_t stat)
2235 {
2236 	vgen_stats_t *statsp;
2237 	uint64_t val;
2238 
2239 	val = 0;
2240 	statsp = ldcp->statsp;
2241 	switch (stat) {
2242 
2243 	case MAC_STAT_MULTIRCV:
2244 		val = statsp->multircv;
2245 		break;
2246 
2247 	case MAC_STAT_BRDCSTRCV:
2248 		val = statsp->brdcstrcv;
2249 		break;
2250 
2251 	case MAC_STAT_MULTIXMT:
2252 		val = statsp->multixmt;
2253 		break;
2254 
2255 	case MAC_STAT_BRDCSTXMT:
2256 		val = statsp->brdcstxmt;
2257 		break;
2258 
2259 	case MAC_STAT_NORCVBUF:
2260 		val = statsp->norcvbuf;
2261 		break;
2262 
2263 	case MAC_STAT_IERRORS:
2264 		val = statsp->ierrors;
2265 		break;
2266 
2267 	case MAC_STAT_NOXMTBUF:
2268 		val = statsp->noxmtbuf;
2269 		break;
2270 
2271 	case MAC_STAT_OERRORS:
2272 		val = statsp->oerrors;
2273 		break;
2274 
2275 	case MAC_STAT_COLLISIONS:
2276 		break;
2277 
2278 	case MAC_STAT_RBYTES:
2279 		val = statsp->rbytes;
2280 		break;
2281 
2282 	case MAC_STAT_IPACKETS:
2283 		val = statsp->ipackets;
2284 		break;
2285 
2286 	case MAC_STAT_OBYTES:
2287 		val = statsp->obytes;
2288 		break;
2289 
2290 	case MAC_STAT_OPACKETS:
2291 		val = statsp->opackets;
2292 		break;
2293 
2294 	/* stats not relevant to ldc, return 0 */
2295 	case MAC_STAT_IFSPEED:
2296 	case ETHER_STAT_ALIGN_ERRORS:
2297 	case ETHER_STAT_FCS_ERRORS:
2298 	case ETHER_STAT_FIRST_COLLISIONS:
2299 	case ETHER_STAT_MULTI_COLLISIONS:
2300 	case ETHER_STAT_DEFER_XMTS:
2301 	case ETHER_STAT_TX_LATE_COLLISIONS:
2302 	case ETHER_STAT_EX_COLLISIONS:
2303 	case ETHER_STAT_MACXMT_ERRORS:
2304 	case ETHER_STAT_CARRIER_ERRORS:
2305 	case ETHER_STAT_TOOLONG_ERRORS:
2306 	case ETHER_STAT_XCVR_ADDR:
2307 	case ETHER_STAT_XCVR_ID:
2308 	case ETHER_STAT_XCVR_INUSE:
2309 	case ETHER_STAT_CAP_1000FDX:
2310 	case ETHER_STAT_CAP_1000HDX:
2311 	case ETHER_STAT_CAP_100FDX:
2312 	case ETHER_STAT_CAP_100HDX:
2313 	case ETHER_STAT_CAP_10FDX:
2314 	case ETHER_STAT_CAP_10HDX:
2315 	case ETHER_STAT_CAP_ASMPAUSE:
2316 	case ETHER_STAT_CAP_PAUSE:
2317 	case ETHER_STAT_CAP_AUTONEG:
2318 	case ETHER_STAT_ADV_CAP_1000FDX:
2319 	case ETHER_STAT_ADV_CAP_1000HDX:
2320 	case ETHER_STAT_ADV_CAP_100FDX:
2321 	case ETHER_STAT_ADV_CAP_100HDX:
2322 	case ETHER_STAT_ADV_CAP_10FDX:
2323 	case ETHER_STAT_ADV_CAP_10HDX:
2324 	case ETHER_STAT_ADV_CAP_ASMPAUSE:
2325 	case ETHER_STAT_ADV_CAP_PAUSE:
2326 	case ETHER_STAT_ADV_CAP_AUTONEG:
2327 	case ETHER_STAT_LP_CAP_1000FDX:
2328 	case ETHER_STAT_LP_CAP_1000HDX:
2329 	case ETHER_STAT_LP_CAP_100FDX:
2330 	case ETHER_STAT_LP_CAP_100HDX:
2331 	case ETHER_STAT_LP_CAP_10FDX:
2332 	case ETHER_STAT_LP_CAP_10HDX:
2333 	case ETHER_STAT_LP_CAP_ASMPAUSE:
2334 	case ETHER_STAT_LP_CAP_PAUSE:
2335 	case ETHER_STAT_LP_CAP_AUTONEG:
2336 	case ETHER_STAT_LINK_ASMPAUSE:
2337 	case ETHER_STAT_LINK_PAUSE:
2338 	case ETHER_STAT_LINK_AUTONEG:
2339 	case ETHER_STAT_LINK_DUPLEX:
2340 	default:
2341 		val = 0;
2342 		break;
2343 
2344 	}
2345 	return (val);
2346 }
2347 
2348 /*
2349  * LDC channel is UP, start handshake process with peer.
2350  * Flag tells vnet_modify_fdb() about the context: set to B_TRUE if this
2351  * function is being called from transmit routine, otherwise B_FALSE.
2352  */
2353 static void
2354 vgen_handle_evt_up(vgen_ldc_t *ldcp, boolean_t flag)
2355 {
2356 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
2357 	void 	*vnetp = LDC_TO_VNET(ldcp);
2358 
2359 	DBG1((vnetp, "vgen_handle_evt_up: enter: id(%lx)\n", ldcp->ldc_id));
2360 
2361 	ASSERT(MUTEX_HELD(&ldcp->cblock));
2362 
2363 	if (ldcp->portp != vgenp->vsw_portp) {
2364 		/*
2365 		 * modify fdb entry to use this port as the
2366 		 * channel is up, instead of going through the
2367 		 * vsw-port (see comments in vgen_port_init())
2368 		 */
2369 		vnet_modify_fdb(vnetp, (uint8_t *)&ldcp->portp->macaddr,
2370 		    vgen_tx, ldcp->portp, flag);
2371 	}
2372 
2373 	/* Initialize local session id */
2374 	ldcp->local_sid = ddi_get_lbolt();
2375 
2376 	/* clear peer session id */
2377 	ldcp->peer_sid = 0;
2378 	ldcp->hretries = 0;
2379 
2380 	if (ldcp->hphase != VH_PHASE0) {
2381 		vgen_handshake_reset(ldcp);
2382 	}
2383 
2384 	/* Initiate Handshake process with peer ldc endpoint */
2385 	vgen_handshake(vh_nextphase(ldcp));
2386 
2387 	DBG1((vnetp, "vgen_handle_evt_up: exit: id(%lx)\n", ldcp->ldc_id));
2388 }
2389 
2390 /*
2391  * LDC channel is Reset, terminate connection with peer and try to
2392  * bring the channel up again.
2393  * Flag tells vnet_modify_fdb() about the context: set to B_TRUE if this
2394  * function is being called from transmit routine, otherwise B_FALSE.
2395  */
2396 static void
2397 vgen_handle_evt_reset(vgen_ldc_t *ldcp, boolean_t flag)
2398 {
2399 	ldc_status_t istatus;
2400 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
2401 	void	*vnetp = LDC_TO_VNET(ldcp);
2402 	int	rv;
2403 
2404 	DBG1((vnetp, "vgen_handle_evt_reset: enter: id(%lx)\n", ldcp->ldc_id));
2405 
2406 	ASSERT(MUTEX_HELD(&ldcp->cblock));
2407 
2408 	if ((ldcp->portp != vgenp->vsw_portp) &&
2409 		(vgenp->vsw_portp != NULL)) {
2410 		/*
2411 		 * modify fdb entry to use vsw-port  as the
2412 		 * channel is reset and we don't have a direct
2413 		 * link to the destination (see comments
2414 		 * in vgen_port_init()).
2415 		 */
2416 		vnet_modify_fdb(vnetp, (uint8_t *)&ldcp->portp->macaddr,
2417 		    vgen_tx, vgenp->vsw_portp, flag);
2418 	}
2419 
2420 	if (ldcp->hphase != VH_PHASE0) {
2421 		vgen_handshake_reset(ldcp);
2422 	}
2423 
2424 	/* try to bring the channel up */
2425 	rv = ldc_up(ldcp->ldc_handle);
2426 	if (rv != 0) {
2427 		DWARN((vnetp,
2428 		    "vgen_handle_evt_reset: ldc_up err id(%lx) rv(%d)\n",
2429 		    ldcp->ldc_id, rv));
2430 	}
2431 
2432 	if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
2433 		DWARN((vnetp,
2434 		    "vgen_handle_evt_reset: ldc_status err id(%lx)\n"));
2435 	} else {
2436 		ldcp->ldc_status = istatus;
2437 	}
2438 
2439 	/* if channel is already UP - restart handshake */
2440 	if (ldcp->ldc_status == LDC_UP) {
2441 		vgen_handle_evt_up(ldcp, flag);
2442 	}
2443 
2444 	DBG1((vnetp, "vgen_handle_evt_reset: exit: id(%lx)\n", ldcp->ldc_id));
2445 }
2446 
2447 /* Interrupt handler for the channel */
2448 static uint_t
2449 vgen_ldc_cb(uint64_t event, caddr_t arg)
2450 {
2451 	_NOTE(ARGUNUSED(event))
2452 	vgen_ldc_t	*ldcp;
2453 	void 		*vnetp;
2454 	vgen_t		*vgenp;
2455 	size_t		msglen;
2456 	ldc_status_t 	istatus;
2457 	uint64_t	ldcmsg[7];
2458 	int 		rv = 0;
2459 	vio_msg_tag_t	*tagp;
2460 	mblk_t		*mp = NULL;
2461 	mblk_t		*bp = NULL;
2462 	mblk_t		*bpt = NULL;
2463 	mblk_t		*headp = NULL;
2464 	mblk_t		*tailp = NULL;
2465 	vgen_stats_t	*statsp;
2466 
2467 	ldcp = (vgen_ldc_t *)arg;
2468 	vgenp = LDC_TO_VGEN(ldcp);
2469 	vnetp = LDC_TO_VNET(ldcp);
2470 	statsp = ldcp->statsp;
2471 
2472 	DBG1((vnetp, "vgen_ldc_cb enter: ldcid (%lx)\n", ldcp->ldc_id));
2473 
2474 	mutex_enter(&ldcp->cblock);
2475 	statsp->callbacks++;
2476 	if ((ldcp->ldc_status == LDC_INIT) || (ldcp->ldc_handle == NULL)) {
2477 		DWARN((vnetp, "vgen_ldc_cb: id(%lx), status(%d) is LDC_INIT\n",
2478 		    ldcp->ldc_id, ldcp->ldc_status));
2479 		mutex_exit(&ldcp->cblock);
2480 		return (LDC_SUCCESS);
2481 	}
2482 
2483 	/*
2484 	 * NOTE: not using switch() as event could be triggered by
2485 	 * a state change and a read request. Also the ordering	of the
2486 	 * check for the event types is deliberate.
2487 	 */
2488 	if (event & LDC_EVT_UP) {
2489 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
2490 			DWARN((vnetp,
2491 			    "vgen_ldc_cb: ldc_status err id(%lx)\n"));
2492 		} else {
2493 			ldcp->ldc_status = istatus;
2494 		}
2495 		ASSERT(ldcp->ldc_status == LDC_UP);
2496 		DWARN((vnetp,
2497 		    "vgen_ldc_cb: id(%lx) event(%lx) UP, status(%d)\n",
2498 		    ldcp->ldc_id, event, ldcp->ldc_status));
2499 
2500 		vgen_handle_evt_up(ldcp, B_FALSE);
2501 
2502 		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);
2503 	}
2504 
2505 	if (event & LDC_EVT_READ) {
2506 		DBG2((vnetp,
2507 		    "vgen_ldc_cb: id(%lx) event(%lx) READ, status(%d)\n",
2508 		    ldcp->ldc_id, event, ldcp->ldc_status));
2509 
2510 		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);
2511 		goto vgen_ldccb_rcv;
2512 	}
2513 
2514 	if (event & (LDC_EVT_RESET | LDC_EVT_DOWN)) {
2515 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
2516 			DWARN((vnetp,
2517 			    "vgen_ldc_cb: ldc_status err id(%lx)\n"));
2518 		} else {
2519 			ldcp->ldc_status = istatus;
2520 		}
2521 		DWARN((vnetp,
2522 		    "vgen_ldc_cb: id(%lx) event(%lx) RESET/DOWN, status(%d)\n",
2523 		    ldcp->ldc_id, event, ldcp->ldc_status));
2524 
2525 		vgen_handle_evt_reset(ldcp, B_FALSE);
2526 	}
2527 
2528 	mutex_exit(&ldcp->cblock);
2529 	goto vgen_ldccb_exit;
2530 
2531 vgen_ldccb_rcv:
2532 
2533 	/* if event is LDC_EVT_READ, receive all packets */
2534 	do {
2535 		msglen = sizeof (ldcmsg);
2536 		rv = ldc_read(ldcp->ldc_handle, (caddr_t)&ldcmsg, &msglen);
2537 
2538 		if (rv != 0) {
2539 			DWARN((vnetp,
2540 			    "vgen_ldc_cb:ldc_read err id(%lx) rv(%d) "
2541 			    "len(%d)\n", ldcp->ldc_id, rv, msglen));
2542 			if (rv == ECONNRESET)
2543 				goto vgen_ldccb_error;
2544 			break;
2545 		}
2546 		if (msglen == 0) {
2547 			DBG2((vnetp, "vgen_ldc_cb: ldc_read id(%lx) NODATA",
2548 			ldcp->ldc_id));
2549 			break;
2550 		}
2551 		DBG2((vnetp, "vgen_ldc_cb: ldc_read id(%lx): msglen(%d)",
2552 		    ldcp->ldc_id, msglen));
2553 
2554 		tagp = (vio_msg_tag_t *)ldcmsg;
2555 
2556 		if (ldcp->peer_sid) {
2557 			/*
2558 			 * check sid only after we have received peer's sid
2559 			 * in the version negotiate msg.
2560 			 */
2561 #ifdef DEBUG
2562 			if (vgen_hdbg & HDBG_BAD_SID) {
2563 				/* simulate bad sid condition */
2564 				tagp->vio_sid = 0;
2565 				vgen_hdbg &= ~(HDBG_BAD_SID);
2566 			}
2567 #endif
2568 			rv = vgen_check_sid(ldcp, tagp);
2569 			if (rv != VGEN_SUCCESS) {
2570 				/*
2571 				 * If sid mismatch is detected,
2572 				 * reset the channel.
2573 				 */
2574 				ldcp->need_ldc_reset = B_TRUE;
2575 				goto vgen_ldccb_error;
2576 			}
2577 		}
2578 
2579 		switch (tagp->vio_msgtype) {
2580 		case VIO_TYPE_CTRL:
2581 			rv = vgen_handle_ctrlmsg(ldcp, tagp);
2582 			break;
2583 
2584 		case VIO_TYPE_DATA:
2585 			headp = tailp = NULL;
2586 			rv = vgen_handle_datamsg(ldcp, tagp, &headp, &tailp);
2587 			/* build a chain of received packets */
2588 			if (headp != NULL) {
2589 				if (bp == NULL) {
2590 					bp = headp;
2591 					bpt = tailp;
2592 				} else {
2593 					bpt->b_next = headp;
2594 					bpt = tailp;
2595 				}
2596 			}
2597 			break;
2598 
2599 		case VIO_TYPE_ERR:
2600 			vgen_handle_errmsg(ldcp, tagp);
2601 			break;
2602 
2603 		default:
2604 			DWARN((vnetp,
2605 			    "vgen_ldc_cb: Unknown VIO_TYPE(%x)\n",
2606 			    tagp->vio_msgtype));
2607 			break;
2608 		}
2609 
2610 vgen_ldccb_error:
2611 		if (rv == ECONNRESET) {
2612 			if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
2613 				DWARN((vnetp,
2614 				    "vgen_ldc_cb: ldc_status err id(%lx)\n"));
2615 			} else {
2616 				ldcp->ldc_status = istatus;
2617 			}
2618 			vgen_handle_evt_reset(ldcp, B_FALSE);
2619 			break;
2620 		} else if (rv) {
2621 			vgen_handshake_retry(ldcp);
2622 			break;
2623 		}
2624 
2625 	} while (msglen);
2626 
2627 	mutex_exit(&ldcp->cblock);
2628 
2629 	/* send up the received packets to MAC layer */
2630 	while (bp != NULL) {
2631 		mp = bp;
2632 		bp = bp->b_next;
2633 		mp->b_next = mp->b_prev = NULL;
2634 		DBG2((vnetp, "vgen_ldc_cb: id(%lx) rx pkt len (%lx)\n",
2635 		    ldcp->ldc_id, MBLKL(mp)));
2636 		vnet_rx(vgenp->vnetp, NULL, mp);
2637 	}
2638 
2639 vgen_ldccb_exit:
2640 	if (ldcp->cancel_htid) {
2641 		/*
2642 		 * Cancel handshake timer.
2643 		 * untimeout(9F) will not return until the pending callback is
2644 		 * cancelled or has run. No problems will result from calling
2645 		 * untimeout if the handler has already completed.
2646 		 * If the timeout handler did run, then it would just
2647 		 * return as cancel_htid is set.
2648 		 */
2649 		(void) untimeout(ldcp->cancel_htid);
2650 		ldcp->cancel_htid = 0;
2651 	}
2652 	DBG1((vnetp, "vgen_ldc_cb exit: ldcid (%lx)\n", ldcp->ldc_id));
2653 
2654 	return (LDC_SUCCESS);
2655 }
2656 
2657 /* vgen handshake functions */
2658 
2659 /* change the hphase for the channel to the next phase */
2660 static vgen_ldc_t *
2661 vh_nextphase(vgen_ldc_t *ldcp)
2662 {
2663 	if (ldcp->hphase == VH_PHASE3) {
2664 		ldcp->hphase = VH_DONE;
2665 	} else {
2666 		ldcp->hphase++;
2667 	}
2668 	return (ldcp);
2669 }
2670 
2671 /*
2672  * Check whether the given version is supported or not and
2673  * return VGEN_SUCCESS if supported.
2674  */
2675 static int
2676 vgen_supported_version(vgen_ldc_t *ldcp, uint16_t ver_major,
2677 uint16_t ver_minor)
2678 {
2679 	vgen_ver_t	*versions = ldcp->vgen_versions;
2680 	int		i = 0;
2681 
2682 	while (i < VGEN_NUM_VER) {
2683 		if ((versions[i].ver_major == 0) &&
2684 		    (versions[i].ver_minor == 0)) {
2685 			break;
2686 		}
2687 		if ((versions[i].ver_major == ver_major) &&
2688 			(versions[i].ver_minor == ver_minor)) {
2689 			return (VGEN_SUCCESS);
2690 		}
2691 		i++;
2692 	}
2693 	return (VGEN_FAILURE);
2694 }
2695 
2696 /*
2697  * Given a version, return VGEN_SUCCESS if a lower version is supported.
2698  */
2699 static int
2700 vgen_next_version(vgen_ldc_t *ldcp, vgen_ver_t *verp)
2701 {
2702 	vgen_ver_t	*versions = ldcp->vgen_versions;
2703 	int		i = 0;
2704 
2705 	while (i < VGEN_NUM_VER) {
2706 		if ((versions[i].ver_major == 0) &&
2707 		    (versions[i].ver_minor == 0)) {
2708 			break;
2709 		}
2710 		/*
2711 		 * if we support a lower minor version within the same major
2712 		 * version, or if we support a lower major version,
2713 		 * update the verp parameter with this lower version and
2714 		 * return success.
2715 		 */
2716 		if (((versions[i].ver_major == verp->ver_major) &&
2717 			(versions[i].ver_minor < verp->ver_minor)) ||
2718 			(versions[i].ver_major < verp->ver_major)) {
2719 				verp->ver_major = versions[i].ver_major;
2720 				verp->ver_minor = versions[i].ver_minor;
2721 				return (VGEN_SUCCESS);
2722 		}
2723 		i++;
2724 	}
2725 
2726 	return (VGEN_FAILURE);
2727 }
2728 
2729 /*
2730  * wrapper routine to send the given message over ldc using ldc_write().
2731  */
2732 static int
2733 vgen_sendmsg(vgen_ldc_t *ldcp, caddr_t msg,  size_t msglen,
2734     boolean_t caller_holds_lock)
2735 {
2736 	int	rv;
2737 	size_t	len;
2738 	void *vnetp = LDC_TO_VNET(ldcp);
2739 	uint32_t retries = 0;
2740 
2741 	len = msglen;
2742 	if ((len == 0) || (msg == NULL))
2743 		return (VGEN_FAILURE);
2744 
2745 	if (!caller_holds_lock) {
2746 		mutex_enter(&ldcp->txlock);
2747 	}
2748 
2749 	do {
2750 		len = msglen;
2751 		rv = ldc_write(ldcp->ldc_handle, (caddr_t)msg, &len);
2752 		if (retries++ >= vgen_ldcwr_retries)
2753 			break;
2754 	} while (rv == EWOULDBLOCK);
2755 
2756 	if (!caller_holds_lock) {
2757 		mutex_exit(&ldcp->txlock);
2758 	}
2759 
2760 	if (rv != 0) {
2761 		DWARN((vnetp,
2762 		    "vgen_sendmsg: ldc_write failed: id(%lx) rv(%d)\n",
2763 		    ldcp->ldc_id, rv, msglen));
2764 		return (rv);
2765 	}
2766 
2767 	if (len != msglen) {
2768 		DWARN((vnetp,
2769 		    "vgen_sendmsg: ldc_write failed: id(%lx) rv(%d)"
2770 		    " msglen (%d)\n", ldcp->ldc_id, rv, msglen));
2771 		return (VGEN_FAILURE);
2772 	}
2773 
2774 	return (VGEN_SUCCESS);
2775 }
2776 
2777 /* send version negotiate message to the peer over ldc */
2778 static int
2779 vgen_send_version_negotiate(vgen_ldc_t *ldcp)
2780 {
2781 	vio_ver_msg_t	vermsg;
2782 	vio_msg_tag_t	*tagp = &vermsg.tag;
2783 	void		*vnetp = LDC_TO_VNET(ldcp);
2784 	int		rv;
2785 
2786 	bzero(&vermsg, sizeof (vermsg));
2787 
2788 	tagp->vio_msgtype = VIO_TYPE_CTRL;
2789 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
2790 	tagp->vio_subtype_env = VIO_VER_INFO;
2791 	tagp->vio_sid = ldcp->local_sid;
2792 
2793 	/* get version msg payload from ldcp->local */
2794 	vermsg.ver_major = ldcp->local_hparams.ver_major;
2795 	vermsg.ver_minor = ldcp->local_hparams.ver_minor;
2796 	vermsg.dev_class = ldcp->local_hparams.dev_class;
2797 
2798 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (vermsg), B_FALSE);
2799 	if (rv != VGEN_SUCCESS) {
2800 		DWARN((vnetp, "vgen_send_version_negotiate: vgen_sendmsg failed"
2801 		    "id (%lx)\n", ldcp->ldc_id));
2802 		return (rv);
2803 	}
2804 
2805 	ldcp->hstate |= VER_INFO_SENT;
2806 	DBG2((vnetp,
2807 	    "vgen_send_version_negotiate: VER_INFO_SENT id (%lx) ver(%d,%d)\n",
2808 	    ldcp->ldc_id, vermsg.ver_major, vermsg.ver_minor));
2809 
2810 	return (VGEN_SUCCESS);
2811 }
2812 
2813 /* send attr info message to the peer over ldc */
2814 static int
2815 vgen_send_attr_info(vgen_ldc_t *ldcp)
2816 {
2817 	vnet_attr_msg_t	attrmsg;
2818 	vio_msg_tag_t	*tagp = &attrmsg.tag;
2819 	void		*vnetp = LDC_TO_VNET(ldcp);
2820 	int		rv;
2821 
2822 	bzero(&attrmsg, sizeof (attrmsg));
2823 
2824 	tagp->vio_msgtype = VIO_TYPE_CTRL;
2825 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
2826 	tagp->vio_subtype_env = VIO_ATTR_INFO;
2827 	tagp->vio_sid = ldcp->local_sid;
2828 
2829 	/* get attr msg payload from ldcp->local */
2830 	attrmsg.mtu = ldcp->local_hparams.mtu;
2831 	attrmsg.addr = ldcp->local_hparams.addr;
2832 	attrmsg.addr_type = ldcp->local_hparams.addr_type;
2833 	attrmsg.xfer_mode = ldcp->local_hparams.xfer_mode;
2834 	attrmsg.ack_freq = ldcp->local_hparams.ack_freq;
2835 
2836 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (attrmsg), B_FALSE);
2837 	if (rv != VGEN_SUCCESS) {
2838 		DWARN((vnetp, "vgen_send_attr_info: vgen_sendmsg failed"
2839 		    "id (%lx)\n", ldcp->ldc_id));
2840 		return (rv);
2841 	}
2842 
2843 	ldcp->hstate |= ATTR_INFO_SENT;
2844 	DBG2((vnetp, "vgen_send_attr_info: ATTR_INFO_SENT id (%lx)\n",
2845 	    ldcp->ldc_id));
2846 
2847 	return (VGEN_SUCCESS);
2848 }
2849 
2850 /* send descriptor ring register message to the peer over ldc */
2851 static int
2852 vgen_send_dring_reg(vgen_ldc_t *ldcp)
2853 {
2854 	vio_dring_reg_msg_t	msg;
2855 	vio_msg_tag_t		*tagp = &msg.tag;
2856 	void		*vnetp = LDC_TO_VNET(ldcp);
2857 	int		rv;
2858 
2859 	bzero(&msg, sizeof (msg));
2860 
2861 	tagp->vio_msgtype = VIO_TYPE_CTRL;
2862 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
2863 	tagp->vio_subtype_env = VIO_DRING_REG;
2864 	tagp->vio_sid = ldcp->local_sid;
2865 
2866 	/* get dring info msg payload from ldcp->local */
2867 	bcopy(&(ldcp->local_hparams.dring_cookie), (msg.cookie),
2868 		sizeof (ldc_mem_cookie_t));
2869 	msg.ncookies = ldcp->local_hparams.num_dcookies;
2870 	msg.num_descriptors = ldcp->local_hparams.num_desc;
2871 	msg.descriptor_size = ldcp->local_hparams.desc_size;
2872 
2873 	/*
2874 	 * dring_ident is set to 0. After mapping the dring, peer sets this
2875 	 * value and sends it in the ack, which is saved in
2876 	 * vgen_handle_dring_reg().
2877 	 */
2878 	msg.dring_ident = 0;
2879 
2880 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (msg), B_FALSE);
2881 	if (rv != VGEN_SUCCESS) {
2882 		DWARN((vnetp, "vgen_send_dring_reg: vgen_sendmsg failed"
2883 		    "id (%lx)\n", ldcp->ldc_id));
2884 		return (rv);
2885 	}
2886 
2887 	ldcp->hstate |= DRING_INFO_SENT;
2888 	DBG2((vnetp, "vgen_send_dring_reg: DRING_INFO_SENT id (%lx)\n",
2889 	    ldcp->ldc_id));
2890 
2891 	return (VGEN_SUCCESS);
2892 }
2893 
2894 static int
2895 vgen_send_rdx_info(vgen_ldc_t *ldcp)
2896 {
2897 	vio_rdx_msg_t	rdxmsg;
2898 	vio_msg_tag_t	*tagp = &rdxmsg.tag;
2899 	void		*vnetp = LDC_TO_VNET(ldcp);
2900 	int		rv;
2901 
2902 	bzero(&rdxmsg, sizeof (rdxmsg));
2903 
2904 	tagp->vio_msgtype = VIO_TYPE_CTRL;
2905 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
2906 	tagp->vio_subtype_env = VIO_RDX;
2907 	tagp->vio_sid = ldcp->local_sid;
2908 
2909 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (rdxmsg), B_FALSE);
2910 	if (rv != VGEN_SUCCESS) {
2911 		DWARN((vnetp, "vgen_send_rdx_info: vgen_sendmsg failed"
2912 		    "id (%lx)\n", ldcp->ldc_id));
2913 		return (rv);
2914 	}
2915 
2916 	ldcp->hstate |= RDX_INFO_SENT;
2917 	DBG2((vnetp, "vgen_send_rdx_info: RDX_INFO_SENT id (%lx)\n",
2918 	    ldcp->ldc_id));
2919 
2920 	return (VGEN_SUCCESS);
2921 }
2922 
2923 /* send descriptor ring data message to the peer over ldc */
2924 static int
2925 vgen_send_dring_data(vgen_ldc_t *ldcp, uint32_t start, int32_t end)
2926 {
2927 	vio_dring_msg_t	dringmsg, *msgp = &dringmsg;
2928 	vio_msg_tag_t	*tagp = &msgp->tag;
2929 	void		*vnetp = LDC_TO_VNET(ldcp);
2930 	int		rv;
2931 
2932 	bzero(msgp, sizeof (*msgp));
2933 
2934 	tagp->vio_msgtype = VIO_TYPE_DATA;
2935 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
2936 	tagp->vio_subtype_env = VIO_DRING_DATA;
2937 	tagp->vio_sid = ldcp->local_sid;
2938 
2939 	msgp->seq_num = ldcp->next_txseq;
2940 	msgp->dring_ident = ldcp->local_hparams.dring_ident;
2941 	msgp->start_idx = start;
2942 	msgp->end_idx = end;
2943 
2944 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (dringmsg), B_TRUE);
2945 	if (rv != VGEN_SUCCESS) {
2946 		DWARN((vnetp, "vgen_send_dring_data: vgen_sendmsg failed"
2947 		    " id (%lx)\n", ldcp->ldc_id));
2948 		return (rv);
2949 	}
2950 
2951 	ldcp->next_txseq++;
2952 	ldcp->statsp->dring_data_msgs++;
2953 
2954 	DBG2((vnetp, "vgen_send_dring_data: DRING_DATA_SENT id (%lx)\n",
2955 	    ldcp->ldc_id));
2956 
2957 	return (VGEN_SUCCESS);
2958 }
2959 
2960 /* send multicast addr info message to vsw */
2961 static int
2962 vgen_send_mcast_info(vgen_ldc_t *ldcp)
2963 {
2964 	vnet_mcast_msg_t	mcastmsg;
2965 	vnet_mcast_msg_t	*msgp;
2966 	vio_msg_tag_t		*tagp;
2967 	vgen_t			*vgenp;
2968 	void			*vnetp;
2969 	struct ether_addr	*mca;
2970 	int			rv;
2971 	int			i;
2972 	uint32_t		size;
2973 	uint32_t		mccount;
2974 	uint32_t		n;
2975 
2976 	msgp = &mcastmsg;
2977 	tagp = &msgp->tag;
2978 	vgenp = LDC_TO_VGEN(ldcp);
2979 	vnetp = LDC_TO_VNET(ldcp);
2980 
2981 	mccount = vgenp->mccount;
2982 	i = 0;
2983 
2984 	do {
2985 		tagp->vio_msgtype = VIO_TYPE_CTRL;
2986 		tagp->vio_subtype = VIO_SUBTYPE_INFO;
2987 		tagp->vio_subtype_env = VNET_MCAST_INFO;
2988 		tagp->vio_sid = ldcp->local_sid;
2989 
2990 		n = ((mccount >= VNET_NUM_MCAST) ? VNET_NUM_MCAST : mccount);
2991 		size = n * sizeof (struct ether_addr);
2992 
2993 		mca = &(vgenp->mctab[i]);
2994 		bcopy(mca, (msgp->mca), size);
2995 		msgp->set = B_TRUE;
2996 		msgp->count = n;
2997 
2998 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msgp),
2999 		    B_FALSE);
3000 		if (rv != VGEN_SUCCESS) {
3001 			DWARN((vnetp, "vgen_send_mcast_info: vgen_sendmsg err"
3002 			    "id (%lx)\n", ldcp->ldc_id));
3003 			return (rv);
3004 		}
3005 
3006 		mccount -= n;
3007 		i += n;
3008 
3009 	} while (mccount);
3010 
3011 	return (VGEN_SUCCESS);
3012 }
3013 
3014 /* Initiate Phase 2 of handshake */
3015 static int
3016 vgen_handshake_phase2(vgen_ldc_t *ldcp)
3017 {
3018 	int rv;
3019 	uint32_t ncookies = 0;
3020 	void	*vnetp = LDC_TO_VNET(ldcp);
3021 #ifdef DEBUG
3022 	if (vgen_hdbg & HDBG_OUT_STATE) {
3023 		/* simulate out of state condition */
3024 		vgen_hdbg &= ~(HDBG_OUT_STATE);
3025 		rv = vgen_send_rdx_info(ldcp);
3026 		return (rv);
3027 	}
3028 	if (vgen_hdbg & HDBG_TIMEOUT) {
3029 		/* simulate timeout condition */
3030 		vgen_hdbg &= ~(HDBG_TIMEOUT);
3031 		return (VGEN_SUCCESS);
3032 	}
3033 #endif
3034 	rv = vgen_send_attr_info(ldcp);
3035 	if (rv != VGEN_SUCCESS) {
3036 		return (rv);
3037 	}
3038 
3039 	/* Bind descriptor ring to the channel */
3040 	if (ldcp->num_txdcookies == 0) {
3041 		rv = ldc_mem_dring_bind(ldcp->ldc_handle, ldcp->tx_dhandle,
3042 		    LDC_SHADOW_MAP, LDC_MEM_RW, &ldcp->tx_dcookie, &ncookies);
3043 		if (rv != 0) {
3044 			DWARN((vnetp, "vgen_handshake_phase2: id (%lx) "
3045 			    "ldc_mem_dring_bind failed rv(%x)\n",
3046 			    ldcp->ldc_id, rv));
3047 			return (rv);
3048 		}
3049 		ASSERT(ncookies == 1);
3050 		ldcp->num_txdcookies = ncookies;
3051 	}
3052 
3053 	/* update local dring_info params */
3054 	bcopy(&(ldcp->tx_dcookie), &(ldcp->local_hparams.dring_cookie),
3055 		sizeof (ldc_mem_cookie_t));
3056 	ldcp->local_hparams.num_dcookies = ldcp->num_txdcookies;
3057 	ldcp->local_hparams.num_desc = ldcp->num_txds;
3058 	ldcp->local_hparams.desc_size = sizeof (vnet_public_desc_t);
3059 
3060 	rv = vgen_send_dring_reg(ldcp);
3061 	if (rv != VGEN_SUCCESS) {
3062 		return (rv);
3063 	}
3064 
3065 	return (VGEN_SUCCESS);
3066 }
3067 
3068 /*
3069  * This function resets the handshake phase to VH_PHASE0(pre-handshake phase).
3070  * This can happen after a channel comes up (status: LDC_UP) or
3071  * when handshake gets terminated due to various conditions.
3072  */
3073 static void
3074 vgen_reset_hphase(vgen_ldc_t *ldcp)
3075 {
3076 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
3077 	void	*vnetp = LDC_TO_VNET(ldcp);
3078 	ldc_status_t istatus;
3079 	int rv;
3080 
3081 	DBG2((vnetp, "vgen_reset_hphase: id(0x%lx)\n", ldcp->ldc_id));
3082 	/* reset hstate and hphase */
3083 	ldcp->hstate = 0;
3084 	ldcp->hphase = VH_PHASE0;
3085 
3086 	/*
3087 	 * Save the id of pending handshake timer in cancel_htid.
3088 	 * This will be checked in vgen_ldc_cb() and the handshake timer will
3089 	 * be cancelled after releasing cblock.
3090 	 */
3091 	if (ldcp->htid) {
3092 		ldcp->cancel_htid = ldcp->htid;
3093 		ldcp->htid = 0;
3094 	}
3095 
3096 	if (ldcp->local_hparams.dring_ready) {
3097 		ldcp->local_hparams.dring_ready = B_FALSE;
3098 	}
3099 
3100 	/* Unbind tx descriptor ring from the channel */
3101 	if (ldcp->num_txdcookies) {
3102 		rv = ldc_mem_dring_unbind(ldcp->tx_dhandle);
3103 		if (rv != 0) {
3104 			DWARN((vnetp,
3105 			    "vgen_reset_hphase: ldc_mem_dring_unbind "
3106 			    "failed id(%lx)\n", ldcp->ldc_id));
3107 		}
3108 		ldcp->num_txdcookies = 0;
3109 	}
3110 
3111 	if (ldcp->peer_hparams.dring_ready) {
3112 		ldcp->peer_hparams.dring_ready = B_FALSE;
3113 		/* Unmap peer's dring */
3114 		(void) ldc_mem_dring_unmap(ldcp->rx_dhandle);
3115 		vgen_clobber_rxds(ldcp);
3116 	}
3117 
3118 	vgen_clobber_tbufs(ldcp);
3119 
3120 	/*
3121 	 * clear local handshake params and initialize.
3122 	 */
3123 	bzero(&(ldcp->local_hparams), sizeof (ldcp->local_hparams));
3124 
3125 	/* set version to the highest version supported */
3126 	ldcp->local_hparams.ver_major =
3127 			ldcp->vgen_versions[0].ver_major;
3128 	ldcp->local_hparams.ver_minor =
3129 			ldcp->vgen_versions[0].ver_minor;
3130 	ldcp->local_hparams.dev_class = VDEV_NETWORK;
3131 
3132 	/* set attr_info params */
3133 	ldcp->local_hparams.mtu = ETHERMAX;
3134 	ldcp->local_hparams.addr =
3135 		vgen_macaddr_strtoul(vgenp->macaddr);
3136 	ldcp->local_hparams.addr_type = ADDR_TYPE_MAC;
3137 	ldcp->local_hparams.xfer_mode = VIO_DRING_MODE;
3138 	ldcp->local_hparams.ack_freq = 0;	/* don't need acks */
3139 
3140 	/*
3141 	 * Note: dring is created, but not bound yet.
3142 	 * local dring_info params will be updated when we bind the dring in
3143 	 * vgen_handshake_phase2().
3144 	 * dring_ident is set to 0. After mapping the dring, peer sets this
3145 	 * value and sends it in the ack, which is saved in
3146 	 * vgen_handle_dring_reg().
3147 	 */
3148 	ldcp->local_hparams.dring_ident = 0;
3149 
3150 	/* clear peer_hparams */
3151 	bzero(&(ldcp->peer_hparams), sizeof (ldcp->peer_hparams));
3152 
3153 	/* reset the channel if required */
3154 	if (ldcp->need_ldc_reset) {
3155 		DWARN((vnetp,
3156 		    "vgen_reset_hphase: id (%lx), Doing Channel Reset...\n",
3157 		    ldcp->ldc_id));
3158 		ldcp->need_ldc_reset = B_FALSE;
3159 		(void) ldc_down(ldcp->ldc_handle);
3160 		(void) ldc_status(ldcp->ldc_handle, &istatus);
3161 		DBG2((vnetp,
3162 		    "vgen_reset_hphase: id (%lx), Reset Done,ldc_status(%x)\n",
3163 		    ldcp->ldc_id, istatus));
3164 		ldcp->ldc_status = istatus;
3165 
3166 		/* clear sids */
3167 		ldcp->local_sid = 0;
3168 		ldcp->peer_sid = 0;
3169 
3170 		/* try to bring the channel up */
3171 		rv = ldc_up(ldcp->ldc_handle);
3172 		if (rv != 0) {
3173 			DWARN((vnetp,
3174 			    "vgen_reset_hphase: ldc_up err id(%lx) rv(%d)\n",
3175 			    ldcp->ldc_id, rv));
3176 		}
3177 
3178 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
3179 			DWARN((vnetp,
3180 			    "vgen_reset_hphase: ldc_status err id(%lx)\n"));
3181 		} else {
3182 			ldcp->ldc_status = istatus;
3183 		}
3184 	}
3185 }
3186 
3187 /* wrapper function for vgen_reset_hphase */
3188 static void
3189 vgen_handshake_reset(vgen_ldc_t *ldcp)
3190 {
3191 	ASSERT(MUTEX_HELD(&ldcp->cblock));
3192 	mutex_enter(&ldcp->txlock);
3193 	mutex_enter(&ldcp->tclock);
3194 
3195 	vgen_reset_hphase(ldcp);
3196 
3197 	mutex_exit(&ldcp->tclock);
3198 	mutex_exit(&ldcp->txlock);
3199 }
3200 
3201 /*
3202  * Initiate handshake with the peer by sending various messages
3203  * based on the handshake-phase that the channel is currently in.
3204  */
3205 static void
3206 vgen_handshake(vgen_ldc_t *ldcp)
3207 {
3208 	uint32_t hphase = ldcp->hphase;
3209 	void	*vnetp = LDC_TO_VNET(ldcp);
3210 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
3211 	ldc_status_t	istatus;
3212 	int	rv = 0;
3213 
3214 	switch (hphase) {
3215 
3216 	case VH_PHASE1:
3217 
3218 		/*
3219 		 * start timer, for entire handshake process, turn this timer
3220 		 * off if all phases of handshake complete successfully and
3221 		 * hphase goes to VH_DONE(below) or
3222 		 * vgen_reset_hphase() gets called or
3223 		 * channel is reset due to errors or
3224 		 * vgen_ldc_uninit() is invoked(vgen_stop).
3225 		 */
3226 		ldcp->htid = timeout(vgen_hwatchdog, (caddr_t)ldcp,
3227 		    drv_usectohz(vgen_hwd_interval * 1000));
3228 
3229 		/* Phase 1 involves negotiating the version */
3230 		rv = vgen_send_version_negotiate(ldcp);
3231 		break;
3232 
3233 	case VH_PHASE2:
3234 		rv = vgen_handshake_phase2(ldcp);
3235 		break;
3236 
3237 	case VH_PHASE3:
3238 		rv = vgen_send_rdx_info(ldcp);
3239 		break;
3240 
3241 	case VH_DONE:
3242 		/*
3243 		 * Save the id of pending handshake timer in cancel_htid.
3244 		 * This will be checked in vgen_ldc_cb() and the handshake
3245 		 * timer will be cancelled after releasing cblock.
3246 		 */
3247 		if (ldcp->htid) {
3248 			ldcp->cancel_htid = ldcp->htid;
3249 			ldcp->htid = 0;
3250 		}
3251 		ldcp->hretries = 0;
3252 #if 0
3253 		vgen_print_ldcinfo(ldcp);
3254 #endif
3255 		DBG1((vnetp, "vgen_handshake: id(0x%lx) Handshake Done\n",
3256 		    ldcp->ldc_id));
3257 
3258 		if (ldcp->need_mcast_sync) {
3259 			/* need to sync multicast table with vsw */
3260 
3261 			ldcp->need_mcast_sync = B_FALSE;
3262 			mutex_exit(&ldcp->cblock);
3263 
3264 			mutex_enter(&vgenp->lock);
3265 			rv = vgen_send_mcast_info(ldcp);
3266 			mutex_exit(&vgenp->lock);
3267 
3268 			mutex_enter(&ldcp->cblock);
3269 			if (rv != VGEN_SUCCESS)
3270 				break;
3271 		}
3272 
3273 		/*
3274 		 * Check if mac layer should be notified to restart
3275 		 * transmissions. This can happen if the channel got
3276 		 * reset and vgen_clobber_tbufs() is called, while
3277 		 * need_resched is set.
3278 		 */
3279 		mutex_enter(&ldcp->tclock);
3280 		if (ldcp->need_resched) {
3281 			ldcp->need_resched = B_FALSE;
3282 			vnet_tx_update(vgenp->vnetp);
3283 		}
3284 		mutex_exit(&ldcp->tclock);
3285 
3286 		break;
3287 
3288 	default:
3289 		break;
3290 	}
3291 
3292 	if (rv == ECONNRESET) {
3293 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
3294 			DWARN((vnetp,
3295 			    "vgen_handshake: ldc_status err id(%lx)\n"));
3296 		} else {
3297 			ldcp->ldc_status = istatus;
3298 		}
3299 		vgen_handle_evt_reset(ldcp, B_FALSE);
3300 	} else if (rv) {
3301 		vgen_handshake_reset(ldcp);
3302 	}
3303 }
3304 
3305 /*
3306  * Check if the current handshake phase has completed successfully and
3307  * return the status.
3308  */
3309 static int
3310 vgen_handshake_done(vgen_ldc_t *ldcp)
3311 {
3312 	uint32_t	hphase = ldcp->hphase;
3313 	int 		status = 0;
3314 	void		*vnetp = LDC_TO_VNET(ldcp);
3315 
3316 	switch (hphase) {
3317 
3318 	case VH_PHASE1:
3319 		/*
3320 		 * Phase1 is done, if version negotiation
3321 		 * completed successfully.
3322 		 */
3323 		status = ((ldcp->hstate & VER_NEGOTIATED) ==
3324 			VER_NEGOTIATED);
3325 		break;
3326 
3327 	case VH_PHASE2:
3328 		/*
3329 		 * Phase 2 is done, if attr info and dring info
3330 		 * have been exchanged successfully.
3331 		 */
3332 		status = (((ldcp->hstate & ATTR_INFO_EXCHANGED) ==
3333 			    ATTR_INFO_EXCHANGED) &&
3334 			    ((ldcp->hstate & DRING_INFO_EXCHANGED) ==
3335 			    DRING_INFO_EXCHANGED));
3336 		break;
3337 
3338 	case VH_PHASE3:
3339 		/* Phase 3 is done, if rdx msg has been exchanged */
3340 		status = ((ldcp->hstate & RDX_EXCHANGED) ==
3341 			RDX_EXCHANGED);
3342 		break;
3343 
3344 	default:
3345 		break;
3346 	}
3347 
3348 	if (status == 0) {
3349 		return (VGEN_FAILURE);
3350 	}
3351 	DBG2((vnetp, "VNET_HANDSHAKE_DONE: PHASE(%d)\n", hphase));
3352 	return (VGEN_SUCCESS);
3353 }
3354 
3355 /* retry handshake on failure */
3356 static void
3357 vgen_handshake_retry(vgen_ldc_t *ldcp)
3358 {
3359 	/* reset handshake phase */
3360 	vgen_handshake_reset(ldcp);
3361 
3362 	/* handshake retry is specified and the channel is UP */
3363 	if (vgen_max_hretries && (ldcp->ldc_status == LDC_UP)) {
3364 		if (ldcp->hretries++ < vgen_max_hretries) {
3365 			ldcp->local_sid = ddi_get_lbolt();
3366 			vgen_handshake(vh_nextphase(ldcp));
3367 		}
3368 	}
3369 }
3370 
3371 /*
3372  * Handle a version info msg from the peer or an ACK/NACK from the peer
3373  * to a version info msg that we sent.
3374  */
3375 static int
3376 vgen_handle_version_negotiate(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
3377 {
3378 	vio_ver_msg_t	*vermsg = (vio_ver_msg_t *)tagp;
3379 	int		ack = 0;
3380 	int		failed = 0;
3381 	void		*vnetp = LDC_TO_VNET(ldcp);
3382 	int		idx;
3383 	vgen_ver_t	*versions = ldcp->vgen_versions;
3384 	int		rv = 0;
3385 
3386 	DBG1((vnetp, "vgen_handle_version_negotiate: enter\n"));
3387 	switch (tagp->vio_subtype) {
3388 	case VIO_SUBTYPE_INFO:
3389 
3390 		/*  Cache sid of peer if this is the first time */
3391 		if (ldcp->peer_sid == 0) {
3392 			DBG2((vnetp,
3393 			    "vgen_handle_version_negotiate: id (%lx) Caching"
3394 			    " peer_sid(%x)\n", ldcp->ldc_id, tagp->vio_sid));
3395 			ldcp->peer_sid = tagp->vio_sid;
3396 		}
3397 
3398 		if (ldcp->hphase != VH_PHASE1) {
3399 			/*
3400 			 * If we are not already in VH_PHASE1, reset to
3401 			 * pre-handshake state, and initiate handshake
3402 			 * to the peer too.
3403 			 */
3404 			vgen_handshake_reset(ldcp);
3405 			vgen_handshake(vh_nextphase(ldcp));
3406 		}
3407 		ldcp->hstate |= VER_INFO_RCVD;
3408 
3409 		/* save peer's requested values */
3410 		ldcp->peer_hparams.ver_major = vermsg->ver_major;
3411 		ldcp->peer_hparams.ver_minor = vermsg->ver_minor;
3412 		ldcp->peer_hparams.dev_class = vermsg->dev_class;
3413 
3414 		if ((vermsg->dev_class != VDEV_NETWORK) &&
3415 		    (vermsg->dev_class != VDEV_NETWORK_SWITCH)) {
3416 			/* unsupported dev_class, send NACK */
3417 
3418 			DWARN((vnetp,
3419 			    "vgen_handle_version_negotiate: Version"
3420 			    " Negotiation Failed id (%lx)\n", ldcp->ldc_id));
3421 
3422 			tagp->vio_subtype = VIO_SUBTYPE_NACK;
3423 			tagp->vio_sid = ldcp->local_sid;
3424 			/* send reply msg back to peer */
3425 			rv = vgen_sendmsg(ldcp, (caddr_t)tagp,
3426 			    sizeof (*vermsg), B_FALSE);
3427 			if (rv != VGEN_SUCCESS) {
3428 				return (rv);
3429 			}
3430 			return (VGEN_FAILURE);
3431 		}
3432 
3433 		DBG2((vnetp, "vgen_handle_version_negotiate: VER_INFO_RCVD,"
3434 		    " id (%lx), ver(%d,%d)\n", ldcp->ldc_id,
3435 		    vermsg->ver_major,  vermsg->ver_minor));
3436 
3437 		idx = 0;
3438 
3439 		for (;;) {
3440 
3441 			if (vermsg->ver_major > versions[idx].ver_major) {
3442 
3443 				/* nack with next lower version */
3444 				tagp->vio_subtype = VIO_SUBTYPE_NACK;
3445 				vermsg->ver_major = versions[idx].ver_major;
3446 				vermsg->ver_minor = versions[idx].ver_minor;
3447 				break;
3448 			}
3449 
3450 			if (vermsg->ver_major == versions[idx].ver_major) {
3451 
3452 				/* major version match - ACK version */
3453 				tagp->vio_subtype = VIO_SUBTYPE_ACK;
3454 				ack = 1;
3455 
3456 				/*
3457 				 * lower minor version to the one this endpt
3458 				 * supports, if necessary
3459 				 */
3460 				if (vermsg->ver_minor >
3461 				    versions[idx].ver_minor) {
3462 					vermsg->ver_minor =
3463 						versions[idx].ver_minor;
3464 					ldcp->peer_hparams.ver_minor =
3465 						versions[idx].ver_minor;
3466 				}
3467 				break;
3468 			}
3469 
3470 			idx++;
3471 
3472 			if (idx == VGEN_NUM_VER) {
3473 
3474 				/* no version match - send NACK */
3475 				tagp->vio_subtype = VIO_SUBTYPE_NACK;
3476 				vermsg->ver_major = 0;
3477 				vermsg->ver_minor = 0;
3478 				failed = 1;
3479 				break;
3480 			}
3481 
3482 		}
3483 
3484 		tagp->vio_sid = ldcp->local_sid;
3485 
3486 		/* send reply msg back to peer */
3487 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*vermsg),
3488 		    B_FALSE);
3489 		if (rv != VGEN_SUCCESS) {
3490 			return (rv);
3491 		}
3492 
3493 		if (ack) {
3494 			ldcp->hstate |= VER_ACK_SENT;
3495 			DBG2((vnetp, "vgen_handle_version_negotiate:"
3496 			    " VER_ACK_SENT, id (%lx) ver(%d,%d) \n",
3497 			    ldcp->ldc_id, vermsg->ver_major,
3498 			    vermsg->ver_minor));
3499 		}
3500 		if (failed) {
3501 			DWARN((vnetp, "vgen_handle_version_negotiate:"
3502 			    " Version Negotiation Failed id (%lx)\n",
3503 			    ldcp->ldc_id));
3504 			return (VGEN_FAILURE);
3505 		}
3506 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
3507 
3508 			/*  VER_ACK_SENT and VER_ACK_RCVD */
3509 
3510 			/* local and peer versions match? */
3511 			ASSERT((ldcp->local_hparams.ver_major ==
3512 				ldcp->peer_hparams.ver_major) &&
3513 				(ldcp->local_hparams.ver_minor ==
3514 				ldcp->peer_hparams.ver_minor));
3515 
3516 			/* move to the next phase */
3517 			vgen_handshake(vh_nextphase(ldcp));
3518 		}
3519 
3520 		break;
3521 
3522 	case VIO_SUBTYPE_ACK:
3523 
3524 		if (ldcp->hphase != VH_PHASE1) {
3525 			/*  This should not happen. */
3526 			DWARN((vnetp,
3527 			    "vgen_handle_version_negotiate:"
3528 			    " VER_ACK_RCVD id (%lx) Invalid Phase(%u)\n",
3529 			    ldcp->ldc_id, ldcp->hphase));
3530 			return (VGEN_FAILURE);
3531 		}
3532 
3533 		/* SUCCESS - we have agreed on a version */
3534 		ldcp->local_hparams.ver_major = vermsg->ver_major;
3535 		ldcp->local_hparams.ver_minor = vermsg->ver_minor;
3536 		ldcp->hstate |= VER_ACK_RCVD;
3537 
3538 		DBG2((vnetp, "vgen_handle_version_negotiate:"
3539 		    " VER_ACK_RCVD, id (%lx) ver(%d,%d) \n",
3540 		    ldcp->ldc_id, vermsg->ver_major,  vermsg->ver_minor));
3541 
3542 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
3543 
3544 			/*  VER_ACK_SENT and VER_ACK_RCVD */
3545 
3546 			/* local and peer versions match? */
3547 			ASSERT((ldcp->local_hparams.ver_major ==
3548 				ldcp->peer_hparams.ver_major) &&
3549 				(ldcp->local_hparams.ver_minor ==
3550 				ldcp->peer_hparams.ver_minor));
3551 
3552 			/* move to the next phase */
3553 			vgen_handshake(vh_nextphase(ldcp));
3554 		}
3555 		break;
3556 
3557 	case VIO_SUBTYPE_NACK:
3558 
3559 		if (ldcp->hphase != VH_PHASE1) {
3560 			/*  This should not happen.  */
3561 			DWARN((vnetp,
3562 			    "vgen_handle_version_negotiate:"
3563 			    " VER_NACK_RCVD id (%lx) Invalid Phase(%u)\n",
3564 			    ldcp->ldc_id, ldcp->hphase));
3565 			return (VGEN_FAILURE);
3566 		}
3567 
3568 		DBG2((vnetp, "vgen_handle_version_negotiate:"
3569 		    " VER_NACK_RCVD id(%lx) next ver(%d,%d)\n",
3570 		    ldcp->ldc_id, vermsg->ver_major, vermsg->ver_minor));
3571 
3572 		/* check if version in NACK is zero */
3573 		if (vermsg->ver_major == 0 && vermsg->ver_minor == 0) {
3574 			/*
3575 			 * Version Negotiation has failed.
3576 			 */
3577 			DWARN((vnetp, "vgen_handle_version_negotiate:"
3578 			    " Version Negotiation Failed id (%lx)\n",
3579 			    ldcp->ldc_id));
3580 			return (VGEN_FAILURE);
3581 		}
3582 
3583 		idx = 0;
3584 
3585 		for (;;) {
3586 
3587 			if (vermsg->ver_major > versions[idx].ver_major) {
3588 				/* select next lower version */
3589 
3590 				ldcp->local_hparams.ver_major =
3591 					versions[idx].ver_major;
3592 				ldcp->local_hparams.ver_minor =
3593 					versions[idx].ver_minor;
3594 				break;
3595 			}
3596 
3597 			if (vermsg->ver_major == versions[idx].ver_major) {
3598 				/* major version match */
3599 
3600 				ldcp->local_hparams.ver_major =
3601 					versions[idx].ver_major;
3602 
3603 				ldcp->local_hparams.ver_minor =
3604 					versions[idx].ver_minor;
3605 				break;
3606 			}
3607 
3608 			idx++;
3609 
3610 			if (idx == VGEN_NUM_VER) {
3611 				/*
3612 				 * no version match.
3613 				 * Version Negotiation has failed.
3614 				 */
3615 				DWARN((vnetp, "vgen_handle_version_negotiate:"
3616 				    " Version Negotiation Failed id (%lx)\n",
3617 				    ldcp->ldc_id));
3618 				return (VGEN_FAILURE);
3619 			}
3620 
3621 		}
3622 
3623 		rv = vgen_send_version_negotiate(ldcp);
3624 		if (rv != VGEN_SUCCESS) {
3625 			return (rv);
3626 		}
3627 
3628 		break;
3629 	}
3630 
3631 	DBG1((vnetp, "vgen_handle_version_negotiate: exit\n"));
3632 	return (VGEN_SUCCESS);
3633 }
3634 
3635 /* Check if the attributes are supported */
3636 static int
3637 vgen_check_attr_info(vgen_ldc_t *ldcp, vnet_attr_msg_t *msg)
3638 {
3639 	_NOTE(ARGUNUSED(ldcp))
3640 
3641 #if 0
3642 	uint64_t port_macaddr;
3643 	port_macaddr = vgen_macaddr_strtoul((uint8_t *)
3644 				&(ldcp->portp->macaddr));
3645 #endif
3646 	/*
3647 	 * currently, we support these attr values:
3648 	 * mtu of ethernet, addr_type of mac, xfer_mode of
3649 	 * ldc shared memory, ack_freq of 0 (data is acked if
3650 	 * the ack bit is set in the descriptor) and the address should
3651 	 * match the address in the port node.
3652 	 */
3653 	if ((msg->mtu != ETHERMAX) ||
3654 	    (msg->addr_type != ADDR_TYPE_MAC) ||
3655 	    (msg->xfer_mode != VIO_DRING_MODE) ||
3656 	    (msg->ack_freq > 64)) {
3657 #if 0
3658 	    (msg->addr != port_macaddr))
3659 cmn_err(CE_CONT, "vgen_check_attr_info: msg->addr(%lx), port_macaddr(%lx)\n",
3660 	msg->addr, port_macaddr);
3661 #endif
3662 		return (VGEN_FAILURE);
3663 	}
3664 
3665 	return (VGEN_SUCCESS);
3666 }
3667 
3668 /*
3669  * Handle an attribute info msg from the peer or an ACK/NACK from the peer
3670  * to an attr info msg that we sent.
3671  */
3672 static int
3673 vgen_handle_attr_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
3674 {
3675 	vnet_attr_msg_t *attrmsg = (vnet_attr_msg_t *)tagp;
3676 	void		*vnetp = LDC_TO_VNET(ldcp);
3677 	int		ack = 0;
3678 	int		rv = 0;
3679 
3680 	DBG1((vnetp, "vgen_handle_attr_info: enter\n"));
3681 	if (ldcp->hphase != VH_PHASE2) {
3682 		DWARN((vnetp,
3683 		    "vgen_handle_attr_info: Rcvd ATTR_INFO id(%lx)"
3684 		    " subtype (%d), Invalid Phase(%u)\n", ldcp->ldc_id,
3685 		    tagp->vio_subtype, ldcp->hphase));
3686 		return (VGEN_FAILURE);
3687 	}
3688 	switch (tagp->vio_subtype) {
3689 	case VIO_SUBTYPE_INFO:
3690 
3691 		DBG2((vnetp, "vgen_handle_attr_info: ATTR_INFO_RCVD id(%lx)\n",
3692 		    ldcp->ldc_id));
3693 		ldcp->hstate |= ATTR_INFO_RCVD;
3694 
3695 		/* save peer's values */
3696 		ldcp->peer_hparams.mtu = attrmsg->mtu;
3697 		ldcp->peer_hparams.addr = attrmsg->addr;
3698 		ldcp->peer_hparams.addr_type = attrmsg->addr_type;
3699 		ldcp->peer_hparams.xfer_mode = attrmsg->xfer_mode;
3700 		ldcp->peer_hparams.ack_freq = attrmsg->ack_freq;
3701 
3702 		if (vgen_check_attr_info(ldcp, attrmsg) == VGEN_FAILURE) {
3703 			/* unsupported attr, send NACK */
3704 			tagp->vio_subtype = VIO_SUBTYPE_NACK;
3705 		} else {
3706 			ack = 1;
3707 			tagp->vio_subtype = VIO_SUBTYPE_ACK;
3708 		}
3709 		tagp->vio_sid = ldcp->local_sid;
3710 
3711 		/* send reply msg back to peer */
3712 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*attrmsg),
3713 		    B_FALSE);
3714 		if (rv != VGEN_SUCCESS) {
3715 			return (rv);
3716 		}
3717 
3718 		if (ack) {
3719 			ldcp->hstate |= ATTR_ACK_SENT;
3720 			DBG2((vnetp, "vgen_handle_attr_info:"
3721 			    " ATTR_ACK_SENT id(%lx)\n", ldcp->ldc_id));
3722 		} else {
3723 			/* failed */
3724 			DWARN((vnetp, "vgen_handle_attr_info:"
3725 			    " ATTR_NACK_SENT id(%lx)\n", ldcp->ldc_id));
3726 			return (VGEN_FAILURE);
3727 		}
3728 
3729 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
3730 			vgen_handshake(vh_nextphase(ldcp));
3731 		}
3732 
3733 		break;
3734 
3735 	case VIO_SUBTYPE_ACK:
3736 
3737 		ldcp->hstate |= ATTR_ACK_RCVD;
3738 
3739 		DBG2((vnetp, "vgen_handle_attr_info: ATTR_ACK_RCVD id(%lx)\n",
3740 		    ldcp->ldc_id));
3741 
3742 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
3743 			vgen_handshake(vh_nextphase(ldcp));
3744 		}
3745 		break;
3746 
3747 	case VIO_SUBTYPE_NACK:
3748 
3749 		DBG2((vnetp, "vgen_handle_attr_info: ATTR_NACK_RCVD id(%lx)\n",
3750 		    ldcp->ldc_id));
3751 		return (VGEN_FAILURE);
3752 	}
3753 	DBG1((vnetp, "vgen_handle_attr_info: exit\n"));
3754 	return (VGEN_SUCCESS);
3755 }
3756 
3757 /* Check if the dring info msg is ok */
3758 static int
3759 vgen_check_dring_reg(vio_dring_reg_msg_t *msg)
3760 {
3761 	/* check if msg contents are ok */
3762 	if ((msg->num_descriptors < 128) || (msg->descriptor_size <
3763 	    sizeof (vnet_public_desc_t))) {
3764 		return (VGEN_FAILURE);
3765 	}
3766 	return (VGEN_SUCCESS);
3767 }
3768 
3769 /*
3770  * Handle a descriptor ring register msg from the peer or an ACK/NACK from
3771  * the peer to a dring register msg that we sent.
3772  */
3773 static int
3774 vgen_handle_dring_reg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
3775 {
3776 	vio_dring_reg_msg_t *msg = (vio_dring_reg_msg_t *)tagp;
3777 	void *vnetp = LDC_TO_VNET(ldcp);
3778 	ldc_mem_cookie_t dcookie;
3779 	int ack = 0;
3780 	int rv = 0;
3781 
3782 	DBG1((vnetp, "vgen_handle_dring_reg: enter\n"));
3783 	if (ldcp->hphase < VH_PHASE2) {
3784 		/* dring_info can be rcvd in any of the phases after Phase1 */
3785 		DWARN((vnetp,
3786 		    "vgen_handle_dring_reg: Rcvd DRING_INFO, id (%lx)"
3787 		    " Subtype (%d), Invalid Phase(%u)\n", ldcp->ldc_id,
3788 		    tagp->vio_subtype, ldcp->hphase));
3789 		return (VGEN_FAILURE);
3790 	}
3791 	switch (tagp->vio_subtype) {
3792 	case VIO_SUBTYPE_INFO:
3793 
3794 		DBG2((vnetp, "vgen_handle_dring_reg: DRING_INFO_RCVD id(%lx)\n",
3795 		    ldcp->ldc_id));
3796 		ldcp->hstate |= DRING_INFO_RCVD;
3797 		bcopy((msg->cookie), &dcookie, sizeof (dcookie));
3798 
3799 		ASSERT(msg->ncookies == 1);
3800 
3801 		if (vgen_check_dring_reg(msg) == VGEN_SUCCESS) {
3802 			/*
3803 			 * verified dring info msg to be ok,
3804 			 * now try to map the remote dring.
3805 			 */
3806 			rv = vgen_init_rxds(ldcp, msg->num_descriptors,
3807 			    msg->descriptor_size, &dcookie,
3808 			    msg->ncookies);
3809 			if (rv == DDI_SUCCESS) {
3810 				/* now we can ack the peer */
3811 				ack = 1;
3812 			}
3813 		}
3814 		if (ack == 0) {
3815 			/* failed, send NACK */
3816 			tagp->vio_subtype = VIO_SUBTYPE_NACK;
3817 		} else {
3818 			if (!(ldcp->peer_hparams.dring_ready)) {
3819 
3820 				/* save peer's dring_info values */
3821 				bcopy(&dcookie,
3822 				    &(ldcp->peer_hparams.dring_cookie),
3823 				    sizeof (dcookie));
3824 				ldcp->peer_hparams.num_desc =
3825 						msg->num_descriptors;
3826 				ldcp->peer_hparams.desc_size =
3827 						msg->descriptor_size;
3828 				ldcp->peer_hparams.num_dcookies =
3829 						msg->ncookies;
3830 
3831 				/* set dring_ident for the peer */
3832 				ldcp->peer_hparams.dring_ident =
3833 							(uint64_t)ldcp->rxdp;
3834 				/* return the dring_ident in ack msg */
3835 				msg->dring_ident =
3836 							(uint64_t)ldcp->rxdp;
3837 
3838 				ldcp->peer_hparams.dring_ready = B_TRUE;
3839 			}
3840 			tagp->vio_subtype = VIO_SUBTYPE_ACK;
3841 		}
3842 		tagp->vio_sid = ldcp->local_sid;
3843 		/* send reply msg back to peer */
3844 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msg),
3845 		    B_FALSE);
3846 		if (rv != VGEN_SUCCESS) {
3847 			return (rv);
3848 		}
3849 
3850 		if (ack) {
3851 			ldcp->hstate |= DRING_ACK_SENT;
3852 			DBG2((vnetp, "vgen_handle_dring_reg: DRING_ACK_SENT"
3853 			    " id (%lx)\n", ldcp->ldc_id));
3854 		} else {
3855 			DWARN((vnetp, "vgen_handle_dring_reg: DRING_NACK_SENT"
3856 			    " id (%lx)\n", ldcp->ldc_id));
3857 			return (VGEN_FAILURE);
3858 		}
3859 
3860 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
3861 			vgen_handshake(vh_nextphase(ldcp));
3862 		}
3863 
3864 		break;
3865 
3866 	case VIO_SUBTYPE_ACK:
3867 
3868 		ldcp->hstate |= DRING_ACK_RCVD;
3869 
3870 		DBG2((vnetp, "vgen_handle_dring_reg: DRING_ACK_RCVD"
3871 		    " id (%lx)\n", ldcp->ldc_id));
3872 
3873 		if (!(ldcp->local_hparams.dring_ready)) {
3874 			/* local dring is now ready */
3875 			ldcp->local_hparams.dring_ready = B_TRUE;
3876 
3877 			/* save dring_ident acked by peer */
3878 			ldcp->local_hparams.dring_ident =
3879 				msg->dring_ident;
3880 		}
3881 
3882 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
3883 			vgen_handshake(vh_nextphase(ldcp));
3884 		}
3885 
3886 		break;
3887 
3888 	case VIO_SUBTYPE_NACK:
3889 
3890 		DBG2((vnetp, "vgen_handle_dring_reg: DRING_NACK_RCVD"
3891 		    " id (%lx)\n", ldcp->ldc_id));
3892 		return (VGEN_FAILURE);
3893 	}
3894 	DBG1((vnetp, "vgen_handle_dring_reg: exit\n"));
3895 	return (VGEN_SUCCESS);
3896 }
3897 
3898 /*
3899  * Handle a rdx info msg from the peer or an ACK/NACK
3900  * from the peer to a rdx info msg that we sent.
3901  */
3902 static int
3903 vgen_handle_rdx_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
3904 {
3905 	void *vnetp = LDC_TO_VNET(ldcp);
3906 	int rv = 0;
3907 
3908 	DBG1((vnetp, "vgen_handle_rdx_info: enter\n"));
3909 	if (ldcp->hphase != VH_PHASE3) {
3910 		DWARN((vnetp,
3911 		    "vgen_handle_rdx_info: Rcvd RDX_INFO, id (%lx)"
3912 		    "  Subtype (%d), Invalid Phase(%u)\n", ldcp->ldc_id,
3913 		    tagp->vio_subtype, ldcp->hphase));
3914 		return (VGEN_FAILURE);
3915 	}
3916 	switch (tagp->vio_subtype) {
3917 	case VIO_SUBTYPE_INFO:
3918 
3919 		DBG2((vnetp, "vgen_handle_rdx_info: RDX_INFO_RCVD id (%lx)\n",
3920 		    ldcp->ldc_id));
3921 		ldcp->hstate |= RDX_INFO_RCVD;
3922 
3923 		tagp->vio_subtype = VIO_SUBTYPE_ACK;
3924 		tagp->vio_sid = ldcp->local_sid;
3925 		/* send reply msg back to peer */
3926 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (vio_rdx_msg_t),
3927 		    B_FALSE);
3928 		if (rv != VGEN_SUCCESS) {
3929 			return (rv);
3930 		}
3931 
3932 		ldcp->hstate |= RDX_ACK_SENT;
3933 		DBG2((vnetp, "vgen_handle_rdx_info: RDX_ACK_SENT id (%lx)\n",
3934 		    ldcp->ldc_id));
3935 
3936 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
3937 			vgen_handshake(vh_nextphase(ldcp));
3938 		}
3939 
3940 		break;
3941 
3942 	case VIO_SUBTYPE_ACK:
3943 
3944 		ldcp->hstate |= RDX_ACK_RCVD;
3945 
3946 		DBG2((vnetp, "vgen_handle_rdx_info: RDX_ACK_RCVD id (%lx)\n",
3947 		    ldcp->ldc_id));
3948 
3949 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
3950 			vgen_handshake(vh_nextphase(ldcp));
3951 		}
3952 		break;
3953 
3954 	case VIO_SUBTYPE_NACK:
3955 
3956 		DBG2((vnetp, "vgen_handle_rdx_info: RDX_NACK_RCVD id (%lx)\n",
3957 		    ldcp->ldc_id));
3958 		return (VGEN_FAILURE);
3959 	}
3960 	DBG1((vnetp, "vgen_handle_rdx_info: exit\n"));
3961 	return (VGEN_SUCCESS);
3962 }
3963 
3964 /* Handle ACK/NACK from vsw to a set multicast msg that we sent */
3965 static int
3966 vgen_handle_mcast_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
3967 {
3968 	void *vnetp = LDC_TO_VNET(ldcp);
3969 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3970 	vnet_mcast_msg_t *msgp = (vnet_mcast_msg_t *)tagp;
3971 	struct ether_addr *addrp;
3972 	int count;
3973 	int i;
3974 
3975 	DBG1((vnetp, "vgen_handle_mcast_info: enter\n"));
3976 	switch (tagp->vio_subtype) {
3977 
3978 	case VIO_SUBTYPE_INFO:
3979 
3980 		/* vnet shouldn't recv set mcast msg, only vsw handles it */
3981 		DWARN((vnetp,
3982 		    "vgen_handle_mcast_info: rcvd SET_MCAST_INFO id (%lx)\n",
3983 		    ldcp->ldc_id));
3984 		break;
3985 
3986 	case VIO_SUBTYPE_ACK:
3987 
3988 		/* success adding/removing multicast addr */
3989 		DBG2((vnetp,
3990 		    "vgen_handle_mcast_info: rcvd SET_MCAST_ACK id (%lx)\n",
3991 		    ldcp->ldc_id));
3992 		break;
3993 
3994 	case VIO_SUBTYPE_NACK:
3995 
3996 		DWARN((vnetp,
3997 		    "vgen_handle_mcast_info: rcvd SET_MCAST_NACK id (%lx)\n",
3998 		    ldcp->ldc_id));
3999 		if (!(msgp->set)) {
4000 			/* multicast remove request failed */
4001 			break;
4002 		}
4003 
4004 		/* multicast add request failed */
4005 		for (count = 0; count < msgp->count; count++) {
4006 			addrp = &(msgp->mca[count]);
4007 
4008 			/* delete address from the table */
4009 			for (i = 0; i < vgenp->mccount; i++) {
4010 				if (ether_cmp(addrp,
4011 				    &(vgenp->mctab[i])) == 0) {
4012 					if (vgenp->mccount > 1) {
4013 						vgenp->mctab[i] =
4014 						vgenp->mctab[vgenp->mccount-1];
4015 					}
4016 					vgenp->mccount--;
4017 					break;
4018 				}
4019 			}
4020 		}
4021 		break;
4022 
4023 	}
4024 	DBG1((vnetp, "vgen_handle_mcast_info: exit\n"));
4025 
4026 	return (VGEN_SUCCESS);
4027 }
4028 
4029 /* handler for control messages received from the peer ldc end-point */
4030 static int
4031 vgen_handle_ctrlmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4032 {
4033 	void *vnetp = LDC_TO_VNET(ldcp);
4034 	int rv = 0;
4035 
4036 	DBG1((vnetp, "vgen_handle_ctrlmsg: enter\n"));
4037 	switch (tagp->vio_subtype_env) {
4038 
4039 	case VIO_VER_INFO:
4040 		rv = vgen_handle_version_negotiate(ldcp, tagp);
4041 		break;
4042 
4043 	case VIO_ATTR_INFO:
4044 		rv = vgen_handle_attr_info(ldcp, tagp);
4045 		break;
4046 
4047 	case VIO_DRING_REG:
4048 		rv = vgen_handle_dring_reg(ldcp, tagp);
4049 		break;
4050 
4051 	case VIO_RDX:
4052 		rv = vgen_handle_rdx_info(ldcp, tagp);
4053 		break;
4054 
4055 	case VNET_MCAST_INFO:
4056 		rv = vgen_handle_mcast_info(ldcp, tagp);
4057 		break;
4058 
4059 	}
4060 
4061 	DBG1((vnetp, "vgen_handle_ctrlmsg: exit\n"));
4062 	return (rv);
4063 }
4064 
4065 /* handler for data messages received from the peer ldc end-point */
4066 static int
4067 vgen_handle_datamsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp,
4068 	mblk_t **headp, mblk_t **tailp)
4069 {
4070 	void *vnetp = LDC_TO_VNET(ldcp);
4071 	int rv = 0;
4072 
4073 	DBG1((vnetp, "vgen_handle_datamsg: enter\n"));
4074 
4075 	if (ldcp->hphase != VH_DONE)
4076 		return (rv);
4077 	switch (tagp->vio_subtype_env) {
4078 	case VIO_DRING_DATA:
4079 		rv = vgen_handle_dring_data(ldcp, tagp, headp, tailp);
4080 		break;
4081 	default:
4082 		break;
4083 	}
4084 
4085 	DBG1((vnetp, "vgen_handle_datamsg: exit\n"));
4086 	return (rv);
4087 }
4088 
4089 static int
4090 vgen_send_dring_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp, uint32_t start,
4091     int32_t end, uint8_t pstate)
4092 {
4093 	vio_dring_msg_t *msgp = (vio_dring_msg_t *)tagp;
4094 	void *vnetp = LDC_TO_VNET(ldcp);
4095 	int rv = 0;
4096 
4097 	tagp->vio_subtype = VIO_SUBTYPE_ACK;
4098 	tagp->vio_sid = ldcp->local_sid;
4099 	msgp->start_idx = start;
4100 	msgp->end_idx = end;
4101 	msgp->dring_process_state = pstate;
4102 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msgp), B_FALSE);
4103 	if (rv != VGEN_SUCCESS) {
4104 		DWARN((vnetp, "vgen_send_dring_ack: id(%lx) vgen_sendmsg "
4105 		    "failed\n", (ldcp)->ldc_id));
4106 	}
4107 	return (rv);
4108 }
4109 
4110 static int
4111 vgen_handle_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp,
4112 	mblk_t **headp, mblk_t **tailp)
4113 {
4114 	vio_dring_msg_t *dringmsg;
4115 	vnet_public_desc_t *rxdp;
4116 	vnet_public_desc_t *txdp;
4117 	vio_dring_entry_hdr_t *hdrp;
4118 	vgen_stats_t *statsp;
4119 	struct ether_header *ehp;
4120 	mblk_t *mp = NULL;
4121 	mblk_t *bp = NULL;
4122 	mblk_t *bpt = NULL;
4123 	size_t nbytes;
4124 	size_t nread;
4125 	uint64_t off = 0;
4126 	uint32_t start;
4127 	int32_t end;
4128 	uint32_t datalen;
4129 	uint32_t ncookies;
4130 	uint32_t ack_start;
4131 	uint32_t ack_end;
4132 	uint32_t rxi;
4133 	uint32_t txi;
4134 	int rv = 0;
4135 	boolean_t rxd_err = B_FALSE;
4136 	boolean_t set_ack_start = B_FALSE;
4137 	vgen_private_desc_t *tbufp;
4138 	uint32_t next_rxi;
4139 	boolean_t ready_txd = B_FALSE;
4140 	uint32_t retries = 0;
4141 #ifdef VGEN_HANDLE_LOST_PKTS
4142 	int n;
4143 #endif
4144 #ifdef VGEN_REXMIT
4145 	uint64_t seqnum;
4146 #endif
4147 	void *vnetp = LDC_TO_VNET(ldcp);
4148 	boolean_t ack_needed = B_FALSE;
4149 
4150 	dringmsg = (vio_dring_msg_t *)tagp;
4151 	start = dringmsg->start_idx;
4152 	end = dringmsg->end_idx;
4153 	statsp = ldcp->statsp;
4154 
4155 	DBG1((vnetp, "vgen_handle_dring_data: enter\n"));
4156 	switch (tagp->vio_subtype) {
4157 
4158 	case VIO_SUBTYPE_INFO:
4159 		/*
4160 		 * received a data msg, which contains the start and end
4161 		 * indeces of the descriptors within the rx ring holding data,
4162 		 * the seq_num of data packet corresponding to the start index,
4163 		 * and the dring_ident.
4164 		 * We can now read the contents of each of these descriptors
4165 		 * and gather data from it.
4166 		 */
4167 		DBG2((vnetp,
4168 		    "vgen_handle_dring_data: INFO: start(%d), end(%d)\n",
4169 		    start, end));
4170 
4171 		/* validate rx start and end indeces */
4172 		if (!(CHECK_RXI(start, ldcp)) || ((end != -1) &&
4173 		    !(CHECK_RXI(end, ldcp)))) {
4174 			/* drop the message if invalid index */
4175 			break;
4176 		}
4177 
4178 		/* validate dring_ident */
4179 		if (dringmsg->dring_ident != ldcp->peer_hparams.dring_ident) {
4180 			/* invalid dring_ident, drop the msg */
4181 			break;
4182 		}
4183 #ifdef DEBUG
4184 		if (vgen_trigger_rxlost) {
4185 			/* drop this msg to simulate lost pkts for debugging */
4186 			vgen_trigger_rxlost = 0;
4187 			break;
4188 		}
4189 #endif
4190 
4191 #ifdef	VGEN_HANDLE_LOST_PKTS
4192 
4193 		/* receive start index doesn't match expected index */
4194 		if (ldcp->next_rxi != start) {
4195 
4196 			DWARN((vnetp, "vgen_handle_dring_data: id(%lx) "
4197 			    "next_rxi(%d) != start(%d)\n",
4198 			    ldcp->ldc_id, ldcp->next_rxi, start));
4199 
4200 			/* calculate the number of pkts lost */
4201 			if (start >= ldcp->next_rxi) {
4202 				n = start - ldcp->next_rxi;
4203 			} else  {
4204 				n = ldcp->num_rxds - (ldcp->next_rxi - start);
4205 			}
4206 
4207 			/*
4208 			 * sequence number of dring data message
4209 			 * is less than the next sequence number that
4210 			 * is expected:
4211 			 *
4212 			 * drop the message and the corresponding packets.
4213 			 */
4214 			if (ldcp->next_rxseq > dringmsg->seq_num) {
4215 				DWARN((vnetp, "vgen_handle_dring_data: id(%lx) "
4216 				    "dropping pkts, expected rxseq(0x%lx) "
4217 				    "> recvd(0x%lx)\n",
4218 				    ldcp->ldc_id, ldcp->next_rxseq,
4219 				    dringmsg->seq_num));
4220 				/*
4221 				 * duplicate/multiple retransmissions from
4222 				 * sender?? drop this msg.
4223 				 */
4224 				break;
4225 			}
4226 
4227 			/*
4228 			 * sequence number of dring data message
4229 			 * is greater than the next expected sequence number
4230 			 *
4231 			 * send a NACK back to the peer to indicate lost
4232 			 * packets.
4233 			 */
4234 			if (dringmsg->seq_num > ldcp->next_rxseq) {
4235 				statsp->rx_lost_pkts += n;
4236 				tagp->vio_subtype = VIO_SUBTYPE_NACK;
4237 				tagp->vio_sid = ldcp->local_sid;
4238 				/* indicate the range of lost descriptors */
4239 				dringmsg->start_idx = ldcp->next_rxi;
4240 				rxi = start;
4241 				DECR_RXI(rxi, ldcp);
4242 				dringmsg->end_idx = rxi;
4243 				/* dring ident is left unchanged */
4244 				rv = vgen_sendmsg(ldcp, (caddr_t)tagp,
4245 				    sizeof (*dringmsg), B_FALSE);
4246 				if (rv != VGEN_SUCCESS) {
4247 					DWARN((vnetp,
4248 					    "vgen_handle_dring_data: id(%lx) "
4249 					    "vgen_sendmsg failed, "
4250 					    "stype: NACK\n", ldcp->ldc_id));
4251 					goto error_ret;
4252 				}
4253 #ifdef VGEN_REXMIT
4254 				/*
4255 				 * stop further processing until peer
4256 				 * retransmits with the right index.
4257 				 * update next_rxseq expected.
4258 				 */
4259 				ldcp->next_rxseq += 1;
4260 				break;
4261 #else	/* VGEN_REXMIT */
4262 				/*
4263 				 * treat this range of descrs/pkts as dropped
4264 				 * and set the new expected values for next_rxi
4265 				 * and next_rxseq. continue(below) to process
4266 				 * from the new start index.
4267 				 */
4268 				ldcp->next_rxi = start;
4269 				ldcp->next_rxseq += 1;
4270 #endif	/* VGEN_REXMIT */
4271 
4272 			} else if (dringmsg->seq_num == ldcp->next_rxseq) {
4273 				/*
4274 				 * expected and received seqnums match, but
4275 				 * the descriptor indeces don't?
4276 				 *
4277 				 * restart handshake with peer.
4278 				 */
4279 				DWARN((vnetp,
4280 				    "vgen_handle_dring_data: id(%lx) "
4281 				    "next_rxseq(0x%lx) == seq_num(0x%lx)\n",
4282 				    ldcp->ldc_id, ldcp->next_rxseq,
4283 				    dringmsg->seq_num));
4284 
4285 			}
4286 
4287 		} else {
4288 			/* expected and start dring indeces match */
4289 
4290 			if (dringmsg->seq_num != ldcp->next_rxseq) {
4291 
4292 				/* seqnums don't match */
4293 
4294 				DWARN((vnetp,
4295 				    "vgen_handle_dring_data: id(%lx) "
4296 				    "next_rxseq(0x%lx) != seq_num(0x%lx)\n",
4297 				    ldcp->ldc_id, ldcp->next_rxseq,
4298 				    dringmsg->seq_num));
4299 			}
4300 		}
4301 
4302 #endif	/* VGEN_HANDLE_LOST_PKTS */
4303 
4304 		/*
4305 		 * start processing the descriptors from the specified
4306 		 * start index, up to the index a descriptor is not ready
4307 		 * to be processed or we process the entire descriptor ring
4308 		 * and wrap around upto the start index.
4309 		 */
4310 
4311 		/* need to set the start index of descriptors to be ack'd */
4312 		set_ack_start = B_TRUE;
4313 
4314 		/* index upto which we have ack'd */
4315 		ack_end = start;
4316 		DECR_RXI(ack_end, ldcp);
4317 
4318 		next_rxi = rxi =  start;
4319 		do {
4320 
4321 vgen_recv_retry:	rv = ldc_mem_dring_acquire(ldcp->rx_dhandle, rxi, rxi);
4322 			if (rv != 0) {
4323 				DWARN((vnetp, "vgen_handle_dring_data: "
4324 				    "ldc_mem_dring_acquire() failed"
4325 				    " id(%lx) rv(%d)\n", ldcp->ldc_id, rv));
4326 				statsp->ierrors++;
4327 				goto error_ret;
4328 			}
4329 
4330 			rxdp = &(ldcp->rxdp[rxi]);
4331 			hdrp = &rxdp->hdr;
4332 
4333 			if (hdrp->dstate != VIO_DESC_READY) {
4334 				/*
4335 				 * descriptor is not ready.
4336 				 * retry descriptor acquire, stop processing
4337 				 * after max # retries.
4338 				 */
4339 				if (retries == vgen_recv_retries)
4340 					break;
4341 				retries++;
4342 				drv_usecwait(vgen_recv_delay);
4343 				goto vgen_recv_retry;
4344 			}
4345 			retries = 0;
4346 
4347 			if (set_ack_start) {
4348 				/*
4349 				 * initialize the start index of the range
4350 				 * of descriptors to be ack'd.
4351 				 */
4352 				ack_start = rxi;
4353 				set_ack_start = B_FALSE;
4354 			}
4355 
4356 			datalen = rxdp->nbytes;
4357 			ncookies = rxdp->ncookies;
4358 			if ((datalen < ETHERMIN) ||
4359 			    (ncookies == 0) ||
4360 			    (ncookies > MAX_COOKIES)) {
4361 				rxd_err = B_TRUE;
4362 			} else {
4363 				/*
4364 				 * Try to allocate an mblk from the free pool
4365 				 * of recv mblks for the channel.
4366 				 * If this fails, use allocb().
4367 				 */
4368 				mp = vio_allocb(ldcp->rmp);
4369 				if (!mp) {
4370 					/*
4371 					 * The data buffer returned by
4372 					 * allocb(9F) is 8byte aligned. We
4373 					 * allocate extra 8 bytes to ensure
4374 					 * size is multiple of 8 bytes for
4375 					 * ldc_mem_copy().
4376 					 */
4377 					statsp->rx_vio_allocb_fail++;
4378 					mp = allocb(VNET_IPALIGN + datalen + 8,
4379 					    BPRI_MED);
4380 				}
4381 				nbytes = (VNET_IPALIGN + datalen + 7) & ~7;
4382 			}
4383 			if ((rxd_err) || (mp == NULL)) {
4384 				/*
4385 				 * rxd_err or allocb() failure,
4386 				 * drop this packet, get next.
4387 				 */
4388 				if (rxd_err) {
4389 					statsp->ierrors++;
4390 					rxd_err = B_FALSE;
4391 				} else {
4392 					statsp->rx_allocb_fail++;
4393 				}
4394 
4395 				ack_needed = hdrp->ack;
4396 
4397 				/* set descriptor done bit */
4398 				hdrp->dstate = VIO_DESC_DONE;
4399 
4400 				rv = ldc_mem_dring_release(ldcp->rx_dhandle,
4401 				    rxi, rxi);
4402 				if (rv != 0) {
4403 					DWARN((vnetp, "vgen_handle_dring_data: "
4404 					    "ldc_mem_dring_release err id(%lx)"
4405 					    " rv(%d)\n", ldcp->ldc_id, rv));
4406 					goto error_ret;
4407 				}
4408 
4409 				if (ack_needed) {
4410 					ack_needed = B_FALSE;
4411 					/*
4412 					 * sender needs ack for this packet,
4413 					 * ack pkts upto this index.
4414 					 */
4415 					ack_end = rxi;
4416 
4417 					rv = vgen_send_dring_ack(ldcp, tagp,
4418 					    ack_start, ack_end,
4419 					    VIO_DP_ACTIVE);
4420 					if (rv != VGEN_SUCCESS) {
4421 						goto error_ret;
4422 					}
4423 
4424 					/* need to set new ack start index */
4425 					set_ack_start = B_TRUE;
4426 				}
4427 				goto vgen_next_rxi;
4428 			}
4429 
4430 			nread = nbytes;
4431 			rv = ldc_mem_copy(ldcp->ldc_handle,
4432 			    (caddr_t)mp->b_rptr, off, &nread,
4433 			    rxdp->memcookie, ncookies, LDC_COPY_IN);
4434 
4435 			/* if ldc_mem_copy() failed */
4436 			if (rv) {
4437 				DWARN((vnetp,
4438 				    "vgen_handle_dring_data: ldc_mem_copy err "
4439 				    " id(%lx) rv(%d)\n", ldcp->ldc_id, rv));
4440 				statsp->ierrors++;
4441 				freemsg(mp);
4442 				goto error_ret;
4443 			}
4444 
4445 			ack_needed = hdrp->ack;
4446 			hdrp->dstate = VIO_DESC_DONE;
4447 
4448 			rv = ldc_mem_dring_release(ldcp->rx_dhandle, rxi, rxi);
4449 			if (rv != 0) {
4450 				DWARN((vnetp, "vgen_handle_dring_data: "
4451 				    "ldc_mem_dring_release err id(%lx)"
4452 				    " rv(%d)\n", ldcp->ldc_id, rv));
4453 				goto error_ret;
4454 			}
4455 
4456 			mp->b_rptr += VNET_IPALIGN;
4457 
4458 			if (ack_needed) {
4459 				ack_needed = B_FALSE;
4460 				/*
4461 				 * sender needs ack for this packet,
4462 				 * ack pkts upto this index.
4463 				 */
4464 				ack_end = rxi;
4465 
4466 				rv = vgen_send_dring_ack(ldcp, tagp,
4467 				    ack_start, ack_end, VIO_DP_ACTIVE);
4468 				if (rv != VGEN_SUCCESS) {
4469 					goto error_ret;
4470 				}
4471 
4472 				/* need to set new ack start index */
4473 				set_ack_start = B_TRUE;
4474 			}
4475 
4476 			if (nread != nbytes) {
4477 				DWARN((vnetp,
4478 				    "vgen_handle_dring_data: id(%lx) "
4479 				    "ldc_mem_copy nread(%lx), nbytes(%lx)\n",
4480 				    ldcp->ldc_id, nread, nbytes));
4481 				statsp->ierrors++;
4482 				freemsg(mp);
4483 				goto vgen_next_rxi;
4484 			}
4485 
4486 			/* point to the actual end of data */
4487 			mp->b_wptr = mp->b_rptr + datalen;
4488 
4489 			/* update stats */
4490 			statsp->ipackets++;
4491 			statsp->rbytes += datalen;
4492 			ehp = (struct ether_header *)mp->b_rptr;
4493 			if (IS_BROADCAST(ehp))
4494 				statsp->brdcstrcv++;
4495 			else if (IS_MULTICAST(ehp))
4496 				statsp->multircv++;
4497 
4498 			/* build a chain of received packets */
4499 			if (bp == NULL) {
4500 				/* first pkt */
4501 				bp = mp;
4502 				bpt = bp;
4503 				bpt->b_next = NULL;
4504 			} else {
4505 				mp->b_next = NULL;
4506 				bpt->b_next = mp;
4507 				bpt = mp;
4508 			}
4509 
4510 
4511 vgen_next_rxi:
4512 			/* update end index of range of descrs to be ack'd */
4513 			ack_end = rxi;
4514 
4515 			/* update the next index to be processed */
4516 			INCR_RXI(next_rxi, ldcp);
4517 			if (next_rxi == start) {
4518 				/*
4519 				 * processed the entire descriptor ring upto
4520 				 * the index at which we started.
4521 				 */
4522 				break;
4523 			}
4524 
4525 			rxi = next_rxi;
4526 
4527 		_NOTE(CONSTCOND)
4528 		} while (1);
4529 
4530 		/*
4531 		 * send an ack message to peer indicating that we have stopped
4532 		 * processing descriptors.
4533 		 */
4534 		if (set_ack_start) {
4535 			/*
4536 			 * We have ack'd upto some index and we have not
4537 			 * processed any descriptors beyond that index.
4538 			 * Use the last ack'd index as both the start and
4539 			 * end of range of descrs being ack'd.
4540 			 * Note: This results in acking the last index twice
4541 			 * and should be harmless.
4542 			 */
4543 			ack_start = ack_end;
4544 		}
4545 
4546 		rv = vgen_send_dring_ack(ldcp, tagp, ack_start, ack_end,
4547 		    VIO_DP_STOPPED);
4548 		if (rv != VGEN_SUCCESS) {
4549 			goto error_ret;
4550 		}
4551 
4552 		/* save new recv index and expected seqnum of next dring msg */
4553 		ldcp->next_rxi = next_rxi;
4554 		ldcp->next_rxseq += 1;
4555 
4556 		break;
4557 
4558 	case VIO_SUBTYPE_ACK:
4559 		/*
4560 		 * received an ack corresponding to a specific descriptor for
4561 		 * which we had set the ACK bit in the descriptor (during
4562 		 * transmit). This enables us to reclaim descriptors.
4563 		 */
4564 
4565 		DBG2((vnetp,
4566 		    "vgen_handle_dring_data: ACK:  start(%d), end(%d)\n",
4567 		    start, end));
4568 
4569 		/* validate start and end indeces in the tx ack msg */
4570 		if (!(CHECK_TXI(start, ldcp)) || !(CHECK_TXI(end, ldcp))) {
4571 			/* drop the message if invalid index */
4572 			break;
4573 		}
4574 		/* validate dring_ident */
4575 		if (dringmsg->dring_ident != ldcp->local_hparams.dring_ident) {
4576 			/* invalid dring_ident, drop the msg */
4577 			break;
4578 		}
4579 		statsp->dring_data_acks++;
4580 
4581 		/* reclaim descriptors that are done */
4582 		vgen_reclaim(ldcp);
4583 
4584 		if (dringmsg->dring_process_state != VIO_DP_STOPPED) {
4585 			/*
4586 			 * receiver continued processing descriptors after
4587 			 * sending us the ack.
4588 			 */
4589 			break;
4590 		}
4591 
4592 		statsp->dring_stopped_acks++;
4593 
4594 		/* receiver stopped processing descriptors */
4595 		mutex_enter(&ldcp->txlock);
4596 		mutex_enter(&ldcp->tclock);
4597 
4598 		/*
4599 		 * determine if there are any pending tx descriptors
4600 		 * ready to be processed by the receiver(peer) and if so,
4601 		 * send a message to the peer to restart receiving.
4602 		 */
4603 		ready_txd = B_FALSE;
4604 
4605 		/*
4606 		 * using the end index of the descriptor range for which
4607 		 * we received the ack, check if the next descriptor is
4608 		 * ready.
4609 		 */
4610 		txi = end;
4611 		INCR_TXI(txi, ldcp);
4612 		tbufp = &ldcp->tbufp[txi];
4613 		txdp = tbufp->descp;
4614 		hdrp = &txdp->hdr;
4615 		if (hdrp->dstate == VIO_DESC_READY) {
4616 			ready_txd = B_TRUE;
4617 		} else {
4618 			/*
4619 			 * descr next to the end of ack'd descr range is not
4620 			 * ready.
4621 			 * starting from the current reclaim index, check
4622 			 * if any descriptor is ready.
4623 			 */
4624 
4625 			txi = ldcp->cur_tbufp - ldcp->tbufp;
4626 			tbufp = &ldcp->tbufp[txi];
4627 
4628 			while (tbufp != ldcp->next_tbufp) {
4629 
4630 				txdp = tbufp->descp;
4631 				hdrp = &txdp->hdr;
4632 				if (hdrp->dstate == VIO_DESC_READY) {
4633 					break;
4634 				}
4635 
4636 				INCR_TXI(txi, ldcp);
4637 				tbufp = &ldcp->tbufp[txi];
4638 
4639 			}
4640 
4641 			if (tbufp != ldcp->next_tbufp)
4642 				ready_txd = B_TRUE;
4643 		}
4644 
4645 		if (ready_txd) {
4646 			/*
4647 			 * we have tx descriptor(s) ready to be
4648 			 * processed by the receiver.
4649 			 * send a message to the peer with the start index
4650 			 * of ready descriptors.
4651 			 */
4652 			rv = vgen_send_dring_data(ldcp, txi, -1);
4653 			if (rv != VGEN_SUCCESS) {
4654 				ldcp->resched_peer = B_TRUE;
4655 				mutex_exit(&ldcp->tclock);
4656 				mutex_exit(&ldcp->txlock);
4657 				goto error_ret;
4658 			}
4659 		} else {
4660 			/*
4661 			 * no ready tx descriptors. set the flag to send a
4662 			 * message to peer when tx descriptors are ready in
4663 			 * transmit routine.
4664 			 */
4665 			ldcp->resched_peer = B_TRUE;
4666 		}
4667 
4668 		mutex_exit(&ldcp->tclock);
4669 		mutex_exit(&ldcp->txlock);
4670 
4671 		break;
4672 
4673 	case VIO_SUBTYPE_NACK:
4674 		/*
4675 		 * peer sent a NACK msg to indicate lost packets.
4676 		 * The start and end correspond to the range of descriptors
4677 		 * for which the peer didn't receive a dring data msg and so
4678 		 * didn't receive the corresponding data.
4679 		 */
4680 		DWARN((vnetp,
4681 		    "vgen_handle_dring_data: NACK:  start(%d), end(%d)\n",
4682 		    start, end));
4683 
4684 		/* validate start and end indeces in the tx nack msg */
4685 		if (!(CHECK_TXI(start, ldcp)) || !(CHECK_TXI(end, ldcp))) {
4686 			/* drop the message if invalid index */
4687 			break;
4688 		}
4689 		/* validate dring_ident */
4690 		if (dringmsg->dring_ident != ldcp->local_hparams.dring_ident) {
4691 			/* invalid dring_ident, drop the msg */
4692 			break;
4693 		}
4694 		mutex_enter(&ldcp->txlock);
4695 		mutex_enter(&ldcp->tclock);
4696 
4697 		if (ldcp->next_tbufp == ldcp->cur_tbufp) {
4698 			/* no busy descriptors, bogus nack ? */
4699 			mutex_exit(&ldcp->tclock);
4700 			mutex_exit(&ldcp->txlock);
4701 			break;
4702 		}
4703 
4704 #ifdef VGEN_REXMIT
4705 		/* send a new dring data msg including the lost descrs */
4706 		end = ldcp->next_tbufp - ldcp->tbufp;
4707 		DECR_TXI(end, ldcp);
4708 		rv = vgen_send_dring_data(ldcp, start, end);
4709 		if (rv != 0) {
4710 			/*
4711 			 * vgen_send_dring_data() error: drop all packets
4712 			 * in this descr range
4713 			 */
4714 			DWARN((vnetp,
4715 			    "vgen_handle_dring_data: "
4716 			    "vgen_send_dring_data failed :"
4717 			    "id(%lx) rv(%d)\n", ldcp->ldc_id, rv));
4718 			for (txi = start; txi <= end; ) {
4719 				tbufp = &(ldcp->tbufp[txi]);
4720 				txdp = tbufp->descp;
4721 				hdrp = &txdp->hdr;
4722 				tbufp->flags = VGEN_PRIV_DESC_FREE;
4723 				hdrp->dstate = VIO_DESC_FREE;
4724 				hdrp->ack = B_FALSE;
4725 				statsp->oerrors++;
4726 			}
4727 
4728 			/* update next pointer */
4729 			ldcp->next_tbufp = &(ldcp->tbufp[start]);
4730 			ldcp->next_txi = start;
4731 		}
4732 		DBG2((vnetp,
4733 		    "vgen_handle_dring_data: rexmit: start(%d) end(%d)\n",
4734 		    start, end));
4735 #else	/* VGEN_REXMIT */
4736 		/* we just mark the descrs as done so they can be reclaimed */
4737 		for (txi = start; txi <= end; ) {
4738 			txdp = &(ldcp->txdp[txi]);
4739 			hdrp = &txdp->hdr;
4740 			if (hdrp->dstate == VIO_DESC_READY)
4741 				hdrp->dstate = VIO_DESC_DONE;
4742 			INCR_TXI(txi, ldcp);
4743 		}
4744 #endif	/* VGEN_REXMIT */
4745 		mutex_exit(&ldcp->tclock);
4746 		mutex_exit(&ldcp->txlock);
4747 
4748 		break;
4749 	}
4750 
4751 error_ret:
4752 
4753 	DBG1((vnetp, "vgen_handle_dring_data: exit\n"));
4754 	*headp = bp;
4755 	*tailp = bpt;
4756 
4757 	return (rv);
4758 }
4759 
4760 static void
4761 vgen_reclaim(vgen_ldc_t *ldcp)
4762 {
4763 	mutex_enter(&ldcp->tclock);
4764 
4765 	vgen_reclaim_dring(ldcp);
4766 	ldcp->reclaim_lbolt = ddi_get_lbolt();
4767 
4768 	mutex_exit(&ldcp->tclock);
4769 }
4770 
4771 /*
4772  * transmit reclaim function. starting from the current reclaim index
4773  * look for descriptors marked DONE and reclaim the descriptor and the
4774  * corresponding buffers (tbuf).
4775  */
4776 static void
4777 vgen_reclaim_dring(vgen_ldc_t *ldcp)
4778 {
4779 	vnet_public_desc_t *txdp;
4780 	vgen_private_desc_t *tbufp;
4781 	vio_dring_entry_hdr_t	*hdrp;
4782 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
4783 
4784 #ifdef DEBUG
4785 	if (vgen_trigger_txtimeout)
4786 		return;
4787 #endif
4788 
4789 	tbufp = ldcp->cur_tbufp;
4790 	txdp = tbufp->descp;
4791 	hdrp = &txdp->hdr;
4792 
4793 	while ((hdrp->dstate == VIO_DESC_DONE) &&
4794 	    (tbufp != ldcp->next_tbufp)) {
4795 		tbufp->flags = VGEN_PRIV_DESC_FREE;
4796 		hdrp->dstate = VIO_DESC_FREE;
4797 		hdrp->ack = B_FALSE;
4798 
4799 		tbufp = NEXTTBUF(ldcp, tbufp);
4800 		txdp = tbufp->descp;
4801 		hdrp = &txdp->hdr;
4802 	}
4803 
4804 	ldcp->cur_tbufp = tbufp;
4805 
4806 	/*
4807 	 * Check if mac layer should be notified to restart transmissions
4808 	 */
4809 	if (ldcp->need_resched) {
4810 		ldcp->need_resched = B_FALSE;
4811 		vnet_tx_update(vgenp->vnetp);
4812 	}
4813 }
4814 
4815 /* return the number of pending transmits for the channel */
4816 static int
4817 vgen_num_txpending(vgen_ldc_t *ldcp)
4818 {
4819 	int n;
4820 
4821 	if (ldcp->next_tbufp >= ldcp->cur_tbufp) {
4822 		n = ldcp->next_tbufp - ldcp->cur_tbufp;
4823 	} else  {
4824 		/* cur_tbufp > next_tbufp */
4825 		n = ldcp->num_txds - (ldcp->cur_tbufp - ldcp->next_tbufp);
4826 	}
4827 
4828 	return (n);
4829 }
4830 
4831 /* determine if the transmit descriptor ring is full */
4832 static int
4833 vgen_tx_dring_full(vgen_ldc_t *ldcp)
4834 {
4835 	vgen_private_desc_t	*tbufp;
4836 	vgen_private_desc_t	*ntbufp;
4837 
4838 	tbufp = ldcp->next_tbufp;
4839 	ntbufp = NEXTTBUF(ldcp, tbufp);
4840 	if (ntbufp == ldcp->cur_tbufp) { /* out of tbufs/txds */
4841 		return (VGEN_SUCCESS);
4842 	}
4843 	return (VGEN_FAILURE);
4844 }
4845 
4846 /* determine if timeout condition has occured */
4847 static int
4848 vgen_ldc_txtimeout(vgen_ldc_t *ldcp)
4849 {
4850 	if (((ddi_get_lbolt() - ldcp->reclaim_lbolt) >
4851 	    drv_usectohz(vnet_ldcwd_txtimeout * 1000)) &&
4852 	    (vnet_ldcwd_txtimeout) &&
4853 	    (vgen_tx_dring_full(ldcp) == VGEN_SUCCESS)) {
4854 		return (VGEN_SUCCESS);
4855 	} else {
4856 		return (VGEN_FAILURE);
4857 	}
4858 }
4859 
4860 /* transmit watchdog timeout handler */
4861 static void
4862 vgen_ldc_watchdog(void *arg)
4863 {
4864 	vgen_ldc_t *ldcp;
4865 	vgen_t *vgenp;
4866 	void *vnetp;
4867 	int rv;
4868 
4869 	ldcp = (vgen_ldc_t *)arg;
4870 	vgenp = LDC_TO_VGEN(ldcp);
4871 	vnetp = LDC_TO_VNET(ldcp);
4872 
4873 	rv = vgen_ldc_txtimeout(ldcp);
4874 	if (rv == VGEN_SUCCESS) {
4875 		DWARN((vnetp,
4876 		    "vgen_ldc_watchdog: transmit timeout ldcid(%lx)\n",
4877 		    ldcp->ldc_id));
4878 #ifdef DEBUG
4879 		if (vgen_trigger_txtimeout) {
4880 			/* tx timeout triggered for debugging */
4881 			vgen_trigger_txtimeout = 0;
4882 		}
4883 #endif
4884 		mutex_enter(&ldcp->cblock);
4885 		ldcp->need_ldc_reset = B_TRUE;
4886 		vgen_handshake_retry(ldcp);
4887 		mutex_exit(&ldcp->cblock);
4888 		if (ldcp->need_resched) {
4889 			ldcp->need_resched = B_FALSE;
4890 			vnet_tx_update(vgenp->vnetp);
4891 		}
4892 	}
4893 
4894 	ldcp->wd_tid = timeout(vgen_ldc_watchdog, (caddr_t)ldcp,
4895 	    drv_usectohz(vnet_ldcwd_interval * 1000));
4896 }
4897 
4898 static int
4899 vgen_setup_kstats(vgen_ldc_t *ldcp)
4900 {
4901 	vgen_t *vgenp;
4902 	struct kstat *ksp;
4903 	vgen_stats_t *statsp;
4904 	vgen_kstats_t *ldckp;
4905 	int instance;
4906 	size_t size;
4907 	char name[MAXNAMELEN];
4908 
4909 	vgenp = LDC_TO_VGEN(ldcp);
4910 	instance = ddi_get_instance(vgenp->vnetdip);
4911 	(void) sprintf(name, "vnetldc0x%lx", ldcp->ldc_id);
4912 	statsp = kmem_zalloc(sizeof (vgen_stats_t), KM_SLEEP);
4913 	if (statsp == NULL) {
4914 		return (VGEN_FAILURE);
4915 	}
4916 	size = sizeof (vgen_kstats_t) / sizeof (kstat_named_t);
4917 	ksp = kstat_create("vnet", instance, name, "net", KSTAT_TYPE_NAMED,
4918 		size, 0);
4919 	if (ksp == NULL) {
4920 		KMEM_FREE(statsp);
4921 		return (VGEN_FAILURE);
4922 	}
4923 
4924 	ldckp = (vgen_kstats_t *)ksp->ks_data;
4925 	kstat_named_init(&ldckp->ipackets,		"ipackets",
4926 		KSTAT_DATA_ULONG);
4927 	kstat_named_init(&ldckp->ipackets64,		"ipackets64",
4928 		KSTAT_DATA_ULONGLONG);
4929 	kstat_named_init(&ldckp->ierrors,		"ierrors",
4930 		KSTAT_DATA_ULONG);
4931 	kstat_named_init(&ldckp->opackets,		"opackets",
4932 		KSTAT_DATA_ULONG);
4933 	kstat_named_init(&ldckp->opackets64,		"opackets64",
4934 		KSTAT_DATA_ULONGLONG);
4935 	kstat_named_init(&ldckp->oerrors,		"oerrors",
4936 		KSTAT_DATA_ULONG);
4937 
4938 
4939 	/* MIB II kstat variables */
4940 	kstat_named_init(&ldckp->rbytes,		"rbytes",
4941 		KSTAT_DATA_ULONG);
4942 	kstat_named_init(&ldckp->rbytes64,		"rbytes64",
4943 		KSTAT_DATA_ULONGLONG);
4944 	kstat_named_init(&ldckp->obytes,		"obytes",
4945 		KSTAT_DATA_ULONG);
4946 	kstat_named_init(&ldckp->obytes64,		"obytes64",
4947 		KSTAT_DATA_ULONGLONG);
4948 	kstat_named_init(&ldckp->multircv,		"multircv",
4949 		KSTAT_DATA_ULONG);
4950 	kstat_named_init(&ldckp->multixmt,		"multixmt",
4951 		KSTAT_DATA_ULONG);
4952 	kstat_named_init(&ldckp->brdcstrcv,		"brdcstrcv",
4953 		KSTAT_DATA_ULONG);
4954 	kstat_named_init(&ldckp->brdcstxmt,		"brdcstxmt",
4955 		KSTAT_DATA_ULONG);
4956 	kstat_named_init(&ldckp->norcvbuf,		"norcvbuf",
4957 		KSTAT_DATA_ULONG);
4958 	kstat_named_init(&ldckp->noxmtbuf,		"noxmtbuf",
4959 		KSTAT_DATA_ULONG);
4960 
4961 	/* Tx stats */
4962 	kstat_named_init(&ldckp->tx_no_desc,		"tx_no_desc",
4963 		KSTAT_DATA_ULONG);
4964 
4965 	/* Rx stats */
4966 	kstat_named_init(&ldckp->rx_allocb_fail,	"rx_allocb_fail",
4967 		KSTAT_DATA_ULONG);
4968 	kstat_named_init(&ldckp->rx_vio_allocb_fail,	"rx_vio_allocb_fail",
4969 		KSTAT_DATA_ULONG);
4970 	kstat_named_init(&ldckp->rx_lost_pkts,		"rx_lost_pkts",
4971 		KSTAT_DATA_ULONG);
4972 
4973 	/* Interrupt stats */
4974 	kstat_named_init(&ldckp->callbacks,		"callbacks",
4975 		KSTAT_DATA_ULONG);
4976 	kstat_named_init(&ldckp->dring_data_acks,	"dring_data_acks",
4977 		KSTAT_DATA_ULONG);
4978 	kstat_named_init(&ldckp->dring_stopped_acks,	"dring_stopped_acks",
4979 		KSTAT_DATA_ULONG);
4980 	kstat_named_init(&ldckp->dring_data_msgs,	"dring_data_msgs",
4981 		KSTAT_DATA_ULONG);
4982 
4983 	ksp->ks_update = vgen_kstat_update;
4984 	ksp->ks_private = (void *)ldcp;
4985 	kstat_install(ksp);
4986 
4987 	ldcp->ksp = ksp;
4988 	ldcp->statsp = statsp;
4989 	return (VGEN_SUCCESS);
4990 }
4991 
4992 static void
4993 vgen_destroy_kstats(vgen_ldc_t *ldcp)
4994 {
4995 	if (ldcp->ksp)
4996 		kstat_delete(ldcp->ksp);
4997 	KMEM_FREE(ldcp->statsp);
4998 }
4999 
5000 static int
5001 vgen_kstat_update(kstat_t *ksp, int rw)
5002 {
5003 	vgen_ldc_t *ldcp;
5004 	vgen_stats_t *statsp;
5005 	vgen_kstats_t *ldckp;
5006 
5007 	ldcp = (vgen_ldc_t *)ksp->ks_private;
5008 	statsp = ldcp->statsp;
5009 	ldckp = (vgen_kstats_t *)ksp->ks_data;
5010 
5011 	if (rw == KSTAT_READ) {
5012 		ldckp->ipackets.value.ul	= (uint32_t)statsp->ipackets;
5013 		ldckp->ipackets64.value.ull	= statsp->ipackets;
5014 		ldckp->ierrors.value.ul		= statsp->ierrors;
5015 		ldckp->opackets.value.ul	= (uint32_t)statsp->opackets;
5016 		ldckp->opackets64.value.ull	= statsp->opackets;
5017 		ldckp->oerrors.value.ul		= statsp->oerrors;
5018 
5019 		/*
5020 		 * MIB II kstat variables
5021 		 */
5022 		ldckp->rbytes.value.ul		= (uint32_t)statsp->rbytes;
5023 		ldckp->rbytes64.value.ull	= statsp->rbytes;
5024 		ldckp->obytes.value.ul		= (uint32_t)statsp->obytes;
5025 		ldckp->obytes64.value.ull	= statsp->obytes;
5026 		ldckp->multircv.value.ul	= statsp->multircv;
5027 		ldckp->multixmt.value.ul	= statsp->multixmt;
5028 		ldckp->brdcstrcv.value.ul	= statsp->brdcstrcv;
5029 		ldckp->brdcstxmt.value.ul	= statsp->brdcstxmt;
5030 		ldckp->norcvbuf.value.ul	= statsp->norcvbuf;
5031 		ldckp->noxmtbuf.value.ul	= statsp->noxmtbuf;
5032 
5033 		ldckp->tx_no_desc.value.ul	= statsp->tx_no_desc;
5034 
5035 		ldckp->rx_allocb_fail.value.ul	= statsp->rx_allocb_fail;
5036 		ldckp->rx_vio_allocb_fail.value.ul = statsp->rx_vio_allocb_fail;
5037 		ldckp->rx_lost_pkts.value.ul	= statsp->rx_lost_pkts;
5038 
5039 		ldckp->callbacks.value.ul	= statsp->callbacks;
5040 		ldckp->dring_data_acks.value.ul	= statsp->dring_data_acks;
5041 		ldckp->dring_stopped_acks.value.ul = statsp->dring_stopped_acks;
5042 		ldckp->dring_data_msgs.value.ul	= statsp->dring_data_msgs;
5043 	} else {
5044 		statsp->ipackets	= ldckp->ipackets64.value.ull;
5045 		statsp->ierrors		= ldckp->ierrors.value.ul;
5046 		statsp->opackets	= ldckp->opackets64.value.ull;
5047 		statsp->oerrors		= ldckp->oerrors.value.ul;
5048 
5049 		/*
5050 		 * MIB II kstat variables
5051 		 */
5052 		statsp->rbytes		= ldckp->rbytes64.value.ull;
5053 		statsp->obytes		= ldckp->obytes64.value.ull;
5054 		statsp->multircv	= ldckp->multircv.value.ul;
5055 		statsp->multixmt	= ldckp->multixmt.value.ul;
5056 		statsp->brdcstrcv	= ldckp->brdcstrcv.value.ul;
5057 		statsp->brdcstxmt	= ldckp->brdcstxmt.value.ul;
5058 		statsp->norcvbuf	= ldckp->norcvbuf.value.ul;
5059 		statsp->noxmtbuf	= ldckp->noxmtbuf.value.ul;
5060 
5061 		statsp->tx_no_desc	= ldckp->tx_no_desc.value.ul;
5062 
5063 		statsp->rx_allocb_fail	= ldckp->rx_allocb_fail.value.ul;
5064 		statsp->rx_vio_allocb_fail = ldckp->rx_vio_allocb_fail.value.ul;
5065 		statsp->rx_lost_pkts	= ldckp->rx_lost_pkts.value.ul;
5066 
5067 		statsp->callbacks	= ldckp->callbacks.value.ul;
5068 		statsp->dring_data_acks	= ldckp->dring_data_acks.value.ul;
5069 		statsp->dring_stopped_acks = ldckp->dring_stopped_acks.value.ul;
5070 		statsp->dring_data_msgs	= ldckp->dring_data_msgs.value.ul;
5071 	}
5072 
5073 	return (VGEN_SUCCESS);
5074 }
5075 
/*
 * Handler for error messages received from the peer ldc end-point.
 *
 * This is currently an empty stub: both arguments are marked as unused
 * and the message is ignored.
 */
static void
vgen_handle_errmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
{
	_NOTE(ARGUNUSED(ldcp, tagp))
}
5082 
5083 /* Check if the session id in the received message is valid */
5084 static int
5085 vgen_check_sid(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5086 {
5087 	if (tagp->vio_sid != ldcp->peer_sid) {
5088 		void *vnetp = LDC_TO_VNET(ldcp);
5089 		DWARN((vnetp,
5090 		    "sid mismatch: expected(%x), rcvd(%x)\n",
5091 		    ldcp->peer_sid, tagp->vio_sid));
5092 		return (VGEN_FAILURE);
5093 	}
5094 	else
5095 		return (VGEN_SUCCESS);
5096 }
5097 
5098 /* convert mac address from string to uint64_t */
5099 static uint64_t
5100 vgen_macaddr_strtoul(const uint8_t *macaddr)
5101 {
5102 	uint64_t val = 0;
5103 	int i;
5104 
5105 	for (i = 0; i < ETHERADDRL; i++) {
5106 		val <<= 8;
5107 		val |= macaddr[i];
5108 	}
5109 
5110 	return (val);
5111 }
5112 
5113 /* convert mac address from uint64_t to string */
5114 static int
5115 vgen_macaddr_ultostr(uint64_t val, uint8_t *macaddr)
5116 {
5117 	int i;
5118 	uint64_t value;
5119 
5120 	value = val;
5121 	for (i = ETHERADDRL - 1; i >= 0; i--) {
5122 		macaddr[i] = value & 0xFF;
5123 		value >>= 8;
5124 	}
5125 	return (VGEN_SUCCESS);
5126 }
5127 
5128 static caddr_t
5129 vgen_print_ethaddr(uint8_t *a, char *ebuf)
5130 {
5131 	(void) sprintf(ebuf,
5132 		"%x:%x:%x:%x:%x:%x", a[0], a[1], a[2], a[3], a[4], a[5]);
5133 	return (ebuf);
5134 }
5135 
5136 /* Handshake watchdog timeout handler */
5137 static void
5138 vgen_hwatchdog(void *arg)
5139 {
5140 	vgen_ldc_t *ldcp = (vgen_ldc_t *)arg;
5141 	void *vnetp = LDC_TO_VNET(ldcp);
5142 
5143 	DWARN((vnetp,
5144 	    "vgen_hwatchdog: handshake timeout ldc(%lx) phase(%x) state(%x)\n",
5145 	    ldcp->ldc_id, ldcp->hphase, ldcp->hstate));
5146 
5147 	mutex_enter(&ldcp->cblock);
5148 	if (ldcp->cancel_htid) {
5149 		ldcp->cancel_htid = 0;
5150 		mutex_exit(&ldcp->cblock);
5151 		return;
5152 	}
5153 	ldcp->htid = 0;
5154 	ldcp->need_ldc_reset = B_TRUE;
5155 	vgen_handshake_retry(ldcp);
5156 	mutex_exit(&ldcp->cblock);
5157 }
5158 
5159 static void
5160 vgen_print_hparams(vgen_hparams_t *hp)
5161 {
5162 	uint8_t	addr[6];
5163 	char	ea[6];
5164 	ldc_mem_cookie_t *dc;
5165 
5166 	cmn_err(CE_CONT, "version_info:\n");
5167 	cmn_err(CE_CONT,
5168 	    "\tver_major: %d, ver_minor: %d, dev_class: %d\n",
5169 	    hp->ver_major, hp->ver_minor, hp->dev_class);
5170 
5171 	(void) vgen_macaddr_ultostr(hp->addr, addr);
5172 	cmn_err(CE_CONT, "attr_info:\n");
5173 	cmn_err(CE_CONT, "\tMTU: %lx, addr: %s\n", hp->mtu,
5174 	    vgen_print_ethaddr(addr, ea));
5175 	cmn_err(CE_CONT,
5176 	    "\taddr_type: %x, xfer_mode: %x, ack_freq: %x\n",
5177 	    hp->addr_type, hp->xfer_mode, hp->ack_freq);
5178 
5179 	dc = &hp->dring_cookie;
5180 	cmn_err(CE_CONT, "dring_info:\n");
5181 	cmn_err(CE_CONT,
5182 	    "\tlength: %d, dsize: %d\n", hp->num_desc, hp->desc_size);
5183 	cmn_err(CE_CONT,
5184 	    "\tldc_addr: 0x%lx, ldc_size: %ld\n",
5185 	    dc->addr, dc->size);
5186 	cmn_err(CE_CONT, "\tdring_ident: 0x%lx\n", hp->dring_ident);
5187 }
5188 
5189 static void
5190 vgen_print_ldcinfo(vgen_ldc_t *ldcp)
5191 {
5192 	vgen_hparams_t *hp;
5193 
5194 	cmn_err(CE_CONT, "Channel Information:\n");
5195 	cmn_err(CE_CONT,
5196 	    "\tldc_id: 0x%lx, ldc_status: 0x%x\n",
5197 	    ldcp->ldc_id, ldcp->ldc_status);
5198 	cmn_err(CE_CONT,
5199 	    "\tlocal_sid: 0x%x, peer_sid: 0x%x\n",
5200 	    ldcp->local_sid, ldcp->peer_sid);
5201 	cmn_err(CE_CONT,
5202 	    "\thphase: 0x%x, hstate: 0x%x\n",
5203 	    ldcp->hphase, ldcp->hstate);
5204 
5205 	cmn_err(CE_CONT, "Local handshake params:\n");
5206 	hp = &ldcp->local_hparams;
5207 	vgen_print_hparams(hp);
5208 
5209 	cmn_err(CE_CONT, "Peer handshake params:\n");
5210 	hp = &ldcp->peer_hparams;
5211 	vgen_print_hparams(hp);
5212 }
5213