xref: /titanic_51/usr/src/uts/sun4v/io/vnet_gen.c (revision 99ebb4ca412cb0a19d77a3899a87c055b9c30fa8)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/errno.h>
31 #include <sys/param.h>
32 #include <sys/stream.h>
33 #include <sys/kmem.h>
34 #include <sys/conf.h>
35 #include <sys/devops.h>
36 #include <sys/ksynch.h>
37 #include <sys/stat.h>
38 #include <sys/modctl.h>
39 #include <sys/debug.h>
40 #include <sys/ethernet.h>
41 #include <sys/ddi.h>
42 #include <sys/sunddi.h>
43 #include <sys/strsun.h>
44 #include <sys/note.h>
45 #include <sys/mac.h>
46 #include <sys/mac_ether.h>
47 #include <sys/ldc.h>
48 #include <sys/mach_descrip.h>
49 #include <sys/mdeg.h>
50 #include <sys/vio_mailbox.h>
51 #include <sys/vio_common.h>
52 #include <sys/vnet_common.h>
53 #include <sys/vnet_mailbox.h>
54 #include <sys/vio_util.h>
55 #include <sys/vnet_gen.h>
56 
57 /*
58  * Implementation of the mac functionality for vnet using the
59  * generic(default) transport layer of sun4v Logical Domain Channels(LDC).
60  */
61 
62 /*
63  * Function prototypes.
64  */
65 /* vgen proxy entry points */
66 int vgen_init(void *vnetp, dev_info_t *vnetdip, const uint8_t *macaddr,
67 	mac_register_t **vgenmacp);
68 int vgen_uninit(void *arg);
69 static int vgen_start(void *arg);
70 static void vgen_stop(void *arg);
71 static mblk_t *vgen_tx(void *arg, mblk_t *mp);
72 static int vgen_multicst(void *arg, boolean_t add,
73 	const uint8_t *mca);
74 static int vgen_promisc(void *arg, boolean_t on);
75 static int vgen_unicst(void *arg, const uint8_t *mca);
76 static int vgen_stat(void *arg, uint_t stat, uint64_t *val);
77 static void vgen_ioctl(void *arg, queue_t *wq, mblk_t *mp);
78 
79 /* externs - functions provided by vnet to add/remove/modify entries in fdb */
80 void vnet_add_fdb(void *arg, uint8_t *macaddr, mac_tx_t m_tx, void *txarg);
81 void vnet_del_fdb(void *arg, uint8_t *macaddr);
82 void vnet_modify_fdb(void *arg, uint8_t *macaddr, mac_tx_t m_tx,
83 	void *txarg, boolean_t upgrade);
84 void vnet_add_def_rte(void *arg, mac_tx_t m_tx, void *txarg);
85 void vnet_del_def_rte(void *arg);
86 void vnet_rx(void *arg, mac_resource_handle_t mrh, mblk_t *mp);
87 void vnet_tx_update(void *arg);
88 
89 /* vgen internal functions */
90 static void vgen_detach_ports(vgen_t *vgenp);
91 static void vgen_port_detach(vgen_port_t *portp);
92 static void vgen_port_list_insert(vgen_port_t *portp);
93 static void vgen_port_list_remove(vgen_port_t *portp);
94 static vgen_port_t *vgen_port_lookup(vgen_portlist_t *plistp,
95 	int port_num);
96 static int vgen_mdeg_reg(vgen_t *vgenp);
97 static void vgen_mdeg_unreg(vgen_t *vgenp);
98 static int vgen_mdeg_cb(void *cb_argp, mdeg_result_t *resp);
99 static int vgen_add_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
100 static int vgen_remove_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
101 static int vgen_port_attach_mdeg(vgen_t *vgenp, int port_num, uint64_t *ldcids,
102 	int num_ids, struct ether_addr *macaddr, boolean_t vsw_port);
103 static void vgen_port_detach_mdeg(vgen_port_t *portp);
104 static int vgen_update_port(vgen_t *vgenp, md_t *curr_mdp,
105 	mde_cookie_t curr_mdex, md_t *prev_mdp, mde_cookie_t prev_mdex);
106 static uint64_t	vgen_port_stat(vgen_port_t *portp, uint_t stat);
107 
108 static int vgen_ldc_attach(vgen_port_t *portp, uint64_t ldc_id);
109 static void vgen_ldc_detach(vgen_ldc_t *ldcp);
110 static int vgen_alloc_tx_ring(vgen_ldc_t *ldcp);
111 static void vgen_free_tx_ring(vgen_ldc_t *ldcp);
112 static void vgen_init_ports(vgen_t *vgenp);
113 static void vgen_port_init(vgen_port_t *portp);
114 static void vgen_uninit_ports(vgen_t *vgenp);
115 static void vgen_port_uninit(vgen_port_t *portp);
116 static void vgen_init_ldcs(vgen_port_t *portp);
117 static void vgen_uninit_ldcs(vgen_port_t *portp);
118 static int vgen_ldc_init(vgen_ldc_t *ldcp);
119 static void vgen_ldc_uninit(vgen_ldc_t *ldcp);
120 static int vgen_init_tbufs(vgen_ldc_t *ldcp);
121 static void vgen_uninit_tbufs(vgen_ldc_t *ldcp);
122 static void vgen_clobber_tbufs(vgen_ldc_t *ldcp);
123 static void vgen_clobber_rxds(vgen_ldc_t *ldcp);
124 static uint64_t	vgen_ldc_stat(vgen_ldc_t *ldcp, uint_t stat);
125 static uint_t vgen_ldc_cb(uint64_t event, caddr_t arg);
126 static int vgen_portsend(vgen_port_t *portp, mblk_t *mp);
127 static int vgen_ldcsend(vgen_ldc_t *ldcp, mblk_t *mp);
128 static void vgen_reclaim(vgen_ldc_t *ldcp);
129 static void vgen_reclaim_dring(vgen_ldc_t *ldcp);
130 static int vgen_num_txpending(vgen_ldc_t *ldcp);
131 static int vgen_tx_dring_full(vgen_ldc_t *ldcp);
132 static int vgen_ldc_txtimeout(vgen_ldc_t *ldcp);
133 static void vgen_ldc_watchdog(void *arg);
134 static int vgen_setup_kstats(vgen_ldc_t *ldcp);
135 static void vgen_destroy_kstats(vgen_ldc_t *ldcp);
136 static int vgen_kstat_update(kstat_t *ksp, int rw);
137 
138 /* vgen handshake functions */
139 static vgen_ldc_t *vh_nextphase(vgen_ldc_t *ldcp);
140 static int vgen_supported_version(vgen_ldc_t *ldcp, uint16_t ver_major,
141 	uint16_t ver_minor);
142 static int vgen_next_version(vgen_ldc_t *ldcp, vgen_ver_t *verp);
143 static int vgen_sendmsg(vgen_ldc_t *ldcp, caddr_t msg,  size_t msglen,
144 	boolean_t caller_holds_lock);
145 static int vgen_send_version_negotiate(vgen_ldc_t *ldcp);
146 static int vgen_send_attr_info(vgen_ldc_t *ldcp);
147 static int vgen_send_dring_reg(vgen_ldc_t *ldcp);
148 static int vgen_send_rdx_info(vgen_ldc_t *ldcp);
149 static int vgen_send_dring_data(vgen_ldc_t *ldcp, uint32_t start, int32_t end);
150 static int vgen_send_mcast_info(vgen_ldc_t *ldcp);
151 static int vgen_handshake_phase2(vgen_ldc_t *ldcp);
152 static void vgen_handshake_reset(vgen_ldc_t *ldcp);
153 static void vgen_reset_hphase(vgen_ldc_t *ldcp);
154 static void vgen_handshake(vgen_ldc_t *ldcp);
155 static int vgen_handshake_done(vgen_ldc_t *ldcp);
156 static void vgen_handshake_retry(vgen_ldc_t *ldcp);
157 static int vgen_handle_version_negotiate(vgen_ldc_t *ldcp,
158 	vio_msg_tag_t *tagp);
159 static int vgen_handle_attr_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
160 static int vgen_handle_dring_reg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
161 static int vgen_handle_rdx_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
162 static int vgen_handle_mcast_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
163 static int vgen_handle_ctrlmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
164 static int vgen_handle_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp,
165 	mblk_t **headp, mblk_t **tailp);
166 static int vgen_send_dring_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp,
167 	uint32_t start, int32_t end, uint8_t pstate);
168 static int vgen_handle_datamsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp,
169 	mblk_t **headp, mblk_t **tailp);
170 static void vgen_handle_errmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
171 static void vgen_handle_evt_up(vgen_ldc_t *ldcp, boolean_t flag);
172 static void vgen_handle_evt_reset(vgen_ldc_t *ldcp, boolean_t flag);
173 static int vgen_check_sid(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
174 static uint64_t	vgen_macaddr_strtoul(const uint8_t *macaddr);
175 static int vgen_macaddr_ultostr(uint64_t value, uint8_t *macaddr);
176 static caddr_t vgen_print_ethaddr(uint8_t *a, char *ebuf);
177 static void vgen_hwatchdog(void *arg);
178 static void vgen_print_attr_info(vgen_ldc_t *ldcp, int endpoint);
179 static void vgen_print_hparams(vgen_hparams_t *hp);
180 static void vgen_print_ldcinfo(vgen_ldc_t *ldcp);
181 
182 /*
183  * The handshake process consists of 5 phases defined below, with VH_PHASE0
184  * being the pre-handshake phase and VH_DONE is the phase to indicate
185  * successful completion of all phases.
186  * Each phase may have one to several handshake states which are required
187  * to complete successfully to move to the next phase.
188  * Refer to the functions vgen_handshake() and vgen_handshake_done() for
189  * more details.
190  */
/* handshake phases */
enum {
	VH_PHASE0,		/* pre-handshake phase */
	VH_PHASE1,
	VH_PHASE2,
	VH_PHASE3,
	VH_DONE = 0x80		/* all phases completed successfully */
};

/*
 * handshake states
 *
 * Every state is a single bit. The states come in groups of four
 * (info sent, ack received, info received, ack sent), one group each for
 * version, attribute, descriptor ring and RDX messages. Each group also
 * defines a composite value that is the OR of its two "ack" bits.
 */
enum {

	/* version negotiation */
	VER_INFO_SENT		= (1 << 0),
	VER_ACK_RCVD		= (1 << 1),
	VER_INFO_RCVD		= (1 << 2),
	VER_ACK_SENT		= (1 << 3),
	VER_NEGOTIATED		= (VER_ACK_RCVD | VER_ACK_SENT),

	/* attribute exchange */
	ATTR_INFO_SENT		= (1 << 4),
	ATTR_ACK_RCVD		= (1 << 5),
	ATTR_INFO_RCVD		= (1 << 6),
	ATTR_ACK_SENT		= (1 << 7),
	ATTR_INFO_EXCHANGED	= (ATTR_ACK_RCVD | ATTR_ACK_SENT),

	/* descriptor ring registration */
	DRING_INFO_SENT		= (1 << 8),
	DRING_ACK_RCVD		= (1 << 9),
	DRING_INFO_RCVD		= (1 << 10),
	DRING_ACK_SENT		= (1 << 11),
	DRING_INFO_EXCHANGED	= (DRING_ACK_RCVD | DRING_ACK_SENT),

	/* RDX exchange */
	RDX_INFO_SENT		= (1 << 12),
	RDX_ACK_RCVD		= (1 << 13),
	RDX_INFO_RCVD		= (1 << 14),
	RDX_ACK_SENT		= (1 << 15),
	RDX_EXCHANGED		= (RDX_ACK_RCVD | RDX_ACK_SENT)

};
222 
/*
 * Acquire (LDC_LOCK) or release (LDC_UNLOCK) all three per-channel mutexes.
 * They are taken in the order cblock, txlock, tclock and dropped in the
 * reverse order.
 *
 * Each macro is a single statement (do { } while (0)) so that it can be
 * used safely as the body of an unbraced if/else; invoke it with a
 * trailing semicolon.
 */
#define	LDC_LOCK(ldcp)	do {						\
				mutex_enter(&((ldcp)->cblock));		\
				mutex_enter(&((ldcp)->txlock));		\
				mutex_enter(&((ldcp)->tclock));		\
			_NOTE(CONSTCOND) } while (0)
#define	LDC_UNLOCK(ldcp)	do {					\
				mutex_exit(&((ldcp)->tclock));		\
				mutex_exit(&((ldcp)->txlock));		\
				mutex_exit(&((ldcp)->cblock));		\
			_NOTE(CONSTCOND) } while (0)
231 
232 static struct ether_addr etherbroadcastaddr = {
233 	0xff, 0xff, 0xff, 0xff, 0xff, 0xff
234 };
235 /*
236  * MIB II broadcast/multicast packets
237  */
238 #define	IS_BROADCAST(ehp) \
239 		(ether_cmp(&ehp->ether_dhost, &etherbroadcastaddr) == 0)
240 #define	IS_MULTICAST(ehp) \
241 		((ehp->ether_dhost.ether_addr_octet[0] & 01) == 1)
242 
243 /*
244  * Property names
245  */
246 static char macaddr_propname[] = "mac-address";
247 static char rmacaddr_propname[] = "remote-mac-address";
248 static char channel_propname[] = "channel-endpoint";
249 static char reg_propname[] = "reg";
250 static char port_propname[] = "port";
251 static char swport_propname[] = "switch-port";
252 static char id_propname[] = "id";
253 
254 /* versions supported - in decreasing order */
255 static vgen_ver_t vgen_versions[VGEN_NUM_VER] = { {1, 0} };
256 
257 /* Tunables */
258 uint32_t vgen_hwd_interval = 1000;	/* handshake watchdog freq in msec */
259 uint32_t vgen_max_hretries = 1;		/* max # of handshake retries */
260 uint32_t vgen_ldcwr_retries = 10;	/* max # of ldc_write() retries */
261 uint32_t vgen_ldcup_retries = 5;	/* max # of ldc_up() retries */
262 uint32_t vgen_recv_delay = 1;		/* delay when rx descr not ready */
263 uint32_t vgen_recv_retries = 10;	/* retry when rx descr not ready */
264 
265 #ifdef DEBUG
266 /* flags to simulate error conditions for debugging */
267 int vgen_trigger_txtimeout = 0;
268 int vgen_trigger_rxlost = 0;
269 #endif
270 
271 /* MD update matching structure */
272 static md_prop_match_t	vport_prop_match[] = {
273 	{ MDET_PROP_VAL,	"id" },
274 	{ MDET_LIST_END,	NULL }
275 };
276 
277 static mdeg_node_match_t vport_match = { "virtual-device-port",
278 					vport_prop_match };
279 
280 /* template for matching a particular vnet instance */
281 static mdeg_prop_spec_t vgen_prop_template[] = {
282 	{ MDET_PROP_STR,	"name",		"network" },
283 	{ MDET_PROP_VAL,	"cfg-handle",	NULL },
284 	{ MDET_LIST_END,	NULL,		NULL }
285 };
286 
/*
 * Store val in the second entry (index 1) of a property spec array.
 * The expansion is fully parenthesized so it behaves as one expression.
 */
#define	VGEN_SET_MDEG_PROP_INST(specp, val)	((specp)[1].ps_val = (val))
288 
289 static int vgen_mdeg_cb(void *cb_argp, mdeg_result_t *resp);
290 
291 static mac_callbacks_t vgen_m_callbacks = {
292 	0,
293 	vgen_stat,
294 	vgen_start,
295 	vgen_stop,
296 	vgen_promisc,
297 	vgen_multicst,
298 	vgen_unicst,
299 	vgen_tx,
300 	NULL,
301 	NULL,
302 	NULL
303 };
304 
305 /* externs */
306 extern uint32_t vnet_ntxds;
307 extern uint32_t vnet_ldcwd_interval;
308 extern uint32_t vnet_ldcwd_txtimeout;
309 extern uint32_t vnet_ldc_mtu;
310 extern uint32_t vnet_nrbufs;
311 extern int _vnet_dbglevel;
312 extern void _vnetdebug_printf(void *vnetp, const char *fmt, ...);
313 
#ifdef DEBUG

/*
 * NOTE: definitions below need to be in sync with those in vnet.c
 */

/*
 * debug levels:
 * DBG_LEVEL1:	Function entry/exit tracing
 * DBG_LEVEL2:	Info messages
 * DBG_LEVEL3:	Warning messages
 * DBG_LEVEL4:	Error messages
 */

enum	{ DBG_LEVEL1 = 0x01, DBG_LEVEL2 = 0x02, DBG_LEVEL3 = 0x04,
	    DBG_LEVEL4 = 0x08 };

/*
 * Each macro takes a single, doubly-parenthesized argument list that is
 * handed to _vnetdebug_printf() when the matching bit is set in
 * _vnet_dbglevel, e.g. DBG1((vnetp, "fmt %d\n", x));
 */
#define	DBG1(_s)	do {						\
			    if ((_vnet_dbglevel & DBG_LEVEL1) != 0) {	\
					_vnetdebug_printf _s;		\
			    }					\
			_NOTE(CONSTCOND) } while (0)

#define	DBG2(_s)	do {						\
			    if ((_vnet_dbglevel & DBG_LEVEL2) != 0) {	\
					_vnetdebug_printf _s;		\
			    }					\
			_NOTE(CONSTCOND) } while (0)

#define	DWARN(_s)	do {						\
			    if ((_vnet_dbglevel & DBG_LEVEL3) != 0) {	\
					_vnetdebug_printf _s;		\
			    }					\
			_NOTE(CONSTCOND) } while (0)

#define	DERR(_s)	do {						\
			    if ((_vnet_dbglevel & DBG_LEVEL4) != 0) {	\
					_vnetdebug_printf _s;		\
			    }					\
			_NOTE(CONSTCOND) } while (0)

#else

/*
 * Non-DEBUG variants: the call is kept inside a dead "if (0)" so that the
 * arguments are still compiled but never evaluated. The do { } while (0)
 * wrapper makes each macro a single statement, so that an "else" that
 * follows "if (x) DBG1((...));" binds to the caller's "if" and not to the
 * hidden "if (0)".
 */
#define	DBG1(_s)	do {						\
			    if (0) {					\
					_vnetdebug_printf _s;		\
			    }					\
			_NOTE(CONSTCOND) } while (0)

#define	DBG2(_s)	do {						\
			    if (0) {					\
					_vnetdebug_printf _s;		\
			    }					\
			_NOTE(CONSTCOND) } while (0)

#define	DWARN(_s)	do {						\
			    if (0) {					\
					_vnetdebug_printf _s;		\
			    }					\
			_NOTE(CONSTCOND) } while (0)

#define	DERR(_s)	do {						\
			    if (0) {					\
					_vnetdebug_printf _s;		\
			    }					\
			_NOTE(CONSTCOND) } while (0)

#endif
363 
364 #ifdef DEBUG
365 
366 /* simulate handshake error conditions for debug */
367 uint32_t vgen_hdbg;
368 #define	HDBG_VERSION	0x1
369 #define	HDBG_TIMEOUT	0x2
370 #define	HDBG_BAD_SID	0x4
371 #define	HDBG_OUT_STATE	0x8
372 
373 #endif
374 
375 
376 
377 /*
378  * vgen_init() is called by an instance of vnet driver to initialize the
379  * corresponding generic proxy transport layer. The arguments passed by vnet
380  * are - an opaque pointer to the vnet instance, pointers to dev_info_t and
381  * the mac address of the vnet device, and a pointer to mac_register_t of
382  * the generic transport is returned in the last argument.
383  */
384 int
385 vgen_init(void *vnetp, dev_info_t *vnetdip, const uint8_t *macaddr,
386     mac_register_t **vgenmacp)
387 {
388 	vgen_t *vgenp;
389 	mac_register_t *macp;
390 	int instance;
391 
392 	if ((vnetp == NULL) || (vnetdip == NULL))
393 		return (DDI_FAILURE);
394 
395 	instance = ddi_get_instance(vnetdip);
396 
397 	DBG1((vnetp, "vgen_init: enter vnet_instance(%d)\n", instance));
398 
399 	vgenp = kmem_zalloc(sizeof (vgen_t), KM_SLEEP);
400 
401 	vgenp->vnetp = vnetp;
402 	vgenp->vnetdip = vnetdip;
403 	bcopy(macaddr, &(vgenp->macaddr), ETHERADDRL);
404 
405 	if ((macp = mac_alloc(MAC_VERSION)) == NULL) {
406 		KMEM_FREE(vgenp);
407 		return (DDI_FAILURE);
408 	}
409 	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
410 	macp->m_driver = vgenp;
411 	macp->m_dip = vnetdip;
412 	macp->m_src_addr = (uint8_t *)&(vgenp->macaddr);
413 	macp->m_callbacks = &vgen_m_callbacks;
414 	macp->m_min_sdu = 0;
415 	macp->m_max_sdu = ETHERMTU;
416 	vgenp->macp = macp;
417 
418 	/* allocate multicast table */
419 	vgenp->mctab = kmem_zalloc(VGEN_INIT_MCTAB_SIZE *
420 	    sizeof (struct ether_addr), KM_SLEEP);
421 	vgenp->mccount = 0;
422 	vgenp->mcsize = VGEN_INIT_MCTAB_SIZE;
423 
424 	mutex_init(&vgenp->lock, NULL, MUTEX_DRIVER, NULL);
425 
426 	/* register with MD event generator */
427 	if (vgen_mdeg_reg(vgenp) != DDI_SUCCESS) {
428 		mutex_destroy(&vgenp->lock);
429 		kmem_free(vgenp->mctab, VGEN_INIT_MCTAB_SIZE *
430 		    sizeof (struct ether_addr));
431 		mac_free(vgenp->macp);
432 		KMEM_FREE(vgenp);
433 		return (DDI_FAILURE);
434 	}
435 
436 	/* register macp of this vgen_t with vnet */
437 	*vgenmacp = vgenp->macp;
438 
439 	DBG1((vnetp, "vgen_init: exit vnet_instance(%d)\n", instance));
440 	return (DDI_SUCCESS);
441 }
442 
443 /*
444  * Called by vnet to undo the initializations done by vgen_init().
445  * The handle provided by generic transport during vgen_init() is the argument.
446  */
447 int
448 vgen_uninit(void *arg)
449 {
450 	vgen_t	*vgenp = (vgen_t *)arg;
451 	void	*vnetp;
452 	int instance;
453 	vio_mblk_pool_t *rp, *nrp;
454 
455 	if (vgenp == NULL) {
456 		return (DDI_FAILURE);
457 	}
458 
459 	instance = ddi_get_instance(vgenp->vnetdip);
460 	vnetp = vgenp->vnetp;
461 
462 	DBG1((vnetp, "vgen_uninit: enter vnet_instance(%d)\n", instance));
463 
464 	/* unregister with MD event generator */
465 	vgen_mdeg_unreg(vgenp);
466 
467 	mutex_enter(&vgenp->lock);
468 
469 	/* detach all ports from the device */
470 	vgen_detach_ports(vgenp);
471 
472 	/*
473 	 * free any pending rx mblk pools,
474 	 * that couldn't be freed previously during channel detach.
475 	 */
476 	rp = vgenp->rmp;
477 	while (rp != NULL) {
478 		nrp = vgenp->rmp = rp->nextp;
479 		if (vio_destroy_mblks(rp)) {
480 			vgenp->rmp = rp;
481 			mutex_exit(&vgenp->lock);
482 			return (DDI_FAILURE);
483 		}
484 		rp = nrp;
485 	}
486 
487 	/* free multicast table */
488 	kmem_free(vgenp->mctab, vgenp->mcsize * sizeof (struct ether_addr));
489 
490 	mac_free(vgenp->macp);
491 
492 	mutex_exit(&vgenp->lock);
493 
494 	mutex_destroy(&vgenp->lock);
495 
496 	KMEM_FREE(vgenp);
497 
498 	DBG1((vnetp, "vgen_uninit: exit vnet_instance(%d)\n", instance));
499 
500 	return (DDI_SUCCESS);
501 }
502 
503 /* enable transmit/receive for the device */
504 int
505 vgen_start(void *arg)
506 {
507 	vgen_t		*vgenp = (vgen_t *)arg;
508 
509 	DBG1((vgenp->vnetp, "vgen_start: enter\n"));
510 
511 	mutex_enter(&vgenp->lock);
512 	vgen_init_ports(vgenp);
513 	vgenp->flags |= VGEN_STARTED;
514 	mutex_exit(&vgenp->lock);
515 
516 	DBG1((vgenp->vnetp, "vgen_start: exit\n"));
517 	return (DDI_SUCCESS);
518 }
519 
520 /* stop transmit/receive */
521 void
522 vgen_stop(void *arg)
523 {
524 	vgen_t		*vgenp = (vgen_t *)arg;
525 
526 	DBG1((vgenp->vnetp, "vgen_stop: enter\n"));
527 
528 	mutex_enter(&vgenp->lock);
529 	vgen_uninit_ports(vgenp);
530 	vgenp->flags &= ~(VGEN_STARTED);
531 	mutex_exit(&vgenp->lock);
532 
533 	DBG1((vgenp->vnetp, "vgen_stop: exit\n"));
534 }
535 
536 /* vgen transmit function */
537 static mblk_t *
538 vgen_tx(void *arg, mblk_t *mp)
539 {
540 	vgen_port_t *portp;
541 	int status;
542 
543 	portp = (vgen_port_t *)arg;
544 	status = vgen_portsend(portp, mp);
545 	if (status != VGEN_SUCCESS) {
546 		/* failure */
547 		return (mp);
548 	}
549 	/* success */
550 	return (NULL);
551 }
552 
553 /* transmit packets over the given port */
554 static int
555 vgen_portsend(vgen_port_t *portp, mblk_t *mp)
556 {
557 	vgen_ldclist_t	*ldclp;
558 	vgen_ldc_t *ldcp;
559 	int status;
560 
561 	ldclp = &portp->ldclist;
562 	READ_ENTER(&ldclp->rwlock);
563 	/*
564 	 * NOTE: for now, we will assume we have a single channel.
565 	 */
566 	if (ldclp->headp == NULL) {
567 		RW_EXIT(&ldclp->rwlock);
568 		return (VGEN_FAILURE);
569 	}
570 	ldcp = ldclp->headp;
571 
572 	if (ldcp->need_resched) {
573 		/* out of tx resources, see vgen_ldcsend() for details. */
574 		mutex_enter(&ldcp->txlock);
575 		ldcp->statsp->tx_no_desc++;
576 		mutex_exit(&ldcp->txlock);
577 
578 		RW_EXIT(&ldclp->rwlock);
579 		return (VGEN_FAILURE);
580 	}
581 
582 	status  = vgen_ldcsend(ldcp, mp);
583 	RW_EXIT(&ldclp->rwlock);
584 
585 	if (status != VGEN_TX_SUCCESS)
586 		return (VGEN_FAILURE);
587 
588 	return (VGEN_SUCCESS);
589 }
590 
591 /* channel transmit function */
592 static int
593 vgen_ldcsend(vgen_ldc_t *ldcp, mblk_t *mp)
594 {
595 	void		*vnetp;
596 	size_t		size;
597 	int		rv = 0;
598 	uint64_t	tbuf_ix;
599 	vgen_private_desc_t	*tbufp;
600 	vgen_private_desc_t	*ntbufp;
601 	vnet_public_desc_t	*txdp;
602 	vio_dring_entry_hdr_t		*hdrp;
603 	vgen_stats_t		*statsp;
604 	struct ether_header	*ehp;
605 	boolean_t	is_bcast = B_FALSE;
606 	boolean_t	is_mcast = B_FALSE;
607 	size_t		mblksz;
608 	caddr_t		dst;
609 	mblk_t		*bp;
610 	ldc_status_t	istatus;
611 
612 	vnetp = LDC_TO_VNET(ldcp);
613 	statsp = ldcp->statsp;
614 	size = msgsize(mp);
615 
616 	DBG1((vnetp, "vgen_ldcsend: enter ldcid(%lx)\n", ldcp->ldc_id));
617 
618 	mutex_enter(&ldcp->txlock);
619 
620 	/* drop the packet if ldc is not up or handshake is not done */
621 	if (ldcp->ldc_status != LDC_UP) {
622 		DWARN((vnetp,
623 		    "vgen_ldcsend: id(%lx) status(%d), dropping packet\n",
624 		    ldcp->ldc_id, ldcp->ldc_status));
625 		/* retry ldc_up() if needed */
626 		if (ldcp->flags & CHANNEL_STARTED)
627 			(void) ldc_up(ldcp->ldc_handle);
628 		goto vgen_tx_exit;
629 	}
630 
631 	if (ldcp->hphase != VH_DONE) {
632 		DWARN((vnetp,
633 		    "vgen_ldcsend: id(%lx) hphase(%x), dropping packet\n",
634 		    ldcp->ldc_id, ldcp->hphase));
635 		goto vgen_tx_exit;
636 	}
637 
638 	if (size > (size_t)ETHERMAX) {
639 		DWARN((vnetp, "vgen_ldcsend: id(%lx) invalid size(%d)\n",
640 		    ldcp->ldc_id, size));
641 		goto vgen_tx_exit;
642 	}
643 
644 	/*
645 	 * allocate a descriptor
646 	 */
647 	tbufp = ldcp->next_tbufp;
648 	ntbufp = NEXTTBUF(ldcp, tbufp);
649 	if (ntbufp == ldcp->cur_tbufp) { /* out of tbufs/txds */
650 
651 		mutex_enter(&ldcp->tclock);
652 		if (ntbufp == ldcp->cur_tbufp) {
653 			ldcp->need_resched = B_TRUE;
654 			mutex_exit(&ldcp->tclock);
655 
656 			statsp->tx_no_desc++;
657 			mutex_exit(&ldcp->txlock);
658 
659 			return (VGEN_TX_NORESOURCES);
660 		}
661 		mutex_exit(&ldcp->tclock);
662 	}
663 
664 	if (size < ETHERMIN)
665 		size = ETHERMIN;
666 
667 	/* copy data into pre-allocated transmit buffer */
668 	dst = tbufp->datap + VNET_IPALIGN;
669 	for (bp = mp; bp != NULL; bp = bp->b_cont) {
670 		mblksz = MBLKL(bp);
671 		bcopy(bp->b_rptr, dst, mblksz);
672 		dst += mblksz;
673 	}
674 
675 	tbuf_ix = tbufp - ldcp->tbufp;
676 
677 	ehp = (struct ether_header *)tbufp->datap;
678 	is_bcast = IS_BROADCAST(ehp);
679 	is_mcast = IS_MULTICAST(ehp);
680 
681 	tbufp->flags = VGEN_PRIV_DESC_BUSY;
682 	tbufp->datalen = size;
683 
684 	/* initialize the corresponding public descriptor (txd) */
685 	txdp = tbufp->descp;
686 	hdrp = &txdp->hdr;
687 	txdp->nbytes = size;
688 	txdp->ncookies = tbufp->ncookies;
689 	bcopy((tbufp->memcookie), (txdp->memcookie),
690 		tbufp->ncookies * sizeof (ldc_mem_cookie_t));
691 	hdrp->dstate = VIO_DESC_READY;
692 
693 	/* send dring datamsg to the peer */
694 	if (ldcp->resched_peer) {
695 		rv = vgen_send_dring_data(ldcp, (uint32_t)tbuf_ix, -1);
696 		if (rv != 0) {
697 			/* vgen_send_dring_data() error: drop the packet */
698 			DWARN((vnetp,
699 			    "vgen_ldcsend: vgen_send_dring_data():  failed: "
700 			    "id(%lx) rv(%d) len (%d)\n",
701 			    ldcp->ldc_id, rv, size));
702 			tbufp->flags = VGEN_PRIV_DESC_FREE;	/* free tbuf */
703 			hdrp->dstate = VIO_DESC_FREE;	/* free txd */
704 			hdrp->ack = B_FALSE;
705 			statsp->oerrors++;
706 			goto vgen_tx_exit;
707 		}
708 		ldcp->resched_peer = B_FALSE;
709 	}
710 
711 	/* update next available tbuf in the ring */
712 	ldcp->next_tbufp = ntbufp;
713 
714 	/* update tx index */
715 	INCR_TXI(ldcp->next_txi, ldcp);
716 
717 	/* update stats */
718 	statsp->opackets++;
719 	statsp->obytes += size;
720 	if (is_bcast)
721 		statsp->brdcstxmt++;
722 	else if (is_mcast)
723 		statsp->multixmt++;
724 
725 vgen_tx_exit:
726 	mutex_exit(&ldcp->txlock);
727 
728 	if (rv == ECONNRESET) {
729 		/*
730 		 * Check if either callback thread or another tx thread is
731 		 * already running. Calling mutex_enter() will result in a
732 		 * deadlock if the other thread already holds cblock and is
733 		 * blocked in vnet_modify_fdb() (which is called from
734 		 * vgen_handle_evt_reset()) waiting for write access on rwlock,
735 		 * as this transmit thread already holds that lock as a reader
736 		 * in vnet_m_tx(). See comments in vnet_modify_fdb() in vnet.c.
737 		 */
738 		if (mutex_tryenter(&ldcp->cblock)) {
739 			if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
740 				DWARN((vnetp,
741 				    "vgen_ldcsend: ldc_status err id(%lx)\n"));
742 			} else {
743 				ldcp->ldc_status = istatus;
744 			}
745 			if (ldcp->ldc_status != LDC_UP) {
746 				/*
747 				 * Second arg is TRUE, as we know that
748 				 * the caller of this function - vnet_m_tx(),
749 				 * already holds fdb-rwlock as a reader.
750 				 */
751 				vgen_handle_evt_reset(ldcp, B_TRUE);
752 			}
753 			mutex_exit(&ldcp->cblock);
754 		}
755 	}
756 
757 	DBG1((vnetp, "vgen_ldcsend: exit: ldcid (%lx)\n", ldcp->ldc_id));
758 
759 	freemsg(mp);
760 	return (VGEN_TX_SUCCESS);
761 }
762 
763 /* enable/disable a multicast address */
764 int
765 vgen_multicst(void *arg, boolean_t add, const uint8_t *mca)
766 {
767 	vgen_t			*vgenp;
768 	vnet_mcast_msg_t	mcastmsg;
769 	vio_msg_tag_t		*tagp;
770 	vgen_port_t		*portp;
771 	vgen_portlist_t		*plistp;
772 	vgen_ldc_t		*ldcp;
773 	vgen_ldclist_t		*ldclp;
774 	void			*vnetp;
775 	struct ether_addr	*addrp;
776 	int			rv;
777 	uint32_t		i;
778 
779 	vgenp = (vgen_t *)arg;
780 	vnetp = vgenp->vnetp;
781 	addrp = (struct ether_addr *)mca;
782 	tagp = &mcastmsg.tag;
783 	bzero(&mcastmsg, sizeof (mcastmsg));
784 
785 	mutex_enter(&vgenp->lock);
786 
787 	plistp = &(vgenp->vgenports);
788 
789 	READ_ENTER(&plistp->rwlock);
790 
791 	portp = vgenp->vsw_portp;
792 	if (portp == NULL) {
793 		RW_EXIT(&plistp->rwlock);
794 		goto vgen_mcast_exit;
795 	}
796 	ldclp = &portp->ldclist;
797 
798 	READ_ENTER(&ldclp->rwlock);
799 
800 	ldcp = ldclp->headp;
801 	if (ldcp == NULL) {
802 		RW_EXIT(&ldclp->rwlock);
803 		RW_EXIT(&plistp->rwlock);
804 		goto vgen_mcast_exit;
805 	}
806 
807 	mutex_enter(&ldcp->cblock);
808 
809 	if (ldcp->hphase == VH_DONE) {
810 		/*
811 		 * If handshake is done, send a msg to vsw to add/remove
812 		 * the multicast address.
813 		 */
814 		tagp->vio_msgtype = VIO_TYPE_CTRL;
815 		tagp->vio_subtype = VIO_SUBTYPE_INFO;
816 		tagp->vio_subtype_env = VNET_MCAST_INFO;
817 		tagp->vio_sid = ldcp->local_sid;
818 		bcopy(mca, &(mcastmsg.mca), ETHERADDRL);
819 		mcastmsg.set = add;
820 		mcastmsg.count = 1;
821 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (mcastmsg),
822 		    B_FALSE);
823 		if (rv != VGEN_SUCCESS) {
824 			DWARN((vnetp, "vgen_mutlicst: vgen_sendmsg failed"
825 			    "id (%lx)\n", ldcp->ldc_id));
826 		}
827 	} else {
828 		/* set the flag to send a msg to vsw after handshake is done */
829 		ldcp->need_mcast_sync = B_TRUE;
830 	}
831 
832 	mutex_exit(&ldcp->cblock);
833 
834 	if (add) {
835 
836 		/* expand multicast table if necessary */
837 		if (vgenp->mccount >= vgenp->mcsize) {
838 			struct ether_addr	*newtab;
839 			uint32_t		newsize;
840 
841 
842 			newsize = vgenp->mcsize * 2;
843 
844 			newtab = kmem_zalloc(newsize *
845 			    sizeof (struct ether_addr), KM_NOSLEEP);
846 
847 			bcopy(vgenp->mctab, newtab, vgenp->mcsize *
848 			    sizeof (struct ether_addr));
849 			kmem_free(vgenp->mctab,
850 			    vgenp->mcsize * sizeof (struct ether_addr));
851 
852 			vgenp->mctab = newtab;
853 			vgenp->mcsize = newsize;
854 		}
855 
856 		/* add address to the table */
857 		vgenp->mctab[vgenp->mccount++] = *addrp;
858 
859 	} else {
860 
861 		/* delete address from the table */
862 		for (i = 0; i < vgenp->mccount; i++) {
863 			if (ether_cmp(addrp, &(vgenp->mctab[i])) == 0) {
864 
865 				/*
866 				 * If there's more than one address in this
867 				 * table, delete the unwanted one by moving
868 				 * the last one in the list over top of it;
869 				 * otherwise, just remove it.
870 				 */
871 				if (vgenp->mccount > 1) {
872 					vgenp->mctab[i] =
873 						vgenp->mctab[vgenp->mccount-1];
874 				}
875 				vgenp->mccount--;
876 				break;
877 			}
878 		}
879 	}
880 
881 	RW_EXIT(&ldclp->rwlock);
882 	RW_EXIT(&plistp->rwlock);
883 
884 vgen_mcast_exit:
885 	mutex_exit(&vgenp->lock);
886 	return (DDI_SUCCESS);
887 }
888 
889 /* set or clear promiscuous mode on the device */
890 static int
891 vgen_promisc(void *arg, boolean_t on)
892 {
893 	_NOTE(ARGUNUSED(arg, on))
894 	return (DDI_SUCCESS);
895 }
896 
897 /* set the unicast mac address of the device */
898 static int
899 vgen_unicst(void *arg, const uint8_t *mca)
900 {
901 	_NOTE(ARGUNUSED(arg, mca))
902 	return (DDI_SUCCESS);
903 }
904 
905 /* get device statistics */
906 int
907 vgen_stat(void *arg, uint_t stat, uint64_t *val)
908 {
909 	vgen_t		*vgenp = (vgen_t *)arg;
910 	vgen_port_t	*portp;
911 	vgen_portlist_t	*plistp;
912 
913 	*val = 0;
914 
915 	plistp = &(vgenp->vgenports);
916 	READ_ENTER(&plistp->rwlock);
917 
918 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
919 		*val += vgen_port_stat(portp, stat);
920 	}
921 
922 	RW_EXIT(&plistp->rwlock);
923 
924 	return (0);
925 }
926 
927 static void
928 vgen_ioctl(void *arg, queue_t *wq, mblk_t *mp)
929 {
930 	 _NOTE(ARGUNUSED(arg, wq, mp))
931 }
932 
933 /* vgen internal functions */
934 /* detach all ports from the device */
935 static void
936 vgen_detach_ports(vgen_t *vgenp)
937 {
938 	vgen_port_t	*portp;
939 	vgen_portlist_t	*plistp;
940 
941 	plistp = &(vgenp->vgenports);
942 	WRITE_ENTER(&plistp->rwlock);
943 
944 	while ((portp = plistp->headp) != NULL) {
945 		vgen_port_detach(portp);
946 	}
947 
948 	RW_EXIT(&plistp->rwlock);
949 }
950 
951 /*
952  * detach the given port.
953  */
954 static void
955 vgen_port_detach(vgen_port_t *portp)
956 {
957 	vgen_t		*vgenp;
958 	vgen_ldclist_t	*ldclp;
959 	int		port_num;
960 
961 	vgenp = portp->vgenp;
962 	port_num = portp->port_num;
963 
964 	DBG1((vgenp->vnetp,
965 	    "vgen_port_detach: enter: port_num(%d)\n", port_num));
966 
967 	/* remove it from port list */
968 	vgen_port_list_remove(portp);
969 
970 	/* detach channels from this port */
971 	ldclp = &portp->ldclist;
972 	WRITE_ENTER(&ldclp->rwlock);
973 	while (ldclp->headp) {
974 		vgen_ldc_detach(ldclp->headp);
975 	}
976 	RW_EXIT(&ldclp->rwlock);
977 
978 	if (vgenp->vsw_portp == portp) {
979 		vgenp->vsw_portp = NULL;
980 	}
981 	KMEM_FREE(portp);
982 
983 	DBG1((vgenp->vnetp,
984 	    "vgen_port_detach: exit: port_num(%d)\n", port_num));
985 }
986 
987 /* add a port to port list */
988 static void
989 vgen_port_list_insert(vgen_port_t *portp)
990 {
991 	vgen_portlist_t *plistp;
992 	vgen_t *vgenp;
993 
994 	vgenp = portp->vgenp;
995 	plistp = &(vgenp->vgenports);
996 
997 	if (plistp->headp == NULL) {
998 		plistp->headp = portp;
999 	} else {
1000 		plistp->tailp->nextp = portp;
1001 	}
1002 	plistp->tailp = portp;
1003 	portp->nextp = NULL;
1004 }
1005 
1006 /* remove a port from port list */
1007 static void
1008 vgen_port_list_remove(vgen_port_t *portp)
1009 {
1010 	vgen_port_t *prevp;
1011 	vgen_port_t *nextp;
1012 	vgen_portlist_t *plistp;
1013 	vgen_t *vgenp;
1014 
1015 	vgenp = portp->vgenp;
1016 
1017 	plistp = &(vgenp->vgenports);
1018 
1019 	if (plistp->headp == NULL)
1020 		return;
1021 
1022 	if (portp == plistp->headp) {
1023 		plistp->headp = portp->nextp;
1024 		if (portp == plistp->tailp)
1025 			plistp->tailp = plistp->headp;
1026 	} else {
1027 		for (prevp = plistp->headp; ((nextp = prevp->nextp) != NULL) &&
1028 		    (nextp != portp); prevp = nextp);
1029 		if (nextp == portp) {
1030 			prevp->nextp = portp->nextp;
1031 		}
1032 		if (portp == plistp->tailp)
1033 			plistp->tailp = prevp;
1034 	}
1035 }
1036 
1037 /* lookup a port in the list based on port_num */
1038 static vgen_port_t *
1039 vgen_port_lookup(vgen_portlist_t *plistp, int port_num)
1040 {
1041 	vgen_port_t *portp = NULL;
1042 
1043 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
1044 		if (portp->port_num == port_num) {
1045 			break;
1046 		}
1047 	}
1048 
1049 	return (portp);
1050 }
1051 
1052 /* enable ports for transmit/receive */
1053 static void
1054 vgen_init_ports(vgen_t *vgenp)
1055 {
1056 	vgen_port_t	*portp;
1057 	vgen_portlist_t	*plistp;
1058 
1059 	plistp = &(vgenp->vgenports);
1060 	READ_ENTER(&plistp->rwlock);
1061 
1062 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
1063 		vgen_port_init(portp);
1064 	}
1065 
1066 	RW_EXIT(&plistp->rwlock);
1067 }
1068 
1069 static void
1070 vgen_port_init(vgen_port_t *portp)
1071 {
1072 	vgen_t *vgenp;
1073 
1074 	vgenp = portp->vgenp;
1075 	/*
1076 	 * Create fdb entry in vnet, corresponding to the mac
1077 	 * address of this port. Note that the port specified
1078 	 * is vsw-port. This is done so that vsw-port acts
1079 	 * as the route to reach this macaddr, until the
1080 	 * channel for this port comes up (LDC_UP) and
1081 	 * handshake is done successfully.
1082 	 * eg, if the peer is OBP-vnet, it may not bring the
1083 	 * channel up for this port and may communicate via
1084 	 * vsw to reach this port.
1085 	 * Later, when Solaris-vnet comes up at the other end
1086 	 * of the channel for this port and brings up the channel,
1087 	 * it is an indication that peer vnet is capable of
1088 	 * distributed switching, so the direct route through this
1089 	 * port is specified in fdb, using vnet_modify_fdb(macaddr);
1090 	 */
1091 	vnet_add_fdb(vgenp->vnetp, (uint8_t *)&portp->macaddr,
1092 	    vgen_tx, vgenp->vsw_portp);
1093 
1094 	if (portp == vgenp->vsw_portp) {
1095 		/*
1096 		 * create the default route entry in vnet's fdb.
1097 		 * This is the entry used by vnet to reach
1098 		 * unknown destinations, which basically goes
1099 		 * through vsw on domain0 and out through the
1100 		 * physical device bound to vsw.
1101 		 */
1102 		vnet_add_def_rte(vgenp->vnetp, vgen_tx, portp);
1103 	}
1104 
1105 	/* Bring up the channels of this port */
1106 	vgen_init_ldcs(portp);
1107 }
1108 
1109 /* disable transmit/receive on ports */
1110 static void
1111 vgen_uninit_ports(vgen_t *vgenp)
1112 {
1113 	vgen_port_t	*portp;
1114 	vgen_portlist_t	*plistp;
1115 
1116 	plistp = &(vgenp->vgenports);
1117 	READ_ENTER(&plistp->rwlock);
1118 
1119 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
1120 		vgen_port_uninit(portp);
1121 	}
1122 
1123 	RW_EXIT(&plistp->rwlock);
1124 }
1125 
1126 static void
1127 vgen_port_uninit(vgen_port_t *portp)
1128 {
1129 	vgen_t *vgenp;
1130 
1131 	vgenp = portp->vgenp;
1132 
1133 	vgen_uninit_ldcs(portp);
1134 	/* delete the entry in vnet's fdb for this port */
1135 	vnet_del_fdb(vgenp->vnetp, (uint8_t *)&portp->macaddr);
1136 	if (portp == vgenp->vsw_portp) {
1137 		/*
1138 		 * if this is vsw-port, then delete the default
1139 		 * route entry in vnet's fdb.
1140 		 */
1141 		vnet_del_def_rte(vgenp->vnetp);
1142 	}
1143 }
1144 
1145 /* register with MD event generator */
1146 static int
1147 vgen_mdeg_reg(vgen_t *vgenp)
1148 {
1149 	mdeg_prop_spec_t	*pspecp;
1150 	mdeg_node_spec_t	*parentp;
1151 	uint_t			templatesz;
1152 	int			rv;
1153 	mdeg_handle_t		hdl;
1154 	int			i;
1155 	void			*vnetp = vgenp->vnetp;
1156 
1157 	i = ddi_prop_get_int(DDI_DEV_T_ANY, vgenp->vnetdip,
1158 			DDI_PROP_DONTPASS, reg_propname, -1);
1159 	if (i == -1) {
1160 		return (DDI_FAILURE);
1161 	}
1162 	templatesz = sizeof (vgen_prop_template);
1163 	pspecp = kmem_zalloc(templatesz, KM_NOSLEEP);
1164 	if (pspecp == NULL) {
1165 		return (DDI_FAILURE);
1166 	}
1167 	parentp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_NOSLEEP);
1168 	if (parentp == NULL) {
1169 		kmem_free(pspecp, templatesz);
1170 		return (DDI_FAILURE);
1171 	}
1172 
1173 	bcopy(vgen_prop_template, pspecp, templatesz);
1174 
1175 	/*
1176 	 * NOTE: The instance here refers to the value of "reg" property and
1177 	 * not the dev_info instance (ddi_get_instance()) of vnet.
1178 	 */
1179 	VGEN_SET_MDEG_PROP_INST(pspecp, i);
1180 
1181 	parentp->namep = "virtual-device";
1182 	parentp->specp = pspecp;
1183 
1184 	/* save parentp in vgen_t */
1185 	vgenp->mdeg_parentp = parentp;
1186 
1187 	rv = mdeg_register(parentp, &vport_match, vgen_mdeg_cb, vgenp, &hdl);
1188 	if (rv != MDEG_SUCCESS) {
1189 		DERR((vnetp, "vgen_mdeg_reg: mdeg_register failed\n"));
1190 		KMEM_FREE(parentp);
1191 		kmem_free(pspecp, templatesz);
1192 		vgenp->mdeg_parentp = NULL;
1193 		return (DDI_FAILURE);
1194 	}
1195 
1196 	/* save mdeg handle in vgen_t */
1197 	vgenp->mdeg_hdl = hdl;
1198 
1199 	return (DDI_SUCCESS);
1200 }
1201 
1202 /* unregister with MD event generator */
1203 static void
1204 vgen_mdeg_unreg(vgen_t *vgenp)
1205 {
1206 	(void) mdeg_unregister(vgenp->mdeg_hdl);
1207 	KMEM_FREE(vgenp->mdeg_parentp);
1208 	vgenp->mdeg_parentp = NULL;
1209 	vgenp->mdeg_hdl = NULL;
1210 }
1211 
1212 /* callback function registered with MD event generator */
1213 static int
1214 vgen_mdeg_cb(void *cb_argp, mdeg_result_t *resp)
1215 {
1216 	int idx;
1217 	int vsw_idx = -1;
1218 	uint64_t val;
1219 	vgen_t *vgenp;
1220 
1221 	if ((resp == NULL) || (cb_argp == NULL)) {
1222 		return (MDEG_FAILURE);
1223 	}
1224 
1225 	vgenp = (vgen_t *)cb_argp;
1226 	DBG1((vgenp->vnetp, "vgen_mdeg_cb: enter\n"));
1227 
1228 	mutex_enter(&vgenp->lock);
1229 
1230 	DBG1((vgenp->vnetp,
1231 	    "vgen_mdeg_cb: ports: removed(%x), added(%x), updated(%x)\n",
1232 	    resp->removed.nelem, resp->added.nelem, resp->match_curr.nelem));
1233 
1234 	for (idx = 0; idx < resp->removed.nelem; idx++) {
1235 		(void) vgen_remove_port(vgenp, resp->removed.mdp,
1236 		    resp->removed.mdep[idx]);
1237 	}
1238 
1239 	if (vgenp->vsw_portp == NULL) {
1240 		/*
1241 		 * find vsw_port and add it first, because other ports need
1242 		 * this when adding fdb entry (see vgen_port_init()).
1243 		 */
1244 		for (idx = 0; idx < resp->added.nelem; idx++) {
1245 			if (!(md_get_prop_val(resp->added.mdp,
1246 			    resp->added.mdep[idx], swport_propname, &val))) {
1247 				if (val == 0) {
1248 					/*
1249 					 * This port is connected to the
1250 					 * vsw on dom0.
1251 					 */
1252 					vsw_idx = idx;
1253 					(void) vgen_add_port(vgenp,
1254 					    resp->added.mdp,
1255 					    resp->added.mdep[idx]);
1256 					break;
1257 				}
1258 			}
1259 		}
1260 		if (vsw_idx == -1) {
1261 			DWARN((vgenp->vnetp, "vgen_mdeg_cb: "
1262 			    "can't find vsw_port\n"));
1263 			return (MDEG_FAILURE);
1264 		}
1265 	}
1266 
1267 	for (idx = 0; idx < resp->added.nelem; idx++) {
1268 		if ((vsw_idx != -1) && (vsw_idx == idx)) /* skip vsw_port */
1269 			continue;
1270 		(void) vgen_add_port(vgenp, resp->added.mdp,
1271 		    resp->added.mdep[idx]);
1272 	}
1273 
1274 	for (idx = 0; idx < resp->match_curr.nelem; idx++) {
1275 		(void) vgen_update_port(vgenp, resp->match_curr.mdp,
1276 		    resp->match_curr.mdep[idx],
1277 		    resp->match_prev.mdp,
1278 		    resp->match_prev.mdep[idx]);
1279 	}
1280 
1281 	mutex_exit(&vgenp->lock);
1282 	DBG1((vgenp->vnetp, "vgen_mdeg_cb: exit\n"));
1283 	return (MDEG_SUCCESS);
1284 }
1285 
1286 /* add a new port to the device */
1287 static int
1288 vgen_add_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
1289 {
1290 	uint64_t	port_num;
1291 	uint64_t	*ldc_ids;
1292 	uint64_t	macaddr;
1293 	uint64_t	val;
1294 	int		num_ldcs;
1295 	int		vsw_port = B_FALSE;
1296 	int		i;
1297 	int		addrsz;
1298 	int		num_nodes = 0;
1299 	int		listsz = 0;
1300 	mde_cookie_t	*listp = NULL;
1301 	uint8_t		*addrp;
1302 	struct ether_addr	ea;
1303 
1304 	/* read "id" property to get the port number */
1305 	if (md_get_prop_val(mdp, mdex, id_propname, &port_num)) {
1306 		DWARN((vgenp->vnetp,
1307 		    "vgen_add_port: prop(%s) not found\n", id_propname));
1308 		return (DDI_FAILURE);
1309 	}
1310 
1311 	/*
1312 	 * Find the channel endpoint node(s) under this port node.
1313 	 */
1314 	if ((num_nodes = md_node_count(mdp)) <= 0) {
1315 		DWARN((vgenp->vnetp,
1316 		    "vgen_add_port: invalid number of nodes found (%d)",
1317 		    num_nodes));
1318 		return (DDI_FAILURE);
1319 	}
1320 
1321 	/* allocate space for node list */
1322 	listsz = num_nodes * sizeof (mde_cookie_t);
1323 	listp = kmem_zalloc(listsz, KM_NOSLEEP);
1324 	if (listp == NULL)
1325 		return (DDI_FAILURE);
1326 
1327 	num_ldcs = md_scan_dag(mdp, mdex,
1328 		md_find_name(mdp, channel_propname),
1329 		md_find_name(mdp, "fwd"), listp);
1330 
1331 	if (num_ldcs <= 0) {
1332 		DWARN((vgenp->vnetp,
1333 		    "vgen_add_port: can't find %s nodes", channel_propname));
1334 		kmem_free(listp, listsz);
1335 		return (DDI_FAILURE);
1336 	}
1337 
1338 	DBG2((vgenp->vnetp, "vgen_add_port: num_ldcs %d", num_ldcs));
1339 
1340 	ldc_ids = kmem_zalloc(num_ldcs * sizeof (uint64_t), KM_NOSLEEP);
1341 	if (ldc_ids == NULL) {
1342 		kmem_free(listp, listsz);
1343 		return (DDI_FAILURE);
1344 	}
1345 
1346 	for (i = 0; i < num_ldcs; i++) {
1347 		/* read channel ids */
1348 		if (md_get_prop_val(mdp, listp[i], id_propname, &ldc_ids[i])) {
1349 			DWARN((vgenp->vnetp,
1350 			    "vgen_add_port: prop(%s) not found\n",
1351 			    id_propname));
1352 			kmem_free(listp, listsz);
1353 			kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
1354 			return (DDI_FAILURE);
1355 		}
1356 		DBG2((vgenp->vnetp, "vgen_add_port: ldc_id 0x%llx",
1357 		    ldc_ids[i]));
1358 	}
1359 
1360 	kmem_free(listp, listsz);
1361 
1362 	if (md_get_prop_data(mdp, mdex, rmacaddr_propname, &addrp,
1363 	    &addrsz)) {
1364 		DWARN((vgenp->vnetp,
1365 		    "vgen_add_port: prop(%s) not found\n", rmacaddr_propname));
1366 		kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
1367 		return (DDI_FAILURE);
1368 	}
1369 
1370 	if (addrsz < ETHERADDRL) {
1371 		DWARN((vgenp->vnetp,
1372 		    "vgen_add_port: invalid address size (%d)\n", addrsz));
1373 		kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
1374 		return (DDI_FAILURE);
1375 	}
1376 
1377 	macaddr = *((uint64_t *)addrp);
1378 
1379 	DBG2((vgenp->vnetp, "vgen_add_port: remote mac address 0x%llx\n",
1380 	    macaddr));
1381 
1382 	for (i = ETHERADDRL - 1; i >= 0; i--) {
1383 		ea.ether_addr_octet[i] = macaddr & 0xFF;
1384 		macaddr >>= 8;
1385 	}
1386 
1387 	if (vgenp->vsw_portp == NULL) {
1388 		if (!(md_get_prop_val(mdp, mdex, swport_propname, &val))) {
1389 			if (val == 0) {
1390 				/* This port is connected to the vsw on dom0 */
1391 				vsw_port = B_TRUE;
1392 			}
1393 		}
1394 	}
1395 	(void) vgen_port_attach_mdeg(vgenp, (int)port_num, ldc_ids, num_ldcs,
1396 	    &ea, vsw_port);
1397 
1398 	kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
1399 
1400 	return (DDI_SUCCESS);
1401 }
1402 
1403 /* remove a port from the device */
1404 static int
1405 vgen_remove_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
1406 {
1407 	uint64_t	port_num;
1408 	vgen_port_t	*portp;
1409 	vgen_portlist_t	*plistp;
1410 
1411 	/* read "id" property to get the port number */
1412 	if (md_get_prop_val(mdp, mdex, id_propname, &port_num)) {
1413 		DWARN((vgenp->vnetp,
1414 		    "vgen_remove_port: prop(%s) not found\n", id_propname));
1415 		return (DDI_FAILURE);
1416 	}
1417 
1418 	plistp = &(vgenp->vgenports);
1419 
1420 	WRITE_ENTER(&plistp->rwlock);
1421 	portp = vgen_port_lookup(plistp, (int)port_num);
1422 	if (portp == NULL) {
1423 		DWARN((vgenp->vnetp, "vgen_remove_port: can't find port(%lx)\n",
1424 		    port_num));
1425 		RW_EXIT(&plistp->rwlock);
1426 		return (DDI_FAILURE);
1427 	}
1428 
1429 	vgen_port_detach_mdeg(portp);
1430 	RW_EXIT(&plistp->rwlock);
1431 
1432 	return (DDI_SUCCESS);
1433 }
1434 
1435 /* attach a port to the device based on mdeg data */
1436 static int
1437 vgen_port_attach_mdeg(vgen_t *vgenp, int port_num, uint64_t *ldcids,
1438 	int num_ids, struct ether_addr *macaddr, boolean_t vsw_port)
1439 {
1440 	vgen_port_t		*portp;
1441 	vgen_portlist_t		*plistp;
1442 	int			i;
1443 
1444 	portp = kmem_zalloc(sizeof (vgen_port_t), KM_NOSLEEP);
1445 	if (portp == NULL) {
1446 		return (DDI_FAILURE);
1447 	}
1448 	portp->vgenp = vgenp;
1449 	portp->port_num = port_num;
1450 
1451 	DBG1((vgenp->vnetp,
1452 	    "vgen_port_attach_mdeg: port_num(%d)\n", portp->port_num));
1453 
1454 	portp->ldclist.num_ldcs = 0;
1455 	portp->ldclist.headp = NULL;
1456 	rw_init(&portp->ldclist.rwlock, NULL, RW_DRIVER, NULL);
1457 
1458 	ether_copy(macaddr, &portp->macaddr);
1459 	for (i = 0; i < num_ids; i++) {
1460 		DBG2((vgenp->vnetp, "vgen_port_attach_mdeg: ldcid (%lx)\n",
1461 		    ldcids[i]));
1462 		(void) vgen_ldc_attach(portp, ldcids[i]);
1463 	}
1464 
1465 	/* link it into the list of ports */
1466 	plistp = &(vgenp->vgenports);
1467 	WRITE_ENTER(&plistp->rwlock);
1468 	vgen_port_list_insert(portp);
1469 	RW_EXIT(&plistp->rwlock);
1470 
1471 	/* This port is connected to the vsw on domain0 */
1472 	if (vsw_port)
1473 		vgenp->vsw_portp = portp;
1474 
1475 	if (vgenp->flags & VGEN_STARTED) {	/* interface is configured */
1476 		vgen_port_init(portp);
1477 	}
1478 
1479 	DBG1((vgenp->vnetp,
1480 	    "vgen_port_attach_mdeg: exit: port_num(%d)\n", portp->port_num));
1481 	return (DDI_SUCCESS);
1482 }
1483 
1484 /* detach a port from the device based on mdeg data */
1485 static void
1486 vgen_port_detach_mdeg(vgen_port_t *portp)
1487 {
1488 	vgen_t *vgenp = portp->vgenp;
1489 
1490 	DBG1((vgenp->vnetp,
1491 	    "vgen_port_detach_mdeg: enter: port_num(%d)\n", portp->port_num));
1492 	/* stop the port if needed */
1493 	if (vgenp->flags & VGEN_STARTED) {
1494 		vgen_port_uninit(portp);
1495 	}
1496 	vgen_port_detach(portp);
1497 
1498 	DBG1((vgenp->vnetp,
1499 	    "vgen_port_detach_mdeg: exit: port_num(%d)\n", portp->port_num));
1500 }
1501 
1502 static int
1503 vgen_update_port(vgen_t *vgenp, md_t *curr_mdp, mde_cookie_t curr_mdex,
1504 	md_t *prev_mdp, mde_cookie_t prev_mdex)
1505 {
1506 	 _NOTE(ARGUNUSED(vgenp, curr_mdp, curr_mdex, prev_mdp, prev_mdex))
1507 
1508 	/* NOTE: TBD */
1509 	return (DDI_SUCCESS);
1510 }
1511 
1512 static uint64_t
1513 vgen_port_stat(vgen_port_t *portp, uint_t stat)
1514 {
1515 	vgen_ldclist_t	*ldclp;
1516 	vgen_ldc_t *ldcp;
1517 	uint64_t	val;
1518 
1519 	val = 0;
1520 	ldclp = &portp->ldclist;
1521 
1522 	READ_ENTER(&ldclp->rwlock);
1523 	for (ldcp = ldclp->headp; ldcp != NULL; ldcp = ldcp->nextp) {
1524 		val += vgen_ldc_stat(ldcp, stat);
1525 	}
1526 	RW_EXIT(&ldclp->rwlock);
1527 
1528 	return (val);
1529 }
1530 
1531 /* attach the channel corresponding to the given ldc_id to the port */
1532 static int
1533 vgen_ldc_attach(vgen_port_t *portp, uint64_t ldc_id)
1534 {
1535 	vgen_t 		*vgenp;
1536 	vgen_ldclist_t	*ldclp;
1537 	vgen_ldc_t 	*ldcp, **prev_ldcp;
1538 	ldc_attr_t 	attr;
1539 	int 		status;
1540 	ldc_status_t	istatus;
1541 	enum		{AST_init = 0x0, AST_ldc_alloc = 0x1,
1542 			AST_mutex_init = 0x2, AST_ldc_init = 0x4,
1543 			AST_ldc_reg_cb = 0x8, AST_alloc_tx_ring = 0x10,
1544 			AST_create_rxmblks = 0x20}
1545 			attach_state;
1546 
1547 	attach_state = AST_init;
1548 	vgenp = portp->vgenp;
1549 	ldclp = &portp->ldclist;
1550 
1551 	ldcp = kmem_zalloc(sizeof (vgen_ldc_t), KM_NOSLEEP);
1552 	if (ldcp == NULL) {
1553 		goto ldc_attach_failed;
1554 	}
1555 	ldcp->ldc_id = ldc_id;
1556 	ldcp->portp = portp;
1557 
1558 	attach_state |= AST_ldc_alloc;
1559 
1560 	mutex_init(&ldcp->txlock, NULL, MUTEX_DRIVER, NULL);
1561 	mutex_init(&ldcp->cblock, NULL, MUTEX_DRIVER, NULL);
1562 	mutex_init(&ldcp->tclock, NULL, MUTEX_DRIVER, NULL);
1563 
1564 	attach_state |= AST_mutex_init;
1565 
1566 	attr.devclass = LDC_DEV_NT;
1567 	attr.instance = ddi_get_instance(vgenp->vnetdip);
1568 	attr.mode = LDC_MODE_UNRELIABLE;
1569 	attr.mtu = vnet_ldc_mtu;
1570 	status = ldc_init(ldc_id, &attr, &ldcp->ldc_handle);
1571 	if (status != 0) {
1572 		DWARN((vgenp->vnetp, "ldc_init failed, id (%lx) rv (%d)\n",
1573 		    ldc_id, status));
1574 		goto ldc_attach_failed;
1575 	}
1576 	attach_state |= AST_ldc_init;
1577 
1578 	status = ldc_reg_callback(ldcp->ldc_handle, vgen_ldc_cb, (caddr_t)ldcp);
1579 	if (status != 0) {
1580 		DWARN((vgenp->vnetp,
1581 		    "ldc_reg_callback failed, id (%lx) rv (%d)\n",
1582 		    ldc_id, status));
1583 		goto ldc_attach_failed;
1584 	}
1585 	attach_state |= AST_ldc_reg_cb;
1586 
1587 	(void) ldc_status(ldcp->ldc_handle, &istatus);
1588 	ASSERT(istatus == LDC_INIT);
1589 	ldcp->ldc_status = istatus;
1590 
1591 	/* allocate transmit resources */
1592 	status = vgen_alloc_tx_ring(ldcp);
1593 	if (status != 0) {
1594 		goto ldc_attach_failed;
1595 	}
1596 	attach_state |= AST_alloc_tx_ring;
1597 
1598 	/* allocate receive resources */
1599 	ldcp->num_rbufs = vnet_nrbufs;
1600 	ldcp->rmp = NULL;
1601 	status = vio_create_mblks(ldcp->num_rbufs, VGEN_DBLK_SZ,
1602 		&(ldcp->rmp));
1603 	if (status != 0) {
1604 		goto ldc_attach_failed;
1605 	}
1606 	attach_state |= AST_create_rxmblks;
1607 
1608 	/* Setup kstats for the channel */
1609 	status = vgen_setup_kstats(ldcp);
1610 	if (status != VGEN_SUCCESS) {
1611 		goto ldc_attach_failed;
1612 	}
1613 
1614 	/* initialize vgen_versions supported */
1615 	bcopy(vgen_versions, ldcp->vgen_versions, sizeof (ldcp->vgen_versions));
1616 
1617 	/* link it into the list of channels for this port */
1618 	WRITE_ENTER(&ldclp->rwlock);
1619 	prev_ldcp = (vgen_ldc_t **)(&ldclp->headp);
1620 	ldcp->nextp = *prev_ldcp;
1621 	*prev_ldcp = ldcp;
1622 	ldclp->num_ldcs++;
1623 	RW_EXIT(&ldclp->rwlock);
1624 
1625 	ldcp->flags |= CHANNEL_ATTACHED;
1626 	return (DDI_SUCCESS);
1627 
1628 ldc_attach_failed:
1629 	if (attach_state & AST_create_rxmblks) {
1630 		(void) vio_destroy_mblks(ldcp->rmp);
1631 	}
1632 	if (attach_state & AST_alloc_tx_ring) {
1633 		vgen_free_tx_ring(ldcp);
1634 	}
1635 	if (attach_state & AST_ldc_reg_cb) {
1636 		(void) ldc_unreg_callback(ldcp->ldc_handle);
1637 	}
1638 	if (attach_state & AST_ldc_init) {
1639 		(void) ldc_fini(ldcp->ldc_handle);
1640 	}
1641 	if (attach_state & AST_mutex_init) {
1642 		mutex_destroy(&ldcp->tclock);
1643 		mutex_destroy(&ldcp->txlock);
1644 		mutex_destroy(&ldcp->cblock);
1645 	}
1646 	if (attach_state & AST_ldc_alloc) {
1647 		KMEM_FREE(ldcp);
1648 	}
1649 	return (DDI_FAILURE);
1650 }
1651 
1652 /* detach a channel from the port */
1653 static void
1654 vgen_ldc_detach(vgen_ldc_t *ldcp)
1655 {
1656 	vgen_port_t	*portp;
1657 	vgen_t 		*vgenp;
1658 	vgen_ldc_t 	*pldcp;
1659 	vgen_ldc_t	**prev_ldcp;
1660 	vgen_ldclist_t	*ldclp;
1661 
1662 	portp = ldcp->portp;
1663 	vgenp = portp->vgenp;
1664 	ldclp = &portp->ldclist;
1665 
1666 	prev_ldcp =  (vgen_ldc_t **)&ldclp->headp;
1667 	for (; (pldcp = *prev_ldcp) != NULL; prev_ldcp = &pldcp->nextp) {
1668 		if (pldcp == ldcp) {
1669 			break;
1670 		}
1671 	}
1672 
1673 	if (pldcp == NULL) {
1674 		/* invalid ldcp? */
1675 		return;
1676 	}
1677 
1678 	if (ldcp->ldc_status != LDC_INIT) {
1679 		DWARN((vgenp->vnetp,
1680 		    "vgen_ldc_detach: ldc_status is not INIT id(%lx)\n",
1681 			    ldcp->ldc_id));
1682 	}
1683 
1684 	if (ldcp->flags & CHANNEL_ATTACHED) {
1685 		ldcp->flags &= ~(CHANNEL_ATTACHED);
1686 
1687 		vgen_destroy_kstats(ldcp);
1688 
1689 		/* free receive resources */
1690 		if (vio_destroy_mblks(ldcp->rmp)) {
1691 			/*
1692 			 * if we cannot reclaim all mblks, put this
1693 			 * on the list of pools to be reclaimed when the
1694 			 * device gets detached (see vgen_uninit()).
1695 			 */
1696 			ldcp->rmp->nextp =  vgenp->rmp;
1697 			vgenp->rmp = ldcp->rmp;
1698 		}
1699 
1700 		/* free transmit resources */
1701 		vgen_free_tx_ring(ldcp);
1702 
1703 		(void) ldc_unreg_callback(ldcp->ldc_handle);
1704 		(void) ldc_fini(ldcp->ldc_handle);
1705 		mutex_destroy(&ldcp->tclock);
1706 		mutex_destroy(&ldcp->txlock);
1707 		mutex_destroy(&ldcp->cblock);
1708 
1709 		/* unlink it from the list */
1710 		*prev_ldcp = ldcp->nextp;
1711 		ldclp->num_ldcs--;
1712 		KMEM_FREE(ldcp);
1713 	}
1714 }
1715 
1716 /*
1717  * This function allocates transmit resources for the channel.
1718  * The resources consist of a transmit descriptor ring and an associated
1719  * transmit buffer ring.
1720  */
1721 static int
1722 vgen_alloc_tx_ring(vgen_ldc_t *ldcp)
1723 {
1724 	void *tbufp;
1725 	ldc_mem_info_t minfo;
1726 	uint32_t txdsize;
1727 	uint32_t tbufsize;
1728 	int status;
1729 	void *vnetp = LDC_TO_VNET(ldcp);
1730 
1731 	ldcp->num_txds = vnet_ntxds;
1732 	txdsize = sizeof (vnet_public_desc_t);
1733 	tbufsize = sizeof (vgen_private_desc_t);
1734 
1735 	/* allocate transmit buffer ring */
1736 	tbufp = kmem_zalloc(ldcp->num_txds * tbufsize, KM_NOSLEEP);
1737 	if (tbufp == NULL) {
1738 		return (DDI_FAILURE);
1739 	}
1740 
1741 	/* create transmit descriptor ring */
1742 	status = ldc_mem_dring_create(ldcp->num_txds, txdsize,
1743 	    &ldcp->tx_dhandle);
1744 	if (status) {
1745 		DWARN((vnetp, "vgen_alloc_tx_ring: ldc_mem_dring_create() "
1746 		    "failed, id(%lx)\n", ldcp->ldc_id));
1747 		kmem_free(tbufp, ldcp->num_txds * tbufsize);
1748 		return (DDI_FAILURE);
1749 	}
1750 
1751 	/* get the addr of descripror ring */
1752 	status = ldc_mem_dring_info(ldcp->tx_dhandle, &minfo);
1753 	if (status) {
1754 		DWARN((vnetp, "vgen_alloc_tx_ring: ldc_mem_dring_info() "
1755 		    "failed, id(%lx)\n", ldcp->ldc_id));
1756 		kmem_free(tbufp, ldcp->num_txds * tbufsize);
1757 		(void) ldc_mem_dring_destroy(ldcp->tx_dhandle);
1758 		ldcp->tbufp = NULL;
1759 		return (DDI_FAILURE);
1760 	}
1761 	ldcp->txdp = (vnet_public_desc_t *)(minfo.vaddr);
1762 	ldcp->tbufp = tbufp;
1763 
1764 	ldcp->txdendp = &((ldcp->txdp)[ldcp->num_txds]);
1765 	ldcp->tbufendp = &((ldcp->tbufp)[ldcp->num_txds]);
1766 
1767 	return (DDI_SUCCESS);
1768 }
1769 
1770 /* Free transmit resources for the channel */
1771 static void
1772 vgen_free_tx_ring(vgen_ldc_t *ldcp)
1773 {
1774 	int tbufsize = sizeof (vgen_private_desc_t);
1775 
1776 	/* free transmit descriptor ring */
1777 	(void) ldc_mem_dring_destroy(ldcp->tx_dhandle);
1778 
1779 	/* free transmit buffer ring */
1780 	kmem_free(ldcp->tbufp, ldcp->num_txds * tbufsize);
1781 	ldcp->txdp = ldcp->txdendp = NULL;
1782 	ldcp->tbufp = ldcp->tbufendp = NULL;
1783 }
1784 
1785 /* enable transmit/receive on the channels for the port */
1786 static void
1787 vgen_init_ldcs(vgen_port_t *portp)
1788 {
1789 	vgen_ldclist_t	*ldclp = &portp->ldclist;
1790 	vgen_ldc_t	*ldcp;
1791 
1792 	READ_ENTER(&ldclp->rwlock);
1793 	ldcp =  ldclp->headp;
1794 	for (; ldcp  != NULL; ldcp = ldcp->nextp) {
1795 		(void) vgen_ldc_init(ldcp);
1796 	}
1797 	RW_EXIT(&ldclp->rwlock);
1798 }
1799 
1800 /* stop transmit/receive on the channels for the port */
1801 static void
1802 vgen_uninit_ldcs(vgen_port_t *portp)
1803 {
1804 	vgen_ldclist_t	*ldclp = &portp->ldclist;
1805 	vgen_ldc_t	*ldcp;
1806 
1807 	READ_ENTER(&ldclp->rwlock);
1808 	ldcp =  ldclp->headp;
1809 	for (; ldcp  != NULL; ldcp = ldcp->nextp) {
1810 		vgen_ldc_uninit(ldcp);
1811 	}
1812 	RW_EXIT(&ldclp->rwlock);
1813 }
1814 
1815 /* enable transmit/receive on the channel */
1816 static int
1817 vgen_ldc_init(vgen_ldc_t *ldcp)
1818 {
1819 	void *vnetp = LDC_TO_VNET(ldcp);
1820 	ldc_status_t	istatus;
1821 	int		rv;
1822 	enum		{ ST_init = 0x0, ST_ldc_open = 0x1,
1823 			    ST_init_tbufs = 0x2, ST_cb_enable = 0x4
1824 			    }
1825 			init_state;
1826 	uint32_t	retries = 0;
1827 
1828 	init_state = ST_init;
1829 
1830 	LDC_LOCK(ldcp);
1831 
1832 	rv = ldc_open(ldcp->ldc_handle);
1833 	if (rv != 0) {
1834 		DWARN((vnetp,
1835 		    "vgen_ldcinit: ldc_open failed: id<%lx> rv(%d)\n",
1836 		    ldcp->ldc_id, rv));
1837 		goto ldcinit_failed;
1838 	}
1839 	init_state |= ST_ldc_open;
1840 
1841 	(void) ldc_status(ldcp->ldc_handle, &istatus);
1842 	if (istatus != LDC_OPEN && istatus != LDC_READY) {
1843 		DWARN((vnetp,
1844 		    "vgen_ldcinit: id (%lx) status(%d) is not OPEN/READY\n",
1845 		    ldcp->ldc_id, istatus));
1846 		goto ldcinit_failed;
1847 	}
1848 	ldcp->ldc_status = istatus;
1849 
1850 	rv = vgen_init_tbufs(ldcp);
1851 	if (rv != 0) {
1852 		DWARN((vnetp,
1853 		    "vgen_ldcinit: vgen_init_tbufs() failed: id(%lx)\n",
1854 		    ldcp->ldc_id));
1855 		goto ldcinit_failed;
1856 	}
1857 	init_state |= ST_init_tbufs;
1858 
1859 	rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_ENABLE);
1860 	if (rv != 0) {
1861 		DWARN((vnetp, "vgen_ldc_init: ldc_set_cb_mode failed: id(%lx) "
1862 		    "rv(%d)\n", ldcp->ldc_id, rv));
1863 		goto ldcinit_failed;
1864 	}
1865 
1866 	init_state |= ST_cb_enable;
1867 
1868 	do {
1869 		rv = ldc_up(ldcp->ldc_handle);
1870 		if ((rv != 0) && (rv == EWOULDBLOCK)) {
1871 			DBG2((vnetp,
1872 			    "vgen_ldcinit: ldc_up err id(%lx) rv(%d)\n",
1873 			    ldcp->ldc_id, rv));
1874 			drv_usecwait(VGEN_LDC_UP_DELAY);
1875 		}
1876 		if (retries++ >= vgen_ldcup_retries)
1877 			break;
1878 	} while (rv == EWOULDBLOCK);
1879 
1880 	(void) ldc_status(ldcp->ldc_handle, &istatus);
1881 	if (istatus == LDC_UP) {
1882 		DWARN((vnetp, "vgen_ldc_init: id(%lx) status(%d) is UP\n",
1883 		    ldcp->ldc_id, istatus));
1884 	}
1885 
1886 	ldcp->ldc_status = istatus;
1887 
1888 	/* initialize transmit watchdog timeout */
1889 	ldcp->wd_tid = timeout(vgen_ldc_watchdog, (caddr_t)ldcp,
1890 	    drv_usectohz(vnet_ldcwd_interval * 1000));
1891 
1892 	ldcp->hphase = -1;
1893 	ldcp->flags |= CHANNEL_STARTED;
1894 
1895 	/* if channel is already UP - start handshake */
1896 	if (istatus == LDC_UP) {
1897 		vgen_t *vgenp = LDC_TO_VGEN(ldcp);
1898 		if (ldcp->portp != vgenp->vsw_portp) {
1899 			/*
1900 			 * modify fdb entry to use this port as the
1901 			 * channel is up, instead of going through the
1902 			 * vsw-port (see comments in vgen_port_init())
1903 			 */
1904 			vnet_modify_fdb(vnetp,
1905 			    (uint8_t *)&ldcp->portp->macaddr,
1906 			    vgen_tx, ldcp->portp, B_FALSE);
1907 		}
1908 
1909 		/* Initialize local session id */
1910 		ldcp->local_sid = ddi_get_lbolt();
1911 
1912 		/* clear peer session id */
1913 		ldcp->peer_sid = 0;
1914 		ldcp->hretries = 0;
1915 
1916 		/* Initiate Handshake process with peer ldc endpoint */
1917 		vgen_reset_hphase(ldcp);
1918 
1919 		mutex_exit(&ldcp->tclock);
1920 		mutex_exit(&ldcp->txlock);
1921 		vgen_handshake(vh_nextphase(ldcp));
1922 		mutex_exit(&ldcp->cblock);
1923 	} else {
1924 		LDC_UNLOCK(ldcp);
1925 	}
1926 
1927 	return (DDI_SUCCESS);
1928 
1929 ldcinit_failed:
1930 	if (init_state & ST_cb_enable) {
1931 		(void) ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE);
1932 	}
1933 	if (init_state & ST_init_tbufs) {
1934 		vgen_uninit_tbufs(ldcp);
1935 	}
1936 	if (init_state & ST_ldc_open) {
1937 		(void) ldc_close(ldcp->ldc_handle);
1938 	}
1939 	LDC_UNLOCK(ldcp);
1940 	return (DDI_FAILURE);
1941 }
1942 
1943 /* stop transmit/receive on the channel */
1944 static void
1945 vgen_ldc_uninit(vgen_ldc_t *ldcp)
1946 {
1947 	void *vnetp = LDC_TO_VNET(ldcp);
1948 	int	rv;
1949 
1950 	DBG1((vnetp, "vgen_ldc_uninit: enter: id(%lx)\n", ldcp->ldc_id));
1951 	LDC_LOCK(ldcp);
1952 
1953 	if ((ldcp->flags & CHANNEL_STARTED) == 0) {
1954 		LDC_UNLOCK(ldcp);
1955 		DWARN((vnetp, "vgen_ldc_uninit: id(%lx) CHANNEL_STARTED"
1956 		    " flag is not set\n", ldcp->ldc_id));
1957 		return;
1958 	}
1959 
1960 	/* disable further callbacks */
1961 	rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE);
1962 	if (rv != 0) {
1963 		DWARN((vnetp, "vgen_ldc_uninit: id (%lx) "
1964 		    "ldc_set_cb_mode failed\n", ldcp->ldc_id));
1965 	}
1966 
1967 	/* clear handshake done bit and wait for pending tx and cb to finish */
1968 	ldcp->hphase &= ~(VH_DONE);
1969 	LDC_UNLOCK(ldcp);
1970 	drv_usecwait(1000);
1971 	LDC_LOCK(ldcp);
1972 
1973 	vgen_reset_hphase(ldcp);
1974 
1975 	/* reset transmit watchdog timeout */
1976 	if (ldcp->wd_tid) {
1977 		(void) untimeout(ldcp->wd_tid);
1978 		ldcp->wd_tid = 0;
1979 	}
1980 
1981 	vgen_uninit_tbufs(ldcp);
1982 
1983 	rv = ldc_close(ldcp->ldc_handle);
1984 	if (rv != 0) {
1985 		DWARN((vnetp, "vgen_ldcuninit: ldc_close err id(%lx)\n",
1986 		    ldcp->ldc_id));
1987 	}
1988 	ldcp->ldc_status = LDC_INIT;
1989 	ldcp->flags &= ~(CHANNEL_STARTED);
1990 
1991 	LDC_UNLOCK(ldcp);
1992 
1993 	DBG1((vnetp, "vgen_ldc_uninit: exit: id(%lx)\n", ldcp->ldc_id));
1994 }
1995 
1996 /* Initialize the transmit buffer ring for the channel */
1997 static int
1998 vgen_init_tbufs(vgen_ldc_t *ldcp)
1999 {
2000 	vgen_private_desc_t	*tbufp;
2001 	vnet_public_desc_t	*txdp;
2002 	vio_dring_entry_hdr_t		*hdrp;
2003 	int 			i;
2004 	int 			rv;
2005 	caddr_t			datap = NULL;
2006 	int			ci;
2007 	uint32_t		ncookies;
2008 
2009 	bzero(ldcp->tbufp, sizeof (*tbufp) * (ldcp->num_txds));
2010 	bzero(ldcp->txdp, sizeof (*txdp) * (ldcp->num_txds));
2011 
2012 	datap = kmem_zalloc(ldcp->num_txds * VGEN_DBLK_SZ, KM_SLEEP);
2013 	ldcp->tx_datap = datap;
2014 
2015 	/*
2016 	 * for each private descriptor, allocate a ldc mem_handle which is
2017 	 * required to map the data during transmit, set the flags
2018 	 * to free (available for use by transmit routine).
2019 	 */
2020 
2021 	for (i = 0; i < ldcp->num_txds; i++) {
2022 
2023 		tbufp = &(ldcp->tbufp[i]);
2024 		rv = ldc_mem_alloc_handle(ldcp->ldc_handle,
2025 			&(tbufp->memhandle));
2026 		if (rv) {
2027 			tbufp->memhandle = 0;
2028 			goto init_tbufs_failed;
2029 		}
2030 
2031 		/*
2032 		 * bind ldc memhandle to the corresponding transmit buffer.
2033 		 */
2034 		ci = ncookies = 0;
2035 		rv = ldc_mem_bind_handle(tbufp->memhandle,
2036 		    (caddr_t)datap, VGEN_DBLK_SZ, LDC_SHADOW_MAP,
2037 		    LDC_MEM_R, &(tbufp->memcookie[ci]), &ncookies);
2038 		if (rv != 0) {
2039 			goto init_tbufs_failed;
2040 		}
2041 
2042 		/*
2043 		 * successful in binding the handle to tx data buffer.
2044 		 * set datap in the private descr to this buffer.
2045 		 */
2046 		tbufp->datap = datap;
2047 
2048 		if ((ncookies == 0) ||
2049 			(ncookies > MAX_COOKIES)) {
2050 			goto init_tbufs_failed;
2051 		}
2052 
2053 		for (ci = 1; ci < ncookies; ci++) {
2054 			rv = ldc_mem_nextcookie(tbufp->memhandle,
2055 				&(tbufp->memcookie[ci]));
2056 			if (rv != 0) {
2057 				goto init_tbufs_failed;
2058 			}
2059 		}
2060 
2061 		tbufp->ncookies = ncookies;
2062 		datap += VGEN_DBLK_SZ;
2063 
2064 		tbufp->flags = VGEN_PRIV_DESC_FREE;
2065 		txdp = &(ldcp->txdp[i]);
2066 		hdrp = &txdp->hdr;
2067 		hdrp->dstate = VIO_DESC_FREE;
2068 		hdrp->ack = B_FALSE;
2069 		tbufp->descp = txdp;
2070 
2071 	}
2072 
2073 	/* reset tbuf walking pointers */
2074 	ldcp->next_tbufp = ldcp->tbufp;
2075 	ldcp->cur_tbufp = ldcp->tbufp;
2076 
2077 	/* initialize tx seqnum and index */
2078 	ldcp->next_txseq = VNET_ISS;
2079 	ldcp->next_txi = 0;
2080 
2081 	ldcp->resched_peer = B_TRUE;
2082 
2083 	return (DDI_SUCCESS);
2084 
2085 init_tbufs_failed:;
2086 	vgen_uninit_tbufs(ldcp);
2087 	return (DDI_FAILURE);
2088 }
2089 
2090 /* Uninitialize transmit buffer ring for the channel */
2091 static void
2092 vgen_uninit_tbufs(vgen_ldc_t *ldcp)
2093 {
2094 	vgen_private_desc_t	*tbufp = ldcp->tbufp;
2095 	int 			i;
2096 
2097 	/* for each tbuf (priv_desc), free ldc mem_handle */
2098 	for (i = 0; i < ldcp->num_txds; i++) {
2099 
2100 		tbufp = &(ldcp->tbufp[i]);
2101 
2102 		if (tbufp->datap) { /* if bound to a ldc memhandle */
2103 			(void) ldc_mem_unbind_handle(tbufp->memhandle);
2104 			tbufp->datap = NULL;
2105 		}
2106 		if (tbufp->memhandle) {
2107 			(void) ldc_mem_free_handle(tbufp->memhandle);
2108 			tbufp->memhandle = 0;
2109 		}
2110 	}
2111 
2112 	if (ldcp->tx_datap) {
2113 		/* prealloc'd tx data buffer */
2114 		kmem_free(ldcp->tx_datap, ldcp->num_txds * VGEN_DBLK_SZ);
2115 		ldcp->tx_datap = NULL;
2116 	}
2117 
2118 	bzero(ldcp->tbufp, sizeof (vgen_private_desc_t) * (ldcp->num_txds));
2119 	bzero(ldcp->txdp, sizeof (vnet_public_desc_t) * (ldcp->num_txds));
2120 }
2121 
2122 /* clobber tx descriptor ring */
2123 static void
2124 vgen_clobber_tbufs(vgen_ldc_t *ldcp)
2125 {
2126 	vnet_public_desc_t	*txdp;
2127 	vgen_private_desc_t	*tbufp;
2128 	vio_dring_entry_hdr_t		*hdrp;
2129 	void *vnetp = LDC_TO_VNET(ldcp);
2130 	int i;
2131 #ifdef DEBUG
2132 	int ndone = 0;
2133 #endif
2134 
2135 	for (i = 0; i < ldcp->num_txds; i++) {
2136 
2137 		tbufp = &(ldcp->tbufp[i]);
2138 		txdp = tbufp->descp;
2139 		hdrp = &txdp->hdr;
2140 
2141 		if (tbufp->flags & VGEN_PRIV_DESC_BUSY) {
2142 			tbufp->flags = VGEN_PRIV_DESC_FREE;
2143 #ifdef DEBUG
2144 			if (hdrp->dstate == VIO_DESC_DONE)
2145 				ndone++;
2146 #endif
2147 			hdrp->dstate = VIO_DESC_FREE;
2148 			hdrp->ack = B_FALSE;
2149 		}
2150 	}
2151 	/* reset tbuf walking pointers */
2152 	ldcp->next_tbufp = ldcp->tbufp;
2153 	ldcp->cur_tbufp = ldcp->tbufp;
2154 
2155 	/* reset tx seqnum and index */
2156 	ldcp->next_txseq = VNET_ISS;
2157 	ldcp->next_txi = 0;
2158 
2159 	ldcp->resched_peer = B_TRUE;
2160 
2161 #ifdef DEBUG
2162 	DBG2((vnetp,
2163 	    "vgen_clobber_tbufs: id(0x%lx) num descrs done (%d)\n",
2164 	    ldcp->ldc_id, ndone));
2165 #endif
2166 }
2167 
2168 /* clobber receive descriptor ring */
2169 static void
2170 vgen_clobber_rxds(vgen_ldc_t *ldcp)
2171 {
2172 	ldcp->rx_dhandle = 0;
2173 	bzero(&ldcp->rx_dcookie, sizeof (ldcp->rx_dcookie));
2174 	ldcp->rxdp = NULL;
2175 	ldcp->next_rxi = 0;
2176 	ldcp->num_rxds = 0;
2177 	ldcp->next_rxseq = VNET_ISS;
2178 }
2179 
2180 /* initialize receive descriptor ring */
2181 static int
2182 vgen_init_rxds(vgen_ldc_t *ldcp, uint32_t num_desc, uint32_t desc_size,
2183 	ldc_mem_cookie_t *dcookie, uint32_t ncookies)
2184 {
2185 	int rv;
2186 	ldc_mem_info_t minfo;
2187 
2188 	rv = ldc_mem_dring_map(ldcp->ldc_handle, dcookie, ncookies, num_desc,
2189 	    desc_size, LDC_SHADOW_MAP, &(ldcp->rx_dhandle));
2190 	if (rv != 0) {
2191 		return (DDI_FAILURE);
2192 	}
2193 
2194 	/*
2195 	 * sucessfully mapped, now try to
2196 	 * get info about the mapped dring
2197 	 */
2198 	rv = ldc_mem_dring_info(ldcp->rx_dhandle, &minfo);
2199 	if (rv != 0) {
2200 		(void) ldc_mem_dring_unmap(ldcp->rx_dhandle);
2201 		return (DDI_FAILURE);
2202 	}
2203 
2204 	/*
2205 	 * save ring address, number of descriptors.
2206 	 */
2207 	ldcp->rxdp = (vnet_public_desc_t *)(minfo.vaddr);
2208 	bcopy(dcookie, &(ldcp->rx_dcookie), sizeof (*dcookie));
2209 	ldcp->num_rxdcookies = ncookies;
2210 	ldcp->num_rxds = num_desc;
2211 	ldcp->next_rxi = 0;
2212 	ldcp->next_rxseq = VNET_ISS;
2213 
2214 	return (DDI_SUCCESS);
2215 }
2216 
2217 /* get channel statistics */
2218 static uint64_t
2219 vgen_ldc_stat(vgen_ldc_t *ldcp, uint_t stat)
2220 {
2221 	vgen_stats_t *statsp;
2222 	uint64_t val;
2223 
2224 	val = 0;
2225 	statsp = ldcp->statsp;
2226 	switch (stat) {
2227 
2228 	case MAC_STAT_MULTIRCV:
2229 		val = statsp->multircv;
2230 		break;
2231 
2232 	case MAC_STAT_BRDCSTRCV:
2233 		val = statsp->brdcstrcv;
2234 		break;
2235 
2236 	case MAC_STAT_MULTIXMT:
2237 		val = statsp->multixmt;
2238 		break;
2239 
2240 	case MAC_STAT_BRDCSTXMT:
2241 		val = statsp->brdcstxmt;
2242 		break;
2243 
2244 	case MAC_STAT_NORCVBUF:
2245 		val = statsp->norcvbuf;
2246 		break;
2247 
2248 	case MAC_STAT_IERRORS:
2249 		val = statsp->ierrors;
2250 		break;
2251 
2252 	case MAC_STAT_NOXMTBUF:
2253 		val = statsp->noxmtbuf;
2254 		break;
2255 
2256 	case MAC_STAT_OERRORS:
2257 		val = statsp->oerrors;
2258 		break;
2259 
2260 	case MAC_STAT_COLLISIONS:
2261 		break;
2262 
2263 	case MAC_STAT_RBYTES:
2264 		val = statsp->rbytes;
2265 		break;
2266 
2267 	case MAC_STAT_IPACKETS:
2268 		val = statsp->ipackets;
2269 		break;
2270 
2271 	case MAC_STAT_OBYTES:
2272 		val = statsp->obytes;
2273 		break;
2274 
2275 	case MAC_STAT_OPACKETS:
2276 		val = statsp->opackets;
2277 		break;
2278 
2279 	/* stats not relevant to ldc, return 0 */
2280 	case MAC_STAT_IFSPEED:
2281 	case ETHER_STAT_ALIGN_ERRORS:
2282 	case ETHER_STAT_FCS_ERRORS:
2283 	case ETHER_STAT_FIRST_COLLISIONS:
2284 	case ETHER_STAT_MULTI_COLLISIONS:
2285 	case ETHER_STAT_DEFER_XMTS:
2286 	case ETHER_STAT_TX_LATE_COLLISIONS:
2287 	case ETHER_STAT_EX_COLLISIONS:
2288 	case ETHER_STAT_MACXMT_ERRORS:
2289 	case ETHER_STAT_CARRIER_ERRORS:
2290 	case ETHER_STAT_TOOLONG_ERRORS:
2291 	case ETHER_STAT_XCVR_ADDR:
2292 	case ETHER_STAT_XCVR_ID:
2293 	case ETHER_STAT_XCVR_INUSE:
2294 	case ETHER_STAT_CAP_1000FDX:
2295 	case ETHER_STAT_CAP_1000HDX:
2296 	case ETHER_STAT_CAP_100FDX:
2297 	case ETHER_STAT_CAP_100HDX:
2298 	case ETHER_STAT_CAP_10FDX:
2299 	case ETHER_STAT_CAP_10HDX:
2300 	case ETHER_STAT_CAP_ASMPAUSE:
2301 	case ETHER_STAT_CAP_PAUSE:
2302 	case ETHER_STAT_CAP_AUTONEG:
2303 	case ETHER_STAT_ADV_CAP_1000FDX:
2304 	case ETHER_STAT_ADV_CAP_1000HDX:
2305 	case ETHER_STAT_ADV_CAP_100FDX:
2306 	case ETHER_STAT_ADV_CAP_100HDX:
2307 	case ETHER_STAT_ADV_CAP_10FDX:
2308 	case ETHER_STAT_ADV_CAP_10HDX:
2309 	case ETHER_STAT_ADV_CAP_ASMPAUSE:
2310 	case ETHER_STAT_ADV_CAP_PAUSE:
2311 	case ETHER_STAT_ADV_CAP_AUTONEG:
2312 	case ETHER_STAT_LP_CAP_1000FDX:
2313 	case ETHER_STAT_LP_CAP_1000HDX:
2314 	case ETHER_STAT_LP_CAP_100FDX:
2315 	case ETHER_STAT_LP_CAP_100HDX:
2316 	case ETHER_STAT_LP_CAP_10FDX:
2317 	case ETHER_STAT_LP_CAP_10HDX:
2318 	case ETHER_STAT_LP_CAP_ASMPAUSE:
2319 	case ETHER_STAT_LP_CAP_PAUSE:
2320 	case ETHER_STAT_LP_CAP_AUTONEG:
2321 	case ETHER_STAT_LINK_ASMPAUSE:
2322 	case ETHER_STAT_LINK_PAUSE:
2323 	case ETHER_STAT_LINK_AUTONEG:
2324 	case ETHER_STAT_LINK_DUPLEX:
2325 	default:
2326 		val = 0;
2327 		break;
2328 
2329 	}
2330 	return (val);
2331 }
2332 
2333 /*
2334  * LDC channel is UP, start handshake process with peer.
2335  * Flag tells vnet_modify_fdb() about the context: set to B_TRUE if this
2336  * function is being called from transmit routine, otherwise B_FALSE.
2337  */
2338 static void
2339 vgen_handle_evt_up(vgen_ldc_t *ldcp, boolean_t flag)
2340 {
2341 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
2342 	void 	*vnetp = LDC_TO_VNET(ldcp);
2343 
2344 	DBG1((vnetp, "vgen_handle_evt_up: enter: id(%lx)\n", ldcp->ldc_id));
2345 
2346 	ASSERT(MUTEX_HELD(&ldcp->cblock));
2347 
2348 	if (ldcp->portp != vgenp->vsw_portp) {
2349 		/*
2350 		 * modify fdb entry to use this port as the
2351 		 * channel is up, instead of going through the
2352 		 * vsw-port (see comments in vgen_port_init())
2353 		 */
2354 		vnet_modify_fdb(vnetp, (uint8_t *)&ldcp->portp->macaddr,
2355 		    vgen_tx, ldcp->portp, flag);
2356 	}
2357 
2358 	/* Initialize local session id */
2359 	ldcp->local_sid = ddi_get_lbolt();
2360 
2361 	/* clear peer session id */
2362 	ldcp->peer_sid = 0;
2363 	ldcp->hretries = 0;
2364 
2365 	if (ldcp->hphase != VH_PHASE0) {
2366 		vgen_handshake_reset(ldcp);
2367 	}
2368 
2369 	/* Initiate Handshake process with peer ldc endpoint */
2370 	vgen_handshake(vh_nextphase(ldcp));
2371 
2372 	DBG1((vnetp, "vgen_handle_evt_up: exit: id(%lx)\n", ldcp->ldc_id));
2373 }
2374 
2375 /*
2376  * LDC channel is Reset, terminate connection with peer and try to
2377  * bring the channel up again.
2378  * Flag tells vnet_modify_fdb() about the context: set to B_TRUE if this
2379  * function is being called from transmit routine, otherwise B_FALSE.
2380  */
2381 static void
2382 vgen_handle_evt_reset(vgen_ldc_t *ldcp, boolean_t flag)
2383 {
2384 	ldc_status_t istatus;
2385 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
2386 	void	*vnetp = LDC_TO_VNET(ldcp);
2387 	int	rv;
2388 
2389 	DBG1((vnetp, "vgen_handle_evt_reset: enter: id(%lx)\n", ldcp->ldc_id));
2390 
2391 	ASSERT(MUTEX_HELD(&ldcp->cblock));
2392 
2393 	if ((ldcp->portp != vgenp->vsw_portp) &&
2394 		(vgenp->vsw_portp != NULL)) {
2395 		/*
2396 		 * modify fdb entry to use vsw-port  as the
2397 		 * channel is reset and we don't have a direct
2398 		 * link to the destination (see comments
2399 		 * in vgen_port_init()).
2400 		 */
2401 		vnet_modify_fdb(vnetp, (uint8_t *)&ldcp->portp->macaddr,
2402 		    vgen_tx, vgenp->vsw_portp, flag);
2403 	}
2404 
2405 	if (ldcp->hphase != VH_PHASE0) {
2406 		vgen_handshake_reset(ldcp);
2407 	}
2408 
2409 	/* try to bring the channel up */
2410 	rv = ldc_up(ldcp->ldc_handle);
2411 	if (rv != 0) {
2412 		DWARN((vnetp,
2413 		    "vgen_handle_evt_reset: ldc_up err id(%lx) rv(%d)\n",
2414 		    ldcp->ldc_id, rv));
2415 	}
2416 
2417 	if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
2418 		DWARN((vnetp,
2419 		    "vgen_handle_evt_reset: ldc_status err id(%lx)\n"));
2420 	} else {
2421 		ldcp->ldc_status = istatus;
2422 	}
2423 
2424 	/* if channel is already UP - restart handshake */
2425 	if (ldcp->ldc_status == LDC_UP) {
2426 		vgen_handle_evt_up(ldcp, flag);
2427 	}
2428 
2429 	DBG1((vnetp, "vgen_handle_evt_reset: exit: id(%lx)\n", ldcp->ldc_id));
2430 }
2431 
2432 /* Interrupt handler for the channel */
2433 static uint_t
2434 vgen_ldc_cb(uint64_t event, caddr_t arg)
2435 {
2436 	_NOTE(ARGUNUSED(event))
2437 	vgen_ldc_t	*ldcp;
2438 	void 		*vnetp;
2439 	vgen_t		*vgenp;
2440 	size_t		msglen;
2441 	ldc_status_t 	istatus;
2442 	uint64_t	ldcmsg[7];
2443 	int 		rv = 0;
2444 	vio_msg_tag_t	*tagp;
2445 	mblk_t		*mp = NULL;
2446 	mblk_t		*bp = NULL;
2447 	mblk_t		*bpt = NULL;
2448 	mblk_t		*headp = NULL;
2449 	mblk_t		*tailp = NULL;
2450 	vgen_stats_t	*statsp;
2451 
2452 	ldcp = (vgen_ldc_t *)arg;
2453 	vgenp = LDC_TO_VGEN(ldcp);
2454 	vnetp = LDC_TO_VNET(ldcp);
2455 	statsp = ldcp->statsp;
2456 
2457 	DBG1((vnetp, "vgen_ldc_cb enter: ldcid (%lx)\n", ldcp->ldc_id));
2458 
2459 	mutex_enter(&ldcp->cblock);
2460 	statsp->callbacks++;
2461 	if ((ldcp->ldc_status == LDC_INIT) || (ldcp->ldc_handle == NULL)) {
2462 		DWARN((vnetp, "vgen_ldc_cb: id(%lx), status(%d) is LDC_INIT\n",
2463 		    ldcp->ldc_id, ldcp->ldc_status));
2464 		mutex_exit(&ldcp->cblock);
2465 		return (LDC_SUCCESS);
2466 	}
2467 
2468 	/*
2469 	 * NOTE: not using switch() as event could be triggered by
2470 	 * a state change and a read request. Also the ordering	of the
2471 	 * check for the event types is deliberate.
2472 	 */
2473 	if (event & LDC_EVT_UP) {
2474 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
2475 			DWARN((vnetp,
2476 			    "vgen_ldc_cb: ldc_status err id(%lx)\n"));
2477 		} else {
2478 			ldcp->ldc_status = istatus;
2479 		}
2480 		ASSERT(ldcp->ldc_status == LDC_UP);
2481 		DWARN((vnetp,
2482 		    "vgen_ldc_cb: id(%lx) event(%lx) UP, status(%d)\n",
2483 		    ldcp->ldc_id, event, ldcp->ldc_status));
2484 
2485 		vgen_handle_evt_up(ldcp, B_FALSE);
2486 
2487 		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);
2488 	}
2489 
2490 	if (event & LDC_EVT_READ) {
2491 		DBG2((vnetp,
2492 		    "vgen_ldc_cb: id(%lx) event(%lx) READ, status(%d)\n",
2493 		    ldcp->ldc_id, event, ldcp->ldc_status));
2494 
2495 		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);
2496 		goto vgen_ldccb_rcv;
2497 	}
2498 
2499 	if (event & (LDC_EVT_RESET | LDC_EVT_DOWN)) {
2500 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
2501 			DWARN((vnetp,
2502 			    "vgen_ldc_cb: ldc_status err id(%lx)\n"));
2503 		} else {
2504 			ldcp->ldc_status = istatus;
2505 		}
2506 		DWARN((vnetp,
2507 		    "vgen_ldc_cb: id(%lx) event(%lx) RESET/DOWN, status(%d)\n",
2508 		    ldcp->ldc_id, event, ldcp->ldc_status));
2509 
2510 		vgen_handle_evt_reset(ldcp, B_FALSE);
2511 	}
2512 
2513 	mutex_exit(&ldcp->cblock);
2514 	return (LDC_SUCCESS);
2515 
2516 vgen_ldccb_rcv:
2517 
2518 	/* if event is LDC_EVT_READ, receive all packets */
2519 	do {
2520 		msglen = sizeof (ldcmsg);
2521 		rv = ldc_read(ldcp->ldc_handle, (caddr_t)&ldcmsg, &msglen);
2522 
2523 		if (rv != 0) {
2524 			DWARN((vnetp,
2525 			    "vgen_ldc_cb:ldc_read err id(%lx) rv(%d) "
2526 			    "len(%d)\n", ldcp->ldc_id, rv, msglen));
2527 			if (rv == ECONNRESET)
2528 				goto exit_error;
2529 			break;
2530 		}
2531 		if (msglen == 0) {
2532 			DBG2((vnetp, "vgen_ldc_cb: ldc_read id(%lx) NODATA",
2533 			ldcp->ldc_id));
2534 			break;
2535 		}
2536 		DBG2((vnetp, "vgen_ldc_cb: ldc_read id(%lx): msglen(%d)",
2537 		    ldcp->ldc_id, msglen));
2538 
2539 		tagp = (vio_msg_tag_t *)ldcmsg;
2540 
2541 		if (ldcp->peer_sid) {
2542 			/*
2543 			 * check sid only after we have received peer's sid
2544 			 * in the version negotiate msg.
2545 			 */
2546 #ifdef DEBUG
2547 			if (vgen_hdbg & HDBG_BAD_SID) {
2548 				/* simulate bad sid condition */
2549 				tagp->vio_sid = 0;
2550 				vgen_hdbg &= ~(HDBG_BAD_SID);
2551 			}
2552 #endif
2553 			rv = vgen_check_sid(ldcp, tagp);
2554 			if (rv != VGEN_SUCCESS) {
2555 				/*
2556 				 * If sid mismatch is detected,
2557 				 * reset the channel.
2558 				 */
2559 				ldcp->need_ldc_reset = B_TRUE;
2560 				goto exit_error;
2561 			}
2562 		}
2563 
2564 		switch (tagp->vio_msgtype) {
2565 		case VIO_TYPE_CTRL:
2566 			rv = vgen_handle_ctrlmsg(ldcp, tagp);
2567 			break;
2568 
2569 		case VIO_TYPE_DATA:
2570 			headp = tailp = NULL;
2571 			rv = vgen_handle_datamsg(ldcp, tagp, &headp, &tailp);
2572 			/* build a chain of received packets */
2573 			if (headp != NULL) {
2574 				if (bp == NULL) {
2575 					bp = headp;
2576 					bpt = tailp;
2577 				} else {
2578 					bpt->b_next = headp;
2579 					bpt = tailp;
2580 				}
2581 			}
2582 			break;
2583 
2584 		case VIO_TYPE_ERR:
2585 			vgen_handle_errmsg(ldcp, tagp);
2586 			break;
2587 
2588 		default:
2589 			DWARN((vnetp,
2590 			    "vgen_ldc_cb: Unknown VIO_TYPE(%x)\n",
2591 			    tagp->vio_msgtype));
2592 			break;
2593 		}
2594 
2595 exit_error:
2596 		if (rv == ECONNRESET) {
2597 			if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
2598 				DWARN((vnetp,
2599 				    "vgen_ldc_cb: ldc_status err id(%lx)\n"));
2600 			} else {
2601 				ldcp->ldc_status = istatus;
2602 			}
2603 			vgen_handle_evt_reset(ldcp, B_FALSE);
2604 			break;
2605 		} else if (rv) {
2606 			vgen_handshake_retry(ldcp);
2607 			break;
2608 		}
2609 
2610 	} while (msglen);
2611 
2612 	mutex_exit(&ldcp->cblock);
2613 
2614 	/* send up the received packets to MAC layer */
2615 	while (bp != NULL) {
2616 		mp = bp;
2617 		bp = bp->b_next;
2618 		mp->b_next = mp->b_prev = NULL;
2619 		DBG2((vnetp, "vgen_ldc_cb: id(%lx) rx pkt len (%lx)\n",
2620 		    ldcp->ldc_id, MBLKL(mp)));
2621 		vnet_rx(vgenp->vnetp, NULL, mp);
2622 	}
2623 	DBG1((vnetp, "vgen_ldc_cb exit: ldcid (%lx)\n", ldcp->ldc_id));
2624 
2625 	return (LDC_SUCCESS);
2626 }
2627 
2628 /* vgen handshake functions */
2629 
2630 /* change the hphase for the channel to the next phase */
2631 static vgen_ldc_t *
2632 vh_nextphase(vgen_ldc_t *ldcp)
2633 {
2634 	if (ldcp->hphase == VH_PHASE3) {
2635 		ldcp->hphase = VH_DONE;
2636 	} else {
2637 		ldcp->hphase++;
2638 	}
2639 	return (ldcp);
2640 }
2641 
2642 /*
2643  * Check whether the given version is supported or not and
2644  * return VGEN_SUCCESS if supported.
2645  */
2646 static int
2647 vgen_supported_version(vgen_ldc_t *ldcp, uint16_t ver_major,
2648 uint16_t ver_minor)
2649 {
2650 	vgen_ver_t	*versions = ldcp->vgen_versions;
2651 	int		i = 0;
2652 
2653 	while (i < VGEN_NUM_VER) {
2654 		if ((versions[i].ver_major == 0) &&
2655 		    (versions[i].ver_minor == 0)) {
2656 			break;
2657 		}
2658 		if ((versions[i].ver_major == ver_major) &&
2659 			(versions[i].ver_minor == ver_minor)) {
2660 			return (VGEN_SUCCESS);
2661 		}
2662 		i++;
2663 	}
2664 	return (VGEN_FAILURE);
2665 }
2666 
2667 /*
2668  * Given a version, return VGEN_SUCCESS if a lower version is supported.
2669  */
2670 static int
2671 vgen_next_version(vgen_ldc_t *ldcp, vgen_ver_t *verp)
2672 {
2673 	vgen_ver_t	*versions = ldcp->vgen_versions;
2674 	int		i = 0;
2675 
2676 	while (i < VGEN_NUM_VER) {
2677 		if ((versions[i].ver_major == 0) &&
2678 		    (versions[i].ver_minor == 0)) {
2679 			break;
2680 		}
2681 		/*
2682 		 * if we support a lower minor version within the same major
2683 		 * version, or if we support a lower major version,
2684 		 * update the verp parameter with this lower version and
2685 		 * return success.
2686 		 */
2687 		if (((versions[i].ver_major == verp->ver_major) &&
2688 			(versions[i].ver_minor < verp->ver_minor)) ||
2689 			(versions[i].ver_major < verp->ver_major)) {
2690 				verp->ver_major = versions[i].ver_major;
2691 				verp->ver_minor = versions[i].ver_minor;
2692 				return (VGEN_SUCCESS);
2693 		}
2694 		i++;
2695 	}
2696 
2697 	return (VGEN_FAILURE);
2698 }
2699 
2700 /*
2701  * wrapper routine to send the given message over ldc using ldc_write().
2702  */
2703 static int
2704 vgen_sendmsg(vgen_ldc_t *ldcp, caddr_t msg,  size_t msglen,
2705     boolean_t caller_holds_lock)
2706 {
2707 	int	rv;
2708 	size_t	len;
2709 	void *vnetp = LDC_TO_VNET(ldcp);
2710 	uint32_t retries = 0;
2711 
2712 	len = msglen;
2713 	if ((len == 0) || (msg == NULL))
2714 		return (VGEN_FAILURE);
2715 
2716 	if (!caller_holds_lock) {
2717 		mutex_enter(&ldcp->txlock);
2718 	}
2719 
2720 	do {
2721 		len = msglen;
2722 		rv = ldc_write(ldcp->ldc_handle, (caddr_t)msg, &len);
2723 		if (retries++ >= vgen_ldcwr_retries)
2724 			break;
2725 	} while (rv == EWOULDBLOCK);
2726 
2727 	if (!caller_holds_lock) {
2728 		mutex_exit(&ldcp->txlock);
2729 	}
2730 
2731 	if (rv != 0) {
2732 		DWARN((vnetp,
2733 		    "vgen_sendmsg: ldc_write failed: id(%lx) rv(%d)\n",
2734 		    ldcp->ldc_id, rv, msglen));
2735 		return (rv);
2736 	}
2737 
2738 	if (len != msglen) {
2739 		DWARN((vnetp,
2740 		    "vgen_sendmsg: ldc_write failed: id(%lx) rv(%d)"
2741 		    " msglen (%d)\n", ldcp->ldc_id, rv, msglen));
2742 		return (VGEN_FAILURE);
2743 	}
2744 
2745 	return (VGEN_SUCCESS);
2746 }
2747 
2748 /* send version negotiate message to the peer over ldc */
2749 static int
2750 vgen_send_version_negotiate(vgen_ldc_t *ldcp)
2751 {
2752 	vio_ver_msg_t	vermsg;
2753 	vio_msg_tag_t	*tagp = &vermsg.tag;
2754 	void		*vnetp = LDC_TO_VNET(ldcp);
2755 	int		rv;
2756 
2757 	bzero(&vermsg, sizeof (vermsg));
2758 
2759 	tagp->vio_msgtype = VIO_TYPE_CTRL;
2760 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
2761 	tagp->vio_subtype_env = VIO_VER_INFO;
2762 	tagp->vio_sid = ldcp->local_sid;
2763 
2764 	/* get version msg payload from ldcp->local */
2765 	vermsg.ver_major = ldcp->local_hparams.ver_major;
2766 	vermsg.ver_minor = ldcp->local_hparams.ver_minor;
2767 	vermsg.dev_class = ldcp->local_hparams.dev_class;
2768 
2769 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (vermsg), B_FALSE);
2770 	if (rv != VGEN_SUCCESS) {
2771 		DWARN((vnetp, "vgen_send_version_negotiate: vgen_sendmsg failed"
2772 		    "id (%lx)\n", ldcp->ldc_id));
2773 		return (rv);
2774 	}
2775 
2776 	ldcp->hstate |= VER_INFO_SENT;
2777 	DBG2((vnetp,
2778 	    "vgen_send_version_negotiate: VER_INFO_SENT id (%lx) ver(%d,%d)\n",
2779 	    ldcp->ldc_id, vermsg.ver_major, vermsg.ver_minor));
2780 
2781 	return (VGEN_SUCCESS);
2782 }
2783 
2784 /* send attr info message to the peer over ldc */
2785 static int
2786 vgen_send_attr_info(vgen_ldc_t *ldcp)
2787 {
2788 	vnet_attr_msg_t	attrmsg;
2789 	vio_msg_tag_t	*tagp = &attrmsg.tag;
2790 	void		*vnetp = LDC_TO_VNET(ldcp);
2791 	int		rv;
2792 
2793 	bzero(&attrmsg, sizeof (attrmsg));
2794 
2795 	tagp->vio_msgtype = VIO_TYPE_CTRL;
2796 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
2797 	tagp->vio_subtype_env = VIO_ATTR_INFO;
2798 	tagp->vio_sid = ldcp->local_sid;
2799 
2800 	/* get attr msg payload from ldcp->local */
2801 	attrmsg.mtu = ldcp->local_hparams.mtu;
2802 	attrmsg.addr = ldcp->local_hparams.addr;
2803 	attrmsg.addr_type = ldcp->local_hparams.addr_type;
2804 	attrmsg.xfer_mode = ldcp->local_hparams.xfer_mode;
2805 	attrmsg.ack_freq = ldcp->local_hparams.ack_freq;
2806 
2807 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (attrmsg), B_FALSE);
2808 	if (rv != VGEN_SUCCESS) {
2809 		DWARN((vnetp, "vgen_send_attr_info: vgen_sendmsg failed"
2810 		    "id (%lx)\n", ldcp->ldc_id));
2811 		return (rv);
2812 	}
2813 
2814 	ldcp->hstate |= ATTR_INFO_SENT;
2815 	DBG2((vnetp, "vgen_send_attr_info: ATTR_INFO_SENT id (%lx)\n",
2816 	    ldcp->ldc_id));
2817 
2818 	return (VGEN_SUCCESS);
2819 }
2820 
2821 /* send descriptor ring register message to the peer over ldc */
2822 static int
2823 vgen_send_dring_reg(vgen_ldc_t *ldcp)
2824 {
2825 	vio_dring_reg_msg_t	msg;
2826 	vio_msg_tag_t		*tagp = &msg.tag;
2827 	void		*vnetp = LDC_TO_VNET(ldcp);
2828 	int		rv;
2829 
2830 	bzero(&msg, sizeof (msg));
2831 
2832 	tagp->vio_msgtype = VIO_TYPE_CTRL;
2833 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
2834 	tagp->vio_subtype_env = VIO_DRING_REG;
2835 	tagp->vio_sid = ldcp->local_sid;
2836 
2837 	/* get dring info msg payload from ldcp->local */
2838 	bcopy(&(ldcp->local_hparams.dring_cookie), (msg.cookie),
2839 		sizeof (ldc_mem_cookie_t));
2840 	msg.ncookies = ldcp->local_hparams.num_dcookies;
2841 	msg.num_descriptors = ldcp->local_hparams.num_desc;
2842 	msg.descriptor_size = ldcp->local_hparams.desc_size;
2843 
2844 	/*
2845 	 * dring_ident is set to 0. After mapping the dring, peer sets this
2846 	 * value and sends it in the ack, which is saved in
2847 	 * vgen_handle_dring_reg().
2848 	 */
2849 	msg.dring_ident = 0;
2850 
2851 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (msg), B_FALSE);
2852 	if (rv != VGEN_SUCCESS) {
2853 		DWARN((vnetp, "vgen_send_dring_reg: vgen_sendmsg failed"
2854 		    "id (%lx)\n", ldcp->ldc_id));
2855 		return (rv);
2856 	}
2857 
2858 	ldcp->hstate |= DRING_INFO_SENT;
2859 	DBG2((vnetp, "vgen_send_dring_reg: DRING_INFO_SENT id (%lx)\n",
2860 	    ldcp->ldc_id));
2861 
2862 	return (VGEN_SUCCESS);
2863 }
2864 
2865 static int
2866 vgen_send_rdx_info(vgen_ldc_t *ldcp)
2867 {
2868 	vio_rdx_msg_t	rdxmsg;
2869 	vio_msg_tag_t	*tagp = &rdxmsg.tag;
2870 	void		*vnetp = LDC_TO_VNET(ldcp);
2871 	int		rv;
2872 
2873 	bzero(&rdxmsg, sizeof (rdxmsg));
2874 
2875 	tagp->vio_msgtype = VIO_TYPE_CTRL;
2876 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
2877 	tagp->vio_subtype_env = VIO_RDX;
2878 	tagp->vio_sid = ldcp->local_sid;
2879 
2880 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (rdxmsg), B_FALSE);
2881 	if (rv != VGEN_SUCCESS) {
2882 		DWARN((vnetp, "vgen_send_rdx_info: vgen_sendmsg failed"
2883 		    "id (%lx)\n", ldcp->ldc_id));
2884 		return (rv);
2885 	}
2886 
2887 	ldcp->hstate |= RDX_INFO_SENT;
2888 	DBG2((vnetp, "vgen_send_rdx_info: RDX_INFO_SENT id (%lx)\n",
2889 	    ldcp->ldc_id));
2890 
2891 	return (VGEN_SUCCESS);
2892 }
2893 
2894 /* send descriptor ring data message to the peer over ldc */
2895 static int
2896 vgen_send_dring_data(vgen_ldc_t *ldcp, uint32_t start, int32_t end)
2897 {
2898 	vio_dring_msg_t	dringmsg, *msgp = &dringmsg;
2899 	vio_msg_tag_t	*tagp = &msgp->tag;
2900 	void		*vnetp = LDC_TO_VNET(ldcp);
2901 	int		rv;
2902 
2903 	bzero(msgp, sizeof (*msgp));
2904 
2905 	tagp->vio_msgtype = VIO_TYPE_DATA;
2906 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
2907 	tagp->vio_subtype_env = VIO_DRING_DATA;
2908 	tagp->vio_sid = ldcp->local_sid;
2909 
2910 	msgp->seq_num = ldcp->next_txseq;
2911 	msgp->dring_ident = ldcp->local_hparams.dring_ident;
2912 	msgp->start_idx = start;
2913 	msgp->end_idx = end;
2914 
2915 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (dringmsg), B_TRUE);
2916 	if (rv != VGEN_SUCCESS) {
2917 		DWARN((vnetp, "vgen_send_dring_data: vgen_sendmsg failed"
2918 		    " id (%lx)\n", ldcp->ldc_id));
2919 		return (rv);
2920 	}
2921 
2922 	ldcp->next_txseq++;
2923 	ldcp->statsp->dring_data_msgs++;
2924 
2925 	DBG2((vnetp, "vgen_send_dring_data: DRING_DATA_SENT id (%lx)\n",
2926 	    ldcp->ldc_id));
2927 
2928 	return (VGEN_SUCCESS);
2929 }
2930 
2931 /* send multicast addr info message to vsw */
2932 static int
2933 vgen_send_mcast_info(vgen_ldc_t *ldcp)
2934 {
2935 	vnet_mcast_msg_t	mcastmsg;
2936 	vnet_mcast_msg_t	*msgp;
2937 	vio_msg_tag_t		*tagp;
2938 	vgen_t			*vgenp;
2939 	void			*vnetp;
2940 	struct ether_addr	*mca;
2941 	int			rv;
2942 	int			i;
2943 	uint32_t		size;
2944 	uint32_t		mccount;
2945 	uint32_t		n;
2946 
2947 	msgp = &mcastmsg;
2948 	tagp = &msgp->tag;
2949 	vgenp = LDC_TO_VGEN(ldcp);
2950 	vnetp = LDC_TO_VNET(ldcp);
2951 
2952 	mccount = vgenp->mccount;
2953 	i = 0;
2954 
2955 	do {
2956 		tagp->vio_msgtype = VIO_TYPE_CTRL;
2957 		tagp->vio_subtype = VIO_SUBTYPE_INFO;
2958 		tagp->vio_subtype_env = VNET_MCAST_INFO;
2959 		tagp->vio_sid = ldcp->local_sid;
2960 
2961 		n = ((mccount >= VNET_NUM_MCAST) ? VNET_NUM_MCAST : mccount);
2962 		size = n * sizeof (struct ether_addr);
2963 
2964 		mca = &(vgenp->mctab[i]);
2965 		bcopy(mca, (msgp->mca), size);
2966 		msgp->set = B_TRUE;
2967 		msgp->count = n;
2968 
2969 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msgp),
2970 		    B_FALSE);
2971 		if (rv != VGEN_SUCCESS) {
2972 			DWARN((vnetp, "vgen_send_mcast_info: vgen_sendmsg err"
2973 			    "id (%lx)\n", ldcp->ldc_id));
2974 			return (rv);
2975 		}
2976 
2977 		mccount -= n;
2978 		i += n;
2979 
2980 	} while (mccount);
2981 
2982 	return (VGEN_SUCCESS);
2983 }
2984 
2985 /* Initiate Phase 2 of handshake */
2986 static int
2987 vgen_handshake_phase2(vgen_ldc_t *ldcp)
2988 {
2989 	int rv;
2990 	uint32_t ncookies = 0;
2991 	void	*vnetp = LDC_TO_VNET(ldcp);
2992 #ifdef DEBUG
2993 	if (vgen_hdbg & HDBG_OUT_STATE) {
2994 		/* simulate out of state condition */
2995 		vgen_hdbg &= ~(HDBG_OUT_STATE);
2996 		rv = vgen_send_rdx_info(ldcp);
2997 		return (rv);
2998 	}
2999 	if (vgen_hdbg & HDBG_TIMEOUT) {
3000 		/* simulate timeout condition */
3001 		vgen_hdbg &= ~(HDBG_TIMEOUT);
3002 		return (VGEN_SUCCESS);
3003 	}
3004 #endif
3005 	rv = vgen_send_attr_info(ldcp);
3006 	if (rv != VGEN_SUCCESS) {
3007 		return (rv);
3008 	}
3009 
3010 	/* Bind descriptor ring to the channel */
3011 	if (ldcp->num_txdcookies == 0) {
3012 		rv = ldc_mem_dring_bind(ldcp->ldc_handle, ldcp->tx_dhandle,
3013 		    LDC_SHADOW_MAP, LDC_MEM_RW, &ldcp->tx_dcookie, &ncookies);
3014 		if (rv != 0) {
3015 			DWARN((vnetp, "vgen_handshake_phase2: id (%lx) "
3016 			    "ldc_mem_dring_bind failed rv(%x)\n",
3017 			    ldcp->ldc_id, rv));
3018 			return (rv);
3019 		}
3020 		ASSERT(ncookies == 1);
3021 		ldcp->num_txdcookies = ncookies;
3022 	}
3023 
3024 	/* update local dring_info params */
3025 	bcopy(&(ldcp->tx_dcookie), &(ldcp->local_hparams.dring_cookie),
3026 		sizeof (ldc_mem_cookie_t));
3027 	ldcp->local_hparams.num_dcookies = ldcp->num_txdcookies;
3028 	ldcp->local_hparams.num_desc = ldcp->num_txds;
3029 	ldcp->local_hparams.desc_size = sizeof (vnet_public_desc_t);
3030 
3031 	rv = vgen_send_dring_reg(ldcp);
3032 	if (rv != VGEN_SUCCESS) {
3033 		return (rv);
3034 	}
3035 
3036 	return (VGEN_SUCCESS);
3037 }
3038 
3039 /*
3040  * This function resets the handshake phase to VH_PHASE0(pre-handshake phase).
3041  * This can happen after a channel comes up (status: LDC_UP) or
3042  * when handshake gets terminated due to various conditions.
3043  */
3044 static void
3045 vgen_reset_hphase(vgen_ldc_t *ldcp)
3046 {
3047 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
3048 	void	*vnetp = LDC_TO_VNET(ldcp);
3049 	ldc_status_t istatus;
3050 	int rv;
3051 
3052 	DBG2((vnetp, "vgen_reset_hphase: id(0x%lx)\n", ldcp->ldc_id));
3053 	/* reset hstate and hphase */
3054 	ldcp->hstate = 0;
3055 	ldcp->hphase = VH_PHASE0;
3056 
3057 	/* reset handshake watchdog timeout */
3058 	if (ldcp->htid) {
3059 		(void) untimeout(ldcp->htid);
3060 		ldcp->htid = 0;
3061 	}
3062 
3063 	if (ldcp->local_hparams.dring_ready) {
3064 		ldcp->local_hparams.dring_ready = B_FALSE;
3065 	}
3066 
3067 	/* Unbind tx descriptor ring from the channel */
3068 	if (ldcp->num_txdcookies) {
3069 		rv = ldc_mem_dring_unbind(ldcp->tx_dhandle);
3070 		if (rv != 0) {
3071 			DWARN((vnetp,
3072 			    "vgen_reset_hphase: ldc_mem_dring_unbind "
3073 			    "failed id(%lx)\n", ldcp->ldc_id));
3074 		}
3075 		ldcp->num_txdcookies = 0;
3076 	}
3077 
3078 	if (ldcp->peer_hparams.dring_ready) {
3079 		ldcp->peer_hparams.dring_ready = B_FALSE;
3080 		/* Unmap peer's dring */
3081 		(void) ldc_mem_dring_unmap(ldcp->rx_dhandle);
3082 		vgen_clobber_rxds(ldcp);
3083 	}
3084 
3085 	vgen_clobber_tbufs(ldcp);
3086 
3087 	/*
3088 	 * clear local handshake params and initialize.
3089 	 */
3090 	bzero(&(ldcp->local_hparams), sizeof (ldcp->local_hparams));
3091 
3092 	/* set version to the highest version supported */
3093 	ldcp->local_hparams.ver_major =
3094 			ldcp->vgen_versions[0].ver_major;
3095 	ldcp->local_hparams.ver_minor =
3096 			ldcp->vgen_versions[0].ver_minor;
3097 	ldcp->local_hparams.dev_class = VDEV_NETWORK;
3098 
3099 	/* set attr_info params */
3100 	ldcp->local_hparams.mtu = ETHERMAX;
3101 	ldcp->local_hparams.addr =
3102 		vgen_macaddr_strtoul(vgenp->macaddr);
3103 	ldcp->local_hparams.addr_type = ADDR_TYPE_MAC;
3104 	ldcp->local_hparams.xfer_mode = VIO_DRING_MODE;
3105 	ldcp->local_hparams.ack_freq = 0;	/* don't need acks */
3106 
3107 	/*
3108 	 * Note: dring is created, but not bound yet.
3109 	 * local dring_info params will be updated when we bind the dring in
3110 	 * vgen_handshake_phase2().
3111 	 * dring_ident is set to 0. After mapping the dring, peer sets this
3112 	 * value and sends it in the ack, which is saved in
3113 	 * vgen_handle_dring_reg().
3114 	 */
3115 	ldcp->local_hparams.dring_ident = 0;
3116 
3117 	/* clear peer_hparams */
3118 	bzero(&(ldcp->peer_hparams), sizeof (ldcp->peer_hparams));
3119 
3120 	/* reset the channel if required */
3121 	if (ldcp->need_ldc_reset) {
3122 		DWARN((vnetp,
3123 		    "vgen_reset_hphase: id (%lx), Doing Channel Reset...\n",
3124 		    ldcp->ldc_id));
3125 		ldcp->need_ldc_reset = B_FALSE;
3126 		(void) ldc_down(ldcp->ldc_handle);
3127 		(void) ldc_status(ldcp->ldc_handle, &istatus);
3128 		DBG2((vnetp,
3129 		    "vgen_reset_hphase: id (%lx), Reset Done,ldc_status(%x)\n",
3130 		    ldcp->ldc_id, istatus));
3131 		ldcp->ldc_status = istatus;
3132 
3133 		/* clear sids */
3134 		ldcp->local_sid = 0;
3135 		ldcp->peer_sid = 0;
3136 
3137 		/* try to bring the channel up */
3138 		rv = ldc_up(ldcp->ldc_handle);
3139 		if (rv != 0) {
3140 			DWARN((vnetp,
3141 			    "vgen_reset_hphase: ldc_up err id(%lx) rv(%d)\n",
3142 			    ldcp->ldc_id, rv));
3143 		}
3144 
3145 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
3146 			DWARN((vnetp,
3147 			    "vgen_reset_hphase: ldc_status err id(%lx)\n"));
3148 		} else {
3149 			ldcp->ldc_status = istatus;
3150 		}
3151 
3152 		/* if channel is already UP - restart handshake */
3153 		if (istatus == LDC_UP) {
3154 			/* Initialize local session id */
3155 			ldcp->local_sid = ddi_get_lbolt();
3156 			vgen_handshake(vh_nextphase(ldcp));
3157 		}
3158 	}
3159 }
3160 
3161 /* wrapper function for vgen_reset_hphase */
3162 static void
3163 vgen_handshake_reset(vgen_ldc_t *ldcp)
3164 {
3165 	ASSERT(MUTEX_HELD(&ldcp->cblock));
3166 	mutex_enter(&ldcp->txlock);
3167 	mutex_enter(&ldcp->tclock);
3168 
3169 	vgen_reset_hphase(ldcp);
3170 
3171 	mutex_exit(&ldcp->tclock);
3172 	mutex_exit(&ldcp->txlock);
3173 }
3174 
3175 /*
3176  * Initiate handshake with the peer by sending various messages
3177  * based on the handshake-phase that the channel is currently in.
3178  */
3179 static void
3180 vgen_handshake(vgen_ldc_t *ldcp)
3181 {
3182 	uint32_t hphase = ldcp->hphase;
3183 	void	*vnetp = LDC_TO_VNET(ldcp);
3184 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
3185 	ldc_status_t	istatus;
3186 	int	rv = 0;
3187 
3188 	switch (hphase) {
3189 
3190 	case VH_PHASE1:
3191 
3192 		/*
3193 		 * start timer, for entire handshake process, turn this timer
3194 		 * off if all phases of handshake complete successfully and
3195 		 * hphase goes to VH_DONE(below) or
3196 		 * vgen_reset_hphase() gets called or
3197 		 * channel is reset due to errors or
3198 		 * vgen_ldc_uninit() is invoked(vgen_stop).
3199 		 */
3200 		ldcp->htid = timeout(vgen_hwatchdog, (caddr_t)ldcp,
3201 		    drv_usectohz(vgen_hwd_interval * 1000));
3202 
3203 		/* Phase 1 involves negotiating the version */
3204 		rv = vgen_send_version_negotiate(ldcp);
3205 		break;
3206 
3207 	case VH_PHASE2:
3208 		rv = vgen_handshake_phase2(ldcp);
3209 		break;
3210 
3211 	case VH_PHASE3:
3212 		rv = vgen_send_rdx_info(ldcp);
3213 		break;
3214 
3215 	case VH_DONE:
3216 		/* reset handshake watchdog timeout */
3217 		if (ldcp->htid) {
3218 			(void) untimeout(ldcp->htid);
3219 			ldcp->htid = 0;
3220 		}
3221 		ldcp->hretries = 0;
3222 #if 0
3223 		vgen_print_ldcinfo(ldcp);
3224 #endif
3225 		DBG1((vnetp, "vgen_handshake: id(0x%lx) Handshake Done\n",
3226 		    ldcp->ldc_id));
3227 
3228 		if (ldcp->need_mcast_sync) {
3229 			/* need to sync multicast table with vsw */
3230 
3231 			ldcp->need_mcast_sync = B_FALSE;
3232 			mutex_exit(&ldcp->cblock);
3233 
3234 			mutex_enter(&vgenp->lock);
3235 			rv = vgen_send_mcast_info(ldcp);
3236 			mutex_exit(&vgenp->lock);
3237 
3238 			mutex_enter(&ldcp->cblock);
3239 			if (rv != VGEN_SUCCESS)
3240 				break;
3241 		}
3242 
3243 		/*
3244 		 * Check if mac layer should be notified to restart
3245 		 * transmissions. This can happen if the channel got
3246 		 * reset and vgen_clobber_tbufs() is called, while
3247 		 * need_resched is set.
3248 		 */
3249 		mutex_enter(&ldcp->tclock);
3250 		if (ldcp->need_resched) {
3251 			ldcp->need_resched = B_FALSE;
3252 			vnet_tx_update(vgenp->vnetp);
3253 		}
3254 		mutex_exit(&ldcp->tclock);
3255 
3256 		break;
3257 
3258 	default:
3259 		break;
3260 	}
3261 
3262 	if (rv == ECONNRESET) {
3263 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
3264 			DWARN((vnetp,
3265 			    "vgen_handshake: ldc_status err id(%lx)\n"));
3266 		} else {
3267 			ldcp->ldc_status = istatus;
3268 		}
3269 		vgen_handle_evt_reset(ldcp, B_FALSE);
3270 	} else if (rv) {
3271 		vgen_handshake_reset(ldcp);
3272 	}
3273 }
3274 
3275 /*
3276  * Check if the current handshake phase has completed successfully and
3277  * return the status.
3278  */
3279 static int
3280 vgen_handshake_done(vgen_ldc_t *ldcp)
3281 {
3282 	uint32_t	hphase = ldcp->hphase;
3283 	int 		status = 0;
3284 	void		*vnetp = LDC_TO_VNET(ldcp);
3285 
3286 	switch (hphase) {
3287 
3288 	case VH_PHASE1:
3289 		/*
3290 		 * Phase1 is done, if version negotiation
3291 		 * completed successfully.
3292 		 */
3293 		status = ((ldcp->hstate & VER_NEGOTIATED) ==
3294 			VER_NEGOTIATED);
3295 		break;
3296 
3297 	case VH_PHASE2:
3298 		/*
3299 		 * Phase 2 is done, if attr info and dring info
3300 		 * have been exchanged successfully.
3301 		 */
3302 		status = (((ldcp->hstate & ATTR_INFO_EXCHANGED) ==
3303 			    ATTR_INFO_EXCHANGED) &&
3304 			    ((ldcp->hstate & DRING_INFO_EXCHANGED) ==
3305 			    DRING_INFO_EXCHANGED));
3306 		break;
3307 
3308 	case VH_PHASE3:
3309 		/* Phase 3 is done, if rdx msg has been exchanged */
3310 		status = ((ldcp->hstate & RDX_EXCHANGED) ==
3311 			RDX_EXCHANGED);
3312 		break;
3313 
3314 	default:
3315 		break;
3316 	}
3317 
3318 	if (status == 0) {
3319 		return (VGEN_FAILURE);
3320 	}
3321 	DBG2((vnetp, "VNET_HANDSHAKE_DONE: PHASE(%d)\n", hphase));
3322 	return (VGEN_SUCCESS);
3323 }
3324 
3325 /* retry handshake on failure */
3326 static void
3327 vgen_handshake_retry(vgen_ldc_t *ldcp)
3328 {
3329 	/* reset handshake phase */
3330 	vgen_handshake_reset(ldcp);
3331 	if (vgen_max_hretries) {	/* handshake retry is specified */
3332 		if (ldcp->hretries++ < vgen_max_hretries)
3333 			vgen_handshake(vh_nextphase(ldcp));
3334 	}
3335 }
3336 
3337 /*
3338  * Handle a version info msg from the peer or an ACK/NACK from the peer
3339  * to a version info msg that we sent.
 *
 * For VIO_SUBTYPE_INFO the received message (tagp) is modified in place and
 * sent back to the peer as the ACK/NACK reply.
 * For VIO_SUBTYPE_ACK the agreed version is recorded in local_hparams.
 * For VIO_SUBTYPE_NACK a new local version is chosen from
 * ldcp->vgen_versions and version negotiation is sent again.
 * Any other subtype is ignored.
 *
 * Returns VGEN_SUCCESS when the message was handled, VGEN_FAILURE when
 * negotiation failed or an ACK/NACK arrived outside VH_PHASE1, or the
 * non-success value returned by vgen_sendmsg() or
 * vgen_send_version_negotiate().
3340  */
3341 static int
3342 vgen_handle_version_negotiate(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
3343 {
3344 	vio_ver_msg_t	*vermsg = (vio_ver_msg_t *)tagp;
3345 	int		ack = 0;
3346 	int		failed = 0;
3347 	void		*vnetp = LDC_TO_VNET(ldcp);
3348 	int		idx;
3349 	vgen_ver_t	*versions = ldcp->vgen_versions;
3350 	int		rv = 0;
3351 
3352 	DBG1((vnetp, "vgen_handle_version_negotiate: enter\n"));
3353 	switch (tagp->vio_subtype) {
3354 	case VIO_SUBTYPE_INFO:
3355 
3356 		/*  Cache sid of peer if this is the first time */
3357 		if (ldcp->peer_sid == 0) {
3358 			DBG2((vnetp,
3359 			    "vgen_handle_version_negotiate: id (%lx) Caching"
3360 			    " peer_sid(%x)\n", ldcp->ldc_id, tagp->vio_sid));
3361 			ldcp->peer_sid = tagp->vio_sid;
3362 		}
3363 
3364 		if (ldcp->hphase != VH_PHASE1) {
3365 			/*
3366 			 * If we are not already in VH_PHASE1, reset to
3367 			 * pre-handshake state, and initiate handshake
3368 			 * to the peer too.
3369 			 */
3370 			vgen_handshake_reset(ldcp);
3371 			vgen_handshake(vh_nextphase(ldcp));
3372 		}
3373 		ldcp->hstate |= VER_INFO_RCVD;
3374 
3375 		/* save peer's requested values */
3376 		ldcp->peer_hparams.ver_major = vermsg->ver_major;
3377 		ldcp->peer_hparams.ver_minor = vermsg->ver_minor;
3378 		ldcp->peer_hparams.dev_class = vermsg->dev_class;
3379 
3380 		if ((vermsg->dev_class != VDEV_NETWORK) &&
3381 		    (vermsg->dev_class != VDEV_NETWORK_SWITCH)) {
3382 			/* unsupported dev_class, send NACK */
3383 
3384 			DWARN((vnetp,
3385 			    "vgen_handle_version_negotiate: Version"
3386 			    " Negotiation Failed id (%lx)\n", ldcp->ldc_id));
3387 
3388 			tagp->vio_subtype = VIO_SUBTYPE_NACK;
3389 			tagp->vio_sid = ldcp->local_sid;
3390 			/* send reply msg back to peer */
3391 			rv = vgen_sendmsg(ldcp, (caddr_t)tagp,
3392 			    sizeof (*vermsg), B_FALSE);
			/* a send error is reported in preference to VGEN_FAILURE */
3393 			if (rv != VGEN_SUCCESS) {
3394 				return (rv);
3395 			}
3396 			return (VGEN_FAILURE);
3397 		}
3398 
3399 		DBG2((vnetp, "vgen_handle_version_negotiate: VER_INFO_RCVD,"
3400 		    " id (%lx), ver(%d,%d)\n", ldcp->ldc_id,
3401 		    vermsg->ver_major,  vermsg->ver_minor));
3402 
3403 		idx = 0;
3404 
		/*
		 * Scan versions[] from index 0 for the first entry whose
		 * major number is <= the peer's. The loop always terminates:
		 * idx is bounded by VGEN_NUM_VER below.
		 * NOTE(review): the "next lower version" replies presumably
		 * rely on versions[] being ordered from highest to lowest
		 * major version - confirm where vgen_versions is set up.
		 */
3405 		for (;;) {
3406 
3407 			if (vermsg->ver_major > versions[idx].ver_major) {
3408 
3409 				/* nack with next lower version */
3410 				tagp->vio_subtype = VIO_SUBTYPE_NACK;
3411 				vermsg->ver_major = versions[idx].ver_major;
3412 				vermsg->ver_minor = versions[idx].ver_minor;
3413 				break;
3414 			}
3415 
3416 			if (vermsg->ver_major == versions[idx].ver_major) {
3417 
3418 				/* major version match - ACK version */
3419 				tagp->vio_subtype = VIO_SUBTYPE_ACK;
3420 				ack = 1;
3421 
3422 				/*
3423 				 * lower minor version to the one this endpt
3424 				 * supports, if necessary
3425 				 */
3426 				if (vermsg->ver_minor >
3427 				    versions[idx].ver_minor) {
3428 					vermsg->ver_minor =
3429 						versions[idx].ver_minor;
3430 					ldcp->peer_hparams.ver_minor =
3431 						versions[idx].ver_minor;
3432 				}
3433 				break;
3434 			}
3435 
3436 			idx++;
3437 
3438 			if (idx == VGEN_NUM_VER) {
3439 
3440 				/* no version match - send NACK */
3441 				tagp->vio_subtype = VIO_SUBTYPE_NACK;
3442 				vermsg->ver_major = 0;
3443 				vermsg->ver_minor = 0;
3444 				failed = 1;
3445 				break;
3446 			}
3447 
3448 		}
3449 
3450 		tagp->vio_sid = ldcp->local_sid;
3451 
3452 		/* send reply msg back to peer */
3453 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*vermsg),
3454 		    B_FALSE);
3455 		if (rv != VGEN_SUCCESS) {
3456 			return (rv);
3457 		}
3458 
		/*
		 * Neither flag is set when we NACKed with a lower version;
		 * in that case we simply fall through to the phase check.
		 */
3459 		if (ack) {
3460 			ldcp->hstate |= VER_ACK_SENT;
3461 			DBG2((vnetp, "vgen_handle_version_negotiate:"
3462 			    " VER_ACK_SENT, id (%lx) ver(%d,%d) \n",
3463 			    ldcp->ldc_id, vermsg->ver_major,
3464 			    vermsg->ver_minor));
3465 		}
3466 		if (failed) {
3467 			DWARN((vnetp, "vgen_handle_version_negotiate:"
3468 			    " Version Negotiation Failed id (%lx)\n",
3469 			    ldcp->ldc_id));
3470 			return (VGEN_FAILURE);
3471 		}
3472 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
3473 
3474 			/*  VER_ACK_SENT and VER_ACK_RCVD */
3475 
3476 			/* local and peer versions match? */
3477 			ASSERT((ldcp->local_hparams.ver_major ==
3478 				ldcp->peer_hparams.ver_major) &&
3479 				(ldcp->local_hparams.ver_minor ==
3480 				ldcp->peer_hparams.ver_minor));
3481 
3482 			/* move to the next phase */
3483 			vgen_handshake(vh_nextphase(ldcp));
3484 		}
3485 
3486 		break;
3487 
3488 	case VIO_SUBTYPE_ACK:
3489 
3490 		if (ldcp->hphase != VH_PHASE1) {
3491 			/*  This should not happen. */
3492 			DWARN((vnetp,
3493 			    "vgen_handle_version_negotiate:"
3494 			    " VER_ACK_RCVD id (%lx) Invalid Phase(%u)\n",
3495 			    ldcp->ldc_id, ldcp->hphase));
3496 			return (VGEN_FAILURE);
3497 		}
3498 
3499 		/* SUCCESS - we have agreed on a version */
3500 		ldcp->local_hparams.ver_major = vermsg->ver_major;
3501 		ldcp->local_hparams.ver_minor = vermsg->ver_minor;
3502 		ldcp->hstate |= VER_ACK_RCVD;
3503 
3504 		DBG2((vnetp, "vgen_handle_version_negotiate:"
3505 		    " VER_ACK_RCVD, id (%lx) ver(%d,%d) \n",
3506 		    ldcp->ldc_id, vermsg->ver_major,  vermsg->ver_minor));
3507 
3508 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
3509 
3510 			/*  VER_ACK_SENT and VER_ACK_RCVD */
3511 
3512 			/* local and peer versions match? */
3513 			ASSERT((ldcp->local_hparams.ver_major ==
3514 				ldcp->peer_hparams.ver_major) &&
3515 				(ldcp->local_hparams.ver_minor ==
3516 				ldcp->peer_hparams.ver_minor));
3517 
3518 			/* move to the next phase */
3519 			vgen_handshake(vh_nextphase(ldcp));
3520 		}
3521 		break;
3522 
3523 	case VIO_SUBTYPE_NACK:
3524 
3525 		if (ldcp->hphase != VH_PHASE1) {
3526 			/*  This should not happen.  */
3527 			DWARN((vnetp,
3528 			    "vgen_handle_version_negotiate:"
3529 			    " VER_NACK_RCVD id (%lx) Invalid Phase(%u)\n",
3530 			    ldcp->ldc_id, ldcp->hphase));
3531 			return (VGEN_FAILURE);
3532 		}
3533 
3534 		DBG2((vnetp, "vgen_handle_version_negotiate:"
3535 		    " VER_NACK_RCVD id(%lx) next ver(%d,%d)\n",
3536 		    ldcp->ldc_id, vermsg->ver_major, vermsg->ver_minor));
3537 
3538 		/* check if version in NACK is zero */
3539 		if (vermsg->ver_major == 0 && vermsg->ver_minor == 0) {
3540 			/*
3541 			 * Version Negotiation has failed.
3542 			 */
3543 			DWARN((vnetp, "vgen_handle_version_negotiate:"
3544 			    " Version Negotiation Failed id (%lx)\n",
3545 			    ldcp->ldc_id));
3546 			return (VGEN_FAILURE);
3547 		}
3548 
3549 		idx = 0;
3550 
		/*
		 * Pick the first versions[] entry whose major number is <=
		 * the one suggested in the NACK. Both branches below store
		 * the same values: the major and minor of our own table
		 * entry; the minor suggested by the peer is not used.
		 */
3551 		for (;;) {
3552 
3553 			if (vermsg->ver_major > versions[idx].ver_major) {
3554 				/* select next lower version */
3555 
3556 				ldcp->local_hparams.ver_major =
3557 					versions[idx].ver_major;
3558 				ldcp->local_hparams.ver_minor =
3559 					versions[idx].ver_minor;
3560 				break;
3561 			}
3562 
3563 			if (vermsg->ver_major == versions[idx].ver_major) {
3564 				/* major version match */
3565 
3566 				ldcp->local_hparams.ver_major =
3567 					versions[idx].ver_major;
3568 
3569 				ldcp->local_hparams.ver_minor =
3570 					versions[idx].ver_minor;
3571 				break;
3572 			}
3573 
3574 			idx++;
3575 
3576 			if (idx == VGEN_NUM_VER) {
3577 				/*
3578 				 * no version match.
3579 				 * Version Negotiation has failed.
3580 				 */
3581 				DWARN((vnetp, "vgen_handle_version_negotiate:"
3582 				    " Version Negotiation Failed id (%lx)\n",
3583 				    ldcp->ldc_id));
3584 				return (VGEN_FAILURE);
3585 			}
3586 
3587 		}
3588 
		/* propose the newly selected local version to the peer */
3589 		rv = vgen_send_version_negotiate(ldcp);
3590 		if (rv != VGEN_SUCCESS) {
3591 			return (rv);
3592 		}
3593 
3594 		break;
	/* there is no default case: other subtypes fall out of the switch */
3595 	}
3596 
3597 	DBG1((vnetp, "vgen_handle_version_negotiate: exit\n"));
3598 	return (VGEN_SUCCESS);
3599 }
3600 
3601 /* Check if the attributes are supported */
3602 static int
3603 vgen_check_attr_info(vgen_ldc_t *ldcp, vnet_attr_msg_t *msg)
3604 {
3605 	_NOTE(ARGUNUSED(ldcp))
3606 
3607 #if 0
3608 	uint64_t port_macaddr;
3609 	port_macaddr = vgen_macaddr_strtoul((uint8_t *)
3610 				&(ldcp->portp->macaddr));
3611 #endif
3612 	/*
3613 	 * currently, we support these attr values:
3614 	 * mtu of ethernet, addr_type of mac, xfer_mode of
3615 	 * ldc shared memory, ack_freq of 0 (data is acked if
3616 	 * the ack bit is set in the descriptor) and the address should
3617 	 * match the address in the port node.
3618 	 */
3619 	if ((msg->mtu != ETHERMAX) ||
3620 	    (msg->addr_type != ADDR_TYPE_MAC) ||
3621 	    (msg->xfer_mode != VIO_DRING_MODE) ||
3622 	    (msg->ack_freq > 64)) {
3623 #if 0
3624 	    (msg->addr != port_macaddr))
3625 cmn_err(CE_CONT, "vgen_check_attr_info: msg->addr(%lx), port_macaddr(%lx)\n",
3626 	msg->addr, port_macaddr);
3627 #endif
3628 		return (VGEN_FAILURE);
3629 	}
3630 
3631 	return (VGEN_SUCCESS);
3632 }
3633 
3634 /*
3635  * Handle an attribute info msg from the peer or an ACK/NACK from the peer
3636  * to an attr info msg that we sent.
3637  */
3638 static int
3639 vgen_handle_attr_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
3640 {
3641 	vnet_attr_msg_t *attrmsg = (vnet_attr_msg_t *)tagp;
3642 	void		*vnetp = LDC_TO_VNET(ldcp);
3643 	int		ack = 0;
3644 	int		rv = 0;
3645 
3646 	DBG1((vnetp, "vgen_handle_attr_info: enter\n"));
3647 	if (ldcp->hphase != VH_PHASE2) {
3648 		DWARN((vnetp,
3649 		    "vgen_handle_attr_info: Rcvd ATTR_INFO id(%lx)"
3650 		    " subtype (%d), Invalid Phase(%u)\n", ldcp->ldc_id,
3651 		    tagp->vio_subtype, ldcp->hphase));
3652 		return (VGEN_FAILURE);
3653 	}
3654 	switch (tagp->vio_subtype) {
3655 	case VIO_SUBTYPE_INFO:
3656 
3657 		DBG2((vnetp, "vgen_handle_attr_info: ATTR_INFO_RCVD id(%lx)\n",
3658 		    ldcp->ldc_id));
3659 		ldcp->hstate |= ATTR_INFO_RCVD;
3660 
3661 		/* save peer's values */
3662 		ldcp->peer_hparams.mtu = attrmsg->mtu;
3663 		ldcp->peer_hparams.addr = attrmsg->addr;
3664 		ldcp->peer_hparams.addr_type = attrmsg->addr_type;
3665 		ldcp->peer_hparams.xfer_mode = attrmsg->xfer_mode;
3666 		ldcp->peer_hparams.ack_freq = attrmsg->ack_freq;
3667 
3668 		if (vgen_check_attr_info(ldcp, attrmsg) == VGEN_FAILURE) {
3669 			/* unsupported attr, send NACK */
3670 			tagp->vio_subtype = VIO_SUBTYPE_NACK;
3671 		} else {
3672 			ack = 1;
3673 			tagp->vio_subtype = VIO_SUBTYPE_ACK;
3674 		}
3675 		tagp->vio_sid = ldcp->local_sid;
3676 
3677 		/* send reply msg back to peer */
3678 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*attrmsg),
3679 		    B_FALSE);
3680 		if (rv != VGEN_SUCCESS) {
3681 			return (rv);
3682 		}
3683 
3684 		if (ack) {
3685 			ldcp->hstate |= ATTR_ACK_SENT;
3686 			DBG2((vnetp, "vgen_handle_attr_info:"
3687 			    " ATTR_ACK_SENT id(%lx)\n", ldcp->ldc_id));
3688 		} else {
3689 			/* failed */
3690 			DWARN((vnetp, "vgen_handle_attr_info:"
3691 			    " ATTR_NACK_SENT id(%lx)\n", ldcp->ldc_id));
3692 			return (VGEN_FAILURE);
3693 		}
3694 
3695 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
3696 			vgen_handshake(vh_nextphase(ldcp));
3697 		}
3698 
3699 		break;
3700 
3701 	case VIO_SUBTYPE_ACK:
3702 
3703 		ldcp->hstate |= ATTR_ACK_RCVD;
3704 
3705 		DBG2((vnetp, "vgen_handle_attr_info: ATTR_ACK_RCVD id(%lx)\n",
3706 		    ldcp->ldc_id));
3707 
3708 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
3709 			vgen_handshake(vh_nextphase(ldcp));
3710 		}
3711 		break;
3712 
3713 	case VIO_SUBTYPE_NACK:
3714 
3715 		DBG2((vnetp, "vgen_handle_attr_info: ATTR_NACK_RCVD id(%lx)\n",
3716 		    ldcp->ldc_id));
3717 		return (VGEN_FAILURE);
3718 	}
3719 	DBG1((vnetp, "vgen_handle_attr_info: exit\n"));
3720 	return (VGEN_SUCCESS);
3721 }
3722 
3723 /* Check if the dring info msg is ok */
3724 static int
3725 vgen_check_dring_reg(vio_dring_reg_msg_t *msg)
3726 {
3727 	/* check if msg contents are ok */
3728 	if ((msg->num_descriptors < 128) || (msg->descriptor_size <
3729 	    sizeof (vnet_public_desc_t))) {
3730 		return (VGEN_FAILURE);
3731 	}
3732 	return (VGEN_SUCCESS);
3733 }
3734 
3735 /*
3736  * Handle a descriptor ring register msg from the peer or an ACK/NACK from
3737  * the peer to a dring register msg that we sent.
 *
 * The message is rejected if the channel has not yet reached VH_PHASE2.
 * For VIO_SUBTYPE_INFO the received message (tagp) is rewritten in place
 * and sent back to the peer as the ACK/NACK reply.
 *
 * Returns VGEN_SUCCESS when the message was handled, VGEN_FAILURE when the
 * phase is wrong, the ring could not be accepted or a NACK was received,
 * or the non-success value returned by vgen_sendmsg().
3738  */
3739 static int
3740 vgen_handle_dring_reg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
3741 {
3742 	vio_dring_reg_msg_t *msg = (vio_dring_reg_msg_t *)tagp;
3743 	void *vnetp = LDC_TO_VNET(ldcp);
3744 	ldc_mem_cookie_t dcookie;
3745 	int ack = 0;
3746 	int rv = 0;
3747 
3748 	DBG1((vnetp, "vgen_handle_dring_reg: enter\n"));
3749 	if (ldcp->hphase < VH_PHASE2) {
3750 		/* dring_info can be rcvd in any of the phases after Phase1 */
3751 		DWARN((vnetp,
3752 		    "vgen_handle_dring_reg: Rcvd DRING_INFO, id (%lx)"
3753 		    " Subtype (%d), Invalid Phase(%u)\n", ldcp->ldc_id,
3754 		    tagp->vio_subtype, ldcp->hphase));
3755 		return (VGEN_FAILURE);
3756 	}
3757 	switch (tagp->vio_subtype) {
3758 	case VIO_SUBTYPE_INFO:
3759 
3760 		DBG2((vnetp, "vgen_handle_dring_reg: DRING_INFO_RCVD id(%lx)\n",
3761 		    ldcp->ldc_id));
3762 		ldcp->hstate |= DRING_INFO_RCVD;
		/* take a local copy of the cookie carried in the message */
3763 		bcopy((msg->cookie), &dcookie, sizeof (dcookie));
3764 
		/*
		 * NOTE(review): ncookies is taken from the peer's message.
		 * ASSERT is presumably compiled out of non-DEBUG builds and
		 * vgen_check_dring_reg() does not look at ncookies, so an
		 * unexpected value is passed on to vgen_init_rxds() -
		 * consider NACKing instead.
		 */
3765 		ASSERT(msg->ncookies == 1);
3766 
3767 		if (vgen_check_dring_reg(msg) == VGEN_SUCCESS) {
3768 			/*
3769 			 * verified dring info msg to be ok,
3770 			 * now try to map the remote dring.
3771 			 */
3772 			rv = vgen_init_rxds(ldcp, msg->num_descriptors,
3773 			    msg->descriptor_size, &dcookie,
3774 			    msg->ncookies);
			/*
			 * this result is compared against DDI_SUCCESS; rv is
			 * reused below for the vgen_sendmsg() result.
			 */
3775 			if (rv == DDI_SUCCESS) {
3776 				/* now we can ack the peer */
3777 				ack = 1;
3778 			}
3779 		}
3780 		if (ack == 0) {
3781 			/* failed, send NACK */
3782 			tagp->vio_subtype = VIO_SUBTYPE_NACK;
3783 		} else {
			/*
			 * Peer parameters are recorded only the first time a
			 * ring is accepted. On a repeated INFO the message is
			 * still ACKed, with msg->dring_ident left as received.
			 */
3784 			if (!(ldcp->peer_hparams.dring_ready)) {
3785 
3786 				/* save peer's dring_info values */
3787 				bcopy(&dcookie,
3788 				    &(ldcp->peer_hparams.dring_cookie),
3789 				    sizeof (dcookie));
3790 				ldcp->peer_hparams.num_desc =
3791 						msg->num_descriptors;
3792 				ldcp->peer_hparams.desc_size =
3793 						msg->descriptor_size;
3794 				ldcp->peer_hparams.num_dcookies =
3795 						msg->ncookies;
3796 
3797 				/* set dring_ident for the peer */
3798 				ldcp->peer_hparams.dring_ident =
3799 							(uint64_t)ldcp->rxdp;
3800 				/* return the dring_ident in ack msg */
3801 				msg->dring_ident =
3802 							(uint64_t)ldcp->rxdp;
3803 
3804 				ldcp->peer_hparams.dring_ready = B_TRUE;
3805 			}
3806 			tagp->vio_subtype = VIO_SUBTYPE_ACK;
3807 		}
3808 		tagp->vio_sid = ldcp->local_sid;
3809 		/* send reply msg back to peer */
3810 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msg),
3811 		    B_FALSE);
3812 		if (rv != VGEN_SUCCESS) {
3813 			return (rv);
3814 		}
3815 
3816 		if (ack) {
3817 			ldcp->hstate |= DRING_ACK_SENT;
3818 			DBG2((vnetp, "vgen_handle_dring_reg: DRING_ACK_SENT"
3819 			    " id (%lx)\n", ldcp->ldc_id));
3820 		} else {
3821 			DWARN((vnetp, "vgen_handle_dring_reg: DRING_NACK_SENT"
3822 			    " id (%lx)\n", ldcp->ldc_id));
3823 			return (VGEN_FAILURE);
3824 		}
3825 
		/* advance the handshake if this completed the current phase */
3826 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
3827 			vgen_handshake(vh_nextphase(ldcp));
3828 		}
3829 
3830 		break;
3831 
3832 	case VIO_SUBTYPE_ACK:
3833 
3834 		ldcp->hstate |= DRING_ACK_RCVD;
3835 
3836 		DBG2((vnetp, "vgen_handle_dring_reg: DRING_ACK_RCVD"
3837 		    " id (%lx)\n", ldcp->ldc_id));
3838 
		/* only the first ACK marks the local ring ready */
3839 		if (!(ldcp->local_hparams.dring_ready)) {
3840 			/* local dring is now ready */
3841 			ldcp->local_hparams.dring_ready = B_TRUE;
3842 
3843 			/* save dring_ident acked by peer */
3844 			ldcp->local_hparams.dring_ident =
3845 				msg->dring_ident;
3846 		}
3847 
3848 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
3849 			vgen_handshake(vh_nextphase(ldcp));
3850 		}
3851 
3852 		break;
3853 
3854 	case VIO_SUBTYPE_NACK:
3855 
3856 		DBG2((vnetp, "vgen_handle_dring_reg: DRING_NACK_RCVD"
3857 		    " id (%lx)\n", ldcp->ldc_id));
3858 		return (VGEN_FAILURE);
	/* there is no default case: other subtypes fall out of the switch */
3859 	}
3860 	DBG1((vnetp, "vgen_handle_dring_reg: exit\n"));
3861 	return (VGEN_SUCCESS);
3862 }
3863 
3864 /*
3865  * Handle a rdx info msg from the peer or an ACK/NACK
3866  * from the peer to a rdx info msg that we sent.
3867  */
3868 static int
3869 vgen_handle_rdx_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
3870 {
3871 	void *vnetp = LDC_TO_VNET(ldcp);
3872 	int rv = 0;
3873 
3874 	DBG1((vnetp, "vgen_handle_rdx_info: enter\n"));
3875 	if (ldcp->hphase != VH_PHASE3) {
3876 		DWARN((vnetp,
3877 		    "vgen_handle_rdx_info: Rcvd RDX_INFO, id (%lx)"
3878 		    "  Subtype (%d), Invalid Phase(%u)\n", ldcp->ldc_id,
3879 		    tagp->vio_subtype, ldcp->hphase));
3880 		return (VGEN_FAILURE);
3881 	}
3882 	switch (tagp->vio_subtype) {
3883 	case VIO_SUBTYPE_INFO:
3884 
3885 		DBG2((vnetp, "vgen_handle_rdx_info: RDX_INFO_RCVD id (%lx)\n",
3886 		    ldcp->ldc_id));
3887 		ldcp->hstate |= RDX_INFO_RCVD;
3888 
3889 		tagp->vio_subtype = VIO_SUBTYPE_ACK;
3890 		tagp->vio_sid = ldcp->local_sid;
3891 		/* send reply msg back to peer */
3892 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (vio_rdx_msg_t),
3893 		    B_FALSE);
3894 		if (rv != VGEN_SUCCESS) {
3895 			return (rv);
3896 		}
3897 
3898 		ldcp->hstate |= RDX_ACK_SENT;
3899 		DBG2((vnetp, "vgen_handle_rdx_info: RDX_ACK_SENT id (%lx)\n",
3900 		    ldcp->ldc_id));
3901 
3902 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
3903 			vgen_handshake(vh_nextphase(ldcp));
3904 		}
3905 
3906 		break;
3907 
3908 	case VIO_SUBTYPE_ACK:
3909 
3910 		ldcp->hstate |= RDX_ACK_RCVD;
3911 
3912 		DBG2((vnetp, "vgen_handle_rdx_info: RDX_ACK_RCVD id (%lx)\n",
3913 		    ldcp->ldc_id));
3914 
3915 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
3916 			vgen_handshake(vh_nextphase(ldcp));
3917 		}
3918 		break;
3919 
3920 	case VIO_SUBTYPE_NACK:
3921 
3922 		DBG2((vnetp, "vgen_handle_rdx_info: RDX_NACK_RCVD id (%lx)\n",
3923 		    ldcp->ldc_id));
3924 		return (VGEN_FAILURE);
3925 	}
3926 	DBG1((vnetp, "vgen_handle_rdx_info: exit\n"));
3927 	return (VGEN_SUCCESS);
3928 }
3929 
3930 /* Handle ACK/NACK from vsw to a set multicast msg that we sent */
3931 static int
3932 vgen_handle_mcast_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
3933 {
3934 	void *vnetp = LDC_TO_VNET(ldcp);
3935 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3936 	vnet_mcast_msg_t *msgp = (vnet_mcast_msg_t *)tagp;
3937 	struct ether_addr *addrp;
3938 	int count;
3939 	int i;
3940 
3941 	DBG1((vnetp, "vgen_handle_mcast_info: enter\n"));
3942 	switch (tagp->vio_subtype) {
3943 
3944 	case VIO_SUBTYPE_INFO:
3945 
3946 		/* vnet shouldn't recv set mcast msg, only vsw handles it */
3947 		DWARN((vnetp,
3948 		    "vgen_handle_mcast_info: rcvd SET_MCAST_INFO id (%lx)\n",
3949 		    ldcp->ldc_id));
3950 		break;
3951 
3952 	case VIO_SUBTYPE_ACK:
3953 
3954 		/* success adding/removing multicast addr */
3955 		DBG2((vnetp,
3956 		    "vgen_handle_mcast_info: rcvd SET_MCAST_ACK id (%lx)\n",
3957 		    ldcp->ldc_id));
3958 		break;
3959 
3960 	case VIO_SUBTYPE_NACK:
3961 
3962 		DWARN((vnetp,
3963 		    "vgen_handle_mcast_info: rcvd SET_MCAST_NACK id (%lx)\n",
3964 		    ldcp->ldc_id));
3965 		if (!(msgp->set)) {
3966 			/* multicast remove request failed */
3967 			break;
3968 		}
3969 
3970 		/* multicast add request failed */
3971 		for (count = 0; count < msgp->count; count++) {
3972 			addrp = &(msgp->mca[count]);
3973 
3974 			/* delete address from the table */
3975 			for (i = 0; i < vgenp->mccount; i++) {
3976 				if (ether_cmp(addrp,
3977 				    &(vgenp->mctab[i])) == 0) {
3978 					if (vgenp->mccount > 1) {
3979 						vgenp->mctab[i] =
3980 						vgenp->mctab[vgenp->mccount-1];
3981 					}
3982 					vgenp->mccount--;
3983 					break;
3984 				}
3985 			}
3986 		}
3987 		break;
3988 
3989 	}
3990 	DBG1((vnetp, "vgen_handle_mcast_info: exit\n"));
3991 
3992 	return (VGEN_SUCCESS);
3993 }
3994 
3995 /* handler for control messages received from the peer ldc end-point */
3996 static int
3997 vgen_handle_ctrlmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
3998 {
3999 	void *vnetp = LDC_TO_VNET(ldcp);
4000 	int rv = 0;
4001 
4002 	DBG1((vnetp, "vgen_handle_ctrlmsg: enter\n"));
4003 	switch (tagp->vio_subtype_env) {
4004 
4005 	case VIO_VER_INFO:
4006 		rv = vgen_handle_version_negotiate(ldcp, tagp);
4007 		break;
4008 
4009 	case VIO_ATTR_INFO:
4010 		rv = vgen_handle_attr_info(ldcp, tagp);
4011 		break;
4012 
4013 	case VIO_DRING_REG:
4014 		rv = vgen_handle_dring_reg(ldcp, tagp);
4015 		break;
4016 
4017 	case VIO_RDX:
4018 		rv = vgen_handle_rdx_info(ldcp, tagp);
4019 		break;
4020 
4021 	case VNET_MCAST_INFO:
4022 		rv = vgen_handle_mcast_info(ldcp, tagp);
4023 		break;
4024 
4025 	}
4026 
4027 	DBG1((vnetp, "vgen_handle_ctrlmsg: exit\n"));
4028 	return (rv);
4029 }
4030 
4031 /* handler for data messages received from the peer ldc end-point */
4032 static int
4033 vgen_handle_datamsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp,
4034 	mblk_t **headp, mblk_t **tailp)
4035 {
4036 	void *vnetp = LDC_TO_VNET(ldcp);
4037 	int rv = 0;
4038 
4039 	DBG1((vnetp, "vgen_handle_datamsg: enter\n"));
4040 
4041 	if (ldcp->hphase != VH_DONE)
4042 		return (rv);
4043 	switch (tagp->vio_subtype_env) {
4044 	case VIO_DRING_DATA:
4045 		rv = vgen_handle_dring_data(ldcp, tagp, headp, tailp);
4046 		break;
4047 	default:
4048 		break;
4049 	}
4050 
4051 	DBG1((vnetp, "vgen_handle_datamsg: exit\n"));
4052 	return (rv);
4053 }
4054 
4055 static int
4056 vgen_send_dring_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp, uint32_t start,
4057     int32_t end, uint8_t pstate)
4058 {
4059 	vio_dring_msg_t *msgp = (vio_dring_msg_t *)tagp;
4060 	void *vnetp = LDC_TO_VNET(ldcp);
4061 	int rv = 0;
4062 
4063 	tagp->vio_subtype = VIO_SUBTYPE_ACK;
4064 	tagp->vio_sid = ldcp->local_sid;
4065 	msgp->start_idx = start;
4066 	msgp->end_idx = end;
4067 	msgp->dring_process_state = pstate;
4068 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msgp), B_FALSE);
4069 	if (rv != VGEN_SUCCESS) {
4070 		DWARN((vnetp, "vgen_send_dring_ack: id(%lx) vgen_sendmsg "
4071 		    "failed\n", (ldcp)->ldc_id));
4072 	}
4073 	return (rv);
4074 }
4075 
4076 static int
4077 vgen_handle_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp,
4078 	mblk_t **headp, mblk_t **tailp)
4079 {
4080 	vio_dring_msg_t *dringmsg;
4081 	vnet_public_desc_t *rxdp;
4082 	vnet_public_desc_t *txdp;
4083 	vio_dring_entry_hdr_t *hdrp;
4084 	vgen_stats_t *statsp;
4085 	struct ether_header *ehp;
4086 	mblk_t *mp = NULL;
4087 	mblk_t *bp = NULL;
4088 	mblk_t *bpt = NULL;
4089 	size_t nbytes;
4090 	size_t nread;
4091 	uint64_t off = 0;
4092 	uint32_t start;
4093 	int32_t end;
4094 	uint32_t datalen;
4095 	uint32_t ncookies;
4096 	uint32_t ack_start;
4097 	uint32_t ack_end;
4098 	uint32_t rxi;
4099 	uint32_t txi;
4100 	int rv = 0;
4101 	boolean_t rxd_err = B_FALSE;
4102 	boolean_t set_ack_start = B_FALSE;
4103 	vgen_private_desc_t *tbufp;
4104 	uint32_t next_rxi;
4105 	boolean_t ready_txd = B_FALSE;
4106 	uint32_t retries = 0;
4107 #ifdef VGEN_HANDLE_LOST_PKTS
4108 	int n;
4109 #endif
4110 #ifdef VGEN_REXMIT
4111 	uint64_t seqnum;
4112 #endif
4113 	void *vnetp = LDC_TO_VNET(ldcp);
4114 	boolean_t ack_needed = B_FALSE;
4115 
4116 	dringmsg = (vio_dring_msg_t *)tagp;
4117 	start = dringmsg->start_idx;
4118 	end = dringmsg->end_idx;
4119 	statsp = ldcp->statsp;
4120 
4121 	DBG1((vnetp, "vgen_handle_dring_data: enter\n"));
4122 	switch (tagp->vio_subtype) {
4123 
4124 	case VIO_SUBTYPE_INFO:
4125 		/*
4126 		 * received a data msg, which contains the start and end
4127 		 * indeces of the descriptors within the rx ring holding data,
4128 		 * the seq_num of data packet corresponding to the start index,
4129 		 * and the dring_ident.
4130 		 * We can now read the contents of each of these descriptors
4131 		 * and gather data from it.
4132 		 */
4133 		DBG2((vnetp,
4134 		    "vgen_handle_dring_data: INFO: start(%d), end(%d)\n",
4135 		    start, end));
4136 
4137 		/* validate rx start and end indeces */
4138 		if (!(CHECK_RXI(start, ldcp)) || ((end != -1) &&
4139 		    !(CHECK_RXI(end, ldcp)))) {
4140 			/* drop the message if invalid index */
4141 			break;
4142 		}
4143 
4144 		/* validate dring_ident */
4145 		if (dringmsg->dring_ident != ldcp->peer_hparams.dring_ident) {
4146 			/* invalid dring_ident, drop the msg */
4147 			break;
4148 		}
4149 #ifdef DEBUG
4150 		if (vgen_trigger_rxlost) {
4151 			/* drop this msg to simulate lost pkts for debugging */
4152 			vgen_trigger_rxlost = 0;
4153 			break;
4154 		}
4155 #endif
4156 
4157 #ifdef	VGEN_HANDLE_LOST_PKTS
4158 
4159 		/* receive start index doesn't match expected index */
4160 		if (ldcp->next_rxi != start) {
4161 
4162 			DWARN((vnetp, "vgen_handle_dring_data: id(%lx) "
4163 			    "next_rxi(%d) != start(%d)\n",
4164 			    ldcp->ldc_id, ldcp->next_rxi, start));
4165 
4166 			/* calculate the number of pkts lost */
4167 			if (start >= ldcp->next_rxi) {
4168 				n = start - ldcp->next_rxi;
4169 			} else  {
4170 				n = ldcp->num_rxds - (ldcp->next_rxi - start);
4171 			}
4172 
4173 			/*
4174 			 * sequence number of dring data message
4175 			 * is less than the next sequence number that
4176 			 * is expected:
4177 			 *
4178 			 * drop the message and the corresponding packets.
4179 			 */
4180 			if (ldcp->next_rxseq > dringmsg->seq_num) {
4181 				DWARN((vnetp, "vgen_handle_dring_data: id(%lx) "
4182 				    "dropping pkts, expected rxseq(0x%lx) "
4183 				    "> recvd(0x%lx)\n",
4184 				    ldcp->ldc_id, ldcp->next_rxseq,
4185 				    dringmsg->seq_num));
4186 				/*
4187 				 * duplicate/multiple retransmissions from
4188 				 * sender?? drop this msg.
4189 				 */
4190 				break;
4191 			}
4192 
4193 			/*
4194 			 * sequence number of dring data message
4195 			 * is greater than the next expected sequence number
4196 			 *
4197 			 * send a NACK back to the peer to indicate lost
4198 			 * packets.
4199 			 */
4200 			if (dringmsg->seq_num > ldcp->next_rxseq) {
4201 				statsp->rx_lost_pkts += n;
4202 				tagp->vio_subtype = VIO_SUBTYPE_NACK;
4203 				tagp->vio_sid = ldcp->local_sid;
4204 				/* indicate the range of lost descriptors */
4205 				dringmsg->start_idx = ldcp->next_rxi;
4206 				rxi = start;
4207 				DECR_RXI(rxi, ldcp);
4208 				dringmsg->end_idx = rxi;
4209 				/* dring ident is left unchanged */
4210 				rv = vgen_sendmsg(ldcp, (caddr_t)tagp,
4211 				    sizeof (*dringmsg), B_FALSE);
4212 				if (rv != VGEN_SUCCESS) {
4213 					DWARN((vnetp,
4214 					    "vgen_handle_dring_data: id(%lx) "
4215 					    "vgen_sendmsg failed, "
4216 					    "stype: NACK\n", ldcp->ldc_id));
4217 					goto error_ret;
4218 				}
4219 #ifdef VGEN_REXMIT
4220 				/*
4221 				 * stop further processing until peer
4222 				 * retransmits with the right index.
4223 				 * update next_rxseq expected.
4224 				 */
4225 				ldcp->next_rxseq += 1;
4226 				break;
4227 #else	/* VGEN_REXMIT */
4228 				/*
4229 				 * treat this range of descrs/pkts as dropped
4230 				 * and set the new expected values for next_rxi
4231 				 * and next_rxseq. continue(below) to process
4232 				 * from the new start index.
4233 				 */
4234 				ldcp->next_rxi = start;
4235 				ldcp->next_rxseq += 1;
4236 #endif	/* VGEN_REXMIT */
4237 
4238 			} else if (dringmsg->seq_num == ldcp->next_rxseq) {
4239 				/*
4240 				 * expected and received seqnums match, but
4241 				 * the descriptor indeces don't?
4242 				 *
4243 				 * restart handshake with peer.
4244 				 */
4245 				DWARN((vnetp,
4246 				    "vgen_handle_dring_data: id(%lx) "
4247 				    "next_rxseq(0x%lx) == seq_num(0x%lx)\n",
4248 				    ldcp->ldc_id, ldcp->next_rxseq,
4249 				    dringmsg->seq_num));
4250 
4251 			}
4252 
4253 		} else {
4254 			/* expected and start dring indeces match */
4255 
4256 			if (dringmsg->seq_num != ldcp->next_rxseq) {
4257 
4258 				/* seqnums don't match */
4259 
4260 				DWARN((vnetp,
4261 				    "vgen_handle_dring_data: id(%lx) "
4262 				    "next_rxseq(0x%lx) != seq_num(0x%lx)\n",
4263 				    ldcp->ldc_id, ldcp->next_rxseq,
4264 				    dringmsg->seq_num));
4265 			}
4266 		}
4267 
4268 #endif	/* VGEN_HANDLE_LOST_PKTS */
4269 
4270 		/*
4271 		 * start processing the descriptors from the specified
4272 		 * start index, up to the index a descriptor is not ready
4273 		 * to be processed or we process the entire descriptor ring
4274 		 * and wrap around upto the start index.
4275 		 */
4276 
4277 		/* need to set the start index of descriptors to be ack'd */
4278 		set_ack_start = B_TRUE;
4279 
4280 		/* index upto which we have ack'd */
4281 		ack_end = start;
4282 		DECR_RXI(ack_end, ldcp);
4283 
4284 		next_rxi = rxi =  start;
4285 		do {
4286 
4287 vgen_recv_retry:	rv = ldc_mem_dring_acquire(ldcp->rx_dhandle, rxi, rxi);
4288 			if (rv != 0) {
4289 				DWARN((vnetp, "vgen_handle_dring_data: "
4290 				    "ldc_mem_dring_acquire() failed"
4291 				    " id(%lx) rv(%d)\n", ldcp->ldc_id, rv));
4292 				statsp->ierrors++;
4293 				goto error_ret;
4294 			}
4295 
4296 			rxdp = &(ldcp->rxdp[rxi]);
4297 			hdrp = &rxdp->hdr;
4298 
4299 			if (hdrp->dstate != VIO_DESC_READY) {
4300 				/*
4301 				 * descriptor is not ready.
4302 				 * retry descriptor acquire, stop processing
4303 				 * after max # retries.
4304 				 */
4305 				if (retries == vgen_recv_retries)
4306 					break;
4307 				retries++;
4308 				drv_usecwait(vgen_recv_delay);
4309 				goto vgen_recv_retry;
4310 			}
4311 			retries = 0;
4312 
4313 			if (set_ack_start) {
4314 				/*
4315 				 * initialize the start index of the range
4316 				 * of descriptors to be ack'd.
4317 				 */
4318 				ack_start = rxi;
4319 				set_ack_start = B_FALSE;
4320 			}
4321 
4322 			datalen = rxdp->nbytes;
4323 			ncookies = rxdp->ncookies;
4324 			if ((datalen < ETHERMIN) ||
4325 			    (ncookies == 0) ||
4326 			    (ncookies > MAX_COOKIES)) {
4327 				rxd_err = B_TRUE;
4328 			} else {
4329 				/*
4330 				 * Try to allocate an mblk from the free pool
4331 				 * of recv mblks for the channel.
4332 				 * If this fails, use allocb().
4333 				 */
4334 				mp = vio_allocb(ldcp->rmp);
4335 				if (!mp) {
4336 					/*
4337 					 * The data buffer returned by
4338 					 * allocb(9F) is 8byte aligned. We
4339 					 * allocate extra 8 bytes to ensure
4340 					 * size is multiple of 8 bytes for
4341 					 * ldc_mem_copy().
4342 					 */
4343 					statsp->rx_vio_allocb_fail++;
4344 					mp = allocb(VNET_IPALIGN + datalen + 8,
4345 					    BPRI_MED);
4346 				}
4347 				nbytes = (VNET_IPALIGN + datalen + 7) & ~7;
4348 			}
4349 			if ((rxd_err) || (mp == NULL)) {
4350 				/*
4351 				 * rxd_err or allocb() failure,
4352 				 * drop this packet, get next.
4353 				 */
4354 				if (rxd_err) {
4355 					statsp->ierrors++;
4356 					rxd_err = B_FALSE;
4357 				} else {
4358 					statsp->rx_allocb_fail++;
4359 				}
4360 
4361 				ack_needed = hdrp->ack;
4362 
4363 				/* set descriptor done bit */
4364 				hdrp->dstate = VIO_DESC_DONE;
4365 
4366 				rv = ldc_mem_dring_release(ldcp->rx_dhandle,
4367 				    rxi, rxi);
4368 				if (rv != 0) {
4369 					DWARN((vnetp, "vgen_handle_dring_data: "
4370 					    "ldc_mem_dring_release err id(%lx)"
4371 					    " rv(%d)\n", ldcp->ldc_id, rv));
4372 					goto error_ret;
4373 				}
4374 
4375 				if (ack_needed) {
4376 					ack_needed = B_FALSE;
4377 					/*
4378 					 * sender needs ack for this packet,
4379 					 * ack pkts upto this index.
4380 					 */
4381 					ack_end = rxi;
4382 
4383 					rv = vgen_send_dring_ack(ldcp, tagp,
4384 					    ack_start, ack_end,
4385 					    VIO_DP_ACTIVE);
4386 					if (rv != VGEN_SUCCESS) {
4387 						goto error_ret;
4388 					}
4389 
4390 					/* need to set new ack start index */
4391 					set_ack_start = B_TRUE;
4392 				}
4393 				goto vgen_next_rxi;
4394 			}
4395 
4396 			nread = nbytes;
4397 			rv = ldc_mem_copy(ldcp->ldc_handle,
4398 			    (caddr_t)mp->b_rptr, off, &nread,
4399 			    rxdp->memcookie, ncookies, LDC_COPY_IN);
4400 
4401 			/* if ldc_mem_copy() failed */
4402 			if (rv) {
4403 				DWARN((vnetp,
4404 				    "vgen_handle_dring_data: ldc_mem_copy err "
4405 				    " id(%lx) rv(%d)\n", ldcp->ldc_id, rv));
4406 				statsp->ierrors++;
4407 				freemsg(mp);
4408 				goto error_ret;
4409 			}
4410 
4411 			ack_needed = hdrp->ack;
4412 			hdrp->dstate = VIO_DESC_DONE;
4413 
4414 			rv = ldc_mem_dring_release(ldcp->rx_dhandle, rxi, rxi);
4415 			if (rv != 0) {
4416 				DWARN((vnetp, "vgen_handle_dring_data: "
4417 				    "ldc_mem_dring_release err id(%lx)"
4418 				    " rv(%d)\n", ldcp->ldc_id, rv));
4419 				goto error_ret;
4420 			}
4421 
4422 			mp->b_rptr += VNET_IPALIGN;
4423 
4424 			if (ack_needed) {
4425 				ack_needed = B_FALSE;
4426 				/*
4427 				 * sender needs ack for this packet,
4428 				 * ack pkts upto this index.
4429 				 */
4430 				ack_end = rxi;
4431 
4432 				rv = vgen_send_dring_ack(ldcp, tagp,
4433 				    ack_start, ack_end, VIO_DP_ACTIVE);
4434 				if (rv != VGEN_SUCCESS) {
4435 					goto error_ret;
4436 				}
4437 
4438 				/* need to set new ack start index */
4439 				set_ack_start = B_TRUE;
4440 			}
4441 
4442 			if (nread != nbytes) {
4443 				DWARN((vnetp,
4444 				    "vgen_handle_dring_data: id(%lx) "
4445 				    "ldc_mem_copy nread(%lx), nbytes(%lx)\n",
4446 				    ldcp->ldc_id, nread, nbytes));
4447 				statsp->ierrors++;
4448 				freemsg(mp);
4449 				goto vgen_next_rxi;
4450 			}
4451 
4452 			/* point to the actual end of data */
4453 			mp->b_wptr = mp->b_rptr + datalen;
4454 
4455 			/* update stats */
4456 			statsp->ipackets++;
4457 			statsp->rbytes += datalen;
4458 			ehp = (struct ether_header *)mp->b_rptr;
4459 			if (IS_BROADCAST(ehp))
4460 				statsp->brdcstrcv++;
4461 			else if (IS_MULTICAST(ehp))
4462 				statsp->multircv++;
4463 
4464 			/* build a chain of received packets */
4465 			if (bp == NULL) {
4466 				/* first pkt */
4467 				bp = mp;
4468 				bpt = bp;
4469 				bpt->b_next = NULL;
4470 			} else {
4471 				mp->b_next = NULL;
4472 				bpt->b_next = mp;
4473 				bpt = mp;
4474 			}
4475 
4476 
4477 vgen_next_rxi:
4478 			/* update end index of range of descrs to be ack'd */
4479 			ack_end = rxi;
4480 
4481 			/* update the next index to be processed */
4482 			INCR_RXI(next_rxi, ldcp);
4483 			if (next_rxi == start) {
4484 				/*
4485 				 * processed the entire descriptor ring upto
4486 				 * the index at which we started.
4487 				 */
4488 				break;
4489 			}
4490 
4491 			rxi = next_rxi;
4492 
4493 		_NOTE(CONSTCOND)
4494 		} while (1);
4495 
4496 		/*
4497 		 * send an ack message to peer indicating that we have stopped
4498 		 * processing descriptors.
4499 		 */
4500 		if (set_ack_start) {
4501 			/*
4502 			 * We have ack'd upto some index and we have not
4503 			 * processed any descriptors beyond that index.
4504 			 * Use the last ack'd index as both the start and
4505 			 * end of range of descrs being ack'd.
4506 			 * Note: This results in acking the last index twice
4507 			 * and should be harmless.
4508 			 */
4509 			ack_start = ack_end;
4510 		}
4511 
4512 		rv = vgen_send_dring_ack(ldcp, tagp, ack_start, ack_end,
4513 		    VIO_DP_STOPPED);
4514 		if (rv != VGEN_SUCCESS) {
4515 			goto error_ret;
4516 		}
4517 
4518 		/* save new recv index and expected seqnum of next dring msg */
4519 		ldcp->next_rxi = next_rxi;
4520 		ldcp->next_rxseq += 1;
4521 
4522 		break;
4523 
4524 	case VIO_SUBTYPE_ACK:
4525 		/*
4526 		 * received an ack corresponding to a specific descriptor for
4527 		 * which we had set the ACK bit in the descriptor (during
4528 		 * transmit). This enables us to reclaim descriptors.
4529 		 */
4530 
4531 		DBG2((vnetp,
4532 		    "vgen_handle_dring_data: ACK:  start(%d), end(%d)\n",
4533 		    start, end));
4534 
4535 		/* validate start and end indeces in the tx ack msg */
4536 		if (!(CHECK_TXI(start, ldcp)) || !(CHECK_TXI(end, ldcp))) {
4537 			/* drop the message if invalid index */
4538 			break;
4539 		}
4540 		/* validate dring_ident */
4541 		if (dringmsg->dring_ident != ldcp->local_hparams.dring_ident) {
4542 			/* invalid dring_ident, drop the msg */
4543 			break;
4544 		}
4545 		statsp->dring_data_acks++;
4546 
4547 		/* reclaim descriptors that are done */
4548 		vgen_reclaim(ldcp);
4549 
4550 		if (dringmsg->dring_process_state != VIO_DP_STOPPED) {
4551 			/*
4552 			 * receiver continued processing descriptors after
4553 			 * sending us the ack.
4554 			 */
4555 			break;
4556 		}
4557 
4558 		statsp->dring_stopped_acks++;
4559 
4560 		/* receiver stopped processing descriptors */
4561 		mutex_enter(&ldcp->txlock);
4562 		mutex_enter(&ldcp->tclock);
4563 
4564 		/*
4565 		 * determine if there are any pending tx descriptors
4566 		 * ready to be processed by the receiver(peer) and if so,
4567 		 * send a message to the peer to restart receiving.
4568 		 */
4569 		ready_txd = B_FALSE;
4570 
4571 		/*
4572 		 * using the end index of the descriptor range for which
4573 		 * we received the ack, check if the next descriptor is
4574 		 * ready.
4575 		 */
4576 		txi = end;
4577 		INCR_TXI(txi, ldcp);
4578 		tbufp = &ldcp->tbufp[txi];
4579 		txdp = tbufp->descp;
4580 		hdrp = &txdp->hdr;
4581 		if (hdrp->dstate == VIO_DESC_READY) {
4582 			ready_txd = B_TRUE;
4583 		} else {
4584 			/*
4585 			 * descr next to the end of ack'd descr range is not
4586 			 * ready.
4587 			 * starting from the current reclaim index, check
4588 			 * if any descriptor is ready.
4589 			 */
4590 
4591 			txi = ldcp->cur_tbufp - ldcp->tbufp;
4592 			tbufp = &ldcp->tbufp[txi];
4593 
4594 			while (tbufp != ldcp->next_tbufp) {
4595 
4596 				txdp = tbufp->descp;
4597 				hdrp = &txdp->hdr;
4598 				if (hdrp->dstate == VIO_DESC_READY) {
4599 					break;
4600 				}
4601 
4602 				INCR_TXI(txi, ldcp);
4603 				tbufp = &ldcp->tbufp[txi];
4604 
4605 			}
4606 
4607 			if (tbufp != ldcp->next_tbufp)
4608 				ready_txd = B_TRUE;
4609 		}
4610 
4611 		if (ready_txd) {
4612 			/*
4613 			 * we have tx descriptor(s) ready to be
4614 			 * processed by the receiver.
4615 			 * send a message to the peer with the start index
4616 			 * of ready descriptors.
4617 			 */
4618 			rv = vgen_send_dring_data(ldcp, txi, -1);
4619 			if (rv != VGEN_SUCCESS) {
4620 				ldcp->resched_peer = B_TRUE;
4621 				mutex_exit(&ldcp->tclock);
4622 				mutex_exit(&ldcp->txlock);
4623 				goto error_ret;
4624 			}
4625 		} else {
4626 			/*
4627 			 * no ready tx descriptors. set the flag to send a
4628 			 * message to peer when tx descriptors are ready in
4629 			 * transmit routine.
4630 			 */
4631 			ldcp->resched_peer = B_TRUE;
4632 		}
4633 
4634 		mutex_exit(&ldcp->tclock);
4635 		mutex_exit(&ldcp->txlock);
4636 
4637 		break;
4638 
4639 	case VIO_SUBTYPE_NACK:
4640 		/*
4641 		 * peer sent a NACK msg to indicate lost packets.
4642 		 * The start and end correspond to the range of descriptors
4643 		 * for which the peer didn't receive a dring data msg and so
4644 		 * didn't receive the corresponding data.
4645 		 */
4646 		DWARN((vnetp,
4647 		    "vgen_handle_dring_data: NACK:  start(%d), end(%d)\n",
4648 		    start, end));
4649 
4650 		/* validate start and end indeces in the tx nack msg */
4651 		if (!(CHECK_TXI(start, ldcp)) || !(CHECK_TXI(end, ldcp))) {
4652 			/* drop the message if invalid index */
4653 			break;
4654 		}
4655 		/* validate dring_ident */
4656 		if (dringmsg->dring_ident != ldcp->local_hparams.dring_ident) {
4657 			/* invalid dring_ident, drop the msg */
4658 			break;
4659 		}
4660 		mutex_enter(&ldcp->txlock);
4661 		mutex_enter(&ldcp->tclock);
4662 
4663 		if (ldcp->next_tbufp == ldcp->cur_tbufp) {
4664 			/* no busy descriptors, bogus nack ? */
4665 			mutex_exit(&ldcp->tclock);
4666 			mutex_exit(&ldcp->txlock);
4667 			break;
4668 		}
4669 
4670 #ifdef VGEN_REXMIT
4671 		/* send a new dring data msg including the lost descrs */
4672 		end = ldcp->next_tbufp - ldcp->tbufp;
4673 		DECR_TXI(end, ldcp);
4674 		rv = vgen_send_dring_data(ldcp, start, end);
4675 		if (rv != 0) {
4676 			/*
4677 			 * vgen_send_dring_data() error: drop all packets
4678 			 * in this descr range
4679 			 */
4680 			DWARN((vnetp,
4681 			    "vgen_handle_dring_data: "
4682 			    "vgen_send_dring_data failed :"
4683 			    "id(%lx) rv(%d)\n", ldcp->ldc_id, rv));
4684 			for (txi = start; txi <= end; ) {
4685 				tbufp = &(ldcp->tbufp[txi]);
4686 				txdp = tbufp->descp;
4687 				hdrp = &txdp->hdr;
4688 				tbufp->flags = VGEN_PRIV_DESC_FREE;
4689 				hdrp->dstate = VIO_DESC_FREE;
4690 				hdrp->ack = B_FALSE;
4691 				statsp->oerrors++;
4692 			}
4693 
4694 			/* update next pointer */
4695 			ldcp->next_tbufp = &(ldcp->tbufp[start]);
4696 			ldcp->next_txi = start;
4697 		}
4698 		DBG2((vnetp,
4699 		    "vgen_handle_dring_data: rexmit: start(%d) end(%d)\n",
4700 		    start, end));
4701 #else	/* VGEN_REXMIT */
4702 		/* we just mark the descrs as done so they can be reclaimed */
4703 		for (txi = start; txi <= end; ) {
4704 			txdp = &(ldcp->txdp[txi]);
4705 			hdrp = &txdp->hdr;
4706 			if (hdrp->dstate == VIO_DESC_READY)
4707 				hdrp->dstate = VIO_DESC_DONE;
4708 			INCR_TXI(txi, ldcp);
4709 		}
4710 #endif	/* VGEN_REXMIT */
4711 		mutex_exit(&ldcp->tclock);
4712 		mutex_exit(&ldcp->txlock);
4713 
4714 		break;
4715 	}
4716 
4717 error_ret:
4718 
4719 	DBG1((vnetp, "vgen_handle_dring_data: exit\n"));
4720 	*headp = bp;
4721 	*tailp = bpt;
4722 
4723 	return (rv);
4724 }
4725 
4726 static void
4727 vgen_reclaim(vgen_ldc_t *ldcp)
4728 {
4729 	mutex_enter(&ldcp->tclock);
4730 
4731 	vgen_reclaim_dring(ldcp);
4732 	ldcp->reclaim_lbolt = ddi_get_lbolt();
4733 
4734 	mutex_exit(&ldcp->tclock);
4735 }
4736 
4737 /*
4738  * transmit reclaim function. starting from the current reclaim index
4739  * look for descriptors marked DONE and reclaim the descriptor and the
4740  * corresponding buffers (tbuf).
4741  */
4742 static void
4743 vgen_reclaim_dring(vgen_ldc_t *ldcp)
4744 {
4745 	vnet_public_desc_t *txdp;
4746 	vgen_private_desc_t *tbufp;
4747 	vio_dring_entry_hdr_t	*hdrp;
4748 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
4749 
4750 #ifdef DEBUG
4751 	if (vgen_trigger_txtimeout)
4752 		return;
4753 #endif
4754 
4755 	tbufp = ldcp->cur_tbufp;
4756 	txdp = tbufp->descp;
4757 	hdrp = &txdp->hdr;
4758 
4759 	while ((hdrp->dstate == VIO_DESC_DONE) &&
4760 	    (tbufp != ldcp->next_tbufp)) {
4761 		tbufp->flags = VGEN_PRIV_DESC_FREE;
4762 		hdrp->dstate = VIO_DESC_FREE;
4763 		hdrp->ack = B_FALSE;
4764 
4765 		tbufp = NEXTTBUF(ldcp, tbufp);
4766 		txdp = tbufp->descp;
4767 		hdrp = &txdp->hdr;
4768 	}
4769 
4770 	ldcp->cur_tbufp = tbufp;
4771 
4772 	/*
4773 	 * Check if mac layer should be notified to restart transmissions
4774 	 */
4775 	if (ldcp->need_resched) {
4776 		ldcp->need_resched = B_FALSE;
4777 		vnet_tx_update(vgenp->vnetp);
4778 	}
4779 }
4780 
4781 /* return the number of pending transmits for the channel */
4782 static int
4783 vgen_num_txpending(vgen_ldc_t *ldcp)
4784 {
4785 	int n;
4786 
4787 	if (ldcp->next_tbufp >= ldcp->cur_tbufp) {
4788 		n = ldcp->next_tbufp - ldcp->cur_tbufp;
4789 	} else  {
4790 		/* cur_tbufp > next_tbufp */
4791 		n = ldcp->num_txds - (ldcp->cur_tbufp - ldcp->next_tbufp);
4792 	}
4793 
4794 	return (n);
4795 }
4796 
4797 /* determine if the transmit descriptor ring is full */
4798 static int
4799 vgen_tx_dring_full(vgen_ldc_t *ldcp)
4800 {
4801 	vgen_private_desc_t	*tbufp;
4802 	vgen_private_desc_t	*ntbufp;
4803 
4804 	tbufp = ldcp->next_tbufp;
4805 	ntbufp = NEXTTBUF(ldcp, tbufp);
4806 	if (ntbufp == ldcp->cur_tbufp) { /* out of tbufs/txds */
4807 		return (VGEN_SUCCESS);
4808 	}
4809 	return (VGEN_FAILURE);
4810 }
4811 
4812 /* determine if timeout condition has occured */
4813 static int
4814 vgen_ldc_txtimeout(vgen_ldc_t *ldcp)
4815 {
4816 	if (((ddi_get_lbolt() - ldcp->reclaim_lbolt) >
4817 	    drv_usectohz(vnet_ldcwd_txtimeout * 1000)) &&
4818 	    (vnet_ldcwd_txtimeout) &&
4819 	    (vgen_tx_dring_full(ldcp) == VGEN_SUCCESS)) {
4820 		return (VGEN_SUCCESS);
4821 	} else {
4822 		return (VGEN_FAILURE);
4823 	}
4824 }
4825 
4826 /* transmit watchdog timeout handler */
4827 static void
4828 vgen_ldc_watchdog(void *arg)
4829 {
4830 	vgen_ldc_t *ldcp;
4831 	vgen_t *vgenp;
4832 	void *vnetp;
4833 	int rv;
4834 
4835 	ldcp = (vgen_ldc_t *)arg;
4836 	vgenp = LDC_TO_VGEN(ldcp);
4837 	vnetp = LDC_TO_VNET(ldcp);
4838 
4839 	rv = vgen_ldc_txtimeout(ldcp);
4840 	if (rv == VGEN_SUCCESS) {
4841 		DWARN((vnetp,
4842 		    "vgen_ldc_watchdog: transmit timeout ldcid(%lx)\n",
4843 		    ldcp->ldc_id));
4844 #ifdef DEBUG
4845 		if (vgen_trigger_txtimeout) {
4846 			/* tx timeout triggered for debugging */
4847 			vgen_trigger_txtimeout = 0;
4848 		}
4849 #endif
4850 		mutex_enter(&ldcp->cblock);
4851 		ldcp->need_ldc_reset = B_TRUE;
4852 		vgen_handshake_reset(ldcp);
4853 		mutex_exit(&ldcp->cblock);
4854 		if (ldcp->need_resched) {
4855 			ldcp->need_resched = B_FALSE;
4856 			vnet_tx_update(vgenp->vnetp);
4857 		}
4858 	}
4859 
4860 	ldcp->wd_tid = timeout(vgen_ldc_watchdog, (caddr_t)ldcp,
4861 	    drv_usectohz(vnet_ldcwd_interval * 1000));
4862 }
4863 
4864 static int
4865 vgen_setup_kstats(vgen_ldc_t *ldcp)
4866 {
4867 	vgen_t *vgenp;
4868 	struct kstat *ksp;
4869 	vgen_stats_t *statsp;
4870 	vgen_kstats_t *ldckp;
4871 	int instance;
4872 	size_t size;
4873 	char name[MAXNAMELEN];
4874 
4875 	vgenp = LDC_TO_VGEN(ldcp);
4876 	instance = ddi_get_instance(vgenp->vnetdip);
4877 	(void) sprintf(name, "vnetldc0x%lx", ldcp->ldc_id);
4878 	statsp = kmem_zalloc(sizeof (vgen_stats_t), KM_SLEEP);
4879 	if (statsp == NULL) {
4880 		return (VGEN_FAILURE);
4881 	}
4882 	size = sizeof (vgen_kstats_t) / sizeof (kstat_named_t);
4883 	ksp = kstat_create("vnet", instance, name, "net", KSTAT_TYPE_NAMED,
4884 		size, 0);
4885 	if (ksp == NULL) {
4886 		KMEM_FREE(statsp);
4887 		return (VGEN_FAILURE);
4888 	}
4889 
4890 	ldckp = (vgen_kstats_t *)ksp->ks_data;
4891 	kstat_named_init(&ldckp->ipackets,		"ipackets",
4892 		KSTAT_DATA_ULONG);
4893 	kstat_named_init(&ldckp->ipackets64,		"ipackets64",
4894 		KSTAT_DATA_ULONGLONG);
4895 	kstat_named_init(&ldckp->ierrors,		"ierrors",
4896 		KSTAT_DATA_ULONG);
4897 	kstat_named_init(&ldckp->opackets,		"opackets",
4898 		KSTAT_DATA_ULONG);
4899 	kstat_named_init(&ldckp->opackets64,		"opackets64",
4900 		KSTAT_DATA_ULONGLONG);
4901 	kstat_named_init(&ldckp->oerrors,		"oerrors",
4902 		KSTAT_DATA_ULONG);
4903 
4904 
4905 	/* MIB II kstat variables */
4906 	kstat_named_init(&ldckp->rbytes,		"rbytes",
4907 		KSTAT_DATA_ULONG);
4908 	kstat_named_init(&ldckp->rbytes64,		"rbytes64",
4909 		KSTAT_DATA_ULONGLONG);
4910 	kstat_named_init(&ldckp->obytes,		"obytes",
4911 		KSTAT_DATA_ULONG);
4912 	kstat_named_init(&ldckp->obytes64,		"obytes64",
4913 		KSTAT_DATA_ULONGLONG);
4914 	kstat_named_init(&ldckp->multircv,		"multircv",
4915 		KSTAT_DATA_ULONG);
4916 	kstat_named_init(&ldckp->multixmt,		"multixmt",
4917 		KSTAT_DATA_ULONG);
4918 	kstat_named_init(&ldckp->brdcstrcv,		"brdcstrcv",
4919 		KSTAT_DATA_ULONG);
4920 	kstat_named_init(&ldckp->brdcstxmt,		"brdcstxmt",
4921 		KSTAT_DATA_ULONG);
4922 	kstat_named_init(&ldckp->norcvbuf,		"norcvbuf",
4923 		KSTAT_DATA_ULONG);
4924 	kstat_named_init(&ldckp->noxmtbuf,		"noxmtbuf",
4925 		KSTAT_DATA_ULONG);
4926 
4927 	/* Tx stats */
4928 	kstat_named_init(&ldckp->tx_no_desc,		"tx_no_desc",
4929 		KSTAT_DATA_ULONG);
4930 
4931 	/* Rx stats */
4932 	kstat_named_init(&ldckp->rx_allocb_fail,	"rx_allocb_fail",
4933 		KSTAT_DATA_ULONG);
4934 	kstat_named_init(&ldckp->rx_vio_allocb_fail,	"rx_vio_allocb_fail",
4935 		KSTAT_DATA_ULONG);
4936 	kstat_named_init(&ldckp->rx_lost_pkts,		"rx_lost_pkts",
4937 		KSTAT_DATA_ULONG);
4938 
4939 	/* Interrupt stats */
4940 	kstat_named_init(&ldckp->callbacks,		"callbacks",
4941 		KSTAT_DATA_ULONG);
4942 	kstat_named_init(&ldckp->dring_data_acks,	"dring_data_acks",
4943 		KSTAT_DATA_ULONG);
4944 	kstat_named_init(&ldckp->dring_stopped_acks,	"dring_stopped_acks",
4945 		KSTAT_DATA_ULONG);
4946 	kstat_named_init(&ldckp->dring_data_msgs,	"dring_data_msgs",
4947 		KSTAT_DATA_ULONG);
4948 
4949 	ksp->ks_update = vgen_kstat_update;
4950 	ksp->ks_private = (void *)ldcp;
4951 	kstat_install(ksp);
4952 
4953 	ldcp->ksp = ksp;
4954 	ldcp->statsp = statsp;
4955 	return (VGEN_SUCCESS);
4956 }
4957 
4958 static void
4959 vgen_destroy_kstats(vgen_ldc_t *ldcp)
4960 {
4961 	if (ldcp->ksp)
4962 		kstat_delete(ldcp->ksp);
4963 	KMEM_FREE(ldcp->statsp);
4964 }
4965 
4966 static int
4967 vgen_kstat_update(kstat_t *ksp, int rw)
4968 {
4969 	vgen_ldc_t *ldcp;
4970 	vgen_stats_t *statsp;
4971 	vgen_kstats_t *ldckp;
4972 
4973 	ldcp = (vgen_ldc_t *)ksp->ks_private;
4974 	statsp = ldcp->statsp;
4975 	ldckp = (vgen_kstats_t *)ksp->ks_data;
4976 
4977 	if (rw == KSTAT_READ) {
4978 		ldckp->ipackets.value.ul	= (uint32_t)statsp->ipackets;
4979 		ldckp->ipackets64.value.ull	= statsp->ipackets;
4980 		ldckp->ierrors.value.ul		= statsp->ierrors;
4981 		ldckp->opackets.value.ul	= (uint32_t)statsp->opackets;
4982 		ldckp->opackets64.value.ull	= statsp->opackets;
4983 		ldckp->oerrors.value.ul		= statsp->oerrors;
4984 
4985 		/*
4986 		 * MIB II kstat variables
4987 		 */
4988 		ldckp->rbytes.value.ul		= (uint32_t)statsp->rbytes;
4989 		ldckp->rbytes64.value.ull	= statsp->rbytes;
4990 		ldckp->obytes.value.ul		= (uint32_t)statsp->obytes;
4991 		ldckp->obytes64.value.ull	= statsp->obytes;
4992 		ldckp->multircv.value.ul	= statsp->multircv;
4993 		ldckp->multixmt.value.ul	= statsp->multixmt;
4994 		ldckp->brdcstrcv.value.ul	= statsp->brdcstrcv;
4995 		ldckp->brdcstxmt.value.ul	= statsp->brdcstxmt;
4996 		ldckp->norcvbuf.value.ul	= statsp->norcvbuf;
4997 		ldckp->noxmtbuf.value.ul	= statsp->noxmtbuf;
4998 
4999 		ldckp->tx_no_desc.value.ul	= statsp->tx_no_desc;
5000 
5001 		ldckp->rx_allocb_fail.value.ul	= statsp->rx_allocb_fail;
5002 		ldckp->rx_vio_allocb_fail.value.ul = statsp->rx_vio_allocb_fail;
5003 		ldckp->rx_lost_pkts.value.ul	= statsp->rx_lost_pkts;
5004 
5005 		ldckp->callbacks.value.ul	= statsp->callbacks;
5006 		ldckp->dring_data_acks.value.ul	= statsp->dring_data_acks;
5007 		ldckp->dring_stopped_acks.value.ul = statsp->dring_stopped_acks;
5008 		ldckp->dring_data_msgs.value.ul	= statsp->dring_data_msgs;
5009 	} else {
5010 		statsp->ipackets	= ldckp->ipackets64.value.ull;
5011 		statsp->ierrors		= ldckp->ierrors.value.ul;
5012 		statsp->opackets	= ldckp->opackets64.value.ull;
5013 		statsp->oerrors		= ldckp->oerrors.value.ul;
5014 
5015 		/*
5016 		 * MIB II kstat variables
5017 		 */
5018 		statsp->rbytes		= ldckp->rbytes64.value.ull;
5019 		statsp->obytes		= ldckp->obytes64.value.ull;
5020 		statsp->multircv	= ldckp->multircv.value.ul;
5021 		statsp->multixmt	= ldckp->multixmt.value.ul;
5022 		statsp->brdcstrcv	= ldckp->brdcstrcv.value.ul;
5023 		statsp->brdcstxmt	= ldckp->brdcstxmt.value.ul;
5024 		statsp->norcvbuf	= ldckp->norcvbuf.value.ul;
5025 		statsp->noxmtbuf	= ldckp->noxmtbuf.value.ul;
5026 
5027 		statsp->tx_no_desc	= ldckp->tx_no_desc.value.ul;
5028 
5029 		statsp->rx_allocb_fail	= ldckp->rx_allocb_fail.value.ul;
5030 		statsp->rx_vio_allocb_fail = ldckp->rx_vio_allocb_fail.value.ul;
5031 		statsp->rx_lost_pkts	= ldckp->rx_lost_pkts.value.ul;
5032 
5033 		statsp->callbacks	= ldckp->callbacks.value.ul;
5034 		statsp->dring_data_acks	= ldckp->dring_data_acks.value.ul;
5035 		statsp->dring_stopped_acks = ldckp->dring_stopped_acks.value.ul;
5036 		statsp->dring_data_msgs	= ldckp->dring_data_msgs.value.ul;
5037 	}
5038 
5039 	return (VGEN_SUCCESS);
5040 }
5041 
5042 /* handler for error messages received from the peer ldc end-point */
5043 static void
5044 vgen_handle_errmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5045 {
5046 	_NOTE(ARGUNUSED(ldcp, tagp))
5047 }
5048 
5049 /* Check if the session id in the received message is valid */
5050 static int
5051 vgen_check_sid(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5052 {
5053 	if (tagp->vio_sid != ldcp->peer_sid) {
5054 		void *vnetp = LDC_TO_VNET(ldcp);
5055 		DWARN((vnetp,
5056 		    "sid mismatch: expected(%x), rcvd(%x)\n",
5057 		    ldcp->peer_sid, tagp->vio_sid));
5058 		return (VGEN_FAILURE);
5059 	}
5060 	else
5061 		return (VGEN_SUCCESS);
5062 }
5063 
/*
 * Convert a mac address from its ETHERADDRL-byte array form to a
 * uint64_t. macaddr[0] becomes the most significant of the converted
 * bytes and macaddr[ETHERADDRL - 1] the least significant.
 */
static uint64_t
vgen_macaddr_strtoul(const uint8_t *macaddr)
{
	const uint8_t	*bytep = macaddr;
	const uint8_t	*endp = macaddr + ETHERADDRL;
	uint64_t	packed = 0;

	/* shift the accumulated value up and append the next byte */
	while (bytep < endp) {
		packed = (packed << 8) | *bytep;
		bytep++;
	}

	return (packed);
}
5078 
/*
 * Convert a mac address from uint64_t to its ETHERADDRL-byte array form.
 * The least significant byte of val is stored in the last element of
 * macaddr and the array is filled back to front; bits of val above the
 * converted bytes are ignored.
 *
 * Always returns VGEN_SUCCESS.
 */
static int
vgen_macaddr_ultostr(uint64_t val, uint8_t *macaddr)
{
	int idx = ETHERADDRL;

	while (--idx >= 0) {
		macaddr[idx] = val & 0xFF;
		val >>= 8;
	}

	return (VGEN_SUCCESS);
}
5093 
/*
 * Format the six bytes at 'a' as colon separated hex ("%x:%x:%x:%x:%x:%x")
 * into ebuf and return ebuf.
 *
 * The output is not bounded here: ebuf must have room for up to 17
 * characters plus the terminating NUL (18 bytes).
 */
static caddr_t
vgen_print_ethaddr(uint8_t *a, char *ebuf)
{
	(void) sprintf(ebuf, "%x:%x:%x:%x:%x:%x",
	    a[0], a[1], a[2], a[3], a[4], a[5]);

	return (ebuf);
}
5101 
5102 /* Handshake watchdog timeout handler */
5103 static void
5104 vgen_hwatchdog(void *arg)
5105 {
5106 	vgen_ldc_t *ldcp = (vgen_ldc_t *)arg;
5107 	void *vnetp = LDC_TO_VNET(ldcp);
5108 
5109 	DWARN((vnetp,
5110 	    "vgen_hwatchdog: handshake timeout ldc(%lx) phase(%x) state(%x)\n",
5111 	    ldcp->ldc_id, ldcp->hphase, ldcp->hstate));
5112 
5113 	mutex_enter(&ldcp->cblock);
5114 	ldcp->htid = 0;
5115 	ldcp->need_ldc_reset = B_TRUE;
5116 	vgen_handshake_retry(ldcp);
5117 	mutex_exit(&ldcp->cblock);
5118 }
5119 
5120 static void
5121 vgen_print_hparams(vgen_hparams_t *hp)
5122 {
5123 	uint8_t	addr[6];
5124 	char	ea[6];
5125 	ldc_mem_cookie_t *dc;
5126 
5127 	cmn_err(CE_CONT, "version_info:\n");
5128 	cmn_err(CE_CONT,
5129 	    "\tver_major: %d, ver_minor: %d, dev_class: %d\n",
5130 	    hp->ver_major, hp->ver_minor, hp->dev_class);
5131 
5132 	(void) vgen_macaddr_ultostr(hp->addr, addr);
5133 	cmn_err(CE_CONT, "attr_info:\n");
5134 	cmn_err(CE_CONT, "\tMTU: %lx, addr: %s\n", hp->mtu,
5135 	    vgen_print_ethaddr(addr, ea));
5136 	cmn_err(CE_CONT,
5137 	    "\taddr_type: %x, xfer_mode: %x, ack_freq: %x\n",
5138 	    hp->addr_type, hp->xfer_mode, hp->ack_freq);
5139 
5140 	dc = &hp->dring_cookie;
5141 	cmn_err(CE_CONT, "dring_info:\n");
5142 	cmn_err(CE_CONT,
5143 	    "\tlength: %d, dsize: %d\n", hp->num_desc, hp->desc_size);
5144 	cmn_err(CE_CONT,
5145 	    "\tldc_addr: 0x%lx, ldc_size: %ld\n",
5146 	    dc->addr, dc->size);
5147 	cmn_err(CE_CONT, "\tdring_ident: 0x%lx\n", hp->dring_ident);
5148 }
5149 
5150 static void
5151 vgen_print_ldcinfo(vgen_ldc_t *ldcp)
5152 {
5153 	vgen_hparams_t *hp;
5154 
5155 	cmn_err(CE_CONT, "Channel Information:\n");
5156 	cmn_err(CE_CONT,
5157 	    "\tldc_id: 0x%lx, ldc_status: 0x%x\n",
5158 	    ldcp->ldc_id, ldcp->ldc_status);
5159 	cmn_err(CE_CONT,
5160 	    "\tlocal_sid: 0x%x, peer_sid: 0x%x\n",
5161 	    ldcp->local_sid, ldcp->peer_sid);
5162 	cmn_err(CE_CONT,
5163 	    "\thphase: 0x%x, hstate: 0x%x\n",
5164 	    ldcp->hphase, ldcp->hstate);
5165 
5166 	cmn_err(CE_CONT, "Local handshake params:\n");
5167 	hp = &ldcp->local_hparams;
5168 	vgen_print_hparams(hp);
5169 
5170 	cmn_err(CE_CONT, "Peer handshake params:\n");
5171 	hp = &ldcp->peer_hparams;
5172 	vgen_print_hparams(hp);
5173 }
5174