xref: /titanic_51/usr/src/uts/sun4v/io/vnet_gen.c (revision c16fc6609d9aa72229802524dc1d8c4ead6e9d2a)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/errno.h>
31 #include <sys/param.h>
32 #include <sys/stream.h>
33 #include <sys/kmem.h>
34 #include <sys/conf.h>
35 #include <sys/devops.h>
36 #include <sys/ksynch.h>
37 #include <sys/stat.h>
38 #include <sys/modctl.h>
39 #include <sys/debug.h>
40 #include <sys/ethernet.h>
41 #include <sys/ddi.h>
42 #include <sys/sunddi.h>
43 #include <sys/strsun.h>
44 #include <sys/note.h>
45 #include <sys/mac.h>
46 #include <sys/mac_ether.h>
47 #include <sys/ldc.h>
48 #include <sys/mach_descrip.h>
49 #include <sys/mdeg.h>
50 #include <sys/vio_mailbox.h>
51 #include <sys/vio_common.h>
52 #include <sys/vnet_common.h>
53 #include <sys/vnet_mailbox.h>
54 #include <sys/vio_util.h>
55 #include <sys/vnet_gen.h>
56 
/*
 * Implementation of the mac functionality for vnet using the
 * generic (default) transport layer of sun4v Logical Domain Channels (LDC).
 */
61 
62 /*
63  * Function prototypes.
64  */
65 /* vgen proxy entry points */
66 int vgen_init(void *vnetp, dev_info_t *vnetdip, const uint8_t *macaddr,
67 	mac_register_t **vgenmacp);
68 int vgen_uninit(void *arg);
69 static int vgen_start(void *arg);
70 static void vgen_stop(void *arg);
71 static mblk_t *vgen_tx(void *arg, mblk_t *mp);
72 static int vgen_multicst(void *arg, boolean_t add,
73 	const uint8_t *mca);
74 static int vgen_promisc(void *arg, boolean_t on);
75 static int vgen_unicst(void *arg, const uint8_t *mca);
76 static int vgen_stat(void *arg, uint_t stat, uint64_t *val);
77 static void vgen_ioctl(void *arg, queue_t *wq, mblk_t *mp);
78 
79 /* externs - functions provided by vnet to add/remove/modify entries in fdb */
80 void vnet_add_fdb(void *arg, uint8_t *macaddr, mac_tx_t m_tx, void *txarg);
81 void vnet_del_fdb(void *arg, uint8_t *macaddr);
82 void vnet_modify_fdb(void *arg, uint8_t *macaddr, mac_tx_t m_tx,
83 	void *txarg, boolean_t upgrade);
84 void vnet_add_def_rte(void *arg, mac_tx_t m_tx, void *txarg);
85 void vnet_del_def_rte(void *arg);
86 void vnet_rx(void *arg, mac_resource_handle_t mrh, mblk_t *mp);
87 void vnet_tx_update(void *arg);
88 
89 /* vgen internal functions */
90 static void vgen_detach_ports(vgen_t *vgenp);
91 static void vgen_port_detach(vgen_port_t *portp);
92 static void vgen_port_list_insert(vgen_port_t *portp);
93 static void vgen_port_list_remove(vgen_port_t *portp);
94 static vgen_port_t *vgen_port_lookup(vgen_portlist_t *plistp,
95 	int port_num);
96 static int vgen_mdeg_reg(vgen_t *vgenp);
97 static void vgen_mdeg_unreg(vgen_t *vgenp);
98 static int vgen_mdeg_cb(void *cb_argp, mdeg_result_t *resp);
99 static int vgen_add_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
100 static int vgen_remove_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
101 static int vgen_port_attach_mdeg(vgen_t *vgenp, int port_num, uint64_t *ldcids,
102 	int num_ids, struct ether_addr *macaddr, boolean_t vsw_port);
103 static void vgen_port_detach_mdeg(vgen_port_t *portp);
104 static int vgen_update_port(vgen_t *vgenp, md_t *curr_mdp,
105 	mde_cookie_t curr_mdex, md_t *prev_mdp, mde_cookie_t prev_mdex);
106 static uint64_t	vgen_port_stat(vgen_port_t *portp, uint_t stat);
107 
108 static int vgen_ldc_attach(vgen_port_t *portp, uint64_t ldc_id);
109 static void vgen_ldc_detach(vgen_ldc_t *ldcp);
110 static int vgen_alloc_tx_ring(vgen_ldc_t *ldcp);
111 static void vgen_free_tx_ring(vgen_ldc_t *ldcp);
112 static void vgen_init_ports(vgen_t *vgenp);
113 static void vgen_port_init(vgen_port_t *portp);
114 static void vgen_uninit_ports(vgen_t *vgenp);
115 static void vgen_port_uninit(vgen_port_t *portp);
116 static void vgen_init_ldcs(vgen_port_t *portp);
117 static void vgen_uninit_ldcs(vgen_port_t *portp);
118 static int vgen_ldc_init(vgen_ldc_t *ldcp);
119 static void vgen_ldc_uninit(vgen_ldc_t *ldcp);
120 static int vgen_init_tbufs(vgen_ldc_t *ldcp);
121 static void vgen_uninit_tbufs(vgen_ldc_t *ldcp);
122 static void vgen_clobber_tbufs(vgen_ldc_t *ldcp);
123 static void vgen_clobber_rxds(vgen_ldc_t *ldcp);
124 static uint64_t	vgen_ldc_stat(vgen_ldc_t *ldcp, uint_t stat);
125 static uint_t vgen_ldc_cb(uint64_t event, caddr_t arg);
126 static int vgen_portsend(vgen_port_t *portp, mblk_t *mp);
127 static int vgen_ldcsend(vgen_ldc_t *ldcp, mblk_t *mp);
128 static void vgen_reclaim(vgen_ldc_t *ldcp);
129 static void vgen_reclaim_dring(vgen_ldc_t *ldcp);
130 static int vgen_num_txpending(vgen_ldc_t *ldcp);
131 static int vgen_tx_dring_full(vgen_ldc_t *ldcp);
132 static int vgen_ldc_txtimeout(vgen_ldc_t *ldcp);
133 static void vgen_ldc_watchdog(void *arg);
134 static int vgen_setup_kstats(vgen_ldc_t *ldcp);
135 static void vgen_destroy_kstats(vgen_ldc_t *ldcp);
136 static int vgen_kstat_update(kstat_t *ksp, int rw);
137 
138 /* vgen handshake functions */
139 static vgen_ldc_t *vh_nextphase(vgen_ldc_t *ldcp);
140 static int vgen_supported_version(vgen_ldc_t *ldcp, uint16_t ver_major,
141 	uint16_t ver_minor);
142 static int vgen_next_version(vgen_ldc_t *ldcp, vgen_ver_t *verp);
143 static int vgen_sendmsg(vgen_ldc_t *ldcp, caddr_t msg,  size_t msglen,
144 	boolean_t caller_holds_lock);
145 static int vgen_send_version_negotiate(vgen_ldc_t *ldcp);
146 static int vgen_send_attr_info(vgen_ldc_t *ldcp);
147 static int vgen_send_dring_reg(vgen_ldc_t *ldcp);
148 static int vgen_send_rdx_info(vgen_ldc_t *ldcp);
149 static int vgen_send_dring_data(vgen_ldc_t *ldcp, uint32_t start, int32_t end);
150 static int vgen_send_mcast_info(vgen_ldc_t *ldcp);
151 static int vgen_handshake_phase2(vgen_ldc_t *ldcp);
152 static void vgen_handshake_reset(vgen_ldc_t *ldcp);
153 static void vgen_reset_hphase(vgen_ldc_t *ldcp);
154 static void vgen_handshake(vgen_ldc_t *ldcp);
155 static int vgen_handshake_done(vgen_ldc_t *ldcp);
156 static void vgen_handshake_retry(vgen_ldc_t *ldcp);
157 static int vgen_handle_version_negotiate(vgen_ldc_t *ldcp,
158 	vio_msg_tag_t *tagp);
159 static int vgen_handle_attr_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
160 static int vgen_handle_dring_reg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
161 static int vgen_handle_rdx_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
162 static int vgen_handle_mcast_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
163 static int vgen_handle_ctrlmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
164 static int vgen_handle_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp,
165 	mblk_t **headp, mblk_t **tailp);
166 static int vgen_send_dring_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp,
167 	uint32_t start, int32_t end, uint8_t pstate);
168 static int vgen_handle_datamsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp,
169 	mblk_t **headp, mblk_t **tailp);
170 static void vgen_handle_errmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
171 static void vgen_handle_evt_up(vgen_ldc_t *ldcp, boolean_t flag);
172 static void vgen_handle_evt_reset(vgen_ldc_t *ldcp, boolean_t flag);
173 static int vgen_check_sid(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
174 static uint64_t	vgen_macaddr_strtoul(const uint8_t *macaddr);
175 static int vgen_macaddr_ultostr(uint64_t value, uint8_t *macaddr);
176 static caddr_t vgen_print_ethaddr(uint8_t *a, char *ebuf);
177 static void vgen_hwatchdog(void *arg);
178 static void vgen_print_attr_info(vgen_ldc_t *ldcp, int endpoint);
179 static void vgen_print_hparams(vgen_hparams_t *hp);
180 static void vgen_print_ldcinfo(vgen_ldc_t *ldcp);
181 
/*
 * The handshake process consists of 5 phases defined below, with VH_PHASE0
 * being the pre-handshake phase and VH_DONE being the phase that indicates
 * successful completion of all phases.
 * Each phase may have one or more handshake states, all of which must
 * complete successfully before moving to the next phase.
 * Refer to the functions vgen_handshake() and vgen_handshake_done() for
 * more details.
 */
191 /* handshake phases */
192 enum {	VH_PHASE0, VH_PHASE1, VH_PHASE2, VH_PHASE3, VH_DONE = 0x80 };
193 
194 /* handshake states */
195 enum {
196 
197 	VER_INFO_SENT	=	0x1,
198 	VER_ACK_RCVD	=	0x2,
199 	VER_INFO_RCVD	=	0x4,
200 	VER_ACK_SENT	=	0x8,
201 	VER_NEGOTIATED	=	(VER_ACK_RCVD | VER_ACK_SENT),
202 
203 	ATTR_INFO_SENT	=	0x10,
204 	ATTR_ACK_RCVD	=	0x20,
205 	ATTR_INFO_RCVD	=	0x40,
206 	ATTR_ACK_SENT	=	0x80,
207 	ATTR_INFO_EXCHANGED	=	(ATTR_ACK_RCVD | ATTR_ACK_SENT),
208 
209 	DRING_INFO_SENT	=	0x100,
210 	DRING_ACK_RCVD	=	0x200,
211 	DRING_INFO_RCVD	=	0x400,
212 	DRING_ACK_SENT	=	0x800,
213 	DRING_INFO_EXCHANGED	=	(DRING_ACK_RCVD | DRING_ACK_SENT),
214 
215 	RDX_INFO_SENT	=	0x1000,
216 	RDX_ACK_RCVD	=	0x2000,
217 	RDX_INFO_RCVD	=	0x4000,
218 	RDX_ACK_SENT	=	0x8000,
219 	RDX_EXCHANGED	=	(RDX_ACK_RCVD | RDX_ACK_SENT)
220 
221 };
222 
/*
 * Acquire/release all three per-channel locks of an ldc.
 * LDC_LOCK takes them in the order cblock, txlock, tclock and LDC_UNLOCK
 * drops them in the reverse order.
 *
 * Each macro is wrapped in do { } while (0) so that it expands to exactly
 * one statement; without the wrapper only the first mutex call would be
 * governed by a preceding "if"/"else". Invoke as "LDC_LOCK(ldcp);".
 */
#define	LDC_LOCK(ldcp)	do {						\
				mutex_enter(&((ldcp)->cblock));		\
				mutex_enter(&((ldcp)->txlock));		\
				mutex_enter(&((ldcp)->tclock));		\
			_NOTE(CONSTCOND) } while (0)
#define	LDC_UNLOCK(ldcp)	do {					\
				mutex_exit(&((ldcp)->tclock));		\
				mutex_exit(&((ldcp)->txlock));		\
				mutex_exit(&((ldcp)->cblock));		\
			_NOTE(CONSTCOND) } while (0)
231 
232 static struct ether_addr etherbroadcastaddr = {
233 	0xff, 0xff, 0xff, 0xff, 0xff, 0xff
234 };
/*
 * MIB II broadcast/multicast packets.
 * Classify a frame by the destination address of its ethernet header;
 * ehp is a pointer to a struct ether_header. The argument is parenthesized
 * so that pointer expressions (e.g. "hdrs + 1") expand correctly.
 */
#define	IS_BROADCAST(ehp) \
		(ether_cmp(&(ehp)->ether_dhost, &etherbroadcastaddr) == 0)
#define	IS_MULTICAST(ehp) \
		(((ehp)->ether_dhost.ether_addr_octet[0] & 01) == 1)
242 
243 /*
244  * Property names
245  */
246 static char macaddr_propname[] = "mac-address";
247 static char rmacaddr_propname[] = "remote-mac-address";
248 static char channel_propname[] = "channel-endpoint";
249 static char reg_propname[] = "reg";
250 static char port_propname[] = "port";
251 static char swport_propname[] = "switch-port";
252 static char id_propname[] = "id";
253 
254 /* versions supported - in decreasing order */
255 static vgen_ver_t vgen_versions[VGEN_NUM_VER] = { {1, 0} };
256 
257 /* Tunables */
258 uint32_t vgen_hwd_interval = 1000;	/* handshake watchdog freq in msec */
259 uint32_t vgen_max_hretries = VNET_NUM_HANDSHAKES; /* # of handshake retries */
260 uint32_t vgen_ldcwr_retries = 10;	/* max # of ldc_write() retries */
261 uint32_t vgen_ldcup_retries = 5;	/* max # of ldc_up() retries */
262 uint32_t vgen_recv_delay = 1;		/* delay when rx descr not ready */
263 uint32_t vgen_recv_retries = 10;	/* retry when rx descr not ready */
264 
265 #ifdef DEBUG
266 /* flags to simulate error conditions for debugging */
267 int vgen_trigger_txtimeout = 0;
268 int vgen_trigger_rxlost = 0;
269 #endif
270 
271 /* MD update matching structure */
272 static md_prop_match_t	vport_prop_match[] = {
273 	{ MDET_PROP_VAL,	"id" },
274 	{ MDET_LIST_END,	NULL }
275 };
276 
277 static mdeg_node_match_t vport_match = { "virtual-device-port",
278 					vport_prop_match };
279 
280 /* template for matching a particular vnet instance */
281 static mdeg_prop_spec_t vgen_prop_template[] = {
282 	{ MDET_PROP_STR,	"name",		"network" },
283 	{ MDET_PROP_VAL,	"cfg-handle",	NULL },
284 	{ MDET_LIST_END,	NULL,		NULL }
285 };
286 
287 #define	VGEN_SET_MDEG_PROP_INST(specp, val)	(specp)[1].ps_val = (val)
288 
289 static int vgen_mdeg_cb(void *cb_argp, mdeg_result_t *resp);
290 
291 static mac_callbacks_t vgen_m_callbacks = {
292 	0,
293 	vgen_stat,
294 	vgen_start,
295 	vgen_stop,
296 	vgen_promisc,
297 	vgen_multicst,
298 	vgen_unicst,
299 	vgen_tx,
300 	NULL,
301 	NULL,
302 	NULL
303 };
304 
305 /* externs */
306 extern uint32_t vnet_ntxds;
307 extern uint32_t vnet_ldcwd_interval;
308 extern uint32_t vnet_ldcwd_txtimeout;
309 extern uint32_t vnet_ldc_mtu;
310 extern uint32_t vnet_nrbufs;
311 extern int _vnet_dbglevel;
312 extern void _vnetdebug_printf(void *vnetp, const char *fmt, ...);
313 
#ifdef DEBUG

/*
 * NOTE: definitions below need to be in sync with those in vnet.c
 */

/*
 * debug levels:
 * DBG_LEVEL1:	Function entry/exit tracing
 * DBG_LEVEL2:	Info messages
 * DBG_LEVEL3:	Warning messages
 * DBG_LEVEL4:	Error messages
 */

enum	{ DBG_LEVEL1 = 0x01, DBG_LEVEL2 = 0x02, DBG_LEVEL3 = 0x04,
	    DBG_LEVEL4 = 0x08 };

/*
 * Each macro takes a single, fully parenthesized argument list, e.g.
 *	DBG1((vnetp, "fmt %d\n", val));
 * and prints it only if the matching level bit is set in _vnet_dbglevel.
 */
#define	DBG1(_s)	do {						\
			    if ((_vnet_dbglevel & DBG_LEVEL1) != 0) {	\
					_vnetdebug_printf _s;		\
			    }					\
			_NOTE(CONSTCOND) } while (0)

#define	DBG2(_s)	do {						\
			    if ((_vnet_dbglevel & DBG_LEVEL2) != 0) {	\
					_vnetdebug_printf _s;		\
			    }					\
			_NOTE(CONSTCOND) } while (0)

#define	DWARN(_s)	do {						\
			    if ((_vnet_dbglevel & DBG_LEVEL3) != 0) {	\
					_vnetdebug_printf _s;		\
			    }					\
			_NOTE(CONSTCOND) } while (0)

#define	DERR(_s)	do {						\
			    if ((_vnet_dbglevel & DBG_LEVEL4) != 0) {	\
					_vnetdebug_printf _s;		\
			    }					\
			_NOTE(CONSTCOND) } while (0)

#else

/*
 * Non-DEBUG variants: the call is kept under "if (0)" so that the arguments
 * are still referenced and checked by the compiler, but never executed.
 * The do { } while (0) wrapper makes each macro a single statement; a bare
 * "if (0) ..." would silently capture an "else" that follows the macro.
 */
#define	DBG1(_s)	do {						\
			    if (0)					\
					_vnetdebug_printf _s;		\
			_NOTE(CONSTCOND) } while (0)

#define	DBG2(_s)	do {						\
			    if (0)					\
					_vnetdebug_printf _s;		\
			_NOTE(CONSTCOND) } while (0)

#define	DWARN(_s)	do {						\
			    if (0)					\
					_vnetdebug_printf _s;		\
			_NOTE(CONSTCOND) } while (0)

#define	DERR(_s)	do {						\
			    if (0)					\
					_vnetdebug_printf _s;		\
			_NOTE(CONSTCOND) } while (0)

#endif
363 
364 #ifdef DEBUG
365 
366 /* simulate handshake error conditions for debug */
367 uint32_t vgen_hdbg;
368 #define	HDBG_VERSION	0x1
369 #define	HDBG_TIMEOUT	0x2
370 #define	HDBG_BAD_SID	0x4
371 #define	HDBG_OUT_STATE	0x8
372 
373 #endif
374 
375 
376 
377 /*
378  * vgen_init() is called by an instance of vnet driver to initialize the
379  * corresponding generic proxy transport layer. The arguments passed by vnet
380  * are - an opaque pointer to the vnet instance, pointers to dev_info_t and
381  * the mac address of the vnet device, and a pointer to mac_register_t of
382  * the generic transport is returned in the last argument.
383  */
384 int
385 vgen_init(void *vnetp, dev_info_t *vnetdip, const uint8_t *macaddr,
386     mac_register_t **vgenmacp)
387 {
388 	vgen_t *vgenp;
389 	mac_register_t *macp;
390 	int instance;
391 
392 	if ((vnetp == NULL) || (vnetdip == NULL))
393 		return (DDI_FAILURE);
394 
395 	instance = ddi_get_instance(vnetdip);
396 
397 	DBG1((vnetp, "vgen_init: enter vnet_instance(%d)\n", instance));
398 
399 	vgenp = kmem_zalloc(sizeof (vgen_t), KM_SLEEP);
400 
401 	vgenp->vnetp = vnetp;
402 	vgenp->vnetdip = vnetdip;
403 	bcopy(macaddr, &(vgenp->macaddr), ETHERADDRL);
404 
405 	if ((macp = mac_alloc(MAC_VERSION)) == NULL) {
406 		KMEM_FREE(vgenp);
407 		return (DDI_FAILURE);
408 	}
409 	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
410 	macp->m_driver = vgenp;
411 	macp->m_dip = vnetdip;
412 	macp->m_src_addr = (uint8_t *)&(vgenp->macaddr);
413 	macp->m_callbacks = &vgen_m_callbacks;
414 	macp->m_min_sdu = 0;
415 	macp->m_max_sdu = ETHERMTU;
416 	vgenp->macp = macp;
417 
418 	/* allocate multicast table */
419 	vgenp->mctab = kmem_zalloc(VGEN_INIT_MCTAB_SIZE *
420 	    sizeof (struct ether_addr), KM_SLEEP);
421 	vgenp->mccount = 0;
422 	vgenp->mcsize = VGEN_INIT_MCTAB_SIZE;
423 
424 	mutex_init(&vgenp->lock, NULL, MUTEX_DRIVER, NULL);
425 
426 	/* register with MD event generator */
427 	if (vgen_mdeg_reg(vgenp) != DDI_SUCCESS) {
428 		mutex_destroy(&vgenp->lock);
429 		kmem_free(vgenp->mctab, VGEN_INIT_MCTAB_SIZE *
430 		    sizeof (struct ether_addr));
431 		mac_free(vgenp->macp);
432 		KMEM_FREE(vgenp);
433 		return (DDI_FAILURE);
434 	}
435 
436 	/* register macp of this vgen_t with vnet */
437 	*vgenmacp = vgenp->macp;
438 
439 	DBG1((vnetp, "vgen_init: exit vnet_instance(%d)\n", instance));
440 	return (DDI_SUCCESS);
441 }
442 
443 /*
444  * Called by vnet to undo the initializations done by vgen_init().
445  * The handle provided by generic transport during vgen_init() is the argument.
446  */
447 int
448 vgen_uninit(void *arg)
449 {
450 	vgen_t	*vgenp = (vgen_t *)arg;
451 	void	*vnetp;
452 	int instance;
453 	vio_mblk_pool_t *rp, *nrp;
454 
455 	if (vgenp == NULL) {
456 		return (DDI_FAILURE);
457 	}
458 
459 	instance = ddi_get_instance(vgenp->vnetdip);
460 	vnetp = vgenp->vnetp;
461 
462 	DBG1((vnetp, "vgen_uninit: enter vnet_instance(%d)\n", instance));
463 
464 	/* unregister with MD event generator */
465 	vgen_mdeg_unreg(vgenp);
466 
467 	mutex_enter(&vgenp->lock);
468 
469 	/* detach all ports from the device */
470 	vgen_detach_ports(vgenp);
471 
472 	/*
473 	 * free any pending rx mblk pools,
474 	 * that couldn't be freed previously during channel detach.
475 	 */
476 	rp = vgenp->rmp;
477 	while (rp != NULL) {
478 		nrp = vgenp->rmp = rp->nextp;
479 		if (vio_destroy_mblks(rp)) {
480 			vgenp->rmp = rp;
481 			mutex_exit(&vgenp->lock);
482 			return (DDI_FAILURE);
483 		}
484 		rp = nrp;
485 	}
486 
487 	/* free multicast table */
488 	kmem_free(vgenp->mctab, vgenp->mcsize * sizeof (struct ether_addr));
489 
490 	mac_free(vgenp->macp);
491 
492 	mutex_exit(&vgenp->lock);
493 
494 	mutex_destroy(&vgenp->lock);
495 
496 	KMEM_FREE(vgenp);
497 
498 	DBG1((vnetp, "vgen_uninit: exit vnet_instance(%d)\n", instance));
499 
500 	return (DDI_SUCCESS);
501 }
502 
503 /* enable transmit/receive for the device */
504 int
505 vgen_start(void *arg)
506 {
507 	vgen_t		*vgenp = (vgen_t *)arg;
508 
509 	DBG1((vgenp->vnetp, "vgen_start: enter\n"));
510 
511 	mutex_enter(&vgenp->lock);
512 	vgen_init_ports(vgenp);
513 	vgenp->flags |= VGEN_STARTED;
514 	mutex_exit(&vgenp->lock);
515 
516 	DBG1((vgenp->vnetp, "vgen_start: exit\n"));
517 	return (DDI_SUCCESS);
518 }
519 
520 /* stop transmit/receive */
521 void
522 vgen_stop(void *arg)
523 {
524 	vgen_t		*vgenp = (vgen_t *)arg;
525 
526 	DBG1((vgenp->vnetp, "vgen_stop: enter\n"));
527 
528 	mutex_enter(&vgenp->lock);
529 	vgen_uninit_ports(vgenp);
530 	vgenp->flags &= ~(VGEN_STARTED);
531 	mutex_exit(&vgenp->lock);
532 
533 	DBG1((vgenp->vnetp, "vgen_stop: exit\n"));
534 }
535 
536 /* vgen transmit function */
537 static mblk_t *
538 vgen_tx(void *arg, mblk_t *mp)
539 {
540 	vgen_port_t *portp;
541 	int status;
542 
543 	portp = (vgen_port_t *)arg;
544 	status = vgen_portsend(portp, mp);
545 	if (status != VGEN_SUCCESS) {
546 		/* failure */
547 		return (mp);
548 	}
549 	/* success */
550 	return (NULL);
551 }
552 
553 /* transmit packets over the given port */
554 static int
555 vgen_portsend(vgen_port_t *portp, mblk_t *mp)
556 {
557 	vgen_ldclist_t	*ldclp;
558 	vgen_ldc_t *ldcp;
559 	int status;
560 
561 	ldclp = &portp->ldclist;
562 	READ_ENTER(&ldclp->rwlock);
563 	/*
564 	 * NOTE: for now, we will assume we have a single channel.
565 	 */
566 	if (ldclp->headp == NULL) {
567 		RW_EXIT(&ldclp->rwlock);
568 		return (VGEN_FAILURE);
569 	}
570 	ldcp = ldclp->headp;
571 
572 	if (ldcp->need_resched) {
573 		/* out of tx resources, see vgen_ldcsend() for details. */
574 		mutex_enter(&ldcp->txlock);
575 		ldcp->statsp->tx_no_desc++;
576 		mutex_exit(&ldcp->txlock);
577 
578 		RW_EXIT(&ldclp->rwlock);
579 		return (VGEN_FAILURE);
580 	}
581 
582 	status  = vgen_ldcsend(ldcp, mp);
583 	RW_EXIT(&ldclp->rwlock);
584 
585 	if (status != VGEN_TX_SUCCESS)
586 		return (VGEN_FAILURE);
587 
588 	return (VGEN_SUCCESS);
589 }
590 
591 /* channel transmit function */
592 static int
593 vgen_ldcsend(vgen_ldc_t *ldcp, mblk_t *mp)
594 {
595 	void		*vnetp;
596 	size_t		size;
597 	int		rv = 0;
598 	uint64_t	tbuf_ix;
599 	vgen_private_desc_t	*tbufp;
600 	vgen_private_desc_t	*ntbufp;
601 	vnet_public_desc_t	*txdp;
602 	vio_dring_entry_hdr_t		*hdrp;
603 	vgen_stats_t		*statsp;
604 	struct ether_header	*ehp;
605 	boolean_t	is_bcast = B_FALSE;
606 	boolean_t	is_mcast = B_FALSE;
607 	size_t		mblksz;
608 	caddr_t		dst;
609 	mblk_t		*bp;
610 	ldc_status_t	istatus;
611 
612 	vnetp = LDC_TO_VNET(ldcp);
613 	statsp = ldcp->statsp;
614 	size = msgsize(mp);
615 
616 	DBG1((vnetp, "vgen_ldcsend: enter ldcid(%lx)\n", ldcp->ldc_id));
617 
618 	mutex_enter(&ldcp->txlock);
619 
620 	/* drop the packet if ldc is not up or handshake is not done */
621 	if (ldcp->ldc_status != LDC_UP) {
622 		DWARN((vnetp,
623 		    "vgen_ldcsend: id(%lx) status(%d), dropping packet\n",
624 		    ldcp->ldc_id, ldcp->ldc_status));
625 		/* retry ldc_up() if needed */
626 		if (ldcp->flags & CHANNEL_STARTED)
627 			(void) ldc_up(ldcp->ldc_handle);
628 		goto vgen_tx_exit;
629 	}
630 
631 	if (ldcp->hphase != VH_DONE) {
632 		DWARN((vnetp,
633 		    "vgen_ldcsend: id(%lx) hphase(%x), dropping packet\n",
634 		    ldcp->ldc_id, ldcp->hphase));
635 		goto vgen_tx_exit;
636 	}
637 
638 	if (size > (size_t)ETHERMAX) {
639 		DWARN((vnetp, "vgen_ldcsend: id(%lx) invalid size(%d)\n",
640 		    ldcp->ldc_id, size));
641 		goto vgen_tx_exit;
642 	}
643 
644 	/*
645 	 * allocate a descriptor
646 	 */
647 	tbufp = ldcp->next_tbufp;
648 	ntbufp = NEXTTBUF(ldcp, tbufp);
649 	if (ntbufp == ldcp->cur_tbufp) { /* out of tbufs/txds */
650 
651 		mutex_enter(&ldcp->tclock);
652 		if (ntbufp == ldcp->cur_tbufp) {
653 			ldcp->need_resched = B_TRUE;
654 			mutex_exit(&ldcp->tclock);
655 
656 			statsp->tx_no_desc++;
657 			mutex_exit(&ldcp->txlock);
658 
659 			return (VGEN_TX_NORESOURCES);
660 		}
661 		mutex_exit(&ldcp->tclock);
662 	}
663 
664 	if (size < ETHERMIN)
665 		size = ETHERMIN;
666 
667 	/* copy data into pre-allocated transmit buffer */
668 	dst = tbufp->datap + VNET_IPALIGN;
669 	for (bp = mp; bp != NULL; bp = bp->b_cont) {
670 		mblksz = MBLKL(bp);
671 		bcopy(bp->b_rptr, dst, mblksz);
672 		dst += mblksz;
673 	}
674 
675 	tbuf_ix = tbufp - ldcp->tbufp;
676 
677 	ehp = (struct ether_header *)tbufp->datap;
678 	is_bcast = IS_BROADCAST(ehp);
679 	is_mcast = IS_MULTICAST(ehp);
680 
681 	tbufp->flags = VGEN_PRIV_DESC_BUSY;
682 	tbufp->datalen = size;
683 
684 	/* initialize the corresponding public descriptor (txd) */
685 	txdp = tbufp->descp;
686 	hdrp = &txdp->hdr;
687 	txdp->nbytes = size;
688 	txdp->ncookies = tbufp->ncookies;
689 	bcopy((tbufp->memcookie), (txdp->memcookie),
690 		tbufp->ncookies * sizeof (ldc_mem_cookie_t));
691 	hdrp->dstate = VIO_DESC_READY;
692 
693 	/* send dring datamsg to the peer */
694 	if (ldcp->resched_peer) {
695 		rv = vgen_send_dring_data(ldcp, (uint32_t)tbuf_ix, -1);
696 		if (rv != 0) {
697 			/* vgen_send_dring_data() error: drop the packet */
698 			DWARN((vnetp,
699 			    "vgen_ldcsend: vgen_send_dring_data():  failed: "
700 			    "id(%lx) rv(%d) len (%d)\n",
701 			    ldcp->ldc_id, rv, size));
702 			tbufp->flags = VGEN_PRIV_DESC_FREE;	/* free tbuf */
703 			hdrp->dstate = VIO_DESC_FREE;	/* free txd */
704 			hdrp->ack = B_FALSE;
705 			statsp->oerrors++;
706 			goto vgen_tx_exit;
707 		}
708 		ldcp->resched_peer = B_FALSE;
709 	}
710 
711 	/* update next available tbuf in the ring */
712 	ldcp->next_tbufp = ntbufp;
713 
714 	/* update tx index */
715 	INCR_TXI(ldcp->next_txi, ldcp);
716 
717 	/* update stats */
718 	statsp->opackets++;
719 	statsp->obytes += size;
720 	if (is_bcast)
721 		statsp->brdcstxmt++;
722 	else if (is_mcast)
723 		statsp->multixmt++;
724 
725 vgen_tx_exit:
726 	mutex_exit(&ldcp->txlock);
727 
728 	if (rv == ECONNRESET) {
729 		/*
730 		 * Check if either callback thread or another tx thread is
731 		 * already running. Calling mutex_enter() will result in a
732 		 * deadlock if the other thread already holds cblock and is
733 		 * blocked in vnet_modify_fdb() (which is called from
734 		 * vgen_handle_evt_reset()) waiting for write access on rwlock,
735 		 * as this transmit thread already holds that lock as a reader
736 		 * in vnet_m_tx(). See comments in vnet_modify_fdb() in vnet.c.
737 		 */
738 		if (mutex_tryenter(&ldcp->cblock)) {
739 			if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
740 				DWARN((vnetp,
741 				    "vgen_ldcsend: ldc_status err id(%lx)\n"));
742 			} else {
743 				ldcp->ldc_status = istatus;
744 			}
745 			if (ldcp->ldc_status != LDC_UP) {
746 				/*
747 				 * Second arg is TRUE, as we know that
748 				 * the caller of this function - vnet_m_tx(),
749 				 * already holds fdb-rwlock as a reader.
750 				 */
751 				vgen_handle_evt_reset(ldcp, B_TRUE);
752 			}
753 			mutex_exit(&ldcp->cblock);
754 		}
755 	}
756 
757 	DBG1((vnetp, "vgen_ldcsend: exit: ldcid (%lx)\n", ldcp->ldc_id));
758 
759 	freemsg(mp);
760 	return (VGEN_TX_SUCCESS);
761 }
762 
763 /* enable/disable a multicast address */
764 int
765 vgen_multicst(void *arg, boolean_t add, const uint8_t *mca)
766 {
767 	vgen_t			*vgenp;
768 	vnet_mcast_msg_t	mcastmsg;
769 	vio_msg_tag_t		*tagp;
770 	vgen_port_t		*portp;
771 	vgen_portlist_t		*plistp;
772 	vgen_ldc_t		*ldcp;
773 	vgen_ldclist_t		*ldclp;
774 	void			*vnetp;
775 	struct ether_addr	*addrp;
776 	int			rv;
777 	uint32_t		i;
778 
779 	vgenp = (vgen_t *)arg;
780 	vnetp = vgenp->vnetp;
781 	addrp = (struct ether_addr *)mca;
782 	tagp = &mcastmsg.tag;
783 	bzero(&mcastmsg, sizeof (mcastmsg));
784 
785 	mutex_enter(&vgenp->lock);
786 
787 	plistp = &(vgenp->vgenports);
788 
789 	READ_ENTER(&plistp->rwlock);
790 
791 	portp = vgenp->vsw_portp;
792 	if (portp == NULL) {
793 		RW_EXIT(&plistp->rwlock);
794 		goto vgen_mcast_exit;
795 	}
796 	ldclp = &portp->ldclist;
797 
798 	READ_ENTER(&ldclp->rwlock);
799 
800 	ldcp = ldclp->headp;
801 	if (ldcp == NULL) {
802 		RW_EXIT(&ldclp->rwlock);
803 		RW_EXIT(&plistp->rwlock);
804 		goto vgen_mcast_exit;
805 	}
806 
807 	mutex_enter(&ldcp->cblock);
808 
809 	if (ldcp->hphase == VH_DONE) {
810 		/*
811 		 * If handshake is done, send a msg to vsw to add/remove
812 		 * the multicast address.
813 		 */
814 		tagp->vio_msgtype = VIO_TYPE_CTRL;
815 		tagp->vio_subtype = VIO_SUBTYPE_INFO;
816 		tagp->vio_subtype_env = VNET_MCAST_INFO;
817 		tagp->vio_sid = ldcp->local_sid;
818 		bcopy(mca, &(mcastmsg.mca), ETHERADDRL);
819 		mcastmsg.set = add;
820 		mcastmsg.count = 1;
821 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (mcastmsg),
822 		    B_FALSE);
823 		if (rv != VGEN_SUCCESS) {
824 			DWARN((vnetp, "vgen_mutlicst: vgen_sendmsg failed"
825 			    "id (%lx)\n", ldcp->ldc_id));
826 		}
827 	} else {
828 		/* set the flag to send a msg to vsw after handshake is done */
829 		ldcp->need_mcast_sync = B_TRUE;
830 	}
831 
832 	mutex_exit(&ldcp->cblock);
833 
834 	if (add) {
835 
836 		/* expand multicast table if necessary */
837 		if (vgenp->mccount >= vgenp->mcsize) {
838 			struct ether_addr	*newtab;
839 			uint32_t		newsize;
840 
841 
842 			newsize = vgenp->mcsize * 2;
843 
844 			newtab = kmem_zalloc(newsize *
845 			    sizeof (struct ether_addr), KM_NOSLEEP);
846 
847 			bcopy(vgenp->mctab, newtab, vgenp->mcsize *
848 			    sizeof (struct ether_addr));
849 			kmem_free(vgenp->mctab,
850 			    vgenp->mcsize * sizeof (struct ether_addr));
851 
852 			vgenp->mctab = newtab;
853 			vgenp->mcsize = newsize;
854 		}
855 
856 		/* add address to the table */
857 		vgenp->mctab[vgenp->mccount++] = *addrp;
858 
859 	} else {
860 
861 		/* delete address from the table */
862 		for (i = 0; i < vgenp->mccount; i++) {
863 			if (ether_cmp(addrp, &(vgenp->mctab[i])) == 0) {
864 
865 				/*
866 				 * If there's more than one address in this
867 				 * table, delete the unwanted one by moving
868 				 * the last one in the list over top of it;
869 				 * otherwise, just remove it.
870 				 */
871 				if (vgenp->mccount > 1) {
872 					vgenp->mctab[i] =
873 						vgenp->mctab[vgenp->mccount-1];
874 				}
875 				vgenp->mccount--;
876 				break;
877 			}
878 		}
879 	}
880 
881 	RW_EXIT(&ldclp->rwlock);
882 	RW_EXIT(&plistp->rwlock);
883 
884 vgen_mcast_exit:
885 	mutex_exit(&vgenp->lock);
886 	return (DDI_SUCCESS);
887 }
888 
889 /* set or clear promiscuous mode on the device */
890 static int
891 vgen_promisc(void *arg, boolean_t on)
892 {
893 	_NOTE(ARGUNUSED(arg, on))
894 	return (DDI_SUCCESS);
895 }
896 
897 /* set the unicast mac address of the device */
898 static int
899 vgen_unicst(void *arg, const uint8_t *mca)
900 {
901 	_NOTE(ARGUNUSED(arg, mca))
902 	return (DDI_SUCCESS);
903 }
904 
905 /* get device statistics */
906 int
907 vgen_stat(void *arg, uint_t stat, uint64_t *val)
908 {
909 	vgen_t		*vgenp = (vgen_t *)arg;
910 	vgen_port_t	*portp;
911 	vgen_portlist_t	*plistp;
912 
913 	*val = 0;
914 
915 	plistp = &(vgenp->vgenports);
916 	READ_ENTER(&plistp->rwlock);
917 
918 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
919 		*val += vgen_port_stat(portp, stat);
920 	}
921 
922 	RW_EXIT(&plistp->rwlock);
923 
924 	return (0);
925 }
926 
927 static void
928 vgen_ioctl(void *arg, queue_t *wq, mblk_t *mp)
929 {
930 	 _NOTE(ARGUNUSED(arg, wq, mp))
931 }
932 
933 /* vgen internal functions */
934 /* detach all ports from the device */
935 static void
936 vgen_detach_ports(vgen_t *vgenp)
937 {
938 	vgen_port_t	*portp;
939 	vgen_portlist_t	*plistp;
940 
941 	plistp = &(vgenp->vgenports);
942 	WRITE_ENTER(&plistp->rwlock);
943 
944 	while ((portp = plistp->headp) != NULL) {
945 		vgen_port_detach(portp);
946 	}
947 
948 	RW_EXIT(&plistp->rwlock);
949 }
950 
951 /*
952  * detach the given port.
953  */
954 static void
955 vgen_port_detach(vgen_port_t *portp)
956 {
957 	vgen_t		*vgenp;
958 	vgen_ldclist_t	*ldclp;
959 	int		port_num;
960 
961 	vgenp = portp->vgenp;
962 	port_num = portp->port_num;
963 
964 	DBG1((vgenp->vnetp,
965 	    "vgen_port_detach: enter: port_num(%d)\n", port_num));
966 
967 	/* remove it from port list */
968 	vgen_port_list_remove(portp);
969 
970 	/* detach channels from this port */
971 	ldclp = &portp->ldclist;
972 	WRITE_ENTER(&ldclp->rwlock);
973 	while (ldclp->headp) {
974 		vgen_ldc_detach(ldclp->headp);
975 	}
976 	RW_EXIT(&ldclp->rwlock);
977 
978 	if (vgenp->vsw_portp == portp) {
979 		vgenp->vsw_portp = NULL;
980 	}
981 	KMEM_FREE(portp);
982 
983 	DBG1((vgenp->vnetp,
984 	    "vgen_port_detach: exit: port_num(%d)\n", port_num));
985 }
986 
987 /* add a port to port list */
988 static void
989 vgen_port_list_insert(vgen_port_t *portp)
990 {
991 	vgen_portlist_t *plistp;
992 	vgen_t *vgenp;
993 
994 	vgenp = portp->vgenp;
995 	plistp = &(vgenp->vgenports);
996 
997 	if (plistp->headp == NULL) {
998 		plistp->headp = portp;
999 	} else {
1000 		plistp->tailp->nextp = portp;
1001 	}
1002 	plistp->tailp = portp;
1003 	portp->nextp = NULL;
1004 }
1005 
1006 /* remove a port from port list */
1007 static void
1008 vgen_port_list_remove(vgen_port_t *portp)
1009 {
1010 	vgen_port_t *prevp;
1011 	vgen_port_t *nextp;
1012 	vgen_portlist_t *plistp;
1013 	vgen_t *vgenp;
1014 
1015 	vgenp = portp->vgenp;
1016 
1017 	plistp = &(vgenp->vgenports);
1018 
1019 	if (plistp->headp == NULL)
1020 		return;
1021 
1022 	if (portp == plistp->headp) {
1023 		plistp->headp = portp->nextp;
1024 		if (portp == plistp->tailp)
1025 			plistp->tailp = plistp->headp;
1026 	} else {
1027 		for (prevp = plistp->headp; ((nextp = prevp->nextp) != NULL) &&
1028 		    (nextp != portp); prevp = nextp);
1029 		if (nextp == portp) {
1030 			prevp->nextp = portp->nextp;
1031 		}
1032 		if (portp == plistp->tailp)
1033 			plistp->tailp = prevp;
1034 	}
1035 }
1036 
1037 /* lookup a port in the list based on port_num */
1038 static vgen_port_t *
1039 vgen_port_lookup(vgen_portlist_t *plistp, int port_num)
1040 {
1041 	vgen_port_t *portp = NULL;
1042 
1043 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
1044 		if (portp->port_num == port_num) {
1045 			break;
1046 		}
1047 	}
1048 
1049 	return (portp);
1050 }
1051 
1052 /* enable ports for transmit/receive */
1053 static void
1054 vgen_init_ports(vgen_t *vgenp)
1055 {
1056 	vgen_port_t	*portp;
1057 	vgen_portlist_t	*plistp;
1058 
1059 	plistp = &(vgenp->vgenports);
1060 	READ_ENTER(&plistp->rwlock);
1061 
1062 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
1063 		vgen_port_init(portp);
1064 	}
1065 
1066 	RW_EXIT(&plistp->rwlock);
1067 }
1068 
1069 static void
1070 vgen_port_init(vgen_port_t *portp)
1071 {
1072 	vgen_t *vgenp;
1073 
1074 	vgenp = portp->vgenp;
1075 	/*
1076 	 * Create fdb entry in vnet, corresponding to the mac
1077 	 * address of this port. Note that the port specified
1078 	 * is vsw-port. This is done so that vsw-port acts
1079 	 * as the route to reach this macaddr, until the
1080 	 * channel for this port comes up (LDC_UP) and
1081 	 * handshake is done successfully.
1082 	 * eg, if the peer is OBP-vnet, it may not bring the
1083 	 * channel up for this port and may communicate via
1084 	 * vsw to reach this port.
1085 	 * Later, when Solaris-vnet comes up at the other end
1086 	 * of the channel for this port and brings up the channel,
1087 	 * it is an indication that peer vnet is capable of
1088 	 * distributed switching, so the direct route through this
1089 	 * port is specified in fdb, using vnet_modify_fdb(macaddr);
1090 	 */
1091 	vnet_add_fdb(vgenp->vnetp, (uint8_t *)&portp->macaddr,
1092 	    vgen_tx, vgenp->vsw_portp);
1093 
1094 	if (portp == vgenp->vsw_portp) {
1095 		/*
1096 		 * create the default route entry in vnet's fdb.
1097 		 * This is the entry used by vnet to reach
1098 		 * unknown destinations, which basically goes
1099 		 * through vsw on domain0 and out through the
1100 		 * physical device bound to vsw.
1101 		 */
1102 		vnet_add_def_rte(vgenp->vnetp, vgen_tx, portp);
1103 	}
1104 
1105 	/* Bring up the channels of this port */
1106 	vgen_init_ldcs(portp);
1107 }
1108 
1109 /* disable transmit/receive on ports */
1110 static void
1111 vgen_uninit_ports(vgen_t *vgenp)
1112 {
1113 	vgen_port_t	*portp;
1114 	vgen_portlist_t	*plistp;
1115 
1116 	plistp = &(vgenp->vgenports);
1117 	READ_ENTER(&plistp->rwlock);
1118 
1119 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
1120 		vgen_port_uninit(portp);
1121 	}
1122 
1123 	RW_EXIT(&plistp->rwlock);
1124 }
1125 
1126 static void
1127 vgen_port_uninit(vgen_port_t *portp)
1128 {
1129 	vgen_t *vgenp;
1130 
1131 	vgenp = portp->vgenp;
1132 
1133 	vgen_uninit_ldcs(portp);
1134 	/* delete the entry in vnet's fdb for this port */
1135 	vnet_del_fdb(vgenp->vnetp, (uint8_t *)&portp->macaddr);
1136 	if (portp == vgenp->vsw_portp) {
1137 		/*
1138 		 * if this is vsw-port, then delete the default
1139 		 * route entry in vnet's fdb.
1140 		 */
1141 		vnet_del_def_rte(vgenp->vnetp);
1142 	}
1143 }
1144 
1145 /* register with MD event generator */
1146 static int
1147 vgen_mdeg_reg(vgen_t *vgenp)
1148 {
1149 	mdeg_prop_spec_t	*pspecp;
1150 	mdeg_node_spec_t	*parentp;
1151 	uint_t			templatesz;
1152 	int			rv;
1153 	mdeg_handle_t		hdl;
1154 	int			i;
1155 	void			*vnetp = vgenp->vnetp;
1156 
1157 	i = ddi_prop_get_int(DDI_DEV_T_ANY, vgenp->vnetdip,
1158 			DDI_PROP_DONTPASS, reg_propname, -1);
1159 	if (i == -1) {
1160 		return (DDI_FAILURE);
1161 	}
1162 	templatesz = sizeof (vgen_prop_template);
1163 	pspecp = kmem_zalloc(templatesz, KM_NOSLEEP);
1164 	if (pspecp == NULL) {
1165 		return (DDI_FAILURE);
1166 	}
1167 	parentp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_NOSLEEP);
1168 	if (parentp == NULL) {
1169 		kmem_free(pspecp, templatesz);
1170 		return (DDI_FAILURE);
1171 	}
1172 
1173 	bcopy(vgen_prop_template, pspecp, templatesz);
1174 
1175 	/*
1176 	 * NOTE: The instance here refers to the value of "reg" property and
1177 	 * not the dev_info instance (ddi_get_instance()) of vnet.
1178 	 */
1179 	VGEN_SET_MDEG_PROP_INST(pspecp, i);
1180 
1181 	parentp->namep = "virtual-device";
1182 	parentp->specp = pspecp;
1183 
1184 	/* save parentp in vgen_t */
1185 	vgenp->mdeg_parentp = parentp;
1186 
1187 	rv = mdeg_register(parentp, &vport_match, vgen_mdeg_cb, vgenp, &hdl);
1188 	if (rv != MDEG_SUCCESS) {
1189 		DERR((vnetp, "vgen_mdeg_reg: mdeg_register failed\n"));
1190 		KMEM_FREE(parentp);
1191 		kmem_free(pspecp, templatesz);
1192 		vgenp->mdeg_parentp = NULL;
1193 		return (DDI_FAILURE);
1194 	}
1195 
1196 	/* save mdeg handle in vgen_t */
1197 	vgenp->mdeg_hdl = hdl;
1198 
1199 	return (DDI_SUCCESS);
1200 }
1201 
1202 /* unregister with MD event generator */
1203 static void
1204 vgen_mdeg_unreg(vgen_t *vgenp)
1205 {
1206 	(void) mdeg_unregister(vgenp->mdeg_hdl);
1207 	kmem_free(vgenp->mdeg_parentp->specp, sizeof (vgen_prop_template));
1208 	KMEM_FREE(vgenp->mdeg_parentp);
1209 	vgenp->mdeg_parentp = NULL;
1210 	vgenp->mdeg_hdl = NULL;
1211 }
1212 
1213 /* callback function registered with MD event generator */
1214 static int
1215 vgen_mdeg_cb(void *cb_argp, mdeg_result_t *resp)
1216 {
1217 	int idx;
1218 	int vsw_idx = -1;
1219 	uint64_t val;
1220 	vgen_t *vgenp;
1221 
1222 	if ((resp == NULL) || (cb_argp == NULL)) {
1223 		return (MDEG_FAILURE);
1224 	}
1225 
1226 	vgenp = (vgen_t *)cb_argp;
1227 	DBG1((vgenp->vnetp, "vgen_mdeg_cb: enter\n"));
1228 
1229 	mutex_enter(&vgenp->lock);
1230 
1231 	DBG1((vgenp->vnetp,
1232 	    "vgen_mdeg_cb: ports: removed(%x), added(%x), updated(%x)\n",
1233 	    resp->removed.nelem, resp->added.nelem, resp->match_curr.nelem));
1234 
1235 	for (idx = 0; idx < resp->removed.nelem; idx++) {
1236 		(void) vgen_remove_port(vgenp, resp->removed.mdp,
1237 		    resp->removed.mdep[idx]);
1238 	}
1239 
1240 	if (vgenp->vsw_portp == NULL) {
1241 		/*
1242 		 * find vsw_port and add it first, because other ports need
1243 		 * this when adding fdb entry (see vgen_port_init()).
1244 		 */
1245 		for (idx = 0; idx < resp->added.nelem; idx++) {
1246 			if (!(md_get_prop_val(resp->added.mdp,
1247 			    resp->added.mdep[idx], swport_propname, &val))) {
1248 				if (val == 0) {
1249 					/*
1250 					 * This port is connected to the
1251 					 * vsw on dom0.
1252 					 */
1253 					vsw_idx = idx;
1254 					(void) vgen_add_port(vgenp,
1255 					    resp->added.mdp,
1256 					    resp->added.mdep[idx]);
1257 					break;
1258 				}
1259 			}
1260 		}
1261 		if (vsw_idx == -1) {
1262 			DWARN((vgenp->vnetp, "vgen_mdeg_cb: "
1263 			    "can't find vsw_port\n"));
1264 			return (MDEG_FAILURE);
1265 		}
1266 	}
1267 
1268 	for (idx = 0; idx < resp->added.nelem; idx++) {
1269 		if ((vsw_idx != -1) && (vsw_idx == idx)) /* skip vsw_port */
1270 			continue;
1271 		(void) vgen_add_port(vgenp, resp->added.mdp,
1272 		    resp->added.mdep[idx]);
1273 	}
1274 
1275 	for (idx = 0; idx < resp->match_curr.nelem; idx++) {
1276 		(void) vgen_update_port(vgenp, resp->match_curr.mdp,
1277 		    resp->match_curr.mdep[idx],
1278 		    resp->match_prev.mdp,
1279 		    resp->match_prev.mdep[idx]);
1280 	}
1281 
1282 	mutex_exit(&vgenp->lock);
1283 	DBG1((vgenp->vnetp, "vgen_mdeg_cb: exit\n"));
1284 	return (MDEG_SUCCESS);
1285 }
1286 
1287 /* add a new port to the device */
1288 static int
1289 vgen_add_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
1290 {
1291 	uint64_t	port_num;
1292 	uint64_t	*ldc_ids;
1293 	uint64_t	macaddr;
1294 	uint64_t	val;
1295 	int		num_ldcs;
1296 	int		vsw_port = B_FALSE;
1297 	int		i;
1298 	int		addrsz;
1299 	int		num_nodes = 0;
1300 	int		listsz = 0;
1301 	mde_cookie_t	*listp = NULL;
1302 	uint8_t		*addrp;
1303 	struct ether_addr	ea;
1304 
1305 	/* read "id" property to get the port number */
1306 	if (md_get_prop_val(mdp, mdex, id_propname, &port_num)) {
1307 		DWARN((vgenp->vnetp,
1308 		    "vgen_add_port: prop(%s) not found\n", id_propname));
1309 		return (DDI_FAILURE);
1310 	}
1311 
1312 	/*
1313 	 * Find the channel endpoint node(s) under this port node.
1314 	 */
1315 	if ((num_nodes = md_node_count(mdp)) <= 0) {
1316 		DWARN((vgenp->vnetp,
1317 		    "vgen_add_port: invalid number of nodes found (%d)",
1318 		    num_nodes));
1319 		return (DDI_FAILURE);
1320 	}
1321 
1322 	/* allocate space for node list */
1323 	listsz = num_nodes * sizeof (mde_cookie_t);
1324 	listp = kmem_zalloc(listsz, KM_NOSLEEP);
1325 	if (listp == NULL)
1326 		return (DDI_FAILURE);
1327 
1328 	num_ldcs = md_scan_dag(mdp, mdex,
1329 		md_find_name(mdp, channel_propname),
1330 		md_find_name(mdp, "fwd"), listp);
1331 
1332 	if (num_ldcs <= 0) {
1333 		DWARN((vgenp->vnetp,
1334 		    "vgen_add_port: can't find %s nodes", channel_propname));
1335 		kmem_free(listp, listsz);
1336 		return (DDI_FAILURE);
1337 	}
1338 
1339 	DBG2((vgenp->vnetp, "vgen_add_port: num_ldcs %d", num_ldcs));
1340 
1341 	ldc_ids = kmem_zalloc(num_ldcs * sizeof (uint64_t), KM_NOSLEEP);
1342 	if (ldc_ids == NULL) {
1343 		kmem_free(listp, listsz);
1344 		return (DDI_FAILURE);
1345 	}
1346 
1347 	for (i = 0; i < num_ldcs; i++) {
1348 		/* read channel ids */
1349 		if (md_get_prop_val(mdp, listp[i], id_propname, &ldc_ids[i])) {
1350 			DWARN((vgenp->vnetp,
1351 			    "vgen_add_port: prop(%s) not found\n",
1352 			    id_propname));
1353 			kmem_free(listp, listsz);
1354 			kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
1355 			return (DDI_FAILURE);
1356 		}
1357 		DBG2((vgenp->vnetp, "vgen_add_port: ldc_id 0x%llx",
1358 		    ldc_ids[i]));
1359 	}
1360 
1361 	kmem_free(listp, listsz);
1362 
1363 	if (md_get_prop_data(mdp, mdex, rmacaddr_propname, &addrp,
1364 	    &addrsz)) {
1365 		DWARN((vgenp->vnetp,
1366 		    "vgen_add_port: prop(%s) not found\n", rmacaddr_propname));
1367 		kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
1368 		return (DDI_FAILURE);
1369 	}
1370 
1371 	if (addrsz < ETHERADDRL) {
1372 		DWARN((vgenp->vnetp,
1373 		    "vgen_add_port: invalid address size (%d)\n", addrsz));
1374 		kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
1375 		return (DDI_FAILURE);
1376 	}
1377 
1378 	macaddr = *((uint64_t *)addrp);
1379 
1380 	DBG2((vgenp->vnetp, "vgen_add_port: remote mac address 0x%llx\n",
1381 	    macaddr));
1382 
1383 	for (i = ETHERADDRL - 1; i >= 0; i--) {
1384 		ea.ether_addr_octet[i] = macaddr & 0xFF;
1385 		macaddr >>= 8;
1386 	}
1387 
1388 	if (vgenp->vsw_portp == NULL) {
1389 		if (!(md_get_prop_val(mdp, mdex, swport_propname, &val))) {
1390 			if (val == 0) {
1391 				/* This port is connected to the vsw on dom0 */
1392 				vsw_port = B_TRUE;
1393 			}
1394 		}
1395 	}
1396 	(void) vgen_port_attach_mdeg(vgenp, (int)port_num, ldc_ids, num_ldcs,
1397 	    &ea, vsw_port);
1398 
1399 	kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
1400 
1401 	return (DDI_SUCCESS);
1402 }
1403 
1404 /* remove a port from the device */
1405 static int
1406 vgen_remove_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
1407 {
1408 	uint64_t	port_num;
1409 	vgen_port_t	*portp;
1410 	vgen_portlist_t	*plistp;
1411 
1412 	/* read "id" property to get the port number */
1413 	if (md_get_prop_val(mdp, mdex, id_propname, &port_num)) {
1414 		DWARN((vgenp->vnetp,
1415 		    "vgen_remove_port: prop(%s) not found\n", id_propname));
1416 		return (DDI_FAILURE);
1417 	}
1418 
1419 	plistp = &(vgenp->vgenports);
1420 
1421 	WRITE_ENTER(&plistp->rwlock);
1422 	portp = vgen_port_lookup(plistp, (int)port_num);
1423 	if (portp == NULL) {
1424 		DWARN((vgenp->vnetp, "vgen_remove_port: can't find port(%lx)\n",
1425 		    port_num));
1426 		RW_EXIT(&plistp->rwlock);
1427 		return (DDI_FAILURE);
1428 	}
1429 
1430 	vgen_port_detach_mdeg(portp);
1431 	RW_EXIT(&plistp->rwlock);
1432 
1433 	return (DDI_SUCCESS);
1434 }
1435 
1436 /* attach a port to the device based on mdeg data */
1437 static int
1438 vgen_port_attach_mdeg(vgen_t *vgenp, int port_num, uint64_t *ldcids,
1439 	int num_ids, struct ether_addr *macaddr, boolean_t vsw_port)
1440 {
1441 	vgen_port_t		*portp;
1442 	vgen_portlist_t		*plistp;
1443 	int			i;
1444 
1445 	portp = kmem_zalloc(sizeof (vgen_port_t), KM_NOSLEEP);
1446 	if (portp == NULL) {
1447 		return (DDI_FAILURE);
1448 	}
1449 	portp->vgenp = vgenp;
1450 	portp->port_num = port_num;
1451 
1452 	DBG1((vgenp->vnetp,
1453 	    "vgen_port_attach_mdeg: port_num(%d)\n", portp->port_num));
1454 
1455 	portp->ldclist.num_ldcs = 0;
1456 	portp->ldclist.headp = NULL;
1457 	rw_init(&portp->ldclist.rwlock, NULL, RW_DRIVER, NULL);
1458 
1459 	ether_copy(macaddr, &portp->macaddr);
1460 	for (i = 0; i < num_ids; i++) {
1461 		DBG2((vgenp->vnetp, "vgen_port_attach_mdeg: ldcid (%lx)\n",
1462 		    ldcids[i]));
1463 		(void) vgen_ldc_attach(portp, ldcids[i]);
1464 	}
1465 
1466 	/* link it into the list of ports */
1467 	plistp = &(vgenp->vgenports);
1468 	WRITE_ENTER(&plistp->rwlock);
1469 	vgen_port_list_insert(portp);
1470 	RW_EXIT(&plistp->rwlock);
1471 
1472 	/* This port is connected to the vsw on domain0 */
1473 	if (vsw_port)
1474 		vgenp->vsw_portp = portp;
1475 
1476 	if (vgenp->flags & VGEN_STARTED) {	/* interface is configured */
1477 		vgen_port_init(portp);
1478 	}
1479 
1480 	DBG1((vgenp->vnetp,
1481 	    "vgen_port_attach_mdeg: exit: port_num(%d)\n", portp->port_num));
1482 	return (DDI_SUCCESS);
1483 }
1484 
1485 /* detach a port from the device based on mdeg data */
1486 static void
1487 vgen_port_detach_mdeg(vgen_port_t *portp)
1488 {
1489 	vgen_t *vgenp = portp->vgenp;
1490 
1491 	DBG1((vgenp->vnetp,
1492 	    "vgen_port_detach_mdeg: enter: port_num(%d)\n", portp->port_num));
1493 	/* stop the port if needed */
1494 	if (vgenp->flags & VGEN_STARTED) {
1495 		vgen_port_uninit(portp);
1496 	}
1497 	vgen_port_detach(portp);
1498 
1499 	DBG1((vgenp->vnetp,
1500 	    "vgen_port_detach_mdeg: exit: port_num(%d)\n", portp->port_num));
1501 }
1502 
1503 static int
1504 vgen_update_port(vgen_t *vgenp, md_t *curr_mdp, mde_cookie_t curr_mdex,
1505 	md_t *prev_mdp, mde_cookie_t prev_mdex)
1506 {
1507 	 _NOTE(ARGUNUSED(vgenp, curr_mdp, curr_mdex, prev_mdp, prev_mdex))
1508 
1509 	/* NOTE: TBD */
1510 	return (DDI_SUCCESS);
1511 }
1512 
1513 static uint64_t
1514 vgen_port_stat(vgen_port_t *portp, uint_t stat)
1515 {
1516 	vgen_ldclist_t	*ldclp;
1517 	vgen_ldc_t *ldcp;
1518 	uint64_t	val;
1519 
1520 	val = 0;
1521 	ldclp = &portp->ldclist;
1522 
1523 	READ_ENTER(&ldclp->rwlock);
1524 	for (ldcp = ldclp->headp; ldcp != NULL; ldcp = ldcp->nextp) {
1525 		val += vgen_ldc_stat(ldcp, stat);
1526 	}
1527 	RW_EXIT(&ldclp->rwlock);
1528 
1529 	return (val);
1530 }
1531 
1532 /* attach the channel corresponding to the given ldc_id to the port */
1533 static int
1534 vgen_ldc_attach(vgen_port_t *portp, uint64_t ldc_id)
1535 {
1536 	vgen_t 		*vgenp;
1537 	vgen_ldclist_t	*ldclp;
1538 	vgen_ldc_t 	*ldcp, **prev_ldcp;
1539 	ldc_attr_t 	attr;
1540 	int 		status;
1541 	ldc_status_t	istatus;
1542 	enum		{AST_init = 0x0, AST_ldc_alloc = 0x1,
1543 			AST_mutex_init = 0x2, AST_ldc_init = 0x4,
1544 			AST_ldc_reg_cb = 0x8, AST_alloc_tx_ring = 0x10,
1545 			AST_create_rxmblks = 0x20}
1546 			attach_state;
1547 
1548 	attach_state = AST_init;
1549 	vgenp = portp->vgenp;
1550 	ldclp = &portp->ldclist;
1551 
1552 	ldcp = kmem_zalloc(sizeof (vgen_ldc_t), KM_NOSLEEP);
1553 	if (ldcp == NULL) {
1554 		goto ldc_attach_failed;
1555 	}
1556 	ldcp->ldc_id = ldc_id;
1557 	ldcp->portp = portp;
1558 
1559 	attach_state |= AST_ldc_alloc;
1560 
1561 	mutex_init(&ldcp->txlock, NULL, MUTEX_DRIVER, NULL);
1562 	mutex_init(&ldcp->cblock, NULL, MUTEX_DRIVER, NULL);
1563 	mutex_init(&ldcp->tclock, NULL, MUTEX_DRIVER, NULL);
1564 
1565 	attach_state |= AST_mutex_init;
1566 
1567 	attr.devclass = LDC_DEV_NT;
1568 	attr.instance = ddi_get_instance(vgenp->vnetdip);
1569 	attr.mode = LDC_MODE_UNRELIABLE;
1570 	attr.mtu = vnet_ldc_mtu;
1571 	status = ldc_init(ldc_id, &attr, &ldcp->ldc_handle);
1572 	if (status != 0) {
1573 		DWARN((vgenp->vnetp, "ldc_init failed, id (%lx) rv (%d)\n",
1574 		    ldc_id, status));
1575 		goto ldc_attach_failed;
1576 	}
1577 	attach_state |= AST_ldc_init;
1578 
1579 	status = ldc_reg_callback(ldcp->ldc_handle, vgen_ldc_cb, (caddr_t)ldcp);
1580 	if (status != 0) {
1581 		DWARN((vgenp->vnetp,
1582 		    "ldc_reg_callback failed, id (%lx) rv (%d)\n",
1583 		    ldc_id, status));
1584 		goto ldc_attach_failed;
1585 	}
1586 	attach_state |= AST_ldc_reg_cb;
1587 
1588 	(void) ldc_status(ldcp->ldc_handle, &istatus);
1589 	ASSERT(istatus == LDC_INIT);
1590 	ldcp->ldc_status = istatus;
1591 
1592 	/* allocate transmit resources */
1593 	status = vgen_alloc_tx_ring(ldcp);
1594 	if (status != 0) {
1595 		goto ldc_attach_failed;
1596 	}
1597 	attach_state |= AST_alloc_tx_ring;
1598 
1599 	/* allocate receive resources */
1600 	ldcp->num_rbufs = vnet_nrbufs;
1601 	ldcp->rmp = NULL;
1602 	status = vio_create_mblks(ldcp->num_rbufs, VGEN_DBLK_SZ,
1603 		&(ldcp->rmp));
1604 	if (status != 0) {
1605 		goto ldc_attach_failed;
1606 	}
1607 	attach_state |= AST_create_rxmblks;
1608 
1609 	/* Setup kstats for the channel */
1610 	status = vgen_setup_kstats(ldcp);
1611 	if (status != VGEN_SUCCESS) {
1612 		goto ldc_attach_failed;
1613 	}
1614 
1615 	/* initialize vgen_versions supported */
1616 	bcopy(vgen_versions, ldcp->vgen_versions, sizeof (ldcp->vgen_versions));
1617 
1618 	/* link it into the list of channels for this port */
1619 	WRITE_ENTER(&ldclp->rwlock);
1620 	prev_ldcp = (vgen_ldc_t **)(&ldclp->headp);
1621 	ldcp->nextp = *prev_ldcp;
1622 	*prev_ldcp = ldcp;
1623 	ldclp->num_ldcs++;
1624 	RW_EXIT(&ldclp->rwlock);
1625 
1626 	ldcp->flags |= CHANNEL_ATTACHED;
1627 	return (DDI_SUCCESS);
1628 
1629 ldc_attach_failed:
1630 	if (attach_state & AST_create_rxmblks) {
1631 		(void) vio_destroy_mblks(ldcp->rmp);
1632 	}
1633 	if (attach_state & AST_alloc_tx_ring) {
1634 		vgen_free_tx_ring(ldcp);
1635 	}
1636 	if (attach_state & AST_ldc_reg_cb) {
1637 		(void) ldc_unreg_callback(ldcp->ldc_handle);
1638 	}
1639 	if (attach_state & AST_ldc_init) {
1640 		(void) ldc_fini(ldcp->ldc_handle);
1641 	}
1642 	if (attach_state & AST_mutex_init) {
1643 		mutex_destroy(&ldcp->tclock);
1644 		mutex_destroy(&ldcp->txlock);
1645 		mutex_destroy(&ldcp->cblock);
1646 	}
1647 	if (attach_state & AST_ldc_alloc) {
1648 		KMEM_FREE(ldcp);
1649 	}
1650 	return (DDI_FAILURE);
1651 }
1652 
1653 /* detach a channel from the port */
1654 static void
1655 vgen_ldc_detach(vgen_ldc_t *ldcp)
1656 {
1657 	vgen_port_t	*portp;
1658 	vgen_t 		*vgenp;
1659 	vgen_ldc_t 	*pldcp;
1660 	vgen_ldc_t	**prev_ldcp;
1661 	vgen_ldclist_t	*ldclp;
1662 
1663 	portp = ldcp->portp;
1664 	vgenp = portp->vgenp;
1665 	ldclp = &portp->ldclist;
1666 
1667 	prev_ldcp =  (vgen_ldc_t **)&ldclp->headp;
1668 	for (; (pldcp = *prev_ldcp) != NULL; prev_ldcp = &pldcp->nextp) {
1669 		if (pldcp == ldcp) {
1670 			break;
1671 		}
1672 	}
1673 
1674 	if (pldcp == NULL) {
1675 		/* invalid ldcp? */
1676 		return;
1677 	}
1678 
1679 	if (ldcp->ldc_status != LDC_INIT) {
1680 		DWARN((vgenp->vnetp,
1681 		    "vgen_ldc_detach: ldc_status is not INIT id(%lx)\n",
1682 			    ldcp->ldc_id));
1683 	}
1684 
1685 	if (ldcp->flags & CHANNEL_ATTACHED) {
1686 		ldcp->flags &= ~(CHANNEL_ATTACHED);
1687 
1688 		vgen_destroy_kstats(ldcp);
1689 
1690 		/* free receive resources */
1691 		if (vio_destroy_mblks(ldcp->rmp)) {
1692 			/*
1693 			 * if we cannot reclaim all mblks, put this
1694 			 * on the list of pools to be reclaimed when the
1695 			 * device gets detached (see vgen_uninit()).
1696 			 */
1697 			ldcp->rmp->nextp =  vgenp->rmp;
1698 			vgenp->rmp = ldcp->rmp;
1699 		}
1700 
1701 		/* free transmit resources */
1702 		vgen_free_tx_ring(ldcp);
1703 
1704 		(void) ldc_unreg_callback(ldcp->ldc_handle);
1705 		(void) ldc_fini(ldcp->ldc_handle);
1706 		mutex_destroy(&ldcp->tclock);
1707 		mutex_destroy(&ldcp->txlock);
1708 		mutex_destroy(&ldcp->cblock);
1709 
1710 		/* unlink it from the list */
1711 		*prev_ldcp = ldcp->nextp;
1712 		ldclp->num_ldcs--;
1713 		KMEM_FREE(ldcp);
1714 	}
1715 }
1716 
1717 /*
1718  * This function allocates transmit resources for the channel.
1719  * The resources consist of a transmit descriptor ring and an associated
1720  * transmit buffer ring.
1721  */
1722 static int
1723 vgen_alloc_tx_ring(vgen_ldc_t *ldcp)
1724 {
1725 	void *tbufp;
1726 	ldc_mem_info_t minfo;
1727 	uint32_t txdsize;
1728 	uint32_t tbufsize;
1729 	int status;
1730 	void *vnetp = LDC_TO_VNET(ldcp);
1731 
1732 	ldcp->num_txds = vnet_ntxds;
1733 	txdsize = sizeof (vnet_public_desc_t);
1734 	tbufsize = sizeof (vgen_private_desc_t);
1735 
1736 	/* allocate transmit buffer ring */
1737 	tbufp = kmem_zalloc(ldcp->num_txds * tbufsize, KM_NOSLEEP);
1738 	if (tbufp == NULL) {
1739 		return (DDI_FAILURE);
1740 	}
1741 
1742 	/* create transmit descriptor ring */
1743 	status = ldc_mem_dring_create(ldcp->num_txds, txdsize,
1744 	    &ldcp->tx_dhandle);
1745 	if (status) {
1746 		DWARN((vnetp, "vgen_alloc_tx_ring: ldc_mem_dring_create() "
1747 		    "failed, id(%lx)\n", ldcp->ldc_id));
1748 		kmem_free(tbufp, ldcp->num_txds * tbufsize);
1749 		return (DDI_FAILURE);
1750 	}
1751 
1752 	/* get the addr of descripror ring */
1753 	status = ldc_mem_dring_info(ldcp->tx_dhandle, &minfo);
1754 	if (status) {
1755 		DWARN((vnetp, "vgen_alloc_tx_ring: ldc_mem_dring_info() "
1756 		    "failed, id(%lx)\n", ldcp->ldc_id));
1757 		kmem_free(tbufp, ldcp->num_txds * tbufsize);
1758 		(void) ldc_mem_dring_destroy(ldcp->tx_dhandle);
1759 		ldcp->tbufp = NULL;
1760 		return (DDI_FAILURE);
1761 	}
1762 	ldcp->txdp = (vnet_public_desc_t *)(minfo.vaddr);
1763 	ldcp->tbufp = tbufp;
1764 
1765 	ldcp->txdendp = &((ldcp->txdp)[ldcp->num_txds]);
1766 	ldcp->tbufendp = &((ldcp->tbufp)[ldcp->num_txds]);
1767 
1768 	return (DDI_SUCCESS);
1769 }
1770 
1771 /* Free transmit resources for the channel */
1772 static void
1773 vgen_free_tx_ring(vgen_ldc_t *ldcp)
1774 {
1775 	int tbufsize = sizeof (vgen_private_desc_t);
1776 
1777 	/* free transmit descriptor ring */
1778 	(void) ldc_mem_dring_destroy(ldcp->tx_dhandle);
1779 
1780 	/* free transmit buffer ring */
1781 	kmem_free(ldcp->tbufp, ldcp->num_txds * tbufsize);
1782 	ldcp->txdp = ldcp->txdendp = NULL;
1783 	ldcp->tbufp = ldcp->tbufendp = NULL;
1784 }
1785 
1786 /* enable transmit/receive on the channels for the port */
1787 static void
1788 vgen_init_ldcs(vgen_port_t *portp)
1789 {
1790 	vgen_ldclist_t	*ldclp = &portp->ldclist;
1791 	vgen_ldc_t	*ldcp;
1792 
1793 	READ_ENTER(&ldclp->rwlock);
1794 	ldcp =  ldclp->headp;
1795 	for (; ldcp  != NULL; ldcp = ldcp->nextp) {
1796 		(void) vgen_ldc_init(ldcp);
1797 	}
1798 	RW_EXIT(&ldclp->rwlock);
1799 }
1800 
1801 /* stop transmit/receive on the channels for the port */
1802 static void
1803 vgen_uninit_ldcs(vgen_port_t *portp)
1804 {
1805 	vgen_ldclist_t	*ldclp = &portp->ldclist;
1806 	vgen_ldc_t	*ldcp;
1807 
1808 	READ_ENTER(&ldclp->rwlock);
1809 	ldcp =  ldclp->headp;
1810 	for (; ldcp  != NULL; ldcp = ldcp->nextp) {
1811 		vgen_ldc_uninit(ldcp);
1812 	}
1813 	RW_EXIT(&ldclp->rwlock);
1814 }
1815 
1816 /* enable transmit/receive on the channel */
1817 static int
1818 vgen_ldc_init(vgen_ldc_t *ldcp)
1819 {
1820 	void *vnetp = LDC_TO_VNET(ldcp);
1821 	ldc_status_t	istatus;
1822 	int		rv;
1823 	enum		{ ST_init = 0x0, ST_ldc_open = 0x1,
1824 			    ST_init_tbufs = 0x2, ST_cb_enable = 0x4
1825 			    }
1826 			init_state;
1827 	uint32_t	retries = 0;
1828 
1829 	init_state = ST_init;
1830 
1831 	LDC_LOCK(ldcp);
1832 
1833 	rv = ldc_open(ldcp->ldc_handle);
1834 	if (rv != 0) {
1835 		DWARN((vnetp,
1836 		    "vgen_ldcinit: ldc_open failed: id<%lx> rv(%d)\n",
1837 		    ldcp->ldc_id, rv));
1838 		goto ldcinit_failed;
1839 	}
1840 	init_state |= ST_ldc_open;
1841 
1842 	(void) ldc_status(ldcp->ldc_handle, &istatus);
1843 	if (istatus != LDC_OPEN && istatus != LDC_READY) {
1844 		DWARN((vnetp,
1845 		    "vgen_ldcinit: id (%lx) status(%d) is not OPEN/READY\n",
1846 		    ldcp->ldc_id, istatus));
1847 		goto ldcinit_failed;
1848 	}
1849 	ldcp->ldc_status = istatus;
1850 
1851 	rv = vgen_init_tbufs(ldcp);
1852 	if (rv != 0) {
1853 		DWARN((vnetp,
1854 		    "vgen_ldcinit: vgen_init_tbufs() failed: id(%lx)\n",
1855 		    ldcp->ldc_id));
1856 		goto ldcinit_failed;
1857 	}
1858 	init_state |= ST_init_tbufs;
1859 
1860 	rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_ENABLE);
1861 	if (rv != 0) {
1862 		DWARN((vnetp, "vgen_ldc_init: ldc_set_cb_mode failed: id(%lx) "
1863 		    "rv(%d)\n", ldcp->ldc_id, rv));
1864 		goto ldcinit_failed;
1865 	}
1866 
1867 	init_state |= ST_cb_enable;
1868 
1869 	do {
1870 		rv = ldc_up(ldcp->ldc_handle);
1871 		if ((rv != 0) && (rv == EWOULDBLOCK)) {
1872 			DBG2((vnetp,
1873 			    "vgen_ldcinit: ldc_up err id(%lx) rv(%d)\n",
1874 			    ldcp->ldc_id, rv));
1875 			drv_usecwait(VGEN_LDC_UP_DELAY);
1876 		}
1877 		if (retries++ >= vgen_ldcup_retries)
1878 			break;
1879 	} while (rv == EWOULDBLOCK);
1880 
1881 	(void) ldc_status(ldcp->ldc_handle, &istatus);
1882 	if (istatus == LDC_UP) {
1883 		DWARN((vnetp, "vgen_ldc_init: id(%lx) status(%d) is UP\n",
1884 		    ldcp->ldc_id, istatus));
1885 	}
1886 
1887 	ldcp->ldc_status = istatus;
1888 
1889 	/* initialize transmit watchdog timeout */
1890 	ldcp->wd_tid = timeout(vgen_ldc_watchdog, (caddr_t)ldcp,
1891 	    drv_usectohz(vnet_ldcwd_interval * 1000));
1892 
1893 	ldcp->hphase = -1;
1894 	ldcp->flags |= CHANNEL_STARTED;
1895 
1896 	/* if channel is already UP - start handshake */
1897 	if (istatus == LDC_UP) {
1898 		vgen_t *vgenp = LDC_TO_VGEN(ldcp);
1899 		if (ldcp->portp != vgenp->vsw_portp) {
1900 			/*
1901 			 * modify fdb entry to use this port as the
1902 			 * channel is up, instead of going through the
1903 			 * vsw-port (see comments in vgen_port_init())
1904 			 */
1905 			vnet_modify_fdb(vnetp,
1906 			    (uint8_t *)&ldcp->portp->macaddr,
1907 			    vgen_tx, ldcp->portp, B_FALSE);
1908 		}
1909 
1910 		/* Initialize local session id */
1911 		ldcp->local_sid = ddi_get_lbolt();
1912 
1913 		/* clear peer session id */
1914 		ldcp->peer_sid = 0;
1915 		ldcp->hretries = 0;
1916 
1917 		/* Initiate Handshake process with peer ldc endpoint */
1918 		vgen_reset_hphase(ldcp);
1919 
1920 		mutex_exit(&ldcp->tclock);
1921 		mutex_exit(&ldcp->txlock);
1922 		vgen_handshake(vh_nextphase(ldcp));
1923 		mutex_exit(&ldcp->cblock);
1924 	} else {
1925 		LDC_UNLOCK(ldcp);
1926 	}
1927 
1928 	return (DDI_SUCCESS);
1929 
1930 ldcinit_failed:
1931 	if (init_state & ST_cb_enable) {
1932 		(void) ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE);
1933 	}
1934 	if (init_state & ST_init_tbufs) {
1935 		vgen_uninit_tbufs(ldcp);
1936 	}
1937 	if (init_state & ST_ldc_open) {
1938 		(void) ldc_close(ldcp->ldc_handle);
1939 	}
1940 	LDC_UNLOCK(ldcp);
1941 	return (DDI_FAILURE);
1942 }
1943 
1944 /* stop transmit/receive on the channel */
1945 static void
1946 vgen_ldc_uninit(vgen_ldc_t *ldcp)
1947 {
1948 	void *vnetp = LDC_TO_VNET(ldcp);
1949 	int	rv;
1950 
1951 	DBG1((vnetp, "vgen_ldc_uninit: enter: id(%lx)\n", ldcp->ldc_id));
1952 	LDC_LOCK(ldcp);
1953 
1954 	if ((ldcp->flags & CHANNEL_STARTED) == 0) {
1955 		LDC_UNLOCK(ldcp);
1956 		DWARN((vnetp, "vgen_ldc_uninit: id(%lx) CHANNEL_STARTED"
1957 		    " flag is not set\n", ldcp->ldc_id));
1958 		return;
1959 	}
1960 
1961 	/* disable further callbacks */
1962 	rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE);
1963 	if (rv != 0) {
1964 		DWARN((vnetp, "vgen_ldc_uninit: id (%lx) "
1965 		    "ldc_set_cb_mode failed\n", ldcp->ldc_id));
1966 	}
1967 
1968 	/* clear handshake done bit and wait for pending tx and cb to finish */
1969 	ldcp->hphase &= ~(VH_DONE);
1970 	LDC_UNLOCK(ldcp);
1971 	drv_usecwait(1000);
1972 	LDC_LOCK(ldcp);
1973 
1974 	vgen_reset_hphase(ldcp);
1975 
1976 	/* reset transmit watchdog timeout */
1977 	if (ldcp->wd_tid) {
1978 		(void) untimeout(ldcp->wd_tid);
1979 		ldcp->wd_tid = 0;
1980 	}
1981 
1982 	vgen_uninit_tbufs(ldcp);
1983 
1984 	rv = ldc_close(ldcp->ldc_handle);
1985 	if (rv != 0) {
1986 		DWARN((vnetp, "vgen_ldcuninit: ldc_close err id(%lx)\n",
1987 		    ldcp->ldc_id));
1988 	}
1989 	ldcp->ldc_status = LDC_INIT;
1990 	ldcp->flags &= ~(CHANNEL_STARTED);
1991 
1992 	LDC_UNLOCK(ldcp);
1993 
1994 	DBG1((vnetp, "vgen_ldc_uninit: exit: id(%lx)\n", ldcp->ldc_id));
1995 }
1996 
1997 /* Initialize the transmit buffer ring for the channel */
1998 static int
1999 vgen_init_tbufs(vgen_ldc_t *ldcp)
2000 {
2001 	vgen_private_desc_t	*tbufp;
2002 	vnet_public_desc_t	*txdp;
2003 	vio_dring_entry_hdr_t		*hdrp;
2004 	int 			i;
2005 	int 			rv;
2006 	caddr_t			datap = NULL;
2007 	int			ci;
2008 	uint32_t		ncookies;
2009 
2010 	bzero(ldcp->tbufp, sizeof (*tbufp) * (ldcp->num_txds));
2011 	bzero(ldcp->txdp, sizeof (*txdp) * (ldcp->num_txds));
2012 
2013 	datap = kmem_zalloc(ldcp->num_txds * VGEN_DBLK_SZ, KM_SLEEP);
2014 	ldcp->tx_datap = datap;
2015 
2016 	/*
2017 	 * for each private descriptor, allocate a ldc mem_handle which is
2018 	 * required to map the data during transmit, set the flags
2019 	 * to free (available for use by transmit routine).
2020 	 */
2021 
2022 	for (i = 0; i < ldcp->num_txds; i++) {
2023 
2024 		tbufp = &(ldcp->tbufp[i]);
2025 		rv = ldc_mem_alloc_handle(ldcp->ldc_handle,
2026 			&(tbufp->memhandle));
2027 		if (rv) {
2028 			tbufp->memhandle = 0;
2029 			goto init_tbufs_failed;
2030 		}
2031 
2032 		/*
2033 		 * bind ldc memhandle to the corresponding transmit buffer.
2034 		 */
2035 		ci = ncookies = 0;
2036 		rv = ldc_mem_bind_handle(tbufp->memhandle,
2037 		    (caddr_t)datap, VGEN_DBLK_SZ, LDC_SHADOW_MAP,
2038 		    LDC_MEM_R, &(tbufp->memcookie[ci]), &ncookies);
2039 		if (rv != 0) {
2040 			goto init_tbufs_failed;
2041 		}
2042 
2043 		/*
2044 		 * successful in binding the handle to tx data buffer.
2045 		 * set datap in the private descr to this buffer.
2046 		 */
2047 		tbufp->datap = datap;
2048 
2049 		if ((ncookies == 0) ||
2050 			(ncookies > MAX_COOKIES)) {
2051 			goto init_tbufs_failed;
2052 		}
2053 
2054 		for (ci = 1; ci < ncookies; ci++) {
2055 			rv = ldc_mem_nextcookie(tbufp->memhandle,
2056 				&(tbufp->memcookie[ci]));
2057 			if (rv != 0) {
2058 				goto init_tbufs_failed;
2059 			}
2060 		}
2061 
2062 		tbufp->ncookies = ncookies;
2063 		datap += VGEN_DBLK_SZ;
2064 
2065 		tbufp->flags = VGEN_PRIV_DESC_FREE;
2066 		txdp = &(ldcp->txdp[i]);
2067 		hdrp = &txdp->hdr;
2068 		hdrp->dstate = VIO_DESC_FREE;
2069 		hdrp->ack = B_FALSE;
2070 		tbufp->descp = txdp;
2071 
2072 	}
2073 
2074 	/* reset tbuf walking pointers */
2075 	ldcp->next_tbufp = ldcp->tbufp;
2076 	ldcp->cur_tbufp = ldcp->tbufp;
2077 
2078 	/* initialize tx seqnum and index */
2079 	ldcp->next_txseq = VNET_ISS;
2080 	ldcp->next_txi = 0;
2081 
2082 	ldcp->resched_peer = B_TRUE;
2083 
2084 	return (DDI_SUCCESS);
2085 
2086 init_tbufs_failed:;
2087 	vgen_uninit_tbufs(ldcp);
2088 	return (DDI_FAILURE);
2089 }
2090 
2091 /* Uninitialize transmit buffer ring for the channel */
2092 static void
2093 vgen_uninit_tbufs(vgen_ldc_t *ldcp)
2094 {
2095 	vgen_private_desc_t	*tbufp = ldcp->tbufp;
2096 	int 			i;
2097 
2098 	/* for each tbuf (priv_desc), free ldc mem_handle */
2099 	for (i = 0; i < ldcp->num_txds; i++) {
2100 
2101 		tbufp = &(ldcp->tbufp[i]);
2102 
2103 		if (tbufp->datap) { /* if bound to a ldc memhandle */
2104 			(void) ldc_mem_unbind_handle(tbufp->memhandle);
2105 			tbufp->datap = NULL;
2106 		}
2107 		if (tbufp->memhandle) {
2108 			(void) ldc_mem_free_handle(tbufp->memhandle);
2109 			tbufp->memhandle = 0;
2110 		}
2111 	}
2112 
2113 	if (ldcp->tx_datap) {
2114 		/* prealloc'd tx data buffer */
2115 		kmem_free(ldcp->tx_datap, ldcp->num_txds * VGEN_DBLK_SZ);
2116 		ldcp->tx_datap = NULL;
2117 	}
2118 
2119 	bzero(ldcp->tbufp, sizeof (vgen_private_desc_t) * (ldcp->num_txds));
2120 	bzero(ldcp->txdp, sizeof (vnet_public_desc_t) * (ldcp->num_txds));
2121 }
2122 
2123 /* clobber tx descriptor ring */
2124 static void
2125 vgen_clobber_tbufs(vgen_ldc_t *ldcp)
2126 {
2127 	vnet_public_desc_t	*txdp;
2128 	vgen_private_desc_t	*tbufp;
2129 	vio_dring_entry_hdr_t		*hdrp;
2130 	void *vnetp = LDC_TO_VNET(ldcp);
2131 	int i;
2132 #ifdef DEBUG
2133 	int ndone = 0;
2134 #endif
2135 
2136 	for (i = 0; i < ldcp->num_txds; i++) {
2137 
2138 		tbufp = &(ldcp->tbufp[i]);
2139 		txdp = tbufp->descp;
2140 		hdrp = &txdp->hdr;
2141 
2142 		if (tbufp->flags & VGEN_PRIV_DESC_BUSY) {
2143 			tbufp->flags = VGEN_PRIV_DESC_FREE;
2144 #ifdef DEBUG
2145 			if (hdrp->dstate == VIO_DESC_DONE)
2146 				ndone++;
2147 #endif
2148 			hdrp->dstate = VIO_DESC_FREE;
2149 			hdrp->ack = B_FALSE;
2150 		}
2151 	}
2152 	/* reset tbuf walking pointers */
2153 	ldcp->next_tbufp = ldcp->tbufp;
2154 	ldcp->cur_tbufp = ldcp->tbufp;
2155 
2156 	/* reset tx seqnum and index */
2157 	ldcp->next_txseq = VNET_ISS;
2158 	ldcp->next_txi = 0;
2159 
2160 	ldcp->resched_peer = B_TRUE;
2161 
2162 #ifdef DEBUG
2163 	DBG2((vnetp,
2164 	    "vgen_clobber_tbufs: id(0x%lx) num descrs done (%d)\n",
2165 	    ldcp->ldc_id, ndone));
2166 #endif
2167 }
2168 
2169 /* clobber receive descriptor ring */
2170 static void
2171 vgen_clobber_rxds(vgen_ldc_t *ldcp)
2172 {
2173 	ldcp->rx_dhandle = 0;
2174 	bzero(&ldcp->rx_dcookie, sizeof (ldcp->rx_dcookie));
2175 	ldcp->rxdp = NULL;
2176 	ldcp->next_rxi = 0;
2177 	ldcp->num_rxds = 0;
2178 	ldcp->next_rxseq = VNET_ISS;
2179 }
2180 
2181 /* initialize receive descriptor ring */
2182 static int
2183 vgen_init_rxds(vgen_ldc_t *ldcp, uint32_t num_desc, uint32_t desc_size,
2184 	ldc_mem_cookie_t *dcookie, uint32_t ncookies)
2185 {
2186 	int rv;
2187 	ldc_mem_info_t minfo;
2188 
2189 	rv = ldc_mem_dring_map(ldcp->ldc_handle, dcookie, ncookies, num_desc,
2190 	    desc_size, LDC_SHADOW_MAP, &(ldcp->rx_dhandle));
2191 	if (rv != 0) {
2192 		return (DDI_FAILURE);
2193 	}
2194 
2195 	/*
2196 	 * sucessfully mapped, now try to
2197 	 * get info about the mapped dring
2198 	 */
2199 	rv = ldc_mem_dring_info(ldcp->rx_dhandle, &minfo);
2200 	if (rv != 0) {
2201 		(void) ldc_mem_dring_unmap(ldcp->rx_dhandle);
2202 		return (DDI_FAILURE);
2203 	}
2204 
2205 	/*
2206 	 * save ring address, number of descriptors.
2207 	 */
2208 	ldcp->rxdp = (vnet_public_desc_t *)(minfo.vaddr);
2209 	bcopy(dcookie, &(ldcp->rx_dcookie), sizeof (*dcookie));
2210 	ldcp->num_rxdcookies = ncookies;
2211 	ldcp->num_rxds = num_desc;
2212 	ldcp->next_rxi = 0;
2213 	ldcp->next_rxseq = VNET_ISS;
2214 
2215 	return (DDI_SUCCESS);
2216 }
2217 
2218 /* get channel statistics */
2219 static uint64_t
2220 vgen_ldc_stat(vgen_ldc_t *ldcp, uint_t stat)
2221 {
2222 	vgen_stats_t *statsp;
2223 	uint64_t val;
2224 
2225 	val = 0;
2226 	statsp = ldcp->statsp;
2227 	switch (stat) {
2228 
2229 	case MAC_STAT_MULTIRCV:
2230 		val = statsp->multircv;
2231 		break;
2232 
2233 	case MAC_STAT_BRDCSTRCV:
2234 		val = statsp->brdcstrcv;
2235 		break;
2236 
2237 	case MAC_STAT_MULTIXMT:
2238 		val = statsp->multixmt;
2239 		break;
2240 
2241 	case MAC_STAT_BRDCSTXMT:
2242 		val = statsp->brdcstxmt;
2243 		break;
2244 
2245 	case MAC_STAT_NORCVBUF:
2246 		val = statsp->norcvbuf;
2247 		break;
2248 
2249 	case MAC_STAT_IERRORS:
2250 		val = statsp->ierrors;
2251 		break;
2252 
2253 	case MAC_STAT_NOXMTBUF:
2254 		val = statsp->noxmtbuf;
2255 		break;
2256 
2257 	case MAC_STAT_OERRORS:
2258 		val = statsp->oerrors;
2259 		break;
2260 
2261 	case MAC_STAT_COLLISIONS:
2262 		break;
2263 
2264 	case MAC_STAT_RBYTES:
2265 		val = statsp->rbytes;
2266 		break;
2267 
2268 	case MAC_STAT_IPACKETS:
2269 		val = statsp->ipackets;
2270 		break;
2271 
2272 	case MAC_STAT_OBYTES:
2273 		val = statsp->obytes;
2274 		break;
2275 
2276 	case MAC_STAT_OPACKETS:
2277 		val = statsp->opackets;
2278 		break;
2279 
2280 	/* stats not relevant to ldc, return 0 */
2281 	case MAC_STAT_IFSPEED:
2282 	case ETHER_STAT_ALIGN_ERRORS:
2283 	case ETHER_STAT_FCS_ERRORS:
2284 	case ETHER_STAT_FIRST_COLLISIONS:
2285 	case ETHER_STAT_MULTI_COLLISIONS:
2286 	case ETHER_STAT_DEFER_XMTS:
2287 	case ETHER_STAT_TX_LATE_COLLISIONS:
2288 	case ETHER_STAT_EX_COLLISIONS:
2289 	case ETHER_STAT_MACXMT_ERRORS:
2290 	case ETHER_STAT_CARRIER_ERRORS:
2291 	case ETHER_STAT_TOOLONG_ERRORS:
2292 	case ETHER_STAT_XCVR_ADDR:
2293 	case ETHER_STAT_XCVR_ID:
2294 	case ETHER_STAT_XCVR_INUSE:
2295 	case ETHER_STAT_CAP_1000FDX:
2296 	case ETHER_STAT_CAP_1000HDX:
2297 	case ETHER_STAT_CAP_100FDX:
2298 	case ETHER_STAT_CAP_100HDX:
2299 	case ETHER_STAT_CAP_10FDX:
2300 	case ETHER_STAT_CAP_10HDX:
2301 	case ETHER_STAT_CAP_ASMPAUSE:
2302 	case ETHER_STAT_CAP_PAUSE:
2303 	case ETHER_STAT_CAP_AUTONEG:
2304 	case ETHER_STAT_ADV_CAP_1000FDX:
2305 	case ETHER_STAT_ADV_CAP_1000HDX:
2306 	case ETHER_STAT_ADV_CAP_100FDX:
2307 	case ETHER_STAT_ADV_CAP_100HDX:
2308 	case ETHER_STAT_ADV_CAP_10FDX:
2309 	case ETHER_STAT_ADV_CAP_10HDX:
2310 	case ETHER_STAT_ADV_CAP_ASMPAUSE:
2311 	case ETHER_STAT_ADV_CAP_PAUSE:
2312 	case ETHER_STAT_ADV_CAP_AUTONEG:
2313 	case ETHER_STAT_LP_CAP_1000FDX:
2314 	case ETHER_STAT_LP_CAP_1000HDX:
2315 	case ETHER_STAT_LP_CAP_100FDX:
2316 	case ETHER_STAT_LP_CAP_100HDX:
2317 	case ETHER_STAT_LP_CAP_10FDX:
2318 	case ETHER_STAT_LP_CAP_10HDX:
2319 	case ETHER_STAT_LP_CAP_ASMPAUSE:
2320 	case ETHER_STAT_LP_CAP_PAUSE:
2321 	case ETHER_STAT_LP_CAP_AUTONEG:
2322 	case ETHER_STAT_LINK_ASMPAUSE:
2323 	case ETHER_STAT_LINK_PAUSE:
2324 	case ETHER_STAT_LINK_AUTONEG:
2325 	case ETHER_STAT_LINK_DUPLEX:
2326 	default:
2327 		val = 0;
2328 		break;
2329 
2330 	}
2331 	return (val);
2332 }
2333 
2334 /*
2335  * LDC channel is UP, start handshake process with peer.
2336  * Flag tells vnet_modify_fdb() about the context: set to B_TRUE if this
2337  * function is being called from transmit routine, otherwise B_FALSE.
2338  */
2339 static void
2340 vgen_handle_evt_up(vgen_ldc_t *ldcp, boolean_t flag)
2341 {
2342 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
2343 	void 	*vnetp = LDC_TO_VNET(ldcp);
2344 
2345 	DBG1((vnetp, "vgen_handle_evt_up: enter: id(%lx)\n", ldcp->ldc_id));
2346 
2347 	ASSERT(MUTEX_HELD(&ldcp->cblock));
2348 
2349 	if (ldcp->portp != vgenp->vsw_portp) {
2350 		/*
2351 		 * modify fdb entry to use this port as the
2352 		 * channel is up, instead of going through the
2353 		 * vsw-port (see comments in vgen_port_init())
2354 		 */
2355 		vnet_modify_fdb(vnetp, (uint8_t *)&ldcp->portp->macaddr,
2356 		    vgen_tx, ldcp->portp, flag);
2357 	}
2358 
2359 	/* Initialize local session id */
2360 	ldcp->local_sid = ddi_get_lbolt();
2361 
2362 	/* clear peer session id */
2363 	ldcp->peer_sid = 0;
2364 	ldcp->hretries = 0;
2365 
2366 	if (ldcp->hphase != VH_PHASE0) {
2367 		vgen_handshake_reset(ldcp);
2368 	}
2369 
2370 	/* Initiate Handshake process with peer ldc endpoint */
2371 	vgen_handshake(vh_nextphase(ldcp));
2372 
2373 	DBG1((vnetp, "vgen_handle_evt_up: exit: id(%lx)\n", ldcp->ldc_id));
2374 }
2375 
2376 /*
2377  * LDC channel is Reset, terminate connection with peer and try to
2378  * bring the channel up again.
2379  * Flag tells vnet_modify_fdb() about the context: set to B_TRUE if this
2380  * function is being called from transmit routine, otherwise B_FALSE.
2381  */
2382 static void
2383 vgen_handle_evt_reset(vgen_ldc_t *ldcp, boolean_t flag)
2384 {
2385 	ldc_status_t istatus;
2386 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
2387 	void	*vnetp = LDC_TO_VNET(ldcp);
2388 	int	rv;
2389 
2390 	DBG1((vnetp, "vgen_handle_evt_reset: enter: id(%lx)\n", ldcp->ldc_id));
2391 
2392 	ASSERT(MUTEX_HELD(&ldcp->cblock));
2393 
2394 	if ((ldcp->portp != vgenp->vsw_portp) &&
2395 		(vgenp->vsw_portp != NULL)) {
2396 		/*
2397 		 * modify fdb entry to use vsw-port  as the
2398 		 * channel is reset and we don't have a direct
2399 		 * link to the destination (see comments
2400 		 * in vgen_port_init()).
2401 		 */
2402 		vnet_modify_fdb(vnetp, (uint8_t *)&ldcp->portp->macaddr,
2403 		    vgen_tx, vgenp->vsw_portp, flag);
2404 	}
2405 
2406 	if (ldcp->hphase != VH_PHASE0) {
2407 		vgen_handshake_reset(ldcp);
2408 	}
2409 
2410 	/* try to bring the channel up */
2411 	rv = ldc_up(ldcp->ldc_handle);
2412 	if (rv != 0) {
2413 		DWARN((vnetp,
2414 		    "vgen_handle_evt_reset: ldc_up err id(%lx) rv(%d)\n",
2415 		    ldcp->ldc_id, rv));
2416 	}
2417 
2418 	if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
2419 		DWARN((vnetp,
2420 		    "vgen_handle_evt_reset: ldc_status err id(%lx)\n"));
2421 	} else {
2422 		ldcp->ldc_status = istatus;
2423 	}
2424 
2425 	/* if channel is already UP - restart handshake */
2426 	if (ldcp->ldc_status == LDC_UP) {
2427 		vgen_handle_evt_up(ldcp, flag);
2428 	}
2429 
2430 	DBG1((vnetp, "vgen_handle_evt_reset: exit: id(%lx)\n", ldcp->ldc_id));
2431 }
2432 
2433 /* Interrupt handler for the channel */
2434 static uint_t
2435 vgen_ldc_cb(uint64_t event, caddr_t arg)
2436 {
2437 	_NOTE(ARGUNUSED(event))
2438 	vgen_ldc_t	*ldcp;
2439 	void 		*vnetp;
2440 	vgen_t		*vgenp;
2441 	size_t		msglen;
2442 	ldc_status_t 	istatus;
2443 	uint64_t	ldcmsg[7];
2444 	int 		rv = 0;
2445 	vio_msg_tag_t	*tagp;
2446 	mblk_t		*mp = NULL;
2447 	mblk_t		*bp = NULL;
2448 	mblk_t		*bpt = NULL;
2449 	mblk_t		*headp = NULL;
2450 	mblk_t		*tailp = NULL;
2451 	vgen_stats_t	*statsp;
2452 
2453 	ldcp = (vgen_ldc_t *)arg;
2454 	vgenp = LDC_TO_VGEN(ldcp);
2455 	vnetp = LDC_TO_VNET(ldcp);
2456 	statsp = ldcp->statsp;
2457 
2458 	DBG1((vnetp, "vgen_ldc_cb enter: ldcid (%lx)\n", ldcp->ldc_id));
2459 
2460 	mutex_enter(&ldcp->cblock);
2461 	statsp->callbacks++;
2462 	if ((ldcp->ldc_status == LDC_INIT) || (ldcp->ldc_handle == NULL)) {
2463 		DWARN((vnetp, "vgen_ldc_cb: id(%lx), status(%d) is LDC_INIT\n",
2464 		    ldcp->ldc_id, ldcp->ldc_status));
2465 		mutex_exit(&ldcp->cblock);
2466 		return (LDC_SUCCESS);
2467 	}
2468 
2469 	/*
2470 	 * NOTE: not using switch() as event could be triggered by
2471 	 * a state change and a read request. Also the ordering	of the
2472 	 * check for the event types is deliberate.
2473 	 */
2474 	if (event & LDC_EVT_UP) {
2475 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
2476 			DWARN((vnetp,
2477 			    "vgen_ldc_cb: ldc_status err id(%lx)\n"));
2478 		} else {
2479 			ldcp->ldc_status = istatus;
2480 		}
2481 		ASSERT(ldcp->ldc_status == LDC_UP);
2482 		DWARN((vnetp,
2483 		    "vgen_ldc_cb: id(%lx) event(%lx) UP, status(%d)\n",
2484 		    ldcp->ldc_id, event, ldcp->ldc_status));
2485 
2486 		vgen_handle_evt_up(ldcp, B_FALSE);
2487 
2488 		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);
2489 	}
2490 
2491 	if (event & LDC_EVT_READ) {
2492 		DBG2((vnetp,
2493 		    "vgen_ldc_cb: id(%lx) event(%lx) READ, status(%d)\n",
2494 		    ldcp->ldc_id, event, ldcp->ldc_status));
2495 
2496 		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);
2497 		goto vgen_ldccb_rcv;
2498 	}
2499 
2500 	if (event & (LDC_EVT_RESET | LDC_EVT_DOWN)) {
2501 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
2502 			DWARN((vnetp,
2503 			    "vgen_ldc_cb: ldc_status err id(%lx)\n"));
2504 		} else {
2505 			ldcp->ldc_status = istatus;
2506 		}
2507 		DWARN((vnetp,
2508 		    "vgen_ldc_cb: id(%lx) event(%lx) RESET/DOWN, status(%d)\n",
2509 		    ldcp->ldc_id, event, ldcp->ldc_status));
2510 
2511 		vgen_handle_evt_reset(ldcp, B_FALSE);
2512 	}
2513 
2514 	mutex_exit(&ldcp->cblock);
2515 	return (LDC_SUCCESS);
2516 
2517 vgen_ldccb_rcv:
2518 
2519 	/* if event is LDC_EVT_READ, receive all packets */
2520 	do {
2521 		msglen = sizeof (ldcmsg);
2522 		rv = ldc_read(ldcp->ldc_handle, (caddr_t)&ldcmsg, &msglen);
2523 
2524 		if (rv != 0) {
2525 			DWARN((vnetp,
2526 			    "vgen_ldc_cb:ldc_read err id(%lx) rv(%d) "
2527 			    "len(%d)\n", ldcp->ldc_id, rv, msglen));
2528 			if (rv == ECONNRESET)
2529 				goto exit_error;
2530 			break;
2531 		}
2532 		if (msglen == 0) {
2533 			DBG2((vnetp, "vgen_ldc_cb: ldc_read id(%lx) NODATA",
2534 			ldcp->ldc_id));
2535 			break;
2536 		}
2537 		DBG2((vnetp, "vgen_ldc_cb: ldc_read id(%lx): msglen(%d)",
2538 		    ldcp->ldc_id, msglen));
2539 
2540 		tagp = (vio_msg_tag_t *)ldcmsg;
2541 
2542 		if (ldcp->peer_sid) {
2543 			/*
2544 			 * check sid only after we have received peer's sid
2545 			 * in the version negotiate msg.
2546 			 */
2547 #ifdef DEBUG
2548 			if (vgen_hdbg & HDBG_BAD_SID) {
2549 				/* simulate bad sid condition */
2550 				tagp->vio_sid = 0;
2551 				vgen_hdbg &= ~(HDBG_BAD_SID);
2552 			}
2553 #endif
2554 			rv = vgen_check_sid(ldcp, tagp);
2555 			if (rv != VGEN_SUCCESS) {
2556 				/*
2557 				 * If sid mismatch is detected,
2558 				 * reset the channel.
2559 				 */
2560 				ldcp->need_ldc_reset = B_TRUE;
2561 				goto exit_error;
2562 			}
2563 		}
2564 
2565 		switch (tagp->vio_msgtype) {
2566 		case VIO_TYPE_CTRL:
2567 			rv = vgen_handle_ctrlmsg(ldcp, tagp);
2568 			break;
2569 
2570 		case VIO_TYPE_DATA:
2571 			headp = tailp = NULL;
2572 			rv = vgen_handle_datamsg(ldcp, tagp, &headp, &tailp);
2573 			/* build a chain of received packets */
2574 			if (headp != NULL) {
2575 				if (bp == NULL) {
2576 					bp = headp;
2577 					bpt = tailp;
2578 				} else {
2579 					bpt->b_next = headp;
2580 					bpt = tailp;
2581 				}
2582 			}
2583 			break;
2584 
2585 		case VIO_TYPE_ERR:
2586 			vgen_handle_errmsg(ldcp, tagp);
2587 			break;
2588 
2589 		default:
2590 			DWARN((vnetp,
2591 			    "vgen_ldc_cb: Unknown VIO_TYPE(%x)\n",
2592 			    tagp->vio_msgtype));
2593 			break;
2594 		}
2595 
2596 exit_error:
2597 		if (rv == ECONNRESET) {
2598 			if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
2599 				DWARN((vnetp,
2600 				    "vgen_ldc_cb: ldc_status err id(%lx)\n"));
2601 			} else {
2602 				ldcp->ldc_status = istatus;
2603 			}
2604 			vgen_handle_evt_reset(ldcp, B_FALSE);
2605 			break;
2606 		} else if (rv) {
2607 			vgen_handshake_retry(ldcp);
2608 			break;
2609 		}
2610 
2611 	} while (msglen);
2612 
2613 	mutex_exit(&ldcp->cblock);
2614 
2615 	/* send up the received packets to MAC layer */
2616 	while (bp != NULL) {
2617 		mp = bp;
2618 		bp = bp->b_next;
2619 		mp->b_next = mp->b_prev = NULL;
2620 		DBG2((vnetp, "vgen_ldc_cb: id(%lx) rx pkt len (%lx)\n",
2621 		    ldcp->ldc_id, MBLKL(mp)));
2622 		vnet_rx(vgenp->vnetp, NULL, mp);
2623 	}
2624 	DBG1((vnetp, "vgen_ldc_cb exit: ldcid (%lx)\n", ldcp->ldc_id));
2625 
2626 	return (LDC_SUCCESS);
2627 }
2628 
2629 /* vgen handshake functions */
2630 
2631 /* change the hphase for the channel to the next phase */
2632 static vgen_ldc_t *
2633 vh_nextphase(vgen_ldc_t *ldcp)
2634 {
2635 	if (ldcp->hphase == VH_PHASE3) {
2636 		ldcp->hphase = VH_DONE;
2637 	} else {
2638 		ldcp->hphase++;
2639 	}
2640 	return (ldcp);
2641 }
2642 
2643 /*
2644  * Check whether the given version is supported or not and
2645  * return VGEN_SUCCESS if supported.
2646  */
2647 static int
2648 vgen_supported_version(vgen_ldc_t *ldcp, uint16_t ver_major,
2649 uint16_t ver_minor)
2650 {
2651 	vgen_ver_t	*versions = ldcp->vgen_versions;
2652 	int		i = 0;
2653 
2654 	while (i < VGEN_NUM_VER) {
2655 		if ((versions[i].ver_major == 0) &&
2656 		    (versions[i].ver_minor == 0)) {
2657 			break;
2658 		}
2659 		if ((versions[i].ver_major == ver_major) &&
2660 			(versions[i].ver_minor == ver_minor)) {
2661 			return (VGEN_SUCCESS);
2662 		}
2663 		i++;
2664 	}
2665 	return (VGEN_FAILURE);
2666 }
2667 
2668 /*
2669  * Given a version, return VGEN_SUCCESS if a lower version is supported.
2670  */
2671 static int
2672 vgen_next_version(vgen_ldc_t *ldcp, vgen_ver_t *verp)
2673 {
2674 	vgen_ver_t	*versions = ldcp->vgen_versions;
2675 	int		i = 0;
2676 
2677 	while (i < VGEN_NUM_VER) {
2678 		if ((versions[i].ver_major == 0) &&
2679 		    (versions[i].ver_minor == 0)) {
2680 			break;
2681 		}
2682 		/*
2683 		 * if we support a lower minor version within the same major
2684 		 * version, or if we support a lower major version,
2685 		 * update the verp parameter with this lower version and
2686 		 * return success.
2687 		 */
2688 		if (((versions[i].ver_major == verp->ver_major) &&
2689 			(versions[i].ver_minor < verp->ver_minor)) ||
2690 			(versions[i].ver_major < verp->ver_major)) {
2691 				verp->ver_major = versions[i].ver_major;
2692 				verp->ver_minor = versions[i].ver_minor;
2693 				return (VGEN_SUCCESS);
2694 		}
2695 		i++;
2696 	}
2697 
2698 	return (VGEN_FAILURE);
2699 }
2700 
2701 /*
2702  * wrapper routine to send the given message over ldc using ldc_write().
2703  */
2704 static int
2705 vgen_sendmsg(vgen_ldc_t *ldcp, caddr_t msg,  size_t msglen,
2706     boolean_t caller_holds_lock)
2707 {
2708 	int	rv;
2709 	size_t	len;
2710 	void *vnetp = LDC_TO_VNET(ldcp);
2711 	uint32_t retries = 0;
2712 
2713 	len = msglen;
2714 	if ((len == 0) || (msg == NULL))
2715 		return (VGEN_FAILURE);
2716 
2717 	if (!caller_holds_lock) {
2718 		mutex_enter(&ldcp->txlock);
2719 	}
2720 
2721 	do {
2722 		len = msglen;
2723 		rv = ldc_write(ldcp->ldc_handle, (caddr_t)msg, &len);
2724 		if (retries++ >= vgen_ldcwr_retries)
2725 			break;
2726 	} while (rv == EWOULDBLOCK);
2727 
2728 	if (!caller_holds_lock) {
2729 		mutex_exit(&ldcp->txlock);
2730 	}
2731 
2732 	if (rv != 0) {
2733 		DWARN((vnetp,
2734 		    "vgen_sendmsg: ldc_write failed: id(%lx) rv(%d)\n",
2735 		    ldcp->ldc_id, rv, msglen));
2736 		return (rv);
2737 	}
2738 
2739 	if (len != msglen) {
2740 		DWARN((vnetp,
2741 		    "vgen_sendmsg: ldc_write failed: id(%lx) rv(%d)"
2742 		    " msglen (%d)\n", ldcp->ldc_id, rv, msglen));
2743 		return (VGEN_FAILURE);
2744 	}
2745 
2746 	return (VGEN_SUCCESS);
2747 }
2748 
2749 /* send version negotiate message to the peer over ldc */
2750 static int
2751 vgen_send_version_negotiate(vgen_ldc_t *ldcp)
2752 {
2753 	vio_ver_msg_t	vermsg;
2754 	vio_msg_tag_t	*tagp = &vermsg.tag;
2755 	void		*vnetp = LDC_TO_VNET(ldcp);
2756 	int		rv;
2757 
2758 	bzero(&vermsg, sizeof (vermsg));
2759 
2760 	tagp->vio_msgtype = VIO_TYPE_CTRL;
2761 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
2762 	tagp->vio_subtype_env = VIO_VER_INFO;
2763 	tagp->vio_sid = ldcp->local_sid;
2764 
2765 	/* get version msg payload from ldcp->local */
2766 	vermsg.ver_major = ldcp->local_hparams.ver_major;
2767 	vermsg.ver_minor = ldcp->local_hparams.ver_minor;
2768 	vermsg.dev_class = ldcp->local_hparams.dev_class;
2769 
2770 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (vermsg), B_FALSE);
2771 	if (rv != VGEN_SUCCESS) {
2772 		DWARN((vnetp, "vgen_send_version_negotiate: vgen_sendmsg failed"
2773 		    "id (%lx)\n", ldcp->ldc_id));
2774 		return (rv);
2775 	}
2776 
2777 	ldcp->hstate |= VER_INFO_SENT;
2778 	DBG2((vnetp,
2779 	    "vgen_send_version_negotiate: VER_INFO_SENT id (%lx) ver(%d,%d)\n",
2780 	    ldcp->ldc_id, vermsg.ver_major, vermsg.ver_minor));
2781 
2782 	return (VGEN_SUCCESS);
2783 }
2784 
2785 /* send attr info message to the peer over ldc */
2786 static int
2787 vgen_send_attr_info(vgen_ldc_t *ldcp)
2788 {
2789 	vnet_attr_msg_t	attrmsg;
2790 	vio_msg_tag_t	*tagp = &attrmsg.tag;
2791 	void		*vnetp = LDC_TO_VNET(ldcp);
2792 	int		rv;
2793 
2794 	bzero(&attrmsg, sizeof (attrmsg));
2795 
2796 	tagp->vio_msgtype = VIO_TYPE_CTRL;
2797 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
2798 	tagp->vio_subtype_env = VIO_ATTR_INFO;
2799 	tagp->vio_sid = ldcp->local_sid;
2800 
2801 	/* get attr msg payload from ldcp->local */
2802 	attrmsg.mtu = ldcp->local_hparams.mtu;
2803 	attrmsg.addr = ldcp->local_hparams.addr;
2804 	attrmsg.addr_type = ldcp->local_hparams.addr_type;
2805 	attrmsg.xfer_mode = ldcp->local_hparams.xfer_mode;
2806 	attrmsg.ack_freq = ldcp->local_hparams.ack_freq;
2807 
2808 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (attrmsg), B_FALSE);
2809 	if (rv != VGEN_SUCCESS) {
2810 		DWARN((vnetp, "vgen_send_attr_info: vgen_sendmsg failed"
2811 		    "id (%lx)\n", ldcp->ldc_id));
2812 		return (rv);
2813 	}
2814 
2815 	ldcp->hstate |= ATTR_INFO_SENT;
2816 	DBG2((vnetp, "vgen_send_attr_info: ATTR_INFO_SENT id (%lx)\n",
2817 	    ldcp->ldc_id));
2818 
2819 	return (VGEN_SUCCESS);
2820 }
2821 
2822 /* send descriptor ring register message to the peer over ldc */
2823 static int
2824 vgen_send_dring_reg(vgen_ldc_t *ldcp)
2825 {
2826 	vio_dring_reg_msg_t	msg;
2827 	vio_msg_tag_t		*tagp = &msg.tag;
2828 	void		*vnetp = LDC_TO_VNET(ldcp);
2829 	int		rv;
2830 
2831 	bzero(&msg, sizeof (msg));
2832 
2833 	tagp->vio_msgtype = VIO_TYPE_CTRL;
2834 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
2835 	tagp->vio_subtype_env = VIO_DRING_REG;
2836 	tagp->vio_sid = ldcp->local_sid;
2837 
2838 	/* get dring info msg payload from ldcp->local */
2839 	bcopy(&(ldcp->local_hparams.dring_cookie), (msg.cookie),
2840 		sizeof (ldc_mem_cookie_t));
2841 	msg.ncookies = ldcp->local_hparams.num_dcookies;
2842 	msg.num_descriptors = ldcp->local_hparams.num_desc;
2843 	msg.descriptor_size = ldcp->local_hparams.desc_size;
2844 
2845 	/*
2846 	 * dring_ident is set to 0. After mapping the dring, peer sets this
2847 	 * value and sends it in the ack, which is saved in
2848 	 * vgen_handle_dring_reg().
2849 	 */
2850 	msg.dring_ident = 0;
2851 
2852 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (msg), B_FALSE);
2853 	if (rv != VGEN_SUCCESS) {
2854 		DWARN((vnetp, "vgen_send_dring_reg: vgen_sendmsg failed"
2855 		    "id (%lx)\n", ldcp->ldc_id));
2856 		return (rv);
2857 	}
2858 
2859 	ldcp->hstate |= DRING_INFO_SENT;
2860 	DBG2((vnetp, "vgen_send_dring_reg: DRING_INFO_SENT id (%lx)\n",
2861 	    ldcp->ldc_id));
2862 
2863 	return (VGEN_SUCCESS);
2864 }
2865 
2866 static int
2867 vgen_send_rdx_info(vgen_ldc_t *ldcp)
2868 {
2869 	vio_rdx_msg_t	rdxmsg;
2870 	vio_msg_tag_t	*tagp = &rdxmsg.tag;
2871 	void		*vnetp = LDC_TO_VNET(ldcp);
2872 	int		rv;
2873 
2874 	bzero(&rdxmsg, sizeof (rdxmsg));
2875 
2876 	tagp->vio_msgtype = VIO_TYPE_CTRL;
2877 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
2878 	tagp->vio_subtype_env = VIO_RDX;
2879 	tagp->vio_sid = ldcp->local_sid;
2880 
2881 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (rdxmsg), B_FALSE);
2882 	if (rv != VGEN_SUCCESS) {
2883 		DWARN((vnetp, "vgen_send_rdx_info: vgen_sendmsg failed"
2884 		    "id (%lx)\n", ldcp->ldc_id));
2885 		return (rv);
2886 	}
2887 
2888 	ldcp->hstate |= RDX_INFO_SENT;
2889 	DBG2((vnetp, "vgen_send_rdx_info: RDX_INFO_SENT id (%lx)\n",
2890 	    ldcp->ldc_id));
2891 
2892 	return (VGEN_SUCCESS);
2893 }
2894 
2895 /* send descriptor ring data message to the peer over ldc */
2896 static int
2897 vgen_send_dring_data(vgen_ldc_t *ldcp, uint32_t start, int32_t end)
2898 {
2899 	vio_dring_msg_t	dringmsg, *msgp = &dringmsg;
2900 	vio_msg_tag_t	*tagp = &msgp->tag;
2901 	void		*vnetp = LDC_TO_VNET(ldcp);
2902 	int		rv;
2903 
2904 	bzero(msgp, sizeof (*msgp));
2905 
2906 	tagp->vio_msgtype = VIO_TYPE_DATA;
2907 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
2908 	tagp->vio_subtype_env = VIO_DRING_DATA;
2909 	tagp->vio_sid = ldcp->local_sid;
2910 
2911 	msgp->seq_num = ldcp->next_txseq;
2912 	msgp->dring_ident = ldcp->local_hparams.dring_ident;
2913 	msgp->start_idx = start;
2914 	msgp->end_idx = end;
2915 
2916 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (dringmsg), B_TRUE);
2917 	if (rv != VGEN_SUCCESS) {
2918 		DWARN((vnetp, "vgen_send_dring_data: vgen_sendmsg failed"
2919 		    " id (%lx)\n", ldcp->ldc_id));
2920 		return (rv);
2921 	}
2922 
2923 	ldcp->next_txseq++;
2924 	ldcp->statsp->dring_data_msgs++;
2925 
2926 	DBG2((vnetp, "vgen_send_dring_data: DRING_DATA_SENT id (%lx)\n",
2927 	    ldcp->ldc_id));
2928 
2929 	return (VGEN_SUCCESS);
2930 }
2931 
2932 /* send multicast addr info message to vsw */
2933 static int
2934 vgen_send_mcast_info(vgen_ldc_t *ldcp)
2935 {
2936 	vnet_mcast_msg_t	mcastmsg;
2937 	vnet_mcast_msg_t	*msgp;
2938 	vio_msg_tag_t		*tagp;
2939 	vgen_t			*vgenp;
2940 	void			*vnetp;
2941 	struct ether_addr	*mca;
2942 	int			rv;
2943 	int			i;
2944 	uint32_t		size;
2945 	uint32_t		mccount;
2946 	uint32_t		n;
2947 
2948 	msgp = &mcastmsg;
2949 	tagp = &msgp->tag;
2950 	vgenp = LDC_TO_VGEN(ldcp);
2951 	vnetp = LDC_TO_VNET(ldcp);
2952 
2953 	mccount = vgenp->mccount;
2954 	i = 0;
2955 
2956 	do {
2957 		tagp->vio_msgtype = VIO_TYPE_CTRL;
2958 		tagp->vio_subtype = VIO_SUBTYPE_INFO;
2959 		tagp->vio_subtype_env = VNET_MCAST_INFO;
2960 		tagp->vio_sid = ldcp->local_sid;
2961 
2962 		n = ((mccount >= VNET_NUM_MCAST) ? VNET_NUM_MCAST : mccount);
2963 		size = n * sizeof (struct ether_addr);
2964 
2965 		mca = &(vgenp->mctab[i]);
2966 		bcopy(mca, (msgp->mca), size);
2967 		msgp->set = B_TRUE;
2968 		msgp->count = n;
2969 
2970 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msgp),
2971 		    B_FALSE);
2972 		if (rv != VGEN_SUCCESS) {
2973 			DWARN((vnetp, "vgen_send_mcast_info: vgen_sendmsg err"
2974 			    "id (%lx)\n", ldcp->ldc_id));
2975 			return (rv);
2976 		}
2977 
2978 		mccount -= n;
2979 		i += n;
2980 
2981 	} while (mccount);
2982 
2983 	return (VGEN_SUCCESS);
2984 }
2985 
2986 /* Initiate Phase 2 of handshake */
2987 static int
2988 vgen_handshake_phase2(vgen_ldc_t *ldcp)
2989 {
2990 	int rv;
2991 	uint32_t ncookies = 0;
2992 	void	*vnetp = LDC_TO_VNET(ldcp);
2993 #ifdef DEBUG
2994 	if (vgen_hdbg & HDBG_OUT_STATE) {
2995 		/* simulate out of state condition */
2996 		vgen_hdbg &= ~(HDBG_OUT_STATE);
2997 		rv = vgen_send_rdx_info(ldcp);
2998 		return (rv);
2999 	}
3000 	if (vgen_hdbg & HDBG_TIMEOUT) {
3001 		/* simulate timeout condition */
3002 		vgen_hdbg &= ~(HDBG_TIMEOUT);
3003 		return (VGEN_SUCCESS);
3004 	}
3005 #endif
3006 	rv = vgen_send_attr_info(ldcp);
3007 	if (rv != VGEN_SUCCESS) {
3008 		return (rv);
3009 	}
3010 
3011 	/* Bind descriptor ring to the channel */
3012 	if (ldcp->num_txdcookies == 0) {
3013 		rv = ldc_mem_dring_bind(ldcp->ldc_handle, ldcp->tx_dhandle,
3014 		    LDC_SHADOW_MAP, LDC_MEM_RW, &ldcp->tx_dcookie, &ncookies);
3015 		if (rv != 0) {
3016 			DWARN((vnetp, "vgen_handshake_phase2: id (%lx) "
3017 			    "ldc_mem_dring_bind failed rv(%x)\n",
3018 			    ldcp->ldc_id, rv));
3019 			return (rv);
3020 		}
3021 		ASSERT(ncookies == 1);
3022 		ldcp->num_txdcookies = ncookies;
3023 	}
3024 
3025 	/* update local dring_info params */
3026 	bcopy(&(ldcp->tx_dcookie), &(ldcp->local_hparams.dring_cookie),
3027 		sizeof (ldc_mem_cookie_t));
3028 	ldcp->local_hparams.num_dcookies = ldcp->num_txdcookies;
3029 	ldcp->local_hparams.num_desc = ldcp->num_txds;
3030 	ldcp->local_hparams.desc_size = sizeof (vnet_public_desc_t);
3031 
3032 	rv = vgen_send_dring_reg(ldcp);
3033 	if (rv != VGEN_SUCCESS) {
3034 		return (rv);
3035 	}
3036 
3037 	return (VGEN_SUCCESS);
3038 }
3039 
3040 /*
3041  * This function resets the handshake phase to VH_PHASE0(pre-handshake phase).
3042  * This can happen after a channel comes up (status: LDC_UP) or
3043  * when handshake gets terminated due to various conditions.
3044  */
3045 static void
3046 vgen_reset_hphase(vgen_ldc_t *ldcp)
3047 {
3048 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
3049 	void	*vnetp = LDC_TO_VNET(ldcp);
3050 	ldc_status_t istatus;
3051 	int rv;
3052 
3053 	DBG2((vnetp, "vgen_reset_hphase: id(0x%lx)\n", ldcp->ldc_id));
3054 	/* reset hstate and hphase */
3055 	ldcp->hstate = 0;
3056 	ldcp->hphase = VH_PHASE0;
3057 
3058 	/* reset handshake watchdog timeout */
3059 	if (ldcp->htid) {
3060 		(void) untimeout(ldcp->htid);
3061 		ldcp->htid = 0;
3062 	}
3063 
3064 	if (ldcp->local_hparams.dring_ready) {
3065 		ldcp->local_hparams.dring_ready = B_FALSE;
3066 	}
3067 
3068 	/* Unbind tx descriptor ring from the channel */
3069 	if (ldcp->num_txdcookies) {
3070 		rv = ldc_mem_dring_unbind(ldcp->tx_dhandle);
3071 		if (rv != 0) {
3072 			DWARN((vnetp,
3073 			    "vgen_reset_hphase: ldc_mem_dring_unbind "
3074 			    "failed id(%lx)\n", ldcp->ldc_id));
3075 		}
3076 		ldcp->num_txdcookies = 0;
3077 	}
3078 
3079 	if (ldcp->peer_hparams.dring_ready) {
3080 		ldcp->peer_hparams.dring_ready = B_FALSE;
3081 		/* Unmap peer's dring */
3082 		(void) ldc_mem_dring_unmap(ldcp->rx_dhandle);
3083 		vgen_clobber_rxds(ldcp);
3084 	}
3085 
3086 	vgen_clobber_tbufs(ldcp);
3087 
3088 	/*
3089 	 * clear local handshake params and initialize.
3090 	 */
3091 	bzero(&(ldcp->local_hparams), sizeof (ldcp->local_hparams));
3092 
3093 	/* set version to the highest version supported */
3094 	ldcp->local_hparams.ver_major =
3095 			ldcp->vgen_versions[0].ver_major;
3096 	ldcp->local_hparams.ver_minor =
3097 			ldcp->vgen_versions[0].ver_minor;
3098 	ldcp->local_hparams.dev_class = VDEV_NETWORK;
3099 
3100 	/* set attr_info params */
3101 	ldcp->local_hparams.mtu = ETHERMAX;
3102 	ldcp->local_hparams.addr =
3103 		vgen_macaddr_strtoul(vgenp->macaddr);
3104 	ldcp->local_hparams.addr_type = ADDR_TYPE_MAC;
3105 	ldcp->local_hparams.xfer_mode = VIO_DRING_MODE;
3106 	ldcp->local_hparams.ack_freq = 0;	/* don't need acks */
3107 
3108 	/*
3109 	 * Note: dring is created, but not bound yet.
3110 	 * local dring_info params will be updated when we bind the dring in
3111 	 * vgen_handshake_phase2().
3112 	 * dring_ident is set to 0. After mapping the dring, peer sets this
3113 	 * value and sends it in the ack, which is saved in
3114 	 * vgen_handle_dring_reg().
3115 	 */
3116 	ldcp->local_hparams.dring_ident = 0;
3117 
3118 	/* clear peer_hparams */
3119 	bzero(&(ldcp->peer_hparams), sizeof (ldcp->peer_hparams));
3120 
3121 	/* reset the channel if required */
3122 	if (ldcp->need_ldc_reset) {
3123 		DWARN((vnetp,
3124 		    "vgen_reset_hphase: id (%lx), Doing Channel Reset...\n",
3125 		    ldcp->ldc_id));
3126 		ldcp->need_ldc_reset = B_FALSE;
3127 		(void) ldc_down(ldcp->ldc_handle);
3128 		(void) ldc_status(ldcp->ldc_handle, &istatus);
3129 		DBG2((vnetp,
3130 		    "vgen_reset_hphase: id (%lx), Reset Done,ldc_status(%x)\n",
3131 		    ldcp->ldc_id, istatus));
3132 		ldcp->ldc_status = istatus;
3133 
3134 		/* clear sids */
3135 		ldcp->local_sid = 0;
3136 		ldcp->peer_sid = 0;
3137 
3138 		/* try to bring the channel up */
3139 		rv = ldc_up(ldcp->ldc_handle);
3140 		if (rv != 0) {
3141 			DWARN((vnetp,
3142 			    "vgen_reset_hphase: ldc_up err id(%lx) rv(%d)\n",
3143 			    ldcp->ldc_id, rv));
3144 		}
3145 
3146 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
3147 			DWARN((vnetp,
3148 			    "vgen_reset_hphase: ldc_status err id(%lx)\n"));
3149 		} else {
3150 			ldcp->ldc_status = istatus;
3151 		}
3152 
3153 		/* if channel is already UP - restart handshake */
3154 		if (istatus == LDC_UP) {
3155 			/* Initialize local session id */
3156 			ldcp->local_sid = ddi_get_lbolt();
3157 			vgen_handshake(vh_nextphase(ldcp));
3158 		}
3159 	}
3160 }
3161 
3162 /* wrapper function for vgen_reset_hphase */
3163 static void
3164 vgen_handshake_reset(vgen_ldc_t *ldcp)
3165 {
3166 	ASSERT(MUTEX_HELD(&ldcp->cblock));
3167 	mutex_enter(&ldcp->txlock);
3168 	mutex_enter(&ldcp->tclock);
3169 
3170 	vgen_reset_hphase(ldcp);
3171 
3172 	mutex_exit(&ldcp->tclock);
3173 	mutex_exit(&ldcp->txlock);
3174 }
3175 
3176 /*
3177  * Initiate handshake with the peer by sending various messages
3178  * based on the handshake-phase that the channel is currently in.
3179  */
3180 static void
3181 vgen_handshake(vgen_ldc_t *ldcp)
3182 {
3183 	uint32_t hphase = ldcp->hphase;
3184 	void	*vnetp = LDC_TO_VNET(ldcp);
3185 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
3186 	ldc_status_t	istatus;
3187 	int	rv = 0;
3188 
3189 	switch (hphase) {
3190 
3191 	case VH_PHASE1:
3192 
3193 		/*
3194 		 * start timer, for entire handshake process, turn this timer
3195 		 * off if all phases of handshake complete successfully and
3196 		 * hphase goes to VH_DONE(below) or
3197 		 * vgen_reset_hphase() gets called or
3198 		 * channel is reset due to errors or
3199 		 * vgen_ldc_uninit() is invoked(vgen_stop).
3200 		 */
3201 		ldcp->htid = timeout(vgen_hwatchdog, (caddr_t)ldcp,
3202 		    drv_usectohz(vgen_hwd_interval * 1000));
3203 
3204 		/* Phase 1 involves negotiating the version */
3205 		rv = vgen_send_version_negotiate(ldcp);
3206 		break;
3207 
3208 	case VH_PHASE2:
3209 		rv = vgen_handshake_phase2(ldcp);
3210 		break;
3211 
3212 	case VH_PHASE3:
3213 		rv = vgen_send_rdx_info(ldcp);
3214 		break;
3215 
3216 	case VH_DONE:
3217 		/* reset handshake watchdog timeout */
3218 		if (ldcp->htid) {
3219 			(void) untimeout(ldcp->htid);
3220 			ldcp->htid = 0;
3221 		}
3222 		ldcp->hretries = 0;
3223 #if 0
3224 		vgen_print_ldcinfo(ldcp);
3225 #endif
3226 		DBG1((vnetp, "vgen_handshake: id(0x%lx) Handshake Done\n",
3227 		    ldcp->ldc_id));
3228 
3229 		if (ldcp->need_mcast_sync) {
3230 			/* need to sync multicast table with vsw */
3231 
3232 			ldcp->need_mcast_sync = B_FALSE;
3233 			mutex_exit(&ldcp->cblock);
3234 
3235 			mutex_enter(&vgenp->lock);
3236 			rv = vgen_send_mcast_info(ldcp);
3237 			mutex_exit(&vgenp->lock);
3238 
3239 			mutex_enter(&ldcp->cblock);
3240 			if (rv != VGEN_SUCCESS)
3241 				break;
3242 		}
3243 
3244 		/*
3245 		 * Check if mac layer should be notified to restart
3246 		 * transmissions. This can happen if the channel got
3247 		 * reset and vgen_clobber_tbufs() is called, while
3248 		 * need_resched is set.
3249 		 */
3250 		mutex_enter(&ldcp->tclock);
3251 		if (ldcp->need_resched) {
3252 			ldcp->need_resched = B_FALSE;
3253 			vnet_tx_update(vgenp->vnetp);
3254 		}
3255 		mutex_exit(&ldcp->tclock);
3256 
3257 		break;
3258 
3259 	default:
3260 		break;
3261 	}
3262 
3263 	if (rv == ECONNRESET) {
3264 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
3265 			DWARN((vnetp,
3266 			    "vgen_handshake: ldc_status err id(%lx)\n"));
3267 		} else {
3268 			ldcp->ldc_status = istatus;
3269 		}
3270 		vgen_handle_evt_reset(ldcp, B_FALSE);
3271 	} else if (rv) {
3272 		vgen_handshake_reset(ldcp);
3273 	}
3274 }
3275 
3276 /*
3277  * Check if the current handshake phase has completed successfully and
3278  * return the status.
3279  */
3280 static int
3281 vgen_handshake_done(vgen_ldc_t *ldcp)
3282 {
3283 	uint32_t	hphase = ldcp->hphase;
3284 	int 		status = 0;
3285 	void		*vnetp = LDC_TO_VNET(ldcp);
3286 
3287 	switch (hphase) {
3288 
3289 	case VH_PHASE1:
3290 		/*
3291 		 * Phase1 is done, if version negotiation
3292 		 * completed successfully.
3293 		 */
3294 		status = ((ldcp->hstate & VER_NEGOTIATED) ==
3295 			VER_NEGOTIATED);
3296 		break;
3297 
3298 	case VH_PHASE2:
3299 		/*
3300 		 * Phase 2 is done, if attr info and dring info
3301 		 * have been exchanged successfully.
3302 		 */
3303 		status = (((ldcp->hstate & ATTR_INFO_EXCHANGED) ==
3304 			    ATTR_INFO_EXCHANGED) &&
3305 			    ((ldcp->hstate & DRING_INFO_EXCHANGED) ==
3306 			    DRING_INFO_EXCHANGED));
3307 		break;
3308 
3309 	case VH_PHASE3:
3310 		/* Phase 3 is done, if rdx msg has been exchanged */
3311 		status = ((ldcp->hstate & RDX_EXCHANGED) ==
3312 			RDX_EXCHANGED);
3313 		break;
3314 
3315 	default:
3316 		break;
3317 	}
3318 
3319 	if (status == 0) {
3320 		return (VGEN_FAILURE);
3321 	}
3322 	DBG2((vnetp, "VNET_HANDSHAKE_DONE: PHASE(%d)\n", hphase));
3323 	return (VGEN_SUCCESS);
3324 }
3325 
3326 /* retry handshake on failure */
3327 static void
3328 vgen_handshake_retry(vgen_ldc_t *ldcp)
3329 {
3330 	/* reset handshake phase */
3331 	vgen_handshake_reset(ldcp);
3332 	if (vgen_max_hretries) {	/* handshake retry is specified */
3333 		if (ldcp->hretries++ < vgen_max_hretries)
3334 			vgen_handshake(vh_nextphase(ldcp));
3335 	}
3336 }
3337 
/*
 * Handle a version info msg from the peer or an ACK/NACK from the peer
 * to a version info msg that we sent.
 *
 * The buffer that tagp points to is reused in place: for an INFO msg the
 * subtype, sid and (possibly) version fields are overwritten and the same
 * buffer is sent back to the peer as the reply.
 *
 * Returns VGEN_SUCCESS when the message has been handled, VGEN_FAILURE
 * when negotiation cannot continue (unsupported dev_class, no matching
 * version, msg received in the wrong phase), or the non-success value
 * returned by vgen_sendmsg()/vgen_send_version_negotiate().
 */
static int
vgen_handle_version_negotiate(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
{
	vio_ver_msg_t	*vermsg = (vio_ver_msg_t *)tagp;
	int		ack = 0;	/* set when we reply with an ACK */
	int		failed = 0;	/* set when no version can match */
	void		*vnetp = LDC_TO_VNET(ldcp);
	int		idx;
	/* presumably ordered from highest to lowest version - TODO confirm */
	vgen_ver_t	*versions = ldcp->vgen_versions;
	int		rv = 0;

	DBG1((vnetp, "vgen_handle_version_negotiate: enter\n"));
	switch (tagp->vio_subtype) {
	case VIO_SUBTYPE_INFO:

		/*  Cache sid of peer if this is the first time */
		if (ldcp->peer_sid == 0) {
			DBG2((vnetp,
			    "vgen_handle_version_negotiate: id (%lx) Caching"
			    " peer_sid(%x)\n", ldcp->ldc_id, tagp->vio_sid));
			ldcp->peer_sid = tagp->vio_sid;
		}

		if (ldcp->hphase != VH_PHASE1) {
			/*
			 * If we are not already in VH_PHASE1, reset to
			 * pre-handshake state, and initiate handshake
			 * to the peer too.
			 */
			vgen_handshake_reset(ldcp);
			vgen_handshake(vh_nextphase(ldcp));
		}
		ldcp->hstate |= VER_INFO_RCVD;

		/* save peer's requested values */
		ldcp->peer_hparams.ver_major = vermsg->ver_major;
		ldcp->peer_hparams.ver_minor = vermsg->ver_minor;
		ldcp->peer_hparams.dev_class = vermsg->dev_class;

		if ((vermsg->dev_class != VDEV_NETWORK) &&
		    (vermsg->dev_class != VDEV_NETWORK_SWITCH)) {
			/* unsupported dev_class, send NACK */

			DWARN((vnetp,
			    "vgen_handle_version_negotiate: Version"
			    " Negotiation Failed id (%lx)\n", ldcp->ldc_id));

			tagp->vio_subtype = VIO_SUBTYPE_NACK;
			tagp->vio_sid = ldcp->local_sid;
			/* send reply msg back to peer */
			rv = vgen_sendmsg(ldcp, (caddr_t)tagp,
			    sizeof (*vermsg), B_FALSE);
			if (rv != VGEN_SUCCESS) {
				/* a send error takes precedence */
				return (rv);
			}
			return (VGEN_FAILURE);
		}

		DBG2((vnetp, "vgen_handle_version_negotiate: VER_INFO_RCVD,"
		    " id (%lx), ver(%d,%d)\n", ldcp->ldc_id,
		    vermsg->ver_major,  vermsg->ver_minor));

		idx = 0;

		/*
		 * Walk our version table from index 0 and decide how to
		 * answer the peer's proposal; every exit from this loop
		 * leaves the reply subtype set in the tag.
		 */
		for (;;) {

			if (vermsg->ver_major > versions[idx].ver_major) {

				/* nack with next lower version */
				tagp->vio_subtype = VIO_SUBTYPE_NACK;
				vermsg->ver_major = versions[idx].ver_major;
				vermsg->ver_minor = versions[idx].ver_minor;
				break;
			}

			if (vermsg->ver_major == versions[idx].ver_major) {

				/* major version match - ACK version */
				tagp->vio_subtype = VIO_SUBTYPE_ACK;
				ack = 1;

				/*
				 * lower minor version to the one this endpt
				 * supports, if necessary
				 */
				if (vermsg->ver_minor >
				    versions[idx].ver_minor) {
					vermsg->ver_minor =
						versions[idx].ver_minor;
					ldcp->peer_hparams.ver_minor =
						versions[idx].ver_minor;
				}
				break;
			}

			idx++;

			if (idx == VGEN_NUM_VER) {

				/* no version match - send NACK */
				tagp->vio_subtype = VIO_SUBTYPE_NACK;
				vermsg->ver_major = 0;
				vermsg->ver_minor = 0;
				failed = 1;
				break;
			}

		}

		tagp->vio_sid = ldcp->local_sid;

		/* send reply msg back to peer */
		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*vermsg),
		    B_FALSE);
		if (rv != VGEN_SUCCESS) {
			return (rv);
		}

		/* state is only updated once the reply has been sent */
		if (ack) {
			ldcp->hstate |= VER_ACK_SENT;
			DBG2((vnetp, "vgen_handle_version_negotiate:"
			    " VER_ACK_SENT, id (%lx) ver(%d,%d) \n",
			    ldcp->ldc_id, vermsg->ver_major,
			    vermsg->ver_minor));
		}
		if (failed) {
			DWARN((vnetp, "vgen_handle_version_negotiate:"
			    " Version Negotiation Failed id (%lx)\n",
			    ldcp->ldc_id));
			return (VGEN_FAILURE);
		}
		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {

			/*  VER_ACK_SENT and VER_ACK_RCVD */

			/* local and peer versions match? */
			ASSERT((ldcp->local_hparams.ver_major ==
				ldcp->peer_hparams.ver_major) &&
				(ldcp->local_hparams.ver_minor ==
				ldcp->peer_hparams.ver_minor));

			/* move to the next phase */
			vgen_handshake(vh_nextphase(ldcp));
		}

		break;

	case VIO_SUBTYPE_ACK:

		if (ldcp->hphase != VH_PHASE1) {
			/*  This should not happen. */
			DWARN((vnetp,
			    "vgen_handle_version_negotiate:"
			    " VER_ACK_RCVD id (%lx) Invalid Phase(%u)\n",
			    ldcp->ldc_id, ldcp->hphase));
			return (VGEN_FAILURE);
		}

		/* SUCCESS - we have agreed on a version */
		ldcp->local_hparams.ver_major = vermsg->ver_major;
		ldcp->local_hparams.ver_minor = vermsg->ver_minor;
		ldcp->hstate |= VER_ACK_RCVD;

		DBG2((vnetp, "vgen_handle_version_negotiate:"
		    " VER_ACK_RCVD, id (%lx) ver(%d,%d) \n",
		    ldcp->ldc_id, vermsg->ver_major,  vermsg->ver_minor));

		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {

			/*  VER_ACK_SENT and VER_ACK_RCVD */

			/* local and peer versions match? */
			ASSERT((ldcp->local_hparams.ver_major ==
				ldcp->peer_hparams.ver_major) &&
				(ldcp->local_hparams.ver_minor ==
				ldcp->peer_hparams.ver_minor));

			/* move to the next phase */
			vgen_handshake(vh_nextphase(ldcp));
		}
		break;

	case VIO_SUBTYPE_NACK:

		if (ldcp->hphase != VH_PHASE1) {
			/*  This should not happen.  */
			DWARN((vnetp,
			    "vgen_handle_version_negotiate:"
			    " VER_NACK_RCVD id (%lx) Invalid Phase(%u)\n",
			    ldcp->ldc_id, ldcp->hphase));
			return (VGEN_FAILURE);
		}

		DBG2((vnetp, "vgen_handle_version_negotiate:"
		    " VER_NACK_RCVD id(%lx) next ver(%d,%d)\n",
		    ldcp->ldc_id, vermsg->ver_major, vermsg->ver_minor));

		/* check if version in NACK is zero */
		if (vermsg->ver_major == 0 && vermsg->ver_minor == 0) {
			/*
			 * Version Negotiation has failed.
			 */
			DWARN((vnetp, "vgen_handle_version_negotiate:"
			    " Version Negotiation Failed id (%lx)\n",
			    ldcp->ldc_id));
			return (VGEN_FAILURE);
		}

		idx = 0;

		/*
		 * Pick the version to propose next, based on the version
		 * the peer suggested in its NACK.
		 */
		for (;;) {

			if (vermsg->ver_major > versions[idx].ver_major) {
				/* select next lower version */

				ldcp->local_hparams.ver_major =
					versions[idx].ver_major;
				ldcp->local_hparams.ver_minor =
					versions[idx].ver_minor;
				break;
			}

			if (vermsg->ver_major == versions[idx].ver_major) {
				/* major version match */

				ldcp->local_hparams.ver_major =
					versions[idx].ver_major;

				ldcp->local_hparams.ver_minor =
					versions[idx].ver_minor;
				break;
			}

			idx++;

			if (idx == VGEN_NUM_VER) {
				/*
				 * no version match.
				 * Version Negotiation has failed.
				 */
				DWARN((vnetp, "vgen_handle_version_negotiate:"
				    " Version Negotiation Failed id (%lx)\n",
				    ldcp->ldc_id));
				return (VGEN_FAILURE);
			}

		}

		/* propose the newly selected version to the peer */
		rv = vgen_send_version_negotiate(ldcp);
		if (rv != VGEN_SUCCESS) {
			return (rv);
		}

		break;
	}

	DBG1((vnetp, "vgen_handle_version_negotiate: exit\n"));
	return (VGEN_SUCCESS);
}
3601 
3602 /* Check if the attributes are supported */
3603 static int
3604 vgen_check_attr_info(vgen_ldc_t *ldcp, vnet_attr_msg_t *msg)
3605 {
3606 	_NOTE(ARGUNUSED(ldcp))
3607 
3608 #if 0
3609 	uint64_t port_macaddr;
3610 	port_macaddr = vgen_macaddr_strtoul((uint8_t *)
3611 				&(ldcp->portp->macaddr));
3612 #endif
3613 	/*
3614 	 * currently, we support these attr values:
3615 	 * mtu of ethernet, addr_type of mac, xfer_mode of
3616 	 * ldc shared memory, ack_freq of 0 (data is acked if
3617 	 * the ack bit is set in the descriptor) and the address should
3618 	 * match the address in the port node.
3619 	 */
3620 	if ((msg->mtu != ETHERMAX) ||
3621 	    (msg->addr_type != ADDR_TYPE_MAC) ||
3622 	    (msg->xfer_mode != VIO_DRING_MODE) ||
3623 	    (msg->ack_freq > 64)) {
3624 #if 0
3625 	    (msg->addr != port_macaddr))
3626 cmn_err(CE_CONT, "vgen_check_attr_info: msg->addr(%lx), port_macaddr(%lx)\n",
3627 	msg->addr, port_macaddr);
3628 #endif
3629 		return (VGEN_FAILURE);
3630 	}
3631 
3632 	return (VGEN_SUCCESS);
3633 }
3634 
3635 /*
3636  * Handle an attribute info msg from the peer or an ACK/NACK from the peer
3637  * to an attr info msg that we sent.
3638  */
3639 static int
3640 vgen_handle_attr_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
3641 {
3642 	vnet_attr_msg_t *attrmsg = (vnet_attr_msg_t *)tagp;
3643 	void		*vnetp = LDC_TO_VNET(ldcp);
3644 	int		ack = 0;
3645 	int		rv = 0;
3646 
3647 	DBG1((vnetp, "vgen_handle_attr_info: enter\n"));
3648 	if (ldcp->hphase != VH_PHASE2) {
3649 		DWARN((vnetp,
3650 		    "vgen_handle_attr_info: Rcvd ATTR_INFO id(%lx)"
3651 		    " subtype (%d), Invalid Phase(%u)\n", ldcp->ldc_id,
3652 		    tagp->vio_subtype, ldcp->hphase));
3653 		return (VGEN_FAILURE);
3654 	}
3655 	switch (tagp->vio_subtype) {
3656 	case VIO_SUBTYPE_INFO:
3657 
3658 		DBG2((vnetp, "vgen_handle_attr_info: ATTR_INFO_RCVD id(%lx)\n",
3659 		    ldcp->ldc_id));
3660 		ldcp->hstate |= ATTR_INFO_RCVD;
3661 
3662 		/* save peer's values */
3663 		ldcp->peer_hparams.mtu = attrmsg->mtu;
3664 		ldcp->peer_hparams.addr = attrmsg->addr;
3665 		ldcp->peer_hparams.addr_type = attrmsg->addr_type;
3666 		ldcp->peer_hparams.xfer_mode = attrmsg->xfer_mode;
3667 		ldcp->peer_hparams.ack_freq = attrmsg->ack_freq;
3668 
3669 		if (vgen_check_attr_info(ldcp, attrmsg) == VGEN_FAILURE) {
3670 			/* unsupported attr, send NACK */
3671 			tagp->vio_subtype = VIO_SUBTYPE_NACK;
3672 		} else {
3673 			ack = 1;
3674 			tagp->vio_subtype = VIO_SUBTYPE_ACK;
3675 		}
3676 		tagp->vio_sid = ldcp->local_sid;
3677 
3678 		/* send reply msg back to peer */
3679 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*attrmsg),
3680 		    B_FALSE);
3681 		if (rv != VGEN_SUCCESS) {
3682 			return (rv);
3683 		}
3684 
3685 		if (ack) {
3686 			ldcp->hstate |= ATTR_ACK_SENT;
3687 			DBG2((vnetp, "vgen_handle_attr_info:"
3688 			    " ATTR_ACK_SENT id(%lx)\n", ldcp->ldc_id));
3689 		} else {
3690 			/* failed */
3691 			DWARN((vnetp, "vgen_handle_attr_info:"
3692 			    " ATTR_NACK_SENT id(%lx)\n", ldcp->ldc_id));
3693 			return (VGEN_FAILURE);
3694 		}
3695 
3696 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
3697 			vgen_handshake(vh_nextphase(ldcp));
3698 		}
3699 
3700 		break;
3701 
3702 	case VIO_SUBTYPE_ACK:
3703 
3704 		ldcp->hstate |= ATTR_ACK_RCVD;
3705 
3706 		DBG2((vnetp, "vgen_handle_attr_info: ATTR_ACK_RCVD id(%lx)\n",
3707 		    ldcp->ldc_id));
3708 
3709 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
3710 			vgen_handshake(vh_nextphase(ldcp));
3711 		}
3712 		break;
3713 
3714 	case VIO_SUBTYPE_NACK:
3715 
3716 		DBG2((vnetp, "vgen_handle_attr_info: ATTR_NACK_RCVD id(%lx)\n",
3717 		    ldcp->ldc_id));
3718 		return (VGEN_FAILURE);
3719 	}
3720 	DBG1((vnetp, "vgen_handle_attr_info: exit\n"));
3721 	return (VGEN_SUCCESS);
3722 }
3723 
3724 /* Check if the dring info msg is ok */
3725 static int
3726 vgen_check_dring_reg(vio_dring_reg_msg_t *msg)
3727 {
3728 	/* check if msg contents are ok */
3729 	if ((msg->num_descriptors < 128) || (msg->descriptor_size <
3730 	    sizeof (vnet_public_desc_t))) {
3731 		return (VGEN_FAILURE);
3732 	}
3733 	return (VGEN_SUCCESS);
3734 }
3735 
/*
 * Handle a descriptor ring register msg from the peer or an ACK/NACK from
 * the peer to a dring register msg that we sent.
 *
 * For an INFO msg the peer's ring is validated with
 * vgen_check_dring_reg() and set up through vgen_init_rxds(); the
 * received buffer is then turned into the ACK/NACK reply and sent back.
 *
 * Returns VGEN_SUCCESS when handled, VGEN_FAILURE if the msg arrives
 * before VH_PHASE2, a NACK is sent or received, or the non-success value
 * from vgen_sendmsg().
 */
static int
vgen_handle_dring_reg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
{
	vio_dring_reg_msg_t *msg = (vio_dring_reg_msg_t *)tagp;
	void *vnetp = LDC_TO_VNET(ldcp);
	ldc_mem_cookie_t dcookie;	/* local copy of the peer's cookie */
	int ack = 0;			/* set once the peer's ring is usable */
	int rv = 0;

	DBG1((vnetp, "vgen_handle_dring_reg: enter\n"));
	if (ldcp->hphase < VH_PHASE2) {
		/* dring_info can be rcvd in any of the phases after Phase1 */
		DWARN((vnetp,
		    "vgen_handle_dring_reg: Rcvd DRING_INFO, id (%lx)"
		    " Subtype (%d), Invalid Phase(%u)\n", ldcp->ldc_id,
		    tagp->vio_subtype, ldcp->hphase));
		return (VGEN_FAILURE);
	}
	switch (tagp->vio_subtype) {
	case VIO_SUBTYPE_INFO:

		DBG2((vnetp, "vgen_handle_dring_reg: DRING_INFO_RCVD id(%lx)\n",
		    ldcp->ldc_id));
		ldcp->hstate |= DRING_INFO_RCVD;
		bcopy((msg->cookie), &dcookie, sizeof (dcookie));

		/*
		 * NOTE(review): ncookies comes from the peer's msg and is
		 * only checked by this ASSERT - confirm that is intended.
		 */
		ASSERT(msg->ncookies == 1);

		if (vgen_check_dring_reg(msg) == VGEN_SUCCESS) {
			/*
			 * verified dring info msg to be ok,
			 * now try to map the remote dring.
			 */
			rv = vgen_init_rxds(ldcp, msg->num_descriptors,
			    msg->descriptor_size, &dcookie,
			    msg->ncookies);
			if (rv == DDI_SUCCESS) {
				/* now we can ack the peer */
				ack = 1;
			}
		}
		if (ack == 0) {
			/* failed, send NACK */
			tagp->vio_subtype = VIO_SUBTYPE_NACK;
		} else {
			/* peer values are recorded only the first time */
			if (!(ldcp->peer_hparams.dring_ready)) {

				/* save peer's dring_info values */
				bcopy(&dcookie,
				    &(ldcp->peer_hparams.dring_cookie),
				    sizeof (dcookie));
				ldcp->peer_hparams.num_desc =
						msg->num_descriptors;
				ldcp->peer_hparams.desc_size =
						msg->descriptor_size;
				ldcp->peer_hparams.num_dcookies =
						msg->ncookies;

				/* set dring_ident for the peer */
				ldcp->peer_hparams.dring_ident =
							(uint64_t)ldcp->rxdp;
				/* return the dring_ident in ack msg */
				msg->dring_ident =
							(uint64_t)ldcp->rxdp;

				ldcp->peer_hparams.dring_ready = B_TRUE;
			}
			tagp->vio_subtype = VIO_SUBTYPE_ACK;
		}
		tagp->vio_sid = ldcp->local_sid;
		/* send reply msg back to peer */
		/* rv is reused here; the vgen_init_rxds() result lives in ack */
		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msg),
		    B_FALSE);
		if (rv != VGEN_SUCCESS) {
			return (rv);
		}

		if (ack) {
			ldcp->hstate |= DRING_ACK_SENT;
			DBG2((vnetp, "vgen_handle_dring_reg: DRING_ACK_SENT"
			    " id (%lx)\n", ldcp->ldc_id));
		} else {
			DWARN((vnetp, "vgen_handle_dring_reg: DRING_NACK_SENT"
			    " id (%lx)\n", ldcp->ldc_id));
			return (VGEN_FAILURE);
		}

		/* advance the handshake if this completed the phase */
		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
			vgen_handshake(vh_nextphase(ldcp));
		}

		break;

	case VIO_SUBTYPE_ACK:

		ldcp->hstate |= DRING_ACK_RCVD;

		DBG2((vnetp, "vgen_handle_dring_reg: DRING_ACK_RCVD"
		    " id (%lx)\n", ldcp->ldc_id));

		if (!(ldcp->local_hparams.dring_ready)) {
			/* local dring is now ready */
			ldcp->local_hparams.dring_ready = B_TRUE;

			/* save dring_ident acked by peer */
			ldcp->local_hparams.dring_ident =
				msg->dring_ident;
		}

		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
			vgen_handshake(vh_nextphase(ldcp));
		}

		break;

	case VIO_SUBTYPE_NACK:

		/* peer rejected our dring registration */
		DBG2((vnetp, "vgen_handle_dring_reg: DRING_NACK_RCVD"
		    " id (%lx)\n", ldcp->ldc_id));
		return (VGEN_FAILURE);
	}
	DBG1((vnetp, "vgen_handle_dring_reg: exit\n"));
	return (VGEN_SUCCESS);
}
3864 
3865 /*
3866  * Handle a rdx info msg from the peer or an ACK/NACK
3867  * from the peer to a rdx info msg that we sent.
3868  */
3869 static int
3870 vgen_handle_rdx_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
3871 {
3872 	void *vnetp = LDC_TO_VNET(ldcp);
3873 	int rv = 0;
3874 
3875 	DBG1((vnetp, "vgen_handle_rdx_info: enter\n"));
3876 	if (ldcp->hphase != VH_PHASE3) {
3877 		DWARN((vnetp,
3878 		    "vgen_handle_rdx_info: Rcvd RDX_INFO, id (%lx)"
3879 		    "  Subtype (%d), Invalid Phase(%u)\n", ldcp->ldc_id,
3880 		    tagp->vio_subtype, ldcp->hphase));
3881 		return (VGEN_FAILURE);
3882 	}
3883 	switch (tagp->vio_subtype) {
3884 	case VIO_SUBTYPE_INFO:
3885 
3886 		DBG2((vnetp, "vgen_handle_rdx_info: RDX_INFO_RCVD id (%lx)\n",
3887 		    ldcp->ldc_id));
3888 		ldcp->hstate |= RDX_INFO_RCVD;
3889 
3890 		tagp->vio_subtype = VIO_SUBTYPE_ACK;
3891 		tagp->vio_sid = ldcp->local_sid;
3892 		/* send reply msg back to peer */
3893 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (vio_rdx_msg_t),
3894 		    B_FALSE);
3895 		if (rv != VGEN_SUCCESS) {
3896 			return (rv);
3897 		}
3898 
3899 		ldcp->hstate |= RDX_ACK_SENT;
3900 		DBG2((vnetp, "vgen_handle_rdx_info: RDX_ACK_SENT id (%lx)\n",
3901 		    ldcp->ldc_id));
3902 
3903 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
3904 			vgen_handshake(vh_nextphase(ldcp));
3905 		}
3906 
3907 		break;
3908 
3909 	case VIO_SUBTYPE_ACK:
3910 
3911 		ldcp->hstate |= RDX_ACK_RCVD;
3912 
3913 		DBG2((vnetp, "vgen_handle_rdx_info: RDX_ACK_RCVD id (%lx)\n",
3914 		    ldcp->ldc_id));
3915 
3916 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
3917 			vgen_handshake(vh_nextphase(ldcp));
3918 		}
3919 		break;
3920 
3921 	case VIO_SUBTYPE_NACK:
3922 
3923 		DBG2((vnetp, "vgen_handle_rdx_info: RDX_NACK_RCVD id (%lx)\n",
3924 		    ldcp->ldc_id));
3925 		return (VGEN_FAILURE);
3926 	}
3927 	DBG1((vnetp, "vgen_handle_rdx_info: exit\n"));
3928 	return (VGEN_SUCCESS);
3929 }
3930 
3931 /* Handle ACK/NACK from vsw to a set multicast msg that we sent */
3932 static int
3933 vgen_handle_mcast_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
3934 {
3935 	void *vnetp = LDC_TO_VNET(ldcp);
3936 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3937 	vnet_mcast_msg_t *msgp = (vnet_mcast_msg_t *)tagp;
3938 	struct ether_addr *addrp;
3939 	int count;
3940 	int i;
3941 
3942 	DBG1((vnetp, "vgen_handle_mcast_info: enter\n"));
3943 	switch (tagp->vio_subtype) {
3944 
3945 	case VIO_SUBTYPE_INFO:
3946 
3947 		/* vnet shouldn't recv set mcast msg, only vsw handles it */
3948 		DWARN((vnetp,
3949 		    "vgen_handle_mcast_info: rcvd SET_MCAST_INFO id (%lx)\n",
3950 		    ldcp->ldc_id));
3951 		break;
3952 
3953 	case VIO_SUBTYPE_ACK:
3954 
3955 		/* success adding/removing multicast addr */
3956 		DBG2((vnetp,
3957 		    "vgen_handle_mcast_info: rcvd SET_MCAST_ACK id (%lx)\n",
3958 		    ldcp->ldc_id));
3959 		break;
3960 
3961 	case VIO_SUBTYPE_NACK:
3962 
3963 		DWARN((vnetp,
3964 		    "vgen_handle_mcast_info: rcvd SET_MCAST_NACK id (%lx)\n",
3965 		    ldcp->ldc_id));
3966 		if (!(msgp->set)) {
3967 			/* multicast remove request failed */
3968 			break;
3969 		}
3970 
3971 		/* multicast add request failed */
3972 		for (count = 0; count < msgp->count; count++) {
3973 			addrp = &(msgp->mca[count]);
3974 
3975 			/* delete address from the table */
3976 			for (i = 0; i < vgenp->mccount; i++) {
3977 				if (ether_cmp(addrp,
3978 				    &(vgenp->mctab[i])) == 0) {
3979 					if (vgenp->mccount > 1) {
3980 						vgenp->mctab[i] =
3981 						vgenp->mctab[vgenp->mccount-1];
3982 					}
3983 					vgenp->mccount--;
3984 					break;
3985 				}
3986 			}
3987 		}
3988 		break;
3989 
3990 	}
3991 	DBG1((vnetp, "vgen_handle_mcast_info: exit\n"));
3992 
3993 	return (VGEN_SUCCESS);
3994 }
3995 
3996 /* handler for control messages received from the peer ldc end-point */
3997 static int
3998 vgen_handle_ctrlmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
3999 {
4000 	void *vnetp = LDC_TO_VNET(ldcp);
4001 	int rv = 0;
4002 
4003 	DBG1((vnetp, "vgen_handle_ctrlmsg: enter\n"));
4004 	switch (tagp->vio_subtype_env) {
4005 
4006 	case VIO_VER_INFO:
4007 		rv = vgen_handle_version_negotiate(ldcp, tagp);
4008 		break;
4009 
4010 	case VIO_ATTR_INFO:
4011 		rv = vgen_handle_attr_info(ldcp, tagp);
4012 		break;
4013 
4014 	case VIO_DRING_REG:
4015 		rv = vgen_handle_dring_reg(ldcp, tagp);
4016 		break;
4017 
4018 	case VIO_RDX:
4019 		rv = vgen_handle_rdx_info(ldcp, tagp);
4020 		break;
4021 
4022 	case VNET_MCAST_INFO:
4023 		rv = vgen_handle_mcast_info(ldcp, tagp);
4024 		break;
4025 
4026 	}
4027 
4028 	DBG1((vnetp, "vgen_handle_ctrlmsg: exit\n"));
4029 	return (rv);
4030 }
4031 
4032 /* handler for data messages received from the peer ldc end-point */
4033 static int
4034 vgen_handle_datamsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp,
4035 	mblk_t **headp, mblk_t **tailp)
4036 {
4037 	void *vnetp = LDC_TO_VNET(ldcp);
4038 	int rv = 0;
4039 
4040 	DBG1((vnetp, "vgen_handle_datamsg: enter\n"));
4041 
4042 	if (ldcp->hphase != VH_DONE)
4043 		return (rv);
4044 	switch (tagp->vio_subtype_env) {
4045 	case VIO_DRING_DATA:
4046 		rv = vgen_handle_dring_data(ldcp, tagp, headp, tailp);
4047 		break;
4048 	default:
4049 		break;
4050 	}
4051 
4052 	DBG1((vnetp, "vgen_handle_datamsg: exit\n"));
4053 	return (rv);
4054 }
4055 
4056 static int
4057 vgen_send_dring_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp, uint32_t start,
4058     int32_t end, uint8_t pstate)
4059 {
4060 	vio_dring_msg_t *msgp = (vio_dring_msg_t *)tagp;
4061 	void *vnetp = LDC_TO_VNET(ldcp);
4062 	int rv = 0;
4063 
4064 	tagp->vio_subtype = VIO_SUBTYPE_ACK;
4065 	tagp->vio_sid = ldcp->local_sid;
4066 	msgp->start_idx = start;
4067 	msgp->end_idx = end;
4068 	msgp->dring_process_state = pstate;
4069 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msgp), B_FALSE);
4070 	if (rv != VGEN_SUCCESS) {
4071 		DWARN((vnetp, "vgen_send_dring_ack: id(%lx) vgen_sendmsg "
4072 		    "failed\n", (ldcp)->ldc_id));
4073 	}
4074 	return (rv);
4075 }
4076 
4077 static int
4078 vgen_handle_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp,
4079 	mblk_t **headp, mblk_t **tailp)
4080 {
4081 	vio_dring_msg_t *dringmsg;
4082 	vnet_public_desc_t *rxdp;
4083 	vnet_public_desc_t *txdp;
4084 	vio_dring_entry_hdr_t *hdrp;
4085 	vgen_stats_t *statsp;
4086 	struct ether_header *ehp;
4087 	mblk_t *mp = NULL;
4088 	mblk_t *bp = NULL;
4089 	mblk_t *bpt = NULL;
4090 	size_t nbytes;
4091 	size_t nread;
4092 	uint64_t off = 0;
4093 	uint32_t start;
4094 	int32_t end;
4095 	uint32_t datalen;
4096 	uint32_t ncookies;
4097 	uint32_t ack_start;
4098 	uint32_t ack_end;
4099 	uint32_t rxi;
4100 	uint32_t txi;
4101 	int rv = 0;
4102 	boolean_t rxd_err = B_FALSE;
4103 	boolean_t set_ack_start = B_FALSE;
4104 	vgen_private_desc_t *tbufp;
4105 	uint32_t next_rxi;
4106 	boolean_t ready_txd = B_FALSE;
4107 	uint32_t retries = 0;
4108 #ifdef VGEN_HANDLE_LOST_PKTS
4109 	int n;
4110 #endif
4111 #ifdef VGEN_REXMIT
4112 	uint64_t seqnum;
4113 #endif
4114 	void *vnetp = LDC_TO_VNET(ldcp);
4115 	boolean_t ack_needed = B_FALSE;
4116 
4117 	dringmsg = (vio_dring_msg_t *)tagp;
4118 	start = dringmsg->start_idx;
4119 	end = dringmsg->end_idx;
4120 	statsp = ldcp->statsp;
4121 
4122 	DBG1((vnetp, "vgen_handle_dring_data: enter\n"));
4123 	switch (tagp->vio_subtype) {
4124 
4125 	case VIO_SUBTYPE_INFO:
4126 		/*
4127 		 * received a data msg, which contains the start and end
4128 		 * indeces of the descriptors within the rx ring holding data,
4129 		 * the seq_num of data packet corresponding to the start index,
4130 		 * and the dring_ident.
4131 		 * We can now read the contents of each of these descriptors
4132 		 * and gather data from it.
4133 		 */
4134 		DBG2((vnetp,
4135 		    "vgen_handle_dring_data: INFO: start(%d), end(%d)\n",
4136 		    start, end));
4137 
4138 		/* validate rx start and end indeces */
4139 		if (!(CHECK_RXI(start, ldcp)) || ((end != -1) &&
4140 		    !(CHECK_RXI(end, ldcp)))) {
4141 			/* drop the message if invalid index */
4142 			break;
4143 		}
4144 
4145 		/* validate dring_ident */
4146 		if (dringmsg->dring_ident != ldcp->peer_hparams.dring_ident) {
4147 			/* invalid dring_ident, drop the msg */
4148 			break;
4149 		}
4150 #ifdef DEBUG
4151 		if (vgen_trigger_rxlost) {
4152 			/* drop this msg to simulate lost pkts for debugging */
4153 			vgen_trigger_rxlost = 0;
4154 			break;
4155 		}
4156 #endif
4157 
4158 #ifdef	VGEN_HANDLE_LOST_PKTS
4159 
4160 		/* receive start index doesn't match expected index */
4161 		if (ldcp->next_rxi != start) {
4162 
4163 			DWARN((vnetp, "vgen_handle_dring_data: id(%lx) "
4164 			    "next_rxi(%d) != start(%d)\n",
4165 			    ldcp->ldc_id, ldcp->next_rxi, start));
4166 
4167 			/* calculate the number of pkts lost */
4168 			if (start >= ldcp->next_rxi) {
4169 				n = start - ldcp->next_rxi;
4170 			} else  {
4171 				n = ldcp->num_rxds - (ldcp->next_rxi - start);
4172 			}
4173 
4174 			/*
4175 			 * sequence number of dring data message
4176 			 * is less than the next sequence number that
4177 			 * is expected:
4178 			 *
4179 			 * drop the message and the corresponding packets.
4180 			 */
4181 			if (ldcp->next_rxseq > dringmsg->seq_num) {
4182 				DWARN((vnetp, "vgen_handle_dring_data: id(%lx) "
4183 				    "dropping pkts, expected rxseq(0x%lx) "
4184 				    "> recvd(0x%lx)\n",
4185 				    ldcp->ldc_id, ldcp->next_rxseq,
4186 				    dringmsg->seq_num));
4187 				/*
4188 				 * duplicate/multiple retransmissions from
4189 				 * sender?? drop this msg.
4190 				 */
4191 				break;
4192 			}
4193 
4194 			/*
4195 			 * sequence number of dring data message
4196 			 * is greater than the next expected sequence number
4197 			 *
4198 			 * send a NACK back to the peer to indicate lost
4199 			 * packets.
4200 			 */
4201 			if (dringmsg->seq_num > ldcp->next_rxseq) {
4202 				statsp->rx_lost_pkts += n;
4203 				tagp->vio_subtype = VIO_SUBTYPE_NACK;
4204 				tagp->vio_sid = ldcp->local_sid;
4205 				/* indicate the range of lost descriptors */
4206 				dringmsg->start_idx = ldcp->next_rxi;
4207 				rxi = start;
4208 				DECR_RXI(rxi, ldcp);
4209 				dringmsg->end_idx = rxi;
4210 				/* dring ident is left unchanged */
4211 				rv = vgen_sendmsg(ldcp, (caddr_t)tagp,
4212 				    sizeof (*dringmsg), B_FALSE);
4213 				if (rv != VGEN_SUCCESS) {
4214 					DWARN((vnetp,
4215 					    "vgen_handle_dring_data: id(%lx) "
4216 					    "vgen_sendmsg failed, "
4217 					    "stype: NACK\n", ldcp->ldc_id));
4218 					goto error_ret;
4219 				}
4220 #ifdef VGEN_REXMIT
4221 				/*
4222 				 * stop further processing until peer
4223 				 * retransmits with the right index.
4224 				 * update next_rxseq expected.
4225 				 */
4226 				ldcp->next_rxseq += 1;
4227 				break;
4228 #else	/* VGEN_REXMIT */
4229 				/*
4230 				 * treat this range of descrs/pkts as dropped
4231 				 * and set the new expected values for next_rxi
4232 				 * and next_rxseq. continue(below) to process
4233 				 * from the new start index.
4234 				 */
4235 				ldcp->next_rxi = start;
4236 				ldcp->next_rxseq += 1;
4237 #endif	/* VGEN_REXMIT */
4238 
4239 			} else if (dringmsg->seq_num == ldcp->next_rxseq) {
4240 				/*
4241 				 * expected and received seqnums match, but
4242 				 * the descriptor indeces don't?
4243 				 *
4244 				 * restart handshake with peer.
4245 				 */
4246 				DWARN((vnetp,
4247 				    "vgen_handle_dring_data: id(%lx) "
4248 				    "next_rxseq(0x%lx) == seq_num(0x%lx)\n",
4249 				    ldcp->ldc_id, ldcp->next_rxseq,
4250 				    dringmsg->seq_num));
4251 
4252 			}
4253 
4254 		} else {
4255 			/* expected and start dring indeces match */
4256 
4257 			if (dringmsg->seq_num != ldcp->next_rxseq) {
4258 
4259 				/* seqnums don't match */
4260 
4261 				DWARN((vnetp,
4262 				    "vgen_handle_dring_data: id(%lx) "
4263 				    "next_rxseq(0x%lx) != seq_num(0x%lx)\n",
4264 				    ldcp->ldc_id, ldcp->next_rxseq,
4265 				    dringmsg->seq_num));
4266 			}
4267 		}
4268 
4269 #endif	/* VGEN_HANDLE_LOST_PKTS */
4270 
4271 		/*
4272 		 * start processing the descriptors from the specified
4273 		 * start index, up to the index a descriptor is not ready
4274 		 * to be processed or we process the entire descriptor ring
4275 		 * and wrap around upto the start index.
4276 		 */
4277 
4278 		/* need to set the start index of descriptors to be ack'd */
4279 		set_ack_start = B_TRUE;
4280 
4281 		/* index upto which we have ack'd */
4282 		ack_end = start;
4283 		DECR_RXI(ack_end, ldcp);
4284 
4285 		next_rxi = rxi =  start;
4286 		do {
4287 
4288 vgen_recv_retry:	rv = ldc_mem_dring_acquire(ldcp->rx_dhandle, rxi, rxi);
4289 			if (rv != 0) {
4290 				DWARN((vnetp, "vgen_handle_dring_data: "
4291 				    "ldc_mem_dring_acquire() failed"
4292 				    " id(%lx) rv(%d)\n", ldcp->ldc_id, rv));
4293 				statsp->ierrors++;
4294 				goto error_ret;
4295 			}
4296 
4297 			rxdp = &(ldcp->rxdp[rxi]);
4298 			hdrp = &rxdp->hdr;
4299 
4300 			if (hdrp->dstate != VIO_DESC_READY) {
4301 				/*
4302 				 * descriptor is not ready.
4303 				 * retry descriptor acquire, stop processing
4304 				 * after max # retries.
4305 				 */
4306 				if (retries == vgen_recv_retries)
4307 					break;
4308 				retries++;
4309 				drv_usecwait(vgen_recv_delay);
4310 				goto vgen_recv_retry;
4311 			}
4312 			retries = 0;
4313 
4314 			if (set_ack_start) {
4315 				/*
4316 				 * initialize the start index of the range
4317 				 * of descriptors to be ack'd.
4318 				 */
4319 				ack_start = rxi;
4320 				set_ack_start = B_FALSE;
4321 			}
4322 
4323 			datalen = rxdp->nbytes;
4324 			ncookies = rxdp->ncookies;
4325 			if ((datalen < ETHERMIN) ||
4326 			    (ncookies == 0) ||
4327 			    (ncookies > MAX_COOKIES)) {
4328 				rxd_err = B_TRUE;
4329 			} else {
4330 				/*
4331 				 * Try to allocate an mblk from the free pool
4332 				 * of recv mblks for the channel.
4333 				 * If this fails, use allocb().
4334 				 */
4335 				mp = vio_allocb(ldcp->rmp);
4336 				if (!mp) {
4337 					/*
4338 					 * The data buffer returned by
4339 					 * allocb(9F) is 8byte aligned. We
4340 					 * allocate extra 8 bytes to ensure
4341 					 * size is multiple of 8 bytes for
4342 					 * ldc_mem_copy().
4343 					 */
4344 					statsp->rx_vio_allocb_fail++;
4345 					mp = allocb(VNET_IPALIGN + datalen + 8,
4346 					    BPRI_MED);
4347 				}
4348 				nbytes = (VNET_IPALIGN + datalen + 7) & ~7;
4349 			}
4350 			if ((rxd_err) || (mp == NULL)) {
4351 				/*
4352 				 * rxd_err or allocb() failure,
4353 				 * drop this packet, get next.
4354 				 */
4355 				if (rxd_err) {
4356 					statsp->ierrors++;
4357 					rxd_err = B_FALSE;
4358 				} else {
4359 					statsp->rx_allocb_fail++;
4360 				}
4361 
4362 				ack_needed = hdrp->ack;
4363 
4364 				/* set descriptor done bit */
4365 				hdrp->dstate = VIO_DESC_DONE;
4366 
4367 				rv = ldc_mem_dring_release(ldcp->rx_dhandle,
4368 				    rxi, rxi);
4369 				if (rv != 0) {
4370 					DWARN((vnetp, "vgen_handle_dring_data: "
4371 					    "ldc_mem_dring_release err id(%lx)"
4372 					    " rv(%d)\n", ldcp->ldc_id, rv));
4373 					goto error_ret;
4374 				}
4375 
4376 				if (ack_needed) {
4377 					ack_needed = B_FALSE;
4378 					/*
4379 					 * sender needs ack for this packet,
4380 					 * ack pkts upto this index.
4381 					 */
4382 					ack_end = rxi;
4383 
4384 					rv = vgen_send_dring_ack(ldcp, tagp,
4385 					    ack_start, ack_end,
4386 					    VIO_DP_ACTIVE);
4387 					if (rv != VGEN_SUCCESS) {
4388 						goto error_ret;
4389 					}
4390 
4391 					/* need to set new ack start index */
4392 					set_ack_start = B_TRUE;
4393 				}
4394 				goto vgen_next_rxi;
4395 			}
4396 
4397 			nread = nbytes;
4398 			rv = ldc_mem_copy(ldcp->ldc_handle,
4399 			    (caddr_t)mp->b_rptr, off, &nread,
4400 			    rxdp->memcookie, ncookies, LDC_COPY_IN);
4401 
4402 			/* if ldc_mem_copy() failed */
4403 			if (rv) {
4404 				DWARN((vnetp,
4405 				    "vgen_handle_dring_data: ldc_mem_copy err "
4406 				    " id(%lx) rv(%d)\n", ldcp->ldc_id, rv));
4407 				statsp->ierrors++;
4408 				freemsg(mp);
4409 				goto error_ret;
4410 			}
4411 
4412 			ack_needed = hdrp->ack;
4413 			hdrp->dstate = VIO_DESC_DONE;
4414 
4415 			rv = ldc_mem_dring_release(ldcp->rx_dhandle, rxi, rxi);
4416 			if (rv != 0) {
4417 				DWARN((vnetp, "vgen_handle_dring_data: "
4418 				    "ldc_mem_dring_release err id(%lx)"
4419 				    " rv(%d)\n", ldcp->ldc_id, rv));
4420 				goto error_ret;
4421 			}
4422 
4423 			mp->b_rptr += VNET_IPALIGN;
4424 
4425 			if (ack_needed) {
4426 				ack_needed = B_FALSE;
4427 				/*
4428 				 * sender needs ack for this packet,
4429 				 * ack pkts upto this index.
4430 				 */
4431 				ack_end = rxi;
4432 
4433 				rv = vgen_send_dring_ack(ldcp, tagp,
4434 				    ack_start, ack_end, VIO_DP_ACTIVE);
4435 				if (rv != VGEN_SUCCESS) {
4436 					goto error_ret;
4437 				}
4438 
4439 				/* need to set new ack start index */
4440 				set_ack_start = B_TRUE;
4441 			}
4442 
4443 			if (nread != nbytes) {
4444 				DWARN((vnetp,
4445 				    "vgen_handle_dring_data: id(%lx) "
4446 				    "ldc_mem_copy nread(%lx), nbytes(%lx)\n",
4447 				    ldcp->ldc_id, nread, nbytes));
4448 				statsp->ierrors++;
4449 				freemsg(mp);
4450 				goto vgen_next_rxi;
4451 			}
4452 
4453 			/* point to the actual end of data */
4454 			mp->b_wptr = mp->b_rptr + datalen;
4455 
4456 			/* update stats */
4457 			statsp->ipackets++;
4458 			statsp->rbytes += datalen;
4459 			ehp = (struct ether_header *)mp->b_rptr;
4460 			if (IS_BROADCAST(ehp))
4461 				statsp->brdcstrcv++;
4462 			else if (IS_MULTICAST(ehp))
4463 				statsp->multircv++;
4464 
4465 			/* build a chain of received packets */
4466 			if (bp == NULL) {
4467 				/* first pkt */
4468 				bp = mp;
4469 				bpt = bp;
4470 				bpt->b_next = NULL;
4471 			} else {
4472 				mp->b_next = NULL;
4473 				bpt->b_next = mp;
4474 				bpt = mp;
4475 			}
4476 
4477 
4478 vgen_next_rxi:
4479 			/* update end index of range of descrs to be ack'd */
4480 			ack_end = rxi;
4481 
4482 			/* update the next index to be processed */
4483 			INCR_RXI(next_rxi, ldcp);
4484 			if (next_rxi == start) {
4485 				/*
4486 				 * processed the entire descriptor ring upto
4487 				 * the index at which we started.
4488 				 */
4489 				break;
4490 			}
4491 
4492 			rxi = next_rxi;
4493 
4494 		_NOTE(CONSTCOND)
4495 		} while (1);
4496 
4497 		/*
4498 		 * send an ack message to peer indicating that we have stopped
4499 		 * processing descriptors.
4500 		 */
4501 		if (set_ack_start) {
4502 			/*
4503 			 * We have ack'd upto some index and we have not
4504 			 * processed any descriptors beyond that index.
4505 			 * Use the last ack'd index as both the start and
4506 			 * end of range of descrs being ack'd.
4507 			 * Note: This results in acking the last index twice
4508 			 * and should be harmless.
4509 			 */
4510 			ack_start = ack_end;
4511 		}
4512 
4513 		rv = vgen_send_dring_ack(ldcp, tagp, ack_start, ack_end,
4514 		    VIO_DP_STOPPED);
4515 		if (rv != VGEN_SUCCESS) {
4516 			goto error_ret;
4517 		}
4518 
4519 		/* save new recv index and expected seqnum of next dring msg */
4520 		ldcp->next_rxi = next_rxi;
4521 		ldcp->next_rxseq += 1;
4522 
4523 		break;
4524 
4525 	case VIO_SUBTYPE_ACK:
4526 		/*
4527 		 * received an ack corresponding to a specific descriptor for
4528 		 * which we had set the ACK bit in the descriptor (during
4529 		 * transmit). This enables us to reclaim descriptors.
4530 		 */
4531 
4532 		DBG2((vnetp,
4533 		    "vgen_handle_dring_data: ACK:  start(%d), end(%d)\n",
4534 		    start, end));
4535 
4536 		/* validate start and end indeces in the tx ack msg */
4537 		if (!(CHECK_TXI(start, ldcp)) || !(CHECK_TXI(end, ldcp))) {
4538 			/* drop the message if invalid index */
4539 			break;
4540 		}
4541 		/* validate dring_ident */
4542 		if (dringmsg->dring_ident != ldcp->local_hparams.dring_ident) {
4543 			/* invalid dring_ident, drop the msg */
4544 			break;
4545 		}
4546 		statsp->dring_data_acks++;
4547 
4548 		/* reclaim descriptors that are done */
4549 		vgen_reclaim(ldcp);
4550 
4551 		if (dringmsg->dring_process_state != VIO_DP_STOPPED) {
4552 			/*
4553 			 * receiver continued processing descriptors after
4554 			 * sending us the ack.
4555 			 */
4556 			break;
4557 		}
4558 
4559 		statsp->dring_stopped_acks++;
4560 
4561 		/* receiver stopped processing descriptors */
4562 		mutex_enter(&ldcp->txlock);
4563 		mutex_enter(&ldcp->tclock);
4564 
4565 		/*
4566 		 * determine if there are any pending tx descriptors
4567 		 * ready to be processed by the receiver(peer) and if so,
4568 		 * send a message to the peer to restart receiving.
4569 		 */
4570 		ready_txd = B_FALSE;
4571 
4572 		/*
4573 		 * using the end index of the descriptor range for which
4574 		 * we received the ack, check if the next descriptor is
4575 		 * ready.
4576 		 */
4577 		txi = end;
4578 		INCR_TXI(txi, ldcp);
4579 		tbufp = &ldcp->tbufp[txi];
4580 		txdp = tbufp->descp;
4581 		hdrp = &txdp->hdr;
4582 		if (hdrp->dstate == VIO_DESC_READY) {
4583 			ready_txd = B_TRUE;
4584 		} else {
4585 			/*
4586 			 * descr next to the end of ack'd descr range is not
4587 			 * ready.
4588 			 * starting from the current reclaim index, check
4589 			 * if any descriptor is ready.
4590 			 */
4591 
4592 			txi = ldcp->cur_tbufp - ldcp->tbufp;
4593 			tbufp = &ldcp->tbufp[txi];
4594 
4595 			while (tbufp != ldcp->next_tbufp) {
4596 
4597 				txdp = tbufp->descp;
4598 				hdrp = &txdp->hdr;
4599 				if (hdrp->dstate == VIO_DESC_READY) {
4600 					break;
4601 				}
4602 
4603 				INCR_TXI(txi, ldcp);
4604 				tbufp = &ldcp->tbufp[txi];
4605 
4606 			}
4607 
4608 			if (tbufp != ldcp->next_tbufp)
4609 				ready_txd = B_TRUE;
4610 		}
4611 
4612 		if (ready_txd) {
4613 			/*
4614 			 * we have tx descriptor(s) ready to be
4615 			 * processed by the receiver.
4616 			 * send a message to the peer with the start index
4617 			 * of ready descriptors.
4618 			 */
4619 			rv = vgen_send_dring_data(ldcp, txi, -1);
4620 			if (rv != VGEN_SUCCESS) {
4621 				ldcp->resched_peer = B_TRUE;
4622 				mutex_exit(&ldcp->tclock);
4623 				mutex_exit(&ldcp->txlock);
4624 				goto error_ret;
4625 			}
4626 		} else {
4627 			/*
4628 			 * no ready tx descriptors. set the flag to send a
4629 			 * message to peer when tx descriptors are ready in
4630 			 * transmit routine.
4631 			 */
4632 			ldcp->resched_peer = B_TRUE;
4633 		}
4634 
4635 		mutex_exit(&ldcp->tclock);
4636 		mutex_exit(&ldcp->txlock);
4637 
4638 		break;
4639 
4640 	case VIO_SUBTYPE_NACK:
4641 		/*
4642 		 * peer sent a NACK msg to indicate lost packets.
4643 		 * The start and end correspond to the range of descriptors
4644 		 * for which the peer didn't receive a dring data msg and so
4645 		 * didn't receive the corresponding data.
4646 		 */
4647 		DWARN((vnetp,
4648 		    "vgen_handle_dring_data: NACK:  start(%d), end(%d)\n",
4649 		    start, end));
4650 
4651 		/* validate start and end indeces in the tx nack msg */
4652 		if (!(CHECK_TXI(start, ldcp)) || !(CHECK_TXI(end, ldcp))) {
4653 			/* drop the message if invalid index */
4654 			break;
4655 		}
4656 		/* validate dring_ident */
4657 		if (dringmsg->dring_ident != ldcp->local_hparams.dring_ident) {
4658 			/* invalid dring_ident, drop the msg */
4659 			break;
4660 		}
4661 		mutex_enter(&ldcp->txlock);
4662 		mutex_enter(&ldcp->tclock);
4663 
4664 		if (ldcp->next_tbufp == ldcp->cur_tbufp) {
4665 			/* no busy descriptors, bogus nack ? */
4666 			mutex_exit(&ldcp->tclock);
4667 			mutex_exit(&ldcp->txlock);
4668 			break;
4669 		}
4670 
4671 #ifdef VGEN_REXMIT
4672 		/* send a new dring data msg including the lost descrs */
4673 		end = ldcp->next_tbufp - ldcp->tbufp;
4674 		DECR_TXI(end, ldcp);
4675 		rv = vgen_send_dring_data(ldcp, start, end);
4676 		if (rv != 0) {
4677 			/*
4678 			 * vgen_send_dring_data() error: drop all packets
4679 			 * in this descr range
4680 			 */
4681 			DWARN((vnetp,
4682 			    "vgen_handle_dring_data: "
4683 			    "vgen_send_dring_data failed :"
4684 			    "id(%lx) rv(%d)\n", ldcp->ldc_id, rv));
4685 			for (txi = start; txi <= end; ) {
4686 				tbufp = &(ldcp->tbufp[txi]);
4687 				txdp = tbufp->descp;
4688 				hdrp = &txdp->hdr;
4689 				tbufp->flags = VGEN_PRIV_DESC_FREE;
4690 				hdrp->dstate = VIO_DESC_FREE;
4691 				hdrp->ack = B_FALSE;
4692 				statsp->oerrors++;
4693 			}
4694 
4695 			/* update next pointer */
4696 			ldcp->next_tbufp = &(ldcp->tbufp[start]);
4697 			ldcp->next_txi = start;
4698 		}
4699 		DBG2((vnetp,
4700 		    "vgen_handle_dring_data: rexmit: start(%d) end(%d)\n",
4701 		    start, end));
4702 #else	/* VGEN_REXMIT */
4703 		/* we just mark the descrs as done so they can be reclaimed */
4704 		for (txi = start; txi <= end; ) {
4705 			txdp = &(ldcp->txdp[txi]);
4706 			hdrp = &txdp->hdr;
4707 			if (hdrp->dstate == VIO_DESC_READY)
4708 				hdrp->dstate = VIO_DESC_DONE;
4709 			INCR_TXI(txi, ldcp);
4710 		}
4711 #endif	/* VGEN_REXMIT */
4712 		mutex_exit(&ldcp->tclock);
4713 		mutex_exit(&ldcp->txlock);
4714 
4715 		break;
4716 	}
4717 
4718 error_ret:
4719 
4720 	DBG1((vnetp, "vgen_handle_dring_data: exit\n"));
4721 	*headp = bp;
4722 	*tailp = bpt;
4723 
4724 	return (rv);
4725 }
4726 
4727 static void
4728 vgen_reclaim(vgen_ldc_t *ldcp)
4729 {
4730 	mutex_enter(&ldcp->tclock);
4731 
4732 	vgen_reclaim_dring(ldcp);
4733 	ldcp->reclaim_lbolt = ddi_get_lbolt();
4734 
4735 	mutex_exit(&ldcp->tclock);
4736 }
4737 
4738 /*
4739  * transmit reclaim function. starting from the current reclaim index
4740  * look for descriptors marked DONE and reclaim the descriptor and the
4741  * corresponding buffers (tbuf).
4742  */
4743 static void
4744 vgen_reclaim_dring(vgen_ldc_t *ldcp)
4745 {
4746 	vnet_public_desc_t *txdp;
4747 	vgen_private_desc_t *tbufp;
4748 	vio_dring_entry_hdr_t	*hdrp;
4749 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
4750 
4751 #ifdef DEBUG
4752 	if (vgen_trigger_txtimeout)
4753 		return;
4754 #endif
4755 
4756 	tbufp = ldcp->cur_tbufp;
4757 	txdp = tbufp->descp;
4758 	hdrp = &txdp->hdr;
4759 
4760 	while ((hdrp->dstate == VIO_DESC_DONE) &&
4761 	    (tbufp != ldcp->next_tbufp)) {
4762 		tbufp->flags = VGEN_PRIV_DESC_FREE;
4763 		hdrp->dstate = VIO_DESC_FREE;
4764 		hdrp->ack = B_FALSE;
4765 
4766 		tbufp = NEXTTBUF(ldcp, tbufp);
4767 		txdp = tbufp->descp;
4768 		hdrp = &txdp->hdr;
4769 	}
4770 
4771 	ldcp->cur_tbufp = tbufp;
4772 
4773 	/*
4774 	 * Check if mac layer should be notified to restart transmissions
4775 	 */
4776 	if (ldcp->need_resched) {
4777 		ldcp->need_resched = B_FALSE;
4778 		vnet_tx_update(vgenp->vnetp);
4779 	}
4780 }
4781 
4782 /* return the number of pending transmits for the channel */
4783 static int
4784 vgen_num_txpending(vgen_ldc_t *ldcp)
4785 {
4786 	int n;
4787 
4788 	if (ldcp->next_tbufp >= ldcp->cur_tbufp) {
4789 		n = ldcp->next_tbufp - ldcp->cur_tbufp;
4790 	} else  {
4791 		/* cur_tbufp > next_tbufp */
4792 		n = ldcp->num_txds - (ldcp->cur_tbufp - ldcp->next_tbufp);
4793 	}
4794 
4795 	return (n);
4796 }
4797 
4798 /* determine if the transmit descriptor ring is full */
4799 static int
4800 vgen_tx_dring_full(vgen_ldc_t *ldcp)
4801 {
4802 	vgen_private_desc_t	*tbufp;
4803 	vgen_private_desc_t	*ntbufp;
4804 
4805 	tbufp = ldcp->next_tbufp;
4806 	ntbufp = NEXTTBUF(ldcp, tbufp);
4807 	if (ntbufp == ldcp->cur_tbufp) { /* out of tbufs/txds */
4808 		return (VGEN_SUCCESS);
4809 	}
4810 	return (VGEN_FAILURE);
4811 }
4812 
4813 /* determine if timeout condition has occured */
4814 static int
4815 vgen_ldc_txtimeout(vgen_ldc_t *ldcp)
4816 {
4817 	if (((ddi_get_lbolt() - ldcp->reclaim_lbolt) >
4818 	    drv_usectohz(vnet_ldcwd_txtimeout * 1000)) &&
4819 	    (vnet_ldcwd_txtimeout) &&
4820 	    (vgen_tx_dring_full(ldcp) == VGEN_SUCCESS)) {
4821 		return (VGEN_SUCCESS);
4822 	} else {
4823 		return (VGEN_FAILURE);
4824 	}
4825 }
4826 
4827 /* transmit watchdog timeout handler */
4828 static void
4829 vgen_ldc_watchdog(void *arg)
4830 {
4831 	vgen_ldc_t *ldcp;
4832 	vgen_t *vgenp;
4833 	void *vnetp;
4834 	int rv;
4835 
4836 	ldcp = (vgen_ldc_t *)arg;
4837 	vgenp = LDC_TO_VGEN(ldcp);
4838 	vnetp = LDC_TO_VNET(ldcp);
4839 
4840 	rv = vgen_ldc_txtimeout(ldcp);
4841 	if (rv == VGEN_SUCCESS) {
4842 		DWARN((vnetp,
4843 		    "vgen_ldc_watchdog: transmit timeout ldcid(%lx)\n",
4844 		    ldcp->ldc_id));
4845 #ifdef DEBUG
4846 		if (vgen_trigger_txtimeout) {
4847 			/* tx timeout triggered for debugging */
4848 			vgen_trigger_txtimeout = 0;
4849 		}
4850 #endif
4851 		mutex_enter(&ldcp->cblock);
4852 		ldcp->need_ldc_reset = B_TRUE;
4853 		vgen_handshake_reset(ldcp);
4854 		mutex_exit(&ldcp->cblock);
4855 		if (ldcp->need_resched) {
4856 			ldcp->need_resched = B_FALSE;
4857 			vnet_tx_update(vgenp->vnetp);
4858 		}
4859 	}
4860 
4861 	ldcp->wd_tid = timeout(vgen_ldc_watchdog, (caddr_t)ldcp,
4862 	    drv_usectohz(vnet_ldcwd_interval * 1000));
4863 }
4864 
4865 static int
4866 vgen_setup_kstats(vgen_ldc_t *ldcp)
4867 {
4868 	vgen_t *vgenp;
4869 	struct kstat *ksp;
4870 	vgen_stats_t *statsp;
4871 	vgen_kstats_t *ldckp;
4872 	int instance;
4873 	size_t size;
4874 	char name[MAXNAMELEN];
4875 
4876 	vgenp = LDC_TO_VGEN(ldcp);
4877 	instance = ddi_get_instance(vgenp->vnetdip);
4878 	(void) sprintf(name, "vnetldc0x%lx", ldcp->ldc_id);
4879 	statsp = kmem_zalloc(sizeof (vgen_stats_t), KM_SLEEP);
4880 	if (statsp == NULL) {
4881 		return (VGEN_FAILURE);
4882 	}
4883 	size = sizeof (vgen_kstats_t) / sizeof (kstat_named_t);
4884 	ksp = kstat_create("vnet", instance, name, "net", KSTAT_TYPE_NAMED,
4885 		size, 0);
4886 	if (ksp == NULL) {
4887 		KMEM_FREE(statsp);
4888 		return (VGEN_FAILURE);
4889 	}
4890 
4891 	ldckp = (vgen_kstats_t *)ksp->ks_data;
4892 	kstat_named_init(&ldckp->ipackets,		"ipackets",
4893 		KSTAT_DATA_ULONG);
4894 	kstat_named_init(&ldckp->ipackets64,		"ipackets64",
4895 		KSTAT_DATA_ULONGLONG);
4896 	kstat_named_init(&ldckp->ierrors,		"ierrors",
4897 		KSTAT_DATA_ULONG);
4898 	kstat_named_init(&ldckp->opackets,		"opackets",
4899 		KSTAT_DATA_ULONG);
4900 	kstat_named_init(&ldckp->opackets64,		"opackets64",
4901 		KSTAT_DATA_ULONGLONG);
4902 	kstat_named_init(&ldckp->oerrors,		"oerrors",
4903 		KSTAT_DATA_ULONG);
4904 
4905 
4906 	/* MIB II kstat variables */
4907 	kstat_named_init(&ldckp->rbytes,		"rbytes",
4908 		KSTAT_DATA_ULONG);
4909 	kstat_named_init(&ldckp->rbytes64,		"rbytes64",
4910 		KSTAT_DATA_ULONGLONG);
4911 	kstat_named_init(&ldckp->obytes,		"obytes",
4912 		KSTAT_DATA_ULONG);
4913 	kstat_named_init(&ldckp->obytes64,		"obytes64",
4914 		KSTAT_DATA_ULONGLONG);
4915 	kstat_named_init(&ldckp->multircv,		"multircv",
4916 		KSTAT_DATA_ULONG);
4917 	kstat_named_init(&ldckp->multixmt,		"multixmt",
4918 		KSTAT_DATA_ULONG);
4919 	kstat_named_init(&ldckp->brdcstrcv,		"brdcstrcv",
4920 		KSTAT_DATA_ULONG);
4921 	kstat_named_init(&ldckp->brdcstxmt,		"brdcstxmt",
4922 		KSTAT_DATA_ULONG);
4923 	kstat_named_init(&ldckp->norcvbuf,		"norcvbuf",
4924 		KSTAT_DATA_ULONG);
4925 	kstat_named_init(&ldckp->noxmtbuf,		"noxmtbuf",
4926 		KSTAT_DATA_ULONG);
4927 
4928 	/* Tx stats */
4929 	kstat_named_init(&ldckp->tx_no_desc,		"tx_no_desc",
4930 		KSTAT_DATA_ULONG);
4931 
4932 	/* Rx stats */
4933 	kstat_named_init(&ldckp->rx_allocb_fail,	"rx_allocb_fail",
4934 		KSTAT_DATA_ULONG);
4935 	kstat_named_init(&ldckp->rx_vio_allocb_fail,	"rx_vio_allocb_fail",
4936 		KSTAT_DATA_ULONG);
4937 	kstat_named_init(&ldckp->rx_lost_pkts,		"rx_lost_pkts",
4938 		KSTAT_DATA_ULONG);
4939 
4940 	/* Interrupt stats */
4941 	kstat_named_init(&ldckp->callbacks,		"callbacks",
4942 		KSTAT_DATA_ULONG);
4943 	kstat_named_init(&ldckp->dring_data_acks,	"dring_data_acks",
4944 		KSTAT_DATA_ULONG);
4945 	kstat_named_init(&ldckp->dring_stopped_acks,	"dring_stopped_acks",
4946 		KSTAT_DATA_ULONG);
4947 	kstat_named_init(&ldckp->dring_data_msgs,	"dring_data_msgs",
4948 		KSTAT_DATA_ULONG);
4949 
4950 	ksp->ks_update = vgen_kstat_update;
4951 	ksp->ks_private = (void *)ldcp;
4952 	kstat_install(ksp);
4953 
4954 	ldcp->ksp = ksp;
4955 	ldcp->statsp = statsp;
4956 	return (VGEN_SUCCESS);
4957 }
4958 
4959 static void
4960 vgen_destroy_kstats(vgen_ldc_t *ldcp)
4961 {
4962 	if (ldcp->ksp)
4963 		kstat_delete(ldcp->ksp);
4964 	KMEM_FREE(ldcp->statsp);
4965 }
4966 
4967 static int
4968 vgen_kstat_update(kstat_t *ksp, int rw)
4969 {
4970 	vgen_ldc_t *ldcp;
4971 	vgen_stats_t *statsp;
4972 	vgen_kstats_t *ldckp;
4973 
4974 	ldcp = (vgen_ldc_t *)ksp->ks_private;
4975 	statsp = ldcp->statsp;
4976 	ldckp = (vgen_kstats_t *)ksp->ks_data;
4977 
4978 	if (rw == KSTAT_READ) {
4979 		ldckp->ipackets.value.ul	= (uint32_t)statsp->ipackets;
4980 		ldckp->ipackets64.value.ull	= statsp->ipackets;
4981 		ldckp->ierrors.value.ul		= statsp->ierrors;
4982 		ldckp->opackets.value.ul	= (uint32_t)statsp->opackets;
4983 		ldckp->opackets64.value.ull	= statsp->opackets;
4984 		ldckp->oerrors.value.ul		= statsp->oerrors;
4985 
4986 		/*
4987 		 * MIB II kstat variables
4988 		 */
4989 		ldckp->rbytes.value.ul		= (uint32_t)statsp->rbytes;
4990 		ldckp->rbytes64.value.ull	= statsp->rbytes;
4991 		ldckp->obytes.value.ul		= (uint32_t)statsp->obytes;
4992 		ldckp->obytes64.value.ull	= statsp->obytes;
4993 		ldckp->multircv.value.ul	= statsp->multircv;
4994 		ldckp->multixmt.value.ul	= statsp->multixmt;
4995 		ldckp->brdcstrcv.value.ul	= statsp->brdcstrcv;
4996 		ldckp->brdcstxmt.value.ul	= statsp->brdcstxmt;
4997 		ldckp->norcvbuf.value.ul	= statsp->norcvbuf;
4998 		ldckp->noxmtbuf.value.ul	= statsp->noxmtbuf;
4999 
5000 		ldckp->tx_no_desc.value.ul	= statsp->tx_no_desc;
5001 
5002 		ldckp->rx_allocb_fail.value.ul	= statsp->rx_allocb_fail;
5003 		ldckp->rx_vio_allocb_fail.value.ul = statsp->rx_vio_allocb_fail;
5004 		ldckp->rx_lost_pkts.value.ul	= statsp->rx_lost_pkts;
5005 
5006 		ldckp->callbacks.value.ul	= statsp->callbacks;
5007 		ldckp->dring_data_acks.value.ul	= statsp->dring_data_acks;
5008 		ldckp->dring_stopped_acks.value.ul = statsp->dring_stopped_acks;
5009 		ldckp->dring_data_msgs.value.ul	= statsp->dring_data_msgs;
5010 	} else {
5011 		statsp->ipackets	= ldckp->ipackets64.value.ull;
5012 		statsp->ierrors		= ldckp->ierrors.value.ul;
5013 		statsp->opackets	= ldckp->opackets64.value.ull;
5014 		statsp->oerrors		= ldckp->oerrors.value.ul;
5015 
5016 		/*
5017 		 * MIB II kstat variables
5018 		 */
5019 		statsp->rbytes		= ldckp->rbytes64.value.ull;
5020 		statsp->obytes		= ldckp->obytes64.value.ull;
5021 		statsp->multircv	= ldckp->multircv.value.ul;
5022 		statsp->multixmt	= ldckp->multixmt.value.ul;
5023 		statsp->brdcstrcv	= ldckp->brdcstrcv.value.ul;
5024 		statsp->brdcstxmt	= ldckp->brdcstxmt.value.ul;
5025 		statsp->norcvbuf	= ldckp->norcvbuf.value.ul;
5026 		statsp->noxmtbuf	= ldckp->noxmtbuf.value.ul;
5027 
5028 		statsp->tx_no_desc	= ldckp->tx_no_desc.value.ul;
5029 
5030 		statsp->rx_allocb_fail	= ldckp->rx_allocb_fail.value.ul;
5031 		statsp->rx_vio_allocb_fail = ldckp->rx_vio_allocb_fail.value.ul;
5032 		statsp->rx_lost_pkts	= ldckp->rx_lost_pkts.value.ul;
5033 
5034 		statsp->callbacks	= ldckp->callbacks.value.ul;
5035 		statsp->dring_data_acks	= ldckp->dring_data_acks.value.ul;
5036 		statsp->dring_stopped_acks = ldckp->dring_stopped_acks.value.ul;
5037 		statsp->dring_data_msgs	= ldckp->dring_data_msgs.value.ul;
5038 	}
5039 
5040 	return (VGEN_SUCCESS);
5041 }
5042 
5043 /* handler for error messages received from the peer ldc end-point */
5044 static void
5045 vgen_handle_errmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5046 {
5047 	_NOTE(ARGUNUSED(ldcp, tagp))
5048 }
5049 
5050 /* Check if the session id in the received message is valid */
5051 static int
5052 vgen_check_sid(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5053 {
5054 	if (tagp->vio_sid != ldcp->peer_sid) {
5055 		void *vnetp = LDC_TO_VNET(ldcp);
5056 		DWARN((vnetp,
5057 		    "sid mismatch: expected(%x), rcvd(%x)\n",
5058 		    ldcp->peer_sid, tagp->vio_sid));
5059 		return (VGEN_FAILURE);
5060 	}
5061 	else
5062 		return (VGEN_SUCCESS);
5063 }
5064 
5065 /* convert mac address from string to uint64_t */
5066 static uint64_t
5067 vgen_macaddr_strtoul(const uint8_t *macaddr)
5068 {
5069 	uint64_t val = 0;
5070 	int i;
5071 
5072 	for (i = 0; i < ETHERADDRL; i++) {
5073 		val <<= 8;
5074 		val |= macaddr[i];
5075 	}
5076 
5077 	return (val);
5078 }
5079 
5080 /* convert mac address from uint64_t to string */
5081 static int
5082 vgen_macaddr_ultostr(uint64_t val, uint8_t *macaddr)
5083 {
5084 	int i;
5085 	uint64_t value;
5086 
5087 	value = val;
5088 	for (i = ETHERADDRL - 1; i >= 0; i--) {
5089 		macaddr[i] = value & 0xFF;
5090 		value >>= 8;
5091 	}
5092 	return (VGEN_SUCCESS);
5093 }
5094 
5095 static caddr_t
5096 vgen_print_ethaddr(uint8_t *a, char *ebuf)
5097 {
5098 	(void) sprintf(ebuf,
5099 		"%x:%x:%x:%x:%x:%x", a[0], a[1], a[2], a[3], a[4], a[5]);
5100 	return (ebuf);
5101 }
5102 
5103 /* Handshake watchdog timeout handler */
5104 static void
5105 vgen_hwatchdog(void *arg)
5106 {
5107 	vgen_ldc_t *ldcp = (vgen_ldc_t *)arg;
5108 	void *vnetp = LDC_TO_VNET(ldcp);
5109 
5110 	DWARN((vnetp,
5111 	    "vgen_hwatchdog: handshake timeout ldc(%lx) phase(%x) state(%x)\n",
5112 	    ldcp->ldc_id, ldcp->hphase, ldcp->hstate));
5113 
5114 	mutex_enter(&ldcp->cblock);
5115 	ldcp->htid = 0;
5116 	ldcp->need_ldc_reset = B_TRUE;
5117 	vgen_handshake_retry(ldcp);
5118 	mutex_exit(&ldcp->cblock);
5119 }
5120 
5121 static void
5122 vgen_print_hparams(vgen_hparams_t *hp)
5123 {
5124 	uint8_t	addr[6];
5125 	char	ea[6];
5126 	ldc_mem_cookie_t *dc;
5127 
5128 	cmn_err(CE_CONT, "version_info:\n");
5129 	cmn_err(CE_CONT,
5130 	    "\tver_major: %d, ver_minor: %d, dev_class: %d\n",
5131 	    hp->ver_major, hp->ver_minor, hp->dev_class);
5132 
5133 	(void) vgen_macaddr_ultostr(hp->addr, addr);
5134 	cmn_err(CE_CONT, "attr_info:\n");
5135 	cmn_err(CE_CONT, "\tMTU: %lx, addr: %s\n", hp->mtu,
5136 	    vgen_print_ethaddr(addr, ea));
5137 	cmn_err(CE_CONT,
5138 	    "\taddr_type: %x, xfer_mode: %x, ack_freq: %x\n",
5139 	    hp->addr_type, hp->xfer_mode, hp->ack_freq);
5140 
5141 	dc = &hp->dring_cookie;
5142 	cmn_err(CE_CONT, "dring_info:\n");
5143 	cmn_err(CE_CONT,
5144 	    "\tlength: %d, dsize: %d\n", hp->num_desc, hp->desc_size);
5145 	cmn_err(CE_CONT,
5146 	    "\tldc_addr: 0x%lx, ldc_size: %ld\n",
5147 	    dc->addr, dc->size);
5148 	cmn_err(CE_CONT, "\tdring_ident: 0x%lx\n", hp->dring_ident);
5149 }
5150 
5151 static void
5152 vgen_print_ldcinfo(vgen_ldc_t *ldcp)
5153 {
5154 	vgen_hparams_t *hp;
5155 
5156 	cmn_err(CE_CONT, "Channel Information:\n");
5157 	cmn_err(CE_CONT,
5158 	    "\tldc_id: 0x%lx, ldc_status: 0x%x\n",
5159 	    ldcp->ldc_id, ldcp->ldc_status);
5160 	cmn_err(CE_CONT,
5161 	    "\tlocal_sid: 0x%x, peer_sid: 0x%x\n",
5162 	    ldcp->local_sid, ldcp->peer_sid);
5163 	cmn_err(CE_CONT,
5164 	    "\thphase: 0x%x, hstate: 0x%x\n",
5165 	    ldcp->hphase, ldcp->hstate);
5166 
5167 	cmn_err(CE_CONT, "Local handshake params:\n");
5168 	hp = &ldcp->local_hparams;
5169 	vgen_print_hparams(hp);
5170 
5171 	cmn_err(CE_CONT, "Peer handshake params:\n");
5172 	hp = &ldcp->peer_hparams;
5173 	vgen_print_hparams(hp);
5174 }
5175