xref: /titanic_50/usr/src/uts/sun4v/io/vnet_gen.c (revision b9bc7f7832704fda46b4d6b04f3f7be1227dc644)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/errno.h>
31 #include <sys/param.h>
32 #include <sys/stream.h>
33 #include <sys/strsubr.h>
34 #include <sys/kmem.h>
35 #include <sys/conf.h>
36 #include <sys/devops.h>
37 #include <sys/ksynch.h>
38 #include <sys/stat.h>
39 #include <sys/modctl.h>
40 #include <sys/debug.h>
41 #include <sys/ethernet.h>
42 #include <sys/ddi.h>
43 #include <sys/sunddi.h>
44 #include <sys/strsun.h>
45 #include <sys/note.h>
46 #include <sys/mac.h>
47 #include <sys/mac_ether.h>
48 #include <sys/ldc.h>
49 #include <sys/mach_descrip.h>
50 #include <sys/mdeg.h>
51 #include <net/if.h>
52 #include <sys/vnet.h>
53 #include <sys/vio_mailbox.h>
54 #include <sys/vio_common.h>
55 #include <sys/vnet_common.h>
56 #include <sys/vnet_mailbox.h>
57 #include <sys/vio_util.h>
58 #include <sys/vnet_gen.h>
59 #include <sys/callb.h>
60 #include <sys/sdt.h>
61 #include <sys/intr.h>
62 #include <sys/pattr.h>
63 
64 /*
65  * Implementation of the mac functionality for vnet using the
66  * generic(default) transport layer of sun4v Logical Domain Channels(LDC).
67  */
68 
69 /*
70  * Function prototypes.
71  */
72 /* vgen proxy entry points */
73 int vgen_init(void *vnetp, dev_info_t *vnetdip, const uint8_t *macaddr,
74 	mac_register_t **vgenmacp);
75 int vgen_uninit(void *arg);
76 static int vgen_start(void *arg);
77 static void vgen_stop(void *arg);
78 static mblk_t *vgen_tx(void *arg, mblk_t *mp);
79 static int vgen_multicst(void *arg, boolean_t add,
80 	const uint8_t *mca);
81 static int vgen_promisc(void *arg, boolean_t on);
82 static int vgen_unicst(void *arg, const uint8_t *mca);
83 static int vgen_stat(void *arg, uint_t stat, uint64_t *val);
84 static void vgen_ioctl(void *arg, queue_t *wq, mblk_t *mp);
85 
86 /* externs - functions provided by vnet to add/remove/modify entries in fdb */
87 void vnet_add_fdb(void *arg, uint8_t *macaddr, mac_tx_t m_tx, void *txarg);
88 void vnet_del_fdb(void *arg, uint8_t *macaddr);
89 void vnet_modify_fdb(void *arg, uint8_t *macaddr, mac_tx_t m_tx,
90 	void *txarg, boolean_t upgrade);
91 void vnet_add_def_rte(void *arg, mac_tx_t m_tx, void *txarg);
92 void vnet_del_def_rte(void *arg);
93 void vnet_rx(void *arg, mac_resource_handle_t mrh, mblk_t *mp);
94 void vnet_tx_update(void *arg);
95 
96 /* vgen internal functions */
97 static void vgen_detach_ports(vgen_t *vgenp);
98 static void vgen_port_detach(vgen_port_t *portp);
99 static void vgen_port_list_insert(vgen_port_t *portp);
100 static void vgen_port_list_remove(vgen_port_t *portp);
101 static vgen_port_t *vgen_port_lookup(vgen_portlist_t *plistp,
102 	int port_num);
103 static int vgen_mdeg_reg(vgen_t *vgenp);
104 static void vgen_mdeg_unreg(vgen_t *vgenp);
105 static int vgen_mdeg_cb(void *cb_argp, mdeg_result_t *resp);
106 static int vgen_add_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
107 static int vgen_remove_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
108 static int vgen_port_attach_mdeg(vgen_t *vgenp, int port_num, uint64_t *ldcids,
109 	int num_ids, struct ether_addr *macaddr, boolean_t vsw_port);
110 static void vgen_port_detach_mdeg(vgen_port_t *portp);
111 static int vgen_update_port(vgen_t *vgenp, md_t *curr_mdp,
112 	mde_cookie_t curr_mdex, md_t *prev_mdp, mde_cookie_t prev_mdex);
113 static uint64_t	vgen_port_stat(vgen_port_t *portp, uint_t stat);
114 
115 static int vgen_ldc_attach(vgen_port_t *portp, uint64_t ldc_id);
116 static void vgen_ldc_detach(vgen_ldc_t *ldcp);
117 static int vgen_alloc_tx_ring(vgen_ldc_t *ldcp);
118 static void vgen_free_tx_ring(vgen_ldc_t *ldcp);
119 static void vgen_init_ports(vgen_t *vgenp);
120 static void vgen_port_init(vgen_port_t *portp);
121 static void vgen_uninit_ports(vgen_t *vgenp);
122 static void vgen_port_uninit(vgen_port_t *portp);
123 static void vgen_init_ldcs(vgen_port_t *portp);
124 static void vgen_uninit_ldcs(vgen_port_t *portp);
125 static int vgen_ldc_init(vgen_ldc_t *ldcp);
126 static void vgen_ldc_uninit(vgen_ldc_t *ldcp);
127 static int vgen_init_tbufs(vgen_ldc_t *ldcp);
128 static void vgen_uninit_tbufs(vgen_ldc_t *ldcp);
129 static void vgen_clobber_tbufs(vgen_ldc_t *ldcp);
130 static void vgen_clobber_rxds(vgen_ldc_t *ldcp);
131 static uint64_t	vgen_ldc_stat(vgen_ldc_t *ldcp, uint_t stat);
132 static uint_t vgen_ldc_cb(uint64_t event, caddr_t arg);
133 static int vgen_portsend(vgen_port_t *portp, mblk_t *mp);
134 static int vgen_ldcsend(vgen_ldc_t *ldcp, mblk_t *mp);
135 static void vgen_reclaim(vgen_ldc_t *ldcp);
136 static void vgen_reclaim_dring(vgen_ldc_t *ldcp);
137 static int vgen_num_txpending(vgen_ldc_t *ldcp);
138 static int vgen_tx_dring_full(vgen_ldc_t *ldcp);
139 static int vgen_ldc_txtimeout(vgen_ldc_t *ldcp);
140 static void vgen_ldc_watchdog(void *arg);
141 
142 /* vgen handshake functions */
143 static vgen_ldc_t *vh_nextphase(vgen_ldc_t *ldcp);
144 static int vgen_supported_version(vgen_ldc_t *ldcp, uint16_t ver_major,
145 	uint16_t ver_minor);
146 static int vgen_next_version(vgen_ldc_t *ldcp, vgen_ver_t *verp);
147 static int vgen_sendmsg(vgen_ldc_t *ldcp, caddr_t msg,  size_t msglen,
148 	boolean_t caller_holds_lock);
149 static int vgen_send_version_negotiate(vgen_ldc_t *ldcp);
150 static int vgen_send_attr_info(vgen_ldc_t *ldcp);
151 static int vgen_send_dring_reg(vgen_ldc_t *ldcp);
152 static int vgen_send_rdx_info(vgen_ldc_t *ldcp);
153 static int vgen_send_dring_data(vgen_ldc_t *ldcp, uint32_t start, int32_t end);
154 static int vgen_send_mcast_info(vgen_ldc_t *ldcp);
155 static int vgen_handshake_phase2(vgen_ldc_t *ldcp);
156 static void vgen_handshake_reset(vgen_ldc_t *ldcp);
157 static void vgen_reset_hphase(vgen_ldc_t *ldcp);
158 static void vgen_handshake(vgen_ldc_t *ldcp);
159 static int vgen_handshake_done(vgen_ldc_t *ldcp);
160 static void vgen_handshake_retry(vgen_ldc_t *ldcp);
161 static int vgen_handle_version_negotiate(vgen_ldc_t *ldcp,
162 	vio_msg_tag_t *tagp);
163 static int vgen_handle_attr_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
164 static int vgen_handle_dring_reg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
165 static int vgen_handle_rdx_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
166 static int vgen_handle_mcast_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
167 static int vgen_handle_ctrlmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
168 static int vgen_handle_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
169 static int vgen_handle_dring_data_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
170 static int vgen_process_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
171 static int vgen_handle_dring_data_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
172 static int vgen_handle_dring_data_nack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
173 static int vgen_send_dring_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp,
174 	uint32_t start, int32_t end, uint8_t pstate);
175 static int vgen_handle_datamsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
176 static void vgen_handle_errmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
177 static void vgen_handle_evt_up(vgen_ldc_t *ldcp, boolean_t flag);
178 static void vgen_handle_evt_reset(vgen_ldc_t *ldcp, boolean_t flag);
179 static int vgen_check_sid(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
180 static caddr_t vgen_print_ethaddr(uint8_t *a, char *ebuf);
181 static void vgen_hwatchdog(void *arg);
182 static void vgen_print_attr_info(vgen_ldc_t *ldcp, int endpoint);
183 static void vgen_print_hparams(vgen_hparams_t *hp);
184 static void vgen_print_ldcinfo(vgen_ldc_t *ldcp);
185 static uint_t vgen_ldc_rcv_softintr(caddr_t arg1, caddr_t arg2);
186 static void vgen_stop_rcv_thread(vgen_ldc_t *ldcp);
187 static void vgen_ldc_rcv_worker(void *arg);
188 static void vgen_handle_evt_read(vgen_ldc_t *ldcp);
189 static void vgen_ldc_queue_data(vgen_ldc_t *ldcp,
190 	mblk_t *rhead, mblk_t *rtail);
191 
192 /*
193  * The handshake process consists of 5 phases defined below, with VH_PHASE0
194  * being the pre-handshake phase and VH_DONE is the phase to indicate
195  * successful completion of all phases.
196  * Each phase may have one to several handshake states which are required
197  * to complete successfully to move to the next phase.
198  * Refer to the functions vgen_handshake() and vgen_handshake_done() for
199  * more details.
200  */
/* handshake phases */
enum {
	VH_PHASE0	= 0,
	VH_PHASE1	= 1,
	VH_PHASE2	= 2,
	VH_PHASE3	= 3,
	VH_DONE		= 0x80
};

/*
 * handshake states
 *
 * Every state is a distinct bit, grouped four at a time (version, attribute,
 * descriptor ring, RDX). Each group ends with a composite mask that is the
 * OR of the group's ACK_RCVD and ACK_SENT bits.
 */
enum {

	/* version negotiation */
	VER_INFO_SENT		= (1 << 0),
	VER_ACK_RCVD		= (1 << 1),
	VER_INFO_RCVD		= (1 << 2),
	VER_ACK_SENT		= (1 << 3),
	VER_NEGOTIATED		= (VER_ACK_RCVD | VER_ACK_SENT),

	/* attribute exchange */
	ATTR_INFO_SENT		= (1 << 4),
	ATTR_ACK_RCVD		= (1 << 5),
	ATTR_INFO_RCVD		= (1 << 6),
	ATTR_ACK_SENT		= (1 << 7),
	ATTR_INFO_EXCHANGED	= (ATTR_ACK_RCVD | ATTR_ACK_SENT),

	/* descriptor ring registration */
	DRING_INFO_SENT		= (1 << 8),
	DRING_ACK_RCVD		= (1 << 9),
	DRING_INFO_RCVD		= (1 << 10),
	DRING_ACK_SENT		= (1 << 11),
	DRING_INFO_EXCHANGED	= (DRING_ACK_RCVD | DRING_ACK_SENT),

	/* RDX exchange */
	RDX_INFO_SENT		= (1 << 12),
	RDX_ACK_RCVD		= (1 << 13),
	RDX_INFO_RCVD		= (1 << 14),
	RDX_ACK_SENT		= (1 << 15),
	RDX_EXCHANGED		= (RDX_ACK_RCVD | RDX_ACK_SENT)

};
232 
/*
 * Acquire (LDC_LOCK) or release (LDC_UNLOCK) all five per-channel mutexes.
 * The locks are taken in the order cblock, rxlock, wrlock, txlock, tclock
 * and dropped in the reverse order.
 *
 * Both macros expand to a single statement (do { } while (0)) so that they
 * behave correctly as the body of an unbraced if/else; invoke them with a
 * trailing semicolon.
 */
#define	LDC_LOCK(ldcp)	\
	do {	\
		mutex_enter(&((ldcp)->cblock));	\
		mutex_enter(&((ldcp)->rxlock));	\
		mutex_enter(&((ldcp)->wrlock));	\
		mutex_enter(&((ldcp)->txlock));	\
		mutex_enter(&((ldcp)->tclock));	\
		_NOTE(CONSTCOND)	\
	} while (0)
#define	LDC_UNLOCK(ldcp)	\
	do {	\
		mutex_exit(&((ldcp)->tclock));	\
		mutex_exit(&((ldcp)->txlock));	\
		mutex_exit(&((ldcp)->wrlock));	\
		mutex_exit(&((ldcp)->rxlock));	\
		mutex_exit(&((ldcp)->cblock));	\
		_NOTE(CONSTCOND)	\
	} while (0)
245 
246 static struct ether_addr etherbroadcastaddr = {
247 	0xff, 0xff, 0xff, 0xff, 0xff, 0xff
248 };
249 /*
250  * MIB II broadcast/multicast packets
251  */
/*
 * Classify the destination address of the ethernet header pointed to by ehp.
 * IS_BROADCAST compares it against etherbroadcastaddr; IS_MULTICAST tests
 * the low-order bit of the first destination octet (which is also set in the
 * broadcast address). The argument is parenthesized so that any pointer
 * expression may be passed.
 */
#define	IS_BROADCAST(ehp) \
		(ether_cmp(&(ehp)->ether_dhost, &etherbroadcastaddr) == 0)
#define	IS_MULTICAST(ehp) \
		(((ehp)->ether_dhost.ether_addr_octet[0] & 01) == 1)
256 
257 /*
258  * Property names
259  */
260 static char macaddr_propname[] = "mac-address";
261 static char rmacaddr_propname[] = "remote-mac-address";
262 static char channel_propname[] = "channel-endpoint";
263 static char reg_propname[] = "reg";
264 static char port_propname[] = "port";
265 static char swport_propname[] = "switch-port";
266 static char id_propname[] = "id";
267 
268 /* versions supported - in decreasing order */
269 static vgen_ver_t vgen_versions[VGEN_NUM_VER] = { {1, 0} };
270 
271 /* Tunables */
272 uint32_t vgen_hwd_interval = 5;		/* handshake watchdog freq in sec */
273 uint32_t vgen_max_hretries = VNET_NUM_HANDSHAKES; /* # of handshake retries */
274 uint32_t vgen_ldcwr_retries = 10;	/* max # of ldc_write() retries */
275 uint32_t vgen_ldcup_retries = 5;	/* max # of ldc_up() retries */
276 uint32_t vgen_recv_delay = 1;		/* delay when rx descr not ready */
277 uint32_t vgen_recv_retries = 10;	/* retry when rx descr not ready */
278 uint32_t vgen_tx_retries = 0x4;		/* retry when tx descr not available */
279 uint32_t vgen_tx_delay = 0x30;		/* delay when tx descr not available */
280 
281 int vgen_rcv_thread_enabled = 1;	/* Enable Recieve thread */
282 
/*
 * max # of packets accumulated prior to sending them up. It is best
 * to keep this at 60% of the number of receive buffers.
 */
287 uint32_t vgen_chain_len = (VGEN_NRBUFS * 0.6);
288 
289 /*
290  * Tunables for each receive buffer size and number of buffers for
291  * each buffer size.
292  */
293 uint32_t vgen_rbufsz1 = VGEN_DBLK_SZ_128;
294 uint32_t vgen_rbufsz2 = VGEN_DBLK_SZ_256;
295 uint32_t vgen_rbufsz3 = VGEN_DBLK_SZ_2048;
296 
297 uint32_t vgen_nrbufs1 = VGEN_NRBUFS;
298 uint32_t vgen_nrbufs2 = VGEN_NRBUFS;
299 uint32_t vgen_nrbufs3 = VGEN_NRBUFS;
300 
301 #ifdef DEBUG
302 /* flags to simulate error conditions for debugging */
303 int vgen_trigger_txtimeout = 0;
304 int vgen_trigger_rxlost = 0;
305 #endif
306 
307 /* MD update matching structure */
308 static md_prop_match_t	vport_prop_match[] = {
309 	{ MDET_PROP_VAL,	"id" },
310 	{ MDET_LIST_END,	NULL }
311 };
312 
313 static mdeg_node_match_t vport_match = { "virtual-device-port",
314 					vport_prop_match };
315 
316 /* template for matching a particular vnet instance */
317 static mdeg_prop_spec_t vgen_prop_template[] = {
318 	{ MDET_PROP_STR,	"name",		"network" },
319 	{ MDET_PROP_VAL,	"cfg-handle",	NULL },
320 	{ MDET_LIST_END,	NULL,		NULL }
321 };
322 
323 #define	VGEN_SET_MDEG_PROP_INST(specp, val)	(specp)[1].ps_val = (val)
324 
325 static int vgen_mdeg_cb(void *cb_argp, mdeg_result_t *resp);
326 
327 static mac_callbacks_t vgen_m_callbacks = {
328 	0,
329 	vgen_stat,
330 	vgen_start,
331 	vgen_stop,
332 	vgen_promisc,
333 	vgen_multicst,
334 	vgen_unicst,
335 	vgen_tx,
336 	NULL,
337 	NULL,
338 	NULL
339 };
340 
341 /* externs */
342 extern pri_t	maxclsyspri;
343 extern proc_t	p0;
344 extern uint32_t vnet_ntxds;
345 extern uint32_t vnet_ldcwd_interval;
346 extern uint32_t vnet_ldcwd_txtimeout;
347 extern uint32_t vnet_ldc_mtu;
348 extern uint32_t vnet_nrbufs;
349 
350 
351 #ifdef DEBUG
352 
353 extern int vnet_dbglevel;
354 static void debug_printf(const char *fname, vgen_t *vgenp,
355 	vgen_ldc_t *ldcp, const char *fmt, ...);
356 
357 /* -1 for all LDCs info, or ldc_id for a specific LDC info */
358 int vgendbg_ldcid = -1;
359 
360 /* simulate handshake error conditions for debug */
361 uint32_t vgen_hdbg;
362 #define	HDBG_VERSION	0x1
363 #define	HDBG_TIMEOUT	0x2
364 #define	HDBG_BAD_SID	0x4
365 #define	HDBG_OUT_STATE	0x8
366 
367 #endif
368 
369 
370 
371 /*
372  * vgen_init() is called by an instance of vnet driver to initialize the
373  * corresponding generic proxy transport layer. The arguments passed by vnet
374  * are - an opaque pointer to the vnet instance, pointers to dev_info_t and
375  * the mac address of the vnet device, and a pointer to mac_register_t of
376  * the generic transport is returned in the last argument.
377  */
378 int
379 vgen_init(void *vnetp, dev_info_t *vnetdip, const uint8_t *macaddr,
380     mac_register_t **vgenmacp)
381 {
382 	vgen_t *vgenp;
383 	mac_register_t *macp;
384 	int instance;
385 
386 	if ((vnetp == NULL) || (vnetdip == NULL))
387 		return (DDI_FAILURE);
388 
389 	instance = ddi_get_instance(vnetdip);
390 
391 	DBG1(NULL, NULL, "vnet(%d): enter\n", instance);
392 
393 	vgenp = kmem_zalloc(sizeof (vgen_t), KM_SLEEP);
394 
395 	vgenp->vnetp = vnetp;
396 	vgenp->vnetdip = vnetdip;
397 	bcopy(macaddr, &(vgenp->macaddr), ETHERADDRL);
398 
399 	if ((macp = mac_alloc(MAC_VERSION)) == NULL) {
400 		KMEM_FREE(vgenp);
401 		return (DDI_FAILURE);
402 	}
403 	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
404 	macp->m_driver = vgenp;
405 	macp->m_dip = vnetdip;
406 	macp->m_src_addr = (uint8_t *)&(vgenp->macaddr);
407 	macp->m_callbacks = &vgen_m_callbacks;
408 	macp->m_min_sdu = 0;
409 	macp->m_max_sdu = ETHERMTU;
410 	vgenp->macp = macp;
411 
412 	/* allocate multicast table */
413 	vgenp->mctab = kmem_zalloc(VGEN_INIT_MCTAB_SIZE *
414 	    sizeof (struct ether_addr), KM_SLEEP);
415 	vgenp->mccount = 0;
416 	vgenp->mcsize = VGEN_INIT_MCTAB_SIZE;
417 
418 	mutex_init(&vgenp->lock, NULL, MUTEX_DRIVER, NULL);
419 
420 	/* register with MD event generator */
421 	if (vgen_mdeg_reg(vgenp) != DDI_SUCCESS) {
422 		mutex_destroy(&vgenp->lock);
423 		kmem_free(vgenp->mctab, VGEN_INIT_MCTAB_SIZE *
424 		    sizeof (struct ether_addr));
425 		mac_free(vgenp->macp);
426 		KMEM_FREE(vgenp);
427 		return (DDI_FAILURE);
428 	}
429 
430 	/* register macp of this vgen_t with vnet */
431 	*vgenmacp = vgenp->macp;
432 
433 	DBG1(NULL, NULL, "vnet(%d): exit\n", instance);
434 	return (DDI_SUCCESS);
435 }
436 
437 /*
438  * Called by vnet to undo the initializations done by vgen_init().
439  * The handle provided by generic transport during vgen_init() is the argument.
440  */
441 int
442 vgen_uninit(void *arg)
443 {
444 	vgen_t	*vgenp = (vgen_t *)arg;
445 	vio_mblk_pool_t *rp, *nrp;
446 
447 	if (vgenp == NULL) {
448 		return (DDI_FAILURE);
449 	}
450 
451 	DBG1(vgenp, NULL, "enter\n");
452 
453 	/* unregister with MD event generator */
454 	vgen_mdeg_unreg(vgenp);
455 
456 	mutex_enter(&vgenp->lock);
457 
458 	/* detach all ports from the device */
459 	vgen_detach_ports(vgenp);
460 
461 	/*
462 	 * free any pending rx mblk pools,
463 	 * that couldn't be freed previously during channel detach.
464 	 */
465 	rp = vgenp->rmp;
466 	while (rp != NULL) {
467 		nrp = vgenp->rmp = rp->nextp;
468 		if (vio_destroy_mblks(rp)) {
469 			vgenp->rmp = rp;
470 			mutex_exit(&vgenp->lock);
471 			return (DDI_FAILURE);
472 		}
473 		rp = nrp;
474 	}
475 
476 	/* free multicast table */
477 	kmem_free(vgenp->mctab, vgenp->mcsize * sizeof (struct ether_addr));
478 
479 	mac_free(vgenp->macp);
480 
481 	mutex_exit(&vgenp->lock);
482 
483 	mutex_destroy(&vgenp->lock);
484 
485 	KMEM_FREE(vgenp);
486 
487 	DBG1(vgenp, NULL, "exit\n");
488 
489 	return (DDI_SUCCESS);
490 }
491 
492 /* enable transmit/receive for the device */
493 int
494 vgen_start(void *arg)
495 {
496 	vgen_t		*vgenp = (vgen_t *)arg;
497 
498 	DBG1(vgenp, NULL, "enter\n");
499 
500 	mutex_enter(&vgenp->lock);
501 	vgen_init_ports(vgenp);
502 	vgenp->flags |= VGEN_STARTED;
503 	mutex_exit(&vgenp->lock);
504 
505 	DBG1(vgenp, NULL, "exit\n");
506 	return (DDI_SUCCESS);
507 }
508 
509 /* stop transmit/receive */
510 void
511 vgen_stop(void *arg)
512 {
513 	vgen_t		*vgenp = (vgen_t *)arg;
514 
515 	DBG1(vgenp, NULL, "enter\n");
516 
517 	mutex_enter(&vgenp->lock);
518 	vgen_uninit_ports(vgenp);
519 	vgenp->flags &= ~(VGEN_STARTED);
520 	mutex_exit(&vgenp->lock);
521 
522 	DBG1(vgenp, NULL, "exit\n");
523 }
524 
525 /* vgen transmit function */
526 static mblk_t *
527 vgen_tx(void *arg, mblk_t *mp)
528 {
529 	int i;
530 	vgen_port_t *portp;
531 	int status = VGEN_FAILURE;
532 
533 	portp = (vgen_port_t *)arg;
534 	/*
535 	 * Retry so that we avoid reporting a failure
536 	 * to the upper layer. Returning a failure may cause the
537 	 * upper layer to go into single threaded mode there by
538 	 * causing performance degradation, especially for a large
539 	 * number of connections.
540 	 */
541 	for (i = 0; i < vgen_tx_retries; ) {
542 		status = vgen_portsend(portp, mp);
543 		if (status == VGEN_SUCCESS) {
544 			break;
545 		}
546 		if (++i < vgen_tx_retries)
547 			delay(drv_usectohz(vgen_tx_delay));
548 	}
549 	if (status != VGEN_SUCCESS) {
550 		/* failure */
551 		return (mp);
552 	}
553 	/* success */
554 	return (NULL);
555 }
556 
557 /* transmit packets over the given port */
558 static int
559 vgen_portsend(vgen_port_t *portp, mblk_t *mp)
560 {
561 	vgen_ldclist_t	*ldclp;
562 	vgen_ldc_t *ldcp;
563 	int status;
564 	int rv = VGEN_SUCCESS;
565 
566 	ldclp = &portp->ldclist;
567 	READ_ENTER(&ldclp->rwlock);
568 	/*
569 	 * NOTE: for now, we will assume we have a single channel.
570 	 */
571 	if (ldclp->headp == NULL) {
572 		RW_EXIT(&ldclp->rwlock);
573 		return (VGEN_FAILURE);
574 	}
575 	ldcp = ldclp->headp;
576 
577 	status  = vgen_ldcsend(ldcp, mp);
578 
579 	RW_EXIT(&ldclp->rwlock);
580 
581 	if (status != VGEN_TX_SUCCESS) {
582 		rv = VGEN_FAILURE;
583 	}
584 	return (rv);
585 }
586 
587 /* channel transmit function */
588 static int
589 vgen_ldcsend(vgen_ldc_t *ldcp, mblk_t *mp)
590 {
591 	vgen_private_desc_t	*tbufp;
592 	vgen_private_desc_t	*rtbufp;
593 	vnet_public_desc_t	*rtxdp;
594 	vgen_private_desc_t	*ntbufp;
595 	vnet_public_desc_t	*txdp;
596 	vio_dring_entry_hdr_t	*hdrp;
597 	vgen_stats_t		*statsp;
598 	struct ether_header	*ehp;
599 	boolean_t	is_bcast = B_FALSE;
600 	boolean_t	is_mcast = B_FALSE;
601 	size_t		mblksz;
602 	caddr_t		dst;
603 	mblk_t		*bp;
604 	size_t		size;
605 	int		rv = 0;
606 	ldc_status_t	istatus;
607 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
608 
609 	statsp = &ldcp->stats;
610 	size = msgsize(mp);
611 
612 	DBG1(vgenp, ldcp, "enter\n");
613 
614 	if (ldcp->ldc_status != LDC_UP) {
615 		DWARN(vgenp, ldcp, "status(%d), dropping packet\n",
616 		    ldcp->ldc_status);
617 		/* retry ldc_up() if needed */
618 		if (ldcp->flags & CHANNEL_STARTED)
619 			(void) ldc_up(ldcp->ldc_handle);
620 		goto vgen_tx_exit;
621 	}
622 
623 	/* drop the packet if ldc is not up or handshake is not done */
624 	if (ldcp->hphase != VH_DONE) {
625 		DWARN(vgenp, ldcp, "hphase(%x), dropping packet\n",
626 		    ldcp->hphase);
627 		goto vgen_tx_exit;
628 	}
629 
630 	if (size > (size_t)ETHERMAX) {
631 		DWARN(vgenp, ldcp, "invalid size(%d)\n", size);
632 		goto vgen_tx_exit;
633 	}
634 	if (size < ETHERMIN)
635 		size = ETHERMIN;
636 
637 	ehp = (struct ether_header *)mp->b_rptr;
638 	is_bcast = IS_BROADCAST(ehp);
639 	is_mcast = IS_MULTICAST(ehp);
640 
641 	mutex_enter(&ldcp->txlock);
642 	/*
643 	 * allocate a descriptor
644 	 */
645 	tbufp = ldcp->next_tbufp;
646 	ntbufp = NEXTTBUF(ldcp, tbufp);
647 	if (ntbufp == ldcp->cur_tbufp) { /* out of tbufs/txds */
648 
649 		mutex_enter(&ldcp->tclock);
650 		/* Try reclaiming now */
651 		vgen_reclaim_dring(ldcp);
652 		ldcp->reclaim_lbolt = ddi_get_lbolt();
653 
654 		if (ntbufp == ldcp->cur_tbufp) {
655 			/* Now we are really out of tbuf/txds */
656 			ldcp->need_resched = B_TRUE;
657 			mutex_exit(&ldcp->tclock);
658 
659 			statsp->tx_no_desc++;
660 			mutex_exit(&ldcp->txlock);
661 
662 			return (VGEN_TX_NORESOURCES);
663 		}
664 		mutex_exit(&ldcp->tclock);
665 	}
666 	/* update next available tbuf in the ring and update tx index */
667 	ldcp->next_tbufp = ntbufp;
668 	INCR_TXI(ldcp->next_txi, ldcp);
669 
670 	/* Mark the buffer busy before releasing the lock */
671 	tbufp->flags = VGEN_PRIV_DESC_BUSY;
672 	mutex_exit(&ldcp->txlock);
673 
674 	/* copy data into pre-allocated transmit buffer */
675 	dst = tbufp->datap + VNET_IPALIGN;
676 	for (bp = mp; bp != NULL; bp = bp->b_cont) {
677 		mblksz = MBLKL(bp);
678 		bcopy(bp->b_rptr, dst, mblksz);
679 		dst += mblksz;
680 	}
681 
682 	tbufp->datalen = size;
683 
684 	/* initialize the corresponding public descriptor (txd) */
685 	txdp = tbufp->descp;
686 	hdrp = &txdp->hdr;
687 	txdp->nbytes = size;
688 	txdp->ncookies = tbufp->ncookies;
689 	bcopy((tbufp->memcookie), (txdp->memcookie),
690 	    tbufp->ncookies * sizeof (ldc_mem_cookie_t));
691 
692 	mutex_enter(&ldcp->wrlock);
693 	/*
694 	 * If the flags not set to BUSY, it implies that the clobber
695 	 * was done while we were copying the data. In such case,
696 	 * discard the packet and return.
697 	 */
698 	if (tbufp->flags != VGEN_PRIV_DESC_BUSY) {
699 		statsp->oerrors++;
700 		mutex_exit(&ldcp->wrlock);
701 		goto vgen_tx_exit;
702 	}
703 	hdrp->dstate = VIO_DESC_READY;
704 
705 	/* update stats */
706 	statsp->opackets++;
707 	statsp->obytes += size;
708 	if (is_bcast)
709 		statsp->brdcstxmt++;
710 	else if (is_mcast)
711 		statsp->multixmt++;
712 
713 	/* send dring datamsg to the peer */
714 	if (ldcp->resched_peer) {
715 
716 		rtbufp = &ldcp->tbufp[ldcp->resched_peer_txi];
717 		rtxdp = rtbufp->descp;
718 
719 		if (rtxdp->hdr.dstate == VIO_DESC_READY) {
720 
721 			rv = vgen_send_dring_data(ldcp,
722 			    (uint32_t)ldcp->resched_peer_txi, -1);
723 			if (rv != 0) {
724 				/* error: drop the packet */
725 				DWARN(vgenp, ldcp, "vgen_send_dring_data "
726 				    "failed: rv(%d) len(%d)\n",
727 				    ldcp->ldc_id, rv, size);
728 				statsp->oerrors++;
729 			} else {
730 				ldcp->resched_peer = B_FALSE;
731 			}
732 
733 		}
734 
735 	}
736 
737 	mutex_exit(&ldcp->wrlock);
738 
739 vgen_tx_exit:
740 	if (rv == ECONNRESET) {
741 		/*
742 		 * Check if either callback thread or another tx thread is
743 		 * already running. Calling mutex_enter() will result in a
744 		 * deadlock if the other thread already holds cblock and is
745 		 * blocked in vnet_modify_fdb() (which is called from
746 		 * vgen_handle_evt_reset()) waiting for write access on rwlock,
747 		 * as this transmit thread already holds that lock as a reader
748 		 * in vnet_m_tx(). See comments in vnet_modify_fdb() in vnet.c.
749 		 */
750 		if (mutex_tryenter(&ldcp->cblock)) {
751 			if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
752 				DWARN(vgenp, ldcp, "ldc_status() error\n");
753 			} else {
754 				ldcp->ldc_status = istatus;
755 			}
756 			if (ldcp->ldc_status != LDC_UP) {
757 				/*
758 				 * Second arg is TRUE, as we know that
759 				 * the caller of this function - vnet_m_tx(),
760 				 * already holds fdb-rwlock as a reader.
761 				 */
762 				vgen_handle_evt_reset(ldcp, B_TRUE);
763 			}
764 			mutex_exit(&ldcp->cblock);
765 		}
766 	}
767 	freemsg(mp);
768 	DBG1(vgenp, ldcp, "exit\n");
769 	return (VGEN_TX_SUCCESS);
770 }
771 
772 /* enable/disable a multicast address */
773 int
774 vgen_multicst(void *arg, boolean_t add, const uint8_t *mca)
775 {
776 	vgen_t			*vgenp;
777 	vnet_mcast_msg_t	mcastmsg;
778 	vio_msg_tag_t		*tagp;
779 	vgen_port_t		*portp;
780 	vgen_portlist_t		*plistp;
781 	vgen_ldc_t		*ldcp;
782 	vgen_ldclist_t		*ldclp;
783 	struct ether_addr	*addrp;
784 	int			rv = DDI_FAILURE;
785 	uint32_t		i;
786 
787 	vgenp = (vgen_t *)arg;
788 	addrp = (struct ether_addr *)mca;
789 	tagp = &mcastmsg.tag;
790 	bzero(&mcastmsg, sizeof (mcastmsg));
791 
792 	mutex_enter(&vgenp->lock);
793 
794 	plistp = &(vgenp->vgenports);
795 
796 	READ_ENTER(&plistp->rwlock);
797 
798 	portp = vgenp->vsw_portp;
799 	if (portp == NULL) {
800 		RW_EXIT(&plistp->rwlock);
801 		mutex_exit(&vgenp->lock);
802 		return (rv);
803 	}
804 	ldclp = &portp->ldclist;
805 
806 	READ_ENTER(&ldclp->rwlock);
807 
808 	ldcp = ldclp->headp;
809 	if (ldcp == NULL)
810 		goto vgen_mcast_exit;
811 
812 	mutex_enter(&ldcp->cblock);
813 
814 	if (ldcp->hphase == VH_DONE) {
815 		/*
816 		 * If handshake is done, send a msg to vsw to add/remove
817 		 * the multicast address. Otherwise, we just update this
818 		 * mcast address in our table and the table will be sync'd
819 		 * with vsw when handshake completes.
820 		 */
821 		tagp->vio_msgtype = VIO_TYPE_CTRL;
822 		tagp->vio_subtype = VIO_SUBTYPE_INFO;
823 		tagp->vio_subtype_env = VNET_MCAST_INFO;
824 		tagp->vio_sid = ldcp->local_sid;
825 		bcopy(mca, &(mcastmsg.mca), ETHERADDRL);
826 		mcastmsg.set = add;
827 		mcastmsg.count = 1;
828 		if (vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (mcastmsg),
829 		    B_FALSE) != VGEN_SUCCESS) {
830 			DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
831 			mutex_exit(&ldcp->cblock);
832 			goto vgen_mcast_exit;
833 		}
834 	}
835 
836 	mutex_exit(&ldcp->cblock);
837 
838 	if (add) {
839 
840 		/* expand multicast table if necessary */
841 		if (vgenp->mccount >= vgenp->mcsize) {
842 			struct ether_addr	*newtab;
843 			uint32_t		newsize;
844 
845 
846 			newsize = vgenp->mcsize * 2;
847 
848 			newtab = kmem_zalloc(newsize *
849 			    sizeof (struct ether_addr), KM_NOSLEEP);
850 			if (newtab == NULL)
851 				goto vgen_mcast_exit;
852 			bcopy(vgenp->mctab, newtab, vgenp->mcsize *
853 			    sizeof (struct ether_addr));
854 			kmem_free(vgenp->mctab,
855 			    vgenp->mcsize * sizeof (struct ether_addr));
856 
857 			vgenp->mctab = newtab;
858 			vgenp->mcsize = newsize;
859 		}
860 
861 		/* add address to the table */
862 		vgenp->mctab[vgenp->mccount++] = *addrp;
863 
864 	} else {
865 
866 		/* delete address from the table */
867 		for (i = 0; i < vgenp->mccount; i++) {
868 			if (ether_cmp(addrp, &(vgenp->mctab[i])) == 0) {
869 
870 				/*
871 				 * If there's more than one address in this
872 				 * table, delete the unwanted one by moving
873 				 * the last one in the list over top of it;
874 				 * otherwise, just remove it.
875 				 */
876 				if (vgenp->mccount > 1) {
877 					vgenp->mctab[i] =
878 					    vgenp->mctab[vgenp->mccount-1];
879 				}
880 				vgenp->mccount--;
881 				break;
882 			}
883 		}
884 	}
885 
886 	rv = DDI_SUCCESS;
887 
888 vgen_mcast_exit:
889 	RW_EXIT(&ldclp->rwlock);
890 	RW_EXIT(&plistp->rwlock);
891 
892 	mutex_exit(&vgenp->lock);
893 	return (rv);
894 }
895 
896 /* set or clear promiscuous mode on the device */
897 static int
898 vgen_promisc(void *arg, boolean_t on)
899 {
900 	_NOTE(ARGUNUSED(arg, on))
901 	return (DDI_SUCCESS);
902 }
903 
904 /* set the unicast mac address of the device */
905 static int
906 vgen_unicst(void *arg, const uint8_t *mca)
907 {
908 	_NOTE(ARGUNUSED(arg, mca))
909 	return (DDI_SUCCESS);
910 }
911 
912 /* get device statistics */
913 int
914 vgen_stat(void *arg, uint_t stat, uint64_t *val)
915 {
916 	vgen_t		*vgenp = (vgen_t *)arg;
917 	vgen_port_t	*portp;
918 	vgen_portlist_t	*plistp;
919 
920 	*val = 0;
921 
922 	plistp = &(vgenp->vgenports);
923 	READ_ENTER(&plistp->rwlock);
924 
925 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
926 		*val += vgen_port_stat(portp, stat);
927 	}
928 
929 	RW_EXIT(&plistp->rwlock);
930 
931 	return (0);
932 }
933 
934 static void
935 vgen_ioctl(void *arg, queue_t *wq, mblk_t *mp)
936 {
937 	 _NOTE(ARGUNUSED(arg, wq, mp))
938 }
939 
940 /* vgen internal functions */
941 /* detach all ports from the device */
942 static void
943 vgen_detach_ports(vgen_t *vgenp)
944 {
945 	vgen_port_t	*portp;
946 	vgen_portlist_t	*plistp;
947 
948 	plistp = &(vgenp->vgenports);
949 	WRITE_ENTER(&plistp->rwlock);
950 
951 	while ((portp = plistp->headp) != NULL) {
952 		vgen_port_detach(portp);
953 	}
954 
955 	RW_EXIT(&plistp->rwlock);
956 }
957 
958 /*
959  * detach the given port.
960  */
961 static void
962 vgen_port_detach(vgen_port_t *portp)
963 {
964 	vgen_t		*vgenp;
965 	vgen_ldclist_t	*ldclp;
966 	int		port_num;
967 
968 	vgenp = portp->vgenp;
969 	port_num = portp->port_num;
970 
971 	DBG1(vgenp, NULL, "port(%d):enter\n", port_num);
972 
973 	/* remove it from port list */
974 	vgen_port_list_remove(portp);
975 
976 	/* detach channels from this port */
977 	ldclp = &portp->ldclist;
978 	WRITE_ENTER(&ldclp->rwlock);
979 	while (ldclp->headp) {
980 		vgen_ldc_detach(ldclp->headp);
981 	}
982 	RW_EXIT(&ldclp->rwlock);
983 
984 	if (vgenp->vsw_portp == portp) {
985 		vgenp->vsw_portp = NULL;
986 	}
987 	KMEM_FREE(portp);
988 
989 	DBG1(vgenp, NULL, "port(%d):exit\n", port_num);
990 }
991 
992 /* add a port to port list */
993 static void
994 vgen_port_list_insert(vgen_port_t *portp)
995 {
996 	vgen_portlist_t *plistp;
997 	vgen_t *vgenp;
998 
999 	vgenp = portp->vgenp;
1000 	plistp = &(vgenp->vgenports);
1001 
1002 	if (plistp->headp == NULL) {
1003 		plistp->headp = portp;
1004 	} else {
1005 		plistp->tailp->nextp = portp;
1006 	}
1007 	plistp->tailp = portp;
1008 	portp->nextp = NULL;
1009 }
1010 
1011 /* remove a port from port list */
1012 static void
1013 vgen_port_list_remove(vgen_port_t *portp)
1014 {
1015 	vgen_port_t *prevp;
1016 	vgen_port_t *nextp;
1017 	vgen_portlist_t *plistp;
1018 	vgen_t *vgenp;
1019 
1020 	vgenp = portp->vgenp;
1021 
1022 	plistp = &(vgenp->vgenports);
1023 
1024 	if (plistp->headp == NULL)
1025 		return;
1026 
1027 	if (portp == plistp->headp) {
1028 		plistp->headp = portp->nextp;
1029 		if (portp == plistp->tailp)
1030 			plistp->tailp = plistp->headp;
1031 	} else {
1032 		for (prevp = plistp->headp;
1033 		    ((nextp = prevp->nextp) != NULL) && (nextp != portp);
1034 		    prevp = nextp)
1035 			;
1036 		if (nextp == portp) {
1037 			prevp->nextp = portp->nextp;
1038 		}
1039 		if (portp == plistp->tailp)
1040 			plistp->tailp = prevp;
1041 	}
1042 }
1043 
1044 /* lookup a port in the list based on port_num */
1045 static vgen_port_t *
1046 vgen_port_lookup(vgen_portlist_t *plistp, int port_num)
1047 {
1048 	vgen_port_t *portp = NULL;
1049 
1050 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
1051 		if (portp->port_num == port_num) {
1052 			break;
1053 		}
1054 	}
1055 
1056 	return (portp);
1057 }
1058 
1059 /* enable ports for transmit/receive */
1060 static void
1061 vgen_init_ports(vgen_t *vgenp)
1062 {
1063 	vgen_port_t	*portp;
1064 	vgen_portlist_t	*plistp;
1065 
1066 	plistp = &(vgenp->vgenports);
1067 	READ_ENTER(&plistp->rwlock);
1068 
1069 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
1070 		vgen_port_init(portp);
1071 	}
1072 
1073 	RW_EXIT(&plistp->rwlock);
1074 }
1075 
1076 static void
1077 vgen_port_init(vgen_port_t *portp)
1078 {
1079 	vgen_t *vgenp;
1080 
1081 	vgenp = portp->vgenp;
1082 	/*
1083 	 * Create fdb entry in vnet, corresponding to the mac
1084 	 * address of this port. Note that the port specified
1085 	 * is vsw-port. This is done so that vsw-port acts
1086 	 * as the route to reach this macaddr, until the
1087 	 * channel for this port comes up (LDC_UP) and
1088 	 * handshake is done successfully.
1089 	 * eg, if the peer is OBP-vnet, it may not bring the
1090 	 * channel up for this port and may communicate via
1091 	 * vsw to reach this port.
1092 	 * Later, when Solaris-vnet comes up at the other end
1093 	 * of the channel for this port and brings up the channel,
1094 	 * it is an indication that peer vnet is capable of
1095 	 * distributed switching, so the direct route through this
1096 	 * port is specified in fdb, using vnet_modify_fdb(macaddr);
1097 	 */
1098 	vnet_add_fdb(vgenp->vnetp, (uint8_t *)&portp->macaddr,
1099 	    vgen_tx, vgenp->vsw_portp);
1100 
1101 	if (portp == vgenp->vsw_portp) {
1102 		/*
1103 		 * create the default route entry in vnet's fdb.
1104 		 * This is the entry used by vnet to reach
1105 		 * unknown destinations, which basically goes
1106 		 * through vsw on domain0 and out through the
1107 		 * physical device bound to vsw.
1108 		 */
1109 		vnet_add_def_rte(vgenp->vnetp, vgen_tx, portp);
1110 	}
1111 
1112 	/* Bring up the channels of this port */
1113 	vgen_init_ldcs(portp);
1114 }
1115 
1116 /* disable transmit/receive on ports */
1117 static void
1118 vgen_uninit_ports(vgen_t *vgenp)
1119 {
1120 	vgen_port_t	*portp;
1121 	vgen_portlist_t	*plistp;
1122 
1123 	plistp = &(vgenp->vgenports);
1124 	READ_ENTER(&plistp->rwlock);
1125 
1126 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
1127 		vgen_port_uninit(portp);
1128 	}
1129 
1130 	RW_EXIT(&plistp->rwlock);
1131 }
1132 
1133 static void
1134 vgen_port_uninit(vgen_port_t *portp)
1135 {
1136 	vgen_t *vgenp;
1137 
1138 	vgenp = portp->vgenp;
1139 
1140 	vgen_uninit_ldcs(portp);
1141 	/* delete the entry in vnet's fdb for this port */
1142 	vnet_del_fdb(vgenp->vnetp, (uint8_t *)&portp->macaddr);
1143 	if (portp == vgenp->vsw_portp) {
1144 		/*
1145 		 * if this is vsw-port, then delete the default
1146 		 * route entry in vnet's fdb.
1147 		 */
1148 		vnet_del_def_rte(vgenp->vnetp);
1149 	}
1150 }
1151 
1152 /* register with MD event generator */
1153 static int
1154 vgen_mdeg_reg(vgen_t *vgenp)
1155 {
1156 	mdeg_prop_spec_t	*pspecp;
1157 	mdeg_node_spec_t	*parentp;
1158 	uint_t			templatesz;
1159 	int			rv;
1160 	mdeg_handle_t		hdl;
1161 	int			i;
1162 
1163 	i = ddi_prop_get_int(DDI_DEV_T_ANY, vgenp->vnetdip,
1164 	    DDI_PROP_DONTPASS, reg_propname, -1);
1165 	if (i == -1) {
1166 		return (DDI_FAILURE);
1167 	}
1168 	templatesz = sizeof (vgen_prop_template);
1169 	pspecp = kmem_zalloc(templatesz, KM_NOSLEEP);
1170 	if (pspecp == NULL) {
1171 		return (DDI_FAILURE);
1172 	}
1173 	parentp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_NOSLEEP);
1174 	if (parentp == NULL) {
1175 		kmem_free(pspecp, templatesz);
1176 		return (DDI_FAILURE);
1177 	}
1178 
1179 	bcopy(vgen_prop_template, pspecp, templatesz);
1180 
1181 	/*
1182 	 * NOTE: The instance here refers to the value of "reg" property and
1183 	 * not the dev_info instance (ddi_get_instance()) of vnet.
1184 	 */
1185 	VGEN_SET_MDEG_PROP_INST(pspecp, i);
1186 
1187 	parentp->namep = "virtual-device";
1188 	parentp->specp = pspecp;
1189 
1190 	/* save parentp in vgen_t */
1191 	vgenp->mdeg_parentp = parentp;
1192 
1193 	rv = mdeg_register(parentp, &vport_match, vgen_mdeg_cb, vgenp, &hdl);
1194 	if (rv != MDEG_SUCCESS) {
1195 		DERR(vgenp, NULL, "mdeg_register failed\n");
1196 		KMEM_FREE(parentp);
1197 		kmem_free(pspecp, templatesz);
1198 		vgenp->mdeg_parentp = NULL;
1199 		return (DDI_FAILURE);
1200 	}
1201 
1202 	/* save mdeg handle in vgen_t */
1203 	vgenp->mdeg_hdl = hdl;
1204 
1205 	return (DDI_SUCCESS);
1206 }
1207 
1208 /* unregister with MD event generator */
1209 static void
1210 vgen_mdeg_unreg(vgen_t *vgenp)
1211 {
1212 	(void) mdeg_unregister(vgenp->mdeg_hdl);
1213 	kmem_free(vgenp->mdeg_parentp->specp, sizeof (vgen_prop_template));
1214 	KMEM_FREE(vgenp->mdeg_parentp);
1215 	vgenp->mdeg_parentp = NULL;
1216 	vgenp->mdeg_hdl = NULL;
1217 }
1218 
1219 /* callback function registered with MD event generator */
1220 static int
1221 vgen_mdeg_cb(void *cb_argp, mdeg_result_t *resp)
1222 {
1223 	int idx;
1224 	int vsw_idx = -1;
1225 	uint64_t val;
1226 	vgen_t *vgenp;
1227 
1228 	if ((resp == NULL) || (cb_argp == NULL)) {
1229 		return (MDEG_FAILURE);
1230 	}
1231 
1232 	vgenp = (vgen_t *)cb_argp;
1233 	DBG1(vgenp, NULL, "enter\n");
1234 
1235 	mutex_enter(&vgenp->lock);
1236 
1237 	DBG1(vgenp, NULL, "ports: removed(%x), "
1238 	"added(%x), updated(%x)\n", resp->removed.nelem,
1239 	    resp->added.nelem, resp->match_curr.nelem);
1240 
1241 	for (idx = 0; idx < resp->removed.nelem; idx++) {
1242 		(void) vgen_remove_port(vgenp, resp->removed.mdp,
1243 		    resp->removed.mdep[idx]);
1244 	}
1245 
1246 	if (vgenp->vsw_portp == NULL) {
1247 		/*
1248 		 * find vsw_port and add it first, because other ports need
1249 		 * this when adding fdb entry (see vgen_port_init()).
1250 		 */
1251 		for (idx = 0; idx < resp->added.nelem; idx++) {
1252 			if (!(md_get_prop_val(resp->added.mdp,
1253 			    resp->added.mdep[idx], swport_propname, &val))) {
1254 				if (val == 0) {
1255 					/*
1256 					 * This port is connected to the
1257 					 * vsw on dom0.
1258 					 */
1259 					vsw_idx = idx;
1260 					if (vgen_add_port(vgenp,
1261 					    resp->added.mdp,
1262 					    resp->added.mdep[idx]) !=
1263 					    DDI_SUCCESS) {
1264 						cmn_err(CE_NOTE, "vnet%d Could "
1265 						    "not initialize virtual "
1266 						    "switch port.",
1267 						    ddi_get_instance(vgenp->
1268 						    vnetdip));
1269 						mutex_exit(&vgenp->lock);
1270 						return (MDEG_FAILURE);
1271 					}
1272 					break;
1273 				}
1274 			}
1275 		}
1276 		if (vsw_idx == -1) {
1277 			DWARN(vgenp, NULL, "can't find vsw_port\n");
1278 			mutex_exit(&vgenp->lock);
1279 			return (MDEG_FAILURE);
1280 		}
1281 	}
1282 
1283 	for (idx = 0; idx < resp->added.nelem; idx++) {
1284 		if ((vsw_idx != -1) && (vsw_idx == idx)) /* skip vsw_port */
1285 			continue;
1286 
1287 		/* If this port can't be added just skip it. */
1288 		(void) vgen_add_port(vgenp, resp->added.mdp,
1289 		    resp->added.mdep[idx]);
1290 	}
1291 
1292 	for (idx = 0; idx < resp->match_curr.nelem; idx++) {
1293 		(void) vgen_update_port(vgenp, resp->match_curr.mdp,
1294 		    resp->match_curr.mdep[idx],
1295 		    resp->match_prev.mdp,
1296 		    resp->match_prev.mdep[idx]);
1297 	}
1298 
1299 	mutex_exit(&vgenp->lock);
1300 	DBG1(vgenp, NULL, "exit\n");
1301 	return (MDEG_SUCCESS);
1302 }
1303 
1304 /* add a new port to the device */
1305 static int
1306 vgen_add_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
1307 {
1308 	uint64_t	port_num;
1309 	uint64_t	*ldc_ids;
1310 	uint64_t	macaddr;
1311 	uint64_t	val;
1312 	int		num_ldcs;
1313 	int		vsw_port = B_FALSE;
1314 	int		i;
1315 	int		addrsz;
1316 	int		num_nodes = 0;
1317 	int		listsz = 0;
1318 	int		rv = DDI_SUCCESS;
1319 	mde_cookie_t	*listp = NULL;
1320 	uint8_t		*addrp;
1321 	struct ether_addr	ea;
1322 
1323 	/* read "id" property to get the port number */
1324 	if (md_get_prop_val(mdp, mdex, id_propname, &port_num)) {
1325 		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
1326 		return (DDI_FAILURE);
1327 	}
1328 
1329 	/*
1330 	 * Find the channel endpoint node(s) under this port node.
1331 	 */
1332 	if ((num_nodes = md_node_count(mdp)) <= 0) {
1333 		DWARN(vgenp, NULL, "invalid number of nodes found (%d)",
1334 		    num_nodes);
1335 		return (DDI_FAILURE);
1336 	}
1337 
1338 	/* allocate space for node list */
1339 	listsz = num_nodes * sizeof (mde_cookie_t);
1340 	listp = kmem_zalloc(listsz, KM_NOSLEEP);
1341 	if (listp == NULL)
1342 		return (DDI_FAILURE);
1343 
1344 	num_ldcs = md_scan_dag(mdp, mdex,
1345 	    md_find_name(mdp, channel_propname),
1346 	    md_find_name(mdp, "fwd"), listp);
1347 
1348 	if (num_ldcs <= 0) {
1349 		DWARN(vgenp, NULL, "can't find %s nodes", channel_propname);
1350 		kmem_free(listp, listsz);
1351 		return (DDI_FAILURE);
1352 	}
1353 
1354 	DBG2(vgenp, NULL, "num_ldcs %d", num_ldcs);
1355 
1356 	ldc_ids = kmem_zalloc(num_ldcs * sizeof (uint64_t), KM_NOSLEEP);
1357 	if (ldc_ids == NULL) {
1358 		kmem_free(listp, listsz);
1359 		return (DDI_FAILURE);
1360 	}
1361 
1362 	for (i = 0; i < num_ldcs; i++) {
1363 		/* read channel ids */
1364 		if (md_get_prop_val(mdp, listp[i], id_propname, &ldc_ids[i])) {
1365 			DWARN(vgenp, NULL, "prop(%s) not found\n",
1366 			    id_propname);
1367 			kmem_free(listp, listsz);
1368 			kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
1369 			return (DDI_FAILURE);
1370 		}
1371 		DBG2(vgenp, NULL, "ldc_id 0x%llx", ldc_ids[i]);
1372 	}
1373 
1374 	kmem_free(listp, listsz);
1375 
1376 	if (md_get_prop_data(mdp, mdex, rmacaddr_propname, &addrp,
1377 	    &addrsz)) {
1378 		DWARN(vgenp, NULL, "prop(%s) not found\n", rmacaddr_propname);
1379 		kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
1380 		return (DDI_FAILURE);
1381 	}
1382 
1383 	if (addrsz < ETHERADDRL) {
1384 		DWARN(vgenp, NULL, "invalid address size (%d)\n", addrsz);
1385 		kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
1386 		return (DDI_FAILURE);
1387 	}
1388 
1389 	macaddr = *((uint64_t *)addrp);
1390 
1391 	DBG2(vgenp, NULL, "remote mac address 0x%llx\n", macaddr);
1392 
1393 	for (i = ETHERADDRL - 1; i >= 0; i--) {
1394 		ea.ether_addr_octet[i] = macaddr & 0xFF;
1395 		macaddr >>= 8;
1396 	}
1397 
1398 	if (vgenp->vsw_portp == NULL) {
1399 		if (!(md_get_prop_val(mdp, mdex, swport_propname, &val))) {
1400 			if (val == 0) {
1401 				/* This port is connected to the vsw on dom0 */
1402 				vsw_port = B_TRUE;
1403 			}
1404 		}
1405 	}
1406 	if (vgen_port_attach_mdeg(vgenp, (int)port_num, ldc_ids, num_ldcs,
1407 	    &ea, vsw_port) != DDI_SUCCESS) {
1408 		cmn_err(CE_NOTE, "vnet%d failed to attach port %d remote MAC "
1409 		    "address %s", ddi_get_instance(vgenp->vnetdip),
1410 		    (int)port_num, ether_sprintf(&ea));
1411 		rv = DDI_FAILURE;
1412 	}
1413 
1414 	kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
1415 
1416 	return (rv);
1417 }
1418 
1419 /* remove a port from the device */
1420 static int
1421 vgen_remove_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
1422 {
1423 	uint64_t	port_num;
1424 	vgen_port_t	*portp;
1425 	vgen_portlist_t	*plistp;
1426 
1427 	/* read "id" property to get the port number */
1428 	if (md_get_prop_val(mdp, mdex, id_propname, &port_num)) {
1429 		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
1430 		return (DDI_FAILURE);
1431 	}
1432 
1433 	plistp = &(vgenp->vgenports);
1434 
1435 	WRITE_ENTER(&plistp->rwlock);
1436 	portp = vgen_port_lookup(plistp, (int)port_num);
1437 	if (portp == NULL) {
1438 		DWARN(vgenp, NULL, "can't find port(%lx)\n", port_num);
1439 		RW_EXIT(&plistp->rwlock);
1440 		return (DDI_FAILURE);
1441 	}
1442 
1443 	vgen_port_detach_mdeg(portp);
1444 	RW_EXIT(&plistp->rwlock);
1445 
1446 	return (DDI_SUCCESS);
1447 }
1448 
1449 /* attach a port to the device based on mdeg data */
1450 static int
1451 vgen_port_attach_mdeg(vgen_t *vgenp, int port_num, uint64_t *ldcids,
1452 	int num_ids, struct ether_addr *macaddr, boolean_t vsw_port)
1453 {
1454 	vgen_port_t		*portp;
1455 	vgen_portlist_t		*plistp;
1456 	int			i;
1457 
1458 	portp = kmem_zalloc(sizeof (vgen_port_t), KM_NOSLEEP);
1459 	if (portp == NULL) {
1460 		return (DDI_FAILURE);
1461 	}
1462 	portp->vgenp = vgenp;
1463 	portp->port_num = port_num;
1464 
1465 	DBG1(vgenp, NULL, "port_num(%d)\n", portp->port_num);
1466 
1467 	portp->ldclist.num_ldcs = 0;
1468 	portp->ldclist.headp = NULL;
1469 	rw_init(&portp->ldclist.rwlock, NULL, RW_DRIVER, NULL);
1470 
1471 	ether_copy(macaddr, &portp->macaddr);
1472 	for (i = 0; i < num_ids; i++) {
1473 		DBG2(vgenp, NULL, "ldcid (%lx)\n", ldcids[i]);
1474 		if (vgen_ldc_attach(portp, ldcids[i]) == DDI_FAILURE) {
1475 			rw_destroy(&portp->ldclist.rwlock);
1476 			vgen_port_detach(portp);
1477 			return (DDI_FAILURE);
1478 		}
1479 	}
1480 
1481 	/* link it into the list of ports */
1482 	plistp = &(vgenp->vgenports);
1483 	WRITE_ENTER(&plistp->rwlock);
1484 	vgen_port_list_insert(portp);
1485 	RW_EXIT(&plistp->rwlock);
1486 
1487 	/* This port is connected to the vsw on domain0 */
1488 	if (vsw_port)
1489 		vgenp->vsw_portp = portp;
1490 
1491 	if (vgenp->flags & VGEN_STARTED) {	/* interface is configured */
1492 		vgen_port_init(portp);
1493 	}
1494 
1495 	DBG1(vgenp, NULL, "exit: port_num(%d)\n", portp->port_num);
1496 	return (DDI_SUCCESS);
1497 }
1498 
1499 /* detach a port from the device based on mdeg data */
1500 static void
1501 vgen_port_detach_mdeg(vgen_port_t *portp)
1502 {
1503 	vgen_t *vgenp = portp->vgenp;
1504 
1505 	DBG1(vgenp, NULL, "enter: port_num(%d)\n", portp->port_num);
1506 	/* stop the port if needed */
1507 	if (vgenp->flags & VGEN_STARTED) {
1508 		vgen_port_uninit(portp);
1509 	}
1510 	vgen_port_detach(portp);
1511 
1512 	DBG1(vgenp, NULL, "exit: port_num(%d)\n", portp->port_num);
1513 }
1514 
/*
 * Handle an updated port node reported by the MD event generator.
 * Not implemented yet: every argument is ignored and DDI_SUCCESS is
 * always returned.
 */
static int
vgen_update_port(vgen_t *vgenp, md_t *curr_mdp, mde_cookie_t curr_mdex,
	md_t *prev_mdp, mde_cookie_t prev_mdex)
{
	 _NOTE(ARGUNUSED(vgenp, curr_mdp, curr_mdex, prev_mdp, prev_mdex))

	/* NOTE: TBD */
	return (DDI_SUCCESS);
}
1524 
1525 static uint64_t
1526 vgen_port_stat(vgen_port_t *portp, uint_t stat)
1527 {
1528 	vgen_ldclist_t	*ldclp;
1529 	vgen_ldc_t *ldcp;
1530 	uint64_t	val;
1531 
1532 	val = 0;
1533 	ldclp = &portp->ldclist;
1534 
1535 	READ_ENTER(&ldclp->rwlock);
1536 	for (ldcp = ldclp->headp; ldcp != NULL; ldcp = ldcp->nextp) {
1537 		val += vgen_ldc_stat(ldcp, stat);
1538 	}
1539 	RW_EXIT(&ldclp->rwlock);
1540 
1541 	return (val);
1542 }
1543 
/*
 * Attach the channel corresponding to the given ldc_id to the port.
 *
 * Allocates and initializes a vgen_ldc_t, initializes the LDC channel,
 * optionally sets up the receive soft interrupt and worker thread (when
 * vgen_rcv_thread_enabled is set), registers the LDC callback, allocates
 * transmit and receive resources, creates the kstats and finally links
 * the channel at the head of the port's channel list.
 *
 * Each completed step is recorded as a bit in attach_state so that the
 * failure path undoes only what was actually set up.
 *
 * Returns DDI_SUCCESS or DDI_FAILURE.
 */
static int
vgen_ldc_attach(vgen_port_t *portp, uint64_t ldc_id)
{
	vgen_t 		*vgenp;
	vgen_ldclist_t	*ldclp;
	vgen_ldc_t 	*ldcp, **prev_ldcp;
	ldc_attr_t 	attr;
	int 		status;
	ldc_status_t	istatus;
	char		kname[MAXNAMELEN];
	int		instance;
	/* one bit per attach step that may need undoing on failure */
	enum	{AST_init = 0x0, AST_ldc_alloc = 0x1,
		AST_mutex_init = 0x2, AST_ldc_init = 0x4,
		AST_ldc_reg_cb = 0x8, AST_alloc_tx_ring = 0x10,
		AST_create_rxmblks = 0x20, AST_add_softintr = 0x40,
		AST_create_rcv_thread = 0x80} attach_state;

	attach_state = AST_init;
	vgenp = portp->vgenp;
	ldclp = &portp->ldclist;

	ldcp = kmem_zalloc(sizeof (vgen_ldc_t), KM_NOSLEEP);
	if (ldcp == NULL) {
		goto ldc_attach_failed;
	}
	ldcp->ldc_id = ldc_id;
	ldcp->portp = portp;

	attach_state |= AST_ldc_alloc;

	mutex_init(&ldcp->txlock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&ldcp->cblock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&ldcp->tclock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&ldcp->wrlock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&ldcp->rxlock, NULL, MUTEX_DRIVER, NULL);

	attach_state |= AST_mutex_init;

	/* describe the channel to the LDC framework and initialize it */
	attr.devclass = LDC_DEV_NT;
	attr.instance = ddi_get_instance(vgenp->vnetdip);
	attr.mode = LDC_MODE_UNRELIABLE;
	attr.mtu = vnet_ldc_mtu;
	status = ldc_init(ldc_id, &attr, &ldcp->ldc_handle);
	if (status != 0) {
		DWARN(vgenp, ldcp, "ldc_init failed,rv (%d)\n", status);
		goto ldc_attach_failed;
	}
	attach_state |= AST_ldc_init;

	/* receive soft interrupt and worker thread are optional */
	if (vgen_rcv_thread_enabled) {
		ldcp->rcv_thr_flags = 0;
		ldcp->rcv_mhead = ldcp->rcv_mtail = NULL;
		ldcp->soft_pri = PIL_6;

		status = ddi_intr_add_softint(vgenp->vnetdip,
		    &ldcp->soft_handle, ldcp->soft_pri,
		    vgen_ldc_rcv_softintr, (void *)ldcp);
		if (status != DDI_SUCCESS) {
			DWARN(vgenp, ldcp, "add_softint failed, rv (%d)\n",
			    status);
			goto ldc_attach_failed;
		}

		/*
		 * Initialize the soft_lock with the same priority as
		 * the soft interrupt to protect from the soft interrupt.
		 */
		mutex_init(&ldcp->soft_lock, NULL, MUTEX_DRIVER,
		    DDI_INTR_PRI(ldcp->soft_pri));
		attach_state |= AST_add_softintr;

		mutex_init(&ldcp->rcv_thr_lock, NULL, MUTEX_DRIVER, NULL);
		cv_init(&ldcp->rcv_thr_cv, NULL, CV_DRIVER, NULL);
		ldcp->rcv_thread = thread_create(NULL, 2 * DEFAULTSTKSZ,
		    vgen_ldc_rcv_worker, ldcp, 0, &p0, TS_RUN, maxclsyspri);

		/*
		 * The state bit is set before the NULL check so that the
		 * failure path still destroys rcv_thr_lock and rcv_thr_cv.
		 */
		attach_state |= AST_create_rcv_thread;
		if (ldcp->rcv_thread == NULL) {
			DWARN(vgenp, ldcp, "Failed to create worker thread");
			goto ldc_attach_failed;
		}
	}

	status = ldc_reg_callback(ldcp->ldc_handle, vgen_ldc_cb, (caddr_t)ldcp);
	if (status != 0) {
		DWARN(vgenp, ldcp, "ldc_reg_callback failed, rv (%d)\n",
		    status);
		goto ldc_attach_failed;
	}
	attach_state |= AST_ldc_reg_cb;

	/* a freshly initialized channel is expected to be in LDC_INIT */
	(void) ldc_status(ldcp->ldc_handle, &istatus);
	ASSERT(istatus == LDC_INIT);
	ldcp->ldc_status = istatus;

	/* allocate transmit resources */
	status = vgen_alloc_tx_ring(ldcp);
	if (status != 0) {
		goto ldc_attach_failed;
	}
	attach_state |= AST_alloc_tx_ring;

	/* allocate receive resources */
	status = vio_init_multipools(&ldcp->vmp, VGEN_NUM_VMPOOLS,
	    vgen_rbufsz1, vgen_rbufsz2, vgen_rbufsz3,
	    vgen_nrbufs1, vgen_nrbufs2, vgen_nrbufs3);
	if (status != 0) {
		goto ldc_attach_failed;
	}
	attach_state |= AST_create_rxmblks;

	/* Setup kstats for the channel */
	instance = ddi_get_instance(vgenp->vnetdip);
	(void) sprintf(kname, "vnetldc0x%lx", ldcp->ldc_id);
	ldcp->ksp = vgen_setup_kstats("vnet", instance, kname, &ldcp->stats);
	if (ldcp->ksp == NULL) {
		goto ldc_attach_failed;
	}

	/* initialize vgen_versions supported */
	bcopy(vgen_versions, ldcp->vgen_versions, sizeof (ldcp->vgen_versions));

	/* link it into the list of channels for this port (at the head) */
	WRITE_ENTER(&ldclp->rwlock);
	prev_ldcp = (vgen_ldc_t **)(&ldclp->headp);
	ldcp->nextp = *prev_ldcp;
	*prev_ldcp = ldcp;
	ldclp->num_ldcs++;
	RW_EXIT(&ldclp->rwlock);

	ldcp->flags |= CHANNEL_ATTACHED;
	return (DDI_SUCCESS);

ldc_attach_failed:
	/* undo only the steps recorded in attach_state */
	if (attach_state & AST_ldc_reg_cb) {
		(void) ldc_unreg_callback(ldcp->ldc_handle);
	}
	/*
	 * NOTE(review): the soft interrupt is removed here before the
	 * receive thread is stopped, whereas vgen_ldc_detach() stops the
	 * thread first - confirm that this ordering is intentional.
	 */
	if (attach_state & AST_add_softintr) {
		(void) ddi_intr_remove_softint(ldcp->soft_handle);
		mutex_destroy(&ldcp->soft_lock);
	}
	if (attach_state & AST_create_rcv_thread) {
		if (ldcp->rcv_thread != NULL) {
			vgen_stop_rcv_thread(ldcp);
		}
		mutex_destroy(&ldcp->rcv_thr_lock);
		cv_destroy(&ldcp->rcv_thr_cv);
	}
	if (attach_state & AST_create_rxmblks) {
		vio_mblk_pool_t *fvmp = NULL;

		vio_destroy_multipools(&ldcp->vmp, &fvmp);
		ASSERT(fvmp == NULL);
	}
	if (attach_state & AST_alloc_tx_ring) {
		vgen_free_tx_ring(ldcp);
	}
	if (attach_state & AST_ldc_init) {
		(void) ldc_fini(ldcp->ldc_handle);
	}
	if (attach_state & AST_mutex_init) {
		mutex_destroy(&ldcp->tclock);
		mutex_destroy(&ldcp->txlock);
		mutex_destroy(&ldcp->cblock);
		mutex_destroy(&ldcp->wrlock);
		mutex_destroy(&ldcp->rxlock);
	}
	if (attach_state & AST_ldc_alloc) {
		KMEM_FREE(ldcp);
	}
	return (DDI_FAILURE);
}
1717 
1718 /* detach a channel from the port */
1719 static void
1720 vgen_ldc_detach(vgen_ldc_t *ldcp)
1721 {
1722 	vgen_port_t	*portp;
1723 	vgen_t 		*vgenp;
1724 	vgen_ldc_t 	*pldcp;
1725 	vgen_ldc_t	**prev_ldcp;
1726 	vgen_ldclist_t	*ldclp;
1727 
1728 	portp = ldcp->portp;
1729 	vgenp = portp->vgenp;
1730 	ldclp = &portp->ldclist;
1731 
1732 	prev_ldcp =  (vgen_ldc_t **)&ldclp->headp;
1733 	for (; (pldcp = *prev_ldcp) != NULL; prev_ldcp = &pldcp->nextp) {
1734 		if (pldcp == ldcp) {
1735 			break;
1736 		}
1737 	}
1738 
1739 	if (pldcp == NULL) {
1740 		/* invalid ldcp? */
1741 		return;
1742 	}
1743 
1744 	if (ldcp->ldc_status != LDC_INIT) {
1745 		DWARN(vgenp, ldcp, "ldc_status is not INIT\n");
1746 	}
1747 
1748 	if (ldcp->flags & CHANNEL_ATTACHED) {
1749 		ldcp->flags &= ~(CHANNEL_ATTACHED);
1750 
1751 		(void) ldc_unreg_callback(ldcp->ldc_handle);
1752 		if (ldcp->rcv_thread != NULL) {
1753 			/* First stop the receive thread */
1754 			vgen_stop_rcv_thread(ldcp);
1755 			(void) ddi_intr_remove_softint(ldcp->soft_handle);
1756 			mutex_destroy(&ldcp->soft_lock);
1757 			mutex_destroy(&ldcp->rcv_thr_lock);
1758 			cv_destroy(&ldcp->rcv_thr_cv);
1759 		}
1760 		/* Free any queued messages */
1761 		if (ldcp->rcv_mhead != NULL) {
1762 			freemsgchain(ldcp->rcv_mhead);
1763 			ldcp->rcv_mhead = NULL;
1764 		}
1765 
1766 		vgen_destroy_kstats(ldcp->ksp);
1767 		ldcp->ksp = NULL;
1768 
1769 		/*
1770 		 * if we cannot reclaim all mblks, put this
1771 		 * on the list of pools(vgenp->rmp) to be reclaimed when the
1772 		 * device gets detached (see vgen_uninit()).
1773 		 */
1774 		vio_destroy_multipools(&ldcp->vmp, &vgenp->rmp);
1775 
1776 		/* free transmit resources */
1777 		vgen_free_tx_ring(ldcp);
1778 
1779 		(void) ldc_fini(ldcp->ldc_handle);
1780 		mutex_destroy(&ldcp->tclock);
1781 		mutex_destroy(&ldcp->txlock);
1782 		mutex_destroy(&ldcp->cblock);
1783 		mutex_destroy(&ldcp->wrlock);
1784 		mutex_destroy(&ldcp->rxlock);
1785 
1786 		/* unlink it from the list */
1787 		*prev_ldcp = ldcp->nextp;
1788 		ldclp->num_ldcs--;
1789 		KMEM_FREE(ldcp);
1790 	}
1791 }
1792 
1793 /*
1794  * This function allocates transmit resources for the channel.
1795  * The resources consist of a transmit descriptor ring and an associated
1796  * transmit buffer ring.
1797  */
1798 static int
1799 vgen_alloc_tx_ring(vgen_ldc_t *ldcp)
1800 {
1801 	void *tbufp;
1802 	ldc_mem_info_t minfo;
1803 	uint32_t txdsize;
1804 	uint32_t tbufsize;
1805 	int status;
1806 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
1807 
1808 	ldcp->num_txds = vnet_ntxds;
1809 	txdsize = sizeof (vnet_public_desc_t);
1810 	tbufsize = sizeof (vgen_private_desc_t);
1811 
1812 	/* allocate transmit buffer ring */
1813 	tbufp = kmem_zalloc(ldcp->num_txds * tbufsize, KM_NOSLEEP);
1814 	if (tbufp == NULL) {
1815 		return (DDI_FAILURE);
1816 	}
1817 
1818 	/* create transmit descriptor ring */
1819 	status = ldc_mem_dring_create(ldcp->num_txds, txdsize,
1820 	    &ldcp->tx_dhandle);
1821 	if (status) {
1822 		DWARN(vgenp, ldcp, "ldc_mem_dring_create() failed\n");
1823 		kmem_free(tbufp, ldcp->num_txds * tbufsize);
1824 		return (DDI_FAILURE);
1825 	}
1826 
1827 	/* get the addr of descripror ring */
1828 	status = ldc_mem_dring_info(ldcp->tx_dhandle, &minfo);
1829 	if (status) {
1830 		DWARN(vgenp, ldcp, "ldc_mem_dring_info() failed\n");
1831 		kmem_free(tbufp, ldcp->num_txds * tbufsize);
1832 		(void) ldc_mem_dring_destroy(ldcp->tx_dhandle);
1833 		ldcp->tbufp = NULL;
1834 		return (DDI_FAILURE);
1835 	}
1836 	ldcp->txdp = (vnet_public_desc_t *)(minfo.vaddr);
1837 	ldcp->tbufp = tbufp;
1838 
1839 	ldcp->txdendp = &((ldcp->txdp)[ldcp->num_txds]);
1840 	ldcp->tbufendp = &((ldcp->tbufp)[ldcp->num_txds]);
1841 
1842 	return (DDI_SUCCESS);
1843 }
1844 
1845 /* Free transmit resources for the channel */
1846 static void
1847 vgen_free_tx_ring(vgen_ldc_t *ldcp)
1848 {
1849 	int tbufsize = sizeof (vgen_private_desc_t);
1850 
1851 	/* free transmit descriptor ring */
1852 	(void) ldc_mem_dring_destroy(ldcp->tx_dhandle);
1853 
1854 	/* free transmit buffer ring */
1855 	kmem_free(ldcp->tbufp, ldcp->num_txds * tbufsize);
1856 	ldcp->txdp = ldcp->txdendp = NULL;
1857 	ldcp->tbufp = ldcp->tbufendp = NULL;
1858 }
1859 
1860 /* enable transmit/receive on the channels for the port */
1861 static void
1862 vgen_init_ldcs(vgen_port_t *portp)
1863 {
1864 	vgen_ldclist_t	*ldclp = &portp->ldclist;
1865 	vgen_ldc_t	*ldcp;
1866 
1867 	READ_ENTER(&ldclp->rwlock);
1868 	ldcp =  ldclp->headp;
1869 	for (; ldcp  != NULL; ldcp = ldcp->nextp) {
1870 		(void) vgen_ldc_init(ldcp);
1871 	}
1872 	RW_EXIT(&ldclp->rwlock);
1873 }
1874 
1875 /* stop transmit/receive on the channels for the port */
1876 static void
1877 vgen_uninit_ldcs(vgen_port_t *portp)
1878 {
1879 	vgen_ldclist_t	*ldclp = &portp->ldclist;
1880 	vgen_ldc_t	*ldcp;
1881 
1882 	READ_ENTER(&ldclp->rwlock);
1883 	ldcp =  ldclp->headp;
1884 	for (; ldcp  != NULL; ldcp = ldcp->nextp) {
1885 		vgen_ldc_uninit(ldcp);
1886 	}
1887 	RW_EXIT(&ldclp->rwlock);
1888 }
1889 
1890 /* enable transmit/receive on the channel */
1891 static int
1892 vgen_ldc_init(vgen_ldc_t *ldcp)
1893 {
1894 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
1895 	ldc_status_t	istatus;
1896 	int		rv;
1897 	uint32_t	retries = 0;
1898 	enum	{ ST_init = 0x0, ST_ldc_open = 0x1,
1899 		ST_init_tbufs = 0x2, ST_cb_enable = 0x4} init_state;
1900 	init_state = ST_init;
1901 
1902 	DBG1(vgenp, ldcp, "enter\n");
1903 	LDC_LOCK(ldcp);
1904 
1905 	rv = ldc_open(ldcp->ldc_handle);
1906 	if (rv != 0) {
1907 		DWARN(vgenp, ldcp, "ldc_open failed: rv(%d)\n", rv);
1908 		goto ldcinit_failed;
1909 	}
1910 	init_state |= ST_ldc_open;
1911 
1912 	(void) ldc_status(ldcp->ldc_handle, &istatus);
1913 	if (istatus != LDC_OPEN && istatus != LDC_READY) {
1914 		DWARN(vgenp, ldcp, "status(%d) is not OPEN/READY\n", istatus);
1915 		goto ldcinit_failed;
1916 	}
1917 	ldcp->ldc_status = istatus;
1918 
1919 	rv = vgen_init_tbufs(ldcp);
1920 	if (rv != 0) {
1921 		DWARN(vgenp, ldcp, "vgen_init_tbufs() failed\n");
1922 		goto ldcinit_failed;
1923 	}
1924 	init_state |= ST_init_tbufs;
1925 
1926 	rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_ENABLE);
1927 	if (rv != 0) {
1928 		DWARN(vgenp, ldcp, "ldc_set_cb_mode failed: rv(%d)\n", rv);
1929 		goto ldcinit_failed;
1930 	}
1931 
1932 	init_state |= ST_cb_enable;
1933 
1934 	do {
1935 		rv = ldc_up(ldcp->ldc_handle);
1936 		if ((rv != 0) && (rv == EWOULDBLOCK)) {
1937 			DBG2(vgenp, ldcp, "ldc_up err rv(%d)\n", rv);
1938 			drv_usecwait(VGEN_LDC_UP_DELAY);
1939 		}
1940 		if (retries++ >= vgen_ldcup_retries)
1941 			break;
1942 	} while (rv == EWOULDBLOCK);
1943 
1944 	(void) ldc_status(ldcp->ldc_handle, &istatus);
1945 	if (istatus == LDC_UP) {
1946 		DWARN(vgenp, ldcp, "status(%d) is UP\n", istatus);
1947 	}
1948 
1949 	ldcp->ldc_status = istatus;
1950 
1951 	/* initialize transmit watchdog timeout */
1952 	ldcp->wd_tid = timeout(vgen_ldc_watchdog, (caddr_t)ldcp,
1953 	    drv_usectohz(vnet_ldcwd_interval * 1000));
1954 
1955 	ldcp->hphase = -1;
1956 	ldcp->flags |= CHANNEL_STARTED;
1957 
1958 	/* if channel is already UP - start handshake */
1959 	if (istatus == LDC_UP) {
1960 		vgen_t *vgenp = LDC_TO_VGEN(ldcp);
1961 		if (ldcp->portp != vgenp->vsw_portp) {
1962 			/*
1963 			 * modify fdb entry to use this port as the
1964 			 * channel is up, instead of going through the
1965 			 * vsw-port (see comments in vgen_port_init())
1966 			 */
1967 			vnet_modify_fdb(vgenp->vnetp,
1968 			    (uint8_t *)&ldcp->portp->macaddr,
1969 			    vgen_tx, ldcp->portp, B_FALSE);
1970 		}
1971 
1972 		/* Initialize local session id */
1973 		ldcp->local_sid = ddi_get_lbolt();
1974 
1975 		/* clear peer session id */
1976 		ldcp->peer_sid = 0;
1977 		ldcp->hretries = 0;
1978 
1979 		/* Initiate Handshake process with peer ldc endpoint */
1980 		vgen_reset_hphase(ldcp);
1981 
1982 		mutex_exit(&ldcp->tclock);
1983 		mutex_exit(&ldcp->txlock);
1984 		mutex_exit(&ldcp->wrlock);
1985 		vgen_handshake(vh_nextphase(ldcp));
1986 		mutex_exit(&ldcp->rxlock);
1987 		mutex_exit(&ldcp->cblock);
1988 	} else {
1989 		LDC_UNLOCK(ldcp);
1990 	}
1991 
1992 	return (DDI_SUCCESS);
1993 
1994 ldcinit_failed:
1995 	if (init_state & ST_cb_enable) {
1996 		(void) ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE);
1997 	}
1998 	if (init_state & ST_init_tbufs) {
1999 		vgen_uninit_tbufs(ldcp);
2000 	}
2001 	if (init_state & ST_ldc_open) {
2002 		(void) ldc_close(ldcp->ldc_handle);
2003 	}
2004 	LDC_UNLOCK(ldcp);
2005 	DBG1(vgenp, ldcp, "exit\n");
2006 	return (DDI_FAILURE);
2007 }
2008 
2009 /* stop transmit/receive on the channel */
2010 static void
2011 vgen_ldc_uninit(vgen_ldc_t *ldcp)
2012 {
2013 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
2014 	int	rv;
2015 
2016 	DBG1(vgenp, ldcp, "enter\n");
2017 	LDC_LOCK(ldcp);
2018 
2019 	if ((ldcp->flags & CHANNEL_STARTED) == 0) {
2020 		LDC_UNLOCK(ldcp);
2021 		DWARN(vgenp, ldcp, "CHANNEL_STARTED flag is not set\n");
2022 		return;
2023 	}
2024 
2025 	/* disable further callbacks */
2026 	rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE);
2027 	if (rv != 0) {
2028 		DWARN(vgenp, ldcp, "ldc_set_cb_mode failed\n");
2029 	}
2030 
2031 	/*
2032 	 * clear handshake done bit and wait for pending tx and cb to finish.
2033 	 * release locks before untimeout(9F) is invoked to cancel timeouts.
2034 	 */
2035 	ldcp->hphase &= ~(VH_DONE);
2036 	LDC_UNLOCK(ldcp);
2037 
2038 	/* cancel handshake watchdog timeout */
2039 	if (ldcp->htid) {
2040 		(void) untimeout(ldcp->htid);
2041 		ldcp->htid = 0;
2042 	}
2043 
2044 	/* cancel transmit watchdog timeout */
2045 	if (ldcp->wd_tid) {
2046 		(void) untimeout(ldcp->wd_tid);
2047 		ldcp->wd_tid = 0;
2048 	}
2049 
2050 	drv_usecwait(1000);
2051 
2052 	/* acquire locks again; any pending transmits and callbacks are done */
2053 	LDC_LOCK(ldcp);
2054 
2055 	vgen_reset_hphase(ldcp);
2056 
2057 	vgen_uninit_tbufs(ldcp);
2058 
2059 	rv = ldc_close(ldcp->ldc_handle);
2060 	if (rv != 0) {
2061 		DWARN(vgenp, ldcp, "ldc_close err\n");
2062 	}
2063 	ldcp->ldc_status = LDC_INIT;
2064 	ldcp->flags &= ~(CHANNEL_STARTED);
2065 
2066 	LDC_UNLOCK(ldcp);
2067 
2068 	DBG1(vgenp, ldcp, "exit\n");
2069 }
2070 
2071 /* Initialize the transmit buffer ring for the channel */
2072 static int
2073 vgen_init_tbufs(vgen_ldc_t *ldcp)
2074 {
2075 	vgen_private_desc_t	*tbufp;
2076 	vnet_public_desc_t	*txdp;
2077 	vio_dring_entry_hdr_t		*hdrp;
2078 	int 			i;
2079 	int 			rv;
2080 	caddr_t			datap = NULL;
2081 	int			ci;
2082 	uint32_t		ncookies;
2083 
2084 	bzero(ldcp->tbufp, sizeof (*tbufp) * (ldcp->num_txds));
2085 	bzero(ldcp->txdp, sizeof (*txdp) * (ldcp->num_txds));
2086 
2087 	datap = kmem_zalloc(ldcp->num_txds * VGEN_TXDBLK_SZ, KM_SLEEP);
2088 	ldcp->tx_datap = datap;
2089 
2090 	/*
2091 	 * for each private descriptor, allocate a ldc mem_handle which is
2092 	 * required to map the data during transmit, set the flags
2093 	 * to free (available for use by transmit routine).
2094 	 */
2095 
2096 	for (i = 0; i < ldcp->num_txds; i++) {
2097 
2098 		tbufp = &(ldcp->tbufp[i]);
2099 		rv = ldc_mem_alloc_handle(ldcp->ldc_handle,
2100 		    &(tbufp->memhandle));
2101 		if (rv) {
2102 			tbufp->memhandle = 0;
2103 			goto init_tbufs_failed;
2104 		}
2105 
2106 		/*
2107 		 * bind ldc memhandle to the corresponding transmit buffer.
2108 		 */
2109 		ci = ncookies = 0;
2110 		rv = ldc_mem_bind_handle(tbufp->memhandle,
2111 		    (caddr_t)datap, VGEN_TXDBLK_SZ, LDC_SHADOW_MAP,
2112 		    LDC_MEM_R, &(tbufp->memcookie[ci]), &ncookies);
2113 		if (rv != 0) {
2114 			goto init_tbufs_failed;
2115 		}
2116 
2117 		/*
2118 		 * successful in binding the handle to tx data buffer.
2119 		 * set datap in the private descr to this buffer.
2120 		 */
2121 		tbufp->datap = datap;
2122 
2123 		if ((ncookies == 0) ||
2124 		    (ncookies > MAX_COOKIES)) {
2125 			goto init_tbufs_failed;
2126 		}
2127 
2128 		for (ci = 1; ci < ncookies; ci++) {
2129 			rv = ldc_mem_nextcookie(tbufp->memhandle,
2130 			    &(tbufp->memcookie[ci]));
2131 			if (rv != 0) {
2132 				goto init_tbufs_failed;
2133 			}
2134 		}
2135 
2136 		tbufp->ncookies = ncookies;
2137 		datap += VGEN_TXDBLK_SZ;
2138 
2139 		tbufp->flags = VGEN_PRIV_DESC_FREE;
2140 		txdp = &(ldcp->txdp[i]);
2141 		hdrp = &txdp->hdr;
2142 		hdrp->dstate = VIO_DESC_FREE;
2143 		hdrp->ack = B_FALSE;
2144 		tbufp->descp = txdp;
2145 
2146 	}
2147 
2148 	/* reset tbuf walking pointers */
2149 	ldcp->next_tbufp = ldcp->tbufp;
2150 	ldcp->cur_tbufp = ldcp->tbufp;
2151 
2152 	/* initialize tx seqnum and index */
2153 	ldcp->next_txseq = VNET_ISS;
2154 	ldcp->next_txi = 0;
2155 
2156 	ldcp->resched_peer = B_TRUE;
2157 	ldcp->resched_peer_txi = 0;
2158 
2159 	return (DDI_SUCCESS);
2160 
2161 init_tbufs_failed:;
2162 	vgen_uninit_tbufs(ldcp);
2163 	return (DDI_FAILURE);
2164 }
2165 
2166 /* Uninitialize transmit buffer ring for the channel */
2167 static void
2168 vgen_uninit_tbufs(vgen_ldc_t *ldcp)
2169 {
2170 	vgen_private_desc_t	*tbufp = ldcp->tbufp;
2171 	int 			i;
2172 
2173 	/* for each tbuf (priv_desc), free ldc mem_handle */
2174 	for (i = 0; i < ldcp->num_txds; i++) {
2175 
2176 		tbufp = &(ldcp->tbufp[i]);
2177 
2178 		if (tbufp->datap) { /* if bound to a ldc memhandle */
2179 			(void) ldc_mem_unbind_handle(tbufp->memhandle);
2180 			tbufp->datap = NULL;
2181 		}
2182 		if (tbufp->memhandle) {
2183 			(void) ldc_mem_free_handle(tbufp->memhandle);
2184 			tbufp->memhandle = 0;
2185 		}
2186 	}
2187 
2188 	if (ldcp->tx_datap) {
2189 		/* prealloc'd tx data buffer */
2190 		kmem_free(ldcp->tx_datap, ldcp->num_txds * VGEN_TXDBLK_SZ);
2191 		ldcp->tx_datap = NULL;
2192 	}
2193 
2194 	bzero(ldcp->tbufp, sizeof (vgen_private_desc_t) * (ldcp->num_txds));
2195 	bzero(ldcp->txdp, sizeof (vnet_public_desc_t) * (ldcp->num_txds));
2196 }
2197 
2198 /* clobber tx descriptor ring */
2199 static void
2200 vgen_clobber_tbufs(vgen_ldc_t *ldcp)
2201 {
2202 	vnet_public_desc_t	*txdp;
2203 	vgen_private_desc_t	*tbufp;
2204 	vio_dring_entry_hdr_t	*hdrp;
2205 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
2206 	int i;
2207 #ifdef DEBUG
2208 	int ndone = 0;
2209 #endif
2210 
2211 	for (i = 0; i < ldcp->num_txds; i++) {
2212 
2213 		tbufp = &(ldcp->tbufp[i]);
2214 		txdp = tbufp->descp;
2215 		hdrp = &txdp->hdr;
2216 
2217 		if (tbufp->flags & VGEN_PRIV_DESC_BUSY) {
2218 			tbufp->flags = VGEN_PRIV_DESC_FREE;
2219 #ifdef DEBUG
2220 			if (hdrp->dstate == VIO_DESC_DONE)
2221 				ndone++;
2222 #endif
2223 			hdrp->dstate = VIO_DESC_FREE;
2224 			hdrp->ack = B_FALSE;
2225 		}
2226 	}
2227 	/* reset tbuf walking pointers */
2228 	ldcp->next_tbufp = ldcp->tbufp;
2229 	ldcp->cur_tbufp = ldcp->tbufp;
2230 
2231 	/* reset tx seqnum and index */
2232 	ldcp->next_txseq = VNET_ISS;
2233 	ldcp->next_txi = 0;
2234 
2235 	ldcp->resched_peer = B_TRUE;
2236 	ldcp->resched_peer_txi = 0;
2237 
2238 	DBG2(vgenp, ldcp, "num descrs done (%d)\n", ndone);
2239 }
2240 
2241 /* clobber receive descriptor ring */
2242 static void
2243 vgen_clobber_rxds(vgen_ldc_t *ldcp)
2244 {
2245 	ldcp->rx_dhandle = 0;
2246 	bzero(&ldcp->rx_dcookie, sizeof (ldcp->rx_dcookie));
2247 	ldcp->rxdp = NULL;
2248 	ldcp->next_rxi = 0;
2249 	ldcp->num_rxds = 0;
2250 	ldcp->next_rxseq = VNET_ISS;
2251 }
2252 
2253 /* initialize receive descriptor ring */
2254 static int
2255 vgen_init_rxds(vgen_ldc_t *ldcp, uint32_t num_desc, uint32_t desc_size,
2256 	ldc_mem_cookie_t *dcookie, uint32_t ncookies)
2257 {
2258 	int rv;
2259 	ldc_mem_info_t minfo;
2260 
2261 	rv = ldc_mem_dring_map(ldcp->ldc_handle, dcookie, ncookies, num_desc,
2262 	    desc_size, LDC_SHADOW_MAP, &(ldcp->rx_dhandle));
2263 	if (rv != 0) {
2264 		return (DDI_FAILURE);
2265 	}
2266 
2267 	/*
2268 	 * sucessfully mapped, now try to
2269 	 * get info about the mapped dring
2270 	 */
2271 	rv = ldc_mem_dring_info(ldcp->rx_dhandle, &minfo);
2272 	if (rv != 0) {
2273 		(void) ldc_mem_dring_unmap(ldcp->rx_dhandle);
2274 		return (DDI_FAILURE);
2275 	}
2276 
2277 	/*
2278 	 * save ring address, number of descriptors.
2279 	 */
2280 	ldcp->rxdp = (vnet_public_desc_t *)(minfo.vaddr);
2281 	bcopy(dcookie, &(ldcp->rx_dcookie), sizeof (*dcookie));
2282 	ldcp->num_rxdcookies = ncookies;
2283 	ldcp->num_rxds = num_desc;
2284 	ldcp->next_rxi = 0;
2285 	ldcp->next_rxseq = VNET_ISS;
2286 
2287 	return (DDI_SUCCESS);
2288 }
2289 
2290 /* get channel statistics */
2291 static uint64_t
2292 vgen_ldc_stat(vgen_ldc_t *ldcp, uint_t stat)
2293 {
2294 	vgen_stats_t *statsp;
2295 	uint64_t val;
2296 
2297 	val = 0;
2298 	statsp = &ldcp->stats;
2299 	switch (stat) {
2300 
2301 	case MAC_STAT_MULTIRCV:
2302 		val = statsp->multircv;
2303 		break;
2304 
2305 	case MAC_STAT_BRDCSTRCV:
2306 		val = statsp->brdcstrcv;
2307 		break;
2308 
2309 	case MAC_STAT_MULTIXMT:
2310 		val = statsp->multixmt;
2311 		break;
2312 
2313 	case MAC_STAT_BRDCSTXMT:
2314 		val = statsp->brdcstxmt;
2315 		break;
2316 
2317 	case MAC_STAT_NORCVBUF:
2318 		val = statsp->norcvbuf;
2319 		break;
2320 
2321 	case MAC_STAT_IERRORS:
2322 		val = statsp->ierrors;
2323 		break;
2324 
2325 	case MAC_STAT_NOXMTBUF:
2326 		val = statsp->noxmtbuf;
2327 		break;
2328 
2329 	case MAC_STAT_OERRORS:
2330 		val = statsp->oerrors;
2331 		break;
2332 
2333 	case MAC_STAT_COLLISIONS:
2334 		break;
2335 
2336 	case MAC_STAT_RBYTES:
2337 		val = statsp->rbytes;
2338 		break;
2339 
2340 	case MAC_STAT_IPACKETS:
2341 		val = statsp->ipackets;
2342 		break;
2343 
2344 	case MAC_STAT_OBYTES:
2345 		val = statsp->obytes;
2346 		break;
2347 
2348 	case MAC_STAT_OPACKETS:
2349 		val = statsp->opackets;
2350 		break;
2351 
2352 	/* stats not relevant to ldc, return 0 */
2353 	case MAC_STAT_IFSPEED:
2354 	case ETHER_STAT_ALIGN_ERRORS:
2355 	case ETHER_STAT_FCS_ERRORS:
2356 	case ETHER_STAT_FIRST_COLLISIONS:
2357 	case ETHER_STAT_MULTI_COLLISIONS:
2358 	case ETHER_STAT_DEFER_XMTS:
2359 	case ETHER_STAT_TX_LATE_COLLISIONS:
2360 	case ETHER_STAT_EX_COLLISIONS:
2361 	case ETHER_STAT_MACXMT_ERRORS:
2362 	case ETHER_STAT_CARRIER_ERRORS:
2363 	case ETHER_STAT_TOOLONG_ERRORS:
2364 	case ETHER_STAT_XCVR_ADDR:
2365 	case ETHER_STAT_XCVR_ID:
2366 	case ETHER_STAT_XCVR_INUSE:
2367 	case ETHER_STAT_CAP_1000FDX:
2368 	case ETHER_STAT_CAP_1000HDX:
2369 	case ETHER_STAT_CAP_100FDX:
2370 	case ETHER_STAT_CAP_100HDX:
2371 	case ETHER_STAT_CAP_10FDX:
2372 	case ETHER_STAT_CAP_10HDX:
2373 	case ETHER_STAT_CAP_ASMPAUSE:
2374 	case ETHER_STAT_CAP_PAUSE:
2375 	case ETHER_STAT_CAP_AUTONEG:
2376 	case ETHER_STAT_ADV_CAP_1000FDX:
2377 	case ETHER_STAT_ADV_CAP_1000HDX:
2378 	case ETHER_STAT_ADV_CAP_100FDX:
2379 	case ETHER_STAT_ADV_CAP_100HDX:
2380 	case ETHER_STAT_ADV_CAP_10FDX:
2381 	case ETHER_STAT_ADV_CAP_10HDX:
2382 	case ETHER_STAT_ADV_CAP_ASMPAUSE:
2383 	case ETHER_STAT_ADV_CAP_PAUSE:
2384 	case ETHER_STAT_ADV_CAP_AUTONEG:
2385 	case ETHER_STAT_LP_CAP_1000FDX:
2386 	case ETHER_STAT_LP_CAP_1000HDX:
2387 	case ETHER_STAT_LP_CAP_100FDX:
2388 	case ETHER_STAT_LP_CAP_100HDX:
2389 	case ETHER_STAT_LP_CAP_10FDX:
2390 	case ETHER_STAT_LP_CAP_10HDX:
2391 	case ETHER_STAT_LP_CAP_ASMPAUSE:
2392 	case ETHER_STAT_LP_CAP_PAUSE:
2393 	case ETHER_STAT_LP_CAP_AUTONEG:
2394 	case ETHER_STAT_LINK_ASMPAUSE:
2395 	case ETHER_STAT_LINK_PAUSE:
2396 	case ETHER_STAT_LINK_AUTONEG:
2397 	case ETHER_STAT_LINK_DUPLEX:
2398 	default:
2399 		val = 0;
2400 		break;
2401 
2402 	}
2403 	return (val);
2404 }
2405 
2406 /*
2407  * LDC channel is UP, start handshake process with peer.
2408  * Flag tells vnet_modify_fdb() about the context: set to B_TRUE if this
2409  * function is being called from transmit routine, otherwise B_FALSE.
2410  */
2411 static void
2412 vgen_handle_evt_up(vgen_ldc_t *ldcp, boolean_t flag)
2413 {
2414 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
2415 
2416 	DBG1(vgenp, ldcp, "enter\n");
2417 
2418 	ASSERT(MUTEX_HELD(&ldcp->cblock));
2419 
2420 	if (ldcp->portp != vgenp->vsw_portp) {
2421 		/*
2422 		 * modify fdb entry to use this port as the
2423 		 * channel is up, instead of going through the
2424 		 * vsw-port (see comments in vgen_port_init())
2425 		 */
2426 		vnet_modify_fdb(vgenp->vnetp, (uint8_t *)&ldcp->portp->macaddr,
2427 		    vgen_tx, ldcp->portp, flag);
2428 	}
2429 
2430 	/* Initialize local session id */
2431 	ldcp->local_sid = ddi_get_lbolt();
2432 
2433 	/* clear peer session id */
2434 	ldcp->peer_sid = 0;
2435 	ldcp->hretries = 0;
2436 
2437 	if (ldcp->hphase != VH_PHASE0) {
2438 		vgen_handshake_reset(ldcp);
2439 	}
2440 
2441 	/* Initiate Handshake process with peer ldc endpoint */
2442 	vgen_handshake(vh_nextphase(ldcp));
2443 
2444 	DBG1(vgenp, ldcp, "exit\n");
2445 }
2446 
2447 /*
2448  * LDC channel is Reset, terminate connection with peer and try to
2449  * bring the channel up again.
2450  * Flag tells vnet_modify_fdb() about the context: set to B_TRUE if this
2451  * function is being called from transmit routine, otherwise B_FALSE.
2452  */
2453 static void
2454 vgen_handle_evt_reset(vgen_ldc_t *ldcp, boolean_t flag)
2455 {
2456 	ldc_status_t istatus;
2457 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
2458 	int	rv;
2459 
2460 	DBG1(vgenp, ldcp, "enter\n");
2461 
2462 	ASSERT(MUTEX_HELD(&ldcp->cblock));
2463 
2464 	if ((ldcp->portp != vgenp->vsw_portp) &&
2465 	    (vgenp->vsw_portp != NULL)) {
2466 		/*
2467 		 * modify fdb entry to use vsw-port  as the
2468 		 * channel is reset and we don't have a direct
2469 		 * link to the destination (see comments
2470 		 * in vgen_port_init()).
2471 		 */
2472 		vnet_modify_fdb(vgenp->vnetp, (uint8_t *)&ldcp->portp->macaddr,
2473 		    vgen_tx, vgenp->vsw_portp, flag);
2474 	}
2475 
2476 	if (ldcp->hphase != VH_PHASE0) {
2477 		vgen_handshake_reset(ldcp);
2478 	}
2479 
2480 	/* try to bring the channel up */
2481 	rv = ldc_up(ldcp->ldc_handle);
2482 	if (rv != 0) {
2483 		DWARN(vgenp, ldcp, "ldc_up err rv(%d)\n", rv);
2484 	}
2485 
2486 	if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
2487 		DWARN(vgenp, ldcp, "ldc_status err\n");
2488 	} else {
2489 		ldcp->ldc_status = istatus;
2490 	}
2491 
2492 	/* if channel is already UP - restart handshake */
2493 	if (ldcp->ldc_status == LDC_UP) {
2494 		vgen_handle_evt_up(ldcp, flag);
2495 	}
2496 
2497 	DBG1(vgenp, ldcp, "exit\n");
2498 }
2499 
2500 /* Interrupt handler for the channel */
2501 static uint_t
2502 vgen_ldc_cb(uint64_t event, caddr_t arg)
2503 {
2504 	_NOTE(ARGUNUSED(event))
2505 	vgen_ldc_t	*ldcp;
2506 	vgen_t		*vgenp;
2507 	ldc_status_t 	istatus;
2508 	mblk_t		*bp = NULL;
2509 	vgen_stats_t	*statsp;
2510 
2511 	ldcp = (vgen_ldc_t *)arg;
2512 	vgenp = LDC_TO_VGEN(ldcp);
2513 	statsp = &ldcp->stats;
2514 
2515 	DBG1(vgenp, ldcp, "enter\n");
2516 
2517 	mutex_enter(&ldcp->cblock);
2518 	statsp->callbacks++;
2519 	if ((ldcp->ldc_status == LDC_INIT) || (ldcp->ldc_handle == NULL)) {
2520 		DWARN(vgenp, ldcp, "status(%d) is LDC_INIT\n",
2521 		    ldcp->ldc_status);
2522 		mutex_exit(&ldcp->cblock);
2523 		return (LDC_SUCCESS);
2524 	}
2525 
2526 	/*
2527 	 * NOTE: not using switch() as event could be triggered by
2528 	 * a state change and a read request. Also the ordering	of the
2529 	 * check for the event types is deliberate.
2530 	 */
2531 	if (event & LDC_EVT_UP) {
2532 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
2533 			DWARN(vgenp, ldcp, "ldc_status err\n");
2534 		} else {
2535 			ldcp->ldc_status = istatus;
2536 		}
2537 		ASSERT(ldcp->ldc_status == LDC_UP);
2538 		DWARN(vgenp, ldcp, "event(%lx) UP, status(%d)\n",
2539 		    event, ldcp->ldc_status);
2540 
2541 		vgen_handle_evt_up(ldcp, B_FALSE);
2542 
2543 		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);
2544 	}
2545 
2546 	if (event & LDC_EVT_READ) {
2547 		DBG2(vgenp, ldcp, "event(%lx) READ, status(%d)\n",
2548 		    event, ldcp->ldc_status);
2549 
2550 		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);
2551 
2552 		if (ldcp->rcv_thread != NULL) {
2553 			/*
2554 			 * If the receive thread is enabled, then
2555 			 * wakeup the receive thread to process the
2556 			 * LDC messages.
2557 			 */
2558 			mutex_exit(&ldcp->cblock);
2559 			mutex_enter(&ldcp->rcv_thr_lock);
2560 			if (!(ldcp->rcv_thr_flags & VGEN_WTHR_DATARCVD)) {
2561 				ldcp->rcv_thr_flags |= VGEN_WTHR_DATARCVD;
2562 				cv_signal(&ldcp->rcv_thr_cv);
2563 			}
2564 			mutex_exit(&ldcp->rcv_thr_lock);
2565 			mutex_enter(&ldcp->cblock);
2566 		} else  {
2567 			vgen_handle_evt_read(ldcp);
2568 			bp = ldcp->rcv_mhead;
2569 			ldcp->rcv_mhead = ldcp->rcv_mtail = NULL;
2570 		}
2571 	}
2572 
2573 	if (event & (LDC_EVT_RESET | LDC_EVT_DOWN)) {
2574 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
2575 			DWARN(vgenp, ldcp, "ldc_status error\n");
2576 		} else {
2577 			ldcp->ldc_status = istatus;
2578 		}
2579 		DWARN(vgenp, ldcp, "event(%lx) RESET/DOWN, status(%d)\n",
2580 		    event, ldcp->ldc_status);
2581 
2582 		vgen_handle_evt_reset(ldcp, B_FALSE);
2583 	}
2584 	mutex_exit(&ldcp->cblock);
2585 
2586 	/* send up the received packets to MAC layer */
2587 	if (bp != NULL) {
2588 		vnet_rx(vgenp->vnetp, NULL, bp);
2589 	}
2590 
2591 	if (ldcp->cancel_htid) {
2592 		/*
2593 		 * Cancel handshake timer.
2594 		 * untimeout(9F) will not return until the pending callback is
2595 		 * cancelled or has run. No problems will result from calling
2596 		 * untimeout if the handler has already completed.
2597 		 * If the timeout handler did run, then it would just
2598 		 * return as cancel_htid is set.
2599 		 */
2600 		(void) untimeout(ldcp->cancel_htid);
2601 		ldcp->cancel_htid = 0;
2602 	}
2603 	DBG1(vgenp, ldcp, "exit\n");
2604 
2605 	return (LDC_SUCCESS);
2606 }
2607 
2608 static void
2609 vgen_handle_evt_read(vgen_ldc_t *ldcp)
2610 {
2611 	int		rv;
2612 	uint64_t	ldcmsg[7];
2613 	size_t		msglen;
2614 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
2615 	vio_msg_tag_t	*tagp;
2616 	ldc_status_t 	istatus;
2617 	boolean_t 	has_data;
2618 
2619 	DBG1(vgenp, ldcp, "enter\n");
2620 
2621 	/*
2622 	 * If the receive thread is enabled, then the cblock
2623 	 * need to be acquired here. If not, the vgen_ldc_cb()
2624 	 * calls this function with cblock held already.
2625 	 */
2626 	if (ldcp->rcv_thread != NULL) {
2627 		mutex_enter(&ldcp->cblock);
2628 	} else {
2629 		ASSERT(MUTEX_HELD(&ldcp->cblock));
2630 	}
2631 
2632 vgen_evt_read:
2633 	do {
2634 		msglen = sizeof (ldcmsg);
2635 		rv = ldc_read(ldcp->ldc_handle, (caddr_t)&ldcmsg, &msglen);
2636 
2637 		if (rv != 0) {
2638 			DWARN(vgenp, ldcp, "err rv(%d) len(%d)\n",
2639 			    rv, msglen);
2640 			if (rv == ECONNRESET)
2641 				goto vgen_evtread_error;
2642 			break;
2643 		}
2644 		if (msglen == 0) {
2645 			DBG2(vgenp, ldcp, "ldc_read NODATA");
2646 			break;
2647 		}
2648 		DBG2(vgenp, ldcp, "ldc_read msglen(%d)", msglen);
2649 
2650 		tagp = (vio_msg_tag_t *)ldcmsg;
2651 
2652 		if (ldcp->peer_sid) {
2653 			/*
2654 			 * check sid only after we have received peer's sid
2655 			 * in the version negotiate msg.
2656 			 */
2657 #ifdef DEBUG
2658 			if (vgen_hdbg & HDBG_BAD_SID) {
2659 				/* simulate bad sid condition */
2660 				tagp->vio_sid = 0;
2661 				vgen_hdbg &= ~(HDBG_BAD_SID);
2662 			}
2663 #endif
2664 			rv = vgen_check_sid(ldcp, tagp);
2665 			if (rv != VGEN_SUCCESS) {
2666 				/*
2667 				 * If sid mismatch is detected,
2668 				 * reset the channel.
2669 				 */
2670 				ldcp->need_ldc_reset = B_TRUE;
2671 				goto vgen_evtread_error;
2672 			}
2673 		}
2674 
2675 		switch (tagp->vio_msgtype) {
2676 		case VIO_TYPE_CTRL:
2677 			rv = vgen_handle_ctrlmsg(ldcp, tagp);
2678 			break;
2679 
2680 		case VIO_TYPE_DATA:
2681 			rv = vgen_handle_datamsg(ldcp, tagp);
2682 			break;
2683 
2684 		case VIO_TYPE_ERR:
2685 			vgen_handle_errmsg(ldcp, tagp);
2686 			break;
2687 
2688 		default:
2689 			DWARN(vgenp, ldcp, "Unknown VIO_TYPE(%x)\n",
2690 			    tagp->vio_msgtype);
2691 			break;
2692 		}
2693 
2694 		/*
2695 		 * If an error is encountered, stop processing and
2696 		 * handle the error.
2697 		 */
2698 		if (rv != 0) {
2699 			goto vgen_evtread_error;
2700 		}
2701 
2702 	} while (msglen);
2703 
2704 	/* check once more before exiting */
2705 	rv = ldc_chkq(ldcp->ldc_handle, &has_data);
2706 	if ((rv == 0) && (has_data == B_TRUE)) {
2707 		DTRACE_PROBE(vgen_chkq);
2708 		goto vgen_evt_read;
2709 	}
2710 
2711 vgen_evtread_error:
2712 	if (rv == ECONNRESET) {
2713 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
2714 			DWARN(vgenp, ldcp, "ldc_status err\n");
2715 		} else {
2716 			ldcp->ldc_status = istatus;
2717 		}
2718 		vgen_handle_evt_reset(ldcp, B_FALSE);
2719 	} else if (rv) {
2720 		vgen_handshake_retry(ldcp);
2721 	}
2722 
2723 	/*
2724 	 * If the receive thread is not enabled, then cancel the
2725 	 * handshake timeout here.
2726 	 */
2727 	if (ldcp->rcv_thread != NULL) {
2728 		mutex_exit(&ldcp->cblock);
2729 		if (ldcp->cancel_htid) {
2730 			/*
2731 			 * Cancel handshake timer. untimeout(9F) will
2732 			 * not return until the pending callback is cancelled
2733 			 * or has run. No problems will result from calling
2734 			 * untimeout if the handler has already completed.
2735 			 * If the timeout handler did run, then it would just
2736 			 * return as cancel_htid is set.
2737 			 */
2738 			(void) untimeout(ldcp->cancel_htid);
2739 			ldcp->cancel_htid = 0;
2740 		}
2741 	}
2742 
2743 	DBG1(vgenp, ldcp, "exit\n");
2744 }
2745 
2746 /* vgen handshake functions */
2747 
2748 /* change the hphase for the channel to the next phase */
2749 static vgen_ldc_t *
2750 vh_nextphase(vgen_ldc_t *ldcp)
2751 {
2752 	if (ldcp->hphase == VH_PHASE3) {
2753 		ldcp->hphase = VH_DONE;
2754 	} else {
2755 		ldcp->hphase++;
2756 	}
2757 	return (ldcp);
2758 }
2759 
2760 /*
2761  * Check whether the given version is supported or not and
2762  * return VGEN_SUCCESS if supported.
2763  */
2764 static int
2765 vgen_supported_version(vgen_ldc_t *ldcp, uint16_t ver_major,
2766 uint16_t ver_minor)
2767 {
2768 	vgen_ver_t	*versions = ldcp->vgen_versions;
2769 	int		i = 0;
2770 
2771 	while (i < VGEN_NUM_VER) {
2772 		if ((versions[i].ver_major == 0) &&
2773 		    (versions[i].ver_minor == 0)) {
2774 			break;
2775 		}
2776 		if ((versions[i].ver_major == ver_major) &&
2777 		    (versions[i].ver_minor == ver_minor)) {
2778 			return (VGEN_SUCCESS);
2779 		}
2780 		i++;
2781 	}
2782 	return (VGEN_FAILURE);
2783 }
2784 
2785 /*
2786  * Given a version, return VGEN_SUCCESS if a lower version is supported.
2787  */
2788 static int
2789 vgen_next_version(vgen_ldc_t *ldcp, vgen_ver_t *verp)
2790 {
2791 	vgen_ver_t	*versions = ldcp->vgen_versions;
2792 	int		i = 0;
2793 
2794 	while (i < VGEN_NUM_VER) {
2795 		if ((versions[i].ver_major == 0) &&
2796 		    (versions[i].ver_minor == 0)) {
2797 			break;
2798 		}
2799 		/*
2800 		 * if we support a lower minor version within the same major
2801 		 * version, or if we support a lower major version,
2802 		 * update the verp parameter with this lower version and
2803 		 * return success.
2804 		 */
2805 		if (((versions[i].ver_major == verp->ver_major) &&
2806 		    (versions[i].ver_minor < verp->ver_minor)) ||
2807 		    (versions[i].ver_major < verp->ver_major)) {
2808 			verp->ver_major = versions[i].ver_major;
2809 			verp->ver_minor = versions[i].ver_minor;
2810 			return (VGEN_SUCCESS);
2811 		}
2812 		i++;
2813 	}
2814 
2815 	return (VGEN_FAILURE);
2816 }
2817 
2818 /*
2819  * wrapper routine to send the given message over ldc using ldc_write().
2820  */
2821 static int
2822 vgen_sendmsg(vgen_ldc_t *ldcp, caddr_t msg,  size_t msglen,
2823     boolean_t caller_holds_lock)
2824 {
2825 	int	rv;
2826 	size_t	len;
2827 	uint32_t retries = 0;
2828 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
2829 
2830 	len = msglen;
2831 	if ((len == 0) || (msg == NULL))
2832 		return (VGEN_FAILURE);
2833 
2834 	if (!caller_holds_lock) {
2835 		mutex_enter(&ldcp->wrlock);
2836 	}
2837 
2838 	do {
2839 		len = msglen;
2840 		rv = ldc_write(ldcp->ldc_handle, (caddr_t)msg, &len);
2841 		if (retries++ >= vgen_ldcwr_retries)
2842 			break;
2843 	} while (rv == EWOULDBLOCK);
2844 
2845 	if (!caller_holds_lock) {
2846 		mutex_exit(&ldcp->wrlock);
2847 	}
2848 
2849 	if (rv != 0) {
2850 		DWARN(vgenp, ldcp, "ldc_write failed: rv(%d)\n",
2851 		    rv, msglen);
2852 		return (rv);
2853 	}
2854 
2855 	if (len != msglen) {
2856 		DWARN(vgenp, ldcp, "ldc_write failed: rv(%d) msglen (%d)\n",
2857 		    rv, msglen);
2858 		return (VGEN_FAILURE);
2859 	}
2860 
2861 	return (VGEN_SUCCESS);
2862 }
2863 
2864 /* send version negotiate message to the peer over ldc */
2865 static int
2866 vgen_send_version_negotiate(vgen_ldc_t *ldcp)
2867 {
2868 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
2869 	vio_ver_msg_t	vermsg;
2870 	vio_msg_tag_t	*tagp = &vermsg.tag;
2871 	int		rv;
2872 
2873 	bzero(&vermsg, sizeof (vermsg));
2874 
2875 	tagp->vio_msgtype = VIO_TYPE_CTRL;
2876 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
2877 	tagp->vio_subtype_env = VIO_VER_INFO;
2878 	tagp->vio_sid = ldcp->local_sid;
2879 
2880 	/* get version msg payload from ldcp->local */
2881 	vermsg.ver_major = ldcp->local_hparams.ver_major;
2882 	vermsg.ver_minor = ldcp->local_hparams.ver_minor;
2883 	vermsg.dev_class = ldcp->local_hparams.dev_class;
2884 
2885 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (vermsg), B_FALSE);
2886 	if (rv != VGEN_SUCCESS) {
2887 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
2888 		return (rv);
2889 	}
2890 
2891 	ldcp->hstate |= VER_INFO_SENT;
2892 	DBG2(vgenp, ldcp, "VER_INFO_SENT ver(%d,%d)\n",
2893 	    vermsg.ver_major, vermsg.ver_minor);
2894 
2895 	return (VGEN_SUCCESS);
2896 }
2897 
2898 /* send attr info message to the peer over ldc */
2899 static int
2900 vgen_send_attr_info(vgen_ldc_t *ldcp)
2901 {
2902 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
2903 	vnet_attr_msg_t	attrmsg;
2904 	vio_msg_tag_t	*tagp = &attrmsg.tag;
2905 	int		rv;
2906 
2907 	bzero(&attrmsg, sizeof (attrmsg));
2908 
2909 	tagp->vio_msgtype = VIO_TYPE_CTRL;
2910 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
2911 	tagp->vio_subtype_env = VIO_ATTR_INFO;
2912 	tagp->vio_sid = ldcp->local_sid;
2913 
2914 	/* get attr msg payload from ldcp->local */
2915 	attrmsg.mtu = ldcp->local_hparams.mtu;
2916 	attrmsg.addr = ldcp->local_hparams.addr;
2917 	attrmsg.addr_type = ldcp->local_hparams.addr_type;
2918 	attrmsg.xfer_mode = ldcp->local_hparams.xfer_mode;
2919 	attrmsg.ack_freq = ldcp->local_hparams.ack_freq;
2920 
2921 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (attrmsg), B_FALSE);
2922 	if (rv != VGEN_SUCCESS) {
2923 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
2924 		return (rv);
2925 	}
2926 
2927 	ldcp->hstate |= ATTR_INFO_SENT;
2928 	DBG2(vgenp, ldcp, "ATTR_INFO_SENT\n");
2929 
2930 	return (VGEN_SUCCESS);
2931 }
2932 
2933 /* send descriptor ring register message to the peer over ldc */
2934 static int
2935 vgen_send_dring_reg(vgen_ldc_t *ldcp)
2936 {
2937 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
2938 	vio_dring_reg_msg_t	msg;
2939 	vio_msg_tag_t		*tagp = &msg.tag;
2940 	int		rv;
2941 
2942 	bzero(&msg, sizeof (msg));
2943 
2944 	tagp->vio_msgtype = VIO_TYPE_CTRL;
2945 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
2946 	tagp->vio_subtype_env = VIO_DRING_REG;
2947 	tagp->vio_sid = ldcp->local_sid;
2948 
2949 	/* get dring info msg payload from ldcp->local */
2950 	bcopy(&(ldcp->local_hparams.dring_cookie), (msg.cookie),
2951 	    sizeof (ldc_mem_cookie_t));
2952 	msg.ncookies = ldcp->local_hparams.num_dcookies;
2953 	msg.num_descriptors = ldcp->local_hparams.num_desc;
2954 	msg.descriptor_size = ldcp->local_hparams.desc_size;
2955 
2956 	/*
2957 	 * dring_ident is set to 0. After mapping the dring, peer sets this
2958 	 * value and sends it in the ack, which is saved in
2959 	 * vgen_handle_dring_reg().
2960 	 */
2961 	msg.dring_ident = 0;
2962 
2963 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (msg), B_FALSE);
2964 	if (rv != VGEN_SUCCESS) {
2965 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
2966 		return (rv);
2967 	}
2968 
2969 	ldcp->hstate |= DRING_INFO_SENT;
2970 	DBG2(vgenp, ldcp, "DRING_INFO_SENT \n");
2971 
2972 	return (VGEN_SUCCESS);
2973 }
2974 
2975 static int
2976 vgen_send_rdx_info(vgen_ldc_t *ldcp)
2977 {
2978 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
2979 	vio_rdx_msg_t	rdxmsg;
2980 	vio_msg_tag_t	*tagp = &rdxmsg.tag;
2981 	int		rv;
2982 
2983 	bzero(&rdxmsg, sizeof (rdxmsg));
2984 
2985 	tagp->vio_msgtype = VIO_TYPE_CTRL;
2986 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
2987 	tagp->vio_subtype_env = VIO_RDX;
2988 	tagp->vio_sid = ldcp->local_sid;
2989 
2990 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (rdxmsg), B_FALSE);
2991 	if (rv != VGEN_SUCCESS) {
2992 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
2993 		return (rv);
2994 	}
2995 
2996 	ldcp->hstate |= RDX_INFO_SENT;
2997 	DBG2(vgenp, ldcp, "RDX_INFO_SENT\n");
2998 
2999 	return (VGEN_SUCCESS);
3000 }
3001 
3002 /* send descriptor ring data message to the peer over ldc */
3003 static int
3004 vgen_send_dring_data(vgen_ldc_t *ldcp, uint32_t start, int32_t end)
3005 {
3006 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
3007 	vio_dring_msg_t	dringmsg, *msgp = &dringmsg;
3008 	vio_msg_tag_t	*tagp = &msgp->tag;
3009 	vgen_stats_t	*statsp = &ldcp->stats;
3010 	int		rv;
3011 
3012 	bzero(msgp, sizeof (*msgp));
3013 
3014 	tagp->vio_msgtype = VIO_TYPE_DATA;
3015 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
3016 	tagp->vio_subtype_env = VIO_DRING_DATA;
3017 	tagp->vio_sid = ldcp->local_sid;
3018 
3019 	msgp->seq_num = ldcp->next_txseq;
3020 	msgp->dring_ident = ldcp->local_hparams.dring_ident;
3021 	msgp->start_idx = start;
3022 	msgp->end_idx = end;
3023 
3024 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (dringmsg), B_TRUE);
3025 	if (rv != VGEN_SUCCESS) {
3026 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
3027 		return (rv);
3028 	}
3029 
3030 	ldcp->next_txseq++;
3031 	statsp->dring_data_msgs++;
3032 
3033 	DBG2(vgenp, ldcp, "DRING_DATA_SENT \n");
3034 
3035 	return (VGEN_SUCCESS);
3036 }
3037 
3038 /* send multicast addr info message to vsw */
3039 static int
3040 vgen_send_mcast_info(vgen_ldc_t *ldcp)
3041 {
3042 	vnet_mcast_msg_t	mcastmsg;
3043 	vnet_mcast_msg_t	*msgp;
3044 	vio_msg_tag_t		*tagp;
3045 	vgen_t			*vgenp;
3046 	struct ether_addr	*mca;
3047 	int			rv;
3048 	int			i;
3049 	uint32_t		size;
3050 	uint32_t		mccount;
3051 	uint32_t		n;
3052 
3053 	msgp = &mcastmsg;
3054 	tagp = &msgp->tag;
3055 	vgenp = LDC_TO_VGEN(ldcp);
3056 
3057 	mccount = vgenp->mccount;
3058 	i = 0;
3059 
3060 	do {
3061 		tagp->vio_msgtype = VIO_TYPE_CTRL;
3062 		tagp->vio_subtype = VIO_SUBTYPE_INFO;
3063 		tagp->vio_subtype_env = VNET_MCAST_INFO;
3064 		tagp->vio_sid = ldcp->local_sid;
3065 
3066 		n = ((mccount >= VNET_NUM_MCAST) ? VNET_NUM_MCAST : mccount);
3067 		size = n * sizeof (struct ether_addr);
3068 
3069 		mca = &(vgenp->mctab[i]);
3070 		bcopy(mca, (msgp->mca), size);
3071 		msgp->set = B_TRUE;
3072 		msgp->count = n;
3073 
3074 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msgp),
3075 		    B_FALSE);
3076 		if (rv != VGEN_SUCCESS) {
3077 			DWARN(vgenp, ldcp, "vgen_sendmsg err(%d)\n", rv);
3078 			return (rv);
3079 		}
3080 
3081 		mccount -= n;
3082 		i += n;
3083 
3084 	} while (mccount);
3085 
3086 	return (VGEN_SUCCESS);
3087 }
3088 
3089 /* Initiate Phase 2 of handshake */
3090 static int
3091 vgen_handshake_phase2(vgen_ldc_t *ldcp)
3092 {
3093 	int rv;
3094 	uint32_t ncookies = 0;
3095 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3096 
3097 #ifdef DEBUG
3098 	if (vgen_hdbg & HDBG_OUT_STATE) {
3099 		/* simulate out of state condition */
3100 		vgen_hdbg &= ~(HDBG_OUT_STATE);
3101 		rv = vgen_send_rdx_info(ldcp);
3102 		return (rv);
3103 	}
3104 	if (vgen_hdbg & HDBG_TIMEOUT) {
3105 		/* simulate timeout condition */
3106 		vgen_hdbg &= ~(HDBG_TIMEOUT);
3107 		return (VGEN_SUCCESS);
3108 	}
3109 #endif
3110 	rv = vgen_send_attr_info(ldcp);
3111 	if (rv != VGEN_SUCCESS) {
3112 		return (rv);
3113 	}
3114 
3115 	/* Bind descriptor ring to the channel */
3116 	if (ldcp->num_txdcookies == 0) {
3117 		rv = ldc_mem_dring_bind(ldcp->ldc_handle, ldcp->tx_dhandle,
3118 		    LDC_SHADOW_MAP, LDC_MEM_RW, &ldcp->tx_dcookie, &ncookies);
3119 		if (rv != 0) {
3120 			DWARN(vgenp, ldcp, "ldc_mem_dring_bind failed "
3121 			    "rv(%x)\n", rv);
3122 			return (rv);
3123 		}
3124 		ASSERT(ncookies == 1);
3125 		ldcp->num_txdcookies = ncookies;
3126 	}
3127 
3128 	/* update local dring_info params */
3129 	bcopy(&(ldcp->tx_dcookie), &(ldcp->local_hparams.dring_cookie),
3130 	    sizeof (ldc_mem_cookie_t));
3131 	ldcp->local_hparams.num_dcookies = ldcp->num_txdcookies;
3132 	ldcp->local_hparams.num_desc = ldcp->num_txds;
3133 	ldcp->local_hparams.desc_size = sizeof (vnet_public_desc_t);
3134 
3135 	rv = vgen_send_dring_reg(ldcp);
3136 	if (rv != VGEN_SUCCESS) {
3137 		return (rv);
3138 	}
3139 
3140 	return (VGEN_SUCCESS);
3141 }
3142 
3143 /*
3144  * This function resets the handshake phase to VH_PHASE0(pre-handshake phase).
3145  * This can happen after a channel comes up (status: LDC_UP) or
3146  * when handshake gets terminated due to various conditions.
3147  */
3148 static void
3149 vgen_reset_hphase(vgen_ldc_t *ldcp)
3150 {
3151 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3152 	ldc_status_t istatus;
3153 	int rv;
3154 
3155 	DBG1(vgenp, ldcp, "enter\n");
3156 	/* reset hstate and hphase */
3157 	ldcp->hstate = 0;
3158 	ldcp->hphase = VH_PHASE0;
3159 
3160 	/*
3161 	 * Save the id of pending handshake timer in cancel_htid.
3162 	 * This will be checked in vgen_ldc_cb() and the handshake timer will
3163 	 * be cancelled after releasing cblock.
3164 	 */
3165 	if (ldcp->htid) {
3166 		ldcp->cancel_htid = ldcp->htid;
3167 		ldcp->htid = 0;
3168 	}
3169 
3170 	if (ldcp->local_hparams.dring_ready) {
3171 		ldcp->local_hparams.dring_ready = B_FALSE;
3172 	}
3173 
3174 	/* Unbind tx descriptor ring from the channel */
3175 	if (ldcp->num_txdcookies) {
3176 		rv = ldc_mem_dring_unbind(ldcp->tx_dhandle);
3177 		if (rv != 0) {
3178 			DWARN(vgenp, ldcp, "ldc_mem_dring_unbind failed\n");
3179 		}
3180 		ldcp->num_txdcookies = 0;
3181 	}
3182 
3183 	if (ldcp->peer_hparams.dring_ready) {
3184 		ldcp->peer_hparams.dring_ready = B_FALSE;
3185 		/* Unmap peer's dring */
3186 		(void) ldc_mem_dring_unmap(ldcp->rx_dhandle);
3187 		vgen_clobber_rxds(ldcp);
3188 	}
3189 
3190 	vgen_clobber_tbufs(ldcp);
3191 
3192 	/*
3193 	 * clear local handshake params and initialize.
3194 	 */
3195 	bzero(&(ldcp->local_hparams), sizeof (ldcp->local_hparams));
3196 
3197 	/* set version to the highest version supported */
3198 	ldcp->local_hparams.ver_major =
3199 	    ldcp->vgen_versions[0].ver_major;
3200 	ldcp->local_hparams.ver_minor =
3201 	    ldcp->vgen_versions[0].ver_minor;
3202 	ldcp->local_hparams.dev_class = VDEV_NETWORK;
3203 
3204 	/* set attr_info params */
3205 	ldcp->local_hparams.mtu = ETHERMAX;
3206 	ldcp->local_hparams.addr =
3207 	    vnet_macaddr_strtoul(vgenp->macaddr);
3208 	ldcp->local_hparams.addr_type = ADDR_TYPE_MAC;
3209 	ldcp->local_hparams.xfer_mode = VIO_DRING_MODE;
3210 	ldcp->local_hparams.ack_freq = 0;	/* don't need acks */
3211 
3212 	/*
3213 	 * Note: dring is created, but not bound yet.
3214 	 * local dring_info params will be updated when we bind the dring in
3215 	 * vgen_handshake_phase2().
3216 	 * dring_ident is set to 0. After mapping the dring, peer sets this
3217 	 * value and sends it in the ack, which is saved in
3218 	 * vgen_handle_dring_reg().
3219 	 */
3220 	ldcp->local_hparams.dring_ident = 0;
3221 
3222 	/* clear peer_hparams */
3223 	bzero(&(ldcp->peer_hparams), sizeof (ldcp->peer_hparams));
3224 
3225 	/* reset the channel if required */
3226 	if (ldcp->need_ldc_reset) {
3227 		DWARN(vgenp, ldcp, "Doing Channel Reset...\n");
3228 		ldcp->need_ldc_reset = B_FALSE;
3229 		(void) ldc_down(ldcp->ldc_handle);
3230 		(void) ldc_status(ldcp->ldc_handle, &istatus);
3231 		DBG2(vgenp, ldcp, "Reset Done,ldc_status(%x)\n", istatus);
3232 		ldcp->ldc_status = istatus;
3233 
3234 		/* clear sids */
3235 		ldcp->local_sid = 0;
3236 		ldcp->peer_sid = 0;
3237 
3238 		/* try to bring the channel up */
3239 		rv = ldc_up(ldcp->ldc_handle);
3240 		if (rv != 0) {
3241 			DWARN(vgenp, ldcp, "ldc_up err rv(%d)\n", rv);
3242 		}
3243 
3244 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
3245 			DWARN(vgenp, ldcp, "ldc_status err\n");
3246 		} else {
3247 			ldcp->ldc_status = istatus;
3248 		}
3249 	}
3250 }
3251 
3252 /* wrapper function for vgen_reset_hphase */
3253 static void
3254 vgen_handshake_reset(vgen_ldc_t *ldcp)
3255 {
3256 	ASSERT(MUTEX_HELD(&ldcp->cblock));
3257 	mutex_enter(&ldcp->rxlock);
3258 	mutex_enter(&ldcp->wrlock);
3259 	mutex_enter(&ldcp->txlock);
3260 	mutex_enter(&ldcp->tclock);
3261 
3262 	vgen_reset_hphase(ldcp);
3263 
3264 	mutex_exit(&ldcp->tclock);
3265 	mutex_exit(&ldcp->txlock);
3266 	mutex_exit(&ldcp->wrlock);
3267 	mutex_exit(&ldcp->rxlock);
3268 }
3269 
3270 /*
3271  * Initiate handshake with the peer by sending various messages
3272  * based on the handshake-phase that the channel is currently in.
3273  */
3274 static void
3275 vgen_handshake(vgen_ldc_t *ldcp)
3276 {
3277 	uint32_t hphase = ldcp->hphase;
3278 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
3279 	ldc_status_t	istatus;
3280 	int	rv = 0;
3281 
3282 	switch (hphase) {
3283 
3284 	case VH_PHASE1:
3285 
3286 		/*
3287 		 * start timer, for entire handshake process, turn this timer
3288 		 * off if all phases of handshake complete successfully and
3289 		 * hphase goes to VH_DONE(below) or
3290 		 * vgen_reset_hphase() gets called or
3291 		 * channel is reset due to errors or
3292 		 * vgen_ldc_uninit() is invoked(vgen_stop).
3293 		 */
3294 		ldcp->htid = timeout(vgen_hwatchdog, (caddr_t)ldcp,
3295 		    drv_usectohz(vgen_hwd_interval * MICROSEC));
3296 
3297 		/* Phase 1 involves negotiating the version */
3298 		rv = vgen_send_version_negotiate(ldcp);
3299 		break;
3300 
3301 	case VH_PHASE2:
3302 		rv = vgen_handshake_phase2(ldcp);
3303 		break;
3304 
3305 	case VH_PHASE3:
3306 		rv = vgen_send_rdx_info(ldcp);
3307 		break;
3308 
3309 	case VH_DONE:
3310 		/*
3311 		 * Save the id of pending handshake timer in cancel_htid.
3312 		 * This will be checked in vgen_ldc_cb() and the handshake
3313 		 * timer will be cancelled after releasing cblock.
3314 		 */
3315 		if (ldcp->htid) {
3316 			ldcp->cancel_htid = ldcp->htid;
3317 			ldcp->htid = 0;
3318 		}
3319 		ldcp->hretries = 0;
3320 		DBG1(vgenp, ldcp, "Handshake Done\n");
3321 
3322 		if (ldcp->portp == vgenp->vsw_portp) {
3323 			/*
3324 			 * If this channel(port) is connected to vsw,
3325 			 * need to sync multicast table with vsw.
3326 			 */
3327 			mutex_exit(&ldcp->cblock);
3328 
3329 			mutex_enter(&vgenp->lock);
3330 			rv = vgen_send_mcast_info(ldcp);
3331 			mutex_exit(&vgenp->lock);
3332 
3333 			mutex_enter(&ldcp->cblock);
3334 			if (rv != VGEN_SUCCESS)
3335 				break;
3336 		}
3337 
3338 		/*
3339 		 * Check if mac layer should be notified to restart
3340 		 * transmissions. This can happen if the channel got
3341 		 * reset and vgen_clobber_tbufs() is called, while
3342 		 * need_resched is set.
3343 		 */
3344 		mutex_enter(&ldcp->tclock);
3345 		if (ldcp->need_resched) {
3346 			ldcp->need_resched = B_FALSE;
3347 			vnet_tx_update(vgenp->vnetp);
3348 		}
3349 		mutex_exit(&ldcp->tclock);
3350 
3351 		break;
3352 
3353 	default:
3354 		break;
3355 	}
3356 
3357 	if (rv == ECONNRESET) {
3358 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
3359 			DWARN(vgenp, ldcp, "ldc_status err\n");
3360 		} else {
3361 			ldcp->ldc_status = istatus;
3362 		}
3363 		vgen_handle_evt_reset(ldcp, B_FALSE);
3364 	} else if (rv) {
3365 		vgen_handshake_reset(ldcp);
3366 	}
3367 }
3368 
3369 /*
3370  * Check if the current handshake phase has completed successfully and
3371  * return the status.
3372  */
3373 static int
3374 vgen_handshake_done(vgen_ldc_t *ldcp)
3375 {
3376 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
3377 	uint32_t	hphase = ldcp->hphase;
3378 	int 		status = 0;
3379 
3380 	switch (hphase) {
3381 
3382 	case VH_PHASE1:
3383 		/*
3384 		 * Phase1 is done, if version negotiation
3385 		 * completed successfully.
3386 		 */
3387 		status = ((ldcp->hstate & VER_NEGOTIATED) ==
3388 		    VER_NEGOTIATED);
3389 		break;
3390 
3391 	case VH_PHASE2:
3392 		/*
3393 		 * Phase 2 is done, if attr info and dring info
3394 		 * have been exchanged successfully.
3395 		 */
3396 		status = (((ldcp->hstate & ATTR_INFO_EXCHANGED) ==
3397 		    ATTR_INFO_EXCHANGED) &&
3398 		    ((ldcp->hstate & DRING_INFO_EXCHANGED) ==
3399 		    DRING_INFO_EXCHANGED));
3400 		break;
3401 
3402 	case VH_PHASE3:
3403 		/* Phase 3 is done, if rdx msg has been exchanged */
3404 		status = ((ldcp->hstate & RDX_EXCHANGED) ==
3405 		    RDX_EXCHANGED);
3406 		break;
3407 
3408 	default:
3409 		break;
3410 	}
3411 
3412 	if (status == 0) {
3413 		return (VGEN_FAILURE);
3414 	}
3415 	DBG2(vgenp, ldcp, "PHASE(%d)\n", hphase);
3416 	return (VGEN_SUCCESS);
3417 }
3418 
3419 /* retry handshake on failure */
3420 static void
3421 vgen_handshake_retry(vgen_ldc_t *ldcp)
3422 {
3423 	/* reset handshake phase */
3424 	vgen_handshake_reset(ldcp);
3425 
3426 	/* handshake retry is specified and the channel is UP */
3427 	if (vgen_max_hretries && (ldcp->ldc_status == LDC_UP)) {
3428 		if (ldcp->hretries++ < vgen_max_hretries) {
3429 			ldcp->local_sid = ddi_get_lbolt();
3430 			vgen_handshake(vh_nextphase(ldcp));
3431 		}
3432 	}
3433 }
3434 
3435 /*
3436  * Handle a version info msg from the peer or an ACK/NACK from the peer
3437  * to a version info msg that we sent.
3438  */
3439 static int
3440 vgen_handle_version_negotiate(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
3441 {
3442 	vgen_t		*vgenp;
3443 	vio_ver_msg_t	*vermsg = (vio_ver_msg_t *)tagp;
3444 	int		ack = 0;
3445 	int		failed = 0;
3446 	int		idx;
3447 	vgen_ver_t	*versions = ldcp->vgen_versions;
3448 	int		rv = 0;
3449 
3450 	vgenp = LDC_TO_VGEN(ldcp);
3451 	DBG1(vgenp, ldcp, "enter\n");
3452 	switch (tagp->vio_subtype) {
3453 	case VIO_SUBTYPE_INFO:
3454 
3455 		/*  Cache sid of peer if this is the first time */
3456 		if (ldcp->peer_sid == 0) {
3457 			DBG2(vgenp, ldcp, "Caching peer_sid(%x)\n",
3458 			    tagp->vio_sid);
3459 			ldcp->peer_sid = tagp->vio_sid;
3460 		}
3461 
3462 		if (ldcp->hphase != VH_PHASE1) {
3463 			/*
3464 			 * If we are not already in VH_PHASE1, reset to
3465 			 * pre-handshake state, and initiate handshake
3466 			 * to the peer too.
3467 			 */
3468 			vgen_handshake_reset(ldcp);
3469 			vgen_handshake(vh_nextphase(ldcp));
3470 		}
3471 		ldcp->hstate |= VER_INFO_RCVD;
3472 
3473 		/* save peer's requested values */
3474 		ldcp->peer_hparams.ver_major = vermsg->ver_major;
3475 		ldcp->peer_hparams.ver_minor = vermsg->ver_minor;
3476 		ldcp->peer_hparams.dev_class = vermsg->dev_class;
3477 
3478 		if ((vermsg->dev_class != VDEV_NETWORK) &&
3479 		    (vermsg->dev_class != VDEV_NETWORK_SWITCH)) {
3480 			/* unsupported dev_class, send NACK */
3481 
3482 			DWARN(vgenp, ldcp, "Version Negotiation Failed\n");
3483 
3484 			tagp->vio_subtype = VIO_SUBTYPE_NACK;
3485 			tagp->vio_sid = ldcp->local_sid;
3486 			/* send reply msg back to peer */
3487 			rv = vgen_sendmsg(ldcp, (caddr_t)tagp,
3488 			    sizeof (*vermsg), B_FALSE);
3489 			if (rv != VGEN_SUCCESS) {
3490 				return (rv);
3491 			}
3492 			return (VGEN_FAILURE);
3493 		}
3494 
3495 		DBG2(vgenp, ldcp, "VER_INFO_RCVD, ver(%d,%d)\n",
3496 		    vermsg->ver_major,  vermsg->ver_minor);
3497 
3498 		idx = 0;
3499 
3500 		for (;;) {
3501 
3502 			if (vermsg->ver_major > versions[idx].ver_major) {
3503 
3504 				/* nack with next lower version */
3505 				tagp->vio_subtype = VIO_SUBTYPE_NACK;
3506 				vermsg->ver_major = versions[idx].ver_major;
3507 				vermsg->ver_minor = versions[idx].ver_minor;
3508 				break;
3509 			}
3510 
3511 			if (vermsg->ver_major == versions[idx].ver_major) {
3512 
3513 				/* major version match - ACK version */
3514 				tagp->vio_subtype = VIO_SUBTYPE_ACK;
3515 				ack = 1;
3516 
3517 				/*
3518 				 * lower minor version to the one this endpt
3519 				 * supports, if necessary
3520 				 */
3521 				if (vermsg->ver_minor >
3522 				    versions[idx].ver_minor) {
3523 					vermsg->ver_minor =
3524 					    versions[idx].ver_minor;
3525 					ldcp->peer_hparams.ver_minor =
3526 					    versions[idx].ver_minor;
3527 				}
3528 				break;
3529 			}
3530 
3531 			idx++;
3532 
3533 			if (idx == VGEN_NUM_VER) {
3534 
3535 				/* no version match - send NACK */
3536 				tagp->vio_subtype = VIO_SUBTYPE_NACK;
3537 				vermsg->ver_major = 0;
3538 				vermsg->ver_minor = 0;
3539 				failed = 1;
3540 				break;
3541 			}
3542 
3543 		}
3544 
3545 		tagp->vio_sid = ldcp->local_sid;
3546 
3547 		/* send reply msg back to peer */
3548 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*vermsg),
3549 		    B_FALSE);
3550 		if (rv != VGEN_SUCCESS) {
3551 			return (rv);
3552 		}
3553 
3554 		if (ack) {
3555 			ldcp->hstate |= VER_ACK_SENT;
3556 			DBG2(vgenp, ldcp, "VER_ACK_SENT, ver(%d,%d) \n",
3557 			    vermsg->ver_major, vermsg->ver_minor);
3558 		}
3559 		if (failed) {
3560 			DWARN(vgenp, ldcp, "Negotiation Failed\n");
3561 			return (VGEN_FAILURE);
3562 		}
3563 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
3564 
3565 			/*  VER_ACK_SENT and VER_ACK_RCVD */
3566 
3567 			/* local and peer versions match? */
3568 			ASSERT((ldcp->local_hparams.ver_major ==
3569 			    ldcp->peer_hparams.ver_major) &&
3570 			    (ldcp->local_hparams.ver_minor ==
3571 			    ldcp->peer_hparams.ver_minor));
3572 
3573 			/* move to the next phase */
3574 			vgen_handshake(vh_nextphase(ldcp));
3575 		}
3576 
3577 		break;
3578 
3579 	case VIO_SUBTYPE_ACK:
3580 
3581 		if (ldcp->hphase != VH_PHASE1) {
3582 			/*  This should not happen. */
3583 			DWARN(vgenp, ldcp, "Invalid Phase(%u)\n", ldcp->hphase);
3584 			return (VGEN_FAILURE);
3585 		}
3586 
3587 		/* SUCCESS - we have agreed on a version */
3588 		ldcp->local_hparams.ver_major = vermsg->ver_major;
3589 		ldcp->local_hparams.ver_minor = vermsg->ver_minor;
3590 		ldcp->hstate |= VER_ACK_RCVD;
3591 
3592 		DBG2(vgenp, ldcp, "VER_ACK_RCVD, ver(%d,%d) \n",
3593 		    vermsg->ver_major,  vermsg->ver_minor);
3594 
3595 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
3596 
3597 			/*  VER_ACK_SENT and VER_ACK_RCVD */
3598 
3599 			/* local and peer versions match? */
3600 			ASSERT((ldcp->local_hparams.ver_major ==
3601 			    ldcp->peer_hparams.ver_major) &&
3602 			    (ldcp->local_hparams.ver_minor ==
3603 			    ldcp->peer_hparams.ver_minor));
3604 
3605 			/* move to the next phase */
3606 			vgen_handshake(vh_nextphase(ldcp));
3607 		}
3608 		break;
3609 
3610 	case VIO_SUBTYPE_NACK:
3611 
3612 		if (ldcp->hphase != VH_PHASE1) {
3613 			/*  This should not happen.  */
3614 			DWARN(vgenp, ldcp, "VER_NACK_RCVD Invalid "
3615 			"Phase(%u)\n", ldcp->hphase);
3616 			return (VGEN_FAILURE);
3617 		}
3618 
3619 		DBG2(vgenp, ldcp, "VER_NACK_RCVD next ver(%d,%d)\n",
3620 		    vermsg->ver_major, vermsg->ver_minor);
3621 
3622 		/* check if version in NACK is zero */
3623 		if (vermsg->ver_major == 0 && vermsg->ver_minor == 0) {
3624 			/*
3625 			 * Version Negotiation has failed.
3626 			 */
3627 			DWARN(vgenp, ldcp, "Version Negotiation Failed\n");
3628 			return (VGEN_FAILURE);
3629 		}
3630 
3631 		idx = 0;
3632 
3633 		for (;;) {
3634 
3635 			if (vermsg->ver_major > versions[idx].ver_major) {
3636 				/* select next lower version */
3637 
3638 				ldcp->local_hparams.ver_major =
3639 				    versions[idx].ver_major;
3640 				ldcp->local_hparams.ver_minor =
3641 				    versions[idx].ver_minor;
3642 				break;
3643 			}
3644 
3645 			if (vermsg->ver_major == versions[idx].ver_major) {
3646 				/* major version match */
3647 
3648 				ldcp->local_hparams.ver_major =
3649 				    versions[idx].ver_major;
3650 
3651 				ldcp->local_hparams.ver_minor =
3652 				    versions[idx].ver_minor;
3653 				break;
3654 			}
3655 
3656 			idx++;
3657 
3658 			if (idx == VGEN_NUM_VER) {
3659 				/*
3660 				 * no version match.
3661 				 * Version Negotiation has failed.
3662 				 */
3663 				DWARN(vgenp, ldcp,
3664 				    "Version Negotiation Failed\n");
3665 				return (VGEN_FAILURE);
3666 			}
3667 
3668 		}
3669 
3670 		rv = vgen_send_version_negotiate(ldcp);
3671 		if (rv != VGEN_SUCCESS) {
3672 			return (rv);
3673 		}
3674 
3675 		break;
3676 	}
3677 
3678 	DBG1(vgenp, ldcp, "exit\n");
3679 	return (VGEN_SUCCESS);
3680 }
3681 
3682 /* Check if the attributes are supported */
3683 static int
3684 vgen_check_attr_info(vgen_ldc_t *ldcp, vnet_attr_msg_t *msg)
3685 {
3686 	_NOTE(ARGUNUSED(ldcp))
3687 
3688 	/*
3689 	 * currently, we support these attr values:
3690 	 * mtu of ethernet, addr_type of mac, xfer_mode of
3691 	 * ldc shared memory, ack_freq of 0 (data is acked if
3692 	 * the ack bit is set in the descriptor) and the address should
3693 	 * match the address in the port node.
3694 	 */
3695 	if ((msg->mtu != ETHERMAX) ||
3696 	    (msg->addr_type != ADDR_TYPE_MAC) ||
3697 	    (msg->xfer_mode != VIO_DRING_MODE) ||
3698 	    (msg->ack_freq > 64)) {
3699 		return (VGEN_FAILURE);
3700 	}
3701 
3702 	return (VGEN_SUCCESS);
3703 }
3704 
3705 /*
3706  * Handle an attribute info msg from the peer or an ACK/NACK from the peer
3707  * to an attr info msg that we sent.
3708  */
3709 static int
3710 vgen_handle_attr_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
3711 {
3712 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3713 	vnet_attr_msg_t *attrmsg = (vnet_attr_msg_t *)tagp;
3714 	int		ack = 0;
3715 	int		rv = 0;
3716 
3717 	DBG1(vgenp, ldcp, "enter\n");
3718 	if (ldcp->hphase != VH_PHASE2) {
3719 		DWARN(vgenp, ldcp, "Rcvd ATTR_INFO subtype(%d),"
3720 		" Invalid Phase(%u)\n",
3721 		    tagp->vio_subtype, ldcp->hphase);
3722 		return (VGEN_FAILURE);
3723 	}
3724 	switch (tagp->vio_subtype) {
3725 	case VIO_SUBTYPE_INFO:
3726 
3727 		DBG2(vgenp, ldcp, "ATTR_INFO_RCVD \n");
3728 		ldcp->hstate |= ATTR_INFO_RCVD;
3729 
3730 		/* save peer's values */
3731 		ldcp->peer_hparams.mtu = attrmsg->mtu;
3732 		ldcp->peer_hparams.addr = attrmsg->addr;
3733 		ldcp->peer_hparams.addr_type = attrmsg->addr_type;
3734 		ldcp->peer_hparams.xfer_mode = attrmsg->xfer_mode;
3735 		ldcp->peer_hparams.ack_freq = attrmsg->ack_freq;
3736 
3737 		if (vgen_check_attr_info(ldcp, attrmsg) == VGEN_FAILURE) {
3738 			/* unsupported attr, send NACK */
3739 			tagp->vio_subtype = VIO_SUBTYPE_NACK;
3740 		} else {
3741 			ack = 1;
3742 			tagp->vio_subtype = VIO_SUBTYPE_ACK;
3743 		}
3744 		tagp->vio_sid = ldcp->local_sid;
3745 
3746 		/* send reply msg back to peer */
3747 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*attrmsg),
3748 		    B_FALSE);
3749 		if (rv != VGEN_SUCCESS) {
3750 			return (rv);
3751 		}
3752 
3753 		if (ack) {
3754 			ldcp->hstate |= ATTR_ACK_SENT;
3755 			DBG2(vgenp, ldcp, "ATTR_ACK_SENT \n");
3756 		} else {
3757 			/* failed */
3758 			DWARN(vgenp, ldcp, "ATTR_NACK_SENT \n");
3759 			return (VGEN_FAILURE);
3760 		}
3761 
3762 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
3763 			vgen_handshake(vh_nextphase(ldcp));
3764 		}
3765 
3766 		break;
3767 
3768 	case VIO_SUBTYPE_ACK:
3769 
3770 		ldcp->hstate |= ATTR_ACK_RCVD;
3771 
3772 		DBG2(vgenp, ldcp, "ATTR_ACK_RCVD \n");
3773 
3774 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
3775 			vgen_handshake(vh_nextphase(ldcp));
3776 		}
3777 		break;
3778 
3779 	case VIO_SUBTYPE_NACK:
3780 
3781 		DBG2(vgenp, ldcp, "ATTR_NACK_RCVD \n");
3782 		return (VGEN_FAILURE);
3783 	}
3784 	DBG1(vgenp, ldcp, "exit\n");
3785 	return (VGEN_SUCCESS);
3786 }
3787 
3788 /* Check if the dring info msg is ok */
3789 static int
3790 vgen_check_dring_reg(vio_dring_reg_msg_t *msg)
3791 {
3792 	/* check if msg contents are ok */
3793 	if ((msg->num_descriptors < 128) || (msg->descriptor_size <
3794 	    sizeof (vnet_public_desc_t))) {
3795 		return (VGEN_FAILURE);
3796 	}
3797 	return (VGEN_SUCCESS);
3798 }
3799 
3800 /*
3801  * Handle a descriptor ring register msg from the peer or an ACK/NACK from
3802  * the peer to a dring register msg that we sent.
3803  */
3804 static int
3805 vgen_handle_dring_reg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
3806 {
3807 	vio_dring_reg_msg_t *msg = (vio_dring_reg_msg_t *)tagp;
3808 	ldc_mem_cookie_t dcookie;
3809 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3810 	int ack = 0;
3811 	int rv = 0;
3812 
3813 	DBG1(vgenp, ldcp, "enter\n");
3814 	if (ldcp->hphase < VH_PHASE2) {
3815 		/* dring_info can be rcvd in any of the phases after Phase1 */
3816 		DWARN(vgenp, ldcp,
3817 		    "Rcvd DRING_INFO Subtype (%d), Invalid Phase(%u)\n",
3818 		    tagp->vio_subtype, ldcp->hphase);
3819 		return (VGEN_FAILURE);
3820 	}
3821 	switch (tagp->vio_subtype) {
3822 	case VIO_SUBTYPE_INFO:
3823 
3824 		DBG2(vgenp, ldcp, "DRING_INFO_RCVD \n");
3825 		ldcp->hstate |= DRING_INFO_RCVD;
3826 		bcopy((msg->cookie), &dcookie, sizeof (dcookie));
3827 
3828 		ASSERT(msg->ncookies == 1);
3829 
3830 		if (vgen_check_dring_reg(msg) == VGEN_SUCCESS) {
3831 			/*
3832 			 * verified dring info msg to be ok,
3833 			 * now try to map the remote dring.
3834 			 */
3835 			rv = vgen_init_rxds(ldcp, msg->num_descriptors,
3836 			    msg->descriptor_size, &dcookie,
3837 			    msg->ncookies);
3838 			if (rv == DDI_SUCCESS) {
3839 				/* now we can ack the peer */
3840 				ack = 1;
3841 			}
3842 		}
3843 		if (ack == 0) {
3844 			/* failed, send NACK */
3845 			tagp->vio_subtype = VIO_SUBTYPE_NACK;
3846 		} else {
3847 			if (!(ldcp->peer_hparams.dring_ready)) {
3848 
3849 				/* save peer's dring_info values */
3850 				bcopy(&dcookie,
3851 				    &(ldcp->peer_hparams.dring_cookie),
3852 				    sizeof (dcookie));
3853 				ldcp->peer_hparams.num_desc =
3854 				    msg->num_descriptors;
3855 				ldcp->peer_hparams.desc_size =
3856 				    msg->descriptor_size;
3857 				ldcp->peer_hparams.num_dcookies =
3858 				    msg->ncookies;
3859 
3860 				/* set dring_ident for the peer */
3861 				ldcp->peer_hparams.dring_ident =
3862 				    (uint64_t)ldcp->rxdp;
3863 				/* return the dring_ident in ack msg */
3864 				msg->dring_ident =
3865 				    (uint64_t)ldcp->rxdp;
3866 
3867 				ldcp->peer_hparams.dring_ready = B_TRUE;
3868 			}
3869 			tagp->vio_subtype = VIO_SUBTYPE_ACK;
3870 		}
3871 		tagp->vio_sid = ldcp->local_sid;
3872 		/* send reply msg back to peer */
3873 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msg),
3874 		    B_FALSE);
3875 		if (rv != VGEN_SUCCESS) {
3876 			return (rv);
3877 		}
3878 
3879 		if (ack) {
3880 			ldcp->hstate |= DRING_ACK_SENT;
3881 			DBG2(vgenp, ldcp, "DRING_ACK_SENT");
3882 		} else {
3883 			DWARN(vgenp, ldcp, "DRING_NACK_SENT");
3884 			return (VGEN_FAILURE);
3885 		}
3886 
3887 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
3888 			vgen_handshake(vh_nextphase(ldcp));
3889 		}
3890 
3891 		break;
3892 
3893 	case VIO_SUBTYPE_ACK:
3894 
3895 		ldcp->hstate |= DRING_ACK_RCVD;
3896 
3897 		DBG2(vgenp, ldcp, "DRING_ACK_RCVD");
3898 
3899 		if (!(ldcp->local_hparams.dring_ready)) {
3900 			/* local dring is now ready */
3901 			ldcp->local_hparams.dring_ready = B_TRUE;
3902 
3903 			/* save dring_ident acked by peer */
3904 			ldcp->local_hparams.dring_ident =
3905 			    msg->dring_ident;
3906 		}
3907 
3908 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
3909 			vgen_handshake(vh_nextphase(ldcp));
3910 		}
3911 
3912 		break;
3913 
3914 	case VIO_SUBTYPE_NACK:
3915 
3916 		DBG2(vgenp, ldcp, "DRING_NACK_RCVD");
3917 		return (VGEN_FAILURE);
3918 	}
3919 	DBG1(vgenp, ldcp, "exit\n");
3920 	return (VGEN_SUCCESS);
3921 }
3922 
3923 /*
3924  * Handle a rdx info msg from the peer or an ACK/NACK
3925  * from the peer to a rdx info msg that we sent.
3926  */
3927 static int
3928 vgen_handle_rdx_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
3929 {
3930 	int rv = 0;
3931 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
3932 
3933 	DBG1(vgenp, ldcp, "enter\n");
3934 	if (ldcp->hphase != VH_PHASE3) {
3935 		DWARN(vgenp, ldcp,
3936 		    "Rcvd RDX_INFO Subtype (%d), Invalid Phase(%u)\n",
3937 		    tagp->vio_subtype, ldcp->hphase);
3938 		return (VGEN_FAILURE);
3939 	}
3940 	switch (tagp->vio_subtype) {
3941 	case VIO_SUBTYPE_INFO:
3942 
3943 		DBG2(vgenp, ldcp, "RDX_INFO_RCVD \n");
3944 		ldcp->hstate |= RDX_INFO_RCVD;
3945 
3946 		tagp->vio_subtype = VIO_SUBTYPE_ACK;
3947 		tagp->vio_sid = ldcp->local_sid;
3948 		/* send reply msg back to peer */
3949 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (vio_rdx_msg_t),
3950 		    B_FALSE);
3951 		if (rv != VGEN_SUCCESS) {
3952 			return (rv);
3953 		}
3954 
3955 		ldcp->hstate |= RDX_ACK_SENT;
3956 		DBG2(vgenp, ldcp, "RDX_ACK_SENT \n");
3957 
3958 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
3959 			vgen_handshake(vh_nextphase(ldcp));
3960 		}
3961 
3962 		break;
3963 
3964 	case VIO_SUBTYPE_ACK:
3965 
3966 		ldcp->hstate |= RDX_ACK_RCVD;
3967 
3968 		DBG2(vgenp, ldcp, "RDX_ACK_RCVD \n");
3969 
3970 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
3971 			vgen_handshake(vh_nextphase(ldcp));
3972 		}
3973 		break;
3974 
3975 	case VIO_SUBTYPE_NACK:
3976 
3977 		DBG2(vgenp, ldcp, "RDX_NACK_RCVD \n");
3978 		return (VGEN_FAILURE);
3979 	}
3980 	DBG1(vgenp, ldcp, "exit\n");
3981 	return (VGEN_SUCCESS);
3982 }
3983 
3984 /* Handle ACK/NACK from vsw to a set multicast msg that we sent */
3985 static int
3986 vgen_handle_mcast_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
3987 {
3988 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3989 	vnet_mcast_msg_t *msgp = (vnet_mcast_msg_t *)tagp;
3990 	struct ether_addr *addrp;
3991 	int count;
3992 	int i;
3993 
3994 	DBG1(vgenp, ldcp, "enter\n");
3995 	switch (tagp->vio_subtype) {
3996 
3997 	case VIO_SUBTYPE_INFO:
3998 
3999 		/* vnet shouldn't recv set mcast msg, only vsw handles it */
4000 		DWARN(vgenp, ldcp, "rcvd SET_MCAST_INFO \n");
4001 		break;
4002 
4003 	case VIO_SUBTYPE_ACK:
4004 
4005 		/* success adding/removing multicast addr */
4006 		DBG1(vgenp, ldcp, "rcvd SET_MCAST_ACK \n");
4007 		break;
4008 
4009 	case VIO_SUBTYPE_NACK:
4010 
4011 		DWARN(vgenp, ldcp, "rcvd SET_MCAST_NACK \n");
4012 		if (!(msgp->set)) {
4013 			/* multicast remove request failed */
4014 			break;
4015 		}
4016 
4017 		/* multicast add request failed */
4018 		for (count = 0; count < msgp->count; count++) {
4019 			addrp = &(msgp->mca[count]);
4020 
4021 			/* delete address from the table */
4022 			for (i = 0; i < vgenp->mccount; i++) {
4023 				if (ether_cmp(addrp,
4024 				    &(vgenp->mctab[i])) == 0) {
4025 					if (vgenp->mccount > 1) {
4026 						int t = vgenp->mccount - 1;
4027 						vgenp->mctab[i] =
4028 						    vgenp->mctab[t];
4029 					}
4030 					vgenp->mccount--;
4031 					break;
4032 				}
4033 			}
4034 		}
4035 		break;
4036 
4037 	}
4038 	DBG1(vgenp, ldcp, "exit\n");
4039 
4040 	return (VGEN_SUCCESS);
4041 }
4042 
4043 /* handler for control messages received from the peer ldc end-point */
4044 static int
4045 vgen_handle_ctrlmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4046 {
4047 	int rv = 0;
4048 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4049 
4050 	DBG1(vgenp, ldcp, "enter\n");
4051 	switch (tagp->vio_subtype_env) {
4052 
4053 	case VIO_VER_INFO:
4054 		rv = vgen_handle_version_negotiate(ldcp, tagp);
4055 		break;
4056 
4057 	case VIO_ATTR_INFO:
4058 		rv = vgen_handle_attr_info(ldcp, tagp);
4059 		break;
4060 
4061 	case VIO_DRING_REG:
4062 		rv = vgen_handle_dring_reg(ldcp, tagp);
4063 		break;
4064 
4065 	case VIO_RDX:
4066 		rv = vgen_handle_rdx_info(ldcp, tagp);
4067 		break;
4068 
4069 	case VNET_MCAST_INFO:
4070 		rv = vgen_handle_mcast_info(ldcp, tagp);
4071 		break;
4072 
4073 	}
4074 
4075 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
4076 	return (rv);
4077 }
4078 
4079 /* handler for data messages received from the peer ldc end-point */
4080 static int
4081 vgen_handle_datamsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4082 {
4083 	int rv = 0;
4084 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4085 
4086 	DBG1(vgenp, ldcp, "enter\n");
4087 
4088 	if (ldcp->hphase != VH_DONE)
4089 		return (rv);
4090 	switch (tagp->vio_subtype_env) {
4091 	case VIO_DRING_DATA:
4092 		rv = vgen_handle_dring_data(ldcp, tagp);
4093 		break;
4094 	default:
4095 		break;
4096 	}
4097 
4098 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
4099 	return (rv);
4100 }
4101 
4102 static int
4103 vgen_send_dring_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp, uint32_t start,
4104     int32_t end, uint8_t pstate)
4105 {
4106 	int rv = 0;
4107 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4108 	vio_dring_msg_t *msgp = (vio_dring_msg_t *)tagp;
4109 
4110 	tagp->vio_subtype = VIO_SUBTYPE_ACK;
4111 	tagp->vio_sid = ldcp->local_sid;
4112 	msgp->start_idx = start;
4113 	msgp->end_idx = end;
4114 	msgp->dring_process_state = pstate;
4115 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msgp), B_FALSE);
4116 	if (rv != VGEN_SUCCESS) {
4117 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
4118 	}
4119 	return (rv);
4120 }
4121 
4122 static int
4123 vgen_handle_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4124 {
4125 	int rv = 0;
4126 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4127 
4128 
4129 	DBG1(vgenp, ldcp, "enter\n");
4130 	switch (tagp->vio_subtype) {
4131 
4132 	case VIO_SUBTYPE_INFO:
4133 		/*
4134 		 * To reduce the locking contention, release the
4135 		 * cblock here and re-acquire it once we are done
4136 		 * receiving packets.
4137 		 */
4138 		mutex_exit(&ldcp->cblock);
4139 		mutex_enter(&ldcp->rxlock);
4140 		rv = vgen_handle_dring_data_info(ldcp, tagp);
4141 		mutex_exit(&ldcp->rxlock);
4142 		mutex_enter(&ldcp->cblock);
4143 		break;
4144 
4145 	case VIO_SUBTYPE_ACK:
4146 		rv = vgen_handle_dring_data_ack(ldcp, tagp);
4147 		break;
4148 
4149 	case VIO_SUBTYPE_NACK:
4150 		rv = vgen_handle_dring_data_nack(ldcp, tagp);
4151 		break;
4152 	}
4153 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
4154 	return (rv);
4155 }
4156 
4157 static int
4158 vgen_handle_dring_data_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4159 {
4160 	uint32_t start;
4161 	int32_t end;
4162 	int rv = 0;
4163 	vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;
4164 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4165 #ifdef VGEN_HANDLE_LOST_PKTS
4166 	vgen_stats_t *statsp = &ldcp->stats;
4167 	uint32_t rxi;
4168 	int n;
4169 #endif
4170 
4171 	DBG1(vgenp, ldcp, "enter\n");
4172 
4173 	start = dringmsg->start_idx;
4174 	end = dringmsg->end_idx;
4175 	/*
4176 	 * received a data msg, which contains the start and end
4177 	 * indices of the descriptors within the rx ring holding data,
4178 	 * the seq_num of data packet corresponding to the start index,
4179 	 * and the dring_ident.
4180 	 * We can now read the contents of each of these descriptors
4181 	 * and gather data from it.
4182 	 */
4183 	DBG1(vgenp, ldcp, "INFO: start(%d), end(%d)\n",
4184 	    start, end);
4185 
4186 	/* validate rx start and end indeces */
4187 	if (!(CHECK_RXI(start, ldcp)) || ((end != -1) &&
4188 	    !(CHECK_RXI(end, ldcp)))) {
4189 		DWARN(vgenp, ldcp, "Invalid Rx start(%d) or end(%d)\n",
4190 		    start, end);
4191 		/* drop the message if invalid index */
4192 		return (rv);
4193 	}
4194 
4195 	/* validate dring_ident */
4196 	if (dringmsg->dring_ident != ldcp->peer_hparams.dring_ident) {
4197 		DWARN(vgenp, ldcp, "Invalid dring ident 0x%x\n",
4198 		    dringmsg->dring_ident);
4199 		/* invalid dring_ident, drop the msg */
4200 		return (rv);
4201 	}
4202 #ifdef DEBUG
4203 	if (vgen_trigger_rxlost) {
4204 		/* drop this msg to simulate lost pkts for debugging */
4205 		vgen_trigger_rxlost = 0;
4206 		return (rv);
4207 	}
4208 #endif
4209 
4210 #ifdef	VGEN_HANDLE_LOST_PKTS
4211 
4212 	/* receive start index doesn't match expected index */
4213 	if (ldcp->next_rxi != start) {
4214 		DWARN(vgenp, ldcp, "next_rxi(%d) != start(%d)\n",
4215 		    ldcp->next_rxi, start);
4216 
4217 		/* calculate the number of pkts lost */
4218 		if (start >= ldcp->next_rxi) {
4219 			n = start - ldcp->next_rxi;
4220 		} else  {
4221 			n = ldcp->num_rxds - (ldcp->next_rxi - start);
4222 		}
4223 
4224 		/*
4225 		 * sequence number of dring data message
4226 		 * is less than the next sequence number that
4227 		 * is expected:
4228 		 *
4229 		 * drop the message and the corresponding packets.
4230 		 */
4231 		if (ldcp->next_rxseq > dringmsg->seq_num) {
4232 			DWARN(vgenp, ldcp, "dropping pkts, expected "
4233 			"rxseq(0x%lx) > recvd(0x%lx)\n",
4234 			    ldcp->next_rxseq, dringmsg->seq_num);
4235 			/*
4236 			 * duplicate/multiple retransmissions from
4237 			 * sender?? drop this msg.
4238 			 */
4239 			return (rv);
4240 		}
4241 
4242 		/*
4243 		 * sequence number of dring data message
4244 		 * is greater than the next expected sequence number
4245 		 *
4246 		 * send a NACK back to the peer to indicate lost
4247 		 * packets.
4248 		 */
4249 		if (dringmsg->seq_num > ldcp->next_rxseq) {
4250 			statsp->rx_lost_pkts += n;
4251 			tagp->vio_subtype = VIO_SUBTYPE_NACK;
4252 			tagp->vio_sid = ldcp->local_sid;
4253 			/* indicate the range of lost descriptors */
4254 			dringmsg->start_idx = ldcp->next_rxi;
4255 			rxi = start;
4256 			DECR_RXI(rxi, ldcp);
4257 			dringmsg->end_idx = rxi;
4258 			/* dring ident is left unchanged */
4259 			rv = vgen_sendmsg(ldcp, (caddr_t)tagp,
4260 			    sizeof (*dringmsg), B_FALSE);
4261 			if (rv != VGEN_SUCCESS) {
4262 				DWARN(vgenp, ldcp,
4263 				    "vgen_sendmsg failed, stype:NACK\n");
4264 				return (rv);
4265 			}
4266 #ifdef VGEN_REXMIT
4267 			/*
4268 			 * stop further processing until peer
4269 			 * retransmits with the right index.
4270 			 * update next_rxseq expected.
4271 			 */
4272 			ldcp->next_rxseq += 1;
4273 			return (rv);
4274 #else	/* VGEN_REXMIT */
4275 			/*
4276 			 * treat this range of descrs/pkts as dropped
4277 			 * and set the new expected values for next_rxi
4278 			 * and next_rxseq. continue(below) to process
4279 			 * from the new start index.
4280 			 */
4281 			ldcp->next_rxi = start;
4282 			ldcp->next_rxseq += 1;
4283 #endif	/* VGEN_REXMIT */
4284 
4285 		} else if (dringmsg->seq_num == ldcp->next_rxseq) {
4286 			/*
4287 			 * expected and received seqnums match, but
4288 			 * the descriptor indeces don't?
4289 			 *
4290 			 * restart handshake with peer.
4291 			 */
4292 			DWARN(vgenp, ldcp, "next_rxseq(0x%lx)=="
4293 			    "seq_num(0x%lx)\n", ldcp->next_rxseq,
4294 			    dringmsg->seq_num);
4295 
4296 		}
4297 
4298 	} else {
4299 		/* expected and start dring indeces match */
4300 
4301 		if (dringmsg->seq_num != ldcp->next_rxseq) {
4302 
4303 			/* seqnums don't match */
4304 
4305 			DWARN(vgenp, ldcp,
4306 			    "next_rxseq(0x%lx) != seq_num(0x%lx)\n",
4307 			    ldcp->next_rxseq, dringmsg->seq_num);
4308 		}
4309 	}
4310 
4311 #endif	/* VGEN_HANDLE_LOST_PKTS */
4312 
4313 	/* Now receive messages */
4314 	rv = vgen_process_dring_data(ldcp, tagp);
4315 
4316 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
4317 	return (rv);
4318 }
4319 
4320 static int
4321 vgen_process_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4322 {
4323 	boolean_t set_ack_start = B_FALSE;
4324 	uint32_t start;
4325 	uint32_t ack_end;
4326 	uint32_t next_rxi;
4327 	uint32_t rxi;
4328 	int count = 0;
4329 	int rv = 0;
4330 	uint32_t retries = 0;
4331 	vgen_stats_t *statsp;
4332 	vnet_public_desc_t *rxdp;
4333 	vio_dring_entry_hdr_t *hdrp;
4334 	mblk_t *bp = NULL;
4335 	mblk_t *bpt = NULL;
4336 	uint32_t ack_start;
4337 	uint32_t datalen;
4338 	uint32_t ncookies;
4339 	boolean_t rxd_err = B_FALSE;
4340 	mblk_t *mp = NULL;
4341 	size_t nbytes;
4342 	boolean_t ack_needed = B_FALSE;
4343 	size_t nread;
4344 	uint64_t off = 0;
4345 	struct ether_header *ehp;
4346 	vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;
4347 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4348 
4349 	DBG1(vgenp, ldcp, "enter\n");
4350 
4351 	statsp = &ldcp->stats;
4352 	start = dringmsg->start_idx;
4353 
4354 	/*
4355 	 * start processing the descriptors from the specified
4356 	 * start index, up to the index a descriptor is not ready
4357 	 * to be processed or we process the entire descriptor ring
4358 	 * and wrap around upto the start index.
4359 	 */
4360 
4361 	/* need to set the start index of descriptors to be ack'd */
4362 	set_ack_start = B_TRUE;
4363 
4364 	/* index upto which we have ack'd */
4365 	ack_end = start;
4366 	DECR_RXI(ack_end, ldcp);
4367 
4368 	next_rxi = rxi =  start;
4369 	do {
4370 vgen_recv_retry:
4371 		rv = ldc_mem_dring_acquire(ldcp->rx_dhandle, rxi, rxi);
4372 		if (rv != 0) {
4373 			DWARN(vgenp, ldcp, "ldc_mem_dring_acquire() failed"
4374 			    " rv(%d)\n", rv);
4375 			statsp->ierrors++;
4376 			return (rv);
4377 		}
4378 
4379 		rxdp = &(ldcp->rxdp[rxi]);
4380 		hdrp = &rxdp->hdr;
4381 
4382 		if (hdrp->dstate != VIO_DESC_READY) {
4383 			/*
4384 			 * Before waiting and retry here, queue
4385 			 * the messages that are received already.
4386 			 * This will help the soft interrupt to
4387 			 * send them up with less latency.
4388 			 */
4389 			if (bp != NULL) {
4390 				DTRACE_PROBE1(vgen_rcv_msgs, int, count);
4391 				vgen_ldc_queue_data(ldcp, bp, bpt);
4392 				count = 0;
4393 				bp = bpt = NULL;
4394 			}
4395 			/*
4396 			 * descriptor is not ready.
4397 			 * retry descriptor acquire, stop processing
4398 			 * after max # retries.
4399 			 */
4400 			if (retries == vgen_recv_retries)
4401 				break;
4402 			retries++;
4403 			drv_usecwait(vgen_recv_delay);
4404 			goto vgen_recv_retry;
4405 		}
4406 		retries = 0;
4407 
4408 		if (set_ack_start) {
4409 			/*
4410 			 * initialize the start index of the range
4411 			 * of descriptors to be ack'd.
4412 			 */
4413 			ack_start = rxi;
4414 			set_ack_start = B_FALSE;
4415 		}
4416 
4417 		datalen = rxdp->nbytes;
4418 		ncookies = rxdp->ncookies;
4419 		if ((datalen < ETHERMIN) ||
4420 		    (ncookies == 0) ||
4421 		    (ncookies > MAX_COOKIES)) {
4422 			rxd_err = B_TRUE;
4423 		} else {
4424 			/*
4425 			 * Try to allocate an mblk from the free pool
4426 			 * of recv mblks for the channel.
4427 			 * If this fails, use allocb().
4428 			 */
4429 			nbytes = (VNET_IPALIGN + datalen + 7) & ~7;
4430 			mp = vio_multipool_allocb(&ldcp->vmp, nbytes);
4431 			if (!mp) {
4432 				/*
4433 				 * The data buffer returned by
4434 				 * allocb(9F) is 8byte aligned. We
4435 				 * allocate extra 8 bytes to ensure
4436 				 * size is multiple of 8 bytes for
4437 				 * ldc_mem_copy().
4438 				 */
4439 				statsp->rx_vio_allocb_fail++;
4440 				mp = allocb(VNET_IPALIGN + datalen + 8,
4441 				    BPRI_MED);
4442 			}
4443 		}
4444 		if ((rxd_err) || (mp == NULL)) {
4445 			/*
4446 			 * rxd_err or allocb() failure,
4447 			 * drop this packet, get next.
4448 			 */
4449 			if (rxd_err) {
4450 				statsp->ierrors++;
4451 				rxd_err = B_FALSE;
4452 			} else {
4453 				statsp->rx_allocb_fail++;
4454 			}
4455 
4456 			ack_needed = hdrp->ack;
4457 
4458 			/* set descriptor done bit */
4459 			hdrp->dstate = VIO_DESC_DONE;
4460 
4461 			rv = ldc_mem_dring_release(ldcp->rx_dhandle,
4462 			    rxi, rxi);
4463 			if (rv != 0) {
4464 				DWARN(vgenp, ldcp,
4465 				    "ldc_mem_dring_release err rv(%d)\n", rv);
4466 				return (rv);
4467 			}
4468 
4469 			if (ack_needed) {
4470 				ack_needed = B_FALSE;
4471 				/*
4472 				 * sender needs ack for this packet,
4473 				 * ack pkts upto this index.
4474 				 */
4475 				ack_end = rxi;
4476 
4477 				rv = vgen_send_dring_ack(ldcp, tagp,
4478 				    ack_start, ack_end,
4479 				    VIO_DP_ACTIVE);
4480 				if (rv != VGEN_SUCCESS) {
4481 					goto error_ret;
4482 				}
4483 
4484 				/* need to set new ack start index */
4485 				set_ack_start = B_TRUE;
4486 			}
4487 			goto vgen_next_rxi;
4488 		}
4489 
4490 		nread = nbytes;
4491 		rv = ldc_mem_copy(ldcp->ldc_handle,
4492 		    (caddr_t)mp->b_rptr, off, &nread,
4493 		    rxdp->memcookie, ncookies, LDC_COPY_IN);
4494 
4495 		/* if ldc_mem_copy() failed */
4496 		if (rv) {
4497 			DWARN(vgenp, ldcp, "ldc_mem_copy err rv(%d)\n", rv);
4498 			statsp->ierrors++;
4499 			freemsg(mp);
4500 			goto error_ret;
4501 		}
4502 
4503 		ack_needed = hdrp->ack;
4504 		hdrp->dstate = VIO_DESC_DONE;
4505 
4506 		rv = ldc_mem_dring_release(ldcp->rx_dhandle, rxi, rxi);
4507 		if (rv != 0) {
4508 			DWARN(vgenp, ldcp,
4509 			    "ldc_mem_dring_release err rv(%d)\n", rv);
4510 			goto error_ret;
4511 		}
4512 
4513 		mp->b_rptr += VNET_IPALIGN;
4514 
4515 		if (ack_needed) {
4516 			ack_needed = B_FALSE;
4517 			/*
4518 			 * sender needs ack for this packet,
4519 			 * ack pkts upto this index.
4520 			 */
4521 			ack_end = rxi;
4522 
4523 			rv = vgen_send_dring_ack(ldcp, tagp,
4524 			    ack_start, ack_end, VIO_DP_ACTIVE);
4525 			if (rv != VGEN_SUCCESS) {
4526 				goto error_ret;
4527 			}
4528 
4529 			/* need to set new ack start index */
4530 			set_ack_start = B_TRUE;
4531 		}
4532 
4533 		if (nread != nbytes) {
4534 			DWARN(vgenp, ldcp,
4535 			    "ldc_mem_copy nread(%lx), nbytes(%lx)\n",
4536 			    nread, nbytes);
4537 			statsp->ierrors++;
4538 			freemsg(mp);
4539 			goto vgen_next_rxi;
4540 		}
4541 
4542 		/* point to the actual end of data */
4543 		mp->b_wptr = mp->b_rptr + datalen;
4544 
4545 		/* update stats */
4546 		statsp->ipackets++;
4547 		statsp->rbytes += datalen;
4548 		ehp = (struct ether_header *)mp->b_rptr;
4549 		if (IS_BROADCAST(ehp))
4550 			statsp->brdcstrcv++;
4551 		else if (IS_MULTICAST(ehp))
4552 			statsp->multircv++;
4553 
4554 		/* build a chain of received packets */
4555 		if (bp == NULL) {
4556 			/* first pkt */
4557 			bp = mp;
4558 			bpt = bp;
4559 			bpt->b_next = NULL;
4560 		} else {
4561 			mp->b_next = NULL;
4562 			bpt->b_next = mp;
4563 			bpt = mp;
4564 		}
4565 
4566 		if (count++ > vgen_chain_len) {
4567 			DTRACE_PROBE1(vgen_rcv_msgs, int, count);
4568 			vgen_ldc_queue_data(ldcp, bp, bpt);
4569 			count = 0;
4570 			bp = bpt = NULL;
4571 		}
4572 
4573 vgen_next_rxi:
4574 		/* update end index of range of descrs to be ack'd */
4575 		ack_end = rxi;
4576 
4577 		/* update the next index to be processed */
4578 		INCR_RXI(next_rxi, ldcp);
4579 		if (next_rxi == start) {
4580 			/*
4581 			 * processed the entire descriptor ring upto
4582 			 * the index at which we started.
4583 			 */
4584 			break;
4585 		}
4586 
4587 		rxi = next_rxi;
4588 
4589 	_NOTE(CONSTCOND)
4590 	} while (1);
4591 
4592 	/*
4593 	 * send an ack message to peer indicating that we have stopped
4594 	 * processing descriptors.
4595 	 */
4596 	if (set_ack_start) {
4597 		/*
4598 		 * We have ack'd upto some index and we have not
4599 		 * processed any descriptors beyond that index.
4600 		 * Use the last ack'd index as both the start and
4601 		 * end of range of descrs being ack'd.
4602 		 * Note: This results in acking the last index twice
4603 		 * and should be harmless.
4604 		 */
4605 		ack_start = ack_end;
4606 	}
4607 
4608 	rv = vgen_send_dring_ack(ldcp, tagp, ack_start, ack_end,
4609 	    VIO_DP_STOPPED);
4610 	if (rv != VGEN_SUCCESS) {
4611 		goto error_ret;
4612 	}
4613 
4614 	/* save new recv index and expected seqnum of next dring msg */
4615 	ldcp->next_rxi = next_rxi;
4616 	ldcp->next_rxseq += 1;
4617 
4618 error_ret:
4619 	/* queue the packets received so far */
4620 	if (bp != NULL) {
4621 		DTRACE_PROBE1(vgen_rcv_msgs, int, count);
4622 		vgen_ldc_queue_data(ldcp, bp, bpt);
4623 		bp = bpt = NULL;
4624 	}
4625 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
4626 	return (rv);
4627 
4628 }
4629 
4630 static int
4631 vgen_handle_dring_data_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4632 {
4633 	int rv = 0;
4634 	uint32_t start;
4635 	int32_t end;
4636 	uint32_t txi;
4637 	boolean_t ready_txd = B_FALSE;
4638 	vgen_stats_t *statsp;
4639 	vgen_private_desc_t *tbufp;
4640 	vnet_public_desc_t *txdp;
4641 	vio_dring_entry_hdr_t *hdrp;
4642 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4643 	vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;
4644 
4645 	DBG1(vgenp, ldcp, "enter\n");
4646 	start = dringmsg->start_idx;
4647 	end = dringmsg->end_idx;
4648 	statsp = &ldcp->stats;
4649 
4650 	/*
4651 	 * received an ack corresponding to a specific descriptor for
4652 	 * which we had set the ACK bit in the descriptor (during
4653 	 * transmit). This enables us to reclaim descriptors.
4654 	 */
4655 
4656 	DBG2(vgenp, ldcp, "ACK:  start(%d), end(%d)\n", start, end);
4657 
4658 	/* validate start and end indeces in the tx ack msg */
4659 	if (!(CHECK_TXI(start, ldcp)) || !(CHECK_TXI(end, ldcp))) {
4660 		/* drop the message if invalid index */
4661 		DWARN(vgenp, ldcp, "Invalid Tx ack start(%d) or end(%d)\n",
4662 		    start, end);
4663 		return (rv);
4664 	}
4665 	/* validate dring_ident */
4666 	if (dringmsg->dring_ident != ldcp->local_hparams.dring_ident) {
4667 		/* invalid dring_ident, drop the msg */
4668 		DWARN(vgenp, ldcp, "Invalid dring ident 0x%x\n",
4669 		    dringmsg->dring_ident);
4670 		return (rv);
4671 	}
4672 	statsp->dring_data_acks++;
4673 
4674 	/* reclaim descriptors that are done */
4675 	vgen_reclaim(ldcp);
4676 
4677 	if (dringmsg->dring_process_state != VIO_DP_STOPPED) {
4678 		/*
4679 		 * receiver continued processing descriptors after
4680 		 * sending us the ack.
4681 		 */
4682 		return (rv);
4683 	}
4684 
4685 	statsp->dring_stopped_acks++;
4686 
4687 	/* receiver stopped processing descriptors */
4688 	mutex_enter(&ldcp->wrlock);
4689 	mutex_enter(&ldcp->tclock);
4690 
4691 	/*
4692 	 * determine if there are any pending tx descriptors
4693 	 * ready to be processed by the receiver(peer) and if so,
4694 	 * send a message to the peer to restart receiving.
4695 	 */
4696 	ready_txd = B_FALSE;
4697 
4698 	/*
4699 	 * using the end index of the descriptor range for which
4700 	 * we received the ack, check if the next descriptor is
4701 	 * ready.
4702 	 */
4703 	txi = end;
4704 	INCR_TXI(txi, ldcp);
4705 	tbufp = &ldcp->tbufp[txi];
4706 	txdp = tbufp->descp;
4707 	hdrp = &txdp->hdr;
4708 	if (hdrp->dstate == VIO_DESC_READY) {
4709 		ready_txd = B_TRUE;
4710 	} else {
4711 		/*
4712 		 * descr next to the end of ack'd descr range is not
4713 		 * ready.
4714 		 * starting from the current reclaim index, check
4715 		 * if any descriptor is ready.
4716 		 */
4717 
4718 		txi = ldcp->cur_tbufp - ldcp->tbufp;
4719 		tbufp = &ldcp->tbufp[txi];
4720 
4721 		txdp = tbufp->descp;
4722 		hdrp = &txdp->hdr;
4723 		if (hdrp->dstate == VIO_DESC_READY) {
4724 			ready_txd = B_TRUE;
4725 		}
4726 
4727 	}
4728 
4729 	if (ready_txd) {
4730 		/*
4731 		 * we have tx descriptor(s) ready to be
4732 		 * processed by the receiver.
4733 		 * send a message to the peer with the start index
4734 		 * of ready descriptors.
4735 		 */
4736 		rv = vgen_send_dring_data(ldcp, txi, -1);
4737 		if (rv != VGEN_SUCCESS) {
4738 			ldcp->resched_peer = B_TRUE;
4739 			ldcp->resched_peer_txi = txi;
4740 			mutex_exit(&ldcp->tclock);
4741 			mutex_exit(&ldcp->wrlock);
4742 			return (rv);
4743 		}
4744 	} else {
4745 		/*
4746 		 * no ready tx descriptors. set the flag to send a
4747 		 * message to peer when tx descriptors are ready in
4748 		 * transmit routine.
4749 		 */
4750 		ldcp->resched_peer = B_TRUE;
4751 		ldcp->resched_peer_txi = ldcp->cur_tbufp - ldcp->tbufp;
4752 	}
4753 
4754 	mutex_exit(&ldcp->tclock);
4755 	mutex_exit(&ldcp->wrlock);
4756 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
4757 	return (rv);
4758 }
4759 
4760 static int
4761 vgen_handle_dring_data_nack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4762 {
4763 	int rv = 0;
4764 	uint32_t start;
4765 	int32_t end;
4766 	uint32_t txi;
4767 	vnet_public_desc_t *txdp;
4768 	vio_dring_entry_hdr_t *hdrp;
4769 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4770 	vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;
4771 #ifdef VGEN_REXMIT
4772 	vgen_stats_t *statsp = &ldcp->stats;
4773 #endif
4774 
4775 	DBG1(vgenp, ldcp, "enter\n");
4776 	start = dringmsg->start_idx;
4777 	end = dringmsg->end_idx;
4778 
4779 	/*
4780 	 * peer sent a NACK msg to indicate lost packets.
4781 	 * The start and end correspond to the range of descriptors
4782 	 * for which the peer didn't receive a dring data msg and so
4783 	 * didn't receive the corresponding data.
4784 	 */
4785 	DWARN(vgenp, ldcp, "NACK: start(%d), end(%d)\n", start, end);
4786 
4787 	/* validate start and end indeces in the tx nack msg */
4788 	if (!(CHECK_TXI(start, ldcp)) || !(CHECK_TXI(end, ldcp))) {
4789 		/* drop the message if invalid index */
4790 		DWARN(vgenp, ldcp, "Invalid Tx nack start(%d) or end(%d)\n",
4791 		    start, end);
4792 		return (rv);
4793 	}
4794 	/* validate dring_ident */
4795 	if (dringmsg->dring_ident != ldcp->local_hparams.dring_ident) {
4796 		/* invalid dring_ident, drop the msg */
4797 		DWARN(vgenp, ldcp, "Invalid dring ident 0x%x\n",
4798 		    dringmsg->dring_ident);
4799 		return (rv);
4800 	}
4801 	mutex_enter(&ldcp->txlock);
4802 	mutex_enter(&ldcp->tclock);
4803 
4804 	if (ldcp->next_tbufp == ldcp->cur_tbufp) {
4805 		/* no busy descriptors, bogus nack ? */
4806 		mutex_exit(&ldcp->tclock);
4807 		mutex_exit(&ldcp->txlock);
4808 		return (rv);
4809 	}
4810 
4811 #ifdef VGEN_REXMIT
4812 	/* send a new dring data msg including the lost descrs */
4813 	end = ldcp->next_tbufp - ldcp->tbufp;
4814 	DECR_TXI(end, ldcp);
4815 	rv = vgen_send_dring_data(ldcp, start, end);
4816 	if (rv != 0) {
4817 		/*
4818 		 * vgen_send_dring_data() error: drop all packets
4819 		 * in this descr range
4820 		 */
4821 		DWARN(vgenp, ldcp, "vgen_send_dring_data failed: rv(%d)\n", rv);
4822 		for (txi = start; txi <= end; ) {
4823 			tbufp = &(ldcp->tbufp[txi]);
4824 			txdp = tbufp->descp;
4825 			hdrp = &txdp->hdr;
4826 			tbufp->flags = VGEN_PRIV_DESC_FREE;
4827 			hdrp->dstate = VIO_DESC_FREE;
4828 			hdrp->ack = B_FALSE;
4829 			statsp->oerrors++;
4830 		}
4831 
4832 		/* update next pointer */
4833 		ldcp->next_tbufp = &(ldcp->tbufp[start]);
4834 		ldcp->next_txi = start;
4835 	}
4836 	DBG2(vgenp, ldcp, "rexmit: start(%d) end(%d)\n", start, end);
4837 #else	/* VGEN_REXMIT */
4838 	/* we just mark the descrs as done so they can be reclaimed */
4839 	for (txi = start; txi <= end; ) {
4840 		txdp = &(ldcp->txdp[txi]);
4841 		hdrp = &txdp->hdr;
4842 		if (hdrp->dstate == VIO_DESC_READY)
4843 			hdrp->dstate = VIO_DESC_DONE;
4844 		INCR_TXI(txi, ldcp);
4845 	}
4846 #endif	/* VGEN_REXMIT */
4847 	mutex_exit(&ldcp->tclock);
4848 	mutex_exit(&ldcp->txlock);
4849 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
4850 	return (rv);
4851 }
4852 
4853 static void
4854 vgen_reclaim(vgen_ldc_t *ldcp)
4855 {
4856 	mutex_enter(&ldcp->tclock);
4857 
4858 	vgen_reclaim_dring(ldcp);
4859 	ldcp->reclaim_lbolt = ddi_get_lbolt();
4860 
4861 	mutex_exit(&ldcp->tclock);
4862 }
4863 
4864 /*
4865  * transmit reclaim function. starting from the current reclaim index
4866  * look for descriptors marked DONE and reclaim the descriptor and the
4867  * corresponding buffers (tbuf).
4868  */
4869 static void
4870 vgen_reclaim_dring(vgen_ldc_t *ldcp)
4871 {
4872 	int count = 0;
4873 	vnet_public_desc_t *txdp;
4874 	vgen_private_desc_t *tbufp;
4875 	vio_dring_entry_hdr_t	*hdrp;
4876 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
4877 
4878 #ifdef DEBUG
4879 	if (vgen_trigger_txtimeout)
4880 		return;
4881 #endif
4882 
4883 	tbufp = ldcp->cur_tbufp;
4884 	txdp = tbufp->descp;
4885 	hdrp = &txdp->hdr;
4886 
4887 	while ((hdrp->dstate == VIO_DESC_DONE) &&
4888 	    (tbufp != ldcp->next_tbufp)) {
4889 		tbufp->flags = VGEN_PRIV_DESC_FREE;
4890 		hdrp->dstate = VIO_DESC_FREE;
4891 		hdrp->ack = B_FALSE;
4892 
4893 		tbufp = NEXTTBUF(ldcp, tbufp);
4894 		txdp = tbufp->descp;
4895 		hdrp = &txdp->hdr;
4896 		count++;
4897 	}
4898 
4899 	ldcp->cur_tbufp = tbufp;
4900 
4901 	/*
4902 	 * Check if mac layer should be notified to restart transmissions
4903 	 */
4904 	if ((ldcp->need_resched) && (count > 0)) {
4905 		ldcp->need_resched = B_FALSE;
4906 		vnet_tx_update(vgenp->vnetp);
4907 	}
4908 }
4909 
4910 /* return the number of pending transmits for the channel */
4911 static int
4912 vgen_num_txpending(vgen_ldc_t *ldcp)
4913 {
4914 	int n;
4915 
4916 	if (ldcp->next_tbufp >= ldcp->cur_tbufp) {
4917 		n = ldcp->next_tbufp - ldcp->cur_tbufp;
4918 	} else  {
4919 		/* cur_tbufp > next_tbufp */
4920 		n = ldcp->num_txds - (ldcp->cur_tbufp - ldcp->next_tbufp);
4921 	}
4922 
4923 	return (n);
4924 }
4925 
4926 /* determine if the transmit descriptor ring is full */
4927 static int
4928 vgen_tx_dring_full(vgen_ldc_t *ldcp)
4929 {
4930 	vgen_private_desc_t	*tbufp;
4931 	vgen_private_desc_t	*ntbufp;
4932 
4933 	tbufp = ldcp->next_tbufp;
4934 	ntbufp = NEXTTBUF(ldcp, tbufp);
4935 	if (ntbufp == ldcp->cur_tbufp) { /* out of tbufs/txds */
4936 		return (VGEN_SUCCESS);
4937 	}
4938 	return (VGEN_FAILURE);
4939 }
4940 
4941 /* determine if timeout condition has occured */
4942 static int
4943 vgen_ldc_txtimeout(vgen_ldc_t *ldcp)
4944 {
4945 	if (((ddi_get_lbolt() - ldcp->reclaim_lbolt) >
4946 	    drv_usectohz(vnet_ldcwd_txtimeout * 1000)) &&
4947 	    (vnet_ldcwd_txtimeout) &&
4948 	    (vgen_tx_dring_full(ldcp) == VGEN_SUCCESS)) {
4949 		return (VGEN_SUCCESS);
4950 	} else {
4951 		return (VGEN_FAILURE);
4952 	}
4953 }
4954 
4955 /* transmit watchdog timeout handler */
4956 static void
4957 vgen_ldc_watchdog(void *arg)
4958 {
4959 	vgen_ldc_t *ldcp;
4960 	vgen_t *vgenp;
4961 	int rv;
4962 
4963 	ldcp = (vgen_ldc_t *)arg;
4964 	vgenp = LDC_TO_VGEN(ldcp);
4965 
4966 	rv = vgen_ldc_txtimeout(ldcp);
4967 	if (rv == VGEN_SUCCESS) {
4968 		DWARN(vgenp, ldcp, "transmit timeout\n");
4969 #ifdef DEBUG
4970 		if (vgen_trigger_txtimeout) {
4971 			/* tx timeout triggered for debugging */
4972 			vgen_trigger_txtimeout = 0;
4973 		}
4974 #endif
4975 		mutex_enter(&ldcp->cblock);
4976 		ldcp->need_ldc_reset = B_TRUE;
4977 		vgen_handshake_retry(ldcp);
4978 		mutex_exit(&ldcp->cblock);
4979 		if (ldcp->need_resched) {
4980 			ldcp->need_resched = B_FALSE;
4981 			vnet_tx_update(vgenp->vnetp);
4982 		}
4983 	}
4984 
4985 	ldcp->wd_tid = timeout(vgen_ldc_watchdog, (caddr_t)ldcp,
4986 	    drv_usectohz(vnet_ldcwd_interval * 1000));
4987 }
4988 
4989 /* handler for error messages received from the peer ldc end-point */
4990 static void
4991 vgen_handle_errmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4992 {
4993 	_NOTE(ARGUNUSED(ldcp, tagp))
4994 }
4995 
4996 /* Check if the session id in the received message is valid */
4997 static int
4998 vgen_check_sid(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4999 {
5000 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5001 
5002 	if (tagp->vio_sid != ldcp->peer_sid) {
5003 		DWARN(vgenp, ldcp, "sid mismatch: expected(%x), rcvd(%x)\n",
5004 		    ldcp->peer_sid, tagp->vio_sid);
5005 		return (VGEN_FAILURE);
5006 	}
5007 	else
5008 		return (VGEN_SUCCESS);
5009 }
5010 
5011 static caddr_t
5012 vgen_print_ethaddr(uint8_t *a, char *ebuf)
5013 {
5014 	(void) sprintf(ebuf,
5015 	    "%x:%x:%x:%x:%x:%x", a[0], a[1], a[2], a[3], a[4], a[5]);
5016 	return (ebuf);
5017 }
5018 
5019 /* Handshake watchdog timeout handler */
5020 static void
5021 vgen_hwatchdog(void *arg)
5022 {
5023 	vgen_ldc_t *ldcp = (vgen_ldc_t *)arg;
5024 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5025 
5026 	DWARN(vgenp, ldcp,
5027 	    "handshake timeout ldc(%lx) phase(%x) state(%x)\n",
5028 	    ldcp->hphase, ldcp->hstate);
5029 
5030 	mutex_enter(&ldcp->cblock);
5031 	if (ldcp->cancel_htid) {
5032 		ldcp->cancel_htid = 0;
5033 		mutex_exit(&ldcp->cblock);
5034 		return;
5035 	}
5036 	ldcp->htid = 0;
5037 	ldcp->need_ldc_reset = B_TRUE;
5038 	vgen_handshake_retry(ldcp);
5039 	mutex_exit(&ldcp->cblock);
5040 }
5041 
5042 static void
5043 vgen_print_hparams(vgen_hparams_t *hp)
5044 {
5045 	uint8_t	addr[6];
5046 	char	ea[6];
5047 	ldc_mem_cookie_t *dc;
5048 
5049 	cmn_err(CE_CONT, "version_info:\n");
5050 	cmn_err(CE_CONT,
5051 	    "\tver_major: %d, ver_minor: %d, dev_class: %d\n",
5052 	    hp->ver_major, hp->ver_minor, hp->dev_class);
5053 
5054 	vnet_macaddr_ultostr(hp->addr, addr);
5055 	cmn_err(CE_CONT, "attr_info:\n");
5056 	cmn_err(CE_CONT, "\tMTU: %lx, addr: %s\n", hp->mtu,
5057 	    vgen_print_ethaddr(addr, ea));
5058 	cmn_err(CE_CONT,
5059 	    "\taddr_type: %x, xfer_mode: %x, ack_freq: %x\n",
5060 	    hp->addr_type, hp->xfer_mode, hp->ack_freq);
5061 
5062 	dc = &hp->dring_cookie;
5063 	cmn_err(CE_CONT, "dring_info:\n");
5064 	cmn_err(CE_CONT,
5065 	    "\tlength: %d, dsize: %d\n", hp->num_desc, hp->desc_size);
5066 	cmn_err(CE_CONT,
5067 	    "\tldc_addr: 0x%lx, ldc_size: %ld\n",
5068 	    dc->addr, dc->size);
5069 	cmn_err(CE_CONT, "\tdring_ident: 0x%lx\n", hp->dring_ident);
5070 }
5071 
5072 static void
5073 vgen_print_ldcinfo(vgen_ldc_t *ldcp)
5074 {
5075 	vgen_hparams_t *hp;
5076 
5077 	cmn_err(CE_CONT, "Channel Information:\n");
5078 	cmn_err(CE_CONT,
5079 	    "\tldc_id: 0x%lx, ldc_status: 0x%x\n",
5080 	    ldcp->ldc_id, ldcp->ldc_status);
5081 	cmn_err(CE_CONT,
5082 	    "\tlocal_sid: 0x%x, peer_sid: 0x%x\n",
5083 	    ldcp->local_sid, ldcp->peer_sid);
5084 	cmn_err(CE_CONT,
5085 	    "\thphase: 0x%x, hstate: 0x%x\n",
5086 	    ldcp->hphase, ldcp->hstate);
5087 
5088 	cmn_err(CE_CONT, "Local handshake params:\n");
5089 	hp = &ldcp->local_hparams;
5090 	vgen_print_hparams(hp);
5091 
5092 	cmn_err(CE_CONT, "Peer handshake params:\n");
5093 	hp = &ldcp->peer_hparams;
5094 	vgen_print_hparams(hp);
5095 }
5096 
5097 /*
5098  * vgen_ldc_queue_data -- Queue data in the LDC.
5099  */
5100 static void
5101 vgen_ldc_queue_data(vgen_ldc_t *ldcp, mblk_t *rhead, mblk_t *rtail)
5102 {
5103 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5104 
5105 	DBG1(vgenp, ldcp, "enter\n");
5106 	/*
5107 	 * If the receive thread is enabled, then the queue
5108 	 * is protected by the soft_lock. After queuing, trigger
5109 	 * the soft interrupt so that the interrupt handler sends these
5110 	 * messages up the stack.
5111 	 *
5112 	 * If the receive thread is not enabled, then the list is
5113 	 * automatically protected by the cblock lock, so no need
5114 	 * to hold any additional locks.
5115 	 */
5116 	if (ldcp->rcv_thread != NULL) {
5117 		mutex_enter(&ldcp->soft_lock);
5118 	}
5119 	if (ldcp->rcv_mhead == NULL) {
5120 		ldcp->rcv_mhead = rhead;
5121 		ldcp->rcv_mtail = rtail;
5122 	} else {
5123 		ldcp->rcv_mtail->b_next = rhead;
5124 		ldcp->rcv_mtail = rtail;
5125 	}
5126 	if (ldcp->rcv_thread != NULL) {
5127 		mutex_exit(&ldcp->soft_lock);
5128 		(void) ddi_intr_trigger_softint(ldcp->soft_handle, NULL);
5129 	}
5130 	DBG1(vgenp, ldcp, "exit\n");
5131 }
5132 
5133 /*
5134  * vgen_ldc_rcv_worker -- A per LDC worker thread to receive data.
5135  * This thread is woken up by the LDC interrupt handler to process
5136  * LDC packets and receive data.
5137  */
5138 static void
5139 vgen_ldc_rcv_worker(void *arg)
5140 {
5141 	callb_cpr_t	cprinfo;
5142 	vgen_ldc_t *ldcp = (vgen_ldc_t *)arg;
5143 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5144 
5145 	DBG1(vgenp, ldcp, "enter\n");
5146 	CALLB_CPR_INIT(&cprinfo, &ldcp->rcv_thr_lock, callb_generic_cpr,
5147 	    "vnet_rcv_thread");
5148 	mutex_enter(&ldcp->rcv_thr_lock);
5149 	ldcp->rcv_thr_flags |= VGEN_WTHR_RUNNING;
5150 	while (!(ldcp->rcv_thr_flags & VGEN_WTHR_STOP)) {
5151 
5152 		CALLB_CPR_SAFE_BEGIN(&cprinfo);
5153 		/*
5154 		 * Wait until the data is received or a stop
5155 		 * request is received.
5156 		 */
5157 		while (!(ldcp->rcv_thr_flags &
5158 		    (VGEN_WTHR_DATARCVD | VGEN_WTHR_STOP))) {
5159 			cv_wait(&ldcp->rcv_thr_cv, &ldcp->rcv_thr_lock);
5160 		}
5161 		CALLB_CPR_SAFE_END(&cprinfo, &ldcp->rcv_thr_lock)
5162 
5163 		/*
5164 		 * First process the stop request.
5165 		 */
5166 		if (ldcp->rcv_thr_flags & VGEN_WTHR_STOP) {
5167 			DBG2(vgenp, ldcp, "stopped\n");
5168 			break;
5169 		}
5170 		ldcp->rcv_thr_flags &= ~VGEN_WTHR_DATARCVD;
5171 		mutex_exit(&ldcp->rcv_thr_lock);
5172 		DBG2(vgenp, ldcp, "calling vgen_handle_evt_read\n");
5173 		vgen_handle_evt_read(ldcp);
5174 		mutex_enter(&ldcp->rcv_thr_lock);
5175 	}
5176 
5177 	/*
5178 	 * Update the run status and wakeup the thread that
5179 	 * has sent the stop request.
5180 	 */
5181 	ldcp->rcv_thr_flags &= ~VGEN_WTHR_RUNNING;
5182 	cv_signal(&ldcp->rcv_thr_cv);
5183 	CALLB_CPR_EXIT(&cprinfo);
5184 	thread_exit();
5185 	DBG1(vgenp, ldcp, "exit\n");
5186 }
5187 
5188 /* vgen_stop_rcv_thread -- Co-ordinate with receive thread to stop it */
5189 static void
5190 vgen_stop_rcv_thread(vgen_ldc_t *ldcp)
5191 {
5192 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5193 
5194 	DBG1(vgenp, ldcp, "enter\n");
5195 	/*
5196 	 * Send a stop request by setting the stop flag and
5197 	 * wait until the receive thread stops.
5198 	 */
5199 	mutex_enter(&ldcp->rcv_thr_lock);
5200 	if (ldcp->rcv_thr_flags & VGEN_WTHR_RUNNING) {
5201 		ldcp->rcv_thr_flags |= VGEN_WTHR_STOP;
5202 		cv_signal(&ldcp->rcv_thr_cv);
5203 		DBG2(vgenp, ldcp, "waiting...");
5204 		while (ldcp->rcv_thr_flags & VGEN_WTHR_RUNNING) {
5205 			cv_wait(&ldcp->rcv_thr_cv, &ldcp->rcv_thr_lock);
5206 		}
5207 	}
5208 	mutex_exit(&ldcp->rcv_thr_lock);
5209 	ldcp->rcv_thread = NULL;
5210 	DBG1(vgenp, ldcp, "exit\n");
5211 }
5212 
5213 /*
5214  * vgen_ldc_rcv_softintr -- LDC Soft interrupt handler function.
5215  * Its job is to pickup the recieved packets that are queued in the
5216  * LDC and send them up.
5217  *
5218  * NOTE: An interrupt handler is being used to handle the upper
5219  * layer(s) requirement to send up only at interrupt context.
5220  */
5221 /* ARGSUSED */
5222 static uint_t
5223 vgen_ldc_rcv_softintr(caddr_t arg1, caddr_t arg2)
5224 {
5225 	mblk_t *mp;
5226 	vgen_ldc_t *ldcp = (vgen_ldc_t *)arg1;
5227 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5228 
5229 	DBG1(vgenp, ldcp, "enter\n");
5230 	DTRACE_PROBE1(vgen_soft_intr, uint64_t, ldcp->ldc_id);
5231 	mutex_enter(&ldcp->soft_lock);
5232 	mp = ldcp->rcv_mhead;
5233 	ldcp->rcv_mhead = ldcp->rcv_mtail = NULL;
5234 	mutex_exit(&ldcp->soft_lock);
5235 	if (mp != NULL) {
5236 		vnet_rx(vgenp->vnetp, NULL, mp);
5237 	}
5238 	DBG1(vgenp, ldcp, "exit\n");
5239 	return (DDI_INTR_CLAIMED);
5240 }
5241 
5242 #if DEBUG
5243 
5244 /*
5245  * Print debug messages - set to 0xf to enable all msgs
5246  */
5247 static void
5248 debug_printf(const char *fname, vgen_t *vgenp,
5249     vgen_ldc_t *ldcp, const char *fmt, ...)
5250 {
5251 	char    buf[256];
5252 	char    *bufp = buf;
5253 	va_list ap;
5254 
5255 	if ((vgenp != NULL) && (vgenp->vnetp != NULL)) {
5256 		(void) sprintf(bufp, "vnet%d:",
5257 		    ((vnet_t *)(vgenp->vnetp))->instance);
5258 		bufp += strlen(bufp);
5259 	}
5260 	if (ldcp != NULL) {
5261 		(void) sprintf(bufp, "ldc(%ld):", ldcp->ldc_id);
5262 		bufp += strlen(bufp);
5263 	}
5264 	(void) sprintf(bufp, "%s: ", fname);
5265 	bufp += strlen(bufp);
5266 
5267 	va_start(ap, fmt);
5268 	(void) vsprintf(bufp, fmt, ap);
5269 	va_end(ap);
5270 
5271 	if ((ldcp == NULL) ||(vgendbg_ldcid == -1) ||
5272 	    (vgendbg_ldcid == ldcp->ldc_id)) {
5273 		cmn_err(CE_CONT, "%s\n", buf);
5274 	}
5275 }
5276 #endif
5277