xref: /titanic_44/usr/src/uts/sun4v/io/vnet_gen.c (revision d00756ccb34596a328f8a15d1965da5412d366d0)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/errno.h>
31 #include <sys/param.h>
32 #include <sys/stream.h>
33 #include <sys/strsubr.h>
34 #include <sys/kmem.h>
35 #include <sys/conf.h>
36 #include <sys/devops.h>
37 #include <sys/ksynch.h>
38 #include <sys/stat.h>
39 #include <sys/modctl.h>
40 #include <sys/debug.h>
41 #include <sys/ethernet.h>
42 #include <sys/ddi.h>
43 #include <sys/sunddi.h>
44 #include <sys/strsun.h>
45 #include <sys/note.h>
46 #include <sys/mac.h>
47 #include <sys/mac_ether.h>
48 #include <sys/ldc.h>
49 #include <sys/mach_descrip.h>
50 #include <sys/mdeg.h>
51 #include <net/if.h>
52 #include <sys/vnet.h>
53 #include <sys/vio_mailbox.h>
54 #include <sys/vio_common.h>
55 #include <sys/vnet_common.h>
56 #include <sys/vnet_mailbox.h>
57 #include <sys/vio_util.h>
58 #include <sys/vnet_gen.h>
59 #include <sys/atomic.h>
60 #include <sys/callb.h>
61 #include <sys/sdt.h>
62 #include <sys/intr.h>
63 #include <sys/pattr.h>
64 
65 /*
66  * Implementation of the mac functionality for vnet using the
67  * generic(default) transport layer of sun4v Logical Domain Channels(LDC).
68  */
69 
70 /*
71  * Function prototypes.
72  */
73 /* vgen proxy entry points */
74 int vgen_init(void *vnetp, dev_info_t *vnetdip, const uint8_t *macaddr,
75 	mac_register_t **vgenmacp);
76 int vgen_uninit(void *arg);
77 static int vgen_start(void *arg);
78 static void vgen_stop(void *arg);
79 static mblk_t *vgen_tx(void *arg, mblk_t *mp);
80 static int vgen_multicst(void *arg, boolean_t add,
81 	const uint8_t *mca);
82 static int vgen_promisc(void *arg, boolean_t on);
83 static int vgen_unicst(void *arg, const uint8_t *mca);
84 static int vgen_stat(void *arg, uint_t stat, uint64_t *val);
85 static void vgen_ioctl(void *arg, queue_t *wq, mblk_t *mp);
86 
87 /* externs - functions provided by vnet to add/remove/modify entries in fdb */
88 void vnet_add_fdb(void *arg, uint8_t *macaddr, mac_tx_t m_tx, void *txarg);
89 void vnet_del_fdb(void *arg, uint8_t *macaddr);
90 void vnet_modify_fdb(void *arg, uint8_t *macaddr, mac_tx_t m_tx,
91 	void *txarg, boolean_t upgrade);
92 void vnet_add_def_rte(void *arg, mac_tx_t m_tx, void *txarg);
93 void vnet_del_def_rte(void *arg);
94 void vnet_rx(void *arg, mac_resource_handle_t mrh, mblk_t *mp);
95 void vnet_tx_update(void *arg);
96 
97 /* vgen internal functions */
98 static void vgen_detach_ports(vgen_t *vgenp);
99 static void vgen_port_detach(vgen_port_t *portp);
100 static void vgen_port_list_insert(vgen_port_t *portp);
101 static void vgen_port_list_remove(vgen_port_t *portp);
102 static vgen_port_t *vgen_port_lookup(vgen_portlist_t *plistp,
103 	int port_num);
104 static int vgen_read_mdprops(vgen_t *vgenp);
105 static void vgen_read_pri_eth_types(vgen_t *vgenp, md_t *mdp,
106 	mde_cookie_t node);
107 static int vgen_mdeg_reg(vgen_t *vgenp);
108 static void vgen_mdeg_unreg(vgen_t *vgenp);
109 static int vgen_mdeg_cb(void *cb_argp, mdeg_result_t *resp);
110 static int vgen_add_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
111 static int vgen_remove_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
112 static int vgen_port_attach_mdeg(vgen_t *vgenp, int port_num, uint64_t *ldcids,
113 	int num_ids, struct ether_addr *macaddr, boolean_t vsw_port);
114 static void vgen_port_detach_mdeg(vgen_port_t *portp);
115 static int vgen_update_port(vgen_t *vgenp, md_t *curr_mdp,
116 	mde_cookie_t curr_mdex, md_t *prev_mdp, mde_cookie_t prev_mdex);
117 static uint64_t	vgen_port_stat(vgen_port_t *portp, uint_t stat);
118 
119 static int vgen_ldc_attach(vgen_port_t *portp, uint64_t ldc_id);
120 static void vgen_ldc_detach(vgen_ldc_t *ldcp);
121 static int vgen_alloc_tx_ring(vgen_ldc_t *ldcp);
122 static void vgen_free_tx_ring(vgen_ldc_t *ldcp);
123 static void vgen_init_ports(vgen_t *vgenp);
124 static void vgen_port_init(vgen_port_t *portp);
125 static void vgen_uninit_ports(vgen_t *vgenp);
126 static void vgen_port_uninit(vgen_port_t *portp);
127 static void vgen_init_ldcs(vgen_port_t *portp);
128 static void vgen_uninit_ldcs(vgen_port_t *portp);
129 static int vgen_ldc_init(vgen_ldc_t *ldcp);
130 static void vgen_ldc_uninit(vgen_ldc_t *ldcp);
131 static int vgen_init_tbufs(vgen_ldc_t *ldcp);
132 static void vgen_uninit_tbufs(vgen_ldc_t *ldcp);
133 static void vgen_clobber_tbufs(vgen_ldc_t *ldcp);
134 static void vgen_clobber_rxds(vgen_ldc_t *ldcp);
135 static uint64_t	vgen_ldc_stat(vgen_ldc_t *ldcp, uint_t stat);
136 static uint_t vgen_ldc_cb(uint64_t event, caddr_t arg);
137 static int vgen_portsend(vgen_port_t *portp, mblk_t *mp);
138 static int vgen_ldcsend(void *arg, mblk_t *mp);
139 static void vgen_ldcsend_pkt(void *arg, mblk_t *mp);
140 static int vgen_ldcsend_dring(void *arg, mblk_t *mp);
141 static void vgen_reclaim(vgen_ldc_t *ldcp);
142 static void vgen_reclaim_dring(vgen_ldc_t *ldcp);
143 static int vgen_num_txpending(vgen_ldc_t *ldcp);
144 static int vgen_tx_dring_full(vgen_ldc_t *ldcp);
145 static int vgen_ldc_txtimeout(vgen_ldc_t *ldcp);
146 static void vgen_ldc_watchdog(void *arg);
147 
148 /* vgen handshake functions */
149 static vgen_ldc_t *vh_nextphase(vgen_ldc_t *ldcp);
150 static int vgen_sendmsg(vgen_ldc_t *ldcp, caddr_t msg,  size_t msglen,
151 	boolean_t caller_holds_lock);
152 static int vgen_send_version_negotiate(vgen_ldc_t *ldcp);
153 static int vgen_send_attr_info(vgen_ldc_t *ldcp);
154 static int vgen_send_dring_reg(vgen_ldc_t *ldcp);
155 static int vgen_send_rdx_info(vgen_ldc_t *ldcp);
156 static int vgen_send_dring_data(vgen_ldc_t *ldcp, uint32_t start, int32_t end);
157 static int vgen_send_mcast_info(vgen_ldc_t *ldcp);
158 static int vgen_handshake_phase2(vgen_ldc_t *ldcp);
159 static void vgen_handshake_reset(vgen_ldc_t *ldcp);
160 static void vgen_reset_hphase(vgen_ldc_t *ldcp);
161 static void vgen_handshake(vgen_ldc_t *ldcp);
162 static int vgen_handshake_done(vgen_ldc_t *ldcp);
163 static void vgen_handshake_retry(vgen_ldc_t *ldcp);
164 static int vgen_handle_version_negotiate(vgen_ldc_t *ldcp,
165 	vio_msg_tag_t *tagp);
166 static int vgen_handle_attr_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
167 static int vgen_handle_dring_reg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
168 static int vgen_handle_rdx_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
169 static int vgen_handle_mcast_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
170 static int vgen_handle_ctrlmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
171 static void vgen_handle_pkt_data_nop(void *arg1, void *arg2, uint32_t msglen);
172 static void vgen_handle_pkt_data(void *arg1, void *arg2, uint32_t msglen);
173 static int vgen_handle_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
174 static int vgen_handle_dring_data_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
175 static int vgen_process_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
176 static int vgen_handle_dring_data_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
177 static int vgen_handle_dring_data_nack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
178 static int vgen_send_dring_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp,
179 	uint32_t start, int32_t end, uint8_t pstate);
180 static int vgen_handle_datamsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp,
181 	uint32_t msglen);
182 static void vgen_handle_errmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
183 static void vgen_handle_evt_up(vgen_ldc_t *ldcp, boolean_t flag);
184 static void vgen_handle_evt_reset(vgen_ldc_t *ldcp, boolean_t flag);
185 static int vgen_check_sid(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
186 static int vgen_check_datamsg_seq(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
187 static caddr_t vgen_print_ethaddr(uint8_t *a, char *ebuf);
188 static void vgen_hwatchdog(void *arg);
189 static void vgen_print_attr_info(vgen_ldc_t *ldcp, int endpoint);
190 static void vgen_print_hparams(vgen_hparams_t *hp);
191 static void vgen_print_ldcinfo(vgen_ldc_t *ldcp);
192 static void vgen_stop_rcv_thread(vgen_ldc_t *ldcp);
193 static void vgen_ldc_rcv_worker(void *arg);
194 static void vgen_handle_evt_read(vgen_ldc_t *ldcp);
195 static void vgen_rx(vgen_ldc_t *ldcp, mblk_t *bp);
196 static void vgen_set_vnet_proto_ops(vgen_ldc_t *ldcp);
197 static void vgen_reset_vnet_proto_ops(vgen_ldc_t *ldcp);
198 
/*
 * The handshake process consists of 5 phases defined below, with VH_PHASE0
 * being the pre-handshake phase and VH_DONE is the phase to indicate
 * successful completion of all phases.
 * Each phase may have one to several handshake states which are required
 * to complete successfully to move to the next phase.
 * Refer to the functions vgen_handshake() and vgen_handshake_done() for
 * more details.
 */
/*
 * handshake phases: VH_PHASE0..VH_PHASE3 take the values 0..3, while
 * VH_DONE is given the distinct value 0x80.
 */
enum {	VH_PHASE0, VH_PHASE1, VH_PHASE2, VH_PHASE3, VH_DONE = 0x80 };

/*
 * handshake states: each state is a single bit, so several states can be
 * recorded in one word. Every group of four (INFO_SENT, ACK_RCVD,
 * INFO_RCVD, ACK_SENT) is followed by a composite mask that combines the
 * group's ACK_RCVD and ACK_SENT bits.
 */
enum {

	/* version negotiation */
	VER_INFO_SENT	=	0x1,
	VER_ACK_RCVD	=	0x2,
	VER_INFO_RCVD	=	0x4,
	VER_ACK_SENT	=	0x8,
	VER_NEGOTIATED	=	(VER_ACK_RCVD | VER_ACK_SENT),

	/* attribute exchange */
	ATTR_INFO_SENT	=	0x10,
	ATTR_ACK_RCVD	=	0x20,
	ATTR_INFO_RCVD	=	0x40,
	ATTR_ACK_SENT	=	0x80,
	ATTR_INFO_EXCHANGED	=	(ATTR_ACK_RCVD | ATTR_ACK_SENT),

	/* descriptor ring registration */
	DRING_INFO_SENT	=	0x100,
	DRING_ACK_RCVD	=	0x200,
	DRING_INFO_RCVD	=	0x400,
	DRING_ACK_SENT	=	0x800,
	DRING_INFO_EXCHANGED	=	(DRING_ACK_RCVD | DRING_ACK_SENT),

	/* RDX exchange */
	RDX_INFO_SENT	=	0x1000,
	RDX_ACK_RCVD	=	0x2000,
	RDX_INFO_RCVD	=	0x4000,
	RDX_ACK_SENT	=	0x8000,
	RDX_EXCHANGED	=	(RDX_ACK_RCVD | RDX_ACK_SENT)

};
239 
240 #define	VGEN_PRI_ETH_DEFINED(vgenp)	((vgenp)->pri_num_types != 0)
241 
/*
 * Acquire / release all five per-channel locks.
 *
 * LDC_LOCK takes them in the fixed order cblock, rxlock, wrlock, txlock,
 * tclock; LDC_UNLOCK drops them in exactly the reverse order.
 *
 * Each macro is wrapped in do { } while (0) so that it behaves as a single
 * statement: it can be used as the body of an unbraced if/else without
 * only the first mutex call being guarded. Invoke as "LDC_LOCK(ldcp);".
 */
#define	LDC_LOCK(ldcp)	\
	do {							\
		mutex_enter(&((ldcp)->cblock));			\
		mutex_enter(&((ldcp)->rxlock));			\
		mutex_enter(&((ldcp)->wrlock));			\
		mutex_enter(&((ldcp)->txlock));			\
		mutex_enter(&((ldcp)->tclock));			\
	} while (0)
#define	LDC_UNLOCK(ldcp)	\
	do {							\
		mutex_exit(&((ldcp)->tclock));			\
		mutex_exit(&((ldcp)->txlock));			\
		mutex_exit(&((ldcp)->wrlock));			\
		mutex_exit(&((ldcp)->rxlock));			\
		mutex_exit(&((ldcp)->cblock));			\
	} while (0)
254 
255 static struct ether_addr etherbroadcastaddr = {
256 	0xff, 0xff, 0xff, 0xff, 0xff, 0xff
257 };
/*
 * MIB II broadcast/multicast packets
 *
 * Both macros take a pointer to a struct ether_header. IS_BROADCAST
 * compares the destination address against etherbroadcastaddr;
 * IS_MULTICAST tests the low-order bit of the first destination octet.
 * The argument is parenthesized so that any pointer expression
 * (e.g. "hdr + 1") can be passed, not just a plain identifier.
 */
#define	IS_BROADCAST(ehp) \
		(ether_cmp(&(ehp)->ether_dhost, &etherbroadcastaddr) == 0)
#define	IS_MULTICAST(ehp) \
		(((ehp)->ether_dhost.ether_addr_octet[0] & 01) == 1)
265 
266 /*
267  * Property names
268  */
269 static char macaddr_propname[] = "mac-address";
270 static char rmacaddr_propname[] = "remote-mac-address";
271 static char channel_propname[] = "channel-endpoint";
272 static char reg_propname[] = "reg";
273 static char port_propname[] = "port";
274 static char swport_propname[] = "switch-port";
275 static char id_propname[] = "id";
276 static char vdev_propname[] = "virtual-device";
277 static char vnet_propname[] = "network";
278 static char pri_types_propname[] = "priority-ether-types";
279 
280 /* versions supported - in decreasing order */
281 static vgen_ver_t vgen_versions[VGEN_NUM_VER] = { {1, 2} };
282 
283 /* Tunables */
284 uint32_t vgen_hwd_interval = 5;		/* handshake watchdog freq in sec */
285 uint32_t vgen_max_hretries = VNET_NUM_HANDSHAKES; /* # of handshake retries */
286 uint32_t vgen_ldcwr_retries = 10;	/* max # of ldc_write() retries */
287 uint32_t vgen_ldcup_retries = 5;	/* max # of ldc_up() retries */
288 uint32_t vgen_recv_delay = 1;		/* delay when rx descr not ready */
289 uint32_t vgen_recv_retries = 10;	/* retry when rx descr not ready */
290 uint32_t vgen_tx_retries = 0x4;		/* retry when tx descr not available */
291 uint32_t vgen_tx_delay = 0x30;		/* delay when tx descr not available */
292 
int vgen_rcv_thread_enabled = 1;	/* Enable Receive thread */
294 
/*
 * max # of packets accumulated prior to sending them up. It is best
 * to keep this at 60% of the number of receive buffers.
 */
299 uint32_t vgen_chain_len = (VGEN_NRBUFS * 0.6);
300 
301 /*
302  * Tunables for each receive buffer size and number of buffers for
303  * each buffer size.
304  */
305 uint32_t vgen_rbufsz1 = VGEN_DBLK_SZ_128;
306 uint32_t vgen_rbufsz2 = VGEN_DBLK_SZ_256;
307 uint32_t vgen_rbufsz3 = VGEN_DBLK_SZ_2048;
308 
309 uint32_t vgen_nrbufs1 = VGEN_NRBUFS;
310 uint32_t vgen_nrbufs2 = VGEN_NRBUFS;
311 uint32_t vgen_nrbufs3 = VGEN_NRBUFS;
312 
313 /*
314  * In the absence of "priority-ether-types" property in MD, the following
315  * internal tunable can be set to specify a single priority ethertype.
316  */
317 uint64_t vgen_pri_eth_type = 0;
318 
319 /*
320  * Number of transmit priority buffers that are preallocated per device.
321  * This number is chosen to be a small value to throttle transmission
322  * of priority packets. Note: Must be a power of 2 for vio_create_mblks().
323  */
324 uint32_t vgen_pri_tx_nmblks = 64;
325 
326 #ifdef DEBUG
327 /* flags to simulate error conditions for debugging */
328 int vgen_trigger_txtimeout = 0;
329 int vgen_trigger_rxlost = 0;
330 #endif
331 
332 /* MD update matching structure */
333 static md_prop_match_t	vport_prop_match[] = {
334 	{ MDET_PROP_VAL,	"id" },
335 	{ MDET_LIST_END,	NULL }
336 };
337 
338 static mdeg_node_match_t vport_match = { "virtual-device-port",
339 					vport_prop_match };
340 
341 /* template for matching a particular vnet instance */
342 static mdeg_prop_spec_t vgen_prop_template[] = {
343 	{ MDET_PROP_STR,	"name",		"network" },
344 	{ MDET_PROP_VAL,	"cfg-handle",	NULL },
345 	{ MDET_LIST_END,	NULL,		NULL }
346 };
347 
348 #define	VGEN_SET_MDEG_PROP_INST(specp, val)	(specp)[1].ps_val = (val)
349 
350 static int vgen_mdeg_cb(void *cb_argp, mdeg_result_t *resp);
351 
352 static mac_callbacks_t vgen_m_callbacks = {
353 	0,
354 	vgen_stat,
355 	vgen_start,
356 	vgen_stop,
357 	vgen_promisc,
358 	vgen_multicst,
359 	vgen_unicst,
360 	vgen_tx,
361 	NULL,
362 	NULL,
363 	NULL
364 };
365 
366 /* externs */
367 extern pri_t	maxclsyspri;
368 extern proc_t	p0;
369 extern uint32_t vnet_ntxds;
370 extern uint32_t vnet_ldcwd_interval;
371 extern uint32_t vnet_ldcwd_txtimeout;
372 extern uint32_t vnet_ldc_mtu;
373 extern uint32_t vnet_nrbufs;
374 
375 
376 #ifdef DEBUG
377 
378 extern int vnet_dbglevel;
379 static void debug_printf(const char *fname, vgen_t *vgenp,
380 	vgen_ldc_t *ldcp, const char *fmt, ...);
381 
382 /* -1 for all LDCs info, or ldc_id for a specific LDC info */
383 int vgendbg_ldcid = -1;
384 
385 /* simulate handshake error conditions for debug */
386 uint32_t vgen_hdbg;
387 #define	HDBG_VERSION	0x1
388 #define	HDBG_TIMEOUT	0x2
389 #define	HDBG_BAD_SID	0x4
390 #define	HDBG_OUT_STATE	0x8
391 
392 #endif
393 
394 /*
395  * vgen_init() is called by an instance of vnet driver to initialize the
396  * corresponding generic proxy transport layer. The arguments passed by vnet
397  * are - an opaque pointer to the vnet instance, pointers to dev_info_t and
398  * the mac address of the vnet device, and a pointer to mac_register_t of
399  * the generic transport is returned in the last argument.
400  */
401 int
402 vgen_init(void *vnetp, dev_info_t *vnetdip, const uint8_t *macaddr,
403     mac_register_t **vgenmacp)
404 {
405 	vgen_t *vgenp;
406 	mac_register_t *macp;
407 	int instance;
408 	int rv;
409 
410 	if ((vnetp == NULL) || (vnetdip == NULL))
411 		return (DDI_FAILURE);
412 
413 	instance = ddi_get_instance(vnetdip);
414 
415 	DBG1(NULL, NULL, "vnet(%d): enter\n", instance);
416 
417 	vgenp = kmem_zalloc(sizeof (vgen_t), KM_SLEEP);
418 
419 	vgenp->vnetp = vnetp;
420 	vgenp->vnetdip = vnetdip;
421 	bcopy(macaddr, &(vgenp->macaddr), ETHERADDRL);
422 
423 	if ((macp = mac_alloc(MAC_VERSION)) == NULL) {
424 		KMEM_FREE(vgenp);
425 		return (DDI_FAILURE);
426 	}
427 	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
428 	macp->m_driver = vgenp;
429 	macp->m_dip = vnetdip;
430 	macp->m_src_addr = (uint8_t *)&(vgenp->macaddr);
431 	macp->m_callbacks = &vgen_m_callbacks;
432 	macp->m_min_sdu = 0;
433 	macp->m_max_sdu = ETHERMTU;
434 	vgenp->macp = macp;
435 
436 	/* allocate multicast table */
437 	vgenp->mctab = kmem_zalloc(VGEN_INIT_MCTAB_SIZE *
438 	    sizeof (struct ether_addr), KM_SLEEP);
439 	vgenp->mccount = 0;
440 	vgenp->mcsize = VGEN_INIT_MCTAB_SIZE;
441 
442 	mutex_init(&vgenp->lock, NULL, MUTEX_DRIVER, NULL);
443 	rw_init(&vgenp->vgenports.rwlock, NULL, RW_DRIVER, NULL);
444 
445 	rv = vgen_read_mdprops(vgenp);
446 	if (rv != 0) {
447 		goto vgen_init_fail;
448 	}
449 
450 	/* register with MD event generator */
451 	rv = vgen_mdeg_reg(vgenp);
452 	if (rv != DDI_SUCCESS) {
453 		goto vgen_init_fail;
454 	}
455 
456 	/* register macp of this vgen_t with vnet */
457 	*vgenmacp = vgenp->macp;
458 
459 	DBG1(NULL, NULL, "vnet(%d): exit\n", instance);
460 	return (DDI_SUCCESS);
461 
462 vgen_init_fail:
463 	rw_destroy(&vgenp->vgenports.rwlock);
464 	mutex_destroy(&vgenp->lock);
465 	kmem_free(vgenp->mctab, VGEN_INIT_MCTAB_SIZE *
466 	    sizeof (struct ether_addr));
467 	if (VGEN_PRI_ETH_DEFINED(vgenp)) {
468 		kmem_free(vgenp->pri_types,
469 		    sizeof (uint16_t) * vgenp->pri_num_types);
470 		(void) vio_destroy_mblks(vgenp->pri_tx_vmp);
471 	}
472 	mac_free(vgenp->macp);
473 	KMEM_FREE(vgenp);
474 	return (DDI_FAILURE);
475 }
476 
477 /*
478  * Called by vnet to undo the initializations done by vgen_init().
479  * The handle provided by generic transport during vgen_init() is the argument.
480  */
481 int
482 vgen_uninit(void *arg)
483 {
484 	vgen_t		*vgenp = (vgen_t *)arg;
485 	vio_mblk_pool_t	*rp;
486 	vio_mblk_pool_t	*nrp;
487 
488 	if (vgenp == NULL) {
489 		return (DDI_FAILURE);
490 	}
491 
492 	DBG1(vgenp, NULL, "enter\n");
493 
494 	/* unregister with MD event generator */
495 	vgen_mdeg_unreg(vgenp);
496 
497 	mutex_enter(&vgenp->lock);
498 
499 	/* detach all ports from the device */
500 	vgen_detach_ports(vgenp);
501 
502 	/*
503 	 * free any pending rx mblk pools,
504 	 * that couldn't be freed previously during channel detach.
505 	 */
506 	rp = vgenp->rmp;
507 	while (rp != NULL) {
508 		nrp = vgenp->rmp = rp->nextp;
509 		if (vio_destroy_mblks(rp)) {
510 			vgenp->rmp = rp;
511 			mutex_exit(&vgenp->lock);
512 			return (DDI_FAILURE);
513 		}
514 		rp = nrp;
515 	}
516 
517 	/* free multicast table */
518 	kmem_free(vgenp->mctab, vgenp->mcsize * sizeof (struct ether_addr));
519 
520 	/* free pri_types table */
521 	if (VGEN_PRI_ETH_DEFINED(vgenp)) {
522 		kmem_free(vgenp->pri_types,
523 		    sizeof (uint16_t) * vgenp->pri_num_types);
524 		(void) vio_destroy_mblks(vgenp->pri_tx_vmp);
525 	}
526 
527 	mac_free(vgenp->macp);
528 
529 	mutex_exit(&vgenp->lock);
530 
531 	rw_destroy(&vgenp->vgenports.rwlock);
532 	mutex_destroy(&vgenp->lock);
533 
534 	KMEM_FREE(vgenp);
535 
536 	DBG1(vgenp, NULL, "exit\n");
537 
538 	return (DDI_SUCCESS);
539 }
540 
541 /* enable transmit/receive for the device */
542 int
543 vgen_start(void *arg)
544 {
545 	vgen_t		*vgenp = (vgen_t *)arg;
546 
547 	DBG1(vgenp, NULL, "enter\n");
548 
549 	mutex_enter(&vgenp->lock);
550 	vgen_init_ports(vgenp);
551 	vgenp->flags |= VGEN_STARTED;
552 	mutex_exit(&vgenp->lock);
553 
554 	DBG1(vgenp, NULL, "exit\n");
555 	return (DDI_SUCCESS);
556 }
557 
558 /* stop transmit/receive */
559 void
560 vgen_stop(void *arg)
561 {
562 	vgen_t		*vgenp = (vgen_t *)arg;
563 
564 	DBG1(vgenp, NULL, "enter\n");
565 
566 	mutex_enter(&vgenp->lock);
567 	vgen_uninit_ports(vgenp);
568 	vgenp->flags &= ~(VGEN_STARTED);
569 	mutex_exit(&vgenp->lock);
570 
571 	DBG1(vgenp, NULL, "exit\n");
572 }
573 
574 /* vgen transmit function */
575 static mblk_t *
576 vgen_tx(void *arg, mblk_t *mp)
577 {
578 	int i;
579 	vgen_port_t *portp;
580 	int status = VGEN_FAILURE;
581 
582 	portp = (vgen_port_t *)arg;
583 	/*
584 	 * Retry so that we avoid reporting a failure
585 	 * to the upper layer. Returning a failure may cause the
586 	 * upper layer to go into single threaded mode there by
587 	 * causing performance degradation, especially for a large
588 	 * number of connections.
589 	 */
590 	for (i = 0; i < vgen_tx_retries; ) {
591 		status = vgen_portsend(portp, mp);
592 		if (status == VGEN_SUCCESS) {
593 			break;
594 		}
595 		if (++i < vgen_tx_retries)
596 			delay(drv_usectohz(vgen_tx_delay));
597 	}
598 	if (status != VGEN_SUCCESS) {
599 		/* failure */
600 		return (mp);
601 	}
602 	/* success */
603 	return (NULL);
604 }
605 
606 /* transmit packets over the given port */
607 static int
608 vgen_portsend(vgen_port_t *portp, mblk_t *mp)
609 {
610 	vgen_ldclist_t	*ldclp;
611 	vgen_ldc_t *ldcp;
612 	int status;
613 	int rv = VGEN_SUCCESS;
614 
615 	ldclp = &portp->ldclist;
616 	READ_ENTER(&ldclp->rwlock);
617 	/*
618 	 * NOTE: for now, we will assume we have a single channel.
619 	 */
620 	if (ldclp->headp == NULL) {
621 		RW_EXIT(&ldclp->rwlock);
622 		return (VGEN_FAILURE);
623 	}
624 	ldcp = ldclp->headp;
625 
626 	status = ldcp->tx(ldcp, mp);
627 
628 	RW_EXIT(&ldclp->rwlock);
629 
630 	if (status != VGEN_TX_SUCCESS) {
631 		rv = VGEN_FAILURE;
632 	}
633 	return (rv);
634 }
635 
636 /*
637  * Wrapper function to transmit normal and/or priority frames over the channel.
638  */
639 static int
640 vgen_ldcsend(void *arg, mblk_t *mp)
641 {
642 	vgen_ldc_t		*ldcp = (vgen_ldc_t *)arg;
643 	int			status;
644 	struct ether_header	*ehp;
645 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
646 	uint32_t		num_types;
647 	uint16_t		*types;
648 	int			i;
649 
650 	ASSERT(VGEN_PRI_ETH_DEFINED(vgenp));
651 
652 	num_types = vgenp->pri_num_types;
653 	types = vgenp->pri_types;
654 	ehp = (struct ether_header *)mp->b_rptr;
655 
656 	for (i = 0; i < num_types; i++) {
657 
658 		if (ehp->ether_type == types[i]) {
659 			/* priority frame, use pri tx function */
660 			vgen_ldcsend_pkt(ldcp, mp);
661 			return (VGEN_SUCCESS);
662 		}
663 
664 	}
665 
666 	status  = vgen_ldcsend_dring(ldcp, mp);
667 
668 	return (status);
669 }
670 
671 /*
672  * This functions handles ldc channel reset while in the context
673  * of transmit routines: vgen_ldcsend_pkt() or vgen_ldcsend_dring().
674  */
675 static void
676 vgen_ldcsend_process_reset(vgen_ldc_t *ldcp)
677 {
678 	ldc_status_t	istatus;
679 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
680 
681 	/*
682 	 * Check if either callback thread or another tx thread is
683 	 * already running. Calling mutex_enter() will result in a
684 	 * deadlock if the other thread already holds cblock and is
685 	 * blocked in vnet_modify_fdb() (which is called from
686 	 * vgen_handle_evt_reset()) waiting for write access on rwlock,
687 	 * as this transmit thread already holds that lock as a reader
688 	 * in vnet_m_tx(). See comments in vnet_modify_fdb() in vnet.c.
689 	 * If we cannot get the lock, the thread which holds it will
690 	 * handle the reset.
691 	 */
692 	if (mutex_tryenter(&ldcp->cblock)) {
693 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
694 			DWARN(vgenp, ldcp, "ldc_status() error\n");
695 		} else {
696 			ldcp->ldc_status = istatus;
697 		}
698 		if (ldcp->ldc_status != LDC_UP) {
699 			/*
700 			 * Second arg is TRUE, as we know that
701 			 * the caller of this function - vnet_m_tx(),
702 			 * already holds fdb-rwlock as a reader.
703 			 */
704 			vgen_handle_evt_reset(ldcp, B_TRUE);
705 		}
706 		mutex_exit(&ldcp->cblock);
707 	}
708 }
709 
710 /*
711  * This function transmits the frame in the payload of a raw data
712  * (VIO_PKT_DATA) message. Thus, it provides an Out-Of-Band path to
713  * send special frames with high priorities, without going through
714  * the normal data path which uses descriptor ring mechanism.
715  */
716 static void
717 vgen_ldcsend_pkt(void *arg, mblk_t *mp)
718 {
719 	vgen_ldc_t		*ldcp = (vgen_ldc_t *)arg;
720 	vio_raw_data_msg_t	*pkt;
721 	mblk_t			*bp;
722 	mblk_t			*nmp = NULL;
723 	caddr_t			dst;
724 	uint32_t		mblksz;
725 	uint32_t		size;
726 	uint32_t		nbytes;
727 	int			rv;
728 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
729 	vgen_stats_t		*statsp = &ldcp->stats;
730 
731 	/* drop the packet if ldc is not up or handshake is not done */
732 	if (ldcp->ldc_status != LDC_UP) {
733 		(void) atomic_inc_32(&statsp->tx_pri_fail);
734 		DWARN(vgenp, ldcp, "status(%d), dropping packet\n",
735 		    ldcp->ldc_status);
736 		goto send_pkt_exit;
737 	}
738 
739 	if (ldcp->hphase != VH_DONE) {
740 		(void) atomic_inc_32(&statsp->tx_pri_fail);
741 		DWARN(vgenp, ldcp, "hphase(%x), dropping packet\n",
742 		    ldcp->hphase);
743 		goto send_pkt_exit;
744 	}
745 
746 	size = msgsize(mp);
747 
748 	/* frame size bigger than available payload len of raw data msg ? */
749 	if (size > (size_t)(ldcp->msglen - VIO_PKT_DATA_HDRSIZE)) {
750 		(void) atomic_inc_32(&statsp->tx_pri_fail);
751 		DWARN(vgenp, ldcp, "invalid size(%d)\n", size);
752 		goto send_pkt_exit;
753 	}
754 
755 	if (size < ETHERMIN)
756 		size = ETHERMIN;
757 
758 	/* alloc space for a raw data message */
759 	nmp = vio_allocb(vgenp->pri_tx_vmp);
760 	if (nmp == NULL) {
761 		(void) atomic_inc_32(&statsp->tx_pri_fail);
762 		DWARN(vgenp, ldcp, "vio_allocb failed\n");
763 		goto send_pkt_exit;
764 	}
765 	pkt = (vio_raw_data_msg_t *)nmp->b_rptr;
766 
767 	/* copy frame into the payload of raw data message */
768 	dst = (caddr_t)pkt->data;
769 	for (bp = mp; bp != NULL; bp = bp->b_cont) {
770 		mblksz = MBLKL(bp);
771 		bcopy(bp->b_rptr, dst, mblksz);
772 		dst += mblksz;
773 	}
774 
775 	/* setup the raw data msg */
776 	pkt->tag.vio_msgtype = VIO_TYPE_DATA;
777 	pkt->tag.vio_subtype = VIO_SUBTYPE_INFO;
778 	pkt->tag.vio_subtype_env = VIO_PKT_DATA;
779 	pkt->tag.vio_sid = ldcp->local_sid;
780 	nbytes = VIO_PKT_DATA_HDRSIZE + size;
781 
782 	/* send the msg over ldc */
783 	rv = vgen_sendmsg(ldcp, (caddr_t)pkt, nbytes, B_FALSE);
784 	if (rv != VGEN_SUCCESS) {
785 		(void) atomic_inc_32(&statsp->tx_pri_fail);
786 		DWARN(vgenp, ldcp, "Error sending priority frame\n");
787 		if (rv == ECONNRESET) {
788 			vgen_ldcsend_process_reset(ldcp);
789 		}
790 		goto send_pkt_exit;
791 	}
792 
793 	/* update stats */
794 	(void) atomic_inc_64(&statsp->tx_pri_packets);
795 	(void) atomic_add_64(&statsp->tx_pri_bytes, size);
796 
797 send_pkt_exit:
798 	if (nmp != NULL)
799 		freemsg(nmp);
800 	freemsg(mp);
801 }
802 
803 /*
804  * This function transmits normal (non-priority) data frames over
805  * the channel. It queues the frame into the transmit descriptor ring
806  * and sends a VIO_DRING_DATA message if needed, to wake up the
807  * peer to (re)start processing.
808  */
809 static int
810 vgen_ldcsend_dring(void *arg, mblk_t *mp)
811 {
812 	vgen_ldc_t		*ldcp = (vgen_ldc_t *)arg;
813 	vgen_private_desc_t	*tbufp;
814 	vgen_private_desc_t	*rtbufp;
815 	vnet_public_desc_t	*rtxdp;
816 	vgen_private_desc_t	*ntbufp;
817 	vnet_public_desc_t	*txdp;
818 	vio_dring_entry_hdr_t	*hdrp;
819 	vgen_stats_t		*statsp;
820 	struct ether_header	*ehp;
821 	boolean_t	is_bcast = B_FALSE;
822 	boolean_t	is_mcast = B_FALSE;
823 	size_t		mblksz;
824 	caddr_t		dst;
825 	mblk_t		*bp;
826 	size_t		size;
827 	int		rv = 0;
828 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
829 
830 	statsp = &ldcp->stats;
831 	size = msgsize(mp);
832 
833 	DBG1(vgenp, ldcp, "enter\n");
834 
835 	if (ldcp->ldc_status != LDC_UP) {
836 		DWARN(vgenp, ldcp, "status(%d), dropping packet\n",
837 		    ldcp->ldc_status);
838 		/* retry ldc_up() if needed */
839 		if (ldcp->flags & CHANNEL_STARTED)
840 			(void) ldc_up(ldcp->ldc_handle);
841 		goto send_dring_exit;
842 	}
843 
844 	/* drop the packet if ldc is not up or handshake is not done */
845 	if (ldcp->hphase != VH_DONE) {
846 		DWARN(vgenp, ldcp, "hphase(%x), dropping packet\n",
847 		    ldcp->hphase);
848 		goto send_dring_exit;
849 	}
850 
851 	if (size > (size_t)ETHERMAX) {
852 		DWARN(vgenp, ldcp, "invalid size(%d)\n", size);
853 		goto send_dring_exit;
854 	}
855 	if (size < ETHERMIN)
856 		size = ETHERMIN;
857 
858 	ehp = (struct ether_header *)mp->b_rptr;
859 	is_bcast = IS_BROADCAST(ehp);
860 	is_mcast = IS_MULTICAST(ehp);
861 
862 	mutex_enter(&ldcp->txlock);
863 	/*
864 	 * allocate a descriptor
865 	 */
866 	tbufp = ldcp->next_tbufp;
867 	ntbufp = NEXTTBUF(ldcp, tbufp);
868 	if (ntbufp == ldcp->cur_tbufp) { /* out of tbufs/txds */
869 
870 		mutex_enter(&ldcp->tclock);
871 		/* Try reclaiming now */
872 		vgen_reclaim_dring(ldcp);
873 		ldcp->reclaim_lbolt = ddi_get_lbolt();
874 
875 		if (ntbufp == ldcp->cur_tbufp) {
876 			/* Now we are really out of tbuf/txds */
877 			ldcp->need_resched = B_TRUE;
878 			mutex_exit(&ldcp->tclock);
879 
880 			statsp->tx_no_desc++;
881 			mutex_exit(&ldcp->txlock);
882 
883 			return (VGEN_TX_NORESOURCES);
884 		}
885 		mutex_exit(&ldcp->tclock);
886 	}
887 	/* update next available tbuf in the ring and update tx index */
888 	ldcp->next_tbufp = ntbufp;
889 	INCR_TXI(ldcp->next_txi, ldcp);
890 
891 	/* Mark the buffer busy before releasing the lock */
892 	tbufp->flags = VGEN_PRIV_DESC_BUSY;
893 	mutex_exit(&ldcp->txlock);
894 
895 	/* copy data into pre-allocated transmit buffer */
896 	dst = tbufp->datap + VNET_IPALIGN;
897 	for (bp = mp; bp != NULL; bp = bp->b_cont) {
898 		mblksz = MBLKL(bp);
899 		bcopy(bp->b_rptr, dst, mblksz);
900 		dst += mblksz;
901 	}
902 
903 	tbufp->datalen = size;
904 
905 	/* initialize the corresponding public descriptor (txd) */
906 	txdp = tbufp->descp;
907 	hdrp = &txdp->hdr;
908 	txdp->nbytes = size;
909 	txdp->ncookies = tbufp->ncookies;
910 	bcopy((tbufp->memcookie), (txdp->memcookie),
911 	    tbufp->ncookies * sizeof (ldc_mem_cookie_t));
912 
913 	mutex_enter(&ldcp->wrlock);
914 	/*
915 	 * If the flags not set to BUSY, it implies that the clobber
916 	 * was done while we were copying the data. In such case,
917 	 * discard the packet and return.
918 	 */
919 	if (tbufp->flags != VGEN_PRIV_DESC_BUSY) {
920 		statsp->oerrors++;
921 		mutex_exit(&ldcp->wrlock);
922 		goto send_dring_exit;
923 	}
924 	hdrp->dstate = VIO_DESC_READY;
925 
926 	/* update stats */
927 	statsp->opackets++;
928 	statsp->obytes += size;
929 	if (is_bcast)
930 		statsp->brdcstxmt++;
931 	else if (is_mcast)
932 		statsp->multixmt++;
933 
934 	/* send dring datamsg to the peer */
935 	if (ldcp->resched_peer) {
936 
937 		rtbufp = &ldcp->tbufp[ldcp->resched_peer_txi];
938 		rtxdp = rtbufp->descp;
939 
940 		if (rtxdp->hdr.dstate == VIO_DESC_READY) {
941 
942 			rv = vgen_send_dring_data(ldcp,
943 			    (uint32_t)ldcp->resched_peer_txi, -1);
944 			if (rv != 0) {
945 				/* error: drop the packet */
946 				DWARN(vgenp, ldcp, "vgen_send_dring_data "
947 				    "failed: rv(%d) len(%d)\n",
948 				    ldcp->ldc_id, rv, size);
949 				statsp->oerrors++;
950 			} else {
951 				ldcp->resched_peer = B_FALSE;
952 			}
953 
954 		}
955 
956 	}
957 
958 	mutex_exit(&ldcp->wrlock);
959 
960 send_dring_exit:
961 	if (rv == ECONNRESET) {
962 		vgen_ldcsend_process_reset(ldcp);
963 	}
964 	freemsg(mp);
965 	DBG1(vgenp, ldcp, "exit\n");
966 	return (VGEN_TX_SUCCESS);
967 }
968 
969 /* enable/disable a multicast address */
970 int
971 vgen_multicst(void *arg, boolean_t add, const uint8_t *mca)
972 {
973 	vgen_t			*vgenp;
974 	vnet_mcast_msg_t	mcastmsg;
975 	vio_msg_tag_t		*tagp;
976 	vgen_port_t		*portp;
977 	vgen_portlist_t		*plistp;
978 	vgen_ldc_t		*ldcp;
979 	vgen_ldclist_t		*ldclp;
980 	struct ether_addr	*addrp;
981 	int			rv = DDI_FAILURE;
982 	uint32_t		i;
983 
984 	vgenp = (vgen_t *)arg;
985 	addrp = (struct ether_addr *)mca;
986 	tagp = &mcastmsg.tag;
987 	bzero(&mcastmsg, sizeof (mcastmsg));
988 
989 	mutex_enter(&vgenp->lock);
990 
991 	plistp = &(vgenp->vgenports);
992 
993 	READ_ENTER(&plistp->rwlock);
994 
995 	portp = vgenp->vsw_portp;
996 	if (portp == NULL) {
997 		RW_EXIT(&plistp->rwlock);
998 		mutex_exit(&vgenp->lock);
999 		return (rv);
1000 	}
1001 	ldclp = &portp->ldclist;
1002 
1003 	READ_ENTER(&ldclp->rwlock);
1004 
1005 	ldcp = ldclp->headp;
1006 	if (ldcp == NULL)
1007 		goto vgen_mcast_exit;
1008 
1009 	mutex_enter(&ldcp->cblock);
1010 
1011 	if (ldcp->hphase == VH_DONE) {
1012 		/*
1013 		 * If handshake is done, send a msg to vsw to add/remove
1014 		 * the multicast address. Otherwise, we just update this
1015 		 * mcast address in our table and the table will be sync'd
1016 		 * with vsw when handshake completes.
1017 		 */
1018 		tagp->vio_msgtype = VIO_TYPE_CTRL;
1019 		tagp->vio_subtype = VIO_SUBTYPE_INFO;
1020 		tagp->vio_subtype_env = VNET_MCAST_INFO;
1021 		tagp->vio_sid = ldcp->local_sid;
1022 		bcopy(mca, &(mcastmsg.mca), ETHERADDRL);
1023 		mcastmsg.set = add;
1024 		mcastmsg.count = 1;
1025 		if (vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (mcastmsg),
1026 		    B_FALSE) != VGEN_SUCCESS) {
1027 			DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
1028 			mutex_exit(&ldcp->cblock);
1029 			goto vgen_mcast_exit;
1030 		}
1031 	}
1032 
1033 	mutex_exit(&ldcp->cblock);
1034 
1035 	if (add) {
1036 
1037 		/* expand multicast table if necessary */
1038 		if (vgenp->mccount >= vgenp->mcsize) {
1039 			struct ether_addr	*newtab;
1040 			uint32_t		newsize;
1041 
1042 
1043 			newsize = vgenp->mcsize * 2;
1044 
1045 			newtab = kmem_zalloc(newsize *
1046 			    sizeof (struct ether_addr), KM_NOSLEEP);
1047 			if (newtab == NULL)
1048 				goto vgen_mcast_exit;
1049 			bcopy(vgenp->mctab, newtab, vgenp->mcsize *
1050 			    sizeof (struct ether_addr));
1051 			kmem_free(vgenp->mctab,
1052 			    vgenp->mcsize * sizeof (struct ether_addr));
1053 
1054 			vgenp->mctab = newtab;
1055 			vgenp->mcsize = newsize;
1056 		}
1057 
1058 		/* add address to the table */
1059 		vgenp->mctab[vgenp->mccount++] = *addrp;
1060 
1061 	} else {
1062 
1063 		/* delete address from the table */
1064 		for (i = 0; i < vgenp->mccount; i++) {
1065 			if (ether_cmp(addrp, &(vgenp->mctab[i])) == 0) {
1066 
1067 				/*
1068 				 * If there's more than one address in this
1069 				 * table, delete the unwanted one by moving
1070 				 * the last one in the list over top of it;
1071 				 * otherwise, just remove it.
1072 				 */
1073 				if (vgenp->mccount > 1) {
1074 					vgenp->mctab[i] =
1075 					    vgenp->mctab[vgenp->mccount-1];
1076 				}
1077 				vgenp->mccount--;
1078 				break;
1079 			}
1080 		}
1081 	}
1082 
1083 	rv = DDI_SUCCESS;
1084 
1085 vgen_mcast_exit:
1086 	RW_EXIT(&ldclp->rwlock);
1087 	RW_EXIT(&plistp->rwlock);
1088 
1089 	mutex_exit(&vgenp->lock);
1090 	return (rv);
1091 }
1092 
1093 /* set or clear promiscuous mode on the device */
1094 static int
1095 vgen_promisc(void *arg, boolean_t on)
1096 {
1097 	_NOTE(ARGUNUSED(arg, on))
1098 	return (DDI_SUCCESS);
1099 }
1100 
1101 /* set the unicast mac address of the device */
1102 static int
1103 vgen_unicst(void *arg, const uint8_t *mca)
1104 {
1105 	_NOTE(ARGUNUSED(arg, mca))
1106 	return (DDI_SUCCESS);
1107 }
1108 
1109 /* get device statistics */
1110 int
1111 vgen_stat(void *arg, uint_t stat, uint64_t *val)
1112 {
1113 	vgen_t		*vgenp = (vgen_t *)arg;
1114 	vgen_port_t	*portp;
1115 	vgen_portlist_t	*plistp;
1116 
1117 	*val = 0;
1118 
1119 	plistp = &(vgenp->vgenports);
1120 	READ_ENTER(&plistp->rwlock);
1121 
1122 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
1123 		*val += vgen_port_stat(portp, stat);
1124 	}
1125 
1126 	RW_EXIT(&plistp->rwlock);
1127 
1128 	return (0);
1129 }
1130 
1131 static void
1132 vgen_ioctl(void *arg, queue_t *wq, mblk_t *mp)
1133 {
1134 	 _NOTE(ARGUNUSED(arg, wq, mp))
1135 }
1136 
1137 /* vgen internal functions */
1138 /* detach all ports from the device */
1139 static void
1140 vgen_detach_ports(vgen_t *vgenp)
1141 {
1142 	vgen_port_t	*portp;
1143 	vgen_portlist_t	*plistp;
1144 
1145 	plistp = &(vgenp->vgenports);
1146 	WRITE_ENTER(&plistp->rwlock);
1147 
1148 	while ((portp = plistp->headp) != NULL) {
1149 		vgen_port_detach(portp);
1150 	}
1151 
1152 	RW_EXIT(&plistp->rwlock);
1153 }
1154 
1155 /*
1156  * detach the given port.
1157  */
1158 static void
1159 vgen_port_detach(vgen_port_t *portp)
1160 {
1161 	vgen_t		*vgenp;
1162 	vgen_ldclist_t	*ldclp;
1163 	int		port_num;
1164 
1165 	vgenp = portp->vgenp;
1166 	port_num = portp->port_num;
1167 
1168 	DBG1(vgenp, NULL, "port(%d):enter\n", port_num);
1169 
1170 	/* remove it from port list */
1171 	vgen_port_list_remove(portp);
1172 
1173 	/* detach channels from this port */
1174 	ldclp = &portp->ldclist;
1175 	WRITE_ENTER(&ldclp->rwlock);
1176 	while (ldclp->headp) {
1177 		vgen_ldc_detach(ldclp->headp);
1178 	}
1179 	RW_EXIT(&ldclp->rwlock);
1180 	rw_destroy(&ldclp->rwlock);
1181 
1182 	if (vgenp->vsw_portp == portp) {
1183 		vgenp->vsw_portp = NULL;
1184 	}
1185 	KMEM_FREE(portp);
1186 
1187 	DBG1(vgenp, NULL, "port(%d):exit\n", port_num);
1188 }
1189 
1190 /* add a port to port list */
1191 static void
1192 vgen_port_list_insert(vgen_port_t *portp)
1193 {
1194 	vgen_portlist_t *plistp;
1195 	vgen_t *vgenp;
1196 
1197 	vgenp = portp->vgenp;
1198 	plistp = &(vgenp->vgenports);
1199 
1200 	if (plistp->headp == NULL) {
1201 		plistp->headp = portp;
1202 	} else {
1203 		plistp->tailp->nextp = portp;
1204 	}
1205 	plistp->tailp = portp;
1206 	portp->nextp = NULL;
1207 }
1208 
1209 /* remove a port from port list */
1210 static void
1211 vgen_port_list_remove(vgen_port_t *portp)
1212 {
1213 	vgen_port_t *prevp;
1214 	vgen_port_t *nextp;
1215 	vgen_portlist_t *plistp;
1216 	vgen_t *vgenp;
1217 
1218 	vgenp = portp->vgenp;
1219 
1220 	plistp = &(vgenp->vgenports);
1221 
1222 	if (plistp->headp == NULL)
1223 		return;
1224 
1225 	if (portp == plistp->headp) {
1226 		plistp->headp = portp->nextp;
1227 		if (portp == plistp->tailp)
1228 			plistp->tailp = plistp->headp;
1229 	} else {
1230 		for (prevp = plistp->headp;
1231 		    ((nextp = prevp->nextp) != NULL) && (nextp != portp);
1232 		    prevp = nextp)
1233 			;
1234 		if (nextp == portp) {
1235 			prevp->nextp = portp->nextp;
1236 		}
1237 		if (portp == plistp->tailp)
1238 			plistp->tailp = prevp;
1239 	}
1240 }
1241 
1242 /* lookup a port in the list based on port_num */
1243 static vgen_port_t *
1244 vgen_port_lookup(vgen_portlist_t *plistp, int port_num)
1245 {
1246 	vgen_port_t *portp = NULL;
1247 
1248 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
1249 		if (portp->port_num == port_num) {
1250 			break;
1251 		}
1252 	}
1253 
1254 	return (portp);
1255 }
1256 
1257 /* enable ports for transmit/receive */
1258 static void
1259 vgen_init_ports(vgen_t *vgenp)
1260 {
1261 	vgen_port_t	*portp;
1262 	vgen_portlist_t	*plistp;
1263 
1264 	plistp = &(vgenp->vgenports);
1265 	READ_ENTER(&plistp->rwlock);
1266 
1267 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
1268 		vgen_port_init(portp);
1269 	}
1270 
1271 	RW_EXIT(&plistp->rwlock);
1272 }
1273 
1274 static void
1275 vgen_port_init(vgen_port_t *portp)
1276 {
1277 	vgen_t *vgenp;
1278 
1279 	vgenp = portp->vgenp;
1280 	/*
1281 	 * Create fdb entry in vnet, corresponding to the mac
1282 	 * address of this port. Note that the port specified
1283 	 * is vsw-port. This is done so that vsw-port acts
1284 	 * as the route to reach this macaddr, until the
1285 	 * channel for this port comes up (LDC_UP) and
1286 	 * handshake is done successfully.
1287 	 * eg, if the peer is OBP-vnet, it may not bring the
1288 	 * channel up for this port and may communicate via
1289 	 * vsw to reach this port.
1290 	 * Later, when Solaris-vnet comes up at the other end
1291 	 * of the channel for this port and brings up the channel,
1292 	 * it is an indication that peer vnet is capable of
1293 	 * distributed switching, so the direct route through this
1294 	 * port is specified in fdb, using vnet_modify_fdb(macaddr);
1295 	 */
1296 	vnet_add_fdb(vgenp->vnetp, (uint8_t *)&portp->macaddr,
1297 	    vgen_tx, vgenp->vsw_portp);
1298 
1299 	if (portp == vgenp->vsw_portp) {
1300 		/*
1301 		 * create the default route entry in vnet's fdb.
1302 		 * This is the entry used by vnet to reach
1303 		 * unknown destinations, which basically goes
1304 		 * through vsw on domain0 and out through the
1305 		 * physical device bound to vsw.
1306 		 */
1307 		vnet_add_def_rte(vgenp->vnetp, vgen_tx, portp);
1308 	}
1309 
1310 	/* Bring up the channels of this port */
1311 	vgen_init_ldcs(portp);
1312 }
1313 
1314 /* disable transmit/receive on ports */
1315 static void
1316 vgen_uninit_ports(vgen_t *vgenp)
1317 {
1318 	vgen_port_t	*portp;
1319 	vgen_portlist_t	*plistp;
1320 
1321 	plistp = &(vgenp->vgenports);
1322 	READ_ENTER(&plistp->rwlock);
1323 
1324 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
1325 		vgen_port_uninit(portp);
1326 	}
1327 
1328 	RW_EXIT(&plistp->rwlock);
1329 }
1330 
1331 static void
1332 vgen_port_uninit(vgen_port_t *portp)
1333 {
1334 	vgen_t *vgenp;
1335 
1336 	vgenp = portp->vgenp;
1337 
1338 	vgen_uninit_ldcs(portp);
1339 	/* delete the entry in vnet's fdb for this port */
1340 	vnet_del_fdb(vgenp->vnetp, (uint8_t *)&portp->macaddr);
1341 	if (portp == vgenp->vsw_portp) {
1342 		/*
1343 		 * if this is vsw-port, then delete the default
1344 		 * route entry in vnet's fdb.
1345 		 */
1346 		vnet_del_def_rte(vgenp->vnetp);
1347 	}
1348 }
1349 
1350 /*
1351  * Scan the machine description for this instance of vnet
1352  * and read its properties. Called only from vgen_init().
1353  * Returns: 0 on success, 1 on failure.
1354  */
1355 static int
1356 vgen_read_mdprops(vgen_t *vgenp)
1357 {
1358 	md_t		*mdp = NULL;
1359 	mde_cookie_t	rootnode;
1360 	mde_cookie_t	*listp = NULL;
1361 	uint64_t	inst;
1362 	uint64_t	cfgh;
1363 	char		*name;
1364 	int		rv = 1;
1365 	int		num_nodes = 0;
1366 	int		num_devs = 0;
1367 	int		listsz = 0;
1368 	int		i;
1369 
1370 	/*
1371 	 * In each 'virtual-device' node in the MD there is a
1372 	 * 'cfg-handle' property which is the MD's concept of
1373 	 * an instance number (this may be completely different from
1374 	 * the device drivers instance #). OBP reads that value and
1375 	 * stores it in the 'reg' property of the appropriate node in
1376 	 * the device tree. We first read this reg property and use this
1377 	 * to compare against the 'cfg-handle' property of vnet nodes
1378 	 * in MD to get to this specific vnet instance and then read
1379 	 * other properties that we are interested in.
1380 	 * We also cache the value of 'reg' property and use it later
1381 	 * to register callbacks with mdeg (see vgen_mdeg_reg())
1382 	 */
1383 	inst = ddi_prop_get_int(DDI_DEV_T_ANY, vgenp->vnetdip,
1384 	    DDI_PROP_DONTPASS, reg_propname, -1);
1385 	if (inst == -1) {
1386 		return (rv);
1387 	}
1388 
1389 	vgenp->regprop = inst;
1390 
1391 	if ((mdp = md_get_handle()) == NULL) {
1392 		return (rv);
1393 	}
1394 
1395 	num_nodes = md_node_count(mdp);
1396 	ASSERT(num_nodes > 0);
1397 
1398 	listsz = num_nodes * sizeof (mde_cookie_t);
1399 	listp = (mde_cookie_t *)kmem_zalloc(listsz, KM_SLEEP);
1400 
1401 	rootnode = md_root_node(mdp);
1402 
1403 	/* search for all "virtual_device" nodes */
1404 	num_devs = md_scan_dag(mdp, rootnode,
1405 	    md_find_name(mdp, vdev_propname),
1406 	    md_find_name(mdp, "fwd"), listp);
1407 	if (num_devs <= 0) {
1408 		goto vgen_readmd_exit;
1409 	}
1410 
1411 	/*
1412 	 * Now loop through the list of virtual-devices looking for
1413 	 * devices with name "network" and for each such device compare
1414 	 * its instance with what we have from the 'reg' property to
1415 	 * find the right node in MD and then read all its properties.
1416 	 */
1417 	for (i = 0; i < num_devs; i++) {
1418 
1419 		if (md_get_prop_str(mdp, listp[i], "name", &name) != 0) {
1420 			goto vgen_readmd_exit;
1421 		}
1422 
1423 		/* is this a "network" device? */
1424 		if (strcmp(name, vnet_propname) != 0)
1425 			continue;
1426 
1427 		if (md_get_prop_val(mdp, listp[i], "cfg-handle", &cfgh) != 0) {
1428 			goto vgen_readmd_exit;
1429 		}
1430 
1431 		/* is this the required instance of vnet? */
1432 		if (inst != cfgh)
1433 			continue;
1434 
1435 		/* now read all properties of this vnet instance */
1436 		vgen_read_pri_eth_types(vgenp, mdp, listp[i]);
1437 		rv = 0;
1438 		break;
1439 	}
1440 
1441 vgen_readmd_exit:
1442 
1443 	kmem_free(listp, listsz);
1444 	(void) md_fini_handle(mdp);
1445 	return (rv);
1446 }
1447 
1448 /*
1449  * This function reads "priority-ether-types" property from md. This property
1450  * is used to enable support for priority frames. Applications which need
1451  * guaranteed and timely delivery of certain high priority frames to/from
1452  * a vnet or vsw within ldoms, should configure this property by providing
1453  * the ether type(s) for which the priority facility is needed.
1454  * Normal data frames are delivered over a ldc channel using the descriptor
1455  * ring mechanism which is constrained by factors such as descriptor ring size,
1456  * the rate at which the ring is processed at the peer ldc end point, etc.
1457  * The priority mechanism provides an Out-Of-Band path to send/receive frames
1458  * as raw pkt data (VIO_PKT_DATA) messages over the channel, avoiding the
1459  * descriptor ring path and enables a more reliable and timely delivery of
1460  * frames to the peer.
1461  */
1462 static void
1463 vgen_read_pri_eth_types(vgen_t *vgenp, md_t *mdp, mde_cookie_t node)
1464 {
1465 	int		rv;
1466 	uint16_t	*types;
1467 	uint64_t	*data;
1468 	int		size;
1469 	int		i;
1470 	size_t		mblk_sz;
1471 
1472 	rv = md_get_prop_data(mdp, node, pri_types_propname,
1473 	    (uint8_t **)&data, &size);
1474 	if (rv != 0) {
1475 		/*
1476 		 * Property may not exist if we are running pre-ldoms1.1 f/w.
1477 		 * Check if 'vgen_pri_eth_type' has been set in that case.
1478 		 */
1479 		if (vgen_pri_eth_type != 0) {
1480 			size = sizeof (vgen_pri_eth_type);
1481 			data = &vgen_pri_eth_type;
1482 		} else {
1483 			DWARN(vgenp, NULL,
1484 			    "prop(%s) not found", pri_types_propname);
1485 			size = 0;
1486 		}
1487 	}
1488 
1489 	if (size == 0) {
1490 		vgenp->pri_num_types = 0;
1491 		return;
1492 	}
1493 
1494 	/*
1495 	 * we have some priority-ether-types defined;
1496 	 * allocate a table of these types and also
1497 	 * allocate a pool of mblks to transmit these
1498 	 * priority packets.
1499 	 */
1500 	size /= sizeof (uint64_t);
1501 	vgenp->pri_num_types = size;
1502 	vgenp->pri_types = kmem_zalloc(size * sizeof (uint16_t), KM_SLEEP);
1503 	for (i = 0, types = vgenp->pri_types; i < size; i++) {
1504 		types[i] = data[i] & 0xFFFF;
1505 	}
1506 	mblk_sz = (VIO_PKT_DATA_HDRSIZE + ETHERMAX + 7) & ~7;
1507 	(void) vio_create_mblks(vgen_pri_tx_nmblks, mblk_sz,
1508 	    &vgenp->pri_tx_vmp);
1509 }
1510 
1511 /* register with MD event generator */
1512 static int
1513 vgen_mdeg_reg(vgen_t *vgenp)
1514 {
1515 	mdeg_prop_spec_t	*pspecp;
1516 	mdeg_node_spec_t	*parentp;
1517 	uint_t			templatesz;
1518 	int			rv;
1519 	mdeg_handle_t		hdl;
1520 	int			i;
1521 
1522 	i = ddi_prop_get_int(DDI_DEV_T_ANY, vgenp->vnetdip,
1523 	    DDI_PROP_DONTPASS, reg_propname, -1);
1524 	if (i == -1) {
1525 		return (DDI_FAILURE);
1526 	}
1527 	templatesz = sizeof (vgen_prop_template);
1528 	pspecp = kmem_zalloc(templatesz, KM_NOSLEEP);
1529 	if (pspecp == NULL) {
1530 		return (DDI_FAILURE);
1531 	}
1532 	parentp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_NOSLEEP);
1533 	if (parentp == NULL) {
1534 		kmem_free(pspecp, templatesz);
1535 		return (DDI_FAILURE);
1536 	}
1537 
1538 	bcopy(vgen_prop_template, pspecp, templatesz);
1539 
1540 	/*
1541 	 * NOTE: The instance here refers to the value of "reg" property and
1542 	 * not the dev_info instance (ddi_get_instance()) of vnet.
1543 	 */
1544 	VGEN_SET_MDEG_PROP_INST(pspecp, i);
1545 
1546 	parentp->namep = "virtual-device";
1547 	parentp->specp = pspecp;
1548 
1549 	/* save parentp in vgen_t */
1550 	vgenp->mdeg_parentp = parentp;
1551 
1552 	rv = mdeg_register(parentp, &vport_match, vgen_mdeg_cb, vgenp, &hdl);
1553 	if (rv != MDEG_SUCCESS) {
1554 		DERR(vgenp, NULL, "mdeg_register failed\n");
1555 		KMEM_FREE(parentp);
1556 		kmem_free(pspecp, templatesz);
1557 		vgenp->mdeg_parentp = NULL;
1558 		return (DDI_FAILURE);
1559 	}
1560 
1561 	/* save mdeg handle in vgen_t */
1562 	vgenp->mdeg_hdl = hdl;
1563 
1564 	return (DDI_SUCCESS);
1565 }
1566 
1567 /* unregister with MD event generator */
1568 static void
1569 vgen_mdeg_unreg(vgen_t *vgenp)
1570 {
1571 	(void) mdeg_unregister(vgenp->mdeg_hdl);
1572 	kmem_free(vgenp->mdeg_parentp->specp, sizeof (vgen_prop_template));
1573 	KMEM_FREE(vgenp->mdeg_parentp);
1574 	vgenp->mdeg_parentp = NULL;
1575 	vgenp->mdeg_hdl = NULL;
1576 }
1577 
1578 /* callback function registered with MD event generator */
1579 static int
1580 vgen_mdeg_cb(void *cb_argp, mdeg_result_t *resp)
1581 {
1582 	int idx;
1583 	int vsw_idx = -1;
1584 	uint64_t val;
1585 	vgen_t *vgenp;
1586 
1587 	if ((resp == NULL) || (cb_argp == NULL)) {
1588 		return (MDEG_FAILURE);
1589 	}
1590 
1591 	vgenp = (vgen_t *)cb_argp;
1592 	DBG1(vgenp, NULL, "enter\n");
1593 
1594 	mutex_enter(&vgenp->lock);
1595 
1596 	DBG1(vgenp, NULL, "ports: removed(%x), "
1597 	"added(%x), updated(%x)\n", resp->removed.nelem,
1598 	    resp->added.nelem, resp->match_curr.nelem);
1599 
1600 	for (idx = 0; idx < resp->removed.nelem; idx++) {
1601 		(void) vgen_remove_port(vgenp, resp->removed.mdp,
1602 		    resp->removed.mdep[idx]);
1603 	}
1604 
1605 	if (vgenp->vsw_portp == NULL) {
1606 		/*
1607 		 * find vsw_port and add it first, because other ports need
1608 		 * this when adding fdb entry (see vgen_port_init()).
1609 		 */
1610 		for (idx = 0; idx < resp->added.nelem; idx++) {
1611 			if (!(md_get_prop_val(resp->added.mdp,
1612 			    resp->added.mdep[idx], swport_propname, &val))) {
1613 				if (val == 0) {
1614 					/*
1615 					 * This port is connected to the
1616 					 * vsw on dom0.
1617 					 */
1618 					vsw_idx = idx;
1619 					if (vgen_add_port(vgenp,
1620 					    resp->added.mdp,
1621 					    resp->added.mdep[idx]) !=
1622 					    DDI_SUCCESS) {
1623 						cmn_err(CE_NOTE, "vnet%d Could "
1624 						    "not initialize virtual "
1625 						    "switch port.",
1626 						    ddi_get_instance(vgenp->
1627 						    vnetdip));
1628 						mutex_exit(&vgenp->lock);
1629 						return (MDEG_FAILURE);
1630 					}
1631 					break;
1632 				}
1633 			}
1634 		}
1635 		if (vsw_idx == -1) {
1636 			DWARN(vgenp, NULL, "can't find vsw_port\n");
1637 			mutex_exit(&vgenp->lock);
1638 			return (MDEG_FAILURE);
1639 		}
1640 	}
1641 
1642 	for (idx = 0; idx < resp->added.nelem; idx++) {
1643 		if ((vsw_idx != -1) && (vsw_idx == idx)) /* skip vsw_port */
1644 			continue;
1645 
1646 		/* If this port can't be added just skip it. */
1647 		(void) vgen_add_port(vgenp, resp->added.mdp,
1648 		    resp->added.mdep[idx]);
1649 	}
1650 
1651 	for (idx = 0; idx < resp->match_curr.nelem; idx++) {
1652 		(void) vgen_update_port(vgenp, resp->match_curr.mdp,
1653 		    resp->match_curr.mdep[idx],
1654 		    resp->match_prev.mdp,
1655 		    resp->match_prev.mdep[idx]);
1656 	}
1657 
1658 	mutex_exit(&vgenp->lock);
1659 	DBG1(vgenp, NULL, "exit\n");
1660 	return (MDEG_SUCCESS);
1661 }
1662 
1663 /* add a new port to the device */
1664 static int
1665 vgen_add_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
1666 {
1667 	uint64_t	port_num;
1668 	uint64_t	*ldc_ids;
1669 	uint64_t	macaddr;
1670 	uint64_t	val;
1671 	int		num_ldcs;
1672 	int		vsw_port = B_FALSE;
1673 	int		i;
1674 	int		addrsz;
1675 	int		num_nodes = 0;
1676 	int		listsz = 0;
1677 	int		rv = DDI_SUCCESS;
1678 	mde_cookie_t	*listp = NULL;
1679 	uint8_t		*addrp;
1680 	struct ether_addr	ea;
1681 
1682 	/* read "id" property to get the port number */
1683 	if (md_get_prop_val(mdp, mdex, id_propname, &port_num)) {
1684 		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
1685 		return (DDI_FAILURE);
1686 	}
1687 
1688 	/*
1689 	 * Find the channel endpoint node(s) under this port node.
1690 	 */
1691 	if ((num_nodes = md_node_count(mdp)) <= 0) {
1692 		DWARN(vgenp, NULL, "invalid number of nodes found (%d)",
1693 		    num_nodes);
1694 		return (DDI_FAILURE);
1695 	}
1696 
1697 	/* allocate space for node list */
1698 	listsz = num_nodes * sizeof (mde_cookie_t);
1699 	listp = kmem_zalloc(listsz, KM_NOSLEEP);
1700 	if (listp == NULL)
1701 		return (DDI_FAILURE);
1702 
1703 	num_ldcs = md_scan_dag(mdp, mdex,
1704 	    md_find_name(mdp, channel_propname),
1705 	    md_find_name(mdp, "fwd"), listp);
1706 
1707 	if (num_ldcs <= 0) {
1708 		DWARN(vgenp, NULL, "can't find %s nodes", channel_propname);
1709 		kmem_free(listp, listsz);
1710 		return (DDI_FAILURE);
1711 	}
1712 
1713 	DBG2(vgenp, NULL, "num_ldcs %d", num_ldcs);
1714 
1715 	ldc_ids = kmem_zalloc(num_ldcs * sizeof (uint64_t), KM_NOSLEEP);
1716 	if (ldc_ids == NULL) {
1717 		kmem_free(listp, listsz);
1718 		return (DDI_FAILURE);
1719 	}
1720 
1721 	for (i = 0; i < num_ldcs; i++) {
1722 		/* read channel ids */
1723 		if (md_get_prop_val(mdp, listp[i], id_propname, &ldc_ids[i])) {
1724 			DWARN(vgenp, NULL, "prop(%s) not found\n",
1725 			    id_propname);
1726 			kmem_free(listp, listsz);
1727 			kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
1728 			return (DDI_FAILURE);
1729 		}
1730 		DBG2(vgenp, NULL, "ldc_id 0x%llx", ldc_ids[i]);
1731 	}
1732 
1733 	kmem_free(listp, listsz);
1734 
1735 	if (md_get_prop_data(mdp, mdex, rmacaddr_propname, &addrp,
1736 	    &addrsz)) {
1737 		DWARN(vgenp, NULL, "prop(%s) not found\n", rmacaddr_propname);
1738 		kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
1739 		return (DDI_FAILURE);
1740 	}
1741 
1742 	if (addrsz < ETHERADDRL) {
1743 		DWARN(vgenp, NULL, "invalid address size (%d)\n", addrsz);
1744 		kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
1745 		return (DDI_FAILURE);
1746 	}
1747 
1748 	macaddr = *((uint64_t *)addrp);
1749 
1750 	DBG2(vgenp, NULL, "remote mac address 0x%llx\n", macaddr);
1751 
1752 	for (i = ETHERADDRL - 1; i >= 0; i--) {
1753 		ea.ether_addr_octet[i] = macaddr & 0xFF;
1754 		macaddr >>= 8;
1755 	}
1756 
1757 	if (vgenp->vsw_portp == NULL) {
1758 		if (!(md_get_prop_val(mdp, mdex, swport_propname, &val))) {
1759 			if (val == 0) {
1760 				/* This port is connected to the vsw on dom0 */
1761 				vsw_port = B_TRUE;
1762 			}
1763 		}
1764 	}
1765 	if (vgen_port_attach_mdeg(vgenp, (int)port_num, ldc_ids, num_ldcs,
1766 	    &ea, vsw_port) != DDI_SUCCESS) {
1767 		cmn_err(CE_NOTE, "vnet%d failed to attach port %d remote MAC "
1768 		    "address %s", ddi_get_instance(vgenp->vnetdip),
1769 		    (int)port_num, ether_sprintf(&ea));
1770 		rv = DDI_FAILURE;
1771 	}
1772 
1773 	kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
1774 
1775 	return (rv);
1776 }
1777 
1778 /* remove a port from the device */
1779 static int
1780 vgen_remove_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
1781 {
1782 	uint64_t	port_num;
1783 	vgen_port_t	*portp;
1784 	vgen_portlist_t	*plistp;
1785 
1786 	/* read "id" property to get the port number */
1787 	if (md_get_prop_val(mdp, mdex, id_propname, &port_num)) {
1788 		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
1789 		return (DDI_FAILURE);
1790 	}
1791 
1792 	plistp = &(vgenp->vgenports);
1793 
1794 	WRITE_ENTER(&plistp->rwlock);
1795 	portp = vgen_port_lookup(plistp, (int)port_num);
1796 	if (portp == NULL) {
1797 		DWARN(vgenp, NULL, "can't find port(%lx)\n", port_num);
1798 		RW_EXIT(&plistp->rwlock);
1799 		return (DDI_FAILURE);
1800 	}
1801 
1802 	vgen_port_detach_mdeg(portp);
1803 	RW_EXIT(&plistp->rwlock);
1804 
1805 	return (DDI_SUCCESS);
1806 }
1807 
1808 /* attach a port to the device based on mdeg data */
1809 static int
1810 vgen_port_attach_mdeg(vgen_t *vgenp, int port_num, uint64_t *ldcids,
1811 	int num_ids, struct ether_addr *macaddr, boolean_t vsw_port)
1812 {
1813 	vgen_port_t		*portp;
1814 	vgen_portlist_t		*plistp;
1815 	int			i;
1816 
1817 	portp = kmem_zalloc(sizeof (vgen_port_t), KM_NOSLEEP);
1818 	if (portp == NULL) {
1819 		return (DDI_FAILURE);
1820 	}
1821 	portp->vgenp = vgenp;
1822 	portp->port_num = port_num;
1823 
1824 	DBG1(vgenp, NULL, "port_num(%d)\n", portp->port_num);
1825 
1826 	portp->ldclist.num_ldcs = 0;
1827 	portp->ldclist.headp = NULL;
1828 	rw_init(&portp->ldclist.rwlock, NULL, RW_DRIVER, NULL);
1829 
1830 	ether_copy(macaddr, &portp->macaddr);
1831 	for (i = 0; i < num_ids; i++) {
1832 		DBG2(vgenp, NULL, "ldcid (%lx)\n", ldcids[i]);
1833 		if (vgen_ldc_attach(portp, ldcids[i]) == DDI_FAILURE) {
1834 			vgen_port_detach(portp);
1835 			return (DDI_FAILURE);
1836 		}
1837 	}
1838 
1839 	/* link it into the list of ports */
1840 	plistp = &(vgenp->vgenports);
1841 	WRITE_ENTER(&plistp->rwlock);
1842 	vgen_port_list_insert(portp);
1843 	RW_EXIT(&plistp->rwlock);
1844 
1845 	/* This port is connected to the vsw on domain0 */
1846 	if (vsw_port)
1847 		vgenp->vsw_portp = portp;
1848 
1849 	if (vgenp->flags & VGEN_STARTED) {	/* interface is configured */
1850 		vgen_port_init(portp);
1851 	}
1852 
1853 	DBG1(vgenp, NULL, "exit: port_num(%d)\n", portp->port_num);
1854 	return (DDI_SUCCESS);
1855 }
1856 
1857 /* detach a port from the device based on mdeg data */
1858 static void
1859 vgen_port_detach_mdeg(vgen_port_t *portp)
1860 {
1861 	vgen_t *vgenp = portp->vgenp;
1862 
1863 	DBG1(vgenp, NULL, "enter: port_num(%d)\n", portp->port_num);
1864 	/* stop the port if needed */
1865 	if (vgenp->flags & VGEN_STARTED) {
1866 		vgen_port_uninit(portp);
1867 	}
1868 	vgen_port_detach(portp);
1869 
1870 	DBG1(vgenp, NULL, "exit: port_num(%d)\n", portp->port_num);
1871 }
1872 
1873 static int
1874 vgen_update_port(vgen_t *vgenp, md_t *curr_mdp, mde_cookie_t curr_mdex,
1875 	md_t *prev_mdp, mde_cookie_t prev_mdex)
1876 {
1877 	 _NOTE(ARGUNUSED(vgenp, curr_mdp, curr_mdex, prev_mdp, prev_mdex))
1878 
1879 	/* NOTE: TBD */
1880 	return (DDI_SUCCESS);
1881 }
1882 
1883 static uint64_t
1884 vgen_port_stat(vgen_port_t *portp, uint_t stat)
1885 {
1886 	vgen_ldclist_t	*ldclp;
1887 	vgen_ldc_t *ldcp;
1888 	uint64_t	val;
1889 
1890 	val = 0;
1891 	ldclp = &portp->ldclist;
1892 
1893 	READ_ENTER(&ldclp->rwlock);
1894 	for (ldcp = ldclp->headp; ldcp != NULL; ldcp = ldcp->nextp) {
1895 		val += vgen_ldc_stat(ldcp, stat);
1896 	}
1897 	RW_EXIT(&ldclp->rwlock);
1898 
1899 	return (val);
1900 }
1901 
1902 /* attach the channel corresponding to the given ldc_id to the port */
1903 static int
1904 vgen_ldc_attach(vgen_port_t *portp, uint64_t ldc_id)
1905 {
1906 	vgen_t 		*vgenp;
1907 	vgen_ldclist_t	*ldclp;
1908 	vgen_ldc_t 	*ldcp, **prev_ldcp;
1909 	ldc_attr_t 	attr;
1910 	int 		status;
1911 	ldc_status_t	istatus;
1912 	char		kname[MAXNAMELEN];
1913 	int		instance;
1914 	enum	{AST_init = 0x0, AST_ldc_alloc = 0x1,
1915 		AST_mutex_init = 0x2, AST_ldc_init = 0x4,
1916 		AST_ldc_reg_cb = 0x8, AST_alloc_tx_ring = 0x10,
1917 		AST_create_rxmblks = 0x20,
1918 		AST_create_rcv_thread = 0x40} attach_state;
1919 
1920 	attach_state = AST_init;
1921 	vgenp = portp->vgenp;
1922 	ldclp = &portp->ldclist;
1923 
1924 	ldcp = kmem_zalloc(sizeof (vgen_ldc_t), KM_NOSLEEP);
1925 	if (ldcp == NULL) {
1926 		goto ldc_attach_failed;
1927 	}
1928 	ldcp->ldc_id = ldc_id;
1929 	ldcp->portp = portp;
1930 
1931 	attach_state |= AST_ldc_alloc;
1932 
1933 	mutex_init(&ldcp->txlock, NULL, MUTEX_DRIVER, NULL);
1934 	mutex_init(&ldcp->cblock, NULL, MUTEX_DRIVER, NULL);
1935 	mutex_init(&ldcp->tclock, NULL, MUTEX_DRIVER, NULL);
1936 	mutex_init(&ldcp->wrlock, NULL, MUTEX_DRIVER, NULL);
1937 	mutex_init(&ldcp->rxlock, NULL, MUTEX_DRIVER, NULL);
1938 
1939 	attach_state |= AST_mutex_init;
1940 
1941 	attr.devclass = LDC_DEV_NT;
1942 	attr.instance = ddi_get_instance(vgenp->vnetdip);
1943 	attr.mode = LDC_MODE_UNRELIABLE;
1944 	attr.mtu = vnet_ldc_mtu;
1945 	status = ldc_init(ldc_id, &attr, &ldcp->ldc_handle);
1946 	if (status != 0) {
1947 		DWARN(vgenp, ldcp, "ldc_init failed,rv (%d)\n", status);
1948 		goto ldc_attach_failed;
1949 	}
1950 	attach_state |= AST_ldc_init;
1951 
1952 	if (vgen_rcv_thread_enabled) {
1953 		ldcp->rcv_thr_flags = 0;
1954 
1955 		mutex_init(&ldcp->rcv_thr_lock, NULL, MUTEX_DRIVER, NULL);
1956 		cv_init(&ldcp->rcv_thr_cv, NULL, CV_DRIVER, NULL);
1957 		ldcp->rcv_thread = thread_create(NULL, 2 * DEFAULTSTKSZ,
1958 		    vgen_ldc_rcv_worker, ldcp, 0, &p0, TS_RUN, maxclsyspri);
1959 
1960 		attach_state |= AST_create_rcv_thread;
1961 		if (ldcp->rcv_thread == NULL) {
1962 			DWARN(vgenp, ldcp, "Failed to create worker thread");
1963 			goto ldc_attach_failed;
1964 		}
1965 	}
1966 
1967 	status = ldc_reg_callback(ldcp->ldc_handle, vgen_ldc_cb, (caddr_t)ldcp);
1968 	if (status != 0) {
1969 		DWARN(vgenp, ldcp, "ldc_reg_callback failed, rv (%d)\n",
1970 		    status);
1971 		goto ldc_attach_failed;
1972 	}
1973 	/*
1974 	 * allocate a message for ldc_read()s, big enough to hold ctrl and
1975 	 * data msgs, including raw data msgs used to recv priority frames.
1976 	 */
1977 	ldcp->msglen = VIO_PKT_DATA_HDRSIZE + ETHERMAX;
1978 	ldcp->ldcmsg = kmem_alloc(ldcp->msglen, KM_SLEEP);
1979 	attach_state |= AST_ldc_reg_cb;
1980 
1981 	(void) ldc_status(ldcp->ldc_handle, &istatus);
1982 	ASSERT(istatus == LDC_INIT);
1983 	ldcp->ldc_status = istatus;
1984 
1985 	/* allocate transmit resources */
1986 	status = vgen_alloc_tx_ring(ldcp);
1987 	if (status != 0) {
1988 		goto ldc_attach_failed;
1989 	}
1990 	attach_state |= AST_alloc_tx_ring;
1991 
1992 	/* allocate receive resources */
1993 	status = vio_init_multipools(&ldcp->vmp, VGEN_NUM_VMPOOLS,
1994 	    vgen_rbufsz1, vgen_rbufsz2, vgen_rbufsz3,
1995 	    vgen_nrbufs1, vgen_nrbufs2, vgen_nrbufs3);
1996 	if (status != 0) {
1997 		goto ldc_attach_failed;
1998 	}
1999 	attach_state |= AST_create_rxmblks;
2000 
2001 	/* Setup kstats for the channel */
2002 	instance = ddi_get_instance(vgenp->vnetdip);
2003 	(void) sprintf(kname, "vnetldc0x%lx", ldcp->ldc_id);
2004 	ldcp->ksp = vgen_setup_kstats("vnet", instance, kname, &ldcp->stats);
2005 	if (ldcp->ksp == NULL) {
2006 		goto ldc_attach_failed;
2007 	}
2008 
2009 	/* initialize vgen_versions supported */
2010 	bcopy(vgen_versions, ldcp->vgen_versions, sizeof (ldcp->vgen_versions));
2011 	vgen_reset_vnet_proto_ops(ldcp);
2012 
2013 	/* link it into the list of channels for this port */
2014 	WRITE_ENTER(&ldclp->rwlock);
2015 	prev_ldcp = (vgen_ldc_t **)(&ldclp->headp);
2016 	ldcp->nextp = *prev_ldcp;
2017 	*prev_ldcp = ldcp;
2018 	ldclp->num_ldcs++;
2019 	RW_EXIT(&ldclp->rwlock);
2020 
2021 	ldcp->flags |= CHANNEL_ATTACHED;
2022 	return (DDI_SUCCESS);
2023 
2024 ldc_attach_failed:
2025 	if (attach_state & AST_ldc_reg_cb) {
2026 		(void) ldc_unreg_callback(ldcp->ldc_handle);
2027 		kmem_free(ldcp->ldcmsg, ldcp->msglen);
2028 	}
2029 	if (attach_state & AST_create_rcv_thread) {
2030 		if (ldcp->rcv_thread != NULL) {
2031 			vgen_stop_rcv_thread(ldcp);
2032 		}
2033 		mutex_destroy(&ldcp->rcv_thr_lock);
2034 		cv_destroy(&ldcp->rcv_thr_cv);
2035 	}
2036 	if (attach_state & AST_create_rxmblks) {
2037 		vio_mblk_pool_t *fvmp = NULL;
2038 
2039 		vio_destroy_multipools(&ldcp->vmp, &fvmp);
2040 		ASSERT(fvmp == NULL);
2041 	}
2042 	if (attach_state & AST_alloc_tx_ring) {
2043 		vgen_free_tx_ring(ldcp);
2044 	}
2045 	if (attach_state & AST_ldc_init) {
2046 		(void) ldc_fini(ldcp->ldc_handle);
2047 	}
2048 	if (attach_state & AST_mutex_init) {
2049 		mutex_destroy(&ldcp->tclock);
2050 		mutex_destroy(&ldcp->txlock);
2051 		mutex_destroy(&ldcp->cblock);
2052 		mutex_destroy(&ldcp->wrlock);
2053 		mutex_destroy(&ldcp->rxlock);
2054 	}
2055 	if (attach_state & AST_ldc_alloc) {
2056 		KMEM_FREE(ldcp);
2057 	}
2058 	return (DDI_FAILURE);
2059 }
2060 
2061 /* detach a channel from the port */
2062 static void
2063 vgen_ldc_detach(vgen_ldc_t *ldcp)
2064 {
2065 	vgen_port_t	*portp;
2066 	vgen_t 		*vgenp;
2067 	vgen_ldc_t 	*pldcp;
2068 	vgen_ldc_t	**prev_ldcp;
2069 	vgen_ldclist_t	*ldclp;
2070 
2071 	portp = ldcp->portp;
2072 	vgenp = portp->vgenp;
2073 	ldclp = &portp->ldclist;
2074 
2075 	prev_ldcp =  (vgen_ldc_t **)&ldclp->headp;
2076 	for (; (pldcp = *prev_ldcp) != NULL; prev_ldcp = &pldcp->nextp) {
2077 		if (pldcp == ldcp) {
2078 			break;
2079 		}
2080 	}
2081 
2082 	if (pldcp == NULL) {
2083 		/* invalid ldcp? */
2084 		return;
2085 	}
2086 
2087 	if (ldcp->ldc_status != LDC_INIT) {
2088 		DWARN(vgenp, ldcp, "ldc_status is not INIT\n");
2089 	}
2090 
2091 	if (ldcp->flags & CHANNEL_ATTACHED) {
2092 		ldcp->flags &= ~(CHANNEL_ATTACHED);
2093 
2094 		(void) ldc_unreg_callback(ldcp->ldc_handle);
2095 		if (ldcp->rcv_thread != NULL) {
2096 			/* First stop the receive thread */
2097 			vgen_stop_rcv_thread(ldcp);
2098 			mutex_destroy(&ldcp->rcv_thr_lock);
2099 			cv_destroy(&ldcp->rcv_thr_cv);
2100 		}
2101 		kmem_free(ldcp->ldcmsg, ldcp->msglen);
2102 
2103 		vgen_destroy_kstats(ldcp->ksp);
2104 		ldcp->ksp = NULL;
2105 
2106 		/*
2107 		 * if we cannot reclaim all mblks, put this
2108 		 * on the list of pools(vgenp->rmp) to be reclaimed when the
2109 		 * device gets detached (see vgen_uninit()).
2110 		 */
2111 		vio_destroy_multipools(&ldcp->vmp, &vgenp->rmp);
2112 
2113 		/* free transmit resources */
2114 		vgen_free_tx_ring(ldcp);
2115 
2116 		(void) ldc_fini(ldcp->ldc_handle);
2117 		mutex_destroy(&ldcp->tclock);
2118 		mutex_destroy(&ldcp->txlock);
2119 		mutex_destroy(&ldcp->cblock);
2120 		mutex_destroy(&ldcp->wrlock);
2121 		mutex_destroy(&ldcp->rxlock);
2122 
2123 		/* unlink it from the list */
2124 		*prev_ldcp = ldcp->nextp;
2125 		ldclp->num_ldcs--;
2126 		KMEM_FREE(ldcp);
2127 	}
2128 }
2129 
2130 /*
2131  * This function allocates transmit resources for the channel.
2132  * The resources consist of a transmit descriptor ring and an associated
2133  * transmit buffer ring.
2134  */
2135 static int
2136 vgen_alloc_tx_ring(vgen_ldc_t *ldcp)
2137 {
2138 	void *tbufp;
2139 	ldc_mem_info_t minfo;
2140 	uint32_t txdsize;
2141 	uint32_t tbufsize;
2142 	int status;
2143 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
2144 
2145 	ldcp->num_txds = vnet_ntxds;
2146 	txdsize = sizeof (vnet_public_desc_t);
2147 	tbufsize = sizeof (vgen_private_desc_t);
2148 
2149 	/* allocate transmit buffer ring */
2150 	tbufp = kmem_zalloc(ldcp->num_txds * tbufsize, KM_NOSLEEP);
2151 	if (tbufp == NULL) {
2152 		return (DDI_FAILURE);
2153 	}
2154 
2155 	/* create transmit descriptor ring */
2156 	status = ldc_mem_dring_create(ldcp->num_txds, txdsize,
2157 	    &ldcp->tx_dhandle);
2158 	if (status) {
2159 		DWARN(vgenp, ldcp, "ldc_mem_dring_create() failed\n");
2160 		kmem_free(tbufp, ldcp->num_txds * tbufsize);
2161 		return (DDI_FAILURE);
2162 	}
2163 
2164 	/* get the addr of descripror ring */
2165 	status = ldc_mem_dring_info(ldcp->tx_dhandle, &minfo);
2166 	if (status) {
2167 		DWARN(vgenp, ldcp, "ldc_mem_dring_info() failed\n");
2168 		kmem_free(tbufp, ldcp->num_txds * tbufsize);
2169 		(void) ldc_mem_dring_destroy(ldcp->tx_dhandle);
2170 		ldcp->tbufp = NULL;
2171 		return (DDI_FAILURE);
2172 	}
2173 	ldcp->txdp = (vnet_public_desc_t *)(minfo.vaddr);
2174 	ldcp->tbufp = tbufp;
2175 
2176 	ldcp->txdendp = &((ldcp->txdp)[ldcp->num_txds]);
2177 	ldcp->tbufendp = &((ldcp->tbufp)[ldcp->num_txds]);
2178 
2179 	return (DDI_SUCCESS);
2180 }
2181 
2182 /* Free transmit resources for the channel */
2183 static void
2184 vgen_free_tx_ring(vgen_ldc_t *ldcp)
2185 {
2186 	int tbufsize = sizeof (vgen_private_desc_t);
2187 
2188 	/* free transmit descriptor ring */
2189 	(void) ldc_mem_dring_destroy(ldcp->tx_dhandle);
2190 
2191 	/* free transmit buffer ring */
2192 	kmem_free(ldcp->tbufp, ldcp->num_txds * tbufsize);
2193 	ldcp->txdp = ldcp->txdendp = NULL;
2194 	ldcp->tbufp = ldcp->tbufendp = NULL;
2195 }
2196 
2197 /* enable transmit/receive on the channels for the port */
2198 static void
2199 vgen_init_ldcs(vgen_port_t *portp)
2200 {
2201 	vgen_ldclist_t	*ldclp = &portp->ldclist;
2202 	vgen_ldc_t	*ldcp;
2203 
2204 	READ_ENTER(&ldclp->rwlock);
2205 	ldcp =  ldclp->headp;
2206 	for (; ldcp  != NULL; ldcp = ldcp->nextp) {
2207 		(void) vgen_ldc_init(ldcp);
2208 	}
2209 	RW_EXIT(&ldclp->rwlock);
2210 }
2211 
2212 /* stop transmit/receive on the channels for the port */
2213 static void
2214 vgen_uninit_ldcs(vgen_port_t *portp)
2215 {
2216 	vgen_ldclist_t	*ldclp = &portp->ldclist;
2217 	vgen_ldc_t	*ldcp;
2218 
2219 	READ_ENTER(&ldclp->rwlock);
2220 	ldcp =  ldclp->headp;
2221 	for (; ldcp  != NULL; ldcp = ldcp->nextp) {
2222 		vgen_ldc_uninit(ldcp);
2223 	}
2224 	RW_EXIT(&ldclp->rwlock);
2225 }
2226 
2227 /* enable transmit/receive on the channel */
2228 static int
2229 vgen_ldc_init(vgen_ldc_t *ldcp)
2230 {
2231 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
2232 	ldc_status_t	istatus;
2233 	int		rv;
2234 	uint32_t	retries = 0;
2235 	enum	{ ST_init = 0x0, ST_ldc_open = 0x1,
2236 		ST_init_tbufs = 0x2, ST_cb_enable = 0x4} init_state;
2237 	init_state = ST_init;
2238 
2239 	DBG1(vgenp, ldcp, "enter\n");
2240 	LDC_LOCK(ldcp);
2241 
2242 	rv = ldc_open(ldcp->ldc_handle);
2243 	if (rv != 0) {
2244 		DWARN(vgenp, ldcp, "ldc_open failed: rv(%d)\n", rv);
2245 		goto ldcinit_failed;
2246 	}
2247 	init_state |= ST_ldc_open;
2248 
2249 	(void) ldc_status(ldcp->ldc_handle, &istatus);
2250 	if (istatus != LDC_OPEN && istatus != LDC_READY) {
2251 		DWARN(vgenp, ldcp, "status(%d) is not OPEN/READY\n", istatus);
2252 		goto ldcinit_failed;
2253 	}
2254 	ldcp->ldc_status = istatus;
2255 
2256 	rv = vgen_init_tbufs(ldcp);
2257 	if (rv != 0) {
2258 		DWARN(vgenp, ldcp, "vgen_init_tbufs() failed\n");
2259 		goto ldcinit_failed;
2260 	}
2261 	init_state |= ST_init_tbufs;
2262 
2263 	rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_ENABLE);
2264 	if (rv != 0) {
2265 		DWARN(vgenp, ldcp, "ldc_set_cb_mode failed: rv(%d)\n", rv);
2266 		goto ldcinit_failed;
2267 	}
2268 
2269 	init_state |= ST_cb_enable;
2270 
2271 	do {
2272 		rv = ldc_up(ldcp->ldc_handle);
2273 		if ((rv != 0) && (rv == EWOULDBLOCK)) {
2274 			DBG2(vgenp, ldcp, "ldc_up err rv(%d)\n", rv);
2275 			drv_usecwait(VGEN_LDC_UP_DELAY);
2276 		}
2277 		if (retries++ >= vgen_ldcup_retries)
2278 			break;
2279 	} while (rv == EWOULDBLOCK);
2280 
2281 	(void) ldc_status(ldcp->ldc_handle, &istatus);
2282 	if (istatus == LDC_UP) {
2283 		DWARN(vgenp, ldcp, "status(%d) is UP\n", istatus);
2284 	}
2285 
2286 	ldcp->ldc_status = istatus;
2287 
2288 	/* initialize transmit watchdog timeout */
2289 	ldcp->wd_tid = timeout(vgen_ldc_watchdog, (caddr_t)ldcp,
2290 	    drv_usectohz(vnet_ldcwd_interval * 1000));
2291 
2292 	ldcp->hphase = -1;
2293 	ldcp->flags |= CHANNEL_STARTED;
2294 
2295 	/* if channel is already UP - start handshake */
2296 	if (istatus == LDC_UP) {
2297 		vgen_t *vgenp = LDC_TO_VGEN(ldcp);
2298 		if (ldcp->portp != vgenp->vsw_portp) {
2299 			/*
2300 			 * modify fdb entry to use this port as the
2301 			 * channel is up, instead of going through the
2302 			 * vsw-port (see comments in vgen_port_init())
2303 			 */
2304 			vnet_modify_fdb(vgenp->vnetp,
2305 			    (uint8_t *)&ldcp->portp->macaddr,
2306 			    vgen_tx, ldcp->portp, B_FALSE);
2307 		}
2308 
2309 		/* Initialize local session id */
2310 		ldcp->local_sid = ddi_get_lbolt();
2311 
2312 		/* clear peer session id */
2313 		ldcp->peer_sid = 0;
2314 		ldcp->hretries = 0;
2315 
2316 		/* Initiate Handshake process with peer ldc endpoint */
2317 		vgen_reset_hphase(ldcp);
2318 
2319 		mutex_exit(&ldcp->tclock);
2320 		mutex_exit(&ldcp->txlock);
2321 		mutex_exit(&ldcp->wrlock);
2322 		mutex_exit(&ldcp->rxlock);
2323 		vgen_handshake(vh_nextphase(ldcp));
2324 		mutex_exit(&ldcp->cblock);
2325 	} else {
2326 		LDC_UNLOCK(ldcp);
2327 	}
2328 
2329 	return (DDI_SUCCESS);
2330 
2331 ldcinit_failed:
2332 	if (init_state & ST_cb_enable) {
2333 		(void) ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE);
2334 	}
2335 	if (init_state & ST_init_tbufs) {
2336 		vgen_uninit_tbufs(ldcp);
2337 	}
2338 	if (init_state & ST_ldc_open) {
2339 		(void) ldc_close(ldcp->ldc_handle);
2340 	}
2341 	LDC_UNLOCK(ldcp);
2342 	DBG1(vgenp, ldcp, "exit\n");
2343 	return (DDI_FAILURE);
2344 }
2345 
2346 /* stop transmit/receive on the channel */
2347 static void
2348 vgen_ldc_uninit(vgen_ldc_t *ldcp)
2349 {
2350 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
2351 	int	rv;
2352 
2353 	DBG1(vgenp, ldcp, "enter\n");
2354 	LDC_LOCK(ldcp);
2355 
2356 	if ((ldcp->flags & CHANNEL_STARTED) == 0) {
2357 		LDC_UNLOCK(ldcp);
2358 		DWARN(vgenp, ldcp, "CHANNEL_STARTED flag is not set\n");
2359 		return;
2360 	}
2361 
2362 	/* disable further callbacks */
2363 	rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE);
2364 	if (rv != 0) {
2365 		DWARN(vgenp, ldcp, "ldc_set_cb_mode failed\n");
2366 	}
2367 
2368 	/*
2369 	 * clear handshake done bit and wait for pending tx and cb to finish.
2370 	 * release locks before untimeout(9F) is invoked to cancel timeouts.
2371 	 */
2372 	ldcp->hphase &= ~(VH_DONE);
2373 	LDC_UNLOCK(ldcp);
2374 
2375 	/* cancel handshake watchdog timeout */
2376 	if (ldcp->htid) {
2377 		(void) untimeout(ldcp->htid);
2378 		ldcp->htid = 0;
2379 	}
2380 
2381 	/* cancel transmit watchdog timeout */
2382 	if (ldcp->wd_tid) {
2383 		(void) untimeout(ldcp->wd_tid);
2384 		ldcp->wd_tid = 0;
2385 	}
2386 
2387 	drv_usecwait(1000);
2388 
2389 	/* acquire locks again; any pending transmits and callbacks are done */
2390 	LDC_LOCK(ldcp);
2391 
2392 	vgen_reset_hphase(ldcp);
2393 
2394 	vgen_uninit_tbufs(ldcp);
2395 
2396 	rv = ldc_close(ldcp->ldc_handle);
2397 	if (rv != 0) {
2398 		DWARN(vgenp, ldcp, "ldc_close err\n");
2399 	}
2400 	ldcp->ldc_status = LDC_INIT;
2401 	ldcp->flags &= ~(CHANNEL_STARTED);
2402 
2403 	LDC_UNLOCK(ldcp);
2404 
2405 	DBG1(vgenp, ldcp, "exit\n");
2406 }
2407 
2408 /* Initialize the transmit buffer ring for the channel */
2409 static int
2410 vgen_init_tbufs(vgen_ldc_t *ldcp)
2411 {
2412 	vgen_private_desc_t	*tbufp;
2413 	vnet_public_desc_t	*txdp;
2414 	vio_dring_entry_hdr_t		*hdrp;
2415 	int 			i;
2416 	int 			rv;
2417 	caddr_t			datap = NULL;
2418 	int			ci;
2419 	uint32_t		ncookies;
2420 
2421 	bzero(ldcp->tbufp, sizeof (*tbufp) * (ldcp->num_txds));
2422 	bzero(ldcp->txdp, sizeof (*txdp) * (ldcp->num_txds));
2423 
2424 	datap = kmem_zalloc(ldcp->num_txds * VGEN_TXDBLK_SZ, KM_SLEEP);
2425 	ldcp->tx_datap = datap;
2426 
2427 	/*
2428 	 * for each private descriptor, allocate a ldc mem_handle which is
2429 	 * required to map the data during transmit, set the flags
2430 	 * to free (available for use by transmit routine).
2431 	 */
2432 
2433 	for (i = 0; i < ldcp->num_txds; i++) {
2434 
2435 		tbufp = &(ldcp->tbufp[i]);
2436 		rv = ldc_mem_alloc_handle(ldcp->ldc_handle,
2437 		    &(tbufp->memhandle));
2438 		if (rv) {
2439 			tbufp->memhandle = 0;
2440 			goto init_tbufs_failed;
2441 		}
2442 
2443 		/*
2444 		 * bind ldc memhandle to the corresponding transmit buffer.
2445 		 */
2446 		ci = ncookies = 0;
2447 		rv = ldc_mem_bind_handle(tbufp->memhandle,
2448 		    (caddr_t)datap, VGEN_TXDBLK_SZ, LDC_SHADOW_MAP,
2449 		    LDC_MEM_R, &(tbufp->memcookie[ci]), &ncookies);
2450 		if (rv != 0) {
2451 			goto init_tbufs_failed;
2452 		}
2453 
2454 		/*
2455 		 * successful in binding the handle to tx data buffer.
2456 		 * set datap in the private descr to this buffer.
2457 		 */
2458 		tbufp->datap = datap;
2459 
2460 		if ((ncookies == 0) ||
2461 		    (ncookies > MAX_COOKIES)) {
2462 			goto init_tbufs_failed;
2463 		}
2464 
2465 		for (ci = 1; ci < ncookies; ci++) {
2466 			rv = ldc_mem_nextcookie(tbufp->memhandle,
2467 			    &(tbufp->memcookie[ci]));
2468 			if (rv != 0) {
2469 				goto init_tbufs_failed;
2470 			}
2471 		}
2472 
2473 		tbufp->ncookies = ncookies;
2474 		datap += VGEN_TXDBLK_SZ;
2475 
2476 		tbufp->flags = VGEN_PRIV_DESC_FREE;
2477 		txdp = &(ldcp->txdp[i]);
2478 		hdrp = &txdp->hdr;
2479 		hdrp->dstate = VIO_DESC_FREE;
2480 		hdrp->ack = B_FALSE;
2481 		tbufp->descp = txdp;
2482 
2483 	}
2484 
2485 	/* reset tbuf walking pointers */
2486 	ldcp->next_tbufp = ldcp->tbufp;
2487 	ldcp->cur_tbufp = ldcp->tbufp;
2488 
2489 	/* initialize tx seqnum and index */
2490 	ldcp->next_txseq = VNET_ISS;
2491 	ldcp->next_txi = 0;
2492 
2493 	ldcp->resched_peer = B_TRUE;
2494 	ldcp->resched_peer_txi = 0;
2495 
2496 	return (DDI_SUCCESS);
2497 
2498 init_tbufs_failed:;
2499 	vgen_uninit_tbufs(ldcp);
2500 	return (DDI_FAILURE);
2501 }
2502 
2503 /* Uninitialize transmit buffer ring for the channel */
2504 static void
2505 vgen_uninit_tbufs(vgen_ldc_t *ldcp)
2506 {
2507 	vgen_private_desc_t	*tbufp = ldcp->tbufp;
2508 	int 			i;
2509 
2510 	/* for each tbuf (priv_desc), free ldc mem_handle */
2511 	for (i = 0; i < ldcp->num_txds; i++) {
2512 
2513 		tbufp = &(ldcp->tbufp[i]);
2514 
2515 		if (tbufp->datap) { /* if bound to a ldc memhandle */
2516 			(void) ldc_mem_unbind_handle(tbufp->memhandle);
2517 			tbufp->datap = NULL;
2518 		}
2519 		if (tbufp->memhandle) {
2520 			(void) ldc_mem_free_handle(tbufp->memhandle);
2521 			tbufp->memhandle = 0;
2522 		}
2523 	}
2524 
2525 	if (ldcp->tx_datap) {
2526 		/* prealloc'd tx data buffer */
2527 		kmem_free(ldcp->tx_datap, ldcp->num_txds * VGEN_TXDBLK_SZ);
2528 		ldcp->tx_datap = NULL;
2529 	}
2530 
2531 	bzero(ldcp->tbufp, sizeof (vgen_private_desc_t) * (ldcp->num_txds));
2532 	bzero(ldcp->txdp, sizeof (vnet_public_desc_t) * (ldcp->num_txds));
2533 }
2534 
2535 /* clobber tx descriptor ring */
2536 static void
2537 vgen_clobber_tbufs(vgen_ldc_t *ldcp)
2538 {
2539 	vnet_public_desc_t	*txdp;
2540 	vgen_private_desc_t	*tbufp;
2541 	vio_dring_entry_hdr_t	*hdrp;
2542 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
2543 	int i;
2544 #ifdef DEBUG
2545 	int ndone = 0;
2546 #endif
2547 
2548 	for (i = 0; i < ldcp->num_txds; i++) {
2549 
2550 		tbufp = &(ldcp->tbufp[i]);
2551 		txdp = tbufp->descp;
2552 		hdrp = &txdp->hdr;
2553 
2554 		if (tbufp->flags & VGEN_PRIV_DESC_BUSY) {
2555 			tbufp->flags = VGEN_PRIV_DESC_FREE;
2556 #ifdef DEBUG
2557 			if (hdrp->dstate == VIO_DESC_DONE)
2558 				ndone++;
2559 #endif
2560 			hdrp->dstate = VIO_DESC_FREE;
2561 			hdrp->ack = B_FALSE;
2562 		}
2563 	}
2564 	/* reset tbuf walking pointers */
2565 	ldcp->next_tbufp = ldcp->tbufp;
2566 	ldcp->cur_tbufp = ldcp->tbufp;
2567 
2568 	/* reset tx seqnum and index */
2569 	ldcp->next_txseq = VNET_ISS;
2570 	ldcp->next_txi = 0;
2571 
2572 	ldcp->resched_peer = B_TRUE;
2573 	ldcp->resched_peer_txi = 0;
2574 
2575 	DBG2(vgenp, ldcp, "num descrs done (%d)\n", ndone);
2576 }
2577 
2578 /* clobber receive descriptor ring */
2579 static void
2580 vgen_clobber_rxds(vgen_ldc_t *ldcp)
2581 {
2582 	ldcp->rx_dhandle = 0;
2583 	bzero(&ldcp->rx_dcookie, sizeof (ldcp->rx_dcookie));
2584 	ldcp->rxdp = NULL;
2585 	ldcp->next_rxi = 0;
2586 	ldcp->num_rxds = 0;
2587 	ldcp->next_rxseq = VNET_ISS;
2588 }
2589 
2590 /* initialize receive descriptor ring */
2591 static int
2592 vgen_init_rxds(vgen_ldc_t *ldcp, uint32_t num_desc, uint32_t desc_size,
2593 	ldc_mem_cookie_t *dcookie, uint32_t ncookies)
2594 {
2595 	int rv;
2596 	ldc_mem_info_t minfo;
2597 
2598 	rv = ldc_mem_dring_map(ldcp->ldc_handle, dcookie, ncookies, num_desc,
2599 	    desc_size, LDC_SHADOW_MAP, &(ldcp->rx_dhandle));
2600 	if (rv != 0) {
2601 		return (DDI_FAILURE);
2602 	}
2603 
2604 	/*
2605 	 * sucessfully mapped, now try to
2606 	 * get info about the mapped dring
2607 	 */
2608 	rv = ldc_mem_dring_info(ldcp->rx_dhandle, &minfo);
2609 	if (rv != 0) {
2610 		(void) ldc_mem_dring_unmap(ldcp->rx_dhandle);
2611 		return (DDI_FAILURE);
2612 	}
2613 
2614 	/*
2615 	 * save ring address, number of descriptors.
2616 	 */
2617 	ldcp->rxdp = (vnet_public_desc_t *)(minfo.vaddr);
2618 	bcopy(dcookie, &(ldcp->rx_dcookie), sizeof (*dcookie));
2619 	ldcp->num_rxdcookies = ncookies;
2620 	ldcp->num_rxds = num_desc;
2621 	ldcp->next_rxi = 0;
2622 	ldcp->next_rxseq = VNET_ISS;
2623 
2624 	return (DDI_SUCCESS);
2625 }
2626 
2627 /* get channel statistics */
2628 static uint64_t
2629 vgen_ldc_stat(vgen_ldc_t *ldcp, uint_t stat)
2630 {
2631 	vgen_stats_t *statsp;
2632 	uint64_t val;
2633 
2634 	val = 0;
2635 	statsp = &ldcp->stats;
2636 	switch (stat) {
2637 
2638 	case MAC_STAT_MULTIRCV:
2639 		val = statsp->multircv;
2640 		break;
2641 
2642 	case MAC_STAT_BRDCSTRCV:
2643 		val = statsp->brdcstrcv;
2644 		break;
2645 
2646 	case MAC_STAT_MULTIXMT:
2647 		val = statsp->multixmt;
2648 		break;
2649 
2650 	case MAC_STAT_BRDCSTXMT:
2651 		val = statsp->brdcstxmt;
2652 		break;
2653 
2654 	case MAC_STAT_NORCVBUF:
2655 		val = statsp->norcvbuf;
2656 		break;
2657 
2658 	case MAC_STAT_IERRORS:
2659 		val = statsp->ierrors;
2660 		break;
2661 
2662 	case MAC_STAT_NOXMTBUF:
2663 		val = statsp->noxmtbuf;
2664 		break;
2665 
2666 	case MAC_STAT_OERRORS:
2667 		val = statsp->oerrors;
2668 		break;
2669 
2670 	case MAC_STAT_COLLISIONS:
2671 		break;
2672 
2673 	case MAC_STAT_RBYTES:
2674 		val = statsp->rbytes;
2675 		break;
2676 
2677 	case MAC_STAT_IPACKETS:
2678 		val = statsp->ipackets;
2679 		break;
2680 
2681 	case MAC_STAT_OBYTES:
2682 		val = statsp->obytes;
2683 		break;
2684 
2685 	case MAC_STAT_OPACKETS:
2686 		val = statsp->opackets;
2687 		break;
2688 
2689 	/* stats not relevant to ldc, return 0 */
2690 	case MAC_STAT_IFSPEED:
2691 	case ETHER_STAT_ALIGN_ERRORS:
2692 	case ETHER_STAT_FCS_ERRORS:
2693 	case ETHER_STAT_FIRST_COLLISIONS:
2694 	case ETHER_STAT_MULTI_COLLISIONS:
2695 	case ETHER_STAT_DEFER_XMTS:
2696 	case ETHER_STAT_TX_LATE_COLLISIONS:
2697 	case ETHER_STAT_EX_COLLISIONS:
2698 	case ETHER_STAT_MACXMT_ERRORS:
2699 	case ETHER_STAT_CARRIER_ERRORS:
2700 	case ETHER_STAT_TOOLONG_ERRORS:
2701 	case ETHER_STAT_XCVR_ADDR:
2702 	case ETHER_STAT_XCVR_ID:
2703 	case ETHER_STAT_XCVR_INUSE:
2704 	case ETHER_STAT_CAP_1000FDX:
2705 	case ETHER_STAT_CAP_1000HDX:
2706 	case ETHER_STAT_CAP_100FDX:
2707 	case ETHER_STAT_CAP_100HDX:
2708 	case ETHER_STAT_CAP_10FDX:
2709 	case ETHER_STAT_CAP_10HDX:
2710 	case ETHER_STAT_CAP_ASMPAUSE:
2711 	case ETHER_STAT_CAP_PAUSE:
2712 	case ETHER_STAT_CAP_AUTONEG:
2713 	case ETHER_STAT_ADV_CAP_1000FDX:
2714 	case ETHER_STAT_ADV_CAP_1000HDX:
2715 	case ETHER_STAT_ADV_CAP_100FDX:
2716 	case ETHER_STAT_ADV_CAP_100HDX:
2717 	case ETHER_STAT_ADV_CAP_10FDX:
2718 	case ETHER_STAT_ADV_CAP_10HDX:
2719 	case ETHER_STAT_ADV_CAP_ASMPAUSE:
2720 	case ETHER_STAT_ADV_CAP_PAUSE:
2721 	case ETHER_STAT_ADV_CAP_AUTONEG:
2722 	case ETHER_STAT_LP_CAP_1000FDX:
2723 	case ETHER_STAT_LP_CAP_1000HDX:
2724 	case ETHER_STAT_LP_CAP_100FDX:
2725 	case ETHER_STAT_LP_CAP_100HDX:
2726 	case ETHER_STAT_LP_CAP_10FDX:
2727 	case ETHER_STAT_LP_CAP_10HDX:
2728 	case ETHER_STAT_LP_CAP_ASMPAUSE:
2729 	case ETHER_STAT_LP_CAP_PAUSE:
2730 	case ETHER_STAT_LP_CAP_AUTONEG:
2731 	case ETHER_STAT_LINK_ASMPAUSE:
2732 	case ETHER_STAT_LINK_PAUSE:
2733 	case ETHER_STAT_LINK_AUTONEG:
2734 	case ETHER_STAT_LINK_DUPLEX:
2735 	default:
2736 		val = 0;
2737 		break;
2738 
2739 	}
2740 	return (val);
2741 }
2742 
2743 /*
2744  * LDC channel is UP, start handshake process with peer.
2745  * Flag tells vnet_modify_fdb() about the context: set to B_TRUE if this
2746  * function is being called from transmit routine, otherwise B_FALSE.
2747  */
2748 static void
2749 vgen_handle_evt_up(vgen_ldc_t *ldcp, boolean_t flag)
2750 {
2751 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
2752 
2753 	DBG1(vgenp, ldcp, "enter\n");
2754 
2755 	ASSERT(MUTEX_HELD(&ldcp->cblock));
2756 
2757 	if (ldcp->portp != vgenp->vsw_portp) {
2758 		/*
2759 		 * modify fdb entry to use this port as the
2760 		 * channel is up, instead of going through the
2761 		 * vsw-port (see comments in vgen_port_init())
2762 		 */
2763 		vnet_modify_fdb(vgenp->vnetp, (uint8_t *)&ldcp->portp->macaddr,
2764 		    vgen_tx, ldcp->portp, flag);
2765 	}
2766 
2767 	/* Initialize local session id */
2768 	ldcp->local_sid = ddi_get_lbolt();
2769 
2770 	/* clear peer session id */
2771 	ldcp->peer_sid = 0;
2772 	ldcp->hretries = 0;
2773 
2774 	if (ldcp->hphase != VH_PHASE0) {
2775 		vgen_handshake_reset(ldcp);
2776 	}
2777 
2778 	/* Initiate Handshake process with peer ldc endpoint */
2779 	vgen_handshake(vh_nextphase(ldcp));
2780 
2781 	DBG1(vgenp, ldcp, "exit\n");
2782 }
2783 
2784 /*
2785  * LDC channel is Reset, terminate connection with peer and try to
2786  * bring the channel up again.
2787  * Flag tells vnet_modify_fdb() about the context: set to B_TRUE if this
2788  * function is being called from transmit routine, otherwise B_FALSE.
2789  */
2790 static void
2791 vgen_handle_evt_reset(vgen_ldc_t *ldcp, boolean_t flag)
2792 {
2793 	ldc_status_t istatus;
2794 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
2795 	int	rv;
2796 
2797 	DBG1(vgenp, ldcp, "enter\n");
2798 
2799 	ASSERT(MUTEX_HELD(&ldcp->cblock));
2800 
2801 	if ((ldcp->portp != vgenp->vsw_portp) &&
2802 	    (vgenp->vsw_portp != NULL)) {
2803 		/*
2804 		 * modify fdb entry to use vsw-port  as the
2805 		 * channel is reset and we don't have a direct
2806 		 * link to the destination (see comments
2807 		 * in vgen_port_init()).
2808 		 */
2809 		vnet_modify_fdb(vgenp->vnetp, (uint8_t *)&ldcp->portp->macaddr,
2810 		    vgen_tx, vgenp->vsw_portp, flag);
2811 	}
2812 
2813 	if (ldcp->hphase != VH_PHASE0) {
2814 		vgen_handshake_reset(ldcp);
2815 	}
2816 
2817 	/* try to bring the channel up */
2818 	rv = ldc_up(ldcp->ldc_handle);
2819 	if (rv != 0) {
2820 		DWARN(vgenp, ldcp, "ldc_up err rv(%d)\n", rv);
2821 	}
2822 
2823 	if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
2824 		DWARN(vgenp, ldcp, "ldc_status err\n");
2825 	} else {
2826 		ldcp->ldc_status = istatus;
2827 	}
2828 
2829 	/* if channel is already UP - restart handshake */
2830 	if (ldcp->ldc_status == LDC_UP) {
2831 		vgen_handle_evt_up(ldcp, flag);
2832 	}
2833 
2834 	DBG1(vgenp, ldcp, "exit\n");
2835 }
2836 
2837 /* Interrupt handler for the channel */
2838 static uint_t
2839 vgen_ldc_cb(uint64_t event, caddr_t arg)
2840 {
2841 	_NOTE(ARGUNUSED(event))
2842 	vgen_ldc_t	*ldcp;
2843 	vgen_t		*vgenp;
2844 	ldc_status_t 	istatus;
2845 	vgen_stats_t	*statsp;
2846 
2847 	ldcp = (vgen_ldc_t *)arg;
2848 	vgenp = LDC_TO_VGEN(ldcp);
2849 	statsp = &ldcp->stats;
2850 
2851 	DBG1(vgenp, ldcp, "enter\n");
2852 
2853 	mutex_enter(&ldcp->cblock);
2854 	statsp->callbacks++;
2855 	if ((ldcp->ldc_status == LDC_INIT) || (ldcp->ldc_handle == NULL)) {
2856 		DWARN(vgenp, ldcp, "status(%d) is LDC_INIT\n",
2857 		    ldcp->ldc_status);
2858 		mutex_exit(&ldcp->cblock);
2859 		return (LDC_SUCCESS);
2860 	}
2861 
2862 	/*
2863 	 * NOTE: not using switch() as event could be triggered by
2864 	 * a state change and a read request. Also the ordering	of the
2865 	 * check for the event types is deliberate.
2866 	 */
2867 	if (event & LDC_EVT_UP) {
2868 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
2869 			DWARN(vgenp, ldcp, "ldc_status err\n");
2870 			/* status couldn't be determined */
2871 			mutex_exit(&ldcp->cblock);
2872 			return (LDC_FAILURE);
2873 		}
2874 		ldcp->ldc_status = istatus;
2875 		if (ldcp->ldc_status != LDC_UP) {
2876 			DWARN(vgenp, ldcp, "LDC_EVT_UP received "
2877 			    " but ldc status is not UP(0x%x)\n",
2878 			    ldcp->ldc_status);
2879 			/* spurious interrupt, return success */
2880 			mutex_exit(&ldcp->cblock);
2881 			return (LDC_SUCCESS);
2882 		}
2883 		DWARN(vgenp, ldcp, "event(%lx) UP, status(%d)\n",
2884 		    event, ldcp->ldc_status);
2885 
2886 		vgen_handle_evt_up(ldcp, B_FALSE);
2887 
2888 		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);
2889 	}
2890 
2891 	/* Handle RESET/DOWN before READ event */
2892 	if (event & (LDC_EVT_RESET | LDC_EVT_DOWN)) {
2893 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
2894 			DWARN(vgenp, ldcp, "ldc_status error\n");
2895 			/* status couldn't be determined */
2896 			mutex_exit(&ldcp->cblock);
2897 			return (LDC_FAILURE);
2898 		}
2899 		ldcp->ldc_status = istatus;
2900 		DWARN(vgenp, ldcp, "event(%lx) RESET/DOWN, status(%d)\n",
2901 		    event, ldcp->ldc_status);
2902 
2903 		vgen_handle_evt_reset(ldcp, B_FALSE);
2904 
2905 		/*
2906 		 * As the channel is down/reset, ignore READ event
2907 		 * but print a debug warning message.
2908 		 */
2909 		if (event & LDC_EVT_READ) {
2910 			DWARN(vgenp, ldcp,
2911 			    "LDC_EVT_READ set along with RESET/DOWN\n");
2912 			event &= ~LDC_EVT_READ;
2913 		}
2914 	}
2915 
2916 	if (event & LDC_EVT_READ) {
2917 		DBG2(vgenp, ldcp, "event(%lx) READ, status(%d)\n",
2918 		    event, ldcp->ldc_status);
2919 
2920 		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);
2921 
2922 		if (ldcp->rcv_thread != NULL) {
2923 			/*
2924 			 * If the receive thread is enabled, then
2925 			 * wakeup the receive thread to process the
2926 			 * LDC messages.
2927 			 */
2928 			mutex_exit(&ldcp->cblock);
2929 			mutex_enter(&ldcp->rcv_thr_lock);
2930 			if (!(ldcp->rcv_thr_flags & VGEN_WTHR_DATARCVD)) {
2931 				ldcp->rcv_thr_flags |= VGEN_WTHR_DATARCVD;
2932 				cv_signal(&ldcp->rcv_thr_cv);
2933 			}
2934 			mutex_exit(&ldcp->rcv_thr_lock);
2935 			mutex_enter(&ldcp->cblock);
2936 		} else  {
2937 			vgen_handle_evt_read(ldcp);
2938 		}
2939 	}
2940 	mutex_exit(&ldcp->cblock);
2941 
2942 	if (ldcp->cancel_htid) {
2943 		/*
2944 		 * Cancel handshake timer.
2945 		 * untimeout(9F) will not return until the pending callback is
2946 		 * cancelled or has run. No problems will result from calling
2947 		 * untimeout if the handler has already completed.
2948 		 * If the timeout handler did run, then it would just
2949 		 * return as cancel_htid is set.
2950 		 */
2951 		(void) untimeout(ldcp->cancel_htid);
2952 		ldcp->cancel_htid = 0;
2953 	}
2954 	DBG1(vgenp, ldcp, "exit\n");
2955 
2956 	return (LDC_SUCCESS);
2957 }
2958 
2959 static void
2960 vgen_handle_evt_read(vgen_ldc_t *ldcp)
2961 {
2962 	int		rv;
2963 	uint64_t	*ldcmsg;
2964 	size_t		msglen;
2965 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
2966 	vio_msg_tag_t	*tagp;
2967 	ldc_status_t 	istatus;
2968 	boolean_t 	has_data;
2969 
2970 	DBG1(vgenp, ldcp, "enter\n");
2971 
2972 	ldcmsg = ldcp->ldcmsg;
2973 	/*
2974 	 * If the receive thread is enabled, then the cblock
2975 	 * need to be acquired here. If not, the vgen_ldc_cb()
2976 	 * calls this function with cblock held already.
2977 	 */
2978 	if (ldcp->rcv_thread != NULL) {
2979 		mutex_enter(&ldcp->cblock);
2980 	} else {
2981 		ASSERT(MUTEX_HELD(&ldcp->cblock));
2982 	}
2983 
2984 vgen_evt_read:
2985 	do {
2986 		msglen = ldcp->msglen;
2987 		rv = ldc_read(ldcp->ldc_handle, (caddr_t)ldcmsg, &msglen);
2988 
2989 		if (rv != 0) {
2990 			DWARN(vgenp, ldcp, "err rv(%d) len(%d)\n",
2991 			    rv, msglen);
2992 			if (rv == ECONNRESET)
2993 				goto vgen_evtread_error;
2994 			break;
2995 		}
2996 		if (msglen == 0) {
2997 			DBG2(vgenp, ldcp, "ldc_read NODATA");
2998 			break;
2999 		}
3000 		DBG2(vgenp, ldcp, "ldc_read msglen(%d)", msglen);
3001 
3002 		tagp = (vio_msg_tag_t *)ldcmsg;
3003 
3004 		if (ldcp->peer_sid) {
3005 			/*
3006 			 * check sid only after we have received peer's sid
3007 			 * in the version negotiate msg.
3008 			 */
3009 #ifdef DEBUG
3010 			if (vgen_hdbg & HDBG_BAD_SID) {
3011 				/* simulate bad sid condition */
3012 				tagp->vio_sid = 0;
3013 				vgen_hdbg &= ~(HDBG_BAD_SID);
3014 			}
3015 #endif
3016 			rv = vgen_check_sid(ldcp, tagp);
3017 			if (rv != VGEN_SUCCESS) {
3018 				/*
3019 				 * If sid mismatch is detected,
3020 				 * reset the channel.
3021 				 */
3022 				ldcp->need_ldc_reset = B_TRUE;
3023 				goto vgen_evtread_error;
3024 			}
3025 		}
3026 
3027 		switch (tagp->vio_msgtype) {
3028 		case VIO_TYPE_CTRL:
3029 			rv = vgen_handle_ctrlmsg(ldcp, tagp);
3030 			break;
3031 
3032 		case VIO_TYPE_DATA:
3033 			rv = vgen_handle_datamsg(ldcp, tagp, msglen);
3034 			break;
3035 
3036 		case VIO_TYPE_ERR:
3037 			vgen_handle_errmsg(ldcp, tagp);
3038 			break;
3039 
3040 		default:
3041 			DWARN(vgenp, ldcp, "Unknown VIO_TYPE(%x)\n",
3042 			    tagp->vio_msgtype);
3043 			break;
3044 		}
3045 
3046 		/*
3047 		 * If an error is encountered, stop processing and
3048 		 * handle the error.
3049 		 */
3050 		if (rv != 0) {
3051 			goto vgen_evtread_error;
3052 		}
3053 
3054 	} while (msglen);
3055 
3056 	/* check once more before exiting */
3057 	rv = ldc_chkq(ldcp->ldc_handle, &has_data);
3058 	if ((rv == 0) && (has_data == B_TRUE)) {
3059 		DTRACE_PROBE(vgen_chkq);
3060 		goto vgen_evt_read;
3061 	}
3062 
3063 vgen_evtread_error:
3064 	if (rv == ECONNRESET) {
3065 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
3066 			DWARN(vgenp, ldcp, "ldc_status err\n");
3067 		} else {
3068 			ldcp->ldc_status = istatus;
3069 		}
3070 		vgen_handle_evt_reset(ldcp, B_FALSE);
3071 	} else if (rv) {
3072 		vgen_handshake_retry(ldcp);
3073 	}
3074 
3075 	/*
3076 	 * If the receive thread is not enabled, then cancel the
3077 	 * handshake timeout here.
3078 	 */
3079 	if (ldcp->rcv_thread != NULL) {
3080 		mutex_exit(&ldcp->cblock);
3081 		if (ldcp->cancel_htid) {
3082 			/*
3083 			 * Cancel handshake timer. untimeout(9F) will
3084 			 * not return until the pending callback is cancelled
3085 			 * or has run. No problems will result from calling
3086 			 * untimeout if the handler has already completed.
3087 			 * If the timeout handler did run, then it would just
3088 			 * return as cancel_htid is set.
3089 			 */
3090 			(void) untimeout(ldcp->cancel_htid);
3091 			ldcp->cancel_htid = 0;
3092 		}
3093 	}
3094 
3095 	DBG1(vgenp, ldcp, "exit\n");
3096 }
3097 
3098 /* vgen handshake functions */
3099 
3100 /* change the hphase for the channel to the next phase */
3101 static vgen_ldc_t *
3102 vh_nextphase(vgen_ldc_t *ldcp)
3103 {
3104 	if (ldcp->hphase == VH_PHASE3) {
3105 		ldcp->hphase = VH_DONE;
3106 	} else {
3107 		ldcp->hphase++;
3108 	}
3109 	return (ldcp);
3110 }
3111 
3112 /*
3113  * wrapper routine to send the given message over ldc using ldc_write().
3114  */
3115 static int
3116 vgen_sendmsg(vgen_ldc_t *ldcp, caddr_t msg,  size_t msglen,
3117     boolean_t caller_holds_lock)
3118 {
3119 	int			rv;
3120 	size_t			len;
3121 	uint32_t		retries = 0;
3122 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
3123 	vio_msg_tag_t		*tagp = (vio_msg_tag_t *)msg;
3124 	vio_dring_msg_t		*dmsg;
3125 	vio_raw_data_msg_t	*rmsg;
3126 	boolean_t		data_msg = B_FALSE;
3127 
3128 	len = msglen;
3129 	if ((len == 0) || (msg == NULL))
3130 		return (VGEN_FAILURE);
3131 
3132 	if (!caller_holds_lock) {
3133 		mutex_enter(&ldcp->wrlock);
3134 	}
3135 
3136 	if (tagp->vio_subtype == VIO_SUBTYPE_INFO) {
3137 		if (tagp->vio_subtype_env == VIO_DRING_DATA) {
3138 			dmsg = (vio_dring_msg_t *)tagp;
3139 			dmsg->seq_num = ldcp->next_txseq;
3140 			data_msg = B_TRUE;
3141 		} else if (tagp->vio_subtype_env == VIO_PKT_DATA) {
3142 			rmsg = (vio_raw_data_msg_t *)tagp;
3143 			rmsg->seq_num = ldcp->next_txseq;
3144 			data_msg = B_TRUE;
3145 		}
3146 	}
3147 
3148 	do {
3149 		len = msglen;
3150 		rv = ldc_write(ldcp->ldc_handle, (caddr_t)msg, &len);
3151 		if (retries++ >= vgen_ldcwr_retries)
3152 			break;
3153 	} while (rv == EWOULDBLOCK);
3154 
3155 	if (rv == 0 && data_msg == B_TRUE) {
3156 		ldcp->next_txseq++;
3157 	}
3158 
3159 	if (!caller_holds_lock) {
3160 		mutex_exit(&ldcp->wrlock);
3161 	}
3162 
3163 	if (rv != 0) {
3164 		DWARN(vgenp, ldcp, "ldc_write failed: rv(%d)\n",
3165 		    rv, msglen);
3166 		return (rv);
3167 	}
3168 
3169 	if (len != msglen) {
3170 		DWARN(vgenp, ldcp, "ldc_write failed: rv(%d) msglen (%d)\n",
3171 		    rv, msglen);
3172 		return (VGEN_FAILURE);
3173 	}
3174 
3175 	return (VGEN_SUCCESS);
3176 }
3177 
3178 /* send version negotiate message to the peer over ldc */
3179 static int
3180 vgen_send_version_negotiate(vgen_ldc_t *ldcp)
3181 {
3182 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
3183 	vio_ver_msg_t	vermsg;
3184 	vio_msg_tag_t	*tagp = &vermsg.tag;
3185 	int		rv;
3186 
3187 	bzero(&vermsg, sizeof (vermsg));
3188 
3189 	tagp->vio_msgtype = VIO_TYPE_CTRL;
3190 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
3191 	tagp->vio_subtype_env = VIO_VER_INFO;
3192 	tagp->vio_sid = ldcp->local_sid;
3193 
3194 	/* get version msg payload from ldcp->local */
3195 	vermsg.ver_major = ldcp->local_hparams.ver_major;
3196 	vermsg.ver_minor = ldcp->local_hparams.ver_minor;
3197 	vermsg.dev_class = ldcp->local_hparams.dev_class;
3198 
3199 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (vermsg), B_FALSE);
3200 	if (rv != VGEN_SUCCESS) {
3201 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
3202 		return (rv);
3203 	}
3204 
3205 	ldcp->hstate |= VER_INFO_SENT;
3206 	DBG2(vgenp, ldcp, "VER_INFO_SENT ver(%d,%d)\n",
3207 	    vermsg.ver_major, vermsg.ver_minor);
3208 
3209 	return (VGEN_SUCCESS);
3210 }
3211 
3212 /* send attr info message to the peer over ldc */
3213 static int
3214 vgen_send_attr_info(vgen_ldc_t *ldcp)
3215 {
3216 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
3217 	vnet_attr_msg_t	attrmsg;
3218 	vio_msg_tag_t	*tagp = &attrmsg.tag;
3219 	int		rv;
3220 
3221 	bzero(&attrmsg, sizeof (attrmsg));
3222 
3223 	tagp->vio_msgtype = VIO_TYPE_CTRL;
3224 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
3225 	tagp->vio_subtype_env = VIO_ATTR_INFO;
3226 	tagp->vio_sid = ldcp->local_sid;
3227 
3228 	/* get attr msg payload from ldcp->local */
3229 	attrmsg.mtu = ldcp->local_hparams.mtu;
3230 	attrmsg.addr = ldcp->local_hparams.addr;
3231 	attrmsg.addr_type = ldcp->local_hparams.addr_type;
3232 	attrmsg.xfer_mode = ldcp->local_hparams.xfer_mode;
3233 	attrmsg.ack_freq = ldcp->local_hparams.ack_freq;
3234 
3235 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (attrmsg), B_FALSE);
3236 	if (rv != VGEN_SUCCESS) {
3237 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
3238 		return (rv);
3239 	}
3240 
3241 	ldcp->hstate |= ATTR_INFO_SENT;
3242 	DBG2(vgenp, ldcp, "ATTR_INFO_SENT\n");
3243 
3244 	return (VGEN_SUCCESS);
3245 }
3246 
3247 /* send descriptor ring register message to the peer over ldc */
3248 static int
3249 vgen_send_dring_reg(vgen_ldc_t *ldcp)
3250 {
3251 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
3252 	vio_dring_reg_msg_t	msg;
3253 	vio_msg_tag_t		*tagp = &msg.tag;
3254 	int		rv;
3255 
3256 	bzero(&msg, sizeof (msg));
3257 
3258 	tagp->vio_msgtype = VIO_TYPE_CTRL;
3259 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
3260 	tagp->vio_subtype_env = VIO_DRING_REG;
3261 	tagp->vio_sid = ldcp->local_sid;
3262 
3263 	/* get dring info msg payload from ldcp->local */
3264 	bcopy(&(ldcp->local_hparams.dring_cookie), (msg.cookie),
3265 	    sizeof (ldc_mem_cookie_t));
3266 	msg.ncookies = ldcp->local_hparams.num_dcookies;
3267 	msg.num_descriptors = ldcp->local_hparams.num_desc;
3268 	msg.descriptor_size = ldcp->local_hparams.desc_size;
3269 
3270 	/*
3271 	 * dring_ident is set to 0. After mapping the dring, peer sets this
3272 	 * value and sends it in the ack, which is saved in
3273 	 * vgen_handle_dring_reg().
3274 	 */
3275 	msg.dring_ident = 0;
3276 
3277 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (msg), B_FALSE);
3278 	if (rv != VGEN_SUCCESS) {
3279 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
3280 		return (rv);
3281 	}
3282 
3283 	ldcp->hstate |= DRING_INFO_SENT;
3284 	DBG2(vgenp, ldcp, "DRING_INFO_SENT \n");
3285 
3286 	return (VGEN_SUCCESS);
3287 }
3288 
3289 static int
3290 vgen_send_rdx_info(vgen_ldc_t *ldcp)
3291 {
3292 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
3293 	vio_rdx_msg_t	rdxmsg;
3294 	vio_msg_tag_t	*tagp = &rdxmsg.tag;
3295 	int		rv;
3296 
3297 	bzero(&rdxmsg, sizeof (rdxmsg));
3298 
3299 	tagp->vio_msgtype = VIO_TYPE_CTRL;
3300 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
3301 	tagp->vio_subtype_env = VIO_RDX;
3302 	tagp->vio_sid = ldcp->local_sid;
3303 
3304 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (rdxmsg), B_FALSE);
3305 	if (rv != VGEN_SUCCESS) {
3306 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
3307 		return (rv);
3308 	}
3309 
3310 	ldcp->hstate |= RDX_INFO_SENT;
3311 	DBG2(vgenp, ldcp, "RDX_INFO_SENT\n");
3312 
3313 	return (VGEN_SUCCESS);
3314 }
3315 
3316 /* send descriptor ring data message to the peer over ldc */
3317 static int
3318 vgen_send_dring_data(vgen_ldc_t *ldcp, uint32_t start, int32_t end)
3319 {
3320 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
3321 	vio_dring_msg_t	dringmsg, *msgp = &dringmsg;
3322 	vio_msg_tag_t	*tagp = &msgp->tag;
3323 	vgen_stats_t	*statsp = &ldcp->stats;
3324 	int		rv;
3325 
3326 	bzero(msgp, sizeof (*msgp));
3327 
3328 	tagp->vio_msgtype = VIO_TYPE_DATA;
3329 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
3330 	tagp->vio_subtype_env = VIO_DRING_DATA;
3331 	tagp->vio_sid = ldcp->local_sid;
3332 
3333 	msgp->dring_ident = ldcp->local_hparams.dring_ident;
3334 	msgp->start_idx = start;
3335 	msgp->end_idx = end;
3336 
3337 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (dringmsg), B_TRUE);
3338 	if (rv != VGEN_SUCCESS) {
3339 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
3340 		return (rv);
3341 	}
3342 
3343 	statsp->dring_data_msgs++;
3344 
3345 	DBG2(vgenp, ldcp, "DRING_DATA_SENT \n");
3346 
3347 	return (VGEN_SUCCESS);
3348 }
3349 
3350 /* send multicast addr info message to vsw */
3351 static int
3352 vgen_send_mcast_info(vgen_ldc_t *ldcp)
3353 {
3354 	vnet_mcast_msg_t	mcastmsg;
3355 	vnet_mcast_msg_t	*msgp;
3356 	vio_msg_tag_t		*tagp;
3357 	vgen_t			*vgenp;
3358 	struct ether_addr	*mca;
3359 	int			rv;
3360 	int			i;
3361 	uint32_t		size;
3362 	uint32_t		mccount;
3363 	uint32_t		n;
3364 
3365 	msgp = &mcastmsg;
3366 	tagp = &msgp->tag;
3367 	vgenp = LDC_TO_VGEN(ldcp);
3368 
3369 	mccount = vgenp->mccount;
3370 	i = 0;
3371 
3372 	do {
3373 		tagp->vio_msgtype = VIO_TYPE_CTRL;
3374 		tagp->vio_subtype = VIO_SUBTYPE_INFO;
3375 		tagp->vio_subtype_env = VNET_MCAST_INFO;
3376 		tagp->vio_sid = ldcp->local_sid;
3377 
3378 		n = ((mccount >= VNET_NUM_MCAST) ? VNET_NUM_MCAST : mccount);
3379 		size = n * sizeof (struct ether_addr);
3380 
3381 		mca = &(vgenp->mctab[i]);
3382 		bcopy(mca, (msgp->mca), size);
3383 		msgp->set = B_TRUE;
3384 		msgp->count = n;
3385 
3386 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msgp),
3387 		    B_FALSE);
3388 		if (rv != VGEN_SUCCESS) {
3389 			DWARN(vgenp, ldcp, "vgen_sendmsg err(%d)\n", rv);
3390 			return (rv);
3391 		}
3392 
3393 		mccount -= n;
3394 		i += n;
3395 
3396 	} while (mccount);
3397 
3398 	return (VGEN_SUCCESS);
3399 }
3400 
3401 /* Initiate Phase 2 of handshake */
3402 static int
3403 vgen_handshake_phase2(vgen_ldc_t *ldcp)
3404 {
3405 	int rv;
3406 	uint32_t ncookies = 0;
3407 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3408 
3409 #ifdef DEBUG
3410 	if (vgen_hdbg & HDBG_OUT_STATE) {
3411 		/* simulate out of state condition */
3412 		vgen_hdbg &= ~(HDBG_OUT_STATE);
3413 		rv = vgen_send_rdx_info(ldcp);
3414 		return (rv);
3415 	}
3416 	if (vgen_hdbg & HDBG_TIMEOUT) {
3417 		/* simulate timeout condition */
3418 		vgen_hdbg &= ~(HDBG_TIMEOUT);
3419 		return (VGEN_SUCCESS);
3420 	}
3421 #endif
3422 	rv = vgen_send_attr_info(ldcp);
3423 	if (rv != VGEN_SUCCESS) {
3424 		return (rv);
3425 	}
3426 
3427 	/* Bind descriptor ring to the channel */
3428 	if (ldcp->num_txdcookies == 0) {
3429 		rv = ldc_mem_dring_bind(ldcp->ldc_handle, ldcp->tx_dhandle,
3430 		    LDC_SHADOW_MAP, LDC_MEM_RW, &ldcp->tx_dcookie, &ncookies);
3431 		if (rv != 0) {
3432 			DWARN(vgenp, ldcp, "ldc_mem_dring_bind failed "
3433 			    "rv(%x)\n", rv);
3434 			return (rv);
3435 		}
3436 		ASSERT(ncookies == 1);
3437 		ldcp->num_txdcookies = ncookies;
3438 	}
3439 
3440 	/* update local dring_info params */
3441 	bcopy(&(ldcp->tx_dcookie), &(ldcp->local_hparams.dring_cookie),
3442 	    sizeof (ldc_mem_cookie_t));
3443 	ldcp->local_hparams.num_dcookies = ldcp->num_txdcookies;
3444 	ldcp->local_hparams.num_desc = ldcp->num_txds;
3445 	ldcp->local_hparams.desc_size = sizeof (vnet_public_desc_t);
3446 
3447 	rv = vgen_send_dring_reg(ldcp);
3448 	if (rv != VGEN_SUCCESS) {
3449 		return (rv);
3450 	}
3451 
3452 	return (VGEN_SUCCESS);
3453 }
3454 
3455 /*
3456  * Set vnet-protocol-version dependent functions based on version.
3457  */
3458 static void
3459 vgen_set_vnet_proto_ops(vgen_ldc_t *ldcp)
3460 {
3461 	vgen_hparams_t	*lp = &ldcp->local_hparams;
3462 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
3463 
3464 	if ((lp->ver_major == 1) && (lp->ver_minor == 2)) {
3465 		/* Version 1.2 */
3466 
3467 		if (VGEN_PRI_ETH_DEFINED(vgenp)) {
3468 			/*
3469 			 * enable priority routines and pkt mode only if
3470 			 * at least one pri-eth-type is specified in MD.
3471 			 */
3472 
3473 			ldcp->tx = vgen_ldcsend;
3474 			ldcp->rx_pktdata = vgen_handle_pkt_data;
3475 
3476 			/* set xfer mode for vgen_send_attr_info() */
3477 			lp->xfer_mode = VIO_PKT_MODE | VIO_DRING_MODE_V1_2;
3478 
3479 		} else {
3480 			/* no priority eth types defined in MD */
3481 
3482 			ldcp->tx = vgen_ldcsend_dring;
3483 			ldcp->rx_pktdata = vgen_handle_pkt_data_nop;
3484 
3485 			/* set xfer mode for vgen_send_attr_info() */
3486 			lp->xfer_mode = VIO_DRING_MODE_V1_2;
3487 
3488 		}
3489 	} else {
3490 		/* Versions prior to 1.2  */
3491 
3492 		vgen_reset_vnet_proto_ops(ldcp);
3493 	}
3494 }
3495 
3496 /*
3497  * Reset vnet-protocol-version dependent functions to pre-v1.2.
3498  */
3499 static void
3500 vgen_reset_vnet_proto_ops(vgen_ldc_t *ldcp)
3501 {
3502 	vgen_hparams_t	*lp = &ldcp->local_hparams;
3503 
3504 	ldcp->tx = vgen_ldcsend_dring;
3505 	ldcp->rx_pktdata = vgen_handle_pkt_data_nop;
3506 
3507 	/* set xfer mode for vgen_send_attr_info() */
3508 	lp->xfer_mode = VIO_DRING_MODE_V1_0;
3509 }
3510 
3511 /*
3512  * This function resets the handshake phase to VH_PHASE0(pre-handshake phase).
3513  * This can happen after a channel comes up (status: LDC_UP) or
3514  * when handshake gets terminated due to various conditions.
3515  */
3516 static void
3517 vgen_reset_hphase(vgen_ldc_t *ldcp)
3518 {
3519 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3520 	ldc_status_t istatus;
3521 	int rv;
3522 
3523 	DBG1(vgenp, ldcp, "enter\n");
3524 	/* reset hstate and hphase */
3525 	ldcp->hstate = 0;
3526 	ldcp->hphase = VH_PHASE0;
3527 
3528 	vgen_reset_vnet_proto_ops(ldcp);
3529 
3530 	/*
3531 	 * Save the id of pending handshake timer in cancel_htid.
3532 	 * This will be checked in vgen_ldc_cb() and the handshake timer will
3533 	 * be cancelled after releasing cblock.
3534 	 */
3535 	if (ldcp->htid) {
3536 		ldcp->cancel_htid = ldcp->htid;
3537 		ldcp->htid = 0;
3538 	}
3539 
3540 	if (ldcp->local_hparams.dring_ready) {
3541 		ldcp->local_hparams.dring_ready = B_FALSE;
3542 	}
3543 
3544 	/* Unbind tx descriptor ring from the channel */
3545 	if (ldcp->num_txdcookies) {
3546 		rv = ldc_mem_dring_unbind(ldcp->tx_dhandle);
3547 		if (rv != 0) {
3548 			DWARN(vgenp, ldcp, "ldc_mem_dring_unbind failed\n");
3549 		}
3550 		ldcp->num_txdcookies = 0;
3551 	}
3552 
3553 	if (ldcp->peer_hparams.dring_ready) {
3554 		ldcp->peer_hparams.dring_ready = B_FALSE;
3555 		/* Unmap peer's dring */
3556 		(void) ldc_mem_dring_unmap(ldcp->rx_dhandle);
3557 		vgen_clobber_rxds(ldcp);
3558 	}
3559 
3560 	vgen_clobber_tbufs(ldcp);
3561 
3562 	/*
3563 	 * clear local handshake params and initialize.
3564 	 */
3565 	bzero(&(ldcp->local_hparams), sizeof (ldcp->local_hparams));
3566 
3567 	/* set version to the highest version supported */
3568 	ldcp->local_hparams.ver_major =
3569 	    ldcp->vgen_versions[0].ver_major;
3570 	ldcp->local_hparams.ver_minor =
3571 	    ldcp->vgen_versions[0].ver_minor;
3572 	ldcp->local_hparams.dev_class = VDEV_NETWORK;
3573 
3574 	/* set attr_info params */
3575 	ldcp->local_hparams.mtu = ETHERMAX;
3576 	ldcp->local_hparams.addr =
3577 	    vnet_macaddr_strtoul(vgenp->macaddr);
3578 	ldcp->local_hparams.addr_type = ADDR_TYPE_MAC;
3579 	ldcp->local_hparams.xfer_mode = VIO_DRING_MODE_V1_0;
3580 	ldcp->local_hparams.ack_freq = 0;	/* don't need acks */
3581 
3582 	/*
3583 	 * Note: dring is created, but not bound yet.
3584 	 * local dring_info params will be updated when we bind the dring in
3585 	 * vgen_handshake_phase2().
3586 	 * dring_ident is set to 0. After mapping the dring, peer sets this
3587 	 * value and sends it in the ack, which is saved in
3588 	 * vgen_handle_dring_reg().
3589 	 */
3590 	ldcp->local_hparams.dring_ident = 0;
3591 
3592 	/* clear peer_hparams */
3593 	bzero(&(ldcp->peer_hparams), sizeof (ldcp->peer_hparams));
3594 
3595 	/* reset the channel if required */
3596 	if (ldcp->need_ldc_reset) {
3597 		DWARN(vgenp, ldcp, "Doing Channel Reset...\n");
3598 		ldcp->need_ldc_reset = B_FALSE;
3599 		(void) ldc_down(ldcp->ldc_handle);
3600 		(void) ldc_status(ldcp->ldc_handle, &istatus);
3601 		DBG2(vgenp, ldcp, "Reset Done,ldc_status(%x)\n", istatus);
3602 		ldcp->ldc_status = istatus;
3603 
3604 		/* clear sids */
3605 		ldcp->local_sid = 0;
3606 		ldcp->peer_sid = 0;
3607 
3608 		/* try to bring the channel up */
3609 		rv = ldc_up(ldcp->ldc_handle);
3610 		if (rv != 0) {
3611 			DWARN(vgenp, ldcp, "ldc_up err rv(%d)\n", rv);
3612 		}
3613 
3614 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
3615 			DWARN(vgenp, ldcp, "ldc_status err\n");
3616 		} else {
3617 			ldcp->ldc_status = istatus;
3618 		}
3619 	}
3620 }
3621 
3622 /* wrapper function for vgen_reset_hphase */
3623 static void
3624 vgen_handshake_reset(vgen_ldc_t *ldcp)
3625 {
3626 	ASSERT(MUTEX_HELD(&ldcp->cblock));
3627 	mutex_enter(&ldcp->rxlock);
3628 	mutex_enter(&ldcp->wrlock);
3629 	mutex_enter(&ldcp->txlock);
3630 	mutex_enter(&ldcp->tclock);
3631 
3632 	vgen_reset_hphase(ldcp);
3633 
3634 	mutex_exit(&ldcp->tclock);
3635 	mutex_exit(&ldcp->txlock);
3636 	mutex_exit(&ldcp->wrlock);
3637 	mutex_exit(&ldcp->rxlock);
3638 }
3639 
3640 /*
3641  * Initiate handshake with the peer by sending various messages
3642  * based on the handshake-phase that the channel is currently in.
3643  */
3644 static void
3645 vgen_handshake(vgen_ldc_t *ldcp)
3646 {
3647 	uint32_t hphase = ldcp->hphase;
3648 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
3649 	ldc_status_t	istatus;
3650 	int	rv = 0;
3651 
3652 	switch (hphase) {
3653 
3654 	case VH_PHASE1:
3655 
3656 		/*
3657 		 * start timer, for entire handshake process, turn this timer
3658 		 * off if all phases of handshake complete successfully and
3659 		 * hphase goes to VH_DONE(below) or
3660 		 * vgen_reset_hphase() gets called or
3661 		 * channel is reset due to errors or
3662 		 * vgen_ldc_uninit() is invoked(vgen_stop).
3663 		 */
3664 		ldcp->htid = timeout(vgen_hwatchdog, (caddr_t)ldcp,
3665 		    drv_usectohz(vgen_hwd_interval * MICROSEC));
3666 
3667 		/* Phase 1 involves negotiating the version */
3668 		rv = vgen_send_version_negotiate(ldcp);
3669 		break;
3670 
3671 	case VH_PHASE2:
3672 		rv = vgen_handshake_phase2(ldcp);
3673 		break;
3674 
3675 	case VH_PHASE3:
3676 		rv = vgen_send_rdx_info(ldcp);
3677 		break;
3678 
3679 	case VH_DONE:
3680 		/*
3681 		 * Save the id of pending handshake timer in cancel_htid.
3682 		 * This will be checked in vgen_ldc_cb() and the handshake
3683 		 * timer will be cancelled after releasing cblock.
3684 		 */
3685 		if (ldcp->htid) {
3686 			ldcp->cancel_htid = ldcp->htid;
3687 			ldcp->htid = 0;
3688 		}
3689 		ldcp->hretries = 0;
3690 		DBG1(vgenp, ldcp, "Handshake Done\n");
3691 
3692 		if (ldcp->portp == vgenp->vsw_portp) {
3693 			/*
3694 			 * If this channel(port) is connected to vsw,
3695 			 * need to sync multicast table with vsw.
3696 			 */
3697 			mutex_exit(&ldcp->cblock);
3698 
3699 			mutex_enter(&vgenp->lock);
3700 			rv = vgen_send_mcast_info(ldcp);
3701 			mutex_exit(&vgenp->lock);
3702 
3703 			mutex_enter(&ldcp->cblock);
3704 			if (rv != VGEN_SUCCESS)
3705 				break;
3706 		}
3707 
3708 		/*
3709 		 * Check if mac layer should be notified to restart
3710 		 * transmissions. This can happen if the channel got
3711 		 * reset and vgen_clobber_tbufs() is called, while
3712 		 * need_resched is set.
3713 		 */
3714 		mutex_enter(&ldcp->tclock);
3715 		if (ldcp->need_resched) {
3716 			ldcp->need_resched = B_FALSE;
3717 			vnet_tx_update(vgenp->vnetp);
3718 		}
3719 		mutex_exit(&ldcp->tclock);
3720 
3721 		break;
3722 
3723 	default:
3724 		break;
3725 	}
3726 
3727 	if (rv == ECONNRESET) {
3728 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
3729 			DWARN(vgenp, ldcp, "ldc_status err\n");
3730 		} else {
3731 			ldcp->ldc_status = istatus;
3732 		}
3733 		vgen_handle_evt_reset(ldcp, B_FALSE);
3734 	} else if (rv) {
3735 		vgen_handshake_reset(ldcp);
3736 	}
3737 }
3738 
3739 /*
3740  * Check if the current handshake phase has completed successfully and
3741  * return the status.
3742  */
3743 static int
3744 vgen_handshake_done(vgen_ldc_t *ldcp)
3745 {
3746 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
3747 	uint32_t	hphase = ldcp->hphase;
3748 	int 		status = 0;
3749 
3750 	switch (hphase) {
3751 
3752 	case VH_PHASE1:
3753 		/*
3754 		 * Phase1 is done, if version negotiation
3755 		 * completed successfully.
3756 		 */
3757 		status = ((ldcp->hstate & VER_NEGOTIATED) ==
3758 		    VER_NEGOTIATED);
3759 		break;
3760 
3761 	case VH_PHASE2:
3762 		/*
3763 		 * Phase 2 is done, if attr info and dring info
3764 		 * have been exchanged successfully.
3765 		 */
3766 		status = (((ldcp->hstate & ATTR_INFO_EXCHANGED) ==
3767 		    ATTR_INFO_EXCHANGED) &&
3768 		    ((ldcp->hstate & DRING_INFO_EXCHANGED) ==
3769 		    DRING_INFO_EXCHANGED));
3770 		break;
3771 
3772 	case VH_PHASE3:
3773 		/* Phase 3 is done, if rdx msg has been exchanged */
3774 		status = ((ldcp->hstate & RDX_EXCHANGED) ==
3775 		    RDX_EXCHANGED);
3776 		break;
3777 
3778 	default:
3779 		break;
3780 	}
3781 
3782 	if (status == 0) {
3783 		return (VGEN_FAILURE);
3784 	}
3785 	DBG2(vgenp, ldcp, "PHASE(%d)\n", hphase);
3786 	return (VGEN_SUCCESS);
3787 }
3788 
3789 /* retry handshake on failure */
3790 static void
3791 vgen_handshake_retry(vgen_ldc_t *ldcp)
3792 {
3793 	/* reset handshake phase */
3794 	vgen_handshake_reset(ldcp);
3795 
3796 	/* handshake retry is specified and the channel is UP */
3797 	if (vgen_max_hretries && (ldcp->ldc_status == LDC_UP)) {
3798 		if (ldcp->hretries++ < vgen_max_hretries) {
3799 			ldcp->local_sid = ddi_get_lbolt();
3800 			vgen_handshake(vh_nextphase(ldcp));
3801 		}
3802 	}
3803 }
3804 
3805 /*
3806  * Handle a version info msg from the peer or an ACK/NACK from the peer
3807  * to a version info msg that we sent.
3808  */
3809 static int
3810 vgen_handle_version_negotiate(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
3811 {
3812 	vgen_t		*vgenp;
3813 	vio_ver_msg_t	*vermsg = (vio_ver_msg_t *)tagp;
3814 	int		ack = 0;
3815 	int		failed = 0;
3816 	int		idx;
3817 	vgen_ver_t	*versions = ldcp->vgen_versions;
3818 	int		rv = 0;
3819 
3820 	vgenp = LDC_TO_VGEN(ldcp);
3821 	DBG1(vgenp, ldcp, "enter\n");
3822 	switch (tagp->vio_subtype) {
3823 	case VIO_SUBTYPE_INFO:
3824 
3825 		/*  Cache sid of peer if this is the first time */
3826 		if (ldcp->peer_sid == 0) {
3827 			DBG2(vgenp, ldcp, "Caching peer_sid(%x)\n",
3828 			    tagp->vio_sid);
3829 			ldcp->peer_sid = tagp->vio_sid;
3830 		}
3831 
3832 		if (ldcp->hphase != VH_PHASE1) {
3833 			/*
3834 			 * If we are not already in VH_PHASE1, reset to
3835 			 * pre-handshake state, and initiate handshake
3836 			 * to the peer too.
3837 			 */
3838 			vgen_handshake_reset(ldcp);
3839 			vgen_handshake(vh_nextphase(ldcp));
3840 		}
3841 		ldcp->hstate |= VER_INFO_RCVD;
3842 
3843 		/* save peer's requested values */
3844 		ldcp->peer_hparams.ver_major = vermsg->ver_major;
3845 		ldcp->peer_hparams.ver_minor = vermsg->ver_minor;
3846 		ldcp->peer_hparams.dev_class = vermsg->dev_class;
3847 
3848 		if ((vermsg->dev_class != VDEV_NETWORK) &&
3849 		    (vermsg->dev_class != VDEV_NETWORK_SWITCH)) {
3850 			/* unsupported dev_class, send NACK */
3851 
3852 			DWARN(vgenp, ldcp, "Version Negotiation Failed\n");
3853 
3854 			tagp->vio_subtype = VIO_SUBTYPE_NACK;
3855 			tagp->vio_sid = ldcp->local_sid;
3856 			/* send reply msg back to peer */
3857 			rv = vgen_sendmsg(ldcp, (caddr_t)tagp,
3858 			    sizeof (*vermsg), B_FALSE);
3859 			if (rv != VGEN_SUCCESS) {
3860 				return (rv);
3861 			}
3862 			return (VGEN_FAILURE);
3863 		}
3864 
3865 		DBG2(vgenp, ldcp, "VER_INFO_RCVD, ver(%d,%d)\n",
3866 		    vermsg->ver_major,  vermsg->ver_minor);
3867 
3868 		idx = 0;
3869 
3870 		for (;;) {
3871 
3872 			if (vermsg->ver_major > versions[idx].ver_major) {
3873 
3874 				/* nack with next lower version */
3875 				tagp->vio_subtype = VIO_SUBTYPE_NACK;
3876 				vermsg->ver_major = versions[idx].ver_major;
3877 				vermsg->ver_minor = versions[idx].ver_minor;
3878 				break;
3879 			}
3880 
3881 			if (vermsg->ver_major == versions[idx].ver_major) {
3882 
3883 				/* major version match - ACK version */
3884 				tagp->vio_subtype = VIO_SUBTYPE_ACK;
3885 				ack = 1;
3886 
3887 				/*
3888 				 * lower minor version to the one this endpt
3889 				 * supports, if necessary
3890 				 */
3891 				if (vermsg->ver_minor >
3892 				    versions[idx].ver_minor) {
3893 					vermsg->ver_minor =
3894 					    versions[idx].ver_minor;
3895 					ldcp->peer_hparams.ver_minor =
3896 					    versions[idx].ver_minor;
3897 				}
3898 				break;
3899 			}
3900 
3901 			idx++;
3902 
3903 			if (idx == VGEN_NUM_VER) {
3904 
3905 				/* no version match - send NACK */
3906 				tagp->vio_subtype = VIO_SUBTYPE_NACK;
3907 				vermsg->ver_major = 0;
3908 				vermsg->ver_minor = 0;
3909 				failed = 1;
3910 				break;
3911 			}
3912 
3913 		}
3914 
3915 		tagp->vio_sid = ldcp->local_sid;
3916 
3917 		/* send reply msg back to peer */
3918 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*vermsg),
3919 		    B_FALSE);
3920 		if (rv != VGEN_SUCCESS) {
3921 			return (rv);
3922 		}
3923 
3924 		if (ack) {
3925 			ldcp->hstate |= VER_ACK_SENT;
3926 			DBG2(vgenp, ldcp, "VER_ACK_SENT, ver(%d,%d) \n",
3927 			    vermsg->ver_major, vermsg->ver_minor);
3928 		}
3929 		if (failed) {
3930 			DWARN(vgenp, ldcp, "Negotiation Failed\n");
3931 			return (VGEN_FAILURE);
3932 		}
3933 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
3934 
3935 			/*  VER_ACK_SENT and VER_ACK_RCVD */
3936 
3937 			/* local and peer versions match? */
3938 			ASSERT((ldcp->local_hparams.ver_major ==
3939 			    ldcp->peer_hparams.ver_major) &&
3940 			    (ldcp->local_hparams.ver_minor ==
3941 			    ldcp->peer_hparams.ver_minor));
3942 
3943 			vgen_set_vnet_proto_ops(ldcp);
3944 
3945 			/* move to the next phase */
3946 			vgen_handshake(vh_nextphase(ldcp));
3947 		}
3948 
3949 		break;
3950 
3951 	case VIO_SUBTYPE_ACK:
3952 
3953 		if (ldcp->hphase != VH_PHASE1) {
3954 			/*  This should not happen. */
3955 			DWARN(vgenp, ldcp, "Invalid Phase(%u)\n", ldcp->hphase);
3956 			return (VGEN_FAILURE);
3957 		}
3958 
3959 		/* SUCCESS - we have agreed on a version */
3960 		ldcp->local_hparams.ver_major = vermsg->ver_major;
3961 		ldcp->local_hparams.ver_minor = vermsg->ver_minor;
3962 		ldcp->hstate |= VER_ACK_RCVD;
3963 
3964 		DBG2(vgenp, ldcp, "VER_ACK_RCVD, ver(%d,%d) \n",
3965 		    vermsg->ver_major,  vermsg->ver_minor);
3966 
3967 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
3968 
3969 			/*  VER_ACK_SENT and VER_ACK_RCVD */
3970 
3971 			/* local and peer versions match? */
3972 			ASSERT((ldcp->local_hparams.ver_major ==
3973 			    ldcp->peer_hparams.ver_major) &&
3974 			    (ldcp->local_hparams.ver_minor ==
3975 			    ldcp->peer_hparams.ver_minor));
3976 
3977 			vgen_set_vnet_proto_ops(ldcp);
3978 
3979 			/* move to the next phase */
3980 			vgen_handshake(vh_nextphase(ldcp));
3981 		}
3982 		break;
3983 
3984 	case VIO_SUBTYPE_NACK:
3985 
3986 		if (ldcp->hphase != VH_PHASE1) {
3987 			/*  This should not happen.  */
3988 			DWARN(vgenp, ldcp, "VER_NACK_RCVD Invalid "
3989 			"Phase(%u)\n", ldcp->hphase);
3990 			return (VGEN_FAILURE);
3991 		}
3992 
3993 		DBG2(vgenp, ldcp, "VER_NACK_RCVD next ver(%d,%d)\n",
3994 		    vermsg->ver_major, vermsg->ver_minor);
3995 
3996 		/* check if version in NACK is zero */
3997 		if (vermsg->ver_major == 0 && vermsg->ver_minor == 0) {
3998 			/*
3999 			 * Version Negotiation has failed.
4000 			 */
4001 			DWARN(vgenp, ldcp, "Version Negotiation Failed\n");
4002 			return (VGEN_FAILURE);
4003 		}
4004 
4005 		idx = 0;
4006 
4007 		for (;;) {
4008 
4009 			if (vermsg->ver_major > versions[idx].ver_major) {
4010 				/* select next lower version */
4011 
4012 				ldcp->local_hparams.ver_major =
4013 				    versions[idx].ver_major;
4014 				ldcp->local_hparams.ver_minor =
4015 				    versions[idx].ver_minor;
4016 				break;
4017 			}
4018 
4019 			if (vermsg->ver_major == versions[idx].ver_major) {
4020 				/* major version match */
4021 
4022 				ldcp->local_hparams.ver_major =
4023 				    versions[idx].ver_major;
4024 
4025 				ldcp->local_hparams.ver_minor =
4026 				    versions[idx].ver_minor;
4027 				break;
4028 			}
4029 
4030 			idx++;
4031 
4032 			if (idx == VGEN_NUM_VER) {
4033 				/*
4034 				 * no version match.
4035 				 * Version Negotiation has failed.
4036 				 */
4037 				DWARN(vgenp, ldcp,
4038 				    "Version Negotiation Failed\n");
4039 				return (VGEN_FAILURE);
4040 			}
4041 
4042 		}
4043 
4044 		rv = vgen_send_version_negotiate(ldcp);
4045 		if (rv != VGEN_SUCCESS) {
4046 			return (rv);
4047 		}
4048 
4049 		break;
4050 	}
4051 
4052 	DBG1(vgenp, ldcp, "exit\n");
4053 	return (VGEN_SUCCESS);
4054 }
4055 
4056 /* Check if the attributes are supported */
4057 static int
4058 vgen_check_attr_info(vgen_ldc_t *ldcp, vnet_attr_msg_t *msg)
4059 {
4060 	vgen_hparams_t	*lp = &ldcp->local_hparams;
4061 
4062 	if ((msg->mtu != ETHERMAX) ||
4063 	    (msg->addr_type != ADDR_TYPE_MAC) ||
4064 	    (msg->ack_freq > 64) ||
4065 	    (msg->xfer_mode != lp->xfer_mode)) {
4066 		return (VGEN_FAILURE);
4067 	}
4068 
4069 	return (VGEN_SUCCESS);
4070 }
4071 
4072 /*
4073  * Handle an attribute info msg from the peer or an ACK/NACK from the peer
4074  * to an attr info msg that we sent.
4075  */
4076 static int
4077 vgen_handle_attr_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4078 {
4079 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4080 	vnet_attr_msg_t *attrmsg = (vnet_attr_msg_t *)tagp;
4081 	int		ack = 0;
4082 	int		rv = 0;
4083 
4084 	DBG1(vgenp, ldcp, "enter\n");
4085 	if (ldcp->hphase != VH_PHASE2) {
4086 		DWARN(vgenp, ldcp, "Rcvd ATTR_INFO subtype(%d),"
4087 		" Invalid Phase(%u)\n",
4088 		    tagp->vio_subtype, ldcp->hphase);
4089 		return (VGEN_FAILURE);
4090 	}
4091 	switch (tagp->vio_subtype) {
4092 	case VIO_SUBTYPE_INFO:
4093 
4094 		DBG2(vgenp, ldcp, "ATTR_INFO_RCVD \n");
4095 		ldcp->hstate |= ATTR_INFO_RCVD;
4096 
4097 		/* save peer's values */
4098 		ldcp->peer_hparams.mtu = attrmsg->mtu;
4099 		ldcp->peer_hparams.addr = attrmsg->addr;
4100 		ldcp->peer_hparams.addr_type = attrmsg->addr_type;
4101 		ldcp->peer_hparams.xfer_mode = attrmsg->xfer_mode;
4102 		ldcp->peer_hparams.ack_freq = attrmsg->ack_freq;
4103 
4104 		if (vgen_check_attr_info(ldcp, attrmsg) == VGEN_FAILURE) {
4105 			/* unsupported attr, send NACK */
4106 			tagp->vio_subtype = VIO_SUBTYPE_NACK;
4107 		} else {
4108 			ack = 1;
4109 			tagp->vio_subtype = VIO_SUBTYPE_ACK;
4110 		}
4111 		tagp->vio_sid = ldcp->local_sid;
4112 
4113 		/* send reply msg back to peer */
4114 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*attrmsg),
4115 		    B_FALSE);
4116 		if (rv != VGEN_SUCCESS) {
4117 			return (rv);
4118 		}
4119 
4120 		if (ack) {
4121 			ldcp->hstate |= ATTR_ACK_SENT;
4122 			DBG2(vgenp, ldcp, "ATTR_ACK_SENT \n");
4123 		} else {
4124 			/* failed */
4125 			DWARN(vgenp, ldcp, "ATTR_NACK_SENT \n");
4126 			return (VGEN_FAILURE);
4127 		}
4128 
4129 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
4130 			vgen_handshake(vh_nextphase(ldcp));
4131 		}
4132 
4133 		break;
4134 
4135 	case VIO_SUBTYPE_ACK:
4136 
4137 		ldcp->hstate |= ATTR_ACK_RCVD;
4138 
4139 		DBG2(vgenp, ldcp, "ATTR_ACK_RCVD \n");
4140 
4141 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
4142 			vgen_handshake(vh_nextphase(ldcp));
4143 		}
4144 		break;
4145 
4146 	case VIO_SUBTYPE_NACK:
4147 
4148 		DBG2(vgenp, ldcp, "ATTR_NACK_RCVD \n");
4149 		return (VGEN_FAILURE);
4150 	}
4151 	DBG1(vgenp, ldcp, "exit\n");
4152 	return (VGEN_SUCCESS);
4153 }
4154 
4155 /* Check if the dring info msg is ok */
4156 static int
4157 vgen_check_dring_reg(vio_dring_reg_msg_t *msg)
4158 {
4159 	/* check if msg contents are ok */
4160 	if ((msg->num_descriptors < 128) || (msg->descriptor_size <
4161 	    sizeof (vnet_public_desc_t))) {
4162 		return (VGEN_FAILURE);
4163 	}
4164 	return (VGEN_SUCCESS);
4165 }
4166 
4167 /*
4168  * Handle a descriptor ring register msg from the peer or an ACK/NACK from
4169  * the peer to a dring register msg that we sent.
4170  */
4171 static int
4172 vgen_handle_dring_reg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4173 {
4174 	vio_dring_reg_msg_t *msg = (vio_dring_reg_msg_t *)tagp;
4175 	ldc_mem_cookie_t dcookie;
4176 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4177 	int ack = 0;
4178 	int rv = 0;
4179 
4180 	DBG1(vgenp, ldcp, "enter\n");
4181 	if (ldcp->hphase < VH_PHASE2) {
4182 		/* dring_info can be rcvd in any of the phases after Phase1 */
4183 		DWARN(vgenp, ldcp,
4184 		    "Rcvd DRING_INFO Subtype (%d), Invalid Phase(%u)\n",
4185 		    tagp->vio_subtype, ldcp->hphase);
4186 		return (VGEN_FAILURE);
4187 	}
4188 	switch (tagp->vio_subtype) {
4189 	case VIO_SUBTYPE_INFO:
4190 
4191 		DBG2(vgenp, ldcp, "DRING_INFO_RCVD \n");
4192 		ldcp->hstate |= DRING_INFO_RCVD;
4193 		bcopy((msg->cookie), &dcookie, sizeof (dcookie));
4194 
4195 		ASSERT(msg->ncookies == 1);
4196 
4197 		if (vgen_check_dring_reg(msg) == VGEN_SUCCESS) {
4198 			/*
4199 			 * verified dring info msg to be ok,
4200 			 * now try to map the remote dring.
4201 			 */
4202 			rv = vgen_init_rxds(ldcp, msg->num_descriptors,
4203 			    msg->descriptor_size, &dcookie,
4204 			    msg->ncookies);
4205 			if (rv == DDI_SUCCESS) {
4206 				/* now we can ack the peer */
4207 				ack = 1;
4208 			}
4209 		}
4210 		if (ack == 0) {
4211 			/* failed, send NACK */
4212 			tagp->vio_subtype = VIO_SUBTYPE_NACK;
4213 		} else {
4214 			if (!(ldcp->peer_hparams.dring_ready)) {
4215 
4216 				/* save peer's dring_info values */
4217 				bcopy(&dcookie,
4218 				    &(ldcp->peer_hparams.dring_cookie),
4219 				    sizeof (dcookie));
4220 				ldcp->peer_hparams.num_desc =
4221 				    msg->num_descriptors;
4222 				ldcp->peer_hparams.desc_size =
4223 				    msg->descriptor_size;
4224 				ldcp->peer_hparams.num_dcookies =
4225 				    msg->ncookies;
4226 
4227 				/* set dring_ident for the peer */
4228 				ldcp->peer_hparams.dring_ident =
4229 				    (uint64_t)ldcp->rxdp;
4230 				/* return the dring_ident in ack msg */
4231 				msg->dring_ident =
4232 				    (uint64_t)ldcp->rxdp;
4233 
4234 				ldcp->peer_hparams.dring_ready = B_TRUE;
4235 			}
4236 			tagp->vio_subtype = VIO_SUBTYPE_ACK;
4237 		}
4238 		tagp->vio_sid = ldcp->local_sid;
4239 		/* send reply msg back to peer */
4240 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msg),
4241 		    B_FALSE);
4242 		if (rv != VGEN_SUCCESS) {
4243 			return (rv);
4244 		}
4245 
4246 		if (ack) {
4247 			ldcp->hstate |= DRING_ACK_SENT;
4248 			DBG2(vgenp, ldcp, "DRING_ACK_SENT");
4249 		} else {
4250 			DWARN(vgenp, ldcp, "DRING_NACK_SENT");
4251 			return (VGEN_FAILURE);
4252 		}
4253 
4254 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
4255 			vgen_handshake(vh_nextphase(ldcp));
4256 		}
4257 
4258 		break;
4259 
4260 	case VIO_SUBTYPE_ACK:
4261 
4262 		ldcp->hstate |= DRING_ACK_RCVD;
4263 
4264 		DBG2(vgenp, ldcp, "DRING_ACK_RCVD");
4265 
4266 		if (!(ldcp->local_hparams.dring_ready)) {
4267 			/* local dring is now ready */
4268 			ldcp->local_hparams.dring_ready = B_TRUE;
4269 
4270 			/* save dring_ident acked by peer */
4271 			ldcp->local_hparams.dring_ident =
4272 			    msg->dring_ident;
4273 		}
4274 
4275 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
4276 			vgen_handshake(vh_nextphase(ldcp));
4277 		}
4278 
4279 		break;
4280 
4281 	case VIO_SUBTYPE_NACK:
4282 
4283 		DBG2(vgenp, ldcp, "DRING_NACK_RCVD");
4284 		return (VGEN_FAILURE);
4285 	}
4286 	DBG1(vgenp, ldcp, "exit\n");
4287 	return (VGEN_SUCCESS);
4288 }
4289 
4290 /*
4291  * Handle a rdx info msg from the peer or an ACK/NACK
4292  * from the peer to a rdx info msg that we sent.
4293  */
4294 static int
4295 vgen_handle_rdx_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4296 {
4297 	int rv = 0;
4298 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
4299 
4300 	DBG1(vgenp, ldcp, "enter\n");
4301 	if (ldcp->hphase != VH_PHASE3) {
4302 		DWARN(vgenp, ldcp,
4303 		    "Rcvd RDX_INFO Subtype (%d), Invalid Phase(%u)\n",
4304 		    tagp->vio_subtype, ldcp->hphase);
4305 		return (VGEN_FAILURE);
4306 	}
4307 	switch (tagp->vio_subtype) {
4308 	case VIO_SUBTYPE_INFO:
4309 
4310 		DBG2(vgenp, ldcp, "RDX_INFO_RCVD \n");
4311 		ldcp->hstate |= RDX_INFO_RCVD;
4312 
4313 		tagp->vio_subtype = VIO_SUBTYPE_ACK;
4314 		tagp->vio_sid = ldcp->local_sid;
4315 		/* send reply msg back to peer */
4316 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (vio_rdx_msg_t),
4317 		    B_FALSE);
4318 		if (rv != VGEN_SUCCESS) {
4319 			return (rv);
4320 		}
4321 
4322 		ldcp->hstate |= RDX_ACK_SENT;
4323 		DBG2(vgenp, ldcp, "RDX_ACK_SENT \n");
4324 
4325 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
4326 			vgen_handshake(vh_nextphase(ldcp));
4327 		}
4328 
4329 		break;
4330 
4331 	case VIO_SUBTYPE_ACK:
4332 
4333 		ldcp->hstate |= RDX_ACK_RCVD;
4334 
4335 		DBG2(vgenp, ldcp, "RDX_ACK_RCVD \n");
4336 
4337 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
4338 			vgen_handshake(vh_nextphase(ldcp));
4339 		}
4340 		break;
4341 
4342 	case VIO_SUBTYPE_NACK:
4343 
4344 		DBG2(vgenp, ldcp, "RDX_NACK_RCVD \n");
4345 		return (VGEN_FAILURE);
4346 	}
4347 	DBG1(vgenp, ldcp, "exit\n");
4348 	return (VGEN_SUCCESS);
4349 }
4350 
4351 /* Handle ACK/NACK from vsw to a set multicast msg that we sent */
4352 static int
4353 vgen_handle_mcast_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4354 {
4355 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4356 	vnet_mcast_msg_t *msgp = (vnet_mcast_msg_t *)tagp;
4357 	struct ether_addr *addrp;
4358 	int count;
4359 	int i;
4360 
4361 	DBG1(vgenp, ldcp, "enter\n");
4362 	switch (tagp->vio_subtype) {
4363 
4364 	case VIO_SUBTYPE_INFO:
4365 
4366 		/* vnet shouldn't recv set mcast msg, only vsw handles it */
4367 		DWARN(vgenp, ldcp, "rcvd SET_MCAST_INFO \n");
4368 		break;
4369 
4370 	case VIO_SUBTYPE_ACK:
4371 
4372 		/* success adding/removing multicast addr */
4373 		DBG1(vgenp, ldcp, "rcvd SET_MCAST_ACK \n");
4374 		break;
4375 
4376 	case VIO_SUBTYPE_NACK:
4377 
4378 		DWARN(vgenp, ldcp, "rcvd SET_MCAST_NACK \n");
4379 		if (!(msgp->set)) {
4380 			/* multicast remove request failed */
4381 			break;
4382 		}
4383 
4384 		/* multicast add request failed */
4385 		for (count = 0; count < msgp->count; count++) {
4386 			addrp = &(msgp->mca[count]);
4387 
4388 			/* delete address from the table */
4389 			for (i = 0; i < vgenp->mccount; i++) {
4390 				if (ether_cmp(addrp,
4391 				    &(vgenp->mctab[i])) == 0) {
4392 					if (vgenp->mccount > 1) {
4393 						int t = vgenp->mccount - 1;
4394 						vgenp->mctab[i] =
4395 						    vgenp->mctab[t];
4396 					}
4397 					vgenp->mccount--;
4398 					break;
4399 				}
4400 			}
4401 		}
4402 		break;
4403 
4404 	}
4405 	DBG1(vgenp, ldcp, "exit\n");
4406 
4407 	return (VGEN_SUCCESS);
4408 }
4409 
4410 /* handler for control messages received from the peer ldc end-point */
4411 static int
4412 vgen_handle_ctrlmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4413 {
4414 	int rv = 0;
4415 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4416 
4417 	DBG1(vgenp, ldcp, "enter\n");
4418 	switch (tagp->vio_subtype_env) {
4419 
4420 	case VIO_VER_INFO:
4421 		rv = vgen_handle_version_negotiate(ldcp, tagp);
4422 		break;
4423 
4424 	case VIO_ATTR_INFO:
4425 		rv = vgen_handle_attr_info(ldcp, tagp);
4426 		break;
4427 
4428 	case VIO_DRING_REG:
4429 		rv = vgen_handle_dring_reg(ldcp, tagp);
4430 		break;
4431 
4432 	case VIO_RDX:
4433 		rv = vgen_handle_rdx_info(ldcp, tagp);
4434 		break;
4435 
4436 	case VNET_MCAST_INFO:
4437 		rv = vgen_handle_mcast_info(ldcp, tagp);
4438 		break;
4439 
4440 	}
4441 
4442 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
4443 	return (rv);
4444 }
4445 
4446 /* handler for data messages received from the peer ldc end-point */
4447 static int
4448 vgen_handle_datamsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp, uint32_t msglen)
4449 {
4450 	int rv = 0;
4451 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4452 
4453 	DBG1(vgenp, ldcp, "enter\n");
4454 
4455 	if (ldcp->hphase != VH_DONE)
4456 		return (rv);
4457 
4458 	if (tagp->vio_subtype == VIO_SUBTYPE_INFO) {
4459 		rv = vgen_check_datamsg_seq(ldcp, tagp);
4460 		if (rv != 0) {
4461 			return (rv);
4462 		}
4463 	}
4464 
4465 	switch (tagp->vio_subtype_env) {
4466 	case VIO_DRING_DATA:
4467 		rv = vgen_handle_dring_data(ldcp, tagp);
4468 		break;
4469 
4470 	case VIO_PKT_DATA:
4471 		ldcp->rx_pktdata((void *)ldcp, (void *)tagp, msglen);
4472 		break;
4473 	default:
4474 		break;
4475 	}
4476 
4477 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
4478 	return (rv);
4479 }
4480 
4481 /*
4482  * dummy pkt data handler function for vnet protocol version 1.0
4483  */
4484 static void
4485 vgen_handle_pkt_data_nop(void *arg1, void *arg2, uint32_t msglen)
4486 {
4487 	_NOTE(ARGUNUSED(arg1, arg2, msglen))
4488 }
4489 
4490 /*
4491  * This function handles raw pkt data messages received over the channel.
4492  * Currently, only priority-eth-type frames are received through this mechanism.
4493  * In this case, the frame(data) is present within the message itself which
4494  * is copied into an mblk before sending it up the stack.
4495  */
4496 static void
4497 vgen_handle_pkt_data(void *arg1, void *arg2, uint32_t msglen)
4498 {
4499 	vgen_ldc_t		*ldcp = (vgen_ldc_t *)arg1;
4500 	vio_raw_data_msg_t	*pkt	= (vio_raw_data_msg_t *)arg2;
4501 	uint32_t		size;
4502 	mblk_t			*mp;
4503 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
4504 	vgen_stats_t		*statsp = &ldcp->stats;
4505 
4506 	ASSERT(MUTEX_HELD(&ldcp->cblock));
4507 
4508 	mutex_exit(&ldcp->cblock);
4509 
4510 	size = msglen - VIO_PKT_DATA_HDRSIZE;
4511 	if (size < ETHERMIN || size > ETHERMAX) {
4512 		(void) atomic_inc_32(&statsp->rx_pri_fail);
4513 		goto exit;
4514 	}
4515 
4516 	mp = vio_multipool_allocb(&ldcp->vmp, size);
4517 	if (mp == NULL) {
4518 		mp = allocb(size, BPRI_MED);
4519 		if (mp == NULL) {
4520 			(void) atomic_inc_32(&statsp->rx_pri_fail);
4521 			DWARN(vgenp, ldcp, "allocb failure, "
4522 			    "unable to process priority frame\n");
4523 			goto exit;
4524 		}
4525 	}
4526 
4527 	/* copy the frame from the payload of raw data msg into the mblk */
4528 	bcopy(pkt->data, mp->b_rptr, size);
4529 	mp->b_wptr = mp->b_rptr + size;
4530 
4531 	/* update stats */
4532 	(void) atomic_inc_64(&statsp->rx_pri_packets);
4533 	(void) atomic_add_64(&statsp->rx_pri_bytes, size);
4534 
4535 	/* send up; call vnet_rx() as cblock is already released */
4536 	vnet_rx(vgenp->vnetp, NULL, mp);
4537 
4538 exit:
4539 	mutex_enter(&ldcp->cblock);
4540 }
4541 
4542 static int
4543 vgen_send_dring_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp, uint32_t start,
4544     int32_t end, uint8_t pstate)
4545 {
4546 	int rv = 0;
4547 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4548 	vio_dring_msg_t *msgp = (vio_dring_msg_t *)tagp;
4549 
4550 	tagp->vio_subtype = VIO_SUBTYPE_ACK;
4551 	tagp->vio_sid = ldcp->local_sid;
4552 	msgp->start_idx = start;
4553 	msgp->end_idx = end;
4554 	msgp->dring_process_state = pstate;
4555 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msgp), B_FALSE);
4556 	if (rv != VGEN_SUCCESS) {
4557 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
4558 	}
4559 	return (rv);
4560 }
4561 
4562 static int
4563 vgen_handle_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4564 {
4565 	int rv = 0;
4566 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4567 
4568 
4569 	DBG1(vgenp, ldcp, "enter\n");
4570 	switch (tagp->vio_subtype) {
4571 
4572 	case VIO_SUBTYPE_INFO:
4573 		/*
4574 		 * To reduce the locking contention, release the
4575 		 * cblock here and re-acquire it once we are done
4576 		 * receiving packets.
4577 		 */
4578 		mutex_exit(&ldcp->cblock);
4579 		mutex_enter(&ldcp->rxlock);
4580 		rv = vgen_handle_dring_data_info(ldcp, tagp);
4581 		mutex_exit(&ldcp->rxlock);
4582 		mutex_enter(&ldcp->cblock);
4583 		break;
4584 
4585 	case VIO_SUBTYPE_ACK:
4586 		rv = vgen_handle_dring_data_ack(ldcp, tagp);
4587 		break;
4588 
4589 	case VIO_SUBTYPE_NACK:
4590 		rv = vgen_handle_dring_data_nack(ldcp, tagp);
4591 		break;
4592 	}
4593 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
4594 	return (rv);
4595 }
4596 
4597 static int
4598 vgen_handle_dring_data_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4599 {
4600 	uint32_t start;
4601 	int32_t end;
4602 	int rv = 0;
4603 	vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;
4604 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4605 #ifdef VGEN_HANDLE_LOST_PKTS
4606 	vgen_stats_t *statsp = &ldcp->stats;
4607 	uint32_t rxi;
4608 	int n;
4609 #endif
4610 
4611 	DBG1(vgenp, ldcp, "enter\n");
4612 
4613 	start = dringmsg->start_idx;
4614 	end = dringmsg->end_idx;
4615 	/*
4616 	 * received a data msg, which contains the start and end
4617 	 * indices of the descriptors within the rx ring holding data,
4618 	 * the seq_num of data packet corresponding to the start index,
4619 	 * and the dring_ident.
4620 	 * We can now read the contents of each of these descriptors
4621 	 * and gather data from it.
4622 	 */
4623 	DBG1(vgenp, ldcp, "INFO: start(%d), end(%d)\n",
4624 	    start, end);
4625 
4626 	/* validate rx start and end indeces */
4627 	if (!(CHECK_RXI(start, ldcp)) || ((end != -1) &&
4628 	    !(CHECK_RXI(end, ldcp)))) {
4629 		DWARN(vgenp, ldcp, "Invalid Rx start(%d) or end(%d)\n",
4630 		    start, end);
4631 		/* drop the message if invalid index */
4632 		return (rv);
4633 	}
4634 
4635 	/* validate dring_ident */
4636 	if (dringmsg->dring_ident != ldcp->peer_hparams.dring_ident) {
4637 		DWARN(vgenp, ldcp, "Invalid dring ident 0x%x\n",
4638 		    dringmsg->dring_ident);
4639 		/* invalid dring_ident, drop the msg */
4640 		return (rv);
4641 	}
4642 #ifdef DEBUG
4643 	if (vgen_trigger_rxlost) {
4644 		/* drop this msg to simulate lost pkts for debugging */
4645 		vgen_trigger_rxlost = 0;
4646 		return (rv);
4647 	}
4648 #endif
4649 
4650 #ifdef	VGEN_HANDLE_LOST_PKTS
4651 
4652 	/* receive start index doesn't match expected index */
4653 	if (ldcp->next_rxi != start) {
4654 		DWARN(vgenp, ldcp, "next_rxi(%d) != start(%d)\n",
4655 		    ldcp->next_rxi, start);
4656 
4657 		/* calculate the number of pkts lost */
4658 		if (start >= ldcp->next_rxi) {
4659 			n = start - ldcp->next_rxi;
4660 		} else  {
4661 			n = ldcp->num_rxds - (ldcp->next_rxi - start);
4662 		}
4663 
4664 		statsp->rx_lost_pkts += n;
4665 		tagp->vio_subtype = VIO_SUBTYPE_NACK;
4666 		tagp->vio_sid = ldcp->local_sid;
4667 		/* indicate the range of lost descriptors */
4668 		dringmsg->start_idx = ldcp->next_rxi;
4669 		rxi = start;
4670 		DECR_RXI(rxi, ldcp);
4671 		dringmsg->end_idx = rxi;
4672 		/* dring ident is left unchanged */
4673 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp,
4674 		    sizeof (*dringmsg), B_FALSE);
4675 		if (rv != VGEN_SUCCESS) {
4676 			DWARN(vgenp, ldcp,
4677 			    "vgen_sendmsg failed, stype:NACK\n");
4678 			return (rv);
4679 		}
4680 		/*
4681 		 * treat this range of descrs/pkts as dropped
4682 		 * and set the new expected value of next_rxi
4683 		 * and continue(below) to process from the new
4684 		 * start index.
4685 		 */
4686 		ldcp->next_rxi = start;
4687 	}
4688 
4689 #endif	/* VGEN_HANDLE_LOST_PKTS */
4690 
4691 	/* Now receive messages */
4692 	rv = vgen_process_dring_data(ldcp, tagp);
4693 
4694 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
4695 	return (rv);
4696 }
4697 
4698 static int
4699 vgen_process_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4700 {
4701 	boolean_t set_ack_start = B_FALSE;
4702 	uint32_t start;
4703 	uint32_t ack_end;
4704 	uint32_t next_rxi;
4705 	uint32_t rxi;
4706 	int count = 0;
4707 	int rv = 0;
4708 	uint32_t retries = 0;
4709 	vgen_stats_t *statsp;
4710 	vnet_public_desc_t *rxdp;
4711 	vio_dring_entry_hdr_t *hdrp;
4712 	mblk_t *bp = NULL;
4713 	mblk_t *bpt = NULL;
4714 	uint32_t ack_start;
4715 	uint32_t datalen;
4716 	uint32_t ncookies;
4717 	boolean_t rxd_err = B_FALSE;
4718 	mblk_t *mp = NULL;
4719 	size_t nbytes;
4720 	boolean_t ack_needed = B_FALSE;
4721 	size_t nread;
4722 	uint64_t off = 0;
4723 	struct ether_header *ehp;
4724 	vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;
4725 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4726 
4727 	DBG1(vgenp, ldcp, "enter\n");
4728 
4729 	statsp = &ldcp->stats;
4730 	start = dringmsg->start_idx;
4731 
4732 	/*
4733 	 * start processing the descriptors from the specified
4734 	 * start index, up to the index a descriptor is not ready
4735 	 * to be processed or we process the entire descriptor ring
4736 	 * and wrap around upto the start index.
4737 	 */
4738 
4739 	/* need to set the start index of descriptors to be ack'd */
4740 	set_ack_start = B_TRUE;
4741 
4742 	/* index upto which we have ack'd */
4743 	ack_end = start;
4744 	DECR_RXI(ack_end, ldcp);
4745 
4746 	next_rxi = rxi =  start;
4747 	do {
4748 vgen_recv_retry:
4749 		rv = ldc_mem_dring_acquire(ldcp->rx_dhandle, rxi, rxi);
4750 		if (rv != 0) {
4751 			DWARN(vgenp, ldcp, "ldc_mem_dring_acquire() failed"
4752 			    " rv(%d)\n", rv);
4753 			statsp->ierrors++;
4754 			return (rv);
4755 		}
4756 
4757 		rxdp = &(ldcp->rxdp[rxi]);
4758 		hdrp = &rxdp->hdr;
4759 
4760 		if (hdrp->dstate != VIO_DESC_READY) {
4761 			/*
4762 			 * Before waiting and retry here, send up
4763 			 * the packets that are received already
4764 			 */
4765 			if (bp != NULL) {
4766 				DTRACE_PROBE1(vgen_rcv_msgs, int, count);
4767 				vgen_rx(ldcp, bp);
4768 				count = 0;
4769 				bp = bpt = NULL;
4770 			}
4771 			/*
4772 			 * descriptor is not ready.
4773 			 * retry descriptor acquire, stop processing
4774 			 * after max # retries.
4775 			 */
4776 			if (retries == vgen_recv_retries)
4777 				break;
4778 			retries++;
4779 			drv_usecwait(vgen_recv_delay);
4780 			goto vgen_recv_retry;
4781 		}
4782 		retries = 0;
4783 
4784 		if (set_ack_start) {
4785 			/*
4786 			 * initialize the start index of the range
4787 			 * of descriptors to be ack'd.
4788 			 */
4789 			ack_start = rxi;
4790 			set_ack_start = B_FALSE;
4791 		}
4792 
4793 		datalen = rxdp->nbytes;
4794 		ncookies = rxdp->ncookies;
4795 		if ((datalen < ETHERMIN) ||
4796 		    (ncookies == 0) ||
4797 		    (ncookies > MAX_COOKIES)) {
4798 			rxd_err = B_TRUE;
4799 		} else {
4800 			/*
4801 			 * Try to allocate an mblk from the free pool
4802 			 * of recv mblks for the channel.
4803 			 * If this fails, use allocb().
4804 			 */
4805 			nbytes = (VNET_IPALIGN + datalen + 7) & ~7;
4806 			mp = vio_multipool_allocb(&ldcp->vmp, nbytes);
4807 			if (!mp) {
4808 				/*
4809 				 * The data buffer returned by
4810 				 * allocb(9F) is 8byte aligned. We
4811 				 * allocate extra 8 bytes to ensure
4812 				 * size is multiple of 8 bytes for
4813 				 * ldc_mem_copy().
4814 				 */
4815 				statsp->rx_vio_allocb_fail++;
4816 				mp = allocb(VNET_IPALIGN + datalen + 8,
4817 				    BPRI_MED);
4818 			}
4819 		}
4820 		if ((rxd_err) || (mp == NULL)) {
4821 			/*
4822 			 * rxd_err or allocb() failure,
4823 			 * drop this packet, get next.
4824 			 */
4825 			if (rxd_err) {
4826 				statsp->ierrors++;
4827 				rxd_err = B_FALSE;
4828 			} else {
4829 				statsp->rx_allocb_fail++;
4830 			}
4831 
4832 			ack_needed = hdrp->ack;
4833 
4834 			/* set descriptor done bit */
4835 			hdrp->dstate = VIO_DESC_DONE;
4836 
4837 			rv = ldc_mem_dring_release(ldcp->rx_dhandle,
4838 			    rxi, rxi);
4839 			if (rv != 0) {
4840 				DWARN(vgenp, ldcp,
4841 				    "ldc_mem_dring_release err rv(%d)\n", rv);
4842 				return (rv);
4843 			}
4844 
4845 			if (ack_needed) {
4846 				ack_needed = B_FALSE;
4847 				/*
4848 				 * sender needs ack for this packet,
4849 				 * ack pkts upto this index.
4850 				 */
4851 				ack_end = rxi;
4852 
4853 				rv = vgen_send_dring_ack(ldcp, tagp,
4854 				    ack_start, ack_end,
4855 				    VIO_DP_ACTIVE);
4856 				if (rv != VGEN_SUCCESS) {
4857 					goto error_ret;
4858 				}
4859 
4860 				/* need to set new ack start index */
4861 				set_ack_start = B_TRUE;
4862 			}
4863 			goto vgen_next_rxi;
4864 		}
4865 
4866 		nread = nbytes;
4867 		rv = ldc_mem_copy(ldcp->ldc_handle,
4868 		    (caddr_t)mp->b_rptr, off, &nread,
4869 		    rxdp->memcookie, ncookies, LDC_COPY_IN);
4870 
4871 		/* if ldc_mem_copy() failed */
4872 		if (rv) {
4873 			DWARN(vgenp, ldcp, "ldc_mem_copy err rv(%d)\n", rv);
4874 			statsp->ierrors++;
4875 			freemsg(mp);
4876 			goto error_ret;
4877 		}
4878 
4879 		ack_needed = hdrp->ack;
4880 		hdrp->dstate = VIO_DESC_DONE;
4881 
4882 		rv = ldc_mem_dring_release(ldcp->rx_dhandle, rxi, rxi);
4883 		if (rv != 0) {
4884 			DWARN(vgenp, ldcp,
4885 			    "ldc_mem_dring_release err rv(%d)\n", rv);
4886 			goto error_ret;
4887 		}
4888 
4889 		mp->b_rptr += VNET_IPALIGN;
4890 
4891 		if (ack_needed) {
4892 			ack_needed = B_FALSE;
4893 			/*
4894 			 * sender needs ack for this packet,
4895 			 * ack pkts upto this index.
4896 			 */
4897 			ack_end = rxi;
4898 
4899 			rv = vgen_send_dring_ack(ldcp, tagp,
4900 			    ack_start, ack_end, VIO_DP_ACTIVE);
4901 			if (rv != VGEN_SUCCESS) {
4902 				goto error_ret;
4903 			}
4904 
4905 			/* need to set new ack start index */
4906 			set_ack_start = B_TRUE;
4907 		}
4908 
4909 		if (nread != nbytes) {
4910 			DWARN(vgenp, ldcp,
4911 			    "ldc_mem_copy nread(%lx), nbytes(%lx)\n",
4912 			    nread, nbytes);
4913 			statsp->ierrors++;
4914 			freemsg(mp);
4915 			goto vgen_next_rxi;
4916 		}
4917 
4918 		/* point to the actual end of data */
4919 		mp->b_wptr = mp->b_rptr + datalen;
4920 
4921 		/* update stats */
4922 		statsp->ipackets++;
4923 		statsp->rbytes += datalen;
4924 		ehp = (struct ether_header *)mp->b_rptr;
4925 		if (IS_BROADCAST(ehp))
4926 			statsp->brdcstrcv++;
4927 		else if (IS_MULTICAST(ehp))
4928 			statsp->multircv++;
4929 
4930 		/* build a chain of received packets */
4931 		if (bp == NULL) {
4932 			/* first pkt */
4933 			bp = mp;
4934 			bpt = bp;
4935 			bpt->b_next = NULL;
4936 		} else {
4937 			mp->b_next = NULL;
4938 			bpt->b_next = mp;
4939 			bpt = mp;
4940 		}
4941 
4942 		if (count++ > vgen_chain_len) {
4943 			DTRACE_PROBE1(vgen_rcv_msgs, int, count);
4944 			vgen_rx(ldcp, bp);
4945 			count = 0;
4946 			bp = bpt = NULL;
4947 		}
4948 
4949 vgen_next_rxi:
4950 		/* update end index of range of descrs to be ack'd */
4951 		ack_end = rxi;
4952 
4953 		/* update the next index to be processed */
4954 		INCR_RXI(next_rxi, ldcp);
4955 		if (next_rxi == start) {
4956 			/*
4957 			 * processed the entire descriptor ring upto
4958 			 * the index at which we started.
4959 			 */
4960 			break;
4961 		}
4962 
4963 		rxi = next_rxi;
4964 
4965 	_NOTE(CONSTCOND)
4966 	} while (1);
4967 
4968 	/*
4969 	 * send an ack message to peer indicating that we have stopped
4970 	 * processing descriptors.
4971 	 */
4972 	if (set_ack_start) {
4973 		/*
4974 		 * We have ack'd upto some index and we have not
4975 		 * processed any descriptors beyond that index.
4976 		 * Use the last ack'd index as both the start and
4977 		 * end of range of descrs being ack'd.
4978 		 * Note: This results in acking the last index twice
4979 		 * and should be harmless.
4980 		 */
4981 		ack_start = ack_end;
4982 	}
4983 
4984 	rv = vgen_send_dring_ack(ldcp, tagp, ack_start, ack_end,
4985 	    VIO_DP_STOPPED);
4986 	if (rv != VGEN_SUCCESS) {
4987 		goto error_ret;
4988 	}
4989 
4990 	/* save new recv index of next dring msg */
4991 	ldcp->next_rxi = next_rxi;
4992 
4993 error_ret:
4994 	/* send up packets received so far */
4995 	if (bp != NULL) {
4996 		DTRACE_PROBE1(vgen_rcv_msgs, int, count);
4997 		vgen_rx(ldcp, bp);
4998 		bp = bpt = NULL;
4999 	}
5000 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
5001 	return (rv);
5002 
5003 }
5004 
5005 static int
5006 vgen_handle_dring_data_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5007 {
5008 	int rv = 0;
5009 	uint32_t start;
5010 	int32_t end;
5011 	uint32_t txi;
5012 	boolean_t ready_txd = B_FALSE;
5013 	vgen_stats_t *statsp;
5014 	vgen_private_desc_t *tbufp;
5015 	vnet_public_desc_t *txdp;
5016 	vio_dring_entry_hdr_t *hdrp;
5017 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5018 	vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;
5019 
5020 	DBG1(vgenp, ldcp, "enter\n");
5021 	start = dringmsg->start_idx;
5022 	end = dringmsg->end_idx;
5023 	statsp = &ldcp->stats;
5024 
5025 	/*
5026 	 * received an ack corresponding to a specific descriptor for
5027 	 * which we had set the ACK bit in the descriptor (during
5028 	 * transmit). This enables us to reclaim descriptors.
5029 	 */
5030 
5031 	DBG2(vgenp, ldcp, "ACK:  start(%d), end(%d)\n", start, end);
5032 
5033 	/* validate start and end indeces in the tx ack msg */
5034 	if (!(CHECK_TXI(start, ldcp)) || !(CHECK_TXI(end, ldcp))) {
5035 		/* drop the message if invalid index */
5036 		DWARN(vgenp, ldcp, "Invalid Tx ack start(%d) or end(%d)\n",
5037 		    start, end);
5038 		return (rv);
5039 	}
5040 	/* validate dring_ident */
5041 	if (dringmsg->dring_ident != ldcp->local_hparams.dring_ident) {
5042 		/* invalid dring_ident, drop the msg */
5043 		DWARN(vgenp, ldcp, "Invalid dring ident 0x%x\n",
5044 		    dringmsg->dring_ident);
5045 		return (rv);
5046 	}
5047 	statsp->dring_data_acks++;
5048 
5049 	/* reclaim descriptors that are done */
5050 	vgen_reclaim(ldcp);
5051 
5052 	if (dringmsg->dring_process_state != VIO_DP_STOPPED) {
5053 		/*
5054 		 * receiver continued processing descriptors after
5055 		 * sending us the ack.
5056 		 */
5057 		return (rv);
5058 	}
5059 
5060 	statsp->dring_stopped_acks++;
5061 
5062 	/* receiver stopped processing descriptors */
5063 	mutex_enter(&ldcp->wrlock);
5064 	mutex_enter(&ldcp->tclock);
5065 
5066 	/*
5067 	 * determine if there are any pending tx descriptors
5068 	 * ready to be processed by the receiver(peer) and if so,
5069 	 * send a message to the peer to restart receiving.
5070 	 */
5071 	ready_txd = B_FALSE;
5072 
5073 	/*
5074 	 * using the end index of the descriptor range for which
5075 	 * we received the ack, check if the next descriptor is
5076 	 * ready.
5077 	 */
5078 	txi = end;
5079 	INCR_TXI(txi, ldcp);
5080 	tbufp = &ldcp->tbufp[txi];
5081 	txdp = tbufp->descp;
5082 	hdrp = &txdp->hdr;
5083 	if (hdrp->dstate == VIO_DESC_READY) {
5084 		ready_txd = B_TRUE;
5085 	} else {
5086 		/*
5087 		 * descr next to the end of ack'd descr range is not
5088 		 * ready.
5089 		 * starting from the current reclaim index, check
5090 		 * if any descriptor is ready.
5091 		 */
5092 
5093 		txi = ldcp->cur_tbufp - ldcp->tbufp;
5094 		tbufp = &ldcp->tbufp[txi];
5095 
5096 		txdp = tbufp->descp;
5097 		hdrp = &txdp->hdr;
5098 		if (hdrp->dstate == VIO_DESC_READY) {
5099 			ready_txd = B_TRUE;
5100 		}
5101 
5102 	}
5103 
5104 	if (ready_txd) {
5105 		/*
5106 		 * we have tx descriptor(s) ready to be
5107 		 * processed by the receiver.
5108 		 * send a message to the peer with the start index
5109 		 * of ready descriptors.
5110 		 */
5111 		rv = vgen_send_dring_data(ldcp, txi, -1);
5112 		if (rv != VGEN_SUCCESS) {
5113 			ldcp->resched_peer = B_TRUE;
5114 			ldcp->resched_peer_txi = txi;
5115 			mutex_exit(&ldcp->tclock);
5116 			mutex_exit(&ldcp->wrlock);
5117 			return (rv);
5118 		}
5119 	} else {
5120 		/*
5121 		 * no ready tx descriptors. set the flag to send a
5122 		 * message to peer when tx descriptors are ready in
5123 		 * transmit routine.
5124 		 */
5125 		ldcp->resched_peer = B_TRUE;
5126 		ldcp->resched_peer_txi = ldcp->cur_tbufp - ldcp->tbufp;
5127 	}
5128 
5129 	mutex_exit(&ldcp->tclock);
5130 	mutex_exit(&ldcp->wrlock);
5131 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
5132 	return (rv);
5133 }
5134 
5135 static int
5136 vgen_handle_dring_data_nack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5137 {
5138 	int rv = 0;
5139 	uint32_t start;
5140 	int32_t end;
5141 	uint32_t txi;
5142 	vnet_public_desc_t *txdp;
5143 	vio_dring_entry_hdr_t *hdrp;
5144 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5145 	vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;
5146 
5147 	DBG1(vgenp, ldcp, "enter\n");
5148 	start = dringmsg->start_idx;
5149 	end = dringmsg->end_idx;
5150 
5151 	/*
5152 	 * peer sent a NACK msg to indicate lost packets.
5153 	 * The start and end correspond to the range of descriptors
5154 	 * for which the peer didn't receive a dring data msg and so
5155 	 * didn't receive the corresponding data.
5156 	 */
5157 	DWARN(vgenp, ldcp, "NACK: start(%d), end(%d)\n", start, end);
5158 
5159 	/* validate start and end indeces in the tx nack msg */
5160 	if (!(CHECK_TXI(start, ldcp)) || !(CHECK_TXI(end, ldcp))) {
5161 		/* drop the message if invalid index */
5162 		DWARN(vgenp, ldcp, "Invalid Tx nack start(%d) or end(%d)\n",
5163 		    start, end);
5164 		return (rv);
5165 	}
5166 	/* validate dring_ident */
5167 	if (dringmsg->dring_ident != ldcp->local_hparams.dring_ident) {
5168 		/* invalid dring_ident, drop the msg */
5169 		DWARN(vgenp, ldcp, "Invalid dring ident 0x%x\n",
5170 		    dringmsg->dring_ident);
5171 		return (rv);
5172 	}
5173 	mutex_enter(&ldcp->txlock);
5174 	mutex_enter(&ldcp->tclock);
5175 
5176 	if (ldcp->next_tbufp == ldcp->cur_tbufp) {
5177 		/* no busy descriptors, bogus nack ? */
5178 		mutex_exit(&ldcp->tclock);
5179 		mutex_exit(&ldcp->txlock);
5180 		return (rv);
5181 	}
5182 
5183 	/* we just mark the descrs as done so they can be reclaimed */
5184 	for (txi = start; txi <= end; ) {
5185 		txdp = &(ldcp->txdp[txi]);
5186 		hdrp = &txdp->hdr;
5187 		if (hdrp->dstate == VIO_DESC_READY)
5188 			hdrp->dstate = VIO_DESC_DONE;
5189 		INCR_TXI(txi, ldcp);
5190 	}
5191 	mutex_exit(&ldcp->tclock);
5192 	mutex_exit(&ldcp->txlock);
5193 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
5194 	return (rv);
5195 }
5196 
5197 static void
5198 vgen_reclaim(vgen_ldc_t *ldcp)
5199 {
5200 	mutex_enter(&ldcp->tclock);
5201 
5202 	vgen_reclaim_dring(ldcp);
5203 	ldcp->reclaim_lbolt = ddi_get_lbolt();
5204 
5205 	mutex_exit(&ldcp->tclock);
5206 }
5207 
5208 /*
5209  * transmit reclaim function. starting from the current reclaim index
5210  * look for descriptors marked DONE and reclaim the descriptor and the
5211  * corresponding buffers (tbuf).
5212  */
5213 static void
5214 vgen_reclaim_dring(vgen_ldc_t *ldcp)
5215 {
5216 	int count = 0;
5217 	vnet_public_desc_t *txdp;
5218 	vgen_private_desc_t *tbufp;
5219 	vio_dring_entry_hdr_t	*hdrp;
5220 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
5221 
5222 #ifdef DEBUG
5223 	if (vgen_trigger_txtimeout)
5224 		return;
5225 #endif
5226 
5227 	tbufp = ldcp->cur_tbufp;
5228 	txdp = tbufp->descp;
5229 	hdrp = &txdp->hdr;
5230 
5231 	while ((hdrp->dstate == VIO_DESC_DONE) &&
5232 	    (tbufp != ldcp->next_tbufp)) {
5233 		tbufp->flags = VGEN_PRIV_DESC_FREE;
5234 		hdrp->dstate = VIO_DESC_FREE;
5235 		hdrp->ack = B_FALSE;
5236 
5237 		tbufp = NEXTTBUF(ldcp, tbufp);
5238 		txdp = tbufp->descp;
5239 		hdrp = &txdp->hdr;
5240 		count++;
5241 	}
5242 
5243 	ldcp->cur_tbufp = tbufp;
5244 
5245 	/*
5246 	 * Check if mac layer should be notified to restart transmissions
5247 	 */
5248 	if ((ldcp->need_resched) && (count > 0)) {
5249 		ldcp->need_resched = B_FALSE;
5250 		vnet_tx_update(vgenp->vnetp);
5251 	}
5252 }
5253 
5254 /* return the number of pending transmits for the channel */
5255 static int
5256 vgen_num_txpending(vgen_ldc_t *ldcp)
5257 {
5258 	int n;
5259 
5260 	if (ldcp->next_tbufp >= ldcp->cur_tbufp) {
5261 		n = ldcp->next_tbufp - ldcp->cur_tbufp;
5262 	} else  {
5263 		/* cur_tbufp > next_tbufp */
5264 		n = ldcp->num_txds - (ldcp->cur_tbufp - ldcp->next_tbufp);
5265 	}
5266 
5267 	return (n);
5268 }
5269 
5270 /* determine if the transmit descriptor ring is full */
5271 static int
5272 vgen_tx_dring_full(vgen_ldc_t *ldcp)
5273 {
5274 	vgen_private_desc_t	*tbufp;
5275 	vgen_private_desc_t	*ntbufp;
5276 
5277 	tbufp = ldcp->next_tbufp;
5278 	ntbufp = NEXTTBUF(ldcp, tbufp);
5279 	if (ntbufp == ldcp->cur_tbufp) { /* out of tbufs/txds */
5280 		return (VGEN_SUCCESS);
5281 	}
5282 	return (VGEN_FAILURE);
5283 }
5284 
5285 /* determine if timeout condition has occured */
5286 static int
5287 vgen_ldc_txtimeout(vgen_ldc_t *ldcp)
5288 {
5289 	if (((ddi_get_lbolt() - ldcp->reclaim_lbolt) >
5290 	    drv_usectohz(vnet_ldcwd_txtimeout * 1000)) &&
5291 	    (vnet_ldcwd_txtimeout) &&
5292 	    (vgen_tx_dring_full(ldcp) == VGEN_SUCCESS)) {
5293 		return (VGEN_SUCCESS);
5294 	} else {
5295 		return (VGEN_FAILURE);
5296 	}
5297 }
5298 
5299 /* transmit watchdog timeout handler */
5300 static void
5301 vgen_ldc_watchdog(void *arg)
5302 {
5303 	vgen_ldc_t *ldcp;
5304 	vgen_t *vgenp;
5305 	int rv;
5306 
5307 	ldcp = (vgen_ldc_t *)arg;
5308 	vgenp = LDC_TO_VGEN(ldcp);
5309 
5310 	rv = vgen_ldc_txtimeout(ldcp);
5311 	if (rv == VGEN_SUCCESS) {
5312 		DWARN(vgenp, ldcp, "transmit timeout\n");
5313 #ifdef DEBUG
5314 		if (vgen_trigger_txtimeout) {
5315 			/* tx timeout triggered for debugging */
5316 			vgen_trigger_txtimeout = 0;
5317 		}
5318 #endif
5319 		mutex_enter(&ldcp->cblock);
5320 		ldcp->need_ldc_reset = B_TRUE;
5321 		vgen_handshake_retry(ldcp);
5322 		mutex_exit(&ldcp->cblock);
5323 		if (ldcp->need_resched) {
5324 			ldcp->need_resched = B_FALSE;
5325 			vnet_tx_update(vgenp->vnetp);
5326 		}
5327 	}
5328 
5329 	ldcp->wd_tid = timeout(vgen_ldc_watchdog, (caddr_t)ldcp,
5330 	    drv_usectohz(vnet_ldcwd_interval * 1000));
5331 }
5332 
5333 /* handler for error messages received from the peer ldc end-point */
5334 static void
5335 vgen_handle_errmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5336 {
5337 	_NOTE(ARGUNUSED(ldcp, tagp))
5338 }
5339 
5340 static int
5341 vgen_check_datamsg_seq(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5342 {
5343 	vio_raw_data_msg_t	*rmsg;
5344 	vio_dring_msg_t		*dmsg;
5345 	uint64_t		seq_num;
5346 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
5347 
5348 	if (tagp->vio_subtype_env == VIO_DRING_DATA) {
5349 		dmsg = (vio_dring_msg_t *)tagp;
5350 		seq_num = dmsg->seq_num;
5351 	} else if (tagp->vio_subtype_env == VIO_PKT_DATA) {
5352 		rmsg = (vio_raw_data_msg_t *)tagp;
5353 		seq_num = rmsg->seq_num;
5354 	} else {
5355 		return (EINVAL);
5356 	}
5357 
5358 	if (seq_num != ldcp->next_rxseq) {
5359 
5360 		/* seqnums don't match */
5361 		DWARN(vgenp, ldcp,
5362 		    "next_rxseq(0x%lx) != seq_num(0x%lx)\n",
5363 		    ldcp->next_rxseq, seq_num);
5364 
5365 		ldcp->need_ldc_reset = B_TRUE;
5366 		return (EINVAL);
5367 
5368 	}
5369 
5370 	ldcp->next_rxseq++;
5371 
5372 	return (0);
5373 }
5374 
5375 /* Check if the session id in the received message is valid */
5376 static int
5377 vgen_check_sid(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5378 {
5379 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5380 
5381 	if (tagp->vio_sid != ldcp->peer_sid) {
5382 		DWARN(vgenp, ldcp, "sid mismatch: expected(%x), rcvd(%x)\n",
5383 		    ldcp->peer_sid, tagp->vio_sid);
5384 		return (VGEN_FAILURE);
5385 	}
5386 	else
5387 		return (VGEN_SUCCESS);
5388 }
5389 
5390 static caddr_t
5391 vgen_print_ethaddr(uint8_t *a, char *ebuf)
5392 {
5393 	(void) sprintf(ebuf,
5394 	    "%x:%x:%x:%x:%x:%x", a[0], a[1], a[2], a[3], a[4], a[5]);
5395 	return (ebuf);
5396 }
5397 
5398 /* Handshake watchdog timeout handler */
5399 static void
5400 vgen_hwatchdog(void *arg)
5401 {
5402 	vgen_ldc_t *ldcp = (vgen_ldc_t *)arg;
5403 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5404 
5405 	DWARN(vgenp, ldcp,
5406 	    "handshake timeout ldc(%lx) phase(%x) state(%x)\n",
5407 	    ldcp->hphase, ldcp->hstate);
5408 
5409 	mutex_enter(&ldcp->cblock);
5410 	if (ldcp->cancel_htid) {
5411 		ldcp->cancel_htid = 0;
5412 		mutex_exit(&ldcp->cblock);
5413 		return;
5414 	}
5415 	ldcp->htid = 0;
5416 	ldcp->need_ldc_reset = B_TRUE;
5417 	vgen_handshake_retry(ldcp);
5418 	mutex_exit(&ldcp->cblock);
5419 }
5420 
5421 static void
5422 vgen_print_hparams(vgen_hparams_t *hp)
5423 {
5424 	uint8_t	addr[6];
5425 	char	ea[6];
5426 	ldc_mem_cookie_t *dc;
5427 
5428 	cmn_err(CE_CONT, "version_info:\n");
5429 	cmn_err(CE_CONT,
5430 	    "\tver_major: %d, ver_minor: %d, dev_class: %d\n",
5431 	    hp->ver_major, hp->ver_minor, hp->dev_class);
5432 
5433 	vnet_macaddr_ultostr(hp->addr, addr);
5434 	cmn_err(CE_CONT, "attr_info:\n");
5435 	cmn_err(CE_CONT, "\tMTU: %lx, addr: %s\n", hp->mtu,
5436 	    vgen_print_ethaddr(addr, ea));
5437 	cmn_err(CE_CONT,
5438 	    "\taddr_type: %x, xfer_mode: %x, ack_freq: %x\n",
5439 	    hp->addr_type, hp->xfer_mode, hp->ack_freq);
5440 
5441 	dc = &hp->dring_cookie;
5442 	cmn_err(CE_CONT, "dring_info:\n");
5443 	cmn_err(CE_CONT,
5444 	    "\tlength: %d, dsize: %d\n", hp->num_desc, hp->desc_size);
5445 	cmn_err(CE_CONT,
5446 	    "\tldc_addr: 0x%lx, ldc_size: %ld\n",
5447 	    dc->addr, dc->size);
5448 	cmn_err(CE_CONT, "\tdring_ident: 0x%lx\n", hp->dring_ident);
5449 }
5450 
5451 static void
5452 vgen_print_ldcinfo(vgen_ldc_t *ldcp)
5453 {
5454 	vgen_hparams_t *hp;
5455 
5456 	cmn_err(CE_CONT, "Channel Information:\n");
5457 	cmn_err(CE_CONT,
5458 	    "\tldc_id: 0x%lx, ldc_status: 0x%x\n",
5459 	    ldcp->ldc_id, ldcp->ldc_status);
5460 	cmn_err(CE_CONT,
5461 	    "\tlocal_sid: 0x%x, peer_sid: 0x%x\n",
5462 	    ldcp->local_sid, ldcp->peer_sid);
5463 	cmn_err(CE_CONT,
5464 	    "\thphase: 0x%x, hstate: 0x%x\n",
5465 	    ldcp->hphase, ldcp->hstate);
5466 
5467 	cmn_err(CE_CONT, "Local handshake params:\n");
5468 	hp = &ldcp->local_hparams;
5469 	vgen_print_hparams(hp);
5470 
5471 	cmn_err(CE_CONT, "Peer handshake params:\n");
5472 	hp = &ldcp->peer_hparams;
5473 	vgen_print_hparams(hp);
5474 }
5475 
5476 /*
5477  * Send received packets up the stack.
5478  */
5479 static void
5480 vgen_rx(vgen_ldc_t *ldcp, mblk_t *bp)
5481 {
5482 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5483 
5484 	if (ldcp->rcv_thread != NULL) {
5485 		ASSERT(MUTEX_HELD(&ldcp->rxlock));
5486 		mutex_exit(&ldcp->rxlock);
5487 	} else {
5488 		ASSERT(MUTEX_HELD(&ldcp->cblock));
5489 		mutex_exit(&ldcp->cblock);
5490 	}
5491 
5492 	vnet_rx(vgenp->vnetp, NULL, bp);
5493 
5494 	if (ldcp->rcv_thread != NULL) {
5495 		mutex_enter(&ldcp->rxlock);
5496 	} else {
5497 		mutex_enter(&ldcp->cblock);
5498 	}
5499 }
5500 
5501 /*
5502  * vgen_ldc_rcv_worker -- A per LDC worker thread to receive data.
5503  * This thread is woken up by the LDC interrupt handler to process
5504  * LDC packets and receive data.
5505  */
5506 static void
5507 vgen_ldc_rcv_worker(void *arg)
5508 {
5509 	callb_cpr_t	cprinfo;
5510 	vgen_ldc_t *ldcp = (vgen_ldc_t *)arg;
5511 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5512 
5513 	DBG1(vgenp, ldcp, "enter\n");
5514 	CALLB_CPR_INIT(&cprinfo, &ldcp->rcv_thr_lock, callb_generic_cpr,
5515 	    "vnet_rcv_thread");
5516 	mutex_enter(&ldcp->rcv_thr_lock);
5517 	ldcp->rcv_thr_flags |= VGEN_WTHR_RUNNING;
5518 	while (!(ldcp->rcv_thr_flags & VGEN_WTHR_STOP)) {
5519 
5520 		CALLB_CPR_SAFE_BEGIN(&cprinfo);
5521 		/*
5522 		 * Wait until the data is received or a stop
5523 		 * request is received.
5524 		 */
5525 		while (!(ldcp->rcv_thr_flags &
5526 		    (VGEN_WTHR_DATARCVD | VGEN_WTHR_STOP))) {
5527 			cv_wait(&ldcp->rcv_thr_cv, &ldcp->rcv_thr_lock);
5528 		}
5529 		CALLB_CPR_SAFE_END(&cprinfo, &ldcp->rcv_thr_lock)
5530 
5531 		/*
5532 		 * First process the stop request.
5533 		 */
5534 		if (ldcp->rcv_thr_flags & VGEN_WTHR_STOP) {
5535 			DBG2(vgenp, ldcp, "stopped\n");
5536 			break;
5537 		}
5538 		ldcp->rcv_thr_flags &= ~VGEN_WTHR_DATARCVD;
5539 		mutex_exit(&ldcp->rcv_thr_lock);
5540 		DBG2(vgenp, ldcp, "calling vgen_handle_evt_read\n");
5541 		vgen_handle_evt_read(ldcp);
5542 		mutex_enter(&ldcp->rcv_thr_lock);
5543 	}
5544 
5545 	/*
5546 	 * Update the run status and wakeup the thread that
5547 	 * has sent the stop request.
5548 	 */
5549 	ldcp->rcv_thr_flags &= ~VGEN_WTHR_RUNNING;
5550 	cv_signal(&ldcp->rcv_thr_cv);
5551 	CALLB_CPR_EXIT(&cprinfo);
5552 	thread_exit();
5553 	DBG1(vgenp, ldcp, "exit\n");
5554 }
5555 
5556 /* vgen_stop_rcv_thread -- Co-ordinate with receive thread to stop it */
5557 static void
5558 vgen_stop_rcv_thread(vgen_ldc_t *ldcp)
5559 {
5560 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5561 
5562 	DBG1(vgenp, ldcp, "enter\n");
5563 	/*
5564 	 * Send a stop request by setting the stop flag and
5565 	 * wait until the receive thread stops.
5566 	 */
5567 	mutex_enter(&ldcp->rcv_thr_lock);
5568 	if (ldcp->rcv_thr_flags & VGEN_WTHR_RUNNING) {
5569 		ldcp->rcv_thr_flags |= VGEN_WTHR_STOP;
5570 		cv_signal(&ldcp->rcv_thr_cv);
5571 		DBG2(vgenp, ldcp, "waiting...");
5572 		while (ldcp->rcv_thr_flags & VGEN_WTHR_RUNNING) {
5573 			cv_wait(&ldcp->rcv_thr_cv, &ldcp->rcv_thr_lock);
5574 		}
5575 	}
5576 	mutex_exit(&ldcp->rcv_thr_lock);
5577 	ldcp->rcv_thread = NULL;
5578 	DBG1(vgenp, ldcp, "exit\n");
5579 }
5580 
5581 #if DEBUG
5582 
5583 /*
5584  * Print debug messages - set to 0xf to enable all msgs
5585  */
5586 static void
5587 debug_printf(const char *fname, vgen_t *vgenp,
5588     vgen_ldc_t *ldcp, const char *fmt, ...)
5589 {
5590 	char    buf[256];
5591 	char    *bufp = buf;
5592 	va_list ap;
5593 
5594 	if ((vgenp != NULL) && (vgenp->vnetp != NULL)) {
5595 		(void) sprintf(bufp, "vnet%d:",
5596 		    ((vnet_t *)(vgenp->vnetp))->instance);
5597 		bufp += strlen(bufp);
5598 	}
5599 	if (ldcp != NULL) {
5600 		(void) sprintf(bufp, "ldc(%ld):", ldcp->ldc_id);
5601 		bufp += strlen(bufp);
5602 	}
5603 	(void) sprintf(bufp, "%s: ", fname);
5604 	bufp += strlen(bufp);
5605 
5606 	va_start(ap, fmt);
5607 	(void) vsprintf(bufp, fmt, ap);
5608 	va_end(ap);
5609 
5610 	if ((ldcp == NULL) ||(vgendbg_ldcid == -1) ||
5611 	    (vgendbg_ldcid == ldcp->ldc_id)) {
5612 		cmn_err(CE_CONT, "%s\n", buf);
5613 	}
5614 }
5615 #endif
5616