xref: /titanic_44/usr/src/uts/sun4v/io/vnet_gen.c (revision e4b86885570d77af552e9cf94f142f4d744fb8c8)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/types.h>
28 #include <sys/errno.h>
29 #include <sys/sysmacros.h>
30 #include <sys/param.h>
31 #include <sys/stream.h>
32 #include <sys/strsubr.h>
33 #include <sys/kmem.h>
34 #include <sys/conf.h>
35 #include <sys/devops.h>
36 #include <sys/ksynch.h>
37 #include <sys/stat.h>
38 #include <sys/modctl.h>
39 #include <sys/debug.h>
40 #include <sys/ethernet.h>
41 #include <sys/ddi.h>
42 #include <sys/sunddi.h>
43 #include <sys/strsun.h>
44 #include <sys/note.h>
45 #include <sys/mac.h>
46 #include <sys/mac_ether.h>
47 #include <sys/ldc.h>
48 #include <sys/mach_descrip.h>
49 #include <sys/mdeg.h>
50 #include <net/if.h>
51 #include <sys/vnet.h>
52 #include <sys/vio_mailbox.h>
53 #include <sys/vio_common.h>
54 #include <sys/vnet_common.h>
55 #include <sys/vnet_mailbox.h>
56 #include <sys/vio_util.h>
57 #include <sys/vnet_gen.h>
58 #include <sys/atomic.h>
59 #include <sys/callb.h>
60 #include <sys/sdt.h>
61 #include <sys/intr.h>
62 #include <sys/pattr.h>
63 #include <sys/vlan.h>
64 
65 /*
66  * Implementation of the mac functionality for vnet using the
67  * generic(default) transport layer of sun4v Logical Domain Channels(LDC).
68  */
69 
70 /*
71  * Function prototypes.
72  */
73 /* vgen proxy entry points */
74 int vgen_init(void *vnetp, uint64_t regprop, dev_info_t *vnetdip,
75     const uint8_t *macaddr, void **vgenhdl);
76 int vgen_uninit(void *arg);
77 int vgen_dds_tx(void *arg, void *dmsg);
78 static int vgen_start(void *arg);
79 static void vgen_stop(void *arg);
80 static mblk_t *vgen_tx(void *arg, mblk_t *mp);
81 static int vgen_multicst(void *arg, boolean_t add,
82 	const uint8_t *mca);
83 static int vgen_promisc(void *arg, boolean_t on);
84 static int vgen_unicst(void *arg, const uint8_t *mca);
85 static int vgen_stat(void *arg, uint_t stat, uint64_t *val);
86 static void vgen_ioctl(void *arg, queue_t *wq, mblk_t *mp);
87 
88 /* vgen internal functions */
89 static int vgen_read_mdprops(vgen_t *vgenp);
90 static void vgen_update_md_prop(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
91 static void vgen_read_pri_eth_types(vgen_t *vgenp, md_t *mdp,
92 	mde_cookie_t node);
93 static void vgen_mtu_read(vgen_t *vgenp, md_t *mdp, mde_cookie_t node,
94 	uint32_t *mtu);
95 static void vgen_detach_ports(vgen_t *vgenp);
96 static void vgen_port_detach(vgen_port_t *portp);
97 static void vgen_port_list_insert(vgen_port_t *portp);
98 static void vgen_port_list_remove(vgen_port_t *portp);
99 static vgen_port_t *vgen_port_lookup(vgen_portlist_t *plistp,
100 	int port_num);
101 static int vgen_mdeg_reg(vgen_t *vgenp);
102 static void vgen_mdeg_unreg(vgen_t *vgenp);
103 static int vgen_mdeg_cb(void *cb_argp, mdeg_result_t *resp);
104 static int vgen_mdeg_port_cb(void *cb_argp, mdeg_result_t *resp);
105 static int vgen_add_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
106 static int vgen_port_read_props(vgen_port_t *portp, vgen_t *vgenp, md_t *mdp,
107 	mde_cookie_t mdex);
108 static int vgen_remove_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
109 static int vgen_port_attach(vgen_port_t *portp);
110 static void vgen_port_detach_mdeg(vgen_port_t *portp);
111 static void vgen_port_detach_mdeg(vgen_port_t *portp);
112 static int vgen_update_port(vgen_t *vgenp, md_t *curr_mdp,
113 	mde_cookie_t curr_mdex, md_t *prev_mdp, mde_cookie_t prev_mdex);
114 static uint64_t	vgen_port_stat(vgen_port_t *portp, uint_t stat);
115 
116 static int vgen_ldc_attach(vgen_port_t *portp, uint64_t ldc_id);
117 static void vgen_ldc_detach(vgen_ldc_t *ldcp);
118 static int vgen_alloc_tx_ring(vgen_ldc_t *ldcp);
119 static void vgen_free_tx_ring(vgen_ldc_t *ldcp);
120 static void vgen_init_ports(vgen_t *vgenp);
121 static void vgen_port_init(vgen_port_t *portp);
122 static void vgen_uninit_ports(vgen_t *vgenp);
123 static void vgen_port_uninit(vgen_port_t *portp);
124 static void vgen_init_ldcs(vgen_port_t *portp);
125 static void vgen_uninit_ldcs(vgen_port_t *portp);
126 static int vgen_ldc_init(vgen_ldc_t *ldcp);
127 static void vgen_ldc_uninit(vgen_ldc_t *ldcp);
128 static int vgen_init_tbufs(vgen_ldc_t *ldcp);
129 static void vgen_uninit_tbufs(vgen_ldc_t *ldcp);
130 static void vgen_clobber_tbufs(vgen_ldc_t *ldcp);
131 static void vgen_clobber_rxds(vgen_ldc_t *ldcp);
132 static uint64_t	vgen_ldc_stat(vgen_ldc_t *ldcp, uint_t stat);
133 static uint_t vgen_ldc_cb(uint64_t event, caddr_t arg);
134 static int vgen_portsend(vgen_port_t *portp, mblk_t *mp);
135 static int vgen_ldcsend(void *arg, mblk_t *mp);
136 static void vgen_ldcsend_pkt(void *arg, mblk_t *mp);
137 static int vgen_ldcsend_dring(void *arg, mblk_t *mp);
138 static void vgen_reclaim(vgen_ldc_t *ldcp);
139 static void vgen_reclaim_dring(vgen_ldc_t *ldcp);
140 static int vgen_num_txpending(vgen_ldc_t *ldcp);
141 static int vgen_tx_dring_full(vgen_ldc_t *ldcp);
142 static int vgen_ldc_txtimeout(vgen_ldc_t *ldcp);
143 static void vgen_ldc_watchdog(void *arg);
144 
145 /* vgen handshake functions */
146 static vgen_ldc_t *vh_nextphase(vgen_ldc_t *ldcp);
147 static int vgen_sendmsg(vgen_ldc_t *ldcp, caddr_t msg,  size_t msglen,
148 	boolean_t caller_holds_lock);
149 static int vgen_send_version_negotiate(vgen_ldc_t *ldcp);
150 static int vgen_send_attr_info(vgen_ldc_t *ldcp);
151 static int vgen_send_dring_reg(vgen_ldc_t *ldcp);
152 static int vgen_send_rdx_info(vgen_ldc_t *ldcp);
153 static int vgen_send_dring_data(vgen_ldc_t *ldcp, uint32_t start, int32_t end);
154 static int vgen_send_mcast_info(vgen_ldc_t *ldcp);
155 static int vgen_handshake_phase2(vgen_ldc_t *ldcp);
156 static void vgen_handshake_reset(vgen_ldc_t *ldcp);
157 static void vgen_reset_hphase(vgen_ldc_t *ldcp);
158 static void vgen_handshake(vgen_ldc_t *ldcp);
159 static int vgen_handshake_done(vgen_ldc_t *ldcp);
160 static void vgen_handshake_retry(vgen_ldc_t *ldcp);
161 static int vgen_handle_version_negotiate(vgen_ldc_t *ldcp,
162 	vio_msg_tag_t *tagp);
163 static int vgen_handle_attr_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
164 static int vgen_handle_dring_reg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
165 static int vgen_handle_rdx_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
166 static int vgen_handle_mcast_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
167 static int vgen_handle_ctrlmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
168 static void vgen_handle_pkt_data_nop(void *arg1, void *arg2, uint32_t msglen);
169 static void vgen_handle_pkt_data(void *arg1, void *arg2, uint32_t msglen);
170 static int vgen_handle_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
171 static int vgen_handle_dring_data_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
172 static int vgen_process_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
173 static int vgen_handle_dring_data_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
174 static int vgen_handle_dring_data_nack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
175 static int vgen_send_dring_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp,
176 	uint32_t start, int32_t end, uint8_t pstate);
177 static int vgen_handle_datamsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp,
178 	uint32_t msglen);
179 static void vgen_handle_errmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
180 static void vgen_handle_evt_up(vgen_ldc_t *ldcp);
181 static void vgen_handle_evt_reset(vgen_ldc_t *ldcp);
182 static int vgen_check_sid(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
183 static int vgen_check_datamsg_seq(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
184 static caddr_t vgen_print_ethaddr(uint8_t *a, char *ebuf);
185 static void vgen_hwatchdog(void *arg);
186 static void vgen_print_attr_info(vgen_ldc_t *ldcp, int endpoint);
187 static void vgen_print_hparams(vgen_hparams_t *hp);
188 static void vgen_print_ldcinfo(vgen_ldc_t *ldcp);
189 static void vgen_stop_rcv_thread(vgen_ldc_t *ldcp);
190 static void vgen_ldc_rcv_worker(void *arg);
191 static void vgen_handle_evt_read(vgen_ldc_t *ldcp);
192 static void vgen_rx(vgen_ldc_t *ldcp, mblk_t *bp);
193 static void vgen_set_vnet_proto_ops(vgen_ldc_t *ldcp);
194 static void vgen_reset_vnet_proto_ops(vgen_ldc_t *ldcp);
195 
196 /* VLAN routines */
197 static void vgen_vlan_read_ids(void *arg, int type, md_t *mdp,
198 	mde_cookie_t node, uint16_t *pvidp, uint16_t **vidspp,
199 	uint16_t *nvidsp, uint16_t *default_idp);
200 static void vgen_vlan_create_hash(vgen_port_t *portp);
201 static void vgen_vlan_destroy_hash(vgen_port_t *portp);
202 static void vgen_vlan_add_ids(vgen_port_t *portp);
203 static void vgen_vlan_remove_ids(vgen_port_t *portp);
204 static boolean_t vgen_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid);
205 static boolean_t vgen_frame_lookup_vid(vnet_t *vnetp, struct ether_header *ehp,
206 	uint16_t *vidp);
207 static mblk_t *vgen_vlan_frame_fixtag(vgen_port_t *portp, mblk_t *mp,
208 	boolean_t is_tagged, uint16_t vid);
209 static void vgen_vlan_unaware_port_reset(vgen_port_t *portp);
210 static void vgen_reset_vlan_unaware_ports(vgen_t *vgenp);
211 static int vgen_dds_rx(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
212 
213 /* externs */
214 extern void vnet_dds_rx(void *arg, void *dmsg);
215 extern int vnet_mtu_update(vnet_t *vnetp, uint32_t mtu);
216 
217 /*
218  * The handshake process consists of 5 phases defined below, with VH_PHASE0
219  * being the pre-handshake phase and VH_DONE is the phase to indicate
220  * successful completion of all phases.
221  * Each phase may have one to several handshake states which are required
222  * to complete successfully to move to the next phase.
223  * Refer to the functions vgen_handshake() and vgen_handshake_done() for
224  * more details.
225  */
226 /* handshake phases */
227 enum {	VH_PHASE0, VH_PHASE1, VH_PHASE2, VH_PHASE3, VH_DONE = 0x80 };
228 
229 /* handshake states */
230 enum {
231 
232 	VER_INFO_SENT	=	0x1,
233 	VER_ACK_RCVD	=	0x2,
234 	VER_INFO_RCVD	=	0x4,
235 	VER_ACK_SENT	=	0x8,
236 	VER_NEGOTIATED	=	(VER_ACK_RCVD | VER_ACK_SENT),
237 
238 	ATTR_INFO_SENT	=	0x10,
239 	ATTR_ACK_RCVD	=	0x20,
240 	ATTR_INFO_RCVD	=	0x40,
241 	ATTR_ACK_SENT	=	0x80,
242 	ATTR_INFO_EXCHANGED	=	(ATTR_ACK_RCVD | ATTR_ACK_SENT),
243 
244 	DRING_INFO_SENT	=	0x100,
245 	DRING_ACK_RCVD	=	0x200,
246 	DRING_INFO_RCVD	=	0x400,
247 	DRING_ACK_SENT	=	0x800,
248 	DRING_INFO_EXCHANGED	=	(DRING_ACK_RCVD | DRING_ACK_SENT),
249 
250 	RDX_INFO_SENT	=	0x1000,
251 	RDX_ACK_RCVD	=	0x2000,
252 	RDX_INFO_RCVD	=	0x4000,
253 	RDX_ACK_SENT	=	0x8000,
254 	RDX_EXCHANGED	=	(RDX_ACK_RCVD | RDX_ACK_SENT)
255 
256 };
257 
258 #define	VGEN_PRI_ETH_DEFINED(vgenp)	((vgenp)->pri_num_types != 0)
259 
260 #define	LDC_LOCK(ldcp)	\
261 				mutex_enter(&((ldcp)->cblock));\
262 				mutex_enter(&((ldcp)->rxlock));\
263 				mutex_enter(&((ldcp)->wrlock));\
264 				mutex_enter(&((ldcp)->txlock));\
265 				mutex_enter(&((ldcp)->tclock));
266 #define	LDC_UNLOCK(ldcp)	\
267 				mutex_exit(&((ldcp)->tclock));\
268 				mutex_exit(&((ldcp)->txlock));\
269 				mutex_exit(&((ldcp)->wrlock));\
270 				mutex_exit(&((ldcp)->rxlock));\
271 				mutex_exit(&((ldcp)->cblock));
272 
273 #define	VGEN_VER_EQ(ldcp, major, minor)	\
274 	((ldcp)->local_hparams.ver_major == (major) &&	\
275 	    (ldcp)->local_hparams.ver_minor == (minor))
276 
277 #define	VGEN_VER_LT(ldcp, major, minor)	\
278 	(((ldcp)->local_hparams.ver_major < (major)) ||	\
279 	    ((ldcp)->local_hparams.ver_major == (major) &&	\
280 	    (ldcp)->local_hparams.ver_minor < (minor)))
281 
282 #define	VGEN_VER_GTEQ(ldcp, major, minor)	\
283 	(((ldcp)->local_hparams.ver_major > (major)) ||	\
284 	    ((ldcp)->local_hparams.ver_major == (major) &&	\
285 	    (ldcp)->local_hparams.ver_minor >= (minor)))
286 
287 static struct ether_addr etherbroadcastaddr = {
288 	0xff, 0xff, 0xff, 0xff, 0xff, 0xff
289 };
290 /*
291  * MIB II broadcast/multicast packets
292  */
293 #define	IS_BROADCAST(ehp) \
294 		(ether_cmp(&ehp->ether_dhost, &etherbroadcastaddr) == 0)
295 #define	IS_MULTICAST(ehp) \
296 		((ehp->ether_dhost.ether_addr_octet[0] & 01) == 1)
297 
298 /*
299  * Property names
300  */
301 static char macaddr_propname[] = "mac-address";
302 static char rmacaddr_propname[] = "remote-mac-address";
303 static char channel_propname[] = "channel-endpoint";
304 static char reg_propname[] = "reg";
305 static char port_propname[] = "port";
306 static char swport_propname[] = "switch-port";
307 static char id_propname[] = "id";
308 static char vdev_propname[] = "virtual-device";
309 static char vnet_propname[] = "network";
310 static char pri_types_propname[] = "priority-ether-types";
311 static char vgen_pvid_propname[] = "port-vlan-id";
312 static char vgen_vid_propname[] = "vlan-id";
313 static char vgen_dvid_propname[] = "default-vlan-id";
314 static char port_pvid_propname[] = "remote-port-vlan-id";
315 static char port_vid_propname[] = "remote-vlan-id";
316 static char vgen_mtu_propname[] = "mtu";
317 
318 /* versions supported - in decreasing order */
319 static vgen_ver_t vgen_versions[VGEN_NUM_VER] = { {1, 4} };
320 
321 /* Tunables */
322 uint32_t vgen_hwd_interval = 5;		/* handshake watchdog freq in sec */
323 uint32_t vgen_max_hretries = VNET_NUM_HANDSHAKES; /* # of handshake retries */
324 uint32_t vgen_ldcwr_retries = 10;	/* max # of ldc_write() retries */
325 uint32_t vgen_ldcup_retries = 5;	/* max # of ldc_up() retries */
326 uint32_t vgen_recv_delay = 1;		/* delay when rx descr not ready */
327 uint32_t vgen_recv_retries = 10;	/* retry when rx descr not ready */
328 uint32_t vgen_tx_retries = 0x4;		/* retry when tx descr not available */
329 uint32_t vgen_tx_delay = 0x30;		/* delay when tx descr not available */
330 
331 int vgen_rcv_thread_enabled = 1;	/* Enable Receive thread */
332 
333 /*
334  * max # of packets accumulated prior to sending them up. It is best
335  * to keep this at 60% of the number of receive buffers.
336  */
337 uint32_t vgen_chain_len = (VGEN_NRBUFS * 0.6);
338 
339 /*
340  * Internal tunables for receive buffer pools, that is,  the size and number of
341  * mblks for each pool. At least 3 sizes must be specified if these are used.
342  * The sizes must be specified in increasing order. Non-zero value of the first
343  * size will be used as a hint to use these values instead of the algorithm
344  * that determines the sizes based on MTU.
345  */
346 uint32_t vgen_rbufsz1 = 0;
347 uint32_t vgen_rbufsz2 = 0;
348 uint32_t vgen_rbufsz3 = 0;
349 uint32_t vgen_rbufsz4 = 0;
350 
351 uint32_t vgen_nrbufs1 = VGEN_NRBUFS;
352 uint32_t vgen_nrbufs2 = VGEN_NRBUFS;
353 uint32_t vgen_nrbufs3 = VGEN_NRBUFS;
354 uint32_t vgen_nrbufs4 = VGEN_NRBUFS;
355 
356 /*
357  * In the absence of "priority-ether-types" property in MD, the following
358  * internal tunable can be set to specify a single priority ethertype.
359  */
360 uint64_t vgen_pri_eth_type = 0;
361 
362 /*
363  * Number of transmit priority buffers that are preallocated per device.
364  * This number is chosen to be a small value to throttle transmission
365  * of priority packets. Note: Must be a power of 2 for vio_create_mblks().
366  */
367 uint32_t vgen_pri_tx_nmblks = 64;
368 
369 uint32_t	vgen_vlan_nchains = 4;	/* # of chains in vlan id hash table */
370 
371 #ifdef DEBUG
372 /* flags to simulate error conditions for debugging */
373 int vgen_trigger_txtimeout = 0;
374 int vgen_trigger_rxlost = 0;
375 #endif
376 
377 /*
378  * Matching criteria passed to the MDEG to register interest
379  * in changes to 'virtual-device' nodes (i.e. vnet nodes) identified
380  * by their 'name' and 'cfg-handle' properties.
381  */
382 static md_prop_match_t vdev_prop_match[] = {
383 	{ MDET_PROP_STR,    "name"   },
384 	{ MDET_PROP_VAL,    "cfg-handle" },
385 	{ MDET_LIST_END,    NULL    }
386 };
387 
388 static mdeg_node_match_t vdev_match = { "virtual-device",
389 						vdev_prop_match };
390 
391 /* MD update matching structure */
392 static md_prop_match_t	vport_prop_match[] = {
393 	{ MDET_PROP_VAL,	"id" },
394 	{ MDET_LIST_END,	NULL }
395 };
396 
397 static mdeg_node_match_t vport_match = { "virtual-device-port",
398 					vport_prop_match };
399 
400 /* template for matching a particular vnet instance */
401 static mdeg_prop_spec_t vgen_prop_template[] = {
402 	{ MDET_PROP_STR,	"name",		"network" },
403 	{ MDET_PROP_VAL,	"cfg-handle",	NULL },
404 	{ MDET_LIST_END,	NULL,		NULL }
405 };
406 
407 #define	VGEN_SET_MDEG_PROP_INST(specp, val)	(specp)[1].ps_val = (val)
408 
409 static int vgen_mdeg_port_cb(void *cb_argp, mdeg_result_t *resp);
410 
411 static mac_callbacks_t vgen_m_callbacks = {
412 	0,
413 	vgen_stat,
414 	vgen_start,
415 	vgen_stop,
416 	vgen_promisc,
417 	vgen_multicst,
418 	vgen_unicst,
419 	vgen_tx,
420 	NULL,
421 	NULL,
422 	NULL
423 };
424 
425 /* externs */
426 extern pri_t	maxclsyspri;
427 extern proc_t	p0;
428 extern uint32_t vnet_ntxds;
429 extern uint32_t vnet_ldcwd_interval;
430 extern uint32_t vnet_ldcwd_txtimeout;
431 extern uint32_t vnet_ldc_mtu;
432 extern uint32_t vnet_nrbufs;
433 extern uint32_t	vnet_ethermtu;
434 extern uint16_t	vnet_default_vlan_id;
435 extern boolean_t vnet_jumbo_rxpools;
436 
437 #ifdef DEBUG
438 
439 extern int vnet_dbglevel;
440 static void debug_printf(const char *fname, vgen_t *vgenp,
441 	vgen_ldc_t *ldcp, const char *fmt, ...);
442 
443 /* -1 for all LDCs info, or ldc_id for a specific LDC info */
444 int vgendbg_ldcid = -1;
445 
446 /* simulate handshake error conditions for debug */
447 uint32_t vgen_hdbg;
448 #define	HDBG_VERSION	0x1
449 #define	HDBG_TIMEOUT	0x2
450 #define	HDBG_BAD_SID	0x4
451 #define	HDBG_OUT_STATE	0x8
452 
453 #endif
454 
455 /*
456  * vgen_init() is called by an instance of vnet driver to initialize the
457  * corresponding generic proxy transport layer. The arguments passed by vnet
458  * are - an opaque pointer to the vnet instance, pointers to dev_info_t and
459  * the mac address of the vnet device, and a pointer to vgen_t is passed
460  * back as a handle to vnet.
461  */
462 int
463 vgen_init(void *vnetp, uint64_t regprop, dev_info_t *vnetdip,
464     const uint8_t *macaddr, void **vgenhdl)
465 {
466 	vgen_t *vgenp;
467 	int instance;
468 	int rv;
469 
470 	if ((vnetp == NULL) || (vnetdip == NULL))
471 		return (DDI_FAILURE);
472 
473 	instance = ddi_get_instance(vnetdip);
474 
475 	DBG1(NULL, NULL, "vnet(%d): enter\n", instance);
476 
477 	vgenp = kmem_zalloc(sizeof (vgen_t), KM_SLEEP);
478 
479 	vgenp->vnetp = vnetp;
480 	vgenp->instance = instance;
481 	vgenp->regprop = regprop;
482 	vgenp->vnetdip = vnetdip;
483 	bcopy(macaddr, &(vgenp->macaddr), ETHERADDRL);
484 
485 	/* allocate multicast table */
486 	vgenp->mctab = kmem_zalloc(VGEN_INIT_MCTAB_SIZE *
487 	    sizeof (struct ether_addr), KM_SLEEP);
488 	vgenp->mccount = 0;
489 	vgenp->mcsize = VGEN_INIT_MCTAB_SIZE;
490 
491 	mutex_init(&vgenp->lock, NULL, MUTEX_DRIVER, NULL);
492 	rw_init(&vgenp->vgenports.rwlock, NULL, RW_DRIVER, NULL);
493 
494 	rv = vgen_read_mdprops(vgenp);
495 	if (rv != 0) {
496 		goto vgen_init_fail;
497 	}
498 
499 	/* register with MD event generator */
500 	rv = vgen_mdeg_reg(vgenp);
501 	if (rv != DDI_SUCCESS) {
502 		goto vgen_init_fail;
503 	}
504 
505 	*vgenhdl = (void *)vgenp;
506 
507 	DBG1(NULL, NULL, "vnet(%d): exit\n", instance);
508 	return (DDI_SUCCESS);
509 
510 vgen_init_fail:
511 	rw_destroy(&vgenp->vgenports.rwlock);
512 	mutex_destroy(&vgenp->lock);
513 	kmem_free(vgenp->mctab, VGEN_INIT_MCTAB_SIZE *
514 	    sizeof (struct ether_addr));
515 	if (VGEN_PRI_ETH_DEFINED(vgenp)) {
516 		kmem_free(vgenp->pri_types,
517 		    sizeof (uint16_t) * vgenp->pri_num_types);
518 		(void) vio_destroy_mblks(vgenp->pri_tx_vmp);
519 	}
520 	KMEM_FREE(vgenp);
521 	return (DDI_FAILURE);
522 }
523 
524 /*
525  * Called by vnet to undo the initializations done by vgen_init().
526  * The handle provided by generic transport during vgen_init() is the argument.
527  */
528 int
529 vgen_uninit(void *arg)
530 {
531 	vgen_t		*vgenp = (vgen_t *)arg;
532 	vio_mblk_pool_t	*rp;
533 	vio_mblk_pool_t	*nrp;
534 
535 	if (vgenp == NULL) {
536 		return (DDI_FAILURE);
537 	}
538 
539 	DBG1(vgenp, NULL, "enter\n");
540 
541 	/* unregister with MD event generator */
542 	vgen_mdeg_unreg(vgenp);
543 
544 	mutex_enter(&vgenp->lock);
545 
546 	/* detach all ports from the device */
547 	vgen_detach_ports(vgenp);
548 
549 	/*
550 	 * free any pending rx mblk pools,
551 	 * that couldn't be freed previously during channel detach.
552 	 */
553 	rp = vgenp->rmp;
554 	while (rp != NULL) {
555 		nrp = vgenp->rmp = rp->nextp;
556 		if (vio_destroy_mblks(rp)) {
557 			vgenp->rmp = rp;
558 			mutex_exit(&vgenp->lock);
559 			return (DDI_FAILURE);
560 		}
561 		rp = nrp;
562 	}
563 
564 	/* free multicast table */
565 	kmem_free(vgenp->mctab, vgenp->mcsize * sizeof (struct ether_addr));
566 
567 	/* free pri_types table */
568 	if (VGEN_PRI_ETH_DEFINED(vgenp)) {
569 		kmem_free(vgenp->pri_types,
570 		    sizeof (uint16_t) * vgenp->pri_num_types);
571 		(void) vio_destroy_mblks(vgenp->pri_tx_vmp);
572 	}
573 
574 	mutex_exit(&vgenp->lock);
575 
576 	rw_destroy(&vgenp->vgenports.rwlock);
577 	mutex_destroy(&vgenp->lock);
578 
579 	KMEM_FREE(vgenp);
580 
581 	DBG1(vgenp, NULL, "exit\n");
582 
583 	return (DDI_SUCCESS);
584 }
585 
586 /* enable transmit/receive for the device */
587 int
588 vgen_start(void *arg)
589 {
590 	vgen_port_t	*portp = (vgen_port_t *)arg;
591 	vgen_t		*vgenp = portp->vgenp;
592 
593 	DBG1(vgenp, NULL, "enter\n");
594 	mutex_enter(&portp->lock);
595 	vgen_port_init(portp);
596 	portp->flags |= VGEN_STARTED;
597 	mutex_exit(&portp->lock);
598 	DBG1(vgenp, NULL, "exit\n");
599 
600 	return (DDI_SUCCESS);
601 }
602 
603 /* stop transmit/receive */
604 void
605 vgen_stop(void *arg)
606 {
607 	vgen_port_t	*portp = (vgen_port_t *)arg;
608 	vgen_t		*vgenp = portp->vgenp;
609 
610 	DBG1(vgenp, NULL, "enter\n");
611 
612 	mutex_enter(&portp->lock);
613 	vgen_port_uninit(portp);
614 	portp->flags &= ~(VGEN_STARTED);
615 	mutex_exit(&portp->lock);
616 	DBG1(vgenp, NULL, "exit\n");
617 
618 }
619 
620 /* vgen transmit function */
621 static mblk_t *
622 vgen_tx(void *arg, mblk_t *mp)
623 {
624 	int i;
625 	vgen_port_t *portp;
626 	int status = VGEN_FAILURE;
627 
628 	portp = (vgen_port_t *)arg;
629 	/*
630 	 * Retry so that we avoid reporting a failure
631 	 * to the upper layer. Returning a failure may cause the
632 	 * upper layer to go into single threaded mode there by
633 	 * causing performance degradation, especially for a large
634 	 * number of connections.
635 	 */
636 	for (i = 0; i < vgen_tx_retries; ) {
637 		status = vgen_portsend(portp, mp);
638 		if (status == VGEN_SUCCESS) {
639 			break;
640 		}
641 		if (++i < vgen_tx_retries)
642 			delay(drv_usectohz(vgen_tx_delay));
643 	}
644 	if (status != VGEN_SUCCESS) {
645 		/* failure */
646 		return (mp);
647 	}
648 	/* success */
649 	return (NULL);
650 }
651 
652 /*
653  * This function provides any necessary tagging/untagging of the frames
654  * that are being transmitted over the port. It first verifies the vlan
655  * membership of the destination(port) and drops the packet if the
656  * destination doesn't belong to the given vlan.
657  *
658  * Arguments:
659  *   portp:     port over which the frames should be transmitted
660  *   mp:        frame to be transmitted
661  *   is_tagged:
662  *              B_TRUE: indicates frame header contains the vlan tag already.
663  *              B_FALSE: indicates frame is untagged.
664  *   vid:       vlan in which the frame should be transmitted.
665  *
666  * Returns:
667  *              Sucess: frame(mblk_t *) after doing the necessary tag/untag.
668  *              Failure: NULL
669  */
670 static mblk_t *
671 vgen_vlan_frame_fixtag(vgen_port_t *portp, mblk_t *mp, boolean_t is_tagged,
672 	uint16_t vid)
673 {
674 	vgen_t				*vgenp;
675 	boolean_t			dst_tagged;
676 	int				rv;
677 
678 	vgenp = portp->vgenp;
679 
680 	/*
681 	 * If the packet is going to a vnet:
682 	 *   Check if the destination vnet is in the same vlan.
683 	 *   Check the frame header if tag or untag is needed.
684 	 *
685 	 * We do not check the above conditions if the packet is going to vsw:
686 	 *   vsw must be present implicitly in all the vlans that a vnet device
687 	 *   is configured into; even if vsw itself is not assigned to those
688 	 *   vlans as an interface. For instance, the packet might be destined
689 	 *   to another vnet(indirectly through vsw) or to an external host
690 	 *   which is in the same vlan as this vnet and vsw itself may not be
691 	 *   present in that vlan. Similarly packets going to vsw must be
692 	 *   always tagged(unless in the default-vlan) if not already tagged,
693 	 *   as we do not know the final destination. This is needed because
694 	 *   vsw must always invoke its switching function only after tagging
695 	 *   the packet; otherwise after switching function determines the
696 	 *   destination we cannot figure out if the destination belongs to the
697 	 *   the same vlan that the frame originated from and if it needs tag/
698 	 *   untag. Note that vsw will tag the packet itself when it receives
699 	 *   it over the channel from a client if needed. However, that is
700 	 *   needed only in the case of vlan unaware clients such as obp or
701 	 *   earlier versions of vnet.
702 	 *
703 	 */
704 	if (portp != vgenp->vsw_portp) {
705 		/*
706 		 * Packet going to a vnet. Check if the destination vnet is in
707 		 * the same vlan. Then check the frame header if tag/untag is
708 		 * needed.
709 		 */
710 		rv = vgen_vlan_lookup(portp->vlan_hashp, vid);
711 		if (rv == B_FALSE) {
712 			/* drop the packet */
713 			freemsg(mp);
714 			return (NULL);
715 		}
716 
717 		/* is the destination tagged or untagged in this vlan? */
718 		(vid == portp->pvid) ? (dst_tagged = B_FALSE) :
719 		    (dst_tagged = B_TRUE);
720 
721 		if (is_tagged == dst_tagged) {
722 			/* no tagging/untagging needed */
723 			return (mp);
724 		}
725 
726 		if (is_tagged == B_TRUE) {
727 			/* frame is tagged; destination needs untagged */
728 			mp = vnet_vlan_remove_tag(mp);
729 			return (mp);
730 		}
731 
732 		/* (is_tagged == B_FALSE): fallthru to tag tx packet: */
733 	}
734 
735 	/*
736 	 * Packet going to a vnet needs tagging.
737 	 * OR
738 	 * If the packet is going to vsw, then it must be tagged in all cases:
739 	 * unknown unicast, broadcast/multicast or to vsw interface.
740 	 */
741 
742 	if (is_tagged == B_FALSE) {
743 		mp = vnet_vlan_insert_tag(mp, vid);
744 	}
745 
746 	return (mp);
747 }
748 
749 /* transmit packets over the given port */
750 static int
751 vgen_portsend(vgen_port_t *portp, mblk_t *mp)
752 {
753 	vgen_ldclist_t		*ldclp;
754 	vgen_ldc_t		*ldcp;
755 	int			status;
756 	int			rv = VGEN_SUCCESS;
757 	vgen_t			*vgenp = portp->vgenp;
758 	vnet_t			*vnetp = vgenp->vnetp;
759 	boolean_t		is_tagged;
760 	boolean_t		dec_refcnt = B_FALSE;
761 	uint16_t		vlan_id;
762 	struct ether_header	*ehp;
763 
764 	if (portp->use_vsw_port) {
765 		(void) atomic_inc_32(&vgenp->vsw_port_refcnt);
766 		portp = portp->vgenp->vsw_portp;
767 		dec_refcnt = B_TRUE;
768 	}
769 	if (portp == NULL) {
770 		return (VGEN_FAILURE);
771 	}
772 
773 	/*
774 	 * Determine the vlan id that the frame belongs to.
775 	 */
776 	ehp = (struct ether_header *)mp->b_rptr;
777 	is_tagged = vgen_frame_lookup_vid(vnetp, ehp, &vlan_id);
778 
779 	if (vlan_id == vnetp->default_vlan_id) {
780 
781 		/* Frames in default vlan must be untagged */
782 		ASSERT(is_tagged == B_FALSE);
783 
784 		/*
785 		 * If the destination is a vnet-port verify it belongs to the
786 		 * default vlan; otherwise drop the packet. We do not need
787 		 * this check for vsw-port, as it should implicitly belong to
788 		 * this vlan; see comments in vgen_vlan_frame_fixtag().
789 		 */
790 		if (portp != vgenp->vsw_portp &&
791 		    portp->pvid != vnetp->default_vlan_id) {
792 			freemsg(mp);
793 			goto portsend_ret;
794 		}
795 
796 	} else {	/* frame not in default-vlan */
797 
798 		mp = vgen_vlan_frame_fixtag(portp, mp, is_tagged, vlan_id);
799 		if (mp == NULL) {
800 			goto portsend_ret;
801 		}
802 
803 	}
804 
805 	ldclp = &portp->ldclist;
806 	READ_ENTER(&ldclp->rwlock);
807 	/*
808 	 * NOTE: for now, we will assume we have a single channel.
809 	 */
810 	if (ldclp->headp == NULL) {
811 		RW_EXIT(&ldclp->rwlock);
812 		rv = VGEN_FAILURE;
813 		goto portsend_ret;
814 	}
815 	ldcp = ldclp->headp;
816 
817 	status = ldcp->tx(ldcp, mp);
818 
819 	RW_EXIT(&ldclp->rwlock);
820 
821 	if (status != VGEN_TX_SUCCESS) {
822 		rv = VGEN_FAILURE;
823 	}
824 
825 portsend_ret:
826 	if (dec_refcnt == B_TRUE) {
827 		(void) atomic_dec_32(&vgenp->vsw_port_refcnt);
828 	}
829 	return (rv);
830 }
831 
832 /*
833  * Wrapper function to transmit normal and/or priority frames over the channel.
834  */
835 static int
836 vgen_ldcsend(void *arg, mblk_t *mp)
837 {
838 	vgen_ldc_t		*ldcp = (vgen_ldc_t *)arg;
839 	int			status;
840 	struct ether_header	*ehp;
841 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
842 	uint32_t		num_types;
843 	uint16_t		*types;
844 	int			i;
845 
846 	ASSERT(VGEN_PRI_ETH_DEFINED(vgenp));
847 
848 	num_types = vgenp->pri_num_types;
849 	types = vgenp->pri_types;
850 	ehp = (struct ether_header *)mp->b_rptr;
851 
852 	for (i = 0; i < num_types; i++) {
853 
854 		if (ehp->ether_type == types[i]) {
855 			/* priority frame, use pri tx function */
856 			vgen_ldcsend_pkt(ldcp, mp);
857 			return (VGEN_SUCCESS);
858 		}
859 
860 	}
861 
862 	status  = vgen_ldcsend_dring(ldcp, mp);
863 
864 	return (status);
865 }
866 
867 /*
868  * This functions handles ldc channel reset while in the context
869  * of transmit routines: vgen_ldcsend_pkt() or vgen_ldcsend_dring().
870  */
871 static void
872 vgen_ldcsend_process_reset(vgen_ldc_t *ldcp)
873 {
874 	ldc_status_t	istatus;
875 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
876 
877 	if (mutex_tryenter(&ldcp->cblock)) {
878 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
879 			DWARN(vgenp, ldcp, "ldc_status() error\n");
880 		} else {
881 			ldcp->ldc_status = istatus;
882 		}
883 		if (ldcp->ldc_status != LDC_UP) {
884 			vgen_handle_evt_reset(ldcp);
885 		}
886 		mutex_exit(&ldcp->cblock);
887 	}
888 }
889 
890 /*
891  * This function transmits the frame in the payload of a raw data
892  * (VIO_PKT_DATA) message. Thus, it provides an Out-Of-Band path to
893  * send special frames with high priorities, without going through
894  * the normal data path which uses descriptor ring mechanism.
895  */
896 static void
897 vgen_ldcsend_pkt(void *arg, mblk_t *mp)
898 {
899 	vgen_ldc_t		*ldcp = (vgen_ldc_t *)arg;
900 	vio_raw_data_msg_t	*pkt;
901 	mblk_t			*bp;
902 	mblk_t			*nmp = NULL;
903 	caddr_t			dst;
904 	uint32_t		mblksz;
905 	uint32_t		size;
906 	uint32_t		nbytes;
907 	int			rv;
908 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
909 	vgen_stats_t		*statsp = &ldcp->stats;
910 
911 	/* drop the packet if ldc is not up or handshake is not done */
912 	if (ldcp->ldc_status != LDC_UP) {
913 		(void) atomic_inc_32(&statsp->tx_pri_fail);
914 		DWARN(vgenp, ldcp, "status(%d), dropping packet\n",
915 		    ldcp->ldc_status);
916 		goto send_pkt_exit;
917 	}
918 
919 	if (ldcp->hphase != VH_DONE) {
920 		(void) atomic_inc_32(&statsp->tx_pri_fail);
921 		DWARN(vgenp, ldcp, "hphase(%x), dropping packet\n",
922 		    ldcp->hphase);
923 		goto send_pkt_exit;
924 	}
925 
926 	size = msgsize(mp);
927 
928 	/* frame size bigger than available payload len of raw data msg ? */
929 	if (size > (size_t)(ldcp->msglen - VIO_PKT_DATA_HDRSIZE)) {
930 		(void) atomic_inc_32(&statsp->tx_pri_fail);
931 		DWARN(vgenp, ldcp, "invalid size(%d)\n", size);
932 		goto send_pkt_exit;
933 	}
934 
935 	if (size < ETHERMIN)
936 		size = ETHERMIN;
937 
938 	/* alloc space for a raw data message */
939 	nmp = vio_allocb(vgenp->pri_tx_vmp);
940 	if (nmp == NULL) {
941 		(void) atomic_inc_32(&statsp->tx_pri_fail);
942 		DWARN(vgenp, ldcp, "vio_allocb failed\n");
943 		goto send_pkt_exit;
944 	}
945 	pkt = (vio_raw_data_msg_t *)nmp->b_rptr;
946 
947 	/* copy frame into the payload of raw data message */
948 	dst = (caddr_t)pkt->data;
949 	for (bp = mp; bp != NULL; bp = bp->b_cont) {
950 		mblksz = MBLKL(bp);
951 		bcopy(bp->b_rptr, dst, mblksz);
952 		dst += mblksz;
953 	}
954 
955 	/* setup the raw data msg */
956 	pkt->tag.vio_msgtype = VIO_TYPE_DATA;
957 	pkt->tag.vio_subtype = VIO_SUBTYPE_INFO;
958 	pkt->tag.vio_subtype_env = VIO_PKT_DATA;
959 	pkt->tag.vio_sid = ldcp->local_sid;
960 	nbytes = VIO_PKT_DATA_HDRSIZE + size;
961 
962 	/* send the msg over ldc */
963 	rv = vgen_sendmsg(ldcp, (caddr_t)pkt, nbytes, B_FALSE);
964 	if (rv != VGEN_SUCCESS) {
965 		(void) atomic_inc_32(&statsp->tx_pri_fail);
966 		DWARN(vgenp, ldcp, "Error sending priority frame\n");
967 		if (rv == ECONNRESET) {
968 			vgen_ldcsend_process_reset(ldcp);
969 		}
970 		goto send_pkt_exit;
971 	}
972 
973 	/* update stats */
974 	(void) atomic_inc_64(&statsp->tx_pri_packets);
975 	(void) atomic_add_64(&statsp->tx_pri_bytes, size);
976 
977 send_pkt_exit:
978 	if (nmp != NULL)
979 		freemsg(nmp);
980 	freemsg(mp);
981 }
982 
983 /*
984  * This function transmits normal (non-priority) data frames over
985  * the channel. It queues the frame into the transmit descriptor ring
986  * and sends a VIO_DRING_DATA message if needed, to wake up the
987  * peer to (re)start processing.
988  */
989 static int
990 vgen_ldcsend_dring(void *arg, mblk_t *mp)
991 {
992 	vgen_ldc_t		*ldcp = (vgen_ldc_t *)arg;
993 	vgen_private_desc_t	*tbufp;
994 	vgen_private_desc_t	*rtbufp;
995 	vnet_public_desc_t	*rtxdp;
996 	vgen_private_desc_t	*ntbufp;
997 	vnet_public_desc_t	*txdp;
998 	vio_dring_entry_hdr_t	*hdrp;
999 	vgen_stats_t		*statsp;
1000 	struct ether_header	*ehp;
1001 	boolean_t		is_bcast = B_FALSE;
1002 	boolean_t		is_mcast = B_FALSE;
1003 	size_t			mblksz;
1004 	caddr_t			dst;
1005 	mblk_t			*bp;
1006 	size_t			size;
1007 	int			rv = 0;
1008 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
1009 	vgen_hparams_t		*lp = &ldcp->local_hparams;
1010 
1011 	statsp = &ldcp->stats;
1012 	size = msgsize(mp);
1013 
1014 	DBG1(vgenp, ldcp, "enter\n");
1015 
1016 	if (ldcp->ldc_status != LDC_UP) {
1017 		DWARN(vgenp, ldcp, "status(%d), dropping packet\n",
1018 		    ldcp->ldc_status);
1019 		/* retry ldc_up() if needed */
1020 		if (ldcp->flags & CHANNEL_STARTED)
1021 			(void) ldc_up(ldcp->ldc_handle);
1022 		goto send_dring_exit;
1023 	}
1024 
1025 	/* drop the packet if ldc is not up or handshake is not done */
1026 	if (ldcp->hphase != VH_DONE) {
1027 		DWARN(vgenp, ldcp, "hphase(%x), dropping packet\n",
1028 		    ldcp->hphase);
1029 		goto send_dring_exit;
1030 	}
1031 
1032 	if (size > (size_t)lp->mtu) {
1033 		DWARN(vgenp, ldcp, "invalid size(%d)\n", size);
1034 		goto send_dring_exit;
1035 	}
1036 	if (size < ETHERMIN)
1037 		size = ETHERMIN;
1038 
1039 	ehp = (struct ether_header *)mp->b_rptr;
1040 	is_bcast = IS_BROADCAST(ehp);
1041 	is_mcast = IS_MULTICAST(ehp);
1042 
1043 	mutex_enter(&ldcp->txlock);
1044 	/*
1045 	 * allocate a descriptor
1046 	 */
1047 	tbufp = ldcp->next_tbufp;
1048 	ntbufp = NEXTTBUF(ldcp, tbufp);
1049 	if (ntbufp == ldcp->cur_tbufp) { /* out of tbufs/txds */
1050 
1051 		mutex_enter(&ldcp->tclock);
1052 		/* Try reclaiming now */
1053 		vgen_reclaim_dring(ldcp);
1054 		ldcp->reclaim_lbolt = ddi_get_lbolt();
1055 
1056 		if (ntbufp == ldcp->cur_tbufp) {
1057 			/* Now we are really out of tbuf/txds */
1058 			ldcp->need_resched = B_TRUE;
1059 			mutex_exit(&ldcp->tclock);
1060 
1061 			statsp->tx_no_desc++;
1062 			mutex_exit(&ldcp->txlock);
1063 
1064 			return (VGEN_TX_NORESOURCES);
1065 		}
1066 		mutex_exit(&ldcp->tclock);
1067 	}
1068 	/* update next available tbuf in the ring and update tx index */
1069 	ldcp->next_tbufp = ntbufp;
1070 	INCR_TXI(ldcp->next_txi, ldcp);
1071 
1072 	/* Mark the buffer busy before releasing the lock */
1073 	tbufp->flags = VGEN_PRIV_DESC_BUSY;
1074 	mutex_exit(&ldcp->txlock);
1075 
1076 	/* copy data into pre-allocated transmit buffer */
1077 	dst = tbufp->datap + VNET_IPALIGN;
1078 	for (bp = mp; bp != NULL; bp = bp->b_cont) {
1079 		mblksz = MBLKL(bp);
1080 		bcopy(bp->b_rptr, dst, mblksz);
1081 		dst += mblksz;
1082 	}
1083 
1084 	tbufp->datalen = size;
1085 
1086 	/* initialize the corresponding public descriptor (txd) */
1087 	txdp = tbufp->descp;
1088 	hdrp = &txdp->hdr;
1089 	txdp->nbytes = size;
1090 	txdp->ncookies = tbufp->ncookies;
1091 	bcopy((tbufp->memcookie), (txdp->memcookie),
1092 	    tbufp->ncookies * sizeof (ldc_mem_cookie_t));
1093 
1094 	mutex_enter(&ldcp->wrlock);
1095 	/*
1096 	 * If the flags not set to BUSY, it implies that the clobber
1097 	 * was done while we were copying the data. In such case,
1098 	 * discard the packet and return.
1099 	 */
1100 	if (tbufp->flags != VGEN_PRIV_DESC_BUSY) {
1101 		statsp->oerrors++;
1102 		mutex_exit(&ldcp->wrlock);
1103 		goto send_dring_exit;
1104 	}
1105 	hdrp->dstate = VIO_DESC_READY;
1106 
1107 	/* update stats */
1108 	statsp->opackets++;
1109 	statsp->obytes += size;
1110 	if (is_bcast)
1111 		statsp->brdcstxmt++;
1112 	else if (is_mcast)
1113 		statsp->multixmt++;
1114 
1115 	/* send dring datamsg to the peer */
1116 	if (ldcp->resched_peer) {
1117 
1118 		rtbufp = &ldcp->tbufp[ldcp->resched_peer_txi];
1119 		rtxdp = rtbufp->descp;
1120 
1121 		if (rtxdp->hdr.dstate == VIO_DESC_READY) {
1122 
1123 			rv = vgen_send_dring_data(ldcp,
1124 			    (uint32_t)ldcp->resched_peer_txi, -1);
1125 			if (rv != 0) {
1126 				/* error: drop the packet */
1127 				DWARN(vgenp, ldcp, "vgen_send_dring_data "
1128 				    "failed: rv(%d) len(%d)\n",
1129 				    ldcp->ldc_id, rv, size);
1130 				statsp->oerrors++;
1131 			} else {
1132 				ldcp->resched_peer = B_FALSE;
1133 			}
1134 
1135 		}
1136 
1137 	}
1138 
1139 	mutex_exit(&ldcp->wrlock);
1140 
1141 send_dring_exit:
1142 	if (rv == ECONNRESET) {
1143 		vgen_ldcsend_process_reset(ldcp);
1144 	}
1145 	freemsg(mp);
1146 	DBG1(vgenp, ldcp, "exit\n");
1147 	return (VGEN_TX_SUCCESS);
1148 }
1149 
/*
 * Enable (add == B_TRUE) or disable a multicast address.
 *
 * Only honored for the vswitch port; for any other port this is a no-op
 * that returns DDI_SUCCESS. If the handshake with vsw is complete, a
 * VNET_MCAST_INFO control message is sent over the first channel of the
 * vsw port; in all cases the local mcast table (vgenp->mctab) is updated
 * so it can be (re)synced with vsw when a handshake completes.
 *
 * Returns DDI_SUCCESS on success; DDI_FAILURE if the vsw port/channel is
 * unavailable, the message send fails, or the table cannot be grown.
 */
int
vgen_multicst(void *arg, boolean_t add, const uint8_t *mca)
{
	vgen_t			*vgenp;
	vnet_mcast_msg_t	mcastmsg;
	vio_msg_tag_t		*tagp;
	vgen_port_t		*portp;
	vgen_portlist_t		*plistp;
	vgen_ldc_t		*ldcp;
	vgen_ldclist_t		*ldclp;
	struct ether_addr	*addrp;
	int			rv = DDI_FAILURE;
	uint32_t		i;

	portp = (vgen_port_t *)arg;
	vgenp = portp->vgenp;

	/* multicast filtering is only relayed through the vswitch port */
	if (portp != vgenp->vsw_portp) {
		return (DDI_SUCCESS);
	}

	addrp = (struct ether_addr *)mca;
	tagp = &mcastmsg.tag;
	bzero(&mcastmsg, sizeof (mcastmsg));

	mutex_enter(&vgenp->lock);

	plistp = &(vgenp->vgenports);

	READ_ENTER(&plistp->rwlock);

	/* re-read under the lock; the port may have been detached */
	portp = vgenp->vsw_portp;
	if (portp == NULL) {
		RW_EXIT(&plistp->rwlock);
		mutex_exit(&vgenp->lock);
		return (rv);
	}
	ldclp = &portp->ldclist;

	READ_ENTER(&ldclp->rwlock);

	/* use the first channel of the vsw port */
	ldcp = ldclp->headp;
	if (ldcp == NULL)
		goto vgen_mcast_exit;

	mutex_enter(&ldcp->cblock);

	if (ldcp->hphase == VH_DONE) {
		/*
		 * If handshake is done, send a msg to vsw to add/remove
		 * the multicast address. Otherwise, we just update this
		 * mcast address in our table and the table will be sync'd
		 * with vsw when handshake completes.
		 */
		tagp->vio_msgtype = VIO_TYPE_CTRL;
		tagp->vio_subtype = VIO_SUBTYPE_INFO;
		tagp->vio_subtype_env = VNET_MCAST_INFO;
		tagp->vio_sid = ldcp->local_sid;
		bcopy(mca, &(mcastmsg.mca), ETHERADDRL);
		mcastmsg.set = add;
		mcastmsg.count = 1;
		if (vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (mcastmsg),
		    B_FALSE) != VGEN_SUCCESS) {
			DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
			mutex_exit(&ldcp->cblock);
			goto vgen_mcast_exit;
		}
	}

	mutex_exit(&ldcp->cblock);

	if (add) {

		/* expand multicast table if necessary */
		if (vgenp->mccount >= vgenp->mcsize) {
			struct ether_addr	*newtab;
			uint32_t		newsize;


			/* double the table size each time it fills up */
			newsize = vgenp->mcsize * 2;

			newtab = kmem_zalloc(newsize *
			    sizeof (struct ether_addr), KM_NOSLEEP);
			if (newtab == NULL)
				goto vgen_mcast_exit;
			bcopy(vgenp->mctab, newtab, vgenp->mcsize *
			    sizeof (struct ether_addr));
			kmem_free(vgenp->mctab,
			    vgenp->mcsize * sizeof (struct ether_addr));

			vgenp->mctab = newtab;
			vgenp->mcsize = newsize;
		}

		/* add address to the table */
		vgenp->mctab[vgenp->mccount++] = *addrp;

	} else {

		/* delete address from the table */
		for (i = 0; i < vgenp->mccount; i++) {
			if (ether_cmp(addrp, &(vgenp->mctab[i])) == 0) {

				/*
				 * If there's more than one address in this
				 * table, delete the unwanted one by moving
				 * the last one in the list over top of it;
				 * otherwise, just remove it.
				 */
				if (vgenp->mccount > 1) {
					vgenp->mctab[i] =
					    vgenp->mctab[vgenp->mccount-1];
				}
				vgenp->mccount--;
				break;
			}
		}
	}

	rv = DDI_SUCCESS;

vgen_mcast_exit:
	RW_EXIT(&ldclp->rwlock);
	RW_EXIT(&plistp->rwlock);

	mutex_exit(&vgenp->lock);
	return (rv);
}
1279 
/*
 * Set or clear promiscuous mode on the device.
 * Deliberate no-op: nothing to do at this layer, so the request is
 * simply acknowledged with DDI_SUCCESS.
 */
static int
vgen_promisc(void *arg, boolean_t on)
{
	_NOTE(ARGUNUSED(arg, on))
	return (DDI_SUCCESS);
}
1287 
/*
 * Set the unicast mac address of the device.
 * Deliberate no-op: the address is not programmable here, so the
 * request is simply acknowledged with DDI_SUCCESS.
 */
static int
vgen_unicst(void *arg, const uint8_t *mca)
{
	_NOTE(ARGUNUSED(arg, mca))
	return (DDI_SUCCESS);
}
1295 
1296 /* get device statistics */
1297 int
1298 vgen_stat(void *arg, uint_t stat, uint64_t *val)
1299 {
1300 	vgen_port_t	*portp = (vgen_port_t *)arg;
1301 
1302 	*val = vgen_port_stat(portp, stat);
1303 
1304 	return (0);
1305 }
1306 
/*
 * ioctl entry point. Deliberate no-op: no ioctls are supported; the
 * message is neither processed nor acknowledged here.
 */
static void
vgen_ioctl(void *arg, queue_t *wq, mblk_t *mp)
{
	 _NOTE(ARGUNUSED(arg, wq, mp))
}
1312 
1313 /* vgen internal functions */
1314 /* detach all ports from the device */
1315 static void
1316 vgen_detach_ports(vgen_t *vgenp)
1317 {
1318 	vgen_port_t	*portp;
1319 	vgen_portlist_t	*plistp;
1320 
1321 	plistp = &(vgenp->vgenports);
1322 	WRITE_ENTER(&plistp->rwlock);
1323 
1324 	while ((portp = plistp->headp) != NULL) {
1325 		vgen_port_detach(portp);
1326 	}
1327 
1328 	RW_EXIT(&plistp->rwlock);
1329 }
1330 
1331 /*
1332  * detach the given port.
1333  */
1334 static void
1335 vgen_port_detach(vgen_port_t *portp)
1336 {
1337 	vgen_t		*vgenp;
1338 	vgen_ldclist_t	*ldclp;
1339 	int		port_num;
1340 
1341 	vgenp = portp->vgenp;
1342 	port_num = portp->port_num;
1343 
1344 	DBG1(vgenp, NULL, "port(%d):enter\n", port_num);
1345 
1346 	/*
1347 	 * If this port is connected to the vswitch, then
1348 	 * potentially there could be ports that may be using
1349 	 * this port to transmit packets. To address this do
1350 	 * the following:
1351 	 *	- First set vgenp->vsw_portp to NULL, so that
1352 	 *	  its not used after that.
1353 	 *	- Then wait for the refcnt to go down to 0.
1354 	 *	- Now we can safely detach this port.
1355 	 */
1356 	if (vgenp->vsw_portp == portp) {
1357 		vgenp->vsw_portp = NULL;
1358 		while (vgenp->vsw_port_refcnt > 0) {
1359 			delay(drv_usectohz(vgen_tx_delay));
1360 		}
1361 		(void) atomic_swap_32(&vgenp->vsw_port_refcnt, 0);
1362 	}
1363 
1364 	if (portp->vhp != NULL) {
1365 		vio_net_resource_unreg(portp->vhp);
1366 		portp->vhp = NULL;
1367 	}
1368 
1369 	vgen_vlan_destroy_hash(portp);
1370 
1371 	/* remove it from port list */
1372 	vgen_port_list_remove(portp);
1373 
1374 	/* detach channels from this port */
1375 	ldclp = &portp->ldclist;
1376 	WRITE_ENTER(&ldclp->rwlock);
1377 	while (ldclp->headp) {
1378 		vgen_ldc_detach(ldclp->headp);
1379 	}
1380 	RW_EXIT(&ldclp->rwlock);
1381 	rw_destroy(&ldclp->rwlock);
1382 
1383 	if (portp->num_ldcs != 0) {
1384 		kmem_free(portp->ldc_ids, portp->num_ldcs * sizeof (uint64_t));
1385 		portp->num_ldcs = 0;
1386 	}
1387 
1388 	mutex_destroy(&portp->lock);
1389 	KMEM_FREE(portp);
1390 
1391 	DBG1(vgenp, NULL, "port(%d):exit\n", port_num);
1392 }
1393 
1394 /* add a port to port list */
1395 static void
1396 vgen_port_list_insert(vgen_port_t *portp)
1397 {
1398 	vgen_portlist_t *plistp;
1399 	vgen_t *vgenp;
1400 
1401 	vgenp = portp->vgenp;
1402 	plistp = &(vgenp->vgenports);
1403 
1404 	if (plistp->headp == NULL) {
1405 		plistp->headp = portp;
1406 	} else {
1407 		plistp->tailp->nextp = portp;
1408 	}
1409 	plistp->tailp = portp;
1410 	portp->nextp = NULL;
1411 }
1412 
1413 /* remove a port from port list */
1414 static void
1415 vgen_port_list_remove(vgen_port_t *portp)
1416 {
1417 	vgen_port_t *prevp;
1418 	vgen_port_t *nextp;
1419 	vgen_portlist_t *plistp;
1420 	vgen_t *vgenp;
1421 
1422 	vgenp = portp->vgenp;
1423 
1424 	plistp = &(vgenp->vgenports);
1425 
1426 	if (plistp->headp == NULL)
1427 		return;
1428 
1429 	if (portp == plistp->headp) {
1430 		plistp->headp = portp->nextp;
1431 		if (portp == plistp->tailp)
1432 			plistp->tailp = plistp->headp;
1433 	} else {
1434 		for (prevp = plistp->headp;
1435 		    ((nextp = prevp->nextp) != NULL) && (nextp != portp);
1436 		    prevp = nextp)
1437 			;
1438 		if (nextp == portp) {
1439 			prevp->nextp = portp->nextp;
1440 		}
1441 		if (portp == plistp->tailp)
1442 			plistp->tailp = prevp;
1443 	}
1444 }
1445 
1446 /* lookup a port in the list based on port_num */
1447 static vgen_port_t *
1448 vgen_port_lookup(vgen_portlist_t *plistp, int port_num)
1449 {
1450 	vgen_port_t *portp = NULL;
1451 
1452 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
1453 		if (portp->port_num == port_num) {
1454 			break;
1455 		}
1456 	}
1457 
1458 	return (portp);
1459 }
1460 
1461 /* enable ports for transmit/receive */
1462 static void
1463 vgen_init_ports(vgen_t *vgenp)
1464 {
1465 	vgen_port_t	*portp;
1466 	vgen_portlist_t	*plistp;
1467 
1468 	plistp = &(vgenp->vgenports);
1469 	READ_ENTER(&plistp->rwlock);
1470 
1471 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
1472 		vgen_port_init(portp);
1473 	}
1474 
1475 	RW_EXIT(&plistp->rwlock);
1476 }
1477 
1478 static void
1479 vgen_port_init(vgen_port_t *portp)
1480 {
1481 	/* Add the port to the specified vlans */
1482 	vgen_vlan_add_ids(portp);
1483 
1484 	/* Bring up the channels of this port */
1485 	vgen_init_ldcs(portp);
1486 }
1487 
1488 /* disable transmit/receive on ports */
1489 static void
1490 vgen_uninit_ports(vgen_t *vgenp)
1491 {
1492 	vgen_port_t	*portp;
1493 	vgen_portlist_t	*plistp;
1494 
1495 	plistp = &(vgenp->vgenports);
1496 	READ_ENTER(&plistp->rwlock);
1497 
1498 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
1499 		vgen_port_uninit(portp);
1500 	}
1501 
1502 	RW_EXIT(&plistp->rwlock);
1503 }
1504 
1505 static void
1506 vgen_port_uninit(vgen_port_t *portp)
1507 {
1508 	vgen_uninit_ldcs(portp);
1509 
1510 	/* remove the port from vlans it has been assigned to */
1511 	vgen_vlan_remove_ids(portp);
1512 }
1513 
1514 /*
1515  * Scan the machine description for this instance of vnet
1516  * and read its properties. Called only from vgen_init().
1517  * Returns: 0 on success, 1 on failure.
1518  */
1519 static int
1520 vgen_read_mdprops(vgen_t *vgenp)
1521 {
1522 	vnet_t		*vnetp = vgenp->vnetp;
1523 	md_t		*mdp = NULL;
1524 	mde_cookie_t	rootnode;
1525 	mde_cookie_t	*listp = NULL;
1526 	uint64_t	cfgh;
1527 	char		*name;
1528 	int		rv = 1;
1529 	int		num_nodes = 0;
1530 	int		num_devs = 0;
1531 	int		listsz = 0;
1532 	int		i;
1533 
1534 	if ((mdp = md_get_handle()) == NULL) {
1535 		return (rv);
1536 	}
1537 
1538 	num_nodes = md_node_count(mdp);
1539 	ASSERT(num_nodes > 0);
1540 
1541 	listsz = num_nodes * sizeof (mde_cookie_t);
1542 	listp = (mde_cookie_t *)kmem_zalloc(listsz, KM_SLEEP);
1543 
1544 	rootnode = md_root_node(mdp);
1545 
1546 	/* search for all "virtual_device" nodes */
1547 	num_devs = md_scan_dag(mdp, rootnode,
1548 	    md_find_name(mdp, vdev_propname),
1549 	    md_find_name(mdp, "fwd"), listp);
1550 	if (num_devs <= 0) {
1551 		goto vgen_readmd_exit;
1552 	}
1553 
1554 	/*
1555 	 * Now loop through the list of virtual-devices looking for
1556 	 * devices with name "network" and for each such device compare
1557 	 * its instance with what we have from the 'reg' property to
1558 	 * find the right node in MD and then read all its properties.
1559 	 */
1560 	for (i = 0; i < num_devs; i++) {
1561 
1562 		if (md_get_prop_str(mdp, listp[i], "name", &name) != 0) {
1563 			goto vgen_readmd_exit;
1564 		}
1565 
1566 		/* is this a "network" device? */
1567 		if (strcmp(name, vnet_propname) != 0)
1568 			continue;
1569 
1570 		if (md_get_prop_val(mdp, listp[i], "cfg-handle", &cfgh) != 0) {
1571 			goto vgen_readmd_exit;
1572 		}
1573 
1574 		/* is this the required instance of vnet? */
1575 		if (vgenp->regprop != cfgh)
1576 			continue;
1577 
1578 		/*
1579 		 * Read the mtu. Note that we set the mtu of vnet device within
1580 		 * this routine itself, after validating the range.
1581 		 */
1582 		vgen_mtu_read(vgenp, mdp, listp[i], &vnetp->mtu);
1583 		if (vnetp->mtu < ETHERMTU || vnetp->mtu > VNET_MAX_MTU) {
1584 			vnetp->mtu = ETHERMTU;
1585 		}
1586 		vgenp->max_frame_size = vnetp->mtu +
1587 		    sizeof (struct ether_header) + VLAN_TAGSZ;
1588 
1589 		/* read priority ether types */
1590 		vgen_read_pri_eth_types(vgenp, mdp, listp[i]);
1591 
1592 		/* read vlan id properties of this vnet instance */
1593 		vgen_vlan_read_ids(vgenp, VGEN_LOCAL, mdp, listp[i],
1594 		    &vnetp->pvid, &vnetp->vids, &vnetp->nvids,
1595 		    &vnetp->default_vlan_id);
1596 
1597 		rv = 0;
1598 		break;
1599 	}
1600 
1601 vgen_readmd_exit:
1602 
1603 	kmem_free(listp, listsz);
1604 	(void) md_fini_handle(mdp);
1605 	return (rv);
1606 }
1607 
1608 /*
1609  * Read vlan id properties of the given MD node.
1610  * Arguments:
1611  *   arg:          device argument(vnet device or a port)
1612  *   type:         type of arg; VGEN_LOCAL(vnet device) or VGEN_PEER(port)
1613  *   mdp:          machine description
1614  *   node:         md node cookie
1615  *
1616  * Returns:
1617  *   pvidp:        port-vlan-id of the node
1618  *   vidspp:       list of vlan-ids of the node
1619  *   nvidsp:       # of vlan-ids in the list
1620  *   default_idp:  default-vlan-id of the node(if node is vnet device)
1621  */
1622 static void
1623 vgen_vlan_read_ids(void *arg, int type, md_t *mdp, mde_cookie_t node,
1624 	uint16_t *pvidp, uint16_t **vidspp, uint16_t *nvidsp,
1625 	uint16_t *default_idp)
1626 {
1627 	vgen_t		*vgenp;
1628 	vnet_t		*vnetp;
1629 	vgen_port_t	*portp;
1630 	char		*pvid_propname;
1631 	char		*vid_propname;
1632 	uint_t		nvids;
1633 	uint32_t	vids_size;
1634 	int		rv;
1635 	int		i;
1636 	uint64_t	*data;
1637 	uint64_t	val;
1638 	int		size;
1639 	int		inst;
1640 
1641 	if (type == VGEN_LOCAL) {
1642 
1643 		vgenp = (vgen_t *)arg;
1644 		vnetp = vgenp->vnetp;
1645 		pvid_propname = vgen_pvid_propname;
1646 		vid_propname = vgen_vid_propname;
1647 		inst = vnetp->instance;
1648 
1649 	} else if (type == VGEN_PEER) {
1650 
1651 		portp = (vgen_port_t *)arg;
1652 		vgenp = portp->vgenp;
1653 		vnetp = vgenp->vnetp;
1654 		pvid_propname = port_pvid_propname;
1655 		vid_propname = port_vid_propname;
1656 		inst = portp->port_num;
1657 
1658 	} else {
1659 		return;
1660 	}
1661 
1662 	if (type == VGEN_LOCAL && default_idp != NULL) {
1663 		rv = md_get_prop_val(mdp, node, vgen_dvid_propname, &val);
1664 		if (rv != 0) {
1665 			DWARN(vgenp, NULL, "prop(%s) not found",
1666 			    vgen_dvid_propname);
1667 
1668 			*default_idp = vnet_default_vlan_id;
1669 		} else {
1670 			*default_idp = val & 0xFFF;
1671 			DBG2(vgenp, NULL, "%s(%d): (%d)\n", vgen_dvid_propname,
1672 			    inst, *default_idp);
1673 		}
1674 	}
1675 
1676 	rv = md_get_prop_val(mdp, node, pvid_propname, &val);
1677 	if (rv != 0) {
1678 		DWARN(vgenp, NULL, "prop(%s) not found", pvid_propname);
1679 		*pvidp = vnet_default_vlan_id;
1680 	} else {
1681 
1682 		*pvidp = val & 0xFFF;
1683 		DBG2(vgenp, NULL, "%s(%d): (%d)\n",
1684 		    pvid_propname, inst, *pvidp);
1685 	}
1686 
1687 	rv = md_get_prop_data(mdp, node, vid_propname, (uint8_t **)&data,
1688 	    &size);
1689 	if (rv != 0) {
1690 		DBG2(vgenp, NULL, "prop(%s) not found", vid_propname);
1691 		size = 0;
1692 	} else {
1693 		size /= sizeof (uint64_t);
1694 	}
1695 	nvids = size;
1696 
1697 	if (nvids != 0) {
1698 		DBG2(vgenp, NULL, "%s(%d): ", vid_propname, inst);
1699 		vids_size = sizeof (uint16_t) * nvids;
1700 		*vidspp = kmem_zalloc(vids_size, KM_SLEEP);
1701 		for (i = 0; i < nvids; i++) {
1702 			(*vidspp)[i] = data[i] & 0xFFFF;
1703 			DBG2(vgenp, NULL, " %d ", (*vidspp)[i]);
1704 		}
1705 		DBG2(vgenp, NULL, "\n");
1706 	}
1707 
1708 	*nvidsp = nvids;
1709 }
1710 
1711 /*
1712  * Create a vlan id hash table for the given port.
1713  */
1714 static void
1715 vgen_vlan_create_hash(vgen_port_t *portp)
1716 {
1717 	char		hashname[MAXNAMELEN];
1718 
1719 	(void) snprintf(hashname, MAXNAMELEN, "port%d-vlan-hash",
1720 	    portp->port_num);
1721 
1722 	portp->vlan_nchains = vgen_vlan_nchains;
1723 	portp->vlan_hashp = mod_hash_create_idhash(hashname,
1724 	    portp->vlan_nchains, mod_hash_null_valdtor);
1725 }
1726 
1727 /*
1728  * Destroy the vlan id hash table in the given port.
1729  */
1730 static void
1731 vgen_vlan_destroy_hash(vgen_port_t *portp)
1732 {
1733 	if (portp->vlan_hashp != NULL) {
1734 		mod_hash_destroy_hash(portp->vlan_hashp);
1735 		portp->vlan_hashp = NULL;
1736 		portp->vlan_nchains = 0;
1737 	}
1738 }
1739 
1740 /*
1741  * Add a port to the vlans specified in its port properites.
1742  */
1743 static void
1744 vgen_vlan_add_ids(vgen_port_t *portp)
1745 {
1746 	int		rv;
1747 	int		i;
1748 
1749 	rv = mod_hash_insert(portp->vlan_hashp,
1750 	    (mod_hash_key_t)VLAN_ID_KEY(portp->pvid),
1751 	    (mod_hash_val_t)B_TRUE);
1752 	ASSERT(rv == 0);
1753 
1754 	for (i = 0; i < portp->nvids; i++) {
1755 		rv = mod_hash_insert(portp->vlan_hashp,
1756 		    (mod_hash_key_t)VLAN_ID_KEY(portp->vids[i]),
1757 		    (mod_hash_val_t)B_TRUE);
1758 		ASSERT(rv == 0);
1759 	}
1760 }
1761 
1762 /*
1763  * Remove a port from the vlans it has been assigned to.
1764  */
1765 static void
1766 vgen_vlan_remove_ids(vgen_port_t *portp)
1767 {
1768 	int		rv;
1769 	int		i;
1770 	mod_hash_val_t	vp;
1771 
1772 	rv = mod_hash_remove(portp->vlan_hashp,
1773 	    (mod_hash_key_t)VLAN_ID_KEY(portp->pvid),
1774 	    (mod_hash_val_t *)&vp);
1775 	ASSERT(rv == 0);
1776 
1777 	for (i = 0; i < portp->nvids; i++) {
1778 		rv = mod_hash_remove(portp->vlan_hashp,
1779 		    (mod_hash_key_t)VLAN_ID_KEY(portp->vids[i]),
1780 		    (mod_hash_val_t *)&vp);
1781 		ASSERT(rv == 0);
1782 	}
1783 }
1784 
1785 /*
1786  * Lookup the vlan id of the given tx frame. If it is a vlan-tagged frame,
1787  * then the vlan-id is available in the tag; otherwise, its vlan id is
1788  * implicitly obtained from the port-vlan-id of the vnet device.
1789  * The vlan id determined is returned in vidp.
1790  * Returns: B_TRUE if it is a tagged frame; B_FALSE if it is untagged.
1791  */
1792 static boolean_t
1793 vgen_frame_lookup_vid(vnet_t *vnetp, struct ether_header *ehp, uint16_t *vidp)
1794 {
1795 	struct ether_vlan_header	*evhp;
1796 
1797 	/* If it's a tagged frame, get the vlan id from vlan header */
1798 	if (ehp->ether_type == ETHERTYPE_VLAN) {
1799 
1800 		evhp = (struct ether_vlan_header *)ehp;
1801 		*vidp = VLAN_ID(ntohs(evhp->ether_tci));
1802 		return (B_TRUE);
1803 	}
1804 
1805 	/* Untagged frame, vlan-id is the pvid of vnet device */
1806 	*vidp = vnetp->pvid;
1807 	return (B_FALSE);
1808 }
1809 
1810 /*
1811  * Find the given vlan id in the hash table.
1812  * Return: B_TRUE if the id is found; B_FALSE if not found.
1813  */
1814 static boolean_t
1815 vgen_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid)
1816 {
1817 	int		rv;
1818 	mod_hash_val_t	vp;
1819 
1820 	rv = mod_hash_find(vlan_hashp, VLAN_ID_KEY(vid), (mod_hash_val_t *)&vp);
1821 
1822 	if (rv != 0)
1823 		return (B_FALSE);
1824 
1825 	return (B_TRUE);
1826 }
1827 
1828 /*
1829  * This function reads "priority-ether-types" property from md. This property
1830  * is used to enable support for priority frames. Applications which need
1831  * guaranteed and timely delivery of certain high priority frames to/from
1832  * a vnet or vsw within ldoms, should configure this property by providing
1833  * the ether type(s) for which the priority facility is needed.
1834  * Normal data frames are delivered over a ldc channel using the descriptor
1835  * ring mechanism which is constrained by factors such as descriptor ring size,
1836  * the rate at which the ring is processed at the peer ldc end point, etc.
1837  * The priority mechanism provides an Out-Of-Band path to send/receive frames
1838  * as raw pkt data (VIO_PKT_DATA) messages over the channel, avoiding the
1839  * descriptor ring path and enables a more reliable and timely delivery of
1840  * frames to the peer.
1841  */
1842 static void
1843 vgen_read_pri_eth_types(vgen_t *vgenp, md_t *mdp, mde_cookie_t node)
1844 {
1845 	int		rv;
1846 	uint16_t	*types;
1847 	uint64_t	*data;
1848 	int		size;
1849 	int		i;
1850 	size_t		mblk_sz;
1851 
1852 	rv = md_get_prop_data(mdp, node, pri_types_propname,
1853 	    (uint8_t **)&data, &size);
1854 	if (rv != 0) {
1855 		/*
1856 		 * Property may not exist if we are running pre-ldoms1.1 f/w.
1857 		 * Check if 'vgen_pri_eth_type' has been set in that case.
1858 		 */
1859 		if (vgen_pri_eth_type != 0) {
1860 			size = sizeof (vgen_pri_eth_type);
1861 			data = &vgen_pri_eth_type;
1862 		} else {
1863 			DBG2(vgenp, NULL,
1864 			    "prop(%s) not found", pri_types_propname);
1865 			size = 0;
1866 		}
1867 	}
1868 
1869 	if (size == 0) {
1870 		vgenp->pri_num_types = 0;
1871 		return;
1872 	}
1873 
1874 	/*
1875 	 * we have some priority-ether-types defined;
1876 	 * allocate a table of these types and also
1877 	 * allocate a pool of mblks to transmit these
1878 	 * priority packets.
1879 	 */
1880 	size /= sizeof (uint64_t);
1881 	vgenp->pri_num_types = size;
1882 	vgenp->pri_types = kmem_zalloc(size * sizeof (uint16_t), KM_SLEEP);
1883 	for (i = 0, types = vgenp->pri_types; i < size; i++) {
1884 		types[i] = data[i] & 0xFFFF;
1885 	}
1886 	mblk_sz = (VIO_PKT_DATA_HDRSIZE + vgenp->max_frame_size + 7) & ~7;
1887 	(void) vio_create_mblks(vgen_pri_tx_nmblks, mblk_sz,
1888 	    &vgenp->pri_tx_vmp);
1889 }
1890 
1891 static void
1892 vgen_mtu_read(vgen_t *vgenp, md_t *mdp, mde_cookie_t node, uint32_t *mtu)
1893 {
1894 	int		rv;
1895 	uint64_t	val;
1896 	char		*mtu_propname;
1897 
1898 	mtu_propname = vgen_mtu_propname;
1899 
1900 	rv = md_get_prop_val(mdp, node, mtu_propname, &val);
1901 	if (rv != 0) {
1902 		DWARN(vgenp, NULL, "prop(%s) not found", mtu_propname);
1903 		*mtu = vnet_ethermtu;
1904 	} else {
1905 
1906 		*mtu = val & 0xFFFF;
1907 		DBG2(vgenp, NULL, "%s(%d): (%d)\n", mtu_propname,
1908 		    vgenp->instance, *mtu);
1909 	}
1910 }
1911 
/*
 * Register with the MD event generator (mdeg): once for the vnet
 * device node and once for its port nodes. On success the property
 * template, parent spec and both mdeg handles are stashed in vgenp
 * (released later by vgen_mdeg_unreg()).
 * Returns DDI_SUCCESS or DDI_FAILURE; on failure all partial state
 * is rolled back.
 */
static int
vgen_mdeg_reg(vgen_t *vgenp)
{
	mdeg_prop_spec_t	*pspecp;
	mdeg_node_spec_t	*parentp;
	uint_t			templatesz;
	int			rv;
	mdeg_handle_t		dev_hdl = NULL;
	mdeg_handle_t		port_hdl = NULL;

	templatesz = sizeof (vgen_prop_template);
	pspecp = kmem_zalloc(templatesz, KM_NOSLEEP);
	if (pspecp == NULL) {
		return (DDI_FAILURE);
	}
	parentp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_NOSLEEP);
	if (parentp == NULL) {
		kmem_free(pspecp, templatesz);
		return (DDI_FAILURE);
	}

	/* start from the shared template, then patch in our instance */
	bcopy(vgen_prop_template, pspecp, templatesz);

	/*
	 * NOTE: The instance here refers to the value of "reg" property and
	 * not the dev_info instance (ddi_get_instance()) of vnet.
	 */
	VGEN_SET_MDEG_PROP_INST(pspecp, vgenp->regprop);

	parentp->namep = "virtual-device";
	parentp->specp = pspecp;

	/* save parentp in vgen_t */
	vgenp->mdeg_parentp = parentp;

	/*
	 * Register an interest in 'virtual-device' nodes with a
	 * 'name' property of 'network'
	 */
	rv = mdeg_register(parentp, &vdev_match, vgen_mdeg_cb, vgenp, &dev_hdl);
	if (rv != MDEG_SUCCESS) {
		DERR(vgenp, NULL, "mdeg_register failed\n");
		goto mdeg_reg_fail;
	}

	/* Register an interest in 'port' nodes */
	rv = mdeg_register(parentp, &vport_match, vgen_mdeg_port_cb, vgenp,
	    &port_hdl);
	if (rv != MDEG_SUCCESS) {
		DERR(vgenp, NULL, "mdeg_register failed\n");
		goto mdeg_reg_fail;
	}

	/* save mdeg handle in vgen_t */
	vgenp->mdeg_dev_hdl = dev_hdl;
	vgenp->mdeg_port_hdl = port_hdl;

	return (DDI_SUCCESS);

mdeg_reg_fail:
	/* dev_hdl is non-NULL only if the first registration succeeded */
	if (dev_hdl != NULL) {
		(void) mdeg_unregister(dev_hdl);
	}
	KMEM_FREE(parentp);
	kmem_free(pspecp, templatesz);
	vgenp->mdeg_parentp = NULL;
	return (DDI_FAILURE);
}
1981 
1982 /* unregister with MD event generator */
1983 static void
1984 vgen_mdeg_unreg(vgen_t *vgenp)
1985 {
1986 	(void) mdeg_unregister(vgenp->mdeg_dev_hdl);
1987 	(void) mdeg_unregister(vgenp->mdeg_port_hdl);
1988 	kmem_free(vgenp->mdeg_parentp->specp, sizeof (vgen_prop_template));
1989 	KMEM_FREE(vgenp->mdeg_parentp);
1990 	vgenp->mdeg_parentp = NULL;
1991 	vgenp->mdeg_dev_hdl = NULL;
1992 	vgenp->mdeg_port_hdl = NULL;
1993 }
1994 
/*
 * mdeg callback function for the port node. Processes MD updates for
 * this vnet's ports: removed ports are detached, added ports attached
 * (the vswitch port first, since other ports depend on it), and
 * matched ports are checked for updates.
 * Returns MDEG_SUCCESS, or MDEG_FAILURE on bad arguments or when the
 * vswitch port cannot be found/initialized.
 */
static int
vgen_mdeg_port_cb(void *cb_argp, mdeg_result_t *resp)
{
	int idx;
	int vsw_idx = -1;
	uint64_t val;
	vgen_t *vgenp;

	if ((resp == NULL) || (cb_argp == NULL)) {
		return (MDEG_FAILURE);
	}

	vgenp = (vgen_t *)cb_argp;
	DBG1(vgenp, NULL, "enter\n");

	mutex_enter(&vgenp->lock);

	DBG1(vgenp, NULL, "ports: removed(%x), "
	"added(%x), updated(%x)\n", resp->removed.nelem,
	    resp->added.nelem, resp->match_curr.nelem);

	/* first tear down ports that disappeared from the MD */
	for (idx = 0; idx < resp->removed.nelem; idx++) {
		(void) vgen_remove_port(vgenp, resp->removed.mdp,
		    resp->removed.mdep[idx]);
	}

	if (vgenp->vsw_portp == NULL) {
		/*
		 * find vsw_port and add it first, because other ports need
		 * this when adding fdb entry (see vgen_port_init()).
		 */
		for (idx = 0; idx < resp->added.nelem; idx++) {
			if (!(md_get_prop_val(resp->added.mdp,
			    resp->added.mdep[idx], swport_propname, &val))) {
				/* switch-port property value 0 marks vsw */
				if (val == 0) {
					/*
					 * This port is connected to the
					 * vsw on service domain.
					 */
					vsw_idx = idx;
					if (vgen_add_port(vgenp,
					    resp->added.mdp,
					    resp->added.mdep[idx]) !=
					    DDI_SUCCESS) {
						cmn_err(CE_NOTE, "vnet%d Could "
						    "not initialize virtual "
						    "switch port.",
						    vgenp->instance);
						mutex_exit(&vgenp->lock);
						return (MDEG_FAILURE);
					}
					break;
				}
			}
		}
		/* no vswitch port in this update: nothing else can proceed */
		if (vsw_idx == -1) {
			DWARN(vgenp, NULL, "can't find vsw_port\n");
			mutex_exit(&vgenp->lock);
			return (MDEG_FAILURE);
		}
	}

	/* now attach the remaining added ports */
	for (idx = 0; idx < resp->added.nelem; idx++) {
		if ((vsw_idx != -1) && (vsw_idx == idx)) /* skip vsw_port */
			continue;

		/* If this port can't be added just skip it. */
		(void) vgen_add_port(vgenp, resp->added.mdp,
		    resp->added.mdep[idx]);
	}

	/* finally reconcile ports present in both old and new MD */
	for (idx = 0; idx < resp->match_curr.nelem; idx++) {
		(void) vgen_update_port(vgenp, resp->match_curr.mdp,
		    resp->match_curr.mdep[idx],
		    resp->match_prev.mdp,
		    resp->match_prev.mdep[idx]);
	}

	mutex_exit(&vgenp->lock);
	DBG1(vgenp, NULL, "exit\n");
	return (MDEG_SUCCESS);
}
2078 
2079 /* mdeg callback function for the vnet node */
2080 static int
2081 vgen_mdeg_cb(void *cb_argp, mdeg_result_t *resp)
2082 {
2083 	vgen_t		*vgenp;
2084 	vnet_t		*vnetp;
2085 	md_t		*mdp;
2086 	mde_cookie_t	node;
2087 	uint64_t	inst;
2088 	char		*node_name = NULL;
2089 
2090 	if ((resp == NULL) || (cb_argp == NULL)) {
2091 		return (MDEG_FAILURE);
2092 	}
2093 
2094 	vgenp = (vgen_t *)cb_argp;
2095 	vnetp = vgenp->vnetp;
2096 
2097 	DBG1(vgenp, NULL, "%s: added %d : removed %d : curr matched %d"
2098 	    " : prev matched %d", resp->added.nelem, resp->removed.nelem,
2099 	    resp->match_curr.nelem, resp->match_prev.nelem);
2100 
2101 	mutex_enter(&vgenp->lock);
2102 
2103 	/*
2104 	 * We get an initial callback for this node as 'added' after
2105 	 * registering with mdeg. Note that we would have already gathered
2106 	 * information about this vnet node by walking MD earlier during attach
2107 	 * (in vgen_read_mdprops()). So, there is a window where the properties
2108 	 * of this node might have changed when we get this initial 'added'
2109 	 * callback. We handle this as if an update occured and invoke the same
2110 	 * function which handles updates to the properties of this vnet-node
2111 	 * if any. A non-zero 'match' value indicates that the MD has been
2112 	 * updated and that a 'network' node is present which may or may not
2113 	 * have been updated. It is up to the clients to examine their own
2114 	 * nodes and determine if they have changed.
2115 	 */
2116 	if (resp->added.nelem != 0) {
2117 
2118 		if (resp->added.nelem != 1) {
2119 			cmn_err(CE_NOTE, "!vnet%d: number of nodes added "
2120 			    "invalid: %d\n", vnetp->instance,
2121 			    resp->added.nelem);
2122 			goto vgen_mdeg_cb_err;
2123 		}
2124 
2125 		mdp = resp->added.mdp;
2126 		node = resp->added.mdep[0];
2127 
2128 	} else if (resp->match_curr.nelem != 0) {
2129 
2130 		if (resp->match_curr.nelem != 1) {
2131 			cmn_err(CE_NOTE, "!vnet%d: number of nodes updated "
2132 			    "invalid: %d\n", vnetp->instance,
2133 			    resp->match_curr.nelem);
2134 			goto vgen_mdeg_cb_err;
2135 		}
2136 
2137 		mdp = resp->match_curr.mdp;
2138 		node = resp->match_curr.mdep[0];
2139 
2140 	} else {
2141 		goto vgen_mdeg_cb_err;
2142 	}
2143 
2144 	/* Validate name and instance */
2145 	if (md_get_prop_str(mdp, node, "name", &node_name) != 0) {
2146 		DERR(vgenp, NULL, "unable to get node name\n");
2147 		goto vgen_mdeg_cb_err;
2148 	}
2149 
2150 	/* is this a virtual-network device? */
2151 	if (strcmp(node_name, vnet_propname) != 0) {
2152 		DERR(vgenp, NULL, "%s: Invalid node name: %s\n", node_name);
2153 		goto vgen_mdeg_cb_err;
2154 	}
2155 
2156 	if (md_get_prop_val(mdp, node, "cfg-handle", &inst)) {
2157 		DERR(vgenp, NULL, "prop(cfg-handle) not found\n");
2158 		goto vgen_mdeg_cb_err;
2159 	}
2160 
2161 	/* is this the right instance of vnet? */
2162 	if (inst != vgenp->regprop) {
2163 		DERR(vgenp, NULL,  "Invalid cfg-handle: %lx\n", inst);
2164 		goto vgen_mdeg_cb_err;
2165 	}
2166 
2167 	vgen_update_md_prop(vgenp, mdp, node);
2168 
2169 	mutex_exit(&vgenp->lock);
2170 	return (MDEG_SUCCESS);
2171 
2172 vgen_mdeg_cb_err:
2173 	mutex_exit(&vgenp->lock);
2174 	return (MDEG_FAILURE);
2175 }
2176 
2177 /*
2178  * Check to see if the relevant properties in the specified node have
2179  * changed, and if so take the appropriate action.
2180  */
2181 static void
2182 vgen_update_md_prop(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
2183 {
2184 	uint16_t	pvid;
2185 	uint16_t	*vids;
2186 	uint16_t	nvids;
2187 	vnet_t		*vnetp = vgenp->vnetp;
2188 	uint32_t	mtu;
2189 	enum		{ MD_init = 0x1,
2190 			    MD_vlans = 0x2,
2191 			    MD_mtu = 0x4 } updated;
2192 	int		rv;
2193 
2194 	updated = MD_init;
2195 
2196 	/* Read the vlan ids */
2197 	vgen_vlan_read_ids(vgenp, VGEN_LOCAL, mdp, mdex, &pvid, &vids,
2198 	    &nvids, NULL);
2199 
2200 	/* Determine if there are any vlan id updates */
2201 	if ((pvid != vnetp->pvid) ||		/* pvid changed? */
2202 	    (nvids != vnetp->nvids) ||		/* # of vids changed? */
2203 	    ((nvids != 0) && (vnetp->nvids != 0) &&	/* vids changed? */
2204 	    bcmp(vids, vnetp->vids, sizeof (uint16_t) * nvids))) {
2205 		updated |= MD_vlans;
2206 	}
2207 
2208 	/* Read mtu */
2209 	vgen_mtu_read(vgenp, mdp, mdex, &mtu);
2210 	if (mtu != vnetp->mtu) {
2211 		if (mtu >= ETHERMTU && mtu <= VNET_MAX_MTU) {
2212 			updated |= MD_mtu;
2213 		} else {
2214 			cmn_err(CE_NOTE, "!vnet%d: Unable to process mtu update"
2215 			    " as the specified value:%d is invalid\n",
2216 			    vnetp->instance, mtu);
2217 		}
2218 	}
2219 
2220 	/* Now process the updated props */
2221 
2222 	if (updated & MD_vlans) {
2223 
2224 		/* save the new vlan ids */
2225 		vnetp->pvid = pvid;
2226 		if (vnetp->nvids != 0) {
2227 			kmem_free(vnetp->vids,
2228 			    sizeof (uint16_t) * vnetp->nvids);
2229 			vnetp->nvids = 0;
2230 		}
2231 		if (nvids != 0) {
2232 			vnetp->nvids = nvids;
2233 			vnetp->vids = vids;
2234 		}
2235 
2236 		/* reset vlan-unaware peers (ver < 1.3) and restart handshake */
2237 		vgen_reset_vlan_unaware_ports(vgenp);
2238 
2239 	} else {
2240 
2241 		if (nvids != 0) {
2242 			kmem_free(vids, sizeof (uint16_t) * nvids);
2243 		}
2244 	}
2245 
2246 	if (updated & MD_mtu) {
2247 
2248 		DBG2(vgenp, NULL, "curr_mtu(%d) new_mtu(%d)\n",
2249 		    vnetp->mtu, mtu);
2250 
2251 		rv = vnet_mtu_update(vnetp, mtu);
2252 		if (rv == 0) {
2253 			vgenp->max_frame_size = mtu +
2254 			    sizeof (struct ether_header) + VLAN_TAGSZ;
2255 		}
2256 	}
2257 }
2258 
2259 /* add a new port to the device */
2260 static int
2261 vgen_add_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
2262 {
2263 	vgen_port_t	*portp;
2264 	int		rv;
2265 
2266 	portp = kmem_zalloc(sizeof (vgen_port_t), KM_SLEEP);
2267 
2268 	rv = vgen_port_read_props(portp, vgenp, mdp, mdex);
2269 	if (rv != DDI_SUCCESS) {
2270 		KMEM_FREE(portp);
2271 		return (DDI_FAILURE);
2272 	}
2273 
2274 	rv = vgen_port_attach(portp);
2275 	if (rv != DDI_SUCCESS) {
2276 		return (DDI_FAILURE);
2277 	}
2278 
2279 	return (DDI_SUCCESS);
2280 }
2281 
/*
 * read properties of the port from its md node.
 *
 * Extracts: the port number ("id"), the ids of all channel endpoint
 * nodes under the port, the remote mac address, whether this port
 * connects to the vsw on the service domain, and the port's vlan ids.
 * On DDI_SUCCESS the values are stored in 'portp' (portp->ldc_ids is
 * allocated here and owned by the port); on DDI_FAILURE all temporary
 * allocations have been released.
 */
static int
vgen_port_read_props(vgen_port_t *portp, vgen_t *vgenp, md_t *mdp,
	mde_cookie_t mdex)
{
	uint64_t		port_num;
	uint64_t		*ldc_ids;
	uint64_t		macaddr;
	uint64_t		val;
	int			num_ldcs;
	int			i;
	int			addrsz;
	int			num_nodes = 0;
	int			listsz = 0;
	mde_cookie_t		*listp = NULL;
	uint8_t			*addrp;
	struct ether_addr	ea;

	/* read "id" property to get the port number */
	if (md_get_prop_val(mdp, mdex, id_propname, &port_num)) {
		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
		return (DDI_FAILURE);
	}

	/*
	 * Find the channel endpoint node(s) under this port node.
	 */
	if ((num_nodes = md_node_count(mdp)) <= 0) {
		DWARN(vgenp, NULL, "invalid number of nodes found (%d)",
		    num_nodes);
		return (DDI_FAILURE);
	}

	/*
	 * allocate space for node list (KM_NOSLEEP: failure is handled,
	 * presumably to avoid blocking in the mdeg callback path)
	 */
	listsz = num_nodes * sizeof (mde_cookie_t);
	listp = kmem_zalloc(listsz, KM_NOSLEEP);
	if (listp == NULL)
		return (DDI_FAILURE);

	num_ldcs = md_scan_dag(mdp, mdex,
	    md_find_name(mdp, channel_propname),
	    md_find_name(mdp, "fwd"), listp);

	if (num_ldcs <= 0) {
		DWARN(vgenp, NULL, "can't find %s nodes", channel_propname);
		kmem_free(listp, listsz);
		return (DDI_FAILURE);
	}

	DBG2(vgenp, NULL, "num_ldcs %d", num_ldcs);

	ldc_ids = kmem_zalloc(num_ldcs * sizeof (uint64_t), KM_NOSLEEP);
	if (ldc_ids == NULL) {
		kmem_free(listp, listsz);
		return (DDI_FAILURE);
	}

	for (i = 0; i < num_ldcs; i++) {
		/* read channel ids */
		if (md_get_prop_val(mdp, listp[i], id_propname, &ldc_ids[i])) {
			DWARN(vgenp, NULL, "prop(%s) not found\n",
			    id_propname);
			kmem_free(listp, listsz);
			kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
			return (DDI_FAILURE);
		}
		DBG2(vgenp, NULL, "ldc_id 0x%llx", ldc_ids[i]);
	}

	kmem_free(listp, listsz);

	if (md_get_prop_data(mdp, mdex, rmacaddr_propname, &addrp,
	    &addrsz)) {
		DWARN(vgenp, NULL, "prop(%s) not found\n", rmacaddr_propname);
		kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
		return (DDI_FAILURE);
	}

	if (addrsz < ETHERADDRL) {
		DWARN(vgenp, NULL, "invalid address size (%d)\n", addrsz);
		kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
		return (DDI_FAILURE);
	}

	/*
	 * NOTE(review): the property data is read as a single uint64_t,
	 * which assumes addrp points to at least 8 bytes of suitably
	 * aligned data even though only ETHERADDRL (6) bytes were
	 * verified above — confirm against the MD property layout.
	 */
	macaddr = *((uint64_t *)addrp);

	DBG2(vgenp, NULL, "remote mac address 0x%llx\n", macaddr);

	/* unpack the low 6 bytes, most significant octet first */
	for (i = ETHERADDRL - 1; i >= 0; i--) {
		ea.ether_addr_octet[i] = macaddr & 0xFF;
		macaddr >>= 8;
	}

	/* a swport property value of 0 marks the port connected to vsw */
	if (vgenp->vsw_portp == NULL) {
		if (!(md_get_prop_val(mdp, mdex, swport_propname, &val))) {
			if (val == 0) {
				(void) atomic_swap_32(
				    &vgenp->vsw_port_refcnt, 0);
				/* This port is connected to the vsw */
				vgenp->vsw_portp = portp;
			}
		}
	}

	/* now update all properties into the port */
	portp->vgenp = vgenp;
	portp->port_num = port_num;
	ether_copy(&ea, &portp->macaddr);
	portp->ldc_ids = kmem_zalloc(sizeof (uint64_t) * num_ldcs, KM_SLEEP);
	bcopy(ldc_ids, portp->ldc_ids, sizeof (uint64_t) * num_ldcs);
	portp->num_ldcs = num_ldcs;

	/* read vlan id properties of this port node */
	vgen_vlan_read_ids(portp, VGEN_PEER, mdp, mdex, &portp->pvid,
	    &portp->vids, &portp->nvids, NULL);

	kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));

	return (DDI_SUCCESS);
}
2402 
2403 /* remove a port from the device */
2404 static int
2405 vgen_remove_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
2406 {
2407 	uint64_t	port_num;
2408 	vgen_port_t	*portp;
2409 	vgen_portlist_t	*plistp;
2410 
2411 	/* read "id" property to get the port number */
2412 	if (md_get_prop_val(mdp, mdex, id_propname, &port_num)) {
2413 		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
2414 		return (DDI_FAILURE);
2415 	}
2416 
2417 	plistp = &(vgenp->vgenports);
2418 
2419 	WRITE_ENTER(&plistp->rwlock);
2420 	portp = vgen_port_lookup(plistp, (int)port_num);
2421 	if (portp == NULL) {
2422 		DWARN(vgenp, NULL, "can't find port(%lx)\n", port_num);
2423 		RW_EXIT(&plistp->rwlock);
2424 		return (DDI_FAILURE);
2425 	}
2426 
2427 	vgen_port_detach_mdeg(portp);
2428 	RW_EXIT(&plistp->rwlock);
2429 
2430 	return (DDI_SUCCESS);
2431 }
2432 
2433 /* attach a port to the device based on mdeg data */
2434 static int
2435 vgen_port_attach(vgen_port_t *portp)
2436 {
2437 	int			i;
2438 	vgen_portlist_t		*plistp;
2439 	vgen_t			*vgenp;
2440 	uint64_t		*ldcids;
2441 	uint32_t		num_ldcs;
2442 	mac_register_t		*macp;
2443 	vio_net_res_type_t	type;
2444 	int			rv;
2445 
2446 	ASSERT(portp != NULL);
2447 
2448 	vgenp = portp->vgenp;
2449 	ldcids = portp->ldc_ids;
2450 	num_ldcs = portp->num_ldcs;
2451 
2452 	DBG1(vgenp, NULL, "port_num(%d)\n", portp->port_num);
2453 
2454 	mutex_init(&portp->lock, NULL, MUTEX_DRIVER, NULL);
2455 	rw_init(&portp->ldclist.rwlock, NULL, RW_DRIVER, NULL);
2456 	portp->ldclist.headp = NULL;
2457 
2458 	for (i = 0; i < num_ldcs; i++) {
2459 		DBG2(vgenp, NULL, "ldcid (%lx)\n", ldcids[i]);
2460 		if (vgen_ldc_attach(portp, ldcids[i]) == DDI_FAILURE) {
2461 			vgen_port_detach(portp);
2462 			return (DDI_FAILURE);
2463 		}
2464 	}
2465 
2466 	/* create vlan id hash table */
2467 	vgen_vlan_create_hash(portp);
2468 
2469 	if (portp == vgenp->vsw_portp) {
2470 		/* This port is connected to the switch port */
2471 		vgenp->vsw_portp = portp;
2472 		(void) atomic_swap_32(&portp->use_vsw_port, B_FALSE);
2473 		type = VIO_NET_RES_LDC_SERVICE;
2474 	} else {
2475 		(void) atomic_swap_32(&portp->use_vsw_port, B_TRUE);
2476 		type = VIO_NET_RES_LDC_GUEST;
2477 	}
2478 
2479 	if ((macp = mac_alloc(MAC_VERSION)) == NULL) {
2480 		vgen_port_detach(portp);
2481 		return (DDI_FAILURE);
2482 	}
2483 	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
2484 	macp->m_driver = portp;
2485 	macp->m_dip = vgenp->vnetdip;
2486 	macp->m_src_addr = (uint8_t *)&(vgenp->macaddr);
2487 	macp->m_callbacks = &vgen_m_callbacks;
2488 	macp->m_min_sdu = 0;
2489 	macp->m_max_sdu = ETHERMTU;
2490 
2491 	mutex_enter(&portp->lock);
2492 	rv = vio_net_resource_reg(macp, type, vgenp->macaddr,
2493 	    portp->macaddr, &portp->vhp, &portp->vcb);
2494 	mutex_exit(&portp->lock);
2495 	mac_free(macp);
2496 
2497 	if (rv == 0) {
2498 		/* link it into the list of ports */
2499 		plistp = &(vgenp->vgenports);
2500 		WRITE_ENTER(&plistp->rwlock);
2501 		vgen_port_list_insert(portp);
2502 		RW_EXIT(&plistp->rwlock);
2503 	} else {
2504 		DERR(vgenp, NULL, "vio_net_resource_reg failed for portp=0x%p",
2505 		    portp);
2506 		vgen_port_detach(portp);
2507 	}
2508 
2509 	DBG1(vgenp, NULL, "exit: port_num(%d)\n", portp->port_num);
2510 	return (DDI_SUCCESS);
2511 }
2512 
/*
 * detach a port from the device based on mdeg data.
 *
 * Stops the port first if it is running, then tears it down via
 * vgen_port_detach(). Caller holds the port list write lock
 * (see vgen_remove_port()).
 */
static void
vgen_port_detach_mdeg(vgen_port_t *portp)
{
	vgen_t *vgenp = portp->vgenp;

	DBG1(vgenp, NULL, "enter: port_num(%d)\n", portp->port_num);

	mutex_enter(&portp->lock);

	/* stop the port if needed */
	if (portp->flags & VGEN_STARTED) {
		vgen_port_uninit(portp);
	}

	mutex_exit(&portp->lock);
	vgen_port_detach(portp);

	/*
	 * NOTE(review): if vgen_port_detach() frees portp, the
	 * dereference below reads freed memory (debug builds only) —
	 * verify against vgen_port_detach() and drop the deref if so.
	 */
	DBG1(vgenp, NULL, "exit: port_num(%d)\n", portp->port_num);
}
2533 
2534 static int
2535 vgen_update_port(vgen_t *vgenp, md_t *curr_mdp, mde_cookie_t curr_mdex,
2536 	md_t *prev_mdp, mde_cookie_t prev_mdex)
2537 {
2538 	uint64_t	cport_num;
2539 	uint64_t	pport_num;
2540 	vgen_portlist_t	*plistp;
2541 	vgen_port_t	*portp;
2542 	boolean_t	updated_vlans = B_FALSE;
2543 	uint16_t	pvid;
2544 	uint16_t	*vids;
2545 	uint16_t	nvids;
2546 
2547 	/*
2548 	 * For now, we get port updates only if vlan ids changed.
2549 	 * We read the port num and do some sanity check.
2550 	 */
2551 	if (md_get_prop_val(curr_mdp, curr_mdex, id_propname, &cport_num)) {
2552 		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
2553 		return (DDI_FAILURE);
2554 	}
2555 
2556 	if (md_get_prop_val(prev_mdp, prev_mdex, id_propname, &pport_num)) {
2557 		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
2558 		return (DDI_FAILURE);
2559 	}
2560 	if (cport_num != pport_num)
2561 		return (DDI_FAILURE);
2562 
2563 	plistp = &(vgenp->vgenports);
2564 
2565 	READ_ENTER(&plistp->rwlock);
2566 
2567 	portp = vgen_port_lookup(plistp, (int)cport_num);
2568 	if (portp == NULL) {
2569 		DWARN(vgenp, NULL, "can't find port(%lx)\n", cport_num);
2570 		RW_EXIT(&plistp->rwlock);
2571 		return (DDI_FAILURE);
2572 	}
2573 
2574 	/* Read the vlan ids */
2575 	vgen_vlan_read_ids(portp, VGEN_PEER, curr_mdp, curr_mdex, &pvid, &vids,
2576 	    &nvids, NULL);
2577 
2578 	/* Determine if there are any vlan id updates */
2579 	if ((pvid != portp->pvid) ||		/* pvid changed? */
2580 	    (nvids != portp->nvids) ||		/* # of vids changed? */
2581 	    ((nvids != 0) && (portp->nvids != 0) &&	/* vids changed? */
2582 	    bcmp(vids, portp->vids, sizeof (uint16_t) * nvids))) {
2583 		updated_vlans = B_TRUE;
2584 	}
2585 
2586 	if (updated_vlans == B_FALSE) {
2587 		RW_EXIT(&plistp->rwlock);
2588 		return (DDI_FAILURE);
2589 	}
2590 
2591 	/* remove the port from vlans it has been assigned to */
2592 	vgen_vlan_remove_ids(portp);
2593 
2594 	/* save the new vlan ids */
2595 	portp->pvid = pvid;
2596 	if (portp->nvids != 0) {
2597 		kmem_free(portp->vids, sizeof (uint16_t) * portp->nvids);
2598 		portp->nvids = 0;
2599 	}
2600 	if (nvids != 0) {
2601 		portp->vids = kmem_zalloc(sizeof (uint16_t) * nvids, KM_SLEEP);
2602 		bcopy(vids, portp->vids, sizeof (uint16_t) * nvids);
2603 		portp->nvids = nvids;
2604 		kmem_free(vids, sizeof (uint16_t) * nvids);
2605 	}
2606 
2607 	/* add port to the new vlans */
2608 	vgen_vlan_add_ids(portp);
2609 
2610 	/* reset the port if it is vlan unaware (ver < 1.3) */
2611 	vgen_vlan_unaware_port_reset(portp);
2612 
2613 	RW_EXIT(&plistp->rwlock);
2614 
2615 	return (DDI_SUCCESS);
2616 }
2617 
2618 static uint64_t
2619 vgen_port_stat(vgen_port_t *portp, uint_t stat)
2620 {
2621 	vgen_ldclist_t	*ldclp;
2622 	vgen_ldc_t *ldcp;
2623 	uint64_t	val;
2624 
2625 	val = 0;
2626 	ldclp = &portp->ldclist;
2627 
2628 	READ_ENTER(&ldclp->rwlock);
2629 	for (ldcp = ldclp->headp; ldcp != NULL; ldcp = ldcp->nextp) {
2630 		val += vgen_ldc_stat(ldcp, stat);
2631 	}
2632 	RW_EXIT(&ldclp->rwlock);
2633 
2634 	return (val);
2635 }
2636 
/*
 * allocate receive resources (rx mblk pools) for the channel.
 *
 * Pool buffer sizes come from one of three places, in priority order:
 *  1. the vgen_rbufsz*/vgen_nrbufs* tunables, if vgen_rbufsz1 is set
 *     (3 pools, or 4 when vgen_rbufsz4 is also set);
 *  2. fixed standard sizes (128/256/2048) when mtu-based pool sizing
 *     is disabled or the rounded frame size fits in 2K;
 *  3. frame-size-derived sizes for jumbo MTUs (4 pools).
 * ldcp->max_rxpool_size records the largest pool's buffer size.
 * Returns the status from vio_init_multipools().
 */
static int
vgen_init_multipools(vgen_ldc_t *ldcp)
{
	size_t		data_sz;
	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
	int		status;
	uint32_t	sz1 = 0;
	uint32_t	sz2 = 0;
	uint32_t	sz3 = 0;
	uint32_t	sz4 = 0;

	/*
	 * We round up the mtu specified to be a multiple of 2K.
	 * We then create rx pools based on the rounded up size.
	 */
	data_sz = vgenp->max_frame_size + VNET_IPALIGN + VNET_LDCALIGN;
	data_sz = VNET_ROUNDUP_2K(data_sz);

	/*
	 * If pool sizes are specified, use them. Note that the presence of
	 * the first tunable will be used as a hint.
	 */
	if (vgen_rbufsz1 != 0) {

		sz1 = vgen_rbufsz1;
		sz2 = vgen_rbufsz2;
		sz3 = vgen_rbufsz3;
		sz4 = vgen_rbufsz4;

		if (sz4 == 0) { /* need 3 pools */

			ldcp->max_rxpool_size = sz3;
			status = vio_init_multipools(&ldcp->vmp,
			    VGEN_NUM_VMPOOLS, sz1, sz2, sz3, vgen_nrbufs1,
			    vgen_nrbufs2, vgen_nrbufs3);

		} else {

			ldcp->max_rxpool_size = sz4;
			status = vio_init_multipools(&ldcp->vmp,
			    VGEN_NUM_VMPOOLS + 1, sz1, sz2, sz3, sz4,
			    vgen_nrbufs1, vgen_nrbufs2, vgen_nrbufs3,
			    vgen_nrbufs4);
		}
		return (status);
	}

	/*
	 * Pool sizes are not specified. We select the pool sizes based on the
	 * mtu if vnet_jumbo_rxpools is enabled.
	 */
	if (vnet_jumbo_rxpools == B_FALSE || data_sz == VNET_2K) {
		/*
		 * Receive buffer pool allocation based on mtu is disabled.
		 * Use the default mechanism of standard size pool allocation.
		 */
		sz1 = VGEN_DBLK_SZ_128;
		sz2 = VGEN_DBLK_SZ_256;
		sz3 = VGEN_DBLK_SZ_2048;
		ldcp->max_rxpool_size = sz3;

		status = vio_init_multipools(&ldcp->vmp, VGEN_NUM_VMPOOLS,
		    sz1, sz2, sz3,
		    vgen_nrbufs1, vgen_nrbufs2, vgen_nrbufs3);

		return (status);
	}

	/* jumbo frames: size the largest pool(s) from the frame size */
	switch (data_sz) {

	case VNET_4K:

		sz1 = VGEN_DBLK_SZ_128;
		sz2 = VGEN_DBLK_SZ_256;
		sz3 = VGEN_DBLK_SZ_2048;
		sz4 = sz3 << 1;			/* 4K */
		ldcp->max_rxpool_size = sz4;

		status = vio_init_multipools(&ldcp->vmp, VGEN_NUM_VMPOOLS + 1,
		    sz1, sz2, sz3, sz4,
		    vgen_nrbufs1, vgen_nrbufs2, vgen_nrbufs3, vgen_nrbufs4);
		break;

	default:	/* data_sz:  4K+ to 16K */

		sz1 = VGEN_DBLK_SZ_256;
		sz2 = VGEN_DBLK_SZ_2048;
		sz3 = data_sz >> 1;	/* Jumbo-size/2 */
		sz4 = data_sz;		/* Jumbo-size  */
		ldcp->max_rxpool_size = sz4;

		status = vio_init_multipools(&ldcp->vmp, VGEN_NUM_VMPOOLS + 1,
		    sz1, sz2, sz3, sz4,
		    vgen_nrbufs1, vgen_nrbufs2, vgen_nrbufs3, vgen_nrbufs4);
		break;

	}

	return (status);
}
2738 
/*
 * attach the channel corresponding to the given ldc_id to the port.
 *
 * Resources are acquired in stages and 'attach_state' records how far
 * we got, so the ldc_attach_failed path can unwind exactly what was
 * set up: channel struct, locks, LDC handle, optional receive thread,
 * callback + read buffer, tx ring, rx pools, kstats. On success the
 * channel is linked onto the port's list and marked CHANNEL_ATTACHED.
 * Returns DDI_SUCCESS or DDI_FAILURE (with everything unwound).
 */
static int
vgen_ldc_attach(vgen_port_t *portp, uint64_t ldc_id)
{
	vgen_t 		*vgenp;
	vgen_ldclist_t	*ldclp;
	vgen_ldc_t 	*ldcp, **prev_ldcp;
	ldc_attr_t 	attr;
	int 		status;
	ldc_status_t	istatus;
	char		kname[MAXNAMELEN];
	int		instance;
	enum	{AST_init = 0x0, AST_ldc_alloc = 0x1,
		AST_mutex_init = 0x2, AST_ldc_init = 0x4,
		AST_ldc_reg_cb = 0x8, AST_alloc_tx_ring = 0x10,
		AST_create_rxmblks = 0x20,
		AST_create_rcv_thread = 0x40} attach_state;

	attach_state = AST_init;
	vgenp = portp->vgenp;
	ldclp = &portp->ldclist;

	ldcp = kmem_zalloc(sizeof (vgen_ldc_t), KM_NOSLEEP);
	if (ldcp == NULL) {
		goto ldc_attach_failed;
	}
	ldcp->ldc_id = ldc_id;
	ldcp->portp = portp;

	attach_state |= AST_ldc_alloc;

	mutex_init(&ldcp->txlock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&ldcp->cblock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&ldcp->tclock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&ldcp->wrlock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&ldcp->rxlock, NULL, MUTEX_DRIVER, NULL);

	attach_state |= AST_mutex_init;

	attr.devclass = LDC_DEV_NT;
	attr.instance = vgenp->instance;
	attr.mode = LDC_MODE_UNRELIABLE;
	attr.mtu = vnet_ldc_mtu;
	status = ldc_init(ldc_id, &attr, &ldcp->ldc_handle);
	if (status != 0) {
		DWARN(vgenp, ldcp, "ldc_init failed,rv (%d)\n", status);
		goto ldc_attach_failed;
	}
	attach_state |= AST_ldc_init;

	/* optionally hand receive processing off to a worker thread */
	if (vgen_rcv_thread_enabled) {
		ldcp->rcv_thr_flags = 0;

		mutex_init(&ldcp->rcv_thr_lock, NULL, MUTEX_DRIVER, NULL);
		cv_init(&ldcp->rcv_thr_cv, NULL, CV_DRIVER, NULL);
		ldcp->rcv_thread = thread_create(NULL, 2 * DEFAULTSTKSZ,
		    vgen_ldc_rcv_worker, ldcp, 0, &p0, TS_RUN, maxclsyspri);

		/*
		 * flag set before the NULL check so that the failure path
		 * still destroys rcv_thr_lock/rcv_thr_cv
		 */
		attach_state |= AST_create_rcv_thread;
		if (ldcp->rcv_thread == NULL) {
			DWARN(vgenp, ldcp, "Failed to create worker thread");
			goto ldc_attach_failed;
		}
	}

	status = ldc_reg_callback(ldcp->ldc_handle, vgen_ldc_cb, (caddr_t)ldcp);
	if (status != 0) {
		DWARN(vgenp, ldcp, "ldc_reg_callback failed, rv (%d)\n",
		    status);
		goto ldc_attach_failed;
	}
	/*
	 * allocate a message for ldc_read()s, big enough to hold ctrl and
	 * data msgs, including raw data msgs used to recv priority frames.
	 */
	ldcp->msglen = VIO_PKT_DATA_HDRSIZE + vgenp->max_frame_size;
	ldcp->ldcmsg = kmem_alloc(ldcp->msglen, KM_SLEEP);
	attach_state |= AST_ldc_reg_cb;

	(void) ldc_status(ldcp->ldc_handle, &istatus);
	ASSERT(istatus == LDC_INIT);
	ldcp->ldc_status = istatus;

	/* allocate transmit resources */
	status = vgen_alloc_tx_ring(ldcp);
	if (status != 0) {
		goto ldc_attach_failed;
	}
	attach_state |= AST_alloc_tx_ring;

	/* allocate receive resources */
	status = vgen_init_multipools(ldcp);
	if (status != 0) {
		goto ldc_attach_failed;
	}
	attach_state |= AST_create_rxmblks;

	/* Setup kstats for the channel */
	instance = vgenp->instance;
	(void) sprintf(kname, "vnetldc0x%lx", ldcp->ldc_id);
	ldcp->ksp = vgen_setup_kstats("vnet", instance, kname, &ldcp->stats);
	if (ldcp->ksp == NULL) {
		goto ldc_attach_failed;
	}

	/* initialize vgen_versions supported */
	bcopy(vgen_versions, ldcp->vgen_versions, sizeof (ldcp->vgen_versions));
	vgen_reset_vnet_proto_ops(ldcp);

	/* link it into the list of channels for this port */
	WRITE_ENTER(&ldclp->rwlock);
	prev_ldcp = (vgen_ldc_t **)(&ldclp->headp);
	ldcp->nextp = *prev_ldcp;
	*prev_ldcp = ldcp;
	RW_EXIT(&ldclp->rwlock);

	ldcp->flags |= CHANNEL_ATTACHED;
	return (DDI_SUCCESS);

ldc_attach_failed:
	/* unwind in reverse order of acquisition, guided by attach_state */
	if (attach_state & AST_ldc_reg_cb) {
		(void) ldc_unreg_callback(ldcp->ldc_handle);
		kmem_free(ldcp->ldcmsg, ldcp->msglen);
	}
	if (attach_state & AST_create_rcv_thread) {
		if (ldcp->rcv_thread != NULL) {
			vgen_stop_rcv_thread(ldcp);
		}
		mutex_destroy(&ldcp->rcv_thr_lock);
		cv_destroy(&ldcp->rcv_thr_cv);
	}
	if (attach_state & AST_create_rxmblks) {
		vio_mblk_pool_t *fvmp = NULL;

		/* no traffic has flowed yet, so no pool can be busy */
		vio_destroy_multipools(&ldcp->vmp, &fvmp);
		ASSERT(fvmp == NULL);
	}
	if (attach_state & AST_alloc_tx_ring) {
		vgen_free_tx_ring(ldcp);
	}
	if (attach_state & AST_ldc_init) {
		(void) ldc_fini(ldcp->ldc_handle);
	}
	if (attach_state & AST_mutex_init) {
		mutex_destroy(&ldcp->tclock);
		mutex_destroy(&ldcp->txlock);
		mutex_destroy(&ldcp->cblock);
		mutex_destroy(&ldcp->wrlock);
		mutex_destroy(&ldcp->rxlock);
	}
	if (attach_state & AST_ldc_alloc) {
		KMEM_FREE(ldcp);
	}
	return (DDI_FAILURE);
}
2894 
/*
 * detach a channel from the port.
 *
 * First verifies that ldcp is actually on the port's channel list
 * (guarding against a stale pointer), then reverses everything
 * vgen_ldc_attach() set up: callback, receive thread, read buffer,
 * kstats, rx pools, tx ring, LDC handle and locks. Finally unlinks
 * the channel from the list and frees it.
 */
static void
vgen_ldc_detach(vgen_ldc_t *ldcp)
{
	vgen_port_t	*portp;
	vgen_t 		*vgenp;
	vgen_ldc_t 	*pldcp;
	vgen_ldc_t	**prev_ldcp;
	vgen_ldclist_t	*ldclp;

	portp = ldcp->portp;
	vgenp = portp->vgenp;
	ldclp = &portp->ldclist;

	/* locate ldcp on the list; prev_ldcp is kept for the unlink below */
	prev_ldcp =  (vgen_ldc_t **)&ldclp->headp;
	for (; (pldcp = *prev_ldcp) != NULL; prev_ldcp = &pldcp->nextp) {
		if (pldcp == ldcp) {
			break;
		}
	}

	if (pldcp == NULL) {
		/* invalid ldcp? */
		return;
	}

	if (ldcp->ldc_status != LDC_INIT) {
		DWARN(vgenp, ldcp, "ldc_status is not INIT\n");
	}

	if (ldcp->flags & CHANNEL_ATTACHED) {
		ldcp->flags &= ~(CHANNEL_ATTACHED);

		(void) ldc_unreg_callback(ldcp->ldc_handle);
		if (ldcp->rcv_thread != NULL) {
			/* First stop the receive thread */
			vgen_stop_rcv_thread(ldcp);
			mutex_destroy(&ldcp->rcv_thr_lock);
			cv_destroy(&ldcp->rcv_thr_cv);
		}
		kmem_free(ldcp->ldcmsg, ldcp->msglen);

		vgen_destroy_kstats(ldcp->ksp);
		ldcp->ksp = NULL;

		/*
		 * if we cannot reclaim all mblks, put this
		 * on the list of pools(vgenp->rmp) to be reclaimed when the
		 * device gets detached (see vgen_uninit()).
		 */
		vio_destroy_multipools(&ldcp->vmp, &vgenp->rmp);

		/* free transmit resources */
		vgen_free_tx_ring(ldcp);

		(void) ldc_fini(ldcp->ldc_handle);
		mutex_destroy(&ldcp->tclock);
		mutex_destroy(&ldcp->txlock);
		mutex_destroy(&ldcp->cblock);
		mutex_destroy(&ldcp->wrlock);
		mutex_destroy(&ldcp->rxlock);

		/* unlink it from the list */
		*prev_ldcp = ldcp->nextp;
		KMEM_FREE(ldcp);
	}
}
2962 
2963 /*
2964  * This function allocates transmit resources for the channel.
2965  * The resources consist of a transmit descriptor ring and an associated
2966  * transmit buffer ring.
2967  */
2968 static int
2969 vgen_alloc_tx_ring(vgen_ldc_t *ldcp)
2970 {
2971 	void *tbufp;
2972 	ldc_mem_info_t minfo;
2973 	uint32_t txdsize;
2974 	uint32_t tbufsize;
2975 	int status;
2976 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
2977 
2978 	ldcp->num_txds = vnet_ntxds;
2979 	txdsize = sizeof (vnet_public_desc_t);
2980 	tbufsize = sizeof (vgen_private_desc_t);
2981 
2982 	/* allocate transmit buffer ring */
2983 	tbufp = kmem_zalloc(ldcp->num_txds * tbufsize, KM_NOSLEEP);
2984 	if (tbufp == NULL) {
2985 		return (DDI_FAILURE);
2986 	}
2987 
2988 	/* create transmit descriptor ring */
2989 	status = ldc_mem_dring_create(ldcp->num_txds, txdsize,
2990 	    &ldcp->tx_dhandle);
2991 	if (status) {
2992 		DWARN(vgenp, ldcp, "ldc_mem_dring_create() failed\n");
2993 		kmem_free(tbufp, ldcp->num_txds * tbufsize);
2994 		return (DDI_FAILURE);
2995 	}
2996 
2997 	/* get the addr of descripror ring */
2998 	status = ldc_mem_dring_info(ldcp->tx_dhandle, &minfo);
2999 	if (status) {
3000 		DWARN(vgenp, ldcp, "ldc_mem_dring_info() failed\n");
3001 		kmem_free(tbufp, ldcp->num_txds * tbufsize);
3002 		(void) ldc_mem_dring_destroy(ldcp->tx_dhandle);
3003 		ldcp->tbufp = NULL;
3004 		return (DDI_FAILURE);
3005 	}
3006 	ldcp->txdp = (vnet_public_desc_t *)(minfo.vaddr);
3007 	ldcp->tbufp = tbufp;
3008 
3009 	ldcp->txdendp = &((ldcp->txdp)[ldcp->num_txds]);
3010 	ldcp->tbufendp = &((ldcp->tbufp)[ldcp->num_txds]);
3011 
3012 	return (DDI_SUCCESS);
3013 }
3014 
3015 /* Free transmit resources for the channel */
3016 static void
3017 vgen_free_tx_ring(vgen_ldc_t *ldcp)
3018 {
3019 	int tbufsize = sizeof (vgen_private_desc_t);
3020 
3021 	/* free transmit descriptor ring */
3022 	(void) ldc_mem_dring_destroy(ldcp->tx_dhandle);
3023 
3024 	/* free transmit buffer ring */
3025 	kmem_free(ldcp->tbufp, ldcp->num_txds * tbufsize);
3026 	ldcp->txdp = ldcp->txdendp = NULL;
3027 	ldcp->tbufp = ldcp->tbufendp = NULL;
3028 }
3029 
3030 /* enable transmit/receive on the channels for the port */
3031 static void
3032 vgen_init_ldcs(vgen_port_t *portp)
3033 {
3034 	vgen_ldclist_t	*ldclp = &portp->ldclist;
3035 	vgen_ldc_t	*ldcp;
3036 
3037 	READ_ENTER(&ldclp->rwlock);
3038 	ldcp =  ldclp->headp;
3039 	for (; ldcp  != NULL; ldcp = ldcp->nextp) {
3040 		(void) vgen_ldc_init(ldcp);
3041 	}
3042 	RW_EXIT(&ldclp->rwlock);
3043 }
3044 
3045 /* stop transmit/receive on the channels for the port */
3046 static void
3047 vgen_uninit_ldcs(vgen_port_t *portp)
3048 {
3049 	vgen_ldclist_t	*ldclp = &portp->ldclist;
3050 	vgen_ldc_t	*ldcp;
3051 
3052 	READ_ENTER(&ldclp->rwlock);
3053 	ldcp =  ldclp->headp;
3054 	for (; ldcp  != NULL; ldcp = ldcp->nextp) {
3055 		vgen_ldc_uninit(ldcp);
3056 	}
3057 	RW_EXIT(&ldclp->rwlock);
3058 }
3059 
3060 /* enable transmit/receive on the channel */
3061 static int
3062 vgen_ldc_init(vgen_ldc_t *ldcp)
3063 {
3064 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3065 	ldc_status_t	istatus;
3066 	int		rv;
3067 	uint32_t	retries = 0;
3068 	enum	{ ST_init = 0x0, ST_ldc_open = 0x1,
3069 		ST_init_tbufs = 0x2, ST_cb_enable = 0x4} init_state;
3070 	init_state = ST_init;
3071 
3072 	DBG1(vgenp, ldcp, "enter\n");
3073 	LDC_LOCK(ldcp);
3074 
3075 	rv = ldc_open(ldcp->ldc_handle);
3076 	if (rv != 0) {
3077 		DWARN(vgenp, ldcp, "ldc_open failed: rv(%d)\n", rv);
3078 		goto ldcinit_failed;
3079 	}
3080 	init_state |= ST_ldc_open;
3081 
3082 	(void) ldc_status(ldcp->ldc_handle, &istatus);
3083 	if (istatus != LDC_OPEN && istatus != LDC_READY) {
3084 		DWARN(vgenp, ldcp, "status(%d) is not OPEN/READY\n", istatus);
3085 		goto ldcinit_failed;
3086 	}
3087 	ldcp->ldc_status = istatus;
3088 
3089 	rv = vgen_init_tbufs(ldcp);
3090 	if (rv != 0) {
3091 		DWARN(vgenp, ldcp, "vgen_init_tbufs() failed\n");
3092 		goto ldcinit_failed;
3093 	}
3094 	init_state |= ST_init_tbufs;
3095 
3096 	rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_ENABLE);
3097 	if (rv != 0) {
3098 		DWARN(vgenp, ldcp, "ldc_set_cb_mode failed: rv(%d)\n", rv);
3099 		goto ldcinit_failed;
3100 	}
3101 
3102 	init_state |= ST_cb_enable;
3103 
3104 	do {
3105 		rv = ldc_up(ldcp->ldc_handle);
3106 		if ((rv != 0) && (rv == EWOULDBLOCK)) {
3107 			DBG2(vgenp, ldcp, "ldc_up err rv(%d)\n", rv);
3108 			drv_usecwait(VGEN_LDC_UP_DELAY);
3109 		}
3110 		if (retries++ >= vgen_ldcup_retries)
3111 			break;
3112 	} while (rv == EWOULDBLOCK);
3113 
3114 	(void) ldc_status(ldcp->ldc_handle, &istatus);
3115 	if (istatus == LDC_UP) {
3116 		DWARN(vgenp, ldcp, "status(%d) is UP\n", istatus);
3117 	}
3118 
3119 	ldcp->ldc_status = istatus;
3120 
3121 	/* initialize transmit watchdog timeout */
3122 	ldcp->wd_tid = timeout(vgen_ldc_watchdog, (caddr_t)ldcp,
3123 	    drv_usectohz(vnet_ldcwd_interval * 1000));
3124 
3125 	ldcp->hphase = -1;
3126 	ldcp->flags |= CHANNEL_STARTED;
3127 
3128 	/* if channel is already UP - start handshake */
3129 	if (istatus == LDC_UP) {
3130 		vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3131 		if (ldcp->portp != vgenp->vsw_portp) {
3132 			/*
3133 			 * As the channel is up, use this port from now on.
3134 			 */
3135 			(void) atomic_swap_32(
3136 			    &ldcp->portp->use_vsw_port, B_FALSE);
3137 		}
3138 
3139 		/* Initialize local session id */
3140 		ldcp->local_sid = ddi_get_lbolt();
3141 
3142 		/* clear peer session id */
3143 		ldcp->peer_sid = 0;
3144 		ldcp->hretries = 0;
3145 
3146 		/* Initiate Handshake process with peer ldc endpoint */
3147 		vgen_reset_hphase(ldcp);
3148 
3149 		mutex_exit(&ldcp->tclock);
3150 		mutex_exit(&ldcp->txlock);
3151 		mutex_exit(&ldcp->wrlock);
3152 		mutex_exit(&ldcp->rxlock);
3153 		vgen_handshake(vh_nextphase(ldcp));
3154 		mutex_exit(&ldcp->cblock);
3155 	} else {
3156 		LDC_UNLOCK(ldcp);
3157 	}
3158 
3159 	return (DDI_SUCCESS);
3160 
3161 ldcinit_failed:
3162 	if (init_state & ST_cb_enable) {
3163 		(void) ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE);
3164 	}
3165 	if (init_state & ST_init_tbufs) {
3166 		vgen_uninit_tbufs(ldcp);
3167 	}
3168 	if (init_state & ST_ldc_open) {
3169 		(void) ldc_close(ldcp->ldc_handle);
3170 	}
3171 	LDC_UNLOCK(ldcp);
3172 	DBG1(vgenp, ldcp, "exit\n");
3173 	return (DDI_FAILURE);
3174 }
3175 
/*
 * Stop transmit/receive on the channel: disable callbacks, cancel the
 * handshake and transmit watchdog timeouts, wait for in-flight work to
 * drain, then tear down the tbuf ring and close the channel.
 */
static void
vgen_ldc_uninit(vgen_ldc_t *ldcp)
{
	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
	int	rv;

	DBG1(vgenp, ldcp, "enter\n");
	LDC_LOCK(ldcp);

	/* nothing to undo if the channel was never started */
	if ((ldcp->flags & CHANNEL_STARTED) == 0) {
		LDC_UNLOCK(ldcp);
		DWARN(vgenp, ldcp, "CHANNEL_STARTED flag is not set\n");
		return;
	}

	/* disable further callbacks */
	rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE);
	if (rv != 0) {
		DWARN(vgenp, ldcp, "ldc_set_cb_mode failed\n");
	}

	/* if this is the channel to the vswitch, report the resource down */
	if (vgenp->vsw_portp == ldcp->portp) {
		vio_net_report_err_t rep_err =
		    ldcp->portp->vcb.vio_net_report_err;
		rep_err(ldcp->portp->vhp, VIO_NET_RES_DOWN);
	}

	/*
	 * clear handshake done bit and wait for pending tx and cb to finish.
	 * release locks before untimeout(9F) is invoked to cancel timeouts.
	 */
	ldcp->hphase &= ~(VH_DONE);
	LDC_UNLOCK(ldcp);

	/* cancel handshake watchdog timeout */
	if (ldcp->htid) {
		(void) untimeout(ldcp->htid);
		ldcp->htid = 0;
	}

	/* cancel transmit watchdog timeout */
	if (ldcp->wd_tid) {
		(void) untimeout(ldcp->wd_tid);
		ldcp->wd_tid = 0;
	}

	/* give in-flight transmits/callbacks 1ms to complete */
	drv_usecwait(1000);

	/* acquire locks again; any pending transmits and callbacks are done */
	LDC_LOCK(ldcp);

	vgen_reset_hphase(ldcp);

	vgen_uninit_tbufs(ldcp);

	rv = ldc_close(ldcp->ldc_handle);
	if (rv != 0) {
		DWARN(vgenp, ldcp, "ldc_close err\n");
	}
	ldcp->ldc_status = LDC_INIT;
	ldcp->flags &= ~(CHANNEL_STARTED);

	LDC_UNLOCK(ldcp);

	DBG1(vgenp, ldcp, "exit\n");
}
3243 
/*
 * Initialize the transmit buffer ring for the channel: allocate one
 * 8K-aligned data area carved into per-descriptor buffers, then for each
 * private descriptor allocate and bind an ldc memory handle and mark the
 * descriptor pair free.  Returns DDI_SUCCESS or DDI_FAILURE (after
 * unwinding via vgen_uninit_tbufs()).
 */
static int
vgen_init_tbufs(vgen_ldc_t *ldcp)
{
	vgen_private_desc_t	*tbufp;
	vnet_public_desc_t	*txdp;
	vio_dring_entry_hdr_t		*hdrp;
	int 			i;
	int 			rv;
	caddr_t			datap = NULL;
	int			ci;
	uint32_t		ncookies;
	size_t			data_sz;
	vgen_t			*vgenp;

	vgenp = LDC_TO_VGEN(ldcp);

	/* start from a clean private and public descriptor ring */
	bzero(ldcp->tbufp, sizeof (*tbufp) * (ldcp->num_txds));
	bzero(ldcp->txdp, sizeof (*txdp) * (ldcp->num_txds));

	/*
	 * In order to ensure that the number of ldc cookies per descriptor is
	 * limited to be within the default MAX_COOKIES (2), we take the steps
	 * outlined below:
	 *
	 * Align the entire data buffer area to 8K and carve out per descriptor
	 * data buffers starting from this 8K aligned base address.
	 *
	 * We round up the mtu specified to be a multiple of 2K or 4K.
	 * For sizes up to 12K we round up the size to the next 2K.
	 * For sizes > 12K we round up to the next 4K (otherwise sizes such as
	 * 14K could end up needing 3 cookies, with the buffer spread across
	 * 3 8K pages:  8K+6K, 2K+8K+2K, 6K+8K, ...).
	 */
	data_sz = vgenp->max_frame_size + VNET_IPALIGN + VNET_LDCALIGN;
	if (data_sz <= VNET_12K) {
		data_sz = VNET_ROUNDUP_2K(data_sz);
	} else {
		data_sz = VNET_ROUNDUP_4K(data_sz);
	}

	/* allocate extra 8K bytes for alignment */
	ldcp->tx_data_sz = (data_sz * ldcp->num_txds) + VNET_8K;
	datap = kmem_zalloc(ldcp->tx_data_sz, KM_SLEEP);
	ldcp->tx_datap = datap;


	/* align the starting address of the data area to 8K */
	datap = (caddr_t)VNET_ROUNDUP_8K((uintptr_t)datap);

	/*
	 * for each private descriptor, allocate a ldc mem_handle which is
	 * required to map the data during transmit, set the flags
	 * to free (available for use by transmit routine).
	 */

	for (i = 0; i < ldcp->num_txds; i++) {

		tbufp = &(ldcp->tbufp[i]);
		rv = ldc_mem_alloc_handle(ldcp->ldc_handle,
		    &(tbufp->memhandle));
		if (rv) {
			tbufp->memhandle = 0;
			goto init_tbufs_failed;
		}

		/*
		 * bind ldc memhandle to the corresponding transmit buffer.
		 */
		ci = ncookies = 0;
		rv = ldc_mem_bind_handle(tbufp->memhandle,
		    (caddr_t)datap, data_sz, LDC_SHADOW_MAP,
		    LDC_MEM_R, &(tbufp->memcookie[ci]), &ncookies);
		if (rv != 0) {
			goto init_tbufs_failed;
		}

		/*
		 * successful in binding the handle to tx data buffer.
		 * set datap in the private descr to this buffer.
		 */
		tbufp->datap = datap;

		/* the bind must yield between 1 and MAX_COOKIES cookies */
		if ((ncookies == 0) ||
		    (ncookies > MAX_COOKIES)) {
			goto init_tbufs_failed;
		}

		/* fetch the remaining cookies beyond the first one */
		for (ci = 1; ci < ncookies; ci++) {
			rv = ldc_mem_nextcookie(tbufp->memhandle,
			    &(tbufp->memcookie[ci]));
			if (rv != 0) {
				goto init_tbufs_failed;
			}
		}

		tbufp->ncookies = ncookies;
		datap += data_sz;

		/* mark the private/public descriptor pair free for transmit */
		tbufp->flags = VGEN_PRIV_DESC_FREE;
		txdp = &(ldcp->txdp[i]);
		hdrp = &txdp->hdr;
		hdrp->dstate = VIO_DESC_FREE;
		hdrp->ack = B_FALSE;
		tbufp->descp = txdp;

	}

	/* reset tbuf walking pointers */
	ldcp->next_tbufp = ldcp->tbufp;
	ldcp->cur_tbufp = ldcp->tbufp;

	/* initialize tx seqnum and index */
	ldcp->next_txseq = VNET_ISS;
	ldcp->next_txi = 0;

	ldcp->resched_peer = B_TRUE;
	ldcp->resched_peer_txi = 0;

	return (DDI_SUCCESS);

init_tbufs_failed:;
	/* vgen_uninit_tbufs() tolerates a partially initialized ring */
	vgen_uninit_tbufs(ldcp);
	return (DDI_FAILURE);
}
3369 
3370 /* Uninitialize transmit buffer ring for the channel */
3371 static void
3372 vgen_uninit_tbufs(vgen_ldc_t *ldcp)
3373 {
3374 	vgen_private_desc_t	*tbufp = ldcp->tbufp;
3375 	int 			i;
3376 
3377 	/* for each tbuf (priv_desc), free ldc mem_handle */
3378 	for (i = 0; i < ldcp->num_txds; i++) {
3379 
3380 		tbufp = &(ldcp->tbufp[i]);
3381 
3382 		if (tbufp->datap) { /* if bound to a ldc memhandle */
3383 			(void) ldc_mem_unbind_handle(tbufp->memhandle);
3384 			tbufp->datap = NULL;
3385 		}
3386 		if (tbufp->memhandle) {
3387 			(void) ldc_mem_free_handle(tbufp->memhandle);
3388 			tbufp->memhandle = 0;
3389 		}
3390 	}
3391 
3392 	if (ldcp->tx_datap) {
3393 		/* prealloc'd tx data buffer */
3394 		kmem_free(ldcp->tx_datap, ldcp->tx_data_sz);
3395 		ldcp->tx_datap = NULL;
3396 		ldcp->tx_data_sz = 0;
3397 	}
3398 
3399 	bzero(ldcp->tbufp, sizeof (vgen_private_desc_t) * (ldcp->num_txds));
3400 	bzero(ldcp->txdp, sizeof (vnet_public_desc_t) * (ldcp->num_txds));
3401 }
3402 
3403 /* clobber tx descriptor ring */
3404 static void
3405 vgen_clobber_tbufs(vgen_ldc_t *ldcp)
3406 {
3407 	vnet_public_desc_t	*txdp;
3408 	vgen_private_desc_t	*tbufp;
3409 	vio_dring_entry_hdr_t	*hdrp;
3410 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3411 	int i;
3412 #ifdef DEBUG
3413 	int ndone = 0;
3414 #endif
3415 
3416 	for (i = 0; i < ldcp->num_txds; i++) {
3417 
3418 		tbufp = &(ldcp->tbufp[i]);
3419 		txdp = tbufp->descp;
3420 		hdrp = &txdp->hdr;
3421 
3422 		if (tbufp->flags & VGEN_PRIV_DESC_BUSY) {
3423 			tbufp->flags = VGEN_PRIV_DESC_FREE;
3424 #ifdef DEBUG
3425 			if (hdrp->dstate == VIO_DESC_DONE)
3426 				ndone++;
3427 #endif
3428 			hdrp->dstate = VIO_DESC_FREE;
3429 			hdrp->ack = B_FALSE;
3430 		}
3431 	}
3432 	/* reset tbuf walking pointers */
3433 	ldcp->next_tbufp = ldcp->tbufp;
3434 	ldcp->cur_tbufp = ldcp->tbufp;
3435 
3436 	/* reset tx seqnum and index */
3437 	ldcp->next_txseq = VNET_ISS;
3438 	ldcp->next_txi = 0;
3439 
3440 	ldcp->resched_peer = B_TRUE;
3441 	ldcp->resched_peer_txi = 0;
3442 
3443 	DBG2(vgenp, ldcp, "num descrs done (%d)\n", ndone);
3444 }
3445 
3446 /* clobber receive descriptor ring */
3447 static void
3448 vgen_clobber_rxds(vgen_ldc_t *ldcp)
3449 {
3450 	ldcp->rx_dhandle = 0;
3451 	bzero(&ldcp->rx_dcookie, sizeof (ldcp->rx_dcookie));
3452 	ldcp->rxdp = NULL;
3453 	ldcp->next_rxi = 0;
3454 	ldcp->num_rxds = 0;
3455 	ldcp->next_rxseq = VNET_ISS;
3456 }
3457 
3458 /* initialize receive descriptor ring */
3459 static int
3460 vgen_init_rxds(vgen_ldc_t *ldcp, uint32_t num_desc, uint32_t desc_size,
3461 	ldc_mem_cookie_t *dcookie, uint32_t ncookies)
3462 {
3463 	int rv;
3464 	ldc_mem_info_t minfo;
3465 
3466 	rv = ldc_mem_dring_map(ldcp->ldc_handle, dcookie, ncookies, num_desc,
3467 	    desc_size, LDC_DIRECT_MAP, &(ldcp->rx_dhandle));
3468 	if (rv != 0) {
3469 		return (DDI_FAILURE);
3470 	}
3471 
3472 	/*
3473 	 * sucessfully mapped, now try to
3474 	 * get info about the mapped dring
3475 	 */
3476 	rv = ldc_mem_dring_info(ldcp->rx_dhandle, &minfo);
3477 	if (rv != 0) {
3478 		(void) ldc_mem_dring_unmap(ldcp->rx_dhandle);
3479 		return (DDI_FAILURE);
3480 	}
3481 
3482 	/*
3483 	 * save ring address, number of descriptors.
3484 	 */
3485 	ldcp->rxdp = (vnet_public_desc_t *)(minfo.vaddr);
3486 	bcopy(dcookie, &(ldcp->rx_dcookie), sizeof (*dcookie));
3487 	ldcp->num_rxdcookies = ncookies;
3488 	ldcp->num_rxds = num_desc;
3489 	ldcp->next_rxi = 0;
3490 	ldcp->next_rxseq = VNET_ISS;
3491 	ldcp->dring_mtype = minfo.mtype;
3492 
3493 	return (DDI_SUCCESS);
3494 }
3495 
3496 /* get channel statistics */
3497 static uint64_t
3498 vgen_ldc_stat(vgen_ldc_t *ldcp, uint_t stat)
3499 {
3500 	vgen_stats_t *statsp;
3501 	uint64_t val;
3502 
3503 	val = 0;
3504 	statsp = &ldcp->stats;
3505 	switch (stat) {
3506 
3507 	case MAC_STAT_MULTIRCV:
3508 		val = statsp->multircv;
3509 		break;
3510 
3511 	case MAC_STAT_BRDCSTRCV:
3512 		val = statsp->brdcstrcv;
3513 		break;
3514 
3515 	case MAC_STAT_MULTIXMT:
3516 		val = statsp->multixmt;
3517 		break;
3518 
3519 	case MAC_STAT_BRDCSTXMT:
3520 		val = statsp->brdcstxmt;
3521 		break;
3522 
3523 	case MAC_STAT_NORCVBUF:
3524 		val = statsp->norcvbuf;
3525 		break;
3526 
3527 	case MAC_STAT_IERRORS:
3528 		val = statsp->ierrors;
3529 		break;
3530 
3531 	case MAC_STAT_NOXMTBUF:
3532 		val = statsp->noxmtbuf;
3533 		break;
3534 
3535 	case MAC_STAT_OERRORS:
3536 		val = statsp->oerrors;
3537 		break;
3538 
3539 	case MAC_STAT_COLLISIONS:
3540 		break;
3541 
3542 	case MAC_STAT_RBYTES:
3543 		val = statsp->rbytes;
3544 		break;
3545 
3546 	case MAC_STAT_IPACKETS:
3547 		val = statsp->ipackets;
3548 		break;
3549 
3550 	case MAC_STAT_OBYTES:
3551 		val = statsp->obytes;
3552 		break;
3553 
3554 	case MAC_STAT_OPACKETS:
3555 		val = statsp->opackets;
3556 		break;
3557 
3558 	/* stats not relevant to ldc, return 0 */
3559 	case MAC_STAT_IFSPEED:
3560 	case ETHER_STAT_ALIGN_ERRORS:
3561 	case ETHER_STAT_FCS_ERRORS:
3562 	case ETHER_STAT_FIRST_COLLISIONS:
3563 	case ETHER_STAT_MULTI_COLLISIONS:
3564 	case ETHER_STAT_DEFER_XMTS:
3565 	case ETHER_STAT_TX_LATE_COLLISIONS:
3566 	case ETHER_STAT_EX_COLLISIONS:
3567 	case ETHER_STAT_MACXMT_ERRORS:
3568 	case ETHER_STAT_CARRIER_ERRORS:
3569 	case ETHER_STAT_TOOLONG_ERRORS:
3570 	case ETHER_STAT_XCVR_ADDR:
3571 	case ETHER_STAT_XCVR_ID:
3572 	case ETHER_STAT_XCVR_INUSE:
3573 	case ETHER_STAT_CAP_1000FDX:
3574 	case ETHER_STAT_CAP_1000HDX:
3575 	case ETHER_STAT_CAP_100FDX:
3576 	case ETHER_STAT_CAP_100HDX:
3577 	case ETHER_STAT_CAP_10FDX:
3578 	case ETHER_STAT_CAP_10HDX:
3579 	case ETHER_STAT_CAP_ASMPAUSE:
3580 	case ETHER_STAT_CAP_PAUSE:
3581 	case ETHER_STAT_CAP_AUTONEG:
3582 	case ETHER_STAT_ADV_CAP_1000FDX:
3583 	case ETHER_STAT_ADV_CAP_1000HDX:
3584 	case ETHER_STAT_ADV_CAP_100FDX:
3585 	case ETHER_STAT_ADV_CAP_100HDX:
3586 	case ETHER_STAT_ADV_CAP_10FDX:
3587 	case ETHER_STAT_ADV_CAP_10HDX:
3588 	case ETHER_STAT_ADV_CAP_ASMPAUSE:
3589 	case ETHER_STAT_ADV_CAP_PAUSE:
3590 	case ETHER_STAT_ADV_CAP_AUTONEG:
3591 	case ETHER_STAT_LP_CAP_1000FDX:
3592 	case ETHER_STAT_LP_CAP_1000HDX:
3593 	case ETHER_STAT_LP_CAP_100FDX:
3594 	case ETHER_STAT_LP_CAP_100HDX:
3595 	case ETHER_STAT_LP_CAP_10FDX:
3596 	case ETHER_STAT_LP_CAP_10HDX:
3597 	case ETHER_STAT_LP_CAP_ASMPAUSE:
3598 	case ETHER_STAT_LP_CAP_PAUSE:
3599 	case ETHER_STAT_LP_CAP_AUTONEG:
3600 	case ETHER_STAT_LINK_ASMPAUSE:
3601 	case ETHER_STAT_LINK_PAUSE:
3602 	case ETHER_STAT_LINK_AUTONEG:
3603 	case ETHER_STAT_LINK_DUPLEX:
3604 	default:
3605 		val = 0;
3606 		break;
3607 
3608 	}
3609 	return (val);
3610 }
3611 
3612 /*
3613  * LDC channel is UP, start handshake process with peer.
3614  */
3615 static void
3616 vgen_handle_evt_up(vgen_ldc_t *ldcp)
3617 {
3618 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
3619 
3620 	DBG1(vgenp, ldcp, "enter\n");
3621 
3622 	ASSERT(MUTEX_HELD(&ldcp->cblock));
3623 
3624 	if (ldcp->portp != vgenp->vsw_portp) {
3625 		/*
3626 		 * As the channel is up, use this port from now on.
3627 		 */
3628 		(void) atomic_swap_32(&ldcp->portp->use_vsw_port, B_FALSE);
3629 	}
3630 
3631 	/* Initialize local session id */
3632 	ldcp->local_sid = ddi_get_lbolt();
3633 
3634 	/* clear peer session id */
3635 	ldcp->peer_sid = 0;
3636 	ldcp->hretries = 0;
3637 
3638 	if (ldcp->hphase != VH_PHASE0) {
3639 		vgen_handshake_reset(ldcp);
3640 	}
3641 
3642 	/* Initiate Handshake process with peer ldc endpoint */
3643 	vgen_handshake(vh_nextphase(ldcp));
3644 
3645 	DBG1(vgenp, ldcp, "exit\n");
3646 }
3647 
3648 /*
3649  * LDC channel is Reset, terminate connection with peer and try to
3650  * bring the channel up again.
3651  */
3652 static void
3653 vgen_handle_evt_reset(vgen_ldc_t *ldcp)
3654 {
3655 	ldc_status_t istatus;
3656 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
3657 	int	rv;
3658 
3659 	DBG1(vgenp, ldcp, "enter\n");
3660 
3661 	ASSERT(MUTEX_HELD(&ldcp->cblock));
3662 
3663 	if ((ldcp->portp != vgenp->vsw_portp) &&
3664 	    (vgenp->vsw_portp != NULL)) {
3665 		/*
3666 		 * As the channel is down, use the switch port until
3667 		 * the channel becomes ready to be used.
3668 		 */
3669 		(void) atomic_swap_32(&ldcp->portp->use_vsw_port, B_TRUE);
3670 	}
3671 
3672 	if (vgenp->vsw_portp == ldcp->portp) {
3673 		vio_net_report_err_t rep_err =
3674 		    ldcp->portp->vcb.vio_net_report_err;
3675 
3676 		/* Post a reset message */
3677 		rep_err(ldcp->portp->vhp, VIO_NET_RES_DOWN);
3678 	}
3679 
3680 	if (ldcp->hphase != VH_PHASE0) {
3681 		vgen_handshake_reset(ldcp);
3682 	}
3683 
3684 	/* try to bring the channel up */
3685 	rv = ldc_up(ldcp->ldc_handle);
3686 	if (rv != 0) {
3687 		DWARN(vgenp, ldcp, "ldc_up err rv(%d)\n", rv);
3688 	}
3689 
3690 	if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
3691 		DWARN(vgenp, ldcp, "ldc_status err\n");
3692 	} else {
3693 		ldcp->ldc_status = istatus;
3694 	}
3695 
3696 	/* if channel is already UP - restart handshake */
3697 	if (ldcp->ldc_status == LDC_UP) {
3698 		vgen_handle_evt_up(ldcp);
3699 	}
3700 
3701 	DBG1(vgenp, ldcp, "exit\n");
3702 }
3703 
/*
 * Interrupt handler for the channel.  Processes UP, then RESET/DOWN,
 * then READ events — the ordering is deliberate (see note below).
 * Returns LDC_SUCCESS, or LDC_FAILURE if the channel status cannot
 * be read.
 */
static uint_t
vgen_ldc_cb(uint64_t event, caddr_t arg)
{
	_NOTE(ARGUNUSED(event))
	vgen_ldc_t	*ldcp;
	vgen_t		*vgenp;
	ldc_status_t 	istatus;
	vgen_stats_t	*statsp;

	ldcp = (vgen_ldc_t *)arg;
	vgenp = LDC_TO_VGEN(ldcp);
	statsp = &ldcp->stats;

	DBG1(vgenp, ldcp, "enter\n");

	mutex_enter(&ldcp->cblock);
	statsp->callbacks++;
	/* ignore events that arrive before init or after uninit */
	if ((ldcp->ldc_status == LDC_INIT) || (ldcp->ldc_handle == NULL)) {
		DWARN(vgenp, ldcp, "status(%d) is LDC_INIT\n",
		    ldcp->ldc_status);
		mutex_exit(&ldcp->cblock);
		return (LDC_SUCCESS);
	}

	/*
	 * NOTE: not using switch() as event could be triggered by
	 * a state change and a read request. Also the ordering	of the
	 * check for the event types is deliberate.
	 */
	if (event & LDC_EVT_UP) {
		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
			DWARN(vgenp, ldcp, "ldc_status err\n");
			/* status couldn't be determined */
			mutex_exit(&ldcp->cblock);
			return (LDC_FAILURE);
		}
		ldcp->ldc_status = istatus;
		if (ldcp->ldc_status != LDC_UP) {
			DWARN(vgenp, ldcp, "LDC_EVT_UP received "
			    " but ldc status is not UP(0x%x)\n",
			    ldcp->ldc_status);
			/* spurious interrupt, return success */
			mutex_exit(&ldcp->cblock);
			return (LDC_SUCCESS);
		}
		DWARN(vgenp, ldcp, "event(%lx) UP, status(%d)\n",
		    event, ldcp->ldc_status);

		/* start handshake with the peer */
		vgen_handle_evt_up(ldcp);

		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);
	}

	/* Handle RESET/DOWN before READ event */
	if (event & (LDC_EVT_RESET | LDC_EVT_DOWN)) {
		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
			DWARN(vgenp, ldcp, "ldc_status error\n");
			/* status couldn't be determined */
			mutex_exit(&ldcp->cblock);
			return (LDC_FAILURE);
		}
		ldcp->ldc_status = istatus;
		DWARN(vgenp, ldcp, "event(%lx) RESET/DOWN, status(%d)\n",
		    event, ldcp->ldc_status);

		vgen_handle_evt_reset(ldcp);

		/*
		 * As the channel is down/reset, ignore READ event
		 * but print a debug warning message.
		 */
		if (event & LDC_EVT_READ) {
			DWARN(vgenp, ldcp,
			    "LDC_EVT_READ set along with RESET/DOWN\n");
			event &= ~LDC_EVT_READ;
		}
	}

	if (event & LDC_EVT_READ) {
		DBG2(vgenp, ldcp, "event(%lx) READ, status(%d)\n",
		    event, ldcp->ldc_status);

		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);

		if (ldcp->rcv_thread != NULL) {
			/*
			 * If the receive thread is enabled, then
			 * wakeup the receive thread to process the
			 * LDC messages.
			 */
			mutex_exit(&ldcp->cblock);
			mutex_enter(&ldcp->rcv_thr_lock);
			if (!(ldcp->rcv_thr_flags & VGEN_WTHR_DATARCVD)) {
				ldcp->rcv_thr_flags |= VGEN_WTHR_DATARCVD;
				cv_signal(&ldcp->rcv_thr_cv);
			}
			mutex_exit(&ldcp->rcv_thr_lock);
			mutex_enter(&ldcp->cblock);
		} else  {
			/* process the messages inline in callback context */
			vgen_handle_evt_read(ldcp);
		}
	}
	mutex_exit(&ldcp->cblock);

	/*
	 * NOTE(review): cancel_htid is read and cleared here after cblock
	 * has been dropped; presumably only this callback path consumes it
	 * — confirm the serialization assumption.
	 */
	if (ldcp->cancel_htid) {
		/*
		 * Cancel handshake timer.
		 * untimeout(9F) will not return until the pending callback is
		 * cancelled or has run. No problems will result from calling
		 * untimeout if the handler has already completed.
		 * If the timeout handler did run, then it would just
		 * return as cancel_htid is set.
		 */
		(void) untimeout(ldcp->cancel_htid);
		ldcp->cancel_htid = 0;
	}
	DBG1(vgenp, ldcp, "exit\n");

	return (LDC_SUCCESS);
}
3825 
/*
 * Drain and dispatch all pending messages on the channel.  Reads from
 * the ldc queue until it is empty, handing each message to the ctrl,
 * data or error handler based on its vio tag.  On ECONNRESET the
 * channel is reset; on other handler errors the handshake is retried.
 */
static void
vgen_handle_evt_read(vgen_ldc_t *ldcp)
{
	int		rv;
	uint64_t	*ldcmsg;
	size_t		msglen;
	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
	vio_msg_tag_t	*tagp;
	ldc_status_t 	istatus;
	boolean_t 	has_data;

	DBG1(vgenp, ldcp, "enter\n");

	ldcmsg = ldcp->ldcmsg;
	/*
	 * If the receive thread is enabled, then the cblock
	 * need to be acquired here. If not, the vgen_ldc_cb()
	 * calls this function with cblock held already.
	 */
	if (ldcp->rcv_thread != NULL) {
		mutex_enter(&ldcp->cblock);
	} else {
		ASSERT(MUTEX_HELD(&ldcp->cblock));
	}

vgen_evt_read:
	do {
		msglen = ldcp->msglen;
		rv = ldc_read(ldcp->ldc_handle, (caddr_t)ldcmsg, &msglen);

		if (rv != 0) {
			DWARN(vgenp, ldcp, "err rv(%d) len(%d)\n",
			    rv, msglen);
			if (rv == ECONNRESET)
				goto vgen_evtread_error;
			break;
		}
		/* msglen == 0 means the queue is empty */
		if (msglen == 0) {
			DBG2(vgenp, ldcp, "ldc_read NODATA");
			break;
		}
		DBG2(vgenp, ldcp, "ldc_read msglen(%d)", msglen);

		tagp = (vio_msg_tag_t *)ldcmsg;

		if (ldcp->peer_sid) {
			/*
			 * check sid only after we have received peer's sid
			 * in the version negotiate msg.
			 */
#ifdef DEBUG
			if (vgen_hdbg & HDBG_BAD_SID) {
				/* simulate bad sid condition */
				tagp->vio_sid = 0;
				vgen_hdbg &= ~(HDBG_BAD_SID);
			}
#endif
			rv = vgen_check_sid(ldcp, tagp);
			if (rv != VGEN_SUCCESS) {
				/*
				 * If sid mismatch is detected,
				 * reset the channel.
				 */
				ldcp->need_ldc_reset = B_TRUE;
				goto vgen_evtread_error;
			}
		}

		/* dispatch on the vio message type in the tag */
		switch (tagp->vio_msgtype) {
		case VIO_TYPE_CTRL:
			rv = vgen_handle_ctrlmsg(ldcp, tagp);
			break;

		case VIO_TYPE_DATA:
			rv = vgen_handle_datamsg(ldcp, tagp, msglen);
			break;

		case VIO_TYPE_ERR:
			vgen_handle_errmsg(ldcp, tagp);
			break;

		default:
			DWARN(vgenp, ldcp, "Unknown VIO_TYPE(%x)\n",
			    tagp->vio_msgtype);
			break;
		}

		/*
		 * If an error is encountered, stop processing and
		 * handle the error.
		 */
		if (rv != 0) {
			goto vgen_evtread_error;
		}

	} while (msglen);

	/* check once more before exiting */
	rv = ldc_chkq(ldcp->ldc_handle, &has_data);
	if ((rv == 0) && (has_data == B_TRUE)) {
		DTRACE_PROBE(vgen_chkq);
		goto vgen_evt_read;
	}

vgen_evtread_error:
	if (rv == ECONNRESET) {
		/* channel was reset underneath us; refresh status and reset */
		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
			DWARN(vgenp, ldcp, "ldc_status err\n");
		} else {
			ldcp->ldc_status = istatus;
		}
		vgen_handle_evt_reset(ldcp);
	} else if (rv) {
		/* any other handler error: retry the handshake */
		vgen_handshake_retry(ldcp);
	}

	/*
	 * If the receive thread is enabled, release cblock (acquired at
	 * entry above) and cancel any pending handshake timeout here;
	 * otherwise vgen_ldc_cb() cancels it after we return.
	 */
	if (ldcp->rcv_thread != NULL) {
		mutex_exit(&ldcp->cblock);
		if (ldcp->cancel_htid) {
			/*
			 * Cancel handshake timer. untimeout(9F) will
			 * not return until the pending callback is cancelled
			 * or has run. No problems will result from calling
			 * untimeout if the handler has already completed.
			 * If the timeout handler did run, then it would just
			 * return as cancel_htid is set.
			 */
			(void) untimeout(ldcp->cancel_htid);
			ldcp->cancel_htid = 0;
		}
	}

	DBG1(vgenp, ldcp, "exit\n");
}
3964 
3965 /* vgen handshake functions */
3966 
3967 /* change the hphase for the channel to the next phase */
3968 static vgen_ldc_t *
3969 vh_nextphase(vgen_ldc_t *ldcp)
3970 {
3971 	if (ldcp->hphase == VH_PHASE3) {
3972 		ldcp->hphase = VH_DONE;
3973 	} else {
3974 		ldcp->hphase++;
3975 	}
3976 	return (ldcp);
3977 }
3978 
3979 /*
3980  * wrapper routine to send the given message over ldc using ldc_write().
3981  */
3982 static int
3983 vgen_sendmsg(vgen_ldc_t *ldcp, caddr_t msg,  size_t msglen,
3984     boolean_t caller_holds_lock)
3985 {
3986 	int			rv;
3987 	size_t			len;
3988 	uint32_t		retries = 0;
3989 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
3990 	vio_msg_tag_t		*tagp = (vio_msg_tag_t *)msg;
3991 	vio_dring_msg_t		*dmsg;
3992 	vio_raw_data_msg_t	*rmsg;
3993 	boolean_t		data_msg = B_FALSE;
3994 
3995 	len = msglen;
3996 	if ((len == 0) || (msg == NULL))
3997 		return (VGEN_FAILURE);
3998 
3999 	if (!caller_holds_lock) {
4000 		mutex_enter(&ldcp->wrlock);
4001 	}
4002 
4003 	if (tagp->vio_subtype == VIO_SUBTYPE_INFO) {
4004 		if (tagp->vio_subtype_env == VIO_DRING_DATA) {
4005 			dmsg = (vio_dring_msg_t *)tagp;
4006 			dmsg->seq_num = ldcp->next_txseq;
4007 			data_msg = B_TRUE;
4008 		} else if (tagp->vio_subtype_env == VIO_PKT_DATA) {
4009 			rmsg = (vio_raw_data_msg_t *)tagp;
4010 			rmsg->seq_num = ldcp->next_txseq;
4011 			data_msg = B_TRUE;
4012 		}
4013 	}
4014 
4015 	do {
4016 		len = msglen;
4017 		rv = ldc_write(ldcp->ldc_handle, (caddr_t)msg, &len);
4018 		if (retries++ >= vgen_ldcwr_retries)
4019 			break;
4020 	} while (rv == EWOULDBLOCK);
4021 
4022 	if (rv == 0 && data_msg == B_TRUE) {
4023 		ldcp->next_txseq++;
4024 	}
4025 
4026 	if (!caller_holds_lock) {
4027 		mutex_exit(&ldcp->wrlock);
4028 	}
4029 
4030 	if (rv != 0) {
4031 		DWARN(vgenp, ldcp, "ldc_write failed: rv(%d)\n",
4032 		    rv, msglen);
4033 		return (rv);
4034 	}
4035 
4036 	if (len != msglen) {
4037 		DWARN(vgenp, ldcp, "ldc_write failed: rv(%d) msglen (%d)\n",
4038 		    rv, msglen);
4039 		return (VGEN_FAILURE);
4040 	}
4041 
4042 	return (VGEN_SUCCESS);
4043 }
4044 
4045 /* send version negotiate message to the peer over ldc */
4046 static int
4047 vgen_send_version_negotiate(vgen_ldc_t *ldcp)
4048 {
4049 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4050 	vio_ver_msg_t	vermsg;
4051 	vio_msg_tag_t	*tagp = &vermsg.tag;
4052 	int		rv;
4053 
4054 	bzero(&vermsg, sizeof (vermsg));
4055 
4056 	tagp->vio_msgtype = VIO_TYPE_CTRL;
4057 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
4058 	tagp->vio_subtype_env = VIO_VER_INFO;
4059 	tagp->vio_sid = ldcp->local_sid;
4060 
4061 	/* get version msg payload from ldcp->local */
4062 	vermsg.ver_major = ldcp->local_hparams.ver_major;
4063 	vermsg.ver_minor = ldcp->local_hparams.ver_minor;
4064 	vermsg.dev_class = ldcp->local_hparams.dev_class;
4065 
4066 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (vermsg), B_FALSE);
4067 	if (rv != VGEN_SUCCESS) {
4068 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
4069 		return (rv);
4070 	}
4071 
4072 	ldcp->hstate |= VER_INFO_SENT;
4073 	DBG2(vgenp, ldcp, "VER_INFO_SENT ver(%d,%d)\n",
4074 	    vermsg.ver_major, vermsg.ver_minor);
4075 
4076 	return (VGEN_SUCCESS);
4077 }
4078 
4079 /* send attr info message to the peer over ldc */
4080 static int
4081 vgen_send_attr_info(vgen_ldc_t *ldcp)
4082 {
4083 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4084 	vnet_attr_msg_t	attrmsg;
4085 	vio_msg_tag_t	*tagp = &attrmsg.tag;
4086 	int		rv;
4087 
4088 	bzero(&attrmsg, sizeof (attrmsg));
4089 
4090 	tagp->vio_msgtype = VIO_TYPE_CTRL;
4091 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
4092 	tagp->vio_subtype_env = VIO_ATTR_INFO;
4093 	tagp->vio_sid = ldcp->local_sid;
4094 
4095 	/* get attr msg payload from ldcp->local */
4096 	attrmsg.mtu = ldcp->local_hparams.mtu;
4097 	attrmsg.addr = ldcp->local_hparams.addr;
4098 	attrmsg.addr_type = ldcp->local_hparams.addr_type;
4099 	attrmsg.xfer_mode = ldcp->local_hparams.xfer_mode;
4100 	attrmsg.ack_freq = ldcp->local_hparams.ack_freq;
4101 
4102 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (attrmsg), B_FALSE);
4103 	if (rv != VGEN_SUCCESS) {
4104 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
4105 		return (rv);
4106 	}
4107 
4108 	ldcp->hstate |= ATTR_INFO_SENT;
4109 	DBG2(vgenp, ldcp, "ATTR_INFO_SENT\n");
4110 
4111 	return (VGEN_SUCCESS);
4112 }
4113 
4114 /* send descriptor ring register message to the peer over ldc */
4115 static int
4116 vgen_send_dring_reg(vgen_ldc_t *ldcp)
4117 {
4118 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
4119 	vio_dring_reg_msg_t	msg;
4120 	vio_msg_tag_t		*tagp = &msg.tag;
4121 	int		rv;
4122 
4123 	bzero(&msg, sizeof (msg));
4124 
4125 	tagp->vio_msgtype = VIO_TYPE_CTRL;
4126 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
4127 	tagp->vio_subtype_env = VIO_DRING_REG;
4128 	tagp->vio_sid = ldcp->local_sid;
4129 
4130 	/* get dring info msg payload from ldcp->local */
4131 	bcopy(&(ldcp->local_hparams.dring_cookie), (msg.cookie),
4132 	    sizeof (ldc_mem_cookie_t));
4133 	msg.ncookies = ldcp->local_hparams.num_dcookies;
4134 	msg.num_descriptors = ldcp->local_hparams.num_desc;
4135 	msg.descriptor_size = ldcp->local_hparams.desc_size;
4136 
4137 	/*
4138 	 * dring_ident is set to 0. After mapping the dring, peer sets this
4139 	 * value and sends it in the ack, which is saved in
4140 	 * vgen_handle_dring_reg().
4141 	 */
4142 	msg.dring_ident = 0;
4143 
4144 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (msg), B_FALSE);
4145 	if (rv != VGEN_SUCCESS) {
4146 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
4147 		return (rv);
4148 	}
4149 
4150 	ldcp->hstate |= DRING_INFO_SENT;
4151 	DBG2(vgenp, ldcp, "DRING_INFO_SENT \n");
4152 
4153 	return (VGEN_SUCCESS);
4154 }
4155 
4156 static int
4157 vgen_send_rdx_info(vgen_ldc_t *ldcp)
4158 {
4159 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4160 	vio_rdx_msg_t	rdxmsg;
4161 	vio_msg_tag_t	*tagp = &rdxmsg.tag;
4162 	int		rv;
4163 
4164 	bzero(&rdxmsg, sizeof (rdxmsg));
4165 
4166 	tagp->vio_msgtype = VIO_TYPE_CTRL;
4167 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
4168 	tagp->vio_subtype_env = VIO_RDX;
4169 	tagp->vio_sid = ldcp->local_sid;
4170 
4171 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (rdxmsg), B_FALSE);
4172 	if (rv != VGEN_SUCCESS) {
4173 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
4174 		return (rv);
4175 	}
4176 
4177 	ldcp->hstate |= RDX_INFO_SENT;
4178 	DBG2(vgenp, ldcp, "RDX_INFO_SENT\n");
4179 
4180 	return (VGEN_SUCCESS);
4181 }
4182 
4183 /* send descriptor ring data message to the peer over ldc */
4184 static int
4185 vgen_send_dring_data(vgen_ldc_t *ldcp, uint32_t start, int32_t end)
4186 {
4187 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4188 	vio_dring_msg_t	dringmsg, *msgp = &dringmsg;
4189 	vio_msg_tag_t	*tagp = &msgp->tag;
4190 	vgen_stats_t	*statsp = &ldcp->stats;
4191 	int		rv;
4192 
4193 	bzero(msgp, sizeof (*msgp));
4194 
4195 	tagp->vio_msgtype = VIO_TYPE_DATA;
4196 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
4197 	tagp->vio_subtype_env = VIO_DRING_DATA;
4198 	tagp->vio_sid = ldcp->local_sid;
4199 
4200 	msgp->dring_ident = ldcp->local_hparams.dring_ident;
4201 	msgp->start_idx = start;
4202 	msgp->end_idx = end;
4203 
4204 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (dringmsg), B_TRUE);
4205 	if (rv != VGEN_SUCCESS) {
4206 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
4207 		return (rv);
4208 	}
4209 
4210 	statsp->dring_data_msgs++;
4211 
4212 	DBG2(vgenp, ldcp, "DRING_DATA_SENT \n");
4213 
4214 	return (VGEN_SUCCESS);
4215 }
4216 
/*
 * Send multicast addr info message to vsw.  The mctab table is sent in
 * batches of at most VNET_NUM_MCAST addresses per message.
 * Note: the do/while loop sends one message even when mccount is 0
 * (set = B_TRUE, count = 0).
 */
static int
vgen_send_mcast_info(vgen_ldc_t *ldcp)
{
	vnet_mcast_msg_t	mcastmsg;
	vnet_mcast_msg_t	*msgp;
	vio_msg_tag_t		*tagp;
	vgen_t			*vgenp;
	struct ether_addr	*mca;
	int			rv;
	int			i;	/* index of next address in mctab */
	uint32_t		size;	/* bytes copied into this message */
	uint32_t		mccount; /* addresses remaining to send */
	uint32_t		n;	/* addresses in this message */

	msgp = &mcastmsg;
	tagp = &msgp->tag;
	vgenp = LDC_TO_VGEN(ldcp);

	mccount = vgenp->mccount;
	i = 0;

	do {
		/* fill in the common vio message tag */
		tagp->vio_msgtype = VIO_TYPE_CTRL;
		tagp->vio_subtype = VIO_SUBTYPE_INFO;
		tagp->vio_subtype_env = VNET_MCAST_INFO;
		tagp->vio_sid = ldcp->local_sid;

		/* cap this batch at VNET_NUM_MCAST addresses */
		n = ((mccount >= VNET_NUM_MCAST) ? VNET_NUM_MCAST : mccount);
		size = n * sizeof (struct ether_addr);

		/* copy the next batch from the multicast table */
		mca = &(vgenp->mctab[i]);
		bcopy(mca, (msgp->mca), size);
		msgp->set = B_TRUE;
		msgp->count = n;

		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msgp),
		    B_FALSE);
		if (rv != VGEN_SUCCESS) {
			DWARN(vgenp, ldcp, "vgen_sendmsg err(%d)\n", rv);
			return (rv);
		}

		mccount -= n;
		i += n;

	} while (mccount);

	return (VGEN_SUCCESS);
}
4267 
/*
 * Initiate Phase 2 of handshake: send our attribute info to the peer,
 * bind the transmit descriptor ring to the channel (if not already
 * bound), record the resulting cookie in our local handshake params
 * and register the ring with the peer.
 * Returns VGEN_SUCCESS, or the first failure encountered.
 */
static int
vgen_handshake_phase2(vgen_ldc_t *ldcp)
{
	int rv;
	uint32_t ncookies = 0;
	vgen_t *vgenp = LDC_TO_VGEN(ldcp);

#ifdef DEBUG
	if (vgen_hdbg & HDBG_OUT_STATE) {
		/* simulate out of state condition */
		vgen_hdbg &= ~(HDBG_OUT_STATE);
		rv = vgen_send_rdx_info(ldcp);
		return (rv);
	}
	if (vgen_hdbg & HDBG_TIMEOUT) {
		/* simulate timeout condition */
		vgen_hdbg &= ~(HDBG_TIMEOUT);
		return (VGEN_SUCCESS);
	}
#endif
	/* send our attributes (mtu, mac address, xfer mode, ...) */
	rv = vgen_send_attr_info(ldcp);
	if (rv != VGEN_SUCCESS) {
		return (rv);
	}

	/* Bind descriptor ring to the channel */
	if (ldcp->num_txdcookies == 0) {
		rv = ldc_mem_dring_bind(ldcp->ldc_handle, ldcp->tx_dhandle,
		    LDC_DIRECT_MAP | LDC_SHADOW_MAP, LDC_MEM_RW,
		    &ldcp->tx_dcookie, &ncookies);
		if (rv != 0) {
			DWARN(vgenp, ldcp, "ldc_mem_dring_bind failed "
			    "rv(%x)\n", rv);
			return (rv);
		}
		/* the tx dring is expected to fit in a single cookie */
		ASSERT(ncookies == 1);
		ldcp->num_txdcookies = ncookies;
	}

	/* update local dring_info params */
	bcopy(&(ldcp->tx_dcookie), &(ldcp->local_hparams.dring_cookie),
	    sizeof (ldc_mem_cookie_t));
	ldcp->local_hparams.num_dcookies = ldcp->num_txdcookies;
	ldcp->local_hparams.num_desc = ldcp->num_txds;
	ldcp->local_hparams.desc_size = sizeof (vnet_public_desc_t);

	/* register our tx dring with the peer */
	rv = vgen_send_dring_reg(ldcp);
	if (rv != VGEN_SUCCESS) {
		return (rv);
	}

	return (VGEN_SUCCESS);
}
4322 
4323 /*
4324  * Set vnet-protocol-version dependent functions based on version.
4325  */
4326 static void
4327 vgen_set_vnet_proto_ops(vgen_ldc_t *ldcp)
4328 {
4329 	vgen_hparams_t	*lp = &ldcp->local_hparams;
4330 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4331 
4332 	if (VGEN_VER_GTEQ(ldcp, 1, 4)) {
4333 		/*
4334 		 * If the version negotiated with peer is >= 1.4(Jumbo Frame
4335 		 * Support), set the mtu in our attributes to max_frame_size.
4336 		 */
4337 		lp->mtu = vgenp->max_frame_size;
4338 	} else  if (VGEN_VER_EQ(ldcp, 1, 3)) {
4339 		/*
4340 		 * If the version negotiated with peer is == 1.3 (Vlan Tag
4341 		 * Support) set the attr.mtu to ETHERMAX + VLAN_TAGSZ.
4342 		 */
4343 		lp->mtu = ETHERMAX + VLAN_TAGSZ;
4344 	} else {
4345 		vgen_port_t	*portp = ldcp->portp;
4346 		vnet_t		*vnetp = vgenp->vnetp;
4347 		/*
4348 		 * Pre-1.3 peers expect max frame size of ETHERMAX.
4349 		 * We can negotiate that size with those peers provided the
4350 		 * following conditions are true:
4351 		 * - Only pvid is defined for our peer and there are no vids.
4352 		 * - pvids are equal.
4353 		 * If the above conditions are true, then we can send/recv only
4354 		 * untagged frames of max size ETHERMAX.
4355 		 */
4356 		if (portp->nvids == 0 && portp->pvid == vnetp->pvid) {
4357 			lp->mtu = ETHERMAX;
4358 		}
4359 	}
4360 
4361 	if (VGEN_VER_GTEQ(ldcp, 1, 2)) {
4362 		/* Versions >= 1.2 */
4363 
4364 		if (VGEN_PRI_ETH_DEFINED(vgenp)) {
4365 			/*
4366 			 * enable priority routines and pkt mode only if
4367 			 * at least one pri-eth-type is specified in MD.
4368 			 */
4369 
4370 			ldcp->tx = vgen_ldcsend;
4371 			ldcp->rx_pktdata = vgen_handle_pkt_data;
4372 
4373 			/* set xfer mode for vgen_send_attr_info() */
4374 			lp->xfer_mode = VIO_PKT_MODE | VIO_DRING_MODE_V1_2;
4375 
4376 		} else {
4377 			/* no priority eth types defined in MD */
4378 
4379 			ldcp->tx = vgen_ldcsend_dring;
4380 			ldcp->rx_pktdata = vgen_handle_pkt_data_nop;
4381 
4382 			/* set xfer mode for vgen_send_attr_info() */
4383 			lp->xfer_mode = VIO_DRING_MODE_V1_2;
4384 
4385 		}
4386 	} else {
4387 		/* Versions prior to 1.2  */
4388 
4389 		vgen_reset_vnet_proto_ops(ldcp);
4390 	}
4391 }
4392 
4393 /*
4394  * Reset vnet-protocol-version dependent functions to pre-v1.2.
4395  */
4396 static void
4397 vgen_reset_vnet_proto_ops(vgen_ldc_t *ldcp)
4398 {
4399 	vgen_hparams_t	*lp = &ldcp->local_hparams;
4400 
4401 	ldcp->tx = vgen_ldcsend_dring;
4402 	ldcp->rx_pktdata = vgen_handle_pkt_data_nop;
4403 
4404 	/* set xfer mode for vgen_send_attr_info() */
4405 	lp->xfer_mode = VIO_DRING_MODE_V1_0;
4406 }
4407 
/*
 * Reset the channel of the given port if the peer is VLAN-unaware
 * (negotiated version < 1.3) and its VLAN configuration is
 * incompatible with ours; see comments in vgen_set_vnet_proto_ops().
 */
static void
vgen_vlan_unaware_port_reset(vgen_port_t *portp)
{
	vgen_ldclist_t	*ldclp;
	vgen_ldc_t	*ldcp;
	vgen_t		*vgenp = portp->vgenp;
	vnet_t		*vnetp = vgenp->vnetp;

	ldclp = &portp->ldclist;

	READ_ENTER(&ldclp->rwlock);

	/*
	 * NOTE: for now, we will assume we have a single channel.
	 */
	if (ldclp->headp == NULL) {
		RW_EXIT(&ldclp->rwlock);
		return;
	}
	ldcp = ldclp->headp;

	/* cblock serializes handshake state changes with the ldc callback */
	mutex_enter(&ldcp->cblock);

	/*
	 * If the peer is vlan_unaware(ver < 1.3), reset channel and terminate
	 * the connection. See comments in vgen_set_vnet_proto_ops().
	 */
	if (ldcp->hphase == VH_DONE && VGEN_VER_LT(ldcp, 1, 3) &&
	    (portp->nvids != 0 || portp->pvid != vnetp->pvid)) {
		ldcp->need_ldc_reset = B_TRUE;
		vgen_handshake_retry(ldcp);
	}

	mutex_exit(&ldcp->cblock);

	RW_EXIT(&ldclp->rwlock);
}
4445 
4446 static void
4447 vgen_reset_vlan_unaware_ports(vgen_t *vgenp)
4448 {
4449 	vgen_port_t	*portp;
4450 	vgen_portlist_t	*plistp;
4451 
4452 	plistp = &(vgenp->vgenports);
4453 	READ_ENTER(&plistp->rwlock);
4454 
4455 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
4456 
4457 		vgen_vlan_unaware_port_reset(portp);
4458 
4459 	}
4460 
4461 	RW_EXIT(&plistp->rwlock);
4462 }
4463 
4464 /*
4465  * This function resets the handshake phase to VH_PHASE0(pre-handshake phase).
4466  * This can happen after a channel comes up (status: LDC_UP) or
4467  * when handshake gets terminated due to various conditions.
4468  */
4469 static void
4470 vgen_reset_hphase(vgen_ldc_t *ldcp)
4471 {
4472 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4473 	ldc_status_t istatus;
4474 	int rv;
4475 
4476 	DBG1(vgenp, ldcp, "enter\n");
4477 	/* reset hstate and hphase */
4478 	ldcp->hstate = 0;
4479 	ldcp->hphase = VH_PHASE0;
4480 
4481 	vgen_reset_vnet_proto_ops(ldcp);
4482 
4483 	/*
4484 	 * Save the id of pending handshake timer in cancel_htid.
4485 	 * This will be checked in vgen_ldc_cb() and the handshake timer will
4486 	 * be cancelled after releasing cblock.
4487 	 */
4488 	if (ldcp->htid) {
4489 		ldcp->cancel_htid = ldcp->htid;
4490 		ldcp->htid = 0;
4491 	}
4492 
4493 	if (ldcp->local_hparams.dring_ready) {
4494 		ldcp->local_hparams.dring_ready = B_FALSE;
4495 	}
4496 
4497 	/* Unbind tx descriptor ring from the channel */
4498 	if (ldcp->num_txdcookies) {
4499 		rv = ldc_mem_dring_unbind(ldcp->tx_dhandle);
4500 		if (rv != 0) {
4501 			DWARN(vgenp, ldcp, "ldc_mem_dring_unbind failed\n");
4502 		}
4503 		ldcp->num_txdcookies = 0;
4504 	}
4505 
4506 	if (ldcp->peer_hparams.dring_ready) {
4507 		ldcp->peer_hparams.dring_ready = B_FALSE;
4508 		/* Unmap peer's dring */
4509 		(void) ldc_mem_dring_unmap(ldcp->rx_dhandle);
4510 		vgen_clobber_rxds(ldcp);
4511 	}
4512 
4513 	vgen_clobber_tbufs(ldcp);
4514 
4515 	/*
4516 	 * clear local handshake params and initialize.
4517 	 */
4518 	bzero(&(ldcp->local_hparams), sizeof (ldcp->local_hparams));
4519 
4520 	/* set version to the highest version supported */
4521 	ldcp->local_hparams.ver_major =
4522 	    ldcp->vgen_versions[0].ver_major;
4523 	ldcp->local_hparams.ver_minor =
4524 	    ldcp->vgen_versions[0].ver_minor;
4525 	ldcp->local_hparams.dev_class = VDEV_NETWORK;
4526 
4527 	/* set attr_info params */
4528 	ldcp->local_hparams.mtu = vgenp->max_frame_size;
4529 	ldcp->local_hparams.addr =
4530 	    vnet_macaddr_strtoul(vgenp->macaddr);
4531 	ldcp->local_hparams.addr_type = ADDR_TYPE_MAC;
4532 	ldcp->local_hparams.xfer_mode = VIO_DRING_MODE_V1_0;
4533 	ldcp->local_hparams.ack_freq = 0;	/* don't need acks */
4534 
4535 	/*
4536 	 * Note: dring is created, but not bound yet.
4537 	 * local dring_info params will be updated when we bind the dring in
4538 	 * vgen_handshake_phase2().
4539 	 * dring_ident is set to 0. After mapping the dring, peer sets this
4540 	 * value and sends it in the ack, which is saved in
4541 	 * vgen_handle_dring_reg().
4542 	 */
4543 	ldcp->local_hparams.dring_ident = 0;
4544 
4545 	/* clear peer_hparams */
4546 	bzero(&(ldcp->peer_hparams), sizeof (ldcp->peer_hparams));
4547 
4548 	/* reset the channel if required */
4549 	if (ldcp->need_ldc_reset) {
4550 		DWARN(vgenp, ldcp, "Doing Channel Reset...\n");
4551 		ldcp->need_ldc_reset = B_FALSE;
4552 		(void) ldc_down(ldcp->ldc_handle);
4553 		(void) ldc_status(ldcp->ldc_handle, &istatus);
4554 		DBG2(vgenp, ldcp, "Reset Done,ldc_status(%x)\n", istatus);
4555 		ldcp->ldc_status = istatus;
4556 
4557 		/* clear sids */
4558 		ldcp->local_sid = 0;
4559 		ldcp->peer_sid = 0;
4560 
4561 		/* try to bring the channel up */
4562 		rv = ldc_up(ldcp->ldc_handle);
4563 		if (rv != 0) {
4564 			DWARN(vgenp, ldcp, "ldc_up err rv(%d)\n", rv);
4565 		}
4566 
4567 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
4568 			DWARN(vgenp, ldcp, "ldc_status err\n");
4569 		} else {
4570 			ldcp->ldc_status = istatus;
4571 		}
4572 	}
4573 }
4574 
/* wrapper function for vgen_reset_hphase */
static void
vgen_handshake_reset(vgen_ldc_t *ldcp)
{
	/*
	 * Acquire all data-path locks (in rx -> wr -> tx -> tc order,
	 * with cblock already held by the caller) so that no transmit
	 * or receive activity is in progress while the handshake state
	 * and the rings are torn down.
	 */
	ASSERT(MUTEX_HELD(&ldcp->cblock));
	mutex_enter(&ldcp->rxlock);
	mutex_enter(&ldcp->wrlock);
	mutex_enter(&ldcp->txlock);
	mutex_enter(&ldcp->tclock);

	vgen_reset_hphase(ldcp);

	mutex_exit(&ldcp->tclock);
	mutex_exit(&ldcp->txlock);
	mutex_exit(&ldcp->wrlock);
	mutex_exit(&ldcp->rxlock);
}
4592 
4593 /*
4594  * Initiate handshake with the peer by sending various messages
4595  * based on the handshake-phase that the channel is currently in.
4596  */
4597 static void
4598 vgen_handshake(vgen_ldc_t *ldcp)
4599 {
4600 	uint32_t hphase = ldcp->hphase;
4601 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
4602 	ldc_status_t	istatus;
4603 	int	rv = 0;
4604 
4605 	switch (hphase) {
4606 
4607 	case VH_PHASE1:
4608 
4609 		/*
4610 		 * start timer, for entire handshake process, turn this timer
4611 		 * off if all phases of handshake complete successfully and
4612 		 * hphase goes to VH_DONE(below) or
4613 		 * vgen_reset_hphase() gets called or
4614 		 * channel is reset due to errors or
4615 		 * vgen_ldc_uninit() is invoked(vgen_stop).
4616 		 */
4617 		ldcp->htid = timeout(vgen_hwatchdog, (caddr_t)ldcp,
4618 		    drv_usectohz(vgen_hwd_interval * MICROSEC));
4619 
4620 		/* Phase 1 involves negotiating the version */
4621 		rv = vgen_send_version_negotiate(ldcp);
4622 		break;
4623 
4624 	case VH_PHASE2:
4625 		rv = vgen_handshake_phase2(ldcp);
4626 		break;
4627 
4628 	case VH_PHASE3:
4629 		rv = vgen_send_rdx_info(ldcp);
4630 		break;
4631 
4632 	case VH_DONE:
4633 		/*
4634 		 * Save the id of pending handshake timer in cancel_htid.
4635 		 * This will be checked in vgen_ldc_cb() and the handshake
4636 		 * timer will be cancelled after releasing cblock.
4637 		 */
4638 		if (ldcp->htid) {
4639 			ldcp->cancel_htid = ldcp->htid;
4640 			ldcp->htid = 0;
4641 		}
4642 		ldcp->hretries = 0;
4643 		DBG1(vgenp, ldcp, "Handshake Done\n");
4644 
4645 		if (ldcp->portp == vgenp->vsw_portp) {
4646 			/*
4647 			 * If this channel(port) is connected to vsw,
4648 			 * need to sync multicast table with vsw.
4649 			 */
4650 			mutex_exit(&ldcp->cblock);
4651 
4652 			mutex_enter(&vgenp->lock);
4653 			rv = vgen_send_mcast_info(ldcp);
4654 			mutex_exit(&vgenp->lock);
4655 
4656 			mutex_enter(&ldcp->cblock);
4657 			if (rv != VGEN_SUCCESS)
4658 				break;
4659 		}
4660 
4661 		/*
4662 		 * Check if mac layer should be notified to restart
4663 		 * transmissions. This can happen if the channel got
4664 		 * reset and vgen_clobber_tbufs() is called, while
4665 		 * need_resched is set.
4666 		 */
4667 		mutex_enter(&ldcp->tclock);
4668 		if (ldcp->need_resched) {
4669 			vio_net_tx_update_t vtx_update =
4670 			    ldcp->portp->vcb.vio_net_tx_update;
4671 
4672 			ldcp->need_resched = B_FALSE;
4673 			vtx_update(ldcp->portp->vhp);
4674 		}
4675 		mutex_exit(&ldcp->tclock);
4676 
4677 		break;
4678 
4679 	default:
4680 		break;
4681 	}
4682 
4683 	if (rv == ECONNRESET) {
4684 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
4685 			DWARN(vgenp, ldcp, "ldc_status err\n");
4686 		} else {
4687 			ldcp->ldc_status = istatus;
4688 		}
4689 		vgen_handle_evt_reset(ldcp);
4690 	} else if (rv) {
4691 		vgen_handshake_reset(ldcp);
4692 	}
4693 }
4694 
4695 /*
4696  * Check if the current handshake phase has completed successfully and
4697  * return the status.
4698  */
4699 static int
4700 vgen_handshake_done(vgen_ldc_t *ldcp)
4701 {
4702 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4703 	uint32_t	hphase = ldcp->hphase;
4704 	int 		status = 0;
4705 
4706 	switch (hphase) {
4707 
4708 	case VH_PHASE1:
4709 		/*
4710 		 * Phase1 is done, if version negotiation
4711 		 * completed successfully.
4712 		 */
4713 		status = ((ldcp->hstate & VER_NEGOTIATED) ==
4714 		    VER_NEGOTIATED);
4715 		break;
4716 
4717 	case VH_PHASE2:
4718 		/*
4719 		 * Phase 2 is done, if attr info and dring info
4720 		 * have been exchanged successfully.
4721 		 */
4722 		status = (((ldcp->hstate & ATTR_INFO_EXCHANGED) ==
4723 		    ATTR_INFO_EXCHANGED) &&
4724 		    ((ldcp->hstate & DRING_INFO_EXCHANGED) ==
4725 		    DRING_INFO_EXCHANGED));
4726 		break;
4727 
4728 	case VH_PHASE3:
4729 		/* Phase 3 is done, if rdx msg has been exchanged */
4730 		status = ((ldcp->hstate & RDX_EXCHANGED) ==
4731 		    RDX_EXCHANGED);
4732 		break;
4733 
4734 	default:
4735 		break;
4736 	}
4737 
4738 	if (status == 0) {
4739 		return (VGEN_FAILURE);
4740 	}
4741 	DBG2(vgenp, ldcp, "PHASE(%d)\n", hphase);
4742 	return (VGEN_SUCCESS);
4743 }
4744 
4745 /* retry handshake on failure */
4746 static void
4747 vgen_handshake_retry(vgen_ldc_t *ldcp)
4748 {
4749 	/* reset handshake phase */
4750 	vgen_handshake_reset(ldcp);
4751 
4752 	/* handshake retry is specified and the channel is UP */
4753 	if (vgen_max_hretries && (ldcp->ldc_status == LDC_UP)) {
4754 		if (ldcp->hretries++ < vgen_max_hretries) {
4755 			ldcp->local_sid = ddi_get_lbolt();
4756 			vgen_handshake(vh_nextphase(ldcp));
4757 		}
4758 	}
4759 }
4760 
4761 /*
4762  * Handle a version info msg from the peer or an ACK/NACK from the peer
4763  * to a version info msg that we sent.
4764  */
4765 static int
4766 vgen_handle_version_negotiate(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4767 {
4768 	vgen_t		*vgenp;
4769 	vio_ver_msg_t	*vermsg = (vio_ver_msg_t *)tagp;
4770 	int		ack = 0;
4771 	int		failed = 0;
4772 	int		idx;
4773 	vgen_ver_t	*versions = ldcp->vgen_versions;
4774 	int		rv = 0;
4775 
4776 	vgenp = LDC_TO_VGEN(ldcp);
4777 	DBG1(vgenp, ldcp, "enter\n");
4778 	switch (tagp->vio_subtype) {
4779 	case VIO_SUBTYPE_INFO:
4780 
4781 		/*  Cache sid of peer if this is the first time */
4782 		if (ldcp->peer_sid == 0) {
4783 			DBG2(vgenp, ldcp, "Caching peer_sid(%x)\n",
4784 			    tagp->vio_sid);
4785 			ldcp->peer_sid = tagp->vio_sid;
4786 		}
4787 
4788 		if (ldcp->hphase != VH_PHASE1) {
4789 			/*
4790 			 * If we are not already in VH_PHASE1, reset to
4791 			 * pre-handshake state, and initiate handshake
4792 			 * to the peer too.
4793 			 */
4794 			vgen_handshake_reset(ldcp);
4795 			vgen_handshake(vh_nextphase(ldcp));
4796 		}
4797 		ldcp->hstate |= VER_INFO_RCVD;
4798 
4799 		/* save peer's requested values */
4800 		ldcp->peer_hparams.ver_major = vermsg->ver_major;
4801 		ldcp->peer_hparams.ver_minor = vermsg->ver_minor;
4802 		ldcp->peer_hparams.dev_class = vermsg->dev_class;
4803 
4804 		if ((vermsg->dev_class != VDEV_NETWORK) &&
4805 		    (vermsg->dev_class != VDEV_NETWORK_SWITCH)) {
4806 			/* unsupported dev_class, send NACK */
4807 
4808 			DWARN(vgenp, ldcp, "Version Negotiation Failed\n");
4809 
4810 			tagp->vio_subtype = VIO_SUBTYPE_NACK;
4811 			tagp->vio_sid = ldcp->local_sid;
4812 			/* send reply msg back to peer */
4813 			rv = vgen_sendmsg(ldcp, (caddr_t)tagp,
4814 			    sizeof (*vermsg), B_FALSE);
4815 			if (rv != VGEN_SUCCESS) {
4816 				return (rv);
4817 			}
4818 			return (VGEN_FAILURE);
4819 		}
4820 
4821 		DBG2(vgenp, ldcp, "VER_INFO_RCVD, ver(%d,%d)\n",
4822 		    vermsg->ver_major,  vermsg->ver_minor);
4823 
4824 		idx = 0;
4825 
4826 		for (;;) {
4827 
4828 			if (vermsg->ver_major > versions[idx].ver_major) {
4829 
4830 				/* nack with next lower version */
4831 				tagp->vio_subtype = VIO_SUBTYPE_NACK;
4832 				vermsg->ver_major = versions[idx].ver_major;
4833 				vermsg->ver_minor = versions[idx].ver_minor;
4834 				break;
4835 			}
4836 
4837 			if (vermsg->ver_major == versions[idx].ver_major) {
4838 
4839 				/* major version match - ACK version */
4840 				tagp->vio_subtype = VIO_SUBTYPE_ACK;
4841 				ack = 1;
4842 
4843 				/*
4844 				 * lower minor version to the one this endpt
4845 				 * supports, if necessary
4846 				 */
4847 				if (vermsg->ver_minor >
4848 				    versions[idx].ver_minor) {
4849 					vermsg->ver_minor =
4850 					    versions[idx].ver_minor;
4851 					ldcp->peer_hparams.ver_minor =
4852 					    versions[idx].ver_minor;
4853 				}
4854 				break;
4855 			}
4856 
4857 			idx++;
4858 
4859 			if (idx == VGEN_NUM_VER) {
4860 
4861 				/* no version match - send NACK */
4862 				tagp->vio_subtype = VIO_SUBTYPE_NACK;
4863 				vermsg->ver_major = 0;
4864 				vermsg->ver_minor = 0;
4865 				failed = 1;
4866 				break;
4867 			}
4868 
4869 		}
4870 
4871 		tagp->vio_sid = ldcp->local_sid;
4872 
4873 		/* send reply msg back to peer */
4874 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*vermsg),
4875 		    B_FALSE);
4876 		if (rv != VGEN_SUCCESS) {
4877 			return (rv);
4878 		}
4879 
4880 		if (ack) {
4881 			ldcp->hstate |= VER_ACK_SENT;
4882 			DBG2(vgenp, ldcp, "VER_ACK_SENT, ver(%d,%d) \n",
4883 			    vermsg->ver_major, vermsg->ver_minor);
4884 		}
4885 		if (failed) {
4886 			DWARN(vgenp, ldcp, "Negotiation Failed\n");
4887 			return (VGEN_FAILURE);
4888 		}
4889 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
4890 
4891 			/*  VER_ACK_SENT and VER_ACK_RCVD */
4892 
4893 			/* local and peer versions match? */
4894 			ASSERT((ldcp->local_hparams.ver_major ==
4895 			    ldcp->peer_hparams.ver_major) &&
4896 			    (ldcp->local_hparams.ver_minor ==
4897 			    ldcp->peer_hparams.ver_minor));
4898 
4899 			vgen_set_vnet_proto_ops(ldcp);
4900 
4901 			/* move to the next phase */
4902 			vgen_handshake(vh_nextphase(ldcp));
4903 		}
4904 
4905 		break;
4906 
4907 	case VIO_SUBTYPE_ACK:
4908 
4909 		if (ldcp->hphase != VH_PHASE1) {
4910 			/*  This should not happen. */
4911 			DWARN(vgenp, ldcp, "Invalid Phase(%u)\n", ldcp->hphase);
4912 			return (VGEN_FAILURE);
4913 		}
4914 
4915 		/* SUCCESS - we have agreed on a version */
4916 		ldcp->local_hparams.ver_major = vermsg->ver_major;
4917 		ldcp->local_hparams.ver_minor = vermsg->ver_minor;
4918 		ldcp->hstate |= VER_ACK_RCVD;
4919 
4920 		DBG2(vgenp, ldcp, "VER_ACK_RCVD, ver(%d,%d) \n",
4921 		    vermsg->ver_major,  vermsg->ver_minor);
4922 
4923 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
4924 
4925 			/*  VER_ACK_SENT and VER_ACK_RCVD */
4926 
4927 			/* local and peer versions match? */
4928 			ASSERT((ldcp->local_hparams.ver_major ==
4929 			    ldcp->peer_hparams.ver_major) &&
4930 			    (ldcp->local_hparams.ver_minor ==
4931 			    ldcp->peer_hparams.ver_minor));
4932 
4933 			vgen_set_vnet_proto_ops(ldcp);
4934 
4935 			/* move to the next phase */
4936 			vgen_handshake(vh_nextphase(ldcp));
4937 		}
4938 		break;
4939 
4940 	case VIO_SUBTYPE_NACK:
4941 
4942 		if (ldcp->hphase != VH_PHASE1) {
4943 			/*  This should not happen.  */
4944 			DWARN(vgenp, ldcp, "VER_NACK_RCVD Invalid "
4945 			"Phase(%u)\n", ldcp->hphase);
4946 			return (VGEN_FAILURE);
4947 		}
4948 
4949 		DBG2(vgenp, ldcp, "VER_NACK_RCVD next ver(%d,%d)\n",
4950 		    vermsg->ver_major, vermsg->ver_minor);
4951 
4952 		/* check if version in NACK is zero */
4953 		if (vermsg->ver_major == 0 && vermsg->ver_minor == 0) {
4954 			/*
4955 			 * Version Negotiation has failed.
4956 			 */
4957 			DWARN(vgenp, ldcp, "Version Negotiation Failed\n");
4958 			return (VGEN_FAILURE);
4959 		}
4960 
4961 		idx = 0;
4962 
4963 		for (;;) {
4964 
4965 			if (vermsg->ver_major > versions[idx].ver_major) {
4966 				/* select next lower version */
4967 
4968 				ldcp->local_hparams.ver_major =
4969 				    versions[idx].ver_major;
4970 				ldcp->local_hparams.ver_minor =
4971 				    versions[idx].ver_minor;
4972 				break;
4973 			}
4974 
4975 			if (vermsg->ver_major == versions[idx].ver_major) {
4976 				/* major version match */
4977 
4978 				ldcp->local_hparams.ver_major =
4979 				    versions[idx].ver_major;
4980 
4981 				ldcp->local_hparams.ver_minor =
4982 				    versions[idx].ver_minor;
4983 				break;
4984 			}
4985 
4986 			idx++;
4987 
4988 			if (idx == VGEN_NUM_VER) {
4989 				/*
4990 				 * no version match.
4991 				 * Version Negotiation has failed.
4992 				 */
4993 				DWARN(vgenp, ldcp,
4994 				    "Version Negotiation Failed\n");
4995 				return (VGEN_FAILURE);
4996 			}
4997 
4998 		}
4999 
5000 		rv = vgen_send_version_negotiate(ldcp);
5001 		if (rv != VGEN_SUCCESS) {
5002 			return (rv);
5003 		}
5004 
5005 		break;
5006 	}
5007 
5008 	DBG1(vgenp, ldcp, "exit\n");
5009 	return (VGEN_SUCCESS);
5010 }
5011 
5012 /* Check if the attributes are supported */
5013 static int
5014 vgen_check_attr_info(vgen_ldc_t *ldcp, vnet_attr_msg_t *msg)
5015 {
5016 	vgen_hparams_t	*lp = &ldcp->local_hparams;
5017 
5018 	if ((msg->addr_type != ADDR_TYPE_MAC) ||
5019 	    (msg->ack_freq > 64) ||
5020 	    (msg->xfer_mode != lp->xfer_mode)) {
5021 		return (VGEN_FAILURE);
5022 	}
5023 
5024 	if (VGEN_VER_LT(ldcp, 1, 4)) {
5025 		/* versions < 1.4, mtu must match */
5026 		if (msg->mtu != lp->mtu) {
5027 			return (VGEN_FAILURE);
5028 		}
5029 	} else {
5030 		/* Ver >= 1.4, validate mtu of the peer is at least ETHERMAX */
5031 		if (msg->mtu < ETHERMAX) {
5032 			return (VGEN_FAILURE);
5033 		}
5034 	}
5035 
5036 	return (VGEN_SUCCESS);
5037 }
5038 
5039 /*
5040  * Handle an attribute info msg from the peer or an ACK/NACK from the peer
5041  * to an attr info msg that we sent.
5042  */
5043 static int
5044 vgen_handle_attr_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5045 {
5046 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
5047 	vnet_attr_msg_t	*msg = (vnet_attr_msg_t *)tagp;
5048 	vgen_hparams_t	*lp = &ldcp->local_hparams;
5049 	vgen_hparams_t	*rp = &ldcp->peer_hparams;
5050 	int		ack = 1;
5051 	int		rv = 0;
5052 	uint32_t	mtu;
5053 
5054 	DBG1(vgenp, ldcp, "enter\n");
5055 	if (ldcp->hphase != VH_PHASE2) {
5056 		DWARN(vgenp, ldcp, "Rcvd ATTR_INFO subtype(%d),"
5057 		" Invalid Phase(%u)\n",
5058 		    tagp->vio_subtype, ldcp->hphase);
5059 		return (VGEN_FAILURE);
5060 	}
5061 	switch (tagp->vio_subtype) {
5062 	case VIO_SUBTYPE_INFO:
5063 
5064 		DBG2(vgenp, ldcp, "ATTR_INFO_RCVD \n");
5065 		ldcp->hstate |= ATTR_INFO_RCVD;
5066 
5067 		/* save peer's values */
5068 		rp->mtu = msg->mtu;
5069 		rp->addr = msg->addr;
5070 		rp->addr_type = msg->addr_type;
5071 		rp->xfer_mode = msg->xfer_mode;
5072 		rp->ack_freq = msg->ack_freq;
5073 
5074 		rv = vgen_check_attr_info(ldcp, msg);
5075 		if (rv == VGEN_FAILURE) {
5076 			/* unsupported attr, send NACK */
5077 			ack = 0;
5078 		} else {
5079 
5080 			if (VGEN_VER_GTEQ(ldcp, 1, 4)) {
5081 
5082 				/*
5083 				 * Versions >= 1.4:
5084 				 * The mtu is negotiated down to the
5085 				 * minimum of our mtu and peer's mtu.
5086 				 */
5087 				mtu = MIN(msg->mtu, vgenp->max_frame_size);
5088 
5089 				/*
5090 				 * If we have received an ack for the attr info
5091 				 * that we sent, then check if the mtu computed
5092 				 * above matches the mtu that the peer had ack'd
5093 				 * (saved in local hparams). If they don't
5094 				 * match, we fail the handshake.
5095 				 */
5096 				if (ldcp->hstate & ATTR_ACK_RCVD) {
5097 					if (mtu != lp->mtu) {
5098 						/* send NACK */
5099 						ack = 0;
5100 					}
5101 				} else {
5102 					/*
5103 					 * Save the mtu computed above in our
5104 					 * attr parameters, so it gets sent in
5105 					 * the attr info from us to the peer.
5106 					 */
5107 					lp->mtu = mtu;
5108 				}
5109 
5110 				/* save the MIN mtu in the msg to be replied */
5111 				msg->mtu = mtu;
5112 
5113 			}
5114 		}
5115 
5116 
5117 		if (ack) {
5118 			tagp->vio_subtype = VIO_SUBTYPE_ACK;
5119 		} else {
5120 			tagp->vio_subtype = VIO_SUBTYPE_NACK;
5121 		}
5122 		tagp->vio_sid = ldcp->local_sid;
5123 
5124 		/* send reply msg back to peer */
5125 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msg),
5126 		    B_FALSE);
5127 		if (rv != VGEN_SUCCESS) {
5128 			return (rv);
5129 		}
5130 
5131 		if (ack) {
5132 			ldcp->hstate |= ATTR_ACK_SENT;
5133 			DBG2(vgenp, ldcp, "ATTR_ACK_SENT \n");
5134 		} else {
5135 			/* failed */
5136 			DWARN(vgenp, ldcp, "ATTR_NACK_SENT \n");
5137 			return (VGEN_FAILURE);
5138 		}
5139 
5140 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5141 			vgen_handshake(vh_nextphase(ldcp));
5142 		}
5143 
5144 		break;
5145 
5146 	case VIO_SUBTYPE_ACK:
5147 
5148 		if (VGEN_VER_GTEQ(ldcp, 1, 4)) {
5149 			/*
5150 			 * Versions >= 1.4:
5151 			 * The ack msg sent by the peer contains the minimum of
5152 			 * our mtu (that we had sent in our attr info) and the
5153 			 * peer's mtu.
5154 			 *
5155 			 * If we have sent an ack for the attr info msg from
5156 			 * the peer, check if the mtu that was computed then
5157 			 * (saved in local hparams) matches the mtu that the
5158 			 * peer has ack'd. If they don't match, we fail the
5159 			 * handshake.
5160 			 */
5161 			if (ldcp->hstate & ATTR_ACK_SENT) {
5162 				if (lp->mtu != msg->mtu) {
5163 					return (VGEN_FAILURE);
5164 				}
5165 			} else {
5166 				/*
5167 				 * If the mtu ack'd by the peer is > our mtu
5168 				 * fail handshake. Otherwise, save the mtu, so
5169 				 * we can validate it when we receive attr info
5170 				 * from our peer.
5171 				 */
5172 				if (msg->mtu > lp->mtu) {
5173 					return (VGEN_FAILURE);
5174 				}
5175 				if (msg->mtu <= lp->mtu) {
5176 					lp->mtu = msg->mtu;
5177 				}
5178 			}
5179 		}
5180 
5181 		ldcp->hstate |= ATTR_ACK_RCVD;
5182 
5183 		DBG2(vgenp, ldcp, "ATTR_ACK_RCVD \n");
5184 
5185 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5186 			vgen_handshake(vh_nextphase(ldcp));
5187 		}
5188 		break;
5189 
5190 	case VIO_SUBTYPE_NACK:
5191 
5192 		DBG2(vgenp, ldcp, "ATTR_NACK_RCVD \n");
5193 		return (VGEN_FAILURE);
5194 	}
5195 	DBG1(vgenp, ldcp, "exit\n");
5196 	return (VGEN_SUCCESS);
5197 }
5198 
5199 /* Check if the dring info msg is ok */
5200 static int
5201 vgen_check_dring_reg(vio_dring_reg_msg_t *msg)
5202 {
5203 	/* check if msg contents are ok */
5204 	if ((msg->num_descriptors < 128) || (msg->descriptor_size <
5205 	    sizeof (vnet_public_desc_t))) {
5206 		return (VGEN_FAILURE);
5207 	}
5208 	return (VGEN_SUCCESS);
5209 }
5210 
5211 /*
5212  * Handle a descriptor ring register msg from the peer or an ACK/NACK from
5213  * the peer to a dring register msg that we sent.
5214  */
5215 static int
5216 vgen_handle_dring_reg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5217 {
5218 	vio_dring_reg_msg_t *msg = (vio_dring_reg_msg_t *)tagp;
5219 	ldc_mem_cookie_t dcookie;
5220 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5221 	int ack = 0;
5222 	int rv = 0;
5223 
5224 	DBG1(vgenp, ldcp, "enter\n");
5225 	if (ldcp->hphase < VH_PHASE2) {
5226 		/* dring_info can be rcvd in any of the phases after Phase1 */
5227 		DWARN(vgenp, ldcp,
5228 		    "Rcvd DRING_INFO Subtype (%d), Invalid Phase(%u)\n",
5229 		    tagp->vio_subtype, ldcp->hphase);
5230 		return (VGEN_FAILURE);
5231 	}
5232 	switch (tagp->vio_subtype) {
5233 	case VIO_SUBTYPE_INFO:
5234 
5235 		DBG2(vgenp, ldcp, "DRING_INFO_RCVD \n");
5236 		ldcp->hstate |= DRING_INFO_RCVD;
5237 		bcopy((msg->cookie), &dcookie, sizeof (dcookie));
5238 
5239 		ASSERT(msg->ncookies == 1);
5240 
5241 		if (vgen_check_dring_reg(msg) == VGEN_SUCCESS) {
5242 			/*
5243 			 * verified dring info msg to be ok,
5244 			 * now try to map the remote dring.
5245 			 */
5246 			rv = vgen_init_rxds(ldcp, msg->num_descriptors,
5247 			    msg->descriptor_size, &dcookie,
5248 			    msg->ncookies);
5249 			if (rv == DDI_SUCCESS) {
5250 				/* now we can ack the peer */
5251 				ack = 1;
5252 			}
5253 		}
5254 		if (ack == 0) {
5255 			/* failed, send NACK */
5256 			tagp->vio_subtype = VIO_SUBTYPE_NACK;
5257 		} else {
5258 			if (!(ldcp->peer_hparams.dring_ready)) {
5259 
5260 				/* save peer's dring_info values */
5261 				bcopy(&dcookie,
5262 				    &(ldcp->peer_hparams.dring_cookie),
5263 				    sizeof (dcookie));
5264 				ldcp->peer_hparams.num_desc =
5265 				    msg->num_descriptors;
5266 				ldcp->peer_hparams.desc_size =
5267 				    msg->descriptor_size;
5268 				ldcp->peer_hparams.num_dcookies =
5269 				    msg->ncookies;
5270 
5271 				/* set dring_ident for the peer */
5272 				ldcp->peer_hparams.dring_ident =
5273 				    (uint64_t)ldcp->rxdp;
5274 				/* return the dring_ident in ack msg */
5275 				msg->dring_ident =
5276 				    (uint64_t)ldcp->rxdp;
5277 
5278 				ldcp->peer_hparams.dring_ready = B_TRUE;
5279 			}
5280 			tagp->vio_subtype = VIO_SUBTYPE_ACK;
5281 		}
5282 		tagp->vio_sid = ldcp->local_sid;
5283 		/* send reply msg back to peer */
5284 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msg),
5285 		    B_FALSE);
5286 		if (rv != VGEN_SUCCESS) {
5287 			return (rv);
5288 		}
5289 
5290 		if (ack) {
5291 			ldcp->hstate |= DRING_ACK_SENT;
5292 			DBG2(vgenp, ldcp, "DRING_ACK_SENT");
5293 		} else {
5294 			DWARN(vgenp, ldcp, "DRING_NACK_SENT");
5295 			return (VGEN_FAILURE);
5296 		}
5297 
5298 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5299 			vgen_handshake(vh_nextphase(ldcp));
5300 		}
5301 
5302 		break;
5303 
5304 	case VIO_SUBTYPE_ACK:
5305 
5306 		ldcp->hstate |= DRING_ACK_RCVD;
5307 
5308 		DBG2(vgenp, ldcp, "DRING_ACK_RCVD");
5309 
5310 		if (!(ldcp->local_hparams.dring_ready)) {
5311 			/* local dring is now ready */
5312 			ldcp->local_hparams.dring_ready = B_TRUE;
5313 
5314 			/* save dring_ident acked by peer */
5315 			ldcp->local_hparams.dring_ident =
5316 			    msg->dring_ident;
5317 		}
5318 
5319 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5320 			vgen_handshake(vh_nextphase(ldcp));
5321 		}
5322 
5323 		break;
5324 
5325 	case VIO_SUBTYPE_NACK:
5326 
5327 		DBG2(vgenp, ldcp, "DRING_NACK_RCVD");
5328 		return (VGEN_FAILURE);
5329 	}
5330 	DBG1(vgenp, ldcp, "exit\n");
5331 	return (VGEN_SUCCESS);
5332 }
5333 
5334 /*
5335  * Handle a rdx info msg from the peer or an ACK/NACK
5336  * from the peer to a rdx info msg that we sent.
5337  */
5338 static int
5339 vgen_handle_rdx_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5340 {
5341 	int rv = 0;
5342 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
5343 
5344 	DBG1(vgenp, ldcp, "enter\n");
5345 	if (ldcp->hphase != VH_PHASE3) {
5346 		DWARN(vgenp, ldcp,
5347 		    "Rcvd RDX_INFO Subtype (%d), Invalid Phase(%u)\n",
5348 		    tagp->vio_subtype, ldcp->hphase);
5349 		return (VGEN_FAILURE);
5350 	}
5351 	switch (tagp->vio_subtype) {
5352 	case VIO_SUBTYPE_INFO:
5353 
5354 		DBG2(vgenp, ldcp, "RDX_INFO_RCVD \n");
5355 		ldcp->hstate |= RDX_INFO_RCVD;
5356 
5357 		tagp->vio_subtype = VIO_SUBTYPE_ACK;
5358 		tagp->vio_sid = ldcp->local_sid;
5359 		/* send reply msg back to peer */
5360 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (vio_rdx_msg_t),
5361 		    B_FALSE);
5362 		if (rv != VGEN_SUCCESS) {
5363 			return (rv);
5364 		}
5365 
5366 		ldcp->hstate |= RDX_ACK_SENT;
5367 		DBG2(vgenp, ldcp, "RDX_ACK_SENT \n");
5368 
5369 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5370 			vgen_handshake(vh_nextphase(ldcp));
5371 		}
5372 
5373 		break;
5374 
5375 	case VIO_SUBTYPE_ACK:
5376 
5377 		ldcp->hstate |= RDX_ACK_RCVD;
5378 
5379 		DBG2(vgenp, ldcp, "RDX_ACK_RCVD \n");
5380 
5381 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5382 			vgen_handshake(vh_nextphase(ldcp));
5383 		}
5384 		break;
5385 
5386 	case VIO_SUBTYPE_NACK:
5387 
5388 		DBG2(vgenp, ldcp, "RDX_NACK_RCVD \n");
5389 		return (VGEN_FAILURE);
5390 	}
5391 	DBG1(vgenp, ldcp, "exit\n");
5392 	return (VGEN_SUCCESS);
5393 }
5394 
5395 /* Handle ACK/NACK from vsw to a set multicast msg that we sent */
5396 static int
5397 vgen_handle_mcast_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5398 {
5399 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5400 	vnet_mcast_msg_t *msgp = (vnet_mcast_msg_t *)tagp;
5401 	struct ether_addr *addrp;
5402 	int count;
5403 	int i;
5404 
5405 	DBG1(vgenp, ldcp, "enter\n");
5406 	switch (tagp->vio_subtype) {
5407 
5408 	case VIO_SUBTYPE_INFO:
5409 
5410 		/* vnet shouldn't recv set mcast msg, only vsw handles it */
5411 		DWARN(vgenp, ldcp, "rcvd SET_MCAST_INFO \n");
5412 		break;
5413 
5414 	case VIO_SUBTYPE_ACK:
5415 
5416 		/* success adding/removing multicast addr */
5417 		DBG1(vgenp, ldcp, "rcvd SET_MCAST_ACK \n");
5418 		break;
5419 
5420 	case VIO_SUBTYPE_NACK:
5421 
5422 		DWARN(vgenp, ldcp, "rcvd SET_MCAST_NACK \n");
5423 		if (!(msgp->set)) {
5424 			/* multicast remove request failed */
5425 			break;
5426 		}
5427 
5428 		/* multicast add request failed */
5429 		for (count = 0; count < msgp->count; count++) {
5430 			addrp = &(msgp->mca[count]);
5431 
5432 			/* delete address from the table */
5433 			for (i = 0; i < vgenp->mccount; i++) {
5434 				if (ether_cmp(addrp,
5435 				    &(vgenp->mctab[i])) == 0) {
5436 					if (vgenp->mccount > 1) {
5437 						int t = vgenp->mccount - 1;
5438 						vgenp->mctab[i] =
5439 						    vgenp->mctab[t];
5440 					}
5441 					vgenp->mccount--;
5442 					break;
5443 				}
5444 			}
5445 		}
5446 		break;
5447 
5448 	}
5449 	DBG1(vgenp, ldcp, "exit\n");
5450 
5451 	return (VGEN_SUCCESS);
5452 }
5453 
5454 /* handler for control messages received from the peer ldc end-point */
5455 static int
5456 vgen_handle_ctrlmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5457 {
5458 	int rv = 0;
5459 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5460 
5461 	DBG1(vgenp, ldcp, "enter\n");
5462 	switch (tagp->vio_subtype_env) {
5463 
5464 	case VIO_VER_INFO:
5465 		rv = vgen_handle_version_negotiate(ldcp, tagp);
5466 		break;
5467 
5468 	case VIO_ATTR_INFO:
5469 		rv = vgen_handle_attr_info(ldcp, tagp);
5470 		break;
5471 
5472 	case VIO_DRING_REG:
5473 		rv = vgen_handle_dring_reg(ldcp, tagp);
5474 		break;
5475 
5476 	case VIO_RDX:
5477 		rv = vgen_handle_rdx_info(ldcp, tagp);
5478 		break;
5479 
5480 	case VNET_MCAST_INFO:
5481 		rv = vgen_handle_mcast_info(ldcp, tagp);
5482 		break;
5483 
5484 	case VIO_DDS_INFO:
5485 		rv = vgen_dds_rx(ldcp, tagp);
5486 		break;
5487 	}
5488 
5489 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
5490 	return (rv);
5491 }
5492 
5493 /* handler for data messages received from the peer ldc end-point */
5494 static int
5495 vgen_handle_datamsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp, uint32_t msglen)
5496 {
5497 	int rv = 0;
5498 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5499 
5500 	DBG1(vgenp, ldcp, "enter\n");
5501 
5502 	if (ldcp->hphase != VH_DONE)
5503 		return (rv);
5504 
5505 	if (tagp->vio_subtype == VIO_SUBTYPE_INFO) {
5506 		rv = vgen_check_datamsg_seq(ldcp, tagp);
5507 		if (rv != 0) {
5508 			return (rv);
5509 		}
5510 	}
5511 
5512 	switch (tagp->vio_subtype_env) {
5513 	case VIO_DRING_DATA:
5514 		rv = vgen_handle_dring_data(ldcp, tagp);
5515 		break;
5516 
5517 	case VIO_PKT_DATA:
5518 		ldcp->rx_pktdata((void *)ldcp, (void *)tagp, msglen);
5519 		break;
5520 	default:
5521 		break;
5522 	}
5523 
5524 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
5525 	return (rv);
5526 }
5527 
5528 /*
5529  * dummy pkt data handler function for vnet protocol version 1.0
5530  */
5531 static void
5532 vgen_handle_pkt_data_nop(void *arg1, void *arg2, uint32_t msglen)
5533 {
5534 	_NOTE(ARGUNUSED(arg1, arg2, msglen))
5535 }
5536 
5537 /*
5538  * This function handles raw pkt data messages received over the channel.
5539  * Currently, only priority-eth-type frames are received through this mechanism.
5540  * In this case, the frame(data) is present within the message itself which
5541  * is copied into an mblk before sending it up the stack.
5542  */
5543 static void
5544 vgen_handle_pkt_data(void *arg1, void *arg2, uint32_t msglen)
5545 {
5546 	vgen_ldc_t		*ldcp = (vgen_ldc_t *)arg1;
5547 	vio_raw_data_msg_t	*pkt	= (vio_raw_data_msg_t *)arg2;
5548 	uint32_t		size;
5549 	mblk_t			*mp;
5550 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
5551 	vgen_stats_t		*statsp = &ldcp->stats;
5552 	vgen_hparams_t		*lp = &ldcp->local_hparams;
5553 	vio_net_rx_cb_t		vrx_cb;
5554 
5555 	ASSERT(MUTEX_HELD(&ldcp->cblock));
5556 
5557 	mutex_exit(&ldcp->cblock);
5558 
5559 	size = msglen - VIO_PKT_DATA_HDRSIZE;
5560 	if (size < ETHERMIN || size > lp->mtu) {
5561 		(void) atomic_inc_32(&statsp->rx_pri_fail);
5562 		goto exit;
5563 	}
5564 
5565 	mp = vio_multipool_allocb(&ldcp->vmp, size);
5566 	if (mp == NULL) {
5567 		mp = allocb(size, BPRI_MED);
5568 		if (mp == NULL) {
5569 			(void) atomic_inc_32(&statsp->rx_pri_fail);
5570 			DWARN(vgenp, ldcp, "allocb failure, "
5571 			    "unable to process priority frame\n");
5572 			goto exit;
5573 		}
5574 	}
5575 
5576 	/* copy the frame from the payload of raw data msg into the mblk */
5577 	bcopy(pkt->data, mp->b_rptr, size);
5578 	mp->b_wptr = mp->b_rptr + size;
5579 
5580 	/* update stats */
5581 	(void) atomic_inc_64(&statsp->rx_pri_packets);
5582 	(void) atomic_add_64(&statsp->rx_pri_bytes, size);
5583 
5584 	/* send up; call vrx_cb() as cblock is already released */
5585 	vrx_cb = ldcp->portp->vcb.vio_net_rx_cb;
5586 	vrx_cb(ldcp->portp->vhp, mp);
5587 
5588 exit:
5589 	mutex_enter(&ldcp->cblock);
5590 }
5591 
5592 static int
5593 vgen_send_dring_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp, uint32_t start,
5594     int32_t end, uint8_t pstate)
5595 {
5596 	int rv = 0;
5597 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5598 	vio_dring_msg_t *msgp = (vio_dring_msg_t *)tagp;
5599 
5600 	tagp->vio_subtype = VIO_SUBTYPE_ACK;
5601 	tagp->vio_sid = ldcp->local_sid;
5602 	msgp->start_idx = start;
5603 	msgp->end_idx = end;
5604 	msgp->dring_process_state = pstate;
5605 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msgp), B_FALSE);
5606 	if (rv != VGEN_SUCCESS) {
5607 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
5608 	}
5609 	return (rv);
5610 }
5611 
5612 static int
5613 vgen_handle_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5614 {
5615 	int rv = 0;
5616 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5617 
5618 
5619 	DBG1(vgenp, ldcp, "enter\n");
5620 	switch (tagp->vio_subtype) {
5621 
5622 	case VIO_SUBTYPE_INFO:
5623 		/*
5624 		 * To reduce the locking contention, release the
5625 		 * cblock here and re-acquire it once we are done
5626 		 * receiving packets.
5627 		 */
5628 		mutex_exit(&ldcp->cblock);
5629 		mutex_enter(&ldcp->rxlock);
5630 		rv = vgen_handle_dring_data_info(ldcp, tagp);
5631 		mutex_exit(&ldcp->rxlock);
5632 		mutex_enter(&ldcp->cblock);
5633 		break;
5634 
5635 	case VIO_SUBTYPE_ACK:
5636 		rv = vgen_handle_dring_data_ack(ldcp, tagp);
5637 		break;
5638 
5639 	case VIO_SUBTYPE_NACK:
5640 		rv = vgen_handle_dring_data_nack(ldcp, tagp);
5641 		break;
5642 	}
5643 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
5644 	return (rv);
5645 }
5646 
5647 static int
5648 vgen_handle_dring_data_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5649 {
5650 	uint32_t start;
5651 	int32_t end;
5652 	int rv = 0;
5653 	vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;
5654 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5655 #ifdef VGEN_HANDLE_LOST_PKTS
5656 	vgen_stats_t *statsp = &ldcp->stats;
5657 	uint32_t rxi;
5658 	int n;
5659 #endif
5660 
5661 	DBG1(vgenp, ldcp, "enter\n");
5662 
5663 	start = dringmsg->start_idx;
5664 	end = dringmsg->end_idx;
5665 	/*
5666 	 * received a data msg, which contains the start and end
5667 	 * indices of the descriptors within the rx ring holding data,
5668 	 * the seq_num of data packet corresponding to the start index,
5669 	 * and the dring_ident.
5670 	 * We can now read the contents of each of these descriptors
5671 	 * and gather data from it.
5672 	 */
5673 	DBG1(vgenp, ldcp, "INFO: start(%d), end(%d)\n",
5674 	    start, end);
5675 
5676 	/* validate rx start and end indeces */
5677 	if (!(CHECK_RXI(start, ldcp)) || ((end != -1) &&
5678 	    !(CHECK_RXI(end, ldcp)))) {
5679 		DWARN(vgenp, ldcp, "Invalid Rx start(%d) or end(%d)\n",
5680 		    start, end);
5681 		/* drop the message if invalid index */
5682 		return (rv);
5683 	}
5684 
5685 	/* validate dring_ident */
5686 	if (dringmsg->dring_ident != ldcp->peer_hparams.dring_ident) {
5687 		DWARN(vgenp, ldcp, "Invalid dring ident 0x%x\n",
5688 		    dringmsg->dring_ident);
5689 		/* invalid dring_ident, drop the msg */
5690 		return (rv);
5691 	}
5692 #ifdef DEBUG
5693 	if (vgen_trigger_rxlost) {
5694 		/* drop this msg to simulate lost pkts for debugging */
5695 		vgen_trigger_rxlost = 0;
5696 		return (rv);
5697 	}
5698 #endif
5699 
5700 #ifdef	VGEN_HANDLE_LOST_PKTS
5701 
5702 	/* receive start index doesn't match expected index */
5703 	if (ldcp->next_rxi != start) {
5704 		DWARN(vgenp, ldcp, "next_rxi(%d) != start(%d)\n",
5705 		    ldcp->next_rxi, start);
5706 
5707 		/* calculate the number of pkts lost */
5708 		if (start >= ldcp->next_rxi) {
5709 			n = start - ldcp->next_rxi;
5710 		} else  {
5711 			n = ldcp->num_rxds - (ldcp->next_rxi - start);
5712 		}
5713 
5714 		statsp->rx_lost_pkts += n;
5715 		tagp->vio_subtype = VIO_SUBTYPE_NACK;
5716 		tagp->vio_sid = ldcp->local_sid;
5717 		/* indicate the range of lost descriptors */
5718 		dringmsg->start_idx = ldcp->next_rxi;
5719 		rxi = start;
5720 		DECR_RXI(rxi, ldcp);
5721 		dringmsg->end_idx = rxi;
5722 		/* dring ident is left unchanged */
5723 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp,
5724 		    sizeof (*dringmsg), B_FALSE);
5725 		if (rv != VGEN_SUCCESS) {
5726 			DWARN(vgenp, ldcp,
5727 			    "vgen_sendmsg failed, stype:NACK\n");
5728 			return (rv);
5729 		}
5730 		/*
5731 		 * treat this range of descrs/pkts as dropped
5732 		 * and set the new expected value of next_rxi
5733 		 * and continue(below) to process from the new
5734 		 * start index.
5735 		 */
5736 		ldcp->next_rxi = start;
5737 	}
5738 
5739 #endif	/* VGEN_HANDLE_LOST_PKTS */
5740 
5741 	/* Now receive messages */
5742 	rv = vgen_process_dring_data(ldcp, tagp);
5743 
5744 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
5745 	return (rv);
5746 }
5747 
/*
 * Receive frames from the range of ready rx descriptors named in a
 * dring data message: copy each frame out of the peer's exported dring
 * memory into an mblk, chain the mblks and pass them up the stack via
 * vgen_rx(), acking descriptor ranges back to the peer as requested.
 * Called from vgen_handle_dring_data_info() with rxlock held and
 * cblock dropped (see vgen_handle_dring_data).
 */
static int
vgen_process_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
{
	boolean_t set_ack_start = B_FALSE;
	uint32_t start;
	uint32_t ack_end;
	uint32_t next_rxi;
	uint32_t rxi;
	int count = 0;
	int rv = 0;
	uint32_t retries = 0;
	vgen_stats_t *statsp;
	vnet_public_desc_t rxd;
	vio_dring_entry_hdr_t *hdrp;
	mblk_t *bp = NULL;		/* head of rx mblk chain */
	mblk_t *bpt = NULL;		/* tail of rx mblk chain */
	uint32_t ack_start;
	boolean_t rxd_err = B_FALSE;
	mblk_t *mp = NULL;
	size_t nbytes;
	boolean_t ack_needed = B_FALSE;
	size_t nread;
	uint64_t off = 0;		/* always copy from offset 0 */
	struct ether_header *ehp;
	vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;
	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
	vgen_hparams_t	*lp = &ldcp->local_hparams;

	DBG1(vgenp, ldcp, "enter\n");

	statsp = &ldcp->stats;
	start = dringmsg->start_idx;

	/*
	 * start processing the descriptors from the specified
	 * start index, up to the index a descriptor is not ready
	 * to be processed or we process the entire descriptor ring
	 * and wrap around upto the start index.
	 */

	/* need to set the start index of descriptors to be ack'd */
	set_ack_start = B_TRUE;

	/* index upto which we have ack'd */
	ack_end = start;
	DECR_RXI(ack_end, ldcp);

	next_rxi = rxi =  start;
	do {
vgen_recv_retry:
		/* take a private snapshot of the descriptor */
		rv = vnet_dring_entry_copy(&(ldcp->rxdp[rxi]), &rxd,
		    ldcp->dring_mtype, ldcp->rx_dhandle, rxi, rxi);
		if (rv != 0) {
			DWARN(vgenp, ldcp, "ldc_mem_dring_acquire() failed"
			    " rv(%d)\n", rv);
			statsp->ierrors++;
			return (rv);
		}

		hdrp = &rxd.hdr;

		if (hdrp->dstate != VIO_DESC_READY) {
			/*
			 * Before waiting and retry here, send up
			 * the packets that are received already
			 */
			if (bp != NULL) {
				DTRACE_PROBE1(vgen_rcv_msgs, int, count);
				vgen_rx(ldcp, bp);
				count = 0;
				bp = bpt = NULL;
			}
			/*
			 * descriptor is not ready.
			 * retry descriptor acquire, stop processing
			 * after max # retries.
			 */
			if (retries == vgen_recv_retries)
				break;
			retries++;
			drv_usecwait(vgen_recv_delay);
			goto vgen_recv_retry;
		}
		retries = 0;

		if (set_ack_start) {
			/*
			 * initialize the start index of the range
			 * of descriptors to be ack'd.
			 */
			ack_start = rxi;
			set_ack_start = B_FALSE;
		}

		/* sanity-check the length and cookie count in the descr */
		if ((rxd.nbytes < ETHERMIN) ||
		    (rxd.nbytes > lp->mtu) ||
		    (rxd.ncookies == 0) ||
		    (rxd.ncookies > MAX_COOKIES)) {
			rxd_err = B_TRUE;
		} else {
			/*
			 * Try to allocate an mblk from the free pool
			 * of recv mblks for the channel.
			 * If this fails, use allocb().
			 */
			/* round up to an 8-byte multiple for ldc_mem_copy */
			nbytes = (VNET_IPALIGN + rxd.nbytes + 7) & ~7;
			if (nbytes > ldcp->max_rxpool_size) {
				mp = allocb(VNET_IPALIGN + rxd.nbytes + 8,
				    BPRI_MED);
			} else {
				mp = vio_multipool_allocb(&ldcp->vmp, nbytes);
				if (mp == NULL) {
					statsp->rx_vio_allocb_fail++;
					/*
					 * Data buffer returned by allocb(9F)
					 * is 8byte aligned. We allocate extra
					 * 8 bytes to ensure size is multiple
					 * of 8 bytes for ldc_mem_copy().
					 */
					mp = allocb(VNET_IPALIGN +
					    rxd.nbytes + 8, BPRI_MED);
				}
			}
		}
		if ((rxd_err) || (mp == NULL)) {
			/*
			 * rxd_err or allocb() failure,
			 * drop this packet, get next.
			 */
			if (rxd_err) {
				statsp->ierrors++;
				rxd_err = B_FALSE;
			} else {
				statsp->rx_allocb_fail++;
			}

			ack_needed = hdrp->ack;

			/* set descriptor done bit */
			rv = vnet_dring_entry_set_dstate(&(ldcp->rxdp[rxi]),
			    ldcp->dring_mtype, ldcp->rx_dhandle, rxi, rxi,
			    VIO_DESC_DONE);
			if (rv != 0) {
				DWARN(vgenp, ldcp,
				    "vnet_dring_entry_set_dstate err rv(%d)\n",
				    rv);
				return (rv);
			}

			if (ack_needed) {
				ack_needed = B_FALSE;
				/*
				 * sender needs ack for this packet,
				 * ack pkts upto this index.
				 */
				ack_end = rxi;

				rv = vgen_send_dring_ack(ldcp, tagp,
				    ack_start, ack_end,
				    VIO_DP_ACTIVE);
				if (rv != VGEN_SUCCESS) {
					goto error_ret;
				}

				/* need to set new ack start index */
				set_ack_start = B_TRUE;
			}
			goto vgen_next_rxi;
		}

		/* copy the frame out of the peer's exported memory */
		nread = nbytes;
		rv = ldc_mem_copy(ldcp->ldc_handle,
		    (caddr_t)mp->b_rptr, off, &nread,
		    rxd.memcookie, rxd.ncookies, LDC_COPY_IN);

		/* if ldc_mem_copy() failed */
		if (rv) {
			DWARN(vgenp, ldcp, "ldc_mem_copy err rv(%d)\n", rv);
			statsp->ierrors++;
			freemsg(mp);
			goto error_ret;
		}

		ack_needed = hdrp->ack;

		rv = vnet_dring_entry_set_dstate(&(ldcp->rxdp[rxi]),
		    ldcp->dring_mtype, ldcp->rx_dhandle, rxi, rxi,
		    VIO_DESC_DONE);
		if (rv != 0) {
			DWARN(vgenp, ldcp,
			    "vnet_dring_entry_set_dstate err rv(%d)\n", rv);
			goto error_ret;
		}

		/* skip over the alignment pad to the start of the frame */
		mp->b_rptr += VNET_IPALIGN;

		if (ack_needed) {
			ack_needed = B_FALSE;
			/*
			 * sender needs ack for this packet,
			 * ack pkts upto this index.
			 */
			ack_end = rxi;

			rv = vgen_send_dring_ack(ldcp, tagp,
			    ack_start, ack_end, VIO_DP_ACTIVE);
			if (rv != VGEN_SUCCESS) {
				goto error_ret;
			}

			/* need to set new ack start index */
			set_ack_start = B_TRUE;
		}

		/* short copy: drop the frame, but keep processing */
		if (nread != nbytes) {
			DWARN(vgenp, ldcp,
			    "ldc_mem_copy nread(%lx), nbytes(%lx)\n",
			    nread, nbytes);
			statsp->ierrors++;
			freemsg(mp);
			goto vgen_next_rxi;
		}

		/* point to the actual end of data */
		mp->b_wptr = mp->b_rptr + rxd.nbytes;

		/* update stats */
		statsp->ipackets++;
		statsp->rbytes += rxd.nbytes;
		ehp = (struct ether_header *)mp->b_rptr;
		if (IS_BROADCAST(ehp))
			statsp->brdcstrcv++;
		else if (IS_MULTICAST(ehp))
			statsp->multircv++;

		/* build a chain of received packets */
		if (bp == NULL) {
			/* first pkt */
			bp = mp;
			bpt = bp;
			bpt->b_next = NULL;
		} else {
			mp->b_next = NULL;
			bpt->b_next = mp;
			bpt = mp;
		}

		/* flush the chain upstream once it grows long enough */
		if (count++ > vgen_chain_len) {
			DTRACE_PROBE1(vgen_rcv_msgs, int, count);
			vgen_rx(ldcp, bp);
			count = 0;
			bp = bpt = NULL;
		}

vgen_next_rxi:
		/* update end index of range of descrs to be ack'd */
		ack_end = rxi;

		/* update the next index to be processed */
		INCR_RXI(next_rxi, ldcp);
		if (next_rxi == start) {
			/*
			 * processed the entire descriptor ring upto
			 * the index at which we started.
			 */
			break;
		}

		rxi = next_rxi;

	_NOTE(CONSTCOND)
	} while (1);

	/*
	 * send an ack message to peer indicating that we have stopped
	 * processing descriptors.
	 */
	if (set_ack_start) {
		/*
		 * We have ack'd upto some index and we have not
		 * processed any descriptors beyond that index.
		 * Use the last ack'd index as both the start and
		 * end of range of descrs being ack'd.
		 * Note: This results in acking the last index twice
		 * and should be harmless.
		 */
		ack_start = ack_end;
	}

	rv = vgen_send_dring_ack(ldcp, tagp, ack_start, ack_end,
	    VIO_DP_STOPPED);
	if (rv != VGEN_SUCCESS) {
		goto error_ret;
	}

	/* save new recv index of next dring msg */
	ldcp->next_rxi = next_rxi;

error_ret:
	/* send up packets received so far */
	if (bp != NULL) {
		DTRACE_PROBE1(vgen_rcv_msgs, int, count);
		vgen_rx(ldcp, bp);
		bp = bpt = NULL;
	}
	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
	return (rv);

}
6057 
6058 static int
6059 vgen_handle_dring_data_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
6060 {
6061 	int rv = 0;
6062 	uint32_t start;
6063 	int32_t end;
6064 	uint32_t txi;
6065 	boolean_t ready_txd = B_FALSE;
6066 	vgen_stats_t *statsp;
6067 	vgen_private_desc_t *tbufp;
6068 	vnet_public_desc_t *txdp;
6069 	vio_dring_entry_hdr_t *hdrp;
6070 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6071 	vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;
6072 
6073 	DBG1(vgenp, ldcp, "enter\n");
6074 	start = dringmsg->start_idx;
6075 	end = dringmsg->end_idx;
6076 	statsp = &ldcp->stats;
6077 
6078 	/*
6079 	 * received an ack corresponding to a specific descriptor for
6080 	 * which we had set the ACK bit in the descriptor (during
6081 	 * transmit). This enables us to reclaim descriptors.
6082 	 */
6083 
6084 	DBG2(vgenp, ldcp, "ACK:  start(%d), end(%d)\n", start, end);
6085 
6086 	/* validate start and end indeces in the tx ack msg */
6087 	if (!(CHECK_TXI(start, ldcp)) || !(CHECK_TXI(end, ldcp))) {
6088 		/* drop the message if invalid index */
6089 		DWARN(vgenp, ldcp, "Invalid Tx ack start(%d) or end(%d)\n",
6090 		    start, end);
6091 		return (rv);
6092 	}
6093 	/* validate dring_ident */
6094 	if (dringmsg->dring_ident != ldcp->local_hparams.dring_ident) {
6095 		/* invalid dring_ident, drop the msg */
6096 		DWARN(vgenp, ldcp, "Invalid dring ident 0x%x\n",
6097 		    dringmsg->dring_ident);
6098 		return (rv);
6099 	}
6100 	statsp->dring_data_acks++;
6101 
6102 	/* reclaim descriptors that are done */
6103 	vgen_reclaim(ldcp);
6104 
6105 	if (dringmsg->dring_process_state != VIO_DP_STOPPED) {
6106 		/*
6107 		 * receiver continued processing descriptors after
6108 		 * sending us the ack.
6109 		 */
6110 		return (rv);
6111 	}
6112 
6113 	statsp->dring_stopped_acks++;
6114 
6115 	/* receiver stopped processing descriptors */
6116 	mutex_enter(&ldcp->wrlock);
6117 	mutex_enter(&ldcp->tclock);
6118 
6119 	/*
6120 	 * determine if there are any pending tx descriptors
6121 	 * ready to be processed by the receiver(peer) and if so,
6122 	 * send a message to the peer to restart receiving.
6123 	 */
6124 	ready_txd = B_FALSE;
6125 
6126 	/*
6127 	 * using the end index of the descriptor range for which
6128 	 * we received the ack, check if the next descriptor is
6129 	 * ready.
6130 	 */
6131 	txi = end;
6132 	INCR_TXI(txi, ldcp);
6133 	tbufp = &ldcp->tbufp[txi];
6134 	txdp = tbufp->descp;
6135 	hdrp = &txdp->hdr;
6136 	if (hdrp->dstate == VIO_DESC_READY) {
6137 		ready_txd = B_TRUE;
6138 	} else {
6139 		/*
6140 		 * descr next to the end of ack'd descr range is not
6141 		 * ready.
6142 		 * starting from the current reclaim index, check
6143 		 * if any descriptor is ready.
6144 		 */
6145 
6146 		txi = ldcp->cur_tbufp - ldcp->tbufp;
6147 		tbufp = &ldcp->tbufp[txi];
6148 
6149 		txdp = tbufp->descp;
6150 		hdrp = &txdp->hdr;
6151 		if (hdrp->dstate == VIO_DESC_READY) {
6152 			ready_txd = B_TRUE;
6153 		}
6154 
6155 	}
6156 
6157 	if (ready_txd) {
6158 		/*
6159 		 * we have tx descriptor(s) ready to be
6160 		 * processed by the receiver.
6161 		 * send a message to the peer with the start index
6162 		 * of ready descriptors.
6163 		 */
6164 		rv = vgen_send_dring_data(ldcp, txi, -1);
6165 		if (rv != VGEN_SUCCESS) {
6166 			ldcp->resched_peer = B_TRUE;
6167 			ldcp->resched_peer_txi = txi;
6168 			mutex_exit(&ldcp->tclock);
6169 			mutex_exit(&ldcp->wrlock);
6170 			return (rv);
6171 		}
6172 	} else {
6173 		/*
6174 		 * no ready tx descriptors. set the flag to send a
6175 		 * message to peer when tx descriptors are ready in
6176 		 * transmit routine.
6177 		 */
6178 		ldcp->resched_peer = B_TRUE;
6179 		ldcp->resched_peer_txi = ldcp->cur_tbufp - ldcp->tbufp;
6180 	}
6181 
6182 	mutex_exit(&ldcp->tclock);
6183 	mutex_exit(&ldcp->wrlock);
6184 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
6185 	return (rv);
6186 }
6187 
6188 static int
6189 vgen_handle_dring_data_nack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
6190 {
6191 	int rv = 0;
6192 	uint32_t start;
6193 	int32_t end;
6194 	uint32_t txi;
6195 	vnet_public_desc_t *txdp;
6196 	vio_dring_entry_hdr_t *hdrp;
6197 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6198 	vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;
6199 
6200 	DBG1(vgenp, ldcp, "enter\n");
6201 	start = dringmsg->start_idx;
6202 	end = dringmsg->end_idx;
6203 
6204 	/*
6205 	 * peer sent a NACK msg to indicate lost packets.
6206 	 * The start and end correspond to the range of descriptors
6207 	 * for which the peer didn't receive a dring data msg and so
6208 	 * didn't receive the corresponding data.
6209 	 */
6210 	DWARN(vgenp, ldcp, "NACK: start(%d), end(%d)\n", start, end);
6211 
6212 	/* validate start and end indeces in the tx nack msg */
6213 	if (!(CHECK_TXI(start, ldcp)) || !(CHECK_TXI(end, ldcp))) {
6214 		/* drop the message if invalid index */
6215 		DWARN(vgenp, ldcp, "Invalid Tx nack start(%d) or end(%d)\n",
6216 		    start, end);
6217 		return (rv);
6218 	}
6219 	/* validate dring_ident */
6220 	if (dringmsg->dring_ident != ldcp->local_hparams.dring_ident) {
6221 		/* invalid dring_ident, drop the msg */
6222 		DWARN(vgenp, ldcp, "Invalid dring ident 0x%x\n",
6223 		    dringmsg->dring_ident);
6224 		return (rv);
6225 	}
6226 	mutex_enter(&ldcp->txlock);
6227 	mutex_enter(&ldcp->tclock);
6228 
6229 	if (ldcp->next_tbufp == ldcp->cur_tbufp) {
6230 		/* no busy descriptors, bogus nack ? */
6231 		mutex_exit(&ldcp->tclock);
6232 		mutex_exit(&ldcp->txlock);
6233 		return (rv);
6234 	}
6235 
6236 	/* we just mark the descrs as done so they can be reclaimed */
6237 	for (txi = start; txi <= end; ) {
6238 		txdp = &(ldcp->txdp[txi]);
6239 		hdrp = &txdp->hdr;
6240 		if (hdrp->dstate == VIO_DESC_READY)
6241 			hdrp->dstate = VIO_DESC_DONE;
6242 		INCR_TXI(txi, ldcp);
6243 	}
6244 	mutex_exit(&ldcp->tclock);
6245 	mutex_exit(&ldcp->txlock);
6246 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
6247 	return (rv);
6248 }
6249 
/* Reclaim done tx descriptors, serialized under tclock. */
static void
vgen_reclaim(vgen_ldc_t *ldcp)
{
	mutex_enter(&ldcp->tclock);

	vgen_reclaim_dring(ldcp);
	/* record reclaim time; used by vgen_ldc_txtimeout() */
	ldcp->reclaim_lbolt = ddi_get_lbolt();

	mutex_exit(&ldcp->tclock);
}
6260 
6261 /*
6262  * transmit reclaim function. starting from the current reclaim index
6263  * look for descriptors marked DONE and reclaim the descriptor and the
6264  * corresponding buffers (tbuf).
6265  */
6266 static void
6267 vgen_reclaim_dring(vgen_ldc_t *ldcp)
6268 {
6269 	int count = 0;
6270 	vnet_public_desc_t *txdp;
6271 	vgen_private_desc_t *tbufp;
6272 	vio_dring_entry_hdr_t	*hdrp;
6273 
6274 #ifdef DEBUG
6275 	if (vgen_trigger_txtimeout)
6276 		return;
6277 #endif
6278 
6279 	tbufp = ldcp->cur_tbufp;
6280 	txdp = tbufp->descp;
6281 	hdrp = &txdp->hdr;
6282 
6283 	while ((hdrp->dstate == VIO_DESC_DONE) &&
6284 	    (tbufp != ldcp->next_tbufp)) {
6285 		tbufp->flags = VGEN_PRIV_DESC_FREE;
6286 		hdrp->dstate = VIO_DESC_FREE;
6287 		hdrp->ack = B_FALSE;
6288 
6289 		tbufp = NEXTTBUF(ldcp, tbufp);
6290 		txdp = tbufp->descp;
6291 		hdrp = &txdp->hdr;
6292 		count++;
6293 	}
6294 
6295 	ldcp->cur_tbufp = tbufp;
6296 
6297 	/*
6298 	 * Check if mac layer should be notified to restart transmissions
6299 	 */
6300 	if ((ldcp->need_resched) && (count > 0)) {
6301 		vio_net_tx_update_t vtx_update =
6302 		    ldcp->portp->vcb.vio_net_tx_update;
6303 
6304 		ldcp->need_resched = B_FALSE;
6305 		vtx_update(ldcp->portp->vhp);
6306 	}
6307 }
6308 
6309 /* return the number of pending transmits for the channel */
6310 static int
6311 vgen_num_txpending(vgen_ldc_t *ldcp)
6312 {
6313 	int n;
6314 
6315 	if (ldcp->next_tbufp >= ldcp->cur_tbufp) {
6316 		n = ldcp->next_tbufp - ldcp->cur_tbufp;
6317 	} else  {
6318 		/* cur_tbufp > next_tbufp */
6319 		n = ldcp->num_txds - (ldcp->cur_tbufp - ldcp->next_tbufp);
6320 	}
6321 
6322 	return (n);
6323 }
6324 
6325 /* determine if the transmit descriptor ring is full */
6326 static int
6327 vgen_tx_dring_full(vgen_ldc_t *ldcp)
6328 {
6329 	vgen_private_desc_t	*tbufp;
6330 	vgen_private_desc_t	*ntbufp;
6331 
6332 	tbufp = ldcp->next_tbufp;
6333 	ntbufp = NEXTTBUF(ldcp, tbufp);
6334 	if (ntbufp == ldcp->cur_tbufp) { /* out of tbufs/txds */
6335 		return (VGEN_SUCCESS);
6336 	}
6337 	return (VGEN_FAILURE);
6338 }
6339 
6340 /* determine if timeout condition has occured */
6341 static int
6342 vgen_ldc_txtimeout(vgen_ldc_t *ldcp)
6343 {
6344 	if (((ddi_get_lbolt() - ldcp->reclaim_lbolt) >
6345 	    drv_usectohz(vnet_ldcwd_txtimeout * 1000)) &&
6346 	    (vnet_ldcwd_txtimeout) &&
6347 	    (vgen_tx_dring_full(ldcp) == VGEN_SUCCESS)) {
6348 		return (VGEN_SUCCESS);
6349 	} else {
6350 		return (VGEN_FAILURE);
6351 	}
6352 }
6353 
6354 /* transmit watchdog timeout handler */
6355 static void
6356 vgen_ldc_watchdog(void *arg)
6357 {
6358 	vgen_ldc_t *ldcp;
6359 	vgen_t *vgenp;
6360 	int rv;
6361 
6362 	ldcp = (vgen_ldc_t *)arg;
6363 	vgenp = LDC_TO_VGEN(ldcp);
6364 
6365 	rv = vgen_ldc_txtimeout(ldcp);
6366 	if (rv == VGEN_SUCCESS) {
6367 		DWARN(vgenp, ldcp, "transmit timeout\n");
6368 #ifdef DEBUG
6369 		if (vgen_trigger_txtimeout) {
6370 			/* tx timeout triggered for debugging */
6371 			vgen_trigger_txtimeout = 0;
6372 		}
6373 #endif
6374 		mutex_enter(&ldcp->cblock);
6375 		ldcp->need_ldc_reset = B_TRUE;
6376 		vgen_handshake_retry(ldcp);
6377 		mutex_exit(&ldcp->cblock);
6378 		if (ldcp->need_resched) {
6379 			vio_net_tx_update_t vtx_update =
6380 			    ldcp->portp->vcb.vio_net_tx_update;
6381 
6382 			ldcp->need_resched = B_FALSE;
6383 			vtx_update(ldcp->portp->vhp);
6384 		}
6385 	}
6386 
6387 	ldcp->wd_tid = timeout(vgen_ldc_watchdog, (caddr_t)ldcp,
6388 	    drv_usectohz(vnet_ldcwd_interval * 1000));
6389 }
6390 
6391 /* handler for error messages received from the peer ldc end-point */
static void
vgen_handle_errmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
{
	/* stub: error messages from the peer are currently discarded */
	_NOTE(ARGUNUSED(ldcp, tagp))
}
6397 
6398 static int
6399 vgen_check_datamsg_seq(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
6400 {
6401 	vio_raw_data_msg_t	*rmsg;
6402 	vio_dring_msg_t		*dmsg;
6403 	uint64_t		seq_num;
6404 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
6405 
6406 	if (tagp->vio_subtype_env == VIO_DRING_DATA) {
6407 		dmsg = (vio_dring_msg_t *)tagp;
6408 		seq_num = dmsg->seq_num;
6409 	} else if (tagp->vio_subtype_env == VIO_PKT_DATA) {
6410 		rmsg = (vio_raw_data_msg_t *)tagp;
6411 		seq_num = rmsg->seq_num;
6412 	} else {
6413 		return (EINVAL);
6414 	}
6415 
6416 	if (seq_num != ldcp->next_rxseq) {
6417 
6418 		/* seqnums don't match */
6419 		DWARN(vgenp, ldcp,
6420 		    "next_rxseq(0x%lx) != seq_num(0x%lx)\n",
6421 		    ldcp->next_rxseq, seq_num);
6422 
6423 		ldcp->need_ldc_reset = B_TRUE;
6424 		return (EINVAL);
6425 
6426 	}
6427 
6428 	ldcp->next_rxseq++;
6429 
6430 	return (0);
6431 }
6432 
6433 /* Check if the session id in the received message is valid */
6434 static int
6435 vgen_check_sid(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
6436 {
6437 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6438 
6439 	if (tagp->vio_sid != ldcp->peer_sid) {
6440 		DWARN(vgenp, ldcp, "sid mismatch: expected(%x), rcvd(%x)\n",
6441 		    ldcp->peer_sid, tagp->vio_sid);
6442 		return (VGEN_FAILURE);
6443 	}
6444 	else
6445 		return (VGEN_SUCCESS);
6446 }
6447 
/*
 * Format a 6-byte ethernet address into ebuf and return ebuf.
 * NOTE(review): sprintf here is unbounded; the worst-case output is
 * "xx:xx:xx:xx:xx:xx" plus NUL = 18 bytes, so callers must supply a
 * buffer of at least 18 bytes.
 */
static caddr_t
vgen_print_ethaddr(uint8_t *a, char *ebuf)
{
	(void) sprintf(ebuf,
	    "%x:%x:%x:%x:%x:%x", a[0], a[1], a[2], a[3], a[4], a[5]);
	return (ebuf);
}
6455 
6456 /* Handshake watchdog timeout handler */
6457 static void
6458 vgen_hwatchdog(void *arg)
6459 {
6460 	vgen_ldc_t *ldcp = (vgen_ldc_t *)arg;
6461 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6462 
6463 	DWARN(vgenp, ldcp,
6464 	    "handshake timeout ldc(%lx) phase(%x) state(%x)\n",
6465 	    ldcp->hphase, ldcp->hstate);
6466 
6467 	mutex_enter(&ldcp->cblock);
6468 	if (ldcp->cancel_htid) {
6469 		ldcp->cancel_htid = 0;
6470 		mutex_exit(&ldcp->cblock);
6471 		return;
6472 	}
6473 	ldcp->htid = 0;
6474 	ldcp->need_ldc_reset = B_TRUE;
6475 	vgen_handshake_retry(ldcp);
6476 	mutex_exit(&ldcp->cblock);
6477 }
6478 
6479 static void
6480 vgen_print_hparams(vgen_hparams_t *hp)
6481 {
6482 	uint8_t	addr[6];
6483 	char	ea[6];
6484 	ldc_mem_cookie_t *dc;
6485 
6486 	cmn_err(CE_CONT, "version_info:\n");
6487 	cmn_err(CE_CONT,
6488 	    "\tver_major: %d, ver_minor: %d, dev_class: %d\n",
6489 	    hp->ver_major, hp->ver_minor, hp->dev_class);
6490 
6491 	vnet_macaddr_ultostr(hp->addr, addr);
6492 	cmn_err(CE_CONT, "attr_info:\n");
6493 	cmn_err(CE_CONT, "\tMTU: %lx, addr: %s\n", hp->mtu,
6494 	    vgen_print_ethaddr(addr, ea));
6495 	cmn_err(CE_CONT,
6496 	    "\taddr_type: %x, xfer_mode: %x, ack_freq: %x\n",
6497 	    hp->addr_type, hp->xfer_mode, hp->ack_freq);
6498 
6499 	dc = &hp->dring_cookie;
6500 	cmn_err(CE_CONT, "dring_info:\n");
6501 	cmn_err(CE_CONT,
6502 	    "\tlength: %d, dsize: %d\n", hp->num_desc, hp->desc_size);
6503 	cmn_err(CE_CONT,
6504 	    "\tldc_addr: 0x%lx, ldc_size: %ld\n",
6505 	    dc->addr, dc->size);
6506 	cmn_err(CE_CONT, "\tdring_ident: 0x%lx\n", hp->dring_ident);
6507 }
6508 
6509 static void
6510 vgen_print_ldcinfo(vgen_ldc_t *ldcp)
6511 {
6512 	vgen_hparams_t *hp;
6513 
6514 	cmn_err(CE_CONT, "Channel Information:\n");
6515 	cmn_err(CE_CONT,
6516 	    "\tldc_id: 0x%lx, ldc_status: 0x%x\n",
6517 	    ldcp->ldc_id, ldcp->ldc_status);
6518 	cmn_err(CE_CONT,
6519 	    "\tlocal_sid: 0x%x, peer_sid: 0x%x\n",
6520 	    ldcp->local_sid, ldcp->peer_sid);
6521 	cmn_err(CE_CONT,
6522 	    "\thphase: 0x%x, hstate: 0x%x\n",
6523 	    ldcp->hphase, ldcp->hstate);
6524 
6525 	cmn_err(CE_CONT, "Local handshake params:\n");
6526 	hp = &ldcp->local_hparams;
6527 	vgen_print_hparams(hp);
6528 
6529 	cmn_err(CE_CONT, "Peer handshake params:\n");
6530 	hp = &ldcp->peer_hparams;
6531 	vgen_print_hparams(hp);
6532 }
6533 
6534 /*
6535  * Send received packets up the stack.
6536  */
6537 static void
6538 vgen_rx(vgen_ldc_t *ldcp, mblk_t *bp)
6539 {
6540 	vio_net_rx_cb_t vrx_cb = ldcp->portp->vcb.vio_net_rx_cb;
6541 
6542 	if (ldcp->rcv_thread != NULL) {
6543 		ASSERT(MUTEX_HELD(&ldcp->rxlock));
6544 		mutex_exit(&ldcp->rxlock);
6545 	} else {
6546 		ASSERT(MUTEX_HELD(&ldcp->cblock));
6547 		mutex_exit(&ldcp->cblock);
6548 	}
6549 
6550 	vrx_cb(ldcp->portp->vhp, bp);
6551 
6552 	if (ldcp->rcv_thread != NULL) {
6553 		mutex_enter(&ldcp->rxlock);
6554 	} else {
6555 		mutex_enter(&ldcp->cblock);
6556 	}
6557 }
6558 
6559 /*
6560  * vgen_ldc_rcv_worker -- A per LDC worker thread to receive data.
6561  * This thread is woken up by the LDC interrupt handler to process
6562  * LDC packets and receive data.
6563  */
6564 static void
6565 vgen_ldc_rcv_worker(void *arg)
6566 {
6567 	callb_cpr_t	cprinfo;
6568 	vgen_ldc_t *ldcp = (vgen_ldc_t *)arg;
6569 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6570 
6571 	DBG1(vgenp, ldcp, "enter\n");
6572 	CALLB_CPR_INIT(&cprinfo, &ldcp->rcv_thr_lock, callb_generic_cpr,
6573 	    "vnet_rcv_thread");
6574 	mutex_enter(&ldcp->rcv_thr_lock);
6575 	ldcp->rcv_thr_flags |= VGEN_WTHR_RUNNING;
6576 	while (!(ldcp->rcv_thr_flags & VGEN_WTHR_STOP)) {
6577 
6578 		CALLB_CPR_SAFE_BEGIN(&cprinfo);
6579 		/*
6580 		 * Wait until the data is received or a stop
6581 		 * request is received.
6582 		 */
6583 		while (!(ldcp->rcv_thr_flags &
6584 		    (VGEN_WTHR_DATARCVD | VGEN_WTHR_STOP))) {
6585 			cv_wait(&ldcp->rcv_thr_cv, &ldcp->rcv_thr_lock);
6586 		}
6587 		CALLB_CPR_SAFE_END(&cprinfo, &ldcp->rcv_thr_lock)
6588 
6589 		/*
6590 		 * First process the stop request.
6591 		 */
6592 		if (ldcp->rcv_thr_flags & VGEN_WTHR_STOP) {
6593 			DBG2(vgenp, ldcp, "stopped\n");
6594 			break;
6595 		}
6596 		ldcp->rcv_thr_flags &= ~VGEN_WTHR_DATARCVD;
6597 		mutex_exit(&ldcp->rcv_thr_lock);
6598 		DBG2(vgenp, ldcp, "calling vgen_handle_evt_read\n");
6599 		vgen_handle_evt_read(ldcp);
6600 		mutex_enter(&ldcp->rcv_thr_lock);
6601 	}
6602 
6603 	/*
6604 	 * Update the run status and wakeup the thread that
6605 	 * has sent the stop request.
6606 	 */
6607 	ldcp->rcv_thr_flags &= ~VGEN_WTHR_RUNNING;
6608 	cv_signal(&ldcp->rcv_thr_cv);
6609 	CALLB_CPR_EXIT(&cprinfo);
6610 	thread_exit();
6611 	DBG1(vgenp, ldcp, "exit\n");
6612 }
6613 
6614 /* vgen_stop_rcv_thread -- Co-ordinate with receive thread to stop it */
6615 static void
6616 vgen_stop_rcv_thread(vgen_ldc_t *ldcp)
6617 {
6618 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6619 
6620 	DBG1(vgenp, ldcp, "enter\n");
6621 	/*
6622 	 * Send a stop request by setting the stop flag and
6623 	 * wait until the receive thread stops.
6624 	 */
6625 	mutex_enter(&ldcp->rcv_thr_lock);
6626 	if (ldcp->rcv_thr_flags & VGEN_WTHR_RUNNING) {
6627 		ldcp->rcv_thr_flags |= VGEN_WTHR_STOP;
6628 		cv_signal(&ldcp->rcv_thr_cv);
6629 		DBG2(vgenp, ldcp, "waiting...");
6630 		while (ldcp->rcv_thr_flags & VGEN_WTHR_RUNNING) {
6631 			cv_wait(&ldcp->rcv_thr_cv, &ldcp->rcv_thr_lock);
6632 		}
6633 	}
6634 	mutex_exit(&ldcp->rcv_thr_lock);
6635 	ldcp->rcv_thread = NULL;
6636 	DBG1(vgenp, ldcp, "exit\n");
6637 }
6638 
6639 /*
6640  * vgen_dds_rx -- post DDS messages to vnet.
6641  */
6642 static int
6643 vgen_dds_rx(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
6644 {
6645 	vio_dds_msg_t *dmsg = (vio_dds_msg_t *)tagp;
6646 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6647 
6648 	if (dmsg->dds_class != DDS_VNET_NIU) {
6649 		DWARN(vgenp, ldcp, "Unknown DDS class, dropping");
6650 		return (EBADMSG);
6651 	}
6652 	vnet_dds_rx(vgenp->vnetp, dmsg);
6653 	return (0);
6654 }
6655 
6656 /*
6657  * vgen_dds_tx -- an interface called by vnet to send DDS messages.
6658  */
6659 int
6660 vgen_dds_tx(void *arg, void *msg)
6661 {
6662 	vgen_t *vgenp = arg;
6663 	vio_dds_msg_t *dmsg = msg;
6664 	vgen_portlist_t *plistp = &vgenp->vgenports;
6665 	vgen_ldc_t *ldcp;
6666 	vgen_ldclist_t *ldclp;
6667 	int rv = EIO;
6668 
6669 
6670 	READ_ENTER(&plistp->rwlock);
6671 	ldclp = &(vgenp->vsw_portp->ldclist);
6672 	READ_ENTER(&ldclp->rwlock);
6673 	ldcp = ldclp->headp;
6674 	if ((ldcp == NULL) || (ldcp->hphase != VH_DONE)) {
6675 		goto vgen_dsend_exit;
6676 	}
6677 
6678 	dmsg->tag.vio_sid = ldcp->local_sid;
6679 	rv = vgen_sendmsg(ldcp, (caddr_t)dmsg, sizeof (vio_dds_msg_t), B_FALSE);
6680 	if (rv != VGEN_SUCCESS) {
6681 		rv = EIO;
6682 	} else {
6683 		rv = 0;
6684 	}
6685 
6686 vgen_dsend_exit:
6687 	RW_EXIT(&ldclp->rwlock);
6688 	RW_EXIT(&plistp->rwlock);
6689 	return (rv);
6690 
6691 }
6692 
#if DEBUG

/*
 * Print debug messages - set to 0xf to enable all msgs.
 * Builds a "vnetN:ldc(M):func: <msg>" line in a local buffer and emits
 * it via cmn_err(); output can be restricted to one channel through the
 * vgendbg_ldcid tunable (-1 means all channels).
 *
 * BUG FIX: the buffer was filled with unbounded sprintf()/vsprintf(),
 * so a long caller-supplied format could overflow the 256-byte stack
 * buffer. Use snprintf()/vsnprintf() with the remaining space instead.
 */
static void
debug_printf(const char *fname, vgen_t *vgenp,
    vgen_ldc_t *ldcp, const char *fmt, ...)
{
	char    buf[256];
	char    *bufp = buf;
	char    *endp = buf + sizeof (buf);
	va_list ap;

	if ((vgenp != NULL) && (vgenp->vnetp != NULL)) {
		(void) snprintf(bufp, endp - bufp, "vnet%d:",
		    ((vnet_t *)(vgenp->vnetp))->instance);
		bufp += strlen(bufp);
	}
	if (ldcp != NULL) {
		(void) snprintf(bufp, endp - bufp, "ldc(%ld):", ldcp->ldc_id);
		bufp += strlen(bufp);
	}
	(void) snprintf(bufp, endp - bufp, "%s: ", fname);
	bufp += strlen(bufp);

	va_start(ap, fmt);
	(void) vsnprintf(bufp, endp - bufp, fmt, ap);
	va_end(ap);

	if ((ldcp == NULL) || (vgendbg_ldcid == -1) ||
	    (vgendbg_ldcid == ldcp->ldc_id)) {
		cmn_err(CE_CONT, "%s\n", buf);
	}
}
#endif
6728