xref: /titanic_41/usr/src/uts/sun4v/io/vnet_gen.c (revision 7c500f1b365962a44422512b5c00083faa05a79e)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/types.h>
28 #include <sys/errno.h>
29 #include <sys/sysmacros.h>
30 #include <sys/param.h>
31 #include <sys/stream.h>
32 #include <sys/strsubr.h>
33 #include <sys/kmem.h>
34 #include <sys/conf.h>
35 #include <sys/devops.h>
36 #include <sys/ksynch.h>
37 #include <sys/stat.h>
38 #include <sys/modctl.h>
39 #include <sys/debug.h>
40 #include <sys/ethernet.h>
41 #include <sys/ddi.h>
42 #include <sys/sunddi.h>
43 #include <sys/strsun.h>
44 #include <sys/note.h>
45 #include <sys/mac_provider.h>
46 #include <sys/mac_ether.h>
47 #include <sys/ldc.h>
48 #include <sys/mach_descrip.h>
49 #include <sys/mdeg.h>
50 #include <net/if.h>
51 #include <sys/vnet.h>
52 #include <sys/vio_mailbox.h>
53 #include <sys/vio_common.h>
54 #include <sys/vnet_common.h>
55 #include <sys/vnet_mailbox.h>
56 #include <sys/vio_util.h>
57 #include <sys/vnet_gen.h>
58 #include <sys/atomic.h>
59 #include <sys/callb.h>
60 #include <sys/sdt.h>
61 #include <sys/intr.h>
62 #include <sys/pattr.h>
63 #include <sys/vlan.h>
64 
65 /*
66  * Implementation of the mac functionality for vnet using the
67  * generic(default) transport layer of sun4v Logical Domain Channels(LDC).
68  */
69 
70 /*
71  * Function prototypes.
72  */
73 /* vgen proxy entry points */
74 int vgen_init(void *vnetp, uint64_t regprop, dev_info_t *vnetdip,
75     const uint8_t *macaddr, void **vgenhdl);
76 int vgen_init_mdeg(void *arg);
77 void vgen_uninit(void *arg);
78 int vgen_dds_tx(void *arg, void *dmsg);
79 void vgen_mod_init(void);
80 int vgen_mod_cleanup(void);
81 void vgen_mod_fini(void);
82 int vgen_enable_intr(void *arg);
83 int vgen_disable_intr(void *arg);
84 mblk_t *vgen_poll(void *arg, int bytes_to_pickup);
85 static int vgen_start(void *arg);
86 static void vgen_stop(void *arg);
87 static mblk_t *vgen_tx(void *arg, mblk_t *mp);
88 static int vgen_multicst(void *arg, boolean_t add,
89 	const uint8_t *mca);
90 static int vgen_promisc(void *arg, boolean_t on);
91 static int vgen_unicst(void *arg, const uint8_t *mca);
92 static int vgen_stat(void *arg, uint_t stat, uint64_t *val);
93 static void vgen_ioctl(void *arg, queue_t *q, mblk_t *mp);
94 #ifdef	VNET_IOC_DEBUG
95 static int vgen_force_link_state(vgen_port_t *portp, int link_state);
96 #endif
97 
98 /* vgen internal functions */
99 static int vgen_read_mdprops(vgen_t *vgenp);
100 static void vgen_update_md_prop(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
101 static void vgen_read_pri_eth_types(vgen_t *vgenp, md_t *mdp,
102 	mde_cookie_t node);
103 static void vgen_mtu_read(vgen_t *vgenp, md_t *mdp, mde_cookie_t node,
104 	uint32_t *mtu);
105 static void vgen_linkprop_read(vgen_t *vgenp, md_t *mdp, mde_cookie_t node,
106 	boolean_t *pls);
107 static void vgen_detach_ports(vgen_t *vgenp);
108 static void vgen_port_detach(vgen_port_t *portp);
109 static void vgen_port_list_insert(vgen_port_t *portp);
110 static void vgen_port_list_remove(vgen_port_t *portp);
111 static vgen_port_t *vgen_port_lookup(vgen_portlist_t *plistp,
112 	int port_num);
113 static int vgen_mdeg_reg(vgen_t *vgenp);
114 static void vgen_mdeg_unreg(vgen_t *vgenp);
115 static int vgen_mdeg_cb(void *cb_argp, mdeg_result_t *resp);
116 static int vgen_mdeg_port_cb(void *cb_argp, mdeg_result_t *resp);
117 static int vgen_add_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
118 static int vgen_port_read_props(vgen_port_t *portp, vgen_t *vgenp, md_t *mdp,
119 	mde_cookie_t mdex);
120 static int vgen_remove_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
121 static int vgen_port_attach(vgen_port_t *portp);
122 static void vgen_port_detach_mdeg(vgen_port_t *portp);
123 static int vgen_update_port(vgen_t *vgenp, md_t *curr_mdp,
124 	mde_cookie_t curr_mdex, md_t *prev_mdp, mde_cookie_t prev_mdex);
125 static uint64_t	vgen_port_stat(vgen_port_t *portp, uint_t stat);
126 static void vgen_port_reset(vgen_port_t *portp);
127 static void vgen_reset_vsw_port(vgen_t *vgenp);
128 static void vgen_ldc_reset(vgen_ldc_t *ldcp);
129 static int vgen_ldc_attach(vgen_port_t *portp, uint64_t ldc_id);
130 static void vgen_ldc_detach(vgen_ldc_t *ldcp);
131 static int vgen_alloc_tx_ring(vgen_ldc_t *ldcp);
132 static void vgen_free_tx_ring(vgen_ldc_t *ldcp);
133 static void vgen_init_ports(vgen_t *vgenp);
134 static void vgen_port_init(vgen_port_t *portp);
135 static void vgen_uninit_ports(vgen_t *vgenp);
136 static void vgen_port_uninit(vgen_port_t *portp);
137 static void vgen_init_ldcs(vgen_port_t *portp);
138 static void vgen_uninit_ldcs(vgen_port_t *portp);
139 static int vgen_ldc_init(vgen_ldc_t *ldcp);
140 static void vgen_ldc_uninit(vgen_ldc_t *ldcp);
141 static int vgen_init_tbufs(vgen_ldc_t *ldcp);
142 static void vgen_uninit_tbufs(vgen_ldc_t *ldcp);
143 static void vgen_clobber_tbufs(vgen_ldc_t *ldcp);
144 static void vgen_clobber_rxds(vgen_ldc_t *ldcp);
145 static uint64_t	vgen_ldc_stat(vgen_ldc_t *ldcp, uint_t stat);
146 static uint_t vgen_ldc_cb(uint64_t event, caddr_t arg);
147 static int vgen_portsend(vgen_port_t *portp, mblk_t *mp);
148 static int vgen_ldcsend(void *arg, mblk_t *mp);
149 static void vgen_ldcsend_pkt(void *arg, mblk_t *mp);
150 static int vgen_ldcsend_dring(void *arg, mblk_t *mp);
151 static void vgen_reclaim(vgen_ldc_t *ldcp);
152 static void vgen_reclaim_dring(vgen_ldc_t *ldcp);
153 static int vgen_num_txpending(vgen_ldc_t *ldcp);
154 static int vgen_tx_dring_full(vgen_ldc_t *ldcp);
155 static int vgen_ldc_txtimeout(vgen_ldc_t *ldcp);
156 static void vgen_ldc_watchdog(void *arg);
157 static mblk_t *vgen_ldc_poll(vgen_ldc_t *ldcp, int bytes_to_pickup);
158 
159 /* vgen handshake functions */
160 static vgen_ldc_t *vh_nextphase(vgen_ldc_t *ldcp);
161 static int vgen_sendmsg(vgen_ldc_t *ldcp, caddr_t msg,  size_t msglen,
162 	boolean_t caller_holds_lock);
163 static int vgen_send_version_negotiate(vgen_ldc_t *ldcp);
164 static int vgen_send_attr_info(vgen_ldc_t *ldcp);
165 static int vgen_send_dring_reg(vgen_ldc_t *ldcp);
166 static int vgen_send_rdx_info(vgen_ldc_t *ldcp);
167 static int vgen_send_dring_data(vgen_ldc_t *ldcp, uint32_t start, int32_t end);
168 static int vgen_send_mcast_info(vgen_ldc_t *ldcp);
169 static int vgen_handshake_phase2(vgen_ldc_t *ldcp);
170 static void vgen_handshake_reset(vgen_ldc_t *ldcp);
171 static void vgen_reset_hphase(vgen_ldc_t *ldcp);
172 static void vgen_handshake(vgen_ldc_t *ldcp);
173 static int vgen_handshake_done(vgen_ldc_t *ldcp);
174 static void vgen_handshake_retry(vgen_ldc_t *ldcp);
175 static int vgen_handle_version_negotiate(vgen_ldc_t *ldcp,
176 	vio_msg_tag_t *tagp);
177 static int vgen_handle_attr_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
178 static int vgen_handle_dring_reg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
179 static int vgen_handle_rdx_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
180 static int vgen_handle_mcast_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
181 static int vgen_handle_ctrlmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
182 static void vgen_handle_pkt_data_nop(void *arg1, void *arg2, uint32_t msglen);
183 static void vgen_handle_pkt_data(void *arg1, void *arg2, uint32_t msglen);
184 static int vgen_handle_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
185 static int vgen_handle_dring_data_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
186 static int vgen_process_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
187 static int vgen_handle_dring_data_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
188 static int vgen_handle_dring_data_nack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
189 static int vgen_send_dring_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp,
190 	uint32_t start, int32_t end, uint8_t pstate);
191 static int vgen_handle_datamsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp,
192 	uint32_t msglen);
193 static void vgen_handle_errmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
194 static void vgen_handle_evt_up(vgen_ldc_t *ldcp);
195 static void vgen_handle_evt_reset(vgen_ldc_t *ldcp);
196 static int vgen_check_sid(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
197 static int vgen_check_datamsg_seq(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
198 static caddr_t vgen_print_ethaddr(uint8_t *a, char *ebuf);
199 static void vgen_hwatchdog(void *arg);
200 static void vgen_print_attr_info(vgen_ldc_t *ldcp, int endpoint);
201 static void vgen_print_hparams(vgen_hparams_t *hp);
202 static void vgen_print_ldcinfo(vgen_ldc_t *ldcp);
203 static void vgen_stop_rcv_thread(vgen_ldc_t *ldcp);
204 static void vgen_drain_rcv_thread(vgen_ldc_t *ldcp);
205 static void vgen_ldc_rcv_worker(void *arg);
206 static void vgen_handle_evt_read(vgen_ldc_t *ldcp);
207 static void vgen_rx(vgen_ldc_t *ldcp, mblk_t *bp, mblk_t *bpt);
208 static void vgen_set_vnet_proto_ops(vgen_ldc_t *ldcp);
209 static void vgen_reset_vnet_proto_ops(vgen_ldc_t *ldcp);
210 static void vgen_link_update(vgen_t *vgenp, link_state_t link_state);
211 
212 /* VLAN routines */
213 static void vgen_vlan_read_ids(void *arg, int type, md_t *mdp,
214 	mde_cookie_t node, uint16_t *pvidp, uint16_t **vidspp,
215 	uint16_t *nvidsp, uint16_t *default_idp);
216 static void vgen_vlan_create_hash(vgen_port_t *portp);
217 static void vgen_vlan_destroy_hash(vgen_port_t *portp);
218 static void vgen_vlan_add_ids(vgen_port_t *portp);
219 static void vgen_vlan_remove_ids(vgen_port_t *portp);
220 static boolean_t vgen_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid);
221 static boolean_t vgen_frame_lookup_vid(vnet_t *vnetp, struct ether_header *ehp,
222 	uint16_t *vidp);
223 static mblk_t *vgen_vlan_frame_fixtag(vgen_port_t *portp, mblk_t *mp,
224 	boolean_t is_tagged, uint16_t vid);
225 static void vgen_vlan_unaware_port_reset(vgen_port_t *portp);
226 static void vgen_reset_vlan_unaware_ports(vgen_t *vgenp);
227 static int vgen_dds_rx(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
228 
229 /* externs */
230 extern void vnet_dds_rx(void *arg, void *dmsg);
231 extern void vnet_dds_cleanup_hio(vnet_t *vnetp);
232 extern int vnet_mtu_update(vnet_t *vnetp, uint32_t mtu);
233 extern void vnet_link_update(vnet_t *vnetp, link_state_t link_state);
234 
235 /*
236  * The handshake process consists of 5 phases defined below, with VH_PHASE0
237  * being the pre-handshake phase and VH_DONE is the phase to indicate
238  * successful completion of all phases.
239  * Each phase may have one to several handshake states which are required
240  * to complete successfully to move to the next phase.
241  * Refer to the functions vgen_handshake() and vgen_handshake_done() for
242  * more details.
243  */
/*
 * Handshake phases. VH_PHASE0..VH_PHASE3 are numbered sequentially from 0;
 * VH_DONE is given a distinct value (0x80) outside that sequence.
 */
enum {
	VH_PHASE0 = 0,
	VH_PHASE1 = 1,
	VH_PHASE2 = 2,
	VH_PHASE3 = 3,
	VH_DONE = 0x80
};
246 
/*
 * Handshake states. Each state is a single bit so that several of them can
 * be accumulated in one mask. Every group (version, attributes, descriptor
 * ring, RDX) has the same four events - info sent, ack received, info
 * received, ack sent - plus a composite value that is the OR of the two
 * "ack" bits of that group.
 */
enum {

	/* version negotiation */
	VER_INFO_SENT		= (1 << 0),
	VER_ACK_RCVD		= (1 << 1),
	VER_INFO_RCVD		= (1 << 2),
	VER_ACK_SENT		= (1 << 3),
	VER_NEGOTIATED		= (VER_ACK_RCVD | VER_ACK_SENT),

	/* attribute exchange */
	ATTR_INFO_SENT		= (1 << 4),
	ATTR_ACK_RCVD		= (1 << 5),
	ATTR_INFO_RCVD		= (1 << 6),
	ATTR_ACK_SENT		= (1 << 7),
	ATTR_INFO_EXCHANGED	= (ATTR_ACK_RCVD | ATTR_ACK_SENT),

	/* descriptor ring registration */
	DRING_INFO_SENT		= (1 << 8),
	DRING_ACK_RCVD		= (1 << 9),
	DRING_INFO_RCVD		= (1 << 10),
	DRING_ACK_SENT		= (1 << 11),
	DRING_INFO_EXCHANGED	= (DRING_ACK_RCVD | DRING_ACK_SENT),

	/* RDX exchange */
	RDX_INFO_SENT		= (1 << 12),
	RDX_ACK_RCVD		= (1 << 13),
	RDX_INFO_RCVD		= (1 << 14),
	RDX_ACK_SENT		= (1 << 15),
	RDX_EXCHANGED		= (RDX_ACK_RCVD | RDX_ACK_SENT)

};
275 
/*
 * Evaluates to true when the vgen instance has a non-zero count of priority
 * ethertypes (pri_num_types).
 */
#define	VGEN_PRI_ETH_DEFINED(vgenp)	((vgenp)->pri_num_types != 0)
277 
/*
 * Acquire / release all five per-channel mutexes in one step.
 *
 * LDC_LOCK takes them in the order cblock, rxlock, wrlock, txlock, tclock;
 * LDC_UNLOCK releases them in exactly the reverse order.
 *
 * Both macros are wrapped in do { } while (0) so that each expands to a
 * single statement. Without the wrapper, using one as the body of an
 * unbraced if/else/loop would make only the first mutex operation
 * conditional. Invoke with a trailing semicolon, e.g. "LDC_LOCK(ldcp);".
 */
#define	LDC_LOCK(ldcp)	\
	do {							\
		mutex_enter(&((ldcp)->cblock));			\
		mutex_enter(&((ldcp)->rxlock));			\
		mutex_enter(&((ldcp)->wrlock));			\
		mutex_enter(&((ldcp)->txlock));			\
		mutex_enter(&((ldcp)->tclock));			\
	} while (0)
#define	LDC_UNLOCK(ldcp)	\
	do {							\
		mutex_exit(&((ldcp)->tclock));			\
		mutex_exit(&((ldcp)->txlock));			\
		mutex_exit(&((ldcp)->wrlock));			\
		mutex_exit(&((ldcp)->rxlock));			\
		mutex_exit(&((ldcp)->cblock));			\
	} while (0)
290 
/*
 * Compare the protocol version stored in a channel's local_hparams
 * (ver_major.ver_minor) with the given major.minor pair.
 *
 *   VGEN_VER_EQ:   stored version == major.minor
 *   VGEN_VER_LT:   stored version <  major.minor
 *   VGEN_VER_GTEQ: stored version >= major.minor
 *
 * Note that the arguments may be evaluated more than once.
 */
#define	VGEN_VER_EQ(ldcp, major, minor)					\
	(((ldcp)->local_hparams.ver_major == (major)) &&		\
	((ldcp)->local_hparams.ver_minor == (minor)))

#define	VGEN_VER_LT(ldcp, major, minor)					\
	(((major) > (ldcp)->local_hparams.ver_major) ||			\
	(((ldcp)->local_hparams.ver_major == (major)) &&		\
	((minor) > (ldcp)->local_hparams.ver_minor)))

#define	VGEN_VER_GTEQ(ldcp, major, minor)				\
	(((major) < (ldcp)->local_hparams.ver_major) ||			\
	(((ldcp)->local_hparams.ver_major == (major)) &&		\
	((minor) <= (ldcp)->local_hparams.ver_minor)))
304 
/* All-ones (ff:ff:ff:ff:ff:ff) ethernet address, used by IS_BROADCAST(). */
static struct ether_addr etherbroadcastaddr = {
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};
/*
 * MIB II broadcast/multicast packets
 *
 * Both macros take a pointer to an ethernet header (anything with an
 * ether_dhost member):
 *   IS_BROADCAST: destination address equals etherbroadcastaddr.
 *   IS_MULTICAST: low-order bit of the first destination octet is set.
 *
 * The argument is parenthesized so that expressions such as "p + 1" or
 * "&hdr" can be passed safely.
 */
#define	IS_BROADCAST(ehp) \
		(ether_cmp(&(ehp)->ether_dhost, &etherbroadcastaddr) == 0)
#define	IS_MULTICAST(ehp) \
		(((ehp)->ether_dhost.ether_addr_octet[0] & 01) == 1)
315 
/*
 * Property names
 *
 * String constants for the property names referenced by this file
 * (presumably looked up in the machine description - confirm at the
 * individual use sites).
 */
static char macaddr_propname[] = "mac-address";
static char rmacaddr_propname[] = "remote-mac-address";
static char channel_propname[] = "channel-endpoint";
static char reg_propname[] = "reg";
static char port_propname[] = "port";
static char swport_propname[] = "switch-port";
static char id_propname[] = "id";
static char vdev_propname[] = "virtual-device";
static char vnet_propname[] = "network";
static char pri_types_propname[] = "priority-ether-types";
/* VLAN related property names */
static char vgen_pvid_propname[] = "port-vlan-id";
static char vgen_vid_propname[] = "vlan-id";
static char vgen_dvid_propname[] = "default-vlan-id";
static char port_pvid_propname[] = "remote-port-vlan-id";
static char port_vid_propname[] = "remote-vlan-id";
/* MTU and link property names */
static char vgen_mtu_propname[] = "mtu";
static char vgen_linkprop_propname[] = "linkprop";
336 
337 /*
338  * VIO Protocol Version Info:
339  *
340  * The version specified below represents the version of protocol currently
341  * supported in the driver. It means the driver can negotiate with peers with
342  * versions <= this version. Here is a summary of the feature(s) that are
343  * supported at each version of the protocol:
344  *
345  * 1.0			Basic VIO protocol.
346  * 1.1			vDisk protocol update (no virtual network update).
347  * 1.2			Support for priority frames (priority-ether-types).
348  * 1.3			VLAN and HybridIO support.
349  * 1.4			Jumbo Frame support.
350  * 1.5			Link State Notification support with optional support
351  * 			for Physical Link information.
352  */
/*
 * Table of supported protocol versions. Only the first entry (1.5) is
 * initialized explicitly; any remaining entries, if VGEN_NUM_VER is larger
 * than 1, are zero-filled by the compiler.
 */
static vgen_ver_t vgen_versions[VGEN_NUM_VER] =  { {1, 5} };
354 
/* Tunables */
uint32_t vgen_hwd_interval = 5;		/* handshake watchdog freq in sec */
uint32_t vgen_max_hretries = VNET_NUM_HANDSHAKES; /* # of handshake retries */
uint32_t vgen_ldcwr_retries = 10;	/* max # of ldc_write() retries */
uint32_t vgen_ldcup_retries = 5;	/* max # of ldc_up() retries */
uint32_t vgen_ldccl_retries = 5;	/* max # of ldc_close() retries */
uint32_t vgen_recv_delay = 1;		/* delay when rx descr not ready */
uint32_t vgen_recv_retries = 10;	/* retry when rx descr not ready */
/*
 * vgen_tx() makes up to vgen_tx_retries send attempts per frame and waits
 * vgen_tx_delay microseconds (converted with drv_usectohz()) between them.
 */
uint32_t vgen_tx_retries = 0x4;		/* retry when tx descr not available */
uint32_t vgen_tx_delay = 0x30;		/* delay when tx descr not available */

int vgen_rcv_thread_enabled = 1;	/* Enable Receive thread */

/*
 * Module-wide list of rx mblk pools that could not be destroyed when their
 * vgen instance was torn down (see vgen_uninit()); vgen_mod_cleanup() retries
 * destroying them. The list is accessed with vgen_rw held as writer.
 */
static vio_mblk_pool_t	*vgen_rx_poolp = NULL;
static krwlock_t	vgen_rw;

/*
 * max # of packets accumulated prior to sending them up. It is best
 * to keep this at 60% of the number of receive buffers.
 */
uint32_t vgen_chain_len = (VGEN_NRBUFS * 0.6);

/*
 * Internal tunables for receive buffer pools, that is,  the size and number of
 * mblks for each pool. At least 3 sizes must be specified if these are used.
 * The sizes must be specified in increasing order. Non-zero value of the first
 * size will be used as a hint to use these values instead of the algorithm
 * that determines the sizes based on MTU.
 */
uint32_t vgen_rbufsz1 = 0;
uint32_t vgen_rbufsz2 = 0;
uint32_t vgen_rbufsz3 = 0;
uint32_t vgen_rbufsz4 = 0;

uint32_t vgen_nrbufs1 = VGEN_NRBUFS;
uint32_t vgen_nrbufs2 = VGEN_NRBUFS;
uint32_t vgen_nrbufs3 = VGEN_NRBUFS;
uint32_t vgen_nrbufs4 = VGEN_NRBUFS;

/*
 * In the absence of "priority-ether-types" property in MD, the following
 * internal tunable can be set to specify a single priority ethertype.
 */
uint64_t vgen_pri_eth_type = 0;

/*
 * Number of transmit priority buffers that are preallocated per device.
 * This number is chosen to be a small value to throttle transmission
 * of priority packets. Note: Must be a power of 2 for vio_create_mblks().
 */
uint32_t vgen_pri_tx_nmblks = 64;

uint32_t	vgen_vlan_nchains = 4;	/* # of chains in vlan id hash table */

#ifdef DEBUG
/* flags to simulate error conditions for debugging */
int vgen_trigger_txtimeout = 0;
int vgen_trigger_rxlost = 0;
#endif
414 
/*
 * Matching criteria passed to the MDEG to register interest
 * in changes to 'virtual-device' nodes (i.e. vnet nodes) identified
 * by their 'name' and 'cfg-handle' properties.
 */
static md_prop_match_t vdev_prop_match[] = {
	{ MDET_PROP_STR,    "name"   },
	{ MDET_PROP_VAL,    "cfg-handle" },
	{ MDET_LIST_END,    NULL    }
};

/* node match: 'virtual-device' nodes, compared using vdev_prop_match */
static mdeg_node_match_t vdev_match = { "virtual-device",
						vdev_prop_match };

/* MD update matching structure: port nodes are identified by their 'id' */
static md_prop_match_t	vport_prop_match[] = {
	{ MDET_PROP_VAL,	"id" },
	{ MDET_LIST_END,	NULL }
};

/* node match: 'virtual-device-port' nodes, compared using vport_prop_match */
static mdeg_node_match_t vport_match = { "virtual-device-port",
					vport_prop_match };

/*
 * template for matching a particular vnet instance; the 'cfg-handle' value
 * (entry [1]) is left NULL here and filled in per instance with
 * VGEN_SET_MDEG_PROP_INST().
 */
static mdeg_prop_spec_t vgen_prop_template[] = {
	{ MDET_PROP_STR,	"name",		"network" },
	{ MDET_PROP_VAL,	"cfg-handle",	NULL },
	{ MDET_LIST_END,	NULL,		NULL }
};

/* set the value of the second property spec (index 1) in a spec array */
#define	VGEN_SET_MDEG_PROP_INST(specp, val)	(specp)[1].ps_val = (val)
446 
/* NOTE(review): this repeats a prototype already declared earlier in the file */
static int vgen_mdeg_port_cb(void *cb_argp, mdeg_result_t *resp);

/*
 * Callback flags for vgen_m_callbacks: MC_IOCTL is advertised only when
 * the driver is built with VNET_IOC_DEBUG.
 */
#ifdef	VNET_IOC_DEBUG
#define	VGEN_M_CALLBACK_FLAGS	(MC_IOCTL)
#else
#define	VGEN_M_CALLBACK_FLAGS	(0)
#endif

/*
 * mac callback vector for vgen. The initializer is positional, so the
 * order of the entries must follow the member order of mac_callbacks_t.
 */
static mac_callbacks_t vgen_m_callbacks = {
	VGEN_M_CALLBACK_FLAGS,
	vgen_stat,
	vgen_start,
	vgen_stop,
	vgen_promisc,
	vgen_multicst,
	vgen_unicst,
	vgen_tx,
	vgen_ioctl,
	NULL,
	NULL
};
468 
469 /* externs */
470 extern pri_t	maxclsyspri;
471 extern proc_t	p0;
472 extern uint32_t vnet_ntxds;
473 extern uint32_t vnet_ldcwd_interval;
474 extern uint32_t vnet_ldcwd_txtimeout;
475 extern uint32_t vnet_ldc_mtu;
476 extern uint32_t vnet_nrbufs;
477 extern uint32_t	vnet_ethermtu;
478 extern uint16_t	vnet_default_vlan_id;
479 extern boolean_t vnet_jumbo_rxpools;
480 
#ifdef DEBUG

/* Declarations below exist only in DEBUG builds. */
extern int vnet_dbglevel;
static void debug_printf(const char *fname, vgen_t *vgenp,
	vgen_ldc_t *ldcp, const char *fmt, ...);

/* -1 for all LDCs info, or ldc_id for a specific LDC info */
int vgendbg_ldcid = -1;

/*
 * simulate handshake error conditions for debug; vgen_hdbg is presumably a
 * mask of the HDBG_* bits below - confirm at the use sites.
 */
uint32_t vgen_hdbg;
#define	HDBG_VERSION	0x1
#define	HDBG_TIMEOUT	0x2
#define	HDBG_BAD_SID	0x4
#define	HDBG_OUT_STATE	0x8

#endif
498 
499 /*
500  * vgen_init() is called by an instance of vnet driver to initialize the
501  * corresponding generic proxy transport layer. The arguments passed by vnet
502  * are - an opaque pointer to the vnet instance, pointers to dev_info_t and
503  * the mac address of the vnet device, and a pointer to vgen_t is passed
504  * back as a handle to vnet.
505  */
506 int
507 vgen_init(void *vnetp, uint64_t regprop, dev_info_t *vnetdip,
508     const uint8_t *macaddr, void **vgenhdl)
509 {
510 	vgen_t *vgenp;
511 	int instance;
512 	int rv;
513 
514 	if ((vnetp == NULL) || (vnetdip == NULL))
515 		return (DDI_FAILURE);
516 
517 	instance = ddi_get_instance(vnetdip);
518 
519 	DBG1(NULL, NULL, "vnet(%d): enter\n", instance);
520 
521 	vgenp = kmem_zalloc(sizeof (vgen_t), KM_SLEEP);
522 
523 	vgenp->vnetp = vnetp;
524 	vgenp->instance = instance;
525 	vgenp->regprop = regprop;
526 	vgenp->vnetdip = vnetdip;
527 	bcopy(macaddr, &(vgenp->macaddr), ETHERADDRL);
528 	vgenp->phys_link_state = LINK_STATE_UNKNOWN;
529 
530 	/* allocate multicast table */
531 	vgenp->mctab = kmem_zalloc(VGEN_INIT_MCTAB_SIZE *
532 	    sizeof (struct ether_addr), KM_SLEEP);
533 	vgenp->mccount = 0;
534 	vgenp->mcsize = VGEN_INIT_MCTAB_SIZE;
535 
536 	mutex_init(&vgenp->lock, NULL, MUTEX_DRIVER, NULL);
537 	rw_init(&vgenp->vgenports.rwlock, NULL, RW_DRIVER, NULL);
538 
539 	rv = vgen_read_mdprops(vgenp);
540 	if (rv != 0) {
541 		goto vgen_init_fail;
542 	}
543 	*vgenhdl = (void *)vgenp;
544 
545 	DBG1(NULL, NULL, "vnet(%d): exit\n", instance);
546 	return (DDI_SUCCESS);
547 
548 vgen_init_fail:
549 	rw_destroy(&vgenp->vgenports.rwlock);
550 	mutex_destroy(&vgenp->lock);
551 	kmem_free(vgenp->mctab, VGEN_INIT_MCTAB_SIZE *
552 	    sizeof (struct ether_addr));
553 	if (VGEN_PRI_ETH_DEFINED(vgenp)) {
554 		kmem_free(vgenp->pri_types,
555 		    sizeof (uint16_t) * vgenp->pri_num_types);
556 		(void) vio_destroy_mblks(vgenp->pri_tx_vmp);
557 	}
558 	KMEM_FREE(vgenp);
559 	return (DDI_FAILURE);
560 }
561 
562 int
563 vgen_init_mdeg(void *arg)
564 {
565 	vgen_t	*vgenp = (vgen_t *)arg;
566 
567 	/* register with MD event generator */
568 	return (vgen_mdeg_reg(vgenp));
569 }
570 
571 /*
572  * Called by vnet to undo the initializations done by vgen_init().
573  * The handle provided by generic transport during vgen_init() is the argument.
574  */
575 void
576 vgen_uninit(void *arg)
577 {
578 	vgen_t		*vgenp = (vgen_t *)arg;
579 	vio_mblk_pool_t	*rp;
580 	vio_mblk_pool_t	*nrp;
581 
582 	if (vgenp == NULL) {
583 		return;
584 	}
585 
586 	DBG1(vgenp, NULL, "enter\n");
587 
588 	/* unregister with MD event generator */
589 	vgen_mdeg_unreg(vgenp);
590 
591 	mutex_enter(&vgenp->lock);
592 
593 	/* detach all ports from the device */
594 	vgen_detach_ports(vgenp);
595 
596 	/*
597 	 * free any pending rx mblk pools,
598 	 * that couldn't be freed previously during channel detach.
599 	 */
600 	rp = vgenp->rmp;
601 	while (rp != NULL) {
602 		nrp = vgenp->rmp = rp->nextp;
603 		if (vio_destroy_mblks(rp)) {
604 			WRITE_ENTER(&vgen_rw);
605 			rp->nextp = vgen_rx_poolp;
606 			vgen_rx_poolp = rp;
607 			RW_EXIT(&vgen_rw);
608 		}
609 		rp = nrp;
610 	}
611 
612 	/* free multicast table */
613 	kmem_free(vgenp->mctab, vgenp->mcsize * sizeof (struct ether_addr));
614 
615 	/* free pri_types table */
616 	if (VGEN_PRI_ETH_DEFINED(vgenp)) {
617 		kmem_free(vgenp->pri_types,
618 		    sizeof (uint16_t) * vgenp->pri_num_types);
619 		(void) vio_destroy_mblks(vgenp->pri_tx_vmp);
620 	}
621 
622 	mutex_exit(&vgenp->lock);
623 
624 	rw_destroy(&vgenp->vgenports.rwlock);
625 	mutex_destroy(&vgenp->lock);
626 
627 	DBG1(vgenp, NULL, "exit\n");
628 	KMEM_FREE(vgenp);
629 }
630 
/*
 * module specific initialization common to all instances of vnet/vgen.
 * Initializes the module-wide reader/writer lock vgen_rw; vgen_mod_fini()
 * destroys it.
 */
void
vgen_mod_init(void)
{
	rw_init(&vgen_rw, NULL, RW_DRIVER, NULL);
}
639 
640 /*
641  * module specific cleanup common to all instances of vnet/vgen.
642  */
643 int
644 vgen_mod_cleanup(void)
645 {
646 	vio_mblk_pool_t	*poolp, *npoolp;
647 
648 	/*
649 	 * If any rx mblk pools are still in use, return
650 	 * error and stop the module from unloading.
651 	 */
652 	WRITE_ENTER(&vgen_rw);
653 	poolp = vgen_rx_poolp;
654 	while (poolp != NULL) {
655 		npoolp = vgen_rx_poolp = poolp->nextp;
656 		if (vio_destroy_mblks(poolp) != 0) {
657 			vgen_rx_poolp = poolp;
658 			RW_EXIT(&vgen_rw);
659 			return (EBUSY);
660 		}
661 		poolp = npoolp;
662 	}
663 	RW_EXIT(&vgen_rw);
664 
665 	return (0);
666 }
667 
/*
 * module specific uninitialization common to all instances of vnet/vgen.
 * Destroys the module-wide reader/writer lock vgen_rw that vgen_mod_init()
 * initialized.
 */
void
vgen_mod_fini(void)
{
	rw_destroy(&vgen_rw);
}
676 
677 /* enable transmit/receive for the device */
678 int
679 vgen_start(void *arg)
680 {
681 	vgen_port_t	*portp = (vgen_port_t *)arg;
682 	vgen_t		*vgenp = portp->vgenp;
683 
684 	DBG1(vgenp, NULL, "enter\n");
685 	mutex_enter(&portp->lock);
686 	vgen_port_init(portp);
687 	portp->flags |= VGEN_STARTED;
688 	mutex_exit(&portp->lock);
689 	DBG1(vgenp, NULL, "exit\n");
690 
691 	return (DDI_SUCCESS);
692 }
693 
694 /* stop transmit/receive */
695 void
696 vgen_stop(void *arg)
697 {
698 	vgen_port_t	*portp = (vgen_port_t *)arg;
699 	vgen_t		*vgenp = portp->vgenp;
700 
701 	DBG1(vgenp, NULL, "enter\n");
702 
703 	mutex_enter(&portp->lock);
704 	if (portp->flags & VGEN_STARTED) {
705 		vgen_port_uninit(portp);
706 		portp->flags &= ~(VGEN_STARTED);
707 	}
708 	mutex_exit(&portp->lock);
709 	DBG1(vgenp, NULL, "exit\n");
710 
711 }
712 
713 /* vgen transmit function */
714 static mblk_t *
715 vgen_tx(void *arg, mblk_t *mp)
716 {
717 	int i;
718 	vgen_port_t *portp;
719 	int status = VGEN_FAILURE;
720 
721 	portp = (vgen_port_t *)arg;
722 	/*
723 	 * Retry so that we avoid reporting a failure
724 	 * to the upper layer. Returning a failure may cause the
725 	 * upper layer to go into single threaded mode there by
726 	 * causing performance degradation, especially for a large
727 	 * number of connections.
728 	 */
729 	for (i = 0; i < vgen_tx_retries; ) {
730 		status = vgen_portsend(portp, mp);
731 		if (status == VGEN_SUCCESS) {
732 			break;
733 		}
734 		if (++i < vgen_tx_retries)
735 			delay(drv_usectohz(vgen_tx_delay));
736 	}
737 	if (status != VGEN_SUCCESS) {
738 		/* failure */
739 		return (mp);
740 	}
741 	/* success */
742 	return (NULL);
743 }
744 
745 /*
746  * This function provides any necessary tagging/untagging of the frames
747  * that are being transmitted over the port. It first verifies the vlan
748  * membership of the destination(port) and drops the packet if the
749  * destination doesn't belong to the given vlan.
750  *
751  * Arguments:
752  *   portp:     port over which the frames should be transmitted
753  *   mp:        frame to be transmitted
754  *   is_tagged:
755  *              B_TRUE: indicates frame header contains the vlan tag already.
756  *              B_FALSE: indicates frame is untagged.
757  *   vid:       vlan in which the frame should be transmitted.
758  *
759  * Returns:
760  *              Sucess: frame(mblk_t *) after doing the necessary tag/untag.
761  *              Failure: NULL
762  */
763 static mblk_t *
764 vgen_vlan_frame_fixtag(vgen_port_t *portp, mblk_t *mp, boolean_t is_tagged,
765 	uint16_t vid)
766 {
767 	vgen_t				*vgenp;
768 	boolean_t			dst_tagged;
769 	int				rv;
770 
771 	vgenp = portp->vgenp;
772 
773 	/*
774 	 * If the packet is going to a vnet:
775 	 *   Check if the destination vnet is in the same vlan.
776 	 *   Check the frame header if tag or untag is needed.
777 	 *
778 	 * We do not check the above conditions if the packet is going to vsw:
779 	 *   vsw must be present implicitly in all the vlans that a vnet device
780 	 *   is configured into; even if vsw itself is not assigned to those
781 	 *   vlans as an interface. For instance, the packet might be destined
782 	 *   to another vnet(indirectly through vsw) or to an external host
783 	 *   which is in the same vlan as this vnet and vsw itself may not be
784 	 *   present in that vlan. Similarly packets going to vsw must be
785 	 *   always tagged(unless in the default-vlan) if not already tagged,
786 	 *   as we do not know the final destination. This is needed because
787 	 *   vsw must always invoke its switching function only after tagging
788 	 *   the packet; otherwise after switching function determines the
789 	 *   destination we cannot figure out if the destination belongs to the
790 	 *   the same vlan that the frame originated from and if it needs tag/
791 	 *   untag. Note that vsw will tag the packet itself when it receives
792 	 *   it over the channel from a client if needed. However, that is
793 	 *   needed only in the case of vlan unaware clients such as obp or
794 	 *   earlier versions of vnet.
795 	 *
796 	 */
797 	if (portp != vgenp->vsw_portp) {
798 		/*
799 		 * Packet going to a vnet. Check if the destination vnet is in
800 		 * the same vlan. Then check the frame header if tag/untag is
801 		 * needed.
802 		 */
803 		rv = vgen_vlan_lookup(portp->vlan_hashp, vid);
804 		if (rv == B_FALSE) {
805 			/* drop the packet */
806 			freemsg(mp);
807 			return (NULL);
808 		}
809 
810 		/* is the destination tagged or untagged in this vlan? */
811 		(vid == portp->pvid) ? (dst_tagged = B_FALSE) :
812 		    (dst_tagged = B_TRUE);
813 
814 		if (is_tagged == dst_tagged) {
815 			/* no tagging/untagging needed */
816 			return (mp);
817 		}
818 
819 		if (is_tagged == B_TRUE) {
820 			/* frame is tagged; destination needs untagged */
821 			mp = vnet_vlan_remove_tag(mp);
822 			return (mp);
823 		}
824 
825 		/* (is_tagged == B_FALSE): fallthru to tag tx packet: */
826 	}
827 
828 	/*
829 	 * Packet going to a vnet needs tagging.
830 	 * OR
831 	 * If the packet is going to vsw, then it must be tagged in all cases:
832 	 * unknown unicast, broadcast/multicast or to vsw interface.
833 	 */
834 
835 	if (is_tagged == B_FALSE) {
836 		mp = vnet_vlan_insert_tag(mp, vid);
837 	}
838 
839 	return (mp);
840 }
841 
842 /* transmit packets over the given port */
843 static int
844 vgen_portsend(vgen_port_t *portp, mblk_t *mp)
845 {
846 	vgen_ldclist_t		*ldclp;
847 	vgen_ldc_t		*ldcp;
848 	int			status;
849 	int			rv = VGEN_SUCCESS;
850 	vgen_t			*vgenp = portp->vgenp;
851 	vnet_t			*vnetp = vgenp->vnetp;
852 	boolean_t		is_tagged;
853 	boolean_t		dec_refcnt = B_FALSE;
854 	uint16_t		vlan_id;
855 	struct ether_header	*ehp;
856 
857 	if (portp->use_vsw_port) {
858 		(void) atomic_inc_32(&vgenp->vsw_port_refcnt);
859 		portp = portp->vgenp->vsw_portp;
860 		dec_refcnt = B_TRUE;
861 	}
862 	if (portp == NULL) {
863 		return (VGEN_FAILURE);
864 	}
865 
866 	/*
867 	 * Determine the vlan id that the frame belongs to.
868 	 */
869 	ehp = (struct ether_header *)mp->b_rptr;
870 	is_tagged = vgen_frame_lookup_vid(vnetp, ehp, &vlan_id);
871 
872 	if (vlan_id == vnetp->default_vlan_id) {
873 
874 		/* Frames in default vlan must be untagged */
875 		ASSERT(is_tagged == B_FALSE);
876 
877 		/*
878 		 * If the destination is a vnet-port verify it belongs to the
879 		 * default vlan; otherwise drop the packet. We do not need
880 		 * this check for vsw-port, as it should implicitly belong to
881 		 * this vlan; see comments in vgen_vlan_frame_fixtag().
882 		 */
883 		if (portp != vgenp->vsw_portp &&
884 		    portp->pvid != vnetp->default_vlan_id) {
885 			freemsg(mp);
886 			goto portsend_ret;
887 		}
888 
889 	} else {	/* frame not in default-vlan */
890 
891 		mp = vgen_vlan_frame_fixtag(portp, mp, is_tagged, vlan_id);
892 		if (mp == NULL) {
893 			goto portsend_ret;
894 		}
895 
896 	}
897 
898 	ldclp = &portp->ldclist;
899 	READ_ENTER(&ldclp->rwlock);
900 	/*
901 	 * NOTE: for now, we will assume we have a single channel.
902 	 */
903 	if (ldclp->headp == NULL) {
904 		RW_EXIT(&ldclp->rwlock);
905 		rv = VGEN_FAILURE;
906 		goto portsend_ret;
907 	}
908 	ldcp = ldclp->headp;
909 
910 	status = ldcp->tx(ldcp, mp);
911 
912 	RW_EXIT(&ldclp->rwlock);
913 
914 	if (status != VGEN_TX_SUCCESS) {
915 		rv = VGEN_FAILURE;
916 	}
917 
918 portsend_ret:
919 	if (dec_refcnt == B_TRUE) {
920 		(void) atomic_dec_32(&vgenp->vsw_port_refcnt);
921 	}
922 	return (rv);
923 }
924 
925 /*
926  * Wrapper function to transmit normal and/or priority frames over the channel.
927  */
928 static int
929 vgen_ldcsend(void *arg, mblk_t *mp)
930 {
931 	vgen_ldc_t		*ldcp = (vgen_ldc_t *)arg;
932 	int			status;
933 	struct ether_header	*ehp;
934 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
935 	uint32_t		num_types;
936 	uint16_t		*types;
937 	int			i;
938 
939 	ASSERT(VGEN_PRI_ETH_DEFINED(vgenp));
940 
941 	num_types = vgenp->pri_num_types;
942 	types = vgenp->pri_types;
943 	ehp = (struct ether_header *)mp->b_rptr;
944 
945 	for (i = 0; i < num_types; i++) {
946 
947 		if (ehp->ether_type == types[i]) {
948 			/* priority frame, use pri tx function */
949 			vgen_ldcsend_pkt(ldcp, mp);
950 			return (VGEN_SUCCESS);
951 		}
952 
953 	}
954 
955 	status  = vgen_ldcsend_dring(ldcp, mp);
956 
957 	return (status);
958 }
959 
960 /*
961  * This functions handles ldc channel reset while in the context
962  * of transmit routines: vgen_ldcsend_pkt() or vgen_ldcsend_dring().
963  */
964 static void
965 vgen_ldcsend_process_reset(vgen_ldc_t *ldcp)
966 {
967 	ldc_status_t	istatus;
968 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
969 
970 	if (mutex_tryenter(&ldcp->cblock)) {
971 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
972 			DWARN(vgenp, ldcp, "ldc_status() error\n");
973 		} else {
974 			ldcp->ldc_status = istatus;
975 		}
976 		if (ldcp->ldc_status != LDC_UP) {
977 			vgen_handle_evt_reset(ldcp);
978 		}
979 		mutex_exit(&ldcp->cblock);
980 	}
981 }
982 
983 /*
984  * This function transmits the frame in the payload of a raw data
985  * (VIO_PKT_DATA) message. Thus, it provides an Out-Of-Band path to
986  * send special frames with high priorities, without going through
987  * the normal data path which uses descriptor ring mechanism.
988  */
989 static void
990 vgen_ldcsend_pkt(void *arg, mblk_t *mp)
991 {
992 	vgen_ldc_t		*ldcp = (vgen_ldc_t *)arg;
993 	vio_raw_data_msg_t	*pkt;
994 	mblk_t			*bp;
995 	mblk_t			*nmp = NULL;
996 	caddr_t			dst;
997 	uint32_t		mblksz;
998 	uint32_t		size;
999 	uint32_t		nbytes;
1000 	int			rv;
1001 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
1002 	vgen_stats_t		*statsp = &ldcp->stats;
1003 
1004 	/* drop the packet if ldc is not up or handshake is not done */
1005 	if (ldcp->ldc_status != LDC_UP) {
1006 		(void) atomic_inc_32(&statsp->tx_pri_fail);
1007 		DWARN(vgenp, ldcp, "status(%d), dropping packet\n",
1008 		    ldcp->ldc_status);
1009 		goto send_pkt_exit;
1010 	}
1011 
1012 	if (ldcp->hphase != VH_DONE) {
1013 		(void) atomic_inc_32(&statsp->tx_pri_fail);
1014 		DWARN(vgenp, ldcp, "hphase(%x), dropping packet\n",
1015 		    ldcp->hphase);
1016 		goto send_pkt_exit;
1017 	}
1018 
1019 	size = msgsize(mp);
1020 
1021 	/* frame size bigger than available payload len of raw data msg ? */
1022 	if (size > (size_t)(ldcp->msglen - VIO_PKT_DATA_HDRSIZE)) {
1023 		(void) atomic_inc_32(&statsp->tx_pri_fail);
1024 		DWARN(vgenp, ldcp, "invalid size(%d)\n", size);
1025 		goto send_pkt_exit;
1026 	}
1027 
1028 	if (size < ETHERMIN)
1029 		size = ETHERMIN;
1030 
1031 	/* alloc space for a raw data message */
1032 	nmp = vio_allocb(vgenp->pri_tx_vmp);
1033 	if (nmp == NULL) {
1034 		(void) atomic_inc_32(&statsp->tx_pri_fail);
1035 		DWARN(vgenp, ldcp, "vio_allocb failed\n");
1036 		goto send_pkt_exit;
1037 	}
1038 	pkt = (vio_raw_data_msg_t *)nmp->b_rptr;
1039 
1040 	/* copy frame into the payload of raw data message */
1041 	dst = (caddr_t)pkt->data;
1042 	for (bp = mp; bp != NULL; bp = bp->b_cont) {
1043 		mblksz = MBLKL(bp);
1044 		bcopy(bp->b_rptr, dst, mblksz);
1045 		dst += mblksz;
1046 	}
1047 
1048 	/* setup the raw data msg */
1049 	pkt->tag.vio_msgtype = VIO_TYPE_DATA;
1050 	pkt->tag.vio_subtype = VIO_SUBTYPE_INFO;
1051 	pkt->tag.vio_subtype_env = VIO_PKT_DATA;
1052 	pkt->tag.vio_sid = ldcp->local_sid;
1053 	nbytes = VIO_PKT_DATA_HDRSIZE + size;
1054 
1055 	/* send the msg over ldc */
1056 	rv = vgen_sendmsg(ldcp, (caddr_t)pkt, nbytes, B_FALSE);
1057 	if (rv != VGEN_SUCCESS) {
1058 		(void) atomic_inc_32(&statsp->tx_pri_fail);
1059 		DWARN(vgenp, ldcp, "Error sending priority frame\n");
1060 		if (rv == ECONNRESET) {
1061 			vgen_ldcsend_process_reset(ldcp);
1062 		}
1063 		goto send_pkt_exit;
1064 	}
1065 
1066 	/* update stats */
1067 	(void) atomic_inc_64(&statsp->tx_pri_packets);
1068 	(void) atomic_add_64(&statsp->tx_pri_bytes, size);
1069 
1070 send_pkt_exit:
1071 	if (nmp != NULL)
1072 		freemsg(nmp);
1073 	freemsg(mp);
1074 }
1075 
1076 /*
1077  * This function transmits normal (non-priority) data frames over
1078  * the channel. It queues the frame into the transmit descriptor ring
1079  * and sends a VIO_DRING_DATA message if needed, to wake up the
1080  * peer to (re)start processing.
1081  */
1082 static int
1083 vgen_ldcsend_dring(void *arg, mblk_t *mp)
1084 {
1085 	vgen_ldc_t		*ldcp = (vgen_ldc_t *)arg;
1086 	vgen_private_desc_t	*tbufp;
1087 	vgen_private_desc_t	*rtbufp;
1088 	vnet_public_desc_t	*rtxdp;
1089 	vgen_private_desc_t	*ntbufp;
1090 	vnet_public_desc_t	*txdp;
1091 	vio_dring_entry_hdr_t	*hdrp;
1092 	vgen_stats_t		*statsp;
1093 	struct ether_header	*ehp;
1094 	boolean_t		is_bcast = B_FALSE;
1095 	boolean_t		is_mcast = B_FALSE;
1096 	size_t			mblksz;
1097 	caddr_t			dst;
1098 	mblk_t			*bp;
1099 	size_t			size;
1100 	int			rv = 0;
1101 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
1102 	vgen_hparams_t		*lp = &ldcp->local_hparams;
1103 
1104 	statsp = &ldcp->stats;
1105 	size = msgsize(mp);
1106 
1107 	DBG1(vgenp, ldcp, "enter\n");
1108 
1109 	if (ldcp->ldc_status != LDC_UP) {
1110 		DWARN(vgenp, ldcp, "status(%d), dropping packet\n",
1111 		    ldcp->ldc_status);
1112 		/* retry ldc_up() if needed */
1113 #ifdef	VNET_IOC_DEBUG
1114 		if (ldcp->flags & CHANNEL_STARTED && !ldcp->link_down_forced) {
1115 #else
1116 		if (ldcp->flags & CHANNEL_STARTED) {
1117 #endif
1118 			(void) ldc_up(ldcp->ldc_handle);
1119 		}
1120 		goto send_dring_exit;
1121 	}
1122 
1123 	/* drop the packet if ldc is not up or handshake is not done */
1124 	if (ldcp->hphase != VH_DONE) {
1125 		DWARN(vgenp, ldcp, "hphase(%x), dropping packet\n",
1126 		    ldcp->hphase);
1127 		goto send_dring_exit;
1128 	}
1129 
1130 	if (size > (size_t)lp->mtu) {
1131 		DWARN(vgenp, ldcp, "invalid size(%d)\n", size);
1132 		goto send_dring_exit;
1133 	}
1134 	if (size < ETHERMIN)
1135 		size = ETHERMIN;
1136 
1137 	ehp = (struct ether_header *)mp->b_rptr;
1138 	is_bcast = IS_BROADCAST(ehp);
1139 	is_mcast = IS_MULTICAST(ehp);
1140 
1141 	mutex_enter(&ldcp->txlock);
1142 	/*
1143 	 * allocate a descriptor
1144 	 */
1145 	tbufp = ldcp->next_tbufp;
1146 	ntbufp = NEXTTBUF(ldcp, tbufp);
1147 	if (ntbufp == ldcp->cur_tbufp) { /* out of tbufs/txds */
1148 
1149 		mutex_enter(&ldcp->tclock);
1150 		/* Try reclaiming now */
1151 		vgen_reclaim_dring(ldcp);
1152 		ldcp->reclaim_lbolt = ddi_get_lbolt();
1153 
1154 		if (ntbufp == ldcp->cur_tbufp) {
1155 			/* Now we are really out of tbuf/txds */
1156 			ldcp->need_resched = B_TRUE;
1157 			mutex_exit(&ldcp->tclock);
1158 
1159 			statsp->tx_no_desc++;
1160 			mutex_exit(&ldcp->txlock);
1161 
1162 			return (VGEN_TX_NORESOURCES);
1163 		}
1164 		mutex_exit(&ldcp->tclock);
1165 	}
1166 	/* update next available tbuf in the ring and update tx index */
1167 	ldcp->next_tbufp = ntbufp;
1168 	INCR_TXI(ldcp->next_txi, ldcp);
1169 
1170 	/* Mark the buffer busy before releasing the lock */
1171 	tbufp->flags = VGEN_PRIV_DESC_BUSY;
1172 	mutex_exit(&ldcp->txlock);
1173 
1174 	/* copy data into pre-allocated transmit buffer */
1175 	dst = tbufp->datap + VNET_IPALIGN;
1176 	for (bp = mp; bp != NULL; bp = bp->b_cont) {
1177 		mblksz = MBLKL(bp);
1178 		bcopy(bp->b_rptr, dst, mblksz);
1179 		dst += mblksz;
1180 	}
1181 
1182 	tbufp->datalen = size;
1183 
1184 	/* initialize the corresponding public descriptor (txd) */
1185 	txdp = tbufp->descp;
1186 	hdrp = &txdp->hdr;
1187 	txdp->nbytes = size;
1188 	txdp->ncookies = tbufp->ncookies;
1189 	bcopy((tbufp->memcookie), (txdp->memcookie),
1190 	    tbufp->ncookies * sizeof (ldc_mem_cookie_t));
1191 
1192 	mutex_enter(&ldcp->wrlock);
1193 	/*
1194 	 * If the flags not set to BUSY, it implies that the clobber
1195 	 * was done while we were copying the data. In such case,
1196 	 * discard the packet and return.
1197 	 */
1198 	if (tbufp->flags != VGEN_PRIV_DESC_BUSY) {
1199 		statsp->oerrors++;
1200 		mutex_exit(&ldcp->wrlock);
1201 		goto send_dring_exit;
1202 	}
1203 	hdrp->dstate = VIO_DESC_READY;
1204 
1205 	/* update stats */
1206 	statsp->opackets++;
1207 	statsp->obytes += size;
1208 	if (is_bcast)
1209 		statsp->brdcstxmt++;
1210 	else if (is_mcast)
1211 		statsp->multixmt++;
1212 
1213 	/* send dring datamsg to the peer */
1214 	if (ldcp->resched_peer) {
1215 
1216 		rtbufp = &ldcp->tbufp[ldcp->resched_peer_txi];
1217 		rtxdp = rtbufp->descp;
1218 
1219 		if (rtxdp->hdr.dstate == VIO_DESC_READY) {
1220 
1221 			rv = vgen_send_dring_data(ldcp,
1222 			    (uint32_t)ldcp->resched_peer_txi, -1);
1223 			if (rv != 0) {
1224 				/* error: drop the packet */
1225 				DWARN(vgenp, ldcp, "vgen_send_dring_data "
1226 				    "failed: rv(%d) len(%d)\n",
1227 				    ldcp->ldc_id, rv, size);
1228 				statsp->oerrors++;
1229 			} else {
1230 				ldcp->resched_peer = B_FALSE;
1231 			}
1232 
1233 		}
1234 
1235 	}
1236 
1237 	mutex_exit(&ldcp->wrlock);
1238 
1239 send_dring_exit:
1240 	if (rv == ECONNRESET) {
1241 		vgen_ldcsend_process_reset(ldcp);
1242 	}
1243 	freemsg(mp);
1244 	DBG1(vgenp, ldcp, "exit\n");
1245 	return (VGEN_TX_SUCCESS);
1246 }
1247 
1248 /*
1249  * enable/disable a multicast address
1250  * note that the cblock of the ldc channel connected to the vsw is used for
1251  * synchronization of the mctab.
1252  */
1253 int
1254 vgen_multicst(void *arg, boolean_t add, const uint8_t *mca)
1255 {
1256 	vgen_t			*vgenp;
1257 	vnet_mcast_msg_t	mcastmsg;
1258 	vio_msg_tag_t		*tagp;
1259 	vgen_port_t		*portp;
1260 	vgen_ldc_t		*ldcp;
1261 	vgen_ldclist_t		*ldclp;
1262 	struct ether_addr	*addrp;
1263 	int			rv = DDI_FAILURE;
1264 	uint32_t		i;
1265 
1266 	portp = (vgen_port_t *)arg;
1267 	vgenp = portp->vgenp;
1268 
1269 	if (portp->is_vsw_port != B_TRUE) {
1270 		return (DDI_SUCCESS);
1271 	}
1272 
1273 	addrp = (struct ether_addr *)mca;
1274 	tagp = &mcastmsg.tag;
1275 	bzero(&mcastmsg, sizeof (mcastmsg));
1276 
1277 	ldclp = &portp->ldclist;
1278 
1279 	READ_ENTER(&ldclp->rwlock);
1280 
1281 	ldcp = ldclp->headp;
1282 	if (ldcp == NULL) {
1283 		RW_EXIT(&ldclp->rwlock);
1284 		return (DDI_FAILURE);
1285 	}
1286 
1287 	mutex_enter(&ldcp->cblock);
1288 
1289 	if (ldcp->hphase == VH_DONE) {
1290 		/*
1291 		 * If handshake is done, send a msg to vsw to add/remove
1292 		 * the multicast address. Otherwise, we just update this
1293 		 * mcast address in our table and the table will be sync'd
1294 		 * with vsw when handshake completes.
1295 		 */
1296 		tagp->vio_msgtype = VIO_TYPE_CTRL;
1297 		tagp->vio_subtype = VIO_SUBTYPE_INFO;
1298 		tagp->vio_subtype_env = VNET_MCAST_INFO;
1299 		tagp->vio_sid = ldcp->local_sid;
1300 		bcopy(mca, &(mcastmsg.mca), ETHERADDRL);
1301 		mcastmsg.set = add;
1302 		mcastmsg.count = 1;
1303 		if (vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (mcastmsg),
1304 		    B_FALSE) != VGEN_SUCCESS) {
1305 			DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
1306 			rv = DDI_FAILURE;
1307 			goto vgen_mcast_exit;
1308 		}
1309 	}
1310 
1311 	if (add) {
1312 
1313 		/* expand multicast table if necessary */
1314 		if (vgenp->mccount >= vgenp->mcsize) {
1315 			struct ether_addr	*newtab;
1316 			uint32_t		newsize;
1317 
1318 
1319 			newsize = vgenp->mcsize * 2;
1320 
1321 			newtab = kmem_zalloc(newsize *
1322 			    sizeof (struct ether_addr), KM_NOSLEEP);
1323 			if (newtab == NULL)
1324 				goto vgen_mcast_exit;
1325 			bcopy(vgenp->mctab, newtab, vgenp->mcsize *
1326 			    sizeof (struct ether_addr));
1327 			kmem_free(vgenp->mctab,
1328 			    vgenp->mcsize * sizeof (struct ether_addr));
1329 
1330 			vgenp->mctab = newtab;
1331 			vgenp->mcsize = newsize;
1332 		}
1333 
1334 		/* add address to the table */
1335 		vgenp->mctab[vgenp->mccount++] = *addrp;
1336 
1337 	} else {
1338 
1339 		/* delete address from the table */
1340 		for (i = 0; i < vgenp->mccount; i++) {
1341 			if (ether_cmp(addrp, &(vgenp->mctab[i])) == 0) {
1342 
1343 				/*
1344 				 * If there's more than one address in this
1345 				 * table, delete the unwanted one by moving
1346 				 * the last one in the list over top of it;
1347 				 * otherwise, just remove it.
1348 				 */
1349 				if (vgenp->mccount > 1) {
1350 					vgenp->mctab[i] =
1351 					    vgenp->mctab[vgenp->mccount-1];
1352 				}
1353 				vgenp->mccount--;
1354 				break;
1355 			}
1356 		}
1357 	}
1358 
1359 	rv = DDI_SUCCESS;
1360 
1361 vgen_mcast_exit:
1362 	mutex_exit(&ldcp->cblock);
1363 	RW_EXIT(&ldclp->rwlock);
1364 
1365 	return (rv);
1366 }
1367 
1368 /* set or clear promiscuous mode on the device */
1369 static int
1370 vgen_promisc(void *arg, boolean_t on)
1371 {
1372 	_NOTE(ARGUNUSED(arg, on))
1373 	return (DDI_SUCCESS);
1374 }
1375 
1376 /* set the unicast mac address of the device */
1377 static int
1378 vgen_unicst(void *arg, const uint8_t *mca)
1379 {
1380 	_NOTE(ARGUNUSED(arg, mca))
1381 	return (DDI_SUCCESS);
1382 }
1383 
1384 /* get device statistics */
1385 int
1386 vgen_stat(void *arg, uint_t stat, uint64_t *val)
1387 {
1388 	vgen_port_t	*portp = (vgen_port_t *)arg;
1389 
1390 	*val = vgen_port_stat(portp, stat);
1391 
1392 	return (0);
1393 }
1394 
1395 /* vgen internal functions */
1396 /* detach all ports from the device */
1397 static void
1398 vgen_detach_ports(vgen_t *vgenp)
1399 {
1400 	vgen_port_t	*portp;
1401 	vgen_portlist_t	*plistp;
1402 
1403 	plistp = &(vgenp->vgenports);
1404 	WRITE_ENTER(&plistp->rwlock);
1405 	while ((portp = plistp->headp) != NULL) {
1406 		vgen_port_detach(portp);
1407 	}
1408 	RW_EXIT(&plistp->rwlock);
1409 }
1410 
1411 /*
1412  * detach the given port.
1413  */
1414 static void
1415 vgen_port_detach(vgen_port_t *portp)
1416 {
1417 	vgen_t		*vgenp;
1418 	vgen_ldclist_t	*ldclp;
1419 	int		port_num;
1420 
1421 	vgenp = portp->vgenp;
1422 	port_num = portp->port_num;
1423 
1424 	DBG1(vgenp, NULL, "port(%d):enter\n", port_num);
1425 
1426 	/*
1427 	 * If this port is connected to the vswitch, then
1428 	 * potentially there could be ports that may be using
1429 	 * this port to transmit packets. To address this do
1430 	 * the following:
1431 	 *	- First set vgenp->vsw_portp to NULL, so that
1432 	 *	  its not used after that.
1433 	 *	- Then wait for the refcnt to go down to 0.
1434 	 *	- Now we can safely detach this port.
1435 	 */
1436 	if (vgenp->vsw_portp == portp) {
1437 		vgenp->vsw_portp = NULL;
1438 		while (vgenp->vsw_port_refcnt > 0) {
1439 			delay(drv_usectohz(vgen_tx_delay));
1440 		}
1441 		(void) atomic_swap_32(&vgenp->vsw_port_refcnt, 0);
1442 	}
1443 
1444 	if (portp->vhp != NULL) {
1445 		vio_net_resource_unreg(portp->vhp);
1446 		portp->vhp = NULL;
1447 	}
1448 
1449 	vgen_vlan_destroy_hash(portp);
1450 
1451 	/* remove it from port list */
1452 	vgen_port_list_remove(portp);
1453 
1454 	/* detach channels from this port */
1455 	ldclp = &portp->ldclist;
1456 	WRITE_ENTER(&ldclp->rwlock);
1457 	while (ldclp->headp) {
1458 		vgen_ldc_detach(ldclp->headp);
1459 	}
1460 	RW_EXIT(&ldclp->rwlock);
1461 	rw_destroy(&ldclp->rwlock);
1462 
1463 	if (portp->num_ldcs != 0) {
1464 		kmem_free(portp->ldc_ids, portp->num_ldcs * sizeof (uint64_t));
1465 		portp->num_ldcs = 0;
1466 	}
1467 
1468 	mutex_destroy(&portp->lock);
1469 	KMEM_FREE(portp);
1470 
1471 	DBG1(vgenp, NULL, "port(%d):exit\n", port_num);
1472 }
1473 
1474 /* add a port to port list */
1475 static void
1476 vgen_port_list_insert(vgen_port_t *portp)
1477 {
1478 	vgen_portlist_t *plistp;
1479 	vgen_t *vgenp;
1480 
1481 	vgenp = portp->vgenp;
1482 	plistp = &(vgenp->vgenports);
1483 
1484 	if (plistp->headp == NULL) {
1485 		plistp->headp = portp;
1486 	} else {
1487 		plistp->tailp->nextp = portp;
1488 	}
1489 	plistp->tailp = portp;
1490 	portp->nextp = NULL;
1491 }
1492 
1493 /* remove a port from port list */
1494 static void
1495 vgen_port_list_remove(vgen_port_t *portp)
1496 {
1497 	vgen_port_t *prevp;
1498 	vgen_port_t *nextp;
1499 	vgen_portlist_t *plistp;
1500 	vgen_t *vgenp;
1501 
1502 	vgenp = portp->vgenp;
1503 
1504 	plistp = &(vgenp->vgenports);
1505 
1506 	if (plistp->headp == NULL)
1507 		return;
1508 
1509 	if (portp == plistp->headp) {
1510 		plistp->headp = portp->nextp;
1511 		if (portp == plistp->tailp)
1512 			plistp->tailp = plistp->headp;
1513 	} else {
1514 		for (prevp = plistp->headp;
1515 		    ((nextp = prevp->nextp) != NULL) && (nextp != portp);
1516 		    prevp = nextp)
1517 			;
1518 		if (nextp == portp) {
1519 			prevp->nextp = portp->nextp;
1520 		}
1521 		if (portp == plistp->tailp)
1522 			plistp->tailp = prevp;
1523 	}
1524 }
1525 
1526 /* lookup a port in the list based on port_num */
1527 static vgen_port_t *
1528 vgen_port_lookup(vgen_portlist_t *plistp, int port_num)
1529 {
1530 	vgen_port_t *portp = NULL;
1531 
1532 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
1533 		if (portp->port_num == port_num) {
1534 			break;
1535 		}
1536 	}
1537 
1538 	return (portp);
1539 }
1540 
1541 /* enable ports for transmit/receive */
1542 static void
1543 vgen_init_ports(vgen_t *vgenp)
1544 {
1545 	vgen_port_t	*portp;
1546 	vgen_portlist_t	*plistp;
1547 
1548 	plistp = &(vgenp->vgenports);
1549 	READ_ENTER(&plistp->rwlock);
1550 
1551 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
1552 		vgen_port_init(portp);
1553 	}
1554 
1555 	RW_EXIT(&plistp->rwlock);
1556 }
1557 
1558 static void
1559 vgen_port_init(vgen_port_t *portp)
1560 {
1561 	/* Add the port to the specified vlans */
1562 	vgen_vlan_add_ids(portp);
1563 
1564 	/* Bring up the channels of this port */
1565 	vgen_init_ldcs(portp);
1566 }
1567 
1568 /* disable transmit/receive on ports */
1569 static void
1570 vgen_uninit_ports(vgen_t *vgenp)
1571 {
1572 	vgen_port_t	*portp;
1573 	vgen_portlist_t	*plistp;
1574 
1575 	plistp = &(vgenp->vgenports);
1576 	READ_ENTER(&plistp->rwlock);
1577 
1578 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
1579 		vgen_port_uninit(portp);
1580 	}
1581 
1582 	RW_EXIT(&plistp->rwlock);
1583 }
1584 
1585 static void
1586 vgen_port_uninit(vgen_port_t *portp)
1587 {
1588 	vgen_uninit_ldcs(portp);
1589 
1590 	/* remove the port from vlans it has been assigned to */
1591 	vgen_vlan_remove_ids(portp);
1592 }
1593 
1594 /*
1595  * Scan the machine description for this instance of vnet
1596  * and read its properties. Called only from vgen_init().
1597  * Returns: 0 on success, 1 on failure.
1598  */
1599 static int
1600 vgen_read_mdprops(vgen_t *vgenp)
1601 {
1602 	vnet_t		*vnetp = vgenp->vnetp;
1603 	md_t		*mdp = NULL;
1604 	mde_cookie_t	rootnode;
1605 	mde_cookie_t	*listp = NULL;
1606 	uint64_t	cfgh;
1607 	char		*name;
1608 	int		rv = 1;
1609 	int		num_nodes = 0;
1610 	int		num_devs = 0;
1611 	int		listsz = 0;
1612 	int		i;
1613 
1614 	if ((mdp = md_get_handle()) == NULL) {
1615 		return (rv);
1616 	}
1617 
1618 	num_nodes = md_node_count(mdp);
1619 	ASSERT(num_nodes > 0);
1620 
1621 	listsz = num_nodes * sizeof (mde_cookie_t);
1622 	listp = (mde_cookie_t *)kmem_zalloc(listsz, KM_SLEEP);
1623 
1624 	rootnode = md_root_node(mdp);
1625 
1626 	/* search for all "virtual_device" nodes */
1627 	num_devs = md_scan_dag(mdp, rootnode,
1628 	    md_find_name(mdp, vdev_propname),
1629 	    md_find_name(mdp, "fwd"), listp);
1630 	if (num_devs <= 0) {
1631 		goto vgen_readmd_exit;
1632 	}
1633 
1634 	/*
1635 	 * Now loop through the list of virtual-devices looking for
1636 	 * devices with name "network" and for each such device compare
1637 	 * its instance with what we have from the 'reg' property to
1638 	 * find the right node in MD and then read all its properties.
1639 	 */
1640 	for (i = 0; i < num_devs; i++) {
1641 
1642 		if (md_get_prop_str(mdp, listp[i], "name", &name) != 0) {
1643 			goto vgen_readmd_exit;
1644 		}
1645 
1646 		/* is this a "network" device? */
1647 		if (strcmp(name, vnet_propname) != 0)
1648 			continue;
1649 
1650 		if (md_get_prop_val(mdp, listp[i], "cfg-handle", &cfgh) != 0) {
1651 			goto vgen_readmd_exit;
1652 		}
1653 
1654 		/* is this the required instance of vnet? */
1655 		if (vgenp->regprop != cfgh)
1656 			continue;
1657 
1658 		/*
1659 		 * Read the 'linkprop' property to know if this vnet
1660 		 * device should get physical link updates from vswitch.
1661 		 */
1662 		vgen_linkprop_read(vgenp, mdp, listp[i],
1663 		    &vnetp->pls_update);
1664 
1665 		/*
1666 		 * Read the mtu. Note that we set the mtu of vnet device within
1667 		 * this routine itself, after validating the range.
1668 		 */
1669 		vgen_mtu_read(vgenp, mdp, listp[i], &vnetp->mtu);
1670 		if (vnetp->mtu < ETHERMTU || vnetp->mtu > VNET_MAX_MTU) {
1671 			vnetp->mtu = ETHERMTU;
1672 		}
1673 		vgenp->max_frame_size = vnetp->mtu +
1674 		    sizeof (struct ether_header) + VLAN_TAGSZ;
1675 
1676 		/* read priority ether types */
1677 		vgen_read_pri_eth_types(vgenp, mdp, listp[i]);
1678 
1679 		/* read vlan id properties of this vnet instance */
1680 		vgen_vlan_read_ids(vgenp, VGEN_LOCAL, mdp, listp[i],
1681 		    &vnetp->pvid, &vnetp->vids, &vnetp->nvids,
1682 		    &vnetp->default_vlan_id);
1683 
1684 		rv = 0;
1685 		break;
1686 	}
1687 
1688 vgen_readmd_exit:
1689 
1690 	kmem_free(listp, listsz);
1691 	(void) md_fini_handle(mdp);
1692 	return (rv);
1693 }
1694 
1695 /*
1696  * Read vlan id properties of the given MD node.
1697  * Arguments:
1698  *   arg:          device argument(vnet device or a port)
1699  *   type:         type of arg; VGEN_LOCAL(vnet device) or VGEN_PEER(port)
1700  *   mdp:          machine description
1701  *   node:         md node cookie
1702  *
1703  * Returns:
1704  *   pvidp:        port-vlan-id of the node
1705  *   vidspp:       list of vlan-ids of the node
1706  *   nvidsp:       # of vlan-ids in the list
1707  *   default_idp:  default-vlan-id of the node(if node is vnet device)
1708  */
1709 static void
1710 vgen_vlan_read_ids(void *arg, int type, md_t *mdp, mde_cookie_t node,
1711 	uint16_t *pvidp, uint16_t **vidspp, uint16_t *nvidsp,
1712 	uint16_t *default_idp)
1713 {
1714 	vgen_t		*vgenp;
1715 	vnet_t		*vnetp;
1716 	vgen_port_t	*portp;
1717 	char		*pvid_propname;
1718 	char		*vid_propname;
1719 	uint_t		nvids;
1720 	uint32_t	vids_size;
1721 	int		rv;
1722 	int		i;
1723 	uint64_t	*data;
1724 	uint64_t	val;
1725 	int		size;
1726 	int		inst;
1727 
1728 	if (type == VGEN_LOCAL) {
1729 
1730 		vgenp = (vgen_t *)arg;
1731 		vnetp = vgenp->vnetp;
1732 		pvid_propname = vgen_pvid_propname;
1733 		vid_propname = vgen_vid_propname;
1734 		inst = vnetp->instance;
1735 
1736 	} else if (type == VGEN_PEER) {
1737 
1738 		portp = (vgen_port_t *)arg;
1739 		vgenp = portp->vgenp;
1740 		vnetp = vgenp->vnetp;
1741 		pvid_propname = port_pvid_propname;
1742 		vid_propname = port_vid_propname;
1743 		inst = portp->port_num;
1744 
1745 	} else {
1746 		return;
1747 	}
1748 
1749 	if (type == VGEN_LOCAL && default_idp != NULL) {
1750 		rv = md_get_prop_val(mdp, node, vgen_dvid_propname, &val);
1751 		if (rv != 0) {
1752 			DWARN(vgenp, NULL, "prop(%s) not found",
1753 			    vgen_dvid_propname);
1754 
1755 			*default_idp = vnet_default_vlan_id;
1756 		} else {
1757 			*default_idp = val & 0xFFF;
1758 			DBG2(vgenp, NULL, "%s(%d): (%d)\n", vgen_dvid_propname,
1759 			    inst, *default_idp);
1760 		}
1761 	}
1762 
1763 	rv = md_get_prop_val(mdp, node, pvid_propname, &val);
1764 	if (rv != 0) {
1765 		DWARN(vgenp, NULL, "prop(%s) not found", pvid_propname);
1766 		*pvidp = vnet_default_vlan_id;
1767 	} else {
1768 
1769 		*pvidp = val & 0xFFF;
1770 		DBG2(vgenp, NULL, "%s(%d): (%d)\n",
1771 		    pvid_propname, inst, *pvidp);
1772 	}
1773 
1774 	rv = md_get_prop_data(mdp, node, vid_propname, (uint8_t **)&data,
1775 	    &size);
1776 	if (rv != 0) {
1777 		DBG2(vgenp, NULL, "prop(%s) not found", vid_propname);
1778 		size = 0;
1779 	} else {
1780 		size /= sizeof (uint64_t);
1781 	}
1782 	nvids = size;
1783 
1784 	if (nvids != 0) {
1785 		DBG2(vgenp, NULL, "%s(%d): ", vid_propname, inst);
1786 		vids_size = sizeof (uint16_t) * nvids;
1787 		*vidspp = kmem_zalloc(vids_size, KM_SLEEP);
1788 		for (i = 0; i < nvids; i++) {
1789 			(*vidspp)[i] = data[i] & 0xFFFF;
1790 			DBG2(vgenp, NULL, " %d ", (*vidspp)[i]);
1791 		}
1792 		DBG2(vgenp, NULL, "\n");
1793 	}
1794 
1795 	*nvidsp = nvids;
1796 }
1797 
1798 /*
1799  * Create a vlan id hash table for the given port.
1800  */
1801 static void
1802 vgen_vlan_create_hash(vgen_port_t *portp)
1803 {
1804 	char		hashname[MAXNAMELEN];
1805 
1806 	(void) snprintf(hashname, MAXNAMELEN, "port%d-vlan-hash",
1807 	    portp->port_num);
1808 
1809 	portp->vlan_nchains = vgen_vlan_nchains;
1810 	portp->vlan_hashp = mod_hash_create_idhash(hashname,
1811 	    portp->vlan_nchains, mod_hash_null_valdtor);
1812 }
1813 
1814 /*
1815  * Destroy the vlan id hash table in the given port.
1816  */
1817 static void
1818 vgen_vlan_destroy_hash(vgen_port_t *portp)
1819 {
1820 	if (portp->vlan_hashp != NULL) {
1821 		mod_hash_destroy_hash(portp->vlan_hashp);
1822 		portp->vlan_hashp = NULL;
1823 		portp->vlan_nchains = 0;
1824 	}
1825 }
1826 
1827 /*
1828  * Add a port to the vlans specified in its port properites.
1829  */
1830 static void
1831 vgen_vlan_add_ids(vgen_port_t *portp)
1832 {
1833 	int		rv;
1834 	int		i;
1835 
1836 	rv = mod_hash_insert(portp->vlan_hashp,
1837 	    (mod_hash_key_t)VLAN_ID_KEY(portp->pvid),
1838 	    (mod_hash_val_t)B_TRUE);
1839 	ASSERT(rv == 0);
1840 
1841 	for (i = 0; i < portp->nvids; i++) {
1842 		rv = mod_hash_insert(portp->vlan_hashp,
1843 		    (mod_hash_key_t)VLAN_ID_KEY(portp->vids[i]),
1844 		    (mod_hash_val_t)B_TRUE);
1845 		ASSERT(rv == 0);
1846 	}
1847 }
1848 
1849 /*
1850  * Remove a port from the vlans it has been assigned to.
1851  */
1852 static void
1853 vgen_vlan_remove_ids(vgen_port_t *portp)
1854 {
1855 	int		rv;
1856 	int		i;
1857 	mod_hash_val_t	vp;
1858 
1859 	rv = mod_hash_remove(portp->vlan_hashp,
1860 	    (mod_hash_key_t)VLAN_ID_KEY(portp->pvid),
1861 	    (mod_hash_val_t *)&vp);
1862 	ASSERT(rv == 0);
1863 
1864 	for (i = 0; i < portp->nvids; i++) {
1865 		rv = mod_hash_remove(portp->vlan_hashp,
1866 		    (mod_hash_key_t)VLAN_ID_KEY(portp->vids[i]),
1867 		    (mod_hash_val_t *)&vp);
1868 		ASSERT(rv == 0);
1869 	}
1870 }
1871 
1872 /*
1873  * Lookup the vlan id of the given tx frame. If it is a vlan-tagged frame,
1874  * then the vlan-id is available in the tag; otherwise, its vlan id is
1875  * implicitly obtained from the port-vlan-id of the vnet device.
1876  * The vlan id determined is returned in vidp.
1877  * Returns: B_TRUE if it is a tagged frame; B_FALSE if it is untagged.
1878  */
1879 static boolean_t
1880 vgen_frame_lookup_vid(vnet_t *vnetp, struct ether_header *ehp, uint16_t *vidp)
1881 {
1882 	struct ether_vlan_header	*evhp;
1883 
1884 	/* If it's a tagged frame, get the vlan id from vlan header */
1885 	if (ehp->ether_type == ETHERTYPE_VLAN) {
1886 
1887 		evhp = (struct ether_vlan_header *)ehp;
1888 		*vidp = VLAN_ID(ntohs(evhp->ether_tci));
1889 		return (B_TRUE);
1890 	}
1891 
1892 	/* Untagged frame, vlan-id is the pvid of vnet device */
1893 	*vidp = vnetp->pvid;
1894 	return (B_FALSE);
1895 }
1896 
1897 /*
1898  * Find the given vlan id in the hash table.
1899  * Return: B_TRUE if the id is found; B_FALSE if not found.
1900  */
1901 static boolean_t
1902 vgen_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid)
1903 {
1904 	int		rv;
1905 	mod_hash_val_t	vp;
1906 
1907 	rv = mod_hash_find(vlan_hashp, VLAN_ID_KEY(vid), (mod_hash_val_t *)&vp);
1908 
1909 	if (rv != 0)
1910 		return (B_FALSE);
1911 
1912 	return (B_TRUE);
1913 }
1914 
1915 /*
1916  * This function reads "priority-ether-types" property from md. This property
1917  * is used to enable support for priority frames. Applications which need
1918  * guaranteed and timely delivery of certain high priority frames to/from
1919  * a vnet or vsw within ldoms, should configure this property by providing
1920  * the ether type(s) for which the priority facility is needed.
1921  * Normal data frames are delivered over a ldc channel using the descriptor
1922  * ring mechanism which is constrained by factors such as descriptor ring size,
1923  * the rate at which the ring is processed at the peer ldc end point, etc.
1924  * The priority mechanism provides an Out-Of-Band path to send/receive frames
1925  * as raw pkt data (VIO_PKT_DATA) messages over the channel, avoiding the
1926  * descriptor ring path and enables a more reliable and timely delivery of
1927  * frames to the peer.
1928  */
1929 static void
1930 vgen_read_pri_eth_types(vgen_t *vgenp, md_t *mdp, mde_cookie_t node)
1931 {
1932 	int		rv;
1933 	uint16_t	*types;
1934 	uint64_t	*data;
1935 	int		size;
1936 	int		i;
1937 	size_t		mblk_sz;
1938 
1939 	rv = md_get_prop_data(mdp, node, pri_types_propname,
1940 	    (uint8_t **)&data, &size);
1941 	if (rv != 0) {
1942 		/*
1943 		 * Property may not exist if we are running pre-ldoms1.1 f/w.
1944 		 * Check if 'vgen_pri_eth_type' has been set in that case.
1945 		 */
1946 		if (vgen_pri_eth_type != 0) {
1947 			size = sizeof (vgen_pri_eth_type);
1948 			data = &vgen_pri_eth_type;
1949 		} else {
1950 			DBG2(vgenp, NULL,
1951 			    "prop(%s) not found", pri_types_propname);
1952 			size = 0;
1953 		}
1954 	}
1955 
1956 	if (size == 0) {
1957 		vgenp->pri_num_types = 0;
1958 		return;
1959 	}
1960 
1961 	/*
1962 	 * we have some priority-ether-types defined;
1963 	 * allocate a table of these types and also
1964 	 * allocate a pool of mblks to transmit these
1965 	 * priority packets.
1966 	 */
1967 	size /= sizeof (uint64_t);
1968 	vgenp->pri_num_types = size;
1969 	vgenp->pri_types = kmem_zalloc(size * sizeof (uint16_t), KM_SLEEP);
1970 	for (i = 0, types = vgenp->pri_types; i < size; i++) {
1971 		types[i] = data[i] & 0xFFFF;
1972 	}
1973 	mblk_sz = (VIO_PKT_DATA_HDRSIZE + vgenp->max_frame_size + 7) & ~7;
1974 	(void) vio_create_mblks(vgen_pri_tx_nmblks, mblk_sz,
1975 	    &vgenp->pri_tx_vmp);
1976 }
1977 
1978 static void
1979 vgen_mtu_read(vgen_t *vgenp, md_t *mdp, mde_cookie_t node, uint32_t *mtu)
1980 {
1981 	int		rv;
1982 	uint64_t	val;
1983 	char		*mtu_propname;
1984 
1985 	mtu_propname = vgen_mtu_propname;
1986 
1987 	rv = md_get_prop_val(mdp, node, mtu_propname, &val);
1988 	if (rv != 0) {
1989 		DWARN(vgenp, NULL, "prop(%s) not found", mtu_propname);
1990 		*mtu = vnet_ethermtu;
1991 	} else {
1992 
1993 		*mtu = val & 0xFFFF;
1994 		DBG2(vgenp, NULL, "%s(%d): (%d)\n", mtu_propname,
1995 		    vgenp->instance, *mtu);
1996 	}
1997 }
1998 
1999 static void
2000 vgen_linkprop_read(vgen_t *vgenp, md_t *mdp, mde_cookie_t node,
2001 	boolean_t *pls)
2002 {
2003 	int		rv;
2004 	uint64_t	val;
2005 	char		*linkpropname;
2006 
2007 	linkpropname = vgen_linkprop_propname;
2008 
2009 	rv = md_get_prop_val(mdp, node, linkpropname, &val);
2010 	if (rv != 0) {
2011 		DWARN(vgenp, NULL, "prop(%s) not found", linkpropname);
2012 		*pls = B_FALSE;
2013 	} else {
2014 
2015 		*pls = (val & 0x1) ?  B_TRUE : B_FALSE;
2016 		DBG2(vgenp, NULL, "%s(%d): (%d)\n", linkpropname,
2017 		    vgenp->instance, *pls);
2018 	}
2019 }
2020 
2021 /* register with MD event generator */
2022 static int
2023 vgen_mdeg_reg(vgen_t *vgenp)
2024 {
2025 	mdeg_prop_spec_t	*pspecp;
2026 	mdeg_node_spec_t	*parentp;
2027 	uint_t			templatesz;
2028 	int			rv;
2029 	mdeg_handle_t		dev_hdl = NULL;
2030 	mdeg_handle_t		port_hdl = NULL;
2031 
2032 	templatesz = sizeof (vgen_prop_template);
2033 	pspecp = kmem_zalloc(templatesz, KM_NOSLEEP);
2034 	if (pspecp == NULL) {
2035 		return (DDI_FAILURE);
2036 	}
2037 	parentp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_NOSLEEP);
2038 	if (parentp == NULL) {
2039 		kmem_free(pspecp, templatesz);
2040 		return (DDI_FAILURE);
2041 	}
2042 
2043 	bcopy(vgen_prop_template, pspecp, templatesz);
2044 
2045 	/*
2046 	 * NOTE: The instance here refers to the value of "reg" property and
2047 	 * not the dev_info instance (ddi_get_instance()) of vnet.
2048 	 */
2049 	VGEN_SET_MDEG_PROP_INST(pspecp, vgenp->regprop);
2050 
2051 	parentp->namep = "virtual-device";
2052 	parentp->specp = pspecp;
2053 
2054 	/* save parentp in vgen_t */
2055 	vgenp->mdeg_parentp = parentp;
2056 
2057 	/*
2058 	 * Register an interest in 'virtual-device' nodes with a
2059 	 * 'name' property of 'network'
2060 	 */
2061 	rv = mdeg_register(parentp, &vdev_match, vgen_mdeg_cb, vgenp, &dev_hdl);
2062 	if (rv != MDEG_SUCCESS) {
2063 		DERR(vgenp, NULL, "mdeg_register failed\n");
2064 		goto mdeg_reg_fail;
2065 	}
2066 
2067 	/* Register an interest in 'port' nodes */
2068 	rv = mdeg_register(parentp, &vport_match, vgen_mdeg_port_cb, vgenp,
2069 	    &port_hdl);
2070 	if (rv != MDEG_SUCCESS) {
2071 		DERR(vgenp, NULL, "mdeg_register failed\n");
2072 		goto mdeg_reg_fail;
2073 	}
2074 
2075 	/* save mdeg handle in vgen_t */
2076 	vgenp->mdeg_dev_hdl = dev_hdl;
2077 	vgenp->mdeg_port_hdl = port_hdl;
2078 
2079 	return (DDI_SUCCESS);
2080 
2081 mdeg_reg_fail:
2082 	if (dev_hdl != NULL) {
2083 		(void) mdeg_unregister(dev_hdl);
2084 	}
2085 	KMEM_FREE(parentp);
2086 	kmem_free(pspecp, templatesz);
2087 	vgenp->mdeg_parentp = NULL;
2088 	return (DDI_FAILURE);
2089 }
2090 
2091 /* unregister with MD event generator */
2092 static void
2093 vgen_mdeg_unreg(vgen_t *vgenp)
2094 {
2095 	if (vgenp->mdeg_dev_hdl != NULL) {
2096 		(void) mdeg_unregister(vgenp->mdeg_dev_hdl);
2097 		vgenp->mdeg_dev_hdl = NULL;
2098 	}
2099 	if (vgenp->mdeg_port_hdl != NULL) {
2100 		(void) mdeg_unregister(vgenp->mdeg_port_hdl);
2101 		vgenp->mdeg_port_hdl = NULL;
2102 	}
2103 
2104 	if (vgenp->mdeg_parentp != NULL) {
2105 		kmem_free(vgenp->mdeg_parentp->specp,
2106 		    sizeof (vgen_prop_template));
2107 		KMEM_FREE(vgenp->mdeg_parentp);
2108 		vgenp->mdeg_parentp = NULL;
2109 	}
2110 }
2111 
2112 /* mdeg callback function for the port node */
2113 static int
2114 vgen_mdeg_port_cb(void *cb_argp, mdeg_result_t *resp)
2115 {
2116 	int idx;
2117 	int vsw_idx = -1;
2118 	uint64_t val;
2119 	vgen_t *vgenp;
2120 
2121 	if ((resp == NULL) || (cb_argp == NULL)) {
2122 		return (MDEG_FAILURE);
2123 	}
2124 
2125 	vgenp = (vgen_t *)cb_argp;
2126 	DBG1(vgenp, NULL, "enter\n");
2127 
2128 	mutex_enter(&vgenp->lock);
2129 
2130 	DBG1(vgenp, NULL, "ports: removed(%x), "
2131 	"added(%x), updated(%x)\n", resp->removed.nelem,
2132 	    resp->added.nelem, resp->match_curr.nelem);
2133 
2134 	for (idx = 0; idx < resp->removed.nelem; idx++) {
2135 		(void) vgen_remove_port(vgenp, resp->removed.mdp,
2136 		    resp->removed.mdep[idx]);
2137 	}
2138 
2139 	if (vgenp->vsw_portp == NULL) {
2140 		/*
2141 		 * find vsw_port and add it first, because other ports need
2142 		 * this when adding fdb entry (see vgen_port_init()).
2143 		 */
2144 		for (idx = 0; idx < resp->added.nelem; idx++) {
2145 			if (!(md_get_prop_val(resp->added.mdp,
2146 			    resp->added.mdep[idx], swport_propname, &val))) {
2147 				if (val == 0) {
2148 					/*
2149 					 * This port is connected to the
2150 					 * vsw on service domain.
2151 					 */
2152 					vsw_idx = idx;
2153 					if (vgen_add_port(vgenp,
2154 					    resp->added.mdp,
2155 					    resp->added.mdep[idx]) !=
2156 					    DDI_SUCCESS) {
2157 						cmn_err(CE_NOTE, "vnet%d Could "
2158 						    "not initialize virtual "
2159 						    "switch port.",
2160 						    vgenp->instance);
2161 						mutex_exit(&vgenp->lock);
2162 						return (MDEG_FAILURE);
2163 					}
2164 					break;
2165 				}
2166 			}
2167 		}
2168 		if (vsw_idx == -1) {
2169 			DWARN(vgenp, NULL, "can't find vsw_port\n");
2170 			mutex_exit(&vgenp->lock);
2171 			return (MDEG_FAILURE);
2172 		}
2173 	}
2174 
2175 	for (idx = 0; idx < resp->added.nelem; idx++) {
2176 		if ((vsw_idx != -1) && (vsw_idx == idx)) /* skip vsw_port */
2177 			continue;
2178 
2179 		/* If this port can't be added just skip it. */
2180 		(void) vgen_add_port(vgenp, resp->added.mdp,
2181 		    resp->added.mdep[idx]);
2182 	}
2183 
2184 	for (idx = 0; idx < resp->match_curr.nelem; idx++) {
2185 		(void) vgen_update_port(vgenp, resp->match_curr.mdp,
2186 		    resp->match_curr.mdep[idx],
2187 		    resp->match_prev.mdp,
2188 		    resp->match_prev.mdep[idx]);
2189 	}
2190 
2191 	mutex_exit(&vgenp->lock);
2192 	DBG1(vgenp, NULL, "exit\n");
2193 	return (MDEG_SUCCESS);
2194 }
2195 
2196 /* mdeg callback function for the vnet node */
2197 static int
2198 vgen_mdeg_cb(void *cb_argp, mdeg_result_t *resp)
2199 {
2200 	vgen_t		*vgenp;
2201 	vnet_t		*vnetp;
2202 	md_t		*mdp;
2203 	mde_cookie_t	node;
2204 	uint64_t	inst;
2205 	char		*node_name = NULL;
2206 
2207 	if ((resp == NULL) || (cb_argp == NULL)) {
2208 		return (MDEG_FAILURE);
2209 	}
2210 
2211 	vgenp = (vgen_t *)cb_argp;
2212 	vnetp = vgenp->vnetp;
2213 
2214 	DBG1(vgenp, NULL, "added %d : removed %d : curr matched %d"
2215 	    " : prev matched %d", resp->added.nelem, resp->removed.nelem,
2216 	    resp->match_curr.nelem, resp->match_prev.nelem);
2217 
2218 	mutex_enter(&vgenp->lock);
2219 
2220 	/*
2221 	 * We get an initial callback for this node as 'added' after
2222 	 * registering with mdeg. Note that we would have already gathered
2223 	 * information about this vnet node by walking MD earlier during attach
2224 	 * (in vgen_read_mdprops()). So, there is a window where the properties
2225 	 * of this node might have changed when we get this initial 'added'
2226 	 * callback. We handle this as if an update occured and invoke the same
2227 	 * function which handles updates to the properties of this vnet-node
2228 	 * if any. A non-zero 'match' value indicates that the MD has been
2229 	 * updated and that a 'network' node is present which may or may not
2230 	 * have been updated. It is up to the clients to examine their own
2231 	 * nodes and determine if they have changed.
2232 	 */
2233 	if (resp->added.nelem != 0) {
2234 
2235 		if (resp->added.nelem != 1) {
2236 			cmn_err(CE_NOTE, "!vnet%d: number of nodes added "
2237 			    "invalid: %d\n", vnetp->instance,
2238 			    resp->added.nelem);
2239 			goto vgen_mdeg_cb_err;
2240 		}
2241 
2242 		mdp = resp->added.mdp;
2243 		node = resp->added.mdep[0];
2244 
2245 	} else if (resp->match_curr.nelem != 0) {
2246 
2247 		if (resp->match_curr.nelem != 1) {
2248 			cmn_err(CE_NOTE, "!vnet%d: number of nodes updated "
2249 			    "invalid: %d\n", vnetp->instance,
2250 			    resp->match_curr.nelem);
2251 			goto vgen_mdeg_cb_err;
2252 		}
2253 
2254 		mdp = resp->match_curr.mdp;
2255 		node = resp->match_curr.mdep[0];
2256 
2257 	} else {
2258 		goto vgen_mdeg_cb_err;
2259 	}
2260 
2261 	/* Validate name and instance */
2262 	if (md_get_prop_str(mdp, node, "name", &node_name) != 0) {
2263 		DERR(vgenp, NULL, "unable to get node name\n");
2264 		goto vgen_mdeg_cb_err;
2265 	}
2266 
2267 	/* is this a virtual-network device? */
2268 	if (strcmp(node_name, vnet_propname) != 0) {
2269 		DERR(vgenp, NULL, "%s: Invalid node name: %s\n", node_name);
2270 		goto vgen_mdeg_cb_err;
2271 	}
2272 
2273 	if (md_get_prop_val(mdp, node, "cfg-handle", &inst)) {
2274 		DERR(vgenp, NULL, "prop(cfg-handle) not found\n");
2275 		goto vgen_mdeg_cb_err;
2276 	}
2277 
2278 	/* is this the right instance of vnet? */
2279 	if (inst != vgenp->regprop) {
2280 		DERR(vgenp, NULL,  "Invalid cfg-handle: %lx\n", inst);
2281 		goto vgen_mdeg_cb_err;
2282 	}
2283 
2284 	vgen_update_md_prop(vgenp, mdp, node);
2285 
2286 	mutex_exit(&vgenp->lock);
2287 	return (MDEG_SUCCESS);
2288 
2289 vgen_mdeg_cb_err:
2290 	mutex_exit(&vgenp->lock);
2291 	return (MDEG_FAILURE);
2292 }
2293 
2294 /*
2295  * Check to see if the relevant properties in the specified node have
2296  * changed, and if so take the appropriate action.
2297  */
2298 static void
2299 vgen_update_md_prop(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
2300 {
2301 	uint16_t	pvid;
2302 	uint16_t	*vids;
2303 	uint16_t	nvids;
2304 	vnet_t		*vnetp = vgenp->vnetp;
2305 	uint32_t	mtu;
2306 	boolean_t	pls_update;
2307 	enum		{ MD_init = 0x1,
2308 			    MD_vlans = 0x2,
2309 			    MD_mtu = 0x4,
2310 			    MD_pls = 0x8 } updated;
2311 	int		rv;
2312 
2313 	updated = MD_init;
2314 
2315 	/* Read the vlan ids */
2316 	vgen_vlan_read_ids(vgenp, VGEN_LOCAL, mdp, mdex, &pvid, &vids,
2317 	    &nvids, NULL);
2318 
2319 	/* Determine if there are any vlan id updates */
2320 	if ((pvid != vnetp->pvid) ||		/* pvid changed? */
2321 	    (nvids != vnetp->nvids) ||		/* # of vids changed? */
2322 	    ((nvids != 0) && (vnetp->nvids != 0) &&	/* vids changed? */
2323 	    bcmp(vids, vnetp->vids, sizeof (uint16_t) * nvids))) {
2324 		updated |= MD_vlans;
2325 	}
2326 
2327 	/* Read mtu */
2328 	vgen_mtu_read(vgenp, mdp, mdex, &mtu);
2329 	if (mtu != vnetp->mtu) {
2330 		if (mtu >= ETHERMTU && mtu <= VNET_MAX_MTU) {
2331 			updated |= MD_mtu;
2332 		} else {
2333 			cmn_err(CE_NOTE, "!vnet%d: Unable to process mtu update"
2334 			    " as the specified value:%d is invalid\n",
2335 			    vnetp->instance, mtu);
2336 		}
2337 	}
2338 
2339 	/*
2340 	 * Read the 'linkprop' property.
2341 	 */
2342 	vgen_linkprop_read(vgenp, mdp, mdex, &pls_update);
2343 	if (pls_update != vnetp->pls_update) {
2344 		updated |= MD_pls;
2345 	}
2346 
2347 	/* Now process the updated props */
2348 
2349 	if (updated & MD_vlans) {
2350 
2351 		/* save the new vlan ids */
2352 		vnetp->pvid = pvid;
2353 		if (vnetp->nvids != 0) {
2354 			kmem_free(vnetp->vids,
2355 			    sizeof (uint16_t) * vnetp->nvids);
2356 			vnetp->nvids = 0;
2357 		}
2358 		if (nvids != 0) {
2359 			vnetp->nvids = nvids;
2360 			vnetp->vids = vids;
2361 		}
2362 
2363 		/* reset vlan-unaware peers (ver < 1.3) and restart handshake */
2364 		vgen_reset_vlan_unaware_ports(vgenp);
2365 
2366 	} else {
2367 
2368 		if (nvids != 0) {
2369 			kmem_free(vids, sizeof (uint16_t) * nvids);
2370 		}
2371 	}
2372 
2373 	if (updated & MD_mtu) {
2374 
2375 		DBG2(vgenp, NULL, "curr_mtu(%d) new_mtu(%d)\n",
2376 		    vnetp->mtu, mtu);
2377 
2378 		rv = vnet_mtu_update(vnetp, mtu);
2379 		if (rv == 0) {
2380 			vgenp->max_frame_size = mtu +
2381 			    sizeof (struct ether_header) + VLAN_TAGSZ;
2382 		}
2383 	}
2384 
2385 	if (updated & MD_pls) {
2386 		/* enable/disable physical link state updates */
2387 		vnetp->pls_update = pls_update;
2388 		mutex_exit(&vgenp->lock);
2389 
2390 		/* reset vsw-port to re-negotiate with the updated prop. */
2391 		vgen_reset_vsw_port(vgenp);
2392 
2393 		mutex_enter(&vgenp->lock);
2394 	}
2395 }
2396 
2397 /* add a new port to the device */
2398 static int
2399 vgen_add_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
2400 {
2401 	vgen_port_t	*portp;
2402 	int		rv;
2403 
2404 	portp = kmem_zalloc(sizeof (vgen_port_t), KM_SLEEP);
2405 
2406 	rv = vgen_port_read_props(portp, vgenp, mdp, mdex);
2407 	if (rv != DDI_SUCCESS) {
2408 		KMEM_FREE(portp);
2409 		return (DDI_FAILURE);
2410 	}
2411 
2412 	rv = vgen_port_attach(portp);
2413 	if (rv != DDI_SUCCESS) {
2414 		return (DDI_FAILURE);
2415 	}
2416 
2417 	return (DDI_SUCCESS);
2418 }
2419 
2420 /* read properties of the port from its md node */
2421 static int
2422 vgen_port_read_props(vgen_port_t *portp, vgen_t *vgenp, md_t *mdp,
2423 	mde_cookie_t mdex)
2424 {
2425 	uint64_t		port_num;
2426 	uint64_t		*ldc_ids;
2427 	uint64_t		macaddr;
2428 	uint64_t		val;
2429 	int			num_ldcs;
2430 	int			i;
2431 	int			addrsz;
2432 	int			num_nodes = 0;
2433 	int			listsz = 0;
2434 	mde_cookie_t		*listp = NULL;
2435 	uint8_t			*addrp;
2436 	struct ether_addr	ea;
2437 
2438 	/* read "id" property to get the port number */
2439 	if (md_get_prop_val(mdp, mdex, id_propname, &port_num)) {
2440 		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
2441 		return (DDI_FAILURE);
2442 	}
2443 
2444 	/*
2445 	 * Find the channel endpoint node(s) under this port node.
2446 	 */
2447 	if ((num_nodes = md_node_count(mdp)) <= 0) {
2448 		DWARN(vgenp, NULL, "invalid number of nodes found (%d)",
2449 		    num_nodes);
2450 		return (DDI_FAILURE);
2451 	}
2452 
2453 	/* allocate space for node list */
2454 	listsz = num_nodes * sizeof (mde_cookie_t);
2455 	listp = kmem_zalloc(listsz, KM_NOSLEEP);
2456 	if (listp == NULL)
2457 		return (DDI_FAILURE);
2458 
2459 	num_ldcs = md_scan_dag(mdp, mdex,
2460 	    md_find_name(mdp, channel_propname),
2461 	    md_find_name(mdp, "fwd"), listp);
2462 
2463 	if (num_ldcs <= 0) {
2464 		DWARN(vgenp, NULL, "can't find %s nodes", channel_propname);
2465 		kmem_free(listp, listsz);
2466 		return (DDI_FAILURE);
2467 	}
2468 
2469 	DBG2(vgenp, NULL, "num_ldcs %d", num_ldcs);
2470 
2471 	ldc_ids = kmem_zalloc(num_ldcs * sizeof (uint64_t), KM_NOSLEEP);
2472 	if (ldc_ids == NULL) {
2473 		kmem_free(listp, listsz);
2474 		return (DDI_FAILURE);
2475 	}
2476 
2477 	for (i = 0; i < num_ldcs; i++) {
2478 		/* read channel ids */
2479 		if (md_get_prop_val(mdp, listp[i], id_propname, &ldc_ids[i])) {
2480 			DWARN(vgenp, NULL, "prop(%s) not found\n",
2481 			    id_propname);
2482 			kmem_free(listp, listsz);
2483 			kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
2484 			return (DDI_FAILURE);
2485 		}
2486 		DBG2(vgenp, NULL, "ldc_id 0x%llx", ldc_ids[i]);
2487 	}
2488 
2489 	kmem_free(listp, listsz);
2490 
2491 	if (md_get_prop_data(mdp, mdex, rmacaddr_propname, &addrp,
2492 	    &addrsz)) {
2493 		DWARN(vgenp, NULL, "prop(%s) not found\n", rmacaddr_propname);
2494 		kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
2495 		return (DDI_FAILURE);
2496 	}
2497 
2498 	if (addrsz < ETHERADDRL) {
2499 		DWARN(vgenp, NULL, "invalid address size (%d)\n", addrsz);
2500 		kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
2501 		return (DDI_FAILURE);
2502 	}
2503 
2504 	macaddr = *((uint64_t *)addrp);
2505 
2506 	DBG2(vgenp, NULL, "remote mac address 0x%llx\n", macaddr);
2507 
2508 	for (i = ETHERADDRL - 1; i >= 0; i--) {
2509 		ea.ether_addr_octet[i] = macaddr & 0xFF;
2510 		macaddr >>= 8;
2511 	}
2512 
2513 	if (!(md_get_prop_val(mdp, mdex, swport_propname, &val))) {
2514 		if (val == 0) {
2515 			/* This port is connected to the vswitch */
2516 			portp->is_vsw_port = B_TRUE;
2517 		} else {
2518 			portp->is_vsw_port = B_FALSE;
2519 		}
2520 	}
2521 
2522 	/* now update all properties into the port */
2523 	portp->vgenp = vgenp;
2524 	portp->port_num = port_num;
2525 	ether_copy(&ea, &portp->macaddr);
2526 	portp->ldc_ids = kmem_zalloc(sizeof (uint64_t) * num_ldcs, KM_SLEEP);
2527 	bcopy(ldc_ids, portp->ldc_ids, sizeof (uint64_t) * num_ldcs);
2528 	portp->num_ldcs = num_ldcs;
2529 
2530 	/* read vlan id properties of this port node */
2531 	vgen_vlan_read_ids(portp, VGEN_PEER, mdp, mdex, &portp->pvid,
2532 	    &portp->vids, &portp->nvids, NULL);
2533 
2534 	kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
2535 
2536 	return (DDI_SUCCESS);
2537 }
2538 
2539 /* remove a port from the device */
2540 static int
2541 vgen_remove_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
2542 {
2543 	uint64_t	port_num;
2544 	vgen_port_t	*portp;
2545 	vgen_portlist_t	*plistp;
2546 
2547 	/* read "id" property to get the port number */
2548 	if (md_get_prop_val(mdp, mdex, id_propname, &port_num)) {
2549 		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
2550 		return (DDI_FAILURE);
2551 	}
2552 
2553 	plistp = &(vgenp->vgenports);
2554 
2555 	WRITE_ENTER(&plistp->rwlock);
2556 	portp = vgen_port_lookup(plistp, (int)port_num);
2557 	if (portp == NULL) {
2558 		DWARN(vgenp, NULL, "can't find port(%lx)\n", port_num);
2559 		RW_EXIT(&plistp->rwlock);
2560 		return (DDI_FAILURE);
2561 	}
2562 
2563 	vgen_port_detach_mdeg(portp);
2564 	RW_EXIT(&plistp->rwlock);
2565 
2566 	return (DDI_SUCCESS);
2567 }
2568 
2569 /* attach a port to the device based on mdeg data */
2570 static int
2571 vgen_port_attach(vgen_port_t *portp)
2572 {
2573 	int			i;
2574 	vgen_portlist_t		*plistp;
2575 	vgen_t			*vgenp;
2576 	uint64_t		*ldcids;
2577 	uint32_t		num_ldcs;
2578 	mac_register_t		*macp;
2579 	vio_net_res_type_t	type;
2580 	int			rv;
2581 
2582 	ASSERT(portp != NULL);
2583 
2584 	vgenp = portp->vgenp;
2585 	ldcids = portp->ldc_ids;
2586 	num_ldcs = portp->num_ldcs;
2587 
2588 	DBG1(vgenp, NULL, "port_num(%d)\n", portp->port_num);
2589 
2590 	mutex_init(&portp->lock, NULL, MUTEX_DRIVER, NULL);
2591 	rw_init(&portp->ldclist.rwlock, NULL, RW_DRIVER, NULL);
2592 	portp->ldclist.headp = NULL;
2593 
2594 	for (i = 0; i < num_ldcs; i++) {
2595 		DBG2(vgenp, NULL, "ldcid (%lx)\n", ldcids[i]);
2596 		if (vgen_ldc_attach(portp, ldcids[i]) == DDI_FAILURE) {
2597 			vgen_port_detach(portp);
2598 			return (DDI_FAILURE);
2599 		}
2600 	}
2601 
2602 	/* create vlan id hash table */
2603 	vgen_vlan_create_hash(portp);
2604 
2605 	if (portp->is_vsw_port == B_TRUE) {
2606 		/* This port is connected to the switch port */
2607 		(void) atomic_swap_32(&portp->use_vsw_port, B_FALSE);
2608 		type = VIO_NET_RES_LDC_SERVICE;
2609 	} else {
2610 		(void) atomic_swap_32(&portp->use_vsw_port, B_TRUE);
2611 		type = VIO_NET_RES_LDC_GUEST;
2612 	}
2613 
2614 	if ((macp = mac_alloc(MAC_VERSION)) == NULL) {
2615 		vgen_port_detach(portp);
2616 		return (DDI_FAILURE);
2617 	}
2618 	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
2619 	macp->m_driver = portp;
2620 	macp->m_dip = vgenp->vnetdip;
2621 	macp->m_src_addr = (uint8_t *)&(vgenp->macaddr);
2622 	macp->m_callbacks = &vgen_m_callbacks;
2623 	macp->m_min_sdu = 0;
2624 	macp->m_max_sdu = ETHERMTU;
2625 
2626 	mutex_enter(&portp->lock);
2627 	rv = vio_net_resource_reg(macp, type, vgenp->macaddr,
2628 	    portp->macaddr, &portp->vhp, &portp->vcb);
2629 	mutex_exit(&portp->lock);
2630 	mac_free(macp);
2631 
2632 	if (rv == 0) {
2633 		/* link it into the list of ports */
2634 		plistp = &(vgenp->vgenports);
2635 		WRITE_ENTER(&plistp->rwlock);
2636 		vgen_port_list_insert(portp);
2637 		RW_EXIT(&plistp->rwlock);
2638 
2639 		if (portp->is_vsw_port == B_TRUE) {
2640 			/* We now have the vswitch port attached */
2641 			vgenp->vsw_portp = portp;
2642 			(void) atomic_swap_32(&vgenp->vsw_port_refcnt, 0);
2643 		}
2644 	} else {
2645 		DERR(vgenp, NULL, "vio_net_resource_reg failed for portp=0x%p",
2646 		    portp);
2647 		vgen_port_detach(portp);
2648 	}
2649 
2650 	DBG1(vgenp, NULL, "exit: port_num(%d)\n", portp->port_num);
2651 	return (DDI_SUCCESS);
2652 }
2653 
2654 /* detach a port from the device based on mdeg data */
2655 static void
2656 vgen_port_detach_mdeg(vgen_port_t *portp)
2657 {
2658 	vgen_t *vgenp = portp->vgenp;
2659 
2660 	DBG1(vgenp, NULL, "enter: port_num(%d)\n", portp->port_num);
2661 
2662 	mutex_enter(&portp->lock);
2663 
2664 	/* stop the port if needed */
2665 	if (portp->flags & VGEN_STARTED) {
2666 		vgen_port_uninit(portp);
2667 		portp->flags &= ~(VGEN_STARTED);
2668 	}
2669 
2670 	mutex_exit(&portp->lock);
2671 	vgen_port_detach(portp);
2672 
2673 	DBG1(vgenp, NULL, "exit: port_num(%d)\n", portp->port_num);
2674 }
2675 
2676 static int
2677 vgen_update_port(vgen_t *vgenp, md_t *curr_mdp, mde_cookie_t curr_mdex,
2678 	md_t *prev_mdp, mde_cookie_t prev_mdex)
2679 {
2680 	uint64_t	cport_num;
2681 	uint64_t	pport_num;
2682 	vgen_portlist_t	*plistp;
2683 	vgen_port_t	*portp;
2684 	boolean_t	updated_vlans = B_FALSE;
2685 	uint16_t	pvid;
2686 	uint16_t	*vids;
2687 	uint16_t	nvids;
2688 
2689 	/*
2690 	 * For now, we get port updates only if vlan ids changed.
2691 	 * We read the port num and do some sanity check.
2692 	 */
2693 	if (md_get_prop_val(curr_mdp, curr_mdex, id_propname, &cport_num)) {
2694 		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
2695 		return (DDI_FAILURE);
2696 	}
2697 
2698 	if (md_get_prop_val(prev_mdp, prev_mdex, id_propname, &pport_num)) {
2699 		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
2700 		return (DDI_FAILURE);
2701 	}
2702 	if (cport_num != pport_num)
2703 		return (DDI_FAILURE);
2704 
2705 	plistp = &(vgenp->vgenports);
2706 
2707 	READ_ENTER(&plistp->rwlock);
2708 
2709 	portp = vgen_port_lookup(plistp, (int)cport_num);
2710 	if (portp == NULL) {
2711 		DWARN(vgenp, NULL, "can't find port(%lx)\n", cport_num);
2712 		RW_EXIT(&plistp->rwlock);
2713 		return (DDI_FAILURE);
2714 	}
2715 
2716 	/* Read the vlan ids */
2717 	vgen_vlan_read_ids(portp, VGEN_PEER, curr_mdp, curr_mdex, &pvid, &vids,
2718 	    &nvids, NULL);
2719 
2720 	/* Determine if there are any vlan id updates */
2721 	if ((pvid != portp->pvid) ||		/* pvid changed? */
2722 	    (nvids != portp->nvids) ||		/* # of vids changed? */
2723 	    ((nvids != 0) && (portp->nvids != 0) &&	/* vids changed? */
2724 	    bcmp(vids, portp->vids, sizeof (uint16_t) * nvids))) {
2725 		updated_vlans = B_TRUE;
2726 	}
2727 
2728 	if (updated_vlans == B_FALSE) {
2729 		RW_EXIT(&plistp->rwlock);
2730 		return (DDI_FAILURE);
2731 	}
2732 
2733 	/* remove the port from vlans it has been assigned to */
2734 	vgen_vlan_remove_ids(portp);
2735 
2736 	/* save the new vlan ids */
2737 	portp->pvid = pvid;
2738 	if (portp->nvids != 0) {
2739 		kmem_free(portp->vids, sizeof (uint16_t) * portp->nvids);
2740 		portp->nvids = 0;
2741 	}
2742 	if (nvids != 0) {
2743 		portp->vids = kmem_zalloc(sizeof (uint16_t) * nvids, KM_SLEEP);
2744 		bcopy(vids, portp->vids, sizeof (uint16_t) * nvids);
2745 		portp->nvids = nvids;
2746 		kmem_free(vids, sizeof (uint16_t) * nvids);
2747 	}
2748 
2749 	/* add port to the new vlans */
2750 	vgen_vlan_add_ids(portp);
2751 
2752 	/* reset the port if it is vlan unaware (ver < 1.3) */
2753 	vgen_vlan_unaware_port_reset(portp);
2754 
2755 	RW_EXIT(&plistp->rwlock);
2756 
2757 	return (DDI_SUCCESS);
2758 }
2759 
2760 static uint64_t
2761 vgen_port_stat(vgen_port_t *portp, uint_t stat)
2762 {
2763 	vgen_ldclist_t	*ldclp;
2764 	vgen_ldc_t *ldcp;
2765 	uint64_t	val;
2766 
2767 	val = 0;
2768 	ldclp = &portp->ldclist;
2769 
2770 	READ_ENTER(&ldclp->rwlock);
2771 	for (ldcp = ldclp->headp; ldcp != NULL; ldcp = ldcp->nextp) {
2772 		val += vgen_ldc_stat(ldcp, stat);
2773 	}
2774 	RW_EXIT(&ldclp->rwlock);
2775 
2776 	return (val);
2777 }
2778 
2779 /* allocate receive resources */
2780 static int
2781 vgen_init_multipools(vgen_ldc_t *ldcp)
2782 {
2783 	size_t		data_sz;
2784 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
2785 	int		status;
2786 	uint32_t	sz1 = 0;
2787 	uint32_t	sz2 = 0;
2788 	uint32_t	sz3 = 0;
2789 	uint32_t	sz4 = 0;
2790 
2791 	/*
2792 	 * We round up the mtu specified to be a multiple of 2K.
2793 	 * We then create rx pools based on the rounded up size.
2794 	 */
2795 	data_sz = vgenp->max_frame_size + VNET_IPALIGN + VNET_LDCALIGN;
2796 	data_sz = VNET_ROUNDUP_2K(data_sz);
2797 
2798 	/*
2799 	 * If pool sizes are specified, use them. Note that the presence of
2800 	 * the first tunable will be used as a hint.
2801 	 */
2802 	if (vgen_rbufsz1 != 0) {
2803 
2804 		sz1 = vgen_rbufsz1;
2805 		sz2 = vgen_rbufsz2;
2806 		sz3 = vgen_rbufsz3;
2807 		sz4 = vgen_rbufsz4;
2808 
2809 		if (sz4 == 0) { /* need 3 pools */
2810 
2811 			ldcp->max_rxpool_size = sz3;
2812 			status = vio_init_multipools(&ldcp->vmp,
2813 			    VGEN_NUM_VMPOOLS, sz1, sz2, sz3, vgen_nrbufs1,
2814 			    vgen_nrbufs2, vgen_nrbufs3);
2815 
2816 		} else {
2817 
2818 			ldcp->max_rxpool_size = sz4;
2819 			status = vio_init_multipools(&ldcp->vmp,
2820 			    VGEN_NUM_VMPOOLS + 1, sz1, sz2, sz3, sz4,
2821 			    vgen_nrbufs1, vgen_nrbufs2, vgen_nrbufs3,
2822 			    vgen_nrbufs4);
2823 		}
2824 		return (status);
2825 	}
2826 
2827 	/*
2828 	 * Pool sizes are not specified. We select the pool sizes based on the
2829 	 * mtu if vnet_jumbo_rxpools is enabled.
2830 	 */
2831 	if (vnet_jumbo_rxpools == B_FALSE || data_sz == VNET_2K) {
2832 		/*
2833 		 * Receive buffer pool allocation based on mtu is disabled.
2834 		 * Use the default mechanism of standard size pool allocation.
2835 		 */
2836 		sz1 = VGEN_DBLK_SZ_128;
2837 		sz2 = VGEN_DBLK_SZ_256;
2838 		sz3 = VGEN_DBLK_SZ_2048;
2839 		ldcp->max_rxpool_size = sz3;
2840 
2841 		status = vio_init_multipools(&ldcp->vmp, VGEN_NUM_VMPOOLS,
2842 		    sz1, sz2, sz3,
2843 		    vgen_nrbufs1, vgen_nrbufs2, vgen_nrbufs3);
2844 
2845 		return (status);
2846 	}
2847 
2848 	switch (data_sz) {
2849 
2850 	case VNET_4K:
2851 
2852 		sz1 = VGEN_DBLK_SZ_128;
2853 		sz2 = VGEN_DBLK_SZ_256;
2854 		sz3 = VGEN_DBLK_SZ_2048;
2855 		sz4 = sz3 << 1;			/* 4K */
2856 		ldcp->max_rxpool_size = sz4;
2857 
2858 		status = vio_init_multipools(&ldcp->vmp, VGEN_NUM_VMPOOLS + 1,
2859 		    sz1, sz2, sz3, sz4,
2860 		    vgen_nrbufs1, vgen_nrbufs2, vgen_nrbufs3, vgen_nrbufs4);
2861 		break;
2862 
2863 	default:	/* data_sz:  4K+ to 16K */
2864 
2865 		sz1 = VGEN_DBLK_SZ_256;
2866 		sz2 = VGEN_DBLK_SZ_2048;
2867 		sz3 = data_sz >> 1;	/* Jumbo-size/2 */
2868 		sz4 = data_sz;		/* Jumbo-size  */
2869 		ldcp->max_rxpool_size = sz4;
2870 
2871 		status = vio_init_multipools(&ldcp->vmp, VGEN_NUM_VMPOOLS + 1,
2872 		    sz1, sz2, sz3, sz4,
2873 		    vgen_nrbufs1, vgen_nrbufs2, vgen_nrbufs3, vgen_nrbufs4);
2874 		break;
2875 
2876 	}
2877 
2878 	return (status);
2879 }
2880 
2881 /* attach the channel corresponding to the given ldc_id to the port */
2882 static int
2883 vgen_ldc_attach(vgen_port_t *portp, uint64_t ldc_id)
2884 {
2885 	vgen_t 		*vgenp;
2886 	vgen_ldclist_t	*ldclp;
2887 	vgen_ldc_t 	*ldcp, **prev_ldcp;
2888 	ldc_attr_t 	attr;
2889 	int 		status;
2890 	ldc_status_t	istatus;
2891 	char		kname[MAXNAMELEN];
2892 	int		instance;
2893 	enum	{AST_init = 0x0, AST_ldc_alloc = 0x1,
2894 		AST_mutex_init = 0x2, AST_ldc_init = 0x4,
2895 		AST_ldc_reg_cb = 0x8, AST_alloc_tx_ring = 0x10,
2896 		AST_create_rxmblks = 0x20,
2897 		AST_create_rcv_thread = 0x40} attach_state;
2898 
2899 	attach_state = AST_init;
2900 	vgenp = portp->vgenp;
2901 	ldclp = &portp->ldclist;
2902 
2903 	ldcp = kmem_zalloc(sizeof (vgen_ldc_t), KM_NOSLEEP);
2904 	if (ldcp == NULL) {
2905 		goto ldc_attach_failed;
2906 	}
2907 	ldcp->ldc_id = ldc_id;
2908 	ldcp->portp = portp;
2909 
2910 	attach_state |= AST_ldc_alloc;
2911 
2912 	mutex_init(&ldcp->txlock, NULL, MUTEX_DRIVER, NULL);
2913 	mutex_init(&ldcp->cblock, NULL, MUTEX_DRIVER, NULL);
2914 	mutex_init(&ldcp->tclock, NULL, MUTEX_DRIVER, NULL);
2915 	mutex_init(&ldcp->wrlock, NULL, MUTEX_DRIVER, NULL);
2916 	mutex_init(&ldcp->rxlock, NULL, MUTEX_DRIVER, NULL);
2917 	mutex_init(&ldcp->pollq_lock, NULL, MUTEX_DRIVER, NULL);
2918 
2919 	attach_state |= AST_mutex_init;
2920 
2921 	attr.devclass = LDC_DEV_NT;
2922 	attr.instance = vgenp->instance;
2923 	attr.mode = LDC_MODE_UNRELIABLE;
2924 	attr.mtu = vnet_ldc_mtu;
2925 	status = ldc_init(ldc_id, &attr, &ldcp->ldc_handle);
2926 	if (status != 0) {
2927 		DWARN(vgenp, ldcp, "ldc_init failed,rv (%d)\n", status);
2928 		goto ldc_attach_failed;
2929 	}
2930 	attach_state |= AST_ldc_init;
2931 
2932 	if (vgen_rcv_thread_enabled) {
2933 		ldcp->rcv_thr_flags = 0;
2934 
2935 		mutex_init(&ldcp->rcv_thr_lock, NULL, MUTEX_DRIVER, NULL);
2936 		cv_init(&ldcp->rcv_thr_cv, NULL, CV_DRIVER, NULL);
2937 		ldcp->rcv_thread = thread_create(NULL, 2 * DEFAULTSTKSZ,
2938 		    vgen_ldc_rcv_worker, ldcp, 0, &p0, TS_RUN, maxclsyspri);
2939 
2940 		attach_state |= AST_create_rcv_thread;
2941 		if (ldcp->rcv_thread == NULL) {
2942 			DWARN(vgenp, ldcp, "Failed to create worker thread");
2943 			goto ldc_attach_failed;
2944 		}
2945 	}
2946 
2947 	status = ldc_reg_callback(ldcp->ldc_handle, vgen_ldc_cb, (caddr_t)ldcp);
2948 	if (status != 0) {
2949 		DWARN(vgenp, ldcp, "ldc_reg_callback failed, rv (%d)\n",
2950 		    status);
2951 		goto ldc_attach_failed;
2952 	}
2953 	/*
2954 	 * allocate a message for ldc_read()s, big enough to hold ctrl and
2955 	 * data msgs, including raw data msgs used to recv priority frames.
2956 	 */
2957 	ldcp->msglen = VIO_PKT_DATA_HDRSIZE + vgenp->max_frame_size;
2958 	ldcp->ldcmsg = kmem_alloc(ldcp->msglen, KM_SLEEP);
2959 	attach_state |= AST_ldc_reg_cb;
2960 
2961 	(void) ldc_status(ldcp->ldc_handle, &istatus);
2962 	ASSERT(istatus == LDC_INIT);
2963 	ldcp->ldc_status = istatus;
2964 
2965 	/* allocate transmit resources */
2966 	status = vgen_alloc_tx_ring(ldcp);
2967 	if (status != 0) {
2968 		goto ldc_attach_failed;
2969 	}
2970 	attach_state |= AST_alloc_tx_ring;
2971 
2972 	/* allocate receive resources */
2973 	status = vgen_init_multipools(ldcp);
2974 	if (status != 0) {
2975 		/*
2976 		 * We do not return failure if receive mblk pools can't be
2977 		 * allocated; instead allocb(9F) will be used to dynamically
2978 		 * allocate buffers during receive.
2979 		 */
2980 		DWARN(vgenp, ldcp,
2981 		    "vnet%d: status(%d), failed to allocate rx mblk pools for "
2982 		    "channel(0x%lx)\n",
2983 		    vgenp->instance, status, ldcp->ldc_id);
2984 	} else {
2985 		attach_state |= AST_create_rxmblks;
2986 	}
2987 
2988 	/* Setup kstats for the channel */
2989 	instance = vgenp->instance;
2990 	(void) sprintf(kname, "vnetldc0x%lx", ldcp->ldc_id);
2991 	ldcp->ksp = vgen_setup_kstats("vnet", instance, kname, &ldcp->stats);
2992 	if (ldcp->ksp == NULL) {
2993 		goto ldc_attach_failed;
2994 	}
2995 
2996 	/* initialize vgen_versions supported */
2997 	bcopy(vgen_versions, ldcp->vgen_versions, sizeof (ldcp->vgen_versions));
2998 	vgen_reset_vnet_proto_ops(ldcp);
2999 
3000 	/* link it into the list of channels for this port */
3001 	WRITE_ENTER(&ldclp->rwlock);
3002 	prev_ldcp = (vgen_ldc_t **)(&ldclp->headp);
3003 	ldcp->nextp = *prev_ldcp;
3004 	*prev_ldcp = ldcp;
3005 	RW_EXIT(&ldclp->rwlock);
3006 
3007 	ldcp->link_state = LINK_STATE_UNKNOWN;
3008 #ifdef	VNET_IOC_DEBUG
3009 	ldcp->link_down_forced = B_FALSE;
3010 #endif
3011 	ldcp->flags |= CHANNEL_ATTACHED;
3012 	return (DDI_SUCCESS);
3013 
3014 ldc_attach_failed:
3015 	if (attach_state & AST_ldc_reg_cb) {
3016 		(void) ldc_unreg_callback(ldcp->ldc_handle);
3017 		kmem_free(ldcp->ldcmsg, ldcp->msglen);
3018 	}
3019 	if (attach_state & AST_create_rcv_thread) {
3020 		if (ldcp->rcv_thread != NULL) {
3021 			vgen_stop_rcv_thread(ldcp);
3022 		}
3023 		mutex_destroy(&ldcp->rcv_thr_lock);
3024 		cv_destroy(&ldcp->rcv_thr_cv);
3025 	}
3026 	if (attach_state & AST_create_rxmblks) {
3027 		vio_mblk_pool_t *fvmp = NULL;
3028 		vio_destroy_multipools(&ldcp->vmp, &fvmp);
3029 		ASSERT(fvmp == NULL);
3030 	}
3031 	if (attach_state & AST_alloc_tx_ring) {
3032 		vgen_free_tx_ring(ldcp);
3033 	}
3034 	if (attach_state & AST_ldc_init) {
3035 		(void) ldc_fini(ldcp->ldc_handle);
3036 	}
3037 	if (attach_state & AST_mutex_init) {
3038 		mutex_destroy(&ldcp->tclock);
3039 		mutex_destroy(&ldcp->txlock);
3040 		mutex_destroy(&ldcp->cblock);
3041 		mutex_destroy(&ldcp->wrlock);
3042 		mutex_destroy(&ldcp->rxlock);
3043 		mutex_destroy(&ldcp->pollq_lock);
3044 	}
3045 	if (attach_state & AST_ldc_alloc) {
3046 		KMEM_FREE(ldcp);
3047 	}
3048 	return (DDI_FAILURE);
3049 }
3050 
3051 /* detach a channel from the port */
3052 static void
3053 vgen_ldc_detach(vgen_ldc_t *ldcp)
3054 {
3055 	vgen_port_t	*portp;
3056 	vgen_t 		*vgenp;
3057 	vgen_ldc_t 	*pldcp;
3058 	vgen_ldc_t	**prev_ldcp;
3059 	vgen_ldclist_t	*ldclp;
3060 
3061 	portp = ldcp->portp;
3062 	vgenp = portp->vgenp;
3063 	ldclp = &portp->ldclist;
3064 
3065 	prev_ldcp =  (vgen_ldc_t **)&ldclp->headp;
3066 	for (; (pldcp = *prev_ldcp) != NULL; prev_ldcp = &pldcp->nextp) {
3067 		if (pldcp == ldcp) {
3068 			break;
3069 		}
3070 	}
3071 
3072 	if (pldcp == NULL) {
3073 		/* invalid ldcp? */
3074 		return;
3075 	}
3076 
3077 	if (ldcp->ldc_status != LDC_INIT) {
3078 		DWARN(vgenp, ldcp, "ldc_status is not INIT\n");
3079 	}
3080 
3081 	if (ldcp->flags & CHANNEL_ATTACHED) {
3082 		ldcp->flags &= ~(CHANNEL_ATTACHED);
3083 
3084 		(void) ldc_unreg_callback(ldcp->ldc_handle);
3085 		if (ldcp->rcv_thread != NULL) {
3086 			/* First stop the receive thread */
3087 			vgen_stop_rcv_thread(ldcp);
3088 			mutex_destroy(&ldcp->rcv_thr_lock);
3089 			cv_destroy(&ldcp->rcv_thr_cv);
3090 		}
3091 		kmem_free(ldcp->ldcmsg, ldcp->msglen);
3092 
3093 		vgen_destroy_kstats(ldcp->ksp);
3094 		ldcp->ksp = NULL;
3095 
3096 		/*
3097 		 * if we cannot reclaim all mblks, put this
3098 		 * on the list of pools(vgenp->rmp) to be reclaimed when the
3099 		 * device gets detached (see vgen_uninit()).
3100 		 */
3101 		vio_destroy_multipools(&ldcp->vmp, &vgenp->rmp);
3102 
3103 		/* free transmit resources */
3104 		vgen_free_tx_ring(ldcp);
3105 
3106 		(void) ldc_fini(ldcp->ldc_handle);
3107 		mutex_destroy(&ldcp->tclock);
3108 		mutex_destroy(&ldcp->txlock);
3109 		mutex_destroy(&ldcp->cblock);
3110 		mutex_destroy(&ldcp->wrlock);
3111 		mutex_destroy(&ldcp->rxlock);
3112 		mutex_destroy(&ldcp->pollq_lock);
3113 
3114 		/* unlink it from the list */
3115 		*prev_ldcp = ldcp->nextp;
3116 		KMEM_FREE(ldcp);
3117 	}
3118 }
3119 
3120 /*
3121  * This function allocates transmit resources for the channel.
3122  * The resources consist of a transmit descriptor ring and an associated
3123  * transmit buffer ring.
3124  */
3125 static int
3126 vgen_alloc_tx_ring(vgen_ldc_t *ldcp)
3127 {
3128 	void *tbufp;
3129 	ldc_mem_info_t minfo;
3130 	uint32_t txdsize;
3131 	uint32_t tbufsize;
3132 	int status;
3133 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3134 
3135 	ldcp->num_txds = vnet_ntxds;
3136 	txdsize = sizeof (vnet_public_desc_t);
3137 	tbufsize = sizeof (vgen_private_desc_t);
3138 
3139 	/* allocate transmit buffer ring */
3140 	tbufp = kmem_zalloc(ldcp->num_txds * tbufsize, KM_NOSLEEP);
3141 	if (tbufp == NULL) {
3142 		return (DDI_FAILURE);
3143 	}
3144 
3145 	/* create transmit descriptor ring */
3146 	status = ldc_mem_dring_create(ldcp->num_txds, txdsize,
3147 	    &ldcp->tx_dhandle);
3148 	if (status) {
3149 		DWARN(vgenp, ldcp, "ldc_mem_dring_create() failed\n");
3150 		kmem_free(tbufp, ldcp->num_txds * tbufsize);
3151 		return (DDI_FAILURE);
3152 	}
3153 
3154 	/* get the addr of descripror ring */
3155 	status = ldc_mem_dring_info(ldcp->tx_dhandle, &minfo);
3156 	if (status) {
3157 		DWARN(vgenp, ldcp, "ldc_mem_dring_info() failed\n");
3158 		kmem_free(tbufp, ldcp->num_txds * tbufsize);
3159 		(void) ldc_mem_dring_destroy(ldcp->tx_dhandle);
3160 		ldcp->tbufp = NULL;
3161 		return (DDI_FAILURE);
3162 	}
3163 	ldcp->txdp = (vnet_public_desc_t *)(minfo.vaddr);
3164 	ldcp->tbufp = tbufp;
3165 
3166 	ldcp->txdendp = &((ldcp->txdp)[ldcp->num_txds]);
3167 	ldcp->tbufendp = &((ldcp->tbufp)[ldcp->num_txds]);
3168 
3169 	return (DDI_SUCCESS);
3170 }
3171 
3172 /* Free transmit resources for the channel */
3173 static void
3174 vgen_free_tx_ring(vgen_ldc_t *ldcp)
3175 {
3176 	int tbufsize = sizeof (vgen_private_desc_t);
3177 
3178 	/* free transmit descriptor ring */
3179 	(void) ldc_mem_dring_destroy(ldcp->tx_dhandle);
3180 
3181 	/* free transmit buffer ring */
3182 	kmem_free(ldcp->tbufp, ldcp->num_txds * tbufsize);
3183 	ldcp->txdp = ldcp->txdendp = NULL;
3184 	ldcp->tbufp = ldcp->tbufendp = NULL;
3185 }
3186 
3187 /* enable transmit/receive on the channels for the port */
3188 static void
3189 vgen_init_ldcs(vgen_port_t *portp)
3190 {
3191 	vgen_ldclist_t	*ldclp = &portp->ldclist;
3192 	vgen_ldc_t	*ldcp;
3193 
3194 	READ_ENTER(&ldclp->rwlock);
3195 	ldcp =  ldclp->headp;
3196 	for (; ldcp  != NULL; ldcp = ldcp->nextp) {
3197 		(void) vgen_ldc_init(ldcp);
3198 	}
3199 	RW_EXIT(&ldclp->rwlock);
3200 }
3201 
3202 /* stop transmit/receive on the channels for the port */
3203 static void
3204 vgen_uninit_ldcs(vgen_port_t *portp)
3205 {
3206 	vgen_ldclist_t	*ldclp = &portp->ldclist;
3207 	vgen_ldc_t	*ldcp;
3208 
3209 	READ_ENTER(&ldclp->rwlock);
3210 	ldcp =  ldclp->headp;
3211 	for (; ldcp  != NULL; ldcp = ldcp->nextp) {
3212 		vgen_ldc_uninit(ldcp);
3213 	}
3214 	RW_EXIT(&ldclp->rwlock);
3215 }
3216 
3217 /* enable transmit/receive on the channel */
3218 static int
3219 vgen_ldc_init(vgen_ldc_t *ldcp)
3220 {
3221 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3222 	ldc_status_t	istatus;
3223 	int		rv;
3224 	uint32_t	retries = 0;
3225 	enum	{ ST_init = 0x0, ST_ldc_open = 0x1,
3226 		ST_init_tbufs = 0x2, ST_cb_enable = 0x4} init_state;
3227 	init_state = ST_init;
3228 
3229 	DBG1(vgenp, ldcp, "enter\n");
3230 	LDC_LOCK(ldcp);
3231 
3232 	rv = ldc_open(ldcp->ldc_handle);
3233 	if (rv != 0) {
3234 		DWARN(vgenp, ldcp, "ldc_open failed: rv(%d)\n", rv);
3235 		goto ldcinit_failed;
3236 	}
3237 	init_state |= ST_ldc_open;
3238 
3239 	(void) ldc_status(ldcp->ldc_handle, &istatus);
3240 	if (istatus != LDC_OPEN && istatus != LDC_READY) {
3241 		DWARN(vgenp, ldcp, "status(%d) is not OPEN/READY\n", istatus);
3242 		goto ldcinit_failed;
3243 	}
3244 	ldcp->ldc_status = istatus;
3245 
3246 	rv = vgen_init_tbufs(ldcp);
3247 	if (rv != 0) {
3248 		DWARN(vgenp, ldcp, "vgen_init_tbufs() failed\n");
3249 		goto ldcinit_failed;
3250 	}
3251 	init_state |= ST_init_tbufs;
3252 
3253 	rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_ENABLE);
3254 	if (rv != 0) {
3255 		DWARN(vgenp, ldcp, "ldc_set_cb_mode failed: rv(%d)\n", rv);
3256 		goto ldcinit_failed;
3257 	}
3258 
3259 	init_state |= ST_cb_enable;
3260 
3261 	do {
3262 		rv = ldc_up(ldcp->ldc_handle);
3263 		if ((rv != 0) && (rv == EWOULDBLOCK)) {
3264 			DBG2(vgenp, ldcp, "ldc_up err rv(%d)\n", rv);
3265 			drv_usecwait(VGEN_LDC_UP_DELAY);
3266 		}
3267 		if (retries++ >= vgen_ldcup_retries)
3268 			break;
3269 	} while (rv == EWOULDBLOCK);
3270 
3271 	(void) ldc_status(ldcp->ldc_handle, &istatus);
3272 	if (istatus == LDC_UP) {
3273 		DWARN(vgenp, ldcp, "status(%d) is UP\n", istatus);
3274 	}
3275 
3276 	ldcp->ldc_status = istatus;
3277 
3278 	/* initialize transmit watchdog timeout */
3279 	ldcp->wd_tid = timeout(vgen_ldc_watchdog, (caddr_t)ldcp,
3280 	    drv_usectohz(vnet_ldcwd_interval * 1000));
3281 
3282 	ldcp->hphase = -1;
3283 	ldcp->flags |= CHANNEL_STARTED;
3284 
3285 	/* if channel is already UP - start handshake */
3286 	if (istatus == LDC_UP) {
3287 		vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3288 		if (ldcp->portp != vgenp->vsw_portp) {
3289 			/*
3290 			 * As the channel is up, use this port from now on.
3291 			 */
3292 			(void) atomic_swap_32(
3293 			    &ldcp->portp->use_vsw_port, B_FALSE);
3294 		}
3295 
3296 		/* Initialize local session id */
3297 		ldcp->local_sid = ddi_get_lbolt();
3298 
3299 		/* clear peer session id */
3300 		ldcp->peer_sid = 0;
3301 		ldcp->hretries = 0;
3302 
3303 		/* Initiate Handshake process with peer ldc endpoint */
3304 		vgen_reset_hphase(ldcp);
3305 
3306 		mutex_exit(&ldcp->tclock);
3307 		mutex_exit(&ldcp->txlock);
3308 		mutex_exit(&ldcp->wrlock);
3309 		mutex_exit(&ldcp->rxlock);
3310 		vgen_handshake(vh_nextphase(ldcp));
3311 		mutex_exit(&ldcp->cblock);
3312 	} else {
3313 		LDC_UNLOCK(ldcp);
3314 	}
3315 
3316 	return (DDI_SUCCESS);
3317 
3318 ldcinit_failed:
3319 	if (init_state & ST_cb_enable) {
3320 		(void) ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE);
3321 	}
3322 	if (init_state & ST_init_tbufs) {
3323 		vgen_uninit_tbufs(ldcp);
3324 	}
3325 	if (init_state & ST_ldc_open) {
3326 		(void) ldc_close(ldcp->ldc_handle);
3327 	}
3328 	LDC_UNLOCK(ldcp);
3329 	DBG1(vgenp, ldcp, "exit\n");
3330 	return (DDI_FAILURE);
3331 }
3332 
3333 /* stop transmit/receive on the channel */
3334 static void
3335 vgen_ldc_uninit(vgen_ldc_t *ldcp)
3336 {
3337 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3338 	int	rv;
3339 	uint_t	retries = 0;
3340 
3341 	DBG1(vgenp, ldcp, "enter\n");
3342 	LDC_LOCK(ldcp);
3343 
3344 	if ((ldcp->flags & CHANNEL_STARTED) == 0) {
3345 		LDC_UNLOCK(ldcp);
3346 		DWARN(vgenp, ldcp, "CHANNEL_STARTED flag is not set\n");
3347 		return;
3348 	}
3349 
3350 	/* disable further callbacks */
3351 	rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE);
3352 	if (rv != 0) {
3353 		DWARN(vgenp, ldcp, "ldc_set_cb_mode failed\n");
3354 	}
3355 
3356 	/*
3357 	 * clear handshake done bit and wait for pending tx and cb to finish.
3358 	 * release locks before untimeout(9F) is invoked to cancel timeouts.
3359 	 */
3360 	ldcp->hphase &= ~(VH_DONE);
3361 	LDC_UNLOCK(ldcp);
3362 
3363 	if (vgenp->vsw_portp == ldcp->portp) {
3364 		vio_net_report_err_t rep_err =
3365 		    ldcp->portp->vcb.vio_net_report_err;
3366 		rep_err(ldcp->portp->vhp, VIO_NET_RES_DOWN);
3367 	}
3368 
3369 	/* cancel handshake watchdog timeout */
3370 	if (ldcp->htid) {
3371 		(void) untimeout(ldcp->htid);
3372 		ldcp->htid = 0;
3373 	}
3374 
3375 	if (ldcp->cancel_htid) {
3376 		(void) untimeout(ldcp->cancel_htid);
3377 		ldcp->cancel_htid = 0;
3378 	}
3379 
3380 	/* cancel transmit watchdog timeout */
3381 	if (ldcp->wd_tid) {
3382 		(void) untimeout(ldcp->wd_tid);
3383 		ldcp->wd_tid = 0;
3384 	}
3385 
3386 	drv_usecwait(1000);
3387 
3388 	if (ldcp->rcv_thread != NULL) {
3389 		/*
3390 		 * Note that callbacks have been disabled already(above). The
3391 		 * drain function takes care of the condition when an already
3392 		 * executing callback signals the worker to start processing or
3393 		 * the worker has already been signalled and is in the middle of
3394 		 * processing.
3395 		 */
3396 		vgen_drain_rcv_thread(ldcp);
3397 	}
3398 
3399 	/* acquire locks again; any pending transmits and callbacks are done */
3400 	LDC_LOCK(ldcp);
3401 
3402 	vgen_reset_hphase(ldcp);
3403 
3404 	vgen_uninit_tbufs(ldcp);
3405 
3406 	/* close the channel - retry on EAGAIN */
3407 	while ((rv = ldc_close(ldcp->ldc_handle)) == EAGAIN) {
3408 		if (++retries > vgen_ldccl_retries) {
3409 			break;
3410 		}
3411 		drv_usecwait(VGEN_LDC_CLOSE_DELAY);
3412 	}
3413 	if (rv != 0) {
3414 		cmn_err(CE_NOTE,
3415 		    "!vnet%d: Error(%d) closing the channel(0x%lx)\n",
3416 		    vgenp->instance, rv, ldcp->ldc_id);
3417 	}
3418 
3419 	ldcp->ldc_status = LDC_INIT;
3420 	ldcp->flags &= ~(CHANNEL_STARTED);
3421 
3422 	LDC_UNLOCK(ldcp);
3423 
3424 	DBG1(vgenp, ldcp, "exit\n");
3425 }
3426 
3427 /* Initialize the transmit buffer ring for the channel */
3428 static int
3429 vgen_init_tbufs(vgen_ldc_t *ldcp)
3430 {
3431 	vgen_private_desc_t	*tbufp;
3432 	vnet_public_desc_t	*txdp;
3433 	vio_dring_entry_hdr_t		*hdrp;
3434 	int 			i;
3435 	int 			rv;
3436 	caddr_t			datap = NULL;
3437 	int			ci;
3438 	uint32_t		ncookies;
3439 	size_t			data_sz;
3440 	vgen_t			*vgenp;
3441 
3442 	vgenp = LDC_TO_VGEN(ldcp);
3443 
3444 	bzero(ldcp->tbufp, sizeof (*tbufp) * (ldcp->num_txds));
3445 	bzero(ldcp->txdp, sizeof (*txdp) * (ldcp->num_txds));
3446 
3447 	/*
3448 	 * In order to ensure that the number of ldc cookies per descriptor is
3449 	 * limited to be within the default MAX_COOKIES (2), we take the steps
3450 	 * outlined below:
3451 	 *
3452 	 * Align the entire data buffer area to 8K and carve out per descriptor
3453 	 * data buffers starting from this 8K aligned base address.
3454 	 *
3455 	 * We round up the mtu specified to be a multiple of 2K or 4K.
3456 	 * For sizes up to 12K we round up the size to the next 2K.
3457 	 * For sizes > 12K we round up to the next 4K (otherwise sizes such as
3458 	 * 14K could end up needing 3 cookies, with the buffer spread across
3459 	 * 3 8K pages:  8K+6K, 2K+8K+2K, 6K+8K, ...).
3460 	 */
3461 	data_sz = vgenp->max_frame_size + VNET_IPALIGN + VNET_LDCALIGN;
3462 	if (data_sz <= VNET_12K) {
3463 		data_sz = VNET_ROUNDUP_2K(data_sz);
3464 	} else {
3465 		data_sz = VNET_ROUNDUP_4K(data_sz);
3466 	}
3467 
3468 	/* allocate extra 8K bytes for alignment */
3469 	ldcp->tx_data_sz = (data_sz * ldcp->num_txds) + VNET_8K;
3470 	datap = kmem_zalloc(ldcp->tx_data_sz, KM_SLEEP);
3471 	ldcp->tx_datap = datap;
3472 
3473 
3474 	/* align the starting address of the data area to 8K */
3475 	datap = (caddr_t)VNET_ROUNDUP_8K((uintptr_t)datap);
3476 
3477 	/*
3478 	 * for each private descriptor, allocate a ldc mem_handle which is
3479 	 * required to map the data during transmit, set the flags
3480 	 * to free (available for use by transmit routine).
3481 	 */
3482 
3483 	for (i = 0; i < ldcp->num_txds; i++) {
3484 
3485 		tbufp = &(ldcp->tbufp[i]);
3486 		rv = ldc_mem_alloc_handle(ldcp->ldc_handle,
3487 		    &(tbufp->memhandle));
3488 		if (rv) {
3489 			tbufp->memhandle = 0;
3490 			goto init_tbufs_failed;
3491 		}
3492 
3493 		/*
3494 		 * bind ldc memhandle to the corresponding transmit buffer.
3495 		 */
3496 		ci = ncookies = 0;
3497 		rv = ldc_mem_bind_handle(tbufp->memhandle,
3498 		    (caddr_t)datap, data_sz, LDC_SHADOW_MAP,
3499 		    LDC_MEM_R, &(tbufp->memcookie[ci]), &ncookies);
3500 		if (rv != 0) {
3501 			goto init_tbufs_failed;
3502 		}
3503 
3504 		/*
3505 		 * successful in binding the handle to tx data buffer.
3506 		 * set datap in the private descr to this buffer.
3507 		 */
3508 		tbufp->datap = datap;
3509 
3510 		if ((ncookies == 0) ||
3511 		    (ncookies > MAX_COOKIES)) {
3512 			goto init_tbufs_failed;
3513 		}
3514 
3515 		for (ci = 1; ci < ncookies; ci++) {
3516 			rv = ldc_mem_nextcookie(tbufp->memhandle,
3517 			    &(tbufp->memcookie[ci]));
3518 			if (rv != 0) {
3519 				goto init_tbufs_failed;
3520 			}
3521 		}
3522 
3523 		tbufp->ncookies = ncookies;
3524 		datap += data_sz;
3525 
3526 		tbufp->flags = VGEN_PRIV_DESC_FREE;
3527 		txdp = &(ldcp->txdp[i]);
3528 		hdrp = &txdp->hdr;
3529 		hdrp->dstate = VIO_DESC_FREE;
3530 		hdrp->ack = B_FALSE;
3531 		tbufp->descp = txdp;
3532 
3533 	}
3534 
3535 	/* reset tbuf walking pointers */
3536 	ldcp->next_tbufp = ldcp->tbufp;
3537 	ldcp->cur_tbufp = ldcp->tbufp;
3538 
3539 	/* initialize tx seqnum and index */
3540 	ldcp->next_txseq = VNET_ISS;
3541 	ldcp->next_txi = 0;
3542 
3543 	ldcp->resched_peer = B_TRUE;
3544 	ldcp->resched_peer_txi = 0;
3545 
3546 	return (DDI_SUCCESS);
3547 
3548 init_tbufs_failed:;
3549 	vgen_uninit_tbufs(ldcp);
3550 	return (DDI_FAILURE);
3551 }
3552 
3553 /* Uninitialize transmit buffer ring for the channel */
3554 static void
3555 vgen_uninit_tbufs(vgen_ldc_t *ldcp)
3556 {
3557 	vgen_private_desc_t	*tbufp = ldcp->tbufp;
3558 	int 			i;
3559 
3560 	/* for each tbuf (priv_desc), free ldc mem_handle */
3561 	for (i = 0; i < ldcp->num_txds; i++) {
3562 
3563 		tbufp = &(ldcp->tbufp[i]);
3564 
3565 		if (tbufp->datap) { /* if bound to a ldc memhandle */
3566 			(void) ldc_mem_unbind_handle(tbufp->memhandle);
3567 			tbufp->datap = NULL;
3568 		}
3569 		if (tbufp->memhandle) {
3570 			(void) ldc_mem_free_handle(tbufp->memhandle);
3571 			tbufp->memhandle = 0;
3572 		}
3573 	}
3574 
3575 	if (ldcp->tx_datap) {
3576 		/* prealloc'd tx data buffer */
3577 		kmem_free(ldcp->tx_datap, ldcp->tx_data_sz);
3578 		ldcp->tx_datap = NULL;
3579 		ldcp->tx_data_sz = 0;
3580 	}
3581 
3582 	bzero(ldcp->tbufp, sizeof (vgen_private_desc_t) * (ldcp->num_txds));
3583 	bzero(ldcp->txdp, sizeof (vnet_public_desc_t) * (ldcp->num_txds));
3584 }
3585 
3586 /* clobber tx descriptor ring */
3587 static void
3588 vgen_clobber_tbufs(vgen_ldc_t *ldcp)
3589 {
3590 	vnet_public_desc_t	*txdp;
3591 	vgen_private_desc_t	*tbufp;
3592 	vio_dring_entry_hdr_t	*hdrp;
3593 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3594 	int i;
3595 #ifdef DEBUG
3596 	int ndone = 0;
3597 #endif
3598 
3599 	for (i = 0; i < ldcp->num_txds; i++) {
3600 
3601 		tbufp = &(ldcp->tbufp[i]);
3602 		txdp = tbufp->descp;
3603 		hdrp = &txdp->hdr;
3604 
3605 		if (tbufp->flags & VGEN_PRIV_DESC_BUSY) {
3606 			tbufp->flags = VGEN_PRIV_DESC_FREE;
3607 #ifdef DEBUG
3608 			if (hdrp->dstate == VIO_DESC_DONE)
3609 				ndone++;
3610 #endif
3611 			hdrp->dstate = VIO_DESC_FREE;
3612 			hdrp->ack = B_FALSE;
3613 		}
3614 	}
3615 	/* reset tbuf walking pointers */
3616 	ldcp->next_tbufp = ldcp->tbufp;
3617 	ldcp->cur_tbufp = ldcp->tbufp;
3618 
3619 	/* reset tx seqnum and index */
3620 	ldcp->next_txseq = VNET_ISS;
3621 	ldcp->next_txi = 0;
3622 
3623 	ldcp->resched_peer = B_TRUE;
3624 	ldcp->resched_peer_txi = 0;
3625 
3626 	DBG2(vgenp, ldcp, "num descrs done (%d)\n", ndone);
3627 }
3628 
3629 /* clobber receive descriptor ring */
3630 static void
3631 vgen_clobber_rxds(vgen_ldc_t *ldcp)
3632 {
3633 	ldcp->rx_dhandle = 0;
3634 	bzero(&ldcp->rx_dcookie, sizeof (ldcp->rx_dcookie));
3635 	ldcp->rxdp = NULL;
3636 	ldcp->next_rxi = 0;
3637 	ldcp->num_rxds = 0;
3638 	ldcp->next_rxseq = VNET_ISS;
3639 }
3640 
3641 /* initialize receive descriptor ring */
3642 static int
3643 vgen_init_rxds(vgen_ldc_t *ldcp, uint32_t num_desc, uint32_t desc_size,
3644 	ldc_mem_cookie_t *dcookie, uint32_t ncookies)
3645 {
3646 	int rv;
3647 	ldc_mem_info_t minfo;
3648 
3649 	rv = ldc_mem_dring_map(ldcp->ldc_handle, dcookie, ncookies, num_desc,
3650 	    desc_size, LDC_DIRECT_MAP, &(ldcp->rx_dhandle));
3651 	if (rv != 0) {
3652 		return (DDI_FAILURE);
3653 	}
3654 
3655 	/*
3656 	 * sucessfully mapped, now try to
3657 	 * get info about the mapped dring
3658 	 */
3659 	rv = ldc_mem_dring_info(ldcp->rx_dhandle, &minfo);
3660 	if (rv != 0) {
3661 		(void) ldc_mem_dring_unmap(ldcp->rx_dhandle);
3662 		return (DDI_FAILURE);
3663 	}
3664 
3665 	/*
3666 	 * save ring address, number of descriptors.
3667 	 */
3668 	ldcp->rxdp = (vnet_public_desc_t *)(minfo.vaddr);
3669 	bcopy(dcookie, &(ldcp->rx_dcookie), sizeof (*dcookie));
3670 	ldcp->num_rxdcookies = ncookies;
3671 	ldcp->num_rxds = num_desc;
3672 	ldcp->next_rxi = 0;
3673 	ldcp->next_rxseq = VNET_ISS;
3674 	ldcp->dring_mtype = minfo.mtype;
3675 
3676 	return (DDI_SUCCESS);
3677 }
3678 
3679 /* get channel statistics */
3680 static uint64_t
3681 vgen_ldc_stat(vgen_ldc_t *ldcp, uint_t stat)
3682 {
3683 	vgen_stats_t *statsp;
3684 	uint64_t val;
3685 
3686 	val = 0;
3687 	statsp = &ldcp->stats;
3688 	switch (stat) {
3689 
3690 	case MAC_STAT_MULTIRCV:
3691 		val = statsp->multircv;
3692 		break;
3693 
3694 	case MAC_STAT_BRDCSTRCV:
3695 		val = statsp->brdcstrcv;
3696 		break;
3697 
3698 	case MAC_STAT_MULTIXMT:
3699 		val = statsp->multixmt;
3700 		break;
3701 
3702 	case MAC_STAT_BRDCSTXMT:
3703 		val = statsp->brdcstxmt;
3704 		break;
3705 
3706 	case MAC_STAT_NORCVBUF:
3707 		val = statsp->norcvbuf;
3708 		break;
3709 
3710 	case MAC_STAT_IERRORS:
3711 		val = statsp->ierrors;
3712 		break;
3713 
3714 	case MAC_STAT_NOXMTBUF:
3715 		val = statsp->noxmtbuf;
3716 		break;
3717 
3718 	case MAC_STAT_OERRORS:
3719 		val = statsp->oerrors;
3720 		break;
3721 
3722 	case MAC_STAT_COLLISIONS:
3723 		break;
3724 
3725 	case MAC_STAT_RBYTES:
3726 		val = statsp->rbytes;
3727 		break;
3728 
3729 	case MAC_STAT_IPACKETS:
3730 		val = statsp->ipackets;
3731 		break;
3732 
3733 	case MAC_STAT_OBYTES:
3734 		val = statsp->obytes;
3735 		break;
3736 
3737 	case MAC_STAT_OPACKETS:
3738 		val = statsp->opackets;
3739 		break;
3740 
3741 	/* stats not relevant to ldc, return 0 */
3742 	case MAC_STAT_IFSPEED:
3743 	case ETHER_STAT_ALIGN_ERRORS:
3744 	case ETHER_STAT_FCS_ERRORS:
3745 	case ETHER_STAT_FIRST_COLLISIONS:
3746 	case ETHER_STAT_MULTI_COLLISIONS:
3747 	case ETHER_STAT_DEFER_XMTS:
3748 	case ETHER_STAT_TX_LATE_COLLISIONS:
3749 	case ETHER_STAT_EX_COLLISIONS:
3750 	case ETHER_STAT_MACXMT_ERRORS:
3751 	case ETHER_STAT_CARRIER_ERRORS:
3752 	case ETHER_STAT_TOOLONG_ERRORS:
3753 	case ETHER_STAT_XCVR_ADDR:
3754 	case ETHER_STAT_XCVR_ID:
3755 	case ETHER_STAT_XCVR_INUSE:
3756 	case ETHER_STAT_CAP_1000FDX:
3757 	case ETHER_STAT_CAP_1000HDX:
3758 	case ETHER_STAT_CAP_100FDX:
3759 	case ETHER_STAT_CAP_100HDX:
3760 	case ETHER_STAT_CAP_10FDX:
3761 	case ETHER_STAT_CAP_10HDX:
3762 	case ETHER_STAT_CAP_ASMPAUSE:
3763 	case ETHER_STAT_CAP_PAUSE:
3764 	case ETHER_STAT_CAP_AUTONEG:
3765 	case ETHER_STAT_ADV_CAP_1000FDX:
3766 	case ETHER_STAT_ADV_CAP_1000HDX:
3767 	case ETHER_STAT_ADV_CAP_100FDX:
3768 	case ETHER_STAT_ADV_CAP_100HDX:
3769 	case ETHER_STAT_ADV_CAP_10FDX:
3770 	case ETHER_STAT_ADV_CAP_10HDX:
3771 	case ETHER_STAT_ADV_CAP_ASMPAUSE:
3772 	case ETHER_STAT_ADV_CAP_PAUSE:
3773 	case ETHER_STAT_ADV_CAP_AUTONEG:
3774 	case ETHER_STAT_LP_CAP_1000FDX:
3775 	case ETHER_STAT_LP_CAP_1000HDX:
3776 	case ETHER_STAT_LP_CAP_100FDX:
3777 	case ETHER_STAT_LP_CAP_100HDX:
3778 	case ETHER_STAT_LP_CAP_10FDX:
3779 	case ETHER_STAT_LP_CAP_10HDX:
3780 	case ETHER_STAT_LP_CAP_ASMPAUSE:
3781 	case ETHER_STAT_LP_CAP_PAUSE:
3782 	case ETHER_STAT_LP_CAP_AUTONEG:
3783 	case ETHER_STAT_LINK_ASMPAUSE:
3784 	case ETHER_STAT_LINK_PAUSE:
3785 	case ETHER_STAT_LINK_AUTONEG:
3786 	case ETHER_STAT_LINK_DUPLEX:
3787 	default:
3788 		val = 0;
3789 		break;
3790 
3791 	}
3792 	return (val);
3793 }
3794 
3795 /*
3796  * LDC channel is UP, start handshake process with peer.
3797  */
3798 static void
3799 vgen_handle_evt_up(vgen_ldc_t *ldcp)
3800 {
3801 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
3802 
3803 	DBG1(vgenp, ldcp, "enter\n");
3804 
3805 	ASSERT(MUTEX_HELD(&ldcp->cblock));
3806 
3807 	if (ldcp->portp != vgenp->vsw_portp) {
3808 		/*
3809 		 * As the channel is up, use this port from now on.
3810 		 */
3811 		(void) atomic_swap_32(&ldcp->portp->use_vsw_port, B_FALSE);
3812 	}
3813 
3814 	/* Initialize local session id */
3815 	ldcp->local_sid = ddi_get_lbolt();
3816 
3817 	/* clear peer session id */
3818 	ldcp->peer_sid = 0;
3819 	ldcp->hretries = 0;
3820 
3821 	if (ldcp->hphase != VH_PHASE0) {
3822 		vgen_handshake_reset(ldcp);
3823 	}
3824 
3825 	/* Initiate Handshake process with peer ldc endpoint */
3826 	vgen_handshake(vh_nextphase(ldcp));
3827 
3828 	DBG1(vgenp, ldcp, "exit\n");
3829 }
3830 
3831 /*
3832  * LDC channel is Reset, terminate connection with peer and try to
3833  * bring the channel up again.
3834  */
3835 static void
3836 vgen_handle_evt_reset(vgen_ldc_t *ldcp)
3837 {
3838 	ldc_status_t istatus;
3839 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
3840 	int	rv;
3841 
3842 	DBG1(vgenp, ldcp, "enter\n");
3843 
3844 	ASSERT(MUTEX_HELD(&ldcp->cblock));
3845 
3846 	if ((ldcp->portp != vgenp->vsw_portp) &&
3847 	    (vgenp->vsw_portp != NULL)) {
3848 		/*
3849 		 * As the channel is down, use the switch port until
3850 		 * the channel becomes ready to be used.
3851 		 */
3852 		(void) atomic_swap_32(&ldcp->portp->use_vsw_port, B_TRUE);
3853 	}
3854 
3855 	if (vgenp->vsw_portp == ldcp->portp) {
3856 		vio_net_report_err_t rep_err =
3857 		    ldcp->portp->vcb.vio_net_report_err;
3858 
3859 		/* Post a reset message */
3860 		rep_err(ldcp->portp->vhp, VIO_NET_RES_DOWN);
3861 	}
3862 
3863 	if (ldcp->hphase != VH_PHASE0) {
3864 		vgen_handshake_reset(ldcp);
3865 	}
3866 
3867 	/* try to bring the channel up */
3868 #ifdef	VNET_IOC_DEBUG
3869 	if (ldcp->link_down_forced == B_FALSE) {
3870 		rv = ldc_up(ldcp->ldc_handle);
3871 		if (rv != 0) {
3872 			DWARN(vgenp, ldcp, "ldc_up err rv(%d)\n", rv);
3873 		}
3874 	}
3875 #else
3876 	rv = ldc_up(ldcp->ldc_handle);
3877 	if (rv != 0) {
3878 		DWARN(vgenp, ldcp, "ldc_up err rv(%d)\n", rv);
3879 	}
3880 #endif
3881 
3882 	if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
3883 		DWARN(vgenp, ldcp, "ldc_status err\n");
3884 	} else {
3885 		ldcp->ldc_status = istatus;
3886 	}
3887 
3888 	/* if channel is already UP - restart handshake */
3889 	if (ldcp->ldc_status == LDC_UP) {
3890 		vgen_handle_evt_up(ldcp);
3891 	}
3892 
3893 	DBG1(vgenp, ldcp, "exit\n");
3894 }
3895 
3896 /* Interrupt handler for the channel */
3897 static uint_t
3898 vgen_ldc_cb(uint64_t event, caddr_t arg)
3899 {
3900 	_NOTE(ARGUNUSED(event))
3901 	vgen_ldc_t	*ldcp;
3902 	vgen_t		*vgenp;
3903 	ldc_status_t 	istatus;
3904 	vgen_stats_t	*statsp;
3905 	timeout_id_t	cancel_htid = 0;
3906 	uint_t		ret = LDC_SUCCESS;
3907 
3908 	ldcp = (vgen_ldc_t *)arg;
3909 	vgenp = LDC_TO_VGEN(ldcp);
3910 	statsp = &ldcp->stats;
3911 
3912 	DBG1(vgenp, ldcp, "enter\n");
3913 
3914 	mutex_enter(&ldcp->cblock);
3915 	statsp->callbacks++;
3916 	if ((ldcp->ldc_status == LDC_INIT) || (ldcp->ldc_handle == NULL)) {
3917 		DWARN(vgenp, ldcp, "status(%d) is LDC_INIT\n",
3918 		    ldcp->ldc_status);
3919 		mutex_exit(&ldcp->cblock);
3920 		return (LDC_SUCCESS);
3921 	}
3922 
3923 	/*
3924 	 * cache cancel_htid before the events specific
3925 	 * code may overwrite it. Do not clear ldcp->cancel_htid
3926 	 * as it is also used to indicate the timer to quit immediately.
3927 	 */
3928 	cancel_htid = ldcp->cancel_htid;
3929 
3930 	/*
3931 	 * NOTE: not using switch() as event could be triggered by
3932 	 * a state change and a read request. Also the ordering	of the
3933 	 * check for the event types is deliberate.
3934 	 */
3935 	if (event & LDC_EVT_UP) {
3936 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
3937 			DWARN(vgenp, ldcp, "ldc_status err\n");
3938 			/* status couldn't be determined */
3939 			ret = LDC_FAILURE;
3940 			goto ldc_cb_ret;
3941 		}
3942 		ldcp->ldc_status = istatus;
3943 		if (ldcp->ldc_status != LDC_UP) {
3944 			DWARN(vgenp, ldcp, "LDC_EVT_UP received "
3945 			    " but ldc status is not UP(0x%x)\n",
3946 			    ldcp->ldc_status);
3947 			/* spurious interrupt, return success */
3948 			goto ldc_cb_ret;
3949 		}
3950 		DWARN(vgenp, ldcp, "event(%lx) UP, status(%d)\n",
3951 		    event, ldcp->ldc_status);
3952 
3953 		vgen_handle_evt_up(ldcp);
3954 
3955 		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);
3956 	}
3957 
3958 	/* Handle RESET/DOWN before READ event */
3959 	if (event & (LDC_EVT_RESET | LDC_EVT_DOWN)) {
3960 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
3961 			DWARN(vgenp, ldcp, "ldc_status error\n");
3962 			/* status couldn't be determined */
3963 			ret = LDC_FAILURE;
3964 			goto ldc_cb_ret;
3965 		}
3966 		ldcp->ldc_status = istatus;
3967 		DWARN(vgenp, ldcp, "event(%lx) RESET/DOWN, status(%d)\n",
3968 		    event, ldcp->ldc_status);
3969 
3970 		vgen_handle_evt_reset(ldcp);
3971 
3972 		/*
3973 		 * As the channel is down/reset, ignore READ event
3974 		 * but print a debug warning message.
3975 		 */
3976 		if (event & LDC_EVT_READ) {
3977 			DWARN(vgenp, ldcp,
3978 			    "LDC_EVT_READ set along with RESET/DOWN\n");
3979 			event &= ~LDC_EVT_READ;
3980 		}
3981 	}
3982 
3983 	if (event & LDC_EVT_READ) {
3984 		DBG2(vgenp, ldcp, "event(%lx) READ, status(%d)\n",
3985 		    event, ldcp->ldc_status);
3986 
3987 		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);
3988 
3989 		if (ldcp->rcv_thread != NULL) {
3990 			/*
3991 			 * If the receive thread is enabled, then
3992 			 * wakeup the receive thread to process the
3993 			 * LDC messages.
3994 			 */
3995 			mutex_exit(&ldcp->cblock);
3996 			mutex_enter(&ldcp->rcv_thr_lock);
3997 			if (!(ldcp->rcv_thr_flags & VGEN_WTHR_DATARCVD)) {
3998 				ldcp->rcv_thr_flags |= VGEN_WTHR_DATARCVD;
3999 				cv_signal(&ldcp->rcv_thr_cv);
4000 			}
4001 			mutex_exit(&ldcp->rcv_thr_lock);
4002 			mutex_enter(&ldcp->cblock);
4003 		} else  {
4004 			vgen_handle_evt_read(ldcp);
4005 		}
4006 	}
4007 
4008 ldc_cb_ret:
4009 	/*
4010 	 * Check to see if the status of cancel_htid has
4011 	 * changed. If another timer needs to be cancelled,
4012 	 * then let the next callback to clear it.
4013 	 */
4014 	if (cancel_htid == 0) {
4015 		cancel_htid = ldcp->cancel_htid;
4016 	}
4017 	mutex_exit(&ldcp->cblock);
4018 
4019 	if (cancel_htid) {
4020 		/*
4021 		 * Cancel handshake timer.
4022 		 * untimeout(9F) will not return until the pending callback is
4023 		 * cancelled or has run. No problems will result from calling
4024 		 * untimeout if the handler has already completed.
4025 		 * If the timeout handler did run, then it would just
4026 		 * return as cancel_htid is set.
4027 		 */
4028 		DBG2(vgenp, ldcp, "cancel_htid =0x%X \n", cancel_htid);
4029 		(void) untimeout(cancel_htid);
4030 		mutex_enter(&ldcp->cblock);
4031 		/* clear it only if its the same as the one we cancelled */
4032 		if (ldcp->cancel_htid == cancel_htid) {
4033 			ldcp->cancel_htid = 0;
4034 		}
4035 		mutex_exit(&ldcp->cblock);
4036 	}
4037 	DBG1(vgenp, ldcp, "exit\n");
4038 	return (ret);
4039 }
4040 
4041 static void
4042 vgen_handle_evt_read(vgen_ldc_t *ldcp)
4043 {
4044 	int		rv;
4045 	uint64_t	*ldcmsg;
4046 	size_t		msglen;
4047 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4048 	vio_msg_tag_t	*tagp;
4049 	ldc_status_t 	istatus;
4050 	boolean_t 	has_data;
4051 
4052 	DBG1(vgenp, ldcp, "enter\n");
4053 
4054 	ldcmsg = ldcp->ldcmsg;
4055 	/*
4056 	 * If the receive thread is enabled, then the cblock
4057 	 * need to be acquired here. If not, the vgen_ldc_cb()
4058 	 * calls this function with cblock held already.
4059 	 */
4060 	if (ldcp->rcv_thread != NULL) {
4061 		mutex_enter(&ldcp->cblock);
4062 	} else {
4063 		ASSERT(MUTEX_HELD(&ldcp->cblock));
4064 	}
4065 
4066 vgen_evt_read:
4067 	do {
4068 		msglen = ldcp->msglen;
4069 		rv = ldc_read(ldcp->ldc_handle, (caddr_t)ldcmsg, &msglen);
4070 
4071 		if (rv != 0) {
4072 			DWARN(vgenp, ldcp, "err rv(%d) len(%d)\n",
4073 			    rv, msglen);
4074 			if (rv == ECONNRESET)
4075 				goto vgen_evtread_error;
4076 			break;
4077 		}
4078 		if (msglen == 0) {
4079 			DBG2(vgenp, ldcp, "ldc_read NODATA");
4080 			break;
4081 		}
4082 		DBG2(vgenp, ldcp, "ldc_read msglen(%d)", msglen);
4083 
4084 		tagp = (vio_msg_tag_t *)ldcmsg;
4085 
4086 		if (ldcp->peer_sid) {
4087 			/*
4088 			 * check sid only after we have received peer's sid
4089 			 * in the version negotiate msg.
4090 			 */
4091 #ifdef DEBUG
4092 			if (vgen_hdbg & HDBG_BAD_SID) {
4093 				/* simulate bad sid condition */
4094 				tagp->vio_sid = 0;
4095 				vgen_hdbg &= ~(HDBG_BAD_SID);
4096 			}
4097 #endif
4098 			rv = vgen_check_sid(ldcp, tagp);
4099 			if (rv != VGEN_SUCCESS) {
4100 				/*
4101 				 * If sid mismatch is detected,
4102 				 * reset the channel.
4103 				 */
4104 				goto vgen_evtread_error;
4105 			}
4106 		}
4107 
4108 		switch (tagp->vio_msgtype) {
4109 		case VIO_TYPE_CTRL:
4110 			rv = vgen_handle_ctrlmsg(ldcp, tagp);
4111 			break;
4112 
4113 		case VIO_TYPE_DATA:
4114 			rv = vgen_handle_datamsg(ldcp, tagp, msglen);
4115 			break;
4116 
4117 		case VIO_TYPE_ERR:
4118 			vgen_handle_errmsg(ldcp, tagp);
4119 			break;
4120 
4121 		default:
4122 			DWARN(vgenp, ldcp, "Unknown VIO_TYPE(%x)\n",
4123 			    tagp->vio_msgtype);
4124 			break;
4125 		}
4126 
4127 		/*
4128 		 * If an error is encountered, stop processing and
4129 		 * handle the error.
4130 		 */
4131 		if (rv != 0) {
4132 			goto vgen_evtread_error;
4133 		}
4134 
4135 	} while (msglen);
4136 
4137 	/* check once more before exiting */
4138 	rv = ldc_chkq(ldcp->ldc_handle, &has_data);
4139 	if ((rv == 0) && (has_data == B_TRUE)) {
4140 		DTRACE_PROBE(vgen_chkq);
4141 		goto vgen_evt_read;
4142 	}
4143 
4144 vgen_evtread_error:
4145 	if (rv == ECONNRESET) {
4146 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
4147 			DWARN(vgenp, ldcp, "ldc_status err\n");
4148 		} else {
4149 			ldcp->ldc_status = istatus;
4150 		}
4151 		vgen_handle_evt_reset(ldcp);
4152 	} else if (rv) {
4153 		vgen_ldc_reset(ldcp);
4154 	}
4155 
4156 	/*
4157 	 * If the receive thread is enabled, then cancel the
4158 	 * handshake timeout here.
4159 	 */
4160 	if (ldcp->rcv_thread != NULL) {
4161 		timeout_id_t cancel_htid = ldcp->cancel_htid;
4162 
4163 		mutex_exit(&ldcp->cblock);
4164 		if (cancel_htid) {
4165 			/*
4166 			 * Cancel handshake timer. untimeout(9F) will
4167 			 * not return until the pending callback is cancelled
4168 			 * or has run. No problems will result from calling
4169 			 * untimeout if the handler has already completed.
4170 			 * If the timeout handler did run, then it would just
4171 			 * return as cancel_htid is set.
4172 			 */
4173 			DBG2(vgenp, ldcp, "cancel_htid =0x%X \n", cancel_htid);
4174 			(void) untimeout(cancel_htid);
4175 
4176 			/*
4177 			 * clear it only if its the same as the one we
4178 			 * cancelled
4179 			 */
4180 			mutex_enter(&ldcp->cblock);
4181 			if (ldcp->cancel_htid == cancel_htid) {
4182 				ldcp->cancel_htid = 0;
4183 			}
4184 			mutex_exit(&ldcp->cblock);
4185 		}
4186 	}
4187 
4188 	DBG1(vgenp, ldcp, "exit\n");
4189 }
4190 
4191 /* vgen handshake functions */
4192 
4193 /* change the hphase for the channel to the next phase */
4194 static vgen_ldc_t *
4195 vh_nextphase(vgen_ldc_t *ldcp)
4196 {
4197 	if (ldcp->hphase == VH_PHASE3) {
4198 		ldcp->hphase = VH_DONE;
4199 	} else {
4200 		ldcp->hphase++;
4201 	}
4202 	return (ldcp);
4203 }
4204 
4205 /*
4206  * wrapper routine to send the given message over ldc using ldc_write().
4207  */
4208 static int
4209 vgen_sendmsg(vgen_ldc_t *ldcp, caddr_t msg,  size_t msglen,
4210     boolean_t caller_holds_lock)
4211 {
4212 	int			rv;
4213 	size_t			len;
4214 	uint32_t		retries = 0;
4215 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
4216 	vio_msg_tag_t		*tagp = (vio_msg_tag_t *)msg;
4217 	vio_dring_msg_t		*dmsg;
4218 	vio_raw_data_msg_t	*rmsg;
4219 	boolean_t		data_msg = B_FALSE;
4220 
4221 	len = msglen;
4222 	if ((len == 0) || (msg == NULL))
4223 		return (VGEN_FAILURE);
4224 
4225 	if (!caller_holds_lock) {
4226 		mutex_enter(&ldcp->wrlock);
4227 	}
4228 
4229 	if (tagp->vio_subtype == VIO_SUBTYPE_INFO) {
4230 		if (tagp->vio_subtype_env == VIO_DRING_DATA) {
4231 			dmsg = (vio_dring_msg_t *)tagp;
4232 			dmsg->seq_num = ldcp->next_txseq;
4233 			data_msg = B_TRUE;
4234 		} else if (tagp->vio_subtype_env == VIO_PKT_DATA) {
4235 			rmsg = (vio_raw_data_msg_t *)tagp;
4236 			rmsg->seq_num = ldcp->next_txseq;
4237 			data_msg = B_TRUE;
4238 		}
4239 	}
4240 
4241 	do {
4242 		len = msglen;
4243 		rv = ldc_write(ldcp->ldc_handle, (caddr_t)msg, &len);
4244 		if (retries++ >= vgen_ldcwr_retries)
4245 			break;
4246 	} while (rv == EWOULDBLOCK);
4247 
4248 	if (rv == 0 && data_msg == B_TRUE) {
4249 		ldcp->next_txseq++;
4250 	}
4251 
4252 	if (!caller_holds_lock) {
4253 		mutex_exit(&ldcp->wrlock);
4254 	}
4255 
4256 	if (rv != 0) {
4257 		DWARN(vgenp, ldcp, "ldc_write failed: rv(%d)\n",
4258 		    rv, msglen);
4259 		return (rv);
4260 	}
4261 
4262 	if (len != msglen) {
4263 		DWARN(vgenp, ldcp, "ldc_write failed: rv(%d) msglen (%d)\n",
4264 		    rv, msglen);
4265 		return (VGEN_FAILURE);
4266 	}
4267 
4268 	return (VGEN_SUCCESS);
4269 }
4270 
4271 /* send version negotiate message to the peer over ldc */
4272 static int
4273 vgen_send_version_negotiate(vgen_ldc_t *ldcp)
4274 {
4275 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4276 	vio_ver_msg_t	vermsg;
4277 	vio_msg_tag_t	*tagp = &vermsg.tag;
4278 	int		rv;
4279 
4280 	bzero(&vermsg, sizeof (vermsg));
4281 
4282 	tagp->vio_msgtype = VIO_TYPE_CTRL;
4283 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
4284 	tagp->vio_subtype_env = VIO_VER_INFO;
4285 	tagp->vio_sid = ldcp->local_sid;
4286 
4287 	/* get version msg payload from ldcp->local */
4288 	vermsg.ver_major = ldcp->local_hparams.ver_major;
4289 	vermsg.ver_minor = ldcp->local_hparams.ver_minor;
4290 	vermsg.dev_class = ldcp->local_hparams.dev_class;
4291 
4292 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (vermsg), B_FALSE);
4293 	if (rv != VGEN_SUCCESS) {
4294 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
4295 		return (rv);
4296 	}
4297 
4298 	ldcp->hstate |= VER_INFO_SENT;
4299 	DBG2(vgenp, ldcp, "VER_INFO_SENT ver(%d,%d)\n",
4300 	    vermsg.ver_major, vermsg.ver_minor);
4301 
4302 	return (VGEN_SUCCESS);
4303 }
4304 
4305 /* send attr info message to the peer over ldc */
4306 static int
4307 vgen_send_attr_info(vgen_ldc_t *ldcp)
4308 {
4309 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4310 	vnet_attr_msg_t	attrmsg;
4311 	vio_msg_tag_t	*tagp = &attrmsg.tag;
4312 	int		rv;
4313 
4314 	bzero(&attrmsg, sizeof (attrmsg));
4315 
4316 	tagp->vio_msgtype = VIO_TYPE_CTRL;
4317 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
4318 	tagp->vio_subtype_env = VIO_ATTR_INFO;
4319 	tagp->vio_sid = ldcp->local_sid;
4320 
4321 	/* get attr msg payload from ldcp->local */
4322 	attrmsg.mtu = ldcp->local_hparams.mtu;
4323 	attrmsg.addr = ldcp->local_hparams.addr;
4324 	attrmsg.addr_type = ldcp->local_hparams.addr_type;
4325 	attrmsg.xfer_mode = ldcp->local_hparams.xfer_mode;
4326 	attrmsg.ack_freq = ldcp->local_hparams.ack_freq;
4327 	attrmsg.physlink_update = ldcp->local_hparams.physlink_update;
4328 
4329 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (attrmsg), B_FALSE);
4330 	if (rv != VGEN_SUCCESS) {
4331 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
4332 		return (rv);
4333 	}
4334 
4335 	ldcp->hstate |= ATTR_INFO_SENT;
4336 	DBG2(vgenp, ldcp, "ATTR_INFO_SENT\n");
4337 
4338 	return (VGEN_SUCCESS);
4339 }
4340 
4341 /* send descriptor ring register message to the peer over ldc */
4342 static int
4343 vgen_send_dring_reg(vgen_ldc_t *ldcp)
4344 {
4345 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
4346 	vio_dring_reg_msg_t	msg;
4347 	vio_msg_tag_t		*tagp = &msg.tag;
4348 	int		rv;
4349 
4350 	bzero(&msg, sizeof (msg));
4351 
4352 	tagp->vio_msgtype = VIO_TYPE_CTRL;
4353 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
4354 	tagp->vio_subtype_env = VIO_DRING_REG;
4355 	tagp->vio_sid = ldcp->local_sid;
4356 
4357 	/* get dring info msg payload from ldcp->local */
4358 	bcopy(&(ldcp->local_hparams.dring_cookie), (msg.cookie),
4359 	    sizeof (ldc_mem_cookie_t));
4360 	msg.ncookies = ldcp->local_hparams.num_dcookies;
4361 	msg.num_descriptors = ldcp->local_hparams.num_desc;
4362 	msg.descriptor_size = ldcp->local_hparams.desc_size;
4363 
4364 	/*
4365 	 * dring_ident is set to 0. After mapping the dring, peer sets this
4366 	 * value and sends it in the ack, which is saved in
4367 	 * vgen_handle_dring_reg().
4368 	 */
4369 	msg.dring_ident = 0;
4370 
4371 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (msg), B_FALSE);
4372 	if (rv != VGEN_SUCCESS) {
4373 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
4374 		return (rv);
4375 	}
4376 
4377 	ldcp->hstate |= DRING_INFO_SENT;
4378 	DBG2(vgenp, ldcp, "DRING_INFO_SENT \n");
4379 
4380 	return (VGEN_SUCCESS);
4381 }
4382 
4383 static int
4384 vgen_send_rdx_info(vgen_ldc_t *ldcp)
4385 {
4386 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4387 	vio_rdx_msg_t	rdxmsg;
4388 	vio_msg_tag_t	*tagp = &rdxmsg.tag;
4389 	int		rv;
4390 
4391 	bzero(&rdxmsg, sizeof (rdxmsg));
4392 
4393 	tagp->vio_msgtype = VIO_TYPE_CTRL;
4394 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
4395 	tagp->vio_subtype_env = VIO_RDX;
4396 	tagp->vio_sid = ldcp->local_sid;
4397 
4398 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (rdxmsg), B_FALSE);
4399 	if (rv != VGEN_SUCCESS) {
4400 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
4401 		return (rv);
4402 	}
4403 
4404 	ldcp->hstate |= RDX_INFO_SENT;
4405 	DBG2(vgenp, ldcp, "RDX_INFO_SENT\n");
4406 
4407 	return (VGEN_SUCCESS);
4408 }
4409 
4410 /* send descriptor ring data message to the peer over ldc */
4411 static int
4412 vgen_send_dring_data(vgen_ldc_t *ldcp, uint32_t start, int32_t end)
4413 {
4414 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4415 	vio_dring_msg_t	dringmsg, *msgp = &dringmsg;
4416 	vio_msg_tag_t	*tagp = &msgp->tag;
4417 	vgen_stats_t	*statsp = &ldcp->stats;
4418 	int		rv;
4419 
4420 	bzero(msgp, sizeof (*msgp));
4421 
4422 	tagp->vio_msgtype = VIO_TYPE_DATA;
4423 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
4424 	tagp->vio_subtype_env = VIO_DRING_DATA;
4425 	tagp->vio_sid = ldcp->local_sid;
4426 
4427 	msgp->dring_ident = ldcp->local_hparams.dring_ident;
4428 	msgp->start_idx = start;
4429 	msgp->end_idx = end;
4430 
4431 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (dringmsg), B_TRUE);
4432 	if (rv != VGEN_SUCCESS) {
4433 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
4434 		return (rv);
4435 	}
4436 
4437 	statsp->dring_data_msgs++;
4438 
4439 	DBG2(vgenp, ldcp, "DRING_DATA_SENT \n");
4440 
4441 	return (VGEN_SUCCESS);
4442 }
4443 
4444 /* send multicast addr info message to vsw */
4445 static int
4446 vgen_send_mcast_info(vgen_ldc_t *ldcp)
4447 {
4448 	vnet_mcast_msg_t	mcastmsg;
4449 	vnet_mcast_msg_t	*msgp;
4450 	vio_msg_tag_t		*tagp;
4451 	vgen_t			*vgenp;
4452 	struct ether_addr	*mca;
4453 	int			rv;
4454 	int			i;
4455 	uint32_t		size;
4456 	uint32_t		mccount;
4457 	uint32_t		n;
4458 
4459 	msgp = &mcastmsg;
4460 	tagp = &msgp->tag;
4461 	vgenp = LDC_TO_VGEN(ldcp);
4462 
4463 	mccount = vgenp->mccount;
4464 	i = 0;
4465 
4466 	do {
4467 		tagp->vio_msgtype = VIO_TYPE_CTRL;
4468 		tagp->vio_subtype = VIO_SUBTYPE_INFO;
4469 		tagp->vio_subtype_env = VNET_MCAST_INFO;
4470 		tagp->vio_sid = ldcp->local_sid;
4471 
4472 		n = ((mccount >= VNET_NUM_MCAST) ? VNET_NUM_MCAST : mccount);
4473 		size = n * sizeof (struct ether_addr);
4474 
4475 		mca = &(vgenp->mctab[i]);
4476 		bcopy(mca, (msgp->mca), size);
4477 		msgp->set = B_TRUE;
4478 		msgp->count = n;
4479 
4480 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msgp),
4481 		    B_FALSE);
4482 		if (rv != VGEN_SUCCESS) {
4483 			DWARN(vgenp, ldcp, "vgen_sendmsg err(%d)\n", rv);
4484 			return (rv);
4485 		}
4486 
4487 		mccount -= n;
4488 		i += n;
4489 
4490 	} while (mccount);
4491 
4492 	return (VGEN_SUCCESS);
4493 }
4494 
4495 /* Initiate Phase 2 of handshake */
4496 static int
4497 vgen_handshake_phase2(vgen_ldc_t *ldcp)
4498 {
4499 	int rv;
4500 	uint32_t ncookies = 0;
4501 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4502 
4503 #ifdef DEBUG
4504 	if (vgen_hdbg & HDBG_OUT_STATE) {
4505 		/* simulate out of state condition */
4506 		vgen_hdbg &= ~(HDBG_OUT_STATE);
4507 		rv = vgen_send_rdx_info(ldcp);
4508 		return (rv);
4509 	}
4510 	if (vgen_hdbg & HDBG_TIMEOUT) {
4511 		/* simulate timeout condition */
4512 		vgen_hdbg &= ~(HDBG_TIMEOUT);
4513 		return (VGEN_SUCCESS);
4514 	}
4515 #endif
4516 	rv = vgen_send_attr_info(ldcp);
4517 	if (rv != VGEN_SUCCESS) {
4518 		return (rv);
4519 	}
4520 
4521 	/* Bind descriptor ring to the channel */
4522 	if (ldcp->num_txdcookies == 0) {
4523 		rv = ldc_mem_dring_bind(ldcp->ldc_handle, ldcp->tx_dhandle,
4524 		    LDC_DIRECT_MAP | LDC_SHADOW_MAP, LDC_MEM_RW,
4525 		    &ldcp->tx_dcookie, &ncookies);
4526 		if (rv != 0) {
4527 			DWARN(vgenp, ldcp, "ldc_mem_dring_bind failed "
4528 			    "rv(%x)\n", rv);
4529 			return (rv);
4530 		}
4531 		ASSERT(ncookies == 1);
4532 		ldcp->num_txdcookies = ncookies;
4533 	}
4534 
4535 	/* update local dring_info params */
4536 	bcopy(&(ldcp->tx_dcookie), &(ldcp->local_hparams.dring_cookie),
4537 	    sizeof (ldc_mem_cookie_t));
4538 	ldcp->local_hparams.num_dcookies = ldcp->num_txdcookies;
4539 	ldcp->local_hparams.num_desc = ldcp->num_txds;
4540 	ldcp->local_hparams.desc_size = sizeof (vnet_public_desc_t);
4541 
4542 	rv = vgen_send_dring_reg(ldcp);
4543 	if (rv != VGEN_SUCCESS) {
4544 		return (rv);
4545 	}
4546 
4547 	return (VGEN_SUCCESS);
4548 }
4549 
4550 /*
4551  * Set vnet-protocol-version dependent functions based on version.
4552  */
4553 static void
4554 vgen_set_vnet_proto_ops(vgen_ldc_t *ldcp)
4555 {
4556 	vgen_hparams_t	*lp = &ldcp->local_hparams;
4557 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4558 
4559 	if (VGEN_VER_GTEQ(ldcp, 1, 5)) {
4560 		vgen_port_t	*portp = ldcp->portp;
4561 		vnet_t		*vnetp = vgenp->vnetp;
4562 		/*
4563 		 * If the version negotiated with vswitch is >= 1.5 (link
4564 		 * status update support), set the required bits in our
4565 		 * attributes if this vnet device has been configured to get
4566 		 * physical link state updates.
4567 		 */
4568 		if (portp == vgenp->vsw_portp && vnetp->pls_update == B_TRUE) {
4569 			lp->physlink_update = PHYSLINK_UPDATE_STATE;
4570 		} else {
4571 			lp->physlink_update = PHYSLINK_UPDATE_NONE;
4572 		}
4573 	}
4574 
4575 	if (VGEN_VER_GTEQ(ldcp, 1, 4)) {
4576 		/*
4577 		 * If the version negotiated with peer is >= 1.4(Jumbo Frame
4578 		 * Support), set the mtu in our attributes to max_frame_size.
4579 		 */
4580 		lp->mtu = vgenp->max_frame_size;
4581 	} else  if (VGEN_VER_EQ(ldcp, 1, 3)) {
4582 		/*
4583 		 * If the version negotiated with peer is == 1.3 (Vlan Tag
4584 		 * Support) set the attr.mtu to ETHERMAX + VLAN_TAGSZ.
4585 		 */
4586 		lp->mtu = ETHERMAX + VLAN_TAGSZ;
4587 	} else {
4588 		vgen_port_t	*portp = ldcp->portp;
4589 		vnet_t		*vnetp = vgenp->vnetp;
4590 		/*
4591 		 * Pre-1.3 peers expect max frame size of ETHERMAX.
4592 		 * We can negotiate that size with those peers provided the
4593 		 * following conditions are true:
4594 		 * - Only pvid is defined for our peer and there are no vids.
4595 		 * - pvids are equal.
4596 		 * If the above conditions are true, then we can send/recv only
4597 		 * untagged frames of max size ETHERMAX.
4598 		 */
4599 		if (portp->nvids == 0 && portp->pvid == vnetp->pvid) {
4600 			lp->mtu = ETHERMAX;
4601 		}
4602 	}
4603 
4604 	if (VGEN_VER_GTEQ(ldcp, 1, 2)) {
4605 		/* Versions >= 1.2 */
4606 
4607 		if (VGEN_PRI_ETH_DEFINED(vgenp)) {
4608 			/*
4609 			 * enable priority routines and pkt mode only if
4610 			 * at least one pri-eth-type is specified in MD.
4611 			 */
4612 
4613 			ldcp->tx = vgen_ldcsend;
4614 			ldcp->rx_pktdata = vgen_handle_pkt_data;
4615 
4616 			/* set xfer mode for vgen_send_attr_info() */
4617 			lp->xfer_mode = VIO_PKT_MODE | VIO_DRING_MODE_V1_2;
4618 
4619 		} else {
4620 			/* no priority eth types defined in MD */
4621 
4622 			ldcp->tx = vgen_ldcsend_dring;
4623 			ldcp->rx_pktdata = vgen_handle_pkt_data_nop;
4624 
4625 			/* set xfer mode for vgen_send_attr_info() */
4626 			lp->xfer_mode = VIO_DRING_MODE_V1_2;
4627 
4628 		}
4629 	} else {
4630 		/* Versions prior to 1.2  */
4631 
4632 		vgen_reset_vnet_proto_ops(ldcp);
4633 	}
4634 }
4635 
4636 /*
4637  * Reset vnet-protocol-version dependent functions to pre-v1.2.
4638  */
4639 static void
4640 vgen_reset_vnet_proto_ops(vgen_ldc_t *ldcp)
4641 {
4642 	vgen_hparams_t	*lp = &ldcp->local_hparams;
4643 
4644 	ldcp->tx = vgen_ldcsend_dring;
4645 	ldcp->rx_pktdata = vgen_handle_pkt_data_nop;
4646 
4647 	/* set xfer mode for vgen_send_attr_info() */
4648 	lp->xfer_mode = VIO_DRING_MODE_V1_0;
4649 }
4650 
4651 static void
4652 vgen_vlan_unaware_port_reset(vgen_port_t *portp)
4653 {
4654 	vgen_ldclist_t	*ldclp;
4655 	vgen_ldc_t	*ldcp;
4656 	vgen_t		*vgenp = portp->vgenp;
4657 	vnet_t		*vnetp = vgenp->vnetp;
4658 
4659 	ldclp = &portp->ldclist;
4660 
4661 	READ_ENTER(&ldclp->rwlock);
4662 
4663 	/*
4664 	 * NOTE: for now, we will assume we have a single channel.
4665 	 */
4666 	if (ldclp->headp == NULL) {
4667 		RW_EXIT(&ldclp->rwlock);
4668 		return;
4669 	}
4670 	ldcp = ldclp->headp;
4671 
4672 	mutex_enter(&ldcp->cblock);
4673 
4674 	/*
4675 	 * If the peer is vlan_unaware(ver < 1.3), reset channel and terminate
4676 	 * the connection. See comments in vgen_set_vnet_proto_ops().
4677 	 */
4678 	if (ldcp->hphase == VH_DONE && VGEN_VER_LT(ldcp, 1, 3) &&
4679 	    (portp->nvids != 0 || portp->pvid != vnetp->pvid)) {
4680 		vgen_ldc_reset(ldcp);
4681 	}
4682 
4683 	mutex_exit(&ldcp->cblock);
4684 
4685 	RW_EXIT(&ldclp->rwlock);
4686 }
4687 
4688 static void
4689 vgen_port_reset(vgen_port_t *portp)
4690 {
4691 	vgen_ldclist_t	*ldclp;
4692 	vgen_ldc_t	*ldcp;
4693 
4694 	ldclp = &portp->ldclist;
4695 
4696 	READ_ENTER(&ldclp->rwlock);
4697 
4698 	/*
4699 	 * NOTE: for now, we will assume we have a single channel.
4700 	 */
4701 	if (ldclp->headp == NULL) {
4702 		RW_EXIT(&ldclp->rwlock);
4703 		return;
4704 	}
4705 	ldcp = ldclp->headp;
4706 
4707 	mutex_enter(&ldcp->cblock);
4708 
4709 	vgen_ldc_reset(ldcp);
4710 
4711 	mutex_exit(&ldcp->cblock);
4712 
4713 	RW_EXIT(&ldclp->rwlock);
4714 }
4715 
4716 static void
4717 vgen_reset_vlan_unaware_ports(vgen_t *vgenp)
4718 {
4719 	vgen_port_t	*portp;
4720 	vgen_portlist_t	*plistp;
4721 
4722 	plistp = &(vgenp->vgenports);
4723 	READ_ENTER(&plistp->rwlock);
4724 
4725 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
4726 
4727 		vgen_vlan_unaware_port_reset(portp);
4728 
4729 	}
4730 
4731 	RW_EXIT(&plistp->rwlock);
4732 }
4733 
4734 static void
4735 vgen_reset_vsw_port(vgen_t *vgenp)
4736 {
4737 	vgen_port_t	*portp;
4738 
4739 	if ((portp = vgenp->vsw_portp) != NULL) {
4740 		vgen_port_reset(portp);
4741 	}
4742 }
4743 
4744 /*
4745  * This function resets the handshake phase to VH_PHASE0(pre-handshake phase).
4746  * This can happen after a channel comes up (status: LDC_UP) or
4747  * when handshake gets terminated due to various conditions.
4748  */
4749 static void
4750 vgen_reset_hphase(vgen_ldc_t *ldcp)
4751 {
4752 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4753 	ldc_status_t istatus;
4754 	int rv;
4755 
4756 	DBG1(vgenp, ldcp, "enter\n");
4757 	/* reset hstate and hphase */
4758 	ldcp->hstate = 0;
4759 	ldcp->hphase = VH_PHASE0;
4760 
4761 	vgen_reset_vnet_proto_ops(ldcp);
4762 
4763 	/*
4764 	 * Save the id of pending handshake timer in cancel_htid.
4765 	 * This will be checked in vgen_ldc_cb() and the handshake timer will
4766 	 * be cancelled after releasing cblock.
4767 	 */
4768 	if (ldcp->htid) {
4769 		ldcp->cancel_htid = ldcp->htid;
4770 		ldcp->htid = 0;
4771 	}
4772 
4773 	if (ldcp->local_hparams.dring_ready) {
4774 		ldcp->local_hparams.dring_ready = B_FALSE;
4775 	}
4776 
4777 	/* Unbind tx descriptor ring from the channel */
4778 	if (ldcp->num_txdcookies) {
4779 		rv = ldc_mem_dring_unbind(ldcp->tx_dhandle);
4780 		if (rv != 0) {
4781 			DWARN(vgenp, ldcp, "ldc_mem_dring_unbind failed\n");
4782 		}
4783 		ldcp->num_txdcookies = 0;
4784 	}
4785 
4786 	if (ldcp->peer_hparams.dring_ready) {
4787 		ldcp->peer_hparams.dring_ready = B_FALSE;
4788 		/* Unmap peer's dring */
4789 		(void) ldc_mem_dring_unmap(ldcp->rx_dhandle);
4790 		vgen_clobber_rxds(ldcp);
4791 	}
4792 
4793 	vgen_clobber_tbufs(ldcp);
4794 
4795 	/*
4796 	 * clear local handshake params and initialize.
4797 	 */
4798 	bzero(&(ldcp->local_hparams), sizeof (ldcp->local_hparams));
4799 
4800 	/* set version to the highest version supported */
4801 	ldcp->local_hparams.ver_major =
4802 	    ldcp->vgen_versions[0].ver_major;
4803 	ldcp->local_hparams.ver_minor =
4804 	    ldcp->vgen_versions[0].ver_minor;
4805 	ldcp->local_hparams.dev_class = VDEV_NETWORK;
4806 
4807 	/* set attr_info params */
4808 	ldcp->local_hparams.mtu = vgenp->max_frame_size;
4809 	ldcp->local_hparams.addr =
4810 	    vnet_macaddr_strtoul(vgenp->macaddr);
4811 	ldcp->local_hparams.addr_type = ADDR_TYPE_MAC;
4812 	ldcp->local_hparams.xfer_mode = VIO_DRING_MODE_V1_0;
4813 	ldcp->local_hparams.ack_freq = 0;	/* don't need acks */
4814 	ldcp->local_hparams.physlink_update = PHYSLINK_UPDATE_NONE;
4815 
4816 	/*
4817 	 * Note: dring is created, but not bound yet.
4818 	 * local dring_info params will be updated when we bind the dring in
4819 	 * vgen_handshake_phase2().
4820 	 * dring_ident is set to 0. After mapping the dring, peer sets this
4821 	 * value and sends it in the ack, which is saved in
4822 	 * vgen_handle_dring_reg().
4823 	 */
4824 	ldcp->local_hparams.dring_ident = 0;
4825 
4826 	/* clear peer_hparams */
4827 	bzero(&(ldcp->peer_hparams), sizeof (ldcp->peer_hparams));
4828 
4829 	/* reset the channel if required */
4830 #ifdef	VNET_IOC_DEBUG
4831 	if (ldcp->need_ldc_reset && !ldcp->link_down_forced) {
4832 #else
4833 	if (ldcp->need_ldc_reset) {
4834 #endif
4835 		DWARN(vgenp, ldcp, "Doing Channel Reset...\n");
4836 		ldcp->need_ldc_reset = B_FALSE;
4837 		(void) ldc_down(ldcp->ldc_handle);
4838 		(void) ldc_status(ldcp->ldc_handle, &istatus);
4839 		DBG2(vgenp, ldcp, "Reset Done,ldc_status(%x)\n", istatus);
4840 		ldcp->ldc_status = istatus;
4841 
4842 		/* clear sids */
4843 		ldcp->local_sid = 0;
4844 		ldcp->peer_sid = 0;
4845 
4846 		/* try to bring the channel up */
4847 		rv = ldc_up(ldcp->ldc_handle);
4848 		if (rv != 0) {
4849 			DWARN(vgenp, ldcp, "ldc_up err rv(%d)\n", rv);
4850 		}
4851 
4852 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
4853 			DWARN(vgenp, ldcp, "ldc_status err\n");
4854 		} else {
4855 			ldcp->ldc_status = istatus;
4856 		}
4857 	}
4858 }
4859 
4860 /* wrapper function for vgen_reset_hphase */
4861 static void
4862 vgen_handshake_reset(vgen_ldc_t *ldcp)
4863 {
4864 	vgen_t  *vgenp = LDC_TO_VGEN(ldcp);
4865 
4866 	ASSERT(MUTEX_HELD(&ldcp->cblock));
4867 	mutex_enter(&ldcp->rxlock);
4868 	mutex_enter(&ldcp->wrlock);
4869 	mutex_enter(&ldcp->txlock);
4870 	mutex_enter(&ldcp->tclock);
4871 
4872 	vgen_reset_hphase(ldcp);
4873 
4874 	mutex_exit(&ldcp->tclock);
4875 	mutex_exit(&ldcp->txlock);
4876 	mutex_exit(&ldcp->wrlock);
4877 	mutex_exit(&ldcp->rxlock);
4878 
4879 	/*
4880 	 * As the connection is now reset, mark the channel
4881 	 * link_state as 'down' and notify the stack if needed.
4882 	 */
4883 	if (ldcp->link_state != LINK_STATE_DOWN) {
4884 		ldcp->link_state = LINK_STATE_DOWN;
4885 
4886 		if (ldcp->portp == vgenp->vsw_portp) { /* vswitch port ? */
4887 			/*
4888 			 * As the channel link is down, mark physical link also
4889 			 * as down. After the channel comes back up and
4890 			 * handshake completes, we will get an update on the
4891 			 * physlink state from vswitch (if this device has been
4892 			 * configured to get phys link updates).
4893 			 */
4894 			vgenp->phys_link_state = LINK_STATE_DOWN;
4895 
4896 			/* Now update the stack */
4897 			mutex_exit(&ldcp->cblock);
4898 			vgen_link_update(vgenp, ldcp->link_state);
4899 			mutex_enter(&ldcp->cblock);
4900 		}
4901 	}
4902 }
4903 
4904 /*
4905  * Initiate handshake with the peer by sending various messages
4906  * based on the handshake-phase that the channel is currently in.
4907  */
4908 static void
4909 vgen_handshake(vgen_ldc_t *ldcp)
4910 {
4911 	uint32_t	hphase = ldcp->hphase;
4912 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4913 	ldc_status_t	istatus;
4914 	int		rv = 0;
4915 
4916 	switch (hphase) {
4917 
4918 	case VH_PHASE1:
4919 
4920 		/*
4921 		 * start timer, for entire handshake process, turn this timer
4922 		 * off if all phases of handshake complete successfully and
4923 		 * hphase goes to VH_DONE(below) or
4924 		 * vgen_reset_hphase() gets called or
4925 		 * channel is reset due to errors or
4926 		 * vgen_ldc_uninit() is invoked(vgen_stop).
4927 		 */
4928 		ASSERT(ldcp->htid == 0);
4929 		ldcp->htid = timeout(vgen_hwatchdog, (caddr_t)ldcp,
4930 		    drv_usectohz(vgen_hwd_interval * MICROSEC));
4931 
4932 		/* Phase 1 involves negotiating the version */
4933 		rv = vgen_send_version_negotiate(ldcp);
4934 		break;
4935 
4936 	case VH_PHASE2:
4937 		rv = vgen_handshake_phase2(ldcp);
4938 		break;
4939 
4940 	case VH_PHASE3:
4941 		rv = vgen_send_rdx_info(ldcp);
4942 		break;
4943 
4944 	case VH_DONE:
4945 		/*
4946 		 * Save the id of pending handshake timer in cancel_htid.
4947 		 * This will be checked in vgen_ldc_cb() and the handshake
4948 		 * timer will be cancelled after releasing cblock.
4949 		 */
4950 		if (ldcp->htid) {
4951 			ldcp->cancel_htid = ldcp->htid;
4952 			ldcp->htid = 0;
4953 		}
4954 		ldcp->hretries = 0;
4955 		DBG1(vgenp, ldcp, "Handshake Done\n");
4956 
4957 		/*
4958 		 * The channel is up and handshake is done successfully. Now we
4959 		 * can mark the channel link_state as 'up'. We also notify the
4960 		 * stack if the channel is connected to vswitch.
4961 		 */
4962 		ldcp->link_state = LINK_STATE_UP;
4963 
4964 		if (ldcp->portp == vgenp->vsw_portp) {
4965 			/*
4966 			 * If this channel(port) is connected to vsw,
4967 			 * need to sync multicast table with vsw.
4968 			 */
4969 			rv = vgen_send_mcast_info(ldcp);
4970 			if (rv != VGEN_SUCCESS) {
4971 				break;
4972 			}
4973 
4974 			if (vgenp->pls_negotiated == B_FALSE) {
4975 				/*
4976 				 * We haven't negotiated with vswitch to get
4977 				 * physical link state updates. We can update
4978 				 * update the stack at this point as the
4979 				 * channel to vswitch is up and the handshake
4980 				 * is done successfully.
4981 				 *
4982 				 * If we have negotiated to get physical link
4983 				 * state updates, then we won't notify the
4984 				 * the stack here; we do that as soon as
4985 				 * vswitch sends us the initial phys link state
4986 				 * (see vgen_handle_physlink_info()).
4987 				 */
4988 				mutex_exit(&ldcp->cblock);
4989 				vgen_link_update(vgenp, ldcp->link_state);
4990 				mutex_enter(&ldcp->cblock);
4991 			}
4992 
4993 		}
4994 
4995 		/*
4996 		 * Check if mac layer should be notified to restart
4997 		 * transmissions. This can happen if the channel got
4998 		 * reset and vgen_clobber_tbufs() is called, while
4999 		 * need_resched is set.
5000 		 */
5001 		mutex_enter(&ldcp->tclock);
5002 		if (ldcp->need_resched) {
5003 			vio_net_tx_update_t vtx_update =
5004 			    ldcp->portp->vcb.vio_net_tx_update;
5005 
5006 			ldcp->need_resched = B_FALSE;
5007 			vtx_update(ldcp->portp->vhp);
5008 		}
5009 		mutex_exit(&ldcp->tclock);
5010 
5011 		break;
5012 
5013 	default:
5014 		break;
5015 	}
5016 
5017 	if (rv == ECONNRESET) {
5018 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
5019 			DWARN(vgenp, ldcp, "ldc_status err\n");
5020 		} else {
5021 			ldcp->ldc_status = istatus;
5022 		}
5023 		vgen_handle_evt_reset(ldcp);
5024 	} else if (rv) {
5025 		vgen_handshake_reset(ldcp);
5026 	}
5027 }
5028 
5029 /*
5030  * Check if the current handshake phase has completed successfully and
5031  * return the status.
5032  */
5033 static int
5034 vgen_handshake_done(vgen_ldc_t *ldcp)
5035 {
5036 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
5037 	uint32_t	hphase = ldcp->hphase;
5038 	int 		status = 0;
5039 
5040 	switch (hphase) {
5041 
5042 	case VH_PHASE1:
5043 		/*
5044 		 * Phase1 is done, if version negotiation
5045 		 * completed successfully.
5046 		 */
5047 		status = ((ldcp->hstate & VER_NEGOTIATED) ==
5048 		    VER_NEGOTIATED);
5049 		break;
5050 
5051 	case VH_PHASE2:
5052 		/*
5053 		 * Phase 2 is done, if attr info and dring info
5054 		 * have been exchanged successfully.
5055 		 */
5056 		status = (((ldcp->hstate & ATTR_INFO_EXCHANGED) ==
5057 		    ATTR_INFO_EXCHANGED) &&
5058 		    ((ldcp->hstate & DRING_INFO_EXCHANGED) ==
5059 		    DRING_INFO_EXCHANGED));
5060 		break;
5061 
5062 	case VH_PHASE3:
5063 		/* Phase 3 is done, if rdx msg has been exchanged */
5064 		status = ((ldcp->hstate & RDX_EXCHANGED) ==
5065 		    RDX_EXCHANGED);
5066 		break;
5067 
5068 	default:
5069 		break;
5070 	}
5071 
5072 	if (status == 0) {
5073 		return (VGEN_FAILURE);
5074 	}
5075 	DBG2(vgenp, ldcp, "PHASE(%d)\n", hphase);
5076 	return (VGEN_SUCCESS);
5077 }
5078 
5079 /* retry handshake on failure */
5080 static void
5081 vgen_handshake_retry(vgen_ldc_t *ldcp)
5082 {
5083 	/* reset handshake phase */
5084 	vgen_handshake_reset(ldcp);
5085 
5086 	/* handshake retry is specified and the channel is UP */
5087 	if (vgen_max_hretries && (ldcp->ldc_status == LDC_UP)) {
5088 		if (ldcp->hretries++ < vgen_max_hretries) {
5089 			ldcp->local_sid = ddi_get_lbolt();
5090 			vgen_handshake(vh_nextphase(ldcp));
5091 		}
5092 	}
5093 }
5094 
5095 
5096 /*
5097  * Link State Update Notes:
5098  * The link state of the channel connected to vswitch is reported as the link
5099  * state of the vnet device, by default. If the channel is down or reset, then
5100  * the link state is marked 'down'. If the channel is 'up' *and* handshake
5101  * between the vnet and vswitch is successful, then the link state is marked
5102  * 'up'. If physical network link state is desired, then the vnet device must
5103  * be configured to get physical link updates and the 'linkprop' property
5104  * in the virtual-device MD node indicates this. As part of attribute exchange
5105  * the vnet device negotiates with the vswitch to obtain physical link state
5106  * updates. If it successfully negotiates, vswitch sends an initial physlink
5107  * msg once the handshake is done and further whenever the physical link state
5108  * changes. Currently we don't have mac layer interfaces to report two distinct
5109  * link states - virtual and physical. Thus, if the vnet has been configured to
5110  * get physical link updates, then the link status will be reported as 'up'
5111  * only when both the virtual and physical links are up.
5112  */
5113 static void
5114 vgen_link_update(vgen_t *vgenp, link_state_t link_state)
5115 {
5116 	vnet_link_update(vgenp->vnetp, link_state);
5117 }
5118 
5119 /*
5120  * Handle a version info msg from the peer or an ACK/NACK from the peer
5121  * to a version info msg that we sent.
5122  */
5123 static int
5124 vgen_handle_version_negotiate(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5125 {
5126 	vgen_t		*vgenp;
5127 	vio_ver_msg_t	*vermsg = (vio_ver_msg_t *)tagp;
5128 	int		ack = 0;
5129 	int		failed = 0;
5130 	int		idx;
5131 	vgen_ver_t	*versions = ldcp->vgen_versions;
5132 	int		rv = 0;
5133 
5134 	vgenp = LDC_TO_VGEN(ldcp);
5135 	DBG1(vgenp, ldcp, "enter\n");
5136 	switch (tagp->vio_subtype) {
5137 	case VIO_SUBTYPE_INFO:
5138 
5139 		/*  Cache sid of peer if this is the first time */
5140 		if (ldcp->peer_sid == 0) {
5141 			DBG2(vgenp, ldcp, "Caching peer_sid(%x)\n",
5142 			    tagp->vio_sid);
5143 			ldcp->peer_sid = tagp->vio_sid;
5144 		}
5145 
5146 		if (ldcp->hphase != VH_PHASE1) {
5147 			/*
5148 			 * If we are not already in VH_PHASE1, reset to
5149 			 * pre-handshake state, and initiate handshake
5150 			 * to the peer too.
5151 			 */
5152 			vgen_handshake_reset(ldcp);
5153 			vgen_handshake(vh_nextphase(ldcp));
5154 		}
5155 		ldcp->hstate |= VER_INFO_RCVD;
5156 
5157 		/* save peer's requested values */
5158 		ldcp->peer_hparams.ver_major = vermsg->ver_major;
5159 		ldcp->peer_hparams.ver_minor = vermsg->ver_minor;
5160 		ldcp->peer_hparams.dev_class = vermsg->dev_class;
5161 
5162 		if ((vermsg->dev_class != VDEV_NETWORK) &&
5163 		    (vermsg->dev_class != VDEV_NETWORK_SWITCH)) {
5164 			/* unsupported dev_class, send NACK */
5165 
5166 			DWARN(vgenp, ldcp, "Version Negotiation Failed\n");
5167 
5168 			tagp->vio_subtype = VIO_SUBTYPE_NACK;
5169 			tagp->vio_sid = ldcp->local_sid;
5170 			/* send reply msg back to peer */
5171 			rv = vgen_sendmsg(ldcp, (caddr_t)tagp,
5172 			    sizeof (*vermsg), B_FALSE);
5173 			if (rv != VGEN_SUCCESS) {
5174 				return (rv);
5175 			}
5176 			return (VGEN_FAILURE);
5177 		}
5178 
5179 		DBG2(vgenp, ldcp, "VER_INFO_RCVD, ver(%d,%d)\n",
5180 		    vermsg->ver_major,  vermsg->ver_minor);
5181 
5182 		idx = 0;
5183 
5184 		for (;;) {
5185 
5186 			if (vermsg->ver_major > versions[idx].ver_major) {
5187 
5188 				/* nack with next lower version */
5189 				tagp->vio_subtype = VIO_SUBTYPE_NACK;
5190 				vermsg->ver_major = versions[idx].ver_major;
5191 				vermsg->ver_minor = versions[idx].ver_minor;
5192 				break;
5193 			}
5194 
5195 			if (vermsg->ver_major == versions[idx].ver_major) {
5196 
5197 				/* major version match - ACK version */
5198 				tagp->vio_subtype = VIO_SUBTYPE_ACK;
5199 				ack = 1;
5200 
5201 				/*
5202 				 * lower minor version to the one this endpt
5203 				 * supports, if necessary
5204 				 */
5205 				if (vermsg->ver_minor >
5206 				    versions[idx].ver_minor) {
5207 					vermsg->ver_minor =
5208 					    versions[idx].ver_minor;
5209 					ldcp->peer_hparams.ver_minor =
5210 					    versions[idx].ver_minor;
5211 				}
5212 				break;
5213 			}
5214 
5215 			idx++;
5216 
5217 			if (idx == VGEN_NUM_VER) {
5218 
5219 				/* no version match - send NACK */
5220 				tagp->vio_subtype = VIO_SUBTYPE_NACK;
5221 				vermsg->ver_major = 0;
5222 				vermsg->ver_minor = 0;
5223 				failed = 1;
5224 				break;
5225 			}
5226 
5227 		}
5228 
5229 		tagp->vio_sid = ldcp->local_sid;
5230 
5231 		/* send reply msg back to peer */
5232 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*vermsg),
5233 		    B_FALSE);
5234 		if (rv != VGEN_SUCCESS) {
5235 			return (rv);
5236 		}
5237 
5238 		if (ack) {
5239 			ldcp->hstate |= VER_ACK_SENT;
5240 			DBG2(vgenp, ldcp, "VER_ACK_SENT, ver(%d,%d) \n",
5241 			    vermsg->ver_major, vermsg->ver_minor);
5242 		}
5243 		if (failed) {
5244 			DWARN(vgenp, ldcp, "Negotiation Failed\n");
5245 			return (VGEN_FAILURE);
5246 		}
5247 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5248 
5249 			/*  VER_ACK_SENT and VER_ACK_RCVD */
5250 
5251 			/* local and peer versions match? */
5252 			ASSERT((ldcp->local_hparams.ver_major ==
5253 			    ldcp->peer_hparams.ver_major) &&
5254 			    (ldcp->local_hparams.ver_minor ==
5255 			    ldcp->peer_hparams.ver_minor));
5256 
5257 			vgen_set_vnet_proto_ops(ldcp);
5258 
5259 			/* move to the next phase */
5260 			vgen_handshake(vh_nextphase(ldcp));
5261 		}
5262 
5263 		break;
5264 
5265 	case VIO_SUBTYPE_ACK:
5266 
5267 		if (ldcp->hphase != VH_PHASE1) {
5268 			/*  This should not happen. */
5269 			DWARN(vgenp, ldcp, "Invalid Phase(%u)\n", ldcp->hphase);
5270 			return (VGEN_FAILURE);
5271 		}
5272 
5273 		/* SUCCESS - we have agreed on a version */
5274 		ldcp->local_hparams.ver_major = vermsg->ver_major;
5275 		ldcp->local_hparams.ver_minor = vermsg->ver_minor;
5276 		ldcp->hstate |= VER_ACK_RCVD;
5277 
5278 		DBG2(vgenp, ldcp, "VER_ACK_RCVD, ver(%d,%d) \n",
5279 		    vermsg->ver_major,  vermsg->ver_minor);
5280 
5281 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5282 
5283 			/*  VER_ACK_SENT and VER_ACK_RCVD */
5284 
5285 			/* local and peer versions match? */
5286 			ASSERT((ldcp->local_hparams.ver_major ==
5287 			    ldcp->peer_hparams.ver_major) &&
5288 			    (ldcp->local_hparams.ver_minor ==
5289 			    ldcp->peer_hparams.ver_minor));
5290 
5291 			vgen_set_vnet_proto_ops(ldcp);
5292 
5293 			/* move to the next phase */
5294 			vgen_handshake(vh_nextphase(ldcp));
5295 		}
5296 		break;
5297 
5298 	case VIO_SUBTYPE_NACK:
5299 
5300 		if (ldcp->hphase != VH_PHASE1) {
5301 			/*  This should not happen.  */
5302 			DWARN(vgenp, ldcp, "VER_NACK_RCVD Invalid "
5303 			"Phase(%u)\n", ldcp->hphase);
5304 			return (VGEN_FAILURE);
5305 		}
5306 
5307 		DBG2(vgenp, ldcp, "VER_NACK_RCVD next ver(%d,%d)\n",
5308 		    vermsg->ver_major, vermsg->ver_minor);
5309 
5310 		/* check if version in NACK is zero */
5311 		if (vermsg->ver_major == 0 && vermsg->ver_minor == 0) {
5312 			/*
5313 			 * Version Negotiation has failed.
5314 			 */
5315 			DWARN(vgenp, ldcp, "Version Negotiation Failed\n");
5316 			return (VGEN_FAILURE);
5317 		}
5318 
5319 		idx = 0;
5320 
5321 		for (;;) {
5322 
5323 			if (vermsg->ver_major > versions[idx].ver_major) {
5324 				/* select next lower version */
5325 
5326 				ldcp->local_hparams.ver_major =
5327 				    versions[idx].ver_major;
5328 				ldcp->local_hparams.ver_minor =
5329 				    versions[idx].ver_minor;
5330 				break;
5331 			}
5332 
5333 			if (vermsg->ver_major == versions[idx].ver_major) {
5334 				/* major version match */
5335 
5336 				ldcp->local_hparams.ver_major =
5337 				    versions[idx].ver_major;
5338 
5339 				ldcp->local_hparams.ver_minor =
5340 				    versions[idx].ver_minor;
5341 				break;
5342 			}
5343 
5344 			idx++;
5345 
5346 			if (idx == VGEN_NUM_VER) {
5347 				/*
5348 				 * no version match.
5349 				 * Version Negotiation has failed.
5350 				 */
5351 				DWARN(vgenp, ldcp,
5352 				    "Version Negotiation Failed\n");
5353 				return (VGEN_FAILURE);
5354 			}
5355 
5356 		}
5357 
5358 		rv = vgen_send_version_negotiate(ldcp);
5359 		if (rv != VGEN_SUCCESS) {
5360 			return (rv);
5361 		}
5362 
5363 		break;
5364 	}
5365 
5366 	DBG1(vgenp, ldcp, "exit\n");
5367 	return (VGEN_SUCCESS);
5368 }
5369 
5370 /* Check if the attributes are supported */
5371 static int
5372 vgen_check_attr_info(vgen_ldc_t *ldcp, vnet_attr_msg_t *msg)
5373 {
5374 	vgen_hparams_t	*lp = &ldcp->local_hparams;
5375 
5376 	if ((msg->addr_type != ADDR_TYPE_MAC) ||
5377 	    (msg->ack_freq > 64) ||
5378 	    (msg->xfer_mode != lp->xfer_mode)) {
5379 		return (VGEN_FAILURE);
5380 	}
5381 
5382 	if (VGEN_VER_LT(ldcp, 1, 4)) {
5383 		/* versions < 1.4, mtu must match */
5384 		if (msg->mtu != lp->mtu) {
5385 			return (VGEN_FAILURE);
5386 		}
5387 	} else {
5388 		/* Ver >= 1.4, validate mtu of the peer is at least ETHERMAX */
5389 		if (msg->mtu < ETHERMAX) {
5390 			return (VGEN_FAILURE);
5391 		}
5392 	}
5393 
5394 	return (VGEN_SUCCESS);
5395 }
5396 
5397 /*
5398  * Handle an attribute info msg from the peer or an ACK/NACK from the peer
5399  * to an attr info msg that we sent.
5400  */
5401 static int
5402 vgen_handle_attr_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5403 {
5404 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
5405 	vnet_attr_msg_t	*msg = (vnet_attr_msg_t *)tagp;
5406 	vgen_hparams_t	*lp = &ldcp->local_hparams;
5407 	vgen_hparams_t	*rp = &ldcp->peer_hparams;
5408 	int		ack = 1;
5409 	int		rv = 0;
5410 	uint32_t	mtu;
5411 
5412 	DBG1(vgenp, ldcp, "enter\n");
5413 	if (ldcp->hphase != VH_PHASE2) {
5414 		DWARN(vgenp, ldcp, "Rcvd ATTR_INFO subtype(%d),"
5415 		" Invalid Phase(%u)\n",
5416 		    tagp->vio_subtype, ldcp->hphase);
5417 		return (VGEN_FAILURE);
5418 	}
5419 	switch (tagp->vio_subtype) {
5420 	case VIO_SUBTYPE_INFO:
5421 
5422 		DBG2(vgenp, ldcp, "ATTR_INFO_RCVD \n");
5423 		ldcp->hstate |= ATTR_INFO_RCVD;
5424 
5425 		/* save peer's values */
5426 		rp->mtu = msg->mtu;
5427 		rp->addr = msg->addr;
5428 		rp->addr_type = msg->addr_type;
5429 		rp->xfer_mode = msg->xfer_mode;
5430 		rp->ack_freq = msg->ack_freq;
5431 
5432 		rv = vgen_check_attr_info(ldcp, msg);
5433 		if (rv == VGEN_FAILURE) {
5434 			/* unsupported attr, send NACK */
5435 			ack = 0;
5436 		} else {
5437 
5438 			if (VGEN_VER_GTEQ(ldcp, 1, 4)) {
5439 
5440 				/*
5441 				 * Versions >= 1.4:
5442 				 * The mtu is negotiated down to the
5443 				 * minimum of our mtu and peer's mtu.
5444 				 */
5445 				mtu = MIN(msg->mtu, vgenp->max_frame_size);
5446 
5447 				/*
5448 				 * If we have received an ack for the attr info
5449 				 * that we sent, then check if the mtu computed
5450 				 * above matches the mtu that the peer had ack'd
5451 				 * (saved in local hparams). If they don't
5452 				 * match, we fail the handshake.
5453 				 */
5454 				if (ldcp->hstate & ATTR_ACK_RCVD) {
5455 					if (mtu != lp->mtu) {
5456 						/* send NACK */
5457 						ack = 0;
5458 					}
5459 				} else {
5460 					/*
5461 					 * Save the mtu computed above in our
5462 					 * attr parameters, so it gets sent in
5463 					 * the attr info from us to the peer.
5464 					 */
5465 					lp->mtu = mtu;
5466 				}
5467 
5468 				/* save the MIN mtu in the msg to be replied */
5469 				msg->mtu = mtu;
5470 
5471 			}
5472 		}
5473 
5474 
5475 		if (ack) {
5476 			tagp->vio_subtype = VIO_SUBTYPE_ACK;
5477 		} else {
5478 			tagp->vio_subtype = VIO_SUBTYPE_NACK;
5479 		}
5480 		tagp->vio_sid = ldcp->local_sid;
5481 
5482 		/* send reply msg back to peer */
5483 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msg),
5484 		    B_FALSE);
5485 		if (rv != VGEN_SUCCESS) {
5486 			return (rv);
5487 		}
5488 
5489 		if (ack) {
5490 			ldcp->hstate |= ATTR_ACK_SENT;
5491 			DBG2(vgenp, ldcp, "ATTR_ACK_SENT \n");
5492 		} else {
5493 			/* failed */
5494 			DWARN(vgenp, ldcp, "ATTR_NACK_SENT \n");
5495 			return (VGEN_FAILURE);
5496 		}
5497 
5498 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5499 			vgen_handshake(vh_nextphase(ldcp));
5500 		}
5501 
5502 		break;
5503 
5504 	case VIO_SUBTYPE_ACK:
5505 
5506 		if (VGEN_VER_GTEQ(ldcp, 1, 5) &&
5507 		    ldcp->portp == vgenp->vsw_portp) {
5508 			/*
5509 			 * Versions >= 1.5:
5510 			 * If the vnet device has been configured to get
5511 			 * physical link state updates, check the corresponding
5512 			 * bits in the ack msg, if the peer is vswitch.
5513 			 */
5514 			if (((lp->physlink_update &
5515 			    PHYSLINK_UPDATE_STATE_MASK) ==
5516 			    PHYSLINK_UPDATE_STATE) &&
5517 
5518 			    ((msg->physlink_update &
5519 			    PHYSLINK_UPDATE_STATE_MASK) ==
5520 			    PHYSLINK_UPDATE_STATE_ACK)) {
5521 				vgenp->pls_negotiated = B_TRUE;
5522 			} else {
5523 				vgenp->pls_negotiated = B_FALSE;
5524 			}
5525 		}
5526 
5527 		if (VGEN_VER_GTEQ(ldcp, 1, 4)) {
5528 			/*
5529 			 * Versions >= 1.4:
5530 			 * The ack msg sent by the peer contains the minimum of
5531 			 * our mtu (that we had sent in our attr info) and the
5532 			 * peer's mtu.
5533 			 *
5534 			 * If we have sent an ack for the attr info msg from
5535 			 * the peer, check if the mtu that was computed then
5536 			 * (saved in local hparams) matches the mtu that the
5537 			 * peer has ack'd. If they don't match, we fail the
5538 			 * handshake.
5539 			 */
5540 			if (ldcp->hstate & ATTR_ACK_SENT) {
5541 				if (lp->mtu != msg->mtu) {
5542 					return (VGEN_FAILURE);
5543 				}
5544 			} else {
5545 				/*
5546 				 * If the mtu ack'd by the peer is > our mtu
5547 				 * fail handshake. Otherwise, save the mtu, so
5548 				 * we can validate it when we receive attr info
5549 				 * from our peer.
5550 				 */
5551 				if (msg->mtu > lp->mtu) {
5552 					return (VGEN_FAILURE);
5553 				}
5554 				if (msg->mtu <= lp->mtu) {
5555 					lp->mtu = msg->mtu;
5556 				}
5557 			}
5558 		}
5559 
5560 		ldcp->hstate |= ATTR_ACK_RCVD;
5561 
5562 		DBG2(vgenp, ldcp, "ATTR_ACK_RCVD \n");
5563 
5564 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5565 			vgen_handshake(vh_nextphase(ldcp));
5566 		}
5567 		break;
5568 
5569 	case VIO_SUBTYPE_NACK:
5570 
5571 		DBG2(vgenp, ldcp, "ATTR_NACK_RCVD \n");
5572 		return (VGEN_FAILURE);
5573 	}
5574 	DBG1(vgenp, ldcp, "exit\n");
5575 	return (VGEN_SUCCESS);
5576 }
5577 
5578 /* Check if the dring info msg is ok */
5579 static int
5580 vgen_check_dring_reg(vio_dring_reg_msg_t *msg)
5581 {
5582 	/* check if msg contents are ok */
5583 	if ((msg->num_descriptors < 128) || (msg->descriptor_size <
5584 	    sizeof (vnet_public_desc_t))) {
5585 		return (VGEN_FAILURE);
5586 	}
5587 	return (VGEN_SUCCESS);
5588 }
5589 
5590 /*
5591  * Handle a descriptor ring register msg from the peer or an ACK/NACK from
5592  * the peer to a dring register msg that we sent.
5593  */
5594 static int
5595 vgen_handle_dring_reg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5596 {
5597 	vio_dring_reg_msg_t *msg = (vio_dring_reg_msg_t *)tagp;
5598 	ldc_mem_cookie_t dcookie;
5599 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5600 	int ack = 0;
5601 	int rv = 0;
5602 
5603 	DBG1(vgenp, ldcp, "enter\n");
5604 	if (ldcp->hphase < VH_PHASE2) {
5605 		/* dring_info can be rcvd in any of the phases after Phase1 */
5606 		DWARN(vgenp, ldcp,
5607 		    "Rcvd DRING_INFO Subtype (%d), Invalid Phase(%u)\n",
5608 		    tagp->vio_subtype, ldcp->hphase);
5609 		return (VGEN_FAILURE);
5610 	}
5611 	switch (tagp->vio_subtype) {
5612 	case VIO_SUBTYPE_INFO:
5613 
5614 		DBG2(vgenp, ldcp, "DRING_INFO_RCVD \n");
5615 		ldcp->hstate |= DRING_INFO_RCVD;
5616 		bcopy((msg->cookie), &dcookie, sizeof (dcookie));
5617 
5618 		ASSERT(msg->ncookies == 1);
5619 
5620 		if (vgen_check_dring_reg(msg) == VGEN_SUCCESS) {
5621 			/*
5622 			 * verified dring info msg to be ok,
5623 			 * now try to map the remote dring.
5624 			 */
5625 			rv = vgen_init_rxds(ldcp, msg->num_descriptors,
5626 			    msg->descriptor_size, &dcookie,
5627 			    msg->ncookies);
5628 			if (rv == DDI_SUCCESS) {
5629 				/* now we can ack the peer */
5630 				ack = 1;
5631 			}
5632 		}
5633 		if (ack == 0) {
5634 			/* failed, send NACK */
5635 			tagp->vio_subtype = VIO_SUBTYPE_NACK;
5636 		} else {
5637 			if (!(ldcp->peer_hparams.dring_ready)) {
5638 
5639 				/* save peer's dring_info values */
5640 				bcopy(&dcookie,
5641 				    &(ldcp->peer_hparams.dring_cookie),
5642 				    sizeof (dcookie));
5643 				ldcp->peer_hparams.num_desc =
5644 				    msg->num_descriptors;
5645 				ldcp->peer_hparams.desc_size =
5646 				    msg->descriptor_size;
5647 				ldcp->peer_hparams.num_dcookies =
5648 				    msg->ncookies;
5649 
5650 				/* set dring_ident for the peer */
5651 				ldcp->peer_hparams.dring_ident =
5652 				    (uint64_t)ldcp->rxdp;
5653 				/* return the dring_ident in ack msg */
5654 				msg->dring_ident =
5655 				    (uint64_t)ldcp->rxdp;
5656 
5657 				ldcp->peer_hparams.dring_ready = B_TRUE;
5658 			}
5659 			tagp->vio_subtype = VIO_SUBTYPE_ACK;
5660 		}
5661 		tagp->vio_sid = ldcp->local_sid;
5662 		/* send reply msg back to peer */
5663 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msg),
5664 		    B_FALSE);
5665 		if (rv != VGEN_SUCCESS) {
5666 			return (rv);
5667 		}
5668 
5669 		if (ack) {
5670 			ldcp->hstate |= DRING_ACK_SENT;
5671 			DBG2(vgenp, ldcp, "DRING_ACK_SENT");
5672 		} else {
5673 			DWARN(vgenp, ldcp, "DRING_NACK_SENT");
5674 			return (VGEN_FAILURE);
5675 		}
5676 
5677 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5678 			vgen_handshake(vh_nextphase(ldcp));
5679 		}
5680 
5681 		break;
5682 
5683 	case VIO_SUBTYPE_ACK:
5684 
5685 		ldcp->hstate |= DRING_ACK_RCVD;
5686 
5687 		DBG2(vgenp, ldcp, "DRING_ACK_RCVD");
5688 
5689 		if (!(ldcp->local_hparams.dring_ready)) {
5690 			/* local dring is now ready */
5691 			ldcp->local_hparams.dring_ready = B_TRUE;
5692 
5693 			/* save dring_ident acked by peer */
5694 			ldcp->local_hparams.dring_ident =
5695 			    msg->dring_ident;
5696 		}
5697 
5698 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5699 			vgen_handshake(vh_nextphase(ldcp));
5700 		}
5701 
5702 		break;
5703 
5704 	case VIO_SUBTYPE_NACK:
5705 
5706 		DBG2(vgenp, ldcp, "DRING_NACK_RCVD");
5707 		return (VGEN_FAILURE);
5708 	}
5709 	DBG1(vgenp, ldcp, "exit\n");
5710 	return (VGEN_SUCCESS);
5711 }
5712 
5713 /*
5714  * Handle a rdx info msg from the peer or an ACK/NACK
5715  * from the peer to a rdx info msg that we sent.
5716  */
5717 static int
5718 vgen_handle_rdx_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5719 {
5720 	int rv = 0;
5721 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
5722 
5723 	DBG1(vgenp, ldcp, "enter\n");
5724 	if (ldcp->hphase != VH_PHASE3) {
5725 		DWARN(vgenp, ldcp,
5726 		    "Rcvd RDX_INFO Subtype (%d), Invalid Phase(%u)\n",
5727 		    tagp->vio_subtype, ldcp->hphase);
5728 		return (VGEN_FAILURE);
5729 	}
5730 	switch (tagp->vio_subtype) {
5731 	case VIO_SUBTYPE_INFO:
5732 
5733 		DBG2(vgenp, ldcp, "RDX_INFO_RCVD \n");
5734 		ldcp->hstate |= RDX_INFO_RCVD;
5735 
5736 		tagp->vio_subtype = VIO_SUBTYPE_ACK;
5737 		tagp->vio_sid = ldcp->local_sid;
5738 		/* send reply msg back to peer */
5739 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (vio_rdx_msg_t),
5740 		    B_FALSE);
5741 		if (rv != VGEN_SUCCESS) {
5742 			return (rv);
5743 		}
5744 
5745 		ldcp->hstate |= RDX_ACK_SENT;
5746 		DBG2(vgenp, ldcp, "RDX_ACK_SENT \n");
5747 
5748 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5749 			vgen_handshake(vh_nextphase(ldcp));
5750 		}
5751 
5752 		break;
5753 
5754 	case VIO_SUBTYPE_ACK:
5755 
5756 		ldcp->hstate |= RDX_ACK_RCVD;
5757 
5758 		DBG2(vgenp, ldcp, "RDX_ACK_RCVD \n");
5759 
5760 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5761 			vgen_handshake(vh_nextphase(ldcp));
5762 		}
5763 		break;
5764 
5765 	case VIO_SUBTYPE_NACK:
5766 
5767 		DBG2(vgenp, ldcp, "RDX_NACK_RCVD \n");
5768 		return (VGEN_FAILURE);
5769 	}
5770 	DBG1(vgenp, ldcp, "exit\n");
5771 	return (VGEN_SUCCESS);
5772 }
5773 
5774 /* Handle ACK/NACK from vsw to a set multicast msg that we sent */
5775 static int
5776 vgen_handle_mcast_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5777 {
5778 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5779 	vnet_mcast_msg_t *msgp = (vnet_mcast_msg_t *)tagp;
5780 	struct ether_addr *addrp;
5781 	int count;
5782 	int i;
5783 
5784 	DBG1(vgenp, ldcp, "enter\n");
5785 	switch (tagp->vio_subtype) {
5786 
5787 	case VIO_SUBTYPE_INFO:
5788 
5789 		/* vnet shouldn't recv set mcast msg, only vsw handles it */
5790 		DWARN(vgenp, ldcp, "rcvd SET_MCAST_INFO \n");
5791 		break;
5792 
5793 	case VIO_SUBTYPE_ACK:
5794 
5795 		/* success adding/removing multicast addr */
5796 		DBG1(vgenp, ldcp, "rcvd SET_MCAST_ACK \n");
5797 		break;
5798 
5799 	case VIO_SUBTYPE_NACK:
5800 
5801 		DWARN(vgenp, ldcp, "rcvd SET_MCAST_NACK \n");
5802 		if (!(msgp->set)) {
5803 			/* multicast remove request failed */
5804 			break;
5805 		}
5806 
5807 		/* multicast add request failed */
5808 		for (count = 0; count < msgp->count; count++) {
5809 			addrp = &(msgp->mca[count]);
5810 
5811 			/* delete address from the table */
5812 			for (i = 0; i < vgenp->mccount; i++) {
5813 				if (ether_cmp(addrp,
5814 				    &(vgenp->mctab[i])) == 0) {
5815 					if (vgenp->mccount > 1) {
5816 						int t = vgenp->mccount - 1;
5817 						vgenp->mctab[i] =
5818 						    vgenp->mctab[t];
5819 					}
5820 					vgenp->mccount--;
5821 					break;
5822 				}
5823 			}
5824 		}
5825 		break;
5826 
5827 	}
5828 	DBG1(vgenp, ldcp, "exit\n");
5829 
5830 	return (VGEN_SUCCESS);
5831 }
5832 
5833 /*
5834  * Physical link information message from the peer. Only vswitch should send
5835  * us this message; if the vnet device has been configured to get physical link
5836  * state updates. Note that we must have already negotiated this with the
5837  * vswitch during attribute exchange phase of handshake.
5838  */
5839 static int
5840 vgen_handle_physlink_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5841 {
5842 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
5843 	vnet_physlink_msg_t	*msgp = (vnet_physlink_msg_t *)tagp;
5844 	link_state_t		link_state;
5845 	int			rv;
5846 
5847 	if (ldcp->portp != vgenp->vsw_portp) {
5848 		/*
5849 		 * drop the message and don't process; as we should
5850 		 * receive physlink_info message from only vswitch.
5851 		 */
5852 		return (VGEN_SUCCESS);
5853 	}
5854 
5855 	if (vgenp->pls_negotiated == B_FALSE) {
5856 		/*
5857 		 * drop the message and don't process; as we should receive
5858 		 * physlink_info message only if physlink update is enabled for
5859 		 * the device and negotiated with vswitch.
5860 		 */
5861 		return (VGEN_SUCCESS);
5862 	}
5863 
5864 	switch (tagp->vio_subtype) {
5865 
5866 	case VIO_SUBTYPE_INFO:
5867 
5868 		if ((msgp->physlink_info & VNET_PHYSLINK_STATE_MASK) ==
5869 		    VNET_PHYSLINK_STATE_UP) {
5870 			link_state = LINK_STATE_UP;
5871 		} else {
5872 			link_state = LINK_STATE_DOWN;
5873 		}
5874 
5875 		if (vgenp->phys_link_state != link_state) {
5876 			vgenp->phys_link_state = link_state;
5877 			mutex_exit(&ldcp->cblock);
5878 
5879 			/* Now update the stack */
5880 			vgen_link_update(vgenp, link_state);
5881 
5882 			mutex_enter(&ldcp->cblock);
5883 		}
5884 
5885 		tagp->vio_subtype = VIO_SUBTYPE_ACK;
5886 		tagp->vio_sid = ldcp->local_sid;
5887 
5888 		/* send reply msg back to peer */
5889 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp,
5890 		    sizeof (vnet_physlink_msg_t), B_FALSE);
5891 		if (rv != VGEN_SUCCESS) {
5892 			return (rv);
5893 		}
5894 		break;
5895 
5896 	case VIO_SUBTYPE_ACK:
5897 
5898 		/* vnet shouldn't recv physlink acks */
5899 		DWARN(vgenp, ldcp, "rcvd PHYSLINK_ACK \n");
5900 		break;
5901 
5902 	case VIO_SUBTYPE_NACK:
5903 
5904 		/* vnet shouldn't recv physlink nacks */
5905 		DWARN(vgenp, ldcp, "rcvd PHYSLINK_NACK \n");
5906 		break;
5907 
5908 	}
5909 	DBG1(vgenp, ldcp, "exit\n");
5910 
5911 	return (VGEN_SUCCESS);
5912 }
5913 
5914 /* handler for control messages received from the peer ldc end-point */
5915 static int
5916 vgen_handle_ctrlmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5917 {
5918 	int rv = 0;
5919 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5920 
5921 	DBG1(vgenp, ldcp, "enter\n");
5922 	switch (tagp->vio_subtype_env) {
5923 
5924 	case VIO_VER_INFO:
5925 		rv = vgen_handle_version_negotiate(ldcp, tagp);
5926 		break;
5927 
5928 	case VIO_ATTR_INFO:
5929 		rv = vgen_handle_attr_info(ldcp, tagp);
5930 		break;
5931 
5932 	case VIO_DRING_REG:
5933 		rv = vgen_handle_dring_reg(ldcp, tagp);
5934 		break;
5935 
5936 	case VIO_RDX:
5937 		rv = vgen_handle_rdx_info(ldcp, tagp);
5938 		break;
5939 
5940 	case VNET_MCAST_INFO:
5941 		rv = vgen_handle_mcast_info(ldcp, tagp);
5942 		break;
5943 
5944 	case VIO_DDS_INFO:
5945 		/*
5946 		 * If we are in the process of resetting the vswitch channel,
5947 		 * drop the dds message. A new handshake will be initiated
5948 		 * when the channel comes back up after the reset and dds
5949 		 * negotiation can then continue.
5950 		 */
5951 		if (ldcp->need_ldc_reset == B_TRUE) {
5952 			break;
5953 		}
5954 		rv = vgen_dds_rx(ldcp, tagp);
5955 		break;
5956 
5957 	case VNET_PHYSLINK_INFO:
5958 		rv = vgen_handle_physlink_info(ldcp, tagp);
5959 		break;
5960 	}
5961 
5962 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
5963 	return (rv);
5964 }
5965 
5966 /* handler for data messages received from the peer ldc end-point */
5967 static int
5968 vgen_handle_datamsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp, uint32_t msglen)
5969 {
5970 	int rv = 0;
5971 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5972 
5973 	DBG1(vgenp, ldcp, "enter\n");
5974 
5975 	if (ldcp->hphase != VH_DONE)
5976 		return (rv);
5977 
5978 	if (tagp->vio_subtype == VIO_SUBTYPE_INFO) {
5979 		rv = vgen_check_datamsg_seq(ldcp, tagp);
5980 		if (rv != 0) {
5981 			return (rv);
5982 		}
5983 	}
5984 
5985 	switch (tagp->vio_subtype_env) {
5986 	case VIO_DRING_DATA:
5987 		rv = vgen_handle_dring_data(ldcp, tagp);
5988 		break;
5989 
5990 	case VIO_PKT_DATA:
5991 		ldcp->rx_pktdata((void *)ldcp, (void *)tagp, msglen);
5992 		break;
5993 	default:
5994 		break;
5995 	}
5996 
5997 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
5998 	return (rv);
5999 }
6000 
6001 /*
6002  * dummy pkt data handler function for vnet protocol version 1.0
6003  */
6004 static void
6005 vgen_handle_pkt_data_nop(void *arg1, void *arg2, uint32_t msglen)
6006 {
6007 	_NOTE(ARGUNUSED(arg1, arg2, msglen))
6008 }
6009 
6010 /*
6011  * This function handles raw pkt data messages received over the channel.
6012  * Currently, only priority-eth-type frames are received through this mechanism.
6013  * In this case, the frame(data) is present within the message itself which
6014  * is copied into an mblk before sending it up the stack.
6015  */
6016 static void
6017 vgen_handle_pkt_data(void *arg1, void *arg2, uint32_t msglen)
6018 {
6019 	vgen_ldc_t		*ldcp = (vgen_ldc_t *)arg1;
6020 	vio_raw_data_msg_t	*pkt	= (vio_raw_data_msg_t *)arg2;
6021 	uint32_t		size;
6022 	mblk_t			*mp;
6023 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
6024 	vgen_stats_t		*statsp = &ldcp->stats;
6025 	vgen_hparams_t		*lp = &ldcp->local_hparams;
6026 	vio_net_rx_cb_t		vrx_cb;
6027 
6028 	ASSERT(MUTEX_HELD(&ldcp->cblock));
6029 
6030 	mutex_exit(&ldcp->cblock);
6031 
6032 	size = msglen - VIO_PKT_DATA_HDRSIZE;
6033 	if (size < ETHERMIN || size > lp->mtu) {
6034 		(void) atomic_inc_32(&statsp->rx_pri_fail);
6035 		goto exit;
6036 	}
6037 
6038 	mp = vio_multipool_allocb(&ldcp->vmp, size);
6039 	if (mp == NULL) {
6040 		mp = allocb(size, BPRI_MED);
6041 		if (mp == NULL) {
6042 			(void) atomic_inc_32(&statsp->rx_pri_fail);
6043 			DWARN(vgenp, ldcp, "allocb failure, "
6044 			    "unable to process priority frame\n");
6045 			goto exit;
6046 		}
6047 	}
6048 
6049 	/* copy the frame from the payload of raw data msg into the mblk */
6050 	bcopy(pkt->data, mp->b_rptr, size);
6051 	mp->b_wptr = mp->b_rptr + size;
6052 
6053 	/* update stats */
6054 	(void) atomic_inc_64(&statsp->rx_pri_packets);
6055 	(void) atomic_add_64(&statsp->rx_pri_bytes, size);
6056 
6057 	/* send up; call vrx_cb() as cblock is already released */
6058 	vrx_cb = ldcp->portp->vcb.vio_net_rx_cb;
6059 	vrx_cb(ldcp->portp->vhp, mp);
6060 
6061 exit:
6062 	mutex_enter(&ldcp->cblock);
6063 }
6064 
6065 static int
6066 vgen_send_dring_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp, uint32_t start,
6067     int32_t end, uint8_t pstate)
6068 {
6069 	int rv = 0;
6070 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6071 	vio_dring_msg_t *msgp = (vio_dring_msg_t *)tagp;
6072 
6073 	tagp->vio_subtype = VIO_SUBTYPE_ACK;
6074 	tagp->vio_sid = ldcp->local_sid;
6075 	msgp->start_idx = start;
6076 	msgp->end_idx = end;
6077 	msgp->dring_process_state = pstate;
6078 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msgp), B_FALSE);
6079 	if (rv != VGEN_SUCCESS) {
6080 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
6081 	}
6082 	return (rv);
6083 }
6084 
6085 static int
6086 vgen_handle_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
6087 {
6088 	int rv = 0;
6089 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6090 
6091 
6092 	DBG1(vgenp, ldcp, "enter\n");
6093 	switch (tagp->vio_subtype) {
6094 
6095 	case VIO_SUBTYPE_INFO:
6096 		/*
6097 		 * To reduce the locking contention, release the
6098 		 * cblock here and re-acquire it once we are done
6099 		 * receiving packets.
6100 		 */
6101 		mutex_exit(&ldcp->cblock);
6102 		mutex_enter(&ldcp->rxlock);
6103 		rv = vgen_handle_dring_data_info(ldcp, tagp);
6104 		mutex_exit(&ldcp->rxlock);
6105 		mutex_enter(&ldcp->cblock);
6106 		break;
6107 
6108 	case VIO_SUBTYPE_ACK:
6109 		rv = vgen_handle_dring_data_ack(ldcp, tagp);
6110 		break;
6111 
6112 	case VIO_SUBTYPE_NACK:
6113 		rv = vgen_handle_dring_data_nack(ldcp, tagp);
6114 		break;
6115 	}
6116 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
6117 	return (rv);
6118 }
6119 
6120 static int
6121 vgen_handle_dring_data_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
6122 {
6123 	uint32_t start;
6124 	int32_t end;
6125 	int rv = 0;
6126 	vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;
6127 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6128 #ifdef VGEN_HANDLE_LOST_PKTS
6129 	vgen_stats_t *statsp = &ldcp->stats;
6130 	uint32_t rxi;
6131 	int n;
6132 #endif
6133 
6134 	DBG1(vgenp, ldcp, "enter\n");
6135 
6136 	start = dringmsg->start_idx;
6137 	end = dringmsg->end_idx;
6138 	/*
6139 	 * received a data msg, which contains the start and end
6140 	 * indices of the descriptors within the rx ring holding data,
6141 	 * the seq_num of data packet corresponding to the start index,
6142 	 * and the dring_ident.
6143 	 * We can now read the contents of each of these descriptors
6144 	 * and gather data from it.
6145 	 */
6146 	DBG1(vgenp, ldcp, "INFO: start(%d), end(%d)\n",
6147 	    start, end);
6148 
6149 	/* validate rx start and end indeces */
6150 	if (!(CHECK_RXI(start, ldcp)) || ((end != -1) &&
6151 	    !(CHECK_RXI(end, ldcp)))) {
6152 		DWARN(vgenp, ldcp, "Invalid Rx start(%d) or end(%d)\n",
6153 		    start, end);
6154 		/* drop the message if invalid index */
6155 		return (rv);
6156 	}
6157 
6158 	/* validate dring_ident */
6159 	if (dringmsg->dring_ident != ldcp->peer_hparams.dring_ident) {
6160 		DWARN(vgenp, ldcp, "Invalid dring ident 0x%x\n",
6161 		    dringmsg->dring_ident);
6162 		/* invalid dring_ident, drop the msg */
6163 		return (rv);
6164 	}
6165 #ifdef DEBUG
6166 	if (vgen_trigger_rxlost) {
6167 		/* drop this msg to simulate lost pkts for debugging */
6168 		vgen_trigger_rxlost = 0;
6169 		return (rv);
6170 	}
6171 #endif
6172 
6173 #ifdef	VGEN_HANDLE_LOST_PKTS
6174 
6175 	/* receive start index doesn't match expected index */
6176 	if (ldcp->next_rxi != start) {
6177 		DWARN(vgenp, ldcp, "next_rxi(%d) != start(%d)\n",
6178 		    ldcp->next_rxi, start);
6179 
6180 		/* calculate the number of pkts lost */
6181 		if (start >= ldcp->next_rxi) {
6182 			n = start - ldcp->next_rxi;
6183 		} else  {
6184 			n = ldcp->num_rxds - (ldcp->next_rxi - start);
6185 		}
6186 
6187 		statsp->rx_lost_pkts += n;
6188 		tagp->vio_subtype = VIO_SUBTYPE_NACK;
6189 		tagp->vio_sid = ldcp->local_sid;
6190 		/* indicate the range of lost descriptors */
6191 		dringmsg->start_idx = ldcp->next_rxi;
6192 		rxi = start;
6193 		DECR_RXI(rxi, ldcp);
6194 		dringmsg->end_idx = rxi;
6195 		/* dring ident is left unchanged */
6196 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp,
6197 		    sizeof (*dringmsg), B_FALSE);
6198 		if (rv != VGEN_SUCCESS) {
6199 			DWARN(vgenp, ldcp,
6200 			    "vgen_sendmsg failed, stype:NACK\n");
6201 			return (rv);
6202 		}
6203 		/*
6204 		 * treat this range of descrs/pkts as dropped
6205 		 * and set the new expected value of next_rxi
6206 		 * and continue(below) to process from the new
6207 		 * start index.
6208 		 */
6209 		ldcp->next_rxi = start;
6210 	}
6211 
6212 #endif	/* VGEN_HANDLE_LOST_PKTS */
6213 
6214 	/* Now receive messages */
6215 	rv = vgen_process_dring_data(ldcp, tagp);
6216 
6217 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
6218 	return (rv);
6219 }
6220 
6221 static int
6222 vgen_process_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
6223 {
6224 	boolean_t set_ack_start = B_FALSE;
6225 	uint32_t start;
6226 	uint32_t ack_end;
6227 	uint32_t next_rxi;
6228 	uint32_t rxi;
6229 	int count = 0;
6230 	int rv = 0;
6231 	uint32_t retries = 0;
6232 	vgen_stats_t *statsp;
6233 	vnet_public_desc_t rxd;
6234 	vio_dring_entry_hdr_t *hdrp;
6235 	mblk_t *bp = NULL;
6236 	mblk_t *bpt = NULL;
6237 	uint32_t ack_start;
6238 	boolean_t rxd_err = B_FALSE;
6239 	mblk_t *mp = NULL;
6240 	size_t nbytes;
6241 	boolean_t ack_needed = B_FALSE;
6242 	size_t nread;
6243 	uint64_t off = 0;
6244 	struct ether_header *ehp;
6245 	vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;
6246 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6247 	vgen_hparams_t	*lp = &ldcp->local_hparams;
6248 
6249 	DBG1(vgenp, ldcp, "enter\n");
6250 
6251 	statsp = &ldcp->stats;
6252 	start = dringmsg->start_idx;
6253 
6254 	/*
6255 	 * start processing the descriptors from the specified
6256 	 * start index, up to the index a descriptor is not ready
6257 	 * to be processed or we process the entire descriptor ring
6258 	 * and wrap around upto the start index.
6259 	 */
6260 
6261 	/* need to set the start index of descriptors to be ack'd */
6262 	set_ack_start = B_TRUE;
6263 
6264 	/* index upto which we have ack'd */
6265 	ack_end = start;
6266 	DECR_RXI(ack_end, ldcp);
6267 
6268 	next_rxi = rxi =  start;
6269 	do {
6270 vgen_recv_retry:
6271 		rv = vnet_dring_entry_copy(&(ldcp->rxdp[rxi]), &rxd,
6272 		    ldcp->dring_mtype, ldcp->rx_dhandle, rxi, rxi);
6273 		if (rv != 0) {
6274 			DWARN(vgenp, ldcp, "ldc_mem_dring_acquire() failed"
6275 			    " rv(%d)\n", rv);
6276 			statsp->ierrors++;
6277 			return (rv);
6278 		}
6279 
6280 		hdrp = &rxd.hdr;
6281 
6282 		if (hdrp->dstate != VIO_DESC_READY) {
6283 			/*
6284 			 * Before waiting and retry here, send up
6285 			 * the packets that are received already
6286 			 */
6287 			if (bp != NULL) {
6288 				DTRACE_PROBE1(vgen_rcv_msgs, int, count);
6289 				vgen_rx(ldcp, bp, bpt);
6290 				count = 0;
6291 				bp = bpt = NULL;
6292 			}
6293 			/*
6294 			 * descriptor is not ready.
6295 			 * retry descriptor acquire, stop processing
6296 			 * after max # retries.
6297 			 */
6298 			if (retries == vgen_recv_retries)
6299 				break;
6300 			retries++;
6301 			drv_usecwait(vgen_recv_delay);
6302 			goto vgen_recv_retry;
6303 		}
6304 		retries = 0;
6305 
6306 		if (set_ack_start) {
6307 			/*
6308 			 * initialize the start index of the range
6309 			 * of descriptors to be ack'd.
6310 			 */
6311 			ack_start = rxi;
6312 			set_ack_start = B_FALSE;
6313 		}
6314 
6315 		if ((rxd.nbytes < ETHERMIN) ||
6316 		    (rxd.nbytes > lp->mtu) ||
6317 		    (rxd.ncookies == 0) ||
6318 		    (rxd.ncookies > MAX_COOKIES)) {
6319 			rxd_err = B_TRUE;
6320 		} else {
6321 			/*
6322 			 * Try to allocate an mblk from the free pool
6323 			 * of recv mblks for the channel.
6324 			 * If this fails, use allocb().
6325 			 */
6326 			nbytes = (VNET_IPALIGN + rxd.nbytes + 7) & ~7;
6327 			if (nbytes > ldcp->max_rxpool_size) {
6328 				mp = allocb(VNET_IPALIGN + rxd.nbytes + 8,
6329 				    BPRI_MED);
6330 			} else {
6331 				mp = vio_multipool_allocb(&ldcp->vmp, nbytes);
6332 				if (mp == NULL) {
6333 					statsp->rx_vio_allocb_fail++;
6334 					/*
6335 					 * Data buffer returned by allocb(9F)
6336 					 * is 8byte aligned. We allocate extra
6337 					 * 8 bytes to ensure size is multiple
6338 					 * of 8 bytes for ldc_mem_copy().
6339 					 */
6340 					mp = allocb(VNET_IPALIGN +
6341 					    rxd.nbytes + 8, BPRI_MED);
6342 				}
6343 			}
6344 		}
6345 		if ((rxd_err) || (mp == NULL)) {
6346 			/*
6347 			 * rxd_err or allocb() failure,
6348 			 * drop this packet, get next.
6349 			 */
6350 			if (rxd_err) {
6351 				statsp->ierrors++;
6352 				rxd_err = B_FALSE;
6353 			} else {
6354 				statsp->rx_allocb_fail++;
6355 			}
6356 
6357 			ack_needed = hdrp->ack;
6358 
6359 			/* set descriptor done bit */
6360 			rv = vnet_dring_entry_set_dstate(&(ldcp->rxdp[rxi]),
6361 			    ldcp->dring_mtype, ldcp->rx_dhandle, rxi, rxi,
6362 			    VIO_DESC_DONE);
6363 			if (rv != 0) {
6364 				DWARN(vgenp, ldcp,
6365 				    "vnet_dring_entry_set_dstate err rv(%d)\n",
6366 				    rv);
6367 				return (rv);
6368 			}
6369 
6370 			if (ack_needed) {
6371 				ack_needed = B_FALSE;
6372 				/*
6373 				 * sender needs ack for this packet,
6374 				 * ack pkts upto this index.
6375 				 */
6376 				ack_end = rxi;
6377 
6378 				rv = vgen_send_dring_ack(ldcp, tagp,
6379 				    ack_start, ack_end,
6380 				    VIO_DP_ACTIVE);
6381 				if (rv != VGEN_SUCCESS) {
6382 					goto error_ret;
6383 				}
6384 
6385 				/* need to set new ack start index */
6386 				set_ack_start = B_TRUE;
6387 			}
6388 			goto vgen_next_rxi;
6389 		}
6390 
6391 		nread = nbytes;
6392 		rv = ldc_mem_copy(ldcp->ldc_handle,
6393 		    (caddr_t)mp->b_rptr, off, &nread,
6394 		    rxd.memcookie, rxd.ncookies, LDC_COPY_IN);
6395 
6396 		/* if ldc_mem_copy() failed */
6397 		if (rv) {
6398 			DWARN(vgenp, ldcp, "ldc_mem_copy err rv(%d)\n", rv);
6399 			statsp->ierrors++;
6400 			freemsg(mp);
6401 			goto error_ret;
6402 		}
6403 
6404 		ack_needed = hdrp->ack;
6405 
6406 		rv = vnet_dring_entry_set_dstate(&(ldcp->rxdp[rxi]),
6407 		    ldcp->dring_mtype, ldcp->rx_dhandle, rxi, rxi,
6408 		    VIO_DESC_DONE);
6409 		if (rv != 0) {
6410 			DWARN(vgenp, ldcp,
6411 			    "vnet_dring_entry_set_dstate err rv(%d)\n", rv);
6412 			goto error_ret;
6413 		}
6414 
6415 		mp->b_rptr += VNET_IPALIGN;
6416 
6417 		if (ack_needed) {
6418 			ack_needed = B_FALSE;
6419 			/*
6420 			 * sender needs ack for this packet,
6421 			 * ack pkts upto this index.
6422 			 */
6423 			ack_end = rxi;
6424 
6425 			rv = vgen_send_dring_ack(ldcp, tagp,
6426 			    ack_start, ack_end, VIO_DP_ACTIVE);
6427 			if (rv != VGEN_SUCCESS) {
6428 				goto error_ret;
6429 			}
6430 
6431 			/* need to set new ack start index */
6432 			set_ack_start = B_TRUE;
6433 		}
6434 
6435 		if (nread != nbytes) {
6436 			DWARN(vgenp, ldcp,
6437 			    "ldc_mem_copy nread(%lx), nbytes(%lx)\n",
6438 			    nread, nbytes);
6439 			statsp->ierrors++;
6440 			freemsg(mp);
6441 			goto vgen_next_rxi;
6442 		}
6443 
6444 		/* point to the actual end of data */
6445 		mp->b_wptr = mp->b_rptr + rxd.nbytes;
6446 
6447 		/* update stats */
6448 		statsp->ipackets++;
6449 		statsp->rbytes += rxd.nbytes;
6450 		ehp = (struct ether_header *)mp->b_rptr;
6451 		if (IS_BROADCAST(ehp))
6452 			statsp->brdcstrcv++;
6453 		else if (IS_MULTICAST(ehp))
6454 			statsp->multircv++;
6455 
6456 		/* build a chain of received packets */
6457 		if (bp == NULL) {
6458 			/* first pkt */
6459 			bp = mp;
6460 			bpt = bp;
6461 			bpt->b_next = NULL;
6462 		} else {
6463 			mp->b_next = NULL;
6464 			bpt->b_next = mp;
6465 			bpt = mp;
6466 		}
6467 
6468 		if (count++ > vgen_chain_len) {
6469 			DTRACE_PROBE1(vgen_rcv_msgs, int, count);
6470 			vgen_rx(ldcp, bp, bpt);
6471 			count = 0;
6472 			bp = bpt = NULL;
6473 		}
6474 
6475 vgen_next_rxi:
6476 		/* update end index of range of descrs to be ack'd */
6477 		ack_end = rxi;
6478 
6479 		/* update the next index to be processed */
6480 		INCR_RXI(next_rxi, ldcp);
6481 		if (next_rxi == start) {
6482 			/*
6483 			 * processed the entire descriptor ring upto
6484 			 * the index at which we started.
6485 			 */
6486 			break;
6487 		}
6488 
6489 		rxi = next_rxi;
6490 
6491 	_NOTE(CONSTCOND)
6492 	} while (1);
6493 
6494 	/*
6495 	 * send an ack message to peer indicating that we have stopped
6496 	 * processing descriptors.
6497 	 */
6498 	if (set_ack_start) {
6499 		/*
6500 		 * We have ack'd upto some index and we have not
6501 		 * processed any descriptors beyond that index.
6502 		 * Use the last ack'd index as both the start and
6503 		 * end of range of descrs being ack'd.
6504 		 * Note: This results in acking the last index twice
6505 		 * and should be harmless.
6506 		 */
6507 		ack_start = ack_end;
6508 	}
6509 
6510 	rv = vgen_send_dring_ack(ldcp, tagp, ack_start, ack_end,
6511 	    VIO_DP_STOPPED);
6512 	if (rv != VGEN_SUCCESS) {
6513 		goto error_ret;
6514 	}
6515 
6516 	/* save new recv index of next dring msg */
6517 	ldcp->next_rxi = next_rxi;
6518 
6519 error_ret:
6520 	/* send up packets received so far */
6521 	if (bp != NULL) {
6522 		DTRACE_PROBE1(vgen_rcv_msgs, int, count);
6523 		vgen_rx(ldcp, bp, bpt);
6524 		bp = bpt = NULL;
6525 	}
6526 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
6527 	return (rv);
6528 
6529 }
6530 
6531 static int
6532 vgen_handle_dring_data_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
6533 {
6534 	int rv = 0;
6535 	uint32_t start;
6536 	int32_t end;
6537 	uint32_t txi;
6538 	boolean_t ready_txd = B_FALSE;
6539 	vgen_stats_t *statsp;
6540 	vgen_private_desc_t *tbufp;
6541 	vnet_public_desc_t *txdp;
6542 	vio_dring_entry_hdr_t *hdrp;
6543 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6544 	vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;
6545 
6546 	DBG1(vgenp, ldcp, "enter\n");
6547 	start = dringmsg->start_idx;
6548 	end = dringmsg->end_idx;
6549 	statsp = &ldcp->stats;
6550 
6551 	/*
6552 	 * received an ack corresponding to a specific descriptor for
6553 	 * which we had set the ACK bit in the descriptor (during
6554 	 * transmit). This enables us to reclaim descriptors.
6555 	 */
6556 
6557 	DBG2(vgenp, ldcp, "ACK:  start(%d), end(%d)\n", start, end);
6558 
6559 	/* validate start and end indeces in the tx ack msg */
6560 	if (!(CHECK_TXI(start, ldcp)) || !(CHECK_TXI(end, ldcp))) {
6561 		/* drop the message if invalid index */
6562 		DWARN(vgenp, ldcp, "Invalid Tx ack start(%d) or end(%d)\n",
6563 		    start, end);
6564 		return (rv);
6565 	}
6566 	/* validate dring_ident */
6567 	if (dringmsg->dring_ident != ldcp->local_hparams.dring_ident) {
6568 		/* invalid dring_ident, drop the msg */
6569 		DWARN(vgenp, ldcp, "Invalid dring ident 0x%x\n",
6570 		    dringmsg->dring_ident);
6571 		return (rv);
6572 	}
6573 	statsp->dring_data_acks++;
6574 
6575 	/* reclaim descriptors that are done */
6576 	vgen_reclaim(ldcp);
6577 
6578 	if (dringmsg->dring_process_state != VIO_DP_STOPPED) {
6579 		/*
6580 		 * receiver continued processing descriptors after
6581 		 * sending us the ack.
6582 		 */
6583 		return (rv);
6584 	}
6585 
6586 	statsp->dring_stopped_acks++;
6587 
6588 	/* receiver stopped processing descriptors */
6589 	mutex_enter(&ldcp->wrlock);
6590 	mutex_enter(&ldcp->tclock);
6591 
6592 	/*
6593 	 * determine if there are any pending tx descriptors
6594 	 * ready to be processed by the receiver(peer) and if so,
6595 	 * send a message to the peer to restart receiving.
6596 	 */
6597 	ready_txd = B_FALSE;
6598 
6599 	/*
6600 	 * using the end index of the descriptor range for which
6601 	 * we received the ack, check if the next descriptor is
6602 	 * ready.
6603 	 */
6604 	txi = end;
6605 	INCR_TXI(txi, ldcp);
6606 	tbufp = &ldcp->tbufp[txi];
6607 	txdp = tbufp->descp;
6608 	hdrp = &txdp->hdr;
6609 	if (hdrp->dstate == VIO_DESC_READY) {
6610 		ready_txd = B_TRUE;
6611 	} else {
6612 		/*
6613 		 * descr next to the end of ack'd descr range is not
6614 		 * ready.
6615 		 * starting from the current reclaim index, check
6616 		 * if any descriptor is ready.
6617 		 */
6618 
6619 		txi = ldcp->cur_tbufp - ldcp->tbufp;
6620 		tbufp = &ldcp->tbufp[txi];
6621 
6622 		txdp = tbufp->descp;
6623 		hdrp = &txdp->hdr;
6624 		if (hdrp->dstate == VIO_DESC_READY) {
6625 			ready_txd = B_TRUE;
6626 		}
6627 
6628 	}
6629 
6630 	if (ready_txd) {
6631 		/*
6632 		 * we have tx descriptor(s) ready to be
6633 		 * processed by the receiver.
6634 		 * send a message to the peer with the start index
6635 		 * of ready descriptors.
6636 		 */
6637 		rv = vgen_send_dring_data(ldcp, txi, -1);
6638 		if (rv != VGEN_SUCCESS) {
6639 			ldcp->resched_peer = B_TRUE;
6640 			ldcp->resched_peer_txi = txi;
6641 			mutex_exit(&ldcp->tclock);
6642 			mutex_exit(&ldcp->wrlock);
6643 			return (rv);
6644 		}
6645 	} else {
6646 		/*
6647 		 * no ready tx descriptors. set the flag to send a
6648 		 * message to peer when tx descriptors are ready in
6649 		 * transmit routine.
6650 		 */
6651 		ldcp->resched_peer = B_TRUE;
6652 		ldcp->resched_peer_txi = ldcp->cur_tbufp - ldcp->tbufp;
6653 	}
6654 
6655 	mutex_exit(&ldcp->tclock);
6656 	mutex_exit(&ldcp->wrlock);
6657 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
6658 	return (rv);
6659 }
6660 
6661 static int
6662 vgen_handle_dring_data_nack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
6663 {
6664 	int rv = 0;
6665 	uint32_t start;
6666 	int32_t end;
6667 	uint32_t txi;
6668 	vnet_public_desc_t *txdp;
6669 	vio_dring_entry_hdr_t *hdrp;
6670 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6671 	vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;
6672 
6673 	DBG1(vgenp, ldcp, "enter\n");
6674 	start = dringmsg->start_idx;
6675 	end = dringmsg->end_idx;
6676 
6677 	/*
6678 	 * peer sent a NACK msg to indicate lost packets.
6679 	 * The start and end correspond to the range of descriptors
6680 	 * for which the peer didn't receive a dring data msg and so
6681 	 * didn't receive the corresponding data.
6682 	 */
6683 	DWARN(vgenp, ldcp, "NACK: start(%d), end(%d)\n", start, end);
6684 
6685 	/* validate start and end indeces in the tx nack msg */
6686 	if (!(CHECK_TXI(start, ldcp)) || !(CHECK_TXI(end, ldcp))) {
6687 		/* drop the message if invalid index */
6688 		DWARN(vgenp, ldcp, "Invalid Tx nack start(%d) or end(%d)\n",
6689 		    start, end);
6690 		return (rv);
6691 	}
6692 	/* validate dring_ident */
6693 	if (dringmsg->dring_ident != ldcp->local_hparams.dring_ident) {
6694 		/* invalid dring_ident, drop the msg */
6695 		DWARN(vgenp, ldcp, "Invalid dring ident 0x%x\n",
6696 		    dringmsg->dring_ident);
6697 		return (rv);
6698 	}
6699 	mutex_enter(&ldcp->txlock);
6700 	mutex_enter(&ldcp->tclock);
6701 
6702 	if (ldcp->next_tbufp == ldcp->cur_tbufp) {
6703 		/* no busy descriptors, bogus nack ? */
6704 		mutex_exit(&ldcp->tclock);
6705 		mutex_exit(&ldcp->txlock);
6706 		return (rv);
6707 	}
6708 
6709 	/* we just mark the descrs as done so they can be reclaimed */
6710 	for (txi = start; txi <= end; ) {
6711 		txdp = &(ldcp->txdp[txi]);
6712 		hdrp = &txdp->hdr;
6713 		if (hdrp->dstate == VIO_DESC_READY)
6714 			hdrp->dstate = VIO_DESC_DONE;
6715 		INCR_TXI(txi, ldcp);
6716 	}
6717 	mutex_exit(&ldcp->tclock);
6718 	mutex_exit(&ldcp->txlock);
6719 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
6720 	return (rv);
6721 }
6722 
6723 static void
6724 vgen_reclaim(vgen_ldc_t *ldcp)
6725 {
6726 	mutex_enter(&ldcp->tclock);
6727 
6728 	vgen_reclaim_dring(ldcp);
6729 	ldcp->reclaim_lbolt = ddi_get_lbolt();
6730 
6731 	mutex_exit(&ldcp->tclock);
6732 }
6733 
6734 /*
6735  * transmit reclaim function. starting from the current reclaim index
6736  * look for descriptors marked DONE and reclaim the descriptor and the
6737  * corresponding buffers (tbuf).
6738  */
6739 static void
6740 vgen_reclaim_dring(vgen_ldc_t *ldcp)
6741 {
6742 	int count = 0;
6743 	vnet_public_desc_t *txdp;
6744 	vgen_private_desc_t *tbufp;
6745 	vio_dring_entry_hdr_t	*hdrp;
6746 
6747 #ifdef DEBUG
6748 	if (vgen_trigger_txtimeout)
6749 		return;
6750 #endif
6751 
6752 	tbufp = ldcp->cur_tbufp;
6753 	txdp = tbufp->descp;
6754 	hdrp = &txdp->hdr;
6755 
6756 	while ((hdrp->dstate == VIO_DESC_DONE) &&
6757 	    (tbufp != ldcp->next_tbufp)) {
6758 		tbufp->flags = VGEN_PRIV_DESC_FREE;
6759 		hdrp->dstate = VIO_DESC_FREE;
6760 		hdrp->ack = B_FALSE;
6761 
6762 		tbufp = NEXTTBUF(ldcp, tbufp);
6763 		txdp = tbufp->descp;
6764 		hdrp = &txdp->hdr;
6765 		count++;
6766 	}
6767 
6768 	ldcp->cur_tbufp = tbufp;
6769 
6770 	/*
6771 	 * Check if mac layer should be notified to restart transmissions
6772 	 */
6773 	if ((ldcp->need_resched) && (count > 0)) {
6774 		vio_net_tx_update_t vtx_update =
6775 		    ldcp->portp->vcb.vio_net_tx_update;
6776 
6777 		ldcp->need_resched = B_FALSE;
6778 		vtx_update(ldcp->portp->vhp);
6779 	}
6780 }
6781 
6782 /* return the number of pending transmits for the channel */
6783 static int
6784 vgen_num_txpending(vgen_ldc_t *ldcp)
6785 {
6786 	int n;
6787 
6788 	if (ldcp->next_tbufp >= ldcp->cur_tbufp) {
6789 		n = ldcp->next_tbufp - ldcp->cur_tbufp;
6790 	} else  {
6791 		/* cur_tbufp > next_tbufp */
6792 		n = ldcp->num_txds - (ldcp->cur_tbufp - ldcp->next_tbufp);
6793 	}
6794 
6795 	return (n);
6796 }
6797 
6798 /* determine if the transmit descriptor ring is full */
6799 static int
6800 vgen_tx_dring_full(vgen_ldc_t *ldcp)
6801 {
6802 	vgen_private_desc_t	*tbufp;
6803 	vgen_private_desc_t	*ntbufp;
6804 
6805 	tbufp = ldcp->next_tbufp;
6806 	ntbufp = NEXTTBUF(ldcp, tbufp);
6807 	if (ntbufp == ldcp->cur_tbufp) { /* out of tbufs/txds */
6808 		return (VGEN_SUCCESS);
6809 	}
6810 	return (VGEN_FAILURE);
6811 }
6812 
6813 /* determine if timeout condition has occured */
6814 static int
6815 vgen_ldc_txtimeout(vgen_ldc_t *ldcp)
6816 {
6817 	if (((ddi_get_lbolt() - ldcp->reclaim_lbolt) >
6818 	    drv_usectohz(vnet_ldcwd_txtimeout * 1000)) &&
6819 	    (vnet_ldcwd_txtimeout) &&
6820 	    (vgen_tx_dring_full(ldcp) == VGEN_SUCCESS)) {
6821 		return (VGEN_SUCCESS);
6822 	} else {
6823 		return (VGEN_FAILURE);
6824 	}
6825 }
6826 
6827 /* transmit watchdog timeout handler */
6828 static void
6829 vgen_ldc_watchdog(void *arg)
6830 {
6831 	vgen_ldc_t *ldcp;
6832 	vgen_t *vgenp;
6833 	int rv;
6834 
6835 	ldcp = (vgen_ldc_t *)arg;
6836 	vgenp = LDC_TO_VGEN(ldcp);
6837 
6838 	rv = vgen_ldc_txtimeout(ldcp);
6839 	if (rv == VGEN_SUCCESS) {
6840 		DWARN(vgenp, ldcp, "transmit timeout\n");
6841 #ifdef DEBUG
6842 		if (vgen_trigger_txtimeout) {
6843 			/* tx timeout triggered for debugging */
6844 			vgen_trigger_txtimeout = 0;
6845 		}
6846 #endif
6847 		mutex_enter(&ldcp->cblock);
6848 		vgen_ldc_reset(ldcp);
6849 		mutex_exit(&ldcp->cblock);
6850 		if (ldcp->need_resched) {
6851 			vio_net_tx_update_t vtx_update =
6852 			    ldcp->portp->vcb.vio_net_tx_update;
6853 
6854 			ldcp->need_resched = B_FALSE;
6855 			vtx_update(ldcp->portp->vhp);
6856 		}
6857 	}
6858 
6859 	ldcp->wd_tid = timeout(vgen_ldc_watchdog, (caddr_t)ldcp,
6860 	    drv_usectohz(vnet_ldcwd_interval * 1000));
6861 }
6862 
6863 /* handler for error messages received from the peer ldc end-point */
6864 static void
6865 vgen_handle_errmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
6866 {
6867 	_NOTE(ARGUNUSED(ldcp, tagp))
6868 }
6869 
6870 static int
6871 vgen_check_datamsg_seq(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
6872 {
6873 	vio_raw_data_msg_t	*rmsg;
6874 	vio_dring_msg_t		*dmsg;
6875 	uint64_t		seq_num;
6876 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
6877 
6878 	if (tagp->vio_subtype_env == VIO_DRING_DATA) {
6879 		dmsg = (vio_dring_msg_t *)tagp;
6880 		seq_num = dmsg->seq_num;
6881 	} else if (tagp->vio_subtype_env == VIO_PKT_DATA) {
6882 		rmsg = (vio_raw_data_msg_t *)tagp;
6883 		seq_num = rmsg->seq_num;
6884 	} else {
6885 		return (EINVAL);
6886 	}
6887 
6888 	if (seq_num != ldcp->next_rxseq) {
6889 
6890 		/* seqnums don't match */
6891 		DWARN(vgenp, ldcp,
6892 		    "next_rxseq(0x%lx) != seq_num(0x%lx)\n",
6893 		    ldcp->next_rxseq, seq_num);
6894 
6895 		return (EINVAL);
6896 
6897 	}
6898 
6899 	ldcp->next_rxseq++;
6900 
6901 	return (0);
6902 }
6903 
6904 /* Check if the session id in the received message is valid */
6905 static int
6906 vgen_check_sid(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
6907 {
6908 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6909 
6910 	if (tagp->vio_sid != ldcp->peer_sid) {
6911 		DWARN(vgenp, ldcp, "sid mismatch: expected(%x), rcvd(%x)\n",
6912 		    ldcp->peer_sid, tagp->vio_sid);
6913 		return (VGEN_FAILURE);
6914 	}
6915 	else
6916 		return (VGEN_SUCCESS);
6917 }
6918 
6919 static caddr_t
6920 vgen_print_ethaddr(uint8_t *a, char *ebuf)
6921 {
6922 	(void) sprintf(ebuf,
6923 	    "%x:%x:%x:%x:%x:%x", a[0], a[1], a[2], a[3], a[4], a[5]);
6924 	return (ebuf);
6925 }
6926 
6927 /* Handshake watchdog timeout handler */
6928 static void
6929 vgen_hwatchdog(void *arg)
6930 {
6931 	vgen_ldc_t *ldcp = (vgen_ldc_t *)arg;
6932 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6933 
6934 	DWARN(vgenp, ldcp, "handshake timeout phase(%x) state(%x)\n",
6935 	    ldcp->hphase, ldcp->hstate);
6936 
6937 	mutex_enter(&ldcp->cblock);
6938 	if (ldcp->cancel_htid) {
6939 		ldcp->cancel_htid = 0;
6940 		mutex_exit(&ldcp->cblock);
6941 		return;
6942 	}
6943 	ldcp->htid = 0;
6944 	vgen_ldc_reset(ldcp);
6945 	mutex_exit(&ldcp->cblock);
6946 }
6947 
6948 static void
6949 vgen_print_hparams(vgen_hparams_t *hp)
6950 {
6951 	uint8_t	addr[6];
6952 	char	ea[6];
6953 	ldc_mem_cookie_t *dc;
6954 
6955 	cmn_err(CE_CONT, "version_info:\n");
6956 	cmn_err(CE_CONT,
6957 	    "\tver_major: %d, ver_minor: %d, dev_class: %d\n",
6958 	    hp->ver_major, hp->ver_minor, hp->dev_class);
6959 
6960 	vnet_macaddr_ultostr(hp->addr, addr);
6961 	cmn_err(CE_CONT, "attr_info:\n");
6962 	cmn_err(CE_CONT, "\tMTU: %lx, addr: %s\n", hp->mtu,
6963 	    vgen_print_ethaddr(addr, ea));
6964 	cmn_err(CE_CONT,
6965 	    "\taddr_type: %x, xfer_mode: %x, ack_freq: %x\n",
6966 	    hp->addr_type, hp->xfer_mode, hp->ack_freq);
6967 
6968 	dc = &hp->dring_cookie;
6969 	cmn_err(CE_CONT, "dring_info:\n");
6970 	cmn_err(CE_CONT,
6971 	    "\tlength: %d, dsize: %d\n", hp->num_desc, hp->desc_size);
6972 	cmn_err(CE_CONT,
6973 	    "\tldc_addr: 0x%lx, ldc_size: %ld\n",
6974 	    dc->addr, dc->size);
6975 	cmn_err(CE_CONT, "\tdring_ident: 0x%lx\n", hp->dring_ident);
6976 }
6977 
6978 static void
6979 vgen_print_ldcinfo(vgen_ldc_t *ldcp)
6980 {
6981 	vgen_hparams_t *hp;
6982 
6983 	cmn_err(CE_CONT, "Channel Information:\n");
6984 	cmn_err(CE_CONT,
6985 	    "\tldc_id: 0x%lx, ldc_status: 0x%x\n",
6986 	    ldcp->ldc_id, ldcp->ldc_status);
6987 	cmn_err(CE_CONT,
6988 	    "\tlocal_sid: 0x%x, peer_sid: 0x%x\n",
6989 	    ldcp->local_sid, ldcp->peer_sid);
6990 	cmn_err(CE_CONT,
6991 	    "\thphase: 0x%x, hstate: 0x%x\n",
6992 	    ldcp->hphase, ldcp->hstate);
6993 
6994 	cmn_err(CE_CONT, "Local handshake params:\n");
6995 	hp = &ldcp->local_hparams;
6996 	vgen_print_hparams(hp);
6997 
6998 	cmn_err(CE_CONT, "Peer handshake params:\n");
6999 	hp = &ldcp->peer_hparams;
7000 	vgen_print_hparams(hp);
7001 }
7002 
7003 /*
7004  * Send received packets up the stack.
7005  */
7006 static void
7007 vgen_rx(vgen_ldc_t *ldcp, mblk_t *bp, mblk_t *bpt)
7008 {
7009 	vio_net_rx_cb_t vrx_cb = ldcp->portp->vcb.vio_net_rx_cb;
7010 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
7011 
7012 	if (ldcp->rcv_thread != NULL) {
7013 		ASSERT(MUTEX_HELD(&ldcp->rxlock));
7014 	} else {
7015 		ASSERT(MUTEX_HELD(&ldcp->cblock));
7016 	}
7017 
7018 	mutex_enter(&ldcp->pollq_lock);
7019 
7020 	if (ldcp->polling_on == B_TRUE) {
7021 		/*
7022 		 * If we are in polling mode, simply queue
7023 		 * the packets onto the poll queue and return.
7024 		 */
7025 		if (ldcp->pollq_headp == NULL) {
7026 			ldcp->pollq_headp = bp;
7027 			ldcp->pollq_tailp = bpt;
7028 		} else {
7029 			ldcp->pollq_tailp->b_next = bp;
7030 			ldcp->pollq_tailp = bpt;
7031 		}
7032 
7033 		mutex_exit(&ldcp->pollq_lock);
7034 		return;
7035 	}
7036 
7037 	/*
7038 	 * Prepend any pending mblks in the poll queue, now that we
7039 	 * are in interrupt mode, before sending up the chain of pkts.
7040 	 */
7041 	if (ldcp->pollq_headp != NULL) {
7042 		DBG2(vgenp, ldcp, "vgen_rx(%lx), pending pollq_headp\n",
7043 		    (uintptr_t)ldcp);
7044 		ldcp->pollq_tailp->b_next = bp;
7045 		bp = ldcp->pollq_headp;
7046 		ldcp->pollq_headp = ldcp->pollq_tailp = NULL;
7047 	}
7048 
7049 	mutex_exit(&ldcp->pollq_lock);
7050 
7051 	if (ldcp->rcv_thread != NULL) {
7052 		mutex_exit(&ldcp->rxlock);
7053 	} else {
7054 		mutex_exit(&ldcp->cblock);
7055 	}
7056 
7057 	/* Send up the packets */
7058 	vrx_cb(ldcp->portp->vhp, bp);
7059 
7060 	if (ldcp->rcv_thread != NULL) {
7061 		mutex_enter(&ldcp->rxlock);
7062 	} else {
7063 		mutex_enter(&ldcp->cblock);
7064 	}
7065 }
7066 
7067 /*
7068  * vgen_ldc_rcv_worker -- A per LDC worker thread to receive data.
7069  * This thread is woken up by the LDC interrupt handler to process
7070  * LDC packets and receive data.
7071  */
7072 static void
7073 vgen_ldc_rcv_worker(void *arg)
7074 {
7075 	callb_cpr_t	cprinfo;
7076 	vgen_ldc_t *ldcp = (vgen_ldc_t *)arg;
7077 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
7078 
7079 	DBG1(vgenp, ldcp, "enter\n");
7080 	CALLB_CPR_INIT(&cprinfo, &ldcp->rcv_thr_lock, callb_generic_cpr,
7081 	    "vnet_rcv_thread");
7082 	mutex_enter(&ldcp->rcv_thr_lock);
7083 	while (!(ldcp->rcv_thr_flags & VGEN_WTHR_STOP)) {
7084 
7085 		CALLB_CPR_SAFE_BEGIN(&cprinfo);
7086 		/*
7087 		 * Wait until the data is received or a stop
7088 		 * request is received.
7089 		 */
7090 		while (!(ldcp->rcv_thr_flags &
7091 		    (VGEN_WTHR_DATARCVD | VGEN_WTHR_STOP))) {
7092 			cv_wait(&ldcp->rcv_thr_cv, &ldcp->rcv_thr_lock);
7093 		}
7094 		CALLB_CPR_SAFE_END(&cprinfo, &ldcp->rcv_thr_lock)
7095 
7096 		/*
7097 		 * First process the stop request.
7098 		 */
7099 		if (ldcp->rcv_thr_flags & VGEN_WTHR_STOP) {
7100 			DBG2(vgenp, ldcp, "stopped\n");
7101 			break;
7102 		}
7103 		ldcp->rcv_thr_flags &= ~VGEN_WTHR_DATARCVD;
7104 		ldcp->rcv_thr_flags |= VGEN_WTHR_PROCESSING;
7105 		mutex_exit(&ldcp->rcv_thr_lock);
7106 		DBG2(vgenp, ldcp, "calling vgen_handle_evt_read\n");
7107 		vgen_handle_evt_read(ldcp);
7108 		mutex_enter(&ldcp->rcv_thr_lock);
7109 		ldcp->rcv_thr_flags &= ~VGEN_WTHR_PROCESSING;
7110 	}
7111 
7112 	/*
7113 	 * Update the run status and wakeup the thread that
7114 	 * has sent the stop request.
7115 	 */
7116 	ldcp->rcv_thr_flags &= ~VGEN_WTHR_STOP;
7117 	ldcp->rcv_thread = NULL;
7118 	CALLB_CPR_EXIT(&cprinfo);
7119 
7120 	thread_exit();
7121 	DBG1(vgenp, ldcp, "exit\n");
7122 }
7123 
7124 /* vgen_stop_rcv_thread -- Co-ordinate with receive thread to stop it */
7125 static void
7126 vgen_stop_rcv_thread(vgen_ldc_t *ldcp)
7127 {
7128 	kt_did_t	tid = 0;
7129 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
7130 
7131 	DBG1(vgenp, ldcp, "enter\n");
7132 	/*
7133 	 * Send a stop request by setting the stop flag and
7134 	 * wait until the receive thread stops.
7135 	 */
7136 	mutex_enter(&ldcp->rcv_thr_lock);
7137 	if (ldcp->rcv_thread != NULL) {
7138 		tid = ldcp->rcv_thread->t_did;
7139 		ldcp->rcv_thr_flags |= VGEN_WTHR_STOP;
7140 		cv_signal(&ldcp->rcv_thr_cv);
7141 	}
7142 	mutex_exit(&ldcp->rcv_thr_lock);
7143 
7144 	if (tid != 0) {
7145 		thread_join(tid);
7146 	}
7147 	DBG1(vgenp, ldcp, "exit\n");
7148 }
7149 
7150 /*
7151  * Wait for the channel rx-queue to be drained by allowing the receive
7152  * worker thread to read all messages from the rx-queue of the channel.
7153  * Assumption: further callbacks are disabled at this time.
7154  */
7155 static void
7156 vgen_drain_rcv_thread(vgen_ldc_t *ldcp)
7157 {
7158 	clock_t	tm;
7159 	clock_t	wt;
7160 	clock_t	rv;
7161 
7162 	/*
7163 	 * If there is data in ldc rx queue, wait until the rx
7164 	 * worker thread runs and drains all msgs in the queue.
7165 	 */
7166 	wt = drv_usectohz(MILLISEC);
7167 
7168 	mutex_enter(&ldcp->rcv_thr_lock);
7169 
7170 	tm = ddi_get_lbolt() + wt;
7171 
7172 	/*
7173 	 * We need to check both bits - DATARCVD and PROCESSING, to be cleared.
7174 	 * If DATARCVD is set, that means the callback has signalled the worker
7175 	 * thread, but the worker hasn't started processing yet. If PROCESSING
7176 	 * is set, that means the thread is awake and processing. Note that the
7177 	 * DATARCVD state can only be seen once, as the assumption is that
7178 	 * further callbacks have been disabled at this point.
7179 	 */
7180 	while (ldcp->rcv_thr_flags &
7181 	    (VGEN_WTHR_DATARCVD | VGEN_WTHR_PROCESSING)) {
7182 		rv = cv_timedwait(&ldcp->rcv_thr_cv, &ldcp->rcv_thr_lock, tm);
7183 		if (rv == -1) {	/* timeout */
7184 			/*
7185 			 * Note that the only way we return is due to a timeout;
7186 			 * we set the new time to wait, before we go back and
7187 			 * check the condition. The other(unlikely) possibility
7188 			 * is a premature wakeup(see cv_timedwait(9F)) in which
7189 			 * case we just continue to use the same time to wait.
7190 			 */
7191 			tm = ddi_get_lbolt() + wt;
7192 		}
7193 	}
7194 
7195 	mutex_exit(&ldcp->rcv_thr_lock);
7196 }
7197 
7198 /*
7199  * vgen_dds_rx -- post DDS messages to vnet.
7200  */
7201 static int
7202 vgen_dds_rx(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
7203 {
7204 	vio_dds_msg_t *dmsg = (vio_dds_msg_t *)tagp;
7205 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
7206 
7207 	if (dmsg->dds_class != DDS_VNET_NIU) {
7208 		DWARN(vgenp, ldcp, "Unknown DDS class, dropping");
7209 		return (EBADMSG);
7210 	}
7211 	vnet_dds_rx(vgenp->vnetp, dmsg);
7212 	return (0);
7213 }
7214 
7215 /*
7216  * vgen_dds_tx -- an interface called by vnet to send DDS messages.
7217  */
7218 int
7219 vgen_dds_tx(void *arg, void *msg)
7220 {
7221 	vgen_t *vgenp = arg;
7222 	vio_dds_msg_t *dmsg = msg;
7223 	vgen_portlist_t *plistp = &vgenp->vgenports;
7224 	vgen_ldc_t *ldcp;
7225 	vgen_ldclist_t *ldclp;
7226 	int rv = EIO;
7227 
7228 
7229 	READ_ENTER(&plistp->rwlock);
7230 	ldclp = &(vgenp->vsw_portp->ldclist);
7231 	READ_ENTER(&ldclp->rwlock);
7232 	ldcp = ldclp->headp;
7233 	if ((ldcp == NULL) || (ldcp->hphase != VH_DONE)) {
7234 		goto vgen_dsend_exit;
7235 	}
7236 
7237 	dmsg->tag.vio_sid = ldcp->local_sid;
7238 	rv = vgen_sendmsg(ldcp, (caddr_t)dmsg, sizeof (vio_dds_msg_t), B_FALSE);
7239 	if (rv != VGEN_SUCCESS) {
7240 		rv = EIO;
7241 	} else {
7242 		rv = 0;
7243 	}
7244 
7245 vgen_dsend_exit:
7246 	RW_EXIT(&ldclp->rwlock);
7247 	RW_EXIT(&plistp->rwlock);
7248 	return (rv);
7249 
7250 }
7251 
7252 static void
7253 vgen_ldc_reset(vgen_ldc_t *ldcp)
7254 {
7255 	vnet_t	*vnetp = LDC_TO_VNET(ldcp);
7256 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
7257 
7258 	ASSERT(MUTEX_HELD(&ldcp->cblock));
7259 
7260 	if (ldcp->need_ldc_reset == B_TRUE) {
7261 		/* another thread is already in the process of resetting */
7262 		return;
7263 	}
7264 
7265 	/* Set the flag to indicate reset is in progress */
7266 	ldcp->need_ldc_reset = B_TRUE;
7267 
7268 	if (ldcp->portp == vgenp->vsw_portp) {
7269 		mutex_exit(&ldcp->cblock);
7270 		/*
7271 		 * Now cleanup any HIO resources; the above flag also tells
7272 		 * the code that handles dds messages to drop any new msgs
7273 		 * that arrive while we are cleaning up and resetting the
7274 		 * channel.
7275 		 */
7276 		vnet_dds_cleanup_hio(vnetp);
7277 		mutex_enter(&ldcp->cblock);
7278 	}
7279 
7280 	vgen_handshake_retry(ldcp);
7281 }
7282 
7283 int
7284 vgen_enable_intr(void *arg)
7285 {
7286 	vgen_port_t		*portp = (vgen_port_t *)arg;
7287 	vgen_ldclist_t		*ldclp;
7288 	vgen_ldc_t		*ldcp;
7289 
7290 	ldclp = &portp->ldclist;
7291 	READ_ENTER(&ldclp->rwlock);
7292 	/*
7293 	 * NOTE: for now, we will assume we have a single channel.
7294 	 */
7295 	if (ldclp->headp == NULL) {
7296 		RW_EXIT(&ldclp->rwlock);
7297 		return (1);
7298 	}
7299 	ldcp = ldclp->headp;
7300 
7301 	mutex_enter(&ldcp->pollq_lock);
7302 	ldcp->polling_on = B_FALSE;
7303 	mutex_exit(&ldcp->pollq_lock);
7304 
7305 	RW_EXIT(&ldclp->rwlock);
7306 
7307 	return (0);
7308 }
7309 
7310 int
7311 vgen_disable_intr(void *arg)
7312 {
7313 	vgen_port_t		*portp = (vgen_port_t *)arg;
7314 	vgen_ldclist_t		*ldclp;
7315 	vgen_ldc_t		*ldcp;
7316 
7317 	ldclp = &portp->ldclist;
7318 	READ_ENTER(&ldclp->rwlock);
7319 	/*
7320 	 * NOTE: for now, we will assume we have a single channel.
7321 	 */
7322 	if (ldclp->headp == NULL) {
7323 		RW_EXIT(&ldclp->rwlock);
7324 		return (1);
7325 	}
7326 	ldcp = ldclp->headp;
7327 
7328 
7329 	mutex_enter(&ldcp->pollq_lock);
7330 	ldcp->polling_on = B_TRUE;
7331 	mutex_exit(&ldcp->pollq_lock);
7332 
7333 	RW_EXIT(&ldclp->rwlock);
7334 
7335 	return (0);
7336 }
7337 
7338 mblk_t *
7339 vgen_poll(void *arg, int bytes_to_pickup)
7340 {
7341 	vgen_port_t		*portp = (vgen_port_t *)arg;
7342 	vgen_ldclist_t		*ldclp;
7343 	vgen_ldc_t		*ldcp;
7344 	mblk_t			*mp = NULL;
7345 
7346 	ldclp = &portp->ldclist;
7347 	READ_ENTER(&ldclp->rwlock);
7348 	/*
7349 	 * NOTE: for now, we will assume we have a single channel.
7350 	 */
7351 	if (ldclp->headp == NULL) {
7352 		RW_EXIT(&ldclp->rwlock);
7353 		return (NULL);
7354 	}
7355 	ldcp = ldclp->headp;
7356 
7357 	mp = vgen_ldc_poll(ldcp, bytes_to_pickup);
7358 
7359 	RW_EXIT(&ldclp->rwlock);
7360 	return (mp);
7361 }
7362 
7363 static mblk_t *
7364 vgen_ldc_poll(vgen_ldc_t *ldcp, int bytes_to_pickup)
7365 {
7366 	mblk_t	*bp = NULL;
7367 	mblk_t	*bpt = NULL;
7368 	mblk_t	*mp = NULL;
7369 	size_t	mblk_sz = 0;
7370 	size_t	sz = 0;
7371 	uint_t	count = 0;
7372 
7373 	mutex_enter(&ldcp->pollq_lock);
7374 
7375 	bp = ldcp->pollq_headp;
7376 	while (bp != NULL) {
7377 		/* get the size of this packet */
7378 		mblk_sz = msgdsize(bp);
7379 
7380 		/* if adding this pkt, exceeds the size limit, we are done. */
7381 		if (sz + mblk_sz >  bytes_to_pickup) {
7382 			break;
7383 		}
7384 
7385 		/* we have room for this packet */
7386 		sz += mblk_sz;
7387 
7388 		/* increment the # of packets being sent up */
7389 		count++;
7390 
7391 		/* track the last processed pkt */
7392 		bpt = bp;
7393 
7394 		/* get the next pkt */
7395 		bp = bp->b_next;
7396 	}
7397 
7398 	if (count != 0) {
7399 		/*
7400 		 * picked up some packets; save the head of pkts to be sent up.
7401 		 */
7402 		mp = ldcp->pollq_headp;
7403 
7404 		/* move the pollq_headp to skip over the pkts being sent up */
7405 		ldcp->pollq_headp = bp;
7406 
7407 		/* picked up all pending pkts in the queue; reset tail also */
7408 		if (ldcp->pollq_headp == NULL) {
7409 			ldcp->pollq_tailp = NULL;
7410 		}
7411 
7412 		/* terminate the tail of pkts to be sent up */
7413 		bpt->b_next = NULL;
7414 	}
7415 
7416 	mutex_exit(&ldcp->pollq_lock);
7417 
7418 	DTRACE_PROBE1(vgen_poll_pkts, uint_t, count);
7419 	return (mp);
7420 }
7421 
7422 #if DEBUG
7423 
7424 /*
7425  * Print debug messages - set to 0xf to enable all msgs
7426  */
7427 static void
7428 debug_printf(const char *fname, vgen_t *vgenp,
7429     vgen_ldc_t *ldcp, const char *fmt, ...)
7430 {
7431 	char    buf[256];
7432 	char    *bufp = buf;
7433 	va_list ap;
7434 
7435 	if ((vgenp != NULL) && (vgenp->vnetp != NULL)) {
7436 		(void) sprintf(bufp, "vnet%d:",
7437 		    ((vnet_t *)(vgenp->vnetp))->instance);
7438 		bufp += strlen(bufp);
7439 	}
7440 	if (ldcp != NULL) {
7441 		(void) sprintf(bufp, "ldc(%ld):", ldcp->ldc_id);
7442 		bufp += strlen(bufp);
7443 	}
7444 	(void) sprintf(bufp, "%s: ", fname);
7445 	bufp += strlen(bufp);
7446 
7447 	va_start(ap, fmt);
7448 	(void) vsprintf(bufp, fmt, ap);
7449 	va_end(ap);
7450 
7451 	if ((ldcp == NULL) ||(vgendbg_ldcid == -1) ||
7452 	    (vgendbg_ldcid == ldcp->ldc_id)) {
7453 		cmn_err(CE_CONT, "%s\n", buf);
7454 	}
7455 }
7456 #endif
7457 
7458 #ifdef	VNET_IOC_DEBUG
7459 
7460 static void
7461 vgen_ioctl(void *arg, queue_t *q, mblk_t *mp)
7462 {
7463 	struct iocblk	*iocp;
7464 	vgen_port_t	*portp;
7465 	enum		ioc_reply {
7466 			IOC_INVAL = -1,		/* bad, NAK with EINVAL */
7467 			IOC_ACK			/* OK, just send ACK    */
7468 	}		status;
7469 	int		rv;
7470 
7471 	iocp = (struct iocblk *)(uintptr_t)mp->b_rptr;
7472 	iocp->ioc_error = 0;
7473 	portp = (vgen_port_t *)arg;
7474 
7475 	if (portp == NULL) {
7476 		status = IOC_INVAL;
7477 		goto vgen_ioc_exit;
7478 	}
7479 
7480 	mutex_enter(&portp->lock);
7481 
7482 	switch (iocp->ioc_cmd) {
7483 
7484 	case VNET_FORCE_LINK_DOWN:
7485 	case VNET_FORCE_LINK_UP:
7486 		rv = vgen_force_link_state(portp, iocp->ioc_cmd);
7487 		(rv == 0) ? (status = IOC_ACK) : (status = IOC_INVAL);
7488 		break;
7489 
7490 	default:
7491 		status = IOC_INVAL;
7492 		break;
7493 
7494 	}
7495 
7496 	mutex_exit(&portp->lock);
7497 
7498 vgen_ioc_exit:
7499 
7500 	switch (status) {
7501 	default:
7502 	case IOC_INVAL:
7503 		/* Error, reply with a NAK and EINVAL error */
7504 		miocnak(q, mp, 0, EINVAL);
7505 		break;
7506 	case IOC_ACK:
7507 		/* OK, reply with an ACK */
7508 		miocack(q, mp, 0, 0);
7509 		break;
7510 	}
7511 }
7512 
7513 static int
7514 vgen_force_link_state(vgen_port_t *portp, int cmd)
7515 {
7516 	ldc_status_t	istatus;
7517 	vgen_ldclist_t	*ldclp;
7518 	vgen_ldc_t	*ldcp;
7519 	vgen_t		*vgenp = portp->vgenp;
7520 	int		rv;
7521 
7522 	ldclp = &portp->ldclist;
7523 	READ_ENTER(&ldclp->rwlock);
7524 
7525 	/*
7526 	 * NOTE: for now, we will assume we have a single channel.
7527 	 */
7528 	if (ldclp->headp == NULL) {
7529 		RW_EXIT(&ldclp->rwlock);
7530 		return (1);
7531 	}
7532 	ldcp = ldclp->headp;
7533 	mutex_enter(&ldcp->cblock);
7534 
7535 	switch (cmd) {
7536 
7537 	case VNET_FORCE_LINK_DOWN:
7538 		(void) ldc_down(ldcp->ldc_handle);
7539 		ldcp->link_down_forced = B_TRUE;
7540 		break;
7541 
7542 	case VNET_FORCE_LINK_UP:
7543 		rv = ldc_up(ldcp->ldc_handle);
7544 		if (rv != 0) {
7545 			DWARN(vgenp, ldcp, "ldc_up err rv(%d)\n", rv);
7546 		}
7547 		ldcp->link_down_forced = B_FALSE;
7548 
7549 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
7550 			DWARN(vgenp, ldcp, "ldc_status err\n");
7551 		} else {
7552 			ldcp->ldc_status = istatus;
7553 		}
7554 
7555 		/* if channel is already UP - restart handshake */
7556 		if (ldcp->ldc_status == LDC_UP) {
7557 			vgen_handle_evt_up(ldcp);
7558 		}
7559 		break;
7560 
7561 	}
7562 
7563 	mutex_exit(&ldcp->cblock);
7564 	RW_EXIT(&ldclp->rwlock);
7565 
7566 	return (0);
7567 }
7568 
7569 #else
7570 
7571 static void
7572 vgen_ioctl(void *arg, queue_t *q, mblk_t *mp)
7573 {
7574 	vgen_port_t	*portp;
7575 
7576 	portp = (vgen_port_t *)arg;
7577 
7578 	if (portp == NULL) {
7579 		miocnak(q, mp, 0, EINVAL);
7580 		return;
7581 	}
7582 
7583 	miocnak(q, mp, 0, ENOTSUP);
7584 }
7585 
7586 #endif
7587