xref: /illumos-gate/usr/src/uts/sun4v/io/vnet_gen.c (revision e511d54dfc1c7eb3aea1a9125b54791fc2f23d42)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/types.h>
28 #include <sys/errno.h>
29 #include <sys/sysmacros.h>
30 #include <sys/param.h>
31 #include <sys/stream.h>
32 #include <sys/strsubr.h>
33 #include <sys/kmem.h>
34 #include <sys/conf.h>
35 #include <sys/devops.h>
36 #include <sys/ksynch.h>
37 #include <sys/stat.h>
38 #include <sys/modctl.h>
39 #include <sys/debug.h>
40 #include <sys/ethernet.h>
41 #include <sys/ddi.h>
42 #include <sys/sunddi.h>
43 #include <sys/strsun.h>
44 #include <sys/note.h>
45 #include <sys/mac_provider.h>
46 #include <sys/mac_ether.h>
47 #include <sys/ldc.h>
48 #include <sys/mach_descrip.h>
49 #include <sys/mdeg.h>
50 #include <net/if.h>
51 #include <sys/vnet.h>
52 #include <sys/vio_mailbox.h>
53 #include <sys/vio_common.h>
54 #include <sys/vnet_common.h>
55 #include <sys/vnet_mailbox.h>
56 #include <sys/vio_util.h>
57 #include <sys/vnet_gen.h>
58 #include <sys/atomic.h>
59 #include <sys/callb.h>
60 #include <sys/sdt.h>
61 #include <sys/intr.h>
62 #include <sys/pattr.h>
63 #include <sys/vlan.h>
64 
65 /*
66  * Implementation of the mac functionality for vnet using the
67  * generic(default) transport layer of sun4v Logical Domain Channels(LDC).
68  */
69 
70 /*
71  * Function prototypes.
72  */
73 /* vgen proxy entry points */
74 int vgen_init(void *vnetp, uint64_t regprop, dev_info_t *vnetdip,
75     const uint8_t *macaddr, void **vgenhdl);
76 int vgen_init_mdeg(void *arg);
77 void vgen_uninit(void *arg);
78 int vgen_dds_tx(void *arg, void *dmsg);
79 void vgen_mod_init(void);
80 int vgen_mod_cleanup(void);
81 void vgen_mod_fini(void);
82 int vgen_enable_intr(void *arg);
83 int vgen_disable_intr(void *arg);
84 mblk_t *vgen_poll(void *arg, int bytes_to_pickup);
85 static int vgen_start(void *arg);
86 static void vgen_stop(void *arg);
87 static mblk_t *vgen_tx(void *arg, mblk_t *mp);
88 static int vgen_multicst(void *arg, boolean_t add,
89 	const uint8_t *mca);
90 static int vgen_promisc(void *arg, boolean_t on);
91 static int vgen_unicst(void *arg, const uint8_t *mca);
92 static int vgen_stat(void *arg, uint_t stat, uint64_t *val);
93 static void vgen_ioctl(void *arg, queue_t *q, mblk_t *mp);
94 #ifdef	VNET_IOC_DEBUG
95 static int vgen_force_link_state(vgen_port_t *portp, int link_state);
96 #endif
97 
98 /* vgen internal functions */
99 static int vgen_read_mdprops(vgen_t *vgenp);
100 static void vgen_update_md_prop(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
101 static void vgen_read_pri_eth_types(vgen_t *vgenp, md_t *mdp,
102 	mde_cookie_t node);
103 static void vgen_mtu_read(vgen_t *vgenp, md_t *mdp, mde_cookie_t node,
104 	uint32_t *mtu);
105 static void vgen_linkprop_read(vgen_t *vgenp, md_t *mdp, mde_cookie_t node,
106 	boolean_t *pls);
107 static void vgen_detach_ports(vgen_t *vgenp);
108 static void vgen_port_detach(vgen_port_t *portp);
109 static void vgen_port_list_insert(vgen_port_t *portp);
110 static void vgen_port_list_remove(vgen_port_t *portp);
111 static vgen_port_t *vgen_port_lookup(vgen_portlist_t *plistp,
112 	int port_num);
113 static int vgen_mdeg_reg(vgen_t *vgenp);
114 static void vgen_mdeg_unreg(vgen_t *vgenp);
115 static int vgen_mdeg_cb(void *cb_argp, mdeg_result_t *resp);
116 static int vgen_mdeg_port_cb(void *cb_argp, mdeg_result_t *resp);
117 static int vgen_add_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
118 static int vgen_port_read_props(vgen_port_t *portp, vgen_t *vgenp, md_t *mdp,
119 	mde_cookie_t mdex);
120 static int vgen_remove_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
121 static int vgen_port_attach(vgen_port_t *portp);
122 static void vgen_port_detach_mdeg(vgen_port_t *portp);
123 static void vgen_port_detach_mdeg(vgen_port_t *portp);
124 static int vgen_update_port(vgen_t *vgenp, md_t *curr_mdp,
125 	mde_cookie_t curr_mdex, md_t *prev_mdp, mde_cookie_t prev_mdex);
126 static uint64_t	vgen_port_stat(vgen_port_t *portp, uint_t stat);
127 static void vgen_port_reset(vgen_port_t *portp);
128 static void vgen_reset_vsw_port(vgen_t *vgenp);
129 static void vgen_ldc_reset(vgen_ldc_t *ldcp);
130 static int vgen_ldc_attach(vgen_port_t *portp, uint64_t ldc_id);
131 static void vgen_ldc_detach(vgen_ldc_t *ldcp);
132 static int vgen_alloc_tx_ring(vgen_ldc_t *ldcp);
133 static void vgen_free_tx_ring(vgen_ldc_t *ldcp);
134 static void vgen_init_ports(vgen_t *vgenp);
135 static void vgen_port_init(vgen_port_t *portp);
136 static void vgen_uninit_ports(vgen_t *vgenp);
137 static void vgen_port_uninit(vgen_port_t *portp);
138 static void vgen_init_ldcs(vgen_port_t *portp);
139 static void vgen_uninit_ldcs(vgen_port_t *portp);
140 static int vgen_ldc_init(vgen_ldc_t *ldcp);
141 static void vgen_ldc_uninit(vgen_ldc_t *ldcp);
142 static int vgen_init_tbufs(vgen_ldc_t *ldcp);
143 static void vgen_uninit_tbufs(vgen_ldc_t *ldcp);
144 static void vgen_clobber_tbufs(vgen_ldc_t *ldcp);
145 static void vgen_clobber_rxds(vgen_ldc_t *ldcp);
146 static uint64_t	vgen_ldc_stat(vgen_ldc_t *ldcp, uint_t stat);
147 static uint_t vgen_ldc_cb(uint64_t event, caddr_t arg);
148 static int vgen_portsend(vgen_port_t *portp, mblk_t *mp);
149 static int vgen_ldcsend(void *arg, mblk_t *mp);
150 static void vgen_ldcsend_pkt(void *arg, mblk_t *mp);
151 static int vgen_ldcsend_dring(void *arg, mblk_t *mp);
152 static void vgen_reclaim(vgen_ldc_t *ldcp);
153 static void vgen_reclaim_dring(vgen_ldc_t *ldcp);
154 static int vgen_num_txpending(vgen_ldc_t *ldcp);
155 static int vgen_tx_dring_full(vgen_ldc_t *ldcp);
156 static int vgen_ldc_txtimeout(vgen_ldc_t *ldcp);
157 static void vgen_ldc_watchdog(void *arg);
158 static mblk_t *vgen_ldc_poll(vgen_ldc_t *ldcp, int bytes_to_pickup);
159 
160 /* vgen handshake functions */
161 static vgen_ldc_t *vh_nextphase(vgen_ldc_t *ldcp);
162 static int vgen_sendmsg(vgen_ldc_t *ldcp, caddr_t msg,  size_t msglen,
163 	boolean_t caller_holds_lock);
164 static int vgen_send_version_negotiate(vgen_ldc_t *ldcp);
165 static int vgen_send_attr_info(vgen_ldc_t *ldcp);
166 static int vgen_send_dring_reg(vgen_ldc_t *ldcp);
167 static int vgen_send_rdx_info(vgen_ldc_t *ldcp);
168 static int vgen_send_dring_data(vgen_ldc_t *ldcp, uint32_t start, int32_t end);
169 static int vgen_send_mcast_info(vgen_ldc_t *ldcp);
170 static int vgen_handshake_phase2(vgen_ldc_t *ldcp);
171 static void vgen_handshake_reset(vgen_ldc_t *ldcp);
172 static void vgen_reset_hphase(vgen_ldc_t *ldcp);
173 static void vgen_handshake(vgen_ldc_t *ldcp);
174 static int vgen_handshake_done(vgen_ldc_t *ldcp);
175 static void vgen_handshake_retry(vgen_ldc_t *ldcp);
176 static int vgen_handle_version_negotiate(vgen_ldc_t *ldcp,
177 	vio_msg_tag_t *tagp);
178 static int vgen_handle_attr_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
179 static int vgen_handle_dring_reg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
180 static int vgen_handle_rdx_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
181 static int vgen_handle_mcast_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
182 static int vgen_handle_ctrlmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
183 static void vgen_handle_pkt_data_nop(void *arg1, void *arg2, uint32_t msglen);
184 static void vgen_handle_pkt_data(void *arg1, void *arg2, uint32_t msglen);
185 static int vgen_handle_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
186 static int vgen_handle_dring_data_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
187 static int vgen_process_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
188 static int vgen_handle_dring_data_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
189 static int vgen_handle_dring_data_nack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
190 static int vgen_send_dring_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp,
191 	uint32_t start, int32_t end, uint8_t pstate);
192 static int vgen_handle_datamsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp,
193 	uint32_t msglen);
194 static void vgen_handle_errmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
195 static void vgen_handle_evt_up(vgen_ldc_t *ldcp);
196 static void vgen_handle_evt_reset(vgen_ldc_t *ldcp);
197 static int vgen_check_sid(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
198 static int vgen_check_datamsg_seq(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
199 static caddr_t vgen_print_ethaddr(uint8_t *a, char *ebuf);
200 static void vgen_hwatchdog(void *arg);
201 static void vgen_print_attr_info(vgen_ldc_t *ldcp, int endpoint);
202 static void vgen_print_hparams(vgen_hparams_t *hp);
203 static void vgen_print_ldcinfo(vgen_ldc_t *ldcp);
204 static void vgen_stop_rcv_thread(vgen_ldc_t *ldcp);
205 static void vgen_drain_rcv_thread(vgen_ldc_t *ldcp);
206 static void vgen_ldc_rcv_worker(void *arg);
207 static void vgen_handle_evt_read(vgen_ldc_t *ldcp);
208 static void vgen_rx(vgen_ldc_t *ldcp, mblk_t *bp, mblk_t *bpt);
209 static void vgen_set_vnet_proto_ops(vgen_ldc_t *ldcp);
210 static void vgen_reset_vnet_proto_ops(vgen_ldc_t *ldcp);
211 static void vgen_link_update(vgen_t *vgenp, link_state_t link_state);
212 
213 /* VLAN routines */
214 static void vgen_vlan_read_ids(void *arg, int type, md_t *mdp,
215 	mde_cookie_t node, uint16_t *pvidp, uint16_t **vidspp,
216 	uint16_t *nvidsp, uint16_t *default_idp);
217 static void vgen_vlan_create_hash(vgen_port_t *portp);
218 static void vgen_vlan_destroy_hash(vgen_port_t *portp);
219 static void vgen_vlan_add_ids(vgen_port_t *portp);
220 static void vgen_vlan_remove_ids(vgen_port_t *portp);
221 static boolean_t vgen_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid);
222 static boolean_t vgen_frame_lookup_vid(vnet_t *vnetp, struct ether_header *ehp,
223 	uint16_t *vidp);
224 static mblk_t *vgen_vlan_frame_fixtag(vgen_port_t *portp, mblk_t *mp,
225 	boolean_t is_tagged, uint16_t vid);
226 static void vgen_vlan_unaware_port_reset(vgen_port_t *portp);
227 static void vgen_reset_vlan_unaware_ports(vgen_t *vgenp);
228 static int vgen_dds_rx(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
229 
230 /* externs */
231 extern void vnet_dds_rx(void *arg, void *dmsg);
232 extern void vnet_dds_cleanup_hio(vnet_t *vnetp);
233 extern int vnet_mtu_update(vnet_t *vnetp, uint32_t mtu);
234 extern void vnet_link_update(vnet_t *vnetp, link_state_t link_state);
235 
236 /*
237  * The handshake process consists of 5 phases defined below, with VH_PHASE0
238  * being the pre-handshake phase and VH_DONE is the phase to indicate
239  * successful completion of all phases.
240  * Each phase may have one to several handshake states which are required
241  * to complete successfully to move to the next phase.
242  * Refer to the functions vgen_handshake() and vgen_handshake_done() for
243  * more details.
244  */
245 /* handshake phases */
246 enum {	VH_PHASE0, VH_PHASE1, VH_PHASE2, VH_PHASE3, VH_DONE = 0x80 };
247 
248 /* handshake states */
249 enum {
250 
251 	VER_INFO_SENT	=	0x1,
252 	VER_ACK_RCVD	=	0x2,
253 	VER_INFO_RCVD	=	0x4,
254 	VER_ACK_SENT	=	0x8,
255 	VER_NEGOTIATED	=	(VER_ACK_RCVD | VER_ACK_SENT),
256 
257 	ATTR_INFO_SENT	=	0x10,
258 	ATTR_ACK_RCVD	=	0x20,
259 	ATTR_INFO_RCVD	=	0x40,
260 	ATTR_ACK_SENT	=	0x80,
261 	ATTR_INFO_EXCHANGED	=	(ATTR_ACK_RCVD | ATTR_ACK_SENT),
262 
263 	DRING_INFO_SENT	=	0x100,
264 	DRING_ACK_RCVD	=	0x200,
265 	DRING_INFO_RCVD	=	0x400,
266 	DRING_ACK_SENT	=	0x800,
267 	DRING_INFO_EXCHANGED	=	(DRING_ACK_RCVD | DRING_ACK_SENT),
268 
269 	RDX_INFO_SENT	=	0x1000,
270 	RDX_ACK_RCVD	=	0x2000,
271 	RDX_INFO_RCVD	=	0x4000,
272 	RDX_ACK_SENT	=	0x8000,
273 	RDX_EXCHANGED	=	(RDX_ACK_RCVD | RDX_ACK_SENT)
274 
275 };
276 
277 #define	VGEN_PRI_ETH_DEFINED(vgenp)	((vgenp)->pri_num_types != 0)
278 
/*
 * LDC_LOCK() acquires all five per-channel locks in a fixed order
 * (cblock, rxlock, wrlock, txlock, tclock); LDC_UNLOCK() drops them in the
 * reverse order.
 *
 * Both macros are wrapped in do { } while (0) so that they expand to a
 * single statement. Without the wrapper, "if (cond) LDC_LOCK(ldcp);" would
 * only guard the first mutex_enter() and take the remaining four locks
 * unconditionally. Invoke them as ordinary statements, terminated by ';'.
 */
#define	LDC_LOCK(ldcp)	\
	do {						\
		mutex_enter(&((ldcp)->cblock));		\
		mutex_enter(&((ldcp)->rxlock));		\
		mutex_enter(&((ldcp)->wrlock));		\
		mutex_enter(&((ldcp)->txlock));		\
		mutex_enter(&((ldcp)->tclock));		\
	} while (0)
#define	LDC_UNLOCK(ldcp)	\
	do {						\
		mutex_exit(&((ldcp)->tclock));		\
		mutex_exit(&((ldcp)->txlock));		\
		mutex_exit(&((ldcp)->wrlock));		\
		mutex_exit(&((ldcp)->rxlock));		\
		mutex_exit(&((ldcp)->cblock));		\
	} while (0)
291 
292 #define	VGEN_VER_EQ(ldcp, major, minor)	\
293 	((ldcp)->local_hparams.ver_major == (major) &&	\
294 	    (ldcp)->local_hparams.ver_minor == (minor))
295 
296 #define	VGEN_VER_LT(ldcp, major, minor)	\
297 	(((ldcp)->local_hparams.ver_major < (major)) ||	\
298 	    ((ldcp)->local_hparams.ver_major == (major) &&	\
299 	    (ldcp)->local_hparams.ver_minor < (minor)))
300 
301 #define	VGEN_VER_GTEQ(ldcp, major, minor)	\
302 	(((ldcp)->local_hparams.ver_major > (major)) ||	\
303 	    ((ldcp)->local_hparams.ver_major == (major) &&	\
304 	    (ldcp)->local_hparams.ver_minor >= (minor)))
305 
306 static struct ether_addr etherbroadcastaddr = {
307 	0xff, 0xff, 0xff, 0xff, 0xff, 0xff
308 };
/*
 * MIB II broadcast/multicast packets
 *
 * Both macros take a pointer to a struct ether_header. IS_BROADCAST()
 * compares the destination address with etherbroadcastaddr; IS_MULTICAST()
 * tests the low-order bit of the first destination octet. The argument is
 * parenthesized so that any pointer expression (e.g. "hdr + 1") can be
 * passed safely.
 */
#define	IS_BROADCAST(ehp) \
		(ether_cmp(&(ehp)->ether_dhost, &etherbroadcastaddr) == 0)
#define	IS_MULTICAST(ehp) \
		(((ehp)->ether_dhost.ether_addr_octet[0] & 01) == 1)
316 
317 /*
318  * Property names
319  */
320 static char macaddr_propname[] = "mac-address";
321 static char rmacaddr_propname[] = "remote-mac-address";
322 static char channel_propname[] = "channel-endpoint";
323 static char reg_propname[] = "reg";
324 static char port_propname[] = "port";
325 static char swport_propname[] = "switch-port";
326 static char id_propname[] = "id";
327 static char vdev_propname[] = "virtual-device";
328 static char vnet_propname[] = "network";
329 static char pri_types_propname[] = "priority-ether-types";
330 static char vgen_pvid_propname[] = "port-vlan-id";
331 static char vgen_vid_propname[] = "vlan-id";
332 static char vgen_dvid_propname[] = "default-vlan-id";
333 static char port_pvid_propname[] = "remote-port-vlan-id";
334 static char port_vid_propname[] = "remote-vlan-id";
335 static char vgen_mtu_propname[] = "mtu";
336 static char vgen_linkprop_propname[] = "linkprop";
337 
338 /*
339  * VIO Protocol Version Info:
340  *
341  * The version specified below represents the version of protocol currently
342  * supported in the driver. It means the driver can negotiate with peers with
343  * versions <= this version. Here is a summary of the feature(s) that are
344  * supported at each version of the protocol:
345  *
346  * 1.0			Basic VIO protocol.
347  * 1.1			vDisk protocol update (no virtual network update).
348  * 1.2			Support for priority frames (priority-ether-types).
349  * 1.3			VLAN and HybridIO support.
350  * 1.4			Jumbo Frame support.
351  * 1.5			Link State Notification support with optional support
352  * 			for Physical Link information.
353  */
354 static vgen_ver_t vgen_versions[VGEN_NUM_VER] =  { {1, 5} };
355 
356 /* Tunables */
357 uint32_t vgen_hwd_interval = 5;		/* handshake watchdog freq in sec */
358 uint32_t vgen_max_hretries = VNET_NUM_HANDSHAKES; /* # of handshake retries */
359 uint32_t vgen_ldcwr_retries = 10;	/* max # of ldc_write() retries */
360 uint32_t vgen_ldcup_retries = 5;	/* max # of ldc_up() retries */
361 uint32_t vgen_ldccl_retries = 5;	/* max # of ldc_close() retries */
362 uint32_t vgen_recv_delay = 1;		/* delay when rx descr not ready */
363 uint32_t vgen_recv_retries = 10;	/* retry when rx descr not ready */
364 uint32_t vgen_tx_retries = 0x4;		/* retry when tx descr not available */
365 uint32_t vgen_tx_delay = 0x30;		/* delay when tx descr not available */
366 
367 int vgen_rcv_thread_enabled = 1;	/* Enable Receive thread */
368 
369 static vio_mblk_pool_t	*vgen_rx_poolp = NULL;
370 static krwlock_t	vgen_rw;
371 
372 /*
373  * max # of packets accumulated prior to sending them up. It is best
374  * to keep this at 60% of the number of receive buffers.
375  */
376 uint32_t vgen_chain_len = (VGEN_NRBUFS * 0.6);
377 
378 /*
379  * Internal tunables for receive buffer pools, that is,  the size and number of
380  * mblks for each pool. At least 3 sizes must be specified if these are used.
381  * The sizes must be specified in increasing order. Non-zero value of the first
382  * size will be used as a hint to use these values instead of the algorithm
383  * that determines the sizes based on MTU.
384  */
385 uint32_t vgen_rbufsz1 = 0;
386 uint32_t vgen_rbufsz2 = 0;
387 uint32_t vgen_rbufsz3 = 0;
388 uint32_t vgen_rbufsz4 = 0;
389 
390 uint32_t vgen_nrbufs1 = VGEN_NRBUFS;
391 uint32_t vgen_nrbufs2 = VGEN_NRBUFS;
392 uint32_t vgen_nrbufs3 = VGEN_NRBUFS;
393 uint32_t vgen_nrbufs4 = VGEN_NRBUFS;
394 
395 /*
396  * In the absence of "priority-ether-types" property in MD, the following
397  * internal tunable can be set to specify a single priority ethertype.
398  */
399 uint64_t vgen_pri_eth_type = 0;
400 
401 /*
402  * Number of transmit priority buffers that are preallocated per device.
403  * This number is chosen to be a small value to throttle transmission
404  * of priority packets. Note: Must be a power of 2 for vio_create_mblks().
405  */
406 uint32_t vgen_pri_tx_nmblks = 64;
407 
408 uint32_t	vgen_vlan_nchains = 4;	/* # of chains in vlan id hash table */
409 
410 #ifdef DEBUG
411 /* flags to simulate error conditions for debugging */
412 int vgen_trigger_txtimeout = 0;
413 int vgen_trigger_rxlost = 0;
414 #endif
415 
416 /*
417  * Matching criteria passed to the MDEG to register interest
418  * in changes to 'virtual-device' nodes (i.e. vnet nodes) identified
419  * by their 'name' and 'cfg-handle' properties.
420  */
421 static md_prop_match_t vdev_prop_match[] = {
422 	{ MDET_PROP_STR,    "name"   },
423 	{ MDET_PROP_VAL,    "cfg-handle" },
424 	{ MDET_LIST_END,    NULL    }
425 };
426 
427 static mdeg_node_match_t vdev_match = { "virtual-device",
428 						vdev_prop_match };
429 
430 /* MD update matching structure */
431 static md_prop_match_t	vport_prop_match[] = {
432 	{ MDET_PROP_VAL,	"id" },
433 	{ MDET_LIST_END,	NULL }
434 };
435 
436 static mdeg_node_match_t vport_match = { "virtual-device-port",
437 					vport_prop_match };
438 
439 /* template for matching a particular vnet instance */
440 static mdeg_prop_spec_t vgen_prop_template[] = {
441 	{ MDET_PROP_STR,	"name",		"network" },
442 	{ MDET_PROP_VAL,	"cfg-handle",	NULL },
443 	{ MDET_LIST_END,	NULL,		NULL }
444 };
445 
/*
 * Store 'val' in the ps_val member of the second entry ([1]) of an MDEG
 * property spec array; in vgen_prop_template that entry is "cfg-handle".
 * The expansion is fully parenthesized so that it behaves as one expression
 * wherever it is used.
 *
 * vgen_mdeg_port_cb() is already declared with the other vgen internal
 * function prototypes near the top of the file, so it is not re-declared
 * here.
 */
#define	VGEN_SET_MDEG_PROP_INST(specp, val)	((specp)[1].ps_val = (val))
449 
450 #ifdef	VNET_IOC_DEBUG
451 #define	VGEN_M_CALLBACK_FLAGS	(MC_IOCTL)
452 #else
453 #define	VGEN_M_CALLBACK_FLAGS	(0)
454 #endif
455 
456 static mac_callbacks_t vgen_m_callbacks = {
457 	VGEN_M_CALLBACK_FLAGS,
458 	vgen_stat,
459 	vgen_start,
460 	vgen_stop,
461 	vgen_promisc,
462 	vgen_multicst,
463 	vgen_unicst,
464 	vgen_tx,
465 	vgen_ioctl,
466 	NULL,
467 	NULL
468 };
469 
470 /* externs */
471 extern pri_t	maxclsyspri;
472 extern proc_t	p0;
473 extern uint32_t vnet_ntxds;
474 extern uint32_t vnet_ldcwd_interval;
475 extern uint32_t vnet_ldcwd_txtimeout;
476 extern uint32_t vnet_ldc_mtu;
477 extern uint32_t vnet_nrbufs;
478 extern uint32_t	vnet_ethermtu;
479 extern uint16_t	vnet_default_vlan_id;
480 extern boolean_t vnet_jumbo_rxpools;
481 
482 #ifdef DEBUG
483 
484 extern int vnet_dbglevel;
485 static void debug_printf(const char *fname, vgen_t *vgenp,
486 	vgen_ldc_t *ldcp, const char *fmt, ...);
487 
488 /* -1 for all LDCs info, or ldc_id for a specific LDC info */
489 int vgendbg_ldcid = -1;
490 
491 /* simulate handshake error conditions for debug */
492 uint32_t vgen_hdbg;
493 #define	HDBG_VERSION	0x1
494 #define	HDBG_TIMEOUT	0x2
495 #define	HDBG_BAD_SID	0x4
496 #define	HDBG_OUT_STATE	0x8
497 
498 #endif
499 
500 /*
501  * vgen_init() is called by an instance of vnet driver to initialize the
502  * corresponding generic proxy transport layer. The arguments passed by vnet
503  * are - an opaque pointer to the vnet instance, pointers to dev_info_t and
504  * the mac address of the vnet device, and a pointer to vgen_t is passed
505  * back as a handle to vnet.
506  */
507 int
508 vgen_init(void *vnetp, uint64_t regprop, dev_info_t *vnetdip,
509     const uint8_t *macaddr, void **vgenhdl)
510 {
511 	vgen_t *vgenp;
512 	int instance;
513 	int rv;
514 
515 	if ((vnetp == NULL) || (vnetdip == NULL))
516 		return (DDI_FAILURE);
517 
518 	instance = ddi_get_instance(vnetdip);
519 
520 	DBG1(NULL, NULL, "vnet(%d): enter\n", instance);
521 
522 	vgenp = kmem_zalloc(sizeof (vgen_t), KM_SLEEP);
523 
524 	vgenp->vnetp = vnetp;
525 	vgenp->instance = instance;
526 	vgenp->regprop = regprop;
527 	vgenp->vnetdip = vnetdip;
528 	bcopy(macaddr, &(vgenp->macaddr), ETHERADDRL);
529 	vgenp->phys_link_state = LINK_STATE_UNKNOWN;
530 
531 	/* allocate multicast table */
532 	vgenp->mctab = kmem_zalloc(VGEN_INIT_MCTAB_SIZE *
533 	    sizeof (struct ether_addr), KM_SLEEP);
534 	vgenp->mccount = 0;
535 	vgenp->mcsize = VGEN_INIT_MCTAB_SIZE;
536 
537 	mutex_init(&vgenp->lock, NULL, MUTEX_DRIVER, NULL);
538 	rw_init(&vgenp->vgenports.rwlock, NULL, RW_DRIVER, NULL);
539 
540 	rv = vgen_read_mdprops(vgenp);
541 	if (rv != 0) {
542 		goto vgen_init_fail;
543 	}
544 	*vgenhdl = (void *)vgenp;
545 
546 	DBG1(NULL, NULL, "vnet(%d): exit\n", instance);
547 	return (DDI_SUCCESS);
548 
549 vgen_init_fail:
550 	rw_destroy(&vgenp->vgenports.rwlock);
551 	mutex_destroy(&vgenp->lock);
552 	kmem_free(vgenp->mctab, VGEN_INIT_MCTAB_SIZE *
553 	    sizeof (struct ether_addr));
554 	if (VGEN_PRI_ETH_DEFINED(vgenp)) {
555 		kmem_free(vgenp->pri_types,
556 		    sizeof (uint16_t) * vgenp->pri_num_types);
557 		(void) vio_destroy_mblks(vgenp->pri_tx_vmp);
558 	}
559 	KMEM_FREE(vgenp);
560 	return (DDI_FAILURE);
561 }
562 
563 int
564 vgen_init_mdeg(void *arg)
565 {
566 	vgen_t	*vgenp = (vgen_t *)arg;
567 
568 	/* register with MD event generator */
569 	return (vgen_mdeg_reg(vgenp));
570 }
571 
572 /*
573  * Called by vnet to undo the initializations done by vgen_init().
574  * The handle provided by generic transport during vgen_init() is the argument.
575  */
576 void
577 vgen_uninit(void *arg)
578 {
579 	vgen_t		*vgenp = (vgen_t *)arg;
580 	vio_mblk_pool_t	*rp;
581 	vio_mblk_pool_t	*nrp;
582 
583 	if (vgenp == NULL) {
584 		return;
585 	}
586 
587 	DBG1(vgenp, NULL, "enter\n");
588 
589 	/* unregister with MD event generator */
590 	vgen_mdeg_unreg(vgenp);
591 
592 	mutex_enter(&vgenp->lock);
593 
594 	/* detach all ports from the device */
595 	vgen_detach_ports(vgenp);
596 
597 	/*
598 	 * free any pending rx mblk pools,
599 	 * that couldn't be freed previously during channel detach.
600 	 */
601 	rp = vgenp->rmp;
602 	while (rp != NULL) {
603 		nrp = vgenp->rmp = rp->nextp;
604 		if (vio_destroy_mblks(rp)) {
605 			WRITE_ENTER(&vgen_rw);
606 			rp->nextp = vgen_rx_poolp;
607 			vgen_rx_poolp = rp;
608 			RW_EXIT(&vgen_rw);
609 		}
610 		rp = nrp;
611 	}
612 
613 	/* free multicast table */
614 	kmem_free(vgenp->mctab, vgenp->mcsize * sizeof (struct ether_addr));
615 
616 	/* free pri_types table */
617 	if (VGEN_PRI_ETH_DEFINED(vgenp)) {
618 		kmem_free(vgenp->pri_types,
619 		    sizeof (uint16_t) * vgenp->pri_num_types);
620 		(void) vio_destroy_mblks(vgenp->pri_tx_vmp);
621 	}
622 
623 	mutex_exit(&vgenp->lock);
624 
625 	rw_destroy(&vgenp->vgenports.rwlock);
626 	mutex_destroy(&vgenp->lock);
627 
628 	DBG1(vgenp, NULL, "exit\n");
629 	KMEM_FREE(vgenp);
630 }
631 
632 /*
633  * module specific initialization common to all instances of vnet/vgen.
634  */
635 void
636 vgen_mod_init(void)
637 {
638 	rw_init(&vgen_rw, NULL, RW_DRIVER, NULL);
639 }
640 
641 /*
642  * module specific cleanup common to all instances of vnet/vgen.
643  */
644 int
645 vgen_mod_cleanup(void)
646 {
647 	vio_mblk_pool_t	*poolp, *npoolp;
648 
649 	/*
650 	 * If any rx mblk pools are still in use, return
651 	 * error and stop the module from unloading.
652 	 */
653 	WRITE_ENTER(&vgen_rw);
654 	poolp = vgen_rx_poolp;
655 	while (poolp != NULL) {
656 		npoolp = vgen_rx_poolp = poolp->nextp;
657 		if (vio_destroy_mblks(poolp) != 0) {
658 			vgen_rx_poolp = poolp;
659 			RW_EXIT(&vgen_rw);
660 			return (EBUSY);
661 		}
662 		poolp = npoolp;
663 	}
664 	RW_EXIT(&vgen_rw);
665 
666 	return (0);
667 }
668 
669 /*
670  * module specific uninitialization common to all instances of vnet/vgen.
671  */
672 void
673 vgen_mod_fini(void)
674 {
675 	rw_destroy(&vgen_rw);
676 }
677 
678 /* enable transmit/receive for the device */
679 int
680 vgen_start(void *arg)
681 {
682 	vgen_port_t	*portp = (vgen_port_t *)arg;
683 	vgen_t		*vgenp = portp->vgenp;
684 
685 	DBG1(vgenp, NULL, "enter\n");
686 	mutex_enter(&portp->lock);
687 	vgen_port_init(portp);
688 	portp->flags |= VGEN_STARTED;
689 	mutex_exit(&portp->lock);
690 	DBG1(vgenp, NULL, "exit\n");
691 
692 	return (DDI_SUCCESS);
693 }
694 
695 /* stop transmit/receive */
696 void
697 vgen_stop(void *arg)
698 {
699 	vgen_port_t	*portp = (vgen_port_t *)arg;
700 	vgen_t		*vgenp = portp->vgenp;
701 
702 	DBG1(vgenp, NULL, "enter\n");
703 
704 	mutex_enter(&portp->lock);
705 	vgen_port_uninit(portp);
706 	portp->flags &= ~(VGEN_STARTED);
707 	mutex_exit(&portp->lock);
708 	DBG1(vgenp, NULL, "exit\n");
709 
710 }
711 
712 /* vgen transmit function */
713 static mblk_t *
714 vgen_tx(void *arg, mblk_t *mp)
715 {
716 	int i;
717 	vgen_port_t *portp;
718 	int status = VGEN_FAILURE;
719 
720 	portp = (vgen_port_t *)arg;
721 	/*
722 	 * Retry so that we avoid reporting a failure
723 	 * to the upper layer. Returning a failure may cause the
724 	 * upper layer to go into single threaded mode there by
725 	 * causing performance degradation, especially for a large
726 	 * number of connections.
727 	 */
728 	for (i = 0; i < vgen_tx_retries; ) {
729 		status = vgen_portsend(portp, mp);
730 		if (status == VGEN_SUCCESS) {
731 			break;
732 		}
733 		if (++i < vgen_tx_retries)
734 			delay(drv_usectohz(vgen_tx_delay));
735 	}
736 	if (status != VGEN_SUCCESS) {
737 		/* failure */
738 		return (mp);
739 	}
740 	/* success */
741 	return (NULL);
742 }
743 
744 /*
745  * This function provides any necessary tagging/untagging of the frames
746  * that are being transmitted over the port. It first verifies the vlan
747  * membership of the destination(port) and drops the packet if the
748  * destination doesn't belong to the given vlan.
749  *
750  * Arguments:
751  *   portp:     port over which the frames should be transmitted
752  *   mp:        frame to be transmitted
753  *   is_tagged:
754  *              B_TRUE: indicates frame header contains the vlan tag already.
755  *              B_FALSE: indicates frame is untagged.
756  *   vid:       vlan in which the frame should be transmitted.
757  *
758  * Returns:
759  *              Sucess: frame(mblk_t *) after doing the necessary tag/untag.
760  *              Failure: NULL
761  */
762 static mblk_t *
763 vgen_vlan_frame_fixtag(vgen_port_t *portp, mblk_t *mp, boolean_t is_tagged,
764 	uint16_t vid)
765 {
766 	vgen_t				*vgenp;
767 	boolean_t			dst_tagged;
768 	int				rv;
769 
770 	vgenp = portp->vgenp;
771 
772 	/*
773 	 * If the packet is going to a vnet:
774 	 *   Check if the destination vnet is in the same vlan.
775 	 *   Check the frame header if tag or untag is needed.
776 	 *
777 	 * We do not check the above conditions if the packet is going to vsw:
778 	 *   vsw must be present implicitly in all the vlans that a vnet device
779 	 *   is configured into; even if vsw itself is not assigned to those
780 	 *   vlans as an interface. For instance, the packet might be destined
781 	 *   to another vnet(indirectly through vsw) or to an external host
782 	 *   which is in the same vlan as this vnet and vsw itself may not be
783 	 *   present in that vlan. Similarly packets going to vsw must be
784 	 *   always tagged(unless in the default-vlan) if not already tagged,
785 	 *   as we do not know the final destination. This is needed because
786 	 *   vsw must always invoke its switching function only after tagging
787 	 *   the packet; otherwise after switching function determines the
788 	 *   destination we cannot figure out if the destination belongs to the
789 	 *   the same vlan that the frame originated from and if it needs tag/
790 	 *   untag. Note that vsw will tag the packet itself when it receives
791 	 *   it over the channel from a client if needed. However, that is
792 	 *   needed only in the case of vlan unaware clients such as obp or
793 	 *   earlier versions of vnet.
794 	 *
795 	 */
796 	if (portp != vgenp->vsw_portp) {
797 		/*
798 		 * Packet going to a vnet. Check if the destination vnet is in
799 		 * the same vlan. Then check the frame header if tag/untag is
800 		 * needed.
801 		 */
802 		rv = vgen_vlan_lookup(portp->vlan_hashp, vid);
803 		if (rv == B_FALSE) {
804 			/* drop the packet */
805 			freemsg(mp);
806 			return (NULL);
807 		}
808 
809 		/* is the destination tagged or untagged in this vlan? */
810 		(vid == portp->pvid) ? (dst_tagged = B_FALSE) :
811 		    (dst_tagged = B_TRUE);
812 
813 		if (is_tagged == dst_tagged) {
814 			/* no tagging/untagging needed */
815 			return (mp);
816 		}
817 
818 		if (is_tagged == B_TRUE) {
819 			/* frame is tagged; destination needs untagged */
820 			mp = vnet_vlan_remove_tag(mp);
821 			return (mp);
822 		}
823 
824 		/* (is_tagged == B_FALSE): fallthru to tag tx packet: */
825 	}
826 
827 	/*
828 	 * Packet going to a vnet needs tagging.
829 	 * OR
830 	 * If the packet is going to vsw, then it must be tagged in all cases:
831 	 * unknown unicast, broadcast/multicast or to vsw interface.
832 	 */
833 
834 	if (is_tagged == B_FALSE) {
835 		mp = vnet_vlan_insert_tag(mp, vid);
836 	}
837 
838 	return (mp);
839 }
840 
841 /* transmit packets over the given port */
842 static int
843 vgen_portsend(vgen_port_t *portp, mblk_t *mp)
844 {
845 	vgen_ldclist_t		*ldclp;
846 	vgen_ldc_t		*ldcp;
847 	int			status;
848 	int			rv = VGEN_SUCCESS;
849 	vgen_t			*vgenp = portp->vgenp;
850 	vnet_t			*vnetp = vgenp->vnetp;
851 	boolean_t		is_tagged;
852 	boolean_t		dec_refcnt = B_FALSE;
853 	uint16_t		vlan_id;
854 	struct ether_header	*ehp;
855 
856 	if (portp->use_vsw_port) {
857 		(void) atomic_inc_32(&vgenp->vsw_port_refcnt);
858 		portp = portp->vgenp->vsw_portp;
859 		dec_refcnt = B_TRUE;
860 	}
861 	if (portp == NULL) {
862 		return (VGEN_FAILURE);
863 	}
864 
865 	/*
866 	 * Determine the vlan id that the frame belongs to.
867 	 */
868 	ehp = (struct ether_header *)mp->b_rptr;
869 	is_tagged = vgen_frame_lookup_vid(vnetp, ehp, &vlan_id);
870 
871 	if (vlan_id == vnetp->default_vlan_id) {
872 
873 		/* Frames in default vlan must be untagged */
874 		ASSERT(is_tagged == B_FALSE);
875 
876 		/*
877 		 * If the destination is a vnet-port verify it belongs to the
878 		 * default vlan; otherwise drop the packet. We do not need
879 		 * this check for vsw-port, as it should implicitly belong to
880 		 * this vlan; see comments in vgen_vlan_frame_fixtag().
881 		 */
882 		if (portp != vgenp->vsw_portp &&
883 		    portp->pvid != vnetp->default_vlan_id) {
884 			freemsg(mp);
885 			goto portsend_ret;
886 		}
887 
888 	} else {	/* frame not in default-vlan */
889 
890 		mp = vgen_vlan_frame_fixtag(portp, mp, is_tagged, vlan_id);
891 		if (mp == NULL) {
892 			goto portsend_ret;
893 		}
894 
895 	}
896 
897 	ldclp = &portp->ldclist;
898 	READ_ENTER(&ldclp->rwlock);
899 	/*
900 	 * NOTE: for now, we will assume we have a single channel.
901 	 */
902 	if (ldclp->headp == NULL) {
903 		RW_EXIT(&ldclp->rwlock);
904 		rv = VGEN_FAILURE;
905 		goto portsend_ret;
906 	}
907 	ldcp = ldclp->headp;
908 
909 	status = ldcp->tx(ldcp, mp);
910 
911 	RW_EXIT(&ldclp->rwlock);
912 
913 	if (status != VGEN_TX_SUCCESS) {
914 		rv = VGEN_FAILURE;
915 	}
916 
917 portsend_ret:
918 	if (dec_refcnt == B_TRUE) {
919 		(void) atomic_dec_32(&vgenp->vsw_port_refcnt);
920 	}
921 	return (rv);
922 }
923 
924 /*
925  * Wrapper function to transmit normal and/or priority frames over the channel.
926  */
927 static int
928 vgen_ldcsend(void *arg, mblk_t *mp)
929 {
930 	vgen_ldc_t		*ldcp = (vgen_ldc_t *)arg;
931 	int			status;
932 	struct ether_header	*ehp;
933 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
934 	uint32_t		num_types;
935 	uint16_t		*types;
936 	int			i;
937 
938 	ASSERT(VGEN_PRI_ETH_DEFINED(vgenp));
939 
940 	num_types = vgenp->pri_num_types;
941 	types = vgenp->pri_types;
942 	ehp = (struct ether_header *)mp->b_rptr;
943 
944 	for (i = 0; i < num_types; i++) {
945 
946 		if (ehp->ether_type == types[i]) {
947 			/* priority frame, use pri tx function */
948 			vgen_ldcsend_pkt(ldcp, mp);
949 			return (VGEN_SUCCESS);
950 		}
951 
952 	}
953 
954 	status  = vgen_ldcsend_dring(ldcp, mp);
955 
956 	return (status);
957 }
958 
959 /*
960  * This functions handles ldc channel reset while in the context
961  * of transmit routines: vgen_ldcsend_pkt() or vgen_ldcsend_dring().
962  */
963 static void
964 vgen_ldcsend_process_reset(vgen_ldc_t *ldcp)
965 {
966 	ldc_status_t	istatus;
967 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
968 
969 	if (mutex_tryenter(&ldcp->cblock)) {
970 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
971 			DWARN(vgenp, ldcp, "ldc_status() error\n");
972 		} else {
973 			ldcp->ldc_status = istatus;
974 		}
975 		if (ldcp->ldc_status != LDC_UP) {
976 			vgen_handle_evt_reset(ldcp);
977 		}
978 		mutex_exit(&ldcp->cblock);
979 	}
980 }
981 
982 /*
983  * This function transmits the frame in the payload of a raw data
984  * (VIO_PKT_DATA) message. Thus, it provides an Out-Of-Band path to
985  * send special frames with high priorities, without going through
986  * the normal data path which uses descriptor ring mechanism.
987  */
988 static void
989 vgen_ldcsend_pkt(void *arg, mblk_t *mp)
990 {
991 	vgen_ldc_t		*ldcp = (vgen_ldc_t *)arg;
992 	vio_raw_data_msg_t	*pkt;
993 	mblk_t			*bp;
994 	mblk_t			*nmp = NULL;
995 	caddr_t			dst;
996 	uint32_t		mblksz;
997 	uint32_t		size;
998 	uint32_t		nbytes;
999 	int			rv;
1000 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
1001 	vgen_stats_t		*statsp = &ldcp->stats;
1002 
1003 	/* drop the packet if ldc is not up or handshake is not done */
1004 	if (ldcp->ldc_status != LDC_UP) {
1005 		(void) atomic_inc_32(&statsp->tx_pri_fail);
1006 		DWARN(vgenp, ldcp, "status(%d), dropping packet\n",
1007 		    ldcp->ldc_status);
1008 		goto send_pkt_exit;
1009 	}
1010 
1011 	if (ldcp->hphase != VH_DONE) {
1012 		(void) atomic_inc_32(&statsp->tx_pri_fail);
1013 		DWARN(vgenp, ldcp, "hphase(%x), dropping packet\n",
1014 		    ldcp->hphase);
1015 		goto send_pkt_exit;
1016 	}
1017 
1018 	size = msgsize(mp);
1019 
1020 	/* frame size bigger than available payload len of raw data msg ? */
1021 	if (size > (size_t)(ldcp->msglen - VIO_PKT_DATA_HDRSIZE)) {
1022 		(void) atomic_inc_32(&statsp->tx_pri_fail);
1023 		DWARN(vgenp, ldcp, "invalid size(%d)\n", size);
1024 		goto send_pkt_exit;
1025 	}
1026 
1027 	if (size < ETHERMIN)
1028 		size = ETHERMIN;
1029 
1030 	/* alloc space for a raw data message */
1031 	nmp = vio_allocb(vgenp->pri_tx_vmp);
1032 	if (nmp == NULL) {
1033 		(void) atomic_inc_32(&statsp->tx_pri_fail);
1034 		DWARN(vgenp, ldcp, "vio_allocb failed\n");
1035 		goto send_pkt_exit;
1036 	}
1037 	pkt = (vio_raw_data_msg_t *)nmp->b_rptr;
1038 
1039 	/* copy frame into the payload of raw data message */
1040 	dst = (caddr_t)pkt->data;
1041 	for (bp = mp; bp != NULL; bp = bp->b_cont) {
1042 		mblksz = MBLKL(bp);
1043 		bcopy(bp->b_rptr, dst, mblksz);
1044 		dst += mblksz;
1045 	}
1046 
1047 	/* setup the raw data msg */
1048 	pkt->tag.vio_msgtype = VIO_TYPE_DATA;
1049 	pkt->tag.vio_subtype = VIO_SUBTYPE_INFO;
1050 	pkt->tag.vio_subtype_env = VIO_PKT_DATA;
1051 	pkt->tag.vio_sid = ldcp->local_sid;
1052 	nbytes = VIO_PKT_DATA_HDRSIZE + size;
1053 
1054 	/* send the msg over ldc */
1055 	rv = vgen_sendmsg(ldcp, (caddr_t)pkt, nbytes, B_FALSE);
1056 	if (rv != VGEN_SUCCESS) {
1057 		(void) atomic_inc_32(&statsp->tx_pri_fail);
1058 		DWARN(vgenp, ldcp, "Error sending priority frame\n");
1059 		if (rv == ECONNRESET) {
1060 			vgen_ldcsend_process_reset(ldcp);
1061 		}
1062 		goto send_pkt_exit;
1063 	}
1064 
1065 	/* update stats */
1066 	(void) atomic_inc_64(&statsp->tx_pri_packets);
1067 	(void) atomic_add_64(&statsp->tx_pri_bytes, size);
1068 
1069 send_pkt_exit:
1070 	if (nmp != NULL)
1071 		freemsg(nmp);
1072 	freemsg(mp);
1073 }
1074 
1075 /*
1076  * This function transmits normal (non-priority) data frames over
1077  * the channel. It queues the frame into the transmit descriptor ring
1078  * and sends a VIO_DRING_DATA message if needed, to wake up the
1079  * peer to (re)start processing.
1080  */
1081 static int
1082 vgen_ldcsend_dring(void *arg, mblk_t *mp)
1083 {
1084 	vgen_ldc_t		*ldcp = (vgen_ldc_t *)arg;
1085 	vgen_private_desc_t	*tbufp;
1086 	vgen_private_desc_t	*rtbufp;
1087 	vnet_public_desc_t	*rtxdp;
1088 	vgen_private_desc_t	*ntbufp;
1089 	vnet_public_desc_t	*txdp;
1090 	vio_dring_entry_hdr_t	*hdrp;
1091 	vgen_stats_t		*statsp;
1092 	struct ether_header	*ehp;
1093 	boolean_t		is_bcast = B_FALSE;
1094 	boolean_t		is_mcast = B_FALSE;
1095 	size_t			mblksz;
1096 	caddr_t			dst;
1097 	mblk_t			*bp;
1098 	size_t			size;
1099 	int			rv = 0;
1100 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
1101 	vgen_hparams_t		*lp = &ldcp->local_hparams;
1102 
1103 	statsp = &ldcp->stats;
1104 	size = msgsize(mp);
1105 
1106 	DBG1(vgenp, ldcp, "enter\n");
1107 
1108 	if (ldcp->ldc_status != LDC_UP) {
1109 		DWARN(vgenp, ldcp, "status(%d), dropping packet\n",
1110 		    ldcp->ldc_status);
1111 		/* retry ldc_up() if needed */
1112 #ifdef	VNET_IOC_DEBUG
1113 		if (ldcp->flags & CHANNEL_STARTED && !ldcp->link_down_forced) {
1114 #else
1115 		if (ldcp->flags & CHANNEL_STARTED) {
1116 #endif
1117 			(void) ldc_up(ldcp->ldc_handle);
1118 		}
1119 		goto send_dring_exit;
1120 	}
1121 
1122 	/* drop the packet if ldc is not up or handshake is not done */
1123 	if (ldcp->hphase != VH_DONE) {
1124 		DWARN(vgenp, ldcp, "hphase(%x), dropping packet\n",
1125 		    ldcp->hphase);
1126 		goto send_dring_exit;
1127 	}
1128 
1129 	if (size > (size_t)lp->mtu) {
1130 		DWARN(vgenp, ldcp, "invalid size(%d)\n", size);
1131 		goto send_dring_exit;
1132 	}
1133 	if (size < ETHERMIN)
1134 		size = ETHERMIN;
1135 
1136 	ehp = (struct ether_header *)mp->b_rptr;
1137 	is_bcast = IS_BROADCAST(ehp);
1138 	is_mcast = IS_MULTICAST(ehp);
1139 
1140 	mutex_enter(&ldcp->txlock);
1141 	/*
1142 	 * allocate a descriptor
1143 	 */
1144 	tbufp = ldcp->next_tbufp;
1145 	ntbufp = NEXTTBUF(ldcp, tbufp);
1146 	if (ntbufp == ldcp->cur_tbufp) { /* out of tbufs/txds */
1147 
1148 		mutex_enter(&ldcp->tclock);
1149 		/* Try reclaiming now */
1150 		vgen_reclaim_dring(ldcp);
1151 		ldcp->reclaim_lbolt = ddi_get_lbolt();
1152 
1153 		if (ntbufp == ldcp->cur_tbufp) {
1154 			/* Now we are really out of tbuf/txds */
1155 			ldcp->need_resched = B_TRUE;
1156 			mutex_exit(&ldcp->tclock);
1157 
1158 			statsp->tx_no_desc++;
1159 			mutex_exit(&ldcp->txlock);
1160 
1161 			return (VGEN_TX_NORESOURCES);
1162 		}
1163 		mutex_exit(&ldcp->tclock);
1164 	}
1165 	/* update next available tbuf in the ring and update tx index */
1166 	ldcp->next_tbufp = ntbufp;
1167 	INCR_TXI(ldcp->next_txi, ldcp);
1168 
1169 	/* Mark the buffer busy before releasing the lock */
1170 	tbufp->flags = VGEN_PRIV_DESC_BUSY;
1171 	mutex_exit(&ldcp->txlock);
1172 
1173 	/* copy data into pre-allocated transmit buffer */
1174 	dst = tbufp->datap + VNET_IPALIGN;
1175 	for (bp = mp; bp != NULL; bp = bp->b_cont) {
1176 		mblksz = MBLKL(bp);
1177 		bcopy(bp->b_rptr, dst, mblksz);
1178 		dst += mblksz;
1179 	}
1180 
1181 	tbufp->datalen = size;
1182 
1183 	/* initialize the corresponding public descriptor (txd) */
1184 	txdp = tbufp->descp;
1185 	hdrp = &txdp->hdr;
1186 	txdp->nbytes = size;
1187 	txdp->ncookies = tbufp->ncookies;
1188 	bcopy((tbufp->memcookie), (txdp->memcookie),
1189 	    tbufp->ncookies * sizeof (ldc_mem_cookie_t));
1190 
1191 	mutex_enter(&ldcp->wrlock);
1192 	/*
1193 	 * If the flags not set to BUSY, it implies that the clobber
1194 	 * was done while we were copying the data. In such case,
1195 	 * discard the packet and return.
1196 	 */
1197 	if (tbufp->flags != VGEN_PRIV_DESC_BUSY) {
1198 		statsp->oerrors++;
1199 		mutex_exit(&ldcp->wrlock);
1200 		goto send_dring_exit;
1201 	}
1202 	hdrp->dstate = VIO_DESC_READY;
1203 
1204 	/* update stats */
1205 	statsp->opackets++;
1206 	statsp->obytes += size;
1207 	if (is_bcast)
1208 		statsp->brdcstxmt++;
1209 	else if (is_mcast)
1210 		statsp->multixmt++;
1211 
1212 	/* send dring datamsg to the peer */
1213 	if (ldcp->resched_peer) {
1214 
1215 		rtbufp = &ldcp->tbufp[ldcp->resched_peer_txi];
1216 		rtxdp = rtbufp->descp;
1217 
1218 		if (rtxdp->hdr.dstate == VIO_DESC_READY) {
1219 
1220 			rv = vgen_send_dring_data(ldcp,
1221 			    (uint32_t)ldcp->resched_peer_txi, -1);
1222 			if (rv != 0) {
1223 				/* error: drop the packet */
1224 				DWARN(vgenp, ldcp, "vgen_send_dring_data "
1225 				    "failed: rv(%d) len(%d)\n",
1226 				    ldcp->ldc_id, rv, size);
1227 				statsp->oerrors++;
1228 			} else {
1229 				ldcp->resched_peer = B_FALSE;
1230 			}
1231 
1232 		}
1233 
1234 	}
1235 
1236 	mutex_exit(&ldcp->wrlock);
1237 
1238 send_dring_exit:
1239 	if (rv == ECONNRESET) {
1240 		vgen_ldcsend_process_reset(ldcp);
1241 	}
1242 	freemsg(mp);
1243 	DBG1(vgenp, ldcp, "exit\n");
1244 	return (VGEN_TX_SUCCESS);
1245 }
1246 
1247 /* enable/disable a multicast address */
1248 int
1249 vgen_multicst(void *arg, boolean_t add, const uint8_t *mca)
1250 {
1251 	vgen_t			*vgenp;
1252 	vnet_mcast_msg_t	mcastmsg;
1253 	vio_msg_tag_t		*tagp;
1254 	vgen_port_t		*portp;
1255 	vgen_portlist_t		*plistp;
1256 	vgen_ldc_t		*ldcp;
1257 	vgen_ldclist_t		*ldclp;
1258 	struct ether_addr	*addrp;
1259 	int			rv = DDI_FAILURE;
1260 	uint32_t		i;
1261 
1262 	portp = (vgen_port_t *)arg;
1263 	vgenp = portp->vgenp;
1264 
1265 	if (portp != vgenp->vsw_portp) {
1266 		return (DDI_SUCCESS);
1267 	}
1268 
1269 	addrp = (struct ether_addr *)mca;
1270 	tagp = &mcastmsg.tag;
1271 	bzero(&mcastmsg, sizeof (mcastmsg));
1272 
1273 	mutex_enter(&vgenp->lock);
1274 
1275 	plistp = &(vgenp->vgenports);
1276 
1277 	READ_ENTER(&plistp->rwlock);
1278 
1279 	portp = vgenp->vsw_portp;
1280 	if (portp == NULL) {
1281 		RW_EXIT(&plistp->rwlock);
1282 		mutex_exit(&vgenp->lock);
1283 		return (rv);
1284 	}
1285 	ldclp = &portp->ldclist;
1286 
1287 	READ_ENTER(&ldclp->rwlock);
1288 
1289 	ldcp = ldclp->headp;
1290 	if (ldcp == NULL)
1291 		goto vgen_mcast_exit;
1292 
1293 	mutex_enter(&ldcp->cblock);
1294 
1295 	if (ldcp->hphase == VH_DONE) {
1296 		/*
1297 		 * If handshake is done, send a msg to vsw to add/remove
1298 		 * the multicast address. Otherwise, we just update this
1299 		 * mcast address in our table and the table will be sync'd
1300 		 * with vsw when handshake completes.
1301 		 */
1302 		tagp->vio_msgtype = VIO_TYPE_CTRL;
1303 		tagp->vio_subtype = VIO_SUBTYPE_INFO;
1304 		tagp->vio_subtype_env = VNET_MCAST_INFO;
1305 		tagp->vio_sid = ldcp->local_sid;
1306 		bcopy(mca, &(mcastmsg.mca), ETHERADDRL);
1307 		mcastmsg.set = add;
1308 		mcastmsg.count = 1;
1309 		if (vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (mcastmsg),
1310 		    B_FALSE) != VGEN_SUCCESS) {
1311 			DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
1312 			mutex_exit(&ldcp->cblock);
1313 			goto vgen_mcast_exit;
1314 		}
1315 	}
1316 
1317 	mutex_exit(&ldcp->cblock);
1318 
1319 	if (add) {
1320 
1321 		/* expand multicast table if necessary */
1322 		if (vgenp->mccount >= vgenp->mcsize) {
1323 			struct ether_addr	*newtab;
1324 			uint32_t		newsize;
1325 
1326 
1327 			newsize = vgenp->mcsize * 2;
1328 
1329 			newtab = kmem_zalloc(newsize *
1330 			    sizeof (struct ether_addr), KM_NOSLEEP);
1331 			if (newtab == NULL)
1332 				goto vgen_mcast_exit;
1333 			bcopy(vgenp->mctab, newtab, vgenp->mcsize *
1334 			    sizeof (struct ether_addr));
1335 			kmem_free(vgenp->mctab,
1336 			    vgenp->mcsize * sizeof (struct ether_addr));
1337 
1338 			vgenp->mctab = newtab;
1339 			vgenp->mcsize = newsize;
1340 		}
1341 
1342 		/* add address to the table */
1343 		vgenp->mctab[vgenp->mccount++] = *addrp;
1344 
1345 	} else {
1346 
1347 		/* delete address from the table */
1348 		for (i = 0; i < vgenp->mccount; i++) {
1349 			if (ether_cmp(addrp, &(vgenp->mctab[i])) == 0) {
1350 
1351 				/*
1352 				 * If there's more than one address in this
1353 				 * table, delete the unwanted one by moving
1354 				 * the last one in the list over top of it;
1355 				 * otherwise, just remove it.
1356 				 */
1357 				if (vgenp->mccount > 1) {
1358 					vgenp->mctab[i] =
1359 					    vgenp->mctab[vgenp->mccount-1];
1360 				}
1361 				vgenp->mccount--;
1362 				break;
1363 			}
1364 		}
1365 	}
1366 
1367 	rv = DDI_SUCCESS;
1368 
1369 vgen_mcast_exit:
1370 	RW_EXIT(&ldclp->rwlock);
1371 	RW_EXIT(&plistp->rwlock);
1372 
1373 	mutex_exit(&vgenp->lock);
1374 	return (rv);
1375 }
1376 
1377 /* set or clear promiscuous mode on the device */
1378 static int
1379 vgen_promisc(void *arg, boolean_t on)
1380 {
1381 	_NOTE(ARGUNUSED(arg, on))
1382 	return (DDI_SUCCESS);
1383 }
1384 
1385 /* set the unicast mac address of the device */
1386 static int
1387 vgen_unicst(void *arg, const uint8_t *mca)
1388 {
1389 	_NOTE(ARGUNUSED(arg, mca))
1390 	return (DDI_SUCCESS);
1391 }
1392 
1393 /* get device statistics */
1394 int
1395 vgen_stat(void *arg, uint_t stat, uint64_t *val)
1396 {
1397 	vgen_port_t	*portp = (vgen_port_t *)arg;
1398 
1399 	*val = vgen_port_stat(portp, stat);
1400 
1401 	return (0);
1402 }
1403 
1404 /* vgen internal functions */
1405 /* detach all ports from the device */
1406 static void
1407 vgen_detach_ports(vgen_t *vgenp)
1408 {
1409 	vgen_port_t	*portp;
1410 	vgen_portlist_t	*plistp;
1411 
1412 	plistp = &(vgenp->vgenports);
1413 	WRITE_ENTER(&plistp->rwlock);
1414 	while ((portp = plistp->headp) != NULL) {
1415 		vgen_port_detach(portp);
1416 	}
1417 	RW_EXIT(&plistp->rwlock);
1418 }
1419 
1420 /*
1421  * detach the given port.
1422  */
1423 static void
1424 vgen_port_detach(vgen_port_t *portp)
1425 {
1426 	vgen_t		*vgenp;
1427 	vgen_ldclist_t	*ldclp;
1428 	int		port_num;
1429 
1430 	vgenp = portp->vgenp;
1431 	port_num = portp->port_num;
1432 
1433 	DBG1(vgenp, NULL, "port(%d):enter\n", port_num);
1434 
1435 	/*
1436 	 * If this port is connected to the vswitch, then
1437 	 * potentially there could be ports that may be using
1438 	 * this port to transmit packets. To address this do
1439 	 * the following:
1440 	 *	- First set vgenp->vsw_portp to NULL, so that
1441 	 *	  its not used after that.
1442 	 *	- Then wait for the refcnt to go down to 0.
1443 	 *	- Now we can safely detach this port.
1444 	 */
1445 	if (vgenp->vsw_portp == portp) {
1446 		vgenp->vsw_portp = NULL;
1447 		while (vgenp->vsw_port_refcnt > 0) {
1448 			delay(drv_usectohz(vgen_tx_delay));
1449 		}
1450 		(void) atomic_swap_32(&vgenp->vsw_port_refcnt, 0);
1451 	}
1452 
1453 	if (portp->vhp != NULL) {
1454 		vio_net_resource_unreg(portp->vhp);
1455 		portp->vhp = NULL;
1456 	}
1457 
1458 	vgen_vlan_destroy_hash(portp);
1459 
1460 	/* remove it from port list */
1461 	vgen_port_list_remove(portp);
1462 
1463 	/* detach channels from this port */
1464 	ldclp = &portp->ldclist;
1465 	WRITE_ENTER(&ldclp->rwlock);
1466 	while (ldclp->headp) {
1467 		vgen_ldc_detach(ldclp->headp);
1468 	}
1469 	RW_EXIT(&ldclp->rwlock);
1470 	rw_destroy(&ldclp->rwlock);
1471 
1472 	if (portp->num_ldcs != 0) {
1473 		kmem_free(portp->ldc_ids, portp->num_ldcs * sizeof (uint64_t));
1474 		portp->num_ldcs = 0;
1475 	}
1476 
1477 	mutex_destroy(&portp->lock);
1478 	KMEM_FREE(portp);
1479 
1480 	DBG1(vgenp, NULL, "port(%d):exit\n", port_num);
1481 }
1482 
1483 /* add a port to port list */
1484 static void
1485 vgen_port_list_insert(vgen_port_t *portp)
1486 {
1487 	vgen_portlist_t *plistp;
1488 	vgen_t *vgenp;
1489 
1490 	vgenp = portp->vgenp;
1491 	plistp = &(vgenp->vgenports);
1492 
1493 	if (plistp->headp == NULL) {
1494 		plistp->headp = portp;
1495 	} else {
1496 		plistp->tailp->nextp = portp;
1497 	}
1498 	plistp->tailp = portp;
1499 	portp->nextp = NULL;
1500 }
1501 
1502 /* remove a port from port list */
1503 static void
1504 vgen_port_list_remove(vgen_port_t *portp)
1505 {
1506 	vgen_port_t *prevp;
1507 	vgen_port_t *nextp;
1508 	vgen_portlist_t *plistp;
1509 	vgen_t *vgenp;
1510 
1511 	vgenp = portp->vgenp;
1512 
1513 	plistp = &(vgenp->vgenports);
1514 
1515 	if (plistp->headp == NULL)
1516 		return;
1517 
1518 	if (portp == plistp->headp) {
1519 		plistp->headp = portp->nextp;
1520 		if (portp == plistp->tailp)
1521 			plistp->tailp = plistp->headp;
1522 	} else {
1523 		for (prevp = plistp->headp;
1524 		    ((nextp = prevp->nextp) != NULL) && (nextp != portp);
1525 		    prevp = nextp)
1526 			;
1527 		if (nextp == portp) {
1528 			prevp->nextp = portp->nextp;
1529 		}
1530 		if (portp == plistp->tailp)
1531 			plistp->tailp = prevp;
1532 	}
1533 }
1534 
1535 /* lookup a port in the list based on port_num */
1536 static vgen_port_t *
1537 vgen_port_lookup(vgen_portlist_t *plistp, int port_num)
1538 {
1539 	vgen_port_t *portp = NULL;
1540 
1541 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
1542 		if (portp->port_num == port_num) {
1543 			break;
1544 		}
1545 	}
1546 
1547 	return (portp);
1548 }
1549 
1550 /* enable ports for transmit/receive */
1551 static void
1552 vgen_init_ports(vgen_t *vgenp)
1553 {
1554 	vgen_port_t	*portp;
1555 	vgen_portlist_t	*plistp;
1556 
1557 	plistp = &(vgenp->vgenports);
1558 	READ_ENTER(&plistp->rwlock);
1559 
1560 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
1561 		vgen_port_init(portp);
1562 	}
1563 
1564 	RW_EXIT(&plistp->rwlock);
1565 }
1566 
1567 static void
1568 vgen_port_init(vgen_port_t *portp)
1569 {
1570 	/* Add the port to the specified vlans */
1571 	vgen_vlan_add_ids(portp);
1572 
1573 	/* Bring up the channels of this port */
1574 	vgen_init_ldcs(portp);
1575 }
1576 
1577 /* disable transmit/receive on ports */
1578 static void
1579 vgen_uninit_ports(vgen_t *vgenp)
1580 {
1581 	vgen_port_t	*portp;
1582 	vgen_portlist_t	*plistp;
1583 
1584 	plistp = &(vgenp->vgenports);
1585 	READ_ENTER(&plistp->rwlock);
1586 
1587 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
1588 		vgen_port_uninit(portp);
1589 	}
1590 
1591 	RW_EXIT(&plistp->rwlock);
1592 }
1593 
1594 static void
1595 vgen_port_uninit(vgen_port_t *portp)
1596 {
1597 	vgen_uninit_ldcs(portp);
1598 
1599 	/* remove the port from vlans it has been assigned to */
1600 	vgen_vlan_remove_ids(portp);
1601 }
1602 
1603 /*
1604  * Scan the machine description for this instance of vnet
1605  * and read its properties. Called only from vgen_init().
1606  * Returns: 0 on success, 1 on failure.
1607  */
1608 static int
1609 vgen_read_mdprops(vgen_t *vgenp)
1610 {
1611 	vnet_t		*vnetp = vgenp->vnetp;
1612 	md_t		*mdp = NULL;
1613 	mde_cookie_t	rootnode;
1614 	mde_cookie_t	*listp = NULL;
1615 	uint64_t	cfgh;
1616 	char		*name;
1617 	int		rv = 1;
1618 	int		num_nodes = 0;
1619 	int		num_devs = 0;
1620 	int		listsz = 0;
1621 	int		i;
1622 
1623 	if ((mdp = md_get_handle()) == NULL) {
1624 		return (rv);
1625 	}
1626 
1627 	num_nodes = md_node_count(mdp);
1628 	ASSERT(num_nodes > 0);
1629 
1630 	listsz = num_nodes * sizeof (mde_cookie_t);
1631 	listp = (mde_cookie_t *)kmem_zalloc(listsz, KM_SLEEP);
1632 
1633 	rootnode = md_root_node(mdp);
1634 
1635 	/* search for all "virtual_device" nodes */
1636 	num_devs = md_scan_dag(mdp, rootnode,
1637 	    md_find_name(mdp, vdev_propname),
1638 	    md_find_name(mdp, "fwd"), listp);
1639 	if (num_devs <= 0) {
1640 		goto vgen_readmd_exit;
1641 	}
1642 
1643 	/*
1644 	 * Now loop through the list of virtual-devices looking for
1645 	 * devices with name "network" and for each such device compare
1646 	 * its instance with what we have from the 'reg' property to
1647 	 * find the right node in MD and then read all its properties.
1648 	 */
1649 	for (i = 0; i < num_devs; i++) {
1650 
1651 		if (md_get_prop_str(mdp, listp[i], "name", &name) != 0) {
1652 			goto vgen_readmd_exit;
1653 		}
1654 
1655 		/* is this a "network" device? */
1656 		if (strcmp(name, vnet_propname) != 0)
1657 			continue;
1658 
1659 		if (md_get_prop_val(mdp, listp[i], "cfg-handle", &cfgh) != 0) {
1660 			goto vgen_readmd_exit;
1661 		}
1662 
1663 		/* is this the required instance of vnet? */
1664 		if (vgenp->regprop != cfgh)
1665 			continue;
1666 
1667 		/*
1668 		 * Read the 'linkprop' property to know if this vnet
1669 		 * device should get physical link updates from vswitch.
1670 		 */
1671 		vgen_linkprop_read(vgenp, mdp, listp[i],
1672 		    &vnetp->pls_update);
1673 
1674 		/*
1675 		 * Read the mtu. Note that we set the mtu of vnet device within
1676 		 * this routine itself, after validating the range.
1677 		 */
1678 		vgen_mtu_read(vgenp, mdp, listp[i], &vnetp->mtu);
1679 		if (vnetp->mtu < ETHERMTU || vnetp->mtu > VNET_MAX_MTU) {
1680 			vnetp->mtu = ETHERMTU;
1681 		}
1682 		vgenp->max_frame_size = vnetp->mtu +
1683 		    sizeof (struct ether_header) + VLAN_TAGSZ;
1684 
1685 		/* read priority ether types */
1686 		vgen_read_pri_eth_types(vgenp, mdp, listp[i]);
1687 
1688 		/* read vlan id properties of this vnet instance */
1689 		vgen_vlan_read_ids(vgenp, VGEN_LOCAL, mdp, listp[i],
1690 		    &vnetp->pvid, &vnetp->vids, &vnetp->nvids,
1691 		    &vnetp->default_vlan_id);
1692 
1693 		rv = 0;
1694 		break;
1695 	}
1696 
1697 vgen_readmd_exit:
1698 
1699 	kmem_free(listp, listsz);
1700 	(void) md_fini_handle(mdp);
1701 	return (rv);
1702 }
1703 
1704 /*
1705  * Read vlan id properties of the given MD node.
1706  * Arguments:
1707  *   arg:          device argument(vnet device or a port)
1708  *   type:         type of arg; VGEN_LOCAL(vnet device) or VGEN_PEER(port)
1709  *   mdp:          machine description
1710  *   node:         md node cookie
1711  *
1712  * Returns:
1713  *   pvidp:        port-vlan-id of the node
1714  *   vidspp:       list of vlan-ids of the node
1715  *   nvidsp:       # of vlan-ids in the list
1716  *   default_idp:  default-vlan-id of the node(if node is vnet device)
1717  */
1718 static void
1719 vgen_vlan_read_ids(void *arg, int type, md_t *mdp, mde_cookie_t node,
1720 	uint16_t *pvidp, uint16_t **vidspp, uint16_t *nvidsp,
1721 	uint16_t *default_idp)
1722 {
1723 	vgen_t		*vgenp;
1724 	vnet_t		*vnetp;
1725 	vgen_port_t	*portp;
1726 	char		*pvid_propname;
1727 	char		*vid_propname;
1728 	uint_t		nvids;
1729 	uint32_t	vids_size;
1730 	int		rv;
1731 	int		i;
1732 	uint64_t	*data;
1733 	uint64_t	val;
1734 	int		size;
1735 	int		inst;
1736 
1737 	if (type == VGEN_LOCAL) {
1738 
1739 		vgenp = (vgen_t *)arg;
1740 		vnetp = vgenp->vnetp;
1741 		pvid_propname = vgen_pvid_propname;
1742 		vid_propname = vgen_vid_propname;
1743 		inst = vnetp->instance;
1744 
1745 	} else if (type == VGEN_PEER) {
1746 
1747 		portp = (vgen_port_t *)arg;
1748 		vgenp = portp->vgenp;
1749 		vnetp = vgenp->vnetp;
1750 		pvid_propname = port_pvid_propname;
1751 		vid_propname = port_vid_propname;
1752 		inst = portp->port_num;
1753 
1754 	} else {
1755 		return;
1756 	}
1757 
1758 	if (type == VGEN_LOCAL && default_idp != NULL) {
1759 		rv = md_get_prop_val(mdp, node, vgen_dvid_propname, &val);
1760 		if (rv != 0) {
1761 			DWARN(vgenp, NULL, "prop(%s) not found",
1762 			    vgen_dvid_propname);
1763 
1764 			*default_idp = vnet_default_vlan_id;
1765 		} else {
1766 			*default_idp = val & 0xFFF;
1767 			DBG2(vgenp, NULL, "%s(%d): (%d)\n", vgen_dvid_propname,
1768 			    inst, *default_idp);
1769 		}
1770 	}
1771 
1772 	rv = md_get_prop_val(mdp, node, pvid_propname, &val);
1773 	if (rv != 0) {
1774 		DWARN(vgenp, NULL, "prop(%s) not found", pvid_propname);
1775 		*pvidp = vnet_default_vlan_id;
1776 	} else {
1777 
1778 		*pvidp = val & 0xFFF;
1779 		DBG2(vgenp, NULL, "%s(%d): (%d)\n",
1780 		    pvid_propname, inst, *pvidp);
1781 	}
1782 
1783 	rv = md_get_prop_data(mdp, node, vid_propname, (uint8_t **)&data,
1784 	    &size);
1785 	if (rv != 0) {
1786 		DBG2(vgenp, NULL, "prop(%s) not found", vid_propname);
1787 		size = 0;
1788 	} else {
1789 		size /= sizeof (uint64_t);
1790 	}
1791 	nvids = size;
1792 
1793 	if (nvids != 0) {
1794 		DBG2(vgenp, NULL, "%s(%d): ", vid_propname, inst);
1795 		vids_size = sizeof (uint16_t) * nvids;
1796 		*vidspp = kmem_zalloc(vids_size, KM_SLEEP);
1797 		for (i = 0; i < nvids; i++) {
1798 			(*vidspp)[i] = data[i] & 0xFFFF;
1799 			DBG2(vgenp, NULL, " %d ", (*vidspp)[i]);
1800 		}
1801 		DBG2(vgenp, NULL, "\n");
1802 	}
1803 
1804 	*nvidsp = nvids;
1805 }
1806 
1807 /*
1808  * Create a vlan id hash table for the given port.
1809  */
1810 static void
1811 vgen_vlan_create_hash(vgen_port_t *portp)
1812 {
1813 	char		hashname[MAXNAMELEN];
1814 
1815 	(void) snprintf(hashname, MAXNAMELEN, "port%d-vlan-hash",
1816 	    portp->port_num);
1817 
1818 	portp->vlan_nchains = vgen_vlan_nchains;
1819 	portp->vlan_hashp = mod_hash_create_idhash(hashname,
1820 	    portp->vlan_nchains, mod_hash_null_valdtor);
1821 }
1822 
1823 /*
1824  * Destroy the vlan id hash table in the given port.
1825  */
1826 static void
1827 vgen_vlan_destroy_hash(vgen_port_t *portp)
1828 {
1829 	if (portp->vlan_hashp != NULL) {
1830 		mod_hash_destroy_hash(portp->vlan_hashp);
1831 		portp->vlan_hashp = NULL;
1832 		portp->vlan_nchains = 0;
1833 	}
1834 }
1835 
1836 /*
1837  * Add a port to the vlans specified in its port properites.
1838  */
1839 static void
1840 vgen_vlan_add_ids(vgen_port_t *portp)
1841 {
1842 	int		rv;
1843 	int		i;
1844 
1845 	rv = mod_hash_insert(portp->vlan_hashp,
1846 	    (mod_hash_key_t)VLAN_ID_KEY(portp->pvid),
1847 	    (mod_hash_val_t)B_TRUE);
1848 	ASSERT(rv == 0);
1849 
1850 	for (i = 0; i < portp->nvids; i++) {
1851 		rv = mod_hash_insert(portp->vlan_hashp,
1852 		    (mod_hash_key_t)VLAN_ID_KEY(portp->vids[i]),
1853 		    (mod_hash_val_t)B_TRUE);
1854 		ASSERT(rv == 0);
1855 	}
1856 }
1857 
1858 /*
1859  * Remove a port from the vlans it has been assigned to.
1860  */
1861 static void
1862 vgen_vlan_remove_ids(vgen_port_t *portp)
1863 {
1864 	int		rv;
1865 	int		i;
1866 	mod_hash_val_t	vp;
1867 
1868 	rv = mod_hash_remove(portp->vlan_hashp,
1869 	    (mod_hash_key_t)VLAN_ID_KEY(portp->pvid),
1870 	    (mod_hash_val_t *)&vp);
1871 	ASSERT(rv == 0);
1872 
1873 	for (i = 0; i < portp->nvids; i++) {
1874 		rv = mod_hash_remove(portp->vlan_hashp,
1875 		    (mod_hash_key_t)VLAN_ID_KEY(portp->vids[i]),
1876 		    (mod_hash_val_t *)&vp);
1877 		ASSERT(rv == 0);
1878 	}
1879 }
1880 
1881 /*
1882  * Lookup the vlan id of the given tx frame. If it is a vlan-tagged frame,
1883  * then the vlan-id is available in the tag; otherwise, its vlan id is
1884  * implicitly obtained from the port-vlan-id of the vnet device.
1885  * The vlan id determined is returned in vidp.
1886  * Returns: B_TRUE if it is a tagged frame; B_FALSE if it is untagged.
1887  */
1888 static boolean_t
1889 vgen_frame_lookup_vid(vnet_t *vnetp, struct ether_header *ehp, uint16_t *vidp)
1890 {
1891 	struct ether_vlan_header	*evhp;
1892 
1893 	/* If it's a tagged frame, get the vlan id from vlan header */
1894 	if (ehp->ether_type == ETHERTYPE_VLAN) {
1895 
1896 		evhp = (struct ether_vlan_header *)ehp;
1897 		*vidp = VLAN_ID(ntohs(evhp->ether_tci));
1898 		return (B_TRUE);
1899 	}
1900 
1901 	/* Untagged frame, vlan-id is the pvid of vnet device */
1902 	*vidp = vnetp->pvid;
1903 	return (B_FALSE);
1904 }
1905 
1906 /*
1907  * Find the given vlan id in the hash table.
1908  * Return: B_TRUE if the id is found; B_FALSE if not found.
1909  */
1910 static boolean_t
1911 vgen_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid)
1912 {
1913 	int		rv;
1914 	mod_hash_val_t	vp;
1915 
1916 	rv = mod_hash_find(vlan_hashp, VLAN_ID_KEY(vid), (mod_hash_val_t *)&vp);
1917 
1918 	if (rv != 0)
1919 		return (B_FALSE);
1920 
1921 	return (B_TRUE);
1922 }
1923 
1924 /*
1925  * This function reads "priority-ether-types" property from md. This property
1926  * is used to enable support for priority frames. Applications which need
1927  * guaranteed and timely delivery of certain high priority frames to/from
1928  * a vnet or vsw within ldoms, should configure this property by providing
1929  * the ether type(s) for which the priority facility is needed.
1930  * Normal data frames are delivered over a ldc channel using the descriptor
1931  * ring mechanism which is constrained by factors such as descriptor ring size,
1932  * the rate at which the ring is processed at the peer ldc end point, etc.
1933  * The priority mechanism provides an Out-Of-Band path to send/receive frames
1934  * as raw pkt data (VIO_PKT_DATA) messages over the channel, avoiding the
1935  * descriptor ring path and enables a more reliable and timely delivery of
1936  * frames to the peer.
1937  */
1938 static void
1939 vgen_read_pri_eth_types(vgen_t *vgenp, md_t *mdp, mde_cookie_t node)
1940 {
1941 	int		rv;
1942 	uint16_t	*types;
1943 	uint64_t	*data;
1944 	int		size;
1945 	int		i;
1946 	size_t		mblk_sz;
1947 
1948 	rv = md_get_prop_data(mdp, node, pri_types_propname,
1949 	    (uint8_t **)&data, &size);
1950 	if (rv != 0) {
1951 		/*
1952 		 * Property may not exist if we are running pre-ldoms1.1 f/w.
1953 		 * Check if 'vgen_pri_eth_type' has been set in that case.
1954 		 */
1955 		if (vgen_pri_eth_type != 0) {
1956 			size = sizeof (vgen_pri_eth_type);
1957 			data = &vgen_pri_eth_type;
1958 		} else {
1959 			DBG2(vgenp, NULL,
1960 			    "prop(%s) not found", pri_types_propname);
1961 			size = 0;
1962 		}
1963 	}
1964 
1965 	if (size == 0) {
1966 		vgenp->pri_num_types = 0;
1967 		return;
1968 	}
1969 
1970 	/*
1971 	 * we have some priority-ether-types defined;
1972 	 * allocate a table of these types and also
1973 	 * allocate a pool of mblks to transmit these
1974 	 * priority packets.
1975 	 */
1976 	size /= sizeof (uint64_t);
1977 	vgenp->pri_num_types = size;
1978 	vgenp->pri_types = kmem_zalloc(size * sizeof (uint16_t), KM_SLEEP);
1979 	for (i = 0, types = vgenp->pri_types; i < size; i++) {
1980 		types[i] = data[i] & 0xFFFF;
1981 	}
1982 	mblk_sz = (VIO_PKT_DATA_HDRSIZE + vgenp->max_frame_size + 7) & ~7;
1983 	(void) vio_create_mblks(vgen_pri_tx_nmblks, mblk_sz,
1984 	    &vgenp->pri_tx_vmp);
1985 }
1986 
1987 static void
1988 vgen_mtu_read(vgen_t *vgenp, md_t *mdp, mde_cookie_t node, uint32_t *mtu)
1989 {
1990 	int		rv;
1991 	uint64_t	val;
1992 	char		*mtu_propname;
1993 
1994 	mtu_propname = vgen_mtu_propname;
1995 
1996 	rv = md_get_prop_val(mdp, node, mtu_propname, &val);
1997 	if (rv != 0) {
1998 		DWARN(vgenp, NULL, "prop(%s) not found", mtu_propname);
1999 		*mtu = vnet_ethermtu;
2000 	} else {
2001 
2002 		*mtu = val & 0xFFFF;
2003 		DBG2(vgenp, NULL, "%s(%d): (%d)\n", mtu_propname,
2004 		    vgenp->instance, *mtu);
2005 	}
2006 }
2007 
2008 static void
2009 vgen_linkprop_read(vgen_t *vgenp, md_t *mdp, mde_cookie_t node,
2010 	boolean_t *pls)
2011 {
2012 	int		rv;
2013 	uint64_t	val;
2014 	char		*linkpropname;
2015 
2016 	linkpropname = vgen_linkprop_propname;
2017 
2018 	rv = md_get_prop_val(mdp, node, linkpropname, &val);
2019 	if (rv != 0) {
2020 		DWARN(vgenp, NULL, "prop(%s) not found", linkpropname);
2021 		*pls = B_FALSE;
2022 	} else {
2023 
2024 		*pls = (val & 0x1) ?  B_TRUE : B_FALSE;
2025 		DBG2(vgenp, NULL, "%s(%d): (%d)\n", linkpropname,
2026 		    vgenp->instance, *pls);
2027 	}
2028 }
2029 
2030 /* register with MD event generator */
2031 static int
2032 vgen_mdeg_reg(vgen_t *vgenp)
2033 {
2034 	mdeg_prop_spec_t	*pspecp;
2035 	mdeg_node_spec_t	*parentp;
2036 	uint_t			templatesz;
2037 	int			rv;
2038 	mdeg_handle_t		dev_hdl = NULL;
2039 	mdeg_handle_t		port_hdl = NULL;
2040 
2041 	templatesz = sizeof (vgen_prop_template);
2042 	pspecp = kmem_zalloc(templatesz, KM_NOSLEEP);
2043 	if (pspecp == NULL) {
2044 		return (DDI_FAILURE);
2045 	}
2046 	parentp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_NOSLEEP);
2047 	if (parentp == NULL) {
2048 		kmem_free(pspecp, templatesz);
2049 		return (DDI_FAILURE);
2050 	}
2051 
2052 	bcopy(vgen_prop_template, pspecp, templatesz);
2053 
2054 	/*
2055 	 * NOTE: The instance here refers to the value of "reg" property and
2056 	 * not the dev_info instance (ddi_get_instance()) of vnet.
2057 	 */
2058 	VGEN_SET_MDEG_PROP_INST(pspecp, vgenp->regprop);
2059 
2060 	parentp->namep = "virtual-device";
2061 	parentp->specp = pspecp;
2062 
2063 	/* save parentp in vgen_t */
2064 	vgenp->mdeg_parentp = parentp;
2065 
2066 	/*
2067 	 * Register an interest in 'virtual-device' nodes with a
2068 	 * 'name' property of 'network'
2069 	 */
2070 	rv = mdeg_register(parentp, &vdev_match, vgen_mdeg_cb, vgenp, &dev_hdl);
2071 	if (rv != MDEG_SUCCESS) {
2072 		DERR(vgenp, NULL, "mdeg_register failed\n");
2073 		goto mdeg_reg_fail;
2074 	}
2075 
2076 	/* Register an interest in 'port' nodes */
2077 	rv = mdeg_register(parentp, &vport_match, vgen_mdeg_port_cb, vgenp,
2078 	    &port_hdl);
2079 	if (rv != MDEG_SUCCESS) {
2080 		DERR(vgenp, NULL, "mdeg_register failed\n");
2081 		goto mdeg_reg_fail;
2082 	}
2083 
2084 	/* save mdeg handle in vgen_t */
2085 	vgenp->mdeg_dev_hdl = dev_hdl;
2086 	vgenp->mdeg_port_hdl = port_hdl;
2087 
2088 	return (DDI_SUCCESS);
2089 
2090 mdeg_reg_fail:
2091 	if (dev_hdl != NULL) {
2092 		(void) mdeg_unregister(dev_hdl);
2093 	}
2094 	KMEM_FREE(parentp);
2095 	kmem_free(pspecp, templatesz);
2096 	vgenp->mdeg_parentp = NULL;
2097 	return (DDI_FAILURE);
2098 }
2099 
2100 /* unregister with MD event generator */
2101 static void
2102 vgen_mdeg_unreg(vgen_t *vgenp)
2103 {
2104 	if (vgenp->mdeg_dev_hdl != NULL) {
2105 		(void) mdeg_unregister(vgenp->mdeg_dev_hdl);
2106 		vgenp->mdeg_dev_hdl = NULL;
2107 	}
2108 	if (vgenp->mdeg_port_hdl != NULL) {
2109 		(void) mdeg_unregister(vgenp->mdeg_port_hdl);
2110 		vgenp->mdeg_port_hdl = NULL;
2111 	}
2112 
2113 	if (vgenp->mdeg_parentp != NULL) {
2114 		kmem_free(vgenp->mdeg_parentp->specp,
2115 		    sizeof (vgen_prop_template));
2116 		KMEM_FREE(vgenp->mdeg_parentp);
2117 		vgenp->mdeg_parentp = NULL;
2118 	}
2119 }
2120 
2121 /* mdeg callback function for the port node */
2122 static int
2123 vgen_mdeg_port_cb(void *cb_argp, mdeg_result_t *resp)
2124 {
2125 	int idx;
2126 	int vsw_idx = -1;
2127 	uint64_t val;
2128 	vgen_t *vgenp;
2129 
2130 	if ((resp == NULL) || (cb_argp == NULL)) {
2131 		return (MDEG_FAILURE);
2132 	}
2133 
2134 	vgenp = (vgen_t *)cb_argp;
2135 	DBG1(vgenp, NULL, "enter\n");
2136 
2137 	mutex_enter(&vgenp->lock);
2138 
2139 	DBG1(vgenp, NULL, "ports: removed(%x), "
2140 	"added(%x), updated(%x)\n", resp->removed.nelem,
2141 	    resp->added.nelem, resp->match_curr.nelem);
2142 
2143 	for (idx = 0; idx < resp->removed.nelem; idx++) {
2144 		(void) vgen_remove_port(vgenp, resp->removed.mdp,
2145 		    resp->removed.mdep[idx]);
2146 	}
2147 
2148 	if (vgenp->vsw_portp == NULL) {
2149 		/*
2150 		 * find vsw_port and add it first, because other ports need
2151 		 * this when adding fdb entry (see vgen_port_init()).
2152 		 */
2153 		for (idx = 0; idx < resp->added.nelem; idx++) {
2154 			if (!(md_get_prop_val(resp->added.mdp,
2155 			    resp->added.mdep[idx], swport_propname, &val))) {
2156 				if (val == 0) {
2157 					/*
2158 					 * This port is connected to the
2159 					 * vsw on service domain.
2160 					 */
2161 					vsw_idx = idx;
2162 					if (vgen_add_port(vgenp,
2163 					    resp->added.mdp,
2164 					    resp->added.mdep[idx]) !=
2165 					    DDI_SUCCESS) {
2166 						cmn_err(CE_NOTE, "vnet%d Could "
2167 						    "not initialize virtual "
2168 						    "switch port.",
2169 						    vgenp->instance);
2170 						mutex_exit(&vgenp->lock);
2171 						return (MDEG_FAILURE);
2172 					}
2173 					break;
2174 				}
2175 			}
2176 		}
2177 		if (vsw_idx == -1) {
2178 			DWARN(vgenp, NULL, "can't find vsw_port\n");
2179 			mutex_exit(&vgenp->lock);
2180 			return (MDEG_FAILURE);
2181 		}
2182 	}
2183 
2184 	for (idx = 0; idx < resp->added.nelem; idx++) {
2185 		if ((vsw_idx != -1) && (vsw_idx == idx)) /* skip vsw_port */
2186 			continue;
2187 
2188 		/* If this port can't be added just skip it. */
2189 		(void) vgen_add_port(vgenp, resp->added.mdp,
2190 		    resp->added.mdep[idx]);
2191 	}
2192 
2193 	for (idx = 0; idx < resp->match_curr.nelem; idx++) {
2194 		(void) vgen_update_port(vgenp, resp->match_curr.mdp,
2195 		    resp->match_curr.mdep[idx],
2196 		    resp->match_prev.mdp,
2197 		    resp->match_prev.mdep[idx]);
2198 	}
2199 
2200 	mutex_exit(&vgenp->lock);
2201 	DBG1(vgenp, NULL, "exit\n");
2202 	return (MDEG_SUCCESS);
2203 }
2204 
2205 /* mdeg callback function for the vnet node */
2206 static int
2207 vgen_mdeg_cb(void *cb_argp, mdeg_result_t *resp)
2208 {
2209 	vgen_t		*vgenp;
2210 	vnet_t		*vnetp;
2211 	md_t		*mdp;
2212 	mde_cookie_t	node;
2213 	uint64_t	inst;
2214 	char		*node_name = NULL;
2215 
2216 	if ((resp == NULL) || (cb_argp == NULL)) {
2217 		return (MDEG_FAILURE);
2218 	}
2219 
2220 	vgenp = (vgen_t *)cb_argp;
2221 	vnetp = vgenp->vnetp;
2222 
2223 	DBG1(vgenp, NULL, "added %d : removed %d : curr matched %d"
2224 	    " : prev matched %d", resp->added.nelem, resp->removed.nelem,
2225 	    resp->match_curr.nelem, resp->match_prev.nelem);
2226 
2227 	mutex_enter(&vgenp->lock);
2228 
2229 	/*
2230 	 * We get an initial callback for this node as 'added' after
2231 	 * registering with mdeg. Note that we would have already gathered
2232 	 * information about this vnet node by walking MD earlier during attach
2233 	 * (in vgen_read_mdprops()). So, there is a window where the properties
2234 	 * of this node might have changed when we get this initial 'added'
2235 	 * callback. We handle this as if an update occured and invoke the same
2236 	 * function which handles updates to the properties of this vnet-node
2237 	 * if any. A non-zero 'match' value indicates that the MD has been
2238 	 * updated and that a 'network' node is present which may or may not
2239 	 * have been updated. It is up to the clients to examine their own
2240 	 * nodes and determine if they have changed.
2241 	 */
2242 	if (resp->added.nelem != 0) {
2243 
2244 		if (resp->added.nelem != 1) {
2245 			cmn_err(CE_NOTE, "!vnet%d: number of nodes added "
2246 			    "invalid: %d\n", vnetp->instance,
2247 			    resp->added.nelem);
2248 			goto vgen_mdeg_cb_err;
2249 		}
2250 
2251 		mdp = resp->added.mdp;
2252 		node = resp->added.mdep[0];
2253 
2254 	} else if (resp->match_curr.nelem != 0) {
2255 
2256 		if (resp->match_curr.nelem != 1) {
2257 			cmn_err(CE_NOTE, "!vnet%d: number of nodes updated "
2258 			    "invalid: %d\n", vnetp->instance,
2259 			    resp->match_curr.nelem);
2260 			goto vgen_mdeg_cb_err;
2261 		}
2262 
2263 		mdp = resp->match_curr.mdp;
2264 		node = resp->match_curr.mdep[0];
2265 
2266 	} else {
2267 		goto vgen_mdeg_cb_err;
2268 	}
2269 
2270 	/* Validate name and instance */
2271 	if (md_get_prop_str(mdp, node, "name", &node_name) != 0) {
2272 		DERR(vgenp, NULL, "unable to get node name\n");
2273 		goto vgen_mdeg_cb_err;
2274 	}
2275 
2276 	/* is this a virtual-network device? */
2277 	if (strcmp(node_name, vnet_propname) != 0) {
2278 		DERR(vgenp, NULL, "%s: Invalid node name: %s\n", node_name);
2279 		goto vgen_mdeg_cb_err;
2280 	}
2281 
2282 	if (md_get_prop_val(mdp, node, "cfg-handle", &inst)) {
2283 		DERR(vgenp, NULL, "prop(cfg-handle) not found\n");
2284 		goto vgen_mdeg_cb_err;
2285 	}
2286 
2287 	/* is this the right instance of vnet? */
2288 	if (inst != vgenp->regprop) {
2289 		DERR(vgenp, NULL,  "Invalid cfg-handle: %lx\n", inst);
2290 		goto vgen_mdeg_cb_err;
2291 	}
2292 
2293 	vgen_update_md_prop(vgenp, mdp, node);
2294 
2295 	mutex_exit(&vgenp->lock);
2296 	return (MDEG_SUCCESS);
2297 
2298 vgen_mdeg_cb_err:
2299 	mutex_exit(&vgenp->lock);
2300 	return (MDEG_FAILURE);
2301 }
2302 
2303 /*
2304  * Check to see if the relevant properties in the specified node have
2305  * changed, and if so take the appropriate action.
2306  */
2307 static void
2308 vgen_update_md_prop(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
2309 {
2310 	uint16_t	pvid;
2311 	uint16_t	*vids;
2312 	uint16_t	nvids;
2313 	vnet_t		*vnetp = vgenp->vnetp;
2314 	uint32_t	mtu;
2315 	boolean_t	pls_update;
2316 	enum		{ MD_init = 0x1,
2317 			    MD_vlans = 0x2,
2318 			    MD_mtu = 0x4,
2319 			    MD_pls = 0x8 } updated;
2320 	int		rv;
2321 
2322 	updated = MD_init;
2323 
2324 	/* Read the vlan ids */
2325 	vgen_vlan_read_ids(vgenp, VGEN_LOCAL, mdp, mdex, &pvid, &vids,
2326 	    &nvids, NULL);
2327 
2328 	/* Determine if there are any vlan id updates */
2329 	if ((pvid != vnetp->pvid) ||		/* pvid changed? */
2330 	    (nvids != vnetp->nvids) ||		/* # of vids changed? */
2331 	    ((nvids != 0) && (vnetp->nvids != 0) &&	/* vids changed? */
2332 	    bcmp(vids, vnetp->vids, sizeof (uint16_t) * nvids))) {
2333 		updated |= MD_vlans;
2334 	}
2335 
2336 	/* Read mtu */
2337 	vgen_mtu_read(vgenp, mdp, mdex, &mtu);
2338 	if (mtu != vnetp->mtu) {
2339 		if (mtu >= ETHERMTU && mtu <= VNET_MAX_MTU) {
2340 			updated |= MD_mtu;
2341 		} else {
2342 			cmn_err(CE_NOTE, "!vnet%d: Unable to process mtu update"
2343 			    " as the specified value:%d is invalid\n",
2344 			    vnetp->instance, mtu);
2345 		}
2346 	}
2347 
2348 	/*
2349 	 * Read the 'linkprop' property.
2350 	 */
2351 	vgen_linkprop_read(vgenp, mdp, mdex, &pls_update);
2352 	if (pls_update != vnetp->pls_update) {
2353 		updated |= MD_pls;
2354 	}
2355 
2356 	/* Now process the updated props */
2357 
2358 	if (updated & MD_vlans) {
2359 
2360 		/* save the new vlan ids */
2361 		vnetp->pvid = pvid;
2362 		if (vnetp->nvids != 0) {
2363 			kmem_free(vnetp->vids,
2364 			    sizeof (uint16_t) * vnetp->nvids);
2365 			vnetp->nvids = 0;
2366 		}
2367 		if (nvids != 0) {
2368 			vnetp->nvids = nvids;
2369 			vnetp->vids = vids;
2370 		}
2371 
2372 		/* reset vlan-unaware peers (ver < 1.3) and restart handshake */
2373 		vgen_reset_vlan_unaware_ports(vgenp);
2374 
2375 	} else {
2376 
2377 		if (nvids != 0) {
2378 			kmem_free(vids, sizeof (uint16_t) * nvids);
2379 		}
2380 	}
2381 
2382 	if (updated & MD_mtu) {
2383 
2384 		DBG2(vgenp, NULL, "curr_mtu(%d) new_mtu(%d)\n",
2385 		    vnetp->mtu, mtu);
2386 
2387 		rv = vnet_mtu_update(vnetp, mtu);
2388 		if (rv == 0) {
2389 			vgenp->max_frame_size = mtu +
2390 			    sizeof (struct ether_header) + VLAN_TAGSZ;
2391 		}
2392 	}
2393 
2394 	if (updated & MD_pls) {
2395 		/* enable/disable physical link state updates */
2396 		vnetp->pls_update = pls_update;
2397 		mutex_exit(&vgenp->lock);
2398 
2399 		/* reset vsw-port to re-negotiate with the updated prop. */
2400 		vgen_reset_vsw_port(vgenp);
2401 
2402 		mutex_enter(&vgenp->lock);
2403 	}
2404 }
2405 
2406 /* add a new port to the device */
2407 static int
2408 vgen_add_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
2409 {
2410 	vgen_port_t	*portp;
2411 	int		rv;
2412 
2413 	portp = kmem_zalloc(sizeof (vgen_port_t), KM_SLEEP);
2414 
2415 	rv = vgen_port_read_props(portp, vgenp, mdp, mdex);
2416 	if (rv != DDI_SUCCESS) {
2417 		KMEM_FREE(portp);
2418 		return (DDI_FAILURE);
2419 	}
2420 
2421 	rv = vgen_port_attach(portp);
2422 	if (rv != DDI_SUCCESS) {
2423 		return (DDI_FAILURE);
2424 	}
2425 
2426 	return (DDI_SUCCESS);
2427 }
2428 
2429 /* read properties of the port from its md node */
2430 static int
2431 vgen_port_read_props(vgen_port_t *portp, vgen_t *vgenp, md_t *mdp,
2432 	mde_cookie_t mdex)
2433 {
2434 	uint64_t		port_num;
2435 	uint64_t		*ldc_ids;
2436 	uint64_t		macaddr;
2437 	uint64_t		val;
2438 	int			num_ldcs;
2439 	int			i;
2440 	int			addrsz;
2441 	int			num_nodes = 0;
2442 	int			listsz = 0;
2443 	mde_cookie_t		*listp = NULL;
2444 	uint8_t			*addrp;
2445 	struct ether_addr	ea;
2446 
2447 	/* read "id" property to get the port number */
2448 	if (md_get_prop_val(mdp, mdex, id_propname, &port_num)) {
2449 		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
2450 		return (DDI_FAILURE);
2451 	}
2452 
2453 	/*
2454 	 * Find the channel endpoint node(s) under this port node.
2455 	 */
2456 	if ((num_nodes = md_node_count(mdp)) <= 0) {
2457 		DWARN(vgenp, NULL, "invalid number of nodes found (%d)",
2458 		    num_nodes);
2459 		return (DDI_FAILURE);
2460 	}
2461 
2462 	/* allocate space for node list */
2463 	listsz = num_nodes * sizeof (mde_cookie_t);
2464 	listp = kmem_zalloc(listsz, KM_NOSLEEP);
2465 	if (listp == NULL)
2466 		return (DDI_FAILURE);
2467 
2468 	num_ldcs = md_scan_dag(mdp, mdex,
2469 	    md_find_name(mdp, channel_propname),
2470 	    md_find_name(mdp, "fwd"), listp);
2471 
2472 	if (num_ldcs <= 0) {
2473 		DWARN(vgenp, NULL, "can't find %s nodes", channel_propname);
2474 		kmem_free(listp, listsz);
2475 		return (DDI_FAILURE);
2476 	}
2477 
2478 	DBG2(vgenp, NULL, "num_ldcs %d", num_ldcs);
2479 
2480 	ldc_ids = kmem_zalloc(num_ldcs * sizeof (uint64_t), KM_NOSLEEP);
2481 	if (ldc_ids == NULL) {
2482 		kmem_free(listp, listsz);
2483 		return (DDI_FAILURE);
2484 	}
2485 
2486 	for (i = 0; i < num_ldcs; i++) {
2487 		/* read channel ids */
2488 		if (md_get_prop_val(mdp, listp[i], id_propname, &ldc_ids[i])) {
2489 			DWARN(vgenp, NULL, "prop(%s) not found\n",
2490 			    id_propname);
2491 			kmem_free(listp, listsz);
2492 			kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
2493 			return (DDI_FAILURE);
2494 		}
2495 		DBG2(vgenp, NULL, "ldc_id 0x%llx", ldc_ids[i]);
2496 	}
2497 
2498 	kmem_free(listp, listsz);
2499 
2500 	if (md_get_prop_data(mdp, mdex, rmacaddr_propname, &addrp,
2501 	    &addrsz)) {
2502 		DWARN(vgenp, NULL, "prop(%s) not found\n", rmacaddr_propname);
2503 		kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
2504 		return (DDI_FAILURE);
2505 	}
2506 
2507 	if (addrsz < ETHERADDRL) {
2508 		DWARN(vgenp, NULL, "invalid address size (%d)\n", addrsz);
2509 		kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
2510 		return (DDI_FAILURE);
2511 	}
2512 
2513 	macaddr = *((uint64_t *)addrp);
2514 
2515 	DBG2(vgenp, NULL, "remote mac address 0x%llx\n", macaddr);
2516 
2517 	for (i = ETHERADDRL - 1; i >= 0; i--) {
2518 		ea.ether_addr_octet[i] = macaddr & 0xFF;
2519 		macaddr >>= 8;
2520 	}
2521 
2522 	if (!(md_get_prop_val(mdp, mdex, swport_propname, &val))) {
2523 		if (val == 0) {
2524 			/* This port is connected to the vswitch */
2525 			portp->is_vsw_port = B_TRUE;
2526 		} else {
2527 			portp->is_vsw_port = B_FALSE;
2528 		}
2529 	}
2530 
2531 	/* now update all properties into the port */
2532 	portp->vgenp = vgenp;
2533 	portp->port_num = port_num;
2534 	ether_copy(&ea, &portp->macaddr);
2535 	portp->ldc_ids = kmem_zalloc(sizeof (uint64_t) * num_ldcs, KM_SLEEP);
2536 	bcopy(ldc_ids, portp->ldc_ids, sizeof (uint64_t) * num_ldcs);
2537 	portp->num_ldcs = num_ldcs;
2538 
2539 	/* read vlan id properties of this port node */
2540 	vgen_vlan_read_ids(portp, VGEN_PEER, mdp, mdex, &portp->pvid,
2541 	    &portp->vids, &portp->nvids, NULL);
2542 
2543 	kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
2544 
2545 	return (DDI_SUCCESS);
2546 }
2547 
2548 /* remove a port from the device */
2549 static int
2550 vgen_remove_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
2551 {
2552 	uint64_t	port_num;
2553 	vgen_port_t	*portp;
2554 	vgen_portlist_t	*plistp;
2555 
2556 	/* read "id" property to get the port number */
2557 	if (md_get_prop_val(mdp, mdex, id_propname, &port_num)) {
2558 		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
2559 		return (DDI_FAILURE);
2560 	}
2561 
2562 	plistp = &(vgenp->vgenports);
2563 
2564 	WRITE_ENTER(&plistp->rwlock);
2565 	portp = vgen_port_lookup(plistp, (int)port_num);
2566 	if (portp == NULL) {
2567 		DWARN(vgenp, NULL, "can't find port(%lx)\n", port_num);
2568 		RW_EXIT(&plistp->rwlock);
2569 		return (DDI_FAILURE);
2570 	}
2571 
2572 	vgen_port_detach_mdeg(portp);
2573 	RW_EXIT(&plistp->rwlock);
2574 
2575 	return (DDI_SUCCESS);
2576 }
2577 
2578 /* attach a port to the device based on mdeg data */
2579 static int
2580 vgen_port_attach(vgen_port_t *portp)
2581 {
2582 	int			i;
2583 	vgen_portlist_t		*plistp;
2584 	vgen_t			*vgenp;
2585 	uint64_t		*ldcids;
2586 	uint32_t		num_ldcs;
2587 	mac_register_t		*macp;
2588 	vio_net_res_type_t	type;
2589 	int			rv;
2590 
2591 	ASSERT(portp != NULL);
2592 
2593 	vgenp = portp->vgenp;
2594 	ldcids = portp->ldc_ids;
2595 	num_ldcs = portp->num_ldcs;
2596 
2597 	DBG1(vgenp, NULL, "port_num(%d)\n", portp->port_num);
2598 
2599 	mutex_init(&portp->lock, NULL, MUTEX_DRIVER, NULL);
2600 	rw_init(&portp->ldclist.rwlock, NULL, RW_DRIVER, NULL);
2601 	portp->ldclist.headp = NULL;
2602 
2603 	for (i = 0; i < num_ldcs; i++) {
2604 		DBG2(vgenp, NULL, "ldcid (%lx)\n", ldcids[i]);
2605 		if (vgen_ldc_attach(portp, ldcids[i]) == DDI_FAILURE) {
2606 			vgen_port_detach(portp);
2607 			return (DDI_FAILURE);
2608 		}
2609 	}
2610 
2611 	/* create vlan id hash table */
2612 	vgen_vlan_create_hash(portp);
2613 
2614 	if (portp->is_vsw_port == B_TRUE) {
2615 		/* This port is connected to the switch port */
2616 		(void) atomic_swap_32(&portp->use_vsw_port, B_FALSE);
2617 		type = VIO_NET_RES_LDC_SERVICE;
2618 	} else {
2619 		(void) atomic_swap_32(&portp->use_vsw_port, B_TRUE);
2620 		type = VIO_NET_RES_LDC_GUEST;
2621 	}
2622 
2623 	if ((macp = mac_alloc(MAC_VERSION)) == NULL) {
2624 		vgen_port_detach(portp);
2625 		return (DDI_FAILURE);
2626 	}
2627 	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
2628 	macp->m_driver = portp;
2629 	macp->m_dip = vgenp->vnetdip;
2630 	macp->m_src_addr = (uint8_t *)&(vgenp->macaddr);
2631 	macp->m_callbacks = &vgen_m_callbacks;
2632 	macp->m_min_sdu = 0;
2633 	macp->m_max_sdu = ETHERMTU;
2634 
2635 	mutex_enter(&portp->lock);
2636 	rv = vio_net_resource_reg(macp, type, vgenp->macaddr,
2637 	    portp->macaddr, &portp->vhp, &portp->vcb);
2638 	mutex_exit(&portp->lock);
2639 	mac_free(macp);
2640 
2641 	if (rv == 0) {
2642 		/* link it into the list of ports */
2643 		plistp = &(vgenp->vgenports);
2644 		WRITE_ENTER(&plistp->rwlock);
2645 		vgen_port_list_insert(portp);
2646 		RW_EXIT(&plistp->rwlock);
2647 
2648 		if (portp->is_vsw_port == B_TRUE) {
2649 			/* We now have the vswitch port attached */
2650 			vgenp->vsw_portp = portp;
2651 			(void) atomic_swap_32(&vgenp->vsw_port_refcnt, 0);
2652 		}
2653 	} else {
2654 		DERR(vgenp, NULL, "vio_net_resource_reg failed for portp=0x%p",
2655 		    portp);
2656 		vgen_port_detach(portp);
2657 	}
2658 
2659 	DBG1(vgenp, NULL, "exit: port_num(%d)\n", portp->port_num);
2660 	return (DDI_SUCCESS);
2661 }
2662 
2663 /* detach a port from the device based on mdeg data */
2664 static void
2665 vgen_port_detach_mdeg(vgen_port_t *portp)
2666 {
2667 	vgen_t *vgenp = portp->vgenp;
2668 
2669 	DBG1(vgenp, NULL, "enter: port_num(%d)\n", portp->port_num);
2670 
2671 	mutex_enter(&portp->lock);
2672 
2673 	/* stop the port if needed */
2674 	if (portp->flags & VGEN_STARTED) {
2675 		vgen_port_uninit(portp);
2676 	}
2677 
2678 	mutex_exit(&portp->lock);
2679 	vgen_port_detach(portp);
2680 
2681 	DBG1(vgenp, NULL, "exit: port_num(%d)\n", portp->port_num);
2682 }
2683 
2684 static int
2685 vgen_update_port(vgen_t *vgenp, md_t *curr_mdp, mde_cookie_t curr_mdex,
2686 	md_t *prev_mdp, mde_cookie_t prev_mdex)
2687 {
2688 	uint64_t	cport_num;
2689 	uint64_t	pport_num;
2690 	vgen_portlist_t	*plistp;
2691 	vgen_port_t	*portp;
2692 	boolean_t	updated_vlans = B_FALSE;
2693 	uint16_t	pvid;
2694 	uint16_t	*vids;
2695 	uint16_t	nvids;
2696 
2697 	/*
2698 	 * For now, we get port updates only if vlan ids changed.
2699 	 * We read the port num and do some sanity check.
2700 	 */
2701 	if (md_get_prop_val(curr_mdp, curr_mdex, id_propname, &cport_num)) {
2702 		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
2703 		return (DDI_FAILURE);
2704 	}
2705 
2706 	if (md_get_prop_val(prev_mdp, prev_mdex, id_propname, &pport_num)) {
2707 		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
2708 		return (DDI_FAILURE);
2709 	}
2710 	if (cport_num != pport_num)
2711 		return (DDI_FAILURE);
2712 
2713 	plistp = &(vgenp->vgenports);
2714 
2715 	READ_ENTER(&plistp->rwlock);
2716 
2717 	portp = vgen_port_lookup(plistp, (int)cport_num);
2718 	if (portp == NULL) {
2719 		DWARN(vgenp, NULL, "can't find port(%lx)\n", cport_num);
2720 		RW_EXIT(&plistp->rwlock);
2721 		return (DDI_FAILURE);
2722 	}
2723 
2724 	/* Read the vlan ids */
2725 	vgen_vlan_read_ids(portp, VGEN_PEER, curr_mdp, curr_mdex, &pvid, &vids,
2726 	    &nvids, NULL);
2727 
2728 	/* Determine if there are any vlan id updates */
2729 	if ((pvid != portp->pvid) ||		/* pvid changed? */
2730 	    (nvids != portp->nvids) ||		/* # of vids changed? */
2731 	    ((nvids != 0) && (portp->nvids != 0) &&	/* vids changed? */
2732 	    bcmp(vids, portp->vids, sizeof (uint16_t) * nvids))) {
2733 		updated_vlans = B_TRUE;
2734 	}
2735 
2736 	if (updated_vlans == B_FALSE) {
2737 		RW_EXIT(&plistp->rwlock);
2738 		return (DDI_FAILURE);
2739 	}
2740 
2741 	/* remove the port from vlans it has been assigned to */
2742 	vgen_vlan_remove_ids(portp);
2743 
2744 	/* save the new vlan ids */
2745 	portp->pvid = pvid;
2746 	if (portp->nvids != 0) {
2747 		kmem_free(portp->vids, sizeof (uint16_t) * portp->nvids);
2748 		portp->nvids = 0;
2749 	}
2750 	if (nvids != 0) {
2751 		portp->vids = kmem_zalloc(sizeof (uint16_t) * nvids, KM_SLEEP);
2752 		bcopy(vids, portp->vids, sizeof (uint16_t) * nvids);
2753 		portp->nvids = nvids;
2754 		kmem_free(vids, sizeof (uint16_t) * nvids);
2755 	}
2756 
2757 	/* add port to the new vlans */
2758 	vgen_vlan_add_ids(portp);
2759 
2760 	/* reset the port if it is vlan unaware (ver < 1.3) */
2761 	vgen_vlan_unaware_port_reset(portp);
2762 
2763 	RW_EXIT(&plistp->rwlock);
2764 
2765 	return (DDI_SUCCESS);
2766 }
2767 
2768 static uint64_t
2769 vgen_port_stat(vgen_port_t *portp, uint_t stat)
2770 {
2771 	vgen_ldclist_t	*ldclp;
2772 	vgen_ldc_t *ldcp;
2773 	uint64_t	val;
2774 
2775 	val = 0;
2776 	ldclp = &portp->ldclist;
2777 
2778 	READ_ENTER(&ldclp->rwlock);
2779 	for (ldcp = ldclp->headp; ldcp != NULL; ldcp = ldcp->nextp) {
2780 		val += vgen_ldc_stat(ldcp, stat);
2781 	}
2782 	RW_EXIT(&ldclp->rwlock);
2783 
2784 	return (val);
2785 }
2786 
2787 /* allocate receive resources */
2788 static int
2789 vgen_init_multipools(vgen_ldc_t *ldcp)
2790 {
2791 	size_t		data_sz;
2792 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
2793 	int		status;
2794 	uint32_t	sz1 = 0;
2795 	uint32_t	sz2 = 0;
2796 	uint32_t	sz3 = 0;
2797 	uint32_t	sz4 = 0;
2798 
2799 	/*
2800 	 * We round up the mtu specified to be a multiple of 2K.
2801 	 * We then create rx pools based on the rounded up size.
2802 	 */
2803 	data_sz = vgenp->max_frame_size + VNET_IPALIGN + VNET_LDCALIGN;
2804 	data_sz = VNET_ROUNDUP_2K(data_sz);
2805 
2806 	/*
2807 	 * If pool sizes are specified, use them. Note that the presence of
2808 	 * the first tunable will be used as a hint.
2809 	 */
2810 	if (vgen_rbufsz1 != 0) {
2811 
2812 		sz1 = vgen_rbufsz1;
2813 		sz2 = vgen_rbufsz2;
2814 		sz3 = vgen_rbufsz3;
2815 		sz4 = vgen_rbufsz4;
2816 
2817 		if (sz4 == 0) { /* need 3 pools */
2818 
2819 			ldcp->max_rxpool_size = sz3;
2820 			status = vio_init_multipools(&ldcp->vmp,
2821 			    VGEN_NUM_VMPOOLS, sz1, sz2, sz3, vgen_nrbufs1,
2822 			    vgen_nrbufs2, vgen_nrbufs3);
2823 
2824 		} else {
2825 
2826 			ldcp->max_rxpool_size = sz4;
2827 			status = vio_init_multipools(&ldcp->vmp,
2828 			    VGEN_NUM_VMPOOLS + 1, sz1, sz2, sz3, sz4,
2829 			    vgen_nrbufs1, vgen_nrbufs2, vgen_nrbufs3,
2830 			    vgen_nrbufs4);
2831 		}
2832 		return (status);
2833 	}
2834 
2835 	/*
2836 	 * Pool sizes are not specified. We select the pool sizes based on the
2837 	 * mtu if vnet_jumbo_rxpools is enabled.
2838 	 */
2839 	if (vnet_jumbo_rxpools == B_FALSE || data_sz == VNET_2K) {
2840 		/*
2841 		 * Receive buffer pool allocation based on mtu is disabled.
2842 		 * Use the default mechanism of standard size pool allocation.
2843 		 */
2844 		sz1 = VGEN_DBLK_SZ_128;
2845 		sz2 = VGEN_DBLK_SZ_256;
2846 		sz3 = VGEN_DBLK_SZ_2048;
2847 		ldcp->max_rxpool_size = sz3;
2848 
2849 		status = vio_init_multipools(&ldcp->vmp, VGEN_NUM_VMPOOLS,
2850 		    sz1, sz2, sz3,
2851 		    vgen_nrbufs1, vgen_nrbufs2, vgen_nrbufs3);
2852 
2853 		return (status);
2854 	}
2855 
2856 	switch (data_sz) {
2857 
2858 	case VNET_4K:
2859 
2860 		sz1 = VGEN_DBLK_SZ_128;
2861 		sz2 = VGEN_DBLK_SZ_256;
2862 		sz3 = VGEN_DBLK_SZ_2048;
2863 		sz4 = sz3 << 1;			/* 4K */
2864 		ldcp->max_rxpool_size = sz4;
2865 
2866 		status = vio_init_multipools(&ldcp->vmp, VGEN_NUM_VMPOOLS + 1,
2867 		    sz1, sz2, sz3, sz4,
2868 		    vgen_nrbufs1, vgen_nrbufs2, vgen_nrbufs3, vgen_nrbufs4);
2869 		break;
2870 
2871 	default:	/* data_sz:  4K+ to 16K */
2872 
2873 		sz1 = VGEN_DBLK_SZ_256;
2874 		sz2 = VGEN_DBLK_SZ_2048;
2875 		sz3 = data_sz >> 1;	/* Jumbo-size/2 */
2876 		sz4 = data_sz;		/* Jumbo-size  */
2877 		ldcp->max_rxpool_size = sz4;
2878 
2879 		status = vio_init_multipools(&ldcp->vmp, VGEN_NUM_VMPOOLS + 1,
2880 		    sz1, sz2, sz3, sz4,
2881 		    vgen_nrbufs1, vgen_nrbufs2, vgen_nrbufs3, vgen_nrbufs4);
2882 		break;
2883 
2884 	}
2885 
2886 	return (status);
2887 }
2888 
/*
 * Attach the channel corresponding to the given ldc_id to the port.
 *
 * A vgen_ldc_t is allocated and set up in stages: locks, ldc_init(), an
 * optional receive worker thread, the LDC callback and read buffer, the
 * transmit ring, the receive mblk pools and the kstats. Each completed
 * stage is recorded as a bit in 'attach_state' so that the failure path
 * at 'ldc_attach_failed' undoes exactly the stages that were reached.
 * On success the channel is inserted at the head of the port's channel
 * list and flagged CHANNEL_ATTACHED.
 *
 * Returns DDI_SUCCESS or DDI_FAILURE.
 */
static int
vgen_ldc_attach(vgen_port_t *portp, uint64_t ldc_id)
{
	vgen_t 		*vgenp;
	vgen_ldclist_t	*ldclp;
	vgen_ldc_t 	*ldcp, **prev_ldcp;
	ldc_attr_t 	attr;
	int 		status;
	ldc_status_t	istatus;
	char		kname[MAXNAMELEN];
	int		instance;
	/* one bit per setup stage that the failure path knows how to undo */
	enum	{AST_init = 0x0, AST_ldc_alloc = 0x1,
		AST_mutex_init = 0x2, AST_ldc_init = 0x4,
		AST_ldc_reg_cb = 0x8, AST_alloc_tx_ring = 0x10,
		AST_create_rxmblks = 0x20,
		AST_create_rcv_thread = 0x40} attach_state;

	attach_state = AST_init;
	vgenp = portp->vgenp;
	ldclp = &portp->ldclist;

	ldcp = kmem_zalloc(sizeof (vgen_ldc_t), KM_NOSLEEP);
	if (ldcp == NULL) {
		/* attach_state is still AST_init, so nothing is undone */
		goto ldc_attach_failed;
	}
	ldcp->ldc_id = ldc_id;
	ldcp->portp = portp;

	attach_state |= AST_ldc_alloc;

	mutex_init(&ldcp->txlock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&ldcp->cblock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&ldcp->tclock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&ldcp->wrlock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&ldcp->rxlock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&ldcp->pollq_lock, NULL, MUTEX_DRIVER, NULL);

	attach_state |= AST_mutex_init;

	/* initialize the channel with the attributes used by vnet */
	attr.devclass = LDC_DEV_NT;
	attr.instance = vgenp->instance;
	attr.mode = LDC_MODE_UNRELIABLE;
	attr.mtu = vnet_ldc_mtu;
	status = ldc_init(ldc_id, &attr, &ldcp->ldc_handle);
	if (status != 0) {
		DWARN(vgenp, ldcp, "ldc_init failed,rv (%d)\n", status);
		goto ldc_attach_failed;
	}
	attach_state |= AST_ldc_init;

	/* create the receive worker thread only if it is enabled */
	if (vgen_rcv_thread_enabled) {
		ldcp->rcv_thr_flags = 0;

		mutex_init(&ldcp->rcv_thr_lock, NULL, MUTEX_DRIVER, NULL);
		cv_init(&ldcp->rcv_thr_cv, NULL, CV_DRIVER, NULL);
		ldcp->rcv_thread = thread_create(NULL, 2 * DEFAULTSTKSZ,
		    vgen_ldc_rcv_worker, ldcp, 0, &p0, TS_RUN, maxclsyspri);

		/*
		 * The stage is recorded before the thread pointer is checked
		 * so that the failure path destroys the lock and cv above
		 * even when no thread was created.
		 */
		attach_state |= AST_create_rcv_thread;
		if (ldcp->rcv_thread == NULL) {
			DWARN(vgenp, ldcp, "Failed to create worker thread");
			goto ldc_attach_failed;
		}
	}

	status = ldc_reg_callback(ldcp->ldc_handle, vgen_ldc_cb, (caddr_t)ldcp);
	if (status != 0) {
		DWARN(vgenp, ldcp, "ldc_reg_callback failed, rv (%d)\n",
		    status);
		goto ldc_attach_failed;
	}
	/*
	 * allocate a message for ldc_read()s, big enough to hold ctrl and
	 * data msgs, including raw data msgs used to recv priority frames.
	 */
	ldcp->msglen = VIO_PKT_DATA_HDRSIZE + vgenp->max_frame_size;
	ldcp->ldcmsg = kmem_alloc(ldcp->msglen, KM_SLEEP);
	/* this stage covers both the callback and the message buffer */
	attach_state |= AST_ldc_reg_cb;

	(void) ldc_status(ldcp->ldc_handle, &istatus);
	ASSERT(istatus == LDC_INIT);
	ldcp->ldc_status = istatus;

	/* allocate transmit resources */
	status = vgen_alloc_tx_ring(ldcp);
	if (status != 0) {
		goto ldc_attach_failed;
	}
	attach_state |= AST_alloc_tx_ring;

	/* allocate receive resources */
	status = vgen_init_multipools(ldcp);
	if (status != 0) {
		/*
		 * We do not return failure if receive mblk pools can't be
		 * allocated; instead allocb(9F) will be used to dynamically
		 * allocate buffers during receive.
		 */
		DWARN(vgenp, ldcp,
		    "vnet%d: status(%d), failed to allocate rx mblk pools for "
		    "channel(0x%lx)\n",
		    vgenp->instance, status, ldcp->ldc_id);
	} else {
		attach_state |= AST_create_rxmblks;
	}

	/* Setup kstats for the channel */
	instance = vgenp->instance;
	(void) sprintf(kname, "vnetldc0x%lx", ldcp->ldc_id);
	ldcp->ksp = vgen_setup_kstats("vnet", instance, kname, &ldcp->stats);
	if (ldcp->ksp == NULL) {
		goto ldc_attach_failed;
	}

	/* initialize vgen_versions supported */
	bcopy(vgen_versions, ldcp->vgen_versions, sizeof (ldcp->vgen_versions));
	vgen_reset_vnet_proto_ops(ldcp);

	/* link it into the list of channels for this port (at the head) */
	WRITE_ENTER(&ldclp->rwlock);
	prev_ldcp = (vgen_ldc_t **)(&ldclp->headp);
	ldcp->nextp = *prev_ldcp;
	*prev_ldcp = ldcp;
	RW_EXIT(&ldclp->rwlock);

	ldcp->link_state = LINK_STATE_UNKNOWN;
#ifdef	VNET_IOC_DEBUG
	ldcp->link_down_forced = B_FALSE;
#endif
	ldcp->flags |= CHANNEL_ATTACHED;
	return (DDI_SUCCESS);

ldc_attach_failed:
	/* undo only the stages recorded in attach_state */
	if (attach_state & AST_ldc_reg_cb) {
		(void) ldc_unreg_callback(ldcp->ldc_handle);
		kmem_free(ldcp->ldcmsg, ldcp->msglen);
	}
	if (attach_state & AST_create_rcv_thread) {
		/* the thread itself may not exist; its lock and cv do */
		if (ldcp->rcv_thread != NULL) {
			vgen_stop_rcv_thread(ldcp);
		}
		mutex_destroy(&ldcp->rcv_thr_lock);
		cv_destroy(&ldcp->rcv_thr_cv);
	}
	if (attach_state & AST_create_rxmblks) {
		vio_mblk_pool_t *fvmp = NULL;
		vio_destroy_multipools(&ldcp->vmp, &fvmp);
		ASSERT(fvmp == NULL);
	}
	if (attach_state & AST_alloc_tx_ring) {
		vgen_free_tx_ring(ldcp);
	}
	if (attach_state & AST_ldc_init) {
		(void) ldc_fini(ldcp->ldc_handle);
	}
	if (attach_state & AST_mutex_init) {
		mutex_destroy(&ldcp->tclock);
		mutex_destroy(&ldcp->txlock);
		mutex_destroy(&ldcp->cblock);
		mutex_destroy(&ldcp->wrlock);
		mutex_destroy(&ldcp->rxlock);
		mutex_destroy(&ldcp->pollq_lock);
	}
	if (attach_state & AST_ldc_alloc) {
		KMEM_FREE(ldcp);
	}
	return (DDI_FAILURE);
}
3058 
3059 /* detach a channel from the port */
3060 static void
3061 vgen_ldc_detach(vgen_ldc_t *ldcp)
3062 {
3063 	vgen_port_t	*portp;
3064 	vgen_t 		*vgenp;
3065 	vgen_ldc_t 	*pldcp;
3066 	vgen_ldc_t	**prev_ldcp;
3067 	vgen_ldclist_t	*ldclp;
3068 
3069 	portp = ldcp->portp;
3070 	vgenp = portp->vgenp;
3071 	ldclp = &portp->ldclist;
3072 
3073 	prev_ldcp =  (vgen_ldc_t **)&ldclp->headp;
3074 	for (; (pldcp = *prev_ldcp) != NULL; prev_ldcp = &pldcp->nextp) {
3075 		if (pldcp == ldcp) {
3076 			break;
3077 		}
3078 	}
3079 
3080 	if (pldcp == NULL) {
3081 		/* invalid ldcp? */
3082 		return;
3083 	}
3084 
3085 	if (ldcp->ldc_status != LDC_INIT) {
3086 		DWARN(vgenp, ldcp, "ldc_status is not INIT\n");
3087 	}
3088 
3089 	if (ldcp->flags & CHANNEL_ATTACHED) {
3090 		ldcp->flags &= ~(CHANNEL_ATTACHED);
3091 
3092 		(void) ldc_unreg_callback(ldcp->ldc_handle);
3093 		if (ldcp->rcv_thread != NULL) {
3094 			/* First stop the receive thread */
3095 			vgen_stop_rcv_thread(ldcp);
3096 			mutex_destroy(&ldcp->rcv_thr_lock);
3097 			cv_destroy(&ldcp->rcv_thr_cv);
3098 		}
3099 		kmem_free(ldcp->ldcmsg, ldcp->msglen);
3100 
3101 		vgen_destroy_kstats(ldcp->ksp);
3102 		ldcp->ksp = NULL;
3103 
3104 		/*
3105 		 * if we cannot reclaim all mblks, put this
3106 		 * on the list of pools(vgenp->rmp) to be reclaimed when the
3107 		 * device gets detached (see vgen_uninit()).
3108 		 */
3109 		vio_destroy_multipools(&ldcp->vmp, &vgenp->rmp);
3110 
3111 		/* free transmit resources */
3112 		vgen_free_tx_ring(ldcp);
3113 
3114 		(void) ldc_fini(ldcp->ldc_handle);
3115 		mutex_destroy(&ldcp->tclock);
3116 		mutex_destroy(&ldcp->txlock);
3117 		mutex_destroy(&ldcp->cblock);
3118 		mutex_destroy(&ldcp->wrlock);
3119 		mutex_destroy(&ldcp->rxlock);
3120 		mutex_destroy(&ldcp->pollq_lock);
3121 
3122 		/* unlink it from the list */
3123 		*prev_ldcp = ldcp->nextp;
3124 		KMEM_FREE(ldcp);
3125 	}
3126 }
3127 
3128 /*
3129  * This function allocates transmit resources for the channel.
3130  * The resources consist of a transmit descriptor ring and an associated
3131  * transmit buffer ring.
3132  */
3133 static int
3134 vgen_alloc_tx_ring(vgen_ldc_t *ldcp)
3135 {
3136 	void *tbufp;
3137 	ldc_mem_info_t minfo;
3138 	uint32_t txdsize;
3139 	uint32_t tbufsize;
3140 	int status;
3141 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3142 
3143 	ldcp->num_txds = vnet_ntxds;
3144 	txdsize = sizeof (vnet_public_desc_t);
3145 	tbufsize = sizeof (vgen_private_desc_t);
3146 
3147 	/* allocate transmit buffer ring */
3148 	tbufp = kmem_zalloc(ldcp->num_txds * tbufsize, KM_NOSLEEP);
3149 	if (tbufp == NULL) {
3150 		return (DDI_FAILURE);
3151 	}
3152 
3153 	/* create transmit descriptor ring */
3154 	status = ldc_mem_dring_create(ldcp->num_txds, txdsize,
3155 	    &ldcp->tx_dhandle);
3156 	if (status) {
3157 		DWARN(vgenp, ldcp, "ldc_mem_dring_create() failed\n");
3158 		kmem_free(tbufp, ldcp->num_txds * tbufsize);
3159 		return (DDI_FAILURE);
3160 	}
3161 
3162 	/* get the addr of descripror ring */
3163 	status = ldc_mem_dring_info(ldcp->tx_dhandle, &minfo);
3164 	if (status) {
3165 		DWARN(vgenp, ldcp, "ldc_mem_dring_info() failed\n");
3166 		kmem_free(tbufp, ldcp->num_txds * tbufsize);
3167 		(void) ldc_mem_dring_destroy(ldcp->tx_dhandle);
3168 		ldcp->tbufp = NULL;
3169 		return (DDI_FAILURE);
3170 	}
3171 	ldcp->txdp = (vnet_public_desc_t *)(minfo.vaddr);
3172 	ldcp->tbufp = tbufp;
3173 
3174 	ldcp->txdendp = &((ldcp->txdp)[ldcp->num_txds]);
3175 	ldcp->tbufendp = &((ldcp->tbufp)[ldcp->num_txds]);
3176 
3177 	return (DDI_SUCCESS);
3178 }
3179 
3180 /* Free transmit resources for the channel */
3181 static void
3182 vgen_free_tx_ring(vgen_ldc_t *ldcp)
3183 {
3184 	int tbufsize = sizeof (vgen_private_desc_t);
3185 
3186 	/* free transmit descriptor ring */
3187 	(void) ldc_mem_dring_destroy(ldcp->tx_dhandle);
3188 
3189 	/* free transmit buffer ring */
3190 	kmem_free(ldcp->tbufp, ldcp->num_txds * tbufsize);
3191 	ldcp->txdp = ldcp->txdendp = NULL;
3192 	ldcp->tbufp = ldcp->tbufendp = NULL;
3193 }
3194 
3195 /* enable transmit/receive on the channels for the port */
3196 static void
3197 vgen_init_ldcs(vgen_port_t *portp)
3198 {
3199 	vgen_ldclist_t	*ldclp = &portp->ldclist;
3200 	vgen_ldc_t	*ldcp;
3201 
3202 	READ_ENTER(&ldclp->rwlock);
3203 	ldcp =  ldclp->headp;
3204 	for (; ldcp  != NULL; ldcp = ldcp->nextp) {
3205 		(void) vgen_ldc_init(ldcp);
3206 	}
3207 	RW_EXIT(&ldclp->rwlock);
3208 }
3209 
3210 /* stop transmit/receive on the channels for the port */
3211 static void
3212 vgen_uninit_ldcs(vgen_port_t *portp)
3213 {
3214 	vgen_ldclist_t	*ldclp = &portp->ldclist;
3215 	vgen_ldc_t	*ldcp;
3216 
3217 	READ_ENTER(&ldclp->rwlock);
3218 	ldcp =  ldclp->headp;
3219 	for (; ldcp  != NULL; ldcp = ldcp->nextp) {
3220 		vgen_ldc_uninit(ldcp);
3221 	}
3222 	RW_EXIT(&ldclp->rwlock);
3223 }
3224 
3225 /* enable transmit/receive on the channel */
3226 static int
3227 vgen_ldc_init(vgen_ldc_t *ldcp)
3228 {
3229 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3230 	ldc_status_t	istatus;
3231 	int		rv;
3232 	uint32_t	retries = 0;
3233 	enum	{ ST_init = 0x0, ST_ldc_open = 0x1,
3234 		ST_init_tbufs = 0x2, ST_cb_enable = 0x4} init_state;
3235 	init_state = ST_init;
3236 
3237 	DBG1(vgenp, ldcp, "enter\n");
3238 	LDC_LOCK(ldcp);
3239 
3240 	rv = ldc_open(ldcp->ldc_handle);
3241 	if (rv != 0) {
3242 		DWARN(vgenp, ldcp, "ldc_open failed: rv(%d)\n", rv);
3243 		goto ldcinit_failed;
3244 	}
3245 	init_state |= ST_ldc_open;
3246 
3247 	(void) ldc_status(ldcp->ldc_handle, &istatus);
3248 	if (istatus != LDC_OPEN && istatus != LDC_READY) {
3249 		DWARN(vgenp, ldcp, "status(%d) is not OPEN/READY\n", istatus);
3250 		goto ldcinit_failed;
3251 	}
3252 	ldcp->ldc_status = istatus;
3253 
3254 	rv = vgen_init_tbufs(ldcp);
3255 	if (rv != 0) {
3256 		DWARN(vgenp, ldcp, "vgen_init_tbufs() failed\n");
3257 		goto ldcinit_failed;
3258 	}
3259 	init_state |= ST_init_tbufs;
3260 
3261 	rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_ENABLE);
3262 	if (rv != 0) {
3263 		DWARN(vgenp, ldcp, "ldc_set_cb_mode failed: rv(%d)\n", rv);
3264 		goto ldcinit_failed;
3265 	}
3266 
3267 	init_state |= ST_cb_enable;
3268 
3269 	do {
3270 		rv = ldc_up(ldcp->ldc_handle);
3271 		if ((rv != 0) && (rv == EWOULDBLOCK)) {
3272 			DBG2(vgenp, ldcp, "ldc_up err rv(%d)\n", rv);
3273 			drv_usecwait(VGEN_LDC_UP_DELAY);
3274 		}
3275 		if (retries++ >= vgen_ldcup_retries)
3276 			break;
3277 	} while (rv == EWOULDBLOCK);
3278 
3279 	(void) ldc_status(ldcp->ldc_handle, &istatus);
3280 	if (istatus == LDC_UP) {
3281 		DWARN(vgenp, ldcp, "status(%d) is UP\n", istatus);
3282 	}
3283 
3284 	ldcp->ldc_status = istatus;
3285 
3286 	/* initialize transmit watchdog timeout */
3287 	ldcp->wd_tid = timeout(vgen_ldc_watchdog, (caddr_t)ldcp,
3288 	    drv_usectohz(vnet_ldcwd_interval * 1000));
3289 
3290 	ldcp->hphase = -1;
3291 	ldcp->flags |= CHANNEL_STARTED;
3292 
3293 	/* if channel is already UP - start handshake */
3294 	if (istatus == LDC_UP) {
3295 		vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3296 		if (ldcp->portp != vgenp->vsw_portp) {
3297 			/*
3298 			 * As the channel is up, use this port from now on.
3299 			 */
3300 			(void) atomic_swap_32(
3301 			    &ldcp->portp->use_vsw_port, B_FALSE);
3302 		}
3303 
3304 		/* Initialize local session id */
3305 		ldcp->local_sid = ddi_get_lbolt();
3306 
3307 		/* clear peer session id */
3308 		ldcp->peer_sid = 0;
3309 		ldcp->hretries = 0;
3310 
3311 		/* Initiate Handshake process with peer ldc endpoint */
3312 		vgen_reset_hphase(ldcp);
3313 
3314 		mutex_exit(&ldcp->tclock);
3315 		mutex_exit(&ldcp->txlock);
3316 		mutex_exit(&ldcp->wrlock);
3317 		mutex_exit(&ldcp->rxlock);
3318 		vgen_handshake(vh_nextphase(ldcp));
3319 		mutex_exit(&ldcp->cblock);
3320 	} else {
3321 		LDC_UNLOCK(ldcp);
3322 	}
3323 
3324 	return (DDI_SUCCESS);
3325 
3326 ldcinit_failed:
3327 	if (init_state & ST_cb_enable) {
3328 		(void) ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE);
3329 	}
3330 	if (init_state & ST_init_tbufs) {
3331 		vgen_uninit_tbufs(ldcp);
3332 	}
3333 	if (init_state & ST_ldc_open) {
3334 		(void) ldc_close(ldcp->ldc_handle);
3335 	}
3336 	LDC_UNLOCK(ldcp);
3337 	DBG1(vgenp, ldcp, "exit\n");
3338 	return (DDI_FAILURE);
3339 }
3340 
3341 /* stop transmit/receive on the channel */
3342 static void
3343 vgen_ldc_uninit(vgen_ldc_t *ldcp)
3344 {
3345 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3346 	int	rv;
3347 	uint_t	retries = 0;
3348 
3349 	DBG1(vgenp, ldcp, "enter\n");
3350 	LDC_LOCK(ldcp);
3351 
3352 	if ((ldcp->flags & CHANNEL_STARTED) == 0) {
3353 		LDC_UNLOCK(ldcp);
3354 		DWARN(vgenp, ldcp, "CHANNEL_STARTED flag is not set\n");
3355 		return;
3356 	}
3357 
3358 	/* disable further callbacks */
3359 	rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE);
3360 	if (rv != 0) {
3361 		DWARN(vgenp, ldcp, "ldc_set_cb_mode failed\n");
3362 	}
3363 
3364 	/*
3365 	 * clear handshake done bit and wait for pending tx and cb to finish.
3366 	 * release locks before untimeout(9F) is invoked to cancel timeouts.
3367 	 */
3368 	ldcp->hphase &= ~(VH_DONE);
3369 	LDC_UNLOCK(ldcp);
3370 
3371 	if (vgenp->vsw_portp == ldcp->portp) {
3372 		vio_net_report_err_t rep_err =
3373 		    ldcp->portp->vcb.vio_net_report_err;
3374 		rep_err(ldcp->portp->vhp, VIO_NET_RES_DOWN);
3375 	}
3376 
3377 	/* cancel handshake watchdog timeout */
3378 	if (ldcp->htid) {
3379 		(void) untimeout(ldcp->htid);
3380 		ldcp->htid = 0;
3381 	}
3382 
3383 	if (ldcp->cancel_htid) {
3384 		(void) untimeout(ldcp->cancel_htid);
3385 		ldcp->cancel_htid = 0;
3386 	}
3387 
3388 	/* cancel transmit watchdog timeout */
3389 	if (ldcp->wd_tid) {
3390 		(void) untimeout(ldcp->wd_tid);
3391 		ldcp->wd_tid = 0;
3392 	}
3393 
3394 	drv_usecwait(1000);
3395 
3396 	if (ldcp->rcv_thread != NULL) {
3397 		/*
3398 		 * Note that callbacks have been disabled already(above). The
3399 		 * drain function takes care of the condition when an already
3400 		 * executing callback signals the worker to start processing or
3401 		 * the worker has already been signalled and is in the middle of
3402 		 * processing.
3403 		 */
3404 		vgen_drain_rcv_thread(ldcp);
3405 	}
3406 
3407 	/* acquire locks again; any pending transmits and callbacks are done */
3408 	LDC_LOCK(ldcp);
3409 
3410 	vgen_reset_hphase(ldcp);
3411 
3412 	vgen_uninit_tbufs(ldcp);
3413 
3414 	/* close the channel - retry on EAGAIN */
3415 	while ((rv = ldc_close(ldcp->ldc_handle)) == EAGAIN) {
3416 		if (++retries > vgen_ldccl_retries) {
3417 			break;
3418 		}
3419 		drv_usecwait(VGEN_LDC_CLOSE_DELAY);
3420 	}
3421 	if (rv != 0) {
3422 		cmn_err(CE_NOTE,
3423 		    "!vnet%d: Error(%d) closing the channel(0x%lx)\n",
3424 		    vgenp->instance, rv, ldcp->ldc_id);
3425 	}
3426 
3427 	ldcp->ldc_status = LDC_INIT;
3428 	ldcp->flags &= ~(CHANNEL_STARTED);
3429 
3430 	LDC_UNLOCK(ldcp);
3431 
3432 	DBG1(vgenp, ldcp, "exit\n");
3433 }
3434 
3435 /* Initialize the transmit buffer ring for the channel */
3436 static int
3437 vgen_init_tbufs(vgen_ldc_t *ldcp)
3438 {
3439 	vgen_private_desc_t	*tbufp;
3440 	vnet_public_desc_t	*txdp;
3441 	vio_dring_entry_hdr_t		*hdrp;
3442 	int 			i;
3443 	int 			rv;
3444 	caddr_t			datap = NULL;
3445 	int			ci;
3446 	uint32_t		ncookies;
3447 	size_t			data_sz;
3448 	vgen_t			*vgenp;
3449 
3450 	vgenp = LDC_TO_VGEN(ldcp);
3451 
3452 	bzero(ldcp->tbufp, sizeof (*tbufp) * (ldcp->num_txds));
3453 	bzero(ldcp->txdp, sizeof (*txdp) * (ldcp->num_txds));
3454 
3455 	/*
3456 	 * In order to ensure that the number of ldc cookies per descriptor is
3457 	 * limited to be within the default MAX_COOKIES (2), we take the steps
3458 	 * outlined below:
3459 	 *
3460 	 * Align the entire data buffer area to 8K and carve out per descriptor
3461 	 * data buffers starting from this 8K aligned base address.
3462 	 *
3463 	 * We round up the mtu specified to be a multiple of 2K or 4K.
3464 	 * For sizes up to 12K we round up the size to the next 2K.
3465 	 * For sizes > 12K we round up to the next 4K (otherwise sizes such as
3466 	 * 14K could end up needing 3 cookies, with the buffer spread across
3467 	 * 3 8K pages:  8K+6K, 2K+8K+2K, 6K+8K, ...).
3468 	 */
3469 	data_sz = vgenp->max_frame_size + VNET_IPALIGN + VNET_LDCALIGN;
3470 	if (data_sz <= VNET_12K) {
3471 		data_sz = VNET_ROUNDUP_2K(data_sz);
3472 	} else {
3473 		data_sz = VNET_ROUNDUP_4K(data_sz);
3474 	}
3475 
3476 	/* allocate extra 8K bytes for alignment */
3477 	ldcp->tx_data_sz = (data_sz * ldcp->num_txds) + VNET_8K;
3478 	datap = kmem_zalloc(ldcp->tx_data_sz, KM_SLEEP);
3479 	ldcp->tx_datap = datap;
3480 
3481 
3482 	/* align the starting address of the data area to 8K */
3483 	datap = (caddr_t)VNET_ROUNDUP_8K((uintptr_t)datap);
3484 
3485 	/*
3486 	 * for each private descriptor, allocate a ldc mem_handle which is
3487 	 * required to map the data during transmit, set the flags
3488 	 * to free (available for use by transmit routine).
3489 	 */
3490 
3491 	for (i = 0; i < ldcp->num_txds; i++) {
3492 
3493 		tbufp = &(ldcp->tbufp[i]);
3494 		rv = ldc_mem_alloc_handle(ldcp->ldc_handle,
3495 		    &(tbufp->memhandle));
3496 		if (rv) {
3497 			tbufp->memhandle = 0;
3498 			goto init_tbufs_failed;
3499 		}
3500 
3501 		/*
3502 		 * bind ldc memhandle to the corresponding transmit buffer.
3503 		 */
3504 		ci = ncookies = 0;
3505 		rv = ldc_mem_bind_handle(tbufp->memhandle,
3506 		    (caddr_t)datap, data_sz, LDC_SHADOW_MAP,
3507 		    LDC_MEM_R, &(tbufp->memcookie[ci]), &ncookies);
3508 		if (rv != 0) {
3509 			goto init_tbufs_failed;
3510 		}
3511 
3512 		/*
3513 		 * successful in binding the handle to tx data buffer.
3514 		 * set datap in the private descr to this buffer.
3515 		 */
3516 		tbufp->datap = datap;
3517 
3518 		if ((ncookies == 0) ||
3519 		    (ncookies > MAX_COOKIES)) {
3520 			goto init_tbufs_failed;
3521 		}
3522 
3523 		for (ci = 1; ci < ncookies; ci++) {
3524 			rv = ldc_mem_nextcookie(tbufp->memhandle,
3525 			    &(tbufp->memcookie[ci]));
3526 			if (rv != 0) {
3527 				goto init_tbufs_failed;
3528 			}
3529 		}
3530 
3531 		tbufp->ncookies = ncookies;
3532 		datap += data_sz;
3533 
3534 		tbufp->flags = VGEN_PRIV_DESC_FREE;
3535 		txdp = &(ldcp->txdp[i]);
3536 		hdrp = &txdp->hdr;
3537 		hdrp->dstate = VIO_DESC_FREE;
3538 		hdrp->ack = B_FALSE;
3539 		tbufp->descp = txdp;
3540 
3541 	}
3542 
3543 	/* reset tbuf walking pointers */
3544 	ldcp->next_tbufp = ldcp->tbufp;
3545 	ldcp->cur_tbufp = ldcp->tbufp;
3546 
3547 	/* initialize tx seqnum and index */
3548 	ldcp->next_txseq = VNET_ISS;
3549 	ldcp->next_txi = 0;
3550 
3551 	ldcp->resched_peer = B_TRUE;
3552 	ldcp->resched_peer_txi = 0;
3553 
3554 	return (DDI_SUCCESS);
3555 
3556 init_tbufs_failed:;
3557 	vgen_uninit_tbufs(ldcp);
3558 	return (DDI_FAILURE);
3559 }
3560 
3561 /* Uninitialize transmit buffer ring for the channel */
3562 static void
3563 vgen_uninit_tbufs(vgen_ldc_t *ldcp)
3564 {
3565 	vgen_private_desc_t	*tbufp = ldcp->tbufp;
3566 	int 			i;
3567 
3568 	/* for each tbuf (priv_desc), free ldc mem_handle */
3569 	for (i = 0; i < ldcp->num_txds; i++) {
3570 
3571 		tbufp = &(ldcp->tbufp[i]);
3572 
3573 		if (tbufp->datap) { /* if bound to a ldc memhandle */
3574 			(void) ldc_mem_unbind_handle(tbufp->memhandle);
3575 			tbufp->datap = NULL;
3576 		}
3577 		if (tbufp->memhandle) {
3578 			(void) ldc_mem_free_handle(tbufp->memhandle);
3579 			tbufp->memhandle = 0;
3580 		}
3581 	}
3582 
3583 	if (ldcp->tx_datap) {
3584 		/* prealloc'd tx data buffer */
3585 		kmem_free(ldcp->tx_datap, ldcp->tx_data_sz);
3586 		ldcp->tx_datap = NULL;
3587 		ldcp->tx_data_sz = 0;
3588 	}
3589 
3590 	bzero(ldcp->tbufp, sizeof (vgen_private_desc_t) * (ldcp->num_txds));
3591 	bzero(ldcp->txdp, sizeof (vnet_public_desc_t) * (ldcp->num_txds));
3592 }
3593 
3594 /* clobber tx descriptor ring */
3595 static void
3596 vgen_clobber_tbufs(vgen_ldc_t *ldcp)
3597 {
3598 	vnet_public_desc_t	*txdp;
3599 	vgen_private_desc_t	*tbufp;
3600 	vio_dring_entry_hdr_t	*hdrp;
3601 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3602 	int i;
3603 #ifdef DEBUG
3604 	int ndone = 0;
3605 #endif
3606 
3607 	for (i = 0; i < ldcp->num_txds; i++) {
3608 
3609 		tbufp = &(ldcp->tbufp[i]);
3610 		txdp = tbufp->descp;
3611 		hdrp = &txdp->hdr;
3612 
3613 		if (tbufp->flags & VGEN_PRIV_DESC_BUSY) {
3614 			tbufp->flags = VGEN_PRIV_DESC_FREE;
3615 #ifdef DEBUG
3616 			if (hdrp->dstate == VIO_DESC_DONE)
3617 				ndone++;
3618 #endif
3619 			hdrp->dstate = VIO_DESC_FREE;
3620 			hdrp->ack = B_FALSE;
3621 		}
3622 	}
3623 	/* reset tbuf walking pointers */
3624 	ldcp->next_tbufp = ldcp->tbufp;
3625 	ldcp->cur_tbufp = ldcp->tbufp;
3626 
3627 	/* reset tx seqnum and index */
3628 	ldcp->next_txseq = VNET_ISS;
3629 	ldcp->next_txi = 0;
3630 
3631 	ldcp->resched_peer = B_TRUE;
3632 	ldcp->resched_peer_txi = 0;
3633 
3634 	DBG2(vgenp, ldcp, "num descrs done (%d)\n", ndone);
3635 }
3636 
3637 /* clobber receive descriptor ring */
3638 static void
3639 vgen_clobber_rxds(vgen_ldc_t *ldcp)
3640 {
3641 	ldcp->rx_dhandle = 0;
3642 	bzero(&ldcp->rx_dcookie, sizeof (ldcp->rx_dcookie));
3643 	ldcp->rxdp = NULL;
3644 	ldcp->next_rxi = 0;
3645 	ldcp->num_rxds = 0;
3646 	ldcp->next_rxseq = VNET_ISS;
3647 }
3648 
3649 /* initialize receive descriptor ring */
3650 static int
3651 vgen_init_rxds(vgen_ldc_t *ldcp, uint32_t num_desc, uint32_t desc_size,
3652 	ldc_mem_cookie_t *dcookie, uint32_t ncookies)
3653 {
3654 	int rv;
3655 	ldc_mem_info_t minfo;
3656 
3657 	rv = ldc_mem_dring_map(ldcp->ldc_handle, dcookie, ncookies, num_desc,
3658 	    desc_size, LDC_DIRECT_MAP, &(ldcp->rx_dhandle));
3659 	if (rv != 0) {
3660 		return (DDI_FAILURE);
3661 	}
3662 
3663 	/*
3664 	 * sucessfully mapped, now try to
3665 	 * get info about the mapped dring
3666 	 */
3667 	rv = ldc_mem_dring_info(ldcp->rx_dhandle, &minfo);
3668 	if (rv != 0) {
3669 		(void) ldc_mem_dring_unmap(ldcp->rx_dhandle);
3670 		return (DDI_FAILURE);
3671 	}
3672 
3673 	/*
3674 	 * save ring address, number of descriptors.
3675 	 */
3676 	ldcp->rxdp = (vnet_public_desc_t *)(minfo.vaddr);
3677 	bcopy(dcookie, &(ldcp->rx_dcookie), sizeof (*dcookie));
3678 	ldcp->num_rxdcookies = ncookies;
3679 	ldcp->num_rxds = num_desc;
3680 	ldcp->next_rxi = 0;
3681 	ldcp->next_rxseq = VNET_ISS;
3682 	ldcp->dring_mtype = minfo.mtype;
3683 
3684 	return (DDI_SUCCESS);
3685 }
3686 
3687 /* get channel statistics */
3688 static uint64_t
3689 vgen_ldc_stat(vgen_ldc_t *ldcp, uint_t stat)
3690 {
3691 	vgen_stats_t *statsp;
3692 	uint64_t val;
3693 
3694 	val = 0;
3695 	statsp = &ldcp->stats;
3696 	switch (stat) {
3697 
3698 	case MAC_STAT_MULTIRCV:
3699 		val = statsp->multircv;
3700 		break;
3701 
3702 	case MAC_STAT_BRDCSTRCV:
3703 		val = statsp->brdcstrcv;
3704 		break;
3705 
3706 	case MAC_STAT_MULTIXMT:
3707 		val = statsp->multixmt;
3708 		break;
3709 
3710 	case MAC_STAT_BRDCSTXMT:
3711 		val = statsp->brdcstxmt;
3712 		break;
3713 
3714 	case MAC_STAT_NORCVBUF:
3715 		val = statsp->norcvbuf;
3716 		break;
3717 
3718 	case MAC_STAT_IERRORS:
3719 		val = statsp->ierrors;
3720 		break;
3721 
3722 	case MAC_STAT_NOXMTBUF:
3723 		val = statsp->noxmtbuf;
3724 		break;
3725 
3726 	case MAC_STAT_OERRORS:
3727 		val = statsp->oerrors;
3728 		break;
3729 
3730 	case MAC_STAT_COLLISIONS:
3731 		break;
3732 
3733 	case MAC_STAT_RBYTES:
3734 		val = statsp->rbytes;
3735 		break;
3736 
3737 	case MAC_STAT_IPACKETS:
3738 		val = statsp->ipackets;
3739 		break;
3740 
3741 	case MAC_STAT_OBYTES:
3742 		val = statsp->obytes;
3743 		break;
3744 
3745 	case MAC_STAT_OPACKETS:
3746 		val = statsp->opackets;
3747 		break;
3748 
3749 	/* stats not relevant to ldc, return 0 */
3750 	case MAC_STAT_IFSPEED:
3751 	case ETHER_STAT_ALIGN_ERRORS:
3752 	case ETHER_STAT_FCS_ERRORS:
3753 	case ETHER_STAT_FIRST_COLLISIONS:
3754 	case ETHER_STAT_MULTI_COLLISIONS:
3755 	case ETHER_STAT_DEFER_XMTS:
3756 	case ETHER_STAT_TX_LATE_COLLISIONS:
3757 	case ETHER_STAT_EX_COLLISIONS:
3758 	case ETHER_STAT_MACXMT_ERRORS:
3759 	case ETHER_STAT_CARRIER_ERRORS:
3760 	case ETHER_STAT_TOOLONG_ERRORS:
3761 	case ETHER_STAT_XCVR_ADDR:
3762 	case ETHER_STAT_XCVR_ID:
3763 	case ETHER_STAT_XCVR_INUSE:
3764 	case ETHER_STAT_CAP_1000FDX:
3765 	case ETHER_STAT_CAP_1000HDX:
3766 	case ETHER_STAT_CAP_100FDX:
3767 	case ETHER_STAT_CAP_100HDX:
3768 	case ETHER_STAT_CAP_10FDX:
3769 	case ETHER_STAT_CAP_10HDX:
3770 	case ETHER_STAT_CAP_ASMPAUSE:
3771 	case ETHER_STAT_CAP_PAUSE:
3772 	case ETHER_STAT_CAP_AUTONEG:
3773 	case ETHER_STAT_ADV_CAP_1000FDX:
3774 	case ETHER_STAT_ADV_CAP_1000HDX:
3775 	case ETHER_STAT_ADV_CAP_100FDX:
3776 	case ETHER_STAT_ADV_CAP_100HDX:
3777 	case ETHER_STAT_ADV_CAP_10FDX:
3778 	case ETHER_STAT_ADV_CAP_10HDX:
3779 	case ETHER_STAT_ADV_CAP_ASMPAUSE:
3780 	case ETHER_STAT_ADV_CAP_PAUSE:
3781 	case ETHER_STAT_ADV_CAP_AUTONEG:
3782 	case ETHER_STAT_LP_CAP_1000FDX:
3783 	case ETHER_STAT_LP_CAP_1000HDX:
3784 	case ETHER_STAT_LP_CAP_100FDX:
3785 	case ETHER_STAT_LP_CAP_100HDX:
3786 	case ETHER_STAT_LP_CAP_10FDX:
3787 	case ETHER_STAT_LP_CAP_10HDX:
3788 	case ETHER_STAT_LP_CAP_ASMPAUSE:
3789 	case ETHER_STAT_LP_CAP_PAUSE:
3790 	case ETHER_STAT_LP_CAP_AUTONEG:
3791 	case ETHER_STAT_LINK_ASMPAUSE:
3792 	case ETHER_STAT_LINK_PAUSE:
3793 	case ETHER_STAT_LINK_AUTONEG:
3794 	case ETHER_STAT_LINK_DUPLEX:
3795 	default:
3796 		val = 0;
3797 		break;
3798 
3799 	}
3800 	return (val);
3801 }
3802 
3803 /*
3804  * LDC channel is UP, start handshake process with peer.
3805  */
3806 static void
3807 vgen_handle_evt_up(vgen_ldc_t *ldcp)
3808 {
3809 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
3810 
3811 	DBG1(vgenp, ldcp, "enter\n");
3812 
3813 	ASSERT(MUTEX_HELD(&ldcp->cblock));
3814 
3815 	if (ldcp->portp != vgenp->vsw_portp) {
3816 		/*
3817 		 * As the channel is up, use this port from now on.
3818 		 */
3819 		(void) atomic_swap_32(&ldcp->portp->use_vsw_port, B_FALSE);
3820 	}
3821 
3822 	/* Initialize local session id */
3823 	ldcp->local_sid = ddi_get_lbolt();
3824 
3825 	/* clear peer session id */
3826 	ldcp->peer_sid = 0;
3827 	ldcp->hretries = 0;
3828 
3829 	if (ldcp->hphase != VH_PHASE0) {
3830 		vgen_handshake_reset(ldcp);
3831 	}
3832 
3833 	/* Initiate Handshake process with peer ldc endpoint */
3834 	vgen_handshake(vh_nextphase(ldcp));
3835 
3836 	DBG1(vgenp, ldcp, "exit\n");
3837 }
3838 
3839 /*
3840  * LDC channel is Reset, terminate connection with peer and try to
3841  * bring the channel up again.
3842  */
3843 static void
3844 vgen_handle_evt_reset(vgen_ldc_t *ldcp)
3845 {
3846 	ldc_status_t istatus;
3847 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
3848 	int	rv;
3849 
3850 	DBG1(vgenp, ldcp, "enter\n");
3851 
3852 	ASSERT(MUTEX_HELD(&ldcp->cblock));
3853 
3854 	if ((ldcp->portp != vgenp->vsw_portp) &&
3855 	    (vgenp->vsw_portp != NULL)) {
3856 		/*
3857 		 * As the channel is down, use the switch port until
3858 		 * the channel becomes ready to be used.
3859 		 */
3860 		(void) atomic_swap_32(&ldcp->portp->use_vsw_port, B_TRUE);
3861 	}
3862 
3863 	if (vgenp->vsw_portp == ldcp->portp) {
3864 		vio_net_report_err_t rep_err =
3865 		    ldcp->portp->vcb.vio_net_report_err;
3866 
3867 		/* Post a reset message */
3868 		rep_err(ldcp->portp->vhp, VIO_NET_RES_DOWN);
3869 	}
3870 
3871 	if (ldcp->hphase != VH_PHASE0) {
3872 		vgen_handshake_reset(ldcp);
3873 	}
3874 
3875 	/* try to bring the channel up */
3876 #ifdef	VNET_IOC_DEBUG
3877 	if (ldcp->link_down_forced == B_FALSE) {
3878 		rv = ldc_up(ldcp->ldc_handle);
3879 		if (rv != 0) {
3880 			DWARN(vgenp, ldcp, "ldc_up err rv(%d)\n", rv);
3881 		}
3882 	}
3883 #else
3884 	rv = ldc_up(ldcp->ldc_handle);
3885 	if (rv != 0) {
3886 		DWARN(vgenp, ldcp, "ldc_up err rv(%d)\n", rv);
3887 	}
3888 #endif
3889 
3890 	if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
3891 		DWARN(vgenp, ldcp, "ldc_status err\n");
3892 	} else {
3893 		ldcp->ldc_status = istatus;
3894 	}
3895 
3896 	/* if channel is already UP - restart handshake */
3897 	if (ldcp->ldc_status == LDC_UP) {
3898 		vgen_handle_evt_up(ldcp);
3899 	}
3900 
3901 	DBG1(vgenp, ldcp, "exit\n");
3902 }
3903 
3904 /* Interrupt handler for the channel */
3905 static uint_t
3906 vgen_ldc_cb(uint64_t event, caddr_t arg)
3907 {
3908 	_NOTE(ARGUNUSED(event))
3909 	vgen_ldc_t	*ldcp;
3910 	vgen_t		*vgenp;
3911 	ldc_status_t 	istatus;
3912 	vgen_stats_t	*statsp;
3913 	timeout_id_t	cancel_htid = 0;
3914 	uint_t		ret = LDC_SUCCESS;
3915 
3916 	ldcp = (vgen_ldc_t *)arg;
3917 	vgenp = LDC_TO_VGEN(ldcp);
3918 	statsp = &ldcp->stats;
3919 
3920 	DBG1(vgenp, ldcp, "enter\n");
3921 
3922 	mutex_enter(&ldcp->cblock);
3923 	statsp->callbacks++;
3924 	if ((ldcp->ldc_status == LDC_INIT) || (ldcp->ldc_handle == NULL)) {
3925 		DWARN(vgenp, ldcp, "status(%d) is LDC_INIT\n",
3926 		    ldcp->ldc_status);
3927 		mutex_exit(&ldcp->cblock);
3928 		return (LDC_SUCCESS);
3929 	}
3930 
3931 	/*
3932 	 * cache cancel_htid before the events specific
3933 	 * code may overwrite it. Do not clear ldcp->cancel_htid
3934 	 * as it is also used to indicate the timer to quit immediately.
3935 	 */
3936 	cancel_htid = ldcp->cancel_htid;
3937 
3938 	/*
3939 	 * NOTE: not using switch() as event could be triggered by
3940 	 * a state change and a read request. Also the ordering	of the
3941 	 * check for the event types is deliberate.
3942 	 */
3943 	if (event & LDC_EVT_UP) {
3944 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
3945 			DWARN(vgenp, ldcp, "ldc_status err\n");
3946 			/* status couldn't be determined */
3947 			ret = LDC_FAILURE;
3948 			goto ldc_cb_ret;
3949 		}
3950 		ldcp->ldc_status = istatus;
3951 		if (ldcp->ldc_status != LDC_UP) {
3952 			DWARN(vgenp, ldcp, "LDC_EVT_UP received "
3953 			    " but ldc status is not UP(0x%x)\n",
3954 			    ldcp->ldc_status);
3955 			/* spurious interrupt, return success */
3956 			goto ldc_cb_ret;
3957 		}
3958 		DWARN(vgenp, ldcp, "event(%lx) UP, status(%d)\n",
3959 		    event, ldcp->ldc_status);
3960 
3961 		vgen_handle_evt_up(ldcp);
3962 
3963 		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);
3964 	}
3965 
3966 	/* Handle RESET/DOWN before READ event */
3967 	if (event & (LDC_EVT_RESET | LDC_EVT_DOWN)) {
3968 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
3969 			DWARN(vgenp, ldcp, "ldc_status error\n");
3970 			/* status couldn't be determined */
3971 			ret = LDC_FAILURE;
3972 			goto ldc_cb_ret;
3973 		}
3974 		ldcp->ldc_status = istatus;
3975 		DWARN(vgenp, ldcp, "event(%lx) RESET/DOWN, status(%d)\n",
3976 		    event, ldcp->ldc_status);
3977 
3978 		vgen_handle_evt_reset(ldcp);
3979 
3980 		/*
3981 		 * As the channel is down/reset, ignore READ event
3982 		 * but print a debug warning message.
3983 		 */
3984 		if (event & LDC_EVT_READ) {
3985 			DWARN(vgenp, ldcp,
3986 			    "LDC_EVT_READ set along with RESET/DOWN\n");
3987 			event &= ~LDC_EVT_READ;
3988 		}
3989 	}
3990 
3991 	if (event & LDC_EVT_READ) {
3992 		DBG2(vgenp, ldcp, "event(%lx) READ, status(%d)\n",
3993 		    event, ldcp->ldc_status);
3994 
3995 		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);
3996 
3997 		if (ldcp->rcv_thread != NULL) {
3998 			/*
3999 			 * If the receive thread is enabled, then
4000 			 * wakeup the receive thread to process the
4001 			 * LDC messages.
4002 			 */
4003 			mutex_exit(&ldcp->cblock);
4004 			mutex_enter(&ldcp->rcv_thr_lock);
4005 			if (!(ldcp->rcv_thr_flags & VGEN_WTHR_DATARCVD)) {
4006 				ldcp->rcv_thr_flags |= VGEN_WTHR_DATARCVD;
4007 				cv_signal(&ldcp->rcv_thr_cv);
4008 			}
4009 			mutex_exit(&ldcp->rcv_thr_lock);
4010 			mutex_enter(&ldcp->cblock);
4011 		} else  {
4012 			vgen_handle_evt_read(ldcp);
4013 		}
4014 	}
4015 
4016 ldc_cb_ret:
4017 	/*
4018 	 * Check to see if the status of cancel_htid has
4019 	 * changed. If another timer needs to be cancelled,
4020 	 * then let the next callback to clear it.
4021 	 */
4022 	if (cancel_htid == 0) {
4023 		cancel_htid = ldcp->cancel_htid;
4024 	}
4025 	mutex_exit(&ldcp->cblock);
4026 
4027 	if (cancel_htid) {
4028 		/*
4029 		 * Cancel handshake timer.
4030 		 * untimeout(9F) will not return until the pending callback is
4031 		 * cancelled or has run. No problems will result from calling
4032 		 * untimeout if the handler has already completed.
4033 		 * If the timeout handler did run, then it would just
4034 		 * return as cancel_htid is set.
4035 		 */
4036 		DBG2(vgenp, ldcp, "cancel_htid =0x%X \n", cancel_htid);
4037 		(void) untimeout(cancel_htid);
4038 		mutex_enter(&ldcp->cblock);
4039 		/* clear it only if its the same as the one we cancelled */
4040 		if (ldcp->cancel_htid == cancel_htid) {
4041 			ldcp->cancel_htid = 0;
4042 		}
4043 		mutex_exit(&ldcp->cblock);
4044 	}
4045 	DBG1(vgenp, ldcp, "exit\n");
4046 	return (ret);
4047 }
4048 
4049 static void
4050 vgen_handle_evt_read(vgen_ldc_t *ldcp)
4051 {
4052 	int		rv;
4053 	uint64_t	*ldcmsg;
4054 	size_t		msglen;
4055 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4056 	vio_msg_tag_t	*tagp;
4057 	ldc_status_t 	istatus;
4058 	boolean_t 	has_data;
4059 
4060 	DBG1(vgenp, ldcp, "enter\n");
4061 
4062 	ldcmsg = ldcp->ldcmsg;
4063 	/*
4064 	 * If the receive thread is enabled, then the cblock
4065 	 * need to be acquired here. If not, the vgen_ldc_cb()
4066 	 * calls this function with cblock held already.
4067 	 */
4068 	if (ldcp->rcv_thread != NULL) {
4069 		mutex_enter(&ldcp->cblock);
4070 	} else {
4071 		ASSERT(MUTEX_HELD(&ldcp->cblock));
4072 	}
4073 
4074 vgen_evt_read:
4075 	do {
4076 		msglen = ldcp->msglen;
4077 		rv = ldc_read(ldcp->ldc_handle, (caddr_t)ldcmsg, &msglen);
4078 
4079 		if (rv != 0) {
4080 			DWARN(vgenp, ldcp, "err rv(%d) len(%d)\n",
4081 			    rv, msglen);
4082 			if (rv == ECONNRESET)
4083 				goto vgen_evtread_error;
4084 			break;
4085 		}
4086 		if (msglen == 0) {
4087 			DBG2(vgenp, ldcp, "ldc_read NODATA");
4088 			break;
4089 		}
4090 		DBG2(vgenp, ldcp, "ldc_read msglen(%d)", msglen);
4091 
4092 		tagp = (vio_msg_tag_t *)ldcmsg;
4093 
4094 		if (ldcp->peer_sid) {
4095 			/*
4096 			 * check sid only after we have received peer's sid
4097 			 * in the version negotiate msg.
4098 			 */
4099 #ifdef DEBUG
4100 			if (vgen_hdbg & HDBG_BAD_SID) {
4101 				/* simulate bad sid condition */
4102 				tagp->vio_sid = 0;
4103 				vgen_hdbg &= ~(HDBG_BAD_SID);
4104 			}
4105 #endif
4106 			rv = vgen_check_sid(ldcp, tagp);
4107 			if (rv != VGEN_SUCCESS) {
4108 				/*
4109 				 * If sid mismatch is detected,
4110 				 * reset the channel.
4111 				 */
4112 				goto vgen_evtread_error;
4113 			}
4114 		}
4115 
4116 		switch (tagp->vio_msgtype) {
4117 		case VIO_TYPE_CTRL:
4118 			rv = vgen_handle_ctrlmsg(ldcp, tagp);
4119 			break;
4120 
4121 		case VIO_TYPE_DATA:
4122 			rv = vgen_handle_datamsg(ldcp, tagp, msglen);
4123 			break;
4124 
4125 		case VIO_TYPE_ERR:
4126 			vgen_handle_errmsg(ldcp, tagp);
4127 			break;
4128 
4129 		default:
4130 			DWARN(vgenp, ldcp, "Unknown VIO_TYPE(%x)\n",
4131 			    tagp->vio_msgtype);
4132 			break;
4133 		}
4134 
4135 		/*
4136 		 * If an error is encountered, stop processing and
4137 		 * handle the error.
4138 		 */
4139 		if (rv != 0) {
4140 			goto vgen_evtread_error;
4141 		}
4142 
4143 	} while (msglen);
4144 
4145 	/* check once more before exiting */
4146 	rv = ldc_chkq(ldcp->ldc_handle, &has_data);
4147 	if ((rv == 0) && (has_data == B_TRUE)) {
4148 		DTRACE_PROBE(vgen_chkq);
4149 		goto vgen_evt_read;
4150 	}
4151 
4152 vgen_evtread_error:
4153 	if (rv == ECONNRESET) {
4154 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
4155 			DWARN(vgenp, ldcp, "ldc_status err\n");
4156 		} else {
4157 			ldcp->ldc_status = istatus;
4158 		}
4159 		vgen_handle_evt_reset(ldcp);
4160 	} else if (rv) {
4161 		vgen_ldc_reset(ldcp);
4162 	}
4163 
4164 	/*
4165 	 * If the receive thread is enabled, then cancel the
4166 	 * handshake timeout here.
4167 	 */
4168 	if (ldcp->rcv_thread != NULL) {
4169 		timeout_id_t cancel_htid = ldcp->cancel_htid;
4170 
4171 		mutex_exit(&ldcp->cblock);
4172 		if (cancel_htid) {
4173 			/*
4174 			 * Cancel handshake timer. untimeout(9F) will
4175 			 * not return until the pending callback is cancelled
4176 			 * or has run. No problems will result from calling
4177 			 * untimeout if the handler has already completed.
4178 			 * If the timeout handler did run, then it would just
4179 			 * return as cancel_htid is set.
4180 			 */
4181 			DBG2(vgenp, ldcp, "cancel_htid =0x%X \n", cancel_htid);
4182 			(void) untimeout(cancel_htid);
4183 
4184 			/*
4185 			 * clear it only if its the same as the one we
4186 			 * cancelled
4187 			 */
4188 			mutex_enter(&ldcp->cblock);
4189 			if (ldcp->cancel_htid == cancel_htid) {
4190 				ldcp->cancel_htid = 0;
4191 			}
4192 			mutex_exit(&ldcp->cblock);
4193 		}
4194 	}
4195 
4196 	DBG1(vgenp, ldcp, "exit\n");
4197 }
4198 
4199 /* vgen handshake functions */
4200 
4201 /* change the hphase for the channel to the next phase */
4202 static vgen_ldc_t *
4203 vh_nextphase(vgen_ldc_t *ldcp)
4204 {
4205 	if (ldcp->hphase == VH_PHASE3) {
4206 		ldcp->hphase = VH_DONE;
4207 	} else {
4208 		ldcp->hphase++;
4209 	}
4210 	return (ldcp);
4211 }
4212 
4213 /*
4214  * wrapper routine to send the given message over ldc using ldc_write().
4215  */
4216 static int
4217 vgen_sendmsg(vgen_ldc_t *ldcp, caddr_t msg,  size_t msglen,
4218     boolean_t caller_holds_lock)
4219 {
4220 	int			rv;
4221 	size_t			len;
4222 	uint32_t		retries = 0;
4223 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
4224 	vio_msg_tag_t		*tagp = (vio_msg_tag_t *)msg;
4225 	vio_dring_msg_t		*dmsg;
4226 	vio_raw_data_msg_t	*rmsg;
4227 	boolean_t		data_msg = B_FALSE;
4228 
4229 	len = msglen;
4230 	if ((len == 0) || (msg == NULL))
4231 		return (VGEN_FAILURE);
4232 
4233 	if (!caller_holds_lock) {
4234 		mutex_enter(&ldcp->wrlock);
4235 	}
4236 
4237 	if (tagp->vio_subtype == VIO_SUBTYPE_INFO) {
4238 		if (tagp->vio_subtype_env == VIO_DRING_DATA) {
4239 			dmsg = (vio_dring_msg_t *)tagp;
4240 			dmsg->seq_num = ldcp->next_txseq;
4241 			data_msg = B_TRUE;
4242 		} else if (tagp->vio_subtype_env == VIO_PKT_DATA) {
4243 			rmsg = (vio_raw_data_msg_t *)tagp;
4244 			rmsg->seq_num = ldcp->next_txseq;
4245 			data_msg = B_TRUE;
4246 		}
4247 	}
4248 
4249 	do {
4250 		len = msglen;
4251 		rv = ldc_write(ldcp->ldc_handle, (caddr_t)msg, &len);
4252 		if (retries++ >= vgen_ldcwr_retries)
4253 			break;
4254 	} while (rv == EWOULDBLOCK);
4255 
4256 	if (rv == 0 && data_msg == B_TRUE) {
4257 		ldcp->next_txseq++;
4258 	}
4259 
4260 	if (!caller_holds_lock) {
4261 		mutex_exit(&ldcp->wrlock);
4262 	}
4263 
4264 	if (rv != 0) {
4265 		DWARN(vgenp, ldcp, "ldc_write failed: rv(%d)\n",
4266 		    rv, msglen);
4267 		return (rv);
4268 	}
4269 
4270 	if (len != msglen) {
4271 		DWARN(vgenp, ldcp, "ldc_write failed: rv(%d) msglen (%d)\n",
4272 		    rv, msglen);
4273 		return (VGEN_FAILURE);
4274 	}
4275 
4276 	return (VGEN_SUCCESS);
4277 }
4278 
4279 /* send version negotiate message to the peer over ldc */
4280 static int
4281 vgen_send_version_negotiate(vgen_ldc_t *ldcp)
4282 {
4283 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4284 	vio_ver_msg_t	vermsg;
4285 	vio_msg_tag_t	*tagp = &vermsg.tag;
4286 	int		rv;
4287 
4288 	bzero(&vermsg, sizeof (vermsg));
4289 
4290 	tagp->vio_msgtype = VIO_TYPE_CTRL;
4291 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
4292 	tagp->vio_subtype_env = VIO_VER_INFO;
4293 	tagp->vio_sid = ldcp->local_sid;
4294 
4295 	/* get version msg payload from ldcp->local */
4296 	vermsg.ver_major = ldcp->local_hparams.ver_major;
4297 	vermsg.ver_minor = ldcp->local_hparams.ver_minor;
4298 	vermsg.dev_class = ldcp->local_hparams.dev_class;
4299 
4300 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (vermsg), B_FALSE);
4301 	if (rv != VGEN_SUCCESS) {
4302 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
4303 		return (rv);
4304 	}
4305 
4306 	ldcp->hstate |= VER_INFO_SENT;
4307 	DBG2(vgenp, ldcp, "VER_INFO_SENT ver(%d,%d)\n",
4308 	    vermsg.ver_major, vermsg.ver_minor);
4309 
4310 	return (VGEN_SUCCESS);
4311 }
4312 
4313 /* send attr info message to the peer over ldc */
4314 static int
4315 vgen_send_attr_info(vgen_ldc_t *ldcp)
4316 {
4317 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4318 	vnet_attr_msg_t	attrmsg;
4319 	vio_msg_tag_t	*tagp = &attrmsg.tag;
4320 	int		rv;
4321 
4322 	bzero(&attrmsg, sizeof (attrmsg));
4323 
4324 	tagp->vio_msgtype = VIO_TYPE_CTRL;
4325 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
4326 	tagp->vio_subtype_env = VIO_ATTR_INFO;
4327 	tagp->vio_sid = ldcp->local_sid;
4328 
4329 	/* get attr msg payload from ldcp->local */
4330 	attrmsg.mtu = ldcp->local_hparams.mtu;
4331 	attrmsg.addr = ldcp->local_hparams.addr;
4332 	attrmsg.addr_type = ldcp->local_hparams.addr_type;
4333 	attrmsg.xfer_mode = ldcp->local_hparams.xfer_mode;
4334 	attrmsg.ack_freq = ldcp->local_hparams.ack_freq;
4335 	attrmsg.physlink_update = ldcp->local_hparams.physlink_update;
4336 
4337 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (attrmsg), B_FALSE);
4338 	if (rv != VGEN_SUCCESS) {
4339 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
4340 		return (rv);
4341 	}
4342 
4343 	ldcp->hstate |= ATTR_INFO_SENT;
4344 	DBG2(vgenp, ldcp, "ATTR_INFO_SENT\n");
4345 
4346 	return (VGEN_SUCCESS);
4347 }
4348 
4349 /* send descriptor ring register message to the peer over ldc */
4350 static int
4351 vgen_send_dring_reg(vgen_ldc_t *ldcp)
4352 {
4353 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
4354 	vio_dring_reg_msg_t	msg;
4355 	vio_msg_tag_t		*tagp = &msg.tag;
4356 	int		rv;
4357 
4358 	bzero(&msg, sizeof (msg));
4359 
4360 	tagp->vio_msgtype = VIO_TYPE_CTRL;
4361 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
4362 	tagp->vio_subtype_env = VIO_DRING_REG;
4363 	tagp->vio_sid = ldcp->local_sid;
4364 
4365 	/* get dring info msg payload from ldcp->local */
4366 	bcopy(&(ldcp->local_hparams.dring_cookie), (msg.cookie),
4367 	    sizeof (ldc_mem_cookie_t));
4368 	msg.ncookies = ldcp->local_hparams.num_dcookies;
4369 	msg.num_descriptors = ldcp->local_hparams.num_desc;
4370 	msg.descriptor_size = ldcp->local_hparams.desc_size;
4371 
4372 	/*
4373 	 * dring_ident is set to 0. After mapping the dring, peer sets this
4374 	 * value and sends it in the ack, which is saved in
4375 	 * vgen_handle_dring_reg().
4376 	 */
4377 	msg.dring_ident = 0;
4378 
4379 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (msg), B_FALSE);
4380 	if (rv != VGEN_SUCCESS) {
4381 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
4382 		return (rv);
4383 	}
4384 
4385 	ldcp->hstate |= DRING_INFO_SENT;
4386 	DBG2(vgenp, ldcp, "DRING_INFO_SENT \n");
4387 
4388 	return (VGEN_SUCCESS);
4389 }
4390 
4391 static int
4392 vgen_send_rdx_info(vgen_ldc_t *ldcp)
4393 {
4394 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4395 	vio_rdx_msg_t	rdxmsg;
4396 	vio_msg_tag_t	*tagp = &rdxmsg.tag;
4397 	int		rv;
4398 
4399 	bzero(&rdxmsg, sizeof (rdxmsg));
4400 
4401 	tagp->vio_msgtype = VIO_TYPE_CTRL;
4402 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
4403 	tagp->vio_subtype_env = VIO_RDX;
4404 	tagp->vio_sid = ldcp->local_sid;
4405 
4406 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (rdxmsg), B_FALSE);
4407 	if (rv != VGEN_SUCCESS) {
4408 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
4409 		return (rv);
4410 	}
4411 
4412 	ldcp->hstate |= RDX_INFO_SENT;
4413 	DBG2(vgenp, ldcp, "RDX_INFO_SENT\n");
4414 
4415 	return (VGEN_SUCCESS);
4416 }
4417 
4418 /* send descriptor ring data message to the peer over ldc */
4419 static int
4420 vgen_send_dring_data(vgen_ldc_t *ldcp, uint32_t start, int32_t end)
4421 {
4422 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4423 	vio_dring_msg_t	dringmsg, *msgp = &dringmsg;
4424 	vio_msg_tag_t	*tagp = &msgp->tag;
4425 	vgen_stats_t	*statsp = &ldcp->stats;
4426 	int		rv;
4427 
4428 	bzero(msgp, sizeof (*msgp));
4429 
4430 	tagp->vio_msgtype = VIO_TYPE_DATA;
4431 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
4432 	tagp->vio_subtype_env = VIO_DRING_DATA;
4433 	tagp->vio_sid = ldcp->local_sid;
4434 
4435 	msgp->dring_ident = ldcp->local_hparams.dring_ident;
4436 	msgp->start_idx = start;
4437 	msgp->end_idx = end;
4438 
4439 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (dringmsg), B_TRUE);
4440 	if (rv != VGEN_SUCCESS) {
4441 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
4442 		return (rv);
4443 	}
4444 
4445 	statsp->dring_data_msgs++;
4446 
4447 	DBG2(vgenp, ldcp, "DRING_DATA_SENT \n");
4448 
4449 	return (VGEN_SUCCESS);
4450 }
4451 
4452 /* send multicast addr info message to vsw */
4453 static int
4454 vgen_send_mcast_info(vgen_ldc_t *ldcp)
4455 {
4456 	vnet_mcast_msg_t	mcastmsg;
4457 	vnet_mcast_msg_t	*msgp;
4458 	vio_msg_tag_t		*tagp;
4459 	vgen_t			*vgenp;
4460 	struct ether_addr	*mca;
4461 	int			rv;
4462 	int			i;
4463 	uint32_t		size;
4464 	uint32_t		mccount;
4465 	uint32_t		n;
4466 
4467 	msgp = &mcastmsg;
4468 	tagp = &msgp->tag;
4469 	vgenp = LDC_TO_VGEN(ldcp);
4470 
4471 	mccount = vgenp->mccount;
4472 	i = 0;
4473 
4474 	do {
4475 		tagp->vio_msgtype = VIO_TYPE_CTRL;
4476 		tagp->vio_subtype = VIO_SUBTYPE_INFO;
4477 		tagp->vio_subtype_env = VNET_MCAST_INFO;
4478 		tagp->vio_sid = ldcp->local_sid;
4479 
4480 		n = ((mccount >= VNET_NUM_MCAST) ? VNET_NUM_MCAST : mccount);
4481 		size = n * sizeof (struct ether_addr);
4482 
4483 		mca = &(vgenp->mctab[i]);
4484 		bcopy(mca, (msgp->mca), size);
4485 		msgp->set = B_TRUE;
4486 		msgp->count = n;
4487 
4488 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msgp),
4489 		    B_FALSE);
4490 		if (rv != VGEN_SUCCESS) {
4491 			DWARN(vgenp, ldcp, "vgen_sendmsg err(%d)\n", rv);
4492 			return (rv);
4493 		}
4494 
4495 		mccount -= n;
4496 		i += n;
4497 
4498 	} while (mccount);
4499 
4500 	return (VGEN_SUCCESS);
4501 }
4502 
4503 /* Initiate Phase 2 of handshake */
4504 static int
4505 vgen_handshake_phase2(vgen_ldc_t *ldcp)
4506 {
4507 	int rv;
4508 	uint32_t ncookies = 0;
4509 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4510 
4511 #ifdef DEBUG
4512 	if (vgen_hdbg & HDBG_OUT_STATE) {
4513 		/* simulate out of state condition */
4514 		vgen_hdbg &= ~(HDBG_OUT_STATE);
4515 		rv = vgen_send_rdx_info(ldcp);
4516 		return (rv);
4517 	}
4518 	if (vgen_hdbg & HDBG_TIMEOUT) {
4519 		/* simulate timeout condition */
4520 		vgen_hdbg &= ~(HDBG_TIMEOUT);
4521 		return (VGEN_SUCCESS);
4522 	}
4523 #endif
4524 	rv = vgen_send_attr_info(ldcp);
4525 	if (rv != VGEN_SUCCESS) {
4526 		return (rv);
4527 	}
4528 
4529 	/* Bind descriptor ring to the channel */
4530 	if (ldcp->num_txdcookies == 0) {
4531 		rv = ldc_mem_dring_bind(ldcp->ldc_handle, ldcp->tx_dhandle,
4532 		    LDC_DIRECT_MAP | LDC_SHADOW_MAP, LDC_MEM_RW,
4533 		    &ldcp->tx_dcookie, &ncookies);
4534 		if (rv != 0) {
4535 			DWARN(vgenp, ldcp, "ldc_mem_dring_bind failed "
4536 			    "rv(%x)\n", rv);
4537 			return (rv);
4538 		}
4539 		ASSERT(ncookies == 1);
4540 		ldcp->num_txdcookies = ncookies;
4541 	}
4542 
4543 	/* update local dring_info params */
4544 	bcopy(&(ldcp->tx_dcookie), &(ldcp->local_hparams.dring_cookie),
4545 	    sizeof (ldc_mem_cookie_t));
4546 	ldcp->local_hparams.num_dcookies = ldcp->num_txdcookies;
4547 	ldcp->local_hparams.num_desc = ldcp->num_txds;
4548 	ldcp->local_hparams.desc_size = sizeof (vnet_public_desc_t);
4549 
4550 	rv = vgen_send_dring_reg(ldcp);
4551 	if (rv != VGEN_SUCCESS) {
4552 		return (rv);
4553 	}
4554 
4555 	return (VGEN_SUCCESS);
4556 }
4557 
4558 /*
4559  * Set vnet-protocol-version dependent functions based on version.
4560  */
4561 static void
4562 vgen_set_vnet_proto_ops(vgen_ldc_t *ldcp)
4563 {
4564 	vgen_hparams_t	*lp = &ldcp->local_hparams;
4565 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4566 
4567 	if (VGEN_VER_GTEQ(ldcp, 1, 5)) {
4568 		vgen_port_t	*portp = ldcp->portp;
4569 		vnet_t		*vnetp = vgenp->vnetp;
4570 		/*
4571 		 * If the version negotiated with vswitch is >= 1.5 (link
4572 		 * status update support), set the required bits in our
4573 		 * attributes if this vnet device has been configured to get
4574 		 * physical link state updates.
4575 		 */
4576 		if (portp == vgenp->vsw_portp && vnetp->pls_update == B_TRUE) {
4577 			lp->physlink_update = PHYSLINK_UPDATE_STATE;
4578 		} else {
4579 			lp->physlink_update = PHYSLINK_UPDATE_NONE;
4580 		}
4581 	}
4582 
4583 	if (VGEN_VER_GTEQ(ldcp, 1, 4)) {
4584 		/*
4585 		 * If the version negotiated with peer is >= 1.4(Jumbo Frame
4586 		 * Support), set the mtu in our attributes to max_frame_size.
4587 		 */
4588 		lp->mtu = vgenp->max_frame_size;
4589 	} else  if (VGEN_VER_EQ(ldcp, 1, 3)) {
4590 		/*
4591 		 * If the version negotiated with peer is == 1.3 (Vlan Tag
4592 		 * Support) set the attr.mtu to ETHERMAX + VLAN_TAGSZ.
4593 		 */
4594 		lp->mtu = ETHERMAX + VLAN_TAGSZ;
4595 	} else {
4596 		vgen_port_t	*portp = ldcp->portp;
4597 		vnet_t		*vnetp = vgenp->vnetp;
4598 		/*
4599 		 * Pre-1.3 peers expect max frame size of ETHERMAX.
4600 		 * We can negotiate that size with those peers provided the
4601 		 * following conditions are true:
4602 		 * - Only pvid is defined for our peer and there are no vids.
4603 		 * - pvids are equal.
4604 		 * If the above conditions are true, then we can send/recv only
4605 		 * untagged frames of max size ETHERMAX.
4606 		 */
4607 		if (portp->nvids == 0 && portp->pvid == vnetp->pvid) {
4608 			lp->mtu = ETHERMAX;
4609 		}
4610 	}
4611 
4612 	if (VGEN_VER_GTEQ(ldcp, 1, 2)) {
4613 		/* Versions >= 1.2 */
4614 
4615 		if (VGEN_PRI_ETH_DEFINED(vgenp)) {
4616 			/*
4617 			 * enable priority routines and pkt mode only if
4618 			 * at least one pri-eth-type is specified in MD.
4619 			 */
4620 
4621 			ldcp->tx = vgen_ldcsend;
4622 			ldcp->rx_pktdata = vgen_handle_pkt_data;
4623 
4624 			/* set xfer mode for vgen_send_attr_info() */
4625 			lp->xfer_mode = VIO_PKT_MODE | VIO_DRING_MODE_V1_2;
4626 
4627 		} else {
4628 			/* no priority eth types defined in MD */
4629 
4630 			ldcp->tx = vgen_ldcsend_dring;
4631 			ldcp->rx_pktdata = vgen_handle_pkt_data_nop;
4632 
4633 			/* set xfer mode for vgen_send_attr_info() */
4634 			lp->xfer_mode = VIO_DRING_MODE_V1_2;
4635 
4636 		}
4637 	} else {
4638 		/* Versions prior to 1.2  */
4639 
4640 		vgen_reset_vnet_proto_ops(ldcp);
4641 	}
4642 }
4643 
4644 /*
4645  * Reset vnet-protocol-version dependent functions to pre-v1.2.
4646  */
4647 static void
4648 vgen_reset_vnet_proto_ops(vgen_ldc_t *ldcp)
4649 {
4650 	vgen_hparams_t	*lp = &ldcp->local_hparams;
4651 
4652 	ldcp->tx = vgen_ldcsend_dring;
4653 	ldcp->rx_pktdata = vgen_handle_pkt_data_nop;
4654 
4655 	/* set xfer mode for vgen_send_attr_info() */
4656 	lp->xfer_mode = VIO_DRING_MODE_V1_0;
4657 }
4658 
4659 static void
4660 vgen_vlan_unaware_port_reset(vgen_port_t *portp)
4661 {
4662 	vgen_ldclist_t	*ldclp;
4663 	vgen_ldc_t	*ldcp;
4664 	vgen_t		*vgenp = portp->vgenp;
4665 	vnet_t		*vnetp = vgenp->vnetp;
4666 
4667 	ldclp = &portp->ldclist;
4668 
4669 	READ_ENTER(&ldclp->rwlock);
4670 
4671 	/*
4672 	 * NOTE: for now, we will assume we have a single channel.
4673 	 */
4674 	if (ldclp->headp == NULL) {
4675 		RW_EXIT(&ldclp->rwlock);
4676 		return;
4677 	}
4678 	ldcp = ldclp->headp;
4679 
4680 	mutex_enter(&ldcp->cblock);
4681 
4682 	/*
4683 	 * If the peer is vlan_unaware(ver < 1.3), reset channel and terminate
4684 	 * the connection. See comments in vgen_set_vnet_proto_ops().
4685 	 */
4686 	if (ldcp->hphase == VH_DONE && VGEN_VER_LT(ldcp, 1, 3) &&
4687 	    (portp->nvids != 0 || portp->pvid != vnetp->pvid)) {
4688 		vgen_ldc_reset(ldcp);
4689 	}
4690 
4691 	mutex_exit(&ldcp->cblock);
4692 
4693 	RW_EXIT(&ldclp->rwlock);
4694 }
4695 
4696 static void
4697 vgen_port_reset(vgen_port_t *portp)
4698 {
4699 	vgen_ldclist_t	*ldclp;
4700 	vgen_ldc_t	*ldcp;
4701 
4702 	ldclp = &portp->ldclist;
4703 
4704 	READ_ENTER(&ldclp->rwlock);
4705 
4706 	/*
4707 	 * NOTE: for now, we will assume we have a single channel.
4708 	 */
4709 	if (ldclp->headp == NULL) {
4710 		RW_EXIT(&ldclp->rwlock);
4711 		return;
4712 	}
4713 	ldcp = ldclp->headp;
4714 
4715 	mutex_enter(&ldcp->cblock);
4716 
4717 	vgen_ldc_reset(ldcp);
4718 
4719 	mutex_exit(&ldcp->cblock);
4720 
4721 	RW_EXIT(&ldclp->rwlock);
4722 }
4723 
4724 static void
4725 vgen_reset_vlan_unaware_ports(vgen_t *vgenp)
4726 {
4727 	vgen_port_t	*portp;
4728 	vgen_portlist_t	*plistp;
4729 
4730 	plistp = &(vgenp->vgenports);
4731 	READ_ENTER(&plistp->rwlock);
4732 
4733 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
4734 
4735 		vgen_vlan_unaware_port_reset(portp);
4736 
4737 	}
4738 
4739 	RW_EXIT(&plistp->rwlock);
4740 }
4741 
4742 static void
4743 vgen_reset_vsw_port(vgen_t *vgenp)
4744 {
4745 	vgen_port_t	*portp;
4746 
4747 	if ((portp = vgenp->vsw_portp) != NULL) {
4748 		vgen_port_reset(portp);
4749 	}
4750 }
4751 
4752 /*
4753  * This function resets the handshake phase to VH_PHASE0(pre-handshake phase).
4754  * This can happen after a channel comes up (status: LDC_UP) or
4755  * when handshake gets terminated due to various conditions.
4756  */
4757 static void
4758 vgen_reset_hphase(vgen_ldc_t *ldcp)
4759 {
4760 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4761 	ldc_status_t istatus;
4762 	int rv;
4763 
4764 	DBG1(vgenp, ldcp, "enter\n");
4765 	/* reset hstate and hphase */
4766 	ldcp->hstate = 0;
4767 	ldcp->hphase = VH_PHASE0;
4768 
4769 	vgen_reset_vnet_proto_ops(ldcp);
4770 
4771 	/*
4772 	 * Save the id of pending handshake timer in cancel_htid.
4773 	 * This will be checked in vgen_ldc_cb() and the handshake timer will
4774 	 * be cancelled after releasing cblock.
4775 	 */
4776 	if (ldcp->htid) {
4777 		ldcp->cancel_htid = ldcp->htid;
4778 		ldcp->htid = 0;
4779 	}
4780 
4781 	if (ldcp->local_hparams.dring_ready) {
4782 		ldcp->local_hparams.dring_ready = B_FALSE;
4783 	}
4784 
4785 	/* Unbind tx descriptor ring from the channel */
4786 	if (ldcp->num_txdcookies) {
4787 		rv = ldc_mem_dring_unbind(ldcp->tx_dhandle);
4788 		if (rv != 0) {
4789 			DWARN(vgenp, ldcp, "ldc_mem_dring_unbind failed\n");
4790 		}
4791 		ldcp->num_txdcookies = 0;
4792 	}
4793 
4794 	if (ldcp->peer_hparams.dring_ready) {
4795 		ldcp->peer_hparams.dring_ready = B_FALSE;
4796 		/* Unmap peer's dring */
4797 		(void) ldc_mem_dring_unmap(ldcp->rx_dhandle);
4798 		vgen_clobber_rxds(ldcp);
4799 	}
4800 
4801 	vgen_clobber_tbufs(ldcp);
4802 
4803 	/*
4804 	 * clear local handshake params and initialize.
4805 	 */
4806 	bzero(&(ldcp->local_hparams), sizeof (ldcp->local_hparams));
4807 
4808 	/* set version to the highest version supported */
4809 	ldcp->local_hparams.ver_major =
4810 	    ldcp->vgen_versions[0].ver_major;
4811 	ldcp->local_hparams.ver_minor =
4812 	    ldcp->vgen_versions[0].ver_minor;
4813 	ldcp->local_hparams.dev_class = VDEV_NETWORK;
4814 
4815 	/* set attr_info params */
4816 	ldcp->local_hparams.mtu = vgenp->max_frame_size;
4817 	ldcp->local_hparams.addr =
4818 	    vnet_macaddr_strtoul(vgenp->macaddr);
4819 	ldcp->local_hparams.addr_type = ADDR_TYPE_MAC;
4820 	ldcp->local_hparams.xfer_mode = VIO_DRING_MODE_V1_0;
4821 	ldcp->local_hparams.ack_freq = 0;	/* don't need acks */
4822 	ldcp->local_hparams.physlink_update = PHYSLINK_UPDATE_NONE;
4823 
4824 	/*
4825 	 * Note: dring is created, but not bound yet.
4826 	 * local dring_info params will be updated when we bind the dring in
4827 	 * vgen_handshake_phase2().
4828 	 * dring_ident is set to 0. After mapping the dring, peer sets this
4829 	 * value and sends it in the ack, which is saved in
4830 	 * vgen_handle_dring_reg().
4831 	 */
4832 	ldcp->local_hparams.dring_ident = 0;
4833 
4834 	/* clear peer_hparams */
4835 	bzero(&(ldcp->peer_hparams), sizeof (ldcp->peer_hparams));
4836 
4837 	/* reset the channel if required */
4838 #ifdef	VNET_IOC_DEBUG
4839 	if (ldcp->need_ldc_reset && !ldcp->link_down_forced) {
4840 #else
4841 	if (ldcp->need_ldc_reset) {
4842 #endif
4843 		DWARN(vgenp, ldcp, "Doing Channel Reset...\n");
4844 		ldcp->need_ldc_reset = B_FALSE;
4845 		(void) ldc_down(ldcp->ldc_handle);
4846 		(void) ldc_status(ldcp->ldc_handle, &istatus);
4847 		DBG2(vgenp, ldcp, "Reset Done,ldc_status(%x)\n", istatus);
4848 		ldcp->ldc_status = istatus;
4849 
4850 		/* clear sids */
4851 		ldcp->local_sid = 0;
4852 		ldcp->peer_sid = 0;
4853 
4854 		/* try to bring the channel up */
4855 		rv = ldc_up(ldcp->ldc_handle);
4856 		if (rv != 0) {
4857 			DWARN(vgenp, ldcp, "ldc_up err rv(%d)\n", rv);
4858 		}
4859 
4860 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
4861 			DWARN(vgenp, ldcp, "ldc_status err\n");
4862 		} else {
4863 			ldcp->ldc_status = istatus;
4864 		}
4865 	}
4866 }
4867 
4868 /* wrapper function for vgen_reset_hphase */
4869 static void
4870 vgen_handshake_reset(vgen_ldc_t *ldcp)
4871 {
4872 	vgen_t  *vgenp = LDC_TO_VGEN(ldcp);
4873 
4874 	ASSERT(MUTEX_HELD(&ldcp->cblock));
4875 	mutex_enter(&ldcp->rxlock);
4876 	mutex_enter(&ldcp->wrlock);
4877 	mutex_enter(&ldcp->txlock);
4878 	mutex_enter(&ldcp->tclock);
4879 
4880 	vgen_reset_hphase(ldcp);
4881 
4882 	mutex_exit(&ldcp->tclock);
4883 	mutex_exit(&ldcp->txlock);
4884 	mutex_exit(&ldcp->wrlock);
4885 	mutex_exit(&ldcp->rxlock);
4886 
4887 	/*
4888 	 * As the connection is now reset, mark the channel
4889 	 * link_state as 'down' and notify the stack if needed.
4890 	 */
4891 	if (ldcp->link_state != LINK_STATE_DOWN) {
4892 		ldcp->link_state = LINK_STATE_DOWN;
4893 
4894 		if (ldcp->portp == vgenp->vsw_portp) { /* vswitch port ? */
4895 			/*
4896 			 * As the channel link is down, mark physical link also
4897 			 * as down. After the channel comes back up and
4898 			 * handshake completes, we will get an update on the
4899 			 * physlink state from vswitch (if this device has been
4900 			 * configured to get phys link updates).
4901 			 */
4902 			vgenp->phys_link_state = LINK_STATE_DOWN;
4903 
4904 			/* Now update the stack */
4905 			mutex_exit(&ldcp->cblock);
4906 			vgen_link_update(vgenp, ldcp->link_state);
4907 			mutex_enter(&ldcp->cblock);
4908 		}
4909 	}
4910 }
4911 
4912 /*
4913  * Initiate handshake with the peer by sending various messages
4914  * based on the handshake-phase that the channel is currently in.
4915  */
4916 static void
4917 vgen_handshake(vgen_ldc_t *ldcp)
4918 {
4919 	uint32_t	hphase = ldcp->hphase;
4920 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4921 	ldc_status_t	istatus;
4922 	int		rv = 0;
4923 
4924 	switch (hphase) {
4925 
4926 	case VH_PHASE1:
4927 
4928 		/*
4929 		 * start timer, for entire handshake process, turn this timer
4930 		 * off if all phases of handshake complete successfully and
4931 		 * hphase goes to VH_DONE(below) or
4932 		 * vgen_reset_hphase() gets called or
4933 		 * channel is reset due to errors or
4934 		 * vgen_ldc_uninit() is invoked(vgen_stop).
4935 		 */
4936 		ASSERT(ldcp->htid == 0);
4937 		ldcp->htid = timeout(vgen_hwatchdog, (caddr_t)ldcp,
4938 		    drv_usectohz(vgen_hwd_interval * MICROSEC));
4939 
4940 		/* Phase 1 involves negotiating the version */
4941 		rv = vgen_send_version_negotiate(ldcp);
4942 		break;
4943 
4944 	case VH_PHASE2:
4945 		rv = vgen_handshake_phase2(ldcp);
4946 		break;
4947 
4948 	case VH_PHASE3:
4949 		rv = vgen_send_rdx_info(ldcp);
4950 		break;
4951 
4952 	case VH_DONE:
4953 		/*
4954 		 * Save the id of pending handshake timer in cancel_htid.
4955 		 * This will be checked in vgen_ldc_cb() and the handshake
4956 		 * timer will be cancelled after releasing cblock.
4957 		 */
4958 		if (ldcp->htid) {
4959 			ldcp->cancel_htid = ldcp->htid;
4960 			ldcp->htid = 0;
4961 		}
4962 		ldcp->hretries = 0;
4963 		DBG1(vgenp, ldcp, "Handshake Done\n");
4964 
4965 		/*
4966 		 * The channel is up and handshake is done successfully. Now we
4967 		 * can mark the channel link_state as 'up'. We also notify the
4968 		 * stack if the channel is connected to vswitch.
4969 		 */
4970 		ldcp->link_state = LINK_STATE_UP;
4971 
4972 		if (ldcp->portp == vgenp->vsw_portp) {
4973 			/*
4974 			 * If this channel(port) is connected to vsw,
4975 			 * need to sync multicast table with vsw.
4976 			 */
4977 			mutex_exit(&ldcp->cblock);
4978 
4979 			mutex_enter(&vgenp->lock);
4980 			rv = vgen_send_mcast_info(ldcp);
4981 			mutex_exit(&vgenp->lock);
4982 
4983 			if (vgenp->pls_negotiated == B_FALSE) {
4984 				/*
4985 				 * We haven't negotiated with vswitch to get
4986 				 * physical link state updates. We can update
4987 				 * update the stack at this point as the
4988 				 * channel to vswitch is up and the handshake
4989 				 * is done successfully.
4990 				 *
4991 				 * If we have negotiated to get physical link
4992 				 * state updates, then we won't notify the
4993 				 * the stack here; we do that as soon as
4994 				 * vswitch sends us the initial phys link state
4995 				 * (see vgen_handle_physlink_info()).
4996 				 */
4997 				vgen_link_update(vgenp, ldcp->link_state);
4998 			}
4999 
5000 			mutex_enter(&ldcp->cblock);
5001 			if (rv != VGEN_SUCCESS)
5002 				break;
5003 		}
5004 
5005 		/*
5006 		 * Check if mac layer should be notified to restart
5007 		 * transmissions. This can happen if the channel got
5008 		 * reset and vgen_clobber_tbufs() is called, while
5009 		 * need_resched is set.
5010 		 */
5011 		mutex_enter(&ldcp->tclock);
5012 		if (ldcp->need_resched) {
5013 			vio_net_tx_update_t vtx_update =
5014 			    ldcp->portp->vcb.vio_net_tx_update;
5015 
5016 			ldcp->need_resched = B_FALSE;
5017 			vtx_update(ldcp->portp->vhp);
5018 		}
5019 		mutex_exit(&ldcp->tclock);
5020 
5021 		break;
5022 
5023 	default:
5024 		break;
5025 	}
5026 
5027 	if (rv == ECONNRESET) {
5028 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
5029 			DWARN(vgenp, ldcp, "ldc_status err\n");
5030 		} else {
5031 			ldcp->ldc_status = istatus;
5032 		}
5033 		vgen_handle_evt_reset(ldcp);
5034 	} else if (rv) {
5035 		vgen_handshake_reset(ldcp);
5036 	}
5037 }
5038 
5039 /*
5040  * Check if the current handshake phase has completed successfully and
5041  * return the status.
5042  */
5043 static int
5044 vgen_handshake_done(vgen_ldc_t *ldcp)
5045 {
5046 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
5047 	uint32_t	hphase = ldcp->hphase;
5048 	int 		status = 0;
5049 
5050 	switch (hphase) {
5051 
5052 	case VH_PHASE1:
5053 		/*
5054 		 * Phase1 is done, if version negotiation
5055 		 * completed successfully.
5056 		 */
5057 		status = ((ldcp->hstate & VER_NEGOTIATED) ==
5058 		    VER_NEGOTIATED);
5059 		break;
5060 
5061 	case VH_PHASE2:
5062 		/*
5063 		 * Phase 2 is done, if attr info and dring info
5064 		 * have been exchanged successfully.
5065 		 */
5066 		status = (((ldcp->hstate & ATTR_INFO_EXCHANGED) ==
5067 		    ATTR_INFO_EXCHANGED) &&
5068 		    ((ldcp->hstate & DRING_INFO_EXCHANGED) ==
5069 		    DRING_INFO_EXCHANGED));
5070 		break;
5071 
5072 	case VH_PHASE3:
5073 		/* Phase 3 is done, if rdx msg has been exchanged */
5074 		status = ((ldcp->hstate & RDX_EXCHANGED) ==
5075 		    RDX_EXCHANGED);
5076 		break;
5077 
5078 	default:
5079 		break;
5080 	}
5081 
5082 	if (status == 0) {
5083 		return (VGEN_FAILURE);
5084 	}
5085 	DBG2(vgenp, ldcp, "PHASE(%d)\n", hphase);
5086 	return (VGEN_SUCCESS);
5087 }
5088 
5089 /* retry handshake on failure */
5090 static void
5091 vgen_handshake_retry(vgen_ldc_t *ldcp)
5092 {
5093 	/* reset handshake phase */
5094 	vgen_handshake_reset(ldcp);
5095 
5096 	/* handshake retry is specified and the channel is UP */
5097 	if (vgen_max_hretries && (ldcp->ldc_status == LDC_UP)) {
5098 		if (ldcp->hretries++ < vgen_max_hretries) {
5099 			ldcp->local_sid = ddi_get_lbolt();
5100 			vgen_handshake(vh_nextphase(ldcp));
5101 		}
5102 	}
5103 }
5104 
5105 
5106 /*
5107  * Link State Update Notes:
5108  * The link state of the channel connected to vswitch is reported as the link
5109  * state of the vnet device, by default. If the channel is down or reset, then
5110  * the link state is marked 'down'. If the channel is 'up' *and* handshake
5111  * between the vnet and vswitch is successful, then the link state is marked
5112  * 'up'. If physical network link state is desired, then the vnet device must
5113  * be configured to get physical link updates and the 'linkprop' property
5114  * in the virtual-device MD node indicates this. As part of attribute exchange
5115  * the vnet device negotiates with the vswitch to obtain physical link state
5116  * updates. If it successfully negotiates, vswitch sends an initial physlink
5117  * msg once the handshake is done and further whenever the physical link state
5118  * changes. Currently we don't have mac layer interfaces to report two distinct
5119  * link states - virtual and physical. Thus, if the vnet has been configured to
5120  * get physical link updates, then the link status will be reported as 'up'
5121  * only when both the virtual and physical links are up.
5122  */
5123 static void
5124 vgen_link_update(vgen_t *vgenp, link_state_t link_state)
5125 {
5126 	vnet_link_update(vgenp->vnetp, link_state);
5127 }
5128 
5129 /*
5130  * Handle a version info msg from the peer or an ACK/NACK from the peer
5131  * to a version info msg that we sent.
5132  */
5133 static int
5134 vgen_handle_version_negotiate(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5135 {
5136 	vgen_t		*vgenp;
5137 	vio_ver_msg_t	*vermsg = (vio_ver_msg_t *)tagp;
5138 	int		ack = 0;
5139 	int		failed = 0;
5140 	int		idx;
5141 	vgen_ver_t	*versions = ldcp->vgen_versions;
5142 	int		rv = 0;
5143 
5144 	vgenp = LDC_TO_VGEN(ldcp);
5145 	DBG1(vgenp, ldcp, "enter\n");
5146 	switch (tagp->vio_subtype) {
5147 	case VIO_SUBTYPE_INFO:
5148 
5149 		/*  Cache sid of peer if this is the first time */
5150 		if (ldcp->peer_sid == 0) {
5151 			DBG2(vgenp, ldcp, "Caching peer_sid(%x)\n",
5152 			    tagp->vio_sid);
5153 			ldcp->peer_sid = tagp->vio_sid;
5154 		}
5155 
5156 		if (ldcp->hphase != VH_PHASE1) {
5157 			/*
5158 			 * If we are not already in VH_PHASE1, reset to
5159 			 * pre-handshake state, and initiate handshake
5160 			 * to the peer too.
5161 			 */
5162 			vgen_handshake_reset(ldcp);
5163 			vgen_handshake(vh_nextphase(ldcp));
5164 		}
5165 		ldcp->hstate |= VER_INFO_RCVD;
5166 
5167 		/* save peer's requested values */
5168 		ldcp->peer_hparams.ver_major = vermsg->ver_major;
5169 		ldcp->peer_hparams.ver_minor = vermsg->ver_minor;
5170 		ldcp->peer_hparams.dev_class = vermsg->dev_class;
5171 
5172 		if ((vermsg->dev_class != VDEV_NETWORK) &&
5173 		    (vermsg->dev_class != VDEV_NETWORK_SWITCH)) {
5174 			/* unsupported dev_class, send NACK */
5175 
5176 			DWARN(vgenp, ldcp, "Version Negotiation Failed\n");
5177 
5178 			tagp->vio_subtype = VIO_SUBTYPE_NACK;
5179 			tagp->vio_sid = ldcp->local_sid;
5180 			/* send reply msg back to peer */
5181 			rv = vgen_sendmsg(ldcp, (caddr_t)tagp,
5182 			    sizeof (*vermsg), B_FALSE);
5183 			if (rv != VGEN_SUCCESS) {
5184 				return (rv);
5185 			}
5186 			return (VGEN_FAILURE);
5187 		}
5188 
5189 		DBG2(vgenp, ldcp, "VER_INFO_RCVD, ver(%d,%d)\n",
5190 		    vermsg->ver_major,  vermsg->ver_minor);
5191 
5192 		idx = 0;
5193 
5194 		for (;;) {
5195 
5196 			if (vermsg->ver_major > versions[idx].ver_major) {
5197 
5198 				/* nack with next lower version */
5199 				tagp->vio_subtype = VIO_SUBTYPE_NACK;
5200 				vermsg->ver_major = versions[idx].ver_major;
5201 				vermsg->ver_minor = versions[idx].ver_minor;
5202 				break;
5203 			}
5204 
5205 			if (vermsg->ver_major == versions[idx].ver_major) {
5206 
5207 				/* major version match - ACK version */
5208 				tagp->vio_subtype = VIO_SUBTYPE_ACK;
5209 				ack = 1;
5210 
5211 				/*
5212 				 * lower minor version to the one this endpt
5213 				 * supports, if necessary
5214 				 */
5215 				if (vermsg->ver_minor >
5216 				    versions[idx].ver_minor) {
5217 					vermsg->ver_minor =
5218 					    versions[idx].ver_minor;
5219 					ldcp->peer_hparams.ver_minor =
5220 					    versions[idx].ver_minor;
5221 				}
5222 				break;
5223 			}
5224 
5225 			idx++;
5226 
5227 			if (idx == VGEN_NUM_VER) {
5228 
5229 				/* no version match - send NACK */
5230 				tagp->vio_subtype = VIO_SUBTYPE_NACK;
5231 				vermsg->ver_major = 0;
5232 				vermsg->ver_minor = 0;
5233 				failed = 1;
5234 				break;
5235 			}
5236 
5237 		}
5238 
5239 		tagp->vio_sid = ldcp->local_sid;
5240 
5241 		/* send reply msg back to peer */
5242 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*vermsg),
5243 		    B_FALSE);
5244 		if (rv != VGEN_SUCCESS) {
5245 			return (rv);
5246 		}
5247 
5248 		if (ack) {
5249 			ldcp->hstate |= VER_ACK_SENT;
5250 			DBG2(vgenp, ldcp, "VER_ACK_SENT, ver(%d,%d) \n",
5251 			    vermsg->ver_major, vermsg->ver_minor);
5252 		}
5253 		if (failed) {
5254 			DWARN(vgenp, ldcp, "Negotiation Failed\n");
5255 			return (VGEN_FAILURE);
5256 		}
5257 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5258 
5259 			/*  VER_ACK_SENT and VER_ACK_RCVD */
5260 
5261 			/* local and peer versions match? */
5262 			ASSERT((ldcp->local_hparams.ver_major ==
5263 			    ldcp->peer_hparams.ver_major) &&
5264 			    (ldcp->local_hparams.ver_minor ==
5265 			    ldcp->peer_hparams.ver_minor));
5266 
5267 			vgen_set_vnet_proto_ops(ldcp);
5268 
5269 			/* move to the next phase */
5270 			vgen_handshake(vh_nextphase(ldcp));
5271 		}
5272 
5273 		break;
5274 
5275 	case VIO_SUBTYPE_ACK:
5276 
5277 		if (ldcp->hphase != VH_PHASE1) {
5278 			/*  This should not happen. */
5279 			DWARN(vgenp, ldcp, "Invalid Phase(%u)\n", ldcp->hphase);
5280 			return (VGEN_FAILURE);
5281 		}
5282 
5283 		/* SUCCESS - we have agreed on a version */
5284 		ldcp->local_hparams.ver_major = vermsg->ver_major;
5285 		ldcp->local_hparams.ver_minor = vermsg->ver_minor;
5286 		ldcp->hstate |= VER_ACK_RCVD;
5287 
5288 		DBG2(vgenp, ldcp, "VER_ACK_RCVD, ver(%d,%d) \n",
5289 		    vermsg->ver_major,  vermsg->ver_minor);
5290 
5291 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5292 
5293 			/*  VER_ACK_SENT and VER_ACK_RCVD */
5294 
5295 			/* local and peer versions match? */
5296 			ASSERT((ldcp->local_hparams.ver_major ==
5297 			    ldcp->peer_hparams.ver_major) &&
5298 			    (ldcp->local_hparams.ver_minor ==
5299 			    ldcp->peer_hparams.ver_minor));
5300 
5301 			vgen_set_vnet_proto_ops(ldcp);
5302 
5303 			/* move to the next phase */
5304 			vgen_handshake(vh_nextphase(ldcp));
5305 		}
5306 		break;
5307 
5308 	case VIO_SUBTYPE_NACK:
5309 
5310 		if (ldcp->hphase != VH_PHASE1) {
5311 			/*  This should not happen.  */
5312 			DWARN(vgenp, ldcp, "VER_NACK_RCVD Invalid "
5313 			"Phase(%u)\n", ldcp->hphase);
5314 			return (VGEN_FAILURE);
5315 		}
5316 
5317 		DBG2(vgenp, ldcp, "VER_NACK_RCVD next ver(%d,%d)\n",
5318 		    vermsg->ver_major, vermsg->ver_minor);
5319 
5320 		/* check if version in NACK is zero */
5321 		if (vermsg->ver_major == 0 && vermsg->ver_minor == 0) {
5322 			/*
5323 			 * Version Negotiation has failed.
5324 			 */
5325 			DWARN(vgenp, ldcp, "Version Negotiation Failed\n");
5326 			return (VGEN_FAILURE);
5327 		}
5328 
5329 		idx = 0;
5330 
5331 		for (;;) {
5332 
5333 			if (vermsg->ver_major > versions[idx].ver_major) {
5334 				/* select next lower version */
5335 
5336 				ldcp->local_hparams.ver_major =
5337 				    versions[idx].ver_major;
5338 				ldcp->local_hparams.ver_minor =
5339 				    versions[idx].ver_minor;
5340 				break;
5341 			}
5342 
5343 			if (vermsg->ver_major == versions[idx].ver_major) {
5344 				/* major version match */
5345 
5346 				ldcp->local_hparams.ver_major =
5347 				    versions[idx].ver_major;
5348 
5349 				ldcp->local_hparams.ver_minor =
5350 				    versions[idx].ver_minor;
5351 				break;
5352 			}
5353 
5354 			idx++;
5355 
5356 			if (idx == VGEN_NUM_VER) {
5357 				/*
5358 				 * no version match.
5359 				 * Version Negotiation has failed.
5360 				 */
5361 				DWARN(vgenp, ldcp,
5362 				    "Version Negotiation Failed\n");
5363 				return (VGEN_FAILURE);
5364 			}
5365 
5366 		}
5367 
5368 		rv = vgen_send_version_negotiate(ldcp);
5369 		if (rv != VGEN_SUCCESS) {
5370 			return (rv);
5371 		}
5372 
5373 		break;
5374 	}
5375 
5376 	DBG1(vgenp, ldcp, "exit\n");
5377 	return (VGEN_SUCCESS);
5378 }
5379 
5380 /* Check if the attributes are supported */
5381 static int
5382 vgen_check_attr_info(vgen_ldc_t *ldcp, vnet_attr_msg_t *msg)
5383 {
5384 	vgen_hparams_t	*lp = &ldcp->local_hparams;
5385 
5386 	if ((msg->addr_type != ADDR_TYPE_MAC) ||
5387 	    (msg->ack_freq > 64) ||
5388 	    (msg->xfer_mode != lp->xfer_mode)) {
5389 		return (VGEN_FAILURE);
5390 	}
5391 
5392 	if (VGEN_VER_LT(ldcp, 1, 4)) {
5393 		/* versions < 1.4, mtu must match */
5394 		if (msg->mtu != lp->mtu) {
5395 			return (VGEN_FAILURE);
5396 		}
5397 	} else {
5398 		/* Ver >= 1.4, validate mtu of the peer is at least ETHERMAX */
5399 		if (msg->mtu < ETHERMAX) {
5400 			return (VGEN_FAILURE);
5401 		}
5402 	}
5403 
5404 	return (VGEN_SUCCESS);
5405 }
5406 
5407 /*
5408  * Handle an attribute info msg from the peer or an ACK/NACK from the peer
5409  * to an attr info msg that we sent.
5410  */
5411 static int
5412 vgen_handle_attr_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5413 {
5414 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
5415 	vnet_attr_msg_t	*msg = (vnet_attr_msg_t *)tagp;
5416 	vgen_hparams_t	*lp = &ldcp->local_hparams;
5417 	vgen_hparams_t	*rp = &ldcp->peer_hparams;
5418 	int		ack = 1;
5419 	int		rv = 0;
5420 	uint32_t	mtu;
5421 
5422 	DBG1(vgenp, ldcp, "enter\n");
5423 	if (ldcp->hphase != VH_PHASE2) {
5424 		DWARN(vgenp, ldcp, "Rcvd ATTR_INFO subtype(%d),"
5425 		" Invalid Phase(%u)\n",
5426 		    tagp->vio_subtype, ldcp->hphase);
5427 		return (VGEN_FAILURE);
5428 	}
5429 	switch (tagp->vio_subtype) {
5430 	case VIO_SUBTYPE_INFO:
5431 
5432 		DBG2(vgenp, ldcp, "ATTR_INFO_RCVD \n");
5433 		ldcp->hstate |= ATTR_INFO_RCVD;
5434 
5435 		/* save peer's values */
5436 		rp->mtu = msg->mtu;
5437 		rp->addr = msg->addr;
5438 		rp->addr_type = msg->addr_type;
5439 		rp->xfer_mode = msg->xfer_mode;
5440 		rp->ack_freq = msg->ack_freq;
5441 
5442 		rv = vgen_check_attr_info(ldcp, msg);
5443 		if (rv == VGEN_FAILURE) {
5444 			/* unsupported attr, send NACK */
5445 			ack = 0;
5446 		} else {
5447 
5448 			if (VGEN_VER_GTEQ(ldcp, 1, 4)) {
5449 
5450 				/*
5451 				 * Versions >= 1.4:
5452 				 * The mtu is negotiated down to the
5453 				 * minimum of our mtu and peer's mtu.
5454 				 */
5455 				mtu = MIN(msg->mtu, vgenp->max_frame_size);
5456 
5457 				/*
5458 				 * If we have received an ack for the attr info
5459 				 * that we sent, then check if the mtu computed
5460 				 * above matches the mtu that the peer had ack'd
5461 				 * (saved in local hparams). If they don't
5462 				 * match, we fail the handshake.
5463 				 */
5464 				if (ldcp->hstate & ATTR_ACK_RCVD) {
5465 					if (mtu != lp->mtu) {
5466 						/* send NACK */
5467 						ack = 0;
5468 					}
5469 				} else {
5470 					/*
5471 					 * Save the mtu computed above in our
5472 					 * attr parameters, so it gets sent in
5473 					 * the attr info from us to the peer.
5474 					 */
5475 					lp->mtu = mtu;
5476 				}
5477 
5478 				/* save the MIN mtu in the msg to be replied */
5479 				msg->mtu = mtu;
5480 
5481 			}
5482 		}
5483 
5484 
5485 		if (ack) {
5486 			tagp->vio_subtype = VIO_SUBTYPE_ACK;
5487 		} else {
5488 			tagp->vio_subtype = VIO_SUBTYPE_NACK;
5489 		}
5490 		tagp->vio_sid = ldcp->local_sid;
5491 
5492 		/* send reply msg back to peer */
5493 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msg),
5494 		    B_FALSE);
5495 		if (rv != VGEN_SUCCESS) {
5496 			return (rv);
5497 		}
5498 
5499 		if (ack) {
5500 			ldcp->hstate |= ATTR_ACK_SENT;
5501 			DBG2(vgenp, ldcp, "ATTR_ACK_SENT \n");
5502 		} else {
5503 			/* failed */
5504 			DWARN(vgenp, ldcp, "ATTR_NACK_SENT \n");
5505 			return (VGEN_FAILURE);
5506 		}
5507 
5508 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5509 			vgen_handshake(vh_nextphase(ldcp));
5510 		}
5511 
5512 		break;
5513 
5514 	case VIO_SUBTYPE_ACK:
5515 
5516 		if (VGEN_VER_GTEQ(ldcp, 1, 5) &&
5517 		    ldcp->portp == vgenp->vsw_portp) {
5518 			/*
5519 			 * Versions >= 1.5:
5520 			 * If the vnet device has been configured to get
5521 			 * physical link state updates, check the corresponding
5522 			 * bits in the ack msg, if the peer is vswitch.
5523 			 */
5524 			if (((lp->physlink_update &
5525 			    PHYSLINK_UPDATE_STATE_MASK) ==
5526 			    PHYSLINK_UPDATE_STATE) &&
5527 
5528 			    ((msg->physlink_update &
5529 			    PHYSLINK_UPDATE_STATE_MASK) ==
5530 			    PHYSLINK_UPDATE_STATE_ACK)) {
5531 				vgenp->pls_negotiated = B_TRUE;
5532 			} else {
5533 				vgenp->pls_negotiated = B_FALSE;
5534 			}
5535 		}
5536 
5537 		if (VGEN_VER_GTEQ(ldcp, 1, 4)) {
5538 			/*
5539 			 * Versions >= 1.4:
5540 			 * The ack msg sent by the peer contains the minimum of
5541 			 * our mtu (that we had sent in our attr info) and the
5542 			 * peer's mtu.
5543 			 *
5544 			 * If we have sent an ack for the attr info msg from
5545 			 * the peer, check if the mtu that was computed then
5546 			 * (saved in local hparams) matches the mtu that the
5547 			 * peer has ack'd. If they don't match, we fail the
5548 			 * handshake.
5549 			 */
5550 			if (ldcp->hstate & ATTR_ACK_SENT) {
5551 				if (lp->mtu != msg->mtu) {
5552 					return (VGEN_FAILURE);
5553 				}
5554 			} else {
5555 				/*
5556 				 * If the mtu ack'd by the peer is > our mtu
5557 				 * fail handshake. Otherwise, save the mtu, so
5558 				 * we can validate it when we receive attr info
5559 				 * from our peer.
5560 				 */
5561 				if (msg->mtu > lp->mtu) {
5562 					return (VGEN_FAILURE);
5563 				}
5564 				if (msg->mtu <= lp->mtu) {
5565 					lp->mtu = msg->mtu;
5566 				}
5567 			}
5568 		}
5569 
5570 		ldcp->hstate |= ATTR_ACK_RCVD;
5571 
5572 		DBG2(vgenp, ldcp, "ATTR_ACK_RCVD \n");
5573 
5574 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5575 			vgen_handshake(vh_nextphase(ldcp));
5576 		}
5577 		break;
5578 
5579 	case VIO_SUBTYPE_NACK:
5580 
5581 		DBG2(vgenp, ldcp, "ATTR_NACK_RCVD \n");
5582 		return (VGEN_FAILURE);
5583 	}
5584 	DBG1(vgenp, ldcp, "exit\n");
5585 	return (VGEN_SUCCESS);
5586 }
5587 
5588 /* Check if the dring info msg is ok */
5589 static int
5590 vgen_check_dring_reg(vio_dring_reg_msg_t *msg)
5591 {
5592 	/* check if msg contents are ok */
5593 	if ((msg->num_descriptors < 128) || (msg->descriptor_size <
5594 	    sizeof (vnet_public_desc_t))) {
5595 		return (VGEN_FAILURE);
5596 	}
5597 	return (VGEN_SUCCESS);
5598 }
5599 
5600 /*
5601  * Handle a descriptor ring register msg from the peer or an ACK/NACK from
5602  * the peer to a dring register msg that we sent.
5603  */
5604 static int
5605 vgen_handle_dring_reg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5606 {
5607 	vio_dring_reg_msg_t *msg = (vio_dring_reg_msg_t *)tagp;
5608 	ldc_mem_cookie_t dcookie;
5609 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5610 	int ack = 0;
5611 	int rv = 0;
5612 
5613 	DBG1(vgenp, ldcp, "enter\n");
5614 	if (ldcp->hphase < VH_PHASE2) {
5615 		/* dring_info can be rcvd in any of the phases after Phase1 */
5616 		DWARN(vgenp, ldcp,
5617 		    "Rcvd DRING_INFO Subtype (%d), Invalid Phase(%u)\n",
5618 		    tagp->vio_subtype, ldcp->hphase);
5619 		return (VGEN_FAILURE);
5620 	}
5621 	switch (tagp->vio_subtype) {
5622 	case VIO_SUBTYPE_INFO:
5623 
5624 		DBG2(vgenp, ldcp, "DRING_INFO_RCVD \n");
5625 		ldcp->hstate |= DRING_INFO_RCVD;
5626 		bcopy((msg->cookie), &dcookie, sizeof (dcookie));
5627 
5628 		ASSERT(msg->ncookies == 1);
5629 
5630 		if (vgen_check_dring_reg(msg) == VGEN_SUCCESS) {
5631 			/*
5632 			 * verified dring info msg to be ok,
5633 			 * now try to map the remote dring.
5634 			 */
5635 			rv = vgen_init_rxds(ldcp, msg->num_descriptors,
5636 			    msg->descriptor_size, &dcookie,
5637 			    msg->ncookies);
5638 			if (rv == DDI_SUCCESS) {
5639 				/* now we can ack the peer */
5640 				ack = 1;
5641 			}
5642 		}
5643 		if (ack == 0) {
5644 			/* failed, send NACK */
5645 			tagp->vio_subtype = VIO_SUBTYPE_NACK;
5646 		} else {
5647 			if (!(ldcp->peer_hparams.dring_ready)) {
5648 
5649 				/* save peer's dring_info values */
5650 				bcopy(&dcookie,
5651 				    &(ldcp->peer_hparams.dring_cookie),
5652 				    sizeof (dcookie));
5653 				ldcp->peer_hparams.num_desc =
5654 				    msg->num_descriptors;
5655 				ldcp->peer_hparams.desc_size =
5656 				    msg->descriptor_size;
5657 				ldcp->peer_hparams.num_dcookies =
5658 				    msg->ncookies;
5659 
5660 				/* set dring_ident for the peer */
5661 				ldcp->peer_hparams.dring_ident =
5662 				    (uint64_t)ldcp->rxdp;
5663 				/* return the dring_ident in ack msg */
5664 				msg->dring_ident =
5665 				    (uint64_t)ldcp->rxdp;
5666 
5667 				ldcp->peer_hparams.dring_ready = B_TRUE;
5668 			}
5669 			tagp->vio_subtype = VIO_SUBTYPE_ACK;
5670 		}
5671 		tagp->vio_sid = ldcp->local_sid;
5672 		/* send reply msg back to peer */
5673 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msg),
5674 		    B_FALSE);
5675 		if (rv != VGEN_SUCCESS) {
5676 			return (rv);
5677 		}
5678 
5679 		if (ack) {
5680 			ldcp->hstate |= DRING_ACK_SENT;
5681 			DBG2(vgenp, ldcp, "DRING_ACK_SENT");
5682 		} else {
5683 			DWARN(vgenp, ldcp, "DRING_NACK_SENT");
5684 			return (VGEN_FAILURE);
5685 		}
5686 
5687 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5688 			vgen_handshake(vh_nextphase(ldcp));
5689 		}
5690 
5691 		break;
5692 
5693 	case VIO_SUBTYPE_ACK:
5694 
5695 		ldcp->hstate |= DRING_ACK_RCVD;
5696 
5697 		DBG2(vgenp, ldcp, "DRING_ACK_RCVD");
5698 
5699 		if (!(ldcp->local_hparams.dring_ready)) {
5700 			/* local dring is now ready */
5701 			ldcp->local_hparams.dring_ready = B_TRUE;
5702 
5703 			/* save dring_ident acked by peer */
5704 			ldcp->local_hparams.dring_ident =
5705 			    msg->dring_ident;
5706 		}
5707 
5708 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5709 			vgen_handshake(vh_nextphase(ldcp));
5710 		}
5711 
5712 		break;
5713 
5714 	case VIO_SUBTYPE_NACK:
5715 
5716 		DBG2(vgenp, ldcp, "DRING_NACK_RCVD");
5717 		return (VGEN_FAILURE);
5718 	}
5719 	DBG1(vgenp, ldcp, "exit\n");
5720 	return (VGEN_SUCCESS);
5721 }
5722 
5723 /*
5724  * Handle a rdx info msg from the peer or an ACK/NACK
5725  * from the peer to a rdx info msg that we sent.
5726  */
5727 static int
5728 vgen_handle_rdx_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5729 {
5730 	int rv = 0;
5731 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
5732 
5733 	DBG1(vgenp, ldcp, "enter\n");
5734 	if (ldcp->hphase != VH_PHASE3) {
5735 		DWARN(vgenp, ldcp,
5736 		    "Rcvd RDX_INFO Subtype (%d), Invalid Phase(%u)\n",
5737 		    tagp->vio_subtype, ldcp->hphase);
5738 		return (VGEN_FAILURE);
5739 	}
5740 	switch (tagp->vio_subtype) {
5741 	case VIO_SUBTYPE_INFO:
5742 
5743 		DBG2(vgenp, ldcp, "RDX_INFO_RCVD \n");
5744 		ldcp->hstate |= RDX_INFO_RCVD;
5745 
5746 		tagp->vio_subtype = VIO_SUBTYPE_ACK;
5747 		tagp->vio_sid = ldcp->local_sid;
5748 		/* send reply msg back to peer */
5749 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (vio_rdx_msg_t),
5750 		    B_FALSE);
5751 		if (rv != VGEN_SUCCESS) {
5752 			return (rv);
5753 		}
5754 
5755 		ldcp->hstate |= RDX_ACK_SENT;
5756 		DBG2(vgenp, ldcp, "RDX_ACK_SENT \n");
5757 
5758 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5759 			vgen_handshake(vh_nextphase(ldcp));
5760 		}
5761 
5762 		break;
5763 
5764 	case VIO_SUBTYPE_ACK:
5765 
5766 		ldcp->hstate |= RDX_ACK_RCVD;
5767 
5768 		DBG2(vgenp, ldcp, "RDX_ACK_RCVD \n");
5769 
5770 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5771 			vgen_handshake(vh_nextphase(ldcp));
5772 		}
5773 		break;
5774 
5775 	case VIO_SUBTYPE_NACK:
5776 
5777 		DBG2(vgenp, ldcp, "RDX_NACK_RCVD \n");
5778 		return (VGEN_FAILURE);
5779 	}
5780 	DBG1(vgenp, ldcp, "exit\n");
5781 	return (VGEN_SUCCESS);
5782 }
5783 
5784 /* Handle ACK/NACK from vsw to a set multicast msg that we sent */
5785 static int
5786 vgen_handle_mcast_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5787 {
5788 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5789 	vnet_mcast_msg_t *msgp = (vnet_mcast_msg_t *)tagp;
5790 	struct ether_addr *addrp;
5791 	int count;
5792 	int i;
5793 
5794 	DBG1(vgenp, ldcp, "enter\n");
5795 	switch (tagp->vio_subtype) {
5796 
5797 	case VIO_SUBTYPE_INFO:
5798 
5799 		/* vnet shouldn't recv set mcast msg, only vsw handles it */
5800 		DWARN(vgenp, ldcp, "rcvd SET_MCAST_INFO \n");
5801 		break;
5802 
5803 	case VIO_SUBTYPE_ACK:
5804 
5805 		/* success adding/removing multicast addr */
5806 		DBG1(vgenp, ldcp, "rcvd SET_MCAST_ACK \n");
5807 		break;
5808 
5809 	case VIO_SUBTYPE_NACK:
5810 
5811 		DWARN(vgenp, ldcp, "rcvd SET_MCAST_NACK \n");
5812 		if (!(msgp->set)) {
5813 			/* multicast remove request failed */
5814 			break;
5815 		}
5816 
5817 		/* multicast add request failed */
5818 		for (count = 0; count < msgp->count; count++) {
5819 			addrp = &(msgp->mca[count]);
5820 
5821 			/* delete address from the table */
5822 			for (i = 0; i < vgenp->mccount; i++) {
5823 				if (ether_cmp(addrp,
5824 				    &(vgenp->mctab[i])) == 0) {
5825 					if (vgenp->mccount > 1) {
5826 						int t = vgenp->mccount - 1;
5827 						vgenp->mctab[i] =
5828 						    vgenp->mctab[t];
5829 					}
5830 					vgenp->mccount--;
5831 					break;
5832 				}
5833 			}
5834 		}
5835 		break;
5836 
5837 	}
5838 	DBG1(vgenp, ldcp, "exit\n");
5839 
5840 	return (VGEN_SUCCESS);
5841 }
5842 
5843 /*
5844  * Physical link information message from the peer. Only vswitch should send
5845  * us this message; if the vnet device has been configured to get physical link
5846  * state updates. Note that we must have already negotiated this with the
5847  * vswitch during attribute exchange phase of handshake.
5848  */
5849 static int
5850 vgen_handle_physlink_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5851 {
5852 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
5853 	vnet_physlink_msg_t	*msgp = (vnet_physlink_msg_t *)tagp;
5854 	link_state_t		link_state;
5855 	int			rv;
5856 
5857 	if (ldcp->portp != vgenp->vsw_portp) {
5858 		/*
5859 		 * drop the message and don't process; as we should
5860 		 * receive physlink_info message from only vswitch.
5861 		 */
5862 		return (VGEN_SUCCESS);
5863 	}
5864 
5865 	if (vgenp->pls_negotiated == B_FALSE) {
5866 		/*
5867 		 * drop the message and don't process; as we should receive
5868 		 * physlink_info message only if physlink update is enabled for
5869 		 * the device and negotiated with vswitch.
5870 		 */
5871 		return (VGEN_SUCCESS);
5872 	}
5873 
5874 	switch (tagp->vio_subtype) {
5875 
5876 	case VIO_SUBTYPE_INFO:
5877 
5878 		if ((msgp->physlink_info & VNET_PHYSLINK_STATE_MASK) ==
5879 		    VNET_PHYSLINK_STATE_UP) {
5880 			link_state = LINK_STATE_UP;
5881 		} else {
5882 			link_state = LINK_STATE_DOWN;
5883 		}
5884 
5885 		if (vgenp->phys_link_state != link_state) {
5886 			vgenp->phys_link_state = link_state;
5887 			mutex_exit(&ldcp->cblock);
5888 
5889 			/* Now update the stack */
5890 			vgen_link_update(vgenp, link_state);
5891 
5892 			mutex_enter(&ldcp->cblock);
5893 		}
5894 
5895 		tagp->vio_subtype = VIO_SUBTYPE_ACK;
5896 		tagp->vio_sid = ldcp->local_sid;
5897 
5898 		/* send reply msg back to peer */
5899 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp,
5900 		    sizeof (vnet_physlink_msg_t), B_FALSE);
5901 		if (rv != VGEN_SUCCESS) {
5902 			return (rv);
5903 		}
5904 		break;
5905 
5906 	case VIO_SUBTYPE_ACK:
5907 
5908 		/* vnet shouldn't recv physlink acks */
5909 		DWARN(vgenp, ldcp, "rcvd PHYSLINK_ACK \n");
5910 		break;
5911 
5912 	case VIO_SUBTYPE_NACK:
5913 
5914 		/* vnet shouldn't recv physlink nacks */
5915 		DWARN(vgenp, ldcp, "rcvd PHYSLINK_NACK \n");
5916 		break;
5917 
5918 	}
5919 	DBG1(vgenp, ldcp, "exit\n");
5920 
5921 	return (VGEN_SUCCESS);
5922 }
5923 
5924 /* handler for control messages received from the peer ldc end-point */
5925 static int
5926 vgen_handle_ctrlmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5927 {
5928 	int rv = 0;
5929 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5930 
5931 	DBG1(vgenp, ldcp, "enter\n");
5932 	switch (tagp->vio_subtype_env) {
5933 
5934 	case VIO_VER_INFO:
5935 		rv = vgen_handle_version_negotiate(ldcp, tagp);
5936 		break;
5937 
5938 	case VIO_ATTR_INFO:
5939 		rv = vgen_handle_attr_info(ldcp, tagp);
5940 		break;
5941 
5942 	case VIO_DRING_REG:
5943 		rv = vgen_handle_dring_reg(ldcp, tagp);
5944 		break;
5945 
5946 	case VIO_RDX:
5947 		rv = vgen_handle_rdx_info(ldcp, tagp);
5948 		break;
5949 
5950 	case VNET_MCAST_INFO:
5951 		rv = vgen_handle_mcast_info(ldcp, tagp);
5952 		break;
5953 
5954 	case VIO_DDS_INFO:
5955 		/*
5956 		 * If we are in the process of resetting the vswitch channel,
5957 		 * drop the dds message. A new handshake will be initiated
5958 		 * when the channel comes back up after the reset and dds
5959 		 * negotiation can then continue.
5960 		 */
5961 		if (ldcp->need_ldc_reset == B_TRUE) {
5962 			break;
5963 		}
5964 		rv = vgen_dds_rx(ldcp, tagp);
5965 		break;
5966 
5967 	case VNET_PHYSLINK_INFO:
5968 		rv = vgen_handle_physlink_info(ldcp, tagp);
5969 		break;
5970 	}
5971 
5972 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
5973 	return (rv);
5974 }
5975 
5976 /* handler for data messages received from the peer ldc end-point */
5977 static int
5978 vgen_handle_datamsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp, uint32_t msglen)
5979 {
5980 	int rv = 0;
5981 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5982 
5983 	DBG1(vgenp, ldcp, "enter\n");
5984 
5985 	if (ldcp->hphase != VH_DONE)
5986 		return (rv);
5987 
5988 	if (tagp->vio_subtype == VIO_SUBTYPE_INFO) {
5989 		rv = vgen_check_datamsg_seq(ldcp, tagp);
5990 		if (rv != 0) {
5991 			return (rv);
5992 		}
5993 	}
5994 
5995 	switch (tagp->vio_subtype_env) {
5996 	case VIO_DRING_DATA:
5997 		rv = vgen_handle_dring_data(ldcp, tagp);
5998 		break;
5999 
6000 	case VIO_PKT_DATA:
6001 		ldcp->rx_pktdata((void *)ldcp, (void *)tagp, msglen);
6002 		break;
6003 	default:
6004 		break;
6005 	}
6006 
6007 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
6008 	return (rv);
6009 }
6010 
/*
 * dummy pkt data handler function for vnet protocol version 1.0
 *
 * Takes the same arguments as vgen_handle_pkt_data(), ignores all of them
 * and does nothing.
 */
static void
vgen_handle_pkt_data_nop(void *arg1, void *arg2, uint32_t msglen)
{
	/* annotation: the arguments are intentionally unused */
	_NOTE(ARGUNUSED(arg1, arg2, msglen))
}
6019 
6020 /*
6021  * This function handles raw pkt data messages received over the channel.
6022  * Currently, only priority-eth-type frames are received through this mechanism.
6023  * In this case, the frame(data) is present within the message itself which
6024  * is copied into an mblk before sending it up the stack.
6025  */
6026 static void
6027 vgen_handle_pkt_data(void *arg1, void *arg2, uint32_t msglen)
6028 {
6029 	vgen_ldc_t		*ldcp = (vgen_ldc_t *)arg1;
6030 	vio_raw_data_msg_t	*pkt	= (vio_raw_data_msg_t *)arg2;
6031 	uint32_t		size;
6032 	mblk_t			*mp;
6033 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
6034 	vgen_stats_t		*statsp = &ldcp->stats;
6035 	vgen_hparams_t		*lp = &ldcp->local_hparams;
6036 	vio_net_rx_cb_t		vrx_cb;
6037 
6038 	ASSERT(MUTEX_HELD(&ldcp->cblock));
6039 
6040 	mutex_exit(&ldcp->cblock);
6041 
6042 	size = msglen - VIO_PKT_DATA_HDRSIZE;
6043 	if (size < ETHERMIN || size > lp->mtu) {
6044 		(void) atomic_inc_32(&statsp->rx_pri_fail);
6045 		goto exit;
6046 	}
6047 
6048 	mp = vio_multipool_allocb(&ldcp->vmp, size);
6049 	if (mp == NULL) {
6050 		mp = allocb(size, BPRI_MED);
6051 		if (mp == NULL) {
6052 			(void) atomic_inc_32(&statsp->rx_pri_fail);
6053 			DWARN(vgenp, ldcp, "allocb failure, "
6054 			    "unable to process priority frame\n");
6055 			goto exit;
6056 		}
6057 	}
6058 
6059 	/* copy the frame from the payload of raw data msg into the mblk */
6060 	bcopy(pkt->data, mp->b_rptr, size);
6061 	mp->b_wptr = mp->b_rptr + size;
6062 
6063 	/* update stats */
6064 	(void) atomic_inc_64(&statsp->rx_pri_packets);
6065 	(void) atomic_add_64(&statsp->rx_pri_bytes, size);
6066 
6067 	/* send up; call vrx_cb() as cblock is already released */
6068 	vrx_cb = ldcp->portp->vcb.vio_net_rx_cb;
6069 	vrx_cb(ldcp->portp->vhp, mp);
6070 
6071 exit:
6072 	mutex_enter(&ldcp->cblock);
6073 }
6074 
6075 static int
6076 vgen_send_dring_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp, uint32_t start,
6077     int32_t end, uint8_t pstate)
6078 {
6079 	int rv = 0;
6080 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6081 	vio_dring_msg_t *msgp = (vio_dring_msg_t *)tagp;
6082 
6083 	tagp->vio_subtype = VIO_SUBTYPE_ACK;
6084 	tagp->vio_sid = ldcp->local_sid;
6085 	msgp->start_idx = start;
6086 	msgp->end_idx = end;
6087 	msgp->dring_process_state = pstate;
6088 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msgp), B_FALSE);
6089 	if (rv != VGEN_SUCCESS) {
6090 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
6091 	}
6092 	return (rv);
6093 }
6094 
6095 static int
6096 vgen_handle_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
6097 {
6098 	int rv = 0;
6099 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6100 
6101 
6102 	DBG1(vgenp, ldcp, "enter\n");
6103 	switch (tagp->vio_subtype) {
6104 
6105 	case VIO_SUBTYPE_INFO:
6106 		/*
6107 		 * To reduce the locking contention, release the
6108 		 * cblock here and re-acquire it once we are done
6109 		 * receiving packets.
6110 		 */
6111 		mutex_exit(&ldcp->cblock);
6112 		mutex_enter(&ldcp->rxlock);
6113 		rv = vgen_handle_dring_data_info(ldcp, tagp);
6114 		mutex_exit(&ldcp->rxlock);
6115 		mutex_enter(&ldcp->cblock);
6116 		break;
6117 
6118 	case VIO_SUBTYPE_ACK:
6119 		rv = vgen_handle_dring_data_ack(ldcp, tagp);
6120 		break;
6121 
6122 	case VIO_SUBTYPE_NACK:
6123 		rv = vgen_handle_dring_data_nack(ldcp, tagp);
6124 		break;
6125 	}
6126 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
6127 	return (rv);
6128 }
6129 
6130 static int
6131 vgen_handle_dring_data_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
6132 {
6133 	uint32_t start;
6134 	int32_t end;
6135 	int rv = 0;
6136 	vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;
6137 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6138 #ifdef VGEN_HANDLE_LOST_PKTS
6139 	vgen_stats_t *statsp = &ldcp->stats;
6140 	uint32_t rxi;
6141 	int n;
6142 #endif
6143 
6144 	DBG1(vgenp, ldcp, "enter\n");
6145 
6146 	start = dringmsg->start_idx;
6147 	end = dringmsg->end_idx;
6148 	/*
6149 	 * received a data msg, which contains the start and end
6150 	 * indices of the descriptors within the rx ring holding data,
6151 	 * the seq_num of data packet corresponding to the start index,
6152 	 * and the dring_ident.
6153 	 * We can now read the contents of each of these descriptors
6154 	 * and gather data from it.
6155 	 */
6156 	DBG1(vgenp, ldcp, "INFO: start(%d), end(%d)\n",
6157 	    start, end);
6158 
6159 	/* validate rx start and end indeces */
6160 	if (!(CHECK_RXI(start, ldcp)) || ((end != -1) &&
6161 	    !(CHECK_RXI(end, ldcp)))) {
6162 		DWARN(vgenp, ldcp, "Invalid Rx start(%d) or end(%d)\n",
6163 		    start, end);
6164 		/* drop the message if invalid index */
6165 		return (rv);
6166 	}
6167 
6168 	/* validate dring_ident */
6169 	if (dringmsg->dring_ident != ldcp->peer_hparams.dring_ident) {
6170 		DWARN(vgenp, ldcp, "Invalid dring ident 0x%x\n",
6171 		    dringmsg->dring_ident);
6172 		/* invalid dring_ident, drop the msg */
6173 		return (rv);
6174 	}
6175 #ifdef DEBUG
6176 	if (vgen_trigger_rxlost) {
6177 		/* drop this msg to simulate lost pkts for debugging */
6178 		vgen_trigger_rxlost = 0;
6179 		return (rv);
6180 	}
6181 #endif
6182 
6183 #ifdef	VGEN_HANDLE_LOST_PKTS
6184 
6185 	/* receive start index doesn't match expected index */
6186 	if (ldcp->next_rxi != start) {
6187 		DWARN(vgenp, ldcp, "next_rxi(%d) != start(%d)\n",
6188 		    ldcp->next_rxi, start);
6189 
6190 		/* calculate the number of pkts lost */
6191 		if (start >= ldcp->next_rxi) {
6192 			n = start - ldcp->next_rxi;
6193 		} else  {
6194 			n = ldcp->num_rxds - (ldcp->next_rxi - start);
6195 		}
6196 
6197 		statsp->rx_lost_pkts += n;
6198 		tagp->vio_subtype = VIO_SUBTYPE_NACK;
6199 		tagp->vio_sid = ldcp->local_sid;
6200 		/* indicate the range of lost descriptors */
6201 		dringmsg->start_idx = ldcp->next_rxi;
6202 		rxi = start;
6203 		DECR_RXI(rxi, ldcp);
6204 		dringmsg->end_idx = rxi;
6205 		/* dring ident is left unchanged */
6206 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp,
6207 		    sizeof (*dringmsg), B_FALSE);
6208 		if (rv != VGEN_SUCCESS) {
6209 			DWARN(vgenp, ldcp,
6210 			    "vgen_sendmsg failed, stype:NACK\n");
6211 			return (rv);
6212 		}
6213 		/*
6214 		 * treat this range of descrs/pkts as dropped
6215 		 * and set the new expected value of next_rxi
6216 		 * and continue(below) to process from the new
6217 		 * start index.
6218 		 */
6219 		ldcp->next_rxi = start;
6220 	}
6221 
6222 #endif	/* VGEN_HANDLE_LOST_PKTS */
6223 
6224 	/* Now receive messages */
6225 	rv = vgen_process_dring_data(ldcp, tagp);
6226 
6227 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
6228 	return (rv);
6229 }
6230 
6231 static int
6232 vgen_process_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
6233 {
6234 	boolean_t set_ack_start = B_FALSE;
6235 	uint32_t start;
6236 	uint32_t ack_end;
6237 	uint32_t next_rxi;
6238 	uint32_t rxi;
6239 	int count = 0;
6240 	int rv = 0;
6241 	uint32_t retries = 0;
6242 	vgen_stats_t *statsp;
6243 	vnet_public_desc_t rxd;
6244 	vio_dring_entry_hdr_t *hdrp;
6245 	mblk_t *bp = NULL;
6246 	mblk_t *bpt = NULL;
6247 	uint32_t ack_start;
6248 	boolean_t rxd_err = B_FALSE;
6249 	mblk_t *mp = NULL;
6250 	size_t nbytes;
6251 	boolean_t ack_needed = B_FALSE;
6252 	size_t nread;
6253 	uint64_t off = 0;
6254 	struct ether_header *ehp;
6255 	vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;
6256 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6257 	vgen_hparams_t	*lp = &ldcp->local_hparams;
6258 
6259 	DBG1(vgenp, ldcp, "enter\n");
6260 
6261 	statsp = &ldcp->stats;
6262 	start = dringmsg->start_idx;
6263 
6264 	/*
6265 	 * start processing the descriptors from the specified
6266 	 * start index, up to the index a descriptor is not ready
6267 	 * to be processed or we process the entire descriptor ring
6268 	 * and wrap around upto the start index.
6269 	 */
6270 
6271 	/* need to set the start index of descriptors to be ack'd */
6272 	set_ack_start = B_TRUE;
6273 
6274 	/* index upto which we have ack'd */
6275 	ack_end = start;
6276 	DECR_RXI(ack_end, ldcp);
6277 
6278 	next_rxi = rxi =  start;
6279 	do {
6280 vgen_recv_retry:
6281 		rv = vnet_dring_entry_copy(&(ldcp->rxdp[rxi]), &rxd,
6282 		    ldcp->dring_mtype, ldcp->rx_dhandle, rxi, rxi);
6283 		if (rv != 0) {
6284 			DWARN(vgenp, ldcp, "ldc_mem_dring_acquire() failed"
6285 			    " rv(%d)\n", rv);
6286 			statsp->ierrors++;
6287 			return (rv);
6288 		}
6289 
6290 		hdrp = &rxd.hdr;
6291 
6292 		if (hdrp->dstate != VIO_DESC_READY) {
6293 			/*
6294 			 * Before waiting and retry here, send up
6295 			 * the packets that are received already
6296 			 */
6297 			if (bp != NULL) {
6298 				DTRACE_PROBE1(vgen_rcv_msgs, int, count);
6299 				vgen_rx(ldcp, bp, bpt);
6300 				count = 0;
6301 				bp = bpt = NULL;
6302 			}
6303 			/*
6304 			 * descriptor is not ready.
6305 			 * retry descriptor acquire, stop processing
6306 			 * after max # retries.
6307 			 */
6308 			if (retries == vgen_recv_retries)
6309 				break;
6310 			retries++;
6311 			drv_usecwait(vgen_recv_delay);
6312 			goto vgen_recv_retry;
6313 		}
6314 		retries = 0;
6315 
6316 		if (set_ack_start) {
6317 			/*
6318 			 * initialize the start index of the range
6319 			 * of descriptors to be ack'd.
6320 			 */
6321 			ack_start = rxi;
6322 			set_ack_start = B_FALSE;
6323 		}
6324 
6325 		if ((rxd.nbytes < ETHERMIN) ||
6326 		    (rxd.nbytes > lp->mtu) ||
6327 		    (rxd.ncookies == 0) ||
6328 		    (rxd.ncookies > MAX_COOKIES)) {
6329 			rxd_err = B_TRUE;
6330 		} else {
6331 			/*
6332 			 * Try to allocate an mblk from the free pool
6333 			 * of recv mblks for the channel.
6334 			 * If this fails, use allocb().
6335 			 */
6336 			nbytes = (VNET_IPALIGN + rxd.nbytes + 7) & ~7;
6337 			if (nbytes > ldcp->max_rxpool_size) {
6338 				mp = allocb(VNET_IPALIGN + rxd.nbytes + 8,
6339 				    BPRI_MED);
6340 			} else {
6341 				mp = vio_multipool_allocb(&ldcp->vmp, nbytes);
6342 				if (mp == NULL) {
6343 					statsp->rx_vio_allocb_fail++;
6344 					/*
6345 					 * Data buffer returned by allocb(9F)
6346 					 * is 8byte aligned. We allocate extra
6347 					 * 8 bytes to ensure size is multiple
6348 					 * of 8 bytes for ldc_mem_copy().
6349 					 */
6350 					mp = allocb(VNET_IPALIGN +
6351 					    rxd.nbytes + 8, BPRI_MED);
6352 				}
6353 			}
6354 		}
6355 		if ((rxd_err) || (mp == NULL)) {
6356 			/*
6357 			 * rxd_err or allocb() failure,
6358 			 * drop this packet, get next.
6359 			 */
6360 			if (rxd_err) {
6361 				statsp->ierrors++;
6362 				rxd_err = B_FALSE;
6363 			} else {
6364 				statsp->rx_allocb_fail++;
6365 			}
6366 
6367 			ack_needed = hdrp->ack;
6368 
6369 			/* set descriptor done bit */
6370 			rv = vnet_dring_entry_set_dstate(&(ldcp->rxdp[rxi]),
6371 			    ldcp->dring_mtype, ldcp->rx_dhandle, rxi, rxi,
6372 			    VIO_DESC_DONE);
6373 			if (rv != 0) {
6374 				DWARN(vgenp, ldcp,
6375 				    "vnet_dring_entry_set_dstate err rv(%d)\n",
6376 				    rv);
6377 				return (rv);
6378 			}
6379 
6380 			if (ack_needed) {
6381 				ack_needed = B_FALSE;
6382 				/*
6383 				 * sender needs ack for this packet,
6384 				 * ack pkts upto this index.
6385 				 */
6386 				ack_end = rxi;
6387 
6388 				rv = vgen_send_dring_ack(ldcp, tagp,
6389 				    ack_start, ack_end,
6390 				    VIO_DP_ACTIVE);
6391 				if (rv != VGEN_SUCCESS) {
6392 					goto error_ret;
6393 				}
6394 
6395 				/* need to set new ack start index */
6396 				set_ack_start = B_TRUE;
6397 			}
6398 			goto vgen_next_rxi;
6399 		}
6400 
6401 		nread = nbytes;
6402 		rv = ldc_mem_copy(ldcp->ldc_handle,
6403 		    (caddr_t)mp->b_rptr, off, &nread,
6404 		    rxd.memcookie, rxd.ncookies, LDC_COPY_IN);
6405 
6406 		/* if ldc_mem_copy() failed */
6407 		if (rv) {
6408 			DWARN(vgenp, ldcp, "ldc_mem_copy err rv(%d)\n", rv);
6409 			statsp->ierrors++;
6410 			freemsg(mp);
6411 			goto error_ret;
6412 		}
6413 
6414 		ack_needed = hdrp->ack;
6415 
6416 		rv = vnet_dring_entry_set_dstate(&(ldcp->rxdp[rxi]),
6417 		    ldcp->dring_mtype, ldcp->rx_dhandle, rxi, rxi,
6418 		    VIO_DESC_DONE);
6419 		if (rv != 0) {
6420 			DWARN(vgenp, ldcp,
6421 			    "vnet_dring_entry_set_dstate err rv(%d)\n", rv);
6422 			goto error_ret;
6423 		}
6424 
6425 		mp->b_rptr += VNET_IPALIGN;
6426 
6427 		if (ack_needed) {
6428 			ack_needed = B_FALSE;
6429 			/*
6430 			 * sender needs ack for this packet,
6431 			 * ack pkts upto this index.
6432 			 */
6433 			ack_end = rxi;
6434 
6435 			rv = vgen_send_dring_ack(ldcp, tagp,
6436 			    ack_start, ack_end, VIO_DP_ACTIVE);
6437 			if (rv != VGEN_SUCCESS) {
6438 				goto error_ret;
6439 			}
6440 
6441 			/* need to set new ack start index */
6442 			set_ack_start = B_TRUE;
6443 		}
6444 
6445 		if (nread != nbytes) {
6446 			DWARN(vgenp, ldcp,
6447 			    "ldc_mem_copy nread(%lx), nbytes(%lx)\n",
6448 			    nread, nbytes);
6449 			statsp->ierrors++;
6450 			freemsg(mp);
6451 			goto vgen_next_rxi;
6452 		}
6453 
6454 		/* point to the actual end of data */
6455 		mp->b_wptr = mp->b_rptr + rxd.nbytes;
6456 
6457 		/* update stats */
6458 		statsp->ipackets++;
6459 		statsp->rbytes += rxd.nbytes;
6460 		ehp = (struct ether_header *)mp->b_rptr;
6461 		if (IS_BROADCAST(ehp))
6462 			statsp->brdcstrcv++;
6463 		else if (IS_MULTICAST(ehp))
6464 			statsp->multircv++;
6465 
6466 		/* build a chain of received packets */
6467 		if (bp == NULL) {
6468 			/* first pkt */
6469 			bp = mp;
6470 			bpt = bp;
6471 			bpt->b_next = NULL;
6472 		} else {
6473 			mp->b_next = NULL;
6474 			bpt->b_next = mp;
6475 			bpt = mp;
6476 		}
6477 
6478 		if (count++ > vgen_chain_len) {
6479 			DTRACE_PROBE1(vgen_rcv_msgs, int, count);
6480 			vgen_rx(ldcp, bp, bpt);
6481 			count = 0;
6482 			bp = bpt = NULL;
6483 		}
6484 
6485 vgen_next_rxi:
6486 		/* update end index of range of descrs to be ack'd */
6487 		ack_end = rxi;
6488 
6489 		/* update the next index to be processed */
6490 		INCR_RXI(next_rxi, ldcp);
6491 		if (next_rxi == start) {
6492 			/*
6493 			 * processed the entire descriptor ring upto
6494 			 * the index at which we started.
6495 			 */
6496 			break;
6497 		}
6498 
6499 		rxi = next_rxi;
6500 
6501 	_NOTE(CONSTCOND)
6502 	} while (1);
6503 
6504 	/*
6505 	 * send an ack message to peer indicating that we have stopped
6506 	 * processing descriptors.
6507 	 */
6508 	if (set_ack_start) {
6509 		/*
6510 		 * We have ack'd upto some index and we have not
6511 		 * processed any descriptors beyond that index.
6512 		 * Use the last ack'd index as both the start and
6513 		 * end of range of descrs being ack'd.
6514 		 * Note: This results in acking the last index twice
6515 		 * and should be harmless.
6516 		 */
6517 		ack_start = ack_end;
6518 	}
6519 
6520 	rv = vgen_send_dring_ack(ldcp, tagp, ack_start, ack_end,
6521 	    VIO_DP_STOPPED);
6522 	if (rv != VGEN_SUCCESS) {
6523 		goto error_ret;
6524 	}
6525 
6526 	/* save new recv index of next dring msg */
6527 	ldcp->next_rxi = next_rxi;
6528 
6529 error_ret:
6530 	/* send up packets received so far */
6531 	if (bp != NULL) {
6532 		DTRACE_PROBE1(vgen_rcv_msgs, int, count);
6533 		vgen_rx(ldcp, bp, bpt);
6534 		bp = bpt = NULL;
6535 	}
6536 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
6537 	return (rv);
6538 
6539 }
6540 
6541 static int
6542 vgen_handle_dring_data_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
6543 {
6544 	int rv = 0;
6545 	uint32_t start;
6546 	int32_t end;
6547 	uint32_t txi;
6548 	boolean_t ready_txd = B_FALSE;
6549 	vgen_stats_t *statsp;
6550 	vgen_private_desc_t *tbufp;
6551 	vnet_public_desc_t *txdp;
6552 	vio_dring_entry_hdr_t *hdrp;
6553 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6554 	vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;
6555 
6556 	DBG1(vgenp, ldcp, "enter\n");
6557 	start = dringmsg->start_idx;
6558 	end = dringmsg->end_idx;
6559 	statsp = &ldcp->stats;
6560 
6561 	/*
6562 	 * received an ack corresponding to a specific descriptor for
6563 	 * which we had set the ACK bit in the descriptor (during
6564 	 * transmit). This enables us to reclaim descriptors.
6565 	 */
6566 
6567 	DBG2(vgenp, ldcp, "ACK:  start(%d), end(%d)\n", start, end);
6568 
6569 	/* validate start and end indeces in the tx ack msg */
6570 	if (!(CHECK_TXI(start, ldcp)) || !(CHECK_TXI(end, ldcp))) {
6571 		/* drop the message if invalid index */
6572 		DWARN(vgenp, ldcp, "Invalid Tx ack start(%d) or end(%d)\n",
6573 		    start, end);
6574 		return (rv);
6575 	}
6576 	/* validate dring_ident */
6577 	if (dringmsg->dring_ident != ldcp->local_hparams.dring_ident) {
6578 		/* invalid dring_ident, drop the msg */
6579 		DWARN(vgenp, ldcp, "Invalid dring ident 0x%x\n",
6580 		    dringmsg->dring_ident);
6581 		return (rv);
6582 	}
6583 	statsp->dring_data_acks++;
6584 
6585 	/* reclaim descriptors that are done */
6586 	vgen_reclaim(ldcp);
6587 
6588 	if (dringmsg->dring_process_state != VIO_DP_STOPPED) {
6589 		/*
6590 		 * receiver continued processing descriptors after
6591 		 * sending us the ack.
6592 		 */
6593 		return (rv);
6594 	}
6595 
6596 	statsp->dring_stopped_acks++;
6597 
6598 	/* receiver stopped processing descriptors */
6599 	mutex_enter(&ldcp->wrlock);
6600 	mutex_enter(&ldcp->tclock);
6601 
6602 	/*
6603 	 * determine if there are any pending tx descriptors
6604 	 * ready to be processed by the receiver(peer) and if so,
6605 	 * send a message to the peer to restart receiving.
6606 	 */
6607 	ready_txd = B_FALSE;
6608 
6609 	/*
6610 	 * using the end index of the descriptor range for which
6611 	 * we received the ack, check if the next descriptor is
6612 	 * ready.
6613 	 */
6614 	txi = end;
6615 	INCR_TXI(txi, ldcp);
6616 	tbufp = &ldcp->tbufp[txi];
6617 	txdp = tbufp->descp;
6618 	hdrp = &txdp->hdr;
6619 	if (hdrp->dstate == VIO_DESC_READY) {
6620 		ready_txd = B_TRUE;
6621 	} else {
6622 		/*
6623 		 * descr next to the end of ack'd descr range is not
6624 		 * ready.
6625 		 * starting from the current reclaim index, check
6626 		 * if any descriptor is ready.
6627 		 */
6628 
6629 		txi = ldcp->cur_tbufp - ldcp->tbufp;
6630 		tbufp = &ldcp->tbufp[txi];
6631 
6632 		txdp = tbufp->descp;
6633 		hdrp = &txdp->hdr;
6634 		if (hdrp->dstate == VIO_DESC_READY) {
6635 			ready_txd = B_TRUE;
6636 		}
6637 
6638 	}
6639 
6640 	if (ready_txd) {
6641 		/*
6642 		 * we have tx descriptor(s) ready to be
6643 		 * processed by the receiver.
6644 		 * send a message to the peer with the start index
6645 		 * of ready descriptors.
6646 		 */
6647 		rv = vgen_send_dring_data(ldcp, txi, -1);
6648 		if (rv != VGEN_SUCCESS) {
6649 			ldcp->resched_peer = B_TRUE;
6650 			ldcp->resched_peer_txi = txi;
6651 			mutex_exit(&ldcp->tclock);
6652 			mutex_exit(&ldcp->wrlock);
6653 			return (rv);
6654 		}
6655 	} else {
6656 		/*
6657 		 * no ready tx descriptors. set the flag to send a
6658 		 * message to peer when tx descriptors are ready in
6659 		 * transmit routine.
6660 		 */
6661 		ldcp->resched_peer = B_TRUE;
6662 		ldcp->resched_peer_txi = ldcp->cur_tbufp - ldcp->tbufp;
6663 	}
6664 
6665 	mutex_exit(&ldcp->tclock);
6666 	mutex_exit(&ldcp->wrlock);
6667 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
6668 	return (rv);
6669 }
6670 
6671 static int
6672 vgen_handle_dring_data_nack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
6673 {
6674 	int rv = 0;
6675 	uint32_t start;
6676 	int32_t end;
6677 	uint32_t txi;
6678 	vnet_public_desc_t *txdp;
6679 	vio_dring_entry_hdr_t *hdrp;
6680 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6681 	vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;
6682 
6683 	DBG1(vgenp, ldcp, "enter\n");
6684 	start = dringmsg->start_idx;
6685 	end = dringmsg->end_idx;
6686 
6687 	/*
6688 	 * peer sent a NACK msg to indicate lost packets.
6689 	 * The start and end correspond to the range of descriptors
6690 	 * for which the peer didn't receive a dring data msg and so
6691 	 * didn't receive the corresponding data.
6692 	 */
6693 	DWARN(vgenp, ldcp, "NACK: start(%d), end(%d)\n", start, end);
6694 
6695 	/* validate start and end indeces in the tx nack msg */
6696 	if (!(CHECK_TXI(start, ldcp)) || !(CHECK_TXI(end, ldcp))) {
6697 		/* drop the message if invalid index */
6698 		DWARN(vgenp, ldcp, "Invalid Tx nack start(%d) or end(%d)\n",
6699 		    start, end);
6700 		return (rv);
6701 	}
6702 	/* validate dring_ident */
6703 	if (dringmsg->dring_ident != ldcp->local_hparams.dring_ident) {
6704 		/* invalid dring_ident, drop the msg */
6705 		DWARN(vgenp, ldcp, "Invalid dring ident 0x%x\n",
6706 		    dringmsg->dring_ident);
6707 		return (rv);
6708 	}
6709 	mutex_enter(&ldcp->txlock);
6710 	mutex_enter(&ldcp->tclock);
6711 
6712 	if (ldcp->next_tbufp == ldcp->cur_tbufp) {
6713 		/* no busy descriptors, bogus nack ? */
6714 		mutex_exit(&ldcp->tclock);
6715 		mutex_exit(&ldcp->txlock);
6716 		return (rv);
6717 	}
6718 
6719 	/* we just mark the descrs as done so they can be reclaimed */
6720 	for (txi = start; txi <= end; ) {
6721 		txdp = &(ldcp->txdp[txi]);
6722 		hdrp = &txdp->hdr;
6723 		if (hdrp->dstate == VIO_DESC_READY)
6724 			hdrp->dstate = VIO_DESC_DONE;
6725 		INCR_TXI(txi, ldcp);
6726 	}
6727 	mutex_exit(&ldcp->tclock);
6728 	mutex_exit(&ldcp->txlock);
6729 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
6730 	return (rv);
6731 }
6732 
6733 static void
6734 vgen_reclaim(vgen_ldc_t *ldcp)
6735 {
6736 	mutex_enter(&ldcp->tclock);
6737 
6738 	vgen_reclaim_dring(ldcp);
6739 	ldcp->reclaim_lbolt = ddi_get_lbolt();
6740 
6741 	mutex_exit(&ldcp->tclock);
6742 }
6743 
6744 /*
6745  * transmit reclaim function. starting from the current reclaim index
6746  * look for descriptors marked DONE and reclaim the descriptor and the
6747  * corresponding buffers (tbuf).
6748  */
6749 static void
6750 vgen_reclaim_dring(vgen_ldc_t *ldcp)
6751 {
6752 	int count = 0;
6753 	vnet_public_desc_t *txdp;
6754 	vgen_private_desc_t *tbufp;
6755 	vio_dring_entry_hdr_t	*hdrp;
6756 
6757 #ifdef DEBUG
6758 	if (vgen_trigger_txtimeout)
6759 		return;
6760 #endif
6761 
6762 	tbufp = ldcp->cur_tbufp;
6763 	txdp = tbufp->descp;
6764 	hdrp = &txdp->hdr;
6765 
6766 	while ((hdrp->dstate == VIO_DESC_DONE) &&
6767 	    (tbufp != ldcp->next_tbufp)) {
6768 		tbufp->flags = VGEN_PRIV_DESC_FREE;
6769 		hdrp->dstate = VIO_DESC_FREE;
6770 		hdrp->ack = B_FALSE;
6771 
6772 		tbufp = NEXTTBUF(ldcp, tbufp);
6773 		txdp = tbufp->descp;
6774 		hdrp = &txdp->hdr;
6775 		count++;
6776 	}
6777 
6778 	ldcp->cur_tbufp = tbufp;
6779 
6780 	/*
6781 	 * Check if mac layer should be notified to restart transmissions
6782 	 */
6783 	if ((ldcp->need_resched) && (count > 0)) {
6784 		vio_net_tx_update_t vtx_update =
6785 		    ldcp->portp->vcb.vio_net_tx_update;
6786 
6787 		ldcp->need_resched = B_FALSE;
6788 		vtx_update(ldcp->portp->vhp);
6789 	}
6790 }
6791 
6792 /* return the number of pending transmits for the channel */
6793 static int
6794 vgen_num_txpending(vgen_ldc_t *ldcp)
6795 {
6796 	int n;
6797 
6798 	if (ldcp->next_tbufp >= ldcp->cur_tbufp) {
6799 		n = ldcp->next_tbufp - ldcp->cur_tbufp;
6800 	} else  {
6801 		/* cur_tbufp > next_tbufp */
6802 		n = ldcp->num_txds - (ldcp->cur_tbufp - ldcp->next_tbufp);
6803 	}
6804 
6805 	return (n);
6806 }
6807 
6808 /* determine if the transmit descriptor ring is full */
6809 static int
6810 vgen_tx_dring_full(vgen_ldc_t *ldcp)
6811 {
6812 	vgen_private_desc_t	*tbufp;
6813 	vgen_private_desc_t	*ntbufp;
6814 
6815 	tbufp = ldcp->next_tbufp;
6816 	ntbufp = NEXTTBUF(ldcp, tbufp);
6817 	if (ntbufp == ldcp->cur_tbufp) { /* out of tbufs/txds */
6818 		return (VGEN_SUCCESS);
6819 	}
6820 	return (VGEN_FAILURE);
6821 }
6822 
6823 /* determine if timeout condition has occured */
6824 static int
6825 vgen_ldc_txtimeout(vgen_ldc_t *ldcp)
6826 {
6827 	if (((ddi_get_lbolt() - ldcp->reclaim_lbolt) >
6828 	    drv_usectohz(vnet_ldcwd_txtimeout * 1000)) &&
6829 	    (vnet_ldcwd_txtimeout) &&
6830 	    (vgen_tx_dring_full(ldcp) == VGEN_SUCCESS)) {
6831 		return (VGEN_SUCCESS);
6832 	} else {
6833 		return (VGEN_FAILURE);
6834 	}
6835 }
6836 
6837 /* transmit watchdog timeout handler */
6838 static void
6839 vgen_ldc_watchdog(void *arg)
6840 {
6841 	vgen_ldc_t *ldcp;
6842 	vgen_t *vgenp;
6843 	int rv;
6844 
6845 	ldcp = (vgen_ldc_t *)arg;
6846 	vgenp = LDC_TO_VGEN(ldcp);
6847 
6848 	rv = vgen_ldc_txtimeout(ldcp);
6849 	if (rv == VGEN_SUCCESS) {
6850 		DWARN(vgenp, ldcp, "transmit timeout\n");
6851 #ifdef DEBUG
6852 		if (vgen_trigger_txtimeout) {
6853 			/* tx timeout triggered for debugging */
6854 			vgen_trigger_txtimeout = 0;
6855 		}
6856 #endif
6857 		mutex_enter(&ldcp->cblock);
6858 		vgen_ldc_reset(ldcp);
6859 		mutex_exit(&ldcp->cblock);
6860 		if (ldcp->need_resched) {
6861 			vio_net_tx_update_t vtx_update =
6862 			    ldcp->portp->vcb.vio_net_tx_update;
6863 
6864 			ldcp->need_resched = B_FALSE;
6865 			vtx_update(ldcp->portp->vhp);
6866 		}
6867 	}
6868 
6869 	ldcp->wd_tid = timeout(vgen_ldc_watchdog, (caddr_t)ldcp,
6870 	    drv_usectohz(vnet_ldcwd_interval * 1000));
6871 }
6872 
6873 /* handler for error messages received from the peer ldc end-point */
6874 static void
6875 vgen_handle_errmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
6876 {
6877 	_NOTE(ARGUNUSED(ldcp, tagp))
6878 }
6879 
6880 static int
6881 vgen_check_datamsg_seq(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
6882 {
6883 	vio_raw_data_msg_t	*rmsg;
6884 	vio_dring_msg_t		*dmsg;
6885 	uint64_t		seq_num;
6886 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
6887 
6888 	if (tagp->vio_subtype_env == VIO_DRING_DATA) {
6889 		dmsg = (vio_dring_msg_t *)tagp;
6890 		seq_num = dmsg->seq_num;
6891 	} else if (tagp->vio_subtype_env == VIO_PKT_DATA) {
6892 		rmsg = (vio_raw_data_msg_t *)tagp;
6893 		seq_num = rmsg->seq_num;
6894 	} else {
6895 		return (EINVAL);
6896 	}
6897 
6898 	if (seq_num != ldcp->next_rxseq) {
6899 
6900 		/* seqnums don't match */
6901 		DWARN(vgenp, ldcp,
6902 		    "next_rxseq(0x%lx) != seq_num(0x%lx)\n",
6903 		    ldcp->next_rxseq, seq_num);
6904 
6905 		return (EINVAL);
6906 
6907 	}
6908 
6909 	ldcp->next_rxseq++;
6910 
6911 	return (0);
6912 }
6913 
6914 /* Check if the session id in the received message is valid */
6915 static int
6916 vgen_check_sid(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
6917 {
6918 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6919 
6920 	if (tagp->vio_sid != ldcp->peer_sid) {
6921 		DWARN(vgenp, ldcp, "sid mismatch: expected(%x), rcvd(%x)\n",
6922 		    ldcp->peer_sid, tagp->vio_sid);
6923 		return (VGEN_FAILURE);
6924 	}
6925 	else
6926 		return (VGEN_SUCCESS);
6927 }
6928 
6929 static caddr_t
6930 vgen_print_ethaddr(uint8_t *a, char *ebuf)
6931 {
6932 	(void) sprintf(ebuf,
6933 	    "%x:%x:%x:%x:%x:%x", a[0], a[1], a[2], a[3], a[4], a[5]);
6934 	return (ebuf);
6935 }
6936 
6937 /* Handshake watchdog timeout handler */
6938 static void
6939 vgen_hwatchdog(void *arg)
6940 {
6941 	vgen_ldc_t *ldcp = (vgen_ldc_t *)arg;
6942 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6943 
6944 	DWARN(vgenp, ldcp, "handshake timeout phase(%x) state(%x)\n",
6945 	    ldcp->hphase, ldcp->hstate);
6946 
6947 	mutex_enter(&ldcp->cblock);
6948 	if (ldcp->cancel_htid) {
6949 		ldcp->cancel_htid = 0;
6950 		mutex_exit(&ldcp->cblock);
6951 		return;
6952 	}
6953 	ldcp->htid = 0;
6954 	vgen_ldc_reset(ldcp);
6955 	mutex_exit(&ldcp->cblock);
6956 }
6957 
6958 static void
6959 vgen_print_hparams(vgen_hparams_t *hp)
6960 {
6961 	uint8_t	addr[6];
6962 	char	ea[6];
6963 	ldc_mem_cookie_t *dc;
6964 
6965 	cmn_err(CE_CONT, "version_info:\n");
6966 	cmn_err(CE_CONT,
6967 	    "\tver_major: %d, ver_minor: %d, dev_class: %d\n",
6968 	    hp->ver_major, hp->ver_minor, hp->dev_class);
6969 
6970 	vnet_macaddr_ultostr(hp->addr, addr);
6971 	cmn_err(CE_CONT, "attr_info:\n");
6972 	cmn_err(CE_CONT, "\tMTU: %lx, addr: %s\n", hp->mtu,
6973 	    vgen_print_ethaddr(addr, ea));
6974 	cmn_err(CE_CONT,
6975 	    "\taddr_type: %x, xfer_mode: %x, ack_freq: %x\n",
6976 	    hp->addr_type, hp->xfer_mode, hp->ack_freq);
6977 
6978 	dc = &hp->dring_cookie;
6979 	cmn_err(CE_CONT, "dring_info:\n");
6980 	cmn_err(CE_CONT,
6981 	    "\tlength: %d, dsize: %d\n", hp->num_desc, hp->desc_size);
6982 	cmn_err(CE_CONT,
6983 	    "\tldc_addr: 0x%lx, ldc_size: %ld\n",
6984 	    dc->addr, dc->size);
6985 	cmn_err(CE_CONT, "\tdring_ident: 0x%lx\n", hp->dring_ident);
6986 }
6987 
6988 static void
6989 vgen_print_ldcinfo(vgen_ldc_t *ldcp)
6990 {
6991 	vgen_hparams_t *hp;
6992 
6993 	cmn_err(CE_CONT, "Channel Information:\n");
6994 	cmn_err(CE_CONT,
6995 	    "\tldc_id: 0x%lx, ldc_status: 0x%x\n",
6996 	    ldcp->ldc_id, ldcp->ldc_status);
6997 	cmn_err(CE_CONT,
6998 	    "\tlocal_sid: 0x%x, peer_sid: 0x%x\n",
6999 	    ldcp->local_sid, ldcp->peer_sid);
7000 	cmn_err(CE_CONT,
7001 	    "\thphase: 0x%x, hstate: 0x%x\n",
7002 	    ldcp->hphase, ldcp->hstate);
7003 
7004 	cmn_err(CE_CONT, "Local handshake params:\n");
7005 	hp = &ldcp->local_hparams;
7006 	vgen_print_hparams(hp);
7007 
7008 	cmn_err(CE_CONT, "Peer handshake params:\n");
7009 	hp = &ldcp->peer_hparams;
7010 	vgen_print_hparams(hp);
7011 }
7012 
7013 /*
7014  * Send received packets up the stack.
7015  */
7016 static void
7017 vgen_rx(vgen_ldc_t *ldcp, mblk_t *bp, mblk_t *bpt)
7018 {
7019 	vio_net_rx_cb_t vrx_cb = ldcp->portp->vcb.vio_net_rx_cb;
7020 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
7021 
7022 	if (ldcp->rcv_thread != NULL) {
7023 		ASSERT(MUTEX_HELD(&ldcp->rxlock));
7024 	} else {
7025 		ASSERT(MUTEX_HELD(&ldcp->cblock));
7026 	}
7027 
7028 	mutex_enter(&ldcp->pollq_lock);
7029 
7030 	if (ldcp->polling_on == B_TRUE) {
7031 		/*
7032 		 * If we are in polling mode, simply queue
7033 		 * the packets onto the poll queue and return.
7034 		 */
7035 		if (ldcp->pollq_headp == NULL) {
7036 			ldcp->pollq_headp = bp;
7037 			ldcp->pollq_tailp = bpt;
7038 		} else {
7039 			ldcp->pollq_tailp->b_next = bp;
7040 			ldcp->pollq_tailp = bpt;
7041 		}
7042 
7043 		mutex_exit(&ldcp->pollq_lock);
7044 		return;
7045 	}
7046 
7047 	/*
7048 	 * Prepend any pending mblks in the poll queue, now that we
7049 	 * are in interrupt mode, before sending up the chain of pkts.
7050 	 */
7051 	if (ldcp->pollq_headp != NULL) {
7052 		DBG2(vgenp, ldcp, "vgen_rx(%lx), pending pollq_headp\n",
7053 		    (uintptr_t)ldcp);
7054 		ldcp->pollq_tailp->b_next = bp;
7055 		bp = ldcp->pollq_headp;
7056 		ldcp->pollq_headp = ldcp->pollq_tailp = NULL;
7057 	}
7058 
7059 	mutex_exit(&ldcp->pollq_lock);
7060 
7061 	if (ldcp->rcv_thread != NULL) {
7062 		mutex_exit(&ldcp->rxlock);
7063 	} else {
7064 		mutex_exit(&ldcp->cblock);
7065 	}
7066 
7067 	/* Send up the packets */
7068 	vrx_cb(ldcp->portp->vhp, bp);
7069 
7070 	if (ldcp->rcv_thread != NULL) {
7071 		mutex_enter(&ldcp->rxlock);
7072 	} else {
7073 		mutex_enter(&ldcp->cblock);
7074 	}
7075 }
7076 
7077 /*
7078  * vgen_ldc_rcv_worker -- A per LDC worker thread to receive data.
7079  * This thread is woken up by the LDC interrupt handler to process
7080  * LDC packets and receive data.
7081  */
7082 static void
7083 vgen_ldc_rcv_worker(void *arg)
7084 {
7085 	callb_cpr_t	cprinfo;
7086 	vgen_ldc_t *ldcp = (vgen_ldc_t *)arg;
7087 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
7088 
7089 	DBG1(vgenp, ldcp, "enter\n");
7090 	CALLB_CPR_INIT(&cprinfo, &ldcp->rcv_thr_lock, callb_generic_cpr,
7091 	    "vnet_rcv_thread");
7092 	mutex_enter(&ldcp->rcv_thr_lock);
7093 	while (!(ldcp->rcv_thr_flags & VGEN_WTHR_STOP)) {
7094 
7095 		CALLB_CPR_SAFE_BEGIN(&cprinfo);
7096 		/*
7097 		 * Wait until the data is received or a stop
7098 		 * request is received.
7099 		 */
7100 		while (!(ldcp->rcv_thr_flags &
7101 		    (VGEN_WTHR_DATARCVD | VGEN_WTHR_STOP))) {
7102 			cv_wait(&ldcp->rcv_thr_cv, &ldcp->rcv_thr_lock);
7103 		}
7104 		CALLB_CPR_SAFE_END(&cprinfo, &ldcp->rcv_thr_lock)
7105 
7106 		/*
7107 		 * First process the stop request.
7108 		 */
7109 		if (ldcp->rcv_thr_flags & VGEN_WTHR_STOP) {
7110 			DBG2(vgenp, ldcp, "stopped\n");
7111 			break;
7112 		}
7113 		ldcp->rcv_thr_flags &= ~VGEN_WTHR_DATARCVD;
7114 		ldcp->rcv_thr_flags |= VGEN_WTHR_PROCESSING;
7115 		mutex_exit(&ldcp->rcv_thr_lock);
7116 		DBG2(vgenp, ldcp, "calling vgen_handle_evt_read\n");
7117 		vgen_handle_evt_read(ldcp);
7118 		mutex_enter(&ldcp->rcv_thr_lock);
7119 		ldcp->rcv_thr_flags &= ~VGEN_WTHR_PROCESSING;
7120 	}
7121 
7122 	/*
7123 	 * Update the run status and wakeup the thread that
7124 	 * has sent the stop request.
7125 	 */
7126 	ldcp->rcv_thr_flags &= ~VGEN_WTHR_STOP;
7127 	ldcp->rcv_thread = NULL;
7128 	CALLB_CPR_EXIT(&cprinfo);
7129 
7130 	thread_exit();
7131 	DBG1(vgenp, ldcp, "exit\n");
7132 }
7133 
7134 /* vgen_stop_rcv_thread -- Co-ordinate with receive thread to stop it */
7135 static void
7136 vgen_stop_rcv_thread(vgen_ldc_t *ldcp)
7137 {
7138 	kt_did_t	tid = 0;
7139 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
7140 
7141 	DBG1(vgenp, ldcp, "enter\n");
7142 	/*
7143 	 * Send a stop request by setting the stop flag and
7144 	 * wait until the receive thread stops.
7145 	 */
7146 	mutex_enter(&ldcp->rcv_thr_lock);
7147 	if (ldcp->rcv_thread != NULL) {
7148 		tid = ldcp->rcv_thread->t_did;
7149 		ldcp->rcv_thr_flags |= VGEN_WTHR_STOP;
7150 		cv_signal(&ldcp->rcv_thr_cv);
7151 	}
7152 	mutex_exit(&ldcp->rcv_thr_lock);
7153 
7154 	if (tid != 0) {
7155 		thread_join(tid);
7156 	}
7157 	DBG1(vgenp, ldcp, "exit\n");
7158 }
7159 
7160 /*
7161  * Wait for the channel rx-queue to be drained by allowing the receive
7162  * worker thread to read all messages from the rx-queue of the channel.
7163  * Assumption: further callbacks are disabled at this time.
7164  */
7165 static void
7166 vgen_drain_rcv_thread(vgen_ldc_t *ldcp)
7167 {
7168 	clock_t	tm;
7169 	clock_t	wt;
7170 	clock_t	rv;
7171 
7172 	/*
7173 	 * If there is data in ldc rx queue, wait until the rx
7174 	 * worker thread runs and drains all msgs in the queue.
7175 	 */
7176 	wt = drv_usectohz(MILLISEC);
7177 
7178 	mutex_enter(&ldcp->rcv_thr_lock);
7179 
7180 	tm = ddi_get_lbolt() + wt;
7181 
7182 	/*
7183 	 * We need to check both bits - DATARCVD and PROCESSING, to be cleared.
7184 	 * If DATARCVD is set, that means the callback has signalled the worker
7185 	 * thread, but the worker hasn't started processing yet. If PROCESSING
7186 	 * is set, that means the thread is awake and processing. Note that the
7187 	 * DATARCVD state can only be seen once, as the assumption is that
7188 	 * further callbacks have been disabled at this point.
7189 	 */
7190 	while (ldcp->rcv_thr_flags &
7191 	    (VGEN_WTHR_DATARCVD | VGEN_WTHR_PROCESSING)) {
7192 		rv = cv_timedwait(&ldcp->rcv_thr_cv, &ldcp->rcv_thr_lock, tm);
7193 		if (rv == -1) {	/* timeout */
7194 			/*
7195 			 * Note that the only way we return is due to a timeout;
7196 			 * we set the new time to wait, before we go back and
7197 			 * check the condition. The other(unlikely) possibility
7198 			 * is a premature wakeup(see cv_timedwait(9F)) in which
7199 			 * case we just continue to use the same time to wait.
7200 			 */
7201 			tm = ddi_get_lbolt() + wt;
7202 		}
7203 	}
7204 
7205 	mutex_exit(&ldcp->rcv_thr_lock);
7206 }
7207 
7208 /*
7209  * vgen_dds_rx -- post DDS messages to vnet.
7210  */
7211 static int
7212 vgen_dds_rx(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
7213 {
7214 	vio_dds_msg_t *dmsg = (vio_dds_msg_t *)tagp;
7215 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
7216 
7217 	if (dmsg->dds_class != DDS_VNET_NIU) {
7218 		DWARN(vgenp, ldcp, "Unknown DDS class, dropping");
7219 		return (EBADMSG);
7220 	}
7221 	vnet_dds_rx(vgenp->vnetp, dmsg);
7222 	return (0);
7223 }
7224 
7225 /*
7226  * vgen_dds_tx -- an interface called by vnet to send DDS messages.
7227  */
7228 int
7229 vgen_dds_tx(void *arg, void *msg)
7230 {
7231 	vgen_t *vgenp = arg;
7232 	vio_dds_msg_t *dmsg = msg;
7233 	vgen_portlist_t *plistp = &vgenp->vgenports;
7234 	vgen_ldc_t *ldcp;
7235 	vgen_ldclist_t *ldclp;
7236 	int rv = EIO;
7237 
7238 
7239 	READ_ENTER(&plistp->rwlock);
7240 	ldclp = &(vgenp->vsw_portp->ldclist);
7241 	READ_ENTER(&ldclp->rwlock);
7242 	ldcp = ldclp->headp;
7243 	if ((ldcp == NULL) || (ldcp->hphase != VH_DONE)) {
7244 		goto vgen_dsend_exit;
7245 	}
7246 
7247 	dmsg->tag.vio_sid = ldcp->local_sid;
7248 	rv = vgen_sendmsg(ldcp, (caddr_t)dmsg, sizeof (vio_dds_msg_t), B_FALSE);
7249 	if (rv != VGEN_SUCCESS) {
7250 		rv = EIO;
7251 	} else {
7252 		rv = 0;
7253 	}
7254 
7255 vgen_dsend_exit:
7256 	RW_EXIT(&ldclp->rwlock);
7257 	RW_EXIT(&plistp->rwlock);
7258 	return (rv);
7259 
7260 }
7261 
7262 static void
7263 vgen_ldc_reset(vgen_ldc_t *ldcp)
7264 {
7265 	vnet_t	*vnetp = LDC_TO_VNET(ldcp);
7266 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
7267 
7268 	ASSERT(MUTEX_HELD(&ldcp->cblock));
7269 
7270 	if (ldcp->need_ldc_reset == B_TRUE) {
7271 		/* another thread is already in the process of resetting */
7272 		return;
7273 	}
7274 
7275 	/* Set the flag to indicate reset is in progress */
7276 	ldcp->need_ldc_reset = B_TRUE;
7277 
7278 	if (ldcp->portp == vgenp->vsw_portp) {
7279 		mutex_exit(&ldcp->cblock);
7280 		/*
7281 		 * Now cleanup any HIO resources; the above flag also tells
7282 		 * the code that handles dds messages to drop any new msgs
7283 		 * that arrive while we are cleaning up and resetting the
7284 		 * channel.
7285 		 */
7286 		vnet_dds_cleanup_hio(vnetp);
7287 		mutex_enter(&ldcp->cblock);
7288 	}
7289 
7290 	vgen_handshake_retry(ldcp);
7291 }
7292 
7293 int
7294 vgen_enable_intr(void *arg)
7295 {
7296 	vgen_port_t		*portp = (vgen_port_t *)arg;
7297 	vgen_ldclist_t		*ldclp;
7298 	vgen_ldc_t		*ldcp;
7299 
7300 	ldclp = &portp->ldclist;
7301 	READ_ENTER(&ldclp->rwlock);
7302 	/*
7303 	 * NOTE: for now, we will assume we have a single channel.
7304 	 */
7305 	if (ldclp->headp == NULL) {
7306 		RW_EXIT(&ldclp->rwlock);
7307 		return (1);
7308 	}
7309 	ldcp = ldclp->headp;
7310 
7311 	mutex_enter(&ldcp->pollq_lock);
7312 	ldcp->polling_on = B_FALSE;
7313 	mutex_exit(&ldcp->pollq_lock);
7314 
7315 	RW_EXIT(&ldclp->rwlock);
7316 
7317 	return (0);
7318 }
7319 
7320 int
7321 vgen_disable_intr(void *arg)
7322 {
7323 	vgen_port_t		*portp = (vgen_port_t *)arg;
7324 	vgen_ldclist_t		*ldclp;
7325 	vgen_ldc_t		*ldcp;
7326 
7327 	ldclp = &portp->ldclist;
7328 	READ_ENTER(&ldclp->rwlock);
7329 	/*
7330 	 * NOTE: for now, we will assume we have a single channel.
7331 	 */
7332 	if (ldclp->headp == NULL) {
7333 		RW_EXIT(&ldclp->rwlock);
7334 		return (1);
7335 	}
7336 	ldcp = ldclp->headp;
7337 
7338 
7339 	mutex_enter(&ldcp->pollq_lock);
7340 	ldcp->polling_on = B_TRUE;
7341 	mutex_exit(&ldcp->pollq_lock);
7342 
7343 	RW_EXIT(&ldclp->rwlock);
7344 
7345 	return (0);
7346 }
7347 
7348 mblk_t *
7349 vgen_poll(void *arg, int bytes_to_pickup)
7350 {
7351 	vgen_port_t		*portp = (vgen_port_t *)arg;
7352 	vgen_ldclist_t		*ldclp;
7353 	vgen_ldc_t		*ldcp;
7354 	mblk_t			*mp = NULL;
7355 
7356 	ldclp = &portp->ldclist;
7357 	READ_ENTER(&ldclp->rwlock);
7358 	/*
7359 	 * NOTE: for now, we will assume we have a single channel.
7360 	 */
7361 	if (ldclp->headp == NULL) {
7362 		RW_EXIT(&ldclp->rwlock);
7363 		return (NULL);
7364 	}
7365 	ldcp = ldclp->headp;
7366 
7367 	mp = vgen_ldc_poll(ldcp, bytes_to_pickup);
7368 
7369 	RW_EXIT(&ldclp->rwlock);
7370 	return (mp);
7371 }
7372 
7373 static mblk_t *
7374 vgen_ldc_poll(vgen_ldc_t *ldcp, int bytes_to_pickup)
7375 {
7376 	mblk_t	*bp = NULL;
7377 	mblk_t	*bpt = NULL;
7378 	mblk_t	*mp = NULL;
7379 	size_t	mblk_sz = 0;
7380 	size_t	sz = 0;
7381 	uint_t	count = 0;
7382 
7383 	mutex_enter(&ldcp->pollq_lock);
7384 
7385 	bp = ldcp->pollq_headp;
7386 	while (bp != NULL) {
7387 		/* get the size of this packet */
7388 		mblk_sz = msgdsize(bp);
7389 
7390 		/* if adding this pkt, exceeds the size limit, we are done. */
7391 		if (sz + mblk_sz >  bytes_to_pickup) {
7392 			break;
7393 		}
7394 
7395 		/* we have room for this packet */
7396 		sz += mblk_sz;
7397 
7398 		/* increment the # of packets being sent up */
7399 		count++;
7400 
7401 		/* track the last processed pkt */
7402 		bpt = bp;
7403 
7404 		/* get the next pkt */
7405 		bp = bp->b_next;
7406 	}
7407 
7408 	if (count != 0) {
7409 		/*
7410 		 * picked up some packets; save the head of pkts to be sent up.
7411 		 */
7412 		mp = ldcp->pollq_headp;
7413 
7414 		/* move the pollq_headp to skip over the pkts being sent up */
7415 		ldcp->pollq_headp = bp;
7416 
7417 		/* picked up all pending pkts in the queue; reset tail also */
7418 		if (ldcp->pollq_headp == NULL) {
7419 			ldcp->pollq_tailp = NULL;
7420 		}
7421 
7422 		/* terminate the tail of pkts to be sent up */
7423 		bpt->b_next = NULL;
7424 	}
7425 
7426 	mutex_exit(&ldcp->pollq_lock);
7427 
7428 	DTRACE_PROBE1(vgen_poll_pkts, uint_t, count);
7429 	return (mp);
7430 }
7431 
7432 #if DEBUG
7433 
7434 /*
7435  * Print debug messages - set to 0xf to enable all msgs
7436  */
7437 static void
7438 debug_printf(const char *fname, vgen_t *vgenp,
7439     vgen_ldc_t *ldcp, const char *fmt, ...)
7440 {
7441 	char    buf[256];
7442 	char    *bufp = buf;
7443 	va_list ap;
7444 
7445 	if ((vgenp != NULL) && (vgenp->vnetp != NULL)) {
7446 		(void) sprintf(bufp, "vnet%d:",
7447 		    ((vnet_t *)(vgenp->vnetp))->instance);
7448 		bufp += strlen(bufp);
7449 	}
7450 	if (ldcp != NULL) {
7451 		(void) sprintf(bufp, "ldc(%ld):", ldcp->ldc_id);
7452 		bufp += strlen(bufp);
7453 	}
7454 	(void) sprintf(bufp, "%s: ", fname);
7455 	bufp += strlen(bufp);
7456 
7457 	va_start(ap, fmt);
7458 	(void) vsprintf(bufp, fmt, ap);
7459 	va_end(ap);
7460 
7461 	if ((ldcp == NULL) ||(vgendbg_ldcid == -1) ||
7462 	    (vgendbg_ldcid == ldcp->ldc_id)) {
7463 		cmn_err(CE_CONT, "%s\n", buf);
7464 	}
7465 }
7466 #endif
7467 
7468 #ifdef	VNET_IOC_DEBUG
7469 
7470 static void
7471 vgen_ioctl(void *arg, queue_t *q, mblk_t *mp)
7472 {
7473 	struct iocblk	*iocp;
7474 	vgen_port_t	*portp;
7475 	enum		ioc_reply {
7476 			IOC_INVAL = -1,		/* bad, NAK with EINVAL */
7477 			IOC_ACK			/* OK, just send ACK    */
7478 	}		status;
7479 	int		rv;
7480 
7481 	iocp = (struct iocblk *)(uintptr_t)mp->b_rptr;
7482 	iocp->ioc_error = 0;
7483 	portp = (vgen_port_t *)arg;
7484 
7485 	if (portp == NULL) {
7486 		status = IOC_INVAL;
7487 		goto vgen_ioc_exit;
7488 	}
7489 
7490 	mutex_enter(&portp->lock);
7491 
7492 	switch (iocp->ioc_cmd) {
7493 
7494 	case VNET_FORCE_LINK_DOWN:
7495 	case VNET_FORCE_LINK_UP:
7496 		rv = vgen_force_link_state(portp, iocp->ioc_cmd);
7497 		(rv == 0) ? (status = IOC_ACK) : (status = IOC_INVAL);
7498 		break;
7499 
7500 	default:
7501 		status = IOC_INVAL;
7502 		break;
7503 
7504 	}
7505 
7506 	mutex_exit(&portp->lock);
7507 
7508 vgen_ioc_exit:
7509 
7510 	switch (status) {
7511 	default:
7512 	case IOC_INVAL:
7513 		/* Error, reply with a NAK and EINVAL error */
7514 		miocnak(q, mp, 0, EINVAL);
7515 		break;
7516 	case IOC_ACK:
7517 		/* OK, reply with an ACK */
7518 		miocack(q, mp, 0, 0);
7519 		break;
7520 	}
7521 }
7522 
7523 static int
7524 vgen_force_link_state(vgen_port_t *portp, int cmd)
7525 {
7526 	ldc_status_t	istatus;
7527 	vgen_ldclist_t	*ldclp;
7528 	vgen_ldc_t	*ldcp;
7529 	vgen_t		*vgenp = portp->vgenp;
7530 	int		rv;
7531 
7532 	ldclp = &portp->ldclist;
7533 	READ_ENTER(&ldclp->rwlock);
7534 
7535 	/*
7536 	 * NOTE: for now, we will assume we have a single channel.
7537 	 */
7538 	if (ldclp->headp == NULL) {
7539 		RW_EXIT(&ldclp->rwlock);
7540 		return (1);
7541 	}
7542 	ldcp = ldclp->headp;
7543 	mutex_enter(&ldcp->cblock);
7544 
7545 	switch (cmd) {
7546 
7547 	case VNET_FORCE_LINK_DOWN:
7548 		(void) ldc_down(ldcp->ldc_handle);
7549 		ldcp->link_down_forced = B_TRUE;
7550 		break;
7551 
7552 	case VNET_FORCE_LINK_UP:
7553 		rv = ldc_up(ldcp->ldc_handle);
7554 		if (rv != 0) {
7555 			DWARN(vgenp, ldcp, "ldc_up err rv(%d)\n", rv);
7556 		}
7557 		ldcp->link_down_forced = B_FALSE;
7558 
7559 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
7560 			DWARN(vgenp, ldcp, "ldc_status err\n");
7561 		} else {
7562 			ldcp->ldc_status = istatus;
7563 		}
7564 
7565 		/* if channel is already UP - restart handshake */
7566 		if (ldcp->ldc_status == LDC_UP) {
7567 			vgen_handle_evt_up(ldcp);
7568 		}
7569 		break;
7570 
7571 	}
7572 
7573 	mutex_exit(&ldcp->cblock);
7574 	RW_EXIT(&ldclp->rwlock);
7575 
7576 	return (0);
7577 }
7578 
7579 #else
7580 
7581 static void
7582 vgen_ioctl(void *arg, queue_t *q, mblk_t *mp)
7583 {
7584 	vgen_port_t	*portp;
7585 
7586 	portp = (vgen_port_t *)arg;
7587 
7588 	if (portp == NULL) {
7589 		miocnak(q, mp, 0, EINVAL);
7590 		return;
7591 	}
7592 
7593 	miocnak(q, mp, 0, ENOTSUP);
7594 }
7595 
7596 #endif
7597