xref: /titanic_41/usr/src/uts/sun4v/io/vnet_gen.c (revision fbe82215144da71ed02c3a920667472cc567fafd)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 #include <sys/types.h>
27 #include <sys/errno.h>
28 #include <sys/sysmacros.h>
29 #include <sys/param.h>
30 #include <sys/machsystm.h>
31 #include <sys/stream.h>
32 #include <sys/strsubr.h>
33 #include <sys/kmem.h>
34 #include <sys/conf.h>
35 #include <sys/devops.h>
36 #include <sys/ksynch.h>
37 #include <sys/stat.h>
38 #include <sys/modctl.h>
39 #include <sys/debug.h>
40 #include <sys/ethernet.h>
41 #include <sys/ddi.h>
42 #include <sys/sunddi.h>
43 #include <sys/strsun.h>
44 #include <sys/note.h>
45 #include <sys/mac_provider.h>
46 #include <sys/mac_ether.h>
47 #include <sys/ldc.h>
48 #include <sys/mach_descrip.h>
49 #include <sys/mdeg.h>
50 #include <net/if.h>
51 #include <sys/vnet.h>
52 #include <sys/vio_mailbox.h>
53 #include <sys/vio_common.h>
54 #include <sys/vnet_common.h>
55 #include <sys/vnet_mailbox.h>
56 #include <sys/vio_util.h>
57 #include <sys/vnet_gen.h>
58 #include <sys/atomic.h>
59 #include <sys/callb.h>
60 #include <sys/sdt.h>
61 #include <sys/intr.h>
62 #include <sys/pattr.h>
63 #include <sys/vlan.h>
64 
65 /*
66  * Implementation of the mac provider functionality for vnet using the
67  * generic(default) transport layer of sun4v Logical Domain Channels(LDC).
68  */
69 
70 /* Entry Points */
71 int vgen_init(void *vnetp, uint64_t regprop, dev_info_t *vnetdip,
72     const uint8_t *macaddr, void **vgenhdl);
73 int vgen_init_mdeg(void *arg);
74 void vgen_uninit(void *arg);
75 int vgen_dds_tx(void *arg, void *dmsg);
76 int vgen_enable_intr(void *arg);
77 int vgen_disable_intr(void *arg);
78 mblk_t *vgen_rx_poll(void *arg, int bytes_to_pickup);
79 static int vgen_start(void *arg);
80 static void vgen_stop(void *arg);
81 static mblk_t *vgen_tx(void *arg, mblk_t *mp);
82 static int vgen_multicst(void *arg, boolean_t add,
83 	const uint8_t *mca);
84 static int vgen_promisc(void *arg, boolean_t on);
85 static int vgen_unicst(void *arg, const uint8_t *mca);
86 static int vgen_stat(void *arg, uint_t stat, uint64_t *val);
87 static void vgen_ioctl(void *arg, queue_t *q, mblk_t *mp);
88 #ifdef	VNET_IOC_DEBUG
89 static int vgen_force_link_state(vgen_port_t *portp, int link_state);
90 #endif
91 
92 /* Port/LDC Configuration */
93 static int vgen_read_mdprops(vgen_t *vgenp);
94 static void vgen_update_md_prop(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
95 static void vgen_read_pri_eth_types(vgen_t *vgenp, md_t *mdp,
96 	mde_cookie_t node);
97 static void vgen_mtu_read(vgen_t *vgenp, md_t *mdp, mde_cookie_t node,
98 	uint32_t *mtu);
99 static void vgen_linkprop_read(vgen_t *vgenp, md_t *mdp, mde_cookie_t node,
100 	boolean_t *pls);
101 static void vgen_detach_ports(vgen_t *vgenp);
102 static void vgen_port_detach(vgen_port_t *portp);
103 static void vgen_port_list_insert(vgen_port_t *portp);
104 static void vgen_port_list_remove(vgen_port_t *portp);
105 static vgen_port_t *vgen_port_lookup(vgen_portlist_t *plistp,
106 	int port_num);
107 static int vgen_mdeg_reg(vgen_t *vgenp);
108 static void vgen_mdeg_unreg(vgen_t *vgenp);
109 static int vgen_mdeg_cb(void *cb_argp, mdeg_result_t *resp);
110 static int vgen_mdeg_port_cb(void *cb_argp, mdeg_result_t *resp);
111 static int vgen_add_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
112 static int vgen_port_read_props(vgen_port_t *portp, vgen_t *vgenp, md_t *mdp,
113 	mde_cookie_t mdex);
114 static int vgen_remove_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
115 static int vgen_port_attach(vgen_port_t *portp);
116 static void vgen_port_detach_mdeg(vgen_port_t *portp);
117 static int vgen_update_port(vgen_t *vgenp, md_t *curr_mdp,
118 	mde_cookie_t curr_mdex, md_t *prev_mdp, mde_cookie_t prev_mdex);
119 static uint64_t	vgen_port_stat(vgen_port_t *portp, uint_t stat);
120 static void vgen_port_reset(vgen_port_t *portp);
121 static void vgen_reset_vsw_port(vgen_t *vgenp);
122 static int vgen_ldc_reset(vgen_ldc_t *ldcp, vgen_caller_t caller);
123 static void vgen_ldc_up(vgen_ldc_t *ldcp);
124 static int vgen_ldc_attach(vgen_port_t *portp, uint64_t ldc_id);
125 static void vgen_ldc_detach(vgen_ldc_t *ldcp);
126 static void vgen_port_init(vgen_port_t *portp);
127 static void vgen_port_uninit(vgen_port_t *portp);
128 static int vgen_ldc_init(vgen_ldc_t *ldcp);
129 static void vgen_ldc_uninit(vgen_ldc_t *ldcp);
130 static uint64_t	vgen_ldc_stat(vgen_ldc_t *ldcp, uint_t stat);
131 
132 /* I/O Processing */
133 static int vgen_portsend(vgen_port_t *portp, mblk_t *mp);
134 static int vgen_ldcsend(void *arg, mblk_t *mp);
135 static void vgen_ldcsend_pkt(void *arg, mblk_t *mp);
136 static uint_t vgen_ldc_cb(uint64_t event, caddr_t arg);
137 static void vgen_tx_watchdog(void *arg);
138 
139 /*  Dring Configuration */
140 static int vgen_create_dring(vgen_ldc_t *ldcp);
141 static void vgen_destroy_dring(vgen_ldc_t *ldcp);
142 static int vgen_map_dring(vgen_ldc_t *ldcp, void *pkt);
143 static void vgen_unmap_dring(vgen_ldc_t *ldcp);
144 static int vgen_mapin_avail(vgen_ldc_t *ldcp);
145 
146 /* VIO Message Processing */
147 static int vgen_handshake(vgen_ldc_t *ldcp);
148 static int vgen_handshake_done(vgen_ldc_t *ldcp);
149 static vgen_ldc_t *vh_nextphase(vgen_ldc_t *ldcp);
150 static int vgen_handshake_phase2(vgen_ldc_t *ldcp);
151 static int vgen_handshake_phase3(vgen_ldc_t *ldcp);
152 static void vgen_setup_handshake_params(vgen_ldc_t *ldcp);
153 static int vgen_send_version_negotiate(vgen_ldc_t *ldcp);
154 static int vgen_send_attr_info(vgen_ldc_t *ldcp);
155 static int vgen_send_rx_dring_reg(vgen_ldc_t *ldcp);
156 static int vgen_send_tx_dring_reg(vgen_ldc_t *ldcp);
157 static void vgen_init_dring_reg_msg(vgen_ldc_t *ldcp, vio_dring_reg_msg_t *msg,
158 	uint8_t option);
159 static int vgen_send_rdx_info(vgen_ldc_t *ldcp);
160 static int vgen_send_dringdata(vgen_ldc_t *ldcp, uint32_t start, int32_t end);
161 static int vgen_send_mcast_info(vgen_ldc_t *ldcp);
162 static int vgen_handle_version_negotiate(vgen_ldc_t *ldcp,
163 	vio_msg_tag_t *tagp);
164 static int vgen_handle_attr_msg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
165 static int vgen_handle_attr_info(vgen_ldc_t *ldcp, vnet_attr_msg_t *msg);
166 static int vgen_handle_attr_ack(vgen_ldc_t *ldcp, vnet_attr_msg_t *msg);
167 static int vgen_handle_dring_reg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
168 static int vgen_handle_dring_reg_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
169 static int vgen_handle_dring_reg_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
170 static int vgen_handle_rdx_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
171 static int vgen_handle_mcast_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
172 static int vgen_handle_ctrlmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
173 static void vgen_handle_pkt_data_nop(void *arg1, void *arg2, uint32_t msglen);
174 static int vgen_handle_datamsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp,
175 	uint32_t msglen);
176 static void vgen_handle_errmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
177 static int vgen_dds_rx(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
178 static void vgen_handle_evt_up(vgen_ldc_t *ldcp);
179 static int vgen_process_reset(vgen_ldc_t *ldcp, int flags);
180 static int vgen_check_sid(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
181 static void vgen_hwatchdog(void *arg);
182 static void vgen_set_vnet_proto_ops(vgen_ldc_t *ldcp);
183 static void vgen_reset_vnet_proto_ops(vgen_ldc_t *ldcp);
184 static void vgen_link_update(vgen_t *vgenp, link_state_t link_state);
185 
186 /* VLANs */
187 static void vgen_vlan_read_ids(void *arg, int type, md_t *mdp,
188 	mde_cookie_t node, uint16_t *pvidp, uint16_t **vidspp,
189 	uint16_t *nvidsp, uint16_t *default_idp);
190 static void vgen_vlan_create_hash(vgen_port_t *portp);
191 static void vgen_vlan_destroy_hash(vgen_port_t *portp);
192 static void vgen_vlan_add_ids(vgen_port_t *portp);
193 static void vgen_vlan_remove_ids(vgen_port_t *portp);
194 static boolean_t vgen_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid);
195 static boolean_t vgen_frame_lookup_vid(vnet_t *vnetp, struct ether_header *ehp,
196 	uint16_t *vidp);
197 static mblk_t *vgen_vlan_frame_fixtag(vgen_port_t *portp, mblk_t *mp,
198 	boolean_t is_tagged, uint16_t vid);
199 static void vgen_vlan_unaware_port_reset(vgen_port_t *portp);
200 static void vgen_reset_vlan_unaware_ports(vgen_t *vgenp);
201 
202 /* Exported functions */
203 int vgen_handle_evt_read(vgen_ldc_t *ldcp, vgen_caller_t caller);
204 int vgen_handle_evt_reset(vgen_ldc_t *ldcp, vgen_caller_t caller);
205 void vgen_handle_pkt_data(void *arg1, void *arg2, uint32_t msglen);
206 void vgen_destroy_rxpools(void *arg);
207 
208 /* Externs */
209 extern void vnet_dds_rx(void *arg, void *dmsg);
210 extern void vnet_dds_cleanup_hio(vnet_t *vnetp);
211 extern int vnet_mtu_update(vnet_t *vnetp, uint32_t mtu);
212 extern void vnet_link_update(vnet_t *vnetp, link_state_t link_state);
213 extern int vgen_sendmsg(vgen_ldc_t *ldcp, caddr_t msg,  size_t msglen,
214     boolean_t caller_holds_lock);
215 extern void vgen_stop_msg_thread(vgen_ldc_t *ldcp);
216 extern int vgen_create_tx_dring(vgen_ldc_t *ldcp);
217 extern void vgen_destroy_tx_dring(vgen_ldc_t *ldcp);
218 extern int vgen_map_rx_dring(vgen_ldc_t *ldcp, void *pkt);
219 extern void vgen_unmap_rx_dring(vgen_ldc_t *ldcp);
220 extern int vgen_create_rx_dring(vgen_ldc_t *ldcp);
221 extern void vgen_destroy_rx_dring(vgen_ldc_t *ldcp);
222 extern int vgen_map_tx_dring(vgen_ldc_t *ldcp, void *pkt);
223 extern void vgen_unmap_tx_dring(vgen_ldc_t *ldcp);
224 extern int vgen_map_data(vgen_ldc_t *ldcp, void *pkt);
225 extern int vgen_handle_dringdata_shm(void *arg1, void *arg2);
226 extern int vgen_handle_dringdata(void *arg1, void *arg2);
227 extern int vgen_dringsend_shm(void *arg, mblk_t *mp);
228 extern int vgen_dringsend(void *arg, mblk_t *mp);
229 extern void vgen_ldc_msg_worker(void *arg);
230 extern int vgen_send_dringack_shm(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp,
231     uint32_t start, int32_t end, uint8_t pstate);
232 extern mblk_t *vgen_poll_rcv_shm(vgen_ldc_t *ldcp, int bytes_to_pickup);
233 extern mblk_t *vgen_poll_rcv(vgen_ldc_t *ldcp, int bytes_to_pickup);
234 extern int vgen_check_datamsg_seq(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
235 
236 #define	VGEN_PRI_ETH_DEFINED(vgenp)	((vgenp)->pri_num_types != 0)
237 
/*
 * Acquire (LDC_LOCK) or release (LDC_UNLOCK) all five per-channel locks of
 * the given ldc. Locks are taken in the order cblock, rxlock, wrlock, txlock,
 * tclock and dropped in exactly the reverse order.
 *
 * Each macro expands to a single do { } while (0) statement, so the whole
 * lock sequence stays together when the macro is the body of an unbraced
 * if/else. Invoke as a statement, with a trailing semicolon.
 */
#define	LDC_LOCK(ldcp)	\
	do {	\
		mutex_enter(&((ldcp)->cblock));	\
		mutex_enter(&((ldcp)->rxlock));	\
		mutex_enter(&((ldcp)->wrlock));	\
		mutex_enter(&((ldcp)->txlock));	\
		mutex_enter(&((ldcp)->tclock));	\
	} while (0)
#define	LDC_UNLOCK(ldcp)	\
	do {	\
		mutex_exit(&((ldcp)->tclock));	\
		mutex_exit(&((ldcp)->txlock));	\
		mutex_exit(&((ldcp)->wrlock));	\
		mutex_exit(&((ldcp)->rxlock));	\
		mutex_exit(&((ldcp)->cblock));	\
	} while (0)
250 
251 #define	VGEN_VER_EQ(ldcp, major, minor)	\
252 	((ldcp)->local_hparams.ver_major == (major) &&	\
253 	    (ldcp)->local_hparams.ver_minor == (minor))
254 
255 #define	VGEN_VER_LT(ldcp, major, minor)	\
256 	(((ldcp)->local_hparams.ver_major < (major)) ||	\
257 	    ((ldcp)->local_hparams.ver_major == (major) &&	\
258 	    (ldcp)->local_hparams.ver_minor < (minor)))
259 
260 #define	VGEN_VER_GTEQ(ldcp, major, minor)	\
261 	(((ldcp)->local_hparams.ver_major > (major)) ||	\
262 	    ((ldcp)->local_hparams.ver_major == (major) &&	\
263 	    (ldcp)->local_hparams.ver_minor >= (minor)))
264 
265 /*
266  * Property names
267  */
268 static char macaddr_propname[] = "mac-address";
269 static char rmacaddr_propname[] = "remote-mac-address";
270 static char channel_propname[] = "channel-endpoint";
271 static char reg_propname[] = "reg";
272 static char port_propname[] = "port";
273 static char swport_propname[] = "switch-port";
274 static char id_propname[] = "id";
275 static char vdev_propname[] = "virtual-device";
276 static char vnet_propname[] = "network";
277 static char pri_types_propname[] = "priority-ether-types";
278 static char vgen_pvid_propname[] = "port-vlan-id";
279 static char vgen_vid_propname[] = "vlan-id";
280 static char vgen_dvid_propname[] = "default-vlan-id";
281 static char port_pvid_propname[] = "remote-port-vlan-id";
282 static char port_vid_propname[] = "remote-vlan-id";
283 static char vgen_mtu_propname[] = "mtu";
284 static char vgen_linkprop_propname[] = "linkprop";
285 
286 /*
287  * VIO Protocol Version Info:
288  *
289  * The version specified below represents the version of protocol currently
290  * supported in the driver. It means the driver can negotiate with peers with
291  * versions <= this version. Here is a summary of the feature(s) that are
292  * supported at each version of the protocol:
293  *
294  * 1.0			Basic VIO protocol.
295  * 1.1			vDisk protocol update (no virtual network update).
296  * 1.2			Support for priority frames (priority-ether-types).
297  * 1.3			VLAN and HybridIO support.
298  * 1.4			Jumbo Frame support.
299  * 1.5			Link State Notification support with optional support
300  * 			for Physical Link information.
301  * 1.6			Support for RxDringData mode.
302  */
303 static vgen_ver_t vgen_versions[VGEN_NUM_VER] =  { {1, 6} };
304 
305 /* Tunables */
306 uint32_t vgen_hwd_interval = 5;		/* handshake watchdog freq in sec */
307 uint32_t vgen_ldcwr_retries = 10;	/* max # of ldc_write() retries */
308 uint32_t vgen_ldcup_retries = 5;	/* max # of ldc_up() retries */
309 uint32_t vgen_ldccl_retries = 5;	/* max # of ldc_close() retries */
310 uint32_t vgen_tx_delay = 0x30;		/* delay when tx descr not available */
311 uint32_t vgen_ldc_mtu = VGEN_LDC_MTU;		/* ldc mtu */
312 uint32_t vgen_txwd_interval = VGEN_TXWD_INTERVAL; /* watchdog freq in msec */
313 uint32_t vgen_txwd_timeout = VGEN_TXWD_TIMEOUT;   /* tx timeout in msec */
314 
315 /*
316  * Max # of channel resets allowed during handshake.
317  */
318 uint32_t vgen_ldc_max_resets = 5;
319 
320 /*
321  * See comments in vsw.c for details on the dring modes supported.
322  * In RxDringData mode, # of buffers is determined by multiplying the # of
323  * descriptors with the factor below. Note that the factor must be > 1; i.e,
324  * the # of buffers must always be > # of descriptors. This is needed because,
325  * while the shared memory buffers are sent up the stack on the receiver, the
326  * sender needs additional buffers that can be used for further transmits.
327  * See vgen_create_rx_dring() for details.
328  */
329 uint32_t vgen_nrbufs_factor = 2;
330 
331 /*
332  * Retry delay used while destroying rx mblk pools. Used in both Dring modes.
333  */
334 int vgen_rxpool_cleanup_delay = 100000;	/* 100ms */
335 
336 /*
337  * Delay when rx descr not ready; used in TxDring mode only.
338  */
339 uint32_t vgen_recv_delay = 1;
340 
341 /*
342  * Retry when rx descr not ready; used in TxDring mode only.
343  */
344 uint32_t vgen_recv_retries = 10;
345 
346 /*
347  * Max # of packets accumulated prior to sending them up. It is best
348  * to keep this at 60% of the number of receive buffers. Used in TxDring mode
349  * by the msg worker thread. Used in RxDringData mode while in interrupt mode
350  * (not used in polled mode).
351  */
352 uint32_t vgen_chain_len = (VGEN_NRBUFS * 0.6);
353 
354 /*
355  * Internal tunables for receive buffer pools, that is,  the size and number of
356  * mblks for each pool. At least 3 sizes must be specified if these are used.
357  * The sizes must be specified in increasing order. Non-zero value of the first
358  * size will be used as a hint to use these values instead of the algorithm
359  * that determines the sizes based on MTU. Used in TxDring mode only.
360  */
361 uint32_t vgen_rbufsz1 = 0;
362 uint32_t vgen_rbufsz2 = 0;
363 uint32_t vgen_rbufsz3 = 0;
364 uint32_t vgen_rbufsz4 = 0;
365 
366 uint32_t vgen_nrbufs1 = VGEN_NRBUFS;
367 uint32_t vgen_nrbufs2 = VGEN_NRBUFS;
368 uint32_t vgen_nrbufs3 = VGEN_NRBUFS;
369 uint32_t vgen_nrbufs4 = VGEN_NRBUFS;
370 
371 /*
372  * In the absence of "priority-ether-types" property in MD, the following
373  * internal tunable can be set to specify a single priority ethertype.
374  */
375 uint64_t vgen_pri_eth_type = 0;
376 
377 /*
378  * Number of transmit priority buffers that are preallocated per device.
379  * This number is chosen to be a small value to throttle transmission
380  * of priority packets. Note: Must be a power of 2 for vio_create_mblks().
381  */
382 uint32_t vgen_pri_tx_nmblks = 64;
383 
384 uint32_t	vgen_vlan_nchains = 4;	/* # of chains in vlan id hash table */
385 
386 /*
387  * Matching criteria passed to the MDEG to register interest
388  * in changes to 'virtual-device' nodes (i.e. vnet nodes) identified
389  * by their 'name' and 'cfg-handle' properties.
390  */
391 static md_prop_match_t vdev_prop_match[] = {
392 	{ MDET_PROP_STR,    "name"   },
393 	{ MDET_PROP_VAL,    "cfg-handle" },
394 	{ MDET_LIST_END,    NULL    }
395 };
396 
397 static mdeg_node_match_t vdev_match = { "virtual-device",
398 						vdev_prop_match };
399 
400 /* MD update matching structure */
401 static md_prop_match_t	vport_prop_match[] = {
402 	{ MDET_PROP_VAL,	"id" },
403 	{ MDET_LIST_END,	NULL }
404 };
405 
406 static mdeg_node_match_t vport_match = { "virtual-device-port",
407 					vport_prop_match };
408 
409 /* Template for matching a particular vnet instance */
410 static mdeg_prop_spec_t vgen_prop_template[] = {
411 	{ MDET_PROP_STR,	"name",		"network" },
412 	{ MDET_PROP_VAL,	"cfg-handle",	NULL },
413 	{ MDET_LIST_END,	NULL,		NULL }
414 };
415 
416 #define	VGEN_SET_MDEG_PROP_INST(specp, val)	(specp)[1].ps_val = (val)
417 
418 static int vgen_mdeg_port_cb(void *cb_argp, mdeg_result_t *resp);
419 
420 #ifdef	VNET_IOC_DEBUG
421 #define	VGEN_M_CALLBACK_FLAGS	(MC_IOCTL)
422 #else
423 #define	VGEN_M_CALLBACK_FLAGS	(0)
424 #endif
425 
426 static mac_callbacks_t vgen_m_callbacks = {
427 	VGEN_M_CALLBACK_FLAGS,
428 	vgen_stat,
429 	vgen_start,
430 	vgen_stop,
431 	vgen_promisc,
432 	vgen_multicst,
433 	vgen_unicst,
434 	vgen_tx,
435 	NULL,
436 	vgen_ioctl,
437 	NULL,
438 	NULL
439 };
440 
441 /* Externs */
442 extern pri_t	maxclsyspri;
443 extern proc_t	p0;
444 extern uint32_t	vnet_ethermtu;
445 extern uint16_t	vnet_default_vlan_id;
446 extern uint32_t vnet_num_descriptors;
447 
448 #ifdef DEBUG
449 
450 #define	DEBUG_PRINTF	vgen_debug_printf
451 
452 extern int vnet_dbglevel;
453 
454 void vgen_debug_printf(const char *fname, vgen_t *vgenp,
455 	vgen_ldc_t *ldcp, const char *fmt, ...);
456 
457 /* -1 for all LDCs info, or ldc_id for a specific LDC info */
458 int vgendbg_ldcid = -1;
459 
460 /* Flags to simulate error conditions for debugging */
461 int vgen_inject_err_flag = 0;
462 
463 
464 boolean_t
465 vgen_inject_error(vgen_ldc_t *ldcp, int error)
466 {
467 	if ((vgendbg_ldcid == ldcp->ldc_id) &&
468 	    (vgen_inject_err_flag & error)) {
469 		return (B_TRUE);
470 	}
471 	return (B_FALSE);
472 }
473 
474 #endif
475 
476 /*
477  * vgen_init() is called by an instance of vnet driver to initialize the
478  * corresponding generic transport layer. This layer uses Logical Domain
479  * Channels (LDCs) to communicate with the virtual switch in the service domain
480  * and also with peer vnets in other guest domains in the system.
481  *
482  * Arguments:
483  *   vnetp:   an opaque pointer to the vnet instance
484  *   regprop: frame to be transmitted
485  *   vnetdip: dip of the vnet device
486  *   macaddr: mac address of the vnet device
487  *
488  * Returns:
489  *	Sucess:  a handle to the vgen instance (vgen_t)
490  *	Failure: NULL
491  */
492 int
493 vgen_init(void *vnetp, uint64_t regprop, dev_info_t *vnetdip,
494     const uint8_t *macaddr, void **vgenhdl)
495 {
496 	vgen_t	*vgenp;
497 	int	instance;
498 	int	rv;
499 	char	qname[TASKQ_NAMELEN];
500 
501 	if ((vnetp == NULL) || (vnetdip == NULL))
502 		return (DDI_FAILURE);
503 
504 	instance = ddi_get_instance(vnetdip);
505 
506 	DBG1(NULL, NULL, "vnet(%d): enter\n", instance);
507 
508 	vgenp = kmem_zalloc(sizeof (vgen_t), KM_SLEEP);
509 
510 	vgenp->vnetp = vnetp;
511 	vgenp->instance = instance;
512 	vgenp->regprop = regprop;
513 	vgenp->vnetdip = vnetdip;
514 	bcopy(macaddr, &(vgenp->macaddr), ETHERADDRL);
515 	vgenp->phys_link_state = LINK_STATE_UNKNOWN;
516 
517 	/* allocate multicast table */
518 	vgenp->mctab = kmem_zalloc(VGEN_INIT_MCTAB_SIZE *
519 	    sizeof (struct ether_addr), KM_SLEEP);
520 	vgenp->mccount = 0;
521 	vgenp->mcsize = VGEN_INIT_MCTAB_SIZE;
522 
523 	mutex_init(&vgenp->lock, NULL, MUTEX_DRIVER, NULL);
524 	rw_init(&vgenp->vgenports.rwlock, NULL, RW_DRIVER, NULL);
525 
526 	(void) snprintf(qname, TASKQ_NAMELEN, "rxpool_taskq%d",
527 	    instance);
528 	if ((vgenp->rxp_taskq = ddi_taskq_create(vnetdip, qname, 1,
529 	    TASKQ_DEFAULTPRI, 0)) == NULL) {
530 		cmn_err(CE_WARN, "!vnet%d: Unable to create rx pool task queue",
531 		    instance);
532 		goto vgen_init_fail;
533 	}
534 
535 	rv = vgen_read_mdprops(vgenp);
536 	if (rv != 0) {
537 		goto vgen_init_fail;
538 	}
539 	*vgenhdl = (void *)vgenp;
540 
541 	DBG1(NULL, NULL, "vnet(%d): exit\n", instance);
542 	return (DDI_SUCCESS);
543 
544 vgen_init_fail:
545 	rw_destroy(&vgenp->vgenports.rwlock);
546 	mutex_destroy(&vgenp->lock);
547 	kmem_free(vgenp->mctab, VGEN_INIT_MCTAB_SIZE *
548 	    sizeof (struct ether_addr));
549 	if (VGEN_PRI_ETH_DEFINED(vgenp)) {
550 		kmem_free(vgenp->pri_types,
551 		    sizeof (uint16_t) * vgenp->pri_num_types);
552 		(void) vio_destroy_mblks(vgenp->pri_tx_vmp);
553 	}
554 	if (vgenp->rxp_taskq != NULL) {
555 		ddi_taskq_destroy(vgenp->rxp_taskq);
556 		vgenp->rxp_taskq = NULL;
557 	}
558 	KMEM_FREE(vgenp);
559 	return (DDI_FAILURE);
560 }
561 
562 int
563 vgen_init_mdeg(void *arg)
564 {
565 	vgen_t	*vgenp = (vgen_t *)arg;
566 
567 	/* register with MD event generator */
568 	return (vgen_mdeg_reg(vgenp));
569 }
570 
571 /*
572  * Called by vnet to undo the initializations done by vgen_init().
573  * The handle provided by generic transport during vgen_init() is the argument.
574  */
575 void
576 vgen_uninit(void *arg)
577 {
578 	vgen_t	*vgenp = (vgen_t *)arg;
579 
580 	if (vgenp == NULL) {
581 		return;
582 	}
583 
584 	DBG1(vgenp, NULL, "enter\n");
585 
586 	/* Unregister with MD event generator */
587 	vgen_mdeg_unreg(vgenp);
588 
589 	mutex_enter(&vgenp->lock);
590 
591 	/*
592 	 * Detach all ports from the device; note that the device should have
593 	 * been unplumbed by this time (See vnet_unattach() for the sequence)
594 	 * and thus vgen_stop() has already been invoked on all the ports.
595 	 */
596 	vgen_detach_ports(vgenp);
597 
598 	/*
599 	 * We now destroy the taskq used to clean up rx mblk pools that
600 	 * couldn't be destroyed when the ports/channels were detached.
601 	 * We implicitly wait for those tasks to complete in
602 	 * ddi_taskq_destroy().
603 	 */
604 	if (vgenp->rxp_taskq != NULL) {
605 		ddi_taskq_destroy(vgenp->rxp_taskq);
606 		vgenp->rxp_taskq = NULL;
607 	}
608 
609 	/* Free multicast table */
610 	kmem_free(vgenp->mctab, vgenp->mcsize * sizeof (struct ether_addr));
611 
612 	/* Free pri_types table */
613 	if (VGEN_PRI_ETH_DEFINED(vgenp)) {
614 		kmem_free(vgenp->pri_types,
615 		    sizeof (uint16_t) * vgenp->pri_num_types);
616 		(void) vio_destroy_mblks(vgenp->pri_tx_vmp);
617 	}
618 
619 	mutex_exit(&vgenp->lock);
620 	rw_destroy(&vgenp->vgenports.rwlock);
621 	mutex_destroy(&vgenp->lock);
622 
623 	DBG1(vgenp, NULL, "exit\n");
624 	KMEM_FREE(vgenp);
625 }
626 
627 /* enable transmit/receive for the device */
628 int
629 vgen_start(void *arg)
630 {
631 	vgen_port_t	*portp = (vgen_port_t *)arg;
632 	vgen_t		*vgenp = portp->vgenp;
633 
634 	DBG1(vgenp, NULL, "enter\n");
635 	mutex_enter(&portp->lock);
636 	vgen_port_init(portp);
637 	portp->flags |= VGEN_STARTED;
638 	mutex_exit(&portp->lock);
639 	DBG1(vgenp, NULL, "exit\n");
640 
641 	return (DDI_SUCCESS);
642 }
643 
644 /* stop transmit/receive */
645 void
646 vgen_stop(void *arg)
647 {
648 	vgen_port_t	*portp = (vgen_port_t *)arg;
649 	vgen_t		*vgenp = portp->vgenp;
650 
651 	DBG1(vgenp, NULL, "enter\n");
652 
653 	mutex_enter(&portp->lock);
654 	if (portp->flags & VGEN_STARTED) {
655 		vgen_port_uninit(portp);
656 		portp->flags &= ~(VGEN_STARTED);
657 	}
658 	mutex_exit(&portp->lock);
659 	DBG1(vgenp, NULL, "exit\n");
660 
661 }
662 
663 /* vgen transmit function */
664 static mblk_t *
665 vgen_tx(void *arg, mblk_t *mp)
666 {
667 	vgen_port_t	*portp;
668 	int		status;
669 
670 	portp = (vgen_port_t *)arg;
671 	status = vgen_portsend(portp, mp);
672 	if (status != VGEN_SUCCESS) {
673 		/* failure */
674 		return (mp);
675 	}
676 	/* success */
677 	return (NULL);
678 }
679 
680 /*
681  * This function provides any necessary tagging/untagging of the frames
682  * that are being transmitted over the port. It first verifies the vlan
683  * membership of the destination(port) and drops the packet if the
684  * destination doesn't belong to the given vlan.
685  *
686  * Arguments:
687  *   portp:     port over which the frames should be transmitted
688  *   mp:        frame to be transmitted
689  *   is_tagged:
690  *              B_TRUE: indicates frame header contains the vlan tag already.
691  *              B_FALSE: indicates frame is untagged.
692  *   vid:       vlan in which the frame should be transmitted.
693  *
694  * Returns:
695  *              Sucess: frame(mblk_t *) after doing the necessary tag/untag.
696  *              Failure: NULL
697  */
698 static mblk_t *
699 vgen_vlan_frame_fixtag(vgen_port_t *portp, mblk_t *mp, boolean_t is_tagged,
700 	uint16_t vid)
701 {
702 	vgen_t		*vgenp;
703 	boolean_t	dst_tagged;
704 	int		rv;
705 
706 	vgenp = portp->vgenp;
707 
708 	/*
709 	 * If the packet is going to a vnet:
710 	 *   Check if the destination vnet is in the same vlan.
711 	 *   Check the frame header if tag or untag is needed.
712 	 *
713 	 * We do not check the above conditions if the packet is going to vsw:
714 	 *   vsw must be present implicitly in all the vlans that a vnet device
715 	 *   is configured into; even if vsw itself is not assigned to those
716 	 *   vlans as an interface. For instance, the packet might be destined
717 	 *   to another vnet(indirectly through vsw) or to an external host
718 	 *   which is in the same vlan as this vnet and vsw itself may not be
719 	 *   present in that vlan. Similarly packets going to vsw must be
720 	 *   always tagged(unless in the default-vlan) if not already tagged,
721 	 *   as we do not know the final destination. This is needed because
722 	 *   vsw must always invoke its switching function only after tagging
723 	 *   the packet; otherwise after switching function determines the
724 	 *   destination we cannot figure out if the destination belongs to the
725 	 *   the same vlan that the frame originated from and if it needs tag/
726 	 *   untag. Note that vsw will tag the packet itself when it receives
727 	 *   it over the channel from a client if needed. However, that is
728 	 *   needed only in the case of vlan unaware clients such as obp or
729 	 *   earlier versions of vnet.
730 	 *
731 	 */
732 	if (portp != vgenp->vsw_portp) {
733 		/*
734 		 * Packet going to a vnet. Check if the destination vnet is in
735 		 * the same vlan. Then check the frame header if tag/untag is
736 		 * needed.
737 		 */
738 		rv = vgen_vlan_lookup(portp->vlan_hashp, vid);
739 		if (rv == B_FALSE) {
740 			/* drop the packet */
741 			freemsg(mp);
742 			return (NULL);
743 		}
744 
745 		/* is the destination tagged or untagged in this vlan? */
746 		(vid == portp->pvid) ? (dst_tagged = B_FALSE) :
747 		    (dst_tagged = B_TRUE);
748 
749 		if (is_tagged == dst_tagged) {
750 			/* no tagging/untagging needed */
751 			return (mp);
752 		}
753 
754 		if (is_tagged == B_TRUE) {
755 			/* frame is tagged; destination needs untagged */
756 			mp = vnet_vlan_remove_tag(mp);
757 			return (mp);
758 		}
759 
760 		/* (is_tagged == B_FALSE): fallthru to tag tx packet: */
761 	}
762 
763 	/*
764 	 * Packet going to a vnet needs tagging.
765 	 * OR
766 	 * If the packet is going to vsw, then it must be tagged in all cases:
767 	 * unknown unicast, broadcast/multicast or to vsw interface.
768 	 */
769 
770 	if (is_tagged == B_FALSE) {
771 		mp = vnet_vlan_insert_tag(mp, vid);
772 	}
773 
774 	return (mp);
775 }
776 
777 /* transmit packets over the given port */
778 static int
779 vgen_portsend(vgen_port_t *portp, mblk_t *mp)
780 {
781 	vgen_ldc_t		*ldcp;
782 	int			status;
783 	int			rv = VGEN_SUCCESS;
784 	vgen_t			*vgenp = portp->vgenp;
785 	vnet_t			*vnetp = vgenp->vnetp;
786 	boolean_t		is_tagged;
787 	boolean_t		dec_refcnt = B_FALSE;
788 	uint16_t		vlan_id;
789 	struct ether_header	*ehp;
790 
791 	if (portp == NULL) {
792 		return (VGEN_FAILURE);
793 	}
794 
795 	if (portp->use_vsw_port) {
796 		(void) atomic_inc_32(&vgenp->vsw_port_refcnt);
797 		portp = portp->vgenp->vsw_portp;
798 		ASSERT(portp != NULL);
799 		dec_refcnt = B_TRUE;
800 	}
801 
802 	/*
803 	 * Determine the vlan id that the frame belongs to.
804 	 */
805 	ehp = (struct ether_header *)mp->b_rptr;
806 	is_tagged = vgen_frame_lookup_vid(vnetp, ehp, &vlan_id);
807 
808 	if (vlan_id == vnetp->default_vlan_id) {
809 
810 		/* Frames in default vlan must be untagged */
811 		ASSERT(is_tagged == B_FALSE);
812 
813 		/*
814 		 * If the destination is a vnet-port verify it belongs to the
815 		 * default vlan; otherwise drop the packet. We do not need
816 		 * this check for vsw-port, as it should implicitly belong to
817 		 * this vlan; see comments in vgen_vlan_frame_fixtag().
818 		 */
819 		if (portp != vgenp->vsw_portp &&
820 		    portp->pvid != vnetp->default_vlan_id) {
821 			freemsg(mp);
822 			goto portsend_ret;
823 		}
824 
825 	} else {	/* frame not in default-vlan */
826 
827 		mp = vgen_vlan_frame_fixtag(portp, mp, is_tagged, vlan_id);
828 		if (mp == NULL) {
829 			goto portsend_ret;
830 		}
831 
832 	}
833 
834 	ldcp = portp->ldcp;
835 	status = ldcp->tx(ldcp, mp);
836 
837 	if (status != VGEN_TX_SUCCESS) {
838 		rv = VGEN_FAILURE;
839 	}
840 
841 portsend_ret:
842 	if (dec_refcnt == B_TRUE) {
843 		(void) atomic_dec_32(&vgenp->vsw_port_refcnt);
844 	}
845 	return (rv);
846 }
847 
848 /*
849  * Wrapper function to transmit normal and/or priority frames over the channel.
850  */
851 static int
852 vgen_ldcsend(void *arg, mblk_t *mp)
853 {
854 	vgen_ldc_t		*ldcp = (vgen_ldc_t *)arg;
855 	int			status;
856 	struct ether_header	*ehp;
857 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
858 	uint32_t		num_types;
859 	uint16_t		*types;
860 	int			i;
861 
862 	ASSERT(VGEN_PRI_ETH_DEFINED(vgenp));
863 
864 	num_types = vgenp->pri_num_types;
865 	types = vgenp->pri_types;
866 	ehp = (struct ether_header *)mp->b_rptr;
867 
868 	for (i = 0; i < num_types; i++) {
869 
870 		if (ehp->ether_type == types[i]) {
871 			/* priority frame, use pri tx function */
872 			vgen_ldcsend_pkt(ldcp, mp);
873 			return (VGEN_SUCCESS);
874 		}
875 
876 	}
877 
878 	if (ldcp->tx_dringdata == NULL) {
879 		freemsg(mp);
880 		return (VGEN_SUCCESS);
881 	}
882 
883 	status  = ldcp->tx_dringdata(ldcp, mp);
884 	return (status);
885 }
886 
887 /*
888  * This function transmits the frame in the payload of a raw data
889  * (VIO_PKT_DATA) message. Thus, it provides an Out-Of-Band path to
890  * send special frames with high priorities, without going through
891  * the normal data path which uses descriptor ring mechanism.
892  */
893 static void
894 vgen_ldcsend_pkt(void *arg, mblk_t *mp)
895 {
896 	vgen_ldc_t		*ldcp = (vgen_ldc_t *)arg;
897 	vio_raw_data_msg_t	*pkt;
898 	mblk_t			*bp;
899 	mblk_t			*nmp = NULL;
900 	vio_mblk_t		*vmp;
901 	caddr_t			dst;
902 	uint32_t		mblksz;
903 	uint32_t		size;
904 	uint32_t		nbytes;
905 	int			rv;
906 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
907 	vgen_stats_t		*statsp = &ldcp->stats;
908 
909 	/* drop the packet if ldc is not up or handshake is not done */
910 	if (ldcp->ldc_status != LDC_UP) {
911 		(void) atomic_inc_32(&statsp->tx_pri_fail);
912 		DWARN(vgenp, ldcp, "status(%d), dropping packet\n",
913 		    ldcp->ldc_status);
914 		goto send_pkt_exit;
915 	}
916 
917 	if (ldcp->hphase != VH_DONE) {
918 		(void) atomic_inc_32(&statsp->tx_pri_fail);
919 		DWARN(vgenp, ldcp, "hphase(%x), dropping packet\n",
920 		    ldcp->hphase);
921 		goto send_pkt_exit;
922 	}
923 
924 	size = msgsize(mp);
925 
926 	/* frame size bigger than available payload len of raw data msg ? */
927 	if (size > (size_t)(ldcp->msglen - VIO_PKT_DATA_HDRSIZE)) {
928 		(void) atomic_inc_32(&statsp->tx_pri_fail);
929 		DWARN(vgenp, ldcp, "invalid size(%d)\n", size);
930 		goto send_pkt_exit;
931 	}
932 
933 	if (size < ETHERMIN)
934 		size = ETHERMIN;
935 
936 	/* alloc space for a raw data message */
937 	vmp = vio_allocb(vgenp->pri_tx_vmp);
938 	if (vmp == NULL) {
939 		(void) atomic_inc_32(&statsp->tx_pri_fail);
940 		DWARN(vgenp, ldcp, "vio_allocb failed\n");
941 		goto send_pkt_exit;
942 	} else {
943 		nmp = vmp->mp;
944 	}
945 	pkt = (vio_raw_data_msg_t *)nmp->b_rptr;
946 
947 	/* copy frame into the payload of raw data message */
948 	dst = (caddr_t)pkt->data;
949 	for (bp = mp; bp != NULL; bp = bp->b_cont) {
950 		mblksz = MBLKL(bp);
951 		bcopy(bp->b_rptr, dst, mblksz);
952 		dst += mblksz;
953 	}
954 
955 	vmp->state = VIO_MBLK_HAS_DATA;
956 
957 	/* setup the raw data msg */
958 	pkt->tag.vio_msgtype = VIO_TYPE_DATA;
959 	pkt->tag.vio_subtype = VIO_SUBTYPE_INFO;
960 	pkt->tag.vio_subtype_env = VIO_PKT_DATA;
961 	pkt->tag.vio_sid = ldcp->local_sid;
962 	nbytes = VIO_PKT_DATA_HDRSIZE + size;
963 
964 	/* send the msg over ldc */
965 	rv = vgen_sendmsg(ldcp, (caddr_t)pkt, nbytes, B_FALSE);
966 	if (rv != VGEN_SUCCESS) {
967 		(void) atomic_inc_32(&statsp->tx_pri_fail);
968 		DWARN(vgenp, ldcp, "Error sending priority frame\n");
969 		if (rv == ECONNRESET) {
970 			(void) vgen_handle_evt_reset(ldcp, VGEN_OTHER);
971 		}
972 		goto send_pkt_exit;
973 	}
974 
975 	/* update stats */
976 	(void) atomic_inc_64(&statsp->tx_pri_packets);
977 	(void) atomic_add_64(&statsp->tx_pri_bytes, size);
978 
979 send_pkt_exit:
980 	if (nmp != NULL)
981 		freemsg(nmp);
982 	freemsg(mp);
983 }
984 
985 /*
986  * enable/disable a multicast address
987  * note that the cblock of the ldc channel connected to the vsw is used for
988  * synchronization of the mctab.
989  */
990 int
991 vgen_multicst(void *arg, boolean_t add, const uint8_t *mca)
992 {
993 	vgen_t			*vgenp;
994 	vnet_mcast_msg_t	mcastmsg;
995 	vio_msg_tag_t		*tagp;
996 	vgen_port_t		*portp;
997 	vgen_ldc_t		*ldcp;
998 	struct ether_addr	*addrp;
999 	int			rv = DDI_FAILURE;
1000 	uint32_t		i;
1001 
1002 	portp = (vgen_port_t *)arg;
1003 	vgenp = portp->vgenp;
1004 
1005 	if (portp->is_vsw_port != B_TRUE) {
1006 		return (DDI_SUCCESS);
1007 	}
1008 
1009 	addrp = (struct ether_addr *)mca;
1010 	tagp = &mcastmsg.tag;
1011 	bzero(&mcastmsg, sizeof (mcastmsg));
1012 
1013 	ldcp = portp->ldcp;
1014 	if (ldcp == NULL) {
1015 		return (DDI_FAILURE);
1016 	}
1017 
1018 	mutex_enter(&ldcp->cblock);
1019 
1020 	if (ldcp->hphase == VH_DONE) {
1021 		/*
1022 		 * If handshake is done, send a msg to vsw to add/remove
1023 		 * the multicast address. Otherwise, we just update this
1024 		 * mcast address in our table and the table will be sync'd
1025 		 * with vsw when handshake completes.
1026 		 */
1027 		tagp->vio_msgtype = VIO_TYPE_CTRL;
1028 		tagp->vio_subtype = VIO_SUBTYPE_INFO;
1029 		tagp->vio_subtype_env = VNET_MCAST_INFO;
1030 		tagp->vio_sid = ldcp->local_sid;
1031 		bcopy(mca, &(mcastmsg.mca), ETHERADDRL);
1032 		mcastmsg.set = add;
1033 		mcastmsg.count = 1;
1034 		if (vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (mcastmsg),
1035 		    B_FALSE) != VGEN_SUCCESS) {
1036 			DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
1037 			rv = DDI_FAILURE;
1038 			goto vgen_mcast_exit;
1039 		}
1040 	}
1041 
1042 	if (add) {
1043 
1044 		/* expand multicast table if necessary */
1045 		if (vgenp->mccount >= vgenp->mcsize) {
1046 			struct ether_addr	*newtab;
1047 			uint32_t		newsize;
1048 
1049 
1050 			newsize = vgenp->mcsize * 2;
1051 
1052 			newtab = kmem_zalloc(newsize *
1053 			    sizeof (struct ether_addr), KM_NOSLEEP);
1054 			if (newtab == NULL)
1055 				goto vgen_mcast_exit;
1056 			bcopy(vgenp->mctab, newtab, vgenp->mcsize *
1057 			    sizeof (struct ether_addr));
1058 			kmem_free(vgenp->mctab,
1059 			    vgenp->mcsize * sizeof (struct ether_addr));
1060 
1061 			vgenp->mctab = newtab;
1062 			vgenp->mcsize = newsize;
1063 		}
1064 
1065 		/* add address to the table */
1066 		vgenp->mctab[vgenp->mccount++] = *addrp;
1067 
1068 	} else {
1069 
1070 		/* delete address from the table */
1071 		for (i = 0; i < vgenp->mccount; i++) {
1072 			if (ether_cmp(addrp, &(vgenp->mctab[i])) == 0) {
1073 
1074 				/*
1075 				 * If there's more than one address in this
1076 				 * table, delete the unwanted one by moving
1077 				 * the last one in the list over top of it;
1078 				 * otherwise, just remove it.
1079 				 */
1080 				if (vgenp->mccount > 1) {
1081 					vgenp->mctab[i] =
1082 					    vgenp->mctab[vgenp->mccount-1];
1083 				}
1084 				vgenp->mccount--;
1085 				break;
1086 			}
1087 		}
1088 	}
1089 
1090 	rv = DDI_SUCCESS;
1091 
1092 vgen_mcast_exit:
1093 
1094 	mutex_exit(&ldcp->cblock);
1095 	return (rv);
1096 }
1097 
1098 /* set or clear promiscuous mode on the device */
1099 static int
1100 vgen_promisc(void *arg, boolean_t on)
1101 {
1102 	_NOTE(ARGUNUSED(arg, on))
1103 	return (DDI_SUCCESS);
1104 }
1105 
1106 /* set the unicast mac address of the device */
1107 static int
1108 vgen_unicst(void *arg, const uint8_t *mca)
1109 {
1110 	_NOTE(ARGUNUSED(arg, mca))
1111 	return (DDI_SUCCESS);
1112 }
1113 
1114 /* get device statistics */
1115 int
1116 vgen_stat(void *arg, uint_t stat, uint64_t *val)
1117 {
1118 	vgen_port_t	*portp = (vgen_port_t *)arg;
1119 
1120 	*val = vgen_port_stat(portp, stat);
1121 	return (0);
1122 }
1123 
1124 /* vgen internal functions */
1125 /* detach all ports from the device */
1126 static void
1127 vgen_detach_ports(vgen_t *vgenp)
1128 {
1129 	vgen_port_t	*portp;
1130 	vgen_portlist_t	*plistp;
1131 
1132 	plistp = &(vgenp->vgenports);
1133 	WRITE_ENTER(&plistp->rwlock);
1134 	while ((portp = plistp->headp) != NULL) {
1135 		vgen_port_detach(portp);
1136 	}
1137 	RW_EXIT(&plistp->rwlock);
1138 }
1139 
1140 /*
1141  * detach the given port.
1142  */
1143 static void
1144 vgen_port_detach(vgen_port_t *portp)
1145 {
1146 	vgen_t		*vgenp;
1147 	int		port_num;
1148 
1149 	vgenp = portp->vgenp;
1150 	port_num = portp->port_num;
1151 
1152 	DBG1(vgenp, NULL, "port(%d):enter\n", port_num);
1153 
1154 	/*
1155 	 * If this port is connected to the vswitch, then
1156 	 * potentially there could be ports that may be using
1157 	 * this port to transmit packets. To address this do
1158 	 * the following:
1159 	 *	- First set vgenp->vsw_portp to NULL, so that
1160 	 *	  its not used after that.
1161 	 *	- Then wait for the refcnt to go down to 0.
1162 	 *	- Now we can safely detach this port.
1163 	 */
1164 	if (vgenp->vsw_portp == portp) {
1165 		vgenp->vsw_portp = NULL;
1166 		while (vgenp->vsw_port_refcnt > 0) {
1167 			delay(drv_usectohz(vgen_tx_delay));
1168 		}
1169 		(void) atomic_swap_32(&vgenp->vsw_port_refcnt, 0);
1170 	}
1171 
1172 	if (portp->vhp != NULL) {
1173 		vio_net_resource_unreg(portp->vhp);
1174 		portp->vhp = NULL;
1175 	}
1176 
1177 	vgen_vlan_destroy_hash(portp);
1178 
1179 	/* remove it from port list */
1180 	vgen_port_list_remove(portp);
1181 
1182 	/* detach channels from this port */
1183 	vgen_ldc_detach(portp->ldcp);
1184 
1185 	if (portp->num_ldcs != 0) {
1186 		kmem_free(portp->ldc_ids, portp->num_ldcs * sizeof (uint64_t));
1187 		portp->num_ldcs = 0;
1188 	}
1189 
1190 	mutex_destroy(&portp->lock);
1191 	KMEM_FREE(portp);
1192 
1193 	DBG1(vgenp, NULL, "port(%d):exit\n", port_num);
1194 }
1195 
1196 /* add a port to port list */
1197 static void
1198 vgen_port_list_insert(vgen_port_t *portp)
1199 {
1200 	vgen_portlist_t	*plistp;
1201 	vgen_t		*vgenp;
1202 
1203 	vgenp = portp->vgenp;
1204 	plistp = &(vgenp->vgenports);
1205 
1206 	if (plistp->headp == NULL) {
1207 		plistp->headp = portp;
1208 	} else {
1209 		plistp->tailp->nextp = portp;
1210 	}
1211 	plistp->tailp = portp;
1212 	portp->nextp = NULL;
1213 }
1214 
1215 /* remove a port from port list */
1216 static void
1217 vgen_port_list_remove(vgen_port_t *portp)
1218 {
1219 	vgen_port_t	*prevp;
1220 	vgen_port_t	*nextp;
1221 	vgen_portlist_t	*plistp;
1222 	vgen_t		*vgenp;
1223 
1224 	vgenp = portp->vgenp;
1225 
1226 	plistp = &(vgenp->vgenports);
1227 
1228 	if (plistp->headp == NULL)
1229 		return;
1230 
1231 	if (portp == plistp->headp) {
1232 		plistp->headp = portp->nextp;
1233 		if (portp == plistp->tailp)
1234 			plistp->tailp = plistp->headp;
1235 	} else {
1236 		for (prevp = plistp->headp;
1237 		    ((nextp = prevp->nextp) != NULL) && (nextp != portp);
1238 		    prevp = nextp)
1239 			;
1240 		if (nextp == portp) {
1241 			prevp->nextp = portp->nextp;
1242 		}
1243 		if (portp == plistp->tailp)
1244 			plistp->tailp = prevp;
1245 	}
1246 }
1247 
1248 /* lookup a port in the list based on port_num */
1249 static vgen_port_t *
1250 vgen_port_lookup(vgen_portlist_t *plistp, int port_num)
1251 {
1252 	vgen_port_t *portp = NULL;
1253 
1254 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
1255 		if (portp->port_num == port_num) {
1256 			break;
1257 		}
1258 	}
1259 
1260 	return (portp);
1261 }
1262 
1263 static void
1264 vgen_port_init(vgen_port_t *portp)
1265 {
1266 	/* Add the port to the specified vlans */
1267 	vgen_vlan_add_ids(portp);
1268 
1269 	/* Bring up the channel */
1270 	(void) vgen_ldc_init(portp->ldcp);
1271 }
1272 
1273 static void
1274 vgen_port_uninit(vgen_port_t *portp)
1275 {
1276 	vgen_ldc_uninit(portp->ldcp);
1277 
1278 	/* remove the port from vlans it has been assigned to */
1279 	vgen_vlan_remove_ids(portp);
1280 }
1281 
1282 /*
1283  * Scan the machine description for this instance of vnet
1284  * and read its properties. Called only from vgen_init().
1285  * Returns: 0 on success, 1 on failure.
1286  */
1287 static int
1288 vgen_read_mdprops(vgen_t *vgenp)
1289 {
1290 	vnet_t		*vnetp = vgenp->vnetp;
1291 	md_t		*mdp = NULL;
1292 	mde_cookie_t	rootnode;
1293 	mde_cookie_t	*listp = NULL;
1294 	uint64_t	cfgh;
1295 	char		*name;
1296 	int		rv = 1;
1297 	int		num_nodes = 0;
1298 	int		num_devs = 0;
1299 	int		listsz = 0;
1300 	int		i;
1301 
1302 	if ((mdp = md_get_handle()) == NULL) {
1303 		return (rv);
1304 	}
1305 
1306 	num_nodes = md_node_count(mdp);
1307 	ASSERT(num_nodes > 0);
1308 
1309 	listsz = num_nodes * sizeof (mde_cookie_t);
1310 	listp = (mde_cookie_t *)kmem_zalloc(listsz, KM_SLEEP);
1311 
1312 	rootnode = md_root_node(mdp);
1313 
1314 	/* search for all "virtual_device" nodes */
1315 	num_devs = md_scan_dag(mdp, rootnode,
1316 	    md_find_name(mdp, vdev_propname),
1317 	    md_find_name(mdp, "fwd"), listp);
1318 	if (num_devs <= 0) {
1319 		goto vgen_readmd_exit;
1320 	}
1321 
1322 	/*
1323 	 * Now loop through the list of virtual-devices looking for
1324 	 * devices with name "network" and for each such device compare
1325 	 * its instance with what we have from the 'reg' property to
1326 	 * find the right node in MD and then read all its properties.
1327 	 */
1328 	for (i = 0; i < num_devs; i++) {
1329 
1330 		if (md_get_prop_str(mdp, listp[i], "name", &name) != 0) {
1331 			goto vgen_readmd_exit;
1332 		}
1333 
1334 		/* is this a "network" device? */
1335 		if (strcmp(name, vnet_propname) != 0)
1336 			continue;
1337 
1338 		if (md_get_prop_val(mdp, listp[i], "cfg-handle", &cfgh) != 0) {
1339 			goto vgen_readmd_exit;
1340 		}
1341 
1342 		/* is this the required instance of vnet? */
1343 		if (vgenp->regprop != cfgh)
1344 			continue;
1345 
1346 		/*
1347 		 * Read the 'linkprop' property to know if this vnet
1348 		 * device should get physical link updates from vswitch.
1349 		 */
1350 		vgen_linkprop_read(vgenp, mdp, listp[i],
1351 		    &vnetp->pls_update);
1352 
1353 		/*
1354 		 * Read the mtu. Note that we set the mtu of vnet device within
1355 		 * this routine itself, after validating the range.
1356 		 */
1357 		vgen_mtu_read(vgenp, mdp, listp[i], &vnetp->mtu);
1358 		if (vnetp->mtu < ETHERMTU || vnetp->mtu > VNET_MAX_MTU) {
1359 			vnetp->mtu = ETHERMTU;
1360 		}
1361 		vgenp->max_frame_size = vnetp->mtu +
1362 		    sizeof (struct ether_header) + VLAN_TAGSZ;
1363 
1364 		/* read priority ether types */
1365 		vgen_read_pri_eth_types(vgenp, mdp, listp[i]);
1366 
1367 		/* read vlan id properties of this vnet instance */
1368 		vgen_vlan_read_ids(vgenp, VGEN_LOCAL, mdp, listp[i],
1369 		    &vnetp->pvid, &vnetp->vids, &vnetp->nvids,
1370 		    &vnetp->default_vlan_id);
1371 
1372 		rv = 0;
1373 		break;
1374 	}
1375 
1376 vgen_readmd_exit:
1377 
1378 	kmem_free(listp, listsz);
1379 	(void) md_fini_handle(mdp);
1380 	return (rv);
1381 }
1382 
1383 /*
1384  * Read vlan id properties of the given MD node.
1385  * Arguments:
1386  *   arg:          device argument(vnet device or a port)
1387  *   type:         type of arg; VGEN_LOCAL(vnet device) or VGEN_PEER(port)
1388  *   mdp:          machine description
1389  *   node:         md node cookie
1390  *
1391  * Returns:
1392  *   pvidp:        port-vlan-id of the node
1393  *   vidspp:       list of vlan-ids of the node
1394  *   nvidsp:       # of vlan-ids in the list
1395  *   default_idp:  default-vlan-id of the node(if node is vnet device)
1396  */
1397 static void
1398 vgen_vlan_read_ids(void *arg, int type, md_t *mdp, mde_cookie_t node,
1399 	uint16_t *pvidp, uint16_t **vidspp, uint16_t *nvidsp,
1400 	uint16_t *default_idp)
1401 {
1402 	vgen_t		*vgenp;
1403 	vnet_t		*vnetp;
1404 	vgen_port_t	*portp;
1405 	char		*pvid_propname;
1406 	char		*vid_propname;
1407 	uint_t		nvids;
1408 	uint32_t	vids_size;
1409 	int		rv;
1410 	int		i;
1411 	uint64_t	*data;
1412 	uint64_t	val;
1413 	int		size;
1414 	int		inst;
1415 
1416 	if (type == VGEN_LOCAL) {
1417 
1418 		vgenp = (vgen_t *)arg;
1419 		vnetp = vgenp->vnetp;
1420 		pvid_propname = vgen_pvid_propname;
1421 		vid_propname = vgen_vid_propname;
1422 		inst = vnetp->instance;
1423 
1424 	} else if (type == VGEN_PEER) {
1425 
1426 		portp = (vgen_port_t *)arg;
1427 		vgenp = portp->vgenp;
1428 		vnetp = vgenp->vnetp;
1429 		pvid_propname = port_pvid_propname;
1430 		vid_propname = port_vid_propname;
1431 		inst = portp->port_num;
1432 
1433 	} else {
1434 		return;
1435 	}
1436 
1437 	if (type == VGEN_LOCAL && default_idp != NULL) {
1438 		rv = md_get_prop_val(mdp, node, vgen_dvid_propname, &val);
1439 		if (rv != 0) {
1440 			DWARN(vgenp, NULL, "prop(%s) not found",
1441 			    vgen_dvid_propname);
1442 
1443 			*default_idp = vnet_default_vlan_id;
1444 		} else {
1445 			*default_idp = val & 0xFFF;
1446 			DBG2(vgenp, NULL, "%s(%d): (%d)\n", vgen_dvid_propname,
1447 			    inst, *default_idp);
1448 		}
1449 	}
1450 
1451 	rv = md_get_prop_val(mdp, node, pvid_propname, &val);
1452 	if (rv != 0) {
1453 		DWARN(vgenp, NULL, "prop(%s) not found", pvid_propname);
1454 		*pvidp = vnet_default_vlan_id;
1455 	} else {
1456 
1457 		*pvidp = val & 0xFFF;
1458 		DBG2(vgenp, NULL, "%s(%d): (%d)\n",
1459 		    pvid_propname, inst, *pvidp);
1460 	}
1461 
1462 	rv = md_get_prop_data(mdp, node, vid_propname, (uint8_t **)&data,
1463 	    &size);
1464 	if (rv != 0) {
1465 		DBG2(vgenp, NULL, "prop(%s) not found", vid_propname);
1466 		size = 0;
1467 	} else {
1468 		size /= sizeof (uint64_t);
1469 	}
1470 	nvids = size;
1471 
1472 	if (nvids != 0) {
1473 		DBG2(vgenp, NULL, "%s(%d): ", vid_propname, inst);
1474 		vids_size = sizeof (uint16_t) * nvids;
1475 		*vidspp = kmem_zalloc(vids_size, KM_SLEEP);
1476 		for (i = 0; i < nvids; i++) {
1477 			(*vidspp)[i] = data[i] & 0xFFFF;
1478 			DBG2(vgenp, NULL, " %d ", (*vidspp)[i]);
1479 		}
1480 		DBG2(vgenp, NULL, "\n");
1481 	}
1482 
1483 	*nvidsp = nvids;
1484 }
1485 
1486 /*
1487  * Create a vlan id hash table for the given port.
1488  */
1489 static void
1490 vgen_vlan_create_hash(vgen_port_t *portp)
1491 {
1492 	char		hashname[MAXNAMELEN];
1493 
1494 	(void) snprintf(hashname, MAXNAMELEN, "port%d-vlan-hash",
1495 	    portp->port_num);
1496 
1497 	portp->vlan_nchains = vgen_vlan_nchains;
1498 	portp->vlan_hashp = mod_hash_create_idhash(hashname,
1499 	    portp->vlan_nchains, mod_hash_null_valdtor);
1500 }
1501 
1502 /*
1503  * Destroy the vlan id hash table in the given port.
1504  */
1505 static void
1506 vgen_vlan_destroy_hash(vgen_port_t *portp)
1507 {
1508 	if (portp->vlan_hashp != NULL) {
1509 		mod_hash_destroy_hash(portp->vlan_hashp);
1510 		portp->vlan_hashp = NULL;
1511 		portp->vlan_nchains = 0;
1512 	}
1513 }
1514 
1515 /*
1516  * Add a port to the vlans specified in its port properites.
1517  */
1518 static void
1519 vgen_vlan_add_ids(vgen_port_t *portp)
1520 {
1521 	int		rv;
1522 	int		i;
1523 
1524 	rv = mod_hash_insert(portp->vlan_hashp,
1525 	    (mod_hash_key_t)VLAN_ID_KEY(portp->pvid),
1526 	    (mod_hash_val_t)B_TRUE);
1527 	ASSERT(rv == 0);
1528 
1529 	for (i = 0; i < portp->nvids; i++) {
1530 		rv = mod_hash_insert(portp->vlan_hashp,
1531 		    (mod_hash_key_t)VLAN_ID_KEY(portp->vids[i]),
1532 		    (mod_hash_val_t)B_TRUE);
1533 		ASSERT(rv == 0);
1534 	}
1535 }
1536 
1537 /*
1538  * Remove a port from the vlans it has been assigned to.
1539  */
1540 static void
1541 vgen_vlan_remove_ids(vgen_port_t *portp)
1542 {
1543 	int		rv;
1544 	int		i;
1545 	mod_hash_val_t	vp;
1546 
1547 	rv = mod_hash_remove(portp->vlan_hashp,
1548 	    (mod_hash_key_t)VLAN_ID_KEY(portp->pvid),
1549 	    (mod_hash_val_t *)&vp);
1550 	ASSERT(rv == 0);
1551 
1552 	for (i = 0; i < portp->nvids; i++) {
1553 		rv = mod_hash_remove(portp->vlan_hashp,
1554 		    (mod_hash_key_t)VLAN_ID_KEY(portp->vids[i]),
1555 		    (mod_hash_val_t *)&vp);
1556 		ASSERT(rv == 0);
1557 	}
1558 }
1559 
1560 /*
1561  * Lookup the vlan id of the given tx frame. If it is a vlan-tagged frame,
1562  * then the vlan-id is available in the tag; otherwise, its vlan id is
1563  * implicitly obtained from the port-vlan-id of the vnet device.
1564  * The vlan id determined is returned in vidp.
1565  * Returns: B_TRUE if it is a tagged frame; B_FALSE if it is untagged.
1566  */
1567 static boolean_t
1568 vgen_frame_lookup_vid(vnet_t *vnetp, struct ether_header *ehp, uint16_t *vidp)
1569 {
1570 	struct ether_vlan_header	*evhp;
1571 
1572 	/* If it's a tagged frame, get the vlan id from vlan header */
1573 	if (ehp->ether_type == ETHERTYPE_VLAN) {
1574 
1575 		evhp = (struct ether_vlan_header *)ehp;
1576 		*vidp = VLAN_ID(ntohs(evhp->ether_tci));
1577 		return (B_TRUE);
1578 	}
1579 
1580 	/* Untagged frame, vlan-id is the pvid of vnet device */
1581 	*vidp = vnetp->pvid;
1582 	return (B_FALSE);
1583 }
1584 
1585 /*
1586  * Find the given vlan id in the hash table.
1587  * Return: B_TRUE if the id is found; B_FALSE if not found.
1588  */
1589 static boolean_t
1590 vgen_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid)
1591 {
1592 	int		rv;
1593 	mod_hash_val_t	vp;
1594 
1595 	rv = mod_hash_find(vlan_hashp, VLAN_ID_KEY(vid), (mod_hash_val_t *)&vp);
1596 
1597 	if (rv != 0)
1598 		return (B_FALSE);
1599 
1600 	return (B_TRUE);
1601 }
1602 
1603 /*
1604  * This function reads "priority-ether-types" property from md. This property
1605  * is used to enable support for priority frames. Applications which need
1606  * guaranteed and timely delivery of certain high priority frames to/from
1607  * a vnet or vsw within ldoms, should configure this property by providing
1608  * the ether type(s) for which the priority facility is needed.
1609  * Normal data frames are delivered over a ldc channel using the descriptor
1610  * ring mechanism which is constrained by factors such as descriptor ring size,
1611  * the rate at which the ring is processed at the peer ldc end point, etc.
1612  * The priority mechanism provides an Out-Of-Band path to send/receive frames
1613  * as raw pkt data (VIO_PKT_DATA) messages over the channel, avoiding the
1614  * descriptor ring path and enables a more reliable and timely delivery of
1615  * frames to the peer.
1616  */
1617 static void
1618 vgen_read_pri_eth_types(vgen_t *vgenp, md_t *mdp, mde_cookie_t node)
1619 {
1620 	int		rv;
1621 	uint16_t	*types;
1622 	uint64_t	*data;
1623 	int		size;
1624 	int		i;
1625 	size_t		mblk_sz;
1626 
1627 	rv = md_get_prop_data(mdp, node, pri_types_propname,
1628 	    (uint8_t **)&data, &size);
1629 	if (rv != 0) {
1630 		/*
1631 		 * Property may not exist if we are running pre-ldoms1.1 f/w.
1632 		 * Check if 'vgen_pri_eth_type' has been set in that case.
1633 		 */
1634 		if (vgen_pri_eth_type != 0) {
1635 			size = sizeof (vgen_pri_eth_type);
1636 			data = &vgen_pri_eth_type;
1637 		} else {
1638 			DBG2(vgenp, NULL,
1639 			    "prop(%s) not found", pri_types_propname);
1640 			size = 0;
1641 		}
1642 	}
1643 
1644 	if (size == 0) {
1645 		vgenp->pri_num_types = 0;
1646 		return;
1647 	}
1648 
1649 	/*
1650 	 * we have some priority-ether-types defined;
1651 	 * allocate a table of these types and also
1652 	 * allocate a pool of mblks to transmit these
1653 	 * priority packets.
1654 	 */
1655 	size /= sizeof (uint64_t);
1656 	vgenp->pri_num_types = size;
1657 	vgenp->pri_types = kmem_zalloc(size * sizeof (uint16_t), KM_SLEEP);
1658 	for (i = 0, types = vgenp->pri_types; i < size; i++) {
1659 		types[i] = data[i] & 0xFFFF;
1660 	}
1661 	mblk_sz = (VIO_PKT_DATA_HDRSIZE + vgenp->max_frame_size + 7) & ~7;
1662 	(void) vio_create_mblks(vgen_pri_tx_nmblks, mblk_sz, NULL,
1663 	    &vgenp->pri_tx_vmp);
1664 }
1665 
1666 static void
1667 vgen_mtu_read(vgen_t *vgenp, md_t *mdp, mde_cookie_t node, uint32_t *mtu)
1668 {
1669 	int		rv;
1670 	uint64_t	val;
1671 	char		*mtu_propname;
1672 
1673 	mtu_propname = vgen_mtu_propname;
1674 
1675 	rv = md_get_prop_val(mdp, node, mtu_propname, &val);
1676 	if (rv != 0) {
1677 		DWARN(vgenp, NULL, "prop(%s) not found", mtu_propname);
1678 		*mtu = vnet_ethermtu;
1679 	} else {
1680 
1681 		*mtu = val & 0xFFFF;
1682 		DBG2(vgenp, NULL, "%s(%d): (%d)\n", mtu_propname,
1683 		    vgenp->instance, *mtu);
1684 	}
1685 }
1686 
1687 static void
1688 vgen_linkprop_read(vgen_t *vgenp, md_t *mdp, mde_cookie_t node,
1689 	boolean_t *pls)
1690 {
1691 	int		rv;
1692 	uint64_t	val;
1693 	char		*linkpropname;
1694 
1695 	linkpropname = vgen_linkprop_propname;
1696 
1697 	rv = md_get_prop_val(mdp, node, linkpropname, &val);
1698 	if (rv != 0) {
1699 		DWARN(vgenp, NULL, "prop(%s) not found", linkpropname);
1700 		*pls = B_FALSE;
1701 	} else {
1702 
1703 		*pls = (val & 0x1) ?  B_TRUE : B_FALSE;
1704 		DBG2(vgenp, NULL, "%s(%d): (%d)\n", linkpropname,
1705 		    vgenp->instance, *pls);
1706 	}
1707 }
1708 
1709 /* register with MD event generator */
1710 static int
1711 vgen_mdeg_reg(vgen_t *vgenp)
1712 {
1713 	mdeg_prop_spec_t	*pspecp;
1714 	mdeg_node_spec_t	*parentp;
1715 	uint_t			templatesz;
1716 	int			rv;
1717 	mdeg_handle_t		dev_hdl = NULL;
1718 	mdeg_handle_t		port_hdl = NULL;
1719 
1720 	templatesz = sizeof (vgen_prop_template);
1721 	pspecp = kmem_zalloc(templatesz, KM_NOSLEEP);
1722 	if (pspecp == NULL) {
1723 		return (DDI_FAILURE);
1724 	}
1725 	parentp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_NOSLEEP);
1726 	if (parentp == NULL) {
1727 		kmem_free(pspecp, templatesz);
1728 		return (DDI_FAILURE);
1729 	}
1730 
1731 	bcopy(vgen_prop_template, pspecp, templatesz);
1732 
1733 	/*
1734 	 * NOTE: The instance here refers to the value of "reg" property and
1735 	 * not the dev_info instance (ddi_get_instance()) of vnet.
1736 	 */
1737 	VGEN_SET_MDEG_PROP_INST(pspecp, vgenp->regprop);
1738 
1739 	parentp->namep = "virtual-device";
1740 	parentp->specp = pspecp;
1741 
1742 	/* save parentp in vgen_t */
1743 	vgenp->mdeg_parentp = parentp;
1744 
1745 	/*
1746 	 * Register an interest in 'virtual-device' nodes with a
1747 	 * 'name' property of 'network'
1748 	 */
1749 	rv = mdeg_register(parentp, &vdev_match, vgen_mdeg_cb, vgenp, &dev_hdl);
1750 	if (rv != MDEG_SUCCESS) {
1751 		DERR(vgenp, NULL, "mdeg_register failed\n");
1752 		goto mdeg_reg_fail;
1753 	}
1754 
1755 	/* Register an interest in 'port' nodes */
1756 	rv = mdeg_register(parentp, &vport_match, vgen_mdeg_port_cb, vgenp,
1757 	    &port_hdl);
1758 	if (rv != MDEG_SUCCESS) {
1759 		DERR(vgenp, NULL, "mdeg_register failed\n");
1760 		goto mdeg_reg_fail;
1761 	}
1762 
1763 	/* save mdeg handle in vgen_t */
1764 	vgenp->mdeg_dev_hdl = dev_hdl;
1765 	vgenp->mdeg_port_hdl = port_hdl;
1766 
1767 	return (DDI_SUCCESS);
1768 
1769 mdeg_reg_fail:
1770 	if (dev_hdl != NULL) {
1771 		(void) mdeg_unregister(dev_hdl);
1772 	}
1773 	KMEM_FREE(parentp);
1774 	kmem_free(pspecp, templatesz);
1775 	vgenp->mdeg_parentp = NULL;
1776 	return (DDI_FAILURE);
1777 }
1778 
1779 /* unregister with MD event generator */
1780 static void
1781 vgen_mdeg_unreg(vgen_t *vgenp)
1782 {
1783 	if (vgenp->mdeg_dev_hdl != NULL) {
1784 		(void) mdeg_unregister(vgenp->mdeg_dev_hdl);
1785 		vgenp->mdeg_dev_hdl = NULL;
1786 	}
1787 	if (vgenp->mdeg_port_hdl != NULL) {
1788 		(void) mdeg_unregister(vgenp->mdeg_port_hdl);
1789 		vgenp->mdeg_port_hdl = NULL;
1790 	}
1791 
1792 	if (vgenp->mdeg_parentp != NULL) {
1793 		kmem_free(vgenp->mdeg_parentp->specp,
1794 		    sizeof (vgen_prop_template));
1795 		KMEM_FREE(vgenp->mdeg_parentp);
1796 		vgenp->mdeg_parentp = NULL;
1797 	}
1798 }
1799 
1800 /* mdeg callback function for the port node */
1801 static int
1802 vgen_mdeg_port_cb(void *cb_argp, mdeg_result_t *resp)
1803 {
1804 	int		idx;
1805 	int		vsw_idx = -1;
1806 	uint64_t 	val;
1807 	vgen_t		*vgenp;
1808 
1809 	if ((resp == NULL) || (cb_argp == NULL)) {
1810 		return (MDEG_FAILURE);
1811 	}
1812 
1813 	vgenp = (vgen_t *)cb_argp;
1814 	DBG1(vgenp, NULL, "enter\n");
1815 
1816 	mutex_enter(&vgenp->lock);
1817 
1818 	DBG1(vgenp, NULL, "ports: removed(%x), "
1819 	"added(%x), updated(%x)\n", resp->removed.nelem,
1820 	    resp->added.nelem, resp->match_curr.nelem);
1821 
1822 	for (idx = 0; idx < resp->removed.nelem; idx++) {
1823 		(void) vgen_remove_port(vgenp, resp->removed.mdp,
1824 		    resp->removed.mdep[idx]);
1825 	}
1826 
1827 	if (vgenp->vsw_portp == NULL) {
1828 		/*
1829 		 * find vsw_port and add it first, because other ports need
1830 		 * this when adding fdb entry (see vgen_port_init()).
1831 		 */
1832 		for (idx = 0; idx < resp->added.nelem; idx++) {
1833 			if (!(md_get_prop_val(resp->added.mdp,
1834 			    resp->added.mdep[idx], swport_propname, &val))) {
1835 				if (val == 0) {
1836 					/*
1837 					 * This port is connected to the
1838 					 * vsw on service domain.
1839 					 */
1840 					vsw_idx = idx;
1841 					if (vgen_add_port(vgenp,
1842 					    resp->added.mdp,
1843 					    resp->added.mdep[idx]) !=
1844 					    DDI_SUCCESS) {
1845 						cmn_err(CE_NOTE, "vnet%d Could "
1846 						    "not initialize virtual "
1847 						    "switch port.",
1848 						    vgenp->instance);
1849 						mutex_exit(&vgenp->lock);
1850 						return (MDEG_FAILURE);
1851 					}
1852 					break;
1853 				}
1854 			}
1855 		}
1856 		if (vsw_idx == -1) {
1857 			DWARN(vgenp, NULL, "can't find vsw_port\n");
1858 			mutex_exit(&vgenp->lock);
1859 			return (MDEG_FAILURE);
1860 		}
1861 	}
1862 
1863 	for (idx = 0; idx < resp->added.nelem; idx++) {
1864 		if ((vsw_idx != -1) && (vsw_idx == idx)) /* skip vsw_port */
1865 			continue;
1866 
1867 		/* If this port can't be added just skip it. */
1868 		(void) vgen_add_port(vgenp, resp->added.mdp,
1869 		    resp->added.mdep[idx]);
1870 	}
1871 
1872 	for (idx = 0; idx < resp->match_curr.nelem; idx++) {
1873 		(void) vgen_update_port(vgenp, resp->match_curr.mdp,
1874 		    resp->match_curr.mdep[idx],
1875 		    resp->match_prev.mdp,
1876 		    resp->match_prev.mdep[idx]);
1877 	}
1878 
1879 	mutex_exit(&vgenp->lock);
1880 	DBG1(vgenp, NULL, "exit\n");
1881 	return (MDEG_SUCCESS);
1882 }
1883 
1884 /* mdeg callback function for the vnet node */
1885 static int
1886 vgen_mdeg_cb(void *cb_argp, mdeg_result_t *resp)
1887 {
1888 	vgen_t		*vgenp;
1889 	vnet_t		*vnetp;
1890 	md_t		*mdp;
1891 	mde_cookie_t	node;
1892 	uint64_t	inst;
1893 	char		*node_name = NULL;
1894 
1895 	if ((resp == NULL) || (cb_argp == NULL)) {
1896 		return (MDEG_FAILURE);
1897 	}
1898 
1899 	vgenp = (vgen_t *)cb_argp;
1900 	vnetp = vgenp->vnetp;
1901 
1902 	DBG1(vgenp, NULL, "added %d : removed %d : curr matched %d"
1903 	    " : prev matched %d", resp->added.nelem, resp->removed.nelem,
1904 	    resp->match_curr.nelem, resp->match_prev.nelem);
1905 
1906 	mutex_enter(&vgenp->lock);
1907 
1908 	/*
1909 	 * We get an initial callback for this node as 'added' after
1910 	 * registering with mdeg. Note that we would have already gathered
1911 	 * information about this vnet node by walking MD earlier during attach
1912 	 * (in vgen_read_mdprops()). So, there is a window where the properties
1913 	 * of this node might have changed when we get this initial 'added'
1914 	 * callback. We handle this as if an update occured and invoke the same
1915 	 * function which handles updates to the properties of this vnet-node
1916 	 * if any. A non-zero 'match' value indicates that the MD has been
1917 	 * updated and that a 'network' node is present which may or may not
1918 	 * have been updated. It is up to the clients to examine their own
1919 	 * nodes and determine if they have changed.
1920 	 */
1921 	if (resp->added.nelem != 0) {
1922 
1923 		if (resp->added.nelem != 1) {
1924 			cmn_err(CE_NOTE, "!vnet%d: number of nodes added "
1925 			    "invalid: %d\n", vnetp->instance,
1926 			    resp->added.nelem);
1927 			goto vgen_mdeg_cb_err;
1928 		}
1929 
1930 		mdp = resp->added.mdp;
1931 		node = resp->added.mdep[0];
1932 
1933 	} else if (resp->match_curr.nelem != 0) {
1934 
1935 		if (resp->match_curr.nelem != 1) {
1936 			cmn_err(CE_NOTE, "!vnet%d: number of nodes updated "
1937 			    "invalid: %d\n", vnetp->instance,
1938 			    resp->match_curr.nelem);
1939 			goto vgen_mdeg_cb_err;
1940 		}
1941 
1942 		mdp = resp->match_curr.mdp;
1943 		node = resp->match_curr.mdep[0];
1944 
1945 	} else {
1946 		goto vgen_mdeg_cb_err;
1947 	}
1948 
1949 	/* Validate name and instance */
1950 	if (md_get_prop_str(mdp, node, "name", &node_name) != 0) {
1951 		DERR(vgenp, NULL, "unable to get node name\n");
1952 		goto vgen_mdeg_cb_err;
1953 	}
1954 
1955 	/* is this a virtual-network device? */
1956 	if (strcmp(node_name, vnet_propname) != 0) {
1957 		DERR(vgenp, NULL, "%s: Invalid node name: %s\n", node_name);
1958 		goto vgen_mdeg_cb_err;
1959 	}
1960 
1961 	if (md_get_prop_val(mdp, node, "cfg-handle", &inst)) {
1962 		DERR(vgenp, NULL, "prop(cfg-handle) not found\n");
1963 		goto vgen_mdeg_cb_err;
1964 	}
1965 
1966 	/* is this the right instance of vnet? */
1967 	if (inst != vgenp->regprop) {
1968 		DERR(vgenp, NULL,  "Invalid cfg-handle: %lx\n", inst);
1969 		goto vgen_mdeg_cb_err;
1970 	}
1971 
1972 	vgen_update_md_prop(vgenp, mdp, node);
1973 
1974 	mutex_exit(&vgenp->lock);
1975 	return (MDEG_SUCCESS);
1976 
1977 vgen_mdeg_cb_err:
1978 	mutex_exit(&vgenp->lock);
1979 	return (MDEG_FAILURE);
1980 }
1981 
1982 /*
1983  * Check to see if the relevant properties in the specified node have
1984  * changed, and if so take the appropriate action.
1985  */
1986 static void
1987 vgen_update_md_prop(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
1988 {
1989 	uint16_t	pvid;
1990 	uint16_t	*vids;
1991 	uint16_t	nvids;
1992 	vnet_t		*vnetp = vgenp->vnetp;
1993 	uint32_t	mtu;
1994 	boolean_t	pls_update;
1995 	enum		{ MD_init = 0x1,
1996 			    MD_vlans = 0x2,
1997 			    MD_mtu = 0x4,
1998 			    MD_pls = 0x8 } updated;
1999 	int		rv;
2000 
2001 	updated = MD_init;
2002 
2003 	/* Read the vlan ids */
2004 	vgen_vlan_read_ids(vgenp, VGEN_LOCAL, mdp, mdex, &pvid, &vids,
2005 	    &nvids, NULL);
2006 
2007 	/* Determine if there are any vlan id updates */
2008 	if ((pvid != vnetp->pvid) ||		/* pvid changed? */
2009 	    (nvids != vnetp->nvids) ||		/* # of vids changed? */
2010 	    ((nvids != 0) && (vnetp->nvids != 0) &&	/* vids changed? */
2011 	    bcmp(vids, vnetp->vids, sizeof (uint16_t) * nvids))) {
2012 		updated |= MD_vlans;
2013 	}
2014 
2015 	/* Read mtu */
2016 	vgen_mtu_read(vgenp, mdp, mdex, &mtu);
2017 	if (mtu != vnetp->mtu) {
2018 		if (mtu >= ETHERMTU && mtu <= VNET_MAX_MTU) {
2019 			updated |= MD_mtu;
2020 		} else {
2021 			cmn_err(CE_NOTE, "!vnet%d: Unable to process mtu update"
2022 			    " as the specified value:%d is invalid\n",
2023 			    vnetp->instance, mtu);
2024 		}
2025 	}
2026 
2027 	/*
2028 	 * Read the 'linkprop' property.
2029 	 */
2030 	vgen_linkprop_read(vgenp, mdp, mdex, &pls_update);
2031 	if (pls_update != vnetp->pls_update) {
2032 		updated |= MD_pls;
2033 	}
2034 
2035 	/* Now process the updated props */
2036 
2037 	if (updated & MD_vlans) {
2038 
2039 		/* save the new vlan ids */
2040 		vnetp->pvid = pvid;
2041 		if (vnetp->nvids != 0) {
2042 			kmem_free(vnetp->vids,
2043 			    sizeof (uint16_t) * vnetp->nvids);
2044 			vnetp->nvids = 0;
2045 		}
2046 		if (nvids != 0) {
2047 			vnetp->nvids = nvids;
2048 			vnetp->vids = vids;
2049 		}
2050 
2051 		/* reset vlan-unaware peers (ver < 1.3) and restart handshake */
2052 		vgen_reset_vlan_unaware_ports(vgenp);
2053 
2054 	} else {
2055 
2056 		if (nvids != 0) {
2057 			kmem_free(vids, sizeof (uint16_t) * nvids);
2058 		}
2059 	}
2060 
2061 	if (updated & MD_mtu) {
2062 
2063 		DBG2(vgenp, NULL, "curr_mtu(%d) new_mtu(%d)\n",
2064 		    vnetp->mtu, mtu);
2065 
2066 		rv = vnet_mtu_update(vnetp, mtu);
2067 		if (rv == 0) {
2068 			vgenp->max_frame_size = mtu +
2069 			    sizeof (struct ether_header) + VLAN_TAGSZ;
2070 		}
2071 	}
2072 
2073 	if (updated & MD_pls) {
2074 		/* enable/disable physical link state updates */
2075 		vnetp->pls_update = pls_update;
2076 		mutex_exit(&vgenp->lock);
2077 
2078 		/* reset vsw-port to re-negotiate with the updated prop. */
2079 		vgen_reset_vsw_port(vgenp);
2080 
2081 		mutex_enter(&vgenp->lock);
2082 	}
2083 }
2084 
2085 /* add a new port to the device */
2086 static int
2087 vgen_add_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
2088 {
2089 	vgen_port_t	*portp;
2090 	int		rv;
2091 
2092 	portp = kmem_zalloc(sizeof (vgen_port_t), KM_SLEEP);
2093 
2094 	rv = vgen_port_read_props(portp, vgenp, mdp, mdex);
2095 	if (rv != DDI_SUCCESS) {
2096 		KMEM_FREE(portp);
2097 		return (DDI_FAILURE);
2098 	}
2099 
2100 	rv = vgen_port_attach(portp);
2101 	if (rv != DDI_SUCCESS) {
2102 		return (DDI_FAILURE);
2103 	}
2104 
2105 	return (DDI_SUCCESS);
2106 }
2107 
2108 /* read properties of the port from its md node */
2109 static int
2110 vgen_port_read_props(vgen_port_t *portp, vgen_t *vgenp, md_t *mdp,
2111 	mde_cookie_t mdex)
2112 {
2113 	uint64_t		port_num;
2114 	uint64_t		*ldc_ids;
2115 	uint64_t		macaddr;
2116 	uint64_t		val;
2117 	int			num_ldcs;
2118 	int			i;
2119 	int			addrsz;
2120 	int			num_nodes = 0;
2121 	int			listsz = 0;
2122 	mde_cookie_t		*listp = NULL;
2123 	uint8_t			*addrp;
2124 	struct ether_addr	ea;
2125 
2126 	/* read "id" property to get the port number */
2127 	if (md_get_prop_val(mdp, mdex, id_propname, &port_num)) {
2128 		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
2129 		return (DDI_FAILURE);
2130 	}
2131 
2132 	/*
2133 	 * Find the channel endpoint node(s) under this port node.
2134 	 */
2135 	if ((num_nodes = md_node_count(mdp)) <= 0) {
2136 		DWARN(vgenp, NULL, "invalid number of nodes found (%d)",
2137 		    num_nodes);
2138 		return (DDI_FAILURE);
2139 	}
2140 
2141 	/* allocate space for node list */
2142 	listsz = num_nodes * sizeof (mde_cookie_t);
2143 	listp = kmem_zalloc(listsz, KM_NOSLEEP);
2144 	if (listp == NULL)
2145 		return (DDI_FAILURE);
2146 
2147 	num_ldcs = md_scan_dag(mdp, mdex,
2148 	    md_find_name(mdp, channel_propname),
2149 	    md_find_name(mdp, "fwd"), listp);
2150 
2151 	if (num_ldcs <= 0) {
2152 		DWARN(vgenp, NULL, "can't find %s nodes", channel_propname);
2153 		kmem_free(listp, listsz);
2154 		return (DDI_FAILURE);
2155 	}
2156 
2157 	if (num_ldcs > 1) {
2158 		DWARN(vgenp, NULL, "Port %d: Number of channels %d > 1\n",
2159 		    port_num, num_ldcs);
2160 	}
2161 
2162 	ldc_ids = kmem_zalloc(num_ldcs * sizeof (uint64_t), KM_NOSLEEP);
2163 	if (ldc_ids == NULL) {
2164 		kmem_free(listp, listsz);
2165 		return (DDI_FAILURE);
2166 	}
2167 
2168 	for (i = 0; i < num_ldcs; i++) {
2169 		/* read channel ids */
2170 		if (md_get_prop_val(mdp, listp[i], id_propname, &ldc_ids[i])) {
2171 			DWARN(vgenp, NULL, "prop(%s) not found\n",
2172 			    id_propname);
2173 			kmem_free(listp, listsz);
2174 			kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
2175 			return (DDI_FAILURE);
2176 		}
2177 		DBG2(vgenp, NULL, "ldc_id 0x%llx", ldc_ids[i]);
2178 	}
2179 
2180 	kmem_free(listp, listsz);
2181 
2182 	if (md_get_prop_data(mdp, mdex, rmacaddr_propname, &addrp,
2183 	    &addrsz)) {
2184 		DWARN(vgenp, NULL, "prop(%s) not found\n", rmacaddr_propname);
2185 		kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
2186 		return (DDI_FAILURE);
2187 	}
2188 
2189 	if (addrsz < ETHERADDRL) {
2190 		DWARN(vgenp, NULL, "invalid address size (%d)\n", addrsz);
2191 		kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
2192 		return (DDI_FAILURE);
2193 	}
2194 
2195 	macaddr = *((uint64_t *)addrp);
2196 
2197 	DBG2(vgenp, NULL, "remote mac address 0x%llx\n", macaddr);
2198 
2199 	for (i = ETHERADDRL - 1; i >= 0; i--) {
2200 		ea.ether_addr_octet[i] = macaddr & 0xFF;
2201 		macaddr >>= 8;
2202 	}
2203 
2204 	if (!(md_get_prop_val(mdp, mdex, swport_propname, &val))) {
2205 		if (val == 0) {
2206 			/* This port is connected to the vswitch */
2207 			portp->is_vsw_port = B_TRUE;
2208 		} else {
2209 			portp->is_vsw_port = B_FALSE;
2210 		}
2211 	}
2212 
2213 	/* now update all properties into the port */
2214 	portp->vgenp = vgenp;
2215 	portp->port_num = port_num;
2216 	ether_copy(&ea, &portp->macaddr);
2217 	portp->ldc_ids = kmem_zalloc(sizeof (uint64_t) * num_ldcs, KM_SLEEP);
2218 	bcopy(ldc_ids, portp->ldc_ids, sizeof (uint64_t) * num_ldcs);
2219 	portp->num_ldcs = num_ldcs;
2220 
2221 	/* read vlan id properties of this port node */
2222 	vgen_vlan_read_ids(portp, VGEN_PEER, mdp, mdex, &portp->pvid,
2223 	    &portp->vids, &portp->nvids, NULL);
2224 
2225 	kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
2226 
2227 	return (DDI_SUCCESS);
2228 }
2229 
2230 /* remove a port from the device */
2231 static int
2232 vgen_remove_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
2233 {
2234 	uint64_t	port_num;
2235 	vgen_port_t	*portp;
2236 	vgen_portlist_t	*plistp;
2237 
2238 	/* read "id" property to get the port number */
2239 	if (md_get_prop_val(mdp, mdex, id_propname, &port_num)) {
2240 		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
2241 		return (DDI_FAILURE);
2242 	}
2243 
2244 	plistp = &(vgenp->vgenports);
2245 
2246 	WRITE_ENTER(&plistp->rwlock);
2247 	portp = vgen_port_lookup(plistp, (int)port_num);
2248 	if (portp == NULL) {
2249 		DWARN(vgenp, NULL, "can't find port(%lx)\n", port_num);
2250 		RW_EXIT(&plistp->rwlock);
2251 		return (DDI_FAILURE);
2252 	}
2253 
2254 	vgen_port_detach_mdeg(portp);
2255 	RW_EXIT(&plistp->rwlock);
2256 
2257 	return (DDI_SUCCESS);
2258 }
2259 
2260 /* attach a port to the device based on mdeg data */
2261 static int
2262 vgen_port_attach(vgen_port_t *portp)
2263 {
2264 	vgen_portlist_t		*plistp;
2265 	vgen_t			*vgenp;
2266 	uint64_t		*ldcids;
2267 	mac_register_t		*macp;
2268 	vio_net_res_type_t	type;
2269 	int			rv;
2270 
2271 	ASSERT(portp != NULL);
2272 	vgenp = portp->vgenp;
2273 	ldcids = portp->ldc_ids;
2274 
2275 	DBG2(vgenp, NULL, "port_num(%d), ldcid(%lx)\n",
2276 	    portp->port_num, ldcids[0]);
2277 
2278 	mutex_init(&portp->lock, NULL, MUTEX_DRIVER, NULL);
2279 
2280 	/*
2281 	 * attach the channel under the port using its channel id;
2282 	 * note that we only support one channel per port for now.
2283 	 */
2284 	if (vgen_ldc_attach(portp, ldcids[0]) == DDI_FAILURE) {
2285 		vgen_port_detach(portp);
2286 		return (DDI_FAILURE);
2287 	}
2288 
2289 	/* create vlan id hash table */
2290 	vgen_vlan_create_hash(portp);
2291 
2292 	if (portp->is_vsw_port == B_TRUE) {
2293 		/* This port is connected to the switch port */
2294 		(void) atomic_swap_32(&portp->use_vsw_port, B_FALSE);
2295 		type = VIO_NET_RES_LDC_SERVICE;
2296 	} else {
2297 		(void) atomic_swap_32(&portp->use_vsw_port, B_TRUE);
2298 		type = VIO_NET_RES_LDC_GUEST;
2299 	}
2300 
2301 	if ((macp = mac_alloc(MAC_VERSION)) == NULL) {
2302 		vgen_port_detach(portp);
2303 		return (DDI_FAILURE);
2304 	}
2305 	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
2306 	macp->m_driver = portp;
2307 	macp->m_dip = vgenp->vnetdip;
2308 	macp->m_src_addr = (uint8_t *)&(vgenp->macaddr);
2309 	macp->m_callbacks = &vgen_m_callbacks;
2310 	macp->m_min_sdu = 0;
2311 	macp->m_max_sdu = ETHERMTU;
2312 
2313 	mutex_enter(&portp->lock);
2314 	rv = vio_net_resource_reg(macp, type, vgenp->macaddr,
2315 	    portp->macaddr, &portp->vhp, &portp->vcb);
2316 	mutex_exit(&portp->lock);
2317 	mac_free(macp);
2318 
2319 	if (rv == 0) {
2320 		/* link it into the list of ports */
2321 		plistp = &(vgenp->vgenports);
2322 		WRITE_ENTER(&plistp->rwlock);
2323 		vgen_port_list_insert(portp);
2324 		RW_EXIT(&plistp->rwlock);
2325 
2326 		if (portp->is_vsw_port == B_TRUE) {
2327 			/* We now have the vswitch port attached */
2328 			vgenp->vsw_portp = portp;
2329 			(void) atomic_swap_32(&vgenp->vsw_port_refcnt, 0);
2330 		}
2331 	} else {
2332 		DERR(vgenp, NULL, "vio_net_resource_reg failed for portp=0x%p",
2333 		    portp);
2334 		vgen_port_detach(portp);
2335 	}
2336 
2337 	DBG1(vgenp, NULL, "exit: port_num(%d)\n", portp->port_num);
2338 	return (DDI_SUCCESS);
2339 }
2340 
2341 /* detach a port from the device based on mdeg data */
2342 static void
2343 vgen_port_detach_mdeg(vgen_port_t *portp)
2344 {
2345 	vgen_t *vgenp = portp->vgenp;
2346 
2347 	DBG1(vgenp, NULL, "enter: port_num(%d)\n", portp->port_num);
2348 
2349 	mutex_enter(&portp->lock);
2350 
2351 	/* stop the port if needed */
2352 	if (portp->flags & VGEN_STARTED) {
2353 		vgen_port_uninit(portp);
2354 		portp->flags &= ~(VGEN_STARTED);
2355 	}
2356 
2357 	mutex_exit(&portp->lock);
2358 	vgen_port_detach(portp);
2359 
2360 	DBG1(vgenp, NULL, "exit: port_num(%d)\n", portp->port_num);
2361 }
2362 
2363 static int
2364 vgen_update_port(vgen_t *vgenp, md_t *curr_mdp, mde_cookie_t curr_mdex,
2365 	md_t *prev_mdp, mde_cookie_t prev_mdex)
2366 {
2367 	uint64_t	cport_num;
2368 	uint64_t	pport_num;
2369 	vgen_portlist_t	*plistp;
2370 	vgen_port_t	*portp;
2371 	boolean_t	updated_vlans = B_FALSE;
2372 	uint16_t	pvid;
2373 	uint16_t	*vids;
2374 	uint16_t	nvids;
2375 
2376 	/*
2377 	 * For now, we get port updates only if vlan ids changed.
2378 	 * We read the port num and do some sanity check.
2379 	 */
2380 	if (md_get_prop_val(curr_mdp, curr_mdex, id_propname, &cport_num)) {
2381 		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
2382 		return (DDI_FAILURE);
2383 	}
2384 
2385 	if (md_get_prop_val(prev_mdp, prev_mdex, id_propname, &pport_num)) {
2386 		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
2387 		return (DDI_FAILURE);
2388 	}
2389 	if (cport_num != pport_num)
2390 		return (DDI_FAILURE);
2391 
2392 	plistp = &(vgenp->vgenports);
2393 
2394 	READ_ENTER(&plistp->rwlock);
2395 
2396 	portp = vgen_port_lookup(plistp, (int)cport_num);
2397 	if (portp == NULL) {
2398 		DWARN(vgenp, NULL, "can't find port(%lx)\n", cport_num);
2399 		RW_EXIT(&plistp->rwlock);
2400 		return (DDI_FAILURE);
2401 	}
2402 
2403 	/* Read the vlan ids */
2404 	vgen_vlan_read_ids(portp, VGEN_PEER, curr_mdp, curr_mdex, &pvid, &vids,
2405 	    &nvids, NULL);
2406 
2407 	/* Determine if there are any vlan id updates */
2408 	if ((pvid != portp->pvid) ||		/* pvid changed? */
2409 	    (nvids != portp->nvids) ||		/* # of vids changed? */
2410 	    ((nvids != 0) && (portp->nvids != 0) &&	/* vids changed? */
2411 	    bcmp(vids, portp->vids, sizeof (uint16_t) * nvids))) {
2412 		updated_vlans = B_TRUE;
2413 	}
2414 
2415 	if (updated_vlans == B_FALSE) {
2416 		RW_EXIT(&plistp->rwlock);
2417 		return (DDI_FAILURE);
2418 	}
2419 
2420 	/* remove the port from vlans it has been assigned to */
2421 	vgen_vlan_remove_ids(portp);
2422 
2423 	/* save the new vlan ids */
2424 	portp->pvid = pvid;
2425 	if (portp->nvids != 0) {
2426 		kmem_free(portp->vids, sizeof (uint16_t) * portp->nvids);
2427 		portp->nvids = 0;
2428 	}
2429 	if (nvids != 0) {
2430 		portp->vids = kmem_zalloc(sizeof (uint16_t) * nvids, KM_SLEEP);
2431 		bcopy(vids, portp->vids, sizeof (uint16_t) * nvids);
2432 		portp->nvids = nvids;
2433 		kmem_free(vids, sizeof (uint16_t) * nvids);
2434 	}
2435 
2436 	/* add port to the new vlans */
2437 	vgen_vlan_add_ids(portp);
2438 
2439 	/* reset the port if it is vlan unaware (ver < 1.3) */
2440 	vgen_vlan_unaware_port_reset(portp);
2441 
2442 	RW_EXIT(&plistp->rwlock);
2443 
2444 	return (DDI_SUCCESS);
2445 }
2446 
2447 static uint64_t
2448 vgen_port_stat(vgen_port_t *portp, uint_t stat)
2449 {
2450 	return (vgen_ldc_stat(portp->ldcp, stat));
2451 }
2452 
2453 /* attach the channel corresponding to the given ldc_id to the port */
2454 static int
2455 vgen_ldc_attach(vgen_port_t *portp, uint64_t ldc_id)
2456 {
2457 	vgen_t 		*vgenp;
2458 	vgen_ldc_t 	*ldcp;
2459 	ldc_attr_t 	attr;
2460 	int 		status;
2461 	ldc_status_t	istatus;
2462 	char		kname[MAXNAMELEN];
2463 	int		instance;
2464 	enum	{AST_init = 0x0, AST_ldc_alloc = 0x1,
2465 		AST_mutex_init = 0x2, AST_ldc_init = 0x4,
2466 		AST_ldc_reg_cb = 0x8 } attach_state;
2467 
2468 	attach_state = AST_init;
2469 	vgenp = portp->vgenp;
2470 
2471 	ldcp = kmem_zalloc(sizeof (vgen_ldc_t), KM_NOSLEEP);
2472 	if (ldcp == NULL) {
2473 		goto ldc_attach_failed;
2474 	}
2475 	ldcp->ldc_id = ldc_id;
2476 	ldcp->portp = portp;
2477 
2478 	attach_state |= AST_ldc_alloc;
2479 
2480 	mutex_init(&ldcp->txlock, NULL, MUTEX_DRIVER, NULL);
2481 	mutex_init(&ldcp->cblock, NULL, MUTEX_DRIVER, NULL);
2482 	mutex_init(&ldcp->tclock, NULL, MUTEX_DRIVER, NULL);
2483 	mutex_init(&ldcp->wrlock, NULL, MUTEX_DRIVER, NULL);
2484 	mutex_init(&ldcp->rxlock, NULL, MUTEX_DRIVER, NULL);
2485 	mutex_init(&ldcp->pollq_lock, NULL, MUTEX_DRIVER, NULL);
2486 	mutex_init(&ldcp->msg_thr_lock, NULL, MUTEX_DRIVER, NULL);
2487 	cv_init(&ldcp->msg_thr_cv, NULL, CV_DRIVER, NULL);
2488 
2489 	attach_state |= AST_mutex_init;
2490 
2491 	attr.devclass = LDC_DEV_NT;
2492 	attr.instance = vgenp->instance;
2493 	attr.mode = LDC_MODE_UNRELIABLE;
2494 	attr.mtu = vgen_ldc_mtu;
2495 	status = ldc_init(ldc_id, &attr, &ldcp->ldc_handle);
2496 	if (status != 0) {
2497 		DWARN(vgenp, ldcp, "ldc_init failed,rv (%d)\n", status);
2498 		goto ldc_attach_failed;
2499 	}
2500 	attach_state |= AST_ldc_init;
2501 
2502 	status = ldc_reg_callback(ldcp->ldc_handle, vgen_ldc_cb, (caddr_t)ldcp);
2503 	if (status != 0) {
2504 		DWARN(vgenp, ldcp, "ldc_reg_callback failed, rv (%d)\n",
2505 		    status);
2506 		goto ldc_attach_failed;
2507 	}
2508 	/*
2509 	 * allocate a message for ldc_read()s, big enough to hold ctrl and
2510 	 * data msgs, including raw data msgs used to recv priority frames.
2511 	 */
2512 	ldcp->msglen = VIO_PKT_DATA_HDRSIZE + vgenp->max_frame_size;
2513 	ldcp->ldcmsg = kmem_alloc(ldcp->msglen, KM_SLEEP);
2514 	attach_state |= AST_ldc_reg_cb;
2515 
2516 	(void) ldc_status(ldcp->ldc_handle, &istatus);
2517 	ASSERT(istatus == LDC_INIT);
2518 	ldcp->ldc_status = istatus;
2519 
2520 	/* Setup kstats for the channel */
2521 	instance = vgenp->instance;
2522 	(void) sprintf(kname, "vnetldc0x%lx", ldcp->ldc_id);
2523 	ldcp->ksp = vgen_setup_kstats("vnet", instance, kname, &ldcp->stats);
2524 	if (ldcp->ksp == NULL) {
2525 		goto ldc_attach_failed;
2526 	}
2527 
2528 	/* initialize vgen_versions supported */
2529 	bcopy(vgen_versions, ldcp->vgen_versions, sizeof (ldcp->vgen_versions));
2530 	vgen_reset_vnet_proto_ops(ldcp);
2531 
2532 	/* Link this channel to the port */
2533 	portp->ldcp = ldcp;
2534 
2535 	ldcp->link_state = LINK_STATE_UNKNOWN;
2536 #ifdef	VNET_IOC_DEBUG
2537 	ldcp->link_down_forced = B_FALSE;
2538 #endif
2539 	ldcp->flags |= CHANNEL_ATTACHED;
2540 	return (DDI_SUCCESS);
2541 
2542 ldc_attach_failed:
2543 	if (attach_state & AST_ldc_reg_cb) {
2544 		(void) ldc_unreg_callback(ldcp->ldc_handle);
2545 		kmem_free(ldcp->ldcmsg, ldcp->msglen);
2546 	}
2547 
2548 	if (attach_state & AST_ldc_init) {
2549 		(void) ldc_fini(ldcp->ldc_handle);
2550 	}
2551 	if (attach_state & AST_mutex_init) {
2552 		mutex_destroy(&ldcp->tclock);
2553 		mutex_destroy(&ldcp->txlock);
2554 		mutex_destroy(&ldcp->cblock);
2555 		mutex_destroy(&ldcp->wrlock);
2556 		mutex_destroy(&ldcp->rxlock);
2557 		mutex_destroy(&ldcp->pollq_lock);
2558 	}
2559 	if (attach_state & AST_ldc_alloc) {
2560 		KMEM_FREE(ldcp);
2561 	}
2562 	return (DDI_FAILURE);
2563 }
2564 
2565 /* detach a channel from the port */
2566 static void
2567 vgen_ldc_detach(vgen_ldc_t *ldcp)
2568 {
2569 	vgen_port_t	*portp;
2570 	vgen_t 		*vgenp;
2571 
2572 	ASSERT(ldcp != NULL);
2573 
2574 	portp = ldcp->portp;
2575 	vgenp = portp->vgenp;
2576 
2577 	if (ldcp->ldc_status != LDC_INIT) {
2578 		DWARN(vgenp, ldcp, "ldc_status is not INIT\n");
2579 	}
2580 
2581 	if (ldcp->flags & CHANNEL_ATTACHED) {
2582 		ldcp->flags &= ~(CHANNEL_ATTACHED);
2583 
2584 		(void) ldc_unreg_callback(ldcp->ldc_handle);
2585 		(void) ldc_fini(ldcp->ldc_handle);
2586 
2587 		kmem_free(ldcp->ldcmsg, ldcp->msglen);
2588 		vgen_destroy_kstats(ldcp->ksp);
2589 		ldcp->ksp = NULL;
2590 		mutex_destroy(&ldcp->tclock);
2591 		mutex_destroy(&ldcp->txlock);
2592 		mutex_destroy(&ldcp->cblock);
2593 		mutex_destroy(&ldcp->wrlock);
2594 		mutex_destroy(&ldcp->rxlock);
2595 		mutex_destroy(&ldcp->pollq_lock);
2596 		mutex_destroy(&ldcp->msg_thr_lock);
2597 		cv_destroy(&ldcp->msg_thr_cv);
2598 
2599 		KMEM_FREE(ldcp);
2600 	}
2601 }
2602 
2603 /* enable transmit/receive on the channel */
2604 static int
2605 vgen_ldc_init(vgen_ldc_t *ldcp)
2606 {
2607 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
2608 	ldc_status_t	istatus;
2609 	int		rv;
2610 	enum		{ ST_init = 0x0, ST_ldc_open = 0x1,
2611 			    ST_cb_enable = 0x2} init_state;
2612 	int		flag = 0;
2613 
2614 	init_state = ST_init;
2615 
2616 	DBG1(vgenp, ldcp, "enter\n");
2617 	LDC_LOCK(ldcp);
2618 
2619 	rv = ldc_open(ldcp->ldc_handle);
2620 	if (rv != 0) {
2621 		DWARN(vgenp, ldcp, "ldc_open failed: rv(%d)\n", rv);
2622 		goto ldcinit_failed;
2623 	}
2624 	init_state |= ST_ldc_open;
2625 
2626 	(void) ldc_status(ldcp->ldc_handle, &istatus);
2627 	if (istatus != LDC_OPEN && istatus != LDC_READY) {
2628 		DWARN(vgenp, ldcp, "status(%d) is not OPEN/READY\n", istatus);
2629 		goto ldcinit_failed;
2630 	}
2631 	ldcp->ldc_status = istatus;
2632 
2633 	rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_ENABLE);
2634 	if (rv != 0) {
2635 		DWARN(vgenp, ldcp, "ldc_set_cb_mode failed: rv(%d)\n", rv);
2636 		goto ldcinit_failed;
2637 	}
2638 
2639 	init_state |= ST_cb_enable;
2640 
2641 	vgen_ldc_up(ldcp);
2642 
2643 	(void) ldc_status(ldcp->ldc_handle, &istatus);
2644 	if (istatus == LDC_UP) {
2645 		DWARN(vgenp, ldcp, "status(%d) is UP\n", istatus);
2646 	}
2647 
2648 	ldcp->ldc_status = istatus;
2649 
2650 	ldcp->hphase = VH_PHASE0;
2651 	ldcp->hstate = 0;
2652 	ldcp->flags |= CHANNEL_STARTED;
2653 
2654 	vgen_setup_handshake_params(ldcp);
2655 
2656 	/* if channel is already UP - start handshake */
2657 	if (istatus == LDC_UP) {
2658 		vgen_t *vgenp = LDC_TO_VGEN(ldcp);
2659 		if (ldcp->portp != vgenp->vsw_portp) {
2660 			/*
2661 			 * As the channel is up, use this port from now on.
2662 			 */
2663 			(void) atomic_swap_32(
2664 			    &ldcp->portp->use_vsw_port, B_FALSE);
2665 		}
2666 
2667 		/* Initialize local session id */
2668 		ldcp->local_sid = ddi_get_lbolt();
2669 
2670 		/* clear peer session id */
2671 		ldcp->peer_sid = 0;
2672 
2673 		mutex_exit(&ldcp->tclock);
2674 		mutex_exit(&ldcp->txlock);
2675 		mutex_exit(&ldcp->wrlock);
2676 		mutex_exit(&ldcp->rxlock);
2677 		rv = vgen_handshake(vh_nextphase(ldcp));
2678 		mutex_exit(&ldcp->cblock);
2679 		if (rv != 0) {
2680 			flag = (rv == ECONNRESET) ? VGEN_FLAG_EVT_RESET :
2681 			    VGEN_FLAG_NEED_LDCRESET;
2682 			(void) vgen_process_reset(ldcp, flag);
2683 		}
2684 	} else {
2685 		LDC_UNLOCK(ldcp);
2686 	}
2687 
2688 	return (DDI_SUCCESS);
2689 
2690 ldcinit_failed:
2691 	if (init_state & ST_cb_enable) {
2692 		(void) ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE);
2693 	}
2694 	if (init_state & ST_ldc_open) {
2695 		(void) ldc_close(ldcp->ldc_handle);
2696 	}
2697 	LDC_UNLOCK(ldcp);
2698 	DBG1(vgenp, ldcp, "exit\n");
2699 	return (DDI_FAILURE);
2700 }
2701 
2702 /* stop transmit/receive on the channel */
2703 static void
2704 vgen_ldc_uninit(vgen_ldc_t *ldcp)
2705 {
2706 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
2707 
2708 	DBG1(vgenp, ldcp, "enter\n");
2709 
2710 	LDC_LOCK(ldcp);
2711 
2712 	if ((ldcp->flags & CHANNEL_STARTED) == 0) {
2713 		LDC_UNLOCK(ldcp);
2714 		DWARN(vgenp, ldcp, "CHANNEL_STARTED flag is not set\n");
2715 		return;
2716 	}
2717 
2718 	LDC_UNLOCK(ldcp);
2719 
2720 	while (atomic_cas_uint(&ldcp->reset_in_progress, 0, 1) != 0) {
2721 		delay(drv_usectohz(VGEN_LDC_UNINIT_DELAY));
2722 	}
2723 
2724 	(void) vgen_process_reset(ldcp, VGEN_FLAG_UNINIT);
2725 
2726 	DBG1(vgenp, ldcp, "exit\n");
2727 }
2728 
2729 /*
2730  * Create a descriptor ring, that will be exported to the peer for mapping.
2731  */
2732 static int
2733 vgen_create_dring(vgen_ldc_t *ldcp)
2734 {
2735 	vgen_hparams_t	*lp = &ldcp->local_hparams;
2736 	int		rv;
2737 
2738 	if (lp->dring_mode == VIO_RX_DRING_DATA) {
2739 		rv = vgen_create_rx_dring(ldcp);
2740 	} else {
2741 		rv = vgen_create_tx_dring(ldcp);
2742 	}
2743 
2744 	return (rv);
2745 }
2746 
2747 /*
2748  * Destroy the descriptor ring.
2749  */
2750 static void
2751 vgen_destroy_dring(vgen_ldc_t *ldcp)
2752 {
2753 	vgen_hparams_t	*lp = &ldcp->local_hparams;
2754 
2755 	if (lp->dring_mode == VIO_RX_DRING_DATA) {
2756 		vgen_destroy_rx_dring(ldcp);
2757 	} else {
2758 		vgen_destroy_tx_dring(ldcp);
2759 	}
2760 }
2761 
2762 /*
2763  * Map the descriptor ring exported by the peer.
2764  */
2765 static int
2766 vgen_map_dring(vgen_ldc_t *ldcp, void *pkt)
2767 {
2768 	int		rv;
2769 	vgen_hparams_t	*lp = &ldcp->local_hparams;
2770 
2771 	if (lp->dring_mode == VIO_RX_DRING_DATA) {
2772 		/*
2773 		 * In RxDringData mode, dring that we map in
2774 		 * becomes our transmit descriptor ring.
2775 		 */
2776 		rv = vgen_map_tx_dring(ldcp, pkt);
2777 	} else {
2778 
2779 		/*
2780 		 * In TxDring mode, dring that we map in
2781 		 * becomes our receive descriptor ring.
2782 		 */
2783 		rv = vgen_map_rx_dring(ldcp, pkt);
2784 	}
2785 
2786 	return (rv);
2787 }
2788 
2789 /*
2790  * Unmap the descriptor ring exported by the peer.
2791  */
2792 static void
2793 vgen_unmap_dring(vgen_ldc_t *ldcp)
2794 {
2795 	vgen_hparams_t	*lp = &ldcp->local_hparams;
2796 
2797 	if (lp->dring_mode == VIO_RX_DRING_DATA) {
2798 		vgen_unmap_tx_dring(ldcp);
2799 	} else {
2800 		vgen_unmap_rx_dring(ldcp);
2801 	}
2802 }
2803 
2804 void
2805 vgen_destroy_rxpools(void *arg)
2806 {
2807 	vio_mblk_pool_t	*poolp = (vio_mblk_pool_t *)arg;
2808 	vio_mblk_pool_t	*npoolp;
2809 
2810 	while (poolp != NULL) {
2811 		npoolp =  poolp->nextp;
2812 		while (vio_destroy_mblks(poolp) != 0) {
2813 			delay(drv_usectohz(vgen_rxpool_cleanup_delay));
2814 		}
2815 		poolp = npoolp;
2816 	}
2817 }
2818 
2819 /* get channel statistics */
2820 static uint64_t
2821 vgen_ldc_stat(vgen_ldc_t *ldcp, uint_t stat)
2822 {
2823 	vgen_stats_t	*statsp;
2824 	uint64_t	val;
2825 
2826 	val = 0;
2827 	statsp = &ldcp->stats;
2828 	switch (stat) {
2829 
2830 	case MAC_STAT_MULTIRCV:
2831 		val = statsp->multircv;
2832 		break;
2833 
2834 	case MAC_STAT_BRDCSTRCV:
2835 		val = statsp->brdcstrcv;
2836 		break;
2837 
2838 	case MAC_STAT_MULTIXMT:
2839 		val = statsp->multixmt;
2840 		break;
2841 
2842 	case MAC_STAT_BRDCSTXMT:
2843 		val = statsp->brdcstxmt;
2844 		break;
2845 
2846 	case MAC_STAT_NORCVBUF:
2847 		val = statsp->norcvbuf;
2848 		break;
2849 
2850 	case MAC_STAT_IERRORS:
2851 		val = statsp->ierrors;
2852 		break;
2853 
2854 	case MAC_STAT_NOXMTBUF:
2855 		val = statsp->noxmtbuf;
2856 		break;
2857 
2858 	case MAC_STAT_OERRORS:
2859 		val = statsp->oerrors;
2860 		break;
2861 
2862 	case MAC_STAT_COLLISIONS:
2863 		break;
2864 
2865 	case MAC_STAT_RBYTES:
2866 		val = statsp->rbytes;
2867 		break;
2868 
2869 	case MAC_STAT_IPACKETS:
2870 		val = statsp->ipackets;
2871 		break;
2872 
2873 	case MAC_STAT_OBYTES:
2874 		val = statsp->obytes;
2875 		break;
2876 
2877 	case MAC_STAT_OPACKETS:
2878 		val = statsp->opackets;
2879 		break;
2880 
2881 	/* stats not relevant to ldc, return 0 */
2882 	case MAC_STAT_IFSPEED:
2883 	case ETHER_STAT_ALIGN_ERRORS:
2884 	case ETHER_STAT_FCS_ERRORS:
2885 	case ETHER_STAT_FIRST_COLLISIONS:
2886 	case ETHER_STAT_MULTI_COLLISIONS:
2887 	case ETHER_STAT_DEFER_XMTS:
2888 	case ETHER_STAT_TX_LATE_COLLISIONS:
2889 	case ETHER_STAT_EX_COLLISIONS:
2890 	case ETHER_STAT_MACXMT_ERRORS:
2891 	case ETHER_STAT_CARRIER_ERRORS:
2892 	case ETHER_STAT_TOOLONG_ERRORS:
2893 	case ETHER_STAT_XCVR_ADDR:
2894 	case ETHER_STAT_XCVR_ID:
2895 	case ETHER_STAT_XCVR_INUSE:
2896 	case ETHER_STAT_CAP_1000FDX:
2897 	case ETHER_STAT_CAP_1000HDX:
2898 	case ETHER_STAT_CAP_100FDX:
2899 	case ETHER_STAT_CAP_100HDX:
2900 	case ETHER_STAT_CAP_10FDX:
2901 	case ETHER_STAT_CAP_10HDX:
2902 	case ETHER_STAT_CAP_ASMPAUSE:
2903 	case ETHER_STAT_CAP_PAUSE:
2904 	case ETHER_STAT_CAP_AUTONEG:
2905 	case ETHER_STAT_ADV_CAP_1000FDX:
2906 	case ETHER_STAT_ADV_CAP_1000HDX:
2907 	case ETHER_STAT_ADV_CAP_100FDX:
2908 	case ETHER_STAT_ADV_CAP_100HDX:
2909 	case ETHER_STAT_ADV_CAP_10FDX:
2910 	case ETHER_STAT_ADV_CAP_10HDX:
2911 	case ETHER_STAT_ADV_CAP_ASMPAUSE:
2912 	case ETHER_STAT_ADV_CAP_PAUSE:
2913 	case ETHER_STAT_ADV_CAP_AUTONEG:
2914 	case ETHER_STAT_LP_CAP_1000FDX:
2915 	case ETHER_STAT_LP_CAP_1000HDX:
2916 	case ETHER_STAT_LP_CAP_100FDX:
2917 	case ETHER_STAT_LP_CAP_100HDX:
2918 	case ETHER_STAT_LP_CAP_10FDX:
2919 	case ETHER_STAT_LP_CAP_10HDX:
2920 	case ETHER_STAT_LP_CAP_ASMPAUSE:
2921 	case ETHER_STAT_LP_CAP_PAUSE:
2922 	case ETHER_STAT_LP_CAP_AUTONEG:
2923 	case ETHER_STAT_LINK_ASMPAUSE:
2924 	case ETHER_STAT_LINK_PAUSE:
2925 	case ETHER_STAT_LINK_AUTONEG:
2926 	case ETHER_STAT_LINK_DUPLEX:
2927 	default:
2928 		val = 0;
2929 		break;
2930 
2931 	}
2932 	return (val);
2933 }
2934 
2935 /*
2936  * LDC channel is UP, start handshake process with peer.
2937  */
2938 static void
2939 vgen_handle_evt_up(vgen_ldc_t *ldcp)
2940 {
2941 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
2942 
2943 	DBG1(vgenp, ldcp, "enter\n");
2944 
2945 	ASSERT(MUTEX_HELD(&ldcp->cblock));
2946 
2947 	if (ldcp->portp != vgenp->vsw_portp) {
2948 		/*
2949 		 * As the channel is up, use this port from now on.
2950 		 */
2951 		(void) atomic_swap_32(&ldcp->portp->use_vsw_port, B_FALSE);
2952 	}
2953 
2954 	/* Initialize local session id */
2955 	ldcp->local_sid = ddi_get_lbolt();
2956 
2957 	/* clear peer session id */
2958 	ldcp->peer_sid = 0;
2959 
2960 	/* Initiate Handshake process with peer ldc endpoint */
2961 	(void) vgen_handshake(vh_nextphase(ldcp));
2962 
2963 	DBG1(vgenp, ldcp, "exit\n");
2964 }
2965 
2966 /*
2967  * LDC channel is Reset, terminate connection with peer and try to
2968  * bring the channel up again.
2969  */
2970 int
2971 vgen_handle_evt_reset(vgen_ldc_t *ldcp, vgen_caller_t caller)
2972 {
2973 	if (caller == VGEN_LDC_CB || caller == VGEN_MSG_THR) {
2974 		ASSERT(MUTEX_HELD(&ldcp->cblock));
2975 	}
2976 
2977 	/* Set the flag to indicate reset is in progress */
2978 	if (atomic_cas_uint(&ldcp->reset_in_progress, 0, 1) != 0) {
2979 		/* another thread is already in the process of resetting */
2980 		return (EBUSY);
2981 	}
2982 
2983 	if (caller == VGEN_LDC_CB || caller == VGEN_MSG_THR) {
2984 		mutex_exit(&ldcp->cblock);
2985 	}
2986 
2987 	(void) vgen_process_reset(ldcp, VGEN_FLAG_EVT_RESET);
2988 
2989 	if (caller == VGEN_LDC_CB || caller == VGEN_MSG_THR) {
2990 		mutex_enter(&ldcp->cblock);
2991 	}
2992 
2993 	return (0);
2994 }
2995 
2996 /* Interrupt handler for the channel */
2997 static uint_t
2998 vgen_ldc_cb(uint64_t event, caddr_t arg)
2999 {
3000 	_NOTE(ARGUNUSED(event))
3001 	vgen_ldc_t	*ldcp;
3002 	vgen_t		*vgenp;
3003 	ldc_status_t 	istatus;
3004 	vgen_stats_t	*statsp;
3005 	uint_t		ret = LDC_SUCCESS;
3006 
3007 	ldcp = (vgen_ldc_t *)arg;
3008 	vgenp = LDC_TO_VGEN(ldcp);
3009 	statsp = &ldcp->stats;
3010 
3011 	DBG1(vgenp, ldcp, "enter\n");
3012 
3013 	mutex_enter(&ldcp->cblock);
3014 	statsp->callbacks++;
3015 	if ((ldcp->ldc_status == LDC_INIT) || (ldcp->ldc_handle == NULL)) {
3016 		DWARN(vgenp, ldcp, "status(%d) is LDC_INIT\n",
3017 		    ldcp->ldc_status);
3018 		mutex_exit(&ldcp->cblock);
3019 		return (LDC_SUCCESS);
3020 	}
3021 
3022 	/*
3023 	 * NOTE: not using switch() as event could be triggered by
3024 	 * a state change and a read request. Also the ordering	of the
3025 	 * check for the event types is deliberate.
3026 	 */
3027 	if (event & LDC_EVT_UP) {
3028 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
3029 			DWARN(vgenp, ldcp, "ldc_status err\n");
3030 			/* status couldn't be determined */
3031 			ret = LDC_FAILURE;
3032 			goto ldc_cb_ret;
3033 		}
3034 		ldcp->ldc_status = istatus;
3035 		if (ldcp->ldc_status != LDC_UP) {
3036 			DWARN(vgenp, ldcp, "LDC_EVT_UP received "
3037 			    " but ldc status is not UP(0x%x)\n",
3038 			    ldcp->ldc_status);
3039 			/* spurious interrupt, return success */
3040 			goto ldc_cb_ret;
3041 		}
3042 		DWARN(vgenp, ldcp, "event(%lx) UP, status(%d)\n",
3043 		    event, ldcp->ldc_status);
3044 
3045 		vgen_handle_evt_up(ldcp);
3046 
3047 		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);
3048 	}
3049 
3050 	/* Handle RESET/DOWN before READ event */
3051 	if (event & (LDC_EVT_RESET | LDC_EVT_DOWN)) {
3052 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
3053 			DWARN(vgenp, ldcp, "ldc_status error\n");
3054 			/* status couldn't be determined */
3055 			ret = LDC_FAILURE;
3056 			goto ldc_cb_ret;
3057 		}
3058 		ldcp->ldc_status = istatus;
3059 		DWARN(vgenp, ldcp, "event(%lx) RESET/DOWN, status(%d)\n",
3060 		    event, ldcp->ldc_status);
3061 
3062 		(void) vgen_handle_evt_reset(ldcp, VGEN_LDC_CB);
3063 
3064 		/*
3065 		 * As the channel is down/reset, ignore READ event
3066 		 * but print a debug warning message.
3067 		 */
3068 		if (event & LDC_EVT_READ) {
3069 			DWARN(vgenp, ldcp,
3070 			    "LDC_EVT_READ set along with RESET/DOWN\n");
3071 			event &= ~LDC_EVT_READ;
3072 		}
3073 	}
3074 
3075 	if (event & LDC_EVT_READ) {
3076 		DBG2(vgenp, ldcp, "event(%lx) READ, status(%d)\n",
3077 		    event, ldcp->ldc_status);
3078 
3079 		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);
3080 
3081 		if (ldcp->msg_thread != NULL) {
3082 			/*
3083 			 * If the receive thread is enabled, then
3084 			 * wakeup the receive thread to process the
3085 			 * LDC messages.
3086 			 */
3087 			mutex_exit(&ldcp->cblock);
3088 			mutex_enter(&ldcp->msg_thr_lock);
3089 			if (!(ldcp->msg_thr_flags & VGEN_WTHR_DATARCVD)) {
3090 				ldcp->msg_thr_flags |= VGEN_WTHR_DATARCVD;
3091 				cv_signal(&ldcp->msg_thr_cv);
3092 			}
3093 			mutex_exit(&ldcp->msg_thr_lock);
3094 			mutex_enter(&ldcp->cblock);
3095 		} else  {
3096 			(void) vgen_handle_evt_read(ldcp, VGEN_LDC_CB);
3097 		}
3098 	}
3099 
3100 ldc_cb_ret:
3101 	mutex_exit(&ldcp->cblock);
3102 	DBG1(vgenp, ldcp, "exit\n");
3103 	return (ret);
3104 }
3105 
3106 int
3107 vgen_handle_evt_read(vgen_ldc_t *ldcp, vgen_caller_t caller)
3108 {
3109 	int		rv;
3110 	uint64_t	*ldcmsg;
3111 	size_t		msglen;
3112 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
3113 	vio_msg_tag_t	*tagp;
3114 	ldc_status_t 	istatus;
3115 	boolean_t 	has_data;
3116 
3117 	DBG1(vgenp, ldcp, "enter\n");
3118 
3119 	if (caller == VGEN_LDC_CB) {
3120 		ASSERT(MUTEX_HELD(&ldcp->cblock));
3121 	} else if (caller == VGEN_MSG_THR) {
3122 		mutex_enter(&ldcp->cblock);
3123 	} else {
3124 		return (EINVAL);
3125 	}
3126 
3127 	ldcmsg = ldcp->ldcmsg;
3128 
3129 vgen_evtread:
3130 	do {
3131 		msglen = ldcp->msglen;
3132 		rv = ldc_read(ldcp->ldc_handle, (caddr_t)ldcmsg, &msglen);
3133 
3134 		if (rv != 0) {
3135 			DWARN(vgenp, ldcp, "ldc_read() failed "
3136 			    "rv(%d) len(%d)\n", rv, msglen);
3137 			if (rv == ECONNRESET)
3138 				goto vgen_evtread_error;
3139 			break;
3140 		}
3141 		if (msglen == 0) {
3142 			DBG2(vgenp, ldcp, "ldc_read NODATA");
3143 			break;
3144 		}
3145 		DBG2(vgenp, ldcp, "ldc_read msglen(%d)", msglen);
3146 
3147 		tagp = (vio_msg_tag_t *)ldcmsg;
3148 
3149 		if (ldcp->peer_sid) {
3150 			/*
3151 			 * check sid only after we have received peer's sid
3152 			 * in the version negotiate msg.
3153 			 */
3154 #ifdef DEBUG
3155 			if (vgen_inject_error(ldcp, VGEN_ERR_HSID)) {
3156 				/* simulate bad sid condition */
3157 				tagp->vio_sid = 0;
3158 				vgen_inject_err_flag &= ~(VGEN_ERR_HSID);
3159 			}
3160 #endif
3161 			rv = vgen_check_sid(ldcp, tagp);
3162 			if (rv != VGEN_SUCCESS) {
3163 				/*
3164 				 * If sid mismatch is detected,
3165 				 * reset the channel.
3166 				 */
3167 				DWARN(vgenp, ldcp, "vgen_check_sid() failed\n");
3168 				goto vgen_evtread_error;
3169 			}
3170 		}
3171 
3172 		switch (tagp->vio_msgtype) {
3173 		case VIO_TYPE_CTRL:
3174 			rv = vgen_handle_ctrlmsg(ldcp, tagp);
3175 			if (rv != 0) {
3176 				DWARN(vgenp, ldcp, "vgen_handle_ctrlmsg()"
3177 				    " failed rv(%d)\n", rv);
3178 			}
3179 			break;
3180 
3181 		case VIO_TYPE_DATA:
3182 			rv = vgen_handle_datamsg(ldcp, tagp, msglen);
3183 			if (rv != 0) {
3184 				DWARN(vgenp, ldcp, "vgen_handle_datamsg()"
3185 				    " failed rv(%d)\n", rv);
3186 			}
3187 			break;
3188 
3189 		case VIO_TYPE_ERR:
3190 			vgen_handle_errmsg(ldcp, tagp);
3191 			break;
3192 
3193 		default:
3194 			DWARN(vgenp, ldcp, "Unknown VIO_TYPE(%x)\n",
3195 			    tagp->vio_msgtype);
3196 			break;
3197 		}
3198 
3199 		/*
3200 		 * If an error is encountered, stop processing and
3201 		 * handle the error.
3202 		 */
3203 		if (rv != 0) {
3204 			goto vgen_evtread_error;
3205 		}
3206 
3207 	} while (msglen);
3208 
3209 	/* check once more before exiting */
3210 	rv = ldc_chkq(ldcp->ldc_handle, &has_data);
3211 	if ((rv == 0) && (has_data == B_TRUE)) {
3212 		DTRACE_PROBE1(vgen_chkq, vgen_ldc_t *, ldcp);
3213 		goto vgen_evtread;
3214 	}
3215 
3216 vgen_evtread_error:
3217 	if (rv != 0) {
3218 		/*
3219 		 * We handle the error and then return the error value. If we
3220 		 * are running in the context of the msg worker, the error
3221 		 * tells the worker thread to exit, as the channel would have
3222 		 * been reset.
3223 		 */
3224 		if (rv == ECONNRESET) {
3225 			if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
3226 				DWARN(vgenp, ldcp, "ldc_status err\n");
3227 			} else {
3228 				ldcp->ldc_status = istatus;
3229 			}
3230 			(void) vgen_handle_evt_reset(ldcp, caller);
3231 		} else {
3232 			DWARN(vgenp, ldcp, "Calling vgen_ldc_reset()...\n");
3233 			(void) vgen_ldc_reset(ldcp, caller);
3234 		}
3235 	}
3236 
3237 	if (caller == VGEN_MSG_THR) {
3238 		mutex_exit(&ldcp->cblock);
3239 	}
3240 
3241 	DBG1(vgenp, ldcp, "exit\n");
3242 	return (rv);
3243 }
3244 
3245 /* vgen handshake functions */
3246 
3247 /* change the hphase for the channel to the next phase */
3248 static vgen_ldc_t *
3249 vh_nextphase(vgen_ldc_t *ldcp)
3250 {
3251 	if (ldcp->hphase == VH_PHASE4) {
3252 		ldcp->hphase = VH_DONE;
3253 	} else {
3254 		ldcp->hphase++;
3255 	}
3256 	return (ldcp);
3257 }
3258 
3259 /* send version negotiate message to the peer over ldc */
3260 static int
3261 vgen_send_version_negotiate(vgen_ldc_t *ldcp)
3262 {
3263 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
3264 	vio_ver_msg_t	vermsg;
3265 	vio_msg_tag_t	*tagp = &vermsg.tag;
3266 	int		rv;
3267 
3268 	bzero(&vermsg, sizeof (vermsg));
3269 
3270 	tagp->vio_msgtype = VIO_TYPE_CTRL;
3271 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
3272 	tagp->vio_subtype_env = VIO_VER_INFO;
3273 	tagp->vio_sid = ldcp->local_sid;
3274 
3275 	/* get version msg payload from ldcp->local */
3276 	vermsg.ver_major = ldcp->local_hparams.ver_major;
3277 	vermsg.ver_minor = ldcp->local_hparams.ver_minor;
3278 	vermsg.dev_class = ldcp->local_hparams.dev_class;
3279 
3280 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (vermsg), B_FALSE);
3281 	if (rv != VGEN_SUCCESS) {
3282 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
3283 		return (rv);
3284 	}
3285 
3286 	ldcp->hstate |= VER_INFO_SENT;
3287 	DBG2(vgenp, ldcp, "VER_INFO_SENT ver(%d,%d)\n",
3288 	    vermsg.ver_major, vermsg.ver_minor);
3289 
3290 	return (VGEN_SUCCESS);
3291 }
3292 
3293 /* send attr info message to the peer over ldc */
3294 static int
3295 vgen_send_attr_info(vgen_ldc_t *ldcp)
3296 {
3297 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
3298 	vnet_attr_msg_t	attrmsg;
3299 	vio_msg_tag_t	*tagp = &attrmsg.tag;
3300 	int		rv;
3301 
3302 	bzero(&attrmsg, sizeof (attrmsg));
3303 
3304 	tagp->vio_msgtype = VIO_TYPE_CTRL;
3305 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
3306 	tagp->vio_subtype_env = VIO_ATTR_INFO;
3307 	tagp->vio_sid = ldcp->local_sid;
3308 
3309 	/* get attr msg payload from ldcp->local */
3310 	attrmsg.mtu = ldcp->local_hparams.mtu;
3311 	attrmsg.addr = ldcp->local_hparams.addr;
3312 	attrmsg.addr_type = ldcp->local_hparams.addr_type;
3313 	attrmsg.xfer_mode = ldcp->local_hparams.xfer_mode;
3314 	attrmsg.ack_freq = ldcp->local_hparams.ack_freq;
3315 	attrmsg.physlink_update = ldcp->local_hparams.physlink_update;
3316 	attrmsg.options = ldcp->local_hparams.dring_mode;
3317 
3318 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (attrmsg), B_FALSE);
3319 	if (rv != VGEN_SUCCESS) {
3320 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
3321 		return (rv);
3322 	}
3323 
3324 	ldcp->hstate |= ATTR_INFO_SENT;
3325 	DBG2(vgenp, ldcp, "ATTR_INFO_SENT\n");
3326 
3327 	return (VGEN_SUCCESS);
3328 }
3329 
3330 /*
3331  * Send descriptor ring register message to the peer over ldc.
3332  * Invoked in RxDringData mode.
3333  */
3334 static int
3335 vgen_send_rx_dring_reg(vgen_ldc_t *ldcp)
3336 {
3337 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
3338 	vio_dring_reg_msg_t	*msg;
3339 	vio_dring_reg_ext_msg_t	*emsg;
3340 	int			rv;
3341 	uint8_t			*buf;
3342 	uint_t			msgsize;
3343 
3344 	msgsize = VNET_DRING_REG_EXT_MSG_SIZE(ldcp->rx_data_ncookies);
3345 	msg = kmem_zalloc(msgsize, KM_SLEEP);
3346 
3347 	/* Initialize the common part of dring reg msg */
3348 	vgen_init_dring_reg_msg(ldcp, msg, VIO_RX_DRING_DATA);
3349 
3350 	/* skip over dring cookies at the tail of common section */
3351 	buf = (uint8_t *)msg->cookie;
3352 	ASSERT(msg->ncookies == 1);
3353 	buf += (msg->ncookies * sizeof (ldc_mem_cookie_t));
3354 
3355 	/* Now setup the extended part, specific to RxDringData mode */
3356 	emsg = (vio_dring_reg_ext_msg_t *)buf;
3357 
3358 	/* copy data_ncookies in the msg */
3359 	emsg->data_ncookies = ldcp->rx_data_ncookies;
3360 
3361 	/* copy data area size in the msg */
3362 	emsg->data_area_size = ldcp->rx_data_sz;
3363 
3364 	/* copy data area cookies in the msg */
3365 	bcopy(ldcp->rx_data_cookie, (ldc_mem_cookie_t *)emsg->data_cookie,
3366 	    sizeof (ldc_mem_cookie_t) * ldcp->rx_data_ncookies);
3367 
3368 	rv = vgen_sendmsg(ldcp, (caddr_t)msg, msgsize, B_FALSE);
3369 	if (rv != VGEN_SUCCESS) {
3370 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
3371 		kmem_free(msg, msgsize);
3372 		return (rv);
3373 	}
3374 
3375 	ldcp->hstate |= DRING_INFO_SENT;
3376 	DBG2(vgenp, ldcp, "DRING_INFO_SENT \n");
3377 
3378 	kmem_free(msg, msgsize);
3379 	return (VGEN_SUCCESS);
3380 }
3381 
3382 /*
3383  * Send descriptor ring register message to the peer over ldc.
3384  * Invoked in TxDring mode.
3385  */
3386 static int
3387 vgen_send_tx_dring_reg(vgen_ldc_t *ldcp)
3388 {
3389 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
3390 	vio_dring_reg_msg_t	msg;
3391 	int			rv;
3392 
3393 	bzero(&msg, sizeof (msg));
3394 
3395 	/*
3396 	 * Initialize only the common part of dring reg msg in TxDring mode.
3397 	 */
3398 	vgen_init_dring_reg_msg(ldcp, &msg, VIO_TX_DRING);
3399 
3400 	rv = vgen_sendmsg(ldcp, (caddr_t)&msg, sizeof (msg), B_FALSE);
3401 	if (rv != VGEN_SUCCESS) {
3402 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
3403 		return (rv);
3404 	}
3405 
3406 	ldcp->hstate |= DRING_INFO_SENT;
3407 	DBG2(vgenp, ldcp, "DRING_INFO_SENT \n");
3408 
3409 	return (VGEN_SUCCESS);
3410 }
3411 
3412 static int
3413 vgen_send_rdx_info(vgen_ldc_t *ldcp)
3414 {
3415 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
3416 	vio_rdx_msg_t	rdxmsg;
3417 	vio_msg_tag_t	*tagp = &rdxmsg.tag;
3418 	int		rv;
3419 
3420 	bzero(&rdxmsg, sizeof (rdxmsg));
3421 
3422 	tagp->vio_msgtype = VIO_TYPE_CTRL;
3423 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
3424 	tagp->vio_subtype_env = VIO_RDX;
3425 	tagp->vio_sid = ldcp->local_sid;
3426 
3427 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (rdxmsg), B_FALSE);
3428 	if (rv != VGEN_SUCCESS) {
3429 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
3430 		return (rv);
3431 	}
3432 
3433 	ldcp->hstate |= RDX_INFO_SENT;
3434 	DBG2(vgenp, ldcp, "RDX_INFO_SENT\n");
3435 
3436 	return (VGEN_SUCCESS);
3437 }
3438 
3439 /* send multicast addr info message to vsw */
3440 static int
3441 vgen_send_mcast_info(vgen_ldc_t *ldcp)
3442 {
3443 	vnet_mcast_msg_t	mcastmsg;
3444 	vnet_mcast_msg_t	*msgp;
3445 	vio_msg_tag_t		*tagp;
3446 	vgen_t			*vgenp;
3447 	struct ether_addr	*mca;
3448 	int			rv;
3449 	int			i;
3450 	uint32_t		size;
3451 	uint32_t		mccount;
3452 	uint32_t		n;
3453 
3454 	msgp = &mcastmsg;
3455 	tagp = &msgp->tag;
3456 	vgenp = LDC_TO_VGEN(ldcp);
3457 
3458 	mccount = vgenp->mccount;
3459 	i = 0;
3460 
3461 	do {
3462 		tagp->vio_msgtype = VIO_TYPE_CTRL;
3463 		tagp->vio_subtype = VIO_SUBTYPE_INFO;
3464 		tagp->vio_subtype_env = VNET_MCAST_INFO;
3465 		tagp->vio_sid = ldcp->local_sid;
3466 
3467 		n = ((mccount >= VNET_NUM_MCAST) ? VNET_NUM_MCAST : mccount);
3468 		size = n * sizeof (struct ether_addr);
3469 
3470 		mca = &(vgenp->mctab[i]);
3471 		bcopy(mca, (msgp->mca), size);
3472 		msgp->set = B_TRUE;
3473 		msgp->count = n;
3474 
3475 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msgp),
3476 		    B_FALSE);
3477 		if (rv != VGEN_SUCCESS) {
3478 			DWARN(vgenp, ldcp, "vgen_sendmsg err(%d)\n", rv);
3479 			return (rv);
3480 		}
3481 
3482 		mccount -= n;
3483 		i += n;
3484 
3485 	} while (mccount);
3486 
3487 	return (VGEN_SUCCESS);
3488 }
3489 
3490 /*
3491  * vgen_dds_rx -- post DDS messages to vnet.
3492  */
3493 static int
3494 vgen_dds_rx(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
3495 {
3496 	vio_dds_msg_t	*dmsg = (vio_dds_msg_t *)tagp;
3497 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
3498 
3499 	if (dmsg->dds_class != DDS_VNET_NIU) {
3500 		DWARN(vgenp, ldcp, "Unknown DDS class, dropping");
3501 		return (EBADMSG);
3502 	}
3503 	vnet_dds_rx(vgenp->vnetp, dmsg);
3504 	return (0);
3505 }
3506 
3507 /*
3508  * vgen_dds_tx -- an interface called by vnet to send DDS messages.
3509  */
3510 int
3511 vgen_dds_tx(void *arg, void *msg)
3512 {
3513 	vgen_t		*vgenp = arg;
3514 	vio_dds_msg_t	*dmsg = msg;
3515 	vgen_portlist_t	*plistp = &vgenp->vgenports;
3516 	vgen_ldc_t	*ldcp;
3517 	int		rv = EIO;
3518 
3519 	READ_ENTER(&plistp->rwlock);
3520 	ldcp = vgenp->vsw_portp->ldcp;
3521 	if ((ldcp == NULL) || (ldcp->hphase != VH_DONE)) {
3522 		goto vgen_dsend_exit;
3523 	}
3524 
3525 	dmsg->tag.vio_sid = ldcp->local_sid;
3526 	rv = vgen_sendmsg(ldcp, (caddr_t)dmsg, sizeof (vio_dds_msg_t), B_FALSE);
3527 	if (rv != VGEN_SUCCESS) {
3528 		rv = EIO;
3529 	} else {
3530 		rv = 0;
3531 	}
3532 
3533 vgen_dsend_exit:
3534 	RW_EXIT(&plistp->rwlock);
3535 	return (rv);
3536 
3537 }
3538 
3539 /* Initiate Phase 2 of handshake */
3540 static int
3541 vgen_handshake_phase2(vgen_ldc_t *ldcp)
3542 {
3543 	int	rv;
3544 
3545 #ifdef DEBUG
3546 	if (vgen_inject_error(ldcp, VGEN_ERR_HSTATE)) {
3547 		/* simulate out of state condition */
3548 		vgen_inject_err_flag &= ~(VGEN_ERR_HSTATE);
3549 		rv = vgen_send_rdx_info(ldcp);
3550 		return (rv);
3551 	}
3552 	if (vgen_inject_error(ldcp, VGEN_ERR_HTIMEOUT)) {
3553 		/* simulate timeout condition */
3554 		vgen_inject_err_flag &= ~(VGEN_ERR_HTIMEOUT);
3555 		return (VGEN_SUCCESS);
3556 	}
3557 #endif
3558 	rv = vgen_send_attr_info(ldcp);
3559 	if (rv != VGEN_SUCCESS) {
3560 		return (rv);
3561 	}
3562 
3563 	return (VGEN_SUCCESS);
3564 }
3565 
3566 static int
3567 vgen_handshake_phase3(vgen_ldc_t *ldcp)
3568 {
3569 	int		rv;
3570 	vgen_hparams_t	*lp = &ldcp->local_hparams;
3571 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
3572 	vgen_stats_t	*statsp = &ldcp->stats;
3573 
3574 	/* dring mode has been negotiated in attr phase; save in stats */
3575 	statsp->dring_mode = lp->dring_mode;
3576 
3577 	if (lp->dring_mode == VIO_RX_DRING_DATA) {	/* RxDringData mode */
3578 		ldcp->rx_dringdata = vgen_handle_dringdata_shm;
3579 		ldcp->tx_dringdata = vgen_dringsend_shm;
3580 		if (!VGEN_PRI_ETH_DEFINED(vgenp)) {
3581 			/*
3582 			 * If priority frames are not in use, we don't need a
3583 			 * separate wrapper function for 'tx', so we set it to
3584 			 * 'tx_dringdata'. If priority frames are configured,
3585 			 * we leave the 'tx' pointer as is (initialized in
3586 			 * vgen_set_vnet_proto_ops()).
3587 			 */
3588 			ldcp->tx = ldcp->tx_dringdata;
3589 		}
3590 	} else {					/* TxDring mode */
3591 		ldcp->msg_thread = thread_create(NULL,
3592 		    2 * DEFAULTSTKSZ, vgen_ldc_msg_worker, ldcp, 0,
3593 		    &p0, TS_RUN, maxclsyspri);
3594 	}
3595 
3596 	rv = vgen_create_dring(ldcp);
3597 	if (rv != VGEN_SUCCESS) {
3598 		return (rv);
3599 	}
3600 
3601 	/* update local dring_info params */
3602 	if (lp->dring_mode == VIO_RX_DRING_DATA) {
3603 		bcopy(&(ldcp->rx_dring_cookie),
3604 		    &(ldcp->local_hparams.dring_cookie),
3605 		    sizeof (ldc_mem_cookie_t));
3606 		ldcp->local_hparams.dring_ncookies = ldcp->rx_dring_ncookies;
3607 		ldcp->local_hparams.num_desc = ldcp->num_rxds;
3608 		ldcp->local_hparams.desc_size =
3609 		    sizeof (vnet_rx_dringdata_desc_t);
3610 		rv = vgen_send_rx_dring_reg(ldcp);
3611 	} else {
3612 		bcopy(&(ldcp->tx_dring_cookie),
3613 		    &(ldcp->local_hparams.dring_cookie),
3614 		    sizeof (ldc_mem_cookie_t));
3615 		ldcp->local_hparams.dring_ncookies = ldcp->tx_dring_ncookies;
3616 		ldcp->local_hparams.num_desc = ldcp->num_txds;
3617 		ldcp->local_hparams.desc_size = sizeof (vnet_public_desc_t);
3618 		rv = vgen_send_tx_dring_reg(ldcp);
3619 	}
3620 
3621 	if (rv != VGEN_SUCCESS) {
3622 		return (rv);
3623 	}
3624 
3625 	return (VGEN_SUCCESS);
3626 }
3627 
3628 /*
3629  * Set vnet-protocol-version dependent functions based on version.
3630  */
3631 static void
3632 vgen_set_vnet_proto_ops(vgen_ldc_t *ldcp)
3633 {
3634 	vgen_hparams_t	*lp = &ldcp->local_hparams;
3635 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
3636 
3637 	/*
3638 	 * Setup the appropriate dring data processing routine and any
3639 	 * associated thread based on the version.
3640 	 *
3641 	 * In versions < 1.6, we only support TxDring mode. In this mode, the
3642 	 * msg worker thread processes all types of VIO msgs (ctrl and data).
3643 	 *
3644 	 * In versions >= 1.6, we also support RxDringData mode. In this mode,
3645 	 * all msgs including dring data messages are handled directly by the
3646 	 * callback (intr) thread. The dring data msgs (msgtype: VIO_TYPE_DATA,
3647 	 * subtype: VIO_SUBTYPE_INFO, subtype_env: VIO_DRING_DATA) can also be
3648 	 * disabled while the polling thread is active, in which case the
3649 	 * polling thread processes the rcv descriptor ring.
3650 	 *
3651 	 * However, for versions >= 1.6, we can force to only use TxDring mode.
3652 	 * This could happen if RxDringData mode has been disabled (see
3653 	 * below) on this guest or on the peer guest. This info is determined
3654 	 * as part of attr exchange phase of handshake. Hence, we setup these
3655 	 * pointers for v1.6 after attr msg phase completes during handshake.
3656 	 */
3657 	if (VGEN_VER_GTEQ(ldcp, 1, 6)) {	/* Ver >= 1.6 */
3658 		/*
3659 		 * Set data dring mode for vgen_send_attr_info().
3660 		 */
3661 		if (vgen_mapin_avail(ldcp) == B_TRUE) {
3662 			lp->dring_mode = (VIO_RX_DRING_DATA | VIO_TX_DRING);
3663 		} else {
3664 			lp->dring_mode = VIO_TX_DRING;
3665 		}
3666 	} else {				/* Ver <= 1.5 */
3667 		lp->dring_mode = VIO_TX_DRING;
3668 	}
3669 
3670 	if (VGEN_VER_GTEQ(ldcp, 1, 5)) {
3671 		vgen_port_t	*portp = ldcp->portp;
3672 		vnet_t		*vnetp = vgenp->vnetp;
3673 		/*
3674 		 * If the version negotiated with vswitch is >= 1.5 (link
3675 		 * status update support), set the required bits in our
3676 		 * attributes if this vnet device has been configured to get
3677 		 * physical link state updates.
3678 		 */
3679 		if (portp == vgenp->vsw_portp && vnetp->pls_update == B_TRUE) {
3680 			lp->physlink_update = PHYSLINK_UPDATE_STATE;
3681 		} else {
3682 			lp->physlink_update = PHYSLINK_UPDATE_NONE;
3683 		}
3684 	}
3685 
3686 	if (VGEN_VER_GTEQ(ldcp, 1, 4)) {
3687 		/*
3688 		 * If the version negotiated with peer is >= 1.4(Jumbo Frame
3689 		 * Support), set the mtu in our attributes to max_frame_size.
3690 		 */
3691 		lp->mtu = vgenp->max_frame_size;
3692 	} else  if (VGEN_VER_EQ(ldcp, 1, 3)) {
3693 		/*
3694 		 * If the version negotiated with peer is == 1.3 (Vlan Tag
3695 		 * Support) set the attr.mtu to ETHERMAX + VLAN_TAGSZ.
3696 		 */
3697 		lp->mtu = ETHERMAX + VLAN_TAGSZ;
3698 	} else {
3699 		vgen_port_t	*portp = ldcp->portp;
3700 		vnet_t		*vnetp = vgenp->vnetp;
3701 		/*
3702 		 * Pre-1.3 peers expect max frame size of ETHERMAX.
3703 		 * We can negotiate that size with those peers provided the
3704 		 * following conditions are true:
3705 		 * - Only pvid is defined for our peer and there are no vids.
3706 		 * - pvids are equal.
3707 		 * If the above conditions are true, then we can send/recv only
3708 		 * untagged frames of max size ETHERMAX.
3709 		 */
3710 		if (portp->nvids == 0 && portp->pvid == vnetp->pvid) {
3711 			lp->mtu = ETHERMAX;
3712 		}
3713 	}
3714 
3715 	if (VGEN_VER_GTEQ(ldcp, 1, 2)) {	/* Versions >= 1.2 */
3716 		/*
3717 		 * Starting v1.2 we support priority frames; so set the
3718 		 * dring processing routines and xfer modes based on the
3719 		 * version. Note that the dring routines could be changed after
3720 		 * attribute handshake phase for versions >= 1.6 (See
3721 		 * vgen_handshake_phase3())
3722 		 */
3723 		ldcp->tx_dringdata = vgen_dringsend;
3724 		ldcp->rx_dringdata = vgen_handle_dringdata;
3725 
3726 		if (VGEN_PRI_ETH_DEFINED(vgenp)) {
3727 			/*
3728 			 * Enable priority routines and pkt mode only if
3729 			 * at least one pri-eth-type is specified in MD.
3730 			 */
3731 			ldcp->tx = vgen_ldcsend;
3732 			ldcp->rx_pktdata = vgen_handle_pkt_data;
3733 
3734 			/* set xfer mode for vgen_send_attr_info() */
3735 			lp->xfer_mode = VIO_PKT_MODE | VIO_DRING_MODE_V1_2;
3736 		} else {
3737 			/* No priority eth types defined in MD */
3738 			ldcp->tx = ldcp->tx_dringdata;
3739 			ldcp->rx_pktdata = vgen_handle_pkt_data_nop;
3740 
3741 			/* Set xfer mode for vgen_send_attr_info() */
3742 			lp->xfer_mode = VIO_DRING_MODE_V1_2;
3743 		}
3744 	} else { /* Versions prior to 1.2  */
3745 		vgen_reset_vnet_proto_ops(ldcp);
3746 	}
3747 }
3748 
3749 /*
3750  * Reset vnet-protocol-version dependent functions to pre-v1.2.
3751  */
3752 static void
3753 vgen_reset_vnet_proto_ops(vgen_ldc_t *ldcp)
3754 {
3755 	vgen_hparams_t	*lp = &ldcp->local_hparams;
3756 
3757 	ldcp->tx = ldcp->tx_dringdata = vgen_dringsend;
3758 	ldcp->rx_dringdata = vgen_handle_dringdata;
3759 	ldcp->rx_pktdata = vgen_handle_pkt_data_nop;
3760 
3761 	/* set xfer mode for vgen_send_attr_info() */
3762 	lp->xfer_mode = VIO_DRING_MODE_V1_0;
3763 }
3764 
3765 static void
3766 vgen_vlan_unaware_port_reset(vgen_port_t *portp)
3767 {
3768 	vgen_ldc_t	*ldcp = portp->ldcp;
3769 	vgen_t		*vgenp = portp->vgenp;
3770 	vnet_t		*vnetp = vgenp->vnetp;
3771 	boolean_t	need_reset = B_FALSE;
3772 
3773 	mutex_enter(&ldcp->cblock);
3774 
3775 	/*
3776 	 * If the peer is vlan_unaware(ver < 1.3), reset channel and terminate
3777 	 * the connection. See comments in vgen_set_vnet_proto_ops().
3778 	 */
3779 	if (ldcp->hphase == VH_DONE && VGEN_VER_LT(ldcp, 1, 3) &&
3780 	    (portp->nvids != 0 || portp->pvid != vnetp->pvid)) {
3781 		need_reset = B_TRUE;
3782 	}
3783 	mutex_exit(&ldcp->cblock);
3784 
3785 	if (need_reset == B_TRUE) {
3786 		(void) vgen_ldc_reset(ldcp, VGEN_OTHER);
3787 	}
3788 }
3789 
3790 static void
3791 vgen_port_reset(vgen_port_t *portp)
3792 {
3793 	(void) vgen_ldc_reset(portp->ldcp, VGEN_OTHER);
3794 }
3795 
3796 static void
3797 vgen_reset_vlan_unaware_ports(vgen_t *vgenp)
3798 {
3799 	vgen_port_t	*portp;
3800 	vgen_portlist_t	*plistp;
3801 
3802 	plistp = &(vgenp->vgenports);
3803 	READ_ENTER(&plistp->rwlock);
3804 
3805 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
3806 
3807 		vgen_vlan_unaware_port_reset(portp);
3808 
3809 	}
3810 
3811 	RW_EXIT(&plistp->rwlock);
3812 }
3813 
3814 static void
3815 vgen_reset_vsw_port(vgen_t *vgenp)
3816 {
3817 	vgen_port_t	*portp;
3818 
3819 	if ((portp = vgenp->vsw_portp) != NULL) {
3820 		vgen_port_reset(portp);
3821 	}
3822 }
3823 
3824 static void
3825 vgen_setup_handshake_params(vgen_ldc_t *ldcp)
3826 {
3827 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
3828 
3829 	/*
3830 	 * clear local handshake params and initialize.
3831 	 */
3832 	bzero(&(ldcp->local_hparams), sizeof (ldcp->local_hparams));
3833 
3834 	/* set version to the highest version supported */
3835 	ldcp->local_hparams.ver_major =
3836 	    ldcp->vgen_versions[0].ver_major;
3837 	ldcp->local_hparams.ver_minor =
3838 	    ldcp->vgen_versions[0].ver_minor;
3839 	ldcp->local_hparams.dev_class = VDEV_NETWORK;
3840 
3841 	/* set attr_info params */
3842 	ldcp->local_hparams.mtu = vgenp->max_frame_size;
3843 	ldcp->local_hparams.addr =
3844 	    vnet_macaddr_strtoul(vgenp->macaddr);
3845 	ldcp->local_hparams.addr_type = ADDR_TYPE_MAC;
3846 	ldcp->local_hparams.xfer_mode = VIO_DRING_MODE_V1_0;
3847 	ldcp->local_hparams.ack_freq = 0;	/* don't need acks */
3848 	ldcp->local_hparams.physlink_update = PHYSLINK_UPDATE_NONE;
3849 
3850 	/* reset protocol version specific function pointers */
3851 	vgen_reset_vnet_proto_ops(ldcp);
3852 	ldcp->local_hparams.dring_ident = 0;
3853 	ldcp->local_hparams.dring_ready = B_FALSE;
3854 
3855 	/* clear peer_hparams */
3856 	bzero(&(ldcp->peer_hparams), sizeof (ldcp->peer_hparams));
3857 	ldcp->peer_hparams.dring_ready = B_FALSE;
3858 }
3859 
3860 /*
3861  * Process Channel Reset. We tear down the resources (timers, threads,
3862  * descriptor rings etc) associated with the channel and reinitialize the
3863  * channel based on the flags.
3864  *
3865  * Arguments:
3866  *    ldcp:	The channel being processed.
3867  *
3868  *    flags:
3869  *	VGEN_FLAG_EVT_RESET:
3870  *		A ECONNRESET error occured while doing ldc operations such as
3871  *		ldc_read() or ldc_write(); the channel is already reset and it
3872  *		needs to be handled.
3873  *	VGEN_FLAG_NEED_LDCRESET:
3874  *		Some other errors occured and the error handling code needs to
3875  *		explicitly reset the channel and restart handshake with the
3876  *		peer. The error could be either in ldc operations or other
3877  *		parts of the code such as timeouts or mdeg events etc.
3878  *	VGEN_FLAG_UNINIT:
3879  *		The channel is being torn down; no need to bring up the channel
3880  *		after resetting.
3881  */
3882 static int
3883 vgen_process_reset(vgen_ldc_t *ldcp, int flags)
3884 {
3885 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
3886 	vgen_port_t	*portp = ldcp->portp;
3887 	vgen_hparams_t  *lp = &ldcp->local_hparams;
3888 	boolean_t	is_vsw_port = B_FALSE;
3889 	boolean_t	link_update = B_FALSE;
3890 	ldc_status_t	istatus;
3891 	int		rv;
3892 	uint_t		retries = 0;
3893 	timeout_id_t	htid = 0;
3894 	timeout_id_t	wd_tid = 0;
3895 
3896 	if (portp == vgenp->vsw_portp) { /* vswitch port ? */
3897 		is_vsw_port = B_TRUE;
3898 	}
3899 
3900 	/*
3901 	 * Report that the channel is being reset; it ensures that any HybridIO
3902 	 * configuration is torn down before we reset the channel if it is not
3903 	 * already reset (flags == VGEN_FLAG_NEED_LDCRESET).
3904 	 */
3905 	if (is_vsw_port == B_TRUE) {
3906 		vio_net_report_err_t rep_err = portp->vcb.vio_net_report_err;
3907 		rep_err(portp->vhp, VIO_NET_RES_DOWN);
3908 	}
3909 
3910 again:
3911 	mutex_enter(&ldcp->cblock);
3912 
3913 	/* Clear hstate and hphase */
3914 	ldcp->hstate = 0;
3915 	ldcp->hphase = VH_PHASE0;
3916 	if (flags == VGEN_FLAG_NEED_LDCRESET || flags == VGEN_FLAG_UNINIT) {
3917 		DWARN(vgenp, ldcp, "Doing Channel Reset...\n");
3918 		(void) ldc_down(ldcp->ldc_handle);
3919 		(void) ldc_status(ldcp->ldc_handle, &istatus);
3920 		DWARN(vgenp, ldcp, "Reset Done, ldc_status(%d)\n", istatus);
3921 		ldcp->ldc_status = istatus;
3922 
3923 		if (flags == VGEN_FLAG_UNINIT) {
3924 			/* disable further callbacks */
3925 			rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE);
3926 			if (rv != 0) {
3927 				DWARN(vgenp, ldcp, "ldc_set_cb_mode failed\n");
3928 			}
3929 		}
3930 
3931 	} else {
3932 		/* flags == VGEN_FLAG_EVT_RESET */
3933 		DWARN(vgenp, ldcp, "ldc status(%d)\n", ldcp->ldc_status);
3934 	}
3935 
3936 	/*
3937 	 * As the connection is now reset, mark the channel
3938 	 * link_state as 'down' and notify the stack if needed.
3939 	 */
3940 	if (ldcp->link_state != LINK_STATE_DOWN) {
3941 		ldcp->link_state = LINK_STATE_DOWN;
3942 
3943 		if (is_vsw_port == B_TRUE) { /* vswitch port ? */
3944 			/*
3945 			 * As the channel link is down, mark physical link also
3946 			 * as down. After the channel comes back up and
3947 			 * handshake completes, we will get an update on the
3948 			 * physlink state from vswitch (if this device has been
3949 			 * configured to get phys link updates).
3950 			 */
3951 			vgenp->phys_link_state = LINK_STATE_DOWN;
3952 			link_update = B_TRUE;
3953 
3954 		}
3955 	}
3956 
3957 	if (ldcp->htid != 0) {
3958 		htid = ldcp->htid;
3959 		ldcp->htid = 0;
3960 	}
3961 
3962 	if (ldcp->wd_tid != 0) {
3963 		wd_tid = ldcp->wd_tid;
3964 		ldcp->wd_tid = 0;
3965 	}
3966 
3967 	mutex_exit(&ldcp->cblock);
3968 
3969 	/* Update link state to the stack */
3970 	if (link_update == B_TRUE) {
3971 		vgen_link_update(vgenp, ldcp->link_state);
3972 	}
3973 
3974 	/*
3975 	 * As the channel is being reset, redirect traffic to the peer through
3976 	 * vswitch, until the channel becomes ready to be used again.
3977 	 */
3978 	if (is_vsw_port == B_FALSE && vgenp->vsw_portp != NULL) {
3979 		(void) atomic_swap_32(&portp->use_vsw_port, B_TRUE);
3980 	}
3981 
3982 	/* Cancel handshake watchdog timeout */
3983 	if (htid) {
3984 		(void) untimeout(htid);
3985 	}
3986 
3987 	/* Cancel transmit watchdog timeout */
3988 	if (wd_tid) {
3989 		(void) untimeout(wd_tid);
3990 	}
3991 
3992 	/* Stop the msg worker thread */
3993 	if (lp->dring_mode == VIO_TX_DRING && curthread != ldcp->msg_thread) {
3994 		vgen_stop_msg_thread(ldcp);
3995 	}
3996 
3997 	/* Grab all locks while we tear down tx/rx resources */
3998 	LDC_LOCK(ldcp);
3999 
4000 	/* Destroy the local dring which is exported to the peer */
4001 	vgen_destroy_dring(ldcp);
4002 
4003 	/* Unmap the remote dring which is imported from the peer */
4004 	vgen_unmap_dring(ldcp);
4005 
4006 	/*
4007 	 * Bring up the channel and restart handshake
4008 	 * only if the channel is not being torn down.
4009 	 */
4010 	if (flags != VGEN_FLAG_UNINIT) {
4011 
4012 		/* Setup handshake parameters to restart a new handshake */
4013 		vgen_setup_handshake_params(ldcp);
4014 
4015 		/* Bring the channel up */
4016 		vgen_ldc_up(ldcp);
4017 
4018 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
4019 			DWARN(vgenp, ldcp, "ldc_status err\n");
4020 		} else {
4021 			ldcp->ldc_status = istatus;
4022 		}
4023 
4024 		/* If the channel is UP, start handshake */
4025 		if (ldcp->ldc_status == LDC_UP) {
4026 
4027 			if (is_vsw_port == B_FALSE) {
4028 				/*
4029 				 * Channel is up; use this port from now on.
4030 				 */
4031 				(void) atomic_swap_32(&portp->use_vsw_port,
4032 				    B_FALSE);
4033 			}
4034 
4035 			/* Initialize local session id */
4036 			ldcp->local_sid = ddi_get_lbolt();
4037 
4038 			/* clear peer session id */
4039 			ldcp->peer_sid = 0;
4040 
4041 			/*
4042 			 * Initiate Handshake process with peer ldc endpoint by
4043 			 * sending version info vio message. If that fails we
4044 			 * go back to the top of this function to process the
4045 			 * error again. Note that we can be in this loop for
4046 			 * 'vgen_ldc_max_resets' times, after which the channel
4047 			 * is not brought up.
4048 			 */
4049 			mutex_exit(&ldcp->tclock);
4050 			mutex_exit(&ldcp->txlock);
4051 			mutex_exit(&ldcp->wrlock);
4052 			mutex_exit(&ldcp->rxlock);
4053 			rv = vgen_handshake(vh_nextphase(ldcp));
4054 			mutex_exit(&ldcp->cblock);
4055 			if (rv != 0) {
4056 				if (rv == ECONNRESET) {
4057 					flags = VGEN_FLAG_EVT_RESET;
4058 				} else {
4059 					flags = VGEN_FLAG_NEED_LDCRESET;
4060 				}
4061 
4062 				/*
4063 				 * We still hold 'reset_in_progress'; so we can
4064 				 * just loop back to the top to restart error
4065 				 * processing.
4066 				 */
4067 				goto again;
4068 			}
4069 		} else {
4070 			LDC_UNLOCK(ldcp);
4071 		}
4072 
4073 	} else {	/* flags == VGEN_FLAG_UNINIT */
4074 
4075 		/* Close the channel - retry on EAGAIN */
4076 		while ((rv = ldc_close(ldcp->ldc_handle)) == EAGAIN) {
4077 			if (++retries > vgen_ldccl_retries) {
4078 				break;
4079 			}
4080 			drv_usecwait(VGEN_LDC_CLOSE_DELAY);
4081 		}
4082 		if (rv != 0) {
4083 			cmn_err(CE_NOTE,
4084 			    "!vnet%d: Error(%d) closing the channel(0x%lx)\n",
4085 			    vgenp->instance, rv, ldcp->ldc_id);
4086 		}
4087 
4088 		ldcp->ldc_reset_count = 0;
4089 		ldcp->ldc_status = LDC_INIT;
4090 		ldcp->flags &= ~(CHANNEL_STARTED);
4091 
4092 		LDC_UNLOCK(ldcp);
4093 	}
4094 
4095 	/* Done processing channel reset; clear the atomic flag */
4096 	ldcp->reset_in_progress = 0;
4097 	return (0);
4098 }
4099 
4100 /*
4101  * Initiate handshake with the peer by sending various messages
4102  * based on the handshake-phase that the channel is currently in.
4103  */
4104 static int
4105 vgen_handshake(vgen_ldc_t *ldcp)
4106 {
4107 	uint32_t	hphase = ldcp->hphase;
4108 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4109 	int		rv = 0;
4110 	timeout_id_t	htid;
4111 
4112 	switch (hphase) {
4113 
4114 	case VH_PHASE1:
4115 
4116 		/*
4117 		 * start timer, for entire handshake process, turn this timer
4118 		 * off if all phases of handshake complete successfully and
4119 		 * hphase goes to VH_DONE(below) or channel is reset due to
4120 		 * errors or vgen_ldc_uninit() is invoked(vgen_stop).
4121 		 */
4122 		ASSERT(ldcp->htid == 0);
4123 		ldcp->htid = timeout(vgen_hwatchdog, (caddr_t)ldcp,
4124 		    drv_usectohz(vgen_hwd_interval * MICROSEC));
4125 
4126 		/* Phase 1 involves negotiating the version */
4127 		rv = vgen_send_version_negotiate(ldcp);
4128 		break;
4129 
4130 	case VH_PHASE2:
4131 		rv = vgen_handshake_phase2(ldcp);
4132 		break;
4133 
4134 	case VH_PHASE3:
4135 		rv = vgen_handshake_phase3(ldcp);
4136 		break;
4137 
4138 	case VH_PHASE4:
4139 		rv = vgen_send_rdx_info(ldcp);
4140 		break;
4141 
4142 	case VH_DONE:
4143 
4144 		ldcp->ldc_reset_count = 0;
4145 
4146 		DBG1(vgenp, ldcp, "Handshake Done\n");
4147 
4148 		/*
4149 		 * The channel is up and handshake is done successfully. Now we
4150 		 * can mark the channel link_state as 'up'. We also notify the
4151 		 * stack if the channel is connected to vswitch.
4152 		 */
4153 		ldcp->link_state = LINK_STATE_UP;
4154 
4155 		if (ldcp->portp == vgenp->vsw_portp) {
4156 			/*
4157 			 * If this channel(port) is connected to vsw,
4158 			 * need to sync multicast table with vsw.
4159 			 */
4160 			rv = vgen_send_mcast_info(ldcp);
4161 			if (rv != VGEN_SUCCESS)
4162 				break;
4163 
4164 			if (vgenp->pls_negotiated == B_FALSE) {
4165 				/*
4166 				 * We haven't negotiated with vswitch to get
4167 				 * physical link state updates. We can update
4168 				 * update the stack at this point as the
4169 				 * channel to vswitch is up and the handshake
4170 				 * is done successfully.
4171 				 *
4172 				 * If we have negotiated to get physical link
4173 				 * state updates, then we won't notify the
4174 				 * the stack here; we do that as soon as
4175 				 * vswitch sends us the initial phys link state
4176 				 * (see vgen_handle_physlink_info()).
4177 				 */
4178 				mutex_exit(&ldcp->cblock);
4179 				vgen_link_update(vgenp, ldcp->link_state);
4180 				mutex_enter(&ldcp->cblock);
4181 			}
4182 		}
4183 
4184 		if (ldcp->htid != 0) {
4185 			htid = ldcp->htid;
4186 			ldcp->htid = 0;
4187 
4188 			mutex_exit(&ldcp->cblock);
4189 			(void) untimeout(htid);
4190 			mutex_enter(&ldcp->cblock);
4191 		}
4192 
4193 		/*
4194 		 * Check if mac layer should be notified to restart
4195 		 * transmissions. This can happen if the channel got
4196 		 * reset and while tx_blocked is set.
4197 		 */
4198 		mutex_enter(&ldcp->tclock);
4199 		if (ldcp->tx_blocked) {
4200 			vio_net_tx_update_t vtx_update =
4201 			    ldcp->portp->vcb.vio_net_tx_update;
4202 
4203 			ldcp->tx_blocked = B_FALSE;
4204 			vtx_update(ldcp->portp->vhp);
4205 		}
4206 		mutex_exit(&ldcp->tclock);
4207 
4208 		/* start transmit watchdog timer */
4209 		ldcp->wd_tid = timeout(vgen_tx_watchdog, (caddr_t)ldcp,
4210 		    drv_usectohz(vgen_txwd_interval * 1000));
4211 
4212 		break;
4213 
4214 	default:
4215 		break;
4216 	}
4217 
4218 	return (rv);
4219 }
4220 
4221 /*
4222  * Check if the current handshake phase has completed successfully and
4223  * return the status.
4224  */
4225 static int
4226 vgen_handshake_done(vgen_ldc_t *ldcp)
4227 {
4228 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4229 	uint32_t	hphase = ldcp->hphase;
4230 	int 		status = 0;
4231 
4232 	switch (hphase) {
4233 
4234 	case VH_PHASE1:
4235 		/*
4236 		 * Phase1 is done, if version negotiation
4237 		 * completed successfully.
4238 		 */
4239 		status = ((ldcp->hstate & VER_NEGOTIATED) ==
4240 		    VER_NEGOTIATED);
4241 		break;
4242 
4243 	case VH_PHASE2:
4244 		/*
4245 		 * Phase 2 is done, if attr info
4246 		 * has been exchanged successfully.
4247 		 */
4248 		status = ((ldcp->hstate & ATTR_INFO_EXCHANGED) ==
4249 		    ATTR_INFO_EXCHANGED);
4250 		break;
4251 
4252 	case VH_PHASE3:
4253 		/*
4254 		 * Phase 3 is done, if dring registration
4255 		 * has been exchanged successfully.
4256 		 */
4257 		status = ((ldcp->hstate & DRING_INFO_EXCHANGED) ==
4258 		    DRING_INFO_EXCHANGED);
4259 		break;
4260 
4261 	case VH_PHASE4:
4262 		/* Phase 4 is done, if rdx msg has been exchanged */
4263 		status = ((ldcp->hstate & RDX_EXCHANGED) ==
4264 		    RDX_EXCHANGED);
4265 		break;
4266 
4267 	default:
4268 		break;
4269 	}
4270 
4271 	if (status == 0) {
4272 		return (VGEN_FAILURE);
4273 	}
4274 	DBG2(vgenp, ldcp, "PHASE(%d)\n", hphase);
4275 	return (VGEN_SUCCESS);
4276 }
4277 
4278 /*
4279  * Link State Update Notes:
4280  * The link state of the channel connected to vswitch is reported as the link
4281  * state of the vnet device, by default. If the channel is down or reset, then
4282  * the link state is marked 'down'. If the channel is 'up' *and* handshake
4283  * between the vnet and vswitch is successful, then the link state is marked
4284  * 'up'. If physical network link state is desired, then the vnet device must
4285  * be configured to get physical link updates and the 'linkprop' property
4286  * in the virtual-device MD node indicates this. As part of attribute exchange
4287  * the vnet device negotiates with the vswitch to obtain physical link state
4288  * updates. If it successfully negotiates, vswitch sends an initial physlink
4289  * msg once the handshake is done and further whenever the physical link state
4290  * changes. Currently we don't have mac layer interfaces to report two distinct
4291  * link states - virtual and physical. Thus, if the vnet has been configured to
4292  * get physical link updates, then the link status will be reported as 'up'
4293  * only when both the virtual and physical links are up.
4294  */
4295 static void
4296 vgen_link_update(vgen_t *vgenp, link_state_t link_state)
4297 {
4298 	vnet_link_update(vgenp->vnetp, link_state);
4299 }
4300 
4301 /*
4302  * Handle a version info msg from the peer or an ACK/NACK from the peer
4303  * to a version info msg that we sent.
4304  */
4305 static int
4306 vgen_handle_version_negotiate(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4307 {
4308 	vgen_t		*vgenp;
4309 	vio_ver_msg_t	*vermsg = (vio_ver_msg_t *)tagp;
4310 	int		ack = 0;
4311 	int		failed = 0;
4312 	int		idx;
4313 	vgen_ver_t	*versions = ldcp->vgen_versions;
4314 	int		rv = 0;
4315 
4316 	vgenp = LDC_TO_VGEN(ldcp);
4317 	DBG1(vgenp, ldcp, "enter\n");
4318 	switch (tagp->vio_subtype) {
4319 	case VIO_SUBTYPE_INFO:
4320 
4321 		/*  Cache sid of peer if this is the first time */
4322 		if (ldcp->peer_sid == 0) {
4323 			DBG2(vgenp, ldcp, "Caching peer_sid(%x)\n",
4324 			    tagp->vio_sid);
4325 			ldcp->peer_sid = tagp->vio_sid;
4326 		}
4327 
4328 		if (ldcp->hphase != VH_PHASE1) {
4329 			/*
4330 			 * If we are not already in VH_PHASE1, reset to
4331 			 * pre-handshake state, and initiate handshake
4332 			 * to the peer too.
4333 			 */
4334 			return (EINVAL);
4335 		}
4336 
4337 		ldcp->hstate |= VER_INFO_RCVD;
4338 
4339 		/* save peer's requested values */
4340 		ldcp->peer_hparams.ver_major = vermsg->ver_major;
4341 		ldcp->peer_hparams.ver_minor = vermsg->ver_minor;
4342 		ldcp->peer_hparams.dev_class = vermsg->dev_class;
4343 
4344 		if ((vermsg->dev_class != VDEV_NETWORK) &&
4345 		    (vermsg->dev_class != VDEV_NETWORK_SWITCH)) {
4346 			/* unsupported dev_class, send NACK */
4347 
4348 			DWARN(vgenp, ldcp, "Version Negotiation Failed\n");
4349 
4350 			tagp->vio_subtype = VIO_SUBTYPE_NACK;
4351 			tagp->vio_sid = ldcp->local_sid;
4352 			/* send reply msg back to peer */
4353 			rv = vgen_sendmsg(ldcp, (caddr_t)tagp,
4354 			    sizeof (*vermsg), B_FALSE);
4355 			if (rv != VGEN_SUCCESS) {
4356 				return (rv);
4357 			}
4358 			return (VGEN_FAILURE);
4359 		}
4360 
4361 		DBG2(vgenp, ldcp, "VER_INFO_RCVD, ver(%d,%d)\n",
4362 		    vermsg->ver_major,  vermsg->ver_minor);
4363 
4364 		idx = 0;
4365 
4366 		for (;;) {
4367 
4368 			if (vermsg->ver_major > versions[idx].ver_major) {
4369 
4370 				/* nack with next lower version */
4371 				tagp->vio_subtype = VIO_SUBTYPE_NACK;
4372 				vermsg->ver_major = versions[idx].ver_major;
4373 				vermsg->ver_minor = versions[idx].ver_minor;
4374 				break;
4375 			}
4376 
4377 			if (vermsg->ver_major == versions[idx].ver_major) {
4378 
4379 				/* major version match - ACK version */
4380 				tagp->vio_subtype = VIO_SUBTYPE_ACK;
4381 				ack = 1;
4382 
4383 				/*
4384 				 * lower minor version to the one this endpt
4385 				 * supports, if necessary
4386 				 */
4387 				if (vermsg->ver_minor >
4388 				    versions[idx].ver_minor) {
4389 					vermsg->ver_minor =
4390 					    versions[idx].ver_minor;
4391 					ldcp->peer_hparams.ver_minor =
4392 					    versions[idx].ver_minor;
4393 				}
4394 				break;
4395 			}
4396 
4397 			idx++;
4398 
4399 			if (idx == VGEN_NUM_VER) {
4400 
4401 				/* no version match - send NACK */
4402 				tagp->vio_subtype = VIO_SUBTYPE_NACK;
4403 				vermsg->ver_major = 0;
4404 				vermsg->ver_minor = 0;
4405 				failed = 1;
4406 				break;
4407 			}
4408 
4409 		}
4410 
4411 		tagp->vio_sid = ldcp->local_sid;
4412 
4413 		/* send reply msg back to peer */
4414 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*vermsg),
4415 		    B_FALSE);
4416 		if (rv != VGEN_SUCCESS) {
4417 			return (rv);
4418 		}
4419 
4420 		if (ack) {
4421 			ldcp->hstate |= VER_ACK_SENT;
4422 			DBG2(vgenp, ldcp, "VER_ACK_SENT, ver(%d,%d) \n",
4423 			    vermsg->ver_major, vermsg->ver_minor);
4424 		}
4425 		if (failed) {
4426 			DWARN(vgenp, ldcp, "Negotiation Failed\n");
4427 			return (VGEN_FAILURE);
4428 		}
4429 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
4430 
4431 			/*  VER_ACK_SENT and VER_ACK_RCVD */
4432 
4433 			/* local and peer versions match? */
4434 			ASSERT((ldcp->local_hparams.ver_major ==
4435 			    ldcp->peer_hparams.ver_major) &&
4436 			    (ldcp->local_hparams.ver_minor ==
4437 			    ldcp->peer_hparams.ver_minor));
4438 
4439 			vgen_set_vnet_proto_ops(ldcp);
4440 
4441 			/* move to the next phase */
4442 			rv = vgen_handshake(vh_nextphase(ldcp));
4443 			if (rv != 0) {
4444 				return (rv);
4445 			}
4446 		}
4447 
4448 		break;
4449 
4450 	case VIO_SUBTYPE_ACK:
4451 
4452 		if (ldcp->hphase != VH_PHASE1) {
4453 			/*  This should not happen. */
4454 			DWARN(vgenp, ldcp, "Invalid Phase(%u)\n", ldcp->hphase);
4455 			return (VGEN_FAILURE);
4456 		}
4457 
4458 		/* SUCCESS - we have agreed on a version */
4459 		ldcp->local_hparams.ver_major = vermsg->ver_major;
4460 		ldcp->local_hparams.ver_minor = vermsg->ver_minor;
4461 		ldcp->hstate |= VER_ACK_RCVD;
4462 
4463 		DBG2(vgenp, ldcp, "VER_ACK_RCVD, ver(%d,%d) \n",
4464 		    vermsg->ver_major,  vermsg->ver_minor);
4465 
4466 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
4467 
4468 			/*  VER_ACK_SENT and VER_ACK_RCVD */
4469 
4470 			/* local and peer versions match? */
4471 			ASSERT((ldcp->local_hparams.ver_major ==
4472 			    ldcp->peer_hparams.ver_major) &&
4473 			    (ldcp->local_hparams.ver_minor ==
4474 			    ldcp->peer_hparams.ver_minor));
4475 
4476 			vgen_set_vnet_proto_ops(ldcp);
4477 
4478 			/* move to the next phase */
4479 			rv = vgen_handshake(vh_nextphase(ldcp));
4480 			if (rv != 0) {
4481 				return (rv);
4482 			}
4483 		}
4484 		break;
4485 
4486 	case VIO_SUBTYPE_NACK:
4487 
4488 		if (ldcp->hphase != VH_PHASE1) {
4489 			/*  This should not happen.  */
4490 			DWARN(vgenp, ldcp, "VER_NACK_RCVD Invalid "
4491 			"Phase(%u)\n", ldcp->hphase);
4492 			return (VGEN_FAILURE);
4493 		}
4494 
4495 		DBG2(vgenp, ldcp, "VER_NACK_RCVD next ver(%d,%d)\n",
4496 		    vermsg->ver_major, vermsg->ver_minor);
4497 
4498 		/* check if version in NACK is zero */
4499 		if (vermsg->ver_major == 0 && vermsg->ver_minor == 0) {
4500 			/*
4501 			 * Version Negotiation has failed.
4502 			 */
4503 			DWARN(vgenp, ldcp, "Version Negotiation Failed\n");
4504 			return (VGEN_FAILURE);
4505 		}
4506 
4507 		idx = 0;
4508 
4509 		for (;;) {
4510 
4511 			if (vermsg->ver_major > versions[idx].ver_major) {
4512 				/* select next lower version */
4513 
4514 				ldcp->local_hparams.ver_major =
4515 				    versions[idx].ver_major;
4516 				ldcp->local_hparams.ver_minor =
4517 				    versions[idx].ver_minor;
4518 				break;
4519 			}
4520 
4521 			if (vermsg->ver_major == versions[idx].ver_major) {
4522 				/* major version match */
4523 
4524 				ldcp->local_hparams.ver_major =
4525 				    versions[idx].ver_major;
4526 
4527 				ldcp->local_hparams.ver_minor =
4528 				    versions[idx].ver_minor;
4529 				break;
4530 			}
4531 
4532 			idx++;
4533 
4534 			if (idx == VGEN_NUM_VER) {
4535 				/*
4536 				 * no version match.
4537 				 * Version Negotiation has failed.
4538 				 */
4539 				DWARN(vgenp, ldcp,
4540 				    "Version Negotiation Failed\n");
4541 				return (VGEN_FAILURE);
4542 			}
4543 
4544 		}
4545 
4546 		rv = vgen_send_version_negotiate(ldcp);
4547 		if (rv != VGEN_SUCCESS) {
4548 			return (rv);
4549 		}
4550 
4551 		break;
4552 	}
4553 
4554 	DBG1(vgenp, ldcp, "exit\n");
4555 	return (VGEN_SUCCESS);
4556 }
4557 
4558 static int
4559 vgen_handle_attr_info(vgen_ldc_t *ldcp, vnet_attr_msg_t *msg)
4560 {
4561 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4562 	vgen_hparams_t	*lp = &ldcp->local_hparams;
4563 	vgen_hparams_t	*rp = &ldcp->peer_hparams;
4564 	uint32_t	mtu;
4565 	uint8_t		dring_mode;
4566 
4567 	ldcp->hstate |= ATTR_INFO_RCVD;
4568 
4569 	/* save peer's values */
4570 	rp->mtu = msg->mtu;
4571 	rp->addr = msg->addr;
4572 	rp->addr_type = msg->addr_type;
4573 	rp->xfer_mode = msg->xfer_mode;
4574 	rp->ack_freq = msg->ack_freq;
4575 	rp->dring_mode = msg->options;
4576 
4577 	/*
4578 	 * Process address type, ack frequency and transfer mode attributes.
4579 	 */
4580 	if ((msg->addr_type != ADDR_TYPE_MAC) ||
4581 	    (msg->ack_freq > 64) ||
4582 	    (msg->xfer_mode != lp->xfer_mode)) {
4583 		return (VGEN_FAILURE);
4584 	}
4585 
4586 	/*
4587 	 * Process dring mode attribute.
4588 	 */
4589 	if (VGEN_VER_GTEQ(ldcp, 1, 6)) {
4590 		/*
4591 		 * Versions >= 1.6:
4592 		 * Though we are operating in v1.6 mode, it is possible that
4593 		 * RxDringData mode has been disabled either on this guest or
4594 		 * on the peer guest. If so, we revert to pre v1.6 behavior of
4595 		 * TxDring mode. But this must be agreed upon in both
4596 		 * directions of attr exchange. We first determine the mode
4597 		 * that can be negotiated.
4598 		 */
4599 		if ((msg->options & VIO_RX_DRING_DATA) != 0 &&
4600 		    vgen_mapin_avail(ldcp) == B_TRUE) {
4601 			/*
4602 			 * We are capable of handling RxDringData AND the peer
4603 			 * is also capable of it; we enable RxDringData mode on
4604 			 * this channel.
4605 			 */
4606 			dring_mode = VIO_RX_DRING_DATA;
4607 		} else if ((msg->options & VIO_TX_DRING) != 0) {
4608 			/*
4609 			 * If the peer is capable of TxDring mode, we
4610 			 * negotiate TxDring mode on this channel.
4611 			 */
4612 			dring_mode = VIO_TX_DRING;
4613 		} else {
4614 			/*
4615 			 * We support only VIO_TX_DRING and VIO_RX_DRING_DATA
4616 			 * modes. We don't support VIO_RX_DRING mode.
4617 			 */
4618 			return (VGEN_FAILURE);
4619 		}
4620 
4621 		/*
4622 		 * If we have received an ack for the attr info that we sent,
4623 		 * then check if the dring mode matches what the peer had ack'd
4624 		 * (saved in local hparams). If they don't match, we fail the
4625 		 * handshake.
4626 		 */
4627 		if (ldcp->hstate & ATTR_ACK_RCVD) {
4628 			if (msg->options != lp->dring_mode) {
4629 				/* send NACK */
4630 				return (VGEN_FAILURE);
4631 			}
4632 		} else {
4633 			/*
4634 			 * Save the negotiated dring mode in our attr
4635 			 * parameters, so it gets sent in the attr info from us
4636 			 * to the peer.
4637 			 */
4638 			lp->dring_mode = dring_mode;
4639 		}
4640 
4641 		/* save the negotiated dring mode in the msg to be replied */
4642 		msg->options = dring_mode;
4643 	}
4644 
4645 	/*
4646 	 * Process MTU attribute.
4647 	 */
4648 	if (VGEN_VER_GTEQ(ldcp, 1, 4)) {
4649 		/*
4650 		 * Versions >= 1.4:
4651 		 * Validate mtu of the peer is at least ETHERMAX. Then, the mtu
4652 		 * is negotiated down to the minimum of our mtu and peer's mtu.
4653 		 */
4654 		if (msg->mtu < ETHERMAX) {
4655 			return (VGEN_FAILURE);
4656 		}
4657 
4658 		mtu = MIN(msg->mtu, vgenp->max_frame_size);
4659 
4660 		/*
4661 		 * If we have received an ack for the attr info
4662 		 * that we sent, then check if the mtu computed
4663 		 * above matches the mtu that the peer had ack'd
4664 		 * (saved in local hparams). If they don't
4665 		 * match, we fail the handshake.
4666 		 */
4667 		if (ldcp->hstate & ATTR_ACK_RCVD) {
4668 			if (mtu != lp->mtu) {
4669 				/* send NACK */
4670 				return (VGEN_FAILURE);
4671 			}
4672 		} else {
4673 			/*
4674 			 * Save the mtu computed above in our
4675 			 * attr parameters, so it gets sent in
4676 			 * the attr info from us to the peer.
4677 			 */
4678 			lp->mtu = mtu;
4679 		}
4680 
4681 		/* save the MIN mtu in the msg to be replied */
4682 		msg->mtu = mtu;
4683 
4684 	} else {
4685 		/* versions < 1.4, mtu must match */
4686 		if (msg->mtu != lp->mtu) {
4687 			return (VGEN_FAILURE);
4688 		}
4689 	}
4690 
4691 	return (VGEN_SUCCESS);
4692 }
4693 
4694 static int
4695 vgen_handle_attr_ack(vgen_ldc_t *ldcp, vnet_attr_msg_t *msg)
4696 {
4697 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4698 	vgen_hparams_t	*lp = &ldcp->local_hparams;
4699 
4700 	/*
4701 	 * Process dring mode attribute.
4702 	 */
4703 	if (VGEN_VER_GTEQ(ldcp, 1, 6)) {
4704 		/*
4705 		 * Versions >= 1.6:
4706 		 * The ack msg sent by the peer contains the negotiated dring
4707 		 * mode between our capability (that we had sent in our attr
4708 		 * info) and the peer's capability.
4709 		 */
4710 		if (ldcp->hstate & ATTR_ACK_SENT) {
4711 			/*
4712 			 * If we have sent an ack for the attr info msg from
4713 			 * the peer, check if the dring mode that was
4714 			 * negotiated then (saved in local hparams) matches the
4715 			 * mode that the peer has ack'd. If they don't match,
4716 			 * we fail the handshake.
4717 			 */
4718 			if (lp->dring_mode != msg->options) {
4719 				return (VGEN_FAILURE);
4720 			}
4721 		} else {
4722 			if ((msg->options & lp->dring_mode) == 0) {
4723 				/*
4724 				 * Peer ack'd with a mode that we don't
4725 				 * support; we fail the handshake.
4726 				 */
4727 				return (VGEN_FAILURE);
4728 			}
4729 			if ((msg->options & (VIO_TX_DRING|VIO_RX_DRING_DATA))
4730 			    == (VIO_TX_DRING|VIO_RX_DRING_DATA)) {
4731 				/*
4732 				 * Peer must ack with only one negotiated mode.
4733 				 * Otherwise fail handshake.
4734 				 */
4735 				return (VGEN_FAILURE);
4736 			}
4737 
4738 			/*
4739 			 * Save the negotiated mode, so we can validate it when
4740 			 * we receive attr info from the peer.
4741 			 */
4742 			lp->dring_mode = msg->options;
4743 		}
4744 	}
4745 
4746 	/*
4747 	 * Process Physical Link Update attribute.
4748 	 */
4749 	if (VGEN_VER_GTEQ(ldcp, 1, 5) &&
4750 	    ldcp->portp == vgenp->vsw_portp) {
4751 		/*
4752 		 * Versions >= 1.5:
4753 		 * If the vnet device has been configured to get
4754 		 * physical link state updates, check the corresponding
4755 		 * bits in the ack msg, if the peer is vswitch.
4756 		 */
4757 		if (((lp->physlink_update & PHYSLINK_UPDATE_STATE_MASK) ==
4758 		    PHYSLINK_UPDATE_STATE) &&
4759 		    ((msg->physlink_update & PHYSLINK_UPDATE_STATE_MASK) ==
4760 		    PHYSLINK_UPDATE_STATE_ACK)) {
4761 			vgenp->pls_negotiated = B_TRUE;
4762 		} else {
4763 			vgenp->pls_negotiated = B_FALSE;
4764 		}
4765 	}
4766 
4767 	/*
4768 	 * Process MTU attribute.
4769 	 */
4770 	if (VGEN_VER_GTEQ(ldcp, 1, 4)) {
4771 		/*
4772 		 * Versions >= 1.4:
4773 		 * The ack msg sent by the peer contains the minimum of
4774 		 * our mtu (that we had sent in our attr info) and the
4775 		 * peer's mtu.
4776 		 *
4777 		 * If we have sent an ack for the attr info msg from
4778 		 * the peer, check if the mtu that was computed then
4779 		 * (saved in local hparams) matches the mtu that the
4780 		 * peer has ack'd. If they don't match, we fail the
4781 		 * handshake.
4782 		 */
4783 		if (ldcp->hstate & ATTR_ACK_SENT) {
4784 			if (lp->mtu != msg->mtu) {
4785 				return (VGEN_FAILURE);
4786 			}
4787 		} else {
4788 			/*
4789 			 * If the mtu ack'd by the peer is > our mtu
4790 			 * fail handshake. Otherwise, save the mtu, so
4791 			 * we can validate it when we receive attr info
4792 			 * from our peer.
4793 			 */
4794 			if (msg->mtu > lp->mtu) {
4795 				return (VGEN_FAILURE);
4796 			}
4797 			if (msg->mtu <= lp->mtu) {
4798 				lp->mtu = msg->mtu;
4799 			}
4800 		}
4801 	}
4802 
4803 	return (VGEN_SUCCESS);
4804 }
4805 
4806 
4807 /*
4808  * Handle an attribute info msg from the peer or an ACK/NACK from the peer
4809  * to an attr info msg that we sent.
4810  */
4811 static int
4812 vgen_handle_attr_msg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4813 {
4814 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4815 	vnet_attr_msg_t	*msg = (vnet_attr_msg_t *)tagp;
4816 	int		rv = 0;
4817 
4818 	DBG1(vgenp, ldcp, "enter\n");
4819 	if (ldcp->hphase != VH_PHASE2) {
4820 		DWARN(vgenp, ldcp, "Rcvd ATTR_INFO subtype(%d),"
4821 		" Invalid Phase(%u)\n",
4822 		    tagp->vio_subtype, ldcp->hphase);
4823 		return (VGEN_FAILURE);
4824 	}
4825 	switch (tagp->vio_subtype) {
4826 	case VIO_SUBTYPE_INFO:
4827 
4828 		rv = vgen_handle_attr_info(ldcp, msg);
4829 		if (rv == VGEN_SUCCESS) {
4830 			tagp->vio_subtype = VIO_SUBTYPE_ACK;
4831 		} else {
4832 			tagp->vio_subtype = VIO_SUBTYPE_NACK;
4833 		}
4834 		tagp->vio_sid = ldcp->local_sid;
4835 
4836 		/* send reply msg back to peer */
4837 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msg),
4838 		    B_FALSE);
4839 		if (rv != VGEN_SUCCESS) {
4840 			return (rv);
4841 		}
4842 
4843 		if (tagp->vio_subtype == VIO_SUBTYPE_NACK) {
4844 			DWARN(vgenp, ldcp, "ATTR_NACK_SENT");
4845 			break;
4846 		}
4847 
4848 		ldcp->hstate |= ATTR_ACK_SENT;
4849 		DBG2(vgenp, ldcp, "ATTR_ACK_SENT \n");
4850 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
4851 			rv = vgen_handshake(vh_nextphase(ldcp));
4852 			if (rv != 0) {
4853 				return (rv);
4854 			}
4855 		}
4856 
4857 		break;
4858 
4859 	case VIO_SUBTYPE_ACK:
4860 
4861 		rv = vgen_handle_attr_ack(ldcp, msg);
4862 		if (rv == VGEN_FAILURE) {
4863 			break;
4864 		}
4865 
4866 		ldcp->hstate |= ATTR_ACK_RCVD;
4867 		DBG2(vgenp, ldcp, "ATTR_ACK_RCVD \n");
4868 
4869 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
4870 			rv = vgen_handshake(vh_nextphase(ldcp));
4871 			if (rv != 0) {
4872 				return (rv);
4873 			}
4874 		}
4875 		break;
4876 
4877 	case VIO_SUBTYPE_NACK:
4878 
4879 		DBG2(vgenp, ldcp, "ATTR_NACK_RCVD \n");
4880 		return (VGEN_FAILURE);
4881 	}
4882 	DBG1(vgenp, ldcp, "exit\n");
4883 	return (VGEN_SUCCESS);
4884 }
4885 
4886 static int
4887 vgen_handle_dring_reg_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4888 {
4889 	int		rv = 0;
4890 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4891 	vgen_hparams_t	*lp = &ldcp->local_hparams;
4892 
4893 	DBG2(vgenp, ldcp, "DRING_INFO_RCVD");
4894 	ldcp->hstate |= DRING_INFO_RCVD;
4895 
4896 	if (VGEN_VER_GTEQ(ldcp, 1, 6) &&
4897 	    (lp->dring_mode != ((vio_dring_reg_msg_t *)tagp)->options)) {
4898 		/*
4899 		 * The earlier version of Solaris vnet driver doesn't set the
4900 		 * option (VIO_TX_DRING in its case) correctly in its dring reg
4901 		 * message. We workaround that here by doing the check only
4902 		 * for versions >= v1.6.
4903 		 */
4904 		DWARN(vgenp, ldcp,
4905 		    "Rcvd dring reg option (%d), negotiated mode (%d)\n",
4906 		    ((vio_dring_reg_msg_t *)tagp)->options, lp->dring_mode);
4907 		return (VGEN_FAILURE);
4908 	}
4909 
4910 	/*
4911 	 * Map dring exported by the peer.
4912 	 */
4913 	rv = vgen_map_dring(ldcp, (void *)tagp);
4914 	if (rv != VGEN_SUCCESS) {
4915 		return (rv);
4916 	}
4917 
4918 	/*
4919 	 * Map data buffers exported by the peer if we are in RxDringData mode.
4920 	 */
4921 	if (lp->dring_mode == VIO_RX_DRING_DATA) {
4922 		rv = vgen_map_data(ldcp, (void *)tagp);
4923 		if (rv != VGEN_SUCCESS) {
4924 			vgen_unmap_dring(ldcp);
4925 			return (rv);
4926 		}
4927 	}
4928 
4929 	if (ldcp->peer_hparams.dring_ready == B_FALSE) {
4930 		ldcp->peer_hparams.dring_ready = B_TRUE;
4931 	}
4932 
4933 	return (VGEN_SUCCESS);
4934 }
4935 
4936 static int
4937 vgen_handle_dring_reg_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4938 {
4939 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4940 	vgen_hparams_t	*lp = &ldcp->local_hparams;
4941 
4942 	DBG2(vgenp, ldcp, "DRING_ACK_RCVD");
4943 	ldcp->hstate |= DRING_ACK_RCVD;
4944 
4945 	if (lp->dring_ready) {
4946 		return (VGEN_SUCCESS);
4947 	}
4948 
4949 	/* save dring_ident acked by peer */
4950 	lp->dring_ident = ((vio_dring_reg_msg_t *)tagp)->dring_ident;
4951 
4952 	/* local dring is now ready */
4953 	lp->dring_ready = B_TRUE;
4954 
4955 	return (VGEN_SUCCESS);
4956 }
4957 
4958 /*
4959  * Handle a descriptor ring register msg from the peer or an ACK/NACK from
4960  * the peer to a dring register msg that we sent.
4961  */
4962 static int
4963 vgen_handle_dring_reg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4964 {
4965 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4966 	int		rv = 0;
4967 	int		msgsize;
4968 	vgen_hparams_t	*lp = &ldcp->local_hparams;
4969 
4970 	DBG1(vgenp, ldcp, "enter\n");
4971 	if (ldcp->hphase < VH_PHASE2) {
4972 		/* dring_info can be rcvd in any of the phases after Phase1 */
4973 		DWARN(vgenp, ldcp,
4974 		    "Rcvd DRING_INFO Subtype (%d), Invalid Phase(%u)\n",
4975 		    tagp->vio_subtype, ldcp->hphase);
4976 		return (VGEN_FAILURE);
4977 	}
4978 
4979 	switch (tagp->vio_subtype) {
4980 	case VIO_SUBTYPE_INFO:
4981 
4982 		rv = vgen_handle_dring_reg_info(ldcp, tagp);
4983 		if (rv == VGEN_SUCCESS) {
4984 			tagp->vio_subtype = VIO_SUBTYPE_ACK;
4985 		} else {
4986 			tagp->vio_subtype = VIO_SUBTYPE_NACK;
4987 		}
4988 
4989 		tagp->vio_sid = ldcp->local_sid;
4990 
4991 		if (lp->dring_mode == VIO_RX_DRING_DATA) {
4992 			msgsize =
4993 			    VNET_DRING_REG_EXT_MSG_SIZE(ldcp->tx_data_ncookies);
4994 		} else {
4995 			msgsize = sizeof (vio_dring_reg_msg_t);
4996 		}
4997 
4998 		/* send reply msg back to peer */
4999 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, msgsize,
5000 		    B_FALSE);
5001 		if (rv != VGEN_SUCCESS) {
5002 			return (rv);
5003 		}
5004 
5005 		if (tagp->vio_subtype == VIO_SUBTYPE_NACK) {
5006 			DWARN(vgenp, ldcp, "DRING_NACK_SENT");
5007 			return (VGEN_FAILURE);
5008 		}
5009 
5010 		ldcp->hstate |= DRING_ACK_SENT;
5011 		DBG2(vgenp, ldcp, "DRING_ACK_SENT");
5012 
5013 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5014 			rv = vgen_handshake(vh_nextphase(ldcp));
5015 			if (rv != 0) {
5016 				return (rv);
5017 			}
5018 		}
5019 		break;
5020 
5021 	case VIO_SUBTYPE_ACK:
5022 
5023 		rv = vgen_handle_dring_reg_ack(ldcp, tagp);
5024 		if (rv == VGEN_FAILURE) {
5025 			return (rv);
5026 		}
5027 
5028 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5029 			rv = vgen_handshake(vh_nextphase(ldcp));
5030 			if (rv != 0) {
5031 				return (rv);
5032 			}
5033 		}
5034 
5035 		break;
5036 
5037 	case VIO_SUBTYPE_NACK:
5038 
5039 		DWARN(vgenp, ldcp, "DRING_NACK_RCVD");
5040 		return (VGEN_FAILURE);
5041 	}
5042 	DBG1(vgenp, ldcp, "exit\n");
5043 	return (VGEN_SUCCESS);
5044 }
5045 
5046 /*
5047  * Handle a rdx info msg from the peer or an ACK/NACK
5048  * from the peer to a rdx info msg that we sent.
5049  */
5050 static int
5051 vgen_handle_rdx_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5052 {
5053 	int	rv = 0;
5054 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
5055 
5056 	DBG1(vgenp, ldcp, "enter\n");
5057 	if (ldcp->hphase != VH_PHASE4) {
5058 		DWARN(vgenp, ldcp,
5059 		    "Rcvd RDX_INFO Subtype (%d), Invalid Phase(%u)\n",
5060 		    tagp->vio_subtype, ldcp->hphase);
5061 		return (VGEN_FAILURE);
5062 	}
5063 	switch (tagp->vio_subtype) {
5064 	case VIO_SUBTYPE_INFO:
5065 
5066 		DBG2(vgenp, ldcp, "RDX_INFO_RCVD \n");
5067 		ldcp->hstate |= RDX_INFO_RCVD;
5068 
5069 		tagp->vio_subtype = VIO_SUBTYPE_ACK;
5070 		tagp->vio_sid = ldcp->local_sid;
5071 		/* send reply msg back to peer */
5072 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (vio_rdx_msg_t),
5073 		    B_FALSE);
5074 		if (rv != VGEN_SUCCESS) {
5075 			return (rv);
5076 		}
5077 
5078 		ldcp->hstate |= RDX_ACK_SENT;
5079 		DBG2(vgenp, ldcp, "RDX_ACK_SENT \n");
5080 
5081 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5082 			rv = vgen_handshake(vh_nextphase(ldcp));
5083 			if (rv != 0) {
5084 				return (rv);
5085 			}
5086 		}
5087 
5088 		break;
5089 
5090 	case VIO_SUBTYPE_ACK:
5091 
5092 		ldcp->hstate |= RDX_ACK_RCVD;
5093 
5094 		DBG2(vgenp, ldcp, "RDX_ACK_RCVD \n");
5095 
5096 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5097 			rv = vgen_handshake(vh_nextphase(ldcp));
5098 			if (rv != 0) {
5099 				return (rv);
5100 			}
5101 		}
5102 		break;
5103 
5104 	case VIO_SUBTYPE_NACK:
5105 
5106 		DBG2(vgenp, ldcp, "RDX_NACK_RCVD \n");
5107 		return (VGEN_FAILURE);
5108 	}
5109 	DBG1(vgenp, ldcp, "exit\n");
5110 	return (VGEN_SUCCESS);
5111 }
5112 
5113 /* Handle ACK/NACK from vsw to a set multicast msg that we sent */
5114 static int
5115 vgen_handle_mcast_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5116 {
5117 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
5118 	vnet_mcast_msg_t	*msgp = (vnet_mcast_msg_t *)tagp;
5119 	struct ether_addr	*addrp;
5120 	int			count;
5121 	int			i;
5122 
5123 	DBG1(vgenp, ldcp, "enter\n");
5124 	switch (tagp->vio_subtype) {
5125 
5126 	case VIO_SUBTYPE_INFO:
5127 
5128 		/* vnet shouldn't recv set mcast msg, only vsw handles it */
5129 		DWARN(vgenp, ldcp, "rcvd SET_MCAST_INFO \n");
5130 		break;
5131 
5132 	case VIO_SUBTYPE_ACK:
5133 
5134 		/* success adding/removing multicast addr */
5135 		DBG1(vgenp, ldcp, "rcvd SET_MCAST_ACK \n");
5136 		break;
5137 
5138 	case VIO_SUBTYPE_NACK:
5139 
5140 		DWARN(vgenp, ldcp, "rcvd SET_MCAST_NACK \n");
5141 		if (!(msgp->set)) {
5142 			/* multicast remove request failed */
5143 			break;
5144 		}
5145 
5146 		/* multicast add request failed */
5147 		for (count = 0; count < msgp->count; count++) {
5148 			addrp = &(msgp->mca[count]);
5149 
5150 			/* delete address from the table */
5151 			for (i = 0; i < vgenp->mccount; i++) {
5152 				if (ether_cmp(addrp,
5153 				    &(vgenp->mctab[i])) == 0) {
5154 					if (vgenp->mccount > 1) {
5155 						int t = vgenp->mccount - 1;
5156 						vgenp->mctab[i] =
5157 						    vgenp->mctab[t];
5158 					}
5159 					vgenp->mccount--;
5160 					break;
5161 				}
5162 			}
5163 		}
5164 		break;
5165 
5166 	}
5167 	DBG1(vgenp, ldcp, "exit\n");
5168 
5169 	return (VGEN_SUCCESS);
5170 }
5171 
5172 /*
5173  * Physical link information message from the peer. Only vswitch should send
5174  * us this message; if the vnet device has been configured to get physical link
5175  * state updates. Note that we must have already negotiated this with the
5176  * vswitch during attribute exchange phase of handshake.
5177  */
5178 static int
5179 vgen_handle_physlink_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5180 {
5181 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
5182 	vnet_physlink_msg_t	*msgp = (vnet_physlink_msg_t *)tagp;
5183 	link_state_t		link_state;
5184 	int			rv;
5185 
5186 	if (ldcp->portp != vgenp->vsw_portp) {
5187 		/*
5188 		 * drop the message and don't process; as we should
5189 		 * receive physlink_info message from only vswitch.
5190 		 */
5191 		return (VGEN_SUCCESS);
5192 	}
5193 
5194 	if (vgenp->pls_negotiated == B_FALSE) {
5195 		/*
5196 		 * drop the message and don't process; as we should receive
5197 		 * physlink_info message only if physlink update is enabled for
5198 		 * the device and negotiated with vswitch.
5199 		 */
5200 		return (VGEN_SUCCESS);
5201 	}
5202 
5203 	switch (tagp->vio_subtype) {
5204 
5205 	case VIO_SUBTYPE_INFO:
5206 
5207 		if ((msgp->physlink_info & VNET_PHYSLINK_STATE_MASK) ==
5208 		    VNET_PHYSLINK_STATE_UP) {
5209 			link_state = LINK_STATE_UP;
5210 		} else {
5211 			link_state = LINK_STATE_DOWN;
5212 		}
5213 
5214 		if (vgenp->phys_link_state != link_state) {
5215 			vgenp->phys_link_state = link_state;
5216 			mutex_exit(&ldcp->cblock);
5217 
5218 			/* Now update the stack */
5219 			vgen_link_update(vgenp, link_state);
5220 
5221 			mutex_enter(&ldcp->cblock);
5222 		}
5223 
5224 		tagp->vio_subtype = VIO_SUBTYPE_ACK;
5225 		tagp->vio_sid = ldcp->local_sid;
5226 
5227 		/* send reply msg back to peer */
5228 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp,
5229 		    sizeof (vnet_physlink_msg_t), B_FALSE);
5230 		if (rv != VGEN_SUCCESS) {
5231 			return (rv);
5232 		}
5233 		break;
5234 
5235 	case VIO_SUBTYPE_ACK:
5236 
5237 		/* vnet shouldn't recv physlink acks */
5238 		DWARN(vgenp, ldcp, "rcvd PHYSLINK_ACK \n");
5239 		break;
5240 
5241 	case VIO_SUBTYPE_NACK:
5242 
5243 		/* vnet shouldn't recv physlink nacks */
5244 		DWARN(vgenp, ldcp, "rcvd PHYSLINK_NACK \n");
5245 		break;
5246 
5247 	}
5248 	DBG1(vgenp, ldcp, "exit\n");
5249 
5250 	return (VGEN_SUCCESS);
5251 }
5252 
5253 /* handler for control messages received from the peer ldc end-point */
5254 static int
5255 vgen_handle_ctrlmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5256 {
5257 	int	rv = 0;
5258 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
5259 
5260 	DBG1(vgenp, ldcp, "enter\n");
5261 	switch (tagp->vio_subtype_env) {
5262 
5263 	case VIO_VER_INFO:
5264 		rv = vgen_handle_version_negotiate(ldcp, tagp);
5265 		break;
5266 
5267 	case VIO_ATTR_INFO:
5268 		rv = vgen_handle_attr_msg(ldcp, tagp);
5269 		break;
5270 
5271 	case VIO_DRING_REG:
5272 		rv = vgen_handle_dring_reg(ldcp, tagp);
5273 		break;
5274 
5275 	case VIO_RDX:
5276 		rv = vgen_handle_rdx_info(ldcp, tagp);
5277 		break;
5278 
5279 	case VNET_MCAST_INFO:
5280 		rv = vgen_handle_mcast_info(ldcp, tagp);
5281 		break;
5282 
5283 	case VIO_DDS_INFO:
5284 		/*
5285 		 * If we are in the process of resetting the vswitch channel,
5286 		 * drop the dds message. A new handshake will be initiated
5287 		 * when the channel comes back up after the reset and dds
5288 		 * negotiation can then continue.
5289 		 */
5290 		if (ldcp->reset_in_progress == 1) {
5291 			break;
5292 		}
5293 		rv = vgen_dds_rx(ldcp, tagp);
5294 		break;
5295 
5296 	case VNET_PHYSLINK_INFO:
5297 		rv = vgen_handle_physlink_info(ldcp, tagp);
5298 		break;
5299 	}
5300 
5301 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
5302 	return (rv);
5303 }
5304 
5305 /* handler for error messages received from the peer ldc end-point */
5306 static void
5307 vgen_handle_errmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5308 {
5309 	_NOTE(ARGUNUSED(ldcp, tagp))
5310 }
5311 
5312 /*
5313  * This function handles raw pkt data messages received over the channel.
5314  * Currently, only priority-eth-type frames are received through this mechanism.
5315  * In this case, the frame(data) is present within the message itself which
5316  * is copied into an mblk before sending it up the stack.
5317  */
5318 void
5319 vgen_handle_pkt_data(void *arg1, void *arg2, uint32_t msglen)
5320 {
5321 	vgen_ldc_t		*ldcp = (vgen_ldc_t *)arg1;
5322 	vio_raw_data_msg_t	*pkt	= (vio_raw_data_msg_t *)arg2;
5323 	uint32_t		size;
5324 	mblk_t			*mp;
5325 	vio_mblk_t		*vmp;
5326 	vio_net_rx_cb_t		vrx_cb = NULL;
5327 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
5328 	vgen_stats_t		*statsp = &ldcp->stats;
5329 	vgen_hparams_t		*lp = &ldcp->local_hparams;
5330 	uint_t			dring_mode = lp->dring_mode;
5331 
5332 	ASSERT(MUTEX_HELD(&ldcp->cblock));
5333 
5334 	mutex_exit(&ldcp->cblock);
5335 
5336 	size = msglen - VIO_PKT_DATA_HDRSIZE;
5337 	if (size < ETHERMIN || size > lp->mtu) {
5338 		(void) atomic_inc_32(&statsp->rx_pri_fail);
5339 		mutex_enter(&ldcp->cblock);
5340 		return;
5341 	}
5342 
5343 	vmp = vio_multipool_allocb(&ldcp->vmp, size);
5344 	if (vmp == NULL) {
5345 		mp = allocb(size, BPRI_MED);
5346 		if (mp == NULL) {
5347 			(void) atomic_inc_32(&statsp->rx_pri_fail);
5348 			DWARN(vgenp, ldcp, "allocb failure, "
5349 			    "unable to process priority frame\n");
5350 			mutex_enter(&ldcp->cblock);
5351 			return;
5352 		}
5353 	} else {
5354 		mp = vmp->mp;
5355 	}
5356 
5357 	/* copy the frame from the payload of raw data msg into the mblk */
5358 	bcopy(pkt->data, mp->b_rptr, size);
5359 	mp->b_wptr = mp->b_rptr + size;
5360 
5361 	if (vmp != NULL) {
5362 		vmp->state = VIO_MBLK_HAS_DATA;
5363 	}
5364 
5365 	/* update stats */
5366 	(void) atomic_inc_64(&statsp->rx_pri_packets);
5367 	(void) atomic_add_64(&statsp->rx_pri_bytes, size);
5368 
5369 	/*
5370 	 * If polling is currently enabled, add the packet to the priority
5371 	 * packets list and return. It will be picked up by the polling thread.
5372 	 */
5373 	if (dring_mode == VIO_RX_DRING_DATA) {
5374 		mutex_enter(&ldcp->rxlock);
5375 	} else {
5376 		mutex_enter(&ldcp->pollq_lock);
5377 	}
5378 
5379 	if (ldcp->polling_on == B_TRUE) {
5380 		if (ldcp->rx_pri_tail != NULL) {
5381 			ldcp->rx_pri_tail->b_next = mp;
5382 		} else {
5383 			ldcp->rx_pri_head = ldcp->rx_pri_tail = mp;
5384 		}
5385 	} else {
5386 		vrx_cb = ldcp->portp->vcb.vio_net_rx_cb;
5387 	}
5388 
5389 	if (dring_mode == VIO_RX_DRING_DATA) {
5390 		mutex_exit(&ldcp->rxlock);
5391 	} else {
5392 		mutex_exit(&ldcp->pollq_lock);
5393 	}
5394 
5395 	if (vrx_cb != NULL) {
5396 		vrx_cb(ldcp->portp->vhp, mp);
5397 	}
5398 
5399 	mutex_enter(&ldcp->cblock);
5400 }
5401 
5402 /*
5403  * dummy pkt data handler function for vnet protocol version 1.0
5404  */
5405 static void
5406 vgen_handle_pkt_data_nop(void *arg1, void *arg2, uint32_t msglen)
5407 {
5408 	_NOTE(ARGUNUSED(arg1, arg2, msglen))
5409 }
5410 
5411 /* handler for data messages received from the peer ldc end-point */
5412 static int
5413 vgen_handle_datamsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp, uint32_t msglen)
5414 {
5415 	int		rv = 0;
5416 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
5417 	vgen_hparams_t	*lp = &ldcp->local_hparams;
5418 
5419 	DBG1(vgenp, ldcp, "enter\n");
5420 
5421 	if (ldcp->hphase != VH_DONE) {
5422 		return (0);
5423 	}
5424 
5425 	/*
5426 	 * We check the data msg seqnum. This is needed only in TxDring mode.
5427 	 */
5428 	if (lp->dring_mode == VIO_TX_DRING &&
5429 	    tagp->vio_subtype == VIO_SUBTYPE_INFO) {
5430 		rv = vgen_check_datamsg_seq(ldcp, tagp);
5431 		if (rv != 0) {
5432 			return (rv);
5433 		}
5434 	}
5435 
5436 	switch (tagp->vio_subtype_env) {
5437 	case VIO_DRING_DATA:
5438 		rv = ldcp->rx_dringdata((void *)ldcp, (void *)tagp);
5439 		break;
5440 
5441 	case VIO_PKT_DATA:
5442 		ldcp->rx_pktdata((void *)ldcp, (void *)tagp, msglen);
5443 		break;
5444 	default:
5445 		break;
5446 	}
5447 
5448 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
5449 	return (rv);
5450 }
5451 
5452 
5453 static int
5454 vgen_ldc_reset(vgen_ldc_t *ldcp, vgen_caller_t caller)
5455 {
5456 	int	rv;
5457 
5458 	if (caller == VGEN_LDC_CB || caller == VGEN_MSG_THR) {
5459 		ASSERT(MUTEX_HELD(&ldcp->cblock));
5460 	}
5461 
5462 	/* Set the flag to indicate reset is in progress */
5463 	if (atomic_cas_uint(&ldcp->reset_in_progress, 0, 1) != 0) {
5464 		/* another thread is already in the process of resetting */
5465 		return (EBUSY);
5466 	}
5467 
5468 	if (caller == VGEN_LDC_CB || caller == VGEN_MSG_THR) {
5469 		mutex_exit(&ldcp->cblock);
5470 	}
5471 
5472 	rv = vgen_process_reset(ldcp, VGEN_FLAG_NEED_LDCRESET);
5473 
5474 	if (caller == VGEN_LDC_CB || caller == VGEN_MSG_THR) {
5475 		mutex_enter(&ldcp->cblock);
5476 	}
5477 
5478 	return (rv);
5479 }
5480 
5481 static void
5482 vgen_ldc_up(vgen_ldc_t *ldcp)
5483 {
5484 	int		rv;
5485 	uint32_t	retries = 0;
5486 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
5487 
5488 	ASSERT(MUTEX_HELD(&ldcp->cblock));
5489 
5490 	/*
5491 	 * If the channel has been reset max # of times, without successfully
5492 	 * completing handshake, stop and do not bring the channel up.
5493 	 */
5494 	if (ldcp->ldc_reset_count == vgen_ldc_max_resets) {
5495 		cmn_err(CE_WARN, "!vnet%d: exceeded number of permitted"
5496 		    " handshake attempts (%d) on channel %ld",
5497 		    vgenp->instance, vgen_ldc_max_resets, ldcp->ldc_id);
5498 		return;
5499 	}
5500 	ldcp->ldc_reset_count++;
5501 
5502 	do {
5503 		rv = ldc_up(ldcp->ldc_handle);
5504 		if ((rv != 0) && (rv == EWOULDBLOCK)) {
5505 			drv_usecwait(VGEN_LDC_UP_DELAY);
5506 		}
5507 		if (retries++ >= vgen_ldcup_retries)
5508 			break;
5509 	} while (rv == EWOULDBLOCK);
5510 
5511 	if (rv != 0) {
5512 		DWARN(vgenp, ldcp, "ldc_up err rv(%d)\n", rv);
5513 	}
5514 }
5515 
5516 int
5517 vgen_enable_intr(void *arg)
5518 {
5519 	uint32_t		end_ix;
5520 	vio_dring_msg_t		msg;
5521 	vgen_port_t		*portp = (vgen_port_t *)arg;
5522 	vgen_ldc_t		*ldcp = portp->ldcp;
5523 	vgen_hparams_t		*lp = &ldcp->local_hparams;
5524 
5525 	if (lp->dring_mode == VIO_RX_DRING_DATA) {
5526 		mutex_enter(&ldcp->rxlock);
5527 
5528 		ldcp->polling_on = B_FALSE;
5529 		/*
5530 		 * We send a stopped message to peer (sender) as we are turning
5531 		 * off polled mode. This effectively restarts data interrupts
5532 		 * by allowing the peer to send further dring data msgs to us.
5533 		 */
5534 		end_ix = ldcp->next_rxi;
5535 		DECR_RXI(end_ix, ldcp);
5536 		msg.dring_ident = ldcp->peer_hparams.dring_ident;
5537 		(void) vgen_send_dringack_shm(ldcp, (vio_msg_tag_t *)&msg,
5538 		    VNET_START_IDX_UNSPEC, end_ix, VIO_DP_STOPPED);
5539 
5540 		mutex_exit(&ldcp->rxlock);
5541 	} else {
5542 		mutex_enter(&ldcp->pollq_lock);
5543 		ldcp->polling_on = B_FALSE;
5544 		mutex_exit(&ldcp->pollq_lock);
5545 	}
5546 
5547 	return (0);
5548 }
5549 
5550 int
5551 vgen_disable_intr(void *arg)
5552 {
5553 	vgen_port_t		*portp = (vgen_port_t *)arg;
5554 	vgen_ldc_t		*ldcp = portp->ldcp;
5555 	vgen_hparams_t		*lp = &ldcp->local_hparams;
5556 
5557 	if (lp->dring_mode == VIO_RX_DRING_DATA) {
5558 		mutex_enter(&ldcp->rxlock);
5559 		ldcp->polling_on = B_TRUE;
5560 		mutex_exit(&ldcp->rxlock);
5561 	} else {
5562 		mutex_enter(&ldcp->pollq_lock);
5563 		ldcp->polling_on = B_TRUE;
5564 		mutex_exit(&ldcp->pollq_lock);
5565 	}
5566 
5567 	return (0);
5568 }
5569 
5570 mblk_t *
5571 vgen_rx_poll(void *arg, int bytes_to_pickup)
5572 {
5573 	vgen_port_t		*portp = (vgen_port_t *)arg;
5574 	vgen_ldc_t		*ldcp = portp->ldcp;
5575 	vgen_hparams_t		*lp = &ldcp->local_hparams;
5576 	mblk_t			*mp = NULL;
5577 
5578 	if (lp->dring_mode == VIO_RX_DRING_DATA) {
5579 		mp = vgen_poll_rcv_shm(ldcp, bytes_to_pickup);
5580 	} else {
5581 		mp = vgen_poll_rcv(ldcp, bytes_to_pickup);
5582 	}
5583 
5584 	return (mp);
5585 }
5586 
5587 /* transmit watchdog timeout handler */
5588 static void
5589 vgen_tx_watchdog(void *arg)
5590 {
5591 	vgen_ldc_t	*ldcp;
5592 	vgen_t		*vgenp;
5593 	int		rv;
5594 	boolean_t	tx_blocked;
5595 	clock_t		tx_blocked_lbolt;
5596 
5597 	ldcp = (vgen_ldc_t *)arg;
5598 	vgenp = LDC_TO_VGEN(ldcp);
5599 
5600 	tx_blocked = ldcp->tx_blocked;
5601 	tx_blocked_lbolt = ldcp->tx_blocked_lbolt;
5602 
5603 	if (vgen_txwd_timeout &&
5604 	    (tx_blocked == B_TRUE) &&
5605 	    ((ddi_get_lbolt() - tx_blocked_lbolt) >
5606 	    drv_usectohz(vgen_txwd_timeout * 1000))) {
5607 		/*
5608 		 * Something is wrong; the peer is not picking up the packets
5609 		 * in the transmit dring. We now go ahead and reset the channel
5610 		 * to break out of this condition.
5611 		 */
5612 		DWARN(vgenp, ldcp, "transmit timeout lbolt(%lx), "
5613 		    "tx_blocked_lbolt(%lx)\n",
5614 		    ddi_get_lbolt(), tx_blocked_lbolt);
5615 
5616 #ifdef DEBUG
5617 		if (vgen_inject_error(ldcp, VGEN_ERR_TXTIMEOUT)) {
5618 			/* tx timeout triggered for debugging */
5619 			vgen_inject_err_flag &= ~(VGEN_ERR_TXTIMEOUT);
5620 		}
5621 #endif
5622 
5623 		/*
5624 		 * Clear tid before invoking vgen_ldc_reset(). Otherwise,
5625 		 * it will result in a deadlock when vgen_process_reset() tries
5626 		 * to untimeout() on seeing a non-zero tid, but it is being
5627 		 * invoked by the timer itself in this case.
5628 		 */
5629 		mutex_enter(&ldcp->cblock);
5630 		if (ldcp->wd_tid == 0) {
5631 			/* Cancelled by vgen_process_reset() */
5632 			mutex_exit(&ldcp->cblock);
5633 			return;
5634 		}
5635 		ldcp->wd_tid = 0;
5636 		mutex_exit(&ldcp->cblock);
5637 
5638 		/*
5639 		 * Now reset the channel.
5640 		 */
5641 		rv = vgen_ldc_reset(ldcp, VGEN_OTHER);
5642 		if (rv == 0) {
5643 			/*
5644 			 * We have successfully reset the channel. If we are
5645 			 * in tx flow controlled state, clear it now and enable
5646 			 * transmit in the upper layer.
5647 			 */
5648 			if (ldcp->tx_blocked) {
5649 				vio_net_tx_update_t vtx_update =
5650 				    ldcp->portp->vcb.vio_net_tx_update;
5651 
5652 				ldcp->tx_blocked = B_FALSE;
5653 				vtx_update(ldcp->portp->vhp);
5654 			}
5655 		}
5656 
5657 		/*
5658 		 * Channel has been reset by us or some other thread is already
5659 		 * in the process of resetting. In either case, we return
5660 		 * without restarting the timer. When handshake completes and
5661 		 * the channel is ready for data transmit/receive we start a
5662 		 * new watchdog timer.
5663 		 */
5664 		return;
5665 	}
5666 
5667 restart_timer:
5668 	/* Restart the timer */
5669 	mutex_enter(&ldcp->cblock);
5670 	if (ldcp->wd_tid == 0) {
5671 		/* Cancelled by vgen_process_reset() */
5672 		mutex_exit(&ldcp->cblock);
5673 		return;
5674 	}
5675 	ldcp->wd_tid = timeout(vgen_tx_watchdog, (caddr_t)ldcp,
5676 	    drv_usectohz(vgen_txwd_interval * 1000));
5677 	mutex_exit(&ldcp->cblock);
5678 }
5679 
5680 /* Handshake watchdog timeout handler */
5681 static void
5682 vgen_hwatchdog(void *arg)
5683 {
5684 	vgen_ldc_t	*ldcp = (vgen_ldc_t *)arg;
5685 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
5686 
5687 	DWARN(vgenp, ldcp, "handshake timeout phase(%x) state(%x)\n",
5688 	    ldcp->hphase, ldcp->hstate);
5689 
5690 	mutex_enter(&ldcp->cblock);
5691 	if (ldcp->htid == 0) {
5692 		/* Cancelled by vgen_process_reset() */
5693 		mutex_exit(&ldcp->cblock);
5694 		return;
5695 	}
5696 	ldcp->htid = 0;
5697 	mutex_exit(&ldcp->cblock);
5698 
5699 	/*
5700 	 * Something is wrong; handshake with the peer seems to be hung. We now
5701 	 * go ahead and reset the channel to break out of this condition.
5702 	 */
5703 	(void) vgen_ldc_reset(ldcp, VGEN_OTHER);
5704 }
5705 
5706 /* Check if the session id in the received message is valid */
5707 static int
5708 vgen_check_sid(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5709 {
5710 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
5711 
5712 	if (tagp->vio_sid != ldcp->peer_sid) {
5713 		DWARN(vgenp, ldcp, "sid mismatch: expected(%x), rcvd(%x)\n",
5714 		    ldcp->peer_sid, tagp->vio_sid);
5715 		return (VGEN_FAILURE);
5716 	}
5717 	else
5718 		return (VGEN_SUCCESS);
5719 }
5720 
5721 /*
5722  * Initialize the common part of dring registration
5723  * message; used in both TxDring and RxDringData modes.
5724  */
5725 static void
5726 vgen_init_dring_reg_msg(vgen_ldc_t *ldcp, vio_dring_reg_msg_t *msg,
5727 	uint8_t option)
5728 {
5729 	vio_msg_tag_t		*tagp;
5730 
5731 	tagp = &msg->tag;
5732 	tagp->vio_msgtype = VIO_TYPE_CTRL;
5733 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
5734 	tagp->vio_subtype_env = VIO_DRING_REG;
5735 	tagp->vio_sid = ldcp->local_sid;
5736 
5737 	/* get dring info msg payload from ldcp->local */
5738 	bcopy(&(ldcp->local_hparams.dring_cookie), &(msg->cookie[0]),
5739 	    sizeof (ldc_mem_cookie_t));
5740 	msg->ncookies = ldcp->local_hparams.dring_ncookies;
5741 	msg->num_descriptors = ldcp->local_hparams.num_desc;
5742 	msg->descriptor_size = ldcp->local_hparams.desc_size;
5743 
5744 	msg->options = option;
5745 
5746 	/*
5747 	 * dring_ident is set to 0. After mapping the dring, peer sets this
5748 	 * value and sends it in the ack, which is saved in
5749 	 * vgen_handle_dring_reg().
5750 	 */
5751 	msg->dring_ident = 0;
5752 }
5753 
5754 static int
5755 vgen_mapin_avail(vgen_ldc_t *ldcp)
5756 {
5757 	int		rv;
5758 	ldc_info_t	info;
5759 	uint64_t	mapin_sz_req;
5760 	uint64_t	dblk_sz;
5761 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
5762 
5763 	rv = ldc_info(ldcp->ldc_handle, &info);
5764 	if (rv != 0) {
5765 		return (B_FALSE);
5766 	}
5767 
5768 	dblk_sz = RXDRING_DBLK_SZ(vgenp->max_frame_size);
5769 	mapin_sz_req = (VGEN_RXDRING_NRBUFS * dblk_sz);
5770 
5771 	if (info.direct_map_size_max >= mapin_sz_req) {
5772 		return (B_TRUE);
5773 	}
5774 
5775 	return (B_FALSE);
5776 }
5777 
5778 #if DEBUG
5779 
5780 /*
5781  * Print debug messages - set to 0xf to enable all msgs
5782  */
5783 void
5784 vgen_debug_printf(const char *fname, vgen_t *vgenp,
5785     vgen_ldc_t *ldcp, const char *fmt, ...)
5786 {
5787 	char	buf[256];
5788 	char	*bufp = buf;
5789 	va_list	ap;
5790 
5791 	if ((vgenp != NULL) && (vgenp->vnetp != NULL)) {
5792 		(void) sprintf(bufp, "vnet%d:",
5793 		    ((vnet_t *)(vgenp->vnetp))->instance);
5794 		bufp += strlen(bufp);
5795 	}
5796 	if (ldcp != NULL) {
5797 		(void) sprintf(bufp, "ldc(%ld):", ldcp->ldc_id);
5798 		bufp += strlen(bufp);
5799 	}
5800 	(void) sprintf(bufp, "%s: ", fname);
5801 	bufp += strlen(bufp);
5802 
5803 	va_start(ap, fmt);
5804 	(void) vsprintf(bufp, fmt, ap);
5805 	va_end(ap);
5806 
5807 	if ((ldcp == NULL) ||(vgendbg_ldcid == -1) ||
5808 	    (vgendbg_ldcid == ldcp->ldc_id)) {
5809 		cmn_err(CE_CONT, "%s\n", buf);
5810 	}
5811 }
5812 #endif
5813 
5814 #ifdef	VNET_IOC_DEBUG
5815 
5816 static void
5817 vgen_ioctl(void *arg, queue_t *q, mblk_t *mp)
5818 {
5819 	struct iocblk	*iocp;
5820 	vgen_port_t	*portp;
5821 	enum		ioc_reply {
5822 			IOC_INVAL = -1,		/* bad, NAK with EINVAL */
5823 			IOC_ACK			/* OK, just send ACK    */
5824 	}		status;
5825 	int		rv;
5826 
5827 	iocp = (struct iocblk *)(uintptr_t)mp->b_rptr;
5828 	iocp->ioc_error = 0;
5829 	portp = (vgen_port_t *)arg;
5830 
5831 	if (portp == NULL) {
5832 		status = IOC_INVAL;
5833 		goto vgen_ioc_exit;
5834 	}
5835 
5836 	mutex_enter(&portp->lock);
5837 
5838 	switch (iocp->ioc_cmd) {
5839 
5840 	case VNET_FORCE_LINK_DOWN:
5841 	case VNET_FORCE_LINK_UP:
5842 		rv = vgen_force_link_state(portp, iocp->ioc_cmd);
5843 		(rv == 0) ? (status = IOC_ACK) : (status = IOC_INVAL);
5844 		break;
5845 
5846 	default:
5847 		status = IOC_INVAL;
5848 		break;
5849 
5850 	}
5851 
5852 	mutex_exit(&portp->lock);
5853 
5854 vgen_ioc_exit:
5855 
5856 	switch (status) {
5857 	default:
5858 	case IOC_INVAL:
5859 		/* Error, reply with a NAK and EINVAL error */
5860 		miocnak(q, mp, 0, EINVAL);
5861 		break;
5862 	case IOC_ACK:
5863 		/* OK, reply with an ACK */
5864 		miocack(q, mp, 0, 0);
5865 		break;
5866 	}
5867 }
5868 
5869 static int
5870 vgen_force_link_state(vgen_port_t *portp, int cmd)
5871 {
5872 	ldc_status_t	istatus;
5873 	int		rv;
5874 	vgen_ldc_t	*ldcp = portp->ldcp;
5875 	vgen_t		*vgenp = portp->vgenp;
5876 
5877 	mutex_enter(&ldcp->cblock);
5878 
5879 	switch (cmd) {
5880 
5881 	case VNET_FORCE_LINK_DOWN:
5882 		(void) ldc_down(ldcp->ldc_handle);
5883 		ldcp->link_down_forced = B_TRUE;
5884 		break;
5885 
5886 	case VNET_FORCE_LINK_UP:
5887 		vgen_ldc_up(ldcp);
5888 		ldcp->link_down_forced = B_FALSE;
5889 
5890 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
5891 			DWARN(vgenp, ldcp, "ldc_status err\n");
5892 		} else {
5893 			ldcp->ldc_status = istatus;
5894 		}
5895 
5896 		/* if channel is already UP - restart handshake */
5897 		if (ldcp->ldc_status == LDC_UP) {
5898 			vgen_handle_evt_up(ldcp);
5899 		}
5900 		break;
5901 
5902 	}
5903 
5904 	mutex_exit(&ldcp->cblock);
5905 
5906 	return (0);
5907 }
5908 
5909 #else
5910 
5911 static void
5912 vgen_ioctl(void *arg, queue_t *q, mblk_t *mp)
5913 {
5914 	vgen_port_t	*portp;
5915 
5916 	portp = (vgen_port_t *)arg;
5917 
5918 	if (portp == NULL) {
5919 		miocnak(q, mp, 0, EINVAL);
5920 		return;
5921 	}
5922 
5923 	miocnak(q, mp, 0, ENOTSUP);
5924 }
5925 
5926 #endif
5927