xref: /illumos-gate/usr/src/uts/sun4v/io/vnet_gen.c (revision 6446bd46ed1b4e9f69da153665f82181ccaedad5)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 #include <sys/types.h>
27 #include <sys/errno.h>
28 #include <sys/sysmacros.h>
29 #include <sys/param.h>
30 #include <sys/machsystm.h>
31 #include <sys/stream.h>
32 #include <sys/strsubr.h>
33 #include <sys/kmem.h>
34 #include <sys/conf.h>
35 #include <sys/devops.h>
36 #include <sys/ksynch.h>
37 #include <sys/stat.h>
38 #include <sys/modctl.h>
39 #include <sys/debug.h>
40 #include <sys/ethernet.h>
41 #include <sys/ddi.h>
42 #include <sys/sunddi.h>
43 #include <sys/strsun.h>
44 #include <sys/note.h>
45 #include <sys/mac_provider.h>
46 #include <sys/mac_ether.h>
47 #include <sys/ldc.h>
48 #include <sys/mach_descrip.h>
49 #include <sys/mdeg.h>
50 #include <net/if.h>
51 #include <sys/vnet.h>
52 #include <sys/vio_mailbox.h>
53 #include <sys/vio_common.h>
54 #include <sys/vnet_common.h>
55 #include <sys/vnet_mailbox.h>
56 #include <sys/vio_util.h>
57 #include <sys/vnet_gen.h>
58 #include <sys/atomic.h>
59 #include <sys/callb.h>
60 #include <sys/sdt.h>
61 #include <sys/intr.h>
62 #include <sys/pattr.h>
63 #include <sys/vlan.h>
64 
65 /*
66  * Implementation of the mac provider functionality for vnet using the
67  * generic(default) transport layer of sun4v Logical Domain Channels(LDC).
68  */
69 
70 /* Entry Points */
71 int vgen_init(void *vnetp, uint64_t regprop, dev_info_t *vnetdip,
72     const uint8_t *macaddr, void **vgenhdl);
73 int vgen_init_mdeg(void *arg);
74 void vgen_uninit(void *arg);
75 int vgen_dds_tx(void *arg, void *dmsg);
76 int vgen_enable_intr(void *arg);
77 int vgen_disable_intr(void *arg);
78 mblk_t *vgen_rx_poll(void *arg, int bytes_to_pickup);
79 static int vgen_start(void *arg);
80 static void vgen_stop(void *arg);
81 static mblk_t *vgen_tx(void *arg, mblk_t *mp);
82 static int vgen_multicst(void *arg, boolean_t add,
83 	const uint8_t *mca);
84 static int vgen_promisc(void *arg, boolean_t on);
85 static int vgen_unicst(void *arg, const uint8_t *mca);
86 static int vgen_stat(void *arg, uint_t stat, uint64_t *val);
87 static void vgen_ioctl(void *arg, queue_t *q, mblk_t *mp);
88 #ifdef	VNET_IOC_DEBUG
89 static int vgen_force_link_state(vgen_port_t *portp, int link_state);
90 #endif
91 
92 /* Port/LDC Configuration */
93 static int vgen_read_mdprops(vgen_t *vgenp);
94 static void vgen_update_md_prop(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
95 static void vgen_read_pri_eth_types(vgen_t *vgenp, md_t *mdp,
96 	mde_cookie_t node);
97 static void vgen_mtu_read(vgen_t *vgenp, md_t *mdp, mde_cookie_t node,
98 	uint32_t *mtu);
99 static void vgen_linkprop_read(vgen_t *vgenp, md_t *mdp, mde_cookie_t node,
100 	boolean_t *pls);
101 static void vgen_detach_ports(vgen_t *vgenp);
102 static void vgen_port_detach(vgen_port_t *portp);
103 static void vgen_port_list_insert(vgen_port_t *portp);
104 static void vgen_port_list_remove(vgen_port_t *portp);
105 static vgen_port_t *vgen_port_lookup(vgen_portlist_t *plistp,
106 	int port_num);
107 static int vgen_mdeg_reg(vgen_t *vgenp);
108 static void vgen_mdeg_unreg(vgen_t *vgenp);
109 static int vgen_mdeg_cb(void *cb_argp, mdeg_result_t *resp);
110 static int vgen_mdeg_port_cb(void *cb_argp, mdeg_result_t *resp);
111 static int vgen_add_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
112 static int vgen_port_read_props(vgen_port_t *portp, vgen_t *vgenp, md_t *mdp,
113 	mde_cookie_t mdex);
114 static int vgen_remove_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
115 static int vgen_port_attach(vgen_port_t *portp);
116 static void vgen_port_detach_mdeg(vgen_port_t *portp);
117 static int vgen_update_port(vgen_t *vgenp, md_t *curr_mdp,
118 	mde_cookie_t curr_mdex, md_t *prev_mdp, mde_cookie_t prev_mdex);
119 static uint64_t	vgen_port_stat(vgen_port_t *portp, uint_t stat);
120 static void vgen_port_reset(vgen_port_t *portp);
121 static void vgen_reset_vsw_port(vgen_t *vgenp);
122 static int vgen_ldc_reset(vgen_ldc_t *ldcp, vgen_caller_t caller);
123 static void vgen_ldc_up(vgen_ldc_t *ldcp);
124 static int vgen_ldc_attach(vgen_port_t *portp, uint64_t ldc_id);
125 static void vgen_ldc_detach(vgen_ldc_t *ldcp);
126 static void vgen_port_init(vgen_port_t *portp);
127 static void vgen_port_uninit(vgen_port_t *portp);
128 static int vgen_ldc_init(vgen_ldc_t *ldcp);
129 static void vgen_ldc_uninit(vgen_ldc_t *ldcp);
130 static uint64_t	vgen_ldc_stat(vgen_ldc_t *ldcp, uint_t stat);
131 
132 /* I/O Processing */
133 static int vgen_portsend(vgen_port_t *portp, mblk_t *mp);
134 static int vgen_ldcsend(void *arg, mblk_t *mp);
135 static void vgen_ldcsend_pkt(void *arg, mblk_t *mp);
136 static uint_t vgen_ldc_cb(uint64_t event, caddr_t arg);
137 static void vgen_tx_watchdog(void *arg);
138 
139 /*  Dring Configuration */
140 static int vgen_create_dring(vgen_ldc_t *ldcp);
141 static void vgen_destroy_dring(vgen_ldc_t *ldcp);
142 static int vgen_map_dring(vgen_ldc_t *ldcp, void *pkt);
143 static void vgen_unmap_dring(vgen_ldc_t *ldcp);
144 static int vgen_mapin_avail(vgen_ldc_t *ldcp);
145 
146 /* VIO Message Processing */
147 static int vgen_handshake(vgen_ldc_t *ldcp);
148 static int vgen_handshake_done(vgen_ldc_t *ldcp);
149 static vgen_ldc_t *vh_nextphase(vgen_ldc_t *ldcp);
150 static int vgen_handshake_phase2(vgen_ldc_t *ldcp);
151 static int vgen_handshake_phase3(vgen_ldc_t *ldcp);
152 static void vgen_setup_handshake_params(vgen_ldc_t *ldcp);
153 static int vgen_send_version_negotiate(vgen_ldc_t *ldcp);
154 static int vgen_send_attr_info(vgen_ldc_t *ldcp);
155 static int vgen_send_rx_dring_reg(vgen_ldc_t *ldcp);
156 static int vgen_send_tx_dring_reg(vgen_ldc_t *ldcp);
157 static void vgen_init_dring_reg_msg(vgen_ldc_t *ldcp, vio_dring_reg_msg_t *msg,
158 	uint8_t option);
159 static int vgen_send_rdx_info(vgen_ldc_t *ldcp);
160 static int vgen_send_dringdata(vgen_ldc_t *ldcp, uint32_t start, int32_t end);
161 static int vgen_send_mcast_info(vgen_ldc_t *ldcp);
162 static int vgen_handle_version_negotiate(vgen_ldc_t *ldcp,
163 	vio_msg_tag_t *tagp);
164 static int vgen_handle_attr_msg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
165 static int vgen_handle_attr_info(vgen_ldc_t *ldcp, vnet_attr_msg_t *msg);
166 static int vgen_handle_attr_ack(vgen_ldc_t *ldcp, vnet_attr_msg_t *msg);
167 static int vgen_handle_dring_reg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
168 static int vgen_handle_dring_reg_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
169 static int vgen_handle_dring_reg_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
170 static int vgen_handle_rdx_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
171 static int vgen_handle_mcast_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
172 static int vgen_handle_ctrlmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
173 static void vgen_handle_pkt_data_nop(void *arg1, void *arg2, uint32_t msglen);
174 static int vgen_handle_datamsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp,
175 	uint32_t msglen);
176 static void vgen_handle_errmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
177 static int vgen_dds_rx(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
178 static void vgen_handle_evt_up(vgen_ldc_t *ldcp);
179 static int vgen_process_reset(vgen_ldc_t *ldcp, int flags);
180 static int vgen_check_sid(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
181 static void vgen_hwatchdog(void *arg);
182 static void vgen_set_vnet_proto_ops(vgen_ldc_t *ldcp);
183 static void vgen_reset_vnet_proto_ops(vgen_ldc_t *ldcp);
184 static void vgen_link_update(vgen_t *vgenp, link_state_t link_state);
185 
186 /* VLANs */
187 static void vgen_vlan_read_ids(void *arg, int type, md_t *mdp,
188 	mde_cookie_t node, uint16_t *pvidp, uint16_t **vidspp,
189 	uint16_t *nvidsp, uint16_t *default_idp);
190 static void vgen_vlan_create_hash(vgen_port_t *portp);
191 static void vgen_vlan_destroy_hash(vgen_port_t *portp);
192 static void vgen_vlan_add_ids(vgen_port_t *portp);
193 static void vgen_vlan_remove_ids(vgen_port_t *portp);
194 static boolean_t vgen_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid);
195 static boolean_t vgen_frame_lookup_vid(vnet_t *vnetp, struct ether_header *ehp,
196 	uint16_t *vidp);
197 static mblk_t *vgen_vlan_frame_fixtag(vgen_port_t *portp, mblk_t *mp,
198 	boolean_t is_tagged, uint16_t vid);
199 static void vgen_vlan_unaware_port_reset(vgen_port_t *portp);
200 static void vgen_reset_vlan_unaware_ports(vgen_t *vgenp);
201 
202 /* Exported functions */
203 int vgen_handle_evt_read(vgen_ldc_t *ldcp, vgen_caller_t caller);
204 int vgen_handle_evt_reset(vgen_ldc_t *ldcp, vgen_caller_t caller);
205 void vgen_handle_pkt_data(void *arg1, void *arg2, uint32_t msglen);
206 void vgen_destroy_rxpools(void *arg);
207 
208 /* Externs */
209 extern void vnet_dds_rx(void *arg, void *dmsg);
210 extern void vnet_dds_cleanup_hio(vnet_t *vnetp);
211 extern int vnet_mtu_update(vnet_t *vnetp, uint32_t mtu);
212 extern void vnet_link_update(vnet_t *vnetp, link_state_t link_state);
213 extern int vgen_sendmsg(vgen_ldc_t *ldcp, caddr_t msg,  size_t msglen,
214     boolean_t caller_holds_lock);
215 extern void vgen_stop_msg_thread(vgen_ldc_t *ldcp);
216 extern int vgen_create_tx_dring(vgen_ldc_t *ldcp);
217 extern void vgen_destroy_tx_dring(vgen_ldc_t *ldcp);
218 extern int vgen_map_rx_dring(vgen_ldc_t *ldcp, void *pkt);
219 extern void vgen_unmap_rx_dring(vgen_ldc_t *ldcp);
220 extern int vgen_create_rx_dring(vgen_ldc_t *ldcp);
221 extern void vgen_destroy_rx_dring(vgen_ldc_t *ldcp);
222 extern int vgen_map_tx_dring(vgen_ldc_t *ldcp, void *pkt);
223 extern void vgen_unmap_tx_dring(vgen_ldc_t *ldcp);
224 extern int vgen_map_data(vgen_ldc_t *ldcp, void *pkt);
225 extern int vgen_handle_dringdata_shm(void *arg1, void *arg2);
226 extern int vgen_handle_dringdata(void *arg1, void *arg2);
227 extern int vgen_dringsend_shm(void *arg, mblk_t *mp);
228 extern int vgen_dringsend(void *arg, mblk_t *mp);
229 extern void vgen_ldc_msg_worker(void *arg);
230 extern int vgen_send_dringack_shm(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp,
231     uint32_t start, int32_t end, uint8_t pstate);
232 extern mblk_t *vgen_poll_rcv_shm(vgen_ldc_t *ldcp, int bytes_to_pickup);
233 extern mblk_t *vgen_poll_rcv(vgen_ldc_t *ldcp, int bytes_to_pickup);
234 extern int vgen_check_datamsg_seq(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
235 
236 #define	VGEN_PRI_ETH_DEFINED(vgenp)	((vgenp)->pri_num_types != 0)
237 
/*
 * Acquire/release all five per-channel locks as a unit. LDC_LOCK takes them
 * in the order cblock, rxlock, wrlock, txlock, tclock; LDC_UNLOCK drops them
 * in the reverse order.
 *
 * Each macro is wrapped in do { } while (0) so that it expands to exactly one
 * statement and therefore behaves correctly as the body of an unbraced
 * if/else or loop. Invoke with a trailing semicolon: LDC_LOCK(ldcp);
 */
#define	LDC_LOCK(ldcp)	\
	do {	\
		mutex_enter(&((ldcp)->cblock));	\
		mutex_enter(&((ldcp)->rxlock));	\
		mutex_enter(&((ldcp)->wrlock));	\
		mutex_enter(&((ldcp)->txlock));	\
		mutex_enter(&((ldcp)->tclock));	\
	} while (0)
#define	LDC_UNLOCK(ldcp)	\
	do {	\
		mutex_exit(&((ldcp)->tclock));	\
		mutex_exit(&((ldcp)->txlock));	\
		mutex_exit(&((ldcp)->wrlock));	\
		mutex_exit(&((ldcp)->rxlock));	\
		mutex_exit(&((ldcp)->cblock));	\
	} while (0)
250 
251 #define	VGEN_VER_EQ(ldcp, major, minor)	\
252 	((ldcp)->local_hparams.ver_major == (major) &&	\
253 	    (ldcp)->local_hparams.ver_minor == (minor))
254 
255 #define	VGEN_VER_LT(ldcp, major, minor)	\
256 	(((ldcp)->local_hparams.ver_major < (major)) ||	\
257 	    ((ldcp)->local_hparams.ver_major == (major) &&	\
258 	    (ldcp)->local_hparams.ver_minor < (minor)))
259 
260 #define	VGEN_VER_GTEQ(ldcp, major, minor)	\
261 	(((ldcp)->local_hparams.ver_major > (major)) ||	\
262 	    ((ldcp)->local_hparams.ver_major == (major) &&	\
263 	    (ldcp)->local_hparams.ver_minor >= (minor)))
264 
265 /*
266  * Property names
267  */
268 static char macaddr_propname[] = "mac-address";
269 static char rmacaddr_propname[] = "remote-mac-address";
270 static char channel_propname[] = "channel-endpoint";
271 static char reg_propname[] = "reg";
272 static char port_propname[] = "port";
273 static char swport_propname[] = "switch-port";
274 static char id_propname[] = "id";
275 static char vdev_propname[] = "virtual-device";
276 static char vnet_propname[] = "network";
277 static char pri_types_propname[] = "priority-ether-types";
278 static char vgen_pvid_propname[] = "port-vlan-id";
279 static char vgen_vid_propname[] = "vlan-id";
280 static char vgen_dvid_propname[] = "default-vlan-id";
281 static char port_pvid_propname[] = "remote-port-vlan-id";
282 static char port_vid_propname[] = "remote-vlan-id";
283 static char vgen_mtu_propname[] = "mtu";
284 static char vgen_linkprop_propname[] = "linkprop";
285 
286 /*
287  * VIO Protocol Version Info:
288  *
289  * The version specified below represents the version of protocol currently
290  * supported in the driver. It means the driver can negotiate with peers with
291  * versions <= this version. Here is a summary of the feature(s) that are
292  * supported at each version of the protocol:
293  *
294  * 1.0			Basic VIO protocol.
295  * 1.1			vDisk protocol update (no virtual network update).
296  * 1.2			Support for priority frames (priority-ether-types).
297  * 1.3			VLAN and HybridIO support.
298  * 1.4			Jumbo Frame support.
299  * 1.5			Link State Notification support with optional support
300  *			for Physical Link information.
301  * 1.6			Support for RxDringData mode.
302  */
303 static vgen_ver_t vgen_versions[VGEN_NUM_VER] =  { {1, 6} };
304 
305 /* Tunables */
306 uint32_t vgen_hwd_interval = 5;		/* handshake watchdog freq in sec */
307 uint32_t vgen_ldcwr_retries = 10;	/* max # of ldc_write() retries */
308 uint32_t vgen_ldcup_retries = 5;	/* max # of ldc_up() retries */
309 uint32_t vgen_ldccl_retries = 5;	/* max # of ldc_close() retries */
310 uint32_t vgen_tx_delay = 0x30;		/* delay when tx descr not available */
311 uint32_t vgen_ldc_mtu = VGEN_LDC_MTU;		/* ldc mtu */
312 uint32_t vgen_txwd_interval = VGEN_TXWD_INTERVAL; /* watchdog freq in msec */
313 uint32_t vgen_txwd_timeout = VGEN_TXWD_TIMEOUT;   /* tx timeout in msec */
314 
315 /*
316  * Max # of channel resets allowed during handshake.
317  */
318 uint32_t vgen_ldc_max_resets = 5;
319 
320 /*
321  * See comments in vsw.c for details on the dring modes supported.
322  * In RxDringData mode, # of buffers is determined by multiplying the # of
323  * descriptors with the factor below. Note that the factor must be > 1; i.e,
324  * the # of buffers must always be > # of descriptors. This is needed because,
325  * while the shared memory buffers are sent up the stack on the receiver, the
326  * sender needs additional buffers that can be used for further transmits.
327  * See vgen_create_rx_dring() for details.
328  */
329 uint32_t vgen_nrbufs_factor = 2;
330 
331 /*
332  * Retry delay used while destroying rx mblk pools. Used in both Dring modes.
333  */
334 int vgen_rxpool_cleanup_delay = 100000;	/* 100ms */
335 
336 /*
337  * Delay when rx descr not ready; used in TxDring mode only.
338  */
339 uint32_t vgen_recv_delay = 1;
340 
341 /*
342  * Retry when rx descr not ready; used in TxDring mode only.
343  */
344 uint32_t vgen_recv_retries = 10;
345 
346 /*
347  * Max # of packets accumulated prior to sending them up. It is best
348  * to keep this at 60% of the number of receive buffers. Used in TxDring mode
349  * by the msg worker thread. Used in RxDringData mode while in interrupt mode
350  * (not used in polled mode).
351  */
352 uint32_t vgen_chain_len = (VGEN_NRBUFS * 0.6);
353 
354 /*
355  * Internal tunables for receive buffer pools, that is,  the size and number of
356  * mblks for each pool. At least 3 sizes must be specified if these are used.
357  * The sizes must be specified in increasing order. Non-zero value of the first
358  * size will be used as a hint to use these values instead of the algorithm
359  * that determines the sizes based on MTU. Used in TxDring mode only.
360  */
361 uint32_t vgen_rbufsz1 = 0;
362 uint32_t vgen_rbufsz2 = 0;
363 uint32_t vgen_rbufsz3 = 0;
364 uint32_t vgen_rbufsz4 = 0;
365 
366 uint32_t vgen_nrbufs1 = VGEN_NRBUFS;
367 uint32_t vgen_nrbufs2 = VGEN_NRBUFS;
368 uint32_t vgen_nrbufs3 = VGEN_NRBUFS;
369 uint32_t vgen_nrbufs4 = VGEN_NRBUFS;
370 
371 /*
372  * In the absence of "priority-ether-types" property in MD, the following
373  * internal tunable can be set to specify a single priority ethertype.
374  */
375 uint64_t vgen_pri_eth_type = 0;
376 
377 /*
378  * Number of transmit priority buffers that are preallocated per device.
379  * This number is chosen to be a small value to throttle transmission
380  * of priority packets. Note: Must be a power of 2 for vio_create_mblks().
381  */
382 uint32_t vgen_pri_tx_nmblks = 64;
383 
384 uint32_t	vgen_vlan_nchains = 4;	/* # of chains in vlan id hash table */
385 
386 /*
387  * Matching criteria passed to the MDEG to register interest
388  * in changes to 'virtual-device' nodes (i.e. vnet nodes) identified
389  * by their 'name' and 'cfg-handle' properties.
390  */
391 static md_prop_match_t vdev_prop_match[] = {
392 	{ MDET_PROP_STR,    "name"   },
393 	{ MDET_PROP_VAL,    "cfg-handle" },
394 	{ MDET_LIST_END,    NULL    }
395 };
396 
397 static mdeg_node_match_t vdev_match = { "virtual-device",
398 						vdev_prop_match };
399 
400 /* MD update matching structure */
401 static md_prop_match_t	vport_prop_match[] = {
402 	{ MDET_PROP_VAL,	"id" },
403 	{ MDET_LIST_END,	NULL }
404 };
405 
406 static mdeg_node_match_t vport_match = { "virtual-device-port",
407 					vport_prop_match };
408 
409 /* Template for matching a particular vnet instance */
410 static mdeg_prop_spec_t vgen_prop_template[] = {
411 	{ MDET_PROP_STR,	"name",		"network" },
412 	{ MDET_PROP_VAL,	"cfg-handle",	NULL },
413 	{ MDET_LIST_END,	NULL,		NULL }
414 };
415 
416 #define	VGEN_SET_MDEG_PROP_INST(specp, val)	(specp)[1].ps_val = (val)
417 
418 static int vgen_mdeg_port_cb(void *cb_argp, mdeg_result_t *resp);
419 
420 #ifdef	VNET_IOC_DEBUG
421 #define	VGEN_M_CALLBACK_FLAGS	(MC_IOCTL)
422 #else
423 #define	VGEN_M_CALLBACK_FLAGS	(0)
424 #endif
425 
426 static mac_callbacks_t vgen_m_callbacks = {
427 	VGEN_M_CALLBACK_FLAGS,
428 	vgen_stat,
429 	vgen_start,
430 	vgen_stop,
431 	vgen_promisc,
432 	vgen_multicst,
433 	vgen_unicst,
434 	vgen_tx,
435 	NULL,
436 	vgen_ioctl,
437 	NULL,
438 	NULL
439 };
440 
441 /* Externs */
442 extern pri_t	maxclsyspri;
443 extern proc_t	p0;
444 extern uint32_t	vnet_ethermtu;
445 extern uint16_t	vnet_default_vlan_id;
446 extern uint32_t vnet_num_descriptors;
447 
448 #ifdef DEBUG
449 
450 #define	DEBUG_PRINTF	vgen_debug_printf
451 
452 extern int vnet_dbglevel;
453 
454 void vgen_debug_printf(const char *fname, vgen_t *vgenp,
455 	vgen_ldc_t *ldcp, const char *fmt, ...);
456 
457 /* -1 for all LDCs info, or ldc_id for a specific LDC info */
458 int vgendbg_ldcid = -1;
459 
460 /* Flags to simulate error conditions for debugging */
461 int vgen_inject_err_flag = 0;
462 
463 
464 boolean_t
465 vgen_inject_error(vgen_ldc_t *ldcp, int error)
466 {
467 	if ((vgendbg_ldcid == ldcp->ldc_id) &&
468 	    (vgen_inject_err_flag & error)) {
469 		return (B_TRUE);
470 	}
471 	return (B_FALSE);
472 }
473 
474 #endif
475 
476 /*
477  * vgen_init() is called by an instance of vnet driver to initialize the
478  * corresponding generic transport layer. This layer uses Logical Domain
479  * Channels (LDCs) to communicate with the virtual switch in the service domain
480  * and also with peer vnets in other guest domains in the system.
481  *
482  * Arguments:
483  *   vnetp:   an opaque pointer to the vnet instance
484  *   regprop: frame to be transmitted
485  *   vnetdip: dip of the vnet device
486  *   macaddr: mac address of the vnet device
487  *
488  * Returns:
489  *	Sucess:  a handle to the vgen instance (vgen_t)
490  *	Failure: NULL
491  */
492 int
493 vgen_init(void *vnetp, uint64_t regprop, dev_info_t *vnetdip,
494     const uint8_t *macaddr, void **vgenhdl)
495 {
496 	vgen_t	*vgenp;
497 	int	instance;
498 	int	rv;
499 	char	qname[TASKQ_NAMELEN];
500 
501 	if ((vnetp == NULL) || (vnetdip == NULL))
502 		return (DDI_FAILURE);
503 
504 	instance = ddi_get_instance(vnetdip);
505 
506 	DBG1(NULL, NULL, "vnet(%d): enter\n", instance);
507 
508 	vgenp = kmem_zalloc(sizeof (vgen_t), KM_SLEEP);
509 
510 	vgenp->vnetp = vnetp;
511 	vgenp->instance = instance;
512 	vgenp->regprop = regprop;
513 	vgenp->vnetdip = vnetdip;
514 	bcopy(macaddr, &(vgenp->macaddr), ETHERADDRL);
515 	vgenp->phys_link_state = LINK_STATE_UNKNOWN;
516 
517 	/* allocate multicast table */
518 	vgenp->mctab = kmem_zalloc(VGEN_INIT_MCTAB_SIZE *
519 	    sizeof (struct ether_addr), KM_SLEEP);
520 	vgenp->mccount = 0;
521 	vgenp->mcsize = VGEN_INIT_MCTAB_SIZE;
522 
523 	mutex_init(&vgenp->lock, NULL, MUTEX_DRIVER, NULL);
524 	rw_init(&vgenp->vgenports.rwlock, NULL, RW_DRIVER, NULL);
525 
526 	(void) snprintf(qname, TASKQ_NAMELEN, "rxpool_taskq%d",
527 	    instance);
528 	if ((vgenp->rxp_taskq = ddi_taskq_create(vnetdip, qname, 1,
529 	    TASKQ_DEFAULTPRI, 0)) == NULL) {
530 		cmn_err(CE_WARN, "!vnet%d: Unable to create rx pool task queue",
531 		    instance);
532 		goto vgen_init_fail;
533 	}
534 
535 	rv = vgen_read_mdprops(vgenp);
536 	if (rv != 0) {
537 		goto vgen_init_fail;
538 	}
539 	*vgenhdl = (void *)vgenp;
540 
541 	DBG1(NULL, NULL, "vnet(%d): exit\n", instance);
542 	return (DDI_SUCCESS);
543 
544 vgen_init_fail:
545 	rw_destroy(&vgenp->vgenports.rwlock);
546 	mutex_destroy(&vgenp->lock);
547 	kmem_free(vgenp->mctab, VGEN_INIT_MCTAB_SIZE *
548 	    sizeof (struct ether_addr));
549 	if (VGEN_PRI_ETH_DEFINED(vgenp)) {
550 		kmem_free(vgenp->pri_types,
551 		    sizeof (uint16_t) * vgenp->pri_num_types);
552 		(void) vio_destroy_mblks(vgenp->pri_tx_vmp);
553 	}
554 	if (vgenp->rxp_taskq != NULL) {
555 		ddi_taskq_destroy(vgenp->rxp_taskq);
556 		vgenp->rxp_taskq = NULL;
557 	}
558 	KMEM_FREE(vgenp);
559 	return (DDI_FAILURE);
560 }
561 
562 int
563 vgen_init_mdeg(void *arg)
564 {
565 	vgen_t	*vgenp = (vgen_t *)arg;
566 
567 	/* register with MD event generator */
568 	return (vgen_mdeg_reg(vgenp));
569 }
570 
571 /*
572  * Called by vnet to undo the initializations done by vgen_init().
573  * The handle provided by generic transport during vgen_init() is the argument.
574  */
575 void
576 vgen_uninit(void *arg)
577 {
578 	vgen_t	*vgenp = (vgen_t *)arg;
579 
580 	if (vgenp == NULL) {
581 		return;
582 	}
583 
584 	DBG1(vgenp, NULL, "enter\n");
585 
586 	/* Unregister with MD event generator */
587 	vgen_mdeg_unreg(vgenp);
588 
589 	mutex_enter(&vgenp->lock);
590 
591 	/*
592 	 * Detach all ports from the device; note that the device should have
593 	 * been unplumbed by this time (See vnet_unattach() for the sequence)
594 	 * and thus vgen_stop() has already been invoked on all the ports.
595 	 */
596 	vgen_detach_ports(vgenp);
597 
598 	/*
599 	 * We now destroy the taskq used to clean up rx mblk pools that
600 	 * couldn't be destroyed when the ports/channels were detached.
601 	 * We implicitly wait for those tasks to complete in
602 	 * ddi_taskq_destroy().
603 	 */
604 	if (vgenp->rxp_taskq != NULL) {
605 		ddi_taskq_destroy(vgenp->rxp_taskq);
606 		vgenp->rxp_taskq = NULL;
607 	}
608 
609 	/* Free multicast table */
610 	kmem_free(vgenp->mctab, vgenp->mcsize * sizeof (struct ether_addr));
611 
612 	/* Free pri_types table */
613 	if (VGEN_PRI_ETH_DEFINED(vgenp)) {
614 		kmem_free(vgenp->pri_types,
615 		    sizeof (uint16_t) * vgenp->pri_num_types);
616 		(void) vio_destroy_mblks(vgenp->pri_tx_vmp);
617 	}
618 
619 	mutex_exit(&vgenp->lock);
620 	rw_destroy(&vgenp->vgenports.rwlock);
621 	mutex_destroy(&vgenp->lock);
622 
623 	DBG1(vgenp, NULL, "exit\n");
624 	KMEM_FREE(vgenp);
625 }
626 
627 /* enable transmit/receive for the device */
628 int
629 vgen_start(void *arg)
630 {
631 	vgen_port_t	*portp = (vgen_port_t *)arg;
632 	vgen_t		*vgenp = portp->vgenp;
633 
634 	DBG1(vgenp, NULL, "enter\n");
635 	mutex_enter(&portp->lock);
636 	vgen_port_init(portp);
637 	portp->flags |= VGEN_STARTED;
638 	mutex_exit(&portp->lock);
639 	DBG1(vgenp, NULL, "exit\n");
640 
641 	return (DDI_SUCCESS);
642 }
643 
644 /* stop transmit/receive */
645 void
646 vgen_stop(void *arg)
647 {
648 	vgen_port_t	*portp = (vgen_port_t *)arg;
649 	vgen_t		*vgenp = portp->vgenp;
650 
651 	DBG1(vgenp, NULL, "enter\n");
652 
653 	mutex_enter(&portp->lock);
654 	if (portp->flags & VGEN_STARTED) {
655 		vgen_port_uninit(portp);
656 		portp->flags &= ~(VGEN_STARTED);
657 	}
658 	mutex_exit(&portp->lock);
659 	DBG1(vgenp, NULL, "exit\n");
660 
661 }
662 
663 /* vgen transmit function */
664 static mblk_t *
665 vgen_tx(void *arg, mblk_t *mp)
666 {
667 	vgen_port_t	*portp;
668 	int		status;
669 
670 	portp = (vgen_port_t *)arg;
671 	status = vgen_portsend(portp, mp);
672 	if (status != VGEN_SUCCESS) {
673 		/* failure */
674 		return (mp);
675 	}
676 	/* success */
677 	return (NULL);
678 }
679 
680 /*
681  * This function provides any necessary tagging/untagging of the frames
682  * that are being transmitted over the port. It first verifies the vlan
683  * membership of the destination(port) and drops the packet if the
684  * destination doesn't belong to the given vlan.
685  *
686  * Arguments:
687  *   portp:     port over which the frames should be transmitted
688  *   mp:        frame to be transmitted
689  *   is_tagged:
690  *              B_TRUE: indicates frame header contains the vlan tag already.
691  *              B_FALSE: indicates frame is untagged.
692  *   vid:       vlan in which the frame should be transmitted.
693  *
694  * Returns:
695  *              Sucess: frame(mblk_t *) after doing the necessary tag/untag.
696  *              Failure: NULL
697  */
698 static mblk_t *
699 vgen_vlan_frame_fixtag(vgen_port_t *portp, mblk_t *mp, boolean_t is_tagged,
700     uint16_t vid)
701 {
702 	vgen_t		*vgenp;
703 	boolean_t	dst_tagged;
704 	int		rv;
705 
706 	vgenp = portp->vgenp;
707 
708 	/*
709 	 * If the packet is going to a vnet:
710 	 *   Check if the destination vnet is in the same vlan.
711 	 *   Check the frame header if tag or untag is needed.
712 	 *
713 	 * We do not check the above conditions if the packet is going to vsw:
714 	 *   vsw must be present implicitly in all the vlans that a vnet device
715 	 *   is configured into; even if vsw itself is not assigned to those
716 	 *   vlans as an interface. For instance, the packet might be destined
717 	 *   to another vnet(indirectly through vsw) or to an external host
718 	 *   which is in the same vlan as this vnet and vsw itself may not be
719 	 *   present in that vlan. Similarly packets going to vsw must be
720 	 *   always tagged(unless in the default-vlan) if not already tagged,
721 	 *   as we do not know the final destination. This is needed because
722 	 *   vsw must always invoke its switching function only after tagging
723 	 *   the packet; otherwise after switching function determines the
724 	 *   destination we cannot figure out if the destination belongs to the
725 	 *   the same vlan that the frame originated from and if it needs tag/
726 	 *   untag. Note that vsw will tag the packet itself when it receives
727 	 *   it over the channel from a client if needed. However, that is
728 	 *   needed only in the case of vlan unaware clients such as obp or
729 	 *   earlier versions of vnet.
730 	 *
731 	 */
732 	if (portp != vgenp->vsw_portp) {
733 		/*
734 		 * Packet going to a vnet. Check if the destination vnet is in
735 		 * the same vlan. Then check the frame header if tag/untag is
736 		 * needed.
737 		 */
738 		rv = vgen_vlan_lookup(portp->vlan_hashp, vid);
739 		if (rv == B_FALSE) {
740 			/* drop the packet */
741 			freemsg(mp);
742 			return (NULL);
743 		}
744 
745 		/* is the destination tagged or untagged in this vlan? */
746 		(vid == portp->pvid) ? (dst_tagged = B_FALSE) :
747 		    (dst_tagged = B_TRUE);
748 
749 		if (is_tagged == dst_tagged) {
750 			/* no tagging/untagging needed */
751 			return (mp);
752 		}
753 
754 		if (is_tagged == B_TRUE) {
755 			/* frame is tagged; destination needs untagged */
756 			mp = vnet_vlan_remove_tag(mp);
757 			return (mp);
758 		}
759 
760 		/* (is_tagged == B_FALSE): fallthru to tag tx packet: */
761 	}
762 
763 	/*
764 	 * Packet going to a vnet needs tagging.
765 	 * OR
766 	 * If the packet is going to vsw, then it must be tagged in all cases:
767 	 * unknown unicast, broadcast/multicast or to vsw interface.
768 	 */
769 
770 	if (is_tagged == B_FALSE) {
771 		mp = vnet_vlan_insert_tag(mp, vid);
772 	}
773 
774 	return (mp);
775 }
776 
777 /* transmit packets over the given port */
778 static int
779 vgen_portsend(vgen_port_t *portp, mblk_t *mp)
780 {
781 	vgen_ldc_t		*ldcp;
782 	int			status;
783 	int			rv = VGEN_SUCCESS;
784 	vgen_t			*vgenp;
785 	vnet_t			*vnetp;
786 	boolean_t		is_tagged;
787 	boolean_t		dec_refcnt = B_FALSE;
788 	uint16_t		vlan_id;
789 	struct ether_header	*ehp;
790 
791 	if (portp == NULL) {
792 		return (VGEN_FAILURE);
793 	}
794 
795 	vgenp = portp->vgenp;
796 	vnetp = vgenp->vnetp;
797 
798 	if (portp->use_vsw_port) {
799 		(void) atomic_inc_32(&vgenp->vsw_port_refcnt);
800 		portp = portp->vgenp->vsw_portp;
801 		ASSERT(portp != NULL);
802 		dec_refcnt = B_TRUE;
803 	}
804 
805 	/*
806 	 * Determine the vlan id that the frame belongs to.
807 	 */
808 	ehp = (struct ether_header *)mp->b_rptr;
809 	is_tagged = vgen_frame_lookup_vid(vnetp, ehp, &vlan_id);
810 
811 	if (vlan_id == vnetp->default_vlan_id) {
812 
813 		/* Frames in default vlan must be untagged */
814 		ASSERT(is_tagged == B_FALSE);
815 
816 		/*
817 		 * If the destination is a vnet-port verify it belongs to the
818 		 * default vlan; otherwise drop the packet. We do not need
819 		 * this check for vsw-port, as it should implicitly belong to
820 		 * this vlan; see comments in vgen_vlan_frame_fixtag().
821 		 */
822 		if (portp != vgenp->vsw_portp &&
823 		    portp->pvid != vnetp->default_vlan_id) {
824 			freemsg(mp);
825 			goto portsend_ret;
826 		}
827 
828 	} else {	/* frame not in default-vlan */
829 
830 		mp = vgen_vlan_frame_fixtag(portp, mp, is_tagged, vlan_id);
831 		if (mp == NULL) {
832 			goto portsend_ret;
833 		}
834 
835 	}
836 
837 	ldcp = portp->ldcp;
838 	status = ldcp->tx(ldcp, mp);
839 
840 	if (status != VGEN_TX_SUCCESS) {
841 		rv = VGEN_FAILURE;
842 	}
843 
844 portsend_ret:
845 	if (dec_refcnt == B_TRUE) {
846 		(void) atomic_dec_32(&vgenp->vsw_port_refcnt);
847 	}
848 	return (rv);
849 }
850 
851 /*
852  * Wrapper function to transmit normal and/or priority frames over the channel.
853  */
854 static int
855 vgen_ldcsend(void *arg, mblk_t *mp)
856 {
857 	vgen_ldc_t		*ldcp = (vgen_ldc_t *)arg;
858 	int			status;
859 	struct ether_header	*ehp;
860 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
861 	uint32_t		num_types;
862 	uint16_t		*types;
863 	int			i;
864 
865 	ASSERT(VGEN_PRI_ETH_DEFINED(vgenp));
866 
867 	num_types = vgenp->pri_num_types;
868 	types = vgenp->pri_types;
869 	ehp = (struct ether_header *)mp->b_rptr;
870 
871 	for (i = 0; i < num_types; i++) {
872 
873 		if (ehp->ether_type == types[i]) {
874 			/* priority frame, use pri tx function */
875 			vgen_ldcsend_pkt(ldcp, mp);
876 			return (VGEN_SUCCESS);
877 		}
878 
879 	}
880 
881 	if (ldcp->tx_dringdata == NULL) {
882 		freemsg(mp);
883 		return (VGEN_SUCCESS);
884 	}
885 
886 	status  = ldcp->tx_dringdata(ldcp, mp);
887 	return (status);
888 }
889 
890 /*
891  * This function transmits the frame in the payload of a raw data
892  * (VIO_PKT_DATA) message. Thus, it provides an Out-Of-Band path to
893  * send special frames with high priorities, without going through
894  * the normal data path which uses descriptor ring mechanism.
895  */
896 static void
897 vgen_ldcsend_pkt(void *arg, mblk_t *mp)
898 {
899 	vgen_ldc_t		*ldcp = (vgen_ldc_t *)arg;
900 	vio_raw_data_msg_t	*pkt;
901 	mblk_t			*bp;
902 	mblk_t			*nmp = NULL;
903 	vio_mblk_t		*vmp;
904 	caddr_t			dst;
905 	uint32_t		mblksz;
906 	uint32_t		size;
907 	uint32_t		nbytes;
908 	int			rv;
909 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
910 	vgen_stats_t		*statsp = &ldcp->stats;
911 
912 	/* drop the packet if ldc is not up or handshake is not done */
913 	if (ldcp->ldc_status != LDC_UP) {
914 		(void) atomic_inc_32(&statsp->tx_pri_fail);
915 		DWARN(vgenp, ldcp, "status(%d), dropping packet\n",
916 		    ldcp->ldc_status);
917 		goto send_pkt_exit;
918 	}
919 
920 	if (ldcp->hphase != VH_DONE) {
921 		(void) atomic_inc_32(&statsp->tx_pri_fail);
922 		DWARN(vgenp, ldcp, "hphase(%x), dropping packet\n",
923 		    ldcp->hphase);
924 		goto send_pkt_exit;
925 	}
926 
927 	size = msgsize(mp);
928 
929 	/* frame size bigger than available payload len of raw data msg ? */
930 	if (size > (size_t)(ldcp->msglen - VIO_PKT_DATA_HDRSIZE)) {
931 		(void) atomic_inc_32(&statsp->tx_pri_fail);
932 		DWARN(vgenp, ldcp, "invalid size(%d)\n", size);
933 		goto send_pkt_exit;
934 	}
935 
936 	if (size < ETHERMIN)
937 		size = ETHERMIN;
938 
939 	/* alloc space for a raw data message */
940 	vmp = vio_allocb(vgenp->pri_tx_vmp);
941 	if (vmp == NULL) {
942 		(void) atomic_inc_32(&statsp->tx_pri_fail);
943 		DWARN(vgenp, ldcp, "vio_allocb failed\n");
944 		goto send_pkt_exit;
945 	} else {
946 		nmp = vmp->mp;
947 	}
948 	pkt = (vio_raw_data_msg_t *)nmp->b_rptr;
949 
950 	/* copy frame into the payload of raw data message */
951 	dst = (caddr_t)pkt->data;
952 	for (bp = mp; bp != NULL; bp = bp->b_cont) {
953 		mblksz = MBLKL(bp);
954 		bcopy(bp->b_rptr, dst, mblksz);
955 		dst += mblksz;
956 	}
957 
958 	vmp->state = VIO_MBLK_HAS_DATA;
959 
960 	/* setup the raw data msg */
961 	pkt->tag.vio_msgtype = VIO_TYPE_DATA;
962 	pkt->tag.vio_subtype = VIO_SUBTYPE_INFO;
963 	pkt->tag.vio_subtype_env = VIO_PKT_DATA;
964 	pkt->tag.vio_sid = ldcp->local_sid;
965 	nbytes = VIO_PKT_DATA_HDRSIZE + size;
966 
967 	/* send the msg over ldc */
968 	rv = vgen_sendmsg(ldcp, (caddr_t)pkt, nbytes, B_FALSE);
969 	if (rv != VGEN_SUCCESS) {
970 		(void) atomic_inc_32(&statsp->tx_pri_fail);
971 		DWARN(vgenp, ldcp, "Error sending priority frame\n");
972 		if (rv == ECONNRESET) {
973 			(void) vgen_handle_evt_reset(ldcp, VGEN_OTHER);
974 		}
975 		goto send_pkt_exit;
976 	}
977 
978 	/* update stats */
979 	(void) atomic_inc_64(&statsp->tx_pri_packets);
980 	(void) atomic_add_64(&statsp->tx_pri_bytes, size);
981 
982 send_pkt_exit:
983 	if (nmp != NULL)
984 		freemsg(nmp);
985 	freemsg(mp);
986 }
987 
988 /*
989  * enable/disable a multicast address
990  * note that the cblock of the ldc channel connected to the vsw is used for
991  * synchronization of the mctab.
992  */
993 int
994 vgen_multicst(void *arg, boolean_t add, const uint8_t *mca)
995 {
996 	vgen_t			*vgenp;
997 	vnet_mcast_msg_t	mcastmsg;
998 	vio_msg_tag_t		*tagp;
999 	vgen_port_t		*portp;
1000 	vgen_ldc_t		*ldcp;
1001 	struct ether_addr	*addrp;
1002 	int			rv = DDI_FAILURE;
1003 	uint32_t		i;
1004 
1005 	portp = (vgen_port_t *)arg;
1006 	vgenp = portp->vgenp;
1007 
1008 	if (portp->is_vsw_port != B_TRUE) {
1009 		return (DDI_SUCCESS);
1010 	}
1011 
1012 	addrp = (struct ether_addr *)mca;
1013 	tagp = &mcastmsg.tag;
1014 	bzero(&mcastmsg, sizeof (mcastmsg));
1015 
1016 	ldcp = portp->ldcp;
1017 	if (ldcp == NULL) {
1018 		return (DDI_FAILURE);
1019 	}
1020 
1021 	mutex_enter(&ldcp->cblock);
1022 
1023 	if (ldcp->hphase == VH_DONE) {
1024 		/*
1025 		 * If handshake is done, send a msg to vsw to add/remove
1026 		 * the multicast address. Otherwise, we just update this
1027 		 * mcast address in our table and the table will be sync'd
1028 		 * with vsw when handshake completes.
1029 		 */
1030 		tagp->vio_msgtype = VIO_TYPE_CTRL;
1031 		tagp->vio_subtype = VIO_SUBTYPE_INFO;
1032 		tagp->vio_subtype_env = VNET_MCAST_INFO;
1033 		tagp->vio_sid = ldcp->local_sid;
1034 		bcopy(mca, &(mcastmsg.mca), ETHERADDRL);
1035 		mcastmsg.set = add;
1036 		mcastmsg.count = 1;
1037 		if (vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (mcastmsg),
1038 		    B_FALSE) != VGEN_SUCCESS) {
1039 			DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
1040 			rv = DDI_FAILURE;
1041 			goto vgen_mcast_exit;
1042 		}
1043 	}
1044 
1045 	if (add) {
1046 
1047 		/* expand multicast table if necessary */
1048 		if (vgenp->mccount >= vgenp->mcsize) {
1049 			struct ether_addr	*newtab;
1050 			uint32_t		newsize;
1051 
1052 
1053 			newsize = vgenp->mcsize * 2;
1054 
1055 			newtab = kmem_zalloc(newsize *
1056 			    sizeof (struct ether_addr), KM_NOSLEEP);
1057 			if (newtab == NULL)
1058 				goto vgen_mcast_exit;
1059 			bcopy(vgenp->mctab, newtab, vgenp->mcsize *
1060 			    sizeof (struct ether_addr));
1061 			kmem_free(vgenp->mctab,
1062 			    vgenp->mcsize * sizeof (struct ether_addr));
1063 
1064 			vgenp->mctab = newtab;
1065 			vgenp->mcsize = newsize;
1066 		}
1067 
1068 		/* add address to the table */
1069 		vgenp->mctab[vgenp->mccount++] = *addrp;
1070 
1071 	} else {
1072 
1073 		/* delete address from the table */
1074 		for (i = 0; i < vgenp->mccount; i++) {
1075 			if (ether_cmp(addrp, &(vgenp->mctab[i])) == 0) {
1076 
1077 				/*
1078 				 * If there's more than one address in this
1079 				 * table, delete the unwanted one by moving
1080 				 * the last one in the list over top of it;
1081 				 * otherwise, just remove it.
1082 				 */
1083 				if (vgenp->mccount > 1) {
1084 					vgenp->mctab[i] =
1085 					    vgenp->mctab[vgenp->mccount-1];
1086 				}
1087 				vgenp->mccount--;
1088 				break;
1089 			}
1090 		}
1091 	}
1092 
1093 	rv = DDI_SUCCESS;
1094 
1095 vgen_mcast_exit:
1096 
1097 	mutex_exit(&ldcp->cblock);
1098 	return (rv);
1099 }
1100 
1101 /* set or clear promiscuous mode on the device */
1102 static int
1103 vgen_promisc(void *arg, boolean_t on)
1104 {
1105 	_NOTE(ARGUNUSED(arg, on))
1106 	return (DDI_SUCCESS);
1107 }
1108 
1109 /* set the unicast mac address of the device */
1110 static int
1111 vgen_unicst(void *arg, const uint8_t *mca)
1112 {
1113 	_NOTE(ARGUNUSED(arg, mca))
1114 	return (DDI_SUCCESS);
1115 }
1116 
1117 /* get device statistics */
1118 int
1119 vgen_stat(void *arg, uint_t stat, uint64_t *val)
1120 {
1121 	vgen_port_t	*portp = (vgen_port_t *)arg;
1122 
1123 	*val = vgen_port_stat(portp, stat);
1124 	return (0);
1125 }
1126 
1127 /* vgen internal functions */
1128 /* detach all ports from the device */
1129 static void
1130 vgen_detach_ports(vgen_t *vgenp)
1131 {
1132 	vgen_port_t	*portp;
1133 	vgen_portlist_t	*plistp;
1134 
1135 	plistp = &(vgenp->vgenports);
1136 	WRITE_ENTER(&plistp->rwlock);
1137 	while ((portp = plistp->headp) != NULL) {
1138 		vgen_port_detach(portp);
1139 	}
1140 	RW_EXIT(&plistp->rwlock);
1141 }
1142 
1143 /*
1144  * detach the given port.
1145  */
1146 static void
1147 vgen_port_detach(vgen_port_t *portp)
1148 {
1149 	vgen_t		*vgenp;
1150 	int		port_num;
1151 
1152 	vgenp = portp->vgenp;
1153 	port_num = portp->port_num;
1154 
1155 	DBG1(vgenp, NULL, "port(%d):enter\n", port_num);
1156 
1157 	/*
1158 	 * If this port is connected to the vswitch, then
1159 	 * potentially there could be ports that may be using
1160 	 * this port to transmit packets. To address this do
1161 	 * the following:
1162 	 *	- First set vgenp->vsw_portp to NULL, so that
1163 	 *	  its not used after that.
1164 	 *	- Then wait for the refcnt to go down to 0.
1165 	 *	- Now we can safely detach this port.
1166 	 */
1167 	if (vgenp->vsw_portp == portp) {
1168 		vgenp->vsw_portp = NULL;
1169 		while (vgenp->vsw_port_refcnt > 0) {
1170 			delay(drv_usectohz(vgen_tx_delay));
1171 		}
1172 		(void) atomic_swap_32(&vgenp->vsw_port_refcnt, 0);
1173 	}
1174 
1175 	if (portp->vhp != NULL) {
1176 		vio_net_resource_unreg(portp->vhp);
1177 		portp->vhp = NULL;
1178 	}
1179 
1180 	vgen_vlan_destroy_hash(portp);
1181 
1182 	/* remove it from port list */
1183 	vgen_port_list_remove(portp);
1184 
1185 	/* detach channels from this port */
1186 	vgen_ldc_detach(portp->ldcp);
1187 
1188 	if (portp->num_ldcs != 0) {
1189 		kmem_free(portp->ldc_ids, portp->num_ldcs * sizeof (uint64_t));
1190 		portp->num_ldcs = 0;
1191 	}
1192 
1193 	mutex_destroy(&portp->lock);
1194 	KMEM_FREE(portp);
1195 
1196 	DBG1(vgenp, NULL, "port(%d):exit\n", port_num);
1197 }
1198 
1199 /* add a port to port list */
1200 static void
1201 vgen_port_list_insert(vgen_port_t *portp)
1202 {
1203 	vgen_portlist_t	*plistp;
1204 	vgen_t		*vgenp;
1205 
1206 	vgenp = portp->vgenp;
1207 	plistp = &(vgenp->vgenports);
1208 
1209 	if (plistp->headp == NULL) {
1210 		plistp->headp = portp;
1211 	} else {
1212 		plistp->tailp->nextp = portp;
1213 	}
1214 	plistp->tailp = portp;
1215 	portp->nextp = NULL;
1216 }
1217 
1218 /* remove a port from port list */
1219 static void
1220 vgen_port_list_remove(vgen_port_t *portp)
1221 {
1222 	vgen_port_t	*prevp;
1223 	vgen_port_t	*nextp;
1224 	vgen_portlist_t	*plistp;
1225 	vgen_t		*vgenp;
1226 
1227 	vgenp = portp->vgenp;
1228 
1229 	plistp = &(vgenp->vgenports);
1230 
1231 	if (plistp->headp == NULL)
1232 		return;
1233 
1234 	if (portp == plistp->headp) {
1235 		plistp->headp = portp->nextp;
1236 		if (portp == plistp->tailp)
1237 			plistp->tailp = plistp->headp;
1238 	} else {
1239 		for (prevp = plistp->headp;
1240 		    ((nextp = prevp->nextp) != NULL) && (nextp != portp);
1241 		    prevp = nextp)
1242 			;
1243 		if (nextp == portp) {
1244 			prevp->nextp = portp->nextp;
1245 		}
1246 		if (portp == plistp->tailp)
1247 			plistp->tailp = prevp;
1248 	}
1249 }
1250 
1251 /* lookup a port in the list based on port_num */
1252 static vgen_port_t *
1253 vgen_port_lookup(vgen_portlist_t *plistp, int port_num)
1254 {
1255 	vgen_port_t *portp = NULL;
1256 
1257 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
1258 		if (portp->port_num == port_num) {
1259 			break;
1260 		}
1261 	}
1262 
1263 	return (portp);
1264 }
1265 
1266 static void
1267 vgen_port_init(vgen_port_t *portp)
1268 {
1269 	/* Add the port to the specified vlans */
1270 	vgen_vlan_add_ids(portp);
1271 
1272 	/* Bring up the channel */
1273 	(void) vgen_ldc_init(portp->ldcp);
1274 }
1275 
1276 static void
1277 vgen_port_uninit(vgen_port_t *portp)
1278 {
1279 	vgen_ldc_uninit(portp->ldcp);
1280 
1281 	/* remove the port from vlans it has been assigned to */
1282 	vgen_vlan_remove_ids(portp);
1283 }
1284 
1285 /*
1286  * Scan the machine description for this instance of vnet
1287  * and read its properties. Called only from vgen_init().
1288  * Returns: 0 on success, 1 on failure.
1289  */
1290 static int
1291 vgen_read_mdprops(vgen_t *vgenp)
1292 {
1293 	vnet_t		*vnetp = vgenp->vnetp;
1294 	md_t		*mdp = NULL;
1295 	mde_cookie_t	rootnode;
1296 	mde_cookie_t	*listp = NULL;
1297 	uint64_t	cfgh;
1298 	char		*name;
1299 	int		rv = 1;
1300 	int		num_nodes = 0;
1301 	int		num_devs = 0;
1302 	int		listsz = 0;
1303 	int		i;
1304 
1305 	if ((mdp = md_get_handle()) == NULL) {
1306 		return (rv);
1307 	}
1308 
1309 	num_nodes = md_node_count(mdp);
1310 	ASSERT(num_nodes > 0);
1311 
1312 	listsz = num_nodes * sizeof (mde_cookie_t);
1313 	listp = (mde_cookie_t *)kmem_zalloc(listsz, KM_SLEEP);
1314 
1315 	rootnode = md_root_node(mdp);
1316 
1317 	/* search for all "virtual_device" nodes */
1318 	num_devs = md_scan_dag(mdp, rootnode,
1319 	    md_find_name(mdp, vdev_propname),
1320 	    md_find_name(mdp, "fwd"), listp);
1321 	if (num_devs <= 0) {
1322 		goto vgen_readmd_exit;
1323 	}
1324 
1325 	/*
1326 	 * Now loop through the list of virtual-devices looking for
1327 	 * devices with name "network" and for each such device compare
1328 	 * its instance with what we have from the 'reg' property to
1329 	 * find the right node in MD and then read all its properties.
1330 	 */
1331 	for (i = 0; i < num_devs; i++) {
1332 
1333 		if (md_get_prop_str(mdp, listp[i], "name", &name) != 0) {
1334 			goto vgen_readmd_exit;
1335 		}
1336 
1337 		/* is this a "network" device? */
1338 		if (strcmp(name, vnet_propname) != 0)
1339 			continue;
1340 
1341 		if (md_get_prop_val(mdp, listp[i], "cfg-handle", &cfgh) != 0) {
1342 			goto vgen_readmd_exit;
1343 		}
1344 
1345 		/* is this the required instance of vnet? */
1346 		if (vgenp->regprop != cfgh)
1347 			continue;
1348 
1349 		/*
1350 		 * Read the 'linkprop' property to know if this vnet
1351 		 * device should get physical link updates from vswitch.
1352 		 */
1353 		vgen_linkprop_read(vgenp, mdp, listp[i],
1354 		    &vnetp->pls_update);
1355 
1356 		/*
1357 		 * Read the mtu. Note that we set the mtu of vnet device within
1358 		 * this routine itself, after validating the range.
1359 		 */
1360 		vgen_mtu_read(vgenp, mdp, listp[i], &vnetp->mtu);
1361 		if (vnetp->mtu < ETHERMTU || vnetp->mtu > VNET_MAX_MTU) {
1362 			vnetp->mtu = ETHERMTU;
1363 		}
1364 		vgenp->max_frame_size = vnetp->mtu +
1365 		    sizeof (struct ether_header) + VLAN_TAGSZ;
1366 
1367 		/* read priority ether types */
1368 		vgen_read_pri_eth_types(vgenp, mdp, listp[i]);
1369 
1370 		/* read vlan id properties of this vnet instance */
1371 		vgen_vlan_read_ids(vgenp, VGEN_LOCAL, mdp, listp[i],
1372 		    &vnetp->pvid, &vnetp->vids, &vnetp->nvids,
1373 		    &vnetp->default_vlan_id);
1374 
1375 		rv = 0;
1376 		break;
1377 	}
1378 
1379 vgen_readmd_exit:
1380 
1381 	kmem_free(listp, listsz);
1382 	(void) md_fini_handle(mdp);
1383 	return (rv);
1384 }
1385 
1386 /*
1387  * Read vlan id properties of the given MD node.
1388  * Arguments:
1389  *   arg:          device argument(vnet device or a port)
1390  *   type:         type of arg; VGEN_LOCAL(vnet device) or VGEN_PEER(port)
1391  *   mdp:          machine description
1392  *   node:         md node cookie
1393  *
1394  * Returns:
1395  *   pvidp:        port-vlan-id of the node
1396  *   vidspp:       list of vlan-ids of the node
1397  *   nvidsp:       # of vlan-ids in the list
1398  *   default_idp:  default-vlan-id of the node(if node is vnet device)
1399  */
1400 static void
1401 vgen_vlan_read_ids(void *arg, int type, md_t *mdp, mde_cookie_t node,
1402     uint16_t *pvidp, uint16_t **vidspp, uint16_t *nvidsp,
1403     uint16_t *default_idp)
1404 {
1405 	vgen_t		*vgenp;
1406 	vnet_t		*vnetp;
1407 	vgen_port_t	*portp;
1408 	char		*pvid_propname;
1409 	char		*vid_propname;
1410 	uint_t		nvids;
1411 	uint32_t	vids_size;
1412 	int		rv;
1413 	int		i;
1414 	uint64_t	*data;
1415 	uint64_t	val;
1416 	int		size;
1417 	int		inst;
1418 
1419 	if (type == VGEN_LOCAL) {
1420 
1421 		vgenp = (vgen_t *)arg;
1422 		vnetp = vgenp->vnetp;
1423 		pvid_propname = vgen_pvid_propname;
1424 		vid_propname = vgen_vid_propname;
1425 		inst = vnetp->instance;
1426 
1427 	} else if (type == VGEN_PEER) {
1428 
1429 		portp = (vgen_port_t *)arg;
1430 		vgenp = portp->vgenp;
1431 		vnetp = vgenp->vnetp;
1432 		pvid_propname = port_pvid_propname;
1433 		vid_propname = port_vid_propname;
1434 		inst = portp->port_num;
1435 
1436 	} else {
1437 		return;
1438 	}
1439 
1440 	if (type == VGEN_LOCAL && default_idp != NULL) {
1441 		rv = md_get_prop_val(mdp, node, vgen_dvid_propname, &val);
1442 		if (rv != 0) {
1443 			DWARN(vgenp, NULL, "prop(%s) not found",
1444 			    vgen_dvid_propname);
1445 
1446 			*default_idp = vnet_default_vlan_id;
1447 		} else {
1448 			*default_idp = val & 0xFFF;
1449 			DBG2(vgenp, NULL, "%s(%d): (%d)\n", vgen_dvid_propname,
1450 			    inst, *default_idp);
1451 		}
1452 	}
1453 
1454 	rv = md_get_prop_val(mdp, node, pvid_propname, &val);
1455 	if (rv != 0) {
1456 		DWARN(vgenp, NULL, "prop(%s) not found", pvid_propname);
1457 		*pvidp = vnet_default_vlan_id;
1458 	} else {
1459 
1460 		*pvidp = val & 0xFFF;
1461 		DBG2(vgenp, NULL, "%s(%d): (%d)\n",
1462 		    pvid_propname, inst, *pvidp);
1463 	}
1464 
1465 	rv = md_get_prop_data(mdp, node, vid_propname, (uint8_t **)&data,
1466 	    &size);
1467 	if (rv != 0) {
1468 		DBG2(vgenp, NULL, "prop(%s) not found", vid_propname);
1469 		size = 0;
1470 	} else {
1471 		size /= sizeof (uint64_t);
1472 	}
1473 	nvids = size;
1474 
1475 	if (nvids != 0) {
1476 		DBG2(vgenp, NULL, "%s(%d): ", vid_propname, inst);
1477 		vids_size = sizeof (uint16_t) * nvids;
1478 		*vidspp = kmem_zalloc(vids_size, KM_SLEEP);
1479 		for (i = 0; i < nvids; i++) {
1480 			(*vidspp)[i] = data[i] & 0xFFFF;
1481 			DBG2(vgenp, NULL, " %d ", (*vidspp)[i]);
1482 		}
1483 		DBG2(vgenp, NULL, "\n");
1484 	}
1485 
1486 	*nvidsp = nvids;
1487 }
1488 
1489 /*
1490  * Create a vlan id hash table for the given port.
1491  */
1492 static void
1493 vgen_vlan_create_hash(vgen_port_t *portp)
1494 {
1495 	char		hashname[MAXNAMELEN];
1496 
1497 	(void) snprintf(hashname, MAXNAMELEN, "port%d-vlan-hash",
1498 	    portp->port_num);
1499 
1500 	portp->vlan_nchains = vgen_vlan_nchains;
1501 	portp->vlan_hashp = mod_hash_create_idhash(hashname,
1502 	    portp->vlan_nchains, mod_hash_null_valdtor);
1503 }
1504 
1505 /*
1506  * Destroy the vlan id hash table in the given port.
1507  */
1508 static void
1509 vgen_vlan_destroy_hash(vgen_port_t *portp)
1510 {
1511 	if (portp->vlan_hashp != NULL) {
1512 		mod_hash_destroy_hash(portp->vlan_hashp);
1513 		portp->vlan_hashp = NULL;
1514 		portp->vlan_nchains = 0;
1515 	}
1516 }
1517 
1518 /*
1519  * Add a port to the vlans specified in its port properites.
1520  */
1521 static void
1522 vgen_vlan_add_ids(vgen_port_t *portp)
1523 {
1524 	int		rv;
1525 	int		i;
1526 
1527 	rv = mod_hash_insert(portp->vlan_hashp,
1528 	    (mod_hash_key_t)VLAN_ID_KEY(portp->pvid),
1529 	    (mod_hash_val_t)B_TRUE);
1530 	ASSERT(rv == 0);
1531 
1532 	for (i = 0; i < portp->nvids; i++) {
1533 		rv = mod_hash_insert(portp->vlan_hashp,
1534 		    (mod_hash_key_t)VLAN_ID_KEY(portp->vids[i]),
1535 		    (mod_hash_val_t)B_TRUE);
1536 		ASSERT(rv == 0);
1537 	}
1538 }
1539 
1540 /*
1541  * Remove a port from the vlans it has been assigned to.
1542  */
1543 static void
1544 vgen_vlan_remove_ids(vgen_port_t *portp)
1545 {
1546 	int		rv;
1547 	int		i;
1548 	mod_hash_val_t	vp;
1549 
1550 	rv = mod_hash_remove(portp->vlan_hashp,
1551 	    (mod_hash_key_t)VLAN_ID_KEY(portp->pvid),
1552 	    (mod_hash_val_t *)&vp);
1553 	ASSERT(rv == 0);
1554 
1555 	for (i = 0; i < portp->nvids; i++) {
1556 		rv = mod_hash_remove(portp->vlan_hashp,
1557 		    (mod_hash_key_t)VLAN_ID_KEY(portp->vids[i]),
1558 		    (mod_hash_val_t *)&vp);
1559 		ASSERT(rv == 0);
1560 	}
1561 }
1562 
1563 /*
1564  * Lookup the vlan id of the given tx frame. If it is a vlan-tagged frame,
1565  * then the vlan-id is available in the tag; otherwise, its vlan id is
1566  * implicitly obtained from the port-vlan-id of the vnet device.
1567  * The vlan id determined is returned in vidp.
1568  * Returns: B_TRUE if it is a tagged frame; B_FALSE if it is untagged.
1569  */
1570 static boolean_t
1571 vgen_frame_lookup_vid(vnet_t *vnetp, struct ether_header *ehp, uint16_t *vidp)
1572 {
1573 	struct ether_vlan_header	*evhp;
1574 
1575 	/* If it's a tagged frame, get the vlan id from vlan header */
1576 	if (ehp->ether_type == ETHERTYPE_VLAN) {
1577 
1578 		evhp = (struct ether_vlan_header *)ehp;
1579 		*vidp = VLAN_ID(ntohs(evhp->ether_tci));
1580 		return (B_TRUE);
1581 	}
1582 
1583 	/* Untagged frame, vlan-id is the pvid of vnet device */
1584 	*vidp = vnetp->pvid;
1585 	return (B_FALSE);
1586 }
1587 
1588 /*
1589  * Find the given vlan id in the hash table.
1590  * Return: B_TRUE if the id is found; B_FALSE if not found.
1591  */
1592 static boolean_t
1593 vgen_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid)
1594 {
1595 	int		rv;
1596 	mod_hash_val_t	vp;
1597 
1598 	rv = mod_hash_find(vlan_hashp, VLAN_ID_KEY(vid), (mod_hash_val_t *)&vp);
1599 
1600 	if (rv != 0)
1601 		return (B_FALSE);
1602 
1603 	return (B_TRUE);
1604 }
1605 
1606 /*
1607  * This function reads "priority-ether-types" property from md. This property
1608  * is used to enable support for priority frames. Applications which need
1609  * guaranteed and timely delivery of certain high priority frames to/from
1610  * a vnet or vsw within ldoms, should configure this property by providing
1611  * the ether type(s) for which the priority facility is needed.
1612  * Normal data frames are delivered over a ldc channel using the descriptor
1613  * ring mechanism which is constrained by factors such as descriptor ring size,
1614  * the rate at which the ring is processed at the peer ldc end point, etc.
1615  * The priority mechanism provides an Out-Of-Band path to send/receive frames
1616  * as raw pkt data (VIO_PKT_DATA) messages over the channel, avoiding the
1617  * descriptor ring path and enables a more reliable and timely delivery of
1618  * frames to the peer.
1619  */
1620 static void
1621 vgen_read_pri_eth_types(vgen_t *vgenp, md_t *mdp, mde_cookie_t node)
1622 {
1623 	int		rv;
1624 	uint16_t	*types;
1625 	uint64_t	*data;
1626 	int		size;
1627 	int		i;
1628 	size_t		mblk_sz;
1629 
1630 	rv = md_get_prop_data(mdp, node, pri_types_propname,
1631 	    (uint8_t **)&data, &size);
1632 	if (rv != 0) {
1633 		/*
1634 		 * Property may not exist if we are running pre-ldoms1.1 f/w.
1635 		 * Check if 'vgen_pri_eth_type' has been set in that case.
1636 		 */
1637 		if (vgen_pri_eth_type != 0) {
1638 			size = sizeof (vgen_pri_eth_type);
1639 			data = &vgen_pri_eth_type;
1640 		} else {
1641 			DBG2(vgenp, NULL,
1642 			    "prop(%s) not found", pri_types_propname);
1643 			size = 0;
1644 		}
1645 	}
1646 
1647 	if (size == 0) {
1648 		vgenp->pri_num_types = 0;
1649 		return;
1650 	}
1651 
1652 	/*
1653 	 * we have some priority-ether-types defined;
1654 	 * allocate a table of these types and also
1655 	 * allocate a pool of mblks to transmit these
1656 	 * priority packets.
1657 	 */
1658 	size /= sizeof (uint64_t);
1659 	vgenp->pri_num_types = size;
1660 	vgenp->pri_types = kmem_zalloc(size * sizeof (uint16_t), KM_SLEEP);
1661 	for (i = 0, types = vgenp->pri_types; i < size; i++) {
1662 		types[i] = data[i] & 0xFFFF;
1663 	}
1664 	mblk_sz = (VIO_PKT_DATA_HDRSIZE + vgenp->max_frame_size + 7) & ~7;
1665 	(void) vio_create_mblks(vgen_pri_tx_nmblks, mblk_sz, NULL,
1666 	    &vgenp->pri_tx_vmp);
1667 }
1668 
1669 static void
1670 vgen_mtu_read(vgen_t *vgenp, md_t *mdp, mde_cookie_t node, uint32_t *mtu)
1671 {
1672 	int		rv;
1673 	uint64_t	val;
1674 	char		*mtu_propname;
1675 
1676 	mtu_propname = vgen_mtu_propname;
1677 
1678 	rv = md_get_prop_val(mdp, node, mtu_propname, &val);
1679 	if (rv != 0) {
1680 		DWARN(vgenp, NULL, "prop(%s) not found", mtu_propname);
1681 		*mtu = vnet_ethermtu;
1682 	} else {
1683 
1684 		*mtu = val & 0xFFFF;
1685 		DBG2(vgenp, NULL, "%s(%d): (%d)\n", mtu_propname,
1686 		    vgenp->instance, *mtu);
1687 	}
1688 }
1689 
1690 static void
1691 vgen_linkprop_read(vgen_t *vgenp, md_t *mdp, mde_cookie_t node,
1692     boolean_t *pls)
1693 {
1694 	int		rv;
1695 	uint64_t	val;
1696 	char		*linkpropname;
1697 
1698 	linkpropname = vgen_linkprop_propname;
1699 
1700 	rv = md_get_prop_val(mdp, node, linkpropname, &val);
1701 	if (rv != 0) {
1702 		DWARN(vgenp, NULL, "prop(%s) not found", linkpropname);
1703 		*pls = B_FALSE;
1704 	} else {
1705 
1706 		*pls = (val & 0x1) ?  B_TRUE : B_FALSE;
1707 		DBG2(vgenp, NULL, "%s(%d): (%d)\n", linkpropname,
1708 		    vgenp->instance, *pls);
1709 	}
1710 }
1711 
1712 /* register with MD event generator */
1713 static int
1714 vgen_mdeg_reg(vgen_t *vgenp)
1715 {
1716 	mdeg_prop_spec_t	*pspecp;
1717 	mdeg_node_spec_t	*parentp;
1718 	uint_t			templatesz;
1719 	int			rv;
1720 	mdeg_handle_t		dev_hdl = 0;
1721 	mdeg_handle_t		port_hdl = 0;
1722 
1723 	templatesz = sizeof (vgen_prop_template);
1724 	pspecp = kmem_zalloc(templatesz, KM_NOSLEEP);
1725 	if (pspecp == NULL) {
1726 		return (DDI_FAILURE);
1727 	}
1728 	parentp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_NOSLEEP);
1729 	if (parentp == NULL) {
1730 		kmem_free(pspecp, templatesz);
1731 		return (DDI_FAILURE);
1732 	}
1733 
1734 	bcopy(vgen_prop_template, pspecp, templatesz);
1735 
1736 	/*
1737 	 * NOTE: The instance here refers to the value of "reg" property and
1738 	 * not the dev_info instance (ddi_get_instance()) of vnet.
1739 	 */
1740 	VGEN_SET_MDEG_PROP_INST(pspecp, vgenp->regprop);
1741 
1742 	parentp->namep = "virtual-device";
1743 	parentp->specp = pspecp;
1744 
1745 	/* save parentp in vgen_t */
1746 	vgenp->mdeg_parentp = parentp;
1747 
1748 	/*
1749 	 * Register an interest in 'virtual-device' nodes with a
1750 	 * 'name' property of 'network'
1751 	 */
1752 	rv = mdeg_register(parentp, &vdev_match, vgen_mdeg_cb, vgenp, &dev_hdl);
1753 	if (rv != MDEG_SUCCESS) {
1754 		DERR(vgenp, NULL, "mdeg_register failed\n");
1755 		goto mdeg_reg_fail;
1756 	}
1757 
1758 	/* Register an interest in 'port' nodes */
1759 	rv = mdeg_register(parentp, &vport_match, vgen_mdeg_port_cb, vgenp,
1760 	    &port_hdl);
1761 	if (rv != MDEG_SUCCESS) {
1762 		DERR(vgenp, NULL, "mdeg_register failed\n");
1763 		goto mdeg_reg_fail;
1764 	}
1765 
1766 	/* save mdeg handle in vgen_t */
1767 	vgenp->mdeg_dev_hdl = dev_hdl;
1768 	vgenp->mdeg_port_hdl = port_hdl;
1769 
1770 	return (DDI_SUCCESS);
1771 
1772 mdeg_reg_fail:
1773 	if (dev_hdl != 0) {
1774 		(void) mdeg_unregister(dev_hdl);
1775 	}
1776 	KMEM_FREE(parentp);
1777 	kmem_free(pspecp, templatesz);
1778 	vgenp->mdeg_parentp = NULL;
1779 	return (DDI_FAILURE);
1780 }
1781 
1782 /* unregister with MD event generator */
1783 static void
1784 vgen_mdeg_unreg(vgen_t *vgenp)
1785 {
1786 	if (vgenp->mdeg_dev_hdl != 0) {
1787 		(void) mdeg_unregister(vgenp->mdeg_dev_hdl);
1788 		vgenp->mdeg_dev_hdl = 0;
1789 	}
1790 	if (vgenp->mdeg_port_hdl != 0) {
1791 		(void) mdeg_unregister(vgenp->mdeg_port_hdl);
1792 		vgenp->mdeg_port_hdl = 0;
1793 	}
1794 
1795 	if (vgenp->mdeg_parentp != NULL) {
1796 		kmem_free(vgenp->mdeg_parentp->specp,
1797 		    sizeof (vgen_prop_template));
1798 		KMEM_FREE(vgenp->mdeg_parentp);
1799 		vgenp->mdeg_parentp = NULL;
1800 	}
1801 }
1802 
1803 /* mdeg callback function for the port node */
1804 static int
1805 vgen_mdeg_port_cb(void *cb_argp, mdeg_result_t *resp)
1806 {
1807 	int		idx;
1808 	int		vsw_idx = -1;
1809 	uint64_t	val;
1810 	vgen_t		*vgenp;
1811 
1812 	if ((resp == NULL) || (cb_argp == NULL)) {
1813 		return (MDEG_FAILURE);
1814 	}
1815 
1816 	vgenp = (vgen_t *)cb_argp;
1817 	DBG1(vgenp, NULL, "enter\n");
1818 
1819 	mutex_enter(&vgenp->lock);
1820 
1821 	DBG1(vgenp, NULL, "ports: removed(%x), "
1822 	"added(%x), updated(%x)\n", resp->removed.nelem,
1823 	    resp->added.nelem, resp->match_curr.nelem);
1824 
1825 	for (idx = 0; idx < resp->removed.nelem; idx++) {
1826 		(void) vgen_remove_port(vgenp, resp->removed.mdp,
1827 		    resp->removed.mdep[idx]);
1828 	}
1829 
1830 	if (vgenp->vsw_portp == NULL) {
1831 		/*
1832 		 * find vsw_port and add it first, because other ports need
1833 		 * this when adding fdb entry (see vgen_port_init()).
1834 		 */
1835 		for (idx = 0; idx < resp->added.nelem; idx++) {
1836 			if (!(md_get_prop_val(resp->added.mdp,
1837 			    resp->added.mdep[idx], swport_propname, &val))) {
1838 				if (val == 0) {
1839 					/*
1840 					 * This port is connected to the
1841 					 * vsw on service domain.
1842 					 */
1843 					vsw_idx = idx;
1844 					if (vgen_add_port(vgenp,
1845 					    resp->added.mdp,
1846 					    resp->added.mdep[idx]) !=
1847 					    DDI_SUCCESS) {
1848 						cmn_err(CE_NOTE, "vnet%d Could "
1849 						    "not initialize virtual "
1850 						    "switch port.",
1851 						    vgenp->instance);
1852 						mutex_exit(&vgenp->lock);
1853 						return (MDEG_FAILURE);
1854 					}
1855 					break;
1856 				}
1857 			}
1858 		}
1859 		if (vsw_idx == -1) {
1860 			DWARN(vgenp, NULL, "can't find vsw_port\n");
1861 			mutex_exit(&vgenp->lock);
1862 			return (MDEG_FAILURE);
1863 		}
1864 	}
1865 
1866 	for (idx = 0; idx < resp->added.nelem; idx++) {
1867 		if ((vsw_idx != -1) && (vsw_idx == idx)) /* skip vsw_port */
1868 			continue;
1869 
1870 		/* If this port can't be added just skip it. */
1871 		(void) vgen_add_port(vgenp, resp->added.mdp,
1872 		    resp->added.mdep[idx]);
1873 	}
1874 
1875 	for (idx = 0; idx < resp->match_curr.nelem; idx++) {
1876 		(void) vgen_update_port(vgenp, resp->match_curr.mdp,
1877 		    resp->match_curr.mdep[idx],
1878 		    resp->match_prev.mdp,
1879 		    resp->match_prev.mdep[idx]);
1880 	}
1881 
1882 	mutex_exit(&vgenp->lock);
1883 	DBG1(vgenp, NULL, "exit\n");
1884 	return (MDEG_SUCCESS);
1885 }
1886 
1887 /* mdeg callback function for the vnet node */
1888 static int
1889 vgen_mdeg_cb(void *cb_argp, mdeg_result_t *resp)
1890 {
1891 	vgen_t		*vgenp;
1892 	vnet_t		*vnetp;
1893 	md_t		*mdp;
1894 	mde_cookie_t	node;
1895 	uint64_t	inst;
1896 	char		*node_name = NULL;
1897 
1898 	if ((resp == NULL) || (cb_argp == NULL)) {
1899 		return (MDEG_FAILURE);
1900 	}
1901 
1902 	vgenp = (vgen_t *)cb_argp;
1903 	vnetp = vgenp->vnetp;
1904 
1905 	DBG1(vgenp, NULL, "added %d : removed %d : curr matched %d"
1906 	    " : prev matched %d", resp->added.nelem, resp->removed.nelem,
1907 	    resp->match_curr.nelem, resp->match_prev.nelem);
1908 
1909 	mutex_enter(&vgenp->lock);
1910 
1911 	/*
1912 	 * We get an initial callback for this node as 'added' after
1913 	 * registering with mdeg. Note that we would have already gathered
1914 	 * information about this vnet node by walking MD earlier during attach
1915 	 * (in vgen_read_mdprops()). So, there is a window where the properties
1916 	 * of this node might have changed when we get this initial 'added'
1917 	 * callback. We handle this as if an update occured and invoke the same
1918 	 * function which handles updates to the properties of this vnet-node
1919 	 * if any. A non-zero 'match' value indicates that the MD has been
1920 	 * updated and that a 'network' node is present which may or may not
1921 	 * have been updated. It is up to the clients to examine their own
1922 	 * nodes and determine if they have changed.
1923 	 */
1924 	if (resp->added.nelem != 0) {
1925 
1926 		if (resp->added.nelem != 1) {
1927 			cmn_err(CE_NOTE, "!vnet%d: number of nodes added "
1928 			    "invalid: %d\n", vnetp->instance,
1929 			    resp->added.nelem);
1930 			goto vgen_mdeg_cb_err;
1931 		}
1932 
1933 		mdp = resp->added.mdp;
1934 		node = resp->added.mdep[0];
1935 
1936 	} else if (resp->match_curr.nelem != 0) {
1937 
1938 		if (resp->match_curr.nelem != 1) {
1939 			cmn_err(CE_NOTE, "!vnet%d: number of nodes updated "
1940 			    "invalid: %d\n", vnetp->instance,
1941 			    resp->match_curr.nelem);
1942 			goto vgen_mdeg_cb_err;
1943 		}
1944 
1945 		mdp = resp->match_curr.mdp;
1946 		node = resp->match_curr.mdep[0];
1947 
1948 	} else {
1949 		goto vgen_mdeg_cb_err;
1950 	}
1951 
1952 	/* Validate name and instance */
1953 	if (md_get_prop_str(mdp, node, "name", &node_name) != 0) {
1954 		DERR(vgenp, NULL, "unable to get node name\n");
1955 		goto vgen_mdeg_cb_err;
1956 	}
1957 
1958 	/* is this a virtual-network device? */
1959 	if (strcmp(node_name, vnet_propname) != 0) {
1960 		DERR(vgenp, NULL, "%s: Invalid node name: %s\n", node_name);
1961 		goto vgen_mdeg_cb_err;
1962 	}
1963 
1964 	if (md_get_prop_val(mdp, node, "cfg-handle", &inst)) {
1965 		DERR(vgenp, NULL, "prop(cfg-handle) not found\n");
1966 		goto vgen_mdeg_cb_err;
1967 	}
1968 
1969 	/* is this the right instance of vnet? */
1970 	if (inst != vgenp->regprop) {
1971 		DERR(vgenp, NULL,  "Invalid cfg-handle: %lx\n", inst);
1972 		goto vgen_mdeg_cb_err;
1973 	}
1974 
1975 	vgen_update_md_prop(vgenp, mdp, node);
1976 
1977 	mutex_exit(&vgenp->lock);
1978 	return (MDEG_SUCCESS);
1979 
1980 vgen_mdeg_cb_err:
1981 	mutex_exit(&vgenp->lock);
1982 	return (MDEG_FAILURE);
1983 }
1984 
1985 /*
1986  * Check to see if the relevant properties in the specified node have
1987  * changed, and if so take the appropriate action.
1988  */
1989 static void
1990 vgen_update_md_prop(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
1991 {
1992 	uint16_t	pvid;
1993 	uint16_t	*vids;
1994 	uint16_t	nvids;
1995 	vnet_t		*vnetp = vgenp->vnetp;
1996 	uint32_t	mtu;
1997 	boolean_t	pls_update;
1998 	enum		{ MD_init = 0x1,
1999 			    MD_vlans = 0x2,
2000 			    MD_mtu = 0x4,
2001 			    MD_pls = 0x8 } updated;
2002 	int		rv;
2003 
2004 	updated = MD_init;
2005 
2006 	/* Read the vlan ids */
2007 	vgen_vlan_read_ids(vgenp, VGEN_LOCAL, mdp, mdex, &pvid, &vids,
2008 	    &nvids, NULL);
2009 
2010 	/* Determine if there are any vlan id updates */
2011 	if ((pvid != vnetp->pvid) ||		/* pvid changed? */
2012 	    (nvids != vnetp->nvids) ||		/* # of vids changed? */
2013 	    ((nvids != 0) && (vnetp->nvids != 0) &&	/* vids changed? */
2014 	    bcmp(vids, vnetp->vids, sizeof (uint16_t) * nvids))) {
2015 		updated |= MD_vlans;
2016 	}
2017 
2018 	/* Read mtu */
2019 	vgen_mtu_read(vgenp, mdp, mdex, &mtu);
2020 	if (mtu != vnetp->mtu) {
2021 		if (mtu >= ETHERMTU && mtu <= VNET_MAX_MTU) {
2022 			updated |= MD_mtu;
2023 		} else {
2024 			cmn_err(CE_NOTE, "!vnet%d: Unable to process mtu update"
2025 			    " as the specified value:%d is invalid\n",
2026 			    vnetp->instance, mtu);
2027 		}
2028 	}
2029 
2030 	/*
2031 	 * Read the 'linkprop' property.
2032 	 */
2033 	vgen_linkprop_read(vgenp, mdp, mdex, &pls_update);
2034 	if (pls_update != vnetp->pls_update) {
2035 		updated |= MD_pls;
2036 	}
2037 
2038 	/* Now process the updated props */
2039 
2040 	if (updated & MD_vlans) {
2041 
2042 		/* save the new vlan ids */
2043 		vnetp->pvid = pvid;
2044 		if (vnetp->nvids != 0) {
2045 			kmem_free(vnetp->vids,
2046 			    sizeof (uint16_t) * vnetp->nvids);
2047 			vnetp->nvids = 0;
2048 		}
2049 		if (nvids != 0) {
2050 			vnetp->nvids = nvids;
2051 			vnetp->vids = vids;
2052 		}
2053 
2054 		/* reset vlan-unaware peers (ver < 1.3) and restart handshake */
2055 		vgen_reset_vlan_unaware_ports(vgenp);
2056 
2057 	} else {
2058 
2059 		if (nvids != 0) {
2060 			kmem_free(vids, sizeof (uint16_t) * nvids);
2061 		}
2062 	}
2063 
2064 	if (updated & MD_mtu) {
2065 
2066 		DBG2(vgenp, NULL, "curr_mtu(%d) new_mtu(%d)\n",
2067 		    vnetp->mtu, mtu);
2068 
2069 		rv = vnet_mtu_update(vnetp, mtu);
2070 		if (rv == 0) {
2071 			vgenp->max_frame_size = mtu +
2072 			    sizeof (struct ether_header) + VLAN_TAGSZ;
2073 		}
2074 	}
2075 
2076 	if (updated & MD_pls) {
2077 		/* enable/disable physical link state updates */
2078 		vnetp->pls_update = pls_update;
2079 		mutex_exit(&vgenp->lock);
2080 
2081 		/* reset vsw-port to re-negotiate with the updated prop. */
2082 		vgen_reset_vsw_port(vgenp);
2083 
2084 		mutex_enter(&vgenp->lock);
2085 	}
2086 }
2087 
2088 /* add a new port to the device */
2089 static int
2090 vgen_add_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
2091 {
2092 	vgen_port_t	*portp;
2093 	int		rv;
2094 
2095 	portp = kmem_zalloc(sizeof (vgen_port_t), KM_SLEEP);
2096 
2097 	rv = vgen_port_read_props(portp, vgenp, mdp, mdex);
2098 	if (rv != DDI_SUCCESS) {
2099 		KMEM_FREE(portp);
2100 		return (DDI_FAILURE);
2101 	}
2102 
2103 	rv = vgen_port_attach(portp);
2104 	if (rv != DDI_SUCCESS) {
2105 		return (DDI_FAILURE);
2106 	}
2107 
2108 	return (DDI_SUCCESS);
2109 }
2110 
2111 /* read properties of the port from its md node */
2112 static int
2113 vgen_port_read_props(vgen_port_t *portp, vgen_t *vgenp, md_t *mdp,
2114     mde_cookie_t mdex)
2115 {
2116 	uint64_t		port_num;
2117 	uint64_t		*ldc_ids;
2118 	uint64_t		macaddr;
2119 	uint64_t		val;
2120 	int			num_ldcs;
2121 	int			i;
2122 	int			addrsz;
2123 	int			num_nodes = 0;
2124 	int			listsz = 0;
2125 	mde_cookie_t		*listp = NULL;
2126 	uint8_t			*addrp;
2127 	struct ether_addr	ea;
2128 
2129 	/* read "id" property to get the port number */
2130 	if (md_get_prop_val(mdp, mdex, id_propname, &port_num)) {
2131 		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
2132 		return (DDI_FAILURE);
2133 	}
2134 
2135 	/*
2136 	 * Find the channel endpoint node(s) under this port node.
2137 	 */
2138 	if ((num_nodes = md_node_count(mdp)) <= 0) {
2139 		DWARN(vgenp, NULL, "invalid number of nodes found (%d)",
2140 		    num_nodes);
2141 		return (DDI_FAILURE);
2142 	}
2143 
2144 	/* allocate space for node list */
2145 	listsz = num_nodes * sizeof (mde_cookie_t);
2146 	listp = kmem_zalloc(listsz, KM_NOSLEEP);
2147 	if (listp == NULL)
2148 		return (DDI_FAILURE);
2149 
2150 	num_ldcs = md_scan_dag(mdp, mdex,
2151 	    md_find_name(mdp, channel_propname),
2152 	    md_find_name(mdp, "fwd"), listp);
2153 
2154 	if (num_ldcs <= 0) {
2155 		DWARN(vgenp, NULL, "can't find %s nodes", channel_propname);
2156 		kmem_free(listp, listsz);
2157 		return (DDI_FAILURE);
2158 	}
2159 
2160 	if (num_ldcs > 1) {
2161 		DWARN(vgenp, NULL, "Port %d: Number of channels %d > 1\n",
2162 		    port_num, num_ldcs);
2163 	}
2164 
2165 	ldc_ids = kmem_zalloc(num_ldcs * sizeof (uint64_t), KM_NOSLEEP);
2166 	if (ldc_ids == NULL) {
2167 		kmem_free(listp, listsz);
2168 		return (DDI_FAILURE);
2169 	}
2170 
2171 	for (i = 0; i < num_ldcs; i++) {
2172 		/* read channel ids */
2173 		if (md_get_prop_val(mdp, listp[i], id_propname, &ldc_ids[i])) {
2174 			DWARN(vgenp, NULL, "prop(%s) not found\n",
2175 			    id_propname);
2176 			kmem_free(listp, listsz);
2177 			kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
2178 			return (DDI_FAILURE);
2179 		}
2180 		DBG2(vgenp, NULL, "ldc_id 0x%llx", ldc_ids[i]);
2181 	}
2182 
2183 	kmem_free(listp, listsz);
2184 
2185 	if (md_get_prop_data(mdp, mdex, rmacaddr_propname, &addrp,
2186 	    &addrsz)) {
2187 		DWARN(vgenp, NULL, "prop(%s) not found\n", rmacaddr_propname);
2188 		kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
2189 		return (DDI_FAILURE);
2190 	}
2191 
2192 	if (addrsz < ETHERADDRL) {
2193 		DWARN(vgenp, NULL, "invalid address size (%d)\n", addrsz);
2194 		kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
2195 		return (DDI_FAILURE);
2196 	}
2197 
2198 	macaddr = *((uint64_t *)addrp);
2199 
2200 	DBG2(vgenp, NULL, "remote mac address 0x%llx\n", macaddr);
2201 
2202 	for (i = ETHERADDRL - 1; i >= 0; i--) {
2203 		ea.ether_addr_octet[i] = macaddr & 0xFF;
2204 		macaddr >>= 8;
2205 	}
2206 
2207 	if (!(md_get_prop_val(mdp, mdex, swport_propname, &val))) {
2208 		if (val == 0) {
2209 			/* This port is connected to the vswitch */
2210 			portp->is_vsw_port = B_TRUE;
2211 		} else {
2212 			portp->is_vsw_port = B_FALSE;
2213 		}
2214 	}
2215 
2216 	/* now update all properties into the port */
2217 	portp->vgenp = vgenp;
2218 	portp->port_num = port_num;
2219 	ether_copy(&ea, &portp->macaddr);
2220 	portp->ldc_ids = kmem_zalloc(sizeof (uint64_t) * num_ldcs, KM_SLEEP);
2221 	bcopy(ldc_ids, portp->ldc_ids, sizeof (uint64_t) * num_ldcs);
2222 	portp->num_ldcs = num_ldcs;
2223 
2224 	/* read vlan id properties of this port node */
2225 	vgen_vlan_read_ids(portp, VGEN_PEER, mdp, mdex, &portp->pvid,
2226 	    &portp->vids, &portp->nvids, NULL);
2227 
2228 	kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
2229 
2230 	return (DDI_SUCCESS);
2231 }
2232 
2233 /* remove a port from the device */
2234 static int
2235 vgen_remove_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
2236 {
2237 	uint64_t	port_num;
2238 	vgen_port_t	*portp;
2239 	vgen_portlist_t	*plistp;
2240 
2241 	/* read "id" property to get the port number */
2242 	if (md_get_prop_val(mdp, mdex, id_propname, &port_num)) {
2243 		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
2244 		return (DDI_FAILURE);
2245 	}
2246 
2247 	plistp = &(vgenp->vgenports);
2248 
2249 	WRITE_ENTER(&plistp->rwlock);
2250 	portp = vgen_port_lookup(plistp, (int)port_num);
2251 	if (portp == NULL) {
2252 		DWARN(vgenp, NULL, "can't find port(%lx)\n", port_num);
2253 		RW_EXIT(&plistp->rwlock);
2254 		return (DDI_FAILURE);
2255 	}
2256 
2257 	vgen_port_detach_mdeg(portp);
2258 	RW_EXIT(&plistp->rwlock);
2259 
2260 	return (DDI_SUCCESS);
2261 }
2262 
2263 /* attach a port to the device based on mdeg data */
2264 static int
2265 vgen_port_attach(vgen_port_t *portp)
2266 {
2267 	vgen_portlist_t		*plistp;
2268 	vgen_t			*vgenp;
2269 	uint64_t		*ldcids;
2270 	mac_register_t		*macp;
2271 	vio_net_res_type_t	type;
2272 	int			rv;
2273 
2274 	ASSERT(portp != NULL);
2275 	vgenp = portp->vgenp;
2276 	ldcids = portp->ldc_ids;
2277 
2278 	DBG2(vgenp, NULL, "port_num(%d), ldcid(%lx)\n",
2279 	    portp->port_num, ldcids[0]);
2280 
2281 	mutex_init(&portp->lock, NULL, MUTEX_DRIVER, NULL);
2282 
2283 	/*
2284 	 * attach the channel under the port using its channel id;
2285 	 * note that we only support one channel per port for now.
2286 	 */
2287 	if (vgen_ldc_attach(portp, ldcids[0]) == DDI_FAILURE) {
2288 		vgen_port_detach(portp);
2289 		return (DDI_FAILURE);
2290 	}
2291 
2292 	/* create vlan id hash table */
2293 	vgen_vlan_create_hash(portp);
2294 
2295 	if (portp->is_vsw_port == B_TRUE) {
2296 		/* This port is connected to the switch port */
2297 		(void) atomic_swap_32(&portp->use_vsw_port, B_FALSE);
2298 		type = VIO_NET_RES_LDC_SERVICE;
2299 	} else {
2300 		(void) atomic_swap_32(&portp->use_vsw_port, B_TRUE);
2301 		type = VIO_NET_RES_LDC_GUEST;
2302 	}
2303 
2304 	if ((macp = mac_alloc(MAC_VERSION)) == NULL) {
2305 		vgen_port_detach(portp);
2306 		return (DDI_FAILURE);
2307 	}
2308 	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
2309 	macp->m_driver = portp;
2310 	macp->m_dip = vgenp->vnetdip;
2311 	macp->m_src_addr = (uint8_t *)&(vgenp->macaddr);
2312 	macp->m_callbacks = &vgen_m_callbacks;
2313 	macp->m_min_sdu = 0;
2314 	macp->m_max_sdu = ETHERMTU;
2315 
2316 	mutex_enter(&portp->lock);
2317 	rv = vio_net_resource_reg(macp, type, vgenp->macaddr,
2318 	    portp->macaddr, &portp->vhp, &portp->vcb);
2319 	mutex_exit(&portp->lock);
2320 	mac_free(macp);
2321 
2322 	if (rv == 0) {
2323 		/* link it into the list of ports */
2324 		plistp = &(vgenp->vgenports);
2325 		WRITE_ENTER(&plistp->rwlock);
2326 		vgen_port_list_insert(portp);
2327 		RW_EXIT(&plistp->rwlock);
2328 
2329 		if (portp->is_vsw_port == B_TRUE) {
2330 			/* We now have the vswitch port attached */
2331 			vgenp->vsw_portp = portp;
2332 			(void) atomic_swap_32(&vgenp->vsw_port_refcnt, 0);
2333 		}
2334 	} else {
2335 		DERR(vgenp, NULL, "vio_net_resource_reg failed for portp=0x%p",
2336 		    portp);
2337 		vgen_port_detach(portp);
2338 	}
2339 
2340 	DBG1(vgenp, NULL, "exit: port_num(%d)\n", portp->port_num);
2341 	return (DDI_SUCCESS);
2342 }
2343 
2344 /* detach a port from the device based on mdeg data */
2345 static void
2346 vgen_port_detach_mdeg(vgen_port_t *portp)
2347 {
2348 	vgen_t *vgenp = portp->vgenp;
2349 
2350 	DBG1(vgenp, NULL, "enter: port_num(%d)\n", portp->port_num);
2351 
2352 	mutex_enter(&portp->lock);
2353 
2354 	/* stop the port if needed */
2355 	if (portp->flags & VGEN_STARTED) {
2356 		vgen_port_uninit(portp);
2357 		portp->flags &= ~(VGEN_STARTED);
2358 	}
2359 
2360 	mutex_exit(&portp->lock);
2361 	vgen_port_detach(portp);
2362 
2363 	DBG1(vgenp, NULL, "exit: port_num(%d)\n", portp->port_num);
2364 }
2365 
2366 static int
2367 vgen_update_port(vgen_t *vgenp, md_t *curr_mdp, mde_cookie_t curr_mdex,
2368     md_t *prev_mdp, mde_cookie_t prev_mdex)
2369 {
2370 	uint64_t	cport_num;
2371 	uint64_t	pport_num;
2372 	vgen_portlist_t	*plistp;
2373 	vgen_port_t	*portp;
2374 	boolean_t	updated_vlans = B_FALSE;
2375 	uint16_t	pvid;
2376 	uint16_t	*vids;
2377 	uint16_t	nvids;
2378 
2379 	/*
2380 	 * For now, we get port updates only if vlan ids changed.
2381 	 * We read the port num and do some sanity check.
2382 	 */
2383 	if (md_get_prop_val(curr_mdp, curr_mdex, id_propname, &cport_num)) {
2384 		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
2385 		return (DDI_FAILURE);
2386 	}
2387 
2388 	if (md_get_prop_val(prev_mdp, prev_mdex, id_propname, &pport_num)) {
2389 		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
2390 		return (DDI_FAILURE);
2391 	}
2392 	if (cport_num != pport_num)
2393 		return (DDI_FAILURE);
2394 
2395 	plistp = &(vgenp->vgenports);
2396 
2397 	READ_ENTER(&plistp->rwlock);
2398 
2399 	portp = vgen_port_lookup(plistp, (int)cport_num);
2400 	if (portp == NULL) {
2401 		DWARN(vgenp, NULL, "can't find port(%lx)\n", cport_num);
2402 		RW_EXIT(&plistp->rwlock);
2403 		return (DDI_FAILURE);
2404 	}
2405 
2406 	/* Read the vlan ids */
2407 	vgen_vlan_read_ids(portp, VGEN_PEER, curr_mdp, curr_mdex, &pvid, &vids,
2408 	    &nvids, NULL);
2409 
2410 	/* Determine if there are any vlan id updates */
2411 	if ((pvid != portp->pvid) ||		/* pvid changed? */
2412 	    (nvids != portp->nvids) ||		/* # of vids changed? */
2413 	    ((nvids != 0) && (portp->nvids != 0) &&	/* vids changed? */
2414 	    bcmp(vids, portp->vids, sizeof (uint16_t) * nvids))) {
2415 		updated_vlans = B_TRUE;
2416 	}
2417 
2418 	if (updated_vlans == B_FALSE) {
2419 		RW_EXIT(&plistp->rwlock);
2420 		return (DDI_FAILURE);
2421 	}
2422 
2423 	/* remove the port from vlans it has been assigned to */
2424 	vgen_vlan_remove_ids(portp);
2425 
2426 	/* save the new vlan ids */
2427 	portp->pvid = pvid;
2428 	if (portp->nvids != 0) {
2429 		kmem_free(portp->vids, sizeof (uint16_t) * portp->nvids);
2430 		portp->nvids = 0;
2431 	}
2432 	if (nvids != 0) {
2433 		portp->vids = kmem_zalloc(sizeof (uint16_t) * nvids, KM_SLEEP);
2434 		bcopy(vids, portp->vids, sizeof (uint16_t) * nvids);
2435 		portp->nvids = nvids;
2436 		kmem_free(vids, sizeof (uint16_t) * nvids);
2437 	}
2438 
2439 	/* add port to the new vlans */
2440 	vgen_vlan_add_ids(portp);
2441 
2442 	/* reset the port if it is vlan unaware (ver < 1.3) */
2443 	vgen_vlan_unaware_port_reset(portp);
2444 
2445 	RW_EXIT(&plistp->rwlock);
2446 
2447 	return (DDI_SUCCESS);
2448 }
2449 
2450 static uint64_t
2451 vgen_port_stat(vgen_port_t *portp, uint_t stat)
2452 {
2453 	return (vgen_ldc_stat(portp->ldcp, stat));
2454 }
2455 
2456 /* attach the channel corresponding to the given ldc_id to the port */
2457 static int
2458 vgen_ldc_attach(vgen_port_t *portp, uint64_t ldc_id)
2459 {
2460 	vgen_t		*vgenp;
2461 	vgen_ldc_t	*ldcp;
2462 	ldc_attr_t	attr;
2463 	int		status;
2464 	ldc_status_t	istatus;
2465 	char		kname[MAXNAMELEN];
2466 	int		instance;
2467 	enum	{AST_init = 0x0, AST_ldc_alloc = 0x1,
2468 		AST_mutex_init = 0x2, AST_ldc_init = 0x4,
2469 		AST_ldc_reg_cb = 0x8 } attach_state;
2470 
2471 	attach_state = AST_init;
2472 	vgenp = portp->vgenp;
2473 
2474 	ldcp = kmem_zalloc(sizeof (vgen_ldc_t), KM_NOSLEEP);
2475 	if (ldcp == NULL) {
2476 		goto ldc_attach_failed;
2477 	}
2478 	ldcp->ldc_id = ldc_id;
2479 	ldcp->portp = portp;
2480 
2481 	attach_state |= AST_ldc_alloc;
2482 
2483 	mutex_init(&ldcp->txlock, NULL, MUTEX_DRIVER, NULL);
2484 	mutex_init(&ldcp->cblock, NULL, MUTEX_DRIVER, NULL);
2485 	mutex_init(&ldcp->tclock, NULL, MUTEX_DRIVER, NULL);
2486 	mutex_init(&ldcp->wrlock, NULL, MUTEX_DRIVER, NULL);
2487 	mutex_init(&ldcp->rxlock, NULL, MUTEX_DRIVER, NULL);
2488 	mutex_init(&ldcp->pollq_lock, NULL, MUTEX_DRIVER, NULL);
2489 	mutex_init(&ldcp->msg_thr_lock, NULL, MUTEX_DRIVER, NULL);
2490 	cv_init(&ldcp->msg_thr_cv, NULL, CV_DRIVER, NULL);
2491 
2492 	attach_state |= AST_mutex_init;
2493 
2494 	attr.devclass = LDC_DEV_NT;
2495 	attr.instance = vgenp->instance;
2496 	attr.mode = LDC_MODE_UNRELIABLE;
2497 	attr.mtu = vgen_ldc_mtu;
2498 	status = ldc_init(ldc_id, &attr, &ldcp->ldc_handle);
2499 	if (status != 0) {
2500 		DWARN(vgenp, ldcp, "ldc_init failed,rv (%d)\n", status);
2501 		goto ldc_attach_failed;
2502 	}
2503 	attach_state |= AST_ldc_init;
2504 
2505 	status = ldc_reg_callback(ldcp->ldc_handle, vgen_ldc_cb, (caddr_t)ldcp);
2506 	if (status != 0) {
2507 		DWARN(vgenp, ldcp, "ldc_reg_callback failed, rv (%d)\n",
2508 		    status);
2509 		goto ldc_attach_failed;
2510 	}
2511 	/*
2512 	 * allocate a message for ldc_read()s, big enough to hold ctrl and
2513 	 * data msgs, including raw data msgs used to recv priority frames.
2514 	 */
2515 	ldcp->msglen = VIO_PKT_DATA_HDRSIZE + vgenp->max_frame_size;
2516 	ldcp->ldcmsg = kmem_alloc(ldcp->msglen, KM_SLEEP);
2517 	attach_state |= AST_ldc_reg_cb;
2518 
2519 	(void) ldc_status(ldcp->ldc_handle, &istatus);
2520 	ASSERT(istatus == LDC_INIT);
2521 	ldcp->ldc_status = istatus;
2522 
2523 	/* Setup kstats for the channel */
2524 	instance = vgenp->instance;
2525 	(void) sprintf(kname, "vnetldc0x%lx", ldcp->ldc_id);
2526 	ldcp->ksp = vgen_setup_kstats("vnet", instance, kname, &ldcp->stats);
2527 	if (ldcp->ksp == NULL) {
2528 		goto ldc_attach_failed;
2529 	}
2530 
2531 	/* initialize vgen_versions supported */
2532 	bcopy(vgen_versions, ldcp->vgen_versions, sizeof (ldcp->vgen_versions));
2533 	vgen_reset_vnet_proto_ops(ldcp);
2534 
2535 	/* Link this channel to the port */
2536 	portp->ldcp = ldcp;
2537 
2538 	ldcp->link_state = LINK_STATE_UNKNOWN;
2539 #ifdef	VNET_IOC_DEBUG
2540 	ldcp->link_down_forced = B_FALSE;
2541 #endif
2542 	ldcp->flags |= CHANNEL_ATTACHED;
2543 	return (DDI_SUCCESS);
2544 
2545 ldc_attach_failed:
2546 	if (attach_state & AST_ldc_reg_cb) {
2547 		(void) ldc_unreg_callback(ldcp->ldc_handle);
2548 		kmem_free(ldcp->ldcmsg, ldcp->msglen);
2549 	}
2550 
2551 	if (attach_state & AST_ldc_init) {
2552 		(void) ldc_fini(ldcp->ldc_handle);
2553 	}
2554 	if (attach_state & AST_mutex_init) {
2555 		mutex_destroy(&ldcp->tclock);
2556 		mutex_destroy(&ldcp->txlock);
2557 		mutex_destroy(&ldcp->cblock);
2558 		mutex_destroy(&ldcp->wrlock);
2559 		mutex_destroy(&ldcp->rxlock);
2560 		mutex_destroy(&ldcp->pollq_lock);
2561 	}
2562 	if (attach_state & AST_ldc_alloc) {
2563 		KMEM_FREE(ldcp);
2564 	}
2565 	return (DDI_FAILURE);
2566 }
2567 
2568 /* detach a channel from the port */
2569 static void
2570 vgen_ldc_detach(vgen_ldc_t *ldcp)
2571 {
2572 	vgen_port_t	*portp;
2573 	vgen_t		*vgenp;
2574 
2575 	ASSERT(ldcp != NULL);
2576 
2577 	portp = ldcp->portp;
2578 	vgenp = portp->vgenp;
2579 
2580 	if (ldcp->ldc_status != LDC_INIT) {
2581 		DWARN(vgenp, ldcp, "ldc_status is not INIT\n");
2582 	}
2583 
2584 	if (ldcp->flags & CHANNEL_ATTACHED) {
2585 		ldcp->flags &= ~(CHANNEL_ATTACHED);
2586 
2587 		(void) ldc_unreg_callback(ldcp->ldc_handle);
2588 		(void) ldc_fini(ldcp->ldc_handle);
2589 
2590 		kmem_free(ldcp->ldcmsg, ldcp->msglen);
2591 		vgen_destroy_kstats(ldcp->ksp);
2592 		ldcp->ksp = NULL;
2593 		mutex_destroy(&ldcp->tclock);
2594 		mutex_destroy(&ldcp->txlock);
2595 		mutex_destroy(&ldcp->cblock);
2596 		mutex_destroy(&ldcp->wrlock);
2597 		mutex_destroy(&ldcp->rxlock);
2598 		mutex_destroy(&ldcp->pollq_lock);
2599 		mutex_destroy(&ldcp->msg_thr_lock);
2600 		cv_destroy(&ldcp->msg_thr_cv);
2601 
2602 		KMEM_FREE(ldcp);
2603 	}
2604 }
2605 
2606 /* enable transmit/receive on the channel */
2607 static int
2608 vgen_ldc_init(vgen_ldc_t *ldcp)
2609 {
2610 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
2611 	ldc_status_t	istatus;
2612 	int		rv;
2613 	enum		{ ST_init = 0x0, ST_ldc_open = 0x1,
2614 			    ST_cb_enable = 0x2} init_state;
2615 	int		flag = 0;
2616 
2617 	init_state = ST_init;
2618 
2619 	DBG1(vgenp, ldcp, "enter\n");
2620 	LDC_LOCK(ldcp);
2621 
2622 	rv = ldc_open(ldcp->ldc_handle);
2623 	if (rv != 0) {
2624 		DWARN(vgenp, ldcp, "ldc_open failed: rv(%d)\n", rv);
2625 		goto ldcinit_failed;
2626 	}
2627 	init_state |= ST_ldc_open;
2628 
2629 	(void) ldc_status(ldcp->ldc_handle, &istatus);
2630 	if (istatus != LDC_OPEN && istatus != LDC_READY) {
2631 		DWARN(vgenp, ldcp, "status(%d) is not OPEN/READY\n", istatus);
2632 		goto ldcinit_failed;
2633 	}
2634 	ldcp->ldc_status = istatus;
2635 
2636 	rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_ENABLE);
2637 	if (rv != 0) {
2638 		DWARN(vgenp, ldcp, "ldc_set_cb_mode failed: rv(%d)\n", rv);
2639 		goto ldcinit_failed;
2640 	}
2641 
2642 	init_state |= ST_cb_enable;
2643 
2644 	vgen_ldc_up(ldcp);
2645 
2646 	(void) ldc_status(ldcp->ldc_handle, &istatus);
2647 	if (istatus == LDC_UP) {
2648 		DWARN(vgenp, ldcp, "status(%d) is UP\n", istatus);
2649 	}
2650 
2651 	ldcp->ldc_status = istatus;
2652 
2653 	ldcp->hphase = VH_PHASE0;
2654 	ldcp->hstate = 0;
2655 	ldcp->flags |= CHANNEL_STARTED;
2656 
2657 	vgen_setup_handshake_params(ldcp);
2658 
2659 	/* if channel is already UP - start handshake */
2660 	if (istatus == LDC_UP) {
2661 		vgen_t *vgenp = LDC_TO_VGEN(ldcp);
2662 		if (ldcp->portp != vgenp->vsw_portp) {
2663 			/*
2664 			 * As the channel is up, use this port from now on.
2665 			 */
2666 			(void) atomic_swap_32(
2667 			    &ldcp->portp->use_vsw_port, B_FALSE);
2668 		}
2669 
2670 		/* Initialize local session id */
2671 		ldcp->local_sid = ddi_get_lbolt();
2672 
2673 		/* clear peer session id */
2674 		ldcp->peer_sid = 0;
2675 
2676 		mutex_exit(&ldcp->tclock);
2677 		mutex_exit(&ldcp->txlock);
2678 		mutex_exit(&ldcp->wrlock);
2679 		mutex_exit(&ldcp->rxlock);
2680 		rv = vgen_handshake(vh_nextphase(ldcp));
2681 		mutex_exit(&ldcp->cblock);
2682 		if (rv != 0) {
2683 			flag = (rv == ECONNRESET) ? VGEN_FLAG_EVT_RESET :
2684 			    VGEN_FLAG_NEED_LDCRESET;
2685 			(void) vgen_process_reset(ldcp, flag);
2686 		}
2687 	} else {
2688 		LDC_UNLOCK(ldcp);
2689 	}
2690 
2691 	return (DDI_SUCCESS);
2692 
2693 ldcinit_failed:
2694 	if (init_state & ST_cb_enable) {
2695 		(void) ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE);
2696 	}
2697 	if (init_state & ST_ldc_open) {
2698 		(void) ldc_close(ldcp->ldc_handle);
2699 	}
2700 	LDC_UNLOCK(ldcp);
2701 	DBG1(vgenp, ldcp, "exit\n");
2702 	return (DDI_FAILURE);
2703 }
2704 
2705 /* stop transmit/receive on the channel */
2706 static void
2707 vgen_ldc_uninit(vgen_ldc_t *ldcp)
2708 {
2709 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
2710 
2711 	DBG1(vgenp, ldcp, "enter\n");
2712 
2713 	LDC_LOCK(ldcp);
2714 
2715 	if ((ldcp->flags & CHANNEL_STARTED) == 0) {
2716 		LDC_UNLOCK(ldcp);
2717 		DWARN(vgenp, ldcp, "CHANNEL_STARTED flag is not set\n");
2718 		return;
2719 	}
2720 
2721 	LDC_UNLOCK(ldcp);
2722 
2723 	while (atomic_cas_uint(&ldcp->reset_in_progress, 0, 1) != 0) {
2724 		delay(drv_usectohz(VGEN_LDC_UNINIT_DELAY));
2725 	}
2726 
2727 	(void) vgen_process_reset(ldcp, VGEN_FLAG_UNINIT);
2728 
2729 	DBG1(vgenp, ldcp, "exit\n");
2730 }
2731 
2732 /*
2733  * Create a descriptor ring, that will be exported to the peer for mapping.
2734  */
2735 static int
2736 vgen_create_dring(vgen_ldc_t *ldcp)
2737 {
2738 	vgen_hparams_t	*lp = &ldcp->local_hparams;
2739 	int		rv;
2740 
2741 	if (lp->dring_mode == VIO_RX_DRING_DATA) {
2742 		rv = vgen_create_rx_dring(ldcp);
2743 	} else {
2744 		rv = vgen_create_tx_dring(ldcp);
2745 	}
2746 
2747 	return (rv);
2748 }
2749 
2750 /*
2751  * Destroy the descriptor ring.
2752  */
2753 static void
2754 vgen_destroy_dring(vgen_ldc_t *ldcp)
2755 {
2756 	vgen_hparams_t	*lp = &ldcp->local_hparams;
2757 
2758 	if (lp->dring_mode == VIO_RX_DRING_DATA) {
2759 		vgen_destroy_rx_dring(ldcp);
2760 	} else {
2761 		vgen_destroy_tx_dring(ldcp);
2762 	}
2763 }
2764 
2765 /*
2766  * Map the descriptor ring exported by the peer.
2767  */
2768 static int
2769 vgen_map_dring(vgen_ldc_t *ldcp, void *pkt)
2770 {
2771 	int		rv;
2772 	vgen_hparams_t	*lp = &ldcp->local_hparams;
2773 
2774 	if (lp->dring_mode == VIO_RX_DRING_DATA) {
2775 		/*
2776 		 * In RxDringData mode, dring that we map in
2777 		 * becomes our transmit descriptor ring.
2778 		 */
2779 		rv = vgen_map_tx_dring(ldcp, pkt);
2780 	} else {
2781 
2782 		/*
2783 		 * In TxDring mode, dring that we map in
2784 		 * becomes our receive descriptor ring.
2785 		 */
2786 		rv = vgen_map_rx_dring(ldcp, pkt);
2787 	}
2788 
2789 	return (rv);
2790 }
2791 
2792 /*
2793  * Unmap the descriptor ring exported by the peer.
2794  */
2795 static void
2796 vgen_unmap_dring(vgen_ldc_t *ldcp)
2797 {
2798 	vgen_hparams_t	*lp = &ldcp->local_hparams;
2799 
2800 	if (lp->dring_mode == VIO_RX_DRING_DATA) {
2801 		vgen_unmap_tx_dring(ldcp);
2802 	} else {
2803 		vgen_unmap_rx_dring(ldcp);
2804 	}
2805 }
2806 
2807 void
2808 vgen_destroy_rxpools(void *arg)
2809 {
2810 	vio_mblk_pool_t	*poolp = (vio_mblk_pool_t *)arg;
2811 	vio_mblk_pool_t	*npoolp;
2812 
2813 	while (poolp != NULL) {
2814 		npoolp =  poolp->nextp;
2815 		while (vio_destroy_mblks(poolp) != 0) {
2816 			delay(drv_usectohz(vgen_rxpool_cleanup_delay));
2817 		}
2818 		poolp = npoolp;
2819 	}
2820 }
2821 
2822 /* get channel statistics */
2823 static uint64_t
2824 vgen_ldc_stat(vgen_ldc_t *ldcp, uint_t stat)
2825 {
2826 	vgen_stats_t	*statsp;
2827 	uint64_t	val;
2828 
2829 	val = 0;
2830 	statsp = &ldcp->stats;
2831 	switch (stat) {
2832 
2833 	case MAC_STAT_MULTIRCV:
2834 		val = statsp->multircv;
2835 		break;
2836 
2837 	case MAC_STAT_BRDCSTRCV:
2838 		val = statsp->brdcstrcv;
2839 		break;
2840 
2841 	case MAC_STAT_MULTIXMT:
2842 		val = statsp->multixmt;
2843 		break;
2844 
2845 	case MAC_STAT_BRDCSTXMT:
2846 		val = statsp->brdcstxmt;
2847 		break;
2848 
2849 	case MAC_STAT_NORCVBUF:
2850 		val = statsp->norcvbuf;
2851 		break;
2852 
2853 	case MAC_STAT_IERRORS:
2854 		val = statsp->ierrors;
2855 		break;
2856 
2857 	case MAC_STAT_NOXMTBUF:
2858 		val = statsp->noxmtbuf;
2859 		break;
2860 
2861 	case MAC_STAT_OERRORS:
2862 		val = statsp->oerrors;
2863 		break;
2864 
2865 	case MAC_STAT_COLLISIONS:
2866 		break;
2867 
2868 	case MAC_STAT_RBYTES:
2869 		val = statsp->rbytes;
2870 		break;
2871 
2872 	case MAC_STAT_IPACKETS:
2873 		val = statsp->ipackets;
2874 		break;
2875 
2876 	case MAC_STAT_OBYTES:
2877 		val = statsp->obytes;
2878 		break;
2879 
2880 	case MAC_STAT_OPACKETS:
2881 		val = statsp->opackets;
2882 		break;
2883 
2884 	/* stats not relevant to ldc, return 0 */
2885 	case MAC_STAT_IFSPEED:
2886 	case ETHER_STAT_ALIGN_ERRORS:
2887 	case ETHER_STAT_FCS_ERRORS:
2888 	case ETHER_STAT_FIRST_COLLISIONS:
2889 	case ETHER_STAT_MULTI_COLLISIONS:
2890 	case ETHER_STAT_DEFER_XMTS:
2891 	case ETHER_STAT_TX_LATE_COLLISIONS:
2892 	case ETHER_STAT_EX_COLLISIONS:
2893 	case ETHER_STAT_MACXMT_ERRORS:
2894 	case ETHER_STAT_CARRIER_ERRORS:
2895 	case ETHER_STAT_TOOLONG_ERRORS:
2896 	case ETHER_STAT_XCVR_ADDR:
2897 	case ETHER_STAT_XCVR_ID:
2898 	case ETHER_STAT_XCVR_INUSE:
2899 	case ETHER_STAT_CAP_1000FDX:
2900 	case ETHER_STAT_CAP_1000HDX:
2901 	case ETHER_STAT_CAP_100FDX:
2902 	case ETHER_STAT_CAP_100HDX:
2903 	case ETHER_STAT_CAP_10FDX:
2904 	case ETHER_STAT_CAP_10HDX:
2905 	case ETHER_STAT_CAP_ASMPAUSE:
2906 	case ETHER_STAT_CAP_PAUSE:
2907 	case ETHER_STAT_CAP_AUTONEG:
2908 	case ETHER_STAT_ADV_CAP_1000FDX:
2909 	case ETHER_STAT_ADV_CAP_1000HDX:
2910 	case ETHER_STAT_ADV_CAP_100FDX:
2911 	case ETHER_STAT_ADV_CAP_100HDX:
2912 	case ETHER_STAT_ADV_CAP_10FDX:
2913 	case ETHER_STAT_ADV_CAP_10HDX:
2914 	case ETHER_STAT_ADV_CAP_ASMPAUSE:
2915 	case ETHER_STAT_ADV_CAP_PAUSE:
2916 	case ETHER_STAT_ADV_CAP_AUTONEG:
2917 	case ETHER_STAT_LP_CAP_1000FDX:
2918 	case ETHER_STAT_LP_CAP_1000HDX:
2919 	case ETHER_STAT_LP_CAP_100FDX:
2920 	case ETHER_STAT_LP_CAP_100HDX:
2921 	case ETHER_STAT_LP_CAP_10FDX:
2922 	case ETHER_STAT_LP_CAP_10HDX:
2923 	case ETHER_STAT_LP_CAP_ASMPAUSE:
2924 	case ETHER_STAT_LP_CAP_PAUSE:
2925 	case ETHER_STAT_LP_CAP_AUTONEG:
2926 	case ETHER_STAT_LINK_ASMPAUSE:
2927 	case ETHER_STAT_LINK_PAUSE:
2928 	case ETHER_STAT_LINK_AUTONEG:
2929 	case ETHER_STAT_LINK_DUPLEX:
2930 	default:
2931 		val = 0;
2932 		break;
2933 
2934 	}
2935 	return (val);
2936 }
2937 
2938 /*
2939  * LDC channel is UP, start handshake process with peer.
2940  */
2941 static void
2942 vgen_handle_evt_up(vgen_ldc_t *ldcp)
2943 {
2944 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
2945 
2946 	DBG1(vgenp, ldcp, "enter\n");
2947 
2948 	ASSERT(MUTEX_HELD(&ldcp->cblock));
2949 
2950 	if (ldcp->portp != vgenp->vsw_portp) {
2951 		/*
2952 		 * As the channel is up, use this port from now on.
2953 		 */
2954 		(void) atomic_swap_32(&ldcp->portp->use_vsw_port, B_FALSE);
2955 	}
2956 
2957 	/* Initialize local session id */
2958 	ldcp->local_sid = ddi_get_lbolt();
2959 
2960 	/* clear peer session id */
2961 	ldcp->peer_sid = 0;
2962 
2963 	/* Initiate Handshake process with peer ldc endpoint */
2964 	(void) vgen_handshake(vh_nextphase(ldcp));
2965 
2966 	DBG1(vgenp, ldcp, "exit\n");
2967 }
2968 
2969 /*
2970  * LDC channel is Reset, terminate connection with peer and try to
2971  * bring the channel up again.
2972  */
2973 int
2974 vgen_handle_evt_reset(vgen_ldc_t *ldcp, vgen_caller_t caller)
2975 {
2976 	if (caller == VGEN_LDC_CB || caller == VGEN_MSG_THR) {
2977 		ASSERT(MUTEX_HELD(&ldcp->cblock));
2978 	}
2979 
2980 	/* Set the flag to indicate reset is in progress */
2981 	if (atomic_cas_uint(&ldcp->reset_in_progress, 0, 1) != 0) {
2982 		/* another thread is already in the process of resetting */
2983 		return (EBUSY);
2984 	}
2985 
2986 	if (caller == VGEN_LDC_CB || caller == VGEN_MSG_THR) {
2987 		mutex_exit(&ldcp->cblock);
2988 	}
2989 
2990 	(void) vgen_process_reset(ldcp, VGEN_FLAG_EVT_RESET);
2991 
2992 	if (caller == VGEN_LDC_CB || caller == VGEN_MSG_THR) {
2993 		mutex_enter(&ldcp->cblock);
2994 	}
2995 
2996 	return (0);
2997 }
2998 
2999 /* Interrupt handler for the channel */
3000 static uint_t
3001 vgen_ldc_cb(uint64_t event, caddr_t arg)
3002 {
3003 	_NOTE(ARGUNUSED(event))
3004 	vgen_ldc_t	*ldcp;
3005 	vgen_t		*vgenp;
3006 	ldc_status_t	istatus;
3007 	vgen_stats_t	*statsp;
3008 	uint_t		ret = LDC_SUCCESS;
3009 
3010 	ldcp = (vgen_ldc_t *)arg;
3011 	vgenp = LDC_TO_VGEN(ldcp);
3012 	statsp = &ldcp->stats;
3013 
3014 	DBG1(vgenp, ldcp, "enter\n");
3015 
3016 	mutex_enter(&ldcp->cblock);
3017 	statsp->callbacks++;
3018 	if ((ldcp->ldc_status == LDC_INIT) || (ldcp->ldc_handle == 0)) {
3019 		DWARN(vgenp, ldcp, "status(%d) is LDC_INIT\n",
3020 		    ldcp->ldc_status);
3021 		mutex_exit(&ldcp->cblock);
3022 		return (LDC_SUCCESS);
3023 	}
3024 
3025 	/*
3026 	 * NOTE: not using switch() as event could be triggered by
3027 	 * a state change and a read request. Also the ordering	of the
3028 	 * check for the event types is deliberate.
3029 	 */
3030 	if (event & LDC_EVT_UP) {
3031 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
3032 			DWARN(vgenp, ldcp, "ldc_status err\n");
3033 			/* status couldn't be determined */
3034 			ret = LDC_FAILURE;
3035 			goto ldc_cb_ret;
3036 		}
3037 		ldcp->ldc_status = istatus;
3038 		if (ldcp->ldc_status != LDC_UP) {
3039 			DWARN(vgenp, ldcp, "LDC_EVT_UP received "
3040 			    " but ldc status is not UP(0x%x)\n",
3041 			    ldcp->ldc_status);
3042 			/* spurious interrupt, return success */
3043 			goto ldc_cb_ret;
3044 		}
3045 		DWARN(vgenp, ldcp, "event(%lx) UP, status(%d)\n",
3046 		    event, ldcp->ldc_status);
3047 
3048 		vgen_handle_evt_up(ldcp);
3049 
3050 		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);
3051 	}
3052 
3053 	/* Handle RESET/DOWN before READ event */
3054 	if (event & (LDC_EVT_RESET | LDC_EVT_DOWN)) {
3055 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
3056 			DWARN(vgenp, ldcp, "ldc_status error\n");
3057 			/* status couldn't be determined */
3058 			ret = LDC_FAILURE;
3059 			goto ldc_cb_ret;
3060 		}
3061 		ldcp->ldc_status = istatus;
3062 		DWARN(vgenp, ldcp, "event(%lx) RESET/DOWN, status(%d)\n",
3063 		    event, ldcp->ldc_status);
3064 
3065 		(void) vgen_handle_evt_reset(ldcp, VGEN_LDC_CB);
3066 
3067 		/*
3068 		 * As the channel is down/reset, ignore READ event
3069 		 * but print a debug warning message.
3070 		 */
3071 		if (event & LDC_EVT_READ) {
3072 			DWARN(vgenp, ldcp,
3073 			    "LDC_EVT_READ set along with RESET/DOWN\n");
3074 			event &= ~LDC_EVT_READ;
3075 		}
3076 	}
3077 
3078 	if (event & LDC_EVT_READ) {
3079 		DBG2(vgenp, ldcp, "event(%lx) READ, status(%d)\n",
3080 		    event, ldcp->ldc_status);
3081 
3082 		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);
3083 
3084 		if (ldcp->msg_thread != NULL) {
3085 			/*
3086 			 * If the receive thread is enabled, then
3087 			 * wakeup the receive thread to process the
3088 			 * LDC messages.
3089 			 */
3090 			mutex_exit(&ldcp->cblock);
3091 			mutex_enter(&ldcp->msg_thr_lock);
3092 			if (!(ldcp->msg_thr_flags & VGEN_WTHR_DATARCVD)) {
3093 				ldcp->msg_thr_flags |= VGEN_WTHR_DATARCVD;
3094 				cv_signal(&ldcp->msg_thr_cv);
3095 			}
3096 			mutex_exit(&ldcp->msg_thr_lock);
3097 			mutex_enter(&ldcp->cblock);
3098 		} else  {
3099 			(void) vgen_handle_evt_read(ldcp, VGEN_LDC_CB);
3100 		}
3101 	}
3102 
3103 ldc_cb_ret:
3104 	mutex_exit(&ldcp->cblock);
3105 	DBG1(vgenp, ldcp, "exit\n");
3106 	return (ret);
3107 }
3108 
3109 int
3110 vgen_handle_evt_read(vgen_ldc_t *ldcp, vgen_caller_t caller)
3111 {
3112 	int		rv;
3113 	uint64_t	*ldcmsg;
3114 	size_t		msglen;
3115 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
3116 	vio_msg_tag_t	*tagp;
3117 	ldc_status_t	istatus;
3118 	boolean_t	has_data;
3119 
3120 	DBG1(vgenp, ldcp, "enter\n");
3121 
3122 	if (caller == VGEN_LDC_CB) {
3123 		ASSERT(MUTEX_HELD(&ldcp->cblock));
3124 	} else if (caller == VGEN_MSG_THR) {
3125 		mutex_enter(&ldcp->cblock);
3126 	} else {
3127 		return (EINVAL);
3128 	}
3129 
3130 	ldcmsg = ldcp->ldcmsg;
3131 
3132 vgen_evtread:
3133 	do {
3134 		msglen = ldcp->msglen;
3135 		rv = ldc_read(ldcp->ldc_handle, (caddr_t)ldcmsg, &msglen);
3136 
3137 		if (rv != 0) {
3138 			DWARN(vgenp, ldcp, "ldc_read() failed "
3139 			    "rv(%d) len(%d)\n", rv, msglen);
3140 			if (rv == ECONNRESET)
3141 				goto vgen_evtread_error;
3142 			break;
3143 		}
3144 		if (msglen == 0) {
3145 			DBG2(vgenp, ldcp, "ldc_read NODATA");
3146 			break;
3147 		}
3148 		DBG2(vgenp, ldcp, "ldc_read msglen(%d)", msglen);
3149 
3150 		tagp = (vio_msg_tag_t *)ldcmsg;
3151 
3152 		if (ldcp->peer_sid) {
3153 			/*
3154 			 * check sid only after we have received peer's sid
3155 			 * in the version negotiate msg.
3156 			 */
3157 #ifdef DEBUG
3158 			if (vgen_inject_error(ldcp, VGEN_ERR_HSID)) {
3159 				/* simulate bad sid condition */
3160 				tagp->vio_sid = 0;
3161 				vgen_inject_err_flag &= ~(VGEN_ERR_HSID);
3162 			}
3163 #endif
3164 			rv = vgen_check_sid(ldcp, tagp);
3165 			if (rv != VGEN_SUCCESS) {
3166 				/*
3167 				 * If sid mismatch is detected,
3168 				 * reset the channel.
3169 				 */
3170 				DWARN(vgenp, ldcp, "vgen_check_sid() failed\n");
3171 				goto vgen_evtread_error;
3172 			}
3173 		}
3174 
3175 		switch (tagp->vio_msgtype) {
3176 		case VIO_TYPE_CTRL:
3177 			rv = vgen_handle_ctrlmsg(ldcp, tagp);
3178 			if (rv != 0) {
3179 				DWARN(vgenp, ldcp, "vgen_handle_ctrlmsg()"
3180 				    " failed rv(%d)\n", rv);
3181 			}
3182 			break;
3183 
3184 		case VIO_TYPE_DATA:
3185 			rv = vgen_handle_datamsg(ldcp, tagp, msglen);
3186 			if (rv != 0) {
3187 				DWARN(vgenp, ldcp, "vgen_handle_datamsg()"
3188 				    " failed rv(%d)\n", rv);
3189 			}
3190 			break;
3191 
3192 		case VIO_TYPE_ERR:
3193 			vgen_handle_errmsg(ldcp, tagp);
3194 			break;
3195 
3196 		default:
3197 			DWARN(vgenp, ldcp, "Unknown VIO_TYPE(%x)\n",
3198 			    tagp->vio_msgtype);
3199 			break;
3200 		}
3201 
3202 		/*
3203 		 * If an error is encountered, stop processing and
3204 		 * handle the error.
3205 		 */
3206 		if (rv != 0) {
3207 			goto vgen_evtread_error;
3208 		}
3209 
3210 	} while (msglen);
3211 
3212 	/* check once more before exiting */
3213 	rv = ldc_chkq(ldcp->ldc_handle, &has_data);
3214 	if ((rv == 0) && (has_data == B_TRUE)) {
3215 		DTRACE_PROBE1(vgen_chkq, vgen_ldc_t *, ldcp);
3216 		goto vgen_evtread;
3217 	}
3218 
3219 vgen_evtread_error:
3220 	if (rv != 0) {
3221 		/*
3222 		 * We handle the error and then return the error value. If we
3223 		 * are running in the context of the msg worker, the error
3224 		 * tells the worker thread to exit, as the channel would have
3225 		 * been reset.
3226 		 */
3227 		if (rv == ECONNRESET) {
3228 			if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
3229 				DWARN(vgenp, ldcp, "ldc_status err\n");
3230 			} else {
3231 				ldcp->ldc_status = istatus;
3232 			}
3233 			(void) vgen_handle_evt_reset(ldcp, caller);
3234 		} else {
3235 			DWARN(vgenp, ldcp, "Calling vgen_ldc_reset()...\n");
3236 			(void) vgen_ldc_reset(ldcp, caller);
3237 		}
3238 	}
3239 
3240 	if (caller == VGEN_MSG_THR) {
3241 		mutex_exit(&ldcp->cblock);
3242 	}
3243 
3244 	DBG1(vgenp, ldcp, "exit\n");
3245 	return (rv);
3246 }
3247 
3248 /* vgen handshake functions */
3249 
3250 /* change the hphase for the channel to the next phase */
3251 static vgen_ldc_t *
3252 vh_nextphase(vgen_ldc_t *ldcp)
3253 {
3254 	if (ldcp->hphase == VH_PHASE4) {
3255 		ldcp->hphase = VH_DONE;
3256 	} else {
3257 		ldcp->hphase++;
3258 	}
3259 	return (ldcp);
3260 }
3261 
3262 /* send version negotiate message to the peer over ldc */
3263 static int
3264 vgen_send_version_negotiate(vgen_ldc_t *ldcp)
3265 {
3266 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
3267 	vio_ver_msg_t	vermsg;
3268 	vio_msg_tag_t	*tagp = &vermsg.tag;
3269 	int		rv;
3270 
3271 	bzero(&vermsg, sizeof (vermsg));
3272 
3273 	tagp->vio_msgtype = VIO_TYPE_CTRL;
3274 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
3275 	tagp->vio_subtype_env = VIO_VER_INFO;
3276 	tagp->vio_sid = ldcp->local_sid;
3277 
3278 	/* get version msg payload from ldcp->local */
3279 	vermsg.ver_major = ldcp->local_hparams.ver_major;
3280 	vermsg.ver_minor = ldcp->local_hparams.ver_minor;
3281 	vermsg.dev_class = ldcp->local_hparams.dev_class;
3282 
3283 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (vermsg), B_FALSE);
3284 	if (rv != VGEN_SUCCESS) {
3285 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
3286 		return (rv);
3287 	}
3288 
3289 	ldcp->hstate |= VER_INFO_SENT;
3290 	DBG2(vgenp, ldcp, "VER_INFO_SENT ver(%d,%d)\n",
3291 	    vermsg.ver_major, vermsg.ver_minor);
3292 
3293 	return (VGEN_SUCCESS);
3294 }
3295 
3296 /* send attr info message to the peer over ldc */
3297 static int
3298 vgen_send_attr_info(vgen_ldc_t *ldcp)
3299 {
3300 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
3301 	vnet_attr_msg_t	attrmsg;
3302 	vio_msg_tag_t	*tagp = &attrmsg.tag;
3303 	int		rv;
3304 
3305 	bzero(&attrmsg, sizeof (attrmsg));
3306 
3307 	tagp->vio_msgtype = VIO_TYPE_CTRL;
3308 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
3309 	tagp->vio_subtype_env = VIO_ATTR_INFO;
3310 	tagp->vio_sid = ldcp->local_sid;
3311 
3312 	/* get attr msg payload from ldcp->local */
3313 	attrmsg.mtu = ldcp->local_hparams.mtu;
3314 	attrmsg.addr = ldcp->local_hparams.addr;
3315 	attrmsg.addr_type = ldcp->local_hparams.addr_type;
3316 	attrmsg.xfer_mode = ldcp->local_hparams.xfer_mode;
3317 	attrmsg.ack_freq = ldcp->local_hparams.ack_freq;
3318 	attrmsg.physlink_update = ldcp->local_hparams.physlink_update;
3319 	attrmsg.options = ldcp->local_hparams.dring_mode;
3320 
3321 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (attrmsg), B_FALSE);
3322 	if (rv != VGEN_SUCCESS) {
3323 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
3324 		return (rv);
3325 	}
3326 
3327 	ldcp->hstate |= ATTR_INFO_SENT;
3328 	DBG2(vgenp, ldcp, "ATTR_INFO_SENT\n");
3329 
3330 	return (VGEN_SUCCESS);
3331 }
3332 
3333 /*
3334  * Send descriptor ring register message to the peer over ldc.
3335  * Invoked in RxDringData mode.
3336  */
3337 static int
3338 vgen_send_rx_dring_reg(vgen_ldc_t *ldcp)
3339 {
3340 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
3341 	vio_dring_reg_msg_t	*msg;
3342 	vio_dring_reg_ext_msg_t	*emsg;
3343 	int			rv;
3344 	uint8_t			*buf;
3345 	uint_t			msgsize;
3346 
3347 	msgsize = VNET_DRING_REG_EXT_MSG_SIZE(ldcp->rx_data_ncookies);
3348 	msg = kmem_zalloc(msgsize, KM_SLEEP);
3349 
3350 	/* Initialize the common part of dring reg msg */
3351 	vgen_init_dring_reg_msg(ldcp, msg, VIO_RX_DRING_DATA);
3352 
3353 	/* skip over dring cookies at the tail of common section */
3354 	buf = (uint8_t *)msg->cookie;
3355 	ASSERT(msg->ncookies == 1);
3356 	buf += (msg->ncookies * sizeof (ldc_mem_cookie_t));
3357 
3358 	/* Now setup the extended part, specific to RxDringData mode */
3359 	emsg = (vio_dring_reg_ext_msg_t *)buf;
3360 
3361 	/* copy data_ncookies in the msg */
3362 	emsg->data_ncookies = ldcp->rx_data_ncookies;
3363 
3364 	/* copy data area size in the msg */
3365 	emsg->data_area_size = ldcp->rx_data_sz;
3366 
3367 	/* copy data area cookies in the msg */
3368 	bcopy(ldcp->rx_data_cookie, (ldc_mem_cookie_t *)emsg->data_cookie,
3369 	    sizeof (ldc_mem_cookie_t) * ldcp->rx_data_ncookies);
3370 
3371 	rv = vgen_sendmsg(ldcp, (caddr_t)msg, msgsize, B_FALSE);
3372 	if (rv != VGEN_SUCCESS) {
3373 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
3374 		kmem_free(msg, msgsize);
3375 		return (rv);
3376 	}
3377 
3378 	ldcp->hstate |= DRING_INFO_SENT;
3379 	DBG2(vgenp, ldcp, "DRING_INFO_SENT \n");
3380 
3381 	kmem_free(msg, msgsize);
3382 	return (VGEN_SUCCESS);
3383 }
3384 
3385 /*
3386  * Send descriptor ring register message to the peer over ldc.
3387  * Invoked in TxDring mode.
3388  */
3389 static int
3390 vgen_send_tx_dring_reg(vgen_ldc_t *ldcp)
3391 {
3392 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
3393 	vio_dring_reg_msg_t	msg;
3394 	int			rv;
3395 
3396 	bzero(&msg, sizeof (msg));
3397 
3398 	/*
3399 	 * Initialize only the common part of dring reg msg in TxDring mode.
3400 	 */
3401 	vgen_init_dring_reg_msg(ldcp, &msg, VIO_TX_DRING);
3402 
3403 	rv = vgen_sendmsg(ldcp, (caddr_t)&msg, sizeof (msg), B_FALSE);
3404 	if (rv != VGEN_SUCCESS) {
3405 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
3406 		return (rv);
3407 	}
3408 
3409 	ldcp->hstate |= DRING_INFO_SENT;
3410 	DBG2(vgenp, ldcp, "DRING_INFO_SENT \n");
3411 
3412 	return (VGEN_SUCCESS);
3413 }
3414 
3415 static int
3416 vgen_send_rdx_info(vgen_ldc_t *ldcp)
3417 {
3418 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
3419 	vio_rdx_msg_t	rdxmsg;
3420 	vio_msg_tag_t	*tagp = &rdxmsg.tag;
3421 	int		rv;
3422 
3423 	bzero(&rdxmsg, sizeof (rdxmsg));
3424 
3425 	tagp->vio_msgtype = VIO_TYPE_CTRL;
3426 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
3427 	tagp->vio_subtype_env = VIO_RDX;
3428 	tagp->vio_sid = ldcp->local_sid;
3429 
3430 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (rdxmsg), B_FALSE);
3431 	if (rv != VGEN_SUCCESS) {
3432 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
3433 		return (rv);
3434 	}
3435 
3436 	ldcp->hstate |= RDX_INFO_SENT;
3437 	DBG2(vgenp, ldcp, "RDX_INFO_SENT\n");
3438 
3439 	return (VGEN_SUCCESS);
3440 }
3441 
3442 /* send multicast addr info message to vsw */
3443 static int
3444 vgen_send_mcast_info(vgen_ldc_t *ldcp)
3445 {
3446 	vnet_mcast_msg_t	mcastmsg;
3447 	vnet_mcast_msg_t	*msgp;
3448 	vio_msg_tag_t		*tagp;
3449 	vgen_t			*vgenp;
3450 	struct ether_addr	*mca;
3451 	int			rv;
3452 	int			i;
3453 	uint32_t		size;
3454 	uint32_t		mccount;
3455 	uint32_t		n;
3456 
3457 	msgp = &mcastmsg;
3458 	tagp = &msgp->tag;
3459 	vgenp = LDC_TO_VGEN(ldcp);
3460 
3461 	mccount = vgenp->mccount;
3462 	i = 0;
3463 
3464 	do {
3465 		tagp->vio_msgtype = VIO_TYPE_CTRL;
3466 		tagp->vio_subtype = VIO_SUBTYPE_INFO;
3467 		tagp->vio_subtype_env = VNET_MCAST_INFO;
3468 		tagp->vio_sid = ldcp->local_sid;
3469 
3470 		n = ((mccount >= VNET_NUM_MCAST) ? VNET_NUM_MCAST : mccount);
3471 		size = n * sizeof (struct ether_addr);
3472 
3473 		mca = &(vgenp->mctab[i]);
3474 		bcopy(mca, (msgp->mca), size);
3475 		msgp->set = B_TRUE;
3476 		msgp->count = n;
3477 
3478 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msgp),
3479 		    B_FALSE);
3480 		if (rv != VGEN_SUCCESS) {
3481 			DWARN(vgenp, ldcp, "vgen_sendmsg err(%d)\n", rv);
3482 			return (rv);
3483 		}
3484 
3485 		mccount -= n;
3486 		i += n;
3487 
3488 	} while (mccount);
3489 
3490 	return (VGEN_SUCCESS);
3491 }
3492 
3493 /*
3494  * vgen_dds_rx -- post DDS messages to vnet.
3495  */
3496 static int
3497 vgen_dds_rx(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
3498 {
3499 	vio_dds_msg_t	*dmsg = (vio_dds_msg_t *)tagp;
3500 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
3501 
3502 	if (dmsg->dds_class != DDS_VNET_NIU) {
3503 		DWARN(vgenp, ldcp, "Unknown DDS class, dropping");
3504 		return (EBADMSG);
3505 	}
3506 	vnet_dds_rx(vgenp->vnetp, dmsg);
3507 	return (0);
3508 }
3509 
3510 /*
3511  * vgen_dds_tx -- an interface called by vnet to send DDS messages.
3512  */
3513 int
3514 vgen_dds_tx(void *arg, void *msg)
3515 {
3516 	vgen_t		*vgenp = arg;
3517 	vio_dds_msg_t	*dmsg = msg;
3518 	vgen_portlist_t	*plistp = &vgenp->vgenports;
3519 	vgen_ldc_t	*ldcp;
3520 	int		rv = EIO;
3521 
3522 	READ_ENTER(&plistp->rwlock);
3523 	ldcp = vgenp->vsw_portp->ldcp;
3524 	if ((ldcp == NULL) || (ldcp->hphase != VH_DONE)) {
3525 		goto vgen_dsend_exit;
3526 	}
3527 
3528 	dmsg->tag.vio_sid = ldcp->local_sid;
3529 	rv = vgen_sendmsg(ldcp, (caddr_t)dmsg, sizeof (vio_dds_msg_t), B_FALSE);
3530 	if (rv != VGEN_SUCCESS) {
3531 		rv = EIO;
3532 	} else {
3533 		rv = 0;
3534 	}
3535 
3536 vgen_dsend_exit:
3537 	RW_EXIT(&plistp->rwlock);
3538 	return (rv);
3539 
3540 }
3541 
3542 /* Initiate Phase 2 of handshake */
3543 static int
3544 vgen_handshake_phase2(vgen_ldc_t *ldcp)
3545 {
3546 	int	rv;
3547 
3548 #ifdef DEBUG
3549 	if (vgen_inject_error(ldcp, VGEN_ERR_HSTATE)) {
3550 		/* simulate out of state condition */
3551 		vgen_inject_err_flag &= ~(VGEN_ERR_HSTATE);
3552 		rv = vgen_send_rdx_info(ldcp);
3553 		return (rv);
3554 	}
3555 	if (vgen_inject_error(ldcp, VGEN_ERR_HTIMEOUT)) {
3556 		/* simulate timeout condition */
3557 		vgen_inject_err_flag &= ~(VGEN_ERR_HTIMEOUT);
3558 		return (VGEN_SUCCESS);
3559 	}
3560 #endif
3561 	rv = vgen_send_attr_info(ldcp);
3562 	if (rv != VGEN_SUCCESS) {
3563 		return (rv);
3564 	}
3565 
3566 	return (VGEN_SUCCESS);
3567 }
3568 
3569 static int
3570 vgen_handshake_phase3(vgen_ldc_t *ldcp)
3571 {
3572 	int		rv;
3573 	vgen_hparams_t	*lp = &ldcp->local_hparams;
3574 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
3575 	vgen_stats_t	*statsp = &ldcp->stats;
3576 
3577 	/* dring mode has been negotiated in attr phase; save in stats */
3578 	statsp->dring_mode = lp->dring_mode;
3579 
3580 	if (lp->dring_mode == VIO_RX_DRING_DATA) {	/* RxDringData mode */
3581 		ldcp->rx_dringdata = vgen_handle_dringdata_shm;
3582 		ldcp->tx_dringdata = vgen_dringsend_shm;
3583 		if (!VGEN_PRI_ETH_DEFINED(vgenp)) {
3584 			/*
3585 			 * If priority frames are not in use, we don't need a
3586 			 * separate wrapper function for 'tx', so we set it to
3587 			 * 'tx_dringdata'. If priority frames are configured,
3588 			 * we leave the 'tx' pointer as is (initialized in
3589 			 * vgen_set_vnet_proto_ops()).
3590 			 */
3591 			ldcp->tx = ldcp->tx_dringdata;
3592 		}
3593 	} else {					/* TxDring mode */
3594 		ldcp->msg_thread = thread_create(NULL,
3595 		    2 * DEFAULTSTKSZ, vgen_ldc_msg_worker, ldcp, 0,
3596 		    &p0, TS_RUN, maxclsyspri);
3597 	}
3598 
3599 	rv = vgen_create_dring(ldcp);
3600 	if (rv != VGEN_SUCCESS) {
3601 		return (rv);
3602 	}
3603 
3604 	/* update local dring_info params */
3605 	if (lp->dring_mode == VIO_RX_DRING_DATA) {
3606 		bcopy(&(ldcp->rx_dring_cookie),
3607 		    &(ldcp->local_hparams.dring_cookie),
3608 		    sizeof (ldc_mem_cookie_t));
3609 		ldcp->local_hparams.dring_ncookies = ldcp->rx_dring_ncookies;
3610 		ldcp->local_hparams.num_desc = ldcp->num_rxds;
3611 		ldcp->local_hparams.desc_size =
3612 		    sizeof (vnet_rx_dringdata_desc_t);
3613 		rv = vgen_send_rx_dring_reg(ldcp);
3614 	} else {
3615 		bcopy(&(ldcp->tx_dring_cookie),
3616 		    &(ldcp->local_hparams.dring_cookie),
3617 		    sizeof (ldc_mem_cookie_t));
3618 		ldcp->local_hparams.dring_ncookies = ldcp->tx_dring_ncookies;
3619 		ldcp->local_hparams.num_desc = ldcp->num_txds;
3620 		ldcp->local_hparams.desc_size = sizeof (vnet_public_desc_t);
3621 		rv = vgen_send_tx_dring_reg(ldcp);
3622 	}
3623 
3624 	if (rv != VGEN_SUCCESS) {
3625 		return (rv);
3626 	}
3627 
3628 	return (VGEN_SUCCESS);
3629 }
3630 
3631 /*
3632  * Set vnet-protocol-version dependent functions based on version.
3633  */
3634 static void
3635 vgen_set_vnet_proto_ops(vgen_ldc_t *ldcp)
3636 {
3637 	vgen_hparams_t	*lp = &ldcp->local_hparams;
3638 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
3639 
3640 	/*
3641 	 * Setup the appropriate dring data processing routine and any
3642 	 * associated thread based on the version.
3643 	 *
3644 	 * In versions < 1.6, we only support TxDring mode. In this mode, the
3645 	 * msg worker thread processes all types of VIO msgs (ctrl and data).
3646 	 *
3647 	 * In versions >= 1.6, we also support RxDringData mode. In this mode,
3648 	 * all msgs including dring data messages are handled directly by the
3649 	 * callback (intr) thread. The dring data msgs (msgtype: VIO_TYPE_DATA,
3650 	 * subtype: VIO_SUBTYPE_INFO, subtype_env: VIO_DRING_DATA) can also be
3651 	 * disabled while the polling thread is active, in which case the
3652 	 * polling thread processes the rcv descriptor ring.
3653 	 *
3654 	 * However, for versions >= 1.6, we can force to only use TxDring mode.
3655 	 * This could happen if RxDringData mode has been disabled (see
3656 	 * below) on this guest or on the peer guest. This info is determined
3657 	 * as part of attr exchange phase of handshake. Hence, we setup these
3658 	 * pointers for v1.6 after attr msg phase completes during handshake.
3659 	 */
3660 	if (VGEN_VER_GTEQ(ldcp, 1, 6)) {	/* Ver >= 1.6 */
3661 		/*
3662 		 * Set data dring mode for vgen_send_attr_info().
3663 		 */
3664 		if (vgen_mapin_avail(ldcp) == B_TRUE) {
3665 			lp->dring_mode = (VIO_RX_DRING_DATA | VIO_TX_DRING);
3666 		} else {
3667 			lp->dring_mode = VIO_TX_DRING;
3668 		}
3669 	} else {				/* Ver <= 1.5 */
3670 		lp->dring_mode = VIO_TX_DRING;
3671 	}
3672 
3673 	if (VGEN_VER_GTEQ(ldcp, 1, 5)) {
3674 		vgen_port_t	*portp = ldcp->portp;
3675 		vnet_t		*vnetp = vgenp->vnetp;
3676 		/*
3677 		 * If the version negotiated with vswitch is >= 1.5 (link
3678 		 * status update support), set the required bits in our
3679 		 * attributes if this vnet device has been configured to get
3680 		 * physical link state updates.
3681 		 */
3682 		if (portp == vgenp->vsw_portp && vnetp->pls_update == B_TRUE) {
3683 			lp->physlink_update = PHYSLINK_UPDATE_STATE;
3684 		} else {
3685 			lp->physlink_update = PHYSLINK_UPDATE_NONE;
3686 		}
3687 	}
3688 
3689 	if (VGEN_VER_GTEQ(ldcp, 1, 4)) {
3690 		/*
3691 		 * If the version negotiated with peer is >= 1.4(Jumbo Frame
3692 		 * Support), set the mtu in our attributes to max_frame_size.
3693 		 */
3694 		lp->mtu = vgenp->max_frame_size;
3695 	} else  if (VGEN_VER_EQ(ldcp, 1, 3)) {
3696 		/*
3697 		 * If the version negotiated with peer is == 1.3 (Vlan Tag
3698 		 * Support) set the attr.mtu to ETHERMAX + VLAN_TAGSZ.
3699 		 */
3700 		lp->mtu = ETHERMAX + VLAN_TAGSZ;
3701 	} else {
3702 		vgen_port_t	*portp = ldcp->portp;
3703 		vnet_t		*vnetp = vgenp->vnetp;
3704 		/*
3705 		 * Pre-1.3 peers expect max frame size of ETHERMAX.
3706 		 * We can negotiate that size with those peers provided the
3707 		 * following conditions are true:
3708 		 * - Only pvid is defined for our peer and there are no vids.
3709 		 * - pvids are equal.
3710 		 * If the above conditions are true, then we can send/recv only
3711 		 * untagged frames of max size ETHERMAX.
3712 		 */
3713 		if (portp->nvids == 0 && portp->pvid == vnetp->pvid) {
3714 			lp->mtu = ETHERMAX;
3715 		}
3716 	}
3717 
3718 	if (VGEN_VER_GTEQ(ldcp, 1, 2)) {	/* Versions >= 1.2 */
3719 		/*
3720 		 * Starting v1.2 we support priority frames; so set the
3721 		 * dring processing routines and xfer modes based on the
3722 		 * version. Note that the dring routines could be changed after
3723 		 * attribute handshake phase for versions >= 1.6 (See
3724 		 * vgen_handshake_phase3())
3725 		 */
3726 		ldcp->tx_dringdata = vgen_dringsend;
3727 		ldcp->rx_dringdata = vgen_handle_dringdata;
3728 
3729 		if (VGEN_PRI_ETH_DEFINED(vgenp)) {
3730 			/*
3731 			 * Enable priority routines and pkt mode only if
3732 			 * at least one pri-eth-type is specified in MD.
3733 			 */
3734 			ldcp->tx = vgen_ldcsend;
3735 			ldcp->rx_pktdata = vgen_handle_pkt_data;
3736 
3737 			/* set xfer mode for vgen_send_attr_info() */
3738 			lp->xfer_mode = VIO_PKT_MODE | VIO_DRING_MODE_V1_2;
3739 		} else {
3740 			/* No priority eth types defined in MD */
3741 			ldcp->tx = ldcp->tx_dringdata;
3742 			ldcp->rx_pktdata = vgen_handle_pkt_data_nop;
3743 
3744 			/* Set xfer mode for vgen_send_attr_info() */
3745 			lp->xfer_mode = VIO_DRING_MODE_V1_2;
3746 		}
3747 	} else { /* Versions prior to 1.2  */
3748 		vgen_reset_vnet_proto_ops(ldcp);
3749 	}
3750 }
3751 
3752 /*
3753  * Reset vnet-protocol-version dependent functions to pre-v1.2.
3754  */
3755 static void
3756 vgen_reset_vnet_proto_ops(vgen_ldc_t *ldcp)
3757 {
3758 	vgen_hparams_t	*lp = &ldcp->local_hparams;
3759 
3760 	ldcp->tx = ldcp->tx_dringdata = vgen_dringsend;
3761 	ldcp->rx_dringdata = vgen_handle_dringdata;
3762 	ldcp->rx_pktdata = vgen_handle_pkt_data_nop;
3763 
3764 	/* set xfer mode for vgen_send_attr_info() */
3765 	lp->xfer_mode = VIO_DRING_MODE_V1_0;
3766 }
3767 
3768 static void
3769 vgen_vlan_unaware_port_reset(vgen_port_t *portp)
3770 {
3771 	vgen_ldc_t	*ldcp = portp->ldcp;
3772 	vgen_t		*vgenp = portp->vgenp;
3773 	vnet_t		*vnetp = vgenp->vnetp;
3774 	boolean_t	need_reset = B_FALSE;
3775 
3776 	mutex_enter(&ldcp->cblock);
3777 
3778 	/*
3779 	 * If the peer is vlan_unaware(ver < 1.3), reset channel and terminate
3780 	 * the connection. See comments in vgen_set_vnet_proto_ops().
3781 	 */
3782 	if (ldcp->hphase == VH_DONE && VGEN_VER_LT(ldcp, 1, 3) &&
3783 	    (portp->nvids != 0 || portp->pvid != vnetp->pvid)) {
3784 		need_reset = B_TRUE;
3785 	}
3786 	mutex_exit(&ldcp->cblock);
3787 
3788 	if (need_reset == B_TRUE) {
3789 		(void) vgen_ldc_reset(ldcp, VGEN_OTHER);
3790 	}
3791 }
3792 
3793 static void
3794 vgen_port_reset(vgen_port_t *portp)
3795 {
3796 	(void) vgen_ldc_reset(portp->ldcp, VGEN_OTHER);
3797 }
3798 
3799 static void
3800 vgen_reset_vlan_unaware_ports(vgen_t *vgenp)
3801 {
3802 	vgen_port_t	*portp;
3803 	vgen_portlist_t	*plistp;
3804 
3805 	plistp = &(vgenp->vgenports);
3806 	READ_ENTER(&plistp->rwlock);
3807 
3808 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
3809 
3810 		vgen_vlan_unaware_port_reset(portp);
3811 
3812 	}
3813 
3814 	RW_EXIT(&plistp->rwlock);
3815 }
3816 
3817 static void
3818 vgen_reset_vsw_port(vgen_t *vgenp)
3819 {
3820 	vgen_port_t	*portp;
3821 
3822 	if ((portp = vgenp->vsw_portp) != NULL) {
3823 		vgen_port_reset(portp);
3824 	}
3825 }
3826 
3827 static void
3828 vgen_setup_handshake_params(vgen_ldc_t *ldcp)
3829 {
3830 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
3831 
3832 	/*
3833 	 * clear local handshake params and initialize.
3834 	 */
3835 	bzero(&(ldcp->local_hparams), sizeof (ldcp->local_hparams));
3836 
3837 	/* set version to the highest version supported */
3838 	ldcp->local_hparams.ver_major =
3839 	    ldcp->vgen_versions[0].ver_major;
3840 	ldcp->local_hparams.ver_minor =
3841 	    ldcp->vgen_versions[0].ver_minor;
3842 	ldcp->local_hparams.dev_class = VDEV_NETWORK;
3843 
3844 	/* set attr_info params */
3845 	ldcp->local_hparams.mtu = vgenp->max_frame_size;
3846 	ldcp->local_hparams.addr =
3847 	    vnet_macaddr_strtoul(vgenp->macaddr);
3848 	ldcp->local_hparams.addr_type = ADDR_TYPE_MAC;
3849 	ldcp->local_hparams.xfer_mode = VIO_DRING_MODE_V1_0;
3850 	ldcp->local_hparams.ack_freq = 0;	/* don't need acks */
3851 	ldcp->local_hparams.physlink_update = PHYSLINK_UPDATE_NONE;
3852 
3853 	/* reset protocol version specific function pointers */
3854 	vgen_reset_vnet_proto_ops(ldcp);
3855 	ldcp->local_hparams.dring_ident = 0;
3856 	ldcp->local_hparams.dring_ready = B_FALSE;
3857 
3858 	/* clear peer_hparams */
3859 	bzero(&(ldcp->peer_hparams), sizeof (ldcp->peer_hparams));
3860 	ldcp->peer_hparams.dring_ready = B_FALSE;
3861 }
3862 
3863 /*
3864  * Process Channel Reset. We tear down the resources (timers, threads,
3865  * descriptor rings etc) associated with the channel and reinitialize the
3866  * channel based on the flags.
3867  *
3868  * Arguments:
3869  *    ldcp:	The channel being processed.
3870  *
3871  *    flags:
3872  *	VGEN_FLAG_EVT_RESET:
3873  *		A ECONNRESET error occured while doing ldc operations such as
3874  *		ldc_read() or ldc_write(); the channel is already reset and it
3875  *		needs to be handled.
3876  *	VGEN_FLAG_NEED_LDCRESET:
3877  *		Some other errors occured and the error handling code needs to
3878  *		explicitly reset the channel and restart handshake with the
3879  *		peer. The error could be either in ldc operations or other
3880  *		parts of the code such as timeouts or mdeg events etc.
3881  *	VGEN_FLAG_UNINIT:
3882  *		The channel is being torn down; no need to bring up the channel
3883  *		after resetting.
3884  */
3885 static int
3886 vgen_process_reset(vgen_ldc_t *ldcp, int flags)
3887 {
3888 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
3889 	vgen_port_t	*portp = ldcp->portp;
3890 	vgen_hparams_t  *lp = &ldcp->local_hparams;
3891 	boolean_t	is_vsw_port = B_FALSE;
3892 	boolean_t	link_update = B_FALSE;
3893 	ldc_status_t	istatus;
3894 	int		rv;
3895 	uint_t		retries = 0;
3896 	timeout_id_t	htid = 0;
3897 	timeout_id_t	wd_tid = 0;
3898 
3899 	if (portp == vgenp->vsw_portp) { /* vswitch port ? */
3900 		is_vsw_port = B_TRUE;
3901 	}
3902 
3903 	/*
3904 	 * Report that the channel is being reset; it ensures that any HybridIO
3905 	 * configuration is torn down before we reset the channel if it is not
3906 	 * already reset (flags == VGEN_FLAG_NEED_LDCRESET).
3907 	 */
3908 	if (is_vsw_port == B_TRUE) {
3909 		vio_net_report_err_t rep_err = portp->vcb.vio_net_report_err;
3910 		rep_err(portp->vhp, VIO_NET_RES_DOWN);
3911 	}
3912 
3913 again:
3914 	mutex_enter(&ldcp->cblock);
3915 
3916 	/* Clear hstate and hphase */
3917 	ldcp->hstate = 0;
3918 	ldcp->hphase = VH_PHASE0;
3919 	if (flags == VGEN_FLAG_NEED_LDCRESET || flags == VGEN_FLAG_UNINIT) {
3920 		DWARN(vgenp, ldcp, "Doing Channel Reset...\n");
3921 		(void) ldc_down(ldcp->ldc_handle);
3922 		(void) ldc_status(ldcp->ldc_handle, &istatus);
3923 		DWARN(vgenp, ldcp, "Reset Done, ldc_status(%d)\n", istatus);
3924 		ldcp->ldc_status = istatus;
3925 
3926 		if (flags == VGEN_FLAG_UNINIT) {
3927 			/* disable further callbacks */
3928 			rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE);
3929 			if (rv != 0) {
3930 				DWARN(vgenp, ldcp, "ldc_set_cb_mode failed\n");
3931 			}
3932 		}
3933 
3934 	} else {
3935 		/* flags == VGEN_FLAG_EVT_RESET */
3936 		DWARN(vgenp, ldcp, "ldc status(%d)\n", ldcp->ldc_status);
3937 	}
3938 
3939 	/*
3940 	 * As the connection is now reset, mark the channel
3941 	 * link_state as 'down' and notify the stack if needed.
3942 	 */
3943 	if (ldcp->link_state != LINK_STATE_DOWN) {
3944 		ldcp->link_state = LINK_STATE_DOWN;
3945 
3946 		if (is_vsw_port == B_TRUE) { /* vswitch port ? */
3947 			/*
3948 			 * As the channel link is down, mark physical link also
3949 			 * as down. After the channel comes back up and
3950 			 * handshake completes, we will get an update on the
3951 			 * physlink state from vswitch (if this device has been
3952 			 * configured to get phys link updates).
3953 			 */
3954 			vgenp->phys_link_state = LINK_STATE_DOWN;
3955 			link_update = B_TRUE;
3956 
3957 		}
3958 	}
3959 
3960 	if (ldcp->htid != 0) {
3961 		htid = ldcp->htid;
3962 		ldcp->htid = 0;
3963 	}
3964 
3965 	if (ldcp->wd_tid != 0) {
3966 		wd_tid = ldcp->wd_tid;
3967 		ldcp->wd_tid = 0;
3968 	}
3969 
3970 	mutex_exit(&ldcp->cblock);
3971 
3972 	/* Update link state to the stack */
3973 	if (link_update == B_TRUE) {
3974 		vgen_link_update(vgenp, ldcp->link_state);
3975 	}
3976 
3977 	/*
3978 	 * As the channel is being reset, redirect traffic to the peer through
3979 	 * vswitch, until the channel becomes ready to be used again.
3980 	 */
3981 	if (is_vsw_port == B_FALSE && vgenp->vsw_portp != NULL) {
3982 		(void) atomic_swap_32(&portp->use_vsw_port, B_TRUE);
3983 	}
3984 
3985 	/* Cancel handshake watchdog timeout */
3986 	if (htid) {
3987 		(void) untimeout(htid);
3988 	}
3989 
3990 	/* Cancel transmit watchdog timeout */
3991 	if (wd_tid) {
3992 		(void) untimeout(wd_tid);
3993 	}
3994 
3995 	/* Stop the msg worker thread */
3996 	if (lp->dring_mode == VIO_TX_DRING && curthread != ldcp->msg_thread) {
3997 		vgen_stop_msg_thread(ldcp);
3998 	}
3999 
4000 	/* Grab all locks while we tear down tx/rx resources */
4001 	LDC_LOCK(ldcp);
4002 
4003 	/* Destroy the local dring which is exported to the peer */
4004 	vgen_destroy_dring(ldcp);
4005 
4006 	/* Unmap the remote dring which is imported from the peer */
4007 	vgen_unmap_dring(ldcp);
4008 
4009 	/*
4010 	 * Bring up the channel and restart handshake
4011 	 * only if the channel is not being torn down.
4012 	 */
4013 	if (flags != VGEN_FLAG_UNINIT) {
4014 
4015 		/* Setup handshake parameters to restart a new handshake */
4016 		vgen_setup_handshake_params(ldcp);
4017 
4018 		/* Bring the channel up */
4019 		vgen_ldc_up(ldcp);
4020 
4021 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
4022 			DWARN(vgenp, ldcp, "ldc_status err\n");
4023 		} else {
4024 			ldcp->ldc_status = istatus;
4025 		}
4026 
4027 		/* If the channel is UP, start handshake */
4028 		if (ldcp->ldc_status == LDC_UP) {
4029 
4030 			if (is_vsw_port == B_FALSE) {
4031 				/*
4032 				 * Channel is up; use this port from now on.
4033 				 */
4034 				(void) atomic_swap_32(&portp->use_vsw_port,
4035 				    B_FALSE);
4036 			}
4037 
4038 			/* Initialize local session id */
4039 			ldcp->local_sid = ddi_get_lbolt();
4040 
4041 			/* clear peer session id */
4042 			ldcp->peer_sid = 0;
4043 
4044 			/*
4045 			 * Initiate Handshake process with peer ldc endpoint by
4046 			 * sending version info vio message. If that fails we
4047 			 * go back to the top of this function to process the
4048 			 * error again. Note that we can be in this loop for
4049 			 * 'vgen_ldc_max_resets' times, after which the channel
4050 			 * is not brought up.
4051 			 */
4052 			mutex_exit(&ldcp->tclock);
4053 			mutex_exit(&ldcp->txlock);
4054 			mutex_exit(&ldcp->wrlock);
4055 			mutex_exit(&ldcp->rxlock);
4056 			rv = vgen_handshake(vh_nextphase(ldcp));
4057 			mutex_exit(&ldcp->cblock);
4058 			if (rv != 0) {
4059 				if (rv == ECONNRESET) {
4060 					flags = VGEN_FLAG_EVT_RESET;
4061 				} else {
4062 					flags = VGEN_FLAG_NEED_LDCRESET;
4063 				}
4064 
4065 				/*
4066 				 * We still hold 'reset_in_progress'; so we can
4067 				 * just loop back to the top to restart error
4068 				 * processing.
4069 				 */
4070 				goto again;
4071 			}
4072 		} else {
4073 			LDC_UNLOCK(ldcp);
4074 		}
4075 
4076 	} else {	/* flags == VGEN_FLAG_UNINIT */
4077 
4078 		/* Close the channel - retry on EAGAIN */
4079 		while ((rv = ldc_close(ldcp->ldc_handle)) == EAGAIN) {
4080 			if (++retries > vgen_ldccl_retries) {
4081 				break;
4082 			}
4083 			drv_usecwait(VGEN_LDC_CLOSE_DELAY);
4084 		}
4085 		if (rv != 0) {
4086 			cmn_err(CE_NOTE,
4087 			    "!vnet%d: Error(%d) closing the channel(0x%lx)\n",
4088 			    vgenp->instance, rv, ldcp->ldc_id);
4089 		}
4090 
4091 		ldcp->ldc_reset_count = 0;
4092 		ldcp->ldc_status = LDC_INIT;
4093 		ldcp->flags &= ~(CHANNEL_STARTED);
4094 
4095 		LDC_UNLOCK(ldcp);
4096 	}
4097 
4098 	/* Done processing channel reset; clear the atomic flag */
4099 	ldcp->reset_in_progress = 0;
4100 	return (0);
4101 }
4102 
4103 /*
4104  * Initiate handshake with the peer by sending various messages
4105  * based on the handshake-phase that the channel is currently in.
4106  */
4107 static int
4108 vgen_handshake(vgen_ldc_t *ldcp)
4109 {
4110 	uint32_t	hphase = ldcp->hphase;
4111 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4112 	int		rv = 0;
4113 	timeout_id_t	htid;
4114 
4115 	switch (hphase) {
4116 
4117 	case VH_PHASE1:
4118 
4119 		/*
4120 		 * start timer, for entire handshake process, turn this timer
4121 		 * off if all phases of handshake complete successfully and
4122 		 * hphase goes to VH_DONE(below) or channel is reset due to
4123 		 * errors or vgen_ldc_uninit() is invoked(vgen_stop).
4124 		 */
4125 		ASSERT(ldcp->htid == 0);
4126 		ldcp->htid = timeout(vgen_hwatchdog, (caddr_t)ldcp,
4127 		    drv_usectohz(vgen_hwd_interval * MICROSEC));
4128 
4129 		/* Phase 1 involves negotiating the version */
4130 		rv = vgen_send_version_negotiate(ldcp);
4131 		break;
4132 
4133 	case VH_PHASE2:
4134 		rv = vgen_handshake_phase2(ldcp);
4135 		break;
4136 
4137 	case VH_PHASE3:
4138 		rv = vgen_handshake_phase3(ldcp);
4139 		break;
4140 
4141 	case VH_PHASE4:
4142 		rv = vgen_send_rdx_info(ldcp);
4143 		break;
4144 
4145 	case VH_DONE:
4146 
4147 		ldcp->ldc_reset_count = 0;
4148 
4149 		DBG1(vgenp, ldcp, "Handshake Done\n");
4150 
4151 		/*
4152 		 * The channel is up and handshake is done successfully. Now we
4153 		 * can mark the channel link_state as 'up'. We also notify the
4154 		 * stack if the channel is connected to vswitch.
4155 		 */
4156 		ldcp->link_state = LINK_STATE_UP;
4157 
4158 		if (ldcp->portp == vgenp->vsw_portp) {
4159 			/*
4160 			 * If this channel(port) is connected to vsw,
4161 			 * need to sync multicast table with vsw.
4162 			 */
4163 			rv = vgen_send_mcast_info(ldcp);
4164 			if (rv != VGEN_SUCCESS)
4165 				break;
4166 
4167 			if (vgenp->pls_negotiated == B_FALSE) {
4168 				/*
4169 				 * We haven't negotiated with vswitch to get
4170 				 * physical link state updates. We can update
4171 				 * update the stack at this point as the
4172 				 * channel to vswitch is up and the handshake
4173 				 * is done successfully.
4174 				 *
4175 				 * If we have negotiated to get physical link
4176 				 * state updates, then we won't notify the
4177 				 * the stack here; we do that as soon as
4178 				 * vswitch sends us the initial phys link state
4179 				 * (see vgen_handle_physlink_info()).
4180 				 */
4181 				mutex_exit(&ldcp->cblock);
4182 				vgen_link_update(vgenp, ldcp->link_state);
4183 				mutex_enter(&ldcp->cblock);
4184 			}
4185 		}
4186 
4187 		if (ldcp->htid != 0) {
4188 			htid = ldcp->htid;
4189 			ldcp->htid = 0;
4190 
4191 			mutex_exit(&ldcp->cblock);
4192 			(void) untimeout(htid);
4193 			mutex_enter(&ldcp->cblock);
4194 		}
4195 
4196 		/*
4197 		 * Check if mac layer should be notified to restart
4198 		 * transmissions. This can happen if the channel got
4199 		 * reset and while tx_blocked is set.
4200 		 */
4201 		mutex_enter(&ldcp->tclock);
4202 		if (ldcp->tx_blocked) {
4203 			vio_net_tx_update_t vtx_update =
4204 			    ldcp->portp->vcb.vio_net_tx_update;
4205 
4206 			ldcp->tx_blocked = B_FALSE;
4207 			vtx_update(ldcp->portp->vhp);
4208 		}
4209 		mutex_exit(&ldcp->tclock);
4210 
4211 		/* start transmit watchdog timer */
4212 		ldcp->wd_tid = timeout(vgen_tx_watchdog, (caddr_t)ldcp,
4213 		    drv_usectohz(vgen_txwd_interval * 1000));
4214 
4215 		break;
4216 
4217 	default:
4218 		break;
4219 	}
4220 
4221 	return (rv);
4222 }
4223 
4224 /*
4225  * Check if the current handshake phase has completed successfully and
4226  * return the status.
4227  */
4228 static int
4229 vgen_handshake_done(vgen_ldc_t *ldcp)
4230 {
4231 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4232 	uint32_t	hphase = ldcp->hphase;
4233 	int		status = 0;
4234 
4235 	switch (hphase) {
4236 
4237 	case VH_PHASE1:
4238 		/*
4239 		 * Phase1 is done, if version negotiation
4240 		 * completed successfully.
4241 		 */
4242 		status = ((ldcp->hstate & VER_NEGOTIATED) ==
4243 		    VER_NEGOTIATED);
4244 		break;
4245 
4246 	case VH_PHASE2:
4247 		/*
4248 		 * Phase 2 is done, if attr info
4249 		 * has been exchanged successfully.
4250 		 */
4251 		status = ((ldcp->hstate & ATTR_INFO_EXCHANGED) ==
4252 		    ATTR_INFO_EXCHANGED);
4253 		break;
4254 
4255 	case VH_PHASE3:
4256 		/*
4257 		 * Phase 3 is done, if dring registration
4258 		 * has been exchanged successfully.
4259 		 */
4260 		status = ((ldcp->hstate & DRING_INFO_EXCHANGED) ==
4261 		    DRING_INFO_EXCHANGED);
4262 		break;
4263 
4264 	case VH_PHASE4:
4265 		/* Phase 4 is done, if rdx msg has been exchanged */
4266 		status = ((ldcp->hstate & RDX_EXCHANGED) ==
4267 		    RDX_EXCHANGED);
4268 		break;
4269 
4270 	default:
4271 		break;
4272 	}
4273 
4274 	if (status == 0) {
4275 		return (VGEN_FAILURE);
4276 	}
4277 	DBG2(vgenp, ldcp, "PHASE(%d)\n", hphase);
4278 	return (VGEN_SUCCESS);
4279 }
4280 
4281 /*
4282  * Link State Update Notes:
4283  * The link state of the channel connected to vswitch is reported as the link
4284  * state of the vnet device, by default. If the channel is down or reset, then
4285  * the link state is marked 'down'. If the channel is 'up' *and* handshake
4286  * between the vnet and vswitch is successful, then the link state is marked
4287  * 'up'. If physical network link state is desired, then the vnet device must
4288  * be configured to get physical link updates and the 'linkprop' property
4289  * in the virtual-device MD node indicates this. As part of attribute exchange
4290  * the vnet device negotiates with the vswitch to obtain physical link state
4291  * updates. If it successfully negotiates, vswitch sends an initial physlink
4292  * msg once the handshake is done and further whenever the physical link state
4293  * changes. Currently we don't have mac layer interfaces to report two distinct
4294  * link states - virtual and physical. Thus, if the vnet has been configured to
4295  * get physical link updates, then the link status will be reported as 'up'
4296  * only when both the virtual and physical links are up.
4297  */
4298 static void
4299 vgen_link_update(vgen_t *vgenp, link_state_t link_state)
4300 {
4301 	vnet_link_update(vgenp->vnetp, link_state);
4302 }
4303 
4304 /*
4305  * Handle a version info msg from the peer or an ACK/NACK from the peer
4306  * to a version info msg that we sent.
4307  */
4308 static int
4309 vgen_handle_version_negotiate(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4310 {
4311 	vgen_t		*vgenp;
4312 	vio_ver_msg_t	*vermsg = (vio_ver_msg_t *)tagp;
4313 	int		ack = 0;
4314 	int		failed = 0;
4315 	int		idx;
4316 	vgen_ver_t	*versions = ldcp->vgen_versions;
4317 	int		rv = 0;
4318 
4319 	vgenp = LDC_TO_VGEN(ldcp);
4320 	DBG1(vgenp, ldcp, "enter\n");
4321 	switch (tagp->vio_subtype) {
4322 	case VIO_SUBTYPE_INFO:
4323 
4324 		/*  Cache sid of peer if this is the first time */
4325 		if (ldcp->peer_sid == 0) {
4326 			DBG2(vgenp, ldcp, "Caching peer_sid(%x)\n",
4327 			    tagp->vio_sid);
4328 			ldcp->peer_sid = tagp->vio_sid;
4329 		}
4330 
4331 		if (ldcp->hphase != VH_PHASE1) {
4332 			/*
4333 			 * If we are not already in VH_PHASE1, reset to
4334 			 * pre-handshake state, and initiate handshake
4335 			 * to the peer too.
4336 			 */
4337 			return (EINVAL);
4338 		}
4339 
4340 		ldcp->hstate |= VER_INFO_RCVD;
4341 
4342 		/* save peer's requested values */
4343 		ldcp->peer_hparams.ver_major = vermsg->ver_major;
4344 		ldcp->peer_hparams.ver_minor = vermsg->ver_minor;
4345 		ldcp->peer_hparams.dev_class = vermsg->dev_class;
4346 
4347 		if ((vermsg->dev_class != VDEV_NETWORK) &&
4348 		    (vermsg->dev_class != VDEV_NETWORK_SWITCH)) {
4349 			/* unsupported dev_class, send NACK */
4350 
4351 			DWARN(vgenp, ldcp, "Version Negotiation Failed\n");
4352 
4353 			tagp->vio_subtype = VIO_SUBTYPE_NACK;
4354 			tagp->vio_sid = ldcp->local_sid;
4355 			/* send reply msg back to peer */
4356 			rv = vgen_sendmsg(ldcp, (caddr_t)tagp,
4357 			    sizeof (*vermsg), B_FALSE);
4358 			if (rv != VGEN_SUCCESS) {
4359 				return (rv);
4360 			}
4361 			return (VGEN_FAILURE);
4362 		}
4363 
4364 		DBG2(vgenp, ldcp, "VER_INFO_RCVD, ver(%d,%d)\n",
4365 		    vermsg->ver_major,  vermsg->ver_minor);
4366 
4367 		idx = 0;
4368 
4369 		for (;;) {
4370 
4371 			if (vermsg->ver_major > versions[idx].ver_major) {
4372 
4373 				/* nack with next lower version */
4374 				tagp->vio_subtype = VIO_SUBTYPE_NACK;
4375 				vermsg->ver_major = versions[idx].ver_major;
4376 				vermsg->ver_minor = versions[idx].ver_minor;
4377 				break;
4378 			}
4379 
4380 			if (vermsg->ver_major == versions[idx].ver_major) {
4381 
4382 				/* major version match - ACK version */
4383 				tagp->vio_subtype = VIO_SUBTYPE_ACK;
4384 				ack = 1;
4385 
4386 				/*
4387 				 * lower minor version to the one this endpt
4388 				 * supports, if necessary
4389 				 */
4390 				if (vermsg->ver_minor >
4391 				    versions[idx].ver_minor) {
4392 					vermsg->ver_minor =
4393 					    versions[idx].ver_minor;
4394 					ldcp->peer_hparams.ver_minor =
4395 					    versions[idx].ver_minor;
4396 				}
4397 				break;
4398 			}
4399 
4400 			idx++;
4401 
4402 			if (idx == VGEN_NUM_VER) {
4403 
4404 				/* no version match - send NACK */
4405 				tagp->vio_subtype = VIO_SUBTYPE_NACK;
4406 				vermsg->ver_major = 0;
4407 				vermsg->ver_minor = 0;
4408 				failed = 1;
4409 				break;
4410 			}
4411 
4412 		}
4413 
4414 		tagp->vio_sid = ldcp->local_sid;
4415 
4416 		/* send reply msg back to peer */
4417 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*vermsg),
4418 		    B_FALSE);
4419 		if (rv != VGEN_SUCCESS) {
4420 			return (rv);
4421 		}
4422 
4423 		if (ack) {
4424 			ldcp->hstate |= VER_ACK_SENT;
4425 			DBG2(vgenp, ldcp, "VER_ACK_SENT, ver(%d,%d) \n",
4426 			    vermsg->ver_major, vermsg->ver_minor);
4427 		}
4428 		if (failed) {
4429 			DWARN(vgenp, ldcp, "Negotiation Failed\n");
4430 			return (VGEN_FAILURE);
4431 		}
4432 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
4433 
4434 			/*  VER_ACK_SENT and VER_ACK_RCVD */
4435 
4436 			/* local and peer versions match? */
4437 			ASSERT((ldcp->local_hparams.ver_major ==
4438 			    ldcp->peer_hparams.ver_major) &&
4439 			    (ldcp->local_hparams.ver_minor ==
4440 			    ldcp->peer_hparams.ver_minor));
4441 
4442 			vgen_set_vnet_proto_ops(ldcp);
4443 
4444 			/* move to the next phase */
4445 			rv = vgen_handshake(vh_nextphase(ldcp));
4446 			if (rv != 0) {
4447 				return (rv);
4448 			}
4449 		}
4450 
4451 		break;
4452 
4453 	case VIO_SUBTYPE_ACK:
4454 
4455 		if (ldcp->hphase != VH_PHASE1) {
4456 			/*  This should not happen. */
4457 			DWARN(vgenp, ldcp, "Invalid Phase(%u)\n", ldcp->hphase);
4458 			return (VGEN_FAILURE);
4459 		}
4460 
4461 		/* SUCCESS - we have agreed on a version */
4462 		ldcp->local_hparams.ver_major = vermsg->ver_major;
4463 		ldcp->local_hparams.ver_minor = vermsg->ver_minor;
4464 		ldcp->hstate |= VER_ACK_RCVD;
4465 
4466 		DBG2(vgenp, ldcp, "VER_ACK_RCVD, ver(%d,%d) \n",
4467 		    vermsg->ver_major,  vermsg->ver_minor);
4468 
4469 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
4470 
4471 			/*  VER_ACK_SENT and VER_ACK_RCVD */
4472 
4473 			/* local and peer versions match? */
4474 			ASSERT((ldcp->local_hparams.ver_major ==
4475 			    ldcp->peer_hparams.ver_major) &&
4476 			    (ldcp->local_hparams.ver_minor ==
4477 			    ldcp->peer_hparams.ver_minor));
4478 
4479 			vgen_set_vnet_proto_ops(ldcp);
4480 
4481 			/* move to the next phase */
4482 			rv = vgen_handshake(vh_nextphase(ldcp));
4483 			if (rv != 0) {
4484 				return (rv);
4485 			}
4486 		}
4487 		break;
4488 
4489 	case VIO_SUBTYPE_NACK:
4490 
4491 		if (ldcp->hphase != VH_PHASE1) {
4492 			/*  This should not happen.  */
4493 			DWARN(vgenp, ldcp, "VER_NACK_RCVD Invalid "
4494 			"Phase(%u)\n", ldcp->hphase);
4495 			return (VGEN_FAILURE);
4496 		}
4497 
4498 		DBG2(vgenp, ldcp, "VER_NACK_RCVD next ver(%d,%d)\n",
4499 		    vermsg->ver_major, vermsg->ver_minor);
4500 
4501 		/* check if version in NACK is zero */
4502 		if (vermsg->ver_major == 0 && vermsg->ver_minor == 0) {
4503 			/*
4504 			 * Version Negotiation has failed.
4505 			 */
4506 			DWARN(vgenp, ldcp, "Version Negotiation Failed\n");
4507 			return (VGEN_FAILURE);
4508 		}
4509 
4510 		idx = 0;
4511 
4512 		for (;;) {
4513 
4514 			if (vermsg->ver_major > versions[idx].ver_major) {
4515 				/* select next lower version */
4516 
4517 				ldcp->local_hparams.ver_major =
4518 				    versions[idx].ver_major;
4519 				ldcp->local_hparams.ver_minor =
4520 				    versions[idx].ver_minor;
4521 				break;
4522 			}
4523 
4524 			if (vermsg->ver_major == versions[idx].ver_major) {
4525 				/* major version match */
4526 
4527 				ldcp->local_hparams.ver_major =
4528 				    versions[idx].ver_major;
4529 
4530 				ldcp->local_hparams.ver_minor =
4531 				    versions[idx].ver_minor;
4532 				break;
4533 			}
4534 
4535 			idx++;
4536 
4537 			if (idx == VGEN_NUM_VER) {
4538 				/*
4539 				 * no version match.
4540 				 * Version Negotiation has failed.
4541 				 */
4542 				DWARN(vgenp, ldcp,
4543 				    "Version Negotiation Failed\n");
4544 				return (VGEN_FAILURE);
4545 			}
4546 
4547 		}
4548 
4549 		rv = vgen_send_version_negotiate(ldcp);
4550 		if (rv != VGEN_SUCCESS) {
4551 			return (rv);
4552 		}
4553 
4554 		break;
4555 	}
4556 
4557 	DBG1(vgenp, ldcp, "exit\n");
4558 	return (VGEN_SUCCESS);
4559 }
4560 
4561 static int
4562 vgen_handle_attr_info(vgen_ldc_t *ldcp, vnet_attr_msg_t *msg)
4563 {
4564 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4565 	vgen_hparams_t	*lp = &ldcp->local_hparams;
4566 	vgen_hparams_t	*rp = &ldcp->peer_hparams;
4567 	uint32_t	mtu;
4568 	uint8_t		dring_mode;
4569 
4570 	ldcp->hstate |= ATTR_INFO_RCVD;
4571 
4572 	/* save peer's values */
4573 	rp->mtu = msg->mtu;
4574 	rp->addr = msg->addr;
4575 	rp->addr_type = msg->addr_type;
4576 	rp->xfer_mode = msg->xfer_mode;
4577 	rp->ack_freq = msg->ack_freq;
4578 	rp->dring_mode = msg->options;
4579 
4580 	/*
4581 	 * Process address type, ack frequency and transfer mode attributes.
4582 	 */
4583 	if ((msg->addr_type != ADDR_TYPE_MAC) ||
4584 	    (msg->ack_freq > 64) ||
4585 	    (msg->xfer_mode != lp->xfer_mode)) {
4586 		return (VGEN_FAILURE);
4587 	}
4588 
4589 	/*
4590 	 * Process dring mode attribute.
4591 	 */
4592 	if (VGEN_VER_GTEQ(ldcp, 1, 6)) {
4593 		/*
4594 		 * Versions >= 1.6:
4595 		 * Though we are operating in v1.6 mode, it is possible that
4596 		 * RxDringData mode has been disabled either on this guest or
4597 		 * on the peer guest. If so, we revert to pre v1.6 behavior of
4598 		 * TxDring mode. But this must be agreed upon in both
4599 		 * directions of attr exchange. We first determine the mode
4600 		 * that can be negotiated.
4601 		 */
4602 		if ((msg->options & VIO_RX_DRING_DATA) != 0 &&
4603 		    vgen_mapin_avail(ldcp) == B_TRUE) {
4604 			/*
4605 			 * We are capable of handling RxDringData AND the peer
4606 			 * is also capable of it; we enable RxDringData mode on
4607 			 * this channel.
4608 			 */
4609 			dring_mode = VIO_RX_DRING_DATA;
4610 		} else if ((msg->options & VIO_TX_DRING) != 0) {
4611 			/*
4612 			 * If the peer is capable of TxDring mode, we
4613 			 * negotiate TxDring mode on this channel.
4614 			 */
4615 			dring_mode = VIO_TX_DRING;
4616 		} else {
4617 			/*
4618 			 * We support only VIO_TX_DRING and VIO_RX_DRING_DATA
4619 			 * modes. We don't support VIO_RX_DRING mode.
4620 			 */
4621 			return (VGEN_FAILURE);
4622 		}
4623 
4624 		/*
4625 		 * If we have received an ack for the attr info that we sent,
4626 		 * then check if the dring mode matches what the peer had ack'd
4627 		 * (saved in local hparams). If they don't match, we fail the
4628 		 * handshake.
4629 		 */
4630 		if (ldcp->hstate & ATTR_ACK_RCVD) {
4631 			if (msg->options != lp->dring_mode) {
4632 				/* send NACK */
4633 				return (VGEN_FAILURE);
4634 			}
4635 		} else {
4636 			/*
4637 			 * Save the negotiated dring mode in our attr
4638 			 * parameters, so it gets sent in the attr info from us
4639 			 * to the peer.
4640 			 */
4641 			lp->dring_mode = dring_mode;
4642 		}
4643 
4644 		/* save the negotiated dring mode in the msg to be replied */
4645 		msg->options = dring_mode;
4646 	}
4647 
4648 	/*
4649 	 * Process MTU attribute.
4650 	 */
4651 	if (VGEN_VER_GTEQ(ldcp, 1, 4)) {
4652 		/*
4653 		 * Versions >= 1.4:
4654 		 * Validate mtu of the peer is at least ETHERMAX. Then, the mtu
4655 		 * is negotiated down to the minimum of our mtu and peer's mtu.
4656 		 */
4657 		if (msg->mtu < ETHERMAX) {
4658 			return (VGEN_FAILURE);
4659 		}
4660 
4661 		mtu = MIN(msg->mtu, vgenp->max_frame_size);
4662 
4663 		/*
4664 		 * If we have received an ack for the attr info
4665 		 * that we sent, then check if the mtu computed
4666 		 * above matches the mtu that the peer had ack'd
4667 		 * (saved in local hparams). If they don't
4668 		 * match, we fail the handshake.
4669 		 */
4670 		if (ldcp->hstate & ATTR_ACK_RCVD) {
4671 			if (mtu != lp->mtu) {
4672 				/* send NACK */
4673 				return (VGEN_FAILURE);
4674 			}
4675 		} else {
4676 			/*
4677 			 * Save the mtu computed above in our
4678 			 * attr parameters, so it gets sent in
4679 			 * the attr info from us to the peer.
4680 			 */
4681 			lp->mtu = mtu;
4682 		}
4683 
4684 		/* save the MIN mtu in the msg to be replied */
4685 		msg->mtu = mtu;
4686 
4687 	} else {
4688 		/* versions < 1.4, mtu must match */
4689 		if (msg->mtu != lp->mtu) {
4690 			return (VGEN_FAILURE);
4691 		}
4692 	}
4693 
4694 	return (VGEN_SUCCESS);
4695 }
4696 
4697 static int
4698 vgen_handle_attr_ack(vgen_ldc_t *ldcp, vnet_attr_msg_t *msg)
4699 {
4700 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4701 	vgen_hparams_t	*lp = &ldcp->local_hparams;
4702 
4703 	/*
4704 	 * Process dring mode attribute.
4705 	 */
4706 	if (VGEN_VER_GTEQ(ldcp, 1, 6)) {
4707 		/*
4708 		 * Versions >= 1.6:
4709 		 * The ack msg sent by the peer contains the negotiated dring
4710 		 * mode between our capability (that we had sent in our attr
4711 		 * info) and the peer's capability.
4712 		 */
4713 		if (ldcp->hstate & ATTR_ACK_SENT) {
4714 			/*
4715 			 * If we have sent an ack for the attr info msg from
4716 			 * the peer, check if the dring mode that was
4717 			 * negotiated then (saved in local hparams) matches the
4718 			 * mode that the peer has ack'd. If they don't match,
4719 			 * we fail the handshake.
4720 			 */
4721 			if (lp->dring_mode != msg->options) {
4722 				return (VGEN_FAILURE);
4723 			}
4724 		} else {
4725 			if ((msg->options & lp->dring_mode) == 0) {
4726 				/*
4727 				 * Peer ack'd with a mode that we don't
4728 				 * support; we fail the handshake.
4729 				 */
4730 				return (VGEN_FAILURE);
4731 			}
4732 			if ((msg->options & (VIO_TX_DRING|VIO_RX_DRING_DATA))
4733 			    == (VIO_TX_DRING|VIO_RX_DRING_DATA)) {
4734 				/*
4735 				 * Peer must ack with only one negotiated mode.
4736 				 * Otherwise fail handshake.
4737 				 */
4738 				return (VGEN_FAILURE);
4739 			}
4740 
4741 			/*
4742 			 * Save the negotiated mode, so we can validate it when
4743 			 * we receive attr info from the peer.
4744 			 */
4745 			lp->dring_mode = msg->options;
4746 		}
4747 	}
4748 
4749 	/*
4750 	 * Process Physical Link Update attribute.
4751 	 */
4752 	if (VGEN_VER_GTEQ(ldcp, 1, 5) &&
4753 	    ldcp->portp == vgenp->vsw_portp) {
4754 		/*
4755 		 * Versions >= 1.5:
4756 		 * If the vnet device has been configured to get
4757 		 * physical link state updates, check the corresponding
4758 		 * bits in the ack msg, if the peer is vswitch.
4759 		 */
4760 		if (((lp->physlink_update & PHYSLINK_UPDATE_STATE_MASK) ==
4761 		    PHYSLINK_UPDATE_STATE) &&
4762 		    ((msg->physlink_update & PHYSLINK_UPDATE_STATE_MASK) ==
4763 		    PHYSLINK_UPDATE_STATE_ACK)) {
4764 			vgenp->pls_negotiated = B_TRUE;
4765 		} else {
4766 			vgenp->pls_negotiated = B_FALSE;
4767 		}
4768 	}
4769 
4770 	/*
4771 	 * Process MTU attribute.
4772 	 */
4773 	if (VGEN_VER_GTEQ(ldcp, 1, 4)) {
4774 		/*
4775 		 * Versions >= 1.4:
4776 		 * The ack msg sent by the peer contains the minimum of
4777 		 * our mtu (that we had sent in our attr info) and the
4778 		 * peer's mtu.
4779 		 *
4780 		 * If we have sent an ack for the attr info msg from
4781 		 * the peer, check if the mtu that was computed then
4782 		 * (saved in local hparams) matches the mtu that the
4783 		 * peer has ack'd. If they don't match, we fail the
4784 		 * handshake.
4785 		 */
4786 		if (ldcp->hstate & ATTR_ACK_SENT) {
4787 			if (lp->mtu != msg->mtu) {
4788 				return (VGEN_FAILURE);
4789 			}
4790 		} else {
4791 			/*
4792 			 * If the mtu ack'd by the peer is > our mtu
4793 			 * fail handshake. Otherwise, save the mtu, so
4794 			 * we can validate it when we receive attr info
4795 			 * from our peer.
4796 			 */
4797 			if (msg->mtu > lp->mtu) {
4798 				return (VGEN_FAILURE);
4799 			}
4800 			if (msg->mtu <= lp->mtu) {
4801 				lp->mtu = msg->mtu;
4802 			}
4803 		}
4804 	}
4805 
4806 	return (VGEN_SUCCESS);
4807 }
4808 
4809 
4810 /*
4811  * Handle an attribute info msg from the peer or an ACK/NACK from the peer
4812  * to an attr info msg that we sent.
4813  */
4814 static int
4815 vgen_handle_attr_msg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4816 {
4817 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4818 	vnet_attr_msg_t	*msg = (vnet_attr_msg_t *)tagp;
4819 	int		rv = 0;
4820 
4821 	DBG1(vgenp, ldcp, "enter\n");
4822 	if (ldcp->hphase != VH_PHASE2) {
4823 		DWARN(vgenp, ldcp, "Rcvd ATTR_INFO subtype(%d),"
4824 		" Invalid Phase(%u)\n",
4825 		    tagp->vio_subtype, ldcp->hphase);
4826 		return (VGEN_FAILURE);
4827 	}
4828 	switch (tagp->vio_subtype) {
4829 	case VIO_SUBTYPE_INFO:
4830 
4831 		rv = vgen_handle_attr_info(ldcp, msg);
4832 		if (rv == VGEN_SUCCESS) {
4833 			tagp->vio_subtype = VIO_SUBTYPE_ACK;
4834 		} else {
4835 			tagp->vio_subtype = VIO_SUBTYPE_NACK;
4836 		}
4837 		tagp->vio_sid = ldcp->local_sid;
4838 
4839 		/* send reply msg back to peer */
4840 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msg),
4841 		    B_FALSE);
4842 		if (rv != VGEN_SUCCESS) {
4843 			return (rv);
4844 		}
4845 
4846 		if (tagp->vio_subtype == VIO_SUBTYPE_NACK) {
4847 			DWARN(vgenp, ldcp, "ATTR_NACK_SENT");
4848 			break;
4849 		}
4850 
4851 		ldcp->hstate |= ATTR_ACK_SENT;
4852 		DBG2(vgenp, ldcp, "ATTR_ACK_SENT \n");
4853 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
4854 			rv = vgen_handshake(vh_nextphase(ldcp));
4855 			if (rv != 0) {
4856 				return (rv);
4857 			}
4858 		}
4859 
4860 		break;
4861 
4862 	case VIO_SUBTYPE_ACK:
4863 
4864 		rv = vgen_handle_attr_ack(ldcp, msg);
4865 		if (rv == VGEN_FAILURE) {
4866 			break;
4867 		}
4868 
4869 		ldcp->hstate |= ATTR_ACK_RCVD;
4870 		DBG2(vgenp, ldcp, "ATTR_ACK_RCVD \n");
4871 
4872 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
4873 			rv = vgen_handshake(vh_nextphase(ldcp));
4874 			if (rv != 0) {
4875 				return (rv);
4876 			}
4877 		}
4878 		break;
4879 
4880 	case VIO_SUBTYPE_NACK:
4881 
4882 		DBG2(vgenp, ldcp, "ATTR_NACK_RCVD \n");
4883 		return (VGEN_FAILURE);
4884 	}
4885 	DBG1(vgenp, ldcp, "exit\n");
4886 	return (VGEN_SUCCESS);
4887 }
4888 
4889 static int
4890 vgen_handle_dring_reg_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4891 {
4892 	int		rv = 0;
4893 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4894 	vgen_hparams_t	*lp = &ldcp->local_hparams;
4895 
4896 	DBG2(vgenp, ldcp, "DRING_INFO_RCVD");
4897 	ldcp->hstate |= DRING_INFO_RCVD;
4898 
4899 	if (VGEN_VER_GTEQ(ldcp, 1, 6) &&
4900 	    (lp->dring_mode != ((vio_dring_reg_msg_t *)tagp)->options)) {
4901 		/*
4902 		 * The earlier version of Solaris vnet driver doesn't set the
4903 		 * option (VIO_TX_DRING in its case) correctly in its dring reg
4904 		 * message. We workaround that here by doing the check only
4905 		 * for versions >= v1.6.
4906 		 */
4907 		DWARN(vgenp, ldcp,
4908 		    "Rcvd dring reg option (%d), negotiated mode (%d)\n",
4909 		    ((vio_dring_reg_msg_t *)tagp)->options, lp->dring_mode);
4910 		return (VGEN_FAILURE);
4911 	}
4912 
4913 	/*
4914 	 * Map dring exported by the peer.
4915 	 */
4916 	rv = vgen_map_dring(ldcp, (void *)tagp);
4917 	if (rv != VGEN_SUCCESS) {
4918 		return (rv);
4919 	}
4920 
4921 	/*
4922 	 * Map data buffers exported by the peer if we are in RxDringData mode.
4923 	 */
4924 	if (lp->dring_mode == VIO_RX_DRING_DATA) {
4925 		rv = vgen_map_data(ldcp, (void *)tagp);
4926 		if (rv != VGEN_SUCCESS) {
4927 			vgen_unmap_dring(ldcp);
4928 			return (rv);
4929 		}
4930 	}
4931 
4932 	if (ldcp->peer_hparams.dring_ready == B_FALSE) {
4933 		ldcp->peer_hparams.dring_ready = B_TRUE;
4934 	}
4935 
4936 	return (VGEN_SUCCESS);
4937 }
4938 
4939 static int
4940 vgen_handle_dring_reg_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4941 {
4942 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4943 	vgen_hparams_t	*lp = &ldcp->local_hparams;
4944 
4945 	DBG2(vgenp, ldcp, "DRING_ACK_RCVD");
4946 	ldcp->hstate |= DRING_ACK_RCVD;
4947 
4948 	if (lp->dring_ready) {
4949 		return (VGEN_SUCCESS);
4950 	}
4951 
4952 	/* save dring_ident acked by peer */
4953 	lp->dring_ident = ((vio_dring_reg_msg_t *)tagp)->dring_ident;
4954 
4955 	/* local dring is now ready */
4956 	lp->dring_ready = B_TRUE;
4957 
4958 	return (VGEN_SUCCESS);
4959 }
4960 
4961 /*
4962  * Handle a descriptor ring register msg from the peer or an ACK/NACK from
4963  * the peer to a dring register msg that we sent.
4964  */
4965 static int
4966 vgen_handle_dring_reg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4967 {
4968 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4969 	int		rv = 0;
4970 	int		msgsize;
4971 	vgen_hparams_t	*lp = &ldcp->local_hparams;
4972 
4973 	DBG1(vgenp, ldcp, "enter\n");
4974 	if (ldcp->hphase < VH_PHASE2) {
4975 		/* dring_info can be rcvd in any of the phases after Phase1 */
4976 		DWARN(vgenp, ldcp,
4977 		    "Rcvd DRING_INFO Subtype (%d), Invalid Phase(%u)\n",
4978 		    tagp->vio_subtype, ldcp->hphase);
4979 		return (VGEN_FAILURE);
4980 	}
4981 
4982 	switch (tagp->vio_subtype) {
4983 	case VIO_SUBTYPE_INFO:
4984 
4985 		rv = vgen_handle_dring_reg_info(ldcp, tagp);
4986 		if (rv == VGEN_SUCCESS) {
4987 			tagp->vio_subtype = VIO_SUBTYPE_ACK;
4988 		} else {
4989 			tagp->vio_subtype = VIO_SUBTYPE_NACK;
4990 		}
4991 
4992 		tagp->vio_sid = ldcp->local_sid;
4993 
4994 		if (lp->dring_mode == VIO_RX_DRING_DATA) {
4995 			msgsize =
4996 			    VNET_DRING_REG_EXT_MSG_SIZE(ldcp->tx_data_ncookies);
4997 		} else {
4998 			msgsize = sizeof (vio_dring_reg_msg_t);
4999 		}
5000 
5001 		/* send reply msg back to peer */
5002 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, msgsize,
5003 		    B_FALSE);
5004 		if (rv != VGEN_SUCCESS) {
5005 			return (rv);
5006 		}
5007 
5008 		if (tagp->vio_subtype == VIO_SUBTYPE_NACK) {
5009 			DWARN(vgenp, ldcp, "DRING_NACK_SENT");
5010 			return (VGEN_FAILURE);
5011 		}
5012 
5013 		ldcp->hstate |= DRING_ACK_SENT;
5014 		DBG2(vgenp, ldcp, "DRING_ACK_SENT");
5015 
5016 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5017 			rv = vgen_handshake(vh_nextphase(ldcp));
5018 			if (rv != 0) {
5019 				return (rv);
5020 			}
5021 		}
5022 		break;
5023 
5024 	case VIO_SUBTYPE_ACK:
5025 
5026 		rv = vgen_handle_dring_reg_ack(ldcp, tagp);
5027 		if (rv == VGEN_FAILURE) {
5028 			return (rv);
5029 		}
5030 
5031 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5032 			rv = vgen_handshake(vh_nextphase(ldcp));
5033 			if (rv != 0) {
5034 				return (rv);
5035 			}
5036 		}
5037 
5038 		break;
5039 
5040 	case VIO_SUBTYPE_NACK:
5041 
5042 		DWARN(vgenp, ldcp, "DRING_NACK_RCVD");
5043 		return (VGEN_FAILURE);
5044 	}
5045 	DBG1(vgenp, ldcp, "exit\n");
5046 	return (VGEN_SUCCESS);
5047 }
5048 
5049 /*
5050  * Handle a rdx info msg from the peer or an ACK/NACK
5051  * from the peer to a rdx info msg that we sent.
5052  */
5053 static int
5054 vgen_handle_rdx_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5055 {
5056 	int	rv = 0;
5057 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
5058 
5059 	DBG1(vgenp, ldcp, "enter\n");
5060 	if (ldcp->hphase != VH_PHASE4) {
5061 		DWARN(vgenp, ldcp,
5062 		    "Rcvd RDX_INFO Subtype (%d), Invalid Phase(%u)\n",
5063 		    tagp->vio_subtype, ldcp->hphase);
5064 		return (VGEN_FAILURE);
5065 	}
5066 	switch (tagp->vio_subtype) {
5067 	case VIO_SUBTYPE_INFO:
5068 
5069 		DBG2(vgenp, ldcp, "RDX_INFO_RCVD \n");
5070 		ldcp->hstate |= RDX_INFO_RCVD;
5071 
5072 		tagp->vio_subtype = VIO_SUBTYPE_ACK;
5073 		tagp->vio_sid = ldcp->local_sid;
5074 		/* send reply msg back to peer */
5075 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (vio_rdx_msg_t),
5076 		    B_FALSE);
5077 		if (rv != VGEN_SUCCESS) {
5078 			return (rv);
5079 		}
5080 
5081 		ldcp->hstate |= RDX_ACK_SENT;
5082 		DBG2(vgenp, ldcp, "RDX_ACK_SENT \n");
5083 
5084 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5085 			rv = vgen_handshake(vh_nextphase(ldcp));
5086 			if (rv != 0) {
5087 				return (rv);
5088 			}
5089 		}
5090 
5091 		break;
5092 
5093 	case VIO_SUBTYPE_ACK:
5094 
5095 		ldcp->hstate |= RDX_ACK_RCVD;
5096 
5097 		DBG2(vgenp, ldcp, "RDX_ACK_RCVD \n");
5098 
5099 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5100 			rv = vgen_handshake(vh_nextphase(ldcp));
5101 			if (rv != 0) {
5102 				return (rv);
5103 			}
5104 		}
5105 		break;
5106 
5107 	case VIO_SUBTYPE_NACK:
5108 
5109 		DBG2(vgenp, ldcp, "RDX_NACK_RCVD \n");
5110 		return (VGEN_FAILURE);
5111 	}
5112 	DBG1(vgenp, ldcp, "exit\n");
5113 	return (VGEN_SUCCESS);
5114 }
5115 
5116 /* Handle ACK/NACK from vsw to a set multicast msg that we sent */
5117 static int
5118 vgen_handle_mcast_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5119 {
5120 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
5121 	vnet_mcast_msg_t	*msgp = (vnet_mcast_msg_t *)tagp;
5122 	struct ether_addr	*addrp;
5123 	int			count;
5124 	int			i;
5125 
5126 	DBG1(vgenp, ldcp, "enter\n");
5127 	switch (tagp->vio_subtype) {
5128 
5129 	case VIO_SUBTYPE_INFO:
5130 
5131 		/* vnet shouldn't recv set mcast msg, only vsw handles it */
5132 		DWARN(vgenp, ldcp, "rcvd SET_MCAST_INFO \n");
5133 		break;
5134 
5135 	case VIO_SUBTYPE_ACK:
5136 
5137 		/* success adding/removing multicast addr */
5138 		DBG1(vgenp, ldcp, "rcvd SET_MCAST_ACK \n");
5139 		break;
5140 
5141 	case VIO_SUBTYPE_NACK:
5142 
5143 		DWARN(vgenp, ldcp, "rcvd SET_MCAST_NACK \n");
5144 		if (!(msgp->set)) {
5145 			/* multicast remove request failed */
5146 			break;
5147 		}
5148 
5149 		/* multicast add request failed */
5150 		for (count = 0; count < msgp->count; count++) {
5151 			addrp = &(msgp->mca[count]);
5152 
5153 			/* delete address from the table */
5154 			for (i = 0; i < vgenp->mccount; i++) {
5155 				if (ether_cmp(addrp,
5156 				    &(vgenp->mctab[i])) == 0) {
5157 					if (vgenp->mccount > 1) {
5158 						int t = vgenp->mccount - 1;
5159 						vgenp->mctab[i] =
5160 						    vgenp->mctab[t];
5161 					}
5162 					vgenp->mccount--;
5163 					break;
5164 				}
5165 			}
5166 		}
5167 		break;
5168 
5169 	}
5170 	DBG1(vgenp, ldcp, "exit\n");
5171 
5172 	return (VGEN_SUCCESS);
5173 }
5174 
5175 /*
5176  * Physical link information message from the peer. Only vswitch should send
5177  * us this message; if the vnet device has been configured to get physical link
5178  * state updates. Note that we must have already negotiated this with the
5179  * vswitch during attribute exchange phase of handshake.
5180  */
5181 static int
5182 vgen_handle_physlink_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5183 {
5184 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
5185 	vnet_physlink_msg_t	*msgp = (vnet_physlink_msg_t *)tagp;
5186 	link_state_t		link_state;
5187 	int			rv;
5188 
5189 	if (ldcp->portp != vgenp->vsw_portp) {
5190 		/*
5191 		 * drop the message and don't process; as we should
5192 		 * receive physlink_info message from only vswitch.
5193 		 */
5194 		return (VGEN_SUCCESS);
5195 	}
5196 
5197 	if (vgenp->pls_negotiated == B_FALSE) {
5198 		/*
5199 		 * drop the message and don't process; as we should receive
5200 		 * physlink_info message only if physlink update is enabled for
5201 		 * the device and negotiated with vswitch.
5202 		 */
5203 		return (VGEN_SUCCESS);
5204 	}
5205 
5206 	switch (tagp->vio_subtype) {
5207 
5208 	case VIO_SUBTYPE_INFO:
5209 
5210 		if ((msgp->physlink_info & VNET_PHYSLINK_STATE_MASK) ==
5211 		    VNET_PHYSLINK_STATE_UP) {
5212 			link_state = LINK_STATE_UP;
5213 		} else {
5214 			link_state = LINK_STATE_DOWN;
5215 		}
5216 
5217 		if (vgenp->phys_link_state != link_state) {
5218 			vgenp->phys_link_state = link_state;
5219 			mutex_exit(&ldcp->cblock);
5220 
5221 			/* Now update the stack */
5222 			vgen_link_update(vgenp, link_state);
5223 
5224 			mutex_enter(&ldcp->cblock);
5225 		}
5226 
5227 		tagp->vio_subtype = VIO_SUBTYPE_ACK;
5228 		tagp->vio_sid = ldcp->local_sid;
5229 
5230 		/* send reply msg back to peer */
5231 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp,
5232 		    sizeof (vnet_physlink_msg_t), B_FALSE);
5233 		if (rv != VGEN_SUCCESS) {
5234 			return (rv);
5235 		}
5236 		break;
5237 
5238 	case VIO_SUBTYPE_ACK:
5239 
5240 		/* vnet shouldn't recv physlink acks */
5241 		DWARN(vgenp, ldcp, "rcvd PHYSLINK_ACK \n");
5242 		break;
5243 
5244 	case VIO_SUBTYPE_NACK:
5245 
5246 		/* vnet shouldn't recv physlink nacks */
5247 		DWARN(vgenp, ldcp, "rcvd PHYSLINK_NACK \n");
5248 		break;
5249 
5250 	}
5251 	DBG1(vgenp, ldcp, "exit\n");
5252 
5253 	return (VGEN_SUCCESS);
5254 }
5255 
5256 /* handler for control messages received from the peer ldc end-point */
5257 static int
5258 vgen_handle_ctrlmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5259 {
5260 	int	rv = 0;
5261 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
5262 
5263 	DBG1(vgenp, ldcp, "enter\n");
5264 	switch (tagp->vio_subtype_env) {
5265 
5266 	case VIO_VER_INFO:
5267 		rv = vgen_handle_version_negotiate(ldcp, tagp);
5268 		break;
5269 
5270 	case VIO_ATTR_INFO:
5271 		rv = vgen_handle_attr_msg(ldcp, tagp);
5272 		break;
5273 
5274 	case VIO_DRING_REG:
5275 		rv = vgen_handle_dring_reg(ldcp, tagp);
5276 		break;
5277 
5278 	case VIO_RDX:
5279 		rv = vgen_handle_rdx_info(ldcp, tagp);
5280 		break;
5281 
5282 	case VNET_MCAST_INFO:
5283 		rv = vgen_handle_mcast_info(ldcp, tagp);
5284 		break;
5285 
5286 	case VIO_DDS_INFO:
5287 		/*
5288 		 * If we are in the process of resetting the vswitch channel,
5289 		 * drop the dds message. A new handshake will be initiated
5290 		 * when the channel comes back up after the reset and dds
5291 		 * negotiation can then continue.
5292 		 */
5293 		if (ldcp->reset_in_progress == 1) {
5294 			break;
5295 		}
5296 		rv = vgen_dds_rx(ldcp, tagp);
5297 		break;
5298 
5299 	case VNET_PHYSLINK_INFO:
5300 		rv = vgen_handle_physlink_info(ldcp, tagp);
5301 		break;
5302 	}
5303 
5304 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
5305 	return (rv);
5306 }
5307 
5308 /* handler for error messages received from the peer ldc end-point */
5309 static void
5310 vgen_handle_errmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5311 {
5312 	_NOTE(ARGUNUSED(ldcp, tagp))
5313 }
5314 
5315 /*
5316  * This function handles raw pkt data messages received over the channel.
5317  * Currently, only priority-eth-type frames are received through this mechanism.
5318  * In this case, the frame(data) is present within the message itself which
5319  * is copied into an mblk before sending it up the stack.
5320  */
5321 void
5322 vgen_handle_pkt_data(void *arg1, void *arg2, uint32_t msglen)
5323 {
5324 	vgen_ldc_t		*ldcp = (vgen_ldc_t *)arg1;
5325 	vio_raw_data_msg_t	*pkt	= (vio_raw_data_msg_t *)arg2;
5326 	uint32_t		size;
5327 	mblk_t			*mp;
5328 	vio_mblk_t		*vmp;
5329 	vio_net_rx_cb_t		vrx_cb = NULL;
5330 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
5331 	vgen_stats_t		*statsp = &ldcp->stats;
5332 	vgen_hparams_t		*lp = &ldcp->local_hparams;
5333 	uint_t			dring_mode = lp->dring_mode;
5334 
5335 	ASSERT(MUTEX_HELD(&ldcp->cblock));
5336 
5337 	mutex_exit(&ldcp->cblock);
5338 
5339 	size = msglen - VIO_PKT_DATA_HDRSIZE;
5340 	if (size < ETHERMIN || size > lp->mtu) {
5341 		(void) atomic_inc_32(&statsp->rx_pri_fail);
5342 		mutex_enter(&ldcp->cblock);
5343 		return;
5344 	}
5345 
5346 	vmp = vio_multipool_allocb(&ldcp->vmp, size);
5347 	if (vmp == NULL) {
5348 		mp = allocb(size, BPRI_MED);
5349 		if (mp == NULL) {
5350 			(void) atomic_inc_32(&statsp->rx_pri_fail);
5351 			DWARN(vgenp, ldcp, "allocb failure, "
5352 			    "unable to process priority frame\n");
5353 			mutex_enter(&ldcp->cblock);
5354 			return;
5355 		}
5356 	} else {
5357 		mp = vmp->mp;
5358 	}
5359 
5360 	/* copy the frame from the payload of raw data msg into the mblk */
5361 	bcopy(pkt->data, mp->b_rptr, size);
5362 	mp->b_wptr = mp->b_rptr + size;
5363 
5364 	if (vmp != NULL) {
5365 		vmp->state = VIO_MBLK_HAS_DATA;
5366 	}
5367 
5368 	/* update stats */
5369 	(void) atomic_inc_64(&statsp->rx_pri_packets);
5370 	(void) atomic_add_64(&statsp->rx_pri_bytes, size);
5371 
5372 	/*
5373 	 * If polling is currently enabled, add the packet to the priority
5374 	 * packets list and return. It will be picked up by the polling thread.
5375 	 */
5376 	if (dring_mode == VIO_RX_DRING_DATA) {
5377 		mutex_enter(&ldcp->rxlock);
5378 	} else {
5379 		mutex_enter(&ldcp->pollq_lock);
5380 	}
5381 
5382 	if (ldcp->polling_on == B_TRUE) {
5383 		if (ldcp->rx_pri_tail != NULL) {
5384 			ldcp->rx_pri_tail->b_next = mp;
5385 		} else {
5386 			ldcp->rx_pri_head = ldcp->rx_pri_tail = mp;
5387 		}
5388 	} else {
5389 		vrx_cb = ldcp->portp->vcb.vio_net_rx_cb;
5390 	}
5391 
5392 	if (dring_mode == VIO_RX_DRING_DATA) {
5393 		mutex_exit(&ldcp->rxlock);
5394 	} else {
5395 		mutex_exit(&ldcp->pollq_lock);
5396 	}
5397 
5398 	if (vrx_cb != NULL) {
5399 		vrx_cb(ldcp->portp->vhp, mp);
5400 	}
5401 
5402 	mutex_enter(&ldcp->cblock);
5403 }
5404 
5405 /*
5406  * dummy pkt data handler function for vnet protocol version 1.0
5407  */
5408 static void
5409 vgen_handle_pkt_data_nop(void *arg1, void *arg2, uint32_t msglen)
5410 {
5411 	_NOTE(ARGUNUSED(arg1, arg2, msglen))
5412 }
5413 
5414 /* handler for data messages received from the peer ldc end-point */
5415 static int
5416 vgen_handle_datamsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp, uint32_t msglen)
5417 {
5418 	int		rv = 0;
5419 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
5420 	vgen_hparams_t	*lp = &ldcp->local_hparams;
5421 
5422 	DBG1(vgenp, ldcp, "enter\n");
5423 
5424 	if (ldcp->hphase != VH_DONE) {
5425 		return (0);
5426 	}
5427 
5428 	/*
5429 	 * We check the data msg seqnum. This is needed only in TxDring mode.
5430 	 */
5431 	if (lp->dring_mode == VIO_TX_DRING &&
5432 	    tagp->vio_subtype == VIO_SUBTYPE_INFO) {
5433 		rv = vgen_check_datamsg_seq(ldcp, tagp);
5434 		if (rv != 0) {
5435 			return (rv);
5436 		}
5437 	}
5438 
5439 	switch (tagp->vio_subtype_env) {
5440 	case VIO_DRING_DATA:
5441 		rv = ldcp->rx_dringdata((void *)ldcp, (void *)tagp);
5442 		break;
5443 
5444 	case VIO_PKT_DATA:
5445 		ldcp->rx_pktdata((void *)ldcp, (void *)tagp, msglen);
5446 		break;
5447 	default:
5448 		break;
5449 	}
5450 
5451 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
5452 	return (rv);
5453 }
5454 
5455 
5456 static int
5457 vgen_ldc_reset(vgen_ldc_t *ldcp, vgen_caller_t caller)
5458 {
5459 	int	rv;
5460 
5461 	if (caller == VGEN_LDC_CB || caller == VGEN_MSG_THR) {
5462 		ASSERT(MUTEX_HELD(&ldcp->cblock));
5463 	}
5464 
5465 	/* Set the flag to indicate reset is in progress */
5466 	if (atomic_cas_uint(&ldcp->reset_in_progress, 0, 1) != 0) {
5467 		/* another thread is already in the process of resetting */
5468 		return (EBUSY);
5469 	}
5470 
5471 	if (caller == VGEN_LDC_CB || caller == VGEN_MSG_THR) {
5472 		mutex_exit(&ldcp->cblock);
5473 	}
5474 
5475 	rv = vgen_process_reset(ldcp, VGEN_FLAG_NEED_LDCRESET);
5476 
5477 	if (caller == VGEN_LDC_CB || caller == VGEN_MSG_THR) {
5478 		mutex_enter(&ldcp->cblock);
5479 	}
5480 
5481 	return (rv);
5482 }
5483 
5484 static void
5485 vgen_ldc_up(vgen_ldc_t *ldcp)
5486 {
5487 	int		rv;
5488 	uint32_t	retries = 0;
5489 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
5490 
5491 	ASSERT(MUTEX_HELD(&ldcp->cblock));
5492 
5493 	/*
5494 	 * If the channel has been reset max # of times, without successfully
5495 	 * completing handshake, stop and do not bring the channel up.
5496 	 */
5497 	if (ldcp->ldc_reset_count == vgen_ldc_max_resets) {
5498 		cmn_err(CE_WARN, "!vnet%d: exceeded number of permitted"
5499 		    " handshake attempts (%d) on channel %ld",
5500 		    vgenp->instance, vgen_ldc_max_resets, ldcp->ldc_id);
5501 		return;
5502 	}
5503 	ldcp->ldc_reset_count++;
5504 
5505 	do {
5506 		rv = ldc_up(ldcp->ldc_handle);
5507 		if ((rv != 0) && (rv == EWOULDBLOCK)) {
5508 			drv_usecwait(VGEN_LDC_UP_DELAY);
5509 		}
5510 		if (retries++ >= vgen_ldcup_retries)
5511 			break;
5512 	} while (rv == EWOULDBLOCK);
5513 
5514 	if (rv != 0) {
5515 		DWARN(vgenp, ldcp, "ldc_up err rv(%d)\n", rv);
5516 	}
5517 }
5518 
5519 int
5520 vgen_enable_intr(void *arg)
5521 {
5522 	uint32_t		end_ix;
5523 	vio_dring_msg_t		msg;
5524 	vgen_port_t		*portp = (vgen_port_t *)arg;
5525 	vgen_ldc_t		*ldcp = portp->ldcp;
5526 	vgen_hparams_t		*lp = &ldcp->local_hparams;
5527 
5528 	if (lp->dring_mode == VIO_RX_DRING_DATA) {
5529 		mutex_enter(&ldcp->rxlock);
5530 
5531 		ldcp->polling_on = B_FALSE;
5532 		/*
5533 		 * We send a stopped message to peer (sender) as we are turning
5534 		 * off polled mode. This effectively restarts data interrupts
5535 		 * by allowing the peer to send further dring data msgs to us.
5536 		 */
5537 		end_ix = ldcp->next_rxi;
5538 		DECR_RXI(end_ix, ldcp);
5539 		msg.dring_ident = ldcp->peer_hparams.dring_ident;
5540 		(void) vgen_send_dringack_shm(ldcp, (vio_msg_tag_t *)&msg,
5541 		    VNET_START_IDX_UNSPEC, end_ix, VIO_DP_STOPPED);
5542 
5543 		mutex_exit(&ldcp->rxlock);
5544 	} else {
5545 		mutex_enter(&ldcp->pollq_lock);
5546 		ldcp->polling_on = B_FALSE;
5547 		mutex_exit(&ldcp->pollq_lock);
5548 	}
5549 
5550 	return (0);
5551 }
5552 
5553 int
5554 vgen_disable_intr(void *arg)
5555 {
5556 	vgen_port_t		*portp = (vgen_port_t *)arg;
5557 	vgen_ldc_t		*ldcp = portp->ldcp;
5558 	vgen_hparams_t		*lp = &ldcp->local_hparams;
5559 
5560 	if (lp->dring_mode == VIO_RX_DRING_DATA) {
5561 		mutex_enter(&ldcp->rxlock);
5562 		ldcp->polling_on = B_TRUE;
5563 		mutex_exit(&ldcp->rxlock);
5564 	} else {
5565 		mutex_enter(&ldcp->pollq_lock);
5566 		ldcp->polling_on = B_TRUE;
5567 		mutex_exit(&ldcp->pollq_lock);
5568 	}
5569 
5570 	return (0);
5571 }
5572 
5573 mblk_t *
5574 vgen_rx_poll(void *arg, int bytes_to_pickup)
5575 {
5576 	vgen_port_t		*portp = (vgen_port_t *)arg;
5577 	vgen_ldc_t		*ldcp = portp->ldcp;
5578 	vgen_hparams_t		*lp = &ldcp->local_hparams;
5579 	mblk_t			*mp = NULL;
5580 
5581 	if (lp->dring_mode == VIO_RX_DRING_DATA) {
5582 		mp = vgen_poll_rcv_shm(ldcp, bytes_to_pickup);
5583 	} else {
5584 		mp = vgen_poll_rcv(ldcp, bytes_to_pickup);
5585 	}
5586 
5587 	return (mp);
5588 }
5589 
5590 /* transmit watchdog timeout handler */
5591 static void
5592 vgen_tx_watchdog(void *arg)
5593 {
5594 	vgen_ldc_t	*ldcp;
5595 	vgen_t		*vgenp;
5596 	int		rv;
5597 	boolean_t	tx_blocked;
5598 	clock_t		tx_blocked_lbolt;
5599 
5600 	ldcp = (vgen_ldc_t *)arg;
5601 	vgenp = LDC_TO_VGEN(ldcp);
5602 
5603 	tx_blocked = ldcp->tx_blocked;
5604 	tx_blocked_lbolt = ldcp->tx_blocked_lbolt;
5605 
5606 	if (vgen_txwd_timeout &&
5607 	    (tx_blocked == B_TRUE) &&
5608 	    ((ddi_get_lbolt() - tx_blocked_lbolt) >
5609 	    drv_usectohz(vgen_txwd_timeout * 1000))) {
5610 		/*
5611 		 * Something is wrong; the peer is not picking up the packets
5612 		 * in the transmit dring. We now go ahead and reset the channel
5613 		 * to break out of this condition.
5614 		 */
5615 		DWARN(vgenp, ldcp, "transmit timeout lbolt(%lx), "
5616 		    "tx_blocked_lbolt(%lx)\n",
5617 		    ddi_get_lbolt(), tx_blocked_lbolt);
5618 
5619 #ifdef DEBUG
5620 		if (vgen_inject_error(ldcp, VGEN_ERR_TXTIMEOUT)) {
5621 			/* tx timeout triggered for debugging */
5622 			vgen_inject_err_flag &= ~(VGEN_ERR_TXTIMEOUT);
5623 		}
5624 #endif
5625 
5626 		/*
5627 		 * Clear tid before invoking vgen_ldc_reset(). Otherwise,
5628 		 * it will result in a deadlock when vgen_process_reset() tries
5629 		 * to untimeout() on seeing a non-zero tid, but it is being
5630 		 * invoked by the timer itself in this case.
5631 		 */
5632 		mutex_enter(&ldcp->cblock);
5633 		if (ldcp->wd_tid == 0) {
5634 			/* Cancelled by vgen_process_reset() */
5635 			mutex_exit(&ldcp->cblock);
5636 			return;
5637 		}
5638 		ldcp->wd_tid = 0;
5639 		mutex_exit(&ldcp->cblock);
5640 
5641 		/*
5642 		 * Now reset the channel.
5643 		 */
5644 		rv = vgen_ldc_reset(ldcp, VGEN_OTHER);
5645 		if (rv == 0) {
5646 			/*
5647 			 * We have successfully reset the channel. If we are
5648 			 * in tx flow controlled state, clear it now and enable
5649 			 * transmit in the upper layer.
5650 			 */
5651 			if (ldcp->tx_blocked) {
5652 				vio_net_tx_update_t vtx_update =
5653 				    ldcp->portp->vcb.vio_net_tx_update;
5654 
5655 				ldcp->tx_blocked = B_FALSE;
5656 				vtx_update(ldcp->portp->vhp);
5657 			}
5658 		}
5659 
5660 		/*
5661 		 * Channel has been reset by us or some other thread is already
5662 		 * in the process of resetting. In either case, we return
5663 		 * without restarting the timer. When handshake completes and
5664 		 * the channel is ready for data transmit/receive we start a
5665 		 * new watchdog timer.
5666 		 */
5667 		return;
5668 	}
5669 
5670 restart_timer:
5671 	/* Restart the timer */
5672 	mutex_enter(&ldcp->cblock);
5673 	if (ldcp->wd_tid == 0) {
5674 		/* Cancelled by vgen_process_reset() */
5675 		mutex_exit(&ldcp->cblock);
5676 		return;
5677 	}
5678 	ldcp->wd_tid = timeout(vgen_tx_watchdog, (caddr_t)ldcp,
5679 	    drv_usectohz(vgen_txwd_interval * 1000));
5680 	mutex_exit(&ldcp->cblock);
5681 }
5682 
5683 /* Handshake watchdog timeout handler */
5684 static void
5685 vgen_hwatchdog(void *arg)
5686 {
5687 	vgen_ldc_t	*ldcp = (vgen_ldc_t *)arg;
5688 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
5689 
5690 	DWARN(vgenp, ldcp, "handshake timeout phase(%x) state(%x)\n",
5691 	    ldcp->hphase, ldcp->hstate);
5692 
5693 	mutex_enter(&ldcp->cblock);
5694 	if (ldcp->htid == 0) {
5695 		/* Cancelled by vgen_process_reset() */
5696 		mutex_exit(&ldcp->cblock);
5697 		return;
5698 	}
5699 	ldcp->htid = 0;
5700 	mutex_exit(&ldcp->cblock);
5701 
5702 	/*
5703 	 * Something is wrong; handshake with the peer seems to be hung. We now
5704 	 * go ahead and reset the channel to break out of this condition.
5705 	 */
5706 	(void) vgen_ldc_reset(ldcp, VGEN_OTHER);
5707 }
5708 
5709 /* Check if the session id in the received message is valid */
5710 static int
5711 vgen_check_sid(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5712 {
5713 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
5714 
5715 	if (tagp->vio_sid != ldcp->peer_sid) {
5716 		DWARN(vgenp, ldcp, "sid mismatch: expected(%x), rcvd(%x)\n",
5717 		    ldcp->peer_sid, tagp->vio_sid);
5718 		return (VGEN_FAILURE);
5719 	}
5720 	else
5721 		return (VGEN_SUCCESS);
5722 }
5723 
5724 /*
5725  * Initialize the common part of dring registration
5726  * message; used in both TxDring and RxDringData modes.
5727  */
5728 static void
5729 vgen_init_dring_reg_msg(vgen_ldc_t *ldcp, vio_dring_reg_msg_t *msg,
5730     uint8_t option)
5731 {
5732 	vio_msg_tag_t		*tagp;
5733 
5734 	tagp = &msg->tag;
5735 	tagp->vio_msgtype = VIO_TYPE_CTRL;
5736 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
5737 	tagp->vio_subtype_env = VIO_DRING_REG;
5738 	tagp->vio_sid = ldcp->local_sid;
5739 
5740 	/* get dring info msg payload from ldcp->local */
5741 	bcopy(&(ldcp->local_hparams.dring_cookie), &(msg->cookie[0]),
5742 	    sizeof (ldc_mem_cookie_t));
5743 	msg->ncookies = ldcp->local_hparams.dring_ncookies;
5744 	msg->num_descriptors = ldcp->local_hparams.num_desc;
5745 	msg->descriptor_size = ldcp->local_hparams.desc_size;
5746 
5747 	msg->options = option;
5748 
5749 	/*
5750 	 * dring_ident is set to 0. After mapping the dring, peer sets this
5751 	 * value and sends it in the ack, which is saved in
5752 	 * vgen_handle_dring_reg().
5753 	 */
5754 	msg->dring_ident = 0;
5755 }
5756 
5757 static int
5758 vgen_mapin_avail(vgen_ldc_t *ldcp)
5759 {
5760 	int		rv;
5761 	ldc_info_t	info;
5762 	uint64_t	mapin_sz_req;
5763 	uint64_t	dblk_sz;
5764 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
5765 
5766 	rv = ldc_info(ldcp->ldc_handle, &info);
5767 	if (rv != 0) {
5768 		return (B_FALSE);
5769 	}
5770 
5771 	dblk_sz = RXDRING_DBLK_SZ(vgenp->max_frame_size);
5772 	mapin_sz_req = (VGEN_RXDRING_NRBUFS * dblk_sz);
5773 
5774 	if (info.direct_map_size_max >= mapin_sz_req) {
5775 		return (B_TRUE);
5776 	}
5777 
5778 	return (B_FALSE);
5779 }
5780 
5781 #if DEBUG
5782 
5783 /*
5784  * Print debug messages - set to 0xf to enable all msgs
5785  */
5786 void
5787 vgen_debug_printf(const char *fname, vgen_t *vgenp,
5788     vgen_ldc_t *ldcp, const char *fmt, ...)
5789 {
5790 	char	buf[256];
5791 	char	*bufp = buf;
5792 	va_list	ap;
5793 
5794 	if ((vgenp != NULL) && (vgenp->vnetp != NULL)) {
5795 		(void) sprintf(bufp, "vnet%d:",
5796 		    ((vnet_t *)(vgenp->vnetp))->instance);
5797 		bufp += strlen(bufp);
5798 	}
5799 	if (ldcp != NULL) {
5800 		(void) sprintf(bufp, "ldc(%ld):", ldcp->ldc_id);
5801 		bufp += strlen(bufp);
5802 	}
5803 	(void) sprintf(bufp, "%s: ", fname);
5804 	bufp += strlen(bufp);
5805 
5806 	va_start(ap, fmt);
5807 	(void) vsprintf(bufp, fmt, ap);
5808 	va_end(ap);
5809 
5810 	if ((ldcp == NULL) ||(vgendbg_ldcid == -1) ||
5811 	    (vgendbg_ldcid == ldcp->ldc_id)) {
5812 		cmn_err(CE_CONT, "%s\n", buf);
5813 	}
5814 }
5815 #endif
5816 
5817 #ifdef	VNET_IOC_DEBUG
5818 
5819 static void
5820 vgen_ioctl(void *arg, queue_t *q, mblk_t *mp)
5821 {
5822 	struct iocblk	*iocp;
5823 	vgen_port_t	*portp;
5824 	enum		ioc_reply {
5825 			IOC_INVAL = -1,		/* bad, NAK with EINVAL */
5826 			IOC_ACK			/* OK, just send ACK    */
5827 	}		status;
5828 	int		rv;
5829 
5830 	iocp = (struct iocblk *)(uintptr_t)mp->b_rptr;
5831 	iocp->ioc_error = 0;
5832 	portp = (vgen_port_t *)arg;
5833 
5834 	if (portp == NULL) {
5835 		status = IOC_INVAL;
5836 		goto vgen_ioc_exit;
5837 	}
5838 
5839 	mutex_enter(&portp->lock);
5840 
5841 	switch (iocp->ioc_cmd) {
5842 
5843 	case VNET_FORCE_LINK_DOWN:
5844 	case VNET_FORCE_LINK_UP:
5845 		rv = vgen_force_link_state(portp, iocp->ioc_cmd);
5846 		(rv == 0) ? (status = IOC_ACK) : (status = IOC_INVAL);
5847 		break;
5848 
5849 	default:
5850 		status = IOC_INVAL;
5851 		break;
5852 
5853 	}
5854 
5855 	mutex_exit(&portp->lock);
5856 
5857 vgen_ioc_exit:
5858 
5859 	switch (status) {
5860 	default:
5861 	case IOC_INVAL:
5862 		/* Error, reply with a NAK and EINVAL error */
5863 		miocnak(q, mp, 0, EINVAL);
5864 		break;
5865 	case IOC_ACK:
5866 		/* OK, reply with an ACK */
5867 		miocack(q, mp, 0, 0);
5868 		break;
5869 	}
5870 }
5871 
5872 static int
5873 vgen_force_link_state(vgen_port_t *portp, int cmd)
5874 {
5875 	ldc_status_t	istatus;
5876 	int		rv;
5877 	vgen_ldc_t	*ldcp = portp->ldcp;
5878 	vgen_t		*vgenp = portp->vgenp;
5879 
5880 	mutex_enter(&ldcp->cblock);
5881 
5882 	switch (cmd) {
5883 
5884 	case VNET_FORCE_LINK_DOWN:
5885 		(void) ldc_down(ldcp->ldc_handle);
5886 		ldcp->link_down_forced = B_TRUE;
5887 		break;
5888 
5889 	case VNET_FORCE_LINK_UP:
5890 		vgen_ldc_up(ldcp);
5891 		ldcp->link_down_forced = B_FALSE;
5892 
5893 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
5894 			DWARN(vgenp, ldcp, "ldc_status err\n");
5895 		} else {
5896 			ldcp->ldc_status = istatus;
5897 		}
5898 
5899 		/* if channel is already UP - restart handshake */
5900 		if (ldcp->ldc_status == LDC_UP) {
5901 			vgen_handle_evt_up(ldcp);
5902 		}
5903 		break;
5904 
5905 	}
5906 
5907 	mutex_exit(&ldcp->cblock);
5908 
5909 	return (0);
5910 }
5911 
5912 #else
5913 
5914 static void
5915 vgen_ioctl(void *arg, queue_t *q, mblk_t *mp)
5916 {
5917 	vgen_port_t	*portp;
5918 
5919 	portp = (vgen_port_t *)arg;
5920 
5921 	if (portp == NULL) {
5922 		miocnak(q, mp, 0, EINVAL);
5923 		return;
5924 	}
5925 
5926 	miocnak(q, mp, 0, ENOTSUP);
5927 }
5928 
5929 #endif
5930