xref: /titanic_44/usr/src/uts/sun4v/io/vnet_gen.c (revision cedde6488e5f58449af69e4bfcb5b8c70a73a1cd)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/types.h>
28 #include <sys/errno.h>
29 #include <sys/sysmacros.h>
30 #include <sys/param.h>
31 #include <sys/machsystm.h>
32 #include <sys/stream.h>
33 #include <sys/strsubr.h>
34 #include <sys/kmem.h>
35 #include <sys/conf.h>
36 #include <sys/devops.h>
37 #include <sys/ksynch.h>
38 #include <sys/stat.h>
39 #include <sys/modctl.h>
40 #include <sys/debug.h>
41 #include <sys/ethernet.h>
42 #include <sys/ddi.h>
43 #include <sys/sunddi.h>
44 #include <sys/strsun.h>
45 #include <sys/note.h>
46 #include <sys/mac_provider.h>
47 #include <sys/mac_ether.h>
48 #include <sys/ldc.h>
49 #include <sys/mach_descrip.h>
50 #include <sys/mdeg.h>
51 #include <net/if.h>
52 #include <sys/vnet.h>
53 #include <sys/vio_mailbox.h>
54 #include <sys/vio_common.h>
55 #include <sys/vnet_common.h>
56 #include <sys/vnet_mailbox.h>
57 #include <sys/vio_util.h>
58 #include <sys/vnet_gen.h>
59 #include <sys/atomic.h>
60 #include <sys/callb.h>
61 #include <sys/sdt.h>
62 #include <sys/intr.h>
63 #include <sys/pattr.h>
64 #include <sys/vlan.h>
65 
66 /*
67  * Implementation of the mac provider functionality for vnet using the
68  * generic(default) transport layer of sun4v Logical Domain Channels(LDC).
69  */
70 
71 /* Entry Points */
72 int vgen_init(void *vnetp, uint64_t regprop, dev_info_t *vnetdip,
73     const uint8_t *macaddr, void **vgenhdl);
74 int vgen_init_mdeg(void *arg);
75 void vgen_uninit(void *arg);
76 int vgen_dds_tx(void *arg, void *dmsg);
77 int vgen_enable_intr(void *arg);
78 int vgen_disable_intr(void *arg);
79 mblk_t *vgen_rx_poll(void *arg, int bytes_to_pickup);
80 static int vgen_start(void *arg);
81 static void vgen_stop(void *arg);
82 static mblk_t *vgen_tx(void *arg, mblk_t *mp);
83 static int vgen_multicst(void *arg, boolean_t add,
84 	const uint8_t *mca);
85 static int vgen_promisc(void *arg, boolean_t on);
86 static int vgen_unicst(void *arg, const uint8_t *mca);
87 static int vgen_stat(void *arg, uint_t stat, uint64_t *val);
88 static void vgen_ioctl(void *arg, queue_t *q, mblk_t *mp);
89 #ifdef	VNET_IOC_DEBUG
90 static int vgen_force_link_state(vgen_port_t *portp, int link_state);
91 #endif
92 
93 /* Port/LDC Configuration */
94 static int vgen_read_mdprops(vgen_t *vgenp);
95 static void vgen_update_md_prop(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
96 static void vgen_read_pri_eth_types(vgen_t *vgenp, md_t *mdp,
97 	mde_cookie_t node);
98 static void vgen_mtu_read(vgen_t *vgenp, md_t *mdp, mde_cookie_t node,
99 	uint32_t *mtu);
100 static void vgen_linkprop_read(vgen_t *vgenp, md_t *mdp, mde_cookie_t node,
101 	boolean_t *pls);
102 static void vgen_detach_ports(vgen_t *vgenp);
103 static void vgen_port_detach(vgen_port_t *portp);
104 static void vgen_port_list_insert(vgen_port_t *portp);
105 static void vgen_port_list_remove(vgen_port_t *portp);
106 static vgen_port_t *vgen_port_lookup(vgen_portlist_t *plistp,
107 	int port_num);
108 static int vgen_mdeg_reg(vgen_t *vgenp);
109 static void vgen_mdeg_unreg(vgen_t *vgenp);
110 static int vgen_mdeg_cb(void *cb_argp, mdeg_result_t *resp);
111 static int vgen_mdeg_port_cb(void *cb_argp, mdeg_result_t *resp);
112 static int vgen_add_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
113 static int vgen_port_read_props(vgen_port_t *portp, vgen_t *vgenp, md_t *mdp,
114 	mde_cookie_t mdex);
115 static int vgen_remove_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
116 static int vgen_port_attach(vgen_port_t *portp);
117 static void vgen_port_detach_mdeg(vgen_port_t *portp);
118 static int vgen_update_port(vgen_t *vgenp, md_t *curr_mdp,
119 	mde_cookie_t curr_mdex, md_t *prev_mdp, mde_cookie_t prev_mdex);
120 static uint64_t	vgen_port_stat(vgen_port_t *portp, uint_t stat);
121 static void vgen_port_reset(vgen_port_t *portp);
122 static void vgen_reset_vsw_port(vgen_t *vgenp);
123 static int vgen_ldc_reset(vgen_ldc_t *ldcp, vgen_caller_t caller);
124 static void vgen_ldc_up(vgen_ldc_t *ldcp);
125 static int vgen_ldc_attach(vgen_port_t *portp, uint64_t ldc_id);
126 static void vgen_ldc_detach(vgen_ldc_t *ldcp);
127 static void vgen_port_init(vgen_port_t *portp);
128 static void vgen_port_uninit(vgen_port_t *portp);
129 static int vgen_ldc_init(vgen_ldc_t *ldcp);
130 static void vgen_ldc_uninit(vgen_ldc_t *ldcp);
131 static uint64_t	vgen_ldc_stat(vgen_ldc_t *ldcp, uint_t stat);
132 
133 /* I/O Processing */
134 static int vgen_portsend(vgen_port_t *portp, mblk_t *mp);
135 static int vgen_ldcsend(void *arg, mblk_t *mp);
136 static void vgen_ldcsend_pkt(void *arg, mblk_t *mp);
137 static uint_t vgen_ldc_cb(uint64_t event, caddr_t arg);
138 static void vgen_tx_watchdog(void *arg);
139 
140 /*  Dring Configuration */
141 static int vgen_create_dring(vgen_ldc_t *ldcp);
142 static void vgen_destroy_dring(vgen_ldc_t *ldcp);
143 static int vgen_map_dring(vgen_ldc_t *ldcp, void *pkt);
144 static void vgen_unmap_dring(vgen_ldc_t *ldcp);
145 
146 /* VIO Message Processing */
147 static int vgen_handshake(vgen_ldc_t *ldcp);
148 static int vgen_handshake_done(vgen_ldc_t *ldcp);
149 static vgen_ldc_t *vh_nextphase(vgen_ldc_t *ldcp);
150 static int vgen_handshake_phase2(vgen_ldc_t *ldcp);
151 static int vgen_handshake_phase3(vgen_ldc_t *ldcp);
152 static void vgen_setup_handshake_params(vgen_ldc_t *ldcp);
153 static int vgen_send_version_negotiate(vgen_ldc_t *ldcp);
154 static int vgen_send_attr_info(vgen_ldc_t *ldcp);
155 static int vgen_send_rx_dring_reg(vgen_ldc_t *ldcp);
156 static int vgen_send_tx_dring_reg(vgen_ldc_t *ldcp);
157 static void vgen_init_dring_reg_msg(vgen_ldc_t *ldcp, vio_dring_reg_msg_t *msg,
158 	uint8_t option);
159 static int vgen_send_rdx_info(vgen_ldc_t *ldcp);
160 static int vgen_send_dringdata(vgen_ldc_t *ldcp, uint32_t start, int32_t end);
161 static int vgen_send_mcast_info(vgen_ldc_t *ldcp);
162 static int vgen_handle_version_negotiate(vgen_ldc_t *ldcp,
163 	vio_msg_tag_t *tagp);
164 static int vgen_handle_attr_msg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
165 static int vgen_handle_attr_info(vgen_ldc_t *ldcp, vnet_attr_msg_t *msg);
166 static int vgen_handle_attr_ack(vgen_ldc_t *ldcp, vnet_attr_msg_t *msg);
167 static int vgen_handle_dring_reg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
168 static int vgen_handle_dring_reg_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
169 static int vgen_handle_dring_reg_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
170 static int vgen_handle_rdx_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
171 static int vgen_handle_mcast_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
172 static int vgen_handle_ctrlmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
173 static void vgen_handle_pkt_data_nop(void *arg1, void *arg2, uint32_t msglen);
174 static int vgen_handle_datamsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp,
175 	uint32_t msglen);
176 static void vgen_handle_errmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
177 static int vgen_dds_rx(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
178 static void vgen_handle_evt_up(vgen_ldc_t *ldcp);
179 static int vgen_process_reset(vgen_ldc_t *ldcp, int flags);
180 static int vgen_check_sid(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
181 static void vgen_hwatchdog(void *arg);
182 static void vgen_set_vnet_proto_ops(vgen_ldc_t *ldcp);
183 static void vgen_reset_vnet_proto_ops(vgen_ldc_t *ldcp);
184 static void vgen_link_update(vgen_t *vgenp, link_state_t link_state);
185 
186 /* VLANs */
187 static void vgen_vlan_read_ids(void *arg, int type, md_t *mdp,
188 	mde_cookie_t node, uint16_t *pvidp, uint16_t **vidspp,
189 	uint16_t *nvidsp, uint16_t *default_idp);
190 static void vgen_vlan_create_hash(vgen_port_t *portp);
191 static void vgen_vlan_destroy_hash(vgen_port_t *portp);
192 static void vgen_vlan_add_ids(vgen_port_t *portp);
193 static void vgen_vlan_remove_ids(vgen_port_t *portp);
194 static boolean_t vgen_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid);
195 static boolean_t vgen_frame_lookup_vid(vnet_t *vnetp, struct ether_header *ehp,
196 	uint16_t *vidp);
197 static mblk_t *vgen_vlan_frame_fixtag(vgen_port_t *portp, mblk_t *mp,
198 	boolean_t is_tagged, uint16_t vid);
199 static void vgen_vlan_unaware_port_reset(vgen_port_t *portp);
200 static void vgen_reset_vlan_unaware_ports(vgen_t *vgenp);
201 
202 /* Exported functions */
203 int vgen_handle_evt_read(vgen_ldc_t *ldcp, vgen_caller_t caller);
204 int vgen_handle_evt_reset(vgen_ldc_t *ldcp, vgen_caller_t caller);
205 void vgen_handle_pkt_data(void *arg1, void *arg2, uint32_t msglen);
206 void vgen_destroy_rxpools(void *arg);
207 
208 /* Externs */
209 extern void vnet_dds_rx(void *arg, void *dmsg);
210 extern void vnet_dds_cleanup_hio(vnet_t *vnetp);
211 extern int vnet_mtu_update(vnet_t *vnetp, uint32_t mtu);
212 extern void vnet_link_update(vnet_t *vnetp, link_state_t link_state);
213 extern int vgen_sendmsg(vgen_ldc_t *ldcp, caddr_t msg,  size_t msglen,
214     boolean_t caller_holds_lock);
215 extern void vgen_stop_msg_thread(vgen_ldc_t *ldcp);
216 extern int vgen_create_tx_dring(vgen_ldc_t *ldcp);
217 extern void vgen_destroy_tx_dring(vgen_ldc_t *ldcp);
218 extern int vgen_map_rx_dring(vgen_ldc_t *ldcp, void *pkt);
219 extern void vgen_unmap_rx_dring(vgen_ldc_t *ldcp);
220 extern int vgen_create_rx_dring(vgen_ldc_t *ldcp);
221 extern void vgen_destroy_rx_dring(vgen_ldc_t *ldcp);
222 extern int vgen_map_tx_dring(vgen_ldc_t *ldcp, void *pkt);
223 extern void vgen_unmap_tx_dring(vgen_ldc_t *ldcp);
224 extern int vgen_map_data(vgen_ldc_t *ldcp, void *pkt);
225 extern int vgen_handle_dringdata_shm(void *arg1, void *arg2);
226 extern int vgen_handle_dringdata(void *arg1, void *arg2);
227 extern int vgen_dringsend_shm(void *arg, mblk_t *mp);
228 extern int vgen_dringsend(void *arg, mblk_t *mp);
229 extern void vgen_ldc_msg_worker(void *arg);
230 extern int vgen_send_dringack_shm(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp,
231     uint32_t start, int32_t end, uint8_t pstate);
232 extern mblk_t *vgen_poll_rcv_shm(vgen_ldc_t *ldcp, int bytes_to_pickup);
233 extern mblk_t *vgen_poll_rcv(vgen_ldc_t *ldcp, int bytes_to_pickup);
234 extern int vgen_check_datamsg_seq(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
235 
236 #define	VGEN_PRI_ETH_DEFINED(vgenp)	((vgenp)->pri_num_types != 0)
237 
/*
 * Acquire every per-channel lock of an ldc, always in the same order:
 * cblock, rxlock, wrlock, txlock, tclock.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to
 * exactly one statement; without the wrapper, using it as the body of an
 * unbraced if/else/loop would guard only the first mutex_enter().
 * Invoke as a statement terminated by a semicolon: LDC_LOCK(ldcp);
 */
#define	LDC_LOCK(ldcp)	\
	do {						\
		mutex_enter(&((ldcp)->cblock));		\
		mutex_enter(&((ldcp)->rxlock));		\
		mutex_enter(&((ldcp)->wrlock));		\
		mutex_enter(&((ldcp)->txlock));		\
		mutex_enter(&((ldcp)->tclock));		\
		_NOTE(CONSTCOND)			\
	} while (0)

/*
 * Release every per-channel lock of an ldc, in the reverse of the order
 * used by LDC_LOCK(). Same single-statement wrapping as LDC_LOCK().
 */
#define	LDC_UNLOCK(ldcp)	\
	do {						\
		mutex_exit(&((ldcp)->tclock));		\
		mutex_exit(&((ldcp)->txlock));		\
		mutex_exit(&((ldcp)->wrlock));		\
		mutex_exit(&((ldcp)->rxlock));		\
		mutex_exit(&((ldcp)->cblock));		\
		_NOTE(CONSTCOND)			\
	} while (0)
250 
251 #define	VGEN_VER_EQ(ldcp, major, minor)	\
252 	((ldcp)->local_hparams.ver_major == (major) &&	\
253 	    (ldcp)->local_hparams.ver_minor == (minor))
254 
255 #define	VGEN_VER_LT(ldcp, major, minor)	\
256 	(((ldcp)->local_hparams.ver_major < (major)) ||	\
257 	    ((ldcp)->local_hparams.ver_major == (major) &&	\
258 	    (ldcp)->local_hparams.ver_minor < (minor)))
259 
260 #define	VGEN_VER_GTEQ(ldcp, major, minor)	\
261 	(((ldcp)->local_hparams.ver_major > (major)) ||	\
262 	    ((ldcp)->local_hparams.ver_major == (major) &&	\
263 	    (ldcp)->local_hparams.ver_minor >= (minor)))
264 
265 /*
266  * Property names
267  */
268 static char macaddr_propname[] = "mac-address";
269 static char rmacaddr_propname[] = "remote-mac-address";
270 static char channel_propname[] = "channel-endpoint";
271 static char reg_propname[] = "reg";
272 static char port_propname[] = "port";
273 static char swport_propname[] = "switch-port";
274 static char id_propname[] = "id";
275 static char vdev_propname[] = "virtual-device";
276 static char vnet_propname[] = "network";
277 static char pri_types_propname[] = "priority-ether-types";
278 static char vgen_pvid_propname[] = "port-vlan-id";
279 static char vgen_vid_propname[] = "vlan-id";
280 static char vgen_dvid_propname[] = "default-vlan-id";
281 static char port_pvid_propname[] = "remote-port-vlan-id";
282 static char port_vid_propname[] = "remote-vlan-id";
283 static char vgen_mtu_propname[] = "mtu";
284 static char vgen_linkprop_propname[] = "linkprop";
285 
286 /*
287  * VIO Protocol Version Info:
288  *
289  * The version specified below represents the version of protocol currently
290  * supported in the driver. It means the driver can negotiate with peers with
291  * versions <= this version. Here is a summary of the feature(s) that are
292  * supported at each version of the protocol:
293  *
294  * 1.0			Basic VIO protocol.
295  * 1.1			vDisk protocol update (no virtual network update).
296  * 1.2			Support for priority frames (priority-ether-types).
297  * 1.3			VLAN and HybridIO support.
298  * 1.4			Jumbo Frame support.
299  * 1.5			Link State Notification support with optional support
300  * 			for Physical Link information.
301  * 1.6			Support for RxDringData mode.
302  */
303 static vgen_ver_t vgen_versions[VGEN_NUM_VER] =  { {1, 6} };
304 
305 /* Tunables */
306 uint32_t vgen_hwd_interval = 5;		/* handshake watchdog freq in sec */
307 uint32_t vgen_ldcwr_retries = 10;	/* max # of ldc_write() retries */
308 uint32_t vgen_ldcup_retries = 5;	/* max # of ldc_up() retries */
309 uint32_t vgen_ldccl_retries = 5;	/* max # of ldc_close() retries */
310 uint32_t vgen_tx_delay = 0x30;		/* delay when tx descr not available */
311 uint32_t vgen_ldc_mtu = VGEN_LDC_MTU;		/* ldc mtu */
312 uint32_t vgen_txwd_interval = VGEN_TXWD_INTERVAL; /* watchdog freq in msec */
313 uint32_t vgen_txwd_timeout = VGEN_TXWD_TIMEOUT;   /* tx timeout in msec */
314 
315 /*
316  * Max # of channel resets allowed during handshake.
317  */
318 uint32_t vgen_ldc_max_resets = 5;
319 
320 /*
321  * We provide a tunable to enable RxDringData mode for versions >= 1.6. By
322  * default, this tunable is set to 1 (VIO_TX_DRING). To enable RxDringData mode
323  * set this tunable to 4 (VIO_RX_DRING_DATA).
324  * See comments in vsw.c for details on the dring modes supported.
325  */
326 uint8_t  vgen_dring_mode = VIO_TX_DRING;
327 
328 /*
329  * In RxDringData mode, # of buffers is determined by multiplying the # of
330  * descriptors with the factor below. Note that the factor must be > 1; i.e,
331  * the # of buffers must always be > # of descriptors. This is needed because,
332  * while the shared memory buffers are sent up the stack on the receiver, the
333  * sender needs additional buffers that can be used for further transmits.
334  * See vgen_create_rx_dring() for details.
335  */
336 uint32_t vgen_nrbufs_factor = 2;
337 
338 /*
339  * Retry delay used while destroying rx mblk pools. Used in both Dring modes.
340  */
341 int vgen_rxpool_cleanup_delay = 100000;	/* 100ms */
342 
343 /*
344  * Delay when rx descr not ready; used in TxDring mode only.
345  */
346 uint32_t vgen_recv_delay = 1;
347 
348 /*
349  * Retry when rx descr not ready; used in TxDring mode only.
350  */
351 uint32_t vgen_recv_retries = 10;
352 
353 /*
354  * Max # of packets accumulated prior to sending them up. It is best
355  * to keep this at 60% of the number of receive buffers. Used in TxDring mode
356  * by the msg worker thread. Used in RxDringData mode while in interrupt mode
357  * (not used in polled mode).
358  */
359 uint32_t vgen_chain_len = (VGEN_NRBUFS * 0.6);
360 
361 /*
362  * Internal tunables for receive buffer pools, that is,  the size and number of
363  * mblks for each pool. At least 3 sizes must be specified if these are used.
364  * The sizes must be specified in increasing order. Non-zero value of the first
365  * size will be used as a hint to use these values instead of the algorithm
366  * that determines the sizes based on MTU. Used in TxDring mode only.
367  */
368 uint32_t vgen_rbufsz1 = 0;
369 uint32_t vgen_rbufsz2 = 0;
370 uint32_t vgen_rbufsz3 = 0;
371 uint32_t vgen_rbufsz4 = 0;
372 
373 uint32_t vgen_nrbufs1 = VGEN_NRBUFS;
374 uint32_t vgen_nrbufs2 = VGEN_NRBUFS;
375 uint32_t vgen_nrbufs3 = VGEN_NRBUFS;
376 uint32_t vgen_nrbufs4 = VGEN_NRBUFS;
377 
378 /*
379  * In the absence of "priority-ether-types" property in MD, the following
380  * internal tunable can be set to specify a single priority ethertype.
381  */
382 uint64_t vgen_pri_eth_type = 0;
383 
384 /*
385  * Number of transmit priority buffers that are preallocated per device.
386  * This number is chosen to be a small value to throttle transmission
387  * of priority packets. Note: Must be a power of 2 for vio_create_mblks().
388  */
389 uint32_t vgen_pri_tx_nmblks = 64;
390 
391 uint32_t	vgen_vlan_nchains = 4;	/* # of chains in vlan id hash table */
392 
393 /*
394  * Matching criteria passed to the MDEG to register interest
395  * in changes to 'virtual-device' nodes (i.e. vnet nodes) identified
396  * by their 'name' and 'cfg-handle' properties.
397  */
398 static md_prop_match_t vdev_prop_match[] = {
399 	{ MDET_PROP_STR,    "name"   },
400 	{ MDET_PROP_VAL,    "cfg-handle" },
401 	{ MDET_LIST_END,    NULL    }
402 };
403 
404 static mdeg_node_match_t vdev_match = { "virtual-device",
405 						vdev_prop_match };
406 
407 /* MD update matching structure */
408 static md_prop_match_t	vport_prop_match[] = {
409 	{ MDET_PROP_VAL,	"id" },
410 	{ MDET_LIST_END,	NULL }
411 };
412 
413 static mdeg_node_match_t vport_match = { "virtual-device-port",
414 					vport_prop_match };
415 
416 /* Template for matching a particular vnet instance */
417 static mdeg_prop_spec_t vgen_prop_template[] = {
418 	{ MDET_PROP_STR,	"name",		"network" },
419 	{ MDET_PROP_VAL,	"cfg-handle",	NULL },
420 	{ MDET_LIST_END,	NULL,		NULL }
421 };
422 
423 #define	VGEN_SET_MDEG_PROP_INST(specp, val)	(specp)[1].ps_val = (val)
424 
425 static int vgen_mdeg_port_cb(void *cb_argp, mdeg_result_t *resp);
426 
427 #ifdef	VNET_IOC_DEBUG
428 #define	VGEN_M_CALLBACK_FLAGS	(MC_IOCTL)
429 #else
430 #define	VGEN_M_CALLBACK_FLAGS	(0)
431 #endif
432 
433 static mac_callbacks_t vgen_m_callbacks = {
434 	VGEN_M_CALLBACK_FLAGS,
435 	vgen_stat,
436 	vgen_start,
437 	vgen_stop,
438 	vgen_promisc,
439 	vgen_multicst,
440 	vgen_unicst,
441 	vgen_tx,
442 	NULL,
443 	vgen_ioctl,
444 	NULL,
445 	NULL
446 };
447 
448 /* Externs */
449 extern pri_t	maxclsyspri;
450 extern proc_t	p0;
451 extern uint32_t	vnet_ethermtu;
452 extern uint16_t	vnet_default_vlan_id;
453 
454 #ifdef DEBUG
455 
456 #define	DEBUG_PRINTF	vgen_debug_printf
457 
458 extern int vnet_dbglevel;
459 
460 void vgen_debug_printf(const char *fname, vgen_t *vgenp,
461 	vgen_ldc_t *ldcp, const char *fmt, ...);
462 
463 /* -1 for all LDCs info, or ldc_id for a specific LDC info */
464 int vgendbg_ldcid = -1;
465 
466 /* Flags to simulate error conditions for debugging */
467 int vgen_inject_err_flag = 0;
468 
469 
470 boolean_t
471 vgen_inject_error(vgen_ldc_t *ldcp, int error)
472 {
473 	if ((vgendbg_ldcid == ldcp->ldc_id) &&
474 	    (vgen_inject_err_flag & error)) {
475 		return (B_TRUE);
476 	}
477 	return (B_FALSE);
478 }
479 
480 #endif
481 
482 /*
483  * vgen_init() is called by an instance of vnet driver to initialize the
484  * corresponding generic transport layer. This layer uses Logical Domain
485  * Channels (LDCs) to communicate with the virtual switch in the service domain
486  * and also with peer vnets in other guest domains in the system.
487  *
488  * Arguments:
489  *   vnetp:   an opaque pointer to the vnet instance
490  *   regprop: frame to be transmitted
491  *   vnetdip: dip of the vnet device
492  *   macaddr: mac address of the vnet device
493  *
494  * Returns:
495  *	Sucess:  a handle to the vgen instance (vgen_t)
496  *	Failure: NULL
497  */
498 int
499 vgen_init(void *vnetp, uint64_t regprop, dev_info_t *vnetdip,
500     const uint8_t *macaddr, void **vgenhdl)
501 {
502 	vgen_t	*vgenp;
503 	int	instance;
504 	int	rv;
505 	char	qname[TASKQ_NAMELEN];
506 
507 	if ((vnetp == NULL) || (vnetdip == NULL))
508 		return (DDI_FAILURE);
509 
510 	instance = ddi_get_instance(vnetdip);
511 
512 	DBG1(NULL, NULL, "vnet(%d): enter\n", instance);
513 
514 	vgenp = kmem_zalloc(sizeof (vgen_t), KM_SLEEP);
515 
516 	vgenp->vnetp = vnetp;
517 	vgenp->instance = instance;
518 	vgenp->regprop = regprop;
519 	vgenp->vnetdip = vnetdip;
520 	bcopy(macaddr, &(vgenp->macaddr), ETHERADDRL);
521 	vgenp->phys_link_state = LINK_STATE_UNKNOWN;
522 
523 	/* allocate multicast table */
524 	vgenp->mctab = kmem_zalloc(VGEN_INIT_MCTAB_SIZE *
525 	    sizeof (struct ether_addr), KM_SLEEP);
526 	vgenp->mccount = 0;
527 	vgenp->mcsize = VGEN_INIT_MCTAB_SIZE;
528 
529 	mutex_init(&vgenp->lock, NULL, MUTEX_DRIVER, NULL);
530 	rw_init(&vgenp->vgenports.rwlock, NULL, RW_DRIVER, NULL);
531 
532 	(void) snprintf(qname, TASKQ_NAMELEN, "rxpool_taskq%d",
533 	    instance);
534 	if ((vgenp->rxp_taskq = ddi_taskq_create(vnetdip, qname, 1,
535 	    TASKQ_DEFAULTPRI, 0)) == NULL) {
536 		cmn_err(CE_WARN, "!vnet%d: Unable to create rx pool task queue",
537 		    instance);
538 		goto vgen_init_fail;
539 	}
540 
541 	rv = vgen_read_mdprops(vgenp);
542 	if (rv != 0) {
543 		goto vgen_init_fail;
544 	}
545 	*vgenhdl = (void *)vgenp;
546 
547 	DBG1(NULL, NULL, "vnet(%d): exit\n", instance);
548 	return (DDI_SUCCESS);
549 
550 vgen_init_fail:
551 	rw_destroy(&vgenp->vgenports.rwlock);
552 	mutex_destroy(&vgenp->lock);
553 	kmem_free(vgenp->mctab, VGEN_INIT_MCTAB_SIZE *
554 	    sizeof (struct ether_addr));
555 	if (VGEN_PRI_ETH_DEFINED(vgenp)) {
556 		kmem_free(vgenp->pri_types,
557 		    sizeof (uint16_t) * vgenp->pri_num_types);
558 		(void) vio_destroy_mblks(vgenp->pri_tx_vmp);
559 	}
560 	if (vgenp->rxp_taskq != NULL) {
561 		ddi_taskq_destroy(vgenp->rxp_taskq);
562 		vgenp->rxp_taskq = NULL;
563 	}
564 	KMEM_FREE(vgenp);
565 	return (DDI_FAILURE);
566 }
567 
568 int
569 vgen_init_mdeg(void *arg)
570 {
571 	vgen_t	*vgenp = (vgen_t *)arg;
572 
573 	/* register with MD event generator */
574 	return (vgen_mdeg_reg(vgenp));
575 }
576 
577 /*
578  * Called by vnet to undo the initializations done by vgen_init().
579  * The handle provided by generic transport during vgen_init() is the argument.
580  */
581 void
582 vgen_uninit(void *arg)
583 {
584 	vgen_t	*vgenp = (vgen_t *)arg;
585 
586 	if (vgenp == NULL) {
587 		return;
588 	}
589 
590 	DBG1(vgenp, NULL, "enter\n");
591 
592 	/* Unregister with MD event generator */
593 	vgen_mdeg_unreg(vgenp);
594 
595 	mutex_enter(&vgenp->lock);
596 
597 	/*
598 	 * Detach all ports from the device; note that the device should have
599 	 * been unplumbed by this time (See vnet_unattach() for the sequence)
600 	 * and thus vgen_stop() has already been invoked on all the ports.
601 	 */
602 	vgen_detach_ports(vgenp);
603 
604 	/*
605 	 * We now destroy the taskq used to clean up rx mblk pools that
606 	 * couldn't be destroyed when the ports/channels were detached.
607 	 * We implicitly wait for those tasks to complete in
608 	 * ddi_taskq_destroy().
609 	 */
610 	if (vgenp->rxp_taskq != NULL) {
611 		ddi_taskq_destroy(vgenp->rxp_taskq);
612 		vgenp->rxp_taskq = NULL;
613 	}
614 
615 	/* Free multicast table */
616 	kmem_free(vgenp->mctab, vgenp->mcsize * sizeof (struct ether_addr));
617 
618 	/* Free pri_types table */
619 	if (VGEN_PRI_ETH_DEFINED(vgenp)) {
620 		kmem_free(vgenp->pri_types,
621 		    sizeof (uint16_t) * vgenp->pri_num_types);
622 		(void) vio_destroy_mblks(vgenp->pri_tx_vmp);
623 	}
624 
625 	mutex_exit(&vgenp->lock);
626 	rw_destroy(&vgenp->vgenports.rwlock);
627 	mutex_destroy(&vgenp->lock);
628 
629 	DBG1(vgenp, NULL, "exit\n");
630 	KMEM_FREE(vgenp);
631 }
632 
633 /* enable transmit/receive for the device */
634 int
635 vgen_start(void *arg)
636 {
637 	vgen_port_t	*portp = (vgen_port_t *)arg;
638 	vgen_t		*vgenp = portp->vgenp;
639 
640 	DBG1(vgenp, NULL, "enter\n");
641 	mutex_enter(&portp->lock);
642 	vgen_port_init(portp);
643 	portp->flags |= VGEN_STARTED;
644 	mutex_exit(&portp->lock);
645 	DBG1(vgenp, NULL, "exit\n");
646 
647 	return (DDI_SUCCESS);
648 }
649 
650 /* stop transmit/receive */
651 void
652 vgen_stop(void *arg)
653 {
654 	vgen_port_t	*portp = (vgen_port_t *)arg;
655 	vgen_t		*vgenp = portp->vgenp;
656 
657 	DBG1(vgenp, NULL, "enter\n");
658 
659 	mutex_enter(&portp->lock);
660 	if (portp->flags & VGEN_STARTED) {
661 		vgen_port_uninit(portp);
662 		portp->flags &= ~(VGEN_STARTED);
663 	}
664 	mutex_exit(&portp->lock);
665 	DBG1(vgenp, NULL, "exit\n");
666 
667 }
668 
669 /* vgen transmit function */
670 static mblk_t *
671 vgen_tx(void *arg, mblk_t *mp)
672 {
673 	vgen_port_t	*portp;
674 	int		status;
675 
676 	portp = (vgen_port_t *)arg;
677 	status = vgen_portsend(portp, mp);
678 	if (status != VGEN_SUCCESS) {
679 		/* failure */
680 		return (mp);
681 	}
682 	/* success */
683 	return (NULL);
684 }
685 
686 /*
687  * This function provides any necessary tagging/untagging of the frames
688  * that are being transmitted over the port. It first verifies the vlan
689  * membership of the destination(port) and drops the packet if the
690  * destination doesn't belong to the given vlan.
691  *
692  * Arguments:
693  *   portp:     port over which the frames should be transmitted
694  *   mp:        frame to be transmitted
695  *   is_tagged:
696  *              B_TRUE: indicates frame header contains the vlan tag already.
697  *              B_FALSE: indicates frame is untagged.
698  *   vid:       vlan in which the frame should be transmitted.
699  *
700  * Returns:
701  *              Sucess: frame(mblk_t *) after doing the necessary tag/untag.
702  *              Failure: NULL
703  */
704 static mblk_t *
705 vgen_vlan_frame_fixtag(vgen_port_t *portp, mblk_t *mp, boolean_t is_tagged,
706 	uint16_t vid)
707 {
708 	vgen_t		*vgenp;
709 	boolean_t	dst_tagged;
710 	int		rv;
711 
712 	vgenp = portp->vgenp;
713 
714 	/*
715 	 * If the packet is going to a vnet:
716 	 *   Check if the destination vnet is in the same vlan.
717 	 *   Check the frame header if tag or untag is needed.
718 	 *
719 	 * We do not check the above conditions if the packet is going to vsw:
720 	 *   vsw must be present implicitly in all the vlans that a vnet device
721 	 *   is configured into; even if vsw itself is not assigned to those
722 	 *   vlans as an interface. For instance, the packet might be destined
723 	 *   to another vnet(indirectly through vsw) or to an external host
724 	 *   which is in the same vlan as this vnet and vsw itself may not be
725 	 *   present in that vlan. Similarly packets going to vsw must be
726 	 *   always tagged(unless in the default-vlan) if not already tagged,
727 	 *   as we do not know the final destination. This is needed because
728 	 *   vsw must always invoke its switching function only after tagging
729 	 *   the packet; otherwise after switching function determines the
730 	 *   destination we cannot figure out if the destination belongs to the
731 	 *   the same vlan that the frame originated from and if it needs tag/
732 	 *   untag. Note that vsw will tag the packet itself when it receives
733 	 *   it over the channel from a client if needed. However, that is
734 	 *   needed only in the case of vlan unaware clients such as obp or
735 	 *   earlier versions of vnet.
736 	 *
737 	 */
738 	if (portp != vgenp->vsw_portp) {
739 		/*
740 		 * Packet going to a vnet. Check if the destination vnet is in
741 		 * the same vlan. Then check the frame header if tag/untag is
742 		 * needed.
743 		 */
744 		rv = vgen_vlan_lookup(portp->vlan_hashp, vid);
745 		if (rv == B_FALSE) {
746 			/* drop the packet */
747 			freemsg(mp);
748 			return (NULL);
749 		}
750 
751 		/* is the destination tagged or untagged in this vlan? */
752 		(vid == portp->pvid) ? (dst_tagged = B_FALSE) :
753 		    (dst_tagged = B_TRUE);
754 
755 		if (is_tagged == dst_tagged) {
756 			/* no tagging/untagging needed */
757 			return (mp);
758 		}
759 
760 		if (is_tagged == B_TRUE) {
761 			/* frame is tagged; destination needs untagged */
762 			mp = vnet_vlan_remove_tag(mp);
763 			return (mp);
764 		}
765 
766 		/* (is_tagged == B_FALSE): fallthru to tag tx packet: */
767 	}
768 
769 	/*
770 	 * Packet going to a vnet needs tagging.
771 	 * OR
772 	 * If the packet is going to vsw, then it must be tagged in all cases:
773 	 * unknown unicast, broadcast/multicast or to vsw interface.
774 	 */
775 
776 	if (is_tagged == B_FALSE) {
777 		mp = vnet_vlan_insert_tag(mp, vid);
778 	}
779 
780 	return (mp);
781 }
782 
783 /* transmit packets over the given port */
784 static int
785 vgen_portsend(vgen_port_t *portp, mblk_t *mp)
786 {
787 	vgen_ldc_t		*ldcp;
788 	int			status;
789 	int			rv = VGEN_SUCCESS;
790 	vgen_t			*vgenp = portp->vgenp;
791 	vnet_t			*vnetp = vgenp->vnetp;
792 	boolean_t		is_tagged;
793 	boolean_t		dec_refcnt = B_FALSE;
794 	uint16_t		vlan_id;
795 	struct ether_header	*ehp;
796 
797 	if (portp == NULL) {
798 		return (VGEN_FAILURE);
799 	}
800 
801 	if (portp->use_vsw_port) {
802 		(void) atomic_inc_32(&vgenp->vsw_port_refcnt);
803 		portp = portp->vgenp->vsw_portp;
804 		ASSERT(portp != NULL);
805 		dec_refcnt = B_TRUE;
806 	}
807 
808 	/*
809 	 * Determine the vlan id that the frame belongs to.
810 	 */
811 	ehp = (struct ether_header *)mp->b_rptr;
812 	is_tagged = vgen_frame_lookup_vid(vnetp, ehp, &vlan_id);
813 
814 	if (vlan_id == vnetp->default_vlan_id) {
815 
816 		/* Frames in default vlan must be untagged */
817 		ASSERT(is_tagged == B_FALSE);
818 
819 		/*
820 		 * If the destination is a vnet-port verify it belongs to the
821 		 * default vlan; otherwise drop the packet. We do not need
822 		 * this check for vsw-port, as it should implicitly belong to
823 		 * this vlan; see comments in vgen_vlan_frame_fixtag().
824 		 */
825 		if (portp != vgenp->vsw_portp &&
826 		    portp->pvid != vnetp->default_vlan_id) {
827 			freemsg(mp);
828 			goto portsend_ret;
829 		}
830 
831 	} else {	/* frame not in default-vlan */
832 
833 		mp = vgen_vlan_frame_fixtag(portp, mp, is_tagged, vlan_id);
834 		if (mp == NULL) {
835 			goto portsend_ret;
836 		}
837 
838 	}
839 
840 	ldcp = portp->ldcp;
841 	status = ldcp->tx(ldcp, mp);
842 
843 	if (status != VGEN_TX_SUCCESS) {
844 		rv = VGEN_FAILURE;
845 	}
846 
847 portsend_ret:
848 	if (dec_refcnt == B_TRUE) {
849 		(void) atomic_dec_32(&vgenp->vsw_port_refcnt);
850 	}
851 	return (rv);
852 }
853 
854 /*
855  * Wrapper function to transmit normal and/or priority frames over the channel.
856  */
857 static int
858 vgen_ldcsend(void *arg, mblk_t *mp)
859 {
860 	vgen_ldc_t		*ldcp = (vgen_ldc_t *)arg;
861 	int			status;
862 	struct ether_header	*ehp;
863 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
864 	uint32_t		num_types;
865 	uint16_t		*types;
866 	int			i;
867 
868 	ASSERT(VGEN_PRI_ETH_DEFINED(vgenp));
869 
870 	num_types = vgenp->pri_num_types;
871 	types = vgenp->pri_types;
872 	ehp = (struct ether_header *)mp->b_rptr;
873 
874 	for (i = 0; i < num_types; i++) {
875 
876 		if (ehp->ether_type == types[i]) {
877 			/* priority frame, use pri tx function */
878 			vgen_ldcsend_pkt(ldcp, mp);
879 			return (VGEN_SUCCESS);
880 		}
881 
882 	}
883 
884 	if (ldcp->tx_dringdata == NULL) {
885 		freemsg(mp);
886 		return (VGEN_SUCCESS);
887 	}
888 
889 	status  = ldcp->tx_dringdata(ldcp, mp);
890 	return (status);
891 }
892 
893 /*
894  * This function transmits the frame in the payload of a raw data
895  * (VIO_PKT_DATA) message. Thus, it provides an Out-Of-Band path to
896  * send special frames with high priorities, without going through
897  * the normal data path which uses descriptor ring mechanism.
898  */
899 static void
900 vgen_ldcsend_pkt(void *arg, mblk_t *mp)
901 {
902 	vgen_ldc_t		*ldcp = (vgen_ldc_t *)arg;
903 	vio_raw_data_msg_t	*pkt;
904 	mblk_t			*bp;
905 	mblk_t			*nmp = NULL;
906 	vio_mblk_t		*vmp;
907 	caddr_t			dst;
908 	uint32_t		mblksz;
909 	uint32_t		size;
910 	uint32_t		nbytes;
911 	int			rv;
912 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
913 	vgen_stats_t		*statsp = &ldcp->stats;
914 
915 	/* drop the packet if ldc is not up or handshake is not done */
916 	if (ldcp->ldc_status != LDC_UP) {
917 		(void) atomic_inc_32(&statsp->tx_pri_fail);
918 		DWARN(vgenp, ldcp, "status(%d), dropping packet\n",
919 		    ldcp->ldc_status);
920 		goto send_pkt_exit;
921 	}
922 
923 	if (ldcp->hphase != VH_DONE) {
924 		(void) atomic_inc_32(&statsp->tx_pri_fail);
925 		DWARN(vgenp, ldcp, "hphase(%x), dropping packet\n",
926 		    ldcp->hphase);
927 		goto send_pkt_exit;
928 	}
929 
930 	size = msgsize(mp);
931 
932 	/* frame size bigger than available payload len of raw data msg ? */
933 	if (size > (size_t)(ldcp->msglen - VIO_PKT_DATA_HDRSIZE)) {
934 		(void) atomic_inc_32(&statsp->tx_pri_fail);
935 		DWARN(vgenp, ldcp, "invalid size(%d)\n", size);
936 		goto send_pkt_exit;
937 	}
938 
939 	if (size < ETHERMIN)
940 		size = ETHERMIN;
941 
942 	/* alloc space for a raw data message */
943 	vmp = vio_allocb(vgenp->pri_tx_vmp);
944 	if (vmp == NULL) {
945 		(void) atomic_inc_32(&statsp->tx_pri_fail);
946 		DWARN(vgenp, ldcp, "vio_allocb failed\n");
947 		goto send_pkt_exit;
948 	} else {
949 		nmp = vmp->mp;
950 	}
951 	pkt = (vio_raw_data_msg_t *)nmp->b_rptr;
952 
953 	/* copy frame into the payload of raw data message */
954 	dst = (caddr_t)pkt->data;
955 	for (bp = mp; bp != NULL; bp = bp->b_cont) {
956 		mblksz = MBLKL(bp);
957 		bcopy(bp->b_rptr, dst, mblksz);
958 		dst += mblksz;
959 	}
960 
961 	vmp->state = VIO_MBLK_HAS_DATA;
962 
963 	/* setup the raw data msg */
964 	pkt->tag.vio_msgtype = VIO_TYPE_DATA;
965 	pkt->tag.vio_subtype = VIO_SUBTYPE_INFO;
966 	pkt->tag.vio_subtype_env = VIO_PKT_DATA;
967 	pkt->tag.vio_sid = ldcp->local_sid;
968 	nbytes = VIO_PKT_DATA_HDRSIZE + size;
969 
970 	/* send the msg over ldc */
971 	rv = vgen_sendmsg(ldcp, (caddr_t)pkt, nbytes, B_FALSE);
972 	if (rv != VGEN_SUCCESS) {
973 		(void) atomic_inc_32(&statsp->tx_pri_fail);
974 		DWARN(vgenp, ldcp, "Error sending priority frame\n");
975 		if (rv == ECONNRESET) {
976 			(void) vgen_handle_evt_reset(ldcp, VGEN_OTHER);
977 		}
978 		goto send_pkt_exit;
979 	}
980 
981 	/* update stats */
982 	(void) atomic_inc_64(&statsp->tx_pri_packets);
983 	(void) atomic_add_64(&statsp->tx_pri_bytes, size);
984 
985 send_pkt_exit:
986 	if (nmp != NULL)
987 		freemsg(nmp);
988 	freemsg(mp);
989 }
990 
991 /*
992  * enable/disable a multicast address
993  * note that the cblock of the ldc channel connected to the vsw is used for
994  * synchronization of the mctab.
995  */
996 int
997 vgen_multicst(void *arg, boolean_t add, const uint8_t *mca)
998 {
999 	vgen_t			*vgenp;
1000 	vnet_mcast_msg_t	mcastmsg;
1001 	vio_msg_tag_t		*tagp;
1002 	vgen_port_t		*portp;
1003 	vgen_ldc_t		*ldcp;
1004 	struct ether_addr	*addrp;
1005 	int			rv = DDI_FAILURE;
1006 	uint32_t		i;
1007 
1008 	portp = (vgen_port_t *)arg;
1009 	vgenp = portp->vgenp;
1010 
1011 	if (portp->is_vsw_port != B_TRUE) {
1012 		return (DDI_SUCCESS);
1013 	}
1014 
1015 	addrp = (struct ether_addr *)mca;
1016 	tagp = &mcastmsg.tag;
1017 	bzero(&mcastmsg, sizeof (mcastmsg));
1018 
1019 	ldcp = portp->ldcp;
1020 	if (ldcp == NULL) {
1021 		return (DDI_FAILURE);
1022 	}
1023 
1024 	mutex_enter(&ldcp->cblock);
1025 
1026 	if (ldcp->hphase == VH_DONE) {
1027 		/*
1028 		 * If handshake is done, send a msg to vsw to add/remove
1029 		 * the multicast address. Otherwise, we just update this
1030 		 * mcast address in our table and the table will be sync'd
1031 		 * with vsw when handshake completes.
1032 		 */
1033 		tagp->vio_msgtype = VIO_TYPE_CTRL;
1034 		tagp->vio_subtype = VIO_SUBTYPE_INFO;
1035 		tagp->vio_subtype_env = VNET_MCAST_INFO;
1036 		tagp->vio_sid = ldcp->local_sid;
1037 		bcopy(mca, &(mcastmsg.mca), ETHERADDRL);
1038 		mcastmsg.set = add;
1039 		mcastmsg.count = 1;
1040 		if (vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (mcastmsg),
1041 		    B_FALSE) != VGEN_SUCCESS) {
1042 			DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
1043 			rv = DDI_FAILURE;
1044 			goto vgen_mcast_exit;
1045 		}
1046 	}
1047 
1048 	if (add) {
1049 
1050 		/* expand multicast table if necessary */
1051 		if (vgenp->mccount >= vgenp->mcsize) {
1052 			struct ether_addr	*newtab;
1053 			uint32_t		newsize;
1054 
1055 
1056 			newsize = vgenp->mcsize * 2;
1057 
1058 			newtab = kmem_zalloc(newsize *
1059 			    sizeof (struct ether_addr), KM_NOSLEEP);
1060 			if (newtab == NULL)
1061 				goto vgen_mcast_exit;
1062 			bcopy(vgenp->mctab, newtab, vgenp->mcsize *
1063 			    sizeof (struct ether_addr));
1064 			kmem_free(vgenp->mctab,
1065 			    vgenp->mcsize * sizeof (struct ether_addr));
1066 
1067 			vgenp->mctab = newtab;
1068 			vgenp->mcsize = newsize;
1069 		}
1070 
1071 		/* add address to the table */
1072 		vgenp->mctab[vgenp->mccount++] = *addrp;
1073 
1074 	} else {
1075 
1076 		/* delete address from the table */
1077 		for (i = 0; i < vgenp->mccount; i++) {
1078 			if (ether_cmp(addrp, &(vgenp->mctab[i])) == 0) {
1079 
1080 				/*
1081 				 * If there's more than one address in this
1082 				 * table, delete the unwanted one by moving
1083 				 * the last one in the list over top of it;
1084 				 * otherwise, just remove it.
1085 				 */
1086 				if (vgenp->mccount > 1) {
1087 					vgenp->mctab[i] =
1088 					    vgenp->mctab[vgenp->mccount-1];
1089 				}
1090 				vgenp->mccount--;
1091 				break;
1092 			}
1093 		}
1094 	}
1095 
1096 	rv = DDI_SUCCESS;
1097 
1098 vgen_mcast_exit:
1099 
1100 	mutex_exit(&ldcp->cblock);
1101 	return (rv);
1102 }
1103 
1104 /* set or clear promiscuous mode on the device */
1105 static int
1106 vgen_promisc(void *arg, boolean_t on)
1107 {
1108 	_NOTE(ARGUNUSED(arg, on))
1109 	return (DDI_SUCCESS);
1110 }
1111 
1112 /* set the unicast mac address of the device */
1113 static int
1114 vgen_unicst(void *arg, const uint8_t *mca)
1115 {
1116 	_NOTE(ARGUNUSED(arg, mca))
1117 	return (DDI_SUCCESS);
1118 }
1119 
1120 /* get device statistics */
1121 int
1122 vgen_stat(void *arg, uint_t stat, uint64_t *val)
1123 {
1124 	vgen_port_t	*portp = (vgen_port_t *)arg;
1125 
1126 	*val = vgen_port_stat(portp, stat);
1127 	return (0);
1128 }
1129 
1130 /* vgen internal functions */
1131 /* detach all ports from the device */
1132 static void
1133 vgen_detach_ports(vgen_t *vgenp)
1134 {
1135 	vgen_port_t	*portp;
1136 	vgen_portlist_t	*plistp;
1137 
1138 	plistp = &(vgenp->vgenports);
1139 	WRITE_ENTER(&plistp->rwlock);
1140 	while ((portp = plistp->headp) != NULL) {
1141 		vgen_port_detach(portp);
1142 	}
1143 	RW_EXIT(&plistp->rwlock);
1144 }
1145 
1146 /*
1147  * detach the given port.
1148  */
1149 static void
1150 vgen_port_detach(vgen_port_t *portp)
1151 {
1152 	vgen_t		*vgenp;
1153 	int		port_num;
1154 
1155 	vgenp = portp->vgenp;
1156 	port_num = portp->port_num;
1157 
1158 	DBG1(vgenp, NULL, "port(%d):enter\n", port_num);
1159 
1160 	/*
1161 	 * If this port is connected to the vswitch, then
1162 	 * potentially there could be ports that may be using
1163 	 * this port to transmit packets. To address this do
1164 	 * the following:
1165 	 *	- First set vgenp->vsw_portp to NULL, so that
1166 	 *	  its not used after that.
1167 	 *	- Then wait for the refcnt to go down to 0.
1168 	 *	- Now we can safely detach this port.
1169 	 */
1170 	if (vgenp->vsw_portp == portp) {
1171 		vgenp->vsw_portp = NULL;
1172 		while (vgenp->vsw_port_refcnt > 0) {
1173 			delay(drv_usectohz(vgen_tx_delay));
1174 		}
1175 		(void) atomic_swap_32(&vgenp->vsw_port_refcnt, 0);
1176 	}
1177 
1178 	if (portp->vhp != NULL) {
1179 		vio_net_resource_unreg(portp->vhp);
1180 		portp->vhp = NULL;
1181 	}
1182 
1183 	vgen_vlan_destroy_hash(portp);
1184 
1185 	/* remove it from port list */
1186 	vgen_port_list_remove(portp);
1187 
1188 	/* detach channels from this port */
1189 	vgen_ldc_detach(portp->ldcp);
1190 
1191 	if (portp->num_ldcs != 0) {
1192 		kmem_free(portp->ldc_ids, portp->num_ldcs * sizeof (uint64_t));
1193 		portp->num_ldcs = 0;
1194 	}
1195 
1196 	mutex_destroy(&portp->lock);
1197 	KMEM_FREE(portp);
1198 
1199 	DBG1(vgenp, NULL, "port(%d):exit\n", port_num);
1200 }
1201 
1202 /* add a port to port list */
1203 static void
1204 vgen_port_list_insert(vgen_port_t *portp)
1205 {
1206 	vgen_portlist_t	*plistp;
1207 	vgen_t		*vgenp;
1208 
1209 	vgenp = portp->vgenp;
1210 	plistp = &(vgenp->vgenports);
1211 
1212 	if (plistp->headp == NULL) {
1213 		plistp->headp = portp;
1214 	} else {
1215 		plistp->tailp->nextp = portp;
1216 	}
1217 	plistp->tailp = portp;
1218 	portp->nextp = NULL;
1219 }
1220 
1221 /* remove a port from port list */
1222 static void
1223 vgen_port_list_remove(vgen_port_t *portp)
1224 {
1225 	vgen_port_t	*prevp;
1226 	vgen_port_t	*nextp;
1227 	vgen_portlist_t	*plistp;
1228 	vgen_t		*vgenp;
1229 
1230 	vgenp = portp->vgenp;
1231 
1232 	plistp = &(vgenp->vgenports);
1233 
1234 	if (plistp->headp == NULL)
1235 		return;
1236 
1237 	if (portp == plistp->headp) {
1238 		plistp->headp = portp->nextp;
1239 		if (portp == plistp->tailp)
1240 			plistp->tailp = plistp->headp;
1241 	} else {
1242 		for (prevp = plistp->headp;
1243 		    ((nextp = prevp->nextp) != NULL) && (nextp != portp);
1244 		    prevp = nextp)
1245 			;
1246 		if (nextp == portp) {
1247 			prevp->nextp = portp->nextp;
1248 		}
1249 		if (portp == plistp->tailp)
1250 			plistp->tailp = prevp;
1251 	}
1252 }
1253 
1254 /* lookup a port in the list based on port_num */
1255 static vgen_port_t *
1256 vgen_port_lookup(vgen_portlist_t *plistp, int port_num)
1257 {
1258 	vgen_port_t *portp = NULL;
1259 
1260 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
1261 		if (portp->port_num == port_num) {
1262 			break;
1263 		}
1264 	}
1265 
1266 	return (portp);
1267 }
1268 
1269 static void
1270 vgen_port_init(vgen_port_t *portp)
1271 {
1272 	/* Add the port to the specified vlans */
1273 	vgen_vlan_add_ids(portp);
1274 
1275 	/* Bring up the channel */
1276 	(void) vgen_ldc_init(portp->ldcp);
1277 }
1278 
1279 static void
1280 vgen_port_uninit(vgen_port_t *portp)
1281 {
1282 	vgen_ldc_uninit(portp->ldcp);
1283 
1284 	/* remove the port from vlans it has been assigned to */
1285 	vgen_vlan_remove_ids(portp);
1286 }
1287 
1288 /*
1289  * Scan the machine description for this instance of vnet
1290  * and read its properties. Called only from vgen_init().
1291  * Returns: 0 on success, 1 on failure.
1292  */
1293 static int
1294 vgen_read_mdprops(vgen_t *vgenp)
1295 {
1296 	vnet_t		*vnetp = vgenp->vnetp;
1297 	md_t		*mdp = NULL;
1298 	mde_cookie_t	rootnode;
1299 	mde_cookie_t	*listp = NULL;
1300 	uint64_t	cfgh;
1301 	char		*name;
1302 	int		rv = 1;
1303 	int		num_nodes = 0;
1304 	int		num_devs = 0;
1305 	int		listsz = 0;
1306 	int		i;
1307 
1308 	if ((mdp = md_get_handle()) == NULL) {
1309 		return (rv);
1310 	}
1311 
1312 	num_nodes = md_node_count(mdp);
1313 	ASSERT(num_nodes > 0);
1314 
1315 	listsz = num_nodes * sizeof (mde_cookie_t);
1316 	listp = (mde_cookie_t *)kmem_zalloc(listsz, KM_SLEEP);
1317 
1318 	rootnode = md_root_node(mdp);
1319 
1320 	/* search for all "virtual_device" nodes */
1321 	num_devs = md_scan_dag(mdp, rootnode,
1322 	    md_find_name(mdp, vdev_propname),
1323 	    md_find_name(mdp, "fwd"), listp);
1324 	if (num_devs <= 0) {
1325 		goto vgen_readmd_exit;
1326 	}
1327 
1328 	/*
1329 	 * Now loop through the list of virtual-devices looking for
1330 	 * devices with name "network" and for each such device compare
1331 	 * its instance with what we have from the 'reg' property to
1332 	 * find the right node in MD and then read all its properties.
1333 	 */
1334 	for (i = 0; i < num_devs; i++) {
1335 
1336 		if (md_get_prop_str(mdp, listp[i], "name", &name) != 0) {
1337 			goto vgen_readmd_exit;
1338 		}
1339 
1340 		/* is this a "network" device? */
1341 		if (strcmp(name, vnet_propname) != 0)
1342 			continue;
1343 
1344 		if (md_get_prop_val(mdp, listp[i], "cfg-handle", &cfgh) != 0) {
1345 			goto vgen_readmd_exit;
1346 		}
1347 
1348 		/* is this the required instance of vnet? */
1349 		if (vgenp->regprop != cfgh)
1350 			continue;
1351 
1352 		/*
1353 		 * Read the 'linkprop' property to know if this vnet
1354 		 * device should get physical link updates from vswitch.
1355 		 */
1356 		vgen_linkprop_read(vgenp, mdp, listp[i],
1357 		    &vnetp->pls_update);
1358 
1359 		/*
1360 		 * Read the mtu. Note that we set the mtu of vnet device within
1361 		 * this routine itself, after validating the range.
1362 		 */
1363 		vgen_mtu_read(vgenp, mdp, listp[i], &vnetp->mtu);
1364 		if (vnetp->mtu < ETHERMTU || vnetp->mtu > VNET_MAX_MTU) {
1365 			vnetp->mtu = ETHERMTU;
1366 		}
1367 		vgenp->max_frame_size = vnetp->mtu +
1368 		    sizeof (struct ether_header) + VLAN_TAGSZ;
1369 
1370 		/* read priority ether types */
1371 		vgen_read_pri_eth_types(vgenp, mdp, listp[i]);
1372 
1373 		/* read vlan id properties of this vnet instance */
1374 		vgen_vlan_read_ids(vgenp, VGEN_LOCAL, mdp, listp[i],
1375 		    &vnetp->pvid, &vnetp->vids, &vnetp->nvids,
1376 		    &vnetp->default_vlan_id);
1377 
1378 		rv = 0;
1379 		break;
1380 	}
1381 
1382 vgen_readmd_exit:
1383 
1384 	kmem_free(listp, listsz);
1385 	(void) md_fini_handle(mdp);
1386 	return (rv);
1387 }
1388 
1389 /*
1390  * Read vlan id properties of the given MD node.
1391  * Arguments:
1392  *   arg:          device argument(vnet device or a port)
1393  *   type:         type of arg; VGEN_LOCAL(vnet device) or VGEN_PEER(port)
1394  *   mdp:          machine description
1395  *   node:         md node cookie
1396  *
1397  * Returns:
1398  *   pvidp:        port-vlan-id of the node
1399  *   vidspp:       list of vlan-ids of the node
1400  *   nvidsp:       # of vlan-ids in the list
1401  *   default_idp:  default-vlan-id of the node(if node is vnet device)
1402  */
1403 static void
1404 vgen_vlan_read_ids(void *arg, int type, md_t *mdp, mde_cookie_t node,
1405 	uint16_t *pvidp, uint16_t **vidspp, uint16_t *nvidsp,
1406 	uint16_t *default_idp)
1407 {
1408 	vgen_t		*vgenp;
1409 	vnet_t		*vnetp;
1410 	vgen_port_t	*portp;
1411 	char		*pvid_propname;
1412 	char		*vid_propname;
1413 	uint_t		nvids;
1414 	uint32_t	vids_size;
1415 	int		rv;
1416 	int		i;
1417 	uint64_t	*data;
1418 	uint64_t	val;
1419 	int		size;
1420 	int		inst;
1421 
1422 	if (type == VGEN_LOCAL) {
1423 
1424 		vgenp = (vgen_t *)arg;
1425 		vnetp = vgenp->vnetp;
1426 		pvid_propname = vgen_pvid_propname;
1427 		vid_propname = vgen_vid_propname;
1428 		inst = vnetp->instance;
1429 
1430 	} else if (type == VGEN_PEER) {
1431 
1432 		portp = (vgen_port_t *)arg;
1433 		vgenp = portp->vgenp;
1434 		vnetp = vgenp->vnetp;
1435 		pvid_propname = port_pvid_propname;
1436 		vid_propname = port_vid_propname;
1437 		inst = portp->port_num;
1438 
1439 	} else {
1440 		return;
1441 	}
1442 
1443 	if (type == VGEN_LOCAL && default_idp != NULL) {
1444 		rv = md_get_prop_val(mdp, node, vgen_dvid_propname, &val);
1445 		if (rv != 0) {
1446 			DWARN(vgenp, NULL, "prop(%s) not found",
1447 			    vgen_dvid_propname);
1448 
1449 			*default_idp = vnet_default_vlan_id;
1450 		} else {
1451 			*default_idp = val & 0xFFF;
1452 			DBG2(vgenp, NULL, "%s(%d): (%d)\n", vgen_dvid_propname,
1453 			    inst, *default_idp);
1454 		}
1455 	}
1456 
1457 	rv = md_get_prop_val(mdp, node, pvid_propname, &val);
1458 	if (rv != 0) {
1459 		DWARN(vgenp, NULL, "prop(%s) not found", pvid_propname);
1460 		*pvidp = vnet_default_vlan_id;
1461 	} else {
1462 
1463 		*pvidp = val & 0xFFF;
1464 		DBG2(vgenp, NULL, "%s(%d): (%d)\n",
1465 		    pvid_propname, inst, *pvidp);
1466 	}
1467 
1468 	rv = md_get_prop_data(mdp, node, vid_propname, (uint8_t **)&data,
1469 	    &size);
1470 	if (rv != 0) {
1471 		DBG2(vgenp, NULL, "prop(%s) not found", vid_propname);
1472 		size = 0;
1473 	} else {
1474 		size /= sizeof (uint64_t);
1475 	}
1476 	nvids = size;
1477 
1478 	if (nvids != 0) {
1479 		DBG2(vgenp, NULL, "%s(%d): ", vid_propname, inst);
1480 		vids_size = sizeof (uint16_t) * nvids;
1481 		*vidspp = kmem_zalloc(vids_size, KM_SLEEP);
1482 		for (i = 0; i < nvids; i++) {
1483 			(*vidspp)[i] = data[i] & 0xFFFF;
1484 			DBG2(vgenp, NULL, " %d ", (*vidspp)[i]);
1485 		}
1486 		DBG2(vgenp, NULL, "\n");
1487 	}
1488 
1489 	*nvidsp = nvids;
1490 }
1491 
1492 /*
1493  * Create a vlan id hash table for the given port.
1494  */
1495 static void
1496 vgen_vlan_create_hash(vgen_port_t *portp)
1497 {
1498 	char		hashname[MAXNAMELEN];
1499 
1500 	(void) snprintf(hashname, MAXNAMELEN, "port%d-vlan-hash",
1501 	    portp->port_num);
1502 
1503 	portp->vlan_nchains = vgen_vlan_nchains;
1504 	portp->vlan_hashp = mod_hash_create_idhash(hashname,
1505 	    portp->vlan_nchains, mod_hash_null_valdtor);
1506 }
1507 
1508 /*
1509  * Destroy the vlan id hash table in the given port.
1510  */
1511 static void
1512 vgen_vlan_destroy_hash(vgen_port_t *portp)
1513 {
1514 	if (portp->vlan_hashp != NULL) {
1515 		mod_hash_destroy_hash(portp->vlan_hashp);
1516 		portp->vlan_hashp = NULL;
1517 		portp->vlan_nchains = 0;
1518 	}
1519 }
1520 
1521 /*
1522  * Add a port to the vlans specified in its port properites.
1523  */
1524 static void
1525 vgen_vlan_add_ids(vgen_port_t *portp)
1526 {
1527 	int		rv;
1528 	int		i;
1529 
1530 	rv = mod_hash_insert(portp->vlan_hashp,
1531 	    (mod_hash_key_t)VLAN_ID_KEY(portp->pvid),
1532 	    (mod_hash_val_t)B_TRUE);
1533 	ASSERT(rv == 0);
1534 
1535 	for (i = 0; i < portp->nvids; i++) {
1536 		rv = mod_hash_insert(portp->vlan_hashp,
1537 		    (mod_hash_key_t)VLAN_ID_KEY(portp->vids[i]),
1538 		    (mod_hash_val_t)B_TRUE);
1539 		ASSERT(rv == 0);
1540 	}
1541 }
1542 
1543 /*
1544  * Remove a port from the vlans it has been assigned to.
1545  */
1546 static void
1547 vgen_vlan_remove_ids(vgen_port_t *portp)
1548 {
1549 	int		rv;
1550 	int		i;
1551 	mod_hash_val_t	vp;
1552 
1553 	rv = mod_hash_remove(portp->vlan_hashp,
1554 	    (mod_hash_key_t)VLAN_ID_KEY(portp->pvid),
1555 	    (mod_hash_val_t *)&vp);
1556 	ASSERT(rv == 0);
1557 
1558 	for (i = 0; i < portp->nvids; i++) {
1559 		rv = mod_hash_remove(portp->vlan_hashp,
1560 		    (mod_hash_key_t)VLAN_ID_KEY(portp->vids[i]),
1561 		    (mod_hash_val_t *)&vp);
1562 		ASSERT(rv == 0);
1563 	}
1564 }
1565 
1566 /*
1567  * Lookup the vlan id of the given tx frame. If it is a vlan-tagged frame,
1568  * then the vlan-id is available in the tag; otherwise, its vlan id is
1569  * implicitly obtained from the port-vlan-id of the vnet device.
1570  * The vlan id determined is returned in vidp.
1571  * Returns: B_TRUE if it is a tagged frame; B_FALSE if it is untagged.
1572  */
1573 static boolean_t
1574 vgen_frame_lookup_vid(vnet_t *vnetp, struct ether_header *ehp, uint16_t *vidp)
1575 {
1576 	struct ether_vlan_header	*evhp;
1577 
1578 	/* If it's a tagged frame, get the vlan id from vlan header */
1579 	if (ehp->ether_type == ETHERTYPE_VLAN) {
1580 
1581 		evhp = (struct ether_vlan_header *)ehp;
1582 		*vidp = VLAN_ID(ntohs(evhp->ether_tci));
1583 		return (B_TRUE);
1584 	}
1585 
1586 	/* Untagged frame, vlan-id is the pvid of vnet device */
1587 	*vidp = vnetp->pvid;
1588 	return (B_FALSE);
1589 }
1590 
1591 /*
1592  * Find the given vlan id in the hash table.
1593  * Return: B_TRUE if the id is found; B_FALSE if not found.
1594  */
1595 static boolean_t
1596 vgen_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid)
1597 {
1598 	int		rv;
1599 	mod_hash_val_t	vp;
1600 
1601 	rv = mod_hash_find(vlan_hashp, VLAN_ID_KEY(vid), (mod_hash_val_t *)&vp);
1602 
1603 	if (rv != 0)
1604 		return (B_FALSE);
1605 
1606 	return (B_TRUE);
1607 }
1608 
1609 /*
1610  * This function reads "priority-ether-types" property from md. This property
1611  * is used to enable support for priority frames. Applications which need
1612  * guaranteed and timely delivery of certain high priority frames to/from
1613  * a vnet or vsw within ldoms, should configure this property by providing
1614  * the ether type(s) for which the priority facility is needed.
1615  * Normal data frames are delivered over a ldc channel using the descriptor
1616  * ring mechanism which is constrained by factors such as descriptor ring size,
1617  * the rate at which the ring is processed at the peer ldc end point, etc.
1618  * The priority mechanism provides an Out-Of-Band path to send/receive frames
1619  * as raw pkt data (VIO_PKT_DATA) messages over the channel, avoiding the
1620  * descriptor ring path and enables a more reliable and timely delivery of
1621  * frames to the peer.
1622  */
1623 static void
1624 vgen_read_pri_eth_types(vgen_t *vgenp, md_t *mdp, mde_cookie_t node)
1625 {
1626 	int		rv;
1627 	uint16_t	*types;
1628 	uint64_t	*data;
1629 	int		size;
1630 	int		i;
1631 	size_t		mblk_sz;
1632 
1633 	rv = md_get_prop_data(mdp, node, pri_types_propname,
1634 	    (uint8_t **)&data, &size);
1635 	if (rv != 0) {
1636 		/*
1637 		 * Property may not exist if we are running pre-ldoms1.1 f/w.
1638 		 * Check if 'vgen_pri_eth_type' has been set in that case.
1639 		 */
1640 		if (vgen_pri_eth_type != 0) {
1641 			size = sizeof (vgen_pri_eth_type);
1642 			data = &vgen_pri_eth_type;
1643 		} else {
1644 			DBG2(vgenp, NULL,
1645 			    "prop(%s) not found", pri_types_propname);
1646 			size = 0;
1647 		}
1648 	}
1649 
1650 	if (size == 0) {
1651 		vgenp->pri_num_types = 0;
1652 		return;
1653 	}
1654 
1655 	/*
1656 	 * we have some priority-ether-types defined;
1657 	 * allocate a table of these types and also
1658 	 * allocate a pool of mblks to transmit these
1659 	 * priority packets.
1660 	 */
1661 	size /= sizeof (uint64_t);
1662 	vgenp->pri_num_types = size;
1663 	vgenp->pri_types = kmem_zalloc(size * sizeof (uint16_t), KM_SLEEP);
1664 	for (i = 0, types = vgenp->pri_types; i < size; i++) {
1665 		types[i] = data[i] & 0xFFFF;
1666 	}
1667 	mblk_sz = (VIO_PKT_DATA_HDRSIZE + vgenp->max_frame_size + 7) & ~7;
1668 	(void) vio_create_mblks(vgen_pri_tx_nmblks, mblk_sz, NULL,
1669 	    &vgenp->pri_tx_vmp);
1670 }
1671 
1672 static void
1673 vgen_mtu_read(vgen_t *vgenp, md_t *mdp, mde_cookie_t node, uint32_t *mtu)
1674 {
1675 	int		rv;
1676 	uint64_t	val;
1677 	char		*mtu_propname;
1678 
1679 	mtu_propname = vgen_mtu_propname;
1680 
1681 	rv = md_get_prop_val(mdp, node, mtu_propname, &val);
1682 	if (rv != 0) {
1683 		DWARN(vgenp, NULL, "prop(%s) not found", mtu_propname);
1684 		*mtu = vnet_ethermtu;
1685 	} else {
1686 
1687 		*mtu = val & 0xFFFF;
1688 		DBG2(vgenp, NULL, "%s(%d): (%d)\n", mtu_propname,
1689 		    vgenp->instance, *mtu);
1690 	}
1691 }
1692 
1693 static void
1694 vgen_linkprop_read(vgen_t *vgenp, md_t *mdp, mde_cookie_t node,
1695 	boolean_t *pls)
1696 {
1697 	int		rv;
1698 	uint64_t	val;
1699 	char		*linkpropname;
1700 
1701 	linkpropname = vgen_linkprop_propname;
1702 
1703 	rv = md_get_prop_val(mdp, node, linkpropname, &val);
1704 	if (rv != 0) {
1705 		DWARN(vgenp, NULL, "prop(%s) not found", linkpropname);
1706 		*pls = B_FALSE;
1707 	} else {
1708 
1709 		*pls = (val & 0x1) ?  B_TRUE : B_FALSE;
1710 		DBG2(vgenp, NULL, "%s(%d): (%d)\n", linkpropname,
1711 		    vgenp->instance, *pls);
1712 	}
1713 }
1714 
1715 /* register with MD event generator */
1716 static int
1717 vgen_mdeg_reg(vgen_t *vgenp)
1718 {
1719 	mdeg_prop_spec_t	*pspecp;
1720 	mdeg_node_spec_t	*parentp;
1721 	uint_t			templatesz;
1722 	int			rv;
1723 	mdeg_handle_t		dev_hdl = NULL;
1724 	mdeg_handle_t		port_hdl = NULL;
1725 
1726 	templatesz = sizeof (vgen_prop_template);
1727 	pspecp = kmem_zalloc(templatesz, KM_NOSLEEP);
1728 	if (pspecp == NULL) {
1729 		return (DDI_FAILURE);
1730 	}
1731 	parentp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_NOSLEEP);
1732 	if (parentp == NULL) {
1733 		kmem_free(pspecp, templatesz);
1734 		return (DDI_FAILURE);
1735 	}
1736 
1737 	bcopy(vgen_prop_template, pspecp, templatesz);
1738 
1739 	/*
1740 	 * NOTE: The instance here refers to the value of "reg" property and
1741 	 * not the dev_info instance (ddi_get_instance()) of vnet.
1742 	 */
1743 	VGEN_SET_MDEG_PROP_INST(pspecp, vgenp->regprop);
1744 
1745 	parentp->namep = "virtual-device";
1746 	parentp->specp = pspecp;
1747 
1748 	/* save parentp in vgen_t */
1749 	vgenp->mdeg_parentp = parentp;
1750 
1751 	/*
1752 	 * Register an interest in 'virtual-device' nodes with a
1753 	 * 'name' property of 'network'
1754 	 */
1755 	rv = mdeg_register(parentp, &vdev_match, vgen_mdeg_cb, vgenp, &dev_hdl);
1756 	if (rv != MDEG_SUCCESS) {
1757 		DERR(vgenp, NULL, "mdeg_register failed\n");
1758 		goto mdeg_reg_fail;
1759 	}
1760 
1761 	/* Register an interest in 'port' nodes */
1762 	rv = mdeg_register(parentp, &vport_match, vgen_mdeg_port_cb, vgenp,
1763 	    &port_hdl);
1764 	if (rv != MDEG_SUCCESS) {
1765 		DERR(vgenp, NULL, "mdeg_register failed\n");
1766 		goto mdeg_reg_fail;
1767 	}
1768 
1769 	/* save mdeg handle in vgen_t */
1770 	vgenp->mdeg_dev_hdl = dev_hdl;
1771 	vgenp->mdeg_port_hdl = port_hdl;
1772 
1773 	return (DDI_SUCCESS);
1774 
1775 mdeg_reg_fail:
1776 	if (dev_hdl != NULL) {
1777 		(void) mdeg_unregister(dev_hdl);
1778 	}
1779 	KMEM_FREE(parentp);
1780 	kmem_free(pspecp, templatesz);
1781 	vgenp->mdeg_parentp = NULL;
1782 	return (DDI_FAILURE);
1783 }
1784 
1785 /* unregister with MD event generator */
1786 static void
1787 vgen_mdeg_unreg(vgen_t *vgenp)
1788 {
1789 	if (vgenp->mdeg_dev_hdl != NULL) {
1790 		(void) mdeg_unregister(vgenp->mdeg_dev_hdl);
1791 		vgenp->mdeg_dev_hdl = NULL;
1792 	}
1793 	if (vgenp->mdeg_port_hdl != NULL) {
1794 		(void) mdeg_unregister(vgenp->mdeg_port_hdl);
1795 		vgenp->mdeg_port_hdl = NULL;
1796 	}
1797 
1798 	if (vgenp->mdeg_parentp != NULL) {
1799 		kmem_free(vgenp->mdeg_parentp->specp,
1800 		    sizeof (vgen_prop_template));
1801 		KMEM_FREE(vgenp->mdeg_parentp);
1802 		vgenp->mdeg_parentp = NULL;
1803 	}
1804 }
1805 
1806 /* mdeg callback function for the port node */
1807 static int
1808 vgen_mdeg_port_cb(void *cb_argp, mdeg_result_t *resp)
1809 {
1810 	int		idx;
1811 	int		vsw_idx = -1;
1812 	uint64_t 	val;
1813 	vgen_t		*vgenp;
1814 
1815 	if ((resp == NULL) || (cb_argp == NULL)) {
1816 		return (MDEG_FAILURE);
1817 	}
1818 
1819 	vgenp = (vgen_t *)cb_argp;
1820 	DBG1(vgenp, NULL, "enter\n");
1821 
1822 	mutex_enter(&vgenp->lock);
1823 
1824 	DBG1(vgenp, NULL, "ports: removed(%x), "
1825 	"added(%x), updated(%x)\n", resp->removed.nelem,
1826 	    resp->added.nelem, resp->match_curr.nelem);
1827 
1828 	for (idx = 0; idx < resp->removed.nelem; idx++) {
1829 		(void) vgen_remove_port(vgenp, resp->removed.mdp,
1830 		    resp->removed.mdep[idx]);
1831 	}
1832 
1833 	if (vgenp->vsw_portp == NULL) {
1834 		/*
1835 		 * find vsw_port and add it first, because other ports need
1836 		 * this when adding fdb entry (see vgen_port_init()).
1837 		 */
1838 		for (idx = 0; idx < resp->added.nelem; idx++) {
1839 			if (!(md_get_prop_val(resp->added.mdp,
1840 			    resp->added.mdep[idx], swport_propname, &val))) {
1841 				if (val == 0) {
1842 					/*
1843 					 * This port is connected to the
1844 					 * vsw on service domain.
1845 					 */
1846 					vsw_idx = idx;
1847 					if (vgen_add_port(vgenp,
1848 					    resp->added.mdp,
1849 					    resp->added.mdep[idx]) !=
1850 					    DDI_SUCCESS) {
1851 						cmn_err(CE_NOTE, "vnet%d Could "
1852 						    "not initialize virtual "
1853 						    "switch port.",
1854 						    vgenp->instance);
1855 						mutex_exit(&vgenp->lock);
1856 						return (MDEG_FAILURE);
1857 					}
1858 					break;
1859 				}
1860 			}
1861 		}
1862 		if (vsw_idx == -1) {
1863 			DWARN(vgenp, NULL, "can't find vsw_port\n");
1864 			mutex_exit(&vgenp->lock);
1865 			return (MDEG_FAILURE);
1866 		}
1867 	}
1868 
1869 	for (idx = 0; idx < resp->added.nelem; idx++) {
1870 		if ((vsw_idx != -1) && (vsw_idx == idx)) /* skip vsw_port */
1871 			continue;
1872 
1873 		/* If this port can't be added just skip it. */
1874 		(void) vgen_add_port(vgenp, resp->added.mdp,
1875 		    resp->added.mdep[idx]);
1876 	}
1877 
1878 	for (idx = 0; idx < resp->match_curr.nelem; idx++) {
1879 		(void) vgen_update_port(vgenp, resp->match_curr.mdp,
1880 		    resp->match_curr.mdep[idx],
1881 		    resp->match_prev.mdp,
1882 		    resp->match_prev.mdep[idx]);
1883 	}
1884 
1885 	mutex_exit(&vgenp->lock);
1886 	DBG1(vgenp, NULL, "exit\n");
1887 	return (MDEG_SUCCESS);
1888 }
1889 
1890 /* mdeg callback function for the vnet node */
1891 static int
1892 vgen_mdeg_cb(void *cb_argp, mdeg_result_t *resp)
1893 {
1894 	vgen_t		*vgenp;
1895 	vnet_t		*vnetp;
1896 	md_t		*mdp;
1897 	mde_cookie_t	node;
1898 	uint64_t	inst;
1899 	char		*node_name = NULL;
1900 
1901 	if ((resp == NULL) || (cb_argp == NULL)) {
1902 		return (MDEG_FAILURE);
1903 	}
1904 
1905 	vgenp = (vgen_t *)cb_argp;
1906 	vnetp = vgenp->vnetp;
1907 
1908 	DBG1(vgenp, NULL, "added %d : removed %d : curr matched %d"
1909 	    " : prev matched %d", resp->added.nelem, resp->removed.nelem,
1910 	    resp->match_curr.nelem, resp->match_prev.nelem);
1911 
1912 	mutex_enter(&vgenp->lock);
1913 
1914 	/*
1915 	 * We get an initial callback for this node as 'added' after
1916 	 * registering with mdeg. Note that we would have already gathered
1917 	 * information about this vnet node by walking MD earlier during attach
1918 	 * (in vgen_read_mdprops()). So, there is a window where the properties
1919 	 * of this node might have changed when we get this initial 'added'
1920 	 * callback. We handle this as if an update occured and invoke the same
1921 	 * function which handles updates to the properties of this vnet-node
1922 	 * if any. A non-zero 'match' value indicates that the MD has been
1923 	 * updated and that a 'network' node is present which may or may not
1924 	 * have been updated. It is up to the clients to examine their own
1925 	 * nodes and determine if they have changed.
1926 	 */
1927 	if (resp->added.nelem != 0) {
1928 
1929 		if (resp->added.nelem != 1) {
1930 			cmn_err(CE_NOTE, "!vnet%d: number of nodes added "
1931 			    "invalid: %d\n", vnetp->instance,
1932 			    resp->added.nelem);
1933 			goto vgen_mdeg_cb_err;
1934 		}
1935 
1936 		mdp = resp->added.mdp;
1937 		node = resp->added.mdep[0];
1938 
1939 	} else if (resp->match_curr.nelem != 0) {
1940 
1941 		if (resp->match_curr.nelem != 1) {
1942 			cmn_err(CE_NOTE, "!vnet%d: number of nodes updated "
1943 			    "invalid: %d\n", vnetp->instance,
1944 			    resp->match_curr.nelem);
1945 			goto vgen_mdeg_cb_err;
1946 		}
1947 
1948 		mdp = resp->match_curr.mdp;
1949 		node = resp->match_curr.mdep[0];
1950 
1951 	} else {
1952 		goto vgen_mdeg_cb_err;
1953 	}
1954 
1955 	/* Validate name and instance */
1956 	if (md_get_prop_str(mdp, node, "name", &node_name) != 0) {
1957 		DERR(vgenp, NULL, "unable to get node name\n");
1958 		goto vgen_mdeg_cb_err;
1959 	}
1960 
1961 	/* is this a virtual-network device? */
1962 	if (strcmp(node_name, vnet_propname) != 0) {
1963 		DERR(vgenp, NULL, "%s: Invalid node name: %s\n", node_name);
1964 		goto vgen_mdeg_cb_err;
1965 	}
1966 
1967 	if (md_get_prop_val(mdp, node, "cfg-handle", &inst)) {
1968 		DERR(vgenp, NULL, "prop(cfg-handle) not found\n");
1969 		goto vgen_mdeg_cb_err;
1970 	}
1971 
1972 	/* is this the right instance of vnet? */
1973 	if (inst != vgenp->regprop) {
1974 		DERR(vgenp, NULL,  "Invalid cfg-handle: %lx\n", inst);
1975 		goto vgen_mdeg_cb_err;
1976 	}
1977 
1978 	vgen_update_md_prop(vgenp, mdp, node);
1979 
1980 	mutex_exit(&vgenp->lock);
1981 	return (MDEG_SUCCESS);
1982 
1983 vgen_mdeg_cb_err:
1984 	mutex_exit(&vgenp->lock);
1985 	return (MDEG_FAILURE);
1986 }
1987 
1988 /*
1989  * Check to see if the relevant properties in the specified node have
1990  * changed, and if so take the appropriate action.
1991  */
1992 static void
1993 vgen_update_md_prop(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
1994 {
1995 	uint16_t	pvid;
1996 	uint16_t	*vids;
1997 	uint16_t	nvids;
1998 	vnet_t		*vnetp = vgenp->vnetp;
1999 	uint32_t	mtu;
2000 	boolean_t	pls_update;
2001 	enum		{ MD_init = 0x1,
2002 			    MD_vlans = 0x2,
2003 			    MD_mtu = 0x4,
2004 			    MD_pls = 0x8 } updated;
2005 	int		rv;
2006 
2007 	updated = MD_init;
2008 
2009 	/* Read the vlan ids */
2010 	vgen_vlan_read_ids(vgenp, VGEN_LOCAL, mdp, mdex, &pvid, &vids,
2011 	    &nvids, NULL);
2012 
2013 	/* Determine if there are any vlan id updates */
2014 	if ((pvid != vnetp->pvid) ||		/* pvid changed? */
2015 	    (nvids != vnetp->nvids) ||		/* # of vids changed? */
2016 	    ((nvids != 0) && (vnetp->nvids != 0) &&	/* vids changed? */
2017 	    bcmp(vids, vnetp->vids, sizeof (uint16_t) * nvids))) {
2018 		updated |= MD_vlans;
2019 	}
2020 
2021 	/* Read mtu */
2022 	vgen_mtu_read(vgenp, mdp, mdex, &mtu);
2023 	if (mtu != vnetp->mtu) {
2024 		if (mtu >= ETHERMTU && mtu <= VNET_MAX_MTU) {
2025 			updated |= MD_mtu;
2026 		} else {
2027 			cmn_err(CE_NOTE, "!vnet%d: Unable to process mtu update"
2028 			    " as the specified value:%d is invalid\n",
2029 			    vnetp->instance, mtu);
2030 		}
2031 	}
2032 
2033 	/*
2034 	 * Read the 'linkprop' property.
2035 	 */
2036 	vgen_linkprop_read(vgenp, mdp, mdex, &pls_update);
2037 	if (pls_update != vnetp->pls_update) {
2038 		updated |= MD_pls;
2039 	}
2040 
2041 	/* Now process the updated props */
2042 
2043 	if (updated & MD_vlans) {
2044 
2045 		/* save the new vlan ids */
2046 		vnetp->pvid = pvid;
2047 		if (vnetp->nvids != 0) {
2048 			kmem_free(vnetp->vids,
2049 			    sizeof (uint16_t) * vnetp->nvids);
2050 			vnetp->nvids = 0;
2051 		}
2052 		if (nvids != 0) {
2053 			vnetp->nvids = nvids;
2054 			vnetp->vids = vids;
2055 		}
2056 
2057 		/* reset vlan-unaware peers (ver < 1.3) and restart handshake */
2058 		vgen_reset_vlan_unaware_ports(vgenp);
2059 
2060 	} else {
2061 
2062 		if (nvids != 0) {
2063 			kmem_free(vids, sizeof (uint16_t) * nvids);
2064 		}
2065 	}
2066 
2067 	if (updated & MD_mtu) {
2068 
2069 		DBG2(vgenp, NULL, "curr_mtu(%d) new_mtu(%d)\n",
2070 		    vnetp->mtu, mtu);
2071 
2072 		rv = vnet_mtu_update(vnetp, mtu);
2073 		if (rv == 0) {
2074 			vgenp->max_frame_size = mtu +
2075 			    sizeof (struct ether_header) + VLAN_TAGSZ;
2076 		}
2077 	}
2078 
2079 	if (updated & MD_pls) {
2080 		/* enable/disable physical link state updates */
2081 		vnetp->pls_update = pls_update;
2082 		mutex_exit(&vgenp->lock);
2083 
2084 		/* reset vsw-port to re-negotiate with the updated prop. */
2085 		vgen_reset_vsw_port(vgenp);
2086 
2087 		mutex_enter(&vgenp->lock);
2088 	}
2089 }
2090 
2091 /* add a new port to the device */
2092 static int
2093 vgen_add_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
2094 {
2095 	vgen_port_t	*portp;
2096 	int		rv;
2097 
2098 	portp = kmem_zalloc(sizeof (vgen_port_t), KM_SLEEP);
2099 
2100 	rv = vgen_port_read_props(portp, vgenp, mdp, mdex);
2101 	if (rv != DDI_SUCCESS) {
2102 		KMEM_FREE(portp);
2103 		return (DDI_FAILURE);
2104 	}
2105 
2106 	rv = vgen_port_attach(portp);
2107 	if (rv != DDI_SUCCESS) {
2108 		return (DDI_FAILURE);
2109 	}
2110 
2111 	return (DDI_SUCCESS);
2112 }
2113 
2114 /* read properties of the port from its md node */
2115 static int
2116 vgen_port_read_props(vgen_port_t *portp, vgen_t *vgenp, md_t *mdp,
2117 	mde_cookie_t mdex)
2118 {
2119 	uint64_t		port_num;
2120 	uint64_t		*ldc_ids;
2121 	uint64_t		macaddr;
2122 	uint64_t		val;
2123 	int			num_ldcs;
2124 	int			i;
2125 	int			addrsz;
2126 	int			num_nodes = 0;
2127 	int			listsz = 0;
2128 	mde_cookie_t		*listp = NULL;
2129 	uint8_t			*addrp;
2130 	struct ether_addr	ea;
2131 
2132 	/* read "id" property to get the port number */
2133 	if (md_get_prop_val(mdp, mdex, id_propname, &port_num)) {
2134 		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
2135 		return (DDI_FAILURE);
2136 	}
2137 
2138 	/*
2139 	 * Find the channel endpoint node(s) under this port node.
2140 	 */
2141 	if ((num_nodes = md_node_count(mdp)) <= 0) {
2142 		DWARN(vgenp, NULL, "invalid number of nodes found (%d)",
2143 		    num_nodes);
2144 		return (DDI_FAILURE);
2145 	}
2146 
2147 	/* allocate space for node list */
2148 	listsz = num_nodes * sizeof (mde_cookie_t);
2149 	listp = kmem_zalloc(listsz, KM_NOSLEEP);
2150 	if (listp == NULL)
2151 		return (DDI_FAILURE);
2152 
2153 	num_ldcs = md_scan_dag(mdp, mdex,
2154 	    md_find_name(mdp, channel_propname),
2155 	    md_find_name(mdp, "fwd"), listp);
2156 
2157 	if (num_ldcs <= 0) {
2158 		DWARN(vgenp, NULL, "can't find %s nodes", channel_propname);
2159 		kmem_free(listp, listsz);
2160 		return (DDI_FAILURE);
2161 	}
2162 
2163 	if (num_ldcs > 1) {
2164 		DWARN(vgenp, NULL, "Port %d: Number of channels %d > 1\n",
2165 		    port_num, num_ldcs);
2166 	}
2167 
2168 	ldc_ids = kmem_zalloc(num_ldcs * sizeof (uint64_t), KM_NOSLEEP);
2169 	if (ldc_ids == NULL) {
2170 		kmem_free(listp, listsz);
2171 		return (DDI_FAILURE);
2172 	}
2173 
2174 	for (i = 0; i < num_ldcs; i++) {
2175 		/* read channel ids */
2176 		if (md_get_prop_val(mdp, listp[i], id_propname, &ldc_ids[i])) {
2177 			DWARN(vgenp, NULL, "prop(%s) not found\n",
2178 			    id_propname);
2179 			kmem_free(listp, listsz);
2180 			kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
2181 			return (DDI_FAILURE);
2182 		}
2183 		DBG2(vgenp, NULL, "ldc_id 0x%llx", ldc_ids[i]);
2184 	}
2185 
2186 	kmem_free(listp, listsz);
2187 
2188 	if (md_get_prop_data(mdp, mdex, rmacaddr_propname, &addrp,
2189 	    &addrsz)) {
2190 		DWARN(vgenp, NULL, "prop(%s) not found\n", rmacaddr_propname);
2191 		kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
2192 		return (DDI_FAILURE);
2193 	}
2194 
2195 	if (addrsz < ETHERADDRL) {
2196 		DWARN(vgenp, NULL, "invalid address size (%d)\n", addrsz);
2197 		kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
2198 		return (DDI_FAILURE);
2199 	}
2200 
2201 	macaddr = *((uint64_t *)addrp);
2202 
2203 	DBG2(vgenp, NULL, "remote mac address 0x%llx\n", macaddr);
2204 
2205 	for (i = ETHERADDRL - 1; i >= 0; i--) {
2206 		ea.ether_addr_octet[i] = macaddr & 0xFF;
2207 		macaddr >>= 8;
2208 	}
2209 
2210 	if (!(md_get_prop_val(mdp, mdex, swport_propname, &val))) {
2211 		if (val == 0) {
2212 			/* This port is connected to the vswitch */
2213 			portp->is_vsw_port = B_TRUE;
2214 		} else {
2215 			portp->is_vsw_port = B_FALSE;
2216 		}
2217 	}
2218 
2219 	/* now update all properties into the port */
2220 	portp->vgenp = vgenp;
2221 	portp->port_num = port_num;
2222 	ether_copy(&ea, &portp->macaddr);
2223 	portp->ldc_ids = kmem_zalloc(sizeof (uint64_t) * num_ldcs, KM_SLEEP);
2224 	bcopy(ldc_ids, portp->ldc_ids, sizeof (uint64_t) * num_ldcs);
2225 	portp->num_ldcs = num_ldcs;
2226 
2227 	/* read vlan id properties of this port node */
2228 	vgen_vlan_read_ids(portp, VGEN_PEER, mdp, mdex, &portp->pvid,
2229 	    &portp->vids, &portp->nvids, NULL);
2230 
2231 	kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
2232 
2233 	return (DDI_SUCCESS);
2234 }
2235 
2236 /* remove a port from the device */
2237 static int
2238 vgen_remove_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
2239 {
2240 	uint64_t	port_num;
2241 	vgen_port_t	*portp;
2242 	vgen_portlist_t	*plistp;
2243 
2244 	/* read "id" property to get the port number */
2245 	if (md_get_prop_val(mdp, mdex, id_propname, &port_num)) {
2246 		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
2247 		return (DDI_FAILURE);
2248 	}
2249 
2250 	plistp = &(vgenp->vgenports);
2251 
2252 	WRITE_ENTER(&plistp->rwlock);
2253 	portp = vgen_port_lookup(plistp, (int)port_num);
2254 	if (portp == NULL) {
2255 		DWARN(vgenp, NULL, "can't find port(%lx)\n", port_num);
2256 		RW_EXIT(&plistp->rwlock);
2257 		return (DDI_FAILURE);
2258 	}
2259 
2260 	vgen_port_detach_mdeg(portp);
2261 	RW_EXIT(&plistp->rwlock);
2262 
2263 	return (DDI_SUCCESS);
2264 }
2265 
2266 /* attach a port to the device based on mdeg data */
2267 static int
2268 vgen_port_attach(vgen_port_t *portp)
2269 {
2270 	vgen_portlist_t		*plistp;
2271 	vgen_t			*vgenp;
2272 	uint64_t		*ldcids;
2273 	mac_register_t		*macp;
2274 	vio_net_res_type_t	type;
2275 	int			rv;
2276 
2277 	ASSERT(portp != NULL);
2278 	vgenp = portp->vgenp;
2279 	ldcids = portp->ldc_ids;
2280 
2281 	DBG2(vgenp, NULL, "port_num(%d), ldcid(%lx)\n",
2282 	    portp->port_num, ldcids[0]);
2283 
2284 	mutex_init(&portp->lock, NULL, MUTEX_DRIVER, NULL);
2285 
2286 	/*
2287 	 * attach the channel under the port using its channel id;
2288 	 * note that we only support one channel per port for now.
2289 	 */
2290 	if (vgen_ldc_attach(portp, ldcids[0]) == DDI_FAILURE) {
2291 		vgen_port_detach(portp);
2292 		return (DDI_FAILURE);
2293 	}
2294 
2295 	/* create vlan id hash table */
2296 	vgen_vlan_create_hash(portp);
2297 
2298 	if (portp->is_vsw_port == B_TRUE) {
2299 		/* This port is connected to the switch port */
2300 		(void) atomic_swap_32(&portp->use_vsw_port, B_FALSE);
2301 		type = VIO_NET_RES_LDC_SERVICE;
2302 	} else {
2303 		(void) atomic_swap_32(&portp->use_vsw_port, B_TRUE);
2304 		type = VIO_NET_RES_LDC_GUEST;
2305 	}
2306 
2307 	if ((macp = mac_alloc(MAC_VERSION)) == NULL) {
2308 		vgen_port_detach(portp);
2309 		return (DDI_FAILURE);
2310 	}
2311 	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
2312 	macp->m_driver = portp;
2313 	macp->m_dip = vgenp->vnetdip;
2314 	macp->m_src_addr = (uint8_t *)&(vgenp->macaddr);
2315 	macp->m_callbacks = &vgen_m_callbacks;
2316 	macp->m_min_sdu = 0;
2317 	macp->m_max_sdu = ETHERMTU;
2318 
2319 	mutex_enter(&portp->lock);
2320 	rv = vio_net_resource_reg(macp, type, vgenp->macaddr,
2321 	    portp->macaddr, &portp->vhp, &portp->vcb);
2322 	mutex_exit(&portp->lock);
2323 	mac_free(macp);
2324 
2325 	if (rv == 0) {
2326 		/* link it into the list of ports */
2327 		plistp = &(vgenp->vgenports);
2328 		WRITE_ENTER(&plistp->rwlock);
2329 		vgen_port_list_insert(portp);
2330 		RW_EXIT(&plistp->rwlock);
2331 
2332 		if (portp->is_vsw_port == B_TRUE) {
2333 			/* We now have the vswitch port attached */
2334 			vgenp->vsw_portp = portp;
2335 			(void) atomic_swap_32(&vgenp->vsw_port_refcnt, 0);
2336 		}
2337 	} else {
2338 		DERR(vgenp, NULL, "vio_net_resource_reg failed for portp=0x%p",
2339 		    portp);
2340 		vgen_port_detach(portp);
2341 	}
2342 
2343 	DBG1(vgenp, NULL, "exit: port_num(%d)\n", portp->port_num);
2344 	return (DDI_SUCCESS);
2345 }
2346 
2347 /* detach a port from the device based on mdeg data */
2348 static void
2349 vgen_port_detach_mdeg(vgen_port_t *portp)
2350 {
2351 	vgen_t *vgenp = portp->vgenp;
2352 
2353 	DBG1(vgenp, NULL, "enter: port_num(%d)\n", portp->port_num);
2354 
2355 	mutex_enter(&portp->lock);
2356 
2357 	/* stop the port if needed */
2358 	if (portp->flags & VGEN_STARTED) {
2359 		vgen_port_uninit(portp);
2360 		portp->flags &= ~(VGEN_STARTED);
2361 	}
2362 
2363 	mutex_exit(&portp->lock);
2364 	vgen_port_detach(portp);
2365 
2366 	DBG1(vgenp, NULL, "exit: port_num(%d)\n", portp->port_num);
2367 }
2368 
2369 static int
2370 vgen_update_port(vgen_t *vgenp, md_t *curr_mdp, mde_cookie_t curr_mdex,
2371 	md_t *prev_mdp, mde_cookie_t prev_mdex)
2372 {
2373 	uint64_t	cport_num;
2374 	uint64_t	pport_num;
2375 	vgen_portlist_t	*plistp;
2376 	vgen_port_t	*portp;
2377 	boolean_t	updated_vlans = B_FALSE;
2378 	uint16_t	pvid;
2379 	uint16_t	*vids;
2380 	uint16_t	nvids;
2381 
2382 	/*
2383 	 * For now, we get port updates only if vlan ids changed.
2384 	 * We read the port num and do some sanity check.
2385 	 */
2386 	if (md_get_prop_val(curr_mdp, curr_mdex, id_propname, &cport_num)) {
2387 		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
2388 		return (DDI_FAILURE);
2389 	}
2390 
2391 	if (md_get_prop_val(prev_mdp, prev_mdex, id_propname, &pport_num)) {
2392 		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
2393 		return (DDI_FAILURE);
2394 	}
2395 	if (cport_num != pport_num)
2396 		return (DDI_FAILURE);
2397 
2398 	plistp = &(vgenp->vgenports);
2399 
2400 	READ_ENTER(&plistp->rwlock);
2401 
2402 	portp = vgen_port_lookup(plistp, (int)cport_num);
2403 	if (portp == NULL) {
2404 		DWARN(vgenp, NULL, "can't find port(%lx)\n", cport_num);
2405 		RW_EXIT(&plistp->rwlock);
2406 		return (DDI_FAILURE);
2407 	}
2408 
2409 	/* Read the vlan ids */
2410 	vgen_vlan_read_ids(portp, VGEN_PEER, curr_mdp, curr_mdex, &pvid, &vids,
2411 	    &nvids, NULL);
2412 
2413 	/* Determine if there are any vlan id updates */
2414 	if ((pvid != portp->pvid) ||		/* pvid changed? */
2415 	    (nvids != portp->nvids) ||		/* # of vids changed? */
2416 	    ((nvids != 0) && (portp->nvids != 0) &&	/* vids changed? */
2417 	    bcmp(vids, portp->vids, sizeof (uint16_t) * nvids))) {
2418 		updated_vlans = B_TRUE;
2419 	}
2420 
2421 	if (updated_vlans == B_FALSE) {
2422 		RW_EXIT(&plistp->rwlock);
2423 		return (DDI_FAILURE);
2424 	}
2425 
2426 	/* remove the port from vlans it has been assigned to */
2427 	vgen_vlan_remove_ids(portp);
2428 
2429 	/* save the new vlan ids */
2430 	portp->pvid = pvid;
2431 	if (portp->nvids != 0) {
2432 		kmem_free(portp->vids, sizeof (uint16_t) * portp->nvids);
2433 		portp->nvids = 0;
2434 	}
2435 	if (nvids != 0) {
2436 		portp->vids = kmem_zalloc(sizeof (uint16_t) * nvids, KM_SLEEP);
2437 		bcopy(vids, portp->vids, sizeof (uint16_t) * nvids);
2438 		portp->nvids = nvids;
2439 		kmem_free(vids, sizeof (uint16_t) * nvids);
2440 	}
2441 
2442 	/* add port to the new vlans */
2443 	vgen_vlan_add_ids(portp);
2444 
2445 	/* reset the port if it is vlan unaware (ver < 1.3) */
2446 	vgen_vlan_unaware_port_reset(portp);
2447 
2448 	RW_EXIT(&plistp->rwlock);
2449 
2450 	return (DDI_SUCCESS);
2451 }
2452 
2453 static uint64_t
2454 vgen_port_stat(vgen_port_t *portp, uint_t stat)
2455 {
2456 	return (vgen_ldc_stat(portp->ldcp, stat));
2457 }
2458 
2459 /* attach the channel corresponding to the given ldc_id to the port */
2460 static int
2461 vgen_ldc_attach(vgen_port_t *portp, uint64_t ldc_id)
2462 {
2463 	vgen_t 		*vgenp;
2464 	vgen_ldc_t 	*ldcp;
2465 	ldc_attr_t 	attr;
2466 	int 		status;
2467 	ldc_status_t	istatus;
2468 	char		kname[MAXNAMELEN];
2469 	int		instance;
2470 	enum	{AST_init = 0x0, AST_ldc_alloc = 0x1,
2471 		AST_mutex_init = 0x2, AST_ldc_init = 0x4,
2472 		AST_ldc_reg_cb = 0x8 } attach_state;
2473 
2474 	attach_state = AST_init;
2475 	vgenp = portp->vgenp;
2476 
2477 	ldcp = kmem_zalloc(sizeof (vgen_ldc_t), KM_NOSLEEP);
2478 	if (ldcp == NULL) {
2479 		goto ldc_attach_failed;
2480 	}
2481 	ldcp->ldc_id = ldc_id;
2482 	ldcp->portp = portp;
2483 
2484 	attach_state |= AST_ldc_alloc;
2485 
2486 	mutex_init(&ldcp->txlock, NULL, MUTEX_DRIVER, NULL);
2487 	mutex_init(&ldcp->cblock, NULL, MUTEX_DRIVER, NULL);
2488 	mutex_init(&ldcp->tclock, NULL, MUTEX_DRIVER, NULL);
2489 	mutex_init(&ldcp->wrlock, NULL, MUTEX_DRIVER, NULL);
2490 	mutex_init(&ldcp->rxlock, NULL, MUTEX_DRIVER, NULL);
2491 	mutex_init(&ldcp->pollq_lock, NULL, MUTEX_DRIVER, NULL);
2492 	mutex_init(&ldcp->msg_thr_lock, NULL, MUTEX_DRIVER, NULL);
2493 	cv_init(&ldcp->msg_thr_cv, NULL, CV_DRIVER, NULL);
2494 
2495 	attach_state |= AST_mutex_init;
2496 
2497 	attr.devclass = LDC_DEV_NT;
2498 	attr.instance = vgenp->instance;
2499 	attr.mode = LDC_MODE_UNRELIABLE;
2500 	attr.mtu = vgen_ldc_mtu;
2501 	status = ldc_init(ldc_id, &attr, &ldcp->ldc_handle);
2502 	if (status != 0) {
2503 		DWARN(vgenp, ldcp, "ldc_init failed,rv (%d)\n", status);
2504 		goto ldc_attach_failed;
2505 	}
2506 	attach_state |= AST_ldc_init;
2507 
2508 	status = ldc_reg_callback(ldcp->ldc_handle, vgen_ldc_cb, (caddr_t)ldcp);
2509 	if (status != 0) {
2510 		DWARN(vgenp, ldcp, "ldc_reg_callback failed, rv (%d)\n",
2511 		    status);
2512 		goto ldc_attach_failed;
2513 	}
2514 	/*
2515 	 * allocate a message for ldc_read()s, big enough to hold ctrl and
2516 	 * data msgs, including raw data msgs used to recv priority frames.
2517 	 */
2518 	ldcp->msglen = VIO_PKT_DATA_HDRSIZE + vgenp->max_frame_size;
2519 	ldcp->ldcmsg = kmem_alloc(ldcp->msglen, KM_SLEEP);
2520 	attach_state |= AST_ldc_reg_cb;
2521 
2522 	(void) ldc_status(ldcp->ldc_handle, &istatus);
2523 	ASSERT(istatus == LDC_INIT);
2524 	ldcp->ldc_status = istatus;
2525 
2526 	/* Setup kstats for the channel */
2527 	instance = vgenp->instance;
2528 	(void) sprintf(kname, "vnetldc0x%lx", ldcp->ldc_id);
2529 	ldcp->ksp = vgen_setup_kstats("vnet", instance, kname, &ldcp->stats);
2530 	if (ldcp->ksp == NULL) {
2531 		goto ldc_attach_failed;
2532 	}
2533 
2534 	/* initialize vgen_versions supported */
2535 	bcopy(vgen_versions, ldcp->vgen_versions, sizeof (ldcp->vgen_versions));
2536 	vgen_reset_vnet_proto_ops(ldcp);
2537 
2538 	/* Link this channel to the port */
2539 	portp->ldcp = ldcp;
2540 
2541 	ldcp->link_state = LINK_STATE_UNKNOWN;
2542 #ifdef	VNET_IOC_DEBUG
2543 	ldcp->link_down_forced = B_FALSE;
2544 #endif
2545 	ldcp->flags |= CHANNEL_ATTACHED;
2546 	return (DDI_SUCCESS);
2547 
2548 ldc_attach_failed:
2549 	if (attach_state & AST_ldc_reg_cb) {
2550 		(void) ldc_unreg_callback(ldcp->ldc_handle);
2551 		kmem_free(ldcp->ldcmsg, ldcp->msglen);
2552 	}
2553 
2554 	if (attach_state & AST_ldc_init) {
2555 		(void) ldc_fini(ldcp->ldc_handle);
2556 	}
2557 	if (attach_state & AST_mutex_init) {
2558 		mutex_destroy(&ldcp->tclock);
2559 		mutex_destroy(&ldcp->txlock);
2560 		mutex_destroy(&ldcp->cblock);
2561 		mutex_destroy(&ldcp->wrlock);
2562 		mutex_destroy(&ldcp->rxlock);
2563 		mutex_destroy(&ldcp->pollq_lock);
2564 	}
2565 	if (attach_state & AST_ldc_alloc) {
2566 		KMEM_FREE(ldcp);
2567 	}
2568 	return (DDI_FAILURE);
2569 }
2570 
2571 /* detach a channel from the port */
2572 static void
2573 vgen_ldc_detach(vgen_ldc_t *ldcp)
2574 {
2575 	vgen_port_t	*portp;
2576 	vgen_t 		*vgenp;
2577 
2578 	ASSERT(ldcp != NULL);
2579 
2580 	portp = ldcp->portp;
2581 	vgenp = portp->vgenp;
2582 
2583 	if (ldcp->ldc_status != LDC_INIT) {
2584 		DWARN(vgenp, ldcp, "ldc_status is not INIT\n");
2585 	}
2586 
2587 	if (ldcp->flags & CHANNEL_ATTACHED) {
2588 		ldcp->flags &= ~(CHANNEL_ATTACHED);
2589 
2590 		(void) ldc_unreg_callback(ldcp->ldc_handle);
2591 		(void) ldc_fini(ldcp->ldc_handle);
2592 
2593 		kmem_free(ldcp->ldcmsg, ldcp->msglen);
2594 		vgen_destroy_kstats(ldcp->ksp);
2595 		ldcp->ksp = NULL;
2596 		mutex_destroy(&ldcp->tclock);
2597 		mutex_destroy(&ldcp->txlock);
2598 		mutex_destroy(&ldcp->cblock);
2599 		mutex_destroy(&ldcp->wrlock);
2600 		mutex_destroy(&ldcp->rxlock);
2601 		mutex_destroy(&ldcp->pollq_lock);
2602 		mutex_destroy(&ldcp->msg_thr_lock);
2603 		cv_destroy(&ldcp->msg_thr_cv);
2604 
2605 		KMEM_FREE(ldcp);
2606 	}
2607 }
2608 
2609 /* enable transmit/receive on the channel */
2610 static int
2611 vgen_ldc_init(vgen_ldc_t *ldcp)
2612 {
2613 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
2614 	ldc_status_t	istatus;
2615 	int		rv;
2616 	enum		{ ST_init = 0x0, ST_ldc_open = 0x1,
2617 			    ST_cb_enable = 0x2} init_state;
2618 	int		flag = 0;
2619 
2620 	init_state = ST_init;
2621 
2622 	DBG1(vgenp, ldcp, "enter\n");
2623 	LDC_LOCK(ldcp);
2624 
2625 	rv = ldc_open(ldcp->ldc_handle);
2626 	if (rv != 0) {
2627 		DWARN(vgenp, ldcp, "ldc_open failed: rv(%d)\n", rv);
2628 		goto ldcinit_failed;
2629 	}
2630 	init_state |= ST_ldc_open;
2631 
2632 	(void) ldc_status(ldcp->ldc_handle, &istatus);
2633 	if (istatus != LDC_OPEN && istatus != LDC_READY) {
2634 		DWARN(vgenp, ldcp, "status(%d) is not OPEN/READY\n", istatus);
2635 		goto ldcinit_failed;
2636 	}
2637 	ldcp->ldc_status = istatus;
2638 
2639 	rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_ENABLE);
2640 	if (rv != 0) {
2641 		DWARN(vgenp, ldcp, "ldc_set_cb_mode failed: rv(%d)\n", rv);
2642 		goto ldcinit_failed;
2643 	}
2644 
2645 	init_state |= ST_cb_enable;
2646 
2647 	vgen_ldc_up(ldcp);
2648 
2649 	(void) ldc_status(ldcp->ldc_handle, &istatus);
2650 	if (istatus == LDC_UP) {
2651 		DWARN(vgenp, ldcp, "status(%d) is UP\n", istatus);
2652 	}
2653 
2654 	ldcp->ldc_status = istatus;
2655 
2656 	ldcp->hphase = VH_PHASE0;
2657 	ldcp->hstate = 0;
2658 	ldcp->flags |= CHANNEL_STARTED;
2659 
2660 	vgen_setup_handshake_params(ldcp);
2661 
2662 	/* if channel is already UP - start handshake */
2663 	if (istatus == LDC_UP) {
2664 		vgen_t *vgenp = LDC_TO_VGEN(ldcp);
2665 		if (ldcp->portp != vgenp->vsw_portp) {
2666 			/*
2667 			 * As the channel is up, use this port from now on.
2668 			 */
2669 			(void) atomic_swap_32(
2670 			    &ldcp->portp->use_vsw_port, B_FALSE);
2671 		}
2672 
2673 		/* Initialize local session id */
2674 		ldcp->local_sid = ddi_get_lbolt();
2675 
2676 		/* clear peer session id */
2677 		ldcp->peer_sid = 0;
2678 
2679 		mutex_exit(&ldcp->tclock);
2680 		mutex_exit(&ldcp->txlock);
2681 		mutex_exit(&ldcp->wrlock);
2682 		mutex_exit(&ldcp->rxlock);
2683 		rv = vgen_handshake(vh_nextphase(ldcp));
2684 		mutex_exit(&ldcp->cblock);
2685 		if (rv != 0) {
2686 			flag = (rv == ECONNRESET) ? VGEN_FLAG_EVT_RESET :
2687 			    VGEN_FLAG_NEED_LDCRESET;
2688 			(void) vgen_process_reset(ldcp, flag);
2689 		}
2690 	} else {
2691 		LDC_UNLOCK(ldcp);
2692 	}
2693 
2694 	return (DDI_SUCCESS);
2695 
2696 ldcinit_failed:
2697 	if (init_state & ST_cb_enable) {
2698 		(void) ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE);
2699 	}
2700 	if (init_state & ST_ldc_open) {
2701 		(void) ldc_close(ldcp->ldc_handle);
2702 	}
2703 	LDC_UNLOCK(ldcp);
2704 	DBG1(vgenp, ldcp, "exit\n");
2705 	return (DDI_FAILURE);
2706 }
2707 
2708 /* stop transmit/receive on the channel */
2709 static void
2710 vgen_ldc_uninit(vgen_ldc_t *ldcp)
2711 {
2712 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
2713 
2714 	DBG1(vgenp, ldcp, "enter\n");
2715 
2716 	LDC_LOCK(ldcp);
2717 
2718 	if ((ldcp->flags & CHANNEL_STARTED) == 0) {
2719 		LDC_UNLOCK(ldcp);
2720 		DWARN(vgenp, ldcp, "CHANNEL_STARTED flag is not set\n");
2721 		return;
2722 	}
2723 
2724 	LDC_UNLOCK(ldcp);
2725 
2726 	while (atomic_cas_uint(&ldcp->reset_in_progress, 0, 1) != 0) {
2727 		delay(drv_usectohz(VGEN_LDC_UNINIT_DELAY));
2728 	}
2729 
2730 	(void) vgen_process_reset(ldcp, VGEN_FLAG_UNINIT);
2731 
2732 	DBG1(vgenp, ldcp, "exit\n");
2733 }
2734 
2735 /*
2736  * Create a descriptor ring, that will be exported to the peer for mapping.
2737  */
2738 static int
2739 vgen_create_dring(vgen_ldc_t *ldcp)
2740 {
2741 	vgen_hparams_t	*lp = &ldcp->local_hparams;
2742 	int		rv;
2743 
2744 	if (lp->dring_mode == VIO_RX_DRING_DATA) {
2745 		rv = vgen_create_rx_dring(ldcp);
2746 	} else {
2747 		rv = vgen_create_tx_dring(ldcp);
2748 	}
2749 
2750 	return (rv);
2751 }
2752 
2753 /*
2754  * Destroy the descriptor ring.
2755  */
2756 static void
2757 vgen_destroy_dring(vgen_ldc_t *ldcp)
2758 {
2759 	vgen_hparams_t	*lp = &ldcp->local_hparams;
2760 
2761 	if (lp->dring_mode == VIO_RX_DRING_DATA) {
2762 		vgen_destroy_rx_dring(ldcp);
2763 	} else {
2764 		vgen_destroy_tx_dring(ldcp);
2765 	}
2766 }
2767 
2768 /*
2769  * Map the descriptor ring exported by the peer.
2770  */
2771 static int
2772 vgen_map_dring(vgen_ldc_t *ldcp, void *pkt)
2773 {
2774 	int		rv;
2775 	vgen_hparams_t	*lp = &ldcp->local_hparams;
2776 
2777 	if (lp->dring_mode == VIO_RX_DRING_DATA) {
2778 		/*
2779 		 * In RxDringData mode, dring that we map in
2780 		 * becomes our transmit descriptor ring.
2781 		 */
2782 		rv = vgen_map_tx_dring(ldcp, pkt);
2783 	} else {
2784 
2785 		/*
2786 		 * In TxDring mode, dring that we map in
2787 		 * becomes our receive descriptor ring.
2788 		 */
2789 		rv = vgen_map_rx_dring(ldcp, pkt);
2790 	}
2791 
2792 	return (rv);
2793 }
2794 
2795 /*
2796  * Unmap the descriptor ring exported by the peer.
2797  */
2798 static void
2799 vgen_unmap_dring(vgen_ldc_t *ldcp)
2800 {
2801 	vgen_hparams_t	*lp = &ldcp->local_hparams;
2802 
2803 	if (lp->dring_mode == VIO_RX_DRING_DATA) {
2804 		vgen_unmap_tx_dring(ldcp);
2805 	} else {
2806 		vgen_unmap_rx_dring(ldcp);
2807 	}
2808 }
2809 
2810 void
2811 vgen_destroy_rxpools(void *arg)
2812 {
2813 	vio_mblk_pool_t	*poolp = (vio_mblk_pool_t *)arg;
2814 	vio_mblk_pool_t	*npoolp;
2815 
2816 	while (poolp != NULL) {
2817 		npoolp =  poolp->nextp;
2818 		while (vio_destroy_mblks(poolp) != 0) {
2819 			drv_usecwait(vgen_rxpool_cleanup_delay);
2820 		}
2821 		poolp = npoolp;
2822 	}
2823 }
2824 
2825 /* get channel statistics */
2826 static uint64_t
2827 vgen_ldc_stat(vgen_ldc_t *ldcp, uint_t stat)
2828 {
2829 	vgen_stats_t	*statsp;
2830 	uint64_t	val;
2831 
2832 	val = 0;
2833 	statsp = &ldcp->stats;
2834 	switch (stat) {
2835 
2836 	case MAC_STAT_MULTIRCV:
2837 		val = statsp->multircv;
2838 		break;
2839 
2840 	case MAC_STAT_BRDCSTRCV:
2841 		val = statsp->brdcstrcv;
2842 		break;
2843 
2844 	case MAC_STAT_MULTIXMT:
2845 		val = statsp->multixmt;
2846 		break;
2847 
2848 	case MAC_STAT_BRDCSTXMT:
2849 		val = statsp->brdcstxmt;
2850 		break;
2851 
2852 	case MAC_STAT_NORCVBUF:
2853 		val = statsp->norcvbuf;
2854 		break;
2855 
2856 	case MAC_STAT_IERRORS:
2857 		val = statsp->ierrors;
2858 		break;
2859 
2860 	case MAC_STAT_NOXMTBUF:
2861 		val = statsp->noxmtbuf;
2862 		break;
2863 
2864 	case MAC_STAT_OERRORS:
2865 		val = statsp->oerrors;
2866 		break;
2867 
2868 	case MAC_STAT_COLLISIONS:
2869 		break;
2870 
2871 	case MAC_STAT_RBYTES:
2872 		val = statsp->rbytes;
2873 		break;
2874 
2875 	case MAC_STAT_IPACKETS:
2876 		val = statsp->ipackets;
2877 		break;
2878 
2879 	case MAC_STAT_OBYTES:
2880 		val = statsp->obytes;
2881 		break;
2882 
2883 	case MAC_STAT_OPACKETS:
2884 		val = statsp->opackets;
2885 		break;
2886 
2887 	/* stats not relevant to ldc, return 0 */
2888 	case MAC_STAT_IFSPEED:
2889 	case ETHER_STAT_ALIGN_ERRORS:
2890 	case ETHER_STAT_FCS_ERRORS:
2891 	case ETHER_STAT_FIRST_COLLISIONS:
2892 	case ETHER_STAT_MULTI_COLLISIONS:
2893 	case ETHER_STAT_DEFER_XMTS:
2894 	case ETHER_STAT_TX_LATE_COLLISIONS:
2895 	case ETHER_STAT_EX_COLLISIONS:
2896 	case ETHER_STAT_MACXMT_ERRORS:
2897 	case ETHER_STAT_CARRIER_ERRORS:
2898 	case ETHER_STAT_TOOLONG_ERRORS:
2899 	case ETHER_STAT_XCVR_ADDR:
2900 	case ETHER_STAT_XCVR_ID:
2901 	case ETHER_STAT_XCVR_INUSE:
2902 	case ETHER_STAT_CAP_1000FDX:
2903 	case ETHER_STAT_CAP_1000HDX:
2904 	case ETHER_STAT_CAP_100FDX:
2905 	case ETHER_STAT_CAP_100HDX:
2906 	case ETHER_STAT_CAP_10FDX:
2907 	case ETHER_STAT_CAP_10HDX:
2908 	case ETHER_STAT_CAP_ASMPAUSE:
2909 	case ETHER_STAT_CAP_PAUSE:
2910 	case ETHER_STAT_CAP_AUTONEG:
2911 	case ETHER_STAT_ADV_CAP_1000FDX:
2912 	case ETHER_STAT_ADV_CAP_1000HDX:
2913 	case ETHER_STAT_ADV_CAP_100FDX:
2914 	case ETHER_STAT_ADV_CAP_100HDX:
2915 	case ETHER_STAT_ADV_CAP_10FDX:
2916 	case ETHER_STAT_ADV_CAP_10HDX:
2917 	case ETHER_STAT_ADV_CAP_ASMPAUSE:
2918 	case ETHER_STAT_ADV_CAP_PAUSE:
2919 	case ETHER_STAT_ADV_CAP_AUTONEG:
2920 	case ETHER_STAT_LP_CAP_1000FDX:
2921 	case ETHER_STAT_LP_CAP_1000HDX:
2922 	case ETHER_STAT_LP_CAP_100FDX:
2923 	case ETHER_STAT_LP_CAP_100HDX:
2924 	case ETHER_STAT_LP_CAP_10FDX:
2925 	case ETHER_STAT_LP_CAP_10HDX:
2926 	case ETHER_STAT_LP_CAP_ASMPAUSE:
2927 	case ETHER_STAT_LP_CAP_PAUSE:
2928 	case ETHER_STAT_LP_CAP_AUTONEG:
2929 	case ETHER_STAT_LINK_ASMPAUSE:
2930 	case ETHER_STAT_LINK_PAUSE:
2931 	case ETHER_STAT_LINK_AUTONEG:
2932 	case ETHER_STAT_LINK_DUPLEX:
2933 	default:
2934 		val = 0;
2935 		break;
2936 
2937 	}
2938 	return (val);
2939 }
2940 
2941 /*
2942  * LDC channel is UP, start handshake process with peer.
2943  */
2944 static void
2945 vgen_handle_evt_up(vgen_ldc_t *ldcp)
2946 {
2947 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
2948 
2949 	DBG1(vgenp, ldcp, "enter\n");
2950 
2951 	ASSERT(MUTEX_HELD(&ldcp->cblock));
2952 
2953 	if (ldcp->portp != vgenp->vsw_portp) {
2954 		/*
2955 		 * As the channel is up, use this port from now on.
2956 		 */
2957 		(void) atomic_swap_32(&ldcp->portp->use_vsw_port, B_FALSE);
2958 	}
2959 
2960 	/* Initialize local session id */
2961 	ldcp->local_sid = ddi_get_lbolt();
2962 
2963 	/* clear peer session id */
2964 	ldcp->peer_sid = 0;
2965 
2966 	/* Initiate Handshake process with peer ldc endpoint */
2967 	(void) vgen_handshake(vh_nextphase(ldcp));
2968 
2969 	DBG1(vgenp, ldcp, "exit\n");
2970 }
2971 
2972 /*
2973  * LDC channel is Reset, terminate connection with peer and try to
2974  * bring the channel up again.
2975  */
2976 int
2977 vgen_handle_evt_reset(vgen_ldc_t *ldcp, vgen_caller_t caller)
2978 {
2979 	if (caller == VGEN_LDC_CB || caller == VGEN_MSG_THR) {
2980 		ASSERT(MUTEX_HELD(&ldcp->cblock));
2981 	}
2982 
2983 	/* Set the flag to indicate reset is in progress */
2984 	if (atomic_cas_uint(&ldcp->reset_in_progress, 0, 1) != 0) {
2985 		/* another thread is already in the process of resetting */
2986 		return (EBUSY);
2987 	}
2988 
2989 	if (caller == VGEN_LDC_CB || caller == VGEN_MSG_THR) {
2990 		mutex_exit(&ldcp->cblock);
2991 	}
2992 
2993 	(void) vgen_process_reset(ldcp, VGEN_FLAG_EVT_RESET);
2994 
2995 	if (caller == VGEN_LDC_CB || caller == VGEN_MSG_THR) {
2996 		mutex_enter(&ldcp->cblock);
2997 	}
2998 
2999 	return (0);
3000 }
3001 
/*
 * Interrupt handler for the channel.
 *
 * 'event' is a mask of LDC_EVT_* bits and 'arg' is the vgen_ldc_t that was
 * registered with the callback. The events are handled in a fixed order
 * (UP, then RESET/DOWN, then READ) with the channel's cblock held.
 *
 * Returns LDC_SUCCESS, or LDC_FAILURE if the channel status could not be
 * determined while handling an UP or RESET/DOWN event.
 */
static uint_t
vgen_ldc_cb(uint64_t event, caddr_t arg)
{
	/* NOTE(review): 'event' is in fact used below - confirm this note */
	_NOTE(ARGUNUSED(event))
	vgen_ldc_t	*ldcp;
	vgen_t		*vgenp;
	ldc_status_t 	istatus;
	vgen_stats_t	*statsp;
	uint_t		ret = LDC_SUCCESS;

	ldcp = (vgen_ldc_t *)arg;
	vgenp = LDC_TO_VGEN(ldcp);
	statsp = &ldcp->stats;

	DBG1(vgenp, ldcp, "enter\n");

	mutex_enter(&ldcp->cblock);
	statsp->callbacks++;
	/* ignore callbacks for a channel that is still in the INIT state */
	if ((ldcp->ldc_status == LDC_INIT) || (ldcp->ldc_handle == NULL)) {
		DWARN(vgenp, ldcp, "status(%d) is LDC_INIT\n",
		    ldcp->ldc_status);
		mutex_exit(&ldcp->cblock);
		return (LDC_SUCCESS);
	}

	/*
	 * NOTE: not using switch() as event could be triggered by
	 * a state change and a read request. Also the ordering	of the
	 * check for the event types is deliberate.
	 */
	if (event & LDC_EVT_UP) {
		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
			DWARN(vgenp, ldcp, "ldc_status err\n");
			/* status couldn't be determined */
			ret = LDC_FAILURE;
			goto ldc_cb_ret;
		}
		ldcp->ldc_status = istatus;
		if (ldcp->ldc_status != LDC_UP) {
			DWARN(vgenp, ldcp, "LDC_EVT_UP received "
			    " but ldc status is not UP(0x%x)\n",
			    ldcp->ldc_status);
			/* spurious interrupt, return success */
			goto ldc_cb_ret;
		}
		DWARN(vgenp, ldcp, "event(%lx) UP, status(%d)\n",
		    event, ldcp->ldc_status);

		/* start a new session and the handshake with the peer */
		vgen_handle_evt_up(ldcp);

		/* an UP event is not expected together with RESET/DOWN */
		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);
	}

	/* Handle RESET/DOWN before READ event */
	if (event & (LDC_EVT_RESET | LDC_EVT_DOWN)) {
		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
			DWARN(vgenp, ldcp, "ldc_status error\n");
			/* status couldn't be determined */
			ret = LDC_FAILURE;
			goto ldc_cb_ret;
		}
		ldcp->ldc_status = istatus;
		DWARN(vgenp, ldcp, "event(%lx) RESET/DOWN, status(%d)\n",
		    event, ldcp->ldc_status);

		/*
		 * cblock is dropped and re-acquired inside this call; its
		 * return value (EBUSY when another reset is in progress) is
		 * deliberately ignored.
		 */
		(void) vgen_handle_evt_reset(ldcp, VGEN_LDC_CB);

		/*
		 * As the channel is down/reset, ignore READ event
		 * but print a debug warning message.
		 */
		if (event & LDC_EVT_READ) {
			DWARN(vgenp, ldcp,
			    "LDC_EVT_READ set along with RESET/DOWN\n");
			event &= ~LDC_EVT_READ;
		}
	}

	if (event & LDC_EVT_READ) {
		DBG2(vgenp, ldcp, "event(%lx) READ, status(%d)\n",
		    event, ldcp->ldc_status);

		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);

		if (ldcp->msg_thread != NULL) {
			/*
			 * If the receive thread is enabled, then
			 * wakeup the receive thread to process the
			 * LDC messages.
			 */
			/* cblock is not held while msg_thr_lock is taken */
			mutex_exit(&ldcp->cblock);
			mutex_enter(&ldcp->msg_thr_lock);
			if (!(ldcp->msg_thr_flags & VGEN_WTHR_DATARCVD)) {
				ldcp->msg_thr_flags |= VGEN_WTHR_DATARCVD;
				cv_signal(&ldcp->msg_thr_cv);
			}
			mutex_exit(&ldcp->msg_thr_lock);
			mutex_enter(&ldcp->cblock);
		} else  {
			/* no receive thread: read the messages right here */
			(void) vgen_handle_evt_read(ldcp, VGEN_LDC_CB);
		}
	}

ldc_cb_ret:
	mutex_exit(&ldcp->cblock);
	DBG1(vgenp, ldcp, "exit\n");
	return (ret);
}
3111 
3112 int
3113 vgen_handle_evt_read(vgen_ldc_t *ldcp, vgen_caller_t caller)
3114 {
3115 	int		rv;
3116 	uint64_t	*ldcmsg;
3117 	size_t		msglen;
3118 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
3119 	vio_msg_tag_t	*tagp;
3120 	ldc_status_t 	istatus;
3121 	boolean_t 	has_data;
3122 
3123 	DBG1(vgenp, ldcp, "enter\n");
3124 
3125 	if (caller == VGEN_LDC_CB) {
3126 		ASSERT(MUTEX_HELD(&ldcp->cblock));
3127 	} else if (caller == VGEN_MSG_THR) {
3128 		mutex_enter(&ldcp->cblock);
3129 	} else {
3130 		return (EINVAL);
3131 	}
3132 
3133 	ldcmsg = ldcp->ldcmsg;
3134 
3135 vgen_evtread:
3136 	do {
3137 		msglen = ldcp->msglen;
3138 		rv = ldc_read(ldcp->ldc_handle, (caddr_t)ldcmsg, &msglen);
3139 
3140 		if (rv != 0) {
3141 			DWARN(vgenp, ldcp, "ldc_read() failed "
3142 			    "rv(%d) len(%d)\n", rv, msglen);
3143 			if (rv == ECONNRESET)
3144 				goto vgen_evtread_error;
3145 			break;
3146 		}
3147 		if (msglen == 0) {
3148 			DBG2(vgenp, ldcp, "ldc_read NODATA");
3149 			break;
3150 		}
3151 		DBG2(vgenp, ldcp, "ldc_read msglen(%d)", msglen);
3152 
3153 		tagp = (vio_msg_tag_t *)ldcmsg;
3154 
3155 		if (ldcp->peer_sid) {
3156 			/*
3157 			 * check sid only after we have received peer's sid
3158 			 * in the version negotiate msg.
3159 			 */
3160 #ifdef DEBUG
3161 			if (vgen_inject_error(ldcp, VGEN_ERR_HSID)) {
3162 				/* simulate bad sid condition */
3163 				tagp->vio_sid = 0;
3164 				vgen_inject_err_flag &= ~(VGEN_ERR_HSID);
3165 			}
3166 #endif
3167 			rv = vgen_check_sid(ldcp, tagp);
3168 			if (rv != VGEN_SUCCESS) {
3169 				/*
3170 				 * If sid mismatch is detected,
3171 				 * reset the channel.
3172 				 */
3173 				DWARN(vgenp, ldcp, "vgen_check_sid() failed\n");
3174 				goto vgen_evtread_error;
3175 			}
3176 		}
3177 
3178 		switch (tagp->vio_msgtype) {
3179 		case VIO_TYPE_CTRL:
3180 			rv = vgen_handle_ctrlmsg(ldcp, tagp);
3181 			if (rv != 0) {
3182 				DWARN(vgenp, ldcp, "vgen_handle_ctrlmsg()"
3183 				    " failed rv(%d)\n", rv);
3184 			}
3185 			break;
3186 
3187 		case VIO_TYPE_DATA:
3188 			rv = vgen_handle_datamsg(ldcp, tagp, msglen);
3189 			if (rv != 0) {
3190 				DWARN(vgenp, ldcp, "vgen_handle_datamsg()"
3191 				    " failed rv(%d)\n", rv);
3192 			}
3193 			break;
3194 
3195 		case VIO_TYPE_ERR:
3196 			vgen_handle_errmsg(ldcp, tagp);
3197 			break;
3198 
3199 		default:
3200 			DWARN(vgenp, ldcp, "Unknown VIO_TYPE(%x)\n",
3201 			    tagp->vio_msgtype);
3202 			break;
3203 		}
3204 
3205 		/*
3206 		 * If an error is encountered, stop processing and
3207 		 * handle the error.
3208 		 */
3209 		if (rv != 0) {
3210 			goto vgen_evtread_error;
3211 		}
3212 
3213 	} while (msglen);
3214 
3215 	/* check once more before exiting */
3216 	rv = ldc_chkq(ldcp->ldc_handle, &has_data);
3217 	if ((rv == 0) && (has_data == B_TRUE)) {
3218 		DTRACE_PROBE1(vgen_chkq, vgen_ldc_t *, ldcp);
3219 		goto vgen_evtread;
3220 	}
3221 
3222 vgen_evtread_error:
3223 	if (rv != 0) {
3224 		/*
3225 		 * We handle the error and then return the error value. If we
3226 		 * are running in the context of the msg worker, the error
3227 		 * tells the worker thread to exit, as the channel would have
3228 		 * been reset.
3229 		 */
3230 		if (rv == ECONNRESET) {
3231 			if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
3232 				DWARN(vgenp, ldcp, "ldc_status err\n");
3233 			} else {
3234 				ldcp->ldc_status = istatus;
3235 			}
3236 			(void) vgen_handle_evt_reset(ldcp, caller);
3237 		} else {
3238 			DWARN(vgenp, ldcp, "Calling vgen_ldc_reset()...\n");
3239 			(void) vgen_ldc_reset(ldcp, caller);
3240 		}
3241 	}
3242 
3243 	if (caller == VGEN_MSG_THR) {
3244 		mutex_exit(&ldcp->cblock);
3245 	}
3246 
3247 	DBG1(vgenp, ldcp, "exit\n");
3248 	return (rv);
3249 }
3250 
3251 /* vgen handshake functions */
3252 
3253 /* change the hphase for the channel to the next phase */
3254 static vgen_ldc_t *
3255 vh_nextphase(vgen_ldc_t *ldcp)
3256 {
3257 	if (ldcp->hphase == VH_PHASE4) {
3258 		ldcp->hphase = VH_DONE;
3259 	} else {
3260 		ldcp->hphase++;
3261 	}
3262 	return (ldcp);
3263 }
3264 
3265 /* send version negotiate message to the peer over ldc */
3266 static int
3267 vgen_send_version_negotiate(vgen_ldc_t *ldcp)
3268 {
3269 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
3270 	vio_ver_msg_t	vermsg;
3271 	vio_msg_tag_t	*tagp = &vermsg.tag;
3272 	int		rv;
3273 
3274 	bzero(&vermsg, sizeof (vermsg));
3275 
3276 	tagp->vio_msgtype = VIO_TYPE_CTRL;
3277 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
3278 	tagp->vio_subtype_env = VIO_VER_INFO;
3279 	tagp->vio_sid = ldcp->local_sid;
3280 
3281 	/* get version msg payload from ldcp->local */
3282 	vermsg.ver_major = ldcp->local_hparams.ver_major;
3283 	vermsg.ver_minor = ldcp->local_hparams.ver_minor;
3284 	vermsg.dev_class = ldcp->local_hparams.dev_class;
3285 
3286 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (vermsg), B_FALSE);
3287 	if (rv != VGEN_SUCCESS) {
3288 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
3289 		return (rv);
3290 	}
3291 
3292 	ldcp->hstate |= VER_INFO_SENT;
3293 	DBG2(vgenp, ldcp, "VER_INFO_SENT ver(%d,%d)\n",
3294 	    vermsg.ver_major, vermsg.ver_minor);
3295 
3296 	return (VGEN_SUCCESS);
3297 }
3298 
3299 /* send attr info message to the peer over ldc */
3300 static int
3301 vgen_send_attr_info(vgen_ldc_t *ldcp)
3302 {
3303 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
3304 	vnet_attr_msg_t	attrmsg;
3305 	vio_msg_tag_t	*tagp = &attrmsg.tag;
3306 	int		rv;
3307 
3308 	bzero(&attrmsg, sizeof (attrmsg));
3309 
3310 	tagp->vio_msgtype = VIO_TYPE_CTRL;
3311 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
3312 	tagp->vio_subtype_env = VIO_ATTR_INFO;
3313 	tagp->vio_sid = ldcp->local_sid;
3314 
3315 	/* get attr msg payload from ldcp->local */
3316 	attrmsg.mtu = ldcp->local_hparams.mtu;
3317 	attrmsg.addr = ldcp->local_hparams.addr;
3318 	attrmsg.addr_type = ldcp->local_hparams.addr_type;
3319 	attrmsg.xfer_mode = ldcp->local_hparams.xfer_mode;
3320 	attrmsg.ack_freq = ldcp->local_hparams.ack_freq;
3321 	attrmsg.physlink_update = ldcp->local_hparams.physlink_update;
3322 	attrmsg.options = ldcp->local_hparams.dring_mode;
3323 
3324 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (attrmsg), B_FALSE);
3325 	if (rv != VGEN_SUCCESS) {
3326 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
3327 		return (rv);
3328 	}
3329 
3330 	ldcp->hstate |= ATTR_INFO_SENT;
3331 	DBG2(vgenp, ldcp, "ATTR_INFO_SENT\n");
3332 
3333 	return (VGEN_SUCCESS);
3334 }
3335 
3336 /*
3337  * Send descriptor ring register message to the peer over ldc.
3338  * Invoked in RxDringData mode.
3339  */
3340 static int
3341 vgen_send_rx_dring_reg(vgen_ldc_t *ldcp)
3342 {
3343 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
3344 	vio_dring_reg_msg_t	*msg;
3345 	vio_dring_reg_ext_msg_t	*emsg;
3346 	int			rv;
3347 	uint8_t			*buf;
3348 	uint_t			msgsize;
3349 
3350 	msgsize = VNET_DRING_REG_EXT_MSG_SIZE(ldcp->rx_data_ncookies);
3351 	msg = kmem_zalloc(msgsize, KM_SLEEP);
3352 
3353 	/* Initialize the common part of dring reg msg */
3354 	vgen_init_dring_reg_msg(ldcp, msg, VIO_RX_DRING_DATA);
3355 
3356 	/* skip over dring cookies at the tail of common section */
3357 	buf = (uint8_t *)msg->cookie;
3358 	ASSERT(msg->ncookies == 1);
3359 	buf += (msg->ncookies * sizeof (ldc_mem_cookie_t));
3360 
3361 	/* Now setup the extended part, specific to RxDringData mode */
3362 	emsg = (vio_dring_reg_ext_msg_t *)buf;
3363 
3364 	/* copy data_ncookies in the msg */
3365 	emsg->data_ncookies = ldcp->rx_data_ncookies;
3366 
3367 	/* copy data area size in the msg */
3368 	emsg->data_area_size = ldcp->rx_data_sz;
3369 
3370 	/* copy data area cookies in the msg */
3371 	bcopy(ldcp->rx_data_cookie, (ldc_mem_cookie_t *)emsg->data_cookie,
3372 	    sizeof (ldc_mem_cookie_t) * ldcp->rx_data_ncookies);
3373 
3374 	rv = vgen_sendmsg(ldcp, (caddr_t)msg, msgsize, B_FALSE);
3375 	if (rv != VGEN_SUCCESS) {
3376 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
3377 		kmem_free(msg, msgsize);
3378 		return (rv);
3379 	}
3380 
3381 	ldcp->hstate |= DRING_INFO_SENT;
3382 	DBG2(vgenp, ldcp, "DRING_INFO_SENT \n");
3383 
3384 	kmem_free(msg, msgsize);
3385 	return (VGEN_SUCCESS);
3386 }
3387 
3388 /*
3389  * Send descriptor ring register message to the peer over ldc.
3390  * Invoked in TxDring mode.
3391  */
3392 static int
3393 vgen_send_tx_dring_reg(vgen_ldc_t *ldcp)
3394 {
3395 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
3396 	vio_dring_reg_msg_t	msg;
3397 	int			rv;
3398 
3399 	bzero(&msg, sizeof (msg));
3400 
3401 	/*
3402 	 * Initialize only the common part of dring reg msg in TxDring mode.
3403 	 */
3404 	vgen_init_dring_reg_msg(ldcp, &msg, VIO_TX_DRING);
3405 
3406 	rv = vgen_sendmsg(ldcp, (caddr_t)&msg, sizeof (msg), B_FALSE);
3407 	if (rv != VGEN_SUCCESS) {
3408 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
3409 		return (rv);
3410 	}
3411 
3412 	ldcp->hstate |= DRING_INFO_SENT;
3413 	DBG2(vgenp, ldcp, "DRING_INFO_SENT \n");
3414 
3415 	return (VGEN_SUCCESS);
3416 }
3417 
3418 static int
3419 vgen_send_rdx_info(vgen_ldc_t *ldcp)
3420 {
3421 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
3422 	vio_rdx_msg_t	rdxmsg;
3423 	vio_msg_tag_t	*tagp = &rdxmsg.tag;
3424 	int		rv;
3425 
3426 	bzero(&rdxmsg, sizeof (rdxmsg));
3427 
3428 	tagp->vio_msgtype = VIO_TYPE_CTRL;
3429 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
3430 	tagp->vio_subtype_env = VIO_RDX;
3431 	tagp->vio_sid = ldcp->local_sid;
3432 
3433 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (rdxmsg), B_FALSE);
3434 	if (rv != VGEN_SUCCESS) {
3435 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
3436 		return (rv);
3437 	}
3438 
3439 	ldcp->hstate |= RDX_INFO_SENT;
3440 	DBG2(vgenp, ldcp, "RDX_INFO_SENT\n");
3441 
3442 	return (VGEN_SUCCESS);
3443 }
3444 
3445 /* send multicast addr info message to vsw */
3446 static int
3447 vgen_send_mcast_info(vgen_ldc_t *ldcp)
3448 {
3449 	vnet_mcast_msg_t	mcastmsg;
3450 	vnet_mcast_msg_t	*msgp;
3451 	vio_msg_tag_t		*tagp;
3452 	vgen_t			*vgenp;
3453 	struct ether_addr	*mca;
3454 	int			rv;
3455 	int			i;
3456 	uint32_t		size;
3457 	uint32_t		mccount;
3458 	uint32_t		n;
3459 
3460 	msgp = &mcastmsg;
3461 	tagp = &msgp->tag;
3462 	vgenp = LDC_TO_VGEN(ldcp);
3463 
3464 	mccount = vgenp->mccount;
3465 	i = 0;
3466 
3467 	do {
3468 		tagp->vio_msgtype = VIO_TYPE_CTRL;
3469 		tagp->vio_subtype = VIO_SUBTYPE_INFO;
3470 		tagp->vio_subtype_env = VNET_MCAST_INFO;
3471 		tagp->vio_sid = ldcp->local_sid;
3472 
3473 		n = ((mccount >= VNET_NUM_MCAST) ? VNET_NUM_MCAST : mccount);
3474 		size = n * sizeof (struct ether_addr);
3475 
3476 		mca = &(vgenp->mctab[i]);
3477 		bcopy(mca, (msgp->mca), size);
3478 		msgp->set = B_TRUE;
3479 		msgp->count = n;
3480 
3481 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msgp),
3482 		    B_FALSE);
3483 		if (rv != VGEN_SUCCESS) {
3484 			DWARN(vgenp, ldcp, "vgen_sendmsg err(%d)\n", rv);
3485 			return (rv);
3486 		}
3487 
3488 		mccount -= n;
3489 		i += n;
3490 
3491 	} while (mccount);
3492 
3493 	return (VGEN_SUCCESS);
3494 }
3495 
3496 /*
3497  * vgen_dds_rx -- post DDS messages to vnet.
3498  */
3499 static int
3500 vgen_dds_rx(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
3501 {
3502 	vio_dds_msg_t	*dmsg = (vio_dds_msg_t *)tagp;
3503 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
3504 
3505 	if (dmsg->dds_class != DDS_VNET_NIU) {
3506 		DWARN(vgenp, ldcp, "Unknown DDS class, dropping");
3507 		return (EBADMSG);
3508 	}
3509 	vnet_dds_rx(vgenp->vnetp, dmsg);
3510 	return (0);
3511 }
3512 
3513 /*
3514  * vgen_dds_tx -- an interface called by vnet to send DDS messages.
3515  */
3516 int
3517 vgen_dds_tx(void *arg, void *msg)
3518 {
3519 	vgen_t		*vgenp = arg;
3520 	vio_dds_msg_t	*dmsg = msg;
3521 	vgen_portlist_t	*plistp = &vgenp->vgenports;
3522 	vgen_ldc_t	*ldcp;
3523 	int		rv = EIO;
3524 
3525 	READ_ENTER(&plistp->rwlock);
3526 	ldcp = vgenp->vsw_portp->ldcp;
3527 	if ((ldcp == NULL) || (ldcp->hphase != VH_DONE)) {
3528 		goto vgen_dsend_exit;
3529 	}
3530 
3531 	dmsg->tag.vio_sid = ldcp->local_sid;
3532 	rv = vgen_sendmsg(ldcp, (caddr_t)dmsg, sizeof (vio_dds_msg_t), B_FALSE);
3533 	if (rv != VGEN_SUCCESS) {
3534 		rv = EIO;
3535 	} else {
3536 		rv = 0;
3537 	}
3538 
3539 vgen_dsend_exit:
3540 	RW_EXIT(&plistp->rwlock);
3541 	return (rv);
3542 
3543 }
3544 
3545 /* Initiate Phase 2 of handshake */
3546 static int
3547 vgen_handshake_phase2(vgen_ldc_t *ldcp)
3548 {
3549 	int	rv;
3550 
3551 #ifdef DEBUG
3552 	if (vgen_inject_error(ldcp, VGEN_ERR_HSTATE)) {
3553 		/* simulate out of state condition */
3554 		vgen_inject_err_flag &= ~(VGEN_ERR_HSTATE);
3555 		rv = vgen_send_rdx_info(ldcp);
3556 		return (rv);
3557 	}
3558 	if (vgen_inject_error(ldcp, VGEN_ERR_HTIMEOUT)) {
3559 		/* simulate timeout condition */
3560 		vgen_inject_err_flag &= ~(VGEN_ERR_HTIMEOUT);
3561 		return (VGEN_SUCCESS);
3562 	}
3563 #endif
3564 	rv = vgen_send_attr_info(ldcp);
3565 	if (rv != VGEN_SUCCESS) {
3566 		return (rv);
3567 	}
3568 
3569 	return (VGEN_SUCCESS);
3570 }
3571 
3572 static int
3573 vgen_handshake_phase3(vgen_ldc_t *ldcp)
3574 {
3575 	int		rv;
3576 	vgen_hparams_t	*lp = &ldcp->local_hparams;
3577 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
3578 	vgen_stats_t	*statsp = &ldcp->stats;
3579 
3580 	/* dring mode has been negotiated in attr phase; save in stats */
3581 	statsp->dring_mode = lp->dring_mode;
3582 
3583 	if (lp->dring_mode == VIO_RX_DRING_DATA) {	/* RxDringData mode */
3584 		ldcp->rx_dringdata = vgen_handle_dringdata_shm;
3585 		ldcp->tx_dringdata = vgen_dringsend_shm;
3586 		if (!VGEN_PRI_ETH_DEFINED(vgenp)) {
3587 			/*
3588 			 * If priority frames are not in use, we don't need a
3589 			 * separate wrapper function for 'tx', so we set it to
3590 			 * 'tx_dringdata'. If priority frames are configured,
3591 			 * we leave the 'tx' pointer as is (initialized in
3592 			 * vgen_set_vnet_proto_ops()).
3593 			 */
3594 			ldcp->tx = ldcp->tx_dringdata;
3595 		}
3596 	} else {					/* TxDring mode */
3597 		ldcp->msg_thread = thread_create(NULL,
3598 		    2 * DEFAULTSTKSZ, vgen_ldc_msg_worker, ldcp, 0,
3599 		    &p0, TS_RUN, maxclsyspri);
3600 	}
3601 
3602 	rv = vgen_create_dring(ldcp);
3603 	if (rv != VGEN_SUCCESS) {
3604 		return (rv);
3605 	}
3606 
3607 	/* update local dring_info params */
3608 	if (lp->dring_mode == VIO_RX_DRING_DATA) {
3609 		bcopy(&(ldcp->rx_dring_cookie),
3610 		    &(ldcp->local_hparams.dring_cookie),
3611 		    sizeof (ldc_mem_cookie_t));
3612 		ldcp->local_hparams.dring_ncookies = ldcp->rx_dring_ncookies;
3613 		ldcp->local_hparams.num_desc = ldcp->num_rxds;
3614 		ldcp->local_hparams.desc_size =
3615 		    sizeof (vnet_rx_dringdata_desc_t);
3616 		rv = vgen_send_rx_dring_reg(ldcp);
3617 	} else {
3618 		bcopy(&(ldcp->tx_dring_cookie),
3619 		    &(ldcp->local_hparams.dring_cookie),
3620 		    sizeof (ldc_mem_cookie_t));
3621 		ldcp->local_hparams.dring_ncookies = ldcp->tx_dring_ncookies;
3622 		ldcp->local_hparams.num_desc = ldcp->num_txds;
3623 		ldcp->local_hparams.desc_size = sizeof (vnet_public_desc_t);
3624 		rv = vgen_send_tx_dring_reg(ldcp);
3625 	}
3626 
3627 	if (rv != VGEN_SUCCESS) {
3628 		return (rv);
3629 	}
3630 
3631 	return (VGEN_SUCCESS);
3632 }
3633 
3634 /*
3635  * Set vnet-protocol-version dependent functions based on version.
3636  */
3637 static void
3638 vgen_set_vnet_proto_ops(vgen_ldc_t *ldcp)
3639 {
3640 	vgen_hparams_t	*lp = &ldcp->local_hparams;
3641 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
3642 
3643 	/*
3644 	 * Setup the appropriate dring data processing routine and any
3645 	 * associated thread based on the version.
3646 	 *
3647 	 * In versions < 1.6, we only support TxDring mode. In this mode, the
3648 	 * msg worker thread processes all types of VIO msgs (ctrl and data).
3649 	 *
3650 	 * In versions >= 1.6, we also support RxDringData mode. In this mode,
3651 	 * all msgs including dring data messages are handled directly by the
3652 	 * callback (intr) thread. The dring data msgs (msgtype: VIO_TYPE_DATA,
3653 	 * subtype: VIO_SUBTYPE_INFO, subtype_env: VIO_DRING_DATA) can also be
3654 	 * disabled while the polling thread is active, in which case the
3655 	 * polling thread processes the rcv descriptor ring.
3656 	 *
3657 	 * However, for versions >= 1.6, we can force to only use TxDring mode.
3658 	 * This could happen if RxDringData mode has been disabled (see
3659 	 * vgen_dring_mode) on this guest or on the peer guest. This info is
3660 	 * determined as part of attr exchange phase of handshake. Hence, we
3661 	 * setup these pointers for v1.6 after attr msg phase completes during
3662 	 * handshake.
3663 	 */
3664 	if (VGEN_VER_GTEQ(ldcp, 1, 6)) {	/* Ver >= 1.6 */
3665 		/*
3666 		 * Set data dring mode for vgen_send_attr_info().
3667 		 */
3668 		if (vgen_dring_mode == VIO_RX_DRING_DATA) {
3669 			lp->dring_mode = (VIO_RX_DRING_DATA | VIO_TX_DRING);
3670 		} else {
3671 			lp->dring_mode = VIO_TX_DRING;
3672 		}
3673 	} else {				/* Ver <= 1.5 */
3674 		lp->dring_mode = VIO_TX_DRING;
3675 	}
3676 
3677 	if (VGEN_VER_GTEQ(ldcp, 1, 5)) {
3678 		vgen_port_t	*portp = ldcp->portp;
3679 		vnet_t		*vnetp = vgenp->vnetp;
3680 		/*
3681 		 * If the version negotiated with vswitch is >= 1.5 (link
3682 		 * status update support), set the required bits in our
3683 		 * attributes if this vnet device has been configured to get
3684 		 * physical link state updates.
3685 		 */
3686 		if (portp == vgenp->vsw_portp && vnetp->pls_update == B_TRUE) {
3687 			lp->physlink_update = PHYSLINK_UPDATE_STATE;
3688 		} else {
3689 			lp->physlink_update = PHYSLINK_UPDATE_NONE;
3690 		}
3691 	}
3692 
3693 	if (VGEN_VER_GTEQ(ldcp, 1, 4)) {
3694 		/*
3695 		 * If the version negotiated with peer is >= 1.4(Jumbo Frame
3696 		 * Support), set the mtu in our attributes to max_frame_size.
3697 		 */
3698 		lp->mtu = vgenp->max_frame_size;
3699 	} else  if (VGEN_VER_EQ(ldcp, 1, 3)) {
3700 		/*
3701 		 * If the version negotiated with peer is == 1.3 (Vlan Tag
3702 		 * Support) set the attr.mtu to ETHERMAX + VLAN_TAGSZ.
3703 		 */
3704 		lp->mtu = ETHERMAX + VLAN_TAGSZ;
3705 	} else {
3706 		vgen_port_t	*portp = ldcp->portp;
3707 		vnet_t		*vnetp = vgenp->vnetp;
3708 		/*
3709 		 * Pre-1.3 peers expect max frame size of ETHERMAX.
3710 		 * We can negotiate that size with those peers provided the
3711 		 * following conditions are true:
3712 		 * - Only pvid is defined for our peer and there are no vids.
3713 		 * - pvids are equal.
3714 		 * If the above conditions are true, then we can send/recv only
3715 		 * untagged frames of max size ETHERMAX.
3716 		 */
3717 		if (portp->nvids == 0 && portp->pvid == vnetp->pvid) {
3718 			lp->mtu = ETHERMAX;
3719 		}
3720 	}
3721 
3722 	if (VGEN_VER_GTEQ(ldcp, 1, 2)) {	/* Versions >= 1.2 */
3723 		/*
3724 		 * Starting v1.2 we support priority frames; so set the
3725 		 * dring processing routines and xfer modes based on the
3726 		 * version. Note that the dring routines could be changed after
3727 		 * attribute handshake phase for versions >= 1.6 (See
3728 		 * vgen_handshake_phase3())
3729 		 */
3730 		ldcp->tx_dringdata = vgen_dringsend;
3731 		ldcp->rx_dringdata = vgen_handle_dringdata;
3732 
3733 		if (VGEN_PRI_ETH_DEFINED(vgenp)) {
3734 			/*
3735 			 * Enable priority routines and pkt mode only if
3736 			 * at least one pri-eth-type is specified in MD.
3737 			 */
3738 			ldcp->tx = vgen_ldcsend;
3739 			ldcp->rx_pktdata = vgen_handle_pkt_data;
3740 
3741 			/* set xfer mode for vgen_send_attr_info() */
3742 			lp->xfer_mode = VIO_PKT_MODE | VIO_DRING_MODE_V1_2;
3743 		} else {
3744 			/* No priority eth types defined in MD */
3745 			ldcp->tx = ldcp->tx_dringdata;
3746 			ldcp->rx_pktdata = vgen_handle_pkt_data_nop;
3747 
3748 			/* Set xfer mode for vgen_send_attr_info() */
3749 			lp->xfer_mode = VIO_DRING_MODE_V1_2;
3750 		}
3751 	} else { /* Versions prior to 1.2  */
3752 		vgen_reset_vnet_proto_ops(ldcp);
3753 	}
3754 }
3755 
3756 /*
3757  * Reset vnet-protocol-version dependent functions to pre-v1.2.
3758  */
3759 static void
3760 vgen_reset_vnet_proto_ops(vgen_ldc_t *ldcp)
3761 {
3762 	vgen_hparams_t	*lp = &ldcp->local_hparams;
3763 
3764 	ldcp->tx = ldcp->tx_dringdata = vgen_dringsend;
3765 	ldcp->rx_dringdata = vgen_handle_dringdata;
3766 	ldcp->rx_pktdata = vgen_handle_pkt_data_nop;
3767 
3768 	/* set xfer mode for vgen_send_attr_info() */
3769 	lp->xfer_mode = VIO_DRING_MODE_V1_0;
3770 }
3771 
3772 static void
3773 vgen_vlan_unaware_port_reset(vgen_port_t *portp)
3774 {
3775 	vgen_ldc_t	*ldcp = portp->ldcp;
3776 	vgen_t		*vgenp = portp->vgenp;
3777 	vnet_t		*vnetp = vgenp->vnetp;
3778 	boolean_t	need_reset = B_FALSE;
3779 
3780 	mutex_enter(&ldcp->cblock);
3781 
3782 	/*
3783 	 * If the peer is vlan_unaware(ver < 1.3), reset channel and terminate
3784 	 * the connection. See comments in vgen_set_vnet_proto_ops().
3785 	 */
3786 	if (ldcp->hphase == VH_DONE && VGEN_VER_LT(ldcp, 1, 3) &&
3787 	    (portp->nvids != 0 || portp->pvid != vnetp->pvid)) {
3788 		need_reset = B_TRUE;
3789 	}
3790 	mutex_exit(&ldcp->cblock);
3791 
3792 	if (need_reset == B_TRUE) {
3793 		(void) vgen_ldc_reset(ldcp, VGEN_OTHER);
3794 	}
3795 }
3796 
3797 static void
3798 vgen_port_reset(vgen_port_t *portp)
3799 {
3800 	(void) vgen_ldc_reset(portp->ldcp, VGEN_OTHER);
3801 }
3802 
3803 static void
3804 vgen_reset_vlan_unaware_ports(vgen_t *vgenp)
3805 {
3806 	vgen_port_t	*portp;
3807 	vgen_portlist_t	*plistp;
3808 
3809 	plistp = &(vgenp->vgenports);
3810 	READ_ENTER(&plistp->rwlock);
3811 
3812 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
3813 
3814 		vgen_vlan_unaware_port_reset(portp);
3815 
3816 	}
3817 
3818 	RW_EXIT(&plistp->rwlock);
3819 }
3820 
3821 static void
3822 vgen_reset_vsw_port(vgen_t *vgenp)
3823 {
3824 	vgen_port_t	*portp;
3825 
3826 	if ((portp = vgenp->vsw_portp) != NULL) {
3827 		vgen_port_reset(portp);
3828 	}
3829 }
3830 
3831 static void
3832 vgen_setup_handshake_params(vgen_ldc_t *ldcp)
3833 {
3834 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
3835 
3836 	/*
3837 	 * clear local handshake params and initialize.
3838 	 */
3839 	bzero(&(ldcp->local_hparams), sizeof (ldcp->local_hparams));
3840 
3841 	/* set version to the highest version supported */
3842 	ldcp->local_hparams.ver_major =
3843 	    ldcp->vgen_versions[0].ver_major;
3844 	ldcp->local_hparams.ver_minor =
3845 	    ldcp->vgen_versions[0].ver_minor;
3846 	ldcp->local_hparams.dev_class = VDEV_NETWORK;
3847 
3848 	/* set attr_info params */
3849 	ldcp->local_hparams.mtu = vgenp->max_frame_size;
3850 	ldcp->local_hparams.addr =
3851 	    vnet_macaddr_strtoul(vgenp->macaddr);
3852 	ldcp->local_hparams.addr_type = ADDR_TYPE_MAC;
3853 	ldcp->local_hparams.xfer_mode = VIO_DRING_MODE_V1_0;
3854 	ldcp->local_hparams.ack_freq = 0;	/* don't need acks */
3855 	ldcp->local_hparams.physlink_update = PHYSLINK_UPDATE_NONE;
3856 
3857 	/* reset protocol version specific function pointers */
3858 	vgen_reset_vnet_proto_ops(ldcp);
3859 	ldcp->local_hparams.dring_ident = 0;
3860 	ldcp->local_hparams.dring_ready = B_FALSE;
3861 
3862 	/* clear peer_hparams */
3863 	bzero(&(ldcp->peer_hparams), sizeof (ldcp->peer_hparams));
3864 	ldcp->peer_hparams.dring_ready = B_FALSE;
3865 }
3866 
3867 /*
3868  * Process Channel Reset. We tear down the resources (timers, threads,
3869  * descriptor rings etc) associated with the channel and reinitialize the
3870  * channel based on the flags.
3871  *
3872  * Arguments:
3873  *    ldcp:	The channel being processed.
3874  *
3875  *    flags:
3876  *	VGEN_FLAG_EVT_RESET:
3877  *		A ECONNRESET error occured while doing ldc operations such as
3878  *		ldc_read() or ldc_write(); the channel is already reset and it
3879  *		needs to be handled.
3880  *	VGEN_FLAG_NEED_LDCRESET:
3881  *		Some other errors occured and the error handling code needs to
3882  *		explicitly reset the channel and restart handshake with the
3883  *		peer. The error could be either in ldc operations or other
3884  *		parts of the code such as timeouts or mdeg events etc.
3885  *	VGEN_FLAG_UNINIT:
3886  *		The channel is being torn down; no need to bring up the channel
3887  *		after resetting.
3888  */
3889 static int
3890 vgen_process_reset(vgen_ldc_t *ldcp, int flags)
3891 {
3892 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
3893 	vgen_port_t	*portp = ldcp->portp;
3894 	vgen_hparams_t  *lp = &ldcp->local_hparams;
3895 	boolean_t	is_vsw_port = B_FALSE;
3896 	boolean_t	link_update = B_FALSE;
3897 	ldc_status_t	istatus;
3898 	int		rv;
3899 	uint_t		retries = 0;
3900 	timeout_id_t	htid = 0;
3901 	timeout_id_t	wd_tid = 0;
3902 
3903 	if (portp == vgenp->vsw_portp) { /* vswitch port ? */
3904 		is_vsw_port = B_TRUE;
3905 	}
3906 
3907 	/*
3908 	 * Report that the channel is being reset; it ensures that any HybridIO
3909 	 * configuration is torn down before we reset the channel if it is not
3910 	 * already reset (flags == VGEN_FLAG_NEED_LDCRESET).
3911 	 */
3912 	if (is_vsw_port == B_TRUE) {
3913 		vio_net_report_err_t rep_err = portp->vcb.vio_net_report_err;
3914 		rep_err(portp->vhp, VIO_NET_RES_DOWN);
3915 	}
3916 
3917 again:
3918 	mutex_enter(&ldcp->cblock);
3919 
3920 	/* Clear hstate and hphase */
3921 	ldcp->hstate = 0;
3922 	ldcp->hphase = VH_PHASE0;
3923 	if (flags == VGEN_FLAG_NEED_LDCRESET || flags == VGEN_FLAG_UNINIT) {
3924 		DWARN(vgenp, ldcp, "Doing Channel Reset...\n");
3925 		(void) ldc_down(ldcp->ldc_handle);
3926 		(void) ldc_status(ldcp->ldc_handle, &istatus);
3927 		DWARN(vgenp, ldcp, "Reset Done, ldc_status(%d)\n", istatus);
3928 		ldcp->ldc_status = istatus;
3929 
3930 		if (flags == VGEN_FLAG_UNINIT) {
3931 			/* disable further callbacks */
3932 			rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE);
3933 			if (rv != 0) {
3934 				DWARN(vgenp, ldcp, "ldc_set_cb_mode failed\n");
3935 			}
3936 		}
3937 
3938 	} else {
3939 		/* flags == VGEN_FLAG_EVT_RESET */
3940 		DWARN(vgenp, ldcp, "ldc status(%d)\n", ldcp->ldc_status);
3941 	}
3942 
3943 	/*
3944 	 * As the connection is now reset, mark the channel
3945 	 * link_state as 'down' and notify the stack if needed.
3946 	 */
3947 	if (ldcp->link_state != LINK_STATE_DOWN) {
3948 		ldcp->link_state = LINK_STATE_DOWN;
3949 
3950 		if (is_vsw_port == B_TRUE) { /* vswitch port ? */
3951 			/*
3952 			 * As the channel link is down, mark physical link also
3953 			 * as down. After the channel comes back up and
3954 			 * handshake completes, we will get an update on the
3955 			 * physlink state from vswitch (if this device has been
3956 			 * configured to get phys link updates).
3957 			 */
3958 			vgenp->phys_link_state = LINK_STATE_DOWN;
3959 			link_update = B_TRUE;
3960 
3961 		}
3962 	}
3963 
3964 	if (ldcp->htid != 0) {
3965 		htid = ldcp->htid;
3966 		ldcp->htid = 0;
3967 	}
3968 
3969 	if (ldcp->wd_tid != 0) {
3970 		wd_tid = ldcp->wd_tid;
3971 		ldcp->wd_tid = 0;
3972 	}
3973 
3974 	mutex_exit(&ldcp->cblock);
3975 
3976 	/* Update link state to the stack */
3977 	if (link_update == B_TRUE) {
3978 		vgen_link_update(vgenp, ldcp->link_state);
3979 	}
3980 
3981 	/*
3982 	 * As the channel is being reset, redirect traffic to the peer through
3983 	 * vswitch, until the channel becomes ready to be used again.
3984 	 */
3985 	if (is_vsw_port == B_FALSE && vgenp->vsw_portp != NULL) {
3986 		(void) atomic_swap_32(&portp->use_vsw_port, B_TRUE);
3987 	}
3988 
3989 	/* Cancel handshake watchdog timeout */
3990 	if (htid) {
3991 		(void) untimeout(htid);
3992 	}
3993 
3994 	/* Cancel transmit watchdog timeout */
3995 	if (wd_tid) {
3996 		(void) untimeout(wd_tid);
3997 	}
3998 
3999 	/* Stop the msg worker thread */
4000 	if (lp->dring_mode == VIO_TX_DRING && curthread != ldcp->msg_thread) {
4001 		vgen_stop_msg_thread(ldcp);
4002 	}
4003 
4004 	/* Grab all locks while we tear down tx/rx resources */
4005 	LDC_LOCK(ldcp);
4006 
4007 	/* Destroy the local dring which is exported to the peer */
4008 	vgen_destroy_dring(ldcp);
4009 
4010 	/* Unmap the remote dring which is imported from the peer */
4011 	vgen_unmap_dring(ldcp);
4012 
4013 	/*
4014 	 * Bring up the channel and restart handshake
4015 	 * only if the channel is not being torn down.
4016 	 */
4017 	if (flags != VGEN_FLAG_UNINIT) {
4018 
4019 		/* Setup handshake parameters to restart a new handshake */
4020 		vgen_setup_handshake_params(ldcp);
4021 
4022 		/* Bring the channel up */
4023 		vgen_ldc_up(ldcp);
4024 
4025 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
4026 			DWARN(vgenp, ldcp, "ldc_status err\n");
4027 		} else {
4028 			ldcp->ldc_status = istatus;
4029 		}
4030 
4031 		/* If the channel is UP, start handshake */
4032 		if (ldcp->ldc_status == LDC_UP) {
4033 
4034 			if (is_vsw_port == B_FALSE) {
4035 				/*
4036 				 * Channel is up; use this port from now on.
4037 				 */
4038 				(void) atomic_swap_32(&portp->use_vsw_port,
4039 				    B_FALSE);
4040 			}
4041 
4042 			/* Initialize local session id */
4043 			ldcp->local_sid = ddi_get_lbolt();
4044 
4045 			/* clear peer session id */
4046 			ldcp->peer_sid = 0;
4047 
4048 			/*
4049 			 * Initiate Handshake process with peer ldc endpoint by
4050 			 * sending version info vio message. If that fails we
4051 			 * go back to the top of this function to process the
4052 			 * error again. Note that we can be in this loop for
4053 			 * 'vgen_ldc_max_resets' times, after which the channel
4054 			 * is not brought up.
4055 			 */
4056 			mutex_exit(&ldcp->tclock);
4057 			mutex_exit(&ldcp->txlock);
4058 			mutex_exit(&ldcp->wrlock);
4059 			mutex_exit(&ldcp->rxlock);
4060 			rv = vgen_handshake(vh_nextphase(ldcp));
4061 			mutex_exit(&ldcp->cblock);
4062 			if (rv != 0) {
4063 				if (rv == ECONNRESET) {
4064 					flags = VGEN_FLAG_EVT_RESET;
4065 				} else {
4066 					flags = VGEN_FLAG_NEED_LDCRESET;
4067 				}
4068 
4069 				/*
4070 				 * We still hold 'reset_in_progress'; so we can
4071 				 * just loop back to the top to restart error
4072 				 * processing.
4073 				 */
4074 				goto again;
4075 			}
4076 		} else {
4077 			LDC_UNLOCK(ldcp);
4078 		}
4079 
4080 	} else {	/* flags == VGEN_FLAG_UNINIT */
4081 
4082 		/* Close the channel - retry on EAGAIN */
4083 		while ((rv = ldc_close(ldcp->ldc_handle)) == EAGAIN) {
4084 			if (++retries > vgen_ldccl_retries) {
4085 				break;
4086 			}
4087 			drv_usecwait(VGEN_LDC_CLOSE_DELAY);
4088 		}
4089 		if (rv != 0) {
4090 			cmn_err(CE_NOTE,
4091 			    "!vnet%d: Error(%d) closing the channel(0x%lx)\n",
4092 			    vgenp->instance, rv, ldcp->ldc_id);
4093 		}
4094 
4095 		ldcp->ldc_reset_count = 0;
4096 		ldcp->ldc_status = LDC_INIT;
4097 		ldcp->flags &= ~(CHANNEL_STARTED);
4098 
4099 		LDC_UNLOCK(ldcp);
4100 	}
4101 
4102 	/* Done processing channel reset; clear the atomic flag */
4103 	ldcp->reset_in_progress = 0;
4104 	return (0);
4105 }
4106 
4107 /*
4108  * Initiate handshake with the peer by sending various messages
4109  * based on the handshake-phase that the channel is currently in.
4110  */
4111 static int
4112 vgen_handshake(vgen_ldc_t *ldcp)
4113 {
4114 	uint32_t	hphase = ldcp->hphase;
4115 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4116 	int		rv = 0;
4117 	timeout_id_t	htid;
4118 
4119 	switch (hphase) {
4120 
4121 	case VH_PHASE1:
4122 
4123 		/*
4124 		 * start timer, for entire handshake process, turn this timer
4125 		 * off if all phases of handshake complete successfully and
4126 		 * hphase goes to VH_DONE(below) or channel is reset due to
4127 		 * errors or vgen_ldc_uninit() is invoked(vgen_stop).
4128 		 */
4129 		ASSERT(ldcp->htid == 0);
4130 		ldcp->htid = timeout(vgen_hwatchdog, (caddr_t)ldcp,
4131 		    drv_usectohz(vgen_hwd_interval * MICROSEC));
4132 
4133 		/* Phase 1 involves negotiating the version */
4134 		rv = vgen_send_version_negotiate(ldcp);
4135 		break;
4136 
4137 	case VH_PHASE2:
4138 		rv = vgen_handshake_phase2(ldcp);
4139 		break;
4140 
4141 	case VH_PHASE3:
4142 		rv = vgen_handshake_phase3(ldcp);
4143 		break;
4144 
4145 	case VH_PHASE4:
4146 		rv = vgen_send_rdx_info(ldcp);
4147 		break;
4148 
4149 	case VH_DONE:
4150 
4151 		ldcp->ldc_reset_count = 0;
4152 
4153 		DBG1(vgenp, ldcp, "Handshake Done\n");
4154 
4155 		/*
4156 		 * The channel is up and handshake is done successfully. Now we
4157 		 * can mark the channel link_state as 'up'. We also notify the
4158 		 * stack if the channel is connected to vswitch.
4159 		 */
4160 		ldcp->link_state = LINK_STATE_UP;
4161 
4162 		if (ldcp->portp == vgenp->vsw_portp) {
4163 			/*
4164 			 * If this channel(port) is connected to vsw,
4165 			 * need to sync multicast table with vsw.
4166 			 */
4167 			rv = vgen_send_mcast_info(ldcp);
4168 			if (rv != VGEN_SUCCESS)
4169 				break;
4170 
4171 			if (vgenp->pls_negotiated == B_FALSE) {
4172 				/*
4173 				 * We haven't negotiated with vswitch to get
4174 				 * physical link state updates. We can update
4175 				 * update the stack at this point as the
4176 				 * channel to vswitch is up and the handshake
4177 				 * is done successfully.
4178 				 *
4179 				 * If we have negotiated to get physical link
4180 				 * state updates, then we won't notify the
4181 				 * the stack here; we do that as soon as
4182 				 * vswitch sends us the initial phys link state
4183 				 * (see vgen_handle_physlink_info()).
4184 				 */
4185 				mutex_exit(&ldcp->cblock);
4186 				vgen_link_update(vgenp, ldcp->link_state);
4187 				mutex_enter(&ldcp->cblock);
4188 			}
4189 		}
4190 
4191 		if (ldcp->htid != 0) {
4192 			htid = ldcp->htid;
4193 			ldcp->htid = 0;
4194 
4195 			mutex_exit(&ldcp->cblock);
4196 			(void) untimeout(htid);
4197 			mutex_enter(&ldcp->cblock);
4198 		}
4199 
4200 		/*
4201 		 * Check if mac layer should be notified to restart
4202 		 * transmissions. This can happen if the channel got
4203 		 * reset and while tx_blocked is set.
4204 		 */
4205 		mutex_enter(&ldcp->tclock);
4206 		if (ldcp->tx_blocked) {
4207 			vio_net_tx_update_t vtx_update =
4208 			    ldcp->portp->vcb.vio_net_tx_update;
4209 
4210 			ldcp->tx_blocked = B_FALSE;
4211 			vtx_update(ldcp->portp->vhp);
4212 		}
4213 		mutex_exit(&ldcp->tclock);
4214 
4215 		/* start transmit watchdog timer */
4216 		ldcp->wd_tid = timeout(vgen_tx_watchdog, (caddr_t)ldcp,
4217 		    drv_usectohz(vgen_txwd_interval * 1000));
4218 
4219 		break;
4220 
4221 	default:
4222 		break;
4223 	}
4224 
4225 	return (rv);
4226 }
4227 
4228 /*
4229  * Check if the current handshake phase has completed successfully and
4230  * return the status.
4231  */
4232 static int
4233 vgen_handshake_done(vgen_ldc_t *ldcp)
4234 {
4235 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4236 	uint32_t	hphase = ldcp->hphase;
4237 	int 		status = 0;
4238 
4239 	switch (hphase) {
4240 
4241 	case VH_PHASE1:
4242 		/*
4243 		 * Phase1 is done, if version negotiation
4244 		 * completed successfully.
4245 		 */
4246 		status = ((ldcp->hstate & VER_NEGOTIATED) ==
4247 		    VER_NEGOTIATED);
4248 		break;
4249 
4250 	case VH_PHASE2:
4251 		/*
4252 		 * Phase 2 is done, if attr info
4253 		 * has been exchanged successfully.
4254 		 */
4255 		status = ((ldcp->hstate & ATTR_INFO_EXCHANGED) ==
4256 		    ATTR_INFO_EXCHANGED);
4257 		break;
4258 
4259 	case VH_PHASE3:
4260 		/*
4261 		 * Phase 3 is done, if dring registration
4262 		 * has been exchanged successfully.
4263 		 */
4264 		status = ((ldcp->hstate & DRING_INFO_EXCHANGED) ==
4265 		    DRING_INFO_EXCHANGED);
4266 		break;
4267 
4268 	case VH_PHASE4:
4269 		/* Phase 4 is done, if rdx msg has been exchanged */
4270 		status = ((ldcp->hstate & RDX_EXCHANGED) ==
4271 		    RDX_EXCHANGED);
4272 		break;
4273 
4274 	default:
4275 		break;
4276 	}
4277 
4278 	if (status == 0) {
4279 		return (VGEN_FAILURE);
4280 	}
4281 	DBG2(vgenp, ldcp, "PHASE(%d)\n", hphase);
4282 	return (VGEN_SUCCESS);
4283 }
4284 
4285 /*
4286  * Link State Update Notes:
4287  * The link state of the channel connected to vswitch is reported as the link
4288  * state of the vnet device, by default. If the channel is down or reset, then
4289  * the link state is marked 'down'. If the channel is 'up' *and* handshake
4290  * between the vnet and vswitch is successful, then the link state is marked
4291  * 'up'. If physical network link state is desired, then the vnet device must
4292  * be configured to get physical link updates and the 'linkprop' property
4293  * in the virtual-device MD node indicates this. As part of attribute exchange
4294  * the vnet device negotiates with the vswitch to obtain physical link state
4295  * updates. If it successfully negotiates, vswitch sends an initial physlink
4296  * msg once the handshake is done and further whenever the physical link state
4297  * changes. Currently we don't have mac layer interfaces to report two distinct
4298  * link states - virtual and physical. Thus, if the vnet has been configured to
4299  * get physical link updates, then the link status will be reported as 'up'
4300  * only when both the virtual and physical links are up.
4301  */
4302 static void
4303 vgen_link_update(vgen_t *vgenp, link_state_t link_state)
4304 {
4305 	vnet_link_update(vgenp->vnetp, link_state);
4306 }
4307 
4308 /*
4309  * Handle a version info msg from the peer or an ACK/NACK from the peer
4310  * to a version info msg that we sent.
4311  */
4312 static int
4313 vgen_handle_version_negotiate(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4314 {
4315 	vgen_t		*vgenp;
4316 	vio_ver_msg_t	*vermsg = (vio_ver_msg_t *)tagp;
4317 	int		ack = 0;
4318 	int		failed = 0;
4319 	int		idx;
4320 	vgen_ver_t	*versions = ldcp->vgen_versions;
4321 	int		rv = 0;
4322 
4323 	vgenp = LDC_TO_VGEN(ldcp);
4324 	DBG1(vgenp, ldcp, "enter\n");
4325 	switch (tagp->vio_subtype) {
4326 	case VIO_SUBTYPE_INFO:
4327 
4328 		/*  Cache sid of peer if this is the first time */
4329 		if (ldcp->peer_sid == 0) {
4330 			DBG2(vgenp, ldcp, "Caching peer_sid(%x)\n",
4331 			    tagp->vio_sid);
4332 			ldcp->peer_sid = tagp->vio_sid;
4333 		}
4334 
4335 		if (ldcp->hphase != VH_PHASE1) {
4336 			/*
4337 			 * If we are not already in VH_PHASE1, reset to
4338 			 * pre-handshake state, and initiate handshake
4339 			 * to the peer too.
4340 			 */
4341 			return (EINVAL);
4342 		}
4343 
4344 		ldcp->hstate |= VER_INFO_RCVD;
4345 
4346 		/* save peer's requested values */
4347 		ldcp->peer_hparams.ver_major = vermsg->ver_major;
4348 		ldcp->peer_hparams.ver_minor = vermsg->ver_minor;
4349 		ldcp->peer_hparams.dev_class = vermsg->dev_class;
4350 
4351 		if ((vermsg->dev_class != VDEV_NETWORK) &&
4352 		    (vermsg->dev_class != VDEV_NETWORK_SWITCH)) {
4353 			/* unsupported dev_class, send NACK */
4354 
4355 			DWARN(vgenp, ldcp, "Version Negotiation Failed\n");
4356 
4357 			tagp->vio_subtype = VIO_SUBTYPE_NACK;
4358 			tagp->vio_sid = ldcp->local_sid;
4359 			/* send reply msg back to peer */
4360 			rv = vgen_sendmsg(ldcp, (caddr_t)tagp,
4361 			    sizeof (*vermsg), B_FALSE);
4362 			if (rv != VGEN_SUCCESS) {
4363 				return (rv);
4364 			}
4365 			return (VGEN_FAILURE);
4366 		}
4367 
4368 		DBG2(vgenp, ldcp, "VER_INFO_RCVD, ver(%d,%d)\n",
4369 		    vermsg->ver_major,  vermsg->ver_minor);
4370 
4371 		idx = 0;
4372 
4373 		for (;;) {
4374 
4375 			if (vermsg->ver_major > versions[idx].ver_major) {
4376 
4377 				/* nack with next lower version */
4378 				tagp->vio_subtype = VIO_SUBTYPE_NACK;
4379 				vermsg->ver_major = versions[idx].ver_major;
4380 				vermsg->ver_minor = versions[idx].ver_minor;
4381 				break;
4382 			}
4383 
4384 			if (vermsg->ver_major == versions[idx].ver_major) {
4385 
4386 				/* major version match - ACK version */
4387 				tagp->vio_subtype = VIO_SUBTYPE_ACK;
4388 				ack = 1;
4389 
4390 				/*
4391 				 * lower minor version to the one this endpt
4392 				 * supports, if necessary
4393 				 */
4394 				if (vermsg->ver_minor >
4395 				    versions[idx].ver_minor) {
4396 					vermsg->ver_minor =
4397 					    versions[idx].ver_minor;
4398 					ldcp->peer_hparams.ver_minor =
4399 					    versions[idx].ver_minor;
4400 				}
4401 				break;
4402 			}
4403 
4404 			idx++;
4405 
4406 			if (idx == VGEN_NUM_VER) {
4407 
4408 				/* no version match - send NACK */
4409 				tagp->vio_subtype = VIO_SUBTYPE_NACK;
4410 				vermsg->ver_major = 0;
4411 				vermsg->ver_minor = 0;
4412 				failed = 1;
4413 				break;
4414 			}
4415 
4416 		}
4417 
4418 		tagp->vio_sid = ldcp->local_sid;
4419 
4420 		/* send reply msg back to peer */
4421 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*vermsg),
4422 		    B_FALSE);
4423 		if (rv != VGEN_SUCCESS) {
4424 			return (rv);
4425 		}
4426 
4427 		if (ack) {
4428 			ldcp->hstate |= VER_ACK_SENT;
4429 			DBG2(vgenp, ldcp, "VER_ACK_SENT, ver(%d,%d) \n",
4430 			    vermsg->ver_major, vermsg->ver_minor);
4431 		}
4432 		if (failed) {
4433 			DWARN(vgenp, ldcp, "Negotiation Failed\n");
4434 			return (VGEN_FAILURE);
4435 		}
4436 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
4437 
4438 			/*  VER_ACK_SENT and VER_ACK_RCVD */
4439 
4440 			/* local and peer versions match? */
4441 			ASSERT((ldcp->local_hparams.ver_major ==
4442 			    ldcp->peer_hparams.ver_major) &&
4443 			    (ldcp->local_hparams.ver_minor ==
4444 			    ldcp->peer_hparams.ver_minor));
4445 
4446 			vgen_set_vnet_proto_ops(ldcp);
4447 
4448 			/* move to the next phase */
4449 			rv = vgen_handshake(vh_nextphase(ldcp));
4450 			if (rv != 0) {
4451 				return (rv);
4452 			}
4453 		}
4454 
4455 		break;
4456 
4457 	case VIO_SUBTYPE_ACK:
4458 
4459 		if (ldcp->hphase != VH_PHASE1) {
4460 			/*  This should not happen. */
4461 			DWARN(vgenp, ldcp, "Invalid Phase(%u)\n", ldcp->hphase);
4462 			return (VGEN_FAILURE);
4463 		}
4464 
4465 		/* SUCCESS - we have agreed on a version */
4466 		ldcp->local_hparams.ver_major = vermsg->ver_major;
4467 		ldcp->local_hparams.ver_minor = vermsg->ver_minor;
4468 		ldcp->hstate |= VER_ACK_RCVD;
4469 
4470 		DBG2(vgenp, ldcp, "VER_ACK_RCVD, ver(%d,%d) \n",
4471 		    vermsg->ver_major,  vermsg->ver_minor);
4472 
4473 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
4474 
4475 			/*  VER_ACK_SENT and VER_ACK_RCVD */
4476 
4477 			/* local and peer versions match? */
4478 			ASSERT((ldcp->local_hparams.ver_major ==
4479 			    ldcp->peer_hparams.ver_major) &&
4480 			    (ldcp->local_hparams.ver_minor ==
4481 			    ldcp->peer_hparams.ver_minor));
4482 
4483 			vgen_set_vnet_proto_ops(ldcp);
4484 
4485 			/* move to the next phase */
4486 			rv = vgen_handshake(vh_nextphase(ldcp));
4487 			if (rv != 0) {
4488 				return (rv);
4489 			}
4490 		}
4491 		break;
4492 
4493 	case VIO_SUBTYPE_NACK:
4494 
4495 		if (ldcp->hphase != VH_PHASE1) {
4496 			/*  This should not happen.  */
4497 			DWARN(vgenp, ldcp, "VER_NACK_RCVD Invalid "
4498 			"Phase(%u)\n", ldcp->hphase);
4499 			return (VGEN_FAILURE);
4500 		}
4501 
4502 		DBG2(vgenp, ldcp, "VER_NACK_RCVD next ver(%d,%d)\n",
4503 		    vermsg->ver_major, vermsg->ver_minor);
4504 
4505 		/* check if version in NACK is zero */
4506 		if (vermsg->ver_major == 0 && vermsg->ver_minor == 0) {
4507 			/*
4508 			 * Version Negotiation has failed.
4509 			 */
4510 			DWARN(vgenp, ldcp, "Version Negotiation Failed\n");
4511 			return (VGEN_FAILURE);
4512 		}
4513 
4514 		idx = 0;
4515 
4516 		for (;;) {
4517 
4518 			if (vermsg->ver_major > versions[idx].ver_major) {
4519 				/* select next lower version */
4520 
4521 				ldcp->local_hparams.ver_major =
4522 				    versions[idx].ver_major;
4523 				ldcp->local_hparams.ver_minor =
4524 				    versions[idx].ver_minor;
4525 				break;
4526 			}
4527 
4528 			if (vermsg->ver_major == versions[idx].ver_major) {
4529 				/* major version match */
4530 
4531 				ldcp->local_hparams.ver_major =
4532 				    versions[idx].ver_major;
4533 
4534 				ldcp->local_hparams.ver_minor =
4535 				    versions[idx].ver_minor;
4536 				break;
4537 			}
4538 
4539 			idx++;
4540 
4541 			if (idx == VGEN_NUM_VER) {
4542 				/*
4543 				 * no version match.
4544 				 * Version Negotiation has failed.
4545 				 */
4546 				DWARN(vgenp, ldcp,
4547 				    "Version Negotiation Failed\n");
4548 				return (VGEN_FAILURE);
4549 			}
4550 
4551 		}
4552 
4553 		rv = vgen_send_version_negotiate(ldcp);
4554 		if (rv != VGEN_SUCCESS) {
4555 			return (rv);
4556 		}
4557 
4558 		break;
4559 	}
4560 
4561 	DBG1(vgenp, ldcp, "exit\n");
4562 	return (VGEN_SUCCESS);
4563 }
4564 
4565 static int
4566 vgen_handle_attr_info(vgen_ldc_t *ldcp, vnet_attr_msg_t *msg)
4567 {
4568 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4569 	vgen_hparams_t	*lp = &ldcp->local_hparams;
4570 	vgen_hparams_t	*rp = &ldcp->peer_hparams;
4571 	uint32_t	mtu;
4572 	uint8_t		dring_mode;
4573 
4574 	ldcp->hstate |= ATTR_INFO_RCVD;
4575 
4576 	/* save peer's values */
4577 	rp->mtu = msg->mtu;
4578 	rp->addr = msg->addr;
4579 	rp->addr_type = msg->addr_type;
4580 	rp->xfer_mode = msg->xfer_mode;
4581 	rp->ack_freq = msg->ack_freq;
4582 	rp->dring_mode = msg->options;
4583 
4584 	/*
4585 	 * Process address type, ack frequency and transfer mode attributes.
4586 	 */
4587 	if ((msg->addr_type != ADDR_TYPE_MAC) ||
4588 	    (msg->ack_freq > 64) ||
4589 	    (msg->xfer_mode != lp->xfer_mode)) {
4590 		return (VGEN_FAILURE);
4591 	}
4592 
4593 	/*
4594 	 * Process dring mode attribute.
4595 	 */
4596 	if (VGEN_VER_GTEQ(ldcp, 1, 6)) {
4597 		/*
4598 		 * Versions >= 1.6:
4599 		 * Though we are operating in v1.6 mode, it is possible that
4600 		 * RxDringData mode has been disabled either on this guest or
4601 		 * on the peer guest. If so, we revert to pre v1.6 behavior of
4602 		 * TxDring mode. But this must be agreed upon in both
4603 		 * directions of attr exchange. We first determine the mode
4604 		 * that can be negotiated.
4605 		 */
4606 		if ((msg->options & VIO_RX_DRING_DATA) != 0 &&
4607 		    vgen_dring_mode == VIO_RX_DRING_DATA) {
4608 			/*
4609 			 * We are capable of handling RxDringData AND the peer
4610 			 * is also capable of it; we enable RxDringData mode on
4611 			 * this channel.
4612 			 */
4613 			dring_mode = VIO_RX_DRING_DATA;
4614 		} else if ((msg->options & VIO_TX_DRING) != 0) {
4615 			/*
4616 			 * If the peer is capable of TxDring mode, we
4617 			 * negotiate TxDring mode on this channel.
4618 			 */
4619 			dring_mode = VIO_TX_DRING;
4620 		} else {
4621 			/*
4622 			 * We support only VIO_TX_DRING and VIO_RX_DRING_DATA
4623 			 * modes. We don't support VIO_RX_DRING mode.
4624 			 */
4625 			return (VGEN_FAILURE);
4626 		}
4627 
4628 		/*
4629 		 * If we have received an ack for the attr info that we sent,
4630 		 * then check if the dring mode matches what the peer had ack'd
4631 		 * (saved in local hparams). If they don't match, we fail the
4632 		 * handshake.
4633 		 */
4634 		if (ldcp->hstate & ATTR_ACK_RCVD) {
4635 			if (msg->options != lp->dring_mode) {
4636 				/* send NACK */
4637 				return (VGEN_FAILURE);
4638 			}
4639 		} else {
4640 			/*
4641 			 * Save the negotiated dring mode in our attr
4642 			 * parameters, so it gets sent in the attr info from us
4643 			 * to the peer.
4644 			 */
4645 			lp->dring_mode = dring_mode;
4646 		}
4647 
4648 		/* save the negotiated dring mode in the msg to be replied */
4649 		msg->options = dring_mode;
4650 	}
4651 
4652 	/*
4653 	 * Process MTU attribute.
4654 	 */
4655 	if (VGEN_VER_GTEQ(ldcp, 1, 4)) {
4656 		/*
4657 		 * Versions >= 1.4:
4658 		 * Validate mtu of the peer is at least ETHERMAX. Then, the mtu
4659 		 * is negotiated down to the minimum of our mtu and peer's mtu.
4660 		 */
4661 		if (msg->mtu < ETHERMAX) {
4662 			return (VGEN_FAILURE);
4663 		}
4664 
4665 		mtu = MIN(msg->mtu, vgenp->max_frame_size);
4666 
4667 		/*
4668 		 * If we have received an ack for the attr info
4669 		 * that we sent, then check if the mtu computed
4670 		 * above matches the mtu that the peer had ack'd
4671 		 * (saved in local hparams). If they don't
4672 		 * match, we fail the handshake.
4673 		 */
4674 		if (ldcp->hstate & ATTR_ACK_RCVD) {
4675 			if (mtu != lp->mtu) {
4676 				/* send NACK */
4677 				return (VGEN_FAILURE);
4678 			}
4679 		} else {
4680 			/*
4681 			 * Save the mtu computed above in our
4682 			 * attr parameters, so it gets sent in
4683 			 * the attr info from us to the peer.
4684 			 */
4685 			lp->mtu = mtu;
4686 		}
4687 
4688 		/* save the MIN mtu in the msg to be replied */
4689 		msg->mtu = mtu;
4690 
4691 	} else {
4692 		/* versions < 1.4, mtu must match */
4693 		if (msg->mtu != lp->mtu) {
4694 			return (VGEN_FAILURE);
4695 		}
4696 	}
4697 
4698 	return (VGEN_SUCCESS);
4699 }
4700 
4701 static int
4702 vgen_handle_attr_ack(vgen_ldc_t *ldcp, vnet_attr_msg_t *msg)
4703 {
4704 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4705 	vgen_hparams_t	*lp = &ldcp->local_hparams;
4706 
4707 	/*
4708 	 * Process dring mode attribute.
4709 	 */
4710 	if (VGEN_VER_GTEQ(ldcp, 1, 6)) {
4711 		/*
4712 		 * Versions >= 1.6:
4713 		 * The ack msg sent by the peer contains the negotiated dring
4714 		 * mode between our capability (that we had sent in our attr
4715 		 * info) and the peer's capability.
4716 		 */
4717 		if (ldcp->hstate & ATTR_ACK_SENT) {
4718 			/*
4719 			 * If we have sent an ack for the attr info msg from
4720 			 * the peer, check if the dring mode that was
4721 			 * negotiated then (saved in local hparams) matches the
4722 			 * mode that the peer has ack'd. If they don't match,
4723 			 * we fail the handshake.
4724 			 */
4725 			if (lp->dring_mode != msg->options) {
4726 				return (VGEN_FAILURE);
4727 			}
4728 		} else {
4729 			if ((msg->options & lp->dring_mode) == 0) {
4730 				/*
4731 				 * Peer ack'd with a mode that we don't
4732 				 * support; we fail the handshake.
4733 				 */
4734 				return (VGEN_FAILURE);
4735 			}
4736 			if ((msg->options & (VIO_TX_DRING|VIO_RX_DRING_DATA))
4737 			    == (VIO_TX_DRING|VIO_RX_DRING_DATA)) {
4738 				/*
4739 				 * Peer must ack with only one negotiated mode.
4740 				 * Otherwise fail handshake.
4741 				 */
4742 				return (VGEN_FAILURE);
4743 			}
4744 
4745 			/*
4746 			 * Save the negotiated mode, so we can validate it when
4747 			 * we receive attr info from the peer.
4748 			 */
4749 			lp->dring_mode = msg->options;
4750 		}
4751 	}
4752 
4753 	/*
4754 	 * Process Physical Link Update attribute.
4755 	 */
4756 	if (VGEN_VER_GTEQ(ldcp, 1, 5) &&
4757 	    ldcp->portp == vgenp->vsw_portp) {
4758 		/*
4759 		 * Versions >= 1.5:
4760 		 * If the vnet device has been configured to get
4761 		 * physical link state updates, check the corresponding
4762 		 * bits in the ack msg, if the peer is vswitch.
4763 		 */
4764 		if (((lp->physlink_update & PHYSLINK_UPDATE_STATE_MASK) ==
4765 		    PHYSLINK_UPDATE_STATE) &&
4766 		    ((msg->physlink_update & PHYSLINK_UPDATE_STATE_MASK) ==
4767 		    PHYSLINK_UPDATE_STATE_ACK)) {
4768 			vgenp->pls_negotiated = B_TRUE;
4769 		} else {
4770 			vgenp->pls_negotiated = B_FALSE;
4771 		}
4772 	}
4773 
4774 	/*
4775 	 * Process MTU attribute.
4776 	 */
4777 	if (VGEN_VER_GTEQ(ldcp, 1, 4)) {
4778 		/*
4779 		 * Versions >= 1.4:
4780 		 * The ack msg sent by the peer contains the minimum of
4781 		 * our mtu (that we had sent in our attr info) and the
4782 		 * peer's mtu.
4783 		 *
4784 		 * If we have sent an ack for the attr info msg from
4785 		 * the peer, check if the mtu that was computed then
4786 		 * (saved in local hparams) matches the mtu that the
4787 		 * peer has ack'd. If they don't match, we fail the
4788 		 * handshake.
4789 		 */
4790 		if (ldcp->hstate & ATTR_ACK_SENT) {
4791 			if (lp->mtu != msg->mtu) {
4792 				return (VGEN_FAILURE);
4793 			}
4794 		} else {
4795 			/*
4796 			 * If the mtu ack'd by the peer is > our mtu
4797 			 * fail handshake. Otherwise, save the mtu, so
4798 			 * we can validate it when we receive attr info
4799 			 * from our peer.
4800 			 */
4801 			if (msg->mtu > lp->mtu) {
4802 				return (VGEN_FAILURE);
4803 			}
4804 			if (msg->mtu <= lp->mtu) {
4805 				lp->mtu = msg->mtu;
4806 			}
4807 		}
4808 	}
4809 
4810 	return (VGEN_SUCCESS);
4811 }
4812 
4813 
4814 /*
4815  * Handle an attribute info msg from the peer or an ACK/NACK from the peer
4816  * to an attr info msg that we sent.
4817  */
4818 static int
4819 vgen_handle_attr_msg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4820 {
4821 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4822 	vnet_attr_msg_t	*msg = (vnet_attr_msg_t *)tagp;
4823 	int		rv = 0;
4824 
4825 	DBG1(vgenp, ldcp, "enter\n");
4826 	if (ldcp->hphase != VH_PHASE2) {
4827 		DWARN(vgenp, ldcp, "Rcvd ATTR_INFO subtype(%d),"
4828 		" Invalid Phase(%u)\n",
4829 		    tagp->vio_subtype, ldcp->hphase);
4830 		return (VGEN_FAILURE);
4831 	}
4832 	switch (tagp->vio_subtype) {
4833 	case VIO_SUBTYPE_INFO:
4834 
4835 		rv = vgen_handle_attr_info(ldcp, msg);
4836 		if (rv == VGEN_SUCCESS) {
4837 			tagp->vio_subtype = VIO_SUBTYPE_ACK;
4838 		} else {
4839 			tagp->vio_subtype = VIO_SUBTYPE_NACK;
4840 		}
4841 		tagp->vio_sid = ldcp->local_sid;
4842 
4843 		/* send reply msg back to peer */
4844 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msg),
4845 		    B_FALSE);
4846 		if (rv != VGEN_SUCCESS) {
4847 			return (rv);
4848 		}
4849 
4850 		if (tagp->vio_subtype == VIO_SUBTYPE_NACK) {
4851 			DWARN(vgenp, ldcp, "ATTR_NACK_SENT");
4852 			break;
4853 		}
4854 
4855 		ldcp->hstate |= ATTR_ACK_SENT;
4856 		DBG2(vgenp, ldcp, "ATTR_ACK_SENT \n");
4857 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
4858 			rv = vgen_handshake(vh_nextphase(ldcp));
4859 			if (rv != 0) {
4860 				return (rv);
4861 			}
4862 		}
4863 
4864 		break;
4865 
4866 	case VIO_SUBTYPE_ACK:
4867 
4868 		rv = vgen_handle_attr_ack(ldcp, msg);
4869 		if (rv == VGEN_FAILURE) {
4870 			break;
4871 		}
4872 
4873 		ldcp->hstate |= ATTR_ACK_RCVD;
4874 		DBG2(vgenp, ldcp, "ATTR_ACK_RCVD \n");
4875 
4876 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
4877 			rv = vgen_handshake(vh_nextphase(ldcp));
4878 			if (rv != 0) {
4879 				return (rv);
4880 			}
4881 		}
4882 		break;
4883 
4884 	case VIO_SUBTYPE_NACK:
4885 
4886 		DBG2(vgenp, ldcp, "ATTR_NACK_RCVD \n");
4887 		return (VGEN_FAILURE);
4888 	}
4889 	DBG1(vgenp, ldcp, "exit\n");
4890 	return (VGEN_SUCCESS);
4891 }
4892 
4893 static int
4894 vgen_handle_dring_reg_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4895 {
4896 	int		rv = 0;
4897 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4898 	vgen_hparams_t	*lp = &ldcp->local_hparams;
4899 
4900 	DBG2(vgenp, ldcp, "DRING_INFO_RCVD");
4901 	ldcp->hstate |= DRING_INFO_RCVD;
4902 
4903 	if (VGEN_VER_GTEQ(ldcp, 1, 6) &&
4904 	    (lp->dring_mode != ((vio_dring_reg_msg_t *)tagp)->options)) {
4905 		/*
4906 		 * The earlier version of Solaris vnet driver doesn't set the
4907 		 * option (VIO_TX_DRING in its case) correctly in its dring reg
4908 		 * message. We workaround that here by doing the check only
4909 		 * for versions >= v1.6.
4910 		 */
4911 		DWARN(vgenp, ldcp,
4912 		    "Rcvd dring reg option (%d), negotiated mode (%d)\n",
4913 		    ((vio_dring_reg_msg_t *)tagp)->options, lp->dring_mode);
4914 		return (VGEN_FAILURE);
4915 	}
4916 
4917 	/*
4918 	 * Map dring exported by the peer.
4919 	 */
4920 	rv = vgen_map_dring(ldcp, (void *)tagp);
4921 	if (rv != VGEN_SUCCESS) {
4922 		return (rv);
4923 	}
4924 
4925 	/*
4926 	 * Map data buffers exported by the peer if we are in RxDringData mode.
4927 	 */
4928 	if (lp->dring_mode == VIO_RX_DRING_DATA) {
4929 		rv = vgen_map_data(ldcp, (void *)tagp);
4930 		if (rv != VGEN_SUCCESS) {
4931 			vgen_unmap_dring(ldcp);
4932 			return (rv);
4933 		}
4934 	}
4935 
4936 	if (ldcp->peer_hparams.dring_ready == B_FALSE) {
4937 		ldcp->peer_hparams.dring_ready = B_TRUE;
4938 	}
4939 
4940 	return (VGEN_SUCCESS);
4941 }
4942 
4943 static int
4944 vgen_handle_dring_reg_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4945 {
4946 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4947 	vgen_hparams_t	*lp = &ldcp->local_hparams;
4948 
4949 	DBG2(vgenp, ldcp, "DRING_ACK_RCVD");
4950 	ldcp->hstate |= DRING_ACK_RCVD;
4951 
4952 	if (lp->dring_ready) {
4953 		return (VGEN_SUCCESS);
4954 	}
4955 
4956 	/* save dring_ident acked by peer */
4957 	lp->dring_ident = ((vio_dring_reg_msg_t *)tagp)->dring_ident;
4958 
4959 	/* local dring is now ready */
4960 	lp->dring_ready = B_TRUE;
4961 
4962 	return (VGEN_SUCCESS);
4963 }
4964 
4965 /*
4966  * Handle a descriptor ring register msg from the peer or an ACK/NACK from
4967  * the peer to a dring register msg that we sent.
4968  */
4969 static int
4970 vgen_handle_dring_reg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4971 {
4972 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4973 	int		rv = 0;
4974 	int		msgsize;
4975 	vgen_hparams_t	*lp = &ldcp->local_hparams;
4976 
4977 	DBG1(vgenp, ldcp, "enter\n");
4978 	if (ldcp->hphase < VH_PHASE2) {
4979 		/* dring_info can be rcvd in any of the phases after Phase1 */
4980 		DWARN(vgenp, ldcp,
4981 		    "Rcvd DRING_INFO Subtype (%d), Invalid Phase(%u)\n",
4982 		    tagp->vio_subtype, ldcp->hphase);
4983 		return (VGEN_FAILURE);
4984 	}
4985 
4986 	switch (tagp->vio_subtype) {
4987 	case VIO_SUBTYPE_INFO:
4988 
4989 		rv = vgen_handle_dring_reg_info(ldcp, tagp);
4990 		if (rv == VGEN_SUCCESS) {
4991 			tagp->vio_subtype = VIO_SUBTYPE_ACK;
4992 		} else {
4993 			tagp->vio_subtype = VIO_SUBTYPE_NACK;
4994 		}
4995 
4996 		tagp->vio_sid = ldcp->local_sid;
4997 
4998 		if (lp->dring_mode == VIO_RX_DRING_DATA) {
4999 			msgsize =
5000 			    VNET_DRING_REG_EXT_MSG_SIZE(ldcp->tx_data_ncookies);
5001 		} else {
5002 			msgsize = sizeof (vio_dring_reg_msg_t);
5003 		}
5004 
5005 		/* send reply msg back to peer */
5006 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, msgsize,
5007 		    B_FALSE);
5008 		if (rv != VGEN_SUCCESS) {
5009 			return (rv);
5010 		}
5011 
5012 		if (tagp->vio_subtype == VIO_SUBTYPE_NACK) {
5013 			DWARN(vgenp, ldcp, "DRING_NACK_SENT");
5014 			return (VGEN_FAILURE);
5015 		}
5016 
5017 		ldcp->hstate |= DRING_ACK_SENT;
5018 		DBG2(vgenp, ldcp, "DRING_ACK_SENT");
5019 
5020 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5021 			rv = vgen_handshake(vh_nextphase(ldcp));
5022 			if (rv != 0) {
5023 				return (rv);
5024 			}
5025 		}
5026 		break;
5027 
5028 	case VIO_SUBTYPE_ACK:
5029 
5030 		rv = vgen_handle_dring_reg_ack(ldcp, tagp);
5031 		if (rv == VGEN_FAILURE) {
5032 			return (rv);
5033 		}
5034 
5035 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5036 			rv = vgen_handshake(vh_nextphase(ldcp));
5037 			if (rv != 0) {
5038 				return (rv);
5039 			}
5040 		}
5041 
5042 		break;
5043 
5044 	case VIO_SUBTYPE_NACK:
5045 
5046 		DWARN(vgenp, ldcp, "DRING_NACK_RCVD");
5047 		return (VGEN_FAILURE);
5048 	}
5049 	DBG1(vgenp, ldcp, "exit\n");
5050 	return (VGEN_SUCCESS);
5051 }
5052 
5053 /*
5054  * Handle a rdx info msg from the peer or an ACK/NACK
5055  * from the peer to a rdx info msg that we sent.
5056  */
5057 static int
5058 vgen_handle_rdx_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5059 {
5060 	int	rv = 0;
5061 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
5062 
5063 	DBG1(vgenp, ldcp, "enter\n");
5064 	if (ldcp->hphase != VH_PHASE4) {
5065 		DWARN(vgenp, ldcp,
5066 		    "Rcvd RDX_INFO Subtype (%d), Invalid Phase(%u)\n",
5067 		    tagp->vio_subtype, ldcp->hphase);
5068 		return (VGEN_FAILURE);
5069 	}
5070 	switch (tagp->vio_subtype) {
5071 	case VIO_SUBTYPE_INFO:
5072 
5073 		DBG2(vgenp, ldcp, "RDX_INFO_RCVD \n");
5074 		ldcp->hstate |= RDX_INFO_RCVD;
5075 
5076 		tagp->vio_subtype = VIO_SUBTYPE_ACK;
5077 		tagp->vio_sid = ldcp->local_sid;
5078 		/* send reply msg back to peer */
5079 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (vio_rdx_msg_t),
5080 		    B_FALSE);
5081 		if (rv != VGEN_SUCCESS) {
5082 			return (rv);
5083 		}
5084 
5085 		ldcp->hstate |= RDX_ACK_SENT;
5086 		DBG2(vgenp, ldcp, "RDX_ACK_SENT \n");
5087 
5088 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5089 			rv = vgen_handshake(vh_nextphase(ldcp));
5090 			if (rv != 0) {
5091 				return (rv);
5092 			}
5093 		}
5094 
5095 		break;
5096 
5097 	case VIO_SUBTYPE_ACK:
5098 
5099 		ldcp->hstate |= RDX_ACK_RCVD;
5100 
5101 		DBG2(vgenp, ldcp, "RDX_ACK_RCVD \n");
5102 
5103 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5104 			rv = vgen_handshake(vh_nextphase(ldcp));
5105 			if (rv != 0) {
5106 				return (rv);
5107 			}
5108 		}
5109 		break;
5110 
5111 	case VIO_SUBTYPE_NACK:
5112 
5113 		DBG2(vgenp, ldcp, "RDX_NACK_RCVD \n");
5114 		return (VGEN_FAILURE);
5115 	}
5116 	DBG1(vgenp, ldcp, "exit\n");
5117 	return (VGEN_SUCCESS);
5118 }
5119 
5120 /* Handle ACK/NACK from vsw to a set multicast msg that we sent */
5121 static int
5122 vgen_handle_mcast_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5123 {
5124 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
5125 	vnet_mcast_msg_t	*msgp = (vnet_mcast_msg_t *)tagp;
5126 	struct ether_addr	*addrp;
5127 	int			count;
5128 	int			i;
5129 
5130 	DBG1(vgenp, ldcp, "enter\n");
5131 	switch (tagp->vio_subtype) {
5132 
5133 	case VIO_SUBTYPE_INFO:
5134 
5135 		/* vnet shouldn't recv set mcast msg, only vsw handles it */
5136 		DWARN(vgenp, ldcp, "rcvd SET_MCAST_INFO \n");
5137 		break;
5138 
5139 	case VIO_SUBTYPE_ACK:
5140 
5141 		/* success adding/removing multicast addr */
5142 		DBG1(vgenp, ldcp, "rcvd SET_MCAST_ACK \n");
5143 		break;
5144 
5145 	case VIO_SUBTYPE_NACK:
5146 
5147 		DWARN(vgenp, ldcp, "rcvd SET_MCAST_NACK \n");
5148 		if (!(msgp->set)) {
5149 			/* multicast remove request failed */
5150 			break;
5151 		}
5152 
5153 		/* multicast add request failed */
5154 		for (count = 0; count < msgp->count; count++) {
5155 			addrp = &(msgp->mca[count]);
5156 
5157 			/* delete address from the table */
5158 			for (i = 0; i < vgenp->mccount; i++) {
5159 				if (ether_cmp(addrp,
5160 				    &(vgenp->mctab[i])) == 0) {
5161 					if (vgenp->mccount > 1) {
5162 						int t = vgenp->mccount - 1;
5163 						vgenp->mctab[i] =
5164 						    vgenp->mctab[t];
5165 					}
5166 					vgenp->mccount--;
5167 					break;
5168 				}
5169 			}
5170 		}
5171 		break;
5172 
5173 	}
5174 	DBG1(vgenp, ldcp, "exit\n");
5175 
5176 	return (VGEN_SUCCESS);
5177 }
5178 
5179 /*
5180  * Physical link information message from the peer. Only vswitch should send
5181  * us this message; if the vnet device has been configured to get physical link
5182  * state updates. Note that we must have already negotiated this with the
5183  * vswitch during attribute exchange phase of handshake.
5184  */
5185 static int
5186 vgen_handle_physlink_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5187 {
5188 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
5189 	vnet_physlink_msg_t	*msgp = (vnet_physlink_msg_t *)tagp;
5190 	link_state_t		link_state;
5191 	int			rv;
5192 
5193 	if (ldcp->portp != vgenp->vsw_portp) {
5194 		/*
5195 		 * drop the message and don't process; as we should
5196 		 * receive physlink_info message from only vswitch.
5197 		 */
5198 		return (VGEN_SUCCESS);
5199 	}
5200 
5201 	if (vgenp->pls_negotiated == B_FALSE) {
5202 		/*
5203 		 * drop the message and don't process; as we should receive
5204 		 * physlink_info message only if physlink update is enabled for
5205 		 * the device and negotiated with vswitch.
5206 		 */
5207 		return (VGEN_SUCCESS);
5208 	}
5209 
5210 	switch (tagp->vio_subtype) {
5211 
5212 	case VIO_SUBTYPE_INFO:
5213 
5214 		if ((msgp->physlink_info & VNET_PHYSLINK_STATE_MASK) ==
5215 		    VNET_PHYSLINK_STATE_UP) {
5216 			link_state = LINK_STATE_UP;
5217 		} else {
5218 			link_state = LINK_STATE_DOWN;
5219 		}
5220 
5221 		if (vgenp->phys_link_state != link_state) {
5222 			vgenp->phys_link_state = link_state;
5223 			mutex_exit(&ldcp->cblock);
5224 
5225 			/* Now update the stack */
5226 			vgen_link_update(vgenp, link_state);
5227 
5228 			mutex_enter(&ldcp->cblock);
5229 		}
5230 
5231 		tagp->vio_subtype = VIO_SUBTYPE_ACK;
5232 		tagp->vio_sid = ldcp->local_sid;
5233 
5234 		/* send reply msg back to peer */
5235 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp,
5236 		    sizeof (vnet_physlink_msg_t), B_FALSE);
5237 		if (rv != VGEN_SUCCESS) {
5238 			return (rv);
5239 		}
5240 		break;
5241 
5242 	case VIO_SUBTYPE_ACK:
5243 
5244 		/* vnet shouldn't recv physlink acks */
5245 		DWARN(vgenp, ldcp, "rcvd PHYSLINK_ACK \n");
5246 		break;
5247 
5248 	case VIO_SUBTYPE_NACK:
5249 
5250 		/* vnet shouldn't recv physlink nacks */
5251 		DWARN(vgenp, ldcp, "rcvd PHYSLINK_NACK \n");
5252 		break;
5253 
5254 	}
5255 	DBG1(vgenp, ldcp, "exit\n");
5256 
5257 	return (VGEN_SUCCESS);
5258 }
5259 
5260 /* handler for control messages received from the peer ldc end-point */
5261 static int
5262 vgen_handle_ctrlmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5263 {
5264 	int	rv = 0;
5265 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
5266 
5267 	DBG1(vgenp, ldcp, "enter\n");
5268 	switch (tagp->vio_subtype_env) {
5269 
5270 	case VIO_VER_INFO:
5271 		rv = vgen_handle_version_negotiate(ldcp, tagp);
5272 		break;
5273 
5274 	case VIO_ATTR_INFO:
5275 		rv = vgen_handle_attr_msg(ldcp, tagp);
5276 		break;
5277 
5278 	case VIO_DRING_REG:
5279 		rv = vgen_handle_dring_reg(ldcp, tagp);
5280 		break;
5281 
5282 	case VIO_RDX:
5283 		rv = vgen_handle_rdx_info(ldcp, tagp);
5284 		break;
5285 
5286 	case VNET_MCAST_INFO:
5287 		rv = vgen_handle_mcast_info(ldcp, tagp);
5288 		break;
5289 
5290 	case VIO_DDS_INFO:
5291 		/*
5292 		 * If we are in the process of resetting the vswitch channel,
5293 		 * drop the dds message. A new handshake will be initiated
5294 		 * when the channel comes back up after the reset and dds
5295 		 * negotiation can then continue.
5296 		 */
5297 		if (ldcp->reset_in_progress == 1) {
5298 			break;
5299 		}
5300 		rv = vgen_dds_rx(ldcp, tagp);
5301 		break;
5302 
5303 	case VNET_PHYSLINK_INFO:
5304 		rv = vgen_handle_physlink_info(ldcp, tagp);
5305 		break;
5306 	}
5307 
5308 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
5309 	return (rv);
5310 }
5311 
5312 /* handler for error messages received from the peer ldc end-point */
5313 static void
5314 vgen_handle_errmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5315 {
5316 	_NOTE(ARGUNUSED(ldcp, tagp))
5317 }
5318 
5319 /*
5320  * This function handles raw pkt data messages received over the channel.
5321  * Currently, only priority-eth-type frames are received through this mechanism.
5322  * In this case, the frame(data) is present within the message itself which
5323  * is copied into an mblk before sending it up the stack.
5324  */
5325 void
5326 vgen_handle_pkt_data(void *arg1, void *arg2, uint32_t msglen)
5327 {
5328 	vgen_ldc_t		*ldcp = (vgen_ldc_t *)arg1;
5329 	vio_raw_data_msg_t	*pkt	= (vio_raw_data_msg_t *)arg2;
5330 	uint32_t		size;
5331 	mblk_t			*mp;
5332 	vio_mblk_t		*vmp;
5333 	vio_net_rx_cb_t		vrx_cb = NULL;
5334 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
5335 	vgen_stats_t		*statsp = &ldcp->stats;
5336 	vgen_hparams_t		*lp = &ldcp->local_hparams;
5337 	uint_t			dring_mode = lp->dring_mode;
5338 
5339 	ASSERT(MUTEX_HELD(&ldcp->cblock));
5340 
5341 	mutex_exit(&ldcp->cblock);
5342 
5343 	size = msglen - VIO_PKT_DATA_HDRSIZE;
5344 	if (size < ETHERMIN || size > lp->mtu) {
5345 		(void) atomic_inc_32(&statsp->rx_pri_fail);
5346 		mutex_enter(&ldcp->cblock);
5347 		return;
5348 	}
5349 
5350 	vmp = vio_multipool_allocb(&ldcp->vmp, size);
5351 	if (vmp == NULL) {
5352 		mp = allocb(size, BPRI_MED);
5353 		if (mp == NULL) {
5354 			(void) atomic_inc_32(&statsp->rx_pri_fail);
5355 			DWARN(vgenp, ldcp, "allocb failure, "
5356 			    "unable to process priority frame\n");
5357 			mutex_enter(&ldcp->cblock);
5358 			return;
5359 		}
5360 	} else {
5361 		mp = vmp->mp;
5362 	}
5363 
5364 	/* copy the frame from the payload of raw data msg into the mblk */
5365 	bcopy(pkt->data, mp->b_rptr, size);
5366 	mp->b_wptr = mp->b_rptr + size;
5367 
5368 	if (vmp != NULL) {
5369 		vmp->state = VIO_MBLK_HAS_DATA;
5370 	}
5371 
5372 	/* update stats */
5373 	(void) atomic_inc_64(&statsp->rx_pri_packets);
5374 	(void) atomic_add_64(&statsp->rx_pri_bytes, size);
5375 
5376 	/*
5377 	 * If polling is currently enabled, add the packet to the priority
5378 	 * packets list and return. It will be picked up by the polling thread.
5379 	 */
5380 	if (dring_mode == VIO_RX_DRING_DATA) {
5381 		mutex_enter(&ldcp->rxlock);
5382 	} else {
5383 		mutex_enter(&ldcp->pollq_lock);
5384 	}
5385 
5386 	if (ldcp->polling_on == B_TRUE) {
5387 		if (ldcp->rx_pri_tail != NULL) {
5388 			ldcp->rx_pri_tail->b_next = mp;
5389 		} else {
5390 			ldcp->rx_pri_head = ldcp->rx_pri_tail = mp;
5391 		}
5392 	} else {
5393 		vrx_cb = ldcp->portp->vcb.vio_net_rx_cb;
5394 	}
5395 
5396 	if (dring_mode == VIO_RX_DRING_DATA) {
5397 		mutex_exit(&ldcp->rxlock);
5398 	} else {
5399 		mutex_exit(&ldcp->pollq_lock);
5400 	}
5401 
5402 	if (vrx_cb != NULL) {
5403 		vrx_cb(ldcp->portp->vhp, mp);
5404 	}
5405 
5406 	mutex_enter(&ldcp->cblock);
5407 }
5408 
5409 /*
5410  * dummy pkt data handler function for vnet protocol version 1.0
5411  */
5412 static void
5413 vgen_handle_pkt_data_nop(void *arg1, void *arg2, uint32_t msglen)
5414 {
5415 	_NOTE(ARGUNUSED(arg1, arg2, msglen))
5416 }
5417 
5418 /* handler for data messages received from the peer ldc end-point */
5419 static int
5420 vgen_handle_datamsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp, uint32_t msglen)
5421 {
5422 	int		rv = 0;
5423 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
5424 	vgen_hparams_t	*lp = &ldcp->local_hparams;
5425 
5426 	DBG1(vgenp, ldcp, "enter\n");
5427 
5428 	if (ldcp->hphase != VH_DONE) {
5429 		return (0);
5430 	}
5431 
5432 	/*
5433 	 * We check the data msg seqnum. This is needed only in TxDring mode.
5434 	 */
5435 	if (lp->dring_mode == VIO_TX_DRING &&
5436 	    tagp->vio_subtype == VIO_SUBTYPE_INFO) {
5437 		rv = vgen_check_datamsg_seq(ldcp, tagp);
5438 		if (rv != 0) {
5439 			return (rv);
5440 		}
5441 	}
5442 
5443 	switch (tagp->vio_subtype_env) {
5444 	case VIO_DRING_DATA:
5445 		rv = ldcp->rx_dringdata((void *)ldcp, (void *)tagp);
5446 		break;
5447 
5448 	case VIO_PKT_DATA:
5449 		ldcp->rx_pktdata((void *)ldcp, (void *)tagp, msglen);
5450 		break;
5451 	default:
5452 		break;
5453 	}
5454 
5455 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
5456 	return (rv);
5457 }
5458 
5459 
5460 static int
5461 vgen_ldc_reset(vgen_ldc_t *ldcp, vgen_caller_t caller)
5462 {
5463 	int	rv;
5464 
5465 	if (caller == VGEN_LDC_CB || caller == VGEN_MSG_THR) {
5466 		ASSERT(MUTEX_HELD(&ldcp->cblock));
5467 	}
5468 
5469 	/* Set the flag to indicate reset is in progress */
5470 	if (atomic_cas_uint(&ldcp->reset_in_progress, 0, 1) != 0) {
5471 		/* another thread is already in the process of resetting */
5472 		return (EBUSY);
5473 	}
5474 
5475 	if (caller == VGEN_LDC_CB || caller == VGEN_MSG_THR) {
5476 		mutex_exit(&ldcp->cblock);
5477 	}
5478 
5479 	rv = vgen_process_reset(ldcp, VGEN_FLAG_NEED_LDCRESET);
5480 
5481 	if (caller == VGEN_LDC_CB || caller == VGEN_MSG_THR) {
5482 		mutex_enter(&ldcp->cblock);
5483 	}
5484 
5485 	return (rv);
5486 }
5487 
5488 static void
5489 vgen_ldc_up(vgen_ldc_t *ldcp)
5490 {
5491 	int		rv;
5492 	uint32_t	retries = 0;
5493 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
5494 
5495 	ASSERT(MUTEX_HELD(&ldcp->cblock));
5496 
5497 	/*
5498 	 * If the channel has been reset max # of times, without successfully
5499 	 * completing handshake, stop and do not bring the channel up.
5500 	 */
5501 	if (ldcp->ldc_reset_count == vgen_ldc_max_resets) {
5502 		cmn_err(CE_WARN, "!vnet%d: exceeded number of permitted"
5503 		    " handshake attempts (%d) on channel %ld",
5504 		    vgenp->instance, vgen_ldc_max_resets, ldcp->ldc_id);
5505 		return;
5506 	}
5507 	ldcp->ldc_reset_count++;
5508 
5509 	do {
5510 		rv = ldc_up(ldcp->ldc_handle);
5511 		if ((rv != 0) && (rv == EWOULDBLOCK)) {
5512 			drv_usecwait(VGEN_LDC_UP_DELAY);
5513 		}
5514 		if (retries++ >= vgen_ldcup_retries)
5515 			break;
5516 	} while (rv == EWOULDBLOCK);
5517 
5518 	if (rv != 0) {
5519 		DWARN(vgenp, ldcp, "ldc_up err rv(%d)\n", rv);
5520 	}
5521 }
5522 
5523 int
5524 vgen_enable_intr(void *arg)
5525 {
5526 	uint32_t		end_ix;
5527 	vio_dring_msg_t		msg;
5528 	vgen_port_t		*portp = (vgen_port_t *)arg;
5529 	vgen_ldc_t		*ldcp = portp->ldcp;
5530 	vgen_hparams_t		*lp = &ldcp->local_hparams;
5531 
5532 	if (lp->dring_mode == VIO_RX_DRING_DATA) {
5533 		mutex_enter(&ldcp->rxlock);
5534 
5535 		ldcp->polling_on = B_FALSE;
5536 		/*
5537 		 * We send a stopped message to peer (sender) as we are turning
5538 		 * off polled mode. This effectively restarts data interrupts
5539 		 * by allowing the peer to send further dring data msgs to us.
5540 		 */
5541 		end_ix = ldcp->next_rxi;
5542 		DECR_RXI(end_ix, ldcp);
5543 		msg.dring_ident = ldcp->peer_hparams.dring_ident;
5544 		(void) vgen_send_dringack_shm(ldcp, (vio_msg_tag_t *)&msg,
5545 		    VNET_START_IDX_UNSPEC, end_ix, VIO_DP_STOPPED);
5546 
5547 		mutex_exit(&ldcp->rxlock);
5548 	} else {
5549 		mutex_enter(&ldcp->pollq_lock);
5550 		ldcp->polling_on = B_FALSE;
5551 		mutex_exit(&ldcp->pollq_lock);
5552 	}
5553 
5554 	return (0);
5555 }
5556 
5557 int
5558 vgen_disable_intr(void *arg)
5559 {
5560 	vgen_port_t		*portp = (vgen_port_t *)arg;
5561 	vgen_ldc_t		*ldcp = portp->ldcp;
5562 	vgen_hparams_t		*lp = &ldcp->local_hparams;
5563 
5564 	if (lp->dring_mode == VIO_RX_DRING_DATA) {
5565 		mutex_enter(&ldcp->rxlock);
5566 		ldcp->polling_on = B_TRUE;
5567 		mutex_exit(&ldcp->rxlock);
5568 	} else {
5569 		mutex_enter(&ldcp->pollq_lock);
5570 		ldcp->polling_on = B_TRUE;
5571 		mutex_exit(&ldcp->pollq_lock);
5572 	}
5573 
5574 	return (0);
5575 }
5576 
5577 mblk_t *
5578 vgen_rx_poll(void *arg, int bytes_to_pickup)
5579 {
5580 	vgen_port_t		*portp = (vgen_port_t *)arg;
5581 	vgen_ldc_t		*ldcp = portp->ldcp;
5582 	vgen_hparams_t		*lp = &ldcp->local_hparams;
5583 	mblk_t			*mp = NULL;
5584 
5585 	if (lp->dring_mode == VIO_RX_DRING_DATA) {
5586 		mp = vgen_poll_rcv_shm(ldcp, bytes_to_pickup);
5587 	} else {
5588 		mp = vgen_poll_rcv(ldcp, bytes_to_pickup);
5589 	}
5590 
5591 	return (mp);
5592 }
5593 
5594 /* transmit watchdog timeout handler */
5595 static void
5596 vgen_tx_watchdog(void *arg)
5597 {
5598 	vgen_ldc_t	*ldcp;
5599 	vgen_t		*vgenp;
5600 	int		rv;
5601 	boolean_t	tx_blocked;
5602 	clock_t		tx_blocked_lbolt;
5603 
5604 	ldcp = (vgen_ldc_t *)arg;
5605 	vgenp = LDC_TO_VGEN(ldcp);
5606 
5607 	tx_blocked = ldcp->tx_blocked;
5608 	tx_blocked_lbolt = ldcp->tx_blocked_lbolt;
5609 
5610 	if (vgen_txwd_timeout &&
5611 	    (tx_blocked == B_TRUE) &&
5612 	    ((ddi_get_lbolt() - tx_blocked_lbolt) >
5613 	    drv_usectohz(vgen_txwd_timeout * 1000))) {
5614 		/*
5615 		 * Something is wrong; the peer is not picking up the packets
5616 		 * in the transmit dring. We now go ahead and reset the channel
5617 		 * to break out of this condition.
5618 		 */
5619 		DWARN(vgenp, ldcp, "transmit timeout lbolt(%lx), "
5620 		    "tx_blocked_lbolt(%lx)\n",
5621 		    ddi_get_lbolt(), tx_blocked_lbolt);
5622 
5623 #ifdef DEBUG
5624 		if (vgen_inject_error(ldcp, VGEN_ERR_TXTIMEOUT)) {
5625 			/* tx timeout triggered for debugging */
5626 			vgen_inject_err_flag &= ~(VGEN_ERR_TXTIMEOUT);
5627 		}
5628 #endif
5629 
5630 		/*
5631 		 * Clear tid before invoking vgen_ldc_reset(). Otherwise,
5632 		 * it will result in a deadlock when vgen_process_reset() tries
5633 		 * to untimeout() on seeing a non-zero tid, but it is being
5634 		 * invoked by the timer itself in this case.
5635 		 */
5636 		mutex_enter(&ldcp->cblock);
5637 		if (ldcp->wd_tid == 0) {
5638 			/* Cancelled by vgen_process_reset() */
5639 			mutex_exit(&ldcp->cblock);
5640 			return;
5641 		}
5642 		ldcp->wd_tid = 0;
5643 		mutex_exit(&ldcp->cblock);
5644 
5645 		/*
5646 		 * Now reset the channel.
5647 		 */
5648 		rv = vgen_ldc_reset(ldcp, VGEN_OTHER);
5649 		if (rv == 0) {
5650 			/*
5651 			 * We have successfully reset the channel. If we are
5652 			 * in tx flow controlled state, clear it now and enable
5653 			 * transmit in the upper layer.
5654 			 */
5655 			if (ldcp->tx_blocked) {
5656 				vio_net_tx_update_t vtx_update =
5657 				    ldcp->portp->vcb.vio_net_tx_update;
5658 
5659 				ldcp->tx_blocked = B_FALSE;
5660 				vtx_update(ldcp->portp->vhp);
5661 			}
5662 		}
5663 
5664 		/*
5665 		 * Channel has been reset by us or some other thread is already
5666 		 * in the process of resetting. In either case, we return
5667 		 * without restarting the timer. When handshake completes and
5668 		 * the channel is ready for data transmit/receive we start a
5669 		 * new watchdog timer.
5670 		 */
5671 		return;
5672 	}
5673 
5674 restart_timer:
5675 	/* Restart the timer */
5676 	mutex_enter(&ldcp->cblock);
5677 	if (ldcp->wd_tid == 0) {
5678 		/* Cancelled by vgen_process_reset() */
5679 		mutex_exit(&ldcp->cblock);
5680 		return;
5681 	}
5682 	ldcp->wd_tid = timeout(vgen_tx_watchdog, (caddr_t)ldcp,
5683 	    drv_usectohz(vgen_txwd_interval * 1000));
5684 	mutex_exit(&ldcp->cblock);
5685 }
5686 
5687 /* Handshake watchdog timeout handler */
5688 static void
5689 vgen_hwatchdog(void *arg)
5690 {
5691 	vgen_ldc_t	*ldcp = (vgen_ldc_t *)arg;
5692 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
5693 
5694 	DWARN(vgenp, ldcp, "handshake timeout phase(%x) state(%x)\n",
5695 	    ldcp->hphase, ldcp->hstate);
5696 
5697 	mutex_enter(&ldcp->cblock);
5698 	if (ldcp->htid == 0) {
5699 		/* Cancelled by vgen_process_reset() */
5700 		mutex_exit(&ldcp->cblock);
5701 		return;
5702 	}
5703 	ldcp->htid = 0;
5704 	mutex_exit(&ldcp->cblock);
5705 
5706 	/*
5707 	 * Something is wrong; handshake with the peer seems to be hung. We now
5708 	 * go ahead and reset the channel to break out of this condition.
5709 	 */
5710 	(void) vgen_ldc_reset(ldcp, VGEN_OTHER);
5711 }
5712 
5713 /* Check if the session id in the received message is valid */
5714 static int
5715 vgen_check_sid(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5716 {
5717 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
5718 
5719 	if (tagp->vio_sid != ldcp->peer_sid) {
5720 		DWARN(vgenp, ldcp, "sid mismatch: expected(%x), rcvd(%x)\n",
5721 		    ldcp->peer_sid, tagp->vio_sid);
5722 		return (VGEN_FAILURE);
5723 	}
5724 	else
5725 		return (VGEN_SUCCESS);
5726 }
5727 
5728 /*
5729  * Initialize the common part of dring registration
5730  * message; used in both TxDring and RxDringData modes.
5731  */
5732 static void
5733 vgen_init_dring_reg_msg(vgen_ldc_t *ldcp, vio_dring_reg_msg_t *msg,
5734 	uint8_t option)
5735 {
5736 	vio_msg_tag_t		*tagp;
5737 
5738 	tagp = &msg->tag;
5739 	tagp->vio_msgtype = VIO_TYPE_CTRL;
5740 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
5741 	tagp->vio_subtype_env = VIO_DRING_REG;
5742 	tagp->vio_sid = ldcp->local_sid;
5743 
5744 	/* get dring info msg payload from ldcp->local */
5745 	bcopy(&(ldcp->local_hparams.dring_cookie), &(msg->cookie[0]),
5746 	    sizeof (ldc_mem_cookie_t));
5747 	msg->ncookies = ldcp->local_hparams.dring_ncookies;
5748 	msg->num_descriptors = ldcp->local_hparams.num_desc;
5749 	msg->descriptor_size = ldcp->local_hparams.desc_size;
5750 
5751 	msg->options = option;
5752 
5753 	/*
5754 	 * dring_ident is set to 0. After mapping the dring, peer sets this
5755 	 * value and sends it in the ack, which is saved in
5756 	 * vgen_handle_dring_reg().
5757 	 */
5758 	msg->dring_ident = 0;
5759 }
5760 
5761 #if DEBUG
5762 
5763 /*
5764  * Print debug messages - set to 0xf to enable all msgs
5765  */
5766 void
5767 vgen_debug_printf(const char *fname, vgen_t *vgenp,
5768     vgen_ldc_t *ldcp, const char *fmt, ...)
5769 {
5770 	char	buf[256];
5771 	char	*bufp = buf;
5772 	va_list	ap;
5773 
5774 	if ((vgenp != NULL) && (vgenp->vnetp != NULL)) {
5775 		(void) sprintf(bufp, "vnet%d:",
5776 		    ((vnet_t *)(vgenp->vnetp))->instance);
5777 		bufp += strlen(bufp);
5778 	}
5779 	if (ldcp != NULL) {
5780 		(void) sprintf(bufp, "ldc(%ld):", ldcp->ldc_id);
5781 		bufp += strlen(bufp);
5782 	}
5783 	(void) sprintf(bufp, "%s: ", fname);
5784 	bufp += strlen(bufp);
5785 
5786 	va_start(ap, fmt);
5787 	(void) vsprintf(bufp, fmt, ap);
5788 	va_end(ap);
5789 
5790 	if ((ldcp == NULL) ||(vgendbg_ldcid == -1) ||
5791 	    (vgendbg_ldcid == ldcp->ldc_id)) {
5792 		cmn_err(CE_CONT, "%s\n", buf);
5793 	}
5794 }
5795 #endif
5796 
5797 #ifdef	VNET_IOC_DEBUG
5798 
5799 static void
5800 vgen_ioctl(void *arg, queue_t *q, mblk_t *mp)
5801 {
5802 	struct iocblk	*iocp;
5803 	vgen_port_t	*portp;
5804 	enum		ioc_reply {
5805 			IOC_INVAL = -1,		/* bad, NAK with EINVAL */
5806 			IOC_ACK			/* OK, just send ACK    */
5807 	}		status;
5808 	int		rv;
5809 
5810 	iocp = (struct iocblk *)(uintptr_t)mp->b_rptr;
5811 	iocp->ioc_error = 0;
5812 	portp = (vgen_port_t *)arg;
5813 
5814 	if (portp == NULL) {
5815 		status = IOC_INVAL;
5816 		goto vgen_ioc_exit;
5817 	}
5818 
5819 	mutex_enter(&portp->lock);
5820 
5821 	switch (iocp->ioc_cmd) {
5822 
5823 	case VNET_FORCE_LINK_DOWN:
5824 	case VNET_FORCE_LINK_UP:
5825 		rv = vgen_force_link_state(portp, iocp->ioc_cmd);
5826 		(rv == 0) ? (status = IOC_ACK) : (status = IOC_INVAL);
5827 		break;
5828 
5829 	default:
5830 		status = IOC_INVAL;
5831 		break;
5832 
5833 	}
5834 
5835 	mutex_exit(&portp->lock);
5836 
5837 vgen_ioc_exit:
5838 
5839 	switch (status) {
5840 	default:
5841 	case IOC_INVAL:
5842 		/* Error, reply with a NAK and EINVAL error */
5843 		miocnak(q, mp, 0, EINVAL);
5844 		break;
5845 	case IOC_ACK:
5846 		/* OK, reply with an ACK */
5847 		miocack(q, mp, 0, 0);
5848 		break;
5849 	}
5850 }
5851 
5852 static int
5853 vgen_force_link_state(vgen_port_t *portp, int cmd)
5854 {
5855 	ldc_status_t	istatus;
5856 	int		rv;
5857 	vgen_ldc_t	*ldcp = portp->ldcp;
5858 	vgen_t		*vgenp = portp->vgenp;
5859 
5860 	mutex_enter(&ldcp->cblock);
5861 
5862 	switch (cmd) {
5863 
5864 	case VNET_FORCE_LINK_DOWN:
5865 		(void) ldc_down(ldcp->ldc_handle);
5866 		ldcp->link_down_forced = B_TRUE;
5867 		break;
5868 
5869 	case VNET_FORCE_LINK_UP:
5870 		vgen_ldc_up(ldcp);
5871 		ldcp->link_down_forced = B_FALSE;
5872 
5873 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
5874 			DWARN(vgenp, ldcp, "ldc_status err\n");
5875 		} else {
5876 			ldcp->ldc_status = istatus;
5877 		}
5878 
5879 		/* if channel is already UP - restart handshake */
5880 		if (ldcp->ldc_status == LDC_UP) {
5881 			vgen_handle_evt_up(ldcp);
5882 		}
5883 		break;
5884 
5885 	}
5886 
5887 	mutex_exit(&ldcp->cblock);
5888 
5889 	return (0);
5890 }
5891 
5892 #else
5893 
5894 static void
5895 vgen_ioctl(void *arg, queue_t *q, mblk_t *mp)
5896 {
5897 	vgen_port_t	*portp;
5898 
5899 	portp = (vgen_port_t *)arg;
5900 
5901 	if (portp == NULL) {
5902 		miocnak(q, mp, 0, EINVAL);
5903 		return;
5904 	}
5905 
5906 	miocnak(q, mp, 0, ENOTSUP);
5907 }
5908 
5909 #endif
5910