xref: /illumos-gate/usr/src/uts/sun4v/io/vnet_gen.c (revision 1de082f7b7fd4b6629e14b0f9b8f94f6c0bda3c2)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/types.h>
28 #include <sys/errno.h>
29 #include <sys/sysmacros.h>
30 #include <sys/param.h>
31 #include <sys/stream.h>
32 #include <sys/strsubr.h>
33 #include <sys/kmem.h>
34 #include <sys/conf.h>
35 #include <sys/devops.h>
36 #include <sys/ksynch.h>
37 #include <sys/stat.h>
38 #include <sys/modctl.h>
39 #include <sys/debug.h>
40 #include <sys/ethernet.h>
41 #include <sys/ddi.h>
42 #include <sys/sunddi.h>
43 #include <sys/strsun.h>
44 #include <sys/note.h>
45 #include <sys/mac_provider.h>
46 #include <sys/mac_ether.h>
47 #include <sys/ldc.h>
48 #include <sys/mach_descrip.h>
49 #include <sys/mdeg.h>
50 #include <net/if.h>
51 #include <sys/vnet.h>
52 #include <sys/vio_mailbox.h>
53 #include <sys/vio_common.h>
54 #include <sys/vnet_common.h>
55 #include <sys/vnet_mailbox.h>
56 #include <sys/vio_util.h>
57 #include <sys/vnet_gen.h>
58 #include <sys/atomic.h>
59 #include <sys/callb.h>
60 #include <sys/sdt.h>
61 #include <sys/intr.h>
62 #include <sys/pattr.h>
63 #include <sys/vlan.h>
64 
65 /*
66  * Implementation of the mac functionality for vnet using the
67  * generic(default) transport layer of sun4v Logical Domain Channels(LDC).
68  */
69 
70 /*
71  * Function prototypes.
72  */
73 /* vgen proxy entry points */
74 int vgen_init(void *vnetp, uint64_t regprop, dev_info_t *vnetdip,
75     const uint8_t *macaddr, void **vgenhdl);
76 void vgen_uninit(void *arg);
77 int vgen_dds_tx(void *arg, void *dmsg);
78 void vgen_mod_init(void);
79 int vgen_mod_cleanup(void);
80 void vgen_mod_fini(void);
81 static int vgen_start(void *arg);
82 static void vgen_stop(void *arg);
83 static mblk_t *vgen_tx(void *arg, mblk_t *mp);
84 static int vgen_multicst(void *arg, boolean_t add,
85 	const uint8_t *mca);
86 static int vgen_promisc(void *arg, boolean_t on);
87 static int vgen_unicst(void *arg, const uint8_t *mca);
88 static int vgen_stat(void *arg, uint_t stat, uint64_t *val);
89 static void vgen_ioctl(void *arg, queue_t *q, mblk_t *mp);
90 #ifdef	VNET_IOC_DEBUG
91 static int vgen_force_link_state(vgen_port_t *portp, int link_state);
92 #endif
93 
94 /* vgen internal functions */
95 static int vgen_read_mdprops(vgen_t *vgenp);
96 static void vgen_update_md_prop(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
97 static void vgen_read_pri_eth_types(vgen_t *vgenp, md_t *mdp,
98 	mde_cookie_t node);
99 static void vgen_mtu_read(vgen_t *vgenp, md_t *mdp, mde_cookie_t node,
100 	uint32_t *mtu);
101 static void vgen_linkprop_read(vgen_t *vgenp, md_t *mdp, mde_cookie_t node,
102 	boolean_t *pls);
103 static void vgen_detach_ports(vgen_t *vgenp);
104 static void vgen_port_detach(vgen_port_t *portp);
105 static void vgen_port_list_insert(vgen_port_t *portp);
106 static void vgen_port_list_remove(vgen_port_t *portp);
107 static vgen_port_t *vgen_port_lookup(vgen_portlist_t *plistp,
108 	int port_num);
109 static int vgen_mdeg_reg(vgen_t *vgenp);
110 static void vgen_mdeg_unreg(vgen_t *vgenp);
111 static int vgen_mdeg_cb(void *cb_argp, mdeg_result_t *resp);
112 static int vgen_mdeg_port_cb(void *cb_argp, mdeg_result_t *resp);
113 static int vgen_add_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
114 static int vgen_port_read_props(vgen_port_t *portp, vgen_t *vgenp, md_t *mdp,
115 	mde_cookie_t mdex);
116 static int vgen_remove_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
117 static int vgen_port_attach(vgen_port_t *portp);
118 static void vgen_port_detach_mdeg(vgen_port_t *portp);
119 static void vgen_port_detach_mdeg(vgen_port_t *portp);
120 static int vgen_update_port(vgen_t *vgenp, md_t *curr_mdp,
121 	mde_cookie_t curr_mdex, md_t *prev_mdp, mde_cookie_t prev_mdex);
122 static uint64_t	vgen_port_stat(vgen_port_t *portp, uint_t stat);
123 static void vgen_port_reset(vgen_port_t *portp);
124 static void vgen_reset_vsw_port(vgen_t *vgenp);
125 static void vgen_ldc_reset(vgen_ldc_t *ldcp);
126 static int vgen_ldc_attach(vgen_port_t *portp, uint64_t ldc_id);
127 static void vgen_ldc_detach(vgen_ldc_t *ldcp);
128 static int vgen_alloc_tx_ring(vgen_ldc_t *ldcp);
129 static void vgen_free_tx_ring(vgen_ldc_t *ldcp);
130 static void vgen_init_ports(vgen_t *vgenp);
131 static void vgen_port_init(vgen_port_t *portp);
132 static void vgen_uninit_ports(vgen_t *vgenp);
133 static void vgen_port_uninit(vgen_port_t *portp);
134 static void vgen_init_ldcs(vgen_port_t *portp);
135 static void vgen_uninit_ldcs(vgen_port_t *portp);
136 static int vgen_ldc_init(vgen_ldc_t *ldcp);
137 static void vgen_ldc_uninit(vgen_ldc_t *ldcp);
138 static int vgen_init_tbufs(vgen_ldc_t *ldcp);
139 static void vgen_uninit_tbufs(vgen_ldc_t *ldcp);
140 static void vgen_clobber_tbufs(vgen_ldc_t *ldcp);
141 static void vgen_clobber_rxds(vgen_ldc_t *ldcp);
142 static uint64_t	vgen_ldc_stat(vgen_ldc_t *ldcp, uint_t stat);
143 static uint_t vgen_ldc_cb(uint64_t event, caddr_t arg);
144 static int vgen_portsend(vgen_port_t *portp, mblk_t *mp);
145 static int vgen_ldcsend(void *arg, mblk_t *mp);
146 static void vgen_ldcsend_pkt(void *arg, mblk_t *mp);
147 static int vgen_ldcsend_dring(void *arg, mblk_t *mp);
148 static void vgen_reclaim(vgen_ldc_t *ldcp);
149 static void vgen_reclaim_dring(vgen_ldc_t *ldcp);
150 static int vgen_num_txpending(vgen_ldc_t *ldcp);
151 static int vgen_tx_dring_full(vgen_ldc_t *ldcp);
152 static int vgen_ldc_txtimeout(vgen_ldc_t *ldcp);
153 static void vgen_ldc_watchdog(void *arg);
154 
155 /* vgen handshake functions */
156 static vgen_ldc_t *vh_nextphase(vgen_ldc_t *ldcp);
157 static int vgen_sendmsg(vgen_ldc_t *ldcp, caddr_t msg,  size_t msglen,
158 	boolean_t caller_holds_lock);
159 static int vgen_send_version_negotiate(vgen_ldc_t *ldcp);
160 static int vgen_send_attr_info(vgen_ldc_t *ldcp);
161 static int vgen_send_dring_reg(vgen_ldc_t *ldcp);
162 static int vgen_send_rdx_info(vgen_ldc_t *ldcp);
163 static int vgen_send_dring_data(vgen_ldc_t *ldcp, uint32_t start, int32_t end);
164 static int vgen_send_mcast_info(vgen_ldc_t *ldcp);
165 static int vgen_handshake_phase2(vgen_ldc_t *ldcp);
166 static void vgen_handshake_reset(vgen_ldc_t *ldcp);
167 static void vgen_reset_hphase(vgen_ldc_t *ldcp);
168 static void vgen_handshake(vgen_ldc_t *ldcp);
169 static int vgen_handshake_done(vgen_ldc_t *ldcp);
170 static void vgen_handshake_retry(vgen_ldc_t *ldcp);
171 static int vgen_handle_version_negotiate(vgen_ldc_t *ldcp,
172 	vio_msg_tag_t *tagp);
173 static int vgen_handle_attr_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
174 static int vgen_handle_dring_reg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
175 static int vgen_handle_rdx_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
176 static int vgen_handle_mcast_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
177 static int vgen_handle_ctrlmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
178 static void vgen_handle_pkt_data_nop(void *arg1, void *arg2, uint32_t msglen);
179 static void vgen_handle_pkt_data(void *arg1, void *arg2, uint32_t msglen);
180 static int vgen_handle_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
181 static int vgen_handle_dring_data_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
182 static int vgen_process_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
183 static int vgen_handle_dring_data_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
184 static int vgen_handle_dring_data_nack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
185 static int vgen_send_dring_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp,
186 	uint32_t start, int32_t end, uint8_t pstate);
187 static int vgen_handle_datamsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp,
188 	uint32_t msglen);
189 static void vgen_handle_errmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
190 static void vgen_handle_evt_up(vgen_ldc_t *ldcp);
191 static void vgen_handle_evt_reset(vgen_ldc_t *ldcp);
192 static int vgen_check_sid(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
193 static int vgen_check_datamsg_seq(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
194 static caddr_t vgen_print_ethaddr(uint8_t *a, char *ebuf);
195 static void vgen_hwatchdog(void *arg);
196 static void vgen_print_attr_info(vgen_ldc_t *ldcp, int endpoint);
197 static void vgen_print_hparams(vgen_hparams_t *hp);
198 static void vgen_print_ldcinfo(vgen_ldc_t *ldcp);
199 static void vgen_stop_rcv_thread(vgen_ldc_t *ldcp);
200 static void vgen_drain_rcv_thread(vgen_ldc_t *ldcp);
201 static void vgen_ldc_rcv_worker(void *arg);
202 static void vgen_handle_evt_read(vgen_ldc_t *ldcp);
203 static void vgen_rx(vgen_ldc_t *ldcp, mblk_t *bp);
204 static void vgen_set_vnet_proto_ops(vgen_ldc_t *ldcp);
205 static void vgen_reset_vnet_proto_ops(vgen_ldc_t *ldcp);
206 static void vgen_link_update(vgen_t *vgenp, link_state_t link_state);
207 
208 /* VLAN routines */
209 static void vgen_vlan_read_ids(void *arg, int type, md_t *mdp,
210 	mde_cookie_t node, uint16_t *pvidp, uint16_t **vidspp,
211 	uint16_t *nvidsp, uint16_t *default_idp);
212 static void vgen_vlan_create_hash(vgen_port_t *portp);
213 static void vgen_vlan_destroy_hash(vgen_port_t *portp);
214 static void vgen_vlan_add_ids(vgen_port_t *portp);
215 static void vgen_vlan_remove_ids(vgen_port_t *portp);
216 static boolean_t vgen_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid);
217 static boolean_t vgen_frame_lookup_vid(vnet_t *vnetp, struct ether_header *ehp,
218 	uint16_t *vidp);
219 static mblk_t *vgen_vlan_frame_fixtag(vgen_port_t *portp, mblk_t *mp,
220 	boolean_t is_tagged, uint16_t vid);
221 static void vgen_vlan_unaware_port_reset(vgen_port_t *portp);
222 static void vgen_reset_vlan_unaware_ports(vgen_t *vgenp);
223 static int vgen_dds_rx(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
224 
225 /* externs */
226 extern void vnet_dds_rx(void *arg, void *dmsg);
227 extern void vnet_dds_cleanup_hio(vnet_t *vnetp);
228 extern int vnet_mtu_update(vnet_t *vnetp, uint32_t mtu);
229 extern void vnet_link_update(vnet_t *vnetp, link_state_t link_state);
230 
231 /*
232  * The handshake process consists of 5 phases defined below, with VH_PHASE0
233  * being the pre-handshake phase and VH_DONE is the phase to indicate
234  * successful completion of all phases.
235  * Each phase may have one to several handshake states which are required
236  * to complete successfully to move to the next phase.
237  * Refer to the functions vgen_handshake() and vgen_handshake_done() for
238  * more details.
239  */
240 /* handshake phases */
241 enum {	VH_PHASE0, VH_PHASE1, VH_PHASE2, VH_PHASE3, VH_DONE = 0x80 };
242 
243 /* handshake states */
244 enum {
245 
246 	VER_INFO_SENT	=	0x1,
247 	VER_ACK_RCVD	=	0x2,
248 	VER_INFO_RCVD	=	0x4,
249 	VER_ACK_SENT	=	0x8,
250 	VER_NEGOTIATED	=	(VER_ACK_RCVD | VER_ACK_SENT),
251 
252 	ATTR_INFO_SENT	=	0x10,
253 	ATTR_ACK_RCVD	=	0x20,
254 	ATTR_INFO_RCVD	=	0x40,
255 	ATTR_ACK_SENT	=	0x80,
256 	ATTR_INFO_EXCHANGED	=	(ATTR_ACK_RCVD | ATTR_ACK_SENT),
257 
258 	DRING_INFO_SENT	=	0x100,
259 	DRING_ACK_RCVD	=	0x200,
260 	DRING_INFO_RCVD	=	0x400,
261 	DRING_ACK_SENT	=	0x800,
262 	DRING_INFO_EXCHANGED	=	(DRING_ACK_RCVD | DRING_ACK_SENT),
263 
264 	RDX_INFO_SENT	=	0x1000,
265 	RDX_ACK_RCVD	=	0x2000,
266 	RDX_INFO_RCVD	=	0x4000,
267 	RDX_ACK_SENT	=	0x8000,
268 	RDX_EXCHANGED	=	(RDX_ACK_RCVD | RDX_ACK_SENT)
269 
270 };
271 
272 #define	VGEN_PRI_ETH_DEFINED(vgenp)	((vgenp)->pri_num_types != 0)
273 
/*
 * Acquire every per-channel lock, always in the order
 * cblock -> rxlock -> wrlock -> txlock -> tclock.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and can safely be used as the body of an unbraced
 * if/else or loop. Invoke it as a statement: LDC_LOCK(ldcp);
 */
#define	LDC_LOCK(ldcp)	\
	do {							\
		mutex_enter(&((ldcp)->cblock));			\
		mutex_enter(&((ldcp)->rxlock));			\
		mutex_enter(&((ldcp)->wrlock));			\
		mutex_enter(&((ldcp)->txlock));			\
		mutex_enter(&((ldcp)->tclock));			\
	} while (0)

/*
 * Release every per-channel lock in the reverse of the LDC_LOCK() order.
 * Invoke it as a statement: LDC_UNLOCK(ldcp);
 */
#define	LDC_UNLOCK(ldcp)	\
	do {							\
		mutex_exit(&((ldcp)->tclock));			\
		mutex_exit(&((ldcp)->txlock));			\
		mutex_exit(&((ldcp)->wrlock));			\
		mutex_exit(&((ldcp)->rxlock));			\
		mutex_exit(&((ldcp)->cblock));			\
	} while (0)
286 
287 #define	VGEN_VER_EQ(ldcp, major, minor)	\
288 	((ldcp)->local_hparams.ver_major == (major) &&	\
289 	    (ldcp)->local_hparams.ver_minor == (minor))
290 
291 #define	VGEN_VER_LT(ldcp, major, minor)	\
292 	(((ldcp)->local_hparams.ver_major < (major)) ||	\
293 	    ((ldcp)->local_hparams.ver_major == (major) &&	\
294 	    (ldcp)->local_hparams.ver_minor < (minor)))
295 
296 #define	VGEN_VER_GTEQ(ldcp, major, minor)	\
297 	(((ldcp)->local_hparams.ver_major > (major)) ||	\
298 	    ((ldcp)->local_hparams.ver_major == (major) &&	\
299 	    (ldcp)->local_hparams.ver_minor >= (minor)))
300 
301 static struct ether_addr etherbroadcastaddr = {
302 	0xff, 0xff, 0xff, 0xff, 0xff, 0xff
303 };
/*
 * MIB II broadcast/multicast packets.
 *
 * 'ehp' is a pointer to an object with an ether_dhost member (the callers
 * in this file pass a struct ether_header *). The argument is parenthesized
 * in the expansion so that any pointer-valued expression can be passed.
 */
#define	IS_BROADCAST(ehp) \
		(ether_cmp(&(ehp)->ether_dhost, &etherbroadcastaddr) == 0)
#define	IS_MULTICAST(ehp) \
		(((ehp)->ether_dhost.ether_addr_octet[0] & 01) == 1)
311 
312 /*
313  * Property names
314  */
315 static char macaddr_propname[] = "mac-address";
316 static char rmacaddr_propname[] = "remote-mac-address";
317 static char channel_propname[] = "channel-endpoint";
318 static char reg_propname[] = "reg";
319 static char port_propname[] = "port";
320 static char swport_propname[] = "switch-port";
321 static char id_propname[] = "id";
322 static char vdev_propname[] = "virtual-device";
323 static char vnet_propname[] = "network";
324 static char pri_types_propname[] = "priority-ether-types";
325 static char vgen_pvid_propname[] = "port-vlan-id";
326 static char vgen_vid_propname[] = "vlan-id";
327 static char vgen_dvid_propname[] = "default-vlan-id";
328 static char port_pvid_propname[] = "remote-port-vlan-id";
329 static char port_vid_propname[] = "remote-vlan-id";
330 static char vgen_mtu_propname[] = "mtu";
331 static char vgen_linkprop_propname[] = "linkprop";
332 
333 /*
334  * VIO Protocol Version Info:
335  *
336  * The version specified below represents the version of protocol currently
337  * supported in the driver. It means the driver can negotiate with peers with
338  * versions <= this version. Here is a summary of the feature(s) that are
339  * supported at each version of the protocol:
340  *
341  * 1.0			Basic VIO protocol.
342  * 1.1			vDisk protocol update (no virtual network update).
343  * 1.2			Support for priority frames (priority-ether-types).
344  * 1.3			VLAN and HybridIO support.
345  * 1.4			Jumbo Frame support.
346  * 1.5			Link State Notification support with optional support
347  * 			for Physical Link information.
348  */
349 static vgen_ver_t vgen_versions[VGEN_NUM_VER] =  { {1, 5} };
350 
351 /* Tunables */
352 uint32_t vgen_hwd_interval = 5;		/* handshake watchdog freq in sec */
353 uint32_t vgen_max_hretries = VNET_NUM_HANDSHAKES; /* # of handshake retries */
354 uint32_t vgen_ldcwr_retries = 10;	/* max # of ldc_write() retries */
355 uint32_t vgen_ldcup_retries = 5;	/* max # of ldc_up() retries */
356 uint32_t vgen_ldccl_retries = 5;	/* max # of ldc_close() retries */
357 uint32_t vgen_recv_delay = 1;		/* delay when rx descr not ready */
358 uint32_t vgen_recv_retries = 10;	/* retry when rx descr not ready */
359 uint32_t vgen_tx_retries = 0x4;		/* retry when tx descr not available */
360 uint32_t vgen_tx_delay = 0x30;		/* delay when tx descr not available */
361 
362 int vgen_rcv_thread_enabled = 1;	/* Enable Recieve thread */
363 
364 static vio_mblk_pool_t	*vgen_rx_poolp = NULL;
365 static krwlock_t	vgen_rw;
366 
367 /*
368  * max # of packets accumulated prior to sending them up. It is best
369  * to keep this at 60% of the number of receive buffers.
370  */
371 uint32_t vgen_chain_len = (VGEN_NRBUFS * 0.6);
372 
373 /*
374  * Internal tunables for receive buffer pools, that is,  the size and number of
375  * mblks for each pool. At least 3 sizes must be specified if these are used.
376  * The sizes must be specified in increasing order. Non-zero value of the first
377  * size will be used as a hint to use these values instead of the algorithm
378  * that determines the sizes based on MTU.
379  */
380 uint32_t vgen_rbufsz1 = 0;
381 uint32_t vgen_rbufsz2 = 0;
382 uint32_t vgen_rbufsz3 = 0;
383 uint32_t vgen_rbufsz4 = 0;
384 
385 uint32_t vgen_nrbufs1 = VGEN_NRBUFS;
386 uint32_t vgen_nrbufs2 = VGEN_NRBUFS;
387 uint32_t vgen_nrbufs3 = VGEN_NRBUFS;
388 uint32_t vgen_nrbufs4 = VGEN_NRBUFS;
389 
390 /*
391  * In the absence of "priority-ether-types" property in MD, the following
392  * internal tunable can be set to specify a single priority ethertype.
393  */
394 uint64_t vgen_pri_eth_type = 0;
395 
396 /*
397  * Number of transmit priority buffers that are preallocated per device.
398  * This number is chosen to be a small value to throttle transmission
399  * of priority packets. Note: Must be a power of 2 for vio_create_mblks().
400  */
401 uint32_t vgen_pri_tx_nmblks = 64;
402 
403 uint32_t	vgen_vlan_nchains = 4;	/* # of chains in vlan id hash table */
404 
405 #ifdef DEBUG
406 /* flags to simulate error conditions for debugging */
407 int vgen_trigger_txtimeout = 0;
408 int vgen_trigger_rxlost = 0;
409 #endif
410 
411 /*
412  * Matching criteria passed to the MDEG to register interest
413  * in changes to 'virtual-device' nodes (i.e. vnet nodes) identified
414  * by their 'name' and 'cfg-handle' properties.
415  */
416 static md_prop_match_t vdev_prop_match[] = {
417 	{ MDET_PROP_STR,    "name"   },
418 	{ MDET_PROP_VAL,    "cfg-handle" },
419 	{ MDET_LIST_END,    NULL    }
420 };
421 
422 static mdeg_node_match_t vdev_match = { "virtual-device",
423 						vdev_prop_match };
424 
425 /* MD update matching structure */
426 static md_prop_match_t	vport_prop_match[] = {
427 	{ MDET_PROP_VAL,	"id" },
428 	{ MDET_LIST_END,	NULL }
429 };
430 
431 static mdeg_node_match_t vport_match = { "virtual-device-port",
432 					vport_prop_match };
433 
434 /* template for matching a particular vnet instance */
435 static mdeg_prop_spec_t vgen_prop_template[] = {
436 	{ MDET_PROP_STR,	"name",		"network" },
437 	{ MDET_PROP_VAL,	"cfg-handle",	NULL },
438 	{ MDET_LIST_END,	NULL,		NULL }
439 };
440 
441 #define	VGEN_SET_MDEG_PROP_INST(specp, val)	(specp)[1].ps_val = (val)
442 
443 static int vgen_mdeg_port_cb(void *cb_argp, mdeg_result_t *resp);
444 
445 #ifdef	VNET_IOC_DEBUG
446 #define	VGEN_M_CALLBACK_FLAGS	(MC_IOCTL)
447 #else
448 #define	VGEN_M_CALLBACK_FLAGS	(0)
449 #endif
450 
451 static mac_callbacks_t vgen_m_callbacks = {
452 	VGEN_M_CALLBACK_FLAGS,
453 	vgen_stat,
454 	vgen_start,
455 	vgen_stop,
456 	vgen_promisc,
457 	vgen_multicst,
458 	vgen_unicst,
459 	vgen_tx,
460 	vgen_ioctl,
461 	NULL,
462 	NULL
463 };
464 
465 /* externs */
466 extern pri_t	maxclsyspri;
467 extern proc_t	p0;
468 extern uint32_t vnet_ntxds;
469 extern uint32_t vnet_ldcwd_interval;
470 extern uint32_t vnet_ldcwd_txtimeout;
471 extern uint32_t vnet_ldc_mtu;
472 extern uint32_t vnet_nrbufs;
473 extern uint32_t	vnet_ethermtu;
474 extern uint16_t	vnet_default_vlan_id;
475 extern boolean_t vnet_jumbo_rxpools;
476 
477 #ifdef DEBUG
478 
479 extern int vnet_dbglevel;
480 static void debug_printf(const char *fname, vgen_t *vgenp,
481 	vgen_ldc_t *ldcp, const char *fmt, ...);
482 
483 /* -1 for all LDCs info, or ldc_id for a specific LDC info */
484 int vgendbg_ldcid = -1;
485 
486 /* simulate handshake error conditions for debug */
487 uint32_t vgen_hdbg;
488 #define	HDBG_VERSION	0x1
489 #define	HDBG_TIMEOUT	0x2
490 #define	HDBG_BAD_SID	0x4
491 #define	HDBG_OUT_STATE	0x8
492 
493 #endif
494 
495 /*
496  * vgen_init() is called by an instance of vnet driver to initialize the
497  * corresponding generic proxy transport layer. The arguments passed by vnet
498  * are - an opaque pointer to the vnet instance, pointers to dev_info_t and
499  * the mac address of the vnet device, and a pointer to vgen_t is passed
500  * back as a handle to vnet.
501  */
502 int
503 vgen_init(void *vnetp, uint64_t regprop, dev_info_t *vnetdip,
504     const uint8_t *macaddr, void **vgenhdl)
505 {
506 	vgen_t *vgenp;
507 	int instance;
508 	int rv;
509 
510 	if ((vnetp == NULL) || (vnetdip == NULL))
511 		return (DDI_FAILURE);
512 
513 	instance = ddi_get_instance(vnetdip);
514 
515 	DBG1(NULL, NULL, "vnet(%d): enter\n", instance);
516 
517 	vgenp = kmem_zalloc(sizeof (vgen_t), KM_SLEEP);
518 
519 	vgenp->vnetp = vnetp;
520 	vgenp->instance = instance;
521 	vgenp->regprop = regprop;
522 	vgenp->vnetdip = vnetdip;
523 	bcopy(macaddr, &(vgenp->macaddr), ETHERADDRL);
524 	vgenp->phys_link_state = LINK_STATE_UNKNOWN;
525 
526 	/* allocate multicast table */
527 	vgenp->mctab = kmem_zalloc(VGEN_INIT_MCTAB_SIZE *
528 	    sizeof (struct ether_addr), KM_SLEEP);
529 	vgenp->mccount = 0;
530 	vgenp->mcsize = VGEN_INIT_MCTAB_SIZE;
531 
532 	mutex_init(&vgenp->lock, NULL, MUTEX_DRIVER, NULL);
533 	rw_init(&vgenp->vgenports.rwlock, NULL, RW_DRIVER, NULL);
534 
535 	rv = vgen_read_mdprops(vgenp);
536 	if (rv != 0) {
537 		goto vgen_init_fail;
538 	}
539 
540 	/* register with MD event generator */
541 	rv = vgen_mdeg_reg(vgenp);
542 	if (rv != DDI_SUCCESS) {
543 		goto vgen_init_fail;
544 	}
545 
546 	*vgenhdl = (void *)vgenp;
547 
548 	DBG1(NULL, NULL, "vnet(%d): exit\n", instance);
549 	return (DDI_SUCCESS);
550 
551 vgen_init_fail:
552 	rw_destroy(&vgenp->vgenports.rwlock);
553 	mutex_destroy(&vgenp->lock);
554 	kmem_free(vgenp->mctab, VGEN_INIT_MCTAB_SIZE *
555 	    sizeof (struct ether_addr));
556 	if (VGEN_PRI_ETH_DEFINED(vgenp)) {
557 		kmem_free(vgenp->pri_types,
558 		    sizeof (uint16_t) * vgenp->pri_num_types);
559 		(void) vio_destroy_mblks(vgenp->pri_tx_vmp);
560 	}
561 	KMEM_FREE(vgenp);
562 	return (DDI_FAILURE);
563 }
564 
565 /*
566  * Called by vnet to undo the initializations done by vgen_init().
567  * The handle provided by generic transport during vgen_init() is the argument.
568  */
569 void
570 vgen_uninit(void *arg)
571 {
572 	vgen_t		*vgenp = (vgen_t *)arg;
573 	vio_mblk_pool_t	*rp;
574 	vio_mblk_pool_t	*nrp;
575 
576 	if (vgenp == NULL) {
577 		return;
578 	}
579 
580 	DBG1(vgenp, NULL, "enter\n");
581 
582 	/* unregister with MD event generator */
583 	vgen_mdeg_unreg(vgenp);
584 
585 	mutex_enter(&vgenp->lock);
586 
587 	/* detach all ports from the device */
588 	vgen_detach_ports(vgenp);
589 
590 	/*
591 	 * free any pending rx mblk pools,
592 	 * that couldn't be freed previously during channel detach.
593 	 */
594 	rp = vgenp->rmp;
595 	while (rp != NULL) {
596 		nrp = vgenp->rmp = rp->nextp;
597 		if (vio_destroy_mblks(rp)) {
598 			WRITE_ENTER(&vgen_rw);
599 			rp->nextp = vgen_rx_poolp;
600 			vgen_rx_poolp = rp;
601 			RW_EXIT(&vgen_rw);
602 		}
603 		rp = nrp;
604 	}
605 
606 	/* free multicast table */
607 	kmem_free(vgenp->mctab, vgenp->mcsize * sizeof (struct ether_addr));
608 
609 	/* free pri_types table */
610 	if (VGEN_PRI_ETH_DEFINED(vgenp)) {
611 		kmem_free(vgenp->pri_types,
612 		    sizeof (uint16_t) * vgenp->pri_num_types);
613 		(void) vio_destroy_mblks(vgenp->pri_tx_vmp);
614 	}
615 
616 	mutex_exit(&vgenp->lock);
617 
618 	rw_destroy(&vgenp->vgenports.rwlock);
619 	mutex_destroy(&vgenp->lock);
620 
621 	DBG1(vgenp, NULL, "exit\n");
622 	KMEM_FREE(vgenp);
623 }
624 
625 /*
626  * module specific initialization common to all instances of vnet/vgen.
627  */
628 void
629 vgen_mod_init(void)
630 {
631 	rw_init(&vgen_rw, NULL, RW_DRIVER, NULL);
632 }
633 
634 /*
635  * module specific cleanup common to all instances of vnet/vgen.
636  */
637 int
638 vgen_mod_cleanup(void)
639 {
640 	vio_mblk_pool_t	*poolp, *npoolp;
641 
642 	/*
643 	 * If any rx mblk pools are still in use, return
644 	 * error and stop the module from unloading.
645 	 */
646 	WRITE_ENTER(&vgen_rw);
647 	poolp = vgen_rx_poolp;
648 	while (poolp != NULL) {
649 		npoolp = vgen_rx_poolp = poolp->nextp;
650 		if (vio_destroy_mblks(poolp) != 0) {
651 			vgen_rx_poolp = poolp;
652 			RW_EXIT(&vgen_rw);
653 			return (EBUSY);
654 		}
655 		poolp = npoolp;
656 	}
657 	RW_EXIT(&vgen_rw);
658 
659 	return (0);
660 }
661 
662 /*
663  * module specific uninitialization common to all instances of vnet/vgen.
664  */
665 void
666 vgen_mod_fini(void)
667 {
668 	rw_destroy(&vgen_rw);
669 }
670 
671 /* enable transmit/receive for the device */
672 int
673 vgen_start(void *arg)
674 {
675 	vgen_port_t	*portp = (vgen_port_t *)arg;
676 	vgen_t		*vgenp = portp->vgenp;
677 
678 	DBG1(vgenp, NULL, "enter\n");
679 	mutex_enter(&portp->lock);
680 	vgen_port_init(portp);
681 	portp->flags |= VGEN_STARTED;
682 	mutex_exit(&portp->lock);
683 	DBG1(vgenp, NULL, "exit\n");
684 
685 	return (DDI_SUCCESS);
686 }
687 
688 /* stop transmit/receive */
689 void
690 vgen_stop(void *arg)
691 {
692 	vgen_port_t	*portp = (vgen_port_t *)arg;
693 	vgen_t		*vgenp = portp->vgenp;
694 
695 	DBG1(vgenp, NULL, "enter\n");
696 
697 	mutex_enter(&portp->lock);
698 	vgen_port_uninit(portp);
699 	portp->flags &= ~(VGEN_STARTED);
700 	mutex_exit(&portp->lock);
701 	DBG1(vgenp, NULL, "exit\n");
702 
703 }
704 
705 /* vgen transmit function */
706 static mblk_t *
707 vgen_tx(void *arg, mblk_t *mp)
708 {
709 	int i;
710 	vgen_port_t *portp;
711 	int status = VGEN_FAILURE;
712 
713 	portp = (vgen_port_t *)arg;
714 	/*
715 	 * Retry so that we avoid reporting a failure
716 	 * to the upper layer. Returning a failure may cause the
717 	 * upper layer to go into single threaded mode there by
718 	 * causing performance degradation, especially for a large
719 	 * number of connections.
720 	 */
721 	for (i = 0; i < vgen_tx_retries; ) {
722 		status = vgen_portsend(portp, mp);
723 		if (status == VGEN_SUCCESS) {
724 			break;
725 		}
726 		if (++i < vgen_tx_retries)
727 			delay(drv_usectohz(vgen_tx_delay));
728 	}
729 	if (status != VGEN_SUCCESS) {
730 		/* failure */
731 		return (mp);
732 	}
733 	/* success */
734 	return (NULL);
735 }
736 
737 /*
738  * This function provides any necessary tagging/untagging of the frames
739  * that are being transmitted over the port. It first verifies the vlan
740  * membership of the destination(port) and drops the packet if the
741  * destination doesn't belong to the given vlan.
742  *
743  * Arguments:
744  *   portp:     port over which the frames should be transmitted
745  *   mp:        frame to be transmitted
746  *   is_tagged:
747  *              B_TRUE: indicates frame header contains the vlan tag already.
748  *              B_FALSE: indicates frame is untagged.
749  *   vid:       vlan in which the frame should be transmitted.
750  *
751  * Returns:
752  *              Sucess: frame(mblk_t *) after doing the necessary tag/untag.
753  *              Failure: NULL
754  */
755 static mblk_t *
756 vgen_vlan_frame_fixtag(vgen_port_t *portp, mblk_t *mp, boolean_t is_tagged,
757 	uint16_t vid)
758 {
759 	vgen_t				*vgenp;
760 	boolean_t			dst_tagged;
761 	int				rv;
762 
763 	vgenp = portp->vgenp;
764 
765 	/*
766 	 * If the packet is going to a vnet:
767 	 *   Check if the destination vnet is in the same vlan.
768 	 *   Check the frame header if tag or untag is needed.
769 	 *
770 	 * We do not check the above conditions if the packet is going to vsw:
771 	 *   vsw must be present implicitly in all the vlans that a vnet device
772 	 *   is configured into; even if vsw itself is not assigned to those
773 	 *   vlans as an interface. For instance, the packet might be destined
774 	 *   to another vnet(indirectly through vsw) or to an external host
775 	 *   which is in the same vlan as this vnet and vsw itself may not be
776 	 *   present in that vlan. Similarly packets going to vsw must be
777 	 *   always tagged(unless in the default-vlan) if not already tagged,
778 	 *   as we do not know the final destination. This is needed because
779 	 *   vsw must always invoke its switching function only after tagging
780 	 *   the packet; otherwise after switching function determines the
781 	 *   destination we cannot figure out if the destination belongs to the
782 	 *   the same vlan that the frame originated from and if it needs tag/
783 	 *   untag. Note that vsw will tag the packet itself when it receives
784 	 *   it over the channel from a client if needed. However, that is
785 	 *   needed only in the case of vlan unaware clients such as obp or
786 	 *   earlier versions of vnet.
787 	 *
788 	 */
789 	if (portp != vgenp->vsw_portp) {
790 		/*
791 		 * Packet going to a vnet. Check if the destination vnet is in
792 		 * the same vlan. Then check the frame header if tag/untag is
793 		 * needed.
794 		 */
795 		rv = vgen_vlan_lookup(portp->vlan_hashp, vid);
796 		if (rv == B_FALSE) {
797 			/* drop the packet */
798 			freemsg(mp);
799 			return (NULL);
800 		}
801 
802 		/* is the destination tagged or untagged in this vlan? */
803 		(vid == portp->pvid) ? (dst_tagged = B_FALSE) :
804 		    (dst_tagged = B_TRUE);
805 
806 		if (is_tagged == dst_tagged) {
807 			/* no tagging/untagging needed */
808 			return (mp);
809 		}
810 
811 		if (is_tagged == B_TRUE) {
812 			/* frame is tagged; destination needs untagged */
813 			mp = vnet_vlan_remove_tag(mp);
814 			return (mp);
815 		}
816 
817 		/* (is_tagged == B_FALSE): fallthru to tag tx packet: */
818 	}
819 
820 	/*
821 	 * Packet going to a vnet needs tagging.
822 	 * OR
823 	 * If the packet is going to vsw, then it must be tagged in all cases:
824 	 * unknown unicast, broadcast/multicast or to vsw interface.
825 	 */
826 
827 	if (is_tagged == B_FALSE) {
828 		mp = vnet_vlan_insert_tag(mp, vid);
829 	}
830 
831 	return (mp);
832 }
833 
834 /* transmit packets over the given port */
835 static int
836 vgen_portsend(vgen_port_t *portp, mblk_t *mp)
837 {
838 	vgen_ldclist_t		*ldclp;
839 	vgen_ldc_t		*ldcp;
840 	int			status;
841 	int			rv = VGEN_SUCCESS;
842 	vgen_t			*vgenp = portp->vgenp;
843 	vnet_t			*vnetp = vgenp->vnetp;
844 	boolean_t		is_tagged;
845 	boolean_t		dec_refcnt = B_FALSE;
846 	uint16_t		vlan_id;
847 	struct ether_header	*ehp;
848 
849 	if (portp->use_vsw_port) {
850 		(void) atomic_inc_32(&vgenp->vsw_port_refcnt);
851 		portp = portp->vgenp->vsw_portp;
852 		dec_refcnt = B_TRUE;
853 	}
854 	if (portp == NULL) {
855 		return (VGEN_FAILURE);
856 	}
857 
858 	/*
859 	 * Determine the vlan id that the frame belongs to.
860 	 */
861 	ehp = (struct ether_header *)mp->b_rptr;
862 	is_tagged = vgen_frame_lookup_vid(vnetp, ehp, &vlan_id);
863 
864 	if (vlan_id == vnetp->default_vlan_id) {
865 
866 		/* Frames in default vlan must be untagged */
867 		ASSERT(is_tagged == B_FALSE);
868 
869 		/*
870 		 * If the destination is a vnet-port verify it belongs to the
871 		 * default vlan; otherwise drop the packet. We do not need
872 		 * this check for vsw-port, as it should implicitly belong to
873 		 * this vlan; see comments in vgen_vlan_frame_fixtag().
874 		 */
875 		if (portp != vgenp->vsw_portp &&
876 		    portp->pvid != vnetp->default_vlan_id) {
877 			freemsg(mp);
878 			goto portsend_ret;
879 		}
880 
881 	} else {	/* frame not in default-vlan */
882 
883 		mp = vgen_vlan_frame_fixtag(portp, mp, is_tagged, vlan_id);
884 		if (mp == NULL) {
885 			goto portsend_ret;
886 		}
887 
888 	}
889 
890 	ldclp = &portp->ldclist;
891 	READ_ENTER(&ldclp->rwlock);
892 	/*
893 	 * NOTE: for now, we will assume we have a single channel.
894 	 */
895 	if (ldclp->headp == NULL) {
896 		RW_EXIT(&ldclp->rwlock);
897 		rv = VGEN_FAILURE;
898 		goto portsend_ret;
899 	}
900 	ldcp = ldclp->headp;
901 
902 	status = ldcp->tx(ldcp, mp);
903 
904 	RW_EXIT(&ldclp->rwlock);
905 
906 	if (status != VGEN_TX_SUCCESS) {
907 		rv = VGEN_FAILURE;
908 	}
909 
910 portsend_ret:
911 	if (dec_refcnt == B_TRUE) {
912 		(void) atomic_dec_32(&vgenp->vsw_port_refcnt);
913 	}
914 	return (rv);
915 }
916 
917 /*
918  * Wrapper function to transmit normal and/or priority frames over the channel.
919  */
920 static int
921 vgen_ldcsend(void *arg, mblk_t *mp)
922 {
923 	vgen_ldc_t		*ldcp = (vgen_ldc_t *)arg;
924 	int			status;
925 	struct ether_header	*ehp;
926 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
927 	uint32_t		num_types;
928 	uint16_t		*types;
929 	int			i;
930 
931 	ASSERT(VGEN_PRI_ETH_DEFINED(vgenp));
932 
933 	num_types = vgenp->pri_num_types;
934 	types = vgenp->pri_types;
935 	ehp = (struct ether_header *)mp->b_rptr;
936 
937 	for (i = 0; i < num_types; i++) {
938 
939 		if (ehp->ether_type == types[i]) {
940 			/* priority frame, use pri tx function */
941 			vgen_ldcsend_pkt(ldcp, mp);
942 			return (VGEN_SUCCESS);
943 		}
944 
945 	}
946 
947 	status  = vgen_ldcsend_dring(ldcp, mp);
948 
949 	return (status);
950 }
951 
952 /*
953  * This functions handles ldc channel reset while in the context
954  * of transmit routines: vgen_ldcsend_pkt() or vgen_ldcsend_dring().
955  */
956 static void
957 vgen_ldcsend_process_reset(vgen_ldc_t *ldcp)
958 {
959 	ldc_status_t	istatus;
960 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
961 
962 	if (mutex_tryenter(&ldcp->cblock)) {
963 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
964 			DWARN(vgenp, ldcp, "ldc_status() error\n");
965 		} else {
966 			ldcp->ldc_status = istatus;
967 		}
968 		if (ldcp->ldc_status != LDC_UP) {
969 			vgen_handle_evt_reset(ldcp);
970 		}
971 		mutex_exit(&ldcp->cblock);
972 	}
973 }
974 
975 /*
976  * This function transmits the frame in the payload of a raw data
977  * (VIO_PKT_DATA) message. Thus, it provides an Out-Of-Band path to
978  * send special frames with high priorities, without going through
979  * the normal data path which uses descriptor ring mechanism.
980  */
981 static void
982 vgen_ldcsend_pkt(void *arg, mblk_t *mp)
983 {
984 	vgen_ldc_t		*ldcp = (vgen_ldc_t *)arg;
985 	vio_raw_data_msg_t	*pkt;
986 	mblk_t			*bp;
987 	mblk_t			*nmp = NULL;
988 	caddr_t			dst;
989 	uint32_t		mblksz;
990 	uint32_t		size;
991 	uint32_t		nbytes;
992 	int			rv;
993 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
994 	vgen_stats_t		*statsp = &ldcp->stats;
995 
996 	/* drop the packet if ldc is not up or handshake is not done */
997 	if (ldcp->ldc_status != LDC_UP) {
998 		(void) atomic_inc_32(&statsp->tx_pri_fail);
999 		DWARN(vgenp, ldcp, "status(%d), dropping packet\n",
1000 		    ldcp->ldc_status);
1001 		goto send_pkt_exit;
1002 	}
1003 
1004 	if (ldcp->hphase != VH_DONE) {
1005 		(void) atomic_inc_32(&statsp->tx_pri_fail);
1006 		DWARN(vgenp, ldcp, "hphase(%x), dropping packet\n",
1007 		    ldcp->hphase);
1008 		goto send_pkt_exit;
1009 	}
1010 
1011 	size = msgsize(mp);
1012 
1013 	/* frame size bigger than available payload len of raw data msg ? */
1014 	if (size > (size_t)(ldcp->msglen - VIO_PKT_DATA_HDRSIZE)) {
1015 		(void) atomic_inc_32(&statsp->tx_pri_fail);
1016 		DWARN(vgenp, ldcp, "invalid size(%d)\n", size);
1017 		goto send_pkt_exit;
1018 	}
1019 
1020 	if (size < ETHERMIN)
1021 		size = ETHERMIN;
1022 
1023 	/* alloc space for a raw data message */
1024 	nmp = vio_allocb(vgenp->pri_tx_vmp);
1025 	if (nmp == NULL) {
1026 		(void) atomic_inc_32(&statsp->tx_pri_fail);
1027 		DWARN(vgenp, ldcp, "vio_allocb failed\n");
1028 		goto send_pkt_exit;
1029 	}
1030 	pkt = (vio_raw_data_msg_t *)nmp->b_rptr;
1031 
1032 	/* copy frame into the payload of raw data message */
1033 	dst = (caddr_t)pkt->data;
1034 	for (bp = mp; bp != NULL; bp = bp->b_cont) {
1035 		mblksz = MBLKL(bp);
1036 		bcopy(bp->b_rptr, dst, mblksz);
1037 		dst += mblksz;
1038 	}
1039 
1040 	/* setup the raw data msg */
1041 	pkt->tag.vio_msgtype = VIO_TYPE_DATA;
1042 	pkt->tag.vio_subtype = VIO_SUBTYPE_INFO;
1043 	pkt->tag.vio_subtype_env = VIO_PKT_DATA;
1044 	pkt->tag.vio_sid = ldcp->local_sid;
1045 	nbytes = VIO_PKT_DATA_HDRSIZE + size;
1046 
1047 	/* send the msg over ldc */
1048 	rv = vgen_sendmsg(ldcp, (caddr_t)pkt, nbytes, B_FALSE);
1049 	if (rv != VGEN_SUCCESS) {
1050 		(void) atomic_inc_32(&statsp->tx_pri_fail);
1051 		DWARN(vgenp, ldcp, "Error sending priority frame\n");
1052 		if (rv == ECONNRESET) {
1053 			vgen_ldcsend_process_reset(ldcp);
1054 		}
1055 		goto send_pkt_exit;
1056 	}
1057 
1058 	/* update stats */
1059 	(void) atomic_inc_64(&statsp->tx_pri_packets);
1060 	(void) atomic_add_64(&statsp->tx_pri_bytes, size);
1061 
1062 send_pkt_exit:
1063 	if (nmp != NULL)
1064 		freemsg(nmp);
1065 	freemsg(mp);
1066 }
1067 
1068 /*
1069  * This function transmits normal (non-priority) data frames over
1070  * the channel. It queues the frame into the transmit descriptor ring
1071  * and sends a VIO_DRING_DATA message if needed, to wake up the
1072  * peer to (re)start processing.
1073  */
1074 static int
1075 vgen_ldcsend_dring(void *arg, mblk_t *mp)
1076 {
1077 	vgen_ldc_t		*ldcp = (vgen_ldc_t *)arg;
1078 	vgen_private_desc_t	*tbufp;
1079 	vgen_private_desc_t	*rtbufp;
1080 	vnet_public_desc_t	*rtxdp;
1081 	vgen_private_desc_t	*ntbufp;
1082 	vnet_public_desc_t	*txdp;
1083 	vio_dring_entry_hdr_t	*hdrp;
1084 	vgen_stats_t		*statsp;
1085 	struct ether_header	*ehp;
1086 	boolean_t		is_bcast = B_FALSE;
1087 	boolean_t		is_mcast = B_FALSE;
1088 	size_t			mblksz;
1089 	caddr_t			dst;
1090 	mblk_t			*bp;
1091 	size_t			size;
1092 	int			rv = 0;
1093 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
1094 	vgen_hparams_t		*lp = &ldcp->local_hparams;
1095 
1096 	statsp = &ldcp->stats;
1097 	size = msgsize(mp);
1098 
1099 	DBG1(vgenp, ldcp, "enter\n");
1100 
1101 	if (ldcp->ldc_status != LDC_UP) {
1102 		DWARN(vgenp, ldcp, "status(%d), dropping packet\n",
1103 		    ldcp->ldc_status);
1104 		/* retry ldc_up() if needed */
1105 #ifdef	VNET_IOC_DEBUG
1106 		if (ldcp->flags & CHANNEL_STARTED && !ldcp->link_down_forced) {
1107 #else
1108 		if (ldcp->flags & CHANNEL_STARTED) {
1109 #endif
1110 			(void) ldc_up(ldcp->ldc_handle);
1111 		}
1112 		goto send_dring_exit;
1113 	}
1114 
1115 	/* drop the packet if ldc is not up or handshake is not done */
1116 	if (ldcp->hphase != VH_DONE) {
1117 		DWARN(vgenp, ldcp, "hphase(%x), dropping packet\n",
1118 		    ldcp->hphase);
1119 		goto send_dring_exit;
1120 	}
1121 
1122 	if (size > (size_t)lp->mtu) {
1123 		DWARN(vgenp, ldcp, "invalid size(%d)\n", size);
1124 		goto send_dring_exit;
1125 	}
1126 	if (size < ETHERMIN)
1127 		size = ETHERMIN;
1128 
1129 	ehp = (struct ether_header *)mp->b_rptr;
1130 	is_bcast = IS_BROADCAST(ehp);
1131 	is_mcast = IS_MULTICAST(ehp);
1132 
1133 	mutex_enter(&ldcp->txlock);
1134 	/*
1135 	 * allocate a descriptor
1136 	 */
1137 	tbufp = ldcp->next_tbufp;
1138 	ntbufp = NEXTTBUF(ldcp, tbufp);
1139 	if (ntbufp == ldcp->cur_tbufp) { /* out of tbufs/txds */
1140 
1141 		mutex_enter(&ldcp->tclock);
1142 		/* Try reclaiming now */
1143 		vgen_reclaim_dring(ldcp);
1144 		ldcp->reclaim_lbolt = ddi_get_lbolt();
1145 
1146 		if (ntbufp == ldcp->cur_tbufp) {
1147 			/* Now we are really out of tbuf/txds */
1148 			ldcp->need_resched = B_TRUE;
1149 			mutex_exit(&ldcp->tclock);
1150 
1151 			statsp->tx_no_desc++;
1152 			mutex_exit(&ldcp->txlock);
1153 
1154 			return (VGEN_TX_NORESOURCES);
1155 		}
1156 		mutex_exit(&ldcp->tclock);
1157 	}
1158 	/* update next available tbuf in the ring and update tx index */
1159 	ldcp->next_tbufp = ntbufp;
1160 	INCR_TXI(ldcp->next_txi, ldcp);
1161 
1162 	/* Mark the buffer busy before releasing the lock */
1163 	tbufp->flags = VGEN_PRIV_DESC_BUSY;
1164 	mutex_exit(&ldcp->txlock);
1165 
1166 	/* copy data into pre-allocated transmit buffer */
1167 	dst = tbufp->datap + VNET_IPALIGN;
1168 	for (bp = mp; bp != NULL; bp = bp->b_cont) {
1169 		mblksz = MBLKL(bp);
1170 		bcopy(bp->b_rptr, dst, mblksz);
1171 		dst += mblksz;
1172 	}
1173 
1174 	tbufp->datalen = size;
1175 
1176 	/* initialize the corresponding public descriptor (txd) */
1177 	txdp = tbufp->descp;
1178 	hdrp = &txdp->hdr;
1179 	txdp->nbytes = size;
1180 	txdp->ncookies = tbufp->ncookies;
1181 	bcopy((tbufp->memcookie), (txdp->memcookie),
1182 	    tbufp->ncookies * sizeof (ldc_mem_cookie_t));
1183 
1184 	mutex_enter(&ldcp->wrlock);
1185 	/*
1186 	 * If the flags not set to BUSY, it implies that the clobber
1187 	 * was done while we were copying the data. In such case,
1188 	 * discard the packet and return.
1189 	 */
1190 	if (tbufp->flags != VGEN_PRIV_DESC_BUSY) {
1191 		statsp->oerrors++;
1192 		mutex_exit(&ldcp->wrlock);
1193 		goto send_dring_exit;
1194 	}
1195 	hdrp->dstate = VIO_DESC_READY;
1196 
1197 	/* update stats */
1198 	statsp->opackets++;
1199 	statsp->obytes += size;
1200 	if (is_bcast)
1201 		statsp->brdcstxmt++;
1202 	else if (is_mcast)
1203 		statsp->multixmt++;
1204 
1205 	/* send dring datamsg to the peer */
1206 	if (ldcp->resched_peer) {
1207 
1208 		rtbufp = &ldcp->tbufp[ldcp->resched_peer_txi];
1209 		rtxdp = rtbufp->descp;
1210 
1211 		if (rtxdp->hdr.dstate == VIO_DESC_READY) {
1212 
1213 			rv = vgen_send_dring_data(ldcp,
1214 			    (uint32_t)ldcp->resched_peer_txi, -1);
1215 			if (rv != 0) {
1216 				/* error: drop the packet */
1217 				DWARN(vgenp, ldcp, "vgen_send_dring_data "
1218 				    "failed: rv(%d) len(%d)\n",
1219 				    ldcp->ldc_id, rv, size);
1220 				statsp->oerrors++;
1221 			} else {
1222 				ldcp->resched_peer = B_FALSE;
1223 			}
1224 
1225 		}
1226 
1227 	}
1228 
1229 	mutex_exit(&ldcp->wrlock);
1230 
1231 send_dring_exit:
1232 	if (rv == ECONNRESET) {
1233 		vgen_ldcsend_process_reset(ldcp);
1234 	}
1235 	freemsg(mp);
1236 	DBG1(vgenp, ldcp, "exit\n");
1237 	return (VGEN_TX_SUCCESS);
1238 }
1239 
1240 /* enable/disable a multicast address */
1241 int
1242 vgen_multicst(void *arg, boolean_t add, const uint8_t *mca)
1243 {
1244 	vgen_t			*vgenp;
1245 	vnet_mcast_msg_t	mcastmsg;
1246 	vio_msg_tag_t		*tagp;
1247 	vgen_port_t		*portp;
1248 	vgen_portlist_t		*plistp;
1249 	vgen_ldc_t		*ldcp;
1250 	vgen_ldclist_t		*ldclp;
1251 	struct ether_addr	*addrp;
1252 	int			rv = DDI_FAILURE;
1253 	uint32_t		i;
1254 
1255 	portp = (vgen_port_t *)arg;
1256 	vgenp = portp->vgenp;
1257 
1258 	if (portp != vgenp->vsw_portp) {
1259 		return (DDI_SUCCESS);
1260 	}
1261 
1262 	addrp = (struct ether_addr *)mca;
1263 	tagp = &mcastmsg.tag;
1264 	bzero(&mcastmsg, sizeof (mcastmsg));
1265 
1266 	mutex_enter(&vgenp->lock);
1267 
1268 	plistp = &(vgenp->vgenports);
1269 
1270 	READ_ENTER(&plistp->rwlock);
1271 
1272 	portp = vgenp->vsw_portp;
1273 	if (portp == NULL) {
1274 		RW_EXIT(&plistp->rwlock);
1275 		mutex_exit(&vgenp->lock);
1276 		return (rv);
1277 	}
1278 	ldclp = &portp->ldclist;
1279 
1280 	READ_ENTER(&ldclp->rwlock);
1281 
1282 	ldcp = ldclp->headp;
1283 	if (ldcp == NULL)
1284 		goto vgen_mcast_exit;
1285 
1286 	mutex_enter(&ldcp->cblock);
1287 
1288 	if (ldcp->hphase == VH_DONE) {
1289 		/*
1290 		 * If handshake is done, send a msg to vsw to add/remove
1291 		 * the multicast address. Otherwise, we just update this
1292 		 * mcast address in our table and the table will be sync'd
1293 		 * with vsw when handshake completes.
1294 		 */
1295 		tagp->vio_msgtype = VIO_TYPE_CTRL;
1296 		tagp->vio_subtype = VIO_SUBTYPE_INFO;
1297 		tagp->vio_subtype_env = VNET_MCAST_INFO;
1298 		tagp->vio_sid = ldcp->local_sid;
1299 		bcopy(mca, &(mcastmsg.mca), ETHERADDRL);
1300 		mcastmsg.set = add;
1301 		mcastmsg.count = 1;
1302 		if (vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (mcastmsg),
1303 		    B_FALSE) != VGEN_SUCCESS) {
1304 			DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
1305 			mutex_exit(&ldcp->cblock);
1306 			goto vgen_mcast_exit;
1307 		}
1308 	}
1309 
1310 	mutex_exit(&ldcp->cblock);
1311 
1312 	if (add) {
1313 
1314 		/* expand multicast table if necessary */
1315 		if (vgenp->mccount >= vgenp->mcsize) {
1316 			struct ether_addr	*newtab;
1317 			uint32_t		newsize;
1318 
1319 
1320 			newsize = vgenp->mcsize * 2;
1321 
1322 			newtab = kmem_zalloc(newsize *
1323 			    sizeof (struct ether_addr), KM_NOSLEEP);
1324 			if (newtab == NULL)
1325 				goto vgen_mcast_exit;
1326 			bcopy(vgenp->mctab, newtab, vgenp->mcsize *
1327 			    sizeof (struct ether_addr));
1328 			kmem_free(vgenp->mctab,
1329 			    vgenp->mcsize * sizeof (struct ether_addr));
1330 
1331 			vgenp->mctab = newtab;
1332 			vgenp->mcsize = newsize;
1333 		}
1334 
1335 		/* add address to the table */
1336 		vgenp->mctab[vgenp->mccount++] = *addrp;
1337 
1338 	} else {
1339 
1340 		/* delete address from the table */
1341 		for (i = 0; i < vgenp->mccount; i++) {
1342 			if (ether_cmp(addrp, &(vgenp->mctab[i])) == 0) {
1343 
1344 				/*
1345 				 * If there's more than one address in this
1346 				 * table, delete the unwanted one by moving
1347 				 * the last one in the list over top of it;
1348 				 * otherwise, just remove it.
1349 				 */
1350 				if (vgenp->mccount > 1) {
1351 					vgenp->mctab[i] =
1352 					    vgenp->mctab[vgenp->mccount-1];
1353 				}
1354 				vgenp->mccount--;
1355 				break;
1356 			}
1357 		}
1358 	}
1359 
1360 	rv = DDI_SUCCESS;
1361 
1362 vgen_mcast_exit:
1363 	RW_EXIT(&ldclp->rwlock);
1364 	RW_EXIT(&plistp->rwlock);
1365 
1366 	mutex_exit(&vgenp->lock);
1367 	return (rv);
1368 }
1369 
1370 /* set or clear promiscuous mode on the device */
1371 static int
1372 vgen_promisc(void *arg, boolean_t on)
1373 {
1374 	_NOTE(ARGUNUSED(arg, on))
1375 	return (DDI_SUCCESS);
1376 }
1377 
1378 /* set the unicast mac address of the device */
1379 static int
1380 vgen_unicst(void *arg, const uint8_t *mca)
1381 {
1382 	_NOTE(ARGUNUSED(arg, mca))
1383 	return (DDI_SUCCESS);
1384 }
1385 
1386 /* get device statistics */
1387 int
1388 vgen_stat(void *arg, uint_t stat, uint64_t *val)
1389 {
1390 	vgen_port_t	*portp = (vgen_port_t *)arg;
1391 
1392 	*val = vgen_port_stat(portp, stat);
1393 
1394 	return (0);
1395 }
1396 
1397 /* vgen internal functions */
1398 /* detach all ports from the device */
1399 static void
1400 vgen_detach_ports(vgen_t *vgenp)
1401 {
1402 	vgen_port_t	*portp;
1403 	vgen_portlist_t	*plistp;
1404 
1405 	plistp = &(vgenp->vgenports);
1406 	WRITE_ENTER(&plistp->rwlock);
1407 	while ((portp = plistp->headp) != NULL) {
1408 		vgen_port_detach(portp);
1409 	}
1410 	RW_EXIT(&plistp->rwlock);
1411 }
1412 
1413 /*
1414  * detach the given port.
1415  */
1416 static void
1417 vgen_port_detach(vgen_port_t *portp)
1418 {
1419 	vgen_t		*vgenp;
1420 	vgen_ldclist_t	*ldclp;
1421 	int		port_num;
1422 
1423 	vgenp = portp->vgenp;
1424 	port_num = portp->port_num;
1425 
1426 	DBG1(vgenp, NULL, "port(%d):enter\n", port_num);
1427 
1428 	/*
1429 	 * If this port is connected to the vswitch, then
1430 	 * potentially there could be ports that may be using
1431 	 * this port to transmit packets. To address this do
1432 	 * the following:
1433 	 *	- First set vgenp->vsw_portp to NULL, so that
1434 	 *	  its not used after that.
1435 	 *	- Then wait for the refcnt to go down to 0.
1436 	 *	- Now we can safely detach this port.
1437 	 */
1438 	if (vgenp->vsw_portp == portp) {
1439 		vgenp->vsw_portp = NULL;
1440 		while (vgenp->vsw_port_refcnt > 0) {
1441 			delay(drv_usectohz(vgen_tx_delay));
1442 		}
1443 		(void) atomic_swap_32(&vgenp->vsw_port_refcnt, 0);
1444 	}
1445 
1446 	if (portp->vhp != NULL) {
1447 		vio_net_resource_unreg(portp->vhp);
1448 		portp->vhp = NULL;
1449 	}
1450 
1451 	vgen_vlan_destroy_hash(portp);
1452 
1453 	/* remove it from port list */
1454 	vgen_port_list_remove(portp);
1455 
1456 	/* detach channels from this port */
1457 	ldclp = &portp->ldclist;
1458 	WRITE_ENTER(&ldclp->rwlock);
1459 	while (ldclp->headp) {
1460 		vgen_ldc_detach(ldclp->headp);
1461 	}
1462 	RW_EXIT(&ldclp->rwlock);
1463 	rw_destroy(&ldclp->rwlock);
1464 
1465 	if (portp->num_ldcs != 0) {
1466 		kmem_free(portp->ldc_ids, portp->num_ldcs * sizeof (uint64_t));
1467 		portp->num_ldcs = 0;
1468 	}
1469 
1470 	mutex_destroy(&portp->lock);
1471 	KMEM_FREE(portp);
1472 
1473 	DBG1(vgenp, NULL, "port(%d):exit\n", port_num);
1474 }
1475 
1476 /* add a port to port list */
1477 static void
1478 vgen_port_list_insert(vgen_port_t *portp)
1479 {
1480 	vgen_portlist_t *plistp;
1481 	vgen_t *vgenp;
1482 
1483 	vgenp = portp->vgenp;
1484 	plistp = &(vgenp->vgenports);
1485 
1486 	if (plistp->headp == NULL) {
1487 		plistp->headp = portp;
1488 	} else {
1489 		plistp->tailp->nextp = portp;
1490 	}
1491 	plistp->tailp = portp;
1492 	portp->nextp = NULL;
1493 }
1494 
1495 /* remove a port from port list */
1496 static void
1497 vgen_port_list_remove(vgen_port_t *portp)
1498 {
1499 	vgen_port_t *prevp;
1500 	vgen_port_t *nextp;
1501 	vgen_portlist_t *plistp;
1502 	vgen_t *vgenp;
1503 
1504 	vgenp = portp->vgenp;
1505 
1506 	plistp = &(vgenp->vgenports);
1507 
1508 	if (plistp->headp == NULL)
1509 		return;
1510 
1511 	if (portp == plistp->headp) {
1512 		plistp->headp = portp->nextp;
1513 		if (portp == plistp->tailp)
1514 			plistp->tailp = plistp->headp;
1515 	} else {
1516 		for (prevp = plistp->headp;
1517 		    ((nextp = prevp->nextp) != NULL) && (nextp != portp);
1518 		    prevp = nextp)
1519 			;
1520 		if (nextp == portp) {
1521 			prevp->nextp = portp->nextp;
1522 		}
1523 		if (portp == plistp->tailp)
1524 			plistp->tailp = prevp;
1525 	}
1526 }
1527 
1528 /* lookup a port in the list based on port_num */
1529 static vgen_port_t *
1530 vgen_port_lookup(vgen_portlist_t *plistp, int port_num)
1531 {
1532 	vgen_port_t *portp = NULL;
1533 
1534 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
1535 		if (portp->port_num == port_num) {
1536 			break;
1537 		}
1538 	}
1539 
1540 	return (portp);
1541 }
1542 
1543 /* enable ports for transmit/receive */
1544 static void
1545 vgen_init_ports(vgen_t *vgenp)
1546 {
1547 	vgen_port_t	*portp;
1548 	vgen_portlist_t	*plistp;
1549 
1550 	plistp = &(vgenp->vgenports);
1551 	READ_ENTER(&plistp->rwlock);
1552 
1553 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
1554 		vgen_port_init(portp);
1555 	}
1556 
1557 	RW_EXIT(&plistp->rwlock);
1558 }
1559 
1560 static void
1561 vgen_port_init(vgen_port_t *portp)
1562 {
1563 	/* Add the port to the specified vlans */
1564 	vgen_vlan_add_ids(portp);
1565 
1566 	/* Bring up the channels of this port */
1567 	vgen_init_ldcs(portp);
1568 }
1569 
1570 /* disable transmit/receive on ports */
1571 static void
1572 vgen_uninit_ports(vgen_t *vgenp)
1573 {
1574 	vgen_port_t	*portp;
1575 	vgen_portlist_t	*plistp;
1576 
1577 	plistp = &(vgenp->vgenports);
1578 	READ_ENTER(&plistp->rwlock);
1579 
1580 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
1581 		vgen_port_uninit(portp);
1582 	}
1583 
1584 	RW_EXIT(&plistp->rwlock);
1585 }
1586 
1587 static void
1588 vgen_port_uninit(vgen_port_t *portp)
1589 {
1590 	vgen_uninit_ldcs(portp);
1591 
1592 	/* remove the port from vlans it has been assigned to */
1593 	vgen_vlan_remove_ids(portp);
1594 }
1595 
1596 /*
1597  * Scan the machine description for this instance of vnet
1598  * and read its properties. Called only from vgen_init().
1599  * Returns: 0 on success, 1 on failure.
1600  */
1601 static int
1602 vgen_read_mdprops(vgen_t *vgenp)
1603 {
1604 	vnet_t		*vnetp = vgenp->vnetp;
1605 	md_t		*mdp = NULL;
1606 	mde_cookie_t	rootnode;
1607 	mde_cookie_t	*listp = NULL;
1608 	uint64_t	cfgh;
1609 	char		*name;
1610 	int		rv = 1;
1611 	int		num_nodes = 0;
1612 	int		num_devs = 0;
1613 	int		listsz = 0;
1614 	int		i;
1615 
1616 	if ((mdp = md_get_handle()) == NULL) {
1617 		return (rv);
1618 	}
1619 
1620 	num_nodes = md_node_count(mdp);
1621 	ASSERT(num_nodes > 0);
1622 
1623 	listsz = num_nodes * sizeof (mde_cookie_t);
1624 	listp = (mde_cookie_t *)kmem_zalloc(listsz, KM_SLEEP);
1625 
1626 	rootnode = md_root_node(mdp);
1627 
1628 	/* search for all "virtual_device" nodes */
1629 	num_devs = md_scan_dag(mdp, rootnode,
1630 	    md_find_name(mdp, vdev_propname),
1631 	    md_find_name(mdp, "fwd"), listp);
1632 	if (num_devs <= 0) {
1633 		goto vgen_readmd_exit;
1634 	}
1635 
1636 	/*
1637 	 * Now loop through the list of virtual-devices looking for
1638 	 * devices with name "network" and for each such device compare
1639 	 * its instance with what we have from the 'reg' property to
1640 	 * find the right node in MD and then read all its properties.
1641 	 */
1642 	for (i = 0; i < num_devs; i++) {
1643 
1644 		if (md_get_prop_str(mdp, listp[i], "name", &name) != 0) {
1645 			goto vgen_readmd_exit;
1646 		}
1647 
1648 		/* is this a "network" device? */
1649 		if (strcmp(name, vnet_propname) != 0)
1650 			continue;
1651 
1652 		if (md_get_prop_val(mdp, listp[i], "cfg-handle", &cfgh) != 0) {
1653 			goto vgen_readmd_exit;
1654 		}
1655 
1656 		/* is this the required instance of vnet? */
1657 		if (vgenp->regprop != cfgh)
1658 			continue;
1659 
1660 		/*
1661 		 * Read the 'linkprop' property to know if this vnet
1662 		 * device should get physical link updates from vswitch.
1663 		 */
1664 		vgen_linkprop_read(vgenp, mdp, listp[i],
1665 		    &vnetp->pls_update);
1666 
1667 		/*
1668 		 * Read the mtu. Note that we set the mtu of vnet device within
1669 		 * this routine itself, after validating the range.
1670 		 */
1671 		vgen_mtu_read(vgenp, mdp, listp[i], &vnetp->mtu);
1672 		if (vnetp->mtu < ETHERMTU || vnetp->mtu > VNET_MAX_MTU) {
1673 			vnetp->mtu = ETHERMTU;
1674 		}
1675 		vgenp->max_frame_size = vnetp->mtu +
1676 		    sizeof (struct ether_header) + VLAN_TAGSZ;
1677 
1678 		/* read priority ether types */
1679 		vgen_read_pri_eth_types(vgenp, mdp, listp[i]);
1680 
1681 		/* read vlan id properties of this vnet instance */
1682 		vgen_vlan_read_ids(vgenp, VGEN_LOCAL, mdp, listp[i],
1683 		    &vnetp->pvid, &vnetp->vids, &vnetp->nvids,
1684 		    &vnetp->default_vlan_id);
1685 
1686 		rv = 0;
1687 		break;
1688 	}
1689 
1690 vgen_readmd_exit:
1691 
1692 	kmem_free(listp, listsz);
1693 	(void) md_fini_handle(mdp);
1694 	return (rv);
1695 }
1696 
1697 /*
1698  * Read vlan id properties of the given MD node.
1699  * Arguments:
1700  *   arg:          device argument(vnet device or a port)
1701  *   type:         type of arg; VGEN_LOCAL(vnet device) or VGEN_PEER(port)
1702  *   mdp:          machine description
1703  *   node:         md node cookie
1704  *
1705  * Returns:
1706  *   pvidp:        port-vlan-id of the node
1707  *   vidspp:       list of vlan-ids of the node
1708  *   nvidsp:       # of vlan-ids in the list
1709  *   default_idp:  default-vlan-id of the node(if node is vnet device)
1710  */
1711 static void
1712 vgen_vlan_read_ids(void *arg, int type, md_t *mdp, mde_cookie_t node,
1713 	uint16_t *pvidp, uint16_t **vidspp, uint16_t *nvidsp,
1714 	uint16_t *default_idp)
1715 {
1716 	vgen_t		*vgenp;
1717 	vnet_t		*vnetp;
1718 	vgen_port_t	*portp;
1719 	char		*pvid_propname;
1720 	char		*vid_propname;
1721 	uint_t		nvids;
1722 	uint32_t	vids_size;
1723 	int		rv;
1724 	int		i;
1725 	uint64_t	*data;
1726 	uint64_t	val;
1727 	int		size;
1728 	int		inst;
1729 
1730 	if (type == VGEN_LOCAL) {
1731 
1732 		vgenp = (vgen_t *)arg;
1733 		vnetp = vgenp->vnetp;
1734 		pvid_propname = vgen_pvid_propname;
1735 		vid_propname = vgen_vid_propname;
1736 		inst = vnetp->instance;
1737 
1738 	} else if (type == VGEN_PEER) {
1739 
1740 		portp = (vgen_port_t *)arg;
1741 		vgenp = portp->vgenp;
1742 		vnetp = vgenp->vnetp;
1743 		pvid_propname = port_pvid_propname;
1744 		vid_propname = port_vid_propname;
1745 		inst = portp->port_num;
1746 
1747 	} else {
1748 		return;
1749 	}
1750 
1751 	if (type == VGEN_LOCAL && default_idp != NULL) {
1752 		rv = md_get_prop_val(mdp, node, vgen_dvid_propname, &val);
1753 		if (rv != 0) {
1754 			DWARN(vgenp, NULL, "prop(%s) not found",
1755 			    vgen_dvid_propname);
1756 
1757 			*default_idp = vnet_default_vlan_id;
1758 		} else {
1759 			*default_idp = val & 0xFFF;
1760 			DBG2(vgenp, NULL, "%s(%d): (%d)\n", vgen_dvid_propname,
1761 			    inst, *default_idp);
1762 		}
1763 	}
1764 
1765 	rv = md_get_prop_val(mdp, node, pvid_propname, &val);
1766 	if (rv != 0) {
1767 		DWARN(vgenp, NULL, "prop(%s) not found", pvid_propname);
1768 		*pvidp = vnet_default_vlan_id;
1769 	} else {
1770 
1771 		*pvidp = val & 0xFFF;
1772 		DBG2(vgenp, NULL, "%s(%d): (%d)\n",
1773 		    pvid_propname, inst, *pvidp);
1774 	}
1775 
1776 	rv = md_get_prop_data(mdp, node, vid_propname, (uint8_t **)&data,
1777 	    &size);
1778 	if (rv != 0) {
1779 		DBG2(vgenp, NULL, "prop(%s) not found", vid_propname);
1780 		size = 0;
1781 	} else {
1782 		size /= sizeof (uint64_t);
1783 	}
1784 	nvids = size;
1785 
1786 	if (nvids != 0) {
1787 		DBG2(vgenp, NULL, "%s(%d): ", vid_propname, inst);
1788 		vids_size = sizeof (uint16_t) * nvids;
1789 		*vidspp = kmem_zalloc(vids_size, KM_SLEEP);
1790 		for (i = 0; i < nvids; i++) {
1791 			(*vidspp)[i] = data[i] & 0xFFFF;
1792 			DBG2(vgenp, NULL, " %d ", (*vidspp)[i]);
1793 		}
1794 		DBG2(vgenp, NULL, "\n");
1795 	}
1796 
1797 	*nvidsp = nvids;
1798 }
1799 
1800 /*
1801  * Create a vlan id hash table for the given port.
1802  */
1803 static void
1804 vgen_vlan_create_hash(vgen_port_t *portp)
1805 {
1806 	char		hashname[MAXNAMELEN];
1807 
1808 	(void) snprintf(hashname, MAXNAMELEN, "port%d-vlan-hash",
1809 	    portp->port_num);
1810 
1811 	portp->vlan_nchains = vgen_vlan_nchains;
1812 	portp->vlan_hashp = mod_hash_create_idhash(hashname,
1813 	    portp->vlan_nchains, mod_hash_null_valdtor);
1814 }
1815 
1816 /*
1817  * Destroy the vlan id hash table in the given port.
1818  */
1819 static void
1820 vgen_vlan_destroy_hash(vgen_port_t *portp)
1821 {
1822 	if (portp->vlan_hashp != NULL) {
1823 		mod_hash_destroy_hash(portp->vlan_hashp);
1824 		portp->vlan_hashp = NULL;
1825 		portp->vlan_nchains = 0;
1826 	}
1827 }
1828 
1829 /*
1830  * Add a port to the vlans specified in its port properites.
1831  */
1832 static void
1833 vgen_vlan_add_ids(vgen_port_t *portp)
1834 {
1835 	int		rv;
1836 	int		i;
1837 
1838 	rv = mod_hash_insert(portp->vlan_hashp,
1839 	    (mod_hash_key_t)VLAN_ID_KEY(portp->pvid),
1840 	    (mod_hash_val_t)B_TRUE);
1841 	ASSERT(rv == 0);
1842 
1843 	for (i = 0; i < portp->nvids; i++) {
1844 		rv = mod_hash_insert(portp->vlan_hashp,
1845 		    (mod_hash_key_t)VLAN_ID_KEY(portp->vids[i]),
1846 		    (mod_hash_val_t)B_TRUE);
1847 		ASSERT(rv == 0);
1848 	}
1849 }
1850 
1851 /*
1852  * Remove a port from the vlans it has been assigned to.
1853  */
1854 static void
1855 vgen_vlan_remove_ids(vgen_port_t *portp)
1856 {
1857 	int		rv;
1858 	int		i;
1859 	mod_hash_val_t	vp;
1860 
1861 	rv = mod_hash_remove(portp->vlan_hashp,
1862 	    (mod_hash_key_t)VLAN_ID_KEY(portp->pvid),
1863 	    (mod_hash_val_t *)&vp);
1864 	ASSERT(rv == 0);
1865 
1866 	for (i = 0; i < portp->nvids; i++) {
1867 		rv = mod_hash_remove(portp->vlan_hashp,
1868 		    (mod_hash_key_t)VLAN_ID_KEY(portp->vids[i]),
1869 		    (mod_hash_val_t *)&vp);
1870 		ASSERT(rv == 0);
1871 	}
1872 }
1873 
1874 /*
1875  * Lookup the vlan id of the given tx frame. If it is a vlan-tagged frame,
1876  * then the vlan-id is available in the tag; otherwise, its vlan id is
1877  * implicitly obtained from the port-vlan-id of the vnet device.
1878  * The vlan id determined is returned in vidp.
1879  * Returns: B_TRUE if it is a tagged frame; B_FALSE if it is untagged.
1880  */
1881 static boolean_t
1882 vgen_frame_lookup_vid(vnet_t *vnetp, struct ether_header *ehp, uint16_t *vidp)
1883 {
1884 	struct ether_vlan_header	*evhp;
1885 
1886 	/* If it's a tagged frame, get the vlan id from vlan header */
1887 	if (ehp->ether_type == ETHERTYPE_VLAN) {
1888 
1889 		evhp = (struct ether_vlan_header *)ehp;
1890 		*vidp = VLAN_ID(ntohs(evhp->ether_tci));
1891 		return (B_TRUE);
1892 	}
1893 
1894 	/* Untagged frame, vlan-id is the pvid of vnet device */
1895 	*vidp = vnetp->pvid;
1896 	return (B_FALSE);
1897 }
1898 
1899 /*
1900  * Find the given vlan id in the hash table.
1901  * Return: B_TRUE if the id is found; B_FALSE if not found.
1902  */
1903 static boolean_t
1904 vgen_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid)
1905 {
1906 	int		rv;
1907 	mod_hash_val_t	vp;
1908 
1909 	rv = mod_hash_find(vlan_hashp, VLAN_ID_KEY(vid), (mod_hash_val_t *)&vp);
1910 
1911 	if (rv != 0)
1912 		return (B_FALSE);
1913 
1914 	return (B_TRUE);
1915 }
1916 
1917 /*
1918  * This function reads "priority-ether-types" property from md. This property
1919  * is used to enable support for priority frames. Applications which need
1920  * guaranteed and timely delivery of certain high priority frames to/from
1921  * a vnet or vsw within ldoms, should configure this property by providing
1922  * the ether type(s) for which the priority facility is needed.
1923  * Normal data frames are delivered over a ldc channel using the descriptor
1924  * ring mechanism which is constrained by factors such as descriptor ring size,
1925  * the rate at which the ring is processed at the peer ldc end point, etc.
1926  * The priority mechanism provides an Out-Of-Band path to send/receive frames
1927  * as raw pkt data (VIO_PKT_DATA) messages over the channel, avoiding the
1928  * descriptor ring path and enables a more reliable and timely delivery of
1929  * frames to the peer.
1930  */
1931 static void
1932 vgen_read_pri_eth_types(vgen_t *vgenp, md_t *mdp, mde_cookie_t node)
1933 {
1934 	int		rv;
1935 	uint16_t	*types;
1936 	uint64_t	*data;
1937 	int		size;
1938 	int		i;
1939 	size_t		mblk_sz;
1940 
1941 	rv = md_get_prop_data(mdp, node, pri_types_propname,
1942 	    (uint8_t **)&data, &size);
1943 	if (rv != 0) {
1944 		/*
1945 		 * Property may not exist if we are running pre-ldoms1.1 f/w.
1946 		 * Check if 'vgen_pri_eth_type' has been set in that case.
1947 		 */
1948 		if (vgen_pri_eth_type != 0) {
1949 			size = sizeof (vgen_pri_eth_type);
1950 			data = &vgen_pri_eth_type;
1951 		} else {
1952 			DBG2(vgenp, NULL,
1953 			    "prop(%s) not found", pri_types_propname);
1954 			size = 0;
1955 		}
1956 	}
1957 
1958 	if (size == 0) {
1959 		vgenp->pri_num_types = 0;
1960 		return;
1961 	}
1962 
1963 	/*
1964 	 * we have some priority-ether-types defined;
1965 	 * allocate a table of these types and also
1966 	 * allocate a pool of mblks to transmit these
1967 	 * priority packets.
1968 	 */
1969 	size /= sizeof (uint64_t);
1970 	vgenp->pri_num_types = size;
1971 	vgenp->pri_types = kmem_zalloc(size * sizeof (uint16_t), KM_SLEEP);
1972 	for (i = 0, types = vgenp->pri_types; i < size; i++) {
1973 		types[i] = data[i] & 0xFFFF;
1974 	}
1975 	mblk_sz = (VIO_PKT_DATA_HDRSIZE + vgenp->max_frame_size + 7) & ~7;
1976 	(void) vio_create_mblks(vgen_pri_tx_nmblks, mblk_sz,
1977 	    &vgenp->pri_tx_vmp);
1978 }
1979 
1980 static void
1981 vgen_mtu_read(vgen_t *vgenp, md_t *mdp, mde_cookie_t node, uint32_t *mtu)
1982 {
1983 	int		rv;
1984 	uint64_t	val;
1985 	char		*mtu_propname;
1986 
1987 	mtu_propname = vgen_mtu_propname;
1988 
1989 	rv = md_get_prop_val(mdp, node, mtu_propname, &val);
1990 	if (rv != 0) {
1991 		DWARN(vgenp, NULL, "prop(%s) not found", mtu_propname);
1992 		*mtu = vnet_ethermtu;
1993 	} else {
1994 
1995 		*mtu = val & 0xFFFF;
1996 		DBG2(vgenp, NULL, "%s(%d): (%d)\n", mtu_propname,
1997 		    vgenp->instance, *mtu);
1998 	}
1999 }
2000 
2001 static void
2002 vgen_linkprop_read(vgen_t *vgenp, md_t *mdp, mde_cookie_t node,
2003 	boolean_t *pls)
2004 {
2005 	int		rv;
2006 	uint64_t	val;
2007 	char		*linkpropname;
2008 
2009 	linkpropname = vgen_linkprop_propname;
2010 
2011 	rv = md_get_prop_val(mdp, node, linkpropname, &val);
2012 	if (rv != 0) {
2013 		DWARN(vgenp, NULL, "prop(%s) not found", linkpropname);
2014 		*pls = B_FALSE;
2015 	} else {
2016 
2017 		*pls = (val & 0x1) ?  B_TRUE : B_FALSE;
2018 		DBG2(vgenp, NULL, "%s(%d): (%d)\n", linkpropname,
2019 		    vgenp->instance, *pls);
2020 	}
2021 }
2022 
2023 /* register with MD event generator */
2024 static int
2025 vgen_mdeg_reg(vgen_t *vgenp)
2026 {
2027 	mdeg_prop_spec_t	*pspecp;
2028 	mdeg_node_spec_t	*parentp;
2029 	uint_t			templatesz;
2030 	int			rv;
2031 	mdeg_handle_t		dev_hdl = NULL;
2032 	mdeg_handle_t		port_hdl = NULL;
2033 
2034 	templatesz = sizeof (vgen_prop_template);
2035 	pspecp = kmem_zalloc(templatesz, KM_NOSLEEP);
2036 	if (pspecp == NULL) {
2037 		return (DDI_FAILURE);
2038 	}
2039 	parentp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_NOSLEEP);
2040 	if (parentp == NULL) {
2041 		kmem_free(pspecp, templatesz);
2042 		return (DDI_FAILURE);
2043 	}
2044 
2045 	bcopy(vgen_prop_template, pspecp, templatesz);
2046 
2047 	/*
2048 	 * NOTE: The instance here refers to the value of "reg" property and
2049 	 * not the dev_info instance (ddi_get_instance()) of vnet.
2050 	 */
2051 	VGEN_SET_MDEG_PROP_INST(pspecp, vgenp->regprop);
2052 
2053 	parentp->namep = "virtual-device";
2054 	parentp->specp = pspecp;
2055 
2056 	/* save parentp in vgen_t */
2057 	vgenp->mdeg_parentp = parentp;
2058 
2059 	/*
2060 	 * Register an interest in 'virtual-device' nodes with a
2061 	 * 'name' property of 'network'
2062 	 */
2063 	rv = mdeg_register(parentp, &vdev_match, vgen_mdeg_cb, vgenp, &dev_hdl);
2064 	if (rv != MDEG_SUCCESS) {
2065 		DERR(vgenp, NULL, "mdeg_register failed\n");
2066 		goto mdeg_reg_fail;
2067 	}
2068 
2069 	/* Register an interest in 'port' nodes */
2070 	rv = mdeg_register(parentp, &vport_match, vgen_mdeg_port_cb, vgenp,
2071 	    &port_hdl);
2072 	if (rv != MDEG_SUCCESS) {
2073 		DERR(vgenp, NULL, "mdeg_register failed\n");
2074 		goto mdeg_reg_fail;
2075 	}
2076 
2077 	/* save mdeg handle in vgen_t */
2078 	vgenp->mdeg_dev_hdl = dev_hdl;
2079 	vgenp->mdeg_port_hdl = port_hdl;
2080 
2081 	return (DDI_SUCCESS);
2082 
2083 mdeg_reg_fail:
2084 	if (dev_hdl != NULL) {
2085 		(void) mdeg_unregister(dev_hdl);
2086 	}
2087 	KMEM_FREE(parentp);
2088 	kmem_free(pspecp, templatesz);
2089 	vgenp->mdeg_parentp = NULL;
2090 	return (DDI_FAILURE);
2091 }
2092 
2093 /* unregister with MD event generator */
2094 static void
2095 vgen_mdeg_unreg(vgen_t *vgenp)
2096 {
2097 	(void) mdeg_unregister(vgenp->mdeg_dev_hdl);
2098 	(void) mdeg_unregister(vgenp->mdeg_port_hdl);
2099 	kmem_free(vgenp->mdeg_parentp->specp, sizeof (vgen_prop_template));
2100 	KMEM_FREE(vgenp->mdeg_parentp);
2101 	vgenp->mdeg_parentp = NULL;
2102 	vgenp->mdeg_dev_hdl = NULL;
2103 	vgenp->mdeg_port_hdl = NULL;
2104 }
2105 
2106 /* mdeg callback function for the port node */
2107 static int
2108 vgen_mdeg_port_cb(void *cb_argp, mdeg_result_t *resp)
2109 {
2110 	int idx;
2111 	int vsw_idx = -1;
2112 	uint64_t val;
2113 	vgen_t *vgenp;
2114 
2115 	if ((resp == NULL) || (cb_argp == NULL)) {
2116 		return (MDEG_FAILURE);
2117 	}
2118 
2119 	vgenp = (vgen_t *)cb_argp;
2120 	DBG1(vgenp, NULL, "enter\n");
2121 
2122 	mutex_enter(&vgenp->lock);
2123 
2124 	DBG1(vgenp, NULL, "ports: removed(%x), "
2125 	"added(%x), updated(%x)\n", resp->removed.nelem,
2126 	    resp->added.nelem, resp->match_curr.nelem);
2127 
2128 	for (idx = 0; idx < resp->removed.nelem; idx++) {
2129 		(void) vgen_remove_port(vgenp, resp->removed.mdp,
2130 		    resp->removed.mdep[idx]);
2131 	}
2132 
2133 	if (vgenp->vsw_portp == NULL) {
2134 		/*
2135 		 * find vsw_port and add it first, because other ports need
2136 		 * this when adding fdb entry (see vgen_port_init()).
2137 		 */
2138 		for (idx = 0; idx < resp->added.nelem; idx++) {
2139 			if (!(md_get_prop_val(resp->added.mdp,
2140 			    resp->added.mdep[idx], swport_propname, &val))) {
2141 				if (val == 0) {
2142 					/*
2143 					 * This port is connected to the
2144 					 * vsw on service domain.
2145 					 */
2146 					vsw_idx = idx;
2147 					if (vgen_add_port(vgenp,
2148 					    resp->added.mdp,
2149 					    resp->added.mdep[idx]) !=
2150 					    DDI_SUCCESS) {
2151 						cmn_err(CE_NOTE, "vnet%d Could "
2152 						    "not initialize virtual "
2153 						    "switch port.",
2154 						    vgenp->instance);
2155 						mutex_exit(&vgenp->lock);
2156 						return (MDEG_FAILURE);
2157 					}
2158 					break;
2159 				}
2160 			}
2161 		}
2162 		if (vsw_idx == -1) {
2163 			DWARN(vgenp, NULL, "can't find vsw_port\n");
2164 			mutex_exit(&vgenp->lock);
2165 			return (MDEG_FAILURE);
2166 		}
2167 	}
2168 
2169 	for (idx = 0; idx < resp->added.nelem; idx++) {
2170 		if ((vsw_idx != -1) && (vsw_idx == idx)) /* skip vsw_port */
2171 			continue;
2172 
2173 		/* If this port can't be added just skip it. */
2174 		(void) vgen_add_port(vgenp, resp->added.mdp,
2175 		    resp->added.mdep[idx]);
2176 	}
2177 
2178 	for (idx = 0; idx < resp->match_curr.nelem; idx++) {
2179 		(void) vgen_update_port(vgenp, resp->match_curr.mdp,
2180 		    resp->match_curr.mdep[idx],
2181 		    resp->match_prev.mdp,
2182 		    resp->match_prev.mdep[idx]);
2183 	}
2184 
2185 	mutex_exit(&vgenp->lock);
2186 	DBG1(vgenp, NULL, "exit\n");
2187 	return (MDEG_SUCCESS);
2188 }
2189 
2190 /* mdeg callback function for the vnet node */
2191 static int
2192 vgen_mdeg_cb(void *cb_argp, mdeg_result_t *resp)
2193 {
2194 	vgen_t		*vgenp;
2195 	vnet_t		*vnetp;
2196 	md_t		*mdp;
2197 	mde_cookie_t	node;
2198 	uint64_t	inst;
2199 	char		*node_name = NULL;
2200 
2201 	if ((resp == NULL) || (cb_argp == NULL)) {
2202 		return (MDEG_FAILURE);
2203 	}
2204 
2205 	vgenp = (vgen_t *)cb_argp;
2206 	vnetp = vgenp->vnetp;
2207 
2208 	DBG1(vgenp, NULL, "added %d : removed %d : curr matched %d"
2209 	    " : prev matched %d", resp->added.nelem, resp->removed.nelem,
2210 	    resp->match_curr.nelem, resp->match_prev.nelem);
2211 
2212 	mutex_enter(&vgenp->lock);
2213 
2214 	/*
2215 	 * We get an initial callback for this node as 'added' after
2216 	 * registering with mdeg. Note that we would have already gathered
2217 	 * information about this vnet node by walking MD earlier during attach
2218 	 * (in vgen_read_mdprops()). So, there is a window where the properties
2219 	 * of this node might have changed when we get this initial 'added'
2220 	 * callback. We handle this as if an update occured and invoke the same
2221 	 * function which handles updates to the properties of this vnet-node
2222 	 * if any. A non-zero 'match' value indicates that the MD has been
2223 	 * updated and that a 'network' node is present which may or may not
2224 	 * have been updated. It is up to the clients to examine their own
2225 	 * nodes and determine if they have changed.
2226 	 */
2227 	if (resp->added.nelem != 0) {
2228 
2229 		if (resp->added.nelem != 1) {
2230 			cmn_err(CE_NOTE, "!vnet%d: number of nodes added "
2231 			    "invalid: %d\n", vnetp->instance,
2232 			    resp->added.nelem);
2233 			goto vgen_mdeg_cb_err;
2234 		}
2235 
2236 		mdp = resp->added.mdp;
2237 		node = resp->added.mdep[0];
2238 
2239 	} else if (resp->match_curr.nelem != 0) {
2240 
2241 		if (resp->match_curr.nelem != 1) {
2242 			cmn_err(CE_NOTE, "!vnet%d: number of nodes updated "
2243 			    "invalid: %d\n", vnetp->instance,
2244 			    resp->match_curr.nelem);
2245 			goto vgen_mdeg_cb_err;
2246 		}
2247 
2248 		mdp = resp->match_curr.mdp;
2249 		node = resp->match_curr.mdep[0];
2250 
2251 	} else {
2252 		goto vgen_mdeg_cb_err;
2253 	}
2254 
2255 	/* Validate name and instance */
2256 	if (md_get_prop_str(mdp, node, "name", &node_name) != 0) {
2257 		DERR(vgenp, NULL, "unable to get node name\n");
2258 		goto vgen_mdeg_cb_err;
2259 	}
2260 
2261 	/* is this a virtual-network device? */
2262 	if (strcmp(node_name, vnet_propname) != 0) {
2263 		DERR(vgenp, NULL, "%s: Invalid node name: %s\n", node_name);
2264 		goto vgen_mdeg_cb_err;
2265 	}
2266 
2267 	if (md_get_prop_val(mdp, node, "cfg-handle", &inst)) {
2268 		DERR(vgenp, NULL, "prop(cfg-handle) not found\n");
2269 		goto vgen_mdeg_cb_err;
2270 	}
2271 
2272 	/* is this the right instance of vnet? */
2273 	if (inst != vgenp->regprop) {
2274 		DERR(vgenp, NULL,  "Invalid cfg-handle: %lx\n", inst);
2275 		goto vgen_mdeg_cb_err;
2276 	}
2277 
2278 	vgen_update_md_prop(vgenp, mdp, node);
2279 
2280 	mutex_exit(&vgenp->lock);
2281 	return (MDEG_SUCCESS);
2282 
2283 vgen_mdeg_cb_err:
2284 	mutex_exit(&vgenp->lock);
2285 	return (MDEG_FAILURE);
2286 }
2287 
2288 /*
2289  * Check to see if the relevant properties in the specified node have
2290  * changed, and if so take the appropriate action.
2291  */
2292 static void
2293 vgen_update_md_prop(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
2294 {
2295 	uint16_t	pvid;
2296 	uint16_t	*vids;
2297 	uint16_t	nvids;
2298 	vnet_t		*vnetp = vgenp->vnetp;
2299 	uint32_t	mtu;
2300 	boolean_t	pls_update;
2301 	enum		{ MD_init = 0x1,
2302 			    MD_vlans = 0x2,
2303 			    MD_mtu = 0x4,
2304 			    MD_pls = 0x8 } updated;
2305 	int		rv;
2306 
2307 	updated = MD_init;
2308 
2309 	/* Read the vlan ids */
2310 	vgen_vlan_read_ids(vgenp, VGEN_LOCAL, mdp, mdex, &pvid, &vids,
2311 	    &nvids, NULL);
2312 
2313 	/* Determine if there are any vlan id updates */
2314 	if ((pvid != vnetp->pvid) ||		/* pvid changed? */
2315 	    (nvids != vnetp->nvids) ||		/* # of vids changed? */
2316 	    ((nvids != 0) && (vnetp->nvids != 0) &&	/* vids changed? */
2317 	    bcmp(vids, vnetp->vids, sizeof (uint16_t) * nvids))) {
2318 		updated |= MD_vlans;
2319 	}
2320 
2321 	/* Read mtu */
2322 	vgen_mtu_read(vgenp, mdp, mdex, &mtu);
2323 	if (mtu != vnetp->mtu) {
2324 		if (mtu >= ETHERMTU && mtu <= VNET_MAX_MTU) {
2325 			updated |= MD_mtu;
2326 		} else {
2327 			cmn_err(CE_NOTE, "!vnet%d: Unable to process mtu update"
2328 			    " as the specified value:%d is invalid\n",
2329 			    vnetp->instance, mtu);
2330 		}
2331 	}
2332 
2333 	/*
2334 	 * Read the 'linkprop' property.
2335 	 */
2336 	vgen_linkprop_read(vgenp, mdp, mdex, &pls_update);
2337 	if (pls_update != vnetp->pls_update) {
2338 		updated |= MD_pls;
2339 	}
2340 
2341 	/* Now process the updated props */
2342 
2343 	if (updated & MD_vlans) {
2344 
2345 		/* save the new vlan ids */
2346 		vnetp->pvid = pvid;
2347 		if (vnetp->nvids != 0) {
2348 			kmem_free(vnetp->vids,
2349 			    sizeof (uint16_t) * vnetp->nvids);
2350 			vnetp->nvids = 0;
2351 		}
2352 		if (nvids != 0) {
2353 			vnetp->nvids = nvids;
2354 			vnetp->vids = vids;
2355 		}
2356 
2357 		/* reset vlan-unaware peers (ver < 1.3) and restart handshake */
2358 		vgen_reset_vlan_unaware_ports(vgenp);
2359 
2360 	} else {
2361 
2362 		if (nvids != 0) {
2363 			kmem_free(vids, sizeof (uint16_t) * nvids);
2364 		}
2365 	}
2366 
2367 	if (updated & MD_mtu) {
2368 
2369 		DBG2(vgenp, NULL, "curr_mtu(%d) new_mtu(%d)\n",
2370 		    vnetp->mtu, mtu);
2371 
2372 		rv = vnet_mtu_update(vnetp, mtu);
2373 		if (rv == 0) {
2374 			vgenp->max_frame_size = mtu +
2375 			    sizeof (struct ether_header) + VLAN_TAGSZ;
2376 		}
2377 	}
2378 
2379 	if (updated & MD_pls) {
2380 		/* enable/disable physical link state updates */
2381 		vnetp->pls_update = pls_update;
2382 		mutex_exit(&vgenp->lock);
2383 
2384 		/* reset vsw-port to re-negotiate with the updated prop. */
2385 		vgen_reset_vsw_port(vgenp);
2386 
2387 		mutex_enter(&vgenp->lock);
2388 	}
2389 }
2390 
2391 /* add a new port to the device */
2392 static int
2393 vgen_add_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
2394 {
2395 	vgen_port_t	*portp;
2396 	int		rv;
2397 
2398 	portp = kmem_zalloc(sizeof (vgen_port_t), KM_SLEEP);
2399 
2400 	rv = vgen_port_read_props(portp, vgenp, mdp, mdex);
2401 	if (rv != DDI_SUCCESS) {
2402 		KMEM_FREE(portp);
2403 		return (DDI_FAILURE);
2404 	}
2405 
2406 	rv = vgen_port_attach(portp);
2407 	if (rv != DDI_SUCCESS) {
2408 		return (DDI_FAILURE);
2409 	}
2410 
2411 	return (DDI_SUCCESS);
2412 }
2413 
2414 /* read properties of the port from its md node */
2415 static int
2416 vgen_port_read_props(vgen_port_t *portp, vgen_t *vgenp, md_t *mdp,
2417 	mde_cookie_t mdex)
2418 {
2419 	uint64_t		port_num;
2420 	uint64_t		*ldc_ids;
2421 	uint64_t		macaddr;
2422 	uint64_t		val;
2423 	int			num_ldcs;
2424 	int			i;
2425 	int			addrsz;
2426 	int			num_nodes = 0;
2427 	int			listsz = 0;
2428 	mde_cookie_t		*listp = NULL;
2429 	uint8_t			*addrp;
2430 	struct ether_addr	ea;
2431 
2432 	/* read "id" property to get the port number */
2433 	if (md_get_prop_val(mdp, mdex, id_propname, &port_num)) {
2434 		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
2435 		return (DDI_FAILURE);
2436 	}
2437 
2438 	/*
2439 	 * Find the channel endpoint node(s) under this port node.
2440 	 */
2441 	if ((num_nodes = md_node_count(mdp)) <= 0) {
2442 		DWARN(vgenp, NULL, "invalid number of nodes found (%d)",
2443 		    num_nodes);
2444 		return (DDI_FAILURE);
2445 	}
2446 
2447 	/* allocate space for node list */
2448 	listsz = num_nodes * sizeof (mde_cookie_t);
2449 	listp = kmem_zalloc(listsz, KM_NOSLEEP);
2450 	if (listp == NULL)
2451 		return (DDI_FAILURE);
2452 
2453 	num_ldcs = md_scan_dag(mdp, mdex,
2454 	    md_find_name(mdp, channel_propname),
2455 	    md_find_name(mdp, "fwd"), listp);
2456 
2457 	if (num_ldcs <= 0) {
2458 		DWARN(vgenp, NULL, "can't find %s nodes", channel_propname);
2459 		kmem_free(listp, listsz);
2460 		return (DDI_FAILURE);
2461 	}
2462 
2463 	DBG2(vgenp, NULL, "num_ldcs %d", num_ldcs);
2464 
2465 	ldc_ids = kmem_zalloc(num_ldcs * sizeof (uint64_t), KM_NOSLEEP);
2466 	if (ldc_ids == NULL) {
2467 		kmem_free(listp, listsz);
2468 		return (DDI_FAILURE);
2469 	}
2470 
2471 	for (i = 0; i < num_ldcs; i++) {
2472 		/* read channel ids */
2473 		if (md_get_prop_val(mdp, listp[i], id_propname, &ldc_ids[i])) {
2474 			DWARN(vgenp, NULL, "prop(%s) not found\n",
2475 			    id_propname);
2476 			kmem_free(listp, listsz);
2477 			kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
2478 			return (DDI_FAILURE);
2479 		}
2480 		DBG2(vgenp, NULL, "ldc_id 0x%llx", ldc_ids[i]);
2481 	}
2482 
2483 	kmem_free(listp, listsz);
2484 
2485 	if (md_get_prop_data(mdp, mdex, rmacaddr_propname, &addrp,
2486 	    &addrsz)) {
2487 		DWARN(vgenp, NULL, "prop(%s) not found\n", rmacaddr_propname);
2488 		kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
2489 		return (DDI_FAILURE);
2490 	}
2491 
2492 	if (addrsz < ETHERADDRL) {
2493 		DWARN(vgenp, NULL, "invalid address size (%d)\n", addrsz);
2494 		kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
2495 		return (DDI_FAILURE);
2496 	}
2497 
2498 	macaddr = *((uint64_t *)addrp);
2499 
2500 	DBG2(vgenp, NULL, "remote mac address 0x%llx\n", macaddr);
2501 
2502 	for (i = ETHERADDRL - 1; i >= 0; i--) {
2503 		ea.ether_addr_octet[i] = macaddr & 0xFF;
2504 		macaddr >>= 8;
2505 	}
2506 
2507 	if (!(md_get_prop_val(mdp, mdex, swport_propname, &val))) {
2508 		if (val == 0) {
2509 			/* This port is connected to the vswitch */
2510 			portp->is_vsw_port = B_TRUE;
2511 		} else {
2512 			portp->is_vsw_port = B_FALSE;
2513 		}
2514 	}
2515 
2516 	/* now update all properties into the port */
2517 	portp->vgenp = vgenp;
2518 	portp->port_num = port_num;
2519 	ether_copy(&ea, &portp->macaddr);
2520 	portp->ldc_ids = kmem_zalloc(sizeof (uint64_t) * num_ldcs, KM_SLEEP);
2521 	bcopy(ldc_ids, portp->ldc_ids, sizeof (uint64_t) * num_ldcs);
2522 	portp->num_ldcs = num_ldcs;
2523 
2524 	/* read vlan id properties of this port node */
2525 	vgen_vlan_read_ids(portp, VGEN_PEER, mdp, mdex, &portp->pvid,
2526 	    &portp->vids, &portp->nvids, NULL);
2527 
2528 	kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
2529 
2530 	return (DDI_SUCCESS);
2531 }
2532 
2533 /* remove a port from the device */
2534 static int
2535 vgen_remove_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
2536 {
2537 	uint64_t	port_num;
2538 	vgen_port_t	*portp;
2539 	vgen_portlist_t	*plistp;
2540 
2541 	/* read "id" property to get the port number */
2542 	if (md_get_prop_val(mdp, mdex, id_propname, &port_num)) {
2543 		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
2544 		return (DDI_FAILURE);
2545 	}
2546 
2547 	plistp = &(vgenp->vgenports);
2548 
2549 	WRITE_ENTER(&plistp->rwlock);
2550 	portp = vgen_port_lookup(plistp, (int)port_num);
2551 	if (portp == NULL) {
2552 		DWARN(vgenp, NULL, "can't find port(%lx)\n", port_num);
2553 		RW_EXIT(&plistp->rwlock);
2554 		return (DDI_FAILURE);
2555 	}
2556 
2557 	vgen_port_detach_mdeg(portp);
2558 	RW_EXIT(&plistp->rwlock);
2559 
2560 	return (DDI_SUCCESS);
2561 }
2562 
2563 /* attach a port to the device based on mdeg data */
2564 static int
2565 vgen_port_attach(vgen_port_t *portp)
2566 {
2567 	int			i;
2568 	vgen_portlist_t		*plistp;
2569 	vgen_t			*vgenp;
2570 	uint64_t		*ldcids;
2571 	uint32_t		num_ldcs;
2572 	mac_register_t		*macp;
2573 	vio_net_res_type_t	type;
2574 	int			rv;
2575 
2576 	ASSERT(portp != NULL);
2577 
2578 	vgenp = portp->vgenp;
2579 	ldcids = portp->ldc_ids;
2580 	num_ldcs = portp->num_ldcs;
2581 
2582 	DBG1(vgenp, NULL, "port_num(%d)\n", portp->port_num);
2583 
2584 	mutex_init(&portp->lock, NULL, MUTEX_DRIVER, NULL);
2585 	rw_init(&portp->ldclist.rwlock, NULL, RW_DRIVER, NULL);
2586 	portp->ldclist.headp = NULL;
2587 
2588 	for (i = 0; i < num_ldcs; i++) {
2589 		DBG2(vgenp, NULL, "ldcid (%lx)\n", ldcids[i]);
2590 		if (vgen_ldc_attach(portp, ldcids[i]) == DDI_FAILURE) {
2591 			vgen_port_detach(portp);
2592 			return (DDI_FAILURE);
2593 		}
2594 	}
2595 
2596 	/* create vlan id hash table */
2597 	vgen_vlan_create_hash(portp);
2598 
2599 	if (portp->is_vsw_port == B_TRUE) {
2600 		/* This port is connected to the switch port */
2601 		(void) atomic_swap_32(&portp->use_vsw_port, B_FALSE);
2602 		type = VIO_NET_RES_LDC_SERVICE;
2603 	} else {
2604 		(void) atomic_swap_32(&portp->use_vsw_port, B_TRUE);
2605 		type = VIO_NET_RES_LDC_GUEST;
2606 	}
2607 
2608 	if ((macp = mac_alloc(MAC_VERSION)) == NULL) {
2609 		vgen_port_detach(portp);
2610 		return (DDI_FAILURE);
2611 	}
2612 	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
2613 	macp->m_driver = portp;
2614 	macp->m_dip = vgenp->vnetdip;
2615 	macp->m_src_addr = (uint8_t *)&(vgenp->macaddr);
2616 	macp->m_callbacks = &vgen_m_callbacks;
2617 	macp->m_min_sdu = 0;
2618 	macp->m_max_sdu = ETHERMTU;
2619 
2620 	mutex_enter(&portp->lock);
2621 	rv = vio_net_resource_reg(macp, type, vgenp->macaddr,
2622 	    portp->macaddr, &portp->vhp, &portp->vcb);
2623 	mutex_exit(&portp->lock);
2624 	mac_free(macp);
2625 
2626 	if (rv == 0) {
2627 		/* link it into the list of ports */
2628 		plistp = &(vgenp->vgenports);
2629 		WRITE_ENTER(&plistp->rwlock);
2630 		vgen_port_list_insert(portp);
2631 		RW_EXIT(&plistp->rwlock);
2632 
2633 		if (portp->is_vsw_port == B_TRUE) {
2634 			/* We now have the vswitch port attached */
2635 			vgenp->vsw_portp = portp;
2636 			(void) atomic_swap_32(&vgenp->vsw_port_refcnt, 0);
2637 		}
2638 	} else {
2639 		DERR(vgenp, NULL, "vio_net_resource_reg failed for portp=0x%p",
2640 		    portp);
2641 		vgen_port_detach(portp);
2642 	}
2643 
2644 	DBG1(vgenp, NULL, "exit: port_num(%d)\n", portp->port_num);
2645 	return (DDI_SUCCESS);
2646 }
2647 
2648 /* detach a port from the device based on mdeg data */
2649 static void
2650 vgen_port_detach_mdeg(vgen_port_t *portp)
2651 {
2652 	vgen_t *vgenp = portp->vgenp;
2653 
2654 	DBG1(vgenp, NULL, "enter: port_num(%d)\n", portp->port_num);
2655 
2656 	mutex_enter(&portp->lock);
2657 
2658 	/* stop the port if needed */
2659 	if (portp->flags & VGEN_STARTED) {
2660 		vgen_port_uninit(portp);
2661 	}
2662 
2663 	mutex_exit(&portp->lock);
2664 	vgen_port_detach(portp);
2665 
2666 	DBG1(vgenp, NULL, "exit: port_num(%d)\n", portp->port_num);
2667 }
2668 
2669 static int
2670 vgen_update_port(vgen_t *vgenp, md_t *curr_mdp, mde_cookie_t curr_mdex,
2671 	md_t *prev_mdp, mde_cookie_t prev_mdex)
2672 {
2673 	uint64_t	cport_num;
2674 	uint64_t	pport_num;
2675 	vgen_portlist_t	*plistp;
2676 	vgen_port_t	*portp;
2677 	boolean_t	updated_vlans = B_FALSE;
2678 	uint16_t	pvid;
2679 	uint16_t	*vids;
2680 	uint16_t	nvids;
2681 
2682 	/*
2683 	 * For now, we get port updates only if vlan ids changed.
2684 	 * We read the port num and do some sanity check.
2685 	 */
2686 	if (md_get_prop_val(curr_mdp, curr_mdex, id_propname, &cport_num)) {
2687 		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
2688 		return (DDI_FAILURE);
2689 	}
2690 
2691 	if (md_get_prop_val(prev_mdp, prev_mdex, id_propname, &pport_num)) {
2692 		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
2693 		return (DDI_FAILURE);
2694 	}
2695 	if (cport_num != pport_num)
2696 		return (DDI_FAILURE);
2697 
2698 	plistp = &(vgenp->vgenports);
2699 
2700 	READ_ENTER(&plistp->rwlock);
2701 
2702 	portp = vgen_port_lookup(plistp, (int)cport_num);
2703 	if (portp == NULL) {
2704 		DWARN(vgenp, NULL, "can't find port(%lx)\n", cport_num);
2705 		RW_EXIT(&plistp->rwlock);
2706 		return (DDI_FAILURE);
2707 	}
2708 
2709 	/* Read the vlan ids */
2710 	vgen_vlan_read_ids(portp, VGEN_PEER, curr_mdp, curr_mdex, &pvid, &vids,
2711 	    &nvids, NULL);
2712 
2713 	/* Determine if there are any vlan id updates */
2714 	if ((pvid != portp->pvid) ||		/* pvid changed? */
2715 	    (nvids != portp->nvids) ||		/* # of vids changed? */
2716 	    ((nvids != 0) && (portp->nvids != 0) &&	/* vids changed? */
2717 	    bcmp(vids, portp->vids, sizeof (uint16_t) * nvids))) {
2718 		updated_vlans = B_TRUE;
2719 	}
2720 
2721 	if (updated_vlans == B_FALSE) {
2722 		RW_EXIT(&plistp->rwlock);
2723 		return (DDI_FAILURE);
2724 	}
2725 
2726 	/* remove the port from vlans it has been assigned to */
2727 	vgen_vlan_remove_ids(portp);
2728 
2729 	/* save the new vlan ids */
2730 	portp->pvid = pvid;
2731 	if (portp->nvids != 0) {
2732 		kmem_free(portp->vids, sizeof (uint16_t) * portp->nvids);
2733 		portp->nvids = 0;
2734 	}
2735 	if (nvids != 0) {
2736 		portp->vids = kmem_zalloc(sizeof (uint16_t) * nvids, KM_SLEEP);
2737 		bcopy(vids, portp->vids, sizeof (uint16_t) * nvids);
2738 		portp->nvids = nvids;
2739 		kmem_free(vids, sizeof (uint16_t) * nvids);
2740 	}
2741 
2742 	/* add port to the new vlans */
2743 	vgen_vlan_add_ids(portp);
2744 
2745 	/* reset the port if it is vlan unaware (ver < 1.3) */
2746 	vgen_vlan_unaware_port_reset(portp);
2747 
2748 	RW_EXIT(&plistp->rwlock);
2749 
2750 	return (DDI_SUCCESS);
2751 }
2752 
2753 static uint64_t
2754 vgen_port_stat(vgen_port_t *portp, uint_t stat)
2755 {
2756 	vgen_ldclist_t	*ldclp;
2757 	vgen_ldc_t *ldcp;
2758 	uint64_t	val;
2759 
2760 	val = 0;
2761 	ldclp = &portp->ldclist;
2762 
2763 	READ_ENTER(&ldclp->rwlock);
2764 	for (ldcp = ldclp->headp; ldcp != NULL; ldcp = ldcp->nextp) {
2765 		val += vgen_ldc_stat(ldcp, stat);
2766 	}
2767 	RW_EXIT(&ldclp->rwlock);
2768 
2769 	return (val);
2770 }
2771 
2772 /* allocate receive resources */
2773 static int
2774 vgen_init_multipools(vgen_ldc_t *ldcp)
2775 {
2776 	size_t		data_sz;
2777 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
2778 	int		status;
2779 	uint32_t	sz1 = 0;
2780 	uint32_t	sz2 = 0;
2781 	uint32_t	sz3 = 0;
2782 	uint32_t	sz4 = 0;
2783 
2784 	/*
2785 	 * We round up the mtu specified to be a multiple of 2K.
2786 	 * We then create rx pools based on the rounded up size.
2787 	 */
2788 	data_sz = vgenp->max_frame_size + VNET_IPALIGN + VNET_LDCALIGN;
2789 	data_sz = VNET_ROUNDUP_2K(data_sz);
2790 
2791 	/*
2792 	 * If pool sizes are specified, use them. Note that the presence of
2793 	 * the first tunable will be used as a hint.
2794 	 */
2795 	if (vgen_rbufsz1 != 0) {
2796 
2797 		sz1 = vgen_rbufsz1;
2798 		sz2 = vgen_rbufsz2;
2799 		sz3 = vgen_rbufsz3;
2800 		sz4 = vgen_rbufsz4;
2801 
2802 		if (sz4 == 0) { /* need 3 pools */
2803 
2804 			ldcp->max_rxpool_size = sz3;
2805 			status = vio_init_multipools(&ldcp->vmp,
2806 			    VGEN_NUM_VMPOOLS, sz1, sz2, sz3, vgen_nrbufs1,
2807 			    vgen_nrbufs2, vgen_nrbufs3);
2808 
2809 		} else {
2810 
2811 			ldcp->max_rxpool_size = sz4;
2812 			status = vio_init_multipools(&ldcp->vmp,
2813 			    VGEN_NUM_VMPOOLS + 1, sz1, sz2, sz3, sz4,
2814 			    vgen_nrbufs1, vgen_nrbufs2, vgen_nrbufs3,
2815 			    vgen_nrbufs4);
2816 		}
2817 		return (status);
2818 	}
2819 
2820 	/*
2821 	 * Pool sizes are not specified. We select the pool sizes based on the
2822 	 * mtu if vnet_jumbo_rxpools is enabled.
2823 	 */
2824 	if (vnet_jumbo_rxpools == B_FALSE || data_sz == VNET_2K) {
2825 		/*
2826 		 * Receive buffer pool allocation based on mtu is disabled.
2827 		 * Use the default mechanism of standard size pool allocation.
2828 		 */
2829 		sz1 = VGEN_DBLK_SZ_128;
2830 		sz2 = VGEN_DBLK_SZ_256;
2831 		sz3 = VGEN_DBLK_SZ_2048;
2832 		ldcp->max_rxpool_size = sz3;
2833 
2834 		status = vio_init_multipools(&ldcp->vmp, VGEN_NUM_VMPOOLS,
2835 		    sz1, sz2, sz3,
2836 		    vgen_nrbufs1, vgen_nrbufs2, vgen_nrbufs3);
2837 
2838 		return (status);
2839 	}
2840 
2841 	switch (data_sz) {
2842 
2843 	case VNET_4K:
2844 
2845 		sz1 = VGEN_DBLK_SZ_128;
2846 		sz2 = VGEN_DBLK_SZ_256;
2847 		sz3 = VGEN_DBLK_SZ_2048;
2848 		sz4 = sz3 << 1;			/* 4K */
2849 		ldcp->max_rxpool_size = sz4;
2850 
2851 		status = vio_init_multipools(&ldcp->vmp, VGEN_NUM_VMPOOLS + 1,
2852 		    sz1, sz2, sz3, sz4,
2853 		    vgen_nrbufs1, vgen_nrbufs2, vgen_nrbufs3, vgen_nrbufs4);
2854 		break;
2855 
2856 	default:	/* data_sz:  4K+ to 16K */
2857 
2858 		sz1 = VGEN_DBLK_SZ_256;
2859 		sz2 = VGEN_DBLK_SZ_2048;
2860 		sz3 = data_sz >> 1;	/* Jumbo-size/2 */
2861 		sz4 = data_sz;		/* Jumbo-size  */
2862 		ldcp->max_rxpool_size = sz4;
2863 
2864 		status = vio_init_multipools(&ldcp->vmp, VGEN_NUM_VMPOOLS + 1,
2865 		    sz1, sz2, sz3, sz4,
2866 		    vgen_nrbufs1, vgen_nrbufs2, vgen_nrbufs3, vgen_nrbufs4);
2867 		break;
2868 
2869 	}
2870 
2871 	return (status);
2872 }
2873 
2874 /* attach the channel corresponding to the given ldc_id to the port */
2875 static int
2876 vgen_ldc_attach(vgen_port_t *portp, uint64_t ldc_id)
2877 {
2878 	vgen_t 		*vgenp;
2879 	vgen_ldclist_t	*ldclp;
2880 	vgen_ldc_t 	*ldcp, **prev_ldcp;
2881 	ldc_attr_t 	attr;
2882 	int 		status;
2883 	ldc_status_t	istatus;
2884 	char		kname[MAXNAMELEN];
2885 	int		instance;
2886 	enum	{AST_init = 0x0, AST_ldc_alloc = 0x1,
2887 		AST_mutex_init = 0x2, AST_ldc_init = 0x4,
2888 		AST_ldc_reg_cb = 0x8, AST_alloc_tx_ring = 0x10,
2889 		AST_create_rxmblks = 0x20,
2890 		AST_create_rcv_thread = 0x40} attach_state;
2891 
2892 	attach_state = AST_init;
2893 	vgenp = portp->vgenp;
2894 	ldclp = &portp->ldclist;
2895 
2896 	ldcp = kmem_zalloc(sizeof (vgen_ldc_t), KM_NOSLEEP);
2897 	if (ldcp == NULL) {
2898 		goto ldc_attach_failed;
2899 	}
2900 	ldcp->ldc_id = ldc_id;
2901 	ldcp->portp = portp;
2902 
2903 	attach_state |= AST_ldc_alloc;
2904 
2905 	mutex_init(&ldcp->txlock, NULL, MUTEX_DRIVER, NULL);
2906 	mutex_init(&ldcp->cblock, NULL, MUTEX_DRIVER, NULL);
2907 	mutex_init(&ldcp->tclock, NULL, MUTEX_DRIVER, NULL);
2908 	mutex_init(&ldcp->wrlock, NULL, MUTEX_DRIVER, NULL);
2909 	mutex_init(&ldcp->rxlock, NULL, MUTEX_DRIVER, NULL);
2910 
2911 	attach_state |= AST_mutex_init;
2912 
2913 	attr.devclass = LDC_DEV_NT;
2914 	attr.instance = vgenp->instance;
2915 	attr.mode = LDC_MODE_UNRELIABLE;
2916 	attr.mtu = vnet_ldc_mtu;
2917 	status = ldc_init(ldc_id, &attr, &ldcp->ldc_handle);
2918 	if (status != 0) {
2919 		DWARN(vgenp, ldcp, "ldc_init failed,rv (%d)\n", status);
2920 		goto ldc_attach_failed;
2921 	}
2922 	attach_state |= AST_ldc_init;
2923 
2924 	if (vgen_rcv_thread_enabled) {
2925 		ldcp->rcv_thr_flags = 0;
2926 
2927 		mutex_init(&ldcp->rcv_thr_lock, NULL, MUTEX_DRIVER, NULL);
2928 		cv_init(&ldcp->rcv_thr_cv, NULL, CV_DRIVER, NULL);
2929 		ldcp->rcv_thread = thread_create(NULL, 2 * DEFAULTSTKSZ,
2930 		    vgen_ldc_rcv_worker, ldcp, 0, &p0, TS_RUN, maxclsyspri);
2931 
2932 		attach_state |= AST_create_rcv_thread;
2933 		if (ldcp->rcv_thread == NULL) {
2934 			DWARN(vgenp, ldcp, "Failed to create worker thread");
2935 			goto ldc_attach_failed;
2936 		}
2937 	}
2938 
2939 	status = ldc_reg_callback(ldcp->ldc_handle, vgen_ldc_cb, (caddr_t)ldcp);
2940 	if (status != 0) {
2941 		DWARN(vgenp, ldcp, "ldc_reg_callback failed, rv (%d)\n",
2942 		    status);
2943 		goto ldc_attach_failed;
2944 	}
2945 	/*
2946 	 * allocate a message for ldc_read()s, big enough to hold ctrl and
2947 	 * data msgs, including raw data msgs used to recv priority frames.
2948 	 */
2949 	ldcp->msglen = VIO_PKT_DATA_HDRSIZE + vgenp->max_frame_size;
2950 	ldcp->ldcmsg = kmem_alloc(ldcp->msglen, KM_SLEEP);
2951 	attach_state |= AST_ldc_reg_cb;
2952 
2953 	(void) ldc_status(ldcp->ldc_handle, &istatus);
2954 	ASSERT(istatus == LDC_INIT);
2955 	ldcp->ldc_status = istatus;
2956 
2957 	/* allocate transmit resources */
2958 	status = vgen_alloc_tx_ring(ldcp);
2959 	if (status != 0) {
2960 		goto ldc_attach_failed;
2961 	}
2962 	attach_state |= AST_alloc_tx_ring;
2963 
2964 	/* allocate receive resources */
2965 	status = vgen_init_multipools(ldcp);
2966 	if (status != 0) {
2967 		/*
2968 		 * We do not return failure if receive mblk pools can't be
2969 		 * allocated; instead allocb(9F) will be used to dynamically
2970 		 * allocate buffers during receive.
2971 		 */
2972 		DWARN(vgenp, ldcp,
2973 		    "vnet%d: status(%d), failed to allocate rx mblk pools for "
2974 		    "channel(0x%lx)\n",
2975 		    vgenp->instance, status, ldcp->ldc_id);
2976 	} else {
2977 		attach_state |= AST_create_rxmblks;
2978 	}
2979 
2980 	/* Setup kstats for the channel */
2981 	instance = vgenp->instance;
2982 	(void) sprintf(kname, "vnetldc0x%lx", ldcp->ldc_id);
2983 	ldcp->ksp = vgen_setup_kstats("vnet", instance, kname, &ldcp->stats);
2984 	if (ldcp->ksp == NULL) {
2985 		goto ldc_attach_failed;
2986 	}
2987 
2988 	/* initialize vgen_versions supported */
2989 	bcopy(vgen_versions, ldcp->vgen_versions, sizeof (ldcp->vgen_versions));
2990 	vgen_reset_vnet_proto_ops(ldcp);
2991 
2992 	/* link it into the list of channels for this port */
2993 	WRITE_ENTER(&ldclp->rwlock);
2994 	prev_ldcp = (vgen_ldc_t **)(&ldclp->headp);
2995 	ldcp->nextp = *prev_ldcp;
2996 	*prev_ldcp = ldcp;
2997 	RW_EXIT(&ldclp->rwlock);
2998 
2999 	ldcp->link_state = LINK_STATE_UNKNOWN;
3000 #ifdef	VNET_IOC_DEBUG
3001 	ldcp->link_down_forced = B_FALSE;
3002 #endif
3003 	ldcp->flags |= CHANNEL_ATTACHED;
3004 	return (DDI_SUCCESS);
3005 
3006 ldc_attach_failed:
3007 	if (attach_state & AST_ldc_reg_cb) {
3008 		(void) ldc_unreg_callback(ldcp->ldc_handle);
3009 		kmem_free(ldcp->ldcmsg, ldcp->msglen);
3010 	}
3011 	if (attach_state & AST_create_rcv_thread) {
3012 		if (ldcp->rcv_thread != NULL) {
3013 			vgen_stop_rcv_thread(ldcp);
3014 		}
3015 		mutex_destroy(&ldcp->rcv_thr_lock);
3016 		cv_destroy(&ldcp->rcv_thr_cv);
3017 	}
3018 	if (attach_state & AST_create_rxmblks) {
3019 		vio_mblk_pool_t *fvmp = NULL;
3020 		vio_destroy_multipools(&ldcp->vmp, &fvmp);
3021 		ASSERT(fvmp == NULL);
3022 	}
3023 	if (attach_state & AST_alloc_tx_ring) {
3024 		vgen_free_tx_ring(ldcp);
3025 	}
3026 	if (attach_state & AST_ldc_init) {
3027 		(void) ldc_fini(ldcp->ldc_handle);
3028 	}
3029 	if (attach_state & AST_mutex_init) {
3030 		mutex_destroy(&ldcp->tclock);
3031 		mutex_destroy(&ldcp->txlock);
3032 		mutex_destroy(&ldcp->cblock);
3033 		mutex_destroy(&ldcp->wrlock);
3034 		mutex_destroy(&ldcp->rxlock);
3035 	}
3036 	if (attach_state & AST_ldc_alloc) {
3037 		KMEM_FREE(ldcp);
3038 	}
3039 	return (DDI_FAILURE);
3040 }
3041 
3042 /* detach a channel from the port */
3043 static void
3044 vgen_ldc_detach(vgen_ldc_t *ldcp)
3045 {
3046 	vgen_port_t	*portp;
3047 	vgen_t 		*vgenp;
3048 	vgen_ldc_t 	*pldcp;
3049 	vgen_ldc_t	**prev_ldcp;
3050 	vgen_ldclist_t	*ldclp;
3051 
3052 	portp = ldcp->portp;
3053 	vgenp = portp->vgenp;
3054 	ldclp = &portp->ldclist;
3055 
3056 	prev_ldcp =  (vgen_ldc_t **)&ldclp->headp;
3057 	for (; (pldcp = *prev_ldcp) != NULL; prev_ldcp = &pldcp->nextp) {
3058 		if (pldcp == ldcp) {
3059 			break;
3060 		}
3061 	}
3062 
3063 	if (pldcp == NULL) {
3064 		/* invalid ldcp? */
3065 		return;
3066 	}
3067 
3068 	if (ldcp->ldc_status != LDC_INIT) {
3069 		DWARN(vgenp, ldcp, "ldc_status is not INIT\n");
3070 	}
3071 
3072 	if (ldcp->flags & CHANNEL_ATTACHED) {
3073 		ldcp->flags &= ~(CHANNEL_ATTACHED);
3074 
3075 		(void) ldc_unreg_callback(ldcp->ldc_handle);
3076 		if (ldcp->rcv_thread != NULL) {
3077 			/* First stop the receive thread */
3078 			vgen_stop_rcv_thread(ldcp);
3079 			mutex_destroy(&ldcp->rcv_thr_lock);
3080 			cv_destroy(&ldcp->rcv_thr_cv);
3081 		}
3082 		kmem_free(ldcp->ldcmsg, ldcp->msglen);
3083 
3084 		vgen_destroy_kstats(ldcp->ksp);
3085 		ldcp->ksp = NULL;
3086 
3087 		/*
3088 		 * if we cannot reclaim all mblks, put this
3089 		 * on the list of pools(vgenp->rmp) to be reclaimed when the
3090 		 * device gets detached (see vgen_uninit()).
3091 		 */
3092 		vio_destroy_multipools(&ldcp->vmp, &vgenp->rmp);
3093 
3094 		/* free transmit resources */
3095 		vgen_free_tx_ring(ldcp);
3096 
3097 		(void) ldc_fini(ldcp->ldc_handle);
3098 		mutex_destroy(&ldcp->tclock);
3099 		mutex_destroy(&ldcp->txlock);
3100 		mutex_destroy(&ldcp->cblock);
3101 		mutex_destroy(&ldcp->wrlock);
3102 		mutex_destroy(&ldcp->rxlock);
3103 
3104 		/* unlink it from the list */
3105 		*prev_ldcp = ldcp->nextp;
3106 		KMEM_FREE(ldcp);
3107 	}
3108 }
3109 
3110 /*
3111  * This function allocates transmit resources for the channel.
3112  * The resources consist of a transmit descriptor ring and an associated
3113  * transmit buffer ring.
3114  */
3115 static int
3116 vgen_alloc_tx_ring(vgen_ldc_t *ldcp)
3117 {
3118 	void *tbufp;
3119 	ldc_mem_info_t minfo;
3120 	uint32_t txdsize;
3121 	uint32_t tbufsize;
3122 	int status;
3123 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3124 
3125 	ldcp->num_txds = vnet_ntxds;
3126 	txdsize = sizeof (vnet_public_desc_t);
3127 	tbufsize = sizeof (vgen_private_desc_t);
3128 
3129 	/* allocate transmit buffer ring */
3130 	tbufp = kmem_zalloc(ldcp->num_txds * tbufsize, KM_NOSLEEP);
3131 	if (tbufp == NULL) {
3132 		return (DDI_FAILURE);
3133 	}
3134 
3135 	/* create transmit descriptor ring */
3136 	status = ldc_mem_dring_create(ldcp->num_txds, txdsize,
3137 	    &ldcp->tx_dhandle);
3138 	if (status) {
3139 		DWARN(vgenp, ldcp, "ldc_mem_dring_create() failed\n");
3140 		kmem_free(tbufp, ldcp->num_txds * tbufsize);
3141 		return (DDI_FAILURE);
3142 	}
3143 
3144 	/* get the addr of descripror ring */
3145 	status = ldc_mem_dring_info(ldcp->tx_dhandle, &minfo);
3146 	if (status) {
3147 		DWARN(vgenp, ldcp, "ldc_mem_dring_info() failed\n");
3148 		kmem_free(tbufp, ldcp->num_txds * tbufsize);
3149 		(void) ldc_mem_dring_destroy(ldcp->tx_dhandle);
3150 		ldcp->tbufp = NULL;
3151 		return (DDI_FAILURE);
3152 	}
3153 	ldcp->txdp = (vnet_public_desc_t *)(minfo.vaddr);
3154 	ldcp->tbufp = tbufp;
3155 
3156 	ldcp->txdendp = &((ldcp->txdp)[ldcp->num_txds]);
3157 	ldcp->tbufendp = &((ldcp->tbufp)[ldcp->num_txds]);
3158 
3159 	return (DDI_SUCCESS);
3160 }
3161 
3162 /* Free transmit resources for the channel */
3163 static void
3164 vgen_free_tx_ring(vgen_ldc_t *ldcp)
3165 {
3166 	int tbufsize = sizeof (vgen_private_desc_t);
3167 
3168 	/* free transmit descriptor ring */
3169 	(void) ldc_mem_dring_destroy(ldcp->tx_dhandle);
3170 
3171 	/* free transmit buffer ring */
3172 	kmem_free(ldcp->tbufp, ldcp->num_txds * tbufsize);
3173 	ldcp->txdp = ldcp->txdendp = NULL;
3174 	ldcp->tbufp = ldcp->tbufendp = NULL;
3175 }
3176 
3177 /* enable transmit/receive on the channels for the port */
3178 static void
3179 vgen_init_ldcs(vgen_port_t *portp)
3180 {
3181 	vgen_ldclist_t	*ldclp = &portp->ldclist;
3182 	vgen_ldc_t	*ldcp;
3183 
3184 	READ_ENTER(&ldclp->rwlock);
3185 	ldcp =  ldclp->headp;
3186 	for (; ldcp  != NULL; ldcp = ldcp->nextp) {
3187 		(void) vgen_ldc_init(ldcp);
3188 	}
3189 	RW_EXIT(&ldclp->rwlock);
3190 }
3191 
3192 /* stop transmit/receive on the channels for the port */
3193 static void
3194 vgen_uninit_ldcs(vgen_port_t *portp)
3195 {
3196 	vgen_ldclist_t	*ldclp = &portp->ldclist;
3197 	vgen_ldc_t	*ldcp;
3198 
3199 	READ_ENTER(&ldclp->rwlock);
3200 	ldcp =  ldclp->headp;
3201 	for (; ldcp  != NULL; ldcp = ldcp->nextp) {
3202 		vgen_ldc_uninit(ldcp);
3203 	}
3204 	RW_EXIT(&ldclp->rwlock);
3205 }
3206 
3207 /* enable transmit/receive on the channel */
3208 static int
3209 vgen_ldc_init(vgen_ldc_t *ldcp)
3210 {
3211 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3212 	ldc_status_t	istatus;
3213 	int		rv;
3214 	uint32_t	retries = 0;
3215 	enum	{ ST_init = 0x0, ST_ldc_open = 0x1,
3216 		ST_init_tbufs = 0x2, ST_cb_enable = 0x4} init_state;
3217 	init_state = ST_init;
3218 
3219 	DBG1(vgenp, ldcp, "enter\n");
3220 	LDC_LOCK(ldcp);
3221 
3222 	rv = ldc_open(ldcp->ldc_handle);
3223 	if (rv != 0) {
3224 		DWARN(vgenp, ldcp, "ldc_open failed: rv(%d)\n", rv);
3225 		goto ldcinit_failed;
3226 	}
3227 	init_state |= ST_ldc_open;
3228 
3229 	(void) ldc_status(ldcp->ldc_handle, &istatus);
3230 	if (istatus != LDC_OPEN && istatus != LDC_READY) {
3231 		DWARN(vgenp, ldcp, "status(%d) is not OPEN/READY\n", istatus);
3232 		goto ldcinit_failed;
3233 	}
3234 	ldcp->ldc_status = istatus;
3235 
3236 	rv = vgen_init_tbufs(ldcp);
3237 	if (rv != 0) {
3238 		DWARN(vgenp, ldcp, "vgen_init_tbufs() failed\n");
3239 		goto ldcinit_failed;
3240 	}
3241 	init_state |= ST_init_tbufs;
3242 
3243 	rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_ENABLE);
3244 	if (rv != 0) {
3245 		DWARN(vgenp, ldcp, "ldc_set_cb_mode failed: rv(%d)\n", rv);
3246 		goto ldcinit_failed;
3247 	}
3248 
3249 	init_state |= ST_cb_enable;
3250 
3251 	do {
3252 		rv = ldc_up(ldcp->ldc_handle);
3253 		if ((rv != 0) && (rv == EWOULDBLOCK)) {
3254 			DBG2(vgenp, ldcp, "ldc_up err rv(%d)\n", rv);
3255 			drv_usecwait(VGEN_LDC_UP_DELAY);
3256 		}
3257 		if (retries++ >= vgen_ldcup_retries)
3258 			break;
3259 	} while (rv == EWOULDBLOCK);
3260 
3261 	(void) ldc_status(ldcp->ldc_handle, &istatus);
3262 	if (istatus == LDC_UP) {
3263 		DWARN(vgenp, ldcp, "status(%d) is UP\n", istatus);
3264 	}
3265 
3266 	ldcp->ldc_status = istatus;
3267 
3268 	/* initialize transmit watchdog timeout */
3269 	ldcp->wd_tid = timeout(vgen_ldc_watchdog, (caddr_t)ldcp,
3270 	    drv_usectohz(vnet_ldcwd_interval * 1000));
3271 
3272 	ldcp->hphase = -1;
3273 	ldcp->flags |= CHANNEL_STARTED;
3274 
3275 	/* if channel is already UP - start handshake */
3276 	if (istatus == LDC_UP) {
3277 		vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3278 		if (ldcp->portp != vgenp->vsw_portp) {
3279 			/*
3280 			 * As the channel is up, use this port from now on.
3281 			 */
3282 			(void) atomic_swap_32(
3283 			    &ldcp->portp->use_vsw_port, B_FALSE);
3284 		}
3285 
3286 		/* Initialize local session id */
3287 		ldcp->local_sid = ddi_get_lbolt();
3288 
3289 		/* clear peer session id */
3290 		ldcp->peer_sid = 0;
3291 		ldcp->hretries = 0;
3292 
3293 		/* Initiate Handshake process with peer ldc endpoint */
3294 		vgen_reset_hphase(ldcp);
3295 
3296 		mutex_exit(&ldcp->tclock);
3297 		mutex_exit(&ldcp->txlock);
3298 		mutex_exit(&ldcp->wrlock);
3299 		mutex_exit(&ldcp->rxlock);
3300 		vgen_handshake(vh_nextphase(ldcp));
3301 		mutex_exit(&ldcp->cblock);
3302 	} else {
3303 		LDC_UNLOCK(ldcp);
3304 	}
3305 
3306 	return (DDI_SUCCESS);
3307 
3308 ldcinit_failed:
3309 	if (init_state & ST_cb_enable) {
3310 		(void) ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE);
3311 	}
3312 	if (init_state & ST_init_tbufs) {
3313 		vgen_uninit_tbufs(ldcp);
3314 	}
3315 	if (init_state & ST_ldc_open) {
3316 		(void) ldc_close(ldcp->ldc_handle);
3317 	}
3318 	LDC_UNLOCK(ldcp);
3319 	DBG1(vgenp, ldcp, "exit\n");
3320 	return (DDI_FAILURE);
3321 }
3322 
3323 /* stop transmit/receive on the channel */
3324 static void
3325 vgen_ldc_uninit(vgen_ldc_t *ldcp)
3326 {
3327 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3328 	int	rv;
3329 	uint_t	retries = 0;
3330 
3331 	DBG1(vgenp, ldcp, "enter\n");
3332 	LDC_LOCK(ldcp);
3333 
3334 	if ((ldcp->flags & CHANNEL_STARTED) == 0) {
3335 		LDC_UNLOCK(ldcp);
3336 		DWARN(vgenp, ldcp, "CHANNEL_STARTED flag is not set\n");
3337 		return;
3338 	}
3339 
3340 	/* disable further callbacks */
3341 	rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE);
3342 	if (rv != 0) {
3343 		DWARN(vgenp, ldcp, "ldc_set_cb_mode failed\n");
3344 	}
3345 
3346 	/*
3347 	 * clear handshake done bit and wait for pending tx and cb to finish.
3348 	 * release locks before untimeout(9F) is invoked to cancel timeouts.
3349 	 */
3350 	ldcp->hphase &= ~(VH_DONE);
3351 	LDC_UNLOCK(ldcp);
3352 
3353 	if (vgenp->vsw_portp == ldcp->portp) {
3354 		vio_net_report_err_t rep_err =
3355 		    ldcp->portp->vcb.vio_net_report_err;
3356 		rep_err(ldcp->portp->vhp, VIO_NET_RES_DOWN);
3357 	}
3358 
3359 	/* cancel handshake watchdog timeout */
3360 	if (ldcp->htid) {
3361 		(void) untimeout(ldcp->htid);
3362 		ldcp->htid = 0;
3363 	}
3364 
3365 	if (ldcp->cancel_htid) {
3366 		(void) untimeout(ldcp->cancel_htid);
3367 		ldcp->cancel_htid = 0;
3368 	}
3369 
3370 	/* cancel transmit watchdog timeout */
3371 	if (ldcp->wd_tid) {
3372 		(void) untimeout(ldcp->wd_tid);
3373 		ldcp->wd_tid = 0;
3374 	}
3375 
3376 	drv_usecwait(1000);
3377 
3378 	if (ldcp->rcv_thread != NULL) {
3379 		/*
3380 		 * Note that callbacks have been disabled already(above). The
3381 		 * drain function takes care of the condition when an already
3382 		 * executing callback signals the worker to start processing or
3383 		 * the worker has already been signalled and is in the middle of
3384 		 * processing.
3385 		 */
3386 		vgen_drain_rcv_thread(ldcp);
3387 	}
3388 
3389 	/* acquire locks again; any pending transmits and callbacks are done */
3390 	LDC_LOCK(ldcp);
3391 
3392 	vgen_reset_hphase(ldcp);
3393 
3394 	vgen_uninit_tbufs(ldcp);
3395 
3396 	/* close the channel - retry on EAGAIN */
3397 	while ((rv = ldc_close(ldcp->ldc_handle)) == EAGAIN) {
3398 		if (++retries > vgen_ldccl_retries) {
3399 			break;
3400 		}
3401 		drv_usecwait(VGEN_LDC_CLOSE_DELAY);
3402 	}
3403 	if (rv != 0) {
3404 		cmn_err(CE_NOTE,
3405 		    "!vnet%d: Error(%d) closing the channel(0x%lx)\n",
3406 		    vgenp->instance, rv, ldcp->ldc_id);
3407 	}
3408 
3409 	ldcp->ldc_status = LDC_INIT;
3410 	ldcp->flags &= ~(CHANNEL_STARTED);
3411 
3412 	LDC_UNLOCK(ldcp);
3413 
3414 	DBG1(vgenp, ldcp, "exit\n");
3415 }
3416 
3417 /* Initialize the transmit buffer ring for the channel */
3418 static int
3419 vgen_init_tbufs(vgen_ldc_t *ldcp)
3420 {
3421 	vgen_private_desc_t	*tbufp;
3422 	vnet_public_desc_t	*txdp;
3423 	vio_dring_entry_hdr_t		*hdrp;
3424 	int 			i;
3425 	int 			rv;
3426 	caddr_t			datap = NULL;
3427 	int			ci;
3428 	uint32_t		ncookies;
3429 	size_t			data_sz;
3430 	vgen_t			*vgenp;
3431 
3432 	vgenp = LDC_TO_VGEN(ldcp);
3433 
3434 	bzero(ldcp->tbufp, sizeof (*tbufp) * (ldcp->num_txds));
3435 	bzero(ldcp->txdp, sizeof (*txdp) * (ldcp->num_txds));
3436 
3437 	/*
3438 	 * In order to ensure that the number of ldc cookies per descriptor is
3439 	 * limited to be within the default MAX_COOKIES (2), we take the steps
3440 	 * outlined below:
3441 	 *
3442 	 * Align the entire data buffer area to 8K and carve out per descriptor
3443 	 * data buffers starting from this 8K aligned base address.
3444 	 *
3445 	 * We round up the mtu specified to be a multiple of 2K or 4K.
3446 	 * For sizes up to 12K we round up the size to the next 2K.
3447 	 * For sizes > 12K we round up to the next 4K (otherwise sizes such as
3448 	 * 14K could end up needing 3 cookies, with the buffer spread across
3449 	 * 3 8K pages:  8K+6K, 2K+8K+2K, 6K+8K, ...).
3450 	 */
3451 	data_sz = vgenp->max_frame_size + VNET_IPALIGN + VNET_LDCALIGN;
3452 	if (data_sz <= VNET_12K) {
3453 		data_sz = VNET_ROUNDUP_2K(data_sz);
3454 	} else {
3455 		data_sz = VNET_ROUNDUP_4K(data_sz);
3456 	}
3457 
3458 	/* allocate extra 8K bytes for alignment */
3459 	ldcp->tx_data_sz = (data_sz * ldcp->num_txds) + VNET_8K;
3460 	datap = kmem_zalloc(ldcp->tx_data_sz, KM_SLEEP);
3461 	ldcp->tx_datap = datap;
3462 
3463 
3464 	/* align the starting address of the data area to 8K */
3465 	datap = (caddr_t)VNET_ROUNDUP_8K((uintptr_t)datap);
3466 
3467 	/*
3468 	 * for each private descriptor, allocate a ldc mem_handle which is
3469 	 * required to map the data during transmit, set the flags
3470 	 * to free (available for use by transmit routine).
3471 	 */
3472 
3473 	for (i = 0; i < ldcp->num_txds; i++) {
3474 
3475 		tbufp = &(ldcp->tbufp[i]);
3476 		rv = ldc_mem_alloc_handle(ldcp->ldc_handle,
3477 		    &(tbufp->memhandle));
3478 		if (rv) {
3479 			tbufp->memhandle = 0;
3480 			goto init_tbufs_failed;
3481 		}
3482 
3483 		/*
3484 		 * bind ldc memhandle to the corresponding transmit buffer.
3485 		 */
3486 		ci = ncookies = 0;
3487 		rv = ldc_mem_bind_handle(tbufp->memhandle,
3488 		    (caddr_t)datap, data_sz, LDC_SHADOW_MAP,
3489 		    LDC_MEM_R, &(tbufp->memcookie[ci]), &ncookies);
3490 		if (rv != 0) {
3491 			goto init_tbufs_failed;
3492 		}
3493 
3494 		/*
3495 		 * successful in binding the handle to tx data buffer.
3496 		 * set datap in the private descr to this buffer.
3497 		 */
3498 		tbufp->datap = datap;
3499 
3500 		if ((ncookies == 0) ||
3501 		    (ncookies > MAX_COOKIES)) {
3502 			goto init_tbufs_failed;
3503 		}
3504 
3505 		for (ci = 1; ci < ncookies; ci++) {
3506 			rv = ldc_mem_nextcookie(tbufp->memhandle,
3507 			    &(tbufp->memcookie[ci]));
3508 			if (rv != 0) {
3509 				goto init_tbufs_failed;
3510 			}
3511 		}
3512 
3513 		tbufp->ncookies = ncookies;
3514 		datap += data_sz;
3515 
3516 		tbufp->flags = VGEN_PRIV_DESC_FREE;
3517 		txdp = &(ldcp->txdp[i]);
3518 		hdrp = &txdp->hdr;
3519 		hdrp->dstate = VIO_DESC_FREE;
3520 		hdrp->ack = B_FALSE;
3521 		tbufp->descp = txdp;
3522 
3523 	}
3524 
3525 	/* reset tbuf walking pointers */
3526 	ldcp->next_tbufp = ldcp->tbufp;
3527 	ldcp->cur_tbufp = ldcp->tbufp;
3528 
3529 	/* initialize tx seqnum and index */
3530 	ldcp->next_txseq = VNET_ISS;
3531 	ldcp->next_txi = 0;
3532 
3533 	ldcp->resched_peer = B_TRUE;
3534 	ldcp->resched_peer_txi = 0;
3535 
3536 	return (DDI_SUCCESS);
3537 
3538 init_tbufs_failed:;
3539 	vgen_uninit_tbufs(ldcp);
3540 	return (DDI_FAILURE);
3541 }
3542 
3543 /* Uninitialize transmit buffer ring for the channel */
3544 static void
3545 vgen_uninit_tbufs(vgen_ldc_t *ldcp)
3546 {
3547 	vgen_private_desc_t	*tbufp = ldcp->tbufp;
3548 	int 			i;
3549 
3550 	/* for each tbuf (priv_desc), free ldc mem_handle */
3551 	for (i = 0; i < ldcp->num_txds; i++) {
3552 
3553 		tbufp = &(ldcp->tbufp[i]);
3554 
3555 		if (tbufp->datap) { /* if bound to a ldc memhandle */
3556 			(void) ldc_mem_unbind_handle(tbufp->memhandle);
3557 			tbufp->datap = NULL;
3558 		}
3559 		if (tbufp->memhandle) {
3560 			(void) ldc_mem_free_handle(tbufp->memhandle);
3561 			tbufp->memhandle = 0;
3562 		}
3563 	}
3564 
3565 	if (ldcp->tx_datap) {
3566 		/* prealloc'd tx data buffer */
3567 		kmem_free(ldcp->tx_datap, ldcp->tx_data_sz);
3568 		ldcp->tx_datap = NULL;
3569 		ldcp->tx_data_sz = 0;
3570 	}
3571 
3572 	bzero(ldcp->tbufp, sizeof (vgen_private_desc_t) * (ldcp->num_txds));
3573 	bzero(ldcp->txdp, sizeof (vnet_public_desc_t) * (ldcp->num_txds));
3574 }
3575 
3576 /* clobber tx descriptor ring */
3577 static void
3578 vgen_clobber_tbufs(vgen_ldc_t *ldcp)
3579 {
3580 	vnet_public_desc_t	*txdp;
3581 	vgen_private_desc_t	*tbufp;
3582 	vio_dring_entry_hdr_t	*hdrp;
3583 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3584 	int i;
3585 #ifdef DEBUG
3586 	int ndone = 0;
3587 #endif
3588 
3589 	for (i = 0; i < ldcp->num_txds; i++) {
3590 
3591 		tbufp = &(ldcp->tbufp[i]);
3592 		txdp = tbufp->descp;
3593 		hdrp = &txdp->hdr;
3594 
3595 		if (tbufp->flags & VGEN_PRIV_DESC_BUSY) {
3596 			tbufp->flags = VGEN_PRIV_DESC_FREE;
3597 #ifdef DEBUG
3598 			if (hdrp->dstate == VIO_DESC_DONE)
3599 				ndone++;
3600 #endif
3601 			hdrp->dstate = VIO_DESC_FREE;
3602 			hdrp->ack = B_FALSE;
3603 		}
3604 	}
3605 	/* reset tbuf walking pointers */
3606 	ldcp->next_tbufp = ldcp->tbufp;
3607 	ldcp->cur_tbufp = ldcp->tbufp;
3608 
3609 	/* reset tx seqnum and index */
3610 	ldcp->next_txseq = VNET_ISS;
3611 	ldcp->next_txi = 0;
3612 
3613 	ldcp->resched_peer = B_TRUE;
3614 	ldcp->resched_peer_txi = 0;
3615 
3616 	DBG2(vgenp, ldcp, "num descrs done (%d)\n", ndone);
3617 }
3618 
3619 /* clobber receive descriptor ring */
3620 static void
3621 vgen_clobber_rxds(vgen_ldc_t *ldcp)
3622 {
3623 	ldcp->rx_dhandle = 0;
3624 	bzero(&ldcp->rx_dcookie, sizeof (ldcp->rx_dcookie));
3625 	ldcp->rxdp = NULL;
3626 	ldcp->next_rxi = 0;
3627 	ldcp->num_rxds = 0;
3628 	ldcp->next_rxseq = VNET_ISS;
3629 }
3630 
3631 /* initialize receive descriptor ring */
3632 static int
3633 vgen_init_rxds(vgen_ldc_t *ldcp, uint32_t num_desc, uint32_t desc_size,
3634 	ldc_mem_cookie_t *dcookie, uint32_t ncookies)
3635 {
3636 	int rv;
3637 	ldc_mem_info_t minfo;
3638 
3639 	rv = ldc_mem_dring_map(ldcp->ldc_handle, dcookie, ncookies, num_desc,
3640 	    desc_size, LDC_DIRECT_MAP, &(ldcp->rx_dhandle));
3641 	if (rv != 0) {
3642 		return (DDI_FAILURE);
3643 	}
3644 
3645 	/*
3646 	 * sucessfully mapped, now try to
3647 	 * get info about the mapped dring
3648 	 */
3649 	rv = ldc_mem_dring_info(ldcp->rx_dhandle, &minfo);
3650 	if (rv != 0) {
3651 		(void) ldc_mem_dring_unmap(ldcp->rx_dhandle);
3652 		return (DDI_FAILURE);
3653 	}
3654 
3655 	/*
3656 	 * save ring address, number of descriptors.
3657 	 */
3658 	ldcp->rxdp = (vnet_public_desc_t *)(minfo.vaddr);
3659 	bcopy(dcookie, &(ldcp->rx_dcookie), sizeof (*dcookie));
3660 	ldcp->num_rxdcookies = ncookies;
3661 	ldcp->num_rxds = num_desc;
3662 	ldcp->next_rxi = 0;
3663 	ldcp->next_rxseq = VNET_ISS;
3664 	ldcp->dring_mtype = minfo.mtype;
3665 
3666 	return (DDI_SUCCESS);
3667 }
3668 
3669 /* get channel statistics */
3670 static uint64_t
3671 vgen_ldc_stat(vgen_ldc_t *ldcp, uint_t stat)
3672 {
3673 	vgen_stats_t *statsp;
3674 	uint64_t val;
3675 
3676 	val = 0;
3677 	statsp = &ldcp->stats;
3678 	switch (stat) {
3679 
3680 	case MAC_STAT_MULTIRCV:
3681 		val = statsp->multircv;
3682 		break;
3683 
3684 	case MAC_STAT_BRDCSTRCV:
3685 		val = statsp->brdcstrcv;
3686 		break;
3687 
3688 	case MAC_STAT_MULTIXMT:
3689 		val = statsp->multixmt;
3690 		break;
3691 
3692 	case MAC_STAT_BRDCSTXMT:
3693 		val = statsp->brdcstxmt;
3694 		break;
3695 
3696 	case MAC_STAT_NORCVBUF:
3697 		val = statsp->norcvbuf;
3698 		break;
3699 
3700 	case MAC_STAT_IERRORS:
3701 		val = statsp->ierrors;
3702 		break;
3703 
3704 	case MAC_STAT_NOXMTBUF:
3705 		val = statsp->noxmtbuf;
3706 		break;
3707 
3708 	case MAC_STAT_OERRORS:
3709 		val = statsp->oerrors;
3710 		break;
3711 
3712 	case MAC_STAT_COLLISIONS:
3713 		break;
3714 
3715 	case MAC_STAT_RBYTES:
3716 		val = statsp->rbytes;
3717 		break;
3718 
3719 	case MAC_STAT_IPACKETS:
3720 		val = statsp->ipackets;
3721 		break;
3722 
3723 	case MAC_STAT_OBYTES:
3724 		val = statsp->obytes;
3725 		break;
3726 
3727 	case MAC_STAT_OPACKETS:
3728 		val = statsp->opackets;
3729 		break;
3730 
3731 	/* stats not relevant to ldc, return 0 */
3732 	case MAC_STAT_IFSPEED:
3733 	case ETHER_STAT_ALIGN_ERRORS:
3734 	case ETHER_STAT_FCS_ERRORS:
3735 	case ETHER_STAT_FIRST_COLLISIONS:
3736 	case ETHER_STAT_MULTI_COLLISIONS:
3737 	case ETHER_STAT_DEFER_XMTS:
3738 	case ETHER_STAT_TX_LATE_COLLISIONS:
3739 	case ETHER_STAT_EX_COLLISIONS:
3740 	case ETHER_STAT_MACXMT_ERRORS:
3741 	case ETHER_STAT_CARRIER_ERRORS:
3742 	case ETHER_STAT_TOOLONG_ERRORS:
3743 	case ETHER_STAT_XCVR_ADDR:
3744 	case ETHER_STAT_XCVR_ID:
3745 	case ETHER_STAT_XCVR_INUSE:
3746 	case ETHER_STAT_CAP_1000FDX:
3747 	case ETHER_STAT_CAP_1000HDX:
3748 	case ETHER_STAT_CAP_100FDX:
3749 	case ETHER_STAT_CAP_100HDX:
3750 	case ETHER_STAT_CAP_10FDX:
3751 	case ETHER_STAT_CAP_10HDX:
3752 	case ETHER_STAT_CAP_ASMPAUSE:
3753 	case ETHER_STAT_CAP_PAUSE:
3754 	case ETHER_STAT_CAP_AUTONEG:
3755 	case ETHER_STAT_ADV_CAP_1000FDX:
3756 	case ETHER_STAT_ADV_CAP_1000HDX:
3757 	case ETHER_STAT_ADV_CAP_100FDX:
3758 	case ETHER_STAT_ADV_CAP_100HDX:
3759 	case ETHER_STAT_ADV_CAP_10FDX:
3760 	case ETHER_STAT_ADV_CAP_10HDX:
3761 	case ETHER_STAT_ADV_CAP_ASMPAUSE:
3762 	case ETHER_STAT_ADV_CAP_PAUSE:
3763 	case ETHER_STAT_ADV_CAP_AUTONEG:
3764 	case ETHER_STAT_LP_CAP_1000FDX:
3765 	case ETHER_STAT_LP_CAP_1000HDX:
3766 	case ETHER_STAT_LP_CAP_100FDX:
3767 	case ETHER_STAT_LP_CAP_100HDX:
3768 	case ETHER_STAT_LP_CAP_10FDX:
3769 	case ETHER_STAT_LP_CAP_10HDX:
3770 	case ETHER_STAT_LP_CAP_ASMPAUSE:
3771 	case ETHER_STAT_LP_CAP_PAUSE:
3772 	case ETHER_STAT_LP_CAP_AUTONEG:
3773 	case ETHER_STAT_LINK_ASMPAUSE:
3774 	case ETHER_STAT_LINK_PAUSE:
3775 	case ETHER_STAT_LINK_AUTONEG:
3776 	case ETHER_STAT_LINK_DUPLEX:
3777 	default:
3778 		val = 0;
3779 		break;
3780 
3781 	}
3782 	return (val);
3783 }
3784 
3785 /*
3786  * LDC channel is UP, start handshake process with peer.
3787  */
3788 static void
3789 vgen_handle_evt_up(vgen_ldc_t *ldcp)
3790 {
3791 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
3792 
3793 	DBG1(vgenp, ldcp, "enter\n");
3794 
3795 	ASSERT(MUTEX_HELD(&ldcp->cblock));
3796 
3797 	if (ldcp->portp != vgenp->vsw_portp) {
3798 		/*
3799 		 * As the channel is up, use this port from now on.
3800 		 */
3801 		(void) atomic_swap_32(&ldcp->portp->use_vsw_port, B_FALSE);
3802 	}
3803 
3804 	/* Initialize local session id */
3805 	ldcp->local_sid = ddi_get_lbolt();
3806 
3807 	/* clear peer session id */
3808 	ldcp->peer_sid = 0;
3809 	ldcp->hretries = 0;
3810 
3811 	if (ldcp->hphase != VH_PHASE0) {
3812 		vgen_handshake_reset(ldcp);
3813 	}
3814 
3815 	/* Initiate Handshake process with peer ldc endpoint */
3816 	vgen_handshake(vh_nextphase(ldcp));
3817 
3818 	DBG1(vgenp, ldcp, "exit\n");
3819 }
3820 
3821 /*
3822  * LDC channel is Reset, terminate connection with peer and try to
3823  * bring the channel up again.
3824  */
3825 static void
3826 vgen_handle_evt_reset(vgen_ldc_t *ldcp)
3827 {
3828 	ldc_status_t istatus;
3829 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
3830 	int	rv;
3831 
3832 	DBG1(vgenp, ldcp, "enter\n");
3833 
3834 	ASSERT(MUTEX_HELD(&ldcp->cblock));
3835 
3836 	if ((ldcp->portp != vgenp->vsw_portp) &&
3837 	    (vgenp->vsw_portp != NULL)) {
3838 		/*
3839 		 * As the channel is down, use the switch port until
3840 		 * the channel becomes ready to be used.
3841 		 */
3842 		(void) atomic_swap_32(&ldcp->portp->use_vsw_port, B_TRUE);
3843 	}
3844 
3845 	if (vgenp->vsw_portp == ldcp->portp) {
3846 		vio_net_report_err_t rep_err =
3847 		    ldcp->portp->vcb.vio_net_report_err;
3848 
3849 		/* Post a reset message */
3850 		rep_err(ldcp->portp->vhp, VIO_NET_RES_DOWN);
3851 	}
3852 
3853 	if (ldcp->hphase != VH_PHASE0) {
3854 		vgen_handshake_reset(ldcp);
3855 	}
3856 
3857 	/* try to bring the channel up */
3858 #ifdef	VNET_IOC_DEBUG
3859 	if (ldcp->link_down_forced == B_FALSE) {
3860 		rv = ldc_up(ldcp->ldc_handle);
3861 		if (rv != 0) {
3862 			DWARN(vgenp, ldcp, "ldc_up err rv(%d)\n", rv);
3863 		}
3864 	}
3865 #else
3866 	rv = ldc_up(ldcp->ldc_handle);
3867 	if (rv != 0) {
3868 		DWARN(vgenp, ldcp, "ldc_up err rv(%d)\n", rv);
3869 	}
3870 #endif
3871 
3872 	if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
3873 		DWARN(vgenp, ldcp, "ldc_status err\n");
3874 	} else {
3875 		ldcp->ldc_status = istatus;
3876 	}
3877 
3878 	/* if channel is already UP - restart handshake */
3879 	if (ldcp->ldc_status == LDC_UP) {
3880 		vgen_handle_evt_up(ldcp);
3881 	}
3882 
3883 	DBG1(vgenp, ldcp, "exit\n");
3884 }
3885 
3886 /* Interrupt handler for the channel */
3887 static uint_t
3888 vgen_ldc_cb(uint64_t event, caddr_t arg)
3889 {
3890 	_NOTE(ARGUNUSED(event))
3891 	vgen_ldc_t	*ldcp;
3892 	vgen_t		*vgenp;
3893 	ldc_status_t 	istatus;
3894 	vgen_stats_t	*statsp;
3895 	timeout_id_t	cancel_htid = 0;
3896 	uint_t		ret = LDC_SUCCESS;
3897 
3898 	ldcp = (vgen_ldc_t *)arg;
3899 	vgenp = LDC_TO_VGEN(ldcp);
3900 	statsp = &ldcp->stats;
3901 
3902 	DBG1(vgenp, ldcp, "enter\n");
3903 
3904 	mutex_enter(&ldcp->cblock);
3905 	statsp->callbacks++;
3906 	if ((ldcp->ldc_status == LDC_INIT) || (ldcp->ldc_handle == NULL)) {
3907 		DWARN(vgenp, ldcp, "status(%d) is LDC_INIT\n",
3908 		    ldcp->ldc_status);
3909 		mutex_exit(&ldcp->cblock);
3910 		return (LDC_SUCCESS);
3911 	}
3912 
3913 	/*
3914 	 * cache cancel_htid before the events specific
3915 	 * code may overwrite it. Do not clear ldcp->cancel_htid
3916 	 * as it is also used to indicate the timer to quit immediately.
3917 	 */
3918 	cancel_htid = ldcp->cancel_htid;
3919 
3920 	/*
3921 	 * NOTE: not using switch() as event could be triggered by
3922 	 * a state change and a read request. Also the ordering	of the
3923 	 * check for the event types is deliberate.
3924 	 */
3925 	if (event & LDC_EVT_UP) {
3926 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
3927 			DWARN(vgenp, ldcp, "ldc_status err\n");
3928 			/* status couldn't be determined */
3929 			ret = LDC_FAILURE;
3930 			goto ldc_cb_ret;
3931 		}
3932 		ldcp->ldc_status = istatus;
3933 		if (ldcp->ldc_status != LDC_UP) {
3934 			DWARN(vgenp, ldcp, "LDC_EVT_UP received "
3935 			    " but ldc status is not UP(0x%x)\n",
3936 			    ldcp->ldc_status);
3937 			/* spurious interrupt, return success */
3938 			goto ldc_cb_ret;
3939 		}
3940 		DWARN(vgenp, ldcp, "event(%lx) UP, status(%d)\n",
3941 		    event, ldcp->ldc_status);
3942 
3943 		vgen_handle_evt_up(ldcp);
3944 
3945 		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);
3946 	}
3947 
3948 	/* Handle RESET/DOWN before READ event */
3949 	if (event & (LDC_EVT_RESET | LDC_EVT_DOWN)) {
3950 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
3951 			DWARN(vgenp, ldcp, "ldc_status error\n");
3952 			/* status couldn't be determined */
3953 			ret = LDC_FAILURE;
3954 			goto ldc_cb_ret;
3955 		}
3956 		ldcp->ldc_status = istatus;
3957 		DWARN(vgenp, ldcp, "event(%lx) RESET/DOWN, status(%d)\n",
3958 		    event, ldcp->ldc_status);
3959 
3960 		vgen_handle_evt_reset(ldcp);
3961 
3962 		/*
3963 		 * As the channel is down/reset, ignore READ event
3964 		 * but print a debug warning message.
3965 		 */
3966 		if (event & LDC_EVT_READ) {
3967 			DWARN(vgenp, ldcp,
3968 			    "LDC_EVT_READ set along with RESET/DOWN\n");
3969 			event &= ~LDC_EVT_READ;
3970 		}
3971 	}
3972 
3973 	if (event & LDC_EVT_READ) {
3974 		DBG2(vgenp, ldcp, "event(%lx) READ, status(%d)\n",
3975 		    event, ldcp->ldc_status);
3976 
3977 		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);
3978 
3979 		if (ldcp->rcv_thread != NULL) {
3980 			/*
3981 			 * If the receive thread is enabled, then
3982 			 * wakeup the receive thread to process the
3983 			 * LDC messages.
3984 			 */
3985 			mutex_exit(&ldcp->cblock);
3986 			mutex_enter(&ldcp->rcv_thr_lock);
3987 			if (!(ldcp->rcv_thr_flags & VGEN_WTHR_DATARCVD)) {
3988 				ldcp->rcv_thr_flags |= VGEN_WTHR_DATARCVD;
3989 				cv_signal(&ldcp->rcv_thr_cv);
3990 			}
3991 			mutex_exit(&ldcp->rcv_thr_lock);
3992 			mutex_enter(&ldcp->cblock);
3993 		} else  {
3994 			vgen_handle_evt_read(ldcp);
3995 		}
3996 	}
3997 
3998 ldc_cb_ret:
3999 	/*
4000 	 * Check to see if the status of cancel_htid has
4001 	 * changed. If another timer needs to be cancelled,
4002 	 * then let the next callback to clear it.
4003 	 */
4004 	if (cancel_htid == 0) {
4005 		cancel_htid = ldcp->cancel_htid;
4006 	}
4007 	mutex_exit(&ldcp->cblock);
4008 
4009 	if (cancel_htid) {
4010 		/*
4011 		 * Cancel handshake timer.
4012 		 * untimeout(9F) will not return until the pending callback is
4013 		 * cancelled or has run. No problems will result from calling
4014 		 * untimeout if the handler has already completed.
4015 		 * If the timeout handler did run, then it would just
4016 		 * return as cancel_htid is set.
4017 		 */
4018 		DBG2(vgenp, ldcp, "cancel_htid =0x%X \n", cancel_htid);
4019 		(void) untimeout(cancel_htid);
4020 		mutex_enter(&ldcp->cblock);
4021 		/* clear it only if its the same as the one we cancelled */
4022 		if (ldcp->cancel_htid == cancel_htid) {
4023 			ldcp->cancel_htid = 0;
4024 		}
4025 		mutex_exit(&ldcp->cblock);
4026 	}
4027 	DBG1(vgenp, ldcp, "exit\n");
4028 	return (ret);
4029 }
4030 
4031 static void
4032 vgen_handle_evt_read(vgen_ldc_t *ldcp)
4033 {
4034 	int		rv;
4035 	uint64_t	*ldcmsg;
4036 	size_t		msglen;
4037 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4038 	vio_msg_tag_t	*tagp;
4039 	ldc_status_t 	istatus;
4040 	boolean_t 	has_data;
4041 
4042 	DBG1(vgenp, ldcp, "enter\n");
4043 
4044 	ldcmsg = ldcp->ldcmsg;
4045 	/*
4046 	 * If the receive thread is enabled, then the cblock
4047 	 * need to be acquired here. If not, the vgen_ldc_cb()
4048 	 * calls this function with cblock held already.
4049 	 */
4050 	if (ldcp->rcv_thread != NULL) {
4051 		mutex_enter(&ldcp->cblock);
4052 	} else {
4053 		ASSERT(MUTEX_HELD(&ldcp->cblock));
4054 	}
4055 
4056 vgen_evt_read:
4057 	do {
4058 		msglen = ldcp->msglen;
4059 		rv = ldc_read(ldcp->ldc_handle, (caddr_t)ldcmsg, &msglen);
4060 
4061 		if (rv != 0) {
4062 			DWARN(vgenp, ldcp, "err rv(%d) len(%d)\n",
4063 			    rv, msglen);
4064 			if (rv == ECONNRESET)
4065 				goto vgen_evtread_error;
4066 			break;
4067 		}
4068 		if (msglen == 0) {
4069 			DBG2(vgenp, ldcp, "ldc_read NODATA");
4070 			break;
4071 		}
4072 		DBG2(vgenp, ldcp, "ldc_read msglen(%d)", msglen);
4073 
4074 		tagp = (vio_msg_tag_t *)ldcmsg;
4075 
4076 		if (ldcp->peer_sid) {
4077 			/*
4078 			 * check sid only after we have received peer's sid
4079 			 * in the version negotiate msg.
4080 			 */
4081 #ifdef DEBUG
4082 			if (vgen_hdbg & HDBG_BAD_SID) {
4083 				/* simulate bad sid condition */
4084 				tagp->vio_sid = 0;
4085 				vgen_hdbg &= ~(HDBG_BAD_SID);
4086 			}
4087 #endif
4088 			rv = vgen_check_sid(ldcp, tagp);
4089 			if (rv != VGEN_SUCCESS) {
4090 				/*
4091 				 * If sid mismatch is detected,
4092 				 * reset the channel.
4093 				 */
4094 				goto vgen_evtread_error;
4095 			}
4096 		}
4097 
4098 		switch (tagp->vio_msgtype) {
4099 		case VIO_TYPE_CTRL:
4100 			rv = vgen_handle_ctrlmsg(ldcp, tagp);
4101 			break;
4102 
4103 		case VIO_TYPE_DATA:
4104 			rv = vgen_handle_datamsg(ldcp, tagp, msglen);
4105 			break;
4106 
4107 		case VIO_TYPE_ERR:
4108 			vgen_handle_errmsg(ldcp, tagp);
4109 			break;
4110 
4111 		default:
4112 			DWARN(vgenp, ldcp, "Unknown VIO_TYPE(%x)\n",
4113 			    tagp->vio_msgtype);
4114 			break;
4115 		}
4116 
4117 		/*
4118 		 * If an error is encountered, stop processing and
4119 		 * handle the error.
4120 		 */
4121 		if (rv != 0) {
4122 			goto vgen_evtread_error;
4123 		}
4124 
4125 	} while (msglen);
4126 
4127 	/* check once more before exiting */
4128 	rv = ldc_chkq(ldcp->ldc_handle, &has_data);
4129 	if ((rv == 0) && (has_data == B_TRUE)) {
4130 		DTRACE_PROBE(vgen_chkq);
4131 		goto vgen_evt_read;
4132 	}
4133 
4134 vgen_evtread_error:
4135 	if (rv == ECONNRESET) {
4136 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
4137 			DWARN(vgenp, ldcp, "ldc_status err\n");
4138 		} else {
4139 			ldcp->ldc_status = istatus;
4140 		}
4141 		vgen_handle_evt_reset(ldcp);
4142 	} else if (rv) {
4143 		vgen_ldc_reset(ldcp);
4144 	}
4145 
4146 	/*
4147 	 * If the receive thread is enabled, then cancel the
4148 	 * handshake timeout here.
4149 	 */
4150 	if (ldcp->rcv_thread != NULL) {
4151 		timeout_id_t cancel_htid = ldcp->cancel_htid;
4152 
4153 		mutex_exit(&ldcp->cblock);
4154 		if (cancel_htid) {
4155 			/*
4156 			 * Cancel handshake timer. untimeout(9F) will
4157 			 * not return until the pending callback is cancelled
4158 			 * or has run. No problems will result from calling
4159 			 * untimeout if the handler has already completed.
4160 			 * If the timeout handler did run, then it would just
4161 			 * return as cancel_htid is set.
4162 			 */
4163 			DBG2(vgenp, ldcp, "cancel_htid =0x%X \n", cancel_htid);
4164 			(void) untimeout(cancel_htid);
4165 
4166 			/*
4167 			 * clear it only if its the same as the one we
4168 			 * cancelled
4169 			 */
4170 			mutex_enter(&ldcp->cblock);
4171 			if (ldcp->cancel_htid == cancel_htid) {
4172 				ldcp->cancel_htid = 0;
4173 			}
4174 			mutex_exit(&ldcp->cblock);
4175 		}
4176 	}
4177 
4178 	DBG1(vgenp, ldcp, "exit\n");
4179 }
4180 
4181 /* vgen handshake functions */
4182 
4183 /* change the hphase for the channel to the next phase */
4184 static vgen_ldc_t *
4185 vh_nextphase(vgen_ldc_t *ldcp)
4186 {
4187 	if (ldcp->hphase == VH_PHASE3) {
4188 		ldcp->hphase = VH_DONE;
4189 	} else {
4190 		ldcp->hphase++;
4191 	}
4192 	return (ldcp);
4193 }
4194 
4195 /*
4196  * wrapper routine to send the given message over ldc using ldc_write().
4197  */
4198 static int
4199 vgen_sendmsg(vgen_ldc_t *ldcp, caddr_t msg,  size_t msglen,
4200     boolean_t caller_holds_lock)
4201 {
4202 	int			rv;
4203 	size_t			len;
4204 	uint32_t		retries = 0;
4205 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
4206 	vio_msg_tag_t		*tagp = (vio_msg_tag_t *)msg;
4207 	vio_dring_msg_t		*dmsg;
4208 	vio_raw_data_msg_t	*rmsg;
4209 	boolean_t		data_msg = B_FALSE;
4210 
4211 	len = msglen;
4212 	if ((len == 0) || (msg == NULL))
4213 		return (VGEN_FAILURE);
4214 
4215 	if (!caller_holds_lock) {
4216 		mutex_enter(&ldcp->wrlock);
4217 	}
4218 
4219 	if (tagp->vio_subtype == VIO_SUBTYPE_INFO) {
4220 		if (tagp->vio_subtype_env == VIO_DRING_DATA) {
4221 			dmsg = (vio_dring_msg_t *)tagp;
4222 			dmsg->seq_num = ldcp->next_txseq;
4223 			data_msg = B_TRUE;
4224 		} else if (tagp->vio_subtype_env == VIO_PKT_DATA) {
4225 			rmsg = (vio_raw_data_msg_t *)tagp;
4226 			rmsg->seq_num = ldcp->next_txseq;
4227 			data_msg = B_TRUE;
4228 		}
4229 	}
4230 
4231 	do {
4232 		len = msglen;
4233 		rv = ldc_write(ldcp->ldc_handle, (caddr_t)msg, &len);
4234 		if (retries++ >= vgen_ldcwr_retries)
4235 			break;
4236 	} while (rv == EWOULDBLOCK);
4237 
4238 	if (rv == 0 && data_msg == B_TRUE) {
4239 		ldcp->next_txseq++;
4240 	}
4241 
4242 	if (!caller_holds_lock) {
4243 		mutex_exit(&ldcp->wrlock);
4244 	}
4245 
4246 	if (rv != 0) {
4247 		DWARN(vgenp, ldcp, "ldc_write failed: rv(%d)\n",
4248 		    rv, msglen);
4249 		return (rv);
4250 	}
4251 
4252 	if (len != msglen) {
4253 		DWARN(vgenp, ldcp, "ldc_write failed: rv(%d) msglen (%d)\n",
4254 		    rv, msglen);
4255 		return (VGEN_FAILURE);
4256 	}
4257 
4258 	return (VGEN_SUCCESS);
4259 }
4260 
4261 /* send version negotiate message to the peer over ldc */
4262 static int
4263 vgen_send_version_negotiate(vgen_ldc_t *ldcp)
4264 {
4265 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4266 	vio_ver_msg_t	vermsg;
4267 	vio_msg_tag_t	*tagp = &vermsg.tag;
4268 	int		rv;
4269 
4270 	bzero(&vermsg, sizeof (vermsg));
4271 
4272 	tagp->vio_msgtype = VIO_TYPE_CTRL;
4273 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
4274 	tagp->vio_subtype_env = VIO_VER_INFO;
4275 	tagp->vio_sid = ldcp->local_sid;
4276 
4277 	/* get version msg payload from ldcp->local */
4278 	vermsg.ver_major = ldcp->local_hparams.ver_major;
4279 	vermsg.ver_minor = ldcp->local_hparams.ver_minor;
4280 	vermsg.dev_class = ldcp->local_hparams.dev_class;
4281 
4282 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (vermsg), B_FALSE);
4283 	if (rv != VGEN_SUCCESS) {
4284 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
4285 		return (rv);
4286 	}
4287 
4288 	ldcp->hstate |= VER_INFO_SENT;
4289 	DBG2(vgenp, ldcp, "VER_INFO_SENT ver(%d,%d)\n",
4290 	    vermsg.ver_major, vermsg.ver_minor);
4291 
4292 	return (VGEN_SUCCESS);
4293 }
4294 
4295 /* send attr info message to the peer over ldc */
4296 static int
4297 vgen_send_attr_info(vgen_ldc_t *ldcp)
4298 {
4299 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4300 	vnet_attr_msg_t	attrmsg;
4301 	vio_msg_tag_t	*tagp = &attrmsg.tag;
4302 	int		rv;
4303 
4304 	bzero(&attrmsg, sizeof (attrmsg));
4305 
4306 	tagp->vio_msgtype = VIO_TYPE_CTRL;
4307 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
4308 	tagp->vio_subtype_env = VIO_ATTR_INFO;
4309 	tagp->vio_sid = ldcp->local_sid;
4310 
4311 	/* get attr msg payload from ldcp->local */
4312 	attrmsg.mtu = ldcp->local_hparams.mtu;
4313 	attrmsg.addr = ldcp->local_hparams.addr;
4314 	attrmsg.addr_type = ldcp->local_hparams.addr_type;
4315 	attrmsg.xfer_mode = ldcp->local_hparams.xfer_mode;
4316 	attrmsg.ack_freq = ldcp->local_hparams.ack_freq;
4317 	attrmsg.physlink_update = ldcp->local_hparams.physlink_update;
4318 
4319 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (attrmsg), B_FALSE);
4320 	if (rv != VGEN_SUCCESS) {
4321 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
4322 		return (rv);
4323 	}
4324 
4325 	ldcp->hstate |= ATTR_INFO_SENT;
4326 	DBG2(vgenp, ldcp, "ATTR_INFO_SENT\n");
4327 
4328 	return (VGEN_SUCCESS);
4329 }
4330 
4331 /* send descriptor ring register message to the peer over ldc */
4332 static int
4333 vgen_send_dring_reg(vgen_ldc_t *ldcp)
4334 {
4335 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
4336 	vio_dring_reg_msg_t	msg;
4337 	vio_msg_tag_t		*tagp = &msg.tag;
4338 	int		rv;
4339 
4340 	bzero(&msg, sizeof (msg));
4341 
4342 	tagp->vio_msgtype = VIO_TYPE_CTRL;
4343 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
4344 	tagp->vio_subtype_env = VIO_DRING_REG;
4345 	tagp->vio_sid = ldcp->local_sid;
4346 
4347 	/* get dring info msg payload from ldcp->local */
4348 	bcopy(&(ldcp->local_hparams.dring_cookie), (msg.cookie),
4349 	    sizeof (ldc_mem_cookie_t));
4350 	msg.ncookies = ldcp->local_hparams.num_dcookies;
4351 	msg.num_descriptors = ldcp->local_hparams.num_desc;
4352 	msg.descriptor_size = ldcp->local_hparams.desc_size;
4353 
4354 	/*
4355 	 * dring_ident is set to 0. After mapping the dring, peer sets this
4356 	 * value and sends it in the ack, which is saved in
4357 	 * vgen_handle_dring_reg().
4358 	 */
4359 	msg.dring_ident = 0;
4360 
4361 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (msg), B_FALSE);
4362 	if (rv != VGEN_SUCCESS) {
4363 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
4364 		return (rv);
4365 	}
4366 
4367 	ldcp->hstate |= DRING_INFO_SENT;
4368 	DBG2(vgenp, ldcp, "DRING_INFO_SENT \n");
4369 
4370 	return (VGEN_SUCCESS);
4371 }
4372 
4373 static int
4374 vgen_send_rdx_info(vgen_ldc_t *ldcp)
4375 {
4376 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4377 	vio_rdx_msg_t	rdxmsg;
4378 	vio_msg_tag_t	*tagp = &rdxmsg.tag;
4379 	int		rv;
4380 
4381 	bzero(&rdxmsg, sizeof (rdxmsg));
4382 
4383 	tagp->vio_msgtype = VIO_TYPE_CTRL;
4384 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
4385 	tagp->vio_subtype_env = VIO_RDX;
4386 	tagp->vio_sid = ldcp->local_sid;
4387 
4388 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (rdxmsg), B_FALSE);
4389 	if (rv != VGEN_SUCCESS) {
4390 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
4391 		return (rv);
4392 	}
4393 
4394 	ldcp->hstate |= RDX_INFO_SENT;
4395 	DBG2(vgenp, ldcp, "RDX_INFO_SENT\n");
4396 
4397 	return (VGEN_SUCCESS);
4398 }
4399 
4400 /* send descriptor ring data message to the peer over ldc */
4401 static int
4402 vgen_send_dring_data(vgen_ldc_t *ldcp, uint32_t start, int32_t end)
4403 {
4404 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4405 	vio_dring_msg_t	dringmsg, *msgp = &dringmsg;
4406 	vio_msg_tag_t	*tagp = &msgp->tag;
4407 	vgen_stats_t	*statsp = &ldcp->stats;
4408 	int		rv;
4409 
4410 	bzero(msgp, sizeof (*msgp));
4411 
4412 	tagp->vio_msgtype = VIO_TYPE_DATA;
4413 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
4414 	tagp->vio_subtype_env = VIO_DRING_DATA;
4415 	tagp->vio_sid = ldcp->local_sid;
4416 
4417 	msgp->dring_ident = ldcp->local_hparams.dring_ident;
4418 	msgp->start_idx = start;
4419 	msgp->end_idx = end;
4420 
4421 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (dringmsg), B_TRUE);
4422 	if (rv != VGEN_SUCCESS) {
4423 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
4424 		return (rv);
4425 	}
4426 
4427 	statsp->dring_data_msgs++;
4428 
4429 	DBG2(vgenp, ldcp, "DRING_DATA_SENT \n");
4430 
4431 	return (VGEN_SUCCESS);
4432 }
4433 
4434 /* send multicast addr info message to vsw */
4435 static int
4436 vgen_send_mcast_info(vgen_ldc_t *ldcp)
4437 {
4438 	vnet_mcast_msg_t	mcastmsg;
4439 	vnet_mcast_msg_t	*msgp;
4440 	vio_msg_tag_t		*tagp;
4441 	vgen_t			*vgenp;
4442 	struct ether_addr	*mca;
4443 	int			rv;
4444 	int			i;
4445 	uint32_t		size;
4446 	uint32_t		mccount;
4447 	uint32_t		n;
4448 
4449 	msgp = &mcastmsg;
4450 	tagp = &msgp->tag;
4451 	vgenp = LDC_TO_VGEN(ldcp);
4452 
4453 	mccount = vgenp->mccount;
4454 	i = 0;
4455 
4456 	do {
4457 		tagp->vio_msgtype = VIO_TYPE_CTRL;
4458 		tagp->vio_subtype = VIO_SUBTYPE_INFO;
4459 		tagp->vio_subtype_env = VNET_MCAST_INFO;
4460 		tagp->vio_sid = ldcp->local_sid;
4461 
4462 		n = ((mccount >= VNET_NUM_MCAST) ? VNET_NUM_MCAST : mccount);
4463 		size = n * sizeof (struct ether_addr);
4464 
4465 		mca = &(vgenp->mctab[i]);
4466 		bcopy(mca, (msgp->mca), size);
4467 		msgp->set = B_TRUE;
4468 		msgp->count = n;
4469 
4470 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msgp),
4471 		    B_FALSE);
4472 		if (rv != VGEN_SUCCESS) {
4473 			DWARN(vgenp, ldcp, "vgen_sendmsg err(%d)\n", rv);
4474 			return (rv);
4475 		}
4476 
4477 		mccount -= n;
4478 		i += n;
4479 
4480 	} while (mccount);
4481 
4482 	return (VGEN_SUCCESS);
4483 }
4484 
4485 /* Initiate Phase 2 of handshake */
4486 static int
4487 vgen_handshake_phase2(vgen_ldc_t *ldcp)
4488 {
4489 	int rv;
4490 	uint32_t ncookies = 0;
4491 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4492 
4493 #ifdef DEBUG
4494 	if (vgen_hdbg & HDBG_OUT_STATE) {
4495 		/* simulate out of state condition */
4496 		vgen_hdbg &= ~(HDBG_OUT_STATE);
4497 		rv = vgen_send_rdx_info(ldcp);
4498 		return (rv);
4499 	}
4500 	if (vgen_hdbg & HDBG_TIMEOUT) {
4501 		/* simulate timeout condition */
4502 		vgen_hdbg &= ~(HDBG_TIMEOUT);
4503 		return (VGEN_SUCCESS);
4504 	}
4505 #endif
4506 	rv = vgen_send_attr_info(ldcp);
4507 	if (rv != VGEN_SUCCESS) {
4508 		return (rv);
4509 	}
4510 
4511 	/* Bind descriptor ring to the channel */
4512 	if (ldcp->num_txdcookies == 0) {
4513 		rv = ldc_mem_dring_bind(ldcp->ldc_handle, ldcp->tx_dhandle,
4514 		    LDC_DIRECT_MAP | LDC_SHADOW_MAP, LDC_MEM_RW,
4515 		    &ldcp->tx_dcookie, &ncookies);
4516 		if (rv != 0) {
4517 			DWARN(vgenp, ldcp, "ldc_mem_dring_bind failed "
4518 			    "rv(%x)\n", rv);
4519 			return (rv);
4520 		}
4521 		ASSERT(ncookies == 1);
4522 		ldcp->num_txdcookies = ncookies;
4523 	}
4524 
4525 	/* update local dring_info params */
4526 	bcopy(&(ldcp->tx_dcookie), &(ldcp->local_hparams.dring_cookie),
4527 	    sizeof (ldc_mem_cookie_t));
4528 	ldcp->local_hparams.num_dcookies = ldcp->num_txdcookies;
4529 	ldcp->local_hparams.num_desc = ldcp->num_txds;
4530 	ldcp->local_hparams.desc_size = sizeof (vnet_public_desc_t);
4531 
4532 	rv = vgen_send_dring_reg(ldcp);
4533 	if (rv != VGEN_SUCCESS) {
4534 		return (rv);
4535 	}
4536 
4537 	return (VGEN_SUCCESS);
4538 }
4539 
4540 /*
4541  * Set vnet-protocol-version dependent functions based on version.
4542  */
4543 static void
4544 vgen_set_vnet_proto_ops(vgen_ldc_t *ldcp)
4545 {
4546 	vgen_hparams_t	*lp = &ldcp->local_hparams;
4547 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4548 
4549 	if (VGEN_VER_GTEQ(ldcp, 1, 5)) {
4550 		vgen_port_t	*portp = ldcp->portp;
4551 		vnet_t		*vnetp = vgenp->vnetp;
4552 		/*
4553 		 * If the version negotiated with vswitch is >= 1.5 (link
4554 		 * status update support), set the required bits in our
4555 		 * attributes if this vnet device has been configured to get
4556 		 * physical link state updates.
4557 		 */
4558 		if (portp == vgenp->vsw_portp && vnetp->pls_update == B_TRUE) {
4559 			lp->physlink_update = PHYSLINK_UPDATE_STATE;
4560 		} else {
4561 			lp->physlink_update = PHYSLINK_UPDATE_NONE;
4562 		}
4563 	}
4564 
4565 	if (VGEN_VER_GTEQ(ldcp, 1, 4)) {
4566 		/*
4567 		 * If the version negotiated with peer is >= 1.4(Jumbo Frame
4568 		 * Support), set the mtu in our attributes to max_frame_size.
4569 		 */
4570 		lp->mtu = vgenp->max_frame_size;
4571 	} else  if (VGEN_VER_EQ(ldcp, 1, 3)) {
4572 		/*
4573 		 * If the version negotiated with peer is == 1.3 (Vlan Tag
4574 		 * Support) set the attr.mtu to ETHERMAX + VLAN_TAGSZ.
4575 		 */
4576 		lp->mtu = ETHERMAX + VLAN_TAGSZ;
4577 	} else {
4578 		vgen_port_t	*portp = ldcp->portp;
4579 		vnet_t		*vnetp = vgenp->vnetp;
4580 		/*
4581 		 * Pre-1.3 peers expect max frame size of ETHERMAX.
4582 		 * We can negotiate that size with those peers provided the
4583 		 * following conditions are true:
4584 		 * - Only pvid is defined for our peer and there are no vids.
4585 		 * - pvids are equal.
4586 		 * If the above conditions are true, then we can send/recv only
4587 		 * untagged frames of max size ETHERMAX.
4588 		 */
4589 		if (portp->nvids == 0 && portp->pvid == vnetp->pvid) {
4590 			lp->mtu = ETHERMAX;
4591 		}
4592 	}
4593 
4594 	if (VGEN_VER_GTEQ(ldcp, 1, 2)) {
4595 		/* Versions >= 1.2 */
4596 
4597 		if (VGEN_PRI_ETH_DEFINED(vgenp)) {
4598 			/*
4599 			 * enable priority routines and pkt mode only if
4600 			 * at least one pri-eth-type is specified in MD.
4601 			 */
4602 
4603 			ldcp->tx = vgen_ldcsend;
4604 			ldcp->rx_pktdata = vgen_handle_pkt_data;
4605 
4606 			/* set xfer mode for vgen_send_attr_info() */
4607 			lp->xfer_mode = VIO_PKT_MODE | VIO_DRING_MODE_V1_2;
4608 
4609 		} else {
4610 			/* no priority eth types defined in MD */
4611 
4612 			ldcp->tx = vgen_ldcsend_dring;
4613 			ldcp->rx_pktdata = vgen_handle_pkt_data_nop;
4614 
4615 			/* set xfer mode for vgen_send_attr_info() */
4616 			lp->xfer_mode = VIO_DRING_MODE_V1_2;
4617 
4618 		}
4619 	} else {
4620 		/* Versions prior to 1.2  */
4621 
4622 		vgen_reset_vnet_proto_ops(ldcp);
4623 	}
4624 }
4625 
4626 /*
4627  * Reset vnet-protocol-version dependent functions to pre-v1.2.
4628  */
4629 static void
4630 vgen_reset_vnet_proto_ops(vgen_ldc_t *ldcp)
4631 {
4632 	vgen_hparams_t	*lp = &ldcp->local_hparams;
4633 
4634 	ldcp->tx = vgen_ldcsend_dring;
4635 	ldcp->rx_pktdata = vgen_handle_pkt_data_nop;
4636 
4637 	/* set xfer mode for vgen_send_attr_info() */
4638 	lp->xfer_mode = VIO_DRING_MODE_V1_0;
4639 }
4640 
4641 static void
4642 vgen_vlan_unaware_port_reset(vgen_port_t *portp)
4643 {
4644 	vgen_ldclist_t	*ldclp;
4645 	vgen_ldc_t	*ldcp;
4646 	vgen_t		*vgenp = portp->vgenp;
4647 	vnet_t		*vnetp = vgenp->vnetp;
4648 
4649 	ldclp = &portp->ldclist;
4650 
4651 	READ_ENTER(&ldclp->rwlock);
4652 
4653 	/*
4654 	 * NOTE: for now, we will assume we have a single channel.
4655 	 */
4656 	if (ldclp->headp == NULL) {
4657 		RW_EXIT(&ldclp->rwlock);
4658 		return;
4659 	}
4660 	ldcp = ldclp->headp;
4661 
4662 	mutex_enter(&ldcp->cblock);
4663 
4664 	/*
4665 	 * If the peer is vlan_unaware(ver < 1.3), reset channel and terminate
4666 	 * the connection. See comments in vgen_set_vnet_proto_ops().
4667 	 */
4668 	if (ldcp->hphase == VH_DONE && VGEN_VER_LT(ldcp, 1, 3) &&
4669 	    (portp->nvids != 0 || portp->pvid != vnetp->pvid)) {
4670 		vgen_ldc_reset(ldcp);
4671 	}
4672 
4673 	mutex_exit(&ldcp->cblock);
4674 
4675 	RW_EXIT(&ldclp->rwlock);
4676 }
4677 
4678 static void
4679 vgen_port_reset(vgen_port_t *portp)
4680 {
4681 	vgen_ldclist_t	*ldclp;
4682 	vgen_ldc_t	*ldcp;
4683 
4684 	ldclp = &portp->ldclist;
4685 
4686 	READ_ENTER(&ldclp->rwlock);
4687 
4688 	/*
4689 	 * NOTE: for now, we will assume we have a single channel.
4690 	 */
4691 	if (ldclp->headp == NULL) {
4692 		RW_EXIT(&ldclp->rwlock);
4693 		return;
4694 	}
4695 	ldcp = ldclp->headp;
4696 
4697 	mutex_enter(&ldcp->cblock);
4698 
4699 	vgen_ldc_reset(ldcp);
4700 
4701 	mutex_exit(&ldcp->cblock);
4702 
4703 	RW_EXIT(&ldclp->rwlock);
4704 }
4705 
4706 static void
4707 vgen_reset_vlan_unaware_ports(vgen_t *vgenp)
4708 {
4709 	vgen_port_t	*portp;
4710 	vgen_portlist_t	*plistp;
4711 
4712 	plistp = &(vgenp->vgenports);
4713 	READ_ENTER(&plistp->rwlock);
4714 
4715 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
4716 
4717 		vgen_vlan_unaware_port_reset(portp);
4718 
4719 	}
4720 
4721 	RW_EXIT(&plistp->rwlock);
4722 }
4723 
4724 static void
4725 vgen_reset_vsw_port(vgen_t *vgenp)
4726 {
4727 	vgen_port_t	*portp;
4728 
4729 	if ((portp = vgenp->vsw_portp) != NULL) {
4730 		vgen_port_reset(portp);
4731 	}
4732 }
4733 
4734 /*
4735  * This function resets the handshake phase to VH_PHASE0(pre-handshake phase).
4736  * This can happen after a channel comes up (status: LDC_UP) or
4737  * when handshake gets terminated due to various conditions.
4738  */
4739 static void
4740 vgen_reset_hphase(vgen_ldc_t *ldcp)
4741 {
4742 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4743 	ldc_status_t istatus;
4744 	int rv;
4745 
4746 	DBG1(vgenp, ldcp, "enter\n");
4747 	/* reset hstate and hphase */
4748 	ldcp->hstate = 0;
4749 	ldcp->hphase = VH_PHASE0;
4750 
4751 	vgen_reset_vnet_proto_ops(ldcp);
4752 
4753 	/*
4754 	 * Save the id of pending handshake timer in cancel_htid.
4755 	 * This will be checked in vgen_ldc_cb() and the handshake timer will
4756 	 * be cancelled after releasing cblock.
4757 	 */
4758 	if (ldcp->htid) {
4759 		ldcp->cancel_htid = ldcp->htid;
4760 		ldcp->htid = 0;
4761 	}
4762 
4763 	if (ldcp->local_hparams.dring_ready) {
4764 		ldcp->local_hparams.dring_ready = B_FALSE;
4765 	}
4766 
4767 	/* Unbind tx descriptor ring from the channel */
4768 	if (ldcp->num_txdcookies) {
4769 		rv = ldc_mem_dring_unbind(ldcp->tx_dhandle);
4770 		if (rv != 0) {
4771 			DWARN(vgenp, ldcp, "ldc_mem_dring_unbind failed\n");
4772 		}
4773 		ldcp->num_txdcookies = 0;
4774 	}
4775 
4776 	if (ldcp->peer_hparams.dring_ready) {
4777 		ldcp->peer_hparams.dring_ready = B_FALSE;
4778 		/* Unmap peer's dring */
4779 		(void) ldc_mem_dring_unmap(ldcp->rx_dhandle);
4780 		vgen_clobber_rxds(ldcp);
4781 	}
4782 
4783 	vgen_clobber_tbufs(ldcp);
4784 
4785 	/*
4786 	 * clear local handshake params and initialize.
4787 	 */
4788 	bzero(&(ldcp->local_hparams), sizeof (ldcp->local_hparams));
4789 
4790 	/* set version to the highest version supported */
4791 	ldcp->local_hparams.ver_major =
4792 	    ldcp->vgen_versions[0].ver_major;
4793 	ldcp->local_hparams.ver_minor =
4794 	    ldcp->vgen_versions[0].ver_minor;
4795 	ldcp->local_hparams.dev_class = VDEV_NETWORK;
4796 
4797 	/* set attr_info params */
4798 	ldcp->local_hparams.mtu = vgenp->max_frame_size;
4799 	ldcp->local_hparams.addr =
4800 	    vnet_macaddr_strtoul(vgenp->macaddr);
4801 	ldcp->local_hparams.addr_type = ADDR_TYPE_MAC;
4802 	ldcp->local_hparams.xfer_mode = VIO_DRING_MODE_V1_0;
4803 	ldcp->local_hparams.ack_freq = 0;	/* don't need acks */
4804 	ldcp->local_hparams.physlink_update = PHYSLINK_UPDATE_NONE;
4805 
4806 	/*
4807 	 * Note: dring is created, but not bound yet.
4808 	 * local dring_info params will be updated when we bind the dring in
4809 	 * vgen_handshake_phase2().
4810 	 * dring_ident is set to 0. After mapping the dring, peer sets this
4811 	 * value and sends it in the ack, which is saved in
4812 	 * vgen_handle_dring_reg().
4813 	 */
4814 	ldcp->local_hparams.dring_ident = 0;
4815 
4816 	/* clear peer_hparams */
4817 	bzero(&(ldcp->peer_hparams), sizeof (ldcp->peer_hparams));
4818 
4819 	/* reset the channel if required */
4820 #ifdef	VNET_IOC_DEBUG
4821 	if (ldcp->need_ldc_reset && !ldcp->link_down_forced) {
4822 #else
4823 	if (ldcp->need_ldc_reset) {
4824 #endif
4825 		DWARN(vgenp, ldcp, "Doing Channel Reset...\n");
4826 		ldcp->need_ldc_reset = B_FALSE;
4827 		(void) ldc_down(ldcp->ldc_handle);
4828 		(void) ldc_status(ldcp->ldc_handle, &istatus);
4829 		DBG2(vgenp, ldcp, "Reset Done,ldc_status(%x)\n", istatus);
4830 		ldcp->ldc_status = istatus;
4831 
4832 		/* clear sids */
4833 		ldcp->local_sid = 0;
4834 		ldcp->peer_sid = 0;
4835 
4836 		/* try to bring the channel up */
4837 		rv = ldc_up(ldcp->ldc_handle);
4838 		if (rv != 0) {
4839 			DWARN(vgenp, ldcp, "ldc_up err rv(%d)\n", rv);
4840 		}
4841 
4842 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
4843 			DWARN(vgenp, ldcp, "ldc_status err\n");
4844 		} else {
4845 			ldcp->ldc_status = istatus;
4846 		}
4847 	}
4848 }
4849 
4850 /* wrapper function for vgen_reset_hphase */
4851 static void
4852 vgen_handshake_reset(vgen_ldc_t *ldcp)
4853 {
4854 	vgen_t  *vgenp = LDC_TO_VGEN(ldcp);
4855 
4856 	ASSERT(MUTEX_HELD(&ldcp->cblock));
4857 	mutex_enter(&ldcp->rxlock);
4858 	mutex_enter(&ldcp->wrlock);
4859 	mutex_enter(&ldcp->txlock);
4860 	mutex_enter(&ldcp->tclock);
4861 
4862 	vgen_reset_hphase(ldcp);
4863 
4864 	mutex_exit(&ldcp->tclock);
4865 	mutex_exit(&ldcp->txlock);
4866 	mutex_exit(&ldcp->wrlock);
4867 	mutex_exit(&ldcp->rxlock);
4868 
4869 	/*
4870 	 * As the connection is now reset, mark the channel
4871 	 * link_state as 'down' and notify the stack if needed.
4872 	 */
4873 	if (ldcp->link_state != LINK_STATE_DOWN) {
4874 		ldcp->link_state = LINK_STATE_DOWN;
4875 
4876 		if (ldcp->portp == vgenp->vsw_portp) { /* vswitch port ? */
4877 			/*
4878 			 * As the channel link is down, mark physical link also
4879 			 * as down. After the channel comes back up and
4880 			 * handshake completes, we will get an update on the
4881 			 * physlink state from vswitch (if this device has been
4882 			 * configured to get phys link updates).
4883 			 */
4884 			vgenp->phys_link_state = LINK_STATE_DOWN;
4885 
4886 			/* Now update the stack */
4887 			mutex_exit(&ldcp->cblock);
4888 			vgen_link_update(vgenp, ldcp->link_state);
4889 			mutex_enter(&ldcp->cblock);
4890 		}
4891 	}
4892 }
4893 
4894 /*
4895  * Initiate handshake with the peer by sending various messages
4896  * based on the handshake-phase that the channel is currently in.
4897  */
4898 static void
4899 vgen_handshake(vgen_ldc_t *ldcp)
4900 {
4901 	uint32_t	hphase = ldcp->hphase;
4902 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4903 	ldc_status_t	istatus;
4904 	int		rv = 0;
4905 
4906 	switch (hphase) {
4907 
4908 	case VH_PHASE1:
4909 
4910 		/*
4911 		 * start timer, for entire handshake process, turn this timer
4912 		 * off if all phases of handshake complete successfully and
4913 		 * hphase goes to VH_DONE(below) or
4914 		 * vgen_reset_hphase() gets called or
4915 		 * channel is reset due to errors or
4916 		 * vgen_ldc_uninit() is invoked(vgen_stop).
4917 		 */
4918 		ASSERT(ldcp->htid == 0);
4919 		ldcp->htid = timeout(vgen_hwatchdog, (caddr_t)ldcp,
4920 		    drv_usectohz(vgen_hwd_interval * MICROSEC));
4921 
4922 		/* Phase 1 involves negotiating the version */
4923 		rv = vgen_send_version_negotiate(ldcp);
4924 		break;
4925 
4926 	case VH_PHASE2:
4927 		rv = vgen_handshake_phase2(ldcp);
4928 		break;
4929 
4930 	case VH_PHASE3:
4931 		rv = vgen_send_rdx_info(ldcp);
4932 		break;
4933 
4934 	case VH_DONE:
4935 		/*
4936 		 * Save the id of pending handshake timer in cancel_htid.
4937 		 * This will be checked in vgen_ldc_cb() and the handshake
4938 		 * timer will be cancelled after releasing cblock.
4939 		 */
4940 		if (ldcp->htid) {
4941 			ldcp->cancel_htid = ldcp->htid;
4942 			ldcp->htid = 0;
4943 		}
4944 		ldcp->hretries = 0;
4945 		DBG1(vgenp, ldcp, "Handshake Done\n");
4946 
4947 		/*
4948 		 * The channel is up and handshake is done successfully. Now we
4949 		 * can mark the channel link_state as 'up'. We also notify the
4950 		 * stack if the channel is connected to vswitch.
4951 		 */
4952 		ldcp->link_state = LINK_STATE_UP;
4953 
4954 		if (ldcp->portp == vgenp->vsw_portp) {
4955 			/*
4956 			 * If this channel(port) is connected to vsw,
4957 			 * need to sync multicast table with vsw.
4958 			 */
4959 			mutex_exit(&ldcp->cblock);
4960 
4961 			mutex_enter(&vgenp->lock);
4962 			rv = vgen_send_mcast_info(ldcp);
4963 			mutex_exit(&vgenp->lock);
4964 
4965 			if (vgenp->pls_negotiated == B_FALSE) {
4966 				/*
4967 				 * We haven't negotiated with vswitch to get
4968 				 * physical link state updates. We can update
4969 				 * update the stack at this point as the
4970 				 * channel to vswitch is up and the handshake
4971 				 * is done successfully.
4972 				 *
4973 				 * If we have negotiated to get physical link
4974 				 * state updates, then we won't notify the
4975 				 * the stack here; we do that as soon as
4976 				 * vswitch sends us the initial phys link state
4977 				 * (see vgen_handle_physlink_info()).
4978 				 */
4979 				vgen_link_update(vgenp, ldcp->link_state);
4980 			}
4981 
4982 			mutex_enter(&ldcp->cblock);
4983 			if (rv != VGEN_SUCCESS)
4984 				break;
4985 		}
4986 
4987 		/*
4988 		 * Check if mac layer should be notified to restart
4989 		 * transmissions. This can happen if the channel got
4990 		 * reset and vgen_clobber_tbufs() is called, while
4991 		 * need_resched is set.
4992 		 */
4993 		mutex_enter(&ldcp->tclock);
4994 		if (ldcp->need_resched) {
4995 			vio_net_tx_update_t vtx_update =
4996 			    ldcp->portp->vcb.vio_net_tx_update;
4997 
4998 			ldcp->need_resched = B_FALSE;
4999 			vtx_update(ldcp->portp->vhp);
5000 		}
5001 		mutex_exit(&ldcp->tclock);
5002 
5003 		break;
5004 
5005 	default:
5006 		break;
5007 	}
5008 
5009 	if (rv == ECONNRESET) {
5010 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
5011 			DWARN(vgenp, ldcp, "ldc_status err\n");
5012 		} else {
5013 			ldcp->ldc_status = istatus;
5014 		}
5015 		vgen_handle_evt_reset(ldcp);
5016 	} else if (rv) {
5017 		vgen_handshake_reset(ldcp);
5018 	}
5019 }
5020 
5021 /*
5022  * Check if the current handshake phase has completed successfully and
5023  * return the status.
5024  */
5025 static int
5026 vgen_handshake_done(vgen_ldc_t *ldcp)
5027 {
5028 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
5029 	uint32_t	hphase = ldcp->hphase;
5030 	int 		status = 0;
5031 
5032 	switch (hphase) {
5033 
5034 	case VH_PHASE1:
5035 		/*
5036 		 * Phase1 is done, if version negotiation
5037 		 * completed successfully.
5038 		 */
5039 		status = ((ldcp->hstate & VER_NEGOTIATED) ==
5040 		    VER_NEGOTIATED);
5041 		break;
5042 
5043 	case VH_PHASE2:
5044 		/*
5045 		 * Phase 2 is done, if attr info and dring info
5046 		 * have been exchanged successfully.
5047 		 */
5048 		status = (((ldcp->hstate & ATTR_INFO_EXCHANGED) ==
5049 		    ATTR_INFO_EXCHANGED) &&
5050 		    ((ldcp->hstate & DRING_INFO_EXCHANGED) ==
5051 		    DRING_INFO_EXCHANGED));
5052 		break;
5053 
5054 	case VH_PHASE3:
5055 		/* Phase 3 is done, if rdx msg has been exchanged */
5056 		status = ((ldcp->hstate & RDX_EXCHANGED) ==
5057 		    RDX_EXCHANGED);
5058 		break;
5059 
5060 	default:
5061 		break;
5062 	}
5063 
5064 	if (status == 0) {
5065 		return (VGEN_FAILURE);
5066 	}
5067 	DBG2(vgenp, ldcp, "PHASE(%d)\n", hphase);
5068 	return (VGEN_SUCCESS);
5069 }
5070 
5071 /* retry handshake on failure */
5072 static void
5073 vgen_handshake_retry(vgen_ldc_t *ldcp)
5074 {
5075 	/* reset handshake phase */
5076 	vgen_handshake_reset(ldcp);
5077 
5078 	/* handshake retry is specified and the channel is UP */
5079 	if (vgen_max_hretries && (ldcp->ldc_status == LDC_UP)) {
5080 		if (ldcp->hretries++ < vgen_max_hretries) {
5081 			ldcp->local_sid = ddi_get_lbolt();
5082 			vgen_handshake(vh_nextphase(ldcp));
5083 		}
5084 	}
5085 }
5086 
5087 
5088 /*
5089  * Link State Update Notes:
5090  * The link state of the channel connected to vswitch is reported as the link
5091  * state of the vnet device, by default. If the channel is down or reset, then
5092  * the link state is marked 'down'. If the channel is 'up' *and* handshake
5093  * between the vnet and vswitch is successful, then the link state is marked
5094  * 'up'. If physical network link state is desired, then the vnet device must
5095  * be configured to get physical link updates and the 'linkprop' property
5096  * in the virtual-device MD node indicates this. As part of attribute exchange
5097  * the vnet device negotiates with the vswitch to obtain physical link state
5098  * updates. If it successfully negotiates, vswitch sends an initial physlink
5099  * msg once the handshake is done and further whenever the physical link state
5100  * changes. Currently we don't have mac layer interfaces to report two distinct
5101  * link states - virtual and physical. Thus, if the vnet has been configured to
5102  * get physical link updates, then the link status will be reported as 'up'
5103  * only when both the virtual and physical links are up.
5104  */
5105 static void
5106 vgen_link_update(vgen_t *vgenp, link_state_t link_state)
5107 {
5108 	vnet_link_update(vgenp->vnetp, link_state);
5109 }
5110 
5111 /*
5112  * Handle a version info msg from the peer or an ACK/NACK from the peer
5113  * to a version info msg that we sent.
5114  */
5115 static int
5116 vgen_handle_version_negotiate(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5117 {
5118 	vgen_t		*vgenp;
5119 	vio_ver_msg_t	*vermsg = (vio_ver_msg_t *)tagp;
5120 	int		ack = 0;
5121 	int		failed = 0;
5122 	int		idx;
5123 	vgen_ver_t	*versions = ldcp->vgen_versions;
5124 	int		rv = 0;
5125 
5126 	vgenp = LDC_TO_VGEN(ldcp);
5127 	DBG1(vgenp, ldcp, "enter\n");
5128 	switch (tagp->vio_subtype) {
5129 	case VIO_SUBTYPE_INFO:
5130 
5131 		/*  Cache sid of peer if this is the first time */
5132 		if (ldcp->peer_sid == 0) {
5133 			DBG2(vgenp, ldcp, "Caching peer_sid(%x)\n",
5134 			    tagp->vio_sid);
5135 			ldcp->peer_sid = tagp->vio_sid;
5136 		}
5137 
5138 		if (ldcp->hphase != VH_PHASE1) {
5139 			/*
5140 			 * If we are not already in VH_PHASE1, reset to
5141 			 * pre-handshake state, and initiate handshake
5142 			 * to the peer too.
5143 			 */
5144 			vgen_handshake_reset(ldcp);
5145 			vgen_handshake(vh_nextphase(ldcp));
5146 		}
5147 		ldcp->hstate |= VER_INFO_RCVD;
5148 
5149 		/* save peer's requested values */
5150 		ldcp->peer_hparams.ver_major = vermsg->ver_major;
5151 		ldcp->peer_hparams.ver_minor = vermsg->ver_minor;
5152 		ldcp->peer_hparams.dev_class = vermsg->dev_class;
5153 
5154 		if ((vermsg->dev_class != VDEV_NETWORK) &&
5155 		    (vermsg->dev_class != VDEV_NETWORK_SWITCH)) {
5156 			/* unsupported dev_class, send NACK */
5157 
5158 			DWARN(vgenp, ldcp, "Version Negotiation Failed\n");
5159 
5160 			tagp->vio_subtype = VIO_SUBTYPE_NACK;
5161 			tagp->vio_sid = ldcp->local_sid;
5162 			/* send reply msg back to peer */
5163 			rv = vgen_sendmsg(ldcp, (caddr_t)tagp,
5164 			    sizeof (*vermsg), B_FALSE);
5165 			if (rv != VGEN_SUCCESS) {
5166 				return (rv);
5167 			}
5168 			return (VGEN_FAILURE);
5169 		}
5170 
5171 		DBG2(vgenp, ldcp, "VER_INFO_RCVD, ver(%d,%d)\n",
5172 		    vermsg->ver_major,  vermsg->ver_minor);
5173 
5174 		idx = 0;
5175 
5176 		for (;;) {
5177 
5178 			if (vermsg->ver_major > versions[idx].ver_major) {
5179 
5180 				/* nack with next lower version */
5181 				tagp->vio_subtype = VIO_SUBTYPE_NACK;
5182 				vermsg->ver_major = versions[idx].ver_major;
5183 				vermsg->ver_minor = versions[idx].ver_minor;
5184 				break;
5185 			}
5186 
5187 			if (vermsg->ver_major == versions[idx].ver_major) {
5188 
5189 				/* major version match - ACK version */
5190 				tagp->vio_subtype = VIO_SUBTYPE_ACK;
5191 				ack = 1;
5192 
5193 				/*
5194 				 * lower minor version to the one this endpt
5195 				 * supports, if necessary
5196 				 */
5197 				if (vermsg->ver_minor >
5198 				    versions[idx].ver_minor) {
5199 					vermsg->ver_minor =
5200 					    versions[idx].ver_minor;
5201 					ldcp->peer_hparams.ver_minor =
5202 					    versions[idx].ver_minor;
5203 				}
5204 				break;
5205 			}
5206 
5207 			idx++;
5208 
5209 			if (idx == VGEN_NUM_VER) {
5210 
5211 				/* no version match - send NACK */
5212 				tagp->vio_subtype = VIO_SUBTYPE_NACK;
5213 				vermsg->ver_major = 0;
5214 				vermsg->ver_minor = 0;
5215 				failed = 1;
5216 				break;
5217 			}
5218 
5219 		}
5220 
5221 		tagp->vio_sid = ldcp->local_sid;
5222 
5223 		/* send reply msg back to peer */
5224 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*vermsg),
5225 		    B_FALSE);
5226 		if (rv != VGEN_SUCCESS) {
5227 			return (rv);
5228 		}
5229 
5230 		if (ack) {
5231 			ldcp->hstate |= VER_ACK_SENT;
5232 			DBG2(vgenp, ldcp, "VER_ACK_SENT, ver(%d,%d) \n",
5233 			    vermsg->ver_major, vermsg->ver_minor);
5234 		}
5235 		if (failed) {
5236 			DWARN(vgenp, ldcp, "Negotiation Failed\n");
5237 			return (VGEN_FAILURE);
5238 		}
5239 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5240 
5241 			/*  VER_ACK_SENT and VER_ACK_RCVD */
5242 
5243 			/* local and peer versions match? */
5244 			ASSERT((ldcp->local_hparams.ver_major ==
5245 			    ldcp->peer_hparams.ver_major) &&
5246 			    (ldcp->local_hparams.ver_minor ==
5247 			    ldcp->peer_hparams.ver_minor));
5248 
5249 			vgen_set_vnet_proto_ops(ldcp);
5250 
5251 			/* move to the next phase */
5252 			vgen_handshake(vh_nextphase(ldcp));
5253 		}
5254 
5255 		break;
5256 
5257 	case VIO_SUBTYPE_ACK:
5258 
5259 		if (ldcp->hphase != VH_PHASE1) {
5260 			/*  This should not happen. */
5261 			DWARN(vgenp, ldcp, "Invalid Phase(%u)\n", ldcp->hphase);
5262 			return (VGEN_FAILURE);
5263 		}
5264 
5265 		/* SUCCESS - we have agreed on a version */
5266 		ldcp->local_hparams.ver_major = vermsg->ver_major;
5267 		ldcp->local_hparams.ver_minor = vermsg->ver_minor;
5268 		ldcp->hstate |= VER_ACK_RCVD;
5269 
5270 		DBG2(vgenp, ldcp, "VER_ACK_RCVD, ver(%d,%d) \n",
5271 		    vermsg->ver_major,  vermsg->ver_minor);
5272 
5273 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5274 
5275 			/*  VER_ACK_SENT and VER_ACK_RCVD */
5276 
5277 			/* local and peer versions match? */
5278 			ASSERT((ldcp->local_hparams.ver_major ==
5279 			    ldcp->peer_hparams.ver_major) &&
5280 			    (ldcp->local_hparams.ver_minor ==
5281 			    ldcp->peer_hparams.ver_minor));
5282 
5283 			vgen_set_vnet_proto_ops(ldcp);
5284 
5285 			/* move to the next phase */
5286 			vgen_handshake(vh_nextphase(ldcp));
5287 		}
5288 		break;
5289 
5290 	case VIO_SUBTYPE_NACK:
5291 
5292 		if (ldcp->hphase != VH_PHASE1) {
5293 			/*  This should not happen.  */
5294 			DWARN(vgenp, ldcp, "VER_NACK_RCVD Invalid "
5295 			"Phase(%u)\n", ldcp->hphase);
5296 			return (VGEN_FAILURE);
5297 		}
5298 
5299 		DBG2(vgenp, ldcp, "VER_NACK_RCVD next ver(%d,%d)\n",
5300 		    vermsg->ver_major, vermsg->ver_minor);
5301 
5302 		/* check if version in NACK is zero */
5303 		if (vermsg->ver_major == 0 && vermsg->ver_minor == 0) {
5304 			/*
5305 			 * Version Negotiation has failed.
5306 			 */
5307 			DWARN(vgenp, ldcp, "Version Negotiation Failed\n");
5308 			return (VGEN_FAILURE);
5309 		}
5310 
5311 		idx = 0;
5312 
5313 		for (;;) {
5314 
5315 			if (vermsg->ver_major > versions[idx].ver_major) {
5316 				/* select next lower version */
5317 
5318 				ldcp->local_hparams.ver_major =
5319 				    versions[idx].ver_major;
5320 				ldcp->local_hparams.ver_minor =
5321 				    versions[idx].ver_minor;
5322 				break;
5323 			}
5324 
5325 			if (vermsg->ver_major == versions[idx].ver_major) {
5326 				/* major version match */
5327 
5328 				ldcp->local_hparams.ver_major =
5329 				    versions[idx].ver_major;
5330 
5331 				ldcp->local_hparams.ver_minor =
5332 				    versions[idx].ver_minor;
5333 				break;
5334 			}
5335 
5336 			idx++;
5337 
5338 			if (idx == VGEN_NUM_VER) {
5339 				/*
5340 				 * no version match.
5341 				 * Version Negotiation has failed.
5342 				 */
5343 				DWARN(vgenp, ldcp,
5344 				    "Version Negotiation Failed\n");
5345 				return (VGEN_FAILURE);
5346 			}
5347 
5348 		}
5349 
5350 		rv = vgen_send_version_negotiate(ldcp);
5351 		if (rv != VGEN_SUCCESS) {
5352 			return (rv);
5353 		}
5354 
5355 		break;
5356 	}
5357 
5358 	DBG1(vgenp, ldcp, "exit\n");
5359 	return (VGEN_SUCCESS);
5360 }
5361 
5362 /* Check if the attributes are supported */
5363 static int
5364 vgen_check_attr_info(vgen_ldc_t *ldcp, vnet_attr_msg_t *msg)
5365 {
5366 	vgen_hparams_t	*lp = &ldcp->local_hparams;
5367 
5368 	if ((msg->addr_type != ADDR_TYPE_MAC) ||
5369 	    (msg->ack_freq > 64) ||
5370 	    (msg->xfer_mode != lp->xfer_mode)) {
5371 		return (VGEN_FAILURE);
5372 	}
5373 
5374 	if (VGEN_VER_LT(ldcp, 1, 4)) {
5375 		/* versions < 1.4, mtu must match */
5376 		if (msg->mtu != lp->mtu) {
5377 			return (VGEN_FAILURE);
5378 		}
5379 	} else {
5380 		/* Ver >= 1.4, validate mtu of the peer is at least ETHERMAX */
5381 		if (msg->mtu < ETHERMAX) {
5382 			return (VGEN_FAILURE);
5383 		}
5384 	}
5385 
5386 	return (VGEN_SUCCESS);
5387 }
5388 
5389 /*
5390  * Handle an attribute info msg from the peer or an ACK/NACK from the peer
5391  * to an attr info msg that we sent.
5392  */
5393 static int
5394 vgen_handle_attr_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5395 {
5396 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
5397 	vnet_attr_msg_t	*msg = (vnet_attr_msg_t *)tagp;
5398 	vgen_hparams_t	*lp = &ldcp->local_hparams;
5399 	vgen_hparams_t	*rp = &ldcp->peer_hparams;
5400 	int		ack = 1;
5401 	int		rv = 0;
5402 	uint32_t	mtu;
5403 
5404 	DBG1(vgenp, ldcp, "enter\n");
5405 	if (ldcp->hphase != VH_PHASE2) {
5406 		DWARN(vgenp, ldcp, "Rcvd ATTR_INFO subtype(%d),"
5407 		" Invalid Phase(%u)\n",
5408 		    tagp->vio_subtype, ldcp->hphase);
5409 		return (VGEN_FAILURE);
5410 	}
5411 	switch (tagp->vio_subtype) {
5412 	case VIO_SUBTYPE_INFO:
5413 
5414 		DBG2(vgenp, ldcp, "ATTR_INFO_RCVD \n");
5415 		ldcp->hstate |= ATTR_INFO_RCVD;
5416 
5417 		/* save peer's values */
5418 		rp->mtu = msg->mtu;
5419 		rp->addr = msg->addr;
5420 		rp->addr_type = msg->addr_type;
5421 		rp->xfer_mode = msg->xfer_mode;
5422 		rp->ack_freq = msg->ack_freq;
5423 
5424 		rv = vgen_check_attr_info(ldcp, msg);
5425 		if (rv == VGEN_FAILURE) {
5426 			/* unsupported attr, send NACK */
5427 			ack = 0;
5428 		} else {
5429 
5430 			if (VGEN_VER_GTEQ(ldcp, 1, 4)) {
5431 
5432 				/*
5433 				 * Versions >= 1.4:
5434 				 * The mtu is negotiated down to the
5435 				 * minimum of our mtu and peer's mtu.
5436 				 */
5437 				mtu = MIN(msg->mtu, vgenp->max_frame_size);
5438 
5439 				/*
5440 				 * If we have received an ack for the attr info
5441 				 * that we sent, then check if the mtu computed
5442 				 * above matches the mtu that the peer had ack'd
5443 				 * (saved in local hparams). If they don't
5444 				 * match, we fail the handshake.
5445 				 */
5446 				if (ldcp->hstate & ATTR_ACK_RCVD) {
5447 					if (mtu != lp->mtu) {
5448 						/* send NACK */
5449 						ack = 0;
5450 					}
5451 				} else {
5452 					/*
5453 					 * Save the mtu computed above in our
5454 					 * attr parameters, so it gets sent in
5455 					 * the attr info from us to the peer.
5456 					 */
5457 					lp->mtu = mtu;
5458 				}
5459 
5460 				/* save the MIN mtu in the msg to be replied */
5461 				msg->mtu = mtu;
5462 
5463 			}
5464 		}
5465 
5466 
5467 		if (ack) {
5468 			tagp->vio_subtype = VIO_SUBTYPE_ACK;
5469 		} else {
5470 			tagp->vio_subtype = VIO_SUBTYPE_NACK;
5471 		}
5472 		tagp->vio_sid = ldcp->local_sid;
5473 
5474 		/* send reply msg back to peer */
5475 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msg),
5476 		    B_FALSE);
5477 		if (rv != VGEN_SUCCESS) {
5478 			return (rv);
5479 		}
5480 
5481 		if (ack) {
5482 			ldcp->hstate |= ATTR_ACK_SENT;
5483 			DBG2(vgenp, ldcp, "ATTR_ACK_SENT \n");
5484 		} else {
5485 			/* failed */
5486 			DWARN(vgenp, ldcp, "ATTR_NACK_SENT \n");
5487 			return (VGEN_FAILURE);
5488 		}
5489 
5490 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5491 			vgen_handshake(vh_nextphase(ldcp));
5492 		}
5493 
5494 		break;
5495 
5496 	case VIO_SUBTYPE_ACK:
5497 
5498 		if (VGEN_VER_GTEQ(ldcp, 1, 5) &&
5499 		    ldcp->portp == vgenp->vsw_portp) {
5500 			/*
5501 			 * Versions >= 1.5:
5502 			 * If the vnet device has been configured to get
5503 			 * physical link state updates, check the corresponding
5504 			 * bits in the ack msg, if the peer is vswitch.
5505 			 */
5506 			if (((lp->physlink_update &
5507 			    PHYSLINK_UPDATE_STATE_MASK) ==
5508 			    PHYSLINK_UPDATE_STATE) &&
5509 
5510 			    ((msg->physlink_update &
5511 			    PHYSLINK_UPDATE_STATE_MASK) ==
5512 			    PHYSLINK_UPDATE_STATE_ACK)) {
5513 				vgenp->pls_negotiated = B_TRUE;
5514 			} else {
5515 				vgenp->pls_negotiated = B_FALSE;
5516 			}
5517 		}
5518 
5519 		if (VGEN_VER_GTEQ(ldcp, 1, 4)) {
5520 			/*
5521 			 * Versions >= 1.4:
5522 			 * The ack msg sent by the peer contains the minimum of
5523 			 * our mtu (that we had sent in our attr info) and the
5524 			 * peer's mtu.
5525 			 *
5526 			 * If we have sent an ack for the attr info msg from
5527 			 * the peer, check if the mtu that was computed then
5528 			 * (saved in local hparams) matches the mtu that the
5529 			 * peer has ack'd. If they don't match, we fail the
5530 			 * handshake.
5531 			 */
5532 			if (ldcp->hstate & ATTR_ACK_SENT) {
5533 				if (lp->mtu != msg->mtu) {
5534 					return (VGEN_FAILURE);
5535 				}
5536 			} else {
5537 				/*
5538 				 * If the mtu ack'd by the peer is > our mtu
5539 				 * fail handshake. Otherwise, save the mtu, so
5540 				 * we can validate it when we receive attr info
5541 				 * from our peer.
5542 				 */
5543 				if (msg->mtu > lp->mtu) {
5544 					return (VGEN_FAILURE);
5545 				}
5546 				if (msg->mtu <= lp->mtu) {
5547 					lp->mtu = msg->mtu;
5548 				}
5549 			}
5550 		}
5551 
5552 		ldcp->hstate |= ATTR_ACK_RCVD;
5553 
5554 		DBG2(vgenp, ldcp, "ATTR_ACK_RCVD \n");
5555 
5556 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5557 			vgen_handshake(vh_nextphase(ldcp));
5558 		}
5559 		break;
5560 
5561 	case VIO_SUBTYPE_NACK:
5562 
5563 		DBG2(vgenp, ldcp, "ATTR_NACK_RCVD \n");
5564 		return (VGEN_FAILURE);
5565 	}
5566 	DBG1(vgenp, ldcp, "exit\n");
5567 	return (VGEN_SUCCESS);
5568 }
5569 
5570 /* Check if the dring info msg is ok */
5571 static int
5572 vgen_check_dring_reg(vio_dring_reg_msg_t *msg)
5573 {
5574 	/* check if msg contents are ok */
5575 	if ((msg->num_descriptors < 128) || (msg->descriptor_size <
5576 	    sizeof (vnet_public_desc_t))) {
5577 		return (VGEN_FAILURE);
5578 	}
5579 	return (VGEN_SUCCESS);
5580 }
5581 
5582 /*
5583  * Handle a descriptor ring register msg from the peer or an ACK/NACK from
5584  * the peer to a dring register msg that we sent.
5585  */
5586 static int
5587 vgen_handle_dring_reg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5588 {
5589 	vio_dring_reg_msg_t *msg = (vio_dring_reg_msg_t *)tagp;
5590 	ldc_mem_cookie_t dcookie;
5591 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5592 	int ack = 0;
5593 	int rv = 0;
5594 
5595 	DBG1(vgenp, ldcp, "enter\n");
5596 	if (ldcp->hphase < VH_PHASE2) {
5597 		/* dring_info can be rcvd in any of the phases after Phase1 */
5598 		DWARN(vgenp, ldcp,
5599 		    "Rcvd DRING_INFO Subtype (%d), Invalid Phase(%u)\n",
5600 		    tagp->vio_subtype, ldcp->hphase);
5601 		return (VGEN_FAILURE);
5602 	}
5603 	switch (tagp->vio_subtype) {
5604 	case VIO_SUBTYPE_INFO:
5605 
5606 		DBG2(vgenp, ldcp, "DRING_INFO_RCVD \n");
5607 		ldcp->hstate |= DRING_INFO_RCVD;
5608 		bcopy((msg->cookie), &dcookie, sizeof (dcookie));
5609 
5610 		ASSERT(msg->ncookies == 1);
5611 
5612 		if (vgen_check_dring_reg(msg) == VGEN_SUCCESS) {
5613 			/*
5614 			 * verified dring info msg to be ok,
5615 			 * now try to map the remote dring.
5616 			 */
5617 			rv = vgen_init_rxds(ldcp, msg->num_descriptors,
5618 			    msg->descriptor_size, &dcookie,
5619 			    msg->ncookies);
5620 			if (rv == DDI_SUCCESS) {
5621 				/* now we can ack the peer */
5622 				ack = 1;
5623 			}
5624 		}
5625 		if (ack == 0) {
5626 			/* failed, send NACK */
5627 			tagp->vio_subtype = VIO_SUBTYPE_NACK;
5628 		} else {
5629 			if (!(ldcp->peer_hparams.dring_ready)) {
5630 
5631 				/* save peer's dring_info values */
5632 				bcopy(&dcookie,
5633 				    &(ldcp->peer_hparams.dring_cookie),
5634 				    sizeof (dcookie));
5635 				ldcp->peer_hparams.num_desc =
5636 				    msg->num_descriptors;
5637 				ldcp->peer_hparams.desc_size =
5638 				    msg->descriptor_size;
5639 				ldcp->peer_hparams.num_dcookies =
5640 				    msg->ncookies;
5641 
5642 				/* set dring_ident for the peer */
5643 				ldcp->peer_hparams.dring_ident =
5644 				    (uint64_t)ldcp->rxdp;
5645 				/* return the dring_ident in ack msg */
5646 				msg->dring_ident =
5647 				    (uint64_t)ldcp->rxdp;
5648 
5649 				ldcp->peer_hparams.dring_ready = B_TRUE;
5650 			}
5651 			tagp->vio_subtype = VIO_SUBTYPE_ACK;
5652 		}
5653 		tagp->vio_sid = ldcp->local_sid;
5654 		/* send reply msg back to peer */
5655 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msg),
5656 		    B_FALSE);
5657 		if (rv != VGEN_SUCCESS) {
5658 			return (rv);
5659 		}
5660 
5661 		if (ack) {
5662 			ldcp->hstate |= DRING_ACK_SENT;
5663 			DBG2(vgenp, ldcp, "DRING_ACK_SENT");
5664 		} else {
5665 			DWARN(vgenp, ldcp, "DRING_NACK_SENT");
5666 			return (VGEN_FAILURE);
5667 		}
5668 
5669 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5670 			vgen_handshake(vh_nextphase(ldcp));
5671 		}
5672 
5673 		break;
5674 
5675 	case VIO_SUBTYPE_ACK:
5676 
5677 		ldcp->hstate |= DRING_ACK_RCVD;
5678 
5679 		DBG2(vgenp, ldcp, "DRING_ACK_RCVD");
5680 
5681 		if (!(ldcp->local_hparams.dring_ready)) {
5682 			/* local dring is now ready */
5683 			ldcp->local_hparams.dring_ready = B_TRUE;
5684 
5685 			/* save dring_ident acked by peer */
5686 			ldcp->local_hparams.dring_ident =
5687 			    msg->dring_ident;
5688 		}
5689 
5690 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5691 			vgen_handshake(vh_nextphase(ldcp));
5692 		}
5693 
5694 		break;
5695 
5696 	case VIO_SUBTYPE_NACK:
5697 
5698 		DBG2(vgenp, ldcp, "DRING_NACK_RCVD");
5699 		return (VGEN_FAILURE);
5700 	}
5701 	DBG1(vgenp, ldcp, "exit\n");
5702 	return (VGEN_SUCCESS);
5703 }
5704 
5705 /*
5706  * Handle a rdx info msg from the peer or an ACK/NACK
5707  * from the peer to a rdx info msg that we sent.
5708  */
5709 static int
5710 vgen_handle_rdx_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5711 {
5712 	int rv = 0;
5713 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
5714 
5715 	DBG1(vgenp, ldcp, "enter\n");
5716 	if (ldcp->hphase != VH_PHASE3) {
5717 		DWARN(vgenp, ldcp,
5718 		    "Rcvd RDX_INFO Subtype (%d), Invalid Phase(%u)\n",
5719 		    tagp->vio_subtype, ldcp->hphase);
5720 		return (VGEN_FAILURE);
5721 	}
5722 	switch (tagp->vio_subtype) {
5723 	case VIO_SUBTYPE_INFO:
5724 
5725 		DBG2(vgenp, ldcp, "RDX_INFO_RCVD \n");
5726 		ldcp->hstate |= RDX_INFO_RCVD;
5727 
5728 		tagp->vio_subtype = VIO_SUBTYPE_ACK;
5729 		tagp->vio_sid = ldcp->local_sid;
5730 		/* send reply msg back to peer */
5731 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (vio_rdx_msg_t),
5732 		    B_FALSE);
5733 		if (rv != VGEN_SUCCESS) {
5734 			return (rv);
5735 		}
5736 
5737 		ldcp->hstate |= RDX_ACK_SENT;
5738 		DBG2(vgenp, ldcp, "RDX_ACK_SENT \n");
5739 
5740 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5741 			vgen_handshake(vh_nextphase(ldcp));
5742 		}
5743 
5744 		break;
5745 
5746 	case VIO_SUBTYPE_ACK:
5747 
5748 		ldcp->hstate |= RDX_ACK_RCVD;
5749 
5750 		DBG2(vgenp, ldcp, "RDX_ACK_RCVD \n");
5751 
5752 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5753 			vgen_handshake(vh_nextphase(ldcp));
5754 		}
5755 		break;
5756 
5757 	case VIO_SUBTYPE_NACK:
5758 
5759 		DBG2(vgenp, ldcp, "RDX_NACK_RCVD \n");
5760 		return (VGEN_FAILURE);
5761 	}
5762 	DBG1(vgenp, ldcp, "exit\n");
5763 	return (VGEN_SUCCESS);
5764 }
5765 
5766 /* Handle ACK/NACK from vsw to a set multicast msg that we sent */
5767 static int
5768 vgen_handle_mcast_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5769 {
5770 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5771 	vnet_mcast_msg_t *msgp = (vnet_mcast_msg_t *)tagp;
5772 	struct ether_addr *addrp;
5773 	int count;
5774 	int i;
5775 
5776 	DBG1(vgenp, ldcp, "enter\n");
5777 	switch (tagp->vio_subtype) {
5778 
5779 	case VIO_SUBTYPE_INFO:
5780 
5781 		/* vnet shouldn't recv set mcast msg, only vsw handles it */
5782 		DWARN(vgenp, ldcp, "rcvd SET_MCAST_INFO \n");
5783 		break;
5784 
5785 	case VIO_SUBTYPE_ACK:
5786 
5787 		/* success adding/removing multicast addr */
5788 		DBG1(vgenp, ldcp, "rcvd SET_MCAST_ACK \n");
5789 		break;
5790 
5791 	case VIO_SUBTYPE_NACK:
5792 
5793 		DWARN(vgenp, ldcp, "rcvd SET_MCAST_NACK \n");
5794 		if (!(msgp->set)) {
5795 			/* multicast remove request failed */
5796 			break;
5797 		}
5798 
5799 		/* multicast add request failed */
5800 		for (count = 0; count < msgp->count; count++) {
5801 			addrp = &(msgp->mca[count]);
5802 
5803 			/* delete address from the table */
5804 			for (i = 0; i < vgenp->mccount; i++) {
5805 				if (ether_cmp(addrp,
5806 				    &(vgenp->mctab[i])) == 0) {
5807 					if (vgenp->mccount > 1) {
5808 						int t = vgenp->mccount - 1;
5809 						vgenp->mctab[i] =
5810 						    vgenp->mctab[t];
5811 					}
5812 					vgenp->mccount--;
5813 					break;
5814 				}
5815 			}
5816 		}
5817 		break;
5818 
5819 	}
5820 	DBG1(vgenp, ldcp, "exit\n");
5821 
5822 	return (VGEN_SUCCESS);
5823 }
5824 
5825 /*
5826  * Physical link information message from the peer. Only vswitch should send
5827  * us this message; if the vnet device has been configured to get physical link
5828  * state updates. Note that we must have already negotiated this with the
5829  * vswitch during attribute exchange phase of handshake.
5830  */
5831 static int
5832 vgen_handle_physlink_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5833 {
5834 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
5835 	vnet_physlink_msg_t	*msgp = (vnet_physlink_msg_t *)tagp;
5836 	link_state_t		link_state;
5837 	int			rv;
5838 
5839 	if (ldcp->portp != vgenp->vsw_portp) {
5840 		/*
5841 		 * drop the message and don't process; as we should
5842 		 * receive physlink_info message from only vswitch.
5843 		 */
5844 		return (VGEN_SUCCESS);
5845 	}
5846 
5847 	if (vgenp->pls_negotiated == B_FALSE) {
5848 		/*
5849 		 * drop the message and don't process; as we should receive
5850 		 * physlink_info message only if physlink update is enabled for
5851 		 * the device and negotiated with vswitch.
5852 		 */
5853 		return (VGEN_SUCCESS);
5854 	}
5855 
5856 	switch (tagp->vio_subtype) {
5857 
5858 	case VIO_SUBTYPE_INFO:
5859 
5860 		if ((msgp->physlink_info & VNET_PHYSLINK_STATE_MASK) ==
5861 		    VNET_PHYSLINK_STATE_UP) {
5862 			link_state = LINK_STATE_UP;
5863 		} else {
5864 			link_state = LINK_STATE_DOWN;
5865 		}
5866 
5867 		if (vgenp->phys_link_state != link_state) {
5868 			vgenp->phys_link_state = link_state;
5869 			mutex_exit(&ldcp->cblock);
5870 
5871 			/* Now update the stack */
5872 			vgen_link_update(vgenp, link_state);
5873 
5874 			mutex_enter(&ldcp->cblock);
5875 		}
5876 
5877 		tagp->vio_subtype = VIO_SUBTYPE_ACK;
5878 		tagp->vio_sid = ldcp->local_sid;
5879 
5880 		/* send reply msg back to peer */
5881 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp,
5882 		    sizeof (vnet_physlink_msg_t), B_FALSE);
5883 		if (rv != VGEN_SUCCESS) {
5884 			return (rv);
5885 		}
5886 		break;
5887 
5888 	case VIO_SUBTYPE_ACK:
5889 
5890 		/* vnet shouldn't recv physlink acks */
5891 		DWARN(vgenp, ldcp, "rcvd PHYSLINK_ACK \n");
5892 		break;
5893 
5894 	case VIO_SUBTYPE_NACK:
5895 
5896 		/* vnet shouldn't recv physlink nacks */
5897 		DWARN(vgenp, ldcp, "rcvd PHYSLINK_NACK \n");
5898 		break;
5899 
5900 	}
5901 	DBG1(vgenp, ldcp, "exit\n");
5902 
5903 	return (VGEN_SUCCESS);
5904 }
5905 
5906 /* handler for control messages received from the peer ldc end-point */
5907 static int
5908 vgen_handle_ctrlmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5909 {
5910 	int rv = 0;
5911 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5912 
5913 	DBG1(vgenp, ldcp, "enter\n");
5914 	switch (tagp->vio_subtype_env) {
5915 
5916 	case VIO_VER_INFO:
5917 		rv = vgen_handle_version_negotiate(ldcp, tagp);
5918 		break;
5919 
5920 	case VIO_ATTR_INFO:
5921 		rv = vgen_handle_attr_info(ldcp, tagp);
5922 		break;
5923 
5924 	case VIO_DRING_REG:
5925 		rv = vgen_handle_dring_reg(ldcp, tagp);
5926 		break;
5927 
5928 	case VIO_RDX:
5929 		rv = vgen_handle_rdx_info(ldcp, tagp);
5930 		break;
5931 
5932 	case VNET_MCAST_INFO:
5933 		rv = vgen_handle_mcast_info(ldcp, tagp);
5934 		break;
5935 
5936 	case VIO_DDS_INFO:
5937 		/*
5938 		 * If we are in the process of resetting the vswitch channel,
5939 		 * drop the dds message. A new handshake will be initiated
5940 		 * when the channel comes back up after the reset and dds
5941 		 * negotiation can then continue.
5942 		 */
5943 		if (ldcp->need_ldc_reset == B_TRUE) {
5944 			break;
5945 		}
5946 		rv = vgen_dds_rx(ldcp, tagp);
5947 		break;
5948 
5949 	case VNET_PHYSLINK_INFO:
5950 		rv = vgen_handle_physlink_info(ldcp, tagp);
5951 		break;
5952 	}
5953 
5954 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
5955 	return (rv);
5956 }
5957 
5958 /* handler for data messages received from the peer ldc end-point */
5959 static int
5960 vgen_handle_datamsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp, uint32_t msglen)
5961 {
5962 	int rv = 0;
5963 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5964 
5965 	DBG1(vgenp, ldcp, "enter\n");
5966 
5967 	if (ldcp->hphase != VH_DONE)
5968 		return (rv);
5969 
5970 	if (tagp->vio_subtype == VIO_SUBTYPE_INFO) {
5971 		rv = vgen_check_datamsg_seq(ldcp, tagp);
5972 		if (rv != 0) {
5973 			return (rv);
5974 		}
5975 	}
5976 
5977 	switch (tagp->vio_subtype_env) {
5978 	case VIO_DRING_DATA:
5979 		rv = vgen_handle_dring_data(ldcp, tagp);
5980 		break;
5981 
5982 	case VIO_PKT_DATA:
5983 		ldcp->rx_pktdata((void *)ldcp, (void *)tagp, msglen);
5984 		break;
5985 	default:
5986 		break;
5987 	}
5988 
5989 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
5990 	return (rv);
5991 }
5992 
5993 /*
5994  * dummy pkt data handler function for vnet protocol version 1.0
5995  */
5996 static void
5997 vgen_handle_pkt_data_nop(void *arg1, void *arg2, uint32_t msglen)
5998 {
5999 	_NOTE(ARGUNUSED(arg1, arg2, msglen))
6000 }
6001 
6002 /*
6003  * This function handles raw pkt data messages received over the channel.
6004  * Currently, only priority-eth-type frames are received through this mechanism.
6005  * In this case, the frame(data) is present within the message itself which
6006  * is copied into an mblk before sending it up the stack.
6007  */
6008 static void
6009 vgen_handle_pkt_data(void *arg1, void *arg2, uint32_t msglen)
6010 {
6011 	vgen_ldc_t		*ldcp = (vgen_ldc_t *)arg1;
6012 	vio_raw_data_msg_t	*pkt	= (vio_raw_data_msg_t *)arg2;
6013 	uint32_t		size;
6014 	mblk_t			*mp;
6015 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
6016 	vgen_stats_t		*statsp = &ldcp->stats;
6017 	vgen_hparams_t		*lp = &ldcp->local_hparams;
6018 	vio_net_rx_cb_t		vrx_cb;
6019 
6020 	ASSERT(MUTEX_HELD(&ldcp->cblock));
6021 
6022 	mutex_exit(&ldcp->cblock);
6023 
6024 	size = msglen - VIO_PKT_DATA_HDRSIZE;
6025 	if (size < ETHERMIN || size > lp->mtu) {
6026 		(void) atomic_inc_32(&statsp->rx_pri_fail);
6027 		goto exit;
6028 	}
6029 
6030 	mp = vio_multipool_allocb(&ldcp->vmp, size);
6031 	if (mp == NULL) {
6032 		mp = allocb(size, BPRI_MED);
6033 		if (mp == NULL) {
6034 			(void) atomic_inc_32(&statsp->rx_pri_fail);
6035 			DWARN(vgenp, ldcp, "allocb failure, "
6036 			    "unable to process priority frame\n");
6037 			goto exit;
6038 		}
6039 	}
6040 
6041 	/* copy the frame from the payload of raw data msg into the mblk */
6042 	bcopy(pkt->data, mp->b_rptr, size);
6043 	mp->b_wptr = mp->b_rptr + size;
6044 
6045 	/* update stats */
6046 	(void) atomic_inc_64(&statsp->rx_pri_packets);
6047 	(void) atomic_add_64(&statsp->rx_pri_bytes, size);
6048 
6049 	/* send up; call vrx_cb() as cblock is already released */
6050 	vrx_cb = ldcp->portp->vcb.vio_net_rx_cb;
6051 	vrx_cb(ldcp->portp->vhp, mp);
6052 
6053 exit:
6054 	mutex_enter(&ldcp->cblock);
6055 }
6056 
6057 static int
6058 vgen_send_dring_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp, uint32_t start,
6059     int32_t end, uint8_t pstate)
6060 {
6061 	int rv = 0;
6062 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6063 	vio_dring_msg_t *msgp = (vio_dring_msg_t *)tagp;
6064 
6065 	tagp->vio_subtype = VIO_SUBTYPE_ACK;
6066 	tagp->vio_sid = ldcp->local_sid;
6067 	msgp->start_idx = start;
6068 	msgp->end_idx = end;
6069 	msgp->dring_process_state = pstate;
6070 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msgp), B_FALSE);
6071 	if (rv != VGEN_SUCCESS) {
6072 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
6073 	}
6074 	return (rv);
6075 }
6076 
6077 static int
6078 vgen_handle_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
6079 {
6080 	int rv = 0;
6081 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6082 
6083 
6084 	DBG1(vgenp, ldcp, "enter\n");
6085 	switch (tagp->vio_subtype) {
6086 
6087 	case VIO_SUBTYPE_INFO:
6088 		/*
6089 		 * To reduce the locking contention, release the
6090 		 * cblock here and re-acquire it once we are done
6091 		 * receiving packets.
6092 		 */
6093 		mutex_exit(&ldcp->cblock);
6094 		mutex_enter(&ldcp->rxlock);
6095 		rv = vgen_handle_dring_data_info(ldcp, tagp);
6096 		mutex_exit(&ldcp->rxlock);
6097 		mutex_enter(&ldcp->cblock);
6098 		break;
6099 
6100 	case VIO_SUBTYPE_ACK:
6101 		rv = vgen_handle_dring_data_ack(ldcp, tagp);
6102 		break;
6103 
6104 	case VIO_SUBTYPE_NACK:
6105 		rv = vgen_handle_dring_data_nack(ldcp, tagp);
6106 		break;
6107 	}
6108 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
6109 	return (rv);
6110 }
6111 
6112 static int
6113 vgen_handle_dring_data_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
6114 {
6115 	uint32_t start;
6116 	int32_t end;
6117 	int rv = 0;
6118 	vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;
6119 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6120 #ifdef VGEN_HANDLE_LOST_PKTS
6121 	vgen_stats_t *statsp = &ldcp->stats;
6122 	uint32_t rxi;
6123 	int n;
6124 #endif
6125 
6126 	DBG1(vgenp, ldcp, "enter\n");
6127 
6128 	start = dringmsg->start_idx;
6129 	end = dringmsg->end_idx;
6130 	/*
6131 	 * received a data msg, which contains the start and end
6132 	 * indices of the descriptors within the rx ring holding data,
6133 	 * the seq_num of data packet corresponding to the start index,
6134 	 * and the dring_ident.
6135 	 * We can now read the contents of each of these descriptors
6136 	 * and gather data from it.
6137 	 */
6138 	DBG1(vgenp, ldcp, "INFO: start(%d), end(%d)\n",
6139 	    start, end);
6140 
6141 	/* validate rx start and end indeces */
6142 	if (!(CHECK_RXI(start, ldcp)) || ((end != -1) &&
6143 	    !(CHECK_RXI(end, ldcp)))) {
6144 		DWARN(vgenp, ldcp, "Invalid Rx start(%d) or end(%d)\n",
6145 		    start, end);
6146 		/* drop the message if invalid index */
6147 		return (rv);
6148 	}
6149 
6150 	/* validate dring_ident */
6151 	if (dringmsg->dring_ident != ldcp->peer_hparams.dring_ident) {
6152 		DWARN(vgenp, ldcp, "Invalid dring ident 0x%x\n",
6153 		    dringmsg->dring_ident);
6154 		/* invalid dring_ident, drop the msg */
6155 		return (rv);
6156 	}
6157 #ifdef DEBUG
6158 	if (vgen_trigger_rxlost) {
6159 		/* drop this msg to simulate lost pkts for debugging */
6160 		vgen_trigger_rxlost = 0;
6161 		return (rv);
6162 	}
6163 #endif
6164 
6165 #ifdef	VGEN_HANDLE_LOST_PKTS
6166 
6167 	/* receive start index doesn't match expected index */
6168 	if (ldcp->next_rxi != start) {
6169 		DWARN(vgenp, ldcp, "next_rxi(%d) != start(%d)\n",
6170 		    ldcp->next_rxi, start);
6171 
6172 		/* calculate the number of pkts lost */
6173 		if (start >= ldcp->next_rxi) {
6174 			n = start - ldcp->next_rxi;
6175 		} else  {
6176 			n = ldcp->num_rxds - (ldcp->next_rxi - start);
6177 		}
6178 
6179 		statsp->rx_lost_pkts += n;
6180 		tagp->vio_subtype = VIO_SUBTYPE_NACK;
6181 		tagp->vio_sid = ldcp->local_sid;
6182 		/* indicate the range of lost descriptors */
6183 		dringmsg->start_idx = ldcp->next_rxi;
6184 		rxi = start;
6185 		DECR_RXI(rxi, ldcp);
6186 		dringmsg->end_idx = rxi;
6187 		/* dring ident is left unchanged */
6188 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp,
6189 		    sizeof (*dringmsg), B_FALSE);
6190 		if (rv != VGEN_SUCCESS) {
6191 			DWARN(vgenp, ldcp,
6192 			    "vgen_sendmsg failed, stype:NACK\n");
6193 			return (rv);
6194 		}
6195 		/*
6196 		 * treat this range of descrs/pkts as dropped
6197 		 * and set the new expected value of next_rxi
6198 		 * and continue(below) to process from the new
6199 		 * start index.
6200 		 */
6201 		ldcp->next_rxi = start;
6202 	}
6203 
6204 #endif	/* VGEN_HANDLE_LOST_PKTS */
6205 
6206 	/* Now receive messages */
6207 	rv = vgen_process_dring_data(ldcp, tagp);
6208 
6209 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
6210 	return (rv);
6211 }
6212 
6213 static int
6214 vgen_process_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
6215 {
6216 	boolean_t set_ack_start = B_FALSE;
6217 	uint32_t start;
6218 	uint32_t ack_end;
6219 	uint32_t next_rxi;
6220 	uint32_t rxi;
6221 	int count = 0;
6222 	int rv = 0;
6223 	uint32_t retries = 0;
6224 	vgen_stats_t *statsp;
6225 	vnet_public_desc_t rxd;
6226 	vio_dring_entry_hdr_t *hdrp;
6227 	mblk_t *bp = NULL;
6228 	mblk_t *bpt = NULL;
6229 	uint32_t ack_start;
6230 	boolean_t rxd_err = B_FALSE;
6231 	mblk_t *mp = NULL;
6232 	size_t nbytes;
6233 	boolean_t ack_needed = B_FALSE;
6234 	size_t nread;
6235 	uint64_t off = 0;
6236 	struct ether_header *ehp;
6237 	vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;
6238 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6239 	vgen_hparams_t	*lp = &ldcp->local_hparams;
6240 
6241 	DBG1(vgenp, ldcp, "enter\n");
6242 
6243 	statsp = &ldcp->stats;
6244 	start = dringmsg->start_idx;
6245 
6246 	/*
6247 	 * start processing the descriptors from the specified
6248 	 * start index, up to the index a descriptor is not ready
6249 	 * to be processed or we process the entire descriptor ring
6250 	 * and wrap around upto the start index.
6251 	 */
6252 
6253 	/* need to set the start index of descriptors to be ack'd */
6254 	set_ack_start = B_TRUE;
6255 
6256 	/* index upto which we have ack'd */
6257 	ack_end = start;
6258 	DECR_RXI(ack_end, ldcp);
6259 
6260 	next_rxi = rxi =  start;
6261 	do {
6262 vgen_recv_retry:
6263 		rv = vnet_dring_entry_copy(&(ldcp->rxdp[rxi]), &rxd,
6264 		    ldcp->dring_mtype, ldcp->rx_dhandle, rxi, rxi);
6265 		if (rv != 0) {
6266 			DWARN(vgenp, ldcp, "ldc_mem_dring_acquire() failed"
6267 			    " rv(%d)\n", rv);
6268 			statsp->ierrors++;
6269 			return (rv);
6270 		}
6271 
6272 		hdrp = &rxd.hdr;
6273 
6274 		if (hdrp->dstate != VIO_DESC_READY) {
6275 			/*
6276 			 * Before waiting and retry here, send up
6277 			 * the packets that are received already
6278 			 */
6279 			if (bp != NULL) {
6280 				DTRACE_PROBE1(vgen_rcv_msgs, int, count);
6281 				vgen_rx(ldcp, bp);
6282 				count = 0;
6283 				bp = bpt = NULL;
6284 			}
6285 			/*
6286 			 * descriptor is not ready.
6287 			 * retry descriptor acquire, stop processing
6288 			 * after max # retries.
6289 			 */
6290 			if (retries == vgen_recv_retries)
6291 				break;
6292 			retries++;
6293 			drv_usecwait(vgen_recv_delay);
6294 			goto vgen_recv_retry;
6295 		}
6296 		retries = 0;
6297 
6298 		if (set_ack_start) {
6299 			/*
6300 			 * initialize the start index of the range
6301 			 * of descriptors to be ack'd.
6302 			 */
6303 			ack_start = rxi;
6304 			set_ack_start = B_FALSE;
6305 		}
6306 
6307 		if ((rxd.nbytes < ETHERMIN) ||
6308 		    (rxd.nbytes > lp->mtu) ||
6309 		    (rxd.ncookies == 0) ||
6310 		    (rxd.ncookies > MAX_COOKIES)) {
6311 			rxd_err = B_TRUE;
6312 		} else {
6313 			/*
6314 			 * Try to allocate an mblk from the free pool
6315 			 * of recv mblks for the channel.
6316 			 * If this fails, use allocb().
6317 			 */
6318 			nbytes = (VNET_IPALIGN + rxd.nbytes + 7) & ~7;
6319 			if (nbytes > ldcp->max_rxpool_size) {
6320 				mp = allocb(VNET_IPALIGN + rxd.nbytes + 8,
6321 				    BPRI_MED);
6322 			} else {
6323 				mp = vio_multipool_allocb(&ldcp->vmp, nbytes);
6324 				if (mp == NULL) {
6325 					statsp->rx_vio_allocb_fail++;
6326 					/*
6327 					 * Data buffer returned by allocb(9F)
6328 					 * is 8byte aligned. We allocate extra
6329 					 * 8 bytes to ensure size is multiple
6330 					 * of 8 bytes for ldc_mem_copy().
6331 					 */
6332 					mp = allocb(VNET_IPALIGN +
6333 					    rxd.nbytes + 8, BPRI_MED);
6334 				}
6335 			}
6336 		}
6337 		if ((rxd_err) || (mp == NULL)) {
6338 			/*
6339 			 * rxd_err or allocb() failure,
6340 			 * drop this packet, get next.
6341 			 */
6342 			if (rxd_err) {
6343 				statsp->ierrors++;
6344 				rxd_err = B_FALSE;
6345 			} else {
6346 				statsp->rx_allocb_fail++;
6347 			}
6348 
6349 			ack_needed = hdrp->ack;
6350 
6351 			/* set descriptor done bit */
6352 			rv = vnet_dring_entry_set_dstate(&(ldcp->rxdp[rxi]),
6353 			    ldcp->dring_mtype, ldcp->rx_dhandle, rxi, rxi,
6354 			    VIO_DESC_DONE);
6355 			if (rv != 0) {
6356 				DWARN(vgenp, ldcp,
6357 				    "vnet_dring_entry_set_dstate err rv(%d)\n",
6358 				    rv);
6359 				return (rv);
6360 			}
6361 
6362 			if (ack_needed) {
6363 				ack_needed = B_FALSE;
6364 				/*
6365 				 * sender needs ack for this packet,
6366 				 * ack pkts upto this index.
6367 				 */
6368 				ack_end = rxi;
6369 
6370 				rv = vgen_send_dring_ack(ldcp, tagp,
6371 				    ack_start, ack_end,
6372 				    VIO_DP_ACTIVE);
6373 				if (rv != VGEN_SUCCESS) {
6374 					goto error_ret;
6375 				}
6376 
6377 				/* need to set new ack start index */
6378 				set_ack_start = B_TRUE;
6379 			}
6380 			goto vgen_next_rxi;
6381 		}
6382 
6383 		nread = nbytes;
6384 		rv = ldc_mem_copy(ldcp->ldc_handle,
6385 		    (caddr_t)mp->b_rptr, off, &nread,
6386 		    rxd.memcookie, rxd.ncookies, LDC_COPY_IN);
6387 
6388 		/* if ldc_mem_copy() failed */
6389 		if (rv) {
6390 			DWARN(vgenp, ldcp, "ldc_mem_copy err rv(%d)\n", rv);
6391 			statsp->ierrors++;
6392 			freemsg(mp);
6393 			goto error_ret;
6394 		}
6395 
6396 		ack_needed = hdrp->ack;
6397 
6398 		rv = vnet_dring_entry_set_dstate(&(ldcp->rxdp[rxi]),
6399 		    ldcp->dring_mtype, ldcp->rx_dhandle, rxi, rxi,
6400 		    VIO_DESC_DONE);
6401 		if (rv != 0) {
6402 			DWARN(vgenp, ldcp,
6403 			    "vnet_dring_entry_set_dstate err rv(%d)\n", rv);
6404 			goto error_ret;
6405 		}
6406 
6407 		mp->b_rptr += VNET_IPALIGN;
6408 
6409 		if (ack_needed) {
6410 			ack_needed = B_FALSE;
6411 			/*
6412 			 * sender needs ack for this packet,
6413 			 * ack pkts upto this index.
6414 			 */
6415 			ack_end = rxi;
6416 
6417 			rv = vgen_send_dring_ack(ldcp, tagp,
6418 			    ack_start, ack_end, VIO_DP_ACTIVE);
6419 			if (rv != VGEN_SUCCESS) {
6420 				goto error_ret;
6421 			}
6422 
6423 			/* need to set new ack start index */
6424 			set_ack_start = B_TRUE;
6425 		}
6426 
6427 		if (nread != nbytes) {
6428 			DWARN(vgenp, ldcp,
6429 			    "ldc_mem_copy nread(%lx), nbytes(%lx)\n",
6430 			    nread, nbytes);
6431 			statsp->ierrors++;
6432 			freemsg(mp);
6433 			goto vgen_next_rxi;
6434 		}
6435 
6436 		/* point to the actual end of data */
6437 		mp->b_wptr = mp->b_rptr + rxd.nbytes;
6438 
6439 		/* update stats */
6440 		statsp->ipackets++;
6441 		statsp->rbytes += rxd.nbytes;
6442 		ehp = (struct ether_header *)mp->b_rptr;
6443 		if (IS_BROADCAST(ehp))
6444 			statsp->brdcstrcv++;
6445 		else if (IS_MULTICAST(ehp))
6446 			statsp->multircv++;
6447 
6448 		/* build a chain of received packets */
6449 		if (bp == NULL) {
6450 			/* first pkt */
6451 			bp = mp;
6452 			bpt = bp;
6453 			bpt->b_next = NULL;
6454 		} else {
6455 			mp->b_next = NULL;
6456 			bpt->b_next = mp;
6457 			bpt = mp;
6458 		}
6459 
6460 		if (count++ > vgen_chain_len) {
6461 			DTRACE_PROBE1(vgen_rcv_msgs, int, count);
6462 			vgen_rx(ldcp, bp);
6463 			count = 0;
6464 			bp = bpt = NULL;
6465 		}
6466 
6467 vgen_next_rxi:
6468 		/* update end index of range of descrs to be ack'd */
6469 		ack_end = rxi;
6470 
6471 		/* update the next index to be processed */
6472 		INCR_RXI(next_rxi, ldcp);
6473 		if (next_rxi == start) {
6474 			/*
6475 			 * processed the entire descriptor ring upto
6476 			 * the index at which we started.
6477 			 */
6478 			break;
6479 		}
6480 
6481 		rxi = next_rxi;
6482 
6483 	_NOTE(CONSTCOND)
6484 	} while (1);
6485 
6486 	/*
6487 	 * send an ack message to peer indicating that we have stopped
6488 	 * processing descriptors.
6489 	 */
6490 	if (set_ack_start) {
6491 		/*
6492 		 * We have ack'd upto some index and we have not
6493 		 * processed any descriptors beyond that index.
6494 		 * Use the last ack'd index as both the start and
6495 		 * end of range of descrs being ack'd.
6496 		 * Note: This results in acking the last index twice
6497 		 * and should be harmless.
6498 		 */
6499 		ack_start = ack_end;
6500 	}
6501 
6502 	rv = vgen_send_dring_ack(ldcp, tagp, ack_start, ack_end,
6503 	    VIO_DP_STOPPED);
6504 	if (rv != VGEN_SUCCESS) {
6505 		goto error_ret;
6506 	}
6507 
6508 	/* save new recv index of next dring msg */
6509 	ldcp->next_rxi = next_rxi;
6510 
6511 error_ret:
6512 	/* send up packets received so far */
6513 	if (bp != NULL) {
6514 		DTRACE_PROBE1(vgen_rcv_msgs, int, count);
6515 		vgen_rx(ldcp, bp);
6516 		bp = bpt = NULL;
6517 	}
6518 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
6519 	return (rv);
6520 
6521 }
6522 
6523 static int
6524 vgen_handle_dring_data_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
6525 {
6526 	int rv = 0;
6527 	uint32_t start;
6528 	int32_t end;
6529 	uint32_t txi;
6530 	boolean_t ready_txd = B_FALSE;
6531 	vgen_stats_t *statsp;
6532 	vgen_private_desc_t *tbufp;
6533 	vnet_public_desc_t *txdp;
6534 	vio_dring_entry_hdr_t *hdrp;
6535 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6536 	vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;
6537 
6538 	DBG1(vgenp, ldcp, "enter\n");
6539 	start = dringmsg->start_idx;
6540 	end = dringmsg->end_idx;
6541 	statsp = &ldcp->stats;
6542 
6543 	/*
6544 	 * received an ack corresponding to a specific descriptor for
6545 	 * which we had set the ACK bit in the descriptor (during
6546 	 * transmit). This enables us to reclaim descriptors.
6547 	 */
6548 
6549 	DBG2(vgenp, ldcp, "ACK:  start(%d), end(%d)\n", start, end);
6550 
6551 	/* validate start and end indeces in the tx ack msg */
6552 	if (!(CHECK_TXI(start, ldcp)) || !(CHECK_TXI(end, ldcp))) {
6553 		/* drop the message if invalid index */
6554 		DWARN(vgenp, ldcp, "Invalid Tx ack start(%d) or end(%d)\n",
6555 		    start, end);
6556 		return (rv);
6557 	}
6558 	/* validate dring_ident */
6559 	if (dringmsg->dring_ident != ldcp->local_hparams.dring_ident) {
6560 		/* invalid dring_ident, drop the msg */
6561 		DWARN(vgenp, ldcp, "Invalid dring ident 0x%x\n",
6562 		    dringmsg->dring_ident);
6563 		return (rv);
6564 	}
6565 	statsp->dring_data_acks++;
6566 
6567 	/* reclaim descriptors that are done */
6568 	vgen_reclaim(ldcp);
6569 
6570 	if (dringmsg->dring_process_state != VIO_DP_STOPPED) {
6571 		/*
6572 		 * receiver continued processing descriptors after
6573 		 * sending us the ack.
6574 		 */
6575 		return (rv);
6576 	}
6577 
6578 	statsp->dring_stopped_acks++;
6579 
6580 	/* receiver stopped processing descriptors */
6581 	mutex_enter(&ldcp->wrlock);
6582 	mutex_enter(&ldcp->tclock);
6583 
6584 	/*
6585 	 * determine if there are any pending tx descriptors
6586 	 * ready to be processed by the receiver(peer) and if so,
6587 	 * send a message to the peer to restart receiving.
6588 	 */
6589 	ready_txd = B_FALSE;
6590 
6591 	/*
6592 	 * using the end index of the descriptor range for which
6593 	 * we received the ack, check if the next descriptor is
6594 	 * ready.
6595 	 */
6596 	txi = end;
6597 	INCR_TXI(txi, ldcp);
6598 	tbufp = &ldcp->tbufp[txi];
6599 	txdp = tbufp->descp;
6600 	hdrp = &txdp->hdr;
6601 	if (hdrp->dstate == VIO_DESC_READY) {
6602 		ready_txd = B_TRUE;
6603 	} else {
6604 		/*
6605 		 * descr next to the end of ack'd descr range is not
6606 		 * ready.
6607 		 * starting from the current reclaim index, check
6608 		 * if any descriptor is ready.
6609 		 */
6610 
6611 		txi = ldcp->cur_tbufp - ldcp->tbufp;
6612 		tbufp = &ldcp->tbufp[txi];
6613 
6614 		txdp = tbufp->descp;
6615 		hdrp = &txdp->hdr;
6616 		if (hdrp->dstate == VIO_DESC_READY) {
6617 			ready_txd = B_TRUE;
6618 		}
6619 
6620 	}
6621 
6622 	if (ready_txd) {
6623 		/*
6624 		 * we have tx descriptor(s) ready to be
6625 		 * processed by the receiver.
6626 		 * send a message to the peer with the start index
6627 		 * of ready descriptors.
6628 		 */
6629 		rv = vgen_send_dring_data(ldcp, txi, -1);
6630 		if (rv != VGEN_SUCCESS) {
6631 			ldcp->resched_peer = B_TRUE;
6632 			ldcp->resched_peer_txi = txi;
6633 			mutex_exit(&ldcp->tclock);
6634 			mutex_exit(&ldcp->wrlock);
6635 			return (rv);
6636 		}
6637 	} else {
6638 		/*
6639 		 * no ready tx descriptors. set the flag to send a
6640 		 * message to peer when tx descriptors are ready in
6641 		 * transmit routine.
6642 		 */
6643 		ldcp->resched_peer = B_TRUE;
6644 		ldcp->resched_peer_txi = ldcp->cur_tbufp - ldcp->tbufp;
6645 	}
6646 
6647 	mutex_exit(&ldcp->tclock);
6648 	mutex_exit(&ldcp->wrlock);
6649 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
6650 	return (rv);
6651 }
6652 
6653 static int
6654 vgen_handle_dring_data_nack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
6655 {
6656 	int rv = 0;
6657 	uint32_t start;
6658 	int32_t end;
6659 	uint32_t txi;
6660 	vnet_public_desc_t *txdp;
6661 	vio_dring_entry_hdr_t *hdrp;
6662 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6663 	vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;
6664 
6665 	DBG1(vgenp, ldcp, "enter\n");
6666 	start = dringmsg->start_idx;
6667 	end = dringmsg->end_idx;
6668 
6669 	/*
6670 	 * peer sent a NACK msg to indicate lost packets.
6671 	 * The start and end correspond to the range of descriptors
6672 	 * for which the peer didn't receive a dring data msg and so
6673 	 * didn't receive the corresponding data.
6674 	 */
6675 	DWARN(vgenp, ldcp, "NACK: start(%d), end(%d)\n", start, end);
6676 
6677 	/* validate start and end indeces in the tx nack msg */
6678 	if (!(CHECK_TXI(start, ldcp)) || !(CHECK_TXI(end, ldcp))) {
6679 		/* drop the message if invalid index */
6680 		DWARN(vgenp, ldcp, "Invalid Tx nack start(%d) or end(%d)\n",
6681 		    start, end);
6682 		return (rv);
6683 	}
6684 	/* validate dring_ident */
6685 	if (dringmsg->dring_ident != ldcp->local_hparams.dring_ident) {
6686 		/* invalid dring_ident, drop the msg */
6687 		DWARN(vgenp, ldcp, "Invalid dring ident 0x%x\n",
6688 		    dringmsg->dring_ident);
6689 		return (rv);
6690 	}
6691 	mutex_enter(&ldcp->txlock);
6692 	mutex_enter(&ldcp->tclock);
6693 
6694 	if (ldcp->next_tbufp == ldcp->cur_tbufp) {
6695 		/* no busy descriptors, bogus nack ? */
6696 		mutex_exit(&ldcp->tclock);
6697 		mutex_exit(&ldcp->txlock);
6698 		return (rv);
6699 	}
6700 
6701 	/* we just mark the descrs as done so they can be reclaimed */
6702 	for (txi = start; txi <= end; ) {
6703 		txdp = &(ldcp->txdp[txi]);
6704 		hdrp = &txdp->hdr;
6705 		if (hdrp->dstate == VIO_DESC_READY)
6706 			hdrp->dstate = VIO_DESC_DONE;
6707 		INCR_TXI(txi, ldcp);
6708 	}
6709 	mutex_exit(&ldcp->tclock);
6710 	mutex_exit(&ldcp->txlock);
6711 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
6712 	return (rv);
6713 }
6714 
6715 static void
6716 vgen_reclaim(vgen_ldc_t *ldcp)
6717 {
6718 	mutex_enter(&ldcp->tclock);
6719 
6720 	vgen_reclaim_dring(ldcp);
6721 	ldcp->reclaim_lbolt = ddi_get_lbolt();
6722 
6723 	mutex_exit(&ldcp->tclock);
6724 }
6725 
6726 /*
6727  * transmit reclaim function. starting from the current reclaim index
6728  * look for descriptors marked DONE and reclaim the descriptor and the
6729  * corresponding buffers (tbuf).
6730  */
6731 static void
6732 vgen_reclaim_dring(vgen_ldc_t *ldcp)
6733 {
6734 	int count = 0;
6735 	vnet_public_desc_t *txdp;
6736 	vgen_private_desc_t *tbufp;
6737 	vio_dring_entry_hdr_t	*hdrp;
6738 
6739 #ifdef DEBUG
6740 	if (vgen_trigger_txtimeout)
6741 		return;
6742 #endif
6743 
6744 	tbufp = ldcp->cur_tbufp;
6745 	txdp = tbufp->descp;
6746 	hdrp = &txdp->hdr;
6747 
6748 	while ((hdrp->dstate == VIO_DESC_DONE) &&
6749 	    (tbufp != ldcp->next_tbufp)) {
6750 		tbufp->flags = VGEN_PRIV_DESC_FREE;
6751 		hdrp->dstate = VIO_DESC_FREE;
6752 		hdrp->ack = B_FALSE;
6753 
6754 		tbufp = NEXTTBUF(ldcp, tbufp);
6755 		txdp = tbufp->descp;
6756 		hdrp = &txdp->hdr;
6757 		count++;
6758 	}
6759 
6760 	ldcp->cur_tbufp = tbufp;
6761 
6762 	/*
6763 	 * Check if mac layer should be notified to restart transmissions
6764 	 */
6765 	if ((ldcp->need_resched) && (count > 0)) {
6766 		vio_net_tx_update_t vtx_update =
6767 		    ldcp->portp->vcb.vio_net_tx_update;
6768 
6769 		ldcp->need_resched = B_FALSE;
6770 		vtx_update(ldcp->portp->vhp);
6771 	}
6772 }
6773 
6774 /* return the number of pending transmits for the channel */
6775 static int
6776 vgen_num_txpending(vgen_ldc_t *ldcp)
6777 {
6778 	int n;
6779 
6780 	if (ldcp->next_tbufp >= ldcp->cur_tbufp) {
6781 		n = ldcp->next_tbufp - ldcp->cur_tbufp;
6782 	} else  {
6783 		/* cur_tbufp > next_tbufp */
6784 		n = ldcp->num_txds - (ldcp->cur_tbufp - ldcp->next_tbufp);
6785 	}
6786 
6787 	return (n);
6788 }
6789 
6790 /* determine if the transmit descriptor ring is full */
6791 static int
6792 vgen_tx_dring_full(vgen_ldc_t *ldcp)
6793 {
6794 	vgen_private_desc_t	*tbufp;
6795 	vgen_private_desc_t	*ntbufp;
6796 
6797 	tbufp = ldcp->next_tbufp;
6798 	ntbufp = NEXTTBUF(ldcp, tbufp);
6799 	if (ntbufp == ldcp->cur_tbufp) { /* out of tbufs/txds */
6800 		return (VGEN_SUCCESS);
6801 	}
6802 	return (VGEN_FAILURE);
6803 }
6804 
6805 /* determine if timeout condition has occured */
6806 static int
6807 vgen_ldc_txtimeout(vgen_ldc_t *ldcp)
6808 {
6809 	if (((ddi_get_lbolt() - ldcp->reclaim_lbolt) >
6810 	    drv_usectohz(vnet_ldcwd_txtimeout * 1000)) &&
6811 	    (vnet_ldcwd_txtimeout) &&
6812 	    (vgen_tx_dring_full(ldcp) == VGEN_SUCCESS)) {
6813 		return (VGEN_SUCCESS);
6814 	} else {
6815 		return (VGEN_FAILURE);
6816 	}
6817 }
6818 
6819 /* transmit watchdog timeout handler */
6820 static void
6821 vgen_ldc_watchdog(void *arg)
6822 {
6823 	vgen_ldc_t *ldcp;
6824 	vgen_t *vgenp;
6825 	int rv;
6826 
6827 	ldcp = (vgen_ldc_t *)arg;
6828 	vgenp = LDC_TO_VGEN(ldcp);
6829 
6830 	rv = vgen_ldc_txtimeout(ldcp);
6831 	if (rv == VGEN_SUCCESS) {
6832 		DWARN(vgenp, ldcp, "transmit timeout\n");
6833 #ifdef DEBUG
6834 		if (vgen_trigger_txtimeout) {
6835 			/* tx timeout triggered for debugging */
6836 			vgen_trigger_txtimeout = 0;
6837 		}
6838 #endif
6839 		mutex_enter(&ldcp->cblock);
6840 		vgen_ldc_reset(ldcp);
6841 		mutex_exit(&ldcp->cblock);
6842 		if (ldcp->need_resched) {
6843 			vio_net_tx_update_t vtx_update =
6844 			    ldcp->portp->vcb.vio_net_tx_update;
6845 
6846 			ldcp->need_resched = B_FALSE;
6847 			vtx_update(ldcp->portp->vhp);
6848 		}
6849 	}
6850 
6851 	ldcp->wd_tid = timeout(vgen_ldc_watchdog, (caddr_t)ldcp,
6852 	    drv_usectohz(vnet_ldcwd_interval * 1000));
6853 }
6854 
6855 /* handler for error messages received from the peer ldc end-point */
6856 static void
6857 vgen_handle_errmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
6858 {
6859 	_NOTE(ARGUNUSED(ldcp, tagp))
6860 }
6861 
6862 static int
6863 vgen_check_datamsg_seq(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
6864 {
6865 	vio_raw_data_msg_t	*rmsg;
6866 	vio_dring_msg_t		*dmsg;
6867 	uint64_t		seq_num;
6868 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
6869 
6870 	if (tagp->vio_subtype_env == VIO_DRING_DATA) {
6871 		dmsg = (vio_dring_msg_t *)tagp;
6872 		seq_num = dmsg->seq_num;
6873 	} else if (tagp->vio_subtype_env == VIO_PKT_DATA) {
6874 		rmsg = (vio_raw_data_msg_t *)tagp;
6875 		seq_num = rmsg->seq_num;
6876 	} else {
6877 		return (EINVAL);
6878 	}
6879 
6880 	if (seq_num != ldcp->next_rxseq) {
6881 
6882 		/* seqnums don't match */
6883 		DWARN(vgenp, ldcp,
6884 		    "next_rxseq(0x%lx) != seq_num(0x%lx)\n",
6885 		    ldcp->next_rxseq, seq_num);
6886 
6887 		return (EINVAL);
6888 
6889 	}
6890 
6891 	ldcp->next_rxseq++;
6892 
6893 	return (0);
6894 }
6895 
6896 /* Check if the session id in the received message is valid */
6897 static int
6898 vgen_check_sid(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
6899 {
6900 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6901 
6902 	if (tagp->vio_sid != ldcp->peer_sid) {
6903 		DWARN(vgenp, ldcp, "sid mismatch: expected(%x), rcvd(%x)\n",
6904 		    ldcp->peer_sid, tagp->vio_sid);
6905 		return (VGEN_FAILURE);
6906 	}
6907 	else
6908 		return (VGEN_SUCCESS);
6909 }
6910 
6911 static caddr_t
6912 vgen_print_ethaddr(uint8_t *a, char *ebuf)
6913 {
6914 	(void) sprintf(ebuf,
6915 	    "%x:%x:%x:%x:%x:%x", a[0], a[1], a[2], a[3], a[4], a[5]);
6916 	return (ebuf);
6917 }
6918 
6919 /* Handshake watchdog timeout handler */
6920 static void
6921 vgen_hwatchdog(void *arg)
6922 {
6923 	vgen_ldc_t *ldcp = (vgen_ldc_t *)arg;
6924 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6925 
6926 	DWARN(vgenp, ldcp, "handshake timeout phase(%x) state(%x)\n",
6927 	    ldcp->hphase, ldcp->hstate);
6928 
6929 	mutex_enter(&ldcp->cblock);
6930 	if (ldcp->cancel_htid) {
6931 		ldcp->cancel_htid = 0;
6932 		mutex_exit(&ldcp->cblock);
6933 		return;
6934 	}
6935 	ldcp->htid = 0;
6936 	vgen_ldc_reset(ldcp);
6937 	mutex_exit(&ldcp->cblock);
6938 }
6939 
6940 static void
6941 vgen_print_hparams(vgen_hparams_t *hp)
6942 {
6943 	uint8_t	addr[6];
6944 	char	ea[6];
6945 	ldc_mem_cookie_t *dc;
6946 
6947 	cmn_err(CE_CONT, "version_info:\n");
6948 	cmn_err(CE_CONT,
6949 	    "\tver_major: %d, ver_minor: %d, dev_class: %d\n",
6950 	    hp->ver_major, hp->ver_minor, hp->dev_class);
6951 
6952 	vnet_macaddr_ultostr(hp->addr, addr);
6953 	cmn_err(CE_CONT, "attr_info:\n");
6954 	cmn_err(CE_CONT, "\tMTU: %lx, addr: %s\n", hp->mtu,
6955 	    vgen_print_ethaddr(addr, ea));
6956 	cmn_err(CE_CONT,
6957 	    "\taddr_type: %x, xfer_mode: %x, ack_freq: %x\n",
6958 	    hp->addr_type, hp->xfer_mode, hp->ack_freq);
6959 
6960 	dc = &hp->dring_cookie;
6961 	cmn_err(CE_CONT, "dring_info:\n");
6962 	cmn_err(CE_CONT,
6963 	    "\tlength: %d, dsize: %d\n", hp->num_desc, hp->desc_size);
6964 	cmn_err(CE_CONT,
6965 	    "\tldc_addr: 0x%lx, ldc_size: %ld\n",
6966 	    dc->addr, dc->size);
6967 	cmn_err(CE_CONT, "\tdring_ident: 0x%lx\n", hp->dring_ident);
6968 }
6969 
6970 static void
6971 vgen_print_ldcinfo(vgen_ldc_t *ldcp)
6972 {
6973 	vgen_hparams_t *hp;
6974 
6975 	cmn_err(CE_CONT, "Channel Information:\n");
6976 	cmn_err(CE_CONT,
6977 	    "\tldc_id: 0x%lx, ldc_status: 0x%x\n",
6978 	    ldcp->ldc_id, ldcp->ldc_status);
6979 	cmn_err(CE_CONT,
6980 	    "\tlocal_sid: 0x%x, peer_sid: 0x%x\n",
6981 	    ldcp->local_sid, ldcp->peer_sid);
6982 	cmn_err(CE_CONT,
6983 	    "\thphase: 0x%x, hstate: 0x%x\n",
6984 	    ldcp->hphase, ldcp->hstate);
6985 
6986 	cmn_err(CE_CONT, "Local handshake params:\n");
6987 	hp = &ldcp->local_hparams;
6988 	vgen_print_hparams(hp);
6989 
6990 	cmn_err(CE_CONT, "Peer handshake params:\n");
6991 	hp = &ldcp->peer_hparams;
6992 	vgen_print_hparams(hp);
6993 }
6994 
6995 /*
6996  * Send received packets up the stack.
6997  */
6998 static void
6999 vgen_rx(vgen_ldc_t *ldcp, mblk_t *bp)
7000 {
7001 	vio_net_rx_cb_t vrx_cb = ldcp->portp->vcb.vio_net_rx_cb;
7002 
7003 	if (ldcp->rcv_thread != NULL) {
7004 		ASSERT(MUTEX_HELD(&ldcp->rxlock));
7005 		mutex_exit(&ldcp->rxlock);
7006 	} else {
7007 		ASSERT(MUTEX_HELD(&ldcp->cblock));
7008 		mutex_exit(&ldcp->cblock);
7009 	}
7010 
7011 	vrx_cb(ldcp->portp->vhp, bp);
7012 
7013 	if (ldcp->rcv_thread != NULL) {
7014 		mutex_enter(&ldcp->rxlock);
7015 	} else {
7016 		mutex_enter(&ldcp->cblock);
7017 	}
7018 }
7019 
7020 /*
7021  * vgen_ldc_rcv_worker -- A per LDC worker thread to receive data.
7022  * This thread is woken up by the LDC interrupt handler to process
7023  * LDC packets and receive data.
7024  */
7025 static void
7026 vgen_ldc_rcv_worker(void *arg)
7027 {
7028 	callb_cpr_t	cprinfo;
7029 	vgen_ldc_t *ldcp = (vgen_ldc_t *)arg;
7030 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
7031 
7032 	DBG1(vgenp, ldcp, "enter\n");
7033 	CALLB_CPR_INIT(&cprinfo, &ldcp->rcv_thr_lock, callb_generic_cpr,
7034 	    "vnet_rcv_thread");
7035 	mutex_enter(&ldcp->rcv_thr_lock);
7036 	while (!(ldcp->rcv_thr_flags & VGEN_WTHR_STOP)) {
7037 
7038 		CALLB_CPR_SAFE_BEGIN(&cprinfo);
7039 		/*
7040 		 * Wait until the data is received or a stop
7041 		 * request is received.
7042 		 */
7043 		while (!(ldcp->rcv_thr_flags &
7044 		    (VGEN_WTHR_DATARCVD | VGEN_WTHR_STOP))) {
7045 			cv_wait(&ldcp->rcv_thr_cv, &ldcp->rcv_thr_lock);
7046 		}
7047 		CALLB_CPR_SAFE_END(&cprinfo, &ldcp->rcv_thr_lock)
7048 
7049 		/*
7050 		 * First process the stop request.
7051 		 */
7052 		if (ldcp->rcv_thr_flags & VGEN_WTHR_STOP) {
7053 			DBG2(vgenp, ldcp, "stopped\n");
7054 			break;
7055 		}
7056 		ldcp->rcv_thr_flags &= ~VGEN_WTHR_DATARCVD;
7057 		ldcp->rcv_thr_flags |= VGEN_WTHR_PROCESSING;
7058 		mutex_exit(&ldcp->rcv_thr_lock);
7059 		DBG2(vgenp, ldcp, "calling vgen_handle_evt_read\n");
7060 		vgen_handle_evt_read(ldcp);
7061 		mutex_enter(&ldcp->rcv_thr_lock);
7062 		ldcp->rcv_thr_flags &= ~VGEN_WTHR_PROCESSING;
7063 	}
7064 
7065 	/*
7066 	 * Update the run status and wakeup the thread that
7067 	 * has sent the stop request.
7068 	 */
7069 	ldcp->rcv_thr_flags &= ~VGEN_WTHR_STOP;
7070 	ldcp->rcv_thread = NULL;
7071 	CALLB_CPR_EXIT(&cprinfo);
7072 
7073 	thread_exit();
7074 	DBG1(vgenp, ldcp, "exit\n");
7075 }
7076 
7077 /* vgen_stop_rcv_thread -- Co-ordinate with receive thread to stop it */
7078 static void
7079 vgen_stop_rcv_thread(vgen_ldc_t *ldcp)
7080 {
7081 	kt_did_t	tid = 0;
7082 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
7083 
7084 	DBG1(vgenp, ldcp, "enter\n");
7085 	/*
7086 	 * Send a stop request by setting the stop flag and
7087 	 * wait until the receive thread stops.
7088 	 */
7089 	mutex_enter(&ldcp->rcv_thr_lock);
7090 	if (ldcp->rcv_thread != NULL) {
7091 		tid = ldcp->rcv_thread->t_did;
7092 		ldcp->rcv_thr_flags |= VGEN_WTHR_STOP;
7093 		cv_signal(&ldcp->rcv_thr_cv);
7094 	}
7095 	mutex_exit(&ldcp->rcv_thr_lock);
7096 
7097 	if (tid != 0) {
7098 		thread_join(tid);
7099 	}
7100 	DBG1(vgenp, ldcp, "exit\n");
7101 }
7102 
7103 /*
7104  * Wait for the channel rx-queue to be drained by allowing the receive
7105  * worker thread to read all messages from the rx-queue of the channel.
7106  * Assumption: further callbacks are disabled at this time.
7107  */
7108 static void
7109 vgen_drain_rcv_thread(vgen_ldc_t *ldcp)
7110 {
7111 	clock_t	tm;
7112 	clock_t	wt;
7113 	clock_t	rv;
7114 
7115 	/*
7116 	 * If there is data in ldc rx queue, wait until the rx
7117 	 * worker thread runs and drains all msgs in the queue.
7118 	 */
7119 	wt = drv_usectohz(MILLISEC);
7120 
7121 	mutex_enter(&ldcp->rcv_thr_lock);
7122 
7123 	tm = ddi_get_lbolt() + wt;
7124 
7125 	/*
7126 	 * We need to check both bits - DATARCVD and PROCESSING, to be cleared.
7127 	 * If DATARCVD is set, that means the callback has signalled the worker
7128 	 * thread, but the worker hasn't started processing yet. If PROCESSING
7129 	 * is set, that means the thread is awake and processing. Note that the
7130 	 * DATARCVD state can only be seen once, as the assumption is that
7131 	 * further callbacks have been disabled at this point.
7132 	 */
7133 	while (ldcp->rcv_thr_flags &
7134 	    (VGEN_WTHR_DATARCVD | VGEN_WTHR_PROCESSING)) {
7135 		rv = cv_timedwait(&ldcp->rcv_thr_cv, &ldcp->rcv_thr_lock, tm);
7136 		if (rv == -1) {	/* timeout */
7137 			/*
7138 			 * Note that the only way we return is due to a timeout;
7139 			 * we set the new time to wait, before we go back and
7140 			 * check the condition. The other(unlikely) possibility
7141 			 * is a premature wakeup(see cv_timedwait(9F)) in which
7142 			 * case we just continue to use the same time to wait.
7143 			 */
7144 			tm = ddi_get_lbolt() + wt;
7145 		}
7146 	}
7147 
7148 	mutex_exit(&ldcp->rcv_thr_lock);
7149 }
7150 
7151 /*
7152  * vgen_dds_rx -- post DDS messages to vnet.
7153  */
7154 static int
7155 vgen_dds_rx(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
7156 {
7157 	vio_dds_msg_t *dmsg = (vio_dds_msg_t *)tagp;
7158 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
7159 
7160 	if (dmsg->dds_class != DDS_VNET_NIU) {
7161 		DWARN(vgenp, ldcp, "Unknown DDS class, dropping");
7162 		return (EBADMSG);
7163 	}
7164 	vnet_dds_rx(vgenp->vnetp, dmsg);
7165 	return (0);
7166 }
7167 
7168 /*
7169  * vgen_dds_tx -- an interface called by vnet to send DDS messages.
7170  */
7171 int
7172 vgen_dds_tx(void *arg, void *msg)
7173 {
7174 	vgen_t *vgenp = arg;
7175 	vio_dds_msg_t *dmsg = msg;
7176 	vgen_portlist_t *plistp = &vgenp->vgenports;
7177 	vgen_ldc_t *ldcp;
7178 	vgen_ldclist_t *ldclp;
7179 	int rv = EIO;
7180 
7181 
7182 	READ_ENTER(&plistp->rwlock);
7183 	ldclp = &(vgenp->vsw_portp->ldclist);
7184 	READ_ENTER(&ldclp->rwlock);
7185 	ldcp = ldclp->headp;
7186 	if ((ldcp == NULL) || (ldcp->hphase != VH_DONE)) {
7187 		goto vgen_dsend_exit;
7188 	}
7189 
7190 	dmsg->tag.vio_sid = ldcp->local_sid;
7191 	rv = vgen_sendmsg(ldcp, (caddr_t)dmsg, sizeof (vio_dds_msg_t), B_FALSE);
7192 	if (rv != VGEN_SUCCESS) {
7193 		rv = EIO;
7194 	} else {
7195 		rv = 0;
7196 	}
7197 
7198 vgen_dsend_exit:
7199 	RW_EXIT(&ldclp->rwlock);
7200 	RW_EXIT(&plistp->rwlock);
7201 	return (rv);
7202 
7203 }
7204 
7205 static void
7206 vgen_ldc_reset(vgen_ldc_t *ldcp)
7207 {
7208 	vnet_t	*vnetp = LDC_TO_VNET(ldcp);
7209 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
7210 
7211 	ASSERT(MUTEX_HELD(&ldcp->cblock));
7212 
7213 	if (ldcp->need_ldc_reset == B_TRUE) {
7214 		/* another thread is already in the process of resetting */
7215 		return;
7216 	}
7217 
7218 	/* Set the flag to indicate reset is in progress */
7219 	ldcp->need_ldc_reset = B_TRUE;
7220 
7221 	if (ldcp->portp == vgenp->vsw_portp) {
7222 		mutex_exit(&ldcp->cblock);
7223 		/*
7224 		 * Now cleanup any HIO resources; the above flag also tells
7225 		 * the code that handles dds messages to drop any new msgs
7226 		 * that arrive while we are cleaning up and resetting the
7227 		 * channel.
7228 		 */
7229 		vnet_dds_cleanup_hio(vnetp);
7230 		mutex_enter(&ldcp->cblock);
7231 	}
7232 
7233 	vgen_handshake_retry(ldcp);
7234 }
7235 
7236 #if DEBUG
7237 
7238 /*
7239  * Print debug messages - set to 0xf to enable all msgs
7240  */
7241 static void
7242 debug_printf(const char *fname, vgen_t *vgenp,
7243     vgen_ldc_t *ldcp, const char *fmt, ...)
7244 {
7245 	char    buf[256];
7246 	char    *bufp = buf;
7247 	va_list ap;
7248 
7249 	if ((vgenp != NULL) && (vgenp->vnetp != NULL)) {
7250 		(void) sprintf(bufp, "vnet%d:",
7251 		    ((vnet_t *)(vgenp->vnetp))->instance);
7252 		bufp += strlen(bufp);
7253 	}
7254 	if (ldcp != NULL) {
7255 		(void) sprintf(bufp, "ldc(%ld):", ldcp->ldc_id);
7256 		bufp += strlen(bufp);
7257 	}
7258 	(void) sprintf(bufp, "%s: ", fname);
7259 	bufp += strlen(bufp);
7260 
7261 	va_start(ap, fmt);
7262 	(void) vsprintf(bufp, fmt, ap);
7263 	va_end(ap);
7264 
7265 	if ((ldcp == NULL) ||(vgendbg_ldcid == -1) ||
7266 	    (vgendbg_ldcid == ldcp->ldc_id)) {
7267 		cmn_err(CE_CONT, "%s\n", buf);
7268 	}
7269 }
7270 #endif
7271 
7272 #ifdef	VNET_IOC_DEBUG
7273 
7274 static void
7275 vgen_ioctl(void *arg, queue_t *q, mblk_t *mp)
7276 {
7277 	struct iocblk	*iocp;
7278 	vgen_port_t	*portp;
7279 	enum		ioc_reply {
7280 			IOC_INVAL = -1,		/* bad, NAK with EINVAL */
7281 			IOC_ACK			/* OK, just send ACK    */
7282 	}		status;
7283 	int		rv;
7284 
7285 	iocp = (struct iocblk *)(uintptr_t)mp->b_rptr;
7286 	iocp->ioc_error = 0;
7287 	portp = (vgen_port_t *)arg;
7288 
7289 	if (portp == NULL) {
7290 		status = IOC_INVAL;
7291 		goto vgen_ioc_exit;
7292 	}
7293 
7294 	mutex_enter(&portp->lock);
7295 
7296 	switch (iocp->ioc_cmd) {
7297 
7298 	case VNET_FORCE_LINK_DOWN:
7299 	case VNET_FORCE_LINK_UP:
7300 		rv = vgen_force_link_state(portp, iocp->ioc_cmd);
7301 		(rv == 0) ? (status = IOC_ACK) : (status = IOC_INVAL);
7302 		break;
7303 
7304 	default:
7305 		status = IOC_INVAL;
7306 		break;
7307 
7308 	}
7309 
7310 	mutex_exit(&portp->lock);
7311 
7312 vgen_ioc_exit:
7313 
7314 	switch (status) {
7315 	default:
7316 	case IOC_INVAL:
7317 		/* Error, reply with a NAK and EINVAL error */
7318 		miocnak(q, mp, 0, EINVAL);
7319 		break;
7320 	case IOC_ACK:
7321 		/* OK, reply with an ACK */
7322 		miocack(q, mp, 0, 0);
7323 		break;
7324 	}
7325 }
7326 
7327 static int
7328 vgen_force_link_state(vgen_port_t *portp, int cmd)
7329 {
7330 	ldc_status_t	istatus;
7331 	vgen_ldclist_t	*ldclp;
7332 	vgen_ldc_t	*ldcp;
7333 	vgen_t		*vgenp = portp->vgenp;
7334 	int		rv;
7335 
7336 	ldclp = &portp->ldclist;
7337 	READ_ENTER(&ldclp->rwlock);
7338 
7339 	/*
7340 	 * NOTE: for now, we will assume we have a single channel.
7341 	 */
7342 	if (ldclp->headp == NULL) {
7343 		RW_EXIT(&ldclp->rwlock);
7344 		return (1);
7345 	}
7346 	ldcp = ldclp->headp;
7347 	mutex_enter(&ldcp->cblock);
7348 
7349 	switch (cmd) {
7350 
7351 	case VNET_FORCE_LINK_DOWN:
7352 		(void) ldc_down(ldcp->ldc_handle);
7353 		ldcp->link_down_forced = B_TRUE;
7354 		break;
7355 
7356 	case VNET_FORCE_LINK_UP:
7357 		rv = ldc_up(ldcp->ldc_handle);
7358 		if (rv != 0) {
7359 			DWARN(vgenp, ldcp, "ldc_up err rv(%d)\n", rv);
7360 		}
7361 		ldcp->link_down_forced = B_FALSE;
7362 
7363 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
7364 			DWARN(vgenp, ldcp, "ldc_status err\n");
7365 		} else {
7366 			ldcp->ldc_status = istatus;
7367 		}
7368 
7369 		/* if channel is already UP - restart handshake */
7370 		if (ldcp->ldc_status == LDC_UP) {
7371 			vgen_handle_evt_up(ldcp);
7372 		}
7373 		break;
7374 
7375 	}
7376 
7377 	mutex_exit(&ldcp->cblock);
7378 	RW_EXIT(&ldclp->rwlock);
7379 
7380 	return (0);
7381 }
7382 
7383 #else
7384 
7385 static void
7386 vgen_ioctl(void *arg, queue_t *q, mblk_t *mp)
7387 {
7388 	vgen_port_t	*portp;
7389 
7390 	portp = (vgen_port_t *)arg;
7391 
7392 	if (portp == NULL) {
7393 		miocnak(q, mp, 0, EINVAL);
7394 		return;
7395 	}
7396 
7397 	miocnak(q, mp, 0, ENOTSUP);
7398 }
7399 
7400 #endif
7401