xref: /titanic_41/usr/src/uts/sun4v/io/vnet_gen.c (revision de6d0fcd730431ea0d586564c1551016efe19f56)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/types.h>
28 #include <sys/errno.h>
29 #include <sys/sysmacros.h>
30 #include <sys/param.h>
31 #include <sys/stream.h>
32 #include <sys/strsubr.h>
33 #include <sys/kmem.h>
34 #include <sys/conf.h>
35 #include <sys/devops.h>
36 #include <sys/ksynch.h>
37 #include <sys/stat.h>
38 #include <sys/modctl.h>
39 #include <sys/debug.h>
40 #include <sys/ethernet.h>
41 #include <sys/ddi.h>
42 #include <sys/sunddi.h>
43 #include <sys/strsun.h>
44 #include <sys/note.h>
45 #include <sys/mac_provider.h>
46 #include <sys/mac_ether.h>
47 #include <sys/ldc.h>
48 #include <sys/mach_descrip.h>
49 #include <sys/mdeg.h>
50 #include <net/if.h>
51 #include <sys/vnet.h>
52 #include <sys/vio_mailbox.h>
53 #include <sys/vio_common.h>
54 #include <sys/vnet_common.h>
55 #include <sys/vnet_mailbox.h>
56 #include <sys/vio_util.h>
57 #include <sys/vnet_gen.h>
58 #include <sys/atomic.h>
59 #include <sys/callb.h>
60 #include <sys/sdt.h>
61 #include <sys/intr.h>
62 #include <sys/pattr.h>
63 #include <sys/vlan.h>
64 
65 /*
66  * Implementation of the mac functionality for vnet using the
67  * generic(default) transport layer of sun4v Logical Domain Channels(LDC).
68  */
69 
70 /*
71  * Function prototypes.
72  */
73 /* vgen proxy entry points */
74 int vgen_init(void *vnetp, uint64_t regprop, dev_info_t *vnetdip,
75     const uint8_t *macaddr, void **vgenhdl);
76 int vgen_init_mdeg(void *arg);
77 void vgen_uninit(void *arg);
78 int vgen_dds_tx(void *arg, void *dmsg);
79 void vgen_mod_init(void);
80 int vgen_mod_cleanup(void);
81 void vgen_mod_fini(void);
82 int vgen_enable_intr(void *arg);
83 int vgen_disable_intr(void *arg);
84 mblk_t *vgen_poll(void *arg, int bytes_to_pickup);
85 static int vgen_start(void *arg);
86 static void vgen_stop(void *arg);
87 static mblk_t *vgen_tx(void *arg, mblk_t *mp);
88 static int vgen_multicst(void *arg, boolean_t add,
89 	const uint8_t *mca);
90 static int vgen_promisc(void *arg, boolean_t on);
91 static int vgen_unicst(void *arg, const uint8_t *mca);
92 static int vgen_stat(void *arg, uint_t stat, uint64_t *val);
93 static void vgen_ioctl(void *arg, queue_t *q, mblk_t *mp);
94 #ifdef	VNET_IOC_DEBUG
95 static int vgen_force_link_state(vgen_port_t *portp, int link_state);
96 #endif
97 
98 /* vgen internal functions */
99 static int vgen_read_mdprops(vgen_t *vgenp);
100 static void vgen_update_md_prop(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
101 static void vgen_read_pri_eth_types(vgen_t *vgenp, md_t *mdp,
102 	mde_cookie_t node);
103 static void vgen_mtu_read(vgen_t *vgenp, md_t *mdp, mde_cookie_t node,
104 	uint32_t *mtu);
105 static void vgen_linkprop_read(vgen_t *vgenp, md_t *mdp, mde_cookie_t node,
106 	boolean_t *pls);
107 static void vgen_detach_ports(vgen_t *vgenp);
108 static void vgen_port_detach(vgen_port_t *portp);
109 static void vgen_port_list_insert(vgen_port_t *portp);
110 static void vgen_port_list_remove(vgen_port_t *portp);
111 static vgen_port_t *vgen_port_lookup(vgen_portlist_t *plistp,
112 	int port_num);
113 static int vgen_mdeg_reg(vgen_t *vgenp);
114 static void vgen_mdeg_unreg(vgen_t *vgenp);
115 static int vgen_mdeg_cb(void *cb_argp, mdeg_result_t *resp);
116 static int vgen_mdeg_port_cb(void *cb_argp, mdeg_result_t *resp);
117 static int vgen_add_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
118 static int vgen_port_read_props(vgen_port_t *portp, vgen_t *vgenp, md_t *mdp,
119 	mde_cookie_t mdex);
120 static int vgen_remove_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
121 static int vgen_port_attach(vgen_port_t *portp);
122 static void vgen_port_detach_mdeg(vgen_port_t *portp);
123 static int vgen_update_port(vgen_t *vgenp, md_t *curr_mdp,
124 	mde_cookie_t curr_mdex, md_t *prev_mdp, mde_cookie_t prev_mdex);
125 static uint64_t	vgen_port_stat(vgen_port_t *portp, uint_t stat);
126 static void vgen_port_reset(vgen_port_t *portp);
127 static void vgen_reset_vsw_port(vgen_t *vgenp);
128 static void vgen_ldc_reset(vgen_ldc_t *ldcp);
129 static int vgen_ldc_attach(vgen_port_t *portp, uint64_t ldc_id);
130 static void vgen_ldc_detach(vgen_ldc_t *ldcp);
131 static int vgen_alloc_tx_ring(vgen_ldc_t *ldcp);
132 static void vgen_free_tx_ring(vgen_ldc_t *ldcp);
133 static void vgen_init_ports(vgen_t *vgenp);
134 static void vgen_port_init(vgen_port_t *portp);
135 static void vgen_uninit_ports(vgen_t *vgenp);
136 static void vgen_port_uninit(vgen_port_t *portp);
137 static void vgen_init_ldcs(vgen_port_t *portp);
138 static void vgen_uninit_ldcs(vgen_port_t *portp);
139 static int vgen_ldc_init(vgen_ldc_t *ldcp);
140 static void vgen_ldc_uninit(vgen_ldc_t *ldcp);
141 static int vgen_init_tbufs(vgen_ldc_t *ldcp);
142 static void vgen_uninit_tbufs(vgen_ldc_t *ldcp);
143 static void vgen_clobber_tbufs(vgen_ldc_t *ldcp);
144 static void vgen_clobber_rxds(vgen_ldc_t *ldcp);
145 static uint64_t	vgen_ldc_stat(vgen_ldc_t *ldcp, uint_t stat);
146 static uint_t vgen_ldc_cb(uint64_t event, caddr_t arg);
147 static int vgen_portsend(vgen_port_t *portp, mblk_t *mp);
148 static int vgen_ldcsend(void *arg, mblk_t *mp);
149 static void vgen_ldcsend_pkt(void *arg, mblk_t *mp);
150 static int vgen_ldcsend_dring(void *arg, mblk_t *mp);
151 static void vgen_reclaim(vgen_ldc_t *ldcp);
152 static void vgen_reclaim_dring(vgen_ldc_t *ldcp);
153 static int vgen_num_txpending(vgen_ldc_t *ldcp);
154 static int vgen_tx_dring_full(vgen_ldc_t *ldcp);
155 static int vgen_ldc_txtimeout(vgen_ldc_t *ldcp);
156 static void vgen_ldc_watchdog(void *arg);
157 static mblk_t *vgen_ldc_poll(vgen_ldc_t *ldcp, int bytes_to_pickup);
158 
159 /* vgen handshake functions */
160 static vgen_ldc_t *vh_nextphase(vgen_ldc_t *ldcp);
161 static int vgen_sendmsg(vgen_ldc_t *ldcp, caddr_t msg,  size_t msglen,
162 	boolean_t caller_holds_lock);
163 static int vgen_send_version_negotiate(vgen_ldc_t *ldcp);
164 static int vgen_send_attr_info(vgen_ldc_t *ldcp);
165 static int vgen_send_dring_reg(vgen_ldc_t *ldcp);
166 static int vgen_send_rdx_info(vgen_ldc_t *ldcp);
167 static int vgen_send_dring_data(vgen_ldc_t *ldcp, uint32_t start, int32_t end);
168 static int vgen_send_mcast_info(vgen_ldc_t *ldcp);
169 static int vgen_handshake_phase2(vgen_ldc_t *ldcp);
170 static void vgen_handshake_reset(vgen_ldc_t *ldcp);
171 static void vgen_reset_hphase(vgen_ldc_t *ldcp);
172 static void vgen_handshake(vgen_ldc_t *ldcp);
173 static int vgen_handshake_done(vgen_ldc_t *ldcp);
174 static void vgen_handshake_retry(vgen_ldc_t *ldcp);
175 static int vgen_handle_version_negotiate(vgen_ldc_t *ldcp,
176 	vio_msg_tag_t *tagp);
177 static int vgen_handle_attr_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
178 static int vgen_handle_dring_reg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
179 static int vgen_handle_rdx_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
180 static int vgen_handle_mcast_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
181 static int vgen_handle_ctrlmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
182 static void vgen_handle_pkt_data_nop(void *arg1, void *arg2, uint32_t msglen);
183 static void vgen_handle_pkt_data(void *arg1, void *arg2, uint32_t msglen);
184 static int vgen_handle_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
185 static int vgen_handle_dring_data_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
186 static int vgen_process_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
187 static int vgen_handle_dring_data_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
188 static int vgen_handle_dring_data_nack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
189 static int vgen_send_dring_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp,
190 	uint32_t start, int32_t end, uint8_t pstate);
191 static int vgen_handle_datamsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp,
192 	uint32_t msglen);
193 static void vgen_handle_errmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
194 static void vgen_handle_evt_up(vgen_ldc_t *ldcp);
195 static void vgen_handle_evt_reset(vgen_ldc_t *ldcp);
196 static int vgen_check_sid(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
197 static int vgen_check_datamsg_seq(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
198 static caddr_t vgen_print_ethaddr(uint8_t *a, char *ebuf);
199 static void vgen_hwatchdog(void *arg);
200 static void vgen_print_attr_info(vgen_ldc_t *ldcp, int endpoint);
201 static void vgen_print_hparams(vgen_hparams_t *hp);
202 static void vgen_print_ldcinfo(vgen_ldc_t *ldcp);
203 static void vgen_stop_rcv_thread(vgen_ldc_t *ldcp);
204 static void vgen_drain_rcv_thread(vgen_ldc_t *ldcp);
205 static void vgen_ldc_rcv_worker(void *arg);
206 static void vgen_handle_evt_read(vgen_ldc_t *ldcp);
207 static void vgen_rx(vgen_ldc_t *ldcp, mblk_t *bp, mblk_t *bpt);
208 static void vgen_set_vnet_proto_ops(vgen_ldc_t *ldcp);
209 static void vgen_reset_vnet_proto_ops(vgen_ldc_t *ldcp);
210 static void vgen_link_update(vgen_t *vgenp, link_state_t link_state);
211 
212 /* VLAN routines */
213 static void vgen_vlan_read_ids(void *arg, int type, md_t *mdp,
214 	mde_cookie_t node, uint16_t *pvidp, uint16_t **vidspp,
215 	uint16_t *nvidsp, uint16_t *default_idp);
216 static void vgen_vlan_create_hash(vgen_port_t *portp);
217 static void vgen_vlan_destroy_hash(vgen_port_t *portp);
218 static void vgen_vlan_add_ids(vgen_port_t *portp);
219 static void vgen_vlan_remove_ids(vgen_port_t *portp);
220 static boolean_t vgen_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid);
221 static boolean_t vgen_frame_lookup_vid(vnet_t *vnetp, struct ether_header *ehp,
222 	uint16_t *vidp);
223 static mblk_t *vgen_vlan_frame_fixtag(vgen_port_t *portp, mblk_t *mp,
224 	boolean_t is_tagged, uint16_t vid);
225 static void vgen_vlan_unaware_port_reset(vgen_port_t *portp);
226 static void vgen_reset_vlan_unaware_ports(vgen_t *vgenp);
227 static int vgen_dds_rx(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
228 
229 /* externs */
230 extern void vnet_dds_rx(void *arg, void *dmsg);
231 extern void vnet_dds_cleanup_hio(vnet_t *vnetp);
232 extern int vnet_mtu_update(vnet_t *vnetp, uint32_t mtu);
233 extern void vnet_link_update(vnet_t *vnetp, link_state_t link_state);
234 
235 /*
236  * The handshake process consists of 5 phases defined below, with VH_PHASE0
237  * being the pre-handshake phase and VH_DONE is the phase to indicate
238  * successful completion of all phases.
239  * Each phase may have one to several handshake states which are required
240  * to complete successfully to move to the next phase.
241  * Refer to the functions vgen_handshake() and vgen_handshake_done() for
242  * more details.
243  */
244 /* handshake phases */
245 enum {	VH_PHASE0, VH_PHASE1, VH_PHASE2, VH_PHASE3, VH_DONE = 0x80 };
246 
247 /* handshake states */
248 enum {
249 
250 	VER_INFO_SENT	=	0x1,
251 	VER_ACK_RCVD	=	0x2,
252 	VER_INFO_RCVD	=	0x4,
253 	VER_ACK_SENT	=	0x8,
254 	VER_NEGOTIATED	=	(VER_ACK_RCVD | VER_ACK_SENT),
255 
256 	ATTR_INFO_SENT	=	0x10,
257 	ATTR_ACK_RCVD	=	0x20,
258 	ATTR_INFO_RCVD	=	0x40,
259 	ATTR_ACK_SENT	=	0x80,
260 	ATTR_INFO_EXCHANGED	=	(ATTR_ACK_RCVD | ATTR_ACK_SENT),
261 
262 	DRING_INFO_SENT	=	0x100,
263 	DRING_ACK_RCVD	=	0x200,
264 	DRING_INFO_RCVD	=	0x400,
265 	DRING_ACK_SENT	=	0x800,
266 	DRING_INFO_EXCHANGED	=	(DRING_ACK_RCVD | DRING_ACK_SENT),
267 
268 	RDX_INFO_SENT	=	0x1000,
269 	RDX_ACK_RCVD	=	0x2000,
270 	RDX_INFO_RCVD	=	0x4000,
271 	RDX_ACK_SENT	=	0x8000,
272 	RDX_EXCHANGED	=	(RDX_ACK_RCVD | RDX_ACK_SENT)
273 
274 };
275 
276 #define	VGEN_PRI_ETH_DEFINED(vgenp)	((vgenp)->pri_num_types != 0)
277 
/*
 * LDC_LOCK() acquires all five per-channel mutexes in the fixed order
 * cblock, rxlock, wrlock, txlock, tclock; LDC_UNLOCK() releases them in the
 * exact reverse order.
 *
 * Both macros expand to a sequence of several statements (they are not
 * wrapped in do { } while (0)), so they must not be used as the unbraced
 * body of an if/else or loop: only the first mutex call would be governed
 * by the condition.
 */
#define	LDC_LOCK(ldcp)	\
				mutex_enter(&((ldcp)->cblock));\
				mutex_enter(&((ldcp)->rxlock));\
				mutex_enter(&((ldcp)->wrlock));\
				mutex_enter(&((ldcp)->txlock));\
				mutex_enter(&((ldcp)->tclock));
#define	LDC_UNLOCK(ldcp)	\
				mutex_exit(&((ldcp)->tclock));\
				mutex_exit(&((ldcp)->txlock));\
				mutex_exit(&((ldcp)->wrlock));\
				mutex_exit(&((ldcp)->rxlock));\
				mutex_exit(&((ldcp)->cblock));
290 
291 #define	VGEN_VER_EQ(ldcp, major, minor)	\
292 	((ldcp)->local_hparams.ver_major == (major) &&	\
293 	    (ldcp)->local_hparams.ver_minor == (minor))
294 
295 #define	VGEN_VER_LT(ldcp, major, minor)	\
296 	(((ldcp)->local_hparams.ver_major < (major)) ||	\
297 	    ((ldcp)->local_hparams.ver_major == (major) &&	\
298 	    (ldcp)->local_hparams.ver_minor < (minor)))
299 
300 #define	VGEN_VER_GTEQ(ldcp, major, minor)	\
301 	(((ldcp)->local_hparams.ver_major > (major)) ||	\
302 	    ((ldcp)->local_hparams.ver_major == (major) &&	\
303 	    (ldcp)->local_hparams.ver_minor >= (minor)))
304 
305 static struct ether_addr etherbroadcastaddr = {
306 	0xff, 0xff, 0xff, 0xff, 0xff, 0xff
307 };
/*
 * MIB II broadcast/multicast packets
 *
 * Both macros take a pointer to a struct ether_header and test its
 * destination address:
 *   IS_BROADCAST: destination compares equal to etherbroadcastaddr.
 *   IS_MULTICAST: low-order bit of the first destination octet is set.
 *
 * The argument is parenthesized so that any pointer expression
 * (e.g. "hdrs + 1") can be passed, not just a plain identifier.
 */
#define	IS_BROADCAST(ehp) \
		(ether_cmp(&(ehp)->ether_dhost, &etherbroadcastaddr) == 0)
#define	IS_MULTICAST(ehp) \
		(((ehp)->ether_dhost.ether_addr_octet[0] & 01) == 1)
315 
316 /*
317  * Property names
318  */
319 static char macaddr_propname[] = "mac-address";
320 static char rmacaddr_propname[] = "remote-mac-address";
321 static char channel_propname[] = "channel-endpoint";
322 static char reg_propname[] = "reg";
323 static char port_propname[] = "port";
324 static char swport_propname[] = "switch-port";
325 static char id_propname[] = "id";
326 static char vdev_propname[] = "virtual-device";
327 static char vnet_propname[] = "network";
328 static char pri_types_propname[] = "priority-ether-types";
329 static char vgen_pvid_propname[] = "port-vlan-id";
330 static char vgen_vid_propname[] = "vlan-id";
331 static char vgen_dvid_propname[] = "default-vlan-id";
332 static char port_pvid_propname[] = "remote-port-vlan-id";
333 static char port_vid_propname[] = "remote-vlan-id";
334 static char vgen_mtu_propname[] = "mtu";
335 static char vgen_linkprop_propname[] = "linkprop";
336 
337 /*
338  * VIO Protocol Version Info:
339  *
340  * The version specified below represents the version of protocol currently
341  * supported in the driver. It means the driver can negotiate with peers with
342  * versions <= this version. Here is a summary of the feature(s) that are
343  * supported at each version of the protocol:
344  *
345  * 1.0			Basic VIO protocol.
346  * 1.1			vDisk protocol update (no virtual network update).
347  * 1.2			Support for priority frames (priority-ether-types).
348  * 1.3			VLAN and HybridIO support.
349  * 1.4			Jumbo Frame support.
350  * 1.5			Link State Notification support with optional support
351  * 			for Physical Link information.
352  */
353 static vgen_ver_t vgen_versions[VGEN_NUM_VER] =  { {1, 5} };
354 
355 /* Tunables */
356 uint32_t vgen_hwd_interval = 5;		/* handshake watchdog freq in sec */
357 uint32_t vgen_max_hretries = VNET_NUM_HANDSHAKES; /* # of handshake retries */
358 uint32_t vgen_ldcwr_retries = 10;	/* max # of ldc_write() retries */
359 uint32_t vgen_ldcup_retries = 5;	/* max # of ldc_up() retries */
360 uint32_t vgen_ldccl_retries = 5;	/* max # of ldc_close() retries */
361 uint32_t vgen_recv_delay = 1;		/* delay when rx descr not ready */
362 uint32_t vgen_recv_retries = 10;	/* retry when rx descr not ready */
363 uint32_t vgen_tx_retries = 0x4;		/* retry when tx descr not available */
364 uint32_t vgen_tx_delay = 0x30;		/* delay when tx descr not available */
365 
int vgen_rcv_thread_enabled = 1;	/* Enable Receive thread */
367 
368 static vio_mblk_pool_t	*vgen_rx_poolp = NULL;
369 static krwlock_t	vgen_rw;
370 
/*
 * max # of packets accumulated prior to sending them up. It is best
 * to keep this at 60% of the number of receive buffers.
 */
375 uint32_t vgen_chain_len = (VGEN_NRBUFS * 0.6);
376 
377 /*
378  * Internal tunables for receive buffer pools, that is,  the size and number of
379  * mblks for each pool. At least 3 sizes must be specified if these are used.
380  * The sizes must be specified in increasing order. Non-zero value of the first
381  * size will be used as a hint to use these values instead of the algorithm
382  * that determines the sizes based on MTU.
383  */
384 uint32_t vgen_rbufsz1 = 0;
385 uint32_t vgen_rbufsz2 = 0;
386 uint32_t vgen_rbufsz3 = 0;
387 uint32_t vgen_rbufsz4 = 0;
388 
389 uint32_t vgen_nrbufs1 = VGEN_NRBUFS;
390 uint32_t vgen_nrbufs2 = VGEN_NRBUFS;
391 uint32_t vgen_nrbufs3 = VGEN_NRBUFS;
392 uint32_t vgen_nrbufs4 = VGEN_NRBUFS;
393 
394 /*
395  * In the absence of "priority-ether-types" property in MD, the following
396  * internal tunable can be set to specify a single priority ethertype.
397  */
398 uint64_t vgen_pri_eth_type = 0;
399 
400 /*
401  * Number of transmit priority buffers that are preallocated per device.
402  * This number is chosen to be a small value to throttle transmission
403  * of priority packets. Note: Must be a power of 2 for vio_create_mblks().
404  */
405 uint32_t vgen_pri_tx_nmblks = 64;
406 
407 uint32_t	vgen_vlan_nchains = 4;	/* # of chains in vlan id hash table */
408 
409 #ifdef DEBUG
410 /* flags to simulate error conditions for debugging */
411 int vgen_trigger_txtimeout = 0;
412 int vgen_trigger_rxlost = 0;
413 #endif
414 
415 /*
416  * Matching criteria passed to the MDEG to register interest
417  * in changes to 'virtual-device' nodes (i.e. vnet nodes) identified
418  * by their 'name' and 'cfg-handle' properties.
419  */
420 static md_prop_match_t vdev_prop_match[] = {
421 	{ MDET_PROP_STR,    "name"   },
422 	{ MDET_PROP_VAL,    "cfg-handle" },
423 	{ MDET_LIST_END,    NULL    }
424 };
425 
426 static mdeg_node_match_t vdev_match = { "virtual-device",
427 						vdev_prop_match };
428 
429 /* MD update matching structure */
430 static md_prop_match_t	vport_prop_match[] = {
431 	{ MDET_PROP_VAL,	"id" },
432 	{ MDET_LIST_END,	NULL }
433 };
434 
435 static mdeg_node_match_t vport_match = { "virtual-device-port",
436 					vport_prop_match };
437 
438 /* template for matching a particular vnet instance */
439 static mdeg_prop_spec_t vgen_prop_template[] = {
440 	{ MDET_PROP_STR,	"name",		"network" },
441 	{ MDET_PROP_VAL,	"cfg-handle",	NULL },
442 	{ MDET_LIST_END,	NULL,		NULL }
443 };
444 
445 #define	VGEN_SET_MDEG_PROP_INST(specp, val)	(specp)[1].ps_val = (val)
446 
447 static int vgen_mdeg_port_cb(void *cb_argp, mdeg_result_t *resp);
448 
449 #ifdef	VNET_IOC_DEBUG
450 #define	VGEN_M_CALLBACK_FLAGS	(MC_IOCTL)
451 #else
452 #define	VGEN_M_CALLBACK_FLAGS	(0)
453 #endif
454 
455 static mac_callbacks_t vgen_m_callbacks = {
456 	VGEN_M_CALLBACK_FLAGS,
457 	vgen_stat,
458 	vgen_start,
459 	vgen_stop,
460 	vgen_promisc,
461 	vgen_multicst,
462 	vgen_unicst,
463 	vgen_tx,
464 	NULL,
465 	vgen_ioctl,
466 	NULL,
467 	NULL
468 };
469 
470 /* externs */
471 extern pri_t	maxclsyspri;
472 extern proc_t	p0;
473 extern uint32_t vnet_ntxds;
474 extern uint32_t vnet_ldcwd_interval;
475 extern uint32_t vnet_ldcwd_txtimeout;
476 extern uint32_t vnet_ldc_mtu;
477 extern uint32_t vnet_nrbufs;
478 extern uint32_t	vnet_ethermtu;
479 extern uint16_t	vnet_default_vlan_id;
480 extern boolean_t vnet_jumbo_rxpools;
481 
482 #ifdef DEBUG
483 
484 extern int vnet_dbglevel;
485 static void debug_printf(const char *fname, vgen_t *vgenp,
486 	vgen_ldc_t *ldcp, const char *fmt, ...);
487 
488 /* -1 for all LDCs info, or ldc_id for a specific LDC info */
489 int vgendbg_ldcid = -1;
490 
491 /* simulate handshake error conditions for debug */
492 uint32_t vgen_hdbg;
493 #define	HDBG_VERSION	0x1
494 #define	HDBG_TIMEOUT	0x2
495 #define	HDBG_BAD_SID	0x4
496 #define	HDBG_OUT_STATE	0x8
497 
498 #endif
499 
500 /*
501  * vgen_init() is called by an instance of vnet driver to initialize the
502  * corresponding generic proxy transport layer. The arguments passed by vnet
503  * are - an opaque pointer to the vnet instance, pointers to dev_info_t and
504  * the mac address of the vnet device, and a pointer to vgen_t is passed
505  * back as a handle to vnet.
506  */
507 int
508 vgen_init(void *vnetp, uint64_t regprop, dev_info_t *vnetdip,
509     const uint8_t *macaddr, void **vgenhdl)
510 {
511 	vgen_t *vgenp;
512 	int instance;
513 	int rv;
514 
515 	if ((vnetp == NULL) || (vnetdip == NULL))
516 		return (DDI_FAILURE);
517 
518 	instance = ddi_get_instance(vnetdip);
519 
520 	DBG1(NULL, NULL, "vnet(%d): enter\n", instance);
521 
522 	vgenp = kmem_zalloc(sizeof (vgen_t), KM_SLEEP);
523 
524 	vgenp->vnetp = vnetp;
525 	vgenp->instance = instance;
526 	vgenp->regprop = regprop;
527 	vgenp->vnetdip = vnetdip;
528 	bcopy(macaddr, &(vgenp->macaddr), ETHERADDRL);
529 	vgenp->phys_link_state = LINK_STATE_UNKNOWN;
530 
531 	/* allocate multicast table */
532 	vgenp->mctab = kmem_zalloc(VGEN_INIT_MCTAB_SIZE *
533 	    sizeof (struct ether_addr), KM_SLEEP);
534 	vgenp->mccount = 0;
535 	vgenp->mcsize = VGEN_INIT_MCTAB_SIZE;
536 
537 	mutex_init(&vgenp->lock, NULL, MUTEX_DRIVER, NULL);
538 	rw_init(&vgenp->vgenports.rwlock, NULL, RW_DRIVER, NULL);
539 
540 	rv = vgen_read_mdprops(vgenp);
541 	if (rv != 0) {
542 		goto vgen_init_fail;
543 	}
544 	*vgenhdl = (void *)vgenp;
545 
546 	DBG1(NULL, NULL, "vnet(%d): exit\n", instance);
547 	return (DDI_SUCCESS);
548 
549 vgen_init_fail:
550 	rw_destroy(&vgenp->vgenports.rwlock);
551 	mutex_destroy(&vgenp->lock);
552 	kmem_free(vgenp->mctab, VGEN_INIT_MCTAB_SIZE *
553 	    sizeof (struct ether_addr));
554 	if (VGEN_PRI_ETH_DEFINED(vgenp)) {
555 		kmem_free(vgenp->pri_types,
556 		    sizeof (uint16_t) * vgenp->pri_num_types);
557 		(void) vio_destroy_mblks(vgenp->pri_tx_vmp);
558 	}
559 	KMEM_FREE(vgenp);
560 	return (DDI_FAILURE);
561 }
562 
563 int
564 vgen_init_mdeg(void *arg)
565 {
566 	vgen_t	*vgenp = (vgen_t *)arg;
567 
568 	/* register with MD event generator */
569 	return (vgen_mdeg_reg(vgenp));
570 }
571 
572 /*
573  * Called by vnet to undo the initializations done by vgen_init().
574  * The handle provided by generic transport during vgen_init() is the argument.
575  */
576 void
577 vgen_uninit(void *arg)
578 {
579 	vgen_t		*vgenp = (vgen_t *)arg;
580 	vio_mblk_pool_t	*rp;
581 	vio_mblk_pool_t	*nrp;
582 
583 	if (vgenp == NULL) {
584 		return;
585 	}
586 
587 	DBG1(vgenp, NULL, "enter\n");
588 
589 	/* unregister with MD event generator */
590 	vgen_mdeg_unreg(vgenp);
591 
592 	mutex_enter(&vgenp->lock);
593 
594 	/* detach all ports from the device */
595 	vgen_detach_ports(vgenp);
596 
597 	/*
598 	 * free any pending rx mblk pools,
599 	 * that couldn't be freed previously during channel detach.
600 	 */
601 	rp = vgenp->rmp;
602 	while (rp != NULL) {
603 		nrp = vgenp->rmp = rp->nextp;
604 		if (vio_destroy_mblks(rp)) {
605 			WRITE_ENTER(&vgen_rw);
606 			rp->nextp = vgen_rx_poolp;
607 			vgen_rx_poolp = rp;
608 			RW_EXIT(&vgen_rw);
609 		}
610 		rp = nrp;
611 	}
612 
613 	/* free multicast table */
614 	kmem_free(vgenp->mctab, vgenp->mcsize * sizeof (struct ether_addr));
615 
616 	/* free pri_types table */
617 	if (VGEN_PRI_ETH_DEFINED(vgenp)) {
618 		kmem_free(vgenp->pri_types,
619 		    sizeof (uint16_t) * vgenp->pri_num_types);
620 		(void) vio_destroy_mblks(vgenp->pri_tx_vmp);
621 	}
622 
623 	mutex_exit(&vgenp->lock);
624 
625 	rw_destroy(&vgenp->vgenports.rwlock);
626 	mutex_destroy(&vgenp->lock);
627 
628 	DBG1(vgenp, NULL, "exit\n");
629 	KMEM_FREE(vgenp);
630 }
631 
/*
 * module specific initialization common to all instances of vnet/vgen.
 * Initializes vgen_rw, the rwlock that is write-held whenever the
 * module-wide vgen_rx_poolp list is modified.
 */
void
vgen_mod_init(void)
{
	rw_init(&vgen_rw, NULL, RW_DRIVER, NULL);
}
640 
641 /*
642  * module specific cleanup common to all instances of vnet/vgen.
643  */
644 int
645 vgen_mod_cleanup(void)
646 {
647 	vio_mblk_pool_t	*poolp, *npoolp;
648 
649 	/*
650 	 * If any rx mblk pools are still in use, return
651 	 * error and stop the module from unloading.
652 	 */
653 	WRITE_ENTER(&vgen_rw);
654 	poolp = vgen_rx_poolp;
655 	while (poolp != NULL) {
656 		npoolp = vgen_rx_poolp = poolp->nextp;
657 		if (vio_destroy_mblks(poolp) != 0) {
658 			vgen_rx_poolp = poolp;
659 			RW_EXIT(&vgen_rw);
660 			return (EBUSY);
661 		}
662 		poolp = npoolp;
663 	}
664 	RW_EXIT(&vgen_rw);
665 
666 	return (0);
667 }
668 
/*
 * module specific uninitialization common to all instances of vnet/vgen.
 * Destroys vgen_rw, the rwlock set up by vgen_mod_init().
 */
void
vgen_mod_fini(void)
{
	rw_destroy(&vgen_rw);
}
677 
678 /* enable transmit/receive for the device */
679 int
680 vgen_start(void *arg)
681 {
682 	vgen_port_t	*portp = (vgen_port_t *)arg;
683 	vgen_t		*vgenp = portp->vgenp;
684 
685 	DBG1(vgenp, NULL, "enter\n");
686 	mutex_enter(&portp->lock);
687 	vgen_port_init(portp);
688 	portp->flags |= VGEN_STARTED;
689 	mutex_exit(&portp->lock);
690 	DBG1(vgenp, NULL, "exit\n");
691 
692 	return (DDI_SUCCESS);
693 }
694 
695 /* stop transmit/receive */
696 void
697 vgen_stop(void *arg)
698 {
699 	vgen_port_t	*portp = (vgen_port_t *)arg;
700 	vgen_t		*vgenp = portp->vgenp;
701 
702 	DBG1(vgenp, NULL, "enter\n");
703 
704 	mutex_enter(&portp->lock);
705 	if (portp->flags & VGEN_STARTED) {
706 		vgen_port_uninit(portp);
707 		portp->flags &= ~(VGEN_STARTED);
708 	}
709 	mutex_exit(&portp->lock);
710 	DBG1(vgenp, NULL, "exit\n");
711 
712 }
713 
714 /* vgen transmit function */
715 static mblk_t *
716 vgen_tx(void *arg, mblk_t *mp)
717 {
718 	int i;
719 	vgen_port_t *portp;
720 	int status = VGEN_FAILURE;
721 
722 	portp = (vgen_port_t *)arg;
723 	/*
724 	 * Retry so that we avoid reporting a failure
725 	 * to the upper layer. Returning a failure may cause the
726 	 * upper layer to go into single threaded mode there by
727 	 * causing performance degradation, especially for a large
728 	 * number of connections.
729 	 */
730 	for (i = 0; i < vgen_tx_retries; ) {
731 		status = vgen_portsend(portp, mp);
732 		if (status == VGEN_SUCCESS) {
733 			break;
734 		}
735 		if (++i < vgen_tx_retries)
736 			delay(drv_usectohz(vgen_tx_delay));
737 	}
738 	if (status != VGEN_SUCCESS) {
739 		/* failure */
740 		return (mp);
741 	}
742 	/* success */
743 	return (NULL);
744 }
745 
746 /*
747  * This function provides any necessary tagging/untagging of the frames
748  * that are being transmitted over the port. It first verifies the vlan
749  * membership of the destination(port) and drops the packet if the
750  * destination doesn't belong to the given vlan.
751  *
752  * Arguments:
753  *   portp:     port over which the frames should be transmitted
754  *   mp:        frame to be transmitted
755  *   is_tagged:
756  *              B_TRUE: indicates frame header contains the vlan tag already.
757  *              B_FALSE: indicates frame is untagged.
758  *   vid:       vlan in which the frame should be transmitted.
759  *
760  * Returns:
761  *              Sucess: frame(mblk_t *) after doing the necessary tag/untag.
762  *              Failure: NULL
763  */
764 static mblk_t *
765 vgen_vlan_frame_fixtag(vgen_port_t *portp, mblk_t *mp, boolean_t is_tagged,
766 	uint16_t vid)
767 {
768 	vgen_t				*vgenp;
769 	boolean_t			dst_tagged;
770 	int				rv;
771 
772 	vgenp = portp->vgenp;
773 
774 	/*
775 	 * If the packet is going to a vnet:
776 	 *   Check if the destination vnet is in the same vlan.
777 	 *   Check the frame header if tag or untag is needed.
778 	 *
779 	 * We do not check the above conditions if the packet is going to vsw:
780 	 *   vsw must be present implicitly in all the vlans that a vnet device
781 	 *   is configured into; even if vsw itself is not assigned to those
782 	 *   vlans as an interface. For instance, the packet might be destined
783 	 *   to another vnet(indirectly through vsw) or to an external host
784 	 *   which is in the same vlan as this vnet and vsw itself may not be
785 	 *   present in that vlan. Similarly packets going to vsw must be
786 	 *   always tagged(unless in the default-vlan) if not already tagged,
787 	 *   as we do not know the final destination. This is needed because
788 	 *   vsw must always invoke its switching function only after tagging
789 	 *   the packet; otherwise after switching function determines the
790 	 *   destination we cannot figure out if the destination belongs to the
791 	 *   the same vlan that the frame originated from and if it needs tag/
792 	 *   untag. Note that vsw will tag the packet itself when it receives
793 	 *   it over the channel from a client if needed. However, that is
794 	 *   needed only in the case of vlan unaware clients such as obp or
795 	 *   earlier versions of vnet.
796 	 *
797 	 */
798 	if (portp != vgenp->vsw_portp) {
799 		/*
800 		 * Packet going to a vnet. Check if the destination vnet is in
801 		 * the same vlan. Then check the frame header if tag/untag is
802 		 * needed.
803 		 */
804 		rv = vgen_vlan_lookup(portp->vlan_hashp, vid);
805 		if (rv == B_FALSE) {
806 			/* drop the packet */
807 			freemsg(mp);
808 			return (NULL);
809 		}
810 
811 		/* is the destination tagged or untagged in this vlan? */
812 		(vid == portp->pvid) ? (dst_tagged = B_FALSE) :
813 		    (dst_tagged = B_TRUE);
814 
815 		if (is_tagged == dst_tagged) {
816 			/* no tagging/untagging needed */
817 			return (mp);
818 		}
819 
820 		if (is_tagged == B_TRUE) {
821 			/* frame is tagged; destination needs untagged */
822 			mp = vnet_vlan_remove_tag(mp);
823 			return (mp);
824 		}
825 
826 		/* (is_tagged == B_FALSE): fallthru to tag tx packet: */
827 	}
828 
829 	/*
830 	 * Packet going to a vnet needs tagging.
831 	 * OR
832 	 * If the packet is going to vsw, then it must be tagged in all cases:
833 	 * unknown unicast, broadcast/multicast or to vsw interface.
834 	 */
835 
836 	if (is_tagged == B_FALSE) {
837 		mp = vnet_vlan_insert_tag(mp, vid);
838 	}
839 
840 	return (mp);
841 }
842 
843 /* transmit packets over the given port */
844 static int
845 vgen_portsend(vgen_port_t *portp, mblk_t *mp)
846 {
847 	vgen_ldclist_t		*ldclp;
848 	vgen_ldc_t		*ldcp;
849 	int			status;
850 	int			rv = VGEN_SUCCESS;
851 	vgen_t			*vgenp = portp->vgenp;
852 	vnet_t			*vnetp = vgenp->vnetp;
853 	boolean_t		is_tagged;
854 	boolean_t		dec_refcnt = B_FALSE;
855 	uint16_t		vlan_id;
856 	struct ether_header	*ehp;
857 
858 	if (portp->use_vsw_port) {
859 		(void) atomic_inc_32(&vgenp->vsw_port_refcnt);
860 		portp = portp->vgenp->vsw_portp;
861 		dec_refcnt = B_TRUE;
862 	}
863 	if (portp == NULL) {
864 		return (VGEN_FAILURE);
865 	}
866 
867 	/*
868 	 * Determine the vlan id that the frame belongs to.
869 	 */
870 	ehp = (struct ether_header *)mp->b_rptr;
871 	is_tagged = vgen_frame_lookup_vid(vnetp, ehp, &vlan_id);
872 
873 	if (vlan_id == vnetp->default_vlan_id) {
874 
875 		/* Frames in default vlan must be untagged */
876 		ASSERT(is_tagged == B_FALSE);
877 
878 		/*
879 		 * If the destination is a vnet-port verify it belongs to the
880 		 * default vlan; otherwise drop the packet. We do not need
881 		 * this check for vsw-port, as it should implicitly belong to
882 		 * this vlan; see comments in vgen_vlan_frame_fixtag().
883 		 */
884 		if (portp != vgenp->vsw_portp &&
885 		    portp->pvid != vnetp->default_vlan_id) {
886 			freemsg(mp);
887 			goto portsend_ret;
888 		}
889 
890 	} else {	/* frame not in default-vlan */
891 
892 		mp = vgen_vlan_frame_fixtag(portp, mp, is_tagged, vlan_id);
893 		if (mp == NULL) {
894 			goto portsend_ret;
895 		}
896 
897 	}
898 
899 	ldclp = &portp->ldclist;
900 	READ_ENTER(&ldclp->rwlock);
901 	/*
902 	 * NOTE: for now, we will assume we have a single channel.
903 	 */
904 	if (ldclp->headp == NULL) {
905 		RW_EXIT(&ldclp->rwlock);
906 		rv = VGEN_FAILURE;
907 		goto portsend_ret;
908 	}
909 	ldcp = ldclp->headp;
910 
911 	status = ldcp->tx(ldcp, mp);
912 
913 	RW_EXIT(&ldclp->rwlock);
914 
915 	if (status != VGEN_TX_SUCCESS) {
916 		rv = VGEN_FAILURE;
917 	}
918 
919 portsend_ret:
920 	if (dec_refcnt == B_TRUE) {
921 		(void) atomic_dec_32(&vgenp->vsw_port_refcnt);
922 	}
923 	return (rv);
924 }
925 
926 /*
927  * Wrapper function to transmit normal and/or priority frames over the channel.
928  */
929 static int
930 vgen_ldcsend(void *arg, mblk_t *mp)
931 {
932 	vgen_ldc_t		*ldcp = (vgen_ldc_t *)arg;
933 	int			status;
934 	struct ether_header	*ehp;
935 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
936 	uint32_t		num_types;
937 	uint16_t		*types;
938 	int			i;
939 
940 	ASSERT(VGEN_PRI_ETH_DEFINED(vgenp));
941 
942 	num_types = vgenp->pri_num_types;
943 	types = vgenp->pri_types;
944 	ehp = (struct ether_header *)mp->b_rptr;
945 
946 	for (i = 0; i < num_types; i++) {
947 
948 		if (ehp->ether_type == types[i]) {
949 			/* priority frame, use pri tx function */
950 			vgen_ldcsend_pkt(ldcp, mp);
951 			return (VGEN_SUCCESS);
952 		}
953 
954 	}
955 
956 	status  = vgen_ldcsend_dring(ldcp, mp);
957 
958 	return (status);
959 }
960 
961 /*
962  * This functions handles ldc channel reset while in the context
963  * of transmit routines: vgen_ldcsend_pkt() or vgen_ldcsend_dring().
964  */
965 static void
966 vgen_ldcsend_process_reset(vgen_ldc_t *ldcp)
967 {
968 	ldc_status_t	istatus;
969 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
970 
971 	if (mutex_tryenter(&ldcp->cblock)) {
972 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
973 			DWARN(vgenp, ldcp, "ldc_status() error\n");
974 		} else {
975 			ldcp->ldc_status = istatus;
976 		}
977 		if (ldcp->ldc_status != LDC_UP) {
978 			vgen_handle_evt_reset(ldcp);
979 		}
980 		mutex_exit(&ldcp->cblock);
981 	}
982 }
983 
984 /*
985  * This function transmits the frame in the payload of a raw data
986  * (VIO_PKT_DATA) message. Thus, it provides an Out-Of-Band path to
987  * send special frames with high priorities, without going through
988  * the normal data path which uses descriptor ring mechanism.
989  */
990 static void
991 vgen_ldcsend_pkt(void *arg, mblk_t *mp)
992 {
993 	vgen_ldc_t		*ldcp = (vgen_ldc_t *)arg;
994 	vio_raw_data_msg_t	*pkt;
995 	mblk_t			*bp;
996 	mblk_t			*nmp = NULL;
997 	caddr_t			dst;
998 	uint32_t		mblksz;
999 	uint32_t		size;
1000 	uint32_t		nbytes;
1001 	int			rv;
1002 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
1003 	vgen_stats_t		*statsp = &ldcp->stats;
1004 
1005 	/* drop the packet if ldc is not up or handshake is not done */
1006 	if (ldcp->ldc_status != LDC_UP) {
1007 		(void) atomic_inc_32(&statsp->tx_pri_fail);
1008 		DWARN(vgenp, ldcp, "status(%d), dropping packet\n",
1009 		    ldcp->ldc_status);
1010 		goto send_pkt_exit;
1011 	}
1012 
1013 	if (ldcp->hphase != VH_DONE) {
1014 		(void) atomic_inc_32(&statsp->tx_pri_fail);
1015 		DWARN(vgenp, ldcp, "hphase(%x), dropping packet\n",
1016 		    ldcp->hphase);
1017 		goto send_pkt_exit;
1018 	}
1019 
1020 	size = msgsize(mp);
1021 
1022 	/* frame size bigger than available payload len of raw data msg ? */
1023 	if (size > (size_t)(ldcp->msglen - VIO_PKT_DATA_HDRSIZE)) {
1024 		(void) atomic_inc_32(&statsp->tx_pri_fail);
1025 		DWARN(vgenp, ldcp, "invalid size(%d)\n", size);
1026 		goto send_pkt_exit;
1027 	}
1028 
1029 	if (size < ETHERMIN)
1030 		size = ETHERMIN;
1031 
1032 	/* alloc space for a raw data message */
1033 	nmp = vio_allocb(vgenp->pri_tx_vmp);
1034 	if (nmp == NULL) {
1035 		(void) atomic_inc_32(&statsp->tx_pri_fail);
1036 		DWARN(vgenp, ldcp, "vio_allocb failed\n");
1037 		goto send_pkt_exit;
1038 	}
1039 	pkt = (vio_raw_data_msg_t *)nmp->b_rptr;
1040 
1041 	/* copy frame into the payload of raw data message */
1042 	dst = (caddr_t)pkt->data;
1043 	for (bp = mp; bp != NULL; bp = bp->b_cont) {
1044 		mblksz = MBLKL(bp);
1045 		bcopy(bp->b_rptr, dst, mblksz);
1046 		dst += mblksz;
1047 	}
1048 
1049 	/* setup the raw data msg */
1050 	pkt->tag.vio_msgtype = VIO_TYPE_DATA;
1051 	pkt->tag.vio_subtype = VIO_SUBTYPE_INFO;
1052 	pkt->tag.vio_subtype_env = VIO_PKT_DATA;
1053 	pkt->tag.vio_sid = ldcp->local_sid;
1054 	nbytes = VIO_PKT_DATA_HDRSIZE + size;
1055 
1056 	/* send the msg over ldc */
1057 	rv = vgen_sendmsg(ldcp, (caddr_t)pkt, nbytes, B_FALSE);
1058 	if (rv != VGEN_SUCCESS) {
1059 		(void) atomic_inc_32(&statsp->tx_pri_fail);
1060 		DWARN(vgenp, ldcp, "Error sending priority frame\n");
1061 		if (rv == ECONNRESET) {
1062 			vgen_ldcsend_process_reset(ldcp);
1063 		}
1064 		goto send_pkt_exit;
1065 	}
1066 
1067 	/* update stats */
1068 	(void) atomic_inc_64(&statsp->tx_pri_packets);
1069 	(void) atomic_add_64(&statsp->tx_pri_bytes, size);
1070 
1071 send_pkt_exit:
1072 	if (nmp != NULL)
1073 		freemsg(nmp);
1074 	freemsg(mp);
1075 }
1076 
1077 /*
1078  * This function transmits normal (non-priority) data frames over
1079  * the channel. It queues the frame into the transmit descriptor ring
1080  * and sends a VIO_DRING_DATA message if needed, to wake up the
1081  * peer to (re)start processing.
1082  */
1083 static int
1084 vgen_ldcsend_dring(void *arg, mblk_t *mp)
1085 {
1086 	vgen_ldc_t		*ldcp = (vgen_ldc_t *)arg;
1087 	vgen_private_desc_t	*tbufp;
1088 	vgen_private_desc_t	*rtbufp;
1089 	vnet_public_desc_t	*rtxdp;
1090 	vgen_private_desc_t	*ntbufp;
1091 	vnet_public_desc_t	*txdp;
1092 	vio_dring_entry_hdr_t	*hdrp;
1093 	vgen_stats_t		*statsp;
1094 	struct ether_header	*ehp;
1095 	boolean_t		is_bcast = B_FALSE;
1096 	boolean_t		is_mcast = B_FALSE;
1097 	size_t			mblksz;
1098 	caddr_t			dst;
1099 	mblk_t			*bp;
1100 	size_t			size;
1101 	int			rv = 0;
1102 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
1103 	vgen_hparams_t		*lp = &ldcp->local_hparams;
1104 
1105 	statsp = &ldcp->stats;
1106 	size = msgsize(mp);
1107 
1108 	DBG1(vgenp, ldcp, "enter\n");
1109 
1110 	if (ldcp->ldc_status != LDC_UP) {
1111 		DWARN(vgenp, ldcp, "status(%d), dropping packet\n",
1112 		    ldcp->ldc_status);
1113 		/* retry ldc_up() if needed */
1114 #ifdef	VNET_IOC_DEBUG
1115 		if (ldcp->flags & CHANNEL_STARTED && !ldcp->link_down_forced) {
1116 #else
1117 		if (ldcp->flags & CHANNEL_STARTED) {
1118 #endif
1119 			(void) ldc_up(ldcp->ldc_handle);
1120 		}
1121 		goto send_dring_exit;
1122 	}
1123 
1124 	/* drop the packet if ldc is not up or handshake is not done */
1125 	if (ldcp->hphase != VH_DONE) {
1126 		DWARN(vgenp, ldcp, "hphase(%x), dropping packet\n",
1127 		    ldcp->hphase);
1128 		goto send_dring_exit;
1129 	}
1130 
1131 	if (size > (size_t)lp->mtu) {
1132 		DWARN(vgenp, ldcp, "invalid size(%d)\n", size);
1133 		goto send_dring_exit;
1134 	}
1135 	if (size < ETHERMIN)
1136 		size = ETHERMIN;
1137 
1138 	ehp = (struct ether_header *)mp->b_rptr;
1139 	is_bcast = IS_BROADCAST(ehp);
1140 	is_mcast = IS_MULTICAST(ehp);
1141 
1142 	mutex_enter(&ldcp->txlock);
1143 	/*
1144 	 * allocate a descriptor
1145 	 */
1146 	tbufp = ldcp->next_tbufp;
1147 	ntbufp = NEXTTBUF(ldcp, tbufp);
1148 	if (ntbufp == ldcp->cur_tbufp) { /* out of tbufs/txds */
1149 
1150 		mutex_enter(&ldcp->tclock);
1151 		/* Try reclaiming now */
1152 		vgen_reclaim_dring(ldcp);
1153 		ldcp->reclaim_lbolt = ddi_get_lbolt();
1154 
1155 		if (ntbufp == ldcp->cur_tbufp) {
1156 			/* Now we are really out of tbuf/txds */
1157 			ldcp->need_resched = B_TRUE;
1158 			mutex_exit(&ldcp->tclock);
1159 
1160 			statsp->tx_no_desc++;
1161 			mutex_exit(&ldcp->txlock);
1162 
1163 			return (VGEN_TX_NORESOURCES);
1164 		}
1165 		mutex_exit(&ldcp->tclock);
1166 	}
1167 	/* update next available tbuf in the ring and update tx index */
1168 	ldcp->next_tbufp = ntbufp;
1169 	INCR_TXI(ldcp->next_txi, ldcp);
1170 
1171 	/* Mark the buffer busy before releasing the lock */
1172 	tbufp->flags = VGEN_PRIV_DESC_BUSY;
1173 	mutex_exit(&ldcp->txlock);
1174 
1175 	/* copy data into pre-allocated transmit buffer */
1176 	dst = tbufp->datap + VNET_IPALIGN;
1177 	for (bp = mp; bp != NULL; bp = bp->b_cont) {
1178 		mblksz = MBLKL(bp);
1179 		bcopy(bp->b_rptr, dst, mblksz);
1180 		dst += mblksz;
1181 	}
1182 
1183 	tbufp->datalen = size;
1184 
1185 	/* initialize the corresponding public descriptor (txd) */
1186 	txdp = tbufp->descp;
1187 	hdrp = &txdp->hdr;
1188 	txdp->nbytes = size;
1189 	txdp->ncookies = tbufp->ncookies;
1190 	bcopy((tbufp->memcookie), (txdp->memcookie),
1191 	    tbufp->ncookies * sizeof (ldc_mem_cookie_t));
1192 
1193 	mutex_enter(&ldcp->wrlock);
1194 	/*
1195 	 * If the flags not set to BUSY, it implies that the clobber
1196 	 * was done while we were copying the data. In such case,
1197 	 * discard the packet and return.
1198 	 */
1199 	if (tbufp->flags != VGEN_PRIV_DESC_BUSY) {
1200 		statsp->oerrors++;
1201 		mutex_exit(&ldcp->wrlock);
1202 		goto send_dring_exit;
1203 	}
1204 	hdrp->dstate = VIO_DESC_READY;
1205 
1206 	/* update stats */
1207 	statsp->opackets++;
1208 	statsp->obytes += size;
1209 	if (is_bcast)
1210 		statsp->brdcstxmt++;
1211 	else if (is_mcast)
1212 		statsp->multixmt++;
1213 
1214 	/* send dring datamsg to the peer */
1215 	if (ldcp->resched_peer) {
1216 
1217 		rtbufp = &ldcp->tbufp[ldcp->resched_peer_txi];
1218 		rtxdp = rtbufp->descp;
1219 
1220 		if (rtxdp->hdr.dstate == VIO_DESC_READY) {
1221 
1222 			rv = vgen_send_dring_data(ldcp,
1223 			    (uint32_t)ldcp->resched_peer_txi, -1);
1224 			if (rv != 0) {
1225 				/* error: drop the packet */
1226 				DWARN(vgenp, ldcp, "vgen_send_dring_data "
1227 				    "failed: rv(%d) len(%d)\n",
1228 				    ldcp->ldc_id, rv, size);
1229 				statsp->oerrors++;
1230 			} else {
1231 				ldcp->resched_peer = B_FALSE;
1232 			}
1233 
1234 		}
1235 
1236 	}
1237 
1238 	mutex_exit(&ldcp->wrlock);
1239 
1240 send_dring_exit:
1241 	if (rv == ECONNRESET) {
1242 		vgen_ldcsend_process_reset(ldcp);
1243 	}
1244 	freemsg(mp);
1245 	DBG1(vgenp, ldcp, "exit\n");
1246 	return (VGEN_TX_SUCCESS);
1247 }
1248 
1249 /*
1250  * enable/disable a multicast address
1251  * note that the cblock of the ldc channel connected to the vsw is used for
1252  * synchronization of the mctab.
1253  */
1254 int
1255 vgen_multicst(void *arg, boolean_t add, const uint8_t *mca)
1256 {
1257 	vgen_t			*vgenp;
1258 	vnet_mcast_msg_t	mcastmsg;
1259 	vio_msg_tag_t		*tagp;
1260 	vgen_port_t		*portp;
1261 	vgen_ldc_t		*ldcp;
1262 	vgen_ldclist_t		*ldclp;
1263 	struct ether_addr	*addrp;
1264 	int			rv = DDI_FAILURE;
1265 	uint32_t		i;
1266 
1267 	portp = (vgen_port_t *)arg;
1268 	vgenp = portp->vgenp;
1269 
1270 	if (portp->is_vsw_port != B_TRUE) {
1271 		return (DDI_SUCCESS);
1272 	}
1273 
1274 	addrp = (struct ether_addr *)mca;
1275 	tagp = &mcastmsg.tag;
1276 	bzero(&mcastmsg, sizeof (mcastmsg));
1277 
1278 	ldclp = &portp->ldclist;
1279 
1280 	READ_ENTER(&ldclp->rwlock);
1281 
1282 	ldcp = ldclp->headp;
1283 	if (ldcp == NULL) {
1284 		RW_EXIT(&ldclp->rwlock);
1285 		return (DDI_FAILURE);
1286 	}
1287 
1288 	mutex_enter(&ldcp->cblock);
1289 
1290 	if (ldcp->hphase == VH_DONE) {
1291 		/*
1292 		 * If handshake is done, send a msg to vsw to add/remove
1293 		 * the multicast address. Otherwise, we just update this
1294 		 * mcast address in our table and the table will be sync'd
1295 		 * with vsw when handshake completes.
1296 		 */
1297 		tagp->vio_msgtype = VIO_TYPE_CTRL;
1298 		tagp->vio_subtype = VIO_SUBTYPE_INFO;
1299 		tagp->vio_subtype_env = VNET_MCAST_INFO;
1300 		tagp->vio_sid = ldcp->local_sid;
1301 		bcopy(mca, &(mcastmsg.mca), ETHERADDRL);
1302 		mcastmsg.set = add;
1303 		mcastmsg.count = 1;
1304 		if (vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (mcastmsg),
1305 		    B_FALSE) != VGEN_SUCCESS) {
1306 			DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
1307 			rv = DDI_FAILURE;
1308 			goto vgen_mcast_exit;
1309 		}
1310 	}
1311 
1312 	if (add) {
1313 
1314 		/* expand multicast table if necessary */
1315 		if (vgenp->mccount >= vgenp->mcsize) {
1316 			struct ether_addr	*newtab;
1317 			uint32_t		newsize;
1318 
1319 
1320 			newsize = vgenp->mcsize * 2;
1321 
1322 			newtab = kmem_zalloc(newsize *
1323 			    sizeof (struct ether_addr), KM_NOSLEEP);
1324 			if (newtab == NULL)
1325 				goto vgen_mcast_exit;
1326 			bcopy(vgenp->mctab, newtab, vgenp->mcsize *
1327 			    sizeof (struct ether_addr));
1328 			kmem_free(vgenp->mctab,
1329 			    vgenp->mcsize * sizeof (struct ether_addr));
1330 
1331 			vgenp->mctab = newtab;
1332 			vgenp->mcsize = newsize;
1333 		}
1334 
1335 		/* add address to the table */
1336 		vgenp->mctab[vgenp->mccount++] = *addrp;
1337 
1338 	} else {
1339 
1340 		/* delete address from the table */
1341 		for (i = 0; i < vgenp->mccount; i++) {
1342 			if (ether_cmp(addrp, &(vgenp->mctab[i])) == 0) {
1343 
1344 				/*
1345 				 * If there's more than one address in this
1346 				 * table, delete the unwanted one by moving
1347 				 * the last one in the list over top of it;
1348 				 * otherwise, just remove it.
1349 				 */
1350 				if (vgenp->mccount > 1) {
1351 					vgenp->mctab[i] =
1352 					    vgenp->mctab[vgenp->mccount-1];
1353 				}
1354 				vgenp->mccount--;
1355 				break;
1356 			}
1357 		}
1358 	}
1359 
1360 	rv = DDI_SUCCESS;
1361 
1362 vgen_mcast_exit:
1363 	mutex_exit(&ldcp->cblock);
1364 	RW_EXIT(&ldclp->rwlock);
1365 
1366 	return (rv);
1367 }
1368 
1369 /* set or clear promiscuous mode on the device */
1370 static int
1371 vgen_promisc(void *arg, boolean_t on)
1372 {
1373 	_NOTE(ARGUNUSED(arg, on))
1374 	return (DDI_SUCCESS);
1375 }
1376 
1377 /* set the unicast mac address of the device */
1378 static int
1379 vgen_unicst(void *arg, const uint8_t *mca)
1380 {
1381 	_NOTE(ARGUNUSED(arg, mca))
1382 	return (DDI_SUCCESS);
1383 }
1384 
1385 /* get device statistics */
1386 int
1387 vgen_stat(void *arg, uint_t stat, uint64_t *val)
1388 {
1389 	vgen_port_t	*portp = (vgen_port_t *)arg;
1390 
1391 	*val = vgen_port_stat(portp, stat);
1392 
1393 	return (0);
1394 }
1395 
1396 /* vgen internal functions */
1397 /* detach all ports from the device */
1398 static void
1399 vgen_detach_ports(vgen_t *vgenp)
1400 {
1401 	vgen_port_t	*portp;
1402 	vgen_portlist_t	*plistp;
1403 
1404 	plistp = &(vgenp->vgenports);
1405 	WRITE_ENTER(&plistp->rwlock);
1406 	while ((portp = plistp->headp) != NULL) {
1407 		vgen_port_detach(portp);
1408 	}
1409 	RW_EXIT(&plistp->rwlock);
1410 }
1411 
/*
 * detach the given port.
 *
 * Unregisters the port's vio net resource (if registered), destroys its
 * vlan hash, unlinks it from the device's port list, detaches all of its
 * channels and finally frees the port structure itself. portp must not be
 * used by the caller after this returns.
 */
static void
vgen_port_detach(vgen_port_t *portp)
{
	vgen_t		*vgenp;
	vgen_ldclist_t	*ldclp;
	int		port_num;

	/* saved up front: portp is freed before the final debug message */
	vgenp = portp->vgenp;
	port_num = portp->port_num;

	DBG1(vgenp, NULL, "port(%d):enter\n", port_num);

	/*
	 * If this port is connected to the vswitch, then
	 * potentially there could be ports that may be using
	 * this port to transmit packets. To address this do
	 * the following:
	 *	- First set vgenp->vsw_portp to NULL, so that
	 *	  its not used after that.
	 *	- Then wait for the refcnt to go down to 0.
	 *	- Now we can safely detach this port.
	 */
	if (vgenp->vsw_portp == portp) {
		vgenp->vsw_portp = NULL;
		/* poll, sleeping vgen_tx_delay usecs between checks */
		while (vgenp->vsw_port_refcnt > 0) {
			delay(drv_usectohz(vgen_tx_delay));
		}
		(void) atomic_swap_32(&vgenp->vsw_port_refcnt, 0);
	}

	if (portp->vhp != NULL) {
		vio_net_resource_unreg(portp->vhp);
		portp->vhp = NULL;
	}

	vgen_vlan_destroy_hash(portp);

	/* remove it from port list */
	vgen_port_list_remove(portp);

	/* detach channels from this port */
	ldclp = &portp->ldclist;
	WRITE_ENTER(&ldclp->rwlock);
	/* loop until the channel list is empty */
	while (ldclp->headp) {
		vgen_ldc_detach(ldclp->headp);
	}
	RW_EXIT(&ldclp->rwlock);
	rw_destroy(&ldclp->rwlock);

	/* release the array of channel ids, if one was allocated */
	if (portp->num_ldcs != 0) {
		kmem_free(portp->ldc_ids, portp->num_ldcs * sizeof (uint64_t));
		portp->num_ldcs = 0;
	}

	mutex_destroy(&portp->lock);
	KMEM_FREE(portp);

	DBG1(vgenp, NULL, "port(%d):exit\n", port_num);
}
1474 
1475 /* add a port to port list */
1476 static void
1477 vgen_port_list_insert(vgen_port_t *portp)
1478 {
1479 	vgen_portlist_t *plistp;
1480 	vgen_t *vgenp;
1481 
1482 	vgenp = portp->vgenp;
1483 	plistp = &(vgenp->vgenports);
1484 
1485 	if (plistp->headp == NULL) {
1486 		plistp->headp = portp;
1487 	} else {
1488 		plistp->tailp->nextp = portp;
1489 	}
1490 	plistp->tailp = portp;
1491 	portp->nextp = NULL;
1492 }
1493 
1494 /* remove a port from port list */
1495 static void
1496 vgen_port_list_remove(vgen_port_t *portp)
1497 {
1498 	vgen_port_t *prevp;
1499 	vgen_port_t *nextp;
1500 	vgen_portlist_t *plistp;
1501 	vgen_t *vgenp;
1502 
1503 	vgenp = portp->vgenp;
1504 
1505 	plistp = &(vgenp->vgenports);
1506 
1507 	if (plistp->headp == NULL)
1508 		return;
1509 
1510 	if (portp == plistp->headp) {
1511 		plistp->headp = portp->nextp;
1512 		if (portp == plistp->tailp)
1513 			plistp->tailp = plistp->headp;
1514 	} else {
1515 		for (prevp = plistp->headp;
1516 		    ((nextp = prevp->nextp) != NULL) && (nextp != portp);
1517 		    prevp = nextp)
1518 			;
1519 		if (nextp == portp) {
1520 			prevp->nextp = portp->nextp;
1521 		}
1522 		if (portp == plistp->tailp)
1523 			plistp->tailp = prevp;
1524 	}
1525 }
1526 
1527 /* lookup a port in the list based on port_num */
1528 static vgen_port_t *
1529 vgen_port_lookup(vgen_portlist_t *plistp, int port_num)
1530 {
1531 	vgen_port_t *portp = NULL;
1532 
1533 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
1534 		if (portp->port_num == port_num) {
1535 			break;
1536 		}
1537 	}
1538 
1539 	return (portp);
1540 }
1541 
1542 /* enable ports for transmit/receive */
1543 static void
1544 vgen_init_ports(vgen_t *vgenp)
1545 {
1546 	vgen_port_t	*portp;
1547 	vgen_portlist_t	*plistp;
1548 
1549 	plistp = &(vgenp->vgenports);
1550 	READ_ENTER(&plistp->rwlock);
1551 
1552 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
1553 		vgen_port_init(portp);
1554 	}
1555 
1556 	RW_EXIT(&plistp->rwlock);
1557 }
1558 
1559 static void
1560 vgen_port_init(vgen_port_t *portp)
1561 {
1562 	/* Add the port to the specified vlans */
1563 	vgen_vlan_add_ids(portp);
1564 
1565 	/* Bring up the channels of this port */
1566 	vgen_init_ldcs(portp);
1567 }
1568 
1569 /* disable transmit/receive on ports */
1570 static void
1571 vgen_uninit_ports(vgen_t *vgenp)
1572 {
1573 	vgen_port_t	*portp;
1574 	vgen_portlist_t	*plistp;
1575 
1576 	plistp = &(vgenp->vgenports);
1577 	READ_ENTER(&plistp->rwlock);
1578 
1579 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
1580 		vgen_port_uninit(portp);
1581 	}
1582 
1583 	RW_EXIT(&plistp->rwlock);
1584 }
1585 
1586 static void
1587 vgen_port_uninit(vgen_port_t *portp)
1588 {
1589 	vgen_uninit_ldcs(portp);
1590 
1591 	/* remove the port from vlans it has been assigned to */
1592 	vgen_vlan_remove_ids(portp);
1593 }
1594 
1595 /*
1596  * Scan the machine description for this instance of vnet
1597  * and read its properties. Called only from vgen_init().
1598  * Returns: 0 on success, 1 on failure.
1599  */
1600 static int
1601 vgen_read_mdprops(vgen_t *vgenp)
1602 {
1603 	vnet_t		*vnetp = vgenp->vnetp;
1604 	md_t		*mdp = NULL;
1605 	mde_cookie_t	rootnode;
1606 	mde_cookie_t	*listp = NULL;
1607 	uint64_t	cfgh;
1608 	char		*name;
1609 	int		rv = 1;
1610 	int		num_nodes = 0;
1611 	int		num_devs = 0;
1612 	int		listsz = 0;
1613 	int		i;
1614 
1615 	if ((mdp = md_get_handle()) == NULL) {
1616 		return (rv);
1617 	}
1618 
1619 	num_nodes = md_node_count(mdp);
1620 	ASSERT(num_nodes > 0);
1621 
1622 	listsz = num_nodes * sizeof (mde_cookie_t);
1623 	listp = (mde_cookie_t *)kmem_zalloc(listsz, KM_SLEEP);
1624 
1625 	rootnode = md_root_node(mdp);
1626 
1627 	/* search for all "virtual_device" nodes */
1628 	num_devs = md_scan_dag(mdp, rootnode,
1629 	    md_find_name(mdp, vdev_propname),
1630 	    md_find_name(mdp, "fwd"), listp);
1631 	if (num_devs <= 0) {
1632 		goto vgen_readmd_exit;
1633 	}
1634 
1635 	/*
1636 	 * Now loop through the list of virtual-devices looking for
1637 	 * devices with name "network" and for each such device compare
1638 	 * its instance with what we have from the 'reg' property to
1639 	 * find the right node in MD and then read all its properties.
1640 	 */
1641 	for (i = 0; i < num_devs; i++) {
1642 
1643 		if (md_get_prop_str(mdp, listp[i], "name", &name) != 0) {
1644 			goto vgen_readmd_exit;
1645 		}
1646 
1647 		/* is this a "network" device? */
1648 		if (strcmp(name, vnet_propname) != 0)
1649 			continue;
1650 
1651 		if (md_get_prop_val(mdp, listp[i], "cfg-handle", &cfgh) != 0) {
1652 			goto vgen_readmd_exit;
1653 		}
1654 
1655 		/* is this the required instance of vnet? */
1656 		if (vgenp->regprop != cfgh)
1657 			continue;
1658 
1659 		/*
1660 		 * Read the 'linkprop' property to know if this vnet
1661 		 * device should get physical link updates from vswitch.
1662 		 */
1663 		vgen_linkprop_read(vgenp, mdp, listp[i],
1664 		    &vnetp->pls_update);
1665 
1666 		/*
1667 		 * Read the mtu. Note that we set the mtu of vnet device within
1668 		 * this routine itself, after validating the range.
1669 		 */
1670 		vgen_mtu_read(vgenp, mdp, listp[i], &vnetp->mtu);
1671 		if (vnetp->mtu < ETHERMTU || vnetp->mtu > VNET_MAX_MTU) {
1672 			vnetp->mtu = ETHERMTU;
1673 		}
1674 		vgenp->max_frame_size = vnetp->mtu +
1675 		    sizeof (struct ether_header) + VLAN_TAGSZ;
1676 
1677 		/* read priority ether types */
1678 		vgen_read_pri_eth_types(vgenp, mdp, listp[i]);
1679 
1680 		/* read vlan id properties of this vnet instance */
1681 		vgen_vlan_read_ids(vgenp, VGEN_LOCAL, mdp, listp[i],
1682 		    &vnetp->pvid, &vnetp->vids, &vnetp->nvids,
1683 		    &vnetp->default_vlan_id);
1684 
1685 		rv = 0;
1686 		break;
1687 	}
1688 
1689 vgen_readmd_exit:
1690 
1691 	kmem_free(listp, listsz);
1692 	(void) md_fini_handle(mdp);
1693 	return (rv);
1694 }
1695 
1696 /*
1697  * Read vlan id properties of the given MD node.
1698  * Arguments:
1699  *   arg:          device argument(vnet device or a port)
1700  *   type:         type of arg; VGEN_LOCAL(vnet device) or VGEN_PEER(port)
1701  *   mdp:          machine description
1702  *   node:         md node cookie
1703  *
1704  * Returns:
1705  *   pvidp:        port-vlan-id of the node
1706  *   vidspp:       list of vlan-ids of the node
1707  *   nvidsp:       # of vlan-ids in the list
1708  *   default_idp:  default-vlan-id of the node(if node is vnet device)
1709  */
1710 static void
1711 vgen_vlan_read_ids(void *arg, int type, md_t *mdp, mde_cookie_t node,
1712 	uint16_t *pvidp, uint16_t **vidspp, uint16_t *nvidsp,
1713 	uint16_t *default_idp)
1714 {
1715 	vgen_t		*vgenp;
1716 	vnet_t		*vnetp;
1717 	vgen_port_t	*portp;
1718 	char		*pvid_propname;
1719 	char		*vid_propname;
1720 	uint_t		nvids;
1721 	uint32_t	vids_size;
1722 	int		rv;
1723 	int		i;
1724 	uint64_t	*data;
1725 	uint64_t	val;
1726 	int		size;
1727 	int		inst;
1728 
1729 	if (type == VGEN_LOCAL) {
1730 
1731 		vgenp = (vgen_t *)arg;
1732 		vnetp = vgenp->vnetp;
1733 		pvid_propname = vgen_pvid_propname;
1734 		vid_propname = vgen_vid_propname;
1735 		inst = vnetp->instance;
1736 
1737 	} else if (type == VGEN_PEER) {
1738 
1739 		portp = (vgen_port_t *)arg;
1740 		vgenp = portp->vgenp;
1741 		vnetp = vgenp->vnetp;
1742 		pvid_propname = port_pvid_propname;
1743 		vid_propname = port_vid_propname;
1744 		inst = portp->port_num;
1745 
1746 	} else {
1747 		return;
1748 	}
1749 
1750 	if (type == VGEN_LOCAL && default_idp != NULL) {
1751 		rv = md_get_prop_val(mdp, node, vgen_dvid_propname, &val);
1752 		if (rv != 0) {
1753 			DWARN(vgenp, NULL, "prop(%s) not found",
1754 			    vgen_dvid_propname);
1755 
1756 			*default_idp = vnet_default_vlan_id;
1757 		} else {
1758 			*default_idp = val & 0xFFF;
1759 			DBG2(vgenp, NULL, "%s(%d): (%d)\n", vgen_dvid_propname,
1760 			    inst, *default_idp);
1761 		}
1762 	}
1763 
1764 	rv = md_get_prop_val(mdp, node, pvid_propname, &val);
1765 	if (rv != 0) {
1766 		DWARN(vgenp, NULL, "prop(%s) not found", pvid_propname);
1767 		*pvidp = vnet_default_vlan_id;
1768 	} else {
1769 
1770 		*pvidp = val & 0xFFF;
1771 		DBG2(vgenp, NULL, "%s(%d): (%d)\n",
1772 		    pvid_propname, inst, *pvidp);
1773 	}
1774 
1775 	rv = md_get_prop_data(mdp, node, vid_propname, (uint8_t **)&data,
1776 	    &size);
1777 	if (rv != 0) {
1778 		DBG2(vgenp, NULL, "prop(%s) not found", vid_propname);
1779 		size = 0;
1780 	} else {
1781 		size /= sizeof (uint64_t);
1782 	}
1783 	nvids = size;
1784 
1785 	if (nvids != 0) {
1786 		DBG2(vgenp, NULL, "%s(%d): ", vid_propname, inst);
1787 		vids_size = sizeof (uint16_t) * nvids;
1788 		*vidspp = kmem_zalloc(vids_size, KM_SLEEP);
1789 		for (i = 0; i < nvids; i++) {
1790 			(*vidspp)[i] = data[i] & 0xFFFF;
1791 			DBG2(vgenp, NULL, " %d ", (*vidspp)[i]);
1792 		}
1793 		DBG2(vgenp, NULL, "\n");
1794 	}
1795 
1796 	*nvidsp = nvids;
1797 }
1798 
1799 /*
1800  * Create a vlan id hash table for the given port.
1801  */
1802 static void
1803 vgen_vlan_create_hash(vgen_port_t *portp)
1804 {
1805 	char		hashname[MAXNAMELEN];
1806 
1807 	(void) snprintf(hashname, MAXNAMELEN, "port%d-vlan-hash",
1808 	    portp->port_num);
1809 
1810 	portp->vlan_nchains = vgen_vlan_nchains;
1811 	portp->vlan_hashp = mod_hash_create_idhash(hashname,
1812 	    portp->vlan_nchains, mod_hash_null_valdtor);
1813 }
1814 
1815 /*
1816  * Destroy the vlan id hash table in the given port.
1817  */
1818 static void
1819 vgen_vlan_destroy_hash(vgen_port_t *portp)
1820 {
1821 	if (portp->vlan_hashp != NULL) {
1822 		mod_hash_destroy_hash(portp->vlan_hashp);
1823 		portp->vlan_hashp = NULL;
1824 		portp->vlan_nchains = 0;
1825 	}
1826 }
1827 
1828 /*
1829  * Add a port to the vlans specified in its port properites.
1830  */
1831 static void
1832 vgen_vlan_add_ids(vgen_port_t *portp)
1833 {
1834 	int		rv;
1835 	int		i;
1836 
1837 	rv = mod_hash_insert(portp->vlan_hashp,
1838 	    (mod_hash_key_t)VLAN_ID_KEY(portp->pvid),
1839 	    (mod_hash_val_t)B_TRUE);
1840 	ASSERT(rv == 0);
1841 
1842 	for (i = 0; i < portp->nvids; i++) {
1843 		rv = mod_hash_insert(portp->vlan_hashp,
1844 		    (mod_hash_key_t)VLAN_ID_KEY(portp->vids[i]),
1845 		    (mod_hash_val_t)B_TRUE);
1846 		ASSERT(rv == 0);
1847 	}
1848 }
1849 
1850 /*
1851  * Remove a port from the vlans it has been assigned to.
1852  */
1853 static void
1854 vgen_vlan_remove_ids(vgen_port_t *portp)
1855 {
1856 	int		rv;
1857 	int		i;
1858 	mod_hash_val_t	vp;
1859 
1860 	rv = mod_hash_remove(portp->vlan_hashp,
1861 	    (mod_hash_key_t)VLAN_ID_KEY(portp->pvid),
1862 	    (mod_hash_val_t *)&vp);
1863 	ASSERT(rv == 0);
1864 
1865 	for (i = 0; i < portp->nvids; i++) {
1866 		rv = mod_hash_remove(portp->vlan_hashp,
1867 		    (mod_hash_key_t)VLAN_ID_KEY(portp->vids[i]),
1868 		    (mod_hash_val_t *)&vp);
1869 		ASSERT(rv == 0);
1870 	}
1871 }
1872 
1873 /*
1874  * Lookup the vlan id of the given tx frame. If it is a vlan-tagged frame,
1875  * then the vlan-id is available in the tag; otherwise, its vlan id is
1876  * implicitly obtained from the port-vlan-id of the vnet device.
1877  * The vlan id determined is returned in vidp.
1878  * Returns: B_TRUE if it is a tagged frame; B_FALSE if it is untagged.
1879  */
1880 static boolean_t
1881 vgen_frame_lookup_vid(vnet_t *vnetp, struct ether_header *ehp, uint16_t *vidp)
1882 {
1883 	struct ether_vlan_header	*evhp;
1884 
1885 	/* If it's a tagged frame, get the vlan id from vlan header */
1886 	if (ehp->ether_type == ETHERTYPE_VLAN) {
1887 
1888 		evhp = (struct ether_vlan_header *)ehp;
1889 		*vidp = VLAN_ID(ntohs(evhp->ether_tci));
1890 		return (B_TRUE);
1891 	}
1892 
1893 	/* Untagged frame, vlan-id is the pvid of vnet device */
1894 	*vidp = vnetp->pvid;
1895 	return (B_FALSE);
1896 }
1897 
1898 /*
1899  * Find the given vlan id in the hash table.
1900  * Return: B_TRUE if the id is found; B_FALSE if not found.
1901  */
1902 static boolean_t
1903 vgen_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid)
1904 {
1905 	int		rv;
1906 	mod_hash_val_t	vp;
1907 
1908 	rv = mod_hash_find(vlan_hashp, VLAN_ID_KEY(vid), (mod_hash_val_t *)&vp);
1909 
1910 	if (rv != 0)
1911 		return (B_FALSE);
1912 
1913 	return (B_TRUE);
1914 }
1915 
1916 /*
1917  * This function reads "priority-ether-types" property from md. This property
1918  * is used to enable support for priority frames. Applications which need
1919  * guaranteed and timely delivery of certain high priority frames to/from
1920  * a vnet or vsw within ldoms, should configure this property by providing
1921  * the ether type(s) for which the priority facility is needed.
1922  * Normal data frames are delivered over a ldc channel using the descriptor
1923  * ring mechanism which is constrained by factors such as descriptor ring size,
1924  * the rate at which the ring is processed at the peer ldc end point, etc.
1925  * The priority mechanism provides an Out-Of-Band path to send/receive frames
1926  * as raw pkt data (VIO_PKT_DATA) messages over the channel, avoiding the
1927  * descriptor ring path and enables a more reliable and timely delivery of
1928  * frames to the peer.
1929  */
1930 static void
1931 vgen_read_pri_eth_types(vgen_t *vgenp, md_t *mdp, mde_cookie_t node)
1932 {
1933 	int		rv;
1934 	uint16_t	*types;
1935 	uint64_t	*data;
1936 	int		size;
1937 	int		i;
1938 	size_t		mblk_sz;
1939 
1940 	rv = md_get_prop_data(mdp, node, pri_types_propname,
1941 	    (uint8_t **)&data, &size);
1942 	if (rv != 0) {
1943 		/*
1944 		 * Property may not exist if we are running pre-ldoms1.1 f/w.
1945 		 * Check if 'vgen_pri_eth_type' has been set in that case.
1946 		 */
1947 		if (vgen_pri_eth_type != 0) {
1948 			size = sizeof (vgen_pri_eth_type);
1949 			data = &vgen_pri_eth_type;
1950 		} else {
1951 			DBG2(vgenp, NULL,
1952 			    "prop(%s) not found", pri_types_propname);
1953 			size = 0;
1954 		}
1955 	}
1956 
1957 	if (size == 0) {
1958 		vgenp->pri_num_types = 0;
1959 		return;
1960 	}
1961 
1962 	/*
1963 	 * we have some priority-ether-types defined;
1964 	 * allocate a table of these types and also
1965 	 * allocate a pool of mblks to transmit these
1966 	 * priority packets.
1967 	 */
1968 	size /= sizeof (uint64_t);
1969 	vgenp->pri_num_types = size;
1970 	vgenp->pri_types = kmem_zalloc(size * sizeof (uint16_t), KM_SLEEP);
1971 	for (i = 0, types = vgenp->pri_types; i < size; i++) {
1972 		types[i] = data[i] & 0xFFFF;
1973 	}
1974 	mblk_sz = (VIO_PKT_DATA_HDRSIZE + vgenp->max_frame_size + 7) & ~7;
1975 	(void) vio_create_mblks(vgen_pri_tx_nmblks, mblk_sz,
1976 	    &vgenp->pri_tx_vmp);
1977 }
1978 
1979 static void
1980 vgen_mtu_read(vgen_t *vgenp, md_t *mdp, mde_cookie_t node, uint32_t *mtu)
1981 {
1982 	int		rv;
1983 	uint64_t	val;
1984 	char		*mtu_propname;
1985 
1986 	mtu_propname = vgen_mtu_propname;
1987 
1988 	rv = md_get_prop_val(mdp, node, mtu_propname, &val);
1989 	if (rv != 0) {
1990 		DWARN(vgenp, NULL, "prop(%s) not found", mtu_propname);
1991 		*mtu = vnet_ethermtu;
1992 	} else {
1993 
1994 		*mtu = val & 0xFFFF;
1995 		DBG2(vgenp, NULL, "%s(%d): (%d)\n", mtu_propname,
1996 		    vgenp->instance, *mtu);
1997 	}
1998 }
1999 
2000 static void
2001 vgen_linkprop_read(vgen_t *vgenp, md_t *mdp, mde_cookie_t node,
2002 	boolean_t *pls)
2003 {
2004 	int		rv;
2005 	uint64_t	val;
2006 	char		*linkpropname;
2007 
2008 	linkpropname = vgen_linkprop_propname;
2009 
2010 	rv = md_get_prop_val(mdp, node, linkpropname, &val);
2011 	if (rv != 0) {
2012 		DWARN(vgenp, NULL, "prop(%s) not found", linkpropname);
2013 		*pls = B_FALSE;
2014 	} else {
2015 
2016 		*pls = (val & 0x1) ?  B_TRUE : B_FALSE;
2017 		DBG2(vgenp, NULL, "%s(%d): (%d)\n", linkpropname,
2018 		    vgenp->instance, *pls);
2019 	}
2020 }
2021 
2022 /* register with MD event generator */
2023 static int
2024 vgen_mdeg_reg(vgen_t *vgenp)
2025 {
2026 	mdeg_prop_spec_t	*pspecp;
2027 	mdeg_node_spec_t	*parentp;
2028 	uint_t			templatesz;
2029 	int			rv;
2030 	mdeg_handle_t		dev_hdl = NULL;
2031 	mdeg_handle_t		port_hdl = NULL;
2032 
2033 	templatesz = sizeof (vgen_prop_template);
2034 	pspecp = kmem_zalloc(templatesz, KM_NOSLEEP);
2035 	if (pspecp == NULL) {
2036 		return (DDI_FAILURE);
2037 	}
2038 	parentp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_NOSLEEP);
2039 	if (parentp == NULL) {
2040 		kmem_free(pspecp, templatesz);
2041 		return (DDI_FAILURE);
2042 	}
2043 
2044 	bcopy(vgen_prop_template, pspecp, templatesz);
2045 
2046 	/*
2047 	 * NOTE: The instance here refers to the value of "reg" property and
2048 	 * not the dev_info instance (ddi_get_instance()) of vnet.
2049 	 */
2050 	VGEN_SET_MDEG_PROP_INST(pspecp, vgenp->regprop);
2051 
2052 	parentp->namep = "virtual-device";
2053 	parentp->specp = pspecp;
2054 
2055 	/* save parentp in vgen_t */
2056 	vgenp->mdeg_parentp = parentp;
2057 
2058 	/*
2059 	 * Register an interest in 'virtual-device' nodes with a
2060 	 * 'name' property of 'network'
2061 	 */
2062 	rv = mdeg_register(parentp, &vdev_match, vgen_mdeg_cb, vgenp, &dev_hdl);
2063 	if (rv != MDEG_SUCCESS) {
2064 		DERR(vgenp, NULL, "mdeg_register failed\n");
2065 		goto mdeg_reg_fail;
2066 	}
2067 
2068 	/* Register an interest in 'port' nodes */
2069 	rv = mdeg_register(parentp, &vport_match, vgen_mdeg_port_cb, vgenp,
2070 	    &port_hdl);
2071 	if (rv != MDEG_SUCCESS) {
2072 		DERR(vgenp, NULL, "mdeg_register failed\n");
2073 		goto mdeg_reg_fail;
2074 	}
2075 
2076 	/* save mdeg handle in vgen_t */
2077 	vgenp->mdeg_dev_hdl = dev_hdl;
2078 	vgenp->mdeg_port_hdl = port_hdl;
2079 
2080 	return (DDI_SUCCESS);
2081 
2082 mdeg_reg_fail:
2083 	if (dev_hdl != NULL) {
2084 		(void) mdeg_unregister(dev_hdl);
2085 	}
2086 	KMEM_FREE(parentp);
2087 	kmem_free(pspecp, templatesz);
2088 	vgenp->mdeg_parentp = NULL;
2089 	return (DDI_FAILURE);
2090 }
2091 
2092 /* unregister with MD event generator */
2093 static void
2094 vgen_mdeg_unreg(vgen_t *vgenp)
2095 {
2096 	if (vgenp->mdeg_dev_hdl != NULL) {
2097 		(void) mdeg_unregister(vgenp->mdeg_dev_hdl);
2098 		vgenp->mdeg_dev_hdl = NULL;
2099 	}
2100 	if (vgenp->mdeg_port_hdl != NULL) {
2101 		(void) mdeg_unregister(vgenp->mdeg_port_hdl);
2102 		vgenp->mdeg_port_hdl = NULL;
2103 	}
2104 
2105 	if (vgenp->mdeg_parentp != NULL) {
2106 		kmem_free(vgenp->mdeg_parentp->specp,
2107 		    sizeof (vgen_prop_template));
2108 		KMEM_FREE(vgenp->mdeg_parentp);
2109 		vgenp->mdeg_parentp = NULL;
2110 	}
2111 }
2112 
2113 /* mdeg callback function for the port node */
2114 static int
2115 vgen_mdeg_port_cb(void *cb_argp, mdeg_result_t *resp)
2116 {
2117 	int idx;
2118 	int vsw_idx = -1;
2119 	uint64_t val;
2120 	vgen_t *vgenp;
2121 
2122 	if ((resp == NULL) || (cb_argp == NULL)) {
2123 		return (MDEG_FAILURE);
2124 	}
2125 
2126 	vgenp = (vgen_t *)cb_argp;
2127 	DBG1(vgenp, NULL, "enter\n");
2128 
2129 	mutex_enter(&vgenp->lock);
2130 
2131 	DBG1(vgenp, NULL, "ports: removed(%x), "
2132 	"added(%x), updated(%x)\n", resp->removed.nelem,
2133 	    resp->added.nelem, resp->match_curr.nelem);
2134 
2135 	for (idx = 0; idx < resp->removed.nelem; idx++) {
2136 		(void) vgen_remove_port(vgenp, resp->removed.mdp,
2137 		    resp->removed.mdep[idx]);
2138 	}
2139 
2140 	if (vgenp->vsw_portp == NULL) {
2141 		/*
2142 		 * find vsw_port and add it first, because other ports need
2143 		 * this when adding fdb entry (see vgen_port_init()).
2144 		 */
2145 		for (idx = 0; idx < resp->added.nelem; idx++) {
2146 			if (!(md_get_prop_val(resp->added.mdp,
2147 			    resp->added.mdep[idx], swport_propname, &val))) {
2148 				if (val == 0) {
2149 					/*
2150 					 * This port is connected to the
2151 					 * vsw on service domain.
2152 					 */
2153 					vsw_idx = idx;
2154 					if (vgen_add_port(vgenp,
2155 					    resp->added.mdp,
2156 					    resp->added.mdep[idx]) !=
2157 					    DDI_SUCCESS) {
2158 						cmn_err(CE_NOTE, "vnet%d Could "
2159 						    "not initialize virtual "
2160 						    "switch port.",
2161 						    vgenp->instance);
2162 						mutex_exit(&vgenp->lock);
2163 						return (MDEG_FAILURE);
2164 					}
2165 					break;
2166 				}
2167 			}
2168 		}
2169 		if (vsw_idx == -1) {
2170 			DWARN(vgenp, NULL, "can't find vsw_port\n");
2171 			mutex_exit(&vgenp->lock);
2172 			return (MDEG_FAILURE);
2173 		}
2174 	}
2175 
2176 	for (idx = 0; idx < resp->added.nelem; idx++) {
2177 		if ((vsw_idx != -1) && (vsw_idx == idx)) /* skip vsw_port */
2178 			continue;
2179 
2180 		/* If this port can't be added just skip it. */
2181 		(void) vgen_add_port(vgenp, resp->added.mdp,
2182 		    resp->added.mdep[idx]);
2183 	}
2184 
2185 	for (idx = 0; idx < resp->match_curr.nelem; idx++) {
2186 		(void) vgen_update_port(vgenp, resp->match_curr.mdp,
2187 		    resp->match_curr.mdep[idx],
2188 		    resp->match_prev.mdp,
2189 		    resp->match_prev.mdep[idx]);
2190 	}
2191 
2192 	mutex_exit(&vgenp->lock);
2193 	DBG1(vgenp, NULL, "exit\n");
2194 	return (MDEG_SUCCESS);
2195 }
2196 
2197 /* mdeg callback function for the vnet node */
2198 static int
2199 vgen_mdeg_cb(void *cb_argp, mdeg_result_t *resp)
2200 {
2201 	vgen_t		*vgenp;
2202 	vnet_t		*vnetp;
2203 	md_t		*mdp;
2204 	mde_cookie_t	node;
2205 	uint64_t	inst;
2206 	char		*node_name = NULL;
2207 
2208 	if ((resp == NULL) || (cb_argp == NULL)) {
2209 		return (MDEG_FAILURE);
2210 	}
2211 
2212 	vgenp = (vgen_t *)cb_argp;
2213 	vnetp = vgenp->vnetp;
2214 
2215 	DBG1(vgenp, NULL, "added %d : removed %d : curr matched %d"
2216 	    " : prev matched %d", resp->added.nelem, resp->removed.nelem,
2217 	    resp->match_curr.nelem, resp->match_prev.nelem);
2218 
2219 	mutex_enter(&vgenp->lock);
2220 
2221 	/*
2222 	 * We get an initial callback for this node as 'added' after
2223 	 * registering with mdeg. Note that we would have already gathered
2224 	 * information about this vnet node by walking MD earlier during attach
2225 	 * (in vgen_read_mdprops()). So, there is a window where the properties
2226 	 * of this node might have changed when we get this initial 'added'
2227 	 * callback. We handle this as if an update occured and invoke the same
2228 	 * function which handles updates to the properties of this vnet-node
2229 	 * if any. A non-zero 'match' value indicates that the MD has been
2230 	 * updated and that a 'network' node is present which may or may not
2231 	 * have been updated. It is up to the clients to examine their own
2232 	 * nodes and determine if they have changed.
2233 	 */
2234 	if (resp->added.nelem != 0) {
2235 
2236 		if (resp->added.nelem != 1) {
2237 			cmn_err(CE_NOTE, "!vnet%d: number of nodes added "
2238 			    "invalid: %d\n", vnetp->instance,
2239 			    resp->added.nelem);
2240 			goto vgen_mdeg_cb_err;
2241 		}
2242 
2243 		mdp = resp->added.mdp;
2244 		node = resp->added.mdep[0];
2245 
2246 	} else if (resp->match_curr.nelem != 0) {
2247 
2248 		if (resp->match_curr.nelem != 1) {
2249 			cmn_err(CE_NOTE, "!vnet%d: number of nodes updated "
2250 			    "invalid: %d\n", vnetp->instance,
2251 			    resp->match_curr.nelem);
2252 			goto vgen_mdeg_cb_err;
2253 		}
2254 
2255 		mdp = resp->match_curr.mdp;
2256 		node = resp->match_curr.mdep[0];
2257 
2258 	} else {
2259 		goto vgen_mdeg_cb_err;
2260 	}
2261 
2262 	/* Validate name and instance */
2263 	if (md_get_prop_str(mdp, node, "name", &node_name) != 0) {
2264 		DERR(vgenp, NULL, "unable to get node name\n");
2265 		goto vgen_mdeg_cb_err;
2266 	}
2267 
2268 	/* is this a virtual-network device? */
2269 	if (strcmp(node_name, vnet_propname) != 0) {
2270 		DERR(vgenp, NULL, "%s: Invalid node name: %s\n", node_name);
2271 		goto vgen_mdeg_cb_err;
2272 	}
2273 
2274 	if (md_get_prop_val(mdp, node, "cfg-handle", &inst)) {
2275 		DERR(vgenp, NULL, "prop(cfg-handle) not found\n");
2276 		goto vgen_mdeg_cb_err;
2277 	}
2278 
2279 	/* is this the right instance of vnet? */
2280 	if (inst != vgenp->regprop) {
2281 		DERR(vgenp, NULL,  "Invalid cfg-handle: %lx\n", inst);
2282 		goto vgen_mdeg_cb_err;
2283 	}
2284 
2285 	vgen_update_md_prop(vgenp, mdp, node);
2286 
2287 	mutex_exit(&vgenp->lock);
2288 	return (MDEG_SUCCESS);
2289 
2290 vgen_mdeg_cb_err:
2291 	mutex_exit(&vgenp->lock);
2292 	return (MDEG_FAILURE);
2293 }
2294 
2295 /*
2296  * Check to see if the relevant properties in the specified node have
2297  * changed, and if so take the appropriate action.
2298  */
2299 static void
2300 vgen_update_md_prop(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
2301 {
2302 	uint16_t	pvid;
2303 	uint16_t	*vids;
2304 	uint16_t	nvids;
2305 	vnet_t		*vnetp = vgenp->vnetp;
2306 	uint32_t	mtu;
2307 	boolean_t	pls_update;
2308 	enum		{ MD_init = 0x1,
2309 			    MD_vlans = 0x2,
2310 			    MD_mtu = 0x4,
2311 			    MD_pls = 0x8 } updated;
2312 	int		rv;
2313 
2314 	updated = MD_init;
2315 
2316 	/* Read the vlan ids */
2317 	vgen_vlan_read_ids(vgenp, VGEN_LOCAL, mdp, mdex, &pvid, &vids,
2318 	    &nvids, NULL);
2319 
2320 	/* Determine if there are any vlan id updates */
2321 	if ((pvid != vnetp->pvid) ||		/* pvid changed? */
2322 	    (nvids != vnetp->nvids) ||		/* # of vids changed? */
2323 	    ((nvids != 0) && (vnetp->nvids != 0) &&	/* vids changed? */
2324 	    bcmp(vids, vnetp->vids, sizeof (uint16_t) * nvids))) {
2325 		updated |= MD_vlans;
2326 	}
2327 
2328 	/* Read mtu */
2329 	vgen_mtu_read(vgenp, mdp, mdex, &mtu);
2330 	if (mtu != vnetp->mtu) {
2331 		if (mtu >= ETHERMTU && mtu <= VNET_MAX_MTU) {
2332 			updated |= MD_mtu;
2333 		} else {
2334 			cmn_err(CE_NOTE, "!vnet%d: Unable to process mtu update"
2335 			    " as the specified value:%d is invalid\n",
2336 			    vnetp->instance, mtu);
2337 		}
2338 	}
2339 
2340 	/*
2341 	 * Read the 'linkprop' property.
2342 	 */
2343 	vgen_linkprop_read(vgenp, mdp, mdex, &pls_update);
2344 	if (pls_update != vnetp->pls_update) {
2345 		updated |= MD_pls;
2346 	}
2347 
2348 	/* Now process the updated props */
2349 
2350 	if (updated & MD_vlans) {
2351 
2352 		/* save the new vlan ids */
2353 		vnetp->pvid = pvid;
2354 		if (vnetp->nvids != 0) {
2355 			kmem_free(vnetp->vids,
2356 			    sizeof (uint16_t) * vnetp->nvids);
2357 			vnetp->nvids = 0;
2358 		}
2359 		if (nvids != 0) {
2360 			vnetp->nvids = nvids;
2361 			vnetp->vids = vids;
2362 		}
2363 
2364 		/* reset vlan-unaware peers (ver < 1.3) and restart handshake */
2365 		vgen_reset_vlan_unaware_ports(vgenp);
2366 
2367 	} else {
2368 
2369 		if (nvids != 0) {
2370 			kmem_free(vids, sizeof (uint16_t) * nvids);
2371 		}
2372 	}
2373 
2374 	if (updated & MD_mtu) {
2375 
2376 		DBG2(vgenp, NULL, "curr_mtu(%d) new_mtu(%d)\n",
2377 		    vnetp->mtu, mtu);
2378 
2379 		rv = vnet_mtu_update(vnetp, mtu);
2380 		if (rv == 0) {
2381 			vgenp->max_frame_size = mtu +
2382 			    sizeof (struct ether_header) + VLAN_TAGSZ;
2383 		}
2384 	}
2385 
2386 	if (updated & MD_pls) {
2387 		/* enable/disable physical link state updates */
2388 		vnetp->pls_update = pls_update;
2389 		mutex_exit(&vgenp->lock);
2390 
2391 		/* reset vsw-port to re-negotiate with the updated prop. */
2392 		vgen_reset_vsw_port(vgenp);
2393 
2394 		mutex_enter(&vgenp->lock);
2395 	}
2396 }
2397 
2398 /* add a new port to the device */
2399 static int
2400 vgen_add_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
2401 {
2402 	vgen_port_t	*portp;
2403 	int		rv;
2404 
2405 	portp = kmem_zalloc(sizeof (vgen_port_t), KM_SLEEP);
2406 
2407 	rv = vgen_port_read_props(portp, vgenp, mdp, mdex);
2408 	if (rv != DDI_SUCCESS) {
2409 		KMEM_FREE(portp);
2410 		return (DDI_FAILURE);
2411 	}
2412 
2413 	rv = vgen_port_attach(portp);
2414 	if (rv != DDI_SUCCESS) {
2415 		return (DDI_FAILURE);
2416 	}
2417 
2418 	return (DDI_SUCCESS);
2419 }
2420 
2421 /* read properties of the port from its md node */
2422 static int
2423 vgen_port_read_props(vgen_port_t *portp, vgen_t *vgenp, md_t *mdp,
2424 	mde_cookie_t mdex)
2425 {
2426 	uint64_t		port_num;
2427 	uint64_t		*ldc_ids;
2428 	uint64_t		macaddr;
2429 	uint64_t		val;
2430 	int			num_ldcs;
2431 	int			i;
2432 	int			addrsz;
2433 	int			num_nodes = 0;
2434 	int			listsz = 0;
2435 	mde_cookie_t		*listp = NULL;
2436 	uint8_t			*addrp;
2437 	struct ether_addr	ea;
2438 
2439 	/* read "id" property to get the port number */
2440 	if (md_get_prop_val(mdp, mdex, id_propname, &port_num)) {
2441 		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
2442 		return (DDI_FAILURE);
2443 	}
2444 
2445 	/*
2446 	 * Find the channel endpoint node(s) under this port node.
2447 	 */
2448 	if ((num_nodes = md_node_count(mdp)) <= 0) {
2449 		DWARN(vgenp, NULL, "invalid number of nodes found (%d)",
2450 		    num_nodes);
2451 		return (DDI_FAILURE);
2452 	}
2453 
2454 	/* allocate space for node list */
2455 	listsz = num_nodes * sizeof (mde_cookie_t);
2456 	listp = kmem_zalloc(listsz, KM_NOSLEEP);
2457 	if (listp == NULL)
2458 		return (DDI_FAILURE);
2459 
2460 	num_ldcs = md_scan_dag(mdp, mdex,
2461 	    md_find_name(mdp, channel_propname),
2462 	    md_find_name(mdp, "fwd"), listp);
2463 
2464 	if (num_ldcs <= 0) {
2465 		DWARN(vgenp, NULL, "can't find %s nodes", channel_propname);
2466 		kmem_free(listp, listsz);
2467 		return (DDI_FAILURE);
2468 	}
2469 
2470 	DBG2(vgenp, NULL, "num_ldcs %d", num_ldcs);
2471 
2472 	ldc_ids = kmem_zalloc(num_ldcs * sizeof (uint64_t), KM_NOSLEEP);
2473 	if (ldc_ids == NULL) {
2474 		kmem_free(listp, listsz);
2475 		return (DDI_FAILURE);
2476 	}
2477 
2478 	for (i = 0; i < num_ldcs; i++) {
2479 		/* read channel ids */
2480 		if (md_get_prop_val(mdp, listp[i], id_propname, &ldc_ids[i])) {
2481 			DWARN(vgenp, NULL, "prop(%s) not found\n",
2482 			    id_propname);
2483 			kmem_free(listp, listsz);
2484 			kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
2485 			return (DDI_FAILURE);
2486 		}
2487 		DBG2(vgenp, NULL, "ldc_id 0x%llx", ldc_ids[i]);
2488 	}
2489 
2490 	kmem_free(listp, listsz);
2491 
2492 	if (md_get_prop_data(mdp, mdex, rmacaddr_propname, &addrp,
2493 	    &addrsz)) {
2494 		DWARN(vgenp, NULL, "prop(%s) not found\n", rmacaddr_propname);
2495 		kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
2496 		return (DDI_FAILURE);
2497 	}
2498 
2499 	if (addrsz < ETHERADDRL) {
2500 		DWARN(vgenp, NULL, "invalid address size (%d)\n", addrsz);
2501 		kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
2502 		return (DDI_FAILURE);
2503 	}
2504 
2505 	macaddr = *((uint64_t *)addrp);
2506 
2507 	DBG2(vgenp, NULL, "remote mac address 0x%llx\n", macaddr);
2508 
2509 	for (i = ETHERADDRL - 1; i >= 0; i--) {
2510 		ea.ether_addr_octet[i] = macaddr & 0xFF;
2511 		macaddr >>= 8;
2512 	}
2513 
2514 	if (!(md_get_prop_val(mdp, mdex, swport_propname, &val))) {
2515 		if (val == 0) {
2516 			/* This port is connected to the vswitch */
2517 			portp->is_vsw_port = B_TRUE;
2518 		} else {
2519 			portp->is_vsw_port = B_FALSE;
2520 		}
2521 	}
2522 
2523 	/* now update all properties into the port */
2524 	portp->vgenp = vgenp;
2525 	portp->port_num = port_num;
2526 	ether_copy(&ea, &portp->macaddr);
2527 	portp->ldc_ids = kmem_zalloc(sizeof (uint64_t) * num_ldcs, KM_SLEEP);
2528 	bcopy(ldc_ids, portp->ldc_ids, sizeof (uint64_t) * num_ldcs);
2529 	portp->num_ldcs = num_ldcs;
2530 
2531 	/* read vlan id properties of this port node */
2532 	vgen_vlan_read_ids(portp, VGEN_PEER, mdp, mdex, &portp->pvid,
2533 	    &portp->vids, &portp->nvids, NULL);
2534 
2535 	kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
2536 
2537 	return (DDI_SUCCESS);
2538 }
2539 
2540 /* remove a port from the device */
2541 static int
2542 vgen_remove_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
2543 {
2544 	uint64_t	port_num;
2545 	vgen_port_t	*portp;
2546 	vgen_portlist_t	*plistp;
2547 
2548 	/* read "id" property to get the port number */
2549 	if (md_get_prop_val(mdp, mdex, id_propname, &port_num)) {
2550 		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
2551 		return (DDI_FAILURE);
2552 	}
2553 
2554 	plistp = &(vgenp->vgenports);
2555 
2556 	WRITE_ENTER(&plistp->rwlock);
2557 	portp = vgen_port_lookup(plistp, (int)port_num);
2558 	if (portp == NULL) {
2559 		DWARN(vgenp, NULL, "can't find port(%lx)\n", port_num);
2560 		RW_EXIT(&plistp->rwlock);
2561 		return (DDI_FAILURE);
2562 	}
2563 
2564 	vgen_port_detach_mdeg(portp);
2565 	RW_EXIT(&plistp->rwlock);
2566 
2567 	return (DDI_SUCCESS);
2568 }
2569 
2570 /* attach a port to the device based on mdeg data */
2571 static int
2572 vgen_port_attach(vgen_port_t *portp)
2573 {
2574 	int			i;
2575 	vgen_portlist_t		*plistp;
2576 	vgen_t			*vgenp;
2577 	uint64_t		*ldcids;
2578 	uint32_t		num_ldcs;
2579 	mac_register_t		*macp;
2580 	vio_net_res_type_t	type;
2581 	int			rv;
2582 
2583 	ASSERT(portp != NULL);
2584 
2585 	vgenp = portp->vgenp;
2586 	ldcids = portp->ldc_ids;
2587 	num_ldcs = portp->num_ldcs;
2588 
2589 	DBG1(vgenp, NULL, "port_num(%d)\n", portp->port_num);
2590 
2591 	mutex_init(&portp->lock, NULL, MUTEX_DRIVER, NULL);
2592 	rw_init(&portp->ldclist.rwlock, NULL, RW_DRIVER, NULL);
2593 	portp->ldclist.headp = NULL;
2594 
2595 	for (i = 0; i < num_ldcs; i++) {
2596 		DBG2(vgenp, NULL, "ldcid (%lx)\n", ldcids[i]);
2597 		if (vgen_ldc_attach(portp, ldcids[i]) == DDI_FAILURE) {
2598 			vgen_port_detach(portp);
2599 			return (DDI_FAILURE);
2600 		}
2601 	}
2602 
2603 	/* create vlan id hash table */
2604 	vgen_vlan_create_hash(portp);
2605 
2606 	if (portp->is_vsw_port == B_TRUE) {
2607 		/* This port is connected to the switch port */
2608 		(void) atomic_swap_32(&portp->use_vsw_port, B_FALSE);
2609 		type = VIO_NET_RES_LDC_SERVICE;
2610 	} else {
2611 		(void) atomic_swap_32(&portp->use_vsw_port, B_TRUE);
2612 		type = VIO_NET_RES_LDC_GUEST;
2613 	}
2614 
2615 	if ((macp = mac_alloc(MAC_VERSION)) == NULL) {
2616 		vgen_port_detach(portp);
2617 		return (DDI_FAILURE);
2618 	}
2619 	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
2620 	macp->m_driver = portp;
2621 	macp->m_dip = vgenp->vnetdip;
2622 	macp->m_src_addr = (uint8_t *)&(vgenp->macaddr);
2623 	macp->m_callbacks = &vgen_m_callbacks;
2624 	macp->m_min_sdu = 0;
2625 	macp->m_max_sdu = ETHERMTU;
2626 
2627 	mutex_enter(&portp->lock);
2628 	rv = vio_net_resource_reg(macp, type, vgenp->macaddr,
2629 	    portp->macaddr, &portp->vhp, &portp->vcb);
2630 	mutex_exit(&portp->lock);
2631 	mac_free(macp);
2632 
2633 	if (rv == 0) {
2634 		/* link it into the list of ports */
2635 		plistp = &(vgenp->vgenports);
2636 		WRITE_ENTER(&plistp->rwlock);
2637 		vgen_port_list_insert(portp);
2638 		RW_EXIT(&plistp->rwlock);
2639 
2640 		if (portp->is_vsw_port == B_TRUE) {
2641 			/* We now have the vswitch port attached */
2642 			vgenp->vsw_portp = portp;
2643 			(void) atomic_swap_32(&vgenp->vsw_port_refcnt, 0);
2644 		}
2645 	} else {
2646 		DERR(vgenp, NULL, "vio_net_resource_reg failed for portp=0x%p",
2647 		    portp);
2648 		vgen_port_detach(portp);
2649 	}
2650 
2651 	DBG1(vgenp, NULL, "exit: port_num(%d)\n", portp->port_num);
2652 	return (DDI_SUCCESS);
2653 }
2654 
2655 /* detach a port from the device based on mdeg data */
2656 static void
2657 vgen_port_detach_mdeg(vgen_port_t *portp)
2658 {
2659 	vgen_t *vgenp = portp->vgenp;
2660 
2661 	DBG1(vgenp, NULL, "enter: port_num(%d)\n", portp->port_num);
2662 
2663 	mutex_enter(&portp->lock);
2664 
2665 	/* stop the port if needed */
2666 	if (portp->flags & VGEN_STARTED) {
2667 		vgen_port_uninit(portp);
2668 		portp->flags &= ~(VGEN_STARTED);
2669 	}
2670 
2671 	mutex_exit(&portp->lock);
2672 	vgen_port_detach(portp);
2673 
2674 	DBG1(vgenp, NULL, "exit: port_num(%d)\n", portp->port_num);
2675 }
2676 
2677 static int
2678 vgen_update_port(vgen_t *vgenp, md_t *curr_mdp, mde_cookie_t curr_mdex,
2679 	md_t *prev_mdp, mde_cookie_t prev_mdex)
2680 {
2681 	uint64_t	cport_num;
2682 	uint64_t	pport_num;
2683 	vgen_portlist_t	*plistp;
2684 	vgen_port_t	*portp;
2685 	boolean_t	updated_vlans = B_FALSE;
2686 	uint16_t	pvid;
2687 	uint16_t	*vids;
2688 	uint16_t	nvids;
2689 
2690 	/*
2691 	 * For now, we get port updates only if vlan ids changed.
2692 	 * We read the port num and do some sanity check.
2693 	 */
2694 	if (md_get_prop_val(curr_mdp, curr_mdex, id_propname, &cport_num)) {
2695 		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
2696 		return (DDI_FAILURE);
2697 	}
2698 
2699 	if (md_get_prop_val(prev_mdp, prev_mdex, id_propname, &pport_num)) {
2700 		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
2701 		return (DDI_FAILURE);
2702 	}
2703 	if (cport_num != pport_num)
2704 		return (DDI_FAILURE);
2705 
2706 	plistp = &(vgenp->vgenports);
2707 
2708 	READ_ENTER(&plistp->rwlock);
2709 
2710 	portp = vgen_port_lookup(plistp, (int)cport_num);
2711 	if (portp == NULL) {
2712 		DWARN(vgenp, NULL, "can't find port(%lx)\n", cport_num);
2713 		RW_EXIT(&plistp->rwlock);
2714 		return (DDI_FAILURE);
2715 	}
2716 
2717 	/* Read the vlan ids */
2718 	vgen_vlan_read_ids(portp, VGEN_PEER, curr_mdp, curr_mdex, &pvid, &vids,
2719 	    &nvids, NULL);
2720 
2721 	/* Determine if there are any vlan id updates */
2722 	if ((pvid != portp->pvid) ||		/* pvid changed? */
2723 	    (nvids != portp->nvids) ||		/* # of vids changed? */
2724 	    ((nvids != 0) && (portp->nvids != 0) &&	/* vids changed? */
2725 	    bcmp(vids, portp->vids, sizeof (uint16_t) * nvids))) {
2726 		updated_vlans = B_TRUE;
2727 	}
2728 
2729 	if (updated_vlans == B_FALSE) {
2730 		RW_EXIT(&plistp->rwlock);
2731 		return (DDI_FAILURE);
2732 	}
2733 
2734 	/* remove the port from vlans it has been assigned to */
2735 	vgen_vlan_remove_ids(portp);
2736 
2737 	/* save the new vlan ids */
2738 	portp->pvid = pvid;
2739 	if (portp->nvids != 0) {
2740 		kmem_free(portp->vids, sizeof (uint16_t) * portp->nvids);
2741 		portp->nvids = 0;
2742 	}
2743 	if (nvids != 0) {
2744 		portp->vids = kmem_zalloc(sizeof (uint16_t) * nvids, KM_SLEEP);
2745 		bcopy(vids, portp->vids, sizeof (uint16_t) * nvids);
2746 		portp->nvids = nvids;
2747 		kmem_free(vids, sizeof (uint16_t) * nvids);
2748 	}
2749 
2750 	/* add port to the new vlans */
2751 	vgen_vlan_add_ids(portp);
2752 
2753 	/* reset the port if it is vlan unaware (ver < 1.3) */
2754 	vgen_vlan_unaware_port_reset(portp);
2755 
2756 	RW_EXIT(&plistp->rwlock);
2757 
2758 	return (DDI_SUCCESS);
2759 }
2760 
2761 static uint64_t
2762 vgen_port_stat(vgen_port_t *portp, uint_t stat)
2763 {
2764 	vgen_ldclist_t	*ldclp;
2765 	vgen_ldc_t *ldcp;
2766 	uint64_t	val;
2767 
2768 	val = 0;
2769 	ldclp = &portp->ldclist;
2770 
2771 	READ_ENTER(&ldclp->rwlock);
2772 	for (ldcp = ldclp->headp; ldcp != NULL; ldcp = ldcp->nextp) {
2773 		val += vgen_ldc_stat(ldcp, stat);
2774 	}
2775 	RW_EXIT(&ldclp->rwlock);
2776 
2777 	return (val);
2778 }
2779 
2780 /* allocate receive resources */
2781 static int
2782 vgen_init_multipools(vgen_ldc_t *ldcp)
2783 {
2784 	size_t		data_sz;
2785 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
2786 	int		status;
2787 	uint32_t	sz1 = 0;
2788 	uint32_t	sz2 = 0;
2789 	uint32_t	sz3 = 0;
2790 	uint32_t	sz4 = 0;
2791 
2792 	/*
2793 	 * We round up the mtu specified to be a multiple of 2K.
2794 	 * We then create rx pools based on the rounded up size.
2795 	 */
2796 	data_sz = vgenp->max_frame_size + VNET_IPALIGN + VNET_LDCALIGN;
2797 	data_sz = VNET_ROUNDUP_2K(data_sz);
2798 
2799 	/*
2800 	 * If pool sizes are specified, use them. Note that the presence of
2801 	 * the first tunable will be used as a hint.
2802 	 */
2803 	if (vgen_rbufsz1 != 0) {
2804 
2805 		sz1 = vgen_rbufsz1;
2806 		sz2 = vgen_rbufsz2;
2807 		sz3 = vgen_rbufsz3;
2808 		sz4 = vgen_rbufsz4;
2809 
2810 		if (sz4 == 0) { /* need 3 pools */
2811 
2812 			ldcp->max_rxpool_size = sz3;
2813 			status = vio_init_multipools(&ldcp->vmp,
2814 			    VGEN_NUM_VMPOOLS, sz1, sz2, sz3, vgen_nrbufs1,
2815 			    vgen_nrbufs2, vgen_nrbufs3);
2816 
2817 		} else {
2818 
2819 			ldcp->max_rxpool_size = sz4;
2820 			status = vio_init_multipools(&ldcp->vmp,
2821 			    VGEN_NUM_VMPOOLS + 1, sz1, sz2, sz3, sz4,
2822 			    vgen_nrbufs1, vgen_nrbufs2, vgen_nrbufs3,
2823 			    vgen_nrbufs4);
2824 		}
2825 		return (status);
2826 	}
2827 
2828 	/*
2829 	 * Pool sizes are not specified. We select the pool sizes based on the
2830 	 * mtu if vnet_jumbo_rxpools is enabled.
2831 	 */
2832 	if (vnet_jumbo_rxpools == B_FALSE || data_sz == VNET_2K) {
2833 		/*
2834 		 * Receive buffer pool allocation based on mtu is disabled.
2835 		 * Use the default mechanism of standard size pool allocation.
2836 		 */
2837 		sz1 = VGEN_DBLK_SZ_128;
2838 		sz2 = VGEN_DBLK_SZ_256;
2839 		sz3 = VGEN_DBLK_SZ_2048;
2840 		ldcp->max_rxpool_size = sz3;
2841 
2842 		status = vio_init_multipools(&ldcp->vmp, VGEN_NUM_VMPOOLS,
2843 		    sz1, sz2, sz3,
2844 		    vgen_nrbufs1, vgen_nrbufs2, vgen_nrbufs3);
2845 
2846 		return (status);
2847 	}
2848 
2849 	switch (data_sz) {
2850 
2851 	case VNET_4K:
2852 
2853 		sz1 = VGEN_DBLK_SZ_128;
2854 		sz2 = VGEN_DBLK_SZ_256;
2855 		sz3 = VGEN_DBLK_SZ_2048;
2856 		sz4 = sz3 << 1;			/* 4K */
2857 		ldcp->max_rxpool_size = sz4;
2858 
2859 		status = vio_init_multipools(&ldcp->vmp, VGEN_NUM_VMPOOLS + 1,
2860 		    sz1, sz2, sz3, sz4,
2861 		    vgen_nrbufs1, vgen_nrbufs2, vgen_nrbufs3, vgen_nrbufs4);
2862 		break;
2863 
2864 	default:	/* data_sz:  4K+ to 16K */
2865 
2866 		sz1 = VGEN_DBLK_SZ_256;
2867 		sz2 = VGEN_DBLK_SZ_2048;
2868 		sz3 = data_sz >> 1;	/* Jumbo-size/2 */
2869 		sz4 = data_sz;		/* Jumbo-size  */
2870 		ldcp->max_rxpool_size = sz4;
2871 
2872 		status = vio_init_multipools(&ldcp->vmp, VGEN_NUM_VMPOOLS + 1,
2873 		    sz1, sz2, sz3, sz4,
2874 		    vgen_nrbufs1, vgen_nrbufs2, vgen_nrbufs3, vgen_nrbufs4);
2875 		break;
2876 
2877 	}
2878 
2879 	return (status);
2880 }
2881 
2882 /* attach the channel corresponding to the given ldc_id to the port */
2883 static int
2884 vgen_ldc_attach(vgen_port_t *portp, uint64_t ldc_id)
2885 {
2886 	vgen_t 		*vgenp;
2887 	vgen_ldclist_t	*ldclp;
2888 	vgen_ldc_t 	*ldcp, **prev_ldcp;
2889 	ldc_attr_t 	attr;
2890 	int 		status;
2891 	ldc_status_t	istatus;
2892 	char		kname[MAXNAMELEN];
2893 	int		instance;
2894 	enum	{AST_init = 0x0, AST_ldc_alloc = 0x1,
2895 		AST_mutex_init = 0x2, AST_ldc_init = 0x4,
2896 		AST_ldc_reg_cb = 0x8, AST_alloc_tx_ring = 0x10,
2897 		AST_create_rxmblks = 0x20,
2898 		AST_create_rcv_thread = 0x40} attach_state;
2899 
2900 	attach_state = AST_init;
2901 	vgenp = portp->vgenp;
2902 	ldclp = &portp->ldclist;
2903 
2904 	ldcp = kmem_zalloc(sizeof (vgen_ldc_t), KM_NOSLEEP);
2905 	if (ldcp == NULL) {
2906 		goto ldc_attach_failed;
2907 	}
2908 	ldcp->ldc_id = ldc_id;
2909 	ldcp->portp = portp;
2910 
2911 	attach_state |= AST_ldc_alloc;
2912 
2913 	mutex_init(&ldcp->txlock, NULL, MUTEX_DRIVER, NULL);
2914 	mutex_init(&ldcp->cblock, NULL, MUTEX_DRIVER, NULL);
2915 	mutex_init(&ldcp->tclock, NULL, MUTEX_DRIVER, NULL);
2916 	mutex_init(&ldcp->wrlock, NULL, MUTEX_DRIVER, NULL);
2917 	mutex_init(&ldcp->rxlock, NULL, MUTEX_DRIVER, NULL);
2918 	mutex_init(&ldcp->pollq_lock, NULL, MUTEX_DRIVER, NULL);
2919 
2920 	attach_state |= AST_mutex_init;
2921 
2922 	attr.devclass = LDC_DEV_NT;
2923 	attr.instance = vgenp->instance;
2924 	attr.mode = LDC_MODE_UNRELIABLE;
2925 	attr.mtu = vnet_ldc_mtu;
2926 	status = ldc_init(ldc_id, &attr, &ldcp->ldc_handle);
2927 	if (status != 0) {
2928 		DWARN(vgenp, ldcp, "ldc_init failed,rv (%d)\n", status);
2929 		goto ldc_attach_failed;
2930 	}
2931 	attach_state |= AST_ldc_init;
2932 
2933 	if (vgen_rcv_thread_enabled) {
2934 		ldcp->rcv_thr_flags = 0;
2935 
2936 		mutex_init(&ldcp->rcv_thr_lock, NULL, MUTEX_DRIVER, NULL);
2937 		cv_init(&ldcp->rcv_thr_cv, NULL, CV_DRIVER, NULL);
2938 		ldcp->rcv_thread = thread_create(NULL, 2 * DEFAULTSTKSZ,
2939 		    vgen_ldc_rcv_worker, ldcp, 0, &p0, TS_RUN, maxclsyspri);
2940 
2941 		attach_state |= AST_create_rcv_thread;
2942 		if (ldcp->rcv_thread == NULL) {
2943 			DWARN(vgenp, ldcp, "Failed to create worker thread");
2944 			goto ldc_attach_failed;
2945 		}
2946 	}
2947 
2948 	status = ldc_reg_callback(ldcp->ldc_handle, vgen_ldc_cb, (caddr_t)ldcp);
2949 	if (status != 0) {
2950 		DWARN(vgenp, ldcp, "ldc_reg_callback failed, rv (%d)\n",
2951 		    status);
2952 		goto ldc_attach_failed;
2953 	}
2954 	/*
2955 	 * allocate a message for ldc_read()s, big enough to hold ctrl and
2956 	 * data msgs, including raw data msgs used to recv priority frames.
2957 	 */
2958 	ldcp->msglen = VIO_PKT_DATA_HDRSIZE + vgenp->max_frame_size;
2959 	ldcp->ldcmsg = kmem_alloc(ldcp->msglen, KM_SLEEP);
2960 	attach_state |= AST_ldc_reg_cb;
2961 
2962 	(void) ldc_status(ldcp->ldc_handle, &istatus);
2963 	ASSERT(istatus == LDC_INIT);
2964 	ldcp->ldc_status = istatus;
2965 
2966 	/* allocate transmit resources */
2967 	status = vgen_alloc_tx_ring(ldcp);
2968 	if (status != 0) {
2969 		goto ldc_attach_failed;
2970 	}
2971 	attach_state |= AST_alloc_tx_ring;
2972 
2973 	/* allocate receive resources */
2974 	status = vgen_init_multipools(ldcp);
2975 	if (status != 0) {
2976 		/*
2977 		 * We do not return failure if receive mblk pools can't be
2978 		 * allocated; instead allocb(9F) will be used to dynamically
2979 		 * allocate buffers during receive.
2980 		 */
2981 		DWARN(vgenp, ldcp,
2982 		    "vnet%d: status(%d), failed to allocate rx mblk pools for "
2983 		    "channel(0x%lx)\n",
2984 		    vgenp->instance, status, ldcp->ldc_id);
2985 	} else {
2986 		attach_state |= AST_create_rxmblks;
2987 	}
2988 
2989 	/* Setup kstats for the channel */
2990 	instance = vgenp->instance;
2991 	(void) sprintf(kname, "vnetldc0x%lx", ldcp->ldc_id);
2992 	ldcp->ksp = vgen_setup_kstats("vnet", instance, kname, &ldcp->stats);
2993 	if (ldcp->ksp == NULL) {
2994 		goto ldc_attach_failed;
2995 	}
2996 
2997 	/* initialize vgen_versions supported */
2998 	bcopy(vgen_versions, ldcp->vgen_versions, sizeof (ldcp->vgen_versions));
2999 	vgen_reset_vnet_proto_ops(ldcp);
3000 
3001 	/* link it into the list of channels for this port */
3002 	WRITE_ENTER(&ldclp->rwlock);
3003 	prev_ldcp = (vgen_ldc_t **)(&ldclp->headp);
3004 	ldcp->nextp = *prev_ldcp;
3005 	*prev_ldcp = ldcp;
3006 	RW_EXIT(&ldclp->rwlock);
3007 
3008 	ldcp->link_state = LINK_STATE_UNKNOWN;
3009 #ifdef	VNET_IOC_DEBUG
3010 	ldcp->link_down_forced = B_FALSE;
3011 #endif
3012 	ldcp->flags |= CHANNEL_ATTACHED;
3013 	return (DDI_SUCCESS);
3014 
3015 ldc_attach_failed:
3016 	if (attach_state & AST_ldc_reg_cb) {
3017 		(void) ldc_unreg_callback(ldcp->ldc_handle);
3018 		kmem_free(ldcp->ldcmsg, ldcp->msglen);
3019 	}
3020 	if (attach_state & AST_create_rcv_thread) {
3021 		if (ldcp->rcv_thread != NULL) {
3022 			vgen_stop_rcv_thread(ldcp);
3023 		}
3024 		mutex_destroy(&ldcp->rcv_thr_lock);
3025 		cv_destroy(&ldcp->rcv_thr_cv);
3026 	}
3027 	if (attach_state & AST_create_rxmblks) {
3028 		vio_mblk_pool_t *fvmp = NULL;
3029 		vio_destroy_multipools(&ldcp->vmp, &fvmp);
3030 		ASSERT(fvmp == NULL);
3031 	}
3032 	if (attach_state & AST_alloc_tx_ring) {
3033 		vgen_free_tx_ring(ldcp);
3034 	}
3035 	if (attach_state & AST_ldc_init) {
3036 		(void) ldc_fini(ldcp->ldc_handle);
3037 	}
3038 	if (attach_state & AST_mutex_init) {
3039 		mutex_destroy(&ldcp->tclock);
3040 		mutex_destroy(&ldcp->txlock);
3041 		mutex_destroy(&ldcp->cblock);
3042 		mutex_destroy(&ldcp->wrlock);
3043 		mutex_destroy(&ldcp->rxlock);
3044 		mutex_destroy(&ldcp->pollq_lock);
3045 	}
3046 	if (attach_state & AST_ldc_alloc) {
3047 		KMEM_FREE(ldcp);
3048 	}
3049 	return (DDI_FAILURE);
3050 }
3051 
3052 /* detach a channel from the port */
3053 static void
3054 vgen_ldc_detach(vgen_ldc_t *ldcp)
3055 {
3056 	vgen_port_t	*portp;
3057 	vgen_t 		*vgenp;
3058 	vgen_ldc_t 	*pldcp;
3059 	vgen_ldc_t	**prev_ldcp;
3060 	vgen_ldclist_t	*ldclp;
3061 
3062 	portp = ldcp->portp;
3063 	vgenp = portp->vgenp;
3064 	ldclp = &portp->ldclist;
3065 
3066 	prev_ldcp =  (vgen_ldc_t **)&ldclp->headp;
3067 	for (; (pldcp = *prev_ldcp) != NULL; prev_ldcp = &pldcp->nextp) {
3068 		if (pldcp == ldcp) {
3069 			break;
3070 		}
3071 	}
3072 
3073 	if (pldcp == NULL) {
3074 		/* invalid ldcp? */
3075 		return;
3076 	}
3077 
3078 	if (ldcp->ldc_status != LDC_INIT) {
3079 		DWARN(vgenp, ldcp, "ldc_status is not INIT\n");
3080 	}
3081 
3082 	if (ldcp->flags & CHANNEL_ATTACHED) {
3083 		ldcp->flags &= ~(CHANNEL_ATTACHED);
3084 
3085 		(void) ldc_unreg_callback(ldcp->ldc_handle);
3086 		if (ldcp->rcv_thread != NULL) {
3087 			/* First stop the receive thread */
3088 			vgen_stop_rcv_thread(ldcp);
3089 			mutex_destroy(&ldcp->rcv_thr_lock);
3090 			cv_destroy(&ldcp->rcv_thr_cv);
3091 		}
3092 		kmem_free(ldcp->ldcmsg, ldcp->msglen);
3093 
3094 		vgen_destroy_kstats(ldcp->ksp);
3095 		ldcp->ksp = NULL;
3096 
3097 		/*
3098 		 * if we cannot reclaim all mblks, put this
3099 		 * on the list of pools(vgenp->rmp) to be reclaimed when the
3100 		 * device gets detached (see vgen_uninit()).
3101 		 */
3102 		vio_destroy_multipools(&ldcp->vmp, &vgenp->rmp);
3103 
3104 		/* free transmit resources */
3105 		vgen_free_tx_ring(ldcp);
3106 
3107 		(void) ldc_fini(ldcp->ldc_handle);
3108 		mutex_destroy(&ldcp->tclock);
3109 		mutex_destroy(&ldcp->txlock);
3110 		mutex_destroy(&ldcp->cblock);
3111 		mutex_destroy(&ldcp->wrlock);
3112 		mutex_destroy(&ldcp->rxlock);
3113 		mutex_destroy(&ldcp->pollq_lock);
3114 
3115 		/* unlink it from the list */
3116 		*prev_ldcp = ldcp->nextp;
3117 		KMEM_FREE(ldcp);
3118 	}
3119 }
3120 
3121 /*
3122  * This function allocates transmit resources for the channel.
3123  * The resources consist of a transmit descriptor ring and an associated
3124  * transmit buffer ring.
3125  */
3126 static int
3127 vgen_alloc_tx_ring(vgen_ldc_t *ldcp)
3128 {
3129 	void *tbufp;
3130 	ldc_mem_info_t minfo;
3131 	uint32_t txdsize;
3132 	uint32_t tbufsize;
3133 	int status;
3134 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3135 
3136 	ldcp->num_txds = vnet_ntxds;
3137 	txdsize = sizeof (vnet_public_desc_t);
3138 	tbufsize = sizeof (vgen_private_desc_t);
3139 
3140 	/* allocate transmit buffer ring */
3141 	tbufp = kmem_zalloc(ldcp->num_txds * tbufsize, KM_NOSLEEP);
3142 	if (tbufp == NULL) {
3143 		return (DDI_FAILURE);
3144 	}
3145 
3146 	/* create transmit descriptor ring */
3147 	status = ldc_mem_dring_create(ldcp->num_txds, txdsize,
3148 	    &ldcp->tx_dhandle);
3149 	if (status) {
3150 		DWARN(vgenp, ldcp, "ldc_mem_dring_create() failed\n");
3151 		kmem_free(tbufp, ldcp->num_txds * tbufsize);
3152 		return (DDI_FAILURE);
3153 	}
3154 
3155 	/* get the addr of descripror ring */
3156 	status = ldc_mem_dring_info(ldcp->tx_dhandle, &minfo);
3157 	if (status) {
3158 		DWARN(vgenp, ldcp, "ldc_mem_dring_info() failed\n");
3159 		kmem_free(tbufp, ldcp->num_txds * tbufsize);
3160 		(void) ldc_mem_dring_destroy(ldcp->tx_dhandle);
3161 		ldcp->tbufp = NULL;
3162 		return (DDI_FAILURE);
3163 	}
3164 	ldcp->txdp = (vnet_public_desc_t *)(minfo.vaddr);
3165 	ldcp->tbufp = tbufp;
3166 
3167 	ldcp->txdendp = &((ldcp->txdp)[ldcp->num_txds]);
3168 	ldcp->tbufendp = &((ldcp->tbufp)[ldcp->num_txds]);
3169 
3170 	return (DDI_SUCCESS);
3171 }
3172 
3173 /* Free transmit resources for the channel */
3174 static void
3175 vgen_free_tx_ring(vgen_ldc_t *ldcp)
3176 {
3177 	int tbufsize = sizeof (vgen_private_desc_t);
3178 
3179 	/* free transmit descriptor ring */
3180 	(void) ldc_mem_dring_destroy(ldcp->tx_dhandle);
3181 
3182 	/* free transmit buffer ring */
3183 	kmem_free(ldcp->tbufp, ldcp->num_txds * tbufsize);
3184 	ldcp->txdp = ldcp->txdendp = NULL;
3185 	ldcp->tbufp = ldcp->tbufendp = NULL;
3186 }
3187 
3188 /* enable transmit/receive on the channels for the port */
3189 static void
3190 vgen_init_ldcs(vgen_port_t *portp)
3191 {
3192 	vgen_ldclist_t	*ldclp = &portp->ldclist;
3193 	vgen_ldc_t	*ldcp;
3194 
3195 	READ_ENTER(&ldclp->rwlock);
3196 	ldcp =  ldclp->headp;
3197 	for (; ldcp  != NULL; ldcp = ldcp->nextp) {
3198 		(void) vgen_ldc_init(ldcp);
3199 	}
3200 	RW_EXIT(&ldclp->rwlock);
3201 }
3202 
3203 /* stop transmit/receive on the channels for the port */
3204 static void
3205 vgen_uninit_ldcs(vgen_port_t *portp)
3206 {
3207 	vgen_ldclist_t	*ldclp = &portp->ldclist;
3208 	vgen_ldc_t	*ldcp;
3209 
3210 	READ_ENTER(&ldclp->rwlock);
3211 	ldcp =  ldclp->headp;
3212 	for (; ldcp  != NULL; ldcp = ldcp->nextp) {
3213 		vgen_ldc_uninit(ldcp);
3214 	}
3215 	RW_EXIT(&ldclp->rwlock);
3216 }
3217 
3218 /* enable transmit/receive on the channel */
3219 static int
3220 vgen_ldc_init(vgen_ldc_t *ldcp)
3221 {
3222 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3223 	ldc_status_t	istatus;
3224 	int		rv;
3225 	uint32_t	retries = 0;
3226 	enum	{ ST_init = 0x0, ST_ldc_open = 0x1,
3227 		ST_init_tbufs = 0x2, ST_cb_enable = 0x4} init_state;
3228 	init_state = ST_init;
3229 
3230 	DBG1(vgenp, ldcp, "enter\n");
3231 	LDC_LOCK(ldcp);
3232 
3233 	rv = ldc_open(ldcp->ldc_handle);
3234 	if (rv != 0) {
3235 		DWARN(vgenp, ldcp, "ldc_open failed: rv(%d)\n", rv);
3236 		goto ldcinit_failed;
3237 	}
3238 	init_state |= ST_ldc_open;
3239 
3240 	(void) ldc_status(ldcp->ldc_handle, &istatus);
3241 	if (istatus != LDC_OPEN && istatus != LDC_READY) {
3242 		DWARN(vgenp, ldcp, "status(%d) is not OPEN/READY\n", istatus);
3243 		goto ldcinit_failed;
3244 	}
3245 	ldcp->ldc_status = istatus;
3246 
3247 	rv = vgen_init_tbufs(ldcp);
3248 	if (rv != 0) {
3249 		DWARN(vgenp, ldcp, "vgen_init_tbufs() failed\n");
3250 		goto ldcinit_failed;
3251 	}
3252 	init_state |= ST_init_tbufs;
3253 
3254 	rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_ENABLE);
3255 	if (rv != 0) {
3256 		DWARN(vgenp, ldcp, "ldc_set_cb_mode failed: rv(%d)\n", rv);
3257 		goto ldcinit_failed;
3258 	}
3259 
3260 	init_state |= ST_cb_enable;
3261 
3262 	do {
3263 		rv = ldc_up(ldcp->ldc_handle);
3264 		if ((rv != 0) && (rv == EWOULDBLOCK)) {
3265 			DBG2(vgenp, ldcp, "ldc_up err rv(%d)\n", rv);
3266 			drv_usecwait(VGEN_LDC_UP_DELAY);
3267 		}
3268 		if (retries++ >= vgen_ldcup_retries)
3269 			break;
3270 	} while (rv == EWOULDBLOCK);
3271 
3272 	(void) ldc_status(ldcp->ldc_handle, &istatus);
3273 	if (istatus == LDC_UP) {
3274 		DWARN(vgenp, ldcp, "status(%d) is UP\n", istatus);
3275 	}
3276 
3277 	ldcp->ldc_status = istatus;
3278 
3279 	/* initialize transmit watchdog timeout */
3280 	ldcp->wd_tid = timeout(vgen_ldc_watchdog, (caddr_t)ldcp,
3281 	    drv_usectohz(vnet_ldcwd_interval * 1000));
3282 
3283 	ldcp->hphase = -1;
3284 	ldcp->flags |= CHANNEL_STARTED;
3285 
3286 	/* if channel is already UP - start handshake */
3287 	if (istatus == LDC_UP) {
3288 		vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3289 		if (ldcp->portp != vgenp->vsw_portp) {
3290 			/*
3291 			 * As the channel is up, use this port from now on.
3292 			 */
3293 			(void) atomic_swap_32(
3294 			    &ldcp->portp->use_vsw_port, B_FALSE);
3295 		}
3296 
3297 		/* Initialize local session id */
3298 		ldcp->local_sid = ddi_get_lbolt();
3299 
3300 		/* clear peer session id */
3301 		ldcp->peer_sid = 0;
3302 		ldcp->hretries = 0;
3303 
3304 		/* Initiate Handshake process with peer ldc endpoint */
3305 		vgen_reset_hphase(ldcp);
3306 
3307 		mutex_exit(&ldcp->tclock);
3308 		mutex_exit(&ldcp->txlock);
3309 		mutex_exit(&ldcp->wrlock);
3310 		mutex_exit(&ldcp->rxlock);
3311 		vgen_handshake(vh_nextphase(ldcp));
3312 		mutex_exit(&ldcp->cblock);
3313 	} else {
3314 		LDC_UNLOCK(ldcp);
3315 	}
3316 
3317 	return (DDI_SUCCESS);
3318 
3319 ldcinit_failed:
3320 	if (init_state & ST_cb_enable) {
3321 		(void) ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE);
3322 	}
3323 	if (init_state & ST_init_tbufs) {
3324 		vgen_uninit_tbufs(ldcp);
3325 	}
3326 	if (init_state & ST_ldc_open) {
3327 		(void) ldc_close(ldcp->ldc_handle);
3328 	}
3329 	LDC_UNLOCK(ldcp);
3330 	DBG1(vgenp, ldcp, "exit\n");
3331 	return (DDI_FAILURE);
3332 }
3333 
3334 /* stop transmit/receive on the channel */
3335 static void
3336 vgen_ldc_uninit(vgen_ldc_t *ldcp)
3337 {
3338 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3339 	int	rv;
3340 	uint_t	retries = 0;
3341 
3342 	DBG1(vgenp, ldcp, "enter\n");
3343 	LDC_LOCK(ldcp);
3344 
3345 	if ((ldcp->flags & CHANNEL_STARTED) == 0) {
3346 		LDC_UNLOCK(ldcp);
3347 		DWARN(vgenp, ldcp, "CHANNEL_STARTED flag is not set\n");
3348 		return;
3349 	}
3350 
3351 	/* disable further callbacks */
3352 	rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE);
3353 	if (rv != 0) {
3354 		DWARN(vgenp, ldcp, "ldc_set_cb_mode failed\n");
3355 	}
3356 
3357 	/*
3358 	 * clear handshake done bit and wait for pending tx and cb to finish.
3359 	 * release locks before untimeout(9F) is invoked to cancel timeouts.
3360 	 */
3361 	ldcp->hphase &= ~(VH_DONE);
3362 	LDC_UNLOCK(ldcp);
3363 
3364 	if (vgenp->vsw_portp == ldcp->portp) {
3365 		vio_net_report_err_t rep_err =
3366 		    ldcp->portp->vcb.vio_net_report_err;
3367 		rep_err(ldcp->portp->vhp, VIO_NET_RES_DOWN);
3368 	}
3369 
3370 	/* cancel handshake watchdog timeout */
3371 	if (ldcp->htid) {
3372 		(void) untimeout(ldcp->htid);
3373 		ldcp->htid = 0;
3374 	}
3375 
3376 	if (ldcp->cancel_htid) {
3377 		(void) untimeout(ldcp->cancel_htid);
3378 		ldcp->cancel_htid = 0;
3379 	}
3380 
3381 	/* cancel transmit watchdog timeout */
3382 	if (ldcp->wd_tid) {
3383 		(void) untimeout(ldcp->wd_tid);
3384 		ldcp->wd_tid = 0;
3385 	}
3386 
3387 	drv_usecwait(1000);
3388 
3389 	if (ldcp->rcv_thread != NULL) {
3390 		/*
3391 		 * Note that callbacks have been disabled already(above). The
3392 		 * drain function takes care of the condition when an already
3393 		 * executing callback signals the worker to start processing or
3394 		 * the worker has already been signalled and is in the middle of
3395 		 * processing.
3396 		 */
3397 		vgen_drain_rcv_thread(ldcp);
3398 	}
3399 
3400 	/* acquire locks again; any pending transmits and callbacks are done */
3401 	LDC_LOCK(ldcp);
3402 
3403 	vgen_reset_hphase(ldcp);
3404 
3405 	vgen_uninit_tbufs(ldcp);
3406 
3407 	/* close the channel - retry on EAGAIN */
3408 	while ((rv = ldc_close(ldcp->ldc_handle)) == EAGAIN) {
3409 		if (++retries > vgen_ldccl_retries) {
3410 			break;
3411 		}
3412 		drv_usecwait(VGEN_LDC_CLOSE_DELAY);
3413 	}
3414 	if (rv != 0) {
3415 		cmn_err(CE_NOTE,
3416 		    "!vnet%d: Error(%d) closing the channel(0x%lx)\n",
3417 		    vgenp->instance, rv, ldcp->ldc_id);
3418 	}
3419 
3420 	ldcp->ldc_status = LDC_INIT;
3421 	ldcp->flags &= ~(CHANNEL_STARTED);
3422 
3423 	LDC_UNLOCK(ldcp);
3424 
3425 	DBG1(vgenp, ldcp, "exit\n");
3426 }
3427 
3428 /* Initialize the transmit buffer ring for the channel */
3429 static int
3430 vgen_init_tbufs(vgen_ldc_t *ldcp)
3431 {
3432 	vgen_private_desc_t	*tbufp;
3433 	vnet_public_desc_t	*txdp;
3434 	vio_dring_entry_hdr_t		*hdrp;
3435 	int 			i;
3436 	int 			rv;
3437 	caddr_t			datap = NULL;
3438 	int			ci;
3439 	uint32_t		ncookies;
3440 	size_t			data_sz;
3441 	vgen_t			*vgenp;
3442 
3443 	vgenp = LDC_TO_VGEN(ldcp);
3444 
3445 	bzero(ldcp->tbufp, sizeof (*tbufp) * (ldcp->num_txds));
3446 	bzero(ldcp->txdp, sizeof (*txdp) * (ldcp->num_txds));
3447 
3448 	/*
3449 	 * In order to ensure that the number of ldc cookies per descriptor is
3450 	 * limited to be within the default MAX_COOKIES (2), we take the steps
3451 	 * outlined below:
3452 	 *
3453 	 * Align the entire data buffer area to 8K and carve out per descriptor
3454 	 * data buffers starting from this 8K aligned base address.
3455 	 *
3456 	 * We round up the mtu specified to be a multiple of 2K or 4K.
3457 	 * For sizes up to 12K we round up the size to the next 2K.
3458 	 * For sizes > 12K we round up to the next 4K (otherwise sizes such as
3459 	 * 14K could end up needing 3 cookies, with the buffer spread across
3460 	 * 3 8K pages:  8K+6K, 2K+8K+2K, 6K+8K, ...).
3461 	 */
3462 	data_sz = vgenp->max_frame_size + VNET_IPALIGN + VNET_LDCALIGN;
3463 	if (data_sz <= VNET_12K) {
3464 		data_sz = VNET_ROUNDUP_2K(data_sz);
3465 	} else {
3466 		data_sz = VNET_ROUNDUP_4K(data_sz);
3467 	}
3468 
3469 	/* allocate extra 8K bytes for alignment */
3470 	ldcp->tx_data_sz = (data_sz * ldcp->num_txds) + VNET_8K;
3471 	datap = kmem_zalloc(ldcp->tx_data_sz, KM_SLEEP);
3472 	ldcp->tx_datap = datap;
3473 
3474 
3475 	/* align the starting address of the data area to 8K */
3476 	datap = (caddr_t)VNET_ROUNDUP_8K((uintptr_t)datap);
3477 
3478 	/*
3479 	 * for each private descriptor, allocate a ldc mem_handle which is
3480 	 * required to map the data during transmit, set the flags
3481 	 * to free (available for use by transmit routine).
3482 	 */
3483 
3484 	for (i = 0; i < ldcp->num_txds; i++) {
3485 
3486 		tbufp = &(ldcp->tbufp[i]);
3487 		rv = ldc_mem_alloc_handle(ldcp->ldc_handle,
3488 		    &(tbufp->memhandle));
3489 		if (rv) {
3490 			tbufp->memhandle = 0;
3491 			goto init_tbufs_failed;
3492 		}
3493 
3494 		/*
3495 		 * bind ldc memhandle to the corresponding transmit buffer.
3496 		 */
3497 		ci = ncookies = 0;
3498 		rv = ldc_mem_bind_handle(tbufp->memhandle,
3499 		    (caddr_t)datap, data_sz, LDC_SHADOW_MAP,
3500 		    LDC_MEM_R, &(tbufp->memcookie[ci]), &ncookies);
3501 		if (rv != 0) {
3502 			goto init_tbufs_failed;
3503 		}
3504 
3505 		/*
3506 		 * successful in binding the handle to tx data buffer.
3507 		 * set datap in the private descr to this buffer.
3508 		 */
3509 		tbufp->datap = datap;
3510 
3511 		if ((ncookies == 0) ||
3512 		    (ncookies > MAX_COOKIES)) {
3513 			goto init_tbufs_failed;
3514 		}
3515 
3516 		for (ci = 1; ci < ncookies; ci++) {
3517 			rv = ldc_mem_nextcookie(tbufp->memhandle,
3518 			    &(tbufp->memcookie[ci]));
3519 			if (rv != 0) {
3520 				goto init_tbufs_failed;
3521 			}
3522 		}
3523 
3524 		tbufp->ncookies = ncookies;
3525 		datap += data_sz;
3526 
3527 		tbufp->flags = VGEN_PRIV_DESC_FREE;
3528 		txdp = &(ldcp->txdp[i]);
3529 		hdrp = &txdp->hdr;
3530 		hdrp->dstate = VIO_DESC_FREE;
3531 		hdrp->ack = B_FALSE;
3532 		tbufp->descp = txdp;
3533 
3534 	}
3535 
3536 	/* reset tbuf walking pointers */
3537 	ldcp->next_tbufp = ldcp->tbufp;
3538 	ldcp->cur_tbufp = ldcp->tbufp;
3539 
3540 	/* initialize tx seqnum and index */
3541 	ldcp->next_txseq = VNET_ISS;
3542 	ldcp->next_txi = 0;
3543 
3544 	ldcp->resched_peer = B_TRUE;
3545 	ldcp->resched_peer_txi = 0;
3546 
3547 	return (DDI_SUCCESS);
3548 
3549 init_tbufs_failed:;
3550 	vgen_uninit_tbufs(ldcp);
3551 	return (DDI_FAILURE);
3552 }
3553 
3554 /* Uninitialize transmit buffer ring for the channel */
3555 static void
3556 vgen_uninit_tbufs(vgen_ldc_t *ldcp)
3557 {
3558 	vgen_private_desc_t	*tbufp = ldcp->tbufp;
3559 	int 			i;
3560 
3561 	/* for each tbuf (priv_desc), free ldc mem_handle */
3562 	for (i = 0; i < ldcp->num_txds; i++) {
3563 
3564 		tbufp = &(ldcp->tbufp[i]);
3565 
3566 		if (tbufp->datap) { /* if bound to a ldc memhandle */
3567 			(void) ldc_mem_unbind_handle(tbufp->memhandle);
3568 			tbufp->datap = NULL;
3569 		}
3570 		if (tbufp->memhandle) {
3571 			(void) ldc_mem_free_handle(tbufp->memhandle);
3572 			tbufp->memhandle = 0;
3573 		}
3574 	}
3575 
3576 	if (ldcp->tx_datap) {
3577 		/* prealloc'd tx data buffer */
3578 		kmem_free(ldcp->tx_datap, ldcp->tx_data_sz);
3579 		ldcp->tx_datap = NULL;
3580 		ldcp->tx_data_sz = 0;
3581 	}
3582 
3583 	bzero(ldcp->tbufp, sizeof (vgen_private_desc_t) * (ldcp->num_txds));
3584 	bzero(ldcp->txdp, sizeof (vnet_public_desc_t) * (ldcp->num_txds));
3585 }
3586 
3587 /* clobber tx descriptor ring */
3588 static void
3589 vgen_clobber_tbufs(vgen_ldc_t *ldcp)
3590 {
3591 	vnet_public_desc_t	*txdp;
3592 	vgen_private_desc_t	*tbufp;
3593 	vio_dring_entry_hdr_t	*hdrp;
3594 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3595 	int i;
3596 #ifdef DEBUG
3597 	int ndone = 0;
3598 #endif
3599 
3600 	for (i = 0; i < ldcp->num_txds; i++) {
3601 
3602 		tbufp = &(ldcp->tbufp[i]);
3603 		txdp = tbufp->descp;
3604 		hdrp = &txdp->hdr;
3605 
3606 		if (tbufp->flags & VGEN_PRIV_DESC_BUSY) {
3607 			tbufp->flags = VGEN_PRIV_DESC_FREE;
3608 #ifdef DEBUG
3609 			if (hdrp->dstate == VIO_DESC_DONE)
3610 				ndone++;
3611 #endif
3612 			hdrp->dstate = VIO_DESC_FREE;
3613 			hdrp->ack = B_FALSE;
3614 		}
3615 	}
3616 	/* reset tbuf walking pointers */
3617 	ldcp->next_tbufp = ldcp->tbufp;
3618 	ldcp->cur_tbufp = ldcp->tbufp;
3619 
3620 	/* reset tx seqnum and index */
3621 	ldcp->next_txseq = VNET_ISS;
3622 	ldcp->next_txi = 0;
3623 
3624 	ldcp->resched_peer = B_TRUE;
3625 	ldcp->resched_peer_txi = 0;
3626 
3627 	DBG2(vgenp, ldcp, "num descrs done (%d)\n", ndone);
3628 }
3629 
3630 /* clobber receive descriptor ring */
3631 static void
3632 vgen_clobber_rxds(vgen_ldc_t *ldcp)
3633 {
3634 	ldcp->rx_dhandle = 0;
3635 	bzero(&ldcp->rx_dcookie, sizeof (ldcp->rx_dcookie));
3636 	ldcp->rxdp = NULL;
3637 	ldcp->next_rxi = 0;
3638 	ldcp->num_rxds = 0;
3639 	ldcp->next_rxseq = VNET_ISS;
3640 }
3641 
3642 /* initialize receive descriptor ring */
3643 static int
3644 vgen_init_rxds(vgen_ldc_t *ldcp, uint32_t num_desc, uint32_t desc_size,
3645 	ldc_mem_cookie_t *dcookie, uint32_t ncookies)
3646 {
3647 	int rv;
3648 	ldc_mem_info_t minfo;
3649 
3650 	rv = ldc_mem_dring_map(ldcp->ldc_handle, dcookie, ncookies, num_desc,
3651 	    desc_size, LDC_DIRECT_MAP, &(ldcp->rx_dhandle));
3652 	if (rv != 0) {
3653 		return (DDI_FAILURE);
3654 	}
3655 
3656 	/*
3657 	 * sucessfully mapped, now try to
3658 	 * get info about the mapped dring
3659 	 */
3660 	rv = ldc_mem_dring_info(ldcp->rx_dhandle, &minfo);
3661 	if (rv != 0) {
3662 		(void) ldc_mem_dring_unmap(ldcp->rx_dhandle);
3663 		return (DDI_FAILURE);
3664 	}
3665 
3666 	/*
3667 	 * save ring address, number of descriptors.
3668 	 */
3669 	ldcp->rxdp = (vnet_public_desc_t *)(minfo.vaddr);
3670 	bcopy(dcookie, &(ldcp->rx_dcookie), sizeof (*dcookie));
3671 	ldcp->num_rxdcookies = ncookies;
3672 	ldcp->num_rxds = num_desc;
3673 	ldcp->next_rxi = 0;
3674 	ldcp->next_rxseq = VNET_ISS;
3675 	ldcp->dring_mtype = minfo.mtype;
3676 
3677 	return (DDI_SUCCESS);
3678 }
3679 
3680 /* get channel statistics */
3681 static uint64_t
3682 vgen_ldc_stat(vgen_ldc_t *ldcp, uint_t stat)
3683 {
3684 	vgen_stats_t *statsp;
3685 	uint64_t val;
3686 
3687 	val = 0;
3688 	statsp = &ldcp->stats;
3689 	switch (stat) {
3690 
3691 	case MAC_STAT_MULTIRCV:
3692 		val = statsp->multircv;
3693 		break;
3694 
3695 	case MAC_STAT_BRDCSTRCV:
3696 		val = statsp->brdcstrcv;
3697 		break;
3698 
3699 	case MAC_STAT_MULTIXMT:
3700 		val = statsp->multixmt;
3701 		break;
3702 
3703 	case MAC_STAT_BRDCSTXMT:
3704 		val = statsp->brdcstxmt;
3705 		break;
3706 
3707 	case MAC_STAT_NORCVBUF:
3708 		val = statsp->norcvbuf;
3709 		break;
3710 
3711 	case MAC_STAT_IERRORS:
3712 		val = statsp->ierrors;
3713 		break;
3714 
3715 	case MAC_STAT_NOXMTBUF:
3716 		val = statsp->noxmtbuf;
3717 		break;
3718 
3719 	case MAC_STAT_OERRORS:
3720 		val = statsp->oerrors;
3721 		break;
3722 
3723 	case MAC_STAT_COLLISIONS:
3724 		break;
3725 
3726 	case MAC_STAT_RBYTES:
3727 		val = statsp->rbytes;
3728 		break;
3729 
3730 	case MAC_STAT_IPACKETS:
3731 		val = statsp->ipackets;
3732 		break;
3733 
3734 	case MAC_STAT_OBYTES:
3735 		val = statsp->obytes;
3736 		break;
3737 
3738 	case MAC_STAT_OPACKETS:
3739 		val = statsp->opackets;
3740 		break;
3741 
3742 	/* stats not relevant to ldc, return 0 */
3743 	case MAC_STAT_IFSPEED:
3744 	case ETHER_STAT_ALIGN_ERRORS:
3745 	case ETHER_STAT_FCS_ERRORS:
3746 	case ETHER_STAT_FIRST_COLLISIONS:
3747 	case ETHER_STAT_MULTI_COLLISIONS:
3748 	case ETHER_STAT_DEFER_XMTS:
3749 	case ETHER_STAT_TX_LATE_COLLISIONS:
3750 	case ETHER_STAT_EX_COLLISIONS:
3751 	case ETHER_STAT_MACXMT_ERRORS:
3752 	case ETHER_STAT_CARRIER_ERRORS:
3753 	case ETHER_STAT_TOOLONG_ERRORS:
3754 	case ETHER_STAT_XCVR_ADDR:
3755 	case ETHER_STAT_XCVR_ID:
3756 	case ETHER_STAT_XCVR_INUSE:
3757 	case ETHER_STAT_CAP_1000FDX:
3758 	case ETHER_STAT_CAP_1000HDX:
3759 	case ETHER_STAT_CAP_100FDX:
3760 	case ETHER_STAT_CAP_100HDX:
3761 	case ETHER_STAT_CAP_10FDX:
3762 	case ETHER_STAT_CAP_10HDX:
3763 	case ETHER_STAT_CAP_ASMPAUSE:
3764 	case ETHER_STAT_CAP_PAUSE:
3765 	case ETHER_STAT_CAP_AUTONEG:
3766 	case ETHER_STAT_ADV_CAP_1000FDX:
3767 	case ETHER_STAT_ADV_CAP_1000HDX:
3768 	case ETHER_STAT_ADV_CAP_100FDX:
3769 	case ETHER_STAT_ADV_CAP_100HDX:
3770 	case ETHER_STAT_ADV_CAP_10FDX:
3771 	case ETHER_STAT_ADV_CAP_10HDX:
3772 	case ETHER_STAT_ADV_CAP_ASMPAUSE:
3773 	case ETHER_STAT_ADV_CAP_PAUSE:
3774 	case ETHER_STAT_ADV_CAP_AUTONEG:
3775 	case ETHER_STAT_LP_CAP_1000FDX:
3776 	case ETHER_STAT_LP_CAP_1000HDX:
3777 	case ETHER_STAT_LP_CAP_100FDX:
3778 	case ETHER_STAT_LP_CAP_100HDX:
3779 	case ETHER_STAT_LP_CAP_10FDX:
3780 	case ETHER_STAT_LP_CAP_10HDX:
3781 	case ETHER_STAT_LP_CAP_ASMPAUSE:
3782 	case ETHER_STAT_LP_CAP_PAUSE:
3783 	case ETHER_STAT_LP_CAP_AUTONEG:
3784 	case ETHER_STAT_LINK_ASMPAUSE:
3785 	case ETHER_STAT_LINK_PAUSE:
3786 	case ETHER_STAT_LINK_AUTONEG:
3787 	case ETHER_STAT_LINK_DUPLEX:
3788 	default:
3789 		val = 0;
3790 		break;
3791 
3792 	}
3793 	return (val);
3794 }
3795 
3796 /*
3797  * LDC channel is UP, start handshake process with peer.
3798  */
3799 static void
3800 vgen_handle_evt_up(vgen_ldc_t *ldcp)
3801 {
3802 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
3803 
3804 	DBG1(vgenp, ldcp, "enter\n");
3805 
3806 	ASSERT(MUTEX_HELD(&ldcp->cblock));
3807 
3808 	if (ldcp->portp != vgenp->vsw_portp) {
3809 		/*
3810 		 * As the channel is up, use this port from now on.
3811 		 */
3812 		(void) atomic_swap_32(&ldcp->portp->use_vsw_port, B_FALSE);
3813 	}
3814 
3815 	/* Initialize local session id */
3816 	ldcp->local_sid = ddi_get_lbolt();
3817 
3818 	/* clear peer session id */
3819 	ldcp->peer_sid = 0;
3820 	ldcp->hretries = 0;
3821 
3822 	if (ldcp->hphase != VH_PHASE0) {
3823 		vgen_handshake_reset(ldcp);
3824 	}
3825 
3826 	/* Initiate Handshake process with peer ldc endpoint */
3827 	vgen_handshake(vh_nextphase(ldcp));
3828 
3829 	DBG1(vgenp, ldcp, "exit\n");
3830 }
3831 
3832 /*
3833  * LDC channel is Reset, terminate connection with peer and try to
3834  * bring the channel up again.
3835  */
3836 static void
3837 vgen_handle_evt_reset(vgen_ldc_t *ldcp)
3838 {
3839 	ldc_status_t istatus;
3840 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
3841 	int	rv;
3842 
3843 	DBG1(vgenp, ldcp, "enter\n");
3844 
3845 	ASSERT(MUTEX_HELD(&ldcp->cblock));
3846 
3847 	if ((ldcp->portp != vgenp->vsw_portp) &&
3848 	    (vgenp->vsw_portp != NULL)) {
3849 		/*
3850 		 * As the channel is down, use the switch port until
3851 		 * the channel becomes ready to be used.
3852 		 */
3853 		(void) atomic_swap_32(&ldcp->portp->use_vsw_port, B_TRUE);
3854 	}
3855 
3856 	if (vgenp->vsw_portp == ldcp->portp) {
3857 		vio_net_report_err_t rep_err =
3858 		    ldcp->portp->vcb.vio_net_report_err;
3859 
3860 		/* Post a reset message */
3861 		rep_err(ldcp->portp->vhp, VIO_NET_RES_DOWN);
3862 	}
3863 
3864 	if (ldcp->hphase != VH_PHASE0) {
3865 		vgen_handshake_reset(ldcp);
3866 	}
3867 
3868 	/* try to bring the channel up */
3869 #ifdef	VNET_IOC_DEBUG
3870 	if (ldcp->link_down_forced == B_FALSE) {
3871 		rv = ldc_up(ldcp->ldc_handle);
3872 		if (rv != 0) {
3873 			DWARN(vgenp, ldcp, "ldc_up err rv(%d)\n", rv);
3874 		}
3875 	}
3876 #else
3877 	rv = ldc_up(ldcp->ldc_handle);
3878 	if (rv != 0) {
3879 		DWARN(vgenp, ldcp, "ldc_up err rv(%d)\n", rv);
3880 	}
3881 #endif
3882 
3883 	if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
3884 		DWARN(vgenp, ldcp, "ldc_status err\n");
3885 	} else {
3886 		ldcp->ldc_status = istatus;
3887 	}
3888 
3889 	/* if channel is already UP - restart handshake */
3890 	if (ldcp->ldc_status == LDC_UP) {
3891 		vgen_handle_evt_up(ldcp);
3892 	}
3893 
3894 	DBG1(vgenp, ldcp, "exit\n");
3895 }
3896 
3897 /* Interrupt handler for the channel */
3898 static uint_t
3899 vgen_ldc_cb(uint64_t event, caddr_t arg)
3900 {
3901 	_NOTE(ARGUNUSED(event))
3902 	vgen_ldc_t	*ldcp;
3903 	vgen_t		*vgenp;
3904 	ldc_status_t 	istatus;
3905 	vgen_stats_t	*statsp;
3906 	timeout_id_t	cancel_htid = 0;
3907 	uint_t		ret = LDC_SUCCESS;
3908 
3909 	ldcp = (vgen_ldc_t *)arg;
3910 	vgenp = LDC_TO_VGEN(ldcp);
3911 	statsp = &ldcp->stats;
3912 
3913 	DBG1(vgenp, ldcp, "enter\n");
3914 
3915 	mutex_enter(&ldcp->cblock);
3916 	statsp->callbacks++;
3917 	if ((ldcp->ldc_status == LDC_INIT) || (ldcp->ldc_handle == NULL)) {
3918 		DWARN(vgenp, ldcp, "status(%d) is LDC_INIT\n",
3919 		    ldcp->ldc_status);
3920 		mutex_exit(&ldcp->cblock);
3921 		return (LDC_SUCCESS);
3922 	}
3923 
3924 	/*
3925 	 * cache cancel_htid before the events specific
3926 	 * code may overwrite it. Do not clear ldcp->cancel_htid
3927 	 * as it is also used to indicate the timer to quit immediately.
3928 	 */
3929 	cancel_htid = ldcp->cancel_htid;
3930 
3931 	/*
3932 	 * NOTE: not using switch() as event could be triggered by
3933 	 * a state change and a read request. Also the ordering	of the
3934 	 * check for the event types is deliberate.
3935 	 */
3936 	if (event & LDC_EVT_UP) {
3937 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
3938 			DWARN(vgenp, ldcp, "ldc_status err\n");
3939 			/* status couldn't be determined */
3940 			ret = LDC_FAILURE;
3941 			goto ldc_cb_ret;
3942 		}
3943 		ldcp->ldc_status = istatus;
3944 		if (ldcp->ldc_status != LDC_UP) {
3945 			DWARN(vgenp, ldcp, "LDC_EVT_UP received "
3946 			    " but ldc status is not UP(0x%x)\n",
3947 			    ldcp->ldc_status);
3948 			/* spurious interrupt, return success */
3949 			goto ldc_cb_ret;
3950 		}
3951 		DWARN(vgenp, ldcp, "event(%lx) UP, status(%d)\n",
3952 		    event, ldcp->ldc_status);
3953 
3954 		vgen_handle_evt_up(ldcp);
3955 
3956 		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);
3957 	}
3958 
3959 	/* Handle RESET/DOWN before READ event */
3960 	if (event & (LDC_EVT_RESET | LDC_EVT_DOWN)) {
3961 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
3962 			DWARN(vgenp, ldcp, "ldc_status error\n");
3963 			/* status couldn't be determined */
3964 			ret = LDC_FAILURE;
3965 			goto ldc_cb_ret;
3966 		}
3967 		ldcp->ldc_status = istatus;
3968 		DWARN(vgenp, ldcp, "event(%lx) RESET/DOWN, status(%d)\n",
3969 		    event, ldcp->ldc_status);
3970 
3971 		vgen_handle_evt_reset(ldcp);
3972 
3973 		/*
3974 		 * As the channel is down/reset, ignore READ event
3975 		 * but print a debug warning message.
3976 		 */
3977 		if (event & LDC_EVT_READ) {
3978 			DWARN(vgenp, ldcp,
3979 			    "LDC_EVT_READ set along with RESET/DOWN\n");
3980 			event &= ~LDC_EVT_READ;
3981 		}
3982 	}
3983 
3984 	if (event & LDC_EVT_READ) {
3985 		DBG2(vgenp, ldcp, "event(%lx) READ, status(%d)\n",
3986 		    event, ldcp->ldc_status);
3987 
3988 		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);
3989 
3990 		if (ldcp->rcv_thread != NULL) {
3991 			/*
3992 			 * If the receive thread is enabled, then
3993 			 * wakeup the receive thread to process the
3994 			 * LDC messages.
3995 			 */
3996 			mutex_exit(&ldcp->cblock);
3997 			mutex_enter(&ldcp->rcv_thr_lock);
3998 			if (!(ldcp->rcv_thr_flags & VGEN_WTHR_DATARCVD)) {
3999 				ldcp->rcv_thr_flags |= VGEN_WTHR_DATARCVD;
4000 				cv_signal(&ldcp->rcv_thr_cv);
4001 			}
4002 			mutex_exit(&ldcp->rcv_thr_lock);
4003 			mutex_enter(&ldcp->cblock);
4004 		} else  {
4005 			vgen_handle_evt_read(ldcp);
4006 		}
4007 	}
4008 
4009 ldc_cb_ret:
4010 	/*
4011 	 * Check to see if the status of cancel_htid has
4012 	 * changed. If another timer needs to be cancelled,
4013 	 * then let the next callback to clear it.
4014 	 */
4015 	if (cancel_htid == 0) {
4016 		cancel_htid = ldcp->cancel_htid;
4017 	}
4018 	mutex_exit(&ldcp->cblock);
4019 
4020 	if (cancel_htid) {
4021 		/*
4022 		 * Cancel handshake timer.
4023 		 * untimeout(9F) will not return until the pending callback is
4024 		 * cancelled or has run. No problems will result from calling
4025 		 * untimeout if the handler has already completed.
4026 		 * If the timeout handler did run, then it would just
4027 		 * return as cancel_htid is set.
4028 		 */
4029 		DBG2(vgenp, ldcp, "cancel_htid =0x%X \n", cancel_htid);
4030 		(void) untimeout(cancel_htid);
4031 		mutex_enter(&ldcp->cblock);
4032 		/* clear it only if its the same as the one we cancelled */
4033 		if (ldcp->cancel_htid == cancel_htid) {
4034 			ldcp->cancel_htid = 0;
4035 		}
4036 		mutex_exit(&ldcp->cblock);
4037 	}
4038 	DBG1(vgenp, ldcp, "exit\n");
4039 	return (ret);
4040 }
4041 
4042 static void
4043 vgen_handle_evt_read(vgen_ldc_t *ldcp)
4044 {
4045 	int		rv;
4046 	uint64_t	*ldcmsg;
4047 	size_t		msglen;
4048 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4049 	vio_msg_tag_t	*tagp;
4050 	ldc_status_t 	istatus;
4051 	boolean_t 	has_data;
4052 
4053 	DBG1(vgenp, ldcp, "enter\n");
4054 
4055 	ldcmsg = ldcp->ldcmsg;
4056 	/*
4057 	 * If the receive thread is enabled, then the cblock
4058 	 * need to be acquired here. If not, the vgen_ldc_cb()
4059 	 * calls this function with cblock held already.
4060 	 */
4061 	if (ldcp->rcv_thread != NULL) {
4062 		mutex_enter(&ldcp->cblock);
4063 	} else {
4064 		ASSERT(MUTEX_HELD(&ldcp->cblock));
4065 	}
4066 
4067 vgen_evt_read:
4068 	do {
4069 		msglen = ldcp->msglen;
4070 		rv = ldc_read(ldcp->ldc_handle, (caddr_t)ldcmsg, &msglen);
4071 
4072 		if (rv != 0) {
4073 			DWARN(vgenp, ldcp, "err rv(%d) len(%d)\n",
4074 			    rv, msglen);
4075 			if (rv == ECONNRESET)
4076 				goto vgen_evtread_error;
4077 			break;
4078 		}
4079 		if (msglen == 0) {
4080 			DBG2(vgenp, ldcp, "ldc_read NODATA");
4081 			break;
4082 		}
4083 		DBG2(vgenp, ldcp, "ldc_read msglen(%d)", msglen);
4084 
4085 		tagp = (vio_msg_tag_t *)ldcmsg;
4086 
4087 		if (ldcp->peer_sid) {
4088 			/*
4089 			 * check sid only after we have received peer's sid
4090 			 * in the version negotiate msg.
4091 			 */
4092 #ifdef DEBUG
4093 			if (vgen_hdbg & HDBG_BAD_SID) {
4094 				/* simulate bad sid condition */
4095 				tagp->vio_sid = 0;
4096 				vgen_hdbg &= ~(HDBG_BAD_SID);
4097 			}
4098 #endif
4099 			rv = vgen_check_sid(ldcp, tagp);
4100 			if (rv != VGEN_SUCCESS) {
4101 				/*
4102 				 * If sid mismatch is detected,
4103 				 * reset the channel.
4104 				 */
4105 				goto vgen_evtread_error;
4106 			}
4107 		}
4108 
4109 		switch (tagp->vio_msgtype) {
4110 		case VIO_TYPE_CTRL:
4111 			rv = vgen_handle_ctrlmsg(ldcp, tagp);
4112 			break;
4113 
4114 		case VIO_TYPE_DATA:
4115 			rv = vgen_handle_datamsg(ldcp, tagp, msglen);
4116 			break;
4117 
4118 		case VIO_TYPE_ERR:
4119 			vgen_handle_errmsg(ldcp, tagp);
4120 			break;
4121 
4122 		default:
4123 			DWARN(vgenp, ldcp, "Unknown VIO_TYPE(%x)\n",
4124 			    tagp->vio_msgtype);
4125 			break;
4126 		}
4127 
4128 		/*
4129 		 * If an error is encountered, stop processing and
4130 		 * handle the error.
4131 		 */
4132 		if (rv != 0) {
4133 			goto vgen_evtread_error;
4134 		}
4135 
4136 	} while (msglen);
4137 
4138 	/* check once more before exiting */
4139 	rv = ldc_chkq(ldcp->ldc_handle, &has_data);
4140 	if ((rv == 0) && (has_data == B_TRUE)) {
4141 		DTRACE_PROBE(vgen_chkq);
4142 		goto vgen_evt_read;
4143 	}
4144 
4145 vgen_evtread_error:
4146 	if (rv == ECONNRESET) {
4147 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
4148 			DWARN(vgenp, ldcp, "ldc_status err\n");
4149 		} else {
4150 			ldcp->ldc_status = istatus;
4151 		}
4152 		vgen_handle_evt_reset(ldcp);
4153 	} else if (rv) {
4154 		vgen_ldc_reset(ldcp);
4155 	}
4156 
4157 	/*
4158 	 * If the receive thread is enabled, then cancel the
4159 	 * handshake timeout here.
4160 	 */
4161 	if (ldcp->rcv_thread != NULL) {
4162 		timeout_id_t cancel_htid = ldcp->cancel_htid;
4163 
4164 		mutex_exit(&ldcp->cblock);
4165 		if (cancel_htid) {
4166 			/*
4167 			 * Cancel handshake timer. untimeout(9F) will
4168 			 * not return until the pending callback is cancelled
4169 			 * or has run. No problems will result from calling
4170 			 * untimeout if the handler has already completed.
4171 			 * If the timeout handler did run, then it would just
4172 			 * return as cancel_htid is set.
4173 			 */
4174 			DBG2(vgenp, ldcp, "cancel_htid =0x%X \n", cancel_htid);
4175 			(void) untimeout(cancel_htid);
4176 
4177 			/*
4178 			 * clear it only if its the same as the one we
4179 			 * cancelled
4180 			 */
4181 			mutex_enter(&ldcp->cblock);
4182 			if (ldcp->cancel_htid == cancel_htid) {
4183 				ldcp->cancel_htid = 0;
4184 			}
4185 			mutex_exit(&ldcp->cblock);
4186 		}
4187 	}
4188 
4189 	DBG1(vgenp, ldcp, "exit\n");
4190 }
4191 
4192 /* vgen handshake functions */
4193 
4194 /* change the hphase for the channel to the next phase */
4195 static vgen_ldc_t *
4196 vh_nextphase(vgen_ldc_t *ldcp)
4197 {
4198 	if (ldcp->hphase == VH_PHASE3) {
4199 		ldcp->hphase = VH_DONE;
4200 	} else {
4201 		ldcp->hphase++;
4202 	}
4203 	return (ldcp);
4204 }
4205 
4206 /*
4207  * wrapper routine to send the given message over ldc using ldc_write().
4208  */
4209 static int
4210 vgen_sendmsg(vgen_ldc_t *ldcp, caddr_t msg,  size_t msglen,
4211     boolean_t caller_holds_lock)
4212 {
4213 	int			rv;
4214 	size_t			len;
4215 	uint32_t		retries = 0;
4216 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
4217 	vio_msg_tag_t		*tagp = (vio_msg_tag_t *)msg;
4218 	vio_dring_msg_t		*dmsg;
4219 	vio_raw_data_msg_t	*rmsg;
4220 	boolean_t		data_msg = B_FALSE;
4221 
4222 	len = msglen;
4223 	if ((len == 0) || (msg == NULL))
4224 		return (VGEN_FAILURE);
4225 
4226 	if (!caller_holds_lock) {
4227 		mutex_enter(&ldcp->wrlock);
4228 	}
4229 
4230 	if (tagp->vio_subtype == VIO_SUBTYPE_INFO) {
4231 		if (tagp->vio_subtype_env == VIO_DRING_DATA) {
4232 			dmsg = (vio_dring_msg_t *)tagp;
4233 			dmsg->seq_num = ldcp->next_txseq;
4234 			data_msg = B_TRUE;
4235 		} else if (tagp->vio_subtype_env == VIO_PKT_DATA) {
4236 			rmsg = (vio_raw_data_msg_t *)tagp;
4237 			rmsg->seq_num = ldcp->next_txseq;
4238 			data_msg = B_TRUE;
4239 		}
4240 	}
4241 
4242 	do {
4243 		len = msglen;
4244 		rv = ldc_write(ldcp->ldc_handle, (caddr_t)msg, &len);
4245 		if (retries++ >= vgen_ldcwr_retries)
4246 			break;
4247 	} while (rv == EWOULDBLOCK);
4248 
4249 	if (rv == 0 && data_msg == B_TRUE) {
4250 		ldcp->next_txseq++;
4251 	}
4252 
4253 	if (!caller_holds_lock) {
4254 		mutex_exit(&ldcp->wrlock);
4255 	}
4256 
4257 	if (rv != 0) {
4258 		DWARN(vgenp, ldcp, "ldc_write failed: rv(%d)\n",
4259 		    rv, msglen);
4260 		return (rv);
4261 	}
4262 
4263 	if (len != msglen) {
4264 		DWARN(vgenp, ldcp, "ldc_write failed: rv(%d) msglen (%d)\n",
4265 		    rv, msglen);
4266 		return (VGEN_FAILURE);
4267 	}
4268 
4269 	return (VGEN_SUCCESS);
4270 }
4271 
4272 /* send version negotiate message to the peer over ldc */
4273 static int
4274 vgen_send_version_negotiate(vgen_ldc_t *ldcp)
4275 {
4276 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4277 	vio_ver_msg_t	vermsg;
4278 	vio_msg_tag_t	*tagp = &vermsg.tag;
4279 	int		rv;
4280 
4281 	bzero(&vermsg, sizeof (vermsg));
4282 
4283 	tagp->vio_msgtype = VIO_TYPE_CTRL;
4284 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
4285 	tagp->vio_subtype_env = VIO_VER_INFO;
4286 	tagp->vio_sid = ldcp->local_sid;
4287 
4288 	/* get version msg payload from ldcp->local */
4289 	vermsg.ver_major = ldcp->local_hparams.ver_major;
4290 	vermsg.ver_minor = ldcp->local_hparams.ver_minor;
4291 	vermsg.dev_class = ldcp->local_hparams.dev_class;
4292 
4293 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (vermsg), B_FALSE);
4294 	if (rv != VGEN_SUCCESS) {
4295 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
4296 		return (rv);
4297 	}
4298 
4299 	ldcp->hstate |= VER_INFO_SENT;
4300 	DBG2(vgenp, ldcp, "VER_INFO_SENT ver(%d,%d)\n",
4301 	    vermsg.ver_major, vermsg.ver_minor);
4302 
4303 	return (VGEN_SUCCESS);
4304 }
4305 
4306 /* send attr info message to the peer over ldc */
4307 static int
4308 vgen_send_attr_info(vgen_ldc_t *ldcp)
4309 {
4310 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4311 	vnet_attr_msg_t	attrmsg;
4312 	vio_msg_tag_t	*tagp = &attrmsg.tag;
4313 	int		rv;
4314 
4315 	bzero(&attrmsg, sizeof (attrmsg));
4316 
4317 	tagp->vio_msgtype = VIO_TYPE_CTRL;
4318 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
4319 	tagp->vio_subtype_env = VIO_ATTR_INFO;
4320 	tagp->vio_sid = ldcp->local_sid;
4321 
4322 	/* get attr msg payload from ldcp->local */
4323 	attrmsg.mtu = ldcp->local_hparams.mtu;
4324 	attrmsg.addr = ldcp->local_hparams.addr;
4325 	attrmsg.addr_type = ldcp->local_hparams.addr_type;
4326 	attrmsg.xfer_mode = ldcp->local_hparams.xfer_mode;
4327 	attrmsg.ack_freq = ldcp->local_hparams.ack_freq;
4328 	attrmsg.physlink_update = ldcp->local_hparams.physlink_update;
4329 
4330 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (attrmsg), B_FALSE);
4331 	if (rv != VGEN_SUCCESS) {
4332 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
4333 		return (rv);
4334 	}
4335 
4336 	ldcp->hstate |= ATTR_INFO_SENT;
4337 	DBG2(vgenp, ldcp, "ATTR_INFO_SENT\n");
4338 
4339 	return (VGEN_SUCCESS);
4340 }
4341 
4342 /* send descriptor ring register message to the peer over ldc */
4343 static int
4344 vgen_send_dring_reg(vgen_ldc_t *ldcp)
4345 {
4346 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
4347 	vio_dring_reg_msg_t	msg;
4348 	vio_msg_tag_t		*tagp = &msg.tag;
4349 	int		rv;
4350 
4351 	bzero(&msg, sizeof (msg));
4352 
4353 	tagp->vio_msgtype = VIO_TYPE_CTRL;
4354 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
4355 	tagp->vio_subtype_env = VIO_DRING_REG;
4356 	tagp->vio_sid = ldcp->local_sid;
4357 
4358 	/* get dring info msg payload from ldcp->local */
4359 	bcopy(&(ldcp->local_hparams.dring_cookie), (msg.cookie),
4360 	    sizeof (ldc_mem_cookie_t));
4361 	msg.ncookies = ldcp->local_hparams.num_dcookies;
4362 	msg.num_descriptors = ldcp->local_hparams.num_desc;
4363 	msg.descriptor_size = ldcp->local_hparams.desc_size;
4364 
4365 	/*
4366 	 * dring_ident is set to 0. After mapping the dring, peer sets this
4367 	 * value and sends it in the ack, which is saved in
4368 	 * vgen_handle_dring_reg().
4369 	 */
4370 	msg.dring_ident = 0;
4371 
4372 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (msg), B_FALSE);
4373 	if (rv != VGEN_SUCCESS) {
4374 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
4375 		return (rv);
4376 	}
4377 
4378 	ldcp->hstate |= DRING_INFO_SENT;
4379 	DBG2(vgenp, ldcp, "DRING_INFO_SENT \n");
4380 
4381 	return (VGEN_SUCCESS);
4382 }
4383 
4384 static int
4385 vgen_send_rdx_info(vgen_ldc_t *ldcp)
4386 {
4387 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4388 	vio_rdx_msg_t	rdxmsg;
4389 	vio_msg_tag_t	*tagp = &rdxmsg.tag;
4390 	int		rv;
4391 
4392 	bzero(&rdxmsg, sizeof (rdxmsg));
4393 
4394 	tagp->vio_msgtype = VIO_TYPE_CTRL;
4395 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
4396 	tagp->vio_subtype_env = VIO_RDX;
4397 	tagp->vio_sid = ldcp->local_sid;
4398 
4399 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (rdxmsg), B_FALSE);
4400 	if (rv != VGEN_SUCCESS) {
4401 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
4402 		return (rv);
4403 	}
4404 
4405 	ldcp->hstate |= RDX_INFO_SENT;
4406 	DBG2(vgenp, ldcp, "RDX_INFO_SENT\n");
4407 
4408 	return (VGEN_SUCCESS);
4409 }
4410 
4411 /* send descriptor ring data message to the peer over ldc */
4412 static int
4413 vgen_send_dring_data(vgen_ldc_t *ldcp, uint32_t start, int32_t end)
4414 {
4415 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4416 	vio_dring_msg_t	dringmsg, *msgp = &dringmsg;
4417 	vio_msg_tag_t	*tagp = &msgp->tag;
4418 	vgen_stats_t	*statsp = &ldcp->stats;
4419 	int		rv;
4420 
4421 	bzero(msgp, sizeof (*msgp));
4422 
4423 	tagp->vio_msgtype = VIO_TYPE_DATA;
4424 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
4425 	tagp->vio_subtype_env = VIO_DRING_DATA;
4426 	tagp->vio_sid = ldcp->local_sid;
4427 
4428 	msgp->dring_ident = ldcp->local_hparams.dring_ident;
4429 	msgp->start_idx = start;
4430 	msgp->end_idx = end;
4431 
4432 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (dringmsg), B_TRUE);
4433 	if (rv != VGEN_SUCCESS) {
4434 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
4435 		return (rv);
4436 	}
4437 
4438 	statsp->dring_data_msgs++;
4439 
4440 	DBG2(vgenp, ldcp, "DRING_DATA_SENT \n");
4441 
4442 	return (VGEN_SUCCESS);
4443 }
4444 
4445 /* send multicast addr info message to vsw */
4446 static int
4447 vgen_send_mcast_info(vgen_ldc_t *ldcp)
4448 {
4449 	vnet_mcast_msg_t	mcastmsg;
4450 	vnet_mcast_msg_t	*msgp;
4451 	vio_msg_tag_t		*tagp;
4452 	vgen_t			*vgenp;
4453 	struct ether_addr	*mca;
4454 	int			rv;
4455 	int			i;
4456 	uint32_t		size;
4457 	uint32_t		mccount;
4458 	uint32_t		n;
4459 
4460 	msgp = &mcastmsg;
4461 	tagp = &msgp->tag;
4462 	vgenp = LDC_TO_VGEN(ldcp);
4463 
4464 	mccount = vgenp->mccount;
4465 	i = 0;
4466 
4467 	do {
4468 		tagp->vio_msgtype = VIO_TYPE_CTRL;
4469 		tagp->vio_subtype = VIO_SUBTYPE_INFO;
4470 		tagp->vio_subtype_env = VNET_MCAST_INFO;
4471 		tagp->vio_sid = ldcp->local_sid;
4472 
4473 		n = ((mccount >= VNET_NUM_MCAST) ? VNET_NUM_MCAST : mccount);
4474 		size = n * sizeof (struct ether_addr);
4475 
4476 		mca = &(vgenp->mctab[i]);
4477 		bcopy(mca, (msgp->mca), size);
4478 		msgp->set = B_TRUE;
4479 		msgp->count = n;
4480 
4481 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msgp),
4482 		    B_FALSE);
4483 		if (rv != VGEN_SUCCESS) {
4484 			DWARN(vgenp, ldcp, "vgen_sendmsg err(%d)\n", rv);
4485 			return (rv);
4486 		}
4487 
4488 		mccount -= n;
4489 		i += n;
4490 
4491 	} while (mccount);
4492 
4493 	return (VGEN_SUCCESS);
4494 }
4495 
4496 /* Initiate Phase 2 of handshake */
4497 static int
4498 vgen_handshake_phase2(vgen_ldc_t *ldcp)
4499 {
4500 	int rv;
4501 	uint32_t ncookies = 0;
4502 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4503 
4504 #ifdef DEBUG
4505 	if (vgen_hdbg & HDBG_OUT_STATE) {
4506 		/* simulate out of state condition */
4507 		vgen_hdbg &= ~(HDBG_OUT_STATE);
4508 		rv = vgen_send_rdx_info(ldcp);
4509 		return (rv);
4510 	}
4511 	if (vgen_hdbg & HDBG_TIMEOUT) {
4512 		/* simulate timeout condition */
4513 		vgen_hdbg &= ~(HDBG_TIMEOUT);
4514 		return (VGEN_SUCCESS);
4515 	}
4516 #endif
4517 	rv = vgen_send_attr_info(ldcp);
4518 	if (rv != VGEN_SUCCESS) {
4519 		return (rv);
4520 	}
4521 
4522 	/* Bind descriptor ring to the channel */
4523 	if (ldcp->num_txdcookies == 0) {
4524 		rv = ldc_mem_dring_bind(ldcp->ldc_handle, ldcp->tx_dhandle,
4525 		    LDC_DIRECT_MAP | LDC_SHADOW_MAP, LDC_MEM_RW,
4526 		    &ldcp->tx_dcookie, &ncookies);
4527 		if (rv != 0) {
4528 			DWARN(vgenp, ldcp, "ldc_mem_dring_bind failed "
4529 			    "rv(%x)\n", rv);
4530 			return (rv);
4531 		}
4532 		ASSERT(ncookies == 1);
4533 		ldcp->num_txdcookies = ncookies;
4534 	}
4535 
4536 	/* update local dring_info params */
4537 	bcopy(&(ldcp->tx_dcookie), &(ldcp->local_hparams.dring_cookie),
4538 	    sizeof (ldc_mem_cookie_t));
4539 	ldcp->local_hparams.num_dcookies = ldcp->num_txdcookies;
4540 	ldcp->local_hparams.num_desc = ldcp->num_txds;
4541 	ldcp->local_hparams.desc_size = sizeof (vnet_public_desc_t);
4542 
4543 	rv = vgen_send_dring_reg(ldcp);
4544 	if (rv != VGEN_SUCCESS) {
4545 		return (rv);
4546 	}
4547 
4548 	return (VGEN_SUCCESS);
4549 }
4550 
4551 /*
4552  * Set vnet-protocol-version dependent functions based on version.
4553  */
4554 static void
4555 vgen_set_vnet_proto_ops(vgen_ldc_t *ldcp)
4556 {
4557 	vgen_hparams_t	*lp = &ldcp->local_hparams;
4558 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4559 
4560 	if (VGEN_VER_GTEQ(ldcp, 1, 5)) {
4561 		vgen_port_t	*portp = ldcp->portp;
4562 		vnet_t		*vnetp = vgenp->vnetp;
4563 		/*
4564 		 * If the version negotiated with vswitch is >= 1.5 (link
4565 		 * status update support), set the required bits in our
4566 		 * attributes if this vnet device has been configured to get
4567 		 * physical link state updates.
4568 		 */
4569 		if (portp == vgenp->vsw_portp && vnetp->pls_update == B_TRUE) {
4570 			lp->physlink_update = PHYSLINK_UPDATE_STATE;
4571 		} else {
4572 			lp->physlink_update = PHYSLINK_UPDATE_NONE;
4573 		}
4574 	}
4575 
4576 	if (VGEN_VER_GTEQ(ldcp, 1, 4)) {
4577 		/*
4578 		 * If the version negotiated with peer is >= 1.4(Jumbo Frame
4579 		 * Support), set the mtu in our attributes to max_frame_size.
4580 		 */
4581 		lp->mtu = vgenp->max_frame_size;
4582 	} else  if (VGEN_VER_EQ(ldcp, 1, 3)) {
4583 		/*
4584 		 * If the version negotiated with peer is == 1.3 (Vlan Tag
4585 		 * Support) set the attr.mtu to ETHERMAX + VLAN_TAGSZ.
4586 		 */
4587 		lp->mtu = ETHERMAX + VLAN_TAGSZ;
4588 	} else {
4589 		vgen_port_t	*portp = ldcp->portp;
4590 		vnet_t		*vnetp = vgenp->vnetp;
4591 		/*
4592 		 * Pre-1.3 peers expect max frame size of ETHERMAX.
4593 		 * We can negotiate that size with those peers provided the
4594 		 * following conditions are true:
4595 		 * - Only pvid is defined for our peer and there are no vids.
4596 		 * - pvids are equal.
4597 		 * If the above conditions are true, then we can send/recv only
4598 		 * untagged frames of max size ETHERMAX.
4599 		 */
4600 		if (portp->nvids == 0 && portp->pvid == vnetp->pvid) {
4601 			lp->mtu = ETHERMAX;
4602 		}
4603 	}
4604 
4605 	if (VGEN_VER_GTEQ(ldcp, 1, 2)) {
4606 		/* Versions >= 1.2 */
4607 
4608 		if (VGEN_PRI_ETH_DEFINED(vgenp)) {
4609 			/*
4610 			 * enable priority routines and pkt mode only if
4611 			 * at least one pri-eth-type is specified in MD.
4612 			 */
4613 
4614 			ldcp->tx = vgen_ldcsend;
4615 			ldcp->rx_pktdata = vgen_handle_pkt_data;
4616 
4617 			/* set xfer mode for vgen_send_attr_info() */
4618 			lp->xfer_mode = VIO_PKT_MODE | VIO_DRING_MODE_V1_2;
4619 
4620 		} else {
4621 			/* no priority eth types defined in MD */
4622 
4623 			ldcp->tx = vgen_ldcsend_dring;
4624 			ldcp->rx_pktdata = vgen_handle_pkt_data_nop;
4625 
4626 			/* set xfer mode for vgen_send_attr_info() */
4627 			lp->xfer_mode = VIO_DRING_MODE_V1_2;
4628 
4629 		}
4630 	} else {
4631 		/* Versions prior to 1.2  */
4632 
4633 		vgen_reset_vnet_proto_ops(ldcp);
4634 	}
4635 }
4636 
4637 /*
4638  * Reset vnet-protocol-version dependent functions to pre-v1.2.
4639  */
4640 static void
4641 vgen_reset_vnet_proto_ops(vgen_ldc_t *ldcp)
4642 {
4643 	vgen_hparams_t	*lp = &ldcp->local_hparams;
4644 
4645 	ldcp->tx = vgen_ldcsend_dring;
4646 	ldcp->rx_pktdata = vgen_handle_pkt_data_nop;
4647 
4648 	/* set xfer mode for vgen_send_attr_info() */
4649 	lp->xfer_mode = VIO_DRING_MODE_V1_0;
4650 }
4651 
4652 static void
4653 vgen_vlan_unaware_port_reset(vgen_port_t *portp)
4654 {
4655 	vgen_ldclist_t	*ldclp;
4656 	vgen_ldc_t	*ldcp;
4657 	vgen_t		*vgenp = portp->vgenp;
4658 	vnet_t		*vnetp = vgenp->vnetp;
4659 
4660 	ldclp = &portp->ldclist;
4661 
4662 	READ_ENTER(&ldclp->rwlock);
4663 
4664 	/*
4665 	 * NOTE: for now, we will assume we have a single channel.
4666 	 */
4667 	if (ldclp->headp == NULL) {
4668 		RW_EXIT(&ldclp->rwlock);
4669 		return;
4670 	}
4671 	ldcp = ldclp->headp;
4672 
4673 	mutex_enter(&ldcp->cblock);
4674 
4675 	/*
4676 	 * If the peer is vlan_unaware(ver < 1.3), reset channel and terminate
4677 	 * the connection. See comments in vgen_set_vnet_proto_ops().
4678 	 */
4679 	if (ldcp->hphase == VH_DONE && VGEN_VER_LT(ldcp, 1, 3) &&
4680 	    (portp->nvids != 0 || portp->pvid != vnetp->pvid)) {
4681 		vgen_ldc_reset(ldcp);
4682 	}
4683 
4684 	mutex_exit(&ldcp->cblock);
4685 
4686 	RW_EXIT(&ldclp->rwlock);
4687 }
4688 
4689 static void
4690 vgen_port_reset(vgen_port_t *portp)
4691 {
4692 	vgen_ldclist_t	*ldclp;
4693 	vgen_ldc_t	*ldcp;
4694 
4695 	ldclp = &portp->ldclist;
4696 
4697 	READ_ENTER(&ldclp->rwlock);
4698 
4699 	/*
4700 	 * NOTE: for now, we will assume we have a single channel.
4701 	 */
4702 	if (ldclp->headp == NULL) {
4703 		RW_EXIT(&ldclp->rwlock);
4704 		return;
4705 	}
4706 	ldcp = ldclp->headp;
4707 
4708 	mutex_enter(&ldcp->cblock);
4709 
4710 	vgen_ldc_reset(ldcp);
4711 
4712 	mutex_exit(&ldcp->cblock);
4713 
4714 	RW_EXIT(&ldclp->rwlock);
4715 }
4716 
4717 static void
4718 vgen_reset_vlan_unaware_ports(vgen_t *vgenp)
4719 {
4720 	vgen_port_t	*portp;
4721 	vgen_portlist_t	*plistp;
4722 
4723 	plistp = &(vgenp->vgenports);
4724 	READ_ENTER(&plistp->rwlock);
4725 
4726 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
4727 
4728 		vgen_vlan_unaware_port_reset(portp);
4729 
4730 	}
4731 
4732 	RW_EXIT(&plistp->rwlock);
4733 }
4734 
4735 static void
4736 vgen_reset_vsw_port(vgen_t *vgenp)
4737 {
4738 	vgen_port_t	*portp;
4739 
4740 	if ((portp = vgenp->vsw_portp) != NULL) {
4741 		vgen_port_reset(portp);
4742 	}
4743 }
4744 
4745 /*
4746  * This function resets the handshake phase to VH_PHASE0(pre-handshake phase).
4747  * This can happen after a channel comes up (status: LDC_UP) or
4748  * when handshake gets terminated due to various conditions.
4749  */
4750 static void
4751 vgen_reset_hphase(vgen_ldc_t *ldcp)
4752 {
4753 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4754 	ldc_status_t istatus;
4755 	int rv;
4756 
4757 	DBG1(vgenp, ldcp, "enter\n");
4758 	/* reset hstate and hphase */
4759 	ldcp->hstate = 0;
4760 	ldcp->hphase = VH_PHASE0;
4761 
4762 	vgen_reset_vnet_proto_ops(ldcp);
4763 
4764 	/*
4765 	 * Save the id of pending handshake timer in cancel_htid.
4766 	 * This will be checked in vgen_ldc_cb() and the handshake timer will
4767 	 * be cancelled after releasing cblock.
4768 	 */
4769 	if (ldcp->htid) {
4770 		ldcp->cancel_htid = ldcp->htid;
4771 		ldcp->htid = 0;
4772 	}
4773 
4774 	if (ldcp->local_hparams.dring_ready) {
4775 		ldcp->local_hparams.dring_ready = B_FALSE;
4776 	}
4777 
4778 	/* Unbind tx descriptor ring from the channel */
4779 	if (ldcp->num_txdcookies) {
4780 		rv = ldc_mem_dring_unbind(ldcp->tx_dhandle);
4781 		if (rv != 0) {
4782 			DWARN(vgenp, ldcp, "ldc_mem_dring_unbind failed\n");
4783 		}
4784 		ldcp->num_txdcookies = 0;
4785 	}
4786 
4787 	if (ldcp->peer_hparams.dring_ready) {
4788 		ldcp->peer_hparams.dring_ready = B_FALSE;
4789 		/* Unmap peer's dring */
4790 		(void) ldc_mem_dring_unmap(ldcp->rx_dhandle);
4791 		vgen_clobber_rxds(ldcp);
4792 	}
4793 
4794 	vgen_clobber_tbufs(ldcp);
4795 
4796 	/*
4797 	 * clear local handshake params and initialize.
4798 	 */
4799 	bzero(&(ldcp->local_hparams), sizeof (ldcp->local_hparams));
4800 
4801 	/* set version to the highest version supported */
4802 	ldcp->local_hparams.ver_major =
4803 	    ldcp->vgen_versions[0].ver_major;
4804 	ldcp->local_hparams.ver_minor =
4805 	    ldcp->vgen_versions[0].ver_minor;
4806 	ldcp->local_hparams.dev_class = VDEV_NETWORK;
4807 
4808 	/* set attr_info params */
4809 	ldcp->local_hparams.mtu = vgenp->max_frame_size;
4810 	ldcp->local_hparams.addr =
4811 	    vnet_macaddr_strtoul(vgenp->macaddr);
4812 	ldcp->local_hparams.addr_type = ADDR_TYPE_MAC;
4813 	ldcp->local_hparams.xfer_mode = VIO_DRING_MODE_V1_0;
4814 	ldcp->local_hparams.ack_freq = 0;	/* don't need acks */
4815 	ldcp->local_hparams.physlink_update = PHYSLINK_UPDATE_NONE;
4816 
4817 	/*
4818 	 * Note: dring is created, but not bound yet.
4819 	 * local dring_info params will be updated when we bind the dring in
4820 	 * vgen_handshake_phase2().
4821 	 * dring_ident is set to 0. After mapping the dring, peer sets this
4822 	 * value and sends it in the ack, which is saved in
4823 	 * vgen_handle_dring_reg().
4824 	 */
4825 	ldcp->local_hparams.dring_ident = 0;
4826 
4827 	/* clear peer_hparams */
4828 	bzero(&(ldcp->peer_hparams), sizeof (ldcp->peer_hparams));
4829 
4830 	/* reset the channel if required */
4831 #ifdef	VNET_IOC_DEBUG
4832 	if (ldcp->need_ldc_reset && !ldcp->link_down_forced) {
4833 #else
4834 	if (ldcp->need_ldc_reset) {
4835 #endif
4836 		DWARN(vgenp, ldcp, "Doing Channel Reset...\n");
4837 		ldcp->need_ldc_reset = B_FALSE;
4838 		(void) ldc_down(ldcp->ldc_handle);
4839 		(void) ldc_status(ldcp->ldc_handle, &istatus);
4840 		DBG2(vgenp, ldcp, "Reset Done,ldc_status(%x)\n", istatus);
4841 		ldcp->ldc_status = istatus;
4842 
4843 		/* clear sids */
4844 		ldcp->local_sid = 0;
4845 		ldcp->peer_sid = 0;
4846 
4847 		/* try to bring the channel up */
4848 		rv = ldc_up(ldcp->ldc_handle);
4849 		if (rv != 0) {
4850 			DWARN(vgenp, ldcp, "ldc_up err rv(%d)\n", rv);
4851 		}
4852 
4853 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
4854 			DWARN(vgenp, ldcp, "ldc_status err\n");
4855 		} else {
4856 			ldcp->ldc_status = istatus;
4857 		}
4858 	}
4859 }
4860 
4861 /* wrapper function for vgen_reset_hphase */
4862 static void
4863 vgen_handshake_reset(vgen_ldc_t *ldcp)
4864 {
4865 	vgen_t  *vgenp = LDC_TO_VGEN(ldcp);
4866 
4867 	ASSERT(MUTEX_HELD(&ldcp->cblock));
4868 	mutex_enter(&ldcp->rxlock);
4869 	mutex_enter(&ldcp->wrlock);
4870 	mutex_enter(&ldcp->txlock);
4871 	mutex_enter(&ldcp->tclock);
4872 
4873 	vgen_reset_hphase(ldcp);
4874 
4875 	mutex_exit(&ldcp->tclock);
4876 	mutex_exit(&ldcp->txlock);
4877 	mutex_exit(&ldcp->wrlock);
4878 	mutex_exit(&ldcp->rxlock);
4879 
4880 	/*
4881 	 * As the connection is now reset, mark the channel
4882 	 * link_state as 'down' and notify the stack if needed.
4883 	 */
4884 	if (ldcp->link_state != LINK_STATE_DOWN) {
4885 		ldcp->link_state = LINK_STATE_DOWN;
4886 
4887 		if (ldcp->portp == vgenp->vsw_portp) { /* vswitch port ? */
4888 			/*
4889 			 * As the channel link is down, mark physical link also
4890 			 * as down. After the channel comes back up and
4891 			 * handshake completes, we will get an update on the
4892 			 * physlink state from vswitch (if this device has been
4893 			 * configured to get phys link updates).
4894 			 */
4895 			vgenp->phys_link_state = LINK_STATE_DOWN;
4896 
4897 			/* Now update the stack */
4898 			mutex_exit(&ldcp->cblock);
4899 			vgen_link_update(vgenp, ldcp->link_state);
4900 			mutex_enter(&ldcp->cblock);
4901 		}
4902 	}
4903 }
4904 
4905 /*
4906  * Initiate handshake with the peer by sending various messages
4907  * based on the handshake-phase that the channel is currently in.
4908  */
4909 static void
4910 vgen_handshake(vgen_ldc_t *ldcp)
4911 {
4912 	uint32_t	hphase = ldcp->hphase;
4913 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4914 	ldc_status_t	istatus;
4915 	int		rv = 0;
4916 
4917 	switch (hphase) {
4918 
4919 	case VH_PHASE1:
4920 
4921 		/*
4922 		 * start timer, for entire handshake process, turn this timer
4923 		 * off if all phases of handshake complete successfully and
4924 		 * hphase goes to VH_DONE(below) or
4925 		 * vgen_reset_hphase() gets called or
4926 		 * channel is reset due to errors or
4927 		 * vgen_ldc_uninit() is invoked(vgen_stop).
4928 		 */
4929 		ASSERT(ldcp->htid == 0);
4930 		ldcp->htid = timeout(vgen_hwatchdog, (caddr_t)ldcp,
4931 		    drv_usectohz(vgen_hwd_interval * MICROSEC));
4932 
4933 		/* Phase 1 involves negotiating the version */
4934 		rv = vgen_send_version_negotiate(ldcp);
4935 		break;
4936 
4937 	case VH_PHASE2:
4938 		rv = vgen_handshake_phase2(ldcp);
4939 		break;
4940 
4941 	case VH_PHASE3:
4942 		rv = vgen_send_rdx_info(ldcp);
4943 		break;
4944 
4945 	case VH_DONE:
4946 		/*
4947 		 * Save the id of pending handshake timer in cancel_htid.
4948 		 * This will be checked in vgen_ldc_cb() and the handshake
4949 		 * timer will be cancelled after releasing cblock.
4950 		 */
4951 		if (ldcp->htid) {
4952 			ldcp->cancel_htid = ldcp->htid;
4953 			ldcp->htid = 0;
4954 		}
4955 		ldcp->hretries = 0;
4956 		DBG1(vgenp, ldcp, "Handshake Done\n");
4957 
4958 		/*
4959 		 * The channel is up and handshake is done successfully. Now we
4960 		 * can mark the channel link_state as 'up'. We also notify the
4961 		 * stack if the channel is connected to vswitch.
4962 		 */
4963 		ldcp->link_state = LINK_STATE_UP;
4964 
4965 		if (ldcp->portp == vgenp->vsw_portp) {
4966 			/*
4967 			 * If this channel(port) is connected to vsw,
4968 			 * need to sync multicast table with vsw.
4969 			 */
4970 			rv = vgen_send_mcast_info(ldcp);
4971 			if (rv != VGEN_SUCCESS) {
4972 				break;
4973 			}
4974 
4975 			if (vgenp->pls_negotiated == B_FALSE) {
4976 				/*
4977 				 * We haven't negotiated with vswitch to get
4978 				 * physical link state updates. We can update
4979 				 * update the stack at this point as the
4980 				 * channel to vswitch is up and the handshake
4981 				 * is done successfully.
4982 				 *
4983 				 * If we have negotiated to get physical link
4984 				 * state updates, then we won't notify the
4985 				 * the stack here; we do that as soon as
4986 				 * vswitch sends us the initial phys link state
4987 				 * (see vgen_handle_physlink_info()).
4988 				 */
4989 				mutex_exit(&ldcp->cblock);
4990 				vgen_link_update(vgenp, ldcp->link_state);
4991 				mutex_enter(&ldcp->cblock);
4992 			}
4993 
4994 		}
4995 
4996 		/*
4997 		 * Check if mac layer should be notified to restart
4998 		 * transmissions. This can happen if the channel got
4999 		 * reset and vgen_clobber_tbufs() is called, while
5000 		 * need_resched is set.
5001 		 */
5002 		mutex_enter(&ldcp->tclock);
5003 		if (ldcp->need_resched) {
5004 			vio_net_tx_update_t vtx_update =
5005 			    ldcp->portp->vcb.vio_net_tx_update;
5006 
5007 			ldcp->need_resched = B_FALSE;
5008 			vtx_update(ldcp->portp->vhp);
5009 		}
5010 		mutex_exit(&ldcp->tclock);
5011 
5012 		break;
5013 
5014 	default:
5015 		break;
5016 	}
5017 
5018 	if (rv == ECONNRESET) {
5019 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
5020 			DWARN(vgenp, ldcp, "ldc_status err\n");
5021 		} else {
5022 			ldcp->ldc_status = istatus;
5023 		}
5024 		vgen_handle_evt_reset(ldcp);
5025 	} else if (rv) {
5026 		vgen_handshake_reset(ldcp);
5027 	}
5028 }
5029 
5030 /*
5031  * Check if the current handshake phase has completed successfully and
5032  * return the status.
5033  */
5034 static int
5035 vgen_handshake_done(vgen_ldc_t *ldcp)
5036 {
5037 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
5038 	uint32_t	hphase = ldcp->hphase;
5039 	int 		status = 0;
5040 
5041 	switch (hphase) {
5042 
5043 	case VH_PHASE1:
5044 		/*
5045 		 * Phase1 is done, if version negotiation
5046 		 * completed successfully.
5047 		 */
5048 		status = ((ldcp->hstate & VER_NEGOTIATED) ==
5049 		    VER_NEGOTIATED);
5050 		break;
5051 
5052 	case VH_PHASE2:
5053 		/*
5054 		 * Phase 2 is done, if attr info and dring info
5055 		 * have been exchanged successfully.
5056 		 */
5057 		status = (((ldcp->hstate & ATTR_INFO_EXCHANGED) ==
5058 		    ATTR_INFO_EXCHANGED) &&
5059 		    ((ldcp->hstate & DRING_INFO_EXCHANGED) ==
5060 		    DRING_INFO_EXCHANGED));
5061 		break;
5062 
5063 	case VH_PHASE3:
5064 		/* Phase 3 is done, if rdx msg has been exchanged */
5065 		status = ((ldcp->hstate & RDX_EXCHANGED) ==
5066 		    RDX_EXCHANGED);
5067 		break;
5068 
5069 	default:
5070 		break;
5071 	}
5072 
5073 	if (status == 0) {
5074 		return (VGEN_FAILURE);
5075 	}
5076 	DBG2(vgenp, ldcp, "PHASE(%d)\n", hphase);
5077 	return (VGEN_SUCCESS);
5078 }
5079 
5080 /* retry handshake on failure */
5081 static void
5082 vgen_handshake_retry(vgen_ldc_t *ldcp)
5083 {
5084 	/* reset handshake phase */
5085 	vgen_handshake_reset(ldcp);
5086 
5087 	/* handshake retry is specified and the channel is UP */
5088 	if (vgen_max_hretries && (ldcp->ldc_status == LDC_UP)) {
5089 		if (ldcp->hretries++ < vgen_max_hretries) {
5090 			ldcp->local_sid = ddi_get_lbolt();
5091 			vgen_handshake(vh_nextphase(ldcp));
5092 		}
5093 	}
5094 }
5095 
5096 
5097 /*
5098  * Link State Update Notes:
5099  * The link state of the channel connected to vswitch is reported as the link
5100  * state of the vnet device, by default. If the channel is down or reset, then
5101  * the link state is marked 'down'. If the channel is 'up' *and* handshake
5102  * between the vnet and vswitch is successful, then the link state is marked
5103  * 'up'. If physical network link state is desired, then the vnet device must
5104  * be configured to get physical link updates and the 'linkprop' property
5105  * in the virtual-device MD node indicates this. As part of attribute exchange
5106  * the vnet device negotiates with the vswitch to obtain physical link state
5107  * updates. If it successfully negotiates, vswitch sends an initial physlink
5108  * msg once the handshake is done and further whenever the physical link state
5109  * changes. Currently we don't have mac layer interfaces to report two distinct
5110  * link states - virtual and physical. Thus, if the vnet has been configured to
5111  * get physical link updates, then the link status will be reported as 'up'
5112  * only when both the virtual and physical links are up.
5113  */
5114 static void
5115 vgen_link_update(vgen_t *vgenp, link_state_t link_state)
5116 {
5117 	vnet_link_update(vgenp->vnetp, link_state);
5118 }
5119 
5120 /*
5121  * Handle a version info msg from the peer or an ACK/NACK from the peer
5122  * to a version info msg that we sent.
5123  */
5124 static int
5125 vgen_handle_version_negotiate(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5126 {
5127 	vgen_t		*vgenp;
5128 	vio_ver_msg_t	*vermsg = (vio_ver_msg_t *)tagp;
5129 	int		ack = 0;
5130 	int		failed = 0;
5131 	int		idx;
5132 	vgen_ver_t	*versions = ldcp->vgen_versions;
5133 	int		rv = 0;
5134 
5135 	vgenp = LDC_TO_VGEN(ldcp);
5136 	DBG1(vgenp, ldcp, "enter\n");
5137 	switch (tagp->vio_subtype) {
5138 	case VIO_SUBTYPE_INFO:
5139 
5140 		/*  Cache sid of peer if this is the first time */
5141 		if (ldcp->peer_sid == 0) {
5142 			DBG2(vgenp, ldcp, "Caching peer_sid(%x)\n",
5143 			    tagp->vio_sid);
5144 			ldcp->peer_sid = tagp->vio_sid;
5145 		}
5146 
5147 		if (ldcp->hphase != VH_PHASE1) {
5148 			/*
5149 			 * If we are not already in VH_PHASE1, reset to
5150 			 * pre-handshake state, and initiate handshake
5151 			 * to the peer too.
5152 			 */
5153 			vgen_handshake_reset(ldcp);
5154 			vgen_handshake(vh_nextphase(ldcp));
5155 		}
5156 		ldcp->hstate |= VER_INFO_RCVD;
5157 
5158 		/* save peer's requested values */
5159 		ldcp->peer_hparams.ver_major = vermsg->ver_major;
5160 		ldcp->peer_hparams.ver_minor = vermsg->ver_minor;
5161 		ldcp->peer_hparams.dev_class = vermsg->dev_class;
5162 
5163 		if ((vermsg->dev_class != VDEV_NETWORK) &&
5164 		    (vermsg->dev_class != VDEV_NETWORK_SWITCH)) {
5165 			/* unsupported dev_class, send NACK */
5166 
5167 			DWARN(vgenp, ldcp, "Version Negotiation Failed\n");
5168 
5169 			tagp->vio_subtype = VIO_SUBTYPE_NACK;
5170 			tagp->vio_sid = ldcp->local_sid;
5171 			/* send reply msg back to peer */
5172 			rv = vgen_sendmsg(ldcp, (caddr_t)tagp,
5173 			    sizeof (*vermsg), B_FALSE);
5174 			if (rv != VGEN_SUCCESS) {
5175 				return (rv);
5176 			}
5177 			return (VGEN_FAILURE);
5178 		}
5179 
5180 		DBG2(vgenp, ldcp, "VER_INFO_RCVD, ver(%d,%d)\n",
5181 		    vermsg->ver_major,  vermsg->ver_minor);
5182 
5183 		idx = 0;
5184 
5185 		for (;;) {
5186 
5187 			if (vermsg->ver_major > versions[idx].ver_major) {
5188 
5189 				/* nack with next lower version */
5190 				tagp->vio_subtype = VIO_SUBTYPE_NACK;
5191 				vermsg->ver_major = versions[idx].ver_major;
5192 				vermsg->ver_minor = versions[idx].ver_minor;
5193 				break;
5194 			}
5195 
5196 			if (vermsg->ver_major == versions[idx].ver_major) {
5197 
5198 				/* major version match - ACK version */
5199 				tagp->vio_subtype = VIO_SUBTYPE_ACK;
5200 				ack = 1;
5201 
5202 				/*
5203 				 * lower minor version to the one this endpt
5204 				 * supports, if necessary
5205 				 */
5206 				if (vermsg->ver_minor >
5207 				    versions[idx].ver_minor) {
5208 					vermsg->ver_minor =
5209 					    versions[idx].ver_minor;
5210 					ldcp->peer_hparams.ver_minor =
5211 					    versions[idx].ver_minor;
5212 				}
5213 				break;
5214 			}
5215 
5216 			idx++;
5217 
5218 			if (idx == VGEN_NUM_VER) {
5219 
5220 				/* no version match - send NACK */
5221 				tagp->vio_subtype = VIO_SUBTYPE_NACK;
5222 				vermsg->ver_major = 0;
5223 				vermsg->ver_minor = 0;
5224 				failed = 1;
5225 				break;
5226 			}
5227 
5228 		}
5229 
5230 		tagp->vio_sid = ldcp->local_sid;
5231 
5232 		/* send reply msg back to peer */
5233 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*vermsg),
5234 		    B_FALSE);
5235 		if (rv != VGEN_SUCCESS) {
5236 			return (rv);
5237 		}
5238 
5239 		if (ack) {
5240 			ldcp->hstate |= VER_ACK_SENT;
5241 			DBG2(vgenp, ldcp, "VER_ACK_SENT, ver(%d,%d) \n",
5242 			    vermsg->ver_major, vermsg->ver_minor);
5243 		}
5244 		if (failed) {
5245 			DWARN(vgenp, ldcp, "Negotiation Failed\n");
5246 			return (VGEN_FAILURE);
5247 		}
5248 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5249 
5250 			/*  VER_ACK_SENT and VER_ACK_RCVD */
5251 
5252 			/* local and peer versions match? */
5253 			ASSERT((ldcp->local_hparams.ver_major ==
5254 			    ldcp->peer_hparams.ver_major) &&
5255 			    (ldcp->local_hparams.ver_minor ==
5256 			    ldcp->peer_hparams.ver_minor));
5257 
5258 			vgen_set_vnet_proto_ops(ldcp);
5259 
5260 			/* move to the next phase */
5261 			vgen_handshake(vh_nextphase(ldcp));
5262 		}
5263 
5264 		break;
5265 
5266 	case VIO_SUBTYPE_ACK:
5267 
5268 		if (ldcp->hphase != VH_PHASE1) {
5269 			/*  This should not happen. */
5270 			DWARN(vgenp, ldcp, "Invalid Phase(%u)\n", ldcp->hphase);
5271 			return (VGEN_FAILURE);
5272 		}
5273 
5274 		/* SUCCESS - we have agreed on a version */
5275 		ldcp->local_hparams.ver_major = vermsg->ver_major;
5276 		ldcp->local_hparams.ver_minor = vermsg->ver_minor;
5277 		ldcp->hstate |= VER_ACK_RCVD;
5278 
5279 		DBG2(vgenp, ldcp, "VER_ACK_RCVD, ver(%d,%d) \n",
5280 		    vermsg->ver_major,  vermsg->ver_minor);
5281 
5282 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5283 
5284 			/*  VER_ACK_SENT and VER_ACK_RCVD */
5285 
5286 			/* local and peer versions match? */
5287 			ASSERT((ldcp->local_hparams.ver_major ==
5288 			    ldcp->peer_hparams.ver_major) &&
5289 			    (ldcp->local_hparams.ver_minor ==
5290 			    ldcp->peer_hparams.ver_minor));
5291 
5292 			vgen_set_vnet_proto_ops(ldcp);
5293 
5294 			/* move to the next phase */
5295 			vgen_handshake(vh_nextphase(ldcp));
5296 		}
5297 		break;
5298 
5299 	case VIO_SUBTYPE_NACK:
5300 
5301 		if (ldcp->hphase != VH_PHASE1) {
5302 			/*  This should not happen.  */
5303 			DWARN(vgenp, ldcp, "VER_NACK_RCVD Invalid "
5304 			"Phase(%u)\n", ldcp->hphase);
5305 			return (VGEN_FAILURE);
5306 		}
5307 
5308 		DBG2(vgenp, ldcp, "VER_NACK_RCVD next ver(%d,%d)\n",
5309 		    vermsg->ver_major, vermsg->ver_minor);
5310 
5311 		/* check if version in NACK is zero */
5312 		if (vermsg->ver_major == 0 && vermsg->ver_minor == 0) {
5313 			/*
5314 			 * Version Negotiation has failed.
5315 			 */
5316 			DWARN(vgenp, ldcp, "Version Negotiation Failed\n");
5317 			return (VGEN_FAILURE);
5318 		}
5319 
5320 		idx = 0;
5321 
5322 		for (;;) {
5323 
5324 			if (vermsg->ver_major > versions[idx].ver_major) {
5325 				/* select next lower version */
5326 
5327 				ldcp->local_hparams.ver_major =
5328 				    versions[idx].ver_major;
5329 				ldcp->local_hparams.ver_minor =
5330 				    versions[idx].ver_minor;
5331 				break;
5332 			}
5333 
5334 			if (vermsg->ver_major == versions[idx].ver_major) {
5335 				/* major version match */
5336 
5337 				ldcp->local_hparams.ver_major =
5338 				    versions[idx].ver_major;
5339 
5340 				ldcp->local_hparams.ver_minor =
5341 				    versions[idx].ver_minor;
5342 				break;
5343 			}
5344 
5345 			idx++;
5346 
5347 			if (idx == VGEN_NUM_VER) {
5348 				/*
5349 				 * no version match.
5350 				 * Version Negotiation has failed.
5351 				 */
5352 				DWARN(vgenp, ldcp,
5353 				    "Version Negotiation Failed\n");
5354 				return (VGEN_FAILURE);
5355 			}
5356 
5357 		}
5358 
5359 		rv = vgen_send_version_negotiate(ldcp);
5360 		if (rv != VGEN_SUCCESS) {
5361 			return (rv);
5362 		}
5363 
5364 		break;
5365 	}
5366 
5367 	DBG1(vgenp, ldcp, "exit\n");
5368 	return (VGEN_SUCCESS);
5369 }
5370 
5371 /* Check if the attributes are supported */
5372 static int
5373 vgen_check_attr_info(vgen_ldc_t *ldcp, vnet_attr_msg_t *msg)
5374 {
5375 	vgen_hparams_t	*lp = &ldcp->local_hparams;
5376 
5377 	if ((msg->addr_type != ADDR_TYPE_MAC) ||
5378 	    (msg->ack_freq > 64) ||
5379 	    (msg->xfer_mode != lp->xfer_mode)) {
5380 		return (VGEN_FAILURE);
5381 	}
5382 
5383 	if (VGEN_VER_LT(ldcp, 1, 4)) {
5384 		/* versions < 1.4, mtu must match */
5385 		if (msg->mtu != lp->mtu) {
5386 			return (VGEN_FAILURE);
5387 		}
5388 	} else {
5389 		/* Ver >= 1.4, validate mtu of the peer is at least ETHERMAX */
5390 		if (msg->mtu < ETHERMAX) {
5391 			return (VGEN_FAILURE);
5392 		}
5393 	}
5394 
5395 	return (VGEN_SUCCESS);
5396 }
5397 
5398 /*
5399  * Handle an attribute info msg from the peer or an ACK/NACK from the peer
5400  * to an attr info msg that we sent.
5401  */
5402 static int
5403 vgen_handle_attr_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5404 {
5405 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
5406 	vnet_attr_msg_t	*msg = (vnet_attr_msg_t *)tagp;
5407 	vgen_hparams_t	*lp = &ldcp->local_hparams;
5408 	vgen_hparams_t	*rp = &ldcp->peer_hparams;
5409 	int		ack = 1;
5410 	int		rv = 0;
5411 	uint32_t	mtu;
5412 
5413 	DBG1(vgenp, ldcp, "enter\n");
5414 	if (ldcp->hphase != VH_PHASE2) {
5415 		DWARN(vgenp, ldcp, "Rcvd ATTR_INFO subtype(%d),"
5416 		" Invalid Phase(%u)\n",
5417 		    tagp->vio_subtype, ldcp->hphase);
5418 		return (VGEN_FAILURE);
5419 	}
5420 	switch (tagp->vio_subtype) {
5421 	case VIO_SUBTYPE_INFO:
5422 
5423 		DBG2(vgenp, ldcp, "ATTR_INFO_RCVD \n");
5424 		ldcp->hstate |= ATTR_INFO_RCVD;
5425 
5426 		/* save peer's values */
5427 		rp->mtu = msg->mtu;
5428 		rp->addr = msg->addr;
5429 		rp->addr_type = msg->addr_type;
5430 		rp->xfer_mode = msg->xfer_mode;
5431 		rp->ack_freq = msg->ack_freq;
5432 
5433 		rv = vgen_check_attr_info(ldcp, msg);
5434 		if (rv == VGEN_FAILURE) {
5435 			/* unsupported attr, send NACK */
5436 			ack = 0;
5437 		} else {
5438 
5439 			if (VGEN_VER_GTEQ(ldcp, 1, 4)) {
5440 
5441 				/*
5442 				 * Versions >= 1.4:
5443 				 * The mtu is negotiated down to the
5444 				 * minimum of our mtu and peer's mtu.
5445 				 */
5446 				mtu = MIN(msg->mtu, vgenp->max_frame_size);
5447 
5448 				/*
5449 				 * If we have received an ack for the attr info
5450 				 * that we sent, then check if the mtu computed
5451 				 * above matches the mtu that the peer had ack'd
5452 				 * (saved in local hparams). If they don't
5453 				 * match, we fail the handshake.
5454 				 */
5455 				if (ldcp->hstate & ATTR_ACK_RCVD) {
5456 					if (mtu != lp->mtu) {
5457 						/* send NACK */
5458 						ack = 0;
5459 					}
5460 				} else {
5461 					/*
5462 					 * Save the mtu computed above in our
5463 					 * attr parameters, so it gets sent in
5464 					 * the attr info from us to the peer.
5465 					 */
5466 					lp->mtu = mtu;
5467 				}
5468 
5469 				/* save the MIN mtu in the msg to be replied */
5470 				msg->mtu = mtu;
5471 
5472 			}
5473 		}
5474 
5475 
5476 		if (ack) {
5477 			tagp->vio_subtype = VIO_SUBTYPE_ACK;
5478 		} else {
5479 			tagp->vio_subtype = VIO_SUBTYPE_NACK;
5480 		}
5481 		tagp->vio_sid = ldcp->local_sid;
5482 
5483 		/* send reply msg back to peer */
5484 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msg),
5485 		    B_FALSE);
5486 		if (rv != VGEN_SUCCESS) {
5487 			return (rv);
5488 		}
5489 
5490 		if (ack) {
5491 			ldcp->hstate |= ATTR_ACK_SENT;
5492 			DBG2(vgenp, ldcp, "ATTR_ACK_SENT \n");
5493 		} else {
5494 			/* failed */
5495 			DWARN(vgenp, ldcp, "ATTR_NACK_SENT \n");
5496 			return (VGEN_FAILURE);
5497 		}
5498 
5499 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5500 			vgen_handshake(vh_nextphase(ldcp));
5501 		}
5502 
5503 		break;
5504 
5505 	case VIO_SUBTYPE_ACK:
5506 
5507 		if (VGEN_VER_GTEQ(ldcp, 1, 5) &&
5508 		    ldcp->portp == vgenp->vsw_portp) {
5509 			/*
5510 			 * Versions >= 1.5:
5511 			 * If the vnet device has been configured to get
5512 			 * physical link state updates, check the corresponding
5513 			 * bits in the ack msg, if the peer is vswitch.
5514 			 */
5515 			if (((lp->physlink_update &
5516 			    PHYSLINK_UPDATE_STATE_MASK) ==
5517 			    PHYSLINK_UPDATE_STATE) &&
5518 
5519 			    ((msg->physlink_update &
5520 			    PHYSLINK_UPDATE_STATE_MASK) ==
5521 			    PHYSLINK_UPDATE_STATE_ACK)) {
5522 				vgenp->pls_negotiated = B_TRUE;
5523 			} else {
5524 				vgenp->pls_negotiated = B_FALSE;
5525 			}
5526 		}
5527 
5528 		if (VGEN_VER_GTEQ(ldcp, 1, 4)) {
5529 			/*
5530 			 * Versions >= 1.4:
5531 			 * The ack msg sent by the peer contains the minimum of
5532 			 * our mtu (that we had sent in our attr info) and the
5533 			 * peer's mtu.
5534 			 *
5535 			 * If we have sent an ack for the attr info msg from
5536 			 * the peer, check if the mtu that was computed then
5537 			 * (saved in local hparams) matches the mtu that the
5538 			 * peer has ack'd. If they don't match, we fail the
5539 			 * handshake.
5540 			 */
5541 			if (ldcp->hstate & ATTR_ACK_SENT) {
5542 				if (lp->mtu != msg->mtu) {
5543 					return (VGEN_FAILURE);
5544 				}
5545 			} else {
5546 				/*
5547 				 * If the mtu ack'd by the peer is > our mtu
5548 				 * fail handshake. Otherwise, save the mtu, so
5549 				 * we can validate it when we receive attr info
5550 				 * from our peer.
5551 				 */
5552 				if (msg->mtu > lp->mtu) {
5553 					return (VGEN_FAILURE);
5554 				}
5555 				if (msg->mtu <= lp->mtu) {
5556 					lp->mtu = msg->mtu;
5557 				}
5558 			}
5559 		}
5560 
5561 		ldcp->hstate |= ATTR_ACK_RCVD;
5562 
5563 		DBG2(vgenp, ldcp, "ATTR_ACK_RCVD \n");
5564 
5565 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5566 			vgen_handshake(vh_nextphase(ldcp));
5567 		}
5568 		break;
5569 
5570 	case VIO_SUBTYPE_NACK:
5571 
5572 		DBG2(vgenp, ldcp, "ATTR_NACK_RCVD \n");
5573 		return (VGEN_FAILURE);
5574 	}
5575 	DBG1(vgenp, ldcp, "exit\n");
5576 	return (VGEN_SUCCESS);
5577 }
5578 
5579 /* Check if the dring info msg is ok */
5580 static int
5581 vgen_check_dring_reg(vio_dring_reg_msg_t *msg)
5582 {
5583 	/* check if msg contents are ok */
5584 	if ((msg->num_descriptors < 128) || (msg->descriptor_size <
5585 	    sizeof (vnet_public_desc_t))) {
5586 		return (VGEN_FAILURE);
5587 	}
5588 	return (VGEN_SUCCESS);
5589 }
5590 
5591 /*
5592  * Handle a descriptor ring register msg from the peer or an ACK/NACK from
5593  * the peer to a dring register msg that we sent.
5594  */
5595 static int
5596 vgen_handle_dring_reg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5597 {
5598 	vio_dring_reg_msg_t *msg = (vio_dring_reg_msg_t *)tagp;
5599 	ldc_mem_cookie_t dcookie;
5600 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5601 	int ack = 0;
5602 	int rv = 0;
5603 
5604 	DBG1(vgenp, ldcp, "enter\n");
5605 	if (ldcp->hphase < VH_PHASE2) {
5606 		/* dring_info can be rcvd in any of the phases after Phase1 */
5607 		DWARN(vgenp, ldcp,
5608 		    "Rcvd DRING_INFO Subtype (%d), Invalid Phase(%u)\n",
5609 		    tagp->vio_subtype, ldcp->hphase);
5610 		return (VGEN_FAILURE);
5611 	}
5612 	switch (tagp->vio_subtype) {
5613 	case VIO_SUBTYPE_INFO:
5614 
5615 		DBG2(vgenp, ldcp, "DRING_INFO_RCVD \n");
5616 		ldcp->hstate |= DRING_INFO_RCVD;
5617 		bcopy((msg->cookie), &dcookie, sizeof (dcookie));
5618 
5619 		ASSERT(msg->ncookies == 1);
5620 
5621 		if (vgen_check_dring_reg(msg) == VGEN_SUCCESS) {
5622 			/*
5623 			 * verified dring info msg to be ok,
5624 			 * now try to map the remote dring.
5625 			 */
5626 			rv = vgen_init_rxds(ldcp, msg->num_descriptors,
5627 			    msg->descriptor_size, &dcookie,
5628 			    msg->ncookies);
5629 			if (rv == DDI_SUCCESS) {
5630 				/* now we can ack the peer */
5631 				ack = 1;
5632 			}
5633 		}
5634 		if (ack == 0) {
5635 			/* failed, send NACK */
5636 			tagp->vio_subtype = VIO_SUBTYPE_NACK;
5637 		} else {
5638 			if (!(ldcp->peer_hparams.dring_ready)) {
5639 
5640 				/* save peer's dring_info values */
5641 				bcopy(&dcookie,
5642 				    &(ldcp->peer_hparams.dring_cookie),
5643 				    sizeof (dcookie));
5644 				ldcp->peer_hparams.num_desc =
5645 				    msg->num_descriptors;
5646 				ldcp->peer_hparams.desc_size =
5647 				    msg->descriptor_size;
5648 				ldcp->peer_hparams.num_dcookies =
5649 				    msg->ncookies;
5650 
5651 				/* set dring_ident for the peer */
5652 				ldcp->peer_hparams.dring_ident =
5653 				    (uint64_t)ldcp->rxdp;
5654 				/* return the dring_ident in ack msg */
5655 				msg->dring_ident =
5656 				    (uint64_t)ldcp->rxdp;
5657 
5658 				ldcp->peer_hparams.dring_ready = B_TRUE;
5659 			}
5660 			tagp->vio_subtype = VIO_SUBTYPE_ACK;
5661 		}
5662 		tagp->vio_sid = ldcp->local_sid;
5663 		/* send reply msg back to peer */
5664 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msg),
5665 		    B_FALSE);
5666 		if (rv != VGEN_SUCCESS) {
5667 			return (rv);
5668 		}
5669 
5670 		if (ack) {
5671 			ldcp->hstate |= DRING_ACK_SENT;
5672 			DBG2(vgenp, ldcp, "DRING_ACK_SENT");
5673 		} else {
5674 			DWARN(vgenp, ldcp, "DRING_NACK_SENT");
5675 			return (VGEN_FAILURE);
5676 		}
5677 
5678 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5679 			vgen_handshake(vh_nextphase(ldcp));
5680 		}
5681 
5682 		break;
5683 
5684 	case VIO_SUBTYPE_ACK:
5685 
5686 		ldcp->hstate |= DRING_ACK_RCVD;
5687 
5688 		DBG2(vgenp, ldcp, "DRING_ACK_RCVD");
5689 
5690 		if (!(ldcp->local_hparams.dring_ready)) {
5691 			/* local dring is now ready */
5692 			ldcp->local_hparams.dring_ready = B_TRUE;
5693 
5694 			/* save dring_ident acked by peer */
5695 			ldcp->local_hparams.dring_ident =
5696 			    msg->dring_ident;
5697 		}
5698 
5699 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5700 			vgen_handshake(vh_nextphase(ldcp));
5701 		}
5702 
5703 		break;
5704 
5705 	case VIO_SUBTYPE_NACK:
5706 
5707 		DBG2(vgenp, ldcp, "DRING_NACK_RCVD");
5708 		return (VGEN_FAILURE);
5709 	}
5710 	DBG1(vgenp, ldcp, "exit\n");
5711 	return (VGEN_SUCCESS);
5712 }
5713 
5714 /*
5715  * Handle a rdx info msg from the peer or an ACK/NACK
5716  * from the peer to a rdx info msg that we sent.
5717  */
5718 static int
5719 vgen_handle_rdx_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5720 {
5721 	int rv = 0;
5722 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
5723 
5724 	DBG1(vgenp, ldcp, "enter\n");
5725 	if (ldcp->hphase != VH_PHASE3) {
5726 		DWARN(vgenp, ldcp,
5727 		    "Rcvd RDX_INFO Subtype (%d), Invalid Phase(%u)\n",
5728 		    tagp->vio_subtype, ldcp->hphase);
5729 		return (VGEN_FAILURE);
5730 	}
5731 	switch (tagp->vio_subtype) {
5732 	case VIO_SUBTYPE_INFO:
5733 
5734 		DBG2(vgenp, ldcp, "RDX_INFO_RCVD \n");
5735 		ldcp->hstate |= RDX_INFO_RCVD;
5736 
5737 		tagp->vio_subtype = VIO_SUBTYPE_ACK;
5738 		tagp->vio_sid = ldcp->local_sid;
5739 		/* send reply msg back to peer */
5740 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (vio_rdx_msg_t),
5741 		    B_FALSE);
5742 		if (rv != VGEN_SUCCESS) {
5743 			return (rv);
5744 		}
5745 
5746 		ldcp->hstate |= RDX_ACK_SENT;
5747 		DBG2(vgenp, ldcp, "RDX_ACK_SENT \n");
5748 
5749 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5750 			vgen_handshake(vh_nextphase(ldcp));
5751 		}
5752 
5753 		break;
5754 
5755 	case VIO_SUBTYPE_ACK:
5756 
5757 		ldcp->hstate |= RDX_ACK_RCVD;
5758 
5759 		DBG2(vgenp, ldcp, "RDX_ACK_RCVD \n");
5760 
5761 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5762 			vgen_handshake(vh_nextphase(ldcp));
5763 		}
5764 		break;
5765 
5766 	case VIO_SUBTYPE_NACK:
5767 
5768 		DBG2(vgenp, ldcp, "RDX_NACK_RCVD \n");
5769 		return (VGEN_FAILURE);
5770 	}
5771 	DBG1(vgenp, ldcp, "exit\n");
5772 	return (VGEN_SUCCESS);
5773 }
5774 
5775 /* Handle ACK/NACK from vsw to a set multicast msg that we sent */
5776 static int
5777 vgen_handle_mcast_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5778 {
5779 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5780 	vnet_mcast_msg_t *msgp = (vnet_mcast_msg_t *)tagp;
5781 	struct ether_addr *addrp;
5782 	int count;
5783 	int i;
5784 
5785 	DBG1(vgenp, ldcp, "enter\n");
5786 	switch (tagp->vio_subtype) {
5787 
5788 	case VIO_SUBTYPE_INFO:
5789 
5790 		/* vnet shouldn't recv set mcast msg, only vsw handles it */
5791 		DWARN(vgenp, ldcp, "rcvd SET_MCAST_INFO \n");
5792 		break;
5793 
5794 	case VIO_SUBTYPE_ACK:
5795 
5796 		/* success adding/removing multicast addr */
5797 		DBG1(vgenp, ldcp, "rcvd SET_MCAST_ACK \n");
5798 		break;
5799 
5800 	case VIO_SUBTYPE_NACK:
5801 
5802 		DWARN(vgenp, ldcp, "rcvd SET_MCAST_NACK \n");
5803 		if (!(msgp->set)) {
5804 			/* multicast remove request failed */
5805 			break;
5806 		}
5807 
5808 		/* multicast add request failed */
5809 		for (count = 0; count < msgp->count; count++) {
5810 			addrp = &(msgp->mca[count]);
5811 
5812 			/* delete address from the table */
5813 			for (i = 0; i < vgenp->mccount; i++) {
5814 				if (ether_cmp(addrp,
5815 				    &(vgenp->mctab[i])) == 0) {
5816 					if (vgenp->mccount > 1) {
5817 						int t = vgenp->mccount - 1;
5818 						vgenp->mctab[i] =
5819 						    vgenp->mctab[t];
5820 					}
5821 					vgenp->mccount--;
5822 					break;
5823 				}
5824 			}
5825 		}
5826 		break;
5827 
5828 	}
5829 	DBG1(vgenp, ldcp, "exit\n");
5830 
5831 	return (VGEN_SUCCESS);
5832 }
5833 
5834 /*
5835  * Physical link information message from the peer. Only vswitch should send
5836  * us this message; if the vnet device has been configured to get physical link
5837  * state updates. Note that we must have already negotiated this with the
5838  * vswitch during attribute exchange phase of handshake.
5839  */
5840 static int
5841 vgen_handle_physlink_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5842 {
5843 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
5844 	vnet_physlink_msg_t	*msgp = (vnet_physlink_msg_t *)tagp;
5845 	link_state_t		link_state;
5846 	int			rv;
5847 
5848 	if (ldcp->portp != vgenp->vsw_portp) {
5849 		/*
5850 		 * drop the message and don't process; as we should
5851 		 * receive physlink_info message from only vswitch.
5852 		 */
5853 		return (VGEN_SUCCESS);
5854 	}
5855 
5856 	if (vgenp->pls_negotiated == B_FALSE) {
5857 		/*
5858 		 * drop the message and don't process; as we should receive
5859 		 * physlink_info message only if physlink update is enabled for
5860 		 * the device and negotiated with vswitch.
5861 		 */
5862 		return (VGEN_SUCCESS);
5863 	}
5864 
5865 	switch (tagp->vio_subtype) {
5866 
5867 	case VIO_SUBTYPE_INFO:
5868 
5869 		if ((msgp->physlink_info & VNET_PHYSLINK_STATE_MASK) ==
5870 		    VNET_PHYSLINK_STATE_UP) {
5871 			link_state = LINK_STATE_UP;
5872 		} else {
5873 			link_state = LINK_STATE_DOWN;
5874 		}
5875 
5876 		if (vgenp->phys_link_state != link_state) {
5877 			vgenp->phys_link_state = link_state;
5878 			mutex_exit(&ldcp->cblock);
5879 
5880 			/* Now update the stack */
5881 			vgen_link_update(vgenp, link_state);
5882 
5883 			mutex_enter(&ldcp->cblock);
5884 		}
5885 
5886 		tagp->vio_subtype = VIO_SUBTYPE_ACK;
5887 		tagp->vio_sid = ldcp->local_sid;
5888 
5889 		/* send reply msg back to peer */
5890 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp,
5891 		    sizeof (vnet_physlink_msg_t), B_FALSE);
5892 		if (rv != VGEN_SUCCESS) {
5893 			return (rv);
5894 		}
5895 		break;
5896 
5897 	case VIO_SUBTYPE_ACK:
5898 
5899 		/* vnet shouldn't recv physlink acks */
5900 		DWARN(vgenp, ldcp, "rcvd PHYSLINK_ACK \n");
5901 		break;
5902 
5903 	case VIO_SUBTYPE_NACK:
5904 
5905 		/* vnet shouldn't recv physlink nacks */
5906 		DWARN(vgenp, ldcp, "rcvd PHYSLINK_NACK \n");
5907 		break;
5908 
5909 	}
5910 	DBG1(vgenp, ldcp, "exit\n");
5911 
5912 	return (VGEN_SUCCESS);
5913 }
5914 
5915 /* handler for control messages received from the peer ldc end-point */
5916 static int
5917 vgen_handle_ctrlmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5918 {
5919 	int rv = 0;
5920 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5921 
5922 	DBG1(vgenp, ldcp, "enter\n");
5923 	switch (tagp->vio_subtype_env) {
5924 
5925 	case VIO_VER_INFO:
5926 		rv = vgen_handle_version_negotiate(ldcp, tagp);
5927 		break;
5928 
5929 	case VIO_ATTR_INFO:
5930 		rv = vgen_handle_attr_info(ldcp, tagp);
5931 		break;
5932 
5933 	case VIO_DRING_REG:
5934 		rv = vgen_handle_dring_reg(ldcp, tagp);
5935 		break;
5936 
5937 	case VIO_RDX:
5938 		rv = vgen_handle_rdx_info(ldcp, tagp);
5939 		break;
5940 
5941 	case VNET_MCAST_INFO:
5942 		rv = vgen_handle_mcast_info(ldcp, tagp);
5943 		break;
5944 
5945 	case VIO_DDS_INFO:
5946 		/*
5947 		 * If we are in the process of resetting the vswitch channel,
5948 		 * drop the dds message. A new handshake will be initiated
5949 		 * when the channel comes back up after the reset and dds
5950 		 * negotiation can then continue.
5951 		 */
5952 		if (ldcp->need_ldc_reset == B_TRUE) {
5953 			break;
5954 		}
5955 		rv = vgen_dds_rx(ldcp, tagp);
5956 		break;
5957 
5958 	case VNET_PHYSLINK_INFO:
5959 		rv = vgen_handle_physlink_info(ldcp, tagp);
5960 		break;
5961 	}
5962 
5963 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
5964 	return (rv);
5965 }
5966 
5967 /* handler for data messages received from the peer ldc end-point */
5968 static int
5969 vgen_handle_datamsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp, uint32_t msglen)
5970 {
5971 	int rv = 0;
5972 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5973 
5974 	DBG1(vgenp, ldcp, "enter\n");
5975 
5976 	if (ldcp->hphase != VH_DONE)
5977 		return (rv);
5978 
5979 	if (tagp->vio_subtype == VIO_SUBTYPE_INFO) {
5980 		rv = vgen_check_datamsg_seq(ldcp, tagp);
5981 		if (rv != 0) {
5982 			return (rv);
5983 		}
5984 	}
5985 
5986 	switch (tagp->vio_subtype_env) {
5987 	case VIO_DRING_DATA:
5988 		rv = vgen_handle_dring_data(ldcp, tagp);
5989 		break;
5990 
5991 	case VIO_PKT_DATA:
5992 		ldcp->rx_pktdata((void *)ldcp, (void *)tagp, msglen);
5993 		break;
5994 	default:
5995 		break;
5996 	}
5997 
5998 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
5999 	return (rv);
6000 }
6001 
/*
 * dummy pkt data handler function for vnet protocol version 1.0
 *
 * Takes the same arguments as vgen_handle_pkt_data() but ignores all of
 * them and does nothing.
 * NOTE(review): presumably installed as the channel's rx_pktdata callback
 * when raw packet data is not supported - confirm where rx_pktdata is set.
 */
static void
vgen_handle_pkt_data_nop(void *arg1, void *arg2, uint32_t msglen)
{
	/* annotation marking every argument as intentionally unused */
	_NOTE(ARGUNUSED(arg1, arg2, msglen))
}
6010 
6011 /*
6012  * This function handles raw pkt data messages received over the channel.
6013  * Currently, only priority-eth-type frames are received through this mechanism.
6014  * In this case, the frame(data) is present within the message itself which
6015  * is copied into an mblk before sending it up the stack.
6016  */
6017 static void
6018 vgen_handle_pkt_data(void *arg1, void *arg2, uint32_t msglen)
6019 {
6020 	vgen_ldc_t		*ldcp = (vgen_ldc_t *)arg1;
6021 	vio_raw_data_msg_t	*pkt	= (vio_raw_data_msg_t *)arg2;
6022 	uint32_t		size;
6023 	mblk_t			*mp;
6024 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
6025 	vgen_stats_t		*statsp = &ldcp->stats;
6026 	vgen_hparams_t		*lp = &ldcp->local_hparams;
6027 	vio_net_rx_cb_t		vrx_cb;
6028 
6029 	ASSERT(MUTEX_HELD(&ldcp->cblock));
6030 
6031 	mutex_exit(&ldcp->cblock);
6032 
6033 	size = msglen - VIO_PKT_DATA_HDRSIZE;
6034 	if (size < ETHERMIN || size > lp->mtu) {
6035 		(void) atomic_inc_32(&statsp->rx_pri_fail);
6036 		goto exit;
6037 	}
6038 
6039 	mp = vio_multipool_allocb(&ldcp->vmp, size);
6040 	if (mp == NULL) {
6041 		mp = allocb(size, BPRI_MED);
6042 		if (mp == NULL) {
6043 			(void) atomic_inc_32(&statsp->rx_pri_fail);
6044 			DWARN(vgenp, ldcp, "allocb failure, "
6045 			    "unable to process priority frame\n");
6046 			goto exit;
6047 		}
6048 	}
6049 
6050 	/* copy the frame from the payload of raw data msg into the mblk */
6051 	bcopy(pkt->data, mp->b_rptr, size);
6052 	mp->b_wptr = mp->b_rptr + size;
6053 
6054 	/* update stats */
6055 	(void) atomic_inc_64(&statsp->rx_pri_packets);
6056 	(void) atomic_add_64(&statsp->rx_pri_bytes, size);
6057 
6058 	/* send up; call vrx_cb() as cblock is already released */
6059 	vrx_cb = ldcp->portp->vcb.vio_net_rx_cb;
6060 	vrx_cb(ldcp->portp->vhp, mp);
6061 
6062 exit:
6063 	mutex_enter(&ldcp->cblock);
6064 }
6065 
6066 static int
6067 vgen_send_dring_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp, uint32_t start,
6068     int32_t end, uint8_t pstate)
6069 {
6070 	int rv = 0;
6071 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6072 	vio_dring_msg_t *msgp = (vio_dring_msg_t *)tagp;
6073 
6074 	tagp->vio_subtype = VIO_SUBTYPE_ACK;
6075 	tagp->vio_sid = ldcp->local_sid;
6076 	msgp->start_idx = start;
6077 	msgp->end_idx = end;
6078 	msgp->dring_process_state = pstate;
6079 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msgp), B_FALSE);
6080 	if (rv != VGEN_SUCCESS) {
6081 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
6082 	}
6083 	return (rv);
6084 }
6085 
6086 static int
6087 vgen_handle_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
6088 {
6089 	int rv = 0;
6090 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6091 
6092 
6093 	DBG1(vgenp, ldcp, "enter\n");
6094 	switch (tagp->vio_subtype) {
6095 
6096 	case VIO_SUBTYPE_INFO:
6097 		/*
6098 		 * To reduce the locking contention, release the
6099 		 * cblock here and re-acquire it once we are done
6100 		 * receiving packets.
6101 		 */
6102 		mutex_exit(&ldcp->cblock);
6103 		mutex_enter(&ldcp->rxlock);
6104 		rv = vgen_handle_dring_data_info(ldcp, tagp);
6105 		mutex_exit(&ldcp->rxlock);
6106 		mutex_enter(&ldcp->cblock);
6107 		break;
6108 
6109 	case VIO_SUBTYPE_ACK:
6110 		rv = vgen_handle_dring_data_ack(ldcp, tagp);
6111 		break;
6112 
6113 	case VIO_SUBTYPE_NACK:
6114 		rv = vgen_handle_dring_data_nack(ldcp, tagp);
6115 		break;
6116 	}
6117 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
6118 	return (rv);
6119 }
6120 
6121 static int
6122 vgen_handle_dring_data_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
6123 {
6124 	uint32_t start;
6125 	int32_t end;
6126 	int rv = 0;
6127 	vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;
6128 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6129 #ifdef VGEN_HANDLE_LOST_PKTS
6130 	vgen_stats_t *statsp = &ldcp->stats;
6131 	uint32_t rxi;
6132 	int n;
6133 #endif
6134 
6135 	DBG1(vgenp, ldcp, "enter\n");
6136 
6137 	start = dringmsg->start_idx;
6138 	end = dringmsg->end_idx;
6139 	/*
6140 	 * received a data msg, which contains the start and end
6141 	 * indices of the descriptors within the rx ring holding data,
6142 	 * the seq_num of data packet corresponding to the start index,
6143 	 * and the dring_ident.
6144 	 * We can now read the contents of each of these descriptors
6145 	 * and gather data from it.
6146 	 */
6147 	DBG1(vgenp, ldcp, "INFO: start(%d), end(%d)\n",
6148 	    start, end);
6149 
6150 	/* validate rx start and end indeces */
6151 	if (!(CHECK_RXI(start, ldcp)) || ((end != -1) &&
6152 	    !(CHECK_RXI(end, ldcp)))) {
6153 		DWARN(vgenp, ldcp, "Invalid Rx start(%d) or end(%d)\n",
6154 		    start, end);
6155 		/* drop the message if invalid index */
6156 		return (rv);
6157 	}
6158 
6159 	/* validate dring_ident */
6160 	if (dringmsg->dring_ident != ldcp->peer_hparams.dring_ident) {
6161 		DWARN(vgenp, ldcp, "Invalid dring ident 0x%x\n",
6162 		    dringmsg->dring_ident);
6163 		/* invalid dring_ident, drop the msg */
6164 		return (rv);
6165 	}
6166 #ifdef DEBUG
6167 	if (vgen_trigger_rxlost) {
6168 		/* drop this msg to simulate lost pkts for debugging */
6169 		vgen_trigger_rxlost = 0;
6170 		return (rv);
6171 	}
6172 #endif
6173 
6174 #ifdef	VGEN_HANDLE_LOST_PKTS
6175 
6176 	/* receive start index doesn't match expected index */
6177 	if (ldcp->next_rxi != start) {
6178 		DWARN(vgenp, ldcp, "next_rxi(%d) != start(%d)\n",
6179 		    ldcp->next_rxi, start);
6180 
6181 		/* calculate the number of pkts lost */
6182 		if (start >= ldcp->next_rxi) {
6183 			n = start - ldcp->next_rxi;
6184 		} else  {
6185 			n = ldcp->num_rxds - (ldcp->next_rxi - start);
6186 		}
6187 
6188 		statsp->rx_lost_pkts += n;
6189 		tagp->vio_subtype = VIO_SUBTYPE_NACK;
6190 		tagp->vio_sid = ldcp->local_sid;
6191 		/* indicate the range of lost descriptors */
6192 		dringmsg->start_idx = ldcp->next_rxi;
6193 		rxi = start;
6194 		DECR_RXI(rxi, ldcp);
6195 		dringmsg->end_idx = rxi;
6196 		/* dring ident is left unchanged */
6197 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp,
6198 		    sizeof (*dringmsg), B_FALSE);
6199 		if (rv != VGEN_SUCCESS) {
6200 			DWARN(vgenp, ldcp,
6201 			    "vgen_sendmsg failed, stype:NACK\n");
6202 			return (rv);
6203 		}
6204 		/*
6205 		 * treat this range of descrs/pkts as dropped
6206 		 * and set the new expected value of next_rxi
6207 		 * and continue(below) to process from the new
6208 		 * start index.
6209 		 */
6210 		ldcp->next_rxi = start;
6211 	}
6212 
6213 #endif	/* VGEN_HANDLE_LOST_PKTS */
6214 
6215 	/* Now receive messages */
6216 	rv = vgen_process_dring_data(ldcp, tagp);
6217 
6218 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
6219 	return (rv);
6220 }
6221 
6222 static int
6223 vgen_process_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
6224 {
6225 	boolean_t set_ack_start = B_FALSE;
6226 	uint32_t start;
6227 	uint32_t ack_end;
6228 	uint32_t next_rxi;
6229 	uint32_t rxi;
6230 	int count = 0;
6231 	int rv = 0;
6232 	uint32_t retries = 0;
6233 	vgen_stats_t *statsp;
6234 	vnet_public_desc_t rxd;
6235 	vio_dring_entry_hdr_t *hdrp;
6236 	mblk_t *bp = NULL;
6237 	mblk_t *bpt = NULL;
6238 	uint32_t ack_start;
6239 	boolean_t rxd_err = B_FALSE;
6240 	mblk_t *mp = NULL;
6241 	size_t nbytes;
6242 	boolean_t ack_needed = B_FALSE;
6243 	size_t nread;
6244 	uint64_t off = 0;
6245 	struct ether_header *ehp;
6246 	vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;
6247 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6248 	vgen_hparams_t	*lp = &ldcp->local_hparams;
6249 
6250 	DBG1(vgenp, ldcp, "enter\n");
6251 
6252 	statsp = &ldcp->stats;
6253 	start = dringmsg->start_idx;
6254 
6255 	/*
6256 	 * start processing the descriptors from the specified
6257 	 * start index, up to the index a descriptor is not ready
6258 	 * to be processed or we process the entire descriptor ring
6259 	 * and wrap around upto the start index.
6260 	 */
6261 
6262 	/* need to set the start index of descriptors to be ack'd */
6263 	set_ack_start = B_TRUE;
6264 
6265 	/* index upto which we have ack'd */
6266 	ack_end = start;
6267 	DECR_RXI(ack_end, ldcp);
6268 
6269 	next_rxi = rxi =  start;
6270 	do {
6271 vgen_recv_retry:
6272 		rv = vnet_dring_entry_copy(&(ldcp->rxdp[rxi]), &rxd,
6273 		    ldcp->dring_mtype, ldcp->rx_dhandle, rxi, rxi);
6274 		if (rv != 0) {
6275 			DWARN(vgenp, ldcp, "ldc_mem_dring_acquire() failed"
6276 			    " rv(%d)\n", rv);
6277 			statsp->ierrors++;
6278 			return (rv);
6279 		}
6280 
6281 		hdrp = &rxd.hdr;
6282 
6283 		if (hdrp->dstate != VIO_DESC_READY) {
6284 			/*
6285 			 * Before waiting and retry here, send up
6286 			 * the packets that are received already
6287 			 */
6288 			if (bp != NULL) {
6289 				DTRACE_PROBE1(vgen_rcv_msgs, int, count);
6290 				vgen_rx(ldcp, bp, bpt);
6291 				count = 0;
6292 				bp = bpt = NULL;
6293 			}
6294 			/*
6295 			 * descriptor is not ready.
6296 			 * retry descriptor acquire, stop processing
6297 			 * after max # retries.
6298 			 */
6299 			if (retries == vgen_recv_retries)
6300 				break;
6301 			retries++;
6302 			drv_usecwait(vgen_recv_delay);
6303 			goto vgen_recv_retry;
6304 		}
6305 		retries = 0;
6306 
6307 		if (set_ack_start) {
6308 			/*
6309 			 * initialize the start index of the range
6310 			 * of descriptors to be ack'd.
6311 			 */
6312 			ack_start = rxi;
6313 			set_ack_start = B_FALSE;
6314 		}
6315 
6316 		if ((rxd.nbytes < ETHERMIN) ||
6317 		    (rxd.nbytes > lp->mtu) ||
6318 		    (rxd.ncookies == 0) ||
6319 		    (rxd.ncookies > MAX_COOKIES)) {
6320 			rxd_err = B_TRUE;
6321 		} else {
6322 			/*
6323 			 * Try to allocate an mblk from the free pool
6324 			 * of recv mblks for the channel.
6325 			 * If this fails, use allocb().
6326 			 */
6327 			nbytes = (VNET_IPALIGN + rxd.nbytes + 7) & ~7;
6328 			if (nbytes > ldcp->max_rxpool_size) {
6329 				mp = allocb(VNET_IPALIGN + rxd.nbytes + 8,
6330 				    BPRI_MED);
6331 			} else {
6332 				mp = vio_multipool_allocb(&ldcp->vmp, nbytes);
6333 				if (mp == NULL) {
6334 					statsp->rx_vio_allocb_fail++;
6335 					/*
6336 					 * Data buffer returned by allocb(9F)
6337 					 * is 8byte aligned. We allocate extra
6338 					 * 8 bytes to ensure size is multiple
6339 					 * of 8 bytes for ldc_mem_copy().
6340 					 */
6341 					mp = allocb(VNET_IPALIGN +
6342 					    rxd.nbytes + 8, BPRI_MED);
6343 				}
6344 			}
6345 		}
6346 		if ((rxd_err) || (mp == NULL)) {
6347 			/*
6348 			 * rxd_err or allocb() failure,
6349 			 * drop this packet, get next.
6350 			 */
6351 			if (rxd_err) {
6352 				statsp->ierrors++;
6353 				rxd_err = B_FALSE;
6354 			} else {
6355 				statsp->rx_allocb_fail++;
6356 			}
6357 
6358 			ack_needed = hdrp->ack;
6359 
6360 			/* set descriptor done bit */
6361 			rv = vnet_dring_entry_set_dstate(&(ldcp->rxdp[rxi]),
6362 			    ldcp->dring_mtype, ldcp->rx_dhandle, rxi, rxi,
6363 			    VIO_DESC_DONE);
6364 			if (rv != 0) {
6365 				DWARN(vgenp, ldcp,
6366 				    "vnet_dring_entry_set_dstate err rv(%d)\n",
6367 				    rv);
6368 				return (rv);
6369 			}
6370 
6371 			if (ack_needed) {
6372 				ack_needed = B_FALSE;
6373 				/*
6374 				 * sender needs ack for this packet,
6375 				 * ack pkts upto this index.
6376 				 */
6377 				ack_end = rxi;
6378 
6379 				rv = vgen_send_dring_ack(ldcp, tagp,
6380 				    ack_start, ack_end,
6381 				    VIO_DP_ACTIVE);
6382 				if (rv != VGEN_SUCCESS) {
6383 					goto error_ret;
6384 				}
6385 
6386 				/* need to set new ack start index */
6387 				set_ack_start = B_TRUE;
6388 			}
6389 			goto vgen_next_rxi;
6390 		}
6391 
6392 		nread = nbytes;
6393 		rv = ldc_mem_copy(ldcp->ldc_handle,
6394 		    (caddr_t)mp->b_rptr, off, &nread,
6395 		    rxd.memcookie, rxd.ncookies, LDC_COPY_IN);
6396 
6397 		/* if ldc_mem_copy() failed */
6398 		if (rv) {
6399 			DWARN(vgenp, ldcp, "ldc_mem_copy err rv(%d)\n", rv);
6400 			statsp->ierrors++;
6401 			freemsg(mp);
6402 			goto error_ret;
6403 		}
6404 
6405 		ack_needed = hdrp->ack;
6406 
6407 		rv = vnet_dring_entry_set_dstate(&(ldcp->rxdp[rxi]),
6408 		    ldcp->dring_mtype, ldcp->rx_dhandle, rxi, rxi,
6409 		    VIO_DESC_DONE);
6410 		if (rv != 0) {
6411 			DWARN(vgenp, ldcp,
6412 			    "vnet_dring_entry_set_dstate err rv(%d)\n", rv);
6413 			goto error_ret;
6414 		}
6415 
6416 		mp->b_rptr += VNET_IPALIGN;
6417 
6418 		if (ack_needed) {
6419 			ack_needed = B_FALSE;
6420 			/*
6421 			 * sender needs ack for this packet,
6422 			 * ack pkts upto this index.
6423 			 */
6424 			ack_end = rxi;
6425 
6426 			rv = vgen_send_dring_ack(ldcp, tagp,
6427 			    ack_start, ack_end, VIO_DP_ACTIVE);
6428 			if (rv != VGEN_SUCCESS) {
6429 				goto error_ret;
6430 			}
6431 
6432 			/* need to set new ack start index */
6433 			set_ack_start = B_TRUE;
6434 		}
6435 
6436 		if (nread != nbytes) {
6437 			DWARN(vgenp, ldcp,
6438 			    "ldc_mem_copy nread(%lx), nbytes(%lx)\n",
6439 			    nread, nbytes);
6440 			statsp->ierrors++;
6441 			freemsg(mp);
6442 			goto vgen_next_rxi;
6443 		}
6444 
6445 		/* point to the actual end of data */
6446 		mp->b_wptr = mp->b_rptr + rxd.nbytes;
6447 
6448 		/* update stats */
6449 		statsp->ipackets++;
6450 		statsp->rbytes += rxd.nbytes;
6451 		ehp = (struct ether_header *)mp->b_rptr;
6452 		if (IS_BROADCAST(ehp))
6453 			statsp->brdcstrcv++;
6454 		else if (IS_MULTICAST(ehp))
6455 			statsp->multircv++;
6456 
6457 		/* build a chain of received packets */
6458 		if (bp == NULL) {
6459 			/* first pkt */
6460 			bp = mp;
6461 			bpt = bp;
6462 			bpt->b_next = NULL;
6463 		} else {
6464 			mp->b_next = NULL;
6465 			bpt->b_next = mp;
6466 			bpt = mp;
6467 		}
6468 
6469 		if (count++ > vgen_chain_len) {
6470 			DTRACE_PROBE1(vgen_rcv_msgs, int, count);
6471 			vgen_rx(ldcp, bp, bpt);
6472 			count = 0;
6473 			bp = bpt = NULL;
6474 		}
6475 
6476 vgen_next_rxi:
6477 		/* update end index of range of descrs to be ack'd */
6478 		ack_end = rxi;
6479 
6480 		/* update the next index to be processed */
6481 		INCR_RXI(next_rxi, ldcp);
6482 		if (next_rxi == start) {
6483 			/*
6484 			 * processed the entire descriptor ring upto
6485 			 * the index at which we started.
6486 			 */
6487 			break;
6488 		}
6489 
6490 		rxi = next_rxi;
6491 
6492 	_NOTE(CONSTCOND)
6493 	} while (1);
6494 
6495 	/*
6496 	 * send an ack message to peer indicating that we have stopped
6497 	 * processing descriptors.
6498 	 */
6499 	if (set_ack_start) {
6500 		/*
6501 		 * We have ack'd upto some index and we have not
6502 		 * processed any descriptors beyond that index.
6503 		 * Use the last ack'd index as both the start and
6504 		 * end of range of descrs being ack'd.
6505 		 * Note: This results in acking the last index twice
6506 		 * and should be harmless.
6507 		 */
6508 		ack_start = ack_end;
6509 	}
6510 
6511 	rv = vgen_send_dring_ack(ldcp, tagp, ack_start, ack_end,
6512 	    VIO_DP_STOPPED);
6513 	if (rv != VGEN_SUCCESS) {
6514 		goto error_ret;
6515 	}
6516 
6517 	/* save new recv index of next dring msg */
6518 	ldcp->next_rxi = next_rxi;
6519 
6520 error_ret:
6521 	/* send up packets received so far */
6522 	if (bp != NULL) {
6523 		DTRACE_PROBE1(vgen_rcv_msgs, int, count);
6524 		vgen_rx(ldcp, bp, bpt);
6525 		bp = bpt = NULL;
6526 	}
6527 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
6528 	return (rv);
6529 
6530 }
6531 
6532 static int
6533 vgen_handle_dring_data_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
6534 {
6535 	int rv = 0;
6536 	uint32_t start;
6537 	int32_t end;
6538 	uint32_t txi;
6539 	boolean_t ready_txd = B_FALSE;
6540 	vgen_stats_t *statsp;
6541 	vgen_private_desc_t *tbufp;
6542 	vnet_public_desc_t *txdp;
6543 	vio_dring_entry_hdr_t *hdrp;
6544 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6545 	vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;
6546 
6547 	DBG1(vgenp, ldcp, "enter\n");
6548 	start = dringmsg->start_idx;
6549 	end = dringmsg->end_idx;
6550 	statsp = &ldcp->stats;
6551 
6552 	/*
6553 	 * received an ack corresponding to a specific descriptor for
6554 	 * which we had set the ACK bit in the descriptor (during
6555 	 * transmit). This enables us to reclaim descriptors.
6556 	 */
6557 
6558 	DBG2(vgenp, ldcp, "ACK:  start(%d), end(%d)\n", start, end);
6559 
6560 	/* validate start and end indeces in the tx ack msg */
6561 	if (!(CHECK_TXI(start, ldcp)) || !(CHECK_TXI(end, ldcp))) {
6562 		/* drop the message if invalid index */
6563 		DWARN(vgenp, ldcp, "Invalid Tx ack start(%d) or end(%d)\n",
6564 		    start, end);
6565 		return (rv);
6566 	}
6567 	/* validate dring_ident */
6568 	if (dringmsg->dring_ident != ldcp->local_hparams.dring_ident) {
6569 		/* invalid dring_ident, drop the msg */
6570 		DWARN(vgenp, ldcp, "Invalid dring ident 0x%x\n",
6571 		    dringmsg->dring_ident);
6572 		return (rv);
6573 	}
6574 	statsp->dring_data_acks++;
6575 
6576 	/* reclaim descriptors that are done */
6577 	vgen_reclaim(ldcp);
6578 
6579 	if (dringmsg->dring_process_state != VIO_DP_STOPPED) {
6580 		/*
6581 		 * receiver continued processing descriptors after
6582 		 * sending us the ack.
6583 		 */
6584 		return (rv);
6585 	}
6586 
6587 	statsp->dring_stopped_acks++;
6588 
6589 	/* receiver stopped processing descriptors */
6590 	mutex_enter(&ldcp->wrlock);
6591 	mutex_enter(&ldcp->tclock);
6592 
6593 	/*
6594 	 * determine if there are any pending tx descriptors
6595 	 * ready to be processed by the receiver(peer) and if so,
6596 	 * send a message to the peer to restart receiving.
6597 	 */
6598 	ready_txd = B_FALSE;
6599 
6600 	/*
6601 	 * using the end index of the descriptor range for which
6602 	 * we received the ack, check if the next descriptor is
6603 	 * ready.
6604 	 */
6605 	txi = end;
6606 	INCR_TXI(txi, ldcp);
6607 	tbufp = &ldcp->tbufp[txi];
6608 	txdp = tbufp->descp;
6609 	hdrp = &txdp->hdr;
6610 	if (hdrp->dstate == VIO_DESC_READY) {
6611 		ready_txd = B_TRUE;
6612 	} else {
6613 		/*
6614 		 * descr next to the end of ack'd descr range is not
6615 		 * ready.
6616 		 * starting from the current reclaim index, check
6617 		 * if any descriptor is ready.
6618 		 */
6619 
6620 		txi = ldcp->cur_tbufp - ldcp->tbufp;
6621 		tbufp = &ldcp->tbufp[txi];
6622 
6623 		txdp = tbufp->descp;
6624 		hdrp = &txdp->hdr;
6625 		if (hdrp->dstate == VIO_DESC_READY) {
6626 			ready_txd = B_TRUE;
6627 		}
6628 
6629 	}
6630 
6631 	if (ready_txd) {
6632 		/*
6633 		 * we have tx descriptor(s) ready to be
6634 		 * processed by the receiver.
6635 		 * send a message to the peer with the start index
6636 		 * of ready descriptors.
6637 		 */
6638 		rv = vgen_send_dring_data(ldcp, txi, -1);
6639 		if (rv != VGEN_SUCCESS) {
6640 			ldcp->resched_peer = B_TRUE;
6641 			ldcp->resched_peer_txi = txi;
6642 			mutex_exit(&ldcp->tclock);
6643 			mutex_exit(&ldcp->wrlock);
6644 			return (rv);
6645 		}
6646 	} else {
6647 		/*
6648 		 * no ready tx descriptors. set the flag to send a
6649 		 * message to peer when tx descriptors are ready in
6650 		 * transmit routine.
6651 		 */
6652 		ldcp->resched_peer = B_TRUE;
6653 		ldcp->resched_peer_txi = ldcp->cur_tbufp - ldcp->tbufp;
6654 	}
6655 
6656 	mutex_exit(&ldcp->tclock);
6657 	mutex_exit(&ldcp->wrlock);
6658 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
6659 	return (rv);
6660 }
6661 
6662 static int
6663 vgen_handle_dring_data_nack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
6664 {
6665 	int rv = 0;
6666 	uint32_t start;
6667 	int32_t end;
6668 	uint32_t txi;
6669 	vnet_public_desc_t *txdp;
6670 	vio_dring_entry_hdr_t *hdrp;
6671 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6672 	vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;
6673 
6674 	DBG1(vgenp, ldcp, "enter\n");
6675 	start = dringmsg->start_idx;
6676 	end = dringmsg->end_idx;
6677 
6678 	/*
6679 	 * peer sent a NACK msg to indicate lost packets.
6680 	 * The start and end correspond to the range of descriptors
6681 	 * for which the peer didn't receive a dring data msg and so
6682 	 * didn't receive the corresponding data.
6683 	 */
6684 	DWARN(vgenp, ldcp, "NACK: start(%d), end(%d)\n", start, end);
6685 
6686 	/* validate start and end indeces in the tx nack msg */
6687 	if (!(CHECK_TXI(start, ldcp)) || !(CHECK_TXI(end, ldcp))) {
6688 		/* drop the message if invalid index */
6689 		DWARN(vgenp, ldcp, "Invalid Tx nack start(%d) or end(%d)\n",
6690 		    start, end);
6691 		return (rv);
6692 	}
6693 	/* validate dring_ident */
6694 	if (dringmsg->dring_ident != ldcp->local_hparams.dring_ident) {
6695 		/* invalid dring_ident, drop the msg */
6696 		DWARN(vgenp, ldcp, "Invalid dring ident 0x%x\n",
6697 		    dringmsg->dring_ident);
6698 		return (rv);
6699 	}
6700 	mutex_enter(&ldcp->txlock);
6701 	mutex_enter(&ldcp->tclock);
6702 
6703 	if (ldcp->next_tbufp == ldcp->cur_tbufp) {
6704 		/* no busy descriptors, bogus nack ? */
6705 		mutex_exit(&ldcp->tclock);
6706 		mutex_exit(&ldcp->txlock);
6707 		return (rv);
6708 	}
6709 
6710 	/* we just mark the descrs as done so they can be reclaimed */
6711 	for (txi = start; txi <= end; ) {
6712 		txdp = &(ldcp->txdp[txi]);
6713 		hdrp = &txdp->hdr;
6714 		if (hdrp->dstate == VIO_DESC_READY)
6715 			hdrp->dstate = VIO_DESC_DONE;
6716 		INCR_TXI(txi, ldcp);
6717 	}
6718 	mutex_exit(&ldcp->tclock);
6719 	mutex_exit(&ldcp->txlock);
6720 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
6721 	return (rv);
6722 }
6723 
6724 static void
6725 vgen_reclaim(vgen_ldc_t *ldcp)
6726 {
6727 	mutex_enter(&ldcp->tclock);
6728 
6729 	vgen_reclaim_dring(ldcp);
6730 	ldcp->reclaim_lbolt = ddi_get_lbolt();
6731 
6732 	mutex_exit(&ldcp->tclock);
6733 }
6734 
6735 /*
6736  * transmit reclaim function. starting from the current reclaim index
6737  * look for descriptors marked DONE and reclaim the descriptor and the
6738  * corresponding buffers (tbuf).
6739  */
6740 static void
6741 vgen_reclaim_dring(vgen_ldc_t *ldcp)
6742 {
6743 	int count = 0;
6744 	vnet_public_desc_t *txdp;
6745 	vgen_private_desc_t *tbufp;
6746 	vio_dring_entry_hdr_t	*hdrp;
6747 
6748 #ifdef DEBUG
6749 	if (vgen_trigger_txtimeout)
6750 		return;
6751 #endif
6752 
6753 	tbufp = ldcp->cur_tbufp;
6754 	txdp = tbufp->descp;
6755 	hdrp = &txdp->hdr;
6756 
6757 	while ((hdrp->dstate == VIO_DESC_DONE) &&
6758 	    (tbufp != ldcp->next_tbufp)) {
6759 		tbufp->flags = VGEN_PRIV_DESC_FREE;
6760 		hdrp->dstate = VIO_DESC_FREE;
6761 		hdrp->ack = B_FALSE;
6762 
6763 		tbufp = NEXTTBUF(ldcp, tbufp);
6764 		txdp = tbufp->descp;
6765 		hdrp = &txdp->hdr;
6766 		count++;
6767 	}
6768 
6769 	ldcp->cur_tbufp = tbufp;
6770 
6771 	/*
6772 	 * Check if mac layer should be notified to restart transmissions
6773 	 */
6774 	if ((ldcp->need_resched) && (count > 0)) {
6775 		vio_net_tx_update_t vtx_update =
6776 		    ldcp->portp->vcb.vio_net_tx_update;
6777 
6778 		ldcp->need_resched = B_FALSE;
6779 		vtx_update(ldcp->portp->vhp);
6780 	}
6781 }
6782 
6783 /* return the number of pending transmits for the channel */
6784 static int
6785 vgen_num_txpending(vgen_ldc_t *ldcp)
6786 {
6787 	int n;
6788 
6789 	if (ldcp->next_tbufp >= ldcp->cur_tbufp) {
6790 		n = ldcp->next_tbufp - ldcp->cur_tbufp;
6791 	} else  {
6792 		/* cur_tbufp > next_tbufp */
6793 		n = ldcp->num_txds - (ldcp->cur_tbufp - ldcp->next_tbufp);
6794 	}
6795 
6796 	return (n);
6797 }
6798 
6799 /* determine if the transmit descriptor ring is full */
6800 static int
6801 vgen_tx_dring_full(vgen_ldc_t *ldcp)
6802 {
6803 	vgen_private_desc_t	*tbufp;
6804 	vgen_private_desc_t	*ntbufp;
6805 
6806 	tbufp = ldcp->next_tbufp;
6807 	ntbufp = NEXTTBUF(ldcp, tbufp);
6808 	if (ntbufp == ldcp->cur_tbufp) { /* out of tbufs/txds */
6809 		return (VGEN_SUCCESS);
6810 	}
6811 	return (VGEN_FAILURE);
6812 }
6813 
6814 /* determine if timeout condition has occured */
6815 static int
6816 vgen_ldc_txtimeout(vgen_ldc_t *ldcp)
6817 {
6818 	if (((ddi_get_lbolt() - ldcp->reclaim_lbolt) >
6819 	    drv_usectohz(vnet_ldcwd_txtimeout * 1000)) &&
6820 	    (vnet_ldcwd_txtimeout) &&
6821 	    (vgen_tx_dring_full(ldcp) == VGEN_SUCCESS)) {
6822 		return (VGEN_SUCCESS);
6823 	} else {
6824 		return (VGEN_FAILURE);
6825 	}
6826 }
6827 
6828 /* transmit watchdog timeout handler */
6829 static void
6830 vgen_ldc_watchdog(void *arg)
6831 {
6832 	vgen_ldc_t *ldcp;
6833 	vgen_t *vgenp;
6834 	int rv;
6835 
6836 	ldcp = (vgen_ldc_t *)arg;
6837 	vgenp = LDC_TO_VGEN(ldcp);
6838 
6839 	rv = vgen_ldc_txtimeout(ldcp);
6840 	if (rv == VGEN_SUCCESS) {
6841 		DWARN(vgenp, ldcp, "transmit timeout\n");
6842 #ifdef DEBUG
6843 		if (vgen_trigger_txtimeout) {
6844 			/* tx timeout triggered for debugging */
6845 			vgen_trigger_txtimeout = 0;
6846 		}
6847 #endif
6848 		mutex_enter(&ldcp->cblock);
6849 		vgen_ldc_reset(ldcp);
6850 		mutex_exit(&ldcp->cblock);
6851 		if (ldcp->need_resched) {
6852 			vio_net_tx_update_t vtx_update =
6853 			    ldcp->portp->vcb.vio_net_tx_update;
6854 
6855 			ldcp->need_resched = B_FALSE;
6856 			vtx_update(ldcp->portp->vhp);
6857 		}
6858 	}
6859 
6860 	ldcp->wd_tid = timeout(vgen_ldc_watchdog, (caddr_t)ldcp,
6861 	    drv_usectohz(vnet_ldcwd_interval * 1000));
6862 }
6863 
/*
 * Handler for error messages received from the peer ldc end-point.
 * This is currently a no-op: neither argument is used and the message
 * is ignored.
 */
static void
vgen_handle_errmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
{
	_NOTE(ARGUNUSED(ldcp, tagp))
}
6870 
6871 static int
6872 vgen_check_datamsg_seq(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
6873 {
6874 	vio_raw_data_msg_t	*rmsg;
6875 	vio_dring_msg_t		*dmsg;
6876 	uint64_t		seq_num;
6877 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
6878 
6879 	if (tagp->vio_subtype_env == VIO_DRING_DATA) {
6880 		dmsg = (vio_dring_msg_t *)tagp;
6881 		seq_num = dmsg->seq_num;
6882 	} else if (tagp->vio_subtype_env == VIO_PKT_DATA) {
6883 		rmsg = (vio_raw_data_msg_t *)tagp;
6884 		seq_num = rmsg->seq_num;
6885 	} else {
6886 		return (EINVAL);
6887 	}
6888 
6889 	if (seq_num != ldcp->next_rxseq) {
6890 
6891 		/* seqnums don't match */
6892 		DWARN(vgenp, ldcp,
6893 		    "next_rxseq(0x%lx) != seq_num(0x%lx)\n",
6894 		    ldcp->next_rxseq, seq_num);
6895 
6896 		return (EINVAL);
6897 
6898 	}
6899 
6900 	ldcp->next_rxseq++;
6901 
6902 	return (0);
6903 }
6904 
6905 /* Check if the session id in the received message is valid */
6906 static int
6907 vgen_check_sid(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
6908 {
6909 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6910 
6911 	if (tagp->vio_sid != ldcp->peer_sid) {
6912 		DWARN(vgenp, ldcp, "sid mismatch: expected(%x), rcvd(%x)\n",
6913 		    ldcp->peer_sid, tagp->vio_sid);
6914 		return (VGEN_FAILURE);
6915 	}
6916 	else
6917 		return (VGEN_SUCCESS);
6918 }
6919 
/*
 * Format the 6-byte ethernet address 'a' into 'ebuf' as colon separated
 * hex octets, without zero padding (e.g. "0:14:4f:fa:1:ff"), and return
 * 'ebuf'. The output is not bounded here: the caller must supply a buffer
 * of at least 18 bytes (6 octets of up to 2 digits, 5 colons, and a NUL).
 */
static caddr_t
vgen_print_ethaddr(uint8_t *a, char *ebuf)
{
	(void) sprintf(ebuf, "%x:%x:%x:%x:%x:%x",
	    a[0], a[1], a[2],
	    a[3], a[4], a[5]);

	return (ebuf);
}
6927 
6928 /* Handshake watchdog timeout handler */
6929 static void
6930 vgen_hwatchdog(void *arg)
6931 {
6932 	vgen_ldc_t *ldcp = (vgen_ldc_t *)arg;
6933 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6934 
6935 	DWARN(vgenp, ldcp, "handshake timeout phase(%x) state(%x)\n",
6936 	    ldcp->hphase, ldcp->hstate);
6937 
6938 	mutex_enter(&ldcp->cblock);
6939 	if (ldcp->cancel_htid) {
6940 		ldcp->cancel_htid = 0;
6941 		mutex_exit(&ldcp->cblock);
6942 		return;
6943 	}
6944 	ldcp->htid = 0;
6945 	vgen_ldc_reset(ldcp);
6946 	mutex_exit(&ldcp->cblock);
6947 }
6948 
6949 static void
6950 vgen_print_hparams(vgen_hparams_t *hp)
6951 {
6952 	uint8_t	addr[6];
6953 	char	ea[6];
6954 	ldc_mem_cookie_t *dc;
6955 
6956 	cmn_err(CE_CONT, "version_info:\n");
6957 	cmn_err(CE_CONT,
6958 	    "\tver_major: %d, ver_minor: %d, dev_class: %d\n",
6959 	    hp->ver_major, hp->ver_minor, hp->dev_class);
6960 
6961 	vnet_macaddr_ultostr(hp->addr, addr);
6962 	cmn_err(CE_CONT, "attr_info:\n");
6963 	cmn_err(CE_CONT, "\tMTU: %lx, addr: %s\n", hp->mtu,
6964 	    vgen_print_ethaddr(addr, ea));
6965 	cmn_err(CE_CONT,
6966 	    "\taddr_type: %x, xfer_mode: %x, ack_freq: %x\n",
6967 	    hp->addr_type, hp->xfer_mode, hp->ack_freq);
6968 
6969 	dc = &hp->dring_cookie;
6970 	cmn_err(CE_CONT, "dring_info:\n");
6971 	cmn_err(CE_CONT,
6972 	    "\tlength: %d, dsize: %d\n", hp->num_desc, hp->desc_size);
6973 	cmn_err(CE_CONT,
6974 	    "\tldc_addr: 0x%lx, ldc_size: %ld\n",
6975 	    dc->addr, dc->size);
6976 	cmn_err(CE_CONT, "\tdring_ident: 0x%lx\n", hp->dring_ident);
6977 }
6978 
6979 static void
6980 vgen_print_ldcinfo(vgen_ldc_t *ldcp)
6981 {
6982 	vgen_hparams_t *hp;
6983 
6984 	cmn_err(CE_CONT, "Channel Information:\n");
6985 	cmn_err(CE_CONT,
6986 	    "\tldc_id: 0x%lx, ldc_status: 0x%x\n",
6987 	    ldcp->ldc_id, ldcp->ldc_status);
6988 	cmn_err(CE_CONT,
6989 	    "\tlocal_sid: 0x%x, peer_sid: 0x%x\n",
6990 	    ldcp->local_sid, ldcp->peer_sid);
6991 	cmn_err(CE_CONT,
6992 	    "\thphase: 0x%x, hstate: 0x%x\n",
6993 	    ldcp->hphase, ldcp->hstate);
6994 
6995 	cmn_err(CE_CONT, "Local handshake params:\n");
6996 	hp = &ldcp->local_hparams;
6997 	vgen_print_hparams(hp);
6998 
6999 	cmn_err(CE_CONT, "Peer handshake params:\n");
7000 	hp = &ldcp->peer_hparams;
7001 	vgen_print_hparams(hp);
7002 }
7003 
7004 /*
7005  * Send received packets up the stack.
7006  */
7007 static void
7008 vgen_rx(vgen_ldc_t *ldcp, mblk_t *bp, mblk_t *bpt)
7009 {
7010 	vio_net_rx_cb_t vrx_cb = ldcp->portp->vcb.vio_net_rx_cb;
7011 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
7012 
7013 	if (ldcp->rcv_thread != NULL) {
7014 		ASSERT(MUTEX_HELD(&ldcp->rxlock));
7015 	} else {
7016 		ASSERT(MUTEX_HELD(&ldcp->cblock));
7017 	}
7018 
7019 	mutex_enter(&ldcp->pollq_lock);
7020 
7021 	if (ldcp->polling_on == B_TRUE) {
7022 		/*
7023 		 * If we are in polling mode, simply queue
7024 		 * the packets onto the poll queue and return.
7025 		 */
7026 		if (ldcp->pollq_headp == NULL) {
7027 			ldcp->pollq_headp = bp;
7028 			ldcp->pollq_tailp = bpt;
7029 		} else {
7030 			ldcp->pollq_tailp->b_next = bp;
7031 			ldcp->pollq_tailp = bpt;
7032 		}
7033 
7034 		mutex_exit(&ldcp->pollq_lock);
7035 		return;
7036 	}
7037 
7038 	/*
7039 	 * Prepend any pending mblks in the poll queue, now that we
7040 	 * are in interrupt mode, before sending up the chain of pkts.
7041 	 */
7042 	if (ldcp->pollq_headp != NULL) {
7043 		DBG2(vgenp, ldcp, "vgen_rx(%lx), pending pollq_headp\n",
7044 		    (uintptr_t)ldcp);
7045 		ldcp->pollq_tailp->b_next = bp;
7046 		bp = ldcp->pollq_headp;
7047 		ldcp->pollq_headp = ldcp->pollq_tailp = NULL;
7048 	}
7049 
7050 	mutex_exit(&ldcp->pollq_lock);
7051 
7052 	if (ldcp->rcv_thread != NULL) {
7053 		mutex_exit(&ldcp->rxlock);
7054 	} else {
7055 		mutex_exit(&ldcp->cblock);
7056 	}
7057 
7058 	/* Send up the packets */
7059 	vrx_cb(ldcp->portp->vhp, bp);
7060 
7061 	if (ldcp->rcv_thread != NULL) {
7062 		mutex_enter(&ldcp->rxlock);
7063 	} else {
7064 		mutex_enter(&ldcp->cblock);
7065 	}
7066 }
7067 
7068 /*
7069  * vgen_ldc_rcv_worker -- A per LDC worker thread to receive data.
7070  * This thread is woken up by the LDC interrupt handler to process
7071  * LDC packets and receive data.
7072  */
7073 static void
7074 vgen_ldc_rcv_worker(void *arg)
7075 {
7076 	callb_cpr_t	cprinfo;
7077 	vgen_ldc_t *ldcp = (vgen_ldc_t *)arg;
7078 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
7079 
7080 	DBG1(vgenp, ldcp, "enter\n");
7081 	CALLB_CPR_INIT(&cprinfo, &ldcp->rcv_thr_lock, callb_generic_cpr,
7082 	    "vnet_rcv_thread");
7083 	mutex_enter(&ldcp->rcv_thr_lock);
7084 	while (!(ldcp->rcv_thr_flags & VGEN_WTHR_STOP)) {
7085 
7086 		CALLB_CPR_SAFE_BEGIN(&cprinfo);
7087 		/*
7088 		 * Wait until the data is received or a stop
7089 		 * request is received.
7090 		 */
7091 		while (!(ldcp->rcv_thr_flags &
7092 		    (VGEN_WTHR_DATARCVD | VGEN_WTHR_STOP))) {
7093 			cv_wait(&ldcp->rcv_thr_cv, &ldcp->rcv_thr_lock);
7094 		}
7095 		CALLB_CPR_SAFE_END(&cprinfo, &ldcp->rcv_thr_lock)
7096 
7097 		/*
7098 		 * First process the stop request.
7099 		 */
7100 		if (ldcp->rcv_thr_flags & VGEN_WTHR_STOP) {
7101 			DBG2(vgenp, ldcp, "stopped\n");
7102 			break;
7103 		}
7104 		ldcp->rcv_thr_flags &= ~VGEN_WTHR_DATARCVD;
7105 		ldcp->rcv_thr_flags |= VGEN_WTHR_PROCESSING;
7106 		mutex_exit(&ldcp->rcv_thr_lock);
7107 		DBG2(vgenp, ldcp, "calling vgen_handle_evt_read\n");
7108 		vgen_handle_evt_read(ldcp);
7109 		mutex_enter(&ldcp->rcv_thr_lock);
7110 		ldcp->rcv_thr_flags &= ~VGEN_WTHR_PROCESSING;
7111 	}
7112 
7113 	/*
7114 	 * Update the run status and wakeup the thread that
7115 	 * has sent the stop request.
7116 	 */
7117 	ldcp->rcv_thr_flags &= ~VGEN_WTHR_STOP;
7118 	ldcp->rcv_thread = NULL;
7119 	CALLB_CPR_EXIT(&cprinfo);
7120 
7121 	thread_exit();
7122 	DBG1(vgenp, ldcp, "exit\n");
7123 }
7124 
7125 /* vgen_stop_rcv_thread -- Co-ordinate with receive thread to stop it */
7126 static void
7127 vgen_stop_rcv_thread(vgen_ldc_t *ldcp)
7128 {
7129 	kt_did_t	tid = 0;
7130 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
7131 
7132 	DBG1(vgenp, ldcp, "enter\n");
7133 	/*
7134 	 * Send a stop request by setting the stop flag and
7135 	 * wait until the receive thread stops.
7136 	 */
7137 	mutex_enter(&ldcp->rcv_thr_lock);
7138 	if (ldcp->rcv_thread != NULL) {
7139 		tid = ldcp->rcv_thread->t_did;
7140 		ldcp->rcv_thr_flags |= VGEN_WTHR_STOP;
7141 		cv_signal(&ldcp->rcv_thr_cv);
7142 	}
7143 	mutex_exit(&ldcp->rcv_thr_lock);
7144 
7145 	if (tid != 0) {
7146 		thread_join(tid);
7147 	}
7148 	DBG1(vgenp, ldcp, "exit\n");
7149 }
7150 
7151 /*
7152  * Wait for the channel rx-queue to be drained by allowing the receive
7153  * worker thread to read all messages from the rx-queue of the channel.
7154  * Assumption: further callbacks are disabled at this time.
7155  */
7156 static void
7157 vgen_drain_rcv_thread(vgen_ldc_t *ldcp)
7158 {
7159 	clock_t	tm;
7160 	clock_t	wt;
7161 	clock_t	rv;
7162 
7163 	/*
7164 	 * If there is data in ldc rx queue, wait until the rx
7165 	 * worker thread runs and drains all msgs in the queue.
7166 	 */
7167 	wt = drv_usectohz(MILLISEC);
7168 
7169 	mutex_enter(&ldcp->rcv_thr_lock);
7170 
7171 	tm = ddi_get_lbolt() + wt;
7172 
7173 	/*
7174 	 * We need to check both bits - DATARCVD and PROCESSING, to be cleared.
7175 	 * If DATARCVD is set, that means the callback has signalled the worker
7176 	 * thread, but the worker hasn't started processing yet. If PROCESSING
7177 	 * is set, that means the thread is awake and processing. Note that the
7178 	 * DATARCVD state can only be seen once, as the assumption is that
7179 	 * further callbacks have been disabled at this point.
7180 	 */
7181 	while (ldcp->rcv_thr_flags &
7182 	    (VGEN_WTHR_DATARCVD | VGEN_WTHR_PROCESSING)) {
7183 		rv = cv_timedwait(&ldcp->rcv_thr_cv, &ldcp->rcv_thr_lock, tm);
7184 		if (rv == -1) {	/* timeout */
7185 			/*
7186 			 * Note that the only way we return is due to a timeout;
7187 			 * we set the new time to wait, before we go back and
7188 			 * check the condition. The other(unlikely) possibility
7189 			 * is a premature wakeup(see cv_timedwait(9F)) in which
7190 			 * case we just continue to use the same time to wait.
7191 			 */
7192 			tm = ddi_get_lbolt() + wt;
7193 		}
7194 	}
7195 
7196 	mutex_exit(&ldcp->rcv_thr_lock);
7197 }
7198 
7199 /*
7200  * vgen_dds_rx -- post DDS messages to vnet.
7201  */
7202 static int
7203 vgen_dds_rx(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
7204 {
7205 	vio_dds_msg_t *dmsg = (vio_dds_msg_t *)tagp;
7206 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
7207 
7208 	if (dmsg->dds_class != DDS_VNET_NIU) {
7209 		DWARN(vgenp, ldcp, "Unknown DDS class, dropping");
7210 		return (EBADMSG);
7211 	}
7212 	vnet_dds_rx(vgenp->vnetp, dmsg);
7213 	return (0);
7214 }
7215 
7216 /*
7217  * vgen_dds_tx -- an interface called by vnet to send DDS messages.
7218  */
7219 int
7220 vgen_dds_tx(void *arg, void *msg)
7221 {
7222 	vgen_t *vgenp = arg;
7223 	vio_dds_msg_t *dmsg = msg;
7224 	vgen_portlist_t *plistp = &vgenp->vgenports;
7225 	vgen_ldc_t *ldcp;
7226 	vgen_ldclist_t *ldclp;
7227 	int rv = EIO;
7228 
7229 
7230 	READ_ENTER(&plistp->rwlock);
7231 	ldclp = &(vgenp->vsw_portp->ldclist);
7232 	READ_ENTER(&ldclp->rwlock);
7233 	ldcp = ldclp->headp;
7234 	if ((ldcp == NULL) || (ldcp->hphase != VH_DONE)) {
7235 		goto vgen_dsend_exit;
7236 	}
7237 
7238 	dmsg->tag.vio_sid = ldcp->local_sid;
7239 	rv = vgen_sendmsg(ldcp, (caddr_t)dmsg, sizeof (vio_dds_msg_t), B_FALSE);
7240 	if (rv != VGEN_SUCCESS) {
7241 		rv = EIO;
7242 	} else {
7243 		rv = 0;
7244 	}
7245 
7246 vgen_dsend_exit:
7247 	RW_EXIT(&ldclp->rwlock);
7248 	RW_EXIT(&plistp->rwlock);
7249 	return (rv);
7250 
7251 }
7252 
7253 static void
7254 vgen_ldc_reset(vgen_ldc_t *ldcp)
7255 {
7256 	vnet_t	*vnetp = LDC_TO_VNET(ldcp);
7257 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
7258 
7259 	ASSERT(MUTEX_HELD(&ldcp->cblock));
7260 
7261 	if (ldcp->need_ldc_reset == B_TRUE) {
7262 		/* another thread is already in the process of resetting */
7263 		return;
7264 	}
7265 
7266 	/* Set the flag to indicate reset is in progress */
7267 	ldcp->need_ldc_reset = B_TRUE;
7268 
7269 	if (ldcp->portp == vgenp->vsw_portp) {
7270 		mutex_exit(&ldcp->cblock);
7271 		/*
7272 		 * Now cleanup any HIO resources; the above flag also tells
7273 		 * the code that handles dds messages to drop any new msgs
7274 		 * that arrive while we are cleaning up and resetting the
7275 		 * channel.
7276 		 */
7277 		vnet_dds_cleanup_hio(vnetp);
7278 		mutex_enter(&ldcp->cblock);
7279 	}
7280 
7281 	vgen_handshake_retry(ldcp);
7282 }
7283 
7284 int
7285 vgen_enable_intr(void *arg)
7286 {
7287 	vgen_port_t		*portp = (vgen_port_t *)arg;
7288 	vgen_ldclist_t		*ldclp;
7289 	vgen_ldc_t		*ldcp;
7290 
7291 	ldclp = &portp->ldclist;
7292 	READ_ENTER(&ldclp->rwlock);
7293 	/*
7294 	 * NOTE: for now, we will assume we have a single channel.
7295 	 */
7296 	if (ldclp->headp == NULL) {
7297 		RW_EXIT(&ldclp->rwlock);
7298 		return (1);
7299 	}
7300 	ldcp = ldclp->headp;
7301 
7302 	mutex_enter(&ldcp->pollq_lock);
7303 	ldcp->polling_on = B_FALSE;
7304 	mutex_exit(&ldcp->pollq_lock);
7305 
7306 	RW_EXIT(&ldclp->rwlock);
7307 
7308 	return (0);
7309 }
7310 
7311 int
7312 vgen_disable_intr(void *arg)
7313 {
7314 	vgen_port_t		*portp = (vgen_port_t *)arg;
7315 	vgen_ldclist_t		*ldclp;
7316 	vgen_ldc_t		*ldcp;
7317 
7318 	ldclp = &portp->ldclist;
7319 	READ_ENTER(&ldclp->rwlock);
7320 	/*
7321 	 * NOTE: for now, we will assume we have a single channel.
7322 	 */
7323 	if (ldclp->headp == NULL) {
7324 		RW_EXIT(&ldclp->rwlock);
7325 		return (1);
7326 	}
7327 	ldcp = ldclp->headp;
7328 
7329 
7330 	mutex_enter(&ldcp->pollq_lock);
7331 	ldcp->polling_on = B_TRUE;
7332 	mutex_exit(&ldcp->pollq_lock);
7333 
7334 	RW_EXIT(&ldclp->rwlock);
7335 
7336 	return (0);
7337 }
7338 
7339 mblk_t *
7340 vgen_poll(void *arg, int bytes_to_pickup)
7341 {
7342 	vgen_port_t		*portp = (vgen_port_t *)arg;
7343 	vgen_ldclist_t		*ldclp;
7344 	vgen_ldc_t		*ldcp;
7345 	mblk_t			*mp = NULL;
7346 
7347 	ldclp = &portp->ldclist;
7348 	READ_ENTER(&ldclp->rwlock);
7349 	/*
7350 	 * NOTE: for now, we will assume we have a single channel.
7351 	 */
7352 	if (ldclp->headp == NULL) {
7353 		RW_EXIT(&ldclp->rwlock);
7354 		return (NULL);
7355 	}
7356 	ldcp = ldclp->headp;
7357 
7358 	mp = vgen_ldc_poll(ldcp, bytes_to_pickup);
7359 
7360 	RW_EXIT(&ldclp->rwlock);
7361 	return (mp);
7362 }
7363 
7364 static mblk_t *
7365 vgen_ldc_poll(vgen_ldc_t *ldcp, int bytes_to_pickup)
7366 {
7367 	mblk_t	*bp = NULL;
7368 	mblk_t	*bpt = NULL;
7369 	mblk_t	*mp = NULL;
7370 	size_t	mblk_sz = 0;
7371 	size_t	sz = 0;
7372 	uint_t	count = 0;
7373 
7374 	mutex_enter(&ldcp->pollq_lock);
7375 
7376 	bp = ldcp->pollq_headp;
7377 	while (bp != NULL) {
7378 		/* get the size of this packet */
7379 		mblk_sz = msgdsize(bp);
7380 
7381 		/* if adding this pkt, exceeds the size limit, we are done. */
7382 		if (sz + mblk_sz >  bytes_to_pickup) {
7383 			break;
7384 		}
7385 
7386 		/* we have room for this packet */
7387 		sz += mblk_sz;
7388 
7389 		/* increment the # of packets being sent up */
7390 		count++;
7391 
7392 		/* track the last processed pkt */
7393 		bpt = bp;
7394 
7395 		/* get the next pkt */
7396 		bp = bp->b_next;
7397 	}
7398 
7399 	if (count != 0) {
7400 		/*
7401 		 * picked up some packets; save the head of pkts to be sent up.
7402 		 */
7403 		mp = ldcp->pollq_headp;
7404 
7405 		/* move the pollq_headp to skip over the pkts being sent up */
7406 		ldcp->pollq_headp = bp;
7407 
7408 		/* picked up all pending pkts in the queue; reset tail also */
7409 		if (ldcp->pollq_headp == NULL) {
7410 			ldcp->pollq_tailp = NULL;
7411 		}
7412 
7413 		/* terminate the tail of pkts to be sent up */
7414 		bpt->b_next = NULL;
7415 	}
7416 
7417 	mutex_exit(&ldcp->pollq_lock);
7418 
7419 	DTRACE_PROBE1(vgen_poll_pkts, uint_t, count);
7420 	return (mp);
7421 }
7422 
7423 #if DEBUG
7424 
7425 /*
7426  * Print debug messages - set to 0xf to enable all msgs
7427  */
7428 static void
7429 debug_printf(const char *fname, vgen_t *vgenp,
7430     vgen_ldc_t *ldcp, const char *fmt, ...)
7431 {
7432 	char    buf[256];
7433 	char    *bufp = buf;
7434 	va_list ap;
7435 
7436 	if ((vgenp != NULL) && (vgenp->vnetp != NULL)) {
7437 		(void) sprintf(bufp, "vnet%d:",
7438 		    ((vnet_t *)(vgenp->vnetp))->instance);
7439 		bufp += strlen(bufp);
7440 	}
7441 	if (ldcp != NULL) {
7442 		(void) sprintf(bufp, "ldc(%ld):", ldcp->ldc_id);
7443 		bufp += strlen(bufp);
7444 	}
7445 	(void) sprintf(bufp, "%s: ", fname);
7446 	bufp += strlen(bufp);
7447 
7448 	va_start(ap, fmt);
7449 	(void) vsprintf(bufp, fmt, ap);
7450 	va_end(ap);
7451 
7452 	if ((ldcp == NULL) ||(vgendbg_ldcid == -1) ||
7453 	    (vgendbg_ldcid == ldcp->ldc_id)) {
7454 		cmn_err(CE_CONT, "%s\n", buf);
7455 	}
7456 }
7457 #endif
7458 
7459 #ifdef	VNET_IOC_DEBUG
7460 
7461 static void
7462 vgen_ioctl(void *arg, queue_t *q, mblk_t *mp)
7463 {
7464 	struct iocblk	*iocp;
7465 	vgen_port_t	*portp;
7466 	enum		ioc_reply {
7467 			IOC_INVAL = -1,		/* bad, NAK with EINVAL */
7468 			IOC_ACK			/* OK, just send ACK    */
7469 	}		status;
7470 	int		rv;
7471 
7472 	iocp = (struct iocblk *)(uintptr_t)mp->b_rptr;
7473 	iocp->ioc_error = 0;
7474 	portp = (vgen_port_t *)arg;
7475 
7476 	if (portp == NULL) {
7477 		status = IOC_INVAL;
7478 		goto vgen_ioc_exit;
7479 	}
7480 
7481 	mutex_enter(&portp->lock);
7482 
7483 	switch (iocp->ioc_cmd) {
7484 
7485 	case VNET_FORCE_LINK_DOWN:
7486 	case VNET_FORCE_LINK_UP:
7487 		rv = vgen_force_link_state(portp, iocp->ioc_cmd);
7488 		(rv == 0) ? (status = IOC_ACK) : (status = IOC_INVAL);
7489 		break;
7490 
7491 	default:
7492 		status = IOC_INVAL;
7493 		break;
7494 
7495 	}
7496 
7497 	mutex_exit(&portp->lock);
7498 
7499 vgen_ioc_exit:
7500 
7501 	switch (status) {
7502 	default:
7503 	case IOC_INVAL:
7504 		/* Error, reply with a NAK and EINVAL error */
7505 		miocnak(q, mp, 0, EINVAL);
7506 		break;
7507 	case IOC_ACK:
7508 		/* OK, reply with an ACK */
7509 		miocack(q, mp, 0, 0);
7510 		break;
7511 	}
7512 }
7513 
7514 static int
7515 vgen_force_link_state(vgen_port_t *portp, int cmd)
7516 {
7517 	ldc_status_t	istatus;
7518 	vgen_ldclist_t	*ldclp;
7519 	vgen_ldc_t	*ldcp;
7520 	vgen_t		*vgenp = portp->vgenp;
7521 	int		rv;
7522 
7523 	ldclp = &portp->ldclist;
7524 	READ_ENTER(&ldclp->rwlock);
7525 
7526 	/*
7527 	 * NOTE: for now, we will assume we have a single channel.
7528 	 */
7529 	if (ldclp->headp == NULL) {
7530 		RW_EXIT(&ldclp->rwlock);
7531 		return (1);
7532 	}
7533 	ldcp = ldclp->headp;
7534 	mutex_enter(&ldcp->cblock);
7535 
7536 	switch (cmd) {
7537 
7538 	case VNET_FORCE_LINK_DOWN:
7539 		(void) ldc_down(ldcp->ldc_handle);
7540 		ldcp->link_down_forced = B_TRUE;
7541 		break;
7542 
7543 	case VNET_FORCE_LINK_UP:
7544 		rv = ldc_up(ldcp->ldc_handle);
7545 		if (rv != 0) {
7546 			DWARN(vgenp, ldcp, "ldc_up err rv(%d)\n", rv);
7547 		}
7548 		ldcp->link_down_forced = B_FALSE;
7549 
7550 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
7551 			DWARN(vgenp, ldcp, "ldc_status err\n");
7552 		} else {
7553 			ldcp->ldc_status = istatus;
7554 		}
7555 
7556 		/* if channel is already UP - restart handshake */
7557 		if (ldcp->ldc_status == LDC_UP) {
7558 			vgen_handle_evt_up(ldcp);
7559 		}
7560 		break;
7561 
7562 	}
7563 
7564 	mutex_exit(&ldcp->cblock);
7565 	RW_EXIT(&ldclp->rwlock);
7566 
7567 	return (0);
7568 }
7569 
7570 #else
7571 
7572 static void
7573 vgen_ioctl(void *arg, queue_t *q, mblk_t *mp)
7574 {
7575 	vgen_port_t	*portp;
7576 
7577 	portp = (vgen_port_t *)arg;
7578 
7579 	if (portp == NULL) {
7580 		miocnak(q, mp, 0, EINVAL);
7581 		return;
7582 	}
7583 
7584 	miocnak(q, mp, 0, ENOTSUP);
7585 }
7586 
7587 #endif
7588