xref: /illumos-gate/usr/src/uts/sun4v/io/vnet_gen.c (revision de81e71e031139a0a7f13b7bf64152c3faa76698)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/types.h>
28 #include <sys/errno.h>
29 #include <sys/sysmacros.h>
30 #include <sys/param.h>
31 #include <sys/stream.h>
32 #include <sys/strsubr.h>
33 #include <sys/kmem.h>
34 #include <sys/conf.h>
35 #include <sys/devops.h>
36 #include <sys/ksynch.h>
37 #include <sys/stat.h>
38 #include <sys/modctl.h>
39 #include <sys/debug.h>
40 #include <sys/ethernet.h>
41 #include <sys/ddi.h>
42 #include <sys/sunddi.h>
43 #include <sys/strsun.h>
44 #include <sys/note.h>
45 #include <sys/mac_provider.h>
46 #include <sys/mac_ether.h>
47 #include <sys/ldc.h>
48 #include <sys/mach_descrip.h>
49 #include <sys/mdeg.h>
50 #include <net/if.h>
51 #include <sys/vnet.h>
52 #include <sys/vio_mailbox.h>
53 #include <sys/vio_common.h>
54 #include <sys/vnet_common.h>
55 #include <sys/vnet_mailbox.h>
56 #include <sys/vio_util.h>
57 #include <sys/vnet_gen.h>
58 #include <sys/atomic.h>
59 #include <sys/callb.h>
60 #include <sys/sdt.h>
61 #include <sys/intr.h>
62 #include <sys/pattr.h>
63 #include <sys/vlan.h>
64 
65 /*
66  * Implementation of the mac functionality for vnet using the
67  * generic(default) transport layer of sun4v Logical Domain Channels(LDC).
68  */
69 
70 /*
71  * Function prototypes.
72  */
73 /* vgen proxy entry points */
74 int vgen_init(void *vnetp, uint64_t regprop, dev_info_t *vnetdip,
75     const uint8_t *macaddr, void **vgenhdl);
76 void vgen_uninit(void *arg);
77 int vgen_dds_tx(void *arg, void *dmsg);
78 void vgen_mod_init(void);
79 int vgen_mod_cleanup(void);
80 void vgen_mod_fini(void);
81 static int vgen_start(void *arg);
82 static void vgen_stop(void *arg);
83 static mblk_t *vgen_tx(void *arg, mblk_t *mp);
84 static int vgen_multicst(void *arg, boolean_t add,
85 	const uint8_t *mca);
86 static int vgen_promisc(void *arg, boolean_t on);
87 static int vgen_unicst(void *arg, const uint8_t *mca);
88 static int vgen_stat(void *arg, uint_t stat, uint64_t *val);
89 static void vgen_ioctl(void *arg, queue_t *q, mblk_t *mp);
90 #ifdef	VNET_IOC_DEBUG
91 static int vgen_force_link_state(vgen_port_t *portp, int link_state);
92 #endif
93 
94 /* vgen internal functions */
95 static int vgen_read_mdprops(vgen_t *vgenp);
96 static void vgen_update_md_prop(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
97 static void vgen_read_pri_eth_types(vgen_t *vgenp, md_t *mdp,
98 	mde_cookie_t node);
99 static void vgen_mtu_read(vgen_t *vgenp, md_t *mdp, mde_cookie_t node,
100 	uint32_t *mtu);
101 static void vgen_linkprop_read(vgen_t *vgenp, md_t *mdp, mde_cookie_t node,
102 	boolean_t *pls);
103 static void vgen_detach_ports(vgen_t *vgenp);
104 static void vgen_port_detach(vgen_port_t *portp);
105 static void vgen_port_list_insert(vgen_port_t *portp);
106 static void vgen_port_list_remove(vgen_port_t *portp);
107 static vgen_port_t *vgen_port_lookup(vgen_portlist_t *plistp,
108 	int port_num);
109 static int vgen_mdeg_reg(vgen_t *vgenp);
110 static void vgen_mdeg_unreg(vgen_t *vgenp);
111 static int vgen_mdeg_cb(void *cb_argp, mdeg_result_t *resp);
112 static int vgen_mdeg_port_cb(void *cb_argp, mdeg_result_t *resp);
113 static int vgen_add_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
114 static int vgen_port_read_props(vgen_port_t *portp, vgen_t *vgenp, md_t *mdp,
115 	mde_cookie_t mdex);
116 static int vgen_remove_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
117 static int vgen_port_attach(vgen_port_t *portp);
118 static void vgen_port_detach_mdeg(vgen_port_t *portp);
119 static void vgen_port_detach_mdeg(vgen_port_t *portp);
120 static int vgen_update_port(vgen_t *vgenp, md_t *curr_mdp,
121 	mde_cookie_t curr_mdex, md_t *prev_mdp, mde_cookie_t prev_mdex);
122 static uint64_t	vgen_port_stat(vgen_port_t *portp, uint_t stat);
123 static void vgen_port_reset(vgen_port_t *portp);
124 static void vgen_reset_vsw_port(vgen_t *vgenp);
125 
126 static int vgen_ldc_attach(vgen_port_t *portp, uint64_t ldc_id);
127 static void vgen_ldc_detach(vgen_ldc_t *ldcp);
128 static int vgen_alloc_tx_ring(vgen_ldc_t *ldcp);
129 static void vgen_free_tx_ring(vgen_ldc_t *ldcp);
130 static void vgen_init_ports(vgen_t *vgenp);
131 static void vgen_port_init(vgen_port_t *portp);
132 static void vgen_uninit_ports(vgen_t *vgenp);
133 static void vgen_port_uninit(vgen_port_t *portp);
134 static void vgen_init_ldcs(vgen_port_t *portp);
135 static void vgen_uninit_ldcs(vgen_port_t *portp);
136 static int vgen_ldc_init(vgen_ldc_t *ldcp);
137 static void vgen_ldc_uninit(vgen_ldc_t *ldcp);
138 static int vgen_init_tbufs(vgen_ldc_t *ldcp);
139 static void vgen_uninit_tbufs(vgen_ldc_t *ldcp);
140 static void vgen_clobber_tbufs(vgen_ldc_t *ldcp);
141 static void vgen_clobber_rxds(vgen_ldc_t *ldcp);
142 static uint64_t	vgen_ldc_stat(vgen_ldc_t *ldcp, uint_t stat);
143 static uint_t vgen_ldc_cb(uint64_t event, caddr_t arg);
144 static int vgen_portsend(vgen_port_t *portp, mblk_t *mp);
145 static int vgen_ldcsend(void *arg, mblk_t *mp);
146 static void vgen_ldcsend_pkt(void *arg, mblk_t *mp);
147 static int vgen_ldcsend_dring(void *arg, mblk_t *mp);
148 static void vgen_reclaim(vgen_ldc_t *ldcp);
149 static void vgen_reclaim_dring(vgen_ldc_t *ldcp);
150 static int vgen_num_txpending(vgen_ldc_t *ldcp);
151 static int vgen_tx_dring_full(vgen_ldc_t *ldcp);
152 static int vgen_ldc_txtimeout(vgen_ldc_t *ldcp);
153 static void vgen_ldc_watchdog(void *arg);
154 
155 /* vgen handshake functions */
156 static vgen_ldc_t *vh_nextphase(vgen_ldc_t *ldcp);
157 static int vgen_sendmsg(vgen_ldc_t *ldcp, caddr_t msg,  size_t msglen,
158 	boolean_t caller_holds_lock);
159 static int vgen_send_version_negotiate(vgen_ldc_t *ldcp);
160 static int vgen_send_attr_info(vgen_ldc_t *ldcp);
161 static int vgen_send_dring_reg(vgen_ldc_t *ldcp);
162 static int vgen_send_rdx_info(vgen_ldc_t *ldcp);
163 static int vgen_send_dring_data(vgen_ldc_t *ldcp, uint32_t start, int32_t end);
164 static int vgen_send_mcast_info(vgen_ldc_t *ldcp);
165 static int vgen_handshake_phase2(vgen_ldc_t *ldcp);
166 static void vgen_handshake_reset(vgen_ldc_t *ldcp);
167 static void vgen_reset_hphase(vgen_ldc_t *ldcp);
168 static void vgen_handshake(vgen_ldc_t *ldcp);
169 static int vgen_handshake_done(vgen_ldc_t *ldcp);
170 static void vgen_handshake_retry(vgen_ldc_t *ldcp);
171 static int vgen_handle_version_negotiate(vgen_ldc_t *ldcp,
172 	vio_msg_tag_t *tagp);
173 static int vgen_handle_attr_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
174 static int vgen_handle_dring_reg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
175 static int vgen_handle_rdx_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
176 static int vgen_handle_mcast_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
177 static int vgen_handle_ctrlmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
178 static void vgen_handle_pkt_data_nop(void *arg1, void *arg2, uint32_t msglen);
179 static void vgen_handle_pkt_data(void *arg1, void *arg2, uint32_t msglen);
180 static int vgen_handle_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
181 static int vgen_handle_dring_data_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
182 static int vgen_process_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
183 static int vgen_handle_dring_data_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
184 static int vgen_handle_dring_data_nack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
185 static int vgen_send_dring_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp,
186 	uint32_t start, int32_t end, uint8_t pstate);
187 static int vgen_handle_datamsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp,
188 	uint32_t msglen);
189 static void vgen_handle_errmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
190 static void vgen_handle_evt_up(vgen_ldc_t *ldcp);
191 static void vgen_handle_evt_reset(vgen_ldc_t *ldcp);
192 static int vgen_check_sid(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
193 static int vgen_check_datamsg_seq(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
194 static caddr_t vgen_print_ethaddr(uint8_t *a, char *ebuf);
195 static void vgen_hwatchdog(void *arg);
196 static void vgen_print_attr_info(vgen_ldc_t *ldcp, int endpoint);
197 static void vgen_print_hparams(vgen_hparams_t *hp);
198 static void vgen_print_ldcinfo(vgen_ldc_t *ldcp);
199 static void vgen_stop_rcv_thread(vgen_ldc_t *ldcp);
200 static void vgen_drain_rcv_thread(vgen_ldc_t *ldcp);
201 static void vgen_ldc_rcv_worker(void *arg);
202 static void vgen_handle_evt_read(vgen_ldc_t *ldcp);
203 static void vgen_rx(vgen_ldc_t *ldcp, mblk_t *bp);
204 static void vgen_set_vnet_proto_ops(vgen_ldc_t *ldcp);
205 static void vgen_reset_vnet_proto_ops(vgen_ldc_t *ldcp);
206 static void vgen_link_update(vgen_t *vgenp, link_state_t link_state);
207 
208 /* VLAN routines */
209 static void vgen_vlan_read_ids(void *arg, int type, md_t *mdp,
210 	mde_cookie_t node, uint16_t *pvidp, uint16_t **vidspp,
211 	uint16_t *nvidsp, uint16_t *default_idp);
212 static void vgen_vlan_create_hash(vgen_port_t *portp);
213 static void vgen_vlan_destroy_hash(vgen_port_t *portp);
214 static void vgen_vlan_add_ids(vgen_port_t *portp);
215 static void vgen_vlan_remove_ids(vgen_port_t *portp);
216 static boolean_t vgen_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid);
217 static boolean_t vgen_frame_lookup_vid(vnet_t *vnetp, struct ether_header *ehp,
218 	uint16_t *vidp);
219 static mblk_t *vgen_vlan_frame_fixtag(vgen_port_t *portp, mblk_t *mp,
220 	boolean_t is_tagged, uint16_t vid);
221 static void vgen_vlan_unaware_port_reset(vgen_port_t *portp);
222 static void vgen_reset_vlan_unaware_ports(vgen_t *vgenp);
223 static int vgen_dds_rx(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
224 
225 /* externs */
226 extern void vnet_dds_rx(void *arg, void *dmsg);
227 extern int vnet_mtu_update(vnet_t *vnetp, uint32_t mtu);
228 extern void vnet_link_update(vnet_t *vnetp, link_state_t link_state);
229 
/*
 * The handshake process consists of 5 phases defined below, with VH_PHASE0
 * being the pre-handshake phase and VH_DONE is the phase to indicate
 * successful completion of all phases.
 * Each phase may have one to several handshake states which are required
 * to complete successfully to move to the next phase.
 * Refer to the functions vgen_handshake() and vgen_handshake_done() for
 * more details.
 */
/* handshake phases (VH_PHASE0..VH_PHASE3 are 0..3; VH_DONE is 0x80) */
enum {	VH_PHASE0, VH_PHASE1, VH_PHASE2, VH_PHASE3, VH_DONE = 0x80 };

/*
 * handshake states
 *
 * Every value is a distinct bit, so states can be OR-ed together. Each
 * exchange (version, attributes, descriptor ring, RDX) owns four bits:
 * info sent, ack received, info received, ack sent. The composite value
 * of an exchange is "ack received" | "ack sent".
 */
enum {

	VER_INFO_SENT	=	0x1,
	VER_ACK_RCVD	=	0x2,
	VER_INFO_RCVD	=	0x4,
	VER_ACK_SENT	=	0x8,
	VER_NEGOTIATED	=	(VER_ACK_RCVD | VER_ACK_SENT),

	ATTR_INFO_SENT	=	0x10,
	ATTR_ACK_RCVD	=	0x20,
	ATTR_INFO_RCVD	=	0x40,
	ATTR_ACK_SENT	=	0x80,
	ATTR_INFO_EXCHANGED	=	(ATTR_ACK_RCVD | ATTR_ACK_SENT),

	DRING_INFO_SENT	=	0x100,
	DRING_ACK_RCVD	=	0x200,
	DRING_INFO_RCVD	=	0x400,
	DRING_ACK_SENT	=	0x800,
	DRING_INFO_EXCHANGED	=	(DRING_ACK_RCVD | DRING_ACK_SENT),

	RDX_INFO_SENT	=	0x1000,
	RDX_ACK_RCVD	=	0x2000,
	RDX_INFO_RCVD	=	0x4000,
	RDX_ACK_SENT	=	0x8000,
	RDX_EXCHANGED	=	(RDX_ACK_RCVD | RDX_ACK_SENT)

};
270 
/* True when at least one priority ethertype is configured (count != 0). */
#define	VGEN_PRI_ETH_DEFINED(vgenp)	((vgenp)->pri_num_types != 0)

/*
 * Acquire / release all five per-channel locks. LDC_LOCK takes them in the
 * order cblock, rxlock, wrlock, txlock, tclock; LDC_UNLOCK drops them in
 * the reverse order.
 *
 * Both are wrapped in do { } while (0) so that each expands to exactly one
 * statement; use them as "LDC_LOCK(ldcp);" with a trailing semicolon. This
 * keeps every mutex operation under the control of an unbraced if/else.
 */
#define	LDC_LOCK(ldcp)	\
	do {	\
		mutex_enter(&((ldcp)->cblock));	\
		mutex_enter(&((ldcp)->rxlock));	\
		mutex_enter(&((ldcp)->wrlock));	\
		mutex_enter(&((ldcp)->txlock));	\
		mutex_enter(&((ldcp)->tclock));	\
	} while (0)
#define	LDC_UNLOCK(ldcp)	\
	do {	\
		mutex_exit(&((ldcp)->tclock));	\
		mutex_exit(&((ldcp)->txlock));	\
		mutex_exit(&((ldcp)->wrlock));	\
		mutex_exit(&((ldcp)->rxlock));	\
		mutex_exit(&((ldcp)->cblock));	\
	} while (0)

/*
 * Comparisons of the channel's local handshake version
 * (local_hparams.ver_major / ver_minor) against a given major.minor.
 */
#define	VGEN_VER_EQ(ldcp, major, minor)	\
	((ldcp)->local_hparams.ver_major == (major) &&	\
	    (ldcp)->local_hparams.ver_minor == (minor))

#define	VGEN_VER_LT(ldcp, major, minor)	\
	(((ldcp)->local_hparams.ver_major < (major)) ||	\
	    ((ldcp)->local_hparams.ver_major == (major) &&	\
	    (ldcp)->local_hparams.ver_minor < (minor)))

#define	VGEN_VER_GTEQ(ldcp, major, minor)	\
	(((ldcp)->local_hparams.ver_major > (major)) ||	\
	    ((ldcp)->local_hparams.ver_major == (major) &&	\
	    (ldcp)->local_hparams.ver_minor >= (minor)))
299 
/* All-ones ethernet address; IS_BROADCAST() compares destinations to it. */
static struct ether_addr etherbroadcastaddr = {
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};
/*
 * MIB II broadcast/multicast packets
 *
 * Both macros take a pointer to an ethernet header. The argument is
 * parenthesized so that expressions such as "ehp + 1" expand correctly.
 * IS_BROADCAST: destination equals etherbroadcastaddr.
 * IS_MULTICAST: low-order bit of the first destination octet is set.
 */
#define	IS_BROADCAST(ehp) \
		(ether_cmp(&(ehp)->ether_dhost, &etherbroadcastaddr) == 0)
#define	IS_MULTICAST(ehp) \
		(((ehp)->ether_dhost.ether_addr_octet[0] & 01) == 1)
310 
/*
 * Property names
 *
 * NOTE(review): these look like machine description (MD) node and property
 * names. They are writable char arrays rather than const strings,
 * presumably because the MD lookup interfaces take char * -- confirm
 * before const-qualifying any of them.
 */
static char macaddr_propname[] = "mac-address";
static char rmacaddr_propname[] = "remote-mac-address";
static char channel_propname[] = "channel-endpoint";
static char reg_propname[] = "reg";
static char port_propname[] = "port";
static char swport_propname[] = "switch-port";
static char id_propname[] = "id";
static char vdev_propname[] = "virtual-device";
static char vnet_propname[] = "network";
static char pri_types_propname[] = "priority-ether-types";
static char vgen_pvid_propname[] = "port-vlan-id";
static char vgen_vid_propname[] = "vlan-id";
static char vgen_dvid_propname[] = "default-vlan-id";
static char port_pvid_propname[] = "remote-port-vlan-id";
static char port_vid_propname[] = "remote-vlan-id";
static char vgen_mtu_propname[] = "mtu";
static char vgen_linkprop_propname[] = "linkprop";
331 
/*
 * VIO Protocol Version Info:
 *
 * The version specified below represents the version of protocol currently
 * supported in the driver. It means the driver can negotiate with peers with
 * versions <= this version. Here is a summary of the feature(s) that are
 * supported at each version of the protocol:
 *
 * 1.0			Basic VIO protocol.
 * 1.1			vDisk protocol update (no virtual network update).
 * 1.2			Support for priority frames (priority-ether-types).
 * 1.3			VLAN and HybridIO support.
 * 1.4			Jumbo Frame support.
 * 1.5			Link State Notification support with optional support
 *			for Physical Link information.
 *
 * Only the first array element is initialized explicitly; if VGEN_NUM_VER
 * is greater than 1 the remaining elements are zero-filled by the compiler.
 */
static vgen_ver_t vgen_versions[VGEN_NUM_VER] =  { {1, 5} };
349 
/* Tunables */
uint32_t vgen_hwd_interval = 5;		/* handshake watchdog freq in sec */
uint32_t vgen_max_hretries = VNET_NUM_HANDSHAKES; /* # of handshake retries */
uint32_t vgen_ldcwr_retries = 10;	/* max # of ldc_write() retries */
uint32_t vgen_ldcup_retries = 5;	/* max # of ldc_up() retries */
uint32_t vgen_ldccl_retries = 5;	/* max # of ldc_close() retries */
uint32_t vgen_recv_delay = 1;		/* delay when rx descr not ready */
uint32_t vgen_recv_retries = 10;	/* retry when rx descr not ready */
/* bounds the number of vgen_portsend() attempts made by vgen_tx() */
uint32_t vgen_tx_retries = 0x4;		/* retry when tx descr not available */
/* passed to drv_usectohz() for the delay between those attempts */
uint32_t vgen_tx_delay = 0x30;		/* delay when tx descr not available */

int vgen_rcv_thread_enabled = 1;	/* Enable Receive thread */

/*
 * List of rx mblk pools that vgen_uninit() could not destroy; they are
 * retried by vgen_mod_cleanup(). The list is protected by vgen_rw.
 */
static vio_mblk_pool_t	*vgen_rx_poolp = NULL;
static krwlock_t	vgen_rw;

/*
 * max # of packets accumulated prior to sending them up. It is best
 * to keep this at 60% of the number of receive buffers.
 * The initializer is a constant expression that is converted to an
 * integer (fraction discarded) at compile time.
 */
uint32_t vgen_chain_len = (VGEN_NRBUFS * 0.6);

/*
 * Internal tunables for receive buffer pools, that is,  the size and number of
 * mblks for each pool. At least 3 sizes must be specified if these are used.
 * The sizes must be specified in increasing order. Non-zero value of the first
 * size will be used as a hint to use these values instead of the algorithm
 * that determines the sizes based on MTU.
 */
uint32_t vgen_rbufsz1 = 0;
uint32_t vgen_rbufsz2 = 0;
uint32_t vgen_rbufsz3 = 0;
uint32_t vgen_rbufsz4 = 0;

uint32_t vgen_nrbufs1 = VGEN_NRBUFS;
uint32_t vgen_nrbufs2 = VGEN_NRBUFS;
uint32_t vgen_nrbufs3 = VGEN_NRBUFS;
uint32_t vgen_nrbufs4 = VGEN_NRBUFS;

/*
 * In the absence of "priority-ether-types" property in MD, the following
 * internal tunable can be set to specify a single priority ethertype.
 */
uint64_t vgen_pri_eth_type = 0;

/*
 * Number of transmit priority buffers that are preallocated per device.
 * This number is chosen to be a small value to throttle transmission
 * of priority packets. Note: Must be a power of 2 for vio_create_mblks().
 */
uint32_t vgen_pri_tx_nmblks = 64;

uint32_t	vgen_vlan_nchains = 4;	/* # of chains in vlan id hash table */

#ifdef DEBUG
/* flags to simulate error conditions for debugging */
int vgen_trigger_txtimeout = 0;
int vgen_trigger_rxlost = 0;
#endif
409 
/*
 * Matching criteria passed to the MDEG to register interest
 * in changes to 'virtual-device' nodes (i.e. vnet nodes) identified
 * by their 'name' and 'cfg-handle' properties.
 */
static md_prop_match_t vdev_prop_match[] = {
	{ MDET_PROP_STR,    "name"   },
	{ MDET_PROP_VAL,    "cfg-handle" },
	{ MDET_LIST_END,    NULL    }
};

/* node name plus the property list above */
static mdeg_node_match_t vdev_match = { "virtual-device",
						vdev_prop_match };

/* MD update matching structure: port nodes are matched on their 'id' */
static md_prop_match_t	vport_prop_match[] = {
	{ MDET_PROP_VAL,	"id" },
	{ MDET_LIST_END,	NULL }
};

static mdeg_node_match_t vport_match = { "virtual-device-port",
					vport_prop_match };

/*
 * template for matching a particular vnet instance
 * The value of entry [1] ("cfg-handle") is NULL here;
 * VGEN_SET_MDEG_PROP_INST() stores the instance value into that entry.
 */
static mdeg_prop_spec_t vgen_prop_template[] = {
	{ MDET_PROP_STR,	"name",		"network" },
	{ MDET_PROP_VAL,	"cfg-handle",	NULL },
	{ MDET_LIST_END,	NULL,		NULL }
};
439 
/*
 * Store 'val' as the value of entry [1] of an mdeg property spec array
 * (the "cfg-handle" entry of vgen_prop_template). The expansion is fully
 * parenthesized so it behaves as a single expression wherever it is used.
 */
#define	VGEN_SET_MDEG_PROP_INST(specp, val)	((specp)[1].ps_val = (val))
443 
/* MC_IOCTL is advertised only when built with VNET_IOC_DEBUG. */
#ifdef	VNET_IOC_DEBUG
#define	VGEN_M_CALLBACK_FLAGS	(MC_IOCTL)
#else
#define	VGEN_M_CALLBACK_FLAGS	(0)
#endif

/*
 * MAC callback vector. The initializer is positional, so the order of the
 * entries must follow the member order of mac_callbacks_t.
 * NOTE(review): vgen_ioctl is listed even when the flags are 0; presumably
 * the framework ignores it unless MC_IOCTL is set -- confirm.
 */
static mac_callbacks_t vgen_m_callbacks = {
	VGEN_M_CALLBACK_FLAGS,
	vgen_stat,
	vgen_start,
	vgen_stop,
	vgen_promisc,
	vgen_multicst,
	vgen_unicst,
	vgen_tx,
	vgen_ioctl,
	NULL,
	NULL
};
463 
464 /* externs */
465 extern pri_t	maxclsyspri;
466 extern proc_t	p0;
467 extern uint32_t vnet_ntxds;
468 extern uint32_t vnet_ldcwd_interval;
469 extern uint32_t vnet_ldcwd_txtimeout;
470 extern uint32_t vnet_ldc_mtu;
471 extern uint32_t vnet_nrbufs;
472 extern uint32_t	vnet_ethermtu;
473 extern uint16_t	vnet_default_vlan_id;
474 extern boolean_t vnet_jumbo_rxpools;
475 
476 #ifdef DEBUG
477 
478 extern int vnet_dbglevel;
479 static void debug_printf(const char *fname, vgen_t *vgenp,
480 	vgen_ldc_t *ldcp, const char *fmt, ...);
481 
482 /* -1 for all LDCs info, or ldc_id for a specific LDC info */
483 int vgendbg_ldcid = -1;
484 
485 /* simulate handshake error conditions for debug */
486 uint32_t vgen_hdbg;
487 #define	HDBG_VERSION	0x1
488 #define	HDBG_TIMEOUT	0x2
489 #define	HDBG_BAD_SID	0x4
490 #define	HDBG_OUT_STATE	0x8
491 
492 #endif
493 
494 /*
495  * vgen_init() is called by an instance of vnet driver to initialize the
496  * corresponding generic proxy transport layer. The arguments passed by vnet
497  * are - an opaque pointer to the vnet instance, pointers to dev_info_t and
498  * the mac address of the vnet device, and a pointer to vgen_t is passed
499  * back as a handle to vnet.
500  */
501 int
502 vgen_init(void *vnetp, uint64_t regprop, dev_info_t *vnetdip,
503     const uint8_t *macaddr, void **vgenhdl)
504 {
505 	vgen_t *vgenp;
506 	int instance;
507 	int rv;
508 
509 	if ((vnetp == NULL) || (vnetdip == NULL))
510 		return (DDI_FAILURE);
511 
512 	instance = ddi_get_instance(vnetdip);
513 
514 	DBG1(NULL, NULL, "vnet(%d): enter\n", instance);
515 
516 	vgenp = kmem_zalloc(sizeof (vgen_t), KM_SLEEP);
517 
518 	vgenp->vnetp = vnetp;
519 	vgenp->instance = instance;
520 	vgenp->regprop = regprop;
521 	vgenp->vnetdip = vnetdip;
522 	bcopy(macaddr, &(vgenp->macaddr), ETHERADDRL);
523 	vgenp->phys_link_state = LINK_STATE_UNKNOWN;
524 
525 	/* allocate multicast table */
526 	vgenp->mctab = kmem_zalloc(VGEN_INIT_MCTAB_SIZE *
527 	    sizeof (struct ether_addr), KM_SLEEP);
528 	vgenp->mccount = 0;
529 	vgenp->mcsize = VGEN_INIT_MCTAB_SIZE;
530 
531 	mutex_init(&vgenp->lock, NULL, MUTEX_DRIVER, NULL);
532 	rw_init(&vgenp->vgenports.rwlock, NULL, RW_DRIVER, NULL);
533 
534 	rv = vgen_read_mdprops(vgenp);
535 	if (rv != 0) {
536 		goto vgen_init_fail;
537 	}
538 
539 	/* register with MD event generator */
540 	rv = vgen_mdeg_reg(vgenp);
541 	if (rv != DDI_SUCCESS) {
542 		goto vgen_init_fail;
543 	}
544 
545 	*vgenhdl = (void *)vgenp;
546 
547 	DBG1(NULL, NULL, "vnet(%d): exit\n", instance);
548 	return (DDI_SUCCESS);
549 
550 vgen_init_fail:
551 	rw_destroy(&vgenp->vgenports.rwlock);
552 	mutex_destroy(&vgenp->lock);
553 	kmem_free(vgenp->mctab, VGEN_INIT_MCTAB_SIZE *
554 	    sizeof (struct ether_addr));
555 	if (VGEN_PRI_ETH_DEFINED(vgenp)) {
556 		kmem_free(vgenp->pri_types,
557 		    sizeof (uint16_t) * vgenp->pri_num_types);
558 		(void) vio_destroy_mblks(vgenp->pri_tx_vmp);
559 	}
560 	KMEM_FREE(vgenp);
561 	return (DDI_FAILURE);
562 }
563 
564 /*
565  * Called by vnet to undo the initializations done by vgen_init().
566  * The handle provided by generic transport during vgen_init() is the argument.
567  */
568 void
569 vgen_uninit(void *arg)
570 {
571 	vgen_t		*vgenp = (vgen_t *)arg;
572 	vio_mblk_pool_t	*rp;
573 	vio_mblk_pool_t	*nrp;
574 
575 	if (vgenp == NULL) {
576 		return;
577 	}
578 
579 	DBG1(vgenp, NULL, "enter\n");
580 
581 	/* unregister with MD event generator */
582 	vgen_mdeg_unreg(vgenp);
583 
584 	mutex_enter(&vgenp->lock);
585 
586 	/* detach all ports from the device */
587 	vgen_detach_ports(vgenp);
588 
589 	/*
590 	 * free any pending rx mblk pools,
591 	 * that couldn't be freed previously during channel detach.
592 	 */
593 	rp = vgenp->rmp;
594 	while (rp != NULL) {
595 		nrp = vgenp->rmp = rp->nextp;
596 		if (vio_destroy_mblks(rp)) {
597 			WRITE_ENTER(&vgen_rw);
598 			rp->nextp = vgen_rx_poolp;
599 			vgen_rx_poolp = rp;
600 			RW_EXIT(&vgen_rw);
601 		}
602 		rp = nrp;
603 	}
604 
605 	/* free multicast table */
606 	kmem_free(vgenp->mctab, vgenp->mcsize * sizeof (struct ether_addr));
607 
608 	/* free pri_types table */
609 	if (VGEN_PRI_ETH_DEFINED(vgenp)) {
610 		kmem_free(vgenp->pri_types,
611 		    sizeof (uint16_t) * vgenp->pri_num_types);
612 		(void) vio_destroy_mblks(vgenp->pri_tx_vmp);
613 	}
614 
615 	mutex_exit(&vgenp->lock);
616 
617 	rw_destroy(&vgenp->vgenports.rwlock);
618 	mutex_destroy(&vgenp->lock);
619 
620 	DBG1(vgenp, NULL, "exit\n");
621 	KMEM_FREE(vgenp);
622 }
623 
/*
 * module specific initialization common to all instances of vnet/vgen.
 * Initializes vgen_rw, the lock that guards the module-wide vgen_rx_poolp
 * list.
 */
void
vgen_mod_init(void)
{
	rw_init(&vgen_rw, NULL, RW_DRIVER, NULL);
}
632 
633 /*
634  * module specific cleanup common to all instances of vnet/vgen.
635  */
636 int
637 vgen_mod_cleanup(void)
638 {
639 	vio_mblk_pool_t	*poolp, *npoolp;
640 
641 	/*
642 	 * If any rx mblk pools are still in use, return
643 	 * error and stop the module from unloading.
644 	 */
645 	WRITE_ENTER(&vgen_rw);
646 	poolp = vgen_rx_poolp;
647 	while (poolp != NULL) {
648 		npoolp = vgen_rx_poolp = poolp->nextp;
649 		if (vio_destroy_mblks(poolp) != 0) {
650 			vgen_rx_poolp = poolp;
651 			RW_EXIT(&vgen_rw);
652 			return (EBUSY);
653 		}
654 		poolp = npoolp;
655 	}
656 	RW_EXIT(&vgen_rw);
657 
658 	return (0);
659 }
660 
/*
 * module specific uninitialization common to all instances of vnet/vgen.
 * Destroys vgen_rw, the lock set up by vgen_mod_init().
 */
void
vgen_mod_fini(void)
{
	rw_destroy(&vgen_rw);
}
669 
670 /* enable transmit/receive for the device */
671 int
672 vgen_start(void *arg)
673 {
674 	vgen_port_t	*portp = (vgen_port_t *)arg;
675 	vgen_t		*vgenp = portp->vgenp;
676 
677 	DBG1(vgenp, NULL, "enter\n");
678 	mutex_enter(&portp->lock);
679 	vgen_port_init(portp);
680 	portp->flags |= VGEN_STARTED;
681 	mutex_exit(&portp->lock);
682 	DBG1(vgenp, NULL, "exit\n");
683 
684 	return (DDI_SUCCESS);
685 }
686 
687 /* stop transmit/receive */
688 void
689 vgen_stop(void *arg)
690 {
691 	vgen_port_t	*portp = (vgen_port_t *)arg;
692 	vgen_t		*vgenp = portp->vgenp;
693 
694 	DBG1(vgenp, NULL, "enter\n");
695 
696 	mutex_enter(&portp->lock);
697 	vgen_port_uninit(portp);
698 	portp->flags &= ~(VGEN_STARTED);
699 	mutex_exit(&portp->lock);
700 	DBG1(vgenp, NULL, "exit\n");
701 
702 }
703 
704 /* vgen transmit function */
705 static mblk_t *
706 vgen_tx(void *arg, mblk_t *mp)
707 {
708 	int i;
709 	vgen_port_t *portp;
710 	int status = VGEN_FAILURE;
711 
712 	portp = (vgen_port_t *)arg;
713 	/*
714 	 * Retry so that we avoid reporting a failure
715 	 * to the upper layer. Returning a failure may cause the
716 	 * upper layer to go into single threaded mode there by
717 	 * causing performance degradation, especially for a large
718 	 * number of connections.
719 	 */
720 	for (i = 0; i < vgen_tx_retries; ) {
721 		status = vgen_portsend(portp, mp);
722 		if (status == VGEN_SUCCESS) {
723 			break;
724 		}
725 		if (++i < vgen_tx_retries)
726 			delay(drv_usectohz(vgen_tx_delay));
727 	}
728 	if (status != VGEN_SUCCESS) {
729 		/* failure */
730 		return (mp);
731 	}
732 	/* success */
733 	return (NULL);
734 }
735 
736 /*
737  * This function provides any necessary tagging/untagging of the frames
738  * that are being transmitted over the port. It first verifies the vlan
739  * membership of the destination(port) and drops the packet if the
740  * destination doesn't belong to the given vlan.
741  *
742  * Arguments:
743  *   portp:     port over which the frames should be transmitted
744  *   mp:        frame to be transmitted
745  *   is_tagged:
746  *              B_TRUE: indicates frame header contains the vlan tag already.
747  *              B_FALSE: indicates frame is untagged.
748  *   vid:       vlan in which the frame should be transmitted.
749  *
750  * Returns:
751  *              Sucess: frame(mblk_t *) after doing the necessary tag/untag.
752  *              Failure: NULL
753  */
754 static mblk_t *
755 vgen_vlan_frame_fixtag(vgen_port_t *portp, mblk_t *mp, boolean_t is_tagged,
756 	uint16_t vid)
757 {
758 	vgen_t				*vgenp;
759 	boolean_t			dst_tagged;
760 	int				rv;
761 
762 	vgenp = portp->vgenp;
763 
764 	/*
765 	 * If the packet is going to a vnet:
766 	 *   Check if the destination vnet is in the same vlan.
767 	 *   Check the frame header if tag or untag is needed.
768 	 *
769 	 * We do not check the above conditions if the packet is going to vsw:
770 	 *   vsw must be present implicitly in all the vlans that a vnet device
771 	 *   is configured into; even if vsw itself is not assigned to those
772 	 *   vlans as an interface. For instance, the packet might be destined
773 	 *   to another vnet(indirectly through vsw) or to an external host
774 	 *   which is in the same vlan as this vnet and vsw itself may not be
775 	 *   present in that vlan. Similarly packets going to vsw must be
776 	 *   always tagged(unless in the default-vlan) if not already tagged,
777 	 *   as we do not know the final destination. This is needed because
778 	 *   vsw must always invoke its switching function only after tagging
779 	 *   the packet; otherwise after switching function determines the
780 	 *   destination we cannot figure out if the destination belongs to the
781 	 *   the same vlan that the frame originated from and if it needs tag/
782 	 *   untag. Note that vsw will tag the packet itself when it receives
783 	 *   it over the channel from a client if needed. However, that is
784 	 *   needed only in the case of vlan unaware clients such as obp or
785 	 *   earlier versions of vnet.
786 	 *
787 	 */
788 	if (portp != vgenp->vsw_portp) {
789 		/*
790 		 * Packet going to a vnet. Check if the destination vnet is in
791 		 * the same vlan. Then check the frame header if tag/untag is
792 		 * needed.
793 		 */
794 		rv = vgen_vlan_lookup(portp->vlan_hashp, vid);
795 		if (rv == B_FALSE) {
796 			/* drop the packet */
797 			freemsg(mp);
798 			return (NULL);
799 		}
800 
801 		/* is the destination tagged or untagged in this vlan? */
802 		(vid == portp->pvid) ? (dst_tagged = B_FALSE) :
803 		    (dst_tagged = B_TRUE);
804 
805 		if (is_tagged == dst_tagged) {
806 			/* no tagging/untagging needed */
807 			return (mp);
808 		}
809 
810 		if (is_tagged == B_TRUE) {
811 			/* frame is tagged; destination needs untagged */
812 			mp = vnet_vlan_remove_tag(mp);
813 			return (mp);
814 		}
815 
816 		/* (is_tagged == B_FALSE): fallthru to tag tx packet: */
817 	}
818 
819 	/*
820 	 * Packet going to a vnet needs tagging.
821 	 * OR
822 	 * If the packet is going to vsw, then it must be tagged in all cases:
823 	 * unknown unicast, broadcast/multicast or to vsw interface.
824 	 */
825 
826 	if (is_tagged == B_FALSE) {
827 		mp = vnet_vlan_insert_tag(mp, vid);
828 	}
829 
830 	return (mp);
831 }
832 
833 /* transmit packets over the given port */
834 static int
835 vgen_portsend(vgen_port_t *portp, mblk_t *mp)
836 {
837 	vgen_ldclist_t		*ldclp;
838 	vgen_ldc_t		*ldcp;
839 	int			status;
840 	int			rv = VGEN_SUCCESS;
841 	vgen_t			*vgenp = portp->vgenp;
842 	vnet_t			*vnetp = vgenp->vnetp;
843 	boolean_t		is_tagged;
844 	boolean_t		dec_refcnt = B_FALSE;
845 	uint16_t		vlan_id;
846 	struct ether_header	*ehp;
847 
848 	if (portp->use_vsw_port) {
849 		(void) atomic_inc_32(&vgenp->vsw_port_refcnt);
850 		portp = portp->vgenp->vsw_portp;
851 		dec_refcnt = B_TRUE;
852 	}
853 	if (portp == NULL) {
854 		return (VGEN_FAILURE);
855 	}
856 
857 	/*
858 	 * Determine the vlan id that the frame belongs to.
859 	 */
860 	ehp = (struct ether_header *)mp->b_rptr;
861 	is_tagged = vgen_frame_lookup_vid(vnetp, ehp, &vlan_id);
862 
863 	if (vlan_id == vnetp->default_vlan_id) {
864 
865 		/* Frames in default vlan must be untagged */
866 		ASSERT(is_tagged == B_FALSE);
867 
868 		/*
869 		 * If the destination is a vnet-port verify it belongs to the
870 		 * default vlan; otherwise drop the packet. We do not need
871 		 * this check for vsw-port, as it should implicitly belong to
872 		 * this vlan; see comments in vgen_vlan_frame_fixtag().
873 		 */
874 		if (portp != vgenp->vsw_portp &&
875 		    portp->pvid != vnetp->default_vlan_id) {
876 			freemsg(mp);
877 			goto portsend_ret;
878 		}
879 
880 	} else {	/* frame not in default-vlan */
881 
882 		mp = vgen_vlan_frame_fixtag(portp, mp, is_tagged, vlan_id);
883 		if (mp == NULL) {
884 			goto portsend_ret;
885 		}
886 
887 	}
888 
889 	ldclp = &portp->ldclist;
890 	READ_ENTER(&ldclp->rwlock);
891 	/*
892 	 * NOTE: for now, we will assume we have a single channel.
893 	 */
894 	if (ldclp->headp == NULL) {
895 		RW_EXIT(&ldclp->rwlock);
896 		rv = VGEN_FAILURE;
897 		goto portsend_ret;
898 	}
899 	ldcp = ldclp->headp;
900 
901 	status = ldcp->tx(ldcp, mp);
902 
903 	RW_EXIT(&ldclp->rwlock);
904 
905 	if (status != VGEN_TX_SUCCESS) {
906 		rv = VGEN_FAILURE;
907 	}
908 
909 portsend_ret:
910 	if (dec_refcnt == B_TRUE) {
911 		(void) atomic_dec_32(&vgenp->vsw_port_refcnt);
912 	}
913 	return (rv);
914 }
915 
916 /*
917  * Wrapper function to transmit normal and/or priority frames over the channel.
918  */
919 static int
920 vgen_ldcsend(void *arg, mblk_t *mp)
921 {
922 	vgen_ldc_t		*ldcp = (vgen_ldc_t *)arg;
923 	int			status;
924 	struct ether_header	*ehp;
925 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
926 	uint32_t		num_types;
927 	uint16_t		*types;
928 	int			i;
929 
930 	ASSERT(VGEN_PRI_ETH_DEFINED(vgenp));
931 
932 	num_types = vgenp->pri_num_types;
933 	types = vgenp->pri_types;
934 	ehp = (struct ether_header *)mp->b_rptr;
935 
936 	for (i = 0; i < num_types; i++) {
937 
938 		if (ehp->ether_type == types[i]) {
939 			/* priority frame, use pri tx function */
940 			vgen_ldcsend_pkt(ldcp, mp);
941 			return (VGEN_SUCCESS);
942 		}
943 
944 	}
945 
946 	status  = vgen_ldcsend_dring(ldcp, mp);
947 
948 	return (status);
949 }
950 
951 /*
952  * This functions handles ldc channel reset while in the context
953  * of transmit routines: vgen_ldcsend_pkt() or vgen_ldcsend_dring().
954  */
955 static void
956 vgen_ldcsend_process_reset(vgen_ldc_t *ldcp)
957 {
958 	ldc_status_t	istatus;
959 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
960 
961 	if (mutex_tryenter(&ldcp->cblock)) {
962 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
963 			DWARN(vgenp, ldcp, "ldc_status() error\n");
964 		} else {
965 			ldcp->ldc_status = istatus;
966 		}
967 		if (ldcp->ldc_status != LDC_UP) {
968 			vgen_handle_evt_reset(ldcp);
969 		}
970 		mutex_exit(&ldcp->cblock);
971 	}
972 }
973 
974 /*
975  * This function transmits the frame in the payload of a raw data
976  * (VIO_PKT_DATA) message. Thus, it provides an Out-Of-Band path to
977  * send special frames with high priorities, without going through
978  * the normal data path which uses descriptor ring mechanism.
979  */
980 static void
981 vgen_ldcsend_pkt(void *arg, mblk_t *mp)
982 {
983 	vgen_ldc_t		*ldcp = (vgen_ldc_t *)arg;
984 	vio_raw_data_msg_t	*pkt;
985 	mblk_t			*bp;
986 	mblk_t			*nmp = NULL;
987 	caddr_t			dst;
988 	uint32_t		mblksz;
989 	uint32_t		size;
990 	uint32_t		nbytes;
991 	int			rv;
992 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
993 	vgen_stats_t		*statsp = &ldcp->stats;
994 
995 	/* drop the packet if ldc is not up or handshake is not done */
996 	if (ldcp->ldc_status != LDC_UP) {
997 		(void) atomic_inc_32(&statsp->tx_pri_fail);
998 		DWARN(vgenp, ldcp, "status(%d), dropping packet\n",
999 		    ldcp->ldc_status);
1000 		goto send_pkt_exit;
1001 	}
1002 
1003 	if (ldcp->hphase != VH_DONE) {
1004 		(void) atomic_inc_32(&statsp->tx_pri_fail);
1005 		DWARN(vgenp, ldcp, "hphase(%x), dropping packet\n",
1006 		    ldcp->hphase);
1007 		goto send_pkt_exit;
1008 	}
1009 
1010 	size = msgsize(mp);
1011 
1012 	/* frame size bigger than available payload len of raw data msg ? */
1013 	if (size > (size_t)(ldcp->msglen - VIO_PKT_DATA_HDRSIZE)) {
1014 		(void) atomic_inc_32(&statsp->tx_pri_fail);
1015 		DWARN(vgenp, ldcp, "invalid size(%d)\n", size);
1016 		goto send_pkt_exit;
1017 	}
1018 
1019 	if (size < ETHERMIN)
1020 		size = ETHERMIN;
1021 
1022 	/* alloc space for a raw data message */
1023 	nmp = vio_allocb(vgenp->pri_tx_vmp);
1024 	if (nmp == NULL) {
1025 		(void) atomic_inc_32(&statsp->tx_pri_fail);
1026 		DWARN(vgenp, ldcp, "vio_allocb failed\n");
1027 		goto send_pkt_exit;
1028 	}
1029 	pkt = (vio_raw_data_msg_t *)nmp->b_rptr;
1030 
1031 	/* copy frame into the payload of raw data message */
1032 	dst = (caddr_t)pkt->data;
1033 	for (bp = mp; bp != NULL; bp = bp->b_cont) {
1034 		mblksz = MBLKL(bp);
1035 		bcopy(bp->b_rptr, dst, mblksz);
1036 		dst += mblksz;
1037 	}
1038 
1039 	/* setup the raw data msg */
1040 	pkt->tag.vio_msgtype = VIO_TYPE_DATA;
1041 	pkt->tag.vio_subtype = VIO_SUBTYPE_INFO;
1042 	pkt->tag.vio_subtype_env = VIO_PKT_DATA;
1043 	pkt->tag.vio_sid = ldcp->local_sid;
1044 	nbytes = VIO_PKT_DATA_HDRSIZE + size;
1045 
1046 	/* send the msg over ldc */
1047 	rv = vgen_sendmsg(ldcp, (caddr_t)pkt, nbytes, B_FALSE);
1048 	if (rv != VGEN_SUCCESS) {
1049 		(void) atomic_inc_32(&statsp->tx_pri_fail);
1050 		DWARN(vgenp, ldcp, "Error sending priority frame\n");
1051 		if (rv == ECONNRESET) {
1052 			vgen_ldcsend_process_reset(ldcp);
1053 		}
1054 		goto send_pkt_exit;
1055 	}
1056 
1057 	/* update stats */
1058 	(void) atomic_inc_64(&statsp->tx_pri_packets);
1059 	(void) atomic_add_64(&statsp->tx_pri_bytes, size);
1060 
1061 send_pkt_exit:
1062 	if (nmp != NULL)
1063 		freemsg(nmp);
1064 	freemsg(mp);
1065 }
1066 
1067 /*
1068  * This function transmits normal (non-priority) data frames over
1069  * the channel. It queues the frame into the transmit descriptor ring
1070  * and sends a VIO_DRING_DATA message if needed, to wake up the
1071  * peer to (re)start processing.
1072  */
1073 static int
1074 vgen_ldcsend_dring(void *arg, mblk_t *mp)
1075 {
1076 	vgen_ldc_t		*ldcp = (vgen_ldc_t *)arg;
1077 	vgen_private_desc_t	*tbufp;
1078 	vgen_private_desc_t	*rtbufp;
1079 	vnet_public_desc_t	*rtxdp;
1080 	vgen_private_desc_t	*ntbufp;
1081 	vnet_public_desc_t	*txdp;
1082 	vio_dring_entry_hdr_t	*hdrp;
1083 	vgen_stats_t		*statsp;
1084 	struct ether_header	*ehp;
1085 	boolean_t		is_bcast = B_FALSE;
1086 	boolean_t		is_mcast = B_FALSE;
1087 	size_t			mblksz;
1088 	caddr_t			dst;
1089 	mblk_t			*bp;
1090 	size_t			size;
1091 	int			rv = 0;
1092 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
1093 	vgen_hparams_t		*lp = &ldcp->local_hparams;
1094 
1095 	statsp = &ldcp->stats;
1096 	size = msgsize(mp);
1097 
1098 	DBG1(vgenp, ldcp, "enter\n");
1099 
1100 	if (ldcp->ldc_status != LDC_UP) {
1101 		DWARN(vgenp, ldcp, "status(%d), dropping packet\n",
1102 		    ldcp->ldc_status);
1103 		/* retry ldc_up() if needed */
1104 #ifdef	VNET_IOC_DEBUG
1105 		if (ldcp->flags & CHANNEL_STARTED && !ldcp->link_down_forced) {
1106 #else
1107 		if (ldcp->flags & CHANNEL_STARTED) {
1108 #endif
1109 			(void) ldc_up(ldcp->ldc_handle);
1110 		}
1111 		goto send_dring_exit;
1112 	}
1113 
1114 	/* drop the packet if ldc is not up or handshake is not done */
1115 	if (ldcp->hphase != VH_DONE) {
1116 		DWARN(vgenp, ldcp, "hphase(%x), dropping packet\n",
1117 		    ldcp->hphase);
1118 		goto send_dring_exit;
1119 	}
1120 
1121 	if (size > (size_t)lp->mtu) {
1122 		DWARN(vgenp, ldcp, "invalid size(%d)\n", size);
1123 		goto send_dring_exit;
1124 	}
1125 	if (size < ETHERMIN)
1126 		size = ETHERMIN;
1127 
1128 	ehp = (struct ether_header *)mp->b_rptr;
1129 	is_bcast = IS_BROADCAST(ehp);
1130 	is_mcast = IS_MULTICAST(ehp);
1131 
1132 	mutex_enter(&ldcp->txlock);
1133 	/*
1134 	 * allocate a descriptor
1135 	 */
1136 	tbufp = ldcp->next_tbufp;
1137 	ntbufp = NEXTTBUF(ldcp, tbufp);
1138 	if (ntbufp == ldcp->cur_tbufp) { /* out of tbufs/txds */
1139 
1140 		mutex_enter(&ldcp->tclock);
1141 		/* Try reclaiming now */
1142 		vgen_reclaim_dring(ldcp);
1143 		ldcp->reclaim_lbolt = ddi_get_lbolt();
1144 
1145 		if (ntbufp == ldcp->cur_tbufp) {
1146 			/* Now we are really out of tbuf/txds */
1147 			ldcp->need_resched = B_TRUE;
1148 			mutex_exit(&ldcp->tclock);
1149 
1150 			statsp->tx_no_desc++;
1151 			mutex_exit(&ldcp->txlock);
1152 
1153 			return (VGEN_TX_NORESOURCES);
1154 		}
1155 		mutex_exit(&ldcp->tclock);
1156 	}
1157 	/* update next available tbuf in the ring and update tx index */
1158 	ldcp->next_tbufp = ntbufp;
1159 	INCR_TXI(ldcp->next_txi, ldcp);
1160 
1161 	/* Mark the buffer busy before releasing the lock */
1162 	tbufp->flags = VGEN_PRIV_DESC_BUSY;
1163 	mutex_exit(&ldcp->txlock);
1164 
1165 	/* copy data into pre-allocated transmit buffer */
1166 	dst = tbufp->datap + VNET_IPALIGN;
1167 	for (bp = mp; bp != NULL; bp = bp->b_cont) {
1168 		mblksz = MBLKL(bp);
1169 		bcopy(bp->b_rptr, dst, mblksz);
1170 		dst += mblksz;
1171 	}
1172 
1173 	tbufp->datalen = size;
1174 
1175 	/* initialize the corresponding public descriptor (txd) */
1176 	txdp = tbufp->descp;
1177 	hdrp = &txdp->hdr;
1178 	txdp->nbytes = size;
1179 	txdp->ncookies = tbufp->ncookies;
1180 	bcopy((tbufp->memcookie), (txdp->memcookie),
1181 	    tbufp->ncookies * sizeof (ldc_mem_cookie_t));
1182 
1183 	mutex_enter(&ldcp->wrlock);
1184 	/*
1185 	 * If the flags not set to BUSY, it implies that the clobber
1186 	 * was done while we were copying the data. In such case,
1187 	 * discard the packet and return.
1188 	 */
1189 	if (tbufp->flags != VGEN_PRIV_DESC_BUSY) {
1190 		statsp->oerrors++;
1191 		mutex_exit(&ldcp->wrlock);
1192 		goto send_dring_exit;
1193 	}
1194 	hdrp->dstate = VIO_DESC_READY;
1195 
1196 	/* update stats */
1197 	statsp->opackets++;
1198 	statsp->obytes += size;
1199 	if (is_bcast)
1200 		statsp->brdcstxmt++;
1201 	else if (is_mcast)
1202 		statsp->multixmt++;
1203 
1204 	/* send dring datamsg to the peer */
1205 	if (ldcp->resched_peer) {
1206 
1207 		rtbufp = &ldcp->tbufp[ldcp->resched_peer_txi];
1208 		rtxdp = rtbufp->descp;
1209 
1210 		if (rtxdp->hdr.dstate == VIO_DESC_READY) {
1211 
1212 			rv = vgen_send_dring_data(ldcp,
1213 			    (uint32_t)ldcp->resched_peer_txi, -1);
1214 			if (rv != 0) {
1215 				/* error: drop the packet */
1216 				DWARN(vgenp, ldcp, "vgen_send_dring_data "
1217 				    "failed: rv(%d) len(%d)\n",
1218 				    ldcp->ldc_id, rv, size);
1219 				statsp->oerrors++;
1220 			} else {
1221 				ldcp->resched_peer = B_FALSE;
1222 			}
1223 
1224 		}
1225 
1226 	}
1227 
1228 	mutex_exit(&ldcp->wrlock);
1229 
1230 send_dring_exit:
1231 	if (rv == ECONNRESET) {
1232 		vgen_ldcsend_process_reset(ldcp);
1233 	}
1234 	freemsg(mp);
1235 	DBG1(vgenp, ldcp, "exit\n");
1236 	return (VGEN_TX_SUCCESS);
1237 }
1238 
1239 /* enable/disable a multicast address */
1240 int
1241 vgen_multicst(void *arg, boolean_t add, const uint8_t *mca)
1242 {
1243 	vgen_t			*vgenp;
1244 	vnet_mcast_msg_t	mcastmsg;
1245 	vio_msg_tag_t		*tagp;
1246 	vgen_port_t		*portp;
1247 	vgen_portlist_t		*plistp;
1248 	vgen_ldc_t		*ldcp;
1249 	vgen_ldclist_t		*ldclp;
1250 	struct ether_addr	*addrp;
1251 	int			rv = DDI_FAILURE;
1252 	uint32_t		i;
1253 
1254 	portp = (vgen_port_t *)arg;
1255 	vgenp = portp->vgenp;
1256 
1257 	if (portp != vgenp->vsw_portp) {
1258 		return (DDI_SUCCESS);
1259 	}
1260 
1261 	addrp = (struct ether_addr *)mca;
1262 	tagp = &mcastmsg.tag;
1263 	bzero(&mcastmsg, sizeof (mcastmsg));
1264 
1265 	mutex_enter(&vgenp->lock);
1266 
1267 	plistp = &(vgenp->vgenports);
1268 
1269 	READ_ENTER(&plistp->rwlock);
1270 
1271 	portp = vgenp->vsw_portp;
1272 	if (portp == NULL) {
1273 		RW_EXIT(&plistp->rwlock);
1274 		mutex_exit(&vgenp->lock);
1275 		return (rv);
1276 	}
1277 	ldclp = &portp->ldclist;
1278 
1279 	READ_ENTER(&ldclp->rwlock);
1280 
1281 	ldcp = ldclp->headp;
1282 	if (ldcp == NULL)
1283 		goto vgen_mcast_exit;
1284 
1285 	mutex_enter(&ldcp->cblock);
1286 
1287 	if (ldcp->hphase == VH_DONE) {
1288 		/*
1289 		 * If handshake is done, send a msg to vsw to add/remove
1290 		 * the multicast address. Otherwise, we just update this
1291 		 * mcast address in our table and the table will be sync'd
1292 		 * with vsw when handshake completes.
1293 		 */
1294 		tagp->vio_msgtype = VIO_TYPE_CTRL;
1295 		tagp->vio_subtype = VIO_SUBTYPE_INFO;
1296 		tagp->vio_subtype_env = VNET_MCAST_INFO;
1297 		tagp->vio_sid = ldcp->local_sid;
1298 		bcopy(mca, &(mcastmsg.mca), ETHERADDRL);
1299 		mcastmsg.set = add;
1300 		mcastmsg.count = 1;
1301 		if (vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (mcastmsg),
1302 		    B_FALSE) != VGEN_SUCCESS) {
1303 			DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
1304 			mutex_exit(&ldcp->cblock);
1305 			goto vgen_mcast_exit;
1306 		}
1307 	}
1308 
1309 	mutex_exit(&ldcp->cblock);
1310 
1311 	if (add) {
1312 
1313 		/* expand multicast table if necessary */
1314 		if (vgenp->mccount >= vgenp->mcsize) {
1315 			struct ether_addr	*newtab;
1316 			uint32_t		newsize;
1317 
1318 
1319 			newsize = vgenp->mcsize * 2;
1320 
1321 			newtab = kmem_zalloc(newsize *
1322 			    sizeof (struct ether_addr), KM_NOSLEEP);
1323 			if (newtab == NULL)
1324 				goto vgen_mcast_exit;
1325 			bcopy(vgenp->mctab, newtab, vgenp->mcsize *
1326 			    sizeof (struct ether_addr));
1327 			kmem_free(vgenp->mctab,
1328 			    vgenp->mcsize * sizeof (struct ether_addr));
1329 
1330 			vgenp->mctab = newtab;
1331 			vgenp->mcsize = newsize;
1332 		}
1333 
1334 		/* add address to the table */
1335 		vgenp->mctab[vgenp->mccount++] = *addrp;
1336 
1337 	} else {
1338 
1339 		/* delete address from the table */
1340 		for (i = 0; i < vgenp->mccount; i++) {
1341 			if (ether_cmp(addrp, &(vgenp->mctab[i])) == 0) {
1342 
1343 				/*
1344 				 * If there's more than one address in this
1345 				 * table, delete the unwanted one by moving
1346 				 * the last one in the list over top of it;
1347 				 * otherwise, just remove it.
1348 				 */
1349 				if (vgenp->mccount > 1) {
1350 					vgenp->mctab[i] =
1351 					    vgenp->mctab[vgenp->mccount-1];
1352 				}
1353 				vgenp->mccount--;
1354 				break;
1355 			}
1356 		}
1357 	}
1358 
1359 	rv = DDI_SUCCESS;
1360 
1361 vgen_mcast_exit:
1362 	RW_EXIT(&ldclp->rwlock);
1363 	RW_EXIT(&plistp->rwlock);
1364 
1365 	mutex_exit(&vgenp->lock);
1366 	return (rv);
1367 }
1368 
1369 /* set or clear promiscuous mode on the device */
1370 static int
1371 vgen_promisc(void *arg, boolean_t on)
1372 {
1373 	_NOTE(ARGUNUSED(arg, on))
1374 	return (DDI_SUCCESS);
1375 }
1376 
1377 /* set the unicast mac address of the device */
1378 static int
1379 vgen_unicst(void *arg, const uint8_t *mca)
1380 {
1381 	_NOTE(ARGUNUSED(arg, mca))
1382 	return (DDI_SUCCESS);
1383 }
1384 
1385 /* get device statistics */
1386 int
1387 vgen_stat(void *arg, uint_t stat, uint64_t *val)
1388 {
1389 	vgen_port_t	*portp = (vgen_port_t *)arg;
1390 
1391 	*val = vgen_port_stat(portp, stat);
1392 
1393 	return (0);
1394 }
1395 
1396 /* vgen internal functions */
1397 /* detach all ports from the device */
1398 static void
1399 vgen_detach_ports(vgen_t *vgenp)
1400 {
1401 	vgen_port_t	*portp;
1402 	vgen_portlist_t	*plistp;
1403 
1404 	plistp = &(vgenp->vgenports);
1405 	WRITE_ENTER(&plistp->rwlock);
1406 	while ((portp = plistp->headp) != NULL) {
1407 		vgen_port_detach(portp);
1408 	}
1409 	RW_EXIT(&plistp->rwlock);
1410 }
1411 
1412 /*
1413  * detach the given port.
1414  */
1415 static void
1416 vgen_port_detach(vgen_port_t *portp)
1417 {
1418 	vgen_t		*vgenp;
1419 	vgen_ldclist_t	*ldclp;
1420 	int		port_num;
1421 
1422 	vgenp = portp->vgenp;
1423 	port_num = portp->port_num;
1424 
1425 	DBG1(vgenp, NULL, "port(%d):enter\n", port_num);
1426 
1427 	/*
1428 	 * If this port is connected to the vswitch, then
1429 	 * potentially there could be ports that may be using
1430 	 * this port to transmit packets. To address this do
1431 	 * the following:
1432 	 *	- First set vgenp->vsw_portp to NULL, so that
1433 	 *	  its not used after that.
1434 	 *	- Then wait for the refcnt to go down to 0.
1435 	 *	- Now we can safely detach this port.
1436 	 */
1437 	if (vgenp->vsw_portp == portp) {
1438 		vgenp->vsw_portp = NULL;
1439 		while (vgenp->vsw_port_refcnt > 0) {
1440 			delay(drv_usectohz(vgen_tx_delay));
1441 		}
1442 		(void) atomic_swap_32(&vgenp->vsw_port_refcnt, 0);
1443 	}
1444 
1445 	if (portp->vhp != NULL) {
1446 		vio_net_resource_unreg(portp->vhp);
1447 		portp->vhp = NULL;
1448 	}
1449 
1450 	vgen_vlan_destroy_hash(portp);
1451 
1452 	/* remove it from port list */
1453 	vgen_port_list_remove(portp);
1454 
1455 	/* detach channels from this port */
1456 	ldclp = &portp->ldclist;
1457 	WRITE_ENTER(&ldclp->rwlock);
1458 	while (ldclp->headp) {
1459 		vgen_ldc_detach(ldclp->headp);
1460 	}
1461 	RW_EXIT(&ldclp->rwlock);
1462 	rw_destroy(&ldclp->rwlock);
1463 
1464 	if (portp->num_ldcs != 0) {
1465 		kmem_free(portp->ldc_ids, portp->num_ldcs * sizeof (uint64_t));
1466 		portp->num_ldcs = 0;
1467 	}
1468 
1469 	mutex_destroy(&portp->lock);
1470 	KMEM_FREE(portp);
1471 
1472 	DBG1(vgenp, NULL, "port(%d):exit\n", port_num);
1473 }
1474 
1475 /* add a port to port list */
1476 static void
1477 vgen_port_list_insert(vgen_port_t *portp)
1478 {
1479 	vgen_portlist_t *plistp;
1480 	vgen_t *vgenp;
1481 
1482 	vgenp = portp->vgenp;
1483 	plistp = &(vgenp->vgenports);
1484 
1485 	if (plistp->headp == NULL) {
1486 		plistp->headp = portp;
1487 	} else {
1488 		plistp->tailp->nextp = portp;
1489 	}
1490 	plistp->tailp = portp;
1491 	portp->nextp = NULL;
1492 }
1493 
1494 /* remove a port from port list */
1495 static void
1496 vgen_port_list_remove(vgen_port_t *portp)
1497 {
1498 	vgen_port_t *prevp;
1499 	vgen_port_t *nextp;
1500 	vgen_portlist_t *plistp;
1501 	vgen_t *vgenp;
1502 
1503 	vgenp = portp->vgenp;
1504 
1505 	plistp = &(vgenp->vgenports);
1506 
1507 	if (plistp->headp == NULL)
1508 		return;
1509 
1510 	if (portp == plistp->headp) {
1511 		plistp->headp = portp->nextp;
1512 		if (portp == plistp->tailp)
1513 			plistp->tailp = plistp->headp;
1514 	} else {
1515 		for (prevp = plistp->headp;
1516 		    ((nextp = prevp->nextp) != NULL) && (nextp != portp);
1517 		    prevp = nextp)
1518 			;
1519 		if (nextp == portp) {
1520 			prevp->nextp = portp->nextp;
1521 		}
1522 		if (portp == plistp->tailp)
1523 			plistp->tailp = prevp;
1524 	}
1525 }
1526 
1527 /* lookup a port in the list based on port_num */
1528 static vgen_port_t *
1529 vgen_port_lookup(vgen_portlist_t *plistp, int port_num)
1530 {
1531 	vgen_port_t *portp = NULL;
1532 
1533 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
1534 		if (portp->port_num == port_num) {
1535 			break;
1536 		}
1537 	}
1538 
1539 	return (portp);
1540 }
1541 
1542 /* enable ports for transmit/receive */
1543 static void
1544 vgen_init_ports(vgen_t *vgenp)
1545 {
1546 	vgen_port_t	*portp;
1547 	vgen_portlist_t	*plistp;
1548 
1549 	plistp = &(vgenp->vgenports);
1550 	READ_ENTER(&plistp->rwlock);
1551 
1552 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
1553 		vgen_port_init(portp);
1554 	}
1555 
1556 	RW_EXIT(&plistp->rwlock);
1557 }
1558 
1559 static void
1560 vgen_port_init(vgen_port_t *portp)
1561 {
1562 	/* Add the port to the specified vlans */
1563 	vgen_vlan_add_ids(portp);
1564 
1565 	/* Bring up the channels of this port */
1566 	vgen_init_ldcs(portp);
1567 }
1568 
1569 /* disable transmit/receive on ports */
1570 static void
1571 vgen_uninit_ports(vgen_t *vgenp)
1572 {
1573 	vgen_port_t	*portp;
1574 	vgen_portlist_t	*plistp;
1575 
1576 	plistp = &(vgenp->vgenports);
1577 	READ_ENTER(&plistp->rwlock);
1578 
1579 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
1580 		vgen_port_uninit(portp);
1581 	}
1582 
1583 	RW_EXIT(&plistp->rwlock);
1584 }
1585 
1586 static void
1587 vgen_port_uninit(vgen_port_t *portp)
1588 {
1589 	vgen_uninit_ldcs(portp);
1590 
1591 	/* remove the port from vlans it has been assigned to */
1592 	vgen_vlan_remove_ids(portp);
1593 }
1594 
1595 /*
1596  * Scan the machine description for this instance of vnet
1597  * and read its properties. Called only from vgen_init().
1598  * Returns: 0 on success, 1 on failure.
1599  */
1600 static int
1601 vgen_read_mdprops(vgen_t *vgenp)
1602 {
1603 	vnet_t		*vnetp = vgenp->vnetp;
1604 	md_t		*mdp = NULL;
1605 	mde_cookie_t	rootnode;
1606 	mde_cookie_t	*listp = NULL;
1607 	uint64_t	cfgh;
1608 	char		*name;
1609 	int		rv = 1;
1610 	int		num_nodes = 0;
1611 	int		num_devs = 0;
1612 	int		listsz = 0;
1613 	int		i;
1614 
1615 	if ((mdp = md_get_handle()) == NULL) {
1616 		return (rv);
1617 	}
1618 
1619 	num_nodes = md_node_count(mdp);
1620 	ASSERT(num_nodes > 0);
1621 
1622 	listsz = num_nodes * sizeof (mde_cookie_t);
1623 	listp = (mde_cookie_t *)kmem_zalloc(listsz, KM_SLEEP);
1624 
1625 	rootnode = md_root_node(mdp);
1626 
1627 	/* search for all "virtual_device" nodes */
1628 	num_devs = md_scan_dag(mdp, rootnode,
1629 	    md_find_name(mdp, vdev_propname),
1630 	    md_find_name(mdp, "fwd"), listp);
1631 	if (num_devs <= 0) {
1632 		goto vgen_readmd_exit;
1633 	}
1634 
1635 	/*
1636 	 * Now loop through the list of virtual-devices looking for
1637 	 * devices with name "network" and for each such device compare
1638 	 * its instance with what we have from the 'reg' property to
1639 	 * find the right node in MD and then read all its properties.
1640 	 */
1641 	for (i = 0; i < num_devs; i++) {
1642 
1643 		if (md_get_prop_str(mdp, listp[i], "name", &name) != 0) {
1644 			goto vgen_readmd_exit;
1645 		}
1646 
1647 		/* is this a "network" device? */
1648 		if (strcmp(name, vnet_propname) != 0)
1649 			continue;
1650 
1651 		if (md_get_prop_val(mdp, listp[i], "cfg-handle", &cfgh) != 0) {
1652 			goto vgen_readmd_exit;
1653 		}
1654 
1655 		/* is this the required instance of vnet? */
1656 		if (vgenp->regprop != cfgh)
1657 			continue;
1658 
1659 		/*
1660 		 * Read the 'linkprop' property to know if this vnet
1661 		 * device should get physical link updates from vswitch.
1662 		 */
1663 		vgen_linkprop_read(vgenp, mdp, listp[i],
1664 		    &vnetp->pls_update);
1665 
1666 		/*
1667 		 * Read the mtu. Note that we set the mtu of vnet device within
1668 		 * this routine itself, after validating the range.
1669 		 */
1670 		vgen_mtu_read(vgenp, mdp, listp[i], &vnetp->mtu);
1671 		if (vnetp->mtu < ETHERMTU || vnetp->mtu > VNET_MAX_MTU) {
1672 			vnetp->mtu = ETHERMTU;
1673 		}
1674 		vgenp->max_frame_size = vnetp->mtu +
1675 		    sizeof (struct ether_header) + VLAN_TAGSZ;
1676 
1677 		/* read priority ether types */
1678 		vgen_read_pri_eth_types(vgenp, mdp, listp[i]);
1679 
1680 		/* read vlan id properties of this vnet instance */
1681 		vgen_vlan_read_ids(vgenp, VGEN_LOCAL, mdp, listp[i],
1682 		    &vnetp->pvid, &vnetp->vids, &vnetp->nvids,
1683 		    &vnetp->default_vlan_id);
1684 
1685 		rv = 0;
1686 		break;
1687 	}
1688 
1689 vgen_readmd_exit:
1690 
1691 	kmem_free(listp, listsz);
1692 	(void) md_fini_handle(mdp);
1693 	return (rv);
1694 }
1695 
1696 /*
1697  * Read vlan id properties of the given MD node.
1698  * Arguments:
1699  *   arg:          device argument(vnet device or a port)
1700  *   type:         type of arg; VGEN_LOCAL(vnet device) or VGEN_PEER(port)
1701  *   mdp:          machine description
1702  *   node:         md node cookie
1703  *
1704  * Returns:
1705  *   pvidp:        port-vlan-id of the node
1706  *   vidspp:       list of vlan-ids of the node
1707  *   nvidsp:       # of vlan-ids in the list
1708  *   default_idp:  default-vlan-id of the node(if node is vnet device)
1709  */
1710 static void
1711 vgen_vlan_read_ids(void *arg, int type, md_t *mdp, mde_cookie_t node,
1712 	uint16_t *pvidp, uint16_t **vidspp, uint16_t *nvidsp,
1713 	uint16_t *default_idp)
1714 {
1715 	vgen_t		*vgenp;
1716 	vnet_t		*vnetp;
1717 	vgen_port_t	*portp;
1718 	char		*pvid_propname;
1719 	char		*vid_propname;
1720 	uint_t		nvids;
1721 	uint32_t	vids_size;
1722 	int		rv;
1723 	int		i;
1724 	uint64_t	*data;
1725 	uint64_t	val;
1726 	int		size;
1727 	int		inst;
1728 
1729 	if (type == VGEN_LOCAL) {
1730 
1731 		vgenp = (vgen_t *)arg;
1732 		vnetp = vgenp->vnetp;
1733 		pvid_propname = vgen_pvid_propname;
1734 		vid_propname = vgen_vid_propname;
1735 		inst = vnetp->instance;
1736 
1737 	} else if (type == VGEN_PEER) {
1738 
1739 		portp = (vgen_port_t *)arg;
1740 		vgenp = portp->vgenp;
1741 		vnetp = vgenp->vnetp;
1742 		pvid_propname = port_pvid_propname;
1743 		vid_propname = port_vid_propname;
1744 		inst = portp->port_num;
1745 
1746 	} else {
1747 		return;
1748 	}
1749 
1750 	if (type == VGEN_LOCAL && default_idp != NULL) {
1751 		rv = md_get_prop_val(mdp, node, vgen_dvid_propname, &val);
1752 		if (rv != 0) {
1753 			DWARN(vgenp, NULL, "prop(%s) not found",
1754 			    vgen_dvid_propname);
1755 
1756 			*default_idp = vnet_default_vlan_id;
1757 		} else {
1758 			*default_idp = val & 0xFFF;
1759 			DBG2(vgenp, NULL, "%s(%d): (%d)\n", vgen_dvid_propname,
1760 			    inst, *default_idp);
1761 		}
1762 	}
1763 
1764 	rv = md_get_prop_val(mdp, node, pvid_propname, &val);
1765 	if (rv != 0) {
1766 		DWARN(vgenp, NULL, "prop(%s) not found", pvid_propname);
1767 		*pvidp = vnet_default_vlan_id;
1768 	} else {
1769 
1770 		*pvidp = val & 0xFFF;
1771 		DBG2(vgenp, NULL, "%s(%d): (%d)\n",
1772 		    pvid_propname, inst, *pvidp);
1773 	}
1774 
1775 	rv = md_get_prop_data(mdp, node, vid_propname, (uint8_t **)&data,
1776 	    &size);
1777 	if (rv != 0) {
1778 		DBG2(vgenp, NULL, "prop(%s) not found", vid_propname);
1779 		size = 0;
1780 	} else {
1781 		size /= sizeof (uint64_t);
1782 	}
1783 	nvids = size;
1784 
1785 	if (nvids != 0) {
1786 		DBG2(vgenp, NULL, "%s(%d): ", vid_propname, inst);
1787 		vids_size = sizeof (uint16_t) * nvids;
1788 		*vidspp = kmem_zalloc(vids_size, KM_SLEEP);
1789 		for (i = 0; i < nvids; i++) {
1790 			(*vidspp)[i] = data[i] & 0xFFFF;
1791 			DBG2(vgenp, NULL, " %d ", (*vidspp)[i]);
1792 		}
1793 		DBG2(vgenp, NULL, "\n");
1794 	}
1795 
1796 	*nvidsp = nvids;
1797 }
1798 
1799 /*
1800  * Create a vlan id hash table for the given port.
1801  */
1802 static void
1803 vgen_vlan_create_hash(vgen_port_t *portp)
1804 {
1805 	char		hashname[MAXNAMELEN];
1806 
1807 	(void) snprintf(hashname, MAXNAMELEN, "port%d-vlan-hash",
1808 	    portp->port_num);
1809 
1810 	portp->vlan_nchains = vgen_vlan_nchains;
1811 	portp->vlan_hashp = mod_hash_create_idhash(hashname,
1812 	    portp->vlan_nchains, mod_hash_null_valdtor);
1813 }
1814 
1815 /*
1816  * Destroy the vlan id hash table in the given port.
1817  */
1818 static void
1819 vgen_vlan_destroy_hash(vgen_port_t *portp)
1820 {
1821 	if (portp->vlan_hashp != NULL) {
1822 		mod_hash_destroy_hash(portp->vlan_hashp);
1823 		portp->vlan_hashp = NULL;
1824 		portp->vlan_nchains = 0;
1825 	}
1826 }
1827 
1828 /*
1829  * Add a port to the vlans specified in its port properites.
1830  */
1831 static void
1832 vgen_vlan_add_ids(vgen_port_t *portp)
1833 {
1834 	int		rv;
1835 	int		i;
1836 
1837 	rv = mod_hash_insert(portp->vlan_hashp,
1838 	    (mod_hash_key_t)VLAN_ID_KEY(portp->pvid),
1839 	    (mod_hash_val_t)B_TRUE);
1840 	ASSERT(rv == 0);
1841 
1842 	for (i = 0; i < portp->nvids; i++) {
1843 		rv = mod_hash_insert(portp->vlan_hashp,
1844 		    (mod_hash_key_t)VLAN_ID_KEY(portp->vids[i]),
1845 		    (mod_hash_val_t)B_TRUE);
1846 		ASSERT(rv == 0);
1847 	}
1848 }
1849 
1850 /*
1851  * Remove a port from the vlans it has been assigned to.
1852  */
1853 static void
1854 vgen_vlan_remove_ids(vgen_port_t *portp)
1855 {
1856 	int		rv;
1857 	int		i;
1858 	mod_hash_val_t	vp;
1859 
1860 	rv = mod_hash_remove(portp->vlan_hashp,
1861 	    (mod_hash_key_t)VLAN_ID_KEY(portp->pvid),
1862 	    (mod_hash_val_t *)&vp);
1863 	ASSERT(rv == 0);
1864 
1865 	for (i = 0; i < portp->nvids; i++) {
1866 		rv = mod_hash_remove(portp->vlan_hashp,
1867 		    (mod_hash_key_t)VLAN_ID_KEY(portp->vids[i]),
1868 		    (mod_hash_val_t *)&vp);
1869 		ASSERT(rv == 0);
1870 	}
1871 }
1872 
1873 /*
1874  * Lookup the vlan id of the given tx frame. If it is a vlan-tagged frame,
1875  * then the vlan-id is available in the tag; otherwise, its vlan id is
1876  * implicitly obtained from the port-vlan-id of the vnet device.
1877  * The vlan id determined is returned in vidp.
1878  * Returns: B_TRUE if it is a tagged frame; B_FALSE if it is untagged.
1879  */
1880 static boolean_t
1881 vgen_frame_lookup_vid(vnet_t *vnetp, struct ether_header *ehp, uint16_t *vidp)
1882 {
1883 	struct ether_vlan_header	*evhp;
1884 
1885 	/* If it's a tagged frame, get the vlan id from vlan header */
1886 	if (ehp->ether_type == ETHERTYPE_VLAN) {
1887 
1888 		evhp = (struct ether_vlan_header *)ehp;
1889 		*vidp = VLAN_ID(ntohs(evhp->ether_tci));
1890 		return (B_TRUE);
1891 	}
1892 
1893 	/* Untagged frame, vlan-id is the pvid of vnet device */
1894 	*vidp = vnetp->pvid;
1895 	return (B_FALSE);
1896 }
1897 
1898 /*
1899  * Find the given vlan id in the hash table.
1900  * Return: B_TRUE if the id is found; B_FALSE if not found.
1901  */
1902 static boolean_t
1903 vgen_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid)
1904 {
1905 	int		rv;
1906 	mod_hash_val_t	vp;
1907 
1908 	rv = mod_hash_find(vlan_hashp, VLAN_ID_KEY(vid), (mod_hash_val_t *)&vp);
1909 
1910 	if (rv != 0)
1911 		return (B_FALSE);
1912 
1913 	return (B_TRUE);
1914 }
1915 
1916 /*
1917  * This function reads "priority-ether-types" property from md. This property
1918  * is used to enable support for priority frames. Applications which need
1919  * guaranteed and timely delivery of certain high priority frames to/from
1920  * a vnet or vsw within ldoms, should configure this property by providing
1921  * the ether type(s) for which the priority facility is needed.
1922  * Normal data frames are delivered over a ldc channel using the descriptor
1923  * ring mechanism which is constrained by factors such as descriptor ring size,
1924  * the rate at which the ring is processed at the peer ldc end point, etc.
1925  * The priority mechanism provides an Out-Of-Band path to send/receive frames
1926  * as raw pkt data (VIO_PKT_DATA) messages over the channel, avoiding the
1927  * descriptor ring path and enables a more reliable and timely delivery of
1928  * frames to the peer.
1929  */
1930 static void
1931 vgen_read_pri_eth_types(vgen_t *vgenp, md_t *mdp, mde_cookie_t node)
1932 {
1933 	int		rv;
1934 	uint16_t	*types;
1935 	uint64_t	*data;
1936 	int		size;
1937 	int		i;
1938 	size_t		mblk_sz;
1939 
1940 	rv = md_get_prop_data(mdp, node, pri_types_propname,
1941 	    (uint8_t **)&data, &size);
1942 	if (rv != 0) {
1943 		/*
1944 		 * Property may not exist if we are running pre-ldoms1.1 f/w.
1945 		 * Check if 'vgen_pri_eth_type' has been set in that case.
1946 		 */
1947 		if (vgen_pri_eth_type != 0) {
1948 			size = sizeof (vgen_pri_eth_type);
1949 			data = &vgen_pri_eth_type;
1950 		} else {
1951 			DBG2(vgenp, NULL,
1952 			    "prop(%s) not found", pri_types_propname);
1953 			size = 0;
1954 		}
1955 	}
1956 
1957 	if (size == 0) {
1958 		vgenp->pri_num_types = 0;
1959 		return;
1960 	}
1961 
1962 	/*
1963 	 * we have some priority-ether-types defined;
1964 	 * allocate a table of these types and also
1965 	 * allocate a pool of mblks to transmit these
1966 	 * priority packets.
1967 	 */
1968 	size /= sizeof (uint64_t);
1969 	vgenp->pri_num_types = size;
1970 	vgenp->pri_types = kmem_zalloc(size * sizeof (uint16_t), KM_SLEEP);
1971 	for (i = 0, types = vgenp->pri_types; i < size; i++) {
1972 		types[i] = data[i] & 0xFFFF;
1973 	}
1974 	mblk_sz = (VIO_PKT_DATA_HDRSIZE + vgenp->max_frame_size + 7) & ~7;
1975 	(void) vio_create_mblks(vgen_pri_tx_nmblks, mblk_sz,
1976 	    &vgenp->pri_tx_vmp);
1977 }
1978 
1979 static void
1980 vgen_mtu_read(vgen_t *vgenp, md_t *mdp, mde_cookie_t node, uint32_t *mtu)
1981 {
1982 	int		rv;
1983 	uint64_t	val;
1984 	char		*mtu_propname;
1985 
1986 	mtu_propname = vgen_mtu_propname;
1987 
1988 	rv = md_get_prop_val(mdp, node, mtu_propname, &val);
1989 	if (rv != 0) {
1990 		DWARN(vgenp, NULL, "prop(%s) not found", mtu_propname);
1991 		*mtu = vnet_ethermtu;
1992 	} else {
1993 
1994 		*mtu = val & 0xFFFF;
1995 		DBG2(vgenp, NULL, "%s(%d): (%d)\n", mtu_propname,
1996 		    vgenp->instance, *mtu);
1997 	}
1998 }
1999 
2000 static void
2001 vgen_linkprop_read(vgen_t *vgenp, md_t *mdp, mde_cookie_t node,
2002 	boolean_t *pls)
2003 {
2004 	int		rv;
2005 	uint64_t	val;
2006 	char		*linkpropname;
2007 
2008 	linkpropname = vgen_linkprop_propname;
2009 
2010 	rv = md_get_prop_val(mdp, node, linkpropname, &val);
2011 	if (rv != 0) {
2012 		DWARN(vgenp, NULL, "prop(%s) not found", linkpropname);
2013 		*pls = B_FALSE;
2014 	} else {
2015 
2016 		*pls = (val & 0x1) ?  B_TRUE : B_FALSE;
2017 		DBG2(vgenp, NULL, "%s(%d): (%d)\n", linkpropname,
2018 		    vgenp->instance, *pls);
2019 	}
2020 }
2021 
2022 /* register with MD event generator */
2023 static int
2024 vgen_mdeg_reg(vgen_t *vgenp)
2025 {
2026 	mdeg_prop_spec_t	*pspecp;
2027 	mdeg_node_spec_t	*parentp;
2028 	uint_t			templatesz;
2029 	int			rv;
2030 	mdeg_handle_t		dev_hdl = NULL;
2031 	mdeg_handle_t		port_hdl = NULL;
2032 
2033 	templatesz = sizeof (vgen_prop_template);
2034 	pspecp = kmem_zalloc(templatesz, KM_NOSLEEP);
2035 	if (pspecp == NULL) {
2036 		return (DDI_FAILURE);
2037 	}
2038 	parentp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_NOSLEEP);
2039 	if (parentp == NULL) {
2040 		kmem_free(pspecp, templatesz);
2041 		return (DDI_FAILURE);
2042 	}
2043 
2044 	bcopy(vgen_prop_template, pspecp, templatesz);
2045 
2046 	/*
2047 	 * NOTE: The instance here refers to the value of "reg" property and
2048 	 * not the dev_info instance (ddi_get_instance()) of vnet.
2049 	 */
2050 	VGEN_SET_MDEG_PROP_INST(pspecp, vgenp->regprop);
2051 
2052 	parentp->namep = "virtual-device";
2053 	parentp->specp = pspecp;
2054 
2055 	/* save parentp in vgen_t */
2056 	vgenp->mdeg_parentp = parentp;
2057 
2058 	/*
2059 	 * Register an interest in 'virtual-device' nodes with a
2060 	 * 'name' property of 'network'
2061 	 */
2062 	rv = mdeg_register(parentp, &vdev_match, vgen_mdeg_cb, vgenp, &dev_hdl);
2063 	if (rv != MDEG_SUCCESS) {
2064 		DERR(vgenp, NULL, "mdeg_register failed\n");
2065 		goto mdeg_reg_fail;
2066 	}
2067 
2068 	/* Register an interest in 'port' nodes */
2069 	rv = mdeg_register(parentp, &vport_match, vgen_mdeg_port_cb, vgenp,
2070 	    &port_hdl);
2071 	if (rv != MDEG_SUCCESS) {
2072 		DERR(vgenp, NULL, "mdeg_register failed\n");
2073 		goto mdeg_reg_fail;
2074 	}
2075 
2076 	/* save mdeg handle in vgen_t */
2077 	vgenp->mdeg_dev_hdl = dev_hdl;
2078 	vgenp->mdeg_port_hdl = port_hdl;
2079 
2080 	return (DDI_SUCCESS);
2081 
2082 mdeg_reg_fail:
2083 	if (dev_hdl != NULL) {
2084 		(void) mdeg_unregister(dev_hdl);
2085 	}
2086 	KMEM_FREE(parentp);
2087 	kmem_free(pspecp, templatesz);
2088 	vgenp->mdeg_parentp = NULL;
2089 	return (DDI_FAILURE);
2090 }
2091 
2092 /* unregister with MD event generator */
2093 static void
2094 vgen_mdeg_unreg(vgen_t *vgenp)
2095 {
2096 	(void) mdeg_unregister(vgenp->mdeg_dev_hdl);
2097 	(void) mdeg_unregister(vgenp->mdeg_port_hdl);
2098 	kmem_free(vgenp->mdeg_parentp->specp, sizeof (vgen_prop_template));
2099 	KMEM_FREE(vgenp->mdeg_parentp);
2100 	vgenp->mdeg_parentp = NULL;
2101 	vgenp->mdeg_dev_hdl = NULL;
2102 	vgenp->mdeg_port_hdl = NULL;
2103 }
2104 
2105 /* mdeg callback function for the port node */
2106 static int
2107 vgen_mdeg_port_cb(void *cb_argp, mdeg_result_t *resp)
2108 {
2109 	int idx;
2110 	int vsw_idx = -1;
2111 	uint64_t val;
2112 	vgen_t *vgenp;
2113 
2114 	if ((resp == NULL) || (cb_argp == NULL)) {
2115 		return (MDEG_FAILURE);
2116 	}
2117 
2118 	vgenp = (vgen_t *)cb_argp;
2119 	DBG1(vgenp, NULL, "enter\n");
2120 
2121 	mutex_enter(&vgenp->lock);
2122 
2123 	DBG1(vgenp, NULL, "ports: removed(%x), "
2124 	"added(%x), updated(%x)\n", resp->removed.nelem,
2125 	    resp->added.nelem, resp->match_curr.nelem);
2126 
2127 	for (idx = 0; idx < resp->removed.nelem; idx++) {
2128 		(void) vgen_remove_port(vgenp, resp->removed.mdp,
2129 		    resp->removed.mdep[idx]);
2130 	}
2131 
2132 	if (vgenp->vsw_portp == NULL) {
2133 		/*
2134 		 * find vsw_port and add it first, because other ports need
2135 		 * this when adding fdb entry (see vgen_port_init()).
2136 		 */
2137 		for (idx = 0; idx < resp->added.nelem; idx++) {
2138 			if (!(md_get_prop_val(resp->added.mdp,
2139 			    resp->added.mdep[idx], swport_propname, &val))) {
2140 				if (val == 0) {
2141 					/*
2142 					 * This port is connected to the
2143 					 * vsw on service domain.
2144 					 */
2145 					vsw_idx = idx;
2146 					if (vgen_add_port(vgenp,
2147 					    resp->added.mdp,
2148 					    resp->added.mdep[idx]) !=
2149 					    DDI_SUCCESS) {
2150 						cmn_err(CE_NOTE, "vnet%d Could "
2151 						    "not initialize virtual "
2152 						    "switch port.",
2153 						    vgenp->instance);
2154 						mutex_exit(&vgenp->lock);
2155 						return (MDEG_FAILURE);
2156 					}
2157 					break;
2158 				}
2159 			}
2160 		}
2161 		if (vsw_idx == -1) {
2162 			DWARN(vgenp, NULL, "can't find vsw_port\n");
2163 			mutex_exit(&vgenp->lock);
2164 			return (MDEG_FAILURE);
2165 		}
2166 	}
2167 
2168 	for (idx = 0; idx < resp->added.nelem; idx++) {
2169 		if ((vsw_idx != -1) && (vsw_idx == idx)) /* skip vsw_port */
2170 			continue;
2171 
2172 		/* If this port can't be added just skip it. */
2173 		(void) vgen_add_port(vgenp, resp->added.mdp,
2174 		    resp->added.mdep[idx]);
2175 	}
2176 
2177 	for (idx = 0; idx < resp->match_curr.nelem; idx++) {
2178 		(void) vgen_update_port(vgenp, resp->match_curr.mdp,
2179 		    resp->match_curr.mdep[idx],
2180 		    resp->match_prev.mdp,
2181 		    resp->match_prev.mdep[idx]);
2182 	}
2183 
2184 	mutex_exit(&vgenp->lock);
2185 	DBG1(vgenp, NULL, "exit\n");
2186 	return (MDEG_SUCCESS);
2187 }
2188 
2189 /* mdeg callback function for the vnet node */
2190 static int
2191 vgen_mdeg_cb(void *cb_argp, mdeg_result_t *resp)
2192 {
2193 	vgen_t		*vgenp;
2194 	vnet_t		*vnetp;
2195 	md_t		*mdp;
2196 	mde_cookie_t	node;
2197 	uint64_t	inst;
2198 	char		*node_name = NULL;
2199 
2200 	if ((resp == NULL) || (cb_argp == NULL)) {
2201 		return (MDEG_FAILURE);
2202 	}
2203 
2204 	vgenp = (vgen_t *)cb_argp;
2205 	vnetp = vgenp->vnetp;
2206 
2207 	DBG1(vgenp, NULL, "added %d : removed %d : curr matched %d"
2208 	    " : prev matched %d", resp->added.nelem, resp->removed.nelem,
2209 	    resp->match_curr.nelem, resp->match_prev.nelem);
2210 
2211 	mutex_enter(&vgenp->lock);
2212 
2213 	/*
2214 	 * We get an initial callback for this node as 'added' after
2215 	 * registering with mdeg. Note that we would have already gathered
2216 	 * information about this vnet node by walking MD earlier during attach
2217 	 * (in vgen_read_mdprops()). So, there is a window where the properties
2218 	 * of this node might have changed when we get this initial 'added'
2219 	 * callback. We handle this as if an update occured and invoke the same
2220 	 * function which handles updates to the properties of this vnet-node
2221 	 * if any. A non-zero 'match' value indicates that the MD has been
2222 	 * updated and that a 'network' node is present which may or may not
2223 	 * have been updated. It is up to the clients to examine their own
2224 	 * nodes and determine if they have changed.
2225 	 */
2226 	if (resp->added.nelem != 0) {
2227 
2228 		if (resp->added.nelem != 1) {
2229 			cmn_err(CE_NOTE, "!vnet%d: number of nodes added "
2230 			    "invalid: %d\n", vnetp->instance,
2231 			    resp->added.nelem);
2232 			goto vgen_mdeg_cb_err;
2233 		}
2234 
2235 		mdp = resp->added.mdp;
2236 		node = resp->added.mdep[0];
2237 
2238 	} else if (resp->match_curr.nelem != 0) {
2239 
2240 		if (resp->match_curr.nelem != 1) {
2241 			cmn_err(CE_NOTE, "!vnet%d: number of nodes updated "
2242 			    "invalid: %d\n", vnetp->instance,
2243 			    resp->match_curr.nelem);
2244 			goto vgen_mdeg_cb_err;
2245 		}
2246 
2247 		mdp = resp->match_curr.mdp;
2248 		node = resp->match_curr.mdep[0];
2249 
2250 	} else {
2251 		goto vgen_mdeg_cb_err;
2252 	}
2253 
2254 	/* Validate name and instance */
2255 	if (md_get_prop_str(mdp, node, "name", &node_name) != 0) {
2256 		DERR(vgenp, NULL, "unable to get node name\n");
2257 		goto vgen_mdeg_cb_err;
2258 	}
2259 
2260 	/* is this a virtual-network device? */
2261 	if (strcmp(node_name, vnet_propname) != 0) {
2262 		DERR(vgenp, NULL, "%s: Invalid node name: %s\n", node_name);
2263 		goto vgen_mdeg_cb_err;
2264 	}
2265 
2266 	if (md_get_prop_val(mdp, node, "cfg-handle", &inst)) {
2267 		DERR(vgenp, NULL, "prop(cfg-handle) not found\n");
2268 		goto vgen_mdeg_cb_err;
2269 	}
2270 
2271 	/* is this the right instance of vnet? */
2272 	if (inst != vgenp->regprop) {
2273 		DERR(vgenp, NULL,  "Invalid cfg-handle: %lx\n", inst);
2274 		goto vgen_mdeg_cb_err;
2275 	}
2276 
2277 	vgen_update_md_prop(vgenp, mdp, node);
2278 
2279 	mutex_exit(&vgenp->lock);
2280 	return (MDEG_SUCCESS);
2281 
2282 vgen_mdeg_cb_err:
2283 	mutex_exit(&vgenp->lock);
2284 	return (MDEG_FAILURE);
2285 }
2286 
2287 /*
2288  * Check to see if the relevant properties in the specified node have
2289  * changed, and if so take the appropriate action.
2290  */
2291 static void
2292 vgen_update_md_prop(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
2293 {
2294 	uint16_t	pvid;
2295 	uint16_t	*vids;
2296 	uint16_t	nvids;
2297 	vnet_t		*vnetp = vgenp->vnetp;
2298 	uint32_t	mtu;
2299 	boolean_t	pls_update;
2300 	enum		{ MD_init = 0x1,
2301 			    MD_vlans = 0x2,
2302 			    MD_mtu = 0x4,
2303 			    MD_pls = 0x8 } updated;
2304 	int		rv;
2305 
2306 	updated = MD_init;
2307 
2308 	/* Read the vlan ids */
2309 	vgen_vlan_read_ids(vgenp, VGEN_LOCAL, mdp, mdex, &pvid, &vids,
2310 	    &nvids, NULL);
2311 
2312 	/* Determine if there are any vlan id updates */
2313 	if ((pvid != vnetp->pvid) ||		/* pvid changed? */
2314 	    (nvids != vnetp->nvids) ||		/* # of vids changed? */
2315 	    ((nvids != 0) && (vnetp->nvids != 0) &&	/* vids changed? */
2316 	    bcmp(vids, vnetp->vids, sizeof (uint16_t) * nvids))) {
2317 		updated |= MD_vlans;
2318 	}
2319 
2320 	/* Read mtu */
2321 	vgen_mtu_read(vgenp, mdp, mdex, &mtu);
2322 	if (mtu != vnetp->mtu) {
2323 		if (mtu >= ETHERMTU && mtu <= VNET_MAX_MTU) {
2324 			updated |= MD_mtu;
2325 		} else {
2326 			cmn_err(CE_NOTE, "!vnet%d: Unable to process mtu update"
2327 			    " as the specified value:%d is invalid\n",
2328 			    vnetp->instance, mtu);
2329 		}
2330 	}
2331 
2332 	/*
2333 	 * Read the 'linkprop' property.
2334 	 */
2335 	vgen_linkprop_read(vgenp, mdp, mdex, &pls_update);
2336 	if (pls_update != vnetp->pls_update) {
2337 		updated |= MD_pls;
2338 	}
2339 
2340 	/* Now process the updated props */
2341 
2342 	if (updated & MD_vlans) {
2343 
2344 		/* save the new vlan ids */
2345 		vnetp->pvid = pvid;
2346 		if (vnetp->nvids != 0) {
2347 			kmem_free(vnetp->vids,
2348 			    sizeof (uint16_t) * vnetp->nvids);
2349 			vnetp->nvids = 0;
2350 		}
2351 		if (nvids != 0) {
2352 			vnetp->nvids = nvids;
2353 			vnetp->vids = vids;
2354 		}
2355 
2356 		/* reset vlan-unaware peers (ver < 1.3) and restart handshake */
2357 		vgen_reset_vlan_unaware_ports(vgenp);
2358 
2359 	} else {
2360 
2361 		if (nvids != 0) {
2362 			kmem_free(vids, sizeof (uint16_t) * nvids);
2363 		}
2364 	}
2365 
2366 	if (updated & MD_mtu) {
2367 
2368 		DBG2(vgenp, NULL, "curr_mtu(%d) new_mtu(%d)\n",
2369 		    vnetp->mtu, mtu);
2370 
2371 		rv = vnet_mtu_update(vnetp, mtu);
2372 		if (rv == 0) {
2373 			vgenp->max_frame_size = mtu +
2374 			    sizeof (struct ether_header) + VLAN_TAGSZ;
2375 		}
2376 	}
2377 
2378 	if (updated & MD_pls) {
2379 		/* enable/disable physical link state updates */
2380 		vnetp->pls_update = pls_update;
2381 
2382 		/* reset vsw-port to re-negotiate with the updated prop. */
2383 		vgen_reset_vsw_port(vgenp);
2384 	}
2385 }
2386 
2387 /* add a new port to the device */
2388 static int
2389 vgen_add_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
2390 {
2391 	vgen_port_t	*portp;
2392 	int		rv;
2393 
2394 	portp = kmem_zalloc(sizeof (vgen_port_t), KM_SLEEP);
2395 
2396 	rv = vgen_port_read_props(portp, vgenp, mdp, mdex);
2397 	if (rv != DDI_SUCCESS) {
2398 		KMEM_FREE(portp);
2399 		return (DDI_FAILURE);
2400 	}
2401 
2402 	rv = vgen_port_attach(portp);
2403 	if (rv != DDI_SUCCESS) {
2404 		return (DDI_FAILURE);
2405 	}
2406 
2407 	return (DDI_SUCCESS);
2408 }
2409 
2410 /* read properties of the port from its md node */
2411 static int
2412 vgen_port_read_props(vgen_port_t *portp, vgen_t *vgenp, md_t *mdp,
2413 	mde_cookie_t mdex)
2414 {
2415 	uint64_t		port_num;
2416 	uint64_t		*ldc_ids;
2417 	uint64_t		macaddr;
2418 	uint64_t		val;
2419 	int			num_ldcs;
2420 	int			i;
2421 	int			addrsz;
2422 	int			num_nodes = 0;
2423 	int			listsz = 0;
2424 	mde_cookie_t		*listp = NULL;
2425 	uint8_t			*addrp;
2426 	struct ether_addr	ea;
2427 
2428 	/* read "id" property to get the port number */
2429 	if (md_get_prop_val(mdp, mdex, id_propname, &port_num)) {
2430 		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
2431 		return (DDI_FAILURE);
2432 	}
2433 
2434 	/*
2435 	 * Find the channel endpoint node(s) under this port node.
2436 	 */
2437 	if ((num_nodes = md_node_count(mdp)) <= 0) {
2438 		DWARN(vgenp, NULL, "invalid number of nodes found (%d)",
2439 		    num_nodes);
2440 		return (DDI_FAILURE);
2441 	}
2442 
2443 	/* allocate space for node list */
2444 	listsz = num_nodes * sizeof (mde_cookie_t);
2445 	listp = kmem_zalloc(listsz, KM_NOSLEEP);
2446 	if (listp == NULL)
2447 		return (DDI_FAILURE);
2448 
2449 	num_ldcs = md_scan_dag(mdp, mdex,
2450 	    md_find_name(mdp, channel_propname),
2451 	    md_find_name(mdp, "fwd"), listp);
2452 
2453 	if (num_ldcs <= 0) {
2454 		DWARN(vgenp, NULL, "can't find %s nodes", channel_propname);
2455 		kmem_free(listp, listsz);
2456 		return (DDI_FAILURE);
2457 	}
2458 
2459 	DBG2(vgenp, NULL, "num_ldcs %d", num_ldcs);
2460 
2461 	ldc_ids = kmem_zalloc(num_ldcs * sizeof (uint64_t), KM_NOSLEEP);
2462 	if (ldc_ids == NULL) {
2463 		kmem_free(listp, listsz);
2464 		return (DDI_FAILURE);
2465 	}
2466 
2467 	for (i = 0; i < num_ldcs; i++) {
2468 		/* read channel ids */
2469 		if (md_get_prop_val(mdp, listp[i], id_propname, &ldc_ids[i])) {
2470 			DWARN(vgenp, NULL, "prop(%s) not found\n",
2471 			    id_propname);
2472 			kmem_free(listp, listsz);
2473 			kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
2474 			return (DDI_FAILURE);
2475 		}
2476 		DBG2(vgenp, NULL, "ldc_id 0x%llx", ldc_ids[i]);
2477 	}
2478 
2479 	kmem_free(listp, listsz);
2480 
2481 	if (md_get_prop_data(mdp, mdex, rmacaddr_propname, &addrp,
2482 	    &addrsz)) {
2483 		DWARN(vgenp, NULL, "prop(%s) not found\n", rmacaddr_propname);
2484 		kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
2485 		return (DDI_FAILURE);
2486 	}
2487 
2488 	if (addrsz < ETHERADDRL) {
2489 		DWARN(vgenp, NULL, "invalid address size (%d)\n", addrsz);
2490 		kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
2491 		return (DDI_FAILURE);
2492 	}
2493 
2494 	macaddr = *((uint64_t *)addrp);
2495 
2496 	DBG2(vgenp, NULL, "remote mac address 0x%llx\n", macaddr);
2497 
2498 	for (i = ETHERADDRL - 1; i >= 0; i--) {
2499 		ea.ether_addr_octet[i] = macaddr & 0xFF;
2500 		macaddr >>= 8;
2501 	}
2502 
2503 	if (!(md_get_prop_val(mdp, mdex, swport_propname, &val))) {
2504 		if (val == 0) {
2505 			/* This port is connected to the vswitch */
2506 			portp->is_vsw_port = B_TRUE;
2507 		} else {
2508 			portp->is_vsw_port = B_FALSE;
2509 		}
2510 	}
2511 
2512 	/* now update all properties into the port */
2513 	portp->vgenp = vgenp;
2514 	portp->port_num = port_num;
2515 	ether_copy(&ea, &portp->macaddr);
2516 	portp->ldc_ids = kmem_zalloc(sizeof (uint64_t) * num_ldcs, KM_SLEEP);
2517 	bcopy(ldc_ids, portp->ldc_ids, sizeof (uint64_t) * num_ldcs);
2518 	portp->num_ldcs = num_ldcs;
2519 
2520 	/* read vlan id properties of this port node */
2521 	vgen_vlan_read_ids(portp, VGEN_PEER, mdp, mdex, &portp->pvid,
2522 	    &portp->vids, &portp->nvids, NULL);
2523 
2524 	kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
2525 
2526 	return (DDI_SUCCESS);
2527 }
2528 
2529 /* remove a port from the device */
2530 static int
2531 vgen_remove_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
2532 {
2533 	uint64_t	port_num;
2534 	vgen_port_t	*portp;
2535 	vgen_portlist_t	*plistp;
2536 
2537 	/* read "id" property to get the port number */
2538 	if (md_get_prop_val(mdp, mdex, id_propname, &port_num)) {
2539 		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
2540 		return (DDI_FAILURE);
2541 	}
2542 
2543 	plistp = &(vgenp->vgenports);
2544 
2545 	WRITE_ENTER(&plistp->rwlock);
2546 	portp = vgen_port_lookup(plistp, (int)port_num);
2547 	if (portp == NULL) {
2548 		DWARN(vgenp, NULL, "can't find port(%lx)\n", port_num);
2549 		RW_EXIT(&plistp->rwlock);
2550 		return (DDI_FAILURE);
2551 	}
2552 
2553 	vgen_port_detach_mdeg(portp);
2554 	RW_EXIT(&plistp->rwlock);
2555 
2556 	return (DDI_SUCCESS);
2557 }
2558 
2559 /* attach a port to the device based on mdeg data */
2560 static int
2561 vgen_port_attach(vgen_port_t *portp)
2562 {
2563 	int			i;
2564 	vgen_portlist_t		*plistp;
2565 	vgen_t			*vgenp;
2566 	uint64_t		*ldcids;
2567 	uint32_t		num_ldcs;
2568 	mac_register_t		*macp;
2569 	vio_net_res_type_t	type;
2570 	int			rv;
2571 
2572 	ASSERT(portp != NULL);
2573 
2574 	vgenp = portp->vgenp;
2575 	ldcids = portp->ldc_ids;
2576 	num_ldcs = portp->num_ldcs;
2577 
2578 	DBG1(vgenp, NULL, "port_num(%d)\n", portp->port_num);
2579 
2580 	mutex_init(&portp->lock, NULL, MUTEX_DRIVER, NULL);
2581 	rw_init(&portp->ldclist.rwlock, NULL, RW_DRIVER, NULL);
2582 	portp->ldclist.headp = NULL;
2583 
2584 	for (i = 0; i < num_ldcs; i++) {
2585 		DBG2(vgenp, NULL, "ldcid (%lx)\n", ldcids[i]);
2586 		if (vgen_ldc_attach(portp, ldcids[i]) == DDI_FAILURE) {
2587 			vgen_port_detach(portp);
2588 			return (DDI_FAILURE);
2589 		}
2590 	}
2591 
2592 	/* create vlan id hash table */
2593 	vgen_vlan_create_hash(portp);
2594 
2595 	if (portp->is_vsw_port == B_TRUE) {
2596 		/* This port is connected to the switch port */
2597 		(void) atomic_swap_32(&portp->use_vsw_port, B_FALSE);
2598 		type = VIO_NET_RES_LDC_SERVICE;
2599 	} else {
2600 		(void) atomic_swap_32(&portp->use_vsw_port, B_TRUE);
2601 		type = VIO_NET_RES_LDC_GUEST;
2602 	}
2603 
2604 	if ((macp = mac_alloc(MAC_VERSION)) == NULL) {
2605 		vgen_port_detach(portp);
2606 		return (DDI_FAILURE);
2607 	}
2608 	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
2609 	macp->m_driver = portp;
2610 	macp->m_dip = vgenp->vnetdip;
2611 	macp->m_src_addr = (uint8_t *)&(vgenp->macaddr);
2612 	macp->m_callbacks = &vgen_m_callbacks;
2613 	macp->m_min_sdu = 0;
2614 	macp->m_max_sdu = ETHERMTU;
2615 
2616 	mutex_enter(&portp->lock);
2617 	rv = vio_net_resource_reg(macp, type, vgenp->macaddr,
2618 	    portp->macaddr, &portp->vhp, &portp->vcb);
2619 	mutex_exit(&portp->lock);
2620 	mac_free(macp);
2621 
2622 	if (rv == 0) {
2623 		/* link it into the list of ports */
2624 		plistp = &(vgenp->vgenports);
2625 		WRITE_ENTER(&plistp->rwlock);
2626 		vgen_port_list_insert(portp);
2627 		RW_EXIT(&plistp->rwlock);
2628 
2629 		if (portp->is_vsw_port == B_TRUE) {
2630 			/* We now have the vswitch port attached */
2631 			vgenp->vsw_portp = portp;
2632 			(void) atomic_swap_32(&vgenp->vsw_port_refcnt, 0);
2633 		}
2634 	} else {
2635 		DERR(vgenp, NULL, "vio_net_resource_reg failed for portp=0x%p",
2636 		    portp);
2637 		vgen_port_detach(portp);
2638 	}
2639 
2640 	DBG1(vgenp, NULL, "exit: port_num(%d)\n", portp->port_num);
2641 	return (DDI_SUCCESS);
2642 }
2643 
2644 /* detach a port from the device based on mdeg data */
2645 static void
2646 vgen_port_detach_mdeg(vgen_port_t *portp)
2647 {
2648 	vgen_t *vgenp = portp->vgenp;
2649 
2650 	DBG1(vgenp, NULL, "enter: port_num(%d)\n", portp->port_num);
2651 
2652 	mutex_enter(&portp->lock);
2653 
2654 	/* stop the port if needed */
2655 	if (portp->flags & VGEN_STARTED) {
2656 		vgen_port_uninit(portp);
2657 	}
2658 
2659 	mutex_exit(&portp->lock);
2660 	vgen_port_detach(portp);
2661 
2662 	DBG1(vgenp, NULL, "exit: port_num(%d)\n", portp->port_num);
2663 }
2664 
2665 static int
2666 vgen_update_port(vgen_t *vgenp, md_t *curr_mdp, mde_cookie_t curr_mdex,
2667 	md_t *prev_mdp, mde_cookie_t prev_mdex)
2668 {
2669 	uint64_t	cport_num;
2670 	uint64_t	pport_num;
2671 	vgen_portlist_t	*plistp;
2672 	vgen_port_t	*portp;
2673 	boolean_t	updated_vlans = B_FALSE;
2674 	uint16_t	pvid;
2675 	uint16_t	*vids;
2676 	uint16_t	nvids;
2677 
2678 	/*
2679 	 * For now, we get port updates only if vlan ids changed.
2680 	 * We read the port num and do some sanity check.
2681 	 */
2682 	if (md_get_prop_val(curr_mdp, curr_mdex, id_propname, &cport_num)) {
2683 		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
2684 		return (DDI_FAILURE);
2685 	}
2686 
2687 	if (md_get_prop_val(prev_mdp, prev_mdex, id_propname, &pport_num)) {
2688 		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
2689 		return (DDI_FAILURE);
2690 	}
2691 	if (cport_num != pport_num)
2692 		return (DDI_FAILURE);
2693 
2694 	plistp = &(vgenp->vgenports);
2695 
2696 	READ_ENTER(&plistp->rwlock);
2697 
2698 	portp = vgen_port_lookup(plistp, (int)cport_num);
2699 	if (portp == NULL) {
2700 		DWARN(vgenp, NULL, "can't find port(%lx)\n", cport_num);
2701 		RW_EXIT(&plistp->rwlock);
2702 		return (DDI_FAILURE);
2703 	}
2704 
2705 	/* Read the vlan ids */
2706 	vgen_vlan_read_ids(portp, VGEN_PEER, curr_mdp, curr_mdex, &pvid, &vids,
2707 	    &nvids, NULL);
2708 
2709 	/* Determine if there are any vlan id updates */
2710 	if ((pvid != portp->pvid) ||		/* pvid changed? */
2711 	    (nvids != portp->nvids) ||		/* # of vids changed? */
2712 	    ((nvids != 0) && (portp->nvids != 0) &&	/* vids changed? */
2713 	    bcmp(vids, portp->vids, sizeof (uint16_t) * nvids))) {
2714 		updated_vlans = B_TRUE;
2715 	}
2716 
2717 	if (updated_vlans == B_FALSE) {
2718 		RW_EXIT(&plistp->rwlock);
2719 		return (DDI_FAILURE);
2720 	}
2721 
2722 	/* remove the port from vlans it has been assigned to */
2723 	vgen_vlan_remove_ids(portp);
2724 
2725 	/* save the new vlan ids */
2726 	portp->pvid = pvid;
2727 	if (portp->nvids != 0) {
2728 		kmem_free(portp->vids, sizeof (uint16_t) * portp->nvids);
2729 		portp->nvids = 0;
2730 	}
2731 	if (nvids != 0) {
2732 		portp->vids = kmem_zalloc(sizeof (uint16_t) * nvids, KM_SLEEP);
2733 		bcopy(vids, portp->vids, sizeof (uint16_t) * nvids);
2734 		portp->nvids = nvids;
2735 		kmem_free(vids, sizeof (uint16_t) * nvids);
2736 	}
2737 
2738 	/* add port to the new vlans */
2739 	vgen_vlan_add_ids(portp);
2740 
2741 	/* reset the port if it is vlan unaware (ver < 1.3) */
2742 	vgen_vlan_unaware_port_reset(portp);
2743 
2744 	RW_EXIT(&plistp->rwlock);
2745 
2746 	return (DDI_SUCCESS);
2747 }
2748 
2749 static uint64_t
2750 vgen_port_stat(vgen_port_t *portp, uint_t stat)
2751 {
2752 	vgen_ldclist_t	*ldclp;
2753 	vgen_ldc_t *ldcp;
2754 	uint64_t	val;
2755 
2756 	val = 0;
2757 	ldclp = &portp->ldclist;
2758 
2759 	READ_ENTER(&ldclp->rwlock);
2760 	for (ldcp = ldclp->headp; ldcp != NULL; ldcp = ldcp->nextp) {
2761 		val += vgen_ldc_stat(ldcp, stat);
2762 	}
2763 	RW_EXIT(&ldclp->rwlock);
2764 
2765 	return (val);
2766 }
2767 
2768 /* allocate receive resources */
2769 static int
2770 vgen_init_multipools(vgen_ldc_t *ldcp)
2771 {
2772 	size_t		data_sz;
2773 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
2774 	int		status;
2775 	uint32_t	sz1 = 0;
2776 	uint32_t	sz2 = 0;
2777 	uint32_t	sz3 = 0;
2778 	uint32_t	sz4 = 0;
2779 
2780 	/*
2781 	 * We round up the mtu specified to be a multiple of 2K.
2782 	 * We then create rx pools based on the rounded up size.
2783 	 */
2784 	data_sz = vgenp->max_frame_size + VNET_IPALIGN + VNET_LDCALIGN;
2785 	data_sz = VNET_ROUNDUP_2K(data_sz);
2786 
2787 	/*
2788 	 * If pool sizes are specified, use them. Note that the presence of
2789 	 * the first tunable will be used as a hint.
2790 	 */
2791 	if (vgen_rbufsz1 != 0) {
2792 
2793 		sz1 = vgen_rbufsz1;
2794 		sz2 = vgen_rbufsz2;
2795 		sz3 = vgen_rbufsz3;
2796 		sz4 = vgen_rbufsz4;
2797 
2798 		if (sz4 == 0) { /* need 3 pools */
2799 
2800 			ldcp->max_rxpool_size = sz3;
2801 			status = vio_init_multipools(&ldcp->vmp,
2802 			    VGEN_NUM_VMPOOLS, sz1, sz2, sz3, vgen_nrbufs1,
2803 			    vgen_nrbufs2, vgen_nrbufs3);
2804 
2805 		} else {
2806 
2807 			ldcp->max_rxpool_size = sz4;
2808 			status = vio_init_multipools(&ldcp->vmp,
2809 			    VGEN_NUM_VMPOOLS + 1, sz1, sz2, sz3, sz4,
2810 			    vgen_nrbufs1, vgen_nrbufs2, vgen_nrbufs3,
2811 			    vgen_nrbufs4);
2812 		}
2813 		return (status);
2814 	}
2815 
2816 	/*
2817 	 * Pool sizes are not specified. We select the pool sizes based on the
2818 	 * mtu if vnet_jumbo_rxpools is enabled.
2819 	 */
2820 	if (vnet_jumbo_rxpools == B_FALSE || data_sz == VNET_2K) {
2821 		/*
2822 		 * Receive buffer pool allocation based on mtu is disabled.
2823 		 * Use the default mechanism of standard size pool allocation.
2824 		 */
2825 		sz1 = VGEN_DBLK_SZ_128;
2826 		sz2 = VGEN_DBLK_SZ_256;
2827 		sz3 = VGEN_DBLK_SZ_2048;
2828 		ldcp->max_rxpool_size = sz3;
2829 
2830 		status = vio_init_multipools(&ldcp->vmp, VGEN_NUM_VMPOOLS,
2831 		    sz1, sz2, sz3,
2832 		    vgen_nrbufs1, vgen_nrbufs2, vgen_nrbufs3);
2833 
2834 		return (status);
2835 	}
2836 
2837 	switch (data_sz) {
2838 
2839 	case VNET_4K:
2840 
2841 		sz1 = VGEN_DBLK_SZ_128;
2842 		sz2 = VGEN_DBLK_SZ_256;
2843 		sz3 = VGEN_DBLK_SZ_2048;
2844 		sz4 = sz3 << 1;			/* 4K */
2845 		ldcp->max_rxpool_size = sz4;
2846 
2847 		status = vio_init_multipools(&ldcp->vmp, VGEN_NUM_VMPOOLS + 1,
2848 		    sz1, sz2, sz3, sz4,
2849 		    vgen_nrbufs1, vgen_nrbufs2, vgen_nrbufs3, vgen_nrbufs4);
2850 		break;
2851 
2852 	default:	/* data_sz:  4K+ to 16K */
2853 
2854 		sz1 = VGEN_DBLK_SZ_256;
2855 		sz2 = VGEN_DBLK_SZ_2048;
2856 		sz3 = data_sz >> 1;	/* Jumbo-size/2 */
2857 		sz4 = data_sz;		/* Jumbo-size  */
2858 		ldcp->max_rxpool_size = sz4;
2859 
2860 		status = vio_init_multipools(&ldcp->vmp, VGEN_NUM_VMPOOLS + 1,
2861 		    sz1, sz2, sz3, sz4,
2862 		    vgen_nrbufs1, vgen_nrbufs2, vgen_nrbufs3, vgen_nrbufs4);
2863 		break;
2864 
2865 	}
2866 
2867 	return (status);
2868 }
2869 
2870 /* attach the channel corresponding to the given ldc_id to the port */
2871 static int
2872 vgen_ldc_attach(vgen_port_t *portp, uint64_t ldc_id)
2873 {
2874 	vgen_t 		*vgenp;
2875 	vgen_ldclist_t	*ldclp;
2876 	vgen_ldc_t 	*ldcp, **prev_ldcp;
2877 	ldc_attr_t 	attr;
2878 	int 		status;
2879 	ldc_status_t	istatus;
2880 	char		kname[MAXNAMELEN];
2881 	int		instance;
2882 	enum	{AST_init = 0x0, AST_ldc_alloc = 0x1,
2883 		AST_mutex_init = 0x2, AST_ldc_init = 0x4,
2884 		AST_ldc_reg_cb = 0x8, AST_alloc_tx_ring = 0x10,
2885 		AST_create_rxmblks = 0x20,
2886 		AST_create_rcv_thread = 0x40} attach_state;
2887 
2888 	attach_state = AST_init;
2889 	vgenp = portp->vgenp;
2890 	ldclp = &portp->ldclist;
2891 
2892 	ldcp = kmem_zalloc(sizeof (vgen_ldc_t), KM_NOSLEEP);
2893 	if (ldcp == NULL) {
2894 		goto ldc_attach_failed;
2895 	}
2896 	ldcp->ldc_id = ldc_id;
2897 	ldcp->portp = portp;
2898 
2899 	attach_state |= AST_ldc_alloc;
2900 
2901 	mutex_init(&ldcp->txlock, NULL, MUTEX_DRIVER, NULL);
2902 	mutex_init(&ldcp->cblock, NULL, MUTEX_DRIVER, NULL);
2903 	mutex_init(&ldcp->tclock, NULL, MUTEX_DRIVER, NULL);
2904 	mutex_init(&ldcp->wrlock, NULL, MUTEX_DRIVER, NULL);
2905 	mutex_init(&ldcp->rxlock, NULL, MUTEX_DRIVER, NULL);
2906 
2907 	attach_state |= AST_mutex_init;
2908 
2909 	attr.devclass = LDC_DEV_NT;
2910 	attr.instance = vgenp->instance;
2911 	attr.mode = LDC_MODE_UNRELIABLE;
2912 	attr.mtu = vnet_ldc_mtu;
2913 	status = ldc_init(ldc_id, &attr, &ldcp->ldc_handle);
2914 	if (status != 0) {
2915 		DWARN(vgenp, ldcp, "ldc_init failed,rv (%d)\n", status);
2916 		goto ldc_attach_failed;
2917 	}
2918 	attach_state |= AST_ldc_init;
2919 
2920 	if (vgen_rcv_thread_enabled) {
2921 		ldcp->rcv_thr_flags = 0;
2922 
2923 		mutex_init(&ldcp->rcv_thr_lock, NULL, MUTEX_DRIVER, NULL);
2924 		cv_init(&ldcp->rcv_thr_cv, NULL, CV_DRIVER, NULL);
2925 		ldcp->rcv_thread = thread_create(NULL, 2 * DEFAULTSTKSZ,
2926 		    vgen_ldc_rcv_worker, ldcp, 0, &p0, TS_RUN, maxclsyspri);
2927 
2928 		attach_state |= AST_create_rcv_thread;
2929 		if (ldcp->rcv_thread == NULL) {
2930 			DWARN(vgenp, ldcp, "Failed to create worker thread");
2931 			goto ldc_attach_failed;
2932 		}
2933 	}
2934 
2935 	status = ldc_reg_callback(ldcp->ldc_handle, vgen_ldc_cb, (caddr_t)ldcp);
2936 	if (status != 0) {
2937 		DWARN(vgenp, ldcp, "ldc_reg_callback failed, rv (%d)\n",
2938 		    status);
2939 		goto ldc_attach_failed;
2940 	}
2941 	/*
2942 	 * allocate a message for ldc_read()s, big enough to hold ctrl and
2943 	 * data msgs, including raw data msgs used to recv priority frames.
2944 	 */
2945 	ldcp->msglen = VIO_PKT_DATA_HDRSIZE + vgenp->max_frame_size;
2946 	ldcp->ldcmsg = kmem_alloc(ldcp->msglen, KM_SLEEP);
2947 	attach_state |= AST_ldc_reg_cb;
2948 
2949 	(void) ldc_status(ldcp->ldc_handle, &istatus);
2950 	ASSERT(istatus == LDC_INIT);
2951 	ldcp->ldc_status = istatus;
2952 
2953 	/* allocate transmit resources */
2954 	status = vgen_alloc_tx_ring(ldcp);
2955 	if (status != 0) {
2956 		goto ldc_attach_failed;
2957 	}
2958 	attach_state |= AST_alloc_tx_ring;
2959 
2960 	/* allocate receive resources */
2961 	status = vgen_init_multipools(ldcp);
2962 	if (status != 0) {
2963 		/*
2964 		 * We do not return failure if receive mblk pools can't be
2965 		 * allocated; instead allocb(9F) will be used to dynamically
2966 		 * allocate buffers during receive.
2967 		 */
2968 		DWARN(vgenp, ldcp,
2969 		    "vnet%d: status(%d), failed to allocate rx mblk pools for "
2970 		    "channel(0x%lx)\n",
2971 		    vgenp->instance, status, ldcp->ldc_id);
2972 	} else {
2973 		attach_state |= AST_create_rxmblks;
2974 	}
2975 
2976 	/* Setup kstats for the channel */
2977 	instance = vgenp->instance;
2978 	(void) sprintf(kname, "vnetldc0x%lx", ldcp->ldc_id);
2979 	ldcp->ksp = vgen_setup_kstats("vnet", instance, kname, &ldcp->stats);
2980 	if (ldcp->ksp == NULL) {
2981 		goto ldc_attach_failed;
2982 	}
2983 
2984 	/* initialize vgen_versions supported */
2985 	bcopy(vgen_versions, ldcp->vgen_versions, sizeof (ldcp->vgen_versions));
2986 	vgen_reset_vnet_proto_ops(ldcp);
2987 
2988 	/* link it into the list of channels for this port */
2989 	WRITE_ENTER(&ldclp->rwlock);
2990 	prev_ldcp = (vgen_ldc_t **)(&ldclp->headp);
2991 	ldcp->nextp = *prev_ldcp;
2992 	*prev_ldcp = ldcp;
2993 	RW_EXIT(&ldclp->rwlock);
2994 
2995 	ldcp->link_state = LINK_STATE_UNKNOWN;
2996 #ifdef	VNET_IOC_DEBUG
2997 	ldcp->link_down_forced = B_FALSE;
2998 #endif
2999 	ldcp->flags |= CHANNEL_ATTACHED;
3000 	return (DDI_SUCCESS);
3001 
3002 ldc_attach_failed:
3003 	if (attach_state & AST_ldc_reg_cb) {
3004 		(void) ldc_unreg_callback(ldcp->ldc_handle);
3005 		kmem_free(ldcp->ldcmsg, ldcp->msglen);
3006 	}
3007 	if (attach_state & AST_create_rcv_thread) {
3008 		if (ldcp->rcv_thread != NULL) {
3009 			vgen_stop_rcv_thread(ldcp);
3010 		}
3011 		mutex_destroy(&ldcp->rcv_thr_lock);
3012 		cv_destroy(&ldcp->rcv_thr_cv);
3013 	}
3014 	if (attach_state & AST_create_rxmblks) {
3015 		vio_mblk_pool_t *fvmp = NULL;
3016 		vio_destroy_multipools(&ldcp->vmp, &fvmp);
3017 		ASSERT(fvmp == NULL);
3018 	}
3019 	if (attach_state & AST_alloc_tx_ring) {
3020 		vgen_free_tx_ring(ldcp);
3021 	}
3022 	if (attach_state & AST_ldc_init) {
3023 		(void) ldc_fini(ldcp->ldc_handle);
3024 	}
3025 	if (attach_state & AST_mutex_init) {
3026 		mutex_destroy(&ldcp->tclock);
3027 		mutex_destroy(&ldcp->txlock);
3028 		mutex_destroy(&ldcp->cblock);
3029 		mutex_destroy(&ldcp->wrlock);
3030 		mutex_destroy(&ldcp->rxlock);
3031 	}
3032 	if (attach_state & AST_ldc_alloc) {
3033 		KMEM_FREE(ldcp);
3034 	}
3035 	return (DDI_FAILURE);
3036 }
3037 
3038 /* detach a channel from the port */
3039 static void
3040 vgen_ldc_detach(vgen_ldc_t *ldcp)
3041 {
3042 	vgen_port_t	*portp;
3043 	vgen_t 		*vgenp;
3044 	vgen_ldc_t 	*pldcp;
3045 	vgen_ldc_t	**prev_ldcp;
3046 	vgen_ldclist_t	*ldclp;
3047 
3048 	portp = ldcp->portp;
3049 	vgenp = portp->vgenp;
3050 	ldclp = &portp->ldclist;
3051 
3052 	prev_ldcp =  (vgen_ldc_t **)&ldclp->headp;
3053 	for (; (pldcp = *prev_ldcp) != NULL; prev_ldcp = &pldcp->nextp) {
3054 		if (pldcp == ldcp) {
3055 			break;
3056 		}
3057 	}
3058 
3059 	if (pldcp == NULL) {
3060 		/* invalid ldcp? */
3061 		return;
3062 	}
3063 
3064 	if (ldcp->ldc_status != LDC_INIT) {
3065 		DWARN(vgenp, ldcp, "ldc_status is not INIT\n");
3066 	}
3067 
3068 	if (ldcp->flags & CHANNEL_ATTACHED) {
3069 		ldcp->flags &= ~(CHANNEL_ATTACHED);
3070 
3071 		(void) ldc_unreg_callback(ldcp->ldc_handle);
3072 		if (ldcp->rcv_thread != NULL) {
3073 			/* First stop the receive thread */
3074 			vgen_stop_rcv_thread(ldcp);
3075 			mutex_destroy(&ldcp->rcv_thr_lock);
3076 			cv_destroy(&ldcp->rcv_thr_cv);
3077 		}
3078 		kmem_free(ldcp->ldcmsg, ldcp->msglen);
3079 
3080 		vgen_destroy_kstats(ldcp->ksp);
3081 		ldcp->ksp = NULL;
3082 
3083 		/*
3084 		 * if we cannot reclaim all mblks, put this
3085 		 * on the list of pools(vgenp->rmp) to be reclaimed when the
3086 		 * device gets detached (see vgen_uninit()).
3087 		 */
3088 		vio_destroy_multipools(&ldcp->vmp, &vgenp->rmp);
3089 
3090 		/* free transmit resources */
3091 		vgen_free_tx_ring(ldcp);
3092 
3093 		(void) ldc_fini(ldcp->ldc_handle);
3094 		mutex_destroy(&ldcp->tclock);
3095 		mutex_destroy(&ldcp->txlock);
3096 		mutex_destroy(&ldcp->cblock);
3097 		mutex_destroy(&ldcp->wrlock);
3098 		mutex_destroy(&ldcp->rxlock);
3099 
3100 		/* unlink it from the list */
3101 		*prev_ldcp = ldcp->nextp;
3102 		KMEM_FREE(ldcp);
3103 	}
3104 }
3105 
3106 /*
3107  * This function allocates transmit resources for the channel.
3108  * The resources consist of a transmit descriptor ring and an associated
3109  * transmit buffer ring.
3110  */
3111 static int
3112 vgen_alloc_tx_ring(vgen_ldc_t *ldcp)
3113 {
3114 	void *tbufp;
3115 	ldc_mem_info_t minfo;
3116 	uint32_t txdsize;
3117 	uint32_t tbufsize;
3118 	int status;
3119 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3120 
3121 	ldcp->num_txds = vnet_ntxds;
3122 	txdsize = sizeof (vnet_public_desc_t);
3123 	tbufsize = sizeof (vgen_private_desc_t);
3124 
3125 	/* allocate transmit buffer ring */
3126 	tbufp = kmem_zalloc(ldcp->num_txds * tbufsize, KM_NOSLEEP);
3127 	if (tbufp == NULL) {
3128 		return (DDI_FAILURE);
3129 	}
3130 
3131 	/* create transmit descriptor ring */
3132 	status = ldc_mem_dring_create(ldcp->num_txds, txdsize,
3133 	    &ldcp->tx_dhandle);
3134 	if (status) {
3135 		DWARN(vgenp, ldcp, "ldc_mem_dring_create() failed\n");
3136 		kmem_free(tbufp, ldcp->num_txds * tbufsize);
3137 		return (DDI_FAILURE);
3138 	}
3139 
3140 	/* get the addr of descripror ring */
3141 	status = ldc_mem_dring_info(ldcp->tx_dhandle, &minfo);
3142 	if (status) {
3143 		DWARN(vgenp, ldcp, "ldc_mem_dring_info() failed\n");
3144 		kmem_free(tbufp, ldcp->num_txds * tbufsize);
3145 		(void) ldc_mem_dring_destroy(ldcp->tx_dhandle);
3146 		ldcp->tbufp = NULL;
3147 		return (DDI_FAILURE);
3148 	}
3149 	ldcp->txdp = (vnet_public_desc_t *)(minfo.vaddr);
3150 	ldcp->tbufp = tbufp;
3151 
3152 	ldcp->txdendp = &((ldcp->txdp)[ldcp->num_txds]);
3153 	ldcp->tbufendp = &((ldcp->tbufp)[ldcp->num_txds]);
3154 
3155 	return (DDI_SUCCESS);
3156 }
3157 
3158 /* Free transmit resources for the channel */
3159 static void
3160 vgen_free_tx_ring(vgen_ldc_t *ldcp)
3161 {
3162 	int tbufsize = sizeof (vgen_private_desc_t);
3163 
3164 	/* free transmit descriptor ring */
3165 	(void) ldc_mem_dring_destroy(ldcp->tx_dhandle);
3166 
3167 	/* free transmit buffer ring */
3168 	kmem_free(ldcp->tbufp, ldcp->num_txds * tbufsize);
3169 	ldcp->txdp = ldcp->txdendp = NULL;
3170 	ldcp->tbufp = ldcp->tbufendp = NULL;
3171 }
3172 
3173 /* enable transmit/receive on the channels for the port */
3174 static void
3175 vgen_init_ldcs(vgen_port_t *portp)
3176 {
3177 	vgen_ldclist_t	*ldclp = &portp->ldclist;
3178 	vgen_ldc_t	*ldcp;
3179 
3180 	READ_ENTER(&ldclp->rwlock);
3181 	ldcp =  ldclp->headp;
3182 	for (; ldcp  != NULL; ldcp = ldcp->nextp) {
3183 		(void) vgen_ldc_init(ldcp);
3184 	}
3185 	RW_EXIT(&ldclp->rwlock);
3186 }
3187 
3188 /* stop transmit/receive on the channels for the port */
3189 static void
3190 vgen_uninit_ldcs(vgen_port_t *portp)
3191 {
3192 	vgen_ldclist_t	*ldclp = &portp->ldclist;
3193 	vgen_ldc_t	*ldcp;
3194 
3195 	READ_ENTER(&ldclp->rwlock);
3196 	ldcp =  ldclp->headp;
3197 	for (; ldcp  != NULL; ldcp = ldcp->nextp) {
3198 		vgen_ldc_uninit(ldcp);
3199 	}
3200 	RW_EXIT(&ldclp->rwlock);
3201 }
3202 
3203 /* enable transmit/receive on the channel */
3204 static int
3205 vgen_ldc_init(vgen_ldc_t *ldcp)
3206 {
3207 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3208 	ldc_status_t	istatus;
3209 	int		rv;
3210 	uint32_t	retries = 0;
3211 	enum	{ ST_init = 0x0, ST_ldc_open = 0x1,
3212 		ST_init_tbufs = 0x2, ST_cb_enable = 0x4} init_state;
3213 	init_state = ST_init;
3214 
3215 	DBG1(vgenp, ldcp, "enter\n");
3216 	LDC_LOCK(ldcp);
3217 
3218 	rv = ldc_open(ldcp->ldc_handle);
3219 	if (rv != 0) {
3220 		DWARN(vgenp, ldcp, "ldc_open failed: rv(%d)\n", rv);
3221 		goto ldcinit_failed;
3222 	}
3223 	init_state |= ST_ldc_open;
3224 
3225 	(void) ldc_status(ldcp->ldc_handle, &istatus);
3226 	if (istatus != LDC_OPEN && istatus != LDC_READY) {
3227 		DWARN(vgenp, ldcp, "status(%d) is not OPEN/READY\n", istatus);
3228 		goto ldcinit_failed;
3229 	}
3230 	ldcp->ldc_status = istatus;
3231 
3232 	rv = vgen_init_tbufs(ldcp);
3233 	if (rv != 0) {
3234 		DWARN(vgenp, ldcp, "vgen_init_tbufs() failed\n");
3235 		goto ldcinit_failed;
3236 	}
3237 	init_state |= ST_init_tbufs;
3238 
3239 	rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_ENABLE);
3240 	if (rv != 0) {
3241 		DWARN(vgenp, ldcp, "ldc_set_cb_mode failed: rv(%d)\n", rv);
3242 		goto ldcinit_failed;
3243 	}
3244 
3245 	init_state |= ST_cb_enable;
3246 
3247 	do {
3248 		rv = ldc_up(ldcp->ldc_handle);
3249 		if ((rv != 0) && (rv == EWOULDBLOCK)) {
3250 			DBG2(vgenp, ldcp, "ldc_up err rv(%d)\n", rv);
3251 			drv_usecwait(VGEN_LDC_UP_DELAY);
3252 		}
3253 		if (retries++ >= vgen_ldcup_retries)
3254 			break;
3255 	} while (rv == EWOULDBLOCK);
3256 
3257 	(void) ldc_status(ldcp->ldc_handle, &istatus);
3258 	if (istatus == LDC_UP) {
3259 		DWARN(vgenp, ldcp, "status(%d) is UP\n", istatus);
3260 	}
3261 
3262 	ldcp->ldc_status = istatus;
3263 
3264 	/* initialize transmit watchdog timeout */
3265 	ldcp->wd_tid = timeout(vgen_ldc_watchdog, (caddr_t)ldcp,
3266 	    drv_usectohz(vnet_ldcwd_interval * 1000));
3267 
3268 	ldcp->hphase = -1;
3269 	ldcp->flags |= CHANNEL_STARTED;
3270 
3271 	/* if channel is already UP - start handshake */
3272 	if (istatus == LDC_UP) {
3273 		vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3274 		if (ldcp->portp != vgenp->vsw_portp) {
3275 			/*
3276 			 * As the channel is up, use this port from now on.
3277 			 */
3278 			(void) atomic_swap_32(
3279 			    &ldcp->portp->use_vsw_port, B_FALSE);
3280 		}
3281 
3282 		/* Initialize local session id */
3283 		ldcp->local_sid = ddi_get_lbolt();
3284 
3285 		/* clear peer session id */
3286 		ldcp->peer_sid = 0;
3287 		ldcp->hretries = 0;
3288 
3289 		/* Initiate Handshake process with peer ldc endpoint */
3290 		vgen_reset_hphase(ldcp);
3291 
3292 		mutex_exit(&ldcp->tclock);
3293 		mutex_exit(&ldcp->txlock);
3294 		mutex_exit(&ldcp->wrlock);
3295 		mutex_exit(&ldcp->rxlock);
3296 		vgen_handshake(vh_nextphase(ldcp));
3297 		mutex_exit(&ldcp->cblock);
3298 	} else {
3299 		LDC_UNLOCK(ldcp);
3300 	}
3301 
3302 	return (DDI_SUCCESS);
3303 
3304 ldcinit_failed:
3305 	if (init_state & ST_cb_enable) {
3306 		(void) ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE);
3307 	}
3308 	if (init_state & ST_init_tbufs) {
3309 		vgen_uninit_tbufs(ldcp);
3310 	}
3311 	if (init_state & ST_ldc_open) {
3312 		(void) ldc_close(ldcp->ldc_handle);
3313 	}
3314 	LDC_UNLOCK(ldcp);
3315 	DBG1(vgenp, ldcp, "exit\n");
3316 	return (DDI_FAILURE);
3317 }
3318 
3319 /* stop transmit/receive on the channel */
3320 static void
3321 vgen_ldc_uninit(vgen_ldc_t *ldcp)
3322 {
3323 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3324 	int	rv;
3325 	uint_t	retries = 0;
3326 
3327 	DBG1(vgenp, ldcp, "enter\n");
3328 	LDC_LOCK(ldcp);
3329 
3330 	if ((ldcp->flags & CHANNEL_STARTED) == 0) {
3331 		LDC_UNLOCK(ldcp);
3332 		DWARN(vgenp, ldcp, "CHANNEL_STARTED flag is not set\n");
3333 		return;
3334 	}
3335 
3336 	/* disable further callbacks */
3337 	rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE);
3338 	if (rv != 0) {
3339 		DWARN(vgenp, ldcp, "ldc_set_cb_mode failed\n");
3340 	}
3341 
3342 	if (vgenp->vsw_portp == ldcp->portp) {
3343 		vio_net_report_err_t rep_err =
3344 		    ldcp->portp->vcb.vio_net_report_err;
3345 		rep_err(ldcp->portp->vhp, VIO_NET_RES_DOWN);
3346 	}
3347 
3348 	/*
3349 	 * clear handshake done bit and wait for pending tx and cb to finish.
3350 	 * release locks before untimeout(9F) is invoked to cancel timeouts.
3351 	 */
3352 	ldcp->hphase &= ~(VH_DONE);
3353 	LDC_UNLOCK(ldcp);
3354 
3355 	/* cancel handshake watchdog timeout */
3356 	if (ldcp->htid) {
3357 		(void) untimeout(ldcp->htid);
3358 		ldcp->htid = 0;
3359 	}
3360 
3361 	if (ldcp->cancel_htid) {
3362 		(void) untimeout(ldcp->cancel_htid);
3363 		ldcp->cancel_htid = 0;
3364 	}
3365 
3366 	/* cancel transmit watchdog timeout */
3367 	if (ldcp->wd_tid) {
3368 		(void) untimeout(ldcp->wd_tid);
3369 		ldcp->wd_tid = 0;
3370 	}
3371 
3372 	drv_usecwait(1000);
3373 
3374 	if (ldcp->rcv_thread != NULL) {
3375 		/*
3376 		 * Note that callbacks have been disabled already(above). The
3377 		 * drain function takes care of the condition when an already
3378 		 * executing callback signals the worker to start processing or
3379 		 * the worker has already been signalled and is in the middle of
3380 		 * processing.
3381 		 */
3382 		vgen_drain_rcv_thread(ldcp);
3383 	}
3384 
3385 	/* acquire locks again; any pending transmits and callbacks are done */
3386 	LDC_LOCK(ldcp);
3387 
3388 	vgen_reset_hphase(ldcp);
3389 
3390 	vgen_uninit_tbufs(ldcp);
3391 
3392 	/* close the channel - retry on EAGAIN */
3393 	while ((rv = ldc_close(ldcp->ldc_handle)) == EAGAIN) {
3394 		if (++retries > vgen_ldccl_retries) {
3395 			break;
3396 		}
3397 		drv_usecwait(VGEN_LDC_CLOSE_DELAY);
3398 	}
3399 	if (rv != 0) {
3400 		cmn_err(CE_NOTE,
3401 		    "!vnet%d: Error(%d) closing the channel(0x%lx)\n",
3402 		    vgenp->instance, rv, ldcp->ldc_id);
3403 	}
3404 
3405 	ldcp->ldc_status = LDC_INIT;
3406 	ldcp->flags &= ~(CHANNEL_STARTED);
3407 
3408 	LDC_UNLOCK(ldcp);
3409 
3410 	DBG1(vgenp, ldcp, "exit\n");
3411 }
3412 
3413 /* Initialize the transmit buffer ring for the channel */
3414 static int
3415 vgen_init_tbufs(vgen_ldc_t *ldcp)
3416 {
3417 	vgen_private_desc_t	*tbufp;
3418 	vnet_public_desc_t	*txdp;
3419 	vio_dring_entry_hdr_t		*hdrp;
3420 	int 			i;
3421 	int 			rv;
3422 	caddr_t			datap = NULL;
3423 	int			ci;
3424 	uint32_t		ncookies;
3425 	size_t			data_sz;
3426 	vgen_t			*vgenp;
3427 
3428 	vgenp = LDC_TO_VGEN(ldcp);
3429 
3430 	bzero(ldcp->tbufp, sizeof (*tbufp) * (ldcp->num_txds));
3431 	bzero(ldcp->txdp, sizeof (*txdp) * (ldcp->num_txds));
3432 
3433 	/*
3434 	 * In order to ensure that the number of ldc cookies per descriptor is
3435 	 * limited to be within the default MAX_COOKIES (2), we take the steps
3436 	 * outlined below:
3437 	 *
3438 	 * Align the entire data buffer area to 8K and carve out per descriptor
3439 	 * data buffers starting from this 8K aligned base address.
3440 	 *
3441 	 * We round up the mtu specified to be a multiple of 2K or 4K.
3442 	 * For sizes up to 12K we round up the size to the next 2K.
3443 	 * For sizes > 12K we round up to the next 4K (otherwise sizes such as
3444 	 * 14K could end up needing 3 cookies, with the buffer spread across
3445 	 * 3 8K pages:  8K+6K, 2K+8K+2K, 6K+8K, ...).
3446 	 */
3447 	data_sz = vgenp->max_frame_size + VNET_IPALIGN + VNET_LDCALIGN;
3448 	if (data_sz <= VNET_12K) {
3449 		data_sz = VNET_ROUNDUP_2K(data_sz);
3450 	} else {
3451 		data_sz = VNET_ROUNDUP_4K(data_sz);
3452 	}
3453 
3454 	/* allocate extra 8K bytes for alignment */
3455 	ldcp->tx_data_sz = (data_sz * ldcp->num_txds) + VNET_8K;
3456 	datap = kmem_zalloc(ldcp->tx_data_sz, KM_SLEEP);
3457 	ldcp->tx_datap = datap;
3458 
3459 
3460 	/* align the starting address of the data area to 8K */
3461 	datap = (caddr_t)VNET_ROUNDUP_8K((uintptr_t)datap);
3462 
3463 	/*
3464 	 * for each private descriptor, allocate a ldc mem_handle which is
3465 	 * required to map the data during transmit, set the flags
3466 	 * to free (available for use by transmit routine).
3467 	 */
3468 
3469 	for (i = 0; i < ldcp->num_txds; i++) {
3470 
3471 		tbufp = &(ldcp->tbufp[i]);
3472 		rv = ldc_mem_alloc_handle(ldcp->ldc_handle,
3473 		    &(tbufp->memhandle));
3474 		if (rv) {
3475 			tbufp->memhandle = 0;
3476 			goto init_tbufs_failed;
3477 		}
3478 
3479 		/*
3480 		 * bind ldc memhandle to the corresponding transmit buffer.
3481 		 */
3482 		ci = ncookies = 0;
3483 		rv = ldc_mem_bind_handle(tbufp->memhandle,
3484 		    (caddr_t)datap, data_sz, LDC_SHADOW_MAP,
3485 		    LDC_MEM_R, &(tbufp->memcookie[ci]), &ncookies);
3486 		if (rv != 0) {
3487 			goto init_tbufs_failed;
3488 		}
3489 
3490 		/*
3491 		 * successful in binding the handle to tx data buffer.
3492 		 * set datap in the private descr to this buffer.
3493 		 */
3494 		tbufp->datap = datap;
3495 
3496 		if ((ncookies == 0) ||
3497 		    (ncookies > MAX_COOKIES)) {
3498 			goto init_tbufs_failed;
3499 		}
3500 
3501 		for (ci = 1; ci < ncookies; ci++) {
3502 			rv = ldc_mem_nextcookie(tbufp->memhandle,
3503 			    &(tbufp->memcookie[ci]));
3504 			if (rv != 0) {
3505 				goto init_tbufs_failed;
3506 			}
3507 		}
3508 
3509 		tbufp->ncookies = ncookies;
3510 		datap += data_sz;
3511 
3512 		tbufp->flags = VGEN_PRIV_DESC_FREE;
3513 		txdp = &(ldcp->txdp[i]);
3514 		hdrp = &txdp->hdr;
3515 		hdrp->dstate = VIO_DESC_FREE;
3516 		hdrp->ack = B_FALSE;
3517 		tbufp->descp = txdp;
3518 
3519 	}
3520 
3521 	/* reset tbuf walking pointers */
3522 	ldcp->next_tbufp = ldcp->tbufp;
3523 	ldcp->cur_tbufp = ldcp->tbufp;
3524 
3525 	/* initialize tx seqnum and index */
3526 	ldcp->next_txseq = VNET_ISS;
3527 	ldcp->next_txi = 0;
3528 
3529 	ldcp->resched_peer = B_TRUE;
3530 	ldcp->resched_peer_txi = 0;
3531 
3532 	return (DDI_SUCCESS);
3533 
3534 init_tbufs_failed:;
3535 	vgen_uninit_tbufs(ldcp);
3536 	return (DDI_FAILURE);
3537 }
3538 
3539 /* Uninitialize transmit buffer ring for the channel */
3540 static void
3541 vgen_uninit_tbufs(vgen_ldc_t *ldcp)
3542 {
3543 	vgen_private_desc_t	*tbufp = ldcp->tbufp;
3544 	int 			i;
3545 
3546 	/* for each tbuf (priv_desc), free ldc mem_handle */
3547 	for (i = 0; i < ldcp->num_txds; i++) {
3548 
3549 		tbufp = &(ldcp->tbufp[i]);
3550 
3551 		if (tbufp->datap) { /* if bound to a ldc memhandle */
3552 			(void) ldc_mem_unbind_handle(tbufp->memhandle);
3553 			tbufp->datap = NULL;
3554 		}
3555 		if (tbufp->memhandle) {
3556 			(void) ldc_mem_free_handle(tbufp->memhandle);
3557 			tbufp->memhandle = 0;
3558 		}
3559 	}
3560 
3561 	if (ldcp->tx_datap) {
3562 		/* prealloc'd tx data buffer */
3563 		kmem_free(ldcp->tx_datap, ldcp->tx_data_sz);
3564 		ldcp->tx_datap = NULL;
3565 		ldcp->tx_data_sz = 0;
3566 	}
3567 
3568 	bzero(ldcp->tbufp, sizeof (vgen_private_desc_t) * (ldcp->num_txds));
3569 	bzero(ldcp->txdp, sizeof (vnet_public_desc_t) * (ldcp->num_txds));
3570 }
3571 
3572 /* clobber tx descriptor ring */
3573 static void
3574 vgen_clobber_tbufs(vgen_ldc_t *ldcp)
3575 {
3576 	vnet_public_desc_t	*txdp;
3577 	vgen_private_desc_t	*tbufp;
3578 	vio_dring_entry_hdr_t	*hdrp;
3579 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3580 	int i;
3581 #ifdef DEBUG
3582 	int ndone = 0;
3583 #endif
3584 
3585 	for (i = 0; i < ldcp->num_txds; i++) {
3586 
3587 		tbufp = &(ldcp->tbufp[i]);
3588 		txdp = tbufp->descp;
3589 		hdrp = &txdp->hdr;
3590 
3591 		if (tbufp->flags & VGEN_PRIV_DESC_BUSY) {
3592 			tbufp->flags = VGEN_PRIV_DESC_FREE;
3593 #ifdef DEBUG
3594 			if (hdrp->dstate == VIO_DESC_DONE)
3595 				ndone++;
3596 #endif
3597 			hdrp->dstate = VIO_DESC_FREE;
3598 			hdrp->ack = B_FALSE;
3599 		}
3600 	}
3601 	/* reset tbuf walking pointers */
3602 	ldcp->next_tbufp = ldcp->tbufp;
3603 	ldcp->cur_tbufp = ldcp->tbufp;
3604 
3605 	/* reset tx seqnum and index */
3606 	ldcp->next_txseq = VNET_ISS;
3607 	ldcp->next_txi = 0;
3608 
3609 	ldcp->resched_peer = B_TRUE;
3610 	ldcp->resched_peer_txi = 0;
3611 
3612 	DBG2(vgenp, ldcp, "num descrs done (%d)\n", ndone);
3613 }
3614 
3615 /* clobber receive descriptor ring */
3616 static void
3617 vgen_clobber_rxds(vgen_ldc_t *ldcp)
3618 {
3619 	ldcp->rx_dhandle = 0;
3620 	bzero(&ldcp->rx_dcookie, sizeof (ldcp->rx_dcookie));
3621 	ldcp->rxdp = NULL;
3622 	ldcp->next_rxi = 0;
3623 	ldcp->num_rxds = 0;
3624 	ldcp->next_rxseq = VNET_ISS;
3625 }
3626 
3627 /* initialize receive descriptor ring */
3628 static int
3629 vgen_init_rxds(vgen_ldc_t *ldcp, uint32_t num_desc, uint32_t desc_size,
3630 	ldc_mem_cookie_t *dcookie, uint32_t ncookies)
3631 {
3632 	int rv;
3633 	ldc_mem_info_t minfo;
3634 
3635 	rv = ldc_mem_dring_map(ldcp->ldc_handle, dcookie, ncookies, num_desc,
3636 	    desc_size, LDC_DIRECT_MAP, &(ldcp->rx_dhandle));
3637 	if (rv != 0) {
3638 		return (DDI_FAILURE);
3639 	}
3640 
3641 	/*
3642 	 * sucessfully mapped, now try to
3643 	 * get info about the mapped dring
3644 	 */
3645 	rv = ldc_mem_dring_info(ldcp->rx_dhandle, &minfo);
3646 	if (rv != 0) {
3647 		(void) ldc_mem_dring_unmap(ldcp->rx_dhandle);
3648 		return (DDI_FAILURE);
3649 	}
3650 
3651 	/*
3652 	 * save ring address, number of descriptors.
3653 	 */
3654 	ldcp->rxdp = (vnet_public_desc_t *)(minfo.vaddr);
3655 	bcopy(dcookie, &(ldcp->rx_dcookie), sizeof (*dcookie));
3656 	ldcp->num_rxdcookies = ncookies;
3657 	ldcp->num_rxds = num_desc;
3658 	ldcp->next_rxi = 0;
3659 	ldcp->next_rxseq = VNET_ISS;
3660 	ldcp->dring_mtype = minfo.mtype;
3661 
3662 	return (DDI_SUCCESS);
3663 }
3664 
3665 /* get channel statistics */
3666 static uint64_t
3667 vgen_ldc_stat(vgen_ldc_t *ldcp, uint_t stat)
3668 {
3669 	vgen_stats_t *statsp;
3670 	uint64_t val;
3671 
3672 	val = 0;
3673 	statsp = &ldcp->stats;
3674 	switch (stat) {
3675 
3676 	case MAC_STAT_MULTIRCV:
3677 		val = statsp->multircv;
3678 		break;
3679 
3680 	case MAC_STAT_BRDCSTRCV:
3681 		val = statsp->brdcstrcv;
3682 		break;
3683 
3684 	case MAC_STAT_MULTIXMT:
3685 		val = statsp->multixmt;
3686 		break;
3687 
3688 	case MAC_STAT_BRDCSTXMT:
3689 		val = statsp->brdcstxmt;
3690 		break;
3691 
3692 	case MAC_STAT_NORCVBUF:
3693 		val = statsp->norcvbuf;
3694 		break;
3695 
3696 	case MAC_STAT_IERRORS:
3697 		val = statsp->ierrors;
3698 		break;
3699 
3700 	case MAC_STAT_NOXMTBUF:
3701 		val = statsp->noxmtbuf;
3702 		break;
3703 
3704 	case MAC_STAT_OERRORS:
3705 		val = statsp->oerrors;
3706 		break;
3707 
3708 	case MAC_STAT_COLLISIONS:
3709 		break;
3710 
3711 	case MAC_STAT_RBYTES:
3712 		val = statsp->rbytes;
3713 		break;
3714 
3715 	case MAC_STAT_IPACKETS:
3716 		val = statsp->ipackets;
3717 		break;
3718 
3719 	case MAC_STAT_OBYTES:
3720 		val = statsp->obytes;
3721 		break;
3722 
3723 	case MAC_STAT_OPACKETS:
3724 		val = statsp->opackets;
3725 		break;
3726 
3727 	/* stats not relevant to ldc, return 0 */
3728 	case MAC_STAT_IFSPEED:
3729 	case ETHER_STAT_ALIGN_ERRORS:
3730 	case ETHER_STAT_FCS_ERRORS:
3731 	case ETHER_STAT_FIRST_COLLISIONS:
3732 	case ETHER_STAT_MULTI_COLLISIONS:
3733 	case ETHER_STAT_DEFER_XMTS:
3734 	case ETHER_STAT_TX_LATE_COLLISIONS:
3735 	case ETHER_STAT_EX_COLLISIONS:
3736 	case ETHER_STAT_MACXMT_ERRORS:
3737 	case ETHER_STAT_CARRIER_ERRORS:
3738 	case ETHER_STAT_TOOLONG_ERRORS:
3739 	case ETHER_STAT_XCVR_ADDR:
3740 	case ETHER_STAT_XCVR_ID:
3741 	case ETHER_STAT_XCVR_INUSE:
3742 	case ETHER_STAT_CAP_1000FDX:
3743 	case ETHER_STAT_CAP_1000HDX:
3744 	case ETHER_STAT_CAP_100FDX:
3745 	case ETHER_STAT_CAP_100HDX:
3746 	case ETHER_STAT_CAP_10FDX:
3747 	case ETHER_STAT_CAP_10HDX:
3748 	case ETHER_STAT_CAP_ASMPAUSE:
3749 	case ETHER_STAT_CAP_PAUSE:
3750 	case ETHER_STAT_CAP_AUTONEG:
3751 	case ETHER_STAT_ADV_CAP_1000FDX:
3752 	case ETHER_STAT_ADV_CAP_1000HDX:
3753 	case ETHER_STAT_ADV_CAP_100FDX:
3754 	case ETHER_STAT_ADV_CAP_100HDX:
3755 	case ETHER_STAT_ADV_CAP_10FDX:
3756 	case ETHER_STAT_ADV_CAP_10HDX:
3757 	case ETHER_STAT_ADV_CAP_ASMPAUSE:
3758 	case ETHER_STAT_ADV_CAP_PAUSE:
3759 	case ETHER_STAT_ADV_CAP_AUTONEG:
3760 	case ETHER_STAT_LP_CAP_1000FDX:
3761 	case ETHER_STAT_LP_CAP_1000HDX:
3762 	case ETHER_STAT_LP_CAP_100FDX:
3763 	case ETHER_STAT_LP_CAP_100HDX:
3764 	case ETHER_STAT_LP_CAP_10FDX:
3765 	case ETHER_STAT_LP_CAP_10HDX:
3766 	case ETHER_STAT_LP_CAP_ASMPAUSE:
3767 	case ETHER_STAT_LP_CAP_PAUSE:
3768 	case ETHER_STAT_LP_CAP_AUTONEG:
3769 	case ETHER_STAT_LINK_ASMPAUSE:
3770 	case ETHER_STAT_LINK_PAUSE:
3771 	case ETHER_STAT_LINK_AUTONEG:
3772 	case ETHER_STAT_LINK_DUPLEX:
3773 	default:
3774 		val = 0;
3775 		break;
3776 
3777 	}
3778 	return (val);
3779 }
3780 
3781 /*
3782  * LDC channel is UP, start handshake process with peer.
3783  */
3784 static void
3785 vgen_handle_evt_up(vgen_ldc_t *ldcp)
3786 {
3787 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
3788 
3789 	DBG1(vgenp, ldcp, "enter\n");
3790 
3791 	ASSERT(MUTEX_HELD(&ldcp->cblock));
3792 
3793 	if (ldcp->portp != vgenp->vsw_portp) {
3794 		/*
3795 		 * As the channel is up, use this port from now on.
3796 		 */
3797 		(void) atomic_swap_32(&ldcp->portp->use_vsw_port, B_FALSE);
3798 	}
3799 
3800 	/* Initialize local session id */
3801 	ldcp->local_sid = ddi_get_lbolt();
3802 
3803 	/* clear peer session id */
3804 	ldcp->peer_sid = 0;
3805 	ldcp->hretries = 0;
3806 
3807 	if (ldcp->hphase != VH_PHASE0) {
3808 		vgen_handshake_reset(ldcp);
3809 	}
3810 
3811 	/* Initiate Handshake process with peer ldc endpoint */
3812 	vgen_handshake(vh_nextphase(ldcp));
3813 
3814 	DBG1(vgenp, ldcp, "exit\n");
3815 }
3816 
3817 /*
3818  * LDC channel is Reset, terminate connection with peer and try to
3819  * bring the channel up again.
3820  */
3821 static void
3822 vgen_handle_evt_reset(vgen_ldc_t *ldcp)
3823 {
3824 	ldc_status_t istatus;
3825 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
3826 	int	rv;
3827 
3828 	DBG1(vgenp, ldcp, "enter\n");
3829 
3830 	ASSERT(MUTEX_HELD(&ldcp->cblock));
3831 
3832 	if ((ldcp->portp != vgenp->vsw_portp) &&
3833 	    (vgenp->vsw_portp != NULL)) {
3834 		/*
3835 		 * As the channel is down, use the switch port until
3836 		 * the channel becomes ready to be used.
3837 		 */
3838 		(void) atomic_swap_32(&ldcp->portp->use_vsw_port, B_TRUE);
3839 	}
3840 
3841 	if (vgenp->vsw_portp == ldcp->portp) {
3842 		vio_net_report_err_t rep_err =
3843 		    ldcp->portp->vcb.vio_net_report_err;
3844 
3845 		/* Post a reset message */
3846 		rep_err(ldcp->portp->vhp, VIO_NET_RES_DOWN);
3847 	}
3848 
3849 	if (ldcp->hphase != VH_PHASE0) {
3850 		vgen_handshake_reset(ldcp);
3851 	}
3852 
3853 	/* try to bring the channel up */
3854 #ifdef	VNET_IOC_DEBUG
3855 	if (ldcp->link_down_forced == B_FALSE) {
3856 		rv = ldc_up(ldcp->ldc_handle);
3857 		if (rv != 0) {
3858 			DWARN(vgenp, ldcp, "ldc_up err rv(%d)\n", rv);
3859 		}
3860 	}
3861 #else
3862 	rv = ldc_up(ldcp->ldc_handle);
3863 	if (rv != 0) {
3864 		DWARN(vgenp, ldcp, "ldc_up err rv(%d)\n", rv);
3865 	}
3866 #endif
3867 
3868 	if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
3869 		DWARN(vgenp, ldcp, "ldc_status err\n");
3870 	} else {
3871 		ldcp->ldc_status = istatus;
3872 	}
3873 
3874 	/* if channel is already UP - restart handshake */
3875 	if (ldcp->ldc_status == LDC_UP) {
3876 		vgen_handle_evt_up(ldcp);
3877 	}
3878 
3879 	DBG1(vgenp, ldcp, "exit\n");
3880 }
3881 
3882 /* Interrupt handler for the channel */
3883 static uint_t
3884 vgen_ldc_cb(uint64_t event, caddr_t arg)
3885 {
3886 	_NOTE(ARGUNUSED(event))
3887 	vgen_ldc_t	*ldcp;
3888 	vgen_t		*vgenp;
3889 	ldc_status_t 	istatus;
3890 	vgen_stats_t	*statsp;
3891 	timeout_id_t	cancel_htid = 0;
3892 	uint_t		ret = LDC_SUCCESS;
3893 
3894 	ldcp = (vgen_ldc_t *)arg;
3895 	vgenp = LDC_TO_VGEN(ldcp);
3896 	statsp = &ldcp->stats;
3897 
3898 	DBG1(vgenp, ldcp, "enter\n");
3899 
3900 	mutex_enter(&ldcp->cblock);
3901 	statsp->callbacks++;
3902 	if ((ldcp->ldc_status == LDC_INIT) || (ldcp->ldc_handle == NULL)) {
3903 		DWARN(vgenp, ldcp, "status(%d) is LDC_INIT\n",
3904 		    ldcp->ldc_status);
3905 		mutex_exit(&ldcp->cblock);
3906 		return (LDC_SUCCESS);
3907 	}
3908 
3909 	/*
3910 	 * cache cancel_htid before the events specific
3911 	 * code may overwrite it. Do not clear ldcp->cancel_htid
3912 	 * as it is also used to indicate the timer to quit immediately.
3913 	 */
3914 	cancel_htid = ldcp->cancel_htid;
3915 
3916 	/*
3917 	 * NOTE: not using switch() as event could be triggered by
3918 	 * a state change and a read request. Also the ordering	of the
3919 	 * check for the event types is deliberate.
3920 	 */
3921 	if (event & LDC_EVT_UP) {
3922 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
3923 			DWARN(vgenp, ldcp, "ldc_status err\n");
3924 			/* status couldn't be determined */
3925 			ret = LDC_FAILURE;
3926 			goto ldc_cb_ret;
3927 		}
3928 		ldcp->ldc_status = istatus;
3929 		if (ldcp->ldc_status != LDC_UP) {
3930 			DWARN(vgenp, ldcp, "LDC_EVT_UP received "
3931 			    " but ldc status is not UP(0x%x)\n",
3932 			    ldcp->ldc_status);
3933 			/* spurious interrupt, return success */
3934 			goto ldc_cb_ret;
3935 		}
3936 		DWARN(vgenp, ldcp, "event(%lx) UP, status(%d)\n",
3937 		    event, ldcp->ldc_status);
3938 
3939 		vgen_handle_evt_up(ldcp);
3940 
3941 		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);
3942 	}
3943 
3944 	/* Handle RESET/DOWN before READ event */
3945 	if (event & (LDC_EVT_RESET | LDC_EVT_DOWN)) {
3946 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
3947 			DWARN(vgenp, ldcp, "ldc_status error\n");
3948 			/* status couldn't be determined */
3949 			ret = LDC_FAILURE;
3950 			goto ldc_cb_ret;
3951 		}
3952 		ldcp->ldc_status = istatus;
3953 		DWARN(vgenp, ldcp, "event(%lx) RESET/DOWN, status(%d)\n",
3954 		    event, ldcp->ldc_status);
3955 
3956 		vgen_handle_evt_reset(ldcp);
3957 
3958 		/*
3959 		 * As the channel is down/reset, ignore READ event
3960 		 * but print a debug warning message.
3961 		 */
3962 		if (event & LDC_EVT_READ) {
3963 			DWARN(vgenp, ldcp,
3964 			    "LDC_EVT_READ set along with RESET/DOWN\n");
3965 			event &= ~LDC_EVT_READ;
3966 		}
3967 	}
3968 
3969 	if (event & LDC_EVT_READ) {
3970 		DBG2(vgenp, ldcp, "event(%lx) READ, status(%d)\n",
3971 		    event, ldcp->ldc_status);
3972 
3973 		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);
3974 
3975 		if (ldcp->rcv_thread != NULL) {
3976 			/*
3977 			 * If the receive thread is enabled, then
3978 			 * wakeup the receive thread to process the
3979 			 * LDC messages.
3980 			 */
3981 			mutex_exit(&ldcp->cblock);
3982 			mutex_enter(&ldcp->rcv_thr_lock);
3983 			if (!(ldcp->rcv_thr_flags & VGEN_WTHR_DATARCVD)) {
3984 				ldcp->rcv_thr_flags |= VGEN_WTHR_DATARCVD;
3985 				cv_signal(&ldcp->rcv_thr_cv);
3986 			}
3987 			mutex_exit(&ldcp->rcv_thr_lock);
3988 			mutex_enter(&ldcp->cblock);
3989 		} else  {
3990 			vgen_handle_evt_read(ldcp);
3991 		}
3992 	}
3993 
3994 ldc_cb_ret:
3995 	/*
3996 	 * Check to see if the status of cancel_htid has
3997 	 * changed. If another timer needs to be cancelled,
3998 	 * then let the next callback to clear it.
3999 	 */
4000 	if (cancel_htid == 0) {
4001 		cancel_htid = ldcp->cancel_htid;
4002 	}
4003 	mutex_exit(&ldcp->cblock);
4004 
4005 	if (cancel_htid) {
4006 		/*
4007 		 * Cancel handshake timer.
4008 		 * untimeout(9F) will not return until the pending callback is
4009 		 * cancelled or has run. No problems will result from calling
4010 		 * untimeout if the handler has already completed.
4011 		 * If the timeout handler did run, then it would just
4012 		 * return as cancel_htid is set.
4013 		 */
4014 		DBG2(vgenp, ldcp, "calling cance_htid =0x%X \n", cancel_htid);
4015 		(void) untimeout(cancel_htid);
4016 		mutex_enter(&ldcp->cblock);
4017 		/* clear it only if its the same as the one we cancelled */
4018 		if (ldcp->cancel_htid == cancel_htid) {
4019 			ldcp->cancel_htid = 0;
4020 		}
4021 		mutex_exit(&ldcp->cblock);
4022 	}
4023 	DBG1(vgenp, ldcp, "exit\n");
4024 	return (ret);
4025 }
4026 
4027 static void
4028 vgen_handle_evt_read(vgen_ldc_t *ldcp)
4029 {
4030 	int		rv;
4031 	uint64_t	*ldcmsg;
4032 	size_t		msglen;
4033 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4034 	vio_msg_tag_t	*tagp;
4035 	ldc_status_t 	istatus;
4036 	boolean_t 	has_data;
4037 
4038 	DBG1(vgenp, ldcp, "enter\n");
4039 
4040 	ldcmsg = ldcp->ldcmsg;
4041 	/*
4042 	 * If the receive thread is enabled, then the cblock
4043 	 * need to be acquired here. If not, the vgen_ldc_cb()
4044 	 * calls this function with cblock held already.
4045 	 */
4046 	if (ldcp->rcv_thread != NULL) {
4047 		mutex_enter(&ldcp->cblock);
4048 	} else {
4049 		ASSERT(MUTEX_HELD(&ldcp->cblock));
4050 	}
4051 
4052 vgen_evt_read:
4053 	do {
4054 		msglen = ldcp->msglen;
4055 		rv = ldc_read(ldcp->ldc_handle, (caddr_t)ldcmsg, &msglen);
4056 
4057 		if (rv != 0) {
4058 			DWARN(vgenp, ldcp, "err rv(%d) len(%d)\n",
4059 			    rv, msglen);
4060 			if (rv == ECONNRESET)
4061 				goto vgen_evtread_error;
4062 			break;
4063 		}
4064 		if (msglen == 0) {
4065 			DBG2(vgenp, ldcp, "ldc_read NODATA");
4066 			break;
4067 		}
4068 		DBG2(vgenp, ldcp, "ldc_read msglen(%d)", msglen);
4069 
4070 		tagp = (vio_msg_tag_t *)ldcmsg;
4071 
4072 		if (ldcp->peer_sid) {
4073 			/*
4074 			 * check sid only after we have received peer's sid
4075 			 * in the version negotiate msg.
4076 			 */
4077 #ifdef DEBUG
4078 			if (vgen_hdbg & HDBG_BAD_SID) {
4079 				/* simulate bad sid condition */
4080 				tagp->vio_sid = 0;
4081 				vgen_hdbg &= ~(HDBG_BAD_SID);
4082 			}
4083 #endif
4084 			rv = vgen_check_sid(ldcp, tagp);
4085 			if (rv != VGEN_SUCCESS) {
4086 				/*
4087 				 * If sid mismatch is detected,
4088 				 * reset the channel.
4089 				 */
4090 				ldcp->need_ldc_reset = B_TRUE;
4091 				goto vgen_evtread_error;
4092 			}
4093 		}
4094 
4095 		switch (tagp->vio_msgtype) {
4096 		case VIO_TYPE_CTRL:
4097 			rv = vgen_handle_ctrlmsg(ldcp, tagp);
4098 			break;
4099 
4100 		case VIO_TYPE_DATA:
4101 			rv = vgen_handle_datamsg(ldcp, tagp, msglen);
4102 			break;
4103 
4104 		case VIO_TYPE_ERR:
4105 			vgen_handle_errmsg(ldcp, tagp);
4106 			break;
4107 
4108 		default:
4109 			DWARN(vgenp, ldcp, "Unknown VIO_TYPE(%x)\n",
4110 			    tagp->vio_msgtype);
4111 			break;
4112 		}
4113 
4114 		/*
4115 		 * If an error is encountered, stop processing and
4116 		 * handle the error.
4117 		 */
4118 		if (rv != 0) {
4119 			goto vgen_evtread_error;
4120 		}
4121 
4122 	} while (msglen);
4123 
4124 	/* check once more before exiting */
4125 	rv = ldc_chkq(ldcp->ldc_handle, &has_data);
4126 	if ((rv == 0) && (has_data == B_TRUE)) {
4127 		DTRACE_PROBE(vgen_chkq);
4128 		goto vgen_evt_read;
4129 	}
4130 
4131 vgen_evtread_error:
4132 	if (rv == ECONNRESET) {
4133 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
4134 			DWARN(vgenp, ldcp, "ldc_status err\n");
4135 		} else {
4136 			ldcp->ldc_status = istatus;
4137 		}
4138 		vgen_handle_evt_reset(ldcp);
4139 	} else if (rv) {
4140 		vgen_handshake_retry(ldcp);
4141 	}
4142 
4143 	/*
4144 	 * If the receive thread is enabled, then cancel the
4145 	 * handshake timeout here.
4146 	 */
4147 	if (ldcp->rcv_thread != NULL) {
4148 		timeout_id_t cancel_htid = ldcp->cancel_htid;
4149 
4150 		mutex_exit(&ldcp->cblock);
4151 		if (cancel_htid) {
4152 			/*
4153 			 * Cancel handshake timer. untimeout(9F) will
4154 			 * not return until the pending callback is cancelled
4155 			 * or has run. No problems will result from calling
4156 			 * untimeout if the handler has already completed.
4157 			 * If the timeout handler did run, then it would just
4158 			 * return as cancel_htid is set.
4159 			 */
4160 			DBG2(vgenp, ldcp, "calling cance_htid =0x%X \n",
4161 			    cancel_htid);
4162 			(void) untimeout(cancel_htid);
4163 
4164 			/*
4165 			 * clear it only if its the same as the one we
4166 			 * cancelled
4167 			 */
4168 			mutex_enter(&ldcp->cblock);
4169 			if (ldcp->cancel_htid == cancel_htid) {
4170 				ldcp->cancel_htid = 0;
4171 			}
4172 			mutex_exit(&ldcp->cblock);
4173 		}
4174 	}
4175 
4176 	DBG1(vgenp, ldcp, "exit\n");
4177 }
4178 
4179 /* vgen handshake functions */
4180 
4181 /* change the hphase for the channel to the next phase */
4182 static vgen_ldc_t *
4183 vh_nextphase(vgen_ldc_t *ldcp)
4184 {
4185 	if (ldcp->hphase == VH_PHASE3) {
4186 		ldcp->hphase = VH_DONE;
4187 	} else {
4188 		ldcp->hphase++;
4189 	}
4190 	return (ldcp);
4191 }
4192 
4193 /*
4194  * wrapper routine to send the given message over ldc using ldc_write().
4195  */
4196 static int
4197 vgen_sendmsg(vgen_ldc_t *ldcp, caddr_t msg,  size_t msglen,
4198     boolean_t caller_holds_lock)
4199 {
4200 	int			rv;
4201 	size_t			len;
4202 	uint32_t		retries = 0;
4203 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
4204 	vio_msg_tag_t		*tagp = (vio_msg_tag_t *)msg;
4205 	vio_dring_msg_t		*dmsg;
4206 	vio_raw_data_msg_t	*rmsg;
4207 	boolean_t		data_msg = B_FALSE;
4208 
4209 	len = msglen;
4210 	if ((len == 0) || (msg == NULL))
4211 		return (VGEN_FAILURE);
4212 
4213 	if (!caller_holds_lock) {
4214 		mutex_enter(&ldcp->wrlock);
4215 	}
4216 
4217 	if (tagp->vio_subtype == VIO_SUBTYPE_INFO) {
4218 		if (tagp->vio_subtype_env == VIO_DRING_DATA) {
4219 			dmsg = (vio_dring_msg_t *)tagp;
4220 			dmsg->seq_num = ldcp->next_txseq;
4221 			data_msg = B_TRUE;
4222 		} else if (tagp->vio_subtype_env == VIO_PKT_DATA) {
4223 			rmsg = (vio_raw_data_msg_t *)tagp;
4224 			rmsg->seq_num = ldcp->next_txseq;
4225 			data_msg = B_TRUE;
4226 		}
4227 	}
4228 
4229 	do {
4230 		len = msglen;
4231 		rv = ldc_write(ldcp->ldc_handle, (caddr_t)msg, &len);
4232 		if (retries++ >= vgen_ldcwr_retries)
4233 			break;
4234 	} while (rv == EWOULDBLOCK);
4235 
4236 	if (rv == 0 && data_msg == B_TRUE) {
4237 		ldcp->next_txseq++;
4238 	}
4239 
4240 	if (!caller_holds_lock) {
4241 		mutex_exit(&ldcp->wrlock);
4242 	}
4243 
4244 	if (rv != 0) {
4245 		DWARN(vgenp, ldcp, "ldc_write failed: rv(%d)\n",
4246 		    rv, msglen);
4247 		return (rv);
4248 	}
4249 
4250 	if (len != msglen) {
4251 		DWARN(vgenp, ldcp, "ldc_write failed: rv(%d) msglen (%d)\n",
4252 		    rv, msglen);
4253 		return (VGEN_FAILURE);
4254 	}
4255 
4256 	return (VGEN_SUCCESS);
4257 }
4258 
4259 /* send version negotiate message to the peer over ldc */
4260 static int
4261 vgen_send_version_negotiate(vgen_ldc_t *ldcp)
4262 {
4263 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4264 	vio_ver_msg_t	vermsg;
4265 	vio_msg_tag_t	*tagp = &vermsg.tag;
4266 	int		rv;
4267 
4268 	bzero(&vermsg, sizeof (vermsg));
4269 
4270 	tagp->vio_msgtype = VIO_TYPE_CTRL;
4271 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
4272 	tagp->vio_subtype_env = VIO_VER_INFO;
4273 	tagp->vio_sid = ldcp->local_sid;
4274 
4275 	/* get version msg payload from ldcp->local */
4276 	vermsg.ver_major = ldcp->local_hparams.ver_major;
4277 	vermsg.ver_minor = ldcp->local_hparams.ver_minor;
4278 	vermsg.dev_class = ldcp->local_hparams.dev_class;
4279 
4280 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (vermsg), B_FALSE);
4281 	if (rv != VGEN_SUCCESS) {
4282 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
4283 		return (rv);
4284 	}
4285 
4286 	ldcp->hstate |= VER_INFO_SENT;
4287 	DBG2(vgenp, ldcp, "VER_INFO_SENT ver(%d,%d)\n",
4288 	    vermsg.ver_major, vermsg.ver_minor);
4289 
4290 	return (VGEN_SUCCESS);
4291 }
4292 
4293 /* send attr info message to the peer over ldc */
4294 static int
4295 vgen_send_attr_info(vgen_ldc_t *ldcp)
4296 {
4297 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4298 	vnet_attr_msg_t	attrmsg;
4299 	vio_msg_tag_t	*tagp = &attrmsg.tag;
4300 	int		rv;
4301 
4302 	bzero(&attrmsg, sizeof (attrmsg));
4303 
4304 	tagp->vio_msgtype = VIO_TYPE_CTRL;
4305 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
4306 	tagp->vio_subtype_env = VIO_ATTR_INFO;
4307 	tagp->vio_sid = ldcp->local_sid;
4308 
4309 	/* get attr msg payload from ldcp->local */
4310 	attrmsg.mtu = ldcp->local_hparams.mtu;
4311 	attrmsg.addr = ldcp->local_hparams.addr;
4312 	attrmsg.addr_type = ldcp->local_hparams.addr_type;
4313 	attrmsg.xfer_mode = ldcp->local_hparams.xfer_mode;
4314 	attrmsg.ack_freq = ldcp->local_hparams.ack_freq;
4315 	attrmsg.physlink_update = ldcp->local_hparams.physlink_update;
4316 
4317 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (attrmsg), B_FALSE);
4318 	if (rv != VGEN_SUCCESS) {
4319 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
4320 		return (rv);
4321 	}
4322 
4323 	ldcp->hstate |= ATTR_INFO_SENT;
4324 	DBG2(vgenp, ldcp, "ATTR_INFO_SENT\n");
4325 
4326 	return (VGEN_SUCCESS);
4327 }
4328 
4329 /* send descriptor ring register message to the peer over ldc */
4330 static int
4331 vgen_send_dring_reg(vgen_ldc_t *ldcp)
4332 {
4333 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
4334 	vio_dring_reg_msg_t	msg;
4335 	vio_msg_tag_t		*tagp = &msg.tag;
4336 	int		rv;
4337 
4338 	bzero(&msg, sizeof (msg));
4339 
4340 	tagp->vio_msgtype = VIO_TYPE_CTRL;
4341 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
4342 	tagp->vio_subtype_env = VIO_DRING_REG;
4343 	tagp->vio_sid = ldcp->local_sid;
4344 
4345 	/* get dring info msg payload from ldcp->local */
4346 	bcopy(&(ldcp->local_hparams.dring_cookie), (msg.cookie),
4347 	    sizeof (ldc_mem_cookie_t));
4348 	msg.ncookies = ldcp->local_hparams.num_dcookies;
4349 	msg.num_descriptors = ldcp->local_hparams.num_desc;
4350 	msg.descriptor_size = ldcp->local_hparams.desc_size;
4351 
4352 	/*
4353 	 * dring_ident is set to 0. After mapping the dring, peer sets this
4354 	 * value and sends it in the ack, which is saved in
4355 	 * vgen_handle_dring_reg().
4356 	 */
4357 	msg.dring_ident = 0;
4358 
4359 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (msg), B_FALSE);
4360 	if (rv != VGEN_SUCCESS) {
4361 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
4362 		return (rv);
4363 	}
4364 
4365 	ldcp->hstate |= DRING_INFO_SENT;
4366 	DBG2(vgenp, ldcp, "DRING_INFO_SENT \n");
4367 
4368 	return (VGEN_SUCCESS);
4369 }
4370 
4371 static int
4372 vgen_send_rdx_info(vgen_ldc_t *ldcp)
4373 {
4374 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4375 	vio_rdx_msg_t	rdxmsg;
4376 	vio_msg_tag_t	*tagp = &rdxmsg.tag;
4377 	int		rv;
4378 
4379 	bzero(&rdxmsg, sizeof (rdxmsg));
4380 
4381 	tagp->vio_msgtype = VIO_TYPE_CTRL;
4382 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
4383 	tagp->vio_subtype_env = VIO_RDX;
4384 	tagp->vio_sid = ldcp->local_sid;
4385 
4386 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (rdxmsg), B_FALSE);
4387 	if (rv != VGEN_SUCCESS) {
4388 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
4389 		return (rv);
4390 	}
4391 
4392 	ldcp->hstate |= RDX_INFO_SENT;
4393 	DBG2(vgenp, ldcp, "RDX_INFO_SENT\n");
4394 
4395 	return (VGEN_SUCCESS);
4396 }
4397 
4398 /* send descriptor ring data message to the peer over ldc */
4399 static int
4400 vgen_send_dring_data(vgen_ldc_t *ldcp, uint32_t start, int32_t end)
4401 {
4402 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4403 	vio_dring_msg_t	dringmsg, *msgp = &dringmsg;
4404 	vio_msg_tag_t	*tagp = &msgp->tag;
4405 	vgen_stats_t	*statsp = &ldcp->stats;
4406 	int		rv;
4407 
4408 	bzero(msgp, sizeof (*msgp));
4409 
4410 	tagp->vio_msgtype = VIO_TYPE_DATA;
4411 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
4412 	tagp->vio_subtype_env = VIO_DRING_DATA;
4413 	tagp->vio_sid = ldcp->local_sid;
4414 
4415 	msgp->dring_ident = ldcp->local_hparams.dring_ident;
4416 	msgp->start_idx = start;
4417 	msgp->end_idx = end;
4418 
4419 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (dringmsg), B_TRUE);
4420 	if (rv != VGEN_SUCCESS) {
4421 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
4422 		return (rv);
4423 	}
4424 
4425 	statsp->dring_data_msgs++;
4426 
4427 	DBG2(vgenp, ldcp, "DRING_DATA_SENT \n");
4428 
4429 	return (VGEN_SUCCESS);
4430 }
4431 
4432 /* send multicast addr info message to vsw */
4433 static int
4434 vgen_send_mcast_info(vgen_ldc_t *ldcp)
4435 {
4436 	vnet_mcast_msg_t	mcastmsg;
4437 	vnet_mcast_msg_t	*msgp;
4438 	vio_msg_tag_t		*tagp;
4439 	vgen_t			*vgenp;
4440 	struct ether_addr	*mca;
4441 	int			rv;
4442 	int			i;
4443 	uint32_t		size;
4444 	uint32_t		mccount;
4445 	uint32_t		n;
4446 
4447 	msgp = &mcastmsg;
4448 	tagp = &msgp->tag;
4449 	vgenp = LDC_TO_VGEN(ldcp);
4450 
4451 	mccount = vgenp->mccount;
4452 	i = 0;
4453 
4454 	do {
4455 		tagp->vio_msgtype = VIO_TYPE_CTRL;
4456 		tagp->vio_subtype = VIO_SUBTYPE_INFO;
4457 		tagp->vio_subtype_env = VNET_MCAST_INFO;
4458 		tagp->vio_sid = ldcp->local_sid;
4459 
4460 		n = ((mccount >= VNET_NUM_MCAST) ? VNET_NUM_MCAST : mccount);
4461 		size = n * sizeof (struct ether_addr);
4462 
4463 		mca = &(vgenp->mctab[i]);
4464 		bcopy(mca, (msgp->mca), size);
4465 		msgp->set = B_TRUE;
4466 		msgp->count = n;
4467 
4468 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msgp),
4469 		    B_FALSE);
4470 		if (rv != VGEN_SUCCESS) {
4471 			DWARN(vgenp, ldcp, "vgen_sendmsg err(%d)\n", rv);
4472 			return (rv);
4473 		}
4474 
4475 		mccount -= n;
4476 		i += n;
4477 
4478 	} while (mccount);
4479 
4480 	return (VGEN_SUCCESS);
4481 }
4482 
4483 /* Initiate Phase 2 of handshake */
4484 static int
4485 vgen_handshake_phase2(vgen_ldc_t *ldcp)
4486 {
4487 	int rv;
4488 	uint32_t ncookies = 0;
4489 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4490 
4491 #ifdef DEBUG
4492 	if (vgen_hdbg & HDBG_OUT_STATE) {
4493 		/* simulate out of state condition */
4494 		vgen_hdbg &= ~(HDBG_OUT_STATE);
4495 		rv = vgen_send_rdx_info(ldcp);
4496 		return (rv);
4497 	}
4498 	if (vgen_hdbg & HDBG_TIMEOUT) {
4499 		/* simulate timeout condition */
4500 		vgen_hdbg &= ~(HDBG_TIMEOUT);
4501 		return (VGEN_SUCCESS);
4502 	}
4503 #endif
4504 	rv = vgen_send_attr_info(ldcp);
4505 	if (rv != VGEN_SUCCESS) {
4506 		return (rv);
4507 	}
4508 
4509 	/* Bind descriptor ring to the channel */
4510 	if (ldcp->num_txdcookies == 0) {
4511 		rv = ldc_mem_dring_bind(ldcp->ldc_handle, ldcp->tx_dhandle,
4512 		    LDC_DIRECT_MAP | LDC_SHADOW_MAP, LDC_MEM_RW,
4513 		    &ldcp->tx_dcookie, &ncookies);
4514 		if (rv != 0) {
4515 			DWARN(vgenp, ldcp, "ldc_mem_dring_bind failed "
4516 			    "rv(%x)\n", rv);
4517 			return (rv);
4518 		}
4519 		ASSERT(ncookies == 1);
4520 		ldcp->num_txdcookies = ncookies;
4521 	}
4522 
4523 	/* update local dring_info params */
4524 	bcopy(&(ldcp->tx_dcookie), &(ldcp->local_hparams.dring_cookie),
4525 	    sizeof (ldc_mem_cookie_t));
4526 	ldcp->local_hparams.num_dcookies = ldcp->num_txdcookies;
4527 	ldcp->local_hparams.num_desc = ldcp->num_txds;
4528 	ldcp->local_hparams.desc_size = sizeof (vnet_public_desc_t);
4529 
4530 	rv = vgen_send_dring_reg(ldcp);
4531 	if (rv != VGEN_SUCCESS) {
4532 		return (rv);
4533 	}
4534 
4535 	return (VGEN_SUCCESS);
4536 }
4537 
4538 /*
4539  * Set vnet-protocol-version dependent functions based on version.
4540  */
4541 static void
4542 vgen_set_vnet_proto_ops(vgen_ldc_t *ldcp)
4543 {
4544 	vgen_hparams_t	*lp = &ldcp->local_hparams;
4545 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4546 
4547 	if (VGEN_VER_GTEQ(ldcp, 1, 5)) {
4548 		vgen_port_t	*portp = ldcp->portp;
4549 		vnet_t		*vnetp = vgenp->vnetp;
4550 		/*
4551 		 * If the version negotiated with vswitch is >= 1.5 (link
4552 		 * status update support), set the required bits in our
4553 		 * attributes if this vnet device has been configured to get
4554 		 * physical link state updates.
4555 		 */
4556 		if (portp == vgenp->vsw_portp && vnetp->pls_update == B_TRUE) {
4557 			lp->physlink_update = PHYSLINK_UPDATE_STATE;
4558 		} else {
4559 			lp->physlink_update = PHYSLINK_UPDATE_NONE;
4560 		}
4561 	}
4562 
4563 	if (VGEN_VER_GTEQ(ldcp, 1, 4)) {
4564 		/*
4565 		 * If the version negotiated with peer is >= 1.4(Jumbo Frame
4566 		 * Support), set the mtu in our attributes to max_frame_size.
4567 		 */
4568 		lp->mtu = vgenp->max_frame_size;
4569 	} else  if (VGEN_VER_EQ(ldcp, 1, 3)) {
4570 		/*
4571 		 * If the version negotiated with peer is == 1.3 (Vlan Tag
4572 		 * Support) set the attr.mtu to ETHERMAX + VLAN_TAGSZ.
4573 		 */
4574 		lp->mtu = ETHERMAX + VLAN_TAGSZ;
4575 	} else {
4576 		vgen_port_t	*portp = ldcp->portp;
4577 		vnet_t		*vnetp = vgenp->vnetp;
4578 		/*
4579 		 * Pre-1.3 peers expect max frame size of ETHERMAX.
4580 		 * We can negotiate that size with those peers provided the
4581 		 * following conditions are true:
4582 		 * - Only pvid is defined for our peer and there are no vids.
4583 		 * - pvids are equal.
4584 		 * If the above conditions are true, then we can send/recv only
4585 		 * untagged frames of max size ETHERMAX.
4586 		 */
4587 		if (portp->nvids == 0 && portp->pvid == vnetp->pvid) {
4588 			lp->mtu = ETHERMAX;
4589 		}
4590 	}
4591 
4592 	if (VGEN_VER_GTEQ(ldcp, 1, 2)) {
4593 		/* Versions >= 1.2 */
4594 
4595 		if (VGEN_PRI_ETH_DEFINED(vgenp)) {
4596 			/*
4597 			 * enable priority routines and pkt mode only if
4598 			 * at least one pri-eth-type is specified in MD.
4599 			 */
4600 
4601 			ldcp->tx = vgen_ldcsend;
4602 			ldcp->rx_pktdata = vgen_handle_pkt_data;
4603 
4604 			/* set xfer mode for vgen_send_attr_info() */
4605 			lp->xfer_mode = VIO_PKT_MODE | VIO_DRING_MODE_V1_2;
4606 
4607 		} else {
4608 			/* no priority eth types defined in MD */
4609 
4610 			ldcp->tx = vgen_ldcsend_dring;
4611 			ldcp->rx_pktdata = vgen_handle_pkt_data_nop;
4612 
4613 			/* set xfer mode for vgen_send_attr_info() */
4614 			lp->xfer_mode = VIO_DRING_MODE_V1_2;
4615 
4616 		}
4617 	} else {
4618 		/* Versions prior to 1.2  */
4619 
4620 		vgen_reset_vnet_proto_ops(ldcp);
4621 	}
4622 }
4623 
4624 /*
4625  * Reset vnet-protocol-version dependent functions to pre-v1.2.
4626  */
4627 static void
4628 vgen_reset_vnet_proto_ops(vgen_ldc_t *ldcp)
4629 {
4630 	vgen_hparams_t	*lp = &ldcp->local_hparams;
4631 
4632 	ldcp->tx = vgen_ldcsend_dring;
4633 	ldcp->rx_pktdata = vgen_handle_pkt_data_nop;
4634 
4635 	/* set xfer mode for vgen_send_attr_info() */
4636 	lp->xfer_mode = VIO_DRING_MODE_V1_0;
4637 }
4638 
4639 static void
4640 vgen_vlan_unaware_port_reset(vgen_port_t *portp)
4641 {
4642 	vgen_ldclist_t	*ldclp;
4643 	vgen_ldc_t	*ldcp;
4644 	vgen_t		*vgenp = portp->vgenp;
4645 	vnet_t		*vnetp = vgenp->vnetp;
4646 
4647 	ldclp = &portp->ldclist;
4648 
4649 	READ_ENTER(&ldclp->rwlock);
4650 
4651 	/*
4652 	 * NOTE: for now, we will assume we have a single channel.
4653 	 */
4654 	if (ldclp->headp == NULL) {
4655 		RW_EXIT(&ldclp->rwlock);
4656 		return;
4657 	}
4658 	ldcp = ldclp->headp;
4659 
4660 	mutex_enter(&ldcp->cblock);
4661 
4662 	/*
4663 	 * If the peer is vlan_unaware(ver < 1.3), reset channel and terminate
4664 	 * the connection. See comments in vgen_set_vnet_proto_ops().
4665 	 */
4666 	if (ldcp->hphase == VH_DONE && VGEN_VER_LT(ldcp, 1, 3) &&
4667 	    (portp->nvids != 0 || portp->pvid != vnetp->pvid)) {
4668 		ldcp->need_ldc_reset = B_TRUE;
4669 		vgen_handshake_retry(ldcp);
4670 	}
4671 
4672 	mutex_exit(&ldcp->cblock);
4673 
4674 	RW_EXIT(&ldclp->rwlock);
4675 }
4676 
4677 static void
4678 vgen_port_reset(vgen_port_t *portp)
4679 {
4680 	vgen_ldclist_t	*ldclp;
4681 	vgen_ldc_t	*ldcp;
4682 
4683 	ldclp = &portp->ldclist;
4684 
4685 	READ_ENTER(&ldclp->rwlock);
4686 
4687 	/*
4688 	 * NOTE: for now, we will assume we have a single channel.
4689 	 */
4690 	if (ldclp->headp == NULL) {
4691 		RW_EXIT(&ldclp->rwlock);
4692 		return;
4693 	}
4694 	ldcp = ldclp->headp;
4695 
4696 	mutex_enter(&ldcp->cblock);
4697 
4698 	ldcp->need_ldc_reset = B_TRUE;
4699 	vgen_handshake_retry(ldcp);
4700 
4701 	mutex_exit(&ldcp->cblock);
4702 
4703 	RW_EXIT(&ldclp->rwlock);
4704 }
4705 
4706 static void
4707 vgen_reset_vlan_unaware_ports(vgen_t *vgenp)
4708 {
4709 	vgen_port_t	*portp;
4710 	vgen_portlist_t	*plistp;
4711 
4712 	plistp = &(vgenp->vgenports);
4713 	READ_ENTER(&plistp->rwlock);
4714 
4715 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
4716 
4717 		vgen_vlan_unaware_port_reset(portp);
4718 
4719 	}
4720 
4721 	RW_EXIT(&plistp->rwlock);
4722 }
4723 
4724 static void
4725 vgen_reset_vsw_port(vgen_t *vgenp)
4726 {
4727 	vgen_port_t	*portp;
4728 
4729 	if ((portp = vgenp->vsw_portp) != NULL) {
4730 		vgen_port_reset(portp);
4731 	}
4732 }
4733 
4734 /*
4735  * This function resets the handshake phase to VH_PHASE0(pre-handshake phase).
4736  * This can happen after a channel comes up (status: LDC_UP) or
4737  * when handshake gets terminated due to various conditions.
4738  */
4739 static void
4740 vgen_reset_hphase(vgen_ldc_t *ldcp)
4741 {
4742 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4743 	ldc_status_t istatus;
4744 	int rv;
4745 
4746 	DBG1(vgenp, ldcp, "enter\n");
4747 	/* reset hstate and hphase */
4748 	ldcp->hstate = 0;
4749 	ldcp->hphase = VH_PHASE0;
4750 
4751 	vgen_reset_vnet_proto_ops(ldcp);
4752 
4753 	/*
4754 	 * Save the id of pending handshake timer in cancel_htid.
4755 	 * This will be checked in vgen_ldc_cb() and the handshake timer will
4756 	 * be cancelled after releasing cblock.
4757 	 */
4758 	if (ldcp->htid) {
4759 		ldcp->cancel_htid = ldcp->htid;
4760 		ldcp->htid = 0;
4761 	}
4762 
4763 	if (ldcp->local_hparams.dring_ready) {
4764 		ldcp->local_hparams.dring_ready = B_FALSE;
4765 	}
4766 
4767 	/* Unbind tx descriptor ring from the channel */
4768 	if (ldcp->num_txdcookies) {
4769 		rv = ldc_mem_dring_unbind(ldcp->tx_dhandle);
4770 		if (rv != 0) {
4771 			DWARN(vgenp, ldcp, "ldc_mem_dring_unbind failed\n");
4772 		}
4773 		ldcp->num_txdcookies = 0;
4774 	}
4775 
4776 	if (ldcp->peer_hparams.dring_ready) {
4777 		ldcp->peer_hparams.dring_ready = B_FALSE;
4778 		/* Unmap peer's dring */
4779 		(void) ldc_mem_dring_unmap(ldcp->rx_dhandle);
4780 		vgen_clobber_rxds(ldcp);
4781 	}
4782 
4783 	vgen_clobber_tbufs(ldcp);
4784 
4785 	/*
4786 	 * clear local handshake params and initialize.
4787 	 */
4788 	bzero(&(ldcp->local_hparams), sizeof (ldcp->local_hparams));
4789 
4790 	/* set version to the highest version supported */
4791 	ldcp->local_hparams.ver_major =
4792 	    ldcp->vgen_versions[0].ver_major;
4793 	ldcp->local_hparams.ver_minor =
4794 	    ldcp->vgen_versions[0].ver_minor;
4795 	ldcp->local_hparams.dev_class = VDEV_NETWORK;
4796 
4797 	/* set attr_info params */
4798 	ldcp->local_hparams.mtu = vgenp->max_frame_size;
4799 	ldcp->local_hparams.addr =
4800 	    vnet_macaddr_strtoul(vgenp->macaddr);
4801 	ldcp->local_hparams.addr_type = ADDR_TYPE_MAC;
4802 	ldcp->local_hparams.xfer_mode = VIO_DRING_MODE_V1_0;
4803 	ldcp->local_hparams.ack_freq = 0;	/* don't need acks */
4804 	ldcp->local_hparams.physlink_update = PHYSLINK_UPDATE_NONE;
4805 
4806 	/*
4807 	 * Note: dring is created, but not bound yet.
4808 	 * local dring_info params will be updated when we bind the dring in
4809 	 * vgen_handshake_phase2().
4810 	 * dring_ident is set to 0. After mapping the dring, peer sets this
4811 	 * value and sends it in the ack, which is saved in
4812 	 * vgen_handle_dring_reg().
4813 	 */
4814 	ldcp->local_hparams.dring_ident = 0;
4815 
4816 	/* clear peer_hparams */
4817 	bzero(&(ldcp->peer_hparams), sizeof (ldcp->peer_hparams));
4818 
4819 	/* reset the channel if required */
4820 #ifdef	VNET_IOC_DEBUG
4821 	if (ldcp->need_ldc_reset && !ldcp->link_down_forced) {
4822 #else
4823 	if (ldcp->need_ldc_reset) {
4824 #endif
4825 		DWARN(vgenp, ldcp, "Doing Channel Reset...\n");
4826 		ldcp->need_ldc_reset = B_FALSE;
4827 		(void) ldc_down(ldcp->ldc_handle);
4828 		(void) ldc_status(ldcp->ldc_handle, &istatus);
4829 		DBG2(vgenp, ldcp, "Reset Done,ldc_status(%x)\n", istatus);
4830 		ldcp->ldc_status = istatus;
4831 
4832 		/* clear sids */
4833 		ldcp->local_sid = 0;
4834 		ldcp->peer_sid = 0;
4835 
4836 		/* try to bring the channel up */
4837 		rv = ldc_up(ldcp->ldc_handle);
4838 		if (rv != 0) {
4839 			DWARN(vgenp, ldcp, "ldc_up err rv(%d)\n", rv);
4840 		}
4841 
4842 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
4843 			DWARN(vgenp, ldcp, "ldc_status err\n");
4844 		} else {
4845 			ldcp->ldc_status = istatus;
4846 		}
4847 	}
4848 }
4849 
4850 /* wrapper function for vgen_reset_hphase */
4851 static void
4852 vgen_handshake_reset(vgen_ldc_t *ldcp)
4853 {
4854 	vgen_t  *vgenp = LDC_TO_VGEN(ldcp);
4855 
4856 	ASSERT(MUTEX_HELD(&ldcp->cblock));
4857 	mutex_enter(&ldcp->rxlock);
4858 	mutex_enter(&ldcp->wrlock);
4859 	mutex_enter(&ldcp->txlock);
4860 	mutex_enter(&ldcp->tclock);
4861 
4862 	vgen_reset_hphase(ldcp);
4863 
4864 	mutex_exit(&ldcp->tclock);
4865 	mutex_exit(&ldcp->txlock);
4866 	mutex_exit(&ldcp->wrlock);
4867 	mutex_exit(&ldcp->rxlock);
4868 
4869 	/*
4870 	 * As the connection is now reset, mark the channel
4871 	 * link_state as 'down' and notify the stack if needed.
4872 	 */
4873 	if (ldcp->link_state != LINK_STATE_DOWN) {
4874 		ldcp->link_state = LINK_STATE_DOWN;
4875 
4876 		if (ldcp->portp == vgenp->vsw_portp) { /* vswitch port ? */
4877 			/*
4878 			 * As the channel link is down, mark physical link also
4879 			 * as down. After the channel comes back up and
4880 			 * handshake completes, we will get an update on the
4881 			 * physlink state from vswitch (if this device has been
4882 			 * configured to get phys link updates).
4883 			 */
4884 			vgenp->phys_link_state = LINK_STATE_DOWN;
4885 
4886 			/* Now update the stack */
4887 			mutex_exit(&ldcp->cblock);
4888 			vgen_link_update(vgenp, ldcp->link_state);
4889 			mutex_enter(&ldcp->cblock);
4890 		}
4891 	}
4892 }
4893 
4894 /*
4895  * Initiate handshake with the peer by sending various messages
4896  * based on the handshake-phase that the channel is currently in.
4897  */
4898 static void
4899 vgen_handshake(vgen_ldc_t *ldcp)
4900 {
4901 	uint32_t	hphase = ldcp->hphase;
4902 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4903 	ldc_status_t	istatus;
4904 	int		rv = 0;
4905 
4906 	switch (hphase) {
4907 
4908 	case VH_PHASE1:
4909 
4910 		/*
4911 		 * start timer, for entire handshake process, turn this timer
4912 		 * off if all phases of handshake complete successfully and
4913 		 * hphase goes to VH_DONE(below) or
4914 		 * vgen_reset_hphase() gets called or
4915 		 * channel is reset due to errors or
4916 		 * vgen_ldc_uninit() is invoked(vgen_stop).
4917 		 */
4918 		ASSERT(ldcp->htid == 0);
4919 		ldcp->htid = timeout(vgen_hwatchdog, (caddr_t)ldcp,
4920 		    drv_usectohz(vgen_hwd_interval * MICROSEC));
4921 
4922 		/* Phase 1 involves negotiating the version */
4923 		rv = vgen_send_version_negotiate(ldcp);
4924 		break;
4925 
4926 	case VH_PHASE2:
4927 		rv = vgen_handshake_phase2(ldcp);
4928 		break;
4929 
4930 	case VH_PHASE3:
4931 		rv = vgen_send_rdx_info(ldcp);
4932 		break;
4933 
4934 	case VH_DONE:
4935 		/*
4936 		 * Save the id of pending handshake timer in cancel_htid.
4937 		 * This will be checked in vgen_ldc_cb() and the handshake
4938 		 * timer will be cancelled after releasing cblock.
4939 		 */
4940 		if (ldcp->htid) {
4941 			ldcp->cancel_htid = ldcp->htid;
4942 			ldcp->htid = 0;
4943 		}
4944 		ldcp->hretries = 0;
4945 		DBG1(vgenp, ldcp, "Handshake Done\n");
4946 
4947 		/*
4948 		 * The channel is up and handshake is done successfully. Now we
4949 		 * can mark the channel link_state as 'up'. We also notify the
4950 		 * stack if the channel is connected to vswitch.
4951 		 */
4952 		ldcp->link_state = LINK_STATE_UP;
4953 
4954 		if (ldcp->portp == vgenp->vsw_portp) {
4955 			/*
4956 			 * If this channel(port) is connected to vsw,
4957 			 * need to sync multicast table with vsw.
4958 			 */
4959 			mutex_exit(&ldcp->cblock);
4960 
4961 			mutex_enter(&vgenp->lock);
4962 			rv = vgen_send_mcast_info(ldcp);
4963 			mutex_exit(&vgenp->lock);
4964 
4965 			if (vgenp->pls_negotiated == B_FALSE) {
4966 				/*
4967 				 * We haven't negotiated with vswitch to get
4968 				 * physical link state updates. We can update
4969 				 * update the stack at this point as the
4970 				 * channel to vswitch is up and the handshake
4971 				 * is done successfully.
4972 				 *
4973 				 * If we have negotiated to get physical link
4974 				 * state updates, then we won't notify the
4975 				 * the stack here; we do that as soon as
4976 				 * vswitch sends us the initial phys link state
4977 				 * (see vgen_handle_physlink_info()).
4978 				 */
4979 				vgen_link_update(vgenp, ldcp->link_state);
4980 			}
4981 
4982 			mutex_enter(&ldcp->cblock);
4983 			if (rv != VGEN_SUCCESS)
4984 				break;
4985 		}
4986 
4987 		/*
4988 		 * Check if mac layer should be notified to restart
4989 		 * transmissions. This can happen if the channel got
4990 		 * reset and vgen_clobber_tbufs() is called, while
4991 		 * need_resched is set.
4992 		 */
4993 		mutex_enter(&ldcp->tclock);
4994 		if (ldcp->need_resched) {
4995 			vio_net_tx_update_t vtx_update =
4996 			    ldcp->portp->vcb.vio_net_tx_update;
4997 
4998 			ldcp->need_resched = B_FALSE;
4999 			vtx_update(ldcp->portp->vhp);
5000 		}
5001 		mutex_exit(&ldcp->tclock);
5002 
5003 		break;
5004 
5005 	default:
5006 		break;
5007 	}
5008 
5009 	if (rv == ECONNRESET) {
5010 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
5011 			DWARN(vgenp, ldcp, "ldc_status err\n");
5012 		} else {
5013 			ldcp->ldc_status = istatus;
5014 		}
5015 		vgen_handle_evt_reset(ldcp);
5016 	} else if (rv) {
5017 		vgen_handshake_reset(ldcp);
5018 	}
5019 }
5020 
5021 /*
5022  * Check if the current handshake phase has completed successfully and
5023  * return the status.
5024  */
5025 static int
5026 vgen_handshake_done(vgen_ldc_t *ldcp)
5027 {
5028 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
5029 	uint32_t	hphase = ldcp->hphase;
5030 	int 		status = 0;
5031 
5032 	switch (hphase) {
5033 
5034 	case VH_PHASE1:
5035 		/*
5036 		 * Phase1 is done, if version negotiation
5037 		 * completed successfully.
5038 		 */
5039 		status = ((ldcp->hstate & VER_NEGOTIATED) ==
5040 		    VER_NEGOTIATED);
5041 		break;
5042 
5043 	case VH_PHASE2:
5044 		/*
5045 		 * Phase 2 is done, if attr info and dring info
5046 		 * have been exchanged successfully.
5047 		 */
5048 		status = (((ldcp->hstate & ATTR_INFO_EXCHANGED) ==
5049 		    ATTR_INFO_EXCHANGED) &&
5050 		    ((ldcp->hstate & DRING_INFO_EXCHANGED) ==
5051 		    DRING_INFO_EXCHANGED));
5052 		break;
5053 
5054 	case VH_PHASE3:
5055 		/* Phase 3 is done, if rdx msg has been exchanged */
5056 		status = ((ldcp->hstate & RDX_EXCHANGED) ==
5057 		    RDX_EXCHANGED);
5058 		break;
5059 
5060 	default:
5061 		break;
5062 	}
5063 
5064 	if (status == 0) {
5065 		return (VGEN_FAILURE);
5066 	}
5067 	DBG2(vgenp, ldcp, "PHASE(%d)\n", hphase);
5068 	return (VGEN_SUCCESS);
5069 }
5070 
5071 /* retry handshake on failure */
5072 static void
5073 vgen_handshake_retry(vgen_ldc_t *ldcp)
5074 {
5075 	/* reset handshake phase */
5076 	vgen_handshake_reset(ldcp);
5077 
5078 	/* handshake retry is specified and the channel is UP */
5079 	if (vgen_max_hretries && (ldcp->ldc_status == LDC_UP)) {
5080 		if (ldcp->hretries++ < vgen_max_hretries) {
5081 			ldcp->local_sid = ddi_get_lbolt();
5082 			vgen_handshake(vh_nextphase(ldcp));
5083 		}
5084 	}
5085 }
5086 
5087 
5088 /*
5089  * Link State Update Notes:
5090  * The link state of the channel connected to vswitch is reported as the link
5091  * state of the vnet device, by default. If the channel is down or reset, then
5092  * the link state is marked 'down'. If the channel is 'up' *and* handshake
5093  * between the vnet and vswitch is successful, then the link state is marked
5094  * 'up'. If physical network link state is desired, then the vnet device must
5095  * be configured to get physical link updates and the 'linkprop' property
5096  * in the virtual-device MD node indicates this. As part of attribute exchange
5097  * the vnet device negotiates with the vswitch to obtain physical link state
5098  * updates. If it successfully negotiates, vswitch sends an initial physlink
5099  * msg once the handshake is done and further whenever the physical link state
5100  * changes. Currently we don't have mac layer interfaces to report two distinct
5101  * link states - virtual and physical. Thus, if the vnet has been configured to
5102  * get physical link updates, then the link status will be reported as 'up'
5103  * only when both the virtual and physical links are up.
5104  */
5105 static void
5106 vgen_link_update(vgen_t *vgenp, link_state_t link_state)
5107 {
5108 	vnet_link_update(vgenp->vnetp, link_state);
5109 }
5110 
5111 /*
5112  * Handle a version info msg from the peer or an ACK/NACK from the peer
5113  * to a version info msg that we sent.
5114  */
5115 static int
5116 vgen_handle_version_negotiate(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5117 {
5118 	vgen_t		*vgenp;
5119 	vio_ver_msg_t	*vermsg = (vio_ver_msg_t *)tagp;
5120 	int		ack = 0;
5121 	int		failed = 0;
5122 	int		idx;
5123 	vgen_ver_t	*versions = ldcp->vgen_versions;
5124 	int		rv = 0;
5125 
5126 	vgenp = LDC_TO_VGEN(ldcp);
5127 	DBG1(vgenp, ldcp, "enter\n");
5128 	switch (tagp->vio_subtype) {
5129 	case VIO_SUBTYPE_INFO:
5130 
5131 		/*  Cache sid of peer if this is the first time */
5132 		if (ldcp->peer_sid == 0) {
5133 			DBG2(vgenp, ldcp, "Caching peer_sid(%x)\n",
5134 			    tagp->vio_sid);
5135 			ldcp->peer_sid = tagp->vio_sid;
5136 		}
5137 
5138 		if (ldcp->hphase != VH_PHASE1) {
5139 			/*
5140 			 * If we are not already in VH_PHASE1, reset to
5141 			 * pre-handshake state, and initiate handshake
5142 			 * to the peer too.
5143 			 */
5144 			vgen_handshake_reset(ldcp);
5145 			vgen_handshake(vh_nextphase(ldcp));
5146 		}
5147 		ldcp->hstate |= VER_INFO_RCVD;
5148 
5149 		/* save peer's requested values */
5150 		ldcp->peer_hparams.ver_major = vermsg->ver_major;
5151 		ldcp->peer_hparams.ver_minor = vermsg->ver_minor;
5152 		ldcp->peer_hparams.dev_class = vermsg->dev_class;
5153 
5154 		if ((vermsg->dev_class != VDEV_NETWORK) &&
5155 		    (vermsg->dev_class != VDEV_NETWORK_SWITCH)) {
5156 			/* unsupported dev_class, send NACK */
5157 
5158 			DWARN(vgenp, ldcp, "Version Negotiation Failed\n");
5159 
5160 			tagp->vio_subtype = VIO_SUBTYPE_NACK;
5161 			tagp->vio_sid = ldcp->local_sid;
5162 			/* send reply msg back to peer */
5163 			rv = vgen_sendmsg(ldcp, (caddr_t)tagp,
5164 			    sizeof (*vermsg), B_FALSE);
5165 			if (rv != VGEN_SUCCESS) {
5166 				return (rv);
5167 			}
5168 			return (VGEN_FAILURE);
5169 		}
5170 
5171 		DBG2(vgenp, ldcp, "VER_INFO_RCVD, ver(%d,%d)\n",
5172 		    vermsg->ver_major,  vermsg->ver_minor);
5173 
5174 		idx = 0;
5175 
5176 		for (;;) {
5177 
5178 			if (vermsg->ver_major > versions[idx].ver_major) {
5179 
5180 				/* nack with next lower version */
5181 				tagp->vio_subtype = VIO_SUBTYPE_NACK;
5182 				vermsg->ver_major = versions[idx].ver_major;
5183 				vermsg->ver_minor = versions[idx].ver_minor;
5184 				break;
5185 			}
5186 
5187 			if (vermsg->ver_major == versions[idx].ver_major) {
5188 
5189 				/* major version match - ACK version */
5190 				tagp->vio_subtype = VIO_SUBTYPE_ACK;
5191 				ack = 1;
5192 
5193 				/*
5194 				 * lower minor version to the one this endpt
5195 				 * supports, if necessary
5196 				 */
5197 				if (vermsg->ver_minor >
5198 				    versions[idx].ver_minor) {
5199 					vermsg->ver_minor =
5200 					    versions[idx].ver_minor;
5201 					ldcp->peer_hparams.ver_minor =
5202 					    versions[idx].ver_minor;
5203 				}
5204 				break;
5205 			}
5206 
5207 			idx++;
5208 
5209 			if (idx == VGEN_NUM_VER) {
5210 
5211 				/* no version match - send NACK */
5212 				tagp->vio_subtype = VIO_SUBTYPE_NACK;
5213 				vermsg->ver_major = 0;
5214 				vermsg->ver_minor = 0;
5215 				failed = 1;
5216 				break;
5217 			}
5218 
5219 		}
5220 
5221 		tagp->vio_sid = ldcp->local_sid;
5222 
5223 		/* send reply msg back to peer */
5224 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*vermsg),
5225 		    B_FALSE);
5226 		if (rv != VGEN_SUCCESS) {
5227 			return (rv);
5228 		}
5229 
5230 		if (ack) {
5231 			ldcp->hstate |= VER_ACK_SENT;
5232 			DBG2(vgenp, ldcp, "VER_ACK_SENT, ver(%d,%d) \n",
5233 			    vermsg->ver_major, vermsg->ver_minor);
5234 		}
5235 		if (failed) {
5236 			DWARN(vgenp, ldcp, "Negotiation Failed\n");
5237 			return (VGEN_FAILURE);
5238 		}
5239 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5240 
5241 			/*  VER_ACK_SENT and VER_ACK_RCVD */
5242 
5243 			/* local and peer versions match? */
5244 			ASSERT((ldcp->local_hparams.ver_major ==
5245 			    ldcp->peer_hparams.ver_major) &&
5246 			    (ldcp->local_hparams.ver_minor ==
5247 			    ldcp->peer_hparams.ver_minor));
5248 
5249 			vgen_set_vnet_proto_ops(ldcp);
5250 
5251 			/* move to the next phase */
5252 			vgen_handshake(vh_nextphase(ldcp));
5253 		}
5254 
5255 		break;
5256 
5257 	case VIO_SUBTYPE_ACK:
5258 
5259 		if (ldcp->hphase != VH_PHASE1) {
5260 			/*  This should not happen. */
5261 			DWARN(vgenp, ldcp, "Invalid Phase(%u)\n", ldcp->hphase);
5262 			return (VGEN_FAILURE);
5263 		}
5264 
5265 		/* SUCCESS - we have agreed on a version */
5266 		ldcp->local_hparams.ver_major = vermsg->ver_major;
5267 		ldcp->local_hparams.ver_minor = vermsg->ver_minor;
5268 		ldcp->hstate |= VER_ACK_RCVD;
5269 
5270 		DBG2(vgenp, ldcp, "VER_ACK_RCVD, ver(%d,%d) \n",
5271 		    vermsg->ver_major,  vermsg->ver_minor);
5272 
5273 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5274 
5275 			/*  VER_ACK_SENT and VER_ACK_RCVD */
5276 
5277 			/* local and peer versions match? */
5278 			ASSERT((ldcp->local_hparams.ver_major ==
5279 			    ldcp->peer_hparams.ver_major) &&
5280 			    (ldcp->local_hparams.ver_minor ==
5281 			    ldcp->peer_hparams.ver_minor));
5282 
5283 			vgen_set_vnet_proto_ops(ldcp);
5284 
5285 			/* move to the next phase */
5286 			vgen_handshake(vh_nextphase(ldcp));
5287 		}
5288 		break;
5289 
5290 	case VIO_SUBTYPE_NACK:
5291 
5292 		if (ldcp->hphase != VH_PHASE1) {
5293 			/*  This should not happen.  */
5294 			DWARN(vgenp, ldcp, "VER_NACK_RCVD Invalid "
5295 			"Phase(%u)\n", ldcp->hphase);
5296 			return (VGEN_FAILURE);
5297 		}
5298 
5299 		DBG2(vgenp, ldcp, "VER_NACK_RCVD next ver(%d,%d)\n",
5300 		    vermsg->ver_major, vermsg->ver_minor);
5301 
5302 		/* check if version in NACK is zero */
5303 		if (vermsg->ver_major == 0 && vermsg->ver_minor == 0) {
5304 			/*
5305 			 * Version Negotiation has failed.
5306 			 */
5307 			DWARN(vgenp, ldcp, "Version Negotiation Failed\n");
5308 			return (VGEN_FAILURE);
5309 		}
5310 
5311 		idx = 0;
5312 
5313 		for (;;) {
5314 
5315 			if (vermsg->ver_major > versions[idx].ver_major) {
5316 				/* select next lower version */
5317 
5318 				ldcp->local_hparams.ver_major =
5319 				    versions[idx].ver_major;
5320 				ldcp->local_hparams.ver_minor =
5321 				    versions[idx].ver_minor;
5322 				break;
5323 			}
5324 
5325 			if (vermsg->ver_major == versions[idx].ver_major) {
5326 				/* major version match */
5327 
5328 				ldcp->local_hparams.ver_major =
5329 				    versions[idx].ver_major;
5330 
5331 				ldcp->local_hparams.ver_minor =
5332 				    versions[idx].ver_minor;
5333 				break;
5334 			}
5335 
5336 			idx++;
5337 
5338 			if (idx == VGEN_NUM_VER) {
5339 				/*
5340 				 * no version match.
5341 				 * Version Negotiation has failed.
5342 				 */
5343 				DWARN(vgenp, ldcp,
5344 				    "Version Negotiation Failed\n");
5345 				return (VGEN_FAILURE);
5346 			}
5347 
5348 		}
5349 
5350 		rv = vgen_send_version_negotiate(ldcp);
5351 		if (rv != VGEN_SUCCESS) {
5352 			return (rv);
5353 		}
5354 
5355 		break;
5356 	}
5357 
5358 	DBG1(vgenp, ldcp, "exit\n");
5359 	return (VGEN_SUCCESS);
5360 }
5361 
5362 /* Check if the attributes are supported */
5363 static int
5364 vgen_check_attr_info(vgen_ldc_t *ldcp, vnet_attr_msg_t *msg)
5365 {
5366 	vgen_hparams_t	*lp = &ldcp->local_hparams;
5367 
5368 	if ((msg->addr_type != ADDR_TYPE_MAC) ||
5369 	    (msg->ack_freq > 64) ||
5370 	    (msg->xfer_mode != lp->xfer_mode)) {
5371 		return (VGEN_FAILURE);
5372 	}
5373 
5374 	if (VGEN_VER_LT(ldcp, 1, 4)) {
5375 		/* versions < 1.4, mtu must match */
5376 		if (msg->mtu != lp->mtu) {
5377 			return (VGEN_FAILURE);
5378 		}
5379 	} else {
5380 		/* Ver >= 1.4, validate mtu of the peer is at least ETHERMAX */
5381 		if (msg->mtu < ETHERMAX) {
5382 			return (VGEN_FAILURE);
5383 		}
5384 	}
5385 
5386 	return (VGEN_SUCCESS);
5387 }
5388 
5389 /*
5390  * Handle an attribute info msg from the peer or an ACK/NACK from the peer
5391  * to an attr info msg that we sent.
5392  */
5393 static int
5394 vgen_handle_attr_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5395 {
5396 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
5397 	vnet_attr_msg_t	*msg = (vnet_attr_msg_t *)tagp;
5398 	vgen_hparams_t	*lp = &ldcp->local_hparams;
5399 	vgen_hparams_t	*rp = &ldcp->peer_hparams;
5400 	int		ack = 1;
5401 	int		rv = 0;
5402 	uint32_t	mtu;
5403 
5404 	DBG1(vgenp, ldcp, "enter\n");
5405 	if (ldcp->hphase != VH_PHASE2) {
5406 		DWARN(vgenp, ldcp, "Rcvd ATTR_INFO subtype(%d),"
5407 		" Invalid Phase(%u)\n",
5408 		    tagp->vio_subtype, ldcp->hphase);
5409 		return (VGEN_FAILURE);
5410 	}
5411 	switch (tagp->vio_subtype) {
5412 	case VIO_SUBTYPE_INFO:
5413 
5414 		DBG2(vgenp, ldcp, "ATTR_INFO_RCVD \n");
5415 		ldcp->hstate |= ATTR_INFO_RCVD;
5416 
5417 		/* save peer's values */
5418 		rp->mtu = msg->mtu;
5419 		rp->addr = msg->addr;
5420 		rp->addr_type = msg->addr_type;
5421 		rp->xfer_mode = msg->xfer_mode;
5422 		rp->ack_freq = msg->ack_freq;
5423 
5424 		rv = vgen_check_attr_info(ldcp, msg);
5425 		if (rv == VGEN_FAILURE) {
5426 			/* unsupported attr, send NACK */
5427 			ack = 0;
5428 		} else {
5429 
5430 			if (VGEN_VER_GTEQ(ldcp, 1, 4)) {
5431 
5432 				/*
5433 				 * Versions >= 1.4:
5434 				 * The mtu is negotiated down to the
5435 				 * minimum of our mtu and peer's mtu.
5436 				 */
5437 				mtu = MIN(msg->mtu, vgenp->max_frame_size);
5438 
5439 				/*
5440 				 * If we have received an ack for the attr info
5441 				 * that we sent, then check if the mtu computed
5442 				 * above matches the mtu that the peer had ack'd
5443 				 * (saved in local hparams). If they don't
5444 				 * match, we fail the handshake.
5445 				 */
5446 				if (ldcp->hstate & ATTR_ACK_RCVD) {
5447 					if (mtu != lp->mtu) {
5448 						/* send NACK */
5449 						ack = 0;
5450 					}
5451 				} else {
5452 					/*
5453 					 * Save the mtu computed above in our
5454 					 * attr parameters, so it gets sent in
5455 					 * the attr info from us to the peer.
5456 					 */
5457 					lp->mtu = mtu;
5458 				}
5459 
5460 				/* save the MIN mtu in the msg to be replied */
5461 				msg->mtu = mtu;
5462 
5463 			}
5464 		}
5465 
5466 
5467 		if (ack) {
5468 			tagp->vio_subtype = VIO_SUBTYPE_ACK;
5469 		} else {
5470 			tagp->vio_subtype = VIO_SUBTYPE_NACK;
5471 		}
5472 		tagp->vio_sid = ldcp->local_sid;
5473 
5474 		/* send reply msg back to peer */
5475 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msg),
5476 		    B_FALSE);
5477 		if (rv != VGEN_SUCCESS) {
5478 			return (rv);
5479 		}
5480 
5481 		if (ack) {
5482 			ldcp->hstate |= ATTR_ACK_SENT;
5483 			DBG2(vgenp, ldcp, "ATTR_ACK_SENT \n");
5484 		} else {
5485 			/* failed */
5486 			DWARN(vgenp, ldcp, "ATTR_NACK_SENT \n");
5487 			return (VGEN_FAILURE);
5488 		}
5489 
5490 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5491 			vgen_handshake(vh_nextphase(ldcp));
5492 		}
5493 
5494 		break;
5495 
5496 	case VIO_SUBTYPE_ACK:
5497 
5498 		if (VGEN_VER_GTEQ(ldcp, 1, 5) &&
5499 		    ldcp->portp == vgenp->vsw_portp) {
5500 			/*
5501 			 * Versions >= 1.5:
5502 			 * If the vnet device has been configured to get
5503 			 * physical link state updates, check the corresponding
5504 			 * bits in the ack msg, if the peer is vswitch.
5505 			 */
5506 			if (((lp->physlink_update &
5507 			    PHYSLINK_UPDATE_STATE_MASK) ==
5508 			    PHYSLINK_UPDATE_STATE) &&
5509 
5510 			    ((msg->physlink_update &
5511 			    PHYSLINK_UPDATE_STATE_MASK) ==
5512 			    PHYSLINK_UPDATE_STATE_ACK)) {
5513 				vgenp->pls_negotiated = B_TRUE;
5514 			} else {
5515 				vgenp->pls_negotiated = B_FALSE;
5516 			}
5517 		}
5518 
5519 		if (VGEN_VER_GTEQ(ldcp, 1, 4)) {
5520 			/*
5521 			 * Versions >= 1.4:
5522 			 * The ack msg sent by the peer contains the minimum of
5523 			 * our mtu (that we had sent in our attr info) and the
5524 			 * peer's mtu.
5525 			 *
5526 			 * If we have sent an ack for the attr info msg from
5527 			 * the peer, check if the mtu that was computed then
5528 			 * (saved in local hparams) matches the mtu that the
5529 			 * peer has ack'd. If they don't match, we fail the
5530 			 * handshake.
5531 			 */
5532 			if (ldcp->hstate & ATTR_ACK_SENT) {
5533 				if (lp->mtu != msg->mtu) {
5534 					return (VGEN_FAILURE);
5535 				}
5536 			} else {
5537 				/*
5538 				 * If the mtu ack'd by the peer is > our mtu
5539 				 * fail handshake. Otherwise, save the mtu, so
5540 				 * we can validate it when we receive attr info
5541 				 * from our peer.
5542 				 */
5543 				if (msg->mtu > lp->mtu) {
5544 					return (VGEN_FAILURE);
5545 				}
5546 				if (msg->mtu <= lp->mtu) {
5547 					lp->mtu = msg->mtu;
5548 				}
5549 			}
5550 		}
5551 
5552 		ldcp->hstate |= ATTR_ACK_RCVD;
5553 
5554 		DBG2(vgenp, ldcp, "ATTR_ACK_RCVD \n");
5555 
5556 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5557 			vgen_handshake(vh_nextphase(ldcp));
5558 		}
5559 		break;
5560 
5561 	case VIO_SUBTYPE_NACK:
5562 
5563 		DBG2(vgenp, ldcp, "ATTR_NACK_RCVD \n");
5564 		return (VGEN_FAILURE);
5565 	}
5566 	DBG1(vgenp, ldcp, "exit\n");
5567 	return (VGEN_SUCCESS);
5568 }
5569 
5570 /* Check if the dring info msg is ok */
5571 static int
5572 vgen_check_dring_reg(vio_dring_reg_msg_t *msg)
5573 {
5574 	/* check if msg contents are ok */
5575 	if ((msg->num_descriptors < 128) || (msg->descriptor_size <
5576 	    sizeof (vnet_public_desc_t))) {
5577 		return (VGEN_FAILURE);
5578 	}
5579 	return (VGEN_SUCCESS);
5580 }
5581 
5582 /*
5583  * Handle a descriptor ring register msg from the peer or an ACK/NACK from
5584  * the peer to a dring register msg that we sent.
5585  */
5586 static int
5587 vgen_handle_dring_reg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5588 {
5589 	vio_dring_reg_msg_t *msg = (vio_dring_reg_msg_t *)tagp;
5590 	ldc_mem_cookie_t dcookie;
5591 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5592 	int ack = 0;
5593 	int rv = 0;
5594 
5595 	DBG1(vgenp, ldcp, "enter\n");
5596 	if (ldcp->hphase < VH_PHASE2) {
5597 		/* dring_info can be rcvd in any of the phases after Phase1 */
5598 		DWARN(vgenp, ldcp,
5599 		    "Rcvd DRING_INFO Subtype (%d), Invalid Phase(%u)\n",
5600 		    tagp->vio_subtype, ldcp->hphase);
5601 		return (VGEN_FAILURE);
5602 	}
5603 	switch (tagp->vio_subtype) {
5604 	case VIO_SUBTYPE_INFO:
5605 
5606 		DBG2(vgenp, ldcp, "DRING_INFO_RCVD \n");
5607 		ldcp->hstate |= DRING_INFO_RCVD;
5608 		bcopy((msg->cookie), &dcookie, sizeof (dcookie));
5609 
5610 		ASSERT(msg->ncookies == 1);
5611 
5612 		if (vgen_check_dring_reg(msg) == VGEN_SUCCESS) {
5613 			/*
5614 			 * verified dring info msg to be ok,
5615 			 * now try to map the remote dring.
5616 			 */
5617 			rv = vgen_init_rxds(ldcp, msg->num_descriptors,
5618 			    msg->descriptor_size, &dcookie,
5619 			    msg->ncookies);
5620 			if (rv == DDI_SUCCESS) {
5621 				/* now we can ack the peer */
5622 				ack = 1;
5623 			}
5624 		}
5625 		if (ack == 0) {
5626 			/* failed, send NACK */
5627 			tagp->vio_subtype = VIO_SUBTYPE_NACK;
5628 		} else {
5629 			if (!(ldcp->peer_hparams.dring_ready)) {
5630 
5631 				/* save peer's dring_info values */
5632 				bcopy(&dcookie,
5633 				    &(ldcp->peer_hparams.dring_cookie),
5634 				    sizeof (dcookie));
5635 				ldcp->peer_hparams.num_desc =
5636 				    msg->num_descriptors;
5637 				ldcp->peer_hparams.desc_size =
5638 				    msg->descriptor_size;
5639 				ldcp->peer_hparams.num_dcookies =
5640 				    msg->ncookies;
5641 
5642 				/* set dring_ident for the peer */
5643 				ldcp->peer_hparams.dring_ident =
5644 				    (uint64_t)ldcp->rxdp;
5645 				/* return the dring_ident in ack msg */
5646 				msg->dring_ident =
5647 				    (uint64_t)ldcp->rxdp;
5648 
5649 				ldcp->peer_hparams.dring_ready = B_TRUE;
5650 			}
5651 			tagp->vio_subtype = VIO_SUBTYPE_ACK;
5652 		}
5653 		tagp->vio_sid = ldcp->local_sid;
5654 		/* send reply msg back to peer */
5655 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msg),
5656 		    B_FALSE);
5657 		if (rv != VGEN_SUCCESS) {
5658 			return (rv);
5659 		}
5660 
5661 		if (ack) {
5662 			ldcp->hstate |= DRING_ACK_SENT;
5663 			DBG2(vgenp, ldcp, "DRING_ACK_SENT");
5664 		} else {
5665 			DWARN(vgenp, ldcp, "DRING_NACK_SENT");
5666 			return (VGEN_FAILURE);
5667 		}
5668 
5669 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5670 			vgen_handshake(vh_nextphase(ldcp));
5671 		}
5672 
5673 		break;
5674 
5675 	case VIO_SUBTYPE_ACK:
5676 
5677 		ldcp->hstate |= DRING_ACK_RCVD;
5678 
5679 		DBG2(vgenp, ldcp, "DRING_ACK_RCVD");
5680 
5681 		if (!(ldcp->local_hparams.dring_ready)) {
5682 			/* local dring is now ready */
5683 			ldcp->local_hparams.dring_ready = B_TRUE;
5684 
5685 			/* save dring_ident acked by peer */
5686 			ldcp->local_hparams.dring_ident =
5687 			    msg->dring_ident;
5688 		}
5689 
5690 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5691 			vgen_handshake(vh_nextphase(ldcp));
5692 		}
5693 
5694 		break;
5695 
5696 	case VIO_SUBTYPE_NACK:
5697 
5698 		DBG2(vgenp, ldcp, "DRING_NACK_RCVD");
5699 		return (VGEN_FAILURE);
5700 	}
5701 	DBG1(vgenp, ldcp, "exit\n");
5702 	return (VGEN_SUCCESS);
5703 }
5704 
5705 /*
5706  * Handle a rdx info msg from the peer or an ACK/NACK
5707  * from the peer to a rdx info msg that we sent.
5708  */
5709 static int
5710 vgen_handle_rdx_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5711 {
5712 	int rv = 0;
5713 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
5714 
5715 	DBG1(vgenp, ldcp, "enter\n");
5716 	if (ldcp->hphase != VH_PHASE3) {
5717 		DWARN(vgenp, ldcp,
5718 		    "Rcvd RDX_INFO Subtype (%d), Invalid Phase(%u)\n",
5719 		    tagp->vio_subtype, ldcp->hphase);
5720 		return (VGEN_FAILURE);
5721 	}
5722 	switch (tagp->vio_subtype) {
5723 	case VIO_SUBTYPE_INFO:
5724 
5725 		DBG2(vgenp, ldcp, "RDX_INFO_RCVD \n");
5726 		ldcp->hstate |= RDX_INFO_RCVD;
5727 
5728 		tagp->vio_subtype = VIO_SUBTYPE_ACK;
5729 		tagp->vio_sid = ldcp->local_sid;
5730 		/* send reply msg back to peer */
5731 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (vio_rdx_msg_t),
5732 		    B_FALSE);
5733 		if (rv != VGEN_SUCCESS) {
5734 			return (rv);
5735 		}
5736 
5737 		ldcp->hstate |= RDX_ACK_SENT;
5738 		DBG2(vgenp, ldcp, "RDX_ACK_SENT \n");
5739 
5740 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5741 			vgen_handshake(vh_nextphase(ldcp));
5742 		}
5743 
5744 		break;
5745 
5746 	case VIO_SUBTYPE_ACK:
5747 
5748 		ldcp->hstate |= RDX_ACK_RCVD;
5749 
5750 		DBG2(vgenp, ldcp, "RDX_ACK_RCVD \n");
5751 
5752 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5753 			vgen_handshake(vh_nextphase(ldcp));
5754 		}
5755 		break;
5756 
5757 	case VIO_SUBTYPE_NACK:
5758 
5759 		DBG2(vgenp, ldcp, "RDX_NACK_RCVD \n");
5760 		return (VGEN_FAILURE);
5761 	}
5762 	DBG1(vgenp, ldcp, "exit\n");
5763 	return (VGEN_SUCCESS);
5764 }
5765 
5766 /* Handle ACK/NACK from vsw to a set multicast msg that we sent */
5767 static int
5768 vgen_handle_mcast_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5769 {
5770 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5771 	vnet_mcast_msg_t *msgp = (vnet_mcast_msg_t *)tagp;
5772 	struct ether_addr *addrp;
5773 	int count;
5774 	int i;
5775 
5776 	DBG1(vgenp, ldcp, "enter\n");
5777 	switch (tagp->vio_subtype) {
5778 
5779 	case VIO_SUBTYPE_INFO:
5780 
5781 		/* vnet shouldn't recv set mcast msg, only vsw handles it */
5782 		DWARN(vgenp, ldcp, "rcvd SET_MCAST_INFO \n");
5783 		break;
5784 
5785 	case VIO_SUBTYPE_ACK:
5786 
5787 		/* success adding/removing multicast addr */
5788 		DBG1(vgenp, ldcp, "rcvd SET_MCAST_ACK \n");
5789 		break;
5790 
5791 	case VIO_SUBTYPE_NACK:
5792 
5793 		DWARN(vgenp, ldcp, "rcvd SET_MCAST_NACK \n");
5794 		if (!(msgp->set)) {
5795 			/* multicast remove request failed */
5796 			break;
5797 		}
5798 
5799 		/* multicast add request failed */
5800 		for (count = 0; count < msgp->count; count++) {
5801 			addrp = &(msgp->mca[count]);
5802 
5803 			/* delete address from the table */
5804 			for (i = 0; i < vgenp->mccount; i++) {
5805 				if (ether_cmp(addrp,
5806 				    &(vgenp->mctab[i])) == 0) {
5807 					if (vgenp->mccount > 1) {
5808 						int t = vgenp->mccount - 1;
5809 						vgenp->mctab[i] =
5810 						    vgenp->mctab[t];
5811 					}
5812 					vgenp->mccount--;
5813 					break;
5814 				}
5815 			}
5816 		}
5817 		break;
5818 
5819 	}
5820 	DBG1(vgenp, ldcp, "exit\n");
5821 
5822 	return (VGEN_SUCCESS);
5823 }
5824 
5825 /*
5826  * Physical link information message from the peer. Only vswitch should send
5827  * us this message; if the vnet device has been configured to get physical link
5828  * state updates. Note that we must have already negotiated this with the
5829  * vswitch during attribute exchange phase of handshake.
5830  */
5831 static int
5832 vgen_handle_physlink_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5833 {
5834 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
5835 	vnet_physlink_msg_t	*msgp = (vnet_physlink_msg_t *)tagp;
5836 	link_state_t		link_state;
5837 	int			rv;
5838 
5839 	if (ldcp->portp != vgenp->vsw_portp) {
5840 		/*
5841 		 * drop the message and don't process; as we should
5842 		 * receive physlink_info message from only vswitch.
5843 		 */
5844 		return (VGEN_SUCCESS);
5845 	}
5846 
5847 	if (vgenp->pls_negotiated == B_FALSE) {
5848 		/*
5849 		 * drop the message and don't process; as we should receive
5850 		 * physlink_info message only if physlink update is enabled for
5851 		 * the device and negotiated with vswitch.
5852 		 */
5853 		return (VGEN_SUCCESS);
5854 	}
5855 
5856 	switch (tagp->vio_subtype) {
5857 
5858 	case VIO_SUBTYPE_INFO:
5859 
5860 		if ((msgp->physlink_info & VNET_PHYSLINK_STATE_MASK) ==
5861 		    VNET_PHYSLINK_STATE_UP) {
5862 			link_state = LINK_STATE_UP;
5863 		} else {
5864 			link_state = LINK_STATE_DOWN;
5865 		}
5866 
5867 		if (vgenp->phys_link_state != link_state) {
5868 			vgenp->phys_link_state = link_state;
5869 			mutex_exit(&ldcp->cblock);
5870 
5871 			/* Now update the stack */
5872 			vgen_link_update(vgenp, link_state);
5873 
5874 			mutex_enter(&ldcp->cblock);
5875 		}
5876 
5877 		tagp->vio_subtype = VIO_SUBTYPE_ACK;
5878 		tagp->vio_sid = ldcp->local_sid;
5879 
5880 		/* send reply msg back to peer */
5881 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp,
5882 		    sizeof (vnet_physlink_msg_t), B_FALSE);
5883 		if (rv != VGEN_SUCCESS) {
5884 			return (rv);
5885 		}
5886 		break;
5887 
5888 	case VIO_SUBTYPE_ACK:
5889 
5890 		/* vnet shouldn't recv physlink acks */
5891 		DWARN(vgenp, ldcp, "rcvd PHYSLINK_ACK \n");
5892 		break;
5893 
5894 	case VIO_SUBTYPE_NACK:
5895 
5896 		/* vnet shouldn't recv physlink nacks */
5897 		DWARN(vgenp, ldcp, "rcvd PHYSLINK_NACK \n");
5898 		break;
5899 
5900 	}
5901 	DBG1(vgenp, ldcp, "exit\n");
5902 
5903 	return (VGEN_SUCCESS);
5904 }
5905 
5906 /* handler for control messages received from the peer ldc end-point */
5907 static int
5908 vgen_handle_ctrlmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5909 {
5910 	int rv = 0;
5911 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5912 
5913 	DBG1(vgenp, ldcp, "enter\n");
5914 	switch (tagp->vio_subtype_env) {
5915 
5916 	case VIO_VER_INFO:
5917 		rv = vgen_handle_version_negotiate(ldcp, tagp);
5918 		break;
5919 
5920 	case VIO_ATTR_INFO:
5921 		rv = vgen_handle_attr_info(ldcp, tagp);
5922 		break;
5923 
5924 	case VIO_DRING_REG:
5925 		rv = vgen_handle_dring_reg(ldcp, tagp);
5926 		break;
5927 
5928 	case VIO_RDX:
5929 		rv = vgen_handle_rdx_info(ldcp, tagp);
5930 		break;
5931 
5932 	case VNET_MCAST_INFO:
5933 		rv = vgen_handle_mcast_info(ldcp, tagp);
5934 		break;
5935 
5936 	case VIO_DDS_INFO:
5937 		rv = vgen_dds_rx(ldcp, tagp);
5938 		break;
5939 
5940 	case VNET_PHYSLINK_INFO:
5941 		rv = vgen_handle_physlink_info(ldcp, tagp);
5942 		break;
5943 	}
5944 
5945 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
5946 	return (rv);
5947 }
5948 
5949 /* handler for data messages received from the peer ldc end-point */
5950 static int
5951 vgen_handle_datamsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp, uint32_t msglen)
5952 {
5953 	int rv = 0;
5954 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5955 
5956 	DBG1(vgenp, ldcp, "enter\n");
5957 
5958 	if (ldcp->hphase != VH_DONE)
5959 		return (rv);
5960 
5961 	if (tagp->vio_subtype == VIO_SUBTYPE_INFO) {
5962 		rv = vgen_check_datamsg_seq(ldcp, tagp);
5963 		if (rv != 0) {
5964 			return (rv);
5965 		}
5966 	}
5967 
5968 	switch (tagp->vio_subtype_env) {
5969 	case VIO_DRING_DATA:
5970 		rv = vgen_handle_dring_data(ldcp, tagp);
5971 		break;
5972 
5973 	case VIO_PKT_DATA:
5974 		ldcp->rx_pktdata((void *)ldcp, (void *)tagp, msglen);
5975 		break;
5976 	default:
5977 		break;
5978 	}
5979 
5980 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
5981 	return (rv);
5982 }
5983 
5984 /*
5985  * dummy pkt data handler function for vnet protocol version 1.0
5986  */
5987 static void
5988 vgen_handle_pkt_data_nop(void *arg1, void *arg2, uint32_t msglen)
5989 {
5990 	_NOTE(ARGUNUSED(arg1, arg2, msglen))
5991 }
5992 
5993 /*
5994  * This function handles raw pkt data messages received over the channel.
5995  * Currently, only priority-eth-type frames are received through this mechanism.
5996  * In this case, the frame(data) is present within the message itself which
5997  * is copied into an mblk before sending it up the stack.
5998  */
5999 static void
6000 vgen_handle_pkt_data(void *arg1, void *arg2, uint32_t msglen)
6001 {
6002 	vgen_ldc_t		*ldcp = (vgen_ldc_t *)arg1;
6003 	vio_raw_data_msg_t	*pkt	= (vio_raw_data_msg_t *)arg2;
6004 	uint32_t		size;
6005 	mblk_t			*mp;
6006 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
6007 	vgen_stats_t		*statsp = &ldcp->stats;
6008 	vgen_hparams_t		*lp = &ldcp->local_hparams;
6009 	vio_net_rx_cb_t		vrx_cb;
6010 
6011 	ASSERT(MUTEX_HELD(&ldcp->cblock));
6012 
6013 	mutex_exit(&ldcp->cblock);
6014 
6015 	size = msglen - VIO_PKT_DATA_HDRSIZE;
6016 	if (size < ETHERMIN || size > lp->mtu) {
6017 		(void) atomic_inc_32(&statsp->rx_pri_fail);
6018 		goto exit;
6019 	}
6020 
6021 	mp = vio_multipool_allocb(&ldcp->vmp, size);
6022 	if (mp == NULL) {
6023 		mp = allocb(size, BPRI_MED);
6024 		if (mp == NULL) {
6025 			(void) atomic_inc_32(&statsp->rx_pri_fail);
6026 			DWARN(vgenp, ldcp, "allocb failure, "
6027 			    "unable to process priority frame\n");
6028 			goto exit;
6029 		}
6030 	}
6031 
6032 	/* copy the frame from the payload of raw data msg into the mblk */
6033 	bcopy(pkt->data, mp->b_rptr, size);
6034 	mp->b_wptr = mp->b_rptr + size;
6035 
6036 	/* update stats */
6037 	(void) atomic_inc_64(&statsp->rx_pri_packets);
6038 	(void) atomic_add_64(&statsp->rx_pri_bytes, size);
6039 
6040 	/* send up; call vrx_cb() as cblock is already released */
6041 	vrx_cb = ldcp->portp->vcb.vio_net_rx_cb;
6042 	vrx_cb(ldcp->portp->vhp, mp);
6043 
6044 exit:
6045 	mutex_enter(&ldcp->cblock);
6046 }
6047 
6048 static int
6049 vgen_send_dring_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp, uint32_t start,
6050     int32_t end, uint8_t pstate)
6051 {
6052 	int rv = 0;
6053 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6054 	vio_dring_msg_t *msgp = (vio_dring_msg_t *)tagp;
6055 
6056 	tagp->vio_subtype = VIO_SUBTYPE_ACK;
6057 	tagp->vio_sid = ldcp->local_sid;
6058 	msgp->start_idx = start;
6059 	msgp->end_idx = end;
6060 	msgp->dring_process_state = pstate;
6061 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msgp), B_FALSE);
6062 	if (rv != VGEN_SUCCESS) {
6063 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
6064 	}
6065 	return (rv);
6066 }
6067 
6068 static int
6069 vgen_handle_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
6070 {
6071 	int rv = 0;
6072 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6073 
6074 
6075 	DBG1(vgenp, ldcp, "enter\n");
6076 	switch (tagp->vio_subtype) {
6077 
6078 	case VIO_SUBTYPE_INFO:
6079 		/*
6080 		 * To reduce the locking contention, release the
6081 		 * cblock here and re-acquire it once we are done
6082 		 * receiving packets.
6083 		 */
6084 		mutex_exit(&ldcp->cblock);
6085 		mutex_enter(&ldcp->rxlock);
6086 		rv = vgen_handle_dring_data_info(ldcp, tagp);
6087 		mutex_exit(&ldcp->rxlock);
6088 		mutex_enter(&ldcp->cblock);
6089 		break;
6090 
6091 	case VIO_SUBTYPE_ACK:
6092 		rv = vgen_handle_dring_data_ack(ldcp, tagp);
6093 		break;
6094 
6095 	case VIO_SUBTYPE_NACK:
6096 		rv = vgen_handle_dring_data_nack(ldcp, tagp);
6097 		break;
6098 	}
6099 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
6100 	return (rv);
6101 }
6102 
6103 static int
6104 vgen_handle_dring_data_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
6105 {
6106 	uint32_t start;
6107 	int32_t end;
6108 	int rv = 0;
6109 	vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;
6110 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6111 #ifdef VGEN_HANDLE_LOST_PKTS
6112 	vgen_stats_t *statsp = &ldcp->stats;
6113 	uint32_t rxi;
6114 	int n;
6115 #endif
6116 
6117 	DBG1(vgenp, ldcp, "enter\n");
6118 
6119 	start = dringmsg->start_idx;
6120 	end = dringmsg->end_idx;
6121 	/*
6122 	 * received a data msg, which contains the start and end
6123 	 * indices of the descriptors within the rx ring holding data,
6124 	 * the seq_num of data packet corresponding to the start index,
6125 	 * and the dring_ident.
6126 	 * We can now read the contents of each of these descriptors
6127 	 * and gather data from it.
6128 	 */
6129 	DBG1(vgenp, ldcp, "INFO: start(%d), end(%d)\n",
6130 	    start, end);
6131 
6132 	/* validate rx start and end indeces */
6133 	if (!(CHECK_RXI(start, ldcp)) || ((end != -1) &&
6134 	    !(CHECK_RXI(end, ldcp)))) {
6135 		DWARN(vgenp, ldcp, "Invalid Rx start(%d) or end(%d)\n",
6136 		    start, end);
6137 		/* drop the message if invalid index */
6138 		return (rv);
6139 	}
6140 
6141 	/* validate dring_ident */
6142 	if (dringmsg->dring_ident != ldcp->peer_hparams.dring_ident) {
6143 		DWARN(vgenp, ldcp, "Invalid dring ident 0x%x\n",
6144 		    dringmsg->dring_ident);
6145 		/* invalid dring_ident, drop the msg */
6146 		return (rv);
6147 	}
6148 #ifdef DEBUG
6149 	if (vgen_trigger_rxlost) {
6150 		/* drop this msg to simulate lost pkts for debugging */
6151 		vgen_trigger_rxlost = 0;
6152 		return (rv);
6153 	}
6154 #endif
6155 
6156 #ifdef	VGEN_HANDLE_LOST_PKTS
6157 
6158 	/* receive start index doesn't match expected index */
6159 	if (ldcp->next_rxi != start) {
6160 		DWARN(vgenp, ldcp, "next_rxi(%d) != start(%d)\n",
6161 		    ldcp->next_rxi, start);
6162 
6163 		/* calculate the number of pkts lost */
6164 		if (start >= ldcp->next_rxi) {
6165 			n = start - ldcp->next_rxi;
6166 		} else  {
6167 			n = ldcp->num_rxds - (ldcp->next_rxi - start);
6168 		}
6169 
6170 		statsp->rx_lost_pkts += n;
6171 		tagp->vio_subtype = VIO_SUBTYPE_NACK;
6172 		tagp->vio_sid = ldcp->local_sid;
6173 		/* indicate the range of lost descriptors */
6174 		dringmsg->start_idx = ldcp->next_rxi;
6175 		rxi = start;
6176 		DECR_RXI(rxi, ldcp);
6177 		dringmsg->end_idx = rxi;
6178 		/* dring ident is left unchanged */
6179 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp,
6180 		    sizeof (*dringmsg), B_FALSE);
6181 		if (rv != VGEN_SUCCESS) {
6182 			DWARN(vgenp, ldcp,
6183 			    "vgen_sendmsg failed, stype:NACK\n");
6184 			return (rv);
6185 		}
6186 		/*
6187 		 * treat this range of descrs/pkts as dropped
6188 		 * and set the new expected value of next_rxi
6189 		 * and continue(below) to process from the new
6190 		 * start index.
6191 		 */
6192 		ldcp->next_rxi = start;
6193 	}
6194 
6195 #endif	/* VGEN_HANDLE_LOST_PKTS */
6196 
6197 	/* Now receive messages */
6198 	rv = vgen_process_dring_data(ldcp, tagp);
6199 
6200 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
6201 	return (rv);
6202 }
6203 
6204 static int
6205 vgen_process_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
6206 {
6207 	boolean_t set_ack_start = B_FALSE;
6208 	uint32_t start;
6209 	uint32_t ack_end;
6210 	uint32_t next_rxi;
6211 	uint32_t rxi;
6212 	int count = 0;
6213 	int rv = 0;
6214 	uint32_t retries = 0;
6215 	vgen_stats_t *statsp;
6216 	vnet_public_desc_t rxd;
6217 	vio_dring_entry_hdr_t *hdrp;
6218 	mblk_t *bp = NULL;
6219 	mblk_t *bpt = NULL;
6220 	uint32_t ack_start;
6221 	boolean_t rxd_err = B_FALSE;
6222 	mblk_t *mp = NULL;
6223 	size_t nbytes;
6224 	boolean_t ack_needed = B_FALSE;
6225 	size_t nread;
6226 	uint64_t off = 0;
6227 	struct ether_header *ehp;
6228 	vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;
6229 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6230 	vgen_hparams_t	*lp = &ldcp->local_hparams;
6231 
6232 	DBG1(vgenp, ldcp, "enter\n");
6233 
6234 	statsp = &ldcp->stats;
6235 	start = dringmsg->start_idx;
6236 
6237 	/*
6238 	 * start processing the descriptors from the specified
6239 	 * start index, up to the index a descriptor is not ready
6240 	 * to be processed or we process the entire descriptor ring
6241 	 * and wrap around upto the start index.
6242 	 */
6243 
6244 	/* need to set the start index of descriptors to be ack'd */
6245 	set_ack_start = B_TRUE;
6246 
6247 	/* index upto which we have ack'd */
6248 	ack_end = start;
6249 	DECR_RXI(ack_end, ldcp);
6250 
6251 	next_rxi = rxi =  start;
6252 	do {
6253 vgen_recv_retry:
6254 		rv = vnet_dring_entry_copy(&(ldcp->rxdp[rxi]), &rxd,
6255 		    ldcp->dring_mtype, ldcp->rx_dhandle, rxi, rxi);
6256 		if (rv != 0) {
6257 			DWARN(vgenp, ldcp, "ldc_mem_dring_acquire() failed"
6258 			    " rv(%d)\n", rv);
6259 			statsp->ierrors++;
6260 			return (rv);
6261 		}
6262 
6263 		hdrp = &rxd.hdr;
6264 
6265 		if (hdrp->dstate != VIO_DESC_READY) {
6266 			/*
6267 			 * Before waiting and retry here, send up
6268 			 * the packets that are received already
6269 			 */
6270 			if (bp != NULL) {
6271 				DTRACE_PROBE1(vgen_rcv_msgs, int, count);
6272 				vgen_rx(ldcp, bp);
6273 				count = 0;
6274 				bp = bpt = NULL;
6275 			}
6276 			/*
6277 			 * descriptor is not ready.
6278 			 * retry descriptor acquire, stop processing
6279 			 * after max # retries.
6280 			 */
6281 			if (retries == vgen_recv_retries)
6282 				break;
6283 			retries++;
6284 			drv_usecwait(vgen_recv_delay);
6285 			goto vgen_recv_retry;
6286 		}
6287 		retries = 0;
6288 
6289 		if (set_ack_start) {
6290 			/*
6291 			 * initialize the start index of the range
6292 			 * of descriptors to be ack'd.
6293 			 */
6294 			ack_start = rxi;
6295 			set_ack_start = B_FALSE;
6296 		}
6297 
6298 		if ((rxd.nbytes < ETHERMIN) ||
6299 		    (rxd.nbytes > lp->mtu) ||
6300 		    (rxd.ncookies == 0) ||
6301 		    (rxd.ncookies > MAX_COOKIES)) {
6302 			rxd_err = B_TRUE;
6303 		} else {
6304 			/*
6305 			 * Try to allocate an mblk from the free pool
6306 			 * of recv mblks for the channel.
6307 			 * If this fails, use allocb().
6308 			 */
6309 			nbytes = (VNET_IPALIGN + rxd.nbytes + 7) & ~7;
6310 			if (nbytes > ldcp->max_rxpool_size) {
6311 				mp = allocb(VNET_IPALIGN + rxd.nbytes + 8,
6312 				    BPRI_MED);
6313 			} else {
6314 				mp = vio_multipool_allocb(&ldcp->vmp, nbytes);
6315 				if (mp == NULL) {
6316 					statsp->rx_vio_allocb_fail++;
6317 					/*
6318 					 * Data buffer returned by allocb(9F)
6319 					 * is 8byte aligned. We allocate extra
6320 					 * 8 bytes to ensure size is multiple
6321 					 * of 8 bytes for ldc_mem_copy().
6322 					 */
6323 					mp = allocb(VNET_IPALIGN +
6324 					    rxd.nbytes + 8, BPRI_MED);
6325 				}
6326 			}
6327 		}
6328 		if ((rxd_err) || (mp == NULL)) {
6329 			/*
6330 			 * rxd_err or allocb() failure,
6331 			 * drop this packet, get next.
6332 			 */
6333 			if (rxd_err) {
6334 				statsp->ierrors++;
6335 				rxd_err = B_FALSE;
6336 			} else {
6337 				statsp->rx_allocb_fail++;
6338 			}
6339 
6340 			ack_needed = hdrp->ack;
6341 
6342 			/* set descriptor done bit */
6343 			rv = vnet_dring_entry_set_dstate(&(ldcp->rxdp[rxi]),
6344 			    ldcp->dring_mtype, ldcp->rx_dhandle, rxi, rxi,
6345 			    VIO_DESC_DONE);
6346 			if (rv != 0) {
6347 				DWARN(vgenp, ldcp,
6348 				    "vnet_dring_entry_set_dstate err rv(%d)\n",
6349 				    rv);
6350 				return (rv);
6351 			}
6352 
6353 			if (ack_needed) {
6354 				ack_needed = B_FALSE;
6355 				/*
6356 				 * sender needs ack for this packet,
6357 				 * ack pkts upto this index.
6358 				 */
6359 				ack_end = rxi;
6360 
6361 				rv = vgen_send_dring_ack(ldcp, tagp,
6362 				    ack_start, ack_end,
6363 				    VIO_DP_ACTIVE);
6364 				if (rv != VGEN_SUCCESS) {
6365 					goto error_ret;
6366 				}
6367 
6368 				/* need to set new ack start index */
6369 				set_ack_start = B_TRUE;
6370 			}
6371 			goto vgen_next_rxi;
6372 		}
6373 
6374 		nread = nbytes;
6375 		rv = ldc_mem_copy(ldcp->ldc_handle,
6376 		    (caddr_t)mp->b_rptr, off, &nread,
6377 		    rxd.memcookie, rxd.ncookies, LDC_COPY_IN);
6378 
6379 		/* if ldc_mem_copy() failed */
6380 		if (rv) {
6381 			DWARN(vgenp, ldcp, "ldc_mem_copy err rv(%d)\n", rv);
6382 			statsp->ierrors++;
6383 			freemsg(mp);
6384 			goto error_ret;
6385 		}
6386 
6387 		ack_needed = hdrp->ack;
6388 
6389 		rv = vnet_dring_entry_set_dstate(&(ldcp->rxdp[rxi]),
6390 		    ldcp->dring_mtype, ldcp->rx_dhandle, rxi, rxi,
6391 		    VIO_DESC_DONE);
6392 		if (rv != 0) {
6393 			DWARN(vgenp, ldcp,
6394 			    "vnet_dring_entry_set_dstate err rv(%d)\n", rv);
6395 			goto error_ret;
6396 		}
6397 
6398 		mp->b_rptr += VNET_IPALIGN;
6399 
6400 		if (ack_needed) {
6401 			ack_needed = B_FALSE;
6402 			/*
6403 			 * sender needs ack for this packet,
6404 			 * ack pkts upto this index.
6405 			 */
6406 			ack_end = rxi;
6407 
6408 			rv = vgen_send_dring_ack(ldcp, tagp,
6409 			    ack_start, ack_end, VIO_DP_ACTIVE);
6410 			if (rv != VGEN_SUCCESS) {
6411 				goto error_ret;
6412 			}
6413 
6414 			/* need to set new ack start index */
6415 			set_ack_start = B_TRUE;
6416 		}
6417 
6418 		if (nread != nbytes) {
6419 			DWARN(vgenp, ldcp,
6420 			    "ldc_mem_copy nread(%lx), nbytes(%lx)\n",
6421 			    nread, nbytes);
6422 			statsp->ierrors++;
6423 			freemsg(mp);
6424 			goto vgen_next_rxi;
6425 		}
6426 
6427 		/* point to the actual end of data */
6428 		mp->b_wptr = mp->b_rptr + rxd.nbytes;
6429 
6430 		/* update stats */
6431 		statsp->ipackets++;
6432 		statsp->rbytes += rxd.nbytes;
6433 		ehp = (struct ether_header *)mp->b_rptr;
6434 		if (IS_BROADCAST(ehp))
6435 			statsp->brdcstrcv++;
6436 		else if (IS_MULTICAST(ehp))
6437 			statsp->multircv++;
6438 
6439 		/* build a chain of received packets */
6440 		if (bp == NULL) {
6441 			/* first pkt */
6442 			bp = mp;
6443 			bpt = bp;
6444 			bpt->b_next = NULL;
6445 		} else {
6446 			mp->b_next = NULL;
6447 			bpt->b_next = mp;
6448 			bpt = mp;
6449 		}
6450 
6451 		if (count++ > vgen_chain_len) {
6452 			DTRACE_PROBE1(vgen_rcv_msgs, int, count);
6453 			vgen_rx(ldcp, bp);
6454 			count = 0;
6455 			bp = bpt = NULL;
6456 		}
6457 
6458 vgen_next_rxi:
6459 		/* update end index of range of descrs to be ack'd */
6460 		ack_end = rxi;
6461 
6462 		/* update the next index to be processed */
6463 		INCR_RXI(next_rxi, ldcp);
6464 		if (next_rxi == start) {
6465 			/*
6466 			 * processed the entire descriptor ring upto
6467 			 * the index at which we started.
6468 			 */
6469 			break;
6470 		}
6471 
6472 		rxi = next_rxi;
6473 
6474 	_NOTE(CONSTCOND)
6475 	} while (1);
6476 
6477 	/*
6478 	 * send an ack message to peer indicating that we have stopped
6479 	 * processing descriptors.
6480 	 */
6481 	if (set_ack_start) {
6482 		/*
6483 		 * We have ack'd upto some index and we have not
6484 		 * processed any descriptors beyond that index.
6485 		 * Use the last ack'd index as both the start and
6486 		 * end of range of descrs being ack'd.
6487 		 * Note: This results in acking the last index twice
6488 		 * and should be harmless.
6489 		 */
6490 		ack_start = ack_end;
6491 	}
6492 
6493 	rv = vgen_send_dring_ack(ldcp, tagp, ack_start, ack_end,
6494 	    VIO_DP_STOPPED);
6495 	if (rv != VGEN_SUCCESS) {
6496 		goto error_ret;
6497 	}
6498 
6499 	/* save new recv index of next dring msg */
6500 	ldcp->next_rxi = next_rxi;
6501 
6502 error_ret:
6503 	/* send up packets received so far */
6504 	if (bp != NULL) {
6505 		DTRACE_PROBE1(vgen_rcv_msgs, int, count);
6506 		vgen_rx(ldcp, bp);
6507 		bp = bpt = NULL;
6508 	}
6509 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
6510 	return (rv);
6511 
6512 }
6513 
6514 static int
6515 vgen_handle_dring_data_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
6516 {
6517 	int rv = 0;
6518 	uint32_t start;
6519 	int32_t end;
6520 	uint32_t txi;
6521 	boolean_t ready_txd = B_FALSE;
6522 	vgen_stats_t *statsp;
6523 	vgen_private_desc_t *tbufp;
6524 	vnet_public_desc_t *txdp;
6525 	vio_dring_entry_hdr_t *hdrp;
6526 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6527 	vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;
6528 
6529 	DBG1(vgenp, ldcp, "enter\n");
6530 	start = dringmsg->start_idx;
6531 	end = dringmsg->end_idx;
6532 	statsp = &ldcp->stats;
6533 
6534 	/*
6535 	 * received an ack corresponding to a specific descriptor for
6536 	 * which we had set the ACK bit in the descriptor (during
6537 	 * transmit). This enables us to reclaim descriptors.
6538 	 */
6539 
6540 	DBG2(vgenp, ldcp, "ACK:  start(%d), end(%d)\n", start, end);
6541 
6542 	/* validate start and end indeces in the tx ack msg */
6543 	if (!(CHECK_TXI(start, ldcp)) || !(CHECK_TXI(end, ldcp))) {
6544 		/* drop the message if invalid index */
6545 		DWARN(vgenp, ldcp, "Invalid Tx ack start(%d) or end(%d)\n",
6546 		    start, end);
6547 		return (rv);
6548 	}
6549 	/* validate dring_ident */
6550 	if (dringmsg->dring_ident != ldcp->local_hparams.dring_ident) {
6551 		/* invalid dring_ident, drop the msg */
6552 		DWARN(vgenp, ldcp, "Invalid dring ident 0x%x\n",
6553 		    dringmsg->dring_ident);
6554 		return (rv);
6555 	}
6556 	statsp->dring_data_acks++;
6557 
6558 	/* reclaim descriptors that are done */
6559 	vgen_reclaim(ldcp);
6560 
6561 	if (dringmsg->dring_process_state != VIO_DP_STOPPED) {
6562 		/*
6563 		 * receiver continued processing descriptors after
6564 		 * sending us the ack.
6565 		 */
6566 		return (rv);
6567 	}
6568 
6569 	statsp->dring_stopped_acks++;
6570 
6571 	/* receiver stopped processing descriptors */
6572 	mutex_enter(&ldcp->wrlock);
6573 	mutex_enter(&ldcp->tclock);
6574 
6575 	/*
6576 	 * determine if there are any pending tx descriptors
6577 	 * ready to be processed by the receiver(peer) and if so,
6578 	 * send a message to the peer to restart receiving.
6579 	 */
6580 	ready_txd = B_FALSE;
6581 
6582 	/*
6583 	 * using the end index of the descriptor range for which
6584 	 * we received the ack, check if the next descriptor is
6585 	 * ready.
6586 	 */
6587 	txi = end;
6588 	INCR_TXI(txi, ldcp);
6589 	tbufp = &ldcp->tbufp[txi];
6590 	txdp = tbufp->descp;
6591 	hdrp = &txdp->hdr;
6592 	if (hdrp->dstate == VIO_DESC_READY) {
6593 		ready_txd = B_TRUE;
6594 	} else {
6595 		/*
6596 		 * descr next to the end of ack'd descr range is not
6597 		 * ready.
6598 		 * starting from the current reclaim index, check
6599 		 * if any descriptor is ready.
6600 		 */
6601 
6602 		txi = ldcp->cur_tbufp - ldcp->tbufp;
6603 		tbufp = &ldcp->tbufp[txi];
6604 
6605 		txdp = tbufp->descp;
6606 		hdrp = &txdp->hdr;
6607 		if (hdrp->dstate == VIO_DESC_READY) {
6608 			ready_txd = B_TRUE;
6609 		}
6610 
6611 	}
6612 
6613 	if (ready_txd) {
6614 		/*
6615 		 * we have tx descriptor(s) ready to be
6616 		 * processed by the receiver.
6617 		 * send a message to the peer with the start index
6618 		 * of ready descriptors.
6619 		 */
6620 		rv = vgen_send_dring_data(ldcp, txi, -1);
6621 		if (rv != VGEN_SUCCESS) {
6622 			ldcp->resched_peer = B_TRUE;
6623 			ldcp->resched_peer_txi = txi;
6624 			mutex_exit(&ldcp->tclock);
6625 			mutex_exit(&ldcp->wrlock);
6626 			return (rv);
6627 		}
6628 	} else {
6629 		/*
6630 		 * no ready tx descriptors. set the flag to send a
6631 		 * message to peer when tx descriptors are ready in
6632 		 * transmit routine.
6633 		 */
6634 		ldcp->resched_peer = B_TRUE;
6635 		ldcp->resched_peer_txi = ldcp->cur_tbufp - ldcp->tbufp;
6636 	}
6637 
6638 	mutex_exit(&ldcp->tclock);
6639 	mutex_exit(&ldcp->wrlock);
6640 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
6641 	return (rv);
6642 }
6643 
6644 static int
6645 vgen_handle_dring_data_nack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
6646 {
6647 	int rv = 0;
6648 	uint32_t start;
6649 	int32_t end;
6650 	uint32_t txi;
6651 	vnet_public_desc_t *txdp;
6652 	vio_dring_entry_hdr_t *hdrp;
6653 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6654 	vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;
6655 
6656 	DBG1(vgenp, ldcp, "enter\n");
6657 	start = dringmsg->start_idx;
6658 	end = dringmsg->end_idx;
6659 
6660 	/*
6661 	 * peer sent a NACK msg to indicate lost packets.
6662 	 * The start and end correspond to the range of descriptors
6663 	 * for which the peer didn't receive a dring data msg and so
6664 	 * didn't receive the corresponding data.
6665 	 */
6666 	DWARN(vgenp, ldcp, "NACK: start(%d), end(%d)\n", start, end);
6667 
6668 	/* validate start and end indeces in the tx nack msg */
6669 	if (!(CHECK_TXI(start, ldcp)) || !(CHECK_TXI(end, ldcp))) {
6670 		/* drop the message if invalid index */
6671 		DWARN(vgenp, ldcp, "Invalid Tx nack start(%d) or end(%d)\n",
6672 		    start, end);
6673 		return (rv);
6674 	}
6675 	/* validate dring_ident */
6676 	if (dringmsg->dring_ident != ldcp->local_hparams.dring_ident) {
6677 		/* invalid dring_ident, drop the msg */
6678 		DWARN(vgenp, ldcp, "Invalid dring ident 0x%x\n",
6679 		    dringmsg->dring_ident);
6680 		return (rv);
6681 	}
6682 	mutex_enter(&ldcp->txlock);
6683 	mutex_enter(&ldcp->tclock);
6684 
6685 	if (ldcp->next_tbufp == ldcp->cur_tbufp) {
6686 		/* no busy descriptors, bogus nack ? */
6687 		mutex_exit(&ldcp->tclock);
6688 		mutex_exit(&ldcp->txlock);
6689 		return (rv);
6690 	}
6691 
6692 	/* we just mark the descrs as done so they can be reclaimed */
6693 	for (txi = start; txi <= end; ) {
6694 		txdp = &(ldcp->txdp[txi]);
6695 		hdrp = &txdp->hdr;
6696 		if (hdrp->dstate == VIO_DESC_READY)
6697 			hdrp->dstate = VIO_DESC_DONE;
6698 		INCR_TXI(txi, ldcp);
6699 	}
6700 	mutex_exit(&ldcp->tclock);
6701 	mutex_exit(&ldcp->txlock);
6702 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
6703 	return (rv);
6704 }
6705 
6706 static void
6707 vgen_reclaim(vgen_ldc_t *ldcp)
6708 {
6709 	mutex_enter(&ldcp->tclock);
6710 
6711 	vgen_reclaim_dring(ldcp);
6712 	ldcp->reclaim_lbolt = ddi_get_lbolt();
6713 
6714 	mutex_exit(&ldcp->tclock);
6715 }
6716 
6717 /*
6718  * transmit reclaim function. starting from the current reclaim index
6719  * look for descriptors marked DONE and reclaim the descriptor and the
6720  * corresponding buffers (tbuf).
6721  */
6722 static void
6723 vgen_reclaim_dring(vgen_ldc_t *ldcp)
6724 {
6725 	int count = 0;
6726 	vnet_public_desc_t *txdp;
6727 	vgen_private_desc_t *tbufp;
6728 	vio_dring_entry_hdr_t	*hdrp;
6729 
6730 #ifdef DEBUG
6731 	if (vgen_trigger_txtimeout)
6732 		return;
6733 #endif
6734 
6735 	tbufp = ldcp->cur_tbufp;
6736 	txdp = tbufp->descp;
6737 	hdrp = &txdp->hdr;
6738 
6739 	while ((hdrp->dstate == VIO_DESC_DONE) &&
6740 	    (tbufp != ldcp->next_tbufp)) {
6741 		tbufp->flags = VGEN_PRIV_DESC_FREE;
6742 		hdrp->dstate = VIO_DESC_FREE;
6743 		hdrp->ack = B_FALSE;
6744 
6745 		tbufp = NEXTTBUF(ldcp, tbufp);
6746 		txdp = tbufp->descp;
6747 		hdrp = &txdp->hdr;
6748 		count++;
6749 	}
6750 
6751 	ldcp->cur_tbufp = tbufp;
6752 
6753 	/*
6754 	 * Check if mac layer should be notified to restart transmissions
6755 	 */
6756 	if ((ldcp->need_resched) && (count > 0)) {
6757 		vio_net_tx_update_t vtx_update =
6758 		    ldcp->portp->vcb.vio_net_tx_update;
6759 
6760 		ldcp->need_resched = B_FALSE;
6761 		vtx_update(ldcp->portp->vhp);
6762 	}
6763 }
6764 
6765 /* return the number of pending transmits for the channel */
6766 static int
6767 vgen_num_txpending(vgen_ldc_t *ldcp)
6768 {
6769 	int n;
6770 
6771 	if (ldcp->next_tbufp >= ldcp->cur_tbufp) {
6772 		n = ldcp->next_tbufp - ldcp->cur_tbufp;
6773 	} else  {
6774 		/* cur_tbufp > next_tbufp */
6775 		n = ldcp->num_txds - (ldcp->cur_tbufp - ldcp->next_tbufp);
6776 	}
6777 
6778 	return (n);
6779 }
6780 
6781 /* determine if the transmit descriptor ring is full */
6782 static int
6783 vgen_tx_dring_full(vgen_ldc_t *ldcp)
6784 {
6785 	vgen_private_desc_t	*tbufp;
6786 	vgen_private_desc_t	*ntbufp;
6787 
6788 	tbufp = ldcp->next_tbufp;
6789 	ntbufp = NEXTTBUF(ldcp, tbufp);
6790 	if (ntbufp == ldcp->cur_tbufp) { /* out of tbufs/txds */
6791 		return (VGEN_SUCCESS);
6792 	}
6793 	return (VGEN_FAILURE);
6794 }
6795 
6796 /* determine if timeout condition has occured */
6797 static int
6798 vgen_ldc_txtimeout(vgen_ldc_t *ldcp)
6799 {
6800 	if (((ddi_get_lbolt() - ldcp->reclaim_lbolt) >
6801 	    drv_usectohz(vnet_ldcwd_txtimeout * 1000)) &&
6802 	    (vnet_ldcwd_txtimeout) &&
6803 	    (vgen_tx_dring_full(ldcp) == VGEN_SUCCESS)) {
6804 		return (VGEN_SUCCESS);
6805 	} else {
6806 		return (VGEN_FAILURE);
6807 	}
6808 }
6809 
6810 /* transmit watchdog timeout handler */
6811 static void
6812 vgen_ldc_watchdog(void *arg)
6813 {
6814 	vgen_ldc_t *ldcp;
6815 	vgen_t *vgenp;
6816 	int rv;
6817 
6818 	ldcp = (vgen_ldc_t *)arg;
6819 	vgenp = LDC_TO_VGEN(ldcp);
6820 
6821 	rv = vgen_ldc_txtimeout(ldcp);
6822 	if (rv == VGEN_SUCCESS) {
6823 		DWARN(vgenp, ldcp, "transmit timeout\n");
6824 #ifdef DEBUG
6825 		if (vgen_trigger_txtimeout) {
6826 			/* tx timeout triggered for debugging */
6827 			vgen_trigger_txtimeout = 0;
6828 		}
6829 #endif
6830 		mutex_enter(&ldcp->cblock);
6831 		ldcp->need_ldc_reset = B_TRUE;
6832 		vgen_handshake_retry(ldcp);
6833 		mutex_exit(&ldcp->cblock);
6834 		if (ldcp->need_resched) {
6835 			vio_net_tx_update_t vtx_update =
6836 			    ldcp->portp->vcb.vio_net_tx_update;
6837 
6838 			ldcp->need_resched = B_FALSE;
6839 			vtx_update(ldcp->portp->vhp);
6840 		}
6841 	}
6842 
6843 	ldcp->wd_tid = timeout(vgen_ldc_watchdog, (caddr_t)ldcp,
6844 	    drv_usectohz(vnet_ldcwd_interval * 1000));
6845 }
6846 
6847 /* handler for error messages received from the peer ldc end-point */
6848 static void
6849 vgen_handle_errmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
6850 {
6851 	_NOTE(ARGUNUSED(ldcp, tagp))
6852 }
6853 
6854 static int
6855 vgen_check_datamsg_seq(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
6856 {
6857 	vio_raw_data_msg_t	*rmsg;
6858 	vio_dring_msg_t		*dmsg;
6859 	uint64_t		seq_num;
6860 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
6861 
6862 	if (tagp->vio_subtype_env == VIO_DRING_DATA) {
6863 		dmsg = (vio_dring_msg_t *)tagp;
6864 		seq_num = dmsg->seq_num;
6865 	} else if (tagp->vio_subtype_env == VIO_PKT_DATA) {
6866 		rmsg = (vio_raw_data_msg_t *)tagp;
6867 		seq_num = rmsg->seq_num;
6868 	} else {
6869 		return (EINVAL);
6870 	}
6871 
6872 	if (seq_num != ldcp->next_rxseq) {
6873 
6874 		/* seqnums don't match */
6875 		DWARN(vgenp, ldcp,
6876 		    "next_rxseq(0x%lx) != seq_num(0x%lx)\n",
6877 		    ldcp->next_rxseq, seq_num);
6878 
6879 		ldcp->need_ldc_reset = B_TRUE;
6880 		return (EINVAL);
6881 
6882 	}
6883 
6884 	ldcp->next_rxseq++;
6885 
6886 	return (0);
6887 }
6888 
6889 /* Check if the session id in the received message is valid */
6890 static int
6891 vgen_check_sid(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
6892 {
6893 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6894 
6895 	if (tagp->vio_sid != ldcp->peer_sid) {
6896 		DWARN(vgenp, ldcp, "sid mismatch: expected(%x), rcvd(%x)\n",
6897 		    ldcp->peer_sid, tagp->vio_sid);
6898 		return (VGEN_FAILURE);
6899 	}
6900 	else
6901 		return (VGEN_SUCCESS);
6902 }
6903 
/*
 * Format the 6-byte ethernet address 'a' into 'ebuf' as colon separated
 * hex octets (without zero padding) and return 'ebuf'.
 * The write is unbounded: 'ebuf' must have room for the longest result,
 * "xx:xx:xx:xx:xx:xx" plus the terminating NUL (18 bytes).
 */
static caddr_t
vgen_print_ethaddr(uint8_t *a, char *ebuf)
{
	const uint8_t	*octet = a;

	(void) sprintf(ebuf, "%x:%x:%x:%x:%x:%x",
	    octet[0], octet[1], octet[2],
	    octet[3], octet[4], octet[5]);

	return (ebuf);
}
6911 
6912 /* Handshake watchdog timeout handler */
6913 static void
6914 vgen_hwatchdog(void *arg)
6915 {
6916 	vgen_ldc_t *ldcp = (vgen_ldc_t *)arg;
6917 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6918 
6919 	DWARN(vgenp, ldcp,
6920 	    "handshake timeout ldc(%lx) phase(%x) state(%x)\n",
6921 	    ldcp->hphase, ldcp->hstate);
6922 
6923 	mutex_enter(&ldcp->cblock);
6924 	if (ldcp->cancel_htid) {
6925 		ldcp->cancel_htid = 0;
6926 		mutex_exit(&ldcp->cblock);
6927 		return;
6928 	}
6929 	ldcp->htid = 0;
6930 	ldcp->need_ldc_reset = B_TRUE;
6931 	vgen_handshake_retry(ldcp);
6932 	mutex_exit(&ldcp->cblock);
6933 }
6934 
6935 static void
6936 vgen_print_hparams(vgen_hparams_t *hp)
6937 {
6938 	uint8_t	addr[6];
6939 	char	ea[6];
6940 	ldc_mem_cookie_t *dc;
6941 
6942 	cmn_err(CE_CONT, "version_info:\n");
6943 	cmn_err(CE_CONT,
6944 	    "\tver_major: %d, ver_minor: %d, dev_class: %d\n",
6945 	    hp->ver_major, hp->ver_minor, hp->dev_class);
6946 
6947 	vnet_macaddr_ultostr(hp->addr, addr);
6948 	cmn_err(CE_CONT, "attr_info:\n");
6949 	cmn_err(CE_CONT, "\tMTU: %lx, addr: %s\n", hp->mtu,
6950 	    vgen_print_ethaddr(addr, ea));
6951 	cmn_err(CE_CONT,
6952 	    "\taddr_type: %x, xfer_mode: %x, ack_freq: %x\n",
6953 	    hp->addr_type, hp->xfer_mode, hp->ack_freq);
6954 
6955 	dc = &hp->dring_cookie;
6956 	cmn_err(CE_CONT, "dring_info:\n");
6957 	cmn_err(CE_CONT,
6958 	    "\tlength: %d, dsize: %d\n", hp->num_desc, hp->desc_size);
6959 	cmn_err(CE_CONT,
6960 	    "\tldc_addr: 0x%lx, ldc_size: %ld\n",
6961 	    dc->addr, dc->size);
6962 	cmn_err(CE_CONT, "\tdring_ident: 0x%lx\n", hp->dring_ident);
6963 }
6964 
6965 static void
6966 vgen_print_ldcinfo(vgen_ldc_t *ldcp)
6967 {
6968 	vgen_hparams_t *hp;
6969 
6970 	cmn_err(CE_CONT, "Channel Information:\n");
6971 	cmn_err(CE_CONT,
6972 	    "\tldc_id: 0x%lx, ldc_status: 0x%x\n",
6973 	    ldcp->ldc_id, ldcp->ldc_status);
6974 	cmn_err(CE_CONT,
6975 	    "\tlocal_sid: 0x%x, peer_sid: 0x%x\n",
6976 	    ldcp->local_sid, ldcp->peer_sid);
6977 	cmn_err(CE_CONT,
6978 	    "\thphase: 0x%x, hstate: 0x%x\n",
6979 	    ldcp->hphase, ldcp->hstate);
6980 
6981 	cmn_err(CE_CONT, "Local handshake params:\n");
6982 	hp = &ldcp->local_hparams;
6983 	vgen_print_hparams(hp);
6984 
6985 	cmn_err(CE_CONT, "Peer handshake params:\n");
6986 	hp = &ldcp->peer_hparams;
6987 	vgen_print_hparams(hp);
6988 }
6989 
6990 /*
6991  * Send received packets up the stack.
6992  */
6993 static void
6994 vgen_rx(vgen_ldc_t *ldcp, mblk_t *bp)
6995 {
6996 	vio_net_rx_cb_t vrx_cb = ldcp->portp->vcb.vio_net_rx_cb;
6997 
6998 	if (ldcp->rcv_thread != NULL) {
6999 		ASSERT(MUTEX_HELD(&ldcp->rxlock));
7000 		mutex_exit(&ldcp->rxlock);
7001 	} else {
7002 		ASSERT(MUTEX_HELD(&ldcp->cblock));
7003 		mutex_exit(&ldcp->cblock);
7004 	}
7005 
7006 	vrx_cb(ldcp->portp->vhp, bp);
7007 
7008 	if (ldcp->rcv_thread != NULL) {
7009 		mutex_enter(&ldcp->rxlock);
7010 	} else {
7011 		mutex_enter(&ldcp->cblock);
7012 	}
7013 }
7014 
7015 /*
7016  * vgen_ldc_rcv_worker -- A per LDC worker thread to receive data.
7017  * This thread is woken up by the LDC interrupt handler to process
7018  * LDC packets and receive data.
7019  */
7020 static void
7021 vgen_ldc_rcv_worker(void *arg)
7022 {
7023 	callb_cpr_t	cprinfo;
7024 	vgen_ldc_t *ldcp = (vgen_ldc_t *)arg;
7025 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
7026 
7027 	DBG1(vgenp, ldcp, "enter\n");
7028 	CALLB_CPR_INIT(&cprinfo, &ldcp->rcv_thr_lock, callb_generic_cpr,
7029 	    "vnet_rcv_thread");
7030 	mutex_enter(&ldcp->rcv_thr_lock);
7031 	while (!(ldcp->rcv_thr_flags & VGEN_WTHR_STOP)) {
7032 
7033 		CALLB_CPR_SAFE_BEGIN(&cprinfo);
7034 		/*
7035 		 * Wait until the data is received or a stop
7036 		 * request is received.
7037 		 */
7038 		while (!(ldcp->rcv_thr_flags &
7039 		    (VGEN_WTHR_DATARCVD | VGEN_WTHR_STOP))) {
7040 			cv_wait(&ldcp->rcv_thr_cv, &ldcp->rcv_thr_lock);
7041 		}
7042 		CALLB_CPR_SAFE_END(&cprinfo, &ldcp->rcv_thr_lock)
7043 
7044 		/*
7045 		 * First process the stop request.
7046 		 */
7047 		if (ldcp->rcv_thr_flags & VGEN_WTHR_STOP) {
7048 			DBG2(vgenp, ldcp, "stopped\n");
7049 			break;
7050 		}
7051 		ldcp->rcv_thr_flags &= ~VGEN_WTHR_DATARCVD;
7052 		ldcp->rcv_thr_flags |= VGEN_WTHR_PROCESSING;
7053 		mutex_exit(&ldcp->rcv_thr_lock);
7054 		DBG2(vgenp, ldcp, "calling vgen_handle_evt_read\n");
7055 		vgen_handle_evt_read(ldcp);
7056 		mutex_enter(&ldcp->rcv_thr_lock);
7057 		ldcp->rcv_thr_flags &= ~VGEN_WTHR_PROCESSING;
7058 	}
7059 
7060 	/*
7061 	 * Update the run status and wakeup the thread that
7062 	 * has sent the stop request.
7063 	 */
7064 	ldcp->rcv_thr_flags &= ~VGEN_WTHR_STOP;
7065 	ldcp->rcv_thread = NULL;
7066 	CALLB_CPR_EXIT(&cprinfo);
7067 
7068 	thread_exit();
7069 	DBG1(vgenp, ldcp, "exit\n");
7070 }
7071 
7072 /* vgen_stop_rcv_thread -- Co-ordinate with receive thread to stop it */
7073 static void
7074 vgen_stop_rcv_thread(vgen_ldc_t *ldcp)
7075 {
7076 	kt_did_t	tid = 0;
7077 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
7078 
7079 	DBG1(vgenp, ldcp, "enter\n");
7080 	/*
7081 	 * Send a stop request by setting the stop flag and
7082 	 * wait until the receive thread stops.
7083 	 */
7084 	mutex_enter(&ldcp->rcv_thr_lock);
7085 	if (ldcp->rcv_thread != NULL) {
7086 		tid = ldcp->rcv_thread->t_did;
7087 		ldcp->rcv_thr_flags |= VGEN_WTHR_STOP;
7088 		cv_signal(&ldcp->rcv_thr_cv);
7089 	}
7090 	mutex_exit(&ldcp->rcv_thr_lock);
7091 
7092 	if (tid != 0) {
7093 		thread_join(tid);
7094 	}
7095 	DBG1(vgenp, ldcp, "exit\n");
7096 }
7097 
7098 /*
7099  * Wait for the channel rx-queue to be drained by allowing the receive
7100  * worker thread to read all messages from the rx-queue of the channel.
7101  * Assumption: further callbacks are disabled at this time.
7102  */
7103 static void
7104 vgen_drain_rcv_thread(vgen_ldc_t *ldcp)
7105 {
7106 	clock_t	tm;
7107 	clock_t	wt;
7108 	clock_t	rv;
7109 
7110 	/*
7111 	 * If there is data in ldc rx queue, wait until the rx
7112 	 * worker thread runs and drains all msgs in the queue.
7113 	 */
7114 	wt = drv_usectohz(MILLISEC);
7115 
7116 	mutex_enter(&ldcp->rcv_thr_lock);
7117 
7118 	tm = ddi_get_lbolt() + wt;
7119 
7120 	/*
7121 	 * We need to check both bits - DATARCVD and PROCESSING, to be cleared.
7122 	 * If DATARCVD is set, that means the callback has signalled the worker
7123 	 * thread, but the worker hasn't started processing yet. If PROCESSING
7124 	 * is set, that means the thread is awake and processing. Note that the
7125 	 * DATARCVD state can only be seen once, as the assumption is that
7126 	 * further callbacks have been disabled at this point.
7127 	 */
7128 	while (ldcp->rcv_thr_flags &
7129 	    (VGEN_WTHR_DATARCVD | VGEN_WTHR_PROCESSING)) {
7130 		rv = cv_timedwait(&ldcp->rcv_thr_cv, &ldcp->rcv_thr_lock, tm);
7131 		if (rv == -1) {	/* timeout */
7132 			/*
7133 			 * Note that the only way we return is due to a timeout;
7134 			 * we set the new time to wait, before we go back and
7135 			 * check the condition. The other(unlikely) possibility
7136 			 * is a premature wakeup(see cv_timedwait(9F)) in which
7137 			 * case we just continue to use the same time to wait.
7138 			 */
7139 			tm = ddi_get_lbolt() + wt;
7140 		}
7141 	}
7142 
7143 	mutex_exit(&ldcp->rcv_thr_lock);
7144 }
7145 
7146 /*
7147  * vgen_dds_rx -- post DDS messages to vnet.
7148  */
7149 static int
7150 vgen_dds_rx(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
7151 {
7152 	vio_dds_msg_t *dmsg = (vio_dds_msg_t *)tagp;
7153 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
7154 
7155 	if (dmsg->dds_class != DDS_VNET_NIU) {
7156 		DWARN(vgenp, ldcp, "Unknown DDS class, dropping");
7157 		return (EBADMSG);
7158 	}
7159 	vnet_dds_rx(vgenp->vnetp, dmsg);
7160 	return (0);
7161 }
7162 
7163 /*
7164  * vgen_dds_tx -- an interface called by vnet to send DDS messages.
7165  */
7166 int
7167 vgen_dds_tx(void *arg, void *msg)
7168 {
7169 	vgen_t *vgenp = arg;
7170 	vio_dds_msg_t *dmsg = msg;
7171 	vgen_portlist_t *plistp = &vgenp->vgenports;
7172 	vgen_ldc_t *ldcp;
7173 	vgen_ldclist_t *ldclp;
7174 	int rv = EIO;
7175 
7176 
7177 	READ_ENTER(&plistp->rwlock);
7178 	ldclp = &(vgenp->vsw_portp->ldclist);
7179 	READ_ENTER(&ldclp->rwlock);
7180 	ldcp = ldclp->headp;
7181 	if ((ldcp == NULL) || (ldcp->hphase != VH_DONE)) {
7182 		goto vgen_dsend_exit;
7183 	}
7184 
7185 	dmsg->tag.vio_sid = ldcp->local_sid;
7186 	rv = vgen_sendmsg(ldcp, (caddr_t)dmsg, sizeof (vio_dds_msg_t), B_FALSE);
7187 	if (rv != VGEN_SUCCESS) {
7188 		rv = EIO;
7189 	} else {
7190 		rv = 0;
7191 	}
7192 
7193 vgen_dsend_exit:
7194 	RW_EXIT(&ldclp->rwlock);
7195 	RW_EXIT(&plistp->rwlock);
7196 	return (rv);
7197 
7198 }
7199 
7200 #if DEBUG
7201 
7202 /*
7203  * Print debug messages - set to 0xf to enable all msgs
7204  */
7205 static void
7206 debug_printf(const char *fname, vgen_t *vgenp,
7207     vgen_ldc_t *ldcp, const char *fmt, ...)
7208 {
7209 	char    buf[256];
7210 	char    *bufp = buf;
7211 	va_list ap;
7212 
7213 	if ((vgenp != NULL) && (vgenp->vnetp != NULL)) {
7214 		(void) sprintf(bufp, "vnet%d:",
7215 		    ((vnet_t *)(vgenp->vnetp))->instance);
7216 		bufp += strlen(bufp);
7217 	}
7218 	if (ldcp != NULL) {
7219 		(void) sprintf(bufp, "ldc(%ld):", ldcp->ldc_id);
7220 		bufp += strlen(bufp);
7221 	}
7222 	(void) sprintf(bufp, "%s: ", fname);
7223 	bufp += strlen(bufp);
7224 
7225 	va_start(ap, fmt);
7226 	(void) vsprintf(bufp, fmt, ap);
7227 	va_end(ap);
7228 
7229 	if ((ldcp == NULL) ||(vgendbg_ldcid == -1) ||
7230 	    (vgendbg_ldcid == ldcp->ldc_id)) {
7231 		cmn_err(CE_CONT, "%s\n", buf);
7232 	}
7233 }
7234 #endif
7235 
7236 #ifdef	VNET_IOC_DEBUG
7237 
7238 static void
7239 vgen_ioctl(void *arg, queue_t *q, mblk_t *mp)
7240 {
7241 	struct iocblk	*iocp;
7242 	vgen_port_t	*portp;
7243 	enum		ioc_reply {
7244 			IOC_INVAL = -1,		/* bad, NAK with EINVAL */
7245 			IOC_ACK			/* OK, just send ACK    */
7246 	}		status;
7247 	int		rv;
7248 
7249 	iocp = (struct iocblk *)(uintptr_t)mp->b_rptr;
7250 	iocp->ioc_error = 0;
7251 	portp = (vgen_port_t *)arg;
7252 
7253 	if (portp == NULL) {
7254 		status = IOC_INVAL;
7255 		goto vgen_ioc_exit;
7256 	}
7257 
7258 	mutex_enter(&portp->lock);
7259 
7260 	switch (iocp->ioc_cmd) {
7261 
7262 	case VNET_FORCE_LINK_DOWN:
7263 	case VNET_FORCE_LINK_UP:
7264 		rv = vgen_force_link_state(portp, iocp->ioc_cmd);
7265 		(rv == 0) ? (status = IOC_ACK) : (status = IOC_INVAL);
7266 		break;
7267 
7268 	default:
7269 		status = IOC_INVAL;
7270 		break;
7271 
7272 	}
7273 
7274 	mutex_exit(&portp->lock);
7275 
7276 vgen_ioc_exit:
7277 
7278 	switch (status) {
7279 	default:
7280 	case IOC_INVAL:
7281 		/* Error, reply with a NAK and EINVAL error */
7282 		miocnak(q, mp, 0, EINVAL);
7283 		break;
7284 	case IOC_ACK:
7285 		/* OK, reply with an ACK */
7286 		miocack(q, mp, 0, 0);
7287 		break;
7288 	}
7289 }
7290 
7291 static int
7292 vgen_force_link_state(vgen_port_t *portp, int cmd)
7293 {
7294 	ldc_status_t	istatus;
7295 	vgen_ldclist_t	*ldclp;
7296 	vgen_ldc_t	*ldcp;
7297 	vgen_t		*vgenp = portp->vgenp;
7298 	int		rv;
7299 
7300 	ldclp = &portp->ldclist;
7301 	READ_ENTER(&ldclp->rwlock);
7302 
7303 	/*
7304 	 * NOTE: for now, we will assume we have a single channel.
7305 	 */
7306 	if (ldclp->headp == NULL) {
7307 		RW_EXIT(&ldclp->rwlock);
7308 		return (1);
7309 	}
7310 	ldcp = ldclp->headp;
7311 	mutex_enter(&ldcp->cblock);
7312 
7313 	switch (cmd) {
7314 
7315 	case VNET_FORCE_LINK_DOWN:
7316 		(void) ldc_down(ldcp->ldc_handle);
7317 		ldcp->link_down_forced = B_TRUE;
7318 		break;
7319 
7320 	case VNET_FORCE_LINK_UP:
7321 		rv = ldc_up(ldcp->ldc_handle);
7322 		if (rv != 0) {
7323 			DWARN(vgenp, ldcp, "ldc_up err rv(%d)\n", rv);
7324 		}
7325 		ldcp->link_down_forced = B_FALSE;
7326 
7327 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
7328 			DWARN(vgenp, ldcp, "ldc_status err\n");
7329 		} else {
7330 			ldcp->ldc_status = istatus;
7331 		}
7332 
7333 		/* if channel is already UP - restart handshake */
7334 		if (ldcp->ldc_status == LDC_UP) {
7335 			vgen_handle_evt_up(ldcp);
7336 		}
7337 		break;
7338 
7339 	}
7340 
7341 	mutex_exit(&ldcp->cblock);
7342 	RW_EXIT(&ldclp->rwlock);
7343 
7344 	return (0);
7345 }
7346 
7347 #else
7348 
7349 static void
7350 vgen_ioctl(void *arg, queue_t *q, mblk_t *mp)
7351 {
7352 	vgen_port_t	*portp;
7353 
7354 	portp = (vgen_port_t *)arg;
7355 
7356 	if (portp == NULL) {
7357 		miocnak(q, mp, 0, EINVAL);
7358 		return;
7359 	}
7360 
7361 	miocnak(q, mp, 0, ENOTSUP);
7362 }
7363 
7364 #endif
7365