xref: /illumos-gate/usr/src/uts/sun4v/io/vnet_gen.c (revision 1843d056c3ae0950dec6c3453e589966ba280bee)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/types.h>
28 #include <sys/errno.h>
29 #include <sys/sysmacros.h>
30 #include <sys/param.h>
31 #include <sys/stream.h>
32 #include <sys/strsubr.h>
33 #include <sys/kmem.h>
34 #include <sys/conf.h>
35 #include <sys/devops.h>
36 #include <sys/ksynch.h>
37 #include <sys/stat.h>
38 #include <sys/modctl.h>
39 #include <sys/debug.h>
40 #include <sys/ethernet.h>
41 #include <sys/ddi.h>
42 #include <sys/sunddi.h>
43 #include <sys/strsun.h>
44 #include <sys/note.h>
45 #include <sys/mac.h>
46 #include <sys/mac_ether.h>
47 #include <sys/ldc.h>
48 #include <sys/mach_descrip.h>
49 #include <sys/mdeg.h>
50 #include <net/if.h>
51 #include <sys/vnet.h>
52 #include <sys/vio_mailbox.h>
53 #include <sys/vio_common.h>
54 #include <sys/vnet_common.h>
55 #include <sys/vnet_mailbox.h>
56 #include <sys/vio_util.h>
57 #include <sys/vnet_gen.h>
58 #include <sys/atomic.h>
59 #include <sys/callb.h>
60 #include <sys/sdt.h>
61 #include <sys/intr.h>
62 #include <sys/pattr.h>
63 #include <sys/vlan.h>
64 
65 /*
66  * Implementation of the mac functionality for vnet using the
67  * generic(default) transport layer of sun4v Logical Domain Channels(LDC).
68  */
69 
70 /*
71  * Function prototypes.
72  */
73 /* vgen proxy entry points */
74 int vgen_init(void *vnetp, uint64_t regprop, dev_info_t *vnetdip,
75     const uint8_t *macaddr, void **vgenhdl);
76 int vgen_uninit(void *arg);
77 int vgen_dds_tx(void *arg, void *dmsg);
78 static int vgen_start(void *arg);
79 static void vgen_stop(void *arg);
80 static mblk_t *vgen_tx(void *arg, mblk_t *mp);
81 static int vgen_multicst(void *arg, boolean_t add,
82 	const uint8_t *mca);
83 static int vgen_promisc(void *arg, boolean_t on);
84 static int vgen_unicst(void *arg, const uint8_t *mca);
85 static int vgen_stat(void *arg, uint_t stat, uint64_t *val);
86 static void vgen_ioctl(void *arg, queue_t *wq, mblk_t *mp);
87 
88 /* vgen internal functions */
89 static int vgen_read_mdprops(vgen_t *vgenp);
90 static void vgen_update_md_prop(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
91 static void vgen_read_pri_eth_types(vgen_t *vgenp, md_t *mdp,
92 	mde_cookie_t node);
93 static void vgen_mtu_read(vgen_t *vgenp, md_t *mdp, mde_cookie_t node,
94 	uint32_t *mtu);
95 static void vgen_detach_ports(vgen_t *vgenp);
96 static void vgen_port_detach(vgen_port_t *portp);
97 static void vgen_port_list_insert(vgen_port_t *portp);
98 static void vgen_port_list_remove(vgen_port_t *portp);
99 static vgen_port_t *vgen_port_lookup(vgen_portlist_t *plistp,
100 	int port_num);
101 static int vgen_mdeg_reg(vgen_t *vgenp);
102 static void vgen_mdeg_unreg(vgen_t *vgenp);
103 static int vgen_mdeg_cb(void *cb_argp, mdeg_result_t *resp);
104 static int vgen_mdeg_port_cb(void *cb_argp, mdeg_result_t *resp);
105 static int vgen_add_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
106 static int vgen_port_read_props(vgen_port_t *portp, vgen_t *vgenp, md_t *mdp,
107 	mde_cookie_t mdex);
108 static int vgen_remove_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
109 static int vgen_port_attach(vgen_port_t *portp);
110 static void vgen_port_detach_mdeg(vgen_port_t *portp);
111 static void vgen_port_detach_mdeg(vgen_port_t *portp);
112 static int vgen_update_port(vgen_t *vgenp, md_t *curr_mdp,
113 	mde_cookie_t curr_mdex, md_t *prev_mdp, mde_cookie_t prev_mdex);
114 static uint64_t	vgen_port_stat(vgen_port_t *portp, uint_t stat);
115 
116 static int vgen_ldc_attach(vgen_port_t *portp, uint64_t ldc_id);
117 static void vgen_ldc_detach(vgen_ldc_t *ldcp);
118 static int vgen_alloc_tx_ring(vgen_ldc_t *ldcp);
119 static void vgen_free_tx_ring(vgen_ldc_t *ldcp);
120 static void vgen_init_ports(vgen_t *vgenp);
121 static void vgen_port_init(vgen_port_t *portp);
122 static void vgen_uninit_ports(vgen_t *vgenp);
123 static void vgen_port_uninit(vgen_port_t *portp);
124 static void vgen_init_ldcs(vgen_port_t *portp);
125 static void vgen_uninit_ldcs(vgen_port_t *portp);
126 static int vgen_ldc_init(vgen_ldc_t *ldcp);
127 static void vgen_ldc_uninit(vgen_ldc_t *ldcp);
128 static int vgen_init_tbufs(vgen_ldc_t *ldcp);
129 static void vgen_uninit_tbufs(vgen_ldc_t *ldcp);
130 static void vgen_clobber_tbufs(vgen_ldc_t *ldcp);
131 static void vgen_clobber_rxds(vgen_ldc_t *ldcp);
132 static uint64_t	vgen_ldc_stat(vgen_ldc_t *ldcp, uint_t stat);
133 static uint_t vgen_ldc_cb(uint64_t event, caddr_t arg);
134 static int vgen_portsend(vgen_port_t *portp, mblk_t *mp);
135 static int vgen_ldcsend(void *arg, mblk_t *mp);
136 static void vgen_ldcsend_pkt(void *arg, mblk_t *mp);
137 static int vgen_ldcsend_dring(void *arg, mblk_t *mp);
138 static void vgen_reclaim(vgen_ldc_t *ldcp);
139 static void vgen_reclaim_dring(vgen_ldc_t *ldcp);
140 static int vgen_num_txpending(vgen_ldc_t *ldcp);
141 static int vgen_tx_dring_full(vgen_ldc_t *ldcp);
142 static int vgen_ldc_txtimeout(vgen_ldc_t *ldcp);
143 static void vgen_ldc_watchdog(void *arg);
144 
145 /* vgen handshake functions */
146 static vgen_ldc_t *vh_nextphase(vgen_ldc_t *ldcp);
147 static int vgen_sendmsg(vgen_ldc_t *ldcp, caddr_t msg,  size_t msglen,
148 	boolean_t caller_holds_lock);
149 static int vgen_send_version_negotiate(vgen_ldc_t *ldcp);
150 static int vgen_send_attr_info(vgen_ldc_t *ldcp);
151 static int vgen_send_dring_reg(vgen_ldc_t *ldcp);
152 static int vgen_send_rdx_info(vgen_ldc_t *ldcp);
153 static int vgen_send_dring_data(vgen_ldc_t *ldcp, uint32_t start, int32_t end);
154 static int vgen_send_mcast_info(vgen_ldc_t *ldcp);
155 static int vgen_handshake_phase2(vgen_ldc_t *ldcp);
156 static void vgen_handshake_reset(vgen_ldc_t *ldcp);
157 static void vgen_reset_hphase(vgen_ldc_t *ldcp);
158 static void vgen_handshake(vgen_ldc_t *ldcp);
159 static int vgen_handshake_done(vgen_ldc_t *ldcp);
160 static void vgen_handshake_retry(vgen_ldc_t *ldcp);
161 static int vgen_handle_version_negotiate(vgen_ldc_t *ldcp,
162 	vio_msg_tag_t *tagp);
163 static int vgen_handle_attr_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
164 static int vgen_handle_dring_reg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
165 static int vgen_handle_rdx_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
166 static int vgen_handle_mcast_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
167 static int vgen_handle_ctrlmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
168 static void vgen_handle_pkt_data_nop(void *arg1, void *arg2, uint32_t msglen);
169 static void vgen_handle_pkt_data(void *arg1, void *arg2, uint32_t msglen);
170 static int vgen_handle_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
171 static int vgen_handle_dring_data_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
172 static int vgen_process_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
173 static int vgen_handle_dring_data_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
174 static int vgen_handle_dring_data_nack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
175 static int vgen_send_dring_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp,
176 	uint32_t start, int32_t end, uint8_t pstate);
177 static int vgen_handle_datamsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp,
178 	uint32_t msglen);
179 static void vgen_handle_errmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
180 static void vgen_handle_evt_up(vgen_ldc_t *ldcp);
181 static void vgen_handle_evt_reset(vgen_ldc_t *ldcp);
182 static int vgen_check_sid(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
183 static int vgen_check_datamsg_seq(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
184 static caddr_t vgen_print_ethaddr(uint8_t *a, char *ebuf);
185 static void vgen_hwatchdog(void *arg);
186 static void vgen_print_attr_info(vgen_ldc_t *ldcp, int endpoint);
187 static void vgen_print_hparams(vgen_hparams_t *hp);
188 static void vgen_print_ldcinfo(vgen_ldc_t *ldcp);
189 static void vgen_stop_rcv_thread(vgen_ldc_t *ldcp);
190 static void vgen_ldc_rcv_worker(void *arg);
191 static void vgen_handle_evt_read(vgen_ldc_t *ldcp);
192 static void vgen_rx(vgen_ldc_t *ldcp, mblk_t *bp);
193 static void vgen_set_vnet_proto_ops(vgen_ldc_t *ldcp);
194 static void vgen_reset_vnet_proto_ops(vgen_ldc_t *ldcp);
195 
196 /* VLAN routines */
197 static void vgen_vlan_read_ids(void *arg, int type, md_t *mdp,
198 	mde_cookie_t node, uint16_t *pvidp, uint16_t **vidspp,
199 	uint16_t *nvidsp, uint16_t *default_idp);
200 static void vgen_vlan_create_hash(vgen_port_t *portp);
201 static void vgen_vlan_destroy_hash(vgen_port_t *portp);
202 static void vgen_vlan_add_ids(vgen_port_t *portp);
203 static void vgen_vlan_remove_ids(vgen_port_t *portp);
204 static boolean_t vgen_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid);
205 static boolean_t vgen_frame_lookup_vid(vnet_t *vnetp, struct ether_header *ehp,
206 	uint16_t *vidp);
207 static mblk_t *vgen_vlan_frame_fixtag(vgen_port_t *portp, mblk_t *mp,
208 	boolean_t is_tagged, uint16_t vid);
209 static void vgen_vlan_unaware_port_reset(vgen_port_t *portp);
210 static void vgen_reset_vlan_unaware_ports(vgen_t *vgenp);
211 static int vgen_dds_rx(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
212 
213 /* externs */
214 extern void vnet_dds_rx(void *arg, void *dmsg);
215 extern int vnet_mtu_update(vnet_t *vnetp, uint32_t mtu);
216 
217 /*
218  * The handshake process consists of 5 phases defined below, with VH_PHASE0
219  * being the pre-handshake phase and VH_DONE is the phase to indicate
220  * successful completion of all phases.
221  * Each phase may have one to several handshake states which are required
222  * to complete successfully to move to the next phase.
223  * Refer to the functions vgen_handshake() and vgen_handshake_done() for
224  * more details.
225  */
/* handshake phases; VH_DONE is a distinct marker, not the next integer */
enum {
	VH_PHASE0	= 0,
	VH_PHASE1	= 1,
	VH_PHASE2	= 2,
	VH_PHASE3	= 3,
	VH_DONE		= 0x80
};

/*
 * handshake states
 *
 * Each state is a single bit so that states can be accumulated in a mask.
 * Every exchange (version, attributes, descriptor ring, RDX) owns four
 * consecutive bits: info sent, ack received, info received, ack sent.
 * The composite value of an exchange is "ack received" | "ack sent".
 */
enum {

	/* version negotiation: bits 0-3 */
	VER_INFO_SENT		= (1 << 0),
	VER_ACK_RCVD		= (1 << 1),
	VER_INFO_RCVD		= (1 << 2),
	VER_ACK_SENT		= (1 << 3),
	VER_NEGOTIATED		= (VER_ACK_RCVD | VER_ACK_SENT),

	/* attribute exchange: bits 4-7 */
	ATTR_INFO_SENT		= (1 << 4),
	ATTR_ACK_RCVD		= (1 << 5),
	ATTR_INFO_RCVD		= (1 << 6),
	ATTR_ACK_SENT		= (1 << 7),
	ATTR_INFO_EXCHANGED	= (ATTR_ACK_RCVD | ATTR_ACK_SENT),

	/* descriptor ring registration: bits 8-11 */
	DRING_INFO_SENT		= (1 << 8),
	DRING_ACK_RCVD		= (1 << 9),
	DRING_INFO_RCVD		= (1 << 10),
	DRING_ACK_SENT		= (1 << 11),
	DRING_INFO_EXCHANGED	= (DRING_ACK_RCVD | DRING_ACK_SENT),

	/* RDX exchange: bits 12-15 */
	RDX_INFO_SENT		= (1 << 12),
	RDX_ACK_RCVD		= (1 << 13),
	RDX_INFO_RCVD		= (1 << 14),
	RDX_ACK_SENT		= (1 << 15),
	RDX_EXCHANGED		= (RDX_ACK_RCVD | RDX_ACK_SENT)

};
257 
258 #define	VGEN_PRI_ETH_DEFINED(vgenp)	((vgenp)->pri_num_types != 0)
259 
/*
 * Acquire / release all five per-channel locks.
 *
 * LDC_LOCK takes them in the fixed order
 *	cblock -> rxlock -> wrlock -> txlock -> tclock
 * and LDC_UNLOCK drops them in exactly the reverse order.
 *
 * Both macros are wrapped in do { } while (0) so that they expand to a
 * single statement: they are safe as the body of an unbraced if/else or
 * loop. The invocation must be terminated with a semicolon, like a
 * function call.
 */
#define	LDC_LOCK(ldcp)	\
	do {						\
		mutex_enter(&((ldcp)->cblock));		\
		mutex_enter(&((ldcp)->rxlock));		\
		mutex_enter(&((ldcp)->wrlock));		\
		mutex_enter(&((ldcp)->txlock));		\
		mutex_enter(&((ldcp)->tclock));		\
	} while (0)
#define	LDC_UNLOCK(ldcp)	\
	do {						\
		mutex_exit(&((ldcp)->tclock));		\
		mutex_exit(&((ldcp)->txlock));		\
		mutex_exit(&((ldcp)->wrlock));		\
		mutex_exit(&((ldcp)->rxlock));		\
		mutex_exit(&((ldcp)->cblock));		\
	} while (0)
272 
273 #define	VGEN_VER_EQ(ldcp, major, minor)	\
274 	((ldcp)->local_hparams.ver_major == (major) &&	\
275 	    (ldcp)->local_hparams.ver_minor == (minor))
276 
277 #define	VGEN_VER_LT(ldcp, major, minor)	\
278 	(((ldcp)->local_hparams.ver_major < (major)) ||	\
279 	    ((ldcp)->local_hparams.ver_major == (major) &&	\
280 	    (ldcp)->local_hparams.ver_minor < (minor)))
281 
282 #define	VGEN_VER_GTEQ(ldcp, major, minor)	\
283 	(((ldcp)->local_hparams.ver_major > (major)) ||	\
284 	    ((ldcp)->local_hparams.ver_major == (major) &&	\
285 	    (ldcp)->local_hparams.ver_minor >= (minor)))
286 
287 static struct ether_addr etherbroadcastaddr = {
288 	0xff, 0xff, 0xff, 0xff, 0xff, 0xff
289 };
/*
 * MIB II broadcast/multicast packets
 *
 * ehp is a pointer to the ethernet header of the frame. The argument is
 * parenthesized in the expansion so that any pointer expression (for
 * example "hdrs + 1") can be passed, not just a plain identifier.
 *
 * IS_BROADCAST: destination address equals etherbroadcastaddr.
 * IS_MULTICAST: low-order bit of the first destination octet is set.
 */
#define	IS_BROADCAST(ehp) \
		(ether_cmp(&(ehp)->ether_dhost, &etherbroadcastaddr) == 0)
#define	IS_MULTICAST(ehp) \
		(((ehp)->ether_dhost.ether_addr_octet[0] & 01) == 1)
297 
298 /*
299  * Property names
300  */
301 static char macaddr_propname[] = "mac-address";
302 static char rmacaddr_propname[] = "remote-mac-address";
303 static char channel_propname[] = "channel-endpoint";
304 static char reg_propname[] = "reg";
305 static char port_propname[] = "port";
306 static char swport_propname[] = "switch-port";
307 static char id_propname[] = "id";
308 static char vdev_propname[] = "virtual-device";
309 static char vnet_propname[] = "network";
310 static char pri_types_propname[] = "priority-ether-types";
311 static char vgen_pvid_propname[] = "port-vlan-id";
312 static char vgen_vid_propname[] = "vlan-id";
313 static char vgen_dvid_propname[] = "default-vlan-id";
314 static char port_pvid_propname[] = "remote-port-vlan-id";
315 static char port_vid_propname[] = "remote-vlan-id";
316 static char vgen_mtu_propname[] = "mtu";
317 
318 /* versions supported - in decreasing order */
319 static vgen_ver_t vgen_versions[VGEN_NUM_VER] = { {1, 4} };
320 
321 /* Tunables */
322 uint32_t vgen_hwd_interval = 5;		/* handshake watchdog freq in sec */
323 uint32_t vgen_max_hretries = VNET_NUM_HANDSHAKES; /* # of handshake retries */
324 uint32_t vgen_ldcwr_retries = 10;	/* max # of ldc_write() retries */
325 uint32_t vgen_ldcup_retries = 5;	/* max # of ldc_up() retries */
326 uint32_t vgen_recv_delay = 1;		/* delay when rx descr not ready */
327 uint32_t vgen_recv_retries = 10;	/* retry when rx descr not ready */
328 uint32_t vgen_tx_retries = 0x4;		/* retry when tx descr not available */
329 uint32_t vgen_tx_delay = 0x30;		/* delay when tx descr not available */
330 
331 int vgen_rcv_thread_enabled = 1;	/* Enable Recieve thread */
332 
/*
 * max # of packets accumulated prior to sending them up. It is best
 * to keep this at 60% of the number of receive buffers.
 */
337 uint32_t vgen_chain_len = (VGEN_NRBUFS * 0.6);
338 
339 /*
340  * Internal tunables for receive buffer pools, that is,  the size and number of
341  * mblks for each pool. At least 3 sizes must be specified if these are used.
342  * The sizes must be specified in increasing order. Non-zero value of the first
343  * size will be used as a hint to use these values instead of the algorithm
344  * that determines the sizes based on MTU.
345  */
346 uint32_t vgen_rbufsz1 = 0;
347 uint32_t vgen_rbufsz2 = 0;
348 uint32_t vgen_rbufsz3 = 0;
349 uint32_t vgen_rbufsz4 = 0;
350 
351 uint32_t vgen_nrbufs1 = VGEN_NRBUFS;
352 uint32_t vgen_nrbufs2 = VGEN_NRBUFS;
353 uint32_t vgen_nrbufs3 = VGEN_NRBUFS;
354 uint32_t vgen_nrbufs4 = VGEN_NRBUFS;
355 
356 /*
357  * In the absence of "priority-ether-types" property in MD, the following
358  * internal tunable can be set to specify a single priority ethertype.
359  */
360 uint64_t vgen_pri_eth_type = 0;
361 
362 /*
363  * Number of transmit priority buffers that are preallocated per device.
364  * This number is chosen to be a small value to throttle transmission
365  * of priority packets. Note: Must be a power of 2 for vio_create_mblks().
366  */
367 uint32_t vgen_pri_tx_nmblks = 64;
368 
369 uint32_t	vgen_vlan_nchains = 4;	/* # of chains in vlan id hash table */
370 
371 #ifdef DEBUG
372 /* flags to simulate error conditions for debugging */
373 int vgen_trigger_txtimeout = 0;
374 int vgen_trigger_rxlost = 0;
375 #endif
376 
377 /*
378  * Matching criteria passed to the MDEG to register interest
379  * in changes to 'virtual-device' nodes (i.e. vnet nodes) identified
380  * by their 'name' and 'cfg-handle' properties.
381  */
382 static md_prop_match_t vdev_prop_match[] = {
383 	{ MDET_PROP_STR,    "name"   },
384 	{ MDET_PROP_VAL,    "cfg-handle" },
385 	{ MDET_LIST_END,    NULL    }
386 };
387 
388 static mdeg_node_match_t vdev_match = { "virtual-device",
389 						vdev_prop_match };
390 
391 /* MD update matching structure */
392 static md_prop_match_t	vport_prop_match[] = {
393 	{ MDET_PROP_VAL,	"id" },
394 	{ MDET_LIST_END,	NULL }
395 };
396 
397 static mdeg_node_match_t vport_match = { "virtual-device-port",
398 					vport_prop_match };
399 
400 /* template for matching a particular vnet instance */
401 static mdeg_prop_spec_t vgen_prop_template[] = {
402 	{ MDET_PROP_STR,	"name",		"network" },
403 	{ MDET_PROP_VAL,	"cfg-handle",	NULL },
404 	{ MDET_LIST_END,	NULL,		NULL }
405 };
406 
407 #define	VGEN_SET_MDEG_PROP_INST(specp, val)	(specp)[1].ps_val = (val)
408 
409 static int vgen_mdeg_port_cb(void *cb_argp, mdeg_result_t *resp);
410 
411 static mac_callbacks_t vgen_m_callbacks = {
412 	0,
413 	vgen_stat,
414 	vgen_start,
415 	vgen_stop,
416 	vgen_promisc,
417 	vgen_multicst,
418 	vgen_unicst,
419 	vgen_tx,
420 	NULL,
421 	NULL,
422 	NULL
423 };
424 
425 /* externs */
426 extern pri_t	maxclsyspri;
427 extern proc_t	p0;
428 extern uint32_t vnet_ntxds;
429 extern uint32_t vnet_ldcwd_interval;
430 extern uint32_t vnet_ldcwd_txtimeout;
431 extern uint32_t vnet_ldc_mtu;
432 extern uint32_t vnet_nrbufs;
433 extern uint32_t	vnet_ethermtu;
434 extern uint16_t	vnet_default_vlan_id;
435 extern boolean_t vnet_jumbo_rxpools;
436 
437 #ifdef DEBUG
438 
439 extern int vnet_dbglevel;
440 static void debug_printf(const char *fname, vgen_t *vgenp,
441 	vgen_ldc_t *ldcp, const char *fmt, ...);
442 
443 /* -1 for all LDCs info, or ldc_id for a specific LDC info */
444 int vgendbg_ldcid = -1;
445 
446 /* simulate handshake error conditions for debug */
447 uint32_t vgen_hdbg;
448 #define	HDBG_VERSION	0x1
449 #define	HDBG_TIMEOUT	0x2
450 #define	HDBG_BAD_SID	0x4
451 #define	HDBG_OUT_STATE	0x8
452 
453 #endif
454 
455 /*
456  * vgen_init() is called by an instance of vnet driver to initialize the
457  * corresponding generic proxy transport layer. The arguments passed by vnet
458  * are - an opaque pointer to the vnet instance, pointers to dev_info_t and
459  * the mac address of the vnet device, and a pointer to vgen_t is passed
460  * back as a handle to vnet.
461  */
462 int
463 vgen_init(void *vnetp, uint64_t regprop, dev_info_t *vnetdip,
464     const uint8_t *macaddr, void **vgenhdl)
465 {
466 	vgen_t *vgenp;
467 	int instance;
468 	int rv;
469 
470 	if ((vnetp == NULL) || (vnetdip == NULL))
471 		return (DDI_FAILURE);
472 
473 	instance = ddi_get_instance(vnetdip);
474 
475 	DBG1(NULL, NULL, "vnet(%d): enter\n", instance);
476 
477 	vgenp = kmem_zalloc(sizeof (vgen_t), KM_SLEEP);
478 
479 	vgenp->vnetp = vnetp;
480 	vgenp->instance = instance;
481 	vgenp->regprop = regprop;
482 	vgenp->vnetdip = vnetdip;
483 	bcopy(macaddr, &(vgenp->macaddr), ETHERADDRL);
484 
485 	/* allocate multicast table */
486 	vgenp->mctab = kmem_zalloc(VGEN_INIT_MCTAB_SIZE *
487 	    sizeof (struct ether_addr), KM_SLEEP);
488 	vgenp->mccount = 0;
489 	vgenp->mcsize = VGEN_INIT_MCTAB_SIZE;
490 
491 	mutex_init(&vgenp->lock, NULL, MUTEX_DRIVER, NULL);
492 	rw_init(&vgenp->vgenports.rwlock, NULL, RW_DRIVER, NULL);
493 
494 	rv = vgen_read_mdprops(vgenp);
495 	if (rv != 0) {
496 		goto vgen_init_fail;
497 	}
498 
499 	/* register with MD event generator */
500 	rv = vgen_mdeg_reg(vgenp);
501 	if (rv != DDI_SUCCESS) {
502 		goto vgen_init_fail;
503 	}
504 
505 	*vgenhdl = (void *)vgenp;
506 
507 	DBG1(NULL, NULL, "vnet(%d): exit\n", instance);
508 	return (DDI_SUCCESS);
509 
510 vgen_init_fail:
511 	rw_destroy(&vgenp->vgenports.rwlock);
512 	mutex_destroy(&vgenp->lock);
513 	kmem_free(vgenp->mctab, VGEN_INIT_MCTAB_SIZE *
514 	    sizeof (struct ether_addr));
515 	if (VGEN_PRI_ETH_DEFINED(vgenp)) {
516 		kmem_free(vgenp->pri_types,
517 		    sizeof (uint16_t) * vgenp->pri_num_types);
518 		(void) vio_destroy_mblks(vgenp->pri_tx_vmp);
519 	}
520 	KMEM_FREE(vgenp);
521 	return (DDI_FAILURE);
522 }
523 
524 /*
525  * Called by vnet to undo the initializations done by vgen_init().
526  * The handle provided by generic transport during vgen_init() is the argument.
527  */
528 int
529 vgen_uninit(void *arg)
530 {
531 	vgen_t		*vgenp = (vgen_t *)arg;
532 	vio_mblk_pool_t	*rp;
533 	vio_mblk_pool_t	*nrp;
534 
535 	if (vgenp == NULL) {
536 		return (DDI_FAILURE);
537 	}
538 
539 	DBG1(vgenp, NULL, "enter\n");
540 
541 	/* unregister with MD event generator */
542 	vgen_mdeg_unreg(vgenp);
543 
544 	mutex_enter(&vgenp->lock);
545 
546 	/* detach all ports from the device */
547 	vgen_detach_ports(vgenp);
548 
549 	/*
550 	 * free any pending rx mblk pools,
551 	 * that couldn't be freed previously during channel detach.
552 	 */
553 	rp = vgenp->rmp;
554 	while (rp != NULL) {
555 		nrp = vgenp->rmp = rp->nextp;
556 		if (vio_destroy_mblks(rp)) {
557 			vgenp->rmp = rp;
558 			mutex_exit(&vgenp->lock);
559 			return (DDI_FAILURE);
560 		}
561 		rp = nrp;
562 	}
563 
564 	/* free multicast table */
565 	kmem_free(vgenp->mctab, vgenp->mcsize * sizeof (struct ether_addr));
566 
567 	/* free pri_types table */
568 	if (VGEN_PRI_ETH_DEFINED(vgenp)) {
569 		kmem_free(vgenp->pri_types,
570 		    sizeof (uint16_t) * vgenp->pri_num_types);
571 		(void) vio_destroy_mblks(vgenp->pri_tx_vmp);
572 	}
573 
574 	mutex_exit(&vgenp->lock);
575 
576 	rw_destroy(&vgenp->vgenports.rwlock);
577 	mutex_destroy(&vgenp->lock);
578 
579 	DBG1(vgenp, NULL, "exit\n");
580 	KMEM_FREE(vgenp);
581 
582 	return (DDI_SUCCESS);
583 }
584 
585 /* enable transmit/receive for the device */
586 int
587 vgen_start(void *arg)
588 {
589 	vgen_port_t	*portp = (vgen_port_t *)arg;
590 	vgen_t		*vgenp = portp->vgenp;
591 
592 	DBG1(vgenp, NULL, "enter\n");
593 	mutex_enter(&portp->lock);
594 	vgen_port_init(portp);
595 	portp->flags |= VGEN_STARTED;
596 	mutex_exit(&portp->lock);
597 	DBG1(vgenp, NULL, "exit\n");
598 
599 	return (DDI_SUCCESS);
600 }
601 
602 /* stop transmit/receive */
603 void
604 vgen_stop(void *arg)
605 {
606 	vgen_port_t	*portp = (vgen_port_t *)arg;
607 	vgen_t		*vgenp = portp->vgenp;
608 
609 	DBG1(vgenp, NULL, "enter\n");
610 
611 	mutex_enter(&portp->lock);
612 	vgen_port_uninit(portp);
613 	portp->flags &= ~(VGEN_STARTED);
614 	mutex_exit(&portp->lock);
615 	DBG1(vgenp, NULL, "exit\n");
616 
617 }
618 
619 /* vgen transmit function */
620 static mblk_t *
621 vgen_tx(void *arg, mblk_t *mp)
622 {
623 	int i;
624 	vgen_port_t *portp;
625 	int status = VGEN_FAILURE;
626 
627 	portp = (vgen_port_t *)arg;
628 	/*
629 	 * Retry so that we avoid reporting a failure
630 	 * to the upper layer. Returning a failure may cause the
631 	 * upper layer to go into single threaded mode there by
632 	 * causing performance degradation, especially for a large
633 	 * number of connections.
634 	 */
635 	for (i = 0; i < vgen_tx_retries; ) {
636 		status = vgen_portsend(portp, mp);
637 		if (status == VGEN_SUCCESS) {
638 			break;
639 		}
640 		if (++i < vgen_tx_retries)
641 			delay(drv_usectohz(vgen_tx_delay));
642 	}
643 	if (status != VGEN_SUCCESS) {
644 		/* failure */
645 		return (mp);
646 	}
647 	/* success */
648 	return (NULL);
649 }
650 
651 /*
652  * This function provides any necessary tagging/untagging of the frames
653  * that are being transmitted over the port. It first verifies the vlan
654  * membership of the destination(port) and drops the packet if the
655  * destination doesn't belong to the given vlan.
656  *
657  * Arguments:
658  *   portp:     port over which the frames should be transmitted
659  *   mp:        frame to be transmitted
660  *   is_tagged:
661  *              B_TRUE: indicates frame header contains the vlan tag already.
662  *              B_FALSE: indicates frame is untagged.
663  *   vid:       vlan in which the frame should be transmitted.
664  *
665  * Returns:
666  *              Sucess: frame(mblk_t *) after doing the necessary tag/untag.
667  *              Failure: NULL
668  */
669 static mblk_t *
670 vgen_vlan_frame_fixtag(vgen_port_t *portp, mblk_t *mp, boolean_t is_tagged,
671 	uint16_t vid)
672 {
673 	vgen_t				*vgenp;
674 	boolean_t			dst_tagged;
675 	int				rv;
676 
677 	vgenp = portp->vgenp;
678 
679 	/*
680 	 * If the packet is going to a vnet:
681 	 *   Check if the destination vnet is in the same vlan.
682 	 *   Check the frame header if tag or untag is needed.
683 	 *
684 	 * We do not check the above conditions if the packet is going to vsw:
685 	 *   vsw must be present implicitly in all the vlans that a vnet device
686 	 *   is configured into; even if vsw itself is not assigned to those
687 	 *   vlans as an interface. For instance, the packet might be destined
688 	 *   to another vnet(indirectly through vsw) or to an external host
689 	 *   which is in the same vlan as this vnet and vsw itself may not be
690 	 *   present in that vlan. Similarly packets going to vsw must be
691 	 *   always tagged(unless in the default-vlan) if not already tagged,
692 	 *   as we do not know the final destination. This is needed because
693 	 *   vsw must always invoke its switching function only after tagging
694 	 *   the packet; otherwise after switching function determines the
695 	 *   destination we cannot figure out if the destination belongs to the
696 	 *   the same vlan that the frame originated from and if it needs tag/
697 	 *   untag. Note that vsw will tag the packet itself when it receives
698 	 *   it over the channel from a client if needed. However, that is
699 	 *   needed only in the case of vlan unaware clients such as obp or
700 	 *   earlier versions of vnet.
701 	 *
702 	 */
703 	if (portp != vgenp->vsw_portp) {
704 		/*
705 		 * Packet going to a vnet. Check if the destination vnet is in
706 		 * the same vlan. Then check the frame header if tag/untag is
707 		 * needed.
708 		 */
709 		rv = vgen_vlan_lookup(portp->vlan_hashp, vid);
710 		if (rv == B_FALSE) {
711 			/* drop the packet */
712 			freemsg(mp);
713 			return (NULL);
714 		}
715 
716 		/* is the destination tagged or untagged in this vlan? */
717 		(vid == portp->pvid) ? (dst_tagged = B_FALSE) :
718 		    (dst_tagged = B_TRUE);
719 
720 		if (is_tagged == dst_tagged) {
721 			/* no tagging/untagging needed */
722 			return (mp);
723 		}
724 
725 		if (is_tagged == B_TRUE) {
726 			/* frame is tagged; destination needs untagged */
727 			mp = vnet_vlan_remove_tag(mp);
728 			return (mp);
729 		}
730 
731 		/* (is_tagged == B_FALSE): fallthru to tag tx packet: */
732 	}
733 
734 	/*
735 	 * Packet going to a vnet needs tagging.
736 	 * OR
737 	 * If the packet is going to vsw, then it must be tagged in all cases:
738 	 * unknown unicast, broadcast/multicast or to vsw interface.
739 	 */
740 
741 	if (is_tagged == B_FALSE) {
742 		mp = vnet_vlan_insert_tag(mp, vid);
743 	}
744 
745 	return (mp);
746 }
747 
748 /* transmit packets over the given port */
749 static int
750 vgen_portsend(vgen_port_t *portp, mblk_t *mp)
751 {
752 	vgen_ldclist_t		*ldclp;
753 	vgen_ldc_t		*ldcp;
754 	int			status;
755 	int			rv = VGEN_SUCCESS;
756 	vgen_t			*vgenp = portp->vgenp;
757 	vnet_t			*vnetp = vgenp->vnetp;
758 	boolean_t		is_tagged;
759 	boolean_t		dec_refcnt = B_FALSE;
760 	uint16_t		vlan_id;
761 	struct ether_header	*ehp;
762 
763 	if (portp->use_vsw_port) {
764 		(void) atomic_inc_32(&vgenp->vsw_port_refcnt);
765 		portp = portp->vgenp->vsw_portp;
766 		dec_refcnt = B_TRUE;
767 	}
768 	if (portp == NULL) {
769 		return (VGEN_FAILURE);
770 	}
771 
772 	/*
773 	 * Determine the vlan id that the frame belongs to.
774 	 */
775 	ehp = (struct ether_header *)mp->b_rptr;
776 	is_tagged = vgen_frame_lookup_vid(vnetp, ehp, &vlan_id);
777 
778 	if (vlan_id == vnetp->default_vlan_id) {
779 
780 		/* Frames in default vlan must be untagged */
781 		ASSERT(is_tagged == B_FALSE);
782 
783 		/*
784 		 * If the destination is a vnet-port verify it belongs to the
785 		 * default vlan; otherwise drop the packet. We do not need
786 		 * this check for vsw-port, as it should implicitly belong to
787 		 * this vlan; see comments in vgen_vlan_frame_fixtag().
788 		 */
789 		if (portp != vgenp->vsw_portp &&
790 		    portp->pvid != vnetp->default_vlan_id) {
791 			freemsg(mp);
792 			goto portsend_ret;
793 		}
794 
795 	} else {	/* frame not in default-vlan */
796 
797 		mp = vgen_vlan_frame_fixtag(portp, mp, is_tagged, vlan_id);
798 		if (mp == NULL) {
799 			goto portsend_ret;
800 		}
801 
802 	}
803 
804 	ldclp = &portp->ldclist;
805 	READ_ENTER(&ldclp->rwlock);
806 	/*
807 	 * NOTE: for now, we will assume we have a single channel.
808 	 */
809 	if (ldclp->headp == NULL) {
810 		RW_EXIT(&ldclp->rwlock);
811 		rv = VGEN_FAILURE;
812 		goto portsend_ret;
813 	}
814 	ldcp = ldclp->headp;
815 
816 	status = ldcp->tx(ldcp, mp);
817 
818 	RW_EXIT(&ldclp->rwlock);
819 
820 	if (status != VGEN_TX_SUCCESS) {
821 		rv = VGEN_FAILURE;
822 	}
823 
824 portsend_ret:
825 	if (dec_refcnt == B_TRUE) {
826 		(void) atomic_dec_32(&vgenp->vsw_port_refcnt);
827 	}
828 	return (rv);
829 }
830 
831 /*
832  * Wrapper function to transmit normal and/or priority frames over the channel.
833  */
834 static int
835 vgen_ldcsend(void *arg, mblk_t *mp)
836 {
837 	vgen_ldc_t		*ldcp = (vgen_ldc_t *)arg;
838 	int			status;
839 	struct ether_header	*ehp;
840 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
841 	uint32_t		num_types;
842 	uint16_t		*types;
843 	int			i;
844 
845 	ASSERT(VGEN_PRI_ETH_DEFINED(vgenp));
846 
847 	num_types = vgenp->pri_num_types;
848 	types = vgenp->pri_types;
849 	ehp = (struct ether_header *)mp->b_rptr;
850 
851 	for (i = 0; i < num_types; i++) {
852 
853 		if (ehp->ether_type == types[i]) {
854 			/* priority frame, use pri tx function */
855 			vgen_ldcsend_pkt(ldcp, mp);
856 			return (VGEN_SUCCESS);
857 		}
858 
859 	}
860 
861 	status  = vgen_ldcsend_dring(ldcp, mp);
862 
863 	return (status);
864 }
865 
866 /*
867  * This functions handles ldc channel reset while in the context
868  * of transmit routines: vgen_ldcsend_pkt() or vgen_ldcsend_dring().
869  */
870 static void
871 vgen_ldcsend_process_reset(vgen_ldc_t *ldcp)
872 {
873 	ldc_status_t	istatus;
874 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
875 
876 	if (mutex_tryenter(&ldcp->cblock)) {
877 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
878 			DWARN(vgenp, ldcp, "ldc_status() error\n");
879 		} else {
880 			ldcp->ldc_status = istatus;
881 		}
882 		if (ldcp->ldc_status != LDC_UP) {
883 			vgen_handle_evt_reset(ldcp);
884 		}
885 		mutex_exit(&ldcp->cblock);
886 	}
887 }
888 
889 /*
890  * This function transmits the frame in the payload of a raw data
891  * (VIO_PKT_DATA) message. Thus, it provides an Out-Of-Band path to
892  * send special frames with high priorities, without going through
893  * the normal data path which uses descriptor ring mechanism.
894  */
895 static void
896 vgen_ldcsend_pkt(void *arg, mblk_t *mp)
897 {
898 	vgen_ldc_t		*ldcp = (vgen_ldc_t *)arg;
899 	vio_raw_data_msg_t	*pkt;
900 	mblk_t			*bp;
901 	mblk_t			*nmp = NULL;
902 	caddr_t			dst;
903 	uint32_t		mblksz;
904 	uint32_t		size;
905 	uint32_t		nbytes;
906 	int			rv;
907 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
908 	vgen_stats_t		*statsp = &ldcp->stats;
909 
910 	/* drop the packet if ldc is not up or handshake is not done */
911 	if (ldcp->ldc_status != LDC_UP) {
912 		(void) atomic_inc_32(&statsp->tx_pri_fail);
913 		DWARN(vgenp, ldcp, "status(%d), dropping packet\n",
914 		    ldcp->ldc_status);
915 		goto send_pkt_exit;
916 	}
917 
918 	if (ldcp->hphase != VH_DONE) {
919 		(void) atomic_inc_32(&statsp->tx_pri_fail);
920 		DWARN(vgenp, ldcp, "hphase(%x), dropping packet\n",
921 		    ldcp->hphase);
922 		goto send_pkt_exit;
923 	}
924 
925 	size = msgsize(mp);
926 
927 	/* frame size bigger than available payload len of raw data msg ? */
928 	if (size > (size_t)(ldcp->msglen - VIO_PKT_DATA_HDRSIZE)) {
929 		(void) atomic_inc_32(&statsp->tx_pri_fail);
930 		DWARN(vgenp, ldcp, "invalid size(%d)\n", size);
931 		goto send_pkt_exit;
932 	}
933 
934 	if (size < ETHERMIN)
935 		size = ETHERMIN;
936 
937 	/* alloc space for a raw data message */
938 	nmp = vio_allocb(vgenp->pri_tx_vmp);
939 	if (nmp == NULL) {
940 		(void) atomic_inc_32(&statsp->tx_pri_fail);
941 		DWARN(vgenp, ldcp, "vio_allocb failed\n");
942 		goto send_pkt_exit;
943 	}
944 	pkt = (vio_raw_data_msg_t *)nmp->b_rptr;
945 
946 	/* copy frame into the payload of raw data message */
947 	dst = (caddr_t)pkt->data;
948 	for (bp = mp; bp != NULL; bp = bp->b_cont) {
949 		mblksz = MBLKL(bp);
950 		bcopy(bp->b_rptr, dst, mblksz);
951 		dst += mblksz;
952 	}
953 
954 	/* setup the raw data msg */
955 	pkt->tag.vio_msgtype = VIO_TYPE_DATA;
956 	pkt->tag.vio_subtype = VIO_SUBTYPE_INFO;
957 	pkt->tag.vio_subtype_env = VIO_PKT_DATA;
958 	pkt->tag.vio_sid = ldcp->local_sid;
959 	nbytes = VIO_PKT_DATA_HDRSIZE + size;
960 
961 	/* send the msg over ldc */
962 	rv = vgen_sendmsg(ldcp, (caddr_t)pkt, nbytes, B_FALSE);
963 	if (rv != VGEN_SUCCESS) {
964 		(void) atomic_inc_32(&statsp->tx_pri_fail);
965 		DWARN(vgenp, ldcp, "Error sending priority frame\n");
966 		if (rv == ECONNRESET) {
967 			vgen_ldcsend_process_reset(ldcp);
968 		}
969 		goto send_pkt_exit;
970 	}
971 
972 	/* update stats */
973 	(void) atomic_inc_64(&statsp->tx_pri_packets);
974 	(void) atomic_add_64(&statsp->tx_pri_bytes, size);
975 
976 send_pkt_exit:
977 	if (nmp != NULL)
978 		freemsg(nmp);
979 	freemsg(mp);
980 }
981 
982 /*
983  * This function transmits normal (non-priority) data frames over
984  * the channel. It queues the frame into the transmit descriptor ring
985  * and sends a VIO_DRING_DATA message if needed, to wake up the
986  * peer to (re)start processing.
987  */
988 static int
989 vgen_ldcsend_dring(void *arg, mblk_t *mp)
990 {
991 	vgen_ldc_t		*ldcp = (vgen_ldc_t *)arg;
992 	vgen_private_desc_t	*tbufp;
993 	vgen_private_desc_t	*rtbufp;
994 	vnet_public_desc_t	*rtxdp;
995 	vgen_private_desc_t	*ntbufp;
996 	vnet_public_desc_t	*txdp;
997 	vio_dring_entry_hdr_t	*hdrp;
998 	vgen_stats_t		*statsp;
999 	struct ether_header	*ehp;
1000 	boolean_t		is_bcast = B_FALSE;
1001 	boolean_t		is_mcast = B_FALSE;
1002 	size_t			mblksz;
1003 	caddr_t			dst;
1004 	mblk_t			*bp;
1005 	size_t			size;
1006 	int			rv = 0;
1007 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
1008 	vgen_hparams_t		*lp = &ldcp->local_hparams;
1009 
1010 	statsp = &ldcp->stats;
1011 	size = msgsize(mp);
1012 
1013 	DBG1(vgenp, ldcp, "enter\n");
1014 
1015 	if (ldcp->ldc_status != LDC_UP) {
1016 		DWARN(vgenp, ldcp, "status(%d), dropping packet\n",
1017 		    ldcp->ldc_status);
1018 		/* retry ldc_up() if needed */
1019 		if (ldcp->flags & CHANNEL_STARTED)
1020 			(void) ldc_up(ldcp->ldc_handle);
1021 		goto send_dring_exit;
1022 	}
1023 
1024 	/* drop the packet if ldc is not up or handshake is not done */
1025 	if (ldcp->hphase != VH_DONE) {
1026 		DWARN(vgenp, ldcp, "hphase(%x), dropping packet\n",
1027 		    ldcp->hphase);
1028 		goto send_dring_exit;
1029 	}
1030 
1031 	if (size > (size_t)lp->mtu) {
1032 		DWARN(vgenp, ldcp, "invalid size(%d)\n", size);
1033 		goto send_dring_exit;
1034 	}
1035 	if (size < ETHERMIN)
1036 		size = ETHERMIN;
1037 
1038 	ehp = (struct ether_header *)mp->b_rptr;
1039 	is_bcast = IS_BROADCAST(ehp);
1040 	is_mcast = IS_MULTICAST(ehp);
1041 
1042 	mutex_enter(&ldcp->txlock);
1043 	/*
1044 	 * allocate a descriptor
1045 	 */
1046 	tbufp = ldcp->next_tbufp;
1047 	ntbufp = NEXTTBUF(ldcp, tbufp);
1048 	if (ntbufp == ldcp->cur_tbufp) { /* out of tbufs/txds */
1049 
1050 		mutex_enter(&ldcp->tclock);
1051 		/* Try reclaiming now */
1052 		vgen_reclaim_dring(ldcp);
1053 		ldcp->reclaim_lbolt = ddi_get_lbolt();
1054 
1055 		if (ntbufp == ldcp->cur_tbufp) {
1056 			/* Now we are really out of tbuf/txds */
1057 			ldcp->need_resched = B_TRUE;
1058 			mutex_exit(&ldcp->tclock);
1059 
1060 			statsp->tx_no_desc++;
1061 			mutex_exit(&ldcp->txlock);
1062 
1063 			return (VGEN_TX_NORESOURCES);
1064 		}
1065 		mutex_exit(&ldcp->tclock);
1066 	}
1067 	/* update next available tbuf in the ring and update tx index */
1068 	ldcp->next_tbufp = ntbufp;
1069 	INCR_TXI(ldcp->next_txi, ldcp);
1070 
1071 	/* Mark the buffer busy before releasing the lock */
1072 	tbufp->flags = VGEN_PRIV_DESC_BUSY;
1073 	mutex_exit(&ldcp->txlock);
1074 
1075 	/* copy data into pre-allocated transmit buffer */
1076 	dst = tbufp->datap + VNET_IPALIGN;
1077 	for (bp = mp; bp != NULL; bp = bp->b_cont) {
1078 		mblksz = MBLKL(bp);
1079 		bcopy(bp->b_rptr, dst, mblksz);
1080 		dst += mblksz;
1081 	}
1082 
1083 	tbufp->datalen = size;
1084 
1085 	/* initialize the corresponding public descriptor (txd) */
1086 	txdp = tbufp->descp;
1087 	hdrp = &txdp->hdr;
1088 	txdp->nbytes = size;
1089 	txdp->ncookies = tbufp->ncookies;
1090 	bcopy((tbufp->memcookie), (txdp->memcookie),
1091 	    tbufp->ncookies * sizeof (ldc_mem_cookie_t));
1092 
1093 	mutex_enter(&ldcp->wrlock);
1094 	/*
1095 	 * If the flags not set to BUSY, it implies that the clobber
1096 	 * was done while we were copying the data. In such case,
1097 	 * discard the packet and return.
1098 	 */
1099 	if (tbufp->flags != VGEN_PRIV_DESC_BUSY) {
1100 		statsp->oerrors++;
1101 		mutex_exit(&ldcp->wrlock);
1102 		goto send_dring_exit;
1103 	}
1104 	hdrp->dstate = VIO_DESC_READY;
1105 
1106 	/* update stats */
1107 	statsp->opackets++;
1108 	statsp->obytes += size;
1109 	if (is_bcast)
1110 		statsp->brdcstxmt++;
1111 	else if (is_mcast)
1112 		statsp->multixmt++;
1113 
1114 	/* send dring datamsg to the peer */
1115 	if (ldcp->resched_peer) {
1116 
1117 		rtbufp = &ldcp->tbufp[ldcp->resched_peer_txi];
1118 		rtxdp = rtbufp->descp;
1119 
1120 		if (rtxdp->hdr.dstate == VIO_DESC_READY) {
1121 
1122 			rv = vgen_send_dring_data(ldcp,
1123 			    (uint32_t)ldcp->resched_peer_txi, -1);
1124 			if (rv != 0) {
1125 				/* error: drop the packet */
1126 				DWARN(vgenp, ldcp, "vgen_send_dring_data "
1127 				    "failed: rv(%d) len(%d)\n",
1128 				    ldcp->ldc_id, rv, size);
1129 				statsp->oerrors++;
1130 			} else {
1131 				ldcp->resched_peer = B_FALSE;
1132 			}
1133 
1134 		}
1135 
1136 	}
1137 
1138 	mutex_exit(&ldcp->wrlock);
1139 
1140 send_dring_exit:
1141 	if (rv == ECONNRESET) {
1142 		vgen_ldcsend_process_reset(ldcp);
1143 	}
1144 	freemsg(mp);
1145 	DBG1(vgenp, ldcp, "exit\n");
1146 	return (VGEN_TX_SUCCESS);
1147 }
1148 
1149 /* enable/disable a multicast address */
1150 int
1151 vgen_multicst(void *arg, boolean_t add, const uint8_t *mca)
1152 {
1153 	vgen_t			*vgenp;
1154 	vnet_mcast_msg_t	mcastmsg;
1155 	vio_msg_tag_t		*tagp;
1156 	vgen_port_t		*portp;
1157 	vgen_portlist_t		*plistp;
1158 	vgen_ldc_t		*ldcp;
1159 	vgen_ldclist_t		*ldclp;
1160 	struct ether_addr	*addrp;
1161 	int			rv = DDI_FAILURE;
1162 	uint32_t		i;
1163 
1164 	portp = (vgen_port_t *)arg;
1165 	vgenp = portp->vgenp;
1166 
1167 	if (portp != vgenp->vsw_portp) {
1168 		return (DDI_SUCCESS);
1169 	}
1170 
1171 	addrp = (struct ether_addr *)mca;
1172 	tagp = &mcastmsg.tag;
1173 	bzero(&mcastmsg, sizeof (mcastmsg));
1174 
1175 	mutex_enter(&vgenp->lock);
1176 
1177 	plistp = &(vgenp->vgenports);
1178 
1179 	READ_ENTER(&plistp->rwlock);
1180 
1181 	portp = vgenp->vsw_portp;
1182 	if (portp == NULL) {
1183 		RW_EXIT(&plistp->rwlock);
1184 		mutex_exit(&vgenp->lock);
1185 		return (rv);
1186 	}
1187 	ldclp = &portp->ldclist;
1188 
1189 	READ_ENTER(&ldclp->rwlock);
1190 
1191 	ldcp = ldclp->headp;
1192 	if (ldcp == NULL)
1193 		goto vgen_mcast_exit;
1194 
1195 	mutex_enter(&ldcp->cblock);
1196 
1197 	if (ldcp->hphase == VH_DONE) {
1198 		/*
1199 		 * If handshake is done, send a msg to vsw to add/remove
1200 		 * the multicast address. Otherwise, we just update this
1201 		 * mcast address in our table and the table will be sync'd
1202 		 * with vsw when handshake completes.
1203 		 */
1204 		tagp->vio_msgtype = VIO_TYPE_CTRL;
1205 		tagp->vio_subtype = VIO_SUBTYPE_INFO;
1206 		tagp->vio_subtype_env = VNET_MCAST_INFO;
1207 		tagp->vio_sid = ldcp->local_sid;
1208 		bcopy(mca, &(mcastmsg.mca), ETHERADDRL);
1209 		mcastmsg.set = add;
1210 		mcastmsg.count = 1;
1211 		if (vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (mcastmsg),
1212 		    B_FALSE) != VGEN_SUCCESS) {
1213 			DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
1214 			mutex_exit(&ldcp->cblock);
1215 			goto vgen_mcast_exit;
1216 		}
1217 	}
1218 
1219 	mutex_exit(&ldcp->cblock);
1220 
1221 	if (add) {
1222 
1223 		/* expand multicast table if necessary */
1224 		if (vgenp->mccount >= vgenp->mcsize) {
1225 			struct ether_addr	*newtab;
1226 			uint32_t		newsize;
1227 
1228 
1229 			newsize = vgenp->mcsize * 2;
1230 
1231 			newtab = kmem_zalloc(newsize *
1232 			    sizeof (struct ether_addr), KM_NOSLEEP);
1233 			if (newtab == NULL)
1234 				goto vgen_mcast_exit;
1235 			bcopy(vgenp->mctab, newtab, vgenp->mcsize *
1236 			    sizeof (struct ether_addr));
1237 			kmem_free(vgenp->mctab,
1238 			    vgenp->mcsize * sizeof (struct ether_addr));
1239 
1240 			vgenp->mctab = newtab;
1241 			vgenp->mcsize = newsize;
1242 		}
1243 
1244 		/* add address to the table */
1245 		vgenp->mctab[vgenp->mccount++] = *addrp;
1246 
1247 	} else {
1248 
1249 		/* delete address from the table */
1250 		for (i = 0; i < vgenp->mccount; i++) {
1251 			if (ether_cmp(addrp, &(vgenp->mctab[i])) == 0) {
1252 
1253 				/*
1254 				 * If there's more than one address in this
1255 				 * table, delete the unwanted one by moving
1256 				 * the last one in the list over top of it;
1257 				 * otherwise, just remove it.
1258 				 */
1259 				if (vgenp->mccount > 1) {
1260 					vgenp->mctab[i] =
1261 					    vgenp->mctab[vgenp->mccount-1];
1262 				}
1263 				vgenp->mccount--;
1264 				break;
1265 			}
1266 		}
1267 	}
1268 
1269 	rv = DDI_SUCCESS;
1270 
1271 vgen_mcast_exit:
1272 	RW_EXIT(&ldclp->rwlock);
1273 	RW_EXIT(&plistp->rwlock);
1274 
1275 	mutex_exit(&vgenp->lock);
1276 	return (rv);
1277 }
1278 
1279 /* set or clear promiscuous mode on the device */
1280 static int
1281 vgen_promisc(void *arg, boolean_t on)
1282 {
1283 	_NOTE(ARGUNUSED(arg, on))
1284 	return (DDI_SUCCESS);
1285 }
1286 
1287 /* set the unicast mac address of the device */
1288 static int
1289 vgen_unicst(void *arg, const uint8_t *mca)
1290 {
1291 	_NOTE(ARGUNUSED(arg, mca))
1292 	return (DDI_SUCCESS);
1293 }
1294 
1295 /* get device statistics */
1296 int
1297 vgen_stat(void *arg, uint_t stat, uint64_t *val)
1298 {
1299 	vgen_port_t	*portp = (vgen_port_t *)arg;
1300 
1301 	*val = vgen_port_stat(portp, stat);
1302 
1303 	return (0);
1304 }
1305 
1306 static void
1307 vgen_ioctl(void *arg, queue_t *wq, mblk_t *mp)
1308 {
1309 	 _NOTE(ARGUNUSED(arg, wq, mp))
1310 }
1311 
1312 /* vgen internal functions */
1313 /* detach all ports from the device */
1314 static void
1315 vgen_detach_ports(vgen_t *vgenp)
1316 {
1317 	vgen_port_t	*portp;
1318 	vgen_portlist_t	*plistp;
1319 
1320 	plistp = &(vgenp->vgenports);
1321 	WRITE_ENTER(&plistp->rwlock);
1322 
1323 	while ((portp = plistp->headp) != NULL) {
1324 		vgen_port_detach(portp);
1325 	}
1326 
1327 	RW_EXIT(&plistp->rwlock);
1328 }
1329 
1330 /*
1331  * detach the given port.
1332  */
1333 static void
1334 vgen_port_detach(vgen_port_t *portp)
1335 {
1336 	vgen_t		*vgenp;
1337 	vgen_ldclist_t	*ldclp;
1338 	int		port_num;
1339 
1340 	vgenp = portp->vgenp;
1341 	port_num = portp->port_num;
1342 
1343 	DBG1(vgenp, NULL, "port(%d):enter\n", port_num);
1344 
1345 	/*
1346 	 * If this port is connected to the vswitch, then
1347 	 * potentially there could be ports that may be using
1348 	 * this port to transmit packets. To address this do
1349 	 * the following:
1350 	 *	- First set vgenp->vsw_portp to NULL, so that
1351 	 *	  its not used after that.
1352 	 *	- Then wait for the refcnt to go down to 0.
1353 	 *	- Now we can safely detach this port.
1354 	 */
1355 	if (vgenp->vsw_portp == portp) {
1356 		vgenp->vsw_portp = NULL;
1357 		while (vgenp->vsw_port_refcnt > 0) {
1358 			delay(drv_usectohz(vgen_tx_delay));
1359 		}
1360 		(void) atomic_swap_32(&vgenp->vsw_port_refcnt, 0);
1361 	}
1362 
1363 	if (portp->vhp != NULL) {
1364 		vio_net_resource_unreg(portp->vhp);
1365 		portp->vhp = NULL;
1366 	}
1367 
1368 	vgen_vlan_destroy_hash(portp);
1369 
1370 	/* remove it from port list */
1371 	vgen_port_list_remove(portp);
1372 
1373 	/* detach channels from this port */
1374 	ldclp = &portp->ldclist;
1375 	WRITE_ENTER(&ldclp->rwlock);
1376 	while (ldclp->headp) {
1377 		vgen_ldc_detach(ldclp->headp);
1378 	}
1379 	RW_EXIT(&ldclp->rwlock);
1380 	rw_destroy(&ldclp->rwlock);
1381 
1382 	if (portp->num_ldcs != 0) {
1383 		kmem_free(portp->ldc_ids, portp->num_ldcs * sizeof (uint64_t));
1384 		portp->num_ldcs = 0;
1385 	}
1386 
1387 	mutex_destroy(&portp->lock);
1388 	KMEM_FREE(portp);
1389 
1390 	DBG1(vgenp, NULL, "port(%d):exit\n", port_num);
1391 }
1392 
1393 /* add a port to port list */
1394 static void
1395 vgen_port_list_insert(vgen_port_t *portp)
1396 {
1397 	vgen_portlist_t *plistp;
1398 	vgen_t *vgenp;
1399 
1400 	vgenp = portp->vgenp;
1401 	plistp = &(vgenp->vgenports);
1402 
1403 	if (plistp->headp == NULL) {
1404 		plistp->headp = portp;
1405 	} else {
1406 		plistp->tailp->nextp = portp;
1407 	}
1408 	plistp->tailp = portp;
1409 	portp->nextp = NULL;
1410 }
1411 
1412 /* remove a port from port list */
1413 static void
1414 vgen_port_list_remove(vgen_port_t *portp)
1415 {
1416 	vgen_port_t *prevp;
1417 	vgen_port_t *nextp;
1418 	vgen_portlist_t *plistp;
1419 	vgen_t *vgenp;
1420 
1421 	vgenp = portp->vgenp;
1422 
1423 	plistp = &(vgenp->vgenports);
1424 
1425 	if (plistp->headp == NULL)
1426 		return;
1427 
1428 	if (portp == plistp->headp) {
1429 		plistp->headp = portp->nextp;
1430 		if (portp == plistp->tailp)
1431 			plistp->tailp = plistp->headp;
1432 	} else {
1433 		for (prevp = plistp->headp;
1434 		    ((nextp = prevp->nextp) != NULL) && (nextp != portp);
1435 		    prevp = nextp)
1436 			;
1437 		if (nextp == portp) {
1438 			prevp->nextp = portp->nextp;
1439 		}
1440 		if (portp == plistp->tailp)
1441 			plistp->tailp = prevp;
1442 	}
1443 }
1444 
1445 /* lookup a port in the list based on port_num */
1446 static vgen_port_t *
1447 vgen_port_lookup(vgen_portlist_t *plistp, int port_num)
1448 {
1449 	vgen_port_t *portp = NULL;
1450 
1451 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
1452 		if (portp->port_num == port_num) {
1453 			break;
1454 		}
1455 	}
1456 
1457 	return (portp);
1458 }
1459 
1460 /* enable ports for transmit/receive */
1461 static void
1462 vgen_init_ports(vgen_t *vgenp)
1463 {
1464 	vgen_port_t	*portp;
1465 	vgen_portlist_t	*plistp;
1466 
1467 	plistp = &(vgenp->vgenports);
1468 	READ_ENTER(&plistp->rwlock);
1469 
1470 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
1471 		vgen_port_init(portp);
1472 	}
1473 
1474 	RW_EXIT(&plistp->rwlock);
1475 }
1476 
1477 static void
1478 vgen_port_init(vgen_port_t *portp)
1479 {
1480 	/* Add the port to the specified vlans */
1481 	vgen_vlan_add_ids(portp);
1482 
1483 	/* Bring up the channels of this port */
1484 	vgen_init_ldcs(portp);
1485 }
1486 
1487 /* disable transmit/receive on ports */
1488 static void
1489 vgen_uninit_ports(vgen_t *vgenp)
1490 {
1491 	vgen_port_t	*portp;
1492 	vgen_portlist_t	*plistp;
1493 
1494 	plistp = &(vgenp->vgenports);
1495 	READ_ENTER(&plistp->rwlock);
1496 
1497 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
1498 		vgen_port_uninit(portp);
1499 	}
1500 
1501 	RW_EXIT(&plistp->rwlock);
1502 }
1503 
1504 static void
1505 vgen_port_uninit(vgen_port_t *portp)
1506 {
1507 	vgen_uninit_ldcs(portp);
1508 
1509 	/* remove the port from vlans it has been assigned to */
1510 	vgen_vlan_remove_ids(portp);
1511 }
1512 
1513 /*
1514  * Scan the machine description for this instance of vnet
1515  * and read its properties. Called only from vgen_init().
1516  * Returns: 0 on success, 1 on failure.
1517  */
1518 static int
1519 vgen_read_mdprops(vgen_t *vgenp)
1520 {
1521 	vnet_t		*vnetp = vgenp->vnetp;
1522 	md_t		*mdp = NULL;
1523 	mde_cookie_t	rootnode;
1524 	mde_cookie_t	*listp = NULL;
1525 	uint64_t	cfgh;
1526 	char		*name;
1527 	int		rv = 1;
1528 	int		num_nodes = 0;
1529 	int		num_devs = 0;
1530 	int		listsz = 0;
1531 	int		i;
1532 
1533 	if ((mdp = md_get_handle()) == NULL) {
1534 		return (rv);
1535 	}
1536 
1537 	num_nodes = md_node_count(mdp);
1538 	ASSERT(num_nodes > 0);
1539 
1540 	listsz = num_nodes * sizeof (mde_cookie_t);
1541 	listp = (mde_cookie_t *)kmem_zalloc(listsz, KM_SLEEP);
1542 
1543 	rootnode = md_root_node(mdp);
1544 
1545 	/* search for all "virtual_device" nodes */
1546 	num_devs = md_scan_dag(mdp, rootnode,
1547 	    md_find_name(mdp, vdev_propname),
1548 	    md_find_name(mdp, "fwd"), listp);
1549 	if (num_devs <= 0) {
1550 		goto vgen_readmd_exit;
1551 	}
1552 
1553 	/*
1554 	 * Now loop through the list of virtual-devices looking for
1555 	 * devices with name "network" and for each such device compare
1556 	 * its instance with what we have from the 'reg' property to
1557 	 * find the right node in MD and then read all its properties.
1558 	 */
1559 	for (i = 0; i < num_devs; i++) {
1560 
1561 		if (md_get_prop_str(mdp, listp[i], "name", &name) != 0) {
1562 			goto vgen_readmd_exit;
1563 		}
1564 
1565 		/* is this a "network" device? */
1566 		if (strcmp(name, vnet_propname) != 0)
1567 			continue;
1568 
1569 		if (md_get_prop_val(mdp, listp[i], "cfg-handle", &cfgh) != 0) {
1570 			goto vgen_readmd_exit;
1571 		}
1572 
1573 		/* is this the required instance of vnet? */
1574 		if (vgenp->regprop != cfgh)
1575 			continue;
1576 
1577 		/*
1578 		 * Read the mtu. Note that we set the mtu of vnet device within
1579 		 * this routine itself, after validating the range.
1580 		 */
1581 		vgen_mtu_read(vgenp, mdp, listp[i], &vnetp->mtu);
1582 		if (vnetp->mtu < ETHERMTU || vnetp->mtu > VNET_MAX_MTU) {
1583 			vnetp->mtu = ETHERMTU;
1584 		}
1585 		vgenp->max_frame_size = vnetp->mtu +
1586 		    sizeof (struct ether_header) + VLAN_TAGSZ;
1587 
1588 		/* read priority ether types */
1589 		vgen_read_pri_eth_types(vgenp, mdp, listp[i]);
1590 
1591 		/* read vlan id properties of this vnet instance */
1592 		vgen_vlan_read_ids(vgenp, VGEN_LOCAL, mdp, listp[i],
1593 		    &vnetp->pvid, &vnetp->vids, &vnetp->nvids,
1594 		    &vnetp->default_vlan_id);
1595 
1596 		rv = 0;
1597 		break;
1598 	}
1599 
1600 vgen_readmd_exit:
1601 
1602 	kmem_free(listp, listsz);
1603 	(void) md_fini_handle(mdp);
1604 	return (rv);
1605 }
1606 
1607 /*
1608  * Read vlan id properties of the given MD node.
1609  * Arguments:
1610  *   arg:          device argument(vnet device or a port)
1611  *   type:         type of arg; VGEN_LOCAL(vnet device) or VGEN_PEER(port)
1612  *   mdp:          machine description
1613  *   node:         md node cookie
1614  *
1615  * Returns:
1616  *   pvidp:        port-vlan-id of the node
1617  *   vidspp:       list of vlan-ids of the node
1618  *   nvidsp:       # of vlan-ids in the list
1619  *   default_idp:  default-vlan-id of the node(if node is vnet device)
1620  */
1621 static void
1622 vgen_vlan_read_ids(void *arg, int type, md_t *mdp, mde_cookie_t node,
1623 	uint16_t *pvidp, uint16_t **vidspp, uint16_t *nvidsp,
1624 	uint16_t *default_idp)
1625 {
1626 	vgen_t		*vgenp;
1627 	vnet_t		*vnetp;
1628 	vgen_port_t	*portp;
1629 	char		*pvid_propname;
1630 	char		*vid_propname;
1631 	uint_t		nvids;
1632 	uint32_t	vids_size;
1633 	int		rv;
1634 	int		i;
1635 	uint64_t	*data;
1636 	uint64_t	val;
1637 	int		size;
1638 	int		inst;
1639 
1640 	if (type == VGEN_LOCAL) {
1641 
1642 		vgenp = (vgen_t *)arg;
1643 		vnetp = vgenp->vnetp;
1644 		pvid_propname = vgen_pvid_propname;
1645 		vid_propname = vgen_vid_propname;
1646 		inst = vnetp->instance;
1647 
1648 	} else if (type == VGEN_PEER) {
1649 
1650 		portp = (vgen_port_t *)arg;
1651 		vgenp = portp->vgenp;
1652 		vnetp = vgenp->vnetp;
1653 		pvid_propname = port_pvid_propname;
1654 		vid_propname = port_vid_propname;
1655 		inst = portp->port_num;
1656 
1657 	} else {
1658 		return;
1659 	}
1660 
1661 	if (type == VGEN_LOCAL && default_idp != NULL) {
1662 		rv = md_get_prop_val(mdp, node, vgen_dvid_propname, &val);
1663 		if (rv != 0) {
1664 			DWARN(vgenp, NULL, "prop(%s) not found",
1665 			    vgen_dvid_propname);
1666 
1667 			*default_idp = vnet_default_vlan_id;
1668 		} else {
1669 			*default_idp = val & 0xFFF;
1670 			DBG2(vgenp, NULL, "%s(%d): (%d)\n", vgen_dvid_propname,
1671 			    inst, *default_idp);
1672 		}
1673 	}
1674 
1675 	rv = md_get_prop_val(mdp, node, pvid_propname, &val);
1676 	if (rv != 0) {
1677 		DWARN(vgenp, NULL, "prop(%s) not found", pvid_propname);
1678 		*pvidp = vnet_default_vlan_id;
1679 	} else {
1680 
1681 		*pvidp = val & 0xFFF;
1682 		DBG2(vgenp, NULL, "%s(%d): (%d)\n",
1683 		    pvid_propname, inst, *pvidp);
1684 	}
1685 
1686 	rv = md_get_prop_data(mdp, node, vid_propname, (uint8_t **)&data,
1687 	    &size);
1688 	if (rv != 0) {
1689 		DBG2(vgenp, NULL, "prop(%s) not found", vid_propname);
1690 		size = 0;
1691 	} else {
1692 		size /= sizeof (uint64_t);
1693 	}
1694 	nvids = size;
1695 
1696 	if (nvids != 0) {
1697 		DBG2(vgenp, NULL, "%s(%d): ", vid_propname, inst);
1698 		vids_size = sizeof (uint16_t) * nvids;
1699 		*vidspp = kmem_zalloc(vids_size, KM_SLEEP);
1700 		for (i = 0; i < nvids; i++) {
1701 			(*vidspp)[i] = data[i] & 0xFFFF;
1702 			DBG2(vgenp, NULL, " %d ", (*vidspp)[i]);
1703 		}
1704 		DBG2(vgenp, NULL, "\n");
1705 	}
1706 
1707 	*nvidsp = nvids;
1708 }
1709 
1710 /*
1711  * Create a vlan id hash table for the given port.
1712  */
1713 static void
1714 vgen_vlan_create_hash(vgen_port_t *portp)
1715 {
1716 	char		hashname[MAXNAMELEN];
1717 
1718 	(void) snprintf(hashname, MAXNAMELEN, "port%d-vlan-hash",
1719 	    portp->port_num);
1720 
1721 	portp->vlan_nchains = vgen_vlan_nchains;
1722 	portp->vlan_hashp = mod_hash_create_idhash(hashname,
1723 	    portp->vlan_nchains, mod_hash_null_valdtor);
1724 }
1725 
1726 /*
1727  * Destroy the vlan id hash table in the given port.
1728  */
1729 static void
1730 vgen_vlan_destroy_hash(vgen_port_t *portp)
1731 {
1732 	if (portp->vlan_hashp != NULL) {
1733 		mod_hash_destroy_hash(portp->vlan_hashp);
1734 		portp->vlan_hashp = NULL;
1735 		portp->vlan_nchains = 0;
1736 	}
1737 }
1738 
1739 /*
1740  * Add a port to the vlans specified in its port properites.
1741  */
1742 static void
1743 vgen_vlan_add_ids(vgen_port_t *portp)
1744 {
1745 	int		rv;
1746 	int		i;
1747 
1748 	rv = mod_hash_insert(portp->vlan_hashp,
1749 	    (mod_hash_key_t)VLAN_ID_KEY(portp->pvid),
1750 	    (mod_hash_val_t)B_TRUE);
1751 	ASSERT(rv == 0);
1752 
1753 	for (i = 0; i < portp->nvids; i++) {
1754 		rv = mod_hash_insert(portp->vlan_hashp,
1755 		    (mod_hash_key_t)VLAN_ID_KEY(portp->vids[i]),
1756 		    (mod_hash_val_t)B_TRUE);
1757 		ASSERT(rv == 0);
1758 	}
1759 }
1760 
1761 /*
1762  * Remove a port from the vlans it has been assigned to.
1763  */
1764 static void
1765 vgen_vlan_remove_ids(vgen_port_t *portp)
1766 {
1767 	int		rv;
1768 	int		i;
1769 	mod_hash_val_t	vp;
1770 
1771 	rv = mod_hash_remove(portp->vlan_hashp,
1772 	    (mod_hash_key_t)VLAN_ID_KEY(portp->pvid),
1773 	    (mod_hash_val_t *)&vp);
1774 	ASSERT(rv == 0);
1775 
1776 	for (i = 0; i < portp->nvids; i++) {
1777 		rv = mod_hash_remove(portp->vlan_hashp,
1778 		    (mod_hash_key_t)VLAN_ID_KEY(portp->vids[i]),
1779 		    (mod_hash_val_t *)&vp);
1780 		ASSERT(rv == 0);
1781 	}
1782 }
1783 
1784 /*
1785  * Lookup the vlan id of the given tx frame. If it is a vlan-tagged frame,
1786  * then the vlan-id is available in the tag; otherwise, its vlan id is
1787  * implicitly obtained from the port-vlan-id of the vnet device.
1788  * The vlan id determined is returned in vidp.
1789  * Returns: B_TRUE if it is a tagged frame; B_FALSE if it is untagged.
1790  */
1791 static boolean_t
1792 vgen_frame_lookup_vid(vnet_t *vnetp, struct ether_header *ehp, uint16_t *vidp)
1793 {
1794 	struct ether_vlan_header	*evhp;
1795 
1796 	/* If it's a tagged frame, get the vlan id from vlan header */
1797 	if (ehp->ether_type == ETHERTYPE_VLAN) {
1798 
1799 		evhp = (struct ether_vlan_header *)ehp;
1800 		*vidp = VLAN_ID(ntohs(evhp->ether_tci));
1801 		return (B_TRUE);
1802 	}
1803 
1804 	/* Untagged frame, vlan-id is the pvid of vnet device */
1805 	*vidp = vnetp->pvid;
1806 	return (B_FALSE);
1807 }
1808 
1809 /*
1810  * Find the given vlan id in the hash table.
1811  * Return: B_TRUE if the id is found; B_FALSE if not found.
1812  */
1813 static boolean_t
1814 vgen_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid)
1815 {
1816 	int		rv;
1817 	mod_hash_val_t	vp;
1818 
1819 	rv = mod_hash_find(vlan_hashp, VLAN_ID_KEY(vid), (mod_hash_val_t *)&vp);
1820 
1821 	if (rv != 0)
1822 		return (B_FALSE);
1823 
1824 	return (B_TRUE);
1825 }
1826 
1827 /*
1828  * This function reads "priority-ether-types" property from md. This property
1829  * is used to enable support for priority frames. Applications which need
1830  * guaranteed and timely delivery of certain high priority frames to/from
1831  * a vnet or vsw within ldoms, should configure this property by providing
1832  * the ether type(s) for which the priority facility is needed.
1833  * Normal data frames are delivered over a ldc channel using the descriptor
1834  * ring mechanism which is constrained by factors such as descriptor ring size,
1835  * the rate at which the ring is processed at the peer ldc end point, etc.
1836  * The priority mechanism provides an Out-Of-Band path to send/receive frames
1837  * as raw pkt data (VIO_PKT_DATA) messages over the channel, avoiding the
1838  * descriptor ring path and enables a more reliable and timely delivery of
1839  * frames to the peer.
1840  */
1841 static void
1842 vgen_read_pri_eth_types(vgen_t *vgenp, md_t *mdp, mde_cookie_t node)
1843 {
1844 	int		rv;
1845 	uint16_t	*types;
1846 	uint64_t	*data;
1847 	int		size;
1848 	int		i;
1849 	size_t		mblk_sz;
1850 
1851 	rv = md_get_prop_data(mdp, node, pri_types_propname,
1852 	    (uint8_t **)&data, &size);
1853 	if (rv != 0) {
1854 		/*
1855 		 * Property may not exist if we are running pre-ldoms1.1 f/w.
1856 		 * Check if 'vgen_pri_eth_type' has been set in that case.
1857 		 */
1858 		if (vgen_pri_eth_type != 0) {
1859 			size = sizeof (vgen_pri_eth_type);
1860 			data = &vgen_pri_eth_type;
1861 		} else {
1862 			DBG2(vgenp, NULL,
1863 			    "prop(%s) not found", pri_types_propname);
1864 			size = 0;
1865 		}
1866 	}
1867 
1868 	if (size == 0) {
1869 		vgenp->pri_num_types = 0;
1870 		return;
1871 	}
1872 
1873 	/*
1874 	 * we have some priority-ether-types defined;
1875 	 * allocate a table of these types and also
1876 	 * allocate a pool of mblks to transmit these
1877 	 * priority packets.
1878 	 */
1879 	size /= sizeof (uint64_t);
1880 	vgenp->pri_num_types = size;
1881 	vgenp->pri_types = kmem_zalloc(size * sizeof (uint16_t), KM_SLEEP);
1882 	for (i = 0, types = vgenp->pri_types; i < size; i++) {
1883 		types[i] = data[i] & 0xFFFF;
1884 	}
1885 	mblk_sz = (VIO_PKT_DATA_HDRSIZE + vgenp->max_frame_size + 7) & ~7;
1886 	(void) vio_create_mblks(vgen_pri_tx_nmblks, mblk_sz,
1887 	    &vgenp->pri_tx_vmp);
1888 }
1889 
1890 static void
1891 vgen_mtu_read(vgen_t *vgenp, md_t *mdp, mde_cookie_t node, uint32_t *mtu)
1892 {
1893 	int		rv;
1894 	uint64_t	val;
1895 	char		*mtu_propname;
1896 
1897 	mtu_propname = vgen_mtu_propname;
1898 
1899 	rv = md_get_prop_val(mdp, node, mtu_propname, &val);
1900 	if (rv != 0) {
1901 		DWARN(vgenp, NULL, "prop(%s) not found", mtu_propname);
1902 		*mtu = vnet_ethermtu;
1903 	} else {
1904 
1905 		*mtu = val & 0xFFFF;
1906 		DBG2(vgenp, NULL, "%s(%d): (%d)\n", mtu_propname,
1907 		    vgenp->instance, *mtu);
1908 	}
1909 }
1910 
1911 /* register with MD event generator */
1912 static int
1913 vgen_mdeg_reg(vgen_t *vgenp)
1914 {
1915 	mdeg_prop_spec_t	*pspecp;
1916 	mdeg_node_spec_t	*parentp;
1917 	uint_t			templatesz;
1918 	int			rv;
1919 	mdeg_handle_t		dev_hdl = NULL;
1920 	mdeg_handle_t		port_hdl = NULL;
1921 
1922 	templatesz = sizeof (vgen_prop_template);
1923 	pspecp = kmem_zalloc(templatesz, KM_NOSLEEP);
1924 	if (pspecp == NULL) {
1925 		return (DDI_FAILURE);
1926 	}
1927 	parentp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_NOSLEEP);
1928 	if (parentp == NULL) {
1929 		kmem_free(pspecp, templatesz);
1930 		return (DDI_FAILURE);
1931 	}
1932 
1933 	bcopy(vgen_prop_template, pspecp, templatesz);
1934 
1935 	/*
1936 	 * NOTE: The instance here refers to the value of "reg" property and
1937 	 * not the dev_info instance (ddi_get_instance()) of vnet.
1938 	 */
1939 	VGEN_SET_MDEG_PROP_INST(pspecp, vgenp->regprop);
1940 
1941 	parentp->namep = "virtual-device";
1942 	parentp->specp = pspecp;
1943 
1944 	/* save parentp in vgen_t */
1945 	vgenp->mdeg_parentp = parentp;
1946 
1947 	/*
1948 	 * Register an interest in 'virtual-device' nodes with a
1949 	 * 'name' property of 'network'
1950 	 */
1951 	rv = mdeg_register(parentp, &vdev_match, vgen_mdeg_cb, vgenp, &dev_hdl);
1952 	if (rv != MDEG_SUCCESS) {
1953 		DERR(vgenp, NULL, "mdeg_register failed\n");
1954 		goto mdeg_reg_fail;
1955 	}
1956 
1957 	/* Register an interest in 'port' nodes */
1958 	rv = mdeg_register(parentp, &vport_match, vgen_mdeg_port_cb, vgenp,
1959 	    &port_hdl);
1960 	if (rv != MDEG_SUCCESS) {
1961 		DERR(vgenp, NULL, "mdeg_register failed\n");
1962 		goto mdeg_reg_fail;
1963 	}
1964 
1965 	/* save mdeg handle in vgen_t */
1966 	vgenp->mdeg_dev_hdl = dev_hdl;
1967 	vgenp->mdeg_port_hdl = port_hdl;
1968 
1969 	return (DDI_SUCCESS);
1970 
1971 mdeg_reg_fail:
1972 	if (dev_hdl != NULL) {
1973 		(void) mdeg_unregister(dev_hdl);
1974 	}
1975 	KMEM_FREE(parentp);
1976 	kmem_free(pspecp, templatesz);
1977 	vgenp->mdeg_parentp = NULL;
1978 	return (DDI_FAILURE);
1979 }
1980 
1981 /* unregister with MD event generator */
1982 static void
1983 vgen_mdeg_unreg(vgen_t *vgenp)
1984 {
1985 	(void) mdeg_unregister(vgenp->mdeg_dev_hdl);
1986 	(void) mdeg_unregister(vgenp->mdeg_port_hdl);
1987 	kmem_free(vgenp->mdeg_parentp->specp, sizeof (vgen_prop_template));
1988 	KMEM_FREE(vgenp->mdeg_parentp);
1989 	vgenp->mdeg_parentp = NULL;
1990 	vgenp->mdeg_dev_hdl = NULL;
1991 	vgenp->mdeg_port_hdl = NULL;
1992 }
1993 
1994 /* mdeg callback function for the port node */
1995 static int
1996 vgen_mdeg_port_cb(void *cb_argp, mdeg_result_t *resp)
1997 {
1998 	int idx;
1999 	int vsw_idx = -1;
2000 	uint64_t val;
2001 	vgen_t *vgenp;
2002 
2003 	if ((resp == NULL) || (cb_argp == NULL)) {
2004 		return (MDEG_FAILURE);
2005 	}
2006 
2007 	vgenp = (vgen_t *)cb_argp;
2008 	DBG1(vgenp, NULL, "enter\n");
2009 
2010 	mutex_enter(&vgenp->lock);
2011 
2012 	DBG1(vgenp, NULL, "ports: removed(%x), "
2013 	"added(%x), updated(%x)\n", resp->removed.nelem,
2014 	    resp->added.nelem, resp->match_curr.nelem);
2015 
2016 	for (idx = 0; idx < resp->removed.nelem; idx++) {
2017 		(void) vgen_remove_port(vgenp, resp->removed.mdp,
2018 		    resp->removed.mdep[idx]);
2019 	}
2020 
2021 	if (vgenp->vsw_portp == NULL) {
2022 		/*
2023 		 * find vsw_port and add it first, because other ports need
2024 		 * this when adding fdb entry (see vgen_port_init()).
2025 		 */
2026 		for (idx = 0; idx < resp->added.nelem; idx++) {
2027 			if (!(md_get_prop_val(resp->added.mdp,
2028 			    resp->added.mdep[idx], swport_propname, &val))) {
2029 				if (val == 0) {
2030 					/*
2031 					 * This port is connected to the
2032 					 * vsw on service domain.
2033 					 */
2034 					vsw_idx = idx;
2035 					if (vgen_add_port(vgenp,
2036 					    resp->added.mdp,
2037 					    resp->added.mdep[idx]) !=
2038 					    DDI_SUCCESS) {
2039 						cmn_err(CE_NOTE, "vnet%d Could "
2040 						    "not initialize virtual "
2041 						    "switch port.",
2042 						    vgenp->instance);
2043 						mutex_exit(&vgenp->lock);
2044 						return (MDEG_FAILURE);
2045 					}
2046 					break;
2047 				}
2048 			}
2049 		}
2050 		if (vsw_idx == -1) {
2051 			DWARN(vgenp, NULL, "can't find vsw_port\n");
2052 			mutex_exit(&vgenp->lock);
2053 			return (MDEG_FAILURE);
2054 		}
2055 	}
2056 
2057 	for (idx = 0; idx < resp->added.nelem; idx++) {
2058 		if ((vsw_idx != -1) && (vsw_idx == idx)) /* skip vsw_port */
2059 			continue;
2060 
2061 		/* If this port can't be added just skip it. */
2062 		(void) vgen_add_port(vgenp, resp->added.mdp,
2063 		    resp->added.mdep[idx]);
2064 	}
2065 
2066 	for (idx = 0; idx < resp->match_curr.nelem; idx++) {
2067 		(void) vgen_update_port(vgenp, resp->match_curr.mdp,
2068 		    resp->match_curr.mdep[idx],
2069 		    resp->match_prev.mdp,
2070 		    resp->match_prev.mdep[idx]);
2071 	}
2072 
2073 	mutex_exit(&vgenp->lock);
2074 	DBG1(vgenp, NULL, "exit\n");
2075 	return (MDEG_SUCCESS);
2076 }
2077 
2078 /* mdeg callback function for the vnet node */
2079 static int
2080 vgen_mdeg_cb(void *cb_argp, mdeg_result_t *resp)
2081 {
2082 	vgen_t		*vgenp;
2083 	vnet_t		*vnetp;
2084 	md_t		*mdp;
2085 	mde_cookie_t	node;
2086 	uint64_t	inst;
2087 	char		*node_name = NULL;
2088 
2089 	if ((resp == NULL) || (cb_argp == NULL)) {
2090 		return (MDEG_FAILURE);
2091 	}
2092 
2093 	vgenp = (vgen_t *)cb_argp;
2094 	vnetp = vgenp->vnetp;
2095 
2096 	DBG1(vgenp, NULL, "added %d : removed %d : curr matched %d"
2097 	    " : prev matched %d", resp->added.nelem, resp->removed.nelem,
2098 	    resp->match_curr.nelem, resp->match_prev.nelem);
2099 
2100 	mutex_enter(&vgenp->lock);
2101 
2102 	/*
2103 	 * We get an initial callback for this node as 'added' after
2104 	 * registering with mdeg. Note that we would have already gathered
2105 	 * information about this vnet node by walking MD earlier during attach
2106 	 * (in vgen_read_mdprops()). So, there is a window where the properties
2107 	 * of this node might have changed when we get this initial 'added'
2108 	 * callback. We handle this as if an update occured and invoke the same
2109 	 * function which handles updates to the properties of this vnet-node
2110 	 * if any. A non-zero 'match' value indicates that the MD has been
2111 	 * updated and that a 'network' node is present which may or may not
2112 	 * have been updated. It is up to the clients to examine their own
2113 	 * nodes and determine if they have changed.
2114 	 */
2115 	if (resp->added.nelem != 0) {
2116 
2117 		if (resp->added.nelem != 1) {
2118 			cmn_err(CE_NOTE, "!vnet%d: number of nodes added "
2119 			    "invalid: %d\n", vnetp->instance,
2120 			    resp->added.nelem);
2121 			goto vgen_mdeg_cb_err;
2122 		}
2123 
2124 		mdp = resp->added.mdp;
2125 		node = resp->added.mdep[0];
2126 
2127 	} else if (resp->match_curr.nelem != 0) {
2128 
2129 		if (resp->match_curr.nelem != 1) {
2130 			cmn_err(CE_NOTE, "!vnet%d: number of nodes updated "
2131 			    "invalid: %d\n", vnetp->instance,
2132 			    resp->match_curr.nelem);
2133 			goto vgen_mdeg_cb_err;
2134 		}
2135 
2136 		mdp = resp->match_curr.mdp;
2137 		node = resp->match_curr.mdep[0];
2138 
2139 	} else {
2140 		goto vgen_mdeg_cb_err;
2141 	}
2142 
2143 	/* Validate name and instance */
2144 	if (md_get_prop_str(mdp, node, "name", &node_name) != 0) {
2145 		DERR(vgenp, NULL, "unable to get node name\n");
2146 		goto vgen_mdeg_cb_err;
2147 	}
2148 
2149 	/* is this a virtual-network device? */
2150 	if (strcmp(node_name, vnet_propname) != 0) {
2151 		DERR(vgenp, NULL, "%s: Invalid node name: %s\n", node_name);
2152 		goto vgen_mdeg_cb_err;
2153 	}
2154 
2155 	if (md_get_prop_val(mdp, node, "cfg-handle", &inst)) {
2156 		DERR(vgenp, NULL, "prop(cfg-handle) not found\n");
2157 		goto vgen_mdeg_cb_err;
2158 	}
2159 
2160 	/* is this the right instance of vnet? */
2161 	if (inst != vgenp->regprop) {
2162 		DERR(vgenp, NULL,  "Invalid cfg-handle: %lx\n", inst);
2163 		goto vgen_mdeg_cb_err;
2164 	}
2165 
2166 	vgen_update_md_prop(vgenp, mdp, node);
2167 
2168 	mutex_exit(&vgenp->lock);
2169 	return (MDEG_SUCCESS);
2170 
2171 vgen_mdeg_cb_err:
2172 	mutex_exit(&vgenp->lock);
2173 	return (MDEG_FAILURE);
2174 }
2175 
2176 /*
2177  * Check to see if the relevant properties in the specified node have
2178  * changed, and if so take the appropriate action.
2179  */
2180 static void
2181 vgen_update_md_prop(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
2182 {
2183 	uint16_t	pvid;
2184 	uint16_t	*vids;
2185 	uint16_t	nvids;
2186 	vnet_t		*vnetp = vgenp->vnetp;
2187 	uint32_t	mtu;
2188 	enum		{ MD_init = 0x1,
2189 			    MD_vlans = 0x2,
2190 			    MD_mtu = 0x4 } updated;
2191 	int		rv;
2192 
2193 	updated = MD_init;
2194 
2195 	/* Read the vlan ids */
2196 	vgen_vlan_read_ids(vgenp, VGEN_LOCAL, mdp, mdex, &pvid, &vids,
2197 	    &nvids, NULL);
2198 
2199 	/* Determine if there are any vlan id updates */
2200 	if ((pvid != vnetp->pvid) ||		/* pvid changed? */
2201 	    (nvids != vnetp->nvids) ||		/* # of vids changed? */
2202 	    ((nvids != 0) && (vnetp->nvids != 0) &&	/* vids changed? */
2203 	    bcmp(vids, vnetp->vids, sizeof (uint16_t) * nvids))) {
2204 		updated |= MD_vlans;
2205 	}
2206 
2207 	/* Read mtu */
2208 	vgen_mtu_read(vgenp, mdp, mdex, &mtu);
2209 	if (mtu != vnetp->mtu) {
2210 		if (mtu >= ETHERMTU && mtu <= VNET_MAX_MTU) {
2211 			updated |= MD_mtu;
2212 		} else {
2213 			cmn_err(CE_NOTE, "!vnet%d: Unable to process mtu update"
2214 			    " as the specified value:%d is invalid\n",
2215 			    vnetp->instance, mtu);
2216 		}
2217 	}
2218 
2219 	/* Now process the updated props */
2220 
2221 	if (updated & MD_vlans) {
2222 
2223 		/* save the new vlan ids */
2224 		vnetp->pvid = pvid;
2225 		if (vnetp->nvids != 0) {
2226 			kmem_free(vnetp->vids,
2227 			    sizeof (uint16_t) * vnetp->nvids);
2228 			vnetp->nvids = 0;
2229 		}
2230 		if (nvids != 0) {
2231 			vnetp->nvids = nvids;
2232 			vnetp->vids = vids;
2233 		}
2234 
2235 		/* reset vlan-unaware peers (ver < 1.3) and restart handshake */
2236 		vgen_reset_vlan_unaware_ports(vgenp);
2237 
2238 	} else {
2239 
2240 		if (nvids != 0) {
2241 			kmem_free(vids, sizeof (uint16_t) * nvids);
2242 		}
2243 	}
2244 
2245 	if (updated & MD_mtu) {
2246 
2247 		DBG2(vgenp, NULL, "curr_mtu(%d) new_mtu(%d)\n",
2248 		    vnetp->mtu, mtu);
2249 
2250 		rv = vnet_mtu_update(vnetp, mtu);
2251 		if (rv == 0) {
2252 			vgenp->max_frame_size = mtu +
2253 			    sizeof (struct ether_header) + VLAN_TAGSZ;
2254 		}
2255 	}
2256 }
2257 
2258 /* add a new port to the device */
2259 static int
2260 vgen_add_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
2261 {
2262 	vgen_port_t	*portp;
2263 	int		rv;
2264 
2265 	portp = kmem_zalloc(sizeof (vgen_port_t), KM_SLEEP);
2266 
2267 	rv = vgen_port_read_props(portp, vgenp, mdp, mdex);
2268 	if (rv != DDI_SUCCESS) {
2269 		KMEM_FREE(portp);
2270 		return (DDI_FAILURE);
2271 	}
2272 
2273 	rv = vgen_port_attach(portp);
2274 	if (rv != DDI_SUCCESS) {
2275 		return (DDI_FAILURE);
2276 	}
2277 
2278 	return (DDI_SUCCESS);
2279 }
2280 
2281 /* read properties of the port from its md node */
2282 static int
2283 vgen_port_read_props(vgen_port_t *portp, vgen_t *vgenp, md_t *mdp,
2284 	mde_cookie_t mdex)
2285 {
2286 	uint64_t		port_num;
2287 	uint64_t		*ldc_ids;
2288 	uint64_t		macaddr;
2289 	uint64_t		val;
2290 	int			num_ldcs;
2291 	int			i;
2292 	int			addrsz;
2293 	int			num_nodes = 0;
2294 	int			listsz = 0;
2295 	mde_cookie_t		*listp = NULL;
2296 	uint8_t			*addrp;
2297 	struct ether_addr	ea;
2298 
2299 	/* read "id" property to get the port number */
2300 	if (md_get_prop_val(mdp, mdex, id_propname, &port_num)) {
2301 		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
2302 		return (DDI_FAILURE);
2303 	}
2304 
2305 	/*
2306 	 * Find the channel endpoint node(s) under this port node.
2307 	 */
2308 	if ((num_nodes = md_node_count(mdp)) <= 0) {
2309 		DWARN(vgenp, NULL, "invalid number of nodes found (%d)",
2310 		    num_nodes);
2311 		return (DDI_FAILURE);
2312 	}
2313 
2314 	/* allocate space for node list */
2315 	listsz = num_nodes * sizeof (mde_cookie_t);
2316 	listp = kmem_zalloc(listsz, KM_NOSLEEP);
2317 	if (listp == NULL)
2318 		return (DDI_FAILURE);
2319 
2320 	num_ldcs = md_scan_dag(mdp, mdex,
2321 	    md_find_name(mdp, channel_propname),
2322 	    md_find_name(mdp, "fwd"), listp);
2323 
2324 	if (num_ldcs <= 0) {
2325 		DWARN(vgenp, NULL, "can't find %s nodes", channel_propname);
2326 		kmem_free(listp, listsz);
2327 		return (DDI_FAILURE);
2328 	}
2329 
2330 	DBG2(vgenp, NULL, "num_ldcs %d", num_ldcs);
2331 
2332 	ldc_ids = kmem_zalloc(num_ldcs * sizeof (uint64_t), KM_NOSLEEP);
2333 	if (ldc_ids == NULL) {
2334 		kmem_free(listp, listsz);
2335 		return (DDI_FAILURE);
2336 	}
2337 
2338 	for (i = 0; i < num_ldcs; i++) {
2339 		/* read channel ids */
2340 		if (md_get_prop_val(mdp, listp[i], id_propname, &ldc_ids[i])) {
2341 			DWARN(vgenp, NULL, "prop(%s) not found\n",
2342 			    id_propname);
2343 			kmem_free(listp, listsz);
2344 			kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
2345 			return (DDI_FAILURE);
2346 		}
2347 		DBG2(vgenp, NULL, "ldc_id 0x%llx", ldc_ids[i]);
2348 	}
2349 
2350 	kmem_free(listp, listsz);
2351 
2352 	if (md_get_prop_data(mdp, mdex, rmacaddr_propname, &addrp,
2353 	    &addrsz)) {
2354 		DWARN(vgenp, NULL, "prop(%s) not found\n", rmacaddr_propname);
2355 		kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
2356 		return (DDI_FAILURE);
2357 	}
2358 
2359 	if (addrsz < ETHERADDRL) {
2360 		DWARN(vgenp, NULL, "invalid address size (%d)\n", addrsz);
2361 		kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
2362 		return (DDI_FAILURE);
2363 	}
2364 
2365 	macaddr = *((uint64_t *)addrp);
2366 
2367 	DBG2(vgenp, NULL, "remote mac address 0x%llx\n", macaddr);
2368 
2369 	for (i = ETHERADDRL - 1; i >= 0; i--) {
2370 		ea.ether_addr_octet[i] = macaddr & 0xFF;
2371 		macaddr >>= 8;
2372 	}
2373 
2374 	if (vgenp->vsw_portp == NULL) {
2375 		if (!(md_get_prop_val(mdp, mdex, swport_propname, &val))) {
2376 			if (val == 0) {
2377 				(void) atomic_swap_32(
2378 				    &vgenp->vsw_port_refcnt, 0);
2379 				/* This port is connected to the vsw */
2380 				vgenp->vsw_portp = portp;
2381 			}
2382 		}
2383 	}
2384 
2385 	/* now update all properties into the port */
2386 	portp->vgenp = vgenp;
2387 	portp->port_num = port_num;
2388 	ether_copy(&ea, &portp->macaddr);
2389 	portp->ldc_ids = kmem_zalloc(sizeof (uint64_t) * num_ldcs, KM_SLEEP);
2390 	bcopy(ldc_ids, portp->ldc_ids, sizeof (uint64_t) * num_ldcs);
2391 	portp->num_ldcs = num_ldcs;
2392 
2393 	/* read vlan id properties of this port node */
2394 	vgen_vlan_read_ids(portp, VGEN_PEER, mdp, mdex, &portp->pvid,
2395 	    &portp->vids, &portp->nvids, NULL);
2396 
2397 	kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
2398 
2399 	return (DDI_SUCCESS);
2400 }
2401 
2402 /* remove a port from the device */
2403 static int
2404 vgen_remove_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
2405 {
2406 	uint64_t	port_num;
2407 	vgen_port_t	*portp;
2408 	vgen_portlist_t	*plistp;
2409 
2410 	/* read "id" property to get the port number */
2411 	if (md_get_prop_val(mdp, mdex, id_propname, &port_num)) {
2412 		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
2413 		return (DDI_FAILURE);
2414 	}
2415 
2416 	plistp = &(vgenp->vgenports);
2417 
2418 	WRITE_ENTER(&plistp->rwlock);
2419 	portp = vgen_port_lookup(plistp, (int)port_num);
2420 	if (portp == NULL) {
2421 		DWARN(vgenp, NULL, "can't find port(%lx)\n", port_num);
2422 		RW_EXIT(&plistp->rwlock);
2423 		return (DDI_FAILURE);
2424 	}
2425 
2426 	vgen_port_detach_mdeg(portp);
2427 	RW_EXIT(&plistp->rwlock);
2428 
2429 	return (DDI_SUCCESS);
2430 }
2431 
2432 /* attach a port to the device based on mdeg data */
2433 static int
2434 vgen_port_attach(vgen_port_t *portp)
2435 {
2436 	int			i;
2437 	vgen_portlist_t		*plistp;
2438 	vgen_t			*vgenp;
2439 	uint64_t		*ldcids;
2440 	uint32_t		num_ldcs;
2441 	mac_register_t		*macp;
2442 	vio_net_res_type_t	type;
2443 	int			rv;
2444 
2445 	ASSERT(portp != NULL);
2446 
2447 	vgenp = portp->vgenp;
2448 	ldcids = portp->ldc_ids;
2449 	num_ldcs = portp->num_ldcs;
2450 
2451 	DBG1(vgenp, NULL, "port_num(%d)\n", portp->port_num);
2452 
2453 	mutex_init(&portp->lock, NULL, MUTEX_DRIVER, NULL);
2454 	rw_init(&portp->ldclist.rwlock, NULL, RW_DRIVER, NULL);
2455 	portp->ldclist.headp = NULL;
2456 
2457 	for (i = 0; i < num_ldcs; i++) {
2458 		DBG2(vgenp, NULL, "ldcid (%lx)\n", ldcids[i]);
2459 		if (vgen_ldc_attach(portp, ldcids[i]) == DDI_FAILURE) {
2460 			vgen_port_detach(portp);
2461 			return (DDI_FAILURE);
2462 		}
2463 	}
2464 
2465 	/* create vlan id hash table */
2466 	vgen_vlan_create_hash(portp);
2467 
2468 	if (portp == vgenp->vsw_portp) {
2469 		/* This port is connected to the switch port */
2470 		vgenp->vsw_portp = portp;
2471 		(void) atomic_swap_32(&portp->use_vsw_port, B_FALSE);
2472 		type = VIO_NET_RES_LDC_SERVICE;
2473 	} else {
2474 		(void) atomic_swap_32(&portp->use_vsw_port, B_TRUE);
2475 		type = VIO_NET_RES_LDC_GUEST;
2476 	}
2477 
2478 	if ((macp = mac_alloc(MAC_VERSION)) == NULL) {
2479 		vgen_port_detach(portp);
2480 		return (DDI_FAILURE);
2481 	}
2482 	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
2483 	macp->m_driver = portp;
2484 	macp->m_dip = vgenp->vnetdip;
2485 	macp->m_src_addr = (uint8_t *)&(vgenp->macaddr);
2486 	macp->m_callbacks = &vgen_m_callbacks;
2487 	macp->m_min_sdu = 0;
2488 	macp->m_max_sdu = ETHERMTU;
2489 
2490 	mutex_enter(&portp->lock);
2491 	rv = vio_net_resource_reg(macp, type, vgenp->macaddr,
2492 	    portp->macaddr, &portp->vhp, &portp->vcb);
2493 	mutex_exit(&portp->lock);
2494 	mac_free(macp);
2495 
2496 	if (rv == 0) {
2497 		/* link it into the list of ports */
2498 		plistp = &(vgenp->vgenports);
2499 		WRITE_ENTER(&plistp->rwlock);
2500 		vgen_port_list_insert(portp);
2501 		RW_EXIT(&plistp->rwlock);
2502 	} else {
2503 		DERR(vgenp, NULL, "vio_net_resource_reg failed for portp=0x%p",
2504 		    portp);
2505 		vgen_port_detach(portp);
2506 	}
2507 
2508 	DBG1(vgenp, NULL, "exit: port_num(%d)\n", portp->port_num);
2509 	return (DDI_SUCCESS);
2510 }
2511 
2512 /* detach a port from the device based on mdeg data */
2513 static void
2514 vgen_port_detach_mdeg(vgen_port_t *portp)
2515 {
2516 	vgen_t *vgenp = portp->vgenp;
2517 
2518 	DBG1(vgenp, NULL, "enter: port_num(%d)\n", portp->port_num);
2519 
2520 	mutex_enter(&portp->lock);
2521 
2522 	/* stop the port if needed */
2523 	if (portp->flags & VGEN_STARTED) {
2524 		vgen_port_uninit(portp);
2525 	}
2526 
2527 	mutex_exit(&portp->lock);
2528 	vgen_port_detach(portp);
2529 
2530 	DBG1(vgenp, NULL, "exit: port_num(%d)\n", portp->port_num);
2531 }
2532 
2533 static int
2534 vgen_update_port(vgen_t *vgenp, md_t *curr_mdp, mde_cookie_t curr_mdex,
2535 	md_t *prev_mdp, mde_cookie_t prev_mdex)
2536 {
2537 	uint64_t	cport_num;
2538 	uint64_t	pport_num;
2539 	vgen_portlist_t	*plistp;
2540 	vgen_port_t	*portp;
2541 	boolean_t	updated_vlans = B_FALSE;
2542 	uint16_t	pvid;
2543 	uint16_t	*vids;
2544 	uint16_t	nvids;
2545 
2546 	/*
2547 	 * For now, we get port updates only if vlan ids changed.
2548 	 * We read the port num and do some sanity check.
2549 	 */
2550 	if (md_get_prop_val(curr_mdp, curr_mdex, id_propname, &cport_num)) {
2551 		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
2552 		return (DDI_FAILURE);
2553 	}
2554 
2555 	if (md_get_prop_val(prev_mdp, prev_mdex, id_propname, &pport_num)) {
2556 		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
2557 		return (DDI_FAILURE);
2558 	}
2559 	if (cport_num != pport_num)
2560 		return (DDI_FAILURE);
2561 
2562 	plistp = &(vgenp->vgenports);
2563 
2564 	READ_ENTER(&plistp->rwlock);
2565 
2566 	portp = vgen_port_lookup(plistp, (int)cport_num);
2567 	if (portp == NULL) {
2568 		DWARN(vgenp, NULL, "can't find port(%lx)\n", cport_num);
2569 		RW_EXIT(&plistp->rwlock);
2570 		return (DDI_FAILURE);
2571 	}
2572 
2573 	/* Read the vlan ids */
2574 	vgen_vlan_read_ids(portp, VGEN_PEER, curr_mdp, curr_mdex, &pvid, &vids,
2575 	    &nvids, NULL);
2576 
2577 	/* Determine if there are any vlan id updates */
2578 	if ((pvid != portp->pvid) ||		/* pvid changed? */
2579 	    (nvids != portp->nvids) ||		/* # of vids changed? */
2580 	    ((nvids != 0) && (portp->nvids != 0) &&	/* vids changed? */
2581 	    bcmp(vids, portp->vids, sizeof (uint16_t) * nvids))) {
2582 		updated_vlans = B_TRUE;
2583 	}
2584 
2585 	if (updated_vlans == B_FALSE) {
2586 		RW_EXIT(&plistp->rwlock);
2587 		return (DDI_FAILURE);
2588 	}
2589 
2590 	/* remove the port from vlans it has been assigned to */
2591 	vgen_vlan_remove_ids(portp);
2592 
2593 	/* save the new vlan ids */
2594 	portp->pvid = pvid;
2595 	if (portp->nvids != 0) {
2596 		kmem_free(portp->vids, sizeof (uint16_t) * portp->nvids);
2597 		portp->nvids = 0;
2598 	}
2599 	if (nvids != 0) {
2600 		portp->vids = kmem_zalloc(sizeof (uint16_t) * nvids, KM_SLEEP);
2601 		bcopy(vids, portp->vids, sizeof (uint16_t) * nvids);
2602 		portp->nvids = nvids;
2603 		kmem_free(vids, sizeof (uint16_t) * nvids);
2604 	}
2605 
2606 	/* add port to the new vlans */
2607 	vgen_vlan_add_ids(portp);
2608 
2609 	/* reset the port if it is vlan unaware (ver < 1.3) */
2610 	vgen_vlan_unaware_port_reset(portp);
2611 
2612 	RW_EXIT(&plistp->rwlock);
2613 
2614 	return (DDI_SUCCESS);
2615 }
2616 
2617 static uint64_t
2618 vgen_port_stat(vgen_port_t *portp, uint_t stat)
2619 {
2620 	vgen_ldclist_t	*ldclp;
2621 	vgen_ldc_t *ldcp;
2622 	uint64_t	val;
2623 
2624 	val = 0;
2625 	ldclp = &portp->ldclist;
2626 
2627 	READ_ENTER(&ldclp->rwlock);
2628 	for (ldcp = ldclp->headp; ldcp != NULL; ldcp = ldcp->nextp) {
2629 		val += vgen_ldc_stat(ldcp, stat);
2630 	}
2631 	RW_EXIT(&ldclp->rwlock);
2632 
2633 	return (val);
2634 }
2635 
2636 /* allocate receive resources */
2637 static int
2638 vgen_init_multipools(vgen_ldc_t *ldcp)
2639 {
2640 	size_t		data_sz;
2641 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
2642 	int		status;
2643 	uint32_t	sz1 = 0;
2644 	uint32_t	sz2 = 0;
2645 	uint32_t	sz3 = 0;
2646 	uint32_t	sz4 = 0;
2647 
2648 	/*
2649 	 * We round up the mtu specified to be a multiple of 2K.
2650 	 * We then create rx pools based on the rounded up size.
2651 	 */
2652 	data_sz = vgenp->max_frame_size + VNET_IPALIGN + VNET_LDCALIGN;
2653 	data_sz = VNET_ROUNDUP_2K(data_sz);
2654 
2655 	/*
2656 	 * If pool sizes are specified, use them. Note that the presence of
2657 	 * the first tunable will be used as a hint.
2658 	 */
2659 	if (vgen_rbufsz1 != 0) {
2660 
2661 		sz1 = vgen_rbufsz1;
2662 		sz2 = vgen_rbufsz2;
2663 		sz3 = vgen_rbufsz3;
2664 		sz4 = vgen_rbufsz4;
2665 
2666 		if (sz4 == 0) { /* need 3 pools */
2667 
2668 			ldcp->max_rxpool_size = sz3;
2669 			status = vio_init_multipools(&ldcp->vmp,
2670 			    VGEN_NUM_VMPOOLS, sz1, sz2, sz3, vgen_nrbufs1,
2671 			    vgen_nrbufs2, vgen_nrbufs3);
2672 
2673 		} else {
2674 
2675 			ldcp->max_rxpool_size = sz4;
2676 			status = vio_init_multipools(&ldcp->vmp,
2677 			    VGEN_NUM_VMPOOLS + 1, sz1, sz2, sz3, sz4,
2678 			    vgen_nrbufs1, vgen_nrbufs2, vgen_nrbufs3,
2679 			    vgen_nrbufs4);
2680 		}
2681 		return (status);
2682 	}
2683 
2684 	/*
2685 	 * Pool sizes are not specified. We select the pool sizes based on the
2686 	 * mtu if vnet_jumbo_rxpools is enabled.
2687 	 */
2688 	if (vnet_jumbo_rxpools == B_FALSE || data_sz == VNET_2K) {
2689 		/*
2690 		 * Receive buffer pool allocation based on mtu is disabled.
2691 		 * Use the default mechanism of standard size pool allocation.
2692 		 */
2693 		sz1 = VGEN_DBLK_SZ_128;
2694 		sz2 = VGEN_DBLK_SZ_256;
2695 		sz3 = VGEN_DBLK_SZ_2048;
2696 		ldcp->max_rxpool_size = sz3;
2697 
2698 		status = vio_init_multipools(&ldcp->vmp, VGEN_NUM_VMPOOLS,
2699 		    sz1, sz2, sz3,
2700 		    vgen_nrbufs1, vgen_nrbufs2, vgen_nrbufs3);
2701 
2702 		return (status);
2703 	}
2704 
2705 	switch (data_sz) {
2706 
2707 	case VNET_4K:
2708 
2709 		sz1 = VGEN_DBLK_SZ_128;
2710 		sz2 = VGEN_DBLK_SZ_256;
2711 		sz3 = VGEN_DBLK_SZ_2048;
2712 		sz4 = sz3 << 1;			/* 4K */
2713 		ldcp->max_rxpool_size = sz4;
2714 
2715 		status = vio_init_multipools(&ldcp->vmp, VGEN_NUM_VMPOOLS + 1,
2716 		    sz1, sz2, sz3, sz4,
2717 		    vgen_nrbufs1, vgen_nrbufs2, vgen_nrbufs3, vgen_nrbufs4);
2718 		break;
2719 
2720 	default:	/* data_sz:  4K+ to 16K */
2721 
2722 		sz1 = VGEN_DBLK_SZ_256;
2723 		sz2 = VGEN_DBLK_SZ_2048;
2724 		sz3 = data_sz >> 1;	/* Jumbo-size/2 */
2725 		sz4 = data_sz;		/* Jumbo-size  */
2726 		ldcp->max_rxpool_size = sz4;
2727 
2728 		status = vio_init_multipools(&ldcp->vmp, VGEN_NUM_VMPOOLS + 1,
2729 		    sz1, sz2, sz3, sz4,
2730 		    vgen_nrbufs1, vgen_nrbufs2, vgen_nrbufs3, vgen_nrbufs4);
2731 		break;
2732 
2733 	}
2734 
2735 	return (status);
2736 }
2737 
2738 /* attach the channel corresponding to the given ldc_id to the port */
2739 static int
2740 vgen_ldc_attach(vgen_port_t *portp, uint64_t ldc_id)
2741 {
2742 	vgen_t 		*vgenp;
2743 	vgen_ldclist_t	*ldclp;
2744 	vgen_ldc_t 	*ldcp, **prev_ldcp;
2745 	ldc_attr_t 	attr;
2746 	int 		status;
2747 	ldc_status_t	istatus;
2748 	char		kname[MAXNAMELEN];
2749 	int		instance;
2750 	enum	{AST_init = 0x0, AST_ldc_alloc = 0x1,
2751 		AST_mutex_init = 0x2, AST_ldc_init = 0x4,
2752 		AST_ldc_reg_cb = 0x8, AST_alloc_tx_ring = 0x10,
2753 		AST_create_rxmblks = 0x20,
2754 		AST_create_rcv_thread = 0x40} attach_state;
2755 
2756 	attach_state = AST_init;
2757 	vgenp = portp->vgenp;
2758 	ldclp = &portp->ldclist;
2759 
2760 	ldcp = kmem_zalloc(sizeof (vgen_ldc_t), KM_NOSLEEP);
2761 	if (ldcp == NULL) {
2762 		goto ldc_attach_failed;
2763 	}
2764 	ldcp->ldc_id = ldc_id;
2765 	ldcp->portp = portp;
2766 
2767 	attach_state |= AST_ldc_alloc;
2768 
2769 	mutex_init(&ldcp->txlock, NULL, MUTEX_DRIVER, NULL);
2770 	mutex_init(&ldcp->cblock, NULL, MUTEX_DRIVER, NULL);
2771 	mutex_init(&ldcp->tclock, NULL, MUTEX_DRIVER, NULL);
2772 	mutex_init(&ldcp->wrlock, NULL, MUTEX_DRIVER, NULL);
2773 	mutex_init(&ldcp->rxlock, NULL, MUTEX_DRIVER, NULL);
2774 
2775 	attach_state |= AST_mutex_init;
2776 
2777 	attr.devclass = LDC_DEV_NT;
2778 	attr.instance = vgenp->instance;
2779 	attr.mode = LDC_MODE_UNRELIABLE;
2780 	attr.mtu = vnet_ldc_mtu;
2781 	status = ldc_init(ldc_id, &attr, &ldcp->ldc_handle);
2782 	if (status != 0) {
2783 		DWARN(vgenp, ldcp, "ldc_init failed,rv (%d)\n", status);
2784 		goto ldc_attach_failed;
2785 	}
2786 	attach_state |= AST_ldc_init;
2787 
2788 	if (vgen_rcv_thread_enabled) {
2789 		ldcp->rcv_thr_flags = 0;
2790 
2791 		mutex_init(&ldcp->rcv_thr_lock, NULL, MUTEX_DRIVER, NULL);
2792 		cv_init(&ldcp->rcv_thr_cv, NULL, CV_DRIVER, NULL);
2793 		ldcp->rcv_thread = thread_create(NULL, 2 * DEFAULTSTKSZ,
2794 		    vgen_ldc_rcv_worker, ldcp, 0, &p0, TS_RUN, maxclsyspri);
2795 
2796 		attach_state |= AST_create_rcv_thread;
2797 		if (ldcp->rcv_thread == NULL) {
2798 			DWARN(vgenp, ldcp, "Failed to create worker thread");
2799 			goto ldc_attach_failed;
2800 		}
2801 	}
2802 
2803 	status = ldc_reg_callback(ldcp->ldc_handle, vgen_ldc_cb, (caddr_t)ldcp);
2804 	if (status != 0) {
2805 		DWARN(vgenp, ldcp, "ldc_reg_callback failed, rv (%d)\n",
2806 		    status);
2807 		goto ldc_attach_failed;
2808 	}
2809 	/*
2810 	 * allocate a message for ldc_read()s, big enough to hold ctrl and
2811 	 * data msgs, including raw data msgs used to recv priority frames.
2812 	 */
2813 	ldcp->msglen = VIO_PKT_DATA_HDRSIZE + vgenp->max_frame_size;
2814 	ldcp->ldcmsg = kmem_alloc(ldcp->msglen, KM_SLEEP);
2815 	attach_state |= AST_ldc_reg_cb;
2816 
2817 	(void) ldc_status(ldcp->ldc_handle, &istatus);
2818 	ASSERT(istatus == LDC_INIT);
2819 	ldcp->ldc_status = istatus;
2820 
2821 	/* allocate transmit resources */
2822 	status = vgen_alloc_tx_ring(ldcp);
2823 	if (status != 0) {
2824 		goto ldc_attach_failed;
2825 	}
2826 	attach_state |= AST_alloc_tx_ring;
2827 
2828 	/* allocate receive resources */
2829 	status = vgen_init_multipools(ldcp);
2830 	if (status != 0) {
2831 		goto ldc_attach_failed;
2832 	}
2833 	attach_state |= AST_create_rxmblks;
2834 
2835 	/* Setup kstats for the channel */
2836 	instance = vgenp->instance;
2837 	(void) sprintf(kname, "vnetldc0x%lx", ldcp->ldc_id);
2838 	ldcp->ksp = vgen_setup_kstats("vnet", instance, kname, &ldcp->stats);
2839 	if (ldcp->ksp == NULL) {
2840 		goto ldc_attach_failed;
2841 	}
2842 
2843 	/* initialize vgen_versions supported */
2844 	bcopy(vgen_versions, ldcp->vgen_versions, sizeof (ldcp->vgen_versions));
2845 	vgen_reset_vnet_proto_ops(ldcp);
2846 
2847 	/* link it into the list of channels for this port */
2848 	WRITE_ENTER(&ldclp->rwlock);
2849 	prev_ldcp = (vgen_ldc_t **)(&ldclp->headp);
2850 	ldcp->nextp = *prev_ldcp;
2851 	*prev_ldcp = ldcp;
2852 	RW_EXIT(&ldclp->rwlock);
2853 
2854 	ldcp->flags |= CHANNEL_ATTACHED;
2855 	return (DDI_SUCCESS);
2856 
2857 ldc_attach_failed:
2858 	if (attach_state & AST_ldc_reg_cb) {
2859 		(void) ldc_unreg_callback(ldcp->ldc_handle);
2860 		kmem_free(ldcp->ldcmsg, ldcp->msglen);
2861 	}
2862 	if (attach_state & AST_create_rcv_thread) {
2863 		if (ldcp->rcv_thread != NULL) {
2864 			vgen_stop_rcv_thread(ldcp);
2865 		}
2866 		mutex_destroy(&ldcp->rcv_thr_lock);
2867 		cv_destroy(&ldcp->rcv_thr_cv);
2868 	}
2869 	if (attach_state & AST_create_rxmblks) {
2870 		vio_mblk_pool_t *fvmp = NULL;
2871 
2872 		vio_destroy_multipools(&ldcp->vmp, &fvmp);
2873 		ASSERT(fvmp == NULL);
2874 	}
2875 	if (attach_state & AST_alloc_tx_ring) {
2876 		vgen_free_tx_ring(ldcp);
2877 	}
2878 	if (attach_state & AST_ldc_init) {
2879 		(void) ldc_fini(ldcp->ldc_handle);
2880 	}
2881 	if (attach_state & AST_mutex_init) {
2882 		mutex_destroy(&ldcp->tclock);
2883 		mutex_destroy(&ldcp->txlock);
2884 		mutex_destroy(&ldcp->cblock);
2885 		mutex_destroy(&ldcp->wrlock);
2886 		mutex_destroy(&ldcp->rxlock);
2887 	}
2888 	if (attach_state & AST_ldc_alloc) {
2889 		KMEM_FREE(ldcp);
2890 	}
2891 	return (DDI_FAILURE);
2892 }
2893 
2894 /* detach a channel from the port */
2895 static void
2896 vgen_ldc_detach(vgen_ldc_t *ldcp)
2897 {
2898 	vgen_port_t	*portp;
2899 	vgen_t 		*vgenp;
2900 	vgen_ldc_t 	*pldcp;
2901 	vgen_ldc_t	**prev_ldcp;
2902 	vgen_ldclist_t	*ldclp;
2903 
2904 	portp = ldcp->portp;
2905 	vgenp = portp->vgenp;
2906 	ldclp = &portp->ldclist;
2907 
2908 	prev_ldcp =  (vgen_ldc_t **)&ldclp->headp;
2909 	for (; (pldcp = *prev_ldcp) != NULL; prev_ldcp = &pldcp->nextp) {
2910 		if (pldcp == ldcp) {
2911 			break;
2912 		}
2913 	}
2914 
2915 	if (pldcp == NULL) {
2916 		/* invalid ldcp? */
2917 		return;
2918 	}
2919 
2920 	if (ldcp->ldc_status != LDC_INIT) {
2921 		DWARN(vgenp, ldcp, "ldc_status is not INIT\n");
2922 	}
2923 
2924 	if (ldcp->flags & CHANNEL_ATTACHED) {
2925 		ldcp->flags &= ~(CHANNEL_ATTACHED);
2926 
2927 		(void) ldc_unreg_callback(ldcp->ldc_handle);
2928 		if (ldcp->rcv_thread != NULL) {
2929 			/* First stop the receive thread */
2930 			vgen_stop_rcv_thread(ldcp);
2931 			mutex_destroy(&ldcp->rcv_thr_lock);
2932 			cv_destroy(&ldcp->rcv_thr_cv);
2933 		}
2934 		kmem_free(ldcp->ldcmsg, ldcp->msglen);
2935 
2936 		vgen_destroy_kstats(ldcp->ksp);
2937 		ldcp->ksp = NULL;
2938 
2939 		/*
2940 		 * if we cannot reclaim all mblks, put this
2941 		 * on the list of pools(vgenp->rmp) to be reclaimed when the
2942 		 * device gets detached (see vgen_uninit()).
2943 		 */
2944 		vio_destroy_multipools(&ldcp->vmp, &vgenp->rmp);
2945 
2946 		/* free transmit resources */
2947 		vgen_free_tx_ring(ldcp);
2948 
2949 		(void) ldc_fini(ldcp->ldc_handle);
2950 		mutex_destroy(&ldcp->tclock);
2951 		mutex_destroy(&ldcp->txlock);
2952 		mutex_destroy(&ldcp->cblock);
2953 		mutex_destroy(&ldcp->wrlock);
2954 		mutex_destroy(&ldcp->rxlock);
2955 
2956 		/* unlink it from the list */
2957 		*prev_ldcp = ldcp->nextp;
2958 		KMEM_FREE(ldcp);
2959 	}
2960 }
2961 
2962 /*
2963  * This function allocates transmit resources for the channel.
2964  * The resources consist of a transmit descriptor ring and an associated
2965  * transmit buffer ring.
2966  */
2967 static int
2968 vgen_alloc_tx_ring(vgen_ldc_t *ldcp)
2969 {
2970 	void *tbufp;
2971 	ldc_mem_info_t minfo;
2972 	uint32_t txdsize;
2973 	uint32_t tbufsize;
2974 	int status;
2975 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
2976 
2977 	ldcp->num_txds = vnet_ntxds;
2978 	txdsize = sizeof (vnet_public_desc_t);
2979 	tbufsize = sizeof (vgen_private_desc_t);
2980 
2981 	/* allocate transmit buffer ring */
2982 	tbufp = kmem_zalloc(ldcp->num_txds * tbufsize, KM_NOSLEEP);
2983 	if (tbufp == NULL) {
2984 		return (DDI_FAILURE);
2985 	}
2986 
2987 	/* create transmit descriptor ring */
2988 	status = ldc_mem_dring_create(ldcp->num_txds, txdsize,
2989 	    &ldcp->tx_dhandle);
2990 	if (status) {
2991 		DWARN(vgenp, ldcp, "ldc_mem_dring_create() failed\n");
2992 		kmem_free(tbufp, ldcp->num_txds * tbufsize);
2993 		return (DDI_FAILURE);
2994 	}
2995 
2996 	/* get the addr of descripror ring */
2997 	status = ldc_mem_dring_info(ldcp->tx_dhandle, &minfo);
2998 	if (status) {
2999 		DWARN(vgenp, ldcp, "ldc_mem_dring_info() failed\n");
3000 		kmem_free(tbufp, ldcp->num_txds * tbufsize);
3001 		(void) ldc_mem_dring_destroy(ldcp->tx_dhandle);
3002 		ldcp->tbufp = NULL;
3003 		return (DDI_FAILURE);
3004 	}
3005 	ldcp->txdp = (vnet_public_desc_t *)(minfo.vaddr);
3006 	ldcp->tbufp = tbufp;
3007 
3008 	ldcp->txdendp = &((ldcp->txdp)[ldcp->num_txds]);
3009 	ldcp->tbufendp = &((ldcp->tbufp)[ldcp->num_txds]);
3010 
3011 	return (DDI_SUCCESS);
3012 }
3013 
3014 /* Free transmit resources for the channel */
3015 static void
3016 vgen_free_tx_ring(vgen_ldc_t *ldcp)
3017 {
3018 	int tbufsize = sizeof (vgen_private_desc_t);
3019 
3020 	/* free transmit descriptor ring */
3021 	(void) ldc_mem_dring_destroy(ldcp->tx_dhandle);
3022 
3023 	/* free transmit buffer ring */
3024 	kmem_free(ldcp->tbufp, ldcp->num_txds * tbufsize);
3025 	ldcp->txdp = ldcp->txdendp = NULL;
3026 	ldcp->tbufp = ldcp->tbufendp = NULL;
3027 }
3028 
3029 /* enable transmit/receive on the channels for the port */
3030 static void
3031 vgen_init_ldcs(vgen_port_t *portp)
3032 {
3033 	vgen_ldclist_t	*ldclp = &portp->ldclist;
3034 	vgen_ldc_t	*ldcp;
3035 
3036 	READ_ENTER(&ldclp->rwlock);
3037 	ldcp =  ldclp->headp;
3038 	for (; ldcp  != NULL; ldcp = ldcp->nextp) {
3039 		(void) vgen_ldc_init(ldcp);
3040 	}
3041 	RW_EXIT(&ldclp->rwlock);
3042 }
3043 
3044 /* stop transmit/receive on the channels for the port */
3045 static void
3046 vgen_uninit_ldcs(vgen_port_t *portp)
3047 {
3048 	vgen_ldclist_t	*ldclp = &portp->ldclist;
3049 	vgen_ldc_t	*ldcp;
3050 
3051 	READ_ENTER(&ldclp->rwlock);
3052 	ldcp =  ldclp->headp;
3053 	for (; ldcp  != NULL; ldcp = ldcp->nextp) {
3054 		vgen_ldc_uninit(ldcp);
3055 	}
3056 	RW_EXIT(&ldclp->rwlock);
3057 }
3058 
3059 /* enable transmit/receive on the channel */
3060 static int
3061 vgen_ldc_init(vgen_ldc_t *ldcp)
3062 {
3063 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3064 	ldc_status_t	istatus;
3065 	int		rv;
3066 	uint32_t	retries = 0;
3067 	enum	{ ST_init = 0x0, ST_ldc_open = 0x1,
3068 		ST_init_tbufs = 0x2, ST_cb_enable = 0x4} init_state;
3069 	init_state = ST_init;
3070 
3071 	DBG1(vgenp, ldcp, "enter\n");
3072 	LDC_LOCK(ldcp);
3073 
3074 	rv = ldc_open(ldcp->ldc_handle);
3075 	if (rv != 0) {
3076 		DWARN(vgenp, ldcp, "ldc_open failed: rv(%d)\n", rv);
3077 		goto ldcinit_failed;
3078 	}
3079 	init_state |= ST_ldc_open;
3080 
3081 	(void) ldc_status(ldcp->ldc_handle, &istatus);
3082 	if (istatus != LDC_OPEN && istatus != LDC_READY) {
3083 		DWARN(vgenp, ldcp, "status(%d) is not OPEN/READY\n", istatus);
3084 		goto ldcinit_failed;
3085 	}
3086 	ldcp->ldc_status = istatus;
3087 
3088 	rv = vgen_init_tbufs(ldcp);
3089 	if (rv != 0) {
3090 		DWARN(vgenp, ldcp, "vgen_init_tbufs() failed\n");
3091 		goto ldcinit_failed;
3092 	}
3093 	init_state |= ST_init_tbufs;
3094 
3095 	rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_ENABLE);
3096 	if (rv != 0) {
3097 		DWARN(vgenp, ldcp, "ldc_set_cb_mode failed: rv(%d)\n", rv);
3098 		goto ldcinit_failed;
3099 	}
3100 
3101 	init_state |= ST_cb_enable;
3102 
3103 	do {
3104 		rv = ldc_up(ldcp->ldc_handle);
3105 		if ((rv != 0) && (rv == EWOULDBLOCK)) {
3106 			DBG2(vgenp, ldcp, "ldc_up err rv(%d)\n", rv);
3107 			drv_usecwait(VGEN_LDC_UP_DELAY);
3108 		}
3109 		if (retries++ >= vgen_ldcup_retries)
3110 			break;
3111 	} while (rv == EWOULDBLOCK);
3112 
3113 	(void) ldc_status(ldcp->ldc_handle, &istatus);
3114 	if (istatus == LDC_UP) {
3115 		DWARN(vgenp, ldcp, "status(%d) is UP\n", istatus);
3116 	}
3117 
3118 	ldcp->ldc_status = istatus;
3119 
3120 	/* initialize transmit watchdog timeout */
3121 	ldcp->wd_tid = timeout(vgen_ldc_watchdog, (caddr_t)ldcp,
3122 	    drv_usectohz(vnet_ldcwd_interval * 1000));
3123 
3124 	ldcp->hphase = -1;
3125 	ldcp->flags |= CHANNEL_STARTED;
3126 
3127 	/* if channel is already UP - start handshake */
3128 	if (istatus == LDC_UP) {
3129 		vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3130 		if (ldcp->portp != vgenp->vsw_portp) {
3131 			/*
3132 			 * As the channel is up, use this port from now on.
3133 			 */
3134 			(void) atomic_swap_32(
3135 			    &ldcp->portp->use_vsw_port, B_FALSE);
3136 		}
3137 
3138 		/* Initialize local session id */
3139 		ldcp->local_sid = ddi_get_lbolt();
3140 
3141 		/* clear peer session id */
3142 		ldcp->peer_sid = 0;
3143 		ldcp->hretries = 0;
3144 
3145 		/* Initiate Handshake process with peer ldc endpoint */
3146 		vgen_reset_hphase(ldcp);
3147 
3148 		mutex_exit(&ldcp->tclock);
3149 		mutex_exit(&ldcp->txlock);
3150 		mutex_exit(&ldcp->wrlock);
3151 		mutex_exit(&ldcp->rxlock);
3152 		vgen_handshake(vh_nextphase(ldcp));
3153 		mutex_exit(&ldcp->cblock);
3154 	} else {
3155 		LDC_UNLOCK(ldcp);
3156 	}
3157 
3158 	return (DDI_SUCCESS);
3159 
3160 ldcinit_failed:
3161 	if (init_state & ST_cb_enable) {
3162 		(void) ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE);
3163 	}
3164 	if (init_state & ST_init_tbufs) {
3165 		vgen_uninit_tbufs(ldcp);
3166 	}
3167 	if (init_state & ST_ldc_open) {
3168 		(void) ldc_close(ldcp->ldc_handle);
3169 	}
3170 	LDC_UNLOCK(ldcp);
3171 	DBG1(vgenp, ldcp, "exit\n");
3172 	return (DDI_FAILURE);
3173 }
3174 
3175 /* stop transmit/receive on the channel */
3176 static void
3177 vgen_ldc_uninit(vgen_ldc_t *ldcp)
3178 {
3179 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3180 	int	rv;
3181 
3182 	DBG1(vgenp, ldcp, "enter\n");
3183 	LDC_LOCK(ldcp);
3184 
3185 	if ((ldcp->flags & CHANNEL_STARTED) == 0) {
3186 		LDC_UNLOCK(ldcp);
3187 		DWARN(vgenp, ldcp, "CHANNEL_STARTED flag is not set\n");
3188 		return;
3189 	}
3190 
3191 	/* disable further callbacks */
3192 	rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE);
3193 	if (rv != 0) {
3194 		DWARN(vgenp, ldcp, "ldc_set_cb_mode failed\n");
3195 	}
3196 
3197 	if (vgenp->vsw_portp == ldcp->portp) {
3198 		vio_net_report_err_t rep_err =
3199 		    ldcp->portp->vcb.vio_net_report_err;
3200 		rep_err(ldcp->portp->vhp, VIO_NET_RES_DOWN);
3201 	}
3202 
3203 	/*
3204 	 * clear handshake done bit and wait for pending tx and cb to finish.
3205 	 * release locks before untimeout(9F) is invoked to cancel timeouts.
3206 	 */
3207 	ldcp->hphase &= ~(VH_DONE);
3208 	LDC_UNLOCK(ldcp);
3209 
3210 	/* cancel handshake watchdog timeout */
3211 	if (ldcp->htid) {
3212 		(void) untimeout(ldcp->htid);
3213 		ldcp->htid = 0;
3214 	}
3215 
3216 	if (ldcp->cancel_htid) {
3217 		(void) untimeout(ldcp->cancel_htid);
3218 		ldcp->cancel_htid = 0;
3219 	}
3220 
3221 	/* cancel transmit watchdog timeout */
3222 	if (ldcp->wd_tid) {
3223 		(void) untimeout(ldcp->wd_tid);
3224 		ldcp->wd_tid = 0;
3225 	}
3226 
3227 	drv_usecwait(1000);
3228 
3229 	/* acquire locks again; any pending transmits and callbacks are done */
3230 	LDC_LOCK(ldcp);
3231 
3232 	vgen_reset_hphase(ldcp);
3233 
3234 	vgen_uninit_tbufs(ldcp);
3235 
3236 	rv = ldc_close(ldcp->ldc_handle);
3237 	if (rv != 0) {
3238 		DWARN(vgenp, ldcp, "ldc_close err\n");
3239 	}
3240 	ldcp->ldc_status = LDC_INIT;
3241 	ldcp->flags &= ~(CHANNEL_STARTED);
3242 
3243 	LDC_UNLOCK(ldcp);
3244 
3245 	DBG1(vgenp, ldcp, "exit\n");
3246 }
3247 
3248 /* Initialize the transmit buffer ring for the channel */
3249 static int
3250 vgen_init_tbufs(vgen_ldc_t *ldcp)
3251 {
3252 	vgen_private_desc_t	*tbufp;
3253 	vnet_public_desc_t	*txdp;
3254 	vio_dring_entry_hdr_t		*hdrp;
3255 	int 			i;
3256 	int 			rv;
3257 	caddr_t			datap = NULL;
3258 	int			ci;
3259 	uint32_t		ncookies;
3260 	size_t			data_sz;
3261 	vgen_t			*vgenp;
3262 
3263 	vgenp = LDC_TO_VGEN(ldcp);
3264 
3265 	bzero(ldcp->tbufp, sizeof (*tbufp) * (ldcp->num_txds));
3266 	bzero(ldcp->txdp, sizeof (*txdp) * (ldcp->num_txds));
3267 
3268 	/*
3269 	 * In order to ensure that the number of ldc cookies per descriptor is
3270 	 * limited to be within the default MAX_COOKIES (2), we take the steps
3271 	 * outlined below:
3272 	 *
3273 	 * Align the entire data buffer area to 8K and carve out per descriptor
3274 	 * data buffers starting from this 8K aligned base address.
3275 	 *
3276 	 * We round up the mtu specified to be a multiple of 2K or 4K.
3277 	 * For sizes up to 12K we round up the size to the next 2K.
3278 	 * For sizes > 12K we round up to the next 4K (otherwise sizes such as
3279 	 * 14K could end up needing 3 cookies, with the buffer spread across
3280 	 * 3 8K pages:  8K+6K, 2K+8K+2K, 6K+8K, ...).
3281 	 */
3282 	data_sz = vgenp->max_frame_size + VNET_IPALIGN + VNET_LDCALIGN;
3283 	if (data_sz <= VNET_12K) {
3284 		data_sz = VNET_ROUNDUP_2K(data_sz);
3285 	} else {
3286 		data_sz = VNET_ROUNDUP_4K(data_sz);
3287 	}
3288 
3289 	/* allocate extra 8K bytes for alignment */
3290 	ldcp->tx_data_sz = (data_sz * ldcp->num_txds) + VNET_8K;
3291 	datap = kmem_zalloc(ldcp->tx_data_sz, KM_SLEEP);
3292 	ldcp->tx_datap = datap;
3293 
3294 
3295 	/* align the starting address of the data area to 8K */
3296 	datap = (caddr_t)VNET_ROUNDUP_8K((uintptr_t)datap);
3297 
3298 	/*
3299 	 * for each private descriptor, allocate a ldc mem_handle which is
3300 	 * required to map the data during transmit, set the flags
3301 	 * to free (available for use by transmit routine).
3302 	 */
3303 
3304 	for (i = 0; i < ldcp->num_txds; i++) {
3305 
3306 		tbufp = &(ldcp->tbufp[i]);
3307 		rv = ldc_mem_alloc_handle(ldcp->ldc_handle,
3308 		    &(tbufp->memhandle));
3309 		if (rv) {
3310 			tbufp->memhandle = 0;
3311 			goto init_tbufs_failed;
3312 		}
3313 
3314 		/*
3315 		 * bind ldc memhandle to the corresponding transmit buffer.
3316 		 */
3317 		ci = ncookies = 0;
3318 		rv = ldc_mem_bind_handle(tbufp->memhandle,
3319 		    (caddr_t)datap, data_sz, LDC_SHADOW_MAP,
3320 		    LDC_MEM_R, &(tbufp->memcookie[ci]), &ncookies);
3321 		if (rv != 0) {
3322 			goto init_tbufs_failed;
3323 		}
3324 
3325 		/*
3326 		 * successful in binding the handle to tx data buffer.
3327 		 * set datap in the private descr to this buffer.
3328 		 */
3329 		tbufp->datap = datap;
3330 
3331 		if ((ncookies == 0) ||
3332 		    (ncookies > MAX_COOKIES)) {
3333 			goto init_tbufs_failed;
3334 		}
3335 
3336 		for (ci = 1; ci < ncookies; ci++) {
3337 			rv = ldc_mem_nextcookie(tbufp->memhandle,
3338 			    &(tbufp->memcookie[ci]));
3339 			if (rv != 0) {
3340 				goto init_tbufs_failed;
3341 			}
3342 		}
3343 
3344 		tbufp->ncookies = ncookies;
3345 		datap += data_sz;
3346 
3347 		tbufp->flags = VGEN_PRIV_DESC_FREE;
3348 		txdp = &(ldcp->txdp[i]);
3349 		hdrp = &txdp->hdr;
3350 		hdrp->dstate = VIO_DESC_FREE;
3351 		hdrp->ack = B_FALSE;
3352 		tbufp->descp = txdp;
3353 
3354 	}
3355 
3356 	/* reset tbuf walking pointers */
3357 	ldcp->next_tbufp = ldcp->tbufp;
3358 	ldcp->cur_tbufp = ldcp->tbufp;
3359 
3360 	/* initialize tx seqnum and index */
3361 	ldcp->next_txseq = VNET_ISS;
3362 	ldcp->next_txi = 0;
3363 
3364 	ldcp->resched_peer = B_TRUE;
3365 	ldcp->resched_peer_txi = 0;
3366 
3367 	return (DDI_SUCCESS);
3368 
3369 init_tbufs_failed:;
3370 	vgen_uninit_tbufs(ldcp);
3371 	return (DDI_FAILURE);
3372 }
3373 
3374 /* Uninitialize transmit buffer ring for the channel */
3375 static void
3376 vgen_uninit_tbufs(vgen_ldc_t *ldcp)
3377 {
3378 	vgen_private_desc_t	*tbufp = ldcp->tbufp;
3379 	int 			i;
3380 
3381 	/* for each tbuf (priv_desc), free ldc mem_handle */
3382 	for (i = 0; i < ldcp->num_txds; i++) {
3383 
3384 		tbufp = &(ldcp->tbufp[i]);
3385 
3386 		if (tbufp->datap) { /* if bound to a ldc memhandle */
3387 			(void) ldc_mem_unbind_handle(tbufp->memhandle);
3388 			tbufp->datap = NULL;
3389 		}
3390 		if (tbufp->memhandle) {
3391 			(void) ldc_mem_free_handle(tbufp->memhandle);
3392 			tbufp->memhandle = 0;
3393 		}
3394 	}
3395 
3396 	if (ldcp->tx_datap) {
3397 		/* prealloc'd tx data buffer */
3398 		kmem_free(ldcp->tx_datap, ldcp->tx_data_sz);
3399 		ldcp->tx_datap = NULL;
3400 		ldcp->tx_data_sz = 0;
3401 	}
3402 
3403 	bzero(ldcp->tbufp, sizeof (vgen_private_desc_t) * (ldcp->num_txds));
3404 	bzero(ldcp->txdp, sizeof (vnet_public_desc_t) * (ldcp->num_txds));
3405 }
3406 
3407 /* clobber tx descriptor ring */
3408 static void
3409 vgen_clobber_tbufs(vgen_ldc_t *ldcp)
3410 {
3411 	vnet_public_desc_t	*txdp;
3412 	vgen_private_desc_t	*tbufp;
3413 	vio_dring_entry_hdr_t	*hdrp;
3414 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3415 	int i;
3416 #ifdef DEBUG
3417 	int ndone = 0;
3418 #endif
3419 
3420 	for (i = 0; i < ldcp->num_txds; i++) {
3421 
3422 		tbufp = &(ldcp->tbufp[i]);
3423 		txdp = tbufp->descp;
3424 		hdrp = &txdp->hdr;
3425 
3426 		if (tbufp->flags & VGEN_PRIV_DESC_BUSY) {
3427 			tbufp->flags = VGEN_PRIV_DESC_FREE;
3428 #ifdef DEBUG
3429 			if (hdrp->dstate == VIO_DESC_DONE)
3430 				ndone++;
3431 #endif
3432 			hdrp->dstate = VIO_DESC_FREE;
3433 			hdrp->ack = B_FALSE;
3434 		}
3435 	}
3436 	/* reset tbuf walking pointers */
3437 	ldcp->next_tbufp = ldcp->tbufp;
3438 	ldcp->cur_tbufp = ldcp->tbufp;
3439 
3440 	/* reset tx seqnum and index */
3441 	ldcp->next_txseq = VNET_ISS;
3442 	ldcp->next_txi = 0;
3443 
3444 	ldcp->resched_peer = B_TRUE;
3445 	ldcp->resched_peer_txi = 0;
3446 
3447 	DBG2(vgenp, ldcp, "num descrs done (%d)\n", ndone);
3448 }
3449 
3450 /* clobber receive descriptor ring */
3451 static void
3452 vgen_clobber_rxds(vgen_ldc_t *ldcp)
3453 {
3454 	ldcp->rx_dhandle = 0;
3455 	bzero(&ldcp->rx_dcookie, sizeof (ldcp->rx_dcookie));
3456 	ldcp->rxdp = NULL;
3457 	ldcp->next_rxi = 0;
3458 	ldcp->num_rxds = 0;
3459 	ldcp->next_rxseq = VNET_ISS;
3460 }
3461 
3462 /* initialize receive descriptor ring */
3463 static int
3464 vgen_init_rxds(vgen_ldc_t *ldcp, uint32_t num_desc, uint32_t desc_size,
3465 	ldc_mem_cookie_t *dcookie, uint32_t ncookies)
3466 {
3467 	int rv;
3468 	ldc_mem_info_t minfo;
3469 
3470 	rv = ldc_mem_dring_map(ldcp->ldc_handle, dcookie, ncookies, num_desc,
3471 	    desc_size, LDC_DIRECT_MAP, &(ldcp->rx_dhandle));
3472 	if (rv != 0) {
3473 		return (DDI_FAILURE);
3474 	}
3475 
3476 	/*
3477 	 * sucessfully mapped, now try to
3478 	 * get info about the mapped dring
3479 	 */
3480 	rv = ldc_mem_dring_info(ldcp->rx_dhandle, &minfo);
3481 	if (rv != 0) {
3482 		(void) ldc_mem_dring_unmap(ldcp->rx_dhandle);
3483 		return (DDI_FAILURE);
3484 	}
3485 
3486 	/*
3487 	 * save ring address, number of descriptors.
3488 	 */
3489 	ldcp->rxdp = (vnet_public_desc_t *)(minfo.vaddr);
3490 	bcopy(dcookie, &(ldcp->rx_dcookie), sizeof (*dcookie));
3491 	ldcp->num_rxdcookies = ncookies;
3492 	ldcp->num_rxds = num_desc;
3493 	ldcp->next_rxi = 0;
3494 	ldcp->next_rxseq = VNET_ISS;
3495 	ldcp->dring_mtype = minfo.mtype;
3496 
3497 	return (DDI_SUCCESS);
3498 }
3499 
3500 /* get channel statistics */
3501 static uint64_t
3502 vgen_ldc_stat(vgen_ldc_t *ldcp, uint_t stat)
3503 {
3504 	vgen_stats_t *statsp;
3505 	uint64_t val;
3506 
3507 	val = 0;
3508 	statsp = &ldcp->stats;
3509 	switch (stat) {
3510 
3511 	case MAC_STAT_MULTIRCV:
3512 		val = statsp->multircv;
3513 		break;
3514 
3515 	case MAC_STAT_BRDCSTRCV:
3516 		val = statsp->brdcstrcv;
3517 		break;
3518 
3519 	case MAC_STAT_MULTIXMT:
3520 		val = statsp->multixmt;
3521 		break;
3522 
3523 	case MAC_STAT_BRDCSTXMT:
3524 		val = statsp->brdcstxmt;
3525 		break;
3526 
3527 	case MAC_STAT_NORCVBUF:
3528 		val = statsp->norcvbuf;
3529 		break;
3530 
3531 	case MAC_STAT_IERRORS:
3532 		val = statsp->ierrors;
3533 		break;
3534 
3535 	case MAC_STAT_NOXMTBUF:
3536 		val = statsp->noxmtbuf;
3537 		break;
3538 
3539 	case MAC_STAT_OERRORS:
3540 		val = statsp->oerrors;
3541 		break;
3542 
3543 	case MAC_STAT_COLLISIONS:
3544 		break;
3545 
3546 	case MAC_STAT_RBYTES:
3547 		val = statsp->rbytes;
3548 		break;
3549 
3550 	case MAC_STAT_IPACKETS:
3551 		val = statsp->ipackets;
3552 		break;
3553 
3554 	case MAC_STAT_OBYTES:
3555 		val = statsp->obytes;
3556 		break;
3557 
3558 	case MAC_STAT_OPACKETS:
3559 		val = statsp->opackets;
3560 		break;
3561 
3562 	/* stats not relevant to ldc, return 0 */
3563 	case MAC_STAT_IFSPEED:
3564 	case ETHER_STAT_ALIGN_ERRORS:
3565 	case ETHER_STAT_FCS_ERRORS:
3566 	case ETHER_STAT_FIRST_COLLISIONS:
3567 	case ETHER_STAT_MULTI_COLLISIONS:
3568 	case ETHER_STAT_DEFER_XMTS:
3569 	case ETHER_STAT_TX_LATE_COLLISIONS:
3570 	case ETHER_STAT_EX_COLLISIONS:
3571 	case ETHER_STAT_MACXMT_ERRORS:
3572 	case ETHER_STAT_CARRIER_ERRORS:
3573 	case ETHER_STAT_TOOLONG_ERRORS:
3574 	case ETHER_STAT_XCVR_ADDR:
3575 	case ETHER_STAT_XCVR_ID:
3576 	case ETHER_STAT_XCVR_INUSE:
3577 	case ETHER_STAT_CAP_1000FDX:
3578 	case ETHER_STAT_CAP_1000HDX:
3579 	case ETHER_STAT_CAP_100FDX:
3580 	case ETHER_STAT_CAP_100HDX:
3581 	case ETHER_STAT_CAP_10FDX:
3582 	case ETHER_STAT_CAP_10HDX:
3583 	case ETHER_STAT_CAP_ASMPAUSE:
3584 	case ETHER_STAT_CAP_PAUSE:
3585 	case ETHER_STAT_CAP_AUTONEG:
3586 	case ETHER_STAT_ADV_CAP_1000FDX:
3587 	case ETHER_STAT_ADV_CAP_1000HDX:
3588 	case ETHER_STAT_ADV_CAP_100FDX:
3589 	case ETHER_STAT_ADV_CAP_100HDX:
3590 	case ETHER_STAT_ADV_CAP_10FDX:
3591 	case ETHER_STAT_ADV_CAP_10HDX:
3592 	case ETHER_STAT_ADV_CAP_ASMPAUSE:
3593 	case ETHER_STAT_ADV_CAP_PAUSE:
3594 	case ETHER_STAT_ADV_CAP_AUTONEG:
3595 	case ETHER_STAT_LP_CAP_1000FDX:
3596 	case ETHER_STAT_LP_CAP_1000HDX:
3597 	case ETHER_STAT_LP_CAP_100FDX:
3598 	case ETHER_STAT_LP_CAP_100HDX:
3599 	case ETHER_STAT_LP_CAP_10FDX:
3600 	case ETHER_STAT_LP_CAP_10HDX:
3601 	case ETHER_STAT_LP_CAP_ASMPAUSE:
3602 	case ETHER_STAT_LP_CAP_PAUSE:
3603 	case ETHER_STAT_LP_CAP_AUTONEG:
3604 	case ETHER_STAT_LINK_ASMPAUSE:
3605 	case ETHER_STAT_LINK_PAUSE:
3606 	case ETHER_STAT_LINK_AUTONEG:
3607 	case ETHER_STAT_LINK_DUPLEX:
3608 	default:
3609 		val = 0;
3610 		break;
3611 
3612 	}
3613 	return (val);
3614 }
3615 
3616 /*
3617  * LDC channel is UP, start handshake process with peer.
3618  */
3619 static void
3620 vgen_handle_evt_up(vgen_ldc_t *ldcp)
3621 {
3622 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
3623 
3624 	DBG1(vgenp, ldcp, "enter\n");
3625 
3626 	ASSERT(MUTEX_HELD(&ldcp->cblock));
3627 
3628 	if (ldcp->portp != vgenp->vsw_portp) {
3629 		/*
3630 		 * As the channel is up, use this port from now on.
3631 		 */
3632 		(void) atomic_swap_32(&ldcp->portp->use_vsw_port, B_FALSE);
3633 	}
3634 
3635 	/* Initialize local session id */
3636 	ldcp->local_sid = ddi_get_lbolt();
3637 
3638 	/* clear peer session id */
3639 	ldcp->peer_sid = 0;
3640 	ldcp->hretries = 0;
3641 
3642 	if (ldcp->hphase != VH_PHASE0) {
3643 		vgen_handshake_reset(ldcp);
3644 	}
3645 
3646 	/* Initiate Handshake process with peer ldc endpoint */
3647 	vgen_handshake(vh_nextphase(ldcp));
3648 
3649 	DBG1(vgenp, ldcp, "exit\n");
3650 }
3651 
3652 /*
3653  * LDC channel is Reset, terminate connection with peer and try to
3654  * bring the channel up again.
3655  */
3656 static void
3657 vgen_handle_evt_reset(vgen_ldc_t *ldcp)
3658 {
3659 	ldc_status_t istatus;
3660 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
3661 	int	rv;
3662 
3663 	DBG1(vgenp, ldcp, "enter\n");
3664 
3665 	ASSERT(MUTEX_HELD(&ldcp->cblock));
3666 
3667 	if ((ldcp->portp != vgenp->vsw_portp) &&
3668 	    (vgenp->vsw_portp != NULL)) {
3669 		/*
3670 		 * As the channel is down, use the switch port until
3671 		 * the channel becomes ready to be used.
3672 		 */
3673 		(void) atomic_swap_32(&ldcp->portp->use_vsw_port, B_TRUE);
3674 	}
3675 
3676 	if (vgenp->vsw_portp == ldcp->portp) {
3677 		vio_net_report_err_t rep_err =
3678 		    ldcp->portp->vcb.vio_net_report_err;
3679 
3680 		/* Post a reset message */
3681 		rep_err(ldcp->portp->vhp, VIO_NET_RES_DOWN);
3682 	}
3683 
3684 	if (ldcp->hphase != VH_PHASE0) {
3685 		vgen_handshake_reset(ldcp);
3686 	}
3687 
3688 	/* try to bring the channel up */
3689 	rv = ldc_up(ldcp->ldc_handle);
3690 	if (rv != 0) {
3691 		DWARN(vgenp, ldcp, "ldc_up err rv(%d)\n", rv);
3692 	}
3693 
3694 	if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
3695 		DWARN(vgenp, ldcp, "ldc_status err\n");
3696 	} else {
3697 		ldcp->ldc_status = istatus;
3698 	}
3699 
3700 	/* if channel is already UP - restart handshake */
3701 	if (ldcp->ldc_status == LDC_UP) {
3702 		vgen_handle_evt_up(ldcp);
3703 	}
3704 
3705 	DBG1(vgenp, ldcp, "exit\n");
3706 }
3707 
3708 /* Interrupt handler for the channel */
3709 static uint_t
3710 vgen_ldc_cb(uint64_t event, caddr_t arg)
3711 {
3712 	_NOTE(ARGUNUSED(event))
3713 	vgen_ldc_t	*ldcp;
3714 	vgen_t		*vgenp;
3715 	ldc_status_t 	istatus;
3716 	vgen_stats_t	*statsp;
3717 	timeout_id_t	cancel_htid = 0;
3718 	uint_t		ret = LDC_SUCCESS;
3719 
3720 	ldcp = (vgen_ldc_t *)arg;
3721 	vgenp = LDC_TO_VGEN(ldcp);
3722 	statsp = &ldcp->stats;
3723 
3724 	DBG1(vgenp, ldcp, "enter\n");
3725 
3726 	mutex_enter(&ldcp->cblock);
3727 	statsp->callbacks++;
3728 	if ((ldcp->ldc_status == LDC_INIT) || (ldcp->ldc_handle == NULL)) {
3729 		DWARN(vgenp, ldcp, "status(%d) is LDC_INIT\n",
3730 		    ldcp->ldc_status);
3731 		mutex_exit(&ldcp->cblock);
3732 		return (LDC_SUCCESS);
3733 	}
3734 
3735 	/*
3736 	 * cache cancel_htid before the events specific
3737 	 * code may overwrite it. Do not clear ldcp->cancel_htid
3738 	 * as it is also used to indicate the timer to quit immediately.
3739 	 */
3740 	cancel_htid = ldcp->cancel_htid;
3741 
3742 	/*
3743 	 * NOTE: not using switch() as event could be triggered by
3744 	 * a state change and a read request. Also the ordering	of the
3745 	 * check for the event types is deliberate.
3746 	 */
3747 	if (event & LDC_EVT_UP) {
3748 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
3749 			DWARN(vgenp, ldcp, "ldc_status err\n");
3750 			/* status couldn't be determined */
3751 			ret = LDC_FAILURE;
3752 			goto ldc_cb_ret;
3753 		}
3754 		ldcp->ldc_status = istatus;
3755 		if (ldcp->ldc_status != LDC_UP) {
3756 			DWARN(vgenp, ldcp, "LDC_EVT_UP received "
3757 			    " but ldc status is not UP(0x%x)\n",
3758 			    ldcp->ldc_status);
3759 			/* spurious interrupt, return success */
3760 			goto ldc_cb_ret;
3761 		}
3762 		DWARN(vgenp, ldcp, "event(%lx) UP, status(%d)\n",
3763 		    event, ldcp->ldc_status);
3764 
3765 		vgen_handle_evt_up(ldcp);
3766 
3767 		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);
3768 	}
3769 
3770 	/* Handle RESET/DOWN before READ event */
3771 	if (event & (LDC_EVT_RESET | LDC_EVT_DOWN)) {
3772 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
3773 			DWARN(vgenp, ldcp, "ldc_status error\n");
3774 			/* status couldn't be determined */
3775 			ret = LDC_FAILURE;
3776 			goto ldc_cb_ret;
3777 		}
3778 		ldcp->ldc_status = istatus;
3779 		DWARN(vgenp, ldcp, "event(%lx) RESET/DOWN, status(%d)\n",
3780 		    event, ldcp->ldc_status);
3781 
3782 		vgen_handle_evt_reset(ldcp);
3783 
3784 		/*
3785 		 * As the channel is down/reset, ignore READ event
3786 		 * but print a debug warning message.
3787 		 */
3788 		if (event & LDC_EVT_READ) {
3789 			DWARN(vgenp, ldcp,
3790 			    "LDC_EVT_READ set along with RESET/DOWN\n");
3791 			event &= ~LDC_EVT_READ;
3792 		}
3793 	}
3794 
3795 	if (event & LDC_EVT_READ) {
3796 		DBG2(vgenp, ldcp, "event(%lx) READ, status(%d)\n",
3797 		    event, ldcp->ldc_status);
3798 
3799 		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);
3800 
3801 		if (ldcp->rcv_thread != NULL) {
3802 			/*
3803 			 * If the receive thread is enabled, then
3804 			 * wakeup the receive thread to process the
3805 			 * LDC messages.
3806 			 */
3807 			mutex_exit(&ldcp->cblock);
3808 			mutex_enter(&ldcp->rcv_thr_lock);
3809 			if (!(ldcp->rcv_thr_flags & VGEN_WTHR_DATARCVD)) {
3810 				ldcp->rcv_thr_flags |= VGEN_WTHR_DATARCVD;
3811 				cv_signal(&ldcp->rcv_thr_cv);
3812 			}
3813 			mutex_exit(&ldcp->rcv_thr_lock);
3814 			mutex_enter(&ldcp->cblock);
3815 		} else  {
3816 			vgen_handle_evt_read(ldcp);
3817 		}
3818 	}
3819 
3820 ldc_cb_ret:
3821 	/*
3822 	 * Check to see if the status of cancel_htid has
3823 	 * changed. If another timer needs to be cancelled,
3824 	 * then let the next callback to clear it.
3825 	 */
3826 	if (cancel_htid == 0) {
3827 		cancel_htid = ldcp->cancel_htid;
3828 	}
3829 	mutex_exit(&ldcp->cblock);
3830 
3831 	if (cancel_htid) {
3832 		/*
3833 		 * Cancel handshake timer.
3834 		 * untimeout(9F) will not return until the pending callback is
3835 		 * cancelled or has run. No problems will result from calling
3836 		 * untimeout if the handler has already completed.
3837 		 * If the timeout handler did run, then it would just
3838 		 * return as cancel_htid is set.
3839 		 */
3840 		DBG2(vgenp, ldcp, "calling cance_htid =0x%X \n", cancel_htid);
3841 		(void) untimeout(cancel_htid);
3842 		mutex_enter(&ldcp->cblock);
3843 		/* clear it only if its the same as the one we cancelled */
3844 		if (ldcp->cancel_htid == cancel_htid) {
3845 			ldcp->cancel_htid = 0;
3846 		}
3847 		mutex_exit(&ldcp->cblock);
3848 	}
3849 	DBG1(vgenp, ldcp, "exit\n");
3850 	return (ret);
3851 }
3852 
3853 static void
3854 vgen_handle_evt_read(vgen_ldc_t *ldcp)
3855 {
3856 	int		rv;
3857 	uint64_t	*ldcmsg;
3858 	size_t		msglen;
3859 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
3860 	vio_msg_tag_t	*tagp;
3861 	ldc_status_t 	istatus;
3862 	boolean_t 	has_data;
3863 
3864 	DBG1(vgenp, ldcp, "enter\n");
3865 
3866 	ldcmsg = ldcp->ldcmsg;
3867 	/*
3868 	 * If the receive thread is enabled, then the cblock
3869 	 * need to be acquired here. If not, the vgen_ldc_cb()
3870 	 * calls this function with cblock held already.
3871 	 */
3872 	if (ldcp->rcv_thread != NULL) {
3873 		mutex_enter(&ldcp->cblock);
3874 	} else {
3875 		ASSERT(MUTEX_HELD(&ldcp->cblock));
3876 	}
3877 
3878 vgen_evt_read:
3879 	do {
3880 		msglen = ldcp->msglen;
3881 		rv = ldc_read(ldcp->ldc_handle, (caddr_t)ldcmsg, &msglen);
3882 
3883 		if (rv != 0) {
3884 			DWARN(vgenp, ldcp, "err rv(%d) len(%d)\n",
3885 			    rv, msglen);
3886 			if (rv == ECONNRESET)
3887 				goto vgen_evtread_error;
3888 			break;
3889 		}
3890 		if (msglen == 0) {
3891 			DBG2(vgenp, ldcp, "ldc_read NODATA");
3892 			break;
3893 		}
3894 		DBG2(vgenp, ldcp, "ldc_read msglen(%d)", msglen);
3895 
3896 		tagp = (vio_msg_tag_t *)ldcmsg;
3897 
3898 		if (ldcp->peer_sid) {
3899 			/*
3900 			 * check sid only after we have received peer's sid
3901 			 * in the version negotiate msg.
3902 			 */
3903 #ifdef DEBUG
3904 			if (vgen_hdbg & HDBG_BAD_SID) {
3905 				/* simulate bad sid condition */
3906 				tagp->vio_sid = 0;
3907 				vgen_hdbg &= ~(HDBG_BAD_SID);
3908 			}
3909 #endif
3910 			rv = vgen_check_sid(ldcp, tagp);
3911 			if (rv != VGEN_SUCCESS) {
3912 				/*
3913 				 * If sid mismatch is detected,
3914 				 * reset the channel.
3915 				 */
3916 				ldcp->need_ldc_reset = B_TRUE;
3917 				goto vgen_evtread_error;
3918 			}
3919 		}
3920 
3921 		switch (tagp->vio_msgtype) {
3922 		case VIO_TYPE_CTRL:
3923 			rv = vgen_handle_ctrlmsg(ldcp, tagp);
3924 			break;
3925 
3926 		case VIO_TYPE_DATA:
3927 			rv = vgen_handle_datamsg(ldcp, tagp, msglen);
3928 			break;
3929 
3930 		case VIO_TYPE_ERR:
3931 			vgen_handle_errmsg(ldcp, tagp);
3932 			break;
3933 
3934 		default:
3935 			DWARN(vgenp, ldcp, "Unknown VIO_TYPE(%x)\n",
3936 			    tagp->vio_msgtype);
3937 			break;
3938 		}
3939 
3940 		/*
3941 		 * If an error is encountered, stop processing and
3942 		 * handle the error.
3943 		 */
3944 		if (rv != 0) {
3945 			goto vgen_evtread_error;
3946 		}
3947 
3948 	} while (msglen);
3949 
3950 	/* check once more before exiting */
3951 	rv = ldc_chkq(ldcp->ldc_handle, &has_data);
3952 	if ((rv == 0) && (has_data == B_TRUE)) {
3953 		DTRACE_PROBE(vgen_chkq);
3954 		goto vgen_evt_read;
3955 	}
3956 
3957 vgen_evtread_error:
3958 	if (rv == ECONNRESET) {
3959 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
3960 			DWARN(vgenp, ldcp, "ldc_status err\n");
3961 		} else {
3962 			ldcp->ldc_status = istatus;
3963 		}
3964 		vgen_handle_evt_reset(ldcp);
3965 	} else if (rv) {
3966 		vgen_handshake_retry(ldcp);
3967 	}
3968 
3969 	/*
3970 	 * If the receive thread is enabled, then cancel the
3971 	 * handshake timeout here.
3972 	 */
3973 	if (ldcp->rcv_thread != NULL) {
3974 		timeout_id_t cancel_htid = ldcp->cancel_htid;
3975 
3976 		mutex_exit(&ldcp->cblock);
3977 		if (cancel_htid) {
3978 			/*
3979 			 * Cancel handshake timer. untimeout(9F) will
3980 			 * not return until the pending callback is cancelled
3981 			 * or has run. No problems will result from calling
3982 			 * untimeout if the handler has already completed.
3983 			 * If the timeout handler did run, then it would just
3984 			 * return as cancel_htid is set.
3985 			 */
3986 			DBG2(vgenp, ldcp, "calling cance_htid =0x%X \n",
3987 			    cancel_htid);
3988 			(void) untimeout(cancel_htid);
3989 
3990 			/*
3991 			 * clear it only if its the same as the one we
3992 			 * cancelled
3993 			 */
3994 			mutex_enter(&ldcp->cblock);
3995 			if (ldcp->cancel_htid == cancel_htid) {
3996 				ldcp->cancel_htid = 0;
3997 			}
3998 			mutex_exit(&ldcp->cblock);
3999 		}
4000 	}
4001 
4002 	DBG1(vgenp, ldcp, "exit\n");
4003 }
4004 
4005 /* vgen handshake functions */
4006 
4007 /* change the hphase for the channel to the next phase */
4008 static vgen_ldc_t *
4009 vh_nextphase(vgen_ldc_t *ldcp)
4010 {
4011 	if (ldcp->hphase == VH_PHASE3) {
4012 		ldcp->hphase = VH_DONE;
4013 	} else {
4014 		ldcp->hphase++;
4015 	}
4016 	return (ldcp);
4017 }
4018 
4019 /*
4020  * wrapper routine to send the given message over ldc using ldc_write().
4021  */
4022 static int
4023 vgen_sendmsg(vgen_ldc_t *ldcp, caddr_t msg,  size_t msglen,
4024     boolean_t caller_holds_lock)
4025 {
4026 	int			rv;
4027 	size_t			len;
4028 	uint32_t		retries = 0;
4029 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
4030 	vio_msg_tag_t		*tagp = (vio_msg_tag_t *)msg;
4031 	vio_dring_msg_t		*dmsg;
4032 	vio_raw_data_msg_t	*rmsg;
4033 	boolean_t		data_msg = B_FALSE;
4034 
4035 	len = msglen;
4036 	if ((len == 0) || (msg == NULL))
4037 		return (VGEN_FAILURE);
4038 
4039 	if (!caller_holds_lock) {
4040 		mutex_enter(&ldcp->wrlock);
4041 	}
4042 
4043 	if (tagp->vio_subtype == VIO_SUBTYPE_INFO) {
4044 		if (tagp->vio_subtype_env == VIO_DRING_DATA) {
4045 			dmsg = (vio_dring_msg_t *)tagp;
4046 			dmsg->seq_num = ldcp->next_txseq;
4047 			data_msg = B_TRUE;
4048 		} else if (tagp->vio_subtype_env == VIO_PKT_DATA) {
4049 			rmsg = (vio_raw_data_msg_t *)tagp;
4050 			rmsg->seq_num = ldcp->next_txseq;
4051 			data_msg = B_TRUE;
4052 		}
4053 	}
4054 
4055 	do {
4056 		len = msglen;
4057 		rv = ldc_write(ldcp->ldc_handle, (caddr_t)msg, &len);
4058 		if (retries++ >= vgen_ldcwr_retries)
4059 			break;
4060 	} while (rv == EWOULDBLOCK);
4061 
4062 	if (rv == 0 && data_msg == B_TRUE) {
4063 		ldcp->next_txseq++;
4064 	}
4065 
4066 	if (!caller_holds_lock) {
4067 		mutex_exit(&ldcp->wrlock);
4068 	}
4069 
4070 	if (rv != 0) {
4071 		DWARN(vgenp, ldcp, "ldc_write failed: rv(%d)\n",
4072 		    rv, msglen);
4073 		return (rv);
4074 	}
4075 
4076 	if (len != msglen) {
4077 		DWARN(vgenp, ldcp, "ldc_write failed: rv(%d) msglen (%d)\n",
4078 		    rv, msglen);
4079 		return (VGEN_FAILURE);
4080 	}
4081 
4082 	return (VGEN_SUCCESS);
4083 }
4084 
4085 /* send version negotiate message to the peer over ldc */
4086 static int
4087 vgen_send_version_negotiate(vgen_ldc_t *ldcp)
4088 {
4089 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4090 	vio_ver_msg_t	vermsg;
4091 	vio_msg_tag_t	*tagp = &vermsg.tag;
4092 	int		rv;
4093 
4094 	bzero(&vermsg, sizeof (vermsg));
4095 
4096 	tagp->vio_msgtype = VIO_TYPE_CTRL;
4097 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
4098 	tagp->vio_subtype_env = VIO_VER_INFO;
4099 	tagp->vio_sid = ldcp->local_sid;
4100 
4101 	/* get version msg payload from ldcp->local */
4102 	vermsg.ver_major = ldcp->local_hparams.ver_major;
4103 	vermsg.ver_minor = ldcp->local_hparams.ver_minor;
4104 	vermsg.dev_class = ldcp->local_hparams.dev_class;
4105 
4106 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (vermsg), B_FALSE);
4107 	if (rv != VGEN_SUCCESS) {
4108 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
4109 		return (rv);
4110 	}
4111 
4112 	ldcp->hstate |= VER_INFO_SENT;
4113 	DBG2(vgenp, ldcp, "VER_INFO_SENT ver(%d,%d)\n",
4114 	    vermsg.ver_major, vermsg.ver_minor);
4115 
4116 	return (VGEN_SUCCESS);
4117 }
4118 
4119 /* send attr info message to the peer over ldc */
4120 static int
4121 vgen_send_attr_info(vgen_ldc_t *ldcp)
4122 {
4123 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4124 	vnet_attr_msg_t	attrmsg;
4125 	vio_msg_tag_t	*tagp = &attrmsg.tag;
4126 	int		rv;
4127 
4128 	bzero(&attrmsg, sizeof (attrmsg));
4129 
4130 	tagp->vio_msgtype = VIO_TYPE_CTRL;
4131 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
4132 	tagp->vio_subtype_env = VIO_ATTR_INFO;
4133 	tagp->vio_sid = ldcp->local_sid;
4134 
4135 	/* get attr msg payload from ldcp->local */
4136 	attrmsg.mtu = ldcp->local_hparams.mtu;
4137 	attrmsg.addr = ldcp->local_hparams.addr;
4138 	attrmsg.addr_type = ldcp->local_hparams.addr_type;
4139 	attrmsg.xfer_mode = ldcp->local_hparams.xfer_mode;
4140 	attrmsg.ack_freq = ldcp->local_hparams.ack_freq;
4141 
4142 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (attrmsg), B_FALSE);
4143 	if (rv != VGEN_SUCCESS) {
4144 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
4145 		return (rv);
4146 	}
4147 
4148 	ldcp->hstate |= ATTR_INFO_SENT;
4149 	DBG2(vgenp, ldcp, "ATTR_INFO_SENT\n");
4150 
4151 	return (VGEN_SUCCESS);
4152 }
4153 
4154 /* send descriptor ring register message to the peer over ldc */
4155 static int
4156 vgen_send_dring_reg(vgen_ldc_t *ldcp)
4157 {
4158 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
4159 	vio_dring_reg_msg_t	msg;
4160 	vio_msg_tag_t		*tagp = &msg.tag;
4161 	int		rv;
4162 
4163 	bzero(&msg, sizeof (msg));
4164 
4165 	tagp->vio_msgtype = VIO_TYPE_CTRL;
4166 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
4167 	tagp->vio_subtype_env = VIO_DRING_REG;
4168 	tagp->vio_sid = ldcp->local_sid;
4169 
4170 	/* get dring info msg payload from ldcp->local */
4171 	bcopy(&(ldcp->local_hparams.dring_cookie), (msg.cookie),
4172 	    sizeof (ldc_mem_cookie_t));
4173 	msg.ncookies = ldcp->local_hparams.num_dcookies;
4174 	msg.num_descriptors = ldcp->local_hparams.num_desc;
4175 	msg.descriptor_size = ldcp->local_hparams.desc_size;
4176 
4177 	/*
4178 	 * dring_ident is set to 0. After mapping the dring, peer sets this
4179 	 * value and sends it in the ack, which is saved in
4180 	 * vgen_handle_dring_reg().
4181 	 */
4182 	msg.dring_ident = 0;
4183 
4184 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (msg), B_FALSE);
4185 	if (rv != VGEN_SUCCESS) {
4186 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
4187 		return (rv);
4188 	}
4189 
4190 	ldcp->hstate |= DRING_INFO_SENT;
4191 	DBG2(vgenp, ldcp, "DRING_INFO_SENT \n");
4192 
4193 	return (VGEN_SUCCESS);
4194 }
4195 
4196 static int
4197 vgen_send_rdx_info(vgen_ldc_t *ldcp)
4198 {
4199 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4200 	vio_rdx_msg_t	rdxmsg;
4201 	vio_msg_tag_t	*tagp = &rdxmsg.tag;
4202 	int		rv;
4203 
4204 	bzero(&rdxmsg, sizeof (rdxmsg));
4205 
4206 	tagp->vio_msgtype = VIO_TYPE_CTRL;
4207 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
4208 	tagp->vio_subtype_env = VIO_RDX;
4209 	tagp->vio_sid = ldcp->local_sid;
4210 
4211 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (rdxmsg), B_FALSE);
4212 	if (rv != VGEN_SUCCESS) {
4213 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
4214 		return (rv);
4215 	}
4216 
4217 	ldcp->hstate |= RDX_INFO_SENT;
4218 	DBG2(vgenp, ldcp, "RDX_INFO_SENT\n");
4219 
4220 	return (VGEN_SUCCESS);
4221 }
4222 
4223 /* send descriptor ring data message to the peer over ldc */
4224 static int
4225 vgen_send_dring_data(vgen_ldc_t *ldcp, uint32_t start, int32_t end)
4226 {
4227 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4228 	vio_dring_msg_t	dringmsg, *msgp = &dringmsg;
4229 	vio_msg_tag_t	*tagp = &msgp->tag;
4230 	vgen_stats_t	*statsp = &ldcp->stats;
4231 	int		rv;
4232 
4233 	bzero(msgp, sizeof (*msgp));
4234 
4235 	tagp->vio_msgtype = VIO_TYPE_DATA;
4236 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
4237 	tagp->vio_subtype_env = VIO_DRING_DATA;
4238 	tagp->vio_sid = ldcp->local_sid;
4239 
4240 	msgp->dring_ident = ldcp->local_hparams.dring_ident;
4241 	msgp->start_idx = start;
4242 	msgp->end_idx = end;
4243 
4244 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (dringmsg), B_TRUE);
4245 	if (rv != VGEN_SUCCESS) {
4246 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
4247 		return (rv);
4248 	}
4249 
4250 	statsp->dring_data_msgs++;
4251 
4252 	DBG2(vgenp, ldcp, "DRING_DATA_SENT \n");
4253 
4254 	return (VGEN_SUCCESS);
4255 }
4256 
4257 /* send multicast addr info message to vsw */
4258 static int
4259 vgen_send_mcast_info(vgen_ldc_t *ldcp)
4260 {
4261 	vnet_mcast_msg_t	mcastmsg;
4262 	vnet_mcast_msg_t	*msgp;
4263 	vio_msg_tag_t		*tagp;
4264 	vgen_t			*vgenp;
4265 	struct ether_addr	*mca;
4266 	int			rv;
4267 	int			i;
4268 	uint32_t		size;
4269 	uint32_t		mccount;
4270 	uint32_t		n;
4271 
4272 	msgp = &mcastmsg;
4273 	tagp = &msgp->tag;
4274 	vgenp = LDC_TO_VGEN(ldcp);
4275 
4276 	mccount = vgenp->mccount;
4277 	i = 0;
4278 
4279 	do {
4280 		tagp->vio_msgtype = VIO_TYPE_CTRL;
4281 		tagp->vio_subtype = VIO_SUBTYPE_INFO;
4282 		tagp->vio_subtype_env = VNET_MCAST_INFO;
4283 		tagp->vio_sid = ldcp->local_sid;
4284 
4285 		n = ((mccount >= VNET_NUM_MCAST) ? VNET_NUM_MCAST : mccount);
4286 		size = n * sizeof (struct ether_addr);
4287 
4288 		mca = &(vgenp->mctab[i]);
4289 		bcopy(mca, (msgp->mca), size);
4290 		msgp->set = B_TRUE;
4291 		msgp->count = n;
4292 
4293 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msgp),
4294 		    B_FALSE);
4295 		if (rv != VGEN_SUCCESS) {
4296 			DWARN(vgenp, ldcp, "vgen_sendmsg err(%d)\n", rv);
4297 			return (rv);
4298 		}
4299 
4300 		mccount -= n;
4301 		i += n;
4302 
4303 	} while (mccount);
4304 
4305 	return (VGEN_SUCCESS);
4306 }
4307 
4308 /* Initiate Phase 2 of handshake */
4309 static int
4310 vgen_handshake_phase2(vgen_ldc_t *ldcp)
4311 {
4312 	int rv;
4313 	uint32_t ncookies = 0;
4314 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4315 
4316 #ifdef DEBUG
4317 	if (vgen_hdbg & HDBG_OUT_STATE) {
4318 		/* simulate out of state condition */
4319 		vgen_hdbg &= ~(HDBG_OUT_STATE);
4320 		rv = vgen_send_rdx_info(ldcp);
4321 		return (rv);
4322 	}
4323 	if (vgen_hdbg & HDBG_TIMEOUT) {
4324 		/* simulate timeout condition */
4325 		vgen_hdbg &= ~(HDBG_TIMEOUT);
4326 		return (VGEN_SUCCESS);
4327 	}
4328 #endif
4329 	rv = vgen_send_attr_info(ldcp);
4330 	if (rv != VGEN_SUCCESS) {
4331 		return (rv);
4332 	}
4333 
4334 	/* Bind descriptor ring to the channel */
4335 	if (ldcp->num_txdcookies == 0) {
4336 		rv = ldc_mem_dring_bind(ldcp->ldc_handle, ldcp->tx_dhandle,
4337 		    LDC_DIRECT_MAP | LDC_SHADOW_MAP, LDC_MEM_RW,
4338 		    &ldcp->tx_dcookie, &ncookies);
4339 		if (rv != 0) {
4340 			DWARN(vgenp, ldcp, "ldc_mem_dring_bind failed "
4341 			    "rv(%x)\n", rv);
4342 			return (rv);
4343 		}
4344 		ASSERT(ncookies == 1);
4345 		ldcp->num_txdcookies = ncookies;
4346 	}
4347 
4348 	/* update local dring_info params */
4349 	bcopy(&(ldcp->tx_dcookie), &(ldcp->local_hparams.dring_cookie),
4350 	    sizeof (ldc_mem_cookie_t));
4351 	ldcp->local_hparams.num_dcookies = ldcp->num_txdcookies;
4352 	ldcp->local_hparams.num_desc = ldcp->num_txds;
4353 	ldcp->local_hparams.desc_size = sizeof (vnet_public_desc_t);
4354 
4355 	rv = vgen_send_dring_reg(ldcp);
4356 	if (rv != VGEN_SUCCESS) {
4357 		return (rv);
4358 	}
4359 
4360 	return (VGEN_SUCCESS);
4361 }
4362 
4363 /*
4364  * Set vnet-protocol-version dependent functions based on version.
4365  */
4366 static void
4367 vgen_set_vnet_proto_ops(vgen_ldc_t *ldcp)
4368 {
4369 	vgen_hparams_t	*lp = &ldcp->local_hparams;
4370 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4371 
4372 	if (VGEN_VER_GTEQ(ldcp, 1, 4)) {
4373 		/*
4374 		 * If the version negotiated with peer is >= 1.4(Jumbo Frame
4375 		 * Support), set the mtu in our attributes to max_frame_size.
4376 		 */
4377 		lp->mtu = vgenp->max_frame_size;
4378 	} else  if (VGEN_VER_EQ(ldcp, 1, 3)) {
4379 		/*
4380 		 * If the version negotiated with peer is == 1.3 (Vlan Tag
4381 		 * Support) set the attr.mtu to ETHERMAX + VLAN_TAGSZ.
4382 		 */
4383 		lp->mtu = ETHERMAX + VLAN_TAGSZ;
4384 	} else {
4385 		vgen_port_t	*portp = ldcp->portp;
4386 		vnet_t		*vnetp = vgenp->vnetp;
4387 		/*
4388 		 * Pre-1.3 peers expect max frame size of ETHERMAX.
4389 		 * We can negotiate that size with those peers provided the
4390 		 * following conditions are true:
4391 		 * - Only pvid is defined for our peer and there are no vids.
4392 		 * - pvids are equal.
4393 		 * If the above conditions are true, then we can send/recv only
4394 		 * untagged frames of max size ETHERMAX.
4395 		 */
4396 		if (portp->nvids == 0 && portp->pvid == vnetp->pvid) {
4397 			lp->mtu = ETHERMAX;
4398 		}
4399 	}
4400 
4401 	if (VGEN_VER_GTEQ(ldcp, 1, 2)) {
4402 		/* Versions >= 1.2 */
4403 
4404 		if (VGEN_PRI_ETH_DEFINED(vgenp)) {
4405 			/*
4406 			 * enable priority routines and pkt mode only if
4407 			 * at least one pri-eth-type is specified in MD.
4408 			 */
4409 
4410 			ldcp->tx = vgen_ldcsend;
4411 			ldcp->rx_pktdata = vgen_handle_pkt_data;
4412 
4413 			/* set xfer mode for vgen_send_attr_info() */
4414 			lp->xfer_mode = VIO_PKT_MODE | VIO_DRING_MODE_V1_2;
4415 
4416 		} else {
4417 			/* no priority eth types defined in MD */
4418 
4419 			ldcp->tx = vgen_ldcsend_dring;
4420 			ldcp->rx_pktdata = vgen_handle_pkt_data_nop;
4421 
4422 			/* set xfer mode for vgen_send_attr_info() */
4423 			lp->xfer_mode = VIO_DRING_MODE_V1_2;
4424 
4425 		}
4426 	} else {
4427 		/* Versions prior to 1.2  */
4428 
4429 		vgen_reset_vnet_proto_ops(ldcp);
4430 	}
4431 }
4432 
4433 /*
4434  * Reset vnet-protocol-version dependent functions to pre-v1.2.
4435  */
4436 static void
4437 vgen_reset_vnet_proto_ops(vgen_ldc_t *ldcp)
4438 {
4439 	vgen_hparams_t	*lp = &ldcp->local_hparams;
4440 
4441 	ldcp->tx = vgen_ldcsend_dring;
4442 	ldcp->rx_pktdata = vgen_handle_pkt_data_nop;
4443 
4444 	/* set xfer mode for vgen_send_attr_info() */
4445 	lp->xfer_mode = VIO_DRING_MODE_V1_0;
4446 }
4447 
4448 static void
4449 vgen_vlan_unaware_port_reset(vgen_port_t *portp)
4450 {
4451 	vgen_ldclist_t	*ldclp;
4452 	vgen_ldc_t	*ldcp;
4453 	vgen_t		*vgenp = portp->vgenp;
4454 	vnet_t		*vnetp = vgenp->vnetp;
4455 
4456 	ldclp = &portp->ldclist;
4457 
4458 	READ_ENTER(&ldclp->rwlock);
4459 
4460 	/*
4461 	 * NOTE: for now, we will assume we have a single channel.
4462 	 */
4463 	if (ldclp->headp == NULL) {
4464 		RW_EXIT(&ldclp->rwlock);
4465 		return;
4466 	}
4467 	ldcp = ldclp->headp;
4468 
4469 	mutex_enter(&ldcp->cblock);
4470 
4471 	/*
4472 	 * If the peer is vlan_unaware(ver < 1.3), reset channel and terminate
4473 	 * the connection. See comments in vgen_set_vnet_proto_ops().
4474 	 */
4475 	if (ldcp->hphase == VH_DONE && VGEN_VER_LT(ldcp, 1, 3) &&
4476 	    (portp->nvids != 0 || portp->pvid != vnetp->pvid)) {
4477 		ldcp->need_ldc_reset = B_TRUE;
4478 		vgen_handshake_retry(ldcp);
4479 	}
4480 
4481 	mutex_exit(&ldcp->cblock);
4482 
4483 	RW_EXIT(&ldclp->rwlock);
4484 }
4485 
4486 static void
4487 vgen_reset_vlan_unaware_ports(vgen_t *vgenp)
4488 {
4489 	vgen_port_t	*portp;
4490 	vgen_portlist_t	*plistp;
4491 
4492 	plistp = &(vgenp->vgenports);
4493 	READ_ENTER(&plistp->rwlock);
4494 
4495 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
4496 
4497 		vgen_vlan_unaware_port_reset(portp);
4498 
4499 	}
4500 
4501 	RW_EXIT(&plistp->rwlock);
4502 }
4503 
/*
 * This function resets the handshake phase to VH_PHASE0(pre-handshake phase).
 * This can happen after a channel comes up (status: LDC_UP) or
 * when handshake gets terminated due to various conditions.
 *
 * In order, it:
 *  - clears hstate/hphase and restores the pre-v1.2 protocol ops;
 *  - hands any pending handshake timer id over to cancel_htid;
 *  - unbinds our tx dring and unmaps the peer's dring, if set up;
 *  - clobbers the tx buffers;
 *  - re-initializes local_hparams and clears peer_hparams;
 *  - if need_ldc_reset is set, takes the channel down and tries to bring it
 *    back up, clearing both session ids.
 *
 * vgen_handshake_reset() calls this with cblock, rxlock, wrlock, txlock and
 * tclock held.
 */
static void
vgen_reset_hphase(vgen_ldc_t *ldcp)
{
	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
	ldc_status_t istatus;
	int rv;

	DBG1(vgenp, ldcp, "enter\n");
	/* reset hstate and hphase */
	ldcp->hstate = 0;
	ldcp->hphase = VH_PHASE0;

	vgen_reset_vnet_proto_ops(ldcp);

	/*
	 * Save the id of pending handshake timer in cancel_htid.
	 * This will be checked in vgen_ldc_cb() and the handshake timer will
	 * be cancelled after releasing cblock.
	 */
	if (ldcp->htid) {
		ldcp->cancel_htid = ldcp->htid;
		ldcp->htid = 0;
	}

	/* our dring is no longer registered with the peer */
	if (ldcp->local_hparams.dring_ready) {
		ldcp->local_hparams.dring_ready = B_FALSE;
	}

	/* Unbind tx descriptor ring from the channel */
	if (ldcp->num_txdcookies) {
		rv = ldc_mem_dring_unbind(ldcp->tx_dhandle);
		if (rv != 0) {
			DWARN(vgenp, ldcp, "ldc_mem_dring_unbind failed\n");
		}
		/*
		 * Cleared even if the unbind failed, so that
		 * vgen_handshake_phase2() attempts a fresh bind.
		 */
		ldcp->num_txdcookies = 0;
	}

	if (ldcp->peer_hparams.dring_ready) {
		ldcp->peer_hparams.dring_ready = B_FALSE;
		/* Unmap peer's dring */
		(void) ldc_mem_dring_unmap(ldcp->rx_dhandle);
		vgen_clobber_rxds(ldcp);
	}

	vgen_clobber_tbufs(ldcp);

	/*
	 * clear local handshake params and initialize.
	 */
	bzero(&(ldcp->local_hparams), sizeof (ldcp->local_hparams));

	/* set version to the highest version supported */
	ldcp->local_hparams.ver_major =
	    ldcp->vgen_versions[0].ver_major;
	ldcp->local_hparams.ver_minor =
	    ldcp->vgen_versions[0].ver_minor;
	ldcp->local_hparams.dev_class = VDEV_NETWORK;

	/* set attr_info params */
	ldcp->local_hparams.mtu = vgenp->max_frame_size;
	ldcp->local_hparams.addr =
	    vnet_macaddr_strtoul(vgenp->macaddr);
	ldcp->local_hparams.addr_type = ADDR_TYPE_MAC;
	ldcp->local_hparams.xfer_mode = VIO_DRING_MODE_V1_0;
	ldcp->local_hparams.ack_freq = 0;	/* don't need acks */

	/*
	 * Note: dring is created, but not bound yet.
	 * local dring_info params will be updated when we bind the dring in
	 * vgen_handshake_phase2().
	 * dring_ident is set to 0. After mapping the dring, peer sets this
	 * value and sends it in the ack, which is saved in
	 * vgen_handle_dring_reg().
	 */
	ldcp->local_hparams.dring_ident = 0;

	/* clear peer_hparams */
	bzero(&(ldcp->peer_hparams), sizeof (ldcp->peer_hparams));

	/* reset the channel if required */
	if (ldcp->need_ldc_reset) {
		DWARN(vgenp, ldcp, "Doing Channel Reset...\n");
		ldcp->need_ldc_reset = B_FALSE;
		/* return values are deliberately ignored (best effort) */
		(void) ldc_down(ldcp->ldc_handle);
		(void) ldc_status(ldcp->ldc_handle, &istatus);
		DBG2(vgenp, ldcp, "Reset Done,ldc_status(%x)\n", istatus);
		ldcp->ldc_status = istatus;

		/* clear sids */
		ldcp->local_sid = 0;
		ldcp->peer_sid = 0;

		/* try to bring the channel up */
		rv = ldc_up(ldcp->ldc_handle);
		if (rv != 0) {
			DWARN(vgenp, ldcp, "ldc_up err rv(%d)\n", rv);
		}

		/* cache the channel status after the ldc_up() attempt */
		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
			DWARN(vgenp, ldcp, "ldc_status err\n");
		} else {
			ldcp->ldc_status = istatus;
		}
	}
}
4614 
/*
 * Wrapper function for vgen_reset_hphase().
 *
 * The caller must already hold ldcp->cblock. This routine additionally
 * takes rxlock, wrlock, txlock and tclock (in that order) around the reset
 * and releases them in the reverse order; cblock is still held on return.
 */
static void
vgen_handshake_reset(vgen_ldc_t *ldcp)
{
	ASSERT(MUTEX_HELD(&ldcp->cblock));
	mutex_enter(&ldcp->rxlock);
	mutex_enter(&ldcp->wrlock);
	mutex_enter(&ldcp->txlock);
	mutex_enter(&ldcp->tclock);

	vgen_reset_hphase(ldcp);

	mutex_exit(&ldcp->tclock);
	mutex_exit(&ldcp->txlock);
	mutex_exit(&ldcp->wrlock);
	mutex_exit(&ldcp->rxlock);
}
4632 
/*
 * Initiate handshake with the peer by sending various messages
 * based on the handshake-phase that the channel is currently in.
 *
 *  VH_PHASE1: start the handshake watchdog and send version negotiation.
 *  VH_PHASE2: send attr info and dring register (vgen_handshake_phase2()).
 *  VH_PHASE3: send RDX.
 *  VH_DONE:   stop the watchdog, sync the multicast table if the port is
 *             connected to vsw, and restart transmits if needed.
 *
 * The VH_DONE path temporarily drops and re-acquires ldcp->cblock, so the
 * caller must hold cblock on entry.
 * NOTE(review): callers in this file pass vh_nextphase(ldcp) as the argument;
 * presumably that advances ldcp->hphase before returning ldcp -- confirm.
 *
 * If sending fails with ECONNRESET the channel reset event is handled;
 * any other failure resets the handshake to the pre-handshake phase.
 */
static void
vgen_handshake(vgen_ldc_t *ldcp)
{
	uint32_t hphase = ldcp->hphase;
	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
	ldc_status_t	istatus;
	int	rv = 0;

	switch (hphase) {

	case VH_PHASE1:

		/*
		 * start timer, for entire handshake process, turn this timer
		 * off if all phases of handshake complete successfully and
		 * hphase goes to VH_DONE(below) or
		 * vgen_reset_hphase() gets called or
		 * channel is reset due to errors or
		 * vgen_ldc_uninit() is invoked(vgen_stop).
		 */
		ASSERT(ldcp->htid == 0);
		ldcp->htid = timeout(vgen_hwatchdog, (caddr_t)ldcp,
		    drv_usectohz(vgen_hwd_interval * MICROSEC));

		/* Phase 1 involves negotiating the version */
		rv = vgen_send_version_negotiate(ldcp);
		break;

	case VH_PHASE2:
		rv = vgen_handshake_phase2(ldcp);
		break;

	case VH_PHASE3:
		rv = vgen_send_rdx_info(ldcp);
		break;

	case VH_DONE:
		/*
		 * Save the id of pending handshake timer in cancel_htid.
		 * This will be checked in vgen_ldc_cb() and the handshake
		 * timer will be cancelled after releasing cblock.
		 */
		if (ldcp->htid) {
			ldcp->cancel_htid = ldcp->htid;
			ldcp->htid = 0;
		}
		/* handshake succeeded; start counting retries afresh */
		ldcp->hretries = 0;
		DBG1(vgenp, ldcp, "Handshake Done\n");

		if (ldcp->portp == vgenp->vsw_portp) {
			/*
			 * If this channel(port) is connected to vsw,
			 * need to sync multicast table with vsw.
			 *
			 * cblock is dropped while vgenp->lock is held for
			 * the send, and re-acquired afterwards.
			 * NOTE(review): presumably this avoids taking
			 * vgenp->lock while holding cblock -- confirm the
			 * intended lock ordering.
			 */
			mutex_exit(&ldcp->cblock);

			mutex_enter(&vgenp->lock);
			rv = vgen_send_mcast_info(ldcp);
			mutex_exit(&vgenp->lock);

			mutex_enter(&ldcp->cblock);
			/* on failure skip the resched check; handled below */
			if (rv != VGEN_SUCCESS)
				break;
		}

		/*
		 * Check if mac layer should be notified to restart
		 * transmissions. This can happen if the channel got
		 * reset and vgen_clobber_tbufs() is called, while
		 * need_resched is set.
		 */
		mutex_enter(&ldcp->tclock);
		if (ldcp->need_resched) {
			vio_net_tx_update_t vtx_update =
			    ldcp->portp->vcb.vio_net_tx_update;

			ldcp->need_resched = B_FALSE;
			vtx_update(ldcp->portp->vhp);
		}
		mutex_exit(&ldcp->tclock);

		break;

	default:
		break;
	}

	if (rv == ECONNRESET) {
		/* channel was reset: refresh cached status, handle reset */
		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
			DWARN(vgenp, ldcp, "ldc_status err\n");
		} else {
			ldcp->ldc_status = istatus;
		}
		vgen_handle_evt_reset(ldcp);
	} else if (rv) {
		/* any other failure: go back to the pre-handshake phase */
		vgen_handshake_reset(ldcp);
	}
}
4735 
4736 /*
4737  * Check if the current handshake phase has completed successfully and
4738  * return the status.
4739  */
4740 static int
4741 vgen_handshake_done(vgen_ldc_t *ldcp)
4742 {
4743 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4744 	uint32_t	hphase = ldcp->hphase;
4745 	int 		status = 0;
4746 
4747 	switch (hphase) {
4748 
4749 	case VH_PHASE1:
4750 		/*
4751 		 * Phase1 is done, if version negotiation
4752 		 * completed successfully.
4753 		 */
4754 		status = ((ldcp->hstate & VER_NEGOTIATED) ==
4755 		    VER_NEGOTIATED);
4756 		break;
4757 
4758 	case VH_PHASE2:
4759 		/*
4760 		 * Phase 2 is done, if attr info and dring info
4761 		 * have been exchanged successfully.
4762 		 */
4763 		status = (((ldcp->hstate & ATTR_INFO_EXCHANGED) ==
4764 		    ATTR_INFO_EXCHANGED) &&
4765 		    ((ldcp->hstate & DRING_INFO_EXCHANGED) ==
4766 		    DRING_INFO_EXCHANGED));
4767 		break;
4768 
4769 	case VH_PHASE3:
4770 		/* Phase 3 is done, if rdx msg has been exchanged */
4771 		status = ((ldcp->hstate & RDX_EXCHANGED) ==
4772 		    RDX_EXCHANGED);
4773 		break;
4774 
4775 	default:
4776 		break;
4777 	}
4778 
4779 	if (status == 0) {
4780 		return (VGEN_FAILURE);
4781 	}
4782 	DBG2(vgenp, ldcp, "PHASE(%d)\n", hphase);
4783 	return (VGEN_SUCCESS);
4784 }
4785 
4786 /* retry handshake on failure */
4787 static void
4788 vgen_handshake_retry(vgen_ldc_t *ldcp)
4789 {
4790 	/* reset handshake phase */
4791 	vgen_handshake_reset(ldcp);
4792 
4793 	/* handshake retry is specified and the channel is UP */
4794 	if (vgen_max_hretries && (ldcp->ldc_status == LDC_UP)) {
4795 		if (ldcp->hretries++ < vgen_max_hretries) {
4796 			ldcp->local_sid = ddi_get_lbolt();
4797 			vgen_handshake(vh_nextphase(ldcp));
4798 		}
4799 	}
4800 }
4801 
/*
 * Handle a version info msg from the peer or an ACK/NACK from the peer
 * to a version info msg that we sent.
 *
 * ldcp: channel the message arrived on.
 * tagp: tag of the received message; the message is treated as a
 *       vio_ver_msg_t and is modified in place to build the reply.
 *
 * INFO: save the peer's requested version, reply with an ACK (major
 *       version match, minor possibly lowered) or a NACK (carrying a lower
 *       version we support, or 0.0 if there is none).
 * ACK:  record the agreed version in local_hparams.
 * NACK: choose another version from our table based on the version in the
 *       NACK and resend version info, or fail if the peer sent 0.0 or
 *       nothing in our table qualifies.
 *
 * Once both directions are complete, the protocol ops are set and the
 * handshake moves to the next phase.
 *
 * Returns VGEN_SUCCESS, VGEN_FAILURE when negotiation fails or the message
 * arrives in the wrong phase, or the error returned by vgen_sendmsg() /
 * vgen_send_version_negotiate().
 *
 * NOTE(review): the search loops take the first table entry whose major
 * version is <= the peer's, so ldcp->vgen_versions is presumably sorted
 * highest version first -- confirm where the table is set up.
 */
static int
vgen_handle_version_negotiate(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
{
	vgen_t		*vgenp;
	vio_ver_msg_t	*vermsg = (vio_ver_msg_t *)tagp;
	int		ack = 0;
	int		failed = 0;
	int		idx;
	vgen_ver_t	*versions = ldcp->vgen_versions;
	int		rv = 0;

	vgenp = LDC_TO_VGEN(ldcp);
	DBG1(vgenp, ldcp, "enter\n");
	switch (tagp->vio_subtype) {
	case VIO_SUBTYPE_INFO:

		/*  Cache sid of peer if this is the first time */
		if (ldcp->peer_sid == 0) {
			DBG2(vgenp, ldcp, "Caching peer_sid(%x)\n",
			    tagp->vio_sid);
			ldcp->peer_sid = tagp->vio_sid;
		}

		if (ldcp->hphase != VH_PHASE1) {
			/*
			 * If we are not already in VH_PHASE1, reset to
			 * pre-handshake state, and initiate handshake
			 * to the peer too.
			 */
			vgen_handshake_reset(ldcp);
			vgen_handshake(vh_nextphase(ldcp));
		}
		ldcp->hstate |= VER_INFO_RCVD;

		/* save peer's requested values */
		ldcp->peer_hparams.ver_major = vermsg->ver_major;
		ldcp->peer_hparams.ver_minor = vermsg->ver_minor;
		ldcp->peer_hparams.dev_class = vermsg->dev_class;

		if ((vermsg->dev_class != VDEV_NETWORK) &&
		    (vermsg->dev_class != VDEV_NETWORK_SWITCH)) {
			/* unsupported dev_class, send NACK */

			DWARN(vgenp, ldcp, "Version Negotiation Failed\n");

			tagp->vio_subtype = VIO_SUBTYPE_NACK;
			tagp->vio_sid = ldcp->local_sid;
			/* send reply msg back to peer */
			rv = vgen_sendmsg(ldcp, (caddr_t)tagp,
			    sizeof (*vermsg), B_FALSE);
			if (rv != VGEN_SUCCESS) {
				return (rv);
			}
			return (VGEN_FAILURE);
		}

		DBG2(vgenp, ldcp, "VER_INFO_RCVD, ver(%d,%d)\n",
		    vermsg->ver_major,  vermsg->ver_minor);

		idx = 0;

		/*
		 * Walk our version table looking for the first entry whose
		 * major version does not exceed the one the peer asked for.
		 */
		for (;;) {

			if (vermsg->ver_major > versions[idx].ver_major) {

				/* nack with next lower version */
				tagp->vio_subtype = VIO_SUBTYPE_NACK;
				vermsg->ver_major = versions[idx].ver_major;
				vermsg->ver_minor = versions[idx].ver_minor;
				break;
			}

			if (vermsg->ver_major == versions[idx].ver_major) {

				/* major version match - ACK version */
				tagp->vio_subtype = VIO_SUBTYPE_ACK;
				ack = 1;

				/*
				 * lower minor version to the one this endpt
				 * supports, if necessary
				 */
				if (vermsg->ver_minor >
				    versions[idx].ver_minor) {
					vermsg->ver_minor =
					    versions[idx].ver_minor;
					ldcp->peer_hparams.ver_minor =
					    versions[idx].ver_minor;
				}
				break;
			}

			idx++;

			if (idx == VGEN_NUM_VER) {

				/* no version match - send NACK */
				tagp->vio_subtype = VIO_SUBTYPE_NACK;
				vermsg->ver_major = 0;
				vermsg->ver_minor = 0;
				failed = 1;
				break;
			}

		}

		tagp->vio_sid = ldcp->local_sid;

		/* send reply msg back to peer */
		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*vermsg),
		    B_FALSE);
		if (rv != VGEN_SUCCESS) {
			return (rv);
		}

		if (ack) {
			ldcp->hstate |= VER_ACK_SENT;
			DBG2(vgenp, ldcp, "VER_ACK_SENT, ver(%d,%d) \n",
			    vermsg->ver_major, vermsg->ver_minor);
		}
		if (failed) {
			DWARN(vgenp, ldcp, "Negotiation Failed\n");
			return (VGEN_FAILURE);
		}
		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {

			/*  VER_ACK_SENT and VER_ACK_RCVD */

			/* local and peer versions match? */
			ASSERT((ldcp->local_hparams.ver_major ==
			    ldcp->peer_hparams.ver_major) &&
			    (ldcp->local_hparams.ver_minor ==
			    ldcp->peer_hparams.ver_minor));

			vgen_set_vnet_proto_ops(ldcp);

			/* move to the next phase */
			vgen_handshake(vh_nextphase(ldcp));
		}

		break;

	case VIO_SUBTYPE_ACK:

		if (ldcp->hphase != VH_PHASE1) {
			/*  This should not happen. */
			DWARN(vgenp, ldcp, "Invalid Phase(%u)\n", ldcp->hphase);
			return (VGEN_FAILURE);
		}

		/* SUCCESS - we have agreed on a version */
		ldcp->local_hparams.ver_major = vermsg->ver_major;
		ldcp->local_hparams.ver_minor = vermsg->ver_minor;
		ldcp->hstate |= VER_ACK_RCVD;

		DBG2(vgenp, ldcp, "VER_ACK_RCVD, ver(%d,%d) \n",
		    vermsg->ver_major,  vermsg->ver_minor);

		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {

			/*  VER_ACK_SENT and VER_ACK_RCVD */

			/* local and peer versions match? */
			ASSERT((ldcp->local_hparams.ver_major ==
			    ldcp->peer_hparams.ver_major) &&
			    (ldcp->local_hparams.ver_minor ==
			    ldcp->peer_hparams.ver_minor));

			vgen_set_vnet_proto_ops(ldcp);

			/* move to the next phase */
			vgen_handshake(vh_nextphase(ldcp));
		}
		break;

	case VIO_SUBTYPE_NACK:

		if (ldcp->hphase != VH_PHASE1) {
			/*  This should not happen.  */
			DWARN(vgenp, ldcp, "VER_NACK_RCVD Invalid "
			"Phase(%u)\n", ldcp->hphase);
			return (VGEN_FAILURE);
		}

		DBG2(vgenp, ldcp, "VER_NACK_RCVD next ver(%d,%d)\n",
		    vermsg->ver_major, vermsg->ver_minor);

		/* check if version in NACK is zero */
		if (vermsg->ver_major == 0 && vermsg->ver_minor == 0) {
			/*
			 * Version Negotiation has failed.
			 */
			DWARN(vgenp, ldcp, "Version Negotiation Failed\n");
			return (VGEN_FAILURE);
		}

		idx = 0;

		/*
		 * Pick the first entry in our table whose major version is
		 * <= the major version the peer put in its NACK; both
		 * branches below adopt that entry's major and minor.
		 */
		for (;;) {

			if (vermsg->ver_major > versions[idx].ver_major) {
				/* select next lower version */

				ldcp->local_hparams.ver_major =
				    versions[idx].ver_major;
				ldcp->local_hparams.ver_minor =
				    versions[idx].ver_minor;
				break;
			}

			if (vermsg->ver_major == versions[idx].ver_major) {
				/* major version match */

				ldcp->local_hparams.ver_major =
				    versions[idx].ver_major;

				ldcp->local_hparams.ver_minor =
				    versions[idx].ver_minor;
				break;
			}

			idx++;

			if (idx == VGEN_NUM_VER) {
				/*
				 * no version match.
				 * Version Negotiation has failed.
				 */
				DWARN(vgenp, ldcp,
				    "Version Negotiation Failed\n");
				return (VGEN_FAILURE);
			}

		}

		/* propose the newly selected version to the peer */
		rv = vgen_send_version_negotiate(ldcp);
		if (rv != VGEN_SUCCESS) {
			return (rv);
		}

		break;
	}

	DBG1(vgenp, ldcp, "exit\n");
	return (VGEN_SUCCESS);
}
5052 
5053 /* Check if the attributes are supported */
5054 static int
5055 vgen_check_attr_info(vgen_ldc_t *ldcp, vnet_attr_msg_t *msg)
5056 {
5057 	vgen_hparams_t	*lp = &ldcp->local_hparams;
5058 
5059 	if ((msg->addr_type != ADDR_TYPE_MAC) ||
5060 	    (msg->ack_freq > 64) ||
5061 	    (msg->xfer_mode != lp->xfer_mode)) {
5062 		return (VGEN_FAILURE);
5063 	}
5064 
5065 	if (VGEN_VER_LT(ldcp, 1, 4)) {
5066 		/* versions < 1.4, mtu must match */
5067 		if (msg->mtu != lp->mtu) {
5068 			return (VGEN_FAILURE);
5069 		}
5070 	} else {
5071 		/* Ver >= 1.4, validate mtu of the peer is at least ETHERMAX */
5072 		if (msg->mtu < ETHERMAX) {
5073 			return (VGEN_FAILURE);
5074 		}
5075 	}
5076 
5077 	return (VGEN_SUCCESS);
5078 }
5079 
5080 /*
5081  * Handle an attribute info msg from the peer or an ACK/NACK from the peer
5082  * to an attr info msg that we sent.
5083  */
5084 static int
5085 vgen_handle_attr_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5086 {
5087 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
5088 	vnet_attr_msg_t	*msg = (vnet_attr_msg_t *)tagp;
5089 	vgen_hparams_t	*lp = &ldcp->local_hparams;
5090 	vgen_hparams_t	*rp = &ldcp->peer_hparams;
5091 	int		ack = 1;
5092 	int		rv = 0;
5093 	uint32_t	mtu;
5094 
5095 	DBG1(vgenp, ldcp, "enter\n");
5096 	if (ldcp->hphase != VH_PHASE2) {
5097 		DWARN(vgenp, ldcp, "Rcvd ATTR_INFO subtype(%d),"
5098 		" Invalid Phase(%u)\n",
5099 		    tagp->vio_subtype, ldcp->hphase);
5100 		return (VGEN_FAILURE);
5101 	}
5102 	switch (tagp->vio_subtype) {
5103 	case VIO_SUBTYPE_INFO:
5104 
5105 		DBG2(vgenp, ldcp, "ATTR_INFO_RCVD \n");
5106 		ldcp->hstate |= ATTR_INFO_RCVD;
5107 
5108 		/* save peer's values */
5109 		rp->mtu = msg->mtu;
5110 		rp->addr = msg->addr;
5111 		rp->addr_type = msg->addr_type;
5112 		rp->xfer_mode = msg->xfer_mode;
5113 		rp->ack_freq = msg->ack_freq;
5114 
5115 		rv = vgen_check_attr_info(ldcp, msg);
5116 		if (rv == VGEN_FAILURE) {
5117 			/* unsupported attr, send NACK */
5118 			ack = 0;
5119 		} else {
5120 
5121 			if (VGEN_VER_GTEQ(ldcp, 1, 4)) {
5122 
5123 				/*
5124 				 * Versions >= 1.4:
5125 				 * The mtu is negotiated down to the
5126 				 * minimum of our mtu and peer's mtu.
5127 				 */
5128 				mtu = MIN(msg->mtu, vgenp->max_frame_size);
5129 
5130 				/*
5131 				 * If we have received an ack for the attr info
5132 				 * that we sent, then check if the mtu computed
5133 				 * above matches the mtu that the peer had ack'd
5134 				 * (saved in local hparams). If they don't
5135 				 * match, we fail the handshake.
5136 				 */
5137 				if (ldcp->hstate & ATTR_ACK_RCVD) {
5138 					if (mtu != lp->mtu) {
5139 						/* send NACK */
5140 						ack = 0;
5141 					}
5142 				} else {
5143 					/*
5144 					 * Save the mtu computed above in our
5145 					 * attr parameters, so it gets sent in
5146 					 * the attr info from us to the peer.
5147 					 */
5148 					lp->mtu = mtu;
5149 				}
5150 
5151 				/* save the MIN mtu in the msg to be replied */
5152 				msg->mtu = mtu;
5153 
5154 			}
5155 		}
5156 
5157 
5158 		if (ack) {
5159 			tagp->vio_subtype = VIO_SUBTYPE_ACK;
5160 		} else {
5161 			tagp->vio_subtype = VIO_SUBTYPE_NACK;
5162 		}
5163 		tagp->vio_sid = ldcp->local_sid;
5164 
5165 		/* send reply msg back to peer */
5166 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msg),
5167 		    B_FALSE);
5168 		if (rv != VGEN_SUCCESS) {
5169 			return (rv);
5170 		}
5171 
5172 		if (ack) {
5173 			ldcp->hstate |= ATTR_ACK_SENT;
5174 			DBG2(vgenp, ldcp, "ATTR_ACK_SENT \n");
5175 		} else {
5176 			/* failed */
5177 			DWARN(vgenp, ldcp, "ATTR_NACK_SENT \n");
5178 			return (VGEN_FAILURE);
5179 		}
5180 
5181 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5182 			vgen_handshake(vh_nextphase(ldcp));
5183 		}
5184 
5185 		break;
5186 
5187 	case VIO_SUBTYPE_ACK:
5188 
5189 		if (VGEN_VER_GTEQ(ldcp, 1, 4)) {
5190 			/*
5191 			 * Versions >= 1.4:
5192 			 * The ack msg sent by the peer contains the minimum of
5193 			 * our mtu (that we had sent in our attr info) and the
5194 			 * peer's mtu.
5195 			 *
5196 			 * If we have sent an ack for the attr info msg from
5197 			 * the peer, check if the mtu that was computed then
5198 			 * (saved in local hparams) matches the mtu that the
5199 			 * peer has ack'd. If they don't match, we fail the
5200 			 * handshake.
5201 			 */
5202 			if (ldcp->hstate & ATTR_ACK_SENT) {
5203 				if (lp->mtu != msg->mtu) {
5204 					return (VGEN_FAILURE);
5205 				}
5206 			} else {
5207 				/*
5208 				 * If the mtu ack'd by the peer is > our mtu
5209 				 * fail handshake. Otherwise, save the mtu, so
5210 				 * we can validate it when we receive attr info
5211 				 * from our peer.
5212 				 */
5213 				if (msg->mtu > lp->mtu) {
5214 					return (VGEN_FAILURE);
5215 				}
5216 				if (msg->mtu <= lp->mtu) {
5217 					lp->mtu = msg->mtu;
5218 				}
5219 			}
5220 		}
5221 
5222 		ldcp->hstate |= ATTR_ACK_RCVD;
5223 
5224 		DBG2(vgenp, ldcp, "ATTR_ACK_RCVD \n");
5225 
5226 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5227 			vgen_handshake(vh_nextphase(ldcp));
5228 		}
5229 		break;
5230 
5231 	case VIO_SUBTYPE_NACK:
5232 
5233 		DBG2(vgenp, ldcp, "ATTR_NACK_RCVD \n");
5234 		return (VGEN_FAILURE);
5235 	}
5236 	DBG1(vgenp, ldcp, "exit\n");
5237 	return (VGEN_SUCCESS);
5238 }
5239 
5240 /* Check if the dring info msg is ok */
5241 static int
5242 vgen_check_dring_reg(vio_dring_reg_msg_t *msg)
5243 {
5244 	/* check if msg contents are ok */
5245 	if ((msg->num_descriptors < 128) || (msg->descriptor_size <
5246 	    sizeof (vnet_public_desc_t))) {
5247 		return (VGEN_FAILURE);
5248 	}
5249 	return (VGEN_SUCCESS);
5250 }
5251 
5252 /*
5253  * Handle a descriptor ring register msg from the peer or an ACK/NACK from
5254  * the peer to a dring register msg that we sent.
5255  */
5256 static int
5257 vgen_handle_dring_reg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5258 {
5259 	vio_dring_reg_msg_t *msg = (vio_dring_reg_msg_t *)tagp;
5260 	ldc_mem_cookie_t dcookie;
5261 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5262 	int ack = 0;
5263 	int rv = 0;
5264 
5265 	DBG1(vgenp, ldcp, "enter\n");
5266 	if (ldcp->hphase < VH_PHASE2) {
5267 		/* dring_info can be rcvd in any of the phases after Phase1 */
5268 		DWARN(vgenp, ldcp,
5269 		    "Rcvd DRING_INFO Subtype (%d), Invalid Phase(%u)\n",
5270 		    tagp->vio_subtype, ldcp->hphase);
5271 		return (VGEN_FAILURE);
5272 	}
5273 	switch (tagp->vio_subtype) {
5274 	case VIO_SUBTYPE_INFO:
5275 
5276 		DBG2(vgenp, ldcp, "DRING_INFO_RCVD \n");
5277 		ldcp->hstate |= DRING_INFO_RCVD;
5278 		bcopy((msg->cookie), &dcookie, sizeof (dcookie));
5279 
5280 		ASSERT(msg->ncookies == 1);
5281 
5282 		if (vgen_check_dring_reg(msg) == VGEN_SUCCESS) {
5283 			/*
5284 			 * verified dring info msg to be ok,
5285 			 * now try to map the remote dring.
5286 			 */
5287 			rv = vgen_init_rxds(ldcp, msg->num_descriptors,
5288 			    msg->descriptor_size, &dcookie,
5289 			    msg->ncookies);
5290 			if (rv == DDI_SUCCESS) {
5291 				/* now we can ack the peer */
5292 				ack = 1;
5293 			}
5294 		}
5295 		if (ack == 0) {
5296 			/* failed, send NACK */
5297 			tagp->vio_subtype = VIO_SUBTYPE_NACK;
5298 		} else {
5299 			if (!(ldcp->peer_hparams.dring_ready)) {
5300 
5301 				/* save peer's dring_info values */
5302 				bcopy(&dcookie,
5303 				    &(ldcp->peer_hparams.dring_cookie),
5304 				    sizeof (dcookie));
5305 				ldcp->peer_hparams.num_desc =
5306 				    msg->num_descriptors;
5307 				ldcp->peer_hparams.desc_size =
5308 				    msg->descriptor_size;
5309 				ldcp->peer_hparams.num_dcookies =
5310 				    msg->ncookies;
5311 
5312 				/* set dring_ident for the peer */
5313 				ldcp->peer_hparams.dring_ident =
5314 				    (uint64_t)ldcp->rxdp;
5315 				/* return the dring_ident in ack msg */
5316 				msg->dring_ident =
5317 				    (uint64_t)ldcp->rxdp;
5318 
5319 				ldcp->peer_hparams.dring_ready = B_TRUE;
5320 			}
5321 			tagp->vio_subtype = VIO_SUBTYPE_ACK;
5322 		}
5323 		tagp->vio_sid = ldcp->local_sid;
5324 		/* send reply msg back to peer */
5325 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msg),
5326 		    B_FALSE);
5327 		if (rv != VGEN_SUCCESS) {
5328 			return (rv);
5329 		}
5330 
5331 		if (ack) {
5332 			ldcp->hstate |= DRING_ACK_SENT;
5333 			DBG2(vgenp, ldcp, "DRING_ACK_SENT");
5334 		} else {
5335 			DWARN(vgenp, ldcp, "DRING_NACK_SENT");
5336 			return (VGEN_FAILURE);
5337 		}
5338 
5339 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5340 			vgen_handshake(vh_nextphase(ldcp));
5341 		}
5342 
5343 		break;
5344 
5345 	case VIO_SUBTYPE_ACK:
5346 
5347 		ldcp->hstate |= DRING_ACK_RCVD;
5348 
5349 		DBG2(vgenp, ldcp, "DRING_ACK_RCVD");
5350 
5351 		if (!(ldcp->local_hparams.dring_ready)) {
5352 			/* local dring is now ready */
5353 			ldcp->local_hparams.dring_ready = B_TRUE;
5354 
5355 			/* save dring_ident acked by peer */
5356 			ldcp->local_hparams.dring_ident =
5357 			    msg->dring_ident;
5358 		}
5359 
5360 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5361 			vgen_handshake(vh_nextphase(ldcp));
5362 		}
5363 
5364 		break;
5365 
5366 	case VIO_SUBTYPE_NACK:
5367 
5368 		DBG2(vgenp, ldcp, "DRING_NACK_RCVD");
5369 		return (VGEN_FAILURE);
5370 	}
5371 	DBG1(vgenp, ldcp, "exit\n");
5372 	return (VGEN_SUCCESS);
5373 }
5374 
5375 /*
5376  * Handle a rdx info msg from the peer or an ACK/NACK
5377  * from the peer to a rdx info msg that we sent.
5378  */
5379 static int
5380 vgen_handle_rdx_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5381 {
5382 	int rv = 0;
5383 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
5384 
5385 	DBG1(vgenp, ldcp, "enter\n");
5386 	if (ldcp->hphase != VH_PHASE3) {
5387 		DWARN(vgenp, ldcp,
5388 		    "Rcvd RDX_INFO Subtype (%d), Invalid Phase(%u)\n",
5389 		    tagp->vio_subtype, ldcp->hphase);
5390 		return (VGEN_FAILURE);
5391 	}
5392 	switch (tagp->vio_subtype) {
5393 	case VIO_SUBTYPE_INFO:
5394 
5395 		DBG2(vgenp, ldcp, "RDX_INFO_RCVD \n");
5396 		ldcp->hstate |= RDX_INFO_RCVD;
5397 
5398 		tagp->vio_subtype = VIO_SUBTYPE_ACK;
5399 		tagp->vio_sid = ldcp->local_sid;
5400 		/* send reply msg back to peer */
5401 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (vio_rdx_msg_t),
5402 		    B_FALSE);
5403 		if (rv != VGEN_SUCCESS) {
5404 			return (rv);
5405 		}
5406 
5407 		ldcp->hstate |= RDX_ACK_SENT;
5408 		DBG2(vgenp, ldcp, "RDX_ACK_SENT \n");
5409 
5410 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5411 			vgen_handshake(vh_nextphase(ldcp));
5412 		}
5413 
5414 		break;
5415 
5416 	case VIO_SUBTYPE_ACK:
5417 
5418 		ldcp->hstate |= RDX_ACK_RCVD;
5419 
5420 		DBG2(vgenp, ldcp, "RDX_ACK_RCVD \n");
5421 
5422 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5423 			vgen_handshake(vh_nextphase(ldcp));
5424 		}
5425 		break;
5426 
5427 	case VIO_SUBTYPE_NACK:
5428 
5429 		DBG2(vgenp, ldcp, "RDX_NACK_RCVD \n");
5430 		return (VGEN_FAILURE);
5431 	}
5432 	DBG1(vgenp, ldcp, "exit\n");
5433 	return (VGEN_SUCCESS);
5434 }
5435 
5436 /* Handle ACK/NACK from vsw to a set multicast msg that we sent */
5437 static int
5438 vgen_handle_mcast_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5439 {
5440 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5441 	vnet_mcast_msg_t *msgp = (vnet_mcast_msg_t *)tagp;
5442 	struct ether_addr *addrp;
5443 	int count;
5444 	int i;
5445 
5446 	DBG1(vgenp, ldcp, "enter\n");
5447 	switch (tagp->vio_subtype) {
5448 
5449 	case VIO_SUBTYPE_INFO:
5450 
5451 		/* vnet shouldn't recv set mcast msg, only vsw handles it */
5452 		DWARN(vgenp, ldcp, "rcvd SET_MCAST_INFO \n");
5453 		break;
5454 
5455 	case VIO_SUBTYPE_ACK:
5456 
5457 		/* success adding/removing multicast addr */
5458 		DBG1(vgenp, ldcp, "rcvd SET_MCAST_ACK \n");
5459 		break;
5460 
5461 	case VIO_SUBTYPE_NACK:
5462 
5463 		DWARN(vgenp, ldcp, "rcvd SET_MCAST_NACK \n");
5464 		if (!(msgp->set)) {
5465 			/* multicast remove request failed */
5466 			break;
5467 		}
5468 
5469 		/* multicast add request failed */
5470 		for (count = 0; count < msgp->count; count++) {
5471 			addrp = &(msgp->mca[count]);
5472 
5473 			/* delete address from the table */
5474 			for (i = 0; i < vgenp->mccount; i++) {
5475 				if (ether_cmp(addrp,
5476 				    &(vgenp->mctab[i])) == 0) {
5477 					if (vgenp->mccount > 1) {
5478 						int t = vgenp->mccount - 1;
5479 						vgenp->mctab[i] =
5480 						    vgenp->mctab[t];
5481 					}
5482 					vgenp->mccount--;
5483 					break;
5484 				}
5485 			}
5486 		}
5487 		break;
5488 
5489 	}
5490 	DBG1(vgenp, ldcp, "exit\n");
5491 
5492 	return (VGEN_SUCCESS);
5493 }
5494 
5495 /* handler for control messages received from the peer ldc end-point */
5496 static int
5497 vgen_handle_ctrlmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5498 {
5499 	int rv = 0;
5500 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5501 
5502 	DBG1(vgenp, ldcp, "enter\n");
5503 	switch (tagp->vio_subtype_env) {
5504 
5505 	case VIO_VER_INFO:
5506 		rv = vgen_handle_version_negotiate(ldcp, tagp);
5507 		break;
5508 
5509 	case VIO_ATTR_INFO:
5510 		rv = vgen_handle_attr_info(ldcp, tagp);
5511 		break;
5512 
5513 	case VIO_DRING_REG:
5514 		rv = vgen_handle_dring_reg(ldcp, tagp);
5515 		break;
5516 
5517 	case VIO_RDX:
5518 		rv = vgen_handle_rdx_info(ldcp, tagp);
5519 		break;
5520 
5521 	case VNET_MCAST_INFO:
5522 		rv = vgen_handle_mcast_info(ldcp, tagp);
5523 		break;
5524 
5525 	case VIO_DDS_INFO:
5526 		rv = vgen_dds_rx(ldcp, tagp);
5527 		break;
5528 	}
5529 
5530 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
5531 	return (rv);
5532 }
5533 
5534 /* handler for data messages received from the peer ldc end-point */
5535 static int
5536 vgen_handle_datamsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp, uint32_t msglen)
5537 {
5538 	int rv = 0;
5539 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5540 
5541 	DBG1(vgenp, ldcp, "enter\n");
5542 
5543 	if (ldcp->hphase != VH_DONE)
5544 		return (rv);
5545 
5546 	if (tagp->vio_subtype == VIO_SUBTYPE_INFO) {
5547 		rv = vgen_check_datamsg_seq(ldcp, tagp);
5548 		if (rv != 0) {
5549 			return (rv);
5550 		}
5551 	}
5552 
5553 	switch (tagp->vio_subtype_env) {
5554 	case VIO_DRING_DATA:
5555 		rv = vgen_handle_dring_data(ldcp, tagp);
5556 		break;
5557 
5558 	case VIO_PKT_DATA:
5559 		ldcp->rx_pktdata((void *)ldcp, (void *)tagp, msglen);
5560 		break;
5561 	default:
5562 		break;
5563 	}
5564 
5565 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
5566 	return (rv);
5567 }
5568 
/*
 * dummy pkt data handler function for vnet protocol version 1.0
 *
 * Takes the same arguments as vgen_handle_pkt_data() but does nothing
 * with them.
 */
static void
vgen_handle_pkt_data_nop(void *arg1, void *arg2, uint32_t msglen)
{
	/* annotation only: all three arguments are deliberately unused */
	_NOTE(ARGUNUSED(arg1, arg2, msglen))
}
5577 
5578 /*
5579  * This function handles raw pkt data messages received over the channel.
5580  * Currently, only priority-eth-type frames are received through this mechanism.
5581  * In this case, the frame(data) is present within the message itself which
5582  * is copied into an mblk before sending it up the stack.
5583  */
5584 static void
5585 vgen_handle_pkt_data(void *arg1, void *arg2, uint32_t msglen)
5586 {
5587 	vgen_ldc_t		*ldcp = (vgen_ldc_t *)arg1;
5588 	vio_raw_data_msg_t	*pkt	= (vio_raw_data_msg_t *)arg2;
5589 	uint32_t		size;
5590 	mblk_t			*mp;
5591 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
5592 	vgen_stats_t		*statsp = &ldcp->stats;
5593 	vgen_hparams_t		*lp = &ldcp->local_hparams;
5594 	vio_net_rx_cb_t		vrx_cb;
5595 
5596 	ASSERT(MUTEX_HELD(&ldcp->cblock));
5597 
5598 	mutex_exit(&ldcp->cblock);
5599 
5600 	size = msglen - VIO_PKT_DATA_HDRSIZE;
5601 	if (size < ETHERMIN || size > lp->mtu) {
5602 		(void) atomic_inc_32(&statsp->rx_pri_fail);
5603 		goto exit;
5604 	}
5605 
5606 	mp = vio_multipool_allocb(&ldcp->vmp, size);
5607 	if (mp == NULL) {
5608 		mp = allocb(size, BPRI_MED);
5609 		if (mp == NULL) {
5610 			(void) atomic_inc_32(&statsp->rx_pri_fail);
5611 			DWARN(vgenp, ldcp, "allocb failure, "
5612 			    "unable to process priority frame\n");
5613 			goto exit;
5614 		}
5615 	}
5616 
5617 	/* copy the frame from the payload of raw data msg into the mblk */
5618 	bcopy(pkt->data, mp->b_rptr, size);
5619 	mp->b_wptr = mp->b_rptr + size;
5620 
5621 	/* update stats */
5622 	(void) atomic_inc_64(&statsp->rx_pri_packets);
5623 	(void) atomic_add_64(&statsp->rx_pri_bytes, size);
5624 
5625 	/* send up; call vrx_cb() as cblock is already released */
5626 	vrx_cb = ldcp->portp->vcb.vio_net_rx_cb;
5627 	vrx_cb(ldcp->portp->vhp, mp);
5628 
5629 exit:
5630 	mutex_enter(&ldcp->cblock);
5631 }
5632 
5633 static int
5634 vgen_send_dring_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp, uint32_t start,
5635     int32_t end, uint8_t pstate)
5636 {
5637 	int rv = 0;
5638 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5639 	vio_dring_msg_t *msgp = (vio_dring_msg_t *)tagp;
5640 
5641 	tagp->vio_subtype = VIO_SUBTYPE_ACK;
5642 	tagp->vio_sid = ldcp->local_sid;
5643 	msgp->start_idx = start;
5644 	msgp->end_idx = end;
5645 	msgp->dring_process_state = pstate;
5646 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msgp), B_FALSE);
5647 	if (rv != VGEN_SUCCESS) {
5648 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
5649 	}
5650 	return (rv);
5651 }
5652 
5653 static int
5654 vgen_handle_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5655 {
5656 	int rv = 0;
5657 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5658 
5659 
5660 	DBG1(vgenp, ldcp, "enter\n");
5661 	switch (tagp->vio_subtype) {
5662 
5663 	case VIO_SUBTYPE_INFO:
5664 		/*
5665 		 * To reduce the locking contention, release the
5666 		 * cblock here and re-acquire it once we are done
5667 		 * receiving packets.
5668 		 */
5669 		mutex_exit(&ldcp->cblock);
5670 		mutex_enter(&ldcp->rxlock);
5671 		rv = vgen_handle_dring_data_info(ldcp, tagp);
5672 		mutex_exit(&ldcp->rxlock);
5673 		mutex_enter(&ldcp->cblock);
5674 		break;
5675 
5676 	case VIO_SUBTYPE_ACK:
5677 		rv = vgen_handle_dring_data_ack(ldcp, tagp);
5678 		break;
5679 
5680 	case VIO_SUBTYPE_NACK:
5681 		rv = vgen_handle_dring_data_nack(ldcp, tagp);
5682 		break;
5683 	}
5684 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
5685 	return (rv);
5686 }
5687 
5688 static int
5689 vgen_handle_dring_data_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5690 {
5691 	uint32_t start;
5692 	int32_t end;
5693 	int rv = 0;
5694 	vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;
5695 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5696 #ifdef VGEN_HANDLE_LOST_PKTS
5697 	vgen_stats_t *statsp = &ldcp->stats;
5698 	uint32_t rxi;
5699 	int n;
5700 #endif
5701 
5702 	DBG1(vgenp, ldcp, "enter\n");
5703 
5704 	start = dringmsg->start_idx;
5705 	end = dringmsg->end_idx;
5706 	/*
5707 	 * received a data msg, which contains the start and end
5708 	 * indices of the descriptors within the rx ring holding data,
5709 	 * the seq_num of data packet corresponding to the start index,
5710 	 * and the dring_ident.
5711 	 * We can now read the contents of each of these descriptors
5712 	 * and gather data from it.
5713 	 */
5714 	DBG1(vgenp, ldcp, "INFO: start(%d), end(%d)\n",
5715 	    start, end);
5716 
5717 	/* validate rx start and end indeces */
5718 	if (!(CHECK_RXI(start, ldcp)) || ((end != -1) &&
5719 	    !(CHECK_RXI(end, ldcp)))) {
5720 		DWARN(vgenp, ldcp, "Invalid Rx start(%d) or end(%d)\n",
5721 		    start, end);
5722 		/* drop the message if invalid index */
5723 		return (rv);
5724 	}
5725 
5726 	/* validate dring_ident */
5727 	if (dringmsg->dring_ident != ldcp->peer_hparams.dring_ident) {
5728 		DWARN(vgenp, ldcp, "Invalid dring ident 0x%x\n",
5729 		    dringmsg->dring_ident);
5730 		/* invalid dring_ident, drop the msg */
5731 		return (rv);
5732 	}
5733 #ifdef DEBUG
5734 	if (vgen_trigger_rxlost) {
5735 		/* drop this msg to simulate lost pkts for debugging */
5736 		vgen_trigger_rxlost = 0;
5737 		return (rv);
5738 	}
5739 #endif
5740 
5741 #ifdef	VGEN_HANDLE_LOST_PKTS
5742 
5743 	/* receive start index doesn't match expected index */
5744 	if (ldcp->next_rxi != start) {
5745 		DWARN(vgenp, ldcp, "next_rxi(%d) != start(%d)\n",
5746 		    ldcp->next_rxi, start);
5747 
5748 		/* calculate the number of pkts lost */
5749 		if (start >= ldcp->next_rxi) {
5750 			n = start - ldcp->next_rxi;
5751 		} else  {
5752 			n = ldcp->num_rxds - (ldcp->next_rxi - start);
5753 		}
5754 
5755 		statsp->rx_lost_pkts += n;
5756 		tagp->vio_subtype = VIO_SUBTYPE_NACK;
5757 		tagp->vio_sid = ldcp->local_sid;
5758 		/* indicate the range of lost descriptors */
5759 		dringmsg->start_idx = ldcp->next_rxi;
5760 		rxi = start;
5761 		DECR_RXI(rxi, ldcp);
5762 		dringmsg->end_idx = rxi;
5763 		/* dring ident is left unchanged */
5764 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp,
5765 		    sizeof (*dringmsg), B_FALSE);
5766 		if (rv != VGEN_SUCCESS) {
5767 			DWARN(vgenp, ldcp,
5768 			    "vgen_sendmsg failed, stype:NACK\n");
5769 			return (rv);
5770 		}
5771 		/*
5772 		 * treat this range of descrs/pkts as dropped
5773 		 * and set the new expected value of next_rxi
5774 		 * and continue(below) to process from the new
5775 		 * start index.
5776 		 */
5777 		ldcp->next_rxi = start;
5778 	}
5779 
5780 #endif	/* VGEN_HANDLE_LOST_PKTS */
5781 
5782 	/* Now receive messages */
5783 	rv = vgen_process_dring_data(ldcp, tagp);
5784 
5785 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
5786 	return (rv);
5787 }
5788 
5789 static int
5790 vgen_process_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5791 {
5792 	boolean_t set_ack_start = B_FALSE;
5793 	uint32_t start;
5794 	uint32_t ack_end;
5795 	uint32_t next_rxi;
5796 	uint32_t rxi;
5797 	int count = 0;
5798 	int rv = 0;
5799 	uint32_t retries = 0;
5800 	vgen_stats_t *statsp;
5801 	vnet_public_desc_t rxd;
5802 	vio_dring_entry_hdr_t *hdrp;
5803 	mblk_t *bp = NULL;
5804 	mblk_t *bpt = NULL;
5805 	uint32_t ack_start;
5806 	boolean_t rxd_err = B_FALSE;
5807 	mblk_t *mp = NULL;
5808 	size_t nbytes;
5809 	boolean_t ack_needed = B_FALSE;
5810 	size_t nread;
5811 	uint64_t off = 0;
5812 	struct ether_header *ehp;
5813 	vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;
5814 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5815 	vgen_hparams_t	*lp = &ldcp->local_hparams;
5816 
5817 	DBG1(vgenp, ldcp, "enter\n");
5818 
5819 	statsp = &ldcp->stats;
5820 	start = dringmsg->start_idx;
5821 
5822 	/*
5823 	 * start processing the descriptors from the specified
5824 	 * start index, up to the index a descriptor is not ready
5825 	 * to be processed or we process the entire descriptor ring
5826 	 * and wrap around upto the start index.
5827 	 */
5828 
5829 	/* need to set the start index of descriptors to be ack'd */
5830 	set_ack_start = B_TRUE;
5831 
5832 	/* index upto which we have ack'd */
5833 	ack_end = start;
5834 	DECR_RXI(ack_end, ldcp);
5835 
5836 	next_rxi = rxi =  start;
5837 	do {
5838 vgen_recv_retry:
5839 		rv = vnet_dring_entry_copy(&(ldcp->rxdp[rxi]), &rxd,
5840 		    ldcp->dring_mtype, ldcp->rx_dhandle, rxi, rxi);
5841 		if (rv != 0) {
5842 			DWARN(vgenp, ldcp, "ldc_mem_dring_acquire() failed"
5843 			    " rv(%d)\n", rv);
5844 			statsp->ierrors++;
5845 			return (rv);
5846 		}
5847 
5848 		hdrp = &rxd.hdr;
5849 
5850 		if (hdrp->dstate != VIO_DESC_READY) {
5851 			/*
5852 			 * Before waiting and retry here, send up
5853 			 * the packets that are received already
5854 			 */
5855 			if (bp != NULL) {
5856 				DTRACE_PROBE1(vgen_rcv_msgs, int, count);
5857 				vgen_rx(ldcp, bp);
5858 				count = 0;
5859 				bp = bpt = NULL;
5860 			}
5861 			/*
5862 			 * descriptor is not ready.
5863 			 * retry descriptor acquire, stop processing
5864 			 * after max # retries.
5865 			 */
5866 			if (retries == vgen_recv_retries)
5867 				break;
5868 			retries++;
5869 			drv_usecwait(vgen_recv_delay);
5870 			goto vgen_recv_retry;
5871 		}
5872 		retries = 0;
5873 
5874 		if (set_ack_start) {
5875 			/*
5876 			 * initialize the start index of the range
5877 			 * of descriptors to be ack'd.
5878 			 */
5879 			ack_start = rxi;
5880 			set_ack_start = B_FALSE;
5881 		}
5882 
5883 		if ((rxd.nbytes < ETHERMIN) ||
5884 		    (rxd.nbytes > lp->mtu) ||
5885 		    (rxd.ncookies == 0) ||
5886 		    (rxd.ncookies > MAX_COOKIES)) {
5887 			rxd_err = B_TRUE;
5888 		} else {
5889 			/*
5890 			 * Try to allocate an mblk from the free pool
5891 			 * of recv mblks for the channel.
5892 			 * If this fails, use allocb().
5893 			 */
5894 			nbytes = (VNET_IPALIGN + rxd.nbytes + 7) & ~7;
5895 			if (nbytes > ldcp->max_rxpool_size) {
5896 				mp = allocb(VNET_IPALIGN + rxd.nbytes + 8,
5897 				    BPRI_MED);
5898 			} else {
5899 				mp = vio_multipool_allocb(&ldcp->vmp, nbytes);
5900 				if (mp == NULL) {
5901 					statsp->rx_vio_allocb_fail++;
5902 					/*
5903 					 * Data buffer returned by allocb(9F)
5904 					 * is 8byte aligned. We allocate extra
5905 					 * 8 bytes to ensure size is multiple
5906 					 * of 8 bytes for ldc_mem_copy().
5907 					 */
5908 					mp = allocb(VNET_IPALIGN +
5909 					    rxd.nbytes + 8, BPRI_MED);
5910 				}
5911 			}
5912 		}
5913 		if ((rxd_err) || (mp == NULL)) {
5914 			/*
5915 			 * rxd_err or allocb() failure,
5916 			 * drop this packet, get next.
5917 			 */
5918 			if (rxd_err) {
5919 				statsp->ierrors++;
5920 				rxd_err = B_FALSE;
5921 			} else {
5922 				statsp->rx_allocb_fail++;
5923 			}
5924 
5925 			ack_needed = hdrp->ack;
5926 
5927 			/* set descriptor done bit */
5928 			rv = vnet_dring_entry_set_dstate(&(ldcp->rxdp[rxi]),
5929 			    ldcp->dring_mtype, ldcp->rx_dhandle, rxi, rxi,
5930 			    VIO_DESC_DONE);
5931 			if (rv != 0) {
5932 				DWARN(vgenp, ldcp,
5933 				    "vnet_dring_entry_set_dstate err rv(%d)\n",
5934 				    rv);
5935 				return (rv);
5936 			}
5937 
5938 			if (ack_needed) {
5939 				ack_needed = B_FALSE;
5940 				/*
5941 				 * sender needs ack for this packet,
5942 				 * ack pkts upto this index.
5943 				 */
5944 				ack_end = rxi;
5945 
5946 				rv = vgen_send_dring_ack(ldcp, tagp,
5947 				    ack_start, ack_end,
5948 				    VIO_DP_ACTIVE);
5949 				if (rv != VGEN_SUCCESS) {
5950 					goto error_ret;
5951 				}
5952 
5953 				/* need to set new ack start index */
5954 				set_ack_start = B_TRUE;
5955 			}
5956 			goto vgen_next_rxi;
5957 		}
5958 
5959 		nread = nbytes;
5960 		rv = ldc_mem_copy(ldcp->ldc_handle,
5961 		    (caddr_t)mp->b_rptr, off, &nread,
5962 		    rxd.memcookie, rxd.ncookies, LDC_COPY_IN);
5963 
5964 		/* if ldc_mem_copy() failed */
5965 		if (rv) {
5966 			DWARN(vgenp, ldcp, "ldc_mem_copy err rv(%d)\n", rv);
5967 			statsp->ierrors++;
5968 			freemsg(mp);
5969 			goto error_ret;
5970 		}
5971 
5972 		ack_needed = hdrp->ack;
5973 
5974 		rv = vnet_dring_entry_set_dstate(&(ldcp->rxdp[rxi]),
5975 		    ldcp->dring_mtype, ldcp->rx_dhandle, rxi, rxi,
5976 		    VIO_DESC_DONE);
5977 		if (rv != 0) {
5978 			DWARN(vgenp, ldcp,
5979 			    "vnet_dring_entry_set_dstate err rv(%d)\n", rv);
5980 			goto error_ret;
5981 		}
5982 
5983 		mp->b_rptr += VNET_IPALIGN;
5984 
5985 		if (ack_needed) {
5986 			ack_needed = B_FALSE;
5987 			/*
5988 			 * sender needs ack for this packet,
5989 			 * ack pkts upto this index.
5990 			 */
5991 			ack_end = rxi;
5992 
5993 			rv = vgen_send_dring_ack(ldcp, tagp,
5994 			    ack_start, ack_end, VIO_DP_ACTIVE);
5995 			if (rv != VGEN_SUCCESS) {
5996 				goto error_ret;
5997 			}
5998 
5999 			/* need to set new ack start index */
6000 			set_ack_start = B_TRUE;
6001 		}
6002 
6003 		if (nread != nbytes) {
6004 			DWARN(vgenp, ldcp,
6005 			    "ldc_mem_copy nread(%lx), nbytes(%lx)\n",
6006 			    nread, nbytes);
6007 			statsp->ierrors++;
6008 			freemsg(mp);
6009 			goto vgen_next_rxi;
6010 		}
6011 
6012 		/* point to the actual end of data */
6013 		mp->b_wptr = mp->b_rptr + rxd.nbytes;
6014 
6015 		/* update stats */
6016 		statsp->ipackets++;
6017 		statsp->rbytes += rxd.nbytes;
6018 		ehp = (struct ether_header *)mp->b_rptr;
6019 		if (IS_BROADCAST(ehp))
6020 			statsp->brdcstrcv++;
6021 		else if (IS_MULTICAST(ehp))
6022 			statsp->multircv++;
6023 
6024 		/* build a chain of received packets */
6025 		if (bp == NULL) {
6026 			/* first pkt */
6027 			bp = mp;
6028 			bpt = bp;
6029 			bpt->b_next = NULL;
6030 		} else {
6031 			mp->b_next = NULL;
6032 			bpt->b_next = mp;
6033 			bpt = mp;
6034 		}
6035 
6036 		if (count++ > vgen_chain_len) {
6037 			DTRACE_PROBE1(vgen_rcv_msgs, int, count);
6038 			vgen_rx(ldcp, bp);
6039 			count = 0;
6040 			bp = bpt = NULL;
6041 		}
6042 
6043 vgen_next_rxi:
6044 		/* update end index of range of descrs to be ack'd */
6045 		ack_end = rxi;
6046 
6047 		/* update the next index to be processed */
6048 		INCR_RXI(next_rxi, ldcp);
6049 		if (next_rxi == start) {
6050 			/*
6051 			 * processed the entire descriptor ring upto
6052 			 * the index at which we started.
6053 			 */
6054 			break;
6055 		}
6056 
6057 		rxi = next_rxi;
6058 
6059 	_NOTE(CONSTCOND)
6060 	} while (1);
6061 
6062 	/*
6063 	 * send an ack message to peer indicating that we have stopped
6064 	 * processing descriptors.
6065 	 */
6066 	if (set_ack_start) {
6067 		/*
6068 		 * We have ack'd upto some index and we have not
6069 		 * processed any descriptors beyond that index.
6070 		 * Use the last ack'd index as both the start and
6071 		 * end of range of descrs being ack'd.
6072 		 * Note: This results in acking the last index twice
6073 		 * and should be harmless.
6074 		 */
6075 		ack_start = ack_end;
6076 	}
6077 
6078 	rv = vgen_send_dring_ack(ldcp, tagp, ack_start, ack_end,
6079 	    VIO_DP_STOPPED);
6080 	if (rv != VGEN_SUCCESS) {
6081 		goto error_ret;
6082 	}
6083 
6084 	/* save new recv index of next dring msg */
6085 	ldcp->next_rxi = next_rxi;
6086 
6087 error_ret:
6088 	/* send up packets received so far */
6089 	if (bp != NULL) {
6090 		DTRACE_PROBE1(vgen_rcv_msgs, int, count);
6091 		vgen_rx(ldcp, bp);
6092 		bp = bpt = NULL;
6093 	}
6094 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
6095 	return (rv);
6096 
6097 }
6098 
6099 static int
6100 vgen_handle_dring_data_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
6101 {
6102 	int rv = 0;
6103 	uint32_t start;
6104 	int32_t end;
6105 	uint32_t txi;
6106 	boolean_t ready_txd = B_FALSE;
6107 	vgen_stats_t *statsp;
6108 	vgen_private_desc_t *tbufp;
6109 	vnet_public_desc_t *txdp;
6110 	vio_dring_entry_hdr_t *hdrp;
6111 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6112 	vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;
6113 
6114 	DBG1(vgenp, ldcp, "enter\n");
6115 	start = dringmsg->start_idx;
6116 	end = dringmsg->end_idx;
6117 	statsp = &ldcp->stats;
6118 
6119 	/*
6120 	 * received an ack corresponding to a specific descriptor for
6121 	 * which we had set the ACK bit in the descriptor (during
6122 	 * transmit). This enables us to reclaim descriptors.
6123 	 */
6124 
6125 	DBG2(vgenp, ldcp, "ACK:  start(%d), end(%d)\n", start, end);
6126 
6127 	/* validate start and end indeces in the tx ack msg */
6128 	if (!(CHECK_TXI(start, ldcp)) || !(CHECK_TXI(end, ldcp))) {
6129 		/* drop the message if invalid index */
6130 		DWARN(vgenp, ldcp, "Invalid Tx ack start(%d) or end(%d)\n",
6131 		    start, end);
6132 		return (rv);
6133 	}
6134 	/* validate dring_ident */
6135 	if (dringmsg->dring_ident != ldcp->local_hparams.dring_ident) {
6136 		/* invalid dring_ident, drop the msg */
6137 		DWARN(vgenp, ldcp, "Invalid dring ident 0x%x\n",
6138 		    dringmsg->dring_ident);
6139 		return (rv);
6140 	}
6141 	statsp->dring_data_acks++;
6142 
6143 	/* reclaim descriptors that are done */
6144 	vgen_reclaim(ldcp);
6145 
6146 	if (dringmsg->dring_process_state != VIO_DP_STOPPED) {
6147 		/*
6148 		 * receiver continued processing descriptors after
6149 		 * sending us the ack.
6150 		 */
6151 		return (rv);
6152 	}
6153 
6154 	statsp->dring_stopped_acks++;
6155 
6156 	/* receiver stopped processing descriptors */
6157 	mutex_enter(&ldcp->wrlock);
6158 	mutex_enter(&ldcp->tclock);
6159 
6160 	/*
6161 	 * determine if there are any pending tx descriptors
6162 	 * ready to be processed by the receiver(peer) and if so,
6163 	 * send a message to the peer to restart receiving.
6164 	 */
6165 	ready_txd = B_FALSE;
6166 
6167 	/*
6168 	 * using the end index of the descriptor range for which
6169 	 * we received the ack, check if the next descriptor is
6170 	 * ready.
6171 	 */
6172 	txi = end;
6173 	INCR_TXI(txi, ldcp);
6174 	tbufp = &ldcp->tbufp[txi];
6175 	txdp = tbufp->descp;
6176 	hdrp = &txdp->hdr;
6177 	if (hdrp->dstate == VIO_DESC_READY) {
6178 		ready_txd = B_TRUE;
6179 	} else {
6180 		/*
6181 		 * descr next to the end of ack'd descr range is not
6182 		 * ready.
6183 		 * starting from the current reclaim index, check
6184 		 * if any descriptor is ready.
6185 		 */
6186 
6187 		txi = ldcp->cur_tbufp - ldcp->tbufp;
6188 		tbufp = &ldcp->tbufp[txi];
6189 
6190 		txdp = tbufp->descp;
6191 		hdrp = &txdp->hdr;
6192 		if (hdrp->dstate == VIO_DESC_READY) {
6193 			ready_txd = B_TRUE;
6194 		}
6195 
6196 	}
6197 
6198 	if (ready_txd) {
6199 		/*
6200 		 * we have tx descriptor(s) ready to be
6201 		 * processed by the receiver.
6202 		 * send a message to the peer with the start index
6203 		 * of ready descriptors.
6204 		 */
6205 		rv = vgen_send_dring_data(ldcp, txi, -1);
6206 		if (rv != VGEN_SUCCESS) {
6207 			ldcp->resched_peer = B_TRUE;
6208 			ldcp->resched_peer_txi = txi;
6209 			mutex_exit(&ldcp->tclock);
6210 			mutex_exit(&ldcp->wrlock);
6211 			return (rv);
6212 		}
6213 	} else {
6214 		/*
6215 		 * no ready tx descriptors. set the flag to send a
6216 		 * message to peer when tx descriptors are ready in
6217 		 * transmit routine.
6218 		 */
6219 		ldcp->resched_peer = B_TRUE;
6220 		ldcp->resched_peer_txi = ldcp->cur_tbufp - ldcp->tbufp;
6221 	}
6222 
6223 	mutex_exit(&ldcp->tclock);
6224 	mutex_exit(&ldcp->wrlock);
6225 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
6226 	return (rv);
6227 }
6228 
6229 static int
6230 vgen_handle_dring_data_nack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
6231 {
6232 	int rv = 0;
6233 	uint32_t start;
6234 	int32_t end;
6235 	uint32_t txi;
6236 	vnet_public_desc_t *txdp;
6237 	vio_dring_entry_hdr_t *hdrp;
6238 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6239 	vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;
6240 
6241 	DBG1(vgenp, ldcp, "enter\n");
6242 	start = dringmsg->start_idx;
6243 	end = dringmsg->end_idx;
6244 
6245 	/*
6246 	 * peer sent a NACK msg to indicate lost packets.
6247 	 * The start and end correspond to the range of descriptors
6248 	 * for which the peer didn't receive a dring data msg and so
6249 	 * didn't receive the corresponding data.
6250 	 */
6251 	DWARN(vgenp, ldcp, "NACK: start(%d), end(%d)\n", start, end);
6252 
6253 	/* validate start and end indeces in the tx nack msg */
6254 	if (!(CHECK_TXI(start, ldcp)) || !(CHECK_TXI(end, ldcp))) {
6255 		/* drop the message if invalid index */
6256 		DWARN(vgenp, ldcp, "Invalid Tx nack start(%d) or end(%d)\n",
6257 		    start, end);
6258 		return (rv);
6259 	}
6260 	/* validate dring_ident */
6261 	if (dringmsg->dring_ident != ldcp->local_hparams.dring_ident) {
6262 		/* invalid dring_ident, drop the msg */
6263 		DWARN(vgenp, ldcp, "Invalid dring ident 0x%x\n",
6264 		    dringmsg->dring_ident);
6265 		return (rv);
6266 	}
6267 	mutex_enter(&ldcp->txlock);
6268 	mutex_enter(&ldcp->tclock);
6269 
6270 	if (ldcp->next_tbufp == ldcp->cur_tbufp) {
6271 		/* no busy descriptors, bogus nack ? */
6272 		mutex_exit(&ldcp->tclock);
6273 		mutex_exit(&ldcp->txlock);
6274 		return (rv);
6275 	}
6276 
6277 	/* we just mark the descrs as done so they can be reclaimed */
6278 	for (txi = start; txi <= end; ) {
6279 		txdp = &(ldcp->txdp[txi]);
6280 		hdrp = &txdp->hdr;
6281 		if (hdrp->dstate == VIO_DESC_READY)
6282 			hdrp->dstate = VIO_DESC_DONE;
6283 		INCR_TXI(txi, ldcp);
6284 	}
6285 	mutex_exit(&ldcp->tclock);
6286 	mutex_exit(&ldcp->txlock);
6287 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
6288 	return (rv);
6289 }
6290 
/*
 * Reclaim completed transmit descriptors of the channel.
 * Runs vgen_reclaim_dring() under tclock and then records the current
 * lbolt in reclaim_lbolt, the timestamp that vgen_ldc_txtimeout()
 * measures against.
 */
static void
vgen_reclaim(vgen_ldc_t *ldcp)
{
	mutex_enter(&ldcp->tclock);

	vgen_reclaim_dring(ldcp);
	/* remember when we last ran a reclaim */
	ldcp->reclaim_lbolt = ddi_get_lbolt();

	mutex_exit(&ldcp->tclock);
}
6301 
6302 /*
6303  * transmit reclaim function. starting from the current reclaim index
6304  * look for descriptors marked DONE and reclaim the descriptor and the
6305  * corresponding buffers (tbuf).
6306  */
6307 static void
6308 vgen_reclaim_dring(vgen_ldc_t *ldcp)
6309 {
6310 	int count = 0;
6311 	vnet_public_desc_t *txdp;
6312 	vgen_private_desc_t *tbufp;
6313 	vio_dring_entry_hdr_t	*hdrp;
6314 
6315 #ifdef DEBUG
6316 	if (vgen_trigger_txtimeout)
6317 		return;
6318 #endif
6319 
6320 	tbufp = ldcp->cur_tbufp;
6321 	txdp = tbufp->descp;
6322 	hdrp = &txdp->hdr;
6323 
6324 	while ((hdrp->dstate == VIO_DESC_DONE) &&
6325 	    (tbufp != ldcp->next_tbufp)) {
6326 		tbufp->flags = VGEN_PRIV_DESC_FREE;
6327 		hdrp->dstate = VIO_DESC_FREE;
6328 		hdrp->ack = B_FALSE;
6329 
6330 		tbufp = NEXTTBUF(ldcp, tbufp);
6331 		txdp = tbufp->descp;
6332 		hdrp = &txdp->hdr;
6333 		count++;
6334 	}
6335 
6336 	ldcp->cur_tbufp = tbufp;
6337 
6338 	/*
6339 	 * Check if mac layer should be notified to restart transmissions
6340 	 */
6341 	if ((ldcp->need_resched) && (count > 0)) {
6342 		vio_net_tx_update_t vtx_update =
6343 		    ldcp->portp->vcb.vio_net_tx_update;
6344 
6345 		ldcp->need_resched = B_FALSE;
6346 		vtx_update(ldcp->portp->vhp);
6347 	}
6348 }
6349 
6350 /* return the number of pending transmits for the channel */
6351 static int
6352 vgen_num_txpending(vgen_ldc_t *ldcp)
6353 {
6354 	int n;
6355 
6356 	if (ldcp->next_tbufp >= ldcp->cur_tbufp) {
6357 		n = ldcp->next_tbufp - ldcp->cur_tbufp;
6358 	} else  {
6359 		/* cur_tbufp > next_tbufp */
6360 		n = ldcp->num_txds - (ldcp->cur_tbufp - ldcp->next_tbufp);
6361 	}
6362 
6363 	return (n);
6364 }
6365 
6366 /* determine if the transmit descriptor ring is full */
6367 static int
6368 vgen_tx_dring_full(vgen_ldc_t *ldcp)
6369 {
6370 	vgen_private_desc_t	*tbufp;
6371 	vgen_private_desc_t	*ntbufp;
6372 
6373 	tbufp = ldcp->next_tbufp;
6374 	ntbufp = NEXTTBUF(ldcp, tbufp);
6375 	if (ntbufp == ldcp->cur_tbufp) { /* out of tbufs/txds */
6376 		return (VGEN_SUCCESS);
6377 	}
6378 	return (VGEN_FAILURE);
6379 }
6380 
6381 /* determine if timeout condition has occured */
6382 static int
6383 vgen_ldc_txtimeout(vgen_ldc_t *ldcp)
6384 {
6385 	if (((ddi_get_lbolt() - ldcp->reclaim_lbolt) >
6386 	    drv_usectohz(vnet_ldcwd_txtimeout * 1000)) &&
6387 	    (vnet_ldcwd_txtimeout) &&
6388 	    (vgen_tx_dring_full(ldcp) == VGEN_SUCCESS)) {
6389 		return (VGEN_SUCCESS);
6390 	} else {
6391 		return (VGEN_FAILURE);
6392 	}
6393 }
6394 
6395 /* transmit watchdog timeout handler */
6396 static void
6397 vgen_ldc_watchdog(void *arg)
6398 {
6399 	vgen_ldc_t *ldcp;
6400 	vgen_t *vgenp;
6401 	int rv;
6402 
6403 	ldcp = (vgen_ldc_t *)arg;
6404 	vgenp = LDC_TO_VGEN(ldcp);
6405 
6406 	rv = vgen_ldc_txtimeout(ldcp);
6407 	if (rv == VGEN_SUCCESS) {
6408 		DWARN(vgenp, ldcp, "transmit timeout\n");
6409 #ifdef DEBUG
6410 		if (vgen_trigger_txtimeout) {
6411 			/* tx timeout triggered for debugging */
6412 			vgen_trigger_txtimeout = 0;
6413 		}
6414 #endif
6415 		mutex_enter(&ldcp->cblock);
6416 		ldcp->need_ldc_reset = B_TRUE;
6417 		vgen_handshake_retry(ldcp);
6418 		mutex_exit(&ldcp->cblock);
6419 		if (ldcp->need_resched) {
6420 			vio_net_tx_update_t vtx_update =
6421 			    ldcp->portp->vcb.vio_net_tx_update;
6422 
6423 			ldcp->need_resched = B_FALSE;
6424 			vtx_update(ldcp->portp->vhp);
6425 		}
6426 	}
6427 
6428 	ldcp->wd_tid = timeout(vgen_ldc_watchdog, (caddr_t)ldcp,
6429 	    drv_usectohz(vnet_ldcwd_interval * 1000));
6430 }
6431 
/*
 * handler for error messages received from the peer ldc end-point
 *
 * Currently a no-op: neither the channel nor the message is looked at.
 */
static void
vgen_handle_errmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
{
	/* annotation only: both arguments are deliberately unused */
	_NOTE(ARGUNUSED(ldcp, tagp))
}
6438 
6439 static int
6440 vgen_check_datamsg_seq(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
6441 {
6442 	vio_raw_data_msg_t	*rmsg;
6443 	vio_dring_msg_t		*dmsg;
6444 	uint64_t		seq_num;
6445 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
6446 
6447 	if (tagp->vio_subtype_env == VIO_DRING_DATA) {
6448 		dmsg = (vio_dring_msg_t *)tagp;
6449 		seq_num = dmsg->seq_num;
6450 	} else if (tagp->vio_subtype_env == VIO_PKT_DATA) {
6451 		rmsg = (vio_raw_data_msg_t *)tagp;
6452 		seq_num = rmsg->seq_num;
6453 	} else {
6454 		return (EINVAL);
6455 	}
6456 
6457 	if (seq_num != ldcp->next_rxseq) {
6458 
6459 		/* seqnums don't match */
6460 		DWARN(vgenp, ldcp,
6461 		    "next_rxseq(0x%lx) != seq_num(0x%lx)\n",
6462 		    ldcp->next_rxseq, seq_num);
6463 
6464 		ldcp->need_ldc_reset = B_TRUE;
6465 		return (EINVAL);
6466 
6467 	}
6468 
6469 	ldcp->next_rxseq++;
6470 
6471 	return (0);
6472 }
6473 
6474 /* Check if the session id in the received message is valid */
6475 static int
6476 vgen_check_sid(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
6477 {
6478 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6479 
6480 	if (tagp->vio_sid != ldcp->peer_sid) {
6481 		DWARN(vgenp, ldcp, "sid mismatch: expected(%x), rcvd(%x)\n",
6482 		    ldcp->peer_sid, tagp->vio_sid);
6483 		return (VGEN_FAILURE);
6484 	}
6485 	else
6486 		return (VGEN_SUCCESS);
6487 }
6488 
6489 static caddr_t
6490 vgen_print_ethaddr(uint8_t *a, char *ebuf)
6491 {
6492 	(void) sprintf(ebuf,
6493 	    "%x:%x:%x:%x:%x:%x", a[0], a[1], a[2], a[3], a[4], a[5]);
6494 	return (ebuf);
6495 }
6496 
6497 /* Handshake watchdog timeout handler */
6498 static void
6499 vgen_hwatchdog(void *arg)
6500 {
6501 	vgen_ldc_t *ldcp = (vgen_ldc_t *)arg;
6502 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6503 
6504 	DWARN(vgenp, ldcp,
6505 	    "handshake timeout ldc(%lx) phase(%x) state(%x)\n",
6506 	    ldcp->hphase, ldcp->hstate);
6507 
6508 	mutex_enter(&ldcp->cblock);
6509 	if (ldcp->cancel_htid) {
6510 		ldcp->cancel_htid = 0;
6511 		mutex_exit(&ldcp->cblock);
6512 		return;
6513 	}
6514 	ldcp->htid = 0;
6515 	ldcp->need_ldc_reset = B_TRUE;
6516 	vgen_handshake_retry(ldcp);
6517 	mutex_exit(&ldcp->cblock);
6518 }
6519 
6520 static void
6521 vgen_print_hparams(vgen_hparams_t *hp)
6522 {
6523 	uint8_t	addr[6];
6524 	char	ea[6];
6525 	ldc_mem_cookie_t *dc;
6526 
6527 	cmn_err(CE_CONT, "version_info:\n");
6528 	cmn_err(CE_CONT,
6529 	    "\tver_major: %d, ver_minor: %d, dev_class: %d\n",
6530 	    hp->ver_major, hp->ver_minor, hp->dev_class);
6531 
6532 	vnet_macaddr_ultostr(hp->addr, addr);
6533 	cmn_err(CE_CONT, "attr_info:\n");
6534 	cmn_err(CE_CONT, "\tMTU: %lx, addr: %s\n", hp->mtu,
6535 	    vgen_print_ethaddr(addr, ea));
6536 	cmn_err(CE_CONT,
6537 	    "\taddr_type: %x, xfer_mode: %x, ack_freq: %x\n",
6538 	    hp->addr_type, hp->xfer_mode, hp->ack_freq);
6539 
6540 	dc = &hp->dring_cookie;
6541 	cmn_err(CE_CONT, "dring_info:\n");
6542 	cmn_err(CE_CONT,
6543 	    "\tlength: %d, dsize: %d\n", hp->num_desc, hp->desc_size);
6544 	cmn_err(CE_CONT,
6545 	    "\tldc_addr: 0x%lx, ldc_size: %ld\n",
6546 	    dc->addr, dc->size);
6547 	cmn_err(CE_CONT, "\tdring_ident: 0x%lx\n", hp->dring_ident);
6548 }
6549 
6550 static void
6551 vgen_print_ldcinfo(vgen_ldc_t *ldcp)
6552 {
6553 	vgen_hparams_t *hp;
6554 
6555 	cmn_err(CE_CONT, "Channel Information:\n");
6556 	cmn_err(CE_CONT,
6557 	    "\tldc_id: 0x%lx, ldc_status: 0x%x\n",
6558 	    ldcp->ldc_id, ldcp->ldc_status);
6559 	cmn_err(CE_CONT,
6560 	    "\tlocal_sid: 0x%x, peer_sid: 0x%x\n",
6561 	    ldcp->local_sid, ldcp->peer_sid);
6562 	cmn_err(CE_CONT,
6563 	    "\thphase: 0x%x, hstate: 0x%x\n",
6564 	    ldcp->hphase, ldcp->hstate);
6565 
6566 	cmn_err(CE_CONT, "Local handshake params:\n");
6567 	hp = &ldcp->local_hparams;
6568 	vgen_print_hparams(hp);
6569 
6570 	cmn_err(CE_CONT, "Peer handshake params:\n");
6571 	hp = &ldcp->peer_hparams;
6572 	vgen_print_hparams(hp);
6573 }
6574 
6575 /*
6576  * Send received packets up the stack.
6577  */
6578 static void
6579 vgen_rx(vgen_ldc_t *ldcp, mblk_t *bp)
6580 {
6581 	vio_net_rx_cb_t vrx_cb = ldcp->portp->vcb.vio_net_rx_cb;
6582 
6583 	if (ldcp->rcv_thread != NULL) {
6584 		ASSERT(MUTEX_HELD(&ldcp->rxlock));
6585 		mutex_exit(&ldcp->rxlock);
6586 	} else {
6587 		ASSERT(MUTEX_HELD(&ldcp->cblock));
6588 		mutex_exit(&ldcp->cblock);
6589 	}
6590 
6591 	vrx_cb(ldcp->portp->vhp, bp);
6592 
6593 	if (ldcp->rcv_thread != NULL) {
6594 		mutex_enter(&ldcp->rxlock);
6595 	} else {
6596 		mutex_enter(&ldcp->cblock);
6597 	}
6598 }
6599 
6600 /*
6601  * vgen_ldc_rcv_worker -- A per LDC worker thread to receive data.
6602  * This thread is woken up by the LDC interrupt handler to process
6603  * LDC packets and receive data.
6604  */
6605 static void
6606 vgen_ldc_rcv_worker(void *arg)
6607 {
6608 	callb_cpr_t	cprinfo;
6609 	vgen_ldc_t *ldcp = (vgen_ldc_t *)arg;
6610 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6611 
6612 	DBG1(vgenp, ldcp, "enter\n");
6613 	CALLB_CPR_INIT(&cprinfo, &ldcp->rcv_thr_lock, callb_generic_cpr,
6614 	    "vnet_rcv_thread");
6615 	mutex_enter(&ldcp->rcv_thr_lock);
6616 	ldcp->rcv_thr_flags |= VGEN_WTHR_RUNNING;
6617 	while (!(ldcp->rcv_thr_flags & VGEN_WTHR_STOP)) {
6618 
6619 		CALLB_CPR_SAFE_BEGIN(&cprinfo);
6620 		/*
6621 		 * Wait until the data is received or a stop
6622 		 * request is received.
6623 		 */
6624 		while (!(ldcp->rcv_thr_flags &
6625 		    (VGEN_WTHR_DATARCVD | VGEN_WTHR_STOP))) {
6626 			cv_wait(&ldcp->rcv_thr_cv, &ldcp->rcv_thr_lock);
6627 		}
6628 		CALLB_CPR_SAFE_END(&cprinfo, &ldcp->rcv_thr_lock)
6629 
6630 		/*
6631 		 * First process the stop request.
6632 		 */
6633 		if (ldcp->rcv_thr_flags & VGEN_WTHR_STOP) {
6634 			DBG2(vgenp, ldcp, "stopped\n");
6635 			break;
6636 		}
6637 		ldcp->rcv_thr_flags &= ~VGEN_WTHR_DATARCVD;
6638 		mutex_exit(&ldcp->rcv_thr_lock);
6639 		DBG2(vgenp, ldcp, "calling vgen_handle_evt_read\n");
6640 		vgen_handle_evt_read(ldcp);
6641 		mutex_enter(&ldcp->rcv_thr_lock);
6642 	}
6643 
6644 	/*
6645 	 * Update the run status and wakeup the thread that
6646 	 * has sent the stop request.
6647 	 */
6648 	ldcp->rcv_thr_flags &= ~VGEN_WTHR_RUNNING;
6649 	cv_signal(&ldcp->rcv_thr_cv);
6650 	CALLB_CPR_EXIT(&cprinfo);
6651 	thread_exit();
6652 	DBG1(vgenp, ldcp, "exit\n");
6653 }
6654 
6655 /* vgen_stop_rcv_thread -- Co-ordinate with receive thread to stop it */
6656 static void
6657 vgen_stop_rcv_thread(vgen_ldc_t *ldcp)
6658 {
6659 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6660 
6661 	DBG1(vgenp, ldcp, "enter\n");
6662 	/*
6663 	 * Send a stop request by setting the stop flag and
6664 	 * wait until the receive thread stops.
6665 	 */
6666 	mutex_enter(&ldcp->rcv_thr_lock);
6667 	if (ldcp->rcv_thr_flags & VGEN_WTHR_RUNNING) {
6668 		ldcp->rcv_thr_flags |= VGEN_WTHR_STOP;
6669 		cv_signal(&ldcp->rcv_thr_cv);
6670 		DBG2(vgenp, ldcp, "waiting...");
6671 		while (ldcp->rcv_thr_flags & VGEN_WTHR_RUNNING) {
6672 			cv_wait(&ldcp->rcv_thr_cv, &ldcp->rcv_thr_lock);
6673 		}
6674 	}
6675 	mutex_exit(&ldcp->rcv_thr_lock);
6676 	ldcp->rcv_thread = NULL;
6677 	DBG1(vgenp, ldcp, "exit\n");
6678 }
6679 
6680 /*
6681  * vgen_dds_rx -- post DDS messages to vnet.
6682  */
6683 static int
6684 vgen_dds_rx(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
6685 {
6686 	vio_dds_msg_t *dmsg = (vio_dds_msg_t *)tagp;
6687 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6688 
6689 	if (dmsg->dds_class != DDS_VNET_NIU) {
6690 		DWARN(vgenp, ldcp, "Unknown DDS class, dropping");
6691 		return (EBADMSG);
6692 	}
6693 	vnet_dds_rx(vgenp->vnetp, dmsg);
6694 	return (0);
6695 }
6696 
6697 /*
6698  * vgen_dds_tx -- an interface called by vnet to send DDS messages.
6699  */
6700 int
6701 vgen_dds_tx(void *arg, void *msg)
6702 {
6703 	vgen_t *vgenp = arg;
6704 	vio_dds_msg_t *dmsg = msg;
6705 	vgen_portlist_t *plistp = &vgenp->vgenports;
6706 	vgen_ldc_t *ldcp;
6707 	vgen_ldclist_t *ldclp;
6708 	int rv = EIO;
6709 
6710 
6711 	READ_ENTER(&plistp->rwlock);
6712 	ldclp = &(vgenp->vsw_portp->ldclist);
6713 	READ_ENTER(&ldclp->rwlock);
6714 	ldcp = ldclp->headp;
6715 	if ((ldcp == NULL) || (ldcp->hphase != VH_DONE)) {
6716 		goto vgen_dsend_exit;
6717 	}
6718 
6719 	dmsg->tag.vio_sid = ldcp->local_sid;
6720 	rv = vgen_sendmsg(ldcp, (caddr_t)dmsg, sizeof (vio_dds_msg_t), B_FALSE);
6721 	if (rv != VGEN_SUCCESS) {
6722 		rv = EIO;
6723 	} else {
6724 		rv = 0;
6725 	}
6726 
6727 vgen_dsend_exit:
6728 	RW_EXIT(&ldclp->rwlock);
6729 	RW_EXIT(&plistp->rwlock);
6730 	return (rv);
6731 
6732 }
6733 
6734 #if DEBUG
6735 
6736 /*
6737  * Print debug messages - set to 0xf to enable all msgs
6738  */
6739 static void
6740 debug_printf(const char *fname, vgen_t *vgenp,
6741     vgen_ldc_t *ldcp, const char *fmt, ...)
6742 {
6743 	char    buf[256];
6744 	char    *bufp = buf;
6745 	va_list ap;
6746 
6747 	if ((vgenp != NULL) && (vgenp->vnetp != NULL)) {
6748 		(void) sprintf(bufp, "vnet%d:",
6749 		    ((vnet_t *)(vgenp->vnetp))->instance);
6750 		bufp += strlen(bufp);
6751 	}
6752 	if (ldcp != NULL) {
6753 		(void) sprintf(bufp, "ldc(%ld):", ldcp->ldc_id);
6754 		bufp += strlen(bufp);
6755 	}
6756 	(void) sprintf(bufp, "%s: ", fname);
6757 	bufp += strlen(bufp);
6758 
6759 	va_start(ap, fmt);
6760 	(void) vsprintf(bufp, fmt, ap);
6761 	va_end(ap);
6762 
6763 	if ((ldcp == NULL) ||(vgendbg_ldcid == -1) ||
6764 	    (vgendbg_ldcid == ldcp->ldc_id)) {
6765 		cmn_err(CE_CONT, "%s\n", buf);
6766 	}
6767 }
6768 #endif
6769