xref: /titanic_51/usr/src/uts/sun4v/io/vnet_gen.c (revision a5669307eaef64af8519feb70d42f0aa0e7ec21a)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/types.h>
28 #include <sys/errno.h>
29 #include <sys/sysmacros.h>
30 #include <sys/param.h>
31 #include <sys/stream.h>
32 #include <sys/strsubr.h>
33 #include <sys/kmem.h>
34 #include <sys/conf.h>
35 #include <sys/devops.h>
36 #include <sys/ksynch.h>
37 #include <sys/stat.h>
38 #include <sys/modctl.h>
39 #include <sys/debug.h>
40 #include <sys/ethernet.h>
41 #include <sys/ddi.h>
42 #include <sys/sunddi.h>
43 #include <sys/strsun.h>
44 #include <sys/note.h>
45 #include <sys/mac_provider.h>
46 #include <sys/mac_ether.h>
47 #include <sys/ldc.h>
48 #include <sys/mach_descrip.h>
49 #include <sys/mdeg.h>
50 #include <net/if.h>
51 #include <sys/vnet.h>
52 #include <sys/vio_mailbox.h>
53 #include <sys/vio_common.h>
54 #include <sys/vnet_common.h>
55 #include <sys/vnet_mailbox.h>
56 #include <sys/vio_util.h>
57 #include <sys/vnet_gen.h>
58 #include <sys/atomic.h>
59 #include <sys/callb.h>
60 #include <sys/sdt.h>
61 #include <sys/intr.h>
62 #include <sys/pattr.h>
63 #include <sys/vlan.h>
64 
65 /*
66  * Implementation of the mac functionality for vnet using the
67  * generic(default) transport layer of sun4v Logical Domain Channels(LDC).
68  */
69 
70 /*
71  * Function prototypes.
72  */
73 /* vgen proxy entry points */
74 int vgen_init(void *vnetp, uint64_t regprop, dev_info_t *vnetdip,
75     const uint8_t *macaddr, void **vgenhdl);
76 int vgen_uninit(void *arg);
77 int vgen_dds_tx(void *arg, void *dmsg);
78 void vgen_mod_init(void);
79 int vgen_mod_cleanup(void);
80 void vgen_mod_fini(void);
81 static int vgen_start(void *arg);
82 static void vgen_stop(void *arg);
83 static mblk_t *vgen_tx(void *arg, mblk_t *mp);
84 static int vgen_multicst(void *arg, boolean_t add,
85 	const uint8_t *mca);
86 static int vgen_promisc(void *arg, boolean_t on);
87 static int vgen_unicst(void *arg, const uint8_t *mca);
88 static int vgen_stat(void *arg, uint_t stat, uint64_t *val);
89 static void vgen_ioctl(void *arg, queue_t *wq, mblk_t *mp);
90 
91 /* vgen internal functions */
92 static int vgen_read_mdprops(vgen_t *vgenp);
93 static void vgen_update_md_prop(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
94 static void vgen_read_pri_eth_types(vgen_t *vgenp, md_t *mdp,
95 	mde_cookie_t node);
96 static void vgen_mtu_read(vgen_t *vgenp, md_t *mdp, mde_cookie_t node,
97 	uint32_t *mtu);
98 static void vgen_detach_ports(vgen_t *vgenp);
99 static void vgen_port_detach(vgen_port_t *portp);
100 static void vgen_port_list_insert(vgen_port_t *portp);
101 static void vgen_port_list_remove(vgen_port_t *portp);
102 static vgen_port_t *vgen_port_lookup(vgen_portlist_t *plistp,
103 	int port_num);
104 static int vgen_mdeg_reg(vgen_t *vgenp);
105 static void vgen_mdeg_unreg(vgen_t *vgenp);
106 static int vgen_mdeg_cb(void *cb_argp, mdeg_result_t *resp);
107 static int vgen_mdeg_port_cb(void *cb_argp, mdeg_result_t *resp);
108 static int vgen_add_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
109 static int vgen_port_read_props(vgen_port_t *portp, vgen_t *vgenp, md_t *mdp,
110 	mde_cookie_t mdex);
111 static int vgen_remove_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
112 static int vgen_port_attach(vgen_port_t *portp);
113 static void vgen_port_detach_mdeg(vgen_port_t *portp);
114 static void vgen_port_detach_mdeg(vgen_port_t *portp);
115 static int vgen_update_port(vgen_t *vgenp, md_t *curr_mdp,
116 	mde_cookie_t curr_mdex, md_t *prev_mdp, mde_cookie_t prev_mdex);
117 static uint64_t	vgen_port_stat(vgen_port_t *portp, uint_t stat);
118 
119 static int vgen_ldc_attach(vgen_port_t *portp, uint64_t ldc_id);
120 static void vgen_ldc_detach(vgen_ldc_t *ldcp);
121 static int vgen_alloc_tx_ring(vgen_ldc_t *ldcp);
122 static void vgen_free_tx_ring(vgen_ldc_t *ldcp);
123 static void vgen_init_ports(vgen_t *vgenp);
124 static void vgen_port_init(vgen_port_t *portp);
125 static void vgen_uninit_ports(vgen_t *vgenp);
126 static void vgen_port_uninit(vgen_port_t *portp);
127 static void vgen_init_ldcs(vgen_port_t *portp);
128 static void vgen_uninit_ldcs(vgen_port_t *portp);
129 static int vgen_ldc_init(vgen_ldc_t *ldcp);
130 static void vgen_ldc_uninit(vgen_ldc_t *ldcp);
131 static int vgen_init_tbufs(vgen_ldc_t *ldcp);
132 static void vgen_uninit_tbufs(vgen_ldc_t *ldcp);
133 static void vgen_clobber_tbufs(vgen_ldc_t *ldcp);
134 static void vgen_clobber_rxds(vgen_ldc_t *ldcp);
135 static uint64_t	vgen_ldc_stat(vgen_ldc_t *ldcp, uint_t stat);
136 static uint_t vgen_ldc_cb(uint64_t event, caddr_t arg);
137 static int vgen_portsend(vgen_port_t *portp, mblk_t *mp);
138 static int vgen_ldcsend(void *arg, mblk_t *mp);
139 static void vgen_ldcsend_pkt(void *arg, mblk_t *mp);
140 static int vgen_ldcsend_dring(void *arg, mblk_t *mp);
141 static void vgen_reclaim(vgen_ldc_t *ldcp);
142 static void vgen_reclaim_dring(vgen_ldc_t *ldcp);
143 static int vgen_num_txpending(vgen_ldc_t *ldcp);
144 static int vgen_tx_dring_full(vgen_ldc_t *ldcp);
145 static int vgen_ldc_txtimeout(vgen_ldc_t *ldcp);
146 static void vgen_ldc_watchdog(void *arg);
147 
148 /* vgen handshake functions */
149 static vgen_ldc_t *vh_nextphase(vgen_ldc_t *ldcp);
150 static int vgen_sendmsg(vgen_ldc_t *ldcp, caddr_t msg,  size_t msglen,
151 	boolean_t caller_holds_lock);
152 static int vgen_send_version_negotiate(vgen_ldc_t *ldcp);
153 static int vgen_send_attr_info(vgen_ldc_t *ldcp);
154 static int vgen_send_dring_reg(vgen_ldc_t *ldcp);
155 static int vgen_send_rdx_info(vgen_ldc_t *ldcp);
156 static int vgen_send_dring_data(vgen_ldc_t *ldcp, uint32_t start, int32_t end);
157 static int vgen_send_mcast_info(vgen_ldc_t *ldcp);
158 static int vgen_handshake_phase2(vgen_ldc_t *ldcp);
159 static void vgen_handshake_reset(vgen_ldc_t *ldcp);
160 static void vgen_reset_hphase(vgen_ldc_t *ldcp);
161 static void vgen_handshake(vgen_ldc_t *ldcp);
162 static int vgen_handshake_done(vgen_ldc_t *ldcp);
163 static void vgen_handshake_retry(vgen_ldc_t *ldcp);
164 static int vgen_handle_version_negotiate(vgen_ldc_t *ldcp,
165 	vio_msg_tag_t *tagp);
166 static int vgen_handle_attr_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
167 static int vgen_handle_dring_reg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
168 static int vgen_handle_rdx_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
169 static int vgen_handle_mcast_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
170 static int vgen_handle_ctrlmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
171 static void vgen_handle_pkt_data_nop(void *arg1, void *arg2, uint32_t msglen);
172 static void vgen_handle_pkt_data(void *arg1, void *arg2, uint32_t msglen);
173 static int vgen_handle_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
174 static int vgen_handle_dring_data_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
175 static int vgen_process_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
176 static int vgen_handle_dring_data_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
177 static int vgen_handle_dring_data_nack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
178 static int vgen_send_dring_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp,
179 	uint32_t start, int32_t end, uint8_t pstate);
180 static int vgen_handle_datamsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp,
181 	uint32_t msglen);
182 static void vgen_handle_errmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
183 static void vgen_handle_evt_up(vgen_ldc_t *ldcp);
184 static void vgen_handle_evt_reset(vgen_ldc_t *ldcp);
185 static int vgen_check_sid(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
186 static int vgen_check_datamsg_seq(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
187 static caddr_t vgen_print_ethaddr(uint8_t *a, char *ebuf);
188 static void vgen_hwatchdog(void *arg);
189 static void vgen_print_attr_info(vgen_ldc_t *ldcp, int endpoint);
190 static void vgen_print_hparams(vgen_hparams_t *hp);
191 static void vgen_print_ldcinfo(vgen_ldc_t *ldcp);
192 static void vgen_stop_rcv_thread(vgen_ldc_t *ldcp);
193 static void vgen_drain_rcv_thread(vgen_ldc_t *ldcp);
194 static void vgen_ldc_rcv_worker(void *arg);
195 static void vgen_handle_evt_read(vgen_ldc_t *ldcp);
196 static void vgen_rx(vgen_ldc_t *ldcp, mblk_t *bp);
197 static void vgen_set_vnet_proto_ops(vgen_ldc_t *ldcp);
198 static void vgen_reset_vnet_proto_ops(vgen_ldc_t *ldcp);
199 
200 /* VLAN routines */
201 static void vgen_vlan_read_ids(void *arg, int type, md_t *mdp,
202 	mde_cookie_t node, uint16_t *pvidp, uint16_t **vidspp,
203 	uint16_t *nvidsp, uint16_t *default_idp);
204 static void vgen_vlan_create_hash(vgen_port_t *portp);
205 static void vgen_vlan_destroy_hash(vgen_port_t *portp);
206 static void vgen_vlan_add_ids(vgen_port_t *portp);
207 static void vgen_vlan_remove_ids(vgen_port_t *portp);
208 static boolean_t vgen_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid);
209 static boolean_t vgen_frame_lookup_vid(vnet_t *vnetp, struct ether_header *ehp,
210 	uint16_t *vidp);
211 static mblk_t *vgen_vlan_frame_fixtag(vgen_port_t *portp, mblk_t *mp,
212 	boolean_t is_tagged, uint16_t vid);
213 static void vgen_vlan_unaware_port_reset(vgen_port_t *portp);
214 static void vgen_reset_vlan_unaware_ports(vgen_t *vgenp);
215 static int vgen_dds_rx(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
216 
217 /* externs */
218 extern void vnet_dds_rx(void *arg, void *dmsg);
219 extern int vnet_mtu_update(vnet_t *vnetp, uint32_t mtu);
220 
221 /*
222  * The handshake process consists of 5 phases defined below, with VH_PHASE0
223  * being the pre-handshake phase and VH_DONE is the phase to indicate
224  * successful completion of all phases.
225  * Each phase may have one to several handshake states which are required
226  * to complete successfully to move to the next phase.
227  * Refer to the functions vgen_handshake() and vgen_handshake_done() for
228  * more details.
229  */
230 /* handshake phases */
231 enum {	VH_PHASE0, VH_PHASE1, VH_PHASE2, VH_PHASE3, VH_DONE = 0x80 };
232 
233 /* handshake states */
234 enum {
235 
236 	VER_INFO_SENT	=	0x1,
237 	VER_ACK_RCVD	=	0x2,
238 	VER_INFO_RCVD	=	0x4,
239 	VER_ACK_SENT	=	0x8,
240 	VER_NEGOTIATED	=	(VER_ACK_RCVD | VER_ACK_SENT),
241 
242 	ATTR_INFO_SENT	=	0x10,
243 	ATTR_ACK_RCVD	=	0x20,
244 	ATTR_INFO_RCVD	=	0x40,
245 	ATTR_ACK_SENT	=	0x80,
246 	ATTR_INFO_EXCHANGED	=	(ATTR_ACK_RCVD | ATTR_ACK_SENT),
247 
248 	DRING_INFO_SENT	=	0x100,
249 	DRING_ACK_RCVD	=	0x200,
250 	DRING_INFO_RCVD	=	0x400,
251 	DRING_ACK_SENT	=	0x800,
252 	DRING_INFO_EXCHANGED	=	(DRING_ACK_RCVD | DRING_ACK_SENT),
253 
254 	RDX_INFO_SENT	=	0x1000,
255 	RDX_ACK_RCVD	=	0x2000,
256 	RDX_INFO_RCVD	=	0x4000,
257 	RDX_ACK_SENT	=	0x8000,
258 	RDX_EXCHANGED	=	(RDX_ACK_RCVD | RDX_ACK_SENT)
259 
260 };
261 
262 #define	VGEN_PRI_ETH_DEFINED(vgenp)	((vgenp)->pri_num_types != 0)
263 
264 #define	LDC_LOCK(ldcp)	\
265 				mutex_enter(&((ldcp)->cblock));\
266 				mutex_enter(&((ldcp)->rxlock));\
267 				mutex_enter(&((ldcp)->wrlock));\
268 				mutex_enter(&((ldcp)->txlock));\
269 				mutex_enter(&((ldcp)->tclock));
270 #define	LDC_UNLOCK(ldcp)	\
271 				mutex_exit(&((ldcp)->tclock));\
272 				mutex_exit(&((ldcp)->txlock));\
273 				mutex_exit(&((ldcp)->wrlock));\
274 				mutex_exit(&((ldcp)->rxlock));\
275 				mutex_exit(&((ldcp)->cblock));
276 
277 #define	VGEN_VER_EQ(ldcp, major, minor)	\
278 	((ldcp)->local_hparams.ver_major == (major) &&	\
279 	    (ldcp)->local_hparams.ver_minor == (minor))
280 
281 #define	VGEN_VER_LT(ldcp, major, minor)	\
282 	(((ldcp)->local_hparams.ver_major < (major)) ||	\
283 	    ((ldcp)->local_hparams.ver_major == (major) &&	\
284 	    (ldcp)->local_hparams.ver_minor < (minor)))
285 
286 #define	VGEN_VER_GTEQ(ldcp, major, minor)	\
287 	(((ldcp)->local_hparams.ver_major > (major)) ||	\
288 	    ((ldcp)->local_hparams.ver_major == (major) &&	\
289 	    (ldcp)->local_hparams.ver_minor >= (minor)))
290 
291 static struct ether_addr etherbroadcastaddr = {
292 	0xff, 0xff, 0xff, 0xff, 0xff, 0xff
293 };
294 /*
295  * MIB II broadcast/multicast packets
296  */
297 #define	IS_BROADCAST(ehp) \
298 		(ether_cmp(&ehp->ether_dhost, &etherbroadcastaddr) == 0)
299 #define	IS_MULTICAST(ehp) \
300 		((ehp->ether_dhost.ether_addr_octet[0] & 01) == 1)
301 
302 /*
303  * Property names
304  */
305 static char macaddr_propname[] = "mac-address";
306 static char rmacaddr_propname[] = "remote-mac-address";
307 static char channel_propname[] = "channel-endpoint";
308 static char reg_propname[] = "reg";
309 static char port_propname[] = "port";
310 static char swport_propname[] = "switch-port";
311 static char id_propname[] = "id";
312 static char vdev_propname[] = "virtual-device";
313 static char vnet_propname[] = "network";
314 static char pri_types_propname[] = "priority-ether-types";
315 static char vgen_pvid_propname[] = "port-vlan-id";
316 static char vgen_vid_propname[] = "vlan-id";
317 static char vgen_dvid_propname[] = "default-vlan-id";
318 static char port_pvid_propname[] = "remote-port-vlan-id";
319 static char port_vid_propname[] = "remote-vlan-id";
320 static char vgen_mtu_propname[] = "mtu";
321 
322 /* versions supported - in decreasing order */
323 static vgen_ver_t vgen_versions[VGEN_NUM_VER] = { {1, 4} };
324 
/* Tunables */
uint32_t vgen_hwd_interval = 5;		/* handshake watchdog freq in sec */
uint32_t vgen_max_hretries = VNET_NUM_HANDSHAKES; /* # of handshake retries */
uint32_t vgen_ldcwr_retries = 10;	/* max # of ldc_write() retries */
uint32_t vgen_ldcup_retries = 5;	/* max # of ldc_up() retries */
uint32_t vgen_ldccl_retries = 5;	/* max # of ldc_close() retries */
uint32_t vgen_recv_delay = 1;		/* delay when rx descr not ready */
uint32_t vgen_recv_retries = 10;	/* retry when rx descr not ready */
uint32_t vgen_tx_retries = 0x4;		/* retry when tx descr not available */
uint32_t vgen_tx_delay = 0x30;		/* delay when tx descr not available */

int vgen_rcv_thread_enabled = 1;	/* Enable Receive thread */

/*
 * List of rx mblk pools whose destruction was deferred because they were
 * still in use; drained by vgen_mod_cleanup(). Guarded by vgen_rw.
 */
static vio_mblk_pool_t	*vgen_rx_poolp = NULL;
static krwlock_t	vgen_rw;

/*
 * max # of packets accumulated prior to sending them up. It is best
 * to keep this at 60% of the number of receive buffers.
 */
uint32_t vgen_chain_len = (VGEN_NRBUFS * 0.6);
346 
347 /*
348  * Internal tunables for receive buffer pools, that is,  the size and number of
349  * mblks for each pool. At least 3 sizes must be specified if these are used.
350  * The sizes must be specified in increasing order. Non-zero value of the first
351  * size will be used as a hint to use these values instead of the algorithm
352  * that determines the sizes based on MTU.
353  */
354 uint32_t vgen_rbufsz1 = 0;
355 uint32_t vgen_rbufsz2 = 0;
356 uint32_t vgen_rbufsz3 = 0;
357 uint32_t vgen_rbufsz4 = 0;
358 
359 uint32_t vgen_nrbufs1 = VGEN_NRBUFS;
360 uint32_t vgen_nrbufs2 = VGEN_NRBUFS;
361 uint32_t vgen_nrbufs3 = VGEN_NRBUFS;
362 uint32_t vgen_nrbufs4 = VGEN_NRBUFS;
363 
364 /*
365  * In the absence of "priority-ether-types" property in MD, the following
366  * internal tunable can be set to specify a single priority ethertype.
367  */
368 uint64_t vgen_pri_eth_type = 0;
369 
370 /*
371  * Number of transmit priority buffers that are preallocated per device.
372  * This number is chosen to be a small value to throttle transmission
373  * of priority packets. Note: Must be a power of 2 for vio_create_mblks().
374  */
375 uint32_t vgen_pri_tx_nmblks = 64;
376 
377 uint32_t	vgen_vlan_nchains = 4;	/* # of chains in vlan id hash table */
378 
379 #ifdef DEBUG
380 /* flags to simulate error conditions for debugging */
381 int vgen_trigger_txtimeout = 0;
382 int vgen_trigger_rxlost = 0;
383 #endif
384 
385 /*
386  * Matching criteria passed to the MDEG to register interest
387  * in changes to 'virtual-device' nodes (i.e. vnet nodes) identified
388  * by their 'name' and 'cfg-handle' properties.
389  */
390 static md_prop_match_t vdev_prop_match[] = {
391 	{ MDET_PROP_STR,    "name"   },
392 	{ MDET_PROP_VAL,    "cfg-handle" },
393 	{ MDET_LIST_END,    NULL    }
394 };
395 
396 static mdeg_node_match_t vdev_match = { "virtual-device",
397 						vdev_prop_match };
398 
399 /* MD update matching structure */
400 static md_prop_match_t	vport_prop_match[] = {
401 	{ MDET_PROP_VAL,	"id" },
402 	{ MDET_LIST_END,	NULL }
403 };
404 
405 static mdeg_node_match_t vport_match = { "virtual-device-port",
406 					vport_prop_match };
407 
408 /* template for matching a particular vnet instance */
409 static mdeg_prop_spec_t vgen_prop_template[] = {
410 	{ MDET_PROP_STR,	"name",		"network" },
411 	{ MDET_PROP_VAL,	"cfg-handle",	NULL },
412 	{ MDET_LIST_END,	NULL,		NULL }
413 };
414 
415 #define	VGEN_SET_MDEG_PROP_INST(specp, val)	(specp)[1].ps_val = (val)
416 
417 static int vgen_mdeg_port_cb(void *cb_argp, mdeg_result_t *resp);
418 
419 static mac_callbacks_t vgen_m_callbacks = {
420 	0,
421 	vgen_stat,
422 	vgen_start,
423 	vgen_stop,
424 	vgen_promisc,
425 	vgen_multicst,
426 	vgen_unicst,
427 	vgen_tx,
428 	NULL,
429 	NULL,
430 	NULL
431 };
432 
433 /* externs */
434 extern pri_t	maxclsyspri;
435 extern proc_t	p0;
436 extern uint32_t vnet_ntxds;
437 extern uint32_t vnet_ldcwd_interval;
438 extern uint32_t vnet_ldcwd_txtimeout;
439 extern uint32_t vnet_ldc_mtu;
440 extern uint32_t vnet_nrbufs;
441 extern uint32_t	vnet_ethermtu;
442 extern uint16_t	vnet_default_vlan_id;
443 extern boolean_t vnet_jumbo_rxpools;
444 
445 #ifdef DEBUG
446 
447 extern int vnet_dbglevel;
448 static void debug_printf(const char *fname, vgen_t *vgenp,
449 	vgen_ldc_t *ldcp, const char *fmt, ...);
450 
451 /* -1 for all LDCs info, or ldc_id for a specific LDC info */
452 int vgendbg_ldcid = -1;
453 
454 /* simulate handshake error conditions for debug */
455 uint32_t vgen_hdbg;
456 #define	HDBG_VERSION	0x1
457 #define	HDBG_TIMEOUT	0x2
458 #define	HDBG_BAD_SID	0x4
459 #define	HDBG_OUT_STATE	0x8
460 
461 #endif
462 
463 /*
464  * vgen_init() is called by an instance of vnet driver to initialize the
465  * corresponding generic proxy transport layer. The arguments passed by vnet
466  * are - an opaque pointer to the vnet instance, pointers to dev_info_t and
467  * the mac address of the vnet device, and a pointer to vgen_t is passed
468  * back as a handle to vnet.
469  */
470 int
471 vgen_init(void *vnetp, uint64_t regprop, dev_info_t *vnetdip,
472     const uint8_t *macaddr, void **vgenhdl)
473 {
474 	vgen_t *vgenp;
475 	int instance;
476 	int rv;
477 
478 	if ((vnetp == NULL) || (vnetdip == NULL))
479 		return (DDI_FAILURE);
480 
481 	instance = ddi_get_instance(vnetdip);
482 
483 	DBG1(NULL, NULL, "vnet(%d): enter\n", instance);
484 
485 	vgenp = kmem_zalloc(sizeof (vgen_t), KM_SLEEP);
486 
487 	vgenp->vnetp = vnetp;
488 	vgenp->instance = instance;
489 	vgenp->regprop = regprop;
490 	vgenp->vnetdip = vnetdip;
491 	bcopy(macaddr, &(vgenp->macaddr), ETHERADDRL);
492 
493 	/* allocate multicast table */
494 	vgenp->mctab = kmem_zalloc(VGEN_INIT_MCTAB_SIZE *
495 	    sizeof (struct ether_addr), KM_SLEEP);
496 	vgenp->mccount = 0;
497 	vgenp->mcsize = VGEN_INIT_MCTAB_SIZE;
498 
499 	mutex_init(&vgenp->lock, NULL, MUTEX_DRIVER, NULL);
500 	rw_init(&vgenp->vgenports.rwlock, NULL, RW_DRIVER, NULL);
501 
502 	rv = vgen_read_mdprops(vgenp);
503 	if (rv != 0) {
504 		goto vgen_init_fail;
505 	}
506 
507 	/* register with MD event generator */
508 	rv = vgen_mdeg_reg(vgenp);
509 	if (rv != DDI_SUCCESS) {
510 		goto vgen_init_fail;
511 	}
512 
513 	*vgenhdl = (void *)vgenp;
514 
515 	DBG1(NULL, NULL, "vnet(%d): exit\n", instance);
516 	return (DDI_SUCCESS);
517 
518 vgen_init_fail:
519 	rw_destroy(&vgenp->vgenports.rwlock);
520 	mutex_destroy(&vgenp->lock);
521 	kmem_free(vgenp->mctab, VGEN_INIT_MCTAB_SIZE *
522 	    sizeof (struct ether_addr));
523 	if (VGEN_PRI_ETH_DEFINED(vgenp)) {
524 		kmem_free(vgenp->pri_types,
525 		    sizeof (uint16_t) * vgenp->pri_num_types);
526 		(void) vio_destroy_mblks(vgenp->pri_tx_vmp);
527 	}
528 	KMEM_FREE(vgenp);
529 	return (DDI_FAILURE);
530 }
531 
/*
 * Called by vnet to undo the initializations done by vgen_init().
 * The handle provided by generic transport during vgen_init() is the argument.
 * Returns DDI_SUCCESS, or DDI_FAILURE if the handle is NULL.
 */
int
vgen_uninit(void *arg)
{
	vgen_t		*vgenp = (vgen_t *)arg;
	vio_mblk_pool_t	*rp;
	vio_mblk_pool_t	*nrp;

	if (vgenp == NULL) {
		return (DDI_FAILURE);
	}

	DBG1(vgenp, NULL, "enter\n");

	/* unregister with MD event generator */
	vgen_mdeg_unreg(vgenp);

	mutex_enter(&vgenp->lock);

	/* detach all ports from the device */
	vgen_detach_ports(vgenp);

	/*
	 * free any pending rx mblk pools,
	 * that couldn't be freed previously during channel detach.
	 */
	rp = vgenp->rmp;
	while (rp != NULL) {
		nrp = vgenp->rmp = rp->nextp;
		if (vio_destroy_mblks(rp)) {
			/*
			 * Pool still in use; defer its destruction by moving
			 * it to the module-global list, which is drained in
			 * vgen_mod_cleanup().
			 */
			WRITE_ENTER(&vgen_rw);
			rp->nextp = vgen_rx_poolp;
			vgen_rx_poolp = rp;
			RW_EXIT(&vgen_rw);
		}
		rp = nrp;
	}

	/* free multicast table */
	kmem_free(vgenp->mctab, vgenp->mcsize * sizeof (struct ether_addr));

	/* free pri_types table */
	if (VGEN_PRI_ETH_DEFINED(vgenp)) {
		kmem_free(vgenp->pri_types,
		    sizeof (uint16_t) * vgenp->pri_num_types);
		(void) vio_destroy_mblks(vgenp->pri_tx_vmp);
	}

	mutex_exit(&vgenp->lock);

	rw_destroy(&vgenp->vgenports.rwlock);
	mutex_destroy(&vgenp->lock);

	DBG1(vgenp, NULL, "exit\n");
	KMEM_FREE(vgenp);

	return (DDI_SUCCESS);
}
593 
/*
 * module specific initialization common to all instances of vnet/vgen.
 * Sets up vgen_rw, the lock that guards the module-global deferred
 * rx mblk pool list (vgen_rx_poolp).
 */
void
vgen_mod_init(void)
{
	rw_init(&vgen_rw, NULL, RW_DRIVER, NULL);
}
602 
603 /*
604  * module specific cleanup common to all instances of vnet/vgen.
605  */
606 int
607 vgen_mod_cleanup(void)
608 {
609 	vio_mblk_pool_t	*poolp, *npoolp;
610 
611 	/*
612 	 * If any rx mblk pools are still in use, return
613 	 * error and stop the module from unloading.
614 	 */
615 	WRITE_ENTER(&vgen_rw);
616 	poolp = vgen_rx_poolp;
617 	while (poolp != NULL) {
618 		npoolp = vgen_rx_poolp = poolp->nextp;
619 		if (vio_destroy_mblks(poolp) != 0) {
620 			vgen_rx_poolp = poolp;
621 			RW_EXIT(&vgen_rw);
622 			return (EBUSY);
623 		}
624 		poolp = npoolp;
625 	}
626 	RW_EXIT(&vgen_rw);
627 
628 	return (0);
629 }
630 
/*
 * module specific uninitialization common to all instances of vnet/vgen.
 * Tears down the lock set up by vgen_mod_init(); vgen_mod_cleanup() is
 * presumably expected to have drained the deferred pool list first.
 */
void
vgen_mod_fini(void)
{
	rw_destroy(&vgen_rw);
}
639 
640 /* enable transmit/receive for the device */
641 int
642 vgen_start(void *arg)
643 {
644 	vgen_port_t	*portp = (vgen_port_t *)arg;
645 	vgen_t		*vgenp = portp->vgenp;
646 
647 	DBG1(vgenp, NULL, "enter\n");
648 	mutex_enter(&portp->lock);
649 	vgen_port_init(portp);
650 	portp->flags |= VGEN_STARTED;
651 	mutex_exit(&portp->lock);
652 	DBG1(vgenp, NULL, "exit\n");
653 
654 	return (DDI_SUCCESS);
655 }
656 
657 /* stop transmit/receive */
658 void
659 vgen_stop(void *arg)
660 {
661 	vgen_port_t	*portp = (vgen_port_t *)arg;
662 	vgen_t		*vgenp = portp->vgenp;
663 
664 	DBG1(vgenp, NULL, "enter\n");
665 
666 	mutex_enter(&portp->lock);
667 	vgen_port_uninit(portp);
668 	portp->flags &= ~(VGEN_STARTED);
669 	mutex_exit(&portp->lock);
670 	DBG1(vgenp, NULL, "exit\n");
671 
672 }
673 
674 /* vgen transmit function */
675 static mblk_t *
676 vgen_tx(void *arg, mblk_t *mp)
677 {
678 	int i;
679 	vgen_port_t *portp;
680 	int status = VGEN_FAILURE;
681 
682 	portp = (vgen_port_t *)arg;
683 	/*
684 	 * Retry so that we avoid reporting a failure
685 	 * to the upper layer. Returning a failure may cause the
686 	 * upper layer to go into single threaded mode there by
687 	 * causing performance degradation, especially for a large
688 	 * number of connections.
689 	 */
690 	for (i = 0; i < vgen_tx_retries; ) {
691 		status = vgen_portsend(portp, mp);
692 		if (status == VGEN_SUCCESS) {
693 			break;
694 		}
695 		if (++i < vgen_tx_retries)
696 			delay(drv_usectohz(vgen_tx_delay));
697 	}
698 	if (status != VGEN_SUCCESS) {
699 		/* failure */
700 		return (mp);
701 	}
702 	/* success */
703 	return (NULL);
704 }
705 
706 /*
707  * This function provides any necessary tagging/untagging of the frames
708  * that are being transmitted over the port. It first verifies the vlan
709  * membership of the destination(port) and drops the packet if the
710  * destination doesn't belong to the given vlan.
711  *
712  * Arguments:
713  *   portp:     port over which the frames should be transmitted
714  *   mp:        frame to be transmitted
715  *   is_tagged:
716  *              B_TRUE: indicates frame header contains the vlan tag already.
717  *              B_FALSE: indicates frame is untagged.
718  *   vid:       vlan in which the frame should be transmitted.
719  *
720  * Returns:
721  *              Sucess: frame(mblk_t *) after doing the necessary tag/untag.
722  *              Failure: NULL
723  */
724 static mblk_t *
725 vgen_vlan_frame_fixtag(vgen_port_t *portp, mblk_t *mp, boolean_t is_tagged,
726 	uint16_t vid)
727 {
728 	vgen_t				*vgenp;
729 	boolean_t			dst_tagged;
730 	int				rv;
731 
732 	vgenp = portp->vgenp;
733 
734 	/*
735 	 * If the packet is going to a vnet:
736 	 *   Check if the destination vnet is in the same vlan.
737 	 *   Check the frame header if tag or untag is needed.
738 	 *
739 	 * We do not check the above conditions if the packet is going to vsw:
740 	 *   vsw must be present implicitly in all the vlans that a vnet device
741 	 *   is configured into; even if vsw itself is not assigned to those
742 	 *   vlans as an interface. For instance, the packet might be destined
743 	 *   to another vnet(indirectly through vsw) or to an external host
744 	 *   which is in the same vlan as this vnet and vsw itself may not be
745 	 *   present in that vlan. Similarly packets going to vsw must be
746 	 *   always tagged(unless in the default-vlan) if not already tagged,
747 	 *   as we do not know the final destination. This is needed because
748 	 *   vsw must always invoke its switching function only after tagging
749 	 *   the packet; otherwise after switching function determines the
750 	 *   destination we cannot figure out if the destination belongs to the
751 	 *   the same vlan that the frame originated from and if it needs tag/
752 	 *   untag. Note that vsw will tag the packet itself when it receives
753 	 *   it over the channel from a client if needed. However, that is
754 	 *   needed only in the case of vlan unaware clients such as obp or
755 	 *   earlier versions of vnet.
756 	 *
757 	 */
758 	if (portp != vgenp->vsw_portp) {
759 		/*
760 		 * Packet going to a vnet. Check if the destination vnet is in
761 		 * the same vlan. Then check the frame header if tag/untag is
762 		 * needed.
763 		 */
764 		rv = vgen_vlan_lookup(portp->vlan_hashp, vid);
765 		if (rv == B_FALSE) {
766 			/* drop the packet */
767 			freemsg(mp);
768 			return (NULL);
769 		}
770 
771 		/* is the destination tagged or untagged in this vlan? */
772 		(vid == portp->pvid) ? (dst_tagged = B_FALSE) :
773 		    (dst_tagged = B_TRUE);
774 
775 		if (is_tagged == dst_tagged) {
776 			/* no tagging/untagging needed */
777 			return (mp);
778 		}
779 
780 		if (is_tagged == B_TRUE) {
781 			/* frame is tagged; destination needs untagged */
782 			mp = vnet_vlan_remove_tag(mp);
783 			return (mp);
784 		}
785 
786 		/* (is_tagged == B_FALSE): fallthru to tag tx packet: */
787 	}
788 
789 	/*
790 	 * Packet going to a vnet needs tagging.
791 	 * OR
792 	 * If the packet is going to vsw, then it must be tagged in all cases:
793 	 * unknown unicast, broadcast/multicast or to vsw interface.
794 	 */
795 
796 	if (is_tagged == B_FALSE) {
797 		mp = vnet_vlan_insert_tag(mp, vid);
798 	}
799 
800 	return (mp);
801 }
802 
803 /* transmit packets over the given port */
804 static int
805 vgen_portsend(vgen_port_t *portp, mblk_t *mp)
806 {
807 	vgen_ldclist_t		*ldclp;
808 	vgen_ldc_t		*ldcp;
809 	int			status;
810 	int			rv = VGEN_SUCCESS;
811 	vgen_t			*vgenp = portp->vgenp;
812 	vnet_t			*vnetp = vgenp->vnetp;
813 	boolean_t		is_tagged;
814 	boolean_t		dec_refcnt = B_FALSE;
815 	uint16_t		vlan_id;
816 	struct ether_header	*ehp;
817 
818 	if (portp->use_vsw_port) {
819 		(void) atomic_inc_32(&vgenp->vsw_port_refcnt);
820 		portp = portp->vgenp->vsw_portp;
821 		dec_refcnt = B_TRUE;
822 	}
823 	if (portp == NULL) {
824 		return (VGEN_FAILURE);
825 	}
826 
827 	/*
828 	 * Determine the vlan id that the frame belongs to.
829 	 */
830 	ehp = (struct ether_header *)mp->b_rptr;
831 	is_tagged = vgen_frame_lookup_vid(vnetp, ehp, &vlan_id);
832 
833 	if (vlan_id == vnetp->default_vlan_id) {
834 
835 		/* Frames in default vlan must be untagged */
836 		ASSERT(is_tagged == B_FALSE);
837 
838 		/*
839 		 * If the destination is a vnet-port verify it belongs to the
840 		 * default vlan; otherwise drop the packet. We do not need
841 		 * this check for vsw-port, as it should implicitly belong to
842 		 * this vlan; see comments in vgen_vlan_frame_fixtag().
843 		 */
844 		if (portp != vgenp->vsw_portp &&
845 		    portp->pvid != vnetp->default_vlan_id) {
846 			freemsg(mp);
847 			goto portsend_ret;
848 		}
849 
850 	} else {	/* frame not in default-vlan */
851 
852 		mp = vgen_vlan_frame_fixtag(portp, mp, is_tagged, vlan_id);
853 		if (mp == NULL) {
854 			goto portsend_ret;
855 		}
856 
857 	}
858 
859 	ldclp = &portp->ldclist;
860 	READ_ENTER(&ldclp->rwlock);
861 	/*
862 	 * NOTE: for now, we will assume we have a single channel.
863 	 */
864 	if (ldclp->headp == NULL) {
865 		RW_EXIT(&ldclp->rwlock);
866 		rv = VGEN_FAILURE;
867 		goto portsend_ret;
868 	}
869 	ldcp = ldclp->headp;
870 
871 	status = ldcp->tx(ldcp, mp);
872 
873 	RW_EXIT(&ldclp->rwlock);
874 
875 	if (status != VGEN_TX_SUCCESS) {
876 		rv = VGEN_FAILURE;
877 	}
878 
879 portsend_ret:
880 	if (dec_refcnt == B_TRUE) {
881 		(void) atomic_dec_32(&vgenp->vsw_port_refcnt);
882 	}
883 	return (rv);
884 }
885 
886 /*
887  * Wrapper function to transmit normal and/or priority frames over the channel.
888  */
889 static int
890 vgen_ldcsend(void *arg, mblk_t *mp)
891 {
892 	vgen_ldc_t		*ldcp = (vgen_ldc_t *)arg;
893 	int			status;
894 	struct ether_header	*ehp;
895 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
896 	uint32_t		num_types;
897 	uint16_t		*types;
898 	int			i;
899 
900 	ASSERT(VGEN_PRI_ETH_DEFINED(vgenp));
901 
902 	num_types = vgenp->pri_num_types;
903 	types = vgenp->pri_types;
904 	ehp = (struct ether_header *)mp->b_rptr;
905 
906 	for (i = 0; i < num_types; i++) {
907 
908 		if (ehp->ether_type == types[i]) {
909 			/* priority frame, use pri tx function */
910 			vgen_ldcsend_pkt(ldcp, mp);
911 			return (VGEN_SUCCESS);
912 		}
913 
914 	}
915 
916 	status  = vgen_ldcsend_dring(ldcp, mp);
917 
918 	return (status);
919 }
920 
/*
 * This function handles ldc channel reset while in the context
 * of transmit routines: vgen_ldcsend_pkt() or vgen_ldcsend_dring().
 */
static void
vgen_ldcsend_process_reset(vgen_ldc_t *ldcp)
{
	ldc_status_t	istatus;
	vgen_t *vgenp = LDC_TO_VGEN(ldcp);

	/*
	 * Only attempt reset handling if cblock can be taken without
	 * blocking; if it is already held (presumably by the channel
	 * callback path, which does its own reset handling) do nothing.
	 */
	if (mutex_tryenter(&ldcp->cblock)) {
		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
			DWARN(vgenp, ldcp, "ldc_status() error\n");
		} else {
			/* refresh the cached channel status */
			ldcp->ldc_status = istatus;
		}
		if (ldcp->ldc_status != LDC_UP) {
			/* channel is not up: run the reset handling */
			vgen_handle_evt_reset(ldcp);
		}
		mutex_exit(&ldcp->cblock);
	}
}
943 
944 /*
945  * This function transmits the frame in the payload of a raw data
946  * (VIO_PKT_DATA) message. Thus, it provides an Out-Of-Band path to
947  * send special frames with high priorities, without going through
948  * the normal data path which uses descriptor ring mechanism.
949  */
static void
vgen_ldcsend_pkt(void *arg, mblk_t *mp)
{
	vgen_ldc_t		*ldcp = (vgen_ldc_t *)arg;
	vio_raw_data_msg_t	*pkt;
	mblk_t			*bp;
	mblk_t			*nmp = NULL;
	caddr_t			dst;
	uint32_t		mblksz;
	uint32_t		size;
	uint32_t		nbytes;
	int			rv;
	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
	vgen_stats_t		*statsp = &ldcp->stats;

	/* drop the packet if ldc is not up or handshake is not done */
	if (ldcp->ldc_status != LDC_UP) {
		(void) atomic_inc_32(&statsp->tx_pri_fail);
		DWARN(vgenp, ldcp, "status(%d), dropping packet\n",
		    ldcp->ldc_status);
		goto send_pkt_exit;
	}

	if (ldcp->hphase != VH_DONE) {
		(void) atomic_inc_32(&statsp->tx_pri_fail);
		DWARN(vgenp, ldcp, "hphase(%x), dropping packet\n",
		    ldcp->hphase);
		goto send_pkt_exit;
	}

	/* total length of the frame across all mblks in the chain */
	size = msgsize(mp);

	/* frame size bigger than available payload len of raw data msg ? */
	if (size > (size_t)(ldcp->msglen - VIO_PKT_DATA_HDRSIZE)) {
		(void) atomic_inc_32(&statsp->tx_pri_fail);
		DWARN(vgenp, ldcp, "invalid size(%d)\n", size);
		goto send_pkt_exit;
	}

	/* pad runt frames up to the ethernet minimum */
	if (size < ETHERMIN)
		size = ETHERMIN;

	/* alloc space for a raw data message */
	nmp = vio_allocb(vgenp->pri_tx_vmp);
	if (nmp == NULL) {
		/* out of priority tx buffers: drop the frame */
		(void) atomic_inc_32(&statsp->tx_pri_fail);
		DWARN(vgenp, ldcp, "vio_allocb failed\n");
		goto send_pkt_exit;
	}
	pkt = (vio_raw_data_msg_t *)nmp->b_rptr;

	/* copy frame into the payload of raw data message */
	dst = (caddr_t)pkt->data;
	for (bp = mp; bp != NULL; bp = bp->b_cont) {
		mblksz = MBLKL(bp);
		bcopy(bp->b_rptr, dst, mblksz);
		dst += mblksz;
	}

	/* setup the raw data msg */
	pkt->tag.vio_msgtype = VIO_TYPE_DATA;
	pkt->tag.vio_subtype = VIO_SUBTYPE_INFO;
	pkt->tag.vio_subtype_env = VIO_PKT_DATA;
	pkt->tag.vio_sid = ldcp->local_sid;
	nbytes = VIO_PKT_DATA_HDRSIZE + size;

	/* send the msg over ldc */
	rv = vgen_sendmsg(ldcp, (caddr_t)pkt, nbytes, B_FALSE);
	if (rv != VGEN_SUCCESS) {
		(void) atomic_inc_32(&statsp->tx_pri_fail);
		DWARN(vgenp, ldcp, "Error sending priority frame\n");
		if (rv == ECONNRESET) {
			/* channel reset detected during transmit */
			vgen_ldcsend_process_reset(ldcp);
		}
		goto send_pkt_exit;
	}

	/* update stats */
	(void) atomic_inc_64(&statsp->tx_pri_packets);
	(void) atomic_add_64(&statsp->tx_pri_bytes, size);

send_pkt_exit:
	/* free the raw data message buffer (if allocated) and the frame */
	if (nmp != NULL)
		freemsg(nmp);
	freemsg(mp);
}
1036 
1037 /*
1038  * This function transmits normal (non-priority) data frames over
1039  * the channel. It queues the frame into the transmit descriptor ring
1040  * and sends a VIO_DRING_DATA message if needed, to wake up the
1041  * peer to (re)start processing.
1042  */
1043 static int
1044 vgen_ldcsend_dring(void *arg, mblk_t *mp)
1045 {
1046 	vgen_ldc_t		*ldcp = (vgen_ldc_t *)arg;
1047 	vgen_private_desc_t	*tbufp;
1048 	vgen_private_desc_t	*rtbufp;
1049 	vnet_public_desc_t	*rtxdp;
1050 	vgen_private_desc_t	*ntbufp;
1051 	vnet_public_desc_t	*txdp;
1052 	vio_dring_entry_hdr_t	*hdrp;
1053 	vgen_stats_t		*statsp;
1054 	struct ether_header	*ehp;
1055 	boolean_t		is_bcast = B_FALSE;
1056 	boolean_t		is_mcast = B_FALSE;
1057 	size_t			mblksz;
1058 	caddr_t			dst;
1059 	mblk_t			*bp;
1060 	size_t			size;
1061 	int			rv = 0;
1062 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
1063 	vgen_hparams_t		*lp = &ldcp->local_hparams;
1064 
1065 	statsp = &ldcp->stats;
1066 	size = msgsize(mp);
1067 
1068 	DBG1(vgenp, ldcp, "enter\n");
1069 
1070 	if (ldcp->ldc_status != LDC_UP) {
1071 		DWARN(vgenp, ldcp, "status(%d), dropping packet\n",
1072 		    ldcp->ldc_status);
1073 		/* retry ldc_up() if needed */
1074 		if (ldcp->flags & CHANNEL_STARTED)
1075 			(void) ldc_up(ldcp->ldc_handle);
1076 		goto send_dring_exit;
1077 	}
1078 
1079 	/* drop the packet if ldc is not up or handshake is not done */
1080 	if (ldcp->hphase != VH_DONE) {
1081 		DWARN(vgenp, ldcp, "hphase(%x), dropping packet\n",
1082 		    ldcp->hphase);
1083 		goto send_dring_exit;
1084 	}
1085 
1086 	if (size > (size_t)lp->mtu) {
1087 		DWARN(vgenp, ldcp, "invalid size(%d)\n", size);
1088 		goto send_dring_exit;
1089 	}
1090 	if (size < ETHERMIN)
1091 		size = ETHERMIN;
1092 
1093 	ehp = (struct ether_header *)mp->b_rptr;
1094 	is_bcast = IS_BROADCAST(ehp);
1095 	is_mcast = IS_MULTICAST(ehp);
1096 
1097 	mutex_enter(&ldcp->txlock);
1098 	/*
1099 	 * allocate a descriptor
1100 	 */
1101 	tbufp = ldcp->next_tbufp;
1102 	ntbufp = NEXTTBUF(ldcp, tbufp);
1103 	if (ntbufp == ldcp->cur_tbufp) { /* out of tbufs/txds */
1104 
1105 		mutex_enter(&ldcp->tclock);
1106 		/* Try reclaiming now */
1107 		vgen_reclaim_dring(ldcp);
1108 		ldcp->reclaim_lbolt = ddi_get_lbolt();
1109 
1110 		if (ntbufp == ldcp->cur_tbufp) {
1111 			/* Now we are really out of tbuf/txds */
1112 			ldcp->need_resched = B_TRUE;
1113 			mutex_exit(&ldcp->tclock);
1114 
1115 			statsp->tx_no_desc++;
1116 			mutex_exit(&ldcp->txlock);
1117 
1118 			return (VGEN_TX_NORESOURCES);
1119 		}
1120 		mutex_exit(&ldcp->tclock);
1121 	}
1122 	/* update next available tbuf in the ring and update tx index */
1123 	ldcp->next_tbufp = ntbufp;
1124 	INCR_TXI(ldcp->next_txi, ldcp);
1125 
1126 	/* Mark the buffer busy before releasing the lock */
1127 	tbufp->flags = VGEN_PRIV_DESC_BUSY;
1128 	mutex_exit(&ldcp->txlock);
1129 
1130 	/* copy data into pre-allocated transmit buffer */
1131 	dst = tbufp->datap + VNET_IPALIGN;
1132 	for (bp = mp; bp != NULL; bp = bp->b_cont) {
1133 		mblksz = MBLKL(bp);
1134 		bcopy(bp->b_rptr, dst, mblksz);
1135 		dst += mblksz;
1136 	}
1137 
1138 	tbufp->datalen = size;
1139 
1140 	/* initialize the corresponding public descriptor (txd) */
1141 	txdp = tbufp->descp;
1142 	hdrp = &txdp->hdr;
1143 	txdp->nbytes = size;
1144 	txdp->ncookies = tbufp->ncookies;
1145 	bcopy((tbufp->memcookie), (txdp->memcookie),
1146 	    tbufp->ncookies * sizeof (ldc_mem_cookie_t));
1147 
1148 	mutex_enter(&ldcp->wrlock);
1149 	/*
1150 	 * If the flags not set to BUSY, it implies that the clobber
1151 	 * was done while we were copying the data. In such case,
1152 	 * discard the packet and return.
1153 	 */
1154 	if (tbufp->flags != VGEN_PRIV_DESC_BUSY) {
1155 		statsp->oerrors++;
1156 		mutex_exit(&ldcp->wrlock);
1157 		goto send_dring_exit;
1158 	}
1159 	hdrp->dstate = VIO_DESC_READY;
1160 
1161 	/* update stats */
1162 	statsp->opackets++;
1163 	statsp->obytes += size;
1164 	if (is_bcast)
1165 		statsp->brdcstxmt++;
1166 	else if (is_mcast)
1167 		statsp->multixmt++;
1168 
1169 	/* send dring datamsg to the peer */
1170 	if (ldcp->resched_peer) {
1171 
1172 		rtbufp = &ldcp->tbufp[ldcp->resched_peer_txi];
1173 		rtxdp = rtbufp->descp;
1174 
1175 		if (rtxdp->hdr.dstate == VIO_DESC_READY) {
1176 
1177 			rv = vgen_send_dring_data(ldcp,
1178 			    (uint32_t)ldcp->resched_peer_txi, -1);
1179 			if (rv != 0) {
1180 				/* error: drop the packet */
1181 				DWARN(vgenp, ldcp, "vgen_send_dring_data "
1182 				    "failed: rv(%d) len(%d)\n",
1183 				    ldcp->ldc_id, rv, size);
1184 				statsp->oerrors++;
1185 			} else {
1186 				ldcp->resched_peer = B_FALSE;
1187 			}
1188 
1189 		}
1190 
1191 	}
1192 
1193 	mutex_exit(&ldcp->wrlock);
1194 
1195 send_dring_exit:
1196 	if (rv == ECONNRESET) {
1197 		vgen_ldcsend_process_reset(ldcp);
1198 	}
1199 	freemsg(mp);
1200 	DBG1(vgenp, ldcp, "exit\n");
1201 	return (VGEN_TX_SUCCESS);
1202 }
1203 
1204 /* enable/disable a multicast address */
int
vgen_multicst(void *arg, boolean_t add, const uint8_t *mca)
{
	vgen_t			*vgenp;
	vnet_mcast_msg_t	mcastmsg;
	vio_msg_tag_t		*tagp;
	vgen_port_t		*portp;
	vgen_portlist_t		*plistp;
	vgen_ldc_t		*ldcp;
	vgen_ldclist_t		*ldclp;
	struct ether_addr	*addrp;
	int			rv = DDI_FAILURE;
	uint32_t		i;

	portp = (vgen_port_t *)arg;
	vgenp = portp->vgenp;

	/* only requests on the vswitch port are processed here */
	if (portp != vgenp->vsw_portp) {
		return (DDI_SUCCESS);
	}

	addrp = (struct ether_addr *)mca;
	/* tagp just records the address; mcastmsg is zeroed right below */
	tagp = &mcastmsg.tag;
	bzero(&mcastmsg, sizeof (mcastmsg));

	mutex_enter(&vgenp->lock);

	plistp = &(vgenp->vgenports);

	READ_ENTER(&plistp->rwlock);

	/* re-read the vswitch port under the port list lock */
	portp = vgenp->vsw_portp;
	if (portp == NULL) {
		RW_EXIT(&plistp->rwlock);
		mutex_exit(&vgenp->lock);
		return (rv);
	}
	ldclp = &portp->ldclist;

	READ_ENTER(&ldclp->rwlock);

	/* use the first channel on the vswitch port */
	ldcp = ldclp->headp;
	if (ldcp == NULL)
		goto vgen_mcast_exit;

	mutex_enter(&ldcp->cblock);

	if (ldcp->hphase == VH_DONE) {
		/*
		 * If handshake is done, send a msg to vsw to add/remove
		 * the multicast address. Otherwise, we just update this
		 * mcast address in our table and the table will be sync'd
		 * with vsw when handshake completes.
		 */
		tagp->vio_msgtype = VIO_TYPE_CTRL;
		tagp->vio_subtype = VIO_SUBTYPE_INFO;
		tagp->vio_subtype_env = VNET_MCAST_INFO;
		tagp->vio_sid = ldcp->local_sid;
		bcopy(mca, &(mcastmsg.mca), ETHERADDRL);
		mcastmsg.set = add;
		mcastmsg.count = 1;
		if (vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (mcastmsg),
		    B_FALSE) != VGEN_SUCCESS) {
			DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
			mutex_exit(&ldcp->cblock);
			goto vgen_mcast_exit;
		}
	}

	mutex_exit(&ldcp->cblock);

	if (add) {

		/* expand multicast table if necessary */
		if (vgenp->mccount >= vgenp->mcsize) {
			struct ether_addr	*newtab;
			uint32_t		newsize;


			/* double the table size each time it fills up */
			newsize = vgenp->mcsize * 2;

			newtab = kmem_zalloc(newsize *
			    sizeof (struct ether_addr), KM_NOSLEEP);
			if (newtab == NULL)
				goto vgen_mcast_exit;
			bcopy(vgenp->mctab, newtab, vgenp->mcsize *
			    sizeof (struct ether_addr));
			kmem_free(vgenp->mctab,
			    vgenp->mcsize * sizeof (struct ether_addr));

			vgenp->mctab = newtab;
			vgenp->mcsize = newsize;
		}

		/* add address to the table */
		vgenp->mctab[vgenp->mccount++] = *addrp;

	} else {

		/* delete address from the table */
		for (i = 0; i < vgenp->mccount; i++) {
			if (ether_cmp(addrp, &(vgenp->mctab[i])) == 0) {

				/*
				 * If there's more than one address in this
				 * table, delete the unwanted one by moving
				 * the last one in the list over top of it;
				 * otherwise, just remove it.
				 */
				if (vgenp->mccount > 1) {
					vgenp->mctab[i] =
					    vgenp->mctab[vgenp->mccount-1];
				}
				vgenp->mccount--;
				break;
			}
		}
	}

	rv = DDI_SUCCESS;

vgen_mcast_exit:
	RW_EXIT(&ldclp->rwlock);
	RW_EXIT(&plistp->rwlock);

	mutex_exit(&vgenp->lock);
	return (rv);
}
1333 
1334 /* set or clear promiscuous mode on the device */
static int
vgen_promisc(void *arg, boolean_t on)
{
	/* no-op: the request is accepted but nothing is changed */
	_NOTE(ARGUNUSED(arg, on))
	return (DDI_SUCCESS);
}
1341 
1342 /* set the unicast mac address of the device */
static int
vgen_unicst(void *arg, const uint8_t *mca)
{
	/* no-op: the address is ignored and success is reported */
	_NOTE(ARGUNUSED(arg, mca))
	return (DDI_SUCCESS);
}
1349 
1350 /* get device statistics */
1351 int
1352 vgen_stat(void *arg, uint_t stat, uint64_t *val)
1353 {
1354 	vgen_port_t	*portp = (vgen_port_t *)arg;
1355 
1356 	*val = vgen_port_stat(portp, stat);
1357 
1358 	return (0);
1359 }
1360 
static void
vgen_ioctl(void *arg, queue_t *wq, mblk_t *mp)
{
	/*
	 * No ioctls are handled here.
	 * NOTE(review): mp is not consumed in this stub — confirm the
	 * caller/framework frees the message.
	 */
	 _NOTE(ARGUNUSED(arg, wq, mp))
}
1366 
1367 /* vgen internal functions */
1368 /* detach all ports from the device */
1369 static void
1370 vgen_detach_ports(vgen_t *vgenp)
1371 {
1372 	vgen_port_t	*portp;
1373 	vgen_portlist_t	*plistp;
1374 
1375 	plistp = &(vgenp->vgenports);
1376 	WRITE_ENTER(&plistp->rwlock);
1377 	while ((portp = plistp->headp) != NULL) {
1378 		vgen_port_detach(portp);
1379 	}
1380 	RW_EXIT(&plistp->rwlock);
1381 }
1382 
1383 /*
1384  * detach the given port.
1385  */
static void
vgen_port_detach(vgen_port_t *portp)
{
	vgen_t		*vgenp;
	vgen_ldclist_t	*ldclp;
	int		port_num;

	vgenp = portp->vgenp;
	/* save the port number for the exit trace; portp is freed below */
	port_num = portp->port_num;

	DBG1(vgenp, NULL, "port(%d):enter\n", port_num);

	/*
	 * If this port is connected to the vswitch, then
	 * potentially there could be ports that may be using
	 * this port to transmit packets. To address this do
	 * the following:
	 *	- First set vgenp->vsw_portp to NULL, so that
	 *	  its not used after that.
	 *	- Then wait for the refcnt to go down to 0.
	 *	- Now we can safely detach this port.
	 */
	if (vgenp->vsw_portp == portp) {
		vgenp->vsw_portp = NULL;
		while (vgenp->vsw_port_refcnt > 0) {
			/* poll until in-flight transmits have drained */
			delay(drv_usectohz(vgen_tx_delay));
		}
		(void) atomic_swap_32(&vgenp->vsw_port_refcnt, 0);
	}

	/* unregister the port's network resource handle, if registered */
	if (portp->vhp != NULL) {
		vio_net_resource_unreg(portp->vhp);
		portp->vhp = NULL;
	}

	/* free the port's vlan membership hash */
	vgen_vlan_destroy_hash(portp);

	/* remove it from port list */
	vgen_port_list_remove(portp);

	/* detach channels from this port */
	ldclp = &portp->ldclist;
	WRITE_ENTER(&ldclp->rwlock);
	while (ldclp->headp) {
		vgen_ldc_detach(ldclp->headp);
	}
	RW_EXIT(&ldclp->rwlock);
	rw_destroy(&ldclp->rwlock);

	/* release the ldc id array, if one was allocated */
	if (portp->num_ldcs != 0) {
		kmem_free(portp->ldc_ids, portp->num_ldcs * sizeof (uint64_t));
		portp->num_ldcs = 0;
	}

	mutex_destroy(&portp->lock);
	KMEM_FREE(portp);

	DBG1(vgenp, NULL, "port(%d):exit\n", port_num);
}
1445 
1446 /* add a port to port list */
1447 static void
1448 vgen_port_list_insert(vgen_port_t *portp)
1449 {
1450 	vgen_portlist_t *plistp;
1451 	vgen_t *vgenp;
1452 
1453 	vgenp = portp->vgenp;
1454 	plistp = &(vgenp->vgenports);
1455 
1456 	if (plistp->headp == NULL) {
1457 		plistp->headp = portp;
1458 	} else {
1459 		plistp->tailp->nextp = portp;
1460 	}
1461 	plistp->tailp = portp;
1462 	portp->nextp = NULL;
1463 }
1464 
1465 /* remove a port from port list */
1466 static void
1467 vgen_port_list_remove(vgen_port_t *portp)
1468 {
1469 	vgen_port_t *prevp;
1470 	vgen_port_t *nextp;
1471 	vgen_portlist_t *plistp;
1472 	vgen_t *vgenp;
1473 
1474 	vgenp = portp->vgenp;
1475 
1476 	plistp = &(vgenp->vgenports);
1477 
1478 	if (plistp->headp == NULL)
1479 		return;
1480 
1481 	if (portp == plistp->headp) {
1482 		plistp->headp = portp->nextp;
1483 		if (portp == plistp->tailp)
1484 			plistp->tailp = plistp->headp;
1485 	} else {
1486 		for (prevp = plistp->headp;
1487 		    ((nextp = prevp->nextp) != NULL) && (nextp != portp);
1488 		    prevp = nextp)
1489 			;
1490 		if (nextp == portp) {
1491 			prevp->nextp = portp->nextp;
1492 		}
1493 		if (portp == plistp->tailp)
1494 			plistp->tailp = prevp;
1495 	}
1496 }
1497 
1498 /* lookup a port in the list based on port_num */
1499 static vgen_port_t *
1500 vgen_port_lookup(vgen_portlist_t *plistp, int port_num)
1501 {
1502 	vgen_port_t *portp = NULL;
1503 
1504 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
1505 		if (portp->port_num == port_num) {
1506 			break;
1507 		}
1508 	}
1509 
1510 	return (portp);
1511 }
1512 
1513 /* enable ports for transmit/receive */
1514 static void
1515 vgen_init_ports(vgen_t *vgenp)
1516 {
1517 	vgen_port_t	*portp;
1518 	vgen_portlist_t	*plistp;
1519 
1520 	plistp = &(vgenp->vgenports);
1521 	READ_ENTER(&plistp->rwlock);
1522 
1523 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
1524 		vgen_port_init(portp);
1525 	}
1526 
1527 	RW_EXIT(&plistp->rwlock);
1528 }
1529 
/*
 * Initialize a port: record its vlan memberships and then bring up
 * its channels.
 */
static void
vgen_port_init(vgen_port_t *portp)
{
	/* Add the port to the specified vlans */
	vgen_vlan_add_ids(portp);

	/* Bring up the channels of this port */
	vgen_init_ldcs(portp);
}
1539 
1540 /* disable transmit/receive on ports */
1541 static void
1542 vgen_uninit_ports(vgen_t *vgenp)
1543 {
1544 	vgen_port_t	*portp;
1545 	vgen_portlist_t	*plistp;
1546 
1547 	plistp = &(vgenp->vgenports);
1548 	READ_ENTER(&plistp->rwlock);
1549 
1550 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
1551 		vgen_port_uninit(portp);
1552 	}
1553 
1554 	RW_EXIT(&plistp->rwlock);
1555 }
1556 
/*
 * Undo vgen_port_init(): bring down the port's channels and drop its
 * vlan memberships.
 */
static void
vgen_port_uninit(vgen_port_t *portp)
{
	vgen_uninit_ldcs(portp);

	/* remove the port from vlans it has been assigned to */
	vgen_vlan_remove_ids(portp);
}
1565 
1566 /*
1567  * Scan the machine description for this instance of vnet
1568  * and read its properties. Called only from vgen_init().
1569  * Returns: 0 on success, 1 on failure.
1570  */
static int
vgen_read_mdprops(vgen_t *vgenp)
{
	vnet_t		*vnetp = vgenp->vnetp;
	md_t		*mdp = NULL;
	mde_cookie_t	rootnode;
	mde_cookie_t	*listp = NULL;
	uint64_t	cfgh;
	char		*name;
	int		rv = 1;		/* assume failure until a match */
	int		num_nodes = 0;
	int		num_devs = 0;
	int		listsz = 0;
	int		i;

	/* take a snapshot of the current machine description */
	if ((mdp = md_get_handle()) == NULL) {
		return (rv);
	}

	num_nodes = md_node_count(mdp);
	ASSERT(num_nodes > 0);

	/* cookie list sized for the worst case: every node matches */
	listsz = num_nodes * sizeof (mde_cookie_t);
	listp = (mde_cookie_t *)kmem_zalloc(listsz, KM_SLEEP);

	rootnode = md_root_node(mdp);

	/* search for all "virtual_device" nodes */
	num_devs = md_scan_dag(mdp, rootnode,
	    md_find_name(mdp, vdev_propname),
	    md_find_name(mdp, "fwd"), listp);
	if (num_devs <= 0) {
		goto vgen_readmd_exit;
	}

	/*
	 * Now loop through the list of virtual-devices looking for
	 * devices with name "network" and for each such device compare
	 * its instance with what we have from the 'reg' property to
	 * find the right node in MD and then read all its properties.
	 */
	for (i = 0; i < num_devs; i++) {

		if (md_get_prop_str(mdp, listp[i], "name", &name) != 0) {
			goto vgen_readmd_exit;
		}

		/* is this a "network" device? */
		if (strcmp(name, vnet_propname) != 0)
			continue;

		if (md_get_prop_val(mdp, listp[i], "cfg-handle", &cfgh) != 0) {
			goto vgen_readmd_exit;
		}

		/* is this the required instance of vnet? */
		if (vgenp->regprop != cfgh)
			continue;

		/*
		 * Read the mtu. Note that we set the mtu of vnet device within
		 * this routine itself, after validating the range.
		 */
		vgen_mtu_read(vgenp, mdp, listp[i], &vnetp->mtu);
		if (vnetp->mtu < ETHERMTU || vnetp->mtu > VNET_MAX_MTU) {
			/* out-of-range mtu: fall back to the default */
			vnetp->mtu = ETHERMTU;
		}
		/* max frame also accounts for the header and a vlan tag */
		vgenp->max_frame_size = vnetp->mtu +
		    sizeof (struct ether_header) + VLAN_TAGSZ;

		/* read priority ether types */
		vgen_read_pri_eth_types(vgenp, mdp, listp[i]);

		/* read vlan id properties of this vnet instance */
		vgen_vlan_read_ids(vgenp, VGEN_LOCAL, mdp, listp[i],
		    &vnetp->pvid, &vnetp->vids, &vnetp->nvids,
		    &vnetp->default_vlan_id);

		rv = 0;
		break;
	}

vgen_readmd_exit:

	kmem_free(listp, listsz);
	(void) md_fini_handle(mdp);
	return (rv);
}
1659 
1660 /*
1661  * Read vlan id properties of the given MD node.
1662  * Arguments:
1663  *   arg:          device argument(vnet device or a port)
1664  *   type:         type of arg; VGEN_LOCAL(vnet device) or VGEN_PEER(port)
1665  *   mdp:          machine description
1666  *   node:         md node cookie
1667  *
1668  * Returns:
1669  *   pvidp:        port-vlan-id of the node
1670  *   vidspp:       list of vlan-ids of the node
1671  *   nvidsp:       # of vlan-ids in the list
1672  *   default_idp:  default-vlan-id of the node(if node is vnet device)
1673  */
static void
vgen_vlan_read_ids(void *arg, int type, md_t *mdp, mde_cookie_t node,
	uint16_t *pvidp, uint16_t **vidspp, uint16_t *nvidsp,
	uint16_t *default_idp)
{
	vgen_t		*vgenp;
	vnet_t		*vnetp;
	vgen_port_t	*portp;
	char		*pvid_propname;
	char		*vid_propname;
	uint_t		nvids;
	uint32_t	vids_size;
	int		rv;
	int		i;
	uint64_t	*data;
	uint64_t	val;
	int		size;
	int		inst;

	/* pick the property names that match the type of node */
	if (type == VGEN_LOCAL) {

		vgenp = (vgen_t *)arg;
		vnetp = vgenp->vnetp;
		pvid_propname = vgen_pvid_propname;
		vid_propname = vgen_vid_propname;
		inst = vnetp->instance;

	} else if (type == VGEN_PEER) {

		portp = (vgen_port_t *)arg;
		vgenp = portp->vgenp;
		vnetp = vgenp->vnetp;
		pvid_propname = port_pvid_propname;
		vid_propname = port_vid_propname;
		inst = portp->port_num;

	} else {
		/* unknown node type: nothing to read */
		return;
	}

	/* the default-vlan-id property exists only on the vnet device */
	if (type == VGEN_LOCAL && default_idp != NULL) {
		rv = md_get_prop_val(mdp, node, vgen_dvid_propname, &val);
		if (rv != 0) {
			DWARN(vgenp, NULL, "prop(%s) not found",
			    vgen_dvid_propname);

			*default_idp = vnet_default_vlan_id;
		} else {
			*default_idp = val & 0xFFF;
			DBG2(vgenp, NULL, "%s(%d): (%d)\n", vgen_dvid_propname,
			    inst, *default_idp);
		}
	}

	/* read the port-vlan-id, falling back to the default */
	rv = md_get_prop_val(mdp, node, pvid_propname, &val);
	if (rv != 0) {
		DWARN(vgenp, NULL, "prop(%s) not found", pvid_propname);
		*pvidp = vnet_default_vlan_id;
	} else {

		*pvidp = val & 0xFFF;
		DBG2(vgenp, NULL, "%s(%d): (%d)\n",
		    pvid_propname, inst, *pvidp);
	}

	/* read the vlan-id list; a missing property means no vids */
	rv = md_get_prop_data(mdp, node, vid_propname, (uint8_t **)&data,
	    &size);
	if (rv != 0) {
		DBG2(vgenp, NULL, "prop(%s) not found", vid_propname);
		size = 0;
	} else {
		size /= sizeof (uint64_t);
	}
	nvids = size;

	if (nvids != 0) {
		DBG2(vgenp, NULL, "%s(%d): ", vid_propname, inst);
		vids_size = sizeof (uint16_t) * nvids;
		*vidspp = kmem_zalloc(vids_size, KM_SLEEP);
		for (i = 0; i < nvids; i++) {
			/*
			 * NOTE(review): vids are masked with 0xFFFF here
			 * while pvid above uses 0xFFF; confirm whether the
			 * 12-bit mask was intended for both.
			 */
			(*vidspp)[i] = data[i] & 0xFFFF;
			DBG2(vgenp, NULL, " %d ", (*vidspp)[i]);
		}
		DBG2(vgenp, NULL, "\n");
	}

	*nvidsp = nvids;
}
1762 
1763 /*
1764  * Create a vlan id hash table for the given port.
1765  */
1766 static void
1767 vgen_vlan_create_hash(vgen_port_t *portp)
1768 {
1769 	char		hashname[MAXNAMELEN];
1770 
1771 	(void) snprintf(hashname, MAXNAMELEN, "port%d-vlan-hash",
1772 	    portp->port_num);
1773 
1774 	portp->vlan_nchains = vgen_vlan_nchains;
1775 	portp->vlan_hashp = mod_hash_create_idhash(hashname,
1776 	    portp->vlan_nchains, mod_hash_null_valdtor);
1777 }
1778 
1779 /*
1780  * Destroy the vlan id hash table in the given port.
1781  */
1782 static void
1783 vgen_vlan_destroy_hash(vgen_port_t *portp)
1784 {
1785 	if (portp->vlan_hashp != NULL) {
1786 		mod_hash_destroy_hash(portp->vlan_hashp);
1787 		portp->vlan_hashp = NULL;
1788 		portp->vlan_nchains = 0;
1789 	}
1790 }
1791 
1792 /*
 * Add a port to the vlans specified in its port properties.
1794  */
1795 static void
1796 vgen_vlan_add_ids(vgen_port_t *portp)
1797 {
1798 	int		rv;
1799 	int		i;
1800 
1801 	rv = mod_hash_insert(portp->vlan_hashp,
1802 	    (mod_hash_key_t)VLAN_ID_KEY(portp->pvid),
1803 	    (mod_hash_val_t)B_TRUE);
1804 	ASSERT(rv == 0);
1805 
1806 	for (i = 0; i < portp->nvids; i++) {
1807 		rv = mod_hash_insert(portp->vlan_hashp,
1808 		    (mod_hash_key_t)VLAN_ID_KEY(portp->vids[i]),
1809 		    (mod_hash_val_t)B_TRUE);
1810 		ASSERT(rv == 0);
1811 	}
1812 }
1813 
1814 /*
1815  * Remove a port from the vlans it has been assigned to.
1816  */
1817 static void
1818 vgen_vlan_remove_ids(vgen_port_t *portp)
1819 {
1820 	int		rv;
1821 	int		i;
1822 	mod_hash_val_t	vp;
1823 
1824 	rv = mod_hash_remove(portp->vlan_hashp,
1825 	    (mod_hash_key_t)VLAN_ID_KEY(portp->pvid),
1826 	    (mod_hash_val_t *)&vp);
1827 	ASSERT(rv == 0);
1828 
1829 	for (i = 0; i < portp->nvids; i++) {
1830 		rv = mod_hash_remove(portp->vlan_hashp,
1831 		    (mod_hash_key_t)VLAN_ID_KEY(portp->vids[i]),
1832 		    (mod_hash_val_t *)&vp);
1833 		ASSERT(rv == 0);
1834 	}
1835 }
1836 
1837 /*
1838  * Lookup the vlan id of the given tx frame. If it is a vlan-tagged frame,
1839  * then the vlan-id is available in the tag; otherwise, its vlan id is
1840  * implicitly obtained from the port-vlan-id of the vnet device.
1841  * The vlan id determined is returned in vidp.
1842  * Returns: B_TRUE if it is a tagged frame; B_FALSE if it is untagged.
1843  */
1844 static boolean_t
1845 vgen_frame_lookup_vid(vnet_t *vnetp, struct ether_header *ehp, uint16_t *vidp)
1846 {
1847 	struct ether_vlan_header	*evhp;
1848 
1849 	/* If it's a tagged frame, get the vlan id from vlan header */
1850 	if (ehp->ether_type == ETHERTYPE_VLAN) {
1851 
1852 		evhp = (struct ether_vlan_header *)ehp;
1853 		*vidp = VLAN_ID(ntohs(evhp->ether_tci));
1854 		return (B_TRUE);
1855 	}
1856 
1857 	/* Untagged frame, vlan-id is the pvid of vnet device */
1858 	*vidp = vnetp->pvid;
1859 	return (B_FALSE);
1860 }
1861 
1862 /*
1863  * Find the given vlan id in the hash table.
1864  * Return: B_TRUE if the id is found; B_FALSE if not found.
1865  */
1866 static boolean_t
1867 vgen_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid)
1868 {
1869 	int		rv;
1870 	mod_hash_val_t	vp;
1871 
1872 	rv = mod_hash_find(vlan_hashp, VLAN_ID_KEY(vid), (mod_hash_val_t *)&vp);
1873 
1874 	if (rv != 0)
1875 		return (B_FALSE);
1876 
1877 	return (B_TRUE);
1878 }
1879 
1880 /*
1881  * This function reads "priority-ether-types" property from md. This property
1882  * is used to enable support for priority frames. Applications which need
1883  * guaranteed and timely delivery of certain high priority frames to/from
1884  * a vnet or vsw within ldoms, should configure this property by providing
1885  * the ether type(s) for which the priority facility is needed.
1886  * Normal data frames are delivered over a ldc channel using the descriptor
1887  * ring mechanism which is constrained by factors such as descriptor ring size,
1888  * the rate at which the ring is processed at the peer ldc end point, etc.
1889  * The priority mechanism provides an Out-Of-Band path to send/receive frames
1890  * as raw pkt data (VIO_PKT_DATA) messages over the channel, avoiding the
1891  * descriptor ring path and enables a more reliable and timely delivery of
1892  * frames to the peer.
1893  */
static void
vgen_read_pri_eth_types(vgen_t *vgenp, md_t *mdp, mde_cookie_t node)
{
	int		rv;
	uint16_t	*types;
	uint64_t	*data;
	int		size;
	int		i;
	size_t		mblk_sz;

	rv = md_get_prop_data(mdp, node, pri_types_propname,
	    (uint8_t **)&data, &size);
	if (rv != 0) {
		/*
		 * Property may not exist if we are running pre-ldoms1.1 f/w.
		 * Check if 'vgen_pri_eth_type' has been set in that case.
		 */
		if (vgen_pri_eth_type != 0) {
			size = sizeof (vgen_pri_eth_type);
			data = &vgen_pri_eth_type;
		} else {
			DBG2(vgenp, NULL,
			    "prop(%s) not found", pri_types_propname);
			size = 0;
		}
	}

	/* no priority types configured: disable the priority tx path */
	if (size == 0) {
		vgenp->pri_num_types = 0;
		return;
	}

	/*
	 * we have some priority-ether-types defined;
	 * allocate a table of these types and also
	 * allocate a pool of mblks to transmit these
	 * priority packets.
	 */
	size /= sizeof (uint64_t);
	vgenp->pri_num_types = size;
	vgenp->pri_types = kmem_zalloc(size * sizeof (uint16_t), KM_SLEEP);
	for (i = 0, types = vgenp->pri_types; i < size; i++) {
		/* only the low 16 bits of each MD value form an ether type */
		types[i] = data[i] & 0xFFFF;
	}
	/* round the buffer size up to an 8-byte multiple */
	mblk_sz = (VIO_PKT_DATA_HDRSIZE + vgenp->max_frame_size + 7) & ~7;
	/*
	 * NOTE(review): the vio_create_mblks() result is ignored; confirm
	 * that users of pri_tx_vmp tolerate a failed pool creation.
	 */
	(void) vio_create_mblks(vgen_pri_tx_nmblks, mblk_sz,
	    &vgenp->pri_tx_vmp);
}
1942 
1943 static void
1944 vgen_mtu_read(vgen_t *vgenp, md_t *mdp, mde_cookie_t node, uint32_t *mtu)
1945 {
1946 	int		rv;
1947 	uint64_t	val;
1948 	char		*mtu_propname;
1949 
1950 	mtu_propname = vgen_mtu_propname;
1951 
1952 	rv = md_get_prop_val(mdp, node, mtu_propname, &val);
1953 	if (rv != 0) {
1954 		DWARN(vgenp, NULL, "prop(%s) not found", mtu_propname);
1955 		*mtu = vnet_ethermtu;
1956 	} else {
1957 
1958 		*mtu = val & 0xFFFF;
1959 		DBG2(vgenp, NULL, "%s(%d): (%d)\n", mtu_propname,
1960 		    vgenp->instance, *mtu);
1961 	}
1962 }
1963 
1964 /* register with MD event generator */
static int
vgen_mdeg_reg(vgen_t *vgenp)
{
	mdeg_prop_spec_t	*pspecp;
	mdeg_node_spec_t	*parentp;
	uint_t			templatesz;
	int			rv;
	mdeg_handle_t		dev_hdl = NULL;
	mdeg_handle_t		port_hdl = NULL;

	/* build a private copy of the property template for this instance */
	templatesz = sizeof (vgen_prop_template);
	pspecp = kmem_zalloc(templatesz, KM_NOSLEEP);
	if (pspecp == NULL) {
		return (DDI_FAILURE);
	}
	parentp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_NOSLEEP);
	if (parentp == NULL) {
		kmem_free(pspecp, templatesz);
		return (DDI_FAILURE);
	}

	bcopy(vgen_prop_template, pspecp, templatesz);

	/*
	 * NOTE: The instance here refers to the value of "reg" property and
	 * not the dev_info instance (ddi_get_instance()) of vnet.
	 */
	VGEN_SET_MDEG_PROP_INST(pspecp, vgenp->regprop);

	parentp->namep = "virtual-device";
	parentp->specp = pspecp;

	/* save parentp in vgen_t */
	vgenp->mdeg_parentp = parentp;

	/*
	 * Register an interest in 'virtual-device' nodes with a
	 * 'name' property of 'network'
	 */
	rv = mdeg_register(parentp, &vdev_match, vgen_mdeg_cb, vgenp, &dev_hdl);
	if (rv != MDEG_SUCCESS) {
		DERR(vgenp, NULL, "mdeg_register failed\n");
		goto mdeg_reg_fail;
	}

	/* Register an interest in 'port' nodes */
	rv = mdeg_register(parentp, &vport_match, vgen_mdeg_port_cb, vgenp,
	    &port_hdl);
	if (rv != MDEG_SUCCESS) {
		DERR(vgenp, NULL, "mdeg_register failed\n");
		goto mdeg_reg_fail;
	}

	/* save mdeg handle in vgen_t */
	vgenp->mdeg_dev_hdl = dev_hdl;
	vgenp->mdeg_port_hdl = port_hdl;

	return (DDI_SUCCESS);

mdeg_reg_fail:
	/* undo the first registration if only the second one failed */
	if (dev_hdl != NULL) {
		(void) mdeg_unregister(dev_hdl);
	}
	KMEM_FREE(parentp);
	kmem_free(pspecp, templatesz);
	vgenp->mdeg_parentp = NULL;
	return (DDI_FAILURE);
}
2033 
2034 /* unregister with MD event generator */
static void
vgen_mdeg_unreg(vgen_t *vgenp)
{
	/* drop both mdeg registrations taken in vgen_mdeg_reg() */
	(void) mdeg_unregister(vgenp->mdeg_dev_hdl);
	(void) mdeg_unregister(vgenp->mdeg_port_hdl);
	/* free the property template copy and the parent node spec */
	kmem_free(vgenp->mdeg_parentp->specp, sizeof (vgen_prop_template));
	KMEM_FREE(vgenp->mdeg_parentp);
	vgenp->mdeg_parentp = NULL;
	vgenp->mdeg_dev_hdl = NULL;
	vgenp->mdeg_port_hdl = NULL;
}
2046 
2047 /* mdeg callback function for the port node */
/*
 * Process MD updates for the 'port' nodes of this vnet instance, under
 * vgenp->lock: removed ports are detached, added ports are attached (with
 * the vsw port handled first), and matched ports are checked for property
 * updates. Returns MDEG_SUCCESS or MDEG_FAILURE to the mdeg framework.
 */
static int
vgen_mdeg_port_cb(void *cb_argp, mdeg_result_t *resp)
{
	int idx;
	int vsw_idx = -1;	/* index in resp->added of the vsw port */
	uint64_t val;
	vgen_t *vgenp;

	/* defensive check of the arguments supplied by the mdeg framework */
	if ((resp == NULL) || (cb_argp == NULL)) {
		return (MDEG_FAILURE);
	}

	vgenp = (vgen_t *)cb_argp;
	DBG1(vgenp, NULL, "enter\n");

	mutex_enter(&vgenp->lock);

	DBG1(vgenp, NULL, "ports: removed(%x), "
	"added(%x), updated(%x)\n", resp->removed.nelem,
	    resp->added.nelem, resp->match_curr.nelem);

	/* detach ports that have disappeared from the MD */
	for (idx = 0; idx < resp->removed.nelem; idx++) {
		(void) vgen_remove_port(vgenp, resp->removed.mdp,
		    resp->removed.mdep[idx]);
	}

	if (vgenp->vsw_portp == NULL) {
		/*
		 * find vsw_port and add it first, because other ports need
		 * this when adding fdb entry (see vgen_port_init()).
		 */
		for (idx = 0; idx < resp->added.nelem; idx++) {
			if (!(md_get_prop_val(resp->added.mdp,
			    resp->added.mdep[idx], swport_propname, &val))) {
				if (val == 0) {
					/*
					 * This port is connected to the
					 * vsw on service domain.
					 */
					vsw_idx = idx;
					if (vgen_add_port(vgenp,
					    resp->added.mdp,
					    resp->added.mdep[idx]) !=
					    DDI_SUCCESS) {
						cmn_err(CE_NOTE, "vnet%d Could "
						    "not initialize virtual "
						    "switch port.",
						    vgenp->instance);
						mutex_exit(&vgenp->lock);
						return (MDEG_FAILURE);
					}
					break;
				}
			}
		}
		/* a vnet without its vsw port cannot operate; fail */
		if (vsw_idx == -1) {
			DWARN(vgenp, NULL, "can't find vsw_port\n");
			mutex_exit(&vgenp->lock);
			return (MDEG_FAILURE);
		}
	}

	/* add the remaining (non-vsw) ports */
	for (idx = 0; idx < resp->added.nelem; idx++) {
		if ((vsw_idx != -1) && (vsw_idx == idx)) /* skip vsw_port */
			continue;

		/* If this port can't be added just skip it. */
		(void) vgen_add_port(vgenp, resp->added.mdp,
		    resp->added.mdep[idx]);
	}

	/* apply property updates to ports present in both old and new MD */
	for (idx = 0; idx < resp->match_curr.nelem; idx++) {
		(void) vgen_update_port(vgenp, resp->match_curr.mdp,
		    resp->match_curr.mdep[idx],
		    resp->match_prev.mdp,
		    resp->match_prev.mdep[idx]);
	}

	mutex_exit(&vgenp->lock);
	DBG1(vgenp, NULL, "exit\n");
	return (MDEG_SUCCESS);
}
2130 
2131 /* mdeg callback function for the vnet node */
2132 static int
2133 vgen_mdeg_cb(void *cb_argp, mdeg_result_t *resp)
2134 {
2135 	vgen_t		*vgenp;
2136 	vnet_t		*vnetp;
2137 	md_t		*mdp;
2138 	mde_cookie_t	node;
2139 	uint64_t	inst;
2140 	char		*node_name = NULL;
2141 
2142 	if ((resp == NULL) || (cb_argp == NULL)) {
2143 		return (MDEG_FAILURE);
2144 	}
2145 
2146 	vgenp = (vgen_t *)cb_argp;
2147 	vnetp = vgenp->vnetp;
2148 
2149 	DBG1(vgenp, NULL, "added %d : removed %d : curr matched %d"
2150 	    " : prev matched %d", resp->added.nelem, resp->removed.nelem,
2151 	    resp->match_curr.nelem, resp->match_prev.nelem);
2152 
2153 	mutex_enter(&vgenp->lock);
2154 
2155 	/*
2156 	 * We get an initial callback for this node as 'added' after
2157 	 * registering with mdeg. Note that we would have already gathered
2158 	 * information about this vnet node by walking MD earlier during attach
2159 	 * (in vgen_read_mdprops()). So, there is a window where the properties
2160 	 * of this node might have changed when we get this initial 'added'
2161 	 * callback. We handle this as if an update occured and invoke the same
2162 	 * function which handles updates to the properties of this vnet-node
2163 	 * if any. A non-zero 'match' value indicates that the MD has been
2164 	 * updated and that a 'network' node is present which may or may not
2165 	 * have been updated. It is up to the clients to examine their own
2166 	 * nodes and determine if they have changed.
2167 	 */
2168 	if (resp->added.nelem != 0) {
2169 
2170 		if (resp->added.nelem != 1) {
2171 			cmn_err(CE_NOTE, "!vnet%d: number of nodes added "
2172 			    "invalid: %d\n", vnetp->instance,
2173 			    resp->added.nelem);
2174 			goto vgen_mdeg_cb_err;
2175 		}
2176 
2177 		mdp = resp->added.mdp;
2178 		node = resp->added.mdep[0];
2179 
2180 	} else if (resp->match_curr.nelem != 0) {
2181 
2182 		if (resp->match_curr.nelem != 1) {
2183 			cmn_err(CE_NOTE, "!vnet%d: number of nodes updated "
2184 			    "invalid: %d\n", vnetp->instance,
2185 			    resp->match_curr.nelem);
2186 			goto vgen_mdeg_cb_err;
2187 		}
2188 
2189 		mdp = resp->match_curr.mdp;
2190 		node = resp->match_curr.mdep[0];
2191 
2192 	} else {
2193 		goto vgen_mdeg_cb_err;
2194 	}
2195 
2196 	/* Validate name and instance */
2197 	if (md_get_prop_str(mdp, node, "name", &node_name) != 0) {
2198 		DERR(vgenp, NULL, "unable to get node name\n");
2199 		goto vgen_mdeg_cb_err;
2200 	}
2201 
2202 	/* is this a virtual-network device? */
2203 	if (strcmp(node_name, vnet_propname) != 0) {
2204 		DERR(vgenp, NULL, "%s: Invalid node name: %s\n", node_name);
2205 		goto vgen_mdeg_cb_err;
2206 	}
2207 
2208 	if (md_get_prop_val(mdp, node, "cfg-handle", &inst)) {
2209 		DERR(vgenp, NULL, "prop(cfg-handle) not found\n");
2210 		goto vgen_mdeg_cb_err;
2211 	}
2212 
2213 	/* is this the right instance of vnet? */
2214 	if (inst != vgenp->regprop) {
2215 		DERR(vgenp, NULL,  "Invalid cfg-handle: %lx\n", inst);
2216 		goto vgen_mdeg_cb_err;
2217 	}
2218 
2219 	vgen_update_md_prop(vgenp, mdp, node);
2220 
2221 	mutex_exit(&vgenp->lock);
2222 	return (MDEG_SUCCESS);
2223 
2224 vgen_mdeg_cb_err:
2225 	mutex_exit(&vgenp->lock);
2226 	return (MDEG_FAILURE);
2227 }
2228 
2229 /*
2230  * Check to see if the relevant properties in the specified node have
2231  * changed, and if so take the appropriate action.
2232  */
static void
vgen_update_md_prop(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
{
	uint16_t	pvid;		/* port-vlan-id read from MD */
	uint16_t	*vids;		/* vlan-id array read from MD */
	uint16_t	nvids;		/* number of entries in vids */
	vnet_t		*vnetp = vgenp->vnetp;
	uint32_t	mtu;
	enum		{ MD_init = 0x1,
			    MD_vlans = 0x2,
			    MD_mtu = 0x4 } updated;
	int		rv;

	updated = MD_init;

	/* Read the vlan ids */
	vgen_vlan_read_ids(vgenp, VGEN_LOCAL, mdp, mdex, &pvid, &vids,
	    &nvids, NULL);

	/* Determine if there are any vlan id updates */
	if ((pvid != vnetp->pvid) ||		/* pvid changed? */
	    (nvids != vnetp->nvids) ||		/* # of vids changed? */
	    ((nvids != 0) && (vnetp->nvids != 0) &&	/* vids changed? */
	    bcmp(vids, vnetp->vids, sizeof (uint16_t) * nvids))) {
		updated |= MD_vlans;
	}

	/* Read mtu; only accept values in [ETHERMTU, VNET_MAX_MTU] */
	vgen_mtu_read(vgenp, mdp, mdex, &mtu);
	if (mtu != vnetp->mtu) {
		if (mtu >= ETHERMTU && mtu <= VNET_MAX_MTU) {
			updated |= MD_mtu;
		} else {
			cmn_err(CE_NOTE, "!vnet%d: Unable to process mtu update"
			    " as the specified value:%d is invalid\n",
			    vnetp->instance, mtu);
		}
	}

	/* Now process the updated props */

	if (updated & MD_vlans) {

		/*
		 * save the new vlan ids; ownership of the vids array
		 * allocated by vgen_vlan_read_ids() transfers to vnetp
		 * here, so it must not be freed below.
		 */
		vnetp->pvid = pvid;
		if (vnetp->nvids != 0) {
			kmem_free(vnetp->vids,
			    sizeof (uint16_t) * vnetp->nvids);
			vnetp->nvids = 0;
		}
		if (nvids != 0) {
			vnetp->nvids = nvids;
			vnetp->vids = vids;
		}

		/* reset vlan-unaware peers (ver < 1.3) and restart handshake */
		vgen_reset_vlan_unaware_ports(vgenp);

	} else {

		/* no vlan change: release the array read from the MD */
		if (nvids != 0) {
			kmem_free(vids, sizeof (uint16_t) * nvids);
		}
	}

	if (updated & MD_mtu) {

		DBG2(vgenp, NULL, "curr_mtu(%d) new_mtu(%d)\n",
		    vnetp->mtu, mtu);

		/* propagate the new mtu; track our max frame size on success */
		rv = vnet_mtu_update(vnetp, mtu);
		if (rv == 0) {
			vgenp->max_frame_size = mtu +
			    sizeof (struct ether_header) + VLAN_TAGSZ;
		}
	}
}
2310 
2311 /* add a new port to the device */
2312 static int
2313 vgen_add_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
2314 {
2315 	vgen_port_t	*portp;
2316 	int		rv;
2317 
2318 	portp = kmem_zalloc(sizeof (vgen_port_t), KM_SLEEP);
2319 
2320 	rv = vgen_port_read_props(portp, vgenp, mdp, mdex);
2321 	if (rv != DDI_SUCCESS) {
2322 		KMEM_FREE(portp);
2323 		return (DDI_FAILURE);
2324 	}
2325 
2326 	rv = vgen_port_attach(portp);
2327 	if (rv != DDI_SUCCESS) {
2328 		return (DDI_FAILURE);
2329 	}
2330 
2331 	return (DDI_SUCCESS);
2332 }
2333 
2334 /* read properties of the port from its md node */
static int
vgen_port_read_props(vgen_port_t *portp, vgen_t *vgenp, md_t *mdp,
	mde_cookie_t mdex)
{
	uint64_t		port_num;
	uint64_t		*ldc_ids;	/* channel ids of this port */
	uint64_t		macaddr;
	uint64_t		val;
	int			num_ldcs;
	int			i;
	int			addrsz;
	int			num_nodes = 0;
	int			listsz = 0;
	mde_cookie_t		*listp = NULL;
	uint8_t			*addrp;
	struct ether_addr	ea;

	/* read "id" property to get the port number */
	if (md_get_prop_val(mdp, mdex, id_propname, &port_num)) {
		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
		return (DDI_FAILURE);
	}

	/*
	 * Find the channel endpoint node(s) under this port node.
	 */
	if ((num_nodes = md_node_count(mdp)) <= 0) {
		DWARN(vgenp, NULL, "invalid number of nodes found (%d)",
		    num_nodes);
		return (DDI_FAILURE);
	}

	/* allocate space for node list */
	listsz = num_nodes * sizeof (mde_cookie_t);
	listp = kmem_zalloc(listsz, KM_NOSLEEP);
	if (listp == NULL)
		return (DDI_FAILURE);

	num_ldcs = md_scan_dag(mdp, mdex,
	    md_find_name(mdp, channel_propname),
	    md_find_name(mdp, "fwd"), listp);

	if (num_ldcs <= 0) {
		DWARN(vgenp, NULL, "can't find %s nodes", channel_propname);
		kmem_free(listp, listsz);
		return (DDI_FAILURE);
	}

	DBG2(vgenp, NULL, "num_ldcs %d", num_ldcs);

	ldc_ids = kmem_zalloc(num_ldcs * sizeof (uint64_t), KM_NOSLEEP);
	if (ldc_ids == NULL) {
		kmem_free(listp, listsz);
		return (DDI_FAILURE);
	}

	for (i = 0; i < num_ldcs; i++) {
		/* read channel ids */
		if (md_get_prop_val(mdp, listp[i], id_propname, &ldc_ids[i])) {
			DWARN(vgenp, NULL, "prop(%s) not found\n",
			    id_propname);
			kmem_free(listp, listsz);
			kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
			return (DDI_FAILURE);
		}
		DBG2(vgenp, NULL, "ldc_id 0x%llx", ldc_ids[i]);
	}

	/* the node list is no longer needed once the ids are copied out */
	kmem_free(listp, listsz);

	/* read the remote (peer) mac address of this port */
	if (md_get_prop_data(mdp, mdex, rmacaddr_propname, &addrp,
	    &addrsz)) {
		DWARN(vgenp, NULL, "prop(%s) not found\n", rmacaddr_propname);
		kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
		return (DDI_FAILURE);
	}

	if (addrsz < ETHERADDRL) {
		DWARN(vgenp, NULL, "invalid address size (%d)\n", addrsz);
		kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
		return (DDI_FAILURE);
	}

	/*
	 * NOTE(review): this reads 8 bytes from addrp while only addrsz >=
	 * ETHERADDRL (6) is verified above — presumably the MD stores this
	 * property as an aligned 8-byte value; confirm against the MD spec.
	 */
	macaddr = *((uint64_t *)addrp);

	DBG2(vgenp, NULL, "remote mac address 0x%llx\n", macaddr);

	/* unpack the 64-bit value into octets, least significant byte last */
	for (i = ETHERADDRL - 1; i >= 0; i--) {
		ea.ether_addr_octet[i] = macaddr & 0xFF;
		macaddr >>= 8;
	}

	/*
	 * A "switch-port" property with value 0 marks the port connected
	 * to the vsw on the service domain; remember the first such port.
	 */
	if (vgenp->vsw_portp == NULL) {
		if (!(md_get_prop_val(mdp, mdex, swport_propname, &val))) {
			if (val == 0) {
				(void) atomic_swap_32(
				    &vgenp->vsw_port_refcnt, 0);
				/* This port is connected to the vsw */
				vgenp->vsw_portp = portp;
			}
		}
	}

	/* now update all properties into the port */
	portp->vgenp = vgenp;
	portp->port_num = port_num;
	ether_copy(&ea, &portp->macaddr);
	portp->ldc_ids = kmem_zalloc(sizeof (uint64_t) * num_ldcs, KM_SLEEP);
	bcopy(ldc_ids, portp->ldc_ids, sizeof (uint64_t) * num_ldcs);
	portp->num_ldcs = num_ldcs;

	/* read vlan id properties of this port node */
	vgen_vlan_read_ids(portp, VGEN_PEER, mdp, mdex, &portp->pvid,
	    &portp->vids, &portp->nvids, NULL);

	kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));

	return (DDI_SUCCESS);
}
2454 
2455 /* remove a port from the device */
2456 static int
2457 vgen_remove_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
2458 {
2459 	uint64_t	port_num;
2460 	vgen_port_t	*portp;
2461 	vgen_portlist_t	*plistp;
2462 
2463 	/* read "id" property to get the port number */
2464 	if (md_get_prop_val(mdp, mdex, id_propname, &port_num)) {
2465 		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
2466 		return (DDI_FAILURE);
2467 	}
2468 
2469 	plistp = &(vgenp->vgenports);
2470 
2471 	WRITE_ENTER(&plistp->rwlock);
2472 	portp = vgen_port_lookup(plistp, (int)port_num);
2473 	if (portp == NULL) {
2474 		DWARN(vgenp, NULL, "can't find port(%lx)\n", port_num);
2475 		RW_EXIT(&plistp->rwlock);
2476 		return (DDI_FAILURE);
2477 	}
2478 
2479 	vgen_port_detach_mdeg(portp);
2480 	RW_EXIT(&plistp->rwlock);
2481 
2482 	return (DDI_SUCCESS);
2483 }
2484 
2485 /* attach a port to the device based on mdeg data */
2486 static int
2487 vgen_port_attach(vgen_port_t *portp)
2488 {
2489 	int			i;
2490 	vgen_portlist_t		*plistp;
2491 	vgen_t			*vgenp;
2492 	uint64_t		*ldcids;
2493 	uint32_t		num_ldcs;
2494 	mac_register_t		*macp;
2495 	vio_net_res_type_t	type;
2496 	int			rv;
2497 
2498 	ASSERT(portp != NULL);
2499 
2500 	vgenp = portp->vgenp;
2501 	ldcids = portp->ldc_ids;
2502 	num_ldcs = portp->num_ldcs;
2503 
2504 	DBG1(vgenp, NULL, "port_num(%d)\n", portp->port_num);
2505 
2506 	mutex_init(&portp->lock, NULL, MUTEX_DRIVER, NULL);
2507 	rw_init(&portp->ldclist.rwlock, NULL, RW_DRIVER, NULL);
2508 	portp->ldclist.headp = NULL;
2509 
2510 	for (i = 0; i < num_ldcs; i++) {
2511 		DBG2(vgenp, NULL, "ldcid (%lx)\n", ldcids[i]);
2512 		if (vgen_ldc_attach(portp, ldcids[i]) == DDI_FAILURE) {
2513 			vgen_port_detach(portp);
2514 			return (DDI_FAILURE);
2515 		}
2516 	}
2517 
2518 	/* create vlan id hash table */
2519 	vgen_vlan_create_hash(portp);
2520 
2521 	if (portp == vgenp->vsw_portp) {
2522 		/* This port is connected to the switch port */
2523 		vgenp->vsw_portp = portp;
2524 		(void) atomic_swap_32(&portp->use_vsw_port, B_FALSE);
2525 		type = VIO_NET_RES_LDC_SERVICE;
2526 	} else {
2527 		(void) atomic_swap_32(&portp->use_vsw_port, B_TRUE);
2528 		type = VIO_NET_RES_LDC_GUEST;
2529 	}
2530 
2531 	if ((macp = mac_alloc(MAC_VERSION)) == NULL) {
2532 		vgen_port_detach(portp);
2533 		return (DDI_FAILURE);
2534 	}
2535 	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
2536 	macp->m_driver = portp;
2537 	macp->m_dip = vgenp->vnetdip;
2538 	macp->m_src_addr = (uint8_t *)&(vgenp->macaddr);
2539 	macp->m_callbacks = &vgen_m_callbacks;
2540 	macp->m_min_sdu = 0;
2541 	macp->m_max_sdu = ETHERMTU;
2542 
2543 	mutex_enter(&portp->lock);
2544 	rv = vio_net_resource_reg(macp, type, vgenp->macaddr,
2545 	    portp->macaddr, &portp->vhp, &portp->vcb);
2546 	mutex_exit(&portp->lock);
2547 	mac_free(macp);
2548 
2549 	if (rv == 0) {
2550 		/* link it into the list of ports */
2551 		plistp = &(vgenp->vgenports);
2552 		WRITE_ENTER(&plistp->rwlock);
2553 		vgen_port_list_insert(portp);
2554 		RW_EXIT(&plistp->rwlock);
2555 	} else {
2556 		DERR(vgenp, NULL, "vio_net_resource_reg failed for portp=0x%p",
2557 		    portp);
2558 		vgen_port_detach(portp);
2559 	}
2560 
2561 	DBG1(vgenp, NULL, "exit: port_num(%d)\n", portp->port_num);
2562 	return (DDI_SUCCESS);
2563 }
2564 
2565 /* detach a port from the device based on mdeg data */
2566 static void
2567 vgen_port_detach_mdeg(vgen_port_t *portp)
2568 {
2569 	vgen_t *vgenp = portp->vgenp;
2570 
2571 	DBG1(vgenp, NULL, "enter: port_num(%d)\n", portp->port_num);
2572 
2573 	mutex_enter(&portp->lock);
2574 
2575 	/* stop the port if needed */
2576 	if (portp->flags & VGEN_STARTED) {
2577 		vgen_port_uninit(portp);
2578 	}
2579 
2580 	mutex_exit(&portp->lock);
2581 	vgen_port_detach(portp);
2582 
2583 	DBG1(vgenp, NULL, "exit: port_num(%d)\n", portp->port_num);
2584 }
2585 
/*
 * Handle an mdeg 'match' (update) event for a port node. Only vlan-id
 * changes are processed; any other update, or a port-id mismatch between
 * the current and previous MD, results in DDI_FAILURE. Note that "no vlan
 * change" also returns DDI_FAILURE, meaning "nothing was updated".
 */
static int
vgen_update_port(vgen_t *vgenp, md_t *curr_mdp, mde_cookie_t curr_mdex,
	md_t *prev_mdp, mde_cookie_t prev_mdex)
{
	uint64_t	cport_num;	/* port id in the current MD */
	uint64_t	pport_num;	/* port id in the previous MD */
	vgen_portlist_t	*plistp;
	vgen_port_t	*portp;
	boolean_t	updated_vlans = B_FALSE;
	uint16_t	pvid;
	uint16_t	*vids;
	uint16_t	nvids;

	/*
	 * For now, we get port updates only if vlan ids changed.
	 * We read the port num and do some sanity check.
	 */
	if (md_get_prop_val(curr_mdp, curr_mdex, id_propname, &cport_num)) {
		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
		return (DDI_FAILURE);
	}

	if (md_get_prop_val(prev_mdp, prev_mdex, id_propname, &pport_num)) {
		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
		return (DDI_FAILURE);
	}
	if (cport_num != pport_num)
		return (DDI_FAILURE);

	plistp = &(vgenp->vgenports);

	READ_ENTER(&plistp->rwlock);

	portp = vgen_port_lookup(plistp, (int)cport_num);
	if (portp == NULL) {
		DWARN(vgenp, NULL, "can't find port(%lx)\n", cport_num);
		RW_EXIT(&plistp->rwlock);
		return (DDI_FAILURE);
	}

	/* Read the vlan ids */
	vgen_vlan_read_ids(portp, VGEN_PEER, curr_mdp, curr_mdex, &pvid, &vids,
	    &nvids, NULL);

	/* Determine if there are any vlan id updates */
	if ((pvid != portp->pvid) ||		/* pvid changed? */
	    (nvids != portp->nvids) ||		/* # of vids changed? */
	    ((nvids != 0) && (portp->nvids != 0) &&	/* vids changed? */
	    bcmp(vids, portp->vids, sizeof (uint16_t) * nvids))) {
		updated_vlans = B_TRUE;
	}

	if (updated_vlans == B_FALSE) {
		RW_EXIT(&plistp->rwlock);
		return (DDI_FAILURE);
	}

	/* remove the port from vlans it has been assigned to */
	vgen_vlan_remove_ids(portp);

	/* save the new vlan ids */
	portp->pvid = pvid;
	if (portp->nvids != 0) {
		kmem_free(portp->vids, sizeof (uint16_t) * portp->nvids);
		portp->nvids = 0;
	}
	if (nvids != 0) {
		/* copy the new vids into the port, then free the MD copy */
		portp->vids = kmem_zalloc(sizeof (uint16_t) * nvids, KM_SLEEP);
		bcopy(vids, portp->vids, sizeof (uint16_t) * nvids);
		portp->nvids = nvids;
		kmem_free(vids, sizeof (uint16_t) * nvids);
	}

	/* add port to the new vlans */
	vgen_vlan_add_ids(portp);

	/* reset the port if it is vlan unaware (ver < 1.3) */
	vgen_vlan_unaware_port_reset(portp);

	RW_EXIT(&plistp->rwlock);

	return (DDI_SUCCESS);
}
2669 
2670 static uint64_t
2671 vgen_port_stat(vgen_port_t *portp, uint_t stat)
2672 {
2673 	vgen_ldclist_t	*ldclp;
2674 	vgen_ldc_t *ldcp;
2675 	uint64_t	val;
2676 
2677 	val = 0;
2678 	ldclp = &portp->ldclist;
2679 
2680 	READ_ENTER(&ldclp->rwlock);
2681 	for (ldcp = ldclp->headp; ldcp != NULL; ldcp = ldcp->nextp) {
2682 		val += vgen_ldc_stat(ldcp, stat);
2683 	}
2684 	RW_EXIT(&ldclp->rwlock);
2685 
2686 	return (val);
2687 }
2688 
2689 /* allocate receive resources */
static int
vgen_init_multipools(vgen_ldc_t *ldcp)
{
	size_t		data_sz;	/* frame size rounded up to 2K */
	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
	int		status;
	uint32_t	sz1 = 0;
	uint32_t	sz2 = 0;
	uint32_t	sz3 = 0;
	uint32_t	sz4 = 0;

	/*
	 * We round up the mtu specified to be a multiple of 2K.
	 * We then create rx pools based on the rounded up size.
	 */
	data_sz = vgenp->max_frame_size + VNET_IPALIGN + VNET_LDCALIGN;
	data_sz = VNET_ROUNDUP_2K(data_sz);

	/*
	 * If pool sizes are specified, use them. Note that the presence of
	 * the first tunable will be used as a hint.
	 */
	if (vgen_rbufsz1 != 0) {

		sz1 = vgen_rbufsz1;
		sz2 = vgen_rbufsz2;
		sz3 = vgen_rbufsz3;
		sz4 = vgen_rbufsz4;

		if (sz4 == 0) { /* need 3 pools */

			ldcp->max_rxpool_size = sz3;
			status = vio_init_multipools(&ldcp->vmp,
			    VGEN_NUM_VMPOOLS, sz1, sz2, sz3, vgen_nrbufs1,
			    vgen_nrbufs2, vgen_nrbufs3);

		} else {

			ldcp->max_rxpool_size = sz4;
			status = vio_init_multipools(&ldcp->vmp,
			    VGEN_NUM_VMPOOLS + 1, sz1, sz2, sz3, sz4,
			    vgen_nrbufs1, vgen_nrbufs2, vgen_nrbufs3,
			    vgen_nrbufs4);
		}
		return (status);
	}

	/*
	 * Pool sizes are not specified. We select the pool sizes based on the
	 * mtu if vnet_jumbo_rxpools is enabled.
	 */
	if (vnet_jumbo_rxpools == B_FALSE || data_sz == VNET_2K) {
		/*
		 * Receive buffer pool allocation based on mtu is disabled.
		 * Use the default mechanism of standard size pool allocation.
		 */
		sz1 = VGEN_DBLK_SZ_128;
		sz2 = VGEN_DBLK_SZ_256;
		sz3 = VGEN_DBLK_SZ_2048;
		ldcp->max_rxpool_size = sz3;

		status = vio_init_multipools(&ldcp->vmp, VGEN_NUM_VMPOOLS,
		    sz1, sz2, sz3,
		    vgen_nrbufs1, vgen_nrbufs2, vgen_nrbufs3);

		return (status);
	}

	/* jumbo frames enabled: scale the largest pool(s) with the mtu */
	switch (data_sz) {

	case VNET_4K:

		sz1 = VGEN_DBLK_SZ_128;
		sz2 = VGEN_DBLK_SZ_256;
		sz3 = VGEN_DBLK_SZ_2048;
		sz4 = sz3 << 1;			/* 4K */
		ldcp->max_rxpool_size = sz4;

		status = vio_init_multipools(&ldcp->vmp, VGEN_NUM_VMPOOLS + 1,
		    sz1, sz2, sz3, sz4,
		    vgen_nrbufs1, vgen_nrbufs2, vgen_nrbufs3, vgen_nrbufs4);
		break;

	default:	/* data_sz:  4K+ to 16K */

		sz1 = VGEN_DBLK_SZ_256;
		sz2 = VGEN_DBLK_SZ_2048;
		sz3 = data_sz >> 1;	/* Jumbo-size/2 */
		sz4 = data_sz;		/* Jumbo-size  */
		ldcp->max_rxpool_size = sz4;

		status = vio_init_multipools(&ldcp->vmp, VGEN_NUM_VMPOOLS + 1,
		    sz1, sz2, sz3, sz4,
		    vgen_nrbufs1, vgen_nrbufs2, vgen_nrbufs3, vgen_nrbufs4);
		break;

	}

	return (status);
}
2790 
2791 /* attach the channel corresponding to the given ldc_id to the port */
static int
vgen_ldc_attach(vgen_port_t *portp, uint64_t ldc_id)
{
	vgen_t 		*vgenp;
	vgen_ldclist_t	*ldclp;
	vgen_ldc_t 	*ldcp, **prev_ldcp;
	ldc_attr_t 	attr;
	int 		status;
	ldc_status_t	istatus;
	char		kname[MAXNAMELEN];
	int		instance;
	/*
	 * Bitmap tracking how far the attach got, so the failure path at
	 * the bottom can undo exactly the steps that completed.
	 */
	enum	{AST_init = 0x0, AST_ldc_alloc = 0x1,
		AST_mutex_init = 0x2, AST_ldc_init = 0x4,
		AST_ldc_reg_cb = 0x8, AST_alloc_tx_ring = 0x10,
		AST_create_rxmblks = 0x20,
		AST_create_rcv_thread = 0x40} attach_state;

	attach_state = AST_init;
	vgenp = portp->vgenp;
	ldclp = &portp->ldclist;

	ldcp = kmem_zalloc(sizeof (vgen_ldc_t), KM_NOSLEEP);
	if (ldcp == NULL) {
		goto ldc_attach_failed;
	}
	ldcp->ldc_id = ldc_id;
	ldcp->portp = portp;

	attach_state |= AST_ldc_alloc;

	mutex_init(&ldcp->txlock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&ldcp->cblock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&ldcp->tclock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&ldcp->wrlock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&ldcp->rxlock, NULL, MUTEX_DRIVER, NULL);

	attach_state |= AST_mutex_init;

	/* initialize the LDC endpoint for this channel id */
	attr.devclass = LDC_DEV_NT;
	attr.instance = vgenp->instance;
	attr.mode = LDC_MODE_UNRELIABLE;
	attr.mtu = vnet_ldc_mtu;
	status = ldc_init(ldc_id, &attr, &ldcp->ldc_handle);
	if (status != 0) {
		DWARN(vgenp, ldcp, "ldc_init failed,rv (%d)\n", status);
		goto ldc_attach_failed;
	}
	attach_state |= AST_ldc_init;

	/* optionally create a dedicated receive worker thread */
	if (vgen_rcv_thread_enabled) {
		ldcp->rcv_thr_flags = 0;

		mutex_init(&ldcp->rcv_thr_lock, NULL, MUTEX_DRIVER, NULL);
		cv_init(&ldcp->rcv_thr_cv, NULL, CV_DRIVER, NULL);
		ldcp->rcv_thread = thread_create(NULL, 2 * DEFAULTSTKSZ,
		    vgen_ldc_rcv_worker, ldcp, 0, &p0, TS_RUN, maxclsyspri);

		/* flag is set before the NULL check so cleanup runs either way */
		attach_state |= AST_create_rcv_thread;
		if (ldcp->rcv_thread == NULL) {
			DWARN(vgenp, ldcp, "Failed to create worker thread");
			goto ldc_attach_failed;
		}
	}

	status = ldc_reg_callback(ldcp->ldc_handle, vgen_ldc_cb, (caddr_t)ldcp);
	if (status != 0) {
		DWARN(vgenp, ldcp, "ldc_reg_callback failed, rv (%d)\n",
		    status);
		goto ldc_attach_failed;
	}
	/*
	 * allocate a message for ldc_read()s, big enough to hold ctrl and
	 * data msgs, including raw data msgs used to recv priority frames.
	 */
	ldcp->msglen = VIO_PKT_DATA_HDRSIZE + vgenp->max_frame_size;
	ldcp->ldcmsg = kmem_alloc(ldcp->msglen, KM_SLEEP);
	attach_state |= AST_ldc_reg_cb;

	(void) ldc_status(ldcp->ldc_handle, &istatus);
	ASSERT(istatus == LDC_INIT);
	ldcp->ldc_status = istatus;

	/* allocate transmit resources */
	status = vgen_alloc_tx_ring(ldcp);
	if (status != 0) {
		goto ldc_attach_failed;
	}
	attach_state |= AST_alloc_tx_ring;

	/* allocate receive resources */
	status = vgen_init_multipools(ldcp);
	if (status != 0) {
		/*
		 * We do not return failure if receive mblk pools can't be
		 * allocated; instead allocb(9F) will be used to dynamically
		 * allocate buffers during receive.
		 */
		DWARN(vgenp, ldcp,
		    "vnet%d: status(%d), failed to allocate rx mblk pools for "
		    "channel(0x%lx)\n",
		    vgenp->instance, status, ldcp->ldc_id);
	} else {
		attach_state |= AST_create_rxmblks;
	}

	/* Setup kstats for the channel */
	instance = vgenp->instance;
	(void) sprintf(kname, "vnetldc0x%lx", ldcp->ldc_id);
	ldcp->ksp = vgen_setup_kstats("vnet", instance, kname, &ldcp->stats);
	if (ldcp->ksp == NULL) {
		goto ldc_attach_failed;
	}

	/* initialize vgen_versions supported */
	bcopy(vgen_versions, ldcp->vgen_versions, sizeof (ldcp->vgen_versions));
	vgen_reset_vnet_proto_ops(ldcp);

	/* link it into the list of channels for this port */
	WRITE_ENTER(&ldclp->rwlock);
	prev_ldcp = (vgen_ldc_t **)(&ldclp->headp);
	ldcp->nextp = *prev_ldcp;
	*prev_ldcp = ldcp;
	RW_EXIT(&ldclp->rwlock);

	ldcp->flags |= CHANNEL_ATTACHED;
	return (DDI_SUCCESS);

ldc_attach_failed:
	/* unwind, in reverse order, only the steps recorded in attach_state */
	if (attach_state & AST_ldc_reg_cb) {
		(void) ldc_unreg_callback(ldcp->ldc_handle);
		kmem_free(ldcp->ldcmsg, ldcp->msglen);
	}
	if (attach_state & AST_create_rcv_thread) {
		if (ldcp->rcv_thread != NULL) {
			vgen_stop_rcv_thread(ldcp);
		}
		mutex_destroy(&ldcp->rcv_thr_lock);
		cv_destroy(&ldcp->rcv_thr_cv);
	}
	if (attach_state & AST_create_rxmblks) {
		vio_mblk_pool_t *fvmp = NULL;
		vio_destroy_multipools(&ldcp->vmp, &fvmp);
		ASSERT(fvmp == NULL);
	}
	if (attach_state & AST_alloc_tx_ring) {
		vgen_free_tx_ring(ldcp);
	}
	if (attach_state & AST_ldc_init) {
		(void) ldc_fini(ldcp->ldc_handle);
	}
	if (attach_state & AST_mutex_init) {
		mutex_destroy(&ldcp->tclock);
		mutex_destroy(&ldcp->txlock);
		mutex_destroy(&ldcp->cblock);
		mutex_destroy(&ldcp->wrlock);
		mutex_destroy(&ldcp->rxlock);
	}
	if (attach_state & AST_ldc_alloc) {
		KMEM_FREE(ldcp);
	}
	return (DDI_FAILURE);
}
2954 
2955 /* detach a channel from the port */
static void
vgen_ldc_detach(vgen_ldc_t *ldcp)
{
	vgen_port_t	*portp;
	vgen_t 		*vgenp;
	vgen_ldc_t 	*pldcp;
	vgen_ldc_t	**prev_ldcp;
	vgen_ldclist_t	*ldclp;

	portp = ldcp->portp;
	vgenp = portp->vgenp;
	ldclp = &portp->ldclist;

	/*
	 * Locate ldcp in the port's channel list; prev_ldcp ends up
	 * pointing at the link that references ldcp, for unlinking below.
	 */
	prev_ldcp =  (vgen_ldc_t **)&ldclp->headp;
	for (; (pldcp = *prev_ldcp) != NULL; prev_ldcp = &pldcp->nextp) {
		if (pldcp == ldcp) {
			break;
		}
	}

	if (pldcp == NULL) {
		/* invalid ldcp? */
		return;
	}

	if (ldcp->ldc_status != LDC_INIT) {
		DWARN(vgenp, ldcp, "ldc_status is not INIT\n");
	}

	/* tear down only channels that completed vgen_ldc_attach() */
	if (ldcp->flags & CHANNEL_ATTACHED) {
		ldcp->flags &= ~(CHANNEL_ATTACHED);

		(void) ldc_unreg_callback(ldcp->ldc_handle);
		if (ldcp->rcv_thread != NULL) {
			/* First stop the receive thread */
			vgen_stop_rcv_thread(ldcp);
			mutex_destroy(&ldcp->rcv_thr_lock);
			cv_destroy(&ldcp->rcv_thr_cv);
		}
		kmem_free(ldcp->ldcmsg, ldcp->msglen);

		vgen_destroy_kstats(ldcp->ksp);
		ldcp->ksp = NULL;

		/*
		 * if we cannot reclaim all mblks, put this
		 * on the list of pools(vgenp->rmp) to be reclaimed when the
		 * device gets detached (see vgen_uninit()).
		 */
		vio_destroy_multipools(&ldcp->vmp, &vgenp->rmp);

		/* free transmit resources */
		vgen_free_tx_ring(ldcp);

		(void) ldc_fini(ldcp->ldc_handle);
		mutex_destroy(&ldcp->tclock);
		mutex_destroy(&ldcp->txlock);
		mutex_destroy(&ldcp->cblock);
		mutex_destroy(&ldcp->wrlock);
		mutex_destroy(&ldcp->rxlock);

		/* unlink it from the list */
		*prev_ldcp = ldcp->nextp;
		KMEM_FREE(ldcp);
	}
}
3022 
3023 /*
3024  * This function allocates transmit resources for the channel.
3025  * The resources consist of a transmit descriptor ring and an associated
3026  * transmit buffer ring.
3027  */
static int
vgen_alloc_tx_ring(vgen_ldc_t *ldcp)
{
	void *tbufp;
	ldc_mem_info_t minfo;
	uint32_t txdsize;	/* size of one public (exported) descriptor */
	uint32_t tbufsize;	/* size of one private tx buffer descriptor */
	int status;
	vgen_t *vgenp = LDC_TO_VGEN(ldcp);

	ldcp->num_txds = vnet_ntxds;
	txdsize = sizeof (vnet_public_desc_t);
	tbufsize = sizeof (vgen_private_desc_t);

	/* allocate transmit buffer ring */
	tbufp = kmem_zalloc(ldcp->num_txds * tbufsize, KM_NOSLEEP);
	if (tbufp == NULL) {
		return (DDI_FAILURE);
	}

	/* create transmit descriptor ring */
	status = ldc_mem_dring_create(ldcp->num_txds, txdsize,
	    &ldcp->tx_dhandle);
	if (status) {
		DWARN(vgenp, ldcp, "ldc_mem_dring_create() failed\n");
		kmem_free(tbufp, ldcp->num_txds * tbufsize);
		return (DDI_FAILURE);
	}

	/* get the addr of descriptor ring */
	status = ldc_mem_dring_info(ldcp->tx_dhandle, &minfo);
	if (status) {
		DWARN(vgenp, ldcp, "ldc_mem_dring_info() failed\n");
		kmem_free(tbufp, ldcp->num_txds * tbufsize);
		(void) ldc_mem_dring_destroy(ldcp->tx_dhandle);
		ldcp->tbufp = NULL;
		return (DDI_FAILURE);
	}
	ldcp->txdp = (vnet_public_desc_t *)(minfo.vaddr);
	ldcp->tbufp = tbufp;

	/* cache one-past-the-end pointers for both rings */
	ldcp->txdendp = &((ldcp->txdp)[ldcp->num_txds]);
	ldcp->tbufendp = &((ldcp->tbufp)[ldcp->num_txds]);

	return (DDI_SUCCESS);
}
3074 
3075 /* Free transmit resources for the channel */
3076 static void
3077 vgen_free_tx_ring(vgen_ldc_t *ldcp)
3078 {
3079 	int tbufsize = sizeof (vgen_private_desc_t);
3080 
3081 	/* free transmit descriptor ring */
3082 	(void) ldc_mem_dring_destroy(ldcp->tx_dhandle);
3083 
3084 	/* free transmit buffer ring */
3085 	kmem_free(ldcp->tbufp, ldcp->num_txds * tbufsize);
3086 	ldcp->txdp = ldcp->txdendp = NULL;
3087 	ldcp->tbufp = ldcp->tbufendp = NULL;
3088 }
3089 
3090 /* enable transmit/receive on the channels for the port */
3091 static void
3092 vgen_init_ldcs(vgen_port_t *portp)
3093 {
3094 	vgen_ldclist_t	*ldclp = &portp->ldclist;
3095 	vgen_ldc_t	*ldcp;
3096 
3097 	READ_ENTER(&ldclp->rwlock);
3098 	ldcp =  ldclp->headp;
3099 	for (; ldcp  != NULL; ldcp = ldcp->nextp) {
3100 		(void) vgen_ldc_init(ldcp);
3101 	}
3102 	RW_EXIT(&ldclp->rwlock);
3103 }
3104 
3105 /* stop transmit/receive on the channels for the port */
3106 static void
3107 vgen_uninit_ldcs(vgen_port_t *portp)
3108 {
3109 	vgen_ldclist_t	*ldclp = &portp->ldclist;
3110 	vgen_ldc_t	*ldcp;
3111 
3112 	READ_ENTER(&ldclp->rwlock);
3113 	ldcp =  ldclp->headp;
3114 	for (; ldcp  != NULL; ldcp = ldcp->nextp) {
3115 		vgen_ldc_uninit(ldcp);
3116 	}
3117 	RW_EXIT(&ldclp->rwlock);
3118 }
3119 
/*
 * enable transmit/receive on the channel
 *
 * Opens the ldc channel, allocates the transmit buffers, enables
 * callbacks and tries to bring the link up. If the link comes up
 * immediately, the handshake with the peer is started here as well.
 * Returns DDI_SUCCESS/DDI_FAILURE; on failure all completed setup
 * steps (tracked in init_state) are unwound.
 */
static int
vgen_ldc_init(vgen_ldc_t *ldcp)
{
	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
	ldc_status_t	istatus;
	int		rv;
	uint32_t	retries = 0;
	/* bitmask of completed setup steps, used for unwind on failure */
	enum	{ ST_init = 0x0, ST_ldc_open = 0x1,
		ST_init_tbufs = 0x2, ST_cb_enable = 0x4} init_state;
	init_state = ST_init;

	DBG1(vgenp, ldcp, "enter\n");
	LDC_LOCK(ldcp);

	/* open the ldc channel */
	rv = ldc_open(ldcp->ldc_handle);
	if (rv != 0) {
		DWARN(vgenp, ldcp, "ldc_open failed: rv(%d)\n", rv);
		goto ldcinit_failed;
	}
	init_state |= ST_ldc_open;

	/* the channel must be OPEN or READY after a successful open */
	(void) ldc_status(ldcp->ldc_handle, &istatus);
	if (istatus != LDC_OPEN && istatus != LDC_READY) {
		DWARN(vgenp, ldcp, "status(%d) is not OPEN/READY\n", istatus);
		goto ldcinit_failed;
	}
	ldcp->ldc_status = istatus;

	/* allocate and bind the transmit buffers/descriptors */
	rv = vgen_init_tbufs(ldcp);
	if (rv != 0) {
		DWARN(vgenp, ldcp, "vgen_init_tbufs() failed\n");
		goto ldcinit_failed;
	}
	init_state |= ST_init_tbufs;

	/* enable channel callbacks */
	rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_ENABLE);
	if (rv != 0) {
		DWARN(vgenp, ldcp, "ldc_set_cb_mode failed: rv(%d)\n", rv);
		goto ldcinit_failed;
	}

	init_state |= ST_cb_enable;

	/*
	 * Try to bring the link up, retrying a bounded number of times
	 * while ldc_up() reports EWOULDBLOCK.
	 * NOTE(review): the (rv != 0) test below is redundant given the
	 * (rv == EWOULDBLOCK) check that follows it.
	 */
	do {
		rv = ldc_up(ldcp->ldc_handle);
		if ((rv != 0) && (rv == EWOULDBLOCK)) {
			DBG2(vgenp, ldcp, "ldc_up err rv(%d)\n", rv);
			drv_usecwait(VGEN_LDC_UP_DELAY);
		}
		if (retries++ >= vgen_ldcup_retries)
			break;
	} while (rv == EWOULDBLOCK);

	(void) ldc_status(ldcp->ldc_handle, &istatus);
	if (istatus == LDC_UP) {
		DWARN(vgenp, ldcp, "status(%d) is UP\n", istatus);
	}

	ldcp->ldc_status = istatus;

	/* initialize transmit watchdog timeout */
	ldcp->wd_tid = timeout(vgen_ldc_watchdog, (caddr_t)ldcp,
	    drv_usectohz(vnet_ldcwd_interval * 1000));

	ldcp->hphase = -1;
	ldcp->flags |= CHANNEL_STARTED;

	/* if channel is already UP - start handshake */
	if (istatus == LDC_UP) {
		/* NOTE(review): this declaration shadows the outer vgenp */
		vgen_t *vgenp = LDC_TO_VGEN(ldcp);
		if (ldcp->portp != vgenp->vsw_portp) {
			/*
			 * As the channel is up, use this port from now on.
			 */
			(void) atomic_swap_32(
			    &ldcp->portp->use_vsw_port, B_FALSE);
		}

		/* Initialize local session id */
		ldcp->local_sid = ddi_get_lbolt();

		/* clear peer session id */
		ldcp->peer_sid = 0;
		ldcp->hretries = 0;

		/* Initiate Handshake process with peer ldc endpoint */
		vgen_reset_hphase(ldcp);

		/*
		 * Drop all the channel locks except cblock before starting
		 * the handshake; cblock is released after vgen_handshake().
		 */
		mutex_exit(&ldcp->tclock);
		mutex_exit(&ldcp->txlock);
		mutex_exit(&ldcp->wrlock);
		mutex_exit(&ldcp->rxlock);
		vgen_handshake(vh_nextphase(ldcp));
		mutex_exit(&ldcp->cblock);
	} else {
		LDC_UNLOCK(ldcp);
	}

	return (DDI_SUCCESS);

ldcinit_failed:
	/* unwind only the setup steps that completed, in reverse order */
	if (init_state & ST_cb_enable) {
		(void) ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE);
	}
	if (init_state & ST_init_tbufs) {
		vgen_uninit_tbufs(ldcp);
	}
	if (init_state & ST_ldc_open) {
		(void) ldc_close(ldcp->ldc_handle);
	}
	LDC_UNLOCK(ldcp);
	DBG1(vgenp, ldcp, "exit\n");
	return (DDI_FAILURE);
}
3235 
/*
 * stop transmit/receive on the channel
 *
 * Disables callbacks, cancels the handshake and watchdog timeouts,
 * drains the receive worker thread (if any), tears down the transmit
 * buffers and closes the ldc channel. No-op if the channel was never
 * started. Locks are dropped before untimeout(9F) and the drain to
 * avoid deadlocking with the timeout/callback handlers.
 */
static void
vgen_ldc_uninit(vgen_ldc_t *ldcp)
{
	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
	int	rv;
	uint_t	retries = 0;

	DBG1(vgenp, ldcp, "enter\n");
	LDC_LOCK(ldcp);

	/* nothing to undo if the channel was never started */
	if ((ldcp->flags & CHANNEL_STARTED) == 0) {
		LDC_UNLOCK(ldcp);
		DWARN(vgenp, ldcp, "CHANNEL_STARTED flag is not set\n");
		return;
	}

	/* disable further callbacks */
	rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE);
	if (rv != 0) {
		DWARN(vgenp, ldcp, "ldc_set_cb_mode failed\n");
	}

	/* for the vsw port, report the network resource as down */
	if (vgenp->vsw_portp == ldcp->portp) {
		vio_net_report_err_t rep_err =
		    ldcp->portp->vcb.vio_net_report_err;
		rep_err(ldcp->portp->vhp, VIO_NET_RES_DOWN);
	}

	/*
	 * clear handshake done bit and wait for pending tx and cb to finish.
	 * release locks before untimeout(9F) is invoked to cancel timeouts.
	 */
	ldcp->hphase &= ~(VH_DONE);
	LDC_UNLOCK(ldcp);

	/* cancel handshake watchdog timeout */
	if (ldcp->htid) {
		(void) untimeout(ldcp->htid);
		ldcp->htid = 0;
	}

	/* cancel any pending handshake-timer-cancellation timeout */
	if (ldcp->cancel_htid) {
		(void) untimeout(ldcp->cancel_htid);
		ldcp->cancel_htid = 0;
	}

	/* cancel transmit watchdog timeout */
	if (ldcp->wd_tid) {
		(void) untimeout(ldcp->wd_tid);
		ldcp->wd_tid = 0;
	}

	/* give in-flight transmits/callbacks a moment to finish */
	drv_usecwait(1000);

	if (ldcp->rcv_thread != NULL) {
		/*
		 * Note that callbacks have been disabled already(above). The
		 * drain function takes care of the condition when an already
		 * executing callback signals the worker to start processing or
		 * the worker has already been signalled and is in the middle of
		 * processing.
		 */
		vgen_drain_rcv_thread(ldcp);
	}

	/* acquire locks again; any pending transmits and callbacks are done */
	LDC_LOCK(ldcp);

	vgen_reset_hphase(ldcp);

	vgen_uninit_tbufs(ldcp);

	/* close the channel - retry on EAGAIN */
	while ((rv = ldc_close(ldcp->ldc_handle)) == EAGAIN) {
		if (++retries > vgen_ldccl_retries) {
			break;
		}
		drv_usecwait(VGEN_LDC_CLOSE_DELAY);
	}
	if (rv != 0) {
		cmn_err(CE_NOTE,
		    "!vnet%d: Error(%d) closing the channel(0x%lx)\n",
		    vgenp->instance, rv, ldcp->ldc_id);
	}

	ldcp->ldc_status = LDC_INIT;
	ldcp->flags &= ~(CHANNEL_STARTED);

	LDC_UNLOCK(ldcp);

	DBG1(vgenp, ldcp, "exit\n");
}
3329 
/*
 * Initialize the transmit buffer ring for the channel.
 *
 * Allocates one contiguous, 8K-aligned data area for all descriptors,
 * then for each private descriptor allocates an ldc memory handle and
 * binds it to that descriptor's slice of the data area. On any failure
 * the partially set up state is released via vgen_uninit_tbufs().
 * Returns DDI_SUCCESS/DDI_FAILURE.
 */
static int
vgen_init_tbufs(vgen_ldc_t *ldcp)
{
	vgen_private_desc_t	*tbufp;
	vnet_public_desc_t	*txdp;
	vio_dring_entry_hdr_t		*hdrp;
	int 			i;
	int 			rv;
	caddr_t			datap = NULL;
	int			ci;
	uint32_t		ncookies;
	size_t			data_sz;
	vgen_t			*vgenp;

	vgenp = LDC_TO_VGEN(ldcp);

	/* start with clean private and public descriptor rings */
	bzero(ldcp->tbufp, sizeof (*tbufp) * (ldcp->num_txds));
	bzero(ldcp->txdp, sizeof (*txdp) * (ldcp->num_txds));

	/*
	 * In order to ensure that the number of ldc cookies per descriptor is
	 * limited to be within the default MAX_COOKIES (2), we take the steps
	 * outlined below:
	 *
	 * Align the entire data buffer area to 8K and carve out per descriptor
	 * data buffers starting from this 8K aligned base address.
	 *
	 * We round up the mtu specified to be a multiple of 2K or 4K.
	 * For sizes up to 12K we round up the size to the next 2K.
	 * For sizes > 12K we round up to the next 4K (otherwise sizes such as
	 * 14K could end up needing 3 cookies, with the buffer spread across
	 * 3 8K pages:  8K+6K, 2K+8K+2K, 6K+8K, ...).
	 */
	data_sz = vgenp->max_frame_size + VNET_IPALIGN + VNET_LDCALIGN;
	if (data_sz <= VNET_12K) {
		data_sz = VNET_ROUNDUP_2K(data_sz);
	} else {
		data_sz = VNET_ROUNDUP_4K(data_sz);
	}

	/* allocate extra 8K bytes for alignment */
	ldcp->tx_data_sz = (data_sz * ldcp->num_txds) + VNET_8K;
	datap = kmem_zalloc(ldcp->tx_data_sz, KM_SLEEP);
	/* remember the unaligned base for kmem_free() in uninit */
	ldcp->tx_datap = datap;


	/* align the starting address of the data area to 8K */
	datap = (caddr_t)VNET_ROUNDUP_8K((uintptr_t)datap);

	/*
	 * for each private descriptor, allocate a ldc mem_handle which is
	 * required to map the data during transmit, set the flags
	 * to free (available for use by transmit routine).
	 */

	for (i = 0; i < ldcp->num_txds; i++) {

		tbufp = &(ldcp->tbufp[i]);
		rv = ldc_mem_alloc_handle(ldcp->ldc_handle,
		    &(tbufp->memhandle));
		if (rv) {
			tbufp->memhandle = 0;
			goto init_tbufs_failed;
		}

		/*
		 * bind ldc memhandle to the corresponding transmit buffer.
		 */
		ci = ncookies = 0;
		rv = ldc_mem_bind_handle(tbufp->memhandle,
		    (caddr_t)datap, data_sz, LDC_SHADOW_MAP,
		    LDC_MEM_R, &(tbufp->memcookie[ci]), &ncookies);
		if (rv != 0) {
			goto init_tbufs_failed;
		}

		/*
		 * successful in binding the handle to tx data buffer.
		 * set datap in the private descr to this buffer.
		 */
		tbufp->datap = datap;

		/* a bound buffer must yield 1..MAX_COOKIES cookies */
		if ((ncookies == 0) ||
		    (ncookies > MAX_COOKIES)) {
			goto init_tbufs_failed;
		}

		/* fetch the remaining cookies beyond the first one */
		for (ci = 1; ci < ncookies; ci++) {
			rv = ldc_mem_nextcookie(tbufp->memhandle,
			    &(tbufp->memcookie[ci]));
			if (rv != 0) {
				goto init_tbufs_failed;
			}
		}

		tbufp->ncookies = ncookies;
		datap += data_sz;

		/* mark both private and public descriptors free for tx */
		tbufp->flags = VGEN_PRIV_DESC_FREE;
		txdp = &(ldcp->txdp[i]);
		hdrp = &txdp->hdr;
		hdrp->dstate = VIO_DESC_FREE;
		hdrp->ack = B_FALSE;
		tbufp->descp = txdp;

	}

	/* reset tbuf walking pointers */
	ldcp->next_tbufp = ldcp->tbufp;
	ldcp->cur_tbufp = ldcp->tbufp;

	/* initialize tx seqnum and index */
	ldcp->next_txseq = VNET_ISS;
	ldcp->next_txi = 0;

	ldcp->resched_peer = B_TRUE;
	ldcp->resched_peer_txi = 0;

	return (DDI_SUCCESS);

init_tbufs_failed:;
	/* release whatever was set up so far (handles, bindings, data) */
	vgen_uninit_tbufs(ldcp);
	return (DDI_FAILURE);
}
3455 
3456 /* Uninitialize transmit buffer ring for the channel */
3457 static void
3458 vgen_uninit_tbufs(vgen_ldc_t *ldcp)
3459 {
3460 	vgen_private_desc_t	*tbufp = ldcp->tbufp;
3461 	int 			i;
3462 
3463 	/* for each tbuf (priv_desc), free ldc mem_handle */
3464 	for (i = 0; i < ldcp->num_txds; i++) {
3465 
3466 		tbufp = &(ldcp->tbufp[i]);
3467 
3468 		if (tbufp->datap) { /* if bound to a ldc memhandle */
3469 			(void) ldc_mem_unbind_handle(tbufp->memhandle);
3470 			tbufp->datap = NULL;
3471 		}
3472 		if (tbufp->memhandle) {
3473 			(void) ldc_mem_free_handle(tbufp->memhandle);
3474 			tbufp->memhandle = 0;
3475 		}
3476 	}
3477 
3478 	if (ldcp->tx_datap) {
3479 		/* prealloc'd tx data buffer */
3480 		kmem_free(ldcp->tx_datap, ldcp->tx_data_sz);
3481 		ldcp->tx_datap = NULL;
3482 		ldcp->tx_data_sz = 0;
3483 	}
3484 
3485 	bzero(ldcp->tbufp, sizeof (vgen_private_desc_t) * (ldcp->num_txds));
3486 	bzero(ldcp->txdp, sizeof (vnet_public_desc_t) * (ldcp->num_txds));
3487 }
3488 
/*
 * clobber tx descriptor ring
 *
 * Forces every busy descriptor back to the FREE state and resets the
 * transmit walking pointers, sequence number and index, discarding any
 * in-flight transmits (used on channel reset).
 */
static void
vgen_clobber_tbufs(vgen_ldc_t *ldcp)
{
	vnet_public_desc_t	*txdp;
	vgen_private_desc_t	*tbufp;
	vio_dring_entry_hdr_t	*hdrp;
	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
	int i;
#ifdef DEBUG
	/* count of busy descriptors the peer had already marked DONE */
	int ndone = 0;
#endif

	for (i = 0; i < ldcp->num_txds; i++) {

		tbufp = &(ldcp->tbufp[i]);
		txdp = tbufp->descp;
		hdrp = &txdp->hdr;

		if (tbufp->flags & VGEN_PRIV_DESC_BUSY) {
			tbufp->flags = VGEN_PRIV_DESC_FREE;
#ifdef DEBUG
			if (hdrp->dstate == VIO_DESC_DONE)
				ndone++;
#endif
			hdrp->dstate = VIO_DESC_FREE;
			hdrp->ack = B_FALSE;
		}
	}
	/* reset tbuf walking pointers */
	ldcp->next_tbufp = ldcp->tbufp;
	ldcp->cur_tbufp = ldcp->tbufp;

	/* reset tx seqnum and index */
	ldcp->next_txseq = VNET_ISS;
	ldcp->next_txi = 0;

	ldcp->resched_peer = B_TRUE;
	ldcp->resched_peer_txi = 0;

	/*
	 * NOTE(review): ndone exists only under DEBUG; this presumably
	 * compiles in non-DEBUG builds because DBG2() expands to nothing
	 * there -- confirm against the DBG2 macro definition.
	 */
	DBG2(vgenp, ldcp, "num descrs done (%d)\n", ndone);
}
3531 
3532 /* clobber receive descriptor ring */
3533 static void
3534 vgen_clobber_rxds(vgen_ldc_t *ldcp)
3535 {
3536 	ldcp->rx_dhandle = 0;
3537 	bzero(&ldcp->rx_dcookie, sizeof (ldcp->rx_dcookie));
3538 	ldcp->rxdp = NULL;
3539 	ldcp->next_rxi = 0;
3540 	ldcp->num_rxds = 0;
3541 	ldcp->next_rxseq = VNET_ISS;
3542 }
3543 
3544 /* initialize receive descriptor ring */
3545 static int
3546 vgen_init_rxds(vgen_ldc_t *ldcp, uint32_t num_desc, uint32_t desc_size,
3547 	ldc_mem_cookie_t *dcookie, uint32_t ncookies)
3548 {
3549 	int rv;
3550 	ldc_mem_info_t minfo;
3551 
3552 	rv = ldc_mem_dring_map(ldcp->ldc_handle, dcookie, ncookies, num_desc,
3553 	    desc_size, LDC_DIRECT_MAP, &(ldcp->rx_dhandle));
3554 	if (rv != 0) {
3555 		return (DDI_FAILURE);
3556 	}
3557 
3558 	/*
3559 	 * sucessfully mapped, now try to
3560 	 * get info about the mapped dring
3561 	 */
3562 	rv = ldc_mem_dring_info(ldcp->rx_dhandle, &minfo);
3563 	if (rv != 0) {
3564 		(void) ldc_mem_dring_unmap(ldcp->rx_dhandle);
3565 		return (DDI_FAILURE);
3566 	}
3567 
3568 	/*
3569 	 * save ring address, number of descriptors.
3570 	 */
3571 	ldcp->rxdp = (vnet_public_desc_t *)(minfo.vaddr);
3572 	bcopy(dcookie, &(ldcp->rx_dcookie), sizeof (*dcookie));
3573 	ldcp->num_rxdcookies = ncookies;
3574 	ldcp->num_rxds = num_desc;
3575 	ldcp->next_rxi = 0;
3576 	ldcp->next_rxseq = VNET_ISS;
3577 	ldcp->dring_mtype = minfo.mtype;
3578 
3579 	return (DDI_SUCCESS);
3580 }
3581 
3582 /* get channel statistics */
3583 static uint64_t
3584 vgen_ldc_stat(vgen_ldc_t *ldcp, uint_t stat)
3585 {
3586 	vgen_stats_t *statsp;
3587 	uint64_t val;
3588 
3589 	val = 0;
3590 	statsp = &ldcp->stats;
3591 	switch (stat) {
3592 
3593 	case MAC_STAT_MULTIRCV:
3594 		val = statsp->multircv;
3595 		break;
3596 
3597 	case MAC_STAT_BRDCSTRCV:
3598 		val = statsp->brdcstrcv;
3599 		break;
3600 
3601 	case MAC_STAT_MULTIXMT:
3602 		val = statsp->multixmt;
3603 		break;
3604 
3605 	case MAC_STAT_BRDCSTXMT:
3606 		val = statsp->brdcstxmt;
3607 		break;
3608 
3609 	case MAC_STAT_NORCVBUF:
3610 		val = statsp->norcvbuf;
3611 		break;
3612 
3613 	case MAC_STAT_IERRORS:
3614 		val = statsp->ierrors;
3615 		break;
3616 
3617 	case MAC_STAT_NOXMTBUF:
3618 		val = statsp->noxmtbuf;
3619 		break;
3620 
3621 	case MAC_STAT_OERRORS:
3622 		val = statsp->oerrors;
3623 		break;
3624 
3625 	case MAC_STAT_COLLISIONS:
3626 		break;
3627 
3628 	case MAC_STAT_RBYTES:
3629 		val = statsp->rbytes;
3630 		break;
3631 
3632 	case MAC_STAT_IPACKETS:
3633 		val = statsp->ipackets;
3634 		break;
3635 
3636 	case MAC_STAT_OBYTES:
3637 		val = statsp->obytes;
3638 		break;
3639 
3640 	case MAC_STAT_OPACKETS:
3641 		val = statsp->opackets;
3642 		break;
3643 
3644 	/* stats not relevant to ldc, return 0 */
3645 	case MAC_STAT_IFSPEED:
3646 	case ETHER_STAT_ALIGN_ERRORS:
3647 	case ETHER_STAT_FCS_ERRORS:
3648 	case ETHER_STAT_FIRST_COLLISIONS:
3649 	case ETHER_STAT_MULTI_COLLISIONS:
3650 	case ETHER_STAT_DEFER_XMTS:
3651 	case ETHER_STAT_TX_LATE_COLLISIONS:
3652 	case ETHER_STAT_EX_COLLISIONS:
3653 	case ETHER_STAT_MACXMT_ERRORS:
3654 	case ETHER_STAT_CARRIER_ERRORS:
3655 	case ETHER_STAT_TOOLONG_ERRORS:
3656 	case ETHER_STAT_XCVR_ADDR:
3657 	case ETHER_STAT_XCVR_ID:
3658 	case ETHER_STAT_XCVR_INUSE:
3659 	case ETHER_STAT_CAP_1000FDX:
3660 	case ETHER_STAT_CAP_1000HDX:
3661 	case ETHER_STAT_CAP_100FDX:
3662 	case ETHER_STAT_CAP_100HDX:
3663 	case ETHER_STAT_CAP_10FDX:
3664 	case ETHER_STAT_CAP_10HDX:
3665 	case ETHER_STAT_CAP_ASMPAUSE:
3666 	case ETHER_STAT_CAP_PAUSE:
3667 	case ETHER_STAT_CAP_AUTONEG:
3668 	case ETHER_STAT_ADV_CAP_1000FDX:
3669 	case ETHER_STAT_ADV_CAP_1000HDX:
3670 	case ETHER_STAT_ADV_CAP_100FDX:
3671 	case ETHER_STAT_ADV_CAP_100HDX:
3672 	case ETHER_STAT_ADV_CAP_10FDX:
3673 	case ETHER_STAT_ADV_CAP_10HDX:
3674 	case ETHER_STAT_ADV_CAP_ASMPAUSE:
3675 	case ETHER_STAT_ADV_CAP_PAUSE:
3676 	case ETHER_STAT_ADV_CAP_AUTONEG:
3677 	case ETHER_STAT_LP_CAP_1000FDX:
3678 	case ETHER_STAT_LP_CAP_1000HDX:
3679 	case ETHER_STAT_LP_CAP_100FDX:
3680 	case ETHER_STAT_LP_CAP_100HDX:
3681 	case ETHER_STAT_LP_CAP_10FDX:
3682 	case ETHER_STAT_LP_CAP_10HDX:
3683 	case ETHER_STAT_LP_CAP_ASMPAUSE:
3684 	case ETHER_STAT_LP_CAP_PAUSE:
3685 	case ETHER_STAT_LP_CAP_AUTONEG:
3686 	case ETHER_STAT_LINK_ASMPAUSE:
3687 	case ETHER_STAT_LINK_PAUSE:
3688 	case ETHER_STAT_LINK_AUTONEG:
3689 	case ETHER_STAT_LINK_DUPLEX:
3690 	default:
3691 		val = 0;
3692 		break;
3693 
3694 	}
3695 	return (val);
3696 }
3697 
3698 /*
3699  * LDC channel is UP, start handshake process with peer.
3700  */
3701 static void
3702 vgen_handle_evt_up(vgen_ldc_t *ldcp)
3703 {
3704 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
3705 
3706 	DBG1(vgenp, ldcp, "enter\n");
3707 
3708 	ASSERT(MUTEX_HELD(&ldcp->cblock));
3709 
3710 	if (ldcp->portp != vgenp->vsw_portp) {
3711 		/*
3712 		 * As the channel is up, use this port from now on.
3713 		 */
3714 		(void) atomic_swap_32(&ldcp->portp->use_vsw_port, B_FALSE);
3715 	}
3716 
3717 	/* Initialize local session id */
3718 	ldcp->local_sid = ddi_get_lbolt();
3719 
3720 	/* clear peer session id */
3721 	ldcp->peer_sid = 0;
3722 	ldcp->hretries = 0;
3723 
3724 	if (ldcp->hphase != VH_PHASE0) {
3725 		vgen_handshake_reset(ldcp);
3726 	}
3727 
3728 	/* Initiate Handshake process with peer ldc endpoint */
3729 	vgen_handshake(vh_nextphase(ldcp));
3730 
3731 	DBG1(vgenp, ldcp, "exit\n");
3732 }
3733 
3734 /*
3735  * LDC channel is Reset, terminate connection with peer and try to
3736  * bring the channel up again.
3737  */
3738 static void
3739 vgen_handle_evt_reset(vgen_ldc_t *ldcp)
3740 {
3741 	ldc_status_t istatus;
3742 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
3743 	int	rv;
3744 
3745 	DBG1(vgenp, ldcp, "enter\n");
3746 
3747 	ASSERT(MUTEX_HELD(&ldcp->cblock));
3748 
3749 	if ((ldcp->portp != vgenp->vsw_portp) &&
3750 	    (vgenp->vsw_portp != NULL)) {
3751 		/*
3752 		 * As the channel is down, use the switch port until
3753 		 * the channel becomes ready to be used.
3754 		 */
3755 		(void) atomic_swap_32(&ldcp->portp->use_vsw_port, B_TRUE);
3756 	}
3757 
3758 	if (vgenp->vsw_portp == ldcp->portp) {
3759 		vio_net_report_err_t rep_err =
3760 		    ldcp->portp->vcb.vio_net_report_err;
3761 
3762 		/* Post a reset message */
3763 		rep_err(ldcp->portp->vhp, VIO_NET_RES_DOWN);
3764 	}
3765 
3766 	if (ldcp->hphase != VH_PHASE0) {
3767 		vgen_handshake_reset(ldcp);
3768 	}
3769 
3770 	/* try to bring the channel up */
3771 	rv = ldc_up(ldcp->ldc_handle);
3772 	if (rv != 0) {
3773 		DWARN(vgenp, ldcp, "ldc_up err rv(%d)\n", rv);
3774 	}
3775 
3776 	if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
3777 		DWARN(vgenp, ldcp, "ldc_status err\n");
3778 	} else {
3779 		ldcp->ldc_status = istatus;
3780 	}
3781 
3782 	/* if channel is already UP - restart handshake */
3783 	if (ldcp->ldc_status == LDC_UP) {
3784 		vgen_handle_evt_up(ldcp);
3785 	}
3786 
3787 	DBG1(vgenp, ldcp, "exit\n");
3788 }
3789 
/*
 * Interrupt handler for the channel.
 *
 * Dispatches LDC events in a deliberate order: UP first, then
 * RESET/DOWN (which suppresses any simultaneous READ), then READ.
 * READ is either handled inline or handed to the receive worker
 * thread. On exit, cancels a pending handshake timeout if one was
 * flagged via ldcp->cancel_htid; untimeout(9F) is called only after
 * dropping cblock to avoid deadlocking with the timeout handler.
 * Returns LDC_SUCCESS, or LDC_FAILURE if the channel status could
 * not be determined.
 */
static uint_t
vgen_ldc_cb(uint64_t event, caddr_t arg)
{
	/* NOTE(review): event is in fact used below; _NOTE looks stale */
	_NOTE(ARGUNUSED(event))
	vgen_ldc_t	*ldcp;
	vgen_t		*vgenp;
	ldc_status_t 	istatus;
	vgen_stats_t	*statsp;
	timeout_id_t	cancel_htid = 0;
	uint_t		ret = LDC_SUCCESS;

	ldcp = (vgen_ldc_t *)arg;
	vgenp = LDC_TO_VGEN(ldcp);
	statsp = &ldcp->stats;

	DBG1(vgenp, ldcp, "enter\n");

	mutex_enter(&ldcp->cblock);
	statsp->callbacks++;
	/* ignore events on a channel that is not (or no longer) started */
	if ((ldcp->ldc_status == LDC_INIT) || (ldcp->ldc_handle == NULL)) {
		DWARN(vgenp, ldcp, "status(%d) is LDC_INIT\n",
		    ldcp->ldc_status);
		mutex_exit(&ldcp->cblock);
		return (LDC_SUCCESS);
	}

	/*
	 * cache cancel_htid before the events specific
	 * code may overwrite it. Do not clear ldcp->cancel_htid
	 * as it is also used to indicate the timer to quit immediately.
	 */
	cancel_htid = ldcp->cancel_htid;

	/*
	 * NOTE: not using switch() as event could be triggered by
	 * a state change and a read request. Also the ordering	of the
	 * check for the event types is deliberate.
	 */
	if (event & LDC_EVT_UP) {
		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
			DWARN(vgenp, ldcp, "ldc_status err\n");
			/* status couldn't be determined */
			ret = LDC_FAILURE;
			goto ldc_cb_ret;
		}
		ldcp->ldc_status = istatus;
		if (ldcp->ldc_status != LDC_UP) {
			DWARN(vgenp, ldcp, "LDC_EVT_UP received "
			    " but ldc status is not UP(0x%x)\n",
			    ldcp->ldc_status);
			/* spurious interrupt, return success */
			goto ldc_cb_ret;
		}
		DWARN(vgenp, ldcp, "event(%lx) UP, status(%d)\n",
		    event, ldcp->ldc_status);

		/* channel is up: start handshake with the peer */
		vgen_handle_evt_up(ldcp);

		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);
	}

	/* Handle RESET/DOWN before READ event */
	if (event & (LDC_EVT_RESET | LDC_EVT_DOWN)) {
		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
			DWARN(vgenp, ldcp, "ldc_status error\n");
			/* status couldn't be determined */
			ret = LDC_FAILURE;
			goto ldc_cb_ret;
		}
		ldcp->ldc_status = istatus;
		DWARN(vgenp, ldcp, "event(%lx) RESET/DOWN, status(%d)\n",
		    event, ldcp->ldc_status);

		vgen_handle_evt_reset(ldcp);

		/*
		 * As the channel is down/reset, ignore READ event
		 * but print a debug warning message.
		 */
		if (event & LDC_EVT_READ) {
			DWARN(vgenp, ldcp,
			    "LDC_EVT_READ set along with RESET/DOWN\n");
			event &= ~LDC_EVT_READ;
		}
	}

	if (event & LDC_EVT_READ) {
		DBG2(vgenp, ldcp, "event(%lx) READ, status(%d)\n",
		    event, ldcp->ldc_status);

		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);

		if (ldcp->rcv_thread != NULL) {
			/*
			 * If the receive thread is enabled, then
			 * wakeup the receive thread to process the
			 * LDC messages.
			 */
			mutex_exit(&ldcp->cblock);
			mutex_enter(&ldcp->rcv_thr_lock);
			if (!(ldcp->rcv_thr_flags & VGEN_WTHR_DATARCVD)) {
				ldcp->rcv_thr_flags |= VGEN_WTHR_DATARCVD;
				cv_signal(&ldcp->rcv_thr_cv);
			}
			mutex_exit(&ldcp->rcv_thr_lock);
			mutex_enter(&ldcp->cblock);
		} else  {
			/* no worker thread: process the messages inline */
			vgen_handle_evt_read(ldcp);
		}
	}

ldc_cb_ret:
	/*
	 * Check to see if the status of cancel_htid has
	 * changed. If another timer needs to be cancelled,
	 * then let the next callback to clear it.
	 */
	if (cancel_htid == 0) {
		cancel_htid = ldcp->cancel_htid;
	}
	mutex_exit(&ldcp->cblock);

	if (cancel_htid) {
		/*
		 * Cancel handshake timer.
		 * untimeout(9F) will not return until the pending callback is
		 * cancelled or has run. No problems will result from calling
		 * untimeout if the handler has already completed.
		 * If the timeout handler did run, then it would just
		 * return as cancel_htid is set.
		 */
		DBG2(vgenp, ldcp, "calling cance_htid =0x%X \n", cancel_htid);
		(void) untimeout(cancel_htid);
		mutex_enter(&ldcp->cblock);
		/* clear it only if its the same as the one we cancelled */
		if (ldcp->cancel_htid == cancel_htid) {
			ldcp->cancel_htid = 0;
		}
		mutex_exit(&ldcp->cblock);
	}
	DBG1(vgenp, ldcp, "exit\n");
	return (ret);
}
3934 
/*
 * Process a READ event: drain and dispatch all pending LDC messages.
 *
 * Reads messages from the channel until it is empty, validating the
 * session id (once known) and dispatching each message by VIO type.
 * On ECONNRESET the channel is reset; on other errors the handshake
 * is retried. Called with cblock held from vgen_ldc_cb(), or acquires
 * cblock itself when running in the receive worker thread.
 */
static void
vgen_handle_evt_read(vgen_ldc_t *ldcp)
{
	int		rv;
	uint64_t	*ldcmsg;
	size_t		msglen;
	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
	vio_msg_tag_t	*tagp;
	ldc_status_t 	istatus;
	boolean_t 	has_data;

	DBG1(vgenp, ldcp, "enter\n");

	ldcmsg = ldcp->ldcmsg;
	/*
	 * If the receive thread is enabled, then the cblock
	 * need to be acquired here. If not, the vgen_ldc_cb()
	 * calls this function with cblock held already.
	 */
	if (ldcp->rcv_thread != NULL) {
		mutex_enter(&ldcp->cblock);
	} else {
		ASSERT(MUTEX_HELD(&ldcp->cblock));
	}

vgen_evt_read:
	do {
		msglen = ldcp->msglen;
		rv = ldc_read(ldcp->ldc_handle, (caddr_t)ldcmsg, &msglen);

		if (rv != 0) {
			DWARN(vgenp, ldcp, "err rv(%d) len(%d)\n",
			    rv, msglen);
			if (rv == ECONNRESET)
				goto vgen_evtread_error;
			break;
		}
		/* msglen == 0 means the channel queue is empty */
		if (msglen == 0) {
			DBG2(vgenp, ldcp, "ldc_read NODATA");
			break;
		}
		DBG2(vgenp, ldcp, "ldc_read msglen(%d)", msglen);

		tagp = (vio_msg_tag_t *)ldcmsg;

		if (ldcp->peer_sid) {
			/*
			 * check sid only after we have received peer's sid
			 * in the version negotiate msg.
			 */
#ifdef DEBUG
			if (vgen_hdbg & HDBG_BAD_SID) {
				/* simulate bad sid condition */
				tagp->vio_sid = 0;
				vgen_hdbg &= ~(HDBG_BAD_SID);
			}
#endif
			rv = vgen_check_sid(ldcp, tagp);
			if (rv != VGEN_SUCCESS) {
				/*
				 * If sid mismatch is detected,
				 * reset the channel.
				 */
				ldcp->need_ldc_reset = B_TRUE;
				goto vgen_evtread_error;
			}
		}

		/* dispatch the message by its VIO message type */
		switch (tagp->vio_msgtype) {
		case VIO_TYPE_CTRL:
			rv = vgen_handle_ctrlmsg(ldcp, tagp);
			break;

		case VIO_TYPE_DATA:
			rv = vgen_handle_datamsg(ldcp, tagp, msglen);
			break;

		case VIO_TYPE_ERR:
			vgen_handle_errmsg(ldcp, tagp);
			break;

		default:
			DWARN(vgenp, ldcp, "Unknown VIO_TYPE(%x)\n",
			    tagp->vio_msgtype);
			break;
		}

		/*
		 * If an error is encountered, stop processing and
		 * handle the error.
		 */
		if (rv != 0) {
			goto vgen_evtread_error;
		}

	} while (msglen);

	/* check once more before exiting */
	rv = ldc_chkq(ldcp->ldc_handle, &has_data);
	if ((rv == 0) && (has_data == B_TRUE)) {
		DTRACE_PROBE(vgen_chkq);
		goto vgen_evt_read;
	}

vgen_evtread_error:
	if (rv == ECONNRESET) {
		/* channel was reset underneath us: record status and reset */
		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
			DWARN(vgenp, ldcp, "ldc_status err\n");
		} else {
			ldcp->ldc_status = istatus;
		}
		vgen_handle_evt_reset(ldcp);
	} else if (rv) {
		/* any other error: retry the handshake */
		vgen_handshake_retry(ldcp);
	}

	/*
	 * If the receive thread is enabled, then cancel the
	 * handshake timeout here.
	 */
	if (ldcp->rcv_thread != NULL) {
		timeout_id_t cancel_htid = ldcp->cancel_htid;

		mutex_exit(&ldcp->cblock);
		if (cancel_htid) {
			/*
			 * Cancel handshake timer. untimeout(9F) will
			 * not return until the pending callback is cancelled
			 * or has run. No problems will result from calling
			 * untimeout if the handler has already completed.
			 * If the timeout handler did run, then it would just
			 * return as cancel_htid is set.
			 */
			DBG2(vgenp, ldcp, "calling cance_htid =0x%X \n",
			    cancel_htid);
			(void) untimeout(cancel_htid);

			/*
			 * clear it only if its the same as the one we
			 * cancelled
			 */
			mutex_enter(&ldcp->cblock);
			if (ldcp->cancel_htid == cancel_htid) {
				ldcp->cancel_htid = 0;
			}
			mutex_exit(&ldcp->cblock);
		}
	}

	DBG1(vgenp, ldcp, "exit\n");
}
4086 
4087 /* vgen handshake functions */
4088 
4089 /* change the hphase for the channel to the next phase */
4090 static vgen_ldc_t *
4091 vh_nextphase(vgen_ldc_t *ldcp)
4092 {
4093 	if (ldcp->hphase == VH_PHASE3) {
4094 		ldcp->hphase = VH_DONE;
4095 	} else {
4096 		ldcp->hphase++;
4097 	}
4098 	return (ldcp);
4099 }
4100 
4101 /*
4102  * wrapper routine to send the given message over ldc using ldc_write().
4103  */
4104 static int
4105 vgen_sendmsg(vgen_ldc_t *ldcp, caddr_t msg,  size_t msglen,
4106     boolean_t caller_holds_lock)
4107 {
4108 	int			rv;
4109 	size_t			len;
4110 	uint32_t		retries = 0;
4111 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
4112 	vio_msg_tag_t		*tagp = (vio_msg_tag_t *)msg;
4113 	vio_dring_msg_t		*dmsg;
4114 	vio_raw_data_msg_t	*rmsg;
4115 	boolean_t		data_msg = B_FALSE;
4116 
4117 	len = msglen;
4118 	if ((len == 0) || (msg == NULL))
4119 		return (VGEN_FAILURE);
4120 
4121 	if (!caller_holds_lock) {
4122 		mutex_enter(&ldcp->wrlock);
4123 	}
4124 
4125 	if (tagp->vio_subtype == VIO_SUBTYPE_INFO) {
4126 		if (tagp->vio_subtype_env == VIO_DRING_DATA) {
4127 			dmsg = (vio_dring_msg_t *)tagp;
4128 			dmsg->seq_num = ldcp->next_txseq;
4129 			data_msg = B_TRUE;
4130 		} else if (tagp->vio_subtype_env == VIO_PKT_DATA) {
4131 			rmsg = (vio_raw_data_msg_t *)tagp;
4132 			rmsg->seq_num = ldcp->next_txseq;
4133 			data_msg = B_TRUE;
4134 		}
4135 	}
4136 
4137 	do {
4138 		len = msglen;
4139 		rv = ldc_write(ldcp->ldc_handle, (caddr_t)msg, &len);
4140 		if (retries++ >= vgen_ldcwr_retries)
4141 			break;
4142 	} while (rv == EWOULDBLOCK);
4143 
4144 	if (rv == 0 && data_msg == B_TRUE) {
4145 		ldcp->next_txseq++;
4146 	}
4147 
4148 	if (!caller_holds_lock) {
4149 		mutex_exit(&ldcp->wrlock);
4150 	}
4151 
4152 	if (rv != 0) {
4153 		DWARN(vgenp, ldcp, "ldc_write failed: rv(%d)\n",
4154 		    rv, msglen);
4155 		return (rv);
4156 	}
4157 
4158 	if (len != msglen) {
4159 		DWARN(vgenp, ldcp, "ldc_write failed: rv(%d) msglen (%d)\n",
4160 		    rv, msglen);
4161 		return (VGEN_FAILURE);
4162 	}
4163 
4164 	return (VGEN_SUCCESS);
4165 }
4166 
4167 /* send version negotiate message to the peer over ldc */
4168 static int
4169 vgen_send_version_negotiate(vgen_ldc_t *ldcp)
4170 {
4171 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4172 	vio_ver_msg_t	vermsg;
4173 	vio_msg_tag_t	*tagp = &vermsg.tag;
4174 	int		rv;
4175 
4176 	bzero(&vermsg, sizeof (vermsg));
4177 
4178 	tagp->vio_msgtype = VIO_TYPE_CTRL;
4179 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
4180 	tagp->vio_subtype_env = VIO_VER_INFO;
4181 	tagp->vio_sid = ldcp->local_sid;
4182 
4183 	/* get version msg payload from ldcp->local */
4184 	vermsg.ver_major = ldcp->local_hparams.ver_major;
4185 	vermsg.ver_minor = ldcp->local_hparams.ver_minor;
4186 	vermsg.dev_class = ldcp->local_hparams.dev_class;
4187 
4188 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (vermsg), B_FALSE);
4189 	if (rv != VGEN_SUCCESS) {
4190 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
4191 		return (rv);
4192 	}
4193 
4194 	ldcp->hstate |= VER_INFO_SENT;
4195 	DBG2(vgenp, ldcp, "VER_INFO_SENT ver(%d,%d)\n",
4196 	    vermsg.ver_major, vermsg.ver_minor);
4197 
4198 	return (VGEN_SUCCESS);
4199 }
4200 
4201 /* send attr info message to the peer over ldc */
4202 static int
4203 vgen_send_attr_info(vgen_ldc_t *ldcp)
4204 {
4205 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4206 	vnet_attr_msg_t	attrmsg;
4207 	vio_msg_tag_t	*tagp = &attrmsg.tag;
4208 	int		rv;
4209 
4210 	bzero(&attrmsg, sizeof (attrmsg));
4211 
4212 	tagp->vio_msgtype = VIO_TYPE_CTRL;
4213 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
4214 	tagp->vio_subtype_env = VIO_ATTR_INFO;
4215 	tagp->vio_sid = ldcp->local_sid;
4216 
4217 	/* get attr msg payload from ldcp->local */
4218 	attrmsg.mtu = ldcp->local_hparams.mtu;
4219 	attrmsg.addr = ldcp->local_hparams.addr;
4220 	attrmsg.addr_type = ldcp->local_hparams.addr_type;
4221 	attrmsg.xfer_mode = ldcp->local_hparams.xfer_mode;
4222 	attrmsg.ack_freq = ldcp->local_hparams.ack_freq;
4223 
4224 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (attrmsg), B_FALSE);
4225 	if (rv != VGEN_SUCCESS) {
4226 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
4227 		return (rv);
4228 	}
4229 
4230 	ldcp->hstate |= ATTR_INFO_SENT;
4231 	DBG2(vgenp, ldcp, "ATTR_INFO_SENT\n");
4232 
4233 	return (VGEN_SUCCESS);
4234 }
4235 
4236 /* send descriptor ring register message to the peer over ldc */
4237 static int
4238 vgen_send_dring_reg(vgen_ldc_t *ldcp)
4239 {
4240 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
4241 	vio_dring_reg_msg_t	msg;
4242 	vio_msg_tag_t		*tagp = &msg.tag;
4243 	int		rv;
4244 
4245 	bzero(&msg, sizeof (msg));
4246 
4247 	tagp->vio_msgtype = VIO_TYPE_CTRL;
4248 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
4249 	tagp->vio_subtype_env = VIO_DRING_REG;
4250 	tagp->vio_sid = ldcp->local_sid;
4251 
4252 	/* get dring info msg payload from ldcp->local */
4253 	bcopy(&(ldcp->local_hparams.dring_cookie), (msg.cookie),
4254 	    sizeof (ldc_mem_cookie_t));
4255 	msg.ncookies = ldcp->local_hparams.num_dcookies;
4256 	msg.num_descriptors = ldcp->local_hparams.num_desc;
4257 	msg.descriptor_size = ldcp->local_hparams.desc_size;
4258 
4259 	/*
4260 	 * dring_ident is set to 0. After mapping the dring, peer sets this
4261 	 * value and sends it in the ack, which is saved in
4262 	 * vgen_handle_dring_reg().
4263 	 */
4264 	msg.dring_ident = 0;
4265 
4266 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (msg), B_FALSE);
4267 	if (rv != VGEN_SUCCESS) {
4268 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
4269 		return (rv);
4270 	}
4271 
4272 	ldcp->hstate |= DRING_INFO_SENT;
4273 	DBG2(vgenp, ldcp, "DRING_INFO_SENT \n");
4274 
4275 	return (VGEN_SUCCESS);
4276 }
4277 
4278 static int
4279 vgen_send_rdx_info(vgen_ldc_t *ldcp)
4280 {
4281 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4282 	vio_rdx_msg_t	rdxmsg;
4283 	vio_msg_tag_t	*tagp = &rdxmsg.tag;
4284 	int		rv;
4285 
4286 	bzero(&rdxmsg, sizeof (rdxmsg));
4287 
4288 	tagp->vio_msgtype = VIO_TYPE_CTRL;
4289 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
4290 	tagp->vio_subtype_env = VIO_RDX;
4291 	tagp->vio_sid = ldcp->local_sid;
4292 
4293 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (rdxmsg), B_FALSE);
4294 	if (rv != VGEN_SUCCESS) {
4295 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
4296 		return (rv);
4297 	}
4298 
4299 	ldcp->hstate |= RDX_INFO_SENT;
4300 	DBG2(vgenp, ldcp, "RDX_INFO_SENT\n");
4301 
4302 	return (VGEN_SUCCESS);
4303 }
4304 
4305 /* send descriptor ring data message to the peer over ldc */
4306 static int
4307 vgen_send_dring_data(vgen_ldc_t *ldcp, uint32_t start, int32_t end)
4308 {
4309 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4310 	vio_dring_msg_t	dringmsg, *msgp = &dringmsg;
4311 	vio_msg_tag_t	*tagp = &msgp->tag;
4312 	vgen_stats_t	*statsp = &ldcp->stats;
4313 	int		rv;
4314 
4315 	bzero(msgp, sizeof (*msgp));
4316 
4317 	tagp->vio_msgtype = VIO_TYPE_DATA;
4318 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
4319 	tagp->vio_subtype_env = VIO_DRING_DATA;
4320 	tagp->vio_sid = ldcp->local_sid;
4321 
4322 	msgp->dring_ident = ldcp->local_hparams.dring_ident;
4323 	msgp->start_idx = start;
4324 	msgp->end_idx = end;
4325 
4326 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (dringmsg), B_TRUE);
4327 	if (rv != VGEN_SUCCESS) {
4328 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
4329 		return (rv);
4330 	}
4331 
4332 	statsp->dring_data_msgs++;
4333 
4334 	DBG2(vgenp, ldcp, "DRING_DATA_SENT \n");
4335 
4336 	return (VGEN_SUCCESS);
4337 }
4338 
/*
 * Send multicast addr info message to vsw: pushes the entire multicast
 * table (vgenp->mctab, vgenp->mccount entries) to the peer, in chunks of
 * at most VNET_NUM_MCAST addresses per message.
 */
static int
vgen_send_mcast_info(vgen_ldc_t *ldcp)
{
	vnet_mcast_msg_t	mcastmsg;
	vnet_mcast_msg_t	*msgp;
	vio_msg_tag_t		*tagp;
	vgen_t			*vgenp;
	struct ether_addr	*mca;
	int			rv;
	int			i;	/* index of next addr in mctab */
	uint32_t		size;	/* payload size of this chunk, bytes */
	uint32_t		mccount;	/* addrs still to be sent */
	uint32_t		n;	/* addrs carried in this message */

	msgp = &mcastmsg;
	tagp = &msgp->tag;
	vgenp = LDC_TO_VGEN(ldcp);

	mccount = vgenp->mccount;
	i = 0;

	/*
	 * NOTE(review): if mccount is 0 this do-while still sends one
	 * message with count == 0 -- presumably to sync an empty table;
	 * confirm that vsw treats a zero-count "set" as a no-op.
	 */
	do {
		tagp->vio_msgtype = VIO_TYPE_CTRL;
		tagp->vio_subtype = VIO_SUBTYPE_INFO;
		tagp->vio_subtype_env = VNET_MCAST_INFO;
		tagp->vio_sid = ldcp->local_sid;

		/* cap this chunk at VNET_NUM_MCAST addresses */
		n = ((mccount >= VNET_NUM_MCAST) ? VNET_NUM_MCAST : mccount);
		size = n * sizeof (struct ether_addr);

		mca = &(vgenp->mctab[i]);
		bcopy(mca, (msgp->mca), size);
		msgp->set = B_TRUE;
		msgp->count = n;

		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msgp),
		    B_FALSE);
		if (rv != VGEN_SUCCESS) {
			DWARN(vgenp, ldcp, "vgen_sendmsg err(%d)\n", rv);
			return (rv);
		}

		mccount -= n;
		i += n;

	} while (mccount);

	return (VGEN_SUCCESS);
}
4389 
/*
 * Initiate Phase 2 of handshake: send our attribute info to the peer,
 * bind the tx descriptor ring to the channel (first time only), then
 * register the ring with the peer.  Returns VGEN_SUCCESS or the error
 * from the failing step.
 */
static int
vgen_handshake_phase2(vgen_ldc_t *ldcp)
{
	int rv;
	uint32_t ncookies = 0;
	vgen_t *vgenp = LDC_TO_VGEN(ldcp);

#ifdef DEBUG
	if (vgen_hdbg & HDBG_OUT_STATE) {
		/* simulate out of state condition */
		vgen_hdbg &= ~(HDBG_OUT_STATE);
		rv = vgen_send_rdx_info(ldcp);
		return (rv);
	}
	if (vgen_hdbg & HDBG_TIMEOUT) {
		/* simulate timeout condition */
		vgen_hdbg &= ~(HDBG_TIMEOUT);
		return (VGEN_SUCCESS);
	}
#endif
	rv = vgen_send_attr_info(ldcp);
	if (rv != VGEN_SUCCESS) {
		return (rv);
	}

	/*
	 * Bind descriptor ring to the channel; skipped when already bound
	 * (num_txdcookies != 0), e.g. on a handshake restart.
	 */
	if (ldcp->num_txdcookies == 0) {
		rv = ldc_mem_dring_bind(ldcp->ldc_handle, ldcp->tx_dhandle,
		    LDC_DIRECT_MAP | LDC_SHADOW_MAP, LDC_MEM_RW,
		    &ldcp->tx_dcookie, &ncookies);
		if (rv != 0) {
			DWARN(vgenp, ldcp, "ldc_mem_dring_bind failed "
			    "rv(%x)\n", rv);
			return (rv);
		}
		/* the ring is expected to bind to exactly one cookie */
		ASSERT(ncookies == 1);
		ldcp->num_txdcookies = ncookies;
	}

	/* update local dring_info params */
	bcopy(&(ldcp->tx_dcookie), &(ldcp->local_hparams.dring_cookie),
	    sizeof (ldc_mem_cookie_t));
	ldcp->local_hparams.num_dcookies = ldcp->num_txdcookies;
	ldcp->local_hparams.num_desc = ldcp->num_txds;
	ldcp->local_hparams.desc_size = sizeof (vnet_public_desc_t);

	rv = vgen_send_dring_reg(ldcp);
	if (rv != VGEN_SUCCESS) {
		return (rv);
	}

	return (VGEN_SUCCESS);
}
4444 
4445 /*
4446  * Set vnet-protocol-version dependent functions based on version.
4447  */
4448 static void
4449 vgen_set_vnet_proto_ops(vgen_ldc_t *ldcp)
4450 {
4451 	vgen_hparams_t	*lp = &ldcp->local_hparams;
4452 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4453 
4454 	if (VGEN_VER_GTEQ(ldcp, 1, 4)) {
4455 		/*
4456 		 * If the version negotiated with peer is >= 1.4(Jumbo Frame
4457 		 * Support), set the mtu in our attributes to max_frame_size.
4458 		 */
4459 		lp->mtu = vgenp->max_frame_size;
4460 	} else  if (VGEN_VER_EQ(ldcp, 1, 3)) {
4461 		/*
4462 		 * If the version negotiated with peer is == 1.3 (Vlan Tag
4463 		 * Support) set the attr.mtu to ETHERMAX + VLAN_TAGSZ.
4464 		 */
4465 		lp->mtu = ETHERMAX + VLAN_TAGSZ;
4466 	} else {
4467 		vgen_port_t	*portp = ldcp->portp;
4468 		vnet_t		*vnetp = vgenp->vnetp;
4469 		/*
4470 		 * Pre-1.3 peers expect max frame size of ETHERMAX.
4471 		 * We can negotiate that size with those peers provided the
4472 		 * following conditions are true:
4473 		 * - Only pvid is defined for our peer and there are no vids.
4474 		 * - pvids are equal.
4475 		 * If the above conditions are true, then we can send/recv only
4476 		 * untagged frames of max size ETHERMAX.
4477 		 */
4478 		if (portp->nvids == 0 && portp->pvid == vnetp->pvid) {
4479 			lp->mtu = ETHERMAX;
4480 		}
4481 	}
4482 
4483 	if (VGEN_VER_GTEQ(ldcp, 1, 2)) {
4484 		/* Versions >= 1.2 */
4485 
4486 		if (VGEN_PRI_ETH_DEFINED(vgenp)) {
4487 			/*
4488 			 * enable priority routines and pkt mode only if
4489 			 * at least one pri-eth-type is specified in MD.
4490 			 */
4491 
4492 			ldcp->tx = vgen_ldcsend;
4493 			ldcp->rx_pktdata = vgen_handle_pkt_data;
4494 
4495 			/* set xfer mode for vgen_send_attr_info() */
4496 			lp->xfer_mode = VIO_PKT_MODE | VIO_DRING_MODE_V1_2;
4497 
4498 		} else {
4499 			/* no priority eth types defined in MD */
4500 
4501 			ldcp->tx = vgen_ldcsend_dring;
4502 			ldcp->rx_pktdata = vgen_handle_pkt_data_nop;
4503 
4504 			/* set xfer mode for vgen_send_attr_info() */
4505 			lp->xfer_mode = VIO_DRING_MODE_V1_2;
4506 
4507 		}
4508 	} else {
4509 		/* Versions prior to 1.2  */
4510 
4511 		vgen_reset_vnet_proto_ops(ldcp);
4512 	}
4513 }
4514 
4515 /*
4516  * Reset vnet-protocol-version dependent functions to pre-v1.2.
4517  */
4518 static void
4519 vgen_reset_vnet_proto_ops(vgen_ldc_t *ldcp)
4520 {
4521 	vgen_hparams_t	*lp = &ldcp->local_hparams;
4522 
4523 	ldcp->tx = vgen_ldcsend_dring;
4524 	ldcp->rx_pktdata = vgen_handle_pkt_data_nop;
4525 
4526 	/* set xfer mode for vgen_send_attr_info() */
4527 	lp->xfer_mode = VIO_DRING_MODE_V1_0;
4528 }
4529 
/*
 * Reset the channel of the given port if the handshake completed with a
 * vlan-unaware peer (negotiated version < 1.3) while the port now needs
 * vlan support (vids configured, or pvid differing from the vnet's pvid).
 * Takes the port's ldclist rwlock (reader) and the channel's cblock.
 */
static void
vgen_vlan_unaware_port_reset(vgen_port_t *portp)
{
	vgen_ldclist_t	*ldclp;
	vgen_ldc_t	*ldcp;
	vgen_t		*vgenp = portp->vgenp;
	vnet_t		*vnetp = vgenp->vnetp;

	ldclp = &portp->ldclist;

	READ_ENTER(&ldclp->rwlock);

	/*
	 * NOTE: for now, we will assume we have a single channel.
	 */
	if (ldclp->headp == NULL) {
		RW_EXIT(&ldclp->rwlock);
		return;
	}
	ldcp = ldclp->headp;

	mutex_enter(&ldcp->cblock);

	/*
	 * If the peer is vlan_unaware(ver < 1.3), reset channel and terminate
	 * the connection. See comments in vgen_set_vnet_proto_ops().
	 */
	if (ldcp->hphase == VH_DONE && VGEN_VER_LT(ldcp, 1, 3) &&
	    (portp->nvids != 0 || portp->pvid != vnetp->pvid)) {
		ldcp->need_ldc_reset = B_TRUE;
		vgen_handshake_retry(ldcp);
	}

	mutex_exit(&ldcp->cblock);

	RW_EXIT(&ldclp->rwlock);
}
4567 
4568 static void
4569 vgen_reset_vlan_unaware_ports(vgen_t *vgenp)
4570 {
4571 	vgen_port_t	*portp;
4572 	vgen_portlist_t	*plistp;
4573 
4574 	plistp = &(vgenp->vgenports);
4575 	READ_ENTER(&plistp->rwlock);
4576 
4577 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
4578 
4579 		vgen_vlan_unaware_port_reset(portp);
4580 
4581 	}
4582 
4583 	RW_EXIT(&plistp->rwlock);
4584 }
4585 
4586 /*
4587  * This function resets the handshake phase to VH_PHASE0(pre-handshake phase).
4588  * This can happen after a channel comes up (status: LDC_UP) or
4589  * when handshake gets terminated due to various conditions.
4590  */
4591 static void
4592 vgen_reset_hphase(vgen_ldc_t *ldcp)
4593 {
4594 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4595 	ldc_status_t istatus;
4596 	int rv;
4597 
4598 	DBG1(vgenp, ldcp, "enter\n");
4599 	/* reset hstate and hphase */
4600 	ldcp->hstate = 0;
4601 	ldcp->hphase = VH_PHASE0;
4602 
4603 	vgen_reset_vnet_proto_ops(ldcp);
4604 
4605 	/*
4606 	 * Save the id of pending handshake timer in cancel_htid.
4607 	 * This will be checked in vgen_ldc_cb() and the handshake timer will
4608 	 * be cancelled after releasing cblock.
4609 	 */
4610 	if (ldcp->htid) {
4611 		ldcp->cancel_htid = ldcp->htid;
4612 		ldcp->htid = 0;
4613 	}
4614 
4615 	if (ldcp->local_hparams.dring_ready) {
4616 		ldcp->local_hparams.dring_ready = B_FALSE;
4617 	}
4618 
4619 	/* Unbind tx descriptor ring from the channel */
4620 	if (ldcp->num_txdcookies) {
4621 		rv = ldc_mem_dring_unbind(ldcp->tx_dhandle);
4622 		if (rv != 0) {
4623 			DWARN(vgenp, ldcp, "ldc_mem_dring_unbind failed\n");
4624 		}
4625 		ldcp->num_txdcookies = 0;
4626 	}
4627 
4628 	if (ldcp->peer_hparams.dring_ready) {
4629 		ldcp->peer_hparams.dring_ready = B_FALSE;
4630 		/* Unmap peer's dring */
4631 		(void) ldc_mem_dring_unmap(ldcp->rx_dhandle);
4632 		vgen_clobber_rxds(ldcp);
4633 	}
4634 
4635 	vgen_clobber_tbufs(ldcp);
4636 
4637 	/*
4638 	 * clear local handshake params and initialize.
4639 	 */
4640 	bzero(&(ldcp->local_hparams), sizeof (ldcp->local_hparams));
4641 
4642 	/* set version to the highest version supported */
4643 	ldcp->local_hparams.ver_major =
4644 	    ldcp->vgen_versions[0].ver_major;
4645 	ldcp->local_hparams.ver_minor =
4646 	    ldcp->vgen_versions[0].ver_minor;
4647 	ldcp->local_hparams.dev_class = VDEV_NETWORK;
4648 
4649 	/* set attr_info params */
4650 	ldcp->local_hparams.mtu = vgenp->max_frame_size;
4651 	ldcp->local_hparams.addr =
4652 	    vnet_macaddr_strtoul(vgenp->macaddr);
4653 	ldcp->local_hparams.addr_type = ADDR_TYPE_MAC;
4654 	ldcp->local_hparams.xfer_mode = VIO_DRING_MODE_V1_0;
4655 	ldcp->local_hparams.ack_freq = 0;	/* don't need acks */
4656 
4657 	/*
4658 	 * Note: dring is created, but not bound yet.
4659 	 * local dring_info params will be updated when we bind the dring in
4660 	 * vgen_handshake_phase2().
4661 	 * dring_ident is set to 0. After mapping the dring, peer sets this
4662 	 * value and sends it in the ack, which is saved in
4663 	 * vgen_handle_dring_reg().
4664 	 */
4665 	ldcp->local_hparams.dring_ident = 0;
4666 
4667 	/* clear peer_hparams */
4668 	bzero(&(ldcp->peer_hparams), sizeof (ldcp->peer_hparams));
4669 
4670 	/* reset the channel if required */
4671 	if (ldcp->need_ldc_reset) {
4672 		DWARN(vgenp, ldcp, "Doing Channel Reset...\n");
4673 		ldcp->need_ldc_reset = B_FALSE;
4674 		(void) ldc_down(ldcp->ldc_handle);
4675 		(void) ldc_status(ldcp->ldc_handle, &istatus);
4676 		DBG2(vgenp, ldcp, "Reset Done,ldc_status(%x)\n", istatus);
4677 		ldcp->ldc_status = istatus;
4678 
4679 		/* clear sids */
4680 		ldcp->local_sid = 0;
4681 		ldcp->peer_sid = 0;
4682 
4683 		/* try to bring the channel up */
4684 		rv = ldc_up(ldcp->ldc_handle);
4685 		if (rv != 0) {
4686 			DWARN(vgenp, ldcp, "ldc_up err rv(%d)\n", rv);
4687 		}
4688 
4689 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
4690 			DWARN(vgenp, ldcp, "ldc_status err\n");
4691 		} else {
4692 			ldcp->ldc_status = istatus;
4693 		}
4694 	}
4695 }
4696 
/*
 * Wrapper function for vgen_reset_hphase: acquires the data-path locks
 * (rxlock, wrlock, txlock, tclock -- in that order) while the caller
 * holds cblock, so the handshake state can be reset with the whole
 * channel quiesced.
 */
static void
vgen_handshake_reset(vgen_ldc_t *ldcp)
{
	/* caller must already hold the channel's cblock */
	ASSERT(MUTEX_HELD(&ldcp->cblock));
	mutex_enter(&ldcp->rxlock);
	mutex_enter(&ldcp->wrlock);
	mutex_enter(&ldcp->txlock);
	mutex_enter(&ldcp->tclock);

	vgen_reset_hphase(ldcp);

	/* release in reverse acquisition order */
	mutex_exit(&ldcp->tclock);
	mutex_exit(&ldcp->txlock);
	mutex_exit(&ldcp->wrlock);
	mutex_exit(&ldcp->rxlock);
}
4714 
4715 /*
4716  * Initiate handshake with the peer by sending various messages
4717  * based on the handshake-phase that the channel is currently in.
4718  */
4719 static void
4720 vgen_handshake(vgen_ldc_t *ldcp)
4721 {
4722 	uint32_t hphase = ldcp->hphase;
4723 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
4724 	ldc_status_t	istatus;
4725 	int	rv = 0;
4726 
4727 	switch (hphase) {
4728 
4729 	case VH_PHASE1:
4730 
4731 		/*
4732 		 * start timer, for entire handshake process, turn this timer
4733 		 * off if all phases of handshake complete successfully and
4734 		 * hphase goes to VH_DONE(below) or
4735 		 * vgen_reset_hphase() gets called or
4736 		 * channel is reset due to errors or
4737 		 * vgen_ldc_uninit() is invoked(vgen_stop).
4738 		 */
4739 		ASSERT(ldcp->htid == 0);
4740 		ldcp->htid = timeout(vgen_hwatchdog, (caddr_t)ldcp,
4741 		    drv_usectohz(vgen_hwd_interval * MICROSEC));
4742 
4743 		/* Phase 1 involves negotiating the version */
4744 		rv = vgen_send_version_negotiate(ldcp);
4745 		break;
4746 
4747 	case VH_PHASE2:
4748 		rv = vgen_handshake_phase2(ldcp);
4749 		break;
4750 
4751 	case VH_PHASE3:
4752 		rv = vgen_send_rdx_info(ldcp);
4753 		break;
4754 
4755 	case VH_DONE:
4756 		/*
4757 		 * Save the id of pending handshake timer in cancel_htid.
4758 		 * This will be checked in vgen_ldc_cb() and the handshake
4759 		 * timer will be cancelled after releasing cblock.
4760 		 */
4761 		if (ldcp->htid) {
4762 			ldcp->cancel_htid = ldcp->htid;
4763 			ldcp->htid = 0;
4764 		}
4765 		ldcp->hretries = 0;
4766 		DBG1(vgenp, ldcp, "Handshake Done\n");
4767 
4768 		if (ldcp->portp == vgenp->vsw_portp) {
4769 			/*
4770 			 * If this channel(port) is connected to vsw,
4771 			 * need to sync multicast table with vsw.
4772 			 */
4773 			mutex_exit(&ldcp->cblock);
4774 
4775 			mutex_enter(&vgenp->lock);
4776 			rv = vgen_send_mcast_info(ldcp);
4777 			mutex_exit(&vgenp->lock);
4778 
4779 			mutex_enter(&ldcp->cblock);
4780 			if (rv != VGEN_SUCCESS)
4781 				break;
4782 		}
4783 
4784 		/*
4785 		 * Check if mac layer should be notified to restart
4786 		 * transmissions. This can happen if the channel got
4787 		 * reset and vgen_clobber_tbufs() is called, while
4788 		 * need_resched is set.
4789 		 */
4790 		mutex_enter(&ldcp->tclock);
4791 		if (ldcp->need_resched) {
4792 			vio_net_tx_update_t vtx_update =
4793 			    ldcp->portp->vcb.vio_net_tx_update;
4794 
4795 			ldcp->need_resched = B_FALSE;
4796 			vtx_update(ldcp->portp->vhp);
4797 		}
4798 		mutex_exit(&ldcp->tclock);
4799 
4800 		break;
4801 
4802 	default:
4803 		break;
4804 	}
4805 
4806 	if (rv == ECONNRESET) {
4807 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
4808 			DWARN(vgenp, ldcp, "ldc_status err\n");
4809 		} else {
4810 			ldcp->ldc_status = istatus;
4811 		}
4812 		vgen_handle_evt_reset(ldcp);
4813 	} else if (rv) {
4814 		vgen_handshake_reset(ldcp);
4815 	}
4816 }
4817 
4818 /*
4819  * Check if the current handshake phase has completed successfully and
4820  * return the status.
4821  */
4822 static int
4823 vgen_handshake_done(vgen_ldc_t *ldcp)
4824 {
4825 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4826 	uint32_t	hphase = ldcp->hphase;
4827 	int 		status = 0;
4828 
4829 	switch (hphase) {
4830 
4831 	case VH_PHASE1:
4832 		/*
4833 		 * Phase1 is done, if version negotiation
4834 		 * completed successfully.
4835 		 */
4836 		status = ((ldcp->hstate & VER_NEGOTIATED) ==
4837 		    VER_NEGOTIATED);
4838 		break;
4839 
4840 	case VH_PHASE2:
4841 		/*
4842 		 * Phase 2 is done, if attr info and dring info
4843 		 * have been exchanged successfully.
4844 		 */
4845 		status = (((ldcp->hstate & ATTR_INFO_EXCHANGED) ==
4846 		    ATTR_INFO_EXCHANGED) &&
4847 		    ((ldcp->hstate & DRING_INFO_EXCHANGED) ==
4848 		    DRING_INFO_EXCHANGED));
4849 		break;
4850 
4851 	case VH_PHASE3:
4852 		/* Phase 3 is done, if rdx msg has been exchanged */
4853 		status = ((ldcp->hstate & RDX_EXCHANGED) ==
4854 		    RDX_EXCHANGED);
4855 		break;
4856 
4857 	default:
4858 		break;
4859 	}
4860 
4861 	if (status == 0) {
4862 		return (VGEN_FAILURE);
4863 	}
4864 	DBG2(vgenp, ldcp, "PHASE(%d)\n", hphase);
4865 	return (VGEN_SUCCESS);
4866 }
4867 
4868 /* retry handshake on failure */
4869 static void
4870 vgen_handshake_retry(vgen_ldc_t *ldcp)
4871 {
4872 	/* reset handshake phase */
4873 	vgen_handshake_reset(ldcp);
4874 
4875 	/* handshake retry is specified and the channel is UP */
4876 	if (vgen_max_hretries && (ldcp->ldc_status == LDC_UP)) {
4877 		if (ldcp->hretries++ < vgen_max_hretries) {
4878 			ldcp->local_sid = ddi_get_lbolt();
4879 			vgen_handshake(vh_nextphase(ldcp));
4880 		}
4881 	}
4882 }
4883 
4884 /*
4885  * Handle a version info msg from the peer or an ACK/NACK from the peer
4886  * to a version info msg that we sent.
4887  */
4888 static int
4889 vgen_handle_version_negotiate(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4890 {
4891 	vgen_t		*vgenp;
4892 	vio_ver_msg_t	*vermsg = (vio_ver_msg_t *)tagp;
4893 	int		ack = 0;
4894 	int		failed = 0;
4895 	int		idx;
4896 	vgen_ver_t	*versions = ldcp->vgen_versions;
4897 	int		rv = 0;
4898 
4899 	vgenp = LDC_TO_VGEN(ldcp);
4900 	DBG1(vgenp, ldcp, "enter\n");
4901 	switch (tagp->vio_subtype) {
4902 	case VIO_SUBTYPE_INFO:
4903 
4904 		/*  Cache sid of peer if this is the first time */
4905 		if (ldcp->peer_sid == 0) {
4906 			DBG2(vgenp, ldcp, "Caching peer_sid(%x)\n",
4907 			    tagp->vio_sid);
4908 			ldcp->peer_sid = tagp->vio_sid;
4909 		}
4910 
4911 		if (ldcp->hphase != VH_PHASE1) {
4912 			/*
4913 			 * If we are not already in VH_PHASE1, reset to
4914 			 * pre-handshake state, and initiate handshake
4915 			 * to the peer too.
4916 			 */
4917 			vgen_handshake_reset(ldcp);
4918 			vgen_handshake(vh_nextphase(ldcp));
4919 		}
4920 		ldcp->hstate |= VER_INFO_RCVD;
4921 
4922 		/* save peer's requested values */
4923 		ldcp->peer_hparams.ver_major = vermsg->ver_major;
4924 		ldcp->peer_hparams.ver_minor = vermsg->ver_minor;
4925 		ldcp->peer_hparams.dev_class = vermsg->dev_class;
4926 
4927 		if ((vermsg->dev_class != VDEV_NETWORK) &&
4928 		    (vermsg->dev_class != VDEV_NETWORK_SWITCH)) {
4929 			/* unsupported dev_class, send NACK */
4930 
4931 			DWARN(vgenp, ldcp, "Version Negotiation Failed\n");
4932 
4933 			tagp->vio_subtype = VIO_SUBTYPE_NACK;
4934 			tagp->vio_sid = ldcp->local_sid;
4935 			/* send reply msg back to peer */
4936 			rv = vgen_sendmsg(ldcp, (caddr_t)tagp,
4937 			    sizeof (*vermsg), B_FALSE);
4938 			if (rv != VGEN_SUCCESS) {
4939 				return (rv);
4940 			}
4941 			return (VGEN_FAILURE);
4942 		}
4943 
4944 		DBG2(vgenp, ldcp, "VER_INFO_RCVD, ver(%d,%d)\n",
4945 		    vermsg->ver_major,  vermsg->ver_minor);
4946 
4947 		idx = 0;
4948 
4949 		for (;;) {
4950 
4951 			if (vermsg->ver_major > versions[idx].ver_major) {
4952 
4953 				/* nack with next lower version */
4954 				tagp->vio_subtype = VIO_SUBTYPE_NACK;
4955 				vermsg->ver_major = versions[idx].ver_major;
4956 				vermsg->ver_minor = versions[idx].ver_minor;
4957 				break;
4958 			}
4959 
4960 			if (vermsg->ver_major == versions[idx].ver_major) {
4961 
4962 				/* major version match - ACK version */
4963 				tagp->vio_subtype = VIO_SUBTYPE_ACK;
4964 				ack = 1;
4965 
4966 				/*
4967 				 * lower minor version to the one this endpt
4968 				 * supports, if necessary
4969 				 */
4970 				if (vermsg->ver_minor >
4971 				    versions[idx].ver_minor) {
4972 					vermsg->ver_minor =
4973 					    versions[idx].ver_minor;
4974 					ldcp->peer_hparams.ver_minor =
4975 					    versions[idx].ver_minor;
4976 				}
4977 				break;
4978 			}
4979 
4980 			idx++;
4981 
4982 			if (idx == VGEN_NUM_VER) {
4983 
4984 				/* no version match - send NACK */
4985 				tagp->vio_subtype = VIO_SUBTYPE_NACK;
4986 				vermsg->ver_major = 0;
4987 				vermsg->ver_minor = 0;
4988 				failed = 1;
4989 				break;
4990 			}
4991 
4992 		}
4993 
4994 		tagp->vio_sid = ldcp->local_sid;
4995 
4996 		/* send reply msg back to peer */
4997 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*vermsg),
4998 		    B_FALSE);
4999 		if (rv != VGEN_SUCCESS) {
5000 			return (rv);
5001 		}
5002 
5003 		if (ack) {
5004 			ldcp->hstate |= VER_ACK_SENT;
5005 			DBG2(vgenp, ldcp, "VER_ACK_SENT, ver(%d,%d) \n",
5006 			    vermsg->ver_major, vermsg->ver_minor);
5007 		}
5008 		if (failed) {
5009 			DWARN(vgenp, ldcp, "Negotiation Failed\n");
5010 			return (VGEN_FAILURE);
5011 		}
5012 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5013 
5014 			/*  VER_ACK_SENT and VER_ACK_RCVD */
5015 
5016 			/* local and peer versions match? */
5017 			ASSERT((ldcp->local_hparams.ver_major ==
5018 			    ldcp->peer_hparams.ver_major) &&
5019 			    (ldcp->local_hparams.ver_minor ==
5020 			    ldcp->peer_hparams.ver_minor));
5021 
5022 			vgen_set_vnet_proto_ops(ldcp);
5023 
5024 			/* move to the next phase */
5025 			vgen_handshake(vh_nextphase(ldcp));
5026 		}
5027 
5028 		break;
5029 
5030 	case VIO_SUBTYPE_ACK:
5031 
5032 		if (ldcp->hphase != VH_PHASE1) {
5033 			/*  This should not happen. */
5034 			DWARN(vgenp, ldcp, "Invalid Phase(%u)\n", ldcp->hphase);
5035 			return (VGEN_FAILURE);
5036 		}
5037 
5038 		/* SUCCESS - we have agreed on a version */
5039 		ldcp->local_hparams.ver_major = vermsg->ver_major;
5040 		ldcp->local_hparams.ver_minor = vermsg->ver_minor;
5041 		ldcp->hstate |= VER_ACK_RCVD;
5042 
5043 		DBG2(vgenp, ldcp, "VER_ACK_RCVD, ver(%d,%d) \n",
5044 		    vermsg->ver_major,  vermsg->ver_minor);
5045 
5046 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5047 
5048 			/*  VER_ACK_SENT and VER_ACK_RCVD */
5049 
5050 			/* local and peer versions match? */
5051 			ASSERT((ldcp->local_hparams.ver_major ==
5052 			    ldcp->peer_hparams.ver_major) &&
5053 			    (ldcp->local_hparams.ver_minor ==
5054 			    ldcp->peer_hparams.ver_minor));
5055 
5056 			vgen_set_vnet_proto_ops(ldcp);
5057 
5058 			/* move to the next phase */
5059 			vgen_handshake(vh_nextphase(ldcp));
5060 		}
5061 		break;
5062 
5063 	case VIO_SUBTYPE_NACK:
5064 
5065 		if (ldcp->hphase != VH_PHASE1) {
5066 			/*  This should not happen.  */
5067 			DWARN(vgenp, ldcp, "VER_NACK_RCVD Invalid "
5068 			"Phase(%u)\n", ldcp->hphase);
5069 			return (VGEN_FAILURE);
5070 		}
5071 
5072 		DBG2(vgenp, ldcp, "VER_NACK_RCVD next ver(%d,%d)\n",
5073 		    vermsg->ver_major, vermsg->ver_minor);
5074 
5075 		/* check if version in NACK is zero */
5076 		if (vermsg->ver_major == 0 && vermsg->ver_minor == 0) {
5077 			/*
5078 			 * Version Negotiation has failed.
5079 			 */
5080 			DWARN(vgenp, ldcp, "Version Negotiation Failed\n");
5081 			return (VGEN_FAILURE);
5082 		}
5083 
5084 		idx = 0;
5085 
5086 		for (;;) {
5087 
5088 			if (vermsg->ver_major > versions[idx].ver_major) {
5089 				/* select next lower version */
5090 
5091 				ldcp->local_hparams.ver_major =
5092 				    versions[idx].ver_major;
5093 				ldcp->local_hparams.ver_minor =
5094 				    versions[idx].ver_minor;
5095 				break;
5096 			}
5097 
5098 			if (vermsg->ver_major == versions[idx].ver_major) {
5099 				/* major version match */
5100 
5101 				ldcp->local_hparams.ver_major =
5102 				    versions[idx].ver_major;
5103 
5104 				ldcp->local_hparams.ver_minor =
5105 				    versions[idx].ver_minor;
5106 				break;
5107 			}
5108 
5109 			idx++;
5110 
5111 			if (idx == VGEN_NUM_VER) {
5112 				/*
5113 				 * no version match.
5114 				 * Version Negotiation has failed.
5115 				 */
5116 				DWARN(vgenp, ldcp,
5117 				    "Version Negotiation Failed\n");
5118 				return (VGEN_FAILURE);
5119 			}
5120 
5121 		}
5122 
5123 		rv = vgen_send_version_negotiate(ldcp);
5124 		if (rv != VGEN_SUCCESS) {
5125 			return (rv);
5126 		}
5127 
5128 		break;
5129 	}
5130 
5131 	DBG1(vgenp, ldcp, "exit\n");
5132 	return (VGEN_SUCCESS);
5133 }
5134 
5135 /* Check if the attributes are supported */
5136 static int
5137 vgen_check_attr_info(vgen_ldc_t *ldcp, vnet_attr_msg_t *msg)
5138 {
5139 	vgen_hparams_t	*lp = &ldcp->local_hparams;
5140 
5141 	if ((msg->addr_type != ADDR_TYPE_MAC) ||
5142 	    (msg->ack_freq > 64) ||
5143 	    (msg->xfer_mode != lp->xfer_mode)) {
5144 		return (VGEN_FAILURE);
5145 	}
5146 
5147 	if (VGEN_VER_LT(ldcp, 1, 4)) {
5148 		/* versions < 1.4, mtu must match */
5149 		if (msg->mtu != lp->mtu) {
5150 			return (VGEN_FAILURE);
5151 		}
5152 	} else {
5153 		/* Ver >= 1.4, validate mtu of the peer is at least ETHERMAX */
5154 		if (msg->mtu < ETHERMAX) {
5155 			return (VGEN_FAILURE);
5156 		}
5157 	}
5158 
5159 	return (VGEN_SUCCESS);
5160 }
5161 
5162 /*
5163  * Handle an attribute info msg from the peer or an ACK/NACK from the peer
5164  * to an attr info msg that we sent.
5165  */
5166 static int
5167 vgen_handle_attr_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5168 {
5169 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
5170 	vnet_attr_msg_t	*msg = (vnet_attr_msg_t *)tagp;
5171 	vgen_hparams_t	*lp = &ldcp->local_hparams;
5172 	vgen_hparams_t	*rp = &ldcp->peer_hparams;
5173 	int		ack = 1;
5174 	int		rv = 0;
5175 	uint32_t	mtu;
5176 
5177 	DBG1(vgenp, ldcp, "enter\n");
5178 	if (ldcp->hphase != VH_PHASE2) {
5179 		DWARN(vgenp, ldcp, "Rcvd ATTR_INFO subtype(%d),"
5180 		" Invalid Phase(%u)\n",
5181 		    tagp->vio_subtype, ldcp->hphase);
5182 		return (VGEN_FAILURE);
5183 	}
5184 	switch (tagp->vio_subtype) {
5185 	case VIO_SUBTYPE_INFO:
5186 
5187 		DBG2(vgenp, ldcp, "ATTR_INFO_RCVD \n");
5188 		ldcp->hstate |= ATTR_INFO_RCVD;
5189 
5190 		/* save peer's values */
5191 		rp->mtu = msg->mtu;
5192 		rp->addr = msg->addr;
5193 		rp->addr_type = msg->addr_type;
5194 		rp->xfer_mode = msg->xfer_mode;
5195 		rp->ack_freq = msg->ack_freq;
5196 
5197 		rv = vgen_check_attr_info(ldcp, msg);
5198 		if (rv == VGEN_FAILURE) {
5199 			/* unsupported attr, send NACK */
5200 			ack = 0;
5201 		} else {
5202 
5203 			if (VGEN_VER_GTEQ(ldcp, 1, 4)) {
5204 
5205 				/*
5206 				 * Versions >= 1.4:
5207 				 * The mtu is negotiated down to the
5208 				 * minimum of our mtu and peer's mtu.
5209 				 */
5210 				mtu = MIN(msg->mtu, vgenp->max_frame_size);
5211 
5212 				/*
5213 				 * If we have received an ack for the attr info
5214 				 * that we sent, then check if the mtu computed
5215 				 * above matches the mtu that the peer had ack'd
5216 				 * (saved in local hparams). If they don't
5217 				 * match, we fail the handshake.
5218 				 */
5219 				if (ldcp->hstate & ATTR_ACK_RCVD) {
5220 					if (mtu != lp->mtu) {
5221 						/* send NACK */
5222 						ack = 0;
5223 					}
5224 				} else {
5225 					/*
5226 					 * Save the mtu computed above in our
5227 					 * attr parameters, so it gets sent in
5228 					 * the attr info from us to the peer.
5229 					 */
5230 					lp->mtu = mtu;
5231 				}
5232 
5233 				/* save the MIN mtu in the msg to be replied */
5234 				msg->mtu = mtu;
5235 
5236 			}
5237 		}
5238 
5239 
5240 		if (ack) {
5241 			tagp->vio_subtype = VIO_SUBTYPE_ACK;
5242 		} else {
5243 			tagp->vio_subtype = VIO_SUBTYPE_NACK;
5244 		}
5245 		tagp->vio_sid = ldcp->local_sid;
5246 
5247 		/* send reply msg back to peer */
5248 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msg),
5249 		    B_FALSE);
5250 		if (rv != VGEN_SUCCESS) {
5251 			return (rv);
5252 		}
5253 
5254 		if (ack) {
5255 			ldcp->hstate |= ATTR_ACK_SENT;
5256 			DBG2(vgenp, ldcp, "ATTR_ACK_SENT \n");
5257 		} else {
5258 			/* failed */
5259 			DWARN(vgenp, ldcp, "ATTR_NACK_SENT \n");
5260 			return (VGEN_FAILURE);
5261 		}
5262 
5263 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5264 			vgen_handshake(vh_nextphase(ldcp));
5265 		}
5266 
5267 		break;
5268 
5269 	case VIO_SUBTYPE_ACK:
5270 
5271 		if (VGEN_VER_GTEQ(ldcp, 1, 4)) {
5272 			/*
5273 			 * Versions >= 1.4:
5274 			 * The ack msg sent by the peer contains the minimum of
5275 			 * our mtu (that we had sent in our attr info) and the
5276 			 * peer's mtu.
5277 			 *
5278 			 * If we have sent an ack for the attr info msg from
5279 			 * the peer, check if the mtu that was computed then
5280 			 * (saved in local hparams) matches the mtu that the
5281 			 * peer has ack'd. If they don't match, we fail the
5282 			 * handshake.
5283 			 */
5284 			if (ldcp->hstate & ATTR_ACK_SENT) {
5285 				if (lp->mtu != msg->mtu) {
5286 					return (VGEN_FAILURE);
5287 				}
5288 			} else {
5289 				/*
5290 				 * If the mtu ack'd by the peer is > our mtu
5291 				 * fail handshake. Otherwise, save the mtu, so
5292 				 * we can validate it when we receive attr info
5293 				 * from our peer.
5294 				 */
5295 				if (msg->mtu > lp->mtu) {
5296 					return (VGEN_FAILURE);
5297 				}
5298 				if (msg->mtu <= lp->mtu) {
5299 					lp->mtu = msg->mtu;
5300 				}
5301 			}
5302 		}
5303 
5304 		ldcp->hstate |= ATTR_ACK_RCVD;
5305 
5306 		DBG2(vgenp, ldcp, "ATTR_ACK_RCVD \n");
5307 
5308 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5309 			vgen_handshake(vh_nextphase(ldcp));
5310 		}
5311 		break;
5312 
5313 	case VIO_SUBTYPE_NACK:
5314 
5315 		DBG2(vgenp, ldcp, "ATTR_NACK_RCVD \n");
5316 		return (VGEN_FAILURE);
5317 	}
5318 	DBG1(vgenp, ldcp, "exit\n");
5319 	return (VGEN_SUCCESS);
5320 }
5321 
5322 /* Check if the dring info msg is ok */
5323 static int
5324 vgen_check_dring_reg(vio_dring_reg_msg_t *msg)
5325 {
5326 	/* check if msg contents are ok */
5327 	if ((msg->num_descriptors < 128) || (msg->descriptor_size <
5328 	    sizeof (vnet_public_desc_t))) {
5329 		return (VGEN_FAILURE);
5330 	}
5331 	return (VGEN_SUCCESS);
5332 }
5333 
5334 /*
5335  * Handle a descriptor ring register msg from the peer or an ACK/NACK from
5336  * the peer to a dring register msg that we sent.
5337  */
5338 static int
5339 vgen_handle_dring_reg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5340 {
5341 	vio_dring_reg_msg_t *msg = (vio_dring_reg_msg_t *)tagp;
5342 	ldc_mem_cookie_t dcookie;
5343 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5344 	int ack = 0;
5345 	int rv = 0;
5346 
5347 	DBG1(vgenp, ldcp, "enter\n");
5348 	if (ldcp->hphase < VH_PHASE2) {
5349 		/* dring_info can be rcvd in any of the phases after Phase1 */
5350 		DWARN(vgenp, ldcp,
5351 		    "Rcvd DRING_INFO Subtype (%d), Invalid Phase(%u)\n",
5352 		    tagp->vio_subtype, ldcp->hphase);
5353 		return (VGEN_FAILURE);
5354 	}
5355 	switch (tagp->vio_subtype) {
5356 	case VIO_SUBTYPE_INFO:
5357 
5358 		DBG2(vgenp, ldcp, "DRING_INFO_RCVD \n");
5359 		ldcp->hstate |= DRING_INFO_RCVD;
5360 		bcopy((msg->cookie), &dcookie, sizeof (dcookie));
5361 
5362 		ASSERT(msg->ncookies == 1);
5363 
5364 		if (vgen_check_dring_reg(msg) == VGEN_SUCCESS) {
5365 			/*
5366 			 * verified dring info msg to be ok,
5367 			 * now try to map the remote dring.
5368 			 */
5369 			rv = vgen_init_rxds(ldcp, msg->num_descriptors,
5370 			    msg->descriptor_size, &dcookie,
5371 			    msg->ncookies);
5372 			if (rv == DDI_SUCCESS) {
5373 				/* now we can ack the peer */
5374 				ack = 1;
5375 			}
5376 		}
5377 		if (ack == 0) {
5378 			/* failed, send NACK */
5379 			tagp->vio_subtype = VIO_SUBTYPE_NACK;
5380 		} else {
5381 			if (!(ldcp->peer_hparams.dring_ready)) {
5382 
5383 				/* save peer's dring_info values */
5384 				bcopy(&dcookie,
5385 				    &(ldcp->peer_hparams.dring_cookie),
5386 				    sizeof (dcookie));
5387 				ldcp->peer_hparams.num_desc =
5388 				    msg->num_descriptors;
5389 				ldcp->peer_hparams.desc_size =
5390 				    msg->descriptor_size;
5391 				ldcp->peer_hparams.num_dcookies =
5392 				    msg->ncookies;
5393 
5394 				/* set dring_ident for the peer */
5395 				ldcp->peer_hparams.dring_ident =
5396 				    (uint64_t)ldcp->rxdp;
5397 				/* return the dring_ident in ack msg */
5398 				msg->dring_ident =
5399 				    (uint64_t)ldcp->rxdp;
5400 
5401 				ldcp->peer_hparams.dring_ready = B_TRUE;
5402 			}
5403 			tagp->vio_subtype = VIO_SUBTYPE_ACK;
5404 		}
5405 		tagp->vio_sid = ldcp->local_sid;
5406 		/* send reply msg back to peer */
5407 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msg),
5408 		    B_FALSE);
5409 		if (rv != VGEN_SUCCESS) {
5410 			return (rv);
5411 		}
5412 
5413 		if (ack) {
5414 			ldcp->hstate |= DRING_ACK_SENT;
5415 			DBG2(vgenp, ldcp, "DRING_ACK_SENT");
5416 		} else {
5417 			DWARN(vgenp, ldcp, "DRING_NACK_SENT");
5418 			return (VGEN_FAILURE);
5419 		}
5420 
5421 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5422 			vgen_handshake(vh_nextphase(ldcp));
5423 		}
5424 
5425 		break;
5426 
5427 	case VIO_SUBTYPE_ACK:
5428 
5429 		ldcp->hstate |= DRING_ACK_RCVD;
5430 
5431 		DBG2(vgenp, ldcp, "DRING_ACK_RCVD");
5432 
5433 		if (!(ldcp->local_hparams.dring_ready)) {
5434 			/* local dring is now ready */
5435 			ldcp->local_hparams.dring_ready = B_TRUE;
5436 
5437 			/* save dring_ident acked by peer */
5438 			ldcp->local_hparams.dring_ident =
5439 			    msg->dring_ident;
5440 		}
5441 
5442 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5443 			vgen_handshake(vh_nextphase(ldcp));
5444 		}
5445 
5446 		break;
5447 
5448 	case VIO_SUBTYPE_NACK:
5449 
5450 		DBG2(vgenp, ldcp, "DRING_NACK_RCVD");
5451 		return (VGEN_FAILURE);
5452 	}
5453 	DBG1(vgenp, ldcp, "exit\n");
5454 	return (VGEN_SUCCESS);
5455 }
5456 
5457 /*
5458  * Handle a rdx info msg from the peer or an ACK/NACK
5459  * from the peer to a rdx info msg that we sent.
5460  */
5461 static int
5462 vgen_handle_rdx_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5463 {
5464 	int rv = 0;
5465 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
5466 
5467 	DBG1(vgenp, ldcp, "enter\n");
5468 	if (ldcp->hphase != VH_PHASE3) {
5469 		DWARN(vgenp, ldcp,
5470 		    "Rcvd RDX_INFO Subtype (%d), Invalid Phase(%u)\n",
5471 		    tagp->vio_subtype, ldcp->hphase);
5472 		return (VGEN_FAILURE);
5473 	}
5474 	switch (tagp->vio_subtype) {
5475 	case VIO_SUBTYPE_INFO:
5476 
5477 		DBG2(vgenp, ldcp, "RDX_INFO_RCVD \n");
5478 		ldcp->hstate |= RDX_INFO_RCVD;
5479 
5480 		tagp->vio_subtype = VIO_SUBTYPE_ACK;
5481 		tagp->vio_sid = ldcp->local_sid;
5482 		/* send reply msg back to peer */
5483 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (vio_rdx_msg_t),
5484 		    B_FALSE);
5485 		if (rv != VGEN_SUCCESS) {
5486 			return (rv);
5487 		}
5488 
5489 		ldcp->hstate |= RDX_ACK_SENT;
5490 		DBG2(vgenp, ldcp, "RDX_ACK_SENT \n");
5491 
5492 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5493 			vgen_handshake(vh_nextphase(ldcp));
5494 		}
5495 
5496 		break;
5497 
5498 	case VIO_SUBTYPE_ACK:
5499 
5500 		ldcp->hstate |= RDX_ACK_RCVD;
5501 
5502 		DBG2(vgenp, ldcp, "RDX_ACK_RCVD \n");
5503 
5504 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5505 			vgen_handshake(vh_nextphase(ldcp));
5506 		}
5507 		break;
5508 
5509 	case VIO_SUBTYPE_NACK:
5510 
5511 		DBG2(vgenp, ldcp, "RDX_NACK_RCVD \n");
5512 		return (VGEN_FAILURE);
5513 	}
5514 	DBG1(vgenp, ldcp, "exit\n");
5515 	return (VGEN_SUCCESS);
5516 }
5517 
5518 /* Handle ACK/NACK from vsw to a set multicast msg that we sent */
5519 static int
5520 vgen_handle_mcast_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5521 {
5522 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5523 	vnet_mcast_msg_t *msgp = (vnet_mcast_msg_t *)tagp;
5524 	struct ether_addr *addrp;
5525 	int count;
5526 	int i;
5527 
5528 	DBG1(vgenp, ldcp, "enter\n");
5529 	switch (tagp->vio_subtype) {
5530 
5531 	case VIO_SUBTYPE_INFO:
5532 
5533 		/* vnet shouldn't recv set mcast msg, only vsw handles it */
5534 		DWARN(vgenp, ldcp, "rcvd SET_MCAST_INFO \n");
5535 		break;
5536 
5537 	case VIO_SUBTYPE_ACK:
5538 
5539 		/* success adding/removing multicast addr */
5540 		DBG1(vgenp, ldcp, "rcvd SET_MCAST_ACK \n");
5541 		break;
5542 
5543 	case VIO_SUBTYPE_NACK:
5544 
5545 		DWARN(vgenp, ldcp, "rcvd SET_MCAST_NACK \n");
5546 		if (!(msgp->set)) {
5547 			/* multicast remove request failed */
5548 			break;
5549 		}
5550 
5551 		/* multicast add request failed */
5552 		for (count = 0; count < msgp->count; count++) {
5553 			addrp = &(msgp->mca[count]);
5554 
5555 			/* delete address from the table */
5556 			for (i = 0; i < vgenp->mccount; i++) {
5557 				if (ether_cmp(addrp,
5558 				    &(vgenp->mctab[i])) == 0) {
5559 					if (vgenp->mccount > 1) {
5560 						int t = vgenp->mccount - 1;
5561 						vgenp->mctab[i] =
5562 						    vgenp->mctab[t];
5563 					}
5564 					vgenp->mccount--;
5565 					break;
5566 				}
5567 			}
5568 		}
5569 		break;
5570 
5571 	}
5572 	DBG1(vgenp, ldcp, "exit\n");
5573 
5574 	return (VGEN_SUCCESS);
5575 }
5576 
5577 /* handler for control messages received from the peer ldc end-point */
5578 static int
5579 vgen_handle_ctrlmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5580 {
5581 	int rv = 0;
5582 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5583 
5584 	DBG1(vgenp, ldcp, "enter\n");
5585 	switch (tagp->vio_subtype_env) {
5586 
5587 	case VIO_VER_INFO:
5588 		rv = vgen_handle_version_negotiate(ldcp, tagp);
5589 		break;
5590 
5591 	case VIO_ATTR_INFO:
5592 		rv = vgen_handle_attr_info(ldcp, tagp);
5593 		break;
5594 
5595 	case VIO_DRING_REG:
5596 		rv = vgen_handle_dring_reg(ldcp, tagp);
5597 		break;
5598 
5599 	case VIO_RDX:
5600 		rv = vgen_handle_rdx_info(ldcp, tagp);
5601 		break;
5602 
5603 	case VNET_MCAST_INFO:
5604 		rv = vgen_handle_mcast_info(ldcp, tagp);
5605 		break;
5606 
5607 	case VIO_DDS_INFO:
5608 		rv = vgen_dds_rx(ldcp, tagp);
5609 		break;
5610 	}
5611 
5612 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
5613 	return (rv);
5614 }
5615 
5616 /* handler for data messages received from the peer ldc end-point */
5617 static int
5618 vgen_handle_datamsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp, uint32_t msglen)
5619 {
5620 	int rv = 0;
5621 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5622 
5623 	DBG1(vgenp, ldcp, "enter\n");
5624 
5625 	if (ldcp->hphase != VH_DONE)
5626 		return (rv);
5627 
5628 	if (tagp->vio_subtype == VIO_SUBTYPE_INFO) {
5629 		rv = vgen_check_datamsg_seq(ldcp, tagp);
5630 		if (rv != 0) {
5631 			return (rv);
5632 		}
5633 	}
5634 
5635 	switch (tagp->vio_subtype_env) {
5636 	case VIO_DRING_DATA:
5637 		rv = vgen_handle_dring_data(ldcp, tagp);
5638 		break;
5639 
5640 	case VIO_PKT_DATA:
5641 		ldcp->rx_pktdata((void *)ldcp, (void *)tagp, msglen);
5642 		break;
5643 	default:
5644 		break;
5645 	}
5646 
5647 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
5648 	return (rv);
5649 }
5650 
5651 /*
5652  * dummy pkt data handler function for vnet protocol version 1.0
5653  */
5654 static void
5655 vgen_handle_pkt_data_nop(void *arg1, void *arg2, uint32_t msglen)
5656 {
5657 	_NOTE(ARGUNUSED(arg1, arg2, msglen))
5658 }
5659 
5660 /*
5661  * This function handles raw pkt data messages received over the channel.
5662  * Currently, only priority-eth-type frames are received through this mechanism.
5663  * In this case, the frame(data) is present within the message itself which
5664  * is copied into an mblk before sending it up the stack.
5665  */
5666 static void
5667 vgen_handle_pkt_data(void *arg1, void *arg2, uint32_t msglen)
5668 {
5669 	vgen_ldc_t		*ldcp = (vgen_ldc_t *)arg1;
5670 	vio_raw_data_msg_t	*pkt	= (vio_raw_data_msg_t *)arg2;
5671 	uint32_t		size;
5672 	mblk_t			*mp;
5673 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
5674 	vgen_stats_t		*statsp = &ldcp->stats;
5675 	vgen_hparams_t		*lp = &ldcp->local_hparams;
5676 	vio_net_rx_cb_t		vrx_cb;
5677 
5678 	ASSERT(MUTEX_HELD(&ldcp->cblock));
5679 
5680 	mutex_exit(&ldcp->cblock);
5681 
5682 	size = msglen - VIO_PKT_DATA_HDRSIZE;
5683 	if (size < ETHERMIN || size > lp->mtu) {
5684 		(void) atomic_inc_32(&statsp->rx_pri_fail);
5685 		goto exit;
5686 	}
5687 
5688 	mp = vio_multipool_allocb(&ldcp->vmp, size);
5689 	if (mp == NULL) {
5690 		mp = allocb(size, BPRI_MED);
5691 		if (mp == NULL) {
5692 			(void) atomic_inc_32(&statsp->rx_pri_fail);
5693 			DWARN(vgenp, ldcp, "allocb failure, "
5694 			    "unable to process priority frame\n");
5695 			goto exit;
5696 		}
5697 	}
5698 
5699 	/* copy the frame from the payload of raw data msg into the mblk */
5700 	bcopy(pkt->data, mp->b_rptr, size);
5701 	mp->b_wptr = mp->b_rptr + size;
5702 
5703 	/* update stats */
5704 	(void) atomic_inc_64(&statsp->rx_pri_packets);
5705 	(void) atomic_add_64(&statsp->rx_pri_bytes, size);
5706 
5707 	/* send up; call vrx_cb() as cblock is already released */
5708 	vrx_cb = ldcp->portp->vcb.vio_net_rx_cb;
5709 	vrx_cb(ldcp->portp->vhp, mp);
5710 
5711 exit:
5712 	mutex_enter(&ldcp->cblock);
5713 }
5714 
5715 static int
5716 vgen_send_dring_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp, uint32_t start,
5717     int32_t end, uint8_t pstate)
5718 {
5719 	int rv = 0;
5720 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5721 	vio_dring_msg_t *msgp = (vio_dring_msg_t *)tagp;
5722 
5723 	tagp->vio_subtype = VIO_SUBTYPE_ACK;
5724 	tagp->vio_sid = ldcp->local_sid;
5725 	msgp->start_idx = start;
5726 	msgp->end_idx = end;
5727 	msgp->dring_process_state = pstate;
5728 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msgp), B_FALSE);
5729 	if (rv != VGEN_SUCCESS) {
5730 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
5731 	}
5732 	return (rv);
5733 }
5734 
/*
 * Dispatch a dring data message by subtype: INFO carries received
 * packets, ACK/NACK are replies from the peer to dring data messages
 * that we sent. Called with cblock held.
 */
static int
vgen_handle_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
{
	int rv = 0;
	vgen_t *vgenp = LDC_TO_VGEN(ldcp);


	DBG1(vgenp, ldcp, "enter\n");
	switch (tagp->vio_subtype) {

	case VIO_SUBTYPE_INFO:
		/*
		 * To reduce the locking contention, release the
		 * cblock here and re-acquire it once we are done
		 * receiving packets.
		 */
		mutex_exit(&ldcp->cblock);
		mutex_enter(&ldcp->rxlock);
		rv = vgen_handle_dring_data_info(ldcp, tagp);
		mutex_exit(&ldcp->rxlock);
		mutex_enter(&ldcp->cblock);
		break;

	case VIO_SUBTYPE_ACK:
		rv = vgen_handle_dring_data_ack(ldcp, tagp);
		break;

	case VIO_SUBTYPE_NACK:
		rv = vgen_handle_dring_data_nack(ldcp, tagp);
		break;
	}
	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
	return (rv);
}
5769 
5770 static int
5771 vgen_handle_dring_data_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5772 {
5773 	uint32_t start;
5774 	int32_t end;
5775 	int rv = 0;
5776 	vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;
5777 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5778 #ifdef VGEN_HANDLE_LOST_PKTS
5779 	vgen_stats_t *statsp = &ldcp->stats;
5780 	uint32_t rxi;
5781 	int n;
5782 #endif
5783 
5784 	DBG1(vgenp, ldcp, "enter\n");
5785 
5786 	start = dringmsg->start_idx;
5787 	end = dringmsg->end_idx;
5788 	/*
5789 	 * received a data msg, which contains the start and end
5790 	 * indices of the descriptors within the rx ring holding data,
5791 	 * the seq_num of data packet corresponding to the start index,
5792 	 * and the dring_ident.
5793 	 * We can now read the contents of each of these descriptors
5794 	 * and gather data from it.
5795 	 */
5796 	DBG1(vgenp, ldcp, "INFO: start(%d), end(%d)\n",
5797 	    start, end);
5798 
5799 	/* validate rx start and end indeces */
5800 	if (!(CHECK_RXI(start, ldcp)) || ((end != -1) &&
5801 	    !(CHECK_RXI(end, ldcp)))) {
5802 		DWARN(vgenp, ldcp, "Invalid Rx start(%d) or end(%d)\n",
5803 		    start, end);
5804 		/* drop the message if invalid index */
5805 		return (rv);
5806 	}
5807 
5808 	/* validate dring_ident */
5809 	if (dringmsg->dring_ident != ldcp->peer_hparams.dring_ident) {
5810 		DWARN(vgenp, ldcp, "Invalid dring ident 0x%x\n",
5811 		    dringmsg->dring_ident);
5812 		/* invalid dring_ident, drop the msg */
5813 		return (rv);
5814 	}
5815 #ifdef DEBUG
5816 	if (vgen_trigger_rxlost) {
5817 		/* drop this msg to simulate lost pkts for debugging */
5818 		vgen_trigger_rxlost = 0;
5819 		return (rv);
5820 	}
5821 #endif
5822 
5823 #ifdef	VGEN_HANDLE_LOST_PKTS
5824 
5825 	/* receive start index doesn't match expected index */
5826 	if (ldcp->next_rxi != start) {
5827 		DWARN(vgenp, ldcp, "next_rxi(%d) != start(%d)\n",
5828 		    ldcp->next_rxi, start);
5829 
5830 		/* calculate the number of pkts lost */
5831 		if (start >= ldcp->next_rxi) {
5832 			n = start - ldcp->next_rxi;
5833 		} else  {
5834 			n = ldcp->num_rxds - (ldcp->next_rxi - start);
5835 		}
5836 
5837 		statsp->rx_lost_pkts += n;
5838 		tagp->vio_subtype = VIO_SUBTYPE_NACK;
5839 		tagp->vio_sid = ldcp->local_sid;
5840 		/* indicate the range of lost descriptors */
5841 		dringmsg->start_idx = ldcp->next_rxi;
5842 		rxi = start;
5843 		DECR_RXI(rxi, ldcp);
5844 		dringmsg->end_idx = rxi;
5845 		/* dring ident is left unchanged */
5846 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp,
5847 		    sizeof (*dringmsg), B_FALSE);
5848 		if (rv != VGEN_SUCCESS) {
5849 			DWARN(vgenp, ldcp,
5850 			    "vgen_sendmsg failed, stype:NACK\n");
5851 			return (rv);
5852 		}
5853 		/*
5854 		 * treat this range of descrs/pkts as dropped
5855 		 * and set the new expected value of next_rxi
5856 		 * and continue(below) to process from the new
5857 		 * start index.
5858 		 */
5859 		ldcp->next_rxi = start;
5860 	}
5861 
5862 #endif	/* VGEN_HANDLE_LOST_PKTS */
5863 
5864 	/* Now receive messages */
5865 	rv = vgen_process_dring_data(ldcp, tagp);
5866 
5867 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
5868 	return (rv);
5869 }
5870 
/*
 * Receive the packets posted in the peer's descriptor range indicated
 * by a dring data INFO message: starting at start_idx, copy each READY
 * descriptor's frame into an mblk via ldc_mem_copy(), mark the
 * descriptor DONE, chain the mblks and pass them up the stack
 * (vgen_rx), sending intermediate ACKs whenever a descriptor has its
 * ack bit set and a final STOPPED ACK when processing ends.
 * Called with rxlock held (cblock is dropped by the caller).
 * Returns 0 on success or an error from descriptor/LDC operations; on
 * error any packets already received are still sent up.
 */
static int
vgen_process_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
{
	boolean_t set_ack_start = B_FALSE;
	uint32_t start;
	uint32_t ack_end;
	uint32_t next_rxi;
	uint32_t rxi;
	int count = 0;
	int rv = 0;
	uint32_t retries = 0;
	vgen_stats_t *statsp;
	vnet_public_desc_t rxd;
	vio_dring_entry_hdr_t *hdrp;
	mblk_t *bp = NULL;
	mblk_t *bpt = NULL;
	uint32_t ack_start;
	boolean_t rxd_err = B_FALSE;
	mblk_t *mp = NULL;
	size_t nbytes;
	boolean_t ack_needed = B_FALSE;
	size_t nread;
	uint64_t off = 0;
	struct ether_header *ehp;
	vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;
	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
	vgen_hparams_t	*lp = &ldcp->local_hparams;

	DBG1(vgenp, ldcp, "enter\n");

	statsp = &ldcp->stats;
	start = dringmsg->start_idx;

	/*
	 * start processing the descriptors from the specified
	 * start index, up to the index a descriptor is not ready
	 * to be processed or we process the entire descriptor ring
	 * and wrap around upto the start index.
	 */

	/* need to set the start index of descriptors to be ack'd */
	set_ack_start = B_TRUE;

	/* index upto which we have ack'd */
	ack_end = start;
	DECR_RXI(ack_end, ldcp);

	next_rxi = rxi =  start;
	do {
vgen_recv_retry:
		/* take a private copy of the descriptor at index rxi */
		rv = vnet_dring_entry_copy(&(ldcp->rxdp[rxi]), &rxd,
		    ldcp->dring_mtype, ldcp->rx_dhandle, rxi, rxi);
		if (rv != 0) {
			DWARN(vgenp, ldcp, "ldc_mem_dring_acquire() failed"
			    " rv(%d)\n", rv);
			statsp->ierrors++;
			return (rv);
		}

		hdrp = &rxd.hdr;

		if (hdrp->dstate != VIO_DESC_READY) {
			/*
			 * Before waiting and retry here, send up
			 * the packets that are received already
			 */
			if (bp != NULL) {
				DTRACE_PROBE1(vgen_rcv_msgs, int, count);
				vgen_rx(ldcp, bp);
				count = 0;
				bp = bpt = NULL;
			}
			/*
			 * descriptor is not ready.
			 * retry descriptor acquire, stop processing
			 * after max # retries.
			 */
			if (retries == vgen_recv_retries)
				break;
			retries++;
			drv_usecwait(vgen_recv_delay);
			goto vgen_recv_retry;
		}
		retries = 0;

		if (set_ack_start) {
			/*
			 * initialize the start index of the range
			 * of descriptors to be ack'd.
			 */
			ack_start = rxi;
			set_ack_start = B_FALSE;
		}

		/* sanity-check the descriptor's length and cookie count */
		if ((rxd.nbytes < ETHERMIN) ||
		    (rxd.nbytes > lp->mtu) ||
		    (rxd.ncookies == 0) ||
		    (rxd.ncookies > MAX_COOKIES)) {
			rxd_err = B_TRUE;
		} else {
			/*
			 * Try to allocate an mblk from the free pool
			 * of recv mblks for the channel.
			 * If this fails, use allocb().
			 */
			nbytes = (VNET_IPALIGN + rxd.nbytes + 7) & ~7;
			if (nbytes > ldcp->max_rxpool_size) {
				mp = allocb(VNET_IPALIGN + rxd.nbytes + 8,
				    BPRI_MED);
			} else {
				mp = vio_multipool_allocb(&ldcp->vmp, nbytes);
				if (mp == NULL) {
					statsp->rx_vio_allocb_fail++;
					/*
					 * Data buffer returned by allocb(9F)
					 * is 8byte aligned. We allocate extra
					 * 8 bytes to ensure size is multiple
					 * of 8 bytes for ldc_mem_copy().
					 */
					mp = allocb(VNET_IPALIGN +
					    rxd.nbytes + 8, BPRI_MED);
				}
			}
		}
		if ((rxd_err) || (mp == NULL)) {
			/*
			 * rxd_err or allocb() failure,
			 * drop this packet, get next.
			 */
			if (rxd_err) {
				statsp->ierrors++;
				rxd_err = B_FALSE;
			} else {
				statsp->rx_allocb_fail++;
			}

			ack_needed = hdrp->ack;

			/* set descriptor done bit */
			rv = vnet_dring_entry_set_dstate(&(ldcp->rxdp[rxi]),
			    ldcp->dring_mtype, ldcp->rx_dhandle, rxi, rxi,
			    VIO_DESC_DONE);
			if (rv != 0) {
				DWARN(vgenp, ldcp,
				    "vnet_dring_entry_set_dstate err rv(%d)\n",
				    rv);
				return (rv);
			}

			if (ack_needed) {
				ack_needed = B_FALSE;
				/*
				 * sender needs ack for this packet,
				 * ack pkts upto this index.
				 */
				ack_end = rxi;

				rv = vgen_send_dring_ack(ldcp, tagp,
				    ack_start, ack_end,
				    VIO_DP_ACTIVE);
				if (rv != VGEN_SUCCESS) {
					goto error_ret;
				}

				/* need to set new ack start index */
				set_ack_start = B_TRUE;
			}
			goto vgen_next_rxi;
		}

		/* copy the frame out of the peer's buffers */
		nread = nbytes;
		rv = ldc_mem_copy(ldcp->ldc_handle,
		    (caddr_t)mp->b_rptr, off, &nread,
		    rxd.memcookie, rxd.ncookies, LDC_COPY_IN);

		/* if ldc_mem_copy() failed */
		if (rv) {
			DWARN(vgenp, ldcp, "ldc_mem_copy err rv(%d)\n", rv);
			statsp->ierrors++;
			freemsg(mp);
			goto error_ret;
		}

		ack_needed = hdrp->ack;

		rv = vnet_dring_entry_set_dstate(&(ldcp->rxdp[rxi]),
		    ldcp->dring_mtype, ldcp->rx_dhandle, rxi, rxi,
		    VIO_DESC_DONE);
		if (rv != 0) {
			DWARN(vgenp, ldcp,
			    "vnet_dring_entry_set_dstate err rv(%d)\n", rv);
			goto error_ret;
		}

		/* skip the IP-alignment pad at the front of the buffer */
		mp->b_rptr += VNET_IPALIGN;

		if (ack_needed) {
			ack_needed = B_FALSE;
			/*
			 * sender needs ack for this packet,
			 * ack pkts upto this index.
			 */
			ack_end = rxi;

			rv = vgen_send_dring_ack(ldcp, tagp,
			    ack_start, ack_end, VIO_DP_ACTIVE);
			if (rv != VGEN_SUCCESS) {
				goto error_ret;
			}

			/* need to set new ack start index */
			set_ack_start = B_TRUE;
		}

		/* short copy: drop this packet, get next */
		if (nread != nbytes) {
			DWARN(vgenp, ldcp,
			    "ldc_mem_copy nread(%lx), nbytes(%lx)\n",
			    nread, nbytes);
			statsp->ierrors++;
			freemsg(mp);
			goto vgen_next_rxi;
		}

		/* point to the actual end of data */
		mp->b_wptr = mp->b_rptr + rxd.nbytes;

		/* update stats */
		statsp->ipackets++;
		statsp->rbytes += rxd.nbytes;
		ehp = (struct ether_header *)mp->b_rptr;
		if (IS_BROADCAST(ehp))
			statsp->brdcstrcv++;
		else if (IS_MULTICAST(ehp))
			statsp->multircv++;

		/* build a chain of received packets */
		if (bp == NULL) {
			/* first pkt */
			bp = mp;
			bpt = bp;
			bpt->b_next = NULL;
		} else {
			mp->b_next = NULL;
			bpt->b_next = mp;
			bpt = mp;
		}

		/* flush the chain up the stack once it grows long enough */
		if (count++ > vgen_chain_len) {
			DTRACE_PROBE1(vgen_rcv_msgs, int, count);
			vgen_rx(ldcp, bp);
			count = 0;
			bp = bpt = NULL;
		}

vgen_next_rxi:
		/* update end index of range of descrs to be ack'd */
		ack_end = rxi;

		/* update the next index to be processed */
		INCR_RXI(next_rxi, ldcp);
		if (next_rxi == start) {
			/*
			 * processed the entire descriptor ring upto
			 * the index at which we started.
			 */
			break;
		}

		rxi = next_rxi;

	_NOTE(CONSTCOND)
	} while (1);

	/*
	 * send an ack message to peer indicating that we have stopped
	 * processing descriptors.
	 */
	if (set_ack_start) {
		/*
		 * We have ack'd upto some index and we have not
		 * processed any descriptors beyond that index.
		 * Use the last ack'd index as both the start and
		 * end of range of descrs being ack'd.
		 * Note: This results in acking the last index twice
		 * and should be harmless.
		 */
		ack_start = ack_end;
	}

	rv = vgen_send_dring_ack(ldcp, tagp, ack_start, ack_end,
	    VIO_DP_STOPPED);
	if (rv != VGEN_SUCCESS) {
		goto error_ret;
	}

	/* save new recv index of next dring msg */
	ldcp->next_rxi = next_rxi;

error_ret:
	/* send up packets received so far */
	if (bp != NULL) {
		DTRACE_PROBE1(vgen_rcv_msgs, int, count);
		vgen_rx(ldcp, bp);
		bp = bpt = NULL;
	}
	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
	return (rv);

}
6180 
6181 static int
6182 vgen_handle_dring_data_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
6183 {
6184 	int rv = 0;
6185 	uint32_t start;
6186 	int32_t end;
6187 	uint32_t txi;
6188 	boolean_t ready_txd = B_FALSE;
6189 	vgen_stats_t *statsp;
6190 	vgen_private_desc_t *tbufp;
6191 	vnet_public_desc_t *txdp;
6192 	vio_dring_entry_hdr_t *hdrp;
6193 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6194 	vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;
6195 
6196 	DBG1(vgenp, ldcp, "enter\n");
6197 	start = dringmsg->start_idx;
6198 	end = dringmsg->end_idx;
6199 	statsp = &ldcp->stats;
6200 
6201 	/*
6202 	 * received an ack corresponding to a specific descriptor for
6203 	 * which we had set the ACK bit in the descriptor (during
6204 	 * transmit). This enables us to reclaim descriptors.
6205 	 */
6206 
6207 	DBG2(vgenp, ldcp, "ACK:  start(%d), end(%d)\n", start, end);
6208 
6209 	/* validate start and end indeces in the tx ack msg */
6210 	if (!(CHECK_TXI(start, ldcp)) || !(CHECK_TXI(end, ldcp))) {
6211 		/* drop the message if invalid index */
6212 		DWARN(vgenp, ldcp, "Invalid Tx ack start(%d) or end(%d)\n",
6213 		    start, end);
6214 		return (rv);
6215 	}
6216 	/* validate dring_ident */
6217 	if (dringmsg->dring_ident != ldcp->local_hparams.dring_ident) {
6218 		/* invalid dring_ident, drop the msg */
6219 		DWARN(vgenp, ldcp, "Invalid dring ident 0x%x\n",
6220 		    dringmsg->dring_ident);
6221 		return (rv);
6222 	}
6223 	statsp->dring_data_acks++;
6224 
6225 	/* reclaim descriptors that are done */
6226 	vgen_reclaim(ldcp);
6227 
6228 	if (dringmsg->dring_process_state != VIO_DP_STOPPED) {
6229 		/*
6230 		 * receiver continued processing descriptors after
6231 		 * sending us the ack.
6232 		 */
6233 		return (rv);
6234 	}
6235 
6236 	statsp->dring_stopped_acks++;
6237 
6238 	/* receiver stopped processing descriptors */
6239 	mutex_enter(&ldcp->wrlock);
6240 	mutex_enter(&ldcp->tclock);
6241 
6242 	/*
6243 	 * determine if there are any pending tx descriptors
6244 	 * ready to be processed by the receiver(peer) and if so,
6245 	 * send a message to the peer to restart receiving.
6246 	 */
6247 	ready_txd = B_FALSE;
6248 
6249 	/*
6250 	 * using the end index of the descriptor range for which
6251 	 * we received the ack, check if the next descriptor is
6252 	 * ready.
6253 	 */
6254 	txi = end;
6255 	INCR_TXI(txi, ldcp);
6256 	tbufp = &ldcp->tbufp[txi];
6257 	txdp = tbufp->descp;
6258 	hdrp = &txdp->hdr;
6259 	if (hdrp->dstate == VIO_DESC_READY) {
6260 		ready_txd = B_TRUE;
6261 	} else {
6262 		/*
6263 		 * descr next to the end of ack'd descr range is not
6264 		 * ready.
6265 		 * starting from the current reclaim index, check
6266 		 * if any descriptor is ready.
6267 		 */
6268 
6269 		txi = ldcp->cur_tbufp - ldcp->tbufp;
6270 		tbufp = &ldcp->tbufp[txi];
6271 
6272 		txdp = tbufp->descp;
6273 		hdrp = &txdp->hdr;
6274 		if (hdrp->dstate == VIO_DESC_READY) {
6275 			ready_txd = B_TRUE;
6276 		}
6277 
6278 	}
6279 
6280 	if (ready_txd) {
6281 		/*
6282 		 * we have tx descriptor(s) ready to be
6283 		 * processed by the receiver.
6284 		 * send a message to the peer with the start index
6285 		 * of ready descriptors.
6286 		 */
6287 		rv = vgen_send_dring_data(ldcp, txi, -1);
6288 		if (rv != VGEN_SUCCESS) {
6289 			ldcp->resched_peer = B_TRUE;
6290 			ldcp->resched_peer_txi = txi;
6291 			mutex_exit(&ldcp->tclock);
6292 			mutex_exit(&ldcp->wrlock);
6293 			return (rv);
6294 		}
6295 	} else {
6296 		/*
6297 		 * no ready tx descriptors. set the flag to send a
6298 		 * message to peer when tx descriptors are ready in
6299 		 * transmit routine.
6300 		 */
6301 		ldcp->resched_peer = B_TRUE;
6302 		ldcp->resched_peer_txi = ldcp->cur_tbufp - ldcp->tbufp;
6303 	}
6304 
6305 	mutex_exit(&ldcp->tclock);
6306 	mutex_exit(&ldcp->wrlock);
6307 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
6308 	return (rv);
6309 }
6310 
6311 static int
6312 vgen_handle_dring_data_nack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
6313 {
6314 	int rv = 0;
6315 	uint32_t start;
6316 	int32_t end;
6317 	uint32_t txi;
6318 	vnet_public_desc_t *txdp;
6319 	vio_dring_entry_hdr_t *hdrp;
6320 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6321 	vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;
6322 
6323 	DBG1(vgenp, ldcp, "enter\n");
6324 	start = dringmsg->start_idx;
6325 	end = dringmsg->end_idx;
6326 
6327 	/*
6328 	 * peer sent a NACK msg to indicate lost packets.
6329 	 * The start and end correspond to the range of descriptors
6330 	 * for which the peer didn't receive a dring data msg and so
6331 	 * didn't receive the corresponding data.
6332 	 */
6333 	DWARN(vgenp, ldcp, "NACK: start(%d), end(%d)\n", start, end);
6334 
6335 	/* validate start and end indeces in the tx nack msg */
6336 	if (!(CHECK_TXI(start, ldcp)) || !(CHECK_TXI(end, ldcp))) {
6337 		/* drop the message if invalid index */
6338 		DWARN(vgenp, ldcp, "Invalid Tx nack start(%d) or end(%d)\n",
6339 		    start, end);
6340 		return (rv);
6341 	}
6342 	/* validate dring_ident */
6343 	if (dringmsg->dring_ident != ldcp->local_hparams.dring_ident) {
6344 		/* invalid dring_ident, drop the msg */
6345 		DWARN(vgenp, ldcp, "Invalid dring ident 0x%x\n",
6346 		    dringmsg->dring_ident);
6347 		return (rv);
6348 	}
6349 	mutex_enter(&ldcp->txlock);
6350 	mutex_enter(&ldcp->tclock);
6351 
6352 	if (ldcp->next_tbufp == ldcp->cur_tbufp) {
6353 		/* no busy descriptors, bogus nack ? */
6354 		mutex_exit(&ldcp->tclock);
6355 		mutex_exit(&ldcp->txlock);
6356 		return (rv);
6357 	}
6358 
6359 	/* we just mark the descrs as done so they can be reclaimed */
6360 	for (txi = start; txi <= end; ) {
6361 		txdp = &(ldcp->txdp[txi]);
6362 		hdrp = &txdp->hdr;
6363 		if (hdrp->dstate == VIO_DESC_READY)
6364 			hdrp->dstate = VIO_DESC_DONE;
6365 		INCR_TXI(txi, ldcp);
6366 	}
6367 	mutex_exit(&ldcp->tclock);
6368 	mutex_exit(&ldcp->txlock);
6369 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
6370 	return (rv);
6371 }
6372 
6373 static void
6374 vgen_reclaim(vgen_ldc_t *ldcp)
6375 {
6376 	mutex_enter(&ldcp->tclock);
6377 
6378 	vgen_reclaim_dring(ldcp);
6379 	ldcp->reclaim_lbolt = ddi_get_lbolt();
6380 
6381 	mutex_exit(&ldcp->tclock);
6382 }
6383 
6384 /*
6385  * transmit reclaim function. starting from the current reclaim index
6386  * look for descriptors marked DONE and reclaim the descriptor and the
6387  * corresponding buffers (tbuf).
6388  */
6389 static void
6390 vgen_reclaim_dring(vgen_ldc_t *ldcp)
6391 {
6392 	int count = 0;
6393 	vnet_public_desc_t *txdp;
6394 	vgen_private_desc_t *tbufp;
6395 	vio_dring_entry_hdr_t	*hdrp;
6396 
6397 #ifdef DEBUG
6398 	if (vgen_trigger_txtimeout)
6399 		return;
6400 #endif
6401 
6402 	tbufp = ldcp->cur_tbufp;
6403 	txdp = tbufp->descp;
6404 	hdrp = &txdp->hdr;
6405 
6406 	while ((hdrp->dstate == VIO_DESC_DONE) &&
6407 	    (tbufp != ldcp->next_tbufp)) {
6408 		tbufp->flags = VGEN_PRIV_DESC_FREE;
6409 		hdrp->dstate = VIO_DESC_FREE;
6410 		hdrp->ack = B_FALSE;
6411 
6412 		tbufp = NEXTTBUF(ldcp, tbufp);
6413 		txdp = tbufp->descp;
6414 		hdrp = &txdp->hdr;
6415 		count++;
6416 	}
6417 
6418 	ldcp->cur_tbufp = tbufp;
6419 
6420 	/*
6421 	 * Check if mac layer should be notified to restart transmissions
6422 	 */
6423 	if ((ldcp->need_resched) && (count > 0)) {
6424 		vio_net_tx_update_t vtx_update =
6425 		    ldcp->portp->vcb.vio_net_tx_update;
6426 
6427 		ldcp->need_resched = B_FALSE;
6428 		vtx_update(ldcp->portp->vhp);
6429 	}
6430 }
6431 
6432 /* return the number of pending transmits for the channel */
6433 static int
6434 vgen_num_txpending(vgen_ldc_t *ldcp)
6435 {
6436 	int n;
6437 
6438 	if (ldcp->next_tbufp >= ldcp->cur_tbufp) {
6439 		n = ldcp->next_tbufp - ldcp->cur_tbufp;
6440 	} else  {
6441 		/* cur_tbufp > next_tbufp */
6442 		n = ldcp->num_txds - (ldcp->cur_tbufp - ldcp->next_tbufp);
6443 	}
6444 
6445 	return (n);
6446 }
6447 
6448 /* determine if the transmit descriptor ring is full */
6449 static int
6450 vgen_tx_dring_full(vgen_ldc_t *ldcp)
6451 {
6452 	vgen_private_desc_t	*tbufp;
6453 	vgen_private_desc_t	*ntbufp;
6454 
6455 	tbufp = ldcp->next_tbufp;
6456 	ntbufp = NEXTTBUF(ldcp, tbufp);
6457 	if (ntbufp == ldcp->cur_tbufp) { /* out of tbufs/txds */
6458 		return (VGEN_SUCCESS);
6459 	}
6460 	return (VGEN_FAILURE);
6461 }
6462 
6463 /* determine if timeout condition has occured */
6464 static int
6465 vgen_ldc_txtimeout(vgen_ldc_t *ldcp)
6466 {
6467 	if (((ddi_get_lbolt() - ldcp->reclaim_lbolt) >
6468 	    drv_usectohz(vnet_ldcwd_txtimeout * 1000)) &&
6469 	    (vnet_ldcwd_txtimeout) &&
6470 	    (vgen_tx_dring_full(ldcp) == VGEN_SUCCESS)) {
6471 		return (VGEN_SUCCESS);
6472 	} else {
6473 		return (VGEN_FAILURE);
6474 	}
6475 }
6476 
/*
 * Transmit watchdog timeout handler. Runs periodically (rearmed at the
 * bottom via timeout(9F)); if a tx timeout is detected, the channel is
 * flagged for reset and the handshake is retried.
 */
static void
vgen_ldc_watchdog(void *arg)
{
	vgen_ldc_t *ldcp;
	vgen_t *vgenp;
	int rv;

	ldcp = (vgen_ldc_t *)arg;
	vgenp = LDC_TO_VGEN(ldcp);

	/* check for a tx timeout (ring full and nothing reclaimed) */
	rv = vgen_ldc_txtimeout(ldcp);
	if (rv == VGEN_SUCCESS) {
		DWARN(vgenp, ldcp, "transmit timeout\n");
#ifdef DEBUG
		if (vgen_trigger_txtimeout) {
			/* tx timeout triggered for debugging */
			vgen_trigger_txtimeout = 0;
		}
#endif
		/* flag the channel for reset and redo the handshake */
		mutex_enter(&ldcp->cblock);
		ldcp->need_ldc_reset = B_TRUE;
		vgen_handshake_retry(ldcp);
		mutex_exit(&ldcp->cblock);
		/*
		 * NOTE(review): need_resched is read and cleared here after
		 * cblock has been dropped — presumably benign for this
		 * recovery path, but confirm against the tx/reclaim paths
		 * that also touch need_resched.
		 */
		if (ldcp->need_resched) {
			vio_net_tx_update_t vtx_update =
			    ldcp->portp->vcb.vio_net_tx_update;

			ldcp->need_resched = B_FALSE;
			vtx_update(ldcp->portp->vhp);
		}
	}

	/* rearm the watchdog for the next interval */
	ldcp->wd_tid = timeout(vgen_ldc_watchdog, (caddr_t)ldcp,
	    drv_usectohz(vnet_ldcwd_interval * 1000));
}
6513 
/*
 * Handler for error messages received from the peer ldc end-point.
 * Currently a no-op: such messages are silently ignored.
 */
static void
vgen_handle_errmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
{
	_NOTE(ARGUNUSED(ldcp, tagp))
}
6520 
6521 static int
6522 vgen_check_datamsg_seq(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
6523 {
6524 	vio_raw_data_msg_t	*rmsg;
6525 	vio_dring_msg_t		*dmsg;
6526 	uint64_t		seq_num;
6527 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
6528 
6529 	if (tagp->vio_subtype_env == VIO_DRING_DATA) {
6530 		dmsg = (vio_dring_msg_t *)tagp;
6531 		seq_num = dmsg->seq_num;
6532 	} else if (tagp->vio_subtype_env == VIO_PKT_DATA) {
6533 		rmsg = (vio_raw_data_msg_t *)tagp;
6534 		seq_num = rmsg->seq_num;
6535 	} else {
6536 		return (EINVAL);
6537 	}
6538 
6539 	if (seq_num != ldcp->next_rxseq) {
6540 
6541 		/* seqnums don't match */
6542 		DWARN(vgenp, ldcp,
6543 		    "next_rxseq(0x%lx) != seq_num(0x%lx)\n",
6544 		    ldcp->next_rxseq, seq_num);
6545 
6546 		ldcp->need_ldc_reset = B_TRUE;
6547 		return (EINVAL);
6548 
6549 	}
6550 
6551 	ldcp->next_rxseq++;
6552 
6553 	return (0);
6554 }
6555 
6556 /* Check if the session id in the received message is valid */
6557 static int
6558 vgen_check_sid(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
6559 {
6560 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6561 
6562 	if (tagp->vio_sid != ldcp->peer_sid) {
6563 		DWARN(vgenp, ldcp, "sid mismatch: expected(%x), rcvd(%x)\n",
6564 		    ldcp->peer_sid, tagp->vio_sid);
6565 		return (VGEN_FAILURE);
6566 	}
6567 	else
6568 		return (VGEN_SUCCESS);
6569 }
6570 
/*
 * Format a 6-byte ethernet address into ebuf as colon-separated hex
 * digits (no zero-padding) and return ebuf. The caller must supply a
 * buffer of at least 18 bytes ("xx:xx:xx:xx:xx:xx" plus NUL) — the
 * sprintf() below does no bounds checking.
 */
static caddr_t
vgen_print_ethaddr(uint8_t *a, char *ebuf)
{
	(void) sprintf(ebuf,
	    "%x:%x:%x:%x:%x:%x", a[0], a[1], a[2], a[3], a[4], a[5]);
	return (ebuf);
}
6578 
6579 /* Handshake watchdog timeout handler */
6580 static void
6581 vgen_hwatchdog(void *arg)
6582 {
6583 	vgen_ldc_t *ldcp = (vgen_ldc_t *)arg;
6584 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6585 
6586 	DWARN(vgenp, ldcp,
6587 	    "handshake timeout ldc(%lx) phase(%x) state(%x)\n",
6588 	    ldcp->hphase, ldcp->hstate);
6589 
6590 	mutex_enter(&ldcp->cblock);
6591 	if (ldcp->cancel_htid) {
6592 		ldcp->cancel_htid = 0;
6593 		mutex_exit(&ldcp->cblock);
6594 		return;
6595 	}
6596 	ldcp->htid = 0;
6597 	ldcp->need_ldc_reset = B_TRUE;
6598 	vgen_handshake_retry(ldcp);
6599 	mutex_exit(&ldcp->cblock);
6600 }
6601 
6602 static void
6603 vgen_print_hparams(vgen_hparams_t *hp)
6604 {
6605 	uint8_t	addr[6];
6606 	char	ea[6];
6607 	ldc_mem_cookie_t *dc;
6608 
6609 	cmn_err(CE_CONT, "version_info:\n");
6610 	cmn_err(CE_CONT,
6611 	    "\tver_major: %d, ver_minor: %d, dev_class: %d\n",
6612 	    hp->ver_major, hp->ver_minor, hp->dev_class);
6613 
6614 	vnet_macaddr_ultostr(hp->addr, addr);
6615 	cmn_err(CE_CONT, "attr_info:\n");
6616 	cmn_err(CE_CONT, "\tMTU: %lx, addr: %s\n", hp->mtu,
6617 	    vgen_print_ethaddr(addr, ea));
6618 	cmn_err(CE_CONT,
6619 	    "\taddr_type: %x, xfer_mode: %x, ack_freq: %x\n",
6620 	    hp->addr_type, hp->xfer_mode, hp->ack_freq);
6621 
6622 	dc = &hp->dring_cookie;
6623 	cmn_err(CE_CONT, "dring_info:\n");
6624 	cmn_err(CE_CONT,
6625 	    "\tlength: %d, dsize: %d\n", hp->num_desc, hp->desc_size);
6626 	cmn_err(CE_CONT,
6627 	    "\tldc_addr: 0x%lx, ldc_size: %ld\n",
6628 	    dc->addr, dc->size);
6629 	cmn_err(CE_CONT, "\tdring_ident: 0x%lx\n", hp->dring_ident);
6630 }
6631 
6632 static void
6633 vgen_print_ldcinfo(vgen_ldc_t *ldcp)
6634 {
6635 	vgen_hparams_t *hp;
6636 
6637 	cmn_err(CE_CONT, "Channel Information:\n");
6638 	cmn_err(CE_CONT,
6639 	    "\tldc_id: 0x%lx, ldc_status: 0x%x\n",
6640 	    ldcp->ldc_id, ldcp->ldc_status);
6641 	cmn_err(CE_CONT,
6642 	    "\tlocal_sid: 0x%x, peer_sid: 0x%x\n",
6643 	    ldcp->local_sid, ldcp->peer_sid);
6644 	cmn_err(CE_CONT,
6645 	    "\thphase: 0x%x, hstate: 0x%x\n",
6646 	    ldcp->hphase, ldcp->hstate);
6647 
6648 	cmn_err(CE_CONT, "Local handshake params:\n");
6649 	hp = &ldcp->local_hparams;
6650 	vgen_print_hparams(hp);
6651 
6652 	cmn_err(CE_CONT, "Peer handshake params:\n");
6653 	hp = &ldcp->peer_hparams;
6654 	vgen_print_hparams(hp);
6655 }
6656 
6657 /*
6658  * Send received packets up the stack.
6659  */
6660 static void
6661 vgen_rx(vgen_ldc_t *ldcp, mblk_t *bp)
6662 {
6663 	vio_net_rx_cb_t vrx_cb = ldcp->portp->vcb.vio_net_rx_cb;
6664 
6665 	if (ldcp->rcv_thread != NULL) {
6666 		ASSERT(MUTEX_HELD(&ldcp->rxlock));
6667 		mutex_exit(&ldcp->rxlock);
6668 	} else {
6669 		ASSERT(MUTEX_HELD(&ldcp->cblock));
6670 		mutex_exit(&ldcp->cblock);
6671 	}
6672 
6673 	vrx_cb(ldcp->portp->vhp, bp);
6674 
6675 	if (ldcp->rcv_thread != NULL) {
6676 		mutex_enter(&ldcp->rxlock);
6677 	} else {
6678 		mutex_enter(&ldcp->cblock);
6679 	}
6680 }
6681 
6682 /*
6683  * vgen_ldc_rcv_worker -- A per LDC worker thread to receive data.
6684  * This thread is woken up by the LDC interrupt handler to process
6685  * LDC packets and receive data.
6686  */
6687 static void
6688 vgen_ldc_rcv_worker(void *arg)
6689 {
6690 	callb_cpr_t	cprinfo;
6691 	vgen_ldc_t *ldcp = (vgen_ldc_t *)arg;
6692 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6693 
6694 	DBG1(vgenp, ldcp, "enter\n");
6695 	CALLB_CPR_INIT(&cprinfo, &ldcp->rcv_thr_lock, callb_generic_cpr,
6696 	    "vnet_rcv_thread");
6697 	mutex_enter(&ldcp->rcv_thr_lock);
6698 	while (!(ldcp->rcv_thr_flags & VGEN_WTHR_STOP)) {
6699 
6700 		CALLB_CPR_SAFE_BEGIN(&cprinfo);
6701 		/*
6702 		 * Wait until the data is received or a stop
6703 		 * request is received.
6704 		 */
6705 		while (!(ldcp->rcv_thr_flags &
6706 		    (VGEN_WTHR_DATARCVD | VGEN_WTHR_STOP))) {
6707 			cv_wait(&ldcp->rcv_thr_cv, &ldcp->rcv_thr_lock);
6708 		}
6709 		CALLB_CPR_SAFE_END(&cprinfo, &ldcp->rcv_thr_lock)
6710 
6711 		/*
6712 		 * First process the stop request.
6713 		 */
6714 		if (ldcp->rcv_thr_flags & VGEN_WTHR_STOP) {
6715 			DBG2(vgenp, ldcp, "stopped\n");
6716 			break;
6717 		}
6718 		ldcp->rcv_thr_flags &= ~VGEN_WTHR_DATARCVD;
6719 		ldcp->rcv_thr_flags |= VGEN_WTHR_PROCESSING;
6720 		mutex_exit(&ldcp->rcv_thr_lock);
6721 		DBG2(vgenp, ldcp, "calling vgen_handle_evt_read\n");
6722 		vgen_handle_evt_read(ldcp);
6723 		mutex_enter(&ldcp->rcv_thr_lock);
6724 		ldcp->rcv_thr_flags &= ~VGEN_WTHR_PROCESSING;
6725 	}
6726 
6727 	/*
6728 	 * Update the run status and wakeup the thread that
6729 	 * has sent the stop request.
6730 	 */
6731 	ldcp->rcv_thr_flags &= ~VGEN_WTHR_STOP;
6732 	ldcp->rcv_thread = NULL;
6733 	CALLB_CPR_EXIT(&cprinfo);
6734 
6735 	thread_exit();
6736 	DBG1(vgenp, ldcp, "exit\n");
6737 }
6738 
6739 /* vgen_stop_rcv_thread -- Co-ordinate with receive thread to stop it */
6740 static void
6741 vgen_stop_rcv_thread(vgen_ldc_t *ldcp)
6742 {
6743 	kt_did_t	tid = 0;
6744 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6745 
6746 	DBG1(vgenp, ldcp, "enter\n");
6747 	/*
6748 	 * Send a stop request by setting the stop flag and
6749 	 * wait until the receive thread stops.
6750 	 */
6751 	mutex_enter(&ldcp->rcv_thr_lock);
6752 	if (ldcp->rcv_thread != NULL) {
6753 		tid = ldcp->rcv_thread->t_did;
6754 		ldcp->rcv_thr_flags |= VGEN_WTHR_STOP;
6755 		cv_signal(&ldcp->rcv_thr_cv);
6756 	}
6757 	mutex_exit(&ldcp->rcv_thr_lock);
6758 
6759 	if (tid != 0) {
6760 		thread_join(tid);
6761 	}
6762 	DBG1(vgenp, ldcp, "exit\n");
6763 }
6764 
6765 /*
6766  * Wait for the channel rx-queue to be drained by allowing the receive
6767  * worker thread to read all messages from the rx-queue of the channel.
6768  * Assumption: further callbacks are disabled at this time.
6769  */
6770 static void
6771 vgen_drain_rcv_thread(vgen_ldc_t *ldcp)
6772 {
6773 	clock_t	tm;
6774 	clock_t	wt;
6775 	clock_t	rv;
6776 
6777 	/*
6778 	 * If there is data in ldc rx queue, wait until the rx
6779 	 * worker thread runs and drains all msgs in the queue.
6780 	 */
6781 	wt = drv_usectohz(MILLISEC);
6782 
6783 	mutex_enter(&ldcp->rcv_thr_lock);
6784 
6785 	tm = ddi_get_lbolt() + wt;
6786 
6787 	/*
6788 	 * We need to check both bits - DATARCVD and PROCESSING, to be cleared.
6789 	 * If DATARCVD is set, that means the callback has signalled the worker
6790 	 * thread, but the worker hasn't started processing yet. If PROCESSING
6791 	 * is set, that means the thread is awake and processing. Note that the
6792 	 * DATARCVD state can only be seen once, as the assumption is that
6793 	 * further callbacks have been disabled at this point.
6794 	 */
6795 	while (ldcp->rcv_thr_flags &
6796 	    (VGEN_WTHR_DATARCVD | VGEN_WTHR_PROCESSING)) {
6797 		rv = cv_timedwait(&ldcp->rcv_thr_cv, &ldcp->rcv_thr_lock, tm);
6798 		if (rv == -1) {	/* timeout */
6799 			/*
6800 			 * Note that the only way we return is due to a timeout;
6801 			 * we set the new time to wait, before we go back and
6802 			 * check the condition. The other(unlikely) possibility
6803 			 * is a premature wakeup(see cv_timedwait(9F)) in which
6804 			 * case we just continue to use the same time to wait.
6805 			 */
6806 			tm = ddi_get_lbolt() + wt;
6807 		}
6808 	}
6809 
6810 	mutex_exit(&ldcp->rcv_thr_lock);
6811 }
6812 
6813 /*
6814  * vgen_dds_rx -- post DDS messages to vnet.
6815  */
6816 static int
6817 vgen_dds_rx(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
6818 {
6819 	vio_dds_msg_t *dmsg = (vio_dds_msg_t *)tagp;
6820 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6821 
6822 	if (dmsg->dds_class != DDS_VNET_NIU) {
6823 		DWARN(vgenp, ldcp, "Unknown DDS class, dropping");
6824 		return (EBADMSG);
6825 	}
6826 	vnet_dds_rx(vgenp->vnetp, dmsg);
6827 	return (0);
6828 }
6829 
6830 /*
6831  * vgen_dds_tx -- an interface called by vnet to send DDS messages.
6832  */
6833 int
6834 vgen_dds_tx(void *arg, void *msg)
6835 {
6836 	vgen_t *vgenp = arg;
6837 	vio_dds_msg_t *dmsg = msg;
6838 	vgen_portlist_t *plistp = &vgenp->vgenports;
6839 	vgen_ldc_t *ldcp;
6840 	vgen_ldclist_t *ldclp;
6841 	int rv = EIO;
6842 
6843 
6844 	READ_ENTER(&plistp->rwlock);
6845 	ldclp = &(vgenp->vsw_portp->ldclist);
6846 	READ_ENTER(&ldclp->rwlock);
6847 	ldcp = ldclp->headp;
6848 	if ((ldcp == NULL) || (ldcp->hphase != VH_DONE)) {
6849 		goto vgen_dsend_exit;
6850 	}
6851 
6852 	dmsg->tag.vio_sid = ldcp->local_sid;
6853 	rv = vgen_sendmsg(ldcp, (caddr_t)dmsg, sizeof (vio_dds_msg_t), B_FALSE);
6854 	if (rv != VGEN_SUCCESS) {
6855 		rv = EIO;
6856 	} else {
6857 		rv = 0;
6858 	}
6859 
6860 vgen_dsend_exit:
6861 	RW_EXIT(&ldclp->rwlock);
6862 	RW_EXIT(&plistp->rwlock);
6863 	return (rv);
6864 
6865 }
6866 
6867 #if DEBUG
6868 
6869 /*
6870  * Print debug messages - set to 0xf to enable all msgs
6871  */
6872 static void
6873 debug_printf(const char *fname, vgen_t *vgenp,
6874     vgen_ldc_t *ldcp, const char *fmt, ...)
6875 {
6876 	char    buf[256];
6877 	char    *bufp = buf;
6878 	va_list ap;
6879 
6880 	if ((vgenp != NULL) && (vgenp->vnetp != NULL)) {
6881 		(void) sprintf(bufp, "vnet%d:",
6882 		    ((vnet_t *)(vgenp->vnetp))->instance);
6883 		bufp += strlen(bufp);
6884 	}
6885 	if (ldcp != NULL) {
6886 		(void) sprintf(bufp, "ldc(%ld):", ldcp->ldc_id);
6887 		bufp += strlen(bufp);
6888 	}
6889 	(void) sprintf(bufp, "%s: ", fname);
6890 	bufp += strlen(bufp);
6891 
6892 	va_start(ap, fmt);
6893 	(void) vsprintf(bufp, fmt, ap);
6894 	va_end(ap);
6895 
6896 	if ((ldcp == NULL) ||(vgendbg_ldcid == -1) ||
6897 	    (vgendbg_ldcid == ldcp->ldc_id)) {
6898 		cmn_err(CE_CONT, "%s\n", buf);
6899 	}
6900 }
6901 #endif
6902