xref: /titanic_44/usr/src/uts/sun4v/io/vnet_gen.c (revision 82d5eb48a7d00754517c192ba2dd23cb68997b3c)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/types.h>
28 #include <sys/errno.h>
29 #include <sys/sysmacros.h>
30 #include <sys/param.h>
31 #include <sys/stream.h>
32 #include <sys/strsubr.h>
33 #include <sys/kmem.h>
34 #include <sys/conf.h>
35 #include <sys/devops.h>
36 #include <sys/ksynch.h>
37 #include <sys/stat.h>
38 #include <sys/modctl.h>
39 #include <sys/debug.h>
40 #include <sys/ethernet.h>
41 #include <sys/ddi.h>
42 #include <sys/sunddi.h>
43 #include <sys/strsun.h>
44 #include <sys/note.h>
45 #include <sys/mac_provider.h>
46 #include <sys/mac_ether.h>
47 #include <sys/ldc.h>
48 #include <sys/mach_descrip.h>
49 #include <sys/mdeg.h>
50 #include <net/if.h>
51 #include <sys/vnet.h>
52 #include <sys/vio_mailbox.h>
53 #include <sys/vio_common.h>
54 #include <sys/vnet_common.h>
55 #include <sys/vnet_mailbox.h>
56 #include <sys/vio_util.h>
57 #include <sys/vnet_gen.h>
58 #include <sys/atomic.h>
59 #include <sys/callb.h>
60 #include <sys/sdt.h>
61 #include <sys/intr.h>
62 #include <sys/pattr.h>
63 #include <sys/vlan.h>
64 
65 /*
66  * Implementation of the mac functionality for vnet using the
67  * generic(default) transport layer of sun4v Logical Domain Channels(LDC).
68  */
69 
70 /*
71  * Function prototypes.
72  */
73 /* vgen proxy entry points */
74 int vgen_init(void *vnetp, uint64_t regprop, dev_info_t *vnetdip,
75     const uint8_t *macaddr, void **vgenhdl);
76 int vgen_uninit(void *arg);
77 int vgen_dds_tx(void *arg, void *dmsg);
78 static int vgen_start(void *arg);
79 static void vgen_stop(void *arg);
80 static mblk_t *vgen_tx(void *arg, mblk_t *mp);
81 static int vgen_multicst(void *arg, boolean_t add,
82 	const uint8_t *mca);
83 static int vgen_promisc(void *arg, boolean_t on);
84 static int vgen_unicst(void *arg, const uint8_t *mca);
85 static int vgen_stat(void *arg, uint_t stat, uint64_t *val);
86 static void vgen_ioctl(void *arg, queue_t *wq, mblk_t *mp);
87 
88 /* vgen internal functions */
89 static int vgen_read_mdprops(vgen_t *vgenp);
90 static void vgen_update_md_prop(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
91 static void vgen_read_pri_eth_types(vgen_t *vgenp, md_t *mdp,
92 	mde_cookie_t node);
93 static void vgen_mtu_read(vgen_t *vgenp, md_t *mdp, mde_cookie_t node,
94 	uint32_t *mtu);
95 static void vgen_detach_ports(vgen_t *vgenp);
96 static void vgen_port_detach(vgen_port_t *portp);
97 static void vgen_port_list_insert(vgen_port_t *portp);
98 static void vgen_port_list_remove(vgen_port_t *portp);
99 static vgen_port_t *vgen_port_lookup(vgen_portlist_t *plistp,
100 	int port_num);
101 static int vgen_mdeg_reg(vgen_t *vgenp);
102 static void vgen_mdeg_unreg(vgen_t *vgenp);
103 static int vgen_mdeg_cb(void *cb_argp, mdeg_result_t *resp);
104 static int vgen_mdeg_port_cb(void *cb_argp, mdeg_result_t *resp);
105 static int vgen_add_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
106 static int vgen_port_read_props(vgen_port_t *portp, vgen_t *vgenp, md_t *mdp,
107 	mde_cookie_t mdex);
108 static int vgen_remove_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
109 static int vgen_port_attach(vgen_port_t *portp);
110 static void vgen_port_detach_mdeg(vgen_port_t *portp);
111 static void vgen_port_detach_mdeg(vgen_port_t *portp);
112 static int vgen_update_port(vgen_t *vgenp, md_t *curr_mdp,
113 	mde_cookie_t curr_mdex, md_t *prev_mdp, mde_cookie_t prev_mdex);
114 static uint64_t	vgen_port_stat(vgen_port_t *portp, uint_t stat);
115 
116 static int vgen_ldc_attach(vgen_port_t *portp, uint64_t ldc_id);
117 static void vgen_ldc_detach(vgen_ldc_t *ldcp);
118 static int vgen_alloc_tx_ring(vgen_ldc_t *ldcp);
119 static void vgen_free_tx_ring(vgen_ldc_t *ldcp);
120 static void vgen_init_ports(vgen_t *vgenp);
121 static void vgen_port_init(vgen_port_t *portp);
122 static void vgen_uninit_ports(vgen_t *vgenp);
123 static void vgen_port_uninit(vgen_port_t *portp);
124 static void vgen_init_ldcs(vgen_port_t *portp);
125 static void vgen_uninit_ldcs(vgen_port_t *portp);
126 static int vgen_ldc_init(vgen_ldc_t *ldcp);
127 static void vgen_ldc_uninit(vgen_ldc_t *ldcp);
128 static int vgen_init_tbufs(vgen_ldc_t *ldcp);
129 static void vgen_uninit_tbufs(vgen_ldc_t *ldcp);
130 static void vgen_clobber_tbufs(vgen_ldc_t *ldcp);
131 static void vgen_clobber_rxds(vgen_ldc_t *ldcp);
132 static uint64_t	vgen_ldc_stat(vgen_ldc_t *ldcp, uint_t stat);
133 static uint_t vgen_ldc_cb(uint64_t event, caddr_t arg);
134 static int vgen_portsend(vgen_port_t *portp, mblk_t *mp);
135 static int vgen_ldcsend(void *arg, mblk_t *mp);
136 static void vgen_ldcsend_pkt(void *arg, mblk_t *mp);
137 static int vgen_ldcsend_dring(void *arg, mblk_t *mp);
138 static void vgen_reclaim(vgen_ldc_t *ldcp);
139 static void vgen_reclaim_dring(vgen_ldc_t *ldcp);
140 static int vgen_num_txpending(vgen_ldc_t *ldcp);
141 static int vgen_tx_dring_full(vgen_ldc_t *ldcp);
142 static int vgen_ldc_txtimeout(vgen_ldc_t *ldcp);
143 static void vgen_ldc_watchdog(void *arg);
144 
145 /* vgen handshake functions */
146 static vgen_ldc_t *vh_nextphase(vgen_ldc_t *ldcp);
147 static int vgen_sendmsg(vgen_ldc_t *ldcp, caddr_t msg,  size_t msglen,
148 	boolean_t caller_holds_lock);
149 static int vgen_send_version_negotiate(vgen_ldc_t *ldcp);
150 static int vgen_send_attr_info(vgen_ldc_t *ldcp);
151 static int vgen_send_dring_reg(vgen_ldc_t *ldcp);
152 static int vgen_send_rdx_info(vgen_ldc_t *ldcp);
153 static int vgen_send_dring_data(vgen_ldc_t *ldcp, uint32_t start, int32_t end);
154 static int vgen_send_mcast_info(vgen_ldc_t *ldcp);
155 static int vgen_handshake_phase2(vgen_ldc_t *ldcp);
156 static void vgen_handshake_reset(vgen_ldc_t *ldcp);
157 static void vgen_reset_hphase(vgen_ldc_t *ldcp);
158 static void vgen_handshake(vgen_ldc_t *ldcp);
159 static int vgen_handshake_done(vgen_ldc_t *ldcp);
160 static void vgen_handshake_retry(vgen_ldc_t *ldcp);
161 static int vgen_handle_version_negotiate(vgen_ldc_t *ldcp,
162 	vio_msg_tag_t *tagp);
163 static int vgen_handle_attr_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
164 static int vgen_handle_dring_reg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
165 static int vgen_handle_rdx_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
166 static int vgen_handle_mcast_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
167 static int vgen_handle_ctrlmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
168 static void vgen_handle_pkt_data_nop(void *arg1, void *arg2, uint32_t msglen);
169 static void vgen_handle_pkt_data(void *arg1, void *arg2, uint32_t msglen);
170 static int vgen_handle_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
171 static int vgen_handle_dring_data_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
172 static int vgen_process_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
173 static int vgen_handle_dring_data_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
174 static int vgen_handle_dring_data_nack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
175 static int vgen_send_dring_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp,
176 	uint32_t start, int32_t end, uint8_t pstate);
177 static int vgen_handle_datamsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp,
178 	uint32_t msglen);
179 static void vgen_handle_errmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
180 static void vgen_handle_evt_up(vgen_ldc_t *ldcp);
181 static void vgen_handle_evt_reset(vgen_ldc_t *ldcp);
182 static int vgen_check_sid(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
183 static int vgen_check_datamsg_seq(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
184 static caddr_t vgen_print_ethaddr(uint8_t *a, char *ebuf);
185 static void vgen_hwatchdog(void *arg);
186 static void vgen_print_attr_info(vgen_ldc_t *ldcp, int endpoint);
187 static void vgen_print_hparams(vgen_hparams_t *hp);
188 static void vgen_print_ldcinfo(vgen_ldc_t *ldcp);
189 static void vgen_stop_rcv_thread(vgen_ldc_t *ldcp);
190 static void vgen_drain_rcv_thread(vgen_ldc_t *ldcp);
191 static void vgen_ldc_rcv_worker(void *arg);
192 static void vgen_handle_evt_read(vgen_ldc_t *ldcp);
193 static void vgen_rx(vgen_ldc_t *ldcp, mblk_t *bp);
194 static void vgen_set_vnet_proto_ops(vgen_ldc_t *ldcp);
195 static void vgen_reset_vnet_proto_ops(vgen_ldc_t *ldcp);
196 
197 /* VLAN routines */
198 static void vgen_vlan_read_ids(void *arg, int type, md_t *mdp,
199 	mde_cookie_t node, uint16_t *pvidp, uint16_t **vidspp,
200 	uint16_t *nvidsp, uint16_t *default_idp);
201 static void vgen_vlan_create_hash(vgen_port_t *portp);
202 static void vgen_vlan_destroy_hash(vgen_port_t *portp);
203 static void vgen_vlan_add_ids(vgen_port_t *portp);
204 static void vgen_vlan_remove_ids(vgen_port_t *portp);
205 static boolean_t vgen_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid);
206 static boolean_t vgen_frame_lookup_vid(vnet_t *vnetp, struct ether_header *ehp,
207 	uint16_t *vidp);
208 static mblk_t *vgen_vlan_frame_fixtag(vgen_port_t *portp, mblk_t *mp,
209 	boolean_t is_tagged, uint16_t vid);
210 static void vgen_vlan_unaware_port_reset(vgen_port_t *portp);
211 static void vgen_reset_vlan_unaware_ports(vgen_t *vgenp);
212 static int vgen_dds_rx(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
213 
214 /* externs */
215 extern void vnet_dds_rx(void *arg, void *dmsg);
216 extern int vnet_mtu_update(vnet_t *vnetp, uint32_t mtu);
217 
218 /*
219  * The handshake process consists of 5 phases defined below, with VH_PHASE0
220  * being the pre-handshake phase and VH_DONE is the phase to indicate
221  * successful completion of all phases.
222  * Each phase may have one to several handshake states which are required
223  * to complete successfully to move to the next phase.
224  * Refer to the functions vgen_handshake() and vgen_handshake_done() for
225  * more details.
226  */
227 /* handshake phases */
228 enum {	VH_PHASE0, VH_PHASE1, VH_PHASE2, VH_PHASE3, VH_DONE = 0x80 };
229 
230 /* handshake states */
231 enum {
232 
233 	VER_INFO_SENT	=	0x1,
234 	VER_ACK_RCVD	=	0x2,
235 	VER_INFO_RCVD	=	0x4,
236 	VER_ACK_SENT	=	0x8,
237 	VER_NEGOTIATED	=	(VER_ACK_RCVD | VER_ACK_SENT),
238 
239 	ATTR_INFO_SENT	=	0x10,
240 	ATTR_ACK_RCVD	=	0x20,
241 	ATTR_INFO_RCVD	=	0x40,
242 	ATTR_ACK_SENT	=	0x80,
243 	ATTR_INFO_EXCHANGED	=	(ATTR_ACK_RCVD | ATTR_ACK_SENT),
244 
245 	DRING_INFO_SENT	=	0x100,
246 	DRING_ACK_RCVD	=	0x200,
247 	DRING_INFO_RCVD	=	0x400,
248 	DRING_ACK_SENT	=	0x800,
249 	DRING_INFO_EXCHANGED	=	(DRING_ACK_RCVD | DRING_ACK_SENT),
250 
251 	RDX_INFO_SENT	=	0x1000,
252 	RDX_ACK_RCVD	=	0x2000,
253 	RDX_INFO_RCVD	=	0x4000,
254 	RDX_ACK_SENT	=	0x8000,
255 	RDX_EXCHANGED	=	(RDX_ACK_RCVD | RDX_ACK_SENT)
256 
257 };
258 
259 #define	VGEN_PRI_ETH_DEFINED(vgenp)	((vgenp)->pri_num_types != 0)
260 
/*
 * Acquire (LDC_LOCK) or release (LDC_UNLOCK) all five per-channel locks.
 * The locks are taken in the order cblock, rxlock, wrlock, txlock, tclock
 * and dropped in the reverse order.
 *
 * Each macro expands to a single statement, so it is safe to use as the
 * body of an unbraced if/else; the invocation must be followed by a
 * semicolon.
 */
#define	LDC_LOCK(ldcp)	do {					\
	mutex_enter(&((ldcp)->cblock));				\
	mutex_enter(&((ldcp)->rxlock));				\
	mutex_enter(&((ldcp)->wrlock));				\
	mutex_enter(&((ldcp)->txlock));				\
	mutex_enter(&((ldcp)->tclock));				\
	_NOTE(CONSTCOND)					\
} while (0)
#define	LDC_UNLOCK(ldcp)	do {				\
	mutex_exit(&((ldcp)->tclock));				\
	mutex_exit(&((ldcp)->txlock));				\
	mutex_exit(&((ldcp)->wrlock));				\
	mutex_exit(&((ldcp)->rxlock));				\
	mutex_exit(&((ldcp)->cblock));				\
	_NOTE(CONSTCOND)					\
} while (0)
273 
274 #define	VGEN_VER_EQ(ldcp, major, minor)	\
275 	((ldcp)->local_hparams.ver_major == (major) &&	\
276 	    (ldcp)->local_hparams.ver_minor == (minor))
277 
278 #define	VGEN_VER_LT(ldcp, major, minor)	\
279 	(((ldcp)->local_hparams.ver_major < (major)) ||	\
280 	    ((ldcp)->local_hparams.ver_major == (major) &&	\
281 	    (ldcp)->local_hparams.ver_minor < (minor)))
282 
283 #define	VGEN_VER_GTEQ(ldcp, major, minor)	\
284 	(((ldcp)->local_hparams.ver_major > (major)) ||	\
285 	    ((ldcp)->local_hparams.ver_major == (major) &&	\
286 	    (ldcp)->local_hparams.ver_minor >= (minor)))
287 
288 static struct ether_addr etherbroadcastaddr = {
289 	0xff, 0xff, 0xff, 0xff, 0xff, 0xff
290 };
/*
 * MIB II broadcast/multicast packets.
 *
 * Both macros take a pointer to an ethernet header and classify the frame
 * by its destination address: IS_BROADCAST matches the all-ones address in
 * etherbroadcastaddr, IS_MULTICAST tests the low-order bit of the first
 * destination octet. The argument is parenthesized so that any pointer
 * expression may be passed.
 */
#define	IS_BROADCAST(ehp) \
		(ether_cmp(&(ehp)->ether_dhost, &etherbroadcastaddr) == 0)
#define	IS_MULTICAST(ehp) \
		(((ehp)->ether_dhost.ether_addr_octet[0] & 01) == 1)
298 
299 /*
300  * Property names
301  */
302 static char macaddr_propname[] = "mac-address";
303 static char rmacaddr_propname[] = "remote-mac-address";
304 static char channel_propname[] = "channel-endpoint";
305 static char reg_propname[] = "reg";
306 static char port_propname[] = "port";
307 static char swport_propname[] = "switch-port";
308 static char id_propname[] = "id";
309 static char vdev_propname[] = "virtual-device";
310 static char vnet_propname[] = "network";
311 static char pri_types_propname[] = "priority-ether-types";
312 static char vgen_pvid_propname[] = "port-vlan-id";
313 static char vgen_vid_propname[] = "vlan-id";
314 static char vgen_dvid_propname[] = "default-vlan-id";
315 static char port_pvid_propname[] = "remote-port-vlan-id";
316 static char port_vid_propname[] = "remote-vlan-id";
317 static char vgen_mtu_propname[] = "mtu";
318 
319 /* versions supported - in decreasing order */
320 static vgen_ver_t vgen_versions[VGEN_NUM_VER] = { {1, 4} };
321 
322 /* Tunables */
323 uint32_t vgen_hwd_interval = 5;		/* handshake watchdog freq in sec */
324 uint32_t vgen_max_hretries = VNET_NUM_HANDSHAKES; /* # of handshake retries */
325 uint32_t vgen_ldcwr_retries = 10;	/* max # of ldc_write() retries */
326 uint32_t vgen_ldcup_retries = 5;	/* max # of ldc_up() retries */
327 uint32_t vgen_ldccl_retries = 5;	/* max # of ldc_close() retries */
328 uint32_t vgen_recv_delay = 1;		/* delay when rx descr not ready */
329 uint32_t vgen_recv_retries = 10;	/* retry when rx descr not ready */
330 uint32_t vgen_tx_retries = 0x4;		/* retry when tx descr not available */
331 uint32_t vgen_tx_delay = 0x30;		/* delay when tx descr not available */
332 
333 int vgen_rcv_thread_enabled = 1;	/* Enable Receive thread */
334 
335 /*
336  * max # of packets accumulated prior to sending them up. It is best
337  * to keep this at 60% of the number of receive buffers.
338  */
339 uint32_t vgen_chain_len = (VGEN_NRBUFS * 0.6);
340 
341 /*
342  * Internal tunables for receive buffer pools, that is,  the size and number of
343  * mblks for each pool. At least 3 sizes must be specified if these are used.
344  * The sizes must be specified in increasing order. Non-zero value of the first
345  * size will be used as a hint to use these values instead of the algorithm
346  * that determines the sizes based on MTU.
347  */
348 uint32_t vgen_rbufsz1 = 0;
349 uint32_t vgen_rbufsz2 = 0;
350 uint32_t vgen_rbufsz3 = 0;
351 uint32_t vgen_rbufsz4 = 0;
352 
353 uint32_t vgen_nrbufs1 = VGEN_NRBUFS;
354 uint32_t vgen_nrbufs2 = VGEN_NRBUFS;
355 uint32_t vgen_nrbufs3 = VGEN_NRBUFS;
356 uint32_t vgen_nrbufs4 = VGEN_NRBUFS;
357 
358 /*
359  * In the absence of "priority-ether-types" property in MD, the following
360  * internal tunable can be set to specify a single priority ethertype.
361  */
362 uint64_t vgen_pri_eth_type = 0;
363 
364 /*
365  * Number of transmit priority buffers that are preallocated per device.
366  * This number is chosen to be a small value to throttle transmission
367  * of priority packets. Note: Must be a power of 2 for vio_create_mblks().
368  */
369 uint32_t vgen_pri_tx_nmblks = 64;
370 
371 uint32_t	vgen_vlan_nchains = 4;	/* # of chains in vlan id hash table */
372 
373 #ifdef DEBUG
374 /* flags to simulate error conditions for debugging */
375 int vgen_trigger_txtimeout = 0;
376 int vgen_trigger_rxlost = 0;
377 #endif
378 
379 /*
380  * Matching criteria passed to the MDEG to register interest
381  * in changes to 'virtual-device' nodes (i.e. vnet nodes) identified
382  * by their 'name' and 'cfg-handle' properties.
383  */
384 static md_prop_match_t vdev_prop_match[] = {
385 	{ MDET_PROP_STR,    "name"   },
386 	{ MDET_PROP_VAL,    "cfg-handle" },
387 	{ MDET_LIST_END,    NULL    }
388 };
389 
390 static mdeg_node_match_t vdev_match = { "virtual-device",
391 						vdev_prop_match };
392 
393 /* MD update matching structure */
394 static md_prop_match_t	vport_prop_match[] = {
395 	{ MDET_PROP_VAL,	"id" },
396 	{ MDET_LIST_END,	NULL }
397 };
398 
399 static mdeg_node_match_t vport_match = { "virtual-device-port",
400 					vport_prop_match };
401 
402 /* template for matching a particular vnet instance */
403 static mdeg_prop_spec_t vgen_prop_template[] = {
404 	{ MDET_PROP_STR,	"name",		"network" },
405 	{ MDET_PROP_VAL,	"cfg-handle",	NULL },
406 	{ MDET_LIST_END,	NULL,		NULL }
407 };
408 
409 #define	VGEN_SET_MDEG_PROP_INST(specp, val)	(specp)[1].ps_val = (val)
410 
411 static int vgen_mdeg_port_cb(void *cb_argp, mdeg_result_t *resp);
412 
413 static mac_callbacks_t vgen_m_callbacks = {
414 	0,
415 	vgen_stat,
416 	vgen_start,
417 	vgen_stop,
418 	vgen_promisc,
419 	vgen_multicst,
420 	vgen_unicst,
421 	vgen_tx,
422 	NULL,
423 	NULL,
424 	NULL
425 };
426 
427 /* externs */
428 extern pri_t	maxclsyspri;
429 extern proc_t	p0;
430 extern uint32_t vnet_ntxds;
431 extern uint32_t vnet_ldcwd_interval;
432 extern uint32_t vnet_ldcwd_txtimeout;
433 extern uint32_t vnet_ldc_mtu;
434 extern uint32_t vnet_nrbufs;
435 extern uint32_t	vnet_ethermtu;
436 extern uint16_t	vnet_default_vlan_id;
437 extern boolean_t vnet_jumbo_rxpools;
438 
439 #ifdef DEBUG
440 
441 extern int vnet_dbglevel;
442 static void debug_printf(const char *fname, vgen_t *vgenp,
443 	vgen_ldc_t *ldcp, const char *fmt, ...);
444 
445 /* -1 for all LDCs info, or ldc_id for a specific LDC info */
446 int vgendbg_ldcid = -1;
447 
448 /* simulate handshake error conditions for debug */
449 uint32_t vgen_hdbg;
450 #define	HDBG_VERSION	0x1
451 #define	HDBG_TIMEOUT	0x2
452 #define	HDBG_BAD_SID	0x4
453 #define	HDBG_OUT_STATE	0x8
454 
455 #endif
456 
457 /*
458  * vgen_init() is called by an instance of vnet driver to initialize the
459  * corresponding generic proxy transport layer. The arguments passed by vnet
460  * are - an opaque pointer to the vnet instance, pointers to dev_info_t and
461  * the mac address of the vnet device, and a pointer to vgen_t is passed
462  * back as a handle to vnet.
463  */
464 int
465 vgen_init(void *vnetp, uint64_t regprop, dev_info_t *vnetdip,
466     const uint8_t *macaddr, void **vgenhdl)
467 {
468 	vgen_t *vgenp;
469 	int instance;
470 	int rv;
471 
472 	if ((vnetp == NULL) || (vnetdip == NULL))
473 		return (DDI_FAILURE);
474 
475 	instance = ddi_get_instance(vnetdip);
476 
477 	DBG1(NULL, NULL, "vnet(%d): enter\n", instance);
478 
479 	vgenp = kmem_zalloc(sizeof (vgen_t), KM_SLEEP);
480 
481 	vgenp->vnetp = vnetp;
482 	vgenp->instance = instance;
483 	vgenp->regprop = regprop;
484 	vgenp->vnetdip = vnetdip;
485 	bcopy(macaddr, &(vgenp->macaddr), ETHERADDRL);
486 
487 	/* allocate multicast table */
488 	vgenp->mctab = kmem_zalloc(VGEN_INIT_MCTAB_SIZE *
489 	    sizeof (struct ether_addr), KM_SLEEP);
490 	vgenp->mccount = 0;
491 	vgenp->mcsize = VGEN_INIT_MCTAB_SIZE;
492 
493 	mutex_init(&vgenp->lock, NULL, MUTEX_DRIVER, NULL);
494 	rw_init(&vgenp->vgenports.rwlock, NULL, RW_DRIVER, NULL);
495 
496 	rv = vgen_read_mdprops(vgenp);
497 	if (rv != 0) {
498 		goto vgen_init_fail;
499 	}
500 
501 	/* register with MD event generator */
502 	rv = vgen_mdeg_reg(vgenp);
503 	if (rv != DDI_SUCCESS) {
504 		goto vgen_init_fail;
505 	}
506 
507 	*vgenhdl = (void *)vgenp;
508 
509 	DBG1(NULL, NULL, "vnet(%d): exit\n", instance);
510 	return (DDI_SUCCESS);
511 
512 vgen_init_fail:
513 	rw_destroy(&vgenp->vgenports.rwlock);
514 	mutex_destroy(&vgenp->lock);
515 	kmem_free(vgenp->mctab, VGEN_INIT_MCTAB_SIZE *
516 	    sizeof (struct ether_addr));
517 	if (VGEN_PRI_ETH_DEFINED(vgenp)) {
518 		kmem_free(vgenp->pri_types,
519 		    sizeof (uint16_t) * vgenp->pri_num_types);
520 		(void) vio_destroy_mblks(vgenp->pri_tx_vmp);
521 	}
522 	KMEM_FREE(vgenp);
523 	return (DDI_FAILURE);
524 }
525 
526 /*
527  * Called by vnet to undo the initializations done by vgen_init().
528  * The handle provided by generic transport during vgen_init() is the argument.
529  */
530 int
531 vgen_uninit(void *arg)
532 {
533 	vgen_t		*vgenp = (vgen_t *)arg;
534 	vio_mblk_pool_t	*rp;
535 	vio_mblk_pool_t	*nrp;
536 
537 	if (vgenp == NULL) {
538 		return (DDI_FAILURE);
539 	}
540 
541 	DBG1(vgenp, NULL, "enter\n");
542 
543 	/* unregister with MD event generator */
544 	vgen_mdeg_unreg(vgenp);
545 
546 	mutex_enter(&vgenp->lock);
547 
548 	/* detach all ports from the device */
549 	vgen_detach_ports(vgenp);
550 
551 	/*
552 	 * free any pending rx mblk pools,
553 	 * that couldn't be freed previously during channel detach.
554 	 */
555 	rp = vgenp->rmp;
556 	while (rp != NULL) {
557 		nrp = vgenp->rmp = rp->nextp;
558 		if (vio_destroy_mblks(rp)) {
559 			vgenp->rmp = rp;
560 			mutex_exit(&vgenp->lock);
561 			return (DDI_FAILURE);
562 		}
563 		rp = nrp;
564 	}
565 
566 	/* free multicast table */
567 	kmem_free(vgenp->mctab, vgenp->mcsize * sizeof (struct ether_addr));
568 
569 	/* free pri_types table */
570 	if (VGEN_PRI_ETH_DEFINED(vgenp)) {
571 		kmem_free(vgenp->pri_types,
572 		    sizeof (uint16_t) * vgenp->pri_num_types);
573 		(void) vio_destroy_mblks(vgenp->pri_tx_vmp);
574 	}
575 
576 	mutex_exit(&vgenp->lock);
577 
578 	rw_destroy(&vgenp->vgenports.rwlock);
579 	mutex_destroy(&vgenp->lock);
580 
581 	DBG1(vgenp, NULL, "exit\n");
582 	KMEM_FREE(vgenp);
583 
584 	return (DDI_SUCCESS);
585 }
586 
587 /* enable transmit/receive for the device */
588 int
589 vgen_start(void *arg)
590 {
591 	vgen_port_t	*portp = (vgen_port_t *)arg;
592 	vgen_t		*vgenp = portp->vgenp;
593 
594 	DBG1(vgenp, NULL, "enter\n");
595 	mutex_enter(&portp->lock);
596 	vgen_port_init(portp);
597 	portp->flags |= VGEN_STARTED;
598 	mutex_exit(&portp->lock);
599 	DBG1(vgenp, NULL, "exit\n");
600 
601 	return (DDI_SUCCESS);
602 }
603 
604 /* stop transmit/receive */
605 void
606 vgen_stop(void *arg)
607 {
608 	vgen_port_t	*portp = (vgen_port_t *)arg;
609 	vgen_t		*vgenp = portp->vgenp;
610 
611 	DBG1(vgenp, NULL, "enter\n");
612 
613 	mutex_enter(&portp->lock);
614 	vgen_port_uninit(portp);
615 	portp->flags &= ~(VGEN_STARTED);
616 	mutex_exit(&portp->lock);
617 	DBG1(vgenp, NULL, "exit\n");
618 
619 }
620 
621 /* vgen transmit function */
622 static mblk_t *
623 vgen_tx(void *arg, mblk_t *mp)
624 {
625 	int i;
626 	vgen_port_t *portp;
627 	int status = VGEN_FAILURE;
628 
629 	portp = (vgen_port_t *)arg;
630 	/*
631 	 * Retry so that we avoid reporting a failure
632 	 * to the upper layer. Returning a failure may cause the
633 	 * upper layer to go into single threaded mode there by
634 	 * causing performance degradation, especially for a large
635 	 * number of connections.
636 	 */
637 	for (i = 0; i < vgen_tx_retries; ) {
638 		status = vgen_portsend(portp, mp);
639 		if (status == VGEN_SUCCESS) {
640 			break;
641 		}
642 		if (++i < vgen_tx_retries)
643 			delay(drv_usectohz(vgen_tx_delay));
644 	}
645 	if (status != VGEN_SUCCESS) {
646 		/* failure */
647 		return (mp);
648 	}
649 	/* success */
650 	return (NULL);
651 }
652 
653 /*
654  * This function provides any necessary tagging/untagging of the frames
655  * that are being transmitted over the port. It first verifies the vlan
656  * membership of the destination(port) and drops the packet if the
657  * destination doesn't belong to the given vlan.
658  *
659  * Arguments:
660  *   portp:     port over which the frames should be transmitted
661  *   mp:        frame to be transmitted
662  *   is_tagged:
663  *              B_TRUE: indicates frame header contains the vlan tag already.
664  *              B_FALSE: indicates frame is untagged.
665  *   vid:       vlan in which the frame should be transmitted.
666  *
667  * Returns:
668  *              Sucess: frame(mblk_t *) after doing the necessary tag/untag.
669  *              Failure: NULL
670  */
671 static mblk_t *
672 vgen_vlan_frame_fixtag(vgen_port_t *portp, mblk_t *mp, boolean_t is_tagged,
673 	uint16_t vid)
674 {
675 	vgen_t				*vgenp;
676 	boolean_t			dst_tagged;
677 	int				rv;
678 
679 	vgenp = portp->vgenp;
680 
681 	/*
682 	 * If the packet is going to a vnet:
683 	 *   Check if the destination vnet is in the same vlan.
684 	 *   Check the frame header if tag or untag is needed.
685 	 *
686 	 * We do not check the above conditions if the packet is going to vsw:
687 	 *   vsw must be present implicitly in all the vlans that a vnet device
688 	 *   is configured into; even if vsw itself is not assigned to those
689 	 *   vlans as an interface. For instance, the packet might be destined
690 	 *   to another vnet(indirectly through vsw) or to an external host
691 	 *   which is in the same vlan as this vnet and vsw itself may not be
692 	 *   present in that vlan. Similarly packets going to vsw must be
693 	 *   always tagged(unless in the default-vlan) if not already tagged,
694 	 *   as we do not know the final destination. This is needed because
695 	 *   vsw must always invoke its switching function only after tagging
696 	 *   the packet; otherwise after switching function determines the
697 	 *   destination we cannot figure out if the destination belongs to the
698 	 *   the same vlan that the frame originated from and if it needs tag/
699 	 *   untag. Note that vsw will tag the packet itself when it receives
700 	 *   it over the channel from a client if needed. However, that is
701 	 *   needed only in the case of vlan unaware clients such as obp or
702 	 *   earlier versions of vnet.
703 	 *
704 	 */
705 	if (portp != vgenp->vsw_portp) {
706 		/*
707 		 * Packet going to a vnet. Check if the destination vnet is in
708 		 * the same vlan. Then check the frame header if tag/untag is
709 		 * needed.
710 		 */
711 		rv = vgen_vlan_lookup(portp->vlan_hashp, vid);
712 		if (rv == B_FALSE) {
713 			/* drop the packet */
714 			freemsg(mp);
715 			return (NULL);
716 		}
717 
718 		/* is the destination tagged or untagged in this vlan? */
719 		(vid == portp->pvid) ? (dst_tagged = B_FALSE) :
720 		    (dst_tagged = B_TRUE);
721 
722 		if (is_tagged == dst_tagged) {
723 			/* no tagging/untagging needed */
724 			return (mp);
725 		}
726 
727 		if (is_tagged == B_TRUE) {
728 			/* frame is tagged; destination needs untagged */
729 			mp = vnet_vlan_remove_tag(mp);
730 			return (mp);
731 		}
732 
733 		/* (is_tagged == B_FALSE): fallthru to tag tx packet: */
734 	}
735 
736 	/*
737 	 * Packet going to a vnet needs tagging.
738 	 * OR
739 	 * If the packet is going to vsw, then it must be tagged in all cases:
740 	 * unknown unicast, broadcast/multicast or to vsw interface.
741 	 */
742 
743 	if (is_tagged == B_FALSE) {
744 		mp = vnet_vlan_insert_tag(mp, vid);
745 	}
746 
747 	return (mp);
748 }
749 
750 /* transmit packets over the given port */
751 static int
752 vgen_portsend(vgen_port_t *portp, mblk_t *mp)
753 {
754 	vgen_ldclist_t		*ldclp;
755 	vgen_ldc_t		*ldcp;
756 	int			status;
757 	int			rv = VGEN_SUCCESS;
758 	vgen_t			*vgenp = portp->vgenp;
759 	vnet_t			*vnetp = vgenp->vnetp;
760 	boolean_t		is_tagged;
761 	boolean_t		dec_refcnt = B_FALSE;
762 	uint16_t		vlan_id;
763 	struct ether_header	*ehp;
764 
765 	if (portp->use_vsw_port) {
766 		(void) atomic_inc_32(&vgenp->vsw_port_refcnt);
767 		portp = portp->vgenp->vsw_portp;
768 		dec_refcnt = B_TRUE;
769 	}
770 	if (portp == NULL) {
771 		return (VGEN_FAILURE);
772 	}
773 
774 	/*
775 	 * Determine the vlan id that the frame belongs to.
776 	 */
777 	ehp = (struct ether_header *)mp->b_rptr;
778 	is_tagged = vgen_frame_lookup_vid(vnetp, ehp, &vlan_id);
779 
780 	if (vlan_id == vnetp->default_vlan_id) {
781 
782 		/* Frames in default vlan must be untagged */
783 		ASSERT(is_tagged == B_FALSE);
784 
785 		/*
786 		 * If the destination is a vnet-port verify it belongs to the
787 		 * default vlan; otherwise drop the packet. We do not need
788 		 * this check for vsw-port, as it should implicitly belong to
789 		 * this vlan; see comments in vgen_vlan_frame_fixtag().
790 		 */
791 		if (portp != vgenp->vsw_portp &&
792 		    portp->pvid != vnetp->default_vlan_id) {
793 			freemsg(mp);
794 			goto portsend_ret;
795 		}
796 
797 	} else {	/* frame not in default-vlan */
798 
799 		mp = vgen_vlan_frame_fixtag(portp, mp, is_tagged, vlan_id);
800 		if (mp == NULL) {
801 			goto portsend_ret;
802 		}
803 
804 	}
805 
806 	ldclp = &portp->ldclist;
807 	READ_ENTER(&ldclp->rwlock);
808 	/*
809 	 * NOTE: for now, we will assume we have a single channel.
810 	 */
811 	if (ldclp->headp == NULL) {
812 		RW_EXIT(&ldclp->rwlock);
813 		rv = VGEN_FAILURE;
814 		goto portsend_ret;
815 	}
816 	ldcp = ldclp->headp;
817 
818 	status = ldcp->tx(ldcp, mp);
819 
820 	RW_EXIT(&ldclp->rwlock);
821 
822 	if (status != VGEN_TX_SUCCESS) {
823 		rv = VGEN_FAILURE;
824 	}
825 
826 portsend_ret:
827 	if (dec_refcnt == B_TRUE) {
828 		(void) atomic_dec_32(&vgenp->vsw_port_refcnt);
829 	}
830 	return (rv);
831 }
832 
833 /*
834  * Wrapper function to transmit normal and/or priority frames over the channel.
835  */
836 static int
837 vgen_ldcsend(void *arg, mblk_t *mp)
838 {
839 	vgen_ldc_t		*ldcp = (vgen_ldc_t *)arg;
840 	int			status;
841 	struct ether_header	*ehp;
842 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
843 	uint32_t		num_types;
844 	uint16_t		*types;
845 	int			i;
846 
847 	ASSERT(VGEN_PRI_ETH_DEFINED(vgenp));
848 
849 	num_types = vgenp->pri_num_types;
850 	types = vgenp->pri_types;
851 	ehp = (struct ether_header *)mp->b_rptr;
852 
853 	for (i = 0; i < num_types; i++) {
854 
855 		if (ehp->ether_type == types[i]) {
856 			/* priority frame, use pri tx function */
857 			vgen_ldcsend_pkt(ldcp, mp);
858 			return (VGEN_SUCCESS);
859 		}
860 
861 	}
862 
863 	status  = vgen_ldcsend_dring(ldcp, mp);
864 
865 	return (status);
866 }
867 
868 /*
869  * This functions handles ldc channel reset while in the context
870  * of transmit routines: vgen_ldcsend_pkt() or vgen_ldcsend_dring().
871  */
872 static void
873 vgen_ldcsend_process_reset(vgen_ldc_t *ldcp)
874 {
875 	ldc_status_t	istatus;
876 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
877 
878 	if (mutex_tryenter(&ldcp->cblock)) {
879 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
880 			DWARN(vgenp, ldcp, "ldc_status() error\n");
881 		} else {
882 			ldcp->ldc_status = istatus;
883 		}
884 		if (ldcp->ldc_status != LDC_UP) {
885 			vgen_handle_evt_reset(ldcp);
886 		}
887 		mutex_exit(&ldcp->cblock);
888 	}
889 }
890 
891 /*
892  * This function transmits the frame in the payload of a raw data
893  * (VIO_PKT_DATA) message. Thus, it provides an Out-Of-Band path to
894  * send special frames with high priorities, without going through
895  * the normal data path which uses descriptor ring mechanism.
896  */
897 static void
898 vgen_ldcsend_pkt(void *arg, mblk_t *mp)
899 {
900 	vgen_ldc_t		*ldcp = (vgen_ldc_t *)arg;
901 	vio_raw_data_msg_t	*pkt;
902 	mblk_t			*bp;
903 	mblk_t			*nmp = NULL;
904 	caddr_t			dst;
905 	uint32_t		mblksz;
906 	uint32_t		size;
907 	uint32_t		nbytes;
908 	int			rv;
909 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
910 	vgen_stats_t		*statsp = &ldcp->stats;
911 
912 	/* drop the packet if ldc is not up or handshake is not done */
913 	if (ldcp->ldc_status != LDC_UP) {
914 		(void) atomic_inc_32(&statsp->tx_pri_fail);
915 		DWARN(vgenp, ldcp, "status(%d), dropping packet\n",
916 		    ldcp->ldc_status);
917 		goto send_pkt_exit;
918 	}
919 
920 	if (ldcp->hphase != VH_DONE) {
921 		(void) atomic_inc_32(&statsp->tx_pri_fail);
922 		DWARN(vgenp, ldcp, "hphase(%x), dropping packet\n",
923 		    ldcp->hphase);
924 		goto send_pkt_exit;
925 	}
926 
927 	size = msgsize(mp);
928 
929 	/* frame size bigger than available payload len of raw data msg ? */
930 	if (size > (size_t)(ldcp->msglen - VIO_PKT_DATA_HDRSIZE)) {
931 		(void) atomic_inc_32(&statsp->tx_pri_fail);
932 		DWARN(vgenp, ldcp, "invalid size(%d)\n", size);
933 		goto send_pkt_exit;
934 	}
935 
936 	if (size < ETHERMIN)
937 		size = ETHERMIN;
938 
939 	/* alloc space for a raw data message */
940 	nmp = vio_allocb(vgenp->pri_tx_vmp);
941 	if (nmp == NULL) {
942 		(void) atomic_inc_32(&statsp->tx_pri_fail);
943 		DWARN(vgenp, ldcp, "vio_allocb failed\n");
944 		goto send_pkt_exit;
945 	}
946 	pkt = (vio_raw_data_msg_t *)nmp->b_rptr;
947 
948 	/* copy frame into the payload of raw data message */
949 	dst = (caddr_t)pkt->data;
950 	for (bp = mp; bp != NULL; bp = bp->b_cont) {
951 		mblksz = MBLKL(bp);
952 		bcopy(bp->b_rptr, dst, mblksz);
953 		dst += mblksz;
954 	}
955 
956 	/* setup the raw data msg */
957 	pkt->tag.vio_msgtype = VIO_TYPE_DATA;
958 	pkt->tag.vio_subtype = VIO_SUBTYPE_INFO;
959 	pkt->tag.vio_subtype_env = VIO_PKT_DATA;
960 	pkt->tag.vio_sid = ldcp->local_sid;
961 	nbytes = VIO_PKT_DATA_HDRSIZE + size;
962 
963 	/* send the msg over ldc */
964 	rv = vgen_sendmsg(ldcp, (caddr_t)pkt, nbytes, B_FALSE);
965 	if (rv != VGEN_SUCCESS) {
966 		(void) atomic_inc_32(&statsp->tx_pri_fail);
967 		DWARN(vgenp, ldcp, "Error sending priority frame\n");
968 		if (rv == ECONNRESET) {
969 			vgen_ldcsend_process_reset(ldcp);
970 		}
971 		goto send_pkt_exit;
972 	}
973 
974 	/* update stats */
975 	(void) atomic_inc_64(&statsp->tx_pri_packets);
976 	(void) atomic_add_64(&statsp->tx_pri_bytes, size);
977 
978 send_pkt_exit:
979 	if (nmp != NULL)
980 		freemsg(nmp);
981 	freemsg(mp);
982 }
983 
984 /*
985  * This function transmits normal (non-priority) data frames over
986  * the channel. It queues the frame into the transmit descriptor ring
987  * and sends a VIO_DRING_DATA message if needed, to wake up the
988  * peer to (re)start processing.
989  */
990 static int
991 vgen_ldcsend_dring(void *arg, mblk_t *mp)
992 {
993 	vgen_ldc_t		*ldcp = (vgen_ldc_t *)arg;
994 	vgen_private_desc_t	*tbufp;
995 	vgen_private_desc_t	*rtbufp;
996 	vnet_public_desc_t	*rtxdp;
997 	vgen_private_desc_t	*ntbufp;
998 	vnet_public_desc_t	*txdp;
999 	vio_dring_entry_hdr_t	*hdrp;
1000 	vgen_stats_t		*statsp;
1001 	struct ether_header	*ehp;
1002 	boolean_t		is_bcast = B_FALSE;
1003 	boolean_t		is_mcast = B_FALSE;
1004 	size_t			mblksz;
1005 	caddr_t			dst;
1006 	mblk_t			*bp;
1007 	size_t			size;
1008 	int			rv = 0;
1009 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
1010 	vgen_hparams_t		*lp = &ldcp->local_hparams;
1011 
1012 	statsp = &ldcp->stats;
1013 	size = msgsize(mp);
1014 
1015 	DBG1(vgenp, ldcp, "enter\n");
1016 
1017 	if (ldcp->ldc_status != LDC_UP) {
1018 		DWARN(vgenp, ldcp, "status(%d), dropping packet\n",
1019 		    ldcp->ldc_status);
1020 		/* retry ldc_up() if needed */
1021 		if (ldcp->flags & CHANNEL_STARTED)
1022 			(void) ldc_up(ldcp->ldc_handle);
1023 		goto send_dring_exit;
1024 	}
1025 
1026 	/* drop the packet if ldc is not up or handshake is not done */
1027 	if (ldcp->hphase != VH_DONE) {
1028 		DWARN(vgenp, ldcp, "hphase(%x), dropping packet\n",
1029 		    ldcp->hphase);
1030 		goto send_dring_exit;
1031 	}
1032 
1033 	if (size > (size_t)lp->mtu) {
1034 		DWARN(vgenp, ldcp, "invalid size(%d)\n", size);
1035 		goto send_dring_exit;
1036 	}
1037 	if (size < ETHERMIN)
1038 		size = ETHERMIN;
1039 
1040 	ehp = (struct ether_header *)mp->b_rptr;
1041 	is_bcast = IS_BROADCAST(ehp);
1042 	is_mcast = IS_MULTICAST(ehp);
1043 
1044 	mutex_enter(&ldcp->txlock);
1045 	/*
1046 	 * allocate a descriptor
1047 	 */
1048 	tbufp = ldcp->next_tbufp;
1049 	ntbufp = NEXTTBUF(ldcp, tbufp);
1050 	if (ntbufp == ldcp->cur_tbufp) { /* out of tbufs/txds */
1051 
1052 		mutex_enter(&ldcp->tclock);
1053 		/* Try reclaiming now */
1054 		vgen_reclaim_dring(ldcp);
1055 		ldcp->reclaim_lbolt = ddi_get_lbolt();
1056 
1057 		if (ntbufp == ldcp->cur_tbufp) {
1058 			/* Now we are really out of tbuf/txds */
1059 			ldcp->need_resched = B_TRUE;
1060 			mutex_exit(&ldcp->tclock);
1061 
1062 			statsp->tx_no_desc++;
1063 			mutex_exit(&ldcp->txlock);
1064 
1065 			return (VGEN_TX_NORESOURCES);
1066 		}
1067 		mutex_exit(&ldcp->tclock);
1068 	}
1069 	/* update next available tbuf in the ring and update tx index */
1070 	ldcp->next_tbufp = ntbufp;
1071 	INCR_TXI(ldcp->next_txi, ldcp);
1072 
1073 	/* Mark the buffer busy before releasing the lock */
1074 	tbufp->flags = VGEN_PRIV_DESC_BUSY;
1075 	mutex_exit(&ldcp->txlock);
1076 
1077 	/* copy data into pre-allocated transmit buffer */
1078 	dst = tbufp->datap + VNET_IPALIGN;
1079 	for (bp = mp; bp != NULL; bp = bp->b_cont) {
1080 		mblksz = MBLKL(bp);
1081 		bcopy(bp->b_rptr, dst, mblksz);
1082 		dst += mblksz;
1083 	}
1084 
1085 	tbufp->datalen = size;
1086 
1087 	/* initialize the corresponding public descriptor (txd) */
1088 	txdp = tbufp->descp;
1089 	hdrp = &txdp->hdr;
1090 	txdp->nbytes = size;
1091 	txdp->ncookies = tbufp->ncookies;
1092 	bcopy((tbufp->memcookie), (txdp->memcookie),
1093 	    tbufp->ncookies * sizeof (ldc_mem_cookie_t));
1094 
1095 	mutex_enter(&ldcp->wrlock);
1096 	/*
1097 	 * If the flags not set to BUSY, it implies that the clobber
1098 	 * was done while we were copying the data. In such case,
1099 	 * discard the packet and return.
1100 	 */
1101 	if (tbufp->flags != VGEN_PRIV_DESC_BUSY) {
1102 		statsp->oerrors++;
1103 		mutex_exit(&ldcp->wrlock);
1104 		goto send_dring_exit;
1105 	}
1106 	hdrp->dstate = VIO_DESC_READY;
1107 
1108 	/* update stats */
1109 	statsp->opackets++;
1110 	statsp->obytes += size;
1111 	if (is_bcast)
1112 		statsp->brdcstxmt++;
1113 	else if (is_mcast)
1114 		statsp->multixmt++;
1115 
1116 	/* send dring datamsg to the peer */
1117 	if (ldcp->resched_peer) {
1118 
1119 		rtbufp = &ldcp->tbufp[ldcp->resched_peer_txi];
1120 		rtxdp = rtbufp->descp;
1121 
1122 		if (rtxdp->hdr.dstate == VIO_DESC_READY) {
1123 
1124 			rv = vgen_send_dring_data(ldcp,
1125 			    (uint32_t)ldcp->resched_peer_txi, -1);
1126 			if (rv != 0) {
1127 				/* error: drop the packet */
1128 				DWARN(vgenp, ldcp, "vgen_send_dring_data "
1129 				    "failed: rv(%d) len(%d)\n",
1130 				    ldcp->ldc_id, rv, size);
1131 				statsp->oerrors++;
1132 			} else {
1133 				ldcp->resched_peer = B_FALSE;
1134 			}
1135 
1136 		}
1137 
1138 	}
1139 
1140 	mutex_exit(&ldcp->wrlock);
1141 
1142 send_dring_exit:
1143 	if (rv == ECONNRESET) {
1144 		vgen_ldcsend_process_reset(ldcp);
1145 	}
1146 	freemsg(mp);
1147 	DBG1(vgenp, ldcp, "exit\n");
1148 	return (VGEN_TX_SUCCESS);
1149 }
1150 
1151 /* enable/disable a multicast address */
1152 int
1153 vgen_multicst(void *arg, boolean_t add, const uint8_t *mca)
1154 {
1155 	vgen_t			*vgenp;
1156 	vnet_mcast_msg_t	mcastmsg;
1157 	vio_msg_tag_t		*tagp;
1158 	vgen_port_t		*portp;
1159 	vgen_portlist_t		*plistp;
1160 	vgen_ldc_t		*ldcp;
1161 	vgen_ldclist_t		*ldclp;
1162 	struct ether_addr	*addrp;
1163 	int			rv = DDI_FAILURE;
1164 	uint32_t		i;
1165 
1166 	portp = (vgen_port_t *)arg;
1167 	vgenp = portp->vgenp;
1168 
1169 	if (portp != vgenp->vsw_portp) {
1170 		return (DDI_SUCCESS);
1171 	}
1172 
1173 	addrp = (struct ether_addr *)mca;
1174 	tagp = &mcastmsg.tag;
1175 	bzero(&mcastmsg, sizeof (mcastmsg));
1176 
1177 	mutex_enter(&vgenp->lock);
1178 
1179 	plistp = &(vgenp->vgenports);
1180 
1181 	READ_ENTER(&plistp->rwlock);
1182 
1183 	portp = vgenp->vsw_portp;
1184 	if (portp == NULL) {
1185 		RW_EXIT(&plistp->rwlock);
1186 		mutex_exit(&vgenp->lock);
1187 		return (rv);
1188 	}
1189 	ldclp = &portp->ldclist;
1190 
1191 	READ_ENTER(&ldclp->rwlock);
1192 
1193 	ldcp = ldclp->headp;
1194 	if (ldcp == NULL)
1195 		goto vgen_mcast_exit;
1196 
1197 	mutex_enter(&ldcp->cblock);
1198 
1199 	if (ldcp->hphase == VH_DONE) {
1200 		/*
1201 		 * If handshake is done, send a msg to vsw to add/remove
1202 		 * the multicast address. Otherwise, we just update this
1203 		 * mcast address in our table and the table will be sync'd
1204 		 * with vsw when handshake completes.
1205 		 */
1206 		tagp->vio_msgtype = VIO_TYPE_CTRL;
1207 		tagp->vio_subtype = VIO_SUBTYPE_INFO;
1208 		tagp->vio_subtype_env = VNET_MCAST_INFO;
1209 		tagp->vio_sid = ldcp->local_sid;
1210 		bcopy(mca, &(mcastmsg.mca), ETHERADDRL);
1211 		mcastmsg.set = add;
1212 		mcastmsg.count = 1;
1213 		if (vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (mcastmsg),
1214 		    B_FALSE) != VGEN_SUCCESS) {
1215 			DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
1216 			mutex_exit(&ldcp->cblock);
1217 			goto vgen_mcast_exit;
1218 		}
1219 	}
1220 
1221 	mutex_exit(&ldcp->cblock);
1222 
1223 	if (add) {
1224 
1225 		/* expand multicast table if necessary */
1226 		if (vgenp->mccount >= vgenp->mcsize) {
1227 			struct ether_addr	*newtab;
1228 			uint32_t		newsize;
1229 
1230 
1231 			newsize = vgenp->mcsize * 2;
1232 
1233 			newtab = kmem_zalloc(newsize *
1234 			    sizeof (struct ether_addr), KM_NOSLEEP);
1235 			if (newtab == NULL)
1236 				goto vgen_mcast_exit;
1237 			bcopy(vgenp->mctab, newtab, vgenp->mcsize *
1238 			    sizeof (struct ether_addr));
1239 			kmem_free(vgenp->mctab,
1240 			    vgenp->mcsize * sizeof (struct ether_addr));
1241 
1242 			vgenp->mctab = newtab;
1243 			vgenp->mcsize = newsize;
1244 		}
1245 
1246 		/* add address to the table */
1247 		vgenp->mctab[vgenp->mccount++] = *addrp;
1248 
1249 	} else {
1250 
1251 		/* delete address from the table */
1252 		for (i = 0; i < vgenp->mccount; i++) {
1253 			if (ether_cmp(addrp, &(vgenp->mctab[i])) == 0) {
1254 
1255 				/*
1256 				 * If there's more than one address in this
1257 				 * table, delete the unwanted one by moving
1258 				 * the last one in the list over top of it;
1259 				 * otherwise, just remove it.
1260 				 */
1261 				if (vgenp->mccount > 1) {
1262 					vgenp->mctab[i] =
1263 					    vgenp->mctab[vgenp->mccount-1];
1264 				}
1265 				vgenp->mccount--;
1266 				break;
1267 			}
1268 		}
1269 	}
1270 
1271 	rv = DDI_SUCCESS;
1272 
1273 vgen_mcast_exit:
1274 	RW_EXIT(&ldclp->rwlock);
1275 	RW_EXIT(&plistp->rwlock);
1276 
1277 	mutex_exit(&vgenp->lock);
1278 	return (rv);
1279 }
1280 
/*
 * Set or clear promiscuous mode on the device.
 *
 * This is a stub: both arguments are unused and DDI_SUCCESS is always
 * returned.
 */
static int
vgen_promisc(void *arg, boolean_t on)
{
	_NOTE(ARGUNUSED(arg, on))
	return (DDI_SUCCESS);
}
1288 
/*
 * Set the unicast mac address of the device.
 *
 * This is a stub: both arguments are unused and DDI_SUCCESS is always
 * returned.
 */
static int
vgen_unicst(void *arg, const uint8_t *mca)
{
	_NOTE(ARGUNUSED(arg, mca))
	return (DDI_SUCCESS);
}
1296 
1297 /* get device statistics */
1298 int
1299 vgen_stat(void *arg, uint_t stat, uint64_t *val)
1300 {
1301 	vgen_port_t	*portp = (vgen_port_t *)arg;
1302 
1303 	*val = vgen_port_stat(portp, stat);
1304 
1305 	return (0);
1306 }
1307 
/*
 * ioctl entry point. This is a stub: none of the arguments are used and
 * nothing is done with the message.
 */
static void
vgen_ioctl(void *arg, queue_t *wq, mblk_t *mp)
{
	 _NOTE(ARGUNUSED(arg, wq, mp))
}
1313 
1314 /* vgen internal functions */
1315 /* detach all ports from the device */
1316 static void
1317 vgen_detach_ports(vgen_t *vgenp)
1318 {
1319 	vgen_port_t	*portp;
1320 	vgen_portlist_t	*plistp;
1321 
1322 	plistp = &(vgenp->vgenports);
1323 	WRITE_ENTER(&plistp->rwlock);
1324 
1325 	while ((portp = plistp->headp) != NULL) {
1326 		vgen_port_detach(portp);
1327 	}
1328 
1329 	RW_EXIT(&plistp->rwlock);
1330 }
1331 
1332 /*
1333  * detach the given port.
1334  */
1335 static void
1336 vgen_port_detach(vgen_port_t *portp)
1337 {
1338 	vgen_t		*vgenp;
1339 	vgen_ldclist_t	*ldclp;
1340 	int		port_num;
1341 
1342 	vgenp = portp->vgenp;
1343 	port_num = portp->port_num;
1344 
1345 	DBG1(vgenp, NULL, "port(%d):enter\n", port_num);
1346 
1347 	/*
1348 	 * If this port is connected to the vswitch, then
1349 	 * potentially there could be ports that may be using
1350 	 * this port to transmit packets. To address this do
1351 	 * the following:
1352 	 *	- First set vgenp->vsw_portp to NULL, so that
1353 	 *	  its not used after that.
1354 	 *	- Then wait for the refcnt to go down to 0.
1355 	 *	- Now we can safely detach this port.
1356 	 */
1357 	if (vgenp->vsw_portp == portp) {
1358 		vgenp->vsw_portp = NULL;
1359 		while (vgenp->vsw_port_refcnt > 0) {
1360 			delay(drv_usectohz(vgen_tx_delay));
1361 		}
1362 		(void) atomic_swap_32(&vgenp->vsw_port_refcnt, 0);
1363 	}
1364 
1365 	if (portp->vhp != NULL) {
1366 		vio_net_resource_unreg(portp->vhp);
1367 		portp->vhp = NULL;
1368 	}
1369 
1370 	vgen_vlan_destroy_hash(portp);
1371 
1372 	/* remove it from port list */
1373 	vgen_port_list_remove(portp);
1374 
1375 	/* detach channels from this port */
1376 	ldclp = &portp->ldclist;
1377 	WRITE_ENTER(&ldclp->rwlock);
1378 	while (ldclp->headp) {
1379 		vgen_ldc_detach(ldclp->headp);
1380 	}
1381 	RW_EXIT(&ldclp->rwlock);
1382 	rw_destroy(&ldclp->rwlock);
1383 
1384 	if (portp->num_ldcs != 0) {
1385 		kmem_free(portp->ldc_ids, portp->num_ldcs * sizeof (uint64_t));
1386 		portp->num_ldcs = 0;
1387 	}
1388 
1389 	mutex_destroy(&portp->lock);
1390 	KMEM_FREE(portp);
1391 
1392 	DBG1(vgenp, NULL, "port(%d):exit\n", port_num);
1393 }
1394 
1395 /* add a port to port list */
1396 static void
1397 vgen_port_list_insert(vgen_port_t *portp)
1398 {
1399 	vgen_portlist_t *plistp;
1400 	vgen_t *vgenp;
1401 
1402 	vgenp = portp->vgenp;
1403 	plistp = &(vgenp->vgenports);
1404 
1405 	if (plistp->headp == NULL) {
1406 		plistp->headp = portp;
1407 	} else {
1408 		plistp->tailp->nextp = portp;
1409 	}
1410 	plistp->tailp = portp;
1411 	portp->nextp = NULL;
1412 }
1413 
1414 /* remove a port from port list */
1415 static void
1416 vgen_port_list_remove(vgen_port_t *portp)
1417 {
1418 	vgen_port_t *prevp;
1419 	vgen_port_t *nextp;
1420 	vgen_portlist_t *plistp;
1421 	vgen_t *vgenp;
1422 
1423 	vgenp = portp->vgenp;
1424 
1425 	plistp = &(vgenp->vgenports);
1426 
1427 	if (plistp->headp == NULL)
1428 		return;
1429 
1430 	if (portp == plistp->headp) {
1431 		plistp->headp = portp->nextp;
1432 		if (portp == plistp->tailp)
1433 			plistp->tailp = plistp->headp;
1434 	} else {
1435 		for (prevp = plistp->headp;
1436 		    ((nextp = prevp->nextp) != NULL) && (nextp != portp);
1437 		    prevp = nextp)
1438 			;
1439 		if (nextp == portp) {
1440 			prevp->nextp = portp->nextp;
1441 		}
1442 		if (portp == plistp->tailp)
1443 			plistp->tailp = prevp;
1444 	}
1445 }
1446 
1447 /* lookup a port in the list based on port_num */
1448 static vgen_port_t *
1449 vgen_port_lookup(vgen_portlist_t *plistp, int port_num)
1450 {
1451 	vgen_port_t *portp = NULL;
1452 
1453 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
1454 		if (portp->port_num == port_num) {
1455 			break;
1456 		}
1457 	}
1458 
1459 	return (portp);
1460 }
1461 
1462 /* enable ports for transmit/receive */
1463 static void
1464 vgen_init_ports(vgen_t *vgenp)
1465 {
1466 	vgen_port_t	*portp;
1467 	vgen_portlist_t	*plistp;
1468 
1469 	plistp = &(vgenp->vgenports);
1470 	READ_ENTER(&plistp->rwlock);
1471 
1472 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
1473 		vgen_port_init(portp);
1474 	}
1475 
1476 	RW_EXIT(&plistp->rwlock);
1477 }
1478 
/*
 * Initialize a single port: first add it to its vlans, then bring up
 * its channels.
 */
static void
vgen_port_init(vgen_port_t *portp)
{
	/* Add the port to the specified vlans */
	vgen_vlan_add_ids(portp);

	/* Bring up the channels of this port */
	vgen_init_ldcs(portp);
}
1488 
1489 /* disable transmit/receive on ports */
1490 static void
1491 vgen_uninit_ports(vgen_t *vgenp)
1492 {
1493 	vgen_port_t	*portp;
1494 	vgen_portlist_t	*plistp;
1495 
1496 	plistp = &(vgenp->vgenports);
1497 	READ_ENTER(&plistp->rwlock);
1498 
1499 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
1500 		vgen_port_uninit(portp);
1501 	}
1502 
1503 	RW_EXIT(&plistp->rwlock);
1504 }
1505 
/*
 * Uninitialize a single port: first uninit its channels, then remove the
 * port from its vlans (the reverse order of vgen_port_init()).
 */
static void
vgen_port_uninit(vgen_port_t *portp)
{
	vgen_uninit_ldcs(portp);

	/* remove the port from vlans it has been assigned to */
	vgen_vlan_remove_ids(portp);
}
1514 
1515 /*
1516  * Scan the machine description for this instance of vnet
1517  * and read its properties. Called only from vgen_init().
1518  * Returns: 0 on success, 1 on failure.
1519  */
1520 static int
1521 vgen_read_mdprops(vgen_t *vgenp)
1522 {
1523 	vnet_t		*vnetp = vgenp->vnetp;
1524 	md_t		*mdp = NULL;
1525 	mde_cookie_t	rootnode;
1526 	mde_cookie_t	*listp = NULL;
1527 	uint64_t	cfgh;
1528 	char		*name;
1529 	int		rv = 1;
1530 	int		num_nodes = 0;
1531 	int		num_devs = 0;
1532 	int		listsz = 0;
1533 	int		i;
1534 
1535 	if ((mdp = md_get_handle()) == NULL) {
1536 		return (rv);
1537 	}
1538 
1539 	num_nodes = md_node_count(mdp);
1540 	ASSERT(num_nodes > 0);
1541 
1542 	listsz = num_nodes * sizeof (mde_cookie_t);
1543 	listp = (mde_cookie_t *)kmem_zalloc(listsz, KM_SLEEP);
1544 
1545 	rootnode = md_root_node(mdp);
1546 
1547 	/* search for all "virtual_device" nodes */
1548 	num_devs = md_scan_dag(mdp, rootnode,
1549 	    md_find_name(mdp, vdev_propname),
1550 	    md_find_name(mdp, "fwd"), listp);
1551 	if (num_devs <= 0) {
1552 		goto vgen_readmd_exit;
1553 	}
1554 
1555 	/*
1556 	 * Now loop through the list of virtual-devices looking for
1557 	 * devices with name "network" and for each such device compare
1558 	 * its instance with what we have from the 'reg' property to
1559 	 * find the right node in MD and then read all its properties.
1560 	 */
1561 	for (i = 0; i < num_devs; i++) {
1562 
1563 		if (md_get_prop_str(mdp, listp[i], "name", &name) != 0) {
1564 			goto vgen_readmd_exit;
1565 		}
1566 
1567 		/* is this a "network" device? */
1568 		if (strcmp(name, vnet_propname) != 0)
1569 			continue;
1570 
1571 		if (md_get_prop_val(mdp, listp[i], "cfg-handle", &cfgh) != 0) {
1572 			goto vgen_readmd_exit;
1573 		}
1574 
1575 		/* is this the required instance of vnet? */
1576 		if (vgenp->regprop != cfgh)
1577 			continue;
1578 
1579 		/*
1580 		 * Read the mtu. Note that we set the mtu of vnet device within
1581 		 * this routine itself, after validating the range.
1582 		 */
1583 		vgen_mtu_read(vgenp, mdp, listp[i], &vnetp->mtu);
1584 		if (vnetp->mtu < ETHERMTU || vnetp->mtu > VNET_MAX_MTU) {
1585 			vnetp->mtu = ETHERMTU;
1586 		}
1587 		vgenp->max_frame_size = vnetp->mtu +
1588 		    sizeof (struct ether_header) + VLAN_TAGSZ;
1589 
1590 		/* read priority ether types */
1591 		vgen_read_pri_eth_types(vgenp, mdp, listp[i]);
1592 
1593 		/* read vlan id properties of this vnet instance */
1594 		vgen_vlan_read_ids(vgenp, VGEN_LOCAL, mdp, listp[i],
1595 		    &vnetp->pvid, &vnetp->vids, &vnetp->nvids,
1596 		    &vnetp->default_vlan_id);
1597 
1598 		rv = 0;
1599 		break;
1600 	}
1601 
1602 vgen_readmd_exit:
1603 
1604 	kmem_free(listp, listsz);
1605 	(void) md_fini_handle(mdp);
1606 	return (rv);
1607 }
1608 
1609 /*
1610  * Read vlan id properties of the given MD node.
1611  * Arguments:
1612  *   arg:          device argument(vnet device or a port)
1613  *   type:         type of arg; VGEN_LOCAL(vnet device) or VGEN_PEER(port)
1614  *   mdp:          machine description
1615  *   node:         md node cookie
1616  *
1617  * Returns:
1618  *   pvidp:        port-vlan-id of the node
1619  *   vidspp:       list of vlan-ids of the node
1620  *   nvidsp:       # of vlan-ids in the list
1621  *   default_idp:  default-vlan-id of the node(if node is vnet device)
1622  */
1623 static void
1624 vgen_vlan_read_ids(void *arg, int type, md_t *mdp, mde_cookie_t node,
1625 	uint16_t *pvidp, uint16_t **vidspp, uint16_t *nvidsp,
1626 	uint16_t *default_idp)
1627 {
1628 	vgen_t		*vgenp;
1629 	vnet_t		*vnetp;
1630 	vgen_port_t	*portp;
1631 	char		*pvid_propname;
1632 	char		*vid_propname;
1633 	uint_t		nvids;
1634 	uint32_t	vids_size;
1635 	int		rv;
1636 	int		i;
1637 	uint64_t	*data;
1638 	uint64_t	val;
1639 	int		size;
1640 	int		inst;
1641 
1642 	if (type == VGEN_LOCAL) {
1643 
1644 		vgenp = (vgen_t *)arg;
1645 		vnetp = vgenp->vnetp;
1646 		pvid_propname = vgen_pvid_propname;
1647 		vid_propname = vgen_vid_propname;
1648 		inst = vnetp->instance;
1649 
1650 	} else if (type == VGEN_PEER) {
1651 
1652 		portp = (vgen_port_t *)arg;
1653 		vgenp = portp->vgenp;
1654 		vnetp = vgenp->vnetp;
1655 		pvid_propname = port_pvid_propname;
1656 		vid_propname = port_vid_propname;
1657 		inst = portp->port_num;
1658 
1659 	} else {
1660 		return;
1661 	}
1662 
1663 	if (type == VGEN_LOCAL && default_idp != NULL) {
1664 		rv = md_get_prop_val(mdp, node, vgen_dvid_propname, &val);
1665 		if (rv != 0) {
1666 			DWARN(vgenp, NULL, "prop(%s) not found",
1667 			    vgen_dvid_propname);
1668 
1669 			*default_idp = vnet_default_vlan_id;
1670 		} else {
1671 			*default_idp = val & 0xFFF;
1672 			DBG2(vgenp, NULL, "%s(%d): (%d)\n", vgen_dvid_propname,
1673 			    inst, *default_idp);
1674 		}
1675 	}
1676 
1677 	rv = md_get_prop_val(mdp, node, pvid_propname, &val);
1678 	if (rv != 0) {
1679 		DWARN(vgenp, NULL, "prop(%s) not found", pvid_propname);
1680 		*pvidp = vnet_default_vlan_id;
1681 	} else {
1682 
1683 		*pvidp = val & 0xFFF;
1684 		DBG2(vgenp, NULL, "%s(%d): (%d)\n",
1685 		    pvid_propname, inst, *pvidp);
1686 	}
1687 
1688 	rv = md_get_prop_data(mdp, node, vid_propname, (uint8_t **)&data,
1689 	    &size);
1690 	if (rv != 0) {
1691 		DBG2(vgenp, NULL, "prop(%s) not found", vid_propname);
1692 		size = 0;
1693 	} else {
1694 		size /= sizeof (uint64_t);
1695 	}
1696 	nvids = size;
1697 
1698 	if (nvids != 0) {
1699 		DBG2(vgenp, NULL, "%s(%d): ", vid_propname, inst);
1700 		vids_size = sizeof (uint16_t) * nvids;
1701 		*vidspp = kmem_zalloc(vids_size, KM_SLEEP);
1702 		for (i = 0; i < nvids; i++) {
1703 			(*vidspp)[i] = data[i] & 0xFFFF;
1704 			DBG2(vgenp, NULL, " %d ", (*vidspp)[i]);
1705 		}
1706 		DBG2(vgenp, NULL, "\n");
1707 	}
1708 
1709 	*nvidsp = nvids;
1710 }
1711 
1712 /*
1713  * Create a vlan id hash table for the given port.
1714  */
1715 static void
1716 vgen_vlan_create_hash(vgen_port_t *portp)
1717 {
1718 	char		hashname[MAXNAMELEN];
1719 
1720 	(void) snprintf(hashname, MAXNAMELEN, "port%d-vlan-hash",
1721 	    portp->port_num);
1722 
1723 	portp->vlan_nchains = vgen_vlan_nchains;
1724 	portp->vlan_hashp = mod_hash_create_idhash(hashname,
1725 	    portp->vlan_nchains, mod_hash_null_valdtor);
1726 }
1727 
1728 /*
1729  * Destroy the vlan id hash table in the given port.
1730  */
1731 static void
1732 vgen_vlan_destroy_hash(vgen_port_t *portp)
1733 {
1734 	if (portp->vlan_hashp != NULL) {
1735 		mod_hash_destroy_hash(portp->vlan_hashp);
1736 		portp->vlan_hashp = NULL;
1737 		portp->vlan_nchains = 0;
1738 	}
1739 }
1740 
1741 /*
1742  * Add a port to the vlans specified in its port properites.
1743  */
1744 static void
1745 vgen_vlan_add_ids(vgen_port_t *portp)
1746 {
1747 	int		rv;
1748 	int		i;
1749 
1750 	rv = mod_hash_insert(portp->vlan_hashp,
1751 	    (mod_hash_key_t)VLAN_ID_KEY(portp->pvid),
1752 	    (mod_hash_val_t)B_TRUE);
1753 	ASSERT(rv == 0);
1754 
1755 	for (i = 0; i < portp->nvids; i++) {
1756 		rv = mod_hash_insert(portp->vlan_hashp,
1757 		    (mod_hash_key_t)VLAN_ID_KEY(portp->vids[i]),
1758 		    (mod_hash_val_t)B_TRUE);
1759 		ASSERT(rv == 0);
1760 	}
1761 }
1762 
1763 /*
1764  * Remove a port from the vlans it has been assigned to.
1765  */
1766 static void
1767 vgen_vlan_remove_ids(vgen_port_t *portp)
1768 {
1769 	int		rv;
1770 	int		i;
1771 	mod_hash_val_t	vp;
1772 
1773 	rv = mod_hash_remove(portp->vlan_hashp,
1774 	    (mod_hash_key_t)VLAN_ID_KEY(portp->pvid),
1775 	    (mod_hash_val_t *)&vp);
1776 	ASSERT(rv == 0);
1777 
1778 	for (i = 0; i < portp->nvids; i++) {
1779 		rv = mod_hash_remove(portp->vlan_hashp,
1780 		    (mod_hash_key_t)VLAN_ID_KEY(portp->vids[i]),
1781 		    (mod_hash_val_t *)&vp);
1782 		ASSERT(rv == 0);
1783 	}
1784 }
1785 
1786 /*
1787  * Lookup the vlan id of the given tx frame. If it is a vlan-tagged frame,
1788  * then the vlan-id is available in the tag; otherwise, its vlan id is
1789  * implicitly obtained from the port-vlan-id of the vnet device.
1790  * The vlan id determined is returned in vidp.
1791  * Returns: B_TRUE if it is a tagged frame; B_FALSE if it is untagged.
1792  */
1793 static boolean_t
1794 vgen_frame_lookup_vid(vnet_t *vnetp, struct ether_header *ehp, uint16_t *vidp)
1795 {
1796 	struct ether_vlan_header	*evhp;
1797 
1798 	/* If it's a tagged frame, get the vlan id from vlan header */
1799 	if (ehp->ether_type == ETHERTYPE_VLAN) {
1800 
1801 		evhp = (struct ether_vlan_header *)ehp;
1802 		*vidp = VLAN_ID(ntohs(evhp->ether_tci));
1803 		return (B_TRUE);
1804 	}
1805 
1806 	/* Untagged frame, vlan-id is the pvid of vnet device */
1807 	*vidp = vnetp->pvid;
1808 	return (B_FALSE);
1809 }
1810 
1811 /*
1812  * Find the given vlan id in the hash table.
1813  * Return: B_TRUE if the id is found; B_FALSE if not found.
1814  */
1815 static boolean_t
1816 vgen_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid)
1817 {
1818 	int		rv;
1819 	mod_hash_val_t	vp;
1820 
1821 	rv = mod_hash_find(vlan_hashp, VLAN_ID_KEY(vid), (mod_hash_val_t *)&vp);
1822 
1823 	if (rv != 0)
1824 		return (B_FALSE);
1825 
1826 	return (B_TRUE);
1827 }
1828 
1829 /*
1830  * This function reads "priority-ether-types" property from md. This property
1831  * is used to enable support for priority frames. Applications which need
1832  * guaranteed and timely delivery of certain high priority frames to/from
1833  * a vnet or vsw within ldoms, should configure this property by providing
1834  * the ether type(s) for which the priority facility is needed.
1835  * Normal data frames are delivered over a ldc channel using the descriptor
1836  * ring mechanism which is constrained by factors such as descriptor ring size,
1837  * the rate at which the ring is processed at the peer ldc end point, etc.
1838  * The priority mechanism provides an Out-Of-Band path to send/receive frames
1839  * as raw pkt data (VIO_PKT_DATA) messages over the channel, avoiding the
1840  * descriptor ring path and enables a more reliable and timely delivery of
1841  * frames to the peer.
1842  */
1843 static void
1844 vgen_read_pri_eth_types(vgen_t *vgenp, md_t *mdp, mde_cookie_t node)
1845 {
1846 	int		rv;
1847 	uint16_t	*types;
1848 	uint64_t	*data;
1849 	int		size;
1850 	int		i;
1851 	size_t		mblk_sz;
1852 
1853 	rv = md_get_prop_data(mdp, node, pri_types_propname,
1854 	    (uint8_t **)&data, &size);
1855 	if (rv != 0) {
1856 		/*
1857 		 * Property may not exist if we are running pre-ldoms1.1 f/w.
1858 		 * Check if 'vgen_pri_eth_type' has been set in that case.
1859 		 */
1860 		if (vgen_pri_eth_type != 0) {
1861 			size = sizeof (vgen_pri_eth_type);
1862 			data = &vgen_pri_eth_type;
1863 		} else {
1864 			DBG2(vgenp, NULL,
1865 			    "prop(%s) not found", pri_types_propname);
1866 			size = 0;
1867 		}
1868 	}
1869 
1870 	if (size == 0) {
1871 		vgenp->pri_num_types = 0;
1872 		return;
1873 	}
1874 
1875 	/*
1876 	 * we have some priority-ether-types defined;
1877 	 * allocate a table of these types and also
1878 	 * allocate a pool of mblks to transmit these
1879 	 * priority packets.
1880 	 */
1881 	size /= sizeof (uint64_t);
1882 	vgenp->pri_num_types = size;
1883 	vgenp->pri_types = kmem_zalloc(size * sizeof (uint16_t), KM_SLEEP);
1884 	for (i = 0, types = vgenp->pri_types; i < size; i++) {
1885 		types[i] = data[i] & 0xFFFF;
1886 	}
1887 	mblk_sz = (VIO_PKT_DATA_HDRSIZE + vgenp->max_frame_size + 7) & ~7;
1888 	(void) vio_create_mblks(vgen_pri_tx_nmblks, mblk_sz,
1889 	    &vgenp->pri_tx_vmp);
1890 }
1891 
1892 static void
1893 vgen_mtu_read(vgen_t *vgenp, md_t *mdp, mde_cookie_t node, uint32_t *mtu)
1894 {
1895 	int		rv;
1896 	uint64_t	val;
1897 	char		*mtu_propname;
1898 
1899 	mtu_propname = vgen_mtu_propname;
1900 
1901 	rv = md_get_prop_val(mdp, node, mtu_propname, &val);
1902 	if (rv != 0) {
1903 		DWARN(vgenp, NULL, "prop(%s) not found", mtu_propname);
1904 		*mtu = vnet_ethermtu;
1905 	} else {
1906 
1907 		*mtu = val & 0xFFFF;
1908 		DBG2(vgenp, NULL, "%s(%d): (%d)\n", mtu_propname,
1909 		    vgenp->instance, *mtu);
1910 	}
1911 }
1912 
1913 /* register with MD event generator */
1914 static int
1915 vgen_mdeg_reg(vgen_t *vgenp)
1916 {
1917 	mdeg_prop_spec_t	*pspecp;
1918 	mdeg_node_spec_t	*parentp;
1919 	uint_t			templatesz;
1920 	int			rv;
1921 	mdeg_handle_t		dev_hdl = NULL;
1922 	mdeg_handle_t		port_hdl = NULL;
1923 
1924 	templatesz = sizeof (vgen_prop_template);
1925 	pspecp = kmem_zalloc(templatesz, KM_NOSLEEP);
1926 	if (pspecp == NULL) {
1927 		return (DDI_FAILURE);
1928 	}
1929 	parentp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_NOSLEEP);
1930 	if (parentp == NULL) {
1931 		kmem_free(pspecp, templatesz);
1932 		return (DDI_FAILURE);
1933 	}
1934 
1935 	bcopy(vgen_prop_template, pspecp, templatesz);
1936 
1937 	/*
1938 	 * NOTE: The instance here refers to the value of "reg" property and
1939 	 * not the dev_info instance (ddi_get_instance()) of vnet.
1940 	 */
1941 	VGEN_SET_MDEG_PROP_INST(pspecp, vgenp->regprop);
1942 
1943 	parentp->namep = "virtual-device";
1944 	parentp->specp = pspecp;
1945 
1946 	/* save parentp in vgen_t */
1947 	vgenp->mdeg_parentp = parentp;
1948 
1949 	/*
1950 	 * Register an interest in 'virtual-device' nodes with a
1951 	 * 'name' property of 'network'
1952 	 */
1953 	rv = mdeg_register(parentp, &vdev_match, vgen_mdeg_cb, vgenp, &dev_hdl);
1954 	if (rv != MDEG_SUCCESS) {
1955 		DERR(vgenp, NULL, "mdeg_register failed\n");
1956 		goto mdeg_reg_fail;
1957 	}
1958 
1959 	/* Register an interest in 'port' nodes */
1960 	rv = mdeg_register(parentp, &vport_match, vgen_mdeg_port_cb, vgenp,
1961 	    &port_hdl);
1962 	if (rv != MDEG_SUCCESS) {
1963 		DERR(vgenp, NULL, "mdeg_register failed\n");
1964 		goto mdeg_reg_fail;
1965 	}
1966 
1967 	/* save mdeg handle in vgen_t */
1968 	vgenp->mdeg_dev_hdl = dev_hdl;
1969 	vgenp->mdeg_port_hdl = port_hdl;
1970 
1971 	return (DDI_SUCCESS);
1972 
1973 mdeg_reg_fail:
1974 	if (dev_hdl != NULL) {
1975 		(void) mdeg_unregister(dev_hdl);
1976 	}
1977 	KMEM_FREE(parentp);
1978 	kmem_free(pspecp, templatesz);
1979 	vgenp->mdeg_parentp = NULL;
1980 	return (DDI_FAILURE);
1981 }
1982 
1983 /* unregister with MD event generator */
1984 static void
1985 vgen_mdeg_unreg(vgen_t *vgenp)
1986 {
1987 	(void) mdeg_unregister(vgenp->mdeg_dev_hdl);
1988 	(void) mdeg_unregister(vgenp->mdeg_port_hdl);
1989 	kmem_free(vgenp->mdeg_parentp->specp, sizeof (vgen_prop_template));
1990 	KMEM_FREE(vgenp->mdeg_parentp);
1991 	vgenp->mdeg_parentp = NULL;
1992 	vgenp->mdeg_dev_hdl = NULL;
1993 	vgenp->mdeg_port_hdl = NULL;
1994 }
1995 
1996 /* mdeg callback function for the port node */
1997 static int
1998 vgen_mdeg_port_cb(void *cb_argp, mdeg_result_t *resp)
1999 {
2000 	int idx;
2001 	int vsw_idx = -1;
2002 	uint64_t val;
2003 	vgen_t *vgenp;
2004 
2005 	if ((resp == NULL) || (cb_argp == NULL)) {
2006 		return (MDEG_FAILURE);
2007 	}
2008 
2009 	vgenp = (vgen_t *)cb_argp;
2010 	DBG1(vgenp, NULL, "enter\n");
2011 
2012 	mutex_enter(&vgenp->lock);
2013 
2014 	DBG1(vgenp, NULL, "ports: removed(%x), "
2015 	"added(%x), updated(%x)\n", resp->removed.nelem,
2016 	    resp->added.nelem, resp->match_curr.nelem);
2017 
2018 	for (idx = 0; idx < resp->removed.nelem; idx++) {
2019 		(void) vgen_remove_port(vgenp, resp->removed.mdp,
2020 		    resp->removed.mdep[idx]);
2021 	}
2022 
2023 	if (vgenp->vsw_portp == NULL) {
2024 		/*
2025 		 * find vsw_port and add it first, because other ports need
2026 		 * this when adding fdb entry (see vgen_port_init()).
2027 		 */
2028 		for (idx = 0; idx < resp->added.nelem; idx++) {
2029 			if (!(md_get_prop_val(resp->added.mdp,
2030 			    resp->added.mdep[idx], swport_propname, &val))) {
2031 				if (val == 0) {
2032 					/*
2033 					 * This port is connected to the
2034 					 * vsw on service domain.
2035 					 */
2036 					vsw_idx = idx;
2037 					if (vgen_add_port(vgenp,
2038 					    resp->added.mdp,
2039 					    resp->added.mdep[idx]) !=
2040 					    DDI_SUCCESS) {
2041 						cmn_err(CE_NOTE, "vnet%d Could "
2042 						    "not initialize virtual "
2043 						    "switch port.",
2044 						    vgenp->instance);
2045 						mutex_exit(&vgenp->lock);
2046 						return (MDEG_FAILURE);
2047 					}
2048 					break;
2049 				}
2050 			}
2051 		}
2052 		if (vsw_idx == -1) {
2053 			DWARN(vgenp, NULL, "can't find vsw_port\n");
2054 			mutex_exit(&vgenp->lock);
2055 			return (MDEG_FAILURE);
2056 		}
2057 	}
2058 
2059 	for (idx = 0; idx < resp->added.nelem; idx++) {
2060 		if ((vsw_idx != -1) && (vsw_idx == idx)) /* skip vsw_port */
2061 			continue;
2062 
2063 		/* If this port can't be added just skip it. */
2064 		(void) vgen_add_port(vgenp, resp->added.mdp,
2065 		    resp->added.mdep[idx]);
2066 	}
2067 
2068 	for (idx = 0; idx < resp->match_curr.nelem; idx++) {
2069 		(void) vgen_update_port(vgenp, resp->match_curr.mdp,
2070 		    resp->match_curr.mdep[idx],
2071 		    resp->match_prev.mdp,
2072 		    resp->match_prev.mdep[idx]);
2073 	}
2074 
2075 	mutex_exit(&vgenp->lock);
2076 	DBG1(vgenp, NULL, "exit\n");
2077 	return (MDEG_SUCCESS);
2078 }
2079 
2080 /* mdeg callback function for the vnet node */
2081 static int
2082 vgen_mdeg_cb(void *cb_argp, mdeg_result_t *resp)
2083 {
2084 	vgen_t		*vgenp;
2085 	vnet_t		*vnetp;
2086 	md_t		*mdp;
2087 	mde_cookie_t	node;
2088 	uint64_t	inst;
2089 	char		*node_name = NULL;
2090 
2091 	if ((resp == NULL) || (cb_argp == NULL)) {
2092 		return (MDEG_FAILURE);
2093 	}
2094 
2095 	vgenp = (vgen_t *)cb_argp;
2096 	vnetp = vgenp->vnetp;
2097 
2098 	DBG1(vgenp, NULL, "added %d : removed %d : curr matched %d"
2099 	    " : prev matched %d", resp->added.nelem, resp->removed.nelem,
2100 	    resp->match_curr.nelem, resp->match_prev.nelem);
2101 
2102 	mutex_enter(&vgenp->lock);
2103 
2104 	/*
2105 	 * We get an initial callback for this node as 'added' after
2106 	 * registering with mdeg. Note that we would have already gathered
2107 	 * information about this vnet node by walking MD earlier during attach
2108 	 * (in vgen_read_mdprops()). So, there is a window where the properties
2109 	 * of this node might have changed when we get this initial 'added'
2110 	 * callback. We handle this as if an update occured and invoke the same
2111 	 * function which handles updates to the properties of this vnet-node
2112 	 * if any. A non-zero 'match' value indicates that the MD has been
2113 	 * updated and that a 'network' node is present which may or may not
2114 	 * have been updated. It is up to the clients to examine their own
2115 	 * nodes and determine if they have changed.
2116 	 */
2117 	if (resp->added.nelem != 0) {
2118 
2119 		if (resp->added.nelem != 1) {
2120 			cmn_err(CE_NOTE, "!vnet%d: number of nodes added "
2121 			    "invalid: %d\n", vnetp->instance,
2122 			    resp->added.nelem);
2123 			goto vgen_mdeg_cb_err;
2124 		}
2125 
2126 		mdp = resp->added.mdp;
2127 		node = resp->added.mdep[0];
2128 
2129 	} else if (resp->match_curr.nelem != 0) {
2130 
2131 		if (resp->match_curr.nelem != 1) {
2132 			cmn_err(CE_NOTE, "!vnet%d: number of nodes updated "
2133 			    "invalid: %d\n", vnetp->instance,
2134 			    resp->match_curr.nelem);
2135 			goto vgen_mdeg_cb_err;
2136 		}
2137 
2138 		mdp = resp->match_curr.mdp;
2139 		node = resp->match_curr.mdep[0];
2140 
2141 	} else {
2142 		goto vgen_mdeg_cb_err;
2143 	}
2144 
2145 	/* Validate name and instance */
2146 	if (md_get_prop_str(mdp, node, "name", &node_name) != 0) {
2147 		DERR(vgenp, NULL, "unable to get node name\n");
2148 		goto vgen_mdeg_cb_err;
2149 	}
2150 
2151 	/* is this a virtual-network device? */
2152 	if (strcmp(node_name, vnet_propname) != 0) {
2153 		DERR(vgenp, NULL, "%s: Invalid node name: %s\n", node_name);
2154 		goto vgen_mdeg_cb_err;
2155 	}
2156 
2157 	if (md_get_prop_val(mdp, node, "cfg-handle", &inst)) {
2158 		DERR(vgenp, NULL, "prop(cfg-handle) not found\n");
2159 		goto vgen_mdeg_cb_err;
2160 	}
2161 
2162 	/* is this the right instance of vnet? */
2163 	if (inst != vgenp->regprop) {
2164 		DERR(vgenp, NULL,  "Invalid cfg-handle: %lx\n", inst);
2165 		goto vgen_mdeg_cb_err;
2166 	}
2167 
2168 	vgen_update_md_prop(vgenp, mdp, node);
2169 
2170 	mutex_exit(&vgenp->lock);
2171 	return (MDEG_SUCCESS);
2172 
2173 vgen_mdeg_cb_err:
2174 	mutex_exit(&vgenp->lock);
2175 	return (MDEG_FAILURE);
2176 }
2177 
2178 /*
2179  * Check to see if the relevant properties in the specified node have
2180  * changed, and if so take the appropriate action.
2181  */
2182 static void
2183 vgen_update_md_prop(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
2184 {
2185 	uint16_t	pvid;
2186 	uint16_t	*vids;
2187 	uint16_t	nvids;
2188 	vnet_t		*vnetp = vgenp->vnetp;
2189 	uint32_t	mtu;
2190 	enum		{ MD_init = 0x1,
2191 			    MD_vlans = 0x2,
2192 			    MD_mtu = 0x4 } updated;
2193 	int		rv;
2194 
2195 	updated = MD_init;
2196 
2197 	/* Read the vlan ids */
2198 	vgen_vlan_read_ids(vgenp, VGEN_LOCAL, mdp, mdex, &pvid, &vids,
2199 	    &nvids, NULL);
2200 
2201 	/* Determine if there are any vlan id updates */
2202 	if ((pvid != vnetp->pvid) ||		/* pvid changed? */
2203 	    (nvids != vnetp->nvids) ||		/* # of vids changed? */
2204 	    ((nvids != 0) && (vnetp->nvids != 0) &&	/* vids changed? */
2205 	    bcmp(vids, vnetp->vids, sizeof (uint16_t) * nvids))) {
2206 		updated |= MD_vlans;
2207 	}
2208 
2209 	/* Read mtu */
2210 	vgen_mtu_read(vgenp, mdp, mdex, &mtu);
2211 	if (mtu != vnetp->mtu) {
2212 		if (mtu >= ETHERMTU && mtu <= VNET_MAX_MTU) {
2213 			updated |= MD_mtu;
2214 		} else {
2215 			cmn_err(CE_NOTE, "!vnet%d: Unable to process mtu update"
2216 			    " as the specified value:%d is invalid\n",
2217 			    vnetp->instance, mtu);
2218 		}
2219 	}
2220 
2221 	/* Now process the updated props */
2222 
2223 	if (updated & MD_vlans) {
2224 
2225 		/* save the new vlan ids */
2226 		vnetp->pvid = pvid;
2227 		if (vnetp->nvids != 0) {
2228 			kmem_free(vnetp->vids,
2229 			    sizeof (uint16_t) * vnetp->nvids);
2230 			vnetp->nvids = 0;
2231 		}
2232 		if (nvids != 0) {
2233 			vnetp->nvids = nvids;
2234 			vnetp->vids = vids;
2235 		}
2236 
2237 		/* reset vlan-unaware peers (ver < 1.3) and restart handshake */
2238 		vgen_reset_vlan_unaware_ports(vgenp);
2239 
2240 	} else {
2241 
2242 		if (nvids != 0) {
2243 			kmem_free(vids, sizeof (uint16_t) * nvids);
2244 		}
2245 	}
2246 
2247 	if (updated & MD_mtu) {
2248 
2249 		DBG2(vgenp, NULL, "curr_mtu(%d) new_mtu(%d)\n",
2250 		    vnetp->mtu, mtu);
2251 
2252 		rv = vnet_mtu_update(vnetp, mtu);
2253 		if (rv == 0) {
2254 			vgenp->max_frame_size = mtu +
2255 			    sizeof (struct ether_header) + VLAN_TAGSZ;
2256 		}
2257 	}
2258 }
2259 
2260 /* add a new port to the device */
2261 static int
2262 vgen_add_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
2263 {
2264 	vgen_port_t	*portp;
2265 	int		rv;
2266 
2267 	portp = kmem_zalloc(sizeof (vgen_port_t), KM_SLEEP);
2268 
2269 	rv = vgen_port_read_props(portp, vgenp, mdp, mdex);
2270 	if (rv != DDI_SUCCESS) {
2271 		KMEM_FREE(portp);
2272 		return (DDI_FAILURE);
2273 	}
2274 
2275 	rv = vgen_port_attach(portp);
2276 	if (rv != DDI_SUCCESS) {
2277 		return (DDI_FAILURE);
2278 	}
2279 
2280 	return (DDI_SUCCESS);
2281 }
2282 
2283 /* read properties of the port from its md node */
2284 static int
2285 vgen_port_read_props(vgen_port_t *portp, vgen_t *vgenp, md_t *mdp,
2286 	mde_cookie_t mdex)
2287 {
2288 	uint64_t		port_num;
2289 	uint64_t		*ldc_ids;
2290 	uint64_t		macaddr;
2291 	uint64_t		val;
2292 	int			num_ldcs;
2293 	int			i;
2294 	int			addrsz;
2295 	int			num_nodes = 0;
2296 	int			listsz = 0;
2297 	mde_cookie_t		*listp = NULL;
2298 	uint8_t			*addrp;
2299 	struct ether_addr	ea;
2300 
2301 	/* read "id" property to get the port number */
2302 	if (md_get_prop_val(mdp, mdex, id_propname, &port_num)) {
2303 		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
2304 		return (DDI_FAILURE);
2305 	}
2306 
2307 	/*
2308 	 * Find the channel endpoint node(s) under this port node.
2309 	 */
2310 	if ((num_nodes = md_node_count(mdp)) <= 0) {
2311 		DWARN(vgenp, NULL, "invalid number of nodes found (%d)",
2312 		    num_nodes);
2313 		return (DDI_FAILURE);
2314 	}
2315 
2316 	/* allocate space for node list */
2317 	listsz = num_nodes * sizeof (mde_cookie_t);
2318 	listp = kmem_zalloc(listsz, KM_NOSLEEP);
2319 	if (listp == NULL)
2320 		return (DDI_FAILURE);
2321 
2322 	num_ldcs = md_scan_dag(mdp, mdex,
2323 	    md_find_name(mdp, channel_propname),
2324 	    md_find_name(mdp, "fwd"), listp);
2325 
2326 	if (num_ldcs <= 0) {
2327 		DWARN(vgenp, NULL, "can't find %s nodes", channel_propname);
2328 		kmem_free(listp, listsz);
2329 		return (DDI_FAILURE);
2330 	}
2331 
2332 	DBG2(vgenp, NULL, "num_ldcs %d", num_ldcs);
2333 
2334 	ldc_ids = kmem_zalloc(num_ldcs * sizeof (uint64_t), KM_NOSLEEP);
2335 	if (ldc_ids == NULL) {
2336 		kmem_free(listp, listsz);
2337 		return (DDI_FAILURE);
2338 	}
2339 
2340 	for (i = 0; i < num_ldcs; i++) {
2341 		/* read channel ids */
2342 		if (md_get_prop_val(mdp, listp[i], id_propname, &ldc_ids[i])) {
2343 			DWARN(vgenp, NULL, "prop(%s) not found\n",
2344 			    id_propname);
2345 			kmem_free(listp, listsz);
2346 			kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
2347 			return (DDI_FAILURE);
2348 		}
2349 		DBG2(vgenp, NULL, "ldc_id 0x%llx", ldc_ids[i]);
2350 	}
2351 
2352 	kmem_free(listp, listsz);
2353 
2354 	if (md_get_prop_data(mdp, mdex, rmacaddr_propname, &addrp,
2355 	    &addrsz)) {
2356 		DWARN(vgenp, NULL, "prop(%s) not found\n", rmacaddr_propname);
2357 		kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
2358 		return (DDI_FAILURE);
2359 	}
2360 
2361 	if (addrsz < ETHERADDRL) {
2362 		DWARN(vgenp, NULL, "invalid address size (%d)\n", addrsz);
2363 		kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
2364 		return (DDI_FAILURE);
2365 	}
2366 
2367 	macaddr = *((uint64_t *)addrp);
2368 
2369 	DBG2(vgenp, NULL, "remote mac address 0x%llx\n", macaddr);
2370 
2371 	for (i = ETHERADDRL - 1; i >= 0; i--) {
2372 		ea.ether_addr_octet[i] = macaddr & 0xFF;
2373 		macaddr >>= 8;
2374 	}
2375 
2376 	if (vgenp->vsw_portp == NULL) {
2377 		if (!(md_get_prop_val(mdp, mdex, swport_propname, &val))) {
2378 			if (val == 0) {
2379 				(void) atomic_swap_32(
2380 				    &vgenp->vsw_port_refcnt, 0);
2381 				/* This port is connected to the vsw */
2382 				vgenp->vsw_portp = portp;
2383 			}
2384 		}
2385 	}
2386 
2387 	/* now update all properties into the port */
2388 	portp->vgenp = vgenp;
2389 	portp->port_num = port_num;
2390 	ether_copy(&ea, &portp->macaddr);
2391 	portp->ldc_ids = kmem_zalloc(sizeof (uint64_t) * num_ldcs, KM_SLEEP);
2392 	bcopy(ldc_ids, portp->ldc_ids, sizeof (uint64_t) * num_ldcs);
2393 	portp->num_ldcs = num_ldcs;
2394 
2395 	/* read vlan id properties of this port node */
2396 	vgen_vlan_read_ids(portp, VGEN_PEER, mdp, mdex, &portp->pvid,
2397 	    &portp->vids, &portp->nvids, NULL);
2398 
2399 	kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
2400 
2401 	return (DDI_SUCCESS);
2402 }
2403 
2404 /* remove a port from the device */
2405 static int
2406 vgen_remove_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
2407 {
2408 	uint64_t	port_num;
2409 	vgen_port_t	*portp;
2410 	vgen_portlist_t	*plistp;
2411 
2412 	/* read "id" property to get the port number */
2413 	if (md_get_prop_val(mdp, mdex, id_propname, &port_num)) {
2414 		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
2415 		return (DDI_FAILURE);
2416 	}
2417 
2418 	plistp = &(vgenp->vgenports);
2419 
2420 	WRITE_ENTER(&plistp->rwlock);
2421 	portp = vgen_port_lookup(plistp, (int)port_num);
2422 	if (portp == NULL) {
2423 		DWARN(vgenp, NULL, "can't find port(%lx)\n", port_num);
2424 		RW_EXIT(&plistp->rwlock);
2425 		return (DDI_FAILURE);
2426 	}
2427 
2428 	vgen_port_detach_mdeg(portp);
2429 	RW_EXIT(&plistp->rwlock);
2430 
2431 	return (DDI_SUCCESS);
2432 }
2433 
2434 /* attach a port to the device based on mdeg data */
2435 static int
2436 vgen_port_attach(vgen_port_t *portp)
2437 {
2438 	int			i;
2439 	vgen_portlist_t		*plistp;
2440 	vgen_t			*vgenp;
2441 	uint64_t		*ldcids;
2442 	uint32_t		num_ldcs;
2443 	mac_register_t		*macp;
2444 	vio_net_res_type_t	type;
2445 	int			rv;
2446 
2447 	ASSERT(portp != NULL);
2448 
2449 	vgenp = portp->vgenp;
2450 	ldcids = portp->ldc_ids;
2451 	num_ldcs = portp->num_ldcs;
2452 
2453 	DBG1(vgenp, NULL, "port_num(%d)\n", portp->port_num);
2454 
2455 	mutex_init(&portp->lock, NULL, MUTEX_DRIVER, NULL);
2456 	rw_init(&portp->ldclist.rwlock, NULL, RW_DRIVER, NULL);
2457 	portp->ldclist.headp = NULL;
2458 
2459 	for (i = 0; i < num_ldcs; i++) {
2460 		DBG2(vgenp, NULL, "ldcid (%lx)\n", ldcids[i]);
2461 		if (vgen_ldc_attach(portp, ldcids[i]) == DDI_FAILURE) {
2462 			vgen_port_detach(portp);
2463 			return (DDI_FAILURE);
2464 		}
2465 	}
2466 
2467 	/* create vlan id hash table */
2468 	vgen_vlan_create_hash(portp);
2469 
2470 	if (portp == vgenp->vsw_portp) {
2471 		/* This port is connected to the switch port */
2472 		vgenp->vsw_portp = portp;
2473 		(void) atomic_swap_32(&portp->use_vsw_port, B_FALSE);
2474 		type = VIO_NET_RES_LDC_SERVICE;
2475 	} else {
2476 		(void) atomic_swap_32(&portp->use_vsw_port, B_TRUE);
2477 		type = VIO_NET_RES_LDC_GUEST;
2478 	}
2479 
2480 	if ((macp = mac_alloc(MAC_VERSION)) == NULL) {
2481 		vgen_port_detach(portp);
2482 		return (DDI_FAILURE);
2483 	}
2484 	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
2485 	macp->m_driver = portp;
2486 	macp->m_dip = vgenp->vnetdip;
2487 	macp->m_src_addr = (uint8_t *)&(vgenp->macaddr);
2488 	macp->m_callbacks = &vgen_m_callbacks;
2489 	macp->m_min_sdu = 0;
2490 	macp->m_max_sdu = ETHERMTU;
2491 
2492 	mutex_enter(&portp->lock);
2493 	rv = vio_net_resource_reg(macp, type, vgenp->macaddr,
2494 	    portp->macaddr, &portp->vhp, &portp->vcb);
2495 	mutex_exit(&portp->lock);
2496 	mac_free(macp);
2497 
2498 	if (rv == 0) {
2499 		/* link it into the list of ports */
2500 		plistp = &(vgenp->vgenports);
2501 		WRITE_ENTER(&plistp->rwlock);
2502 		vgen_port_list_insert(portp);
2503 		RW_EXIT(&plistp->rwlock);
2504 	} else {
2505 		DERR(vgenp, NULL, "vio_net_resource_reg failed for portp=0x%p",
2506 		    portp);
2507 		vgen_port_detach(portp);
2508 	}
2509 
2510 	DBG1(vgenp, NULL, "exit: port_num(%d)\n", portp->port_num);
2511 	return (DDI_SUCCESS);
2512 }
2513 
2514 /* detach a port from the device based on mdeg data */
2515 static void
2516 vgen_port_detach_mdeg(vgen_port_t *portp)
2517 {
2518 	vgen_t *vgenp = portp->vgenp;
2519 
2520 	DBG1(vgenp, NULL, "enter: port_num(%d)\n", portp->port_num);
2521 
2522 	mutex_enter(&portp->lock);
2523 
2524 	/* stop the port if needed */
2525 	if (portp->flags & VGEN_STARTED) {
2526 		vgen_port_uninit(portp);
2527 	}
2528 
2529 	mutex_exit(&portp->lock);
2530 	vgen_port_detach(portp);
2531 
2532 	DBG1(vgenp, NULL, "exit: port_num(%d)\n", portp->port_num);
2533 }
2534 
2535 static int
2536 vgen_update_port(vgen_t *vgenp, md_t *curr_mdp, mde_cookie_t curr_mdex,
2537 	md_t *prev_mdp, mde_cookie_t prev_mdex)
2538 {
2539 	uint64_t	cport_num;
2540 	uint64_t	pport_num;
2541 	vgen_portlist_t	*plistp;
2542 	vgen_port_t	*portp;
2543 	boolean_t	updated_vlans = B_FALSE;
2544 	uint16_t	pvid;
2545 	uint16_t	*vids;
2546 	uint16_t	nvids;
2547 
2548 	/*
2549 	 * For now, we get port updates only if vlan ids changed.
2550 	 * We read the port num and do some sanity check.
2551 	 */
2552 	if (md_get_prop_val(curr_mdp, curr_mdex, id_propname, &cport_num)) {
2553 		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
2554 		return (DDI_FAILURE);
2555 	}
2556 
2557 	if (md_get_prop_val(prev_mdp, prev_mdex, id_propname, &pport_num)) {
2558 		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
2559 		return (DDI_FAILURE);
2560 	}
2561 	if (cport_num != pport_num)
2562 		return (DDI_FAILURE);
2563 
2564 	plistp = &(vgenp->vgenports);
2565 
2566 	READ_ENTER(&plistp->rwlock);
2567 
2568 	portp = vgen_port_lookup(plistp, (int)cport_num);
2569 	if (portp == NULL) {
2570 		DWARN(vgenp, NULL, "can't find port(%lx)\n", cport_num);
2571 		RW_EXIT(&plistp->rwlock);
2572 		return (DDI_FAILURE);
2573 	}
2574 
2575 	/* Read the vlan ids */
2576 	vgen_vlan_read_ids(portp, VGEN_PEER, curr_mdp, curr_mdex, &pvid, &vids,
2577 	    &nvids, NULL);
2578 
2579 	/* Determine if there are any vlan id updates */
2580 	if ((pvid != portp->pvid) ||		/* pvid changed? */
2581 	    (nvids != portp->nvids) ||		/* # of vids changed? */
2582 	    ((nvids != 0) && (portp->nvids != 0) &&	/* vids changed? */
2583 	    bcmp(vids, portp->vids, sizeof (uint16_t) * nvids))) {
2584 		updated_vlans = B_TRUE;
2585 	}
2586 
2587 	if (updated_vlans == B_FALSE) {
2588 		RW_EXIT(&plistp->rwlock);
2589 		return (DDI_FAILURE);
2590 	}
2591 
2592 	/* remove the port from vlans it has been assigned to */
2593 	vgen_vlan_remove_ids(portp);
2594 
2595 	/* save the new vlan ids */
2596 	portp->pvid = pvid;
2597 	if (portp->nvids != 0) {
2598 		kmem_free(portp->vids, sizeof (uint16_t) * portp->nvids);
2599 		portp->nvids = 0;
2600 	}
2601 	if (nvids != 0) {
2602 		portp->vids = kmem_zalloc(sizeof (uint16_t) * nvids, KM_SLEEP);
2603 		bcopy(vids, portp->vids, sizeof (uint16_t) * nvids);
2604 		portp->nvids = nvids;
2605 		kmem_free(vids, sizeof (uint16_t) * nvids);
2606 	}
2607 
2608 	/* add port to the new vlans */
2609 	vgen_vlan_add_ids(portp);
2610 
2611 	/* reset the port if it is vlan unaware (ver < 1.3) */
2612 	vgen_vlan_unaware_port_reset(portp);
2613 
2614 	RW_EXIT(&plistp->rwlock);
2615 
2616 	return (DDI_SUCCESS);
2617 }
2618 
2619 static uint64_t
2620 vgen_port_stat(vgen_port_t *portp, uint_t stat)
2621 {
2622 	vgen_ldclist_t	*ldclp;
2623 	vgen_ldc_t *ldcp;
2624 	uint64_t	val;
2625 
2626 	val = 0;
2627 	ldclp = &portp->ldclist;
2628 
2629 	READ_ENTER(&ldclp->rwlock);
2630 	for (ldcp = ldclp->headp; ldcp != NULL; ldcp = ldcp->nextp) {
2631 		val += vgen_ldc_stat(ldcp, stat);
2632 	}
2633 	RW_EXIT(&ldclp->rwlock);
2634 
2635 	return (val);
2636 }
2637 
2638 /* allocate receive resources */
2639 static int
2640 vgen_init_multipools(vgen_ldc_t *ldcp)
2641 {
2642 	size_t		data_sz;
2643 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
2644 	int		status;
2645 	uint32_t	sz1 = 0;
2646 	uint32_t	sz2 = 0;
2647 	uint32_t	sz3 = 0;
2648 	uint32_t	sz4 = 0;
2649 
2650 	/*
2651 	 * We round up the mtu specified to be a multiple of 2K.
2652 	 * We then create rx pools based on the rounded up size.
2653 	 */
2654 	data_sz = vgenp->max_frame_size + VNET_IPALIGN + VNET_LDCALIGN;
2655 	data_sz = VNET_ROUNDUP_2K(data_sz);
2656 
2657 	/*
2658 	 * If pool sizes are specified, use them. Note that the presence of
2659 	 * the first tunable will be used as a hint.
2660 	 */
2661 	if (vgen_rbufsz1 != 0) {
2662 
2663 		sz1 = vgen_rbufsz1;
2664 		sz2 = vgen_rbufsz2;
2665 		sz3 = vgen_rbufsz3;
2666 		sz4 = vgen_rbufsz4;
2667 
2668 		if (sz4 == 0) { /* need 3 pools */
2669 
2670 			ldcp->max_rxpool_size = sz3;
2671 			status = vio_init_multipools(&ldcp->vmp,
2672 			    VGEN_NUM_VMPOOLS, sz1, sz2, sz3, vgen_nrbufs1,
2673 			    vgen_nrbufs2, vgen_nrbufs3);
2674 
2675 		} else {
2676 
2677 			ldcp->max_rxpool_size = sz4;
2678 			status = vio_init_multipools(&ldcp->vmp,
2679 			    VGEN_NUM_VMPOOLS + 1, sz1, sz2, sz3, sz4,
2680 			    vgen_nrbufs1, vgen_nrbufs2, vgen_nrbufs3,
2681 			    vgen_nrbufs4);
2682 		}
2683 		return (status);
2684 	}
2685 
2686 	/*
2687 	 * Pool sizes are not specified. We select the pool sizes based on the
2688 	 * mtu if vnet_jumbo_rxpools is enabled.
2689 	 */
2690 	if (vnet_jumbo_rxpools == B_FALSE || data_sz == VNET_2K) {
2691 		/*
2692 		 * Receive buffer pool allocation based on mtu is disabled.
2693 		 * Use the default mechanism of standard size pool allocation.
2694 		 */
2695 		sz1 = VGEN_DBLK_SZ_128;
2696 		sz2 = VGEN_DBLK_SZ_256;
2697 		sz3 = VGEN_DBLK_SZ_2048;
2698 		ldcp->max_rxpool_size = sz3;
2699 
2700 		status = vio_init_multipools(&ldcp->vmp, VGEN_NUM_VMPOOLS,
2701 		    sz1, sz2, sz3,
2702 		    vgen_nrbufs1, vgen_nrbufs2, vgen_nrbufs3);
2703 
2704 		return (status);
2705 	}
2706 
2707 	switch (data_sz) {
2708 
2709 	case VNET_4K:
2710 
2711 		sz1 = VGEN_DBLK_SZ_128;
2712 		sz2 = VGEN_DBLK_SZ_256;
2713 		sz3 = VGEN_DBLK_SZ_2048;
2714 		sz4 = sz3 << 1;			/* 4K */
2715 		ldcp->max_rxpool_size = sz4;
2716 
2717 		status = vio_init_multipools(&ldcp->vmp, VGEN_NUM_VMPOOLS + 1,
2718 		    sz1, sz2, sz3, sz4,
2719 		    vgen_nrbufs1, vgen_nrbufs2, vgen_nrbufs3, vgen_nrbufs4);
2720 		break;
2721 
2722 	default:	/* data_sz:  4K+ to 16K */
2723 
2724 		sz1 = VGEN_DBLK_SZ_256;
2725 		sz2 = VGEN_DBLK_SZ_2048;
2726 		sz3 = data_sz >> 1;	/* Jumbo-size/2 */
2727 		sz4 = data_sz;		/* Jumbo-size  */
2728 		ldcp->max_rxpool_size = sz4;
2729 
2730 		status = vio_init_multipools(&ldcp->vmp, VGEN_NUM_VMPOOLS + 1,
2731 		    sz1, sz2, sz3, sz4,
2732 		    vgen_nrbufs1, vgen_nrbufs2, vgen_nrbufs3, vgen_nrbufs4);
2733 		break;
2734 
2735 	}
2736 
2737 	return (status);
2738 }
2739 
2740 /* attach the channel corresponding to the given ldc_id to the port */
2741 static int
2742 vgen_ldc_attach(vgen_port_t *portp, uint64_t ldc_id)
2743 {
2744 	vgen_t 		*vgenp;
2745 	vgen_ldclist_t	*ldclp;
2746 	vgen_ldc_t 	*ldcp, **prev_ldcp;
2747 	ldc_attr_t 	attr;
2748 	int 		status;
2749 	ldc_status_t	istatus;
2750 	char		kname[MAXNAMELEN];
2751 	int		instance;
2752 	enum	{AST_init = 0x0, AST_ldc_alloc = 0x1,
2753 		AST_mutex_init = 0x2, AST_ldc_init = 0x4,
2754 		AST_ldc_reg_cb = 0x8, AST_alloc_tx_ring = 0x10,
2755 		AST_create_rxmblks = 0x20,
2756 		AST_create_rcv_thread = 0x40} attach_state;
2757 
2758 	attach_state = AST_init;
2759 	vgenp = portp->vgenp;
2760 	ldclp = &portp->ldclist;
2761 
2762 	ldcp = kmem_zalloc(sizeof (vgen_ldc_t), KM_NOSLEEP);
2763 	if (ldcp == NULL) {
2764 		goto ldc_attach_failed;
2765 	}
2766 	ldcp->ldc_id = ldc_id;
2767 	ldcp->portp = portp;
2768 
2769 	attach_state |= AST_ldc_alloc;
2770 
2771 	mutex_init(&ldcp->txlock, NULL, MUTEX_DRIVER, NULL);
2772 	mutex_init(&ldcp->cblock, NULL, MUTEX_DRIVER, NULL);
2773 	mutex_init(&ldcp->tclock, NULL, MUTEX_DRIVER, NULL);
2774 	mutex_init(&ldcp->wrlock, NULL, MUTEX_DRIVER, NULL);
2775 	mutex_init(&ldcp->rxlock, NULL, MUTEX_DRIVER, NULL);
2776 
2777 	attach_state |= AST_mutex_init;
2778 
2779 	attr.devclass = LDC_DEV_NT;
2780 	attr.instance = vgenp->instance;
2781 	attr.mode = LDC_MODE_UNRELIABLE;
2782 	attr.mtu = vnet_ldc_mtu;
2783 	status = ldc_init(ldc_id, &attr, &ldcp->ldc_handle);
2784 	if (status != 0) {
2785 		DWARN(vgenp, ldcp, "ldc_init failed,rv (%d)\n", status);
2786 		goto ldc_attach_failed;
2787 	}
2788 	attach_state |= AST_ldc_init;
2789 
2790 	if (vgen_rcv_thread_enabled) {
2791 		ldcp->rcv_thr_flags = 0;
2792 
2793 		mutex_init(&ldcp->rcv_thr_lock, NULL, MUTEX_DRIVER, NULL);
2794 		cv_init(&ldcp->rcv_thr_cv, NULL, CV_DRIVER, NULL);
2795 		ldcp->rcv_thread = thread_create(NULL, 2 * DEFAULTSTKSZ,
2796 		    vgen_ldc_rcv_worker, ldcp, 0, &p0, TS_RUN, maxclsyspri);
2797 
2798 		attach_state |= AST_create_rcv_thread;
2799 		if (ldcp->rcv_thread == NULL) {
2800 			DWARN(vgenp, ldcp, "Failed to create worker thread");
2801 			goto ldc_attach_failed;
2802 		}
2803 	}
2804 
2805 	status = ldc_reg_callback(ldcp->ldc_handle, vgen_ldc_cb, (caddr_t)ldcp);
2806 	if (status != 0) {
2807 		DWARN(vgenp, ldcp, "ldc_reg_callback failed, rv (%d)\n",
2808 		    status);
2809 		goto ldc_attach_failed;
2810 	}
2811 	/*
2812 	 * allocate a message for ldc_read()s, big enough to hold ctrl and
2813 	 * data msgs, including raw data msgs used to recv priority frames.
2814 	 */
2815 	ldcp->msglen = VIO_PKT_DATA_HDRSIZE + vgenp->max_frame_size;
2816 	ldcp->ldcmsg = kmem_alloc(ldcp->msglen, KM_SLEEP);
2817 	attach_state |= AST_ldc_reg_cb;
2818 
2819 	(void) ldc_status(ldcp->ldc_handle, &istatus);
2820 	ASSERT(istatus == LDC_INIT);
2821 	ldcp->ldc_status = istatus;
2822 
2823 	/* allocate transmit resources */
2824 	status = vgen_alloc_tx_ring(ldcp);
2825 	if (status != 0) {
2826 		goto ldc_attach_failed;
2827 	}
2828 	attach_state |= AST_alloc_tx_ring;
2829 
2830 	/* allocate receive resources */
2831 	status = vgen_init_multipools(ldcp);
2832 	if (status != 0) {
2833 		goto ldc_attach_failed;
2834 	}
2835 	attach_state |= AST_create_rxmblks;
2836 
2837 	/* Setup kstats for the channel */
2838 	instance = vgenp->instance;
2839 	(void) sprintf(kname, "vnetldc0x%lx", ldcp->ldc_id);
2840 	ldcp->ksp = vgen_setup_kstats("vnet", instance, kname, &ldcp->stats);
2841 	if (ldcp->ksp == NULL) {
2842 		goto ldc_attach_failed;
2843 	}
2844 
2845 	/* initialize vgen_versions supported */
2846 	bcopy(vgen_versions, ldcp->vgen_versions, sizeof (ldcp->vgen_versions));
2847 	vgen_reset_vnet_proto_ops(ldcp);
2848 
2849 	/* link it into the list of channels for this port */
2850 	WRITE_ENTER(&ldclp->rwlock);
2851 	prev_ldcp = (vgen_ldc_t **)(&ldclp->headp);
2852 	ldcp->nextp = *prev_ldcp;
2853 	*prev_ldcp = ldcp;
2854 	RW_EXIT(&ldclp->rwlock);
2855 
2856 	ldcp->flags |= CHANNEL_ATTACHED;
2857 	return (DDI_SUCCESS);
2858 
2859 ldc_attach_failed:
2860 	if (attach_state & AST_ldc_reg_cb) {
2861 		(void) ldc_unreg_callback(ldcp->ldc_handle);
2862 		kmem_free(ldcp->ldcmsg, ldcp->msglen);
2863 	}
2864 	if (attach_state & AST_create_rcv_thread) {
2865 		if (ldcp->rcv_thread != NULL) {
2866 			vgen_stop_rcv_thread(ldcp);
2867 		}
2868 		mutex_destroy(&ldcp->rcv_thr_lock);
2869 		cv_destroy(&ldcp->rcv_thr_cv);
2870 	}
2871 	if (attach_state & AST_create_rxmblks) {
2872 		vio_mblk_pool_t *fvmp = NULL;
2873 
2874 		vio_destroy_multipools(&ldcp->vmp, &fvmp);
2875 		ASSERT(fvmp == NULL);
2876 	}
2877 	if (attach_state & AST_alloc_tx_ring) {
2878 		vgen_free_tx_ring(ldcp);
2879 	}
2880 	if (attach_state & AST_ldc_init) {
2881 		(void) ldc_fini(ldcp->ldc_handle);
2882 	}
2883 	if (attach_state & AST_mutex_init) {
2884 		mutex_destroy(&ldcp->tclock);
2885 		mutex_destroy(&ldcp->txlock);
2886 		mutex_destroy(&ldcp->cblock);
2887 		mutex_destroy(&ldcp->wrlock);
2888 		mutex_destroy(&ldcp->rxlock);
2889 	}
2890 	if (attach_state & AST_ldc_alloc) {
2891 		KMEM_FREE(ldcp);
2892 	}
2893 	return (DDI_FAILURE);
2894 }
2895 
2896 /* detach a channel from the port */
2897 static void
2898 vgen_ldc_detach(vgen_ldc_t *ldcp)
2899 {
2900 	vgen_port_t	*portp;
2901 	vgen_t 		*vgenp;
2902 	vgen_ldc_t 	*pldcp;
2903 	vgen_ldc_t	**prev_ldcp;
2904 	vgen_ldclist_t	*ldclp;
2905 
2906 	portp = ldcp->portp;
2907 	vgenp = portp->vgenp;
2908 	ldclp = &portp->ldclist;
2909 
2910 	prev_ldcp =  (vgen_ldc_t **)&ldclp->headp;
2911 	for (; (pldcp = *prev_ldcp) != NULL; prev_ldcp = &pldcp->nextp) {
2912 		if (pldcp == ldcp) {
2913 			break;
2914 		}
2915 	}
2916 
2917 	if (pldcp == NULL) {
2918 		/* invalid ldcp? */
2919 		return;
2920 	}
2921 
2922 	if (ldcp->ldc_status != LDC_INIT) {
2923 		DWARN(vgenp, ldcp, "ldc_status is not INIT\n");
2924 	}
2925 
2926 	if (ldcp->flags & CHANNEL_ATTACHED) {
2927 		ldcp->flags &= ~(CHANNEL_ATTACHED);
2928 
2929 		(void) ldc_unreg_callback(ldcp->ldc_handle);
2930 		if (ldcp->rcv_thread != NULL) {
2931 			/* First stop the receive thread */
2932 			vgen_stop_rcv_thread(ldcp);
2933 			mutex_destroy(&ldcp->rcv_thr_lock);
2934 			cv_destroy(&ldcp->rcv_thr_cv);
2935 		}
2936 		kmem_free(ldcp->ldcmsg, ldcp->msglen);
2937 
2938 		vgen_destroy_kstats(ldcp->ksp);
2939 		ldcp->ksp = NULL;
2940 
2941 		/*
2942 		 * if we cannot reclaim all mblks, put this
2943 		 * on the list of pools(vgenp->rmp) to be reclaimed when the
2944 		 * device gets detached (see vgen_uninit()).
2945 		 */
2946 		vio_destroy_multipools(&ldcp->vmp, &vgenp->rmp);
2947 
2948 		/* free transmit resources */
2949 		vgen_free_tx_ring(ldcp);
2950 
2951 		(void) ldc_fini(ldcp->ldc_handle);
2952 		mutex_destroy(&ldcp->tclock);
2953 		mutex_destroy(&ldcp->txlock);
2954 		mutex_destroy(&ldcp->cblock);
2955 		mutex_destroy(&ldcp->wrlock);
2956 		mutex_destroy(&ldcp->rxlock);
2957 
2958 		/* unlink it from the list */
2959 		*prev_ldcp = ldcp->nextp;
2960 		KMEM_FREE(ldcp);
2961 	}
2962 }
2963 
2964 /*
2965  * This function allocates transmit resources for the channel.
2966  * The resources consist of a transmit descriptor ring and an associated
2967  * transmit buffer ring.
2968  */
2969 static int
2970 vgen_alloc_tx_ring(vgen_ldc_t *ldcp)
2971 {
2972 	void *tbufp;
2973 	ldc_mem_info_t minfo;
2974 	uint32_t txdsize;
2975 	uint32_t tbufsize;
2976 	int status;
2977 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
2978 
2979 	ldcp->num_txds = vnet_ntxds;
2980 	txdsize = sizeof (vnet_public_desc_t);
2981 	tbufsize = sizeof (vgen_private_desc_t);
2982 
2983 	/* allocate transmit buffer ring */
2984 	tbufp = kmem_zalloc(ldcp->num_txds * tbufsize, KM_NOSLEEP);
2985 	if (tbufp == NULL) {
2986 		return (DDI_FAILURE);
2987 	}
2988 
2989 	/* create transmit descriptor ring */
2990 	status = ldc_mem_dring_create(ldcp->num_txds, txdsize,
2991 	    &ldcp->tx_dhandle);
2992 	if (status) {
2993 		DWARN(vgenp, ldcp, "ldc_mem_dring_create() failed\n");
2994 		kmem_free(tbufp, ldcp->num_txds * tbufsize);
2995 		return (DDI_FAILURE);
2996 	}
2997 
2998 	/* get the addr of descripror ring */
2999 	status = ldc_mem_dring_info(ldcp->tx_dhandle, &minfo);
3000 	if (status) {
3001 		DWARN(vgenp, ldcp, "ldc_mem_dring_info() failed\n");
3002 		kmem_free(tbufp, ldcp->num_txds * tbufsize);
3003 		(void) ldc_mem_dring_destroy(ldcp->tx_dhandle);
3004 		ldcp->tbufp = NULL;
3005 		return (DDI_FAILURE);
3006 	}
3007 	ldcp->txdp = (vnet_public_desc_t *)(minfo.vaddr);
3008 	ldcp->tbufp = tbufp;
3009 
3010 	ldcp->txdendp = &((ldcp->txdp)[ldcp->num_txds]);
3011 	ldcp->tbufendp = &((ldcp->tbufp)[ldcp->num_txds]);
3012 
3013 	return (DDI_SUCCESS);
3014 }
3015 
3016 /* Free transmit resources for the channel */
3017 static void
3018 vgen_free_tx_ring(vgen_ldc_t *ldcp)
3019 {
3020 	int tbufsize = sizeof (vgen_private_desc_t);
3021 
3022 	/* free transmit descriptor ring */
3023 	(void) ldc_mem_dring_destroy(ldcp->tx_dhandle);
3024 
3025 	/* free transmit buffer ring */
3026 	kmem_free(ldcp->tbufp, ldcp->num_txds * tbufsize);
3027 	ldcp->txdp = ldcp->txdendp = NULL;
3028 	ldcp->tbufp = ldcp->tbufendp = NULL;
3029 }
3030 
3031 /* enable transmit/receive on the channels for the port */
3032 static void
3033 vgen_init_ldcs(vgen_port_t *portp)
3034 {
3035 	vgen_ldclist_t	*ldclp = &portp->ldclist;
3036 	vgen_ldc_t	*ldcp;
3037 
3038 	READ_ENTER(&ldclp->rwlock);
3039 	ldcp =  ldclp->headp;
3040 	for (; ldcp  != NULL; ldcp = ldcp->nextp) {
3041 		(void) vgen_ldc_init(ldcp);
3042 	}
3043 	RW_EXIT(&ldclp->rwlock);
3044 }
3045 
3046 /* stop transmit/receive on the channels for the port */
3047 static void
3048 vgen_uninit_ldcs(vgen_port_t *portp)
3049 {
3050 	vgen_ldclist_t	*ldclp = &portp->ldclist;
3051 	vgen_ldc_t	*ldcp;
3052 
3053 	READ_ENTER(&ldclp->rwlock);
3054 	ldcp =  ldclp->headp;
3055 	for (; ldcp  != NULL; ldcp = ldcp->nextp) {
3056 		vgen_ldc_uninit(ldcp);
3057 	}
3058 	RW_EXIT(&ldclp->rwlock);
3059 }
3060 
3061 /* enable transmit/receive on the channel */
3062 static int
3063 vgen_ldc_init(vgen_ldc_t *ldcp)
3064 {
3065 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3066 	ldc_status_t	istatus;
3067 	int		rv;
3068 	uint32_t	retries = 0;
3069 	enum	{ ST_init = 0x0, ST_ldc_open = 0x1,
3070 		ST_init_tbufs = 0x2, ST_cb_enable = 0x4} init_state;
3071 	init_state = ST_init;
3072 
3073 	DBG1(vgenp, ldcp, "enter\n");
3074 	LDC_LOCK(ldcp);
3075 
3076 	rv = ldc_open(ldcp->ldc_handle);
3077 	if (rv != 0) {
3078 		DWARN(vgenp, ldcp, "ldc_open failed: rv(%d)\n", rv);
3079 		goto ldcinit_failed;
3080 	}
3081 	init_state |= ST_ldc_open;
3082 
3083 	(void) ldc_status(ldcp->ldc_handle, &istatus);
3084 	if (istatus != LDC_OPEN && istatus != LDC_READY) {
3085 		DWARN(vgenp, ldcp, "status(%d) is not OPEN/READY\n", istatus);
3086 		goto ldcinit_failed;
3087 	}
3088 	ldcp->ldc_status = istatus;
3089 
3090 	rv = vgen_init_tbufs(ldcp);
3091 	if (rv != 0) {
3092 		DWARN(vgenp, ldcp, "vgen_init_tbufs() failed\n");
3093 		goto ldcinit_failed;
3094 	}
3095 	init_state |= ST_init_tbufs;
3096 
3097 	rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_ENABLE);
3098 	if (rv != 0) {
3099 		DWARN(vgenp, ldcp, "ldc_set_cb_mode failed: rv(%d)\n", rv);
3100 		goto ldcinit_failed;
3101 	}
3102 
3103 	init_state |= ST_cb_enable;
3104 
3105 	do {
3106 		rv = ldc_up(ldcp->ldc_handle);
3107 		if ((rv != 0) && (rv == EWOULDBLOCK)) {
3108 			DBG2(vgenp, ldcp, "ldc_up err rv(%d)\n", rv);
3109 			drv_usecwait(VGEN_LDC_UP_DELAY);
3110 		}
3111 		if (retries++ >= vgen_ldcup_retries)
3112 			break;
3113 	} while (rv == EWOULDBLOCK);
3114 
3115 	(void) ldc_status(ldcp->ldc_handle, &istatus);
3116 	if (istatus == LDC_UP) {
3117 		DWARN(vgenp, ldcp, "status(%d) is UP\n", istatus);
3118 	}
3119 
3120 	ldcp->ldc_status = istatus;
3121 
3122 	/* initialize transmit watchdog timeout */
3123 	ldcp->wd_tid = timeout(vgen_ldc_watchdog, (caddr_t)ldcp,
3124 	    drv_usectohz(vnet_ldcwd_interval * 1000));
3125 
3126 	ldcp->hphase = -1;
3127 	ldcp->flags |= CHANNEL_STARTED;
3128 
3129 	/* if channel is already UP - start handshake */
3130 	if (istatus == LDC_UP) {
3131 		vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3132 		if (ldcp->portp != vgenp->vsw_portp) {
3133 			/*
3134 			 * As the channel is up, use this port from now on.
3135 			 */
3136 			(void) atomic_swap_32(
3137 			    &ldcp->portp->use_vsw_port, B_FALSE);
3138 		}
3139 
3140 		/* Initialize local session id */
3141 		ldcp->local_sid = ddi_get_lbolt();
3142 
3143 		/* clear peer session id */
3144 		ldcp->peer_sid = 0;
3145 		ldcp->hretries = 0;
3146 
3147 		/* Initiate Handshake process with peer ldc endpoint */
3148 		vgen_reset_hphase(ldcp);
3149 
3150 		mutex_exit(&ldcp->tclock);
3151 		mutex_exit(&ldcp->txlock);
3152 		mutex_exit(&ldcp->wrlock);
3153 		mutex_exit(&ldcp->rxlock);
3154 		vgen_handshake(vh_nextphase(ldcp));
3155 		mutex_exit(&ldcp->cblock);
3156 	} else {
3157 		LDC_UNLOCK(ldcp);
3158 	}
3159 
3160 	return (DDI_SUCCESS);
3161 
3162 ldcinit_failed:
3163 	if (init_state & ST_cb_enable) {
3164 		(void) ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE);
3165 	}
3166 	if (init_state & ST_init_tbufs) {
3167 		vgen_uninit_tbufs(ldcp);
3168 	}
3169 	if (init_state & ST_ldc_open) {
3170 		(void) ldc_close(ldcp->ldc_handle);
3171 	}
3172 	LDC_UNLOCK(ldcp);
3173 	DBG1(vgenp, ldcp, "exit\n");
3174 	return (DDI_FAILURE);
3175 }
3176 
3177 /* stop transmit/receive on the channel */
3178 static void
3179 vgen_ldc_uninit(vgen_ldc_t *ldcp)
3180 {
3181 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3182 	int	rv;
3183 	uint_t	retries = 0;
3184 
3185 	DBG1(vgenp, ldcp, "enter\n");
3186 	LDC_LOCK(ldcp);
3187 
3188 	if ((ldcp->flags & CHANNEL_STARTED) == 0) {
3189 		LDC_UNLOCK(ldcp);
3190 		DWARN(vgenp, ldcp, "CHANNEL_STARTED flag is not set\n");
3191 		return;
3192 	}
3193 
3194 	/* disable further callbacks */
3195 	rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE);
3196 	if (rv != 0) {
3197 		DWARN(vgenp, ldcp, "ldc_set_cb_mode failed\n");
3198 	}
3199 
3200 	if (vgenp->vsw_portp == ldcp->portp) {
3201 		vio_net_report_err_t rep_err =
3202 		    ldcp->portp->vcb.vio_net_report_err;
3203 		rep_err(ldcp->portp->vhp, VIO_NET_RES_DOWN);
3204 	}
3205 
3206 	/*
3207 	 * clear handshake done bit and wait for pending tx and cb to finish.
3208 	 * release locks before untimeout(9F) is invoked to cancel timeouts.
3209 	 */
3210 	ldcp->hphase &= ~(VH_DONE);
3211 	LDC_UNLOCK(ldcp);
3212 
3213 	/* cancel handshake watchdog timeout */
3214 	if (ldcp->htid) {
3215 		(void) untimeout(ldcp->htid);
3216 		ldcp->htid = 0;
3217 	}
3218 
3219 	if (ldcp->cancel_htid) {
3220 		(void) untimeout(ldcp->cancel_htid);
3221 		ldcp->cancel_htid = 0;
3222 	}
3223 
3224 	/* cancel transmit watchdog timeout */
3225 	if (ldcp->wd_tid) {
3226 		(void) untimeout(ldcp->wd_tid);
3227 		ldcp->wd_tid = 0;
3228 	}
3229 
3230 	drv_usecwait(1000);
3231 
3232 	if (ldcp->rcv_thread != NULL) {
3233 		/*
3234 		 * Note that callbacks have been disabled already(above). The
3235 		 * drain function takes care of the condition when an already
3236 		 * executing callback signals the worker to start processing or
3237 		 * the worker has already been signalled and is in the middle of
3238 		 * processing.
3239 		 */
3240 		vgen_drain_rcv_thread(ldcp);
3241 	}
3242 
3243 	/* acquire locks again; any pending transmits and callbacks are done */
3244 	LDC_LOCK(ldcp);
3245 
3246 	vgen_reset_hphase(ldcp);
3247 
3248 	vgen_uninit_tbufs(ldcp);
3249 
3250 	/* close the channel - retry on EAGAIN */
3251 	while ((rv = ldc_close(ldcp->ldc_handle)) == EAGAIN) {
3252 		if (++retries > vgen_ldccl_retries) {
3253 			break;
3254 		}
3255 		drv_usecwait(VGEN_LDC_CLOSE_DELAY);
3256 	}
3257 	if (rv != 0) {
3258 		cmn_err(CE_NOTE,
3259 		    "!vnet%d: Error(%d) closing the channel(0x%lx)\n",
3260 		    vgenp->instance, rv, ldcp->ldc_id);
3261 	}
3262 
3263 	ldcp->ldc_status = LDC_INIT;
3264 	ldcp->flags &= ~(CHANNEL_STARTED);
3265 
3266 	LDC_UNLOCK(ldcp);
3267 
3268 	DBG1(vgenp, ldcp, "exit\n");
3269 }
3270 
3271 /* Initialize the transmit buffer ring for the channel */
3272 static int
3273 vgen_init_tbufs(vgen_ldc_t *ldcp)
3274 {
3275 	vgen_private_desc_t	*tbufp;
3276 	vnet_public_desc_t	*txdp;
3277 	vio_dring_entry_hdr_t		*hdrp;
3278 	int 			i;
3279 	int 			rv;
3280 	caddr_t			datap = NULL;
3281 	int			ci;
3282 	uint32_t		ncookies;
3283 	size_t			data_sz;
3284 	vgen_t			*vgenp;
3285 
3286 	vgenp = LDC_TO_VGEN(ldcp);
3287 
3288 	bzero(ldcp->tbufp, sizeof (*tbufp) * (ldcp->num_txds));
3289 	bzero(ldcp->txdp, sizeof (*txdp) * (ldcp->num_txds));
3290 
3291 	/*
3292 	 * In order to ensure that the number of ldc cookies per descriptor is
3293 	 * limited to be within the default MAX_COOKIES (2), we take the steps
3294 	 * outlined below:
3295 	 *
3296 	 * Align the entire data buffer area to 8K and carve out per descriptor
3297 	 * data buffers starting from this 8K aligned base address.
3298 	 *
3299 	 * We round up the mtu specified to be a multiple of 2K or 4K.
3300 	 * For sizes up to 12K we round up the size to the next 2K.
3301 	 * For sizes > 12K we round up to the next 4K (otherwise sizes such as
3302 	 * 14K could end up needing 3 cookies, with the buffer spread across
3303 	 * 3 8K pages:  8K+6K, 2K+8K+2K, 6K+8K, ...).
3304 	 */
3305 	data_sz = vgenp->max_frame_size + VNET_IPALIGN + VNET_LDCALIGN;
3306 	if (data_sz <= VNET_12K) {
3307 		data_sz = VNET_ROUNDUP_2K(data_sz);
3308 	} else {
3309 		data_sz = VNET_ROUNDUP_4K(data_sz);
3310 	}
3311 
3312 	/* allocate extra 8K bytes for alignment */
3313 	ldcp->tx_data_sz = (data_sz * ldcp->num_txds) + VNET_8K;
3314 	datap = kmem_zalloc(ldcp->tx_data_sz, KM_SLEEP);
3315 	ldcp->tx_datap = datap;
3316 
3317 
3318 	/* align the starting address of the data area to 8K */
3319 	datap = (caddr_t)VNET_ROUNDUP_8K((uintptr_t)datap);
3320 
3321 	/*
3322 	 * for each private descriptor, allocate a ldc mem_handle which is
3323 	 * required to map the data during transmit, set the flags
3324 	 * to free (available for use by transmit routine).
3325 	 */
3326 
3327 	for (i = 0; i < ldcp->num_txds; i++) {
3328 
3329 		tbufp = &(ldcp->tbufp[i]);
3330 		rv = ldc_mem_alloc_handle(ldcp->ldc_handle,
3331 		    &(tbufp->memhandle));
3332 		if (rv) {
3333 			tbufp->memhandle = 0;
3334 			goto init_tbufs_failed;
3335 		}
3336 
3337 		/*
3338 		 * bind ldc memhandle to the corresponding transmit buffer.
3339 		 */
3340 		ci = ncookies = 0;
3341 		rv = ldc_mem_bind_handle(tbufp->memhandle,
3342 		    (caddr_t)datap, data_sz, LDC_SHADOW_MAP,
3343 		    LDC_MEM_R, &(tbufp->memcookie[ci]), &ncookies);
3344 		if (rv != 0) {
3345 			goto init_tbufs_failed;
3346 		}
3347 
3348 		/*
3349 		 * successful in binding the handle to tx data buffer.
3350 		 * set datap in the private descr to this buffer.
3351 		 */
3352 		tbufp->datap = datap;
3353 
3354 		if ((ncookies == 0) ||
3355 		    (ncookies > MAX_COOKIES)) {
3356 			goto init_tbufs_failed;
3357 		}
3358 
3359 		for (ci = 1; ci < ncookies; ci++) {
3360 			rv = ldc_mem_nextcookie(tbufp->memhandle,
3361 			    &(tbufp->memcookie[ci]));
3362 			if (rv != 0) {
3363 				goto init_tbufs_failed;
3364 			}
3365 		}
3366 
3367 		tbufp->ncookies = ncookies;
3368 		datap += data_sz;
3369 
3370 		tbufp->flags = VGEN_PRIV_DESC_FREE;
3371 		txdp = &(ldcp->txdp[i]);
3372 		hdrp = &txdp->hdr;
3373 		hdrp->dstate = VIO_DESC_FREE;
3374 		hdrp->ack = B_FALSE;
3375 		tbufp->descp = txdp;
3376 
3377 	}
3378 
3379 	/* reset tbuf walking pointers */
3380 	ldcp->next_tbufp = ldcp->tbufp;
3381 	ldcp->cur_tbufp = ldcp->tbufp;
3382 
3383 	/* initialize tx seqnum and index */
3384 	ldcp->next_txseq = VNET_ISS;
3385 	ldcp->next_txi = 0;
3386 
3387 	ldcp->resched_peer = B_TRUE;
3388 	ldcp->resched_peer_txi = 0;
3389 
3390 	return (DDI_SUCCESS);
3391 
3392 init_tbufs_failed:;
3393 	vgen_uninit_tbufs(ldcp);
3394 	return (DDI_FAILURE);
3395 }
3396 
3397 /* Uninitialize transmit buffer ring for the channel */
3398 static void
3399 vgen_uninit_tbufs(vgen_ldc_t *ldcp)
3400 {
3401 	vgen_private_desc_t	*tbufp = ldcp->tbufp;
3402 	int 			i;
3403 
3404 	/* for each tbuf (priv_desc), free ldc mem_handle */
3405 	for (i = 0; i < ldcp->num_txds; i++) {
3406 
3407 		tbufp = &(ldcp->tbufp[i]);
3408 
3409 		if (tbufp->datap) { /* if bound to a ldc memhandle */
3410 			(void) ldc_mem_unbind_handle(tbufp->memhandle);
3411 			tbufp->datap = NULL;
3412 		}
3413 		if (tbufp->memhandle) {
3414 			(void) ldc_mem_free_handle(tbufp->memhandle);
3415 			tbufp->memhandle = 0;
3416 		}
3417 	}
3418 
3419 	if (ldcp->tx_datap) {
3420 		/* prealloc'd tx data buffer */
3421 		kmem_free(ldcp->tx_datap, ldcp->tx_data_sz);
3422 		ldcp->tx_datap = NULL;
3423 		ldcp->tx_data_sz = 0;
3424 	}
3425 
3426 	bzero(ldcp->tbufp, sizeof (vgen_private_desc_t) * (ldcp->num_txds));
3427 	bzero(ldcp->txdp, sizeof (vnet_public_desc_t) * (ldcp->num_txds));
3428 }
3429 
3430 /* clobber tx descriptor ring */
3431 static void
3432 vgen_clobber_tbufs(vgen_ldc_t *ldcp)
3433 {
3434 	vnet_public_desc_t	*txdp;
3435 	vgen_private_desc_t	*tbufp;
3436 	vio_dring_entry_hdr_t	*hdrp;
3437 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3438 	int i;
3439 #ifdef DEBUG
3440 	int ndone = 0;
3441 #endif
3442 
3443 	for (i = 0; i < ldcp->num_txds; i++) {
3444 
3445 		tbufp = &(ldcp->tbufp[i]);
3446 		txdp = tbufp->descp;
3447 		hdrp = &txdp->hdr;
3448 
3449 		if (tbufp->flags & VGEN_PRIV_DESC_BUSY) {
3450 			tbufp->flags = VGEN_PRIV_DESC_FREE;
3451 #ifdef DEBUG
3452 			if (hdrp->dstate == VIO_DESC_DONE)
3453 				ndone++;
3454 #endif
3455 			hdrp->dstate = VIO_DESC_FREE;
3456 			hdrp->ack = B_FALSE;
3457 		}
3458 	}
3459 	/* reset tbuf walking pointers */
3460 	ldcp->next_tbufp = ldcp->tbufp;
3461 	ldcp->cur_tbufp = ldcp->tbufp;
3462 
3463 	/* reset tx seqnum and index */
3464 	ldcp->next_txseq = VNET_ISS;
3465 	ldcp->next_txi = 0;
3466 
3467 	ldcp->resched_peer = B_TRUE;
3468 	ldcp->resched_peer_txi = 0;
3469 
3470 	DBG2(vgenp, ldcp, "num descrs done (%d)\n", ndone);
3471 }
3472 
3473 /* clobber receive descriptor ring */
3474 static void
3475 vgen_clobber_rxds(vgen_ldc_t *ldcp)
3476 {
3477 	ldcp->rx_dhandle = 0;
3478 	bzero(&ldcp->rx_dcookie, sizeof (ldcp->rx_dcookie));
3479 	ldcp->rxdp = NULL;
3480 	ldcp->next_rxi = 0;
3481 	ldcp->num_rxds = 0;
3482 	ldcp->next_rxseq = VNET_ISS;
3483 }
3484 
3485 /* initialize receive descriptor ring */
3486 static int
3487 vgen_init_rxds(vgen_ldc_t *ldcp, uint32_t num_desc, uint32_t desc_size,
3488 	ldc_mem_cookie_t *dcookie, uint32_t ncookies)
3489 {
3490 	int rv;
3491 	ldc_mem_info_t minfo;
3492 
3493 	rv = ldc_mem_dring_map(ldcp->ldc_handle, dcookie, ncookies, num_desc,
3494 	    desc_size, LDC_DIRECT_MAP, &(ldcp->rx_dhandle));
3495 	if (rv != 0) {
3496 		return (DDI_FAILURE);
3497 	}
3498 
3499 	/*
3500 	 * sucessfully mapped, now try to
3501 	 * get info about the mapped dring
3502 	 */
3503 	rv = ldc_mem_dring_info(ldcp->rx_dhandle, &minfo);
3504 	if (rv != 0) {
3505 		(void) ldc_mem_dring_unmap(ldcp->rx_dhandle);
3506 		return (DDI_FAILURE);
3507 	}
3508 
3509 	/*
3510 	 * save ring address, number of descriptors.
3511 	 */
3512 	ldcp->rxdp = (vnet_public_desc_t *)(minfo.vaddr);
3513 	bcopy(dcookie, &(ldcp->rx_dcookie), sizeof (*dcookie));
3514 	ldcp->num_rxdcookies = ncookies;
3515 	ldcp->num_rxds = num_desc;
3516 	ldcp->next_rxi = 0;
3517 	ldcp->next_rxseq = VNET_ISS;
3518 	ldcp->dring_mtype = minfo.mtype;
3519 
3520 	return (DDI_SUCCESS);
3521 }
3522 
3523 /* get channel statistics */
3524 static uint64_t
3525 vgen_ldc_stat(vgen_ldc_t *ldcp, uint_t stat)
3526 {
3527 	vgen_stats_t *statsp;
3528 	uint64_t val;
3529 
3530 	val = 0;
3531 	statsp = &ldcp->stats;
3532 	switch (stat) {
3533 
3534 	case MAC_STAT_MULTIRCV:
3535 		val = statsp->multircv;
3536 		break;
3537 
3538 	case MAC_STAT_BRDCSTRCV:
3539 		val = statsp->brdcstrcv;
3540 		break;
3541 
3542 	case MAC_STAT_MULTIXMT:
3543 		val = statsp->multixmt;
3544 		break;
3545 
3546 	case MAC_STAT_BRDCSTXMT:
3547 		val = statsp->brdcstxmt;
3548 		break;
3549 
3550 	case MAC_STAT_NORCVBUF:
3551 		val = statsp->norcvbuf;
3552 		break;
3553 
3554 	case MAC_STAT_IERRORS:
3555 		val = statsp->ierrors;
3556 		break;
3557 
3558 	case MAC_STAT_NOXMTBUF:
3559 		val = statsp->noxmtbuf;
3560 		break;
3561 
3562 	case MAC_STAT_OERRORS:
3563 		val = statsp->oerrors;
3564 		break;
3565 
3566 	case MAC_STAT_COLLISIONS:
3567 		break;
3568 
3569 	case MAC_STAT_RBYTES:
3570 		val = statsp->rbytes;
3571 		break;
3572 
3573 	case MAC_STAT_IPACKETS:
3574 		val = statsp->ipackets;
3575 		break;
3576 
3577 	case MAC_STAT_OBYTES:
3578 		val = statsp->obytes;
3579 		break;
3580 
3581 	case MAC_STAT_OPACKETS:
3582 		val = statsp->opackets;
3583 		break;
3584 
3585 	/* stats not relevant to ldc, return 0 */
3586 	case MAC_STAT_IFSPEED:
3587 	case ETHER_STAT_ALIGN_ERRORS:
3588 	case ETHER_STAT_FCS_ERRORS:
3589 	case ETHER_STAT_FIRST_COLLISIONS:
3590 	case ETHER_STAT_MULTI_COLLISIONS:
3591 	case ETHER_STAT_DEFER_XMTS:
3592 	case ETHER_STAT_TX_LATE_COLLISIONS:
3593 	case ETHER_STAT_EX_COLLISIONS:
3594 	case ETHER_STAT_MACXMT_ERRORS:
3595 	case ETHER_STAT_CARRIER_ERRORS:
3596 	case ETHER_STAT_TOOLONG_ERRORS:
3597 	case ETHER_STAT_XCVR_ADDR:
3598 	case ETHER_STAT_XCVR_ID:
3599 	case ETHER_STAT_XCVR_INUSE:
3600 	case ETHER_STAT_CAP_1000FDX:
3601 	case ETHER_STAT_CAP_1000HDX:
3602 	case ETHER_STAT_CAP_100FDX:
3603 	case ETHER_STAT_CAP_100HDX:
3604 	case ETHER_STAT_CAP_10FDX:
3605 	case ETHER_STAT_CAP_10HDX:
3606 	case ETHER_STAT_CAP_ASMPAUSE:
3607 	case ETHER_STAT_CAP_PAUSE:
3608 	case ETHER_STAT_CAP_AUTONEG:
3609 	case ETHER_STAT_ADV_CAP_1000FDX:
3610 	case ETHER_STAT_ADV_CAP_1000HDX:
3611 	case ETHER_STAT_ADV_CAP_100FDX:
3612 	case ETHER_STAT_ADV_CAP_100HDX:
3613 	case ETHER_STAT_ADV_CAP_10FDX:
3614 	case ETHER_STAT_ADV_CAP_10HDX:
3615 	case ETHER_STAT_ADV_CAP_ASMPAUSE:
3616 	case ETHER_STAT_ADV_CAP_PAUSE:
3617 	case ETHER_STAT_ADV_CAP_AUTONEG:
3618 	case ETHER_STAT_LP_CAP_1000FDX:
3619 	case ETHER_STAT_LP_CAP_1000HDX:
3620 	case ETHER_STAT_LP_CAP_100FDX:
3621 	case ETHER_STAT_LP_CAP_100HDX:
3622 	case ETHER_STAT_LP_CAP_10FDX:
3623 	case ETHER_STAT_LP_CAP_10HDX:
3624 	case ETHER_STAT_LP_CAP_ASMPAUSE:
3625 	case ETHER_STAT_LP_CAP_PAUSE:
3626 	case ETHER_STAT_LP_CAP_AUTONEG:
3627 	case ETHER_STAT_LINK_ASMPAUSE:
3628 	case ETHER_STAT_LINK_PAUSE:
3629 	case ETHER_STAT_LINK_AUTONEG:
3630 	case ETHER_STAT_LINK_DUPLEX:
3631 	default:
3632 		val = 0;
3633 		break;
3634 
3635 	}
3636 	return (val);
3637 }
3638 
3639 /*
3640  * LDC channel is UP, start handshake process with peer.
3641  */
3642 static void
3643 vgen_handle_evt_up(vgen_ldc_t *ldcp)
3644 {
3645 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
3646 
3647 	DBG1(vgenp, ldcp, "enter\n");
3648 
3649 	ASSERT(MUTEX_HELD(&ldcp->cblock));
3650 
3651 	if (ldcp->portp != vgenp->vsw_portp) {
3652 		/*
3653 		 * As the channel is up, use this port from now on.
3654 		 */
3655 		(void) atomic_swap_32(&ldcp->portp->use_vsw_port, B_FALSE);
3656 	}
3657 
3658 	/* Initialize local session id */
3659 	ldcp->local_sid = ddi_get_lbolt();
3660 
3661 	/* clear peer session id */
3662 	ldcp->peer_sid = 0;
3663 	ldcp->hretries = 0;
3664 
3665 	if (ldcp->hphase != VH_PHASE0) {
3666 		vgen_handshake_reset(ldcp);
3667 	}
3668 
3669 	/* Initiate Handshake process with peer ldc endpoint */
3670 	vgen_handshake(vh_nextphase(ldcp));
3671 
3672 	DBG1(vgenp, ldcp, "exit\n");
3673 }
3674 
3675 /*
3676  * LDC channel is Reset, terminate connection with peer and try to
3677  * bring the channel up again.
3678  */
3679 static void
3680 vgen_handle_evt_reset(vgen_ldc_t *ldcp)
3681 {
3682 	ldc_status_t istatus;
3683 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
3684 	int	rv;
3685 
3686 	DBG1(vgenp, ldcp, "enter\n");
3687 
3688 	ASSERT(MUTEX_HELD(&ldcp->cblock));
3689 
3690 	if ((ldcp->portp != vgenp->vsw_portp) &&
3691 	    (vgenp->vsw_portp != NULL)) {
3692 		/*
3693 		 * As the channel is down, use the switch port until
3694 		 * the channel becomes ready to be used.
3695 		 */
3696 		(void) atomic_swap_32(&ldcp->portp->use_vsw_port, B_TRUE);
3697 	}
3698 
3699 	if (vgenp->vsw_portp == ldcp->portp) {
3700 		vio_net_report_err_t rep_err =
3701 		    ldcp->portp->vcb.vio_net_report_err;
3702 
3703 		/* Post a reset message */
3704 		rep_err(ldcp->portp->vhp, VIO_NET_RES_DOWN);
3705 	}
3706 
3707 	if (ldcp->hphase != VH_PHASE0) {
3708 		vgen_handshake_reset(ldcp);
3709 	}
3710 
3711 	/* try to bring the channel up */
3712 	rv = ldc_up(ldcp->ldc_handle);
3713 	if (rv != 0) {
3714 		DWARN(vgenp, ldcp, "ldc_up err rv(%d)\n", rv);
3715 	}
3716 
3717 	if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
3718 		DWARN(vgenp, ldcp, "ldc_status err\n");
3719 	} else {
3720 		ldcp->ldc_status = istatus;
3721 	}
3722 
3723 	/* if channel is already UP - restart handshake */
3724 	if (ldcp->ldc_status == LDC_UP) {
3725 		vgen_handle_evt_up(ldcp);
3726 	}
3727 
3728 	DBG1(vgenp, ldcp, "exit\n");
3729 }
3730 
3731 /* Interrupt handler for the channel */
3732 static uint_t
3733 vgen_ldc_cb(uint64_t event, caddr_t arg)
3734 {
3735 	_NOTE(ARGUNUSED(event))
3736 	vgen_ldc_t	*ldcp;
3737 	vgen_t		*vgenp;
3738 	ldc_status_t 	istatus;
3739 	vgen_stats_t	*statsp;
3740 	timeout_id_t	cancel_htid = 0;
3741 	uint_t		ret = LDC_SUCCESS;
3742 
3743 	ldcp = (vgen_ldc_t *)arg;
3744 	vgenp = LDC_TO_VGEN(ldcp);
3745 	statsp = &ldcp->stats;
3746 
3747 	DBG1(vgenp, ldcp, "enter\n");
3748 
3749 	mutex_enter(&ldcp->cblock);
3750 	statsp->callbacks++;
3751 	if ((ldcp->ldc_status == LDC_INIT) || (ldcp->ldc_handle == NULL)) {
3752 		DWARN(vgenp, ldcp, "status(%d) is LDC_INIT\n",
3753 		    ldcp->ldc_status);
3754 		mutex_exit(&ldcp->cblock);
3755 		return (LDC_SUCCESS);
3756 	}
3757 
3758 	/*
3759 	 * cache cancel_htid before the events specific
3760 	 * code may overwrite it. Do not clear ldcp->cancel_htid
3761 	 * as it is also used to indicate the timer to quit immediately.
3762 	 */
3763 	cancel_htid = ldcp->cancel_htid;
3764 
3765 	/*
3766 	 * NOTE: not using switch() as event could be triggered by
3767 	 * a state change and a read request. Also the ordering	of the
3768 	 * check for the event types is deliberate.
3769 	 */
3770 	if (event & LDC_EVT_UP) {
3771 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
3772 			DWARN(vgenp, ldcp, "ldc_status err\n");
3773 			/* status couldn't be determined */
3774 			ret = LDC_FAILURE;
3775 			goto ldc_cb_ret;
3776 		}
3777 		ldcp->ldc_status = istatus;
3778 		if (ldcp->ldc_status != LDC_UP) {
3779 			DWARN(vgenp, ldcp, "LDC_EVT_UP received "
3780 			    " but ldc status is not UP(0x%x)\n",
3781 			    ldcp->ldc_status);
3782 			/* spurious interrupt, return success */
3783 			goto ldc_cb_ret;
3784 		}
3785 		DWARN(vgenp, ldcp, "event(%lx) UP, status(%d)\n",
3786 		    event, ldcp->ldc_status);
3787 
3788 		vgen_handle_evt_up(ldcp);
3789 
3790 		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);
3791 	}
3792 
3793 	/* Handle RESET/DOWN before READ event */
3794 	if (event & (LDC_EVT_RESET | LDC_EVT_DOWN)) {
3795 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
3796 			DWARN(vgenp, ldcp, "ldc_status error\n");
3797 			/* status couldn't be determined */
3798 			ret = LDC_FAILURE;
3799 			goto ldc_cb_ret;
3800 		}
3801 		ldcp->ldc_status = istatus;
3802 		DWARN(vgenp, ldcp, "event(%lx) RESET/DOWN, status(%d)\n",
3803 		    event, ldcp->ldc_status);
3804 
3805 		vgen_handle_evt_reset(ldcp);
3806 
3807 		/*
3808 		 * As the channel is down/reset, ignore READ event
3809 		 * but print a debug warning message.
3810 		 */
3811 		if (event & LDC_EVT_READ) {
3812 			DWARN(vgenp, ldcp,
3813 			    "LDC_EVT_READ set along with RESET/DOWN\n");
3814 			event &= ~LDC_EVT_READ;
3815 		}
3816 	}
3817 
3818 	if (event & LDC_EVT_READ) {
3819 		DBG2(vgenp, ldcp, "event(%lx) READ, status(%d)\n",
3820 		    event, ldcp->ldc_status);
3821 
3822 		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);
3823 
3824 		if (ldcp->rcv_thread != NULL) {
3825 			/*
3826 			 * If the receive thread is enabled, then
3827 			 * wakeup the receive thread to process the
3828 			 * LDC messages.
3829 			 */
3830 			mutex_exit(&ldcp->cblock);
3831 			mutex_enter(&ldcp->rcv_thr_lock);
3832 			if (!(ldcp->rcv_thr_flags & VGEN_WTHR_DATARCVD)) {
3833 				ldcp->rcv_thr_flags |= VGEN_WTHR_DATARCVD;
3834 				cv_signal(&ldcp->rcv_thr_cv);
3835 			}
3836 			mutex_exit(&ldcp->rcv_thr_lock);
3837 			mutex_enter(&ldcp->cblock);
3838 		} else  {
3839 			vgen_handle_evt_read(ldcp);
3840 		}
3841 	}
3842 
3843 ldc_cb_ret:
3844 	/*
3845 	 * Check to see if the status of cancel_htid has
3846 	 * changed. If another timer needs to be cancelled,
3847 	 * then let the next callback to clear it.
3848 	 */
3849 	if (cancel_htid == 0) {
3850 		cancel_htid = ldcp->cancel_htid;
3851 	}
3852 	mutex_exit(&ldcp->cblock);
3853 
3854 	if (cancel_htid) {
3855 		/*
3856 		 * Cancel handshake timer.
3857 		 * untimeout(9F) will not return until the pending callback is
3858 		 * cancelled or has run. No problems will result from calling
3859 		 * untimeout if the handler has already completed.
3860 		 * If the timeout handler did run, then it would just
3861 		 * return as cancel_htid is set.
3862 		 */
3863 		DBG2(vgenp, ldcp, "calling cance_htid =0x%X \n", cancel_htid);
3864 		(void) untimeout(cancel_htid);
3865 		mutex_enter(&ldcp->cblock);
3866 		/* clear it only if its the same as the one we cancelled */
3867 		if (ldcp->cancel_htid == cancel_htid) {
3868 			ldcp->cancel_htid = 0;
3869 		}
3870 		mutex_exit(&ldcp->cblock);
3871 	}
3872 	DBG1(vgenp, ldcp, "exit\n");
3873 	return (ret);
3874 }
3875 
3876 static void
3877 vgen_handle_evt_read(vgen_ldc_t *ldcp)
3878 {
3879 	int		rv;
3880 	uint64_t	*ldcmsg;
3881 	size_t		msglen;
3882 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
3883 	vio_msg_tag_t	*tagp;
3884 	ldc_status_t 	istatus;
3885 	boolean_t 	has_data;
3886 
3887 	DBG1(vgenp, ldcp, "enter\n");
3888 
3889 	ldcmsg = ldcp->ldcmsg;
3890 	/*
3891 	 * If the receive thread is enabled, then the cblock
3892 	 * need to be acquired here. If not, the vgen_ldc_cb()
3893 	 * calls this function with cblock held already.
3894 	 */
3895 	if (ldcp->rcv_thread != NULL) {
3896 		mutex_enter(&ldcp->cblock);
3897 	} else {
3898 		ASSERT(MUTEX_HELD(&ldcp->cblock));
3899 	}
3900 
3901 vgen_evt_read:
3902 	do {
3903 		msglen = ldcp->msglen;
3904 		rv = ldc_read(ldcp->ldc_handle, (caddr_t)ldcmsg, &msglen);
3905 
3906 		if (rv != 0) {
3907 			DWARN(vgenp, ldcp, "err rv(%d) len(%d)\n",
3908 			    rv, msglen);
3909 			if (rv == ECONNRESET)
3910 				goto vgen_evtread_error;
3911 			break;
3912 		}
3913 		if (msglen == 0) {
3914 			DBG2(vgenp, ldcp, "ldc_read NODATA");
3915 			break;
3916 		}
3917 		DBG2(vgenp, ldcp, "ldc_read msglen(%d)", msglen);
3918 
3919 		tagp = (vio_msg_tag_t *)ldcmsg;
3920 
3921 		if (ldcp->peer_sid) {
3922 			/*
3923 			 * check sid only after we have received peer's sid
3924 			 * in the version negotiate msg.
3925 			 */
3926 #ifdef DEBUG
3927 			if (vgen_hdbg & HDBG_BAD_SID) {
3928 				/* simulate bad sid condition */
3929 				tagp->vio_sid = 0;
3930 				vgen_hdbg &= ~(HDBG_BAD_SID);
3931 			}
3932 #endif
3933 			rv = vgen_check_sid(ldcp, tagp);
3934 			if (rv != VGEN_SUCCESS) {
3935 				/*
3936 				 * If sid mismatch is detected,
3937 				 * reset the channel.
3938 				 */
3939 				ldcp->need_ldc_reset = B_TRUE;
3940 				goto vgen_evtread_error;
3941 			}
3942 		}
3943 
3944 		switch (tagp->vio_msgtype) {
3945 		case VIO_TYPE_CTRL:
3946 			rv = vgen_handle_ctrlmsg(ldcp, tagp);
3947 			break;
3948 
3949 		case VIO_TYPE_DATA:
3950 			rv = vgen_handle_datamsg(ldcp, tagp, msglen);
3951 			break;
3952 
3953 		case VIO_TYPE_ERR:
3954 			vgen_handle_errmsg(ldcp, tagp);
3955 			break;
3956 
3957 		default:
3958 			DWARN(vgenp, ldcp, "Unknown VIO_TYPE(%x)\n",
3959 			    tagp->vio_msgtype);
3960 			break;
3961 		}
3962 
3963 		/*
3964 		 * If an error is encountered, stop processing and
3965 		 * handle the error.
3966 		 */
3967 		if (rv != 0) {
3968 			goto vgen_evtread_error;
3969 		}
3970 
3971 	} while (msglen);
3972 
3973 	/* check once more before exiting */
3974 	rv = ldc_chkq(ldcp->ldc_handle, &has_data);
3975 	if ((rv == 0) && (has_data == B_TRUE)) {
3976 		DTRACE_PROBE(vgen_chkq);
3977 		goto vgen_evt_read;
3978 	}
3979 
3980 vgen_evtread_error:
3981 	if (rv == ECONNRESET) {
3982 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
3983 			DWARN(vgenp, ldcp, "ldc_status err\n");
3984 		} else {
3985 			ldcp->ldc_status = istatus;
3986 		}
3987 		vgen_handle_evt_reset(ldcp);
3988 	} else if (rv) {
3989 		vgen_handshake_retry(ldcp);
3990 	}
3991 
3992 	/*
3993 	 * If the receive thread is enabled, then cancel the
3994 	 * handshake timeout here.
3995 	 */
3996 	if (ldcp->rcv_thread != NULL) {
3997 		timeout_id_t cancel_htid = ldcp->cancel_htid;
3998 
3999 		mutex_exit(&ldcp->cblock);
4000 		if (cancel_htid) {
4001 			/*
4002 			 * Cancel handshake timer. untimeout(9F) will
4003 			 * not return until the pending callback is cancelled
4004 			 * or has run. No problems will result from calling
4005 			 * untimeout if the handler has already completed.
4006 			 * If the timeout handler did run, then it would just
4007 			 * return as cancel_htid is set.
4008 			 */
4009 			DBG2(vgenp, ldcp, "calling cance_htid =0x%X \n",
4010 			    cancel_htid);
4011 			(void) untimeout(cancel_htid);
4012 
4013 			/*
4014 			 * clear it only if its the same as the one we
4015 			 * cancelled
4016 			 */
4017 			mutex_enter(&ldcp->cblock);
4018 			if (ldcp->cancel_htid == cancel_htid) {
4019 				ldcp->cancel_htid = 0;
4020 			}
4021 			mutex_exit(&ldcp->cblock);
4022 		}
4023 	}
4024 
4025 	DBG1(vgenp, ldcp, "exit\n");
4026 }
4027 
4028 /* vgen handshake functions */
4029 
4030 /* change the hphase for the channel to the next phase */
4031 static vgen_ldc_t *
4032 vh_nextphase(vgen_ldc_t *ldcp)
4033 {
4034 	if (ldcp->hphase == VH_PHASE3) {
4035 		ldcp->hphase = VH_DONE;
4036 	} else {
4037 		ldcp->hphase++;
4038 	}
4039 	return (ldcp);
4040 }
4041 
4042 /*
4043  * wrapper routine to send the given message over ldc using ldc_write().
4044  */
4045 static int
4046 vgen_sendmsg(vgen_ldc_t *ldcp, caddr_t msg,  size_t msglen,
4047     boolean_t caller_holds_lock)
4048 {
4049 	int			rv;
4050 	size_t			len;
4051 	uint32_t		retries = 0;
4052 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
4053 	vio_msg_tag_t		*tagp = (vio_msg_tag_t *)msg;
4054 	vio_dring_msg_t		*dmsg;
4055 	vio_raw_data_msg_t	*rmsg;
4056 	boolean_t		data_msg = B_FALSE;
4057 
4058 	len = msglen;
4059 	if ((len == 0) || (msg == NULL))
4060 		return (VGEN_FAILURE);
4061 
4062 	if (!caller_holds_lock) {
4063 		mutex_enter(&ldcp->wrlock);
4064 	}
4065 
4066 	if (tagp->vio_subtype == VIO_SUBTYPE_INFO) {
4067 		if (tagp->vio_subtype_env == VIO_DRING_DATA) {
4068 			dmsg = (vio_dring_msg_t *)tagp;
4069 			dmsg->seq_num = ldcp->next_txseq;
4070 			data_msg = B_TRUE;
4071 		} else if (tagp->vio_subtype_env == VIO_PKT_DATA) {
4072 			rmsg = (vio_raw_data_msg_t *)tagp;
4073 			rmsg->seq_num = ldcp->next_txseq;
4074 			data_msg = B_TRUE;
4075 		}
4076 	}
4077 
4078 	do {
4079 		len = msglen;
4080 		rv = ldc_write(ldcp->ldc_handle, (caddr_t)msg, &len);
4081 		if (retries++ >= vgen_ldcwr_retries)
4082 			break;
4083 	} while (rv == EWOULDBLOCK);
4084 
4085 	if (rv == 0 && data_msg == B_TRUE) {
4086 		ldcp->next_txseq++;
4087 	}
4088 
4089 	if (!caller_holds_lock) {
4090 		mutex_exit(&ldcp->wrlock);
4091 	}
4092 
4093 	if (rv != 0) {
4094 		DWARN(vgenp, ldcp, "ldc_write failed: rv(%d)\n",
4095 		    rv, msglen);
4096 		return (rv);
4097 	}
4098 
4099 	if (len != msglen) {
4100 		DWARN(vgenp, ldcp, "ldc_write failed: rv(%d) msglen (%d)\n",
4101 		    rv, msglen);
4102 		return (VGEN_FAILURE);
4103 	}
4104 
4105 	return (VGEN_SUCCESS);
4106 }
4107 
4108 /* send version negotiate message to the peer over ldc */
4109 static int
4110 vgen_send_version_negotiate(vgen_ldc_t *ldcp)
4111 {
4112 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4113 	vio_ver_msg_t	vermsg;
4114 	vio_msg_tag_t	*tagp = &vermsg.tag;
4115 	int		rv;
4116 
4117 	bzero(&vermsg, sizeof (vermsg));
4118 
4119 	tagp->vio_msgtype = VIO_TYPE_CTRL;
4120 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
4121 	tagp->vio_subtype_env = VIO_VER_INFO;
4122 	tagp->vio_sid = ldcp->local_sid;
4123 
4124 	/* get version msg payload from ldcp->local */
4125 	vermsg.ver_major = ldcp->local_hparams.ver_major;
4126 	vermsg.ver_minor = ldcp->local_hparams.ver_minor;
4127 	vermsg.dev_class = ldcp->local_hparams.dev_class;
4128 
4129 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (vermsg), B_FALSE);
4130 	if (rv != VGEN_SUCCESS) {
4131 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
4132 		return (rv);
4133 	}
4134 
4135 	ldcp->hstate |= VER_INFO_SENT;
4136 	DBG2(vgenp, ldcp, "VER_INFO_SENT ver(%d,%d)\n",
4137 	    vermsg.ver_major, vermsg.ver_minor);
4138 
4139 	return (VGEN_SUCCESS);
4140 }
4141 
4142 /* send attr info message to the peer over ldc */
4143 static int
4144 vgen_send_attr_info(vgen_ldc_t *ldcp)
4145 {
4146 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4147 	vnet_attr_msg_t	attrmsg;
4148 	vio_msg_tag_t	*tagp = &attrmsg.tag;
4149 	int		rv;
4150 
4151 	bzero(&attrmsg, sizeof (attrmsg));
4152 
4153 	tagp->vio_msgtype = VIO_TYPE_CTRL;
4154 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
4155 	tagp->vio_subtype_env = VIO_ATTR_INFO;
4156 	tagp->vio_sid = ldcp->local_sid;
4157 
4158 	/* get attr msg payload from ldcp->local */
4159 	attrmsg.mtu = ldcp->local_hparams.mtu;
4160 	attrmsg.addr = ldcp->local_hparams.addr;
4161 	attrmsg.addr_type = ldcp->local_hparams.addr_type;
4162 	attrmsg.xfer_mode = ldcp->local_hparams.xfer_mode;
4163 	attrmsg.ack_freq = ldcp->local_hparams.ack_freq;
4164 
4165 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (attrmsg), B_FALSE);
4166 	if (rv != VGEN_SUCCESS) {
4167 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
4168 		return (rv);
4169 	}
4170 
4171 	ldcp->hstate |= ATTR_INFO_SENT;
4172 	DBG2(vgenp, ldcp, "ATTR_INFO_SENT\n");
4173 
4174 	return (VGEN_SUCCESS);
4175 }
4176 
4177 /* send descriptor ring register message to the peer over ldc */
4178 static int
4179 vgen_send_dring_reg(vgen_ldc_t *ldcp)
4180 {
4181 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
4182 	vio_dring_reg_msg_t	msg;
4183 	vio_msg_tag_t		*tagp = &msg.tag;
4184 	int		rv;
4185 
4186 	bzero(&msg, sizeof (msg));
4187 
4188 	tagp->vio_msgtype = VIO_TYPE_CTRL;
4189 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
4190 	tagp->vio_subtype_env = VIO_DRING_REG;
4191 	tagp->vio_sid = ldcp->local_sid;
4192 
4193 	/* get dring info msg payload from ldcp->local */
4194 	bcopy(&(ldcp->local_hparams.dring_cookie), (msg.cookie),
4195 	    sizeof (ldc_mem_cookie_t));
4196 	msg.ncookies = ldcp->local_hparams.num_dcookies;
4197 	msg.num_descriptors = ldcp->local_hparams.num_desc;
4198 	msg.descriptor_size = ldcp->local_hparams.desc_size;
4199 
4200 	/*
4201 	 * dring_ident is set to 0. After mapping the dring, peer sets this
4202 	 * value and sends it in the ack, which is saved in
4203 	 * vgen_handle_dring_reg().
4204 	 */
4205 	msg.dring_ident = 0;
4206 
4207 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (msg), B_FALSE);
4208 	if (rv != VGEN_SUCCESS) {
4209 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
4210 		return (rv);
4211 	}
4212 
4213 	ldcp->hstate |= DRING_INFO_SENT;
4214 	DBG2(vgenp, ldcp, "DRING_INFO_SENT \n");
4215 
4216 	return (VGEN_SUCCESS);
4217 }
4218 
4219 static int
4220 vgen_send_rdx_info(vgen_ldc_t *ldcp)
4221 {
4222 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4223 	vio_rdx_msg_t	rdxmsg;
4224 	vio_msg_tag_t	*tagp = &rdxmsg.tag;
4225 	int		rv;
4226 
4227 	bzero(&rdxmsg, sizeof (rdxmsg));
4228 
4229 	tagp->vio_msgtype = VIO_TYPE_CTRL;
4230 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
4231 	tagp->vio_subtype_env = VIO_RDX;
4232 	tagp->vio_sid = ldcp->local_sid;
4233 
4234 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (rdxmsg), B_FALSE);
4235 	if (rv != VGEN_SUCCESS) {
4236 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
4237 		return (rv);
4238 	}
4239 
4240 	ldcp->hstate |= RDX_INFO_SENT;
4241 	DBG2(vgenp, ldcp, "RDX_INFO_SENT\n");
4242 
4243 	return (VGEN_SUCCESS);
4244 }
4245 
4246 /* send descriptor ring data message to the peer over ldc */
4247 static int
4248 vgen_send_dring_data(vgen_ldc_t *ldcp, uint32_t start, int32_t end)
4249 {
4250 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4251 	vio_dring_msg_t	dringmsg, *msgp = &dringmsg;
4252 	vio_msg_tag_t	*tagp = &msgp->tag;
4253 	vgen_stats_t	*statsp = &ldcp->stats;
4254 	int		rv;
4255 
4256 	bzero(msgp, sizeof (*msgp));
4257 
4258 	tagp->vio_msgtype = VIO_TYPE_DATA;
4259 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
4260 	tagp->vio_subtype_env = VIO_DRING_DATA;
4261 	tagp->vio_sid = ldcp->local_sid;
4262 
4263 	msgp->dring_ident = ldcp->local_hparams.dring_ident;
4264 	msgp->start_idx = start;
4265 	msgp->end_idx = end;
4266 
4267 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (dringmsg), B_TRUE);
4268 	if (rv != VGEN_SUCCESS) {
4269 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
4270 		return (rv);
4271 	}
4272 
4273 	statsp->dring_data_msgs++;
4274 
4275 	DBG2(vgenp, ldcp, "DRING_DATA_SENT \n");
4276 
4277 	return (VGEN_SUCCESS);
4278 }
4279 
4280 /* send multicast addr info message to vsw */
4281 static int
4282 vgen_send_mcast_info(vgen_ldc_t *ldcp)
4283 {
4284 	vnet_mcast_msg_t	mcastmsg;
4285 	vnet_mcast_msg_t	*msgp;
4286 	vio_msg_tag_t		*tagp;
4287 	vgen_t			*vgenp;
4288 	struct ether_addr	*mca;
4289 	int			rv;
4290 	int			i;
4291 	uint32_t		size;
4292 	uint32_t		mccount;
4293 	uint32_t		n;
4294 
4295 	msgp = &mcastmsg;
4296 	tagp = &msgp->tag;
4297 	vgenp = LDC_TO_VGEN(ldcp);
4298 
4299 	mccount = vgenp->mccount;
4300 	i = 0;
4301 
4302 	do {
4303 		tagp->vio_msgtype = VIO_TYPE_CTRL;
4304 		tagp->vio_subtype = VIO_SUBTYPE_INFO;
4305 		tagp->vio_subtype_env = VNET_MCAST_INFO;
4306 		tagp->vio_sid = ldcp->local_sid;
4307 
4308 		n = ((mccount >= VNET_NUM_MCAST) ? VNET_NUM_MCAST : mccount);
4309 		size = n * sizeof (struct ether_addr);
4310 
4311 		mca = &(vgenp->mctab[i]);
4312 		bcopy(mca, (msgp->mca), size);
4313 		msgp->set = B_TRUE;
4314 		msgp->count = n;
4315 
4316 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msgp),
4317 		    B_FALSE);
4318 		if (rv != VGEN_SUCCESS) {
4319 			DWARN(vgenp, ldcp, "vgen_sendmsg err(%d)\n", rv);
4320 			return (rv);
4321 		}
4322 
4323 		mccount -= n;
4324 		i += n;
4325 
4326 	} while (mccount);
4327 
4328 	return (VGEN_SUCCESS);
4329 }
4330 
4331 /* Initiate Phase 2 of handshake */
4332 static int
4333 vgen_handshake_phase2(vgen_ldc_t *ldcp)
4334 {
4335 	int rv;
4336 	uint32_t ncookies = 0;
4337 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4338 
4339 #ifdef DEBUG
4340 	if (vgen_hdbg & HDBG_OUT_STATE) {
4341 		/* simulate out of state condition */
4342 		vgen_hdbg &= ~(HDBG_OUT_STATE);
4343 		rv = vgen_send_rdx_info(ldcp);
4344 		return (rv);
4345 	}
4346 	if (vgen_hdbg & HDBG_TIMEOUT) {
4347 		/* simulate timeout condition */
4348 		vgen_hdbg &= ~(HDBG_TIMEOUT);
4349 		return (VGEN_SUCCESS);
4350 	}
4351 #endif
4352 	rv = vgen_send_attr_info(ldcp);
4353 	if (rv != VGEN_SUCCESS) {
4354 		return (rv);
4355 	}
4356 
4357 	/* Bind descriptor ring to the channel */
4358 	if (ldcp->num_txdcookies == 0) {
4359 		rv = ldc_mem_dring_bind(ldcp->ldc_handle, ldcp->tx_dhandle,
4360 		    LDC_DIRECT_MAP | LDC_SHADOW_MAP, LDC_MEM_RW,
4361 		    &ldcp->tx_dcookie, &ncookies);
4362 		if (rv != 0) {
4363 			DWARN(vgenp, ldcp, "ldc_mem_dring_bind failed "
4364 			    "rv(%x)\n", rv);
4365 			return (rv);
4366 		}
4367 		ASSERT(ncookies == 1);
4368 		ldcp->num_txdcookies = ncookies;
4369 	}
4370 
4371 	/* update local dring_info params */
4372 	bcopy(&(ldcp->tx_dcookie), &(ldcp->local_hparams.dring_cookie),
4373 	    sizeof (ldc_mem_cookie_t));
4374 	ldcp->local_hparams.num_dcookies = ldcp->num_txdcookies;
4375 	ldcp->local_hparams.num_desc = ldcp->num_txds;
4376 	ldcp->local_hparams.desc_size = sizeof (vnet_public_desc_t);
4377 
4378 	rv = vgen_send_dring_reg(ldcp);
4379 	if (rv != VGEN_SUCCESS) {
4380 		return (rv);
4381 	}
4382 
4383 	return (VGEN_SUCCESS);
4384 }
4385 
4386 /*
4387  * Set vnet-protocol-version dependent functions based on version.
4388  */
4389 static void
4390 vgen_set_vnet_proto_ops(vgen_ldc_t *ldcp)
4391 {
4392 	vgen_hparams_t	*lp = &ldcp->local_hparams;
4393 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4394 
4395 	if (VGEN_VER_GTEQ(ldcp, 1, 4)) {
4396 		/*
4397 		 * If the version negotiated with peer is >= 1.4(Jumbo Frame
4398 		 * Support), set the mtu in our attributes to max_frame_size.
4399 		 */
4400 		lp->mtu = vgenp->max_frame_size;
4401 	} else  if (VGEN_VER_EQ(ldcp, 1, 3)) {
4402 		/*
4403 		 * If the version negotiated with peer is == 1.3 (Vlan Tag
4404 		 * Support) set the attr.mtu to ETHERMAX + VLAN_TAGSZ.
4405 		 */
4406 		lp->mtu = ETHERMAX + VLAN_TAGSZ;
4407 	} else {
4408 		vgen_port_t	*portp = ldcp->portp;
4409 		vnet_t		*vnetp = vgenp->vnetp;
4410 		/*
4411 		 * Pre-1.3 peers expect max frame size of ETHERMAX.
4412 		 * We can negotiate that size with those peers provided the
4413 		 * following conditions are true:
4414 		 * - Only pvid is defined for our peer and there are no vids.
4415 		 * - pvids are equal.
4416 		 * If the above conditions are true, then we can send/recv only
4417 		 * untagged frames of max size ETHERMAX.
4418 		 */
4419 		if (portp->nvids == 0 && portp->pvid == vnetp->pvid) {
4420 			lp->mtu = ETHERMAX;
4421 		}
4422 	}
4423 
4424 	if (VGEN_VER_GTEQ(ldcp, 1, 2)) {
4425 		/* Versions >= 1.2 */
4426 
4427 		if (VGEN_PRI_ETH_DEFINED(vgenp)) {
4428 			/*
4429 			 * enable priority routines and pkt mode only if
4430 			 * at least one pri-eth-type is specified in MD.
4431 			 */
4432 
4433 			ldcp->tx = vgen_ldcsend;
4434 			ldcp->rx_pktdata = vgen_handle_pkt_data;
4435 
4436 			/* set xfer mode for vgen_send_attr_info() */
4437 			lp->xfer_mode = VIO_PKT_MODE | VIO_DRING_MODE_V1_2;
4438 
4439 		} else {
4440 			/* no priority eth types defined in MD */
4441 
4442 			ldcp->tx = vgen_ldcsend_dring;
4443 			ldcp->rx_pktdata = vgen_handle_pkt_data_nop;
4444 
4445 			/* set xfer mode for vgen_send_attr_info() */
4446 			lp->xfer_mode = VIO_DRING_MODE_V1_2;
4447 
4448 		}
4449 	} else {
4450 		/* Versions prior to 1.2  */
4451 
4452 		vgen_reset_vnet_proto_ops(ldcp);
4453 	}
4454 }
4455 
4456 /*
4457  * Reset vnet-protocol-version dependent functions to pre-v1.2.
4458  */
4459 static void
4460 vgen_reset_vnet_proto_ops(vgen_ldc_t *ldcp)
4461 {
4462 	vgen_hparams_t	*lp = &ldcp->local_hparams;
4463 
4464 	ldcp->tx = vgen_ldcsend_dring;
4465 	ldcp->rx_pktdata = vgen_handle_pkt_data_nop;
4466 
4467 	/* set xfer mode for vgen_send_attr_info() */
4468 	lp->xfer_mode = VIO_DRING_MODE_V1_0;
4469 }
4470 
4471 static void
4472 vgen_vlan_unaware_port_reset(vgen_port_t *portp)
4473 {
4474 	vgen_ldclist_t	*ldclp;
4475 	vgen_ldc_t	*ldcp;
4476 	vgen_t		*vgenp = portp->vgenp;
4477 	vnet_t		*vnetp = vgenp->vnetp;
4478 
4479 	ldclp = &portp->ldclist;
4480 
4481 	READ_ENTER(&ldclp->rwlock);
4482 
4483 	/*
4484 	 * NOTE: for now, we will assume we have a single channel.
4485 	 */
4486 	if (ldclp->headp == NULL) {
4487 		RW_EXIT(&ldclp->rwlock);
4488 		return;
4489 	}
4490 	ldcp = ldclp->headp;
4491 
4492 	mutex_enter(&ldcp->cblock);
4493 
4494 	/*
4495 	 * If the peer is vlan_unaware(ver < 1.3), reset channel and terminate
4496 	 * the connection. See comments in vgen_set_vnet_proto_ops().
4497 	 */
4498 	if (ldcp->hphase == VH_DONE && VGEN_VER_LT(ldcp, 1, 3) &&
4499 	    (portp->nvids != 0 || portp->pvid != vnetp->pvid)) {
4500 		ldcp->need_ldc_reset = B_TRUE;
4501 		vgen_handshake_retry(ldcp);
4502 	}
4503 
4504 	mutex_exit(&ldcp->cblock);
4505 
4506 	RW_EXIT(&ldclp->rwlock);
4507 }
4508 
4509 static void
4510 vgen_reset_vlan_unaware_ports(vgen_t *vgenp)
4511 {
4512 	vgen_port_t	*portp;
4513 	vgen_portlist_t	*plistp;
4514 
4515 	plistp = &(vgenp->vgenports);
4516 	READ_ENTER(&plistp->rwlock);
4517 
4518 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
4519 
4520 		vgen_vlan_unaware_port_reset(portp);
4521 
4522 	}
4523 
4524 	RW_EXIT(&plistp->rwlock);
4525 }
4526 
4527 /*
4528  * This function resets the handshake phase to VH_PHASE0(pre-handshake phase).
4529  * This can happen after a channel comes up (status: LDC_UP) or
4530  * when handshake gets terminated due to various conditions.
4531  */
4532 static void
4533 vgen_reset_hphase(vgen_ldc_t *ldcp)
4534 {
4535 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4536 	ldc_status_t istatus;
4537 	int rv;
4538 
4539 	DBG1(vgenp, ldcp, "enter\n");
4540 	/* reset hstate and hphase */
4541 	ldcp->hstate = 0;
4542 	ldcp->hphase = VH_PHASE0;
4543 
4544 	vgen_reset_vnet_proto_ops(ldcp);
4545 
4546 	/*
4547 	 * Save the id of pending handshake timer in cancel_htid.
4548 	 * This will be checked in vgen_ldc_cb() and the handshake timer will
4549 	 * be cancelled after releasing cblock.
4550 	 */
4551 	if (ldcp->htid) {
4552 		ldcp->cancel_htid = ldcp->htid;
4553 		ldcp->htid = 0;
4554 	}
4555 
4556 	if (ldcp->local_hparams.dring_ready) {
4557 		ldcp->local_hparams.dring_ready = B_FALSE;
4558 	}
4559 
4560 	/* Unbind tx descriptor ring from the channel */
4561 	if (ldcp->num_txdcookies) {
4562 		rv = ldc_mem_dring_unbind(ldcp->tx_dhandle);
4563 		if (rv != 0) {
4564 			DWARN(vgenp, ldcp, "ldc_mem_dring_unbind failed\n");
4565 		}
4566 		ldcp->num_txdcookies = 0;
4567 	}
4568 
4569 	if (ldcp->peer_hparams.dring_ready) {
4570 		ldcp->peer_hparams.dring_ready = B_FALSE;
4571 		/* Unmap peer's dring */
4572 		(void) ldc_mem_dring_unmap(ldcp->rx_dhandle);
4573 		vgen_clobber_rxds(ldcp);
4574 	}
4575 
4576 	vgen_clobber_tbufs(ldcp);
4577 
4578 	/*
4579 	 * clear local handshake params and initialize.
4580 	 */
4581 	bzero(&(ldcp->local_hparams), sizeof (ldcp->local_hparams));
4582 
4583 	/* set version to the highest version supported */
4584 	ldcp->local_hparams.ver_major =
4585 	    ldcp->vgen_versions[0].ver_major;
4586 	ldcp->local_hparams.ver_minor =
4587 	    ldcp->vgen_versions[0].ver_minor;
4588 	ldcp->local_hparams.dev_class = VDEV_NETWORK;
4589 
4590 	/* set attr_info params */
4591 	ldcp->local_hparams.mtu = vgenp->max_frame_size;
4592 	ldcp->local_hparams.addr =
4593 	    vnet_macaddr_strtoul(vgenp->macaddr);
4594 	ldcp->local_hparams.addr_type = ADDR_TYPE_MAC;
4595 	ldcp->local_hparams.xfer_mode = VIO_DRING_MODE_V1_0;
4596 	ldcp->local_hparams.ack_freq = 0;	/* don't need acks */
4597 
4598 	/*
4599 	 * Note: dring is created, but not bound yet.
4600 	 * local dring_info params will be updated when we bind the dring in
4601 	 * vgen_handshake_phase2().
4602 	 * dring_ident is set to 0. After mapping the dring, peer sets this
4603 	 * value and sends it in the ack, which is saved in
4604 	 * vgen_handle_dring_reg().
4605 	 */
4606 	ldcp->local_hparams.dring_ident = 0;
4607 
4608 	/* clear peer_hparams */
4609 	bzero(&(ldcp->peer_hparams), sizeof (ldcp->peer_hparams));
4610 
4611 	/* reset the channel if required */
4612 	if (ldcp->need_ldc_reset) {
4613 		DWARN(vgenp, ldcp, "Doing Channel Reset...\n");
4614 		ldcp->need_ldc_reset = B_FALSE;
4615 		(void) ldc_down(ldcp->ldc_handle);
4616 		(void) ldc_status(ldcp->ldc_handle, &istatus);
4617 		DBG2(vgenp, ldcp, "Reset Done,ldc_status(%x)\n", istatus);
4618 		ldcp->ldc_status = istatus;
4619 
4620 		/* clear sids */
4621 		ldcp->local_sid = 0;
4622 		ldcp->peer_sid = 0;
4623 
4624 		/* try to bring the channel up */
4625 		rv = ldc_up(ldcp->ldc_handle);
4626 		if (rv != 0) {
4627 			DWARN(vgenp, ldcp, "ldc_up err rv(%d)\n", rv);
4628 		}
4629 
4630 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
4631 			DWARN(vgenp, ldcp, "ldc_status err\n");
4632 		} else {
4633 			ldcp->ldc_status = istatus;
4634 		}
4635 	}
4636 }
4637 
4638 /* wrapper function for vgen_reset_hphase */
4639 static void
4640 vgen_handshake_reset(vgen_ldc_t *ldcp)
4641 {
4642 	ASSERT(MUTEX_HELD(&ldcp->cblock));
4643 	mutex_enter(&ldcp->rxlock);
4644 	mutex_enter(&ldcp->wrlock);
4645 	mutex_enter(&ldcp->txlock);
4646 	mutex_enter(&ldcp->tclock);
4647 
4648 	vgen_reset_hphase(ldcp);
4649 
4650 	mutex_exit(&ldcp->tclock);
4651 	mutex_exit(&ldcp->txlock);
4652 	mutex_exit(&ldcp->wrlock);
4653 	mutex_exit(&ldcp->rxlock);
4654 }
4655 
4656 /*
4657  * Initiate handshake with the peer by sending various messages
4658  * based on the handshake-phase that the channel is currently in.
4659  */
4660 static void
4661 vgen_handshake(vgen_ldc_t *ldcp)
4662 {
4663 	uint32_t hphase = ldcp->hphase;
4664 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
4665 	ldc_status_t	istatus;
4666 	int	rv = 0;
4667 
4668 	switch (hphase) {
4669 
4670 	case VH_PHASE1:
4671 
4672 		/*
4673 		 * start timer, for entire handshake process, turn this timer
4674 		 * off if all phases of handshake complete successfully and
4675 		 * hphase goes to VH_DONE(below) or
4676 		 * vgen_reset_hphase() gets called or
4677 		 * channel is reset due to errors or
4678 		 * vgen_ldc_uninit() is invoked(vgen_stop).
4679 		 */
4680 		ASSERT(ldcp->htid == 0);
4681 		ldcp->htid = timeout(vgen_hwatchdog, (caddr_t)ldcp,
4682 		    drv_usectohz(vgen_hwd_interval * MICROSEC));
4683 
4684 		/* Phase 1 involves negotiating the version */
4685 		rv = vgen_send_version_negotiate(ldcp);
4686 		break;
4687 
4688 	case VH_PHASE2:
4689 		rv = vgen_handshake_phase2(ldcp);
4690 		break;
4691 
4692 	case VH_PHASE3:
4693 		rv = vgen_send_rdx_info(ldcp);
4694 		break;
4695 
4696 	case VH_DONE:
4697 		/*
4698 		 * Save the id of pending handshake timer in cancel_htid.
4699 		 * This will be checked in vgen_ldc_cb() and the handshake
4700 		 * timer will be cancelled after releasing cblock.
4701 		 */
4702 		if (ldcp->htid) {
4703 			ldcp->cancel_htid = ldcp->htid;
4704 			ldcp->htid = 0;
4705 		}
4706 		ldcp->hretries = 0;
4707 		DBG1(vgenp, ldcp, "Handshake Done\n");
4708 
4709 		if (ldcp->portp == vgenp->vsw_portp) {
4710 			/*
4711 			 * If this channel(port) is connected to vsw,
4712 			 * need to sync multicast table with vsw.
4713 			 */
4714 			mutex_exit(&ldcp->cblock);
4715 
4716 			mutex_enter(&vgenp->lock);
4717 			rv = vgen_send_mcast_info(ldcp);
4718 			mutex_exit(&vgenp->lock);
4719 
4720 			mutex_enter(&ldcp->cblock);
4721 			if (rv != VGEN_SUCCESS)
4722 				break;
4723 		}
4724 
4725 		/*
4726 		 * Check if mac layer should be notified to restart
4727 		 * transmissions. This can happen if the channel got
4728 		 * reset and vgen_clobber_tbufs() is called, while
4729 		 * need_resched is set.
4730 		 */
4731 		mutex_enter(&ldcp->tclock);
4732 		if (ldcp->need_resched) {
4733 			vio_net_tx_update_t vtx_update =
4734 			    ldcp->portp->vcb.vio_net_tx_update;
4735 
4736 			ldcp->need_resched = B_FALSE;
4737 			vtx_update(ldcp->portp->vhp);
4738 		}
4739 		mutex_exit(&ldcp->tclock);
4740 
4741 		break;
4742 
4743 	default:
4744 		break;
4745 	}
4746 
4747 	if (rv == ECONNRESET) {
4748 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
4749 			DWARN(vgenp, ldcp, "ldc_status err\n");
4750 		} else {
4751 			ldcp->ldc_status = istatus;
4752 		}
4753 		vgen_handle_evt_reset(ldcp);
4754 	} else if (rv) {
4755 		vgen_handshake_reset(ldcp);
4756 	}
4757 }
4758 
4759 /*
4760  * Check if the current handshake phase has completed successfully and
4761  * return the status.
4762  */
4763 static int
4764 vgen_handshake_done(vgen_ldc_t *ldcp)
4765 {
4766 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4767 	uint32_t	hphase = ldcp->hphase;
4768 	int 		status = 0;
4769 
4770 	switch (hphase) {
4771 
4772 	case VH_PHASE1:
4773 		/*
4774 		 * Phase1 is done, if version negotiation
4775 		 * completed successfully.
4776 		 */
4777 		status = ((ldcp->hstate & VER_NEGOTIATED) ==
4778 		    VER_NEGOTIATED);
4779 		break;
4780 
4781 	case VH_PHASE2:
4782 		/*
4783 		 * Phase 2 is done, if attr info and dring info
4784 		 * have been exchanged successfully.
4785 		 */
4786 		status = (((ldcp->hstate & ATTR_INFO_EXCHANGED) ==
4787 		    ATTR_INFO_EXCHANGED) &&
4788 		    ((ldcp->hstate & DRING_INFO_EXCHANGED) ==
4789 		    DRING_INFO_EXCHANGED));
4790 		break;
4791 
4792 	case VH_PHASE3:
4793 		/* Phase 3 is done, if rdx msg has been exchanged */
4794 		status = ((ldcp->hstate & RDX_EXCHANGED) ==
4795 		    RDX_EXCHANGED);
4796 		break;
4797 
4798 	default:
4799 		break;
4800 	}
4801 
4802 	if (status == 0) {
4803 		return (VGEN_FAILURE);
4804 	}
4805 	DBG2(vgenp, ldcp, "PHASE(%d)\n", hphase);
4806 	return (VGEN_SUCCESS);
4807 }
4808 
4809 /* retry handshake on failure */
4810 static void
4811 vgen_handshake_retry(vgen_ldc_t *ldcp)
4812 {
4813 	/* reset handshake phase */
4814 	vgen_handshake_reset(ldcp);
4815 
4816 	/* handshake retry is specified and the channel is UP */
4817 	if (vgen_max_hretries && (ldcp->ldc_status == LDC_UP)) {
4818 		if (ldcp->hretries++ < vgen_max_hretries) {
4819 			ldcp->local_sid = ddi_get_lbolt();
4820 			vgen_handshake(vh_nextphase(ldcp));
4821 		}
4822 	}
4823 }
4824 
4825 /*
4826  * Handle a version info msg from the peer or an ACK/NACK from the peer
4827  * to a version info msg that we sent.
4828  */
4829 static int
4830 vgen_handle_version_negotiate(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4831 {
4832 	vgen_t		*vgenp;
4833 	vio_ver_msg_t	*vermsg = (vio_ver_msg_t *)tagp;
4834 	int		ack = 0;
4835 	int		failed = 0;
4836 	int		idx;
4837 	vgen_ver_t	*versions = ldcp->vgen_versions;
4838 	int		rv = 0;
4839 
4840 	vgenp = LDC_TO_VGEN(ldcp);
4841 	DBG1(vgenp, ldcp, "enter\n");
4842 	switch (tagp->vio_subtype) {
4843 	case VIO_SUBTYPE_INFO:
4844 
4845 		/*  Cache sid of peer if this is the first time */
4846 		if (ldcp->peer_sid == 0) {
4847 			DBG2(vgenp, ldcp, "Caching peer_sid(%x)\n",
4848 			    tagp->vio_sid);
4849 			ldcp->peer_sid = tagp->vio_sid;
4850 		}
4851 
4852 		if (ldcp->hphase != VH_PHASE1) {
4853 			/*
4854 			 * If we are not already in VH_PHASE1, reset to
4855 			 * pre-handshake state, and initiate handshake
4856 			 * to the peer too.
4857 			 */
4858 			vgen_handshake_reset(ldcp);
4859 			vgen_handshake(vh_nextphase(ldcp));
4860 		}
4861 		ldcp->hstate |= VER_INFO_RCVD;
4862 
4863 		/* save peer's requested values */
4864 		ldcp->peer_hparams.ver_major = vermsg->ver_major;
4865 		ldcp->peer_hparams.ver_minor = vermsg->ver_minor;
4866 		ldcp->peer_hparams.dev_class = vermsg->dev_class;
4867 
4868 		if ((vermsg->dev_class != VDEV_NETWORK) &&
4869 		    (vermsg->dev_class != VDEV_NETWORK_SWITCH)) {
4870 			/* unsupported dev_class, send NACK */
4871 
4872 			DWARN(vgenp, ldcp, "Version Negotiation Failed\n");
4873 
4874 			tagp->vio_subtype = VIO_SUBTYPE_NACK;
4875 			tagp->vio_sid = ldcp->local_sid;
4876 			/* send reply msg back to peer */
4877 			rv = vgen_sendmsg(ldcp, (caddr_t)tagp,
4878 			    sizeof (*vermsg), B_FALSE);
4879 			if (rv != VGEN_SUCCESS) {
4880 				return (rv);
4881 			}
4882 			return (VGEN_FAILURE);
4883 		}
4884 
4885 		DBG2(vgenp, ldcp, "VER_INFO_RCVD, ver(%d,%d)\n",
4886 		    vermsg->ver_major,  vermsg->ver_minor);
4887 
4888 		idx = 0;
4889 
4890 		for (;;) {
4891 
4892 			if (vermsg->ver_major > versions[idx].ver_major) {
4893 
4894 				/* nack with next lower version */
4895 				tagp->vio_subtype = VIO_SUBTYPE_NACK;
4896 				vermsg->ver_major = versions[idx].ver_major;
4897 				vermsg->ver_minor = versions[idx].ver_minor;
4898 				break;
4899 			}
4900 
4901 			if (vermsg->ver_major == versions[idx].ver_major) {
4902 
4903 				/* major version match - ACK version */
4904 				tagp->vio_subtype = VIO_SUBTYPE_ACK;
4905 				ack = 1;
4906 
4907 				/*
4908 				 * lower minor version to the one this endpt
4909 				 * supports, if necessary
4910 				 */
4911 				if (vermsg->ver_minor >
4912 				    versions[idx].ver_minor) {
4913 					vermsg->ver_minor =
4914 					    versions[idx].ver_minor;
4915 					ldcp->peer_hparams.ver_minor =
4916 					    versions[idx].ver_minor;
4917 				}
4918 				break;
4919 			}
4920 
4921 			idx++;
4922 
4923 			if (idx == VGEN_NUM_VER) {
4924 
4925 				/* no version match - send NACK */
4926 				tagp->vio_subtype = VIO_SUBTYPE_NACK;
4927 				vermsg->ver_major = 0;
4928 				vermsg->ver_minor = 0;
4929 				failed = 1;
4930 				break;
4931 			}
4932 
4933 		}
4934 
4935 		tagp->vio_sid = ldcp->local_sid;
4936 
4937 		/* send reply msg back to peer */
4938 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*vermsg),
4939 		    B_FALSE);
4940 		if (rv != VGEN_SUCCESS) {
4941 			return (rv);
4942 		}
4943 
4944 		if (ack) {
4945 			ldcp->hstate |= VER_ACK_SENT;
4946 			DBG2(vgenp, ldcp, "VER_ACK_SENT, ver(%d,%d) \n",
4947 			    vermsg->ver_major, vermsg->ver_minor);
4948 		}
4949 		if (failed) {
4950 			DWARN(vgenp, ldcp, "Negotiation Failed\n");
4951 			return (VGEN_FAILURE);
4952 		}
4953 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
4954 
4955 			/*  VER_ACK_SENT and VER_ACK_RCVD */
4956 
4957 			/* local and peer versions match? */
4958 			ASSERT((ldcp->local_hparams.ver_major ==
4959 			    ldcp->peer_hparams.ver_major) &&
4960 			    (ldcp->local_hparams.ver_minor ==
4961 			    ldcp->peer_hparams.ver_minor));
4962 
4963 			vgen_set_vnet_proto_ops(ldcp);
4964 
4965 			/* move to the next phase */
4966 			vgen_handshake(vh_nextphase(ldcp));
4967 		}
4968 
4969 		break;
4970 
4971 	case VIO_SUBTYPE_ACK:
4972 
4973 		if (ldcp->hphase != VH_PHASE1) {
4974 			/*  This should not happen. */
4975 			DWARN(vgenp, ldcp, "Invalid Phase(%u)\n", ldcp->hphase);
4976 			return (VGEN_FAILURE);
4977 		}
4978 
4979 		/* SUCCESS - we have agreed on a version */
4980 		ldcp->local_hparams.ver_major = vermsg->ver_major;
4981 		ldcp->local_hparams.ver_minor = vermsg->ver_minor;
4982 		ldcp->hstate |= VER_ACK_RCVD;
4983 
4984 		DBG2(vgenp, ldcp, "VER_ACK_RCVD, ver(%d,%d) \n",
4985 		    vermsg->ver_major,  vermsg->ver_minor);
4986 
4987 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
4988 
4989 			/*  VER_ACK_SENT and VER_ACK_RCVD */
4990 
4991 			/* local and peer versions match? */
4992 			ASSERT((ldcp->local_hparams.ver_major ==
4993 			    ldcp->peer_hparams.ver_major) &&
4994 			    (ldcp->local_hparams.ver_minor ==
4995 			    ldcp->peer_hparams.ver_minor));
4996 
4997 			vgen_set_vnet_proto_ops(ldcp);
4998 
4999 			/* move to the next phase */
5000 			vgen_handshake(vh_nextphase(ldcp));
5001 		}
5002 		break;
5003 
5004 	case VIO_SUBTYPE_NACK:
5005 
5006 		if (ldcp->hphase != VH_PHASE1) {
5007 			/*  This should not happen.  */
5008 			DWARN(vgenp, ldcp, "VER_NACK_RCVD Invalid "
5009 			"Phase(%u)\n", ldcp->hphase);
5010 			return (VGEN_FAILURE);
5011 		}
5012 
5013 		DBG2(vgenp, ldcp, "VER_NACK_RCVD next ver(%d,%d)\n",
5014 		    vermsg->ver_major, vermsg->ver_minor);
5015 
5016 		/* check if version in NACK is zero */
5017 		if (vermsg->ver_major == 0 && vermsg->ver_minor == 0) {
5018 			/*
5019 			 * Version Negotiation has failed.
5020 			 */
5021 			DWARN(vgenp, ldcp, "Version Negotiation Failed\n");
5022 			return (VGEN_FAILURE);
5023 		}
5024 
5025 		idx = 0;
5026 
5027 		for (;;) {
5028 
5029 			if (vermsg->ver_major > versions[idx].ver_major) {
5030 				/* select next lower version */
5031 
5032 				ldcp->local_hparams.ver_major =
5033 				    versions[idx].ver_major;
5034 				ldcp->local_hparams.ver_minor =
5035 				    versions[idx].ver_minor;
5036 				break;
5037 			}
5038 
5039 			if (vermsg->ver_major == versions[idx].ver_major) {
5040 				/* major version match */
5041 
5042 				ldcp->local_hparams.ver_major =
5043 				    versions[idx].ver_major;
5044 
5045 				ldcp->local_hparams.ver_minor =
5046 				    versions[idx].ver_minor;
5047 				break;
5048 			}
5049 
5050 			idx++;
5051 
5052 			if (idx == VGEN_NUM_VER) {
5053 				/*
5054 				 * no version match.
5055 				 * Version Negotiation has failed.
5056 				 */
5057 				DWARN(vgenp, ldcp,
5058 				    "Version Negotiation Failed\n");
5059 				return (VGEN_FAILURE);
5060 			}
5061 
5062 		}
5063 
5064 		rv = vgen_send_version_negotiate(ldcp);
5065 		if (rv != VGEN_SUCCESS) {
5066 			return (rv);
5067 		}
5068 
5069 		break;
5070 	}
5071 
5072 	DBG1(vgenp, ldcp, "exit\n");
5073 	return (VGEN_SUCCESS);
5074 }
5075 
5076 /* Check if the attributes are supported */
5077 static int
5078 vgen_check_attr_info(vgen_ldc_t *ldcp, vnet_attr_msg_t *msg)
5079 {
5080 	vgen_hparams_t	*lp = &ldcp->local_hparams;
5081 
5082 	if ((msg->addr_type != ADDR_TYPE_MAC) ||
5083 	    (msg->ack_freq > 64) ||
5084 	    (msg->xfer_mode != lp->xfer_mode)) {
5085 		return (VGEN_FAILURE);
5086 	}
5087 
5088 	if (VGEN_VER_LT(ldcp, 1, 4)) {
5089 		/* versions < 1.4, mtu must match */
5090 		if (msg->mtu != lp->mtu) {
5091 			return (VGEN_FAILURE);
5092 		}
5093 	} else {
5094 		/* Ver >= 1.4, validate mtu of the peer is at least ETHERMAX */
5095 		if (msg->mtu < ETHERMAX) {
5096 			return (VGEN_FAILURE);
5097 		}
5098 	}
5099 
5100 	return (VGEN_SUCCESS);
5101 }
5102 
5103 /*
5104  * Handle an attribute info msg from the peer or an ACK/NACK from the peer
5105  * to an attr info msg that we sent.
5106  */
5107 static int
5108 vgen_handle_attr_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5109 {
5110 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
5111 	vnet_attr_msg_t	*msg = (vnet_attr_msg_t *)tagp;
5112 	vgen_hparams_t	*lp = &ldcp->local_hparams;
5113 	vgen_hparams_t	*rp = &ldcp->peer_hparams;
5114 	int		ack = 1;
5115 	int		rv = 0;
5116 	uint32_t	mtu;
5117 
5118 	DBG1(vgenp, ldcp, "enter\n");
5119 	if (ldcp->hphase != VH_PHASE2) {
5120 		DWARN(vgenp, ldcp, "Rcvd ATTR_INFO subtype(%d),"
5121 		" Invalid Phase(%u)\n",
5122 		    tagp->vio_subtype, ldcp->hphase);
5123 		return (VGEN_FAILURE);
5124 	}
5125 	switch (tagp->vio_subtype) {
5126 	case VIO_SUBTYPE_INFO:
5127 
5128 		DBG2(vgenp, ldcp, "ATTR_INFO_RCVD \n");
5129 		ldcp->hstate |= ATTR_INFO_RCVD;
5130 
5131 		/* save peer's values */
5132 		rp->mtu = msg->mtu;
5133 		rp->addr = msg->addr;
5134 		rp->addr_type = msg->addr_type;
5135 		rp->xfer_mode = msg->xfer_mode;
5136 		rp->ack_freq = msg->ack_freq;
5137 
5138 		rv = vgen_check_attr_info(ldcp, msg);
5139 		if (rv == VGEN_FAILURE) {
5140 			/* unsupported attr, send NACK */
5141 			ack = 0;
5142 		} else {
5143 
5144 			if (VGEN_VER_GTEQ(ldcp, 1, 4)) {
5145 
5146 				/*
5147 				 * Versions >= 1.4:
5148 				 * The mtu is negotiated down to the
5149 				 * minimum of our mtu and peer's mtu.
5150 				 */
5151 				mtu = MIN(msg->mtu, vgenp->max_frame_size);
5152 
5153 				/*
5154 				 * If we have received an ack for the attr info
5155 				 * that we sent, then check if the mtu computed
5156 				 * above matches the mtu that the peer had ack'd
5157 				 * (saved in local hparams). If they don't
5158 				 * match, we fail the handshake.
5159 				 */
5160 				if (ldcp->hstate & ATTR_ACK_RCVD) {
5161 					if (mtu != lp->mtu) {
5162 						/* send NACK */
5163 						ack = 0;
5164 					}
5165 				} else {
5166 					/*
5167 					 * Save the mtu computed above in our
5168 					 * attr parameters, so it gets sent in
5169 					 * the attr info from us to the peer.
5170 					 */
5171 					lp->mtu = mtu;
5172 				}
5173 
5174 				/* save the MIN mtu in the msg to be replied */
5175 				msg->mtu = mtu;
5176 
5177 			}
5178 		}
5179 
5180 
5181 		if (ack) {
5182 			tagp->vio_subtype = VIO_SUBTYPE_ACK;
5183 		} else {
5184 			tagp->vio_subtype = VIO_SUBTYPE_NACK;
5185 		}
5186 		tagp->vio_sid = ldcp->local_sid;
5187 
5188 		/* send reply msg back to peer */
5189 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msg),
5190 		    B_FALSE);
5191 		if (rv != VGEN_SUCCESS) {
5192 			return (rv);
5193 		}
5194 
5195 		if (ack) {
5196 			ldcp->hstate |= ATTR_ACK_SENT;
5197 			DBG2(vgenp, ldcp, "ATTR_ACK_SENT \n");
5198 		} else {
5199 			/* failed */
5200 			DWARN(vgenp, ldcp, "ATTR_NACK_SENT \n");
5201 			return (VGEN_FAILURE);
5202 		}
5203 
5204 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5205 			vgen_handshake(vh_nextphase(ldcp));
5206 		}
5207 
5208 		break;
5209 
5210 	case VIO_SUBTYPE_ACK:
5211 
5212 		if (VGEN_VER_GTEQ(ldcp, 1, 4)) {
5213 			/*
5214 			 * Versions >= 1.4:
5215 			 * The ack msg sent by the peer contains the minimum of
5216 			 * our mtu (that we had sent in our attr info) and the
5217 			 * peer's mtu.
5218 			 *
5219 			 * If we have sent an ack for the attr info msg from
5220 			 * the peer, check if the mtu that was computed then
5221 			 * (saved in local hparams) matches the mtu that the
5222 			 * peer has ack'd. If they don't match, we fail the
5223 			 * handshake.
5224 			 */
5225 			if (ldcp->hstate & ATTR_ACK_SENT) {
5226 				if (lp->mtu != msg->mtu) {
5227 					return (VGEN_FAILURE);
5228 				}
5229 			} else {
5230 				/*
5231 				 * If the mtu ack'd by the peer is > our mtu
5232 				 * fail handshake. Otherwise, save the mtu, so
5233 				 * we can validate it when we receive attr info
5234 				 * from our peer.
5235 				 */
5236 				if (msg->mtu > lp->mtu) {
5237 					return (VGEN_FAILURE);
5238 				}
5239 				if (msg->mtu <= lp->mtu) {
5240 					lp->mtu = msg->mtu;
5241 				}
5242 			}
5243 		}
5244 
5245 		ldcp->hstate |= ATTR_ACK_RCVD;
5246 
5247 		DBG2(vgenp, ldcp, "ATTR_ACK_RCVD \n");
5248 
5249 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5250 			vgen_handshake(vh_nextphase(ldcp));
5251 		}
5252 		break;
5253 
5254 	case VIO_SUBTYPE_NACK:
5255 
5256 		DBG2(vgenp, ldcp, "ATTR_NACK_RCVD \n");
5257 		return (VGEN_FAILURE);
5258 	}
5259 	DBG1(vgenp, ldcp, "exit\n");
5260 	return (VGEN_SUCCESS);
5261 }
5262 
5263 /* Check if the dring info msg is ok */
5264 static int
5265 vgen_check_dring_reg(vio_dring_reg_msg_t *msg)
5266 {
5267 	/* check if msg contents are ok */
5268 	if ((msg->num_descriptors < 128) || (msg->descriptor_size <
5269 	    sizeof (vnet_public_desc_t))) {
5270 		return (VGEN_FAILURE);
5271 	}
5272 	return (VGEN_SUCCESS);
5273 }
5274 
5275 /*
5276  * Handle a descriptor ring register msg from the peer or an ACK/NACK from
5277  * the peer to a dring register msg that we sent.
5278  */
5279 static int
5280 vgen_handle_dring_reg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5281 {
5282 	vio_dring_reg_msg_t *msg = (vio_dring_reg_msg_t *)tagp;
5283 	ldc_mem_cookie_t dcookie;
5284 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5285 	int ack = 0;
5286 	int rv = 0;
5287 
5288 	DBG1(vgenp, ldcp, "enter\n");
5289 	if (ldcp->hphase < VH_PHASE2) {
5290 		/* dring_info can be rcvd in any of the phases after Phase1 */
5291 		DWARN(vgenp, ldcp,
5292 		    "Rcvd DRING_INFO Subtype (%d), Invalid Phase(%u)\n",
5293 		    tagp->vio_subtype, ldcp->hphase);
5294 		return (VGEN_FAILURE);
5295 	}
5296 	switch (tagp->vio_subtype) {
5297 	case VIO_SUBTYPE_INFO:
5298 
5299 		DBG2(vgenp, ldcp, "DRING_INFO_RCVD \n");
5300 		ldcp->hstate |= DRING_INFO_RCVD;
5301 		bcopy((msg->cookie), &dcookie, sizeof (dcookie));
5302 
5303 		ASSERT(msg->ncookies == 1);
5304 
5305 		if (vgen_check_dring_reg(msg) == VGEN_SUCCESS) {
5306 			/*
5307 			 * verified dring info msg to be ok,
5308 			 * now try to map the remote dring.
5309 			 */
5310 			rv = vgen_init_rxds(ldcp, msg->num_descriptors,
5311 			    msg->descriptor_size, &dcookie,
5312 			    msg->ncookies);
5313 			if (rv == DDI_SUCCESS) {
5314 				/* now we can ack the peer */
5315 				ack = 1;
5316 			}
5317 		}
5318 		if (ack == 0) {
5319 			/* failed, send NACK */
5320 			tagp->vio_subtype = VIO_SUBTYPE_NACK;
5321 		} else {
5322 			if (!(ldcp->peer_hparams.dring_ready)) {
5323 
5324 				/* save peer's dring_info values */
5325 				bcopy(&dcookie,
5326 				    &(ldcp->peer_hparams.dring_cookie),
5327 				    sizeof (dcookie));
5328 				ldcp->peer_hparams.num_desc =
5329 				    msg->num_descriptors;
5330 				ldcp->peer_hparams.desc_size =
5331 				    msg->descriptor_size;
5332 				ldcp->peer_hparams.num_dcookies =
5333 				    msg->ncookies;
5334 
5335 				/* set dring_ident for the peer */
5336 				ldcp->peer_hparams.dring_ident =
5337 				    (uint64_t)ldcp->rxdp;
5338 				/* return the dring_ident in ack msg */
5339 				msg->dring_ident =
5340 				    (uint64_t)ldcp->rxdp;
5341 
5342 				ldcp->peer_hparams.dring_ready = B_TRUE;
5343 			}
5344 			tagp->vio_subtype = VIO_SUBTYPE_ACK;
5345 		}
5346 		tagp->vio_sid = ldcp->local_sid;
5347 		/* send reply msg back to peer */
5348 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msg),
5349 		    B_FALSE);
5350 		if (rv != VGEN_SUCCESS) {
5351 			return (rv);
5352 		}
5353 
5354 		if (ack) {
5355 			ldcp->hstate |= DRING_ACK_SENT;
5356 			DBG2(vgenp, ldcp, "DRING_ACK_SENT");
5357 		} else {
5358 			DWARN(vgenp, ldcp, "DRING_NACK_SENT");
5359 			return (VGEN_FAILURE);
5360 		}
5361 
5362 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5363 			vgen_handshake(vh_nextphase(ldcp));
5364 		}
5365 
5366 		break;
5367 
5368 	case VIO_SUBTYPE_ACK:
5369 
5370 		ldcp->hstate |= DRING_ACK_RCVD;
5371 
5372 		DBG2(vgenp, ldcp, "DRING_ACK_RCVD");
5373 
5374 		if (!(ldcp->local_hparams.dring_ready)) {
5375 			/* local dring is now ready */
5376 			ldcp->local_hparams.dring_ready = B_TRUE;
5377 
5378 			/* save dring_ident acked by peer */
5379 			ldcp->local_hparams.dring_ident =
5380 			    msg->dring_ident;
5381 		}
5382 
5383 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5384 			vgen_handshake(vh_nextphase(ldcp));
5385 		}
5386 
5387 		break;
5388 
5389 	case VIO_SUBTYPE_NACK:
5390 
5391 		DBG2(vgenp, ldcp, "DRING_NACK_RCVD");
5392 		return (VGEN_FAILURE);
5393 	}
5394 	DBG1(vgenp, ldcp, "exit\n");
5395 	return (VGEN_SUCCESS);
5396 }
5397 
5398 /*
5399  * Handle a rdx info msg from the peer or an ACK/NACK
5400  * from the peer to a rdx info msg that we sent.
5401  */
5402 static int
5403 vgen_handle_rdx_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5404 {
5405 	int rv = 0;
5406 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
5407 
5408 	DBG1(vgenp, ldcp, "enter\n");
5409 	if (ldcp->hphase != VH_PHASE3) {
5410 		DWARN(vgenp, ldcp,
5411 		    "Rcvd RDX_INFO Subtype (%d), Invalid Phase(%u)\n",
5412 		    tagp->vio_subtype, ldcp->hphase);
5413 		return (VGEN_FAILURE);
5414 	}
5415 	switch (tagp->vio_subtype) {
5416 	case VIO_SUBTYPE_INFO:
5417 
5418 		DBG2(vgenp, ldcp, "RDX_INFO_RCVD \n");
5419 		ldcp->hstate |= RDX_INFO_RCVD;
5420 
5421 		tagp->vio_subtype = VIO_SUBTYPE_ACK;
5422 		tagp->vio_sid = ldcp->local_sid;
5423 		/* send reply msg back to peer */
5424 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (vio_rdx_msg_t),
5425 		    B_FALSE);
5426 		if (rv != VGEN_SUCCESS) {
5427 			return (rv);
5428 		}
5429 
5430 		ldcp->hstate |= RDX_ACK_SENT;
5431 		DBG2(vgenp, ldcp, "RDX_ACK_SENT \n");
5432 
5433 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5434 			vgen_handshake(vh_nextphase(ldcp));
5435 		}
5436 
5437 		break;
5438 
5439 	case VIO_SUBTYPE_ACK:
5440 
5441 		ldcp->hstate |= RDX_ACK_RCVD;
5442 
5443 		DBG2(vgenp, ldcp, "RDX_ACK_RCVD \n");
5444 
5445 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5446 			vgen_handshake(vh_nextphase(ldcp));
5447 		}
5448 		break;
5449 
5450 	case VIO_SUBTYPE_NACK:
5451 
5452 		DBG2(vgenp, ldcp, "RDX_NACK_RCVD \n");
5453 		return (VGEN_FAILURE);
5454 	}
5455 	DBG1(vgenp, ldcp, "exit\n");
5456 	return (VGEN_SUCCESS);
5457 }
5458 
5459 /* Handle ACK/NACK from vsw to a set multicast msg that we sent */
5460 static int
5461 vgen_handle_mcast_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5462 {
5463 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5464 	vnet_mcast_msg_t *msgp = (vnet_mcast_msg_t *)tagp;
5465 	struct ether_addr *addrp;
5466 	int count;
5467 	int i;
5468 
5469 	DBG1(vgenp, ldcp, "enter\n");
5470 	switch (tagp->vio_subtype) {
5471 
5472 	case VIO_SUBTYPE_INFO:
5473 
5474 		/* vnet shouldn't recv set mcast msg, only vsw handles it */
5475 		DWARN(vgenp, ldcp, "rcvd SET_MCAST_INFO \n");
5476 		break;
5477 
5478 	case VIO_SUBTYPE_ACK:
5479 
5480 		/* success adding/removing multicast addr */
5481 		DBG1(vgenp, ldcp, "rcvd SET_MCAST_ACK \n");
5482 		break;
5483 
5484 	case VIO_SUBTYPE_NACK:
5485 
5486 		DWARN(vgenp, ldcp, "rcvd SET_MCAST_NACK \n");
5487 		if (!(msgp->set)) {
5488 			/* multicast remove request failed */
5489 			break;
5490 		}
5491 
5492 		/* multicast add request failed */
5493 		for (count = 0; count < msgp->count; count++) {
5494 			addrp = &(msgp->mca[count]);
5495 
5496 			/* delete address from the table */
5497 			for (i = 0; i < vgenp->mccount; i++) {
5498 				if (ether_cmp(addrp,
5499 				    &(vgenp->mctab[i])) == 0) {
5500 					if (vgenp->mccount > 1) {
5501 						int t = vgenp->mccount - 1;
5502 						vgenp->mctab[i] =
5503 						    vgenp->mctab[t];
5504 					}
5505 					vgenp->mccount--;
5506 					break;
5507 				}
5508 			}
5509 		}
5510 		break;
5511 
5512 	}
5513 	DBG1(vgenp, ldcp, "exit\n");
5514 
5515 	return (VGEN_SUCCESS);
5516 }
5517 
5518 /* handler for control messages received from the peer ldc end-point */
5519 static int
5520 vgen_handle_ctrlmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5521 {
5522 	int rv = 0;
5523 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5524 
5525 	DBG1(vgenp, ldcp, "enter\n");
5526 	switch (tagp->vio_subtype_env) {
5527 
5528 	case VIO_VER_INFO:
5529 		rv = vgen_handle_version_negotiate(ldcp, tagp);
5530 		break;
5531 
5532 	case VIO_ATTR_INFO:
5533 		rv = vgen_handle_attr_info(ldcp, tagp);
5534 		break;
5535 
5536 	case VIO_DRING_REG:
5537 		rv = vgen_handle_dring_reg(ldcp, tagp);
5538 		break;
5539 
5540 	case VIO_RDX:
5541 		rv = vgen_handle_rdx_info(ldcp, tagp);
5542 		break;
5543 
5544 	case VNET_MCAST_INFO:
5545 		rv = vgen_handle_mcast_info(ldcp, tagp);
5546 		break;
5547 
5548 	case VIO_DDS_INFO:
5549 		rv = vgen_dds_rx(ldcp, tagp);
5550 		break;
5551 	}
5552 
5553 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
5554 	return (rv);
5555 }
5556 
5557 /* handler for data messages received from the peer ldc end-point */
5558 static int
5559 vgen_handle_datamsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp, uint32_t msglen)
5560 {
5561 	int rv = 0;
5562 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5563 
5564 	DBG1(vgenp, ldcp, "enter\n");
5565 
5566 	if (ldcp->hphase != VH_DONE)
5567 		return (rv);
5568 
5569 	if (tagp->vio_subtype == VIO_SUBTYPE_INFO) {
5570 		rv = vgen_check_datamsg_seq(ldcp, tagp);
5571 		if (rv != 0) {
5572 			return (rv);
5573 		}
5574 	}
5575 
5576 	switch (tagp->vio_subtype_env) {
5577 	case VIO_DRING_DATA:
5578 		rv = vgen_handle_dring_data(ldcp, tagp);
5579 		break;
5580 
5581 	case VIO_PKT_DATA:
5582 		ldcp->rx_pktdata((void *)ldcp, (void *)tagp, msglen);
5583 		break;
5584 	default:
5585 		break;
5586 	}
5587 
5588 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
5589 	return (rv);
5590 }
5591 
5592 /*
5593  * dummy pkt data handler function for vnet protocol version 1.0
5594  */
5595 static void
5596 vgen_handle_pkt_data_nop(void *arg1, void *arg2, uint32_t msglen)
5597 {
5598 	_NOTE(ARGUNUSED(arg1, arg2, msglen))
5599 }
5600 
5601 /*
5602  * This function handles raw pkt data messages received over the channel.
5603  * Currently, only priority-eth-type frames are received through this mechanism.
5604  * In this case, the frame(data) is present within the message itself which
5605  * is copied into an mblk before sending it up the stack.
5606  */
5607 static void
5608 vgen_handle_pkt_data(void *arg1, void *arg2, uint32_t msglen)
5609 {
5610 	vgen_ldc_t		*ldcp = (vgen_ldc_t *)arg1;
5611 	vio_raw_data_msg_t	*pkt	= (vio_raw_data_msg_t *)arg2;
5612 	uint32_t		size;
5613 	mblk_t			*mp;
5614 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
5615 	vgen_stats_t		*statsp = &ldcp->stats;
5616 	vgen_hparams_t		*lp = &ldcp->local_hparams;
5617 	vio_net_rx_cb_t		vrx_cb;
5618 
5619 	ASSERT(MUTEX_HELD(&ldcp->cblock));
5620 
5621 	mutex_exit(&ldcp->cblock);
5622 
5623 	size = msglen - VIO_PKT_DATA_HDRSIZE;
5624 	if (size < ETHERMIN || size > lp->mtu) {
5625 		(void) atomic_inc_32(&statsp->rx_pri_fail);
5626 		goto exit;
5627 	}
5628 
5629 	mp = vio_multipool_allocb(&ldcp->vmp, size);
5630 	if (mp == NULL) {
5631 		mp = allocb(size, BPRI_MED);
5632 		if (mp == NULL) {
5633 			(void) atomic_inc_32(&statsp->rx_pri_fail);
5634 			DWARN(vgenp, ldcp, "allocb failure, "
5635 			    "unable to process priority frame\n");
5636 			goto exit;
5637 		}
5638 	}
5639 
5640 	/* copy the frame from the payload of raw data msg into the mblk */
5641 	bcopy(pkt->data, mp->b_rptr, size);
5642 	mp->b_wptr = mp->b_rptr + size;
5643 
5644 	/* update stats */
5645 	(void) atomic_inc_64(&statsp->rx_pri_packets);
5646 	(void) atomic_add_64(&statsp->rx_pri_bytes, size);
5647 
5648 	/* send up; call vrx_cb() as cblock is already released */
5649 	vrx_cb = ldcp->portp->vcb.vio_net_rx_cb;
5650 	vrx_cb(ldcp->portp->vhp, mp);
5651 
5652 exit:
5653 	mutex_enter(&ldcp->cblock);
5654 }
5655 
5656 static int
5657 vgen_send_dring_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp, uint32_t start,
5658     int32_t end, uint8_t pstate)
5659 {
5660 	int rv = 0;
5661 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5662 	vio_dring_msg_t *msgp = (vio_dring_msg_t *)tagp;
5663 
5664 	tagp->vio_subtype = VIO_SUBTYPE_ACK;
5665 	tagp->vio_sid = ldcp->local_sid;
5666 	msgp->start_idx = start;
5667 	msgp->end_idx = end;
5668 	msgp->dring_process_state = pstate;
5669 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msgp), B_FALSE);
5670 	if (rv != VGEN_SUCCESS) {
5671 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
5672 	}
5673 	return (rv);
5674 }
5675 
5676 static int
5677 vgen_handle_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5678 {
5679 	int rv = 0;
5680 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5681 
5682 
5683 	DBG1(vgenp, ldcp, "enter\n");
5684 	switch (tagp->vio_subtype) {
5685 
5686 	case VIO_SUBTYPE_INFO:
5687 		/*
5688 		 * To reduce the locking contention, release the
5689 		 * cblock here and re-acquire it once we are done
5690 		 * receiving packets.
5691 		 */
5692 		mutex_exit(&ldcp->cblock);
5693 		mutex_enter(&ldcp->rxlock);
5694 		rv = vgen_handle_dring_data_info(ldcp, tagp);
5695 		mutex_exit(&ldcp->rxlock);
5696 		mutex_enter(&ldcp->cblock);
5697 		break;
5698 
5699 	case VIO_SUBTYPE_ACK:
5700 		rv = vgen_handle_dring_data_ack(ldcp, tagp);
5701 		break;
5702 
5703 	case VIO_SUBTYPE_NACK:
5704 		rv = vgen_handle_dring_data_nack(ldcp, tagp);
5705 		break;
5706 	}
5707 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
5708 	return (rv);
5709 }
5710 
5711 static int
5712 vgen_handle_dring_data_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5713 {
5714 	uint32_t start;
5715 	int32_t end;
5716 	int rv = 0;
5717 	vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;
5718 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5719 #ifdef VGEN_HANDLE_LOST_PKTS
5720 	vgen_stats_t *statsp = &ldcp->stats;
5721 	uint32_t rxi;
5722 	int n;
5723 #endif
5724 
5725 	DBG1(vgenp, ldcp, "enter\n");
5726 
5727 	start = dringmsg->start_idx;
5728 	end = dringmsg->end_idx;
5729 	/*
5730 	 * received a data msg, which contains the start and end
5731 	 * indices of the descriptors within the rx ring holding data,
5732 	 * the seq_num of data packet corresponding to the start index,
5733 	 * and the dring_ident.
5734 	 * We can now read the contents of each of these descriptors
5735 	 * and gather data from it.
5736 	 */
5737 	DBG1(vgenp, ldcp, "INFO: start(%d), end(%d)\n",
5738 	    start, end);
5739 
5740 	/* validate rx start and end indeces */
5741 	if (!(CHECK_RXI(start, ldcp)) || ((end != -1) &&
5742 	    !(CHECK_RXI(end, ldcp)))) {
5743 		DWARN(vgenp, ldcp, "Invalid Rx start(%d) or end(%d)\n",
5744 		    start, end);
5745 		/* drop the message if invalid index */
5746 		return (rv);
5747 	}
5748 
5749 	/* validate dring_ident */
5750 	if (dringmsg->dring_ident != ldcp->peer_hparams.dring_ident) {
5751 		DWARN(vgenp, ldcp, "Invalid dring ident 0x%x\n",
5752 		    dringmsg->dring_ident);
5753 		/* invalid dring_ident, drop the msg */
5754 		return (rv);
5755 	}
5756 #ifdef DEBUG
5757 	if (vgen_trigger_rxlost) {
5758 		/* drop this msg to simulate lost pkts for debugging */
5759 		vgen_trigger_rxlost = 0;
5760 		return (rv);
5761 	}
5762 #endif
5763 
5764 #ifdef	VGEN_HANDLE_LOST_PKTS
5765 
5766 	/* receive start index doesn't match expected index */
5767 	if (ldcp->next_rxi != start) {
5768 		DWARN(vgenp, ldcp, "next_rxi(%d) != start(%d)\n",
5769 		    ldcp->next_rxi, start);
5770 
5771 		/* calculate the number of pkts lost */
5772 		if (start >= ldcp->next_rxi) {
5773 			n = start - ldcp->next_rxi;
5774 		} else  {
5775 			n = ldcp->num_rxds - (ldcp->next_rxi - start);
5776 		}
5777 
5778 		statsp->rx_lost_pkts += n;
5779 		tagp->vio_subtype = VIO_SUBTYPE_NACK;
5780 		tagp->vio_sid = ldcp->local_sid;
5781 		/* indicate the range of lost descriptors */
5782 		dringmsg->start_idx = ldcp->next_rxi;
5783 		rxi = start;
5784 		DECR_RXI(rxi, ldcp);
5785 		dringmsg->end_idx = rxi;
5786 		/* dring ident is left unchanged */
5787 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp,
5788 		    sizeof (*dringmsg), B_FALSE);
5789 		if (rv != VGEN_SUCCESS) {
5790 			DWARN(vgenp, ldcp,
5791 			    "vgen_sendmsg failed, stype:NACK\n");
5792 			return (rv);
5793 		}
5794 		/*
5795 		 * treat this range of descrs/pkts as dropped
5796 		 * and set the new expected value of next_rxi
5797 		 * and continue(below) to process from the new
5798 		 * start index.
5799 		 */
5800 		ldcp->next_rxi = start;
5801 	}
5802 
5803 #endif	/* VGEN_HANDLE_LOST_PKTS */
5804 
5805 	/* Now receive messages */
5806 	rv = vgen_process_dring_data(ldcp, tagp);
5807 
5808 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
5809 	return (rv);
5810 }
5811 
5812 static int
5813 vgen_process_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5814 {
5815 	boolean_t set_ack_start = B_FALSE;
5816 	uint32_t start;
5817 	uint32_t ack_end;
5818 	uint32_t next_rxi;
5819 	uint32_t rxi;
5820 	int count = 0;
5821 	int rv = 0;
5822 	uint32_t retries = 0;
5823 	vgen_stats_t *statsp;
5824 	vnet_public_desc_t rxd;
5825 	vio_dring_entry_hdr_t *hdrp;
5826 	mblk_t *bp = NULL;
5827 	mblk_t *bpt = NULL;
5828 	uint32_t ack_start;
5829 	boolean_t rxd_err = B_FALSE;
5830 	mblk_t *mp = NULL;
5831 	size_t nbytes;
5832 	boolean_t ack_needed = B_FALSE;
5833 	size_t nread;
5834 	uint64_t off = 0;
5835 	struct ether_header *ehp;
5836 	vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;
5837 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5838 	vgen_hparams_t	*lp = &ldcp->local_hparams;
5839 
5840 	DBG1(vgenp, ldcp, "enter\n");
5841 
5842 	statsp = &ldcp->stats;
5843 	start = dringmsg->start_idx;
5844 
5845 	/*
5846 	 * start processing the descriptors from the specified
5847 	 * start index, up to the index a descriptor is not ready
5848 	 * to be processed or we process the entire descriptor ring
5849 	 * and wrap around upto the start index.
5850 	 */
5851 
5852 	/* need to set the start index of descriptors to be ack'd */
5853 	set_ack_start = B_TRUE;
5854 
5855 	/* index upto which we have ack'd */
5856 	ack_end = start;
5857 	DECR_RXI(ack_end, ldcp);
5858 
5859 	next_rxi = rxi =  start;
5860 	do {
5861 vgen_recv_retry:
5862 		rv = vnet_dring_entry_copy(&(ldcp->rxdp[rxi]), &rxd,
5863 		    ldcp->dring_mtype, ldcp->rx_dhandle, rxi, rxi);
5864 		if (rv != 0) {
5865 			DWARN(vgenp, ldcp, "ldc_mem_dring_acquire() failed"
5866 			    " rv(%d)\n", rv);
5867 			statsp->ierrors++;
5868 			return (rv);
5869 		}
5870 
5871 		hdrp = &rxd.hdr;
5872 
5873 		if (hdrp->dstate != VIO_DESC_READY) {
5874 			/*
5875 			 * Before waiting and retry here, send up
5876 			 * the packets that are received already
5877 			 */
5878 			if (bp != NULL) {
5879 				DTRACE_PROBE1(vgen_rcv_msgs, int, count);
5880 				vgen_rx(ldcp, bp);
5881 				count = 0;
5882 				bp = bpt = NULL;
5883 			}
5884 			/*
5885 			 * descriptor is not ready.
5886 			 * retry descriptor acquire, stop processing
5887 			 * after max # retries.
5888 			 */
5889 			if (retries == vgen_recv_retries)
5890 				break;
5891 			retries++;
5892 			drv_usecwait(vgen_recv_delay);
5893 			goto vgen_recv_retry;
5894 		}
5895 		retries = 0;
5896 
5897 		if (set_ack_start) {
5898 			/*
5899 			 * initialize the start index of the range
5900 			 * of descriptors to be ack'd.
5901 			 */
5902 			ack_start = rxi;
5903 			set_ack_start = B_FALSE;
5904 		}
5905 
5906 		if ((rxd.nbytes < ETHERMIN) ||
5907 		    (rxd.nbytes > lp->mtu) ||
5908 		    (rxd.ncookies == 0) ||
5909 		    (rxd.ncookies > MAX_COOKIES)) {
5910 			rxd_err = B_TRUE;
5911 		} else {
5912 			/*
5913 			 * Try to allocate an mblk from the free pool
5914 			 * of recv mblks for the channel.
5915 			 * If this fails, use allocb().
5916 			 */
5917 			nbytes = (VNET_IPALIGN + rxd.nbytes + 7) & ~7;
5918 			if (nbytes > ldcp->max_rxpool_size) {
5919 				mp = allocb(VNET_IPALIGN + rxd.nbytes + 8,
5920 				    BPRI_MED);
5921 			} else {
5922 				mp = vio_multipool_allocb(&ldcp->vmp, nbytes);
5923 				if (mp == NULL) {
5924 					statsp->rx_vio_allocb_fail++;
5925 					/*
5926 					 * Data buffer returned by allocb(9F)
5927 					 * is 8byte aligned. We allocate extra
5928 					 * 8 bytes to ensure size is multiple
5929 					 * of 8 bytes for ldc_mem_copy().
5930 					 */
5931 					mp = allocb(VNET_IPALIGN +
5932 					    rxd.nbytes + 8, BPRI_MED);
5933 				}
5934 			}
5935 		}
5936 		if ((rxd_err) || (mp == NULL)) {
5937 			/*
5938 			 * rxd_err or allocb() failure,
5939 			 * drop this packet, get next.
5940 			 */
5941 			if (rxd_err) {
5942 				statsp->ierrors++;
5943 				rxd_err = B_FALSE;
5944 			} else {
5945 				statsp->rx_allocb_fail++;
5946 			}
5947 
5948 			ack_needed = hdrp->ack;
5949 
5950 			/* set descriptor done bit */
5951 			rv = vnet_dring_entry_set_dstate(&(ldcp->rxdp[rxi]),
5952 			    ldcp->dring_mtype, ldcp->rx_dhandle, rxi, rxi,
5953 			    VIO_DESC_DONE);
5954 			if (rv != 0) {
5955 				DWARN(vgenp, ldcp,
5956 				    "vnet_dring_entry_set_dstate err rv(%d)\n",
5957 				    rv);
5958 				return (rv);
5959 			}
5960 
5961 			if (ack_needed) {
5962 				ack_needed = B_FALSE;
5963 				/*
5964 				 * sender needs ack for this packet,
5965 				 * ack pkts upto this index.
5966 				 */
5967 				ack_end = rxi;
5968 
5969 				rv = vgen_send_dring_ack(ldcp, tagp,
5970 				    ack_start, ack_end,
5971 				    VIO_DP_ACTIVE);
5972 				if (rv != VGEN_SUCCESS) {
5973 					goto error_ret;
5974 				}
5975 
5976 				/* need to set new ack start index */
5977 				set_ack_start = B_TRUE;
5978 			}
5979 			goto vgen_next_rxi;
5980 		}
5981 
5982 		nread = nbytes;
5983 		rv = ldc_mem_copy(ldcp->ldc_handle,
5984 		    (caddr_t)mp->b_rptr, off, &nread,
5985 		    rxd.memcookie, rxd.ncookies, LDC_COPY_IN);
5986 
5987 		/* if ldc_mem_copy() failed */
5988 		if (rv) {
5989 			DWARN(vgenp, ldcp, "ldc_mem_copy err rv(%d)\n", rv);
5990 			statsp->ierrors++;
5991 			freemsg(mp);
5992 			goto error_ret;
5993 		}
5994 
5995 		ack_needed = hdrp->ack;
5996 
5997 		rv = vnet_dring_entry_set_dstate(&(ldcp->rxdp[rxi]),
5998 		    ldcp->dring_mtype, ldcp->rx_dhandle, rxi, rxi,
5999 		    VIO_DESC_DONE);
6000 		if (rv != 0) {
6001 			DWARN(vgenp, ldcp,
6002 			    "vnet_dring_entry_set_dstate err rv(%d)\n", rv);
6003 			goto error_ret;
6004 		}
6005 
6006 		mp->b_rptr += VNET_IPALIGN;
6007 
6008 		if (ack_needed) {
6009 			ack_needed = B_FALSE;
6010 			/*
6011 			 * sender needs ack for this packet,
6012 			 * ack pkts upto this index.
6013 			 */
6014 			ack_end = rxi;
6015 
6016 			rv = vgen_send_dring_ack(ldcp, tagp,
6017 			    ack_start, ack_end, VIO_DP_ACTIVE);
6018 			if (rv != VGEN_SUCCESS) {
6019 				goto error_ret;
6020 			}
6021 
6022 			/* need to set new ack start index */
6023 			set_ack_start = B_TRUE;
6024 		}
6025 
6026 		if (nread != nbytes) {
6027 			DWARN(vgenp, ldcp,
6028 			    "ldc_mem_copy nread(%lx), nbytes(%lx)\n",
6029 			    nread, nbytes);
6030 			statsp->ierrors++;
6031 			freemsg(mp);
6032 			goto vgen_next_rxi;
6033 		}
6034 
6035 		/* point to the actual end of data */
6036 		mp->b_wptr = mp->b_rptr + rxd.nbytes;
6037 
6038 		/* update stats */
6039 		statsp->ipackets++;
6040 		statsp->rbytes += rxd.nbytes;
6041 		ehp = (struct ether_header *)mp->b_rptr;
6042 		if (IS_BROADCAST(ehp))
6043 			statsp->brdcstrcv++;
6044 		else if (IS_MULTICAST(ehp))
6045 			statsp->multircv++;
6046 
6047 		/* build a chain of received packets */
6048 		if (bp == NULL) {
6049 			/* first pkt */
6050 			bp = mp;
6051 			bpt = bp;
6052 			bpt->b_next = NULL;
6053 		} else {
6054 			mp->b_next = NULL;
6055 			bpt->b_next = mp;
6056 			bpt = mp;
6057 		}
6058 
6059 		if (count++ > vgen_chain_len) {
6060 			DTRACE_PROBE1(vgen_rcv_msgs, int, count);
6061 			vgen_rx(ldcp, bp);
6062 			count = 0;
6063 			bp = bpt = NULL;
6064 		}
6065 
6066 vgen_next_rxi:
6067 		/* update end index of range of descrs to be ack'd */
6068 		ack_end = rxi;
6069 
6070 		/* update the next index to be processed */
6071 		INCR_RXI(next_rxi, ldcp);
6072 		if (next_rxi == start) {
6073 			/*
6074 			 * processed the entire descriptor ring upto
6075 			 * the index at which we started.
6076 			 */
6077 			break;
6078 		}
6079 
6080 		rxi = next_rxi;
6081 
6082 	_NOTE(CONSTCOND)
6083 	} while (1);
6084 
6085 	/*
6086 	 * send an ack message to peer indicating that we have stopped
6087 	 * processing descriptors.
6088 	 */
6089 	if (set_ack_start) {
6090 		/*
6091 		 * We have ack'd upto some index and we have not
6092 		 * processed any descriptors beyond that index.
6093 		 * Use the last ack'd index as both the start and
6094 		 * end of range of descrs being ack'd.
6095 		 * Note: This results in acking the last index twice
6096 		 * and should be harmless.
6097 		 */
6098 		ack_start = ack_end;
6099 	}
6100 
6101 	rv = vgen_send_dring_ack(ldcp, tagp, ack_start, ack_end,
6102 	    VIO_DP_STOPPED);
6103 	if (rv != VGEN_SUCCESS) {
6104 		goto error_ret;
6105 	}
6106 
6107 	/* save new recv index of next dring msg */
6108 	ldcp->next_rxi = next_rxi;
6109 
6110 error_ret:
6111 	/* send up packets received so far */
6112 	if (bp != NULL) {
6113 		DTRACE_PROBE1(vgen_rcv_msgs, int, count);
6114 		vgen_rx(ldcp, bp);
6115 		bp = bpt = NULL;
6116 	}
6117 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
6118 	return (rv);
6119 
6120 }
6121 
6122 static int
6123 vgen_handle_dring_data_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
6124 {
6125 	int rv = 0;
6126 	uint32_t start;
6127 	int32_t end;
6128 	uint32_t txi;
6129 	boolean_t ready_txd = B_FALSE;
6130 	vgen_stats_t *statsp;
6131 	vgen_private_desc_t *tbufp;
6132 	vnet_public_desc_t *txdp;
6133 	vio_dring_entry_hdr_t *hdrp;
6134 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6135 	vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;
6136 
6137 	DBG1(vgenp, ldcp, "enter\n");
6138 	start = dringmsg->start_idx;
6139 	end = dringmsg->end_idx;
6140 	statsp = &ldcp->stats;
6141 
6142 	/*
6143 	 * received an ack corresponding to a specific descriptor for
6144 	 * which we had set the ACK bit in the descriptor (during
6145 	 * transmit). This enables us to reclaim descriptors.
6146 	 */
6147 
6148 	DBG2(vgenp, ldcp, "ACK:  start(%d), end(%d)\n", start, end);
6149 
6150 	/* validate start and end indeces in the tx ack msg */
6151 	if (!(CHECK_TXI(start, ldcp)) || !(CHECK_TXI(end, ldcp))) {
6152 		/* drop the message if invalid index */
6153 		DWARN(vgenp, ldcp, "Invalid Tx ack start(%d) or end(%d)\n",
6154 		    start, end);
6155 		return (rv);
6156 	}
6157 	/* validate dring_ident */
6158 	if (dringmsg->dring_ident != ldcp->local_hparams.dring_ident) {
6159 		/* invalid dring_ident, drop the msg */
6160 		DWARN(vgenp, ldcp, "Invalid dring ident 0x%x\n",
6161 		    dringmsg->dring_ident);
6162 		return (rv);
6163 	}
6164 	statsp->dring_data_acks++;
6165 
6166 	/* reclaim descriptors that are done */
6167 	vgen_reclaim(ldcp);
6168 
6169 	if (dringmsg->dring_process_state != VIO_DP_STOPPED) {
6170 		/*
6171 		 * receiver continued processing descriptors after
6172 		 * sending us the ack.
6173 		 */
6174 		return (rv);
6175 	}
6176 
6177 	statsp->dring_stopped_acks++;
6178 
6179 	/* receiver stopped processing descriptors */
6180 	mutex_enter(&ldcp->wrlock);
6181 	mutex_enter(&ldcp->tclock);
6182 
6183 	/*
6184 	 * determine if there are any pending tx descriptors
6185 	 * ready to be processed by the receiver(peer) and if so,
6186 	 * send a message to the peer to restart receiving.
6187 	 */
6188 	ready_txd = B_FALSE;
6189 
6190 	/*
6191 	 * using the end index of the descriptor range for which
6192 	 * we received the ack, check if the next descriptor is
6193 	 * ready.
6194 	 */
6195 	txi = end;
6196 	INCR_TXI(txi, ldcp);
6197 	tbufp = &ldcp->tbufp[txi];
6198 	txdp = tbufp->descp;
6199 	hdrp = &txdp->hdr;
6200 	if (hdrp->dstate == VIO_DESC_READY) {
6201 		ready_txd = B_TRUE;
6202 	} else {
6203 		/*
6204 		 * descr next to the end of ack'd descr range is not
6205 		 * ready.
6206 		 * starting from the current reclaim index, check
6207 		 * if any descriptor is ready.
6208 		 */
6209 
6210 		txi = ldcp->cur_tbufp - ldcp->tbufp;
6211 		tbufp = &ldcp->tbufp[txi];
6212 
6213 		txdp = tbufp->descp;
6214 		hdrp = &txdp->hdr;
6215 		if (hdrp->dstate == VIO_DESC_READY) {
6216 			ready_txd = B_TRUE;
6217 		}
6218 
6219 	}
6220 
6221 	if (ready_txd) {
6222 		/*
6223 		 * we have tx descriptor(s) ready to be
6224 		 * processed by the receiver.
6225 		 * send a message to the peer with the start index
6226 		 * of ready descriptors.
6227 		 */
6228 		rv = vgen_send_dring_data(ldcp, txi, -1);
6229 		if (rv != VGEN_SUCCESS) {
6230 			ldcp->resched_peer = B_TRUE;
6231 			ldcp->resched_peer_txi = txi;
6232 			mutex_exit(&ldcp->tclock);
6233 			mutex_exit(&ldcp->wrlock);
6234 			return (rv);
6235 		}
6236 	} else {
6237 		/*
6238 		 * no ready tx descriptors. set the flag to send a
6239 		 * message to peer when tx descriptors are ready in
6240 		 * transmit routine.
6241 		 */
6242 		ldcp->resched_peer = B_TRUE;
6243 		ldcp->resched_peer_txi = ldcp->cur_tbufp - ldcp->tbufp;
6244 	}
6245 
6246 	mutex_exit(&ldcp->tclock);
6247 	mutex_exit(&ldcp->wrlock);
6248 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
6249 	return (rv);
6250 }
6251 
6252 static int
6253 vgen_handle_dring_data_nack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
6254 {
6255 	int rv = 0;
6256 	uint32_t start;
6257 	int32_t end;
6258 	uint32_t txi;
6259 	vnet_public_desc_t *txdp;
6260 	vio_dring_entry_hdr_t *hdrp;
6261 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6262 	vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;
6263 
6264 	DBG1(vgenp, ldcp, "enter\n");
6265 	start = dringmsg->start_idx;
6266 	end = dringmsg->end_idx;
6267 
6268 	/*
6269 	 * peer sent a NACK msg to indicate lost packets.
6270 	 * The start and end correspond to the range of descriptors
6271 	 * for which the peer didn't receive a dring data msg and so
6272 	 * didn't receive the corresponding data.
6273 	 */
6274 	DWARN(vgenp, ldcp, "NACK: start(%d), end(%d)\n", start, end);
6275 
6276 	/* validate start and end indeces in the tx nack msg */
6277 	if (!(CHECK_TXI(start, ldcp)) || !(CHECK_TXI(end, ldcp))) {
6278 		/* drop the message if invalid index */
6279 		DWARN(vgenp, ldcp, "Invalid Tx nack start(%d) or end(%d)\n",
6280 		    start, end);
6281 		return (rv);
6282 	}
6283 	/* validate dring_ident */
6284 	if (dringmsg->dring_ident != ldcp->local_hparams.dring_ident) {
6285 		/* invalid dring_ident, drop the msg */
6286 		DWARN(vgenp, ldcp, "Invalid dring ident 0x%x\n",
6287 		    dringmsg->dring_ident);
6288 		return (rv);
6289 	}
6290 	mutex_enter(&ldcp->txlock);
6291 	mutex_enter(&ldcp->tclock);
6292 
6293 	if (ldcp->next_tbufp == ldcp->cur_tbufp) {
6294 		/* no busy descriptors, bogus nack ? */
6295 		mutex_exit(&ldcp->tclock);
6296 		mutex_exit(&ldcp->txlock);
6297 		return (rv);
6298 	}
6299 
6300 	/* we just mark the descrs as done so they can be reclaimed */
6301 	for (txi = start; txi <= end; ) {
6302 		txdp = &(ldcp->txdp[txi]);
6303 		hdrp = &txdp->hdr;
6304 		if (hdrp->dstate == VIO_DESC_READY)
6305 			hdrp->dstate = VIO_DESC_DONE;
6306 		INCR_TXI(txi, ldcp);
6307 	}
6308 	mutex_exit(&ldcp->tclock);
6309 	mutex_exit(&ldcp->txlock);
6310 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
6311 	return (rv);
6312 }
6313 
6314 static void
6315 vgen_reclaim(vgen_ldc_t *ldcp)
6316 {
6317 	mutex_enter(&ldcp->tclock);
6318 
6319 	vgen_reclaim_dring(ldcp);
6320 	ldcp->reclaim_lbolt = ddi_get_lbolt();
6321 
6322 	mutex_exit(&ldcp->tclock);
6323 }
6324 
6325 /*
6326  * transmit reclaim function. starting from the current reclaim index
6327  * look for descriptors marked DONE and reclaim the descriptor and the
6328  * corresponding buffers (tbuf).
6329  */
6330 static void
6331 vgen_reclaim_dring(vgen_ldc_t *ldcp)
6332 {
6333 	int count = 0;
6334 	vnet_public_desc_t *txdp;
6335 	vgen_private_desc_t *tbufp;
6336 	vio_dring_entry_hdr_t	*hdrp;
6337 
6338 #ifdef DEBUG
6339 	if (vgen_trigger_txtimeout)
6340 		return;
6341 #endif
6342 
6343 	tbufp = ldcp->cur_tbufp;
6344 	txdp = tbufp->descp;
6345 	hdrp = &txdp->hdr;
6346 
6347 	while ((hdrp->dstate == VIO_DESC_DONE) &&
6348 	    (tbufp != ldcp->next_tbufp)) {
6349 		tbufp->flags = VGEN_PRIV_DESC_FREE;
6350 		hdrp->dstate = VIO_DESC_FREE;
6351 		hdrp->ack = B_FALSE;
6352 
6353 		tbufp = NEXTTBUF(ldcp, tbufp);
6354 		txdp = tbufp->descp;
6355 		hdrp = &txdp->hdr;
6356 		count++;
6357 	}
6358 
6359 	ldcp->cur_tbufp = tbufp;
6360 
6361 	/*
6362 	 * Check if mac layer should be notified to restart transmissions
6363 	 */
6364 	if ((ldcp->need_resched) && (count > 0)) {
6365 		vio_net_tx_update_t vtx_update =
6366 		    ldcp->portp->vcb.vio_net_tx_update;
6367 
6368 		ldcp->need_resched = B_FALSE;
6369 		vtx_update(ldcp->portp->vhp);
6370 	}
6371 }
6372 
6373 /* return the number of pending transmits for the channel */
6374 static int
6375 vgen_num_txpending(vgen_ldc_t *ldcp)
6376 {
6377 	int n;
6378 
6379 	if (ldcp->next_tbufp >= ldcp->cur_tbufp) {
6380 		n = ldcp->next_tbufp - ldcp->cur_tbufp;
6381 	} else  {
6382 		/* cur_tbufp > next_tbufp */
6383 		n = ldcp->num_txds - (ldcp->cur_tbufp - ldcp->next_tbufp);
6384 	}
6385 
6386 	return (n);
6387 }
6388 
6389 /* determine if the transmit descriptor ring is full */
6390 static int
6391 vgen_tx_dring_full(vgen_ldc_t *ldcp)
6392 {
6393 	vgen_private_desc_t	*tbufp;
6394 	vgen_private_desc_t	*ntbufp;
6395 
6396 	tbufp = ldcp->next_tbufp;
6397 	ntbufp = NEXTTBUF(ldcp, tbufp);
6398 	if (ntbufp == ldcp->cur_tbufp) { /* out of tbufs/txds */
6399 		return (VGEN_SUCCESS);
6400 	}
6401 	return (VGEN_FAILURE);
6402 }
6403 
6404 /* determine if timeout condition has occured */
6405 static int
6406 vgen_ldc_txtimeout(vgen_ldc_t *ldcp)
6407 {
6408 	if (((ddi_get_lbolt() - ldcp->reclaim_lbolt) >
6409 	    drv_usectohz(vnet_ldcwd_txtimeout * 1000)) &&
6410 	    (vnet_ldcwd_txtimeout) &&
6411 	    (vgen_tx_dring_full(ldcp) == VGEN_SUCCESS)) {
6412 		return (VGEN_SUCCESS);
6413 	} else {
6414 		return (VGEN_FAILURE);
6415 	}
6416 }
6417 
6418 /* transmit watchdog timeout handler */
6419 static void
6420 vgen_ldc_watchdog(void *arg)
6421 {
6422 	vgen_ldc_t *ldcp;
6423 	vgen_t *vgenp;
6424 	int rv;
6425 
6426 	ldcp = (vgen_ldc_t *)arg;
6427 	vgenp = LDC_TO_VGEN(ldcp);
6428 
6429 	rv = vgen_ldc_txtimeout(ldcp);
6430 	if (rv == VGEN_SUCCESS) {
6431 		DWARN(vgenp, ldcp, "transmit timeout\n");
6432 #ifdef DEBUG
6433 		if (vgen_trigger_txtimeout) {
6434 			/* tx timeout triggered for debugging */
6435 			vgen_trigger_txtimeout = 0;
6436 		}
6437 #endif
6438 		mutex_enter(&ldcp->cblock);
6439 		ldcp->need_ldc_reset = B_TRUE;
6440 		vgen_handshake_retry(ldcp);
6441 		mutex_exit(&ldcp->cblock);
6442 		if (ldcp->need_resched) {
6443 			vio_net_tx_update_t vtx_update =
6444 			    ldcp->portp->vcb.vio_net_tx_update;
6445 
6446 			ldcp->need_resched = B_FALSE;
6447 			vtx_update(ldcp->portp->vhp);
6448 		}
6449 	}
6450 
6451 	ldcp->wd_tid = timeout(vgen_ldc_watchdog, (caddr_t)ldcp,
6452 	    drv_usectohz(vnet_ldcwd_interval * 1000));
6453 }
6454 
/*
 * Handler for error messages received from the peer ldc end-point.
 *
 * Currently a no-op: neither the channel nor the message tag is used and
 * the message is ignored.
 */
static void
vgen_handle_errmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
{
	/* lint annotation: both arguments are intentionally unused */
	_NOTE(ARGUNUSED(ldcp, tagp))
}
6461 
6462 static int
6463 vgen_check_datamsg_seq(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
6464 {
6465 	vio_raw_data_msg_t	*rmsg;
6466 	vio_dring_msg_t		*dmsg;
6467 	uint64_t		seq_num;
6468 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
6469 
6470 	if (tagp->vio_subtype_env == VIO_DRING_DATA) {
6471 		dmsg = (vio_dring_msg_t *)tagp;
6472 		seq_num = dmsg->seq_num;
6473 	} else if (tagp->vio_subtype_env == VIO_PKT_DATA) {
6474 		rmsg = (vio_raw_data_msg_t *)tagp;
6475 		seq_num = rmsg->seq_num;
6476 	} else {
6477 		return (EINVAL);
6478 	}
6479 
6480 	if (seq_num != ldcp->next_rxseq) {
6481 
6482 		/* seqnums don't match */
6483 		DWARN(vgenp, ldcp,
6484 		    "next_rxseq(0x%lx) != seq_num(0x%lx)\n",
6485 		    ldcp->next_rxseq, seq_num);
6486 
6487 		ldcp->need_ldc_reset = B_TRUE;
6488 		return (EINVAL);
6489 
6490 	}
6491 
6492 	ldcp->next_rxseq++;
6493 
6494 	return (0);
6495 }
6496 
6497 /* Check if the session id in the received message is valid */
6498 static int
6499 vgen_check_sid(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
6500 {
6501 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6502 
6503 	if (tagp->vio_sid != ldcp->peer_sid) {
6504 		DWARN(vgenp, ldcp, "sid mismatch: expected(%x), rcvd(%x)\n",
6505 		    ldcp->peer_sid, tagp->vio_sid);
6506 		return (VGEN_FAILURE);
6507 	}
6508 	else
6509 		return (VGEN_SUCCESS);
6510 }
6511 
6512 static caddr_t
6513 vgen_print_ethaddr(uint8_t *a, char *ebuf)
6514 {
6515 	(void) sprintf(ebuf,
6516 	    "%x:%x:%x:%x:%x:%x", a[0], a[1], a[2], a[3], a[4], a[5]);
6517 	return (ebuf);
6518 }
6519 
6520 /* Handshake watchdog timeout handler */
6521 static void
6522 vgen_hwatchdog(void *arg)
6523 {
6524 	vgen_ldc_t *ldcp = (vgen_ldc_t *)arg;
6525 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6526 
6527 	DWARN(vgenp, ldcp,
6528 	    "handshake timeout ldc(%lx) phase(%x) state(%x)\n",
6529 	    ldcp->hphase, ldcp->hstate);
6530 
6531 	mutex_enter(&ldcp->cblock);
6532 	if (ldcp->cancel_htid) {
6533 		ldcp->cancel_htid = 0;
6534 		mutex_exit(&ldcp->cblock);
6535 		return;
6536 	}
6537 	ldcp->htid = 0;
6538 	ldcp->need_ldc_reset = B_TRUE;
6539 	vgen_handshake_retry(ldcp);
6540 	mutex_exit(&ldcp->cblock);
6541 }
6542 
6543 static void
6544 vgen_print_hparams(vgen_hparams_t *hp)
6545 {
6546 	uint8_t	addr[6];
6547 	char	ea[6];
6548 	ldc_mem_cookie_t *dc;
6549 
6550 	cmn_err(CE_CONT, "version_info:\n");
6551 	cmn_err(CE_CONT,
6552 	    "\tver_major: %d, ver_minor: %d, dev_class: %d\n",
6553 	    hp->ver_major, hp->ver_minor, hp->dev_class);
6554 
6555 	vnet_macaddr_ultostr(hp->addr, addr);
6556 	cmn_err(CE_CONT, "attr_info:\n");
6557 	cmn_err(CE_CONT, "\tMTU: %lx, addr: %s\n", hp->mtu,
6558 	    vgen_print_ethaddr(addr, ea));
6559 	cmn_err(CE_CONT,
6560 	    "\taddr_type: %x, xfer_mode: %x, ack_freq: %x\n",
6561 	    hp->addr_type, hp->xfer_mode, hp->ack_freq);
6562 
6563 	dc = &hp->dring_cookie;
6564 	cmn_err(CE_CONT, "dring_info:\n");
6565 	cmn_err(CE_CONT,
6566 	    "\tlength: %d, dsize: %d\n", hp->num_desc, hp->desc_size);
6567 	cmn_err(CE_CONT,
6568 	    "\tldc_addr: 0x%lx, ldc_size: %ld\n",
6569 	    dc->addr, dc->size);
6570 	cmn_err(CE_CONT, "\tdring_ident: 0x%lx\n", hp->dring_ident);
6571 }
6572 
6573 static void
6574 vgen_print_ldcinfo(vgen_ldc_t *ldcp)
6575 {
6576 	vgen_hparams_t *hp;
6577 
6578 	cmn_err(CE_CONT, "Channel Information:\n");
6579 	cmn_err(CE_CONT,
6580 	    "\tldc_id: 0x%lx, ldc_status: 0x%x\n",
6581 	    ldcp->ldc_id, ldcp->ldc_status);
6582 	cmn_err(CE_CONT,
6583 	    "\tlocal_sid: 0x%x, peer_sid: 0x%x\n",
6584 	    ldcp->local_sid, ldcp->peer_sid);
6585 	cmn_err(CE_CONT,
6586 	    "\thphase: 0x%x, hstate: 0x%x\n",
6587 	    ldcp->hphase, ldcp->hstate);
6588 
6589 	cmn_err(CE_CONT, "Local handshake params:\n");
6590 	hp = &ldcp->local_hparams;
6591 	vgen_print_hparams(hp);
6592 
6593 	cmn_err(CE_CONT, "Peer handshake params:\n");
6594 	hp = &ldcp->peer_hparams;
6595 	vgen_print_hparams(hp);
6596 }
6597 
6598 /*
6599  * Send received packets up the stack.
6600  */
6601 static void
6602 vgen_rx(vgen_ldc_t *ldcp, mblk_t *bp)
6603 {
6604 	vio_net_rx_cb_t vrx_cb = ldcp->portp->vcb.vio_net_rx_cb;
6605 
6606 	if (ldcp->rcv_thread != NULL) {
6607 		ASSERT(MUTEX_HELD(&ldcp->rxlock));
6608 		mutex_exit(&ldcp->rxlock);
6609 	} else {
6610 		ASSERT(MUTEX_HELD(&ldcp->cblock));
6611 		mutex_exit(&ldcp->cblock);
6612 	}
6613 
6614 	vrx_cb(ldcp->portp->vhp, bp);
6615 
6616 	if (ldcp->rcv_thread != NULL) {
6617 		mutex_enter(&ldcp->rxlock);
6618 	} else {
6619 		mutex_enter(&ldcp->cblock);
6620 	}
6621 }
6622 
6623 /*
6624  * vgen_ldc_rcv_worker -- A per LDC worker thread to receive data.
6625  * This thread is woken up by the LDC interrupt handler to process
6626  * LDC packets and receive data.
6627  */
6628 static void
6629 vgen_ldc_rcv_worker(void *arg)
6630 {
6631 	callb_cpr_t	cprinfo;
6632 	vgen_ldc_t *ldcp = (vgen_ldc_t *)arg;
6633 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6634 
6635 	DBG1(vgenp, ldcp, "enter\n");
6636 	CALLB_CPR_INIT(&cprinfo, &ldcp->rcv_thr_lock, callb_generic_cpr,
6637 	    "vnet_rcv_thread");
6638 	mutex_enter(&ldcp->rcv_thr_lock);
6639 	ldcp->rcv_thr_flags |= VGEN_WTHR_RUNNING;
6640 	while (!(ldcp->rcv_thr_flags & VGEN_WTHR_STOP)) {
6641 
6642 		CALLB_CPR_SAFE_BEGIN(&cprinfo);
6643 		/*
6644 		 * Wait until the data is received or a stop
6645 		 * request is received.
6646 		 */
6647 		while (!(ldcp->rcv_thr_flags &
6648 		    (VGEN_WTHR_DATARCVD | VGEN_WTHR_STOP))) {
6649 			cv_wait(&ldcp->rcv_thr_cv, &ldcp->rcv_thr_lock);
6650 		}
6651 		CALLB_CPR_SAFE_END(&cprinfo, &ldcp->rcv_thr_lock)
6652 
6653 		/*
6654 		 * First process the stop request.
6655 		 */
6656 		if (ldcp->rcv_thr_flags & VGEN_WTHR_STOP) {
6657 			DBG2(vgenp, ldcp, "stopped\n");
6658 			break;
6659 		}
6660 		ldcp->rcv_thr_flags &= ~VGEN_WTHR_DATARCVD;
6661 		ldcp->rcv_thr_flags |= VGEN_WTHR_PROCESSING;
6662 		mutex_exit(&ldcp->rcv_thr_lock);
6663 		DBG2(vgenp, ldcp, "calling vgen_handle_evt_read\n");
6664 		vgen_handle_evt_read(ldcp);
6665 		mutex_enter(&ldcp->rcv_thr_lock);
6666 		ldcp->rcv_thr_flags &= ~VGEN_WTHR_PROCESSING;
6667 	}
6668 
6669 	/*
6670 	 * Update the run status and wakeup the thread that
6671 	 * has sent the stop request.
6672 	 */
6673 	ldcp->rcv_thr_flags &= ~VGEN_WTHR_RUNNING;
6674 	cv_signal(&ldcp->rcv_thr_cv);
6675 	CALLB_CPR_EXIT(&cprinfo);
6676 	thread_exit();
6677 	DBG1(vgenp, ldcp, "exit\n");
6678 }
6679 
6680 /* vgen_stop_rcv_thread -- Co-ordinate with receive thread to stop it */
6681 static void
6682 vgen_stop_rcv_thread(vgen_ldc_t *ldcp)
6683 {
6684 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6685 
6686 	DBG1(vgenp, ldcp, "enter\n");
6687 	/*
6688 	 * Send a stop request by setting the stop flag and
6689 	 * wait until the receive thread stops.
6690 	 */
6691 	mutex_enter(&ldcp->rcv_thr_lock);
6692 	if (ldcp->rcv_thr_flags & VGEN_WTHR_RUNNING) {
6693 		ldcp->rcv_thr_flags |= VGEN_WTHR_STOP;
6694 		cv_signal(&ldcp->rcv_thr_cv);
6695 		DBG2(vgenp, ldcp, "waiting...");
6696 		while (ldcp->rcv_thr_flags & VGEN_WTHR_RUNNING) {
6697 			cv_wait(&ldcp->rcv_thr_cv, &ldcp->rcv_thr_lock);
6698 		}
6699 	}
6700 	mutex_exit(&ldcp->rcv_thr_lock);
6701 	ldcp->rcv_thread = NULL;
6702 	DBG1(vgenp, ldcp, "exit\n");
6703 }
6704 
6705 /*
6706  * Wait for the channel rx-queue to be drained by allowing the receive
6707  * worker thread to read all messages from the rx-queue of the channel.
6708  * Assumption: further callbacks are disabled at this time.
6709  */
6710 static void
6711 vgen_drain_rcv_thread(vgen_ldc_t *ldcp)
6712 {
6713 	clock_t	tm;
6714 	clock_t	wt;
6715 	clock_t	rv;
6716 
6717 	/*
6718 	 * If there is data in ldc rx queue, wait until the rx
6719 	 * worker thread runs and drains all msgs in the queue.
6720 	 */
6721 	wt = drv_usectohz(MILLISEC);
6722 
6723 	mutex_enter(&ldcp->rcv_thr_lock);
6724 
6725 	tm = ddi_get_lbolt() + wt;
6726 
6727 	/*
6728 	 * We need to check both bits - DATARCVD and PROCESSING, to be cleared.
6729 	 * If DATARCVD is set, that means the callback has signalled the worker
6730 	 * thread, but the worker hasn't started processing yet. If PROCESSING
6731 	 * is set, that means the thread is awake and processing. Note that the
6732 	 * DATARCVD state can only be seen once, as the assumption is that
6733 	 * further callbacks have been disabled at this point.
6734 	 */
6735 	while (ldcp->rcv_thr_flags &
6736 	    (VGEN_WTHR_DATARCVD | VGEN_WTHR_PROCESSING)) {
6737 		rv = cv_timedwait(&ldcp->rcv_thr_cv, &ldcp->rcv_thr_lock, tm);
6738 		if (rv == -1) {	/* timeout */
6739 			/*
6740 			 * Note that the only way we return is due to a timeout;
6741 			 * we set the new time to wait, before we go back and
6742 			 * check the condition. The other(unlikely) possibility
6743 			 * is a premature wakeup(see cv_timedwait(9F)) in which
6744 			 * case we just continue to use the same time to wait.
6745 			 */
6746 			tm = ddi_get_lbolt() + wt;
6747 		}
6748 	}
6749 
6750 	mutex_exit(&ldcp->rcv_thr_lock);
6751 }
6752 
6753 /*
6754  * vgen_dds_rx -- post DDS messages to vnet.
6755  */
6756 static int
6757 vgen_dds_rx(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
6758 {
6759 	vio_dds_msg_t *dmsg = (vio_dds_msg_t *)tagp;
6760 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6761 
6762 	if (dmsg->dds_class != DDS_VNET_NIU) {
6763 		DWARN(vgenp, ldcp, "Unknown DDS class, dropping");
6764 		return (EBADMSG);
6765 	}
6766 	vnet_dds_rx(vgenp->vnetp, dmsg);
6767 	return (0);
6768 }
6769 
6770 /*
6771  * vgen_dds_tx -- an interface called by vnet to send DDS messages.
6772  */
6773 int
6774 vgen_dds_tx(void *arg, void *msg)
6775 {
6776 	vgen_t *vgenp = arg;
6777 	vio_dds_msg_t *dmsg = msg;
6778 	vgen_portlist_t *plistp = &vgenp->vgenports;
6779 	vgen_ldc_t *ldcp;
6780 	vgen_ldclist_t *ldclp;
6781 	int rv = EIO;
6782 
6783 
6784 	READ_ENTER(&plistp->rwlock);
6785 	ldclp = &(vgenp->vsw_portp->ldclist);
6786 	READ_ENTER(&ldclp->rwlock);
6787 	ldcp = ldclp->headp;
6788 	if ((ldcp == NULL) || (ldcp->hphase != VH_DONE)) {
6789 		goto vgen_dsend_exit;
6790 	}
6791 
6792 	dmsg->tag.vio_sid = ldcp->local_sid;
6793 	rv = vgen_sendmsg(ldcp, (caddr_t)dmsg, sizeof (vio_dds_msg_t), B_FALSE);
6794 	if (rv != VGEN_SUCCESS) {
6795 		rv = EIO;
6796 	} else {
6797 		rv = 0;
6798 	}
6799 
6800 vgen_dsend_exit:
6801 	RW_EXIT(&ldclp->rwlock);
6802 	RW_EXIT(&plistp->rwlock);
6803 	return (rv);
6804 
6805 }
6806 
6807 #if DEBUG
6808 
6809 /*
6810  * Print debug messages - set to 0xf to enable all msgs
6811  */
6812 static void
6813 debug_printf(const char *fname, vgen_t *vgenp,
6814     vgen_ldc_t *ldcp, const char *fmt, ...)
6815 {
6816 	char    buf[256];
6817 	char    *bufp = buf;
6818 	va_list ap;
6819 
6820 	if ((vgenp != NULL) && (vgenp->vnetp != NULL)) {
6821 		(void) sprintf(bufp, "vnet%d:",
6822 		    ((vnet_t *)(vgenp->vnetp))->instance);
6823 		bufp += strlen(bufp);
6824 	}
6825 	if (ldcp != NULL) {
6826 		(void) sprintf(bufp, "ldc(%ld):", ldcp->ldc_id);
6827 		bufp += strlen(bufp);
6828 	}
6829 	(void) sprintf(bufp, "%s: ", fname);
6830 	bufp += strlen(bufp);
6831 
6832 	va_start(ap, fmt);
6833 	(void) vsprintf(bufp, fmt, ap);
6834 	va_end(ap);
6835 
6836 	if ((ldcp == NULL) ||(vgendbg_ldcid == -1) ||
6837 	    (vgendbg_ldcid == ldcp->ldc_id)) {
6838 		cmn_err(CE_CONT, "%s\n", buf);
6839 	}
6840 }
6841 #endif
6842