xref: /titanic_51/usr/src/uts/sun4v/io/vnet_gen.c (revision b9bd317cda1afb3a01f4812de73e8cec888cbbd7)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/errno.h>
31 #include <sys/param.h>
32 #include <sys/stream.h>
33 #include <sys/strsubr.h>
34 #include <sys/kmem.h>
35 #include <sys/conf.h>
36 #include <sys/devops.h>
37 #include <sys/ksynch.h>
38 #include <sys/stat.h>
39 #include <sys/modctl.h>
40 #include <sys/debug.h>
41 #include <sys/ethernet.h>
42 #include <sys/ddi.h>
43 #include <sys/sunddi.h>
44 #include <sys/strsun.h>
45 #include <sys/note.h>
46 #include <sys/mac.h>
47 #include <sys/mac_ether.h>
48 #include <sys/ldc.h>
49 #include <sys/mach_descrip.h>
50 #include <sys/mdeg.h>
51 #include <net/if.h>
52 #include <sys/vnet.h>
53 #include <sys/vio_mailbox.h>
54 #include <sys/vio_common.h>
55 #include <sys/vnet_common.h>
56 #include <sys/vnet_mailbox.h>
57 #include <sys/vio_util.h>
58 #include <sys/vnet_gen.h>
59 #include <sys/atomic.h>
60 #include <sys/callb.h>
61 #include <sys/sdt.h>
62 #include <sys/intr.h>
63 #include <sys/pattr.h>
64 #include <sys/vlan.h>
65 
66 /*
67  * Implementation of the mac functionality for vnet using the
68  * generic(default) transport layer of sun4v Logical Domain Channels(LDC).
69  */
70 
71 /*
72  * Function prototypes.
73  */
74 /* vgen proxy entry points */
75 int vgen_init(void *vnetp, uint64_t regprop, dev_info_t *vnetdip,
76     const uint8_t *macaddr, void **vgenhdl);
77 int vgen_uninit(void *arg);
78 int vgen_dds_tx(void *arg, void *dmsg);
79 static int vgen_start(void *arg);
80 static void vgen_stop(void *arg);
81 static mblk_t *vgen_tx(void *arg, mblk_t *mp);
82 static int vgen_multicst(void *arg, boolean_t add,
83 	const uint8_t *mca);
84 static int vgen_promisc(void *arg, boolean_t on);
85 static int vgen_unicst(void *arg, const uint8_t *mca);
86 static int vgen_stat(void *arg, uint_t stat, uint64_t *val);
87 static void vgen_ioctl(void *arg, queue_t *wq, mblk_t *mp);
88 
89 /* vgen internal functions */
90 static int vgen_read_mdprops(vgen_t *vgenp);
91 static void vgen_update_md_prop(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
92 static void vgen_read_pri_eth_types(vgen_t *vgenp, md_t *mdp,
93 	mde_cookie_t node);
94 static void vgen_detach_ports(vgen_t *vgenp);
95 static void vgen_port_detach(vgen_port_t *portp);
96 static void vgen_port_list_insert(vgen_port_t *portp);
97 static void vgen_port_list_remove(vgen_port_t *portp);
98 static vgen_port_t *vgen_port_lookup(vgen_portlist_t *plistp,
99 	int port_num);
100 static int vgen_mdeg_reg(vgen_t *vgenp);
101 static void vgen_mdeg_unreg(vgen_t *vgenp);
102 static int vgen_mdeg_cb(void *cb_argp, mdeg_result_t *resp);
103 static int vgen_mdeg_port_cb(void *cb_argp, mdeg_result_t *resp);
104 static int vgen_add_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
105 static int vgen_port_read_props(vgen_port_t *portp, vgen_t *vgenp, md_t *mdp,
106 	mde_cookie_t mdex);
107 static int vgen_remove_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
108 static int vgen_port_attach(vgen_port_t *portp);
109 static void vgen_port_detach_mdeg(vgen_port_t *portp);
111 static int vgen_update_port(vgen_t *vgenp, md_t *curr_mdp,
112 	mde_cookie_t curr_mdex, md_t *prev_mdp, mde_cookie_t prev_mdex);
113 static uint64_t	vgen_port_stat(vgen_port_t *portp, uint_t stat);
114 
115 static int vgen_ldc_attach(vgen_port_t *portp, uint64_t ldc_id);
116 static void vgen_ldc_detach(vgen_ldc_t *ldcp);
117 static int vgen_alloc_tx_ring(vgen_ldc_t *ldcp);
118 static void vgen_free_tx_ring(vgen_ldc_t *ldcp);
119 static void vgen_init_ports(vgen_t *vgenp);
120 static void vgen_port_init(vgen_port_t *portp);
121 static void vgen_uninit_ports(vgen_t *vgenp);
122 static void vgen_port_uninit(vgen_port_t *portp);
123 static void vgen_init_ldcs(vgen_port_t *portp);
124 static void vgen_uninit_ldcs(vgen_port_t *portp);
125 static int vgen_ldc_init(vgen_ldc_t *ldcp);
126 static void vgen_ldc_uninit(vgen_ldc_t *ldcp);
127 static int vgen_init_tbufs(vgen_ldc_t *ldcp);
128 static void vgen_uninit_tbufs(vgen_ldc_t *ldcp);
129 static void vgen_clobber_tbufs(vgen_ldc_t *ldcp);
130 static void vgen_clobber_rxds(vgen_ldc_t *ldcp);
131 static uint64_t	vgen_ldc_stat(vgen_ldc_t *ldcp, uint_t stat);
132 static uint_t vgen_ldc_cb(uint64_t event, caddr_t arg);
133 static int vgen_portsend(vgen_port_t *portp, mblk_t *mp);
134 static int vgen_ldcsend(void *arg, mblk_t *mp);
135 static void vgen_ldcsend_pkt(void *arg, mblk_t *mp);
136 static int vgen_ldcsend_dring(void *arg, mblk_t *mp);
137 static void vgen_reclaim(vgen_ldc_t *ldcp);
138 static void vgen_reclaim_dring(vgen_ldc_t *ldcp);
139 static int vgen_num_txpending(vgen_ldc_t *ldcp);
140 static int vgen_tx_dring_full(vgen_ldc_t *ldcp);
141 static int vgen_ldc_txtimeout(vgen_ldc_t *ldcp);
142 static void vgen_ldc_watchdog(void *arg);
143 
144 /* vgen handshake functions */
145 static vgen_ldc_t *vh_nextphase(vgen_ldc_t *ldcp);
146 static int vgen_sendmsg(vgen_ldc_t *ldcp, caddr_t msg,  size_t msglen,
147 	boolean_t caller_holds_lock);
148 static int vgen_send_version_negotiate(vgen_ldc_t *ldcp);
149 static int vgen_send_attr_info(vgen_ldc_t *ldcp);
150 static int vgen_send_dring_reg(vgen_ldc_t *ldcp);
151 static int vgen_send_rdx_info(vgen_ldc_t *ldcp);
152 static int vgen_send_dring_data(vgen_ldc_t *ldcp, uint32_t start, int32_t end);
153 static int vgen_send_mcast_info(vgen_ldc_t *ldcp);
154 static int vgen_handshake_phase2(vgen_ldc_t *ldcp);
155 static void vgen_handshake_reset(vgen_ldc_t *ldcp);
156 static void vgen_reset_hphase(vgen_ldc_t *ldcp);
157 static void vgen_handshake(vgen_ldc_t *ldcp);
158 static int vgen_handshake_done(vgen_ldc_t *ldcp);
159 static void vgen_handshake_retry(vgen_ldc_t *ldcp);
160 static int vgen_handle_version_negotiate(vgen_ldc_t *ldcp,
161 	vio_msg_tag_t *tagp);
162 static int vgen_handle_attr_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
163 static int vgen_handle_dring_reg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
164 static int vgen_handle_rdx_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
165 static int vgen_handle_mcast_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
166 static int vgen_handle_ctrlmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
167 static void vgen_handle_pkt_data_nop(void *arg1, void *arg2, uint32_t msglen);
168 static void vgen_handle_pkt_data(void *arg1, void *arg2, uint32_t msglen);
169 static int vgen_handle_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
170 static int vgen_handle_dring_data_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
171 static int vgen_process_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
172 static int vgen_handle_dring_data_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
173 static int vgen_handle_dring_data_nack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
174 static int vgen_send_dring_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp,
175 	uint32_t start, int32_t end, uint8_t pstate);
176 static int vgen_handle_datamsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp,
177 	uint32_t msglen);
178 static void vgen_handle_errmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
179 static void vgen_handle_evt_up(vgen_ldc_t *ldcp);
180 static void vgen_handle_evt_reset(vgen_ldc_t *ldcp);
181 static int vgen_check_sid(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
182 static int vgen_check_datamsg_seq(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
183 static caddr_t vgen_print_ethaddr(uint8_t *a, char *ebuf);
184 static void vgen_hwatchdog(void *arg);
185 static void vgen_print_attr_info(vgen_ldc_t *ldcp, int endpoint);
186 static void vgen_print_hparams(vgen_hparams_t *hp);
187 static void vgen_print_ldcinfo(vgen_ldc_t *ldcp);
188 static void vgen_stop_rcv_thread(vgen_ldc_t *ldcp);
189 static void vgen_ldc_rcv_worker(void *arg);
190 static void vgen_handle_evt_read(vgen_ldc_t *ldcp);
191 static void vgen_rx(vgen_ldc_t *ldcp, mblk_t *bp);
192 static void vgen_set_vnet_proto_ops(vgen_ldc_t *ldcp);
193 static void vgen_reset_vnet_proto_ops(vgen_ldc_t *ldcp);
194 
195 /* VLAN routines */
196 static void vgen_vlan_read_ids(void *arg, int type, md_t *mdp,
197 	mde_cookie_t node, uint16_t *pvidp, uint16_t **vidspp,
198 	uint16_t *nvidsp, uint16_t *default_idp);
199 static void vgen_vlan_create_hash(vgen_port_t *portp);
200 static void vgen_vlan_destroy_hash(vgen_port_t *portp);
201 static void vgen_vlan_add_ids(vgen_port_t *portp);
202 static void vgen_vlan_remove_ids(vgen_port_t *portp);
203 static boolean_t vgen_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid);
204 static boolean_t vgen_frame_lookup_vid(vnet_t *vnetp, struct ether_header *ehp,
205 	uint16_t *vidp);
206 static mblk_t *vgen_vlan_frame_fixtag(vgen_port_t *portp, mblk_t *mp,
207 	boolean_t is_tagged, uint16_t vid);
208 static void vgen_vlan_unaware_port_reset(vgen_port_t *portp);
209 static void vgen_reset_vlan_unaware_ports(vgen_t *vgenp);
210 static int vgen_dds_rx(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
211 
212 /* externs */
213 extern void vnet_dds_rx(void *arg, void *dmsg);
214 
215 /*
216  * The handshake process consists of 5 phases defined below, with VH_PHASE0
217  * being the pre-handshake phase and VH_DONE being the phase that indicates
218  * successful completion of all phases.
219  * Each phase may have one or more handshake states, all of which must
220  * complete successfully before moving to the next phase.
221  * Refer to the functions vgen_handshake() and vgen_handshake_done() for
222  * more details.
223  */
224 /* handshake phases */
225 enum {	VH_PHASE0, VH_PHASE1, VH_PHASE2, VH_PHASE3, VH_DONE = 0x80 };
226 
227 /* handshake states */
228 enum {
229 
230 	VER_INFO_SENT	=	0x1,
231 	VER_ACK_RCVD	=	0x2,
232 	VER_INFO_RCVD	=	0x4,
233 	VER_ACK_SENT	=	0x8,
234 	VER_NEGOTIATED	=	(VER_ACK_RCVD | VER_ACK_SENT),
235 
236 	ATTR_INFO_SENT	=	0x10,
237 	ATTR_ACK_RCVD	=	0x20,
238 	ATTR_INFO_RCVD	=	0x40,
239 	ATTR_ACK_SENT	=	0x80,
240 	ATTR_INFO_EXCHANGED	=	(ATTR_ACK_RCVD | ATTR_ACK_SENT),
241 
242 	DRING_INFO_SENT	=	0x100,
243 	DRING_ACK_RCVD	=	0x200,
244 	DRING_INFO_RCVD	=	0x400,
245 	DRING_ACK_SENT	=	0x800,
246 	DRING_INFO_EXCHANGED	=	(DRING_ACK_RCVD | DRING_ACK_SENT),
247 
248 	RDX_INFO_SENT	=	0x1000,
249 	RDX_ACK_RCVD	=	0x2000,
250 	RDX_INFO_RCVD	=	0x4000,
251 	RDX_ACK_SENT	=	0x8000,
252 	RDX_EXCHANGED	=	(RDX_ACK_RCVD | RDX_ACK_SENT)
253 
254 };
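
/*
 * Illustrative sketch (not part of the original source): completion of a
 * phase is determined by checking that all of the state bits defined above
 * for that phase are set. Assuming a per-channel hstate field that
 * accumulates these bits (hphase is used elsewhere in this file), a
 * version-phase check would look like:
 *
 *	if (ldcp->hphase == VH_PHASE1 &&
 *	    (ldcp->hstate & VER_NEGOTIATED) == VER_NEGOTIATED) {
 *		... version negotiation complete, move to VH_PHASE2 ...
 *	}
 *
 * See vgen_handshake() and vgen_handshake_done() for the actual checks.
 */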
255 
256 #define	VGEN_PRI_ETH_DEFINED(vgenp)	((vgenp)->pri_num_types != 0)
257 
258 #define	LDC_LOCK(ldcp)	\
259 				mutex_enter(&((ldcp)->cblock));\
260 				mutex_enter(&((ldcp)->rxlock));\
261 				mutex_enter(&((ldcp)->wrlock));\
262 				mutex_enter(&((ldcp)->txlock));\
263 				mutex_enter(&((ldcp)->tclock));
264 #define	LDC_UNLOCK(ldcp)	\
265 				mutex_exit(&((ldcp)->tclock));\
266 				mutex_exit(&((ldcp)->txlock));\
267 				mutex_exit(&((ldcp)->wrlock));\
268 				mutex_exit(&((ldcp)->rxlock));\
269 				mutex_exit(&((ldcp)->cblock));
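
/*
 * Usage sketch (illustrative only): the macros above take and drop all of
 * the per-channel locks in a single, fixed order (cblock, rxlock, wrlock,
 * txlock, tclock), which keeps the lock ordering consistent wherever the
 * whole channel must be quiesced, e.g.
 *
 *	LDC_LOCK(ldcp);
 *	... reset/uninitialize the channel while holding all locks ...
 *	LDC_UNLOCK(ldcp);
 */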
270 
271 #define	VGEN_VER_EQ(ldcp, major, minor)	\
272 	((ldcp)->local_hparams.ver_major == (major) &&	\
273 	    (ldcp)->local_hparams.ver_minor == (minor))
274 
275 #define	VGEN_VER_LT(ldcp, major, minor)	\
276 	(((ldcp)->local_hparams.ver_major < (major)) ||	\
277 	    ((ldcp)->local_hparams.ver_major == (major) &&	\
278 	    (ldcp)->local_hparams.ver_minor < (minor)))
279 
280 #define	VGEN_VER_GTEQ(ldcp, major, minor)	\
281 	(((ldcp)->local_hparams.ver_major > (major)) ||	\
282 	    ((ldcp)->local_hparams.ver_major == (major) &&	\
283 	    (ldcp)->local_hparams.ver_minor >= (minor)))
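
/*
 * Example (illustrative): the macros above compare against the negotiated
 * version in the local handshake parameters, so version dependent code is
 * typically written as
 *
 *	if (VGEN_VER_GTEQ(ldcp, 1, 3)) {
 *		... handling for protocol version 1.3 and above ...
 *	}
 */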
284 
285 static struct ether_addr etherbroadcastaddr = {
286 	0xff, 0xff, 0xff, 0xff, 0xff, 0xff
287 };
288 /*
289  * MIB II broadcast/multicast packets
290  */
291 #define	IS_BROADCAST(ehp) \
292 		(ether_cmp(&ehp->ether_dhost, &etherbroadcastaddr) == 0)
293 #define	IS_MULTICAST(ehp) \
294 		((ehp->ether_dhost.ether_addr_octet[0] & 01) == 1)
295 
296 /*
297  * Property names
298  */
299 static char macaddr_propname[] = "mac-address";
300 static char rmacaddr_propname[] = "remote-mac-address";
301 static char channel_propname[] = "channel-endpoint";
302 static char reg_propname[] = "reg";
303 static char port_propname[] = "port";
304 static char swport_propname[] = "switch-port";
305 static char id_propname[] = "id";
306 static char vdev_propname[] = "virtual-device";
307 static char vnet_propname[] = "network";
308 static char pri_types_propname[] = "priority-ether-types";
309 static char vgen_pvid_propname[] = "port-vlan-id";
310 static char vgen_vid_propname[] = "vlan-id";
311 static char vgen_dvid_propname[] = "default-vlan-id";
312 static char port_pvid_propname[] = "remote-port-vlan-id";
313 static char port_vid_propname[] = "remote-vlan-id";
314 
315 /* versions supported - in decreasing order */
316 static vgen_ver_t vgen_versions[VGEN_NUM_VER] = { {1, 3} };
317 
318 /* Tunables */
319 uint32_t vgen_hwd_interval = 5;		/* handshake watchdog freq in sec */
320 uint32_t vgen_max_hretries = VNET_NUM_HANDSHAKES; /* # of handshake retries */
321 uint32_t vgen_ldcwr_retries = 10;	/* max # of ldc_write() retries */
322 uint32_t vgen_ldcup_retries = 5;	/* max # of ldc_up() retries */
323 uint32_t vgen_recv_delay = 1;		/* delay when rx descr not ready */
324 uint32_t vgen_recv_retries = 10;	/* retry when rx descr not ready */
325 uint32_t vgen_tx_retries = 0x4;		/* retry when tx descr not available */
326 uint32_t vgen_tx_delay = 0x30;		/* delay when tx descr not available */
327 
328 int vgen_rcv_thread_enabled = 1;	/* Enable receive thread */
329 
330 /*
331  * max # of packets accumulated prior to sending them up. It is best
332  * to keep this at 60% of the number of recieve buffers.
333  */
334 uint32_t vgen_chain_len = (VGEN_NRBUFS * 0.6);
335 
336 /*
337  * Tunables for each receive buffer size and number of buffers for
338  * each buffer size.
339  */
340 uint32_t vgen_rbufsz1 = VGEN_DBLK_SZ_128;
341 uint32_t vgen_rbufsz2 = VGEN_DBLK_SZ_256;
342 uint32_t vgen_rbufsz3 = VGEN_DBLK_SZ_2048;
343 
344 uint32_t vgen_nrbufs1 = VGEN_NRBUFS;
345 uint32_t vgen_nrbufs2 = VGEN_NRBUFS;
346 uint32_t vgen_nrbufs3 = VGEN_NRBUFS;
347 
348 /*
349  * In the absence of "priority-ether-types" property in MD, the following
350  * internal tunable can be set to specify a single priority ethertype.
351  */
352 uint64_t vgen_pri_eth_type = 0;
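
/*
 * Example (illustrative; assumes this file is built into the vnet module):
 * a single priority ethertype could be configured via /etc/system, e.g.
 *
 *	set vnet:vgen_pri_eth_type = 0x8906
 *
 * where 0x8906 is just a sample ether type value.
 */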
353 
354 /*
355  * Number of transmit priority buffers that are preallocated per device.
356  * This number is chosen to be a small value to throttle transmission
357  * of priority packets. Note: Must be a power of 2 for vio_create_mblks().
358  */
359 uint32_t vgen_pri_tx_nmblks = 64;
360 
361 uint32_t	vgen_vlan_nchains = 4;	/* # of chains in vlan id hash table */
362 
363 #ifdef DEBUG
364 /* flags to simulate error conditions for debugging */
365 int vgen_trigger_txtimeout = 0;
366 int vgen_trigger_rxlost = 0;
367 #endif
368 
369 /*
370  * Matching criteria passed to the MDEG to register interest
371  * in changes to 'virtual-device' nodes (i.e. vnet nodes) identified
372  * by their 'name' and 'cfg-handle' properties.
373  */
374 static md_prop_match_t vdev_prop_match[] = {
375 	{ MDET_PROP_STR,    "name"   },
376 	{ MDET_PROP_VAL,    "cfg-handle" },
377 	{ MDET_LIST_END,    NULL    }
378 };
379 
380 static mdeg_node_match_t vdev_match = { "virtual-device",
381 						vdev_prop_match };
382 
383 /* MD update matching structure */
384 static md_prop_match_t	vport_prop_match[] = {
385 	{ MDET_PROP_VAL,	"id" },
386 	{ MDET_LIST_END,	NULL }
387 };
388 
389 static mdeg_node_match_t vport_match = { "virtual-device-port",
390 					vport_prop_match };
391 
392 /* template for matching a particular vnet instance */
393 static mdeg_prop_spec_t vgen_prop_template[] = {
394 	{ MDET_PROP_STR,	"name",		"network" },
395 	{ MDET_PROP_VAL,	"cfg-handle",	NULL },
396 	{ MDET_LIST_END,	NULL,		NULL }
397 };
398 
399 #define	VGEN_SET_MDEG_PROP_INST(specp, val)	(specp)[1].ps_val = (val)
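
/*
 * Usage sketch of the template and macro above (this mirrors what
 * vgen_mdeg_reg() does later in this file; hdl is a local mdeg_handle_t):
 *
 *	bcopy(vgen_prop_template, pspecp, sizeof (vgen_prop_template));
 *	VGEN_SET_MDEG_PROP_INST(pspecp, vgenp->regprop);
 *	parentp->namep = "virtual-device";
 *	parentp->specp = pspecp;
 *	(void) mdeg_register(parentp, &vdev_match, vgen_mdeg_cb, vgenp, &hdl);
 */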
400 
401 static int vgen_mdeg_port_cb(void *cb_argp, mdeg_result_t *resp);
402 
403 static mac_callbacks_t vgen_m_callbacks = {
404 	0,
405 	vgen_stat,
406 	vgen_start,
407 	vgen_stop,
408 	vgen_promisc,
409 	vgen_multicst,
410 	vgen_unicst,
411 	vgen_tx,
412 	NULL,
413 	NULL,
414 	NULL
415 };
416 
417 /* externs */
418 extern pri_t	maxclsyspri;
419 extern proc_t	p0;
420 extern uint32_t vnet_ntxds;
421 extern uint32_t vnet_ldcwd_interval;
422 extern uint32_t vnet_ldcwd_txtimeout;
423 extern uint32_t vnet_ldc_mtu;
424 extern uint32_t vnet_nrbufs;
425 extern uint32_t	vnet_ethermtu;
426 extern uint16_t	vnet_default_vlan_id;
427 
428 #ifdef DEBUG
429 
430 extern int vnet_dbglevel;
431 static void debug_printf(const char *fname, vgen_t *vgenp,
432 	vgen_ldc_t *ldcp, const char *fmt, ...);
433 
434 /* -1 to print info for all LDCs, or an ldc_id for a specific LDC */
435 int vgendbg_ldcid = -1;
436 
437 /* simulate handshake error conditions for debug */
438 uint32_t vgen_hdbg;
439 #define	HDBG_VERSION	0x1
440 #define	HDBG_TIMEOUT	0x2
441 #define	HDBG_BAD_SID	0x4
442 #define	HDBG_OUT_STATE	0x8
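
/*
 * Illustrative (DEBUG builds only, not part of the original source): the
 * HDBG_* bits can be set in vgen_hdbg at run time to simulate handshake
 * failures, e.g. with mdb:
 *
 *	# mdb -kw
 *	> vgen_hdbg/W 0x1		(set HDBG_VERSION)
 */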
443 
444 #endif
445 
446 /*
447  * vgen_init() is called by an instance of vnet driver to initialize the
448  * corresponding generic proxy transport layer. The arguments passed by vnet
449  * are: an opaque pointer to the vnet instance, the value of its 'reg'
450  * property, a pointer to its dev_info_t, and its mac address. A pointer
451  * to vgen_t is passed back to vnet as a handle.
452  */
453 int
454 vgen_init(void *vnetp, uint64_t regprop, dev_info_t *vnetdip,
455     const uint8_t *macaddr, void **vgenhdl)
456 {
457 	vgen_t *vgenp;
458 	int instance;
459 	int rv;
460 
461 	if ((vnetp == NULL) || (vnetdip == NULL))
462 		return (DDI_FAILURE);
463 
464 	instance = ddi_get_instance(vnetdip);
465 
466 	DBG1(NULL, NULL, "vnet(%d): enter\n", instance);
467 
468 	vgenp = kmem_zalloc(sizeof (vgen_t), KM_SLEEP);
469 
470 	vgenp->vnetp = vnetp;
471 	vgenp->instance = instance;
472 	vgenp->regprop = regprop;
473 	vgenp->vnetdip = vnetdip;
474 	bcopy(macaddr, &(vgenp->macaddr), ETHERADDRL);
475 
476 	/* allocate multicast table */
477 	vgenp->mctab = kmem_zalloc(VGEN_INIT_MCTAB_SIZE *
478 	    sizeof (struct ether_addr), KM_SLEEP);
479 	vgenp->mccount = 0;
480 	vgenp->mcsize = VGEN_INIT_MCTAB_SIZE;
481 	vgenp->max_frame_size = vnet_ethermtu + sizeof (struct ether_header)
482 	    + VLAN_TAGSZ;
483 
484 	mutex_init(&vgenp->lock, NULL, MUTEX_DRIVER, NULL);
485 	rw_init(&vgenp->vgenports.rwlock, NULL, RW_DRIVER, NULL);
486 
487 	rv = vgen_read_mdprops(vgenp);
488 	if (rv != 0) {
489 		goto vgen_init_fail;
490 	}
491 
492 	/* register with MD event generator */
493 	rv = vgen_mdeg_reg(vgenp);
494 	if (rv != DDI_SUCCESS) {
495 		goto vgen_init_fail;
496 	}
497 
498 	*vgenhdl = (void *)vgenp;
499 
500 	DBG1(NULL, NULL, "vnet(%d): exit\n", instance);
501 	return (DDI_SUCCESS);
502 
503 vgen_init_fail:
504 	rw_destroy(&vgenp->vgenports.rwlock);
505 	mutex_destroy(&vgenp->lock);
506 	kmem_free(vgenp->mctab, VGEN_INIT_MCTAB_SIZE *
507 	    sizeof (struct ether_addr));
508 	if (VGEN_PRI_ETH_DEFINED(vgenp)) {
509 		kmem_free(vgenp->pri_types,
510 		    sizeof (uint16_t) * vgenp->pri_num_types);
511 		(void) vio_destroy_mblks(vgenp->pri_tx_vmp);
512 	}
513 	KMEM_FREE(vgenp);
514 	return (DDI_FAILURE);
515 }
516 
517 /*
518  * Called by vnet to undo the initializations done by vgen_init().
519  * The handle provided by generic transport during vgen_init() is the argument.
520  */
521 int
522 vgen_uninit(void *arg)
523 {
524 	vgen_t		*vgenp = (vgen_t *)arg;
525 	vio_mblk_pool_t	*rp;
526 	vio_mblk_pool_t	*nrp;
527 
528 	if (vgenp == NULL) {
529 		return (DDI_FAILURE);
530 	}
531 
532 	DBG1(vgenp, NULL, "enter\n");
533 
534 	/* unregister with MD event generator */
535 	vgen_mdeg_unreg(vgenp);
536 
537 	mutex_enter(&vgenp->lock);
538 
539 	/* detach all ports from the device */
540 	vgen_detach_ports(vgenp);
541 
542 	/*
543 	 * free any pending rx mblk pools that couldn't be freed
544 	 * previously during channel detach.
545 	 */
546 	rp = vgenp->rmp;
547 	while (rp != NULL) {
548 		nrp = vgenp->rmp = rp->nextp;
549 		if (vio_destroy_mblks(rp)) {
550 			vgenp->rmp = rp;
551 			mutex_exit(&vgenp->lock);
552 			return (DDI_FAILURE);
553 		}
554 		rp = nrp;
555 	}
556 
557 	/* free multicast table */
558 	kmem_free(vgenp->mctab, vgenp->mcsize * sizeof (struct ether_addr));
559 
560 	/* free pri_types table */
561 	if (VGEN_PRI_ETH_DEFINED(vgenp)) {
562 		kmem_free(vgenp->pri_types,
563 		    sizeof (uint16_t) * vgenp->pri_num_types);
564 		(void) vio_destroy_mblks(vgenp->pri_tx_vmp);
565 	}
566 
567 	mutex_exit(&vgenp->lock);
568 
569 	rw_destroy(&vgenp->vgenports.rwlock);
570 	mutex_destroy(&vgenp->lock);
571 
572 	DBG1(vgenp, NULL, "exit\n");
573 
574 	KMEM_FREE(vgenp);
575 
576 	return (DDI_SUCCESS);
577 }
578 
579 /* enable transmit/receive for the device */
580 int
581 vgen_start(void *arg)
582 {
583 	vgen_port_t	*portp = (vgen_port_t *)arg;
584 	vgen_t		*vgenp = portp->vgenp;
585 
586 	DBG1(vgenp, NULL, "enter\n");
587 	mutex_enter(&portp->lock);
588 	vgen_port_init(portp);
589 	portp->flags |= VGEN_STARTED;
590 	mutex_exit(&portp->lock);
591 	DBG1(vgenp, NULL, "exit\n");
592 
593 	return (DDI_SUCCESS);
594 }
595 
596 /* stop transmit/receive */
597 void
598 vgen_stop(void *arg)
599 {
600 	vgen_port_t	*portp = (vgen_port_t *)arg;
601 	vgen_t		*vgenp = portp->vgenp;
602 
603 	DBG1(vgenp, NULL, "enter\n");
604 
605 	mutex_enter(&portp->lock);
606 	vgen_port_uninit(portp);
607 	portp->flags &= ~(VGEN_STARTED);
608 	mutex_exit(&portp->lock);
609 	DBG1(vgenp, NULL, "exit\n");
610 
611 }
612 
613 /* vgen transmit function */
614 static mblk_t *
615 vgen_tx(void *arg, mblk_t *mp)
616 {
617 	int i;
618 	vgen_port_t *portp;
619 	int status = VGEN_FAILURE;
620 
621 	portp = (vgen_port_t *)arg;
622 	/*
623 	 * Retry so that we avoid reporting a failure
624 	 * to the upper layer. Returning a failure may cause the
625 	 * upper layer to go into single threaded mode, thereby
626 	 * causing performance degradation, especially for a large
627 	 * number of connections.
628 	 */
629 	for (i = 0; i < vgen_tx_retries; ) {
630 		status = vgen_portsend(portp, mp);
631 		if (status == VGEN_SUCCESS) {
632 			break;
633 		}
634 		if (++i < vgen_tx_retries)
635 			delay(drv_usectohz(vgen_tx_delay));
636 	}
637 	if (status != VGEN_SUCCESS) {
638 		/* failure */
639 		return (mp);
640 	}
641 	/* success */
642 	return (NULL);
643 }
644 
645 /*
646  * This function provides any necessary tagging/untagging of the frames
647  * that are being transmitted over the port. It first verifies the vlan
648  * membership of the destination(port) and drops the packet if the
649  * destination doesn't belong to the given vlan.
650  *
651  * Arguments:
652  *   portp:     port over which the frames should be transmitted
653  *   mp:        frame to be transmitted
654  *   is_tagged:
655  *              B_TRUE: indicates frame header contains the vlan tag already.
656  *              B_FALSE: indicates frame is untagged.
657  *   vid:       vlan in which the frame should be transmitted.
658  *
659  * Returns:
660  *              Success: frame(mblk_t *) after doing the necessary tag/untag.
661  *              Failure: NULL
662  */
663 static mblk_t *
664 vgen_vlan_frame_fixtag(vgen_port_t *portp, mblk_t *mp, boolean_t is_tagged,
665 	uint16_t vid)
666 {
667 	vgen_t				*vgenp;
668 	boolean_t			dst_tagged;
669 	int				rv;
670 
671 	vgenp = portp->vgenp;
672 
673 	/*
674 	 * If the packet is going to a vnet:
675 	 *   Check if the destination vnet is in the same vlan.
676 	 *   Check the frame header if tag or untag is needed.
677 	 *
678 	 * We do not check the above conditions if the packet is going to vsw:
679 	 *   vsw must be present implicitly in all the vlans that a vnet device
680 	 *   is configured into; even if vsw itself is not assigned to those
681 	 *   vlans as an interface. For instance, the packet might be destined
682 	 *   to another vnet(indirectly through vsw) or to an external host
683 	 *   which is in the same vlan as this vnet and vsw itself may not be
684 	 *   present in that vlan. Similarly, packets going to vsw must always
685 	 *   be tagged(unless in the default-vlan) if not already tagged,
686 	 *   as we do not know the final destination. This is needed because
687 	 *   vsw must invoke its switching function only after tagging
688 	 *   the packet; otherwise, after the switching function determines the
689 	 *   destination, we cannot figure out if the destination belongs to
690 	 *   the same vlan that the frame originated from and if it needs tag/
691 	 *   untag. Note that vsw will tag the packet itself when it receives
692 	 *   it over the channel from a client if needed. However, that is
693 	 *   needed only in the case of vlan unaware clients such as obp or
694 	 *   earlier versions of vnet.
695 	 *
696 	 */
697 	if (portp != vgenp->vsw_portp) {
698 		/*
699 		 * Packet going to a vnet. Check if the destination vnet is in
700 		 * the same vlan. Then check the frame header if tag/untag is
701 		 * needed.
702 		 */
703 		rv = vgen_vlan_lookup(portp->vlan_hashp, vid);
704 		if (rv == B_FALSE) {
705 			/* drop the packet */
706 			freemsg(mp);
707 			return (NULL);
708 		}
709 
710 		/* is the destination tagged or untagged in this vlan? */
711 		dst_tagged =
712 		    (vid == portp->pvid) ? B_FALSE : B_TRUE;
713 
714 		if (is_tagged == dst_tagged) {
715 			/* no tagging/untagging needed */
716 			return (mp);
717 		}
718 
719 		if (is_tagged == B_TRUE) {
720 			/* frame is tagged; destination needs untagged */
721 			mp = vnet_vlan_remove_tag(mp);
722 			return (mp);
723 		}
724 
725 		/* (is_tagged == B_FALSE): fallthru to tag tx packet: */
726 	}
727 
728 	/*
729 	 * Packet going to a vnet needs tagging.
730 	 * OR
731 	 * If the packet is going to vsw, then it must be tagged in all cases:
732 	 * unknown unicast, broadcast/multicast or to vsw interface.
733 	 */
734 
735 	if (is_tagged == B_FALSE) {
736 		mp = vnet_vlan_insert_tag(mp, vid);
737 	}
738 
739 	return (mp);
740 }
741 
742 /* transmit packets over the given port */
743 static int
744 vgen_portsend(vgen_port_t *portp, mblk_t *mp)
745 {
746 	vgen_ldclist_t		*ldclp;
747 	vgen_ldc_t		*ldcp;
748 	int			status;
749 	int			rv = VGEN_SUCCESS;
750 	vgen_t			*vgenp = portp->vgenp;
751 	vnet_t			*vnetp = vgenp->vnetp;
752 	boolean_t		is_tagged;
753 	boolean_t		dec_refcnt = B_FALSE;
754 	uint16_t		vlan_id;
755 	struct ether_header	*ehp;
756 
757 	if (portp->use_vsw_port) {
758 		(void) atomic_inc_32(&vgenp->vsw_port_refcnt);
759 		portp = portp->vgenp->vsw_portp;
760 		dec_refcnt = B_TRUE;
761 	}
762 	if (portp == NULL) {
763 		return (VGEN_FAILURE);
764 	}
765 
766 	/*
767 	 * Determine the vlan id that the frame belongs to.
768 	 */
769 	ehp = (struct ether_header *)mp->b_rptr;
770 	is_tagged = vgen_frame_lookup_vid(vnetp, ehp, &vlan_id);
771 
772 	if (vlan_id == vnetp->default_vlan_id) {
773 
774 		/* Frames in default vlan must be untagged */
775 		ASSERT(is_tagged == B_FALSE);
776 
777 		/*
778 		 * If the destination is a vnet-port verify it belongs to the
779 		 * default vlan; otherwise drop the packet. We do not need
780 		 * this check for vsw-port, as it should implicitly belong to
781 		 * this vlan; see comments in vgen_vlan_frame_fixtag().
782 		 */
783 		if (portp != vgenp->vsw_portp &&
784 		    portp->pvid != vnetp->default_vlan_id) {
785 			freemsg(mp);
786 			goto portsend_ret;
787 		}
788 
789 	} else {	/* frame not in default-vlan */
790 
791 		mp = vgen_vlan_frame_fixtag(portp, mp, is_tagged, vlan_id);
792 		if (mp == NULL) {
793 			goto portsend_ret;
794 		}
795 
796 	}
797 
798 	ldclp = &portp->ldclist;
799 	READ_ENTER(&ldclp->rwlock);
800 	/*
801 	 * NOTE: for now, we will assume we have a single channel.
802 	 */
803 	if (ldclp->headp == NULL) {
804 		RW_EXIT(&ldclp->rwlock);
805 		rv = VGEN_FAILURE;
806 		goto portsend_ret;
807 	}
808 	ldcp = ldclp->headp;
809 
810 	status = ldcp->tx(ldcp, mp);
811 
812 	RW_EXIT(&ldclp->rwlock);
813 
814 	if (status != VGEN_TX_SUCCESS) {
815 		rv = VGEN_FAILURE;
816 	}
817 
818 portsend_ret:
819 	if (dec_refcnt == B_TRUE) {
820 		(void) atomic_dec_32(&vgenp->vsw_port_refcnt);
821 	}
822 	return (rv);
823 }
824 
825 /*
826  * Wrapper function to transmit normal and/or priority frames over the channel.
827  */
828 static int
829 vgen_ldcsend(void *arg, mblk_t *mp)
830 {
831 	vgen_ldc_t		*ldcp = (vgen_ldc_t *)arg;
832 	int			status;
833 	struct ether_header	*ehp;
834 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
835 	uint32_t		num_types;
836 	uint16_t		*types;
837 	int			i;
838 
839 	ASSERT(VGEN_PRI_ETH_DEFINED(vgenp));
840 
841 	num_types = vgenp->pri_num_types;
842 	types = vgenp->pri_types;
843 	ehp = (struct ether_header *)mp->b_rptr;
844 
845 	for (i = 0; i < num_types; i++) {
846 
847 		if (ehp->ether_type == types[i]) {
848 			/* priority frame, use pri tx function */
849 			vgen_ldcsend_pkt(ldcp, mp);
850 			return (VGEN_SUCCESS);
851 		}
852 
853 	}
854 
855 	status  = vgen_ldcsend_dring(ldcp, mp);
856 
857 	return (status);
858 }
859 
860 /*
861  * This function handles ldc channel reset while in the context
862  * of transmit routines: vgen_ldcsend_pkt() or vgen_ldcsend_dring().
863  */
864 static void
865 vgen_ldcsend_process_reset(vgen_ldc_t *ldcp)
866 {
867 	ldc_status_t	istatus;
868 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
869 
870 	if (mutex_tryenter(&ldcp->cblock)) {
871 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
872 			DWARN(vgenp, ldcp, "ldc_status() error\n");
873 		} else {
874 			ldcp->ldc_status = istatus;
875 		}
876 		if (ldcp->ldc_status != LDC_UP) {
877 			vgen_handle_evt_reset(ldcp);
878 		}
879 		mutex_exit(&ldcp->cblock);
880 	}
881 }
882 
883 /*
884  * This function transmits the frame in the payload of a raw data
885  * (VIO_PKT_DATA) message. Thus, it provides an Out-Of-Band path to
886  * send special frames with high priorities, without going through
887  * the normal data path, which uses the descriptor ring mechanism.
888  */
889 static void
890 vgen_ldcsend_pkt(void *arg, mblk_t *mp)
891 {
892 	vgen_ldc_t		*ldcp = (vgen_ldc_t *)arg;
893 	vio_raw_data_msg_t	*pkt;
894 	mblk_t			*bp;
895 	mblk_t			*nmp = NULL;
896 	caddr_t			dst;
897 	uint32_t		mblksz;
898 	uint32_t		size;
899 	uint32_t		nbytes;
900 	int			rv;
901 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
902 	vgen_stats_t		*statsp = &ldcp->stats;
903 
904 	/* drop the packet if ldc is not up or handshake is not done */
905 	if (ldcp->ldc_status != LDC_UP) {
906 		(void) atomic_inc_32(&statsp->tx_pri_fail);
907 		DWARN(vgenp, ldcp, "status(%d), dropping packet\n",
908 		    ldcp->ldc_status);
909 		goto send_pkt_exit;
910 	}
911 
912 	if (ldcp->hphase != VH_DONE) {
913 		(void) atomic_inc_32(&statsp->tx_pri_fail);
914 		DWARN(vgenp, ldcp, "hphase(%x), dropping packet\n",
915 		    ldcp->hphase);
916 		goto send_pkt_exit;
917 	}
918 
919 	size = msgsize(mp);
920 
921 	/* frame size bigger than available payload len of raw data msg ? */
922 	if (size > (size_t)(ldcp->msglen - VIO_PKT_DATA_HDRSIZE)) {
923 		(void) atomic_inc_32(&statsp->tx_pri_fail);
924 		DWARN(vgenp, ldcp, "invalid size(%d)\n", size);
925 		goto send_pkt_exit;
926 	}
927 
928 	if (size < ETHERMIN)
929 		size = ETHERMIN;
930 
931 	/* alloc space for a raw data message */
932 	nmp = vio_allocb(vgenp->pri_tx_vmp);
933 	if (nmp == NULL) {
934 		(void) atomic_inc_32(&statsp->tx_pri_fail);
935 		DWARN(vgenp, ldcp, "vio_allocb failed\n");
936 		goto send_pkt_exit;
937 	}
938 	pkt = (vio_raw_data_msg_t *)nmp->b_rptr;
939 
940 	/* copy frame into the payload of raw data message */
941 	dst = (caddr_t)pkt->data;
942 	for (bp = mp; bp != NULL; bp = bp->b_cont) {
943 		mblksz = MBLKL(bp);
944 		bcopy(bp->b_rptr, dst, mblksz);
945 		dst += mblksz;
946 	}
947 
948 	/* setup the raw data msg */
949 	pkt->tag.vio_msgtype = VIO_TYPE_DATA;
950 	pkt->tag.vio_subtype = VIO_SUBTYPE_INFO;
951 	pkt->tag.vio_subtype_env = VIO_PKT_DATA;
952 	pkt->tag.vio_sid = ldcp->local_sid;
953 	nbytes = VIO_PKT_DATA_HDRSIZE + size;
954 
955 	/* send the msg over ldc */
956 	rv = vgen_sendmsg(ldcp, (caddr_t)pkt, nbytes, B_FALSE);
957 	if (rv != VGEN_SUCCESS) {
958 		(void) atomic_inc_32(&statsp->tx_pri_fail);
959 		DWARN(vgenp, ldcp, "Error sending priority frame\n");
960 		if (rv == ECONNRESET) {
961 			vgen_ldcsend_process_reset(ldcp);
962 		}
963 		goto send_pkt_exit;
964 	}
965 
966 	/* update stats */
967 	(void) atomic_inc_64(&statsp->tx_pri_packets);
968 	(void) atomic_add_64(&statsp->tx_pri_bytes, size);
969 
970 send_pkt_exit:
971 	if (nmp != NULL)
972 		freemsg(nmp);
973 	freemsg(mp);
974 }
975 
976 /*
977  * This function transmits normal (non-priority) data frames over
978  * the channel. It queues the frame into the transmit descriptor ring
979  * and sends a VIO_DRING_DATA message if needed, to wake up the
980  * peer to (re)start processing.
981  */
982 static int
983 vgen_ldcsend_dring(void *arg, mblk_t *mp)
984 {
985 	vgen_ldc_t		*ldcp = (vgen_ldc_t *)arg;
986 	vgen_private_desc_t	*tbufp;
987 	vgen_private_desc_t	*rtbufp;
988 	vnet_public_desc_t	*rtxdp;
989 	vgen_private_desc_t	*ntbufp;
990 	vnet_public_desc_t	*txdp;
991 	vio_dring_entry_hdr_t	*hdrp;
992 	vgen_stats_t		*statsp;
993 	struct ether_header	*ehp;
994 	boolean_t		is_bcast = B_FALSE;
995 	boolean_t		is_mcast = B_FALSE;
996 	size_t			mblksz;
997 	caddr_t			dst;
998 	mblk_t			*bp;
999 	size_t			size;
1000 	int			rv = 0;
1001 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
1002 	vgen_hparams_t		*lp = &ldcp->local_hparams;
1003 
1004 	statsp = &ldcp->stats;
1005 	size = msgsize(mp);
1006 
1007 	DBG1(vgenp, ldcp, "enter\n");
1008 
1009 	if (ldcp->ldc_status != LDC_UP) {
1010 		DWARN(vgenp, ldcp, "status(%d), dropping packet\n",
1011 		    ldcp->ldc_status);
1012 		/* retry ldc_up() if needed */
1013 		if (ldcp->flags & CHANNEL_STARTED)
1014 			(void) ldc_up(ldcp->ldc_handle);
1015 		goto send_dring_exit;
1016 	}
1017 
1018 	/* drop the packet if the handshake is not done */
1019 	if (ldcp->hphase != VH_DONE) {
1020 		DWARN(vgenp, ldcp, "hphase(%x), dropping packet\n",
1021 		    ldcp->hphase);
1022 		goto send_dring_exit;
1023 	}
1024 
1025 	if (size > (size_t)lp->mtu) {
1026 		DWARN(vgenp, ldcp, "invalid size(%d)\n", size);
1027 		goto send_dring_exit;
1028 	}
1029 	if (size < ETHERMIN)
1030 		size = ETHERMIN;
1031 
1032 	ehp = (struct ether_header *)mp->b_rptr;
1033 	is_bcast = IS_BROADCAST(ehp);
1034 	is_mcast = IS_MULTICAST(ehp);
1035 
1036 	mutex_enter(&ldcp->txlock);
1037 	/*
1038 	 * allocate a descriptor
1039 	 */
1040 	tbufp = ldcp->next_tbufp;
1041 	ntbufp = NEXTTBUF(ldcp, tbufp);
1042 	if (ntbufp == ldcp->cur_tbufp) { /* out of tbufs/txds */
1043 
1044 		mutex_enter(&ldcp->tclock);
1045 		/* Try reclaiming now */
1046 		vgen_reclaim_dring(ldcp);
1047 		ldcp->reclaim_lbolt = ddi_get_lbolt();
1048 
1049 		if (ntbufp == ldcp->cur_tbufp) {
1050 			/* Now we are really out of tbuf/txds */
1051 			ldcp->need_resched = B_TRUE;
1052 			mutex_exit(&ldcp->tclock);
1053 
1054 			statsp->tx_no_desc++;
1055 			mutex_exit(&ldcp->txlock);
1056 
1057 			return (VGEN_TX_NORESOURCES);
1058 		}
1059 		mutex_exit(&ldcp->tclock);
1060 	}
1061 	/* update next available tbuf in the ring and update tx index */
1062 	ldcp->next_tbufp = ntbufp;
1063 	INCR_TXI(ldcp->next_txi, ldcp);
1064 
1065 	/* Mark the buffer busy before releasing the lock */
1066 	tbufp->flags = VGEN_PRIV_DESC_BUSY;
1067 	mutex_exit(&ldcp->txlock);
1068 
1069 	/* copy data into pre-allocated transmit buffer */
1070 	dst = tbufp->datap + VNET_IPALIGN;
1071 	for (bp = mp; bp != NULL; bp = bp->b_cont) {
1072 		mblksz = MBLKL(bp);
1073 		bcopy(bp->b_rptr, dst, mblksz);
1074 		dst += mblksz;
1075 	}
1076 
1077 	tbufp->datalen = size;
1078 
1079 	/* initialize the corresponding public descriptor (txd) */
1080 	txdp = tbufp->descp;
1081 	hdrp = &txdp->hdr;
1082 	txdp->nbytes = size;
1083 	txdp->ncookies = tbufp->ncookies;
1084 	bcopy((tbufp->memcookie), (txdp->memcookie),
1085 	    tbufp->ncookies * sizeof (ldc_mem_cookie_t));
1086 
1087 	mutex_enter(&ldcp->wrlock);
1088 	/*
1089 	 * If the flags are not set to BUSY, it implies that the clobber
1090 	 * was done while we were copying the data. In such a case,
1091 	 * discard the packet and return.
1092 	 */
1093 	if (tbufp->flags != VGEN_PRIV_DESC_BUSY) {
1094 		statsp->oerrors++;
1095 		mutex_exit(&ldcp->wrlock);
1096 		goto send_dring_exit;
1097 	}
1098 	hdrp->dstate = VIO_DESC_READY;
1099 
1100 	/* update stats */
1101 	statsp->opackets++;
1102 	statsp->obytes += size;
1103 	if (is_bcast)
1104 		statsp->brdcstxmt++;
1105 	else if (is_mcast)
1106 		statsp->multixmt++;
1107 
1108 	/* send dring datamsg to the peer */
1109 	if (ldcp->resched_peer) {
1110 
1111 		rtbufp = &ldcp->tbufp[ldcp->resched_peer_txi];
1112 		rtxdp = rtbufp->descp;
1113 
1114 		if (rtxdp->hdr.dstate == VIO_DESC_READY) {
1115 
1116 			rv = vgen_send_dring_data(ldcp,
1117 			    (uint32_t)ldcp->resched_peer_txi, -1);
1118 			if (rv != 0) {
1119 				/* error: drop the packet */
1120 				DWARN(vgenp, ldcp, "vgen_send_dring_data "
1121 				    "failed: rv(%d) len(%d)\n",
1122 				    rv, size);
1123 				statsp->oerrors++;
1124 			} else {
1125 				ldcp->resched_peer = B_FALSE;
1126 			}
1127 
1128 		}
1129 
1130 	}
1131 
1132 	mutex_exit(&ldcp->wrlock);
1133 
1134 send_dring_exit:
1135 	if (rv == ECONNRESET) {
1136 		vgen_ldcsend_process_reset(ldcp);
1137 	}
1138 	freemsg(mp);
1139 	DBG1(vgenp, ldcp, "exit\n");
1140 	return (VGEN_TX_SUCCESS);
1141 }
1142 
1143 /* enable/disable a multicast address */
1144 int
1145 vgen_multicst(void *arg, boolean_t add, const uint8_t *mca)
1146 {
1147 	vgen_t			*vgenp;
1148 	vnet_mcast_msg_t	mcastmsg;
1149 	vio_msg_tag_t		*tagp;
1150 	vgen_port_t		*portp;
1151 	vgen_portlist_t		*plistp;
1152 	vgen_ldc_t		*ldcp;
1153 	vgen_ldclist_t		*ldclp;
1154 	struct ether_addr	*addrp;
1155 	int			rv = DDI_FAILURE;
1156 	uint32_t		i;
1157 
1158 	portp = (vgen_port_t *)arg;
1159 	vgenp = portp->vgenp;
1160 
1161 	if (portp != vgenp->vsw_portp) {
1162 		return (DDI_SUCCESS);
1163 	}
1164 
1165 	addrp = (struct ether_addr *)mca;
1166 	tagp = &mcastmsg.tag;
1167 	bzero(&mcastmsg, sizeof (mcastmsg));
1168 
1169 	mutex_enter(&vgenp->lock);
1170 
1171 	plistp = &(vgenp->vgenports);
1172 
1173 	READ_ENTER(&plistp->rwlock);
1174 
1175 	portp = vgenp->vsw_portp;
1176 	if (portp == NULL) {
1177 		RW_EXIT(&plistp->rwlock);
1178 		mutex_exit(&vgenp->lock);
1179 		return (rv);
1180 	}
1181 	ldclp = &portp->ldclist;
1182 
1183 	READ_ENTER(&ldclp->rwlock);
1184 
1185 	ldcp = ldclp->headp;
1186 	if (ldcp == NULL)
1187 		goto vgen_mcast_exit;
1188 
1189 	mutex_enter(&ldcp->cblock);
1190 
1191 	if (ldcp->hphase == VH_DONE) {
1192 		/*
1193 		 * If handshake is done, send a msg to vsw to add/remove
1194 		 * the multicast address. Otherwise, we just update this
1195 		 * mcast address in our table and the table will be sync'd
1196 		 * with vsw when handshake completes.
1197 		 */
1198 		tagp->vio_msgtype = VIO_TYPE_CTRL;
1199 		tagp->vio_subtype = VIO_SUBTYPE_INFO;
1200 		tagp->vio_subtype_env = VNET_MCAST_INFO;
1201 		tagp->vio_sid = ldcp->local_sid;
1202 		bcopy(mca, &(mcastmsg.mca), ETHERADDRL);
1203 		mcastmsg.set = add;
1204 		mcastmsg.count = 1;
1205 		if (vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (mcastmsg),
1206 		    B_FALSE) != VGEN_SUCCESS) {
1207 			DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
1208 			mutex_exit(&ldcp->cblock);
1209 			goto vgen_mcast_exit;
1210 		}
1211 	}
1212 
1213 	mutex_exit(&ldcp->cblock);
1214 
1215 	if (add) {
1216 
1217 		/* expand multicast table if necessary */
1218 		if (vgenp->mccount >= vgenp->mcsize) {
1219 			struct ether_addr	*newtab;
1220 			uint32_t		newsize;
1221 
1222 
1223 			newsize = vgenp->mcsize * 2;
1224 
1225 			newtab = kmem_zalloc(newsize *
1226 			    sizeof (struct ether_addr), KM_NOSLEEP);
1227 			if (newtab == NULL)
1228 				goto vgen_mcast_exit;
1229 			bcopy(vgenp->mctab, newtab, vgenp->mcsize *
1230 			    sizeof (struct ether_addr));
1231 			kmem_free(vgenp->mctab,
1232 			    vgenp->mcsize * sizeof (struct ether_addr));
1233 
1234 			vgenp->mctab = newtab;
1235 			vgenp->mcsize = newsize;
1236 		}
1237 
1238 		/* add address to the table */
1239 		vgenp->mctab[vgenp->mccount++] = *addrp;
1240 
1241 	} else {
1242 
1243 		/* delete address from the table */
1244 		for (i = 0; i < vgenp->mccount; i++) {
1245 			if (ether_cmp(addrp, &(vgenp->mctab[i])) == 0) {
1246 
1247 				/*
1248 				 * If there's more than one address in this
1249 				 * table, delete the unwanted one by moving
1250 				 * the last one in the list over top of it;
1251 				 * otherwise, just remove it.
1252 				 */
1253 				if (vgenp->mccount > 1) {
1254 					vgenp->mctab[i] =
1255 					    vgenp->mctab[vgenp->mccount-1];
1256 				}
1257 				vgenp->mccount--;
1258 				break;
1259 			}
1260 		}
1261 	}
1262 
1263 	rv = DDI_SUCCESS;
1264 
1265 vgen_mcast_exit:
1266 	RW_EXIT(&ldclp->rwlock);
1267 	RW_EXIT(&plistp->rwlock);
1268 
1269 	mutex_exit(&vgenp->lock);
1270 	return (rv);
1271 }
1272 
1273 /* set or clear promiscuous mode on the device */
1274 static int
1275 vgen_promisc(void *arg, boolean_t on)
1276 {
1277 	_NOTE(ARGUNUSED(arg, on))
1278 	return (DDI_SUCCESS);
1279 }
1280 
1281 /* set the unicast mac address of the device */
1282 static int
1283 vgen_unicst(void *arg, const uint8_t *mca)
1284 {
1285 	_NOTE(ARGUNUSED(arg, mca))
1286 	return (DDI_SUCCESS);
1287 }
1288 
1289 /* get device statistics */
1290 int
1291 vgen_stat(void *arg, uint_t stat, uint64_t *val)
1292 {
1293 	vgen_port_t	*portp = (vgen_port_t *)arg;
1294 
1295 	*val = vgen_port_stat(portp, stat);
1296 
1297 	return (0);
1298 }
1299 
1300 static void
1301 vgen_ioctl(void *arg, queue_t *wq, mblk_t *mp)
1302 {
1303 	_NOTE(ARGUNUSED(arg, wq, mp))
1304 }
1305 
1306 /* vgen internal functions */
1307 /* detach all ports from the device */
1308 static void
1309 vgen_detach_ports(vgen_t *vgenp)
1310 {
1311 	vgen_port_t	*portp;
1312 	vgen_portlist_t	*plistp;
1313 
1314 	plistp = &(vgenp->vgenports);
1315 	WRITE_ENTER(&plistp->rwlock);
1316 
1317 	while ((portp = plistp->headp) != NULL) {
1318 		vgen_port_detach(portp);
1319 	}
1320 
1321 	RW_EXIT(&plistp->rwlock);
1322 }
1323 
1324 /*
1325  * detach the given port.
1326  */
1327 static void
1328 vgen_port_detach(vgen_port_t *portp)
1329 {
1330 	vgen_t		*vgenp;
1331 	vgen_ldclist_t	*ldclp;
1332 	int		port_num;
1333 
1334 	vgenp = portp->vgenp;
1335 	port_num = portp->port_num;
1336 
1337 	DBG1(vgenp, NULL, "port(%d):enter\n", port_num);
1338 
1339 	/*
1340 	 * If this port is connected to the vswitch, then
1341 	 * potentially there could be ports that may be using
1342 	 * this port to transmit packets. To address this do
1343 	 * the following:
1344 	 *	- First set vgenp->vsw_portp to NULL, so that
1345 	 *	  it's not used after that.
1346 	 *	- Then wait for the refcnt to go down to 0.
1347 	 *	- Now we can safely detach this port.
1348 	 */
1349 	if (vgenp->vsw_portp == portp) {
1350 		vgenp->vsw_portp = NULL;
1351 		while (vgenp->vsw_port_refcnt > 0) {
1352 			delay(drv_usectohz(vgen_tx_delay));
1353 		}
1354 		(void) atomic_swap_32(&vgenp->vsw_port_refcnt, 0);
1355 	}
1356 
1357 	if (portp->vhp != NULL) {
1358 		vio_net_resource_unreg(portp->vhp);
1359 		portp->vhp = NULL;
1360 	}
1361 
1362 	vgen_vlan_destroy_hash(portp);
1363 
1364 	/* remove it from port list */
1365 	vgen_port_list_remove(portp);
1366 
1367 	/* detach channels from this port */
1368 	ldclp = &portp->ldclist;
1369 	WRITE_ENTER(&ldclp->rwlock);
1370 	while (ldclp->headp) {
1371 		vgen_ldc_detach(ldclp->headp);
1372 	}
1373 	RW_EXIT(&ldclp->rwlock);
1374 	rw_destroy(&ldclp->rwlock);
1375 
1376 	if (portp->num_ldcs != 0) {
1377 		kmem_free(portp->ldc_ids, portp->num_ldcs * sizeof (uint64_t));
1378 		portp->num_ldcs = 0;
1379 	}
1380 
1381 	mutex_destroy(&portp->lock);
1382 	KMEM_FREE(portp);
1383 
1384 	DBG1(vgenp, NULL, "port(%d):exit\n", port_num);
1385 }
1386 
1387 /* add a port to port list */
1388 static void
1389 vgen_port_list_insert(vgen_port_t *portp)
1390 {
1391 	vgen_portlist_t *plistp;
1392 	vgen_t *vgenp;
1393 
1394 	vgenp = portp->vgenp;
1395 	plistp = &(vgenp->vgenports);
1396 
1397 	if (plistp->headp == NULL) {
1398 		plistp->headp = portp;
1399 	} else {
1400 		plistp->tailp->nextp = portp;
1401 	}
1402 	plistp->tailp = portp;
1403 	portp->nextp = NULL;
1404 }
1405 
1406 /* remove a port from port list */
1407 static void
1408 vgen_port_list_remove(vgen_port_t *portp)
1409 {
1410 	vgen_port_t *prevp;
1411 	vgen_port_t *nextp;
1412 	vgen_portlist_t *plistp;
1413 	vgen_t *vgenp;
1414 
1415 	vgenp = portp->vgenp;
1416 
1417 	plistp = &(vgenp->vgenports);
1418 
1419 	if (plistp->headp == NULL)
1420 		return;
1421 
1422 	if (portp == plistp->headp) {
1423 		plistp->headp = portp->nextp;
1424 		if (portp == plistp->tailp)
1425 			plistp->tailp = plistp->headp;
1426 	} else {
1427 		for (prevp = plistp->headp;
1428 		    ((nextp = prevp->nextp) != NULL) && (nextp != portp);
1429 		    prevp = nextp)
1430 			;
1431 		if (nextp == portp) {
1432 			prevp->nextp = portp->nextp;
1433 		}
1434 		if (portp == plistp->tailp)
1435 			plistp->tailp = prevp;
1436 	}
1437 }
1438 
1439 /* lookup a port in the list based on port_num */
1440 static vgen_port_t *
1441 vgen_port_lookup(vgen_portlist_t *plistp, int port_num)
1442 {
1443 	vgen_port_t *portp = NULL;
1444 
1445 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
1446 		if (portp->port_num == port_num) {
1447 			break;
1448 		}
1449 	}
1450 
1451 	return (portp);
1452 }
1453 
1454 /* enable ports for transmit/receive */
1455 static void
1456 vgen_init_ports(vgen_t *vgenp)
1457 {
1458 	vgen_port_t	*portp;
1459 	vgen_portlist_t	*plistp;
1460 
1461 	plistp = &(vgenp->vgenports);
1462 	READ_ENTER(&plistp->rwlock);
1463 
1464 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
1465 		vgen_port_init(portp);
1466 	}
1467 
1468 	RW_EXIT(&plistp->rwlock);
1469 }
1470 
1471 static void
1472 vgen_port_init(vgen_port_t *portp)
1473 {
1474 	/* Add the port to the specified vlans */
1475 	vgen_vlan_add_ids(portp);
1476 
1477 	/* Bring up the channels of this port */
1478 	vgen_init_ldcs(portp);
1479 }
1480 
1481 /* disable transmit/receive on ports */
1482 static void
1483 vgen_uninit_ports(vgen_t *vgenp)
1484 {
1485 	vgen_port_t	*portp;
1486 	vgen_portlist_t	*plistp;
1487 
1488 	plistp = &(vgenp->vgenports);
1489 	READ_ENTER(&plistp->rwlock);
1490 
1491 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
1492 		vgen_port_uninit(portp);
1493 	}
1494 
1495 	RW_EXIT(&plistp->rwlock);
1496 }
1497 
1498 static void
1499 vgen_port_uninit(vgen_port_t *portp)
1500 {
1501 	vgen_uninit_ldcs(portp);
1502 
1503 	/* remove the port from vlans it has been assigned to */
1504 	vgen_vlan_remove_ids(portp);
1505 }
1506 
1507 /*
1508  * Scan the machine description for this instance of vnet
1509  * and read its properties. Called only from vgen_init().
1510  * Returns: 0 on success, 1 on failure.
1511  */
1512 static int
1513 vgen_read_mdprops(vgen_t *vgenp)
1514 {
1515 	vnet_t		*vnetp = vgenp->vnetp;
1516 	md_t		*mdp = NULL;
1517 	mde_cookie_t	rootnode;
1518 	mde_cookie_t	*listp = NULL;
1519 	uint64_t	cfgh;
1520 	char		*name;
1521 	int		rv = 1;
1522 	int		num_nodes = 0;
1523 	int		num_devs = 0;
1524 	int		listsz = 0;
1525 	int		i;
1526 
1527 	if ((mdp = md_get_handle()) == NULL) {
1528 		return (rv);
1529 	}
1530 
1531 	num_nodes = md_node_count(mdp);
1532 	ASSERT(num_nodes > 0);
1533 
1534 	listsz = num_nodes * sizeof (mde_cookie_t);
1535 	listp = (mde_cookie_t *)kmem_zalloc(listsz, KM_SLEEP);
1536 
1537 	rootnode = md_root_node(mdp);
1538 
1539 	/* search for all "virtual-device" nodes */
1540 	num_devs = md_scan_dag(mdp, rootnode,
1541 	    md_find_name(mdp, vdev_propname),
1542 	    md_find_name(mdp, "fwd"), listp);
1543 	if (num_devs <= 0) {
1544 		goto vgen_readmd_exit;
1545 	}
1546 
1547 	/*
1548 	 * Now loop through the list of virtual-devices looking for
1549 	 * devices with name "network" and for each such device compare
1550 	 * its instance with what we have from the 'reg' property to
1551 	 * find the right node in MD and then read all its properties.
1552 	 */
1553 	for (i = 0; i < num_devs; i++) {
1554 
1555 		if (md_get_prop_str(mdp, listp[i], "name", &name) != 0) {
1556 			goto vgen_readmd_exit;
1557 		}
1558 
1559 		/* is this a "network" device? */
1560 		if (strcmp(name, vnet_propname) != 0)
1561 			continue;
1562 
1563 		if (md_get_prop_val(mdp, listp[i], "cfg-handle", &cfgh) != 0) {
1564 			goto vgen_readmd_exit;
1565 		}
1566 
1567 		/* is this the required instance of vnet? */
1568 		if (vgenp->regprop != cfgh)
1569 			continue;
1570 
1571 		/* now read all properties of this vnet instance */
1572 		vgen_read_pri_eth_types(vgenp, mdp, listp[i]);
1573 
1574 		/* read vlan id properties of this vnet instance */
1575 		vgen_vlan_read_ids(vgenp, VGEN_LOCAL, mdp, listp[i],
1576 		    &vnetp->pvid, &vnetp->vids, &vnetp->nvids,
1577 		    &vnetp->default_vlan_id);
1578 
1579 		rv = 0;
1580 		break;
1581 	}
1582 
1583 vgen_readmd_exit:
1584 
1585 	kmem_free(listp, listsz);
1586 	(void) md_fini_handle(mdp);
1587 	return (rv);
1588 }
1589 
1590 /*
1591  * Read vlan id properties of the given MD node.
1592  * Arguments:
1593  *   arg:          device argument(vnet device or a port)
1594  *   type:         type of arg; VGEN_LOCAL(vnet device) or VGEN_PEER(port)
1595  *   mdp:          machine description
1596  *   node:         md node cookie
1597  *
1598  * Returns:
1599  *   pvidp:        port-vlan-id of the node
1600  *   vidspp:       list of vlan-ids of the node
1601  *   nvidsp:       # of vlan-ids in the list
1602  *   default_idp:  default-vlan-id of the node(if node is vnet device)
1603  */
1604 static void
1605 vgen_vlan_read_ids(void *arg, int type, md_t *mdp, mde_cookie_t node,
1606 	uint16_t *pvidp, uint16_t **vidspp, uint16_t *nvidsp,
1607 	uint16_t *default_idp)
1608 {
1609 	vgen_t		*vgenp;
1610 	vnet_t		*vnetp;
1611 	vgen_port_t	*portp;
1612 	char		*pvid_propname;
1613 	char		*vid_propname;
1614 	uint_t		nvids;
1615 	uint32_t	vids_size;
1616 	int		rv;
1617 	int		i;
1618 	uint64_t	*data;
1619 	uint64_t	val;
1620 	int		size;
1621 	int		inst;
1622 
1623 	if (type == VGEN_LOCAL) {
1624 
1625 		vgenp = (vgen_t *)arg;
1626 		vnetp = vgenp->vnetp;
1627 		pvid_propname = vgen_pvid_propname;
1628 		vid_propname = vgen_vid_propname;
1629 		inst = vnetp->instance;
1630 
1631 	} else if (type == VGEN_PEER) {
1632 
1633 		portp = (vgen_port_t *)arg;
1634 		vgenp = portp->vgenp;
1635 		vnetp = vgenp->vnetp;
1636 		pvid_propname = port_pvid_propname;
1637 		vid_propname = port_vid_propname;
1638 		inst = portp->port_num;
1639 
1640 	} else {
1641 		return;
1642 	}
1643 
1644 	if (type == VGEN_LOCAL && default_idp != NULL) {
1645 		rv = md_get_prop_val(mdp, node, vgen_dvid_propname, &val);
1646 		if (rv != 0) {
1647 			DWARN(vgenp, NULL, "prop(%s) not found",
1648 			    vgen_dvid_propname);
1649 
1650 			*default_idp = vnet_default_vlan_id;
1651 		} else {
1652 			*default_idp = val & 0xFFF;
1653 			DBG2(vgenp, NULL, "%s(%d): (%d)\n", vgen_dvid_propname,
1654 			    inst, *default_idp);
1655 		}
1656 	}
1657 
1658 	rv = md_get_prop_val(mdp, node, pvid_propname, &val);
1659 	if (rv != 0) {
1660 		DWARN(vgenp, NULL, "prop(%s) not found", pvid_propname);
1661 		*pvidp = vnet_default_vlan_id;
1662 	} else {
1663 
1664 		*pvidp = val & 0xFFF;
1665 		DBG2(vgenp, NULL, "%s(%d): (%d)\n",
1666 		    pvid_propname, inst, *pvidp);
1667 	}
1668 
1669 	rv = md_get_prop_data(mdp, node, vid_propname, (uint8_t **)&data,
1670 	    &size);
1671 	if (rv != 0) {
1672 		DBG2(vgenp, NULL, "prop(%s) not found", vid_propname);
1673 		size = 0;
1674 	} else {
1675 		size /= sizeof (uint64_t);
1676 	}
1677 	nvids = size;
1678 
1679 	if (nvids != 0) {
1680 		DBG2(vgenp, NULL, "%s(%d): ", vid_propname, inst);
1681 		vids_size = sizeof (uint16_t) * nvids;
1682 		*vidspp = kmem_zalloc(vids_size, KM_SLEEP);
1683 		for (i = 0; i < nvids; i++) {
1684 			(*vidspp)[i] = data[i] & 0xFFFF;
1685 			DBG2(vgenp, NULL, " %d ", (*vidspp)[i]);
1686 		}
1687 		DBG2(vgenp, NULL, "\n");
1688 	}
1689 
1690 	*nvidsp = nvids;
1691 }
1692 
1693 /*
1694  * Create a vlan id hash table for the given port.
1695  */
1696 static void
1697 vgen_vlan_create_hash(vgen_port_t *portp)
1698 {
1699 	char		hashname[MAXNAMELEN];
1700 
1701 	(void) snprintf(hashname, MAXNAMELEN, "port%d-vlan-hash",
1702 	    portp->port_num);
1703 
1704 	portp->vlan_nchains = vgen_vlan_nchains;
1705 	portp->vlan_hashp = mod_hash_create_idhash(hashname,
1706 	    portp->vlan_nchains, mod_hash_null_valdtor);
1707 }
1708 
1709 /*
1710  * Destroy the vlan id hash table in the given port.
1711  */
1712 static void
1713 vgen_vlan_destroy_hash(vgen_port_t *portp)
1714 {
1715 	if (portp->vlan_hashp != NULL) {
1716 		mod_hash_destroy_hash(portp->vlan_hashp);
1717 		portp->vlan_hashp = NULL;
1718 		portp->vlan_nchains = 0;
1719 	}
1720 }
1721 
1722 /*
1723  * Add a port to the vlans specified in its port properties.
1724  */
1725 static void
1726 vgen_vlan_add_ids(vgen_port_t *portp)
1727 {
1728 	int		rv;
1729 	int		i;
1730 
1731 	rv = mod_hash_insert(portp->vlan_hashp,
1732 	    (mod_hash_key_t)VLAN_ID_KEY(portp->pvid),
1733 	    (mod_hash_val_t)B_TRUE);
1734 	ASSERT(rv == 0);
1735 
1736 	for (i = 0; i < portp->nvids; i++) {
1737 		rv = mod_hash_insert(portp->vlan_hashp,
1738 		    (mod_hash_key_t)VLAN_ID_KEY(portp->vids[i]),
1739 		    (mod_hash_val_t)B_TRUE);
1740 		ASSERT(rv == 0);
1741 	}
1742 }
1743 
1744 /*
1745  * Remove a port from the vlans it has been assigned to.
1746  */
1747 static void
1748 vgen_vlan_remove_ids(vgen_port_t *portp)
1749 {
1750 	int		rv;
1751 	int		i;
1752 	mod_hash_val_t	vp;
1753 
1754 	rv = mod_hash_remove(portp->vlan_hashp,
1755 	    (mod_hash_key_t)VLAN_ID_KEY(portp->pvid),
1756 	    (mod_hash_val_t *)&vp);
1757 	ASSERT(rv == 0);
1758 
1759 	for (i = 0; i < portp->nvids; i++) {
1760 		rv = mod_hash_remove(portp->vlan_hashp,
1761 		    (mod_hash_key_t)VLAN_ID_KEY(portp->vids[i]),
1762 		    (mod_hash_val_t *)&vp);
1763 		ASSERT(rv == 0);
1764 	}
1765 }
1766 
1767 /*
1768  * Lookup the vlan id of the given tx frame. If it is a vlan-tagged frame,
1769  * then the vlan-id is available in the tag; otherwise, its vlan id is
1770  * implicitly obtained from the port-vlan-id of the vnet device.
1771  * The vlan id determined is returned in vidp.
1772  * Returns: B_TRUE if it is a tagged frame; B_FALSE if it is untagged.
1773  */
1774 static boolean_t
1775 vgen_frame_lookup_vid(vnet_t *vnetp, struct ether_header *ehp, uint16_t *vidp)
1776 {
1777 	struct ether_vlan_header	*evhp;
1778 
1779 	/* If it's a tagged frame, get the vlan id from vlan header */
1780 	if (ehp->ether_type == ETHERTYPE_VLAN) {
1781 
1782 		evhp = (struct ether_vlan_header *)ehp;
1783 		*vidp = VLAN_ID(ntohs(evhp->ether_tci));
1784 		return (B_TRUE);
1785 	}
1786 
1787 	/* Untagged frame, vlan-id is the pvid of vnet device */
1788 	*vidp = vnetp->pvid;
1789 	return (B_FALSE);
1790 }
1791 
1792 /*
1793  * Find the given vlan id in the hash table.
1794  * Return: B_TRUE if the id is found; B_FALSE if not found.
1795  */
1796 static boolean_t
1797 vgen_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid)
1798 {
1799 	int		rv;
1800 	mod_hash_val_t	vp;
1801 
1802 	rv = mod_hash_find(vlan_hashp, VLAN_ID_KEY(vid), (mod_hash_val_t *)&vp);
1803 
1804 	if (rv != 0)
1805 		return (B_FALSE);
1806 
1807 	return (B_TRUE);
1808 }
1809 
1810 /*
1811  * This function reads "priority-ether-types" property from md. This property
1812  * is used to enable support for priority frames. Applications which need
1813  * guaranteed and timely delivery of certain high priority frames to/from
1814  * a vnet or vsw within ldoms, should configure this property by providing
1815  * the ether type(s) for which the priority facility is needed.
1816  * Normal data frames are delivered over a ldc channel using the descriptor
1817  * ring mechanism which is constrained by factors such as descriptor ring size,
1818  * the rate at which the ring is processed at the peer ldc end point, etc.
1819  * The priority mechanism provides an Out-Of-Band path to send/receive frames
1820  * as raw pkt data (VIO_PKT_DATA) messages over the channel, avoiding the
1821  * descriptor ring path and enabling more reliable and timely delivery of
1822  * frames to the peer.
1823  */
1824 static void
1825 vgen_read_pri_eth_types(vgen_t *vgenp, md_t *mdp, mde_cookie_t node)
1826 {
1827 	int		rv;
1828 	uint16_t	*types;
1829 	uint64_t	*data;
1830 	int		size;
1831 	int		i;
1832 	size_t		mblk_sz;
1833 
1834 	rv = md_get_prop_data(mdp, node, pri_types_propname,
1835 	    (uint8_t **)&data, &size);
1836 	if (rv != 0) {
1837 		/*
1838 		 * Property may not exist if we are running pre-ldoms1.1 f/w.
1839 		 * Check if 'vgen_pri_eth_type' has been set in that case.
1840 		 */
1841 		if (vgen_pri_eth_type != 0) {
1842 			size = sizeof (vgen_pri_eth_type);
1843 			data = &vgen_pri_eth_type;
1844 		} else {
1845 			DBG2(vgenp, NULL,
1846 			    "prop(%s) not found", pri_types_propname);
1847 			size = 0;
1848 		}
1849 	}
1850 
1851 	if (size == 0) {
1852 		vgenp->pri_num_types = 0;
1853 		return;
1854 	}
1855 
1856 	/*
1857 	 * we have some priority-ether-types defined;
1858 	 * allocate a table of these types and also
1859 	 * allocate a pool of mblks to transmit these
1860 	 * priority packets.
1861 	 */
1862 	size /= sizeof (uint64_t);
1863 	vgenp->pri_num_types = size;
1864 	vgenp->pri_types = kmem_zalloc(size * sizeof (uint16_t), KM_SLEEP);
1865 	for (i = 0, types = vgenp->pri_types; i < size; i++) {
1866 		types[i] = data[i] & 0xFFFF;
1867 	}
1868 	mblk_sz = (VIO_PKT_DATA_HDRSIZE + vgenp->max_frame_size + 7) & ~7;
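	/* mblk size: pkt data header + max frame, rounded up to 8 bytes */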
1869 	(void) vio_create_mblks(vgen_pri_tx_nmblks, mblk_sz,
1870 	    &vgenp->pri_tx_vmp);
1871 }
1872 
1873 /* register with MD event generator */
1874 static int
1875 vgen_mdeg_reg(vgen_t *vgenp)
1876 {
1877 	mdeg_prop_spec_t	*pspecp;
1878 	mdeg_node_spec_t	*parentp;
1879 	uint_t			templatesz;
1880 	int			rv;
1881 	mdeg_handle_t		dev_hdl = NULL;
1882 	mdeg_handle_t		port_hdl = NULL;
1883 
1884 	templatesz = sizeof (vgen_prop_template);
1885 	pspecp = kmem_zalloc(templatesz, KM_NOSLEEP);
1886 	if (pspecp == NULL) {
1887 		return (DDI_FAILURE);
1888 	}
1889 	parentp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_NOSLEEP);
1890 	if (parentp == NULL) {
1891 		kmem_free(pspecp, templatesz);
1892 		return (DDI_FAILURE);
1893 	}
1894 
1895 	bcopy(vgen_prop_template, pspecp, templatesz);
1896 
1897 	/*
1898 	 * NOTE: The instance here refers to the value of "reg" property and
1899 	 * not the dev_info instance (ddi_get_instance()) of vnet.
1900 	 */
1901 	VGEN_SET_MDEG_PROP_INST(pspecp, vgenp->regprop);
1902 
1903 	parentp->namep = "virtual-device";
1904 	parentp->specp = pspecp;
1905 
1906 	/* save parentp in vgen_t */
1907 	vgenp->mdeg_parentp = parentp;
1908 
1909 	/*
1910 	 * Register an interest in 'virtual-device' nodes with a
1911 	 * 'name' property of 'network'
1912 	 */
1913 	rv = mdeg_register(parentp, &vdev_match, vgen_mdeg_cb, vgenp, &dev_hdl);
1914 	if (rv != MDEG_SUCCESS) {
1915 		DERR(vgenp, NULL, "mdeg_register failed\n");
1916 		goto mdeg_reg_fail;
1917 	}
1918 
1919 	/* Register an interest in 'port' nodes */
1920 	rv = mdeg_register(parentp, &vport_match, vgen_mdeg_port_cb, vgenp,
1921 	    &port_hdl);
1922 	if (rv != MDEG_SUCCESS) {
1923 		DERR(vgenp, NULL, "mdeg_register failed\n");
1924 		goto mdeg_reg_fail;
1925 	}
1926 
1927 	/* save mdeg handle in vgen_t */
1928 	vgenp->mdeg_dev_hdl = dev_hdl;
1929 	vgenp->mdeg_port_hdl = port_hdl;
1930 
1931 	return (DDI_SUCCESS);
1932 
1933 mdeg_reg_fail:
1934 	if (dev_hdl != NULL) {
1935 		(void) mdeg_unregister(dev_hdl);
1936 	}
1937 	KMEM_FREE(parentp);
1938 	kmem_free(pspecp, templatesz);
1939 	vgenp->mdeg_parentp = NULL;
1940 	return (DDI_FAILURE);
1941 }
1942 
1943 /* unregister with MD event generator */
1944 static void
1945 vgen_mdeg_unreg(vgen_t *vgenp)
1946 {
1947 	(void) mdeg_unregister(vgenp->mdeg_dev_hdl);
1948 	(void) mdeg_unregister(vgenp->mdeg_port_hdl);
1949 	kmem_free(vgenp->mdeg_parentp->specp, sizeof (vgen_prop_template));
1950 	KMEM_FREE(vgenp->mdeg_parentp);
1951 	vgenp->mdeg_parentp = NULL;
1952 	vgenp->mdeg_dev_hdl = NULL;
1953 	vgenp->mdeg_port_hdl = NULL;
1954 }
1955 
1956 /* mdeg callback function for the port node */
1957 static int
1958 vgen_mdeg_port_cb(void *cb_argp, mdeg_result_t *resp)
1959 {
1960 	int idx;
1961 	int vsw_idx = -1;
1962 	uint64_t val;
1963 	vgen_t *vgenp;
1964 
1965 	if ((resp == NULL) || (cb_argp == NULL)) {
1966 		return (MDEG_FAILURE);
1967 	}
1968 
1969 	vgenp = (vgen_t *)cb_argp;
1970 	DBG1(vgenp, NULL, "enter\n");
1971 
1972 	mutex_enter(&vgenp->lock);
1973 
1974 	DBG1(vgenp, NULL, "ports: removed(%x), "
1975 	"added(%x), updated(%x)\n", resp->removed.nelem,
1976 	    resp->added.nelem, resp->match_curr.nelem);
1977 
1978 	for (idx = 0; idx < resp->removed.nelem; idx++) {
1979 		(void) vgen_remove_port(vgenp, resp->removed.mdp,
1980 		    resp->removed.mdep[idx]);
1981 	}
1982 
1983 	if (vgenp->vsw_portp == NULL) {
1984 		/*
1985 		 * find vsw_port and add it first, because other ports need
1986 		 * this when adding fdb entry (see vgen_port_init()).
1987 		 */
1988 		for (idx = 0; idx < resp->added.nelem; idx++) {
1989 			if (!(md_get_prop_val(resp->added.mdp,
1990 			    resp->added.mdep[idx], swport_propname, &val))) {
1991 				if (val == 0) {
1992 					/*
1993 					 * This port is connected to the
1994 					 * vsw on service domain.
1995 					 */
1996 					vsw_idx = idx;
1997 					if (vgen_add_port(vgenp,
1998 					    resp->added.mdp,
1999 					    resp->added.mdep[idx]) !=
2000 					    DDI_SUCCESS) {
2001 						cmn_err(CE_NOTE, "vnet%d Could "
2002 						    "not initialize virtual "
2003 						    "switch port.",
2004 						    vgenp->instance);
2005 						mutex_exit(&vgenp->lock);
2006 						return (MDEG_FAILURE);
2007 					}
2008 					break;
2009 				}
2010 			}
2011 		}
2012 		if (vsw_idx == -1) {
2013 			DWARN(vgenp, NULL, "can't find vsw_port\n");
2014 			mutex_exit(&vgenp->lock);
2015 			return (MDEG_FAILURE);
2016 		}
2017 	}
2018 
2019 	for (idx = 0; idx < resp->added.nelem; idx++) {
2020 		if ((vsw_idx != -1) && (vsw_idx == idx)) /* skip vsw_port */
2021 			continue;
2022 
2023 		/* If this port can't be added just skip it. */
2024 		(void) vgen_add_port(vgenp, resp->added.mdp,
2025 		    resp->added.mdep[idx]);
2026 	}
2027 
2028 	for (idx = 0; idx < resp->match_curr.nelem; idx++) {
2029 		(void) vgen_update_port(vgenp, resp->match_curr.mdp,
2030 		    resp->match_curr.mdep[idx],
2031 		    resp->match_prev.mdp,
2032 		    resp->match_prev.mdep[idx]);
2033 	}
2034 
2035 	mutex_exit(&vgenp->lock);
2036 	DBG1(vgenp, NULL, "exit\n");
2037 	return (MDEG_SUCCESS);
2038 }
2039 
2040 /* mdeg callback function for the vnet node */
2041 static int
2042 vgen_mdeg_cb(void *cb_argp, mdeg_result_t *resp)
2043 {
2044 	vgen_t		*vgenp;
2045 	vnet_t		*vnetp;
2046 	md_t		*mdp;
2047 	mde_cookie_t	node;
2048 	uint64_t	inst;
2049 	char		*node_name = NULL;
2050 
2051 	if ((resp == NULL) || (cb_argp == NULL)) {
2052 		return (MDEG_FAILURE);
2053 	}
2054 
2055 	vgenp = (vgen_t *)cb_argp;
2056 	vnetp = vgenp->vnetp;
2057 
2058 	DBG1(vgenp, NULL, "added %d : removed %d : curr matched %d"
2059 	    " : prev matched %d", resp->added.nelem, resp->removed.nelem,
2060 	    resp->match_curr.nelem, resp->match_prev.nelem);
2061 
2062 	mutex_enter(&vgenp->lock);
2063 
2064 	/*
2065 	 * We get an initial callback for this node as 'added' after
2066 	 * registering with mdeg. Note that we would have already gathered
2067 	 * information about this vnet node by walking MD earlier during attach
2068 	 * (in vgen_read_mdprops()). So, there is a window where the properties
2069 	 * of this node might have changed when we get this initial 'added'
2070 	 * callback. We handle this as if an update occurred and invoke the same
2071 	 * function which handles updates to the properties of this vnet-node
2072 	 * if any. A non-zero 'match' value indicates that the MD has been
2073 	 * updated and that a 'network' node is present which may or may not
2074 	 * have been updated. It is up to the clients to examine their own
2075 	 * nodes and determine if they have changed.
2076 	 */
2077 	if (resp->added.nelem != 0) {
2078 
2079 		if (resp->added.nelem != 1) {
2080 			cmn_err(CE_NOTE, "!vnet%d: number of nodes added "
2081 			    "invalid: %d\n", vnetp->instance,
2082 			    resp->added.nelem);
2083 			goto vgen_mdeg_cb_err;
2084 		}
2085 
2086 		mdp = resp->added.mdp;
2087 		node = resp->added.mdep[0];
2088 
2089 	} else if (resp->match_curr.nelem != 0) {
2090 
2091 		if (resp->match_curr.nelem != 1) {
2092 			cmn_err(CE_NOTE, "!vnet%d: number of nodes updated "
2093 			    "invalid: %d\n", vnetp->instance,
2094 			    resp->match_curr.nelem);
2095 			goto vgen_mdeg_cb_err;
2096 		}
2097 
2098 		mdp = resp->match_curr.mdp;
2099 		node = resp->match_curr.mdep[0];
2100 
2101 	} else {
2102 		goto vgen_mdeg_cb_err;
2103 	}
2104 
2105 	/* Validate name and instance */
2106 	if (md_get_prop_str(mdp, node, "name", &node_name) != 0) {
2107 		DERR(vgenp, NULL, "unable to get node name\n");
2108 		goto vgen_mdeg_cb_err;
2109 	}
2110 
2111 	/* is this a virtual-network device? */
2112 	if (strcmp(node_name, vnet_propname) != 0) {
2113 		DERR(vgenp, NULL, "Invalid node name: %s\n", node_name);
2114 		goto vgen_mdeg_cb_err;
2115 	}
2116 
2117 	if (md_get_prop_val(mdp, node, "cfg-handle", &inst)) {
2118 		DERR(vgenp, NULL, "prop(cfg-handle) not found\n");
2119 		goto vgen_mdeg_cb_err;
2120 	}
2121 
2122 	/* is this the right instance of vnet? */
2123 	if (inst != vgenp->regprop) {
2124 		DERR(vgenp, NULL,  "Invalid cfg-handle: %lx\n", inst);
2125 		goto vgen_mdeg_cb_err;
2126 	}
2127 
2128 	vgen_update_md_prop(vgenp, mdp, node);
2129 
2130 	mutex_exit(&vgenp->lock);
2131 	return (MDEG_SUCCESS);
2132 
2133 vgen_mdeg_cb_err:
2134 	mutex_exit(&vgenp->lock);
2135 	return (MDEG_FAILURE);
2136 }
2137 
2138 /*
2139  * Check to see if the relevant properties in the specified node have
2140  * changed, and if so take the appropriate action.
2141  */
2142 static void
2143 vgen_update_md_prop(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
2144 {
2145 	uint16_t	pvid;
2146 	uint16_t	*vids;
2147 	uint16_t	nvids;
2148 	vnet_t		*vnetp = vgenp->vnetp;
2149 	boolean_t	updated_vlans = B_FALSE;
2150 
2151 	/* Read the vlan ids */
2152 	vgen_vlan_read_ids(vgenp, VGEN_LOCAL, mdp, mdex, &pvid, &vids,
2153 	    &nvids, NULL);
2154 
2155 	/* Determine if there are any vlan id updates */
2156 	if ((pvid != vnetp->pvid) ||		/* pvid changed? */
2157 	    (nvids != vnetp->nvids) ||		/* # of vids changed? */
2158 	    ((nvids != 0) && (vnetp->nvids != 0) &&	/* vids changed? */
2159 	    bcmp(vids, vnetp->vids, sizeof (uint16_t) * nvids))) {
2160 		updated_vlans = B_TRUE;
2161 	}
2162 
2163 	if (updated_vlans == B_FALSE) {
2164 		if (nvids != 0) {
2165 			kmem_free(vids, sizeof (uint16_t) * nvids);
2166 		}
2167 		return;
2168 	}
2169 
2170 	/* save the new vlan ids */
2171 	vnetp->pvid = pvid;
2172 	if (vnetp->nvids != 0) {
2173 		kmem_free(vnetp->vids, sizeof (uint16_t) * vnetp->nvids);
2174 		vnetp->nvids = 0;
2175 	}
2176 	if (nvids != 0) {
2177 		vnetp->nvids = nvids;
2178 		vnetp->vids = vids;
2179 	}
2180 
2181 	/* reset vlan-unaware peers (ver < 1.3) and restart handshake */
2182 	vgen_reset_vlan_unaware_ports(vgenp);
2183 }
2184 
2185 /* add a new port to the device */
2186 static int
2187 vgen_add_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
2188 {
2189 	vgen_port_t	*portp;
2190 	int		rv;
2191 
2192 	portp = kmem_zalloc(sizeof (vgen_port_t), KM_SLEEP);
2193 
2194 	rv = vgen_port_read_props(portp, vgenp, mdp, mdex);
2195 	if (rv != DDI_SUCCESS) {
2196 		KMEM_FREE(portp);
2197 		return (DDI_FAILURE);
2198 	}
2199 
2200 	rv = vgen_port_attach(portp);
2201 	if (rv != DDI_SUCCESS) {
2202 		return (DDI_FAILURE);
2203 	}
2204 
2205 	return (DDI_SUCCESS);
2206 }
2207 
2208 /* read properties of the port from its md node */
2209 static int
2210 vgen_port_read_props(vgen_port_t *portp, vgen_t *vgenp, md_t *mdp,
2211 	mde_cookie_t mdex)
2212 {
2213 	uint64_t		port_num;
2214 	uint64_t		*ldc_ids;
2215 	uint64_t		macaddr;
2216 	uint64_t		val;
2217 	int			num_ldcs;
2218 	int			i;
2219 	int			addrsz;
2220 	int			num_nodes = 0;
2221 	int			listsz = 0;
2222 	mde_cookie_t		*listp = NULL;
2223 	uint8_t			*addrp;
2224 	struct ether_addr	ea;
2225 
2226 	/* read "id" property to get the port number */
2227 	if (md_get_prop_val(mdp, mdex, id_propname, &port_num)) {
2228 		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
2229 		return (DDI_FAILURE);
2230 	}
2231 
2232 	/*
2233 	 * Find the channel endpoint node(s) under this port node.
2234 	 */
2235 	if ((num_nodes = md_node_count(mdp)) <= 0) {
2236 		DWARN(vgenp, NULL, "invalid number of nodes found (%d)",
2237 		    num_nodes);
2238 		return (DDI_FAILURE);
2239 	}
2240 
2241 	/* allocate space for node list */
2242 	listsz = num_nodes * sizeof (mde_cookie_t);
2243 	listp = kmem_zalloc(listsz, KM_NOSLEEP);
2244 	if (listp == NULL)
2245 		return (DDI_FAILURE);
2246 
2247 	num_ldcs = md_scan_dag(mdp, mdex,
2248 	    md_find_name(mdp, channel_propname),
2249 	    md_find_name(mdp, "fwd"), listp);
2250 
2251 	if (num_ldcs <= 0) {
2252 		DWARN(vgenp, NULL, "can't find %s nodes", channel_propname);
2253 		kmem_free(listp, listsz);
2254 		return (DDI_FAILURE);
2255 	}
2256 
2257 	DBG2(vgenp, NULL, "num_ldcs %d", num_ldcs);
2258 
2259 	ldc_ids = kmem_zalloc(num_ldcs * sizeof (uint64_t), KM_NOSLEEP);
2260 	if (ldc_ids == NULL) {
2261 		kmem_free(listp, listsz);
2262 		return (DDI_FAILURE);
2263 	}
2264 
2265 	for (i = 0; i < num_ldcs; i++) {
2266 		/* read channel ids */
2267 		if (md_get_prop_val(mdp, listp[i], id_propname, &ldc_ids[i])) {
2268 			DWARN(vgenp, NULL, "prop(%s) not found\n",
2269 			    id_propname);
2270 			kmem_free(listp, listsz);
2271 			kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
2272 			return (DDI_FAILURE);
2273 		}
2274 		DBG2(vgenp, NULL, "ldc_id 0x%llx", ldc_ids[i]);
2275 	}
2276 
2277 	kmem_free(listp, listsz);
2278 
2279 	if (md_get_prop_data(mdp, mdex, rmacaddr_propname, &addrp,
2280 	    &addrsz)) {
2281 		DWARN(vgenp, NULL, "prop(%s) not found\n", rmacaddr_propname);
2282 		kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
2283 		return (DDI_FAILURE);
2284 	}
2285 
2286 	if (addrsz < ETHERADDRL) {
2287 		DWARN(vgenp, NULL, "invalid address size (%d)\n", addrsz);
2288 		kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
2289 		return (DDI_FAILURE);
2290 	}
2291 
2292 	macaddr = *((uint64_t *)addrp);
2293 
2294 	DBG2(vgenp, NULL, "remote mac address 0x%llx\n", macaddr);
2295 
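	/*
	 * Convert the 64-bit remote-mac-address property into an ether_addr;
	 * the address occupies the low 6 bytes, most significant byte first.
	 */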
2296 	for (i = ETHERADDRL - 1; i >= 0; i--) {
2297 		ea.ether_addr_octet[i] = macaddr & 0xFF;
2298 		macaddr >>= 8;
2299 	}
2300 
2301 	if (vgenp->vsw_portp == NULL) {
2302 		if (!(md_get_prop_val(mdp, mdex, swport_propname, &val))) {
2303 			if (val == 0) {
2304 				(void) atomic_swap_32(
2305 				    &vgenp->vsw_port_refcnt, 0);
2306 				/* This port is connected to the vsw */
2307 				vgenp->vsw_portp = portp;
2308 			}
2309 		}
2310 	}
2311 
2312 	/* now update all properties into the port */
2313 	portp->vgenp = vgenp;
2314 	portp->port_num = port_num;
2315 	ether_copy(&ea, &portp->macaddr);
2316 	portp->ldc_ids = kmem_zalloc(sizeof (uint64_t) * num_ldcs, KM_SLEEP);
2317 	bcopy(ldc_ids, portp->ldc_ids, sizeof (uint64_t) * num_ldcs);
2318 	portp->num_ldcs = num_ldcs;
2319 
2320 	/* read vlan id properties of this port node */
2321 	vgen_vlan_read_ids(portp, VGEN_PEER, mdp, mdex, &portp->pvid,
2322 	    &portp->vids, &portp->nvids, NULL);
2323 
2324 	kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
2325 
2326 	return (DDI_SUCCESS);
2327 }
2328 
2329 /* remove a port from the device */
2330 static int
2331 vgen_remove_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
2332 {
2333 	uint64_t	port_num;
2334 	vgen_port_t	*portp;
2335 	vgen_portlist_t	*plistp;
2336 
2337 	/* read "id" property to get the port number */
2338 	if (md_get_prop_val(mdp, mdex, id_propname, &port_num)) {
2339 		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
2340 		return (DDI_FAILURE);
2341 	}
2342 
2343 	plistp = &(vgenp->vgenports);
2344 
2345 	WRITE_ENTER(&plistp->rwlock);
2346 	portp = vgen_port_lookup(plistp, (int)port_num);
2347 	if (portp == NULL) {
2348 		DWARN(vgenp, NULL, "can't find port(%lx)\n", port_num);
2349 		RW_EXIT(&plistp->rwlock);
2350 		return (DDI_FAILURE);
2351 	}
2352 
2353 	vgen_port_detach_mdeg(portp);
2354 	RW_EXIT(&plistp->rwlock);
2355 
2356 	return (DDI_SUCCESS);
2357 }
2358 
2359 /* attach a port to the device based on mdeg data */
2360 static int
2361 vgen_port_attach(vgen_port_t *portp)
2362 {
2363 	int			i;
2364 	vgen_portlist_t		*plistp;
2365 	vgen_t			*vgenp;
2366 	uint64_t		*ldcids;
2367 	uint32_t		num_ldcs;
2368 	mac_register_t		*macp;
2369 	vio_net_res_type_t	type;
2370 	int			rv;
2371 
2372 	ASSERT(portp != NULL);
2373 
2374 	vgenp = portp->vgenp;
2375 	ldcids = portp->ldc_ids;
2376 	num_ldcs = portp->num_ldcs;
2377 
2378 	DBG1(vgenp, NULL, "port_num(%d)\n", portp->port_num);
2379 
2380 	mutex_init(&portp->lock, NULL, MUTEX_DRIVER, NULL);
2381 	rw_init(&portp->ldclist.rwlock, NULL, RW_DRIVER, NULL);
2382 	portp->ldclist.headp = NULL;
2383 
2384 	for (i = 0; i < num_ldcs; i++) {
2385 		DBG2(vgenp, NULL, "ldcid (%lx)\n", ldcids[i]);
2386 		if (vgen_ldc_attach(portp, ldcids[i]) == DDI_FAILURE) {
2387 			vgen_port_detach(portp);
2388 			return (DDI_FAILURE);
2389 		}
2390 	}
2391 
2392 	/* create vlan id hash table */
2393 	vgen_vlan_create_hash(portp);
2394 
2395 	if (portp == vgenp->vsw_portp) {
2396 		/* This port is connected to the switch port */
2397 		vgenp->vsw_portp = portp;
2398 		(void) atomic_swap_32(&portp->use_vsw_port, B_FALSE);
2399 		type = VIO_NET_RES_LDC_SERVICE;
2400 	} else {
2401 		(void) atomic_swap_32(&portp->use_vsw_port, B_TRUE);
2402 		type = VIO_NET_RES_LDC_GUEST;
2403 	}
2404 
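	/*
	 * Register this port as a vio network resource of the determined
	 * type; on success, vio_net_resource_reg() fills in the resource
	 * handle (vhp) and callbacks (vcb) used by this port.
	 */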
2405 	if ((macp = mac_alloc(MAC_VERSION)) == NULL) {
2406 		vgen_port_detach(portp);
2407 		return (DDI_FAILURE);
2408 	}
2409 	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
2410 	macp->m_driver = portp;
2411 	macp->m_dip = vgenp->vnetdip;
2412 	macp->m_src_addr = (uint8_t *)&(vgenp->macaddr);
2413 	macp->m_callbacks = &vgen_m_callbacks;
2414 	macp->m_min_sdu = 0;
2415 	macp->m_max_sdu = ETHERMTU;
2416 
2417 	mutex_enter(&portp->lock);
2418 	rv = vio_net_resource_reg(macp, type, vgenp->macaddr,
2419 	    portp->macaddr, &portp->vhp, &portp->vcb);
2420 	mutex_exit(&portp->lock);
2421 	mac_free(macp);
2422 
2423 	if (rv == 0) {
2424 		/* link it into the list of ports */
2425 		plistp = &(vgenp->vgenports);
2426 		WRITE_ENTER(&plistp->rwlock);
2427 		vgen_port_list_insert(portp);
2428 		RW_EXIT(&plistp->rwlock);
2429 	} else {
2430 		DERR(vgenp, NULL, "vio_net_resource_reg failed for portp=0x%p",
2431 		    portp);
2432 		vgen_port_detach(portp);
2433 	}
2434 
2435 	DBG1(vgenp, NULL, "exit: port_num(%d)\n", portp->port_num);
2436 	return (DDI_SUCCESS);
2437 }
2438 
2439 /* detach a port from the device based on mdeg data */
2440 static void
2441 vgen_port_detach_mdeg(vgen_port_t *portp)
2442 {
2443 	vgen_t *vgenp = portp->vgenp;
2444 
2445 	DBG1(vgenp, NULL, "enter: port_num(%d)\n", portp->port_num);
2446 
2447 	mutex_enter(&portp->lock);
2448 
2449 	/* stop the port if needed */
2450 	if (portp->flags & VGEN_STARTED) {
2451 		vgen_port_uninit(portp);
2452 	}
2453 
2454 	mutex_exit(&portp->lock);
2455 	vgen_port_detach(portp);
2456 
2457 	DBG1(vgenp, NULL, "exit: port_num(%d)\n", portp->port_num);
2458 }
2459 
2460 static int
2461 vgen_update_port(vgen_t *vgenp, md_t *curr_mdp, mde_cookie_t curr_mdex,
2462 	md_t *prev_mdp, mde_cookie_t prev_mdex)
2463 {
2464 	uint64_t	cport_num;
2465 	uint64_t	pport_num;
2466 	vgen_portlist_t	*plistp;
2467 	vgen_port_t	*portp;
2468 	boolean_t	updated_vlans = B_FALSE;
2469 	uint16_t	pvid;
2470 	uint16_t	*vids;
2471 	uint16_t	nvids;
2472 
2473 	/*
2474 	 * For now, we get port updates only if vlan ids changed.
2475 	 * We read the port num and do some sanity check.
2476 	 */
2477 	if (md_get_prop_val(curr_mdp, curr_mdex, id_propname, &cport_num)) {
2478 		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
2479 		return (DDI_FAILURE);
2480 	}
2481 
2482 	if (md_get_prop_val(prev_mdp, prev_mdex, id_propname, &pport_num)) {
2483 		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
2484 		return (DDI_FAILURE);
2485 	}
2486 	if (cport_num != pport_num)
2487 		return (DDI_FAILURE);
2488 
2489 	plistp = &(vgenp->vgenports);
2490 
2491 	READ_ENTER(&plistp->rwlock);
2492 
2493 	portp = vgen_port_lookup(plistp, (int)cport_num);
2494 	if (portp == NULL) {
2495 		DWARN(vgenp, NULL, "can't find port(%lx)\n", cport_num);
2496 		RW_EXIT(&plistp->rwlock);
2497 		return (DDI_FAILURE);
2498 	}
2499 
2500 	/* Read the vlan ids */
2501 	vgen_vlan_read_ids(portp, VGEN_PEER, curr_mdp, curr_mdex, &pvid, &vids,
2502 	    &nvids, NULL);
2503 
2504 	/* Determine if there are any vlan id updates */
2505 	if ((pvid != portp->pvid) ||		/* pvid changed? */
2506 	    (nvids != portp->nvids) ||		/* # of vids changed? */
2507 	    ((nvids != 0) && (portp->nvids != 0) &&	/* vids changed? */
2508 	    bcmp(vids, portp->vids, sizeof (uint16_t) * nvids))) {
2509 		updated_vlans = B_TRUE;
2510 	}
2511 
2512 	if (updated_vlans == B_FALSE) {
2513 		RW_EXIT(&plistp->rwlock);
2514 		return (DDI_FAILURE);
2515 	}
2516 
2517 	/* remove the port from vlans it has been assigned to */
2518 	vgen_vlan_remove_ids(portp);
2519 
2520 	/* save the new vlan ids */
2521 	portp->pvid = pvid;
2522 	if (portp->nvids != 0) {
2523 		kmem_free(portp->vids, sizeof (uint16_t) * portp->nvids);
2524 		portp->nvids = 0;
2525 	}
2526 	if (nvids != 0) {
2527 		portp->vids = kmem_zalloc(sizeof (uint16_t) * nvids, KM_SLEEP);
2528 		bcopy(vids, portp->vids, sizeof (uint16_t) * nvids);
2529 		portp->nvids = nvids;
2530 		kmem_free(vids, sizeof (uint16_t) * nvids);
2531 	}
2532 
2533 	/* add port to the new vlans */
2534 	vgen_vlan_add_ids(portp);
2535 
2536 	/* reset the port if it is vlan unaware (ver < 1.3) */
2537 	vgen_vlan_unaware_port_reset(portp);
2538 
2539 	RW_EXIT(&plistp->rwlock);
2540 
2541 	return (DDI_SUCCESS);
2542 }
2543 
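/* Get the given statistic for the port by summing over its channels */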
2544 static uint64_t
2545 vgen_port_stat(vgen_port_t *portp, uint_t stat)
2546 {
2547 	vgen_ldclist_t	*ldclp;
2548 	vgen_ldc_t *ldcp;
2549 	uint64_t	val;
2550 
2551 	val = 0;
2552 	ldclp = &portp->ldclist;
2553 
2554 	READ_ENTER(&ldclp->rwlock);
2555 	for (ldcp = ldclp->headp; ldcp != NULL; ldcp = ldcp->nextp) {
2556 		val += vgen_ldc_stat(ldcp, stat);
2557 	}
2558 	RW_EXIT(&ldclp->rwlock);
2559 
2560 	return (val);
2561 }
2562 
2563 /* attach the channel corresponding to the given ldc_id to the port */
2564 static int
2565 vgen_ldc_attach(vgen_port_t *portp, uint64_t ldc_id)
2566 {
2567 	vgen_t 		*vgenp;
2568 	vgen_ldclist_t	*ldclp;
2569 	vgen_ldc_t 	*ldcp, **prev_ldcp;
2570 	ldc_attr_t 	attr;
2571 	int 		status;
2572 	ldc_status_t	istatus;
2573 	char		kname[MAXNAMELEN];
2574 	int		instance;
2575 	enum	{AST_init = 0x0, AST_ldc_alloc = 0x1,
2576 		AST_mutex_init = 0x2, AST_ldc_init = 0x4,
2577 		AST_ldc_reg_cb = 0x8, AST_alloc_tx_ring = 0x10,
2578 		AST_create_rxmblks = 0x20,
2579 		AST_create_rcv_thread = 0x40} attach_state;
2580 
2581 	attach_state = AST_init;
2582 	vgenp = portp->vgenp;
2583 	ldclp = &portp->ldclist;
2584 
2585 	ldcp = kmem_zalloc(sizeof (vgen_ldc_t), KM_NOSLEEP);
2586 	if (ldcp == NULL) {
2587 		goto ldc_attach_failed;
2588 	}
2589 	ldcp->ldc_id = ldc_id;
2590 	ldcp->portp = portp;
2591 
2592 	attach_state |= AST_ldc_alloc;
2593 
2594 	mutex_init(&ldcp->txlock, NULL, MUTEX_DRIVER, NULL);
2595 	mutex_init(&ldcp->cblock, NULL, MUTEX_DRIVER, NULL);
2596 	mutex_init(&ldcp->tclock, NULL, MUTEX_DRIVER, NULL);
2597 	mutex_init(&ldcp->wrlock, NULL, MUTEX_DRIVER, NULL);
2598 	mutex_init(&ldcp->rxlock, NULL, MUTEX_DRIVER, NULL);
2599 
2600 	attach_state |= AST_mutex_init;
2601 
2602 	attr.devclass = LDC_DEV_NT;
2603 	attr.instance = vgenp->instance;
2604 	attr.mode = LDC_MODE_UNRELIABLE;
2605 	attr.mtu = vnet_ldc_mtu;
2606 	status = ldc_init(ldc_id, &attr, &ldcp->ldc_handle);
2607 	if (status != 0) {
2608 		DWARN(vgenp, ldcp, "ldc_init failed,rv (%d)\n", status);
2609 		goto ldc_attach_failed;
2610 	}
2611 	attach_state |= AST_ldc_init;
2612 
2613 	if (vgen_rcv_thread_enabled) {
2614 		ldcp->rcv_thr_flags = 0;
2615 
2616 		mutex_init(&ldcp->rcv_thr_lock, NULL, MUTEX_DRIVER, NULL);
2617 		cv_init(&ldcp->rcv_thr_cv, NULL, CV_DRIVER, NULL);
2618 		ldcp->rcv_thread = thread_create(NULL, 2 * DEFAULTSTKSZ,
2619 		    vgen_ldc_rcv_worker, ldcp, 0, &p0, TS_RUN, maxclsyspri);
2620 
2621 		attach_state |= AST_create_rcv_thread;
2622 		if (ldcp->rcv_thread == NULL) {
2623 			DWARN(vgenp, ldcp, "Failed to create worker thread");
2624 			goto ldc_attach_failed;
2625 		}
2626 	}
2627 
2628 	status = ldc_reg_callback(ldcp->ldc_handle, vgen_ldc_cb, (caddr_t)ldcp);
2629 	if (status != 0) {
2630 		DWARN(vgenp, ldcp, "ldc_reg_callback failed, rv (%d)\n",
2631 		    status);
2632 		goto ldc_attach_failed;
2633 	}
2634 	/*
2635 	 * allocate a message for ldc_read()s, big enough to hold ctrl and
2636 	 * data msgs, including raw data msgs used to recv priority frames.
2637 	 */
2638 	ldcp->msglen = VIO_PKT_DATA_HDRSIZE + vgenp->max_frame_size;
2639 	ldcp->ldcmsg = kmem_alloc(ldcp->msglen, KM_SLEEP);
2640 	attach_state |= AST_ldc_reg_cb;
2641 
2642 	(void) ldc_status(ldcp->ldc_handle, &istatus);
2643 	ASSERT(istatus == LDC_INIT);
2644 	ldcp->ldc_status = istatus;
2645 
2646 	/* allocate transmit resources */
2647 	status = vgen_alloc_tx_ring(ldcp);
2648 	if (status != 0) {
2649 		goto ldc_attach_failed;
2650 	}
2651 	attach_state |= AST_alloc_tx_ring;
2652 
2653 	/* allocate receive resources */
2654 	status = vio_init_multipools(&ldcp->vmp, VGEN_NUM_VMPOOLS,
2655 	    vgen_rbufsz1, vgen_rbufsz2, vgen_rbufsz3,
2656 	    vgen_nrbufs1, vgen_nrbufs2, vgen_nrbufs3);
2657 	if (status != 0) {
2658 		goto ldc_attach_failed;
2659 	}
2660 	attach_state |= AST_create_rxmblks;
2661 
2662 	/* Setup kstats for the channel */
2663 	instance = vgenp->instance;
2664 	(void) sprintf(kname, "vnetldc0x%lx", ldcp->ldc_id);
2665 	ldcp->ksp = vgen_setup_kstats("vnet", instance, kname, &ldcp->stats);
2666 	if (ldcp->ksp == NULL) {
2667 		goto ldc_attach_failed;
2668 	}
2669 
2670 	/* initialize vgen_versions supported */
2671 	bcopy(vgen_versions, ldcp->vgen_versions, sizeof (ldcp->vgen_versions));
2672 	vgen_reset_vnet_proto_ops(ldcp);
2673 
2674 	/* link it into the list of channels for this port */
2675 	WRITE_ENTER(&ldclp->rwlock);
2676 	prev_ldcp = (vgen_ldc_t **)(&ldclp->headp);
2677 	ldcp->nextp = *prev_ldcp;
2678 	*prev_ldcp = ldcp;
2679 	RW_EXIT(&ldclp->rwlock);
2680 
2681 	ldcp->flags |= CHANNEL_ATTACHED;
2682 	return (DDI_SUCCESS);
2683 
2684 ldc_attach_failed:
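	/* Unwind whichever attach steps completed, as recorded in attach_state. */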
2685 	if (attach_state & AST_ldc_reg_cb) {
2686 		(void) ldc_unreg_callback(ldcp->ldc_handle);
2687 		kmem_free(ldcp->ldcmsg, ldcp->msglen);
2688 	}
2689 	if (attach_state & AST_create_rcv_thread) {
2690 		if (ldcp->rcv_thread != NULL) {
2691 			vgen_stop_rcv_thread(ldcp);
2692 		}
2693 		mutex_destroy(&ldcp->rcv_thr_lock);
2694 		cv_destroy(&ldcp->rcv_thr_cv);
2695 	}
2696 	if (attach_state & AST_create_rxmblks) {
2697 		vio_mblk_pool_t *fvmp = NULL;
2698 
2699 		vio_destroy_multipools(&ldcp->vmp, &fvmp);
2700 		ASSERT(fvmp == NULL);
2701 	}
2702 	if (attach_state & AST_alloc_tx_ring) {
2703 		vgen_free_tx_ring(ldcp);
2704 	}
2705 	if (attach_state & AST_ldc_init) {
2706 		(void) ldc_fini(ldcp->ldc_handle);
2707 	}
2708 	if (attach_state & AST_mutex_init) {
2709 		mutex_destroy(&ldcp->tclock);
2710 		mutex_destroy(&ldcp->txlock);
2711 		mutex_destroy(&ldcp->cblock);
2712 		mutex_destroy(&ldcp->wrlock);
2713 		mutex_destroy(&ldcp->rxlock);
2714 	}
2715 	if (attach_state & AST_ldc_alloc) {
2716 		KMEM_FREE(ldcp);
2717 	}
2718 	return (DDI_FAILURE);
2719 }
2720 
2721 /* detach a channel from the port */
2722 static void
2723 vgen_ldc_detach(vgen_ldc_t *ldcp)
2724 {
2725 	vgen_port_t	*portp;
2726 	vgen_t 		*vgenp;
2727 	vgen_ldc_t 	*pldcp;
2728 	vgen_ldc_t	**prev_ldcp;
2729 	vgen_ldclist_t	*ldclp;
2730 
2731 	portp = ldcp->portp;
2732 	vgenp = portp->vgenp;
2733 	ldclp = &portp->ldclist;
2734 
2735 	prev_ldcp =  (vgen_ldc_t **)&ldclp->headp;
2736 	for (; (pldcp = *prev_ldcp) != NULL; prev_ldcp = &pldcp->nextp) {
2737 		if (pldcp == ldcp) {
2738 			break;
2739 		}
2740 	}
2741 
2742 	if (pldcp == NULL) {
2743 		/* invalid ldcp? */
2744 		return;
2745 	}
2746 
2747 	if (ldcp->ldc_status != LDC_INIT) {
2748 		DWARN(vgenp, ldcp, "ldc_status is not INIT\n");
2749 	}
2750 
2751 	if (ldcp->flags & CHANNEL_ATTACHED) {
2752 		ldcp->flags &= ~(CHANNEL_ATTACHED);
2753 
2754 		(void) ldc_unreg_callback(ldcp->ldc_handle);
2755 		if (ldcp->rcv_thread != NULL) {
2756 			/* First stop the receive thread */
2757 			vgen_stop_rcv_thread(ldcp);
2758 			mutex_destroy(&ldcp->rcv_thr_lock);
2759 			cv_destroy(&ldcp->rcv_thr_cv);
2760 		}
2761 		kmem_free(ldcp->ldcmsg, ldcp->msglen);
2762 
2763 		vgen_destroy_kstats(ldcp->ksp);
2764 		ldcp->ksp = NULL;
2765 
2766 		/*
2767 		 * if we cannot reclaim all mblks, put this
2768 		 * on the list of pools(vgenp->rmp) to be reclaimed when the
2769 		 * device gets detached (see vgen_uninit()).
2770 		 */
2771 		vio_destroy_multipools(&ldcp->vmp, &vgenp->rmp);
2772 
2773 		/* free transmit resources */
2774 		vgen_free_tx_ring(ldcp);
2775 
2776 		(void) ldc_fini(ldcp->ldc_handle);
2777 		mutex_destroy(&ldcp->tclock);
2778 		mutex_destroy(&ldcp->txlock);
2779 		mutex_destroy(&ldcp->cblock);
2780 		mutex_destroy(&ldcp->wrlock);
2781 		mutex_destroy(&ldcp->rxlock);
2782 
2783 		/* unlink it from the list */
2784 		*prev_ldcp = ldcp->nextp;
2785 		KMEM_FREE(ldcp);
2786 	}
2787 }
2788 
2789 /*
2790  * This function allocates transmit resources for the channel.
2791  * The resources consist of a transmit descriptor ring and an associated
2792  * transmit buffer ring.
2793  */
2794 static int
2795 vgen_alloc_tx_ring(vgen_ldc_t *ldcp)
2796 {
2797 	void *tbufp;
2798 	ldc_mem_info_t minfo;
2799 	uint32_t txdsize;
2800 	uint32_t tbufsize;
2801 	int status;
2802 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
2803 
2804 	ldcp->num_txds = vnet_ntxds;
2805 	txdsize = sizeof (vnet_public_desc_t);
2806 	tbufsize = sizeof (vgen_private_desc_t);
2807 
2808 	/* allocate transmit buffer ring */
2809 	tbufp = kmem_zalloc(ldcp->num_txds * tbufsize, KM_NOSLEEP);
2810 	if (tbufp == NULL) {
2811 		return (DDI_FAILURE);
2812 	}
2813 
2814 	/* create transmit descriptor ring */
2815 	status = ldc_mem_dring_create(ldcp->num_txds, txdsize,
2816 	    &ldcp->tx_dhandle);
2817 	if (status) {
2818 		DWARN(vgenp, ldcp, "ldc_mem_dring_create() failed\n");
2819 		kmem_free(tbufp, ldcp->num_txds * tbufsize);
2820 		return (DDI_FAILURE);
2821 	}
2822 
2823 	/* get the address of the descriptor ring */
2824 	status = ldc_mem_dring_info(ldcp->tx_dhandle, &minfo);
2825 	if (status) {
2826 		DWARN(vgenp, ldcp, "ldc_mem_dring_info() failed\n");
2827 		kmem_free(tbufp, ldcp->num_txds * tbufsize);
2828 		(void) ldc_mem_dring_destroy(ldcp->tx_dhandle);
2829 		ldcp->tbufp = NULL;
2830 		return (DDI_FAILURE);
2831 	}
2832 	ldcp->txdp = (vnet_public_desc_t *)(minfo.vaddr);
2833 	ldcp->tbufp = tbufp;
2834 
2835 	ldcp->txdendp = &((ldcp->txdp)[ldcp->num_txds]);
2836 	ldcp->tbufendp = &((ldcp->tbufp)[ldcp->num_txds]);
2837 
2838 	return (DDI_SUCCESS);
2839 }
2840 
2841 /* Free transmit resources for the channel */
2842 static void
2843 vgen_free_tx_ring(vgen_ldc_t *ldcp)
2844 {
2845 	int tbufsize = sizeof (vgen_private_desc_t);
2846 
2847 	/* free transmit descriptor ring */
2848 	(void) ldc_mem_dring_destroy(ldcp->tx_dhandle);
2849 
2850 	/* free transmit buffer ring */
2851 	kmem_free(ldcp->tbufp, ldcp->num_txds * tbufsize);
2852 	ldcp->txdp = ldcp->txdendp = NULL;
2853 	ldcp->tbufp = ldcp->tbufendp = NULL;
2854 }
2855 
2856 /* enable transmit/receive on the channels for the port */
2857 static void
2858 vgen_init_ldcs(vgen_port_t *portp)
2859 {
2860 	vgen_ldclist_t	*ldclp = &portp->ldclist;
2861 	vgen_ldc_t	*ldcp;
2862 
2863 	READ_ENTER(&ldclp->rwlock);
2864 	ldcp =  ldclp->headp;
2865 	for (; ldcp  != NULL; ldcp = ldcp->nextp) {
2866 		(void) vgen_ldc_init(ldcp);
2867 	}
2868 	RW_EXIT(&ldclp->rwlock);
2869 }
2870 
2871 /* stop transmit/receive on the channels for the port */
2872 static void
2873 vgen_uninit_ldcs(vgen_port_t *portp)
2874 {
2875 	vgen_ldclist_t	*ldclp = &portp->ldclist;
2876 	vgen_ldc_t	*ldcp;
2877 
2878 	READ_ENTER(&ldclp->rwlock);
2879 	ldcp =  ldclp->headp;
2880 	for (; ldcp  != NULL; ldcp = ldcp->nextp) {
2881 		vgen_ldc_uninit(ldcp);
2882 	}
2883 	RW_EXIT(&ldclp->rwlock);
2884 }
2885 
2886 /* enable transmit/receive on the channel */
2887 static int
2888 vgen_ldc_init(vgen_ldc_t *ldcp)
2889 {
2890 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
2891 	ldc_status_t	istatus;
2892 	int		rv;
2893 	uint32_t	retries = 0;
2894 	enum	{ ST_init = 0x0, ST_ldc_open = 0x1,
2895 		ST_init_tbufs = 0x2, ST_cb_enable = 0x4} init_state;
2896 	init_state = ST_init;
2897 
2898 	DBG1(vgenp, ldcp, "enter\n");
2899 	LDC_LOCK(ldcp);
2900 
2901 	rv = ldc_open(ldcp->ldc_handle);
2902 	if (rv != 0) {
2903 		DWARN(vgenp, ldcp, "ldc_open failed: rv(%d)\n", rv);
2904 		goto ldcinit_failed;
2905 	}
2906 	init_state |= ST_ldc_open;
2907 
2908 	(void) ldc_status(ldcp->ldc_handle, &istatus);
2909 	if (istatus != LDC_OPEN && istatus != LDC_READY) {
2910 		DWARN(vgenp, ldcp, "status(%d) is not OPEN/READY\n", istatus);
2911 		goto ldcinit_failed;
2912 	}
2913 	ldcp->ldc_status = istatus;
2914 
2915 	rv = vgen_init_tbufs(ldcp);
2916 	if (rv != 0) {
2917 		DWARN(vgenp, ldcp, "vgen_init_tbufs() failed\n");
2918 		goto ldcinit_failed;
2919 	}
2920 	init_state |= ST_init_tbufs;
2921 
2922 	rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_ENABLE);
2923 	if (rv != 0) {
2924 		DWARN(vgenp, ldcp, "ldc_set_cb_mode failed: rv(%d)\n", rv);
2925 		goto ldcinit_failed;
2926 	}
2927 
2928 	init_state |= ST_cb_enable;
2929 
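	/*
	 * Bring the channel up; retry while ldc_up() returns EWOULDBLOCK,
	 * up to vgen_ldcup_retries attempts, waiting VGEN_LDC_UP_DELAY
	 * microseconds between attempts.
	 */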
2930 	do {
2931 		rv = ldc_up(ldcp->ldc_handle);
2932 		if (rv == EWOULDBLOCK) {
2933 			DBG2(vgenp, ldcp, "ldc_up err rv(%d)\n", rv);
2934 			drv_usecwait(VGEN_LDC_UP_DELAY);
2935 		}
2936 		if (retries++ >= vgen_ldcup_retries)
2937 			break;
2938 	} while (rv == EWOULDBLOCK);
2939 
2940 	(void) ldc_status(ldcp->ldc_handle, &istatus);
2941 	if (istatus == LDC_UP) {
2942 		DWARN(vgenp, ldcp, "status(%d) is UP\n", istatus);
2943 	}
2944 
2945 	ldcp->ldc_status = istatus;
2946 
2947 	/* initialize transmit watchdog timeout */
2948 	ldcp->wd_tid = timeout(vgen_ldc_watchdog, (caddr_t)ldcp,
2949 	    drv_usectohz(vnet_ldcwd_interval * 1000));
2950 
2951 	ldcp->hphase = -1;
2952 	ldcp->flags |= CHANNEL_STARTED;
2953 
2954 	/* if channel is already UP - start handshake */
2955 	if (istatus == LDC_UP) {
2956 		vgen_t *vgenp = LDC_TO_VGEN(ldcp);
2957 		if (ldcp->portp != vgenp->vsw_portp) {
2958 			/*
2959 			 * As the channel is up, use this port from now on.
2960 			 */
2961 			(void) atomic_swap_32(
2962 			    &ldcp->portp->use_vsw_port, B_FALSE);
2963 		}
2964 
2965 		/* Initialize local session id */
2966 		ldcp->local_sid = ddi_get_lbolt();
2967 
2968 		/* clear peer session id */
2969 		ldcp->peer_sid = 0;
2970 		ldcp->hretries = 0;
2971 
2972 		/* Initiate Handshake process with peer ldc endpoint */
2973 		vgen_reset_hphase(ldcp);
2974 
2975 		mutex_exit(&ldcp->tclock);
2976 		mutex_exit(&ldcp->txlock);
2977 		mutex_exit(&ldcp->wrlock);
2978 		mutex_exit(&ldcp->rxlock);
2979 		vgen_handshake(vh_nextphase(ldcp));
2980 		mutex_exit(&ldcp->cblock);
2981 	} else {
2982 		LDC_UNLOCK(ldcp);
2983 	}
2984 
2985 	return (DDI_SUCCESS);
2986 
2987 ldcinit_failed:
2988 	if (init_state & ST_cb_enable) {
2989 		(void) ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE);
2990 	}
2991 	if (init_state & ST_init_tbufs) {
2992 		vgen_uninit_tbufs(ldcp);
2993 	}
2994 	if (init_state & ST_ldc_open) {
2995 		(void) ldc_close(ldcp->ldc_handle);
2996 	}
2997 	LDC_UNLOCK(ldcp);
2998 	DBG1(vgenp, ldcp, "exit\n");
2999 	return (DDI_FAILURE);
3000 }
3001 
3002 /* stop transmit/receive on the channel */
3003 static void
3004 vgen_ldc_uninit(vgen_ldc_t *ldcp)
3005 {
3006 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3007 	int	rv;
3008 
3009 	DBG1(vgenp, ldcp, "enter\n");
3010 	LDC_LOCK(ldcp);
3011 
3012 	if ((ldcp->flags & CHANNEL_STARTED) == 0) {
3013 		LDC_UNLOCK(ldcp);
3014 		DWARN(vgenp, ldcp, "CHANNEL_STARTED flag is not set\n");
3015 		return;
3016 	}
3017 
3018 	/* disable further callbacks */
3019 	rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE);
3020 	if (rv != 0) {
3021 		DWARN(vgenp, ldcp, "ldc_set_cb_mode failed\n");
3022 	}
3023 
3024 	if (vgenp->vsw_portp == ldcp->portp) {
3025 		vio_net_report_err_t rep_err =
3026 		    ldcp->portp->vcb.vio_net_report_err;
3027 		rep_err(ldcp->portp->vhp, VIO_NET_RES_DOWN);
3028 	}
3029 
3030 	/*
3031 	 * clear handshake done bit and wait for pending tx and cb to finish.
3032 	 * release locks before untimeout(9F) is invoked to cancel timeouts.
3033 	 */
3034 	ldcp->hphase &= ~(VH_DONE);
3035 	LDC_UNLOCK(ldcp);
3036 
3037 	/* cancel handshake watchdog timeout */
3038 	if (ldcp->htid) {
3039 		(void) untimeout(ldcp->htid);
3040 		ldcp->htid = 0;
3041 	}
3042 
3043 	/* cancel transmit watchdog timeout */
3044 	if (ldcp->wd_tid) {
3045 		(void) untimeout(ldcp->wd_tid);
3046 		ldcp->wd_tid = 0;
3047 	}
3048 
3049 	drv_usecwait(1000);
3050 
3051 	/* acquire locks again; any pending transmits and callbacks are done */
3052 	LDC_LOCK(ldcp);
3053 
3054 	vgen_reset_hphase(ldcp);
3055 
3056 	vgen_uninit_tbufs(ldcp);
3057 
3058 	rv = ldc_close(ldcp->ldc_handle);
3059 	if (rv != 0) {
3060 		DWARN(vgenp, ldcp, "ldc_close err\n");
3061 	}
3062 	ldcp->ldc_status = LDC_INIT;
3063 	ldcp->flags &= ~(CHANNEL_STARTED);
3064 
3065 	LDC_UNLOCK(ldcp);
3066 
3067 	DBG1(vgenp, ldcp, "exit\n");
3068 }
3069 
3070 /* Initialize the transmit buffer ring for the channel */
3071 static int
3072 vgen_init_tbufs(vgen_ldc_t *ldcp)
3073 {
3074 	vgen_private_desc_t	*tbufp;
3075 	vnet_public_desc_t	*txdp;
3076 	vio_dring_entry_hdr_t		*hdrp;
3077 	int 			i;
3078 	int 			rv;
3079 	caddr_t			datap = NULL;
3080 	int			ci;
3081 	uint32_t		ncookies;
3082 	size_t			data_sz;
3083 	vgen_t			*vgenp;
3084 
3085 	vgenp = LDC_TO_VGEN(ldcp);
3086 
3087 	bzero(ldcp->tbufp, sizeof (*tbufp) * (ldcp->num_txds));
3088 	bzero(ldcp->txdp, sizeof (*txdp) * (ldcp->num_txds));
3089 
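	/*
	 * Allocate a single contiguous transmit data area; each descriptor
	 * is bound to its own 2K-rounded slice of this area below.
	 */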
3090 	data_sz = vgenp->max_frame_size + VNET_IPALIGN + VNET_LDCALIGN;
3091 	data_sz = VNET_ROUNDUP_2K(data_sz);
3092 	ldcp->tx_data_sz = data_sz * ldcp->num_txds;
3093 	datap = kmem_zalloc(ldcp->tx_data_sz, KM_SLEEP);
3094 	ldcp->tx_datap = datap;
3095 
3096 	/*
3097 	 * For each private descriptor, allocate an ldc mem_handle (which is
3098 	 * required to map the data during transmit) and set the flags
3099 	 * to free (available for use by the transmit routine).
3100 	 */
3101 
3102 	for (i = 0; i < ldcp->num_txds; i++) {
3103 
3104 		tbufp = &(ldcp->tbufp[i]);
3105 		rv = ldc_mem_alloc_handle(ldcp->ldc_handle,
3106 		    &(tbufp->memhandle));
3107 		if (rv) {
3108 			tbufp->memhandle = 0;
3109 			goto init_tbufs_failed;
3110 		}
3111 
3112 		/*
3113 		 * bind ldc memhandle to the corresponding transmit buffer.
3114 		 */
3115 		ci = ncookies = 0;
3116 		rv = ldc_mem_bind_handle(tbufp->memhandle,
3117 		    (caddr_t)datap, data_sz, LDC_SHADOW_MAP,
3118 		    LDC_MEM_R, &(tbufp->memcookie[ci]), &ncookies);
3119 		if (rv != 0) {
3120 			goto init_tbufs_failed;
3121 		}
3122 
3123 		/*
3124 		 * successful in binding the handle to tx data buffer.
3125 		 * set datap in the private descr to this buffer.
3126 		 */
3127 		tbufp->datap = datap;
3128 
3129 		if ((ncookies == 0) ||
3130 		    (ncookies > MAX_COOKIES)) {
3131 			goto init_tbufs_failed;
3132 		}
3133 
3134 		for (ci = 1; ci < ncookies; ci++) {
3135 			rv = ldc_mem_nextcookie(tbufp->memhandle,
3136 			    &(tbufp->memcookie[ci]));
3137 			if (rv != 0) {
3138 				goto init_tbufs_failed;
3139 			}
3140 		}
3141 
3142 		tbufp->ncookies = ncookies;
3143 		datap += data_sz;
3144 
3145 		tbufp->flags = VGEN_PRIV_DESC_FREE;
3146 		txdp = &(ldcp->txdp[i]);
3147 		hdrp = &txdp->hdr;
3148 		hdrp->dstate = VIO_DESC_FREE;
3149 		hdrp->ack = B_FALSE;
3150 		tbufp->descp = txdp;
3151 
3152 	}
3153 
3154 	/* reset tbuf walking pointers */
3155 	ldcp->next_tbufp = ldcp->tbufp;
3156 	ldcp->cur_tbufp = ldcp->tbufp;
3157 
3158 	/* initialize tx seqnum and index */
3159 	ldcp->next_txseq = VNET_ISS;
3160 	ldcp->next_txi = 0;
3161 
3162 	ldcp->resched_peer = B_TRUE;
3163 	ldcp->resched_peer_txi = 0;
3164 
3165 	return (DDI_SUCCESS);
3166 
3167 init_tbufs_failed:;
3168 	vgen_uninit_tbufs(ldcp);
3169 	return (DDI_FAILURE);
3170 }
3171 
3172 /* Uninitialize transmit buffer ring for the channel */
3173 static void
3174 vgen_uninit_tbufs(vgen_ldc_t *ldcp)
3175 {
3176 	vgen_private_desc_t	*tbufp = ldcp->tbufp;
3177 	int 			i;
3178 
3179 	/* for each tbuf (priv_desc), free ldc mem_handle */
3180 	for (i = 0; i < ldcp->num_txds; i++) {
3181 
3182 		tbufp = &(ldcp->tbufp[i]);
3183 
3184 		if (tbufp->datap) { /* if bound to a ldc memhandle */
3185 			(void) ldc_mem_unbind_handle(tbufp->memhandle);
3186 			tbufp->datap = NULL;
3187 		}
3188 		if (tbufp->memhandle) {
3189 			(void) ldc_mem_free_handle(tbufp->memhandle);
3190 			tbufp->memhandle = 0;
3191 		}
3192 	}
3193 
3194 	if (ldcp->tx_datap) {
3195 		/* prealloc'd tx data buffer */
3196 		kmem_free(ldcp->tx_datap, ldcp->tx_data_sz);
3197 		ldcp->tx_datap = NULL;
3198 		ldcp->tx_data_sz = 0;
3199 	}
3200 
3201 	bzero(ldcp->tbufp, sizeof (vgen_private_desc_t) * (ldcp->num_txds));
3202 	bzero(ldcp->txdp, sizeof (vnet_public_desc_t) * (ldcp->num_txds));
3203 }
3204 
3205 /* clobber tx descriptor ring */
3206 static void
3207 vgen_clobber_tbufs(vgen_ldc_t *ldcp)
3208 {
3209 	vnet_public_desc_t	*txdp;
3210 	vgen_private_desc_t	*tbufp;
3211 	vio_dring_entry_hdr_t	*hdrp;
3212 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3213 	int i;
3214 #ifdef DEBUG
3215 	int ndone = 0;
3216 #endif
3217 
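	/*
	 * Reclaim any busy private descriptors and mark the corresponding
	 * public descriptors free, discarding any transmits in flight.
	 */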
3218 	for (i = 0; i < ldcp->num_txds; i++) {
3219 
3220 		tbufp = &(ldcp->tbufp[i]);
3221 		txdp = tbufp->descp;
3222 		hdrp = &txdp->hdr;
3223 
3224 		if (tbufp->flags & VGEN_PRIV_DESC_BUSY) {
3225 			tbufp->flags = VGEN_PRIV_DESC_FREE;
3226 #ifdef DEBUG
3227 			if (hdrp->dstate == VIO_DESC_DONE)
3228 				ndone++;
3229 #endif
3230 			hdrp->dstate = VIO_DESC_FREE;
3231 			hdrp->ack = B_FALSE;
3232 		}
3233 	}
3234 	/* reset tbuf walking pointers */
3235 	ldcp->next_tbufp = ldcp->tbufp;
3236 	ldcp->cur_tbufp = ldcp->tbufp;
3237 
3238 	/* reset tx seqnum and index */
3239 	ldcp->next_txseq = VNET_ISS;
3240 	ldcp->next_txi = 0;
3241 
3242 	ldcp->resched_peer = B_TRUE;
3243 	ldcp->resched_peer_txi = 0;
3244 
3245 	DBG2(vgenp, ldcp, "num descrs done (%d)\n", ndone);
3246 }
3247 
3248 /* clobber receive descriptor ring */
3249 static void
3250 vgen_clobber_rxds(vgen_ldc_t *ldcp)
3251 {
3252 	ldcp->rx_dhandle = 0;
3253 	bzero(&ldcp->rx_dcookie, sizeof (ldcp->rx_dcookie));
3254 	ldcp->rxdp = NULL;
3255 	ldcp->next_rxi = 0;
3256 	ldcp->num_rxds = 0;
3257 	ldcp->next_rxseq = VNET_ISS;
3258 }
3259 
3260 /* initialize receive descriptor ring */
3261 static int
3262 vgen_init_rxds(vgen_ldc_t *ldcp, uint32_t num_desc, uint32_t desc_size,
3263 	ldc_mem_cookie_t *dcookie, uint32_t ncookies)
3264 {
3265 	int rv;
3266 	ldc_mem_info_t minfo;
3267 
3268 	rv = ldc_mem_dring_map(ldcp->ldc_handle, dcookie, ncookies, num_desc,
3269 	    desc_size, LDC_DIRECT_MAP, &(ldcp->rx_dhandle));
3270 	if (rv != 0) {
3271 		return (DDI_FAILURE);
3272 	}
3273 
3274 	/*
3275 	 * successfully mapped, now try to
3276 	 * get info about the mapped dring
3277 	 */
3278 	rv = ldc_mem_dring_info(ldcp->rx_dhandle, &minfo);
3279 	if (rv != 0) {
3280 		(void) ldc_mem_dring_unmap(ldcp->rx_dhandle);
3281 		return (DDI_FAILURE);
3282 	}
3283 
3284 	/*
3285 	 * save ring address, number of descriptors.
3286 	 */
3287 	ldcp->rxdp = (vnet_public_desc_t *)(minfo.vaddr);
3288 	bcopy(dcookie, &(ldcp->rx_dcookie), sizeof (*dcookie));
3289 	ldcp->num_rxdcookies = ncookies;
3290 	ldcp->num_rxds = num_desc;
3291 	ldcp->next_rxi = 0;
3292 	ldcp->next_rxseq = VNET_ISS;
3293 	ldcp->dring_mtype = minfo.mtype;
3294 
3295 	return (DDI_SUCCESS);
3296 }
3297 
3298 /* get channel statistics */
3299 static uint64_t
3300 vgen_ldc_stat(vgen_ldc_t *ldcp, uint_t stat)
3301 {
3302 	vgen_stats_t *statsp;
3303 	uint64_t val;
3304 
3305 	val = 0;
3306 	statsp = &ldcp->stats;
3307 	switch (stat) {
3308 
3309 	case MAC_STAT_MULTIRCV:
3310 		val = statsp->multircv;
3311 		break;
3312 
3313 	case MAC_STAT_BRDCSTRCV:
3314 		val = statsp->brdcstrcv;
3315 		break;
3316 
3317 	case MAC_STAT_MULTIXMT:
3318 		val = statsp->multixmt;
3319 		break;
3320 
3321 	case MAC_STAT_BRDCSTXMT:
3322 		val = statsp->brdcstxmt;
3323 		break;
3324 
3325 	case MAC_STAT_NORCVBUF:
3326 		val = statsp->norcvbuf;
3327 		break;
3328 
3329 	case MAC_STAT_IERRORS:
3330 		val = statsp->ierrors;
3331 		break;
3332 
3333 	case MAC_STAT_NOXMTBUF:
3334 		val = statsp->noxmtbuf;
3335 		break;
3336 
3337 	case MAC_STAT_OERRORS:
3338 		val = statsp->oerrors;
3339 		break;
3340 
3341 	case MAC_STAT_COLLISIONS:
3342 		break;
3343 
3344 	case MAC_STAT_RBYTES:
3345 		val = statsp->rbytes;
3346 		break;
3347 
3348 	case MAC_STAT_IPACKETS:
3349 		val = statsp->ipackets;
3350 		break;
3351 
3352 	case MAC_STAT_OBYTES:
3353 		val = statsp->obytes;
3354 		break;
3355 
3356 	case MAC_STAT_OPACKETS:
3357 		val = statsp->opackets;
3358 		break;
3359 
3360 	/* stats not relevant to ldc, return 0 */
3361 	case MAC_STAT_IFSPEED:
3362 	case ETHER_STAT_ALIGN_ERRORS:
3363 	case ETHER_STAT_FCS_ERRORS:
3364 	case ETHER_STAT_FIRST_COLLISIONS:
3365 	case ETHER_STAT_MULTI_COLLISIONS:
3366 	case ETHER_STAT_DEFER_XMTS:
3367 	case ETHER_STAT_TX_LATE_COLLISIONS:
3368 	case ETHER_STAT_EX_COLLISIONS:
3369 	case ETHER_STAT_MACXMT_ERRORS:
3370 	case ETHER_STAT_CARRIER_ERRORS:
3371 	case ETHER_STAT_TOOLONG_ERRORS:
3372 	case ETHER_STAT_XCVR_ADDR:
3373 	case ETHER_STAT_XCVR_ID:
3374 	case ETHER_STAT_XCVR_INUSE:
3375 	case ETHER_STAT_CAP_1000FDX:
3376 	case ETHER_STAT_CAP_1000HDX:
3377 	case ETHER_STAT_CAP_100FDX:
3378 	case ETHER_STAT_CAP_100HDX:
3379 	case ETHER_STAT_CAP_10FDX:
3380 	case ETHER_STAT_CAP_10HDX:
3381 	case ETHER_STAT_CAP_ASMPAUSE:
3382 	case ETHER_STAT_CAP_PAUSE:
3383 	case ETHER_STAT_CAP_AUTONEG:
3384 	case ETHER_STAT_ADV_CAP_1000FDX:
3385 	case ETHER_STAT_ADV_CAP_1000HDX:
3386 	case ETHER_STAT_ADV_CAP_100FDX:
3387 	case ETHER_STAT_ADV_CAP_100HDX:
3388 	case ETHER_STAT_ADV_CAP_10FDX:
3389 	case ETHER_STAT_ADV_CAP_10HDX:
3390 	case ETHER_STAT_ADV_CAP_ASMPAUSE:
3391 	case ETHER_STAT_ADV_CAP_PAUSE:
3392 	case ETHER_STAT_ADV_CAP_AUTONEG:
3393 	case ETHER_STAT_LP_CAP_1000FDX:
3394 	case ETHER_STAT_LP_CAP_1000HDX:
3395 	case ETHER_STAT_LP_CAP_100FDX:
3396 	case ETHER_STAT_LP_CAP_100HDX:
3397 	case ETHER_STAT_LP_CAP_10FDX:
3398 	case ETHER_STAT_LP_CAP_10HDX:
3399 	case ETHER_STAT_LP_CAP_ASMPAUSE:
3400 	case ETHER_STAT_LP_CAP_PAUSE:
3401 	case ETHER_STAT_LP_CAP_AUTONEG:
3402 	case ETHER_STAT_LINK_ASMPAUSE:
3403 	case ETHER_STAT_LINK_PAUSE:
3404 	case ETHER_STAT_LINK_AUTONEG:
3405 	case ETHER_STAT_LINK_DUPLEX:
3406 	default:
3407 		val = 0;
3408 		break;
3409 
3410 	}
3411 	return (val);
3412 }
3413 
3414 /*
3415  * LDC channel is UP, start handshake process with peer.
3416  */
3417 static void
3418 vgen_handle_evt_up(vgen_ldc_t *ldcp)
3419 {
3420 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
3421 
3422 	DBG1(vgenp, ldcp, "enter\n");
3423 
3424 	ASSERT(MUTEX_HELD(&ldcp->cblock));
3425 
3426 	if (ldcp->portp != vgenp->vsw_portp) {
3427 		/*
3428 		 * As the channel is up, use this port from now on.
3429 		 */
3430 		(void) atomic_swap_32(&ldcp->portp->use_vsw_port, B_FALSE);
3431 	}
3432 
3433 	/* Initialize local session id */
3434 	ldcp->local_sid = ddi_get_lbolt();
3435 
3436 	/* clear peer session id */
3437 	ldcp->peer_sid = 0;
3438 	ldcp->hretries = 0;
3439 
3440 	if (ldcp->hphase != VH_PHASE0) {
3441 		vgen_handshake_reset(ldcp);
3442 	}
3443 
3444 	/* Initiate Handshake process with peer ldc endpoint */
3445 	vgen_handshake(vh_nextphase(ldcp));
3446 
3447 	DBG1(vgenp, ldcp, "exit\n");
3448 }
3449 
3450 /*
3451  * LDC channel is Reset, terminate connection with peer and try to
3452  * bring the channel up again.
3453  */
3454 static void
3455 vgen_handle_evt_reset(vgen_ldc_t *ldcp)
3456 {
3457 	ldc_status_t istatus;
3458 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
3459 	int	rv;
3460 
3461 	DBG1(vgenp, ldcp, "enter\n");
3462 
3463 	ASSERT(MUTEX_HELD(&ldcp->cblock));
3464 
3465 	if ((ldcp->portp != vgenp->vsw_portp) &&
3466 	    (vgenp->vsw_portp != NULL)) {
3467 		/*
3468 		 * As the channel is down, use the switch port until
3469 		 * the channel becomes ready to be used.
3470 		 */
3471 		(void) atomic_swap_32(&ldcp->portp->use_vsw_port, B_TRUE);
3472 	}
3473 
3474 	if (vgenp->vsw_portp == ldcp->portp) {
3475 		vio_net_report_err_t rep_err =
3476 		    ldcp->portp->vcb.vio_net_report_err;
3477 
3478 		/* Post a reset message */
3479 		rep_err(ldcp->portp->vhp, VIO_NET_RES_DOWN);
3480 	}
3481 
3482 	if (ldcp->hphase != VH_PHASE0) {
3483 		vgen_handshake_reset(ldcp);
3484 	}
3485 
3486 	/* try to bring the channel up */
3487 	rv = ldc_up(ldcp->ldc_handle);
3488 	if (rv != 0) {
3489 		DWARN(vgenp, ldcp, "ldc_up err rv(%d)\n", rv);
3490 	}
3491 
3492 	if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
3493 		DWARN(vgenp, ldcp, "ldc_status err\n");
3494 	} else {
3495 		ldcp->ldc_status = istatus;
3496 	}
3497 
3498 	/* if channel is already UP - restart handshake */
3499 	if (ldcp->ldc_status == LDC_UP) {
3500 		vgen_handle_evt_up(ldcp);
3501 	}
3502 
3503 	DBG1(vgenp, ldcp, "exit\n");
3504 }
3505 
3506 /* Interrupt handler for the channel */
3507 static uint_t
3508 vgen_ldc_cb(uint64_t event, caddr_t arg)
3509 {
3510 	_NOTE(ARGUNUSED(event))
3511 	vgen_ldc_t	*ldcp;
3512 	vgen_t		*vgenp;
3513 	ldc_status_t 	istatus;
3514 	vgen_stats_t	*statsp;
3515 
3516 	ldcp = (vgen_ldc_t *)arg;
3517 	vgenp = LDC_TO_VGEN(ldcp);
3518 	statsp = &ldcp->stats;
3519 
3520 	DBG1(vgenp, ldcp, "enter\n");
3521 
3522 	mutex_enter(&ldcp->cblock);
3523 	statsp->callbacks++;
3524 	if ((ldcp->ldc_status == LDC_INIT) || (ldcp->ldc_handle == NULL)) {
3525 		DWARN(vgenp, ldcp, "status(%d) is LDC_INIT\n",
3526 		    ldcp->ldc_status);
3527 		mutex_exit(&ldcp->cblock);
3528 		return (LDC_SUCCESS);
3529 	}
3530 
3531 	/*
3532 	 * NOTE: not using switch() as event could be triggered by
3533 	 * a state change and a read request. Also the ordering of the
3534 	 * check for the event types is deliberate.
3535 	 */
3536 	if (event & LDC_EVT_UP) {
3537 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
3538 			DWARN(vgenp, ldcp, "ldc_status err\n");
3539 			/* status couldn't be determined */
3540 			mutex_exit(&ldcp->cblock);
3541 			return (LDC_FAILURE);
3542 		}
3543 		ldcp->ldc_status = istatus;
3544 		if (ldcp->ldc_status != LDC_UP) {
3545 			DWARN(vgenp, ldcp, "LDC_EVT_UP received "
3546 			    " but ldc status is not UP(0x%x)\n",
3547 			    ldcp->ldc_status);
3548 			/* spurious interrupt, return success */
3549 			mutex_exit(&ldcp->cblock);
3550 			return (LDC_SUCCESS);
3551 		}
3552 		DWARN(vgenp, ldcp, "event(%lx) UP, status(%d)\n",
3553 		    event, ldcp->ldc_status);
3554 
3555 		vgen_handle_evt_up(ldcp);
3556 
3557 		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);
3558 	}
3559 
3560 	/* Handle RESET/DOWN before READ event */
3561 	if (event & (LDC_EVT_RESET | LDC_EVT_DOWN)) {
3562 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
3563 			DWARN(vgenp, ldcp, "ldc_status error\n");
3564 			/* status couldn't be determined */
3565 			mutex_exit(&ldcp->cblock);
3566 			return (LDC_FAILURE);
3567 		}
3568 		ldcp->ldc_status = istatus;
3569 		DWARN(vgenp, ldcp, "event(%lx) RESET/DOWN, status(%d)\n",
3570 		    event, ldcp->ldc_status);
3571 
3572 		vgen_handle_evt_reset(ldcp);
3573 
3574 		/*
3575 		 * As the channel is down/reset, ignore READ event
3576 		 * but print a debug warning message.
3577 		 */
3578 		if (event & LDC_EVT_READ) {
3579 			DWARN(vgenp, ldcp,
3580 			    "LDC_EVT_READ set along with RESET/DOWN\n");
3581 			event &= ~LDC_EVT_READ;
3582 		}
3583 	}
3584 
3585 	if (event & LDC_EVT_READ) {
3586 		DBG2(vgenp, ldcp, "event(%lx) READ, status(%d)\n",
3587 		    event, ldcp->ldc_status);
3588 
3589 		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);
3590 
3591 		if (ldcp->rcv_thread != NULL) {
3592 			/*
3593 			 * If the receive thread is enabled, then
3594 			 * wakeup the receive thread to process the
3595 			 * LDC messages.
3596 			 */
3597 			mutex_exit(&ldcp->cblock);
3598 			mutex_enter(&ldcp->rcv_thr_lock);
3599 			if (!(ldcp->rcv_thr_flags & VGEN_WTHR_DATARCVD)) {
3600 				ldcp->rcv_thr_flags |= VGEN_WTHR_DATARCVD;
3601 				cv_signal(&ldcp->rcv_thr_cv);
3602 			}
3603 			mutex_exit(&ldcp->rcv_thr_lock);
3604 			mutex_enter(&ldcp->cblock);
3605 		} else  {
3606 			vgen_handle_evt_read(ldcp);
3607 		}
3608 	}
3609 	mutex_exit(&ldcp->cblock);
3610 
3611 	if (ldcp->cancel_htid) {
3612 		/*
3613 		 * Cancel handshake timer.
3614 		 * untimeout(9F) will not return until the pending callback is
3615 		 * cancelled or has run. No problems will result from calling
3616 		 * untimeout if the handler has already completed.
3617 		 * If the timeout handler did run, then it would just
3618 		 * return as cancel_htid is set.
3619 		 */
3620 		(void) untimeout(ldcp->cancel_htid);
3621 		ldcp->cancel_htid = 0;
3622 	}
3623 	DBG1(vgenp, ldcp, "exit\n");
3624 
3625 	return (LDC_SUCCESS);
3626 }
3627 
3628 static void
3629 vgen_handle_evt_read(vgen_ldc_t *ldcp)
3630 {
3631 	int		rv;
3632 	uint64_t	*ldcmsg;
3633 	size_t		msglen;
3634 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
3635 	vio_msg_tag_t	*tagp;
3636 	ldc_status_t 	istatus;
3637 	boolean_t 	has_data;
3638 
3639 	DBG1(vgenp, ldcp, "enter\n");
3640 
3641 	ldcmsg = ldcp->ldcmsg;
3642 	/*
3643 	 * If the receive thread is enabled, then the cblock needs
3644 	 * to be acquired here. If not, vgen_ldc_cb()
3645 	 * calls this function with cblock held already.
3646 	 */
3647 	if (ldcp->rcv_thread != NULL) {
3648 		mutex_enter(&ldcp->cblock);
3649 	} else {
3650 		ASSERT(MUTEX_HELD(&ldcp->cblock));
3651 	}
3652 
3653 vgen_evt_read:
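	/*
	 * Drain the channel: read and dispatch messages until ldc_read()
	 * returns no data or an error occurs.
	 */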
3654 	do {
3655 		msglen = ldcp->msglen;
3656 		rv = ldc_read(ldcp->ldc_handle, (caddr_t)ldcmsg, &msglen);
3657 
3658 		if (rv != 0) {
3659 			DWARN(vgenp, ldcp, "err rv(%d) len(%d)\n",
3660 			    rv, msglen);
3661 			if (rv == ECONNRESET)
3662 				goto vgen_evtread_error;
3663 			break;
3664 		}
3665 		if (msglen == 0) {
3666 			DBG2(vgenp, ldcp, "ldc_read NODATA");
3667 			break;
3668 		}
3669 		DBG2(vgenp, ldcp, "ldc_read msglen(%d)", msglen);
3670 
3671 		tagp = (vio_msg_tag_t *)ldcmsg;
3672 
3673 		if (ldcp->peer_sid) {
3674 			/*
3675 			 * check sid only after we have received peer's sid
3676 			 * in the version negotiate msg.
3677 			 */
3678 #ifdef DEBUG
3679 			if (vgen_hdbg & HDBG_BAD_SID) {
3680 				/* simulate bad sid condition */
3681 				tagp->vio_sid = 0;
3682 				vgen_hdbg &= ~(HDBG_BAD_SID);
3683 			}
3684 #endif
3685 			rv = vgen_check_sid(ldcp, tagp);
3686 			if (rv != VGEN_SUCCESS) {
3687 				/*
3688 				 * If sid mismatch is detected,
3689 				 * reset the channel.
3690 				 */
3691 				ldcp->need_ldc_reset = B_TRUE;
3692 				goto vgen_evtread_error;
3693 			}
3694 		}
3695 
3696 		switch (tagp->vio_msgtype) {
3697 		case VIO_TYPE_CTRL:
3698 			rv = vgen_handle_ctrlmsg(ldcp, tagp);
3699 			break;
3700 
3701 		case VIO_TYPE_DATA:
3702 			rv = vgen_handle_datamsg(ldcp, tagp, msglen);
3703 			break;
3704 
3705 		case VIO_TYPE_ERR:
3706 			vgen_handle_errmsg(ldcp, tagp);
3707 			break;
3708 
3709 		default:
3710 			DWARN(vgenp, ldcp, "Unknown VIO_TYPE(%x)\n",
3711 			    tagp->vio_msgtype);
3712 			break;
3713 		}
3714 
3715 		/*
3716 		 * If an error is encountered, stop processing and
3717 		 * handle the error.
3718 		 */
3719 		if (rv != 0) {
3720 			goto vgen_evtread_error;
3721 		}
3722 
3723 	} while (msglen);
3724 
3725 	/* check once more before exiting */
3726 	rv = ldc_chkq(ldcp->ldc_handle, &has_data);
3727 	if ((rv == 0) && (has_data == B_TRUE)) {
3728 		DTRACE_PROBE(vgen_chkq);
3729 		goto vgen_evt_read;
3730 	}
3731 
3732 vgen_evtread_error:
3733 	if (rv == ECONNRESET) {
3734 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
3735 			DWARN(vgenp, ldcp, "ldc_status err\n");
3736 		} else {
3737 			ldcp->ldc_status = istatus;
3738 		}
3739 		vgen_handle_evt_reset(ldcp);
3740 	} else if (rv) {
3741 		vgen_handshake_retry(ldcp);
3742 	}
3743 
3744 	/*
3745 	 * If the receive thread is not enabled, then cancel the
3746 	 * handshake timeout here.
3747 	 */
3748 	if (ldcp->rcv_thread != NULL) {
3749 		mutex_exit(&ldcp->cblock);
3750 		if (ldcp->cancel_htid) {
3751 			/*
3752 			 * Cancel handshake timer. untimeout(9F) will
3753 			 * not return until the pending callback is cancelled
3754 			 * or has run. No problems will result from calling
3755 			 * untimeout if the handler has already completed.
3756 			 * If the timeout handler did run, then it would just
3757 			 * return as cancel_htid is set.
3758 			 */
3759 			(void) untimeout(ldcp->cancel_htid);
3760 			ldcp->cancel_htid = 0;
3761 		}
3762 	}
3763 
3764 	DBG1(vgenp, ldcp, "exit\n");
3765 }
3766 
3767 /* vgen handshake functions */
3768 
3769 /* change the hphase for the channel to the next phase */
3770 static vgen_ldc_t *
3771 vh_nextphase(vgen_ldc_t *ldcp)
3772 {
3773 	if (ldcp->hphase == VH_PHASE3) {
3774 		ldcp->hphase = VH_DONE;
3775 	} else {
3776 		ldcp->hphase++;
3777 	}
3778 	return (ldcp);
3779 }
3780 
3781 /*
3782  * wrapper routine to send the given message over ldc using ldc_write().
3783  */
3784 static int
3785 vgen_sendmsg(vgen_ldc_t *ldcp, caddr_t msg,  size_t msglen,
3786     boolean_t caller_holds_lock)
3787 {
3788 	int			rv;
3789 	size_t			len;
3790 	uint32_t		retries = 0;
3791 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
3792 	vio_msg_tag_t		*tagp = (vio_msg_tag_t *)msg;
3793 	vio_dring_msg_t		*dmsg;
3794 	vio_raw_data_msg_t	*rmsg;
3795 	boolean_t		data_msg = B_FALSE;
3796 
3797 	len = msglen;
3798 	if ((len == 0) || (msg == NULL))
3799 		return (VGEN_FAILURE);
3800 
3801 	if (!caller_holds_lock) {
3802 		mutex_enter(&ldcp->wrlock);
3803 	}
3804 
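	/*
	 * For outbound data messages (dring data or raw pkt data), stamp
	 * the message with the next transmit sequence number; the sequence
	 * number is incremented further below only if ldc_write() succeeds.
	 */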
3805 	if (tagp->vio_subtype == VIO_SUBTYPE_INFO) {
3806 		if (tagp->vio_subtype_env == VIO_DRING_DATA) {
3807 			dmsg = (vio_dring_msg_t *)tagp;
3808 			dmsg->seq_num = ldcp->next_txseq;
3809 			data_msg = B_TRUE;
3810 		} else if (tagp->vio_subtype_env == VIO_PKT_DATA) {
3811 			rmsg = (vio_raw_data_msg_t *)tagp;
3812 			rmsg->seq_num = ldcp->next_txseq;
3813 			data_msg = B_TRUE;
3814 		}
3815 	}
3816 
3817 	do {
3818 		len = msglen;
3819 		rv = ldc_write(ldcp->ldc_handle, (caddr_t)msg, &len);
3820 		if (retries++ >= vgen_ldcwr_retries)
3821 			break;
3822 	} while (rv == EWOULDBLOCK);
3823 
3824 	if (rv == 0 && data_msg == B_TRUE) {
3825 		ldcp->next_txseq++;
3826 	}
3827 
3828 	if (!caller_holds_lock) {
3829 		mutex_exit(&ldcp->wrlock);
3830 	}
3831 
3832 	if (rv != 0) {
3833 		DWARN(vgenp, ldcp, "ldc_write failed: rv(%d) msglen(%d)\n",
3834 		    rv, msglen);
3835 		return (rv);
3836 	}
3837 
3838 	if (len != msglen) {
3839 		DWARN(vgenp, ldcp, "ldc_write failed: rv(%d) msglen(%d)\n",
3840 		    rv, msglen);
3841 		return (VGEN_FAILURE);
3842 	}
3843 
3844 	return (VGEN_SUCCESS);
3845 }
3846 
3847 /* send version negotiate message to the peer over ldc */
3848 static int
3849 vgen_send_version_negotiate(vgen_ldc_t *ldcp)
3850 {
3851 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
3852 	vio_ver_msg_t	vermsg;
3853 	vio_msg_tag_t	*tagp = &vermsg.tag;
3854 	int		rv;
3855 
3856 	bzero(&vermsg, sizeof (vermsg));
3857 
3858 	tagp->vio_msgtype = VIO_TYPE_CTRL;
3859 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
3860 	tagp->vio_subtype_env = VIO_VER_INFO;
3861 	tagp->vio_sid = ldcp->local_sid;
3862 
3863 	/* get version msg payload from ldcp->local_hparams */
3864 	vermsg.ver_major = ldcp->local_hparams.ver_major;
3865 	vermsg.ver_minor = ldcp->local_hparams.ver_minor;
3866 	vermsg.dev_class = ldcp->local_hparams.dev_class;
3867 
3868 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (vermsg), B_FALSE);
3869 	if (rv != VGEN_SUCCESS) {
3870 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
3871 		return (rv);
3872 	}
3873 
3874 	ldcp->hstate |= VER_INFO_SENT;
3875 	DBG2(vgenp, ldcp, "VER_INFO_SENT ver(%d,%d)\n",
3876 	    vermsg.ver_major, vermsg.ver_minor);
3877 
3878 	return (VGEN_SUCCESS);
3879 }
3880 
3881 /* send attr info message to the peer over ldc */
3882 static int
3883 vgen_send_attr_info(vgen_ldc_t *ldcp)
3884 {
3885 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
3886 	vnet_attr_msg_t	attrmsg;
3887 	vio_msg_tag_t	*tagp = &attrmsg.tag;
3888 	int		rv;
3889 
3890 	bzero(&attrmsg, sizeof (attrmsg));
3891 
3892 	tagp->vio_msgtype = VIO_TYPE_CTRL;
3893 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
3894 	tagp->vio_subtype_env = VIO_ATTR_INFO;
3895 	tagp->vio_sid = ldcp->local_sid;
3896 
3897 	/* get attr msg payload from ldcp->local_hparams */
3898 	attrmsg.mtu = ldcp->local_hparams.mtu;
3899 	attrmsg.addr = ldcp->local_hparams.addr;
3900 	attrmsg.addr_type = ldcp->local_hparams.addr_type;
3901 	attrmsg.xfer_mode = ldcp->local_hparams.xfer_mode;
3902 	attrmsg.ack_freq = ldcp->local_hparams.ack_freq;
3903 
3904 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (attrmsg), B_FALSE);
3905 	if (rv != VGEN_SUCCESS) {
3906 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
3907 		return (rv);
3908 	}
3909 
3910 	ldcp->hstate |= ATTR_INFO_SENT;
3911 	DBG2(vgenp, ldcp, "ATTR_INFO_SENT\n");
3912 
3913 	return (VGEN_SUCCESS);
3914 }
3915 
3916 /* send descriptor ring register message to the peer over ldc */
3917 static int
3918 vgen_send_dring_reg(vgen_ldc_t *ldcp)
3919 {
3920 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
3921 	vio_dring_reg_msg_t	msg;
3922 	vio_msg_tag_t		*tagp = &msg.tag;
3923 	int		rv;
3924 
3925 	bzero(&msg, sizeof (msg));
3926 
3927 	tagp->vio_msgtype = VIO_TYPE_CTRL;
3928 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
3929 	tagp->vio_subtype_env = VIO_DRING_REG;
3930 	tagp->vio_sid = ldcp->local_sid;
3931 
3932 	/* get dring info msg payload from ldcp->local_hparams */
3933 	bcopy(&(ldcp->local_hparams.dring_cookie), (msg.cookie),
3934 	    sizeof (ldc_mem_cookie_t));
3935 	msg.ncookies = ldcp->local_hparams.num_dcookies;
3936 	msg.num_descriptors = ldcp->local_hparams.num_desc;
3937 	msg.descriptor_size = ldcp->local_hparams.desc_size;
3938 
3939 	/*
3940 	 * dring_ident is set to 0. After mapping the dring, peer sets this
3941 	 * value and sends it in the ack, which is saved in
3942 	 * vgen_handle_dring_reg().
3943 	 */
3944 	msg.dring_ident = 0;
3945 
3946 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (msg), B_FALSE);
3947 	if (rv != VGEN_SUCCESS) {
3948 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
3949 		return (rv);
3950 	}
3951 
3952 	ldcp->hstate |= DRING_INFO_SENT;
3953 	DBG2(vgenp, ldcp, "DRING_INFO_SENT \n");
3954 
3955 	return (VGEN_SUCCESS);
3956 }
3957 
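/* send rdx info message to the peer over ldc */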
3958 static int
3959 vgen_send_rdx_info(vgen_ldc_t *ldcp)
3960 {
3961 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
3962 	vio_rdx_msg_t	rdxmsg;
3963 	vio_msg_tag_t	*tagp = &rdxmsg.tag;
3964 	int		rv;
3965 
3966 	bzero(&rdxmsg, sizeof (rdxmsg));
3967 
3968 	tagp->vio_msgtype = VIO_TYPE_CTRL;
3969 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
3970 	tagp->vio_subtype_env = VIO_RDX;
3971 	tagp->vio_sid = ldcp->local_sid;
3972 
3973 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (rdxmsg), B_FALSE);
3974 	if (rv != VGEN_SUCCESS) {
3975 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
3976 		return (rv);
3977 	}
3978 
3979 	ldcp->hstate |= RDX_INFO_SENT;
3980 	DBG2(vgenp, ldcp, "RDX_INFO_SENT\n");
3981 
3982 	return (VGEN_SUCCESS);
3983 }
3984 
3985 /* send descriptor ring data message to the peer over ldc */
3986 static int
3987 vgen_send_dring_data(vgen_ldc_t *ldcp, uint32_t start, int32_t end)
3988 {
3989 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
3990 	vio_dring_msg_t	dringmsg, *msgp = &dringmsg;
3991 	vio_msg_tag_t	*tagp = &msgp->tag;
3992 	vgen_stats_t	*statsp = &ldcp->stats;
3993 	int		rv;
3994 
3995 	bzero(msgp, sizeof (*msgp));
3996 
3997 	tagp->vio_msgtype = VIO_TYPE_DATA;
3998 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
3999 	tagp->vio_subtype_env = VIO_DRING_DATA;
4000 	tagp->vio_sid = ldcp->local_sid;
4001 
4002 	msgp->dring_ident = ldcp->local_hparams.dring_ident;
4003 	msgp->start_idx = start;
4004 	msgp->end_idx = end;
4005 
4006 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (dringmsg), B_TRUE);
4007 	if (rv != VGEN_SUCCESS) {
4008 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
4009 		return (rv);
4010 	}
4011 
4012 	statsp->dring_data_msgs++;
4013 
4014 	DBG2(vgenp, ldcp, "DRING_DATA_SENT \n");
4015 
4016 	return (VGEN_SUCCESS);
4017 }
4018 
4019 /* send multicast addr info message to vsw */
4020 static int
4021 vgen_send_mcast_info(vgen_ldc_t *ldcp)
4022 {
4023 	vnet_mcast_msg_t	mcastmsg;
4024 	vnet_mcast_msg_t	*msgp;
4025 	vio_msg_tag_t		*tagp;
4026 	vgen_t			*vgenp;
4027 	struct ether_addr	*mca;
4028 	int			rv;
4029 	int			i;
4030 	uint32_t		size;
4031 	uint32_t		mccount;
4032 	uint32_t		n;
4033 
4034 	msgp = &mcastmsg;
4035 	tagp = &msgp->tag;
4036 	vgenp = LDC_TO_VGEN(ldcp);
4037 
4038 	mccount = vgenp->mccount;
4039 	i = 0;
4040 
4041 	do {
4042 		tagp->vio_msgtype = VIO_TYPE_CTRL;
4043 		tagp->vio_subtype = VIO_SUBTYPE_INFO;
4044 		tagp->vio_subtype_env = VNET_MCAST_INFO;
4045 		tagp->vio_sid = ldcp->local_sid;
4046 
4047 		n = ((mccount >= VNET_NUM_MCAST) ? VNET_NUM_MCAST : mccount);
4048 		size = n * sizeof (struct ether_addr);
4049 
4050 		mca = &(vgenp->mctab[i]);
4051 		bcopy(mca, (msgp->mca), size);
4052 		msgp->set = B_TRUE;
4053 		msgp->count = n;
4054 
4055 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msgp),
4056 		    B_FALSE);
4057 		if (rv != VGEN_SUCCESS) {
4058 			DWARN(vgenp, ldcp, "vgen_sendmsg err(%d)\n", rv);
4059 			return (rv);
4060 		}
4061 
4062 		mccount -= n;
4063 		i += n;
4064 
4065 	} while (mccount);
4066 
4067 	return (VGEN_SUCCESS);
4068 }
4069 
4070 /* Initiate Phase 2 of handshake */
4071 static int
4072 vgen_handshake_phase2(vgen_ldc_t *ldcp)
4073 {
4074 	int rv;
4075 	uint32_t ncookies = 0;
4076 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4077 
4078 #ifdef DEBUG
4079 	if (vgen_hdbg & HDBG_OUT_STATE) {
4080 		/* simulate out of state condition */
4081 		vgen_hdbg &= ~(HDBG_OUT_STATE);
4082 		rv = vgen_send_rdx_info(ldcp);
4083 		return (rv);
4084 	}
4085 	if (vgen_hdbg & HDBG_TIMEOUT) {
4086 		/* simulate timeout condition */
4087 		vgen_hdbg &= ~(HDBG_TIMEOUT);
4088 		return (VGEN_SUCCESS);
4089 	}
4090 #endif
4091 	rv = vgen_send_attr_info(ldcp);
4092 	if (rv != VGEN_SUCCESS) {
4093 		return (rv);
4094 	}
4095 
4096 	/* Bind descriptor ring to the channel */
4097 	if (ldcp->num_txdcookies == 0) {
4098 		rv = ldc_mem_dring_bind(ldcp->ldc_handle, ldcp->tx_dhandle,
4099 		    LDC_DIRECT_MAP | LDC_SHADOW_MAP, LDC_MEM_RW,
4100 		    &ldcp->tx_dcookie, &ncookies);
4101 		if (rv != 0) {
4102 			DWARN(vgenp, ldcp, "ldc_mem_dring_bind failed "
4103 			    "rv(%x)\n", rv);
4104 			return (rv);
4105 		}
4106 		ASSERT(ncookies == 1);
4107 		ldcp->num_txdcookies = ncookies;
4108 	}
4109 
4110 	/* update local dring_info params */
4111 	bcopy(&(ldcp->tx_dcookie), &(ldcp->local_hparams.dring_cookie),
4112 	    sizeof (ldc_mem_cookie_t));
4113 	ldcp->local_hparams.num_dcookies = ldcp->num_txdcookies;
4114 	ldcp->local_hparams.num_desc = ldcp->num_txds;
4115 	ldcp->local_hparams.desc_size = sizeof (vnet_public_desc_t);
4116 
4117 	rv = vgen_send_dring_reg(ldcp);
4118 	if (rv != VGEN_SUCCESS) {
4119 		return (rv);
4120 	}
4121 
4122 	return (VGEN_SUCCESS);
4123 }
4124 
4125 /*
4126  * Set vnet-protocol-version dependent functions based on version.
4127  */
4128 static void
4129 vgen_set_vnet_proto_ops(vgen_ldc_t *ldcp)
4130 {
4131 	vgen_hparams_t	*lp = &ldcp->local_hparams;
4132 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4133 
4134 	if (VGEN_VER_GTEQ(ldcp, 1, 3)) {
4135 
4136 		/*
4137 		 * If the version negotiated with peer is >= 1.3,
4138 		 * set the mtu in our attributes to max_frame_size.
4139 		 */
4140 		lp->mtu = vgenp->max_frame_size;
4141 	} else {
4142 		vgen_port_t	*portp = ldcp->portp;
4143 		vnet_t		*vnetp = vgenp->vnetp;
4144 		/*
4145 		 * Pre-1.3 peers expect a max frame size of ETHERMAX.
4146 		 * We can negotiate that size with those peers provided the
4147 		 * following conditions are true:
4148 		 * - Our max_frame_size is exactly ETHERMAX + VLAN_TAGSZ (4).
4149 		 * - Only a pvid is defined for the peer port; it has no vids.
4150 		 * - The peer port's pvid equals our pvid.
4151 		 * If the above conditions are true, then we can send/recv only
4152 		 * untagged frames of max size ETHERMAX.
4153 		 */
4154 		if ((vgenp->max_frame_size == ETHERMAX + VLAN_TAGSZ) &&
4155 		    portp->nvids == 0 && portp->pvid == vnetp->pvid) {
4156 			lp->mtu = ETHERMAX;
4157 		}
4158 	}
4159 
4160 	if (VGEN_VER_GTEQ(ldcp, 1, 2)) {
4161 		/* Versions >= 1.2 */
4162 
4163 		if (VGEN_PRI_ETH_DEFINED(vgenp)) {
4164 			/*
4165 			 * enable priority routines and pkt mode only if
4166 			 * at least one pri-eth-type is specified in MD.
4167 			 */
4168 
4169 			ldcp->tx = vgen_ldcsend;
4170 			ldcp->rx_pktdata = vgen_handle_pkt_data;
4171 
4172 			/* set xfer mode for vgen_send_attr_info() */
4173 			lp->xfer_mode = VIO_PKT_MODE | VIO_DRING_MODE_V1_2;
4174 
4175 		} else {
4176 			/* no priority eth types defined in MD */
4177 
4178 			ldcp->tx = vgen_ldcsend_dring;
4179 			ldcp->rx_pktdata = vgen_handle_pkt_data_nop;
4180 
4181 			/* set xfer mode for vgen_send_attr_info() */
4182 			lp->xfer_mode = VIO_DRING_MODE_V1_2;
4183 
4184 		}
4185 	} else {
4186 		/* Versions prior to 1.2  */
4187 
4188 		vgen_reset_vnet_proto_ops(ldcp);
4189 	}
4190 }
4191 
4192 /*
4193  * Reset vnet-protocol-version dependent functions to pre-v1.2.
4194  */
4195 static void
4196 vgen_reset_vnet_proto_ops(vgen_ldc_t *ldcp)
4197 {
4198 	vgen_hparams_t	*lp = &ldcp->local_hparams;
4199 
4200 	ldcp->tx = vgen_ldcsend_dring;
4201 	ldcp->rx_pktdata = vgen_handle_pkt_data_nop;
4202 
4203 	/* set xfer mode for vgen_send_attr_info() */
4204 	lp->xfer_mode = VIO_DRING_MODE_V1_0;
4205 }
4206 
4207 static void
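/*
 * Reset the channel of the given port if the peer is vlan-unaware
 * (negotiated version < 1.3) and the port now has vids configured or a
 * pvid that differs from the vnet's pvid. See vgen_set_vnet_proto_ops().
 */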
4208 vgen_vlan_unaware_port_reset(vgen_port_t *portp)
4209 {
4210 	vgen_ldclist_t	*ldclp;
4211 	vgen_ldc_t	*ldcp;
4212 	vgen_t		*vgenp = portp->vgenp;
4213 	vnet_t		*vnetp = vgenp->vnetp;
4214 
4215 	ldclp = &portp->ldclist;
4216 
4217 	READ_ENTER(&ldclp->rwlock);
4218 
4219 	/*
4220 	 * NOTE: for now, we will assume we have a single channel.
4221 	 */
4222 	if (ldclp->headp == NULL) {
4223 		RW_EXIT(&ldclp->rwlock);
4224 		return;
4225 	}
4226 	ldcp = ldclp->headp;
4227 
4228 	mutex_enter(&ldcp->cblock);
4229 
4230 	/*
4231 	 * If the peer is vlan-unaware (ver < 1.3), reset the channel and
4232 	 * terminate the connection; see vgen_set_vnet_proto_ops().
4233 	 */
4234 	if (ldcp->hphase == VH_DONE && VGEN_VER_LT(ldcp, 1, 3) &&
4235 	    (portp->nvids != 0 || portp->pvid != vnetp->pvid)) {
4236 		ldcp->need_ldc_reset = B_TRUE;
4237 		vgen_handshake_retry(ldcp);
4238 	}
4239 
4240 	mutex_exit(&ldcp->cblock);
4241 
4242 	RW_EXIT(&ldclp->rwlock);
4243 }
4244 
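/* walk the port list and reset any ports with vlan-unaware peers */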
4245 static void
4246 vgen_reset_vlan_unaware_ports(vgen_t *vgenp)
4247 {
4248 	vgen_port_t	*portp;
4249 	vgen_portlist_t	*plistp;
4250 
4251 	plistp = &(vgenp->vgenports);
4252 	READ_ENTER(&plistp->rwlock);
4253 
4254 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
4255 
4256 		vgen_vlan_unaware_port_reset(portp);
4257 
4258 	}
4259 
4260 	RW_EXIT(&plistp->rwlock);
4261 }
4262 
4263 /*
4264  * This function resets the handshake phase to VH_PHASE0 (pre-handshake
4265  * phase). This can happen after a channel comes up (status: LDC_UP) or
4266  * when the handshake gets terminated due to various conditions.
4267  */
4268 static void
4269 vgen_reset_hphase(vgen_ldc_t *ldcp)
4270 {
4271 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4272 	ldc_status_t istatus;
4273 	int rv;
4274 
4275 	DBG1(vgenp, ldcp, "enter\n");
4276 	/* reset hstate and hphase */
4277 	ldcp->hstate = 0;
4278 	ldcp->hphase = VH_PHASE0;
4279 
4280 	vgen_reset_vnet_proto_ops(ldcp);
4281 
4282 	/*
4283 	 * Save the id of pending handshake timer in cancel_htid.
4284 	 * This will be checked in vgen_ldc_cb() and the handshake timer will
4285 	 * be cancelled after releasing cblock.
4286 	 */
4287 	if (ldcp->htid) {
4288 		ldcp->cancel_htid = ldcp->htid;
4289 		ldcp->htid = 0;
4290 	}
4291 
4292 	if (ldcp->local_hparams.dring_ready) {
4293 		ldcp->local_hparams.dring_ready = B_FALSE;
4294 	}
4295 
4296 	/* Unbind tx descriptor ring from the channel */
4297 	if (ldcp->num_txdcookies) {
4298 		rv = ldc_mem_dring_unbind(ldcp->tx_dhandle);
4299 		if (rv != 0) {
4300 			DWARN(vgenp, ldcp, "ldc_mem_dring_unbind failed\n");
4301 		}
4302 		ldcp->num_txdcookies = 0;
4303 	}
4304 
4305 	if (ldcp->peer_hparams.dring_ready) {
4306 		ldcp->peer_hparams.dring_ready = B_FALSE;
4307 		/* Unmap peer's dring */
4308 		(void) ldc_mem_dring_unmap(ldcp->rx_dhandle);
4309 		vgen_clobber_rxds(ldcp);
4310 	}
4311 
4312 	vgen_clobber_tbufs(ldcp);
4313 
4314 	/*
4315 	 * clear local handshake params and initialize.
4316 	 */
4317 	bzero(&(ldcp->local_hparams), sizeof (ldcp->local_hparams));
4318 
4319 	/* set version to the highest version supported */
4320 	ldcp->local_hparams.ver_major =
4321 	    ldcp->vgen_versions[0].ver_major;
4322 	ldcp->local_hparams.ver_minor =
4323 	    ldcp->vgen_versions[0].ver_minor;
4324 	ldcp->local_hparams.dev_class = VDEV_NETWORK;
4325 
4326 	/* set attr_info params */
4327 	ldcp->local_hparams.mtu = vgenp->max_frame_size;
4328 	ldcp->local_hparams.addr =
4329 	    vnet_macaddr_strtoul(vgenp->macaddr);
4330 	ldcp->local_hparams.addr_type = ADDR_TYPE_MAC;
4331 	ldcp->local_hparams.xfer_mode = VIO_DRING_MODE_V1_0;
4332 	ldcp->local_hparams.ack_freq = 0;	/* don't need acks */
4333 
4334 	/*
4335 	 * Note: dring is created, but not bound yet.
4336 	 * local dring_info params will be updated when we bind the dring in
4337 	 * vgen_handshake_phase2().
4338 	 * dring_ident is set to 0. After mapping the dring, peer sets this
4339 	 * value and sends it in the ack, which is saved in
4340 	 * vgen_handle_dring_reg().
4341 	 */
4342 	ldcp->local_hparams.dring_ident = 0;
4343 
4344 	/* clear peer_hparams */
4345 	bzero(&(ldcp->peer_hparams), sizeof (ldcp->peer_hparams));
4346 
4347 	/* reset the channel if required */
4348 	if (ldcp->need_ldc_reset) {
4349 		DWARN(vgenp, ldcp, "Doing Channel Reset...\n");
4350 		ldcp->need_ldc_reset = B_FALSE;
4351 		(void) ldc_down(ldcp->ldc_handle);
4352 		(void) ldc_status(ldcp->ldc_handle, &istatus);
4353 		DBG2(vgenp, ldcp, "Reset Done,ldc_status(%x)\n", istatus);
4354 		ldcp->ldc_status = istatus;
4355 
4356 		/* clear sids */
4357 		ldcp->local_sid = 0;
4358 		ldcp->peer_sid = 0;
4359 
4360 		/* try to bring the channel up */
4361 		rv = ldc_up(ldcp->ldc_handle);
4362 		if (rv != 0) {
4363 			DWARN(vgenp, ldcp, "ldc_up err rv(%d)\n", rv);
4364 		}
4365 
4366 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
4367 			DWARN(vgenp, ldcp, "ldc_status err\n");
4368 		} else {
4369 			ldcp->ldc_status = istatus;
4370 		}
4371 	}
4372 }
4373 
4374 /* wrapper function for vgen_reset_hphase */
4375 static void
4376 vgen_handshake_reset(vgen_ldc_t *ldcp)
4377 {
4378 	ASSERT(MUTEX_HELD(&ldcp->cblock));
4379 	mutex_enter(&ldcp->rxlock);
4380 	mutex_enter(&ldcp->wrlock);
4381 	mutex_enter(&ldcp->txlock);
4382 	mutex_enter(&ldcp->tclock);
4383 
4384 	vgen_reset_hphase(ldcp);
4385 
4386 	mutex_exit(&ldcp->tclock);
4387 	mutex_exit(&ldcp->txlock);
4388 	mutex_exit(&ldcp->wrlock);
4389 	mutex_exit(&ldcp->rxlock);
4390 }
4391 
4392 /*
4393  * Initiate handshake with the peer by sending various messages
4394  * based on the handshake-phase that the channel is currently in.
4395  */
4396 static void
4397 vgen_handshake(vgen_ldc_t *ldcp)
4398 {
4399 	uint32_t hphase = ldcp->hphase;
4400 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
4401 	ldc_status_t	istatus;
4402 	int	rv = 0;
4403 
4404 	switch (hphase) {
4405 
4406 	case VH_PHASE1:
4407 
4408 		/*
4409 		 * Start a timer for the entire handshake process. This timer
4410 		 * is turned off when all phases of the handshake complete
4411 		 * successfully and hphase goes to VH_DONE (below), or when
4412 		 * vgen_reset_hphase() gets called, or when the channel is
4413 		 * reset due to errors, or when vgen_ldc_uninit() is
4414 		 * invoked (vgen_stop).
4415 		 */
4416 		ldcp->htid = timeout(vgen_hwatchdog, (caddr_t)ldcp,
4417 		    drv_usectohz(vgen_hwd_interval * MICROSEC));
4418 
4419 		/* Phase 1 involves negotiating the version */
4420 		rv = vgen_send_version_negotiate(ldcp);
4421 		break;
4422 
4423 	case VH_PHASE2:
4424 		rv = vgen_handshake_phase2(ldcp);
4425 		break;
4426 
4427 	case VH_PHASE3:
4428 		rv = vgen_send_rdx_info(ldcp);
4429 		break;
4430 
4431 	case VH_DONE:
4432 		/*
4433 		 * Save the id of pending handshake timer in cancel_htid.
4434 		 * This will be checked in vgen_ldc_cb() and the handshake
4435 		 * timer will be cancelled after releasing cblock.
4436 		 */
4437 		if (ldcp->htid) {
4438 			ldcp->cancel_htid = ldcp->htid;
4439 			ldcp->htid = 0;
4440 		}
4441 		ldcp->hretries = 0;
4442 		DBG1(vgenp, ldcp, "Handshake Done\n");
4443 
4444 		if (ldcp->portp == vgenp->vsw_portp) {
4445 			/*
4446 			 * If this channel (port) is connected to vsw, we
4447 			 * need to sync the multicast table with vsw.
4448 			 */
4449 			mutex_exit(&ldcp->cblock);
4450 
4451 			mutex_enter(&vgenp->lock);
4452 			rv = vgen_send_mcast_info(ldcp);
4453 			mutex_exit(&vgenp->lock);
4454 
4455 			mutex_enter(&ldcp->cblock);
4456 			if (rv != VGEN_SUCCESS)
4457 				break;
4458 		}
4459 
4460 		/*
4461 		 * Check if the mac layer should be notified to restart
4462 		 * transmissions. This can happen if the channel got
4463 		 * reset and vgen_clobber_tbufs() was called while
4464 		 * need_resched was set.
4465 		 */
4466 		mutex_enter(&ldcp->tclock);
4467 		if (ldcp->need_resched) {
4468 			vio_net_tx_update_t vtx_update =
4469 			    ldcp->portp->vcb.vio_net_tx_update;
4470 
4471 			ldcp->need_resched = B_FALSE;
4472 			vtx_update(ldcp->portp->vhp);
4473 		}
4474 		mutex_exit(&ldcp->tclock);
4475 
4476 		break;
4477 
4478 	default:
4479 		break;
4480 	}
4481 
4482 	if (rv == ECONNRESET) {
4483 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
4484 			DWARN(vgenp, ldcp, "ldc_status err\n");
4485 		} else {
4486 			ldcp->ldc_status = istatus;
4487 		}
4488 		vgen_handle_evt_reset(ldcp);
4489 	} else if (rv) {
4490 		vgen_handshake_reset(ldcp);
4491 	}
4492 }
4493 
4494 /*
4495  * Check if the current handshake phase has completed successfully and
4496  * return the status.
4497  */
4498 static int
4499 vgen_handshake_done(vgen_ldc_t *ldcp)
4500 {
4501 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4502 	uint32_t	hphase = ldcp->hphase;
4503 	int 		status = 0;
4504 
4505 	switch (hphase) {
4506 
4507 	case VH_PHASE1:
4508 		/*
4509 		 * Phase 1 is done if version negotiation
4510 		 * completed successfully.
4511 		 */
4512 		status = ((ldcp->hstate & VER_NEGOTIATED) ==
4513 		    VER_NEGOTIATED);
4514 		break;
4515 
4516 	case VH_PHASE2:
4517 		/*
4518 		 * Phase 2 is done if attr info and dring info
4519 		 * have been exchanged successfully.
4520 		 */
4521 		status = (((ldcp->hstate & ATTR_INFO_EXCHANGED) ==
4522 		    ATTR_INFO_EXCHANGED) &&
4523 		    ((ldcp->hstate & DRING_INFO_EXCHANGED) ==
4524 		    DRING_INFO_EXCHANGED));
4525 		break;
4526 
4527 	case VH_PHASE3:
4528 		/* Phase 3 is done if the rdx msg has been exchanged */
4529 		status = ((ldcp->hstate & RDX_EXCHANGED) ==
4530 		    RDX_EXCHANGED);
4531 		break;
4532 
4533 	default:
4534 		break;
4535 	}
4536 
4537 	if (status == 0) {
4538 		return (VGEN_FAILURE);
4539 	}
4540 	DBG2(vgenp, ldcp, "PHASE(%d)\n", hphase);
4541 	return (VGEN_SUCCESS);
4542 }
4543 
4544 /* retry handshake on failure */
4545 static void
4546 vgen_handshake_retry(vgen_ldc_t *ldcp)
4547 {
4548 	/* reset handshake phase */
4549 	vgen_handshake_reset(ldcp);
4550 
4551 	/* handshake retry is specified and the channel is UP */
4552 	if (vgen_max_hretries && (ldcp->ldc_status == LDC_UP)) {
4553 		if (ldcp->hretries++ < vgen_max_hretries) {
4554 			ldcp->local_sid = ddi_get_lbolt();
4555 			vgen_handshake(vh_nextphase(ldcp));
4556 		}
4557 	}
4558 }
4559 
4560 /*
4561  * Handle a version info msg from the peer or an ACK/NACK from the peer
4562  * to a version info msg that we sent.
4563  */
4564 static int
4565 vgen_handle_version_negotiate(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4566 {
4567 	vgen_t		*vgenp;
4568 	vio_ver_msg_t	*vermsg = (vio_ver_msg_t *)tagp;
4569 	int		ack = 0;
4570 	int		failed = 0;
4571 	int		idx;
4572 	vgen_ver_t	*versions = ldcp->vgen_versions;
4573 	int		rv = 0;
4574 
4575 	vgenp = LDC_TO_VGEN(ldcp);
4576 	DBG1(vgenp, ldcp, "enter\n");
4577 	switch (tagp->vio_subtype) {
4578 	case VIO_SUBTYPE_INFO:
4579 
4580 		/*  Cache sid of peer if this is the first time */
4581 		if (ldcp->peer_sid == 0) {
4582 			DBG2(vgenp, ldcp, "Caching peer_sid(%x)\n",
4583 			    tagp->vio_sid);
4584 			ldcp->peer_sid = tagp->vio_sid;
4585 		}
4586 
4587 		if (ldcp->hphase != VH_PHASE1) {
4588 			/*
4589 			 * If we are not already in VH_PHASE1, reset to
4590 			 * pre-handshake state, and initiate handshake
4591 			 * to the peer too.
4592 			 */
4593 			vgen_handshake_reset(ldcp);
4594 			vgen_handshake(vh_nextphase(ldcp));
4595 		}
4596 		ldcp->hstate |= VER_INFO_RCVD;
4597 
4598 		/* save peer's requested values */
4599 		ldcp->peer_hparams.ver_major = vermsg->ver_major;
4600 		ldcp->peer_hparams.ver_minor = vermsg->ver_minor;
4601 		ldcp->peer_hparams.dev_class = vermsg->dev_class;
4602 
4603 		if ((vermsg->dev_class != VDEV_NETWORK) &&
4604 		    (vermsg->dev_class != VDEV_NETWORK_SWITCH)) {
4605 			/* unsupported dev_class, send NACK */
4606 
4607 			DWARN(vgenp, ldcp, "Version Negotiation Failed\n");
4608 
4609 			tagp->vio_subtype = VIO_SUBTYPE_NACK;
4610 			tagp->vio_sid = ldcp->local_sid;
4611 			/* send reply msg back to peer */
4612 			rv = vgen_sendmsg(ldcp, (caddr_t)tagp,
4613 			    sizeof (*vermsg), B_FALSE);
4614 			if (rv != VGEN_SUCCESS) {
4615 				return (rv);
4616 			}
4617 			return (VGEN_FAILURE);
4618 		}
4619 
4620 		DBG2(vgenp, ldcp, "VER_INFO_RCVD, ver(%d,%d)\n",
4621 		    vermsg->ver_major,  vermsg->ver_minor);
4622 
4623 		idx = 0;
4624 
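		/*
		 * Walk our version table (ordered from highest to lowest
		 * supported version) to find a version we can agree on
		 * with the peer, ACKing or NACKing as appropriate.
		 */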
4625 		for (;;) {
4626 
4627 			if (vermsg->ver_major > versions[idx].ver_major) {
4628 
4629 				/* nack with next lower version */
4630 				tagp->vio_subtype = VIO_SUBTYPE_NACK;
4631 				vermsg->ver_major = versions[idx].ver_major;
4632 				vermsg->ver_minor = versions[idx].ver_minor;
4633 				break;
4634 			}
4635 
4636 			if (vermsg->ver_major == versions[idx].ver_major) {
4637 
4638 				/* major version match - ACK version */
4639 				tagp->vio_subtype = VIO_SUBTYPE_ACK;
4640 				ack = 1;
4641 
4642 				/*
4643 				 * lower minor version to the one this endpt
4644 				 * supports, if necessary
4645 				 */
4646 				if (vermsg->ver_minor >
4647 				    versions[idx].ver_minor) {
4648 					vermsg->ver_minor =
4649 					    versions[idx].ver_minor;
4650 					ldcp->peer_hparams.ver_minor =
4651 					    versions[idx].ver_minor;
4652 				}
4653 				break;
4654 			}
4655 
4656 			idx++;
4657 
4658 			if (idx == VGEN_NUM_VER) {
4659 
4660 				/* no version match - send NACK */
4661 				tagp->vio_subtype = VIO_SUBTYPE_NACK;
4662 				vermsg->ver_major = 0;
4663 				vermsg->ver_minor = 0;
4664 				failed = 1;
4665 				break;
4666 			}
4667 
4668 		}
4669 
4670 		tagp->vio_sid = ldcp->local_sid;
4671 
4672 		/* send reply msg back to peer */
4673 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*vermsg),
4674 		    B_FALSE);
4675 		if (rv != VGEN_SUCCESS) {
4676 			return (rv);
4677 		}
4678 
4679 		if (ack) {
4680 			ldcp->hstate |= VER_ACK_SENT;
4681 			DBG2(vgenp, ldcp, "VER_ACK_SENT, ver(%d,%d) \n",
4682 			    vermsg->ver_major, vermsg->ver_minor);
4683 		}
4684 		if (failed) {
4685 			DWARN(vgenp, ldcp, "Negotiation Failed\n");
4686 			return (VGEN_FAILURE);
4687 		}
4688 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
4689 
4690 			/*  VER_ACK_SENT and VER_ACK_RCVD */
4691 
4692 			/* local and peer versions match? */
4693 			ASSERT((ldcp->local_hparams.ver_major ==
4694 			    ldcp->peer_hparams.ver_major) &&
4695 			    (ldcp->local_hparams.ver_minor ==
4696 			    ldcp->peer_hparams.ver_minor));
4697 
4698 			vgen_set_vnet_proto_ops(ldcp);
4699 
4700 			/* move to the next phase */
4701 			vgen_handshake(vh_nextphase(ldcp));
4702 		}
4703 
4704 		break;
4705 
4706 	case VIO_SUBTYPE_ACK:
4707 
4708 		if (ldcp->hphase != VH_PHASE1) {
4709 			/*  This should not happen. */
4710 			DWARN(vgenp, ldcp, "Invalid Phase(%u)\n", ldcp->hphase);
4711 			return (VGEN_FAILURE);
4712 		}
4713 
4714 		/* SUCCESS - we have agreed on a version */
4715 		ldcp->local_hparams.ver_major = vermsg->ver_major;
4716 		ldcp->local_hparams.ver_minor = vermsg->ver_minor;
4717 		ldcp->hstate |= VER_ACK_RCVD;
4718 
4719 		DBG2(vgenp, ldcp, "VER_ACK_RCVD, ver(%d,%d) \n",
4720 		    vermsg->ver_major,  vermsg->ver_minor);
4721 
4722 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
4723 
4724 			/*  VER_ACK_SENT and VER_ACK_RCVD */
4725 
4726 			/* local and peer versions match? */
4727 			ASSERT((ldcp->local_hparams.ver_major ==
4728 			    ldcp->peer_hparams.ver_major) &&
4729 			    (ldcp->local_hparams.ver_minor ==
4730 			    ldcp->peer_hparams.ver_minor));
4731 
4732 			vgen_set_vnet_proto_ops(ldcp);
4733 
4734 			/* move to the next phase */
4735 			vgen_handshake(vh_nextphase(ldcp));
4736 		}
4737 		break;
4738 
4739 	case VIO_SUBTYPE_NACK:
4740 
4741 		if (ldcp->hphase != VH_PHASE1) {
4742 			/*  This should not happen.  */
4743 			DWARN(vgenp, ldcp, "VER_NACK_RCVD Invalid "
4744 			    "Phase(%u)\n", ldcp->hphase);
4745 			return (VGEN_FAILURE);
4746 		}
4747 
4748 		DBG2(vgenp, ldcp, "VER_NACK_RCVD next ver(%d,%d)\n",
4749 		    vermsg->ver_major, vermsg->ver_minor);
4750 
4751 		/* check if version in NACK is zero */
4752 		if (vermsg->ver_major == 0 && vermsg->ver_minor == 0) {
4753 			/*
4754 			 * Version Negotiation has failed.
4755 			 */
4756 			DWARN(vgenp, ldcp, "Version Negotiation Failed\n");
4757 			return (VGEN_FAILURE);
4758 		}
4759 
4760 		idx = 0;
4761 
4762 		for (;;) {
4763 
4764 			if (vermsg->ver_major > versions[idx].ver_major) {
4765 				/* select next lower version */
4766 
4767 				ldcp->local_hparams.ver_major =
4768 				    versions[idx].ver_major;
4769 				ldcp->local_hparams.ver_minor =
4770 				    versions[idx].ver_minor;
4771 				break;
4772 			}
4773 
4774 			if (vermsg->ver_major == versions[idx].ver_major) {
4775 				/* major version match */
4776 
4777 				ldcp->local_hparams.ver_major =
4778 				    versions[idx].ver_major;
4779 
4780 				ldcp->local_hparams.ver_minor =
4781 				    versions[idx].ver_minor;
4782 				break;
4783 			}
4784 
4785 			idx++;
4786 
4787 			if (idx == VGEN_NUM_VER) {
4788 				/*
4789 				 * no version match.
4790 				 * Version Negotiation has failed.
4791 				 */
4792 				DWARN(vgenp, ldcp,
4793 				    "Version Negotiation Failed\n");
4794 				return (VGEN_FAILURE);
4795 			}
4796 
4797 		}
4798 
4799 		rv = vgen_send_version_negotiate(ldcp);
4800 		if (rv != VGEN_SUCCESS) {
4801 			return (rv);
4802 		}
4803 
4804 		break;
4805 	}
4806 
4807 	DBG1(vgenp, ldcp, "exit\n");
4808 	return (VGEN_SUCCESS);
4809 }
4810 
4811 /* Check if the attributes are supported */
4812 static int
4813 vgen_check_attr_info(vgen_ldc_t *ldcp, vnet_attr_msg_t *msg)
4814 {
4815 	vgen_hparams_t	*lp = &ldcp->local_hparams;
4816 
4817 	if ((msg->mtu != lp->mtu) ||
4818 	    (msg->addr_type != ADDR_TYPE_MAC) ||
4819 	    (msg->ack_freq > 64) ||
4820 	    (msg->xfer_mode != lp->xfer_mode)) {
4821 		return (VGEN_FAILURE);
4822 	}
4823 
4824 	return (VGEN_SUCCESS);
4825 }
4826 
4827 /*
4828  * Handle an attribute info msg from the peer or an ACK/NACK from the peer
4829  * to an attr info msg that we sent.
4830  */
4831 static int
4832 vgen_handle_attr_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4833 {
4834 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4835 	vnet_attr_msg_t *attrmsg = (vnet_attr_msg_t *)tagp;
4836 	int		ack = 0;
4837 	int		rv = 0;
4838 
4839 	DBG1(vgenp, ldcp, "enter\n");
4840 	if (ldcp->hphase != VH_PHASE2) {
4841 		DWARN(vgenp, ldcp, "Rcvd ATTR_INFO subtype(%d),"
4842 		    " Invalid Phase(%u)\n",
4843 		    tagp->vio_subtype, ldcp->hphase);
4844 		return (VGEN_FAILURE);
4845 	}
4846 	switch (tagp->vio_subtype) {
4847 	case VIO_SUBTYPE_INFO:
4848 
4849 		DBG2(vgenp, ldcp, "ATTR_INFO_RCVD \n");
4850 		ldcp->hstate |= ATTR_INFO_RCVD;
4851 
4852 		/* save peer's values */
4853 		ldcp->peer_hparams.mtu = attrmsg->mtu;
4854 		ldcp->peer_hparams.addr = attrmsg->addr;
4855 		ldcp->peer_hparams.addr_type = attrmsg->addr_type;
4856 		ldcp->peer_hparams.xfer_mode = attrmsg->xfer_mode;
4857 		ldcp->peer_hparams.ack_freq = attrmsg->ack_freq;
4858 
4859 		if (vgen_check_attr_info(ldcp, attrmsg) == VGEN_FAILURE) {
4860 			/* unsupported attr, send NACK */
4861 			tagp->vio_subtype = VIO_SUBTYPE_NACK;
4862 		} else {
4863 			ack = 1;
4864 			tagp->vio_subtype = VIO_SUBTYPE_ACK;
4865 		}
4866 		tagp->vio_sid = ldcp->local_sid;
4867 
4868 		/* send reply msg back to peer */
4869 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*attrmsg),
4870 		    B_FALSE);
4871 		if (rv != VGEN_SUCCESS) {
4872 			return (rv);
4873 		}
4874 
4875 		if (ack) {
4876 			ldcp->hstate |= ATTR_ACK_SENT;
4877 			DBG2(vgenp, ldcp, "ATTR_ACK_SENT \n");
4878 		} else {
4879 			/* failed */
4880 			DWARN(vgenp, ldcp, "ATTR_NACK_SENT \n");
4881 			return (VGEN_FAILURE);
4882 		}
4883 
4884 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
4885 			vgen_handshake(vh_nextphase(ldcp));
4886 		}
4887 
4888 		break;
4889 
4890 	case VIO_SUBTYPE_ACK:
4891 
4892 		ldcp->hstate |= ATTR_ACK_RCVD;
4893 
4894 		DBG2(vgenp, ldcp, "ATTR_ACK_RCVD \n");
4895 
4896 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
4897 			vgen_handshake(vh_nextphase(ldcp));
4898 		}
4899 		break;
4900 
4901 	case VIO_SUBTYPE_NACK:
4902 
4903 		DBG2(vgenp, ldcp, "ATTR_NACK_RCVD \n");
4904 		return (VGEN_FAILURE);
4905 	}
4906 	DBG1(vgenp, ldcp, "exit\n");
4907 	return (VGEN_SUCCESS);
4908 }
4909 
4910 /* Check if the dring info msg is ok */
4911 static int
4912 vgen_check_dring_reg(vio_dring_reg_msg_t *msg)
4913 {
4914 	/* check if msg contents are ok */
4915 	if ((msg->num_descriptors < 128) || (msg->descriptor_size <
4916 	    sizeof (vnet_public_desc_t))) {
4917 		return (VGEN_FAILURE);
4918 	}
4919 	return (VGEN_SUCCESS);
4920 }
4921 
4922 /*
4923  * Handle a descriptor ring register msg from the peer or an ACK/NACK from
4924  * the peer to a dring register msg that we sent.
4925  */
4926 static int
4927 vgen_handle_dring_reg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4928 {
4929 	vio_dring_reg_msg_t *msg = (vio_dring_reg_msg_t *)tagp;
4930 	ldc_mem_cookie_t dcookie;
4931 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4932 	int ack = 0;
4933 	int rv = 0;
4934 
4935 	DBG1(vgenp, ldcp, "enter\n");
4936 	if (ldcp->hphase < VH_PHASE2) {
4937 		/* dring_info can be rcvd in any of the phases after Phase1 */
4938 		DWARN(vgenp, ldcp,
4939 		    "Rcvd DRING_INFO Subtype (%d), Invalid Phase(%u)\n",
4940 		    tagp->vio_subtype, ldcp->hphase);
4941 		return (VGEN_FAILURE);
4942 	}
4943 	switch (tagp->vio_subtype) {
4944 	case VIO_SUBTYPE_INFO:
4945 
4946 		DBG2(vgenp, ldcp, "DRING_INFO_RCVD \n");
4947 		ldcp->hstate |= DRING_INFO_RCVD;
4948 		bcopy((msg->cookie), &dcookie, sizeof (dcookie));
4949 
4950 		ASSERT(msg->ncookies == 1);
4951 
4952 		if (vgen_check_dring_reg(msg) == VGEN_SUCCESS) {
4953 			/*
4954 			 * verified dring info msg to be ok,
4955 			 * now try to map the remote dring.
4956 			 */
4957 			rv = vgen_init_rxds(ldcp, msg->num_descriptors,
4958 			    msg->descriptor_size, &dcookie,
4959 			    msg->ncookies);
4960 			if (rv == DDI_SUCCESS) {
4961 				/* now we can ack the peer */
4962 				ack = 1;
4963 			}
4964 		}
4965 		if (ack == 0) {
4966 			/* failed, send NACK */
4967 			tagp->vio_subtype = VIO_SUBTYPE_NACK;
4968 		} else {
4969 			if (!(ldcp->peer_hparams.dring_ready)) {
4970 
4971 				/* save peer's dring_info values */
4972 				bcopy(&dcookie,
4973 				    &(ldcp->peer_hparams.dring_cookie),
4974 				    sizeof (dcookie));
4975 				ldcp->peer_hparams.num_desc =
4976 				    msg->num_descriptors;
4977 				ldcp->peer_hparams.desc_size =
4978 				    msg->descriptor_size;
4979 				ldcp->peer_hparams.num_dcookies =
4980 				    msg->ncookies;
4981 
4982 				/* set dring_ident for the peer */
4983 				ldcp->peer_hparams.dring_ident =
4984 				    (uint64_t)ldcp->rxdp;
4985 				/* return the dring_ident in ack msg */
4986 				msg->dring_ident =
4987 				    (uint64_t)ldcp->rxdp;
4988 
4989 				ldcp->peer_hparams.dring_ready = B_TRUE;
4990 			}
4991 			tagp->vio_subtype = VIO_SUBTYPE_ACK;
4992 		}
4993 		tagp->vio_sid = ldcp->local_sid;
4994 		/* send reply msg back to peer */
4995 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msg),
4996 		    B_FALSE);
4997 		if (rv != VGEN_SUCCESS) {
4998 			return (rv);
4999 		}
5000 
5001 		if (ack) {
5002 			ldcp->hstate |= DRING_ACK_SENT;
5003 			DBG2(vgenp, ldcp, "DRING_ACK_SENT");
5004 		} else {
5005 			DWARN(vgenp, ldcp, "DRING_NACK_SENT");
5006 			return (VGEN_FAILURE);
5007 		}
5008 
5009 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5010 			vgen_handshake(vh_nextphase(ldcp));
5011 		}
5012 
5013 		break;
5014 
5015 	case VIO_SUBTYPE_ACK:
5016 
5017 		ldcp->hstate |= DRING_ACK_RCVD;
5018 
5019 		DBG2(vgenp, ldcp, "DRING_ACK_RCVD");
5020 
5021 		if (!(ldcp->local_hparams.dring_ready)) {
5022 			/* local dring is now ready */
5023 			ldcp->local_hparams.dring_ready = B_TRUE;
5024 
5025 			/* save dring_ident acked by peer */
5026 			ldcp->local_hparams.dring_ident =
5027 			    msg->dring_ident;
5028 		}
5029 
5030 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5031 			vgen_handshake(vh_nextphase(ldcp));
5032 		}
5033 
5034 		break;
5035 
5036 	case VIO_SUBTYPE_NACK:
5037 
5038 		DBG2(vgenp, ldcp, "DRING_NACK_RCVD");
5039 		return (VGEN_FAILURE);
5040 	}
5041 	DBG1(vgenp, ldcp, "exit\n");
5042 	return (VGEN_SUCCESS);
5043 }
5044 
5045 /*
5046  * Handle a rdx info msg from the peer or an ACK/NACK
5047  * from the peer to a rdx info msg that we sent.
5048  */
5049 static int
5050 vgen_handle_rdx_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5051 {
5052 	int rv = 0;
5053 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
5054 
5055 	DBG1(vgenp, ldcp, "enter\n");
5056 	if (ldcp->hphase != VH_PHASE3) {
5057 		DWARN(vgenp, ldcp,
5058 		    "Rcvd RDX_INFO Subtype (%d), Invalid Phase(%u)\n",
5059 		    tagp->vio_subtype, ldcp->hphase);
5060 		return (VGEN_FAILURE);
5061 	}
5062 	switch (tagp->vio_subtype) {
5063 	case VIO_SUBTYPE_INFO:
5064 
5065 		DBG2(vgenp, ldcp, "RDX_INFO_RCVD \n");
5066 		ldcp->hstate |= RDX_INFO_RCVD;
5067 
5068 		tagp->vio_subtype = VIO_SUBTYPE_ACK;
5069 		tagp->vio_sid = ldcp->local_sid;
5070 		/* send reply msg back to peer */
5071 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (vio_rdx_msg_t),
5072 		    B_FALSE);
5073 		if (rv != VGEN_SUCCESS) {
5074 			return (rv);
5075 		}
5076 
5077 		ldcp->hstate |= RDX_ACK_SENT;
5078 		DBG2(vgenp, ldcp, "RDX_ACK_SENT \n");
5079 
5080 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5081 			vgen_handshake(vh_nextphase(ldcp));
5082 		}
5083 
5084 		break;
5085 
5086 	case VIO_SUBTYPE_ACK:
5087 
5088 		ldcp->hstate |= RDX_ACK_RCVD;
5089 
5090 		DBG2(vgenp, ldcp, "RDX_ACK_RCVD \n");
5091 
5092 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5093 			vgen_handshake(vh_nextphase(ldcp));
5094 		}
5095 		break;
5096 
5097 	case VIO_SUBTYPE_NACK:
5098 
5099 		DBG2(vgenp, ldcp, "RDX_NACK_RCVD \n");
5100 		return (VGEN_FAILURE);
5101 	}
5102 	DBG1(vgenp, ldcp, "exit\n");
5103 	return (VGEN_SUCCESS);
5104 }
5105 
5106 /* Handle ACK/NACK from vsw to a set multicast msg that we sent */
5107 static int
5108 vgen_handle_mcast_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5109 {
5110 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5111 	vnet_mcast_msg_t *msgp = (vnet_mcast_msg_t *)tagp;
5112 	struct ether_addr *addrp;
5113 	int count;
5114 	int i;
5115 
5116 	DBG1(vgenp, ldcp, "enter\n");
5117 	switch (tagp->vio_subtype) {
5118 
5119 	case VIO_SUBTYPE_INFO:
5120 
5121 		/* vnet shouldn't recv set mcast msg, only vsw handles it */
5122 		DWARN(vgenp, ldcp, "rcvd SET_MCAST_INFO \n");
5123 		break;
5124 
5125 	case VIO_SUBTYPE_ACK:
5126 
5127 		/* success adding/removing multicast addr */
5128 		DBG1(vgenp, ldcp, "rcvd SET_MCAST_ACK \n");
5129 		break;
5130 
5131 	case VIO_SUBTYPE_NACK:
5132 
5133 		DWARN(vgenp, ldcp, "rcvd SET_MCAST_NACK \n");
5134 		if (!(msgp->set)) {
5135 			/* multicast remove request failed */
5136 			break;
5137 		}
5138 
5139 		/* multicast add request failed */
5140 		for (count = 0; count < msgp->count; count++) {
5141 			addrp = &(msgp->mca[count]);
5142 
5143 			/* delete address from the table */
5144 			for (i = 0; i < vgenp->mccount; i++) {
5145 				if (ether_cmp(addrp,
5146 				    &(vgenp->mctab[i])) == 0) {
5147 					if (vgenp->mccount > 1) {
5148 						int t = vgenp->mccount - 1;
5149 						vgenp->mctab[i] =
5150 						    vgenp->mctab[t];
5151 					}
5152 					vgenp->mccount--;
5153 					break;
5154 				}
5155 			}
5156 		}
5157 		break;
5158 
5159 	}
5160 	DBG1(vgenp, ldcp, "exit\n");
5161 
5162 	return (VGEN_SUCCESS);
5163 }
5164 
5165 /* handler for control messages received from the peer ldc end-point */
5166 static int
5167 vgen_handle_ctrlmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5168 {
5169 	int rv = 0;
5170 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5171 
5172 	DBG1(vgenp, ldcp, "enter\n");
5173 	switch (tagp->vio_subtype_env) {
5174 
5175 	case VIO_VER_INFO:
5176 		rv = vgen_handle_version_negotiate(ldcp, tagp);
5177 		break;
5178 
5179 	case VIO_ATTR_INFO:
5180 		rv = vgen_handle_attr_info(ldcp, tagp);
5181 		break;
5182 
5183 	case VIO_DRING_REG:
5184 		rv = vgen_handle_dring_reg(ldcp, tagp);
5185 		break;
5186 
5187 	case VIO_RDX:
5188 		rv = vgen_handle_rdx_info(ldcp, tagp);
5189 		break;
5190 
5191 	case VNET_MCAST_INFO:
5192 		rv = vgen_handle_mcast_info(ldcp, tagp);
5193 		break;
5194 
5195 	case VIO_DDS_INFO:
5196 		rv = vgen_dds_rx(ldcp, tagp);
5197 		break;
5198 	}
5199 
5200 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
5201 	return (rv);
5202 }
5203 
5204 /* handler for data messages received from the peer ldc end-point */
5205 static int
5206 vgen_handle_datamsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp, uint32_t msglen)
5207 {
5208 	int rv = 0;
5209 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5210 
5211 	DBG1(vgenp, ldcp, "enter\n");
5212 
5213 	if (ldcp->hphase != VH_DONE)
5214 		return (rv);
5215 
5216 	if (tagp->vio_subtype == VIO_SUBTYPE_INFO) {
5217 		rv = vgen_check_datamsg_seq(ldcp, tagp);
5218 		if (rv != 0) {
5219 			return (rv);
5220 		}
5221 	}
5222 
5223 	switch (tagp->vio_subtype_env) {
5224 	case VIO_DRING_DATA:
5225 		rv = vgen_handle_dring_data(ldcp, tagp);
5226 		break;
5227 
5228 	case VIO_PKT_DATA:
5229 		ldcp->rx_pktdata((void *)ldcp, (void *)tagp, msglen);
5230 		break;
5231 	default:
5232 		break;
5233 	}
5234 
5235 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
5236 	return (rv);
5237 }
5238 
5239 /*
5240  * dummy pkt data handler function for vnet protocol version 1.0
5241  */
5242 static void
5243 vgen_handle_pkt_data_nop(void *arg1, void *arg2, uint32_t msglen)
5244 {
5245 	_NOTE(ARGUNUSED(arg1, arg2, msglen))
5246 }
5247 
5248 /*
5249  * This function handles raw pkt data messages received over the channel.
5250  * Currently, only priority-eth-type frames are received through this mechanism.
5251  * In this case, the frame(data) is present within the message itself which
5252  * is copied into an mblk before sending it up the stack.
5253  */
5254 static void
5255 vgen_handle_pkt_data(void *arg1, void *arg2, uint32_t msglen)
5256 {
5257 	vgen_ldc_t		*ldcp = (vgen_ldc_t *)arg1;
5258 	vio_raw_data_msg_t	*pkt	= (vio_raw_data_msg_t *)arg2;
5259 	uint32_t		size;
5260 	mblk_t			*mp;
5261 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
5262 	vgen_stats_t		*statsp = &ldcp->stats;
5263 	vgen_hparams_t		*lp = &ldcp->local_hparams;
5264 	vio_net_rx_cb_t		vrx_cb;
5265 
5266 	ASSERT(MUTEX_HELD(&ldcp->cblock));
5267 
5268 	mutex_exit(&ldcp->cblock);
5269 
5270 	size = msglen - VIO_PKT_DATA_HDRSIZE;
5271 	if (size < ETHERMIN || size > lp->mtu) {
5272 		(void) atomic_inc_32(&statsp->rx_pri_fail);
5273 		goto exit;
5274 	}
5275 
5276 	mp = vio_multipool_allocb(&ldcp->vmp, size);
5277 	if (mp == NULL) {
5278 		mp = allocb(size, BPRI_MED);
5279 		if (mp == NULL) {
5280 			(void) atomic_inc_32(&statsp->rx_pri_fail);
5281 			DWARN(vgenp, ldcp, "allocb failure, "
5282 			    "unable to process priority frame\n");
5283 			goto exit;
5284 		}
5285 	}
5286 
5287 	/* copy the frame from the payload of raw data msg into the mblk */
5288 	bcopy(pkt->data, mp->b_rptr, size);
5289 	mp->b_wptr = mp->b_rptr + size;
5290 
5291 	/* update stats */
5292 	(void) atomic_inc_64(&statsp->rx_pri_packets);
5293 	(void) atomic_add_64(&statsp->rx_pri_bytes, size);
5294 
5295 	/* send up; call vrx_cb() as cblock is already released */
5296 	vrx_cb = ldcp->portp->vcb.vio_net_rx_cb;
5297 	vrx_cb(ldcp->portp->vhp, mp);
5298 
5299 exit:
5300 	mutex_enter(&ldcp->cblock);
5301 }
5302 
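/* send a dring data ack for the given descriptor range to the peer */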
5303 static int
5304 vgen_send_dring_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp, uint32_t start,
5305     int32_t end, uint8_t pstate)
5306 {
5307 	int rv = 0;
5308 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5309 	vio_dring_msg_t *msgp = (vio_dring_msg_t *)tagp;
5310 
5311 	tagp->vio_subtype = VIO_SUBTYPE_ACK;
5312 	tagp->vio_sid = ldcp->local_sid;
5313 	msgp->start_idx = start;
5314 	msgp->end_idx = end;
5315 	msgp->dring_process_state = pstate;
5316 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msgp), B_FALSE);
5317 	if (rv != VGEN_SUCCESS) {
5318 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
5319 	}
5320 	return (rv);
5321 }
5322 
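/* handle a dring data message (INFO/ACK/NACK) received from the peer */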
5323 static int
5324 vgen_handle_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5325 {
5326 	int rv = 0;
5327 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5328 
5329 
5330 	DBG1(vgenp, ldcp, "enter\n");
5331 	switch (tagp->vio_subtype) {
5332 
5333 	case VIO_SUBTYPE_INFO:
5334 		/*
5335 		 * To reduce the locking contention, release the
5336 		 * cblock here and re-acquire it once we are done
5337 		 * receiving packets.
5338 		 */
5339 		mutex_exit(&ldcp->cblock);
5340 		mutex_enter(&ldcp->rxlock);
5341 		rv = vgen_handle_dring_data_info(ldcp, tagp);
5342 		mutex_exit(&ldcp->rxlock);
5343 		mutex_enter(&ldcp->cblock);
5344 		break;
5345 
5346 	case VIO_SUBTYPE_ACK:
5347 		rv = vgen_handle_dring_data_ack(ldcp, tagp);
5348 		break;
5349 
5350 	case VIO_SUBTYPE_NACK:
5351 		rv = vgen_handle_dring_data_nack(ldcp, tagp);
5352 		break;
5353 	}
5354 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
5355 	return (rv);
5356 }
5357 
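/*
 * Handle a dring data INFO message from the peer: validate the start/end
 * indices and the dring ident, account for (and NACK) descriptors that
 * appear to have been lost if VGEN_HANDLE_LOST_PKTS is defined, and then
 * process the ready descriptors starting at the given start index.
 */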
5358 static int
5359 vgen_handle_dring_data_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5360 {
5361 	uint32_t start;
5362 	int32_t end;
5363 	int rv = 0;
5364 	vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;
5365 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5366 #ifdef VGEN_HANDLE_LOST_PKTS
5367 	vgen_stats_t *statsp = &ldcp->stats;
5368 	uint32_t rxi;
5369 	int n;
5370 #endif
5371 
5372 	DBG1(vgenp, ldcp, "enter\n");
5373 
5374 	start = dringmsg->start_idx;
5375 	end = dringmsg->end_idx;
5376 	/*
5377 	 * received a data msg, which contains the start and end
5378 	 * indices of the descriptors within the rx ring holding data,
5379 	 * the seq_num of the data packet corresponding to the start index,
5380 	 * and the dring_ident.
5381 	 * We can now read the contents of each of these descriptors
5382 	 * and gather data from them.
5383 	 */
5384 	DBG1(vgenp, ldcp, "INFO: start(%d), end(%d)\n",
5385 	    start, end);
5386 
5387 	/* validate rx start and end indices */
5388 	if (!(CHECK_RXI(start, ldcp)) || ((end != -1) &&
5389 	    !(CHECK_RXI(end, ldcp)))) {
5390 		DWARN(vgenp, ldcp, "Invalid Rx start(%d) or end(%d)\n",
5391 		    start, end);
5392 		/* drop the message if invalid index */
5393 		return (rv);
5394 	}
5395 
5396 	/* validate dring_ident */
5397 	if (dringmsg->dring_ident != ldcp->peer_hparams.dring_ident) {
5398 		DWARN(vgenp, ldcp, "Invalid dring ident 0x%x\n",
5399 		    dringmsg->dring_ident);
5400 		/* invalid dring_ident, drop the msg */
5401 		return (rv);
5402 	}
5403 #ifdef DEBUG
5404 	if (vgen_trigger_rxlost) {
5405 		/* drop this msg to simulate lost pkts for debugging */
5406 		vgen_trigger_rxlost = 0;
5407 		return (rv);
5408 	}
5409 #endif
5410 
5411 #ifdef	VGEN_HANDLE_LOST_PKTS
5412 
5413 	/* receive start index doesn't match expected index */
5414 	if (ldcp->next_rxi != start) {
5415 		DWARN(vgenp, ldcp, "next_rxi(%d) != start(%d)\n",
5416 		    ldcp->next_rxi, start);
5417 
5418 		/* calculate the number of pkts lost */
5419 		if (start >= ldcp->next_rxi) {
5420 			n = start - ldcp->next_rxi;
5421 		} else  {
5422 			n = ldcp->num_rxds - (ldcp->next_rxi - start);
5423 		}
5424 
5425 		statsp->rx_lost_pkts += n;
5426 		tagp->vio_subtype = VIO_SUBTYPE_NACK;
5427 		tagp->vio_sid = ldcp->local_sid;
5428 		/* indicate the range of lost descriptors */
5429 		dringmsg->start_idx = ldcp->next_rxi;
5430 		rxi = start;
5431 		DECR_RXI(rxi, ldcp);
5432 		dringmsg->end_idx = rxi;
5433 		/* dring ident is left unchanged */
5434 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp,
5435 		    sizeof (*dringmsg), B_FALSE);
5436 		if (rv != VGEN_SUCCESS) {
5437 			DWARN(vgenp, ldcp,
5438 			    "vgen_sendmsg failed, stype:NACK\n");
5439 			return (rv);
5440 		}
5441 		/*
5442 		 * Treat this range of descrs/pkts as dropped,
5443 		 * set the new expected value of next_rxi,
5444 		 * and continue (below) to process from the new
5445 		 * start index.
5446 		 */
5447 		ldcp->next_rxi = start;
5448 	}
5449 
5450 #endif	/* VGEN_HANDLE_LOST_PKTS */
5451 
5452 	/* Now receive messages */
5453 	rv = vgen_process_dring_data(ldcp, tagp);
5454 
5455 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
5456 	return (rv);
5457 }
5458 
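/*
 * Receive packets from the peer's descriptor ring: starting at the given
 * index, copy the data of each ready descriptor into an mblk, mark the
 * descriptor done, send acks whenever the peer has requested one, and pass
 * chains of received packets up the stack via vgen_rx(). Finally, send a
 * STOPPED ack and record the next expected receive index.
 */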
5459 static int
5460 vgen_process_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5461 {
5462 	boolean_t set_ack_start = B_FALSE;
5463 	uint32_t start;
5464 	uint32_t ack_end;
5465 	uint32_t next_rxi;
5466 	uint32_t rxi;
5467 	int count = 0;
5468 	int rv = 0;
5469 	uint32_t retries = 0;
5470 	vgen_stats_t *statsp;
5471 	vnet_public_desc_t rxd;
5472 	vio_dring_entry_hdr_t *hdrp;
5473 	mblk_t *bp = NULL;
5474 	mblk_t *bpt = NULL;
5475 	uint32_t ack_start;
5476 	boolean_t rxd_err = B_FALSE;
5477 	mblk_t *mp = NULL;
5478 	size_t nbytes;
5479 	boolean_t ack_needed = B_FALSE;
5480 	size_t nread;
5481 	uint64_t off = 0;
5482 	struct ether_header *ehp;
5483 	vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;
5484 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5485 
5486 	DBG1(vgenp, ldcp, "enter\n");
5487 
5488 	statsp = &ldcp->stats;
5489 	start = dringmsg->start_idx;
5490 
5491 	/*
5492 	 * Start processing the descriptors from the specified start
5493 	 * index, up to the index of a descriptor that is not ready to
5494 	 * be processed, or until we process the entire descriptor ring
5495 	 * and wrap around to the start index.
5496 	 */
5497 
5498 	/* need to set the start index of descriptors to be ack'd */
5499 	set_ack_start = B_TRUE;
5500 
5501 	/* index up to which we have ack'd */
5502 	ack_end = start;
5503 	DECR_RXI(ack_end, ldcp);
5504 
5505 	next_rxi = rxi =  start;
5506 	do {
5507 vgen_recv_retry:
5508 		rv = vnet_dring_entry_copy(&(ldcp->rxdp[rxi]), &rxd,
5509 		    ldcp->dring_mtype, ldcp->rx_dhandle, rxi, rxi);
5510 		if (rv != 0) {
5511 			DWARN(vgenp, ldcp, "ldc_mem_dring_acquire() failed"
5512 			    " rv(%d)\n", rv);
5513 			statsp->ierrors++;
5514 			return (rv);
5515 		}
5516 
5517 		hdrp = &rxd.hdr;
5518 
5519 		if (hdrp->dstate != VIO_DESC_READY) {
5520 			/*
5521 			 * Before waiting and retrying here, send up
5522 			 * the packets that have already been received
5523 			 */
5524 			if (bp != NULL) {
5525 				DTRACE_PROBE1(vgen_rcv_msgs, int, count);
5526 				vgen_rx(ldcp, bp);
5527 				count = 0;
5528 				bp = bpt = NULL;
5529 			}
5530 			/*
5531 			 * descriptor is not ready.
5532 			 * retry descriptor acquire, stop processing
5533 			 * after max # retries.
5534 			 */
5535 			if (retries == vgen_recv_retries)
5536 				break;
5537 			retries++;
5538 			drv_usecwait(vgen_recv_delay);
5539 			goto vgen_recv_retry;
5540 		}
5541 		retries = 0;
5542 
5543 		if (set_ack_start) {
5544 			/*
5545 			 * initialize the start index of the range
5546 			 * of descriptors to be ack'd.
5547 			 */
5548 			ack_start = rxi;
5549 			set_ack_start = B_FALSE;
5550 		}
5551 
5552 		if ((rxd.nbytes < ETHERMIN) ||
5553 		    (rxd.ncookies == 0) ||
5554 		    (rxd.ncookies > MAX_COOKIES)) {
5555 			rxd_err = B_TRUE;
5556 		} else {
5557 			/*
5558 			 * Try to allocate an mblk from the free pool
5559 			 * of recv mblks for the channel.
5560 			 * If this fails, use allocb().
5561 			 */
5562 			nbytes = (VNET_IPALIGN + rxd.nbytes + 7) & ~7;
5563 			mp = vio_multipool_allocb(&ldcp->vmp, nbytes);
5564 			if (!mp) {
5565 				/*
5566 				 * The data buffer returned by
5567 				 * allocb(9F) is 8-byte aligned. We
5568 				 * allocate an extra 8 bytes to ensure
5569 				 * the size is a multiple of 8 bytes for
5570 				 * ldc_mem_copy().
5571 				 */
5572 				statsp->rx_vio_allocb_fail++;
5573 				mp = allocb(VNET_IPALIGN + rxd.nbytes + 8,
5574 				    BPRI_MED);
5575 			}
5576 		}
5577 		if ((rxd_err) || (mp == NULL)) {
5578 			/*
5579 			 * rxd_err or allocb() failure,
5580 			 * drop this packet, get next.
5581 			 */
5582 			if (rxd_err) {
5583 				statsp->ierrors++;
5584 				rxd_err = B_FALSE;
5585 			} else {
5586 				statsp->rx_allocb_fail++;
5587 			}
5588 
5589 			ack_needed = hdrp->ack;
5590 
5591 			/* set descriptor done bit */
5592 			rv = vnet_dring_entry_set_dstate(&(ldcp->rxdp[rxi]),
5593 			    ldcp->dring_mtype, ldcp->rx_dhandle, rxi, rxi,
5594 			    VIO_DESC_DONE);
5595 			if (rv != 0) {
5596 				DWARN(vgenp, ldcp,
5597 				    "vnet_dring_entry_set_dstate err rv(%d)\n",
5598 				    rv);
5599 				return (rv);
5600 			}
5601 
5602 			if (ack_needed) {
5603 				ack_needed = B_FALSE;
5604 				/*
5605 				 * The sender needs an ack for this packet;
5606 				 * ack pkts up to this index.
5607 				 */
5608 				ack_end = rxi;
5609 
5610 				rv = vgen_send_dring_ack(ldcp, tagp,
5611 				    ack_start, ack_end,
5612 				    VIO_DP_ACTIVE);
5613 				if (rv != VGEN_SUCCESS) {
5614 					goto error_ret;
5615 				}
5616 
5617 				/* need to set new ack start index */
5618 				set_ack_start = B_TRUE;
5619 			}
5620 			goto vgen_next_rxi;
5621 		}
5622 
5623 		nread = nbytes;
5624 		rv = ldc_mem_copy(ldcp->ldc_handle,
5625 		    (caddr_t)mp->b_rptr, off, &nread,
5626 		    rxd.memcookie, rxd.ncookies, LDC_COPY_IN);
5627 
5628 		/* if ldc_mem_copy() failed */
5629 		if (rv) {
5630 			DWARN(vgenp, ldcp, "ldc_mem_copy err rv(%d)\n", rv);
5631 			statsp->ierrors++;
5632 			freemsg(mp);
5633 			goto error_ret;
5634 		}
5635 
5636 		ack_needed = hdrp->ack;
5637 
5638 		rv = vnet_dring_entry_set_dstate(&(ldcp->rxdp[rxi]),
5639 		    ldcp->dring_mtype, ldcp->rx_dhandle, rxi, rxi,
5640 		    VIO_DESC_DONE);
5641 		if (rv != 0) {
5642 			DWARN(vgenp, ldcp,
5643 			    "vnet_dring_entry_set_dstate err rv(%d)\n", rv);
5644 			goto error_ret;
5645 		}
5646 
5647 		mp->b_rptr += VNET_IPALIGN;
5648 
5649 		if (ack_needed) {
5650 			ack_needed = B_FALSE;
5651 			/*
5652 			 * The sender needs an ack for this packet;
5653 			 * ack pkts up to this index.
5654 			 */
5655 			ack_end = rxi;
5656 
5657 			rv = vgen_send_dring_ack(ldcp, tagp,
5658 			    ack_start, ack_end, VIO_DP_ACTIVE);
5659 			if (rv != VGEN_SUCCESS) {
5660 				goto error_ret;
5661 			}
5662 
5663 			/* need to set new ack start index */
5664 			set_ack_start = B_TRUE;
5665 		}
5666 
5667 		if (nread != nbytes) {
5668 			DWARN(vgenp, ldcp,
5669 			    "ldc_mem_copy nread(%lx), nbytes(%lx)\n",
5670 			    nread, nbytes);
5671 			statsp->ierrors++;
5672 			freemsg(mp);
5673 			goto vgen_next_rxi;
5674 		}
5675 
5676 		/* point to the actual end of data */
5677 		mp->b_wptr = mp->b_rptr + rxd.nbytes;
5678 
5679 		/* update stats */
5680 		statsp->ipackets++;
5681 		statsp->rbytes += rxd.nbytes;
5682 		ehp = (struct ether_header *)mp->b_rptr;
5683 		if (IS_BROADCAST(ehp))
5684 			statsp->brdcstrcv++;
5685 		else if (IS_MULTICAST(ehp))
5686 			statsp->multircv++;
5687 
5688 		/* build a chain of received packets */
5689 		if (bp == NULL) {
5690 			/* first pkt */
5691 			bp = mp;
5692 			bpt = bp;
5693 			bpt->b_next = NULL;
5694 		} else {
5695 			mp->b_next = NULL;
5696 			bpt->b_next = mp;
5697 			bpt = mp;
5698 		}
5699 
5700 		if (count++ > vgen_chain_len) {
5701 			DTRACE_PROBE1(vgen_rcv_msgs, int, count);
5702 			vgen_rx(ldcp, bp);
5703 			count = 0;
5704 			bp = bpt = NULL;
5705 		}
5706 
5707 vgen_next_rxi:
5708 		/* update end index of range of descrs to be ack'd */
5709 		ack_end = rxi;
5710 
5711 		/* update the next index to be processed */
5712 		INCR_RXI(next_rxi, ldcp);
5713 		if (next_rxi == start) {
5714 			/*
5715 			 * processed the entire descriptor ring up to
5716 			 * the index at which we started.
5717 			 */
5718 			break;
5719 		}
5720 
5721 		rxi = next_rxi;
5722 
5723 	_NOTE(CONSTCOND)
5724 	} while (1);
5725 
5726 	/*
5727 	 * send an ack message to peer indicating that we have stopped
5728 	 * processing descriptors.
5729 	 */
5730 	if (set_ack_start) {
5731 		/*
5732 		 * We have ack'd up to some index and we have not
5733 		 * processed any descriptors beyond that index.
5734 		 * Use the last ack'd index as both the start and
5735 		 * end of range of descrs being ack'd.
5736 		 * Note: This results in acking the last index twice
5737 		 * and should be harmless.
5738 		 */
5739 		ack_start = ack_end;
5740 	}
5741 
5742 	rv = vgen_send_dring_ack(ldcp, tagp, ack_start, ack_end,
5743 	    VIO_DP_STOPPED);
5744 	if (rv != VGEN_SUCCESS) {
5745 		goto error_ret;
5746 	}
5747 
5748 	/* save new recv index of next dring msg */
5749 	ldcp->next_rxi = next_rxi;
5750 
5751 error_ret:
5752 	/* send up packets received so far */
5753 	if (bp != NULL) {
5754 		DTRACE_PROBE1(vgen_rcv_msgs, int, count);
5755 		vgen_rx(ldcp, bp);
5756 		bp = bpt = NULL;
5757 	}
5758 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
5759 	return (rv);
5760 
5761 }
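
/*
 * Note on the ring-index helpers used above: INCR_RXI()/INCR_TXI() and
 * CHECK_RXI()/CHECK_TXI() are defined in vnet_gen.h.  As an illustration
 * only (the exact definitions live in that header and may differ), they
 * amount to modulo-increment and bounds-check macros over a ring whose
 * size is assumed to be a power of two:
 */
#if 0	/* illustrative sketch, not compiled */
#define	INCR_RXI(i, ldcp)	\
	((i) = (((i) + 1) & ((ldcp)->num_rxds - 1)))	/* wrap at ring end */
#define	CHECK_RXI(i, ldcp)	\
	(((i) >= 0) && ((i) < (ldcp)->num_rxds))	/* index within ring */
#define	INCR_TXI(i, ldcp)	\
	((i) = (((i) + 1) & ((ldcp)->num_txds - 1)))
#define	CHECK_TXI(i, ldcp)	\
	(((i) >= 0) && ((i) < (ldcp)->num_txds))
#endif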
5762 
5763 static int
5764 vgen_handle_dring_data_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5765 {
5766 	int rv = 0;
5767 	uint32_t start;
5768 	int32_t end;
5769 	uint32_t txi;
5770 	boolean_t ready_txd = B_FALSE;
5771 	vgen_stats_t *statsp;
5772 	vgen_private_desc_t *tbufp;
5773 	vnet_public_desc_t *txdp;
5774 	vio_dring_entry_hdr_t *hdrp;
5775 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5776 	vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;
5777 
5778 	DBG1(vgenp, ldcp, "enter\n");
5779 	start = dringmsg->start_idx;
5780 	end = dringmsg->end_idx;
5781 	statsp = &ldcp->stats;
5782 
5783 	/*
5784 	 * received an ack corresponding to a specific descriptor for
5785 	 * which we had set the ACK bit in the descriptor (during
5786 	 * transmit). This enables us to reclaim descriptors.
5787 	 */
5788 
5789 	DBG2(vgenp, ldcp, "ACK:  start(%d), end(%d)\n", start, end);
5790 
5791 	/* validate start and end indices in the tx ack msg */
5792 	if (!(CHECK_TXI(start, ldcp)) || !(CHECK_TXI(end, ldcp))) {
5793 		/* drop the message if invalid index */
5794 		DWARN(vgenp, ldcp, "Invalid Tx ack start(%d) or end(%d)\n",
5795 		    start, end);
5796 		return (rv);
5797 	}
5798 	/* validate dring_ident */
5799 	if (dringmsg->dring_ident != ldcp->local_hparams.dring_ident) {
5800 		/* invalid dring_ident, drop the msg */
5801 		DWARN(vgenp, ldcp, "Invalid dring ident 0x%lx\n",
5802 		    dringmsg->dring_ident);
5803 		return (rv);
5804 	}
5805 	statsp->dring_data_acks++;
5806 
5807 	/* reclaim descriptors that are done */
5808 	vgen_reclaim(ldcp);
5809 
5810 	if (dringmsg->dring_process_state != VIO_DP_STOPPED) {
5811 		/*
5812 		 * receiver continued processing descriptors after
5813 		 * sending us the ack.
5814 		 */
5815 		return (rv);
5816 	}
5817 
5818 	statsp->dring_stopped_acks++;
5819 
5820 	/* receiver stopped processing descriptors */
5821 	mutex_enter(&ldcp->wrlock);
5822 	mutex_enter(&ldcp->tclock);
5823 
5824 	/*
5825 	 * determine if there are any pending tx descriptors
5826 	 * ready to be processed by the receiver(peer) and if so,
5827 	 * send a message to the peer to restart receiving.
5828 	 */
5829 	ready_txd = B_FALSE;
5830 
5831 	/*
5832 	 * using the end index of the descriptor range for which
5833 	 * we received the ack, check if the next descriptor is
5834 	 * ready.
5835 	 */
5836 	txi = end;
5837 	INCR_TXI(txi, ldcp);
5838 	tbufp = &ldcp->tbufp[txi];
5839 	txdp = tbufp->descp;
5840 	hdrp = &txdp->hdr;
5841 	if (hdrp->dstate == VIO_DESC_READY) {
5842 		ready_txd = B_TRUE;
5843 	} else {
5844 		/*
5845 		 * The descriptor following the end of the ack'd range
5846 		 * is not ready.
5847 		 * Starting from the current reclaim index, check
5848 		 * if any descriptor is ready.
5849 		 */
5850 
5851 		txi = ldcp->cur_tbufp - ldcp->tbufp;
5852 		tbufp = &ldcp->tbufp[txi];
5853 
5854 		txdp = tbufp->descp;
5855 		hdrp = &txdp->hdr;
5856 		if (hdrp->dstate == VIO_DESC_READY) {
5857 			ready_txd = B_TRUE;
5858 		}
5859 
5860 	}
5861 
5862 	if (ready_txd) {
5863 		/*
5864 		 * we have tx descriptor(s) ready to be
5865 		 * processed by the receiver.
5866 		 * send a message to the peer with the start index
5867 		 * of ready descriptors.
5868 		 */
5869 		rv = vgen_send_dring_data(ldcp, txi, -1);
5870 		if (rv != VGEN_SUCCESS) {
5871 			ldcp->resched_peer = B_TRUE;
5872 			ldcp->resched_peer_txi = txi;
5873 			mutex_exit(&ldcp->tclock);
5874 			mutex_exit(&ldcp->wrlock);
5875 			return (rv);
5876 		}
5877 	} else {
5878 		/*
5879 		 * no ready tx descriptors. set the flag to send a
5880 		 * message to peer when tx descriptors are ready in
5881 		 * transmit routine.
5882 		 */
5883 		ldcp->resched_peer = B_TRUE;
5884 		ldcp->resched_peer_txi = ldcp->cur_tbufp - ldcp->tbufp;
5885 	}
5886 
5887 	mutex_exit(&ldcp->tclock);
5888 	mutex_exit(&ldcp->wrlock);
5889 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
5890 	return (rv);
5891 }
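
/*
 * The restart logic above reduces to: find a READY tx descriptor (first
 * the one following the ack'd range, then the one at the reclaim index)
 * and either kick the peer with a dring data message or defer to the
 * transmit path via resched_peer.  A condensed, illustrative-only sketch
 * follows; the helper name and its structure are hypothetical and not
 * part of this driver:
 */
#if 0	/* illustrative sketch, not compiled */
static void
vgen_restart_peer_sketch(vgen_ldc_t *ldcp, uint32_t txi)
{
	vgen_private_desc_t	*tbufp = &ldcp->tbufp[txi];

	if (tbufp->descp->hdr.dstate == VIO_DESC_READY &&
	    vgen_send_dring_data(ldcp, txi, -1) == VGEN_SUCCESS) {
		return;		/* peer restarted from txi */
	}

	/* defer: the transmit path sends the restart message later */
	ldcp->resched_peer = B_TRUE;
	ldcp->resched_peer_txi = txi;
}
#endif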
5892 
5893 static int
5894 vgen_handle_dring_data_nack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5895 {
5896 	int rv = 0;
5897 	uint32_t start;
5898 	int32_t end;
5899 	uint32_t txi;
5900 	vnet_public_desc_t *txdp;
5901 	vio_dring_entry_hdr_t *hdrp;
5902 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5903 	vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;
5904 
5905 	DBG1(vgenp, ldcp, "enter\n");
5906 	start = dringmsg->start_idx;
5907 	end = dringmsg->end_idx;
5908 
5909 	/*
5910 	 * peer sent a NACK msg to indicate lost packets.
5911 	 * The start and end correspond to the range of descriptors
5912 	 * for which the peer didn't receive a dring data msg and so
5913 	 * didn't receive the corresponding data.
5914 	 */
5915 	DWARN(vgenp, ldcp, "NACK: start(%d), end(%d)\n", start, end);
5916 
5917 	/* validate start and end indices in the tx nack msg */
5918 	if (!(CHECK_TXI(start, ldcp)) || !(CHECK_TXI(end, ldcp))) {
5919 		/* drop the message if invalid index */
5920 		DWARN(vgenp, ldcp, "Invalid Tx nack start(%d) or end(%d)\n",
5921 		    start, end);
5922 		return (rv);
5923 	}
5924 	/* validate dring_ident */
5925 	if (dringmsg->dring_ident != ldcp->local_hparams.dring_ident) {
5926 		/* invalid dring_ident, drop the msg */
5927 		DWARN(vgenp, ldcp, "Invalid dring ident 0x%lx\n",
5928 		    dringmsg->dring_ident);
5929 		return (rv);
5930 	}
5931 	mutex_enter(&ldcp->txlock);
5932 	mutex_enter(&ldcp->tclock);
5933 
5934 	if (ldcp->next_tbufp == ldcp->cur_tbufp) {
5935 		/* no busy descriptors, bogus nack ? */
5936 		mutex_exit(&ldcp->tclock);
5937 		mutex_exit(&ldcp->txlock);
5938 		return (rv);
5939 	}
5940 
5941 	/* we just mark the descrs as done so they can be reclaimed */
5942 	for (txi = start; txi <= end; ) {
5943 		txdp = &(ldcp->txdp[txi]);
5944 		hdrp = &txdp->hdr;
5945 		if (hdrp->dstate == VIO_DESC_READY)
5946 			hdrp->dstate = VIO_DESC_DONE;
5947 		INCR_TXI(txi, ldcp);
5948 	}
5949 	mutex_exit(&ldcp->tclock);
5950 	mutex_exit(&ldcp->txlock);
5951 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
5952 	return (rv);
5953 }
5954 
5955 static void
5956 vgen_reclaim(vgen_ldc_t *ldcp)
5957 {
5958 	mutex_enter(&ldcp->tclock);
5959 
5960 	vgen_reclaim_dring(ldcp);
5961 	ldcp->reclaim_lbolt = ddi_get_lbolt();
5962 
5963 	mutex_exit(&ldcp->tclock);
5964 }
5965 
5966 /*
5967  * Transmit reclaim function. Starting from the current reclaim index,
5968  * look for descriptors marked DONE and reclaim each such descriptor and
5969  * its corresponding buffer (tbuf).
5970  */
5971 static void
5972 vgen_reclaim_dring(vgen_ldc_t *ldcp)
5973 {
5974 	int count = 0;
5975 	vnet_public_desc_t *txdp;
5976 	vgen_private_desc_t *tbufp;
5977 	vio_dring_entry_hdr_t	*hdrp;
5978 
5979 #ifdef DEBUG
5980 	if (vgen_trigger_txtimeout)
5981 		return;
5982 #endif
5983 
5984 	tbufp = ldcp->cur_tbufp;
5985 	txdp = tbufp->descp;
5986 	hdrp = &txdp->hdr;
5987 
5988 	while ((hdrp->dstate == VIO_DESC_DONE) &&
5989 	    (tbufp != ldcp->next_tbufp)) {
5990 		tbufp->flags = VGEN_PRIV_DESC_FREE;
5991 		hdrp->dstate = VIO_DESC_FREE;
5992 		hdrp->ack = B_FALSE;
5993 
5994 		tbufp = NEXTTBUF(ldcp, tbufp);
5995 		txdp = tbufp->descp;
5996 		hdrp = &txdp->hdr;
5997 		count++;
5998 	}
5999 
6000 	ldcp->cur_tbufp = tbufp;
6001 
6002 	/*
6003 	 * Check if mac layer should be notified to restart transmissions
6004 	 */
6005 	if ((ldcp->need_resched) && (count > 0)) {
6006 		vio_net_tx_update_t vtx_update =
6007 		    ldcp->portp->vcb.vio_net_tx_update;
6008 
6009 		ldcp->need_resched = B_FALSE;
6010 		vtx_update(ldcp->portp->vhp);
6011 	}
6012 }
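
/*
 * NEXTTBUF() advances a private descriptor pointer with wraparound over
 * the tbuf array.  Its real definition is in vnet_gen.h; an equivalent
 * form, assuming an end-of-array marker such as the tbufendp field,
 * would look roughly like this (illustration only):
 */
#if 0	/* illustrative sketch, not compiled */
#define	NEXTTBUF(ldcp, tbufp)	\
	(((tbufp) + 1) == (ldcp)->tbufendp ? (ldcp)->tbufp : ((tbufp) + 1))
#endif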
6013 
6014 /* return the number of pending transmits for the channel */
6015 static int
6016 vgen_num_txpending(vgen_ldc_t *ldcp)
6017 {
6018 	int n;
6019 
6020 	if (ldcp->next_tbufp >= ldcp->cur_tbufp) {
6021 		n = ldcp->next_tbufp - ldcp->cur_tbufp;
6022 	} else  {
6023 		/* cur_tbufp > next_tbufp */
6024 		n = ldcp->num_txds - (ldcp->cur_tbufp - ldcp->next_tbufp);
6025 	}
6026 
6027 	return (n);
6028 }
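
/*
 * Worked example for the pointer arithmetic above (numbers are purely
 * illustrative): with num_txds == 512, cur_tbufp at index 500 and
 * next_tbufp at index 10, the ring has wrapped, so the pending count is
 * 512 - (500 - 10) == 22.  Without a wrap (cur at 10, next at 500) it is
 * simply 500 - 10 == 490.
 */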
6029 
6030 /* determine if the transmit descriptor ring is full */
6031 static int
6032 vgen_tx_dring_full(vgen_ldc_t *ldcp)
6033 {
6034 	vgen_private_desc_t	*tbufp;
6035 	vgen_private_desc_t	*ntbufp;
6036 
6037 	tbufp = ldcp->next_tbufp;
6038 	ntbufp = NEXTTBUF(ldcp, tbufp);
6039 	if (ntbufp == ldcp->cur_tbufp) { /* out of tbufs/txds */
6040 		return (VGEN_SUCCESS);
6041 	}
6042 	return (VGEN_FAILURE);
6043 }
6044 
6045 /* determine if a transmit timeout condition has occurred */
6046 static int
6047 vgen_ldc_txtimeout(vgen_ldc_t *ldcp)
6048 {
6049 	if (((ddi_get_lbolt() - ldcp->reclaim_lbolt) >
6050 	    drv_usectohz(vnet_ldcwd_txtimeout * 1000)) &&
6051 	    (vnet_ldcwd_txtimeout) &&
6052 	    (vgen_tx_dring_full(ldcp) == VGEN_SUCCESS)) {
6053 		return (VGEN_SUCCESS);
6054 	} else {
6055 		return (VGEN_FAILURE);
6056 	}
6057 }
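
/*
 * The timeout test above fires only when all three conditions hold: the
 * vnet_ldcwd_txtimeout tunable is non-zero, the tx ring is full, and no
 * reclaim has happened for longer than the tunable allows.  Given the
 * "* 1000" scaling into microseconds, the tunable appears to be in
 * milliseconds; for example, with vnet_ldcwd_txtimeout == 1000 and
 * hz == 100, drv_usectohz(1000 * 1000) == 100 ticks, i.e. one second
 * without a reclaim on a full ring triggers the watchdog reset path.
 */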
6058 
6059 /* transmit watchdog timeout handler */
6060 static void
6061 vgen_ldc_watchdog(void *arg)
6062 {
6063 	vgen_ldc_t *ldcp;
6064 	vgen_t *vgenp;
6065 	int rv;
6066 
6067 	ldcp = (vgen_ldc_t *)arg;
6068 	vgenp = LDC_TO_VGEN(ldcp);
6069 
6070 	rv = vgen_ldc_txtimeout(ldcp);
6071 	if (rv == VGEN_SUCCESS) {
6072 		DWARN(vgenp, ldcp, "transmit timeout\n");
6073 #ifdef DEBUG
6074 		if (vgen_trigger_txtimeout) {
6075 			/* tx timeout triggered for debugging */
6076 			vgen_trigger_txtimeout = 0;
6077 		}
6078 #endif
6079 		mutex_enter(&ldcp->cblock);
6080 		ldcp->need_ldc_reset = B_TRUE;
6081 		vgen_handshake_retry(ldcp);
6082 		mutex_exit(&ldcp->cblock);
6083 		if (ldcp->need_resched) {
6084 			vio_net_tx_update_t vtx_update =
6085 			    ldcp->portp->vcb.vio_net_tx_update;
6086 
6087 			ldcp->need_resched = B_FALSE;
6088 			vtx_update(ldcp->portp->vhp);
6089 		}
6090 	}
6091 
6092 	ldcp->wd_tid = timeout(vgen_ldc_watchdog, (caddr_t)ldcp,
6093 	    drv_usectohz(vnet_ldcwd_interval * 1000));
6094 }
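
/*
 * The watchdog re-arms itself above via timeout().  The timer is
 * presumably first armed and later cancelled elsewhere in this file
 * (outside this excerpt); an illustrative-only sketch of that pattern
 * using the wd_tid field:
 */
#if 0	/* illustrative sketch, not compiled */
	/* arm: start the periodic transmit watchdog for this channel */
	ldcp->wd_tid = timeout(vgen_ldc_watchdog, (caddr_t)ldcp,
	    drv_usectohz(vnet_ldcwd_interval * 1000));

	/* cancel: stop the watchdog if it is currently armed */
	if (ldcp->wd_tid != 0) {
		(void) untimeout(ldcp->wd_tid);
		ldcp->wd_tid = 0;
	}
#endif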
6095 
6096 /* handler for error messages received from the peer ldc end-point */
6097 static void
6098 vgen_handle_errmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
6099 {
6100 	_NOTE(ARGUNUSED(ldcp, tagp))
6101 }
6102 
6103 static int
6104 vgen_check_datamsg_seq(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
6105 {
6106 	vio_raw_data_msg_t	*rmsg;
6107 	vio_dring_msg_t		*dmsg;
6108 	uint64_t		seq_num;
6109 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
6110 
6111 	if (tagp->vio_subtype_env == VIO_DRING_DATA) {
6112 		dmsg = (vio_dring_msg_t *)tagp;
6113 		seq_num = dmsg->seq_num;
6114 	} else if (tagp->vio_subtype_env == VIO_PKT_DATA) {
6115 		rmsg = (vio_raw_data_msg_t *)tagp;
6116 		seq_num = rmsg->seq_num;
6117 	} else {
6118 		return (EINVAL);
6119 	}
6120 
6121 	if (seq_num != ldcp->next_rxseq) {
6122 
6123 		/* seqnums don't match */
6124 		DWARN(vgenp, ldcp,
6125 		    "next_rxseq(0x%lx) != seq_num(0x%lx)\n",
6126 		    ldcp->next_rxseq, seq_num);
6127 
6128 		ldcp->need_ldc_reset = B_TRUE;
6129 		return (EINVAL);
6130 
6131 	}
6132 
6133 	ldcp->next_rxseq++;
6134 
6135 	return (0);
6136 }
6137 
6138 /* Check if the session id in the received message is valid */
6139 static int
6140 vgen_check_sid(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
6141 {
6142 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6143 
6144 	if (tagp->vio_sid != ldcp->peer_sid) {
6145 		DWARN(vgenp, ldcp, "sid mismatch: expected(%x), rcvd(%x)\n",
6146 		    ldcp->peer_sid, tagp->vio_sid);
6147 		return (VGEN_FAILURE);
6148 	} else {
6149 		return (VGEN_SUCCESS);
6150 	}
6151 }
6152 
6153 static caddr_t
6154 vgen_print_ethaddr(uint8_t *a, char *ebuf)
6155 {
6156 	(void) sprintf(ebuf,
6157 	    "%x:%x:%x:%x:%x:%x", a[0], a[1], a[2], a[3], a[4], a[5]);
6158 	return (ebuf);
6159 }
6160 
6161 /* Handshake watchdog timeout handler */
6162 static void
6163 vgen_hwatchdog(void *arg)
6164 {
6165 	vgen_ldc_t *ldcp = (vgen_ldc_t *)arg;
6166 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6167 
6168 	DWARN(vgenp, ldcp,
6169 	    "handshake timeout ldc(%lx) phase(%x) state(%x)\n",
6170 	    ldcp->hphase, ldcp->hstate);
6171 	    ldcp->ldc_id, ldcp->hphase, ldcp->hstate);
6172 	mutex_enter(&ldcp->cblock);
6173 	if (ldcp->cancel_htid) {
6174 		ldcp->cancel_htid = 0;
6175 		mutex_exit(&ldcp->cblock);
6176 		return;
6177 	}
6178 	ldcp->htid = 0;
6179 	ldcp->need_ldc_reset = B_TRUE;
6180 	vgen_handshake_retry(ldcp);
6181 	mutex_exit(&ldcp->cblock);
6182 }
6183 
6184 static void
6185 vgen_print_hparams(vgen_hparams_t *hp)
6186 {
6187 	uint8_t	addr[6];
6188 	char	ea[ETHERADDRL * 3];	/* "xx:xx:xx:xx:xx:xx" + NUL */
6189 	ldc_mem_cookie_t *dc;
6190 
6191 	cmn_err(CE_CONT, "version_info:\n");
6192 	cmn_err(CE_CONT,
6193 	    "\tver_major: %d, ver_minor: %d, dev_class: %d\n",
6194 	    hp->ver_major, hp->ver_minor, hp->dev_class);
6195 
6196 	vnet_macaddr_ultostr(hp->addr, addr);
6197 	cmn_err(CE_CONT, "attr_info:\n");
6198 	cmn_err(CE_CONT, "\tMTU: %lx, addr: %s\n", hp->mtu,
6199 	    vgen_print_ethaddr(addr, ea));
6200 	cmn_err(CE_CONT,
6201 	    "\taddr_type: %x, xfer_mode: %x, ack_freq: %x\n",
6202 	    hp->addr_type, hp->xfer_mode, hp->ack_freq);
6203 
6204 	dc = &hp->dring_cookie;
6205 	cmn_err(CE_CONT, "dring_info:\n");
6206 	cmn_err(CE_CONT,
6207 	    "\tlength: %d, dsize: %d\n", hp->num_desc, hp->desc_size);
6208 	cmn_err(CE_CONT,
6209 	    "\tldc_addr: 0x%lx, ldc_size: %ld\n",
6210 	    dc->addr, dc->size);
6211 	cmn_err(CE_CONT, "\tdring_ident: 0x%lx\n", hp->dring_ident);
6212 }
6213 
6214 static void
6215 vgen_print_ldcinfo(vgen_ldc_t *ldcp)
6216 {
6217 	vgen_hparams_t *hp;
6218 
6219 	cmn_err(CE_CONT, "Channel Information:\n");
6220 	cmn_err(CE_CONT,
6221 	    "\tldc_id: 0x%lx, ldc_status: 0x%x\n",
6222 	    ldcp->ldc_id, ldcp->ldc_status);
6223 	cmn_err(CE_CONT,
6224 	    "\tlocal_sid: 0x%x, peer_sid: 0x%x\n",
6225 	    ldcp->local_sid, ldcp->peer_sid);
6226 	cmn_err(CE_CONT,
6227 	    "\thphase: 0x%x, hstate: 0x%x\n",
6228 	    ldcp->hphase, ldcp->hstate);
6229 
6230 	cmn_err(CE_CONT, "Local handshake params:\n");
6231 	hp = &ldcp->local_hparams;
6232 	vgen_print_hparams(hp);
6233 
6234 	cmn_err(CE_CONT, "Peer handshake params:\n");
6235 	hp = &ldcp->peer_hparams;
6236 	vgen_print_hparams(hp);
6237 }
6238 
6239 /*
6240  * Send received packets up the stack.
6241  */
6242 static void
6243 vgen_rx(vgen_ldc_t *ldcp, mblk_t *bp)
6244 {
6245 	vio_net_rx_cb_t vrx_cb = ldcp->portp->vcb.vio_net_rx_cb;
6246 
6247 	if (ldcp->rcv_thread != NULL) {
6248 		ASSERT(MUTEX_HELD(&ldcp->rxlock));
6249 		mutex_exit(&ldcp->rxlock);
6250 	} else {
6251 		ASSERT(MUTEX_HELD(&ldcp->cblock));
6252 		mutex_exit(&ldcp->cblock);
6253 	}
6254 
6255 	vrx_cb(ldcp->portp->vhp, bp);
6256 
6257 	if (ldcp->rcv_thread != NULL) {
6258 		mutex_enter(&ldcp->rxlock);
6259 	} else {
6260 		mutex_enter(&ldcp->cblock);
6261 	}
6262 }
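
/*
 * Note on vgen_rx(): the channel lock (rxlock when a dedicated receive
 * thread is in use, cblock otherwise) is dropped around the
 * vio_net_rx_cb callback.  Holding it across the callback could
 * otherwise deadlock if the upper layer re-enters this driver (for
 * example, to transmit) while processing the received chain; that is
 * the apparent intent of the drop/reacquire pattern above.
 */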
6263 
6264 /*
6265  * vgen_ldc_rcv_worker -- A per-LDC worker thread to receive data.
6266  * This thread is woken up by the LDC interrupt handler to process
6267  * LDC packets and receive data.
6268  */
6269 static void
6270 vgen_ldc_rcv_worker(void *arg)
6271 {
6272 	callb_cpr_t	cprinfo;
6273 	vgen_ldc_t *ldcp = (vgen_ldc_t *)arg;
6274 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6275 
6276 	DBG1(vgenp, ldcp, "enter\n");
6277 	CALLB_CPR_INIT(&cprinfo, &ldcp->rcv_thr_lock, callb_generic_cpr,
6278 	    "vnet_rcv_thread");
6279 	mutex_enter(&ldcp->rcv_thr_lock);
6280 	ldcp->rcv_thr_flags |= VGEN_WTHR_RUNNING;
6281 	while (!(ldcp->rcv_thr_flags & VGEN_WTHR_STOP)) {
6282 
6283 		CALLB_CPR_SAFE_BEGIN(&cprinfo);
6284 		/*
6285 		 * Wait until the data is received or a stop
6286 		 * request is received.
6287 		 */
6288 		while (!(ldcp->rcv_thr_flags &
6289 		    (VGEN_WTHR_DATARCVD | VGEN_WTHR_STOP))) {
6290 			cv_wait(&ldcp->rcv_thr_cv, &ldcp->rcv_thr_lock);
6291 		}
6292 		CALLB_CPR_SAFE_END(&cprinfo, &ldcp->rcv_thr_lock)
6293 
6294 		/*
6295 		 * First process the stop request.
6296 		 */
6297 		if (ldcp->rcv_thr_flags & VGEN_WTHR_STOP) {
6298 			DBG2(vgenp, ldcp, "stopped\n");
6299 			break;
6300 		}
6301 		ldcp->rcv_thr_flags &= ~VGEN_WTHR_DATARCVD;
6302 		mutex_exit(&ldcp->rcv_thr_lock);
6303 		DBG2(vgenp, ldcp, "calling vgen_handle_evt_read\n");
6304 		vgen_handle_evt_read(ldcp);
6305 		mutex_enter(&ldcp->rcv_thr_lock);
6306 	}
6307 
6308 	/*
6309 	 * Update the run status and wakeup the thread that
6310 	 * has sent the stop request.
6311 	 */
6312 	ldcp->rcv_thr_flags &= ~VGEN_WTHR_RUNNING;
6313 	cv_signal(&ldcp->rcv_thr_cv);
6314 	DBG1(vgenp, ldcp, "exit\n");	/* must precede thread_exit() */
6315 	CALLB_CPR_EXIT(&cprinfo);
6316 	thread_exit();
6317 }
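
/*
 * The worker above sleeps until VGEN_WTHR_DATARCVD or VGEN_WTHR_STOP is
 * set.  The LDC interrupt callback (outside this excerpt) is expected to
 * wake it roughly as follows (illustration only; the exact code lives in
 * that callback):
 */
#if 0	/* illustrative sketch, not compiled */
	mutex_enter(&ldcp->rcv_thr_lock);
	if (!(ldcp->rcv_thr_flags & VGEN_WTHR_DATARCVD)) {
		ldcp->rcv_thr_flags |= VGEN_WTHR_DATARCVD;
		cv_signal(&ldcp->rcv_thr_cv);
	}
	mutex_exit(&ldcp->rcv_thr_lock);
#endif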
6318 
6319 /* vgen_stop_rcv_thread -- Co-ordinate with receive thread to stop it */
6320 /* vgen_stop_rcv_thread -- Coordinate with the receive thread to stop it */
6321 vgen_stop_rcv_thread(vgen_ldc_t *ldcp)
6322 {
6323 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6324 
6325 	DBG1(vgenp, ldcp, "enter\n");
6326 	/*
6327 	 * Send a stop request by setting the stop flag and
6328 	 * wait until the receive thread stops.
6329 	 */
6330 	mutex_enter(&ldcp->rcv_thr_lock);
6331 	if (ldcp->rcv_thr_flags & VGEN_WTHR_RUNNING) {
6332 		ldcp->rcv_thr_flags |= VGEN_WTHR_STOP;
6333 		cv_signal(&ldcp->rcv_thr_cv);
6334 		DBG2(vgenp, ldcp, "waiting...");
6335 		while (ldcp->rcv_thr_flags & VGEN_WTHR_RUNNING) {
6336 			cv_wait(&ldcp->rcv_thr_cv, &ldcp->rcv_thr_lock);
6337 		}
6338 	}
6339 	mutex_exit(&ldcp->rcv_thr_lock);
6340 	ldcp->rcv_thread = NULL;
6341 	DBG1(vgenp, ldcp, "exit\n");
6342 }
6343 
6344 /*
6345  * vgen_dds_rx -- post DDS messages to vnet.
6346  */
6347 static int
6348 vgen_dds_rx(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
6349 {
6350 	vio_dds_msg_t *dmsg = (vio_dds_msg_t *)tagp;
6351 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6352 
6353 	if (dmsg->dds_class != DDS_VNET_NIU) {
6354 		DWARN(vgenp, ldcp, "Unknown DDS class, dropping");
6355 		return (EBADMSG);
6356 	}
6357 	vnet_dds_rx(vgenp->vnetp, dmsg);
6358 	return (0);
6359 }
6360 
6361 /*
6362  * vgen_dds_tx -- an interface called by vnet to send DDS messages.
6363  */
6364 int
6365 vgen_dds_tx(void *arg, void *msg)
6366 {
6367 	vgen_t *vgenp = arg;
6368 	vio_dds_msg_t *dmsg = msg;
6369 	vgen_portlist_t *plistp = &vgenp->vgenports;
6370 	vgen_ldc_t *ldcp;
6371 	vgen_ldclist_t *ldclp;
6372 	int rv = EIO;
6373 
6374 
6375 	READ_ENTER(&plistp->rwlock);
6376 	ldclp = &(vgenp->vsw_portp->ldclist);
6377 	READ_ENTER(&ldclp->rwlock);
6378 	ldcp = ldclp->headp;
6379 	if ((ldcp == NULL) || (ldcp->hphase != VH_DONE)) {
6380 		goto vgen_dsend_exit;
6381 	}
6382 
6383 	dmsg->tag.vio_sid = ldcp->local_sid;
6384 	rv = vgen_sendmsg(ldcp, (caddr_t)dmsg, sizeof (vio_dds_msg_t), B_FALSE);
6385 	if (rv != VGEN_SUCCESS) {
6386 		rv = EIO;
6387 	} else {
6388 		rv = 0;
6389 	}
6390 
6391 vgen_dsend_exit:
6392 	RW_EXIT(&ldclp->rwlock);
6393 	RW_EXIT(&plistp->rwlock);
6394 	return (rv);
6395 
6396 }
6397 
6398 #ifdef DEBUG
6399 
6400 /*
6401  * Print debug messages - set to 0xf to enable all msgs
6402  */
6403 static void
6404 debug_printf(const char *fname, vgen_t *vgenp,
6405     vgen_ldc_t *ldcp, const char *fmt, ...)
6406 {
6407 	char    buf[256];
6408 	char    *bufp = buf;
6409 	va_list ap;
6410 
6411 	if ((vgenp != NULL) && (vgenp->vnetp != NULL)) {
6412 		(void) sprintf(bufp, "vnet%d:",
6413 		    ((vnet_t *)(vgenp->vnetp))->instance);
6414 		bufp += strlen(bufp);
6415 	}
6416 	if (ldcp != NULL) {
6417 		(void) sprintf(bufp, "ldc(%ld):", ldcp->ldc_id);
6418 		bufp += strlen(bufp);
6419 	}
6420 	(void) sprintf(bufp, "%s: ", fname);
6421 	bufp += strlen(bufp);
6422 
6423 	va_start(ap, fmt);
6424 	(void) vsprintf(bufp, fmt, ap);
6425 	va_end(ap);
6426 
6427 	if ((ldcp == NULL) || (vgendbg_ldcid == -1) ||
6428 	    (vgendbg_ldcid == ldcp->ldc_id)) {
6429 		cmn_err(CE_CONT, "%s\n", buf);
6430 	}
6431 }
6432 #endif
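
/*
 * debug_printf() above is reached through the DBG1/DBG2/DWARN macros
 * defined in vnet_gen.h.  Conceptually they gate on a debug-level mask
 * and pass the caller's function name through; the sketch below is
 * illustrative only, and the level tunable name (vgendbg) and bit value
 * are assumptions:
 */
#if 0	/* illustrative sketch, not compiled */
#define	DBG1(vgenp, ldcp, ...)						\
	do {								\
		if (vgendbg & 0x1)					\
			debug_printf(__func__, (vgenp), (ldcp),		\
			    __VA_ARGS__);				\
	_NOTE(CONSTCOND) } while (0)
#endif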
6433