xref: /titanic_51/usr/src/uts/sun4v/io/vnet_gen.c (revision 5a7763bf3e9db4cfe6cb523b096cb74af71e3793)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/errno.h>
31 #include <sys/param.h>
32 #include <sys/stream.h>
33 #include <sys/strsubr.h>
34 #include <sys/kmem.h>
35 #include <sys/conf.h>
36 #include <sys/devops.h>
37 #include <sys/ksynch.h>
38 #include <sys/stat.h>
39 #include <sys/modctl.h>
40 #include <sys/debug.h>
41 #include <sys/ethernet.h>
42 #include <sys/ddi.h>
43 #include <sys/sunddi.h>
44 #include <sys/strsun.h>
45 #include <sys/note.h>
46 #include <sys/mac.h>
47 #include <sys/mac_ether.h>
48 #include <sys/ldc.h>
49 #include <sys/mach_descrip.h>
50 #include <sys/mdeg.h>
51 #include <net/if.h>
52 #include <sys/vnet.h>
53 #include <sys/vio_mailbox.h>
54 #include <sys/vio_common.h>
55 #include <sys/vnet_common.h>
56 #include <sys/vnet_mailbox.h>
57 #include <sys/vio_util.h>
58 #include <sys/vnet_gen.h>
59 #include <sys/atomic.h>
60 #include <sys/callb.h>
61 #include <sys/sdt.h>
62 #include <sys/intr.h>
63 #include <sys/pattr.h>
64 #include <sys/vlan.h>
65 
66 /*
67  * Implementation of the mac functionality for vnet using the
68  * generic(default) transport layer of sun4v Logical Domain Channels(LDC).
69  */
70 
71 /*
72  * Function prototypes.
73  */
74 /* vgen proxy entry points */
75 int vgen_init(vnet_t *vnetp, dev_info_t *vnetdip, const uint8_t *macaddr,
76 	mac_register_t **vgenmacp);
77 int vgen_uninit(void *arg);
78 static int vgen_start(void *arg);
79 static void vgen_stop(void *arg);
80 static mblk_t *vgen_tx(void *arg, mblk_t *mp);
81 static int vgen_multicst(void *arg, boolean_t add,
82 	const uint8_t *mca);
83 static int vgen_promisc(void *arg, boolean_t on);
84 static int vgen_unicst(void *arg, const uint8_t *mca);
85 static int vgen_stat(void *arg, uint_t stat, uint64_t *val);
86 static void vgen_ioctl(void *arg, queue_t *wq, mblk_t *mp);
87 
88 /* externs - functions provided by vnet to add/remove/modify entries in fdb */
89 extern void vnet_fdbe_add(vnet_t *vnetp, struct ether_addr *macaddr,
90 	uint8_t type, mac_tx_t m_tx, void *port);
91 extern void vnet_fdbe_del(vnet_t *vnetp, struct ether_addr *eaddr);
92 extern void vnet_fdbe_modify(vnet_t *vnetp, struct ether_addr *macaddr,
93 	void *portp, boolean_t flag);
94 extern void vnet_rx(void *arg, mac_resource_handle_t mrh, mblk_t *mp);
95 extern void vnet_tx_update(void *arg);
96 
97 /* vgen internal functions */
98 static int vgen_read_mdprops(vgen_t *vgenp);
99 static void vgen_update_md_prop(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
100 static void vgen_read_pri_eth_types(vgen_t *vgenp, md_t *mdp,
101 	mde_cookie_t node);
102 static void vgen_detach_ports(vgen_t *vgenp);
103 static void vgen_port_detach(vgen_port_t *portp);
104 static void vgen_port_list_insert(vgen_port_t *portp);
105 static void vgen_port_list_remove(vgen_port_t *portp);
106 static vgen_port_t *vgen_port_lookup(vgen_portlist_t *plistp,
107 	int port_num);
108 static int vgen_mdeg_reg(vgen_t *vgenp);
109 static void vgen_mdeg_unreg(vgen_t *vgenp);
110 static int vgen_mdeg_cb(void *cb_argp, mdeg_result_t *resp);
111 static int vgen_mdeg_port_cb(void *cb_argp, mdeg_result_t *resp);
112 static int vgen_add_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
113 static int vgen_port_read_props(vgen_port_t *portp, vgen_t *vgenp, md_t *mdp,
114 	mde_cookie_t mdex);
115 static void vgen_fdbe_modify(vgen_port_t *portp, boolean_t use_vsw_port,
116 	boolean_t flag);
117 static int vgen_remove_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
118 static int vgen_port_attach(vgen_port_t *portp);
119 static void vgen_port_detach_mdeg(vgen_port_t *portp);
120 static void vgen_port_detach_mdeg(vgen_port_t *portp);
121 static int vgen_update_port(vgen_t *vgenp, md_t *curr_mdp,
122 	mde_cookie_t curr_mdex, md_t *prev_mdp, mde_cookie_t prev_mdex);
123 static uint64_t	vgen_port_stat(vgen_port_t *portp, uint_t stat);
124 
125 static int vgen_ldc_attach(vgen_port_t *portp, uint64_t ldc_id);
126 static void vgen_ldc_detach(vgen_ldc_t *ldcp);
127 static int vgen_alloc_tx_ring(vgen_ldc_t *ldcp);
128 static void vgen_free_tx_ring(vgen_ldc_t *ldcp);
129 static void vgen_init_ports(vgen_t *vgenp);
130 static void vgen_port_init(vgen_port_t *portp);
131 static void vgen_uninit_ports(vgen_t *vgenp);
132 static void vgen_port_uninit(vgen_port_t *portp);
133 static void vgen_init_ldcs(vgen_port_t *portp);
134 static void vgen_uninit_ldcs(vgen_port_t *portp);
135 static int vgen_ldc_init(vgen_ldc_t *ldcp);
136 static void vgen_ldc_uninit(vgen_ldc_t *ldcp);
137 static int vgen_init_tbufs(vgen_ldc_t *ldcp);
138 static void vgen_uninit_tbufs(vgen_ldc_t *ldcp);
139 static void vgen_clobber_tbufs(vgen_ldc_t *ldcp);
140 static void vgen_clobber_rxds(vgen_ldc_t *ldcp);
141 static uint64_t	vgen_ldc_stat(vgen_ldc_t *ldcp, uint_t stat);
142 static uint_t vgen_ldc_cb(uint64_t event, caddr_t arg);
143 static int vgen_portsend(vgen_port_t *portp, mblk_t *mp);
144 static int vgen_ldcsend(void *arg, mblk_t *mp);
145 static void vgen_ldcsend_pkt(void *arg, mblk_t *mp);
146 static int vgen_ldcsend_dring(void *arg, mblk_t *mp);
147 static void vgen_reclaim(vgen_ldc_t *ldcp);
148 static void vgen_reclaim_dring(vgen_ldc_t *ldcp);
149 static int vgen_num_txpending(vgen_ldc_t *ldcp);
150 static int vgen_tx_dring_full(vgen_ldc_t *ldcp);
151 static int vgen_ldc_txtimeout(vgen_ldc_t *ldcp);
152 static void vgen_ldc_watchdog(void *arg);
153 
154 /* vgen handshake functions */
155 static vgen_ldc_t *vh_nextphase(vgen_ldc_t *ldcp);
156 static int vgen_sendmsg(vgen_ldc_t *ldcp, caddr_t msg,  size_t msglen,
157 	boolean_t caller_holds_lock);
158 static int vgen_send_version_negotiate(vgen_ldc_t *ldcp);
159 static int vgen_send_attr_info(vgen_ldc_t *ldcp);
160 static int vgen_send_dring_reg(vgen_ldc_t *ldcp);
161 static int vgen_send_rdx_info(vgen_ldc_t *ldcp);
162 static int vgen_send_dring_data(vgen_ldc_t *ldcp, uint32_t start, int32_t end);
163 static int vgen_send_mcast_info(vgen_ldc_t *ldcp);
164 static int vgen_handshake_phase2(vgen_ldc_t *ldcp);
165 static void vgen_handshake_reset(vgen_ldc_t *ldcp);
166 static void vgen_reset_hphase(vgen_ldc_t *ldcp);
167 static void vgen_handshake(vgen_ldc_t *ldcp);
168 static int vgen_handshake_done(vgen_ldc_t *ldcp);
169 static void vgen_handshake_retry(vgen_ldc_t *ldcp);
170 static int vgen_handle_version_negotiate(vgen_ldc_t *ldcp,
171 	vio_msg_tag_t *tagp);
172 static int vgen_handle_attr_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
173 static int vgen_handle_dring_reg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
174 static int vgen_handle_rdx_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
175 static int vgen_handle_mcast_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
176 static int vgen_handle_ctrlmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
177 static void vgen_handle_pkt_data_nop(void *arg1, void *arg2, uint32_t msglen);
178 static void vgen_handle_pkt_data(void *arg1, void *arg2, uint32_t msglen);
179 static int vgen_handle_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
180 static int vgen_handle_dring_data_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
181 static int vgen_process_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
182 static int vgen_handle_dring_data_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
183 static int vgen_handle_dring_data_nack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
184 static int vgen_send_dring_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp,
185 	uint32_t start, int32_t end, uint8_t pstate);
186 static int vgen_handle_datamsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp,
187 	uint32_t msglen);
188 static void vgen_handle_errmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
189 static void vgen_handle_evt_up(vgen_ldc_t *ldcp, boolean_t flag);
190 static void vgen_handle_evt_reset(vgen_ldc_t *ldcp, boolean_t flag);
191 static int vgen_check_sid(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
192 static int vgen_check_datamsg_seq(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
193 static caddr_t vgen_print_ethaddr(uint8_t *a, char *ebuf);
194 static void vgen_hwatchdog(void *arg);
195 static void vgen_print_attr_info(vgen_ldc_t *ldcp, int endpoint);
196 static void vgen_print_hparams(vgen_hparams_t *hp);
197 static void vgen_print_ldcinfo(vgen_ldc_t *ldcp);
198 static void vgen_stop_rcv_thread(vgen_ldc_t *ldcp);
199 static void vgen_ldc_rcv_worker(void *arg);
200 static void vgen_handle_evt_read(vgen_ldc_t *ldcp);
201 static void vgen_rx(vgen_ldc_t *ldcp, mblk_t *bp);
202 static void vgen_set_vnet_proto_ops(vgen_ldc_t *ldcp);
203 static void vgen_reset_vnet_proto_ops(vgen_ldc_t *ldcp);
204 
205 /* VLAN routines */
206 static void vgen_vlan_read_ids(void *arg, int type, md_t *mdp,
207 	mde_cookie_t node, uint16_t *pvidp, uint16_t **vidspp,
208 	uint16_t *nvidsp, uint16_t *default_idp);
209 static void vgen_vlan_create_hash(vgen_port_t *portp);
210 static void vgen_vlan_destroy_hash(vgen_port_t *portp);
211 static void vgen_vlan_add_ids(vgen_port_t *portp);
212 static void vgen_vlan_remove_ids(vgen_port_t *portp);
213 static boolean_t vgen_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid);
214 static boolean_t vgen_frame_lookup_vid(vnet_t *vnetp, struct ether_header *ehp,
215 	uint16_t *vidp);
216 static mblk_t *vgen_vlan_frame_fixtag(vgen_port_t *portp, mblk_t *mp,
217 	boolean_t is_tagged, uint16_t vid);
218 static void vgen_vlan_unaware_port_reset(vgen_port_t *portp);
219 static void vgen_reset_vlan_unaware_ports(vgen_t *vgenp);
220 
221 /*
222  * The handshake process consists of 5 phases defined below, with VH_PHASE0
223  * being the pre-handshake phase and VH_DONE is the phase to indicate
224  * successful completion of all phases.
225  * Each phase may have one to several handshake states which are required
226  * to complete successfully to move to the next phase.
227  * Refer to the functions vgen_handshake() and vgen_handshake_done() for
228  * more details.
229  */
/*
 * handshake phases: VH_PHASE0 is the pre-handshake phase and VH_DONE marks
 * successful completion of all phases.
 */
enum {
	VH_PHASE0,
	VH_PHASE1,
	VH_PHASE2,
	VH_PHASE3,
	VH_DONE = 0x80
};

/*
 * handshake states: one bit per event, four bits per message type
 * (info sent, ack received, info received, ack sent). The composite value
 * of each group is the pair of ack bits for that message type.
 */
enum {
	/* version negotiation */
	VER_INFO_SENT		= 1 << 0,
	VER_ACK_RCVD		= 1 << 1,
	VER_INFO_RCVD		= 1 << 2,
	VER_ACK_SENT		= 1 << 3,
	VER_NEGOTIATED		= (VER_ACK_RCVD | VER_ACK_SENT),

	/* attribute exchange */
	ATTR_INFO_SENT		= 1 << 4,
	ATTR_ACK_RCVD		= 1 << 5,
	ATTR_INFO_RCVD		= 1 << 6,
	ATTR_ACK_SENT		= 1 << 7,
	ATTR_INFO_EXCHANGED	= (ATTR_ACK_RCVD | ATTR_ACK_SENT),

	/* descriptor ring registration */
	DRING_INFO_SENT		= 1 << 8,
	DRING_ACK_RCVD		= 1 << 9,
	DRING_INFO_RCVD		= 1 << 10,
	DRING_ACK_SENT		= 1 << 11,
	DRING_INFO_EXCHANGED	= (DRING_ACK_RCVD | DRING_ACK_SENT),

	/* RDX exchange */
	RDX_INFO_SENT		= 1 << 12,
	RDX_ACK_RCVD		= 1 << 13,
	RDX_INFO_RCVD		= 1 << 14,
	RDX_ACK_SENT		= 1 << 15,
	RDX_EXCHANGED		= (RDX_ACK_RCVD | RDX_ACK_SENT)
};
261 
/* true when at least one priority ethertype has been configured */
#define	VGEN_PRI_ETH_DEFINED(vgenp)	((vgenp)->pri_num_types != 0)

/*
 * Acquire all per-channel locks in the fixed order
 * cblock -> rxlock -> wrlock -> txlock -> tclock.
 * Wrapped in do { } while (0) so that the macro expands to exactly one
 * statement and stays correct as the body of an unbraced if/else/loop.
 * Invoke as a statement, i.e. followed by a semicolon.
 */
#define	LDC_LOCK(ldcp)	\
	do {						\
		mutex_enter(&((ldcp)->cblock));		\
		mutex_enter(&((ldcp)->rxlock));		\
		mutex_enter(&((ldcp)->wrlock));		\
		mutex_enter(&((ldcp)->txlock));		\
		mutex_enter(&((ldcp)->tclock));		\
	} while (0)

/*
 * Release all per-channel locks in the reverse of the LDC_LOCK() order.
 * Invoke as a statement, i.e. followed by a semicolon.
 */
#define	LDC_UNLOCK(ldcp)	\
	do {						\
		mutex_exit(&((ldcp)->tclock));		\
		mutex_exit(&((ldcp)->txlock));		\
		mutex_exit(&((ldcp)->wrlock));		\
		mutex_exit(&((ldcp)->rxlock));		\
		mutex_exit(&((ldcp)->cblock));		\
	} while (0)

/* locally negotiated version is exactly major.minor */
#define	VGEN_VER_EQ(ldcp, major, minor)	\
	((ldcp)->local_hparams.ver_major == (major) &&	\
	    (ldcp)->local_hparams.ver_minor == (minor))

/* locally negotiated version is strictly lower than major.minor */
#define	VGEN_VER_LT(ldcp, major, minor)	\
	(((ldcp)->local_hparams.ver_major < (major)) ||	\
	    ((ldcp)->local_hparams.ver_major == (major) &&	\
	    (ldcp)->local_hparams.ver_minor < (minor)))

/* locally negotiated version is major.minor or higher */
#define	VGEN_VER_GTEQ(ldcp, major, minor)	\
	(((ldcp)->local_hparams.ver_major > (major)) ||	\
	    ((ldcp)->local_hparams.ver_major == (major) &&	\
	    (ldcp)->local_hparams.ver_minor >= (minor)))
290 
291 static struct ether_addr etherbroadcastaddr = {
292 	0xff, 0xff, 0xff, 0xff, 0xff, 0xff
293 };
/*
 * MIB II broadcast/multicast packets
 *
 * Both macros take a pointer to a struct ether_header. The argument is
 * parenthesized so that expressions such as &hdr or (hdrp + 1) expand
 * correctly.
 *   IS_BROADCAST: destination address equals etherbroadcastaddr.
 *   IS_MULTICAST: low-order bit of the first destination octet is set.
 */
#define	IS_BROADCAST(ehp) \
		(ether_cmp(&(ehp)->ether_dhost, &etherbroadcastaddr) == 0)
#define	IS_MULTICAST(ehp) \
		(((ehp)->ether_dhost.ether_addr_octet[0] & 01) == 1)
301 
302 /*
303  * Property names
304  */
305 static char macaddr_propname[] = "mac-address";
306 static char rmacaddr_propname[] = "remote-mac-address";
307 static char channel_propname[] = "channel-endpoint";
308 static char reg_propname[] = "reg";
309 static char port_propname[] = "port";
310 static char swport_propname[] = "switch-port";
311 static char id_propname[] = "id";
312 static char vdev_propname[] = "virtual-device";
313 static char vnet_propname[] = "network";
314 static char pri_types_propname[] = "priority-ether-types";
315 static char vgen_pvid_propname[] = "port-vlan-id";
316 static char vgen_vid_propname[] = "vlan-id";
317 static char vgen_dvid_propname[] = "default-vlan-id";
318 static char port_pvid_propname[] = "remote-port-vlan-id";
319 static char port_vid_propname[] = "remote-vlan-id";
320 
321 /* versions supported - in decreasing order */
322 static vgen_ver_t vgen_versions[VGEN_NUM_VER] = { {1, 3} };
323 
324 /* Tunables */
325 uint32_t vgen_hwd_interval = 5;		/* handshake watchdog freq in sec */
326 uint32_t vgen_max_hretries = VNET_NUM_HANDSHAKES; /* # of handshake retries */
327 uint32_t vgen_ldcwr_retries = 10;	/* max # of ldc_write() retries */
328 uint32_t vgen_ldcup_retries = 5;	/* max # of ldc_up() retries */
329 uint32_t vgen_recv_delay = 1;		/* delay when rx descr not ready */
330 uint32_t vgen_recv_retries = 10;	/* retry when rx descr not ready */
331 uint32_t vgen_tx_retries = 0x4;		/* retry when tx descr not available */
332 uint32_t vgen_tx_delay = 0x30;		/* delay when tx descr not available */
333 
334 int vgen_rcv_thread_enabled = 1;	/* Enable Recieve thread */
335 
/*
 * max # of packets accumulated prior to sending them up. It is best
 * to keep this at 60% of the number of receive buffers.
 */
340 uint32_t vgen_chain_len = (VGEN_NRBUFS * 0.6);
341 
342 /*
343  * Tunables for each receive buffer size and number of buffers for
344  * each buffer size.
345  */
346 uint32_t vgen_rbufsz1 = VGEN_DBLK_SZ_128;
347 uint32_t vgen_rbufsz2 = VGEN_DBLK_SZ_256;
348 uint32_t vgen_rbufsz3 = VGEN_DBLK_SZ_2048;
349 
350 uint32_t vgen_nrbufs1 = VGEN_NRBUFS;
351 uint32_t vgen_nrbufs2 = VGEN_NRBUFS;
352 uint32_t vgen_nrbufs3 = VGEN_NRBUFS;
353 
354 /*
355  * In the absence of "priority-ether-types" property in MD, the following
356  * internal tunable can be set to specify a single priority ethertype.
357  */
358 uint64_t vgen_pri_eth_type = 0;
359 
360 /*
361  * Number of transmit priority buffers that are preallocated per device.
362  * This number is chosen to be a small value to throttle transmission
363  * of priority packets. Note: Must be a power of 2 for vio_create_mblks().
364  */
365 uint32_t vgen_pri_tx_nmblks = 64;
366 
367 uint32_t	vgen_vlan_nchains = 4;	/* # of chains in vlan id hash table */
368 
369 #ifdef DEBUG
370 /* flags to simulate error conditions for debugging */
371 int vgen_trigger_txtimeout = 0;
372 int vgen_trigger_rxlost = 0;
373 #endif
374 
375 /*
376  * Matching criteria passed to the MDEG to register interest
377  * in changes to 'virtual-device' nodes (i.e. vnet nodes) identified
378  * by their 'name' and 'cfg-handle' properties.
379  */
380 static md_prop_match_t vdev_prop_match[] = {
381 	{ MDET_PROP_STR,    "name"   },
382 	{ MDET_PROP_VAL,    "cfg-handle" },
383 	{ MDET_LIST_END,    NULL    }
384 };
385 
386 static mdeg_node_match_t vdev_match = { "virtual-device",
387 						vdev_prop_match };
388 
389 /* MD update matching structure */
390 static md_prop_match_t	vport_prop_match[] = {
391 	{ MDET_PROP_VAL,	"id" },
392 	{ MDET_LIST_END,	NULL }
393 };
394 
395 static mdeg_node_match_t vport_match = { "virtual-device-port",
396 					vport_prop_match };
397 
398 /* template for matching a particular vnet instance */
399 static mdeg_prop_spec_t vgen_prop_template[] = {
400 	{ MDET_PROP_STR,	"name",		"network" },
401 	{ MDET_PROP_VAL,	"cfg-handle",	NULL },
402 	{ MDET_LIST_END,	NULL,		NULL }
403 };
404 
405 #define	VGEN_SET_MDEG_PROP_INST(specp, val)	(specp)[1].ps_val = (val)
406 
407 static int vgen_mdeg_port_cb(void *cb_argp, mdeg_result_t *resp);
408 
409 static mac_callbacks_t vgen_m_callbacks = {
410 	0,
411 	vgen_stat,
412 	vgen_start,
413 	vgen_stop,
414 	vgen_promisc,
415 	vgen_multicst,
416 	vgen_unicst,
417 	vgen_tx,
418 	NULL,
419 	NULL,
420 	NULL
421 };
422 
423 /* externs */
424 extern pri_t	maxclsyspri;
425 extern proc_t	p0;
426 extern uint32_t vnet_ntxds;
427 extern uint32_t vnet_ldcwd_interval;
428 extern uint32_t vnet_ldcwd_txtimeout;
429 extern uint32_t vnet_ldc_mtu;
430 extern uint32_t vnet_nrbufs;
431 extern uint32_t	vnet_ethermtu;
432 extern uint16_t	vnet_default_vlan_id;
433 
434 #ifdef DEBUG
435 
436 extern int vnet_dbglevel;
437 static void debug_printf(const char *fname, vgen_t *vgenp,
438 	vgen_ldc_t *ldcp, const char *fmt, ...);
439 
440 /* -1 for all LDCs info, or ldc_id for a specific LDC info */
441 int vgendbg_ldcid = -1;
442 
443 /* simulate handshake error conditions for debug */
444 uint32_t vgen_hdbg;
445 #define	HDBG_VERSION	0x1
446 #define	HDBG_TIMEOUT	0x2
447 #define	HDBG_BAD_SID	0x4
448 #define	HDBG_OUT_STATE	0x8
449 
450 #endif
451 
452 /*
453  * vgen_init() is called by an instance of vnet driver to initialize the
454  * corresponding generic proxy transport layer. The arguments passed by vnet
455  * are - an opaque pointer to the vnet instance, pointers to dev_info_t and
456  * the mac address of the vnet device, and a pointer to mac_register_t of
457  * the generic transport is returned in the last argument.
458  */
459 int
460 vgen_init(vnet_t *vnetp, dev_info_t *vnetdip, const uint8_t *macaddr,
461     mac_register_t **vgenmacp)
462 {
463 	vgen_t *vgenp;
464 	mac_register_t *macp;
465 	int instance;
466 	int rv;
467 
468 	if ((vnetp == NULL) || (vnetdip == NULL))
469 		return (DDI_FAILURE);
470 
471 	instance = ddi_get_instance(vnetdip);
472 
473 	DBG1(NULL, NULL, "vnet(%d): enter\n", instance);
474 
475 	vgenp = kmem_zalloc(sizeof (vgen_t), KM_SLEEP);
476 
477 	vgenp->vnetp = vnetp;
478 	vgenp->vnetdip = vnetdip;
479 	bcopy(macaddr, &(vgenp->macaddr), ETHERADDRL);
480 
481 	if ((macp = mac_alloc(MAC_VERSION)) == NULL) {
482 		KMEM_FREE(vgenp);
483 		return (DDI_FAILURE);
484 	}
485 	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
486 	macp->m_driver = vgenp;
487 	macp->m_dip = vnetdip;
488 	macp->m_src_addr = (uint8_t *)&(vgenp->macaddr);
489 	macp->m_callbacks = &vgen_m_callbacks;
490 	macp->m_min_sdu = 0;
491 	macp->m_max_sdu = vnet_ethermtu;
492 	vgenp->macp = macp;
493 
494 	/* allocate multicast table */
495 	vgenp->mctab = kmem_zalloc(VGEN_INIT_MCTAB_SIZE *
496 	    sizeof (struct ether_addr), KM_SLEEP);
497 	vgenp->mccount = 0;
498 	vgenp->mcsize = VGEN_INIT_MCTAB_SIZE;
499 	vgenp->max_frame_size = vnet_ethermtu + sizeof (struct ether_header)
500 	    + VLAN_TAGSZ;
501 
502 	mutex_init(&vgenp->lock, NULL, MUTEX_DRIVER, NULL);
503 	rw_init(&vgenp->vgenports.rwlock, NULL, RW_DRIVER, NULL);
504 
505 	rv = vgen_read_mdprops(vgenp);
506 	if (rv != 0) {
507 		goto vgen_init_fail;
508 	}
509 
510 	/* register with MD event generator */
511 	rv = vgen_mdeg_reg(vgenp);
512 	if (rv != DDI_SUCCESS) {
513 		goto vgen_init_fail;
514 	}
515 
516 	/* register macp of this vgen_t with vnet */
517 	*vgenmacp = vgenp->macp;
518 
519 	DBG1(NULL, NULL, "vnet(%d): exit\n", instance);
520 	return (DDI_SUCCESS);
521 
522 vgen_init_fail:
523 	rw_destroy(&vgenp->vgenports.rwlock);
524 	mutex_destroy(&vgenp->lock);
525 	kmem_free(vgenp->mctab, VGEN_INIT_MCTAB_SIZE *
526 	    sizeof (struct ether_addr));
527 	if (VGEN_PRI_ETH_DEFINED(vgenp)) {
528 		kmem_free(vgenp->pri_types,
529 		    sizeof (uint16_t) * vgenp->pri_num_types);
530 		(void) vio_destroy_mblks(vgenp->pri_tx_vmp);
531 	}
532 	mac_free(vgenp->macp);
533 	KMEM_FREE(vgenp);
534 	return (DDI_FAILURE);
535 }
536 
537 /*
538  * Called by vnet to undo the initializations done by vgen_init().
539  * The handle provided by generic transport during vgen_init() is the argument.
540  */
541 int
542 vgen_uninit(void *arg)
543 {
544 	vgen_t		*vgenp = (vgen_t *)arg;
545 	vio_mblk_pool_t	*rp;
546 	vio_mblk_pool_t	*nrp;
547 
548 	if (vgenp == NULL) {
549 		return (DDI_FAILURE);
550 	}
551 
552 	DBG1(vgenp, NULL, "enter\n");
553 
554 	/* unregister with MD event generator */
555 	vgen_mdeg_unreg(vgenp);
556 
557 	mutex_enter(&vgenp->lock);
558 
559 	/* detach all ports from the device */
560 	vgen_detach_ports(vgenp);
561 
562 	/*
563 	 * free any pending rx mblk pools,
564 	 * that couldn't be freed previously during channel detach.
565 	 */
566 	rp = vgenp->rmp;
567 	while (rp != NULL) {
568 		nrp = vgenp->rmp = rp->nextp;
569 		if (vio_destroy_mblks(rp)) {
570 			vgenp->rmp = rp;
571 			mutex_exit(&vgenp->lock);
572 			return (DDI_FAILURE);
573 		}
574 		rp = nrp;
575 	}
576 
577 	/* free multicast table */
578 	kmem_free(vgenp->mctab, vgenp->mcsize * sizeof (struct ether_addr));
579 
580 	/* free pri_types table */
581 	if (VGEN_PRI_ETH_DEFINED(vgenp)) {
582 		kmem_free(vgenp->pri_types,
583 		    sizeof (uint16_t) * vgenp->pri_num_types);
584 		(void) vio_destroy_mblks(vgenp->pri_tx_vmp);
585 	}
586 
587 	mac_free(vgenp->macp);
588 
589 	mutex_exit(&vgenp->lock);
590 
591 	rw_destroy(&vgenp->vgenports.rwlock);
592 	mutex_destroy(&vgenp->lock);
593 
594 	KMEM_FREE(vgenp);
595 
596 	DBG1(vgenp, NULL, "exit\n");
597 
598 	return (DDI_SUCCESS);
599 }
600 
601 /* enable transmit/receive for the device */
602 int
603 vgen_start(void *arg)
604 {
605 	vgen_t		*vgenp = (vgen_t *)arg;
606 
607 	DBG1(vgenp, NULL, "enter\n");
608 
609 	mutex_enter(&vgenp->lock);
610 	vgen_init_ports(vgenp);
611 	vgenp->flags |= VGEN_STARTED;
612 	mutex_exit(&vgenp->lock);
613 
614 	DBG1(vgenp, NULL, "exit\n");
615 	return (DDI_SUCCESS);
616 }
617 
618 /* stop transmit/receive */
619 void
620 vgen_stop(void *arg)
621 {
622 	vgen_t		*vgenp = (vgen_t *)arg;
623 
624 	DBG1(vgenp, NULL, "enter\n");
625 
626 	mutex_enter(&vgenp->lock);
627 	vgen_uninit_ports(vgenp);
628 	vgenp->flags &= ~(VGEN_STARTED);
629 	mutex_exit(&vgenp->lock);
630 
631 	DBG1(vgenp, NULL, "exit\n");
632 }
633 
634 /* vgen transmit function */
635 static mblk_t *
636 vgen_tx(void *arg, mblk_t *mp)
637 {
638 	int i;
639 	vgen_port_t *portp;
640 	int status = VGEN_FAILURE;
641 
642 	portp = (vgen_port_t *)arg;
643 	/*
644 	 * Retry so that we avoid reporting a failure
645 	 * to the upper layer. Returning a failure may cause the
646 	 * upper layer to go into single threaded mode there by
647 	 * causing performance degradation, especially for a large
648 	 * number of connections.
649 	 */
650 	for (i = 0; i < vgen_tx_retries; ) {
651 		status = vgen_portsend(portp, mp);
652 		if (status == VGEN_SUCCESS) {
653 			break;
654 		}
655 		if (++i < vgen_tx_retries)
656 			delay(drv_usectohz(vgen_tx_delay));
657 	}
658 	if (status != VGEN_SUCCESS) {
659 		/* failure */
660 		return (mp);
661 	}
662 	/* success */
663 	return (NULL);
664 }
665 
666 /*
667  * This function provides any necessary tagging/untagging of the frames
668  * that are being transmitted over the port. It first verifies the vlan
669  * membership of the destination(port) and drops the packet if the
670  * destination doesn't belong to the given vlan.
671  *
672  * Arguments:
673  *   portp:     port over which the frames should be transmitted
674  *   mp:        frame to be transmitted
675  *   is_tagged:
676  *              B_TRUE: indicates frame header contains the vlan tag already.
677  *              B_FALSE: indicates frame is untagged.
678  *   vid:       vlan in which the frame should be transmitted.
679  *
680  * Returns:
681  *              Sucess: frame(mblk_t *) after doing the necessary tag/untag.
682  *              Failure: NULL
683  */
684 static mblk_t *
685 vgen_vlan_frame_fixtag(vgen_port_t *portp, mblk_t *mp, boolean_t is_tagged,
686 	uint16_t vid)
687 {
688 	vgen_t				*vgenp;
689 	boolean_t			dst_tagged;
690 	int				rv;
691 
692 	vgenp = portp->vgenp;
693 
694 	/*
695 	 * If the packet is going to a vnet:
696 	 *   Check if the destination vnet is in the same vlan.
697 	 *   Check the frame header if tag or untag is needed.
698 	 *
699 	 * We do not check the above conditions if the packet is going to vsw:
700 	 *   vsw must be present implicitly in all the vlans that a vnet device
701 	 *   is configured into; even if vsw itself is not assigned to those
702 	 *   vlans as an interface. For instance, the packet might be destined
703 	 *   to another vnet(indirectly through vsw) or to an external host
704 	 *   which is in the same vlan as this vnet and vsw itself may not be
705 	 *   present in that vlan. Similarly packets going to vsw must be
706 	 *   always tagged(unless in the default-vlan) if not already tagged,
707 	 *   as we do not know the final destination. This is needed because
708 	 *   vsw must always invoke its switching function only after tagging
709 	 *   the packet; otherwise after switching function determines the
710 	 *   destination we cannot figure out if the destination belongs to the
711 	 *   the same vlan that the frame originated from and if it needs tag/
712 	 *   untag. Note that vsw will tag the packet itself when it receives
713 	 *   it over the channel from a client if needed. However, that is
714 	 *   needed only in the case of vlan unaware clients such as obp or
715 	 *   earlier versions of vnet.
716 	 *
717 	 */
718 	if (portp != vgenp->vsw_portp) {
719 		/*
720 		 * Packet going to a vnet. Check if the destination vnet is in
721 		 * the same vlan. Then check the frame header if tag/untag is
722 		 * needed.
723 		 */
724 		rv = vgen_vlan_lookup(portp->vlan_hashp, vid);
725 		if (rv == B_FALSE) {
726 			/* drop the packet */
727 			freemsg(mp);
728 			return (NULL);
729 		}
730 
731 		/* is the destination tagged or untagged in this vlan? */
732 		(vid == portp->pvid) ? (dst_tagged = B_FALSE) :
733 		    (dst_tagged = B_TRUE);
734 
735 		if (is_tagged == dst_tagged) {
736 			/* no tagging/untagging needed */
737 			return (mp);
738 		}
739 
740 		if (is_tagged == B_TRUE) {
741 			/* frame is tagged; destination needs untagged */
742 			mp = vnet_vlan_remove_tag(mp);
743 			return (mp);
744 		}
745 
746 		/* (is_tagged == B_FALSE): fallthru to tag tx packet: */
747 	}
748 
749 	/*
750 	 * Packet going to a vnet needs tagging.
751 	 * OR
752 	 * If the packet is going to vsw, then it must be tagged in all cases:
753 	 * unknown unicast, broadcast/multicast or to vsw interface.
754 	 */
755 
756 	if (is_tagged == B_FALSE) {
757 		mp = vnet_vlan_insert_tag(mp, vid);
758 	}
759 
760 	return (mp);
761 }
762 
763 /* transmit packets over the given port */
764 static int
765 vgen_portsend(vgen_port_t *portp, mblk_t *mp)
766 {
767 	vgen_ldclist_t		*ldclp;
768 	vgen_ldc_t		*ldcp;
769 	int			status;
770 	int			rv = VGEN_SUCCESS;
771 	vgen_t			*vgenp;
772 	vnet_t			*vnetp;
773 	boolean_t		is_tagged;
774 	uint16_t		vlan_id;
775 	struct ether_header	*ehp;
776 
777 	vgenp = portp->vgenp;
778 	vnetp = vgenp->vnetp;
779 
780 	/*
781 	 * Determine the vlan id that the frame belongs to.
782 	 */
783 	ehp = (struct ether_header *)mp->b_rptr;
784 	is_tagged = vgen_frame_lookup_vid(vnetp, ehp, &vlan_id);
785 
786 	if (vlan_id == vnetp->default_vlan_id) {
787 
788 		/* Frames in default vlan must be untagged */
789 		ASSERT(is_tagged == B_FALSE);
790 
791 		/*
792 		 * If the destination is a vnet-port verify it belongs to the
793 		 * default vlan; otherwise drop the packet. We do not need
794 		 * this check for vsw-port, as it should implicitly belong to
795 		 * this vlan; see comments in vgen_vlan_frame_fixtag().
796 		 */
797 		if (portp != vgenp->vsw_portp &&
798 		    portp->pvid != vnetp->default_vlan_id) {
799 			freemsg(mp);
800 			return (VGEN_SUCCESS);
801 		}
802 
803 	} else {	/* frame not in default-vlan */
804 
805 		mp = vgen_vlan_frame_fixtag(portp, mp, is_tagged, vlan_id);
806 		if (mp == NULL) {
807 			return (VGEN_SUCCESS);
808 		}
809 
810 	}
811 
812 	ldclp = &portp->ldclist;
813 	READ_ENTER(&ldclp->rwlock);
814 	/*
815 	 * NOTE: for now, we will assume we have a single channel.
816 	 */
817 	if (ldclp->headp == NULL) {
818 		RW_EXIT(&ldclp->rwlock);
819 		return (VGEN_FAILURE);
820 	}
821 	ldcp = ldclp->headp;
822 
823 	status = ldcp->tx(ldcp, mp);
824 
825 	RW_EXIT(&ldclp->rwlock);
826 
827 	if (status != VGEN_TX_SUCCESS) {
828 		rv = VGEN_FAILURE;
829 	}
830 	return (rv);
831 }
832 
833 /*
834  * Wrapper function to transmit normal and/or priority frames over the channel.
835  */
836 static int
837 vgen_ldcsend(void *arg, mblk_t *mp)
838 {
839 	vgen_ldc_t		*ldcp = (vgen_ldc_t *)arg;
840 	int			status;
841 	struct ether_header	*ehp;
842 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
843 	uint32_t		num_types;
844 	uint16_t		*types;
845 	int			i;
846 
847 	ASSERT(VGEN_PRI_ETH_DEFINED(vgenp));
848 
849 	num_types = vgenp->pri_num_types;
850 	types = vgenp->pri_types;
851 	ehp = (struct ether_header *)mp->b_rptr;
852 
853 	for (i = 0; i < num_types; i++) {
854 
855 		if (ehp->ether_type == types[i]) {
856 			/* priority frame, use pri tx function */
857 			vgen_ldcsend_pkt(ldcp, mp);
858 			return (VGEN_SUCCESS);
859 		}
860 
861 	}
862 
863 	status  = vgen_ldcsend_dring(ldcp, mp);
864 
865 	return (status);
866 }
867 
868 /*
869  * This functions handles ldc channel reset while in the context
870  * of transmit routines: vgen_ldcsend_pkt() or vgen_ldcsend_dring().
871  */
872 static void
873 vgen_ldcsend_process_reset(vgen_ldc_t *ldcp)
874 {
875 	ldc_status_t	istatus;
876 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
877 
878 	/*
879 	 * Calling mutex_enter() will result in a deadlock, if the other thread
880 	 * already holds cblock and is waiting for all references on the fdbe
881 	 * to be dropped in vnet_fdbe_modify() which is called from
882 	 * vgen_handle_evt_reset(). This transmit thread holds a reference to
883 	 * that fdb entry and will not drop its reference unless it gets cblock
884 	 * here, completes processing and returns.
885 	 * To avoid this race condition, we check if either callback thread
886 	 * or another tx thread is already holding cblock, if so just return
887 	 * and the thread which already holds it will handle the reset.
888 	 */
889 	if (mutex_tryenter(&ldcp->cblock)) {
890 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
891 			DWARN(vgenp, ldcp, "ldc_status() error\n");
892 		} else {
893 			ldcp->ldc_status = istatus;
894 		}
895 		if (ldcp->ldc_status != LDC_UP) {
896 			/*
897 			 * Second arg is TRUE, as we know that
898 			 * the caller of this function - vnet_m_tx(),
899 			 * already has a ref on the fdb entry.
900 			 */
901 			vgen_handle_evt_reset(ldcp, B_TRUE);
902 		}
903 		mutex_exit(&ldcp->cblock);
904 	}
905 }
906 
907 /*
908  * This function transmits the frame in the payload of a raw data
909  * (VIO_PKT_DATA) message. Thus, it provides an Out-Of-Band path to
910  * send special frames with high priorities, without going through
911  * the normal data path which uses descriptor ring mechanism.
912  */
913 static void
914 vgen_ldcsend_pkt(void *arg, mblk_t *mp)
915 {
916 	vgen_ldc_t		*ldcp = (vgen_ldc_t *)arg;
917 	vio_raw_data_msg_t	*pkt;
918 	mblk_t			*bp;
919 	mblk_t			*nmp = NULL;
920 	caddr_t			dst;
921 	uint32_t		mblksz;
922 	uint32_t		size;
923 	uint32_t		nbytes;
924 	int			rv;
925 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
926 	vgen_stats_t		*statsp = &ldcp->stats;
927 
928 	/* drop the packet if ldc is not up or handshake is not done */
929 	if (ldcp->ldc_status != LDC_UP) {
930 		(void) atomic_inc_32(&statsp->tx_pri_fail);
931 		DWARN(vgenp, ldcp, "status(%d), dropping packet\n",
932 		    ldcp->ldc_status);
933 		goto send_pkt_exit;
934 	}
935 
936 	if (ldcp->hphase != VH_DONE) {
937 		(void) atomic_inc_32(&statsp->tx_pri_fail);
938 		DWARN(vgenp, ldcp, "hphase(%x), dropping packet\n",
939 		    ldcp->hphase);
940 		goto send_pkt_exit;
941 	}
942 
943 	size = msgsize(mp);
944 
945 	/* frame size bigger than available payload len of raw data msg ? */
946 	if (size > (size_t)(ldcp->msglen - VIO_PKT_DATA_HDRSIZE)) {
947 		(void) atomic_inc_32(&statsp->tx_pri_fail);
948 		DWARN(vgenp, ldcp, "invalid size(%d)\n", size);
949 		goto send_pkt_exit;
950 	}
951 
952 	if (size < ETHERMIN)
953 		size = ETHERMIN;
954 
955 	/* alloc space for a raw data message */
956 	nmp = vio_allocb(vgenp->pri_tx_vmp);
957 	if (nmp == NULL) {
958 		(void) atomic_inc_32(&statsp->tx_pri_fail);
959 		DWARN(vgenp, ldcp, "vio_allocb failed\n");
960 		goto send_pkt_exit;
961 	}
962 	pkt = (vio_raw_data_msg_t *)nmp->b_rptr;
963 
964 	/* copy frame into the payload of raw data message */
965 	dst = (caddr_t)pkt->data;
966 	for (bp = mp; bp != NULL; bp = bp->b_cont) {
967 		mblksz = MBLKL(bp);
968 		bcopy(bp->b_rptr, dst, mblksz);
969 		dst += mblksz;
970 	}
971 
972 	/* setup the raw data msg */
973 	pkt->tag.vio_msgtype = VIO_TYPE_DATA;
974 	pkt->tag.vio_subtype = VIO_SUBTYPE_INFO;
975 	pkt->tag.vio_subtype_env = VIO_PKT_DATA;
976 	pkt->tag.vio_sid = ldcp->local_sid;
977 	nbytes = VIO_PKT_DATA_HDRSIZE + size;
978 
979 	/* send the msg over ldc */
980 	rv = vgen_sendmsg(ldcp, (caddr_t)pkt, nbytes, B_FALSE);
981 	if (rv != VGEN_SUCCESS) {
982 		(void) atomic_inc_32(&statsp->tx_pri_fail);
983 		DWARN(vgenp, ldcp, "Error sending priority frame\n");
984 		if (rv == ECONNRESET) {
985 			vgen_ldcsend_process_reset(ldcp);
986 		}
987 		goto send_pkt_exit;
988 	}
989 
990 	/* update stats */
991 	(void) atomic_inc_64(&statsp->tx_pri_packets);
992 	(void) atomic_add_64(&statsp->tx_pri_bytes, size);
993 
994 send_pkt_exit:
995 	if (nmp != NULL)
996 		freemsg(nmp);
997 	freemsg(mp);
998 }
999 
1000 /*
1001  * This function transmits normal (non-priority) data frames over
1002  * the channel. It queues the frame into the transmit descriptor ring
1003  * and sends a VIO_DRING_DATA message if needed, to wake up the
1004  * peer to (re)start processing.
1005  */
1006 static int
1007 vgen_ldcsend_dring(void *arg, mblk_t *mp)
1008 {
1009 	vgen_ldc_t		*ldcp = (vgen_ldc_t *)arg;
1010 	vgen_private_desc_t	*tbufp;
1011 	vgen_private_desc_t	*rtbufp;
1012 	vnet_public_desc_t	*rtxdp;
1013 	vgen_private_desc_t	*ntbufp;
1014 	vnet_public_desc_t	*txdp;
1015 	vio_dring_entry_hdr_t	*hdrp;
1016 	vgen_stats_t		*statsp;
1017 	struct ether_header	*ehp;
1018 	boolean_t		is_bcast = B_FALSE;
1019 	boolean_t		is_mcast = B_FALSE;
1020 	size_t			mblksz;
1021 	caddr_t			dst;
1022 	mblk_t			*bp;
1023 	size_t			size;
1024 	int			rv = 0;
1025 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
1026 	vgen_hparams_t		*lp = &ldcp->local_hparams;
1027 
1028 	statsp = &ldcp->stats;
1029 	size = msgsize(mp);
1030 
1031 	DBG1(vgenp, ldcp, "enter\n");
1032 
1033 	if (ldcp->ldc_status != LDC_UP) {
1034 		DWARN(vgenp, ldcp, "status(%d), dropping packet\n",
1035 		    ldcp->ldc_status);
1036 		/* retry ldc_up() if needed */
1037 		if (ldcp->flags & CHANNEL_STARTED)
1038 			(void) ldc_up(ldcp->ldc_handle);
1039 		goto send_dring_exit;
1040 	}
1041 
1042 	/* drop the packet if ldc is not up or handshake is not done */
1043 	if (ldcp->hphase != VH_DONE) {
1044 		DWARN(vgenp, ldcp, "hphase(%x), dropping packet\n",
1045 		    ldcp->hphase);
1046 		goto send_dring_exit;
1047 	}
1048 
1049 	if (size > (size_t)lp->mtu) {
1050 		DWARN(vgenp, ldcp, "invalid size(%d)\n", size);
1051 		goto send_dring_exit;
1052 	}
1053 	if (size < ETHERMIN)
1054 		size = ETHERMIN;
1055 
1056 	ehp = (struct ether_header *)mp->b_rptr;
1057 	is_bcast = IS_BROADCAST(ehp);
1058 	is_mcast = IS_MULTICAST(ehp);
1059 
1060 	mutex_enter(&ldcp->txlock);
1061 	/*
1062 	 * allocate a descriptor
1063 	 */
1064 	tbufp = ldcp->next_tbufp;
1065 	ntbufp = NEXTTBUF(ldcp, tbufp);
1066 	if (ntbufp == ldcp->cur_tbufp) { /* out of tbufs/txds */
1067 
1068 		mutex_enter(&ldcp->tclock);
1069 		/* Try reclaiming now */
1070 		vgen_reclaim_dring(ldcp);
1071 		ldcp->reclaim_lbolt = ddi_get_lbolt();
1072 
1073 		if (ntbufp == ldcp->cur_tbufp) {
1074 			/* Now we are really out of tbuf/txds */
1075 			ldcp->need_resched = B_TRUE;
1076 			mutex_exit(&ldcp->tclock);
1077 
1078 			statsp->tx_no_desc++;
1079 			mutex_exit(&ldcp->txlock);
1080 
1081 			return (VGEN_TX_NORESOURCES);
1082 		}
1083 		mutex_exit(&ldcp->tclock);
1084 	}
1085 	/* update next available tbuf in the ring and update tx index */
1086 	ldcp->next_tbufp = ntbufp;
1087 	INCR_TXI(ldcp->next_txi, ldcp);
1088 
1089 	/* Mark the buffer busy before releasing the lock */
1090 	tbufp->flags = VGEN_PRIV_DESC_BUSY;
1091 	mutex_exit(&ldcp->txlock);
1092 
1093 	/* copy data into pre-allocated transmit buffer */
1094 	dst = tbufp->datap + VNET_IPALIGN;
1095 	for (bp = mp; bp != NULL; bp = bp->b_cont) {
1096 		mblksz = MBLKL(bp);
1097 		bcopy(bp->b_rptr, dst, mblksz);
1098 		dst += mblksz;
1099 	}
1100 
1101 	tbufp->datalen = size;
1102 
1103 	/* initialize the corresponding public descriptor (txd) */
1104 	txdp = tbufp->descp;
1105 	hdrp = &txdp->hdr;
1106 	txdp->nbytes = size;
1107 	txdp->ncookies = tbufp->ncookies;
1108 	bcopy((tbufp->memcookie), (txdp->memcookie),
1109 	    tbufp->ncookies * sizeof (ldc_mem_cookie_t));
1110 
1111 	mutex_enter(&ldcp->wrlock);
1112 	/*
1113 	 * If the flags not set to BUSY, it implies that the clobber
1114 	 * was done while we were copying the data. In such case,
1115 	 * discard the packet and return.
1116 	 */
1117 	if (tbufp->flags != VGEN_PRIV_DESC_BUSY) {
1118 		statsp->oerrors++;
1119 		mutex_exit(&ldcp->wrlock);
1120 		goto send_dring_exit;
1121 	}
1122 	hdrp->dstate = VIO_DESC_READY;
1123 
1124 	/* update stats */
1125 	statsp->opackets++;
1126 	statsp->obytes += size;
1127 	if (is_bcast)
1128 		statsp->brdcstxmt++;
1129 	else if (is_mcast)
1130 		statsp->multixmt++;
1131 
1132 	/* send dring datamsg to the peer */
1133 	if (ldcp->resched_peer) {
1134 
1135 		rtbufp = &ldcp->tbufp[ldcp->resched_peer_txi];
1136 		rtxdp = rtbufp->descp;
1137 
1138 		if (rtxdp->hdr.dstate == VIO_DESC_READY) {
1139 
1140 			rv = vgen_send_dring_data(ldcp,
1141 			    (uint32_t)ldcp->resched_peer_txi, -1);
1142 			if (rv != 0) {
1143 				/* error: drop the packet */
1144 				DWARN(vgenp, ldcp, "vgen_send_dring_data "
1145 				    "failed: rv(%d) len(%d)\n",
1146 				    ldcp->ldc_id, rv, size);
1147 				statsp->oerrors++;
1148 			} else {
1149 				ldcp->resched_peer = B_FALSE;
1150 			}
1151 
1152 		}
1153 
1154 	}
1155 
1156 	mutex_exit(&ldcp->wrlock);
1157 
1158 send_dring_exit:
1159 	if (rv == ECONNRESET) {
1160 		vgen_ldcsend_process_reset(ldcp);
1161 	}
1162 	freemsg(mp);
1163 	DBG1(vgenp, ldcp, "exit\n");
1164 	return (VGEN_TX_SUCCESS);
1165 }
1166 
1167 /* enable/disable a multicast address */
1168 int
1169 vgen_multicst(void *arg, boolean_t add, const uint8_t *mca)
1170 {
1171 	vgen_t			*vgenp;
1172 	vnet_mcast_msg_t	mcastmsg;
1173 	vio_msg_tag_t		*tagp;
1174 	vgen_port_t		*portp;
1175 	vgen_portlist_t		*plistp;
1176 	vgen_ldc_t		*ldcp;
1177 	vgen_ldclist_t		*ldclp;
1178 	struct ether_addr	*addrp;
1179 	int			rv = DDI_FAILURE;
1180 	uint32_t		i;
1181 
1182 	vgenp = (vgen_t *)arg;
1183 	addrp = (struct ether_addr *)mca;
1184 	tagp = &mcastmsg.tag;
1185 	bzero(&mcastmsg, sizeof (mcastmsg));
1186 
1187 	mutex_enter(&vgenp->lock);
1188 
1189 	plistp = &(vgenp->vgenports);
1190 
1191 	READ_ENTER(&plistp->rwlock);
1192 
1193 	portp = vgenp->vsw_portp;
1194 	if (portp == NULL) {
1195 		RW_EXIT(&plistp->rwlock);
1196 		mutex_exit(&vgenp->lock);
1197 		return (rv);
1198 	}
1199 	ldclp = &portp->ldclist;
1200 
1201 	READ_ENTER(&ldclp->rwlock);
1202 
1203 	ldcp = ldclp->headp;
1204 	if (ldcp == NULL)
1205 		goto vgen_mcast_exit;
1206 
1207 	mutex_enter(&ldcp->cblock);
1208 
1209 	if (ldcp->hphase == VH_DONE) {
1210 		/*
1211 		 * If handshake is done, send a msg to vsw to add/remove
1212 		 * the multicast address. Otherwise, we just update this
1213 		 * mcast address in our table and the table will be sync'd
1214 		 * with vsw when handshake completes.
1215 		 */
1216 		tagp->vio_msgtype = VIO_TYPE_CTRL;
1217 		tagp->vio_subtype = VIO_SUBTYPE_INFO;
1218 		tagp->vio_subtype_env = VNET_MCAST_INFO;
1219 		tagp->vio_sid = ldcp->local_sid;
1220 		bcopy(mca, &(mcastmsg.mca), ETHERADDRL);
1221 		mcastmsg.set = add;
1222 		mcastmsg.count = 1;
1223 		if (vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (mcastmsg),
1224 		    B_FALSE) != VGEN_SUCCESS) {
1225 			DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
1226 			mutex_exit(&ldcp->cblock);
1227 			goto vgen_mcast_exit;
1228 		}
1229 	}
1230 
1231 	mutex_exit(&ldcp->cblock);
1232 
1233 	if (add) {
1234 
1235 		/* expand multicast table if necessary */
1236 		if (vgenp->mccount >= vgenp->mcsize) {
1237 			struct ether_addr	*newtab;
1238 			uint32_t		newsize;
1239 
1240 
1241 			newsize = vgenp->mcsize * 2;
1242 
1243 			newtab = kmem_zalloc(newsize *
1244 			    sizeof (struct ether_addr), KM_NOSLEEP);
1245 			if (newtab == NULL)
1246 				goto vgen_mcast_exit;
1247 			bcopy(vgenp->mctab, newtab, vgenp->mcsize *
1248 			    sizeof (struct ether_addr));
1249 			kmem_free(vgenp->mctab,
1250 			    vgenp->mcsize * sizeof (struct ether_addr));
1251 
1252 			vgenp->mctab = newtab;
1253 			vgenp->mcsize = newsize;
1254 		}
1255 
1256 		/* add address to the table */
1257 		vgenp->mctab[vgenp->mccount++] = *addrp;
1258 
1259 	} else {
1260 
1261 		/* delete address from the table */
1262 		for (i = 0; i < vgenp->mccount; i++) {
1263 			if (ether_cmp(addrp, &(vgenp->mctab[i])) == 0) {
1264 
1265 				/*
1266 				 * If there's more than one address in this
1267 				 * table, delete the unwanted one by moving
1268 				 * the last one in the list over top of it;
1269 				 * otherwise, just remove it.
1270 				 */
1271 				if (vgenp->mccount > 1) {
1272 					vgenp->mctab[i] =
1273 					    vgenp->mctab[vgenp->mccount-1];
1274 				}
1275 				vgenp->mccount--;
1276 				break;
1277 			}
1278 		}
1279 	}
1280 
1281 	rv = DDI_SUCCESS;
1282 
1283 vgen_mcast_exit:
1284 	RW_EXIT(&ldclp->rwlock);
1285 	RW_EXIT(&plistp->rwlock);
1286 
1287 	mutex_exit(&vgenp->lock);
1288 	return (rv);
1289 }
1290 
1291 /* set or clear promiscuous mode on the device */
1292 static int
1293 vgen_promisc(void *arg, boolean_t on)
1294 {
1295 	_NOTE(ARGUNUSED(arg, on))
1296 	return (DDI_SUCCESS);
1297 }
1298 
1299 /* set the unicast mac address of the device */
1300 static int
1301 vgen_unicst(void *arg, const uint8_t *mca)
1302 {
1303 	_NOTE(ARGUNUSED(arg, mca))
1304 	return (DDI_SUCCESS);
1305 }
1306 
1307 /* get device statistics */
1308 int
1309 vgen_stat(void *arg, uint_t stat, uint64_t *val)
1310 {
1311 	vgen_t		*vgenp = (vgen_t *)arg;
1312 	vgen_port_t	*portp;
1313 	vgen_portlist_t	*plistp;
1314 
1315 	*val = 0;
1316 
1317 	plistp = &(vgenp->vgenports);
1318 	READ_ENTER(&plistp->rwlock);
1319 
1320 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
1321 		*val += vgen_port_stat(portp, stat);
1322 	}
1323 
1324 	RW_EXIT(&plistp->rwlock);
1325 
1326 	return (0);
1327 }
1328 
1329 static void
1330 vgen_ioctl(void *arg, queue_t *wq, mblk_t *mp)
1331 {
1332 	 _NOTE(ARGUNUSED(arg, wq, mp))
1333 }
1334 
1335 /* vgen internal functions */
1336 /* detach all ports from the device */
1337 static void
1338 vgen_detach_ports(vgen_t *vgenp)
1339 {
1340 	vgen_port_t	*portp;
1341 	vgen_portlist_t	*plistp;
1342 
1343 	plistp = &(vgenp->vgenports);
1344 	WRITE_ENTER(&plistp->rwlock);
1345 
1346 	while ((portp = plistp->headp) != NULL) {
1347 		vgen_port_detach(portp);
1348 	}
1349 
1350 	RW_EXIT(&plistp->rwlock);
1351 }
1352 
1353 /*
1354  * detach the given port.
1355  */
1356 static void
1357 vgen_port_detach(vgen_port_t *portp)
1358 {
1359 	vgen_t		*vgenp;
1360 	vgen_ldclist_t	*ldclp;
1361 	int		port_num;
1362 
1363 	vgenp = portp->vgenp;
1364 	port_num = portp->port_num;
1365 
1366 	DBG1(vgenp, NULL, "port(%d):enter\n", port_num);
1367 
1368 	vgen_vlan_destroy_hash(portp);
1369 
1370 	/* remove it from port list */
1371 	vgen_port_list_remove(portp);
1372 
1373 	/* detach channels from this port */
1374 	ldclp = &portp->ldclist;
1375 	WRITE_ENTER(&ldclp->rwlock);
1376 	while (ldclp->headp) {
1377 		vgen_ldc_detach(ldclp->headp);
1378 	}
1379 	RW_EXIT(&ldclp->rwlock);
1380 	rw_destroy(&ldclp->rwlock);
1381 
1382 	if (portp->num_ldcs != 0) {
1383 		kmem_free(portp->ldc_ids, portp->num_ldcs * sizeof (uint64_t));
1384 		portp->num_ldcs = 0;
1385 	}
1386 
1387 	if (vgenp->vsw_portp == portp) {
1388 		vgenp->vsw_portp = NULL;
1389 	}
1390 	KMEM_FREE(portp);
1391 
1392 	DBG1(vgenp, NULL, "port(%d):exit\n", port_num);
1393 }
1394 
1395 /* add a port to port list */
1396 static void
1397 vgen_port_list_insert(vgen_port_t *portp)
1398 {
1399 	vgen_portlist_t *plistp;
1400 	vgen_t *vgenp;
1401 
1402 	vgenp = portp->vgenp;
1403 	plistp = &(vgenp->vgenports);
1404 
1405 	if (plistp->headp == NULL) {
1406 		plistp->headp = portp;
1407 	} else {
1408 		plistp->tailp->nextp = portp;
1409 	}
1410 	plistp->tailp = portp;
1411 	portp->nextp = NULL;
1412 }
1413 
1414 /* remove a port from port list */
1415 static void
1416 vgen_port_list_remove(vgen_port_t *portp)
1417 {
1418 	vgen_port_t *prevp;
1419 	vgen_port_t *nextp;
1420 	vgen_portlist_t *plistp;
1421 	vgen_t *vgenp;
1422 
1423 	vgenp = portp->vgenp;
1424 
1425 	plistp = &(vgenp->vgenports);
1426 
1427 	if (plistp->headp == NULL)
1428 		return;
1429 
1430 	if (portp == plistp->headp) {
1431 		plistp->headp = portp->nextp;
1432 		if (portp == plistp->tailp)
1433 			plistp->tailp = plistp->headp;
1434 	} else {
1435 		for (prevp = plistp->headp;
1436 		    ((nextp = prevp->nextp) != NULL) && (nextp != portp);
1437 		    prevp = nextp)
1438 			;
1439 		if (nextp == portp) {
1440 			prevp->nextp = portp->nextp;
1441 		}
1442 		if (portp == plistp->tailp)
1443 			plistp->tailp = prevp;
1444 	}
1445 }
1446 
1447 /* lookup a port in the list based on port_num */
1448 static vgen_port_t *
1449 vgen_port_lookup(vgen_portlist_t *plistp, int port_num)
1450 {
1451 	vgen_port_t *portp = NULL;
1452 
1453 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
1454 		if (portp->port_num == port_num) {
1455 			break;
1456 		}
1457 	}
1458 
1459 	return (portp);
1460 }
1461 
1462 /* enable ports for transmit/receive */
1463 static void
1464 vgen_init_ports(vgen_t *vgenp)
1465 {
1466 	vgen_port_t	*portp;
1467 	vgen_portlist_t	*plistp;
1468 
1469 	plistp = &(vgenp->vgenports);
1470 	READ_ENTER(&plistp->rwlock);
1471 
1472 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
1473 		vgen_port_init(portp);
1474 	}
1475 
1476 	RW_EXIT(&plistp->rwlock);
1477 }
1478 
1479 static void
1480 vgen_port_init(vgen_port_t *portp)
1481 {
1482 	vgen_t		*vgenp = portp->vgenp;
1483 	vgen_port_t	*tx_portp;
1484 	int		type;
1485 
1486 	/*
1487 	 * Add the mac address of the port into the fdb of the vnet device.
1488 	 *
1489 	 * If the port being added is a vnet-port:
1490 	 * In this case the tx_port arg specified to vnet_fdbe_add() is
1491 	 * vsw-port. This is done so that vsw-port acts as the route to reach
1492 	 * the macaddr corresponding to this port, until the channel for this
1493 	 * port comes up (LDC_UP) and handshake is done successfully. eg, if
1494 	 * the peer is OBP-vnet, it may not bring the channel up for this port
1495 	 * and may communicate via vsw to reach this port. Later, when
1496 	 * Solaris-vnet comes up at the other end of the channel for this port
1497 	 * and brings up the channel, it is an indication that peer vnet is
1498 	 * capable of distributed switching, so the direct route through this
1499 	 * port is specified in fdb (see func vgen_fdbe_modify()).
1500 	 */
1501 	if (portp == vgenp->vsw_portp) {
1502 		type = VNET_VSWPORT;
1503 	} else {
1504 		type = VNET_VNETPORT;
1505 	}
1506 	tx_portp = vgenp->vsw_portp;
1507 
1508 	/* Add entry for the port's mac address into fdb */
1509 	vnet_fdbe_add(vgenp->vnetp, &portp->macaddr, type, vgen_tx, tx_portp);
1510 
1511 	/* Add the port to the specified vlans */
1512 	vgen_vlan_add_ids(portp);
1513 
1514 	/* Bring up the channels of this port */
1515 	vgen_init_ldcs(portp);
1516 }
1517 
1518 /* disable transmit/receive on ports */
1519 static void
1520 vgen_uninit_ports(vgen_t *vgenp)
1521 {
1522 	vgen_port_t	*portp;
1523 	vgen_portlist_t	*plistp;
1524 
1525 	plistp = &(vgenp->vgenports);
1526 	READ_ENTER(&plistp->rwlock);
1527 
1528 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
1529 		vgen_port_uninit(portp);
1530 	}
1531 
1532 	RW_EXIT(&plistp->rwlock);
1533 }
1534 
1535 static void
1536 vgen_port_uninit(vgen_port_t *portp)
1537 {
1538 	vgen_t	*vgenp = portp->vgenp;
1539 
1540 	vgen_uninit_ldcs(portp);
1541 
1542 	/* delete the entry in vnet's fdb for this macaddr/port */
1543 	vnet_fdbe_del(vgenp->vnetp, &portp->macaddr);
1544 
1545 	/* remove the port from vlans it has been assigned to */
1546 	vgen_vlan_remove_ids(portp);
1547 }
1548 
1549 /*
1550  * Scan the machine description for this instance of vnet
1551  * and read its properties. Called only from vgen_init().
1552  * Returns: 0 on success, 1 on failure.
1553  */
1554 static int
1555 vgen_read_mdprops(vgen_t *vgenp)
1556 {
1557 	vnet_t		*vnetp = vgenp->vnetp;
1558 	md_t		*mdp = NULL;
1559 	mde_cookie_t	rootnode;
1560 	mde_cookie_t	*listp = NULL;
1561 	uint64_t	inst;
1562 	uint64_t	cfgh;
1563 	char		*name;
1564 	int		rv = 1;
1565 	int		num_nodes = 0;
1566 	int		num_devs = 0;
1567 	int		listsz = 0;
1568 	int		i;
1569 
1570 	/*
1571 	 * In each 'virtual-device' node in the MD there is a
1572 	 * 'cfg-handle' property which is the MD's concept of
1573 	 * an instance number (this may be completely different from
1574 	 * the device drivers instance #). OBP reads that value and
1575 	 * stores it in the 'reg' property of the appropriate node in
1576 	 * the device tree. We first read this reg property and use this
1577 	 * to compare against the 'cfg-handle' property of vnet nodes
1578 	 * in MD to get to this specific vnet instance and then read
1579 	 * other properties that we are interested in.
1580 	 * We also cache the value of 'reg' property and use it later
1581 	 * to register callbacks with mdeg (see vgen_mdeg_reg())
1582 	 */
1583 	inst = ddi_prop_get_int(DDI_DEV_T_ANY, vgenp->vnetdip,
1584 	    DDI_PROP_DONTPASS, reg_propname, -1);
1585 	if (inst == -1) {
1586 		return (rv);
1587 	}
1588 
1589 	vgenp->regprop = inst;
1590 
1591 	if ((mdp = md_get_handle()) == NULL) {
1592 		return (rv);
1593 	}
1594 
1595 	num_nodes = md_node_count(mdp);
1596 	ASSERT(num_nodes > 0);
1597 
1598 	listsz = num_nodes * sizeof (mde_cookie_t);
1599 	listp = (mde_cookie_t *)kmem_zalloc(listsz, KM_SLEEP);
1600 
1601 	rootnode = md_root_node(mdp);
1602 
1603 	/* search for all "virtual_device" nodes */
1604 	num_devs = md_scan_dag(mdp, rootnode,
1605 	    md_find_name(mdp, vdev_propname),
1606 	    md_find_name(mdp, "fwd"), listp);
1607 	if (num_devs <= 0) {
1608 		goto vgen_readmd_exit;
1609 	}
1610 
1611 	/*
1612 	 * Now loop through the list of virtual-devices looking for
1613 	 * devices with name "network" and for each such device compare
1614 	 * its instance with what we have from the 'reg' property to
1615 	 * find the right node in MD and then read all its properties.
1616 	 */
1617 	for (i = 0; i < num_devs; i++) {
1618 
1619 		if (md_get_prop_str(mdp, listp[i], "name", &name) != 0) {
1620 			goto vgen_readmd_exit;
1621 		}
1622 
1623 		/* is this a "network" device? */
1624 		if (strcmp(name, vnet_propname) != 0)
1625 			continue;
1626 
1627 		if (md_get_prop_val(mdp, listp[i], "cfg-handle", &cfgh) != 0) {
1628 			goto vgen_readmd_exit;
1629 		}
1630 
1631 		/* is this the required instance of vnet? */
1632 		if (inst != cfgh)
1633 			continue;
1634 
1635 		/* now read all properties of this vnet instance */
1636 		vgen_read_pri_eth_types(vgenp, mdp, listp[i]);
1637 
1638 		/* read vlan id properties of this vnet instance */
1639 		vgen_vlan_read_ids(vgenp, VGEN_LOCAL, mdp, listp[i],
1640 		    &vnetp->pvid, &vnetp->vids, &vnetp->nvids,
1641 		    &vnetp->default_vlan_id);
1642 
1643 		rv = 0;
1644 		break;
1645 	}
1646 
1647 vgen_readmd_exit:
1648 
1649 	kmem_free(listp, listsz);
1650 	(void) md_fini_handle(mdp);
1651 	return (rv);
1652 }
1653 
1654 /*
1655  * Read vlan id properties of the given MD node.
1656  * Arguments:
1657  *   arg:          device argument(vnet device or a port)
1658  *   type:         type of arg; VGEN_LOCAL(vnet device) or VGEN_PEER(port)
1659  *   mdp:          machine description
1660  *   node:         md node cookie
1661  *
1662  * Returns:
1663  *   pvidp:        port-vlan-id of the node
1664  *   vidspp:       list of vlan-ids of the node
1665  *   nvidsp:       # of vlan-ids in the list
1666  *   default_idp:  default-vlan-id of the node(if node is vnet device)
1667  */
1668 static void
1669 vgen_vlan_read_ids(void *arg, int type, md_t *mdp, mde_cookie_t node,
1670 	uint16_t *pvidp, uint16_t **vidspp, uint16_t *nvidsp,
1671 	uint16_t *default_idp)
1672 {
1673 	vgen_t		*vgenp;
1674 	vnet_t		*vnetp;
1675 	vgen_port_t	*portp;
1676 	char		*pvid_propname;
1677 	char		*vid_propname;
1678 	uint_t		nvids;
1679 	uint32_t	vids_size;
1680 	int		rv;
1681 	int		i;
1682 	uint64_t	*data;
1683 	uint64_t	val;
1684 	int		size;
1685 	int		inst;
1686 
1687 	if (type == VGEN_LOCAL) {
1688 
1689 		vgenp = (vgen_t *)arg;
1690 		vnetp = vgenp->vnetp;
1691 		pvid_propname = vgen_pvid_propname;
1692 		vid_propname = vgen_vid_propname;
1693 		inst = vnetp->instance;
1694 
1695 	} else if (type == VGEN_PEER) {
1696 
1697 		portp = (vgen_port_t *)arg;
1698 		vgenp = portp->vgenp;
1699 		vnetp = vgenp->vnetp;
1700 		pvid_propname = port_pvid_propname;
1701 		vid_propname = port_vid_propname;
1702 		inst = portp->port_num;
1703 
1704 	} else {
1705 		return;
1706 	}
1707 
1708 	if (type == VGEN_LOCAL && default_idp != NULL) {
1709 		rv = md_get_prop_val(mdp, node, vgen_dvid_propname, &val);
1710 		if (rv != 0) {
1711 			DWARN(vgenp, NULL, "prop(%s) not found",
1712 			    vgen_dvid_propname);
1713 
1714 			*default_idp = vnet_default_vlan_id;
1715 		} else {
1716 			*default_idp = val & 0xFFF;
1717 			DBG2(vgenp, NULL, "%s(%d): (%d)\n", vgen_dvid_propname,
1718 			    inst, *default_idp);
1719 		}
1720 	}
1721 
1722 	rv = md_get_prop_val(mdp, node, pvid_propname, &val);
1723 	if (rv != 0) {
1724 		DWARN(vgenp, NULL, "prop(%s) not found", pvid_propname);
1725 		*pvidp = vnet_default_vlan_id;
1726 	} else {
1727 
1728 		*pvidp = val & 0xFFF;
1729 		DBG2(vgenp, NULL, "%s(%d): (%d)\n",
1730 		    pvid_propname, inst, *pvidp);
1731 	}
1732 
1733 	rv = md_get_prop_data(mdp, node, vid_propname, (uint8_t **)&data,
1734 	    &size);
1735 	if (rv != 0) {
1736 		DBG2(vgenp, NULL, "prop(%s) not found", vid_propname);
1737 		size = 0;
1738 	} else {
1739 		size /= sizeof (uint64_t);
1740 	}
1741 	nvids = size;
1742 
1743 	if (nvids != 0) {
1744 		DBG2(vgenp, NULL, "%s(%d): ", vid_propname, inst);
1745 		vids_size = sizeof (uint16_t) * nvids;
1746 		*vidspp = kmem_zalloc(vids_size, KM_SLEEP);
1747 		for (i = 0; i < nvids; i++) {
1748 			(*vidspp)[i] = data[i] & 0xFFFF;
1749 			DBG2(vgenp, NULL, " %d ", (*vidspp)[i]);
1750 		}
1751 		DBG2(vgenp, NULL, "\n");
1752 	}
1753 
1754 	*nvidsp = nvids;
1755 }
1756 
1757 /*
1758  * Create a vlan id hash table for the given port.
1759  */
1760 static void
1761 vgen_vlan_create_hash(vgen_port_t *portp)
1762 {
1763 	char		hashname[MAXNAMELEN];
1764 
1765 	(void) snprintf(hashname, MAXNAMELEN, "port%d-vlan-hash",
1766 	    portp->port_num);
1767 
1768 	portp->vlan_nchains = vgen_vlan_nchains;
1769 	portp->vlan_hashp = mod_hash_create_idhash(hashname,
1770 	    portp->vlan_nchains, mod_hash_null_valdtor);
1771 }
1772 
1773 /*
1774  * Destroy the vlan id hash table in the given port.
1775  */
1776 static void
1777 vgen_vlan_destroy_hash(vgen_port_t *portp)
1778 {
1779 	if (portp->vlan_hashp != NULL) {
1780 		mod_hash_destroy_hash(portp->vlan_hashp);
1781 		portp->vlan_hashp = NULL;
1782 		portp->vlan_nchains = 0;
1783 	}
1784 }
1785 
1786 /*
1787  * Add a port to the vlans specified in its port properites.
1788  */
1789 static void
1790 vgen_vlan_add_ids(vgen_port_t *portp)
1791 {
1792 	int		rv;
1793 	int		i;
1794 
1795 	rv = mod_hash_insert(portp->vlan_hashp,
1796 	    (mod_hash_key_t)VLAN_ID_KEY(portp->pvid),
1797 	    (mod_hash_val_t)B_TRUE);
1798 	ASSERT(rv == 0);
1799 
1800 	for (i = 0; i < portp->nvids; i++) {
1801 		rv = mod_hash_insert(portp->vlan_hashp,
1802 		    (mod_hash_key_t)VLAN_ID_KEY(portp->vids[i]),
1803 		    (mod_hash_val_t)B_TRUE);
1804 		ASSERT(rv == 0);
1805 	}
1806 }
1807 
1808 /*
1809  * Remove a port from the vlans it has been assigned to.
1810  */
1811 static void
1812 vgen_vlan_remove_ids(vgen_port_t *portp)
1813 {
1814 	int		rv;
1815 	int		i;
1816 	mod_hash_val_t	vp;
1817 
1818 	rv = mod_hash_remove(portp->vlan_hashp,
1819 	    (mod_hash_key_t)VLAN_ID_KEY(portp->pvid),
1820 	    (mod_hash_val_t *)&vp);
1821 	ASSERT(rv == 0);
1822 
1823 	for (i = 0; i < portp->nvids; i++) {
1824 		rv = mod_hash_remove(portp->vlan_hashp,
1825 		    (mod_hash_key_t)VLAN_ID_KEY(portp->vids[i]),
1826 		    (mod_hash_val_t *)&vp);
1827 		ASSERT(rv == 0);
1828 	}
1829 }
1830 
1831 /*
1832  * Lookup the vlan id of the given tx frame. If it is a vlan-tagged frame,
1833  * then the vlan-id is available in the tag; otherwise, its vlan id is
1834  * implicitly obtained from the port-vlan-id of the vnet device.
1835  * The vlan id determined is returned in vidp.
1836  * Returns: B_TRUE if it is a tagged frame; B_FALSE if it is untagged.
1837  */
1838 static boolean_t
1839 vgen_frame_lookup_vid(vnet_t *vnetp, struct ether_header *ehp, uint16_t *vidp)
1840 {
1841 	struct ether_vlan_header	*evhp;
1842 
1843 	/* If it's a tagged frame, get the vlan id from vlan header */
1844 	if (ehp->ether_type == ETHERTYPE_VLAN) {
1845 
1846 		evhp = (struct ether_vlan_header *)ehp;
1847 		*vidp = VLAN_ID(ntohs(evhp->ether_tci));
1848 		return (B_TRUE);
1849 	}
1850 
1851 	/* Untagged frame, vlan-id is the pvid of vnet device */
1852 	*vidp = vnetp->pvid;
1853 	return (B_FALSE);
1854 }
1855 
1856 /*
1857  * Find the given vlan id in the hash table.
1858  * Return: B_TRUE if the id is found; B_FALSE if not found.
1859  */
1860 static boolean_t
1861 vgen_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid)
1862 {
1863 	int		rv;
1864 	mod_hash_val_t	vp;
1865 
1866 	rv = mod_hash_find(vlan_hashp, VLAN_ID_KEY(vid), (mod_hash_val_t *)&vp);
1867 
1868 	if (rv != 0)
1869 		return (B_FALSE);
1870 
1871 	return (B_TRUE);
1872 }
1873 
1874 /*
1875  * Modify fdb entries corresponding to the port's macaddr, to use a different
1876  * port. This is done when the port's ldc channel goes down or comes up. When
1877  * the channel state changes to RESET/DOWN, we modify the fdb entry for the
1878  * port by specifying vsw-port as the port to be used for transmits.
1879  * Similarly when the channel state changes to UP, we restore its fdb entry to
1880  * start using the actual vnet-port for transmits.
1881  *
1882  * Arguments:
1883  *
1884  *   portp:          port for which fdb entry is being updated
1885  *
1886  *   use_vsw_port:
1887  *                   B_TRUE:  update fdb entry to use vsw-port for transmits
1888  *                   B_FALSE: update fdb entry to use the port itself for tx
1889  *
1890  *   flag:	     provides context info
1891  *                   B_TRUE:  this func is being called from transmit routine
1892  *                   B_FALSE: other contexts (callbacks)
1893  */
1894 static void
1895 vgen_fdbe_modify(vgen_port_t *portp, boolean_t use_vsw_port,
1896 	boolean_t flag)
1897 {
1898 	vgen_t		*vgenp = portp->vgenp;
1899 	vnet_t		*vnetp = vgenp->vnetp;
1900 	vgen_port_t	*pp;
1901 
1902 	(use_vsw_port == B_TRUE) ? (pp = vgenp->vsw_portp) : (pp = portp);
1903 
1904 	vnet_fdbe_modify(vnetp, &portp->macaddr, pp, flag);
1905 }
1906 
1907 /*
1908  * This function reads "priority-ether-types" property from md. This property
1909  * is used to enable support for priority frames. Applications which need
1910  * guaranteed and timely delivery of certain high priority frames to/from
1911  * a vnet or vsw within ldoms, should configure this property by providing
1912  * the ether type(s) for which the priority facility is needed.
1913  * Normal data frames are delivered over a ldc channel using the descriptor
1914  * ring mechanism which is constrained by factors such as descriptor ring size,
1915  * the rate at which the ring is processed at the peer ldc end point, etc.
1916  * The priority mechanism provides an Out-Of-Band path to send/receive frames
1917  * as raw pkt data (VIO_PKT_DATA) messages over the channel, avoiding the
1918  * descriptor ring path and enables a more reliable and timely delivery of
1919  * frames to the peer.
1920  */
1921 static void
1922 vgen_read_pri_eth_types(vgen_t *vgenp, md_t *mdp, mde_cookie_t node)
1923 {
1924 	int		rv;
1925 	uint16_t	*types;
1926 	uint64_t	*data;
1927 	int		size;
1928 	int		i;
1929 	size_t		mblk_sz;
1930 
1931 	rv = md_get_prop_data(mdp, node, pri_types_propname,
1932 	    (uint8_t **)&data, &size);
1933 	if (rv != 0) {
1934 		/*
1935 		 * Property may not exist if we are running pre-ldoms1.1 f/w.
1936 		 * Check if 'vgen_pri_eth_type' has been set in that case.
1937 		 */
1938 		if (vgen_pri_eth_type != 0) {
1939 			size = sizeof (vgen_pri_eth_type);
1940 			data = &vgen_pri_eth_type;
1941 		} else {
1942 			DWARN(vgenp, NULL,
1943 			    "prop(%s) not found", pri_types_propname);
1944 			size = 0;
1945 		}
1946 	}
1947 
1948 	if (size == 0) {
1949 		vgenp->pri_num_types = 0;
1950 		return;
1951 	}
1952 
1953 	/*
1954 	 * we have some priority-ether-types defined;
1955 	 * allocate a table of these types and also
1956 	 * allocate a pool of mblks to transmit these
1957 	 * priority packets.
1958 	 */
1959 	size /= sizeof (uint64_t);
1960 	vgenp->pri_num_types = size;
1961 	vgenp->pri_types = kmem_zalloc(size * sizeof (uint16_t), KM_SLEEP);
1962 	for (i = 0, types = vgenp->pri_types; i < size; i++) {
1963 		types[i] = data[i] & 0xFFFF;
1964 	}
1965 	mblk_sz = (VIO_PKT_DATA_HDRSIZE + vgenp->max_frame_size + 7) & ~7;
1966 	(void) vio_create_mblks(vgen_pri_tx_nmblks, mblk_sz,
1967 	    &vgenp->pri_tx_vmp);
1968 }
1969 
1970 /* register with MD event generator */
1971 static int
1972 vgen_mdeg_reg(vgen_t *vgenp)
1973 {
1974 	mdeg_prop_spec_t	*pspecp;
1975 	mdeg_node_spec_t	*parentp;
1976 	uint_t			templatesz;
1977 	int			rv;
1978 	mdeg_handle_t		dev_hdl = NULL;
1979 	mdeg_handle_t		port_hdl = NULL;
1980 
1981 	templatesz = sizeof (vgen_prop_template);
1982 	pspecp = kmem_zalloc(templatesz, KM_NOSLEEP);
1983 	if (pspecp == NULL) {
1984 		return (DDI_FAILURE);
1985 	}
1986 	parentp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_NOSLEEP);
1987 	if (parentp == NULL) {
1988 		kmem_free(pspecp, templatesz);
1989 		return (DDI_FAILURE);
1990 	}
1991 
1992 	bcopy(vgen_prop_template, pspecp, templatesz);
1993 
1994 	/*
1995 	 * NOTE: The instance here refers to the value of "reg" property and
1996 	 * not the dev_info instance (ddi_get_instance()) of vnet.
1997 	 */
1998 	VGEN_SET_MDEG_PROP_INST(pspecp, vgenp->regprop);
1999 
2000 	parentp->namep = "virtual-device";
2001 	parentp->specp = pspecp;
2002 
2003 	/* save parentp in vgen_t */
2004 	vgenp->mdeg_parentp = parentp;
2005 
2006 	/*
2007 	 * Register an interest in 'virtual-device' nodes with a
2008 	 * 'name' property of 'network'
2009 	 */
2010 	rv = mdeg_register(parentp, &vdev_match, vgen_mdeg_cb, vgenp, &dev_hdl);
2011 	if (rv != MDEG_SUCCESS) {
2012 		DERR(vgenp, NULL, "mdeg_register failed\n");
2013 		goto mdeg_reg_fail;
2014 	}
2015 
2016 	/* Register an interest in 'port' nodes */
2017 	rv = mdeg_register(parentp, &vport_match, vgen_mdeg_port_cb, vgenp,
2018 	    &port_hdl);
2019 	if (rv != MDEG_SUCCESS) {
2020 		DERR(vgenp, NULL, "mdeg_register failed\n");
2021 		goto mdeg_reg_fail;
2022 	}
2023 
2024 	/* save mdeg handle in vgen_t */
2025 	vgenp->mdeg_dev_hdl = dev_hdl;
2026 	vgenp->mdeg_port_hdl = port_hdl;
2027 
2028 	return (DDI_SUCCESS);
2029 
2030 mdeg_reg_fail:
2031 	if (dev_hdl != NULL) {
2032 		(void) mdeg_unregister(dev_hdl);
2033 	}
2034 	KMEM_FREE(parentp);
2035 	kmem_free(pspecp, templatesz);
2036 	vgenp->mdeg_parentp = NULL;
2037 	return (DDI_FAILURE);
2038 }
2039 
2040 /* unregister with MD event generator */
2041 static void
2042 vgen_mdeg_unreg(vgen_t *vgenp)
2043 {
2044 	(void) mdeg_unregister(vgenp->mdeg_dev_hdl);
2045 	(void) mdeg_unregister(vgenp->mdeg_port_hdl);
2046 	kmem_free(vgenp->mdeg_parentp->specp, sizeof (vgen_prop_template));
2047 	KMEM_FREE(vgenp->mdeg_parentp);
2048 	vgenp->mdeg_parentp = NULL;
2049 	vgenp->mdeg_dev_hdl = NULL;
2050 	vgenp->mdeg_port_hdl = NULL;
2051 }
2052 
2053 /* mdeg callback function for the port node */
2054 static int
2055 vgen_mdeg_port_cb(void *cb_argp, mdeg_result_t *resp)
2056 {
2057 	int idx;
2058 	int vsw_idx = -1;
2059 	uint64_t val;
2060 	vgen_t *vgenp;
2061 
2062 	if ((resp == NULL) || (cb_argp == NULL)) {
2063 		return (MDEG_FAILURE);
2064 	}
2065 
2066 	vgenp = (vgen_t *)cb_argp;
2067 	DBG1(vgenp, NULL, "enter\n");
2068 
2069 	mutex_enter(&vgenp->lock);
2070 
2071 	DBG1(vgenp, NULL, "ports: removed(%x), "
2072 	"added(%x), updated(%x)\n", resp->removed.nelem,
2073 	    resp->added.nelem, resp->match_curr.nelem);
2074 
2075 	for (idx = 0; idx < resp->removed.nelem; idx++) {
2076 		(void) vgen_remove_port(vgenp, resp->removed.mdp,
2077 		    resp->removed.mdep[idx]);
2078 	}
2079 
2080 	if (vgenp->vsw_portp == NULL) {
2081 		/*
2082 		 * find vsw_port and add it first, because other ports need
2083 		 * this when adding fdb entry (see vgen_port_init()).
2084 		 */
2085 		for (idx = 0; idx < resp->added.nelem; idx++) {
2086 			if (!(md_get_prop_val(resp->added.mdp,
2087 			    resp->added.mdep[idx], swport_propname, &val))) {
2088 				if (val == 0) {
2089 					/*
2090 					 * This port is connected to the
2091 					 * vsw on service domain.
2092 					 */
2093 					vsw_idx = idx;
2094 					if (vgen_add_port(vgenp,
2095 					    resp->added.mdp,
2096 					    resp->added.mdep[idx]) !=
2097 					    DDI_SUCCESS) {
2098 						cmn_err(CE_NOTE, "vnet%d Could "
2099 						    "not initialize virtual "
2100 						    "switch port.",
2101 						    ddi_get_instance(vgenp->
2102 						    vnetdip));
2103 						mutex_exit(&vgenp->lock);
2104 						return (MDEG_FAILURE);
2105 					}
2106 					break;
2107 				}
2108 			}
2109 		}
2110 		if (vsw_idx == -1) {
2111 			DWARN(vgenp, NULL, "can't find vsw_port\n");
2112 			mutex_exit(&vgenp->lock);
2113 			return (MDEG_FAILURE);
2114 		}
2115 	}
2116 
2117 	for (idx = 0; idx < resp->added.nelem; idx++) {
2118 		if ((vsw_idx != -1) && (vsw_idx == idx)) /* skip vsw_port */
2119 			continue;
2120 
2121 		/* If this port can't be added just skip it. */
2122 		(void) vgen_add_port(vgenp, resp->added.mdp,
2123 		    resp->added.mdep[idx]);
2124 	}
2125 
2126 	for (idx = 0; idx < resp->match_curr.nelem; idx++) {
2127 		(void) vgen_update_port(vgenp, resp->match_curr.mdp,
2128 		    resp->match_curr.mdep[idx],
2129 		    resp->match_prev.mdp,
2130 		    resp->match_prev.mdep[idx]);
2131 	}
2132 
2133 	mutex_exit(&vgenp->lock);
2134 	DBG1(vgenp, NULL, "exit\n");
2135 	return (MDEG_SUCCESS);
2136 }
2137 
2138 /* mdeg callback function for the vnet node */
2139 static int
2140 vgen_mdeg_cb(void *cb_argp, mdeg_result_t *resp)
2141 {
2142 	vgen_t		*vgenp;
2143 	vnet_t		*vnetp;
2144 	md_t		*mdp;
2145 	mde_cookie_t	node;
2146 	uint64_t	inst;
2147 	char		*node_name = NULL;
2148 
2149 	if ((resp == NULL) || (cb_argp == NULL)) {
2150 		return (MDEG_FAILURE);
2151 	}
2152 
2153 	vgenp = (vgen_t *)cb_argp;
2154 	vnetp = vgenp->vnetp;
2155 
2156 	DBG1(vgenp, NULL, "%s: added %d : removed %d : curr matched %d"
2157 	    " : prev matched %d", resp->added.nelem, resp->removed.nelem,
2158 	    resp->match_curr.nelem, resp->match_prev.nelem);
2159 
2160 	mutex_enter(&vgenp->lock);
2161 
2162 	/*
2163 	 * We get an initial callback for this node as 'added' after
2164 	 * registering with mdeg. Note that we would have already gathered
2165 	 * information about this vnet node by walking MD earlier during attach
2166 	 * (in vgen_read_mdprops()). So, there is a window where the properties
2167 	 * of this node might have changed when we get this initial 'added'
2168 	 * callback. We handle this as if an update occured and invoke the same
2169 	 * function which handles updates to the properties of this vnet-node
2170 	 * if any. A non-zero 'match' value indicates that the MD has been
2171 	 * updated and that a 'network' node is present which may or may not
2172 	 * have been updated. It is up to the clients to examine their own
2173 	 * nodes and determine if they have changed.
2174 	 */
2175 	if (resp->added.nelem != 0) {
2176 
2177 		if (resp->added.nelem != 1) {
2178 			cmn_err(CE_NOTE, "!vnet%d: number of nodes added "
2179 			    "invalid: %d\n", vnetp->instance,
2180 			    resp->added.nelem);
2181 			goto vgen_mdeg_cb_err;
2182 		}
2183 
2184 		mdp = resp->added.mdp;
2185 		node = resp->added.mdep[0];
2186 
2187 	} else if (resp->match_curr.nelem != 0) {
2188 
2189 		if (resp->match_curr.nelem != 1) {
2190 			cmn_err(CE_NOTE, "!vnet%d: number of nodes updated "
2191 			    "invalid: %d\n", vnetp->instance,
2192 			    resp->match_curr.nelem);
2193 			goto vgen_mdeg_cb_err;
2194 		}
2195 
2196 		mdp = resp->match_curr.mdp;
2197 		node = resp->match_curr.mdep[0];
2198 
2199 	} else {
2200 		goto vgen_mdeg_cb_err;
2201 	}
2202 
2203 	/* Validate name and instance */
2204 	if (md_get_prop_str(mdp, node, "name", &node_name) != 0) {
2205 		DERR(vgenp, NULL, "unable to get node name\n");
2206 		goto vgen_mdeg_cb_err;
2207 	}
2208 
2209 	/* is this a virtual-network device? */
2210 	if (strcmp(node_name, vnet_propname) != 0) {
2211 		DERR(vgenp, NULL, "%s: Invalid node name: %s\n", node_name);
2212 		goto vgen_mdeg_cb_err;
2213 	}
2214 
2215 	if (md_get_prop_val(mdp, node, "cfg-handle", &inst)) {
2216 		DERR(vgenp, NULL, "prop(cfg-handle) not found\n");
2217 		goto vgen_mdeg_cb_err;
2218 	}
2219 
2220 	/* is this the right instance of vsw? */
2221 	if (inst != vgenp->regprop) {
2222 		DERR(vgenp, NULL,  "Invalid cfg-handle: %lx\n", inst);
2223 		goto vgen_mdeg_cb_err;
2224 	}
2225 
2226 	vgen_update_md_prop(vgenp, mdp, node);
2227 
2228 	mutex_exit(&vgenp->lock);
2229 	return (MDEG_SUCCESS);
2230 
2231 vgen_mdeg_cb_err:
2232 	mutex_exit(&vgenp->lock);
2233 	return (MDEG_FAILURE);
2234 }
2235 
2236 /*
2237  * Check to see if the relevant properties in the specified node have
2238  * changed, and if so take the appropriate action.
2239  */
2240 static void
2241 vgen_update_md_prop(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
2242 {
2243 	uint16_t	pvid;
2244 	uint16_t	*vids;
2245 	uint16_t	nvids;
2246 	vnet_t		*vnetp = vgenp->vnetp;
2247 	boolean_t	updated_vlans = B_FALSE;
2248 
2249 	/* Read the vlan ids */
2250 	vgen_vlan_read_ids(vgenp, VGEN_LOCAL, mdp, mdex, &pvid, &vids,
2251 	    &nvids, NULL);
2252 
2253 	/* Determine if there are any vlan id updates */
2254 	if ((pvid != vnetp->pvid) ||		/* pvid changed? */
2255 	    (nvids != vnetp->nvids) ||		/* # of vids changed? */
2256 	    ((nvids != 0) && (vnetp->nvids != 0) &&	/* vids changed? */
2257 	    bcmp(vids, vnetp->vids, sizeof (uint16_t) * nvids))) {
2258 		updated_vlans = B_TRUE;
2259 	}
2260 
2261 	if (updated_vlans == B_FALSE) {
2262 		if (nvids != 0) {
2263 			kmem_free(vids, sizeof (uint16_t) * nvids);
2264 		}
2265 		return;
2266 	}
2267 
2268 	/* save the new vlan ids */
2269 	vnetp->pvid = pvid;
2270 	if (vnetp->nvids != 0) {
2271 		kmem_free(vnetp->vids, sizeof (uint16_t) * vnetp->nvids);
2272 		vnetp->nvids = 0;
2273 	}
2274 	if (nvids != 0) {
2275 		vnetp->nvids = nvids;
2276 		vnetp->vids = vids;
2277 	}
2278 
2279 	/* reset vlan-unaware peers (ver < 1.3) and restart handshake */
2280 	vgen_reset_vlan_unaware_ports(vgenp);
2281 }
2282 
2283 /* add a new port to the device */
2284 static int
2285 vgen_add_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
2286 {
2287 	vgen_port_t	*portp;
2288 	int		rv;
2289 
2290 	portp = kmem_zalloc(sizeof (vgen_port_t), KM_SLEEP);
2291 
2292 	rv = vgen_port_read_props(portp, vgenp, mdp, mdex);
2293 	if (rv != DDI_SUCCESS) {
2294 		KMEM_FREE(portp);
2295 		return (DDI_FAILURE);
2296 	}
2297 
2298 	rv = vgen_port_attach(portp);
2299 	if (rv != DDI_SUCCESS) {
2300 		return (DDI_FAILURE);
2301 	}
2302 
2303 	return (DDI_SUCCESS);
2304 }
2305 
2306 /* read properties of the port from its md node */
2307 static int
2308 vgen_port_read_props(vgen_port_t *portp, vgen_t *vgenp, md_t *mdp,
2309 	mde_cookie_t mdex)
2310 {
2311 	uint64_t		port_num;
2312 	uint64_t		*ldc_ids;
2313 	uint64_t		macaddr;
2314 	uint64_t		val;
2315 	int			num_ldcs;
2316 	int			i;
2317 	int			addrsz;
2318 	int			num_nodes = 0;
2319 	int			listsz = 0;
2320 	mde_cookie_t		*listp = NULL;
2321 	uint8_t			*addrp;
2322 	struct ether_addr	ea;
2323 
2324 	/* read "id" property to get the port number */
2325 	if (md_get_prop_val(mdp, mdex, id_propname, &port_num)) {
2326 		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
2327 		return (DDI_FAILURE);
2328 	}
2329 
2330 	/*
2331 	 * Find the channel endpoint node(s) under this port node.
2332 	 */
2333 	if ((num_nodes = md_node_count(mdp)) <= 0) {
2334 		DWARN(vgenp, NULL, "invalid number of nodes found (%d)",
2335 		    num_nodes);
2336 		return (DDI_FAILURE);
2337 	}
2338 
2339 	/* allocate space for node list */
2340 	listsz = num_nodes * sizeof (mde_cookie_t);
2341 	listp = kmem_zalloc(listsz, KM_NOSLEEP);
2342 	if (listp == NULL)
2343 		return (DDI_FAILURE);
2344 
2345 	num_ldcs = md_scan_dag(mdp, mdex,
2346 	    md_find_name(mdp, channel_propname),
2347 	    md_find_name(mdp, "fwd"), listp);
2348 
2349 	if (num_ldcs <= 0) {
2350 		DWARN(vgenp, NULL, "can't find %s nodes", channel_propname);
2351 		kmem_free(listp, listsz);
2352 		return (DDI_FAILURE);
2353 	}
2354 
2355 	DBG2(vgenp, NULL, "num_ldcs %d", num_ldcs);
2356 
2357 	ldc_ids = kmem_zalloc(num_ldcs * sizeof (uint64_t), KM_NOSLEEP);
2358 	if (ldc_ids == NULL) {
2359 		kmem_free(listp, listsz);
2360 		return (DDI_FAILURE);
2361 	}
2362 
2363 	for (i = 0; i < num_ldcs; i++) {
2364 		/* read channel ids */
2365 		if (md_get_prop_val(mdp, listp[i], id_propname, &ldc_ids[i])) {
2366 			DWARN(vgenp, NULL, "prop(%s) not found\n",
2367 			    id_propname);
2368 			kmem_free(listp, listsz);
2369 			kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
2370 			return (DDI_FAILURE);
2371 		}
2372 		DBG2(vgenp, NULL, "ldc_id 0x%llx", ldc_ids[i]);
2373 	}
2374 
2375 	kmem_free(listp, listsz);
2376 
2377 	if (md_get_prop_data(mdp, mdex, rmacaddr_propname, &addrp,
2378 	    &addrsz)) {
2379 		DWARN(vgenp, NULL, "prop(%s) not found\n", rmacaddr_propname);
2380 		kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
2381 		return (DDI_FAILURE);
2382 	}
2383 
2384 	if (addrsz < ETHERADDRL) {
2385 		DWARN(vgenp, NULL, "invalid address size (%d)\n", addrsz);
2386 		kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
2387 		return (DDI_FAILURE);
2388 	}
2389 
2390 	macaddr = *((uint64_t *)addrp);
2391 
2392 	DBG2(vgenp, NULL, "remote mac address 0x%llx\n", macaddr);
2393 
2394 	for (i = ETHERADDRL - 1; i >= 0; i--) {
2395 		ea.ether_addr_octet[i] = macaddr & 0xFF;
2396 		macaddr >>= 8;
2397 	}
2398 
2399 	if (vgenp->vsw_portp == NULL) {
2400 		if (!(md_get_prop_val(mdp, mdex, swport_propname, &val))) {
2401 			if (val == 0) {
2402 				/* This port is connected to the vsw */
2403 				vgenp->vsw_portp = portp;
2404 			}
2405 		}
2406 	}
2407 
2408 	/* now update all properties into the port */
2409 	portp->vgenp = vgenp;
2410 	portp->port_num = port_num;
2411 	ether_copy(&ea, &portp->macaddr);
2412 	portp->ldc_ids = kmem_zalloc(sizeof (uint64_t) * num_ldcs, KM_SLEEP);
2413 	bcopy(ldc_ids, portp->ldc_ids, sizeof (uint64_t) * num_ldcs);
2414 	portp->num_ldcs = num_ldcs;
2415 
2416 	/* read vlan id properties of this port node */
2417 	vgen_vlan_read_ids(portp, VGEN_PEER, mdp, mdex, &portp->pvid,
2418 	    &portp->vids, &portp->nvids, NULL);
2419 
2420 	kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
2421 
2422 	return (DDI_SUCCESS);
2423 }
2424 
2425 /* remove a port from the device */
2426 static int
2427 vgen_remove_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
2428 {
2429 	uint64_t	port_num;
2430 	vgen_port_t	*portp;
2431 	vgen_portlist_t	*plistp;
2432 
2433 	/* read "id" property to get the port number */
2434 	if (md_get_prop_val(mdp, mdex, id_propname, &port_num)) {
2435 		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
2436 		return (DDI_FAILURE);
2437 	}
2438 
2439 	plistp = &(vgenp->vgenports);
2440 
2441 	WRITE_ENTER(&plistp->rwlock);
2442 	portp = vgen_port_lookup(plistp, (int)port_num);
2443 	if (portp == NULL) {
2444 		DWARN(vgenp, NULL, "can't find port(%lx)\n", port_num);
2445 		RW_EXIT(&plistp->rwlock);
2446 		return (DDI_FAILURE);
2447 	}
2448 
2449 	vgen_port_detach_mdeg(portp);
2450 	RW_EXIT(&plistp->rwlock);
2451 
2452 	return (DDI_SUCCESS);
2453 }
2454 
2455 /* attach a port to the device based on mdeg data */
2456 static int
2457 vgen_port_attach(vgen_port_t *portp)
2458 {
2459 	int			i;
2460 	vgen_portlist_t		*plistp;
2461 	vgen_t			*vgenp;
2462 	uint64_t		*ldcids;
2463 	uint32_t		num_ldcs;
2464 
2465 	ASSERT(portp != NULL);
2466 
2467 	vgenp = portp->vgenp;
2468 	ldcids = portp->ldc_ids;
2469 	num_ldcs = portp->num_ldcs;
2470 
2471 	DBG1(vgenp, NULL, "port_num(%d)\n", portp->port_num);
2472 
2473 	rw_init(&portp->ldclist.rwlock, NULL, RW_DRIVER, NULL);
2474 	portp->ldclist.headp = NULL;
2475 
2476 	for (i = 0; i < num_ldcs; i++) {
2477 		DBG2(vgenp, NULL, "ldcid (%lx)\n", ldcids[i]);
2478 		if (vgen_ldc_attach(portp, ldcids[i]) == DDI_FAILURE) {
2479 			vgen_port_detach(portp);
2480 			return (DDI_FAILURE);
2481 		}
2482 	}
2483 
2484 	/* create vlan id hash table */
2485 	vgen_vlan_create_hash(portp);
2486 
2487 	/* link it into the list of ports */
2488 	plistp = &(vgenp->vgenports);
2489 	WRITE_ENTER(&plistp->rwlock);
2490 	vgen_port_list_insert(portp);
2491 	RW_EXIT(&plistp->rwlock);
2492 
2493 	if (vgenp->flags & VGEN_STARTED) {	/* interface is configured */
2494 		vgen_port_init(portp);
2495 	}
2496 
2497 	DBG1(vgenp, NULL, "exit: port_num(%d)\n", portp->port_num);
2498 	return (DDI_SUCCESS);
2499 }
2500 
2501 /* detach a port from the device based on mdeg data */
2502 static void
2503 vgen_port_detach_mdeg(vgen_port_t *portp)
2504 {
2505 	vgen_t *vgenp = portp->vgenp;
2506 
2507 	DBG1(vgenp, NULL, "enter: port_num(%d)\n", portp->port_num);
2508 	/* stop the port if needed */
2509 	if (vgenp->flags & VGEN_STARTED) {
2510 		vgen_port_uninit(portp);
2511 	}
2512 	vgen_port_detach(portp);
2513 
2514 	DBG1(vgenp, NULL, "exit: port_num(%d)\n", portp->port_num);
2515 }
2516 
2517 static int
2518 vgen_update_port(vgen_t *vgenp, md_t *curr_mdp, mde_cookie_t curr_mdex,
2519 	md_t *prev_mdp, mde_cookie_t prev_mdex)
2520 {
2521 	uint64_t	cport_num;
2522 	uint64_t	pport_num;
2523 	vgen_portlist_t	*plistp;
2524 	vgen_port_t	*portp;
2525 	boolean_t	updated_vlans = B_FALSE;
2526 	uint16_t	pvid;
2527 	uint16_t	*vids;
2528 	uint16_t	nvids;
2529 
2530 	/*
2531 	 * For now, we get port updates only if vlan ids changed.
2532 	 * We read the port num and do some sanity check.
2533 	 */
2534 	if (md_get_prop_val(curr_mdp, curr_mdex, id_propname, &cport_num)) {
2535 		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
2536 		return (DDI_FAILURE);
2537 	}
2538 
2539 	if (md_get_prop_val(prev_mdp, prev_mdex, id_propname, &pport_num)) {
2540 		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
2541 		return (DDI_FAILURE);
2542 	}
2543 	if (cport_num != pport_num)
2544 		return (DDI_FAILURE);
2545 
2546 	plistp = &(vgenp->vgenports);
2547 
2548 	READ_ENTER(&plistp->rwlock);
2549 
2550 	portp = vgen_port_lookup(plistp, (int)cport_num);
2551 	if (portp == NULL) {
2552 		DWARN(vgenp, NULL, "can't find port(%lx)\n", cport_num);
2553 		RW_EXIT(&plistp->rwlock);
2554 		return (DDI_FAILURE);
2555 	}
2556 
2557 	/* Read the vlan ids */
2558 	vgen_vlan_read_ids(portp, VGEN_PEER, curr_mdp, curr_mdex, &pvid, &vids,
2559 	    &nvids, NULL);
2560 
2561 	/* Determine if there are any vlan id updates */
2562 	if ((pvid != portp->pvid) ||		/* pvid changed? */
2563 	    (nvids != portp->nvids) ||		/* # of vids changed? */
2564 	    ((nvids != 0) && (portp->nvids != 0) &&	/* vids changed? */
2565 	    bcmp(vids, portp->vids, sizeof (uint16_t) * nvids))) {
2566 		updated_vlans = B_TRUE;
2567 	}
2568 
2569 	if (updated_vlans == B_FALSE) {
2570 		RW_EXIT(&plistp->rwlock);
2571 		return (DDI_FAILURE);
2572 	}
2573 
2574 	/* remove the port from vlans it has been assigned to */
2575 	vgen_vlan_remove_ids(portp);
2576 
2577 	/* save the new vlan ids */
2578 	portp->pvid = pvid;
2579 	if (portp->nvids != 0) {
2580 		kmem_free(portp->vids, sizeof (uint16_t) * portp->nvids);
2581 		portp->nvids = 0;
2582 	}
2583 	if (nvids != 0) {
2584 		portp->vids = kmem_zalloc(sizeof (uint16_t) * nvids, KM_SLEEP);
2585 		bcopy(vids, portp->vids, sizeof (uint16_t) * nvids);
2586 		portp->nvids = nvids;
2587 		kmem_free(vids, sizeof (uint16_t) * nvids);
2588 	}
2589 
2590 	/* add port to the new vlans */
2591 	vgen_vlan_add_ids(portp);
2592 
2593 	/* reset the port if it is vlan unaware (ver < 1.3) */
2594 	vgen_vlan_unaware_port_reset(portp);
2595 
2596 	RW_EXIT(&plistp->rwlock);
2597 
2598 	return (DDI_SUCCESS);
2599 }
2600 
2601 static uint64_t
2602 vgen_port_stat(vgen_port_t *portp, uint_t stat)
2603 {
2604 	vgen_ldclist_t	*ldclp;
2605 	vgen_ldc_t *ldcp;
2606 	uint64_t	val;
2607 
2608 	val = 0;
2609 	ldclp = &portp->ldclist;
2610 
2611 	READ_ENTER(&ldclp->rwlock);
2612 	for (ldcp = ldclp->headp; ldcp != NULL; ldcp = ldcp->nextp) {
2613 		val += vgen_ldc_stat(ldcp, stat);
2614 	}
2615 	RW_EXIT(&ldclp->rwlock);
2616 
2617 	return (val);
2618 }
2619 
2620 /* attach the channel corresponding to the given ldc_id to the port */
2621 static int
2622 vgen_ldc_attach(vgen_port_t *portp, uint64_t ldc_id)
2623 {
2624 	vgen_t 		*vgenp;
2625 	vgen_ldclist_t	*ldclp;
2626 	vgen_ldc_t 	*ldcp, **prev_ldcp;
2627 	ldc_attr_t 	attr;
2628 	int 		status;
2629 	ldc_status_t	istatus;
2630 	char		kname[MAXNAMELEN];
2631 	int		instance;
2632 	enum	{AST_init = 0x0, AST_ldc_alloc = 0x1,
2633 		AST_mutex_init = 0x2, AST_ldc_init = 0x4,
2634 		AST_ldc_reg_cb = 0x8, AST_alloc_tx_ring = 0x10,
2635 		AST_create_rxmblks = 0x20,
2636 		AST_create_rcv_thread = 0x40} attach_state;
2637 
2638 	attach_state = AST_init;
2639 	vgenp = portp->vgenp;
2640 	ldclp = &portp->ldclist;
2641 
2642 	ldcp = kmem_zalloc(sizeof (vgen_ldc_t), KM_NOSLEEP);
2643 	if (ldcp == NULL) {
2644 		goto ldc_attach_failed;
2645 	}
2646 	ldcp->ldc_id = ldc_id;
2647 	ldcp->portp = portp;
2648 
2649 	attach_state |= AST_ldc_alloc;
2650 
2651 	mutex_init(&ldcp->txlock, NULL, MUTEX_DRIVER, NULL);
2652 	mutex_init(&ldcp->cblock, NULL, MUTEX_DRIVER, NULL);
2653 	mutex_init(&ldcp->tclock, NULL, MUTEX_DRIVER, NULL);
2654 	mutex_init(&ldcp->wrlock, NULL, MUTEX_DRIVER, NULL);
2655 	mutex_init(&ldcp->rxlock, NULL, MUTEX_DRIVER, NULL);
2656 
2657 	attach_state |= AST_mutex_init;
2658 
2659 	attr.devclass = LDC_DEV_NT;
2660 	attr.instance = ddi_get_instance(vgenp->vnetdip);
2661 	attr.mode = LDC_MODE_UNRELIABLE;
2662 	attr.mtu = vnet_ldc_mtu;
2663 	status = ldc_init(ldc_id, &attr, &ldcp->ldc_handle);
2664 	if (status != 0) {
2665 		DWARN(vgenp, ldcp, "ldc_init failed,rv (%d)\n", status);
2666 		goto ldc_attach_failed;
2667 	}
2668 	attach_state |= AST_ldc_init;
2669 
2670 	if (vgen_rcv_thread_enabled) {
2671 		ldcp->rcv_thr_flags = 0;
2672 
2673 		mutex_init(&ldcp->rcv_thr_lock, NULL, MUTEX_DRIVER, NULL);
2674 		cv_init(&ldcp->rcv_thr_cv, NULL, CV_DRIVER, NULL);
2675 		ldcp->rcv_thread = thread_create(NULL, 2 * DEFAULTSTKSZ,
2676 		    vgen_ldc_rcv_worker, ldcp, 0, &p0, TS_RUN, maxclsyspri);
2677 
2678 		attach_state |= AST_create_rcv_thread;
2679 		if (ldcp->rcv_thread == NULL) {
2680 			DWARN(vgenp, ldcp, "Failed to create worker thread");
2681 			goto ldc_attach_failed;
2682 		}
2683 	}
2684 
2685 	status = ldc_reg_callback(ldcp->ldc_handle, vgen_ldc_cb, (caddr_t)ldcp);
2686 	if (status != 0) {
2687 		DWARN(vgenp, ldcp, "ldc_reg_callback failed, rv (%d)\n",
2688 		    status);
2689 		goto ldc_attach_failed;
2690 	}
2691 	/*
2692 	 * allocate a message for ldc_read()s, big enough to hold ctrl and
2693 	 * data msgs, including raw data msgs used to recv priority frames.
2694 	 */
2695 	ldcp->msglen = VIO_PKT_DATA_HDRSIZE + vgenp->max_frame_size;
2696 	ldcp->ldcmsg = kmem_alloc(ldcp->msglen, KM_SLEEP);
2697 	attach_state |= AST_ldc_reg_cb;
2698 
2699 	(void) ldc_status(ldcp->ldc_handle, &istatus);
2700 	ASSERT(istatus == LDC_INIT);
2701 	ldcp->ldc_status = istatus;
2702 
2703 	/* allocate transmit resources */
2704 	status = vgen_alloc_tx_ring(ldcp);
2705 	if (status != 0) {
2706 		goto ldc_attach_failed;
2707 	}
2708 	attach_state |= AST_alloc_tx_ring;
2709 
2710 	/* allocate receive resources */
2711 	status = vio_init_multipools(&ldcp->vmp, VGEN_NUM_VMPOOLS,
2712 	    vgen_rbufsz1, vgen_rbufsz2, vgen_rbufsz3,
2713 	    vgen_nrbufs1, vgen_nrbufs2, vgen_nrbufs3);
2714 	if (status != 0) {
2715 		goto ldc_attach_failed;
2716 	}
2717 	attach_state |= AST_create_rxmblks;
2718 
2719 	/* Setup kstats for the channel */
2720 	instance = ddi_get_instance(vgenp->vnetdip);
2721 	(void) sprintf(kname, "vnetldc0x%lx", ldcp->ldc_id);
2722 	ldcp->ksp = vgen_setup_kstats("vnet", instance, kname, &ldcp->stats);
2723 	if (ldcp->ksp == NULL) {
2724 		goto ldc_attach_failed;
2725 	}
2726 
2727 	/* initialize vgen_versions supported */
2728 	bcopy(vgen_versions, ldcp->vgen_versions, sizeof (ldcp->vgen_versions));
2729 	vgen_reset_vnet_proto_ops(ldcp);
2730 
2731 	/* link it into the list of channels for this port */
2732 	WRITE_ENTER(&ldclp->rwlock);
2733 	prev_ldcp = (vgen_ldc_t **)(&ldclp->headp);
2734 	ldcp->nextp = *prev_ldcp;
2735 	*prev_ldcp = ldcp;
2736 	RW_EXIT(&ldclp->rwlock);
2737 
2738 	ldcp->flags |= CHANNEL_ATTACHED;
2739 	return (DDI_SUCCESS);
2740 
2741 ldc_attach_failed:
2742 	if (attach_state & AST_ldc_reg_cb) {
2743 		(void) ldc_unreg_callback(ldcp->ldc_handle);
2744 		kmem_free(ldcp->ldcmsg, ldcp->msglen);
2745 	}
2746 	if (attach_state & AST_create_rcv_thread) {
2747 		if (ldcp->rcv_thread != NULL) {
2748 			vgen_stop_rcv_thread(ldcp);
2749 		}
2750 		mutex_destroy(&ldcp->rcv_thr_lock);
2751 		cv_destroy(&ldcp->rcv_thr_cv);
2752 	}
2753 	if (attach_state & AST_create_rxmblks) {
2754 		vio_mblk_pool_t *fvmp = NULL;
2755 
2756 		vio_destroy_multipools(&ldcp->vmp, &fvmp);
2757 		ASSERT(fvmp == NULL);
2758 	}
2759 	if (attach_state & AST_alloc_tx_ring) {
2760 		vgen_free_tx_ring(ldcp);
2761 	}
2762 	if (attach_state & AST_ldc_init) {
2763 		(void) ldc_fini(ldcp->ldc_handle);
2764 	}
2765 	if (attach_state & AST_mutex_init) {
2766 		mutex_destroy(&ldcp->tclock);
2767 		mutex_destroy(&ldcp->txlock);
2768 		mutex_destroy(&ldcp->cblock);
2769 		mutex_destroy(&ldcp->wrlock);
2770 		mutex_destroy(&ldcp->rxlock);
2771 	}
2772 	if (attach_state & AST_ldc_alloc) {
2773 		KMEM_FREE(ldcp);
2774 	}
2775 	return (DDI_FAILURE);
2776 }
2777 
2778 /* detach a channel from the port */
2779 static void
2780 vgen_ldc_detach(vgen_ldc_t *ldcp)
2781 {
2782 	vgen_port_t	*portp;
2783 	vgen_t 		*vgenp;
2784 	vgen_ldc_t 	*pldcp;
2785 	vgen_ldc_t	**prev_ldcp;
2786 	vgen_ldclist_t	*ldclp;
2787 
2788 	portp = ldcp->portp;
2789 	vgenp = portp->vgenp;
2790 	ldclp = &portp->ldclist;
2791 
2792 	prev_ldcp =  (vgen_ldc_t **)&ldclp->headp;
2793 	for (; (pldcp = *prev_ldcp) != NULL; prev_ldcp = &pldcp->nextp) {
2794 		if (pldcp == ldcp) {
2795 			break;
2796 		}
2797 	}
2798 
2799 	if (pldcp == NULL) {
2800 		/* invalid ldcp? */
2801 		return;
2802 	}
2803 
2804 	if (ldcp->ldc_status != LDC_INIT) {
2805 		DWARN(vgenp, ldcp, "ldc_status is not INIT\n");
2806 	}
2807 
2808 	if (ldcp->flags & CHANNEL_ATTACHED) {
2809 		ldcp->flags &= ~(CHANNEL_ATTACHED);
2810 
2811 		(void) ldc_unreg_callback(ldcp->ldc_handle);
2812 		if (ldcp->rcv_thread != NULL) {
2813 			/* First stop the receive thread */
2814 			vgen_stop_rcv_thread(ldcp);
2815 			mutex_destroy(&ldcp->rcv_thr_lock);
2816 			cv_destroy(&ldcp->rcv_thr_cv);
2817 		}
2818 		kmem_free(ldcp->ldcmsg, ldcp->msglen);
2819 
2820 		vgen_destroy_kstats(ldcp->ksp);
2821 		ldcp->ksp = NULL;
2822 
2823 		/*
2824 		 * if we cannot reclaim all mblks, put this
2825 		 * on the list of pools(vgenp->rmp) to be reclaimed when the
2826 		 * device gets detached (see vgen_uninit()).
2827 		 */
2828 		vio_destroy_multipools(&ldcp->vmp, &vgenp->rmp);
2829 
2830 		/* free transmit resources */
2831 		vgen_free_tx_ring(ldcp);
2832 
2833 		(void) ldc_fini(ldcp->ldc_handle);
2834 		mutex_destroy(&ldcp->tclock);
2835 		mutex_destroy(&ldcp->txlock);
2836 		mutex_destroy(&ldcp->cblock);
2837 		mutex_destroy(&ldcp->wrlock);
2838 		mutex_destroy(&ldcp->rxlock);
2839 
2840 		/* unlink it from the list */
2841 		*prev_ldcp = ldcp->nextp;
2842 		KMEM_FREE(ldcp);
2843 	}
2844 }
2845 
2846 /*
2847  * This function allocates transmit resources for the channel.
2848  * The resources consist of a transmit descriptor ring and an associated
2849  * transmit buffer ring.
2850  */
2851 static int
2852 vgen_alloc_tx_ring(vgen_ldc_t *ldcp)
2853 {
2854 	void *tbufp;
2855 	ldc_mem_info_t minfo;
2856 	uint32_t txdsize;
2857 	uint32_t tbufsize;
2858 	int status;
2859 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
2860 
2861 	ldcp->num_txds = vnet_ntxds;
2862 	txdsize = sizeof (vnet_public_desc_t);
2863 	tbufsize = sizeof (vgen_private_desc_t);
2864 
2865 	/* allocate transmit buffer ring */
2866 	tbufp = kmem_zalloc(ldcp->num_txds * tbufsize, KM_NOSLEEP);
2867 	if (tbufp == NULL) {
2868 		return (DDI_FAILURE);
2869 	}
2870 
2871 	/* create transmit descriptor ring */
2872 	status = ldc_mem_dring_create(ldcp->num_txds, txdsize,
2873 	    &ldcp->tx_dhandle);
2874 	if (status) {
2875 		DWARN(vgenp, ldcp, "ldc_mem_dring_create() failed\n");
2876 		kmem_free(tbufp, ldcp->num_txds * tbufsize);
2877 		return (DDI_FAILURE);
2878 	}
2879 
2880 	/* get the addr of descripror ring */
2881 	status = ldc_mem_dring_info(ldcp->tx_dhandle, &minfo);
2882 	if (status) {
2883 		DWARN(vgenp, ldcp, "ldc_mem_dring_info() failed\n");
2884 		kmem_free(tbufp, ldcp->num_txds * tbufsize);
2885 		(void) ldc_mem_dring_destroy(ldcp->tx_dhandle);
2886 		ldcp->tbufp = NULL;
2887 		return (DDI_FAILURE);
2888 	}
2889 	ldcp->txdp = (vnet_public_desc_t *)(minfo.vaddr);
2890 	ldcp->tbufp = tbufp;
2891 
2892 	ldcp->txdendp = &((ldcp->txdp)[ldcp->num_txds]);
2893 	ldcp->tbufendp = &((ldcp->tbufp)[ldcp->num_txds]);
2894 
2895 	return (DDI_SUCCESS);
2896 }
2897 
2898 /* Free transmit resources for the channel */
2899 static void
2900 vgen_free_tx_ring(vgen_ldc_t *ldcp)
2901 {
2902 	int tbufsize = sizeof (vgen_private_desc_t);
2903 
2904 	/* free transmit descriptor ring */
2905 	(void) ldc_mem_dring_destroy(ldcp->tx_dhandle);
2906 
2907 	/* free transmit buffer ring */
2908 	kmem_free(ldcp->tbufp, ldcp->num_txds * tbufsize);
2909 	ldcp->txdp = ldcp->txdendp = NULL;
2910 	ldcp->tbufp = ldcp->tbufendp = NULL;
2911 }
2912 
2913 /* enable transmit/receive on the channels for the port */
2914 static void
2915 vgen_init_ldcs(vgen_port_t *portp)
2916 {
2917 	vgen_ldclist_t	*ldclp = &portp->ldclist;
2918 	vgen_ldc_t	*ldcp;
2919 
2920 	READ_ENTER(&ldclp->rwlock);
2921 	ldcp =  ldclp->headp;
2922 	for (; ldcp  != NULL; ldcp = ldcp->nextp) {
2923 		(void) vgen_ldc_init(ldcp);
2924 	}
2925 	RW_EXIT(&ldclp->rwlock);
2926 }
2927 
2928 /* stop transmit/receive on the channels for the port */
2929 static void
2930 vgen_uninit_ldcs(vgen_port_t *portp)
2931 {
2932 	vgen_ldclist_t	*ldclp = &portp->ldclist;
2933 	vgen_ldc_t	*ldcp;
2934 
2935 	READ_ENTER(&ldclp->rwlock);
2936 	ldcp =  ldclp->headp;
2937 	for (; ldcp  != NULL; ldcp = ldcp->nextp) {
2938 		vgen_ldc_uninit(ldcp);
2939 	}
2940 	RW_EXIT(&ldclp->rwlock);
2941 }
2942 
2943 /* enable transmit/receive on the channel */
2944 static int
2945 vgen_ldc_init(vgen_ldc_t *ldcp)
2946 {
2947 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
2948 	ldc_status_t	istatus;
2949 	int		rv;
2950 	uint32_t	retries = 0;
2951 	enum	{ ST_init = 0x0, ST_ldc_open = 0x1,
2952 		ST_init_tbufs = 0x2, ST_cb_enable = 0x4} init_state;
2953 	init_state = ST_init;
2954 
2955 	DBG1(vgenp, ldcp, "enter\n");
2956 	LDC_LOCK(ldcp);
2957 
2958 	rv = ldc_open(ldcp->ldc_handle);
2959 	if (rv != 0) {
2960 		DWARN(vgenp, ldcp, "ldc_open failed: rv(%d)\n", rv);
2961 		goto ldcinit_failed;
2962 	}
2963 	init_state |= ST_ldc_open;
2964 
2965 	(void) ldc_status(ldcp->ldc_handle, &istatus);
2966 	if (istatus != LDC_OPEN && istatus != LDC_READY) {
2967 		DWARN(vgenp, ldcp, "status(%d) is not OPEN/READY\n", istatus);
2968 		goto ldcinit_failed;
2969 	}
2970 	ldcp->ldc_status = istatus;
2971 
2972 	rv = vgen_init_tbufs(ldcp);
2973 	if (rv != 0) {
2974 		DWARN(vgenp, ldcp, "vgen_init_tbufs() failed\n");
2975 		goto ldcinit_failed;
2976 	}
2977 	init_state |= ST_init_tbufs;
2978 
2979 	rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_ENABLE);
2980 	if (rv != 0) {
2981 		DWARN(vgenp, ldcp, "ldc_set_cb_mode failed: rv(%d)\n", rv);
2982 		goto ldcinit_failed;
2983 	}
2984 
2985 	init_state |= ST_cb_enable;
2986 
2987 	do {
2988 		rv = ldc_up(ldcp->ldc_handle);
2989 		if ((rv != 0) && (rv == EWOULDBLOCK)) {
2990 			DBG2(vgenp, ldcp, "ldc_up err rv(%d)\n", rv);
2991 			drv_usecwait(VGEN_LDC_UP_DELAY);
2992 		}
2993 		if (retries++ >= vgen_ldcup_retries)
2994 			break;
2995 	} while (rv == EWOULDBLOCK);
2996 
2997 	(void) ldc_status(ldcp->ldc_handle, &istatus);
2998 	if (istatus == LDC_UP) {
2999 		DWARN(vgenp, ldcp, "status(%d) is UP\n", istatus);
3000 	}
3001 
3002 	ldcp->ldc_status = istatus;
3003 
3004 	/* initialize transmit watchdog timeout */
3005 	ldcp->wd_tid = timeout(vgen_ldc_watchdog, (caddr_t)ldcp,
3006 	    drv_usectohz(vnet_ldcwd_interval * 1000));
3007 
3008 	ldcp->hphase = -1;
3009 	ldcp->flags |= CHANNEL_STARTED;
3010 
3011 	/* if channel is already UP - start handshake */
3012 	if (istatus == LDC_UP) {
3013 		vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3014 		if (ldcp->portp != vgenp->vsw_portp) {
3015 			/*
3016 			 * modify fdb entry to use this port as the channel is
3017 			 * up, instead of going through the vsw-port (see
3018 			 * comments in vgen_port_init())
3019 			 */
3020 			vgen_fdbe_modify(ldcp->portp, B_FALSE, B_FALSE);
3021 		}
3022 
3023 		/* Initialize local session id */
3024 		ldcp->local_sid = ddi_get_lbolt();
3025 
3026 		/* clear peer session id */
3027 		ldcp->peer_sid = 0;
3028 		ldcp->hretries = 0;
3029 
3030 		/* Initiate Handshake process with peer ldc endpoint */
3031 		vgen_reset_hphase(ldcp);
3032 
3033 		mutex_exit(&ldcp->tclock);
3034 		mutex_exit(&ldcp->txlock);
3035 		mutex_exit(&ldcp->wrlock);
3036 		mutex_exit(&ldcp->rxlock);
3037 		vgen_handshake(vh_nextphase(ldcp));
3038 		mutex_exit(&ldcp->cblock);
3039 	} else {
3040 		LDC_UNLOCK(ldcp);
3041 	}
3042 
3043 	return (DDI_SUCCESS);
3044 
3045 ldcinit_failed:
3046 	if (init_state & ST_cb_enable) {
3047 		(void) ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE);
3048 	}
3049 	if (init_state & ST_init_tbufs) {
3050 		vgen_uninit_tbufs(ldcp);
3051 	}
3052 	if (init_state & ST_ldc_open) {
3053 		(void) ldc_close(ldcp->ldc_handle);
3054 	}
3055 	LDC_UNLOCK(ldcp);
3056 	DBG1(vgenp, ldcp, "exit\n");
3057 	return (DDI_FAILURE);
3058 }
3059 
3060 /* stop transmit/receive on the channel */
3061 static void
3062 vgen_ldc_uninit(vgen_ldc_t *ldcp)
3063 {
3064 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3065 	int	rv;
3066 
3067 	DBG1(vgenp, ldcp, "enter\n");
3068 	LDC_LOCK(ldcp);
3069 
3070 	if ((ldcp->flags & CHANNEL_STARTED) == 0) {
3071 		LDC_UNLOCK(ldcp);
3072 		DWARN(vgenp, ldcp, "CHANNEL_STARTED flag is not set\n");
3073 		return;
3074 	}
3075 
3076 	/* disable further callbacks */
3077 	rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE);
3078 	if (rv != 0) {
3079 		DWARN(vgenp, ldcp, "ldc_set_cb_mode failed\n");
3080 	}
3081 
3082 	/*
3083 	 * clear handshake done bit and wait for pending tx and cb to finish.
3084 	 * release locks before untimeout(9F) is invoked to cancel timeouts.
3085 	 */
3086 	ldcp->hphase &= ~(VH_DONE);
3087 	LDC_UNLOCK(ldcp);
3088 
3089 	/* cancel handshake watchdog timeout */
3090 	if (ldcp->htid) {
3091 		(void) untimeout(ldcp->htid);
3092 		ldcp->htid = 0;
3093 	}
3094 
3095 	/* cancel transmit watchdog timeout */
3096 	if (ldcp->wd_tid) {
3097 		(void) untimeout(ldcp->wd_tid);
3098 		ldcp->wd_tid = 0;
3099 	}
3100 
3101 	drv_usecwait(1000);
3102 
3103 	/* acquire locks again; any pending transmits and callbacks are done */
3104 	LDC_LOCK(ldcp);
3105 
3106 	vgen_reset_hphase(ldcp);
3107 
3108 	vgen_uninit_tbufs(ldcp);
3109 
3110 	rv = ldc_close(ldcp->ldc_handle);
3111 	if (rv != 0) {
3112 		DWARN(vgenp, ldcp, "ldc_close err\n");
3113 	}
3114 	ldcp->ldc_status = LDC_INIT;
3115 	ldcp->flags &= ~(CHANNEL_STARTED);
3116 
3117 	LDC_UNLOCK(ldcp);
3118 
3119 	DBG1(vgenp, ldcp, "exit\n");
3120 }
3121 
3122 /* Initialize the transmit buffer ring for the channel */
3123 static int
3124 vgen_init_tbufs(vgen_ldc_t *ldcp)
3125 {
3126 	vgen_private_desc_t	*tbufp;
3127 	vnet_public_desc_t	*txdp;
3128 	vio_dring_entry_hdr_t		*hdrp;
3129 	int 			i;
3130 	int 			rv;
3131 	caddr_t			datap = NULL;
3132 	int			ci;
3133 	uint32_t		ncookies;
3134 	size_t			data_sz;
3135 	vgen_t			*vgenp;
3136 
3137 	vgenp = LDC_TO_VGEN(ldcp);
3138 
3139 	bzero(ldcp->tbufp, sizeof (*tbufp) * (ldcp->num_txds));
3140 	bzero(ldcp->txdp, sizeof (*txdp) * (ldcp->num_txds));
3141 
3142 	data_sz = vgenp->max_frame_size + VNET_IPALIGN + VNET_LDCALIGN;
3143 	data_sz = VNET_ROUNDUP_2K(data_sz);
3144 	ldcp->tx_data_sz = data_sz * ldcp->num_txds;
3145 	datap = kmem_zalloc(ldcp->tx_data_sz, KM_SLEEP);
3146 	ldcp->tx_datap = datap;
3147 
3148 	/*
3149 	 * for each private descriptor, allocate a ldc mem_handle which is
3150 	 * required to map the data during transmit, set the flags
3151 	 * to free (available for use by transmit routine).
3152 	 */
3153 
3154 	for (i = 0; i < ldcp->num_txds; i++) {
3155 
3156 		tbufp = &(ldcp->tbufp[i]);
3157 		rv = ldc_mem_alloc_handle(ldcp->ldc_handle,
3158 		    &(tbufp->memhandle));
3159 		if (rv) {
3160 			tbufp->memhandle = 0;
3161 			goto init_tbufs_failed;
3162 		}
3163 
3164 		/*
3165 		 * bind ldc memhandle to the corresponding transmit buffer.
3166 		 */
3167 		ci = ncookies = 0;
3168 		rv = ldc_mem_bind_handle(tbufp->memhandle,
3169 		    (caddr_t)datap, data_sz, LDC_SHADOW_MAP,
3170 		    LDC_MEM_R, &(tbufp->memcookie[ci]), &ncookies);
3171 		if (rv != 0) {
3172 			goto init_tbufs_failed;
3173 		}
3174 
3175 		/*
3176 		 * successful in binding the handle to tx data buffer.
3177 		 * set datap in the private descr to this buffer.
3178 		 */
3179 		tbufp->datap = datap;
3180 
3181 		if ((ncookies == 0) ||
3182 		    (ncookies > MAX_COOKIES)) {
3183 			goto init_tbufs_failed;
3184 		}
3185 
3186 		for (ci = 1; ci < ncookies; ci++) {
3187 			rv = ldc_mem_nextcookie(tbufp->memhandle,
3188 			    &(tbufp->memcookie[ci]));
3189 			if (rv != 0) {
3190 				goto init_tbufs_failed;
3191 			}
3192 		}
3193 
3194 		tbufp->ncookies = ncookies;
3195 		datap += data_sz;
3196 
3197 		tbufp->flags = VGEN_PRIV_DESC_FREE;
3198 		txdp = &(ldcp->txdp[i]);
3199 		hdrp = &txdp->hdr;
3200 		hdrp->dstate = VIO_DESC_FREE;
3201 		hdrp->ack = B_FALSE;
3202 		tbufp->descp = txdp;
3203 
3204 	}
3205 
3206 	/* reset tbuf walking pointers */
3207 	ldcp->next_tbufp = ldcp->tbufp;
3208 	ldcp->cur_tbufp = ldcp->tbufp;
3209 
3210 	/* initialize tx seqnum and index */
3211 	ldcp->next_txseq = VNET_ISS;
3212 	ldcp->next_txi = 0;
3213 
3214 	ldcp->resched_peer = B_TRUE;
3215 	ldcp->resched_peer_txi = 0;
3216 
3217 	return (DDI_SUCCESS);
3218 
3219 init_tbufs_failed:;
3220 	vgen_uninit_tbufs(ldcp);
3221 	return (DDI_FAILURE);
3222 }
3223 
3224 /* Uninitialize transmit buffer ring for the channel */
3225 static void
3226 vgen_uninit_tbufs(vgen_ldc_t *ldcp)
3227 {
3228 	vgen_private_desc_t	*tbufp = ldcp->tbufp;
3229 	int 			i;
3230 
3231 	/* for each tbuf (priv_desc), free ldc mem_handle */
3232 	for (i = 0; i < ldcp->num_txds; i++) {
3233 
3234 		tbufp = &(ldcp->tbufp[i]);
3235 
3236 		if (tbufp->datap) { /* if bound to a ldc memhandle */
3237 			(void) ldc_mem_unbind_handle(tbufp->memhandle);
3238 			tbufp->datap = NULL;
3239 		}
3240 		if (tbufp->memhandle) {
3241 			(void) ldc_mem_free_handle(tbufp->memhandle);
3242 			tbufp->memhandle = 0;
3243 		}
3244 	}
3245 
3246 	if (ldcp->tx_datap) {
3247 		/* prealloc'd tx data buffer */
3248 		kmem_free(ldcp->tx_datap, ldcp->tx_data_sz);
3249 		ldcp->tx_datap = NULL;
3250 		ldcp->tx_data_sz = 0;
3251 	}
3252 
3253 	bzero(ldcp->tbufp, sizeof (vgen_private_desc_t) * (ldcp->num_txds));
3254 	bzero(ldcp->txdp, sizeof (vnet_public_desc_t) * (ldcp->num_txds));
3255 }
3256 
3257 /* clobber tx descriptor ring */
3258 static void
3259 vgen_clobber_tbufs(vgen_ldc_t *ldcp)
3260 {
3261 	vnet_public_desc_t	*txdp;
3262 	vgen_private_desc_t	*tbufp;
3263 	vio_dring_entry_hdr_t	*hdrp;
3264 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3265 	int i;
3266 #ifdef DEBUG
3267 	int ndone = 0;
3268 #endif
3269 
3270 	for (i = 0; i < ldcp->num_txds; i++) {
3271 
3272 		tbufp = &(ldcp->tbufp[i]);
3273 		txdp = tbufp->descp;
3274 		hdrp = &txdp->hdr;
3275 
3276 		if (tbufp->flags & VGEN_PRIV_DESC_BUSY) {
3277 			tbufp->flags = VGEN_PRIV_DESC_FREE;
3278 #ifdef DEBUG
3279 			if (hdrp->dstate == VIO_DESC_DONE)
3280 				ndone++;
3281 #endif
3282 			hdrp->dstate = VIO_DESC_FREE;
3283 			hdrp->ack = B_FALSE;
3284 		}
3285 	}
3286 	/* reset tbuf walking pointers */
3287 	ldcp->next_tbufp = ldcp->tbufp;
3288 	ldcp->cur_tbufp = ldcp->tbufp;
3289 
3290 	/* reset tx seqnum and index */
3291 	ldcp->next_txseq = VNET_ISS;
3292 	ldcp->next_txi = 0;
3293 
3294 	ldcp->resched_peer = B_TRUE;
3295 	ldcp->resched_peer_txi = 0;
3296 
3297 	DBG2(vgenp, ldcp, "num descrs done (%d)\n", ndone);
3298 }
3299 
3300 /* clobber receive descriptor ring */
3301 static void
3302 vgen_clobber_rxds(vgen_ldc_t *ldcp)
3303 {
3304 	ldcp->rx_dhandle = 0;
3305 	bzero(&ldcp->rx_dcookie, sizeof (ldcp->rx_dcookie));
3306 	ldcp->rxdp = NULL;
3307 	ldcp->next_rxi = 0;
3308 	ldcp->num_rxds = 0;
3309 	ldcp->next_rxseq = VNET_ISS;
3310 }
3311 
3312 /* initialize receive descriptor ring */
3313 static int
3314 vgen_init_rxds(vgen_ldc_t *ldcp, uint32_t num_desc, uint32_t desc_size,
3315 	ldc_mem_cookie_t *dcookie, uint32_t ncookies)
3316 {
3317 	int rv;
3318 	ldc_mem_info_t minfo;
3319 
3320 	rv = ldc_mem_dring_map(ldcp->ldc_handle, dcookie, ncookies, num_desc,
3321 	    desc_size, LDC_SHADOW_MAP, &(ldcp->rx_dhandle));
3322 	if (rv != 0) {
3323 		return (DDI_FAILURE);
3324 	}
3325 
3326 	/*
3327 	 * sucessfully mapped, now try to
3328 	 * get info about the mapped dring
3329 	 */
3330 	rv = ldc_mem_dring_info(ldcp->rx_dhandle, &minfo);
3331 	if (rv != 0) {
3332 		(void) ldc_mem_dring_unmap(ldcp->rx_dhandle);
3333 		return (DDI_FAILURE);
3334 	}
3335 
3336 	/*
3337 	 * save ring address, number of descriptors.
3338 	 */
3339 	ldcp->rxdp = (vnet_public_desc_t *)(minfo.vaddr);
3340 	bcopy(dcookie, &(ldcp->rx_dcookie), sizeof (*dcookie));
3341 	ldcp->num_rxdcookies = ncookies;
3342 	ldcp->num_rxds = num_desc;
3343 	ldcp->next_rxi = 0;
3344 	ldcp->next_rxseq = VNET_ISS;
3345 
3346 	return (DDI_SUCCESS);
3347 }
3348 
3349 /* get channel statistics */
3350 static uint64_t
3351 vgen_ldc_stat(vgen_ldc_t *ldcp, uint_t stat)
3352 {
3353 	vgen_stats_t *statsp;
3354 	uint64_t val;
3355 
3356 	val = 0;
3357 	statsp = &ldcp->stats;
3358 	switch (stat) {
3359 
3360 	case MAC_STAT_MULTIRCV:
3361 		val = statsp->multircv;
3362 		break;
3363 
3364 	case MAC_STAT_BRDCSTRCV:
3365 		val = statsp->brdcstrcv;
3366 		break;
3367 
3368 	case MAC_STAT_MULTIXMT:
3369 		val = statsp->multixmt;
3370 		break;
3371 
3372 	case MAC_STAT_BRDCSTXMT:
3373 		val = statsp->brdcstxmt;
3374 		break;
3375 
3376 	case MAC_STAT_NORCVBUF:
3377 		val = statsp->norcvbuf;
3378 		break;
3379 
3380 	case MAC_STAT_IERRORS:
3381 		val = statsp->ierrors;
3382 		break;
3383 
3384 	case MAC_STAT_NOXMTBUF:
3385 		val = statsp->noxmtbuf;
3386 		break;
3387 
3388 	case MAC_STAT_OERRORS:
3389 		val = statsp->oerrors;
3390 		break;
3391 
3392 	case MAC_STAT_COLLISIONS:
3393 		break;
3394 
3395 	case MAC_STAT_RBYTES:
3396 		val = statsp->rbytes;
3397 		break;
3398 
3399 	case MAC_STAT_IPACKETS:
3400 		val = statsp->ipackets;
3401 		break;
3402 
3403 	case MAC_STAT_OBYTES:
3404 		val = statsp->obytes;
3405 		break;
3406 
3407 	case MAC_STAT_OPACKETS:
3408 		val = statsp->opackets;
3409 		break;
3410 
3411 	/* stats not relevant to ldc, return 0 */
3412 	case MAC_STAT_IFSPEED:
3413 	case ETHER_STAT_ALIGN_ERRORS:
3414 	case ETHER_STAT_FCS_ERRORS:
3415 	case ETHER_STAT_FIRST_COLLISIONS:
3416 	case ETHER_STAT_MULTI_COLLISIONS:
3417 	case ETHER_STAT_DEFER_XMTS:
3418 	case ETHER_STAT_TX_LATE_COLLISIONS:
3419 	case ETHER_STAT_EX_COLLISIONS:
3420 	case ETHER_STAT_MACXMT_ERRORS:
3421 	case ETHER_STAT_CARRIER_ERRORS:
3422 	case ETHER_STAT_TOOLONG_ERRORS:
3423 	case ETHER_STAT_XCVR_ADDR:
3424 	case ETHER_STAT_XCVR_ID:
3425 	case ETHER_STAT_XCVR_INUSE:
3426 	case ETHER_STAT_CAP_1000FDX:
3427 	case ETHER_STAT_CAP_1000HDX:
3428 	case ETHER_STAT_CAP_100FDX:
3429 	case ETHER_STAT_CAP_100HDX:
3430 	case ETHER_STAT_CAP_10FDX:
3431 	case ETHER_STAT_CAP_10HDX:
3432 	case ETHER_STAT_CAP_ASMPAUSE:
3433 	case ETHER_STAT_CAP_PAUSE:
3434 	case ETHER_STAT_CAP_AUTONEG:
3435 	case ETHER_STAT_ADV_CAP_1000FDX:
3436 	case ETHER_STAT_ADV_CAP_1000HDX:
3437 	case ETHER_STAT_ADV_CAP_100FDX:
3438 	case ETHER_STAT_ADV_CAP_100HDX:
3439 	case ETHER_STAT_ADV_CAP_10FDX:
3440 	case ETHER_STAT_ADV_CAP_10HDX:
3441 	case ETHER_STAT_ADV_CAP_ASMPAUSE:
3442 	case ETHER_STAT_ADV_CAP_PAUSE:
3443 	case ETHER_STAT_ADV_CAP_AUTONEG:
3444 	case ETHER_STAT_LP_CAP_1000FDX:
3445 	case ETHER_STAT_LP_CAP_1000HDX:
3446 	case ETHER_STAT_LP_CAP_100FDX:
3447 	case ETHER_STAT_LP_CAP_100HDX:
3448 	case ETHER_STAT_LP_CAP_10FDX:
3449 	case ETHER_STAT_LP_CAP_10HDX:
3450 	case ETHER_STAT_LP_CAP_ASMPAUSE:
3451 	case ETHER_STAT_LP_CAP_PAUSE:
3452 	case ETHER_STAT_LP_CAP_AUTONEG:
3453 	case ETHER_STAT_LINK_ASMPAUSE:
3454 	case ETHER_STAT_LINK_PAUSE:
3455 	case ETHER_STAT_LINK_AUTONEG:
3456 	case ETHER_STAT_LINK_DUPLEX:
3457 	default:
3458 		val = 0;
3459 		break;
3460 
3461 	}
3462 	return (val);
3463 }
3464 
3465 /*
3466  * LDC channel is UP, start handshake process with peer. Flag tells
3467  * vnet_fdbe_modify() about the context: set to B_TRUE if this
3468  * function is being called from transmit routine, otherwise B_FALSE.
3469  */
3470 static void
3471 vgen_handle_evt_up(vgen_ldc_t *ldcp, boolean_t flag)
3472 {
3473 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
3474 
3475 	DBG1(vgenp, ldcp, "enter\n");
3476 
3477 	ASSERT(MUTEX_HELD(&ldcp->cblock));
3478 
3479 	if (ldcp->portp != vgenp->vsw_portp) {
3480 		/*
3481 		 * modify fdb entry to use this port as the channel is up,
3482 		 * instead of going through the vsw-port (see comments in
3483 		 * vgen_port_init())
3484 		 */
3485 		vgen_fdbe_modify(ldcp->portp, B_FALSE, flag);
3486 	}
3487 
3488 	/* Initialize local session id */
3489 	ldcp->local_sid = ddi_get_lbolt();
3490 
3491 	/* clear peer session id */
3492 	ldcp->peer_sid = 0;
3493 	ldcp->hretries = 0;
3494 
3495 	if (ldcp->hphase != VH_PHASE0) {
3496 		vgen_handshake_reset(ldcp);
3497 	}
3498 
3499 	/* Initiate Handshake process with peer ldc endpoint */
3500 	vgen_handshake(vh_nextphase(ldcp));
3501 
3502 	DBG1(vgenp, ldcp, "exit\n");
3503 }
3504 
3505 /*
3506  * LDC channel is Reset, terminate connection with peer and try to
3507  * bring the channel up again.
3508  * Flag tells vnet_fdbe_modify() about the context: set to B_TRUE if this
3509  * function is being called from transmit routine, otherwise B_FALSE.
3510  */
3511 static void
3512 vgen_handle_evt_reset(vgen_ldc_t *ldcp, boolean_t flag)
3513 {
3514 	ldc_status_t istatus;
3515 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
3516 	int	rv;
3517 
3518 	DBG1(vgenp, ldcp, "enter\n");
3519 
3520 	ASSERT(MUTEX_HELD(&ldcp->cblock));
3521 
3522 	if ((ldcp->portp != vgenp->vsw_portp) &&
3523 	    (vgenp->vsw_portp != NULL)) {
3524 		/*
3525 		 * modify fdb entry to use vsw-port  as the channel is reset
3526 		 * and we don't have a direct link to the destination (see
3527 		 * comments in vgen_port_init()).
3528 		 */
3529 		vgen_fdbe_modify(ldcp->portp, B_TRUE, flag);
3530 	}
3531 
3532 	if (ldcp->hphase != VH_PHASE0) {
3533 		vgen_handshake_reset(ldcp);
3534 	}
3535 
3536 	/* try to bring the channel up */
3537 	rv = ldc_up(ldcp->ldc_handle);
3538 	if (rv != 0) {
3539 		DWARN(vgenp, ldcp, "ldc_up err rv(%d)\n", rv);
3540 	}
3541 
3542 	if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
3543 		DWARN(vgenp, ldcp, "ldc_status err\n");
3544 	} else {
3545 		ldcp->ldc_status = istatus;
3546 	}
3547 
3548 	/* if channel is already UP - restart handshake */
3549 	if (ldcp->ldc_status == LDC_UP) {
3550 		vgen_handle_evt_up(ldcp, flag);
3551 	}
3552 
3553 	DBG1(vgenp, ldcp, "exit\n");
3554 }
3555 
3556 /* Interrupt handler for the channel */
3557 static uint_t
3558 vgen_ldc_cb(uint64_t event, caddr_t arg)
3559 {
3560 	_NOTE(ARGUNUSED(event))
3561 	vgen_ldc_t	*ldcp;
3562 	vgen_t		*vgenp;
3563 	ldc_status_t 	istatus;
3564 	vgen_stats_t	*statsp;
3565 
3566 	ldcp = (vgen_ldc_t *)arg;
3567 	vgenp = LDC_TO_VGEN(ldcp);
3568 	statsp = &ldcp->stats;
3569 
3570 	DBG1(vgenp, ldcp, "enter\n");
3571 
3572 	mutex_enter(&ldcp->cblock);
3573 	statsp->callbacks++;
3574 	if ((ldcp->ldc_status == LDC_INIT) || (ldcp->ldc_handle == NULL)) {
3575 		DWARN(vgenp, ldcp, "status(%d) is LDC_INIT\n",
3576 		    ldcp->ldc_status);
3577 		mutex_exit(&ldcp->cblock);
3578 		return (LDC_SUCCESS);
3579 	}
3580 
3581 	/*
3582 	 * NOTE: not using switch() as event could be triggered by
3583 	 * a state change and a read request. Also the ordering	of the
3584 	 * check for the event types is deliberate.
3585 	 */
3586 	if (event & LDC_EVT_UP) {
3587 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
3588 			DWARN(vgenp, ldcp, "ldc_status err\n");
3589 			/* status couldn't be determined */
3590 			mutex_exit(&ldcp->cblock);
3591 			return (LDC_FAILURE);
3592 		}
3593 		ldcp->ldc_status = istatus;
3594 		if (ldcp->ldc_status != LDC_UP) {
3595 			DWARN(vgenp, ldcp, "LDC_EVT_UP received "
3596 			    " but ldc status is not UP(0x%x)\n",
3597 			    ldcp->ldc_status);
3598 			/* spurious interrupt, return success */
3599 			mutex_exit(&ldcp->cblock);
3600 			return (LDC_SUCCESS);
3601 		}
3602 		DWARN(vgenp, ldcp, "event(%lx) UP, status(%d)\n",
3603 		    event, ldcp->ldc_status);
3604 
3605 		vgen_handle_evt_up(ldcp, B_FALSE);
3606 
3607 		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);
3608 	}
3609 
3610 	/* Handle RESET/DOWN before READ event */
3611 	if (event & (LDC_EVT_RESET | LDC_EVT_DOWN)) {
3612 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
3613 			DWARN(vgenp, ldcp, "ldc_status error\n");
3614 			/* status couldn't be determined */
3615 			mutex_exit(&ldcp->cblock);
3616 			return (LDC_FAILURE);
3617 		}
3618 		ldcp->ldc_status = istatus;
3619 		DWARN(vgenp, ldcp, "event(%lx) RESET/DOWN, status(%d)\n",
3620 		    event, ldcp->ldc_status);
3621 
3622 		vgen_handle_evt_reset(ldcp, B_FALSE);
3623 
3624 		/*
3625 		 * As the channel is down/reset, ignore READ event
3626 		 * but print a debug warning message.
3627 		 */
3628 		if (event & LDC_EVT_READ) {
3629 			DWARN(vgenp, ldcp,
3630 			    "LDC_EVT_READ set along with RESET/DOWN\n");
3631 			event &= ~LDC_EVT_READ;
3632 		}
3633 	}
3634 
3635 	if (event & LDC_EVT_READ) {
3636 		DBG2(vgenp, ldcp, "event(%lx) READ, status(%d)\n",
3637 		    event, ldcp->ldc_status);
3638 
3639 		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);
3640 
3641 		if (ldcp->rcv_thread != NULL) {
3642 			/*
3643 			 * If the receive thread is enabled, then
3644 			 * wakeup the receive thread to process the
3645 			 * LDC messages.
3646 			 */
3647 			mutex_exit(&ldcp->cblock);
3648 			mutex_enter(&ldcp->rcv_thr_lock);
3649 			if (!(ldcp->rcv_thr_flags & VGEN_WTHR_DATARCVD)) {
3650 				ldcp->rcv_thr_flags |= VGEN_WTHR_DATARCVD;
3651 				cv_signal(&ldcp->rcv_thr_cv);
3652 			}
3653 			mutex_exit(&ldcp->rcv_thr_lock);
3654 			mutex_enter(&ldcp->cblock);
3655 		} else  {
3656 			vgen_handle_evt_read(ldcp);
3657 		}
3658 	}
3659 	mutex_exit(&ldcp->cblock);
3660 
3661 	if (ldcp->cancel_htid) {
3662 		/*
3663 		 * Cancel handshake timer.
3664 		 * untimeout(9F) will not return until the pending callback is
3665 		 * cancelled or has run. No problems will result from calling
3666 		 * untimeout if the handler has already completed.
3667 		 * If the timeout handler did run, then it would just
3668 		 * return as cancel_htid is set.
3669 		 */
3670 		(void) untimeout(ldcp->cancel_htid);
3671 		ldcp->cancel_htid = 0;
3672 	}
3673 	DBG1(vgenp, ldcp, "exit\n");
3674 
3675 	return (LDC_SUCCESS);
3676 }
3677 
3678 static void
3679 vgen_handle_evt_read(vgen_ldc_t *ldcp)
3680 {
3681 	int		rv;
3682 	uint64_t	*ldcmsg;
3683 	size_t		msglen;
3684 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
3685 	vio_msg_tag_t	*tagp;
3686 	ldc_status_t 	istatus;
3687 	boolean_t 	has_data;
3688 
3689 	DBG1(vgenp, ldcp, "enter\n");
3690 
3691 	ldcmsg = ldcp->ldcmsg;
3692 	/*
3693 	 * If the receive thread is enabled, then the cblock
3694 	 * need to be acquired here. If not, the vgen_ldc_cb()
3695 	 * calls this function with cblock held already.
3696 	 */
3697 	if (ldcp->rcv_thread != NULL) {
3698 		mutex_enter(&ldcp->cblock);
3699 	} else {
3700 		ASSERT(MUTEX_HELD(&ldcp->cblock));
3701 	}
3702 
3703 vgen_evt_read:
3704 	do {
3705 		msglen = ldcp->msglen;
3706 		rv = ldc_read(ldcp->ldc_handle, (caddr_t)ldcmsg, &msglen);
3707 
3708 		if (rv != 0) {
3709 			DWARN(vgenp, ldcp, "err rv(%d) len(%d)\n",
3710 			    rv, msglen);
3711 			if (rv == ECONNRESET)
3712 				goto vgen_evtread_error;
3713 			break;
3714 		}
3715 		if (msglen == 0) {
3716 			DBG2(vgenp, ldcp, "ldc_read NODATA");
3717 			break;
3718 		}
3719 		DBG2(vgenp, ldcp, "ldc_read msglen(%d)", msglen);
3720 
3721 		tagp = (vio_msg_tag_t *)ldcmsg;
3722 
3723 		if (ldcp->peer_sid) {
3724 			/*
3725 			 * check sid only after we have received peer's sid
3726 			 * in the version negotiate msg.
3727 			 */
3728 #ifdef DEBUG
3729 			if (vgen_hdbg & HDBG_BAD_SID) {
3730 				/* simulate bad sid condition */
3731 				tagp->vio_sid = 0;
3732 				vgen_hdbg &= ~(HDBG_BAD_SID);
3733 			}
3734 #endif
3735 			rv = vgen_check_sid(ldcp, tagp);
3736 			if (rv != VGEN_SUCCESS) {
3737 				/*
3738 				 * If sid mismatch is detected,
3739 				 * reset the channel.
3740 				 */
3741 				ldcp->need_ldc_reset = B_TRUE;
3742 				goto vgen_evtread_error;
3743 			}
3744 		}
3745 
3746 		switch (tagp->vio_msgtype) {
3747 		case VIO_TYPE_CTRL:
3748 			rv = vgen_handle_ctrlmsg(ldcp, tagp);
3749 			break;
3750 
3751 		case VIO_TYPE_DATA:
3752 			rv = vgen_handle_datamsg(ldcp, tagp, msglen);
3753 			break;
3754 
3755 		case VIO_TYPE_ERR:
3756 			vgen_handle_errmsg(ldcp, tagp);
3757 			break;
3758 
3759 		default:
3760 			DWARN(vgenp, ldcp, "Unknown VIO_TYPE(%x)\n",
3761 			    tagp->vio_msgtype);
3762 			break;
3763 		}
3764 
3765 		/*
3766 		 * If an error is encountered, stop processing and
3767 		 * handle the error.
3768 		 */
3769 		if (rv != 0) {
3770 			goto vgen_evtread_error;
3771 		}
3772 
3773 	} while (msglen);
3774 
3775 	/* check once more before exiting */
3776 	rv = ldc_chkq(ldcp->ldc_handle, &has_data);
3777 	if ((rv == 0) && (has_data == B_TRUE)) {
3778 		DTRACE_PROBE(vgen_chkq);
3779 		goto vgen_evt_read;
3780 	}
3781 
3782 vgen_evtread_error:
3783 	if (rv == ECONNRESET) {
3784 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
3785 			DWARN(vgenp, ldcp, "ldc_status err\n");
3786 		} else {
3787 			ldcp->ldc_status = istatus;
3788 		}
3789 		vgen_handle_evt_reset(ldcp, B_FALSE);
3790 	} else if (rv) {
3791 		vgen_handshake_retry(ldcp);
3792 	}
3793 
3794 	/*
3795 	 * If the receive thread is not enabled, then cancel the
3796 	 * handshake timeout here.
3797 	 */
3798 	if (ldcp->rcv_thread != NULL) {
3799 		mutex_exit(&ldcp->cblock);
3800 		if (ldcp->cancel_htid) {
3801 			/*
3802 			 * Cancel handshake timer. untimeout(9F) will
3803 			 * not return until the pending callback is cancelled
3804 			 * or has run. No problems will result from calling
3805 			 * untimeout if the handler has already completed.
3806 			 * If the timeout handler did run, then it would just
3807 			 * return as cancel_htid is set.
3808 			 */
3809 			(void) untimeout(ldcp->cancel_htid);
3810 			ldcp->cancel_htid = 0;
3811 		}
3812 	}
3813 
3814 	DBG1(vgenp, ldcp, "exit\n");
3815 }
3816 
3817 /* vgen handshake functions */
3818 
3819 /* change the hphase for the channel to the next phase */
3820 static vgen_ldc_t *
3821 vh_nextphase(vgen_ldc_t *ldcp)
3822 {
3823 	if (ldcp->hphase == VH_PHASE3) {
3824 		ldcp->hphase = VH_DONE;
3825 	} else {
3826 		ldcp->hphase++;
3827 	}
3828 	return (ldcp);
3829 }
3830 
3831 /*
3832  * wrapper routine to send the given message over ldc using ldc_write().
3833  */
3834 static int
3835 vgen_sendmsg(vgen_ldc_t *ldcp, caddr_t msg,  size_t msglen,
3836     boolean_t caller_holds_lock)
3837 {
3838 	int			rv;
3839 	size_t			len;
3840 	uint32_t		retries = 0;
3841 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
3842 	vio_msg_tag_t		*tagp = (vio_msg_tag_t *)msg;
3843 	vio_dring_msg_t		*dmsg;
3844 	vio_raw_data_msg_t	*rmsg;
3845 	boolean_t		data_msg = B_FALSE;
3846 
3847 	len = msglen;
3848 	if ((len == 0) || (msg == NULL))
3849 		return (VGEN_FAILURE);
3850 
3851 	if (!caller_holds_lock) {
3852 		mutex_enter(&ldcp->wrlock);
3853 	}
3854 
3855 	if (tagp->vio_subtype == VIO_SUBTYPE_INFO) {
3856 		if (tagp->vio_subtype_env == VIO_DRING_DATA) {
3857 			dmsg = (vio_dring_msg_t *)tagp;
3858 			dmsg->seq_num = ldcp->next_txseq;
3859 			data_msg = B_TRUE;
3860 		} else if (tagp->vio_subtype_env == VIO_PKT_DATA) {
3861 			rmsg = (vio_raw_data_msg_t *)tagp;
3862 			rmsg->seq_num = ldcp->next_txseq;
3863 			data_msg = B_TRUE;
3864 		}
3865 	}
3866 
3867 	do {
3868 		len = msglen;
3869 		rv = ldc_write(ldcp->ldc_handle, (caddr_t)msg, &len);
3870 		if (retries++ >= vgen_ldcwr_retries)
3871 			break;
3872 	} while (rv == EWOULDBLOCK);
3873 
3874 	if (rv == 0 && data_msg == B_TRUE) {
3875 		ldcp->next_txseq++;
3876 	}
3877 
3878 	if (!caller_holds_lock) {
3879 		mutex_exit(&ldcp->wrlock);
3880 	}
3881 
3882 	if (rv != 0) {
3883 		DWARN(vgenp, ldcp, "ldc_write failed: rv(%d)\n",
3884 		    rv, msglen);
3885 		return (rv);
3886 	}
3887 
3888 	if (len != msglen) {
3889 		DWARN(vgenp, ldcp, "ldc_write failed: rv(%d) msglen (%d)\n",
3890 		    rv, msglen);
3891 		return (VGEN_FAILURE);
3892 	}
3893 
3894 	return (VGEN_SUCCESS);
3895 }
3896 
3897 /* send version negotiate message to the peer over ldc */
3898 static int
3899 vgen_send_version_negotiate(vgen_ldc_t *ldcp)
3900 {
3901 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
3902 	vio_ver_msg_t	vermsg;
3903 	vio_msg_tag_t	*tagp = &vermsg.tag;
3904 	int		rv;
3905 
3906 	bzero(&vermsg, sizeof (vermsg));
3907 
3908 	tagp->vio_msgtype = VIO_TYPE_CTRL;
3909 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
3910 	tagp->vio_subtype_env = VIO_VER_INFO;
3911 	tagp->vio_sid = ldcp->local_sid;
3912 
3913 	/* get version msg payload from ldcp->local */
3914 	vermsg.ver_major = ldcp->local_hparams.ver_major;
3915 	vermsg.ver_minor = ldcp->local_hparams.ver_minor;
3916 	vermsg.dev_class = ldcp->local_hparams.dev_class;
3917 
3918 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (vermsg), B_FALSE);
3919 	if (rv != VGEN_SUCCESS) {
3920 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
3921 		return (rv);
3922 	}
3923 
3924 	ldcp->hstate |= VER_INFO_SENT;
3925 	DBG2(vgenp, ldcp, "VER_INFO_SENT ver(%d,%d)\n",
3926 	    vermsg.ver_major, vermsg.ver_minor);
3927 
3928 	return (VGEN_SUCCESS);
3929 }
3930 
3931 /* send attr info message to the peer over ldc */
3932 static int
3933 vgen_send_attr_info(vgen_ldc_t *ldcp)
3934 {
3935 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
3936 	vnet_attr_msg_t	attrmsg;
3937 	vio_msg_tag_t	*tagp = &attrmsg.tag;
3938 	int		rv;
3939 
3940 	bzero(&attrmsg, sizeof (attrmsg));
3941 
3942 	tagp->vio_msgtype = VIO_TYPE_CTRL;
3943 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
3944 	tagp->vio_subtype_env = VIO_ATTR_INFO;
3945 	tagp->vio_sid = ldcp->local_sid;
3946 
3947 	/* get attr msg payload from ldcp->local */
3948 	attrmsg.mtu = ldcp->local_hparams.mtu;
3949 	attrmsg.addr = ldcp->local_hparams.addr;
3950 	attrmsg.addr_type = ldcp->local_hparams.addr_type;
3951 	attrmsg.xfer_mode = ldcp->local_hparams.xfer_mode;
3952 	attrmsg.ack_freq = ldcp->local_hparams.ack_freq;
3953 
3954 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (attrmsg), B_FALSE);
3955 	if (rv != VGEN_SUCCESS) {
3956 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
3957 		return (rv);
3958 	}
3959 
3960 	ldcp->hstate |= ATTR_INFO_SENT;
3961 	DBG2(vgenp, ldcp, "ATTR_INFO_SENT\n");
3962 
3963 	return (VGEN_SUCCESS);
3964 }
3965 
3966 /* send descriptor ring register message to the peer over ldc */
3967 static int
3968 vgen_send_dring_reg(vgen_ldc_t *ldcp)
3969 {
3970 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
3971 	vio_dring_reg_msg_t	msg;
3972 	vio_msg_tag_t		*tagp = &msg.tag;
3973 	int		rv;
3974 
3975 	bzero(&msg, sizeof (msg));
3976 
3977 	tagp->vio_msgtype = VIO_TYPE_CTRL;
3978 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
3979 	tagp->vio_subtype_env = VIO_DRING_REG;
3980 	tagp->vio_sid = ldcp->local_sid;
3981 
3982 	/* get dring info msg payload from ldcp->local */
3983 	bcopy(&(ldcp->local_hparams.dring_cookie), (msg.cookie),
3984 	    sizeof (ldc_mem_cookie_t));
3985 	msg.ncookies = ldcp->local_hparams.num_dcookies;
3986 	msg.num_descriptors = ldcp->local_hparams.num_desc;
3987 	msg.descriptor_size = ldcp->local_hparams.desc_size;
3988 
3989 	/*
3990 	 * dring_ident is set to 0. After mapping the dring, peer sets this
3991 	 * value and sends it in the ack, which is saved in
3992 	 * vgen_handle_dring_reg().
3993 	 */
3994 	msg.dring_ident = 0;
3995 
3996 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (msg), B_FALSE);
3997 	if (rv != VGEN_SUCCESS) {
3998 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
3999 		return (rv);
4000 	}
4001 
4002 	ldcp->hstate |= DRING_INFO_SENT;
4003 	DBG2(vgenp, ldcp, "DRING_INFO_SENT \n");
4004 
4005 	return (VGEN_SUCCESS);
4006 }
4007 
4008 static int
4009 vgen_send_rdx_info(vgen_ldc_t *ldcp)
4010 {
4011 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4012 	vio_rdx_msg_t	rdxmsg;
4013 	vio_msg_tag_t	*tagp = &rdxmsg.tag;
4014 	int		rv;
4015 
4016 	bzero(&rdxmsg, sizeof (rdxmsg));
4017 
4018 	tagp->vio_msgtype = VIO_TYPE_CTRL;
4019 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
4020 	tagp->vio_subtype_env = VIO_RDX;
4021 	tagp->vio_sid = ldcp->local_sid;
4022 
4023 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (rdxmsg), B_FALSE);
4024 	if (rv != VGEN_SUCCESS) {
4025 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
4026 		return (rv);
4027 	}
4028 
4029 	ldcp->hstate |= RDX_INFO_SENT;
4030 	DBG2(vgenp, ldcp, "RDX_INFO_SENT\n");
4031 
4032 	return (VGEN_SUCCESS);
4033 }
4034 
4035 /* send descriptor ring data message to the peer over ldc */
4036 static int
4037 vgen_send_dring_data(vgen_ldc_t *ldcp, uint32_t start, int32_t end)
4038 {
4039 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4040 	vio_dring_msg_t	dringmsg, *msgp = &dringmsg;
4041 	vio_msg_tag_t	*tagp = &msgp->tag;
4042 	vgen_stats_t	*statsp = &ldcp->stats;
4043 	int		rv;
4044 
4045 	bzero(msgp, sizeof (*msgp));
4046 
4047 	tagp->vio_msgtype = VIO_TYPE_DATA;
4048 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
4049 	tagp->vio_subtype_env = VIO_DRING_DATA;
4050 	tagp->vio_sid = ldcp->local_sid;
4051 
4052 	msgp->dring_ident = ldcp->local_hparams.dring_ident;
4053 	msgp->start_idx = start;
4054 	msgp->end_idx = end;
4055 
4056 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (dringmsg), B_TRUE);
4057 	if (rv != VGEN_SUCCESS) {
4058 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
4059 		return (rv);
4060 	}
4061 
4062 	statsp->dring_data_msgs++;
4063 
4064 	DBG2(vgenp, ldcp, "DRING_DATA_SENT \n");
4065 
4066 	return (VGEN_SUCCESS);
4067 }
4068 
4069 /* send multicast addr info message to vsw */
4070 static int
4071 vgen_send_mcast_info(vgen_ldc_t *ldcp)
4072 {
4073 	vnet_mcast_msg_t	mcastmsg;
4074 	vnet_mcast_msg_t	*msgp;
4075 	vio_msg_tag_t		*tagp;
4076 	vgen_t			*vgenp;
4077 	struct ether_addr	*mca;
4078 	int			rv;
4079 	int			i;
4080 	uint32_t		size;
4081 	uint32_t		mccount;
4082 	uint32_t		n;
4083 
4084 	msgp = &mcastmsg;
4085 	tagp = &msgp->tag;
4086 	vgenp = LDC_TO_VGEN(ldcp);
4087 
4088 	mccount = vgenp->mccount;
4089 	i = 0;
4090 
4091 	do {
4092 		tagp->vio_msgtype = VIO_TYPE_CTRL;
4093 		tagp->vio_subtype = VIO_SUBTYPE_INFO;
4094 		tagp->vio_subtype_env = VNET_MCAST_INFO;
4095 		tagp->vio_sid = ldcp->local_sid;
4096 
4097 		n = ((mccount >= VNET_NUM_MCAST) ? VNET_NUM_MCAST : mccount);
4098 		size = n * sizeof (struct ether_addr);
4099 
4100 		mca = &(vgenp->mctab[i]);
4101 		bcopy(mca, (msgp->mca), size);
4102 		msgp->set = B_TRUE;
4103 		msgp->count = n;
4104 
4105 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msgp),
4106 		    B_FALSE);
4107 		if (rv != VGEN_SUCCESS) {
4108 			DWARN(vgenp, ldcp, "vgen_sendmsg err(%d)\n", rv);
4109 			return (rv);
4110 		}
4111 
4112 		mccount -= n;
4113 		i += n;
4114 
4115 	} while (mccount);
4116 
4117 	return (VGEN_SUCCESS);
4118 }
4119 
4120 /* Initiate Phase 2 of handshake */
4121 static int
4122 vgen_handshake_phase2(vgen_ldc_t *ldcp)
4123 {
4124 	int rv;
4125 	uint32_t ncookies = 0;
4126 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4127 
4128 #ifdef DEBUG
4129 	if (vgen_hdbg & HDBG_OUT_STATE) {
4130 		/* simulate out of state condition */
4131 		vgen_hdbg &= ~(HDBG_OUT_STATE);
4132 		rv = vgen_send_rdx_info(ldcp);
4133 		return (rv);
4134 	}
4135 	if (vgen_hdbg & HDBG_TIMEOUT) {
4136 		/* simulate timeout condition */
4137 		vgen_hdbg &= ~(HDBG_TIMEOUT);
4138 		return (VGEN_SUCCESS);
4139 	}
4140 #endif
4141 	rv = vgen_send_attr_info(ldcp);
4142 	if (rv != VGEN_SUCCESS) {
4143 		return (rv);
4144 	}
4145 
4146 	/* Bind descriptor ring to the channel */
4147 	if (ldcp->num_txdcookies == 0) {
4148 		rv = ldc_mem_dring_bind(ldcp->ldc_handle, ldcp->tx_dhandle,
4149 		    LDC_SHADOW_MAP, LDC_MEM_RW, &ldcp->tx_dcookie, &ncookies);
4150 		if (rv != 0) {
4151 			DWARN(vgenp, ldcp, "ldc_mem_dring_bind failed "
4152 			    "rv(%x)\n", rv);
4153 			return (rv);
4154 		}
4155 		ASSERT(ncookies == 1);
4156 		ldcp->num_txdcookies = ncookies;
4157 	}
4158 
4159 	/* update local dring_info params */
4160 	bcopy(&(ldcp->tx_dcookie), &(ldcp->local_hparams.dring_cookie),
4161 	    sizeof (ldc_mem_cookie_t));
4162 	ldcp->local_hparams.num_dcookies = ldcp->num_txdcookies;
4163 	ldcp->local_hparams.num_desc = ldcp->num_txds;
4164 	ldcp->local_hparams.desc_size = sizeof (vnet_public_desc_t);
4165 
4166 	rv = vgen_send_dring_reg(ldcp);
4167 	if (rv != VGEN_SUCCESS) {
4168 		return (rv);
4169 	}
4170 
4171 	return (VGEN_SUCCESS);
4172 }
4173 
4174 /*
4175  * Set vnet-protocol-version dependent functions based on version.
4176  */
4177 static void
4178 vgen_set_vnet_proto_ops(vgen_ldc_t *ldcp)
4179 {
4180 	vgen_hparams_t	*lp = &ldcp->local_hparams;
4181 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4182 
4183 	if (VGEN_VER_GTEQ(ldcp, 1, 3)) {
4184 		/*
4185 		 * If the version negotiated with peer is >= 1.3,
4186 		 * set the mtu in our attributes to max_frame_size.
4187 		 */
4188 		lp->mtu = vgenp->max_frame_size;
4189 	} else {
4190 		vgen_port_t	*portp = ldcp->portp;
4191 		vnet_t		*vnetp = vgenp->vnetp;
4192 		/*
4193 		 * Pre-1.3 peers expect max frame size of ETHERMAX.
4194 		 * We can negotiate that size with those peers provided the
4195 		 * following conditions are true:
4196 		 * - Our max_frame_size is greater only by VLAN_TAGSZ (4).
4197 		 * - Only pvid is defined for our peer and there are no vids.
4198 		 * - pvids are equal.
4199 		 * If the above conditions are true, then we can send/recv only
4200 		 * untagged frames of max size ETHERMAX.
4201 		 */
4202 		if ((vgenp->max_frame_size == ETHERMAX + VLAN_TAGSZ) &&
4203 		    portp->nvids == 0 && portp->pvid == vnetp->pvid) {
4204 			lp->mtu = ETHERMAX;
4205 		}
4206 	}
4207 
4208 	if (VGEN_VER_GTEQ(ldcp, 1, 2)) {
4209 		/* Versions >= 1.2 */
4210 
4211 		if (VGEN_PRI_ETH_DEFINED(vgenp)) {
4212 			/*
4213 			 * enable priority routines and pkt mode only if
4214 			 * at least one pri-eth-type is specified in MD.
4215 			 */
4216 
4217 			ldcp->tx = vgen_ldcsend;
4218 			ldcp->rx_pktdata = vgen_handle_pkt_data;
4219 
4220 			/* set xfer mode for vgen_send_attr_info() */
4221 			lp->xfer_mode = VIO_PKT_MODE | VIO_DRING_MODE_V1_2;
4222 
4223 		} else {
4224 			/* no priority eth types defined in MD */
4225 
4226 			ldcp->tx = vgen_ldcsend_dring;
4227 			ldcp->rx_pktdata = vgen_handle_pkt_data_nop;
4228 
4229 			/* set xfer mode for vgen_send_attr_info() */
4230 			lp->xfer_mode = VIO_DRING_MODE_V1_2;
4231 
4232 		}
4233 	} else {
4234 		/* Versions prior to 1.2  */
4235 
4236 		vgen_reset_vnet_proto_ops(ldcp);
4237 	}
4238 }
4239 
4240 /*
4241  * Reset vnet-protocol-version dependent functions to pre-v1.2.
4242  */
4243 static void
4244 vgen_reset_vnet_proto_ops(vgen_ldc_t *ldcp)
4245 {
4246 	vgen_hparams_t	*lp = &ldcp->local_hparams;
4247 
4248 	ldcp->tx = vgen_ldcsend_dring;
4249 	ldcp->rx_pktdata = vgen_handle_pkt_data_nop;
4250 
4251 	/* set xfer mode for vgen_send_attr_info() */
4252 	lp->xfer_mode = VIO_DRING_MODE_V1_0;
4253 }
4254 
4255 static void
4256 vgen_vlan_unaware_port_reset(vgen_port_t *portp)
4257 {
4258 	vgen_ldclist_t	*ldclp;
4259 	vgen_ldc_t	*ldcp;
4260 	vgen_t		*vgenp = portp->vgenp;
4261 	vnet_t		*vnetp = vgenp->vnetp;
4262 
4263 	ldclp = &portp->ldclist;
4264 
4265 	READ_ENTER(&ldclp->rwlock);
4266 
4267 	/*
4268 	 * NOTE: for now, we will assume we have a single channel.
4269 	 */
4270 	if (ldclp->headp == NULL) {
4271 		RW_EXIT(&ldclp->rwlock);
4272 		return;
4273 	}
4274 	ldcp = ldclp->headp;
4275 
4276 	mutex_enter(&ldcp->cblock);
4277 
4278 	/*
4279 	 * If the peer is vlan_unaware(ver < 1.3), reset channel and terminate
4280 	 * the connection. See comments in vgen_set_vnet_proto_ops().
4281 	 */
4282 	if (ldcp->hphase == VH_DONE && VGEN_VER_LT(ldcp, 1, 3) &&
4283 	    (portp->nvids != 0 || portp->pvid != vnetp->pvid)) {
4284 		ldcp->need_ldc_reset = B_TRUE;
4285 		vgen_handshake_retry(ldcp);
4286 	}
4287 
4288 	mutex_exit(&ldcp->cblock);
4289 
4290 	RW_EXIT(&ldclp->rwlock);
4291 }
4292 
4293 static void
4294 vgen_reset_vlan_unaware_ports(vgen_t *vgenp)
4295 {
4296 	vgen_port_t	*portp;
4297 	vgen_portlist_t	*plistp;
4298 
4299 	plistp = &(vgenp->vgenports);
4300 	READ_ENTER(&plistp->rwlock);
4301 
4302 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
4303 
4304 		vgen_vlan_unaware_port_reset(portp);
4305 
4306 	}
4307 
4308 	RW_EXIT(&plistp->rwlock);
4309 }
4310 
/*
 * This function resets the handshake phase to VH_PHASE0(pre-handshake phase).
 * This can happen after a channel comes up (status: LDC_UP) or
 * when handshake gets terminated due to various conditions.
 *
 * In order, it:
 * - clears hstate/hphase and restores the pre-v1.2 protocol ops,
 * - hands a pending handshake timer id over to cancel_htid,
 * - unbinds the local tx dring and, if the peer's dring was marked ready,
 *   unmaps it and clobbers the rx descriptors,
 * - clobbers the tx buffers,
 * - re-initializes local_hparams and zeroes peer_hparams,
 * - if need_ldc_reset is set, takes the channel down, clears the session
 *   ids, tries to bring the channel up again and refreshes ldc_status.
 *
 * NOTE(review): vgen_handshake_reset() invokes this with rxlock, wrlock,
 * txlock and tclock held (and asserts cblock); presumably every caller is
 * expected to do the same - confirm before adding a new call site.
 */
static void
vgen_reset_hphase(vgen_ldc_t *ldcp)
{
	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
	ldc_status_t istatus;
	int rv;

	DBG1(vgenp, ldcp, "enter\n");
	/* reset hstate and hphase */
	ldcp->hstate = 0;
	ldcp->hphase = VH_PHASE0;

	vgen_reset_vnet_proto_ops(ldcp);

	/*
	 * Save the id of pending handshake timer in cancel_htid.
	 * This will be checked in vgen_ldc_cb() and the handshake timer will
	 * be cancelled after releasing cblock.
	 */
	if (ldcp->htid) {
		ldcp->cancel_htid = ldcp->htid;
		ldcp->htid = 0;
	}

	if (ldcp->local_hparams.dring_ready) {
		ldcp->local_hparams.dring_ready = B_FALSE;
	}

	/* Unbind tx descriptor ring from the channel */
	if (ldcp->num_txdcookies) {
		rv = ldc_mem_dring_unbind(ldcp->tx_dhandle);
		if (rv != 0) {
			/* failure is only logged; the cookie count is */
			/* cleared regardless so phase 2 binds again */
			DWARN(vgenp, ldcp, "ldc_mem_dring_unbind failed\n");
		}
		ldcp->num_txdcookies = 0;
	}

	if (ldcp->peer_hparams.dring_ready) {
		ldcp->peer_hparams.dring_ready = B_FALSE;
		/* Unmap peer's dring */
		(void) ldc_mem_dring_unmap(ldcp->rx_dhandle);
		vgen_clobber_rxds(ldcp);
	}

	vgen_clobber_tbufs(ldcp);

	/*
	 * clear local handshake params and initialize.
	 * Note that this also wipes the xfer_mode written by
	 * vgen_reset_vnet_proto_ops() above; it is set again below.
	 */
	bzero(&(ldcp->local_hparams), sizeof (ldcp->local_hparams));

	/* set version to the highest version supported */
	ldcp->local_hparams.ver_major =
	    ldcp->vgen_versions[0].ver_major;
	ldcp->local_hparams.ver_minor =
	    ldcp->vgen_versions[0].ver_minor;
	ldcp->local_hparams.dev_class = VDEV_NETWORK;

	/* set attr_info params */
	ldcp->local_hparams.mtu = vgenp->max_frame_size;
	ldcp->local_hparams.addr =
	    vnet_macaddr_strtoul(vgenp->macaddr);
	ldcp->local_hparams.addr_type = ADDR_TYPE_MAC;
	ldcp->local_hparams.xfer_mode = VIO_DRING_MODE_V1_0;
	ldcp->local_hparams.ack_freq = 0;	/* don't need acks */

	/*
	 * Note: dring is created, but not bound yet.
	 * local dring_info params will be updated when we bind the dring in
	 * vgen_handshake_phase2().
	 * dring_ident is set to 0. After mapping the dring, peer sets this
	 * value and sends it in the ack, which is saved in
	 * vgen_handle_dring_reg().
	 */
	ldcp->local_hparams.dring_ident = 0;

	/* clear peer_hparams */
	bzero(&(ldcp->peer_hparams), sizeof (ldcp->peer_hparams));

	/* reset the channel if required */
	if (ldcp->need_ldc_reset) {
		DWARN(vgenp, ldcp, "Doing Channel Reset...\n");
		ldcp->need_ldc_reset = B_FALSE;
		/* return values of ldc_down/ldc_status are ignored here */
		(void) ldc_down(ldcp->ldc_handle);
		(void) ldc_status(ldcp->ldc_handle, &istatus);
		DBG2(vgenp, ldcp, "Reset Done,ldc_status(%x)\n", istatus);
		ldcp->ldc_status = istatus;

		/* clear sids */
		ldcp->local_sid = 0;
		ldcp->peer_sid = 0;

		/* try to bring the channel up */
		rv = ldc_up(ldcp->ldc_handle);
		if (rv != 0) {
			DWARN(vgenp, ldcp, "ldc_up err rv(%d)\n", rv);
		}

		/* cache the new status; keep the old one if the query fails */
		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
			DWARN(vgenp, ldcp, "ldc_status err\n");
		} else {
			ldcp->ldc_status = istatus;
		}
	}
}
4421 
4422 /* wrapper function for vgen_reset_hphase */
4423 static void
4424 vgen_handshake_reset(vgen_ldc_t *ldcp)
4425 {
4426 	ASSERT(MUTEX_HELD(&ldcp->cblock));
4427 	mutex_enter(&ldcp->rxlock);
4428 	mutex_enter(&ldcp->wrlock);
4429 	mutex_enter(&ldcp->txlock);
4430 	mutex_enter(&ldcp->tclock);
4431 
4432 	vgen_reset_hphase(ldcp);
4433 
4434 	mutex_exit(&ldcp->tclock);
4435 	mutex_exit(&ldcp->txlock);
4436 	mutex_exit(&ldcp->wrlock);
4437 	mutex_exit(&ldcp->rxlock);
4438 }
4439 
/*
 * Initiate handshake with the peer by sending various messages
 * based on the handshake-phase that the channel is currently in.
 *
 * The phase is read from ldcp->hphase on entry; callers in this file pass
 * the result of vh_nextphase(ldcp). Nothing is returned: if the step fails
 * with ECONNRESET the channel status is refreshed and
 * vgen_handle_evt_reset() is called; any other non-zero result resets the
 * handshake through vgen_handshake_reset().
 *
 * NOTE(review): vgen_handshake_reset() asserts that cblock is held, and the
 * VH_DONE case drops and re-takes cblock, so this function is presumably
 * always entered with cblock held - confirm against the callers.
 */
static void
vgen_handshake(vgen_ldc_t *ldcp)
{
	uint32_t hphase = ldcp->hphase;
	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
	ldc_status_t	istatus;
	int	rv = 0;

	switch (hphase) {

	case VH_PHASE1:

		/*
		 * start timer, for entire handshake process, turn this timer
		 * off if all phases of handshake complete successfully and
		 * hphase goes to VH_DONE(below) or
		 * vgen_reset_hphase() gets called or
		 * channel is reset due to errors or
		 * vgen_ldc_uninit() is invoked(vgen_stop).
		 */
		ldcp->htid = timeout(vgen_hwatchdog, (caddr_t)ldcp,
		    drv_usectohz(vgen_hwd_interval * MICROSEC));

		/* Phase 1 involves negotiating the version */
		rv = vgen_send_version_negotiate(ldcp);
		break;

	case VH_PHASE2:
		/* attr info + dring registration */
		rv = vgen_handshake_phase2(ldcp);
		break;

	case VH_PHASE3:
		/* Phase 3 consists of the RDX exchange */
		rv = vgen_send_rdx_info(ldcp);
		break;

	case VH_DONE:
		/*
		 * Save the id of pending handshake timer in cancel_htid.
		 * This will be checked in vgen_ldc_cb() and the handshake
		 * timer will be cancelled after releasing cblock.
		 */
		if (ldcp->htid) {
			ldcp->cancel_htid = ldcp->htid;
			ldcp->htid = 0;
		}
		ldcp->hretries = 0;
		DBG1(vgenp, ldcp, "Handshake Done\n");

		if (ldcp->portp == vgenp->vsw_portp) {
			/*
			 * If this channel(port) is connected to vsw,
			 * need to sync multicast table with vsw.
			 *
			 * cblock is released while vgenp->lock is held for
			 * the send and re-acquired afterwards (presumably to
			 * respect the lock ordering between the two - TODO
			 * confirm).
			 */
			mutex_exit(&ldcp->cblock);

			mutex_enter(&vgenp->lock);
			rv = vgen_send_mcast_info(ldcp);
			mutex_exit(&vgenp->lock);

			mutex_enter(&ldcp->cblock);
			/* on failure skip the resched check; rv is */
			/* handled after the switch */
			if (rv != VGEN_SUCCESS)
				break;
		}

		/*
		 * Check if mac layer should be notified to restart
		 * transmissions. This can happen if the channel got
		 * reset and vgen_clobber_tbufs() is called, while
		 * need_resched is set.
		 */
		mutex_enter(&ldcp->tclock);
		if (ldcp->need_resched) {
			ldcp->need_resched = B_FALSE;
			vnet_tx_update(vgenp->vnetp);
		}
		mutex_exit(&ldcp->tclock);

		break;

	default:
		/* any other phase (e.g. VH_PHASE0): nothing to send */
		break;
	}

	if (rv == ECONNRESET) {
		/* refresh the cached channel status before handling reset */
		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
			DWARN(vgenp, ldcp, "ldc_status err\n");
		} else {
			ldcp->ldc_status = istatus;
		}
		vgen_handle_evt_reset(ldcp, B_FALSE);
	} else if (rv) {
		/* any other failure: go back to the pre-handshake state */
		vgen_handshake_reset(ldcp);
	}
}
4538 
4539 /*
4540  * Check if the current handshake phase has completed successfully and
4541  * return the status.
4542  */
4543 static int
4544 vgen_handshake_done(vgen_ldc_t *ldcp)
4545 {
4546 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4547 	uint32_t	hphase = ldcp->hphase;
4548 	int 		status = 0;
4549 
4550 	switch (hphase) {
4551 
4552 	case VH_PHASE1:
4553 		/*
4554 		 * Phase1 is done, if version negotiation
4555 		 * completed successfully.
4556 		 */
4557 		status = ((ldcp->hstate & VER_NEGOTIATED) ==
4558 		    VER_NEGOTIATED);
4559 		break;
4560 
4561 	case VH_PHASE2:
4562 		/*
4563 		 * Phase 2 is done, if attr info and dring info
4564 		 * have been exchanged successfully.
4565 		 */
4566 		status = (((ldcp->hstate & ATTR_INFO_EXCHANGED) ==
4567 		    ATTR_INFO_EXCHANGED) &&
4568 		    ((ldcp->hstate & DRING_INFO_EXCHANGED) ==
4569 		    DRING_INFO_EXCHANGED));
4570 		break;
4571 
4572 	case VH_PHASE3:
4573 		/* Phase 3 is done, if rdx msg has been exchanged */
4574 		status = ((ldcp->hstate & RDX_EXCHANGED) ==
4575 		    RDX_EXCHANGED);
4576 		break;
4577 
4578 	default:
4579 		break;
4580 	}
4581 
4582 	if (status == 0) {
4583 		return (VGEN_FAILURE);
4584 	}
4585 	DBG2(vgenp, ldcp, "PHASE(%d)\n", hphase);
4586 	return (VGEN_SUCCESS);
4587 }
4588 
4589 /* retry handshake on failure */
4590 static void
4591 vgen_handshake_retry(vgen_ldc_t *ldcp)
4592 {
4593 	/* reset handshake phase */
4594 	vgen_handshake_reset(ldcp);
4595 
4596 	/* handshake retry is specified and the channel is UP */
4597 	if (vgen_max_hretries && (ldcp->ldc_status == LDC_UP)) {
4598 		if (ldcp->hretries++ < vgen_max_hretries) {
4599 			ldcp->local_sid = ddi_get_lbolt();
4600 			vgen_handshake(vh_nextphase(ldcp));
4601 		}
4602 	}
4603 }
4604 
/*
 * Handle a version info msg from the peer or an ACK/NACK from the peer
 * to a version info msg that we sent.
 *
 * ldcp - channel the message arrived on.
 * tagp - received message; it is reinterpreted as a vio_ver_msg_t and, for
 *        an INFO message, modified in place and sent back as the reply.
 *
 * Returns VGEN_SUCCESS when the message was processed, VGEN_FAILURE when
 * the negotiation failed or the message arrived in the wrong phase, or the
 * error returned by vgen_sendmsg()/vgen_send_version_negotiate().
 *
 * NOTE(review): the matching loops walk ldcp->vgen_versions[] from index 0
 * and vgen_reset_hphase() treats entry 0 as the highest supported version,
 * so the table is presumably ordered from highest to lowest - confirm.
 */
static int
vgen_handle_version_negotiate(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
{
	vgen_t		*vgenp;
	vio_ver_msg_t	*vermsg = (vio_ver_msg_t *)tagp;
	int		ack = 0;
	int		failed = 0;
	int		idx;
	vgen_ver_t	*versions = ldcp->vgen_versions;
	int		rv = 0;

	vgenp = LDC_TO_VGEN(ldcp);
	DBG1(vgenp, ldcp, "enter\n");
	switch (tagp->vio_subtype) {
	case VIO_SUBTYPE_INFO:

		/*  Cache sid of peer if this is the first time */
		if (ldcp->peer_sid == 0) {
			DBG2(vgenp, ldcp, "Caching peer_sid(%x)\n",
			    tagp->vio_sid);
			ldcp->peer_sid = tagp->vio_sid;
		}

		if (ldcp->hphase != VH_PHASE1) {
			/*
			 * If we are not already in VH_PHASE1, reset to
			 * pre-handshake state, and initiate handshake
			 * to the peer too.
			 */
			vgen_handshake_reset(ldcp);
			vgen_handshake(vh_nextphase(ldcp));
		}
		ldcp->hstate |= VER_INFO_RCVD;

		/* save peer's requested values */
		ldcp->peer_hparams.ver_major = vermsg->ver_major;
		ldcp->peer_hparams.ver_minor = vermsg->ver_minor;
		ldcp->peer_hparams.dev_class = vermsg->dev_class;

		if ((vermsg->dev_class != VDEV_NETWORK) &&
		    (vermsg->dev_class != VDEV_NETWORK_SWITCH)) {
			/* unsupported dev_class, send NACK */

			DWARN(vgenp, ldcp, "Version Negotiation Failed\n");

			tagp->vio_subtype = VIO_SUBTYPE_NACK;
			tagp->vio_sid = ldcp->local_sid;
			/* send reply msg back to peer */
			rv = vgen_sendmsg(ldcp, (caddr_t)tagp,
			    sizeof (*vermsg), B_FALSE);
			if (rv != VGEN_SUCCESS) {
				return (rv);
			}
			return (VGEN_FAILURE);
		}

		DBG2(vgenp, ldcp, "VER_INFO_RCVD, ver(%d,%d)\n",
		    vermsg->ver_major,  vermsg->ver_minor);

		idx = 0;

		/*
		 * Walk our version table looking for the first entry whose
		 * major number is not greater than the one requested.
		 */
		for (;;) {

			if (vermsg->ver_major > versions[idx].ver_major) {

				/* nack with next lower version */
				tagp->vio_subtype = VIO_SUBTYPE_NACK;
				vermsg->ver_major = versions[idx].ver_major;
				vermsg->ver_minor = versions[idx].ver_minor;
				break;
			}

			if (vermsg->ver_major == versions[idx].ver_major) {

				/* major version match - ACK version */
				tagp->vio_subtype = VIO_SUBTYPE_ACK;
				ack = 1;

				/*
				 * lower minor version to the one this endpt
				 * supports, if necessary
				 */
				if (vermsg->ver_minor >
				    versions[idx].ver_minor) {
					vermsg->ver_minor =
					    versions[idx].ver_minor;
					ldcp->peer_hparams.ver_minor =
					    versions[idx].ver_minor;
				}
				break;
			}

			idx++;

			if (idx == VGEN_NUM_VER) {

				/* no version match - send NACK */
				tagp->vio_subtype = VIO_SUBTYPE_NACK;
				vermsg->ver_major = 0;
				vermsg->ver_minor = 0;
				failed = 1;
				break;
			}

		}

		tagp->vio_sid = ldcp->local_sid;

		/* send reply msg back to peer */
		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*vermsg),
		    B_FALSE);
		if (rv != VGEN_SUCCESS) {
			return (rv);
		}

		if (ack) {
			ldcp->hstate |= VER_ACK_SENT;
			DBG2(vgenp, ldcp, "VER_ACK_SENT, ver(%d,%d) \n",
			    vermsg->ver_major, vermsg->ver_minor);
		}
		if (failed) {
			DWARN(vgenp, ldcp, "Negotiation Failed\n");
			return (VGEN_FAILURE);
		}
		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {

			/*  VER_ACK_SENT and VER_ACK_RCVD */

			/* local and peer versions match? */
			ASSERT((ldcp->local_hparams.ver_major ==
			    ldcp->peer_hparams.ver_major) &&
			    (ldcp->local_hparams.ver_minor ==
			    ldcp->peer_hparams.ver_minor));

			vgen_set_vnet_proto_ops(ldcp);

			/* move to the next phase */
			vgen_handshake(vh_nextphase(ldcp));
		}

		break;

	case VIO_SUBTYPE_ACK:

		if (ldcp->hphase != VH_PHASE1) {
			/*  This should not happen. */
			DWARN(vgenp, ldcp, "Invalid Phase(%u)\n", ldcp->hphase);
			return (VGEN_FAILURE);
		}

		/* SUCCESS - we have agreed on a version */
		ldcp->local_hparams.ver_major = vermsg->ver_major;
		ldcp->local_hparams.ver_minor = vermsg->ver_minor;
		ldcp->hstate |= VER_ACK_RCVD;

		DBG2(vgenp, ldcp, "VER_ACK_RCVD, ver(%d,%d) \n",
		    vermsg->ver_major,  vermsg->ver_minor);

		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {

			/*  VER_ACK_SENT and VER_ACK_RCVD */

			/* local and peer versions match? */
			ASSERT((ldcp->local_hparams.ver_major ==
			    ldcp->peer_hparams.ver_major) &&
			    (ldcp->local_hparams.ver_minor ==
			    ldcp->peer_hparams.ver_minor));

			vgen_set_vnet_proto_ops(ldcp);

			/* move to the next phase */
			vgen_handshake(vh_nextphase(ldcp));
		}
		break;

	case VIO_SUBTYPE_NACK:

		if (ldcp->hphase != VH_PHASE1) {
			/*  This should not happen.  */
			DWARN(vgenp, ldcp, "VER_NACK_RCVD Invalid "
			"Phase(%u)\n", ldcp->hphase);
			return (VGEN_FAILURE);
		}

		DBG2(vgenp, ldcp, "VER_NACK_RCVD next ver(%d,%d)\n",
		    vermsg->ver_major, vermsg->ver_minor);

		/* check if version in NACK is zero */
		if (vermsg->ver_major == 0 && vermsg->ver_minor == 0) {
			/*
			 * Version Negotiation has failed.
			 */
			DWARN(vgenp, ldcp, "Version Negotiation Failed\n");
			return (VGEN_FAILURE);
		}

		idx = 0;

		/*
		 * Pick, from our table, the version to propose next based on
		 * the version the peer suggested in its NACK.
		 */
		for (;;) {

			if (vermsg->ver_major > versions[idx].ver_major) {
				/* select next lower version */

				ldcp->local_hparams.ver_major =
				    versions[idx].ver_major;
				ldcp->local_hparams.ver_minor =
				    versions[idx].ver_minor;
				break;
			}

			if (vermsg->ver_major == versions[idx].ver_major) {
				/* major version match */

				ldcp->local_hparams.ver_major =
				    versions[idx].ver_major;

				ldcp->local_hparams.ver_minor =
				    versions[idx].ver_minor;
				break;
			}

			idx++;

			if (idx == VGEN_NUM_VER) {
				/*
				 * no version match.
				 * Version Negotiation has failed.
				 */
				DWARN(vgenp, ldcp,
				    "Version Negotiation Failed\n");
				return (VGEN_FAILURE);
			}

		}

		/* propose the newly selected version to the peer */
		rv = vgen_send_version_negotiate(ldcp);
		if (rv != VGEN_SUCCESS) {
			return (rv);
		}

		break;
	}

	DBG1(vgenp, ldcp, "exit\n");
	return (VGEN_SUCCESS);
}
4855 
4856 /* Check if the attributes are supported */
4857 static int
4858 vgen_check_attr_info(vgen_ldc_t *ldcp, vnet_attr_msg_t *msg)
4859 {
4860 	vgen_hparams_t	*lp = &ldcp->local_hparams;
4861 
4862 	if ((msg->mtu != lp->mtu) ||
4863 	    (msg->addr_type != ADDR_TYPE_MAC) ||
4864 	    (msg->ack_freq > 64) ||
4865 	    (msg->xfer_mode != lp->xfer_mode)) {
4866 		return (VGEN_FAILURE);
4867 	}
4868 
4869 	return (VGEN_SUCCESS);
4870 }
4871 
4872 /*
4873  * Handle an attribute info msg from the peer or an ACK/NACK from the peer
4874  * to an attr info msg that we sent.
4875  */
4876 static int
4877 vgen_handle_attr_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4878 {
4879 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4880 	vnet_attr_msg_t *attrmsg = (vnet_attr_msg_t *)tagp;
4881 	int		ack = 0;
4882 	int		rv = 0;
4883 
4884 	DBG1(vgenp, ldcp, "enter\n");
4885 	if (ldcp->hphase != VH_PHASE2) {
4886 		DWARN(vgenp, ldcp, "Rcvd ATTR_INFO subtype(%d),"
4887 		" Invalid Phase(%u)\n",
4888 		    tagp->vio_subtype, ldcp->hphase);
4889 		return (VGEN_FAILURE);
4890 	}
4891 	switch (tagp->vio_subtype) {
4892 	case VIO_SUBTYPE_INFO:
4893 
4894 		DBG2(vgenp, ldcp, "ATTR_INFO_RCVD \n");
4895 		ldcp->hstate |= ATTR_INFO_RCVD;
4896 
4897 		/* save peer's values */
4898 		ldcp->peer_hparams.mtu = attrmsg->mtu;
4899 		ldcp->peer_hparams.addr = attrmsg->addr;
4900 		ldcp->peer_hparams.addr_type = attrmsg->addr_type;
4901 		ldcp->peer_hparams.xfer_mode = attrmsg->xfer_mode;
4902 		ldcp->peer_hparams.ack_freq = attrmsg->ack_freq;
4903 
4904 		if (vgen_check_attr_info(ldcp, attrmsg) == VGEN_FAILURE) {
4905 			/* unsupported attr, send NACK */
4906 			tagp->vio_subtype = VIO_SUBTYPE_NACK;
4907 		} else {
4908 			ack = 1;
4909 			tagp->vio_subtype = VIO_SUBTYPE_ACK;
4910 		}
4911 		tagp->vio_sid = ldcp->local_sid;
4912 
4913 		/* send reply msg back to peer */
4914 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*attrmsg),
4915 		    B_FALSE);
4916 		if (rv != VGEN_SUCCESS) {
4917 			return (rv);
4918 		}
4919 
4920 		if (ack) {
4921 			ldcp->hstate |= ATTR_ACK_SENT;
4922 			DBG2(vgenp, ldcp, "ATTR_ACK_SENT \n");
4923 		} else {
4924 			/* failed */
4925 			DWARN(vgenp, ldcp, "ATTR_NACK_SENT \n");
4926 			return (VGEN_FAILURE);
4927 		}
4928 
4929 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
4930 			vgen_handshake(vh_nextphase(ldcp));
4931 		}
4932 
4933 		break;
4934 
4935 	case VIO_SUBTYPE_ACK:
4936 
4937 		ldcp->hstate |= ATTR_ACK_RCVD;
4938 
4939 		DBG2(vgenp, ldcp, "ATTR_ACK_RCVD \n");
4940 
4941 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
4942 			vgen_handshake(vh_nextphase(ldcp));
4943 		}
4944 		break;
4945 
4946 	case VIO_SUBTYPE_NACK:
4947 
4948 		DBG2(vgenp, ldcp, "ATTR_NACK_RCVD \n");
4949 		return (VGEN_FAILURE);
4950 	}
4951 	DBG1(vgenp, ldcp, "exit\n");
4952 	return (VGEN_SUCCESS);
4953 }
4954 
4955 /* Check if the dring info msg is ok */
4956 static int
4957 vgen_check_dring_reg(vio_dring_reg_msg_t *msg)
4958 {
4959 	/* check if msg contents are ok */
4960 	if ((msg->num_descriptors < 128) || (msg->descriptor_size <
4961 	    sizeof (vnet_public_desc_t))) {
4962 		return (VGEN_FAILURE);
4963 	}
4964 	return (VGEN_SUCCESS);
4965 }
4966 
4967 /*
4968  * Handle a descriptor ring register msg from the peer or an ACK/NACK from
4969  * the peer to a dring register msg that we sent.
4970  */
4971 static int
4972 vgen_handle_dring_reg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4973 {
4974 	vio_dring_reg_msg_t *msg = (vio_dring_reg_msg_t *)tagp;
4975 	ldc_mem_cookie_t dcookie;
4976 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4977 	int ack = 0;
4978 	int rv = 0;
4979 
4980 	DBG1(vgenp, ldcp, "enter\n");
4981 	if (ldcp->hphase < VH_PHASE2) {
4982 		/* dring_info can be rcvd in any of the phases after Phase1 */
4983 		DWARN(vgenp, ldcp,
4984 		    "Rcvd DRING_INFO Subtype (%d), Invalid Phase(%u)\n",
4985 		    tagp->vio_subtype, ldcp->hphase);
4986 		return (VGEN_FAILURE);
4987 	}
4988 	switch (tagp->vio_subtype) {
4989 	case VIO_SUBTYPE_INFO:
4990 
4991 		DBG2(vgenp, ldcp, "DRING_INFO_RCVD \n");
4992 		ldcp->hstate |= DRING_INFO_RCVD;
4993 		bcopy((msg->cookie), &dcookie, sizeof (dcookie));
4994 
4995 		ASSERT(msg->ncookies == 1);
4996 
4997 		if (vgen_check_dring_reg(msg) == VGEN_SUCCESS) {
4998 			/*
4999 			 * verified dring info msg to be ok,
5000 			 * now try to map the remote dring.
5001 			 */
5002 			rv = vgen_init_rxds(ldcp, msg->num_descriptors,
5003 			    msg->descriptor_size, &dcookie,
5004 			    msg->ncookies);
5005 			if (rv == DDI_SUCCESS) {
5006 				/* now we can ack the peer */
5007 				ack = 1;
5008 			}
5009 		}
5010 		if (ack == 0) {
5011 			/* failed, send NACK */
5012 			tagp->vio_subtype = VIO_SUBTYPE_NACK;
5013 		} else {
5014 			if (!(ldcp->peer_hparams.dring_ready)) {
5015 
5016 				/* save peer's dring_info values */
5017 				bcopy(&dcookie,
5018 				    &(ldcp->peer_hparams.dring_cookie),
5019 				    sizeof (dcookie));
5020 				ldcp->peer_hparams.num_desc =
5021 				    msg->num_descriptors;
5022 				ldcp->peer_hparams.desc_size =
5023 				    msg->descriptor_size;
5024 				ldcp->peer_hparams.num_dcookies =
5025 				    msg->ncookies;
5026 
5027 				/* set dring_ident for the peer */
5028 				ldcp->peer_hparams.dring_ident =
5029 				    (uint64_t)ldcp->rxdp;
5030 				/* return the dring_ident in ack msg */
5031 				msg->dring_ident =
5032 				    (uint64_t)ldcp->rxdp;
5033 
5034 				ldcp->peer_hparams.dring_ready = B_TRUE;
5035 			}
5036 			tagp->vio_subtype = VIO_SUBTYPE_ACK;
5037 		}
5038 		tagp->vio_sid = ldcp->local_sid;
5039 		/* send reply msg back to peer */
5040 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msg),
5041 		    B_FALSE);
5042 		if (rv != VGEN_SUCCESS) {
5043 			return (rv);
5044 		}
5045 
5046 		if (ack) {
5047 			ldcp->hstate |= DRING_ACK_SENT;
5048 			DBG2(vgenp, ldcp, "DRING_ACK_SENT");
5049 		} else {
5050 			DWARN(vgenp, ldcp, "DRING_NACK_SENT");
5051 			return (VGEN_FAILURE);
5052 		}
5053 
5054 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5055 			vgen_handshake(vh_nextphase(ldcp));
5056 		}
5057 
5058 		break;
5059 
5060 	case VIO_SUBTYPE_ACK:
5061 
5062 		ldcp->hstate |= DRING_ACK_RCVD;
5063 
5064 		DBG2(vgenp, ldcp, "DRING_ACK_RCVD");
5065 
5066 		if (!(ldcp->local_hparams.dring_ready)) {
5067 			/* local dring is now ready */
5068 			ldcp->local_hparams.dring_ready = B_TRUE;
5069 
5070 			/* save dring_ident acked by peer */
5071 			ldcp->local_hparams.dring_ident =
5072 			    msg->dring_ident;
5073 		}
5074 
5075 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5076 			vgen_handshake(vh_nextphase(ldcp));
5077 		}
5078 
5079 		break;
5080 
5081 	case VIO_SUBTYPE_NACK:
5082 
5083 		DBG2(vgenp, ldcp, "DRING_NACK_RCVD");
5084 		return (VGEN_FAILURE);
5085 	}
5086 	DBG1(vgenp, ldcp, "exit\n");
5087 	return (VGEN_SUCCESS);
5088 }
5089 
5090 /*
5091  * Handle a rdx info msg from the peer or an ACK/NACK
5092  * from the peer to a rdx info msg that we sent.
5093  */
5094 static int
5095 vgen_handle_rdx_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5096 {
5097 	int rv = 0;
5098 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
5099 
5100 	DBG1(vgenp, ldcp, "enter\n");
5101 	if (ldcp->hphase != VH_PHASE3) {
5102 		DWARN(vgenp, ldcp,
5103 		    "Rcvd RDX_INFO Subtype (%d), Invalid Phase(%u)\n",
5104 		    tagp->vio_subtype, ldcp->hphase);
5105 		return (VGEN_FAILURE);
5106 	}
5107 	switch (tagp->vio_subtype) {
5108 	case VIO_SUBTYPE_INFO:
5109 
5110 		DBG2(vgenp, ldcp, "RDX_INFO_RCVD \n");
5111 		ldcp->hstate |= RDX_INFO_RCVD;
5112 
5113 		tagp->vio_subtype = VIO_SUBTYPE_ACK;
5114 		tagp->vio_sid = ldcp->local_sid;
5115 		/* send reply msg back to peer */
5116 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (vio_rdx_msg_t),
5117 		    B_FALSE);
5118 		if (rv != VGEN_SUCCESS) {
5119 			return (rv);
5120 		}
5121 
5122 		ldcp->hstate |= RDX_ACK_SENT;
5123 		DBG2(vgenp, ldcp, "RDX_ACK_SENT \n");
5124 
5125 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5126 			vgen_handshake(vh_nextphase(ldcp));
5127 		}
5128 
5129 		break;
5130 
5131 	case VIO_SUBTYPE_ACK:
5132 
5133 		ldcp->hstate |= RDX_ACK_RCVD;
5134 
5135 		DBG2(vgenp, ldcp, "RDX_ACK_RCVD \n");
5136 
5137 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5138 			vgen_handshake(vh_nextphase(ldcp));
5139 		}
5140 		break;
5141 
5142 	case VIO_SUBTYPE_NACK:
5143 
5144 		DBG2(vgenp, ldcp, "RDX_NACK_RCVD \n");
5145 		return (VGEN_FAILURE);
5146 	}
5147 	DBG1(vgenp, ldcp, "exit\n");
5148 	return (VGEN_SUCCESS);
5149 }
5150 
5151 /* Handle ACK/NACK from vsw to a set multicast msg that we sent */
5152 static int
5153 vgen_handle_mcast_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5154 {
5155 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5156 	vnet_mcast_msg_t *msgp = (vnet_mcast_msg_t *)tagp;
5157 	struct ether_addr *addrp;
5158 	int count;
5159 	int i;
5160 
5161 	DBG1(vgenp, ldcp, "enter\n");
5162 	switch (tagp->vio_subtype) {
5163 
5164 	case VIO_SUBTYPE_INFO:
5165 
5166 		/* vnet shouldn't recv set mcast msg, only vsw handles it */
5167 		DWARN(vgenp, ldcp, "rcvd SET_MCAST_INFO \n");
5168 		break;
5169 
5170 	case VIO_SUBTYPE_ACK:
5171 
5172 		/* success adding/removing multicast addr */
5173 		DBG1(vgenp, ldcp, "rcvd SET_MCAST_ACK \n");
5174 		break;
5175 
5176 	case VIO_SUBTYPE_NACK:
5177 
5178 		DWARN(vgenp, ldcp, "rcvd SET_MCAST_NACK \n");
5179 		if (!(msgp->set)) {
5180 			/* multicast remove request failed */
5181 			break;
5182 		}
5183 
5184 		/* multicast add request failed */
5185 		for (count = 0; count < msgp->count; count++) {
5186 			addrp = &(msgp->mca[count]);
5187 
5188 			/* delete address from the table */
5189 			for (i = 0; i < vgenp->mccount; i++) {
5190 				if (ether_cmp(addrp,
5191 				    &(vgenp->mctab[i])) == 0) {
5192 					if (vgenp->mccount > 1) {
5193 						int t = vgenp->mccount - 1;
5194 						vgenp->mctab[i] =
5195 						    vgenp->mctab[t];
5196 					}
5197 					vgenp->mccount--;
5198 					break;
5199 				}
5200 			}
5201 		}
5202 		break;
5203 
5204 	}
5205 	DBG1(vgenp, ldcp, "exit\n");
5206 
5207 	return (VGEN_SUCCESS);
5208 }
5209 
5210 /* handler for control messages received from the peer ldc end-point */
5211 static int
5212 vgen_handle_ctrlmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5213 {
5214 	int rv = 0;
5215 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5216 
5217 	DBG1(vgenp, ldcp, "enter\n");
5218 	switch (tagp->vio_subtype_env) {
5219 
5220 	case VIO_VER_INFO:
5221 		rv = vgen_handle_version_negotiate(ldcp, tagp);
5222 		break;
5223 
5224 	case VIO_ATTR_INFO:
5225 		rv = vgen_handle_attr_info(ldcp, tagp);
5226 		break;
5227 
5228 	case VIO_DRING_REG:
5229 		rv = vgen_handle_dring_reg(ldcp, tagp);
5230 		break;
5231 
5232 	case VIO_RDX:
5233 		rv = vgen_handle_rdx_info(ldcp, tagp);
5234 		break;
5235 
5236 	case VNET_MCAST_INFO:
5237 		rv = vgen_handle_mcast_info(ldcp, tagp);
5238 		break;
5239 
5240 	}
5241 
5242 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
5243 	return (rv);
5244 }
5245 
5246 /* handler for data messages received from the peer ldc end-point */
5247 static int
5248 vgen_handle_datamsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp, uint32_t msglen)
5249 {
5250 	int rv = 0;
5251 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5252 
5253 	DBG1(vgenp, ldcp, "enter\n");
5254 
5255 	if (ldcp->hphase != VH_DONE)
5256 		return (rv);
5257 
5258 	if (tagp->vio_subtype == VIO_SUBTYPE_INFO) {
5259 		rv = vgen_check_datamsg_seq(ldcp, tagp);
5260 		if (rv != 0) {
5261 			return (rv);
5262 		}
5263 	}
5264 
5265 	switch (tagp->vio_subtype_env) {
5266 	case VIO_DRING_DATA:
5267 		rv = vgen_handle_dring_data(ldcp, tagp);
5268 		break;
5269 
5270 	case VIO_PKT_DATA:
5271 		ldcp->rx_pktdata((void *)ldcp, (void *)tagp, msglen);
5272 		break;
5273 	default:
5274 		break;
5275 	}
5276 
5277 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
5278 	return (rv);
5279 }
5280 
5281 /*
5282  * dummy pkt data handler function for vnet protocol version 1.0
5283  */
5284 static void
5285 vgen_handle_pkt_data_nop(void *arg1, void *arg2, uint32_t msglen)
5286 {
5287 	_NOTE(ARGUNUSED(arg1, arg2, msglen))
5288 }
5289 
5290 /*
5291  * This function handles raw pkt data messages received over the channel.
5292  * Currently, only priority-eth-type frames are received through this mechanism.
5293  * In this case, the frame(data) is present within the message itself which
5294  * is copied into an mblk before sending it up the stack.
5295  */
5296 static void
5297 vgen_handle_pkt_data(void *arg1, void *arg2, uint32_t msglen)
5298 {
5299 	vgen_ldc_t		*ldcp = (vgen_ldc_t *)arg1;
5300 	vio_raw_data_msg_t	*pkt	= (vio_raw_data_msg_t *)arg2;
5301 	uint32_t		size;
5302 	mblk_t			*mp;
5303 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
5304 	vgen_stats_t		*statsp = &ldcp->stats;
5305 	vgen_hparams_t		*lp = &ldcp->local_hparams;
5306 
5307 	ASSERT(MUTEX_HELD(&ldcp->cblock));
5308 
5309 	mutex_exit(&ldcp->cblock);
5310 
5311 	size = msglen - VIO_PKT_DATA_HDRSIZE;
5312 	if (size < ETHERMIN || size > lp->mtu) {
5313 		(void) atomic_inc_32(&statsp->rx_pri_fail);
5314 		goto exit;
5315 	}
5316 
5317 	mp = vio_multipool_allocb(&ldcp->vmp, size);
5318 	if (mp == NULL) {
5319 		mp = allocb(size, BPRI_MED);
5320 		if (mp == NULL) {
5321 			(void) atomic_inc_32(&statsp->rx_pri_fail);
5322 			DWARN(vgenp, ldcp, "allocb failure, "
5323 			    "unable to process priority frame\n");
5324 			goto exit;
5325 		}
5326 	}
5327 
5328 	/* copy the frame from the payload of raw data msg into the mblk */
5329 	bcopy(pkt->data, mp->b_rptr, size);
5330 	mp->b_wptr = mp->b_rptr + size;
5331 
5332 	/* update stats */
5333 	(void) atomic_inc_64(&statsp->rx_pri_packets);
5334 	(void) atomic_add_64(&statsp->rx_pri_bytes, size);
5335 
5336 	/* send up; call vnet_rx() as cblock is already released */
5337 	vnet_rx(vgenp->vnetp, NULL, mp);
5338 
5339 exit:
5340 	mutex_enter(&ldcp->cblock);
5341 }
5342 
5343 static int
5344 vgen_send_dring_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp, uint32_t start,
5345     int32_t end, uint8_t pstate)
5346 {
5347 	int rv = 0;
5348 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5349 	vio_dring_msg_t *msgp = (vio_dring_msg_t *)tagp;
5350 
5351 	tagp->vio_subtype = VIO_SUBTYPE_ACK;
5352 	tagp->vio_sid = ldcp->local_sid;
5353 	msgp->start_idx = start;
5354 	msgp->end_idx = end;
5355 	msgp->dring_process_state = pstate;
5356 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msgp), B_FALSE);
5357 	if (rv != VGEN_SUCCESS) {
5358 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
5359 	}
5360 	return (rv);
5361 }
5362 
5363 static int
5364 vgen_handle_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5365 {
5366 	int rv = 0;
5367 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5368 
5369 
5370 	DBG1(vgenp, ldcp, "enter\n");
5371 	switch (tagp->vio_subtype) {
5372 
5373 	case VIO_SUBTYPE_INFO:
5374 		/*
5375 		 * To reduce the locking contention, release the
5376 		 * cblock here and re-acquire it once we are done
5377 		 * receiving packets.
5378 		 */
5379 		mutex_exit(&ldcp->cblock);
5380 		mutex_enter(&ldcp->rxlock);
5381 		rv = vgen_handle_dring_data_info(ldcp, tagp);
5382 		mutex_exit(&ldcp->rxlock);
5383 		mutex_enter(&ldcp->cblock);
5384 		break;
5385 
5386 	case VIO_SUBTYPE_ACK:
5387 		rv = vgen_handle_dring_data_ack(ldcp, tagp);
5388 		break;
5389 
5390 	case VIO_SUBTYPE_NACK:
5391 		rv = vgen_handle_dring_data_nack(ldcp, tagp);
5392 		break;
5393 	}
5394 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
5395 	return (rv);
5396 }
5397 
5398 static int
5399 vgen_handle_dring_data_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5400 {
5401 	uint32_t start;
5402 	int32_t end;
5403 	int rv = 0;
5404 	vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;
5405 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5406 #ifdef VGEN_HANDLE_LOST_PKTS
5407 	vgen_stats_t *statsp = &ldcp->stats;
5408 	uint32_t rxi;
5409 	int n;
5410 #endif
5411 
5412 	DBG1(vgenp, ldcp, "enter\n");
5413 
5414 	start = dringmsg->start_idx;
5415 	end = dringmsg->end_idx;
5416 	/*
5417 	 * received a data msg, which contains the start and end
5418 	 * indices of the descriptors within the rx ring holding data,
5419 	 * the seq_num of data packet corresponding to the start index,
5420 	 * and the dring_ident.
5421 	 * We can now read the contents of each of these descriptors
5422 	 * and gather data from it.
5423 	 */
5424 	DBG1(vgenp, ldcp, "INFO: start(%d), end(%d)\n",
5425 	    start, end);
5426 
5427 	/* validate rx start and end indeces */
5428 	if (!(CHECK_RXI(start, ldcp)) || ((end != -1) &&
5429 	    !(CHECK_RXI(end, ldcp)))) {
5430 		DWARN(vgenp, ldcp, "Invalid Rx start(%d) or end(%d)\n",
5431 		    start, end);
5432 		/* drop the message if invalid index */
5433 		return (rv);
5434 	}
5435 
5436 	/* validate dring_ident */
5437 	if (dringmsg->dring_ident != ldcp->peer_hparams.dring_ident) {
5438 		DWARN(vgenp, ldcp, "Invalid dring ident 0x%x\n",
5439 		    dringmsg->dring_ident);
5440 		/* invalid dring_ident, drop the msg */
5441 		return (rv);
5442 	}
5443 #ifdef DEBUG
5444 	if (vgen_trigger_rxlost) {
5445 		/* drop this msg to simulate lost pkts for debugging */
5446 		vgen_trigger_rxlost = 0;
5447 		return (rv);
5448 	}
5449 #endif
5450 
5451 #ifdef	VGEN_HANDLE_LOST_PKTS
5452 
5453 	/* receive start index doesn't match expected index */
5454 	if (ldcp->next_rxi != start) {
5455 		DWARN(vgenp, ldcp, "next_rxi(%d) != start(%d)\n",
5456 		    ldcp->next_rxi, start);
5457 
5458 		/* calculate the number of pkts lost */
5459 		if (start >= ldcp->next_rxi) {
5460 			n = start - ldcp->next_rxi;
5461 		} else  {
5462 			n = ldcp->num_rxds - (ldcp->next_rxi - start);
5463 		}
5464 
5465 		statsp->rx_lost_pkts += n;
5466 		tagp->vio_subtype = VIO_SUBTYPE_NACK;
5467 		tagp->vio_sid = ldcp->local_sid;
5468 		/* indicate the range of lost descriptors */
5469 		dringmsg->start_idx = ldcp->next_rxi;
5470 		rxi = start;
5471 		DECR_RXI(rxi, ldcp);
5472 		dringmsg->end_idx = rxi;
5473 		/* dring ident is left unchanged */
5474 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp,
5475 		    sizeof (*dringmsg), B_FALSE);
5476 		if (rv != VGEN_SUCCESS) {
5477 			DWARN(vgenp, ldcp,
5478 			    "vgen_sendmsg failed, stype:NACK\n");
5479 			return (rv);
5480 		}
5481 		/*
5482 		 * treat this range of descrs/pkts as dropped
5483 		 * and set the new expected value of next_rxi
5484 		 * and continue(below) to process from the new
5485 		 * start index.
5486 		 */
5487 		ldcp->next_rxi = start;
5488 	}
5489 
5490 #endif	/* VGEN_HANDLE_LOST_PKTS */
5491 
5492 	/* Now receive messages */
5493 	rv = vgen_process_dring_data(ldcp, tagp);
5494 
5495 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
5496 	return (rv);
5497 }
5498 
5499 static int
5500 vgen_process_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5501 {
5502 	boolean_t set_ack_start = B_FALSE;
5503 	uint32_t start;
5504 	uint32_t ack_end;
5505 	uint32_t next_rxi;
5506 	uint32_t rxi;
5507 	int count = 0;
5508 	int rv = 0;
5509 	uint32_t retries = 0;
5510 	vgen_stats_t *statsp;
5511 	vnet_public_desc_t *rxdp;
5512 	vio_dring_entry_hdr_t *hdrp;
5513 	mblk_t *bp = NULL;
5514 	mblk_t *bpt = NULL;
5515 	uint32_t ack_start;
5516 	uint32_t datalen;
5517 	uint32_t ncookies;
5518 	boolean_t rxd_err = B_FALSE;
5519 	mblk_t *mp = NULL;
5520 	size_t nbytes;
5521 	boolean_t ack_needed = B_FALSE;
5522 	size_t nread;
5523 	uint64_t off = 0;
5524 	struct ether_header *ehp;
5525 	vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;
5526 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5527 
5528 	DBG1(vgenp, ldcp, "enter\n");
5529 
5530 	statsp = &ldcp->stats;
5531 	start = dringmsg->start_idx;
5532 
5533 	/*
5534 	 * start processing the descriptors from the specified
5535 	 * start index, up to the index a descriptor is not ready
5536 	 * to be processed or we process the entire descriptor ring
5537 	 * and wrap around upto the start index.
5538 	 */
5539 
5540 	/* need to set the start index of descriptors to be ack'd */
5541 	set_ack_start = B_TRUE;
5542 
5543 	/* index upto which we have ack'd */
5544 	ack_end = start;
5545 	DECR_RXI(ack_end, ldcp);
5546 
5547 	next_rxi = rxi =  start;
5548 	do {
5549 vgen_recv_retry:
5550 		rv = ldc_mem_dring_acquire(ldcp->rx_dhandle, rxi, rxi);
5551 		if (rv != 0) {
5552 			DWARN(vgenp, ldcp, "ldc_mem_dring_acquire() failed"
5553 			    " rv(%d)\n", rv);
5554 			statsp->ierrors++;
5555 			return (rv);
5556 		}
5557 
5558 		rxdp = &(ldcp->rxdp[rxi]);
5559 		hdrp = &rxdp->hdr;
5560 
5561 		if (hdrp->dstate != VIO_DESC_READY) {
5562 			/*
5563 			 * Before waiting and retry here, send up
5564 			 * the packets that are received already
5565 			 */
5566 			if (bp != NULL) {
5567 				DTRACE_PROBE1(vgen_rcv_msgs, int, count);
5568 				vgen_rx(ldcp, bp);
5569 				count = 0;
5570 				bp = bpt = NULL;
5571 			}
5572 			/*
5573 			 * descriptor is not ready.
5574 			 * retry descriptor acquire, stop processing
5575 			 * after max # retries.
5576 			 */
5577 			if (retries == vgen_recv_retries)
5578 				break;
5579 			retries++;
5580 			drv_usecwait(vgen_recv_delay);
5581 			goto vgen_recv_retry;
5582 		}
5583 		retries = 0;
5584 
5585 		if (set_ack_start) {
5586 			/*
5587 			 * initialize the start index of the range
5588 			 * of descriptors to be ack'd.
5589 			 */
5590 			ack_start = rxi;
5591 			set_ack_start = B_FALSE;
5592 		}
5593 
5594 		datalen = rxdp->nbytes;
5595 		ncookies = rxdp->ncookies;
5596 		if ((datalen < ETHERMIN) ||
5597 		    (ncookies == 0) ||
5598 		    (ncookies > MAX_COOKIES)) {
5599 			rxd_err = B_TRUE;
5600 		} else {
5601 			/*
5602 			 * Try to allocate an mblk from the free pool
5603 			 * of recv mblks for the channel.
5604 			 * If this fails, use allocb().
5605 			 */
5606 			nbytes = (VNET_IPALIGN + datalen + 7) & ~7;
5607 			mp = vio_multipool_allocb(&ldcp->vmp, nbytes);
5608 			if (!mp) {
5609 				/*
5610 				 * The data buffer returned by
5611 				 * allocb(9F) is 8byte aligned. We
5612 				 * allocate extra 8 bytes to ensure
5613 				 * size is multiple of 8 bytes for
5614 				 * ldc_mem_copy().
5615 				 */
5616 				statsp->rx_vio_allocb_fail++;
5617 				mp = allocb(VNET_IPALIGN + datalen + 8,
5618 				    BPRI_MED);
5619 			}
5620 		}
5621 		if ((rxd_err) || (mp == NULL)) {
5622 			/*
5623 			 * rxd_err or allocb() failure,
5624 			 * drop this packet, get next.
5625 			 */
5626 			if (rxd_err) {
5627 				statsp->ierrors++;
5628 				rxd_err = B_FALSE;
5629 			} else {
5630 				statsp->rx_allocb_fail++;
5631 			}
5632 
5633 			ack_needed = hdrp->ack;
5634 
5635 			/* set descriptor done bit */
5636 			hdrp->dstate = VIO_DESC_DONE;
5637 
5638 			rv = ldc_mem_dring_release(ldcp->rx_dhandle,
5639 			    rxi, rxi);
5640 			if (rv != 0) {
5641 				DWARN(vgenp, ldcp,
5642 				    "ldc_mem_dring_release err rv(%d)\n", rv);
5643 				return (rv);
5644 			}
5645 
5646 			if (ack_needed) {
5647 				ack_needed = B_FALSE;
5648 				/*
5649 				 * sender needs ack for this packet,
5650 				 * ack pkts upto this index.
5651 				 */
5652 				ack_end = rxi;
5653 
5654 				rv = vgen_send_dring_ack(ldcp, tagp,
5655 				    ack_start, ack_end,
5656 				    VIO_DP_ACTIVE);
5657 				if (rv != VGEN_SUCCESS) {
5658 					goto error_ret;
5659 				}
5660 
5661 				/* need to set new ack start index */
5662 				set_ack_start = B_TRUE;
5663 			}
5664 			goto vgen_next_rxi;
5665 		}
5666 
5667 		nread = nbytes;
5668 		rv = ldc_mem_copy(ldcp->ldc_handle,
5669 		    (caddr_t)mp->b_rptr, off, &nread,
5670 		    rxdp->memcookie, ncookies, LDC_COPY_IN);
5671 
5672 		/* if ldc_mem_copy() failed */
5673 		if (rv) {
5674 			DWARN(vgenp, ldcp, "ldc_mem_copy err rv(%d)\n", rv);
5675 			statsp->ierrors++;
5676 			freemsg(mp);
5677 			goto error_ret;
5678 		}
5679 
5680 		ack_needed = hdrp->ack;
5681 		hdrp->dstate = VIO_DESC_DONE;
5682 
5683 		rv = ldc_mem_dring_release(ldcp->rx_dhandle, rxi, rxi);
5684 		if (rv != 0) {
5685 			DWARN(vgenp, ldcp,
5686 			    "ldc_mem_dring_release err rv(%d)\n", rv);
5687 			goto error_ret;
5688 		}
5689 
5690 		mp->b_rptr += VNET_IPALIGN;
5691 
5692 		if (ack_needed) {
5693 			ack_needed = B_FALSE;
5694 			/*
5695 			 * sender needs ack for this packet,
5696 			 * ack pkts upto this index.
5697 			 */
5698 			ack_end = rxi;
5699 
5700 			rv = vgen_send_dring_ack(ldcp, tagp,
5701 			    ack_start, ack_end, VIO_DP_ACTIVE);
5702 			if (rv != VGEN_SUCCESS) {
5703 				goto error_ret;
5704 			}
5705 
5706 			/* need to set new ack start index */
5707 			set_ack_start = B_TRUE;
5708 		}
5709 
5710 		if (nread != nbytes) {
5711 			DWARN(vgenp, ldcp,
5712 			    "ldc_mem_copy nread(%lx), nbytes(%lx)\n",
5713 			    nread, nbytes);
5714 			statsp->ierrors++;
5715 			freemsg(mp);
5716 			goto vgen_next_rxi;
5717 		}
5718 
5719 		/* point to the actual end of data */
5720 		mp->b_wptr = mp->b_rptr + datalen;
5721 
5722 		/* update stats */
5723 		statsp->ipackets++;
5724 		statsp->rbytes += datalen;
5725 		ehp = (struct ether_header *)mp->b_rptr;
5726 		if (IS_BROADCAST(ehp))
5727 			statsp->brdcstrcv++;
5728 		else if (IS_MULTICAST(ehp))
5729 			statsp->multircv++;
5730 
5731 		/* build a chain of received packets */
5732 		if (bp == NULL) {
5733 			/* first pkt */
5734 			bp = mp;
5735 			bpt = bp;
5736 			bpt->b_next = NULL;
5737 		} else {
5738 			mp->b_next = NULL;
5739 			bpt->b_next = mp;
5740 			bpt = mp;
5741 		}
5742 
5743 		if (count++ > vgen_chain_len) {
5744 			DTRACE_PROBE1(vgen_rcv_msgs, int, count);
5745 			vgen_rx(ldcp, bp);
5746 			count = 0;
5747 			bp = bpt = NULL;
5748 		}
5749 
5750 vgen_next_rxi:
5751 		/* update end index of range of descrs to be ack'd */
5752 		ack_end = rxi;
5753 
5754 		/* update the next index to be processed */
5755 		INCR_RXI(next_rxi, ldcp);
5756 		if (next_rxi == start) {
5757 			/*
5758 			 * processed the entire descriptor ring upto
5759 			 * the index at which we started.
5760 			 */
5761 			break;
5762 		}
5763 
5764 		rxi = next_rxi;
5765 
5766 	_NOTE(CONSTCOND)
5767 	} while (1);
5768 
5769 	/*
5770 	 * send an ack message to peer indicating that we have stopped
5771 	 * processing descriptors.
5772 	 */
5773 	if (set_ack_start) {
5774 		/*
5775 		 * We have ack'd upto some index and we have not
5776 		 * processed any descriptors beyond that index.
5777 		 * Use the last ack'd index as both the start and
5778 		 * end of range of descrs being ack'd.
5779 		 * Note: This results in acking the last index twice
5780 		 * and should be harmless.
5781 		 */
5782 		ack_start = ack_end;
5783 	}
5784 
5785 	rv = vgen_send_dring_ack(ldcp, tagp, ack_start, ack_end,
5786 	    VIO_DP_STOPPED);
5787 	if (rv != VGEN_SUCCESS) {
5788 		goto error_ret;
5789 	}
5790 
5791 	/* save new recv index of next dring msg */
5792 	ldcp->next_rxi = next_rxi;
5793 
5794 error_ret:
5795 	/* send up packets received so far */
5796 	if (bp != NULL) {
5797 		DTRACE_PROBE1(vgen_rcv_msgs, int, count);
5798 		vgen_rx(ldcp, bp);
5799 		bp = bpt = NULL;
5800 	}
5801 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
5802 	return (rv);
5803 
5804 }
5805 
5806 static int
5807 vgen_handle_dring_data_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5808 {
5809 	int rv = 0;
5810 	uint32_t start;
5811 	int32_t end;
5812 	uint32_t txi;
5813 	boolean_t ready_txd = B_FALSE;
5814 	vgen_stats_t *statsp;
5815 	vgen_private_desc_t *tbufp;
5816 	vnet_public_desc_t *txdp;
5817 	vio_dring_entry_hdr_t *hdrp;
5818 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5819 	vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;
5820 
5821 	DBG1(vgenp, ldcp, "enter\n");
5822 	start = dringmsg->start_idx;
5823 	end = dringmsg->end_idx;
5824 	statsp = &ldcp->stats;
5825 
5826 	/*
5827 	 * received an ack corresponding to a specific descriptor for
5828 	 * which we had set the ACK bit in the descriptor (during
5829 	 * transmit). This enables us to reclaim descriptors.
5830 	 */
5831 
5832 	DBG2(vgenp, ldcp, "ACK:  start(%d), end(%d)\n", start, end);
5833 
5834 	/* validate start and end indeces in the tx ack msg */
5835 	if (!(CHECK_TXI(start, ldcp)) || !(CHECK_TXI(end, ldcp))) {
5836 		/* drop the message if invalid index */
5837 		DWARN(vgenp, ldcp, "Invalid Tx ack start(%d) or end(%d)\n",
5838 		    start, end);
5839 		return (rv);
5840 	}
5841 	/* validate dring_ident */
5842 	if (dringmsg->dring_ident != ldcp->local_hparams.dring_ident) {
5843 		/* invalid dring_ident, drop the msg */
5844 		DWARN(vgenp, ldcp, "Invalid dring ident 0x%x\n",
5845 		    dringmsg->dring_ident);
5846 		return (rv);
5847 	}
5848 	statsp->dring_data_acks++;
5849 
5850 	/* reclaim descriptors that are done */
5851 	vgen_reclaim(ldcp);
5852 
5853 	if (dringmsg->dring_process_state != VIO_DP_STOPPED) {
5854 		/*
5855 		 * receiver continued processing descriptors after
5856 		 * sending us the ack.
5857 		 */
5858 		return (rv);
5859 	}
5860 
5861 	statsp->dring_stopped_acks++;
5862 
5863 	/* receiver stopped processing descriptors */
5864 	mutex_enter(&ldcp->wrlock);
5865 	mutex_enter(&ldcp->tclock);
5866 
5867 	/*
5868 	 * determine if there are any pending tx descriptors
5869 	 * ready to be processed by the receiver(peer) and if so,
5870 	 * send a message to the peer to restart receiving.
5871 	 */
5872 	ready_txd = B_FALSE;
5873 
5874 	/*
5875 	 * using the end index of the descriptor range for which
5876 	 * we received the ack, check if the next descriptor is
5877 	 * ready.
5878 	 */
5879 	txi = end;
5880 	INCR_TXI(txi, ldcp);
5881 	tbufp = &ldcp->tbufp[txi];
5882 	txdp = tbufp->descp;
5883 	hdrp = &txdp->hdr;
5884 	if (hdrp->dstate == VIO_DESC_READY) {
5885 		ready_txd = B_TRUE;
5886 	} else {
5887 		/*
5888 		 * descr next to the end of ack'd descr range is not
5889 		 * ready.
5890 		 * starting from the current reclaim index, check
5891 		 * if any descriptor is ready.
5892 		 */
5893 
5894 		txi = ldcp->cur_tbufp - ldcp->tbufp;
5895 		tbufp = &ldcp->tbufp[txi];
5896 
5897 		txdp = tbufp->descp;
5898 		hdrp = &txdp->hdr;
5899 		if (hdrp->dstate == VIO_DESC_READY) {
5900 			ready_txd = B_TRUE;
5901 		}
5902 
5903 	}
5904 
5905 	if (ready_txd) {
5906 		/*
5907 		 * we have tx descriptor(s) ready to be
5908 		 * processed by the receiver.
5909 		 * send a message to the peer with the start index
5910 		 * of ready descriptors.
5911 		 */
5912 		rv = vgen_send_dring_data(ldcp, txi, -1);
5913 		if (rv != VGEN_SUCCESS) {
5914 			ldcp->resched_peer = B_TRUE;
5915 			ldcp->resched_peer_txi = txi;
5916 			mutex_exit(&ldcp->tclock);
5917 			mutex_exit(&ldcp->wrlock);
5918 			return (rv);
5919 		}
5920 	} else {
5921 		/*
5922 		 * no ready tx descriptors. set the flag to send a
5923 		 * message to peer when tx descriptors are ready in
5924 		 * transmit routine.
5925 		 */
5926 		ldcp->resched_peer = B_TRUE;
5927 		ldcp->resched_peer_txi = ldcp->cur_tbufp - ldcp->tbufp;
5928 	}
5929 
5930 	mutex_exit(&ldcp->tclock);
5931 	mutex_exit(&ldcp->wrlock);
5932 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
5933 	return (rv);
5934 }
5935 
5936 static int
5937 vgen_handle_dring_data_nack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5938 {
5939 	int rv = 0;
5940 	uint32_t start;
5941 	int32_t end;
5942 	uint32_t txi;
5943 	vnet_public_desc_t *txdp;
5944 	vio_dring_entry_hdr_t *hdrp;
5945 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5946 	vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;
5947 
5948 	DBG1(vgenp, ldcp, "enter\n");
5949 	start = dringmsg->start_idx;
5950 	end = dringmsg->end_idx;
5951 
5952 	/*
5953 	 * peer sent a NACK msg to indicate lost packets.
5954 	 * The start and end correspond to the range of descriptors
5955 	 * for which the peer didn't receive a dring data msg and so
5956 	 * didn't receive the corresponding data.
5957 	 */
5958 	DWARN(vgenp, ldcp, "NACK: start(%d), end(%d)\n", start, end);
5959 
5960 	/* validate start and end indeces in the tx nack msg */
5961 	if (!(CHECK_TXI(start, ldcp)) || !(CHECK_TXI(end, ldcp))) {
5962 		/* drop the message if invalid index */
5963 		DWARN(vgenp, ldcp, "Invalid Tx nack start(%d) or end(%d)\n",
5964 		    start, end);
5965 		return (rv);
5966 	}
5967 	/* validate dring_ident */
5968 	if (dringmsg->dring_ident != ldcp->local_hparams.dring_ident) {
5969 		/* invalid dring_ident, drop the msg */
5970 		DWARN(vgenp, ldcp, "Invalid dring ident 0x%x\n",
5971 		    dringmsg->dring_ident);
5972 		return (rv);
5973 	}
5974 	mutex_enter(&ldcp->txlock);
5975 	mutex_enter(&ldcp->tclock);
5976 
5977 	if (ldcp->next_tbufp == ldcp->cur_tbufp) {
5978 		/* no busy descriptors, bogus nack ? */
5979 		mutex_exit(&ldcp->tclock);
5980 		mutex_exit(&ldcp->txlock);
5981 		return (rv);
5982 	}
5983 
5984 	/* we just mark the descrs as done so they can be reclaimed */
5985 	for (txi = start; txi <= end; ) {
5986 		txdp = &(ldcp->txdp[txi]);
5987 		hdrp = &txdp->hdr;
5988 		if (hdrp->dstate == VIO_DESC_READY)
5989 			hdrp->dstate = VIO_DESC_DONE;
5990 		INCR_TXI(txi, ldcp);
5991 	}
5992 	mutex_exit(&ldcp->tclock);
5993 	mutex_exit(&ldcp->txlock);
5994 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
5995 	return (rv);
5996 }
5997 
5998 static void
5999 vgen_reclaim(vgen_ldc_t *ldcp)
6000 {
6001 	mutex_enter(&ldcp->tclock);
6002 
6003 	vgen_reclaim_dring(ldcp);
6004 	ldcp->reclaim_lbolt = ddi_get_lbolt();
6005 
6006 	mutex_exit(&ldcp->tclock);
6007 }
6008 
6009 /*
6010  * transmit reclaim function. starting from the current reclaim index
6011  * look for descriptors marked DONE and reclaim the descriptor and the
6012  * corresponding buffers (tbuf).
6013  */
6014 static void
6015 vgen_reclaim_dring(vgen_ldc_t *ldcp)
6016 {
6017 	int count = 0;
6018 	vnet_public_desc_t *txdp;
6019 	vgen_private_desc_t *tbufp;
6020 	vio_dring_entry_hdr_t	*hdrp;
6021 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
6022 
6023 #ifdef DEBUG
6024 	if (vgen_trigger_txtimeout)
6025 		return;
6026 #endif
6027 
6028 	tbufp = ldcp->cur_tbufp;
6029 	txdp = tbufp->descp;
6030 	hdrp = &txdp->hdr;
6031 
6032 	while ((hdrp->dstate == VIO_DESC_DONE) &&
6033 	    (tbufp != ldcp->next_tbufp)) {
6034 		tbufp->flags = VGEN_PRIV_DESC_FREE;
6035 		hdrp->dstate = VIO_DESC_FREE;
6036 		hdrp->ack = B_FALSE;
6037 
6038 		tbufp = NEXTTBUF(ldcp, tbufp);
6039 		txdp = tbufp->descp;
6040 		hdrp = &txdp->hdr;
6041 		count++;
6042 	}
6043 
6044 	ldcp->cur_tbufp = tbufp;
6045 
6046 	/*
6047 	 * Check if mac layer should be notified to restart transmissions
6048 	 */
6049 	if ((ldcp->need_resched) && (count > 0)) {
6050 		ldcp->need_resched = B_FALSE;
6051 		vnet_tx_update(vgenp->vnetp);
6052 	}
6053 }
6054 
6055 /* return the number of pending transmits for the channel */
6056 static int
6057 vgen_num_txpending(vgen_ldc_t *ldcp)
6058 {
6059 	int n;
6060 
6061 	if (ldcp->next_tbufp >= ldcp->cur_tbufp) {
6062 		n = ldcp->next_tbufp - ldcp->cur_tbufp;
6063 	} else  {
6064 		/* cur_tbufp > next_tbufp */
6065 		n = ldcp->num_txds - (ldcp->cur_tbufp - ldcp->next_tbufp);
6066 	}
6067 
6068 	return (n);
6069 }
6070 
6071 /* determine if the transmit descriptor ring is full */
6072 static int
6073 vgen_tx_dring_full(vgen_ldc_t *ldcp)
6074 {
6075 	vgen_private_desc_t	*tbufp;
6076 	vgen_private_desc_t	*ntbufp;
6077 
6078 	tbufp = ldcp->next_tbufp;
6079 	ntbufp = NEXTTBUF(ldcp, tbufp);
6080 	if (ntbufp == ldcp->cur_tbufp) { /* out of tbufs/txds */
6081 		return (VGEN_SUCCESS);
6082 	}
6083 	return (VGEN_FAILURE);
6084 }
6085 
6086 /* determine if timeout condition has occured */
6087 static int
6088 vgen_ldc_txtimeout(vgen_ldc_t *ldcp)
6089 {
6090 	if (((ddi_get_lbolt() - ldcp->reclaim_lbolt) >
6091 	    drv_usectohz(vnet_ldcwd_txtimeout * 1000)) &&
6092 	    (vnet_ldcwd_txtimeout) &&
6093 	    (vgen_tx_dring_full(ldcp) == VGEN_SUCCESS)) {
6094 		return (VGEN_SUCCESS);
6095 	} else {
6096 		return (VGEN_FAILURE);
6097 	}
6098 }
6099 
6100 /* transmit watchdog timeout handler */
6101 static void
6102 vgen_ldc_watchdog(void *arg)
6103 {
6104 	vgen_ldc_t *ldcp;
6105 	vgen_t *vgenp;
6106 	int rv;
6107 
6108 	ldcp = (vgen_ldc_t *)arg;
6109 	vgenp = LDC_TO_VGEN(ldcp);
6110 
6111 	rv = vgen_ldc_txtimeout(ldcp);
6112 	if (rv == VGEN_SUCCESS) {
6113 		DWARN(vgenp, ldcp, "transmit timeout\n");
6114 #ifdef DEBUG
6115 		if (vgen_trigger_txtimeout) {
6116 			/* tx timeout triggered for debugging */
6117 			vgen_trigger_txtimeout = 0;
6118 		}
6119 #endif
6120 		mutex_enter(&ldcp->cblock);
6121 		ldcp->need_ldc_reset = B_TRUE;
6122 		vgen_handshake_retry(ldcp);
6123 		mutex_exit(&ldcp->cblock);
6124 		if (ldcp->need_resched) {
6125 			ldcp->need_resched = B_FALSE;
6126 			vnet_tx_update(vgenp->vnetp);
6127 		}
6128 	}
6129 
6130 	ldcp->wd_tid = timeout(vgen_ldc_watchdog, (caddr_t)ldcp,
6131 	    drv_usectohz(vnet_ldcwd_interval * 1000));
6132 }
6133 
6134 /* handler for error messages received from the peer ldc end-point */
6135 static void
6136 vgen_handle_errmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
6137 {
6138 	_NOTE(ARGUNUSED(ldcp, tagp))
6139 }
6140 
6141 static int
6142 vgen_check_datamsg_seq(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
6143 {
6144 	vio_raw_data_msg_t	*rmsg;
6145 	vio_dring_msg_t		*dmsg;
6146 	uint64_t		seq_num;
6147 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
6148 
6149 	if (tagp->vio_subtype_env == VIO_DRING_DATA) {
6150 		dmsg = (vio_dring_msg_t *)tagp;
6151 		seq_num = dmsg->seq_num;
6152 	} else if (tagp->vio_subtype_env == VIO_PKT_DATA) {
6153 		rmsg = (vio_raw_data_msg_t *)tagp;
6154 		seq_num = rmsg->seq_num;
6155 	} else {
6156 		return (EINVAL);
6157 	}
6158 
6159 	if (seq_num != ldcp->next_rxseq) {
6160 
6161 		/* seqnums don't match */
6162 		DWARN(vgenp, ldcp,
6163 		    "next_rxseq(0x%lx) != seq_num(0x%lx)\n",
6164 		    ldcp->next_rxseq, seq_num);
6165 
6166 		ldcp->need_ldc_reset = B_TRUE;
6167 		return (EINVAL);
6168 
6169 	}
6170 
6171 	ldcp->next_rxseq++;
6172 
6173 	return (0);
6174 }
6175 
6176 /* Check if the session id in the received message is valid */
6177 static int
6178 vgen_check_sid(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
6179 {
6180 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6181 
6182 	if (tagp->vio_sid != ldcp->peer_sid) {
6183 		DWARN(vgenp, ldcp, "sid mismatch: expected(%x), rcvd(%x)\n",
6184 		    ldcp->peer_sid, tagp->vio_sid);
6185 		return (VGEN_FAILURE);
6186 	}
6187 	else
6188 		return (VGEN_SUCCESS);
6189 }
6190 
/*
 * Format the six bytes at 'a' as colon-separated hex ("x:x:x:x:x:x",
 * without zero padding) into 'ebuf' and return 'ebuf'.
 * The output can be up to 17 characters plus the terminating NUL, so
 * 'ebuf' must provide at least 18 bytes.
 */
static caddr_t
vgen_print_ethaddr(uint8_t *a, char *ebuf)
{
	(void) sprintf(ebuf, "%x:%x:%x:%x:%x:%x",
	    a[0], a[1], a[2],
	    a[3], a[4], a[5]);

	return (ebuf);
}
6198 
6199 /* Handshake watchdog timeout handler */
6200 static void
6201 vgen_hwatchdog(void *arg)
6202 {
6203 	vgen_ldc_t *ldcp = (vgen_ldc_t *)arg;
6204 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6205 
6206 	DWARN(vgenp, ldcp,
6207 	    "handshake timeout ldc(%lx) phase(%x) state(%x)\n",
6208 	    ldcp->hphase, ldcp->hstate);
6209 
6210 	mutex_enter(&ldcp->cblock);
6211 	if (ldcp->cancel_htid) {
6212 		ldcp->cancel_htid = 0;
6213 		mutex_exit(&ldcp->cblock);
6214 		return;
6215 	}
6216 	ldcp->htid = 0;
6217 	ldcp->need_ldc_reset = B_TRUE;
6218 	vgen_handshake_retry(ldcp);
6219 	mutex_exit(&ldcp->cblock);
6220 }
6221 
6222 static void
6223 vgen_print_hparams(vgen_hparams_t *hp)
6224 {
6225 	uint8_t	addr[6];
6226 	char	ea[6];
6227 	ldc_mem_cookie_t *dc;
6228 
6229 	cmn_err(CE_CONT, "version_info:\n");
6230 	cmn_err(CE_CONT,
6231 	    "\tver_major: %d, ver_minor: %d, dev_class: %d\n",
6232 	    hp->ver_major, hp->ver_minor, hp->dev_class);
6233 
6234 	vnet_macaddr_ultostr(hp->addr, addr);
6235 	cmn_err(CE_CONT, "attr_info:\n");
6236 	cmn_err(CE_CONT, "\tMTU: %lx, addr: %s\n", hp->mtu,
6237 	    vgen_print_ethaddr(addr, ea));
6238 	cmn_err(CE_CONT,
6239 	    "\taddr_type: %x, xfer_mode: %x, ack_freq: %x\n",
6240 	    hp->addr_type, hp->xfer_mode, hp->ack_freq);
6241 
6242 	dc = &hp->dring_cookie;
6243 	cmn_err(CE_CONT, "dring_info:\n");
6244 	cmn_err(CE_CONT,
6245 	    "\tlength: %d, dsize: %d\n", hp->num_desc, hp->desc_size);
6246 	cmn_err(CE_CONT,
6247 	    "\tldc_addr: 0x%lx, ldc_size: %ld\n",
6248 	    dc->addr, dc->size);
6249 	cmn_err(CE_CONT, "\tdring_ident: 0x%lx\n", hp->dring_ident);
6250 }
6251 
6252 static void
6253 vgen_print_ldcinfo(vgen_ldc_t *ldcp)
6254 {
6255 	vgen_hparams_t *hp;
6256 
6257 	cmn_err(CE_CONT, "Channel Information:\n");
6258 	cmn_err(CE_CONT,
6259 	    "\tldc_id: 0x%lx, ldc_status: 0x%x\n",
6260 	    ldcp->ldc_id, ldcp->ldc_status);
6261 	cmn_err(CE_CONT,
6262 	    "\tlocal_sid: 0x%x, peer_sid: 0x%x\n",
6263 	    ldcp->local_sid, ldcp->peer_sid);
6264 	cmn_err(CE_CONT,
6265 	    "\thphase: 0x%x, hstate: 0x%x\n",
6266 	    ldcp->hphase, ldcp->hstate);
6267 
6268 	cmn_err(CE_CONT, "Local handshake params:\n");
6269 	hp = &ldcp->local_hparams;
6270 	vgen_print_hparams(hp);
6271 
6272 	cmn_err(CE_CONT, "Peer handshake params:\n");
6273 	hp = &ldcp->peer_hparams;
6274 	vgen_print_hparams(hp);
6275 }
6276 
6277 /*
6278  * Send received packets up the stack.
6279  */
6280 static void
6281 vgen_rx(vgen_ldc_t *ldcp, mblk_t *bp)
6282 {
6283 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6284 
6285 	if (ldcp->rcv_thread != NULL) {
6286 		ASSERT(MUTEX_HELD(&ldcp->rxlock));
6287 		mutex_exit(&ldcp->rxlock);
6288 	} else {
6289 		ASSERT(MUTEX_HELD(&ldcp->cblock));
6290 		mutex_exit(&ldcp->cblock);
6291 	}
6292 
6293 	vnet_rx(vgenp->vnetp, NULL, bp);
6294 
6295 	if (ldcp->rcv_thread != NULL) {
6296 		mutex_enter(&ldcp->rxlock);
6297 	} else {
6298 		mutex_enter(&ldcp->cblock);
6299 	}
6300 }
6301 
6302 /*
6303  * vgen_ldc_rcv_worker -- A per LDC worker thread to receive data.
6304  * This thread is woken up by the LDC interrupt handler to process
6305  * LDC packets and receive data.
6306  */
6307 static void
6308 vgen_ldc_rcv_worker(void *arg)
6309 {
6310 	callb_cpr_t	cprinfo;
6311 	vgen_ldc_t *ldcp = (vgen_ldc_t *)arg;
6312 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6313 
6314 	DBG1(vgenp, ldcp, "enter\n");
6315 	CALLB_CPR_INIT(&cprinfo, &ldcp->rcv_thr_lock, callb_generic_cpr,
6316 	    "vnet_rcv_thread");
6317 	mutex_enter(&ldcp->rcv_thr_lock);
6318 	ldcp->rcv_thr_flags |= VGEN_WTHR_RUNNING;
6319 	while (!(ldcp->rcv_thr_flags & VGEN_WTHR_STOP)) {
6320 
6321 		CALLB_CPR_SAFE_BEGIN(&cprinfo);
6322 		/*
6323 		 * Wait until the data is received or a stop
6324 		 * request is received.
6325 		 */
6326 		while (!(ldcp->rcv_thr_flags &
6327 		    (VGEN_WTHR_DATARCVD | VGEN_WTHR_STOP))) {
6328 			cv_wait(&ldcp->rcv_thr_cv, &ldcp->rcv_thr_lock);
6329 		}
6330 		CALLB_CPR_SAFE_END(&cprinfo, &ldcp->rcv_thr_lock)
6331 
6332 		/*
6333 		 * First process the stop request.
6334 		 */
6335 		if (ldcp->rcv_thr_flags & VGEN_WTHR_STOP) {
6336 			DBG2(vgenp, ldcp, "stopped\n");
6337 			break;
6338 		}
6339 		ldcp->rcv_thr_flags &= ~VGEN_WTHR_DATARCVD;
6340 		mutex_exit(&ldcp->rcv_thr_lock);
6341 		DBG2(vgenp, ldcp, "calling vgen_handle_evt_read\n");
6342 		vgen_handle_evt_read(ldcp);
6343 		mutex_enter(&ldcp->rcv_thr_lock);
6344 	}
6345 
6346 	/*
6347 	 * Update the run status and wakeup the thread that
6348 	 * has sent the stop request.
6349 	 */
6350 	ldcp->rcv_thr_flags &= ~VGEN_WTHR_RUNNING;
6351 	cv_signal(&ldcp->rcv_thr_cv);
6352 	CALLB_CPR_EXIT(&cprinfo);
6353 	thread_exit();
6354 	DBG1(vgenp, ldcp, "exit\n");
6355 }
6356 
6357 /* vgen_stop_rcv_thread -- Co-ordinate with receive thread to stop it */
6358 static void
6359 vgen_stop_rcv_thread(vgen_ldc_t *ldcp)
6360 {
6361 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6362 
6363 	DBG1(vgenp, ldcp, "enter\n");
6364 	/*
6365 	 * Send a stop request by setting the stop flag and
6366 	 * wait until the receive thread stops.
6367 	 */
6368 	mutex_enter(&ldcp->rcv_thr_lock);
6369 	if (ldcp->rcv_thr_flags & VGEN_WTHR_RUNNING) {
6370 		ldcp->rcv_thr_flags |= VGEN_WTHR_STOP;
6371 		cv_signal(&ldcp->rcv_thr_cv);
6372 		DBG2(vgenp, ldcp, "waiting...");
6373 		while (ldcp->rcv_thr_flags & VGEN_WTHR_RUNNING) {
6374 			cv_wait(&ldcp->rcv_thr_cv, &ldcp->rcv_thr_lock);
6375 		}
6376 	}
6377 	mutex_exit(&ldcp->rcv_thr_lock);
6378 	ldcp->rcv_thread = NULL;
6379 	DBG1(vgenp, ldcp, "exit\n");
6380 }
6381 
6382 #if DEBUG
6383 
6384 /*
6385  * Print debug messages - set to 0xf to enable all msgs
6386  */
6387 static void
6388 debug_printf(const char *fname, vgen_t *vgenp,
6389     vgen_ldc_t *ldcp, const char *fmt, ...)
6390 {
6391 	char    buf[256];
6392 	char    *bufp = buf;
6393 	va_list ap;
6394 
6395 	if ((vgenp != NULL) && (vgenp->vnetp != NULL)) {
6396 		(void) sprintf(bufp, "vnet%d:",
6397 		    ((vnet_t *)(vgenp->vnetp))->instance);
6398 		bufp += strlen(bufp);
6399 	}
6400 	if (ldcp != NULL) {
6401 		(void) sprintf(bufp, "ldc(%ld):", ldcp->ldc_id);
6402 		bufp += strlen(bufp);
6403 	}
6404 	(void) sprintf(bufp, "%s: ", fname);
6405 	bufp += strlen(bufp);
6406 
6407 	va_start(ap, fmt);
6408 	(void) vsprintf(bufp, fmt, ap);
6409 	va_end(ap);
6410 
6411 	if ((ldcp == NULL) ||(vgendbg_ldcid == -1) ||
6412 	    (vgendbg_ldcid == ldcp->ldc_id)) {
6413 		cmn_err(CE_CONT, "%s\n", buf);
6414 	}
6415 }
6416 #endif
6417