xref: /titanic_50/usr/src/uts/sun4v/io/vnet_gen.c (revision 183971bab0e50aa34f0476451276612019e36db5)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/types.h>
28 #include <sys/errno.h>
29 #include <sys/sysmacros.h>
30 #include <sys/param.h>
31 #include <sys/stream.h>
32 #include <sys/strsubr.h>
33 #include <sys/kmem.h>
34 #include <sys/conf.h>
35 #include <sys/devops.h>
36 #include <sys/ksynch.h>
37 #include <sys/stat.h>
38 #include <sys/modctl.h>
39 #include <sys/debug.h>
40 #include <sys/ethernet.h>
41 #include <sys/ddi.h>
42 #include <sys/sunddi.h>
43 #include <sys/strsun.h>
44 #include <sys/note.h>
45 #include <sys/mac_provider.h>
46 #include <sys/mac_ether.h>
47 #include <sys/ldc.h>
48 #include <sys/mach_descrip.h>
49 #include <sys/mdeg.h>
50 #include <net/if.h>
51 #include <sys/vnet.h>
52 #include <sys/vio_mailbox.h>
53 #include <sys/vio_common.h>
54 #include <sys/vnet_common.h>
55 #include <sys/vnet_mailbox.h>
56 #include <sys/vio_util.h>
57 #include <sys/vnet_gen.h>
58 #include <sys/atomic.h>
59 #include <sys/callb.h>
60 #include <sys/sdt.h>
61 #include <sys/intr.h>
62 #include <sys/pattr.h>
63 #include <sys/vlan.h>
64 
65 /*
66  * Implementation of the mac functionality for vnet using the
67  * generic(default) transport layer of sun4v Logical Domain Channels(LDC).
68  */
69 
70 /*
71  * Function prototypes.
72  */
73 /* vgen proxy entry points */
74 int vgen_init(void *vnetp, uint64_t regprop, dev_info_t *vnetdip,
75     const uint8_t *macaddr, void **vgenhdl);
76 void vgen_uninit(void *arg);
77 int vgen_dds_tx(void *arg, void *dmsg);
78 void vgen_mod_init(void);
79 int vgen_mod_cleanup(void);
80 void vgen_mod_fini(void);
81 static int vgen_start(void *arg);
82 static void vgen_stop(void *arg);
83 static mblk_t *vgen_tx(void *arg, mblk_t *mp);
84 static int vgen_multicst(void *arg, boolean_t add,
85 	const uint8_t *mca);
86 static int vgen_promisc(void *arg, boolean_t on);
87 static int vgen_unicst(void *arg, const uint8_t *mca);
88 static int vgen_stat(void *arg, uint_t stat, uint64_t *val);
89 static void vgen_ioctl(void *arg, queue_t *wq, mblk_t *mp);
90 
91 /* vgen internal functions */
92 static int vgen_read_mdprops(vgen_t *vgenp);
93 static void vgen_update_md_prop(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
94 static void vgen_read_pri_eth_types(vgen_t *vgenp, md_t *mdp,
95 	mde_cookie_t node);
96 static void vgen_mtu_read(vgen_t *vgenp, md_t *mdp, mde_cookie_t node,
97 	uint32_t *mtu);
98 static void vgen_detach_ports(vgen_t *vgenp);
99 static void vgen_port_detach(vgen_port_t *portp);
100 static void vgen_port_list_insert(vgen_port_t *portp);
101 static void vgen_port_list_remove(vgen_port_t *portp);
102 static vgen_port_t *vgen_port_lookup(vgen_portlist_t *plistp,
103 	int port_num);
104 static int vgen_mdeg_reg(vgen_t *vgenp);
105 static void vgen_mdeg_unreg(vgen_t *vgenp);
106 static int vgen_mdeg_cb(void *cb_argp, mdeg_result_t *resp);
107 static int vgen_mdeg_port_cb(void *cb_argp, mdeg_result_t *resp);
108 static int vgen_add_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
109 static int vgen_port_read_props(vgen_port_t *portp, vgen_t *vgenp, md_t *mdp,
110 	mde_cookie_t mdex);
111 static int vgen_remove_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
112 static int vgen_port_attach(vgen_port_t *portp);
113 static void vgen_port_detach_mdeg(vgen_port_t *portp);
114 static void vgen_port_detach_mdeg(vgen_port_t *portp);
115 static int vgen_update_port(vgen_t *vgenp, md_t *curr_mdp,
116 	mde_cookie_t curr_mdex, md_t *prev_mdp, mde_cookie_t prev_mdex);
117 static uint64_t	vgen_port_stat(vgen_port_t *portp, uint_t stat);
118 
119 static int vgen_ldc_attach(vgen_port_t *portp, uint64_t ldc_id);
120 static void vgen_ldc_detach(vgen_ldc_t *ldcp);
121 static int vgen_alloc_tx_ring(vgen_ldc_t *ldcp);
122 static void vgen_free_tx_ring(vgen_ldc_t *ldcp);
123 static void vgen_init_ports(vgen_t *vgenp);
124 static void vgen_port_init(vgen_port_t *portp);
125 static void vgen_uninit_ports(vgen_t *vgenp);
126 static void vgen_port_uninit(vgen_port_t *portp);
127 static void vgen_init_ldcs(vgen_port_t *portp);
128 static void vgen_uninit_ldcs(vgen_port_t *portp);
129 static int vgen_ldc_init(vgen_ldc_t *ldcp);
130 static void vgen_ldc_uninit(vgen_ldc_t *ldcp);
131 static int vgen_init_tbufs(vgen_ldc_t *ldcp);
132 static void vgen_uninit_tbufs(vgen_ldc_t *ldcp);
133 static void vgen_clobber_tbufs(vgen_ldc_t *ldcp);
134 static void vgen_clobber_rxds(vgen_ldc_t *ldcp);
135 static uint64_t	vgen_ldc_stat(vgen_ldc_t *ldcp, uint_t stat);
136 static uint_t vgen_ldc_cb(uint64_t event, caddr_t arg);
137 static int vgen_portsend(vgen_port_t *portp, mblk_t *mp);
138 static int vgen_ldcsend(void *arg, mblk_t *mp);
139 static void vgen_ldcsend_pkt(void *arg, mblk_t *mp);
140 static int vgen_ldcsend_dring(void *arg, mblk_t *mp);
141 static void vgen_reclaim(vgen_ldc_t *ldcp);
142 static void vgen_reclaim_dring(vgen_ldc_t *ldcp);
143 static int vgen_num_txpending(vgen_ldc_t *ldcp);
144 static int vgen_tx_dring_full(vgen_ldc_t *ldcp);
145 static int vgen_ldc_txtimeout(vgen_ldc_t *ldcp);
146 static void vgen_ldc_watchdog(void *arg);
147 
148 /* vgen handshake functions */
149 static vgen_ldc_t *vh_nextphase(vgen_ldc_t *ldcp);
150 static int vgen_sendmsg(vgen_ldc_t *ldcp, caddr_t msg,  size_t msglen,
151 	boolean_t caller_holds_lock);
152 static int vgen_send_version_negotiate(vgen_ldc_t *ldcp);
153 static int vgen_send_attr_info(vgen_ldc_t *ldcp);
154 static int vgen_send_dring_reg(vgen_ldc_t *ldcp);
155 static int vgen_send_rdx_info(vgen_ldc_t *ldcp);
156 static int vgen_send_dring_data(vgen_ldc_t *ldcp, uint32_t start, int32_t end);
157 static int vgen_send_mcast_info(vgen_ldc_t *ldcp);
158 static int vgen_handshake_phase2(vgen_ldc_t *ldcp);
159 static void vgen_handshake_reset(vgen_ldc_t *ldcp);
160 static void vgen_reset_hphase(vgen_ldc_t *ldcp);
161 static void vgen_handshake(vgen_ldc_t *ldcp);
162 static int vgen_handshake_done(vgen_ldc_t *ldcp);
163 static void vgen_handshake_retry(vgen_ldc_t *ldcp);
164 static int vgen_handle_version_negotiate(vgen_ldc_t *ldcp,
165 	vio_msg_tag_t *tagp);
166 static int vgen_handle_attr_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
167 static int vgen_handle_dring_reg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
168 static int vgen_handle_rdx_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
169 static int vgen_handle_mcast_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
170 static int vgen_handle_ctrlmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
171 static void vgen_handle_pkt_data_nop(void *arg1, void *arg2, uint32_t msglen);
172 static void vgen_handle_pkt_data(void *arg1, void *arg2, uint32_t msglen);
173 static int vgen_handle_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
174 static int vgen_handle_dring_data_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
175 static int vgen_process_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
176 static int vgen_handle_dring_data_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
177 static int vgen_handle_dring_data_nack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
178 static int vgen_send_dring_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp,
179 	uint32_t start, int32_t end, uint8_t pstate);
180 static int vgen_handle_datamsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp,
181 	uint32_t msglen);
182 static void vgen_handle_errmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
183 static void vgen_handle_evt_up(vgen_ldc_t *ldcp);
184 static void vgen_handle_evt_reset(vgen_ldc_t *ldcp);
185 static int vgen_check_sid(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
186 static int vgen_check_datamsg_seq(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
187 static caddr_t vgen_print_ethaddr(uint8_t *a, char *ebuf);
188 static void vgen_hwatchdog(void *arg);
189 static void vgen_print_attr_info(vgen_ldc_t *ldcp, int endpoint);
190 static void vgen_print_hparams(vgen_hparams_t *hp);
191 static void vgen_print_ldcinfo(vgen_ldc_t *ldcp);
192 static void vgen_stop_rcv_thread(vgen_ldc_t *ldcp);
193 static void vgen_drain_rcv_thread(vgen_ldc_t *ldcp);
194 static void vgen_ldc_rcv_worker(void *arg);
195 static void vgen_handle_evt_read(vgen_ldc_t *ldcp);
196 static void vgen_rx(vgen_ldc_t *ldcp, mblk_t *bp);
197 static void vgen_set_vnet_proto_ops(vgen_ldc_t *ldcp);
198 static void vgen_reset_vnet_proto_ops(vgen_ldc_t *ldcp);
199 
200 /* VLAN routines */
201 static void vgen_vlan_read_ids(void *arg, int type, md_t *mdp,
202 	mde_cookie_t node, uint16_t *pvidp, uint16_t **vidspp,
203 	uint16_t *nvidsp, uint16_t *default_idp);
204 static void vgen_vlan_create_hash(vgen_port_t *portp);
205 static void vgen_vlan_destroy_hash(vgen_port_t *portp);
206 static void vgen_vlan_add_ids(vgen_port_t *portp);
207 static void vgen_vlan_remove_ids(vgen_port_t *portp);
208 static boolean_t vgen_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid);
209 static boolean_t vgen_frame_lookup_vid(vnet_t *vnetp, struct ether_header *ehp,
210 	uint16_t *vidp);
211 static mblk_t *vgen_vlan_frame_fixtag(vgen_port_t *portp, mblk_t *mp,
212 	boolean_t is_tagged, uint16_t vid);
213 static void vgen_vlan_unaware_port_reset(vgen_port_t *portp);
214 static void vgen_reset_vlan_unaware_ports(vgen_t *vgenp);
215 static int vgen_dds_rx(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
216 
217 /* externs */
218 extern void vnet_dds_rx(void *arg, void *dmsg);
219 extern int vnet_mtu_update(vnet_t *vnetp, uint32_t mtu);
220 
221 /*
222  * The handshake process consists of 5 phases defined below, with VH_PHASE0
223  * being the pre-handshake phase and VH_DONE is the phase to indicate
224  * successful completion of all phases.
225  * Each phase may have one to several handshake states which are required
226  * to complete successfully to move to the next phase.
227  * Refer to the functions vgen_handshake() and vgen_handshake_done() for
228  * more details.
229  */
230 /* handshake phases */
231 enum {	VH_PHASE0, VH_PHASE1, VH_PHASE2, VH_PHASE3, VH_DONE = 0x80 };
232 
233 /* handshake states */
234 enum {
235 
236 	VER_INFO_SENT	=	0x1,
237 	VER_ACK_RCVD	=	0x2,
238 	VER_INFO_RCVD	=	0x4,
239 	VER_ACK_SENT	=	0x8,
240 	VER_NEGOTIATED	=	(VER_ACK_RCVD | VER_ACK_SENT),
241 
242 	ATTR_INFO_SENT	=	0x10,
243 	ATTR_ACK_RCVD	=	0x20,
244 	ATTR_INFO_RCVD	=	0x40,
245 	ATTR_ACK_SENT	=	0x80,
246 	ATTR_INFO_EXCHANGED	=	(ATTR_ACK_RCVD | ATTR_ACK_SENT),
247 
248 	DRING_INFO_SENT	=	0x100,
249 	DRING_ACK_RCVD	=	0x200,
250 	DRING_INFO_RCVD	=	0x400,
251 	DRING_ACK_SENT	=	0x800,
252 	DRING_INFO_EXCHANGED	=	(DRING_ACK_RCVD | DRING_ACK_SENT),
253 
254 	RDX_INFO_SENT	=	0x1000,
255 	RDX_ACK_RCVD	=	0x2000,
256 	RDX_INFO_RCVD	=	0x4000,
257 	RDX_ACK_SENT	=	0x8000,
258 	RDX_EXCHANGED	=	(RDX_ACK_RCVD | RDX_ACK_SENT)
259 
260 };
261 
262 #define	VGEN_PRI_ETH_DEFINED(vgenp)	((vgenp)->pri_num_types != 0)
263 
/*
 * LDC_LOCK() acquires all five per-channel mutexes in the fixed order
 * cblock, rxlock, wrlock, txlock, tclock; LDC_UNLOCK() releases them in
 * the reverse order.
 *
 * Each macro is wrapped in do { } while (0) so that it expands to exactly
 * one statement and therefore remains correct when used as the body of an
 * unbraced if/else. The caller supplies the terminating semicolon.
 */
#define	LDC_LOCK(ldcp)	\
	do {							\
		mutex_enter(&((ldcp)->cblock));			\
		mutex_enter(&((ldcp)->rxlock));			\
		mutex_enter(&((ldcp)->wrlock));			\
		mutex_enter(&((ldcp)->txlock));			\
		mutex_enter(&((ldcp)->tclock));			\
	_NOTE(CONSTCOND) } while (0)
#define	LDC_UNLOCK(ldcp)	\
	do {							\
		mutex_exit(&((ldcp)->tclock));			\
		mutex_exit(&((ldcp)->txlock));			\
		mutex_exit(&((ldcp)->wrlock));			\
		mutex_exit(&((ldcp)->rxlock));			\
		mutex_exit(&((ldcp)->cblock));			\
	_NOTE(CONSTCOND) } while (0)
276 
277 #define	VGEN_VER_EQ(ldcp, major, minor)	\
278 	((ldcp)->local_hparams.ver_major == (major) &&	\
279 	    (ldcp)->local_hparams.ver_minor == (minor))
280 
281 #define	VGEN_VER_LT(ldcp, major, minor)	\
282 	(((ldcp)->local_hparams.ver_major < (major)) ||	\
283 	    ((ldcp)->local_hparams.ver_major == (major) &&	\
284 	    (ldcp)->local_hparams.ver_minor < (minor)))
285 
286 #define	VGEN_VER_GTEQ(ldcp, major, minor)	\
287 	(((ldcp)->local_hparams.ver_major > (major)) ||	\
288 	    ((ldcp)->local_hparams.ver_major == (major) &&	\
289 	    (ldcp)->local_hparams.ver_minor >= (minor)))
290 
291 static struct ether_addr etherbroadcastaddr = {
292 	0xff, 0xff, 0xff, 0xff, 0xff, 0xff
293 };
/*
 * MIB II broadcast/multicast packets.
 *
 * Both macros take a pointer to an ethernet header. IS_BROADCAST() compares
 * the destination address with etherbroadcastaddr; IS_MULTICAST() tests the
 * low-order bit of the first destination address octet. The argument is
 * parenthesized so that pointer expressions (e.g. "hdr + 1") expand
 * correctly.
 */
#define	IS_BROADCAST(ehp) \
		(ether_cmp(&(ehp)->ether_dhost, &etherbroadcastaddr) == 0)
#define	IS_MULTICAST(ehp) \
		(((ehp)->ether_dhost.ether_addr_octet[0] & 01) == 1)
301 
302 /*
303  * Property names
304  */
305 static char macaddr_propname[] = "mac-address";
306 static char rmacaddr_propname[] = "remote-mac-address";
307 static char channel_propname[] = "channel-endpoint";
308 static char reg_propname[] = "reg";
309 static char port_propname[] = "port";
310 static char swport_propname[] = "switch-port";
311 static char id_propname[] = "id";
312 static char vdev_propname[] = "virtual-device";
313 static char vnet_propname[] = "network";
314 static char pri_types_propname[] = "priority-ether-types";
315 static char vgen_pvid_propname[] = "port-vlan-id";
316 static char vgen_vid_propname[] = "vlan-id";
317 static char vgen_dvid_propname[] = "default-vlan-id";
318 static char port_pvid_propname[] = "remote-port-vlan-id";
319 static char port_vid_propname[] = "remote-vlan-id";
320 static char vgen_mtu_propname[] = "mtu";
321 
322 /* versions supported - in decreasing order */
323 static vgen_ver_t vgen_versions[VGEN_NUM_VER] = { {1, 4} };
324 
325 /* Tunables */
326 uint32_t vgen_hwd_interval = 5;		/* handshake watchdog freq in sec */
327 uint32_t vgen_max_hretries = VNET_NUM_HANDSHAKES; /* # of handshake retries */
328 uint32_t vgen_ldcwr_retries = 10;	/* max # of ldc_write() retries */
329 uint32_t vgen_ldcup_retries = 5;	/* max # of ldc_up() retries */
330 uint32_t vgen_ldccl_retries = 5;	/* max # of ldc_close() retries */
331 uint32_t vgen_recv_delay = 1;		/* delay when rx descr not ready */
332 uint32_t vgen_recv_retries = 10;	/* retry when rx descr not ready */
333 uint32_t vgen_tx_retries = 0x4;		/* retry when tx descr not available */
334 uint32_t vgen_tx_delay = 0x30;		/* delay when tx descr not available */
335 
336 int vgen_rcv_thread_enabled = 1;	/* Enable Recieve thread */
337 
338 static vio_mblk_pool_t	*vgen_rx_poolp = NULL;
339 static krwlock_t	vgen_rw;
340 
/*
 * max # of packets accumulated prior to sending them up. It is best
 * to keep this at 60% of the number of receive buffers.
 */
345 uint32_t vgen_chain_len = (VGEN_NRBUFS * 0.6);
346 
347 /*
348  * Internal tunables for receive buffer pools, that is,  the size and number of
349  * mblks for each pool. At least 3 sizes must be specified if these are used.
350  * The sizes must be specified in increasing order. Non-zero value of the first
351  * size will be used as a hint to use these values instead of the algorithm
352  * that determines the sizes based on MTU.
353  */
354 uint32_t vgen_rbufsz1 = 0;
355 uint32_t vgen_rbufsz2 = 0;
356 uint32_t vgen_rbufsz3 = 0;
357 uint32_t vgen_rbufsz4 = 0;
358 
359 uint32_t vgen_nrbufs1 = VGEN_NRBUFS;
360 uint32_t vgen_nrbufs2 = VGEN_NRBUFS;
361 uint32_t vgen_nrbufs3 = VGEN_NRBUFS;
362 uint32_t vgen_nrbufs4 = VGEN_NRBUFS;
363 
364 /*
365  * In the absence of "priority-ether-types" property in MD, the following
366  * internal tunable can be set to specify a single priority ethertype.
367  */
368 uint64_t vgen_pri_eth_type = 0;
369 
370 /*
371  * Number of transmit priority buffers that are preallocated per device.
372  * This number is chosen to be a small value to throttle transmission
373  * of priority packets. Note: Must be a power of 2 for vio_create_mblks().
374  */
375 uint32_t vgen_pri_tx_nmblks = 64;
376 
377 uint32_t	vgen_vlan_nchains = 4;	/* # of chains in vlan id hash table */
378 
379 #ifdef DEBUG
380 /* flags to simulate error conditions for debugging */
381 int vgen_trigger_txtimeout = 0;
382 int vgen_trigger_rxlost = 0;
383 #endif
384 
385 /*
386  * Matching criteria passed to the MDEG to register interest
387  * in changes to 'virtual-device' nodes (i.e. vnet nodes) identified
388  * by their 'name' and 'cfg-handle' properties.
389  */
390 static md_prop_match_t vdev_prop_match[] = {
391 	{ MDET_PROP_STR,    "name"   },
392 	{ MDET_PROP_VAL,    "cfg-handle" },
393 	{ MDET_LIST_END,    NULL    }
394 };
395 
396 static mdeg_node_match_t vdev_match = { "virtual-device",
397 						vdev_prop_match };
398 
399 /* MD update matching structure */
400 static md_prop_match_t	vport_prop_match[] = {
401 	{ MDET_PROP_VAL,	"id" },
402 	{ MDET_LIST_END,	NULL }
403 };
404 
405 static mdeg_node_match_t vport_match = { "virtual-device-port",
406 					vport_prop_match };
407 
408 /* template for matching a particular vnet instance */
409 static mdeg_prop_spec_t vgen_prop_template[] = {
410 	{ MDET_PROP_STR,	"name",		"network" },
411 	{ MDET_PROP_VAL,	"cfg-handle",	NULL },
412 	{ MDET_LIST_END,	NULL,		NULL }
413 };
414 
415 #define	VGEN_SET_MDEG_PROP_INST(specp, val)	(specp)[1].ps_val = (val)
416 
417 static int vgen_mdeg_port_cb(void *cb_argp, mdeg_result_t *resp);
418 
419 static mac_callbacks_t vgen_m_callbacks = {
420 	0,
421 	vgen_stat,
422 	vgen_start,
423 	vgen_stop,
424 	vgen_promisc,
425 	vgen_multicst,
426 	vgen_unicst,
427 	vgen_tx,
428 	NULL,
429 	NULL,
430 	NULL
431 };
432 
433 /* externs */
434 extern pri_t	maxclsyspri;
435 extern proc_t	p0;
436 extern uint32_t vnet_ntxds;
437 extern uint32_t vnet_ldcwd_interval;
438 extern uint32_t vnet_ldcwd_txtimeout;
439 extern uint32_t vnet_ldc_mtu;
440 extern uint32_t vnet_nrbufs;
441 extern uint32_t	vnet_ethermtu;
442 extern uint16_t	vnet_default_vlan_id;
443 extern boolean_t vnet_jumbo_rxpools;
444 
445 #ifdef DEBUG
446 
447 extern int vnet_dbglevel;
448 static void debug_printf(const char *fname, vgen_t *vgenp,
449 	vgen_ldc_t *ldcp, const char *fmt, ...);
450 
451 /* -1 for all LDCs info, or ldc_id for a specific LDC info */
452 int vgendbg_ldcid = -1;
453 
454 /* simulate handshake error conditions for debug */
455 uint32_t vgen_hdbg;
456 #define	HDBG_VERSION	0x1
457 #define	HDBG_TIMEOUT	0x2
458 #define	HDBG_BAD_SID	0x4
459 #define	HDBG_OUT_STATE	0x8
460 
461 #endif
462 
463 /*
464  * vgen_init() is called by an instance of vnet driver to initialize the
465  * corresponding generic proxy transport layer. The arguments passed by vnet
466  * are - an opaque pointer to the vnet instance, pointers to dev_info_t and
467  * the mac address of the vnet device, and a pointer to vgen_t is passed
468  * back as a handle to vnet.
469  */
470 int
471 vgen_init(void *vnetp, uint64_t regprop, dev_info_t *vnetdip,
472     const uint8_t *macaddr, void **vgenhdl)
473 {
474 	vgen_t *vgenp;
475 	int instance;
476 	int rv;
477 
478 	if ((vnetp == NULL) || (vnetdip == NULL))
479 		return (DDI_FAILURE);
480 
481 	instance = ddi_get_instance(vnetdip);
482 
483 	DBG1(NULL, NULL, "vnet(%d): enter\n", instance);
484 
485 	vgenp = kmem_zalloc(sizeof (vgen_t), KM_SLEEP);
486 
487 	vgenp->vnetp = vnetp;
488 	vgenp->instance = instance;
489 	vgenp->regprop = regprop;
490 	vgenp->vnetdip = vnetdip;
491 	bcopy(macaddr, &(vgenp->macaddr), ETHERADDRL);
492 
493 	/* allocate multicast table */
494 	vgenp->mctab = kmem_zalloc(VGEN_INIT_MCTAB_SIZE *
495 	    sizeof (struct ether_addr), KM_SLEEP);
496 	vgenp->mccount = 0;
497 	vgenp->mcsize = VGEN_INIT_MCTAB_SIZE;
498 
499 	mutex_init(&vgenp->lock, NULL, MUTEX_DRIVER, NULL);
500 	rw_init(&vgenp->vgenports.rwlock, NULL, RW_DRIVER, NULL);
501 
502 	rv = vgen_read_mdprops(vgenp);
503 	if (rv != 0) {
504 		goto vgen_init_fail;
505 	}
506 
507 	/* register with MD event generator */
508 	rv = vgen_mdeg_reg(vgenp);
509 	if (rv != DDI_SUCCESS) {
510 		goto vgen_init_fail;
511 	}
512 
513 	*vgenhdl = (void *)vgenp;
514 
515 	DBG1(NULL, NULL, "vnet(%d): exit\n", instance);
516 	return (DDI_SUCCESS);
517 
518 vgen_init_fail:
519 	rw_destroy(&vgenp->vgenports.rwlock);
520 	mutex_destroy(&vgenp->lock);
521 	kmem_free(vgenp->mctab, VGEN_INIT_MCTAB_SIZE *
522 	    sizeof (struct ether_addr));
523 	if (VGEN_PRI_ETH_DEFINED(vgenp)) {
524 		kmem_free(vgenp->pri_types,
525 		    sizeof (uint16_t) * vgenp->pri_num_types);
526 		(void) vio_destroy_mblks(vgenp->pri_tx_vmp);
527 	}
528 	KMEM_FREE(vgenp);
529 	return (DDI_FAILURE);
530 }
531 
532 /*
533  * Called by vnet to undo the initializations done by vgen_init().
534  * The handle provided by generic transport during vgen_init() is the argument.
535  */
536 void
537 vgen_uninit(void *arg)
538 {
539 	vgen_t		*vgenp = (vgen_t *)arg;
540 	vio_mblk_pool_t	*rp;
541 	vio_mblk_pool_t	*nrp;
542 
543 	if (vgenp == NULL) {
544 		return;
545 	}
546 
547 	DBG1(vgenp, NULL, "enter\n");
548 
549 	/* unregister with MD event generator */
550 	vgen_mdeg_unreg(vgenp);
551 
552 	mutex_enter(&vgenp->lock);
553 
554 	/* detach all ports from the device */
555 	vgen_detach_ports(vgenp);
556 
557 	/*
558 	 * free any pending rx mblk pools,
559 	 * that couldn't be freed previously during channel detach.
560 	 */
561 	rp = vgenp->rmp;
562 	while (rp != NULL) {
563 		nrp = vgenp->rmp = rp->nextp;
564 		if (vio_destroy_mblks(rp)) {
565 			WRITE_ENTER(&vgen_rw);
566 			rp->nextp = vgen_rx_poolp;
567 			vgen_rx_poolp = rp;
568 			RW_EXIT(&vgen_rw);
569 		}
570 		rp = nrp;
571 	}
572 
573 	/* free multicast table */
574 	kmem_free(vgenp->mctab, vgenp->mcsize * sizeof (struct ether_addr));
575 
576 	/* free pri_types table */
577 	if (VGEN_PRI_ETH_DEFINED(vgenp)) {
578 		kmem_free(vgenp->pri_types,
579 		    sizeof (uint16_t) * vgenp->pri_num_types);
580 		(void) vio_destroy_mblks(vgenp->pri_tx_vmp);
581 	}
582 
583 	mutex_exit(&vgenp->lock);
584 
585 	rw_destroy(&vgenp->vgenports.rwlock);
586 	mutex_destroy(&vgenp->lock);
587 
588 	DBG1(vgenp, NULL, "exit\n");
589 	KMEM_FREE(vgenp);
590 }
591 
592 /*
593  * module specific initialization common to all instances of vnet/vgen.
594  */
595 void
596 vgen_mod_init(void)
597 {
598 	rw_init(&vgen_rw, NULL, RW_DRIVER, NULL);
599 }
600 
601 /*
602  * module specific cleanup common to all instances of vnet/vgen.
603  */
604 int
605 vgen_mod_cleanup(void)
606 {
607 	vio_mblk_pool_t	*poolp, *npoolp;
608 
609 	/*
610 	 * If any rx mblk pools are still in use, return
611 	 * error and stop the module from unloading.
612 	 */
613 	WRITE_ENTER(&vgen_rw);
614 	poolp = vgen_rx_poolp;
615 	while (poolp != NULL) {
616 		npoolp = vgen_rx_poolp = poolp->nextp;
617 		if (vio_destroy_mblks(poolp) != 0) {
618 			vgen_rx_poolp = poolp;
619 			RW_EXIT(&vgen_rw);
620 			return (EBUSY);
621 		}
622 		poolp = npoolp;
623 	}
624 	RW_EXIT(&vgen_rw);
625 
626 	return (0);
627 }
628 
629 /*
630  * module specific uninitialization common to all instances of vnet/vgen.
631  */
632 void
633 vgen_mod_fini(void)
634 {
635 	rw_destroy(&vgen_rw);
636 }
637 
638 /* enable transmit/receive for the device */
639 int
640 vgen_start(void *arg)
641 {
642 	vgen_port_t	*portp = (vgen_port_t *)arg;
643 	vgen_t		*vgenp = portp->vgenp;
644 
645 	DBG1(vgenp, NULL, "enter\n");
646 	mutex_enter(&portp->lock);
647 	vgen_port_init(portp);
648 	portp->flags |= VGEN_STARTED;
649 	mutex_exit(&portp->lock);
650 	DBG1(vgenp, NULL, "exit\n");
651 
652 	return (DDI_SUCCESS);
653 }
654 
655 /* stop transmit/receive */
656 void
657 vgen_stop(void *arg)
658 {
659 	vgen_port_t	*portp = (vgen_port_t *)arg;
660 	vgen_t		*vgenp = portp->vgenp;
661 
662 	DBG1(vgenp, NULL, "enter\n");
663 
664 	mutex_enter(&portp->lock);
665 	vgen_port_uninit(portp);
666 	portp->flags &= ~(VGEN_STARTED);
667 	mutex_exit(&portp->lock);
668 	DBG1(vgenp, NULL, "exit\n");
669 
670 }
671 
672 /* vgen transmit function */
673 static mblk_t *
674 vgen_tx(void *arg, mblk_t *mp)
675 {
676 	int i;
677 	vgen_port_t *portp;
678 	int status = VGEN_FAILURE;
679 
680 	portp = (vgen_port_t *)arg;
681 	/*
682 	 * Retry so that we avoid reporting a failure
683 	 * to the upper layer. Returning a failure may cause the
684 	 * upper layer to go into single threaded mode there by
685 	 * causing performance degradation, especially for a large
686 	 * number of connections.
687 	 */
688 	for (i = 0; i < vgen_tx_retries; ) {
689 		status = vgen_portsend(portp, mp);
690 		if (status == VGEN_SUCCESS) {
691 			break;
692 		}
693 		if (++i < vgen_tx_retries)
694 			delay(drv_usectohz(vgen_tx_delay));
695 	}
696 	if (status != VGEN_SUCCESS) {
697 		/* failure */
698 		return (mp);
699 	}
700 	/* success */
701 	return (NULL);
702 }
703 
704 /*
705  * This function provides any necessary tagging/untagging of the frames
706  * that are being transmitted over the port. It first verifies the vlan
707  * membership of the destination(port) and drops the packet if the
708  * destination doesn't belong to the given vlan.
709  *
710  * Arguments:
711  *   portp:     port over which the frames should be transmitted
712  *   mp:        frame to be transmitted
713  *   is_tagged:
714  *              B_TRUE: indicates frame header contains the vlan tag already.
715  *              B_FALSE: indicates frame is untagged.
716  *   vid:       vlan in which the frame should be transmitted.
717  *
718  * Returns:
719  *              Sucess: frame(mblk_t *) after doing the necessary tag/untag.
720  *              Failure: NULL
721  */
722 static mblk_t *
723 vgen_vlan_frame_fixtag(vgen_port_t *portp, mblk_t *mp, boolean_t is_tagged,
724 	uint16_t vid)
725 {
726 	vgen_t				*vgenp;
727 	boolean_t			dst_tagged;
728 	int				rv;
729 
730 	vgenp = portp->vgenp;
731 
732 	/*
733 	 * If the packet is going to a vnet:
734 	 *   Check if the destination vnet is in the same vlan.
735 	 *   Check the frame header if tag or untag is needed.
736 	 *
737 	 * We do not check the above conditions if the packet is going to vsw:
738 	 *   vsw must be present implicitly in all the vlans that a vnet device
739 	 *   is configured into; even if vsw itself is not assigned to those
740 	 *   vlans as an interface. For instance, the packet might be destined
741 	 *   to another vnet(indirectly through vsw) or to an external host
742 	 *   which is in the same vlan as this vnet and vsw itself may not be
743 	 *   present in that vlan. Similarly packets going to vsw must be
744 	 *   always tagged(unless in the default-vlan) if not already tagged,
745 	 *   as we do not know the final destination. This is needed because
746 	 *   vsw must always invoke its switching function only after tagging
747 	 *   the packet; otherwise after switching function determines the
748 	 *   destination we cannot figure out if the destination belongs to the
749 	 *   the same vlan that the frame originated from and if it needs tag/
750 	 *   untag. Note that vsw will tag the packet itself when it receives
751 	 *   it over the channel from a client if needed. However, that is
752 	 *   needed only in the case of vlan unaware clients such as obp or
753 	 *   earlier versions of vnet.
754 	 *
755 	 */
756 	if (portp != vgenp->vsw_portp) {
757 		/*
758 		 * Packet going to a vnet. Check if the destination vnet is in
759 		 * the same vlan. Then check the frame header if tag/untag is
760 		 * needed.
761 		 */
762 		rv = vgen_vlan_lookup(portp->vlan_hashp, vid);
763 		if (rv == B_FALSE) {
764 			/* drop the packet */
765 			freemsg(mp);
766 			return (NULL);
767 		}
768 
769 		/* is the destination tagged or untagged in this vlan? */
770 		(vid == portp->pvid) ? (dst_tagged = B_FALSE) :
771 		    (dst_tagged = B_TRUE);
772 
773 		if (is_tagged == dst_tagged) {
774 			/* no tagging/untagging needed */
775 			return (mp);
776 		}
777 
778 		if (is_tagged == B_TRUE) {
779 			/* frame is tagged; destination needs untagged */
780 			mp = vnet_vlan_remove_tag(mp);
781 			return (mp);
782 		}
783 
784 		/* (is_tagged == B_FALSE): fallthru to tag tx packet: */
785 	}
786 
787 	/*
788 	 * Packet going to a vnet needs tagging.
789 	 * OR
790 	 * If the packet is going to vsw, then it must be tagged in all cases:
791 	 * unknown unicast, broadcast/multicast or to vsw interface.
792 	 */
793 
794 	if (is_tagged == B_FALSE) {
795 		mp = vnet_vlan_insert_tag(mp, vid);
796 	}
797 
798 	return (mp);
799 }
800 
801 /* transmit packets over the given port */
802 static int
803 vgen_portsend(vgen_port_t *portp, mblk_t *mp)
804 {
805 	vgen_ldclist_t		*ldclp;
806 	vgen_ldc_t		*ldcp;
807 	int			status;
808 	int			rv = VGEN_SUCCESS;
809 	vgen_t			*vgenp = portp->vgenp;
810 	vnet_t			*vnetp = vgenp->vnetp;
811 	boolean_t		is_tagged;
812 	boolean_t		dec_refcnt = B_FALSE;
813 	uint16_t		vlan_id;
814 	struct ether_header	*ehp;
815 
816 	if (portp->use_vsw_port) {
817 		(void) atomic_inc_32(&vgenp->vsw_port_refcnt);
818 		portp = portp->vgenp->vsw_portp;
819 		dec_refcnt = B_TRUE;
820 	}
821 	if (portp == NULL) {
822 		return (VGEN_FAILURE);
823 	}
824 
825 	/*
826 	 * Determine the vlan id that the frame belongs to.
827 	 */
828 	ehp = (struct ether_header *)mp->b_rptr;
829 	is_tagged = vgen_frame_lookup_vid(vnetp, ehp, &vlan_id);
830 
831 	if (vlan_id == vnetp->default_vlan_id) {
832 
833 		/* Frames in default vlan must be untagged */
834 		ASSERT(is_tagged == B_FALSE);
835 
836 		/*
837 		 * If the destination is a vnet-port verify it belongs to the
838 		 * default vlan; otherwise drop the packet. We do not need
839 		 * this check for vsw-port, as it should implicitly belong to
840 		 * this vlan; see comments in vgen_vlan_frame_fixtag().
841 		 */
842 		if (portp != vgenp->vsw_portp &&
843 		    portp->pvid != vnetp->default_vlan_id) {
844 			freemsg(mp);
845 			goto portsend_ret;
846 		}
847 
848 	} else {	/* frame not in default-vlan */
849 
850 		mp = vgen_vlan_frame_fixtag(portp, mp, is_tagged, vlan_id);
851 		if (mp == NULL) {
852 			goto portsend_ret;
853 		}
854 
855 	}
856 
857 	ldclp = &portp->ldclist;
858 	READ_ENTER(&ldclp->rwlock);
859 	/*
860 	 * NOTE: for now, we will assume we have a single channel.
861 	 */
862 	if (ldclp->headp == NULL) {
863 		RW_EXIT(&ldclp->rwlock);
864 		rv = VGEN_FAILURE;
865 		goto portsend_ret;
866 	}
867 	ldcp = ldclp->headp;
868 
869 	status = ldcp->tx(ldcp, mp);
870 
871 	RW_EXIT(&ldclp->rwlock);
872 
873 	if (status != VGEN_TX_SUCCESS) {
874 		rv = VGEN_FAILURE;
875 	}
876 
877 portsend_ret:
878 	if (dec_refcnt == B_TRUE) {
879 		(void) atomic_dec_32(&vgenp->vsw_port_refcnt);
880 	}
881 	return (rv);
882 }
883 
884 /*
885  * Wrapper function to transmit normal and/or priority frames over the channel.
886  */
887 static int
888 vgen_ldcsend(void *arg, mblk_t *mp)
889 {
890 	vgen_ldc_t		*ldcp = (vgen_ldc_t *)arg;
891 	int			status;
892 	struct ether_header	*ehp;
893 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
894 	uint32_t		num_types;
895 	uint16_t		*types;
896 	int			i;
897 
898 	ASSERT(VGEN_PRI_ETH_DEFINED(vgenp));
899 
900 	num_types = vgenp->pri_num_types;
901 	types = vgenp->pri_types;
902 	ehp = (struct ether_header *)mp->b_rptr;
903 
904 	for (i = 0; i < num_types; i++) {
905 
906 		if (ehp->ether_type == types[i]) {
907 			/* priority frame, use pri tx function */
908 			vgen_ldcsend_pkt(ldcp, mp);
909 			return (VGEN_SUCCESS);
910 		}
911 
912 	}
913 
914 	status  = vgen_ldcsend_dring(ldcp, mp);
915 
916 	return (status);
917 }
918 
919 /*
920  * This functions handles ldc channel reset while in the context
921  * of transmit routines: vgen_ldcsend_pkt() or vgen_ldcsend_dring().
922  */
923 static void
924 vgen_ldcsend_process_reset(vgen_ldc_t *ldcp)
925 {
926 	ldc_status_t	istatus;
927 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
928 
929 	if (mutex_tryenter(&ldcp->cblock)) {
930 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
931 			DWARN(vgenp, ldcp, "ldc_status() error\n");
932 		} else {
933 			ldcp->ldc_status = istatus;
934 		}
935 		if (ldcp->ldc_status != LDC_UP) {
936 			vgen_handle_evt_reset(ldcp);
937 		}
938 		mutex_exit(&ldcp->cblock);
939 	}
940 }
941 
942 /*
943  * This function transmits the frame in the payload of a raw data
944  * (VIO_PKT_DATA) message. Thus, it provides an Out-Of-Band path to
945  * send special frames with high priorities, without going through
946  * the normal data path which uses descriptor ring mechanism.
947  */
948 static void
949 vgen_ldcsend_pkt(void *arg, mblk_t *mp)
950 {
951 	vgen_ldc_t		*ldcp = (vgen_ldc_t *)arg;
952 	vio_raw_data_msg_t	*pkt;
953 	mblk_t			*bp;
954 	mblk_t			*nmp = NULL;
955 	caddr_t			dst;
956 	uint32_t		mblksz;
957 	uint32_t		size;
958 	uint32_t		nbytes;
959 	int			rv;
960 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
961 	vgen_stats_t		*statsp = &ldcp->stats;
962 
963 	/* drop the packet if ldc is not up or handshake is not done */
964 	if (ldcp->ldc_status != LDC_UP) {
965 		(void) atomic_inc_32(&statsp->tx_pri_fail);
966 		DWARN(vgenp, ldcp, "status(%d), dropping packet\n",
967 		    ldcp->ldc_status);
968 		goto send_pkt_exit;
969 	}
970 
971 	if (ldcp->hphase != VH_DONE) {
972 		(void) atomic_inc_32(&statsp->tx_pri_fail);
973 		DWARN(vgenp, ldcp, "hphase(%x), dropping packet\n",
974 		    ldcp->hphase);
975 		goto send_pkt_exit;
976 	}
977 
978 	size = msgsize(mp);
979 
980 	/* frame size bigger than available payload len of raw data msg ? */
981 	if (size > (size_t)(ldcp->msglen - VIO_PKT_DATA_HDRSIZE)) {
982 		(void) atomic_inc_32(&statsp->tx_pri_fail);
983 		DWARN(vgenp, ldcp, "invalid size(%d)\n", size);
984 		goto send_pkt_exit;
985 	}
986 
987 	if (size < ETHERMIN)
988 		size = ETHERMIN;
989 
990 	/* alloc space for a raw data message */
991 	nmp = vio_allocb(vgenp->pri_tx_vmp);
992 	if (nmp == NULL) {
993 		(void) atomic_inc_32(&statsp->tx_pri_fail);
994 		DWARN(vgenp, ldcp, "vio_allocb failed\n");
995 		goto send_pkt_exit;
996 	}
997 	pkt = (vio_raw_data_msg_t *)nmp->b_rptr;
998 
999 	/* copy frame into the payload of raw data message */
1000 	dst = (caddr_t)pkt->data;
1001 	for (bp = mp; bp != NULL; bp = bp->b_cont) {
1002 		mblksz = MBLKL(bp);
1003 		bcopy(bp->b_rptr, dst, mblksz);
1004 		dst += mblksz;
1005 	}
1006 
1007 	/* setup the raw data msg */
1008 	pkt->tag.vio_msgtype = VIO_TYPE_DATA;
1009 	pkt->tag.vio_subtype = VIO_SUBTYPE_INFO;
1010 	pkt->tag.vio_subtype_env = VIO_PKT_DATA;
1011 	pkt->tag.vio_sid = ldcp->local_sid;
1012 	nbytes = VIO_PKT_DATA_HDRSIZE + size;
1013 
1014 	/* send the msg over ldc */
1015 	rv = vgen_sendmsg(ldcp, (caddr_t)pkt, nbytes, B_FALSE);
1016 	if (rv != VGEN_SUCCESS) {
1017 		(void) atomic_inc_32(&statsp->tx_pri_fail);
1018 		DWARN(vgenp, ldcp, "Error sending priority frame\n");
1019 		if (rv == ECONNRESET) {
1020 			vgen_ldcsend_process_reset(ldcp);
1021 		}
1022 		goto send_pkt_exit;
1023 	}
1024 
1025 	/* update stats */
1026 	(void) atomic_inc_64(&statsp->tx_pri_packets);
1027 	(void) atomic_add_64(&statsp->tx_pri_bytes, size);
1028 
1029 send_pkt_exit:
1030 	if (nmp != NULL)
1031 		freemsg(nmp);
1032 	freemsg(mp);
1033 }
1034 
1035 /*
1036  * This function transmits normal (non-priority) data frames over
1037  * the channel. It queues the frame into the transmit descriptor ring
1038  * and sends a VIO_DRING_DATA message if needed, to wake up the
1039  * peer to (re)start processing.
1040  */
1041 static int
1042 vgen_ldcsend_dring(void *arg, mblk_t *mp)
1043 {
1044 	vgen_ldc_t		*ldcp = (vgen_ldc_t *)arg;
1045 	vgen_private_desc_t	*tbufp;
1046 	vgen_private_desc_t	*rtbufp;
1047 	vnet_public_desc_t	*rtxdp;
1048 	vgen_private_desc_t	*ntbufp;
1049 	vnet_public_desc_t	*txdp;
1050 	vio_dring_entry_hdr_t	*hdrp;
1051 	vgen_stats_t		*statsp;
1052 	struct ether_header	*ehp;
1053 	boolean_t		is_bcast = B_FALSE;
1054 	boolean_t		is_mcast = B_FALSE;
1055 	size_t			mblksz;
1056 	caddr_t			dst;
1057 	mblk_t			*bp;
1058 	size_t			size;
1059 	int			rv = 0;
1060 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
1061 	vgen_hparams_t		*lp = &ldcp->local_hparams;
1062 
1063 	statsp = &ldcp->stats;
1064 	size = msgsize(mp);
1065 
1066 	DBG1(vgenp, ldcp, "enter\n");
1067 
1068 	if (ldcp->ldc_status != LDC_UP) {
1069 		DWARN(vgenp, ldcp, "status(%d), dropping packet\n",
1070 		    ldcp->ldc_status);
1071 		/* retry ldc_up() if needed */
1072 		if (ldcp->flags & CHANNEL_STARTED)
1073 			(void) ldc_up(ldcp->ldc_handle);
1074 		goto send_dring_exit;
1075 	}
1076 
1077 	/* drop the packet if ldc is not up or handshake is not done */
1078 	if (ldcp->hphase != VH_DONE) {
1079 		DWARN(vgenp, ldcp, "hphase(%x), dropping packet\n",
1080 		    ldcp->hphase);
1081 		goto send_dring_exit;
1082 	}
1083 
1084 	if (size > (size_t)lp->mtu) {
1085 		DWARN(vgenp, ldcp, "invalid size(%d)\n", size);
1086 		goto send_dring_exit;
1087 	}
1088 	if (size < ETHERMIN)
1089 		size = ETHERMIN;
1090 
1091 	ehp = (struct ether_header *)mp->b_rptr;
1092 	is_bcast = IS_BROADCAST(ehp);
1093 	is_mcast = IS_MULTICAST(ehp);
1094 
1095 	mutex_enter(&ldcp->txlock);
1096 	/*
1097 	 * allocate a descriptor
1098 	 */
1099 	tbufp = ldcp->next_tbufp;
1100 	ntbufp = NEXTTBUF(ldcp, tbufp);
1101 	if (ntbufp == ldcp->cur_tbufp) { /* out of tbufs/txds */
1102 
1103 		mutex_enter(&ldcp->tclock);
1104 		/* Try reclaiming now */
1105 		vgen_reclaim_dring(ldcp);
1106 		ldcp->reclaim_lbolt = ddi_get_lbolt();
1107 
1108 		if (ntbufp == ldcp->cur_tbufp) {
1109 			/* Now we are really out of tbuf/txds */
1110 			ldcp->need_resched = B_TRUE;
1111 			mutex_exit(&ldcp->tclock);
1112 
1113 			statsp->tx_no_desc++;
1114 			mutex_exit(&ldcp->txlock);
1115 
1116 			return (VGEN_TX_NORESOURCES);
1117 		}
1118 		mutex_exit(&ldcp->tclock);
1119 	}
1120 	/* update next available tbuf in the ring and update tx index */
1121 	ldcp->next_tbufp = ntbufp;
1122 	INCR_TXI(ldcp->next_txi, ldcp);
1123 
1124 	/* Mark the buffer busy before releasing the lock */
1125 	tbufp->flags = VGEN_PRIV_DESC_BUSY;
1126 	mutex_exit(&ldcp->txlock);
1127 
1128 	/* copy data into pre-allocated transmit buffer */
1129 	dst = tbufp->datap + VNET_IPALIGN;
1130 	for (bp = mp; bp != NULL; bp = bp->b_cont) {
1131 		mblksz = MBLKL(bp);
1132 		bcopy(bp->b_rptr, dst, mblksz);
1133 		dst += mblksz;
1134 	}
1135 
1136 	tbufp->datalen = size;
1137 
1138 	/* initialize the corresponding public descriptor (txd) */
1139 	txdp = tbufp->descp;
1140 	hdrp = &txdp->hdr;
1141 	txdp->nbytes = size;
1142 	txdp->ncookies = tbufp->ncookies;
1143 	bcopy((tbufp->memcookie), (txdp->memcookie),
1144 	    tbufp->ncookies * sizeof (ldc_mem_cookie_t));
1145 
1146 	mutex_enter(&ldcp->wrlock);
1147 	/*
1148 	 * If the flags not set to BUSY, it implies that the clobber
1149 	 * was done while we were copying the data. In such case,
1150 	 * discard the packet and return.
1151 	 */
1152 	if (tbufp->flags != VGEN_PRIV_DESC_BUSY) {
1153 		statsp->oerrors++;
1154 		mutex_exit(&ldcp->wrlock);
1155 		goto send_dring_exit;
1156 	}
1157 	hdrp->dstate = VIO_DESC_READY;
1158 
1159 	/* update stats */
1160 	statsp->opackets++;
1161 	statsp->obytes += size;
1162 	if (is_bcast)
1163 		statsp->brdcstxmt++;
1164 	else if (is_mcast)
1165 		statsp->multixmt++;
1166 
1167 	/* send dring datamsg to the peer */
1168 	if (ldcp->resched_peer) {
1169 
1170 		rtbufp = &ldcp->tbufp[ldcp->resched_peer_txi];
1171 		rtxdp = rtbufp->descp;
1172 
1173 		if (rtxdp->hdr.dstate == VIO_DESC_READY) {
1174 
1175 			rv = vgen_send_dring_data(ldcp,
1176 			    (uint32_t)ldcp->resched_peer_txi, -1);
1177 			if (rv != 0) {
1178 				/* error: drop the packet */
1179 				DWARN(vgenp, ldcp, "vgen_send_dring_data "
1180 				    "failed: rv(%d) len(%d)\n",
1181 				    ldcp->ldc_id, rv, size);
1182 				statsp->oerrors++;
1183 			} else {
1184 				ldcp->resched_peer = B_FALSE;
1185 			}
1186 
1187 		}
1188 
1189 	}
1190 
1191 	mutex_exit(&ldcp->wrlock);
1192 
1193 send_dring_exit:
1194 	if (rv == ECONNRESET) {
1195 		vgen_ldcsend_process_reset(ldcp);
1196 	}
1197 	freemsg(mp);
1198 	DBG1(vgenp, ldcp, "exit\n");
1199 	return (VGEN_TX_SUCCESS);
1200 }
1201 
1202 /* enable/disable a multicast address */
1203 int
1204 vgen_multicst(void *arg, boolean_t add, const uint8_t *mca)
1205 {
1206 	vgen_t			*vgenp;
1207 	vnet_mcast_msg_t	mcastmsg;
1208 	vio_msg_tag_t		*tagp;
1209 	vgen_port_t		*portp;
1210 	vgen_portlist_t		*plistp;
1211 	vgen_ldc_t		*ldcp;
1212 	vgen_ldclist_t		*ldclp;
1213 	struct ether_addr	*addrp;
1214 	int			rv = DDI_FAILURE;
1215 	uint32_t		i;
1216 
1217 	portp = (vgen_port_t *)arg;
1218 	vgenp = portp->vgenp;
1219 
1220 	if (portp != vgenp->vsw_portp) {
1221 		return (DDI_SUCCESS);
1222 	}
1223 
1224 	addrp = (struct ether_addr *)mca;
1225 	tagp = &mcastmsg.tag;
1226 	bzero(&mcastmsg, sizeof (mcastmsg));
1227 
1228 	mutex_enter(&vgenp->lock);
1229 
1230 	plistp = &(vgenp->vgenports);
1231 
1232 	READ_ENTER(&plistp->rwlock);
1233 
1234 	portp = vgenp->vsw_portp;
1235 	if (portp == NULL) {
1236 		RW_EXIT(&plistp->rwlock);
1237 		mutex_exit(&vgenp->lock);
1238 		return (rv);
1239 	}
1240 	ldclp = &portp->ldclist;
1241 
1242 	READ_ENTER(&ldclp->rwlock);
1243 
1244 	ldcp = ldclp->headp;
1245 	if (ldcp == NULL)
1246 		goto vgen_mcast_exit;
1247 
1248 	mutex_enter(&ldcp->cblock);
1249 
1250 	if (ldcp->hphase == VH_DONE) {
1251 		/*
1252 		 * If handshake is done, send a msg to vsw to add/remove
1253 		 * the multicast address. Otherwise, we just update this
1254 		 * mcast address in our table and the table will be sync'd
1255 		 * with vsw when handshake completes.
1256 		 */
1257 		tagp->vio_msgtype = VIO_TYPE_CTRL;
1258 		tagp->vio_subtype = VIO_SUBTYPE_INFO;
1259 		tagp->vio_subtype_env = VNET_MCAST_INFO;
1260 		tagp->vio_sid = ldcp->local_sid;
1261 		bcopy(mca, &(mcastmsg.mca), ETHERADDRL);
1262 		mcastmsg.set = add;
1263 		mcastmsg.count = 1;
1264 		if (vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (mcastmsg),
1265 		    B_FALSE) != VGEN_SUCCESS) {
1266 			DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
1267 			mutex_exit(&ldcp->cblock);
1268 			goto vgen_mcast_exit;
1269 		}
1270 	}
1271 
1272 	mutex_exit(&ldcp->cblock);
1273 
1274 	if (add) {
1275 
1276 		/* expand multicast table if necessary */
1277 		if (vgenp->mccount >= vgenp->mcsize) {
1278 			struct ether_addr	*newtab;
1279 			uint32_t		newsize;
1280 
1281 
1282 			newsize = vgenp->mcsize * 2;
1283 
1284 			newtab = kmem_zalloc(newsize *
1285 			    sizeof (struct ether_addr), KM_NOSLEEP);
1286 			if (newtab == NULL)
1287 				goto vgen_mcast_exit;
1288 			bcopy(vgenp->mctab, newtab, vgenp->mcsize *
1289 			    sizeof (struct ether_addr));
1290 			kmem_free(vgenp->mctab,
1291 			    vgenp->mcsize * sizeof (struct ether_addr));
1292 
1293 			vgenp->mctab = newtab;
1294 			vgenp->mcsize = newsize;
1295 		}
1296 
1297 		/* add address to the table */
1298 		vgenp->mctab[vgenp->mccount++] = *addrp;
1299 
1300 	} else {
1301 
1302 		/* delete address from the table */
1303 		for (i = 0; i < vgenp->mccount; i++) {
1304 			if (ether_cmp(addrp, &(vgenp->mctab[i])) == 0) {
1305 
1306 				/*
1307 				 * If there's more than one address in this
1308 				 * table, delete the unwanted one by moving
1309 				 * the last one in the list over top of it;
1310 				 * otherwise, just remove it.
1311 				 */
1312 				if (vgenp->mccount > 1) {
1313 					vgenp->mctab[i] =
1314 					    vgenp->mctab[vgenp->mccount-1];
1315 				}
1316 				vgenp->mccount--;
1317 				break;
1318 			}
1319 		}
1320 	}
1321 
1322 	rv = DDI_SUCCESS;
1323 
1324 vgen_mcast_exit:
1325 	RW_EXIT(&ldclp->rwlock);
1326 	RW_EXIT(&plistp->rwlock);
1327 
1328 	mutex_exit(&vgenp->lock);
1329 	return (rv);
1330 }
1331 
1332 /* set or clear promiscuous mode on the device */
1333 static int
1334 vgen_promisc(void *arg, boolean_t on)
1335 {
1336 	_NOTE(ARGUNUSED(arg, on))
1337 	return (DDI_SUCCESS);
1338 }
1339 
1340 /* set the unicast mac address of the device */
1341 static int
1342 vgen_unicst(void *arg, const uint8_t *mca)
1343 {
1344 	_NOTE(ARGUNUSED(arg, mca))
1345 	return (DDI_SUCCESS);
1346 }
1347 
1348 /* get device statistics */
1349 int
1350 vgen_stat(void *arg, uint_t stat, uint64_t *val)
1351 {
1352 	vgen_port_t	*portp = (vgen_port_t *)arg;
1353 
1354 	*val = vgen_port_stat(portp, stat);
1355 
1356 	return (0);
1357 }
1358 
1359 static void
1360 vgen_ioctl(void *arg, queue_t *wq, mblk_t *mp)
1361 {
1362 	 _NOTE(ARGUNUSED(arg, wq, mp))
1363 }
1364 
1365 /* vgen internal functions */
1366 /* detach all ports from the device */
1367 static void
1368 vgen_detach_ports(vgen_t *vgenp)
1369 {
1370 	vgen_port_t	*portp;
1371 	vgen_portlist_t	*plistp;
1372 
1373 	plistp = &(vgenp->vgenports);
1374 	WRITE_ENTER(&plistp->rwlock);
1375 	while ((portp = plistp->headp) != NULL) {
1376 		vgen_port_detach(portp);
1377 	}
1378 	RW_EXIT(&plistp->rwlock);
1379 }
1380 
1381 /*
1382  * detach the given port.
1383  */
1384 static void
1385 vgen_port_detach(vgen_port_t *portp)
1386 {
1387 	vgen_t		*vgenp;
1388 	vgen_ldclist_t	*ldclp;
1389 	int		port_num;
1390 
1391 	vgenp = portp->vgenp;
1392 	port_num = portp->port_num;
1393 
1394 	DBG1(vgenp, NULL, "port(%d):enter\n", port_num);
1395 
1396 	/*
1397 	 * If this port is connected to the vswitch, then
1398 	 * potentially there could be ports that may be using
1399 	 * this port to transmit packets. To address this do
1400 	 * the following:
1401 	 *	- First set vgenp->vsw_portp to NULL, so that
1402 	 *	  its not used after that.
1403 	 *	- Then wait for the refcnt to go down to 0.
1404 	 *	- Now we can safely detach this port.
1405 	 */
1406 	if (vgenp->vsw_portp == portp) {
1407 		vgenp->vsw_portp = NULL;
1408 		while (vgenp->vsw_port_refcnt > 0) {
1409 			delay(drv_usectohz(vgen_tx_delay));
1410 		}
1411 		(void) atomic_swap_32(&vgenp->vsw_port_refcnt, 0);
1412 	}
1413 
1414 	if (portp->vhp != NULL) {
1415 		vio_net_resource_unreg(portp->vhp);
1416 		portp->vhp = NULL;
1417 	}
1418 
1419 	vgen_vlan_destroy_hash(portp);
1420 
1421 	/* remove it from port list */
1422 	vgen_port_list_remove(portp);
1423 
1424 	/* detach channels from this port */
1425 	ldclp = &portp->ldclist;
1426 	WRITE_ENTER(&ldclp->rwlock);
1427 	while (ldclp->headp) {
1428 		vgen_ldc_detach(ldclp->headp);
1429 	}
1430 	RW_EXIT(&ldclp->rwlock);
1431 	rw_destroy(&ldclp->rwlock);
1432 
1433 	if (portp->num_ldcs != 0) {
1434 		kmem_free(portp->ldc_ids, portp->num_ldcs * sizeof (uint64_t));
1435 		portp->num_ldcs = 0;
1436 	}
1437 
1438 	mutex_destroy(&portp->lock);
1439 	KMEM_FREE(portp);
1440 
1441 	DBG1(vgenp, NULL, "port(%d):exit\n", port_num);
1442 }
1443 
1444 /* add a port to port list */
1445 static void
1446 vgen_port_list_insert(vgen_port_t *portp)
1447 {
1448 	vgen_portlist_t *plistp;
1449 	vgen_t *vgenp;
1450 
1451 	vgenp = portp->vgenp;
1452 	plistp = &(vgenp->vgenports);
1453 
1454 	if (plistp->headp == NULL) {
1455 		plistp->headp = portp;
1456 	} else {
1457 		plistp->tailp->nextp = portp;
1458 	}
1459 	plistp->tailp = portp;
1460 	portp->nextp = NULL;
1461 }
1462 
1463 /* remove a port from port list */
1464 static void
1465 vgen_port_list_remove(vgen_port_t *portp)
1466 {
1467 	vgen_port_t *prevp;
1468 	vgen_port_t *nextp;
1469 	vgen_portlist_t *plistp;
1470 	vgen_t *vgenp;
1471 
1472 	vgenp = portp->vgenp;
1473 
1474 	plistp = &(vgenp->vgenports);
1475 
1476 	if (plistp->headp == NULL)
1477 		return;
1478 
1479 	if (portp == plistp->headp) {
1480 		plistp->headp = portp->nextp;
1481 		if (portp == plistp->tailp)
1482 			plistp->tailp = plistp->headp;
1483 	} else {
1484 		for (prevp = plistp->headp;
1485 		    ((nextp = prevp->nextp) != NULL) && (nextp != portp);
1486 		    prevp = nextp)
1487 			;
1488 		if (nextp == portp) {
1489 			prevp->nextp = portp->nextp;
1490 		}
1491 		if (portp == plistp->tailp)
1492 			plistp->tailp = prevp;
1493 	}
1494 }
1495 
1496 /* lookup a port in the list based on port_num */
1497 static vgen_port_t *
1498 vgen_port_lookup(vgen_portlist_t *plistp, int port_num)
1499 {
1500 	vgen_port_t *portp = NULL;
1501 
1502 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
1503 		if (portp->port_num == port_num) {
1504 			break;
1505 		}
1506 	}
1507 
1508 	return (portp);
1509 }
1510 
1511 /* enable ports for transmit/receive */
1512 static void
1513 vgen_init_ports(vgen_t *vgenp)
1514 {
1515 	vgen_port_t	*portp;
1516 	vgen_portlist_t	*plistp;
1517 
1518 	plistp = &(vgenp->vgenports);
1519 	READ_ENTER(&plistp->rwlock);
1520 
1521 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
1522 		vgen_port_init(portp);
1523 	}
1524 
1525 	RW_EXIT(&plistp->rwlock);
1526 }
1527 
1528 static void
1529 vgen_port_init(vgen_port_t *portp)
1530 {
1531 	/* Add the port to the specified vlans */
1532 	vgen_vlan_add_ids(portp);
1533 
1534 	/* Bring up the channels of this port */
1535 	vgen_init_ldcs(portp);
1536 }
1537 
1538 /* disable transmit/receive on ports */
1539 static void
1540 vgen_uninit_ports(vgen_t *vgenp)
1541 {
1542 	vgen_port_t	*portp;
1543 	vgen_portlist_t	*plistp;
1544 
1545 	plistp = &(vgenp->vgenports);
1546 	READ_ENTER(&plistp->rwlock);
1547 
1548 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
1549 		vgen_port_uninit(portp);
1550 	}
1551 
1552 	RW_EXIT(&plistp->rwlock);
1553 }
1554 
1555 static void
1556 vgen_port_uninit(vgen_port_t *portp)
1557 {
1558 	vgen_uninit_ldcs(portp);
1559 
1560 	/* remove the port from vlans it has been assigned to */
1561 	vgen_vlan_remove_ids(portp);
1562 }
1563 
1564 /*
1565  * Scan the machine description for this instance of vnet
1566  * and read its properties. Called only from vgen_init().
1567  * Returns: 0 on success, 1 on failure.
1568  */
1569 static int
1570 vgen_read_mdprops(vgen_t *vgenp)
1571 {
1572 	vnet_t		*vnetp = vgenp->vnetp;
1573 	md_t		*mdp = NULL;
1574 	mde_cookie_t	rootnode;
1575 	mde_cookie_t	*listp = NULL;
1576 	uint64_t	cfgh;
1577 	char		*name;
1578 	int		rv = 1;
1579 	int		num_nodes = 0;
1580 	int		num_devs = 0;
1581 	int		listsz = 0;
1582 	int		i;
1583 
1584 	if ((mdp = md_get_handle()) == NULL) {
1585 		return (rv);
1586 	}
1587 
1588 	num_nodes = md_node_count(mdp);
1589 	ASSERT(num_nodes > 0);
1590 
1591 	listsz = num_nodes * sizeof (mde_cookie_t);
1592 	listp = (mde_cookie_t *)kmem_zalloc(listsz, KM_SLEEP);
1593 
1594 	rootnode = md_root_node(mdp);
1595 
1596 	/* search for all "virtual_device" nodes */
1597 	num_devs = md_scan_dag(mdp, rootnode,
1598 	    md_find_name(mdp, vdev_propname),
1599 	    md_find_name(mdp, "fwd"), listp);
1600 	if (num_devs <= 0) {
1601 		goto vgen_readmd_exit;
1602 	}
1603 
1604 	/*
1605 	 * Now loop through the list of virtual-devices looking for
1606 	 * devices with name "network" and for each such device compare
1607 	 * its instance with what we have from the 'reg' property to
1608 	 * find the right node in MD and then read all its properties.
1609 	 */
1610 	for (i = 0; i < num_devs; i++) {
1611 
1612 		if (md_get_prop_str(mdp, listp[i], "name", &name) != 0) {
1613 			goto vgen_readmd_exit;
1614 		}
1615 
1616 		/* is this a "network" device? */
1617 		if (strcmp(name, vnet_propname) != 0)
1618 			continue;
1619 
1620 		if (md_get_prop_val(mdp, listp[i], "cfg-handle", &cfgh) != 0) {
1621 			goto vgen_readmd_exit;
1622 		}
1623 
1624 		/* is this the required instance of vnet? */
1625 		if (vgenp->regprop != cfgh)
1626 			continue;
1627 
1628 		/*
1629 		 * Read the mtu. Note that we set the mtu of vnet device within
1630 		 * this routine itself, after validating the range.
1631 		 */
1632 		vgen_mtu_read(vgenp, mdp, listp[i], &vnetp->mtu);
1633 		if (vnetp->mtu < ETHERMTU || vnetp->mtu > VNET_MAX_MTU) {
1634 			vnetp->mtu = ETHERMTU;
1635 		}
1636 		vgenp->max_frame_size = vnetp->mtu +
1637 		    sizeof (struct ether_header) + VLAN_TAGSZ;
1638 
1639 		/* read priority ether types */
1640 		vgen_read_pri_eth_types(vgenp, mdp, listp[i]);
1641 
1642 		/* read vlan id properties of this vnet instance */
1643 		vgen_vlan_read_ids(vgenp, VGEN_LOCAL, mdp, listp[i],
1644 		    &vnetp->pvid, &vnetp->vids, &vnetp->nvids,
1645 		    &vnetp->default_vlan_id);
1646 
1647 		rv = 0;
1648 		break;
1649 	}
1650 
1651 vgen_readmd_exit:
1652 
1653 	kmem_free(listp, listsz);
1654 	(void) md_fini_handle(mdp);
1655 	return (rv);
1656 }
1657 
1658 /*
1659  * Read vlan id properties of the given MD node.
1660  * Arguments:
1661  *   arg:          device argument(vnet device or a port)
1662  *   type:         type of arg; VGEN_LOCAL(vnet device) or VGEN_PEER(port)
1663  *   mdp:          machine description
1664  *   node:         md node cookie
1665  *
1666  * Returns:
1667  *   pvidp:        port-vlan-id of the node
1668  *   vidspp:       list of vlan-ids of the node
1669  *   nvidsp:       # of vlan-ids in the list
1670  *   default_idp:  default-vlan-id of the node(if node is vnet device)
1671  */
1672 static void
1673 vgen_vlan_read_ids(void *arg, int type, md_t *mdp, mde_cookie_t node,
1674 	uint16_t *pvidp, uint16_t **vidspp, uint16_t *nvidsp,
1675 	uint16_t *default_idp)
1676 {
1677 	vgen_t		*vgenp;
1678 	vnet_t		*vnetp;
1679 	vgen_port_t	*portp;
1680 	char		*pvid_propname;
1681 	char		*vid_propname;
1682 	uint_t		nvids;
1683 	uint32_t	vids_size;
1684 	int		rv;
1685 	int		i;
1686 	uint64_t	*data;
1687 	uint64_t	val;
1688 	int		size;
1689 	int		inst;
1690 
1691 	if (type == VGEN_LOCAL) {
1692 
1693 		vgenp = (vgen_t *)arg;
1694 		vnetp = vgenp->vnetp;
1695 		pvid_propname = vgen_pvid_propname;
1696 		vid_propname = vgen_vid_propname;
1697 		inst = vnetp->instance;
1698 
1699 	} else if (type == VGEN_PEER) {
1700 
1701 		portp = (vgen_port_t *)arg;
1702 		vgenp = portp->vgenp;
1703 		vnetp = vgenp->vnetp;
1704 		pvid_propname = port_pvid_propname;
1705 		vid_propname = port_vid_propname;
1706 		inst = portp->port_num;
1707 
1708 	} else {
1709 		return;
1710 	}
1711 
1712 	if (type == VGEN_LOCAL && default_idp != NULL) {
1713 		rv = md_get_prop_val(mdp, node, vgen_dvid_propname, &val);
1714 		if (rv != 0) {
1715 			DWARN(vgenp, NULL, "prop(%s) not found",
1716 			    vgen_dvid_propname);
1717 
1718 			*default_idp = vnet_default_vlan_id;
1719 		} else {
1720 			*default_idp = val & 0xFFF;
1721 			DBG2(vgenp, NULL, "%s(%d): (%d)\n", vgen_dvid_propname,
1722 			    inst, *default_idp);
1723 		}
1724 	}
1725 
1726 	rv = md_get_prop_val(mdp, node, pvid_propname, &val);
1727 	if (rv != 0) {
1728 		DWARN(vgenp, NULL, "prop(%s) not found", pvid_propname);
1729 		*pvidp = vnet_default_vlan_id;
1730 	} else {
1731 
1732 		*pvidp = val & 0xFFF;
1733 		DBG2(vgenp, NULL, "%s(%d): (%d)\n",
1734 		    pvid_propname, inst, *pvidp);
1735 	}
1736 
1737 	rv = md_get_prop_data(mdp, node, vid_propname, (uint8_t **)&data,
1738 	    &size);
1739 	if (rv != 0) {
1740 		DBG2(vgenp, NULL, "prop(%s) not found", vid_propname);
1741 		size = 0;
1742 	} else {
1743 		size /= sizeof (uint64_t);
1744 	}
1745 	nvids = size;
1746 
1747 	if (nvids != 0) {
1748 		DBG2(vgenp, NULL, "%s(%d): ", vid_propname, inst);
1749 		vids_size = sizeof (uint16_t) * nvids;
1750 		*vidspp = kmem_zalloc(vids_size, KM_SLEEP);
1751 		for (i = 0; i < nvids; i++) {
1752 			(*vidspp)[i] = data[i] & 0xFFFF;
1753 			DBG2(vgenp, NULL, " %d ", (*vidspp)[i]);
1754 		}
1755 		DBG2(vgenp, NULL, "\n");
1756 	}
1757 
1758 	*nvidsp = nvids;
1759 }
1760 
1761 /*
1762  * Create a vlan id hash table for the given port.
1763  */
1764 static void
1765 vgen_vlan_create_hash(vgen_port_t *portp)
1766 {
1767 	char		hashname[MAXNAMELEN];
1768 
1769 	(void) snprintf(hashname, MAXNAMELEN, "port%d-vlan-hash",
1770 	    portp->port_num);
1771 
1772 	portp->vlan_nchains = vgen_vlan_nchains;
1773 	portp->vlan_hashp = mod_hash_create_idhash(hashname,
1774 	    portp->vlan_nchains, mod_hash_null_valdtor);
1775 }
1776 
1777 /*
1778  * Destroy the vlan id hash table in the given port.
1779  */
1780 static void
1781 vgen_vlan_destroy_hash(vgen_port_t *portp)
1782 {
1783 	if (portp->vlan_hashp != NULL) {
1784 		mod_hash_destroy_hash(portp->vlan_hashp);
1785 		portp->vlan_hashp = NULL;
1786 		portp->vlan_nchains = 0;
1787 	}
1788 }
1789 
1790 /*
1791  * Add a port to the vlans specified in its port properites.
1792  */
1793 static void
1794 vgen_vlan_add_ids(vgen_port_t *portp)
1795 {
1796 	int		rv;
1797 	int		i;
1798 
1799 	rv = mod_hash_insert(portp->vlan_hashp,
1800 	    (mod_hash_key_t)VLAN_ID_KEY(portp->pvid),
1801 	    (mod_hash_val_t)B_TRUE);
1802 	ASSERT(rv == 0);
1803 
1804 	for (i = 0; i < portp->nvids; i++) {
1805 		rv = mod_hash_insert(portp->vlan_hashp,
1806 		    (mod_hash_key_t)VLAN_ID_KEY(portp->vids[i]),
1807 		    (mod_hash_val_t)B_TRUE);
1808 		ASSERT(rv == 0);
1809 	}
1810 }
1811 
1812 /*
1813  * Remove a port from the vlans it has been assigned to.
1814  */
1815 static void
1816 vgen_vlan_remove_ids(vgen_port_t *portp)
1817 {
1818 	int		rv;
1819 	int		i;
1820 	mod_hash_val_t	vp;
1821 
1822 	rv = mod_hash_remove(portp->vlan_hashp,
1823 	    (mod_hash_key_t)VLAN_ID_KEY(portp->pvid),
1824 	    (mod_hash_val_t *)&vp);
1825 	ASSERT(rv == 0);
1826 
1827 	for (i = 0; i < portp->nvids; i++) {
1828 		rv = mod_hash_remove(portp->vlan_hashp,
1829 		    (mod_hash_key_t)VLAN_ID_KEY(portp->vids[i]),
1830 		    (mod_hash_val_t *)&vp);
1831 		ASSERT(rv == 0);
1832 	}
1833 }
1834 
1835 /*
1836  * Lookup the vlan id of the given tx frame. If it is a vlan-tagged frame,
1837  * then the vlan-id is available in the tag; otherwise, its vlan id is
1838  * implicitly obtained from the port-vlan-id of the vnet device.
1839  * The vlan id determined is returned in vidp.
1840  * Returns: B_TRUE if it is a tagged frame; B_FALSE if it is untagged.
1841  */
1842 static boolean_t
1843 vgen_frame_lookup_vid(vnet_t *vnetp, struct ether_header *ehp, uint16_t *vidp)
1844 {
1845 	struct ether_vlan_header	*evhp;
1846 
1847 	/* If it's a tagged frame, get the vlan id from vlan header */
1848 	if (ehp->ether_type == ETHERTYPE_VLAN) {
1849 
1850 		evhp = (struct ether_vlan_header *)ehp;
1851 		*vidp = VLAN_ID(ntohs(evhp->ether_tci));
1852 		return (B_TRUE);
1853 	}
1854 
1855 	/* Untagged frame, vlan-id is the pvid of vnet device */
1856 	*vidp = vnetp->pvid;
1857 	return (B_FALSE);
1858 }
1859 
1860 /*
1861  * Find the given vlan id in the hash table.
1862  * Return: B_TRUE if the id is found; B_FALSE if not found.
1863  */
1864 static boolean_t
1865 vgen_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid)
1866 {
1867 	int		rv;
1868 	mod_hash_val_t	vp;
1869 
1870 	rv = mod_hash_find(vlan_hashp, VLAN_ID_KEY(vid), (mod_hash_val_t *)&vp);
1871 
1872 	if (rv != 0)
1873 		return (B_FALSE);
1874 
1875 	return (B_TRUE);
1876 }
1877 
1878 /*
1879  * This function reads "priority-ether-types" property from md. This property
1880  * is used to enable support for priority frames. Applications which need
1881  * guaranteed and timely delivery of certain high priority frames to/from
1882  * a vnet or vsw within ldoms, should configure this property by providing
1883  * the ether type(s) for which the priority facility is needed.
1884  * Normal data frames are delivered over a ldc channel using the descriptor
1885  * ring mechanism which is constrained by factors such as descriptor ring size,
1886  * the rate at which the ring is processed at the peer ldc end point, etc.
1887  * The priority mechanism provides an Out-Of-Band path to send/receive frames
1888  * as raw pkt data (VIO_PKT_DATA) messages over the channel, avoiding the
1889  * descriptor ring path and enables a more reliable and timely delivery of
1890  * frames to the peer.
1891  */
1892 static void
1893 vgen_read_pri_eth_types(vgen_t *vgenp, md_t *mdp, mde_cookie_t node)
1894 {
1895 	int		rv;
1896 	uint16_t	*types;
1897 	uint64_t	*data;
1898 	int		size;
1899 	int		i;
1900 	size_t		mblk_sz;
1901 
1902 	rv = md_get_prop_data(mdp, node, pri_types_propname,
1903 	    (uint8_t **)&data, &size);
1904 	if (rv != 0) {
1905 		/*
1906 		 * Property may not exist if we are running pre-ldoms1.1 f/w.
1907 		 * Check if 'vgen_pri_eth_type' has been set in that case.
1908 		 */
1909 		if (vgen_pri_eth_type != 0) {
1910 			size = sizeof (vgen_pri_eth_type);
1911 			data = &vgen_pri_eth_type;
1912 		} else {
1913 			DBG2(vgenp, NULL,
1914 			    "prop(%s) not found", pri_types_propname);
1915 			size = 0;
1916 		}
1917 	}
1918 
1919 	if (size == 0) {
1920 		vgenp->pri_num_types = 0;
1921 		return;
1922 	}
1923 
1924 	/*
1925 	 * we have some priority-ether-types defined;
1926 	 * allocate a table of these types and also
1927 	 * allocate a pool of mblks to transmit these
1928 	 * priority packets.
1929 	 */
1930 	size /= sizeof (uint64_t);
1931 	vgenp->pri_num_types = size;
1932 	vgenp->pri_types = kmem_zalloc(size * sizeof (uint16_t), KM_SLEEP);
1933 	for (i = 0, types = vgenp->pri_types; i < size; i++) {
1934 		types[i] = data[i] & 0xFFFF;
1935 	}
1936 	mblk_sz = (VIO_PKT_DATA_HDRSIZE + vgenp->max_frame_size + 7) & ~7;
1937 	(void) vio_create_mblks(vgen_pri_tx_nmblks, mblk_sz,
1938 	    &vgenp->pri_tx_vmp);
1939 }
1940 
1941 static void
1942 vgen_mtu_read(vgen_t *vgenp, md_t *mdp, mde_cookie_t node, uint32_t *mtu)
1943 {
1944 	int		rv;
1945 	uint64_t	val;
1946 	char		*mtu_propname;
1947 
1948 	mtu_propname = vgen_mtu_propname;
1949 
1950 	rv = md_get_prop_val(mdp, node, mtu_propname, &val);
1951 	if (rv != 0) {
1952 		DWARN(vgenp, NULL, "prop(%s) not found", mtu_propname);
1953 		*mtu = vnet_ethermtu;
1954 	} else {
1955 
1956 		*mtu = val & 0xFFFF;
1957 		DBG2(vgenp, NULL, "%s(%d): (%d)\n", mtu_propname,
1958 		    vgenp->instance, *mtu);
1959 	}
1960 }
1961 
1962 /* register with MD event generator */
1963 static int
1964 vgen_mdeg_reg(vgen_t *vgenp)
1965 {
1966 	mdeg_prop_spec_t	*pspecp;
1967 	mdeg_node_spec_t	*parentp;
1968 	uint_t			templatesz;
1969 	int			rv;
1970 	mdeg_handle_t		dev_hdl = NULL;
1971 	mdeg_handle_t		port_hdl = NULL;
1972 
1973 	templatesz = sizeof (vgen_prop_template);
1974 	pspecp = kmem_zalloc(templatesz, KM_NOSLEEP);
1975 	if (pspecp == NULL) {
1976 		return (DDI_FAILURE);
1977 	}
1978 	parentp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_NOSLEEP);
1979 	if (parentp == NULL) {
1980 		kmem_free(pspecp, templatesz);
1981 		return (DDI_FAILURE);
1982 	}
1983 
1984 	bcopy(vgen_prop_template, pspecp, templatesz);
1985 
1986 	/*
1987 	 * NOTE: The instance here refers to the value of "reg" property and
1988 	 * not the dev_info instance (ddi_get_instance()) of vnet.
1989 	 */
1990 	VGEN_SET_MDEG_PROP_INST(pspecp, vgenp->regprop);
1991 
1992 	parentp->namep = "virtual-device";
1993 	parentp->specp = pspecp;
1994 
1995 	/* save parentp in vgen_t */
1996 	vgenp->mdeg_parentp = parentp;
1997 
1998 	/*
1999 	 * Register an interest in 'virtual-device' nodes with a
2000 	 * 'name' property of 'network'
2001 	 */
2002 	rv = mdeg_register(parentp, &vdev_match, vgen_mdeg_cb, vgenp, &dev_hdl);
2003 	if (rv != MDEG_SUCCESS) {
2004 		DERR(vgenp, NULL, "mdeg_register failed\n");
2005 		goto mdeg_reg_fail;
2006 	}
2007 
2008 	/* Register an interest in 'port' nodes */
2009 	rv = mdeg_register(parentp, &vport_match, vgen_mdeg_port_cb, vgenp,
2010 	    &port_hdl);
2011 	if (rv != MDEG_SUCCESS) {
2012 		DERR(vgenp, NULL, "mdeg_register failed\n");
2013 		goto mdeg_reg_fail;
2014 	}
2015 
2016 	/* save mdeg handle in vgen_t */
2017 	vgenp->mdeg_dev_hdl = dev_hdl;
2018 	vgenp->mdeg_port_hdl = port_hdl;
2019 
2020 	return (DDI_SUCCESS);
2021 
2022 mdeg_reg_fail:
2023 	if (dev_hdl != NULL) {
2024 		(void) mdeg_unregister(dev_hdl);
2025 	}
2026 	KMEM_FREE(parentp);
2027 	kmem_free(pspecp, templatesz);
2028 	vgenp->mdeg_parentp = NULL;
2029 	return (DDI_FAILURE);
2030 }
2031 
2032 /* unregister with MD event generator */
2033 static void
2034 vgen_mdeg_unreg(vgen_t *vgenp)
2035 {
2036 	(void) mdeg_unregister(vgenp->mdeg_dev_hdl);
2037 	(void) mdeg_unregister(vgenp->mdeg_port_hdl);
2038 	kmem_free(vgenp->mdeg_parentp->specp, sizeof (vgen_prop_template));
2039 	KMEM_FREE(vgenp->mdeg_parentp);
2040 	vgenp->mdeg_parentp = NULL;
2041 	vgenp->mdeg_dev_hdl = NULL;
2042 	vgenp->mdeg_port_hdl = NULL;
2043 }
2044 
2045 /* mdeg callback function for the port node */
2046 static int
2047 vgen_mdeg_port_cb(void *cb_argp, mdeg_result_t *resp)
2048 {
2049 	int idx;
2050 	int vsw_idx = -1;
2051 	uint64_t val;
2052 	vgen_t *vgenp;
2053 
2054 	if ((resp == NULL) || (cb_argp == NULL)) {
2055 		return (MDEG_FAILURE);
2056 	}
2057 
2058 	vgenp = (vgen_t *)cb_argp;
2059 	DBG1(vgenp, NULL, "enter\n");
2060 
2061 	mutex_enter(&vgenp->lock);
2062 
2063 	DBG1(vgenp, NULL, "ports: removed(%x), "
2064 	"added(%x), updated(%x)\n", resp->removed.nelem,
2065 	    resp->added.nelem, resp->match_curr.nelem);
2066 
2067 	for (idx = 0; idx < resp->removed.nelem; idx++) {
2068 		(void) vgen_remove_port(vgenp, resp->removed.mdp,
2069 		    resp->removed.mdep[idx]);
2070 	}
2071 
2072 	if (vgenp->vsw_portp == NULL) {
2073 		/*
2074 		 * find vsw_port and add it first, because other ports need
2075 		 * this when adding fdb entry (see vgen_port_init()).
2076 		 */
2077 		for (idx = 0; idx < resp->added.nelem; idx++) {
2078 			if (!(md_get_prop_val(resp->added.mdp,
2079 			    resp->added.mdep[idx], swport_propname, &val))) {
2080 				if (val == 0) {
2081 					/*
2082 					 * This port is connected to the
2083 					 * vsw on service domain.
2084 					 */
2085 					vsw_idx = idx;
2086 					if (vgen_add_port(vgenp,
2087 					    resp->added.mdp,
2088 					    resp->added.mdep[idx]) !=
2089 					    DDI_SUCCESS) {
2090 						cmn_err(CE_NOTE, "vnet%d Could "
2091 						    "not initialize virtual "
2092 						    "switch port.",
2093 						    vgenp->instance);
2094 						mutex_exit(&vgenp->lock);
2095 						return (MDEG_FAILURE);
2096 					}
2097 					break;
2098 				}
2099 			}
2100 		}
2101 		if (vsw_idx == -1) {
2102 			DWARN(vgenp, NULL, "can't find vsw_port\n");
2103 			mutex_exit(&vgenp->lock);
2104 			return (MDEG_FAILURE);
2105 		}
2106 	}
2107 
2108 	for (idx = 0; idx < resp->added.nelem; idx++) {
2109 		if ((vsw_idx != -1) && (vsw_idx == idx)) /* skip vsw_port */
2110 			continue;
2111 
2112 		/* If this port can't be added just skip it. */
2113 		(void) vgen_add_port(vgenp, resp->added.mdp,
2114 		    resp->added.mdep[idx]);
2115 	}
2116 
2117 	for (idx = 0; idx < resp->match_curr.nelem; idx++) {
2118 		(void) vgen_update_port(vgenp, resp->match_curr.mdp,
2119 		    resp->match_curr.mdep[idx],
2120 		    resp->match_prev.mdp,
2121 		    resp->match_prev.mdep[idx]);
2122 	}
2123 
2124 	mutex_exit(&vgenp->lock);
2125 	DBG1(vgenp, NULL, "exit\n");
2126 	return (MDEG_SUCCESS);
2127 }
2128 
2129 /* mdeg callback function for the vnet node */
2130 static int
2131 vgen_mdeg_cb(void *cb_argp, mdeg_result_t *resp)
2132 {
2133 	vgen_t		*vgenp;
2134 	vnet_t		*vnetp;
2135 	md_t		*mdp;
2136 	mde_cookie_t	node;
2137 	uint64_t	inst;
2138 	char		*node_name = NULL;
2139 
2140 	if ((resp == NULL) || (cb_argp == NULL)) {
2141 		return (MDEG_FAILURE);
2142 	}
2143 
2144 	vgenp = (vgen_t *)cb_argp;
2145 	vnetp = vgenp->vnetp;
2146 
2147 	DBG1(vgenp, NULL, "added %d : removed %d : curr matched %d"
2148 	    " : prev matched %d", resp->added.nelem, resp->removed.nelem,
2149 	    resp->match_curr.nelem, resp->match_prev.nelem);
2150 
2151 	mutex_enter(&vgenp->lock);
2152 
2153 	/*
2154 	 * We get an initial callback for this node as 'added' after
2155 	 * registering with mdeg. Note that we would have already gathered
2156 	 * information about this vnet node by walking MD earlier during attach
2157 	 * (in vgen_read_mdprops()). So, there is a window where the properties
2158 	 * of this node might have changed when we get this initial 'added'
2159 	 * callback. We handle this as if an update occured and invoke the same
2160 	 * function which handles updates to the properties of this vnet-node
2161 	 * if any. A non-zero 'match' value indicates that the MD has been
2162 	 * updated and that a 'network' node is present which may or may not
2163 	 * have been updated. It is up to the clients to examine their own
2164 	 * nodes and determine if they have changed.
2165 	 */
2166 	if (resp->added.nelem != 0) {
2167 
2168 		if (resp->added.nelem != 1) {
2169 			cmn_err(CE_NOTE, "!vnet%d: number of nodes added "
2170 			    "invalid: %d\n", vnetp->instance,
2171 			    resp->added.nelem);
2172 			goto vgen_mdeg_cb_err;
2173 		}
2174 
2175 		mdp = resp->added.mdp;
2176 		node = resp->added.mdep[0];
2177 
2178 	} else if (resp->match_curr.nelem != 0) {
2179 
2180 		if (resp->match_curr.nelem != 1) {
2181 			cmn_err(CE_NOTE, "!vnet%d: number of nodes updated "
2182 			    "invalid: %d\n", vnetp->instance,
2183 			    resp->match_curr.nelem);
2184 			goto vgen_mdeg_cb_err;
2185 		}
2186 
2187 		mdp = resp->match_curr.mdp;
2188 		node = resp->match_curr.mdep[0];
2189 
2190 	} else {
2191 		goto vgen_mdeg_cb_err;
2192 	}
2193 
2194 	/* Validate name and instance */
2195 	if (md_get_prop_str(mdp, node, "name", &node_name) != 0) {
2196 		DERR(vgenp, NULL, "unable to get node name\n");
2197 		goto vgen_mdeg_cb_err;
2198 	}
2199 
2200 	/* is this a virtual-network device? */
2201 	if (strcmp(node_name, vnet_propname) != 0) {
2202 		DERR(vgenp, NULL, "%s: Invalid node name: %s\n", node_name);
2203 		goto vgen_mdeg_cb_err;
2204 	}
2205 
2206 	if (md_get_prop_val(mdp, node, "cfg-handle", &inst)) {
2207 		DERR(vgenp, NULL, "prop(cfg-handle) not found\n");
2208 		goto vgen_mdeg_cb_err;
2209 	}
2210 
2211 	/* is this the right instance of vnet? */
2212 	if (inst != vgenp->regprop) {
2213 		DERR(vgenp, NULL,  "Invalid cfg-handle: %lx\n", inst);
2214 		goto vgen_mdeg_cb_err;
2215 	}
2216 
2217 	vgen_update_md_prop(vgenp, mdp, node);
2218 
2219 	mutex_exit(&vgenp->lock);
2220 	return (MDEG_SUCCESS);
2221 
2222 vgen_mdeg_cb_err:
2223 	mutex_exit(&vgenp->lock);
2224 	return (MDEG_FAILURE);
2225 }
2226 
2227 /*
2228  * Check to see if the relevant properties in the specified node have
2229  * changed, and if so take the appropriate action.
2230  */
2231 static void
2232 vgen_update_md_prop(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
2233 {
2234 	uint16_t	pvid;
2235 	uint16_t	*vids;
2236 	uint16_t	nvids;
2237 	vnet_t		*vnetp = vgenp->vnetp;
2238 	uint32_t	mtu;
2239 	enum		{ MD_init = 0x1,
2240 			    MD_vlans = 0x2,
2241 			    MD_mtu = 0x4 } updated;
2242 	int		rv;
2243 
2244 	updated = MD_init;
2245 
2246 	/* Read the vlan ids */
2247 	vgen_vlan_read_ids(vgenp, VGEN_LOCAL, mdp, mdex, &pvid, &vids,
2248 	    &nvids, NULL);
2249 
2250 	/* Determine if there are any vlan id updates */
2251 	if ((pvid != vnetp->pvid) ||		/* pvid changed? */
2252 	    (nvids != vnetp->nvids) ||		/* # of vids changed? */
2253 	    ((nvids != 0) && (vnetp->nvids != 0) &&	/* vids changed? */
2254 	    bcmp(vids, vnetp->vids, sizeof (uint16_t) * nvids))) {
2255 		updated |= MD_vlans;
2256 	}
2257 
2258 	/* Read mtu */
2259 	vgen_mtu_read(vgenp, mdp, mdex, &mtu);
2260 	if (mtu != vnetp->mtu) {
2261 		if (mtu >= ETHERMTU && mtu <= VNET_MAX_MTU) {
2262 			updated |= MD_mtu;
2263 		} else {
2264 			cmn_err(CE_NOTE, "!vnet%d: Unable to process mtu update"
2265 			    " as the specified value:%d is invalid\n",
2266 			    vnetp->instance, mtu);
2267 		}
2268 	}
2269 
2270 	/* Now process the updated props */
2271 
2272 	if (updated & MD_vlans) {
2273 
2274 		/* save the new vlan ids */
2275 		vnetp->pvid = pvid;
2276 		if (vnetp->nvids != 0) {
2277 			kmem_free(vnetp->vids,
2278 			    sizeof (uint16_t) * vnetp->nvids);
2279 			vnetp->nvids = 0;
2280 		}
2281 		if (nvids != 0) {
2282 			vnetp->nvids = nvids;
2283 			vnetp->vids = vids;
2284 		}
2285 
2286 		/* reset vlan-unaware peers (ver < 1.3) and restart handshake */
2287 		vgen_reset_vlan_unaware_ports(vgenp);
2288 
2289 	} else {
2290 
2291 		if (nvids != 0) {
2292 			kmem_free(vids, sizeof (uint16_t) * nvids);
2293 		}
2294 	}
2295 
2296 	if (updated & MD_mtu) {
2297 
2298 		DBG2(vgenp, NULL, "curr_mtu(%d) new_mtu(%d)\n",
2299 		    vnetp->mtu, mtu);
2300 
2301 		rv = vnet_mtu_update(vnetp, mtu);
2302 		if (rv == 0) {
2303 			vgenp->max_frame_size = mtu +
2304 			    sizeof (struct ether_header) + VLAN_TAGSZ;
2305 		}
2306 	}
2307 }
2308 
2309 /* add a new port to the device */
2310 static int
2311 vgen_add_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
2312 {
2313 	vgen_port_t	*portp;
2314 	int		rv;
2315 
2316 	portp = kmem_zalloc(sizeof (vgen_port_t), KM_SLEEP);
2317 
2318 	rv = vgen_port_read_props(portp, vgenp, mdp, mdex);
2319 	if (rv != DDI_SUCCESS) {
2320 		KMEM_FREE(portp);
2321 		return (DDI_FAILURE);
2322 	}
2323 
2324 	rv = vgen_port_attach(portp);
2325 	if (rv != DDI_SUCCESS) {
2326 		return (DDI_FAILURE);
2327 	}
2328 
2329 	return (DDI_SUCCESS);
2330 }
2331 
2332 /* read properties of the port from its md node */
2333 static int
2334 vgen_port_read_props(vgen_port_t *portp, vgen_t *vgenp, md_t *mdp,
2335 	mde_cookie_t mdex)
2336 {
2337 	uint64_t		port_num;
2338 	uint64_t		*ldc_ids;
2339 	uint64_t		macaddr;
2340 	uint64_t		val;
2341 	int			num_ldcs;
2342 	int			i;
2343 	int			addrsz;
2344 	int			num_nodes = 0;
2345 	int			listsz = 0;
2346 	mde_cookie_t		*listp = NULL;
2347 	uint8_t			*addrp;
2348 	struct ether_addr	ea;
2349 
2350 	/* read "id" property to get the port number */
2351 	if (md_get_prop_val(mdp, mdex, id_propname, &port_num)) {
2352 		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
2353 		return (DDI_FAILURE);
2354 	}
2355 
2356 	/*
2357 	 * Find the channel endpoint node(s) under this port node.
2358 	 */
2359 	if ((num_nodes = md_node_count(mdp)) <= 0) {
2360 		DWARN(vgenp, NULL, "invalid number of nodes found (%d)",
2361 		    num_nodes);
2362 		return (DDI_FAILURE);
2363 	}
2364 
2365 	/* allocate space for node list */
2366 	listsz = num_nodes * sizeof (mde_cookie_t);
2367 	listp = kmem_zalloc(listsz, KM_NOSLEEP);
2368 	if (listp == NULL)
2369 		return (DDI_FAILURE);
2370 
2371 	num_ldcs = md_scan_dag(mdp, mdex,
2372 	    md_find_name(mdp, channel_propname),
2373 	    md_find_name(mdp, "fwd"), listp);
2374 
2375 	if (num_ldcs <= 0) {
2376 		DWARN(vgenp, NULL, "can't find %s nodes", channel_propname);
2377 		kmem_free(listp, listsz);
2378 		return (DDI_FAILURE);
2379 	}
2380 
2381 	DBG2(vgenp, NULL, "num_ldcs %d", num_ldcs);
2382 
2383 	ldc_ids = kmem_zalloc(num_ldcs * sizeof (uint64_t), KM_NOSLEEP);
2384 	if (ldc_ids == NULL) {
2385 		kmem_free(listp, listsz);
2386 		return (DDI_FAILURE);
2387 	}
2388 
2389 	for (i = 0; i < num_ldcs; i++) {
2390 		/* read channel ids */
2391 		if (md_get_prop_val(mdp, listp[i], id_propname, &ldc_ids[i])) {
2392 			DWARN(vgenp, NULL, "prop(%s) not found\n",
2393 			    id_propname);
2394 			kmem_free(listp, listsz);
2395 			kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
2396 			return (DDI_FAILURE);
2397 		}
2398 		DBG2(vgenp, NULL, "ldc_id 0x%llx", ldc_ids[i]);
2399 	}
2400 
2401 	kmem_free(listp, listsz);
2402 
2403 	if (md_get_prop_data(mdp, mdex, rmacaddr_propname, &addrp,
2404 	    &addrsz)) {
2405 		DWARN(vgenp, NULL, "prop(%s) not found\n", rmacaddr_propname);
2406 		kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
2407 		return (DDI_FAILURE);
2408 	}
2409 
2410 	if (addrsz < ETHERADDRL) {
2411 		DWARN(vgenp, NULL, "invalid address size (%d)\n", addrsz);
2412 		kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
2413 		return (DDI_FAILURE);
2414 	}
2415 
2416 	macaddr = *((uint64_t *)addrp);
2417 
2418 	DBG2(vgenp, NULL, "remote mac address 0x%llx\n", macaddr);
2419 
2420 	for (i = ETHERADDRL - 1; i >= 0; i--) {
2421 		ea.ether_addr_octet[i] = macaddr & 0xFF;
2422 		macaddr >>= 8;
2423 	}
2424 
2425 	if (vgenp->vsw_portp == NULL) {
2426 		if (!(md_get_prop_val(mdp, mdex, swport_propname, &val))) {
2427 			if (val == 0) {
2428 				(void) atomic_swap_32(
2429 				    &vgenp->vsw_port_refcnt, 0);
2430 				/* This port is connected to the vsw */
2431 				vgenp->vsw_portp = portp;
2432 			}
2433 		}
2434 	}
2435 
2436 	/* now update all properties into the port */
2437 	portp->vgenp = vgenp;
2438 	portp->port_num = port_num;
2439 	ether_copy(&ea, &portp->macaddr);
2440 	portp->ldc_ids = kmem_zalloc(sizeof (uint64_t) * num_ldcs, KM_SLEEP);
2441 	bcopy(ldc_ids, portp->ldc_ids, sizeof (uint64_t) * num_ldcs);
2442 	portp->num_ldcs = num_ldcs;
2443 
2444 	/* read vlan id properties of this port node */
2445 	vgen_vlan_read_ids(portp, VGEN_PEER, mdp, mdex, &portp->pvid,
2446 	    &portp->vids, &portp->nvids, NULL);
2447 
2448 	kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
2449 
2450 	return (DDI_SUCCESS);
2451 }
2452 
2453 /* remove a port from the device */
2454 static int
2455 vgen_remove_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
2456 {
2457 	uint64_t	port_num;
2458 	vgen_port_t	*portp;
2459 	vgen_portlist_t	*plistp;
2460 
2461 	/* read "id" property to get the port number */
2462 	if (md_get_prop_val(mdp, mdex, id_propname, &port_num)) {
2463 		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
2464 		return (DDI_FAILURE);
2465 	}
2466 
2467 	plistp = &(vgenp->vgenports);
2468 
2469 	WRITE_ENTER(&plistp->rwlock);
2470 	portp = vgen_port_lookup(plistp, (int)port_num);
2471 	if (portp == NULL) {
2472 		DWARN(vgenp, NULL, "can't find port(%lx)\n", port_num);
2473 		RW_EXIT(&plistp->rwlock);
2474 		return (DDI_FAILURE);
2475 	}
2476 
2477 	vgen_port_detach_mdeg(portp);
2478 	RW_EXIT(&plistp->rwlock);
2479 
2480 	return (DDI_SUCCESS);
2481 }
2482 
2483 /* attach a port to the device based on mdeg data */
2484 static int
2485 vgen_port_attach(vgen_port_t *portp)
2486 {
2487 	int			i;
2488 	vgen_portlist_t		*plistp;
2489 	vgen_t			*vgenp;
2490 	uint64_t		*ldcids;
2491 	uint32_t		num_ldcs;
2492 	mac_register_t		*macp;
2493 	vio_net_res_type_t	type;
2494 	int			rv;
2495 
2496 	ASSERT(portp != NULL);
2497 
2498 	vgenp = portp->vgenp;
2499 	ldcids = portp->ldc_ids;
2500 	num_ldcs = portp->num_ldcs;
2501 
2502 	DBG1(vgenp, NULL, "port_num(%d)\n", portp->port_num);
2503 
2504 	mutex_init(&portp->lock, NULL, MUTEX_DRIVER, NULL);
2505 	rw_init(&portp->ldclist.rwlock, NULL, RW_DRIVER, NULL);
2506 	portp->ldclist.headp = NULL;
2507 
2508 	for (i = 0; i < num_ldcs; i++) {
2509 		DBG2(vgenp, NULL, "ldcid (%lx)\n", ldcids[i]);
2510 		if (vgen_ldc_attach(portp, ldcids[i]) == DDI_FAILURE) {
2511 			vgen_port_detach(portp);
2512 			return (DDI_FAILURE);
2513 		}
2514 	}
2515 
2516 	/* create vlan id hash table */
2517 	vgen_vlan_create_hash(portp);
2518 
2519 	if (portp == vgenp->vsw_portp) {
2520 		/* This port is connected to the switch port */
2521 		vgenp->vsw_portp = portp;
2522 		(void) atomic_swap_32(&portp->use_vsw_port, B_FALSE);
2523 		type = VIO_NET_RES_LDC_SERVICE;
2524 	} else {
2525 		(void) atomic_swap_32(&portp->use_vsw_port, B_TRUE);
2526 		type = VIO_NET_RES_LDC_GUEST;
2527 	}
2528 
2529 	if ((macp = mac_alloc(MAC_VERSION)) == NULL) {
2530 		vgen_port_detach(portp);
2531 		return (DDI_FAILURE);
2532 	}
2533 	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
2534 	macp->m_driver = portp;
2535 	macp->m_dip = vgenp->vnetdip;
2536 	macp->m_src_addr = (uint8_t *)&(vgenp->macaddr);
2537 	macp->m_callbacks = &vgen_m_callbacks;
2538 	macp->m_min_sdu = 0;
2539 	macp->m_max_sdu = ETHERMTU;
2540 
2541 	mutex_enter(&portp->lock);
2542 	rv = vio_net_resource_reg(macp, type, vgenp->macaddr,
2543 	    portp->macaddr, &portp->vhp, &portp->vcb);
2544 	mutex_exit(&portp->lock);
2545 	mac_free(macp);
2546 
2547 	if (rv == 0) {
2548 		/* link it into the list of ports */
2549 		plistp = &(vgenp->vgenports);
2550 		WRITE_ENTER(&plistp->rwlock);
2551 		vgen_port_list_insert(portp);
2552 		RW_EXIT(&plistp->rwlock);
2553 	} else {
2554 		DERR(vgenp, NULL, "vio_net_resource_reg failed for portp=0x%p",
2555 		    portp);
2556 		vgen_port_detach(portp);
2557 	}
2558 
2559 	DBG1(vgenp, NULL, "exit: port_num(%d)\n", portp->port_num);
2560 	return (DDI_SUCCESS);
2561 }
2562 
2563 /* detach a port from the device based on mdeg data */
2564 static void
2565 vgen_port_detach_mdeg(vgen_port_t *portp)
2566 {
2567 	vgen_t *vgenp = portp->vgenp;
2568 
2569 	DBG1(vgenp, NULL, "enter: port_num(%d)\n", portp->port_num);
2570 
2571 	mutex_enter(&portp->lock);
2572 
2573 	/* stop the port if needed */
2574 	if (portp->flags & VGEN_STARTED) {
2575 		vgen_port_uninit(portp);
2576 	}
2577 
2578 	mutex_exit(&portp->lock);
2579 	vgen_port_detach(portp);
2580 
2581 	DBG1(vgenp, NULL, "exit: port_num(%d)\n", portp->port_num);
2582 }
2583 
2584 static int
2585 vgen_update_port(vgen_t *vgenp, md_t *curr_mdp, mde_cookie_t curr_mdex,
2586 	md_t *prev_mdp, mde_cookie_t prev_mdex)
2587 {
2588 	uint64_t	cport_num;
2589 	uint64_t	pport_num;
2590 	vgen_portlist_t	*plistp;
2591 	vgen_port_t	*portp;
2592 	boolean_t	updated_vlans = B_FALSE;
2593 	uint16_t	pvid;
2594 	uint16_t	*vids;
2595 	uint16_t	nvids;
2596 
2597 	/*
2598 	 * For now, we get port updates only if vlan ids changed.
2599 	 * We read the port num and do some sanity check.
2600 	 */
2601 	if (md_get_prop_val(curr_mdp, curr_mdex, id_propname, &cport_num)) {
2602 		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
2603 		return (DDI_FAILURE);
2604 	}
2605 
2606 	if (md_get_prop_val(prev_mdp, prev_mdex, id_propname, &pport_num)) {
2607 		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
2608 		return (DDI_FAILURE);
2609 	}
2610 	if (cport_num != pport_num)
2611 		return (DDI_FAILURE);
2612 
2613 	plistp = &(vgenp->vgenports);
2614 
2615 	READ_ENTER(&plistp->rwlock);
2616 
2617 	portp = vgen_port_lookup(plistp, (int)cport_num);
2618 	if (portp == NULL) {
2619 		DWARN(vgenp, NULL, "can't find port(%lx)\n", cport_num);
2620 		RW_EXIT(&plistp->rwlock);
2621 		return (DDI_FAILURE);
2622 	}
2623 
2624 	/* Read the vlan ids */
2625 	vgen_vlan_read_ids(portp, VGEN_PEER, curr_mdp, curr_mdex, &pvid, &vids,
2626 	    &nvids, NULL);
2627 
2628 	/* Determine if there are any vlan id updates */
2629 	if ((pvid != portp->pvid) ||		/* pvid changed? */
2630 	    (nvids != portp->nvids) ||		/* # of vids changed? */
2631 	    ((nvids != 0) && (portp->nvids != 0) &&	/* vids changed? */
2632 	    bcmp(vids, portp->vids, sizeof (uint16_t) * nvids))) {
2633 		updated_vlans = B_TRUE;
2634 	}
2635 
2636 	if (updated_vlans == B_FALSE) {
2637 		RW_EXIT(&plistp->rwlock);
2638 		return (DDI_FAILURE);
2639 	}
2640 
2641 	/* remove the port from vlans it has been assigned to */
2642 	vgen_vlan_remove_ids(portp);
2643 
2644 	/* save the new vlan ids */
2645 	portp->pvid = pvid;
2646 	if (portp->nvids != 0) {
2647 		kmem_free(portp->vids, sizeof (uint16_t) * portp->nvids);
2648 		portp->nvids = 0;
2649 	}
2650 	if (nvids != 0) {
2651 		portp->vids = kmem_zalloc(sizeof (uint16_t) * nvids, KM_SLEEP);
2652 		bcopy(vids, portp->vids, sizeof (uint16_t) * nvids);
2653 		portp->nvids = nvids;
2654 		kmem_free(vids, sizeof (uint16_t) * nvids);
2655 	}
2656 
2657 	/* add port to the new vlans */
2658 	vgen_vlan_add_ids(portp);
2659 
2660 	/* reset the port if it is vlan unaware (ver < 1.3) */
2661 	vgen_vlan_unaware_port_reset(portp);
2662 
2663 	RW_EXIT(&plistp->rwlock);
2664 
2665 	return (DDI_SUCCESS);
2666 }
2667 
2668 static uint64_t
2669 vgen_port_stat(vgen_port_t *portp, uint_t stat)
2670 {
2671 	vgen_ldclist_t	*ldclp;
2672 	vgen_ldc_t *ldcp;
2673 	uint64_t	val;
2674 
2675 	val = 0;
2676 	ldclp = &portp->ldclist;
2677 
2678 	READ_ENTER(&ldclp->rwlock);
2679 	for (ldcp = ldclp->headp; ldcp != NULL; ldcp = ldcp->nextp) {
2680 		val += vgen_ldc_stat(ldcp, stat);
2681 	}
2682 	RW_EXIT(&ldclp->rwlock);
2683 
2684 	return (val);
2685 }
2686 
2687 /* allocate receive resources */
2688 static int
2689 vgen_init_multipools(vgen_ldc_t *ldcp)
2690 {
2691 	size_t		data_sz;
2692 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
2693 	int		status;
2694 	uint32_t	sz1 = 0;
2695 	uint32_t	sz2 = 0;
2696 	uint32_t	sz3 = 0;
2697 	uint32_t	sz4 = 0;
2698 
2699 	/*
2700 	 * We round up the mtu specified to be a multiple of 2K.
2701 	 * We then create rx pools based on the rounded up size.
2702 	 */
2703 	data_sz = vgenp->max_frame_size + VNET_IPALIGN + VNET_LDCALIGN;
2704 	data_sz = VNET_ROUNDUP_2K(data_sz);
2705 
2706 	/*
2707 	 * If pool sizes are specified, use them. Note that the presence of
2708 	 * the first tunable will be used as a hint.
2709 	 */
2710 	if (vgen_rbufsz1 != 0) {
2711 
2712 		sz1 = vgen_rbufsz1;
2713 		sz2 = vgen_rbufsz2;
2714 		sz3 = vgen_rbufsz3;
2715 		sz4 = vgen_rbufsz4;
2716 
2717 		if (sz4 == 0) { /* need 3 pools */
2718 
2719 			ldcp->max_rxpool_size = sz3;
2720 			status = vio_init_multipools(&ldcp->vmp,
2721 			    VGEN_NUM_VMPOOLS, sz1, sz2, sz3, vgen_nrbufs1,
2722 			    vgen_nrbufs2, vgen_nrbufs3);
2723 
2724 		} else {
2725 
2726 			ldcp->max_rxpool_size = sz4;
2727 			status = vio_init_multipools(&ldcp->vmp,
2728 			    VGEN_NUM_VMPOOLS + 1, sz1, sz2, sz3, sz4,
2729 			    vgen_nrbufs1, vgen_nrbufs2, vgen_nrbufs3,
2730 			    vgen_nrbufs4);
2731 		}
2732 		return (status);
2733 	}
2734 
2735 	/*
2736 	 * Pool sizes are not specified. We select the pool sizes based on the
2737 	 * mtu if vnet_jumbo_rxpools is enabled.
2738 	 */
2739 	if (vnet_jumbo_rxpools == B_FALSE || data_sz == VNET_2K) {
2740 		/*
2741 		 * Receive buffer pool allocation based on mtu is disabled.
2742 		 * Use the default mechanism of standard size pool allocation.
2743 		 */
2744 		sz1 = VGEN_DBLK_SZ_128;
2745 		sz2 = VGEN_DBLK_SZ_256;
2746 		sz3 = VGEN_DBLK_SZ_2048;
2747 		ldcp->max_rxpool_size = sz3;
2748 
2749 		status = vio_init_multipools(&ldcp->vmp, VGEN_NUM_VMPOOLS,
2750 		    sz1, sz2, sz3,
2751 		    vgen_nrbufs1, vgen_nrbufs2, vgen_nrbufs3);
2752 
2753 		return (status);
2754 	}
2755 
2756 	switch (data_sz) {
2757 
2758 	case VNET_4K:
2759 
2760 		sz1 = VGEN_DBLK_SZ_128;
2761 		sz2 = VGEN_DBLK_SZ_256;
2762 		sz3 = VGEN_DBLK_SZ_2048;
2763 		sz4 = sz3 << 1;			/* 4K */
2764 		ldcp->max_rxpool_size = sz4;
2765 
2766 		status = vio_init_multipools(&ldcp->vmp, VGEN_NUM_VMPOOLS + 1,
2767 		    sz1, sz2, sz3, sz4,
2768 		    vgen_nrbufs1, vgen_nrbufs2, vgen_nrbufs3, vgen_nrbufs4);
2769 		break;
2770 
2771 	default:	/* data_sz:  4K+ to 16K */
2772 
2773 		sz1 = VGEN_DBLK_SZ_256;
2774 		sz2 = VGEN_DBLK_SZ_2048;
2775 		sz3 = data_sz >> 1;	/* Jumbo-size/2 */
2776 		sz4 = data_sz;		/* Jumbo-size  */
2777 		ldcp->max_rxpool_size = sz4;
2778 
2779 		status = vio_init_multipools(&ldcp->vmp, VGEN_NUM_VMPOOLS + 1,
2780 		    sz1, sz2, sz3, sz4,
2781 		    vgen_nrbufs1, vgen_nrbufs2, vgen_nrbufs3, vgen_nrbufs4);
2782 		break;
2783 
2784 	}
2785 
2786 	return (status);
2787 }
2788 
/*
 * Attach the channel corresponding to the given ldc_id to the port.
 *
 * Allocates and initializes a vgen_ldc_t for the channel: its locks, the
 * LDC handle, the optional receive worker thread, the LDC callback and
 * message buffer, the transmit ring, the receive mblk pools and the kstats.
 * The new channel is then linked at the head of the port's channel list and
 * marked CHANNEL_ATTACHED.
 *
 * Each completed step sets a bit in attach_state; on failure the code at
 * ldc_attach_failed undoes exactly the steps whose bits are set.
 *
 * Returns DDI_SUCCESS, or DDI_FAILURE after releasing everything that had
 * been set up for the channel.
 */
static int
vgen_ldc_attach(vgen_port_t *portp, uint64_t ldc_id)
{
	vgen_t 		*vgenp;
	vgen_ldclist_t	*ldclp;
	vgen_ldc_t 	*ldcp, **prev_ldcp;
	ldc_attr_t 	attr;
	int 		status;
	ldc_status_t	istatus;
	char		kname[MAXNAMELEN];
	int		instance;
	/* one bit per setup step that has to be undone on failure */
	enum	{AST_init = 0x0, AST_ldc_alloc = 0x1,
		AST_mutex_init = 0x2, AST_ldc_init = 0x4,
		AST_ldc_reg_cb = 0x8, AST_alloc_tx_ring = 0x10,
		AST_create_rxmblks = 0x20,
		AST_create_rcv_thread = 0x40} attach_state;

	attach_state = AST_init;
	vgenp = portp->vgenp;
	ldclp = &portp->ldclist;

	ldcp = kmem_zalloc(sizeof (vgen_ldc_t), KM_NOSLEEP);
	if (ldcp == NULL) {
		/* no state bits are set yet, so nothing gets unwound */
		goto ldc_attach_failed;
	}
	ldcp->ldc_id = ldc_id;
	ldcp->portp = portp;

	attach_state |= AST_ldc_alloc;

	mutex_init(&ldcp->txlock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&ldcp->cblock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&ldcp->tclock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&ldcp->wrlock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&ldcp->rxlock, NULL, MUTEX_DRIVER, NULL);

	attach_state |= AST_mutex_init;

	/* set up the channel attributes and initialize the LDC handle */
	attr.devclass = LDC_DEV_NT;
	attr.instance = vgenp->instance;
	attr.mode = LDC_MODE_UNRELIABLE;
	attr.mtu = vnet_ldc_mtu;
	status = ldc_init(ldc_id, &attr, &ldcp->ldc_handle);
	if (status != 0) {
		DWARN(vgenp, ldcp, "ldc_init failed,rv (%d)\n", status);
		goto ldc_attach_failed;
	}
	attach_state |= AST_ldc_init;

	/* optionally create a worker thread for receive processing */
	if (vgen_rcv_thread_enabled) {
		ldcp->rcv_thr_flags = 0;

		mutex_init(&ldcp->rcv_thr_lock, NULL, MUTEX_DRIVER, NULL);
		cv_init(&ldcp->rcv_thr_cv, NULL, CV_DRIVER, NULL);
		ldcp->rcv_thread = thread_create(NULL, 2 * DEFAULTSTKSZ,
		    vgen_ldc_rcv_worker, ldcp, 0, &p0, TS_RUN, maxclsyspri);

		/*
		 * The state bit is set before the thread is checked, so
		 * that the failure path destroys the lock and cv that were
		 * just initialized even when no thread was created.
		 */
		attach_state |= AST_create_rcv_thread;
		if (ldcp->rcv_thread == NULL) {
			DWARN(vgenp, ldcp, "Failed to create worker thread");
			goto ldc_attach_failed;
		}
	}

	status = ldc_reg_callback(ldcp->ldc_handle, vgen_ldc_cb, (caddr_t)ldcp);
	if (status != 0) {
		DWARN(vgenp, ldcp, "ldc_reg_callback failed, rv (%d)\n",
		    status);
		goto ldc_attach_failed;
	}
	/*
	 * allocate a message for ldc_read()s, big enough to hold ctrl and
	 * data msgs, including raw data msgs used to recv priority frames.
	 */
	ldcp->msglen = VIO_PKT_DATA_HDRSIZE + vgenp->max_frame_size;
	ldcp->ldcmsg = kmem_alloc(ldcp->msglen, KM_SLEEP);
	/* this bit covers both the callback and the message buffer */
	attach_state |= AST_ldc_reg_cb;

	(void) ldc_status(ldcp->ldc_handle, &istatus);
	ASSERT(istatus == LDC_INIT);
	ldcp->ldc_status = istatus;

	/* allocate transmit resources */
	status = vgen_alloc_tx_ring(ldcp);
	if (status != 0) {
		goto ldc_attach_failed;
	}
	attach_state |= AST_alloc_tx_ring;

	/* allocate receive resources */
	status = vgen_init_multipools(ldcp);
	if (status != 0) {
		/*
		 * We do not return failure if receive mblk pools can't be
		 * allocated; instead allocb(9F) will be used to dynamically
		 * allocate buffers during receive.
		 */
		DWARN(vgenp, ldcp,
		    "vnet%d: status(%d), failed to allocate rx mblk pools for "
		    "channel(0x%lx)\n",
		    vgenp->instance, status, ldcp->ldc_id);
	} else {
		attach_state |= AST_create_rxmblks;
	}

	/* Setup kstats for the channel */
	instance = vgenp->instance;
	(void) sprintf(kname, "vnetldc0x%lx", ldcp->ldc_id);
	ldcp->ksp = vgen_setup_kstats("vnet", instance, kname, &ldcp->stats);
	if (ldcp->ksp == NULL) {
		goto ldc_attach_failed;
	}

	/* initialize vgen_versions supported */
	bcopy(vgen_versions, ldcp->vgen_versions, sizeof (ldcp->vgen_versions));
	vgen_reset_vnet_proto_ops(ldcp);

	/* link it into the list of channels for this port */
	WRITE_ENTER(&ldclp->rwlock);
	prev_ldcp = (vgen_ldc_t **)(&ldclp->headp);
	ldcp->nextp = *prev_ldcp;
	*prev_ldcp = ldcp;
	RW_EXIT(&ldclp->rwlock);

	ldcp->flags |= CHANNEL_ATTACHED;
	return (DDI_SUCCESS);

ldc_attach_failed:
	/* undo only the steps recorded in attach_state */
	if (attach_state & AST_ldc_reg_cb) {
		(void) ldc_unreg_callback(ldcp->ldc_handle);
		kmem_free(ldcp->ldcmsg, ldcp->msglen);
	}
	if (attach_state & AST_create_rcv_thread) {
		/* the thread itself may not exist; the lock and cv do */
		if (ldcp->rcv_thread != NULL) {
			vgen_stop_rcv_thread(ldcp);
		}
		mutex_destroy(&ldcp->rcv_thr_lock);
		cv_destroy(&ldcp->rcv_thr_cv);
	}
	if (attach_state & AST_create_rxmblks) {
		vio_mblk_pool_t *fvmp = NULL;
		vio_destroy_multipools(&ldcp->vmp, &fvmp);
		ASSERT(fvmp == NULL);
	}
	if (attach_state & AST_alloc_tx_ring) {
		vgen_free_tx_ring(ldcp);
	}
	if (attach_state & AST_ldc_init) {
		(void) ldc_fini(ldcp->ldc_handle);
	}
	if (attach_state & AST_mutex_init) {
		mutex_destroy(&ldcp->tclock);
		mutex_destroy(&ldcp->txlock);
		mutex_destroy(&ldcp->cblock);
		mutex_destroy(&ldcp->wrlock);
		mutex_destroy(&ldcp->rxlock);
	}
	if (attach_state & AST_ldc_alloc) {
		KMEM_FREE(ldcp);
	}
	return (DDI_FAILURE);
}
2952 
/*
 * Detach a channel from the port.
 *
 * The channel is first located in its port's channel list; if it is not
 * there, nothing is done. For a channel marked CHANNEL_ATTACHED, everything
 * set up by vgen_ldc_attach() is torn down, the channel is unlinked from
 * the list and its structure is freed.
 *
 * NOTE(review): the channel list is walked and modified here without
 * taking ldclp->rwlock; presumably the caller provides the necessary
 * exclusion -- confirm against the callers.
 */
static void
vgen_ldc_detach(vgen_ldc_t *ldcp)
{
	vgen_port_t	*portp;
	vgen_t 		*vgenp;
	vgen_ldc_t 	*pldcp;
	vgen_ldc_t	**prev_ldcp;
	vgen_ldclist_t	*ldclp;

	portp = ldcp->portp;
	vgenp = portp->vgenp;
	ldclp = &portp->ldclist;

	/*
	 * Find the link that points at ldcp; prev_ldcp is left pointing at
	 * that link so the channel can be unlinked below.
	 */
	prev_ldcp =  (vgen_ldc_t **)&ldclp->headp;
	for (; (pldcp = *prev_ldcp) != NULL; prev_ldcp = &pldcp->nextp) {
		if (pldcp == ldcp) {
			break;
		}
	}

	if (pldcp == NULL) {
		/* invalid ldcp? */
		return;
	}

	/* only warn; the detach proceeds regardless of the channel status */
	if (ldcp->ldc_status != LDC_INIT) {
		DWARN(vgenp, ldcp, "ldc_status is not INIT\n");
	}

	if (ldcp->flags & CHANNEL_ATTACHED) {
		ldcp->flags &= ~(CHANNEL_ATTACHED);

		(void) ldc_unreg_callback(ldcp->ldc_handle);
		if (ldcp->rcv_thread != NULL) {
			/* First stop the receive thread */
			vgen_stop_rcv_thread(ldcp);
			mutex_destroy(&ldcp->rcv_thr_lock);
			cv_destroy(&ldcp->rcv_thr_cv);
		}
		/* message buffer that was allocated for ldc_read()s */
		kmem_free(ldcp->ldcmsg, ldcp->msglen);

		vgen_destroy_kstats(ldcp->ksp);
		ldcp->ksp = NULL;

		/*
		 * if we cannot reclaim all mblks, put this
		 * on the list of pools(vgenp->rmp) to be reclaimed when the
		 * device gets detached (see vgen_uninit()).
		 */
		vio_destroy_multipools(&ldcp->vmp, &vgenp->rmp);

		/* free transmit resources */
		vgen_free_tx_ring(ldcp);

		(void) ldc_fini(ldcp->ldc_handle);
		mutex_destroy(&ldcp->tclock);
		mutex_destroy(&ldcp->txlock);
		mutex_destroy(&ldcp->cblock);
		mutex_destroy(&ldcp->wrlock);
		mutex_destroy(&ldcp->rxlock);

		/* unlink it from the list */
		*prev_ldcp = ldcp->nextp;
		KMEM_FREE(ldcp);
	}
}
3020 
3021 /*
3022  * This function allocates transmit resources for the channel.
3023  * The resources consist of a transmit descriptor ring and an associated
3024  * transmit buffer ring.
3025  */
3026 static int
3027 vgen_alloc_tx_ring(vgen_ldc_t *ldcp)
3028 {
3029 	void *tbufp;
3030 	ldc_mem_info_t minfo;
3031 	uint32_t txdsize;
3032 	uint32_t tbufsize;
3033 	int status;
3034 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3035 
3036 	ldcp->num_txds = vnet_ntxds;
3037 	txdsize = sizeof (vnet_public_desc_t);
3038 	tbufsize = sizeof (vgen_private_desc_t);
3039 
3040 	/* allocate transmit buffer ring */
3041 	tbufp = kmem_zalloc(ldcp->num_txds * tbufsize, KM_NOSLEEP);
3042 	if (tbufp == NULL) {
3043 		return (DDI_FAILURE);
3044 	}
3045 
3046 	/* create transmit descriptor ring */
3047 	status = ldc_mem_dring_create(ldcp->num_txds, txdsize,
3048 	    &ldcp->tx_dhandle);
3049 	if (status) {
3050 		DWARN(vgenp, ldcp, "ldc_mem_dring_create() failed\n");
3051 		kmem_free(tbufp, ldcp->num_txds * tbufsize);
3052 		return (DDI_FAILURE);
3053 	}
3054 
3055 	/* get the addr of descripror ring */
3056 	status = ldc_mem_dring_info(ldcp->tx_dhandle, &minfo);
3057 	if (status) {
3058 		DWARN(vgenp, ldcp, "ldc_mem_dring_info() failed\n");
3059 		kmem_free(tbufp, ldcp->num_txds * tbufsize);
3060 		(void) ldc_mem_dring_destroy(ldcp->tx_dhandle);
3061 		ldcp->tbufp = NULL;
3062 		return (DDI_FAILURE);
3063 	}
3064 	ldcp->txdp = (vnet_public_desc_t *)(minfo.vaddr);
3065 	ldcp->tbufp = tbufp;
3066 
3067 	ldcp->txdendp = &((ldcp->txdp)[ldcp->num_txds]);
3068 	ldcp->tbufendp = &((ldcp->tbufp)[ldcp->num_txds]);
3069 
3070 	return (DDI_SUCCESS);
3071 }
3072 
3073 /* Free transmit resources for the channel */
3074 static void
3075 vgen_free_tx_ring(vgen_ldc_t *ldcp)
3076 {
3077 	int tbufsize = sizeof (vgen_private_desc_t);
3078 
3079 	/* free transmit descriptor ring */
3080 	(void) ldc_mem_dring_destroy(ldcp->tx_dhandle);
3081 
3082 	/* free transmit buffer ring */
3083 	kmem_free(ldcp->tbufp, ldcp->num_txds * tbufsize);
3084 	ldcp->txdp = ldcp->txdendp = NULL;
3085 	ldcp->tbufp = ldcp->tbufendp = NULL;
3086 }
3087 
3088 /* enable transmit/receive on the channels for the port */
3089 static void
3090 vgen_init_ldcs(vgen_port_t *portp)
3091 {
3092 	vgen_ldclist_t	*ldclp = &portp->ldclist;
3093 	vgen_ldc_t	*ldcp;
3094 
3095 	READ_ENTER(&ldclp->rwlock);
3096 	ldcp =  ldclp->headp;
3097 	for (; ldcp  != NULL; ldcp = ldcp->nextp) {
3098 		(void) vgen_ldc_init(ldcp);
3099 	}
3100 	RW_EXIT(&ldclp->rwlock);
3101 }
3102 
3103 /* stop transmit/receive on the channels for the port */
3104 static void
3105 vgen_uninit_ldcs(vgen_port_t *portp)
3106 {
3107 	vgen_ldclist_t	*ldclp = &portp->ldclist;
3108 	vgen_ldc_t	*ldcp;
3109 
3110 	READ_ENTER(&ldclp->rwlock);
3111 	ldcp =  ldclp->headp;
3112 	for (; ldcp  != NULL; ldcp = ldcp->nextp) {
3113 		vgen_ldc_uninit(ldcp);
3114 	}
3115 	RW_EXIT(&ldclp->rwlock);
3116 }
3117 
3118 /* enable transmit/receive on the channel */
3119 static int
3120 vgen_ldc_init(vgen_ldc_t *ldcp)
3121 {
3122 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3123 	ldc_status_t	istatus;
3124 	int		rv;
3125 	uint32_t	retries = 0;
3126 	enum	{ ST_init = 0x0, ST_ldc_open = 0x1,
3127 		ST_init_tbufs = 0x2, ST_cb_enable = 0x4} init_state;
3128 	init_state = ST_init;
3129 
3130 	DBG1(vgenp, ldcp, "enter\n");
3131 	LDC_LOCK(ldcp);
3132 
3133 	rv = ldc_open(ldcp->ldc_handle);
3134 	if (rv != 0) {
3135 		DWARN(vgenp, ldcp, "ldc_open failed: rv(%d)\n", rv);
3136 		goto ldcinit_failed;
3137 	}
3138 	init_state |= ST_ldc_open;
3139 
3140 	(void) ldc_status(ldcp->ldc_handle, &istatus);
3141 	if (istatus != LDC_OPEN && istatus != LDC_READY) {
3142 		DWARN(vgenp, ldcp, "status(%d) is not OPEN/READY\n", istatus);
3143 		goto ldcinit_failed;
3144 	}
3145 	ldcp->ldc_status = istatus;
3146 
3147 	rv = vgen_init_tbufs(ldcp);
3148 	if (rv != 0) {
3149 		DWARN(vgenp, ldcp, "vgen_init_tbufs() failed\n");
3150 		goto ldcinit_failed;
3151 	}
3152 	init_state |= ST_init_tbufs;
3153 
3154 	rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_ENABLE);
3155 	if (rv != 0) {
3156 		DWARN(vgenp, ldcp, "ldc_set_cb_mode failed: rv(%d)\n", rv);
3157 		goto ldcinit_failed;
3158 	}
3159 
3160 	init_state |= ST_cb_enable;
3161 
3162 	do {
3163 		rv = ldc_up(ldcp->ldc_handle);
3164 		if ((rv != 0) && (rv == EWOULDBLOCK)) {
3165 			DBG2(vgenp, ldcp, "ldc_up err rv(%d)\n", rv);
3166 			drv_usecwait(VGEN_LDC_UP_DELAY);
3167 		}
3168 		if (retries++ >= vgen_ldcup_retries)
3169 			break;
3170 	} while (rv == EWOULDBLOCK);
3171 
3172 	(void) ldc_status(ldcp->ldc_handle, &istatus);
3173 	if (istatus == LDC_UP) {
3174 		DWARN(vgenp, ldcp, "status(%d) is UP\n", istatus);
3175 	}
3176 
3177 	ldcp->ldc_status = istatus;
3178 
3179 	/* initialize transmit watchdog timeout */
3180 	ldcp->wd_tid = timeout(vgen_ldc_watchdog, (caddr_t)ldcp,
3181 	    drv_usectohz(vnet_ldcwd_interval * 1000));
3182 
3183 	ldcp->hphase = -1;
3184 	ldcp->flags |= CHANNEL_STARTED;
3185 
3186 	/* if channel is already UP - start handshake */
3187 	if (istatus == LDC_UP) {
3188 		vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3189 		if (ldcp->portp != vgenp->vsw_portp) {
3190 			/*
3191 			 * As the channel is up, use this port from now on.
3192 			 */
3193 			(void) atomic_swap_32(
3194 			    &ldcp->portp->use_vsw_port, B_FALSE);
3195 		}
3196 
3197 		/* Initialize local session id */
3198 		ldcp->local_sid = ddi_get_lbolt();
3199 
3200 		/* clear peer session id */
3201 		ldcp->peer_sid = 0;
3202 		ldcp->hretries = 0;
3203 
3204 		/* Initiate Handshake process with peer ldc endpoint */
3205 		vgen_reset_hphase(ldcp);
3206 
3207 		mutex_exit(&ldcp->tclock);
3208 		mutex_exit(&ldcp->txlock);
3209 		mutex_exit(&ldcp->wrlock);
3210 		mutex_exit(&ldcp->rxlock);
3211 		vgen_handshake(vh_nextphase(ldcp));
3212 		mutex_exit(&ldcp->cblock);
3213 	} else {
3214 		LDC_UNLOCK(ldcp);
3215 	}
3216 
3217 	return (DDI_SUCCESS);
3218 
3219 ldcinit_failed:
3220 	if (init_state & ST_cb_enable) {
3221 		(void) ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE);
3222 	}
3223 	if (init_state & ST_init_tbufs) {
3224 		vgen_uninit_tbufs(ldcp);
3225 	}
3226 	if (init_state & ST_ldc_open) {
3227 		(void) ldc_close(ldcp->ldc_handle);
3228 	}
3229 	LDC_UNLOCK(ldcp);
3230 	DBG1(vgenp, ldcp, "exit\n");
3231 	return (DDI_FAILURE);
3232 }
3233 
3234 /* stop transmit/receive on the channel */
3235 static void
3236 vgen_ldc_uninit(vgen_ldc_t *ldcp)
3237 {
3238 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3239 	int	rv;
3240 	uint_t	retries = 0;
3241 
3242 	DBG1(vgenp, ldcp, "enter\n");
3243 	LDC_LOCK(ldcp);
3244 
3245 	if ((ldcp->flags & CHANNEL_STARTED) == 0) {
3246 		LDC_UNLOCK(ldcp);
3247 		DWARN(vgenp, ldcp, "CHANNEL_STARTED flag is not set\n");
3248 		return;
3249 	}
3250 
3251 	/* disable further callbacks */
3252 	rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE);
3253 	if (rv != 0) {
3254 		DWARN(vgenp, ldcp, "ldc_set_cb_mode failed\n");
3255 	}
3256 
3257 	if (vgenp->vsw_portp == ldcp->portp) {
3258 		vio_net_report_err_t rep_err =
3259 		    ldcp->portp->vcb.vio_net_report_err;
3260 		rep_err(ldcp->portp->vhp, VIO_NET_RES_DOWN);
3261 	}
3262 
3263 	/*
3264 	 * clear handshake done bit and wait for pending tx and cb to finish.
3265 	 * release locks before untimeout(9F) is invoked to cancel timeouts.
3266 	 */
3267 	ldcp->hphase &= ~(VH_DONE);
3268 	LDC_UNLOCK(ldcp);
3269 
3270 	/* cancel handshake watchdog timeout */
3271 	if (ldcp->htid) {
3272 		(void) untimeout(ldcp->htid);
3273 		ldcp->htid = 0;
3274 	}
3275 
3276 	if (ldcp->cancel_htid) {
3277 		(void) untimeout(ldcp->cancel_htid);
3278 		ldcp->cancel_htid = 0;
3279 	}
3280 
3281 	/* cancel transmit watchdog timeout */
3282 	if (ldcp->wd_tid) {
3283 		(void) untimeout(ldcp->wd_tid);
3284 		ldcp->wd_tid = 0;
3285 	}
3286 
3287 	drv_usecwait(1000);
3288 
3289 	if (ldcp->rcv_thread != NULL) {
3290 		/*
3291 		 * Note that callbacks have been disabled already(above). The
3292 		 * drain function takes care of the condition when an already
3293 		 * executing callback signals the worker to start processing or
3294 		 * the worker has already been signalled and is in the middle of
3295 		 * processing.
3296 		 */
3297 		vgen_drain_rcv_thread(ldcp);
3298 	}
3299 
3300 	/* acquire locks again; any pending transmits and callbacks are done */
3301 	LDC_LOCK(ldcp);
3302 
3303 	vgen_reset_hphase(ldcp);
3304 
3305 	vgen_uninit_tbufs(ldcp);
3306 
3307 	/* close the channel - retry on EAGAIN */
3308 	while ((rv = ldc_close(ldcp->ldc_handle)) == EAGAIN) {
3309 		if (++retries > vgen_ldccl_retries) {
3310 			break;
3311 		}
3312 		drv_usecwait(VGEN_LDC_CLOSE_DELAY);
3313 	}
3314 	if (rv != 0) {
3315 		cmn_err(CE_NOTE,
3316 		    "!vnet%d: Error(%d) closing the channel(0x%lx)\n",
3317 		    vgenp->instance, rv, ldcp->ldc_id);
3318 	}
3319 
3320 	ldcp->ldc_status = LDC_INIT;
3321 	ldcp->flags &= ~(CHANNEL_STARTED);
3322 
3323 	LDC_UNLOCK(ldcp);
3324 
3325 	DBG1(vgenp, ldcp, "exit\n");
3326 }
3327 
3328 /* Initialize the transmit buffer ring for the channel */
3329 static int
3330 vgen_init_tbufs(vgen_ldc_t *ldcp)
3331 {
3332 	vgen_private_desc_t	*tbufp;
3333 	vnet_public_desc_t	*txdp;
3334 	vio_dring_entry_hdr_t		*hdrp;
3335 	int 			i;
3336 	int 			rv;
3337 	caddr_t			datap = NULL;
3338 	int			ci;
3339 	uint32_t		ncookies;
3340 	size_t			data_sz;
3341 	vgen_t			*vgenp;
3342 
3343 	vgenp = LDC_TO_VGEN(ldcp);
3344 
3345 	bzero(ldcp->tbufp, sizeof (*tbufp) * (ldcp->num_txds));
3346 	bzero(ldcp->txdp, sizeof (*txdp) * (ldcp->num_txds));
3347 
3348 	/*
3349 	 * In order to ensure that the number of ldc cookies per descriptor is
3350 	 * limited to be within the default MAX_COOKIES (2), we take the steps
3351 	 * outlined below:
3352 	 *
3353 	 * Align the entire data buffer area to 8K and carve out per descriptor
3354 	 * data buffers starting from this 8K aligned base address.
3355 	 *
3356 	 * We round up the mtu specified to be a multiple of 2K or 4K.
3357 	 * For sizes up to 12K we round up the size to the next 2K.
3358 	 * For sizes > 12K we round up to the next 4K (otherwise sizes such as
3359 	 * 14K could end up needing 3 cookies, with the buffer spread across
3360 	 * 3 8K pages:  8K+6K, 2K+8K+2K, 6K+8K, ...).
3361 	 */
3362 	data_sz = vgenp->max_frame_size + VNET_IPALIGN + VNET_LDCALIGN;
3363 	if (data_sz <= VNET_12K) {
3364 		data_sz = VNET_ROUNDUP_2K(data_sz);
3365 	} else {
3366 		data_sz = VNET_ROUNDUP_4K(data_sz);
3367 	}
3368 
3369 	/* allocate extra 8K bytes for alignment */
3370 	ldcp->tx_data_sz = (data_sz * ldcp->num_txds) + VNET_8K;
3371 	datap = kmem_zalloc(ldcp->tx_data_sz, KM_SLEEP);
3372 	ldcp->tx_datap = datap;
3373 
3374 
3375 	/* align the starting address of the data area to 8K */
3376 	datap = (caddr_t)VNET_ROUNDUP_8K((uintptr_t)datap);
3377 
3378 	/*
3379 	 * for each private descriptor, allocate a ldc mem_handle which is
3380 	 * required to map the data during transmit, set the flags
3381 	 * to free (available for use by transmit routine).
3382 	 */
3383 
3384 	for (i = 0; i < ldcp->num_txds; i++) {
3385 
3386 		tbufp = &(ldcp->tbufp[i]);
3387 		rv = ldc_mem_alloc_handle(ldcp->ldc_handle,
3388 		    &(tbufp->memhandle));
3389 		if (rv) {
3390 			tbufp->memhandle = 0;
3391 			goto init_tbufs_failed;
3392 		}
3393 
3394 		/*
3395 		 * bind ldc memhandle to the corresponding transmit buffer.
3396 		 */
3397 		ci = ncookies = 0;
3398 		rv = ldc_mem_bind_handle(tbufp->memhandle,
3399 		    (caddr_t)datap, data_sz, LDC_SHADOW_MAP,
3400 		    LDC_MEM_R, &(tbufp->memcookie[ci]), &ncookies);
3401 		if (rv != 0) {
3402 			goto init_tbufs_failed;
3403 		}
3404 
3405 		/*
3406 		 * successful in binding the handle to tx data buffer.
3407 		 * set datap in the private descr to this buffer.
3408 		 */
3409 		tbufp->datap = datap;
3410 
3411 		if ((ncookies == 0) ||
3412 		    (ncookies > MAX_COOKIES)) {
3413 			goto init_tbufs_failed;
3414 		}
3415 
3416 		for (ci = 1; ci < ncookies; ci++) {
3417 			rv = ldc_mem_nextcookie(tbufp->memhandle,
3418 			    &(tbufp->memcookie[ci]));
3419 			if (rv != 0) {
3420 				goto init_tbufs_failed;
3421 			}
3422 		}
3423 
3424 		tbufp->ncookies = ncookies;
3425 		datap += data_sz;
3426 
3427 		tbufp->flags = VGEN_PRIV_DESC_FREE;
3428 		txdp = &(ldcp->txdp[i]);
3429 		hdrp = &txdp->hdr;
3430 		hdrp->dstate = VIO_DESC_FREE;
3431 		hdrp->ack = B_FALSE;
3432 		tbufp->descp = txdp;
3433 
3434 	}
3435 
3436 	/* reset tbuf walking pointers */
3437 	ldcp->next_tbufp = ldcp->tbufp;
3438 	ldcp->cur_tbufp = ldcp->tbufp;
3439 
3440 	/* initialize tx seqnum and index */
3441 	ldcp->next_txseq = VNET_ISS;
3442 	ldcp->next_txi = 0;
3443 
3444 	ldcp->resched_peer = B_TRUE;
3445 	ldcp->resched_peer_txi = 0;
3446 
3447 	return (DDI_SUCCESS);
3448 
3449 init_tbufs_failed:;
3450 	vgen_uninit_tbufs(ldcp);
3451 	return (DDI_FAILURE);
3452 }
3453 
3454 /* Uninitialize transmit buffer ring for the channel */
3455 static void
3456 vgen_uninit_tbufs(vgen_ldc_t *ldcp)
3457 {
3458 	vgen_private_desc_t	*tbufp = ldcp->tbufp;
3459 	int 			i;
3460 
3461 	/* for each tbuf (priv_desc), free ldc mem_handle */
3462 	for (i = 0; i < ldcp->num_txds; i++) {
3463 
3464 		tbufp = &(ldcp->tbufp[i]);
3465 
3466 		if (tbufp->datap) { /* if bound to a ldc memhandle */
3467 			(void) ldc_mem_unbind_handle(tbufp->memhandle);
3468 			tbufp->datap = NULL;
3469 		}
3470 		if (tbufp->memhandle) {
3471 			(void) ldc_mem_free_handle(tbufp->memhandle);
3472 			tbufp->memhandle = 0;
3473 		}
3474 	}
3475 
3476 	if (ldcp->tx_datap) {
3477 		/* prealloc'd tx data buffer */
3478 		kmem_free(ldcp->tx_datap, ldcp->tx_data_sz);
3479 		ldcp->tx_datap = NULL;
3480 		ldcp->tx_data_sz = 0;
3481 	}
3482 
3483 	bzero(ldcp->tbufp, sizeof (vgen_private_desc_t) * (ldcp->num_txds));
3484 	bzero(ldcp->txdp, sizeof (vnet_public_desc_t) * (ldcp->num_txds));
3485 }
3486 
3487 /* clobber tx descriptor ring */
3488 static void
3489 vgen_clobber_tbufs(vgen_ldc_t *ldcp)
3490 {
3491 	vnet_public_desc_t	*txdp;
3492 	vgen_private_desc_t	*tbufp;
3493 	vio_dring_entry_hdr_t	*hdrp;
3494 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3495 	int i;
3496 #ifdef DEBUG
3497 	int ndone = 0;
3498 #endif
3499 
3500 	for (i = 0; i < ldcp->num_txds; i++) {
3501 
3502 		tbufp = &(ldcp->tbufp[i]);
3503 		txdp = tbufp->descp;
3504 		hdrp = &txdp->hdr;
3505 
3506 		if (tbufp->flags & VGEN_PRIV_DESC_BUSY) {
3507 			tbufp->flags = VGEN_PRIV_DESC_FREE;
3508 #ifdef DEBUG
3509 			if (hdrp->dstate == VIO_DESC_DONE)
3510 				ndone++;
3511 #endif
3512 			hdrp->dstate = VIO_DESC_FREE;
3513 			hdrp->ack = B_FALSE;
3514 		}
3515 	}
3516 	/* reset tbuf walking pointers */
3517 	ldcp->next_tbufp = ldcp->tbufp;
3518 	ldcp->cur_tbufp = ldcp->tbufp;
3519 
3520 	/* reset tx seqnum and index */
3521 	ldcp->next_txseq = VNET_ISS;
3522 	ldcp->next_txi = 0;
3523 
3524 	ldcp->resched_peer = B_TRUE;
3525 	ldcp->resched_peer_txi = 0;
3526 
3527 	DBG2(vgenp, ldcp, "num descrs done (%d)\n", ndone);
3528 }
3529 
3530 /* clobber receive descriptor ring */
3531 static void
3532 vgen_clobber_rxds(vgen_ldc_t *ldcp)
3533 {
3534 	ldcp->rx_dhandle = 0;
3535 	bzero(&ldcp->rx_dcookie, sizeof (ldcp->rx_dcookie));
3536 	ldcp->rxdp = NULL;
3537 	ldcp->next_rxi = 0;
3538 	ldcp->num_rxds = 0;
3539 	ldcp->next_rxseq = VNET_ISS;
3540 }
3541 
3542 /* initialize receive descriptor ring */
3543 static int
3544 vgen_init_rxds(vgen_ldc_t *ldcp, uint32_t num_desc, uint32_t desc_size,
3545 	ldc_mem_cookie_t *dcookie, uint32_t ncookies)
3546 {
3547 	int rv;
3548 	ldc_mem_info_t minfo;
3549 
3550 	rv = ldc_mem_dring_map(ldcp->ldc_handle, dcookie, ncookies, num_desc,
3551 	    desc_size, LDC_DIRECT_MAP, &(ldcp->rx_dhandle));
3552 	if (rv != 0) {
3553 		return (DDI_FAILURE);
3554 	}
3555 
3556 	/*
3557 	 * sucessfully mapped, now try to
3558 	 * get info about the mapped dring
3559 	 */
3560 	rv = ldc_mem_dring_info(ldcp->rx_dhandle, &minfo);
3561 	if (rv != 0) {
3562 		(void) ldc_mem_dring_unmap(ldcp->rx_dhandle);
3563 		return (DDI_FAILURE);
3564 	}
3565 
3566 	/*
3567 	 * save ring address, number of descriptors.
3568 	 */
3569 	ldcp->rxdp = (vnet_public_desc_t *)(minfo.vaddr);
3570 	bcopy(dcookie, &(ldcp->rx_dcookie), sizeof (*dcookie));
3571 	ldcp->num_rxdcookies = ncookies;
3572 	ldcp->num_rxds = num_desc;
3573 	ldcp->next_rxi = 0;
3574 	ldcp->next_rxseq = VNET_ISS;
3575 	ldcp->dring_mtype = minfo.mtype;
3576 
3577 	return (DDI_SUCCESS);
3578 }
3579 
3580 /* get channel statistics */
3581 static uint64_t
3582 vgen_ldc_stat(vgen_ldc_t *ldcp, uint_t stat)
3583 {
3584 	vgen_stats_t *statsp;
3585 	uint64_t val;
3586 
3587 	val = 0;
3588 	statsp = &ldcp->stats;
3589 	switch (stat) {
3590 
3591 	case MAC_STAT_MULTIRCV:
3592 		val = statsp->multircv;
3593 		break;
3594 
3595 	case MAC_STAT_BRDCSTRCV:
3596 		val = statsp->brdcstrcv;
3597 		break;
3598 
3599 	case MAC_STAT_MULTIXMT:
3600 		val = statsp->multixmt;
3601 		break;
3602 
3603 	case MAC_STAT_BRDCSTXMT:
3604 		val = statsp->brdcstxmt;
3605 		break;
3606 
3607 	case MAC_STAT_NORCVBUF:
3608 		val = statsp->norcvbuf;
3609 		break;
3610 
3611 	case MAC_STAT_IERRORS:
3612 		val = statsp->ierrors;
3613 		break;
3614 
3615 	case MAC_STAT_NOXMTBUF:
3616 		val = statsp->noxmtbuf;
3617 		break;
3618 
3619 	case MAC_STAT_OERRORS:
3620 		val = statsp->oerrors;
3621 		break;
3622 
3623 	case MAC_STAT_COLLISIONS:
3624 		break;
3625 
3626 	case MAC_STAT_RBYTES:
3627 		val = statsp->rbytes;
3628 		break;
3629 
3630 	case MAC_STAT_IPACKETS:
3631 		val = statsp->ipackets;
3632 		break;
3633 
3634 	case MAC_STAT_OBYTES:
3635 		val = statsp->obytes;
3636 		break;
3637 
3638 	case MAC_STAT_OPACKETS:
3639 		val = statsp->opackets;
3640 		break;
3641 
3642 	/* stats not relevant to ldc, return 0 */
3643 	case MAC_STAT_IFSPEED:
3644 	case ETHER_STAT_ALIGN_ERRORS:
3645 	case ETHER_STAT_FCS_ERRORS:
3646 	case ETHER_STAT_FIRST_COLLISIONS:
3647 	case ETHER_STAT_MULTI_COLLISIONS:
3648 	case ETHER_STAT_DEFER_XMTS:
3649 	case ETHER_STAT_TX_LATE_COLLISIONS:
3650 	case ETHER_STAT_EX_COLLISIONS:
3651 	case ETHER_STAT_MACXMT_ERRORS:
3652 	case ETHER_STAT_CARRIER_ERRORS:
3653 	case ETHER_STAT_TOOLONG_ERRORS:
3654 	case ETHER_STAT_XCVR_ADDR:
3655 	case ETHER_STAT_XCVR_ID:
3656 	case ETHER_STAT_XCVR_INUSE:
3657 	case ETHER_STAT_CAP_1000FDX:
3658 	case ETHER_STAT_CAP_1000HDX:
3659 	case ETHER_STAT_CAP_100FDX:
3660 	case ETHER_STAT_CAP_100HDX:
3661 	case ETHER_STAT_CAP_10FDX:
3662 	case ETHER_STAT_CAP_10HDX:
3663 	case ETHER_STAT_CAP_ASMPAUSE:
3664 	case ETHER_STAT_CAP_PAUSE:
3665 	case ETHER_STAT_CAP_AUTONEG:
3666 	case ETHER_STAT_ADV_CAP_1000FDX:
3667 	case ETHER_STAT_ADV_CAP_1000HDX:
3668 	case ETHER_STAT_ADV_CAP_100FDX:
3669 	case ETHER_STAT_ADV_CAP_100HDX:
3670 	case ETHER_STAT_ADV_CAP_10FDX:
3671 	case ETHER_STAT_ADV_CAP_10HDX:
3672 	case ETHER_STAT_ADV_CAP_ASMPAUSE:
3673 	case ETHER_STAT_ADV_CAP_PAUSE:
3674 	case ETHER_STAT_ADV_CAP_AUTONEG:
3675 	case ETHER_STAT_LP_CAP_1000FDX:
3676 	case ETHER_STAT_LP_CAP_1000HDX:
3677 	case ETHER_STAT_LP_CAP_100FDX:
3678 	case ETHER_STAT_LP_CAP_100HDX:
3679 	case ETHER_STAT_LP_CAP_10FDX:
3680 	case ETHER_STAT_LP_CAP_10HDX:
3681 	case ETHER_STAT_LP_CAP_ASMPAUSE:
3682 	case ETHER_STAT_LP_CAP_PAUSE:
3683 	case ETHER_STAT_LP_CAP_AUTONEG:
3684 	case ETHER_STAT_LINK_ASMPAUSE:
3685 	case ETHER_STAT_LINK_PAUSE:
3686 	case ETHER_STAT_LINK_AUTONEG:
3687 	case ETHER_STAT_LINK_DUPLEX:
3688 	default:
3689 		val = 0;
3690 		break;
3691 
3692 	}
3693 	return (val);
3694 }
3695 
3696 /*
3697  * LDC channel is UP, start handshake process with peer.
3698  */
3699 static void
3700 vgen_handle_evt_up(vgen_ldc_t *ldcp)
3701 {
3702 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
3703 
3704 	DBG1(vgenp, ldcp, "enter\n");
3705 
3706 	ASSERT(MUTEX_HELD(&ldcp->cblock));
3707 
3708 	if (ldcp->portp != vgenp->vsw_portp) {
3709 		/*
3710 		 * As the channel is up, use this port from now on.
3711 		 */
3712 		(void) atomic_swap_32(&ldcp->portp->use_vsw_port, B_FALSE);
3713 	}
3714 
3715 	/* Initialize local session id */
3716 	ldcp->local_sid = ddi_get_lbolt();
3717 
3718 	/* clear peer session id */
3719 	ldcp->peer_sid = 0;
3720 	ldcp->hretries = 0;
3721 
3722 	if (ldcp->hphase != VH_PHASE0) {
3723 		vgen_handshake_reset(ldcp);
3724 	}
3725 
3726 	/* Initiate Handshake process with peer ldc endpoint */
3727 	vgen_handshake(vh_nextphase(ldcp));
3728 
3729 	DBG1(vgenp, ldcp, "exit\n");
3730 }
3731 
3732 /*
3733  * LDC channel is Reset, terminate connection with peer and try to
3734  * bring the channel up again.
3735  */
3736 static void
3737 vgen_handle_evt_reset(vgen_ldc_t *ldcp)
3738 {
3739 	ldc_status_t istatus;
3740 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
3741 	int	rv;
3742 
3743 	DBG1(vgenp, ldcp, "enter\n");
3744 
3745 	ASSERT(MUTEX_HELD(&ldcp->cblock));
3746 
3747 	if ((ldcp->portp != vgenp->vsw_portp) &&
3748 	    (vgenp->vsw_portp != NULL)) {
3749 		/*
3750 		 * As the channel is down, use the switch port until
3751 		 * the channel becomes ready to be used.
3752 		 */
3753 		(void) atomic_swap_32(&ldcp->portp->use_vsw_port, B_TRUE);
3754 	}
3755 
3756 	if (vgenp->vsw_portp == ldcp->portp) {
3757 		vio_net_report_err_t rep_err =
3758 		    ldcp->portp->vcb.vio_net_report_err;
3759 
3760 		/* Post a reset message */
3761 		rep_err(ldcp->portp->vhp, VIO_NET_RES_DOWN);
3762 	}
3763 
3764 	if (ldcp->hphase != VH_PHASE0) {
3765 		vgen_handshake_reset(ldcp);
3766 	}
3767 
3768 	/* try to bring the channel up */
3769 	rv = ldc_up(ldcp->ldc_handle);
3770 	if (rv != 0) {
3771 		DWARN(vgenp, ldcp, "ldc_up err rv(%d)\n", rv);
3772 	}
3773 
3774 	if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
3775 		DWARN(vgenp, ldcp, "ldc_status err\n");
3776 	} else {
3777 		ldcp->ldc_status = istatus;
3778 	}
3779 
3780 	/* if channel is already UP - restart handshake */
3781 	if (ldcp->ldc_status == LDC_UP) {
3782 		vgen_handle_evt_up(ldcp);
3783 	}
3784 
3785 	DBG1(vgenp, ldcp, "exit\n");
3786 }
3787 
3788 /* Interrupt handler for the channel */
3789 static uint_t
3790 vgen_ldc_cb(uint64_t event, caddr_t arg)
3791 {
3792 	_NOTE(ARGUNUSED(event))
3793 	vgen_ldc_t	*ldcp;
3794 	vgen_t		*vgenp;
3795 	ldc_status_t 	istatus;
3796 	vgen_stats_t	*statsp;
3797 	timeout_id_t	cancel_htid = 0;
3798 	uint_t		ret = LDC_SUCCESS;
3799 
3800 	ldcp = (vgen_ldc_t *)arg;
3801 	vgenp = LDC_TO_VGEN(ldcp);
3802 	statsp = &ldcp->stats;
3803 
3804 	DBG1(vgenp, ldcp, "enter\n");
3805 
3806 	mutex_enter(&ldcp->cblock);
3807 	statsp->callbacks++;
3808 	if ((ldcp->ldc_status == LDC_INIT) || (ldcp->ldc_handle == NULL)) {
3809 		DWARN(vgenp, ldcp, "status(%d) is LDC_INIT\n",
3810 		    ldcp->ldc_status);
3811 		mutex_exit(&ldcp->cblock);
3812 		return (LDC_SUCCESS);
3813 	}
3814 
3815 	/*
3816 	 * cache cancel_htid before the events specific
3817 	 * code may overwrite it. Do not clear ldcp->cancel_htid
3818 	 * as it is also used to indicate the timer to quit immediately.
3819 	 */
3820 	cancel_htid = ldcp->cancel_htid;
3821 
3822 	/*
3823 	 * NOTE: not using switch() as event could be triggered by
3824 	 * a state change and a read request. Also the ordering	of the
3825 	 * check for the event types is deliberate.
3826 	 */
3827 	if (event & LDC_EVT_UP) {
3828 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
3829 			DWARN(vgenp, ldcp, "ldc_status err\n");
3830 			/* status couldn't be determined */
3831 			ret = LDC_FAILURE;
3832 			goto ldc_cb_ret;
3833 		}
3834 		ldcp->ldc_status = istatus;
3835 		if (ldcp->ldc_status != LDC_UP) {
3836 			DWARN(vgenp, ldcp, "LDC_EVT_UP received "
3837 			    " but ldc status is not UP(0x%x)\n",
3838 			    ldcp->ldc_status);
3839 			/* spurious interrupt, return success */
3840 			goto ldc_cb_ret;
3841 		}
3842 		DWARN(vgenp, ldcp, "event(%lx) UP, status(%d)\n",
3843 		    event, ldcp->ldc_status);
3844 
3845 		vgen_handle_evt_up(ldcp);
3846 
3847 		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);
3848 	}
3849 
3850 	/* Handle RESET/DOWN before READ event */
3851 	if (event & (LDC_EVT_RESET | LDC_EVT_DOWN)) {
3852 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
3853 			DWARN(vgenp, ldcp, "ldc_status error\n");
3854 			/* status couldn't be determined */
3855 			ret = LDC_FAILURE;
3856 			goto ldc_cb_ret;
3857 		}
3858 		ldcp->ldc_status = istatus;
3859 		DWARN(vgenp, ldcp, "event(%lx) RESET/DOWN, status(%d)\n",
3860 		    event, ldcp->ldc_status);
3861 
3862 		vgen_handle_evt_reset(ldcp);
3863 
3864 		/*
3865 		 * As the channel is down/reset, ignore READ event
3866 		 * but print a debug warning message.
3867 		 */
3868 		if (event & LDC_EVT_READ) {
3869 			DWARN(vgenp, ldcp,
3870 			    "LDC_EVT_READ set along with RESET/DOWN\n");
3871 			event &= ~LDC_EVT_READ;
3872 		}
3873 	}
3874 
3875 	if (event & LDC_EVT_READ) {
3876 		DBG2(vgenp, ldcp, "event(%lx) READ, status(%d)\n",
3877 		    event, ldcp->ldc_status);
3878 
3879 		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);
3880 
3881 		if (ldcp->rcv_thread != NULL) {
3882 			/*
3883 			 * If the receive thread is enabled, then
3884 			 * wakeup the receive thread to process the
3885 			 * LDC messages.
3886 			 */
3887 			mutex_exit(&ldcp->cblock);
3888 			mutex_enter(&ldcp->rcv_thr_lock);
3889 			if (!(ldcp->rcv_thr_flags & VGEN_WTHR_DATARCVD)) {
3890 				ldcp->rcv_thr_flags |= VGEN_WTHR_DATARCVD;
3891 				cv_signal(&ldcp->rcv_thr_cv);
3892 			}
3893 			mutex_exit(&ldcp->rcv_thr_lock);
3894 			mutex_enter(&ldcp->cblock);
3895 		} else  {
3896 			vgen_handle_evt_read(ldcp);
3897 		}
3898 	}
3899 
3900 ldc_cb_ret:
3901 	/*
3902 	 * Check to see if the status of cancel_htid has
3903 	 * changed. If another timer needs to be cancelled,
3904 	 * then let the next callback to clear it.
3905 	 */
3906 	if (cancel_htid == 0) {
3907 		cancel_htid = ldcp->cancel_htid;
3908 	}
3909 	mutex_exit(&ldcp->cblock);
3910 
3911 	if (cancel_htid) {
3912 		/*
3913 		 * Cancel handshake timer.
3914 		 * untimeout(9F) will not return until the pending callback is
3915 		 * cancelled or has run. No problems will result from calling
3916 		 * untimeout if the handler has already completed.
3917 		 * If the timeout handler did run, then it would just
3918 		 * return as cancel_htid is set.
3919 		 */
3920 		DBG2(vgenp, ldcp, "calling cance_htid =0x%X \n", cancel_htid);
3921 		(void) untimeout(cancel_htid);
3922 		mutex_enter(&ldcp->cblock);
3923 		/* clear it only if its the same as the one we cancelled */
3924 		if (ldcp->cancel_htid == cancel_htid) {
3925 			ldcp->cancel_htid = 0;
3926 		}
3927 		mutex_exit(&ldcp->cblock);
3928 	}
3929 	DBG1(vgenp, ldcp, "exit\n");
3930 	return (ret);
3931 }
3932 
3933 static void
3934 vgen_handle_evt_read(vgen_ldc_t *ldcp)
3935 {
3936 	int		rv;
3937 	uint64_t	*ldcmsg;
3938 	size_t		msglen;
3939 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
3940 	vio_msg_tag_t	*tagp;
3941 	ldc_status_t 	istatus;
3942 	boolean_t 	has_data;
3943 
3944 	DBG1(vgenp, ldcp, "enter\n");
3945 
3946 	ldcmsg = ldcp->ldcmsg;
3947 	/*
3948 	 * If the receive thread is enabled, then the cblock
3949 	 * need to be acquired here. If not, the vgen_ldc_cb()
3950 	 * calls this function with cblock held already.
3951 	 */
3952 	if (ldcp->rcv_thread != NULL) {
3953 		mutex_enter(&ldcp->cblock);
3954 	} else {
3955 		ASSERT(MUTEX_HELD(&ldcp->cblock));
3956 	}
3957 
3958 vgen_evt_read:
3959 	do {
3960 		msglen = ldcp->msglen;
3961 		rv = ldc_read(ldcp->ldc_handle, (caddr_t)ldcmsg, &msglen);
3962 
3963 		if (rv != 0) {
3964 			DWARN(vgenp, ldcp, "err rv(%d) len(%d)\n",
3965 			    rv, msglen);
3966 			if (rv == ECONNRESET)
3967 				goto vgen_evtread_error;
3968 			break;
3969 		}
3970 		if (msglen == 0) {
3971 			DBG2(vgenp, ldcp, "ldc_read NODATA");
3972 			break;
3973 		}
3974 		DBG2(vgenp, ldcp, "ldc_read msglen(%d)", msglen);
3975 
3976 		tagp = (vio_msg_tag_t *)ldcmsg;
3977 
3978 		if (ldcp->peer_sid) {
3979 			/*
3980 			 * check sid only after we have received peer's sid
3981 			 * in the version negotiate msg.
3982 			 */
3983 #ifdef DEBUG
3984 			if (vgen_hdbg & HDBG_BAD_SID) {
3985 				/* simulate bad sid condition */
3986 				tagp->vio_sid = 0;
3987 				vgen_hdbg &= ~(HDBG_BAD_SID);
3988 			}
3989 #endif
3990 			rv = vgen_check_sid(ldcp, tagp);
3991 			if (rv != VGEN_SUCCESS) {
3992 				/*
3993 				 * If sid mismatch is detected,
3994 				 * reset the channel.
3995 				 */
3996 				ldcp->need_ldc_reset = B_TRUE;
3997 				goto vgen_evtread_error;
3998 			}
3999 		}
4000 
4001 		switch (tagp->vio_msgtype) {
4002 		case VIO_TYPE_CTRL:
4003 			rv = vgen_handle_ctrlmsg(ldcp, tagp);
4004 			break;
4005 
4006 		case VIO_TYPE_DATA:
4007 			rv = vgen_handle_datamsg(ldcp, tagp, msglen);
4008 			break;
4009 
4010 		case VIO_TYPE_ERR:
4011 			vgen_handle_errmsg(ldcp, tagp);
4012 			break;
4013 
4014 		default:
4015 			DWARN(vgenp, ldcp, "Unknown VIO_TYPE(%x)\n",
4016 			    tagp->vio_msgtype);
4017 			break;
4018 		}
4019 
4020 		/*
4021 		 * If an error is encountered, stop processing and
4022 		 * handle the error.
4023 		 */
4024 		if (rv != 0) {
4025 			goto vgen_evtread_error;
4026 		}
4027 
4028 	} while (msglen);
4029 
4030 	/* check once more before exiting */
4031 	rv = ldc_chkq(ldcp->ldc_handle, &has_data);
4032 	if ((rv == 0) && (has_data == B_TRUE)) {
4033 		DTRACE_PROBE(vgen_chkq);
4034 		goto vgen_evt_read;
4035 	}
4036 
4037 vgen_evtread_error:
4038 	if (rv == ECONNRESET) {
4039 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
4040 			DWARN(vgenp, ldcp, "ldc_status err\n");
4041 		} else {
4042 			ldcp->ldc_status = istatus;
4043 		}
4044 		vgen_handle_evt_reset(ldcp);
4045 	} else if (rv) {
4046 		vgen_handshake_retry(ldcp);
4047 	}
4048 
4049 	/*
4050 	 * If the receive thread is enabled, then cancel the
4051 	 * handshake timeout here.
4052 	 */
4053 	if (ldcp->rcv_thread != NULL) {
4054 		timeout_id_t cancel_htid = ldcp->cancel_htid;
4055 
4056 		mutex_exit(&ldcp->cblock);
4057 		if (cancel_htid) {
4058 			/*
4059 			 * Cancel handshake timer. untimeout(9F) will
4060 			 * not return until the pending callback is cancelled
4061 			 * or has run. No problems will result from calling
4062 			 * untimeout if the handler has already completed.
4063 			 * If the timeout handler did run, then it would just
4064 			 * return as cancel_htid is set.
4065 			 */
4066 			DBG2(vgenp, ldcp, "calling cance_htid =0x%X \n",
4067 			    cancel_htid);
4068 			(void) untimeout(cancel_htid);
4069 
4070 			/*
4071 			 * clear it only if its the same as the one we
4072 			 * cancelled
4073 			 */
4074 			mutex_enter(&ldcp->cblock);
4075 			if (ldcp->cancel_htid == cancel_htid) {
4076 				ldcp->cancel_htid = 0;
4077 			}
4078 			mutex_exit(&ldcp->cblock);
4079 		}
4080 	}
4081 
4082 	DBG1(vgenp, ldcp, "exit\n");
4083 }
4084 
4085 /* vgen handshake functions */
4086 
4087 /* change the hphase for the channel to the next phase */
4088 static vgen_ldc_t *
4089 vh_nextphase(vgen_ldc_t *ldcp)
4090 {
4091 	if (ldcp->hphase == VH_PHASE3) {
4092 		ldcp->hphase = VH_DONE;
4093 	} else {
4094 		ldcp->hphase++;
4095 	}
4096 	return (ldcp);
4097 }
4098 
4099 /*
4100  * wrapper routine to send the given message over ldc using ldc_write().
4101  */
4102 static int
4103 vgen_sendmsg(vgen_ldc_t *ldcp, caddr_t msg,  size_t msglen,
4104     boolean_t caller_holds_lock)
4105 {
4106 	int			rv;
4107 	size_t			len;
4108 	uint32_t		retries = 0;
4109 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
4110 	vio_msg_tag_t		*tagp = (vio_msg_tag_t *)msg;
4111 	vio_dring_msg_t		*dmsg;
4112 	vio_raw_data_msg_t	*rmsg;
4113 	boolean_t		data_msg = B_FALSE;
4114 
4115 	len = msglen;
4116 	if ((len == 0) || (msg == NULL))
4117 		return (VGEN_FAILURE);
4118 
4119 	if (!caller_holds_lock) {
4120 		mutex_enter(&ldcp->wrlock);
4121 	}
4122 
4123 	if (tagp->vio_subtype == VIO_SUBTYPE_INFO) {
4124 		if (tagp->vio_subtype_env == VIO_DRING_DATA) {
4125 			dmsg = (vio_dring_msg_t *)tagp;
4126 			dmsg->seq_num = ldcp->next_txseq;
4127 			data_msg = B_TRUE;
4128 		} else if (tagp->vio_subtype_env == VIO_PKT_DATA) {
4129 			rmsg = (vio_raw_data_msg_t *)tagp;
4130 			rmsg->seq_num = ldcp->next_txseq;
4131 			data_msg = B_TRUE;
4132 		}
4133 	}
4134 
4135 	do {
4136 		len = msglen;
4137 		rv = ldc_write(ldcp->ldc_handle, (caddr_t)msg, &len);
4138 		if (retries++ >= vgen_ldcwr_retries)
4139 			break;
4140 	} while (rv == EWOULDBLOCK);
4141 
4142 	if (rv == 0 && data_msg == B_TRUE) {
4143 		ldcp->next_txseq++;
4144 	}
4145 
4146 	if (!caller_holds_lock) {
4147 		mutex_exit(&ldcp->wrlock);
4148 	}
4149 
4150 	if (rv != 0) {
4151 		DWARN(vgenp, ldcp, "ldc_write failed: rv(%d)\n",
4152 		    rv, msglen);
4153 		return (rv);
4154 	}
4155 
4156 	if (len != msglen) {
4157 		DWARN(vgenp, ldcp, "ldc_write failed: rv(%d) msglen (%d)\n",
4158 		    rv, msglen);
4159 		return (VGEN_FAILURE);
4160 	}
4161 
4162 	return (VGEN_SUCCESS);
4163 }
4164 
4165 /* send version negotiate message to the peer over ldc */
4166 static int
4167 vgen_send_version_negotiate(vgen_ldc_t *ldcp)
4168 {
4169 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4170 	vio_ver_msg_t	vermsg;
4171 	vio_msg_tag_t	*tagp = &vermsg.tag;
4172 	int		rv;
4173 
4174 	bzero(&vermsg, sizeof (vermsg));
4175 
4176 	tagp->vio_msgtype = VIO_TYPE_CTRL;
4177 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
4178 	tagp->vio_subtype_env = VIO_VER_INFO;
4179 	tagp->vio_sid = ldcp->local_sid;
4180 
4181 	/* get version msg payload from ldcp->local */
4182 	vermsg.ver_major = ldcp->local_hparams.ver_major;
4183 	vermsg.ver_minor = ldcp->local_hparams.ver_minor;
4184 	vermsg.dev_class = ldcp->local_hparams.dev_class;
4185 
4186 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (vermsg), B_FALSE);
4187 	if (rv != VGEN_SUCCESS) {
4188 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
4189 		return (rv);
4190 	}
4191 
4192 	ldcp->hstate |= VER_INFO_SENT;
4193 	DBG2(vgenp, ldcp, "VER_INFO_SENT ver(%d,%d)\n",
4194 	    vermsg.ver_major, vermsg.ver_minor);
4195 
4196 	return (VGEN_SUCCESS);
4197 }
4198 
4199 /* send attr info message to the peer over ldc */
4200 static int
4201 vgen_send_attr_info(vgen_ldc_t *ldcp)
4202 {
4203 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4204 	vnet_attr_msg_t	attrmsg;
4205 	vio_msg_tag_t	*tagp = &attrmsg.tag;
4206 	int		rv;
4207 
4208 	bzero(&attrmsg, sizeof (attrmsg));
4209 
4210 	tagp->vio_msgtype = VIO_TYPE_CTRL;
4211 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
4212 	tagp->vio_subtype_env = VIO_ATTR_INFO;
4213 	tagp->vio_sid = ldcp->local_sid;
4214 
4215 	/* get attr msg payload from ldcp->local */
4216 	attrmsg.mtu = ldcp->local_hparams.mtu;
4217 	attrmsg.addr = ldcp->local_hparams.addr;
4218 	attrmsg.addr_type = ldcp->local_hparams.addr_type;
4219 	attrmsg.xfer_mode = ldcp->local_hparams.xfer_mode;
4220 	attrmsg.ack_freq = ldcp->local_hparams.ack_freq;
4221 
4222 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (attrmsg), B_FALSE);
4223 	if (rv != VGEN_SUCCESS) {
4224 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
4225 		return (rv);
4226 	}
4227 
4228 	ldcp->hstate |= ATTR_INFO_SENT;
4229 	DBG2(vgenp, ldcp, "ATTR_INFO_SENT\n");
4230 
4231 	return (VGEN_SUCCESS);
4232 }
4233 
4234 /* send descriptor ring register message to the peer over ldc */
4235 static int
4236 vgen_send_dring_reg(vgen_ldc_t *ldcp)
4237 {
4238 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
4239 	vio_dring_reg_msg_t	msg;
4240 	vio_msg_tag_t		*tagp = &msg.tag;
4241 	int		rv;
4242 
4243 	bzero(&msg, sizeof (msg));
4244 
4245 	tagp->vio_msgtype = VIO_TYPE_CTRL;
4246 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
4247 	tagp->vio_subtype_env = VIO_DRING_REG;
4248 	tagp->vio_sid = ldcp->local_sid;
4249 
4250 	/* get dring info msg payload from ldcp->local */
4251 	bcopy(&(ldcp->local_hparams.dring_cookie), (msg.cookie),
4252 	    sizeof (ldc_mem_cookie_t));
4253 	msg.ncookies = ldcp->local_hparams.num_dcookies;
4254 	msg.num_descriptors = ldcp->local_hparams.num_desc;
4255 	msg.descriptor_size = ldcp->local_hparams.desc_size;
4256 
4257 	/*
4258 	 * dring_ident is set to 0. After mapping the dring, peer sets this
4259 	 * value and sends it in the ack, which is saved in
4260 	 * vgen_handle_dring_reg().
4261 	 */
4262 	msg.dring_ident = 0;
4263 
4264 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (msg), B_FALSE);
4265 	if (rv != VGEN_SUCCESS) {
4266 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
4267 		return (rv);
4268 	}
4269 
4270 	ldcp->hstate |= DRING_INFO_SENT;
4271 	DBG2(vgenp, ldcp, "DRING_INFO_SENT \n");
4272 
4273 	return (VGEN_SUCCESS);
4274 }
4275 
4276 static int
4277 vgen_send_rdx_info(vgen_ldc_t *ldcp)
4278 {
4279 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4280 	vio_rdx_msg_t	rdxmsg;
4281 	vio_msg_tag_t	*tagp = &rdxmsg.tag;
4282 	int		rv;
4283 
4284 	bzero(&rdxmsg, sizeof (rdxmsg));
4285 
4286 	tagp->vio_msgtype = VIO_TYPE_CTRL;
4287 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
4288 	tagp->vio_subtype_env = VIO_RDX;
4289 	tagp->vio_sid = ldcp->local_sid;
4290 
4291 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (rdxmsg), B_FALSE);
4292 	if (rv != VGEN_SUCCESS) {
4293 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
4294 		return (rv);
4295 	}
4296 
4297 	ldcp->hstate |= RDX_INFO_SENT;
4298 	DBG2(vgenp, ldcp, "RDX_INFO_SENT\n");
4299 
4300 	return (VGEN_SUCCESS);
4301 }
4302 
4303 /* send descriptor ring data message to the peer over ldc */
4304 static int
4305 vgen_send_dring_data(vgen_ldc_t *ldcp, uint32_t start, int32_t end)
4306 {
4307 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4308 	vio_dring_msg_t	dringmsg, *msgp = &dringmsg;
4309 	vio_msg_tag_t	*tagp = &msgp->tag;
4310 	vgen_stats_t	*statsp = &ldcp->stats;
4311 	int		rv;
4312 
4313 	bzero(msgp, sizeof (*msgp));
4314 
4315 	tagp->vio_msgtype = VIO_TYPE_DATA;
4316 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
4317 	tagp->vio_subtype_env = VIO_DRING_DATA;
4318 	tagp->vio_sid = ldcp->local_sid;
4319 
4320 	msgp->dring_ident = ldcp->local_hparams.dring_ident;
4321 	msgp->start_idx = start;
4322 	msgp->end_idx = end;
4323 
4324 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (dringmsg), B_TRUE);
4325 	if (rv != VGEN_SUCCESS) {
4326 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
4327 		return (rv);
4328 	}
4329 
4330 	statsp->dring_data_msgs++;
4331 
4332 	DBG2(vgenp, ldcp, "DRING_DATA_SENT \n");
4333 
4334 	return (VGEN_SUCCESS);
4335 }
4336 
4337 /* send multicast addr info message to vsw */
4338 static int
4339 vgen_send_mcast_info(vgen_ldc_t *ldcp)
4340 {
4341 	vnet_mcast_msg_t	mcastmsg;
4342 	vnet_mcast_msg_t	*msgp;
4343 	vio_msg_tag_t		*tagp;
4344 	vgen_t			*vgenp;
4345 	struct ether_addr	*mca;
4346 	int			rv;
4347 	int			i;
4348 	uint32_t		size;
4349 	uint32_t		mccount;
4350 	uint32_t		n;
4351 
4352 	msgp = &mcastmsg;
4353 	tagp = &msgp->tag;
4354 	vgenp = LDC_TO_VGEN(ldcp);
4355 
4356 	mccount = vgenp->mccount;
4357 	i = 0;
4358 
4359 	do {
4360 		tagp->vio_msgtype = VIO_TYPE_CTRL;
4361 		tagp->vio_subtype = VIO_SUBTYPE_INFO;
4362 		tagp->vio_subtype_env = VNET_MCAST_INFO;
4363 		tagp->vio_sid = ldcp->local_sid;
4364 
4365 		n = ((mccount >= VNET_NUM_MCAST) ? VNET_NUM_MCAST : mccount);
4366 		size = n * sizeof (struct ether_addr);
4367 
4368 		mca = &(vgenp->mctab[i]);
4369 		bcopy(mca, (msgp->mca), size);
4370 		msgp->set = B_TRUE;
4371 		msgp->count = n;
4372 
4373 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msgp),
4374 		    B_FALSE);
4375 		if (rv != VGEN_SUCCESS) {
4376 			DWARN(vgenp, ldcp, "vgen_sendmsg err(%d)\n", rv);
4377 			return (rv);
4378 		}
4379 
4380 		mccount -= n;
4381 		i += n;
4382 
4383 	} while (mccount);
4384 
4385 	return (VGEN_SUCCESS);
4386 }
4387 
4388 /* Initiate Phase 2 of handshake */
4389 static int
4390 vgen_handshake_phase2(vgen_ldc_t *ldcp)
4391 {
4392 	int rv;
4393 	uint32_t ncookies = 0;
4394 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4395 
4396 #ifdef DEBUG
4397 	if (vgen_hdbg & HDBG_OUT_STATE) {
4398 		/* simulate out of state condition */
4399 		vgen_hdbg &= ~(HDBG_OUT_STATE);
4400 		rv = vgen_send_rdx_info(ldcp);
4401 		return (rv);
4402 	}
4403 	if (vgen_hdbg & HDBG_TIMEOUT) {
4404 		/* simulate timeout condition */
4405 		vgen_hdbg &= ~(HDBG_TIMEOUT);
4406 		return (VGEN_SUCCESS);
4407 	}
4408 #endif
4409 	rv = vgen_send_attr_info(ldcp);
4410 	if (rv != VGEN_SUCCESS) {
4411 		return (rv);
4412 	}
4413 
4414 	/* Bind descriptor ring to the channel */
4415 	if (ldcp->num_txdcookies == 0) {
4416 		rv = ldc_mem_dring_bind(ldcp->ldc_handle, ldcp->tx_dhandle,
4417 		    LDC_DIRECT_MAP | LDC_SHADOW_MAP, LDC_MEM_RW,
4418 		    &ldcp->tx_dcookie, &ncookies);
4419 		if (rv != 0) {
4420 			DWARN(vgenp, ldcp, "ldc_mem_dring_bind failed "
4421 			    "rv(%x)\n", rv);
4422 			return (rv);
4423 		}
4424 		ASSERT(ncookies == 1);
4425 		ldcp->num_txdcookies = ncookies;
4426 	}
4427 
4428 	/* update local dring_info params */
4429 	bcopy(&(ldcp->tx_dcookie), &(ldcp->local_hparams.dring_cookie),
4430 	    sizeof (ldc_mem_cookie_t));
4431 	ldcp->local_hparams.num_dcookies = ldcp->num_txdcookies;
4432 	ldcp->local_hparams.num_desc = ldcp->num_txds;
4433 	ldcp->local_hparams.desc_size = sizeof (vnet_public_desc_t);
4434 
4435 	rv = vgen_send_dring_reg(ldcp);
4436 	if (rv != VGEN_SUCCESS) {
4437 		return (rv);
4438 	}
4439 
4440 	return (VGEN_SUCCESS);
4441 }
4442 
4443 /*
4444  * Set vnet-protocol-version dependent functions based on version.
4445  */
4446 static void
4447 vgen_set_vnet_proto_ops(vgen_ldc_t *ldcp)
4448 {
4449 	vgen_hparams_t	*lp = &ldcp->local_hparams;
4450 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4451 
4452 	if (VGEN_VER_GTEQ(ldcp, 1, 4)) {
4453 		/*
4454 		 * If the version negotiated with peer is >= 1.4(Jumbo Frame
4455 		 * Support), set the mtu in our attributes to max_frame_size.
4456 		 */
4457 		lp->mtu = vgenp->max_frame_size;
4458 	} else  if (VGEN_VER_EQ(ldcp, 1, 3)) {
4459 		/*
4460 		 * If the version negotiated with peer is == 1.3 (Vlan Tag
4461 		 * Support) set the attr.mtu to ETHERMAX + VLAN_TAGSZ.
4462 		 */
4463 		lp->mtu = ETHERMAX + VLAN_TAGSZ;
4464 	} else {
4465 		vgen_port_t	*portp = ldcp->portp;
4466 		vnet_t		*vnetp = vgenp->vnetp;
4467 		/*
4468 		 * Pre-1.3 peers expect max frame size of ETHERMAX.
4469 		 * We can negotiate that size with those peers provided the
4470 		 * following conditions are true:
4471 		 * - Only pvid is defined for our peer and there are no vids.
4472 		 * - pvids are equal.
4473 		 * If the above conditions are true, then we can send/recv only
4474 		 * untagged frames of max size ETHERMAX.
4475 		 */
4476 		if (portp->nvids == 0 && portp->pvid == vnetp->pvid) {
4477 			lp->mtu = ETHERMAX;
4478 		}
4479 	}
4480 
4481 	if (VGEN_VER_GTEQ(ldcp, 1, 2)) {
4482 		/* Versions >= 1.2 */
4483 
4484 		if (VGEN_PRI_ETH_DEFINED(vgenp)) {
4485 			/*
4486 			 * enable priority routines and pkt mode only if
4487 			 * at least one pri-eth-type is specified in MD.
4488 			 */
4489 
4490 			ldcp->tx = vgen_ldcsend;
4491 			ldcp->rx_pktdata = vgen_handle_pkt_data;
4492 
4493 			/* set xfer mode for vgen_send_attr_info() */
4494 			lp->xfer_mode = VIO_PKT_MODE | VIO_DRING_MODE_V1_2;
4495 
4496 		} else {
4497 			/* no priority eth types defined in MD */
4498 
4499 			ldcp->tx = vgen_ldcsend_dring;
4500 			ldcp->rx_pktdata = vgen_handle_pkt_data_nop;
4501 
4502 			/* set xfer mode for vgen_send_attr_info() */
4503 			lp->xfer_mode = VIO_DRING_MODE_V1_2;
4504 
4505 		}
4506 	} else {
4507 		/* Versions prior to 1.2  */
4508 
4509 		vgen_reset_vnet_proto_ops(ldcp);
4510 	}
4511 }
4512 
4513 /*
4514  * Reset vnet-protocol-version dependent functions to pre-v1.2.
4515  */
4516 static void
4517 vgen_reset_vnet_proto_ops(vgen_ldc_t *ldcp)
4518 {
4519 	vgen_hparams_t	*lp = &ldcp->local_hparams;
4520 
4521 	ldcp->tx = vgen_ldcsend_dring;
4522 	ldcp->rx_pktdata = vgen_handle_pkt_data_nop;
4523 
4524 	/* set xfer mode for vgen_send_attr_info() */
4525 	lp->xfer_mode = VIO_DRING_MODE_V1_0;
4526 }
4527 
4528 static void
4529 vgen_vlan_unaware_port_reset(vgen_port_t *portp)
4530 {
4531 	vgen_ldclist_t	*ldclp;
4532 	vgen_ldc_t	*ldcp;
4533 	vgen_t		*vgenp = portp->vgenp;
4534 	vnet_t		*vnetp = vgenp->vnetp;
4535 
4536 	ldclp = &portp->ldclist;
4537 
4538 	READ_ENTER(&ldclp->rwlock);
4539 
4540 	/*
4541 	 * NOTE: for now, we will assume we have a single channel.
4542 	 */
4543 	if (ldclp->headp == NULL) {
4544 		RW_EXIT(&ldclp->rwlock);
4545 		return;
4546 	}
4547 	ldcp = ldclp->headp;
4548 
4549 	mutex_enter(&ldcp->cblock);
4550 
4551 	/*
4552 	 * If the peer is vlan_unaware(ver < 1.3), reset channel and terminate
4553 	 * the connection. See comments in vgen_set_vnet_proto_ops().
4554 	 */
4555 	if (ldcp->hphase == VH_DONE && VGEN_VER_LT(ldcp, 1, 3) &&
4556 	    (portp->nvids != 0 || portp->pvid != vnetp->pvid)) {
4557 		ldcp->need_ldc_reset = B_TRUE;
4558 		vgen_handshake_retry(ldcp);
4559 	}
4560 
4561 	mutex_exit(&ldcp->cblock);
4562 
4563 	RW_EXIT(&ldclp->rwlock);
4564 }
4565 
4566 static void
4567 vgen_reset_vlan_unaware_ports(vgen_t *vgenp)
4568 {
4569 	vgen_port_t	*portp;
4570 	vgen_portlist_t	*plistp;
4571 
4572 	plistp = &(vgenp->vgenports);
4573 	READ_ENTER(&plistp->rwlock);
4574 
4575 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
4576 
4577 		vgen_vlan_unaware_port_reset(portp);
4578 
4579 	}
4580 
4581 	RW_EXIT(&plistp->rwlock);
4582 }
4583 
4584 /*
4585  * This function resets the handshake phase to VH_PHASE0(pre-handshake phase).
4586  * This can happen after a channel comes up (status: LDC_UP) or
4587  * when handshake gets terminated due to various conditions.
4588  */
4589 static void
4590 vgen_reset_hphase(vgen_ldc_t *ldcp)
4591 {
4592 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4593 	ldc_status_t istatus;
4594 	int rv;
4595 
4596 	DBG1(vgenp, ldcp, "enter\n");
4597 	/* reset hstate and hphase */
4598 	ldcp->hstate = 0;
4599 	ldcp->hphase = VH_PHASE0;
4600 
4601 	vgen_reset_vnet_proto_ops(ldcp);
4602 
4603 	/*
4604 	 * Save the id of pending handshake timer in cancel_htid.
4605 	 * This will be checked in vgen_ldc_cb() and the handshake timer will
4606 	 * be cancelled after releasing cblock.
4607 	 */
4608 	if (ldcp->htid) {
4609 		ldcp->cancel_htid = ldcp->htid;
4610 		ldcp->htid = 0;
4611 	}
4612 
4613 	if (ldcp->local_hparams.dring_ready) {
4614 		ldcp->local_hparams.dring_ready = B_FALSE;
4615 	}
4616 
4617 	/* Unbind tx descriptor ring from the channel */
4618 	if (ldcp->num_txdcookies) {
4619 		rv = ldc_mem_dring_unbind(ldcp->tx_dhandle);
4620 		if (rv != 0) {
4621 			DWARN(vgenp, ldcp, "ldc_mem_dring_unbind failed\n");
4622 		}
4623 		ldcp->num_txdcookies = 0;
4624 	}
4625 
4626 	if (ldcp->peer_hparams.dring_ready) {
4627 		ldcp->peer_hparams.dring_ready = B_FALSE;
4628 		/* Unmap peer's dring */
4629 		(void) ldc_mem_dring_unmap(ldcp->rx_dhandle);
4630 		vgen_clobber_rxds(ldcp);
4631 	}
4632 
4633 	vgen_clobber_tbufs(ldcp);
4634 
4635 	/*
4636 	 * clear local handshake params and initialize.
4637 	 */
4638 	bzero(&(ldcp->local_hparams), sizeof (ldcp->local_hparams));
4639 
4640 	/* set version to the highest version supported */
4641 	ldcp->local_hparams.ver_major =
4642 	    ldcp->vgen_versions[0].ver_major;
4643 	ldcp->local_hparams.ver_minor =
4644 	    ldcp->vgen_versions[0].ver_minor;
4645 	ldcp->local_hparams.dev_class = VDEV_NETWORK;
4646 
4647 	/* set attr_info params */
4648 	ldcp->local_hparams.mtu = vgenp->max_frame_size;
4649 	ldcp->local_hparams.addr =
4650 	    vnet_macaddr_strtoul(vgenp->macaddr);
4651 	ldcp->local_hparams.addr_type = ADDR_TYPE_MAC;
4652 	ldcp->local_hparams.xfer_mode = VIO_DRING_MODE_V1_0;
4653 	ldcp->local_hparams.ack_freq = 0;	/* don't need acks */
4654 
4655 	/*
4656 	 * Note: dring is created, but not bound yet.
4657 	 * local dring_info params will be updated when we bind the dring in
4658 	 * vgen_handshake_phase2().
4659 	 * dring_ident is set to 0. After mapping the dring, peer sets this
4660 	 * value and sends it in the ack, which is saved in
4661 	 * vgen_handle_dring_reg().
4662 	 */
4663 	ldcp->local_hparams.dring_ident = 0;
4664 
4665 	/* clear peer_hparams */
4666 	bzero(&(ldcp->peer_hparams), sizeof (ldcp->peer_hparams));
4667 
4668 	/* reset the channel if required */
4669 	if (ldcp->need_ldc_reset) {
4670 		DWARN(vgenp, ldcp, "Doing Channel Reset...\n");
4671 		ldcp->need_ldc_reset = B_FALSE;
4672 		(void) ldc_down(ldcp->ldc_handle);
4673 		(void) ldc_status(ldcp->ldc_handle, &istatus);
4674 		DBG2(vgenp, ldcp, "Reset Done,ldc_status(%x)\n", istatus);
4675 		ldcp->ldc_status = istatus;
4676 
4677 		/* clear sids */
4678 		ldcp->local_sid = 0;
4679 		ldcp->peer_sid = 0;
4680 
4681 		/* try to bring the channel up */
4682 		rv = ldc_up(ldcp->ldc_handle);
4683 		if (rv != 0) {
4684 			DWARN(vgenp, ldcp, "ldc_up err rv(%d)\n", rv);
4685 		}
4686 
4687 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
4688 			DWARN(vgenp, ldcp, "ldc_status err\n");
4689 		} else {
4690 			ldcp->ldc_status = istatus;
4691 		}
4692 	}
4693 }
4694 
4695 /* wrapper function for vgen_reset_hphase */
4696 static void
4697 vgen_handshake_reset(vgen_ldc_t *ldcp)
4698 {
4699 	ASSERT(MUTEX_HELD(&ldcp->cblock));
4700 	mutex_enter(&ldcp->rxlock);
4701 	mutex_enter(&ldcp->wrlock);
4702 	mutex_enter(&ldcp->txlock);
4703 	mutex_enter(&ldcp->tclock);
4704 
4705 	vgen_reset_hphase(ldcp);
4706 
4707 	mutex_exit(&ldcp->tclock);
4708 	mutex_exit(&ldcp->txlock);
4709 	mutex_exit(&ldcp->wrlock);
4710 	mutex_exit(&ldcp->rxlock);
4711 }
4712 
4713 /*
4714  * Initiate handshake with the peer by sending various messages
4715  * based on the handshake-phase that the channel is currently in.
4716  */
4717 static void
4718 vgen_handshake(vgen_ldc_t *ldcp)
4719 {
4720 	uint32_t hphase = ldcp->hphase;
4721 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
4722 	ldc_status_t	istatus;
4723 	int	rv = 0;
4724 
4725 	switch (hphase) {
4726 
4727 	case VH_PHASE1:
4728 
4729 		/*
4730 		 * start timer, for entire handshake process, turn this timer
4731 		 * off if all phases of handshake complete successfully and
4732 		 * hphase goes to VH_DONE(below) or
4733 		 * vgen_reset_hphase() gets called or
4734 		 * channel is reset due to errors or
4735 		 * vgen_ldc_uninit() is invoked(vgen_stop).
4736 		 */
4737 		ASSERT(ldcp->htid == 0);
4738 		ldcp->htid = timeout(vgen_hwatchdog, (caddr_t)ldcp,
4739 		    drv_usectohz(vgen_hwd_interval * MICROSEC));
4740 
4741 		/* Phase 1 involves negotiating the version */
4742 		rv = vgen_send_version_negotiate(ldcp);
4743 		break;
4744 
4745 	case VH_PHASE2:
4746 		rv = vgen_handshake_phase2(ldcp);
4747 		break;
4748 
4749 	case VH_PHASE3:
4750 		rv = vgen_send_rdx_info(ldcp);
4751 		break;
4752 
4753 	case VH_DONE:
4754 		/*
4755 		 * Save the id of pending handshake timer in cancel_htid.
4756 		 * This will be checked in vgen_ldc_cb() and the handshake
4757 		 * timer will be cancelled after releasing cblock.
4758 		 */
4759 		if (ldcp->htid) {
4760 			ldcp->cancel_htid = ldcp->htid;
4761 			ldcp->htid = 0;
4762 		}
4763 		ldcp->hretries = 0;
4764 		DBG1(vgenp, ldcp, "Handshake Done\n");
4765 
4766 		if (ldcp->portp == vgenp->vsw_portp) {
4767 			/*
4768 			 * If this channel(port) is connected to vsw,
4769 			 * need to sync multicast table with vsw.
4770 			 */
4771 			mutex_exit(&ldcp->cblock);
4772 
4773 			mutex_enter(&vgenp->lock);
4774 			rv = vgen_send_mcast_info(ldcp);
4775 			mutex_exit(&vgenp->lock);
4776 
4777 			mutex_enter(&ldcp->cblock);
4778 			if (rv != VGEN_SUCCESS)
4779 				break;
4780 		}
4781 
4782 		/*
4783 		 * Check if mac layer should be notified to restart
4784 		 * transmissions. This can happen if the channel got
4785 		 * reset and vgen_clobber_tbufs() is called, while
4786 		 * need_resched is set.
4787 		 */
4788 		mutex_enter(&ldcp->tclock);
4789 		if (ldcp->need_resched) {
4790 			vio_net_tx_update_t vtx_update =
4791 			    ldcp->portp->vcb.vio_net_tx_update;
4792 
4793 			ldcp->need_resched = B_FALSE;
4794 			vtx_update(ldcp->portp->vhp);
4795 		}
4796 		mutex_exit(&ldcp->tclock);
4797 
4798 		break;
4799 
4800 	default:
4801 		break;
4802 	}
4803 
4804 	if (rv == ECONNRESET) {
4805 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
4806 			DWARN(vgenp, ldcp, "ldc_status err\n");
4807 		} else {
4808 			ldcp->ldc_status = istatus;
4809 		}
4810 		vgen_handle_evt_reset(ldcp);
4811 	} else if (rv) {
4812 		vgen_handshake_reset(ldcp);
4813 	}
4814 }
4815 
4816 /*
4817  * Check if the current handshake phase has completed successfully and
4818  * return the status.
4819  */
4820 static int
4821 vgen_handshake_done(vgen_ldc_t *ldcp)
4822 {
4823 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4824 	uint32_t	hphase = ldcp->hphase;
4825 	int 		status = 0;
4826 
4827 	switch (hphase) {
4828 
4829 	case VH_PHASE1:
4830 		/*
4831 		 * Phase1 is done, if version negotiation
4832 		 * completed successfully.
4833 		 */
4834 		status = ((ldcp->hstate & VER_NEGOTIATED) ==
4835 		    VER_NEGOTIATED);
4836 		break;
4837 
4838 	case VH_PHASE2:
4839 		/*
4840 		 * Phase 2 is done, if attr info and dring info
4841 		 * have been exchanged successfully.
4842 		 */
4843 		status = (((ldcp->hstate & ATTR_INFO_EXCHANGED) ==
4844 		    ATTR_INFO_EXCHANGED) &&
4845 		    ((ldcp->hstate & DRING_INFO_EXCHANGED) ==
4846 		    DRING_INFO_EXCHANGED));
4847 		break;
4848 
4849 	case VH_PHASE3:
4850 		/* Phase 3 is done, if rdx msg has been exchanged */
4851 		status = ((ldcp->hstate & RDX_EXCHANGED) ==
4852 		    RDX_EXCHANGED);
4853 		break;
4854 
4855 	default:
4856 		break;
4857 	}
4858 
4859 	if (status == 0) {
4860 		return (VGEN_FAILURE);
4861 	}
4862 	DBG2(vgenp, ldcp, "PHASE(%d)\n", hphase);
4863 	return (VGEN_SUCCESS);
4864 }
4865 
4866 /* retry handshake on failure */
4867 static void
4868 vgen_handshake_retry(vgen_ldc_t *ldcp)
4869 {
4870 	/* reset handshake phase */
4871 	vgen_handshake_reset(ldcp);
4872 
4873 	/* handshake retry is specified and the channel is UP */
4874 	if (vgen_max_hretries && (ldcp->ldc_status == LDC_UP)) {
4875 		if (ldcp->hretries++ < vgen_max_hretries) {
4876 			ldcp->local_sid = ddi_get_lbolt();
4877 			vgen_handshake(vh_nextphase(ldcp));
4878 		}
4879 	}
4880 }
4881 
4882 /*
4883  * Handle a version info msg from the peer or an ACK/NACK from the peer
4884  * to a version info msg that we sent.
4885  */
4886 static int
4887 vgen_handle_version_negotiate(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4888 {
4889 	vgen_t		*vgenp;
4890 	vio_ver_msg_t	*vermsg = (vio_ver_msg_t *)tagp;
4891 	int		ack = 0;
4892 	int		failed = 0;
4893 	int		idx;
4894 	vgen_ver_t	*versions = ldcp->vgen_versions;
4895 	int		rv = 0;
4896 
4897 	vgenp = LDC_TO_VGEN(ldcp);
4898 	DBG1(vgenp, ldcp, "enter\n");
4899 	switch (tagp->vio_subtype) {
4900 	case VIO_SUBTYPE_INFO:
4901 
4902 		/*  Cache sid of peer if this is the first time */
4903 		if (ldcp->peer_sid == 0) {
4904 			DBG2(vgenp, ldcp, "Caching peer_sid(%x)\n",
4905 			    tagp->vio_sid);
4906 			ldcp->peer_sid = tagp->vio_sid;
4907 		}
4908 
4909 		if (ldcp->hphase != VH_PHASE1) {
4910 			/*
4911 			 * If we are not already in VH_PHASE1, reset to
4912 			 * pre-handshake state, and initiate handshake
4913 			 * to the peer too.
4914 			 */
4915 			vgen_handshake_reset(ldcp);
4916 			vgen_handshake(vh_nextphase(ldcp));
4917 		}
4918 		ldcp->hstate |= VER_INFO_RCVD;
4919 
4920 		/* save peer's requested values */
4921 		ldcp->peer_hparams.ver_major = vermsg->ver_major;
4922 		ldcp->peer_hparams.ver_minor = vermsg->ver_minor;
4923 		ldcp->peer_hparams.dev_class = vermsg->dev_class;
4924 
4925 		if ((vermsg->dev_class != VDEV_NETWORK) &&
4926 		    (vermsg->dev_class != VDEV_NETWORK_SWITCH)) {
4927 			/* unsupported dev_class, send NACK */
4928 
4929 			DWARN(vgenp, ldcp, "Version Negotiation Failed\n");
4930 
4931 			tagp->vio_subtype = VIO_SUBTYPE_NACK;
4932 			tagp->vio_sid = ldcp->local_sid;
4933 			/* send reply msg back to peer */
4934 			rv = vgen_sendmsg(ldcp, (caddr_t)tagp,
4935 			    sizeof (*vermsg), B_FALSE);
4936 			if (rv != VGEN_SUCCESS) {
4937 				return (rv);
4938 			}
4939 			return (VGEN_FAILURE);
4940 		}
4941 
4942 		DBG2(vgenp, ldcp, "VER_INFO_RCVD, ver(%d,%d)\n",
4943 		    vermsg->ver_major,  vermsg->ver_minor);
4944 
4945 		idx = 0;
4946 
4947 		for (;;) {
4948 
4949 			if (vermsg->ver_major > versions[idx].ver_major) {
4950 
4951 				/* nack with next lower version */
4952 				tagp->vio_subtype = VIO_SUBTYPE_NACK;
4953 				vermsg->ver_major = versions[idx].ver_major;
4954 				vermsg->ver_minor = versions[idx].ver_minor;
4955 				break;
4956 			}
4957 
4958 			if (vermsg->ver_major == versions[idx].ver_major) {
4959 
4960 				/* major version match - ACK version */
4961 				tagp->vio_subtype = VIO_SUBTYPE_ACK;
4962 				ack = 1;
4963 
4964 				/*
4965 				 * lower minor version to the one this endpt
4966 				 * supports, if necessary
4967 				 */
4968 				if (vermsg->ver_minor >
4969 				    versions[idx].ver_minor) {
4970 					vermsg->ver_minor =
4971 					    versions[idx].ver_minor;
4972 					ldcp->peer_hparams.ver_minor =
4973 					    versions[idx].ver_minor;
4974 				}
4975 				break;
4976 			}
4977 
4978 			idx++;
4979 
4980 			if (idx == VGEN_NUM_VER) {
4981 
4982 				/* no version match - send NACK */
4983 				tagp->vio_subtype = VIO_SUBTYPE_NACK;
4984 				vermsg->ver_major = 0;
4985 				vermsg->ver_minor = 0;
4986 				failed = 1;
4987 				break;
4988 			}
4989 
4990 		}
4991 
4992 		tagp->vio_sid = ldcp->local_sid;
4993 
4994 		/* send reply msg back to peer */
4995 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*vermsg),
4996 		    B_FALSE);
4997 		if (rv != VGEN_SUCCESS) {
4998 			return (rv);
4999 		}
5000 
5001 		if (ack) {
5002 			ldcp->hstate |= VER_ACK_SENT;
5003 			DBG2(vgenp, ldcp, "VER_ACK_SENT, ver(%d,%d) \n",
5004 			    vermsg->ver_major, vermsg->ver_minor);
5005 		}
5006 		if (failed) {
5007 			DWARN(vgenp, ldcp, "Negotiation Failed\n");
5008 			return (VGEN_FAILURE);
5009 		}
5010 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5011 
5012 			/*  VER_ACK_SENT and VER_ACK_RCVD */
5013 
5014 			/* local and peer versions match? */
5015 			ASSERT((ldcp->local_hparams.ver_major ==
5016 			    ldcp->peer_hparams.ver_major) &&
5017 			    (ldcp->local_hparams.ver_minor ==
5018 			    ldcp->peer_hparams.ver_minor));
5019 
5020 			vgen_set_vnet_proto_ops(ldcp);
5021 
5022 			/* move to the next phase */
5023 			vgen_handshake(vh_nextphase(ldcp));
5024 		}
5025 
5026 		break;
5027 
5028 	case VIO_SUBTYPE_ACK:
5029 
5030 		if (ldcp->hphase != VH_PHASE1) {
5031 			/*  This should not happen. */
5032 			DWARN(vgenp, ldcp, "Invalid Phase(%u)\n", ldcp->hphase);
5033 			return (VGEN_FAILURE);
5034 		}
5035 
5036 		/* SUCCESS - we have agreed on a version */
5037 		ldcp->local_hparams.ver_major = vermsg->ver_major;
5038 		ldcp->local_hparams.ver_minor = vermsg->ver_minor;
5039 		ldcp->hstate |= VER_ACK_RCVD;
5040 
5041 		DBG2(vgenp, ldcp, "VER_ACK_RCVD, ver(%d,%d) \n",
5042 		    vermsg->ver_major,  vermsg->ver_minor);
5043 
5044 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5045 
5046 			/*  VER_ACK_SENT and VER_ACK_RCVD */
5047 
5048 			/* local and peer versions match? */
5049 			ASSERT((ldcp->local_hparams.ver_major ==
5050 			    ldcp->peer_hparams.ver_major) &&
5051 			    (ldcp->local_hparams.ver_minor ==
5052 			    ldcp->peer_hparams.ver_minor));
5053 
5054 			vgen_set_vnet_proto_ops(ldcp);
5055 
5056 			/* move to the next phase */
5057 			vgen_handshake(vh_nextphase(ldcp));
5058 		}
5059 		break;
5060 
5061 	case VIO_SUBTYPE_NACK:
5062 
5063 		if (ldcp->hphase != VH_PHASE1) {
5064 			/*  This should not happen.  */
5065 			DWARN(vgenp, ldcp, "VER_NACK_RCVD Invalid "
5066 			"Phase(%u)\n", ldcp->hphase);
5067 			return (VGEN_FAILURE);
5068 		}
5069 
5070 		DBG2(vgenp, ldcp, "VER_NACK_RCVD next ver(%d,%d)\n",
5071 		    vermsg->ver_major, vermsg->ver_minor);
5072 
5073 		/* check if version in NACK is zero */
5074 		if (vermsg->ver_major == 0 && vermsg->ver_minor == 0) {
5075 			/*
5076 			 * Version Negotiation has failed.
5077 			 */
5078 			DWARN(vgenp, ldcp, "Version Negotiation Failed\n");
5079 			return (VGEN_FAILURE);
5080 		}
5081 
5082 		idx = 0;
5083 
5084 		for (;;) {
5085 
5086 			if (vermsg->ver_major > versions[idx].ver_major) {
5087 				/* select next lower version */
5088 
5089 				ldcp->local_hparams.ver_major =
5090 				    versions[idx].ver_major;
5091 				ldcp->local_hparams.ver_minor =
5092 				    versions[idx].ver_minor;
5093 				break;
5094 			}
5095 
5096 			if (vermsg->ver_major == versions[idx].ver_major) {
5097 				/* major version match */
5098 
5099 				ldcp->local_hparams.ver_major =
5100 				    versions[idx].ver_major;
5101 
5102 				ldcp->local_hparams.ver_minor =
5103 				    versions[idx].ver_minor;
5104 				break;
5105 			}
5106 
5107 			idx++;
5108 
5109 			if (idx == VGEN_NUM_VER) {
5110 				/*
5111 				 * no version match.
5112 				 * Version Negotiation has failed.
5113 				 */
5114 				DWARN(vgenp, ldcp,
5115 				    "Version Negotiation Failed\n");
5116 				return (VGEN_FAILURE);
5117 			}
5118 
5119 		}
5120 
5121 		rv = vgen_send_version_negotiate(ldcp);
5122 		if (rv != VGEN_SUCCESS) {
5123 			return (rv);
5124 		}
5125 
5126 		break;
5127 	}
5128 
5129 	DBG1(vgenp, ldcp, "exit\n");
5130 	return (VGEN_SUCCESS);
5131 }
5132 
5133 /* Check if the attributes are supported */
5134 static int
5135 vgen_check_attr_info(vgen_ldc_t *ldcp, vnet_attr_msg_t *msg)
5136 {
5137 	vgen_hparams_t	*lp = &ldcp->local_hparams;
5138 
5139 	if ((msg->addr_type != ADDR_TYPE_MAC) ||
5140 	    (msg->ack_freq > 64) ||
5141 	    (msg->xfer_mode != lp->xfer_mode)) {
5142 		return (VGEN_FAILURE);
5143 	}
5144 
5145 	if (VGEN_VER_LT(ldcp, 1, 4)) {
5146 		/* versions < 1.4, mtu must match */
5147 		if (msg->mtu != lp->mtu) {
5148 			return (VGEN_FAILURE);
5149 		}
5150 	} else {
5151 		/* Ver >= 1.4, validate mtu of the peer is at least ETHERMAX */
5152 		if (msg->mtu < ETHERMAX) {
5153 			return (VGEN_FAILURE);
5154 		}
5155 	}
5156 
5157 	return (VGEN_SUCCESS);
5158 }
5159 
5160 /*
5161  * Handle an attribute info msg from the peer or an ACK/NACK from the peer
5162  * to an attr info msg that we sent.
5163  */
5164 static int
5165 vgen_handle_attr_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5166 {
5167 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
5168 	vnet_attr_msg_t	*msg = (vnet_attr_msg_t *)tagp;
5169 	vgen_hparams_t	*lp = &ldcp->local_hparams;
5170 	vgen_hparams_t	*rp = &ldcp->peer_hparams;
5171 	int		ack = 1;
5172 	int		rv = 0;
5173 	uint32_t	mtu;
5174 
5175 	DBG1(vgenp, ldcp, "enter\n");
5176 	if (ldcp->hphase != VH_PHASE2) {
5177 		DWARN(vgenp, ldcp, "Rcvd ATTR_INFO subtype(%d),"
5178 		" Invalid Phase(%u)\n",
5179 		    tagp->vio_subtype, ldcp->hphase);
5180 		return (VGEN_FAILURE);
5181 	}
5182 	switch (tagp->vio_subtype) {
5183 	case VIO_SUBTYPE_INFO:
5184 
5185 		DBG2(vgenp, ldcp, "ATTR_INFO_RCVD \n");
5186 		ldcp->hstate |= ATTR_INFO_RCVD;
5187 
5188 		/* save peer's values */
5189 		rp->mtu = msg->mtu;
5190 		rp->addr = msg->addr;
5191 		rp->addr_type = msg->addr_type;
5192 		rp->xfer_mode = msg->xfer_mode;
5193 		rp->ack_freq = msg->ack_freq;
5194 
5195 		rv = vgen_check_attr_info(ldcp, msg);
5196 		if (rv == VGEN_FAILURE) {
5197 			/* unsupported attr, send NACK */
5198 			ack = 0;
5199 		} else {
5200 
5201 			if (VGEN_VER_GTEQ(ldcp, 1, 4)) {
5202 
5203 				/*
5204 				 * Versions >= 1.4:
5205 				 * The mtu is negotiated down to the
5206 				 * minimum of our mtu and peer's mtu.
5207 				 */
5208 				mtu = MIN(msg->mtu, vgenp->max_frame_size);
5209 
5210 				/*
5211 				 * If we have received an ack for the attr info
5212 				 * that we sent, then check if the mtu computed
5213 				 * above matches the mtu that the peer had ack'd
5214 				 * (saved in local hparams). If they don't
5215 				 * match, we fail the handshake.
5216 				 */
5217 				if (ldcp->hstate & ATTR_ACK_RCVD) {
5218 					if (mtu != lp->mtu) {
5219 						/* send NACK */
5220 						ack = 0;
5221 					}
5222 				} else {
5223 					/*
5224 					 * Save the mtu computed above in our
5225 					 * attr parameters, so it gets sent in
5226 					 * the attr info from us to the peer.
5227 					 */
5228 					lp->mtu = mtu;
5229 				}
5230 
5231 				/* save the MIN mtu in the msg to be replied */
5232 				msg->mtu = mtu;
5233 
5234 			}
5235 		}
5236 
5237 
5238 		if (ack) {
5239 			tagp->vio_subtype = VIO_SUBTYPE_ACK;
5240 		} else {
5241 			tagp->vio_subtype = VIO_SUBTYPE_NACK;
5242 		}
5243 		tagp->vio_sid = ldcp->local_sid;
5244 
5245 		/* send reply msg back to peer */
5246 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msg),
5247 		    B_FALSE);
5248 		if (rv != VGEN_SUCCESS) {
5249 			return (rv);
5250 		}
5251 
5252 		if (ack) {
5253 			ldcp->hstate |= ATTR_ACK_SENT;
5254 			DBG2(vgenp, ldcp, "ATTR_ACK_SENT \n");
5255 		} else {
5256 			/* failed */
5257 			DWARN(vgenp, ldcp, "ATTR_NACK_SENT \n");
5258 			return (VGEN_FAILURE);
5259 		}
5260 
5261 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5262 			vgen_handshake(vh_nextphase(ldcp));
5263 		}
5264 
5265 		break;
5266 
5267 	case VIO_SUBTYPE_ACK:
5268 
5269 		if (VGEN_VER_GTEQ(ldcp, 1, 4)) {
5270 			/*
5271 			 * Versions >= 1.4:
5272 			 * The ack msg sent by the peer contains the minimum of
5273 			 * our mtu (that we had sent in our attr info) and the
5274 			 * peer's mtu.
5275 			 *
5276 			 * If we have sent an ack for the attr info msg from
5277 			 * the peer, check if the mtu that was computed then
5278 			 * (saved in local hparams) matches the mtu that the
5279 			 * peer has ack'd. If they don't match, we fail the
5280 			 * handshake.
5281 			 */
5282 			if (ldcp->hstate & ATTR_ACK_SENT) {
5283 				if (lp->mtu != msg->mtu) {
5284 					return (VGEN_FAILURE);
5285 				}
5286 			} else {
5287 				/*
5288 				 * If the mtu ack'd by the peer is > our mtu
5289 				 * fail handshake. Otherwise, save the mtu, so
5290 				 * we can validate it when we receive attr info
5291 				 * from our peer.
5292 				 */
5293 				if (msg->mtu > lp->mtu) {
5294 					return (VGEN_FAILURE);
5295 				}
5296 				if (msg->mtu <= lp->mtu) {
5297 					lp->mtu = msg->mtu;
5298 				}
5299 			}
5300 		}
5301 
5302 		ldcp->hstate |= ATTR_ACK_RCVD;
5303 
5304 		DBG2(vgenp, ldcp, "ATTR_ACK_RCVD \n");
5305 
5306 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5307 			vgen_handshake(vh_nextphase(ldcp));
5308 		}
5309 		break;
5310 
5311 	case VIO_SUBTYPE_NACK:
5312 
5313 		DBG2(vgenp, ldcp, "ATTR_NACK_RCVD \n");
5314 		return (VGEN_FAILURE);
5315 	}
5316 	DBG1(vgenp, ldcp, "exit\n");
5317 	return (VGEN_SUCCESS);
5318 }
5319 
5320 /* Check if the dring info msg is ok */
5321 static int
5322 vgen_check_dring_reg(vio_dring_reg_msg_t *msg)
5323 {
5324 	/* check if msg contents are ok */
5325 	if ((msg->num_descriptors < 128) || (msg->descriptor_size <
5326 	    sizeof (vnet_public_desc_t))) {
5327 		return (VGEN_FAILURE);
5328 	}
5329 	return (VGEN_SUCCESS);
5330 }
5331 
5332 /*
5333  * Handle a descriptor ring register msg from the peer or an ACK/NACK from
5334  * the peer to a dring register msg that we sent.
5335  */
5336 static int
5337 vgen_handle_dring_reg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5338 {
5339 	vio_dring_reg_msg_t *msg = (vio_dring_reg_msg_t *)tagp;
5340 	ldc_mem_cookie_t dcookie;
5341 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5342 	int ack = 0;
5343 	int rv = 0;
5344 
5345 	DBG1(vgenp, ldcp, "enter\n");
5346 	if (ldcp->hphase < VH_PHASE2) {
5347 		/* dring_info can be rcvd in any of the phases after Phase1 */
5348 		DWARN(vgenp, ldcp,
5349 		    "Rcvd DRING_INFO Subtype (%d), Invalid Phase(%u)\n",
5350 		    tagp->vio_subtype, ldcp->hphase);
5351 		return (VGEN_FAILURE);
5352 	}
5353 	switch (tagp->vio_subtype) {
5354 	case VIO_SUBTYPE_INFO:
5355 
5356 		DBG2(vgenp, ldcp, "DRING_INFO_RCVD \n");
5357 		ldcp->hstate |= DRING_INFO_RCVD;
5358 		bcopy((msg->cookie), &dcookie, sizeof (dcookie));
5359 
5360 		ASSERT(msg->ncookies == 1);
5361 
5362 		if (vgen_check_dring_reg(msg) == VGEN_SUCCESS) {
5363 			/*
5364 			 * verified dring info msg to be ok,
5365 			 * now try to map the remote dring.
5366 			 */
5367 			rv = vgen_init_rxds(ldcp, msg->num_descriptors,
5368 			    msg->descriptor_size, &dcookie,
5369 			    msg->ncookies);
5370 			if (rv == DDI_SUCCESS) {
5371 				/* now we can ack the peer */
5372 				ack = 1;
5373 			}
5374 		}
5375 		if (ack == 0) {
5376 			/* failed, send NACK */
5377 			tagp->vio_subtype = VIO_SUBTYPE_NACK;
5378 		} else {
5379 			if (!(ldcp->peer_hparams.dring_ready)) {
5380 
5381 				/* save peer's dring_info values */
5382 				bcopy(&dcookie,
5383 				    &(ldcp->peer_hparams.dring_cookie),
5384 				    sizeof (dcookie));
5385 				ldcp->peer_hparams.num_desc =
5386 				    msg->num_descriptors;
5387 				ldcp->peer_hparams.desc_size =
5388 				    msg->descriptor_size;
5389 				ldcp->peer_hparams.num_dcookies =
5390 				    msg->ncookies;
5391 
5392 				/* set dring_ident for the peer */
5393 				ldcp->peer_hparams.dring_ident =
5394 				    (uint64_t)ldcp->rxdp;
5395 				/* return the dring_ident in ack msg */
5396 				msg->dring_ident =
5397 				    (uint64_t)ldcp->rxdp;
5398 
5399 				ldcp->peer_hparams.dring_ready = B_TRUE;
5400 			}
5401 			tagp->vio_subtype = VIO_SUBTYPE_ACK;
5402 		}
5403 		tagp->vio_sid = ldcp->local_sid;
5404 		/* send reply msg back to peer */
5405 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msg),
5406 		    B_FALSE);
5407 		if (rv != VGEN_SUCCESS) {
5408 			return (rv);
5409 		}
5410 
5411 		if (ack) {
5412 			ldcp->hstate |= DRING_ACK_SENT;
5413 			DBG2(vgenp, ldcp, "DRING_ACK_SENT");
5414 		} else {
5415 			DWARN(vgenp, ldcp, "DRING_NACK_SENT");
5416 			return (VGEN_FAILURE);
5417 		}
5418 
5419 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5420 			vgen_handshake(vh_nextphase(ldcp));
5421 		}
5422 
5423 		break;
5424 
5425 	case VIO_SUBTYPE_ACK:
5426 
5427 		ldcp->hstate |= DRING_ACK_RCVD;
5428 
5429 		DBG2(vgenp, ldcp, "DRING_ACK_RCVD");
5430 
5431 		if (!(ldcp->local_hparams.dring_ready)) {
5432 			/* local dring is now ready */
5433 			ldcp->local_hparams.dring_ready = B_TRUE;
5434 
5435 			/* save dring_ident acked by peer */
5436 			ldcp->local_hparams.dring_ident =
5437 			    msg->dring_ident;
5438 		}
5439 
5440 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5441 			vgen_handshake(vh_nextphase(ldcp));
5442 		}
5443 
5444 		break;
5445 
5446 	case VIO_SUBTYPE_NACK:
5447 
5448 		DBG2(vgenp, ldcp, "DRING_NACK_RCVD");
5449 		return (VGEN_FAILURE);
5450 	}
5451 	DBG1(vgenp, ldcp, "exit\n");
5452 	return (VGEN_SUCCESS);
5453 }
5454 
5455 /*
5456  * Handle a rdx info msg from the peer or an ACK/NACK
5457  * from the peer to a rdx info msg that we sent.
5458  */
5459 static int
5460 vgen_handle_rdx_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5461 {
5462 	int rv = 0;
5463 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
5464 
5465 	DBG1(vgenp, ldcp, "enter\n");
5466 	if (ldcp->hphase != VH_PHASE3) {
5467 		DWARN(vgenp, ldcp,
5468 		    "Rcvd RDX_INFO Subtype (%d), Invalid Phase(%u)\n",
5469 		    tagp->vio_subtype, ldcp->hphase);
5470 		return (VGEN_FAILURE);
5471 	}
5472 	switch (tagp->vio_subtype) {
5473 	case VIO_SUBTYPE_INFO:
5474 
5475 		DBG2(vgenp, ldcp, "RDX_INFO_RCVD \n");
5476 		ldcp->hstate |= RDX_INFO_RCVD;
5477 
5478 		tagp->vio_subtype = VIO_SUBTYPE_ACK;
5479 		tagp->vio_sid = ldcp->local_sid;
5480 		/* send reply msg back to peer */
5481 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (vio_rdx_msg_t),
5482 		    B_FALSE);
5483 		if (rv != VGEN_SUCCESS) {
5484 			return (rv);
5485 		}
5486 
5487 		ldcp->hstate |= RDX_ACK_SENT;
5488 		DBG2(vgenp, ldcp, "RDX_ACK_SENT \n");
5489 
5490 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5491 			vgen_handshake(vh_nextphase(ldcp));
5492 		}
5493 
5494 		break;
5495 
5496 	case VIO_SUBTYPE_ACK:
5497 
5498 		ldcp->hstate |= RDX_ACK_RCVD;
5499 
5500 		DBG2(vgenp, ldcp, "RDX_ACK_RCVD \n");
5501 
5502 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5503 			vgen_handshake(vh_nextphase(ldcp));
5504 		}
5505 		break;
5506 
5507 	case VIO_SUBTYPE_NACK:
5508 
5509 		DBG2(vgenp, ldcp, "RDX_NACK_RCVD \n");
5510 		return (VGEN_FAILURE);
5511 	}
5512 	DBG1(vgenp, ldcp, "exit\n");
5513 	return (VGEN_SUCCESS);
5514 }
5515 
5516 /* Handle ACK/NACK from vsw to a set multicast msg that we sent */
5517 static int
5518 vgen_handle_mcast_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5519 {
5520 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5521 	vnet_mcast_msg_t *msgp = (vnet_mcast_msg_t *)tagp;
5522 	struct ether_addr *addrp;
5523 	int count;
5524 	int i;
5525 
5526 	DBG1(vgenp, ldcp, "enter\n");
5527 	switch (tagp->vio_subtype) {
5528 
5529 	case VIO_SUBTYPE_INFO:
5530 
5531 		/* vnet shouldn't recv set mcast msg, only vsw handles it */
5532 		DWARN(vgenp, ldcp, "rcvd SET_MCAST_INFO \n");
5533 		break;
5534 
5535 	case VIO_SUBTYPE_ACK:
5536 
5537 		/* success adding/removing multicast addr */
5538 		DBG1(vgenp, ldcp, "rcvd SET_MCAST_ACK \n");
5539 		break;
5540 
5541 	case VIO_SUBTYPE_NACK:
5542 
5543 		DWARN(vgenp, ldcp, "rcvd SET_MCAST_NACK \n");
5544 		if (!(msgp->set)) {
5545 			/* multicast remove request failed */
5546 			break;
5547 		}
5548 
5549 		/* multicast add request failed */
5550 		for (count = 0; count < msgp->count; count++) {
5551 			addrp = &(msgp->mca[count]);
5552 
5553 			/* delete address from the table */
5554 			for (i = 0; i < vgenp->mccount; i++) {
5555 				if (ether_cmp(addrp,
5556 				    &(vgenp->mctab[i])) == 0) {
5557 					if (vgenp->mccount > 1) {
5558 						int t = vgenp->mccount - 1;
5559 						vgenp->mctab[i] =
5560 						    vgenp->mctab[t];
5561 					}
5562 					vgenp->mccount--;
5563 					break;
5564 				}
5565 			}
5566 		}
5567 		break;
5568 
5569 	}
5570 	DBG1(vgenp, ldcp, "exit\n");
5571 
5572 	return (VGEN_SUCCESS);
5573 }
5574 
5575 /* handler for control messages received from the peer ldc end-point */
5576 static int
5577 vgen_handle_ctrlmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5578 {
5579 	int rv = 0;
5580 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5581 
5582 	DBG1(vgenp, ldcp, "enter\n");
5583 	switch (tagp->vio_subtype_env) {
5584 
5585 	case VIO_VER_INFO:
5586 		rv = vgen_handle_version_negotiate(ldcp, tagp);
5587 		break;
5588 
5589 	case VIO_ATTR_INFO:
5590 		rv = vgen_handle_attr_info(ldcp, tagp);
5591 		break;
5592 
5593 	case VIO_DRING_REG:
5594 		rv = vgen_handle_dring_reg(ldcp, tagp);
5595 		break;
5596 
5597 	case VIO_RDX:
5598 		rv = vgen_handle_rdx_info(ldcp, tagp);
5599 		break;
5600 
5601 	case VNET_MCAST_INFO:
5602 		rv = vgen_handle_mcast_info(ldcp, tagp);
5603 		break;
5604 
5605 	case VIO_DDS_INFO:
5606 		rv = vgen_dds_rx(ldcp, tagp);
5607 		break;
5608 	}
5609 
5610 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
5611 	return (rv);
5612 }
5613 
5614 /* handler for data messages received from the peer ldc end-point */
5615 static int
5616 vgen_handle_datamsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp, uint32_t msglen)
5617 {
5618 	int rv = 0;
5619 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5620 
5621 	DBG1(vgenp, ldcp, "enter\n");
5622 
5623 	if (ldcp->hphase != VH_DONE)
5624 		return (rv);
5625 
5626 	if (tagp->vio_subtype == VIO_SUBTYPE_INFO) {
5627 		rv = vgen_check_datamsg_seq(ldcp, tagp);
5628 		if (rv != 0) {
5629 			return (rv);
5630 		}
5631 	}
5632 
5633 	switch (tagp->vio_subtype_env) {
5634 	case VIO_DRING_DATA:
5635 		rv = vgen_handle_dring_data(ldcp, tagp);
5636 		break;
5637 
5638 	case VIO_PKT_DATA:
5639 		ldcp->rx_pktdata((void *)ldcp, (void *)tagp, msglen);
5640 		break;
5641 	default:
5642 		break;
5643 	}
5644 
5645 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
5646 	return (rv);
5647 }
5648 
5649 /*
5650  * dummy pkt data handler function for vnet protocol version 1.0
5651  */
5652 static void
5653 vgen_handle_pkt_data_nop(void *arg1, void *arg2, uint32_t msglen)
5654 {
5655 	_NOTE(ARGUNUSED(arg1, arg2, msglen))
5656 }
5657 
5658 /*
5659  * This function handles raw pkt data messages received over the channel.
5660  * Currently, only priority-eth-type frames are received through this mechanism.
5661  * In this case, the frame(data) is present within the message itself which
5662  * is copied into an mblk before sending it up the stack.
5663  */
5664 static void
5665 vgen_handle_pkt_data(void *arg1, void *arg2, uint32_t msglen)
5666 {
5667 	vgen_ldc_t		*ldcp = (vgen_ldc_t *)arg1;
5668 	vio_raw_data_msg_t	*pkt	= (vio_raw_data_msg_t *)arg2;
5669 	uint32_t		size;
5670 	mblk_t			*mp;
5671 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
5672 	vgen_stats_t		*statsp = &ldcp->stats;
5673 	vgen_hparams_t		*lp = &ldcp->local_hparams;
5674 	vio_net_rx_cb_t		vrx_cb;
5675 
5676 	ASSERT(MUTEX_HELD(&ldcp->cblock));
5677 
5678 	mutex_exit(&ldcp->cblock);
5679 
5680 	size = msglen - VIO_PKT_DATA_HDRSIZE;
5681 	if (size < ETHERMIN || size > lp->mtu) {
5682 		(void) atomic_inc_32(&statsp->rx_pri_fail);
5683 		goto exit;
5684 	}
5685 
5686 	mp = vio_multipool_allocb(&ldcp->vmp, size);
5687 	if (mp == NULL) {
5688 		mp = allocb(size, BPRI_MED);
5689 		if (mp == NULL) {
5690 			(void) atomic_inc_32(&statsp->rx_pri_fail);
5691 			DWARN(vgenp, ldcp, "allocb failure, "
5692 			    "unable to process priority frame\n");
5693 			goto exit;
5694 		}
5695 	}
5696 
5697 	/* copy the frame from the payload of raw data msg into the mblk */
5698 	bcopy(pkt->data, mp->b_rptr, size);
5699 	mp->b_wptr = mp->b_rptr + size;
5700 
5701 	/* update stats */
5702 	(void) atomic_inc_64(&statsp->rx_pri_packets);
5703 	(void) atomic_add_64(&statsp->rx_pri_bytes, size);
5704 
5705 	/* send up; call vrx_cb() as cblock is already released */
5706 	vrx_cb = ldcp->portp->vcb.vio_net_rx_cb;
5707 	vrx_cb(ldcp->portp->vhp, mp);
5708 
5709 exit:
5710 	mutex_enter(&ldcp->cblock);
5711 }
5712 
5713 static int
5714 vgen_send_dring_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp, uint32_t start,
5715     int32_t end, uint8_t pstate)
5716 {
5717 	int rv = 0;
5718 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5719 	vio_dring_msg_t *msgp = (vio_dring_msg_t *)tagp;
5720 
5721 	tagp->vio_subtype = VIO_SUBTYPE_ACK;
5722 	tagp->vio_sid = ldcp->local_sid;
5723 	msgp->start_idx = start;
5724 	msgp->end_idx = end;
5725 	msgp->dring_process_state = pstate;
5726 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msgp), B_FALSE);
5727 	if (rv != VGEN_SUCCESS) {
5728 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
5729 	}
5730 	return (rv);
5731 }
5732 
5733 static int
5734 vgen_handle_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5735 {
5736 	int rv = 0;
5737 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5738 
5739 
5740 	DBG1(vgenp, ldcp, "enter\n");
5741 	switch (tagp->vio_subtype) {
5742 
5743 	case VIO_SUBTYPE_INFO:
5744 		/*
5745 		 * To reduce the locking contention, release the
5746 		 * cblock here and re-acquire it once we are done
5747 		 * receiving packets.
5748 		 */
5749 		mutex_exit(&ldcp->cblock);
5750 		mutex_enter(&ldcp->rxlock);
5751 		rv = vgen_handle_dring_data_info(ldcp, tagp);
5752 		mutex_exit(&ldcp->rxlock);
5753 		mutex_enter(&ldcp->cblock);
5754 		break;
5755 
5756 	case VIO_SUBTYPE_ACK:
5757 		rv = vgen_handle_dring_data_ack(ldcp, tagp);
5758 		break;
5759 
5760 	case VIO_SUBTYPE_NACK:
5761 		rv = vgen_handle_dring_data_nack(ldcp, tagp);
5762 		break;
5763 	}
5764 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
5765 	return (rv);
5766 }
5767 
5768 static int
5769 vgen_handle_dring_data_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5770 {
5771 	uint32_t start;
5772 	int32_t end;
5773 	int rv = 0;
5774 	vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;
5775 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5776 #ifdef VGEN_HANDLE_LOST_PKTS
5777 	vgen_stats_t *statsp = &ldcp->stats;
5778 	uint32_t rxi;
5779 	int n;
5780 #endif
5781 
5782 	DBG1(vgenp, ldcp, "enter\n");
5783 
5784 	start = dringmsg->start_idx;
5785 	end = dringmsg->end_idx;
5786 	/*
5787 	 * received a data msg, which contains the start and end
5788 	 * indices of the descriptors within the rx ring holding data,
5789 	 * the seq_num of data packet corresponding to the start index,
5790 	 * and the dring_ident.
5791 	 * We can now read the contents of each of these descriptors
5792 	 * and gather data from it.
5793 	 */
5794 	DBG1(vgenp, ldcp, "INFO: start(%d), end(%d)\n",
5795 	    start, end);
5796 
5797 	/* validate rx start and end indeces */
5798 	if (!(CHECK_RXI(start, ldcp)) || ((end != -1) &&
5799 	    !(CHECK_RXI(end, ldcp)))) {
5800 		DWARN(vgenp, ldcp, "Invalid Rx start(%d) or end(%d)\n",
5801 		    start, end);
5802 		/* drop the message if invalid index */
5803 		return (rv);
5804 	}
5805 
5806 	/* validate dring_ident */
5807 	if (dringmsg->dring_ident != ldcp->peer_hparams.dring_ident) {
5808 		DWARN(vgenp, ldcp, "Invalid dring ident 0x%x\n",
5809 		    dringmsg->dring_ident);
5810 		/* invalid dring_ident, drop the msg */
5811 		return (rv);
5812 	}
5813 #ifdef DEBUG
5814 	if (vgen_trigger_rxlost) {
5815 		/* drop this msg to simulate lost pkts for debugging */
5816 		vgen_trigger_rxlost = 0;
5817 		return (rv);
5818 	}
5819 #endif
5820 
5821 #ifdef	VGEN_HANDLE_LOST_PKTS
5822 
5823 	/* receive start index doesn't match expected index */
5824 	if (ldcp->next_rxi != start) {
5825 		DWARN(vgenp, ldcp, "next_rxi(%d) != start(%d)\n",
5826 		    ldcp->next_rxi, start);
5827 
5828 		/* calculate the number of pkts lost */
5829 		if (start >= ldcp->next_rxi) {
5830 			n = start - ldcp->next_rxi;
5831 		} else  {
5832 			n = ldcp->num_rxds - (ldcp->next_rxi - start);
5833 		}
5834 
5835 		statsp->rx_lost_pkts += n;
5836 		tagp->vio_subtype = VIO_SUBTYPE_NACK;
5837 		tagp->vio_sid = ldcp->local_sid;
5838 		/* indicate the range of lost descriptors */
5839 		dringmsg->start_idx = ldcp->next_rxi;
5840 		rxi = start;
5841 		DECR_RXI(rxi, ldcp);
5842 		dringmsg->end_idx = rxi;
5843 		/* dring ident is left unchanged */
5844 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp,
5845 		    sizeof (*dringmsg), B_FALSE);
5846 		if (rv != VGEN_SUCCESS) {
5847 			DWARN(vgenp, ldcp,
5848 			    "vgen_sendmsg failed, stype:NACK\n");
5849 			return (rv);
5850 		}
5851 		/*
5852 		 * treat this range of descrs/pkts as dropped
5853 		 * and set the new expected value of next_rxi
5854 		 * and continue(below) to process from the new
5855 		 * start index.
5856 		 */
5857 		ldcp->next_rxi = start;
5858 	}
5859 
5860 #endif	/* VGEN_HANDLE_LOST_PKTS */
5861 
5862 	/* Now receive messages */
5863 	rv = vgen_process_dring_data(ldcp, tagp);
5864 
5865 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
5866 	return (rv);
5867 }
5868 
5869 static int
5870 vgen_process_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5871 {
5872 	boolean_t set_ack_start = B_FALSE;
5873 	uint32_t start;
5874 	uint32_t ack_end;
5875 	uint32_t next_rxi;
5876 	uint32_t rxi;
5877 	int count = 0;
5878 	int rv = 0;
5879 	uint32_t retries = 0;
5880 	vgen_stats_t *statsp;
5881 	vnet_public_desc_t rxd;
5882 	vio_dring_entry_hdr_t *hdrp;
5883 	mblk_t *bp = NULL;
5884 	mblk_t *bpt = NULL;
5885 	uint32_t ack_start;
5886 	boolean_t rxd_err = B_FALSE;
5887 	mblk_t *mp = NULL;
5888 	size_t nbytes;
5889 	boolean_t ack_needed = B_FALSE;
5890 	size_t nread;
5891 	uint64_t off = 0;
5892 	struct ether_header *ehp;
5893 	vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;
5894 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5895 	vgen_hparams_t	*lp = &ldcp->local_hparams;
5896 
5897 	DBG1(vgenp, ldcp, "enter\n");
5898 
5899 	statsp = &ldcp->stats;
5900 	start = dringmsg->start_idx;
5901 
5902 	/*
5903 	 * start processing the descriptors from the specified
5904 	 * start index, up to the index a descriptor is not ready
5905 	 * to be processed or we process the entire descriptor ring
5906 	 * and wrap around upto the start index.
5907 	 */
5908 
5909 	/* need to set the start index of descriptors to be ack'd */
5910 	set_ack_start = B_TRUE;
5911 
5912 	/* index upto which we have ack'd */
5913 	ack_end = start;
5914 	DECR_RXI(ack_end, ldcp);
5915 
5916 	next_rxi = rxi =  start;
5917 	do {
5918 vgen_recv_retry:
5919 		rv = vnet_dring_entry_copy(&(ldcp->rxdp[rxi]), &rxd,
5920 		    ldcp->dring_mtype, ldcp->rx_dhandle, rxi, rxi);
5921 		if (rv != 0) {
5922 			DWARN(vgenp, ldcp, "ldc_mem_dring_acquire() failed"
5923 			    " rv(%d)\n", rv);
5924 			statsp->ierrors++;
5925 			return (rv);
5926 		}
5927 
5928 		hdrp = &rxd.hdr;
5929 
5930 		if (hdrp->dstate != VIO_DESC_READY) {
5931 			/*
5932 			 * Before waiting and retry here, send up
5933 			 * the packets that are received already
5934 			 */
5935 			if (bp != NULL) {
5936 				DTRACE_PROBE1(vgen_rcv_msgs, int, count);
5937 				vgen_rx(ldcp, bp);
5938 				count = 0;
5939 				bp = bpt = NULL;
5940 			}
5941 			/*
5942 			 * descriptor is not ready.
5943 			 * retry descriptor acquire, stop processing
5944 			 * after max # retries.
5945 			 */
5946 			if (retries == vgen_recv_retries)
5947 				break;
5948 			retries++;
5949 			drv_usecwait(vgen_recv_delay);
5950 			goto vgen_recv_retry;
5951 		}
5952 		retries = 0;
5953 
5954 		if (set_ack_start) {
5955 			/*
5956 			 * initialize the start index of the range
5957 			 * of descriptors to be ack'd.
5958 			 */
5959 			ack_start = rxi;
5960 			set_ack_start = B_FALSE;
5961 		}
5962 
5963 		if ((rxd.nbytes < ETHERMIN) ||
5964 		    (rxd.nbytes > lp->mtu) ||
5965 		    (rxd.ncookies == 0) ||
5966 		    (rxd.ncookies > MAX_COOKIES)) {
5967 			rxd_err = B_TRUE;
5968 		} else {
5969 			/*
5970 			 * Try to allocate an mblk from the free pool
5971 			 * of recv mblks for the channel.
5972 			 * If this fails, use allocb().
5973 			 */
5974 			nbytes = (VNET_IPALIGN + rxd.nbytes + 7) & ~7;
5975 			if (nbytes > ldcp->max_rxpool_size) {
5976 				mp = allocb(VNET_IPALIGN + rxd.nbytes + 8,
5977 				    BPRI_MED);
5978 			} else {
5979 				mp = vio_multipool_allocb(&ldcp->vmp, nbytes);
5980 				if (mp == NULL) {
5981 					statsp->rx_vio_allocb_fail++;
5982 					/*
5983 					 * Data buffer returned by allocb(9F)
5984 					 * is 8byte aligned. We allocate extra
5985 					 * 8 bytes to ensure size is multiple
5986 					 * of 8 bytes for ldc_mem_copy().
5987 					 */
5988 					mp = allocb(VNET_IPALIGN +
5989 					    rxd.nbytes + 8, BPRI_MED);
5990 				}
5991 			}
5992 		}
5993 		if ((rxd_err) || (mp == NULL)) {
5994 			/*
5995 			 * rxd_err or allocb() failure,
5996 			 * drop this packet, get next.
5997 			 */
5998 			if (rxd_err) {
5999 				statsp->ierrors++;
6000 				rxd_err = B_FALSE;
6001 			} else {
6002 				statsp->rx_allocb_fail++;
6003 			}
6004 
6005 			ack_needed = hdrp->ack;
6006 
6007 			/* set descriptor done bit */
6008 			rv = vnet_dring_entry_set_dstate(&(ldcp->rxdp[rxi]),
6009 			    ldcp->dring_mtype, ldcp->rx_dhandle, rxi, rxi,
6010 			    VIO_DESC_DONE);
6011 			if (rv != 0) {
6012 				DWARN(vgenp, ldcp,
6013 				    "vnet_dring_entry_set_dstate err rv(%d)\n",
6014 				    rv);
6015 				return (rv);
6016 			}
6017 
6018 			if (ack_needed) {
6019 				ack_needed = B_FALSE;
6020 				/*
6021 				 * sender needs ack for this packet,
6022 				 * ack pkts upto this index.
6023 				 */
6024 				ack_end = rxi;
6025 
6026 				rv = vgen_send_dring_ack(ldcp, tagp,
6027 				    ack_start, ack_end,
6028 				    VIO_DP_ACTIVE);
6029 				if (rv != VGEN_SUCCESS) {
6030 					goto error_ret;
6031 				}
6032 
6033 				/* need to set new ack start index */
6034 				set_ack_start = B_TRUE;
6035 			}
6036 			goto vgen_next_rxi;
6037 		}
6038 
6039 		nread = nbytes;
6040 		rv = ldc_mem_copy(ldcp->ldc_handle,
6041 		    (caddr_t)mp->b_rptr, off, &nread,
6042 		    rxd.memcookie, rxd.ncookies, LDC_COPY_IN);
6043 
6044 		/* if ldc_mem_copy() failed */
6045 		if (rv) {
6046 			DWARN(vgenp, ldcp, "ldc_mem_copy err rv(%d)\n", rv);
6047 			statsp->ierrors++;
6048 			freemsg(mp);
6049 			goto error_ret;
6050 		}
6051 
6052 		ack_needed = hdrp->ack;
6053 
6054 		rv = vnet_dring_entry_set_dstate(&(ldcp->rxdp[rxi]),
6055 		    ldcp->dring_mtype, ldcp->rx_dhandle, rxi, rxi,
6056 		    VIO_DESC_DONE);
6057 		if (rv != 0) {
6058 			DWARN(vgenp, ldcp,
6059 			    "vnet_dring_entry_set_dstate err rv(%d)\n", rv);
6060 			goto error_ret;
6061 		}
6062 
6063 		mp->b_rptr += VNET_IPALIGN;
6064 
6065 		if (ack_needed) {
6066 			ack_needed = B_FALSE;
6067 			/*
6068 			 * sender needs ack for this packet,
6069 			 * ack pkts upto this index.
6070 			 */
6071 			ack_end = rxi;
6072 
6073 			rv = vgen_send_dring_ack(ldcp, tagp,
6074 			    ack_start, ack_end, VIO_DP_ACTIVE);
6075 			if (rv != VGEN_SUCCESS) {
6076 				goto error_ret;
6077 			}
6078 
6079 			/* need to set new ack start index */
6080 			set_ack_start = B_TRUE;
6081 		}
6082 
6083 		if (nread != nbytes) {
6084 			DWARN(vgenp, ldcp,
6085 			    "ldc_mem_copy nread(%lx), nbytes(%lx)\n",
6086 			    nread, nbytes);
6087 			statsp->ierrors++;
6088 			freemsg(mp);
6089 			goto vgen_next_rxi;
6090 		}
6091 
6092 		/* point to the actual end of data */
6093 		mp->b_wptr = mp->b_rptr + rxd.nbytes;
6094 
6095 		/* update stats */
6096 		statsp->ipackets++;
6097 		statsp->rbytes += rxd.nbytes;
6098 		ehp = (struct ether_header *)mp->b_rptr;
6099 		if (IS_BROADCAST(ehp))
6100 			statsp->brdcstrcv++;
6101 		else if (IS_MULTICAST(ehp))
6102 			statsp->multircv++;
6103 
6104 		/* build a chain of received packets */
6105 		if (bp == NULL) {
6106 			/* first pkt */
6107 			bp = mp;
6108 			bpt = bp;
6109 			bpt->b_next = NULL;
6110 		} else {
6111 			mp->b_next = NULL;
6112 			bpt->b_next = mp;
6113 			bpt = mp;
6114 		}
6115 
6116 		if (count++ > vgen_chain_len) {
6117 			DTRACE_PROBE1(vgen_rcv_msgs, int, count);
6118 			vgen_rx(ldcp, bp);
6119 			count = 0;
6120 			bp = bpt = NULL;
6121 		}
6122 
6123 vgen_next_rxi:
6124 		/* update end index of range of descrs to be ack'd */
6125 		ack_end = rxi;
6126 
6127 		/* update the next index to be processed */
6128 		INCR_RXI(next_rxi, ldcp);
6129 		if (next_rxi == start) {
6130 			/*
6131 			 * processed the entire descriptor ring upto
6132 			 * the index at which we started.
6133 			 */
6134 			break;
6135 		}
6136 
6137 		rxi = next_rxi;
6138 
6139 	_NOTE(CONSTCOND)
6140 	} while (1);
6141 
6142 	/*
6143 	 * send an ack message to peer indicating that we have stopped
6144 	 * processing descriptors.
6145 	 */
6146 	if (set_ack_start) {
6147 		/*
6148 		 * We have ack'd upto some index and we have not
6149 		 * processed any descriptors beyond that index.
6150 		 * Use the last ack'd index as both the start and
6151 		 * end of range of descrs being ack'd.
6152 		 * Note: This results in acking the last index twice
6153 		 * and should be harmless.
6154 		 */
6155 		ack_start = ack_end;
6156 	}
6157 
6158 	rv = vgen_send_dring_ack(ldcp, tagp, ack_start, ack_end,
6159 	    VIO_DP_STOPPED);
6160 	if (rv != VGEN_SUCCESS) {
6161 		goto error_ret;
6162 	}
6163 
6164 	/* save new recv index of next dring msg */
6165 	ldcp->next_rxi = next_rxi;
6166 
6167 error_ret:
6168 	/* send up packets received so far */
6169 	if (bp != NULL) {
6170 		DTRACE_PROBE1(vgen_rcv_msgs, int, count);
6171 		vgen_rx(ldcp, bp);
6172 		bp = bpt = NULL;
6173 	}
6174 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
6175 	return (rv);
6176 
6177 }
6178 
6179 static int
6180 vgen_handle_dring_data_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
6181 {
6182 	int rv = 0;
6183 	uint32_t start;
6184 	int32_t end;
6185 	uint32_t txi;
6186 	boolean_t ready_txd = B_FALSE;
6187 	vgen_stats_t *statsp;
6188 	vgen_private_desc_t *tbufp;
6189 	vnet_public_desc_t *txdp;
6190 	vio_dring_entry_hdr_t *hdrp;
6191 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6192 	vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;
6193 
6194 	DBG1(vgenp, ldcp, "enter\n");
6195 	start = dringmsg->start_idx;
6196 	end = dringmsg->end_idx;
6197 	statsp = &ldcp->stats;
6198 
6199 	/*
6200 	 * received an ack corresponding to a specific descriptor for
6201 	 * which we had set the ACK bit in the descriptor (during
6202 	 * transmit). This enables us to reclaim descriptors.
6203 	 */
6204 
6205 	DBG2(vgenp, ldcp, "ACK:  start(%d), end(%d)\n", start, end);
6206 
6207 	/* validate start and end indeces in the tx ack msg */
6208 	if (!(CHECK_TXI(start, ldcp)) || !(CHECK_TXI(end, ldcp))) {
6209 		/* drop the message if invalid index */
6210 		DWARN(vgenp, ldcp, "Invalid Tx ack start(%d) or end(%d)\n",
6211 		    start, end);
6212 		return (rv);
6213 	}
6214 	/* validate dring_ident */
6215 	if (dringmsg->dring_ident != ldcp->local_hparams.dring_ident) {
6216 		/* invalid dring_ident, drop the msg */
6217 		DWARN(vgenp, ldcp, "Invalid dring ident 0x%x\n",
6218 		    dringmsg->dring_ident);
6219 		return (rv);
6220 	}
6221 	statsp->dring_data_acks++;
6222 
6223 	/* reclaim descriptors that are done */
6224 	vgen_reclaim(ldcp);
6225 
6226 	if (dringmsg->dring_process_state != VIO_DP_STOPPED) {
6227 		/*
6228 		 * receiver continued processing descriptors after
6229 		 * sending us the ack.
6230 		 */
6231 		return (rv);
6232 	}
6233 
6234 	statsp->dring_stopped_acks++;
6235 
6236 	/* receiver stopped processing descriptors */
6237 	mutex_enter(&ldcp->wrlock);
6238 	mutex_enter(&ldcp->tclock);
6239 
6240 	/*
6241 	 * determine if there are any pending tx descriptors
6242 	 * ready to be processed by the receiver(peer) and if so,
6243 	 * send a message to the peer to restart receiving.
6244 	 */
6245 	ready_txd = B_FALSE;
6246 
6247 	/*
6248 	 * using the end index of the descriptor range for which
6249 	 * we received the ack, check if the next descriptor is
6250 	 * ready.
6251 	 */
6252 	txi = end;
6253 	INCR_TXI(txi, ldcp);
6254 	tbufp = &ldcp->tbufp[txi];
6255 	txdp = tbufp->descp;
6256 	hdrp = &txdp->hdr;
6257 	if (hdrp->dstate == VIO_DESC_READY) {
6258 		ready_txd = B_TRUE;
6259 	} else {
6260 		/*
6261 		 * descr next to the end of ack'd descr range is not
6262 		 * ready.
6263 		 * starting from the current reclaim index, check
6264 		 * if any descriptor is ready.
6265 		 */
6266 
6267 		txi = ldcp->cur_tbufp - ldcp->tbufp;
6268 		tbufp = &ldcp->tbufp[txi];
6269 
6270 		txdp = tbufp->descp;
6271 		hdrp = &txdp->hdr;
6272 		if (hdrp->dstate == VIO_DESC_READY) {
6273 			ready_txd = B_TRUE;
6274 		}
6275 
6276 	}
6277 
6278 	if (ready_txd) {
6279 		/*
6280 		 * we have tx descriptor(s) ready to be
6281 		 * processed by the receiver.
6282 		 * send a message to the peer with the start index
6283 		 * of ready descriptors.
6284 		 */
6285 		rv = vgen_send_dring_data(ldcp, txi, -1);
6286 		if (rv != VGEN_SUCCESS) {
6287 			ldcp->resched_peer = B_TRUE;
6288 			ldcp->resched_peer_txi = txi;
6289 			mutex_exit(&ldcp->tclock);
6290 			mutex_exit(&ldcp->wrlock);
6291 			return (rv);
6292 		}
6293 	} else {
6294 		/*
6295 		 * no ready tx descriptors. set the flag to send a
6296 		 * message to peer when tx descriptors are ready in
6297 		 * transmit routine.
6298 		 */
6299 		ldcp->resched_peer = B_TRUE;
6300 		ldcp->resched_peer_txi = ldcp->cur_tbufp - ldcp->tbufp;
6301 	}
6302 
6303 	mutex_exit(&ldcp->tclock);
6304 	mutex_exit(&ldcp->wrlock);
6305 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
6306 	return (rv);
6307 }
6308 
6309 static int
6310 vgen_handle_dring_data_nack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
6311 {
6312 	int rv = 0;
6313 	uint32_t start;
6314 	int32_t end;
6315 	uint32_t txi;
6316 	vnet_public_desc_t *txdp;
6317 	vio_dring_entry_hdr_t *hdrp;
6318 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6319 	vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;
6320 
6321 	DBG1(vgenp, ldcp, "enter\n");
6322 	start = dringmsg->start_idx;
6323 	end = dringmsg->end_idx;
6324 
6325 	/*
6326 	 * peer sent a NACK msg to indicate lost packets.
6327 	 * The start and end correspond to the range of descriptors
6328 	 * for which the peer didn't receive a dring data msg and so
6329 	 * didn't receive the corresponding data.
6330 	 */
6331 	DWARN(vgenp, ldcp, "NACK: start(%d), end(%d)\n", start, end);
6332 
6333 	/* validate start and end indeces in the tx nack msg */
6334 	if (!(CHECK_TXI(start, ldcp)) || !(CHECK_TXI(end, ldcp))) {
6335 		/* drop the message if invalid index */
6336 		DWARN(vgenp, ldcp, "Invalid Tx nack start(%d) or end(%d)\n",
6337 		    start, end);
6338 		return (rv);
6339 	}
6340 	/* validate dring_ident */
6341 	if (dringmsg->dring_ident != ldcp->local_hparams.dring_ident) {
6342 		/* invalid dring_ident, drop the msg */
6343 		DWARN(vgenp, ldcp, "Invalid dring ident 0x%x\n",
6344 		    dringmsg->dring_ident);
6345 		return (rv);
6346 	}
6347 	mutex_enter(&ldcp->txlock);
6348 	mutex_enter(&ldcp->tclock);
6349 
6350 	if (ldcp->next_tbufp == ldcp->cur_tbufp) {
6351 		/* no busy descriptors, bogus nack ? */
6352 		mutex_exit(&ldcp->tclock);
6353 		mutex_exit(&ldcp->txlock);
6354 		return (rv);
6355 	}
6356 
6357 	/* we just mark the descrs as done so they can be reclaimed */
6358 	for (txi = start; txi <= end; ) {
6359 		txdp = &(ldcp->txdp[txi]);
6360 		hdrp = &txdp->hdr;
6361 		if (hdrp->dstate == VIO_DESC_READY)
6362 			hdrp->dstate = VIO_DESC_DONE;
6363 		INCR_TXI(txi, ldcp);
6364 	}
6365 	mutex_exit(&ldcp->tclock);
6366 	mutex_exit(&ldcp->txlock);
6367 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
6368 	return (rv);
6369 }
6370 
6371 static void
6372 vgen_reclaim(vgen_ldc_t *ldcp)
6373 {
6374 	mutex_enter(&ldcp->tclock);
6375 
6376 	vgen_reclaim_dring(ldcp);
6377 	ldcp->reclaim_lbolt = ddi_get_lbolt();
6378 
6379 	mutex_exit(&ldcp->tclock);
6380 }
6381 
6382 /*
6383  * transmit reclaim function. starting from the current reclaim index
6384  * look for descriptors marked DONE and reclaim the descriptor and the
6385  * corresponding buffers (tbuf).
6386  */
6387 static void
6388 vgen_reclaim_dring(vgen_ldc_t *ldcp)
6389 {
6390 	int count = 0;
6391 	vnet_public_desc_t *txdp;
6392 	vgen_private_desc_t *tbufp;
6393 	vio_dring_entry_hdr_t	*hdrp;
6394 
6395 #ifdef DEBUG
6396 	if (vgen_trigger_txtimeout)
6397 		return;
6398 #endif
6399 
6400 	tbufp = ldcp->cur_tbufp;
6401 	txdp = tbufp->descp;
6402 	hdrp = &txdp->hdr;
6403 
6404 	while ((hdrp->dstate == VIO_DESC_DONE) &&
6405 	    (tbufp != ldcp->next_tbufp)) {
6406 		tbufp->flags = VGEN_PRIV_DESC_FREE;
6407 		hdrp->dstate = VIO_DESC_FREE;
6408 		hdrp->ack = B_FALSE;
6409 
6410 		tbufp = NEXTTBUF(ldcp, tbufp);
6411 		txdp = tbufp->descp;
6412 		hdrp = &txdp->hdr;
6413 		count++;
6414 	}
6415 
6416 	ldcp->cur_tbufp = tbufp;
6417 
6418 	/*
6419 	 * Check if mac layer should be notified to restart transmissions
6420 	 */
6421 	if ((ldcp->need_resched) && (count > 0)) {
6422 		vio_net_tx_update_t vtx_update =
6423 		    ldcp->portp->vcb.vio_net_tx_update;
6424 
6425 		ldcp->need_resched = B_FALSE;
6426 		vtx_update(ldcp->portp->vhp);
6427 	}
6428 }
6429 
6430 /* return the number of pending transmits for the channel */
6431 static int
6432 vgen_num_txpending(vgen_ldc_t *ldcp)
6433 {
6434 	int n;
6435 
6436 	if (ldcp->next_tbufp >= ldcp->cur_tbufp) {
6437 		n = ldcp->next_tbufp - ldcp->cur_tbufp;
6438 	} else  {
6439 		/* cur_tbufp > next_tbufp */
6440 		n = ldcp->num_txds - (ldcp->cur_tbufp - ldcp->next_tbufp);
6441 	}
6442 
6443 	return (n);
6444 }
6445 
6446 /* determine if the transmit descriptor ring is full */
6447 static int
6448 vgen_tx_dring_full(vgen_ldc_t *ldcp)
6449 {
6450 	vgen_private_desc_t	*tbufp;
6451 	vgen_private_desc_t	*ntbufp;
6452 
6453 	tbufp = ldcp->next_tbufp;
6454 	ntbufp = NEXTTBUF(ldcp, tbufp);
6455 	if (ntbufp == ldcp->cur_tbufp) { /* out of tbufs/txds */
6456 		return (VGEN_SUCCESS);
6457 	}
6458 	return (VGEN_FAILURE);
6459 }
6460 
6461 /* determine if timeout condition has occured */
6462 static int
6463 vgen_ldc_txtimeout(vgen_ldc_t *ldcp)
6464 {
6465 	if (((ddi_get_lbolt() - ldcp->reclaim_lbolt) >
6466 	    drv_usectohz(vnet_ldcwd_txtimeout * 1000)) &&
6467 	    (vnet_ldcwd_txtimeout) &&
6468 	    (vgen_tx_dring_full(ldcp) == VGEN_SUCCESS)) {
6469 		return (VGEN_SUCCESS);
6470 	} else {
6471 		return (VGEN_FAILURE);
6472 	}
6473 }
6474 
6475 /* transmit watchdog timeout handler */
6476 static void
6477 vgen_ldc_watchdog(void *arg)
6478 {
6479 	vgen_ldc_t *ldcp;
6480 	vgen_t *vgenp;
6481 	int rv;
6482 
6483 	ldcp = (vgen_ldc_t *)arg;
6484 	vgenp = LDC_TO_VGEN(ldcp);
6485 
6486 	rv = vgen_ldc_txtimeout(ldcp);
6487 	if (rv == VGEN_SUCCESS) {
6488 		DWARN(vgenp, ldcp, "transmit timeout\n");
6489 #ifdef DEBUG
6490 		if (vgen_trigger_txtimeout) {
6491 			/* tx timeout triggered for debugging */
6492 			vgen_trigger_txtimeout = 0;
6493 		}
6494 #endif
6495 		mutex_enter(&ldcp->cblock);
6496 		ldcp->need_ldc_reset = B_TRUE;
6497 		vgen_handshake_retry(ldcp);
6498 		mutex_exit(&ldcp->cblock);
6499 		if (ldcp->need_resched) {
6500 			vio_net_tx_update_t vtx_update =
6501 			    ldcp->portp->vcb.vio_net_tx_update;
6502 
6503 			ldcp->need_resched = B_FALSE;
6504 			vtx_update(ldcp->portp->vhp);
6505 		}
6506 	}
6507 
6508 	ldcp->wd_tid = timeout(vgen_ldc_watchdog, (caddr_t)ldcp,
6509 	    drv_usectohz(vnet_ldcwd_interval * 1000));
6510 }
6511 
6512 /* handler for error messages received from the peer ldc end-point */
6513 static void
6514 vgen_handle_errmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
6515 {
6516 	_NOTE(ARGUNUSED(ldcp, tagp))
6517 }
6518 
6519 static int
6520 vgen_check_datamsg_seq(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
6521 {
6522 	vio_raw_data_msg_t	*rmsg;
6523 	vio_dring_msg_t		*dmsg;
6524 	uint64_t		seq_num;
6525 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
6526 
6527 	if (tagp->vio_subtype_env == VIO_DRING_DATA) {
6528 		dmsg = (vio_dring_msg_t *)tagp;
6529 		seq_num = dmsg->seq_num;
6530 	} else if (tagp->vio_subtype_env == VIO_PKT_DATA) {
6531 		rmsg = (vio_raw_data_msg_t *)tagp;
6532 		seq_num = rmsg->seq_num;
6533 	} else {
6534 		return (EINVAL);
6535 	}
6536 
6537 	if (seq_num != ldcp->next_rxseq) {
6538 
6539 		/* seqnums don't match */
6540 		DWARN(vgenp, ldcp,
6541 		    "next_rxseq(0x%lx) != seq_num(0x%lx)\n",
6542 		    ldcp->next_rxseq, seq_num);
6543 
6544 		ldcp->need_ldc_reset = B_TRUE;
6545 		return (EINVAL);
6546 
6547 	}
6548 
6549 	ldcp->next_rxseq++;
6550 
6551 	return (0);
6552 }
6553 
6554 /* Check if the session id in the received message is valid */
6555 static int
6556 vgen_check_sid(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
6557 {
6558 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6559 
6560 	if (tagp->vio_sid != ldcp->peer_sid) {
6561 		DWARN(vgenp, ldcp, "sid mismatch: expected(%x), rcvd(%x)\n",
6562 		    ldcp->peer_sid, tagp->vio_sid);
6563 		return (VGEN_FAILURE);
6564 	}
6565 	else
6566 		return (VGEN_SUCCESS);
6567 }
6568 
6569 static caddr_t
6570 vgen_print_ethaddr(uint8_t *a, char *ebuf)
6571 {
6572 	(void) sprintf(ebuf,
6573 	    "%x:%x:%x:%x:%x:%x", a[0], a[1], a[2], a[3], a[4], a[5]);
6574 	return (ebuf);
6575 }
6576 
6577 /* Handshake watchdog timeout handler */
6578 static void
6579 vgen_hwatchdog(void *arg)
6580 {
6581 	vgen_ldc_t *ldcp = (vgen_ldc_t *)arg;
6582 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6583 
6584 	DWARN(vgenp, ldcp,
6585 	    "handshake timeout ldc(%lx) phase(%x) state(%x)\n",
6586 	    ldcp->hphase, ldcp->hstate);
6587 
6588 	mutex_enter(&ldcp->cblock);
6589 	if (ldcp->cancel_htid) {
6590 		ldcp->cancel_htid = 0;
6591 		mutex_exit(&ldcp->cblock);
6592 		return;
6593 	}
6594 	ldcp->htid = 0;
6595 	ldcp->need_ldc_reset = B_TRUE;
6596 	vgen_handshake_retry(ldcp);
6597 	mutex_exit(&ldcp->cblock);
6598 }
6599 
6600 static void
6601 vgen_print_hparams(vgen_hparams_t *hp)
6602 {
6603 	uint8_t	addr[6];
6604 	char	ea[6];
6605 	ldc_mem_cookie_t *dc;
6606 
6607 	cmn_err(CE_CONT, "version_info:\n");
6608 	cmn_err(CE_CONT,
6609 	    "\tver_major: %d, ver_minor: %d, dev_class: %d\n",
6610 	    hp->ver_major, hp->ver_minor, hp->dev_class);
6611 
6612 	vnet_macaddr_ultostr(hp->addr, addr);
6613 	cmn_err(CE_CONT, "attr_info:\n");
6614 	cmn_err(CE_CONT, "\tMTU: %lx, addr: %s\n", hp->mtu,
6615 	    vgen_print_ethaddr(addr, ea));
6616 	cmn_err(CE_CONT,
6617 	    "\taddr_type: %x, xfer_mode: %x, ack_freq: %x\n",
6618 	    hp->addr_type, hp->xfer_mode, hp->ack_freq);
6619 
6620 	dc = &hp->dring_cookie;
6621 	cmn_err(CE_CONT, "dring_info:\n");
6622 	cmn_err(CE_CONT,
6623 	    "\tlength: %d, dsize: %d\n", hp->num_desc, hp->desc_size);
6624 	cmn_err(CE_CONT,
6625 	    "\tldc_addr: 0x%lx, ldc_size: %ld\n",
6626 	    dc->addr, dc->size);
6627 	cmn_err(CE_CONT, "\tdring_ident: 0x%lx\n", hp->dring_ident);
6628 }
6629 
6630 static void
6631 vgen_print_ldcinfo(vgen_ldc_t *ldcp)
6632 {
6633 	vgen_hparams_t *hp;
6634 
6635 	cmn_err(CE_CONT, "Channel Information:\n");
6636 	cmn_err(CE_CONT,
6637 	    "\tldc_id: 0x%lx, ldc_status: 0x%x\n",
6638 	    ldcp->ldc_id, ldcp->ldc_status);
6639 	cmn_err(CE_CONT,
6640 	    "\tlocal_sid: 0x%x, peer_sid: 0x%x\n",
6641 	    ldcp->local_sid, ldcp->peer_sid);
6642 	cmn_err(CE_CONT,
6643 	    "\thphase: 0x%x, hstate: 0x%x\n",
6644 	    ldcp->hphase, ldcp->hstate);
6645 
6646 	cmn_err(CE_CONT, "Local handshake params:\n");
6647 	hp = &ldcp->local_hparams;
6648 	vgen_print_hparams(hp);
6649 
6650 	cmn_err(CE_CONT, "Peer handshake params:\n");
6651 	hp = &ldcp->peer_hparams;
6652 	vgen_print_hparams(hp);
6653 }
6654 
6655 /*
6656  * Send received packets up the stack.
6657  */
6658 static void
6659 vgen_rx(vgen_ldc_t *ldcp, mblk_t *bp)
6660 {
6661 	vio_net_rx_cb_t vrx_cb = ldcp->portp->vcb.vio_net_rx_cb;
6662 
6663 	if (ldcp->rcv_thread != NULL) {
6664 		ASSERT(MUTEX_HELD(&ldcp->rxlock));
6665 		mutex_exit(&ldcp->rxlock);
6666 	} else {
6667 		ASSERT(MUTEX_HELD(&ldcp->cblock));
6668 		mutex_exit(&ldcp->cblock);
6669 	}
6670 
6671 	vrx_cb(ldcp->portp->vhp, bp);
6672 
6673 	if (ldcp->rcv_thread != NULL) {
6674 		mutex_enter(&ldcp->rxlock);
6675 	} else {
6676 		mutex_enter(&ldcp->cblock);
6677 	}
6678 }
6679 
6680 /*
6681  * vgen_ldc_rcv_worker -- A per LDC worker thread to receive data.
6682  * This thread is woken up by the LDC interrupt handler to process
6683  * LDC packets and receive data.
6684  */
6685 static void
6686 vgen_ldc_rcv_worker(void *arg)
6687 {
6688 	callb_cpr_t	cprinfo;
6689 	vgen_ldc_t *ldcp = (vgen_ldc_t *)arg;
6690 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6691 
6692 	DBG1(vgenp, ldcp, "enter\n");
6693 	CALLB_CPR_INIT(&cprinfo, &ldcp->rcv_thr_lock, callb_generic_cpr,
6694 	    "vnet_rcv_thread");
6695 	mutex_enter(&ldcp->rcv_thr_lock);
6696 	while (!(ldcp->rcv_thr_flags & VGEN_WTHR_STOP)) {
6697 
6698 		CALLB_CPR_SAFE_BEGIN(&cprinfo);
6699 		/*
6700 		 * Wait until the data is received or a stop
6701 		 * request is received.
6702 		 */
6703 		while (!(ldcp->rcv_thr_flags &
6704 		    (VGEN_WTHR_DATARCVD | VGEN_WTHR_STOP))) {
6705 			cv_wait(&ldcp->rcv_thr_cv, &ldcp->rcv_thr_lock);
6706 		}
6707 		CALLB_CPR_SAFE_END(&cprinfo, &ldcp->rcv_thr_lock)
6708 
6709 		/*
6710 		 * First process the stop request.
6711 		 */
6712 		if (ldcp->rcv_thr_flags & VGEN_WTHR_STOP) {
6713 			DBG2(vgenp, ldcp, "stopped\n");
6714 			break;
6715 		}
6716 		ldcp->rcv_thr_flags &= ~VGEN_WTHR_DATARCVD;
6717 		ldcp->rcv_thr_flags |= VGEN_WTHR_PROCESSING;
6718 		mutex_exit(&ldcp->rcv_thr_lock);
6719 		DBG2(vgenp, ldcp, "calling vgen_handle_evt_read\n");
6720 		vgen_handle_evt_read(ldcp);
6721 		mutex_enter(&ldcp->rcv_thr_lock);
6722 		ldcp->rcv_thr_flags &= ~VGEN_WTHR_PROCESSING;
6723 	}
6724 
6725 	/*
6726 	 * Update the run status and wakeup the thread that
6727 	 * has sent the stop request.
6728 	 */
6729 	ldcp->rcv_thr_flags &= ~VGEN_WTHR_STOP;
6730 	ldcp->rcv_thread = NULL;
6731 	CALLB_CPR_EXIT(&cprinfo);
6732 
6733 	thread_exit();
6734 	DBG1(vgenp, ldcp, "exit\n");
6735 }
6736 
6737 /* vgen_stop_rcv_thread -- Co-ordinate with receive thread to stop it */
6738 static void
6739 vgen_stop_rcv_thread(vgen_ldc_t *ldcp)
6740 {
6741 	kt_did_t	tid = 0;
6742 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6743 
6744 	DBG1(vgenp, ldcp, "enter\n");
6745 	/*
6746 	 * Send a stop request by setting the stop flag and
6747 	 * wait until the receive thread stops.
6748 	 */
6749 	mutex_enter(&ldcp->rcv_thr_lock);
6750 	if (ldcp->rcv_thread != NULL) {
6751 		tid = ldcp->rcv_thread->t_did;
6752 		ldcp->rcv_thr_flags |= VGEN_WTHR_STOP;
6753 		cv_signal(&ldcp->rcv_thr_cv);
6754 	}
6755 	mutex_exit(&ldcp->rcv_thr_lock);
6756 
6757 	if (tid != 0) {
6758 		thread_join(tid);
6759 	}
6760 	DBG1(vgenp, ldcp, "exit\n");
6761 }
6762 
6763 /*
6764  * Wait for the channel rx-queue to be drained by allowing the receive
6765  * worker thread to read all messages from the rx-queue of the channel.
6766  * Assumption: further callbacks are disabled at this time.
6767  */
6768 static void
6769 vgen_drain_rcv_thread(vgen_ldc_t *ldcp)
6770 {
6771 	clock_t	tm;
6772 	clock_t	wt;
6773 	clock_t	rv;
6774 
6775 	/*
6776 	 * If there is data in ldc rx queue, wait until the rx
6777 	 * worker thread runs and drains all msgs in the queue.
6778 	 */
6779 	wt = drv_usectohz(MILLISEC);
6780 
6781 	mutex_enter(&ldcp->rcv_thr_lock);
6782 
6783 	tm = ddi_get_lbolt() + wt;
6784 
6785 	/*
6786 	 * We need to check both bits - DATARCVD and PROCESSING, to be cleared.
6787 	 * If DATARCVD is set, that means the callback has signalled the worker
6788 	 * thread, but the worker hasn't started processing yet. If PROCESSING
6789 	 * is set, that means the thread is awake and processing. Note that the
6790 	 * DATARCVD state can only be seen once, as the assumption is that
6791 	 * further callbacks have been disabled at this point.
6792 	 */
6793 	while (ldcp->rcv_thr_flags &
6794 	    (VGEN_WTHR_DATARCVD | VGEN_WTHR_PROCESSING)) {
6795 		rv = cv_timedwait(&ldcp->rcv_thr_cv, &ldcp->rcv_thr_lock, tm);
6796 		if (rv == -1) {	/* timeout */
6797 			/*
6798 			 * Note that the only way we return is due to a timeout;
6799 			 * we set the new time to wait, before we go back and
6800 			 * check the condition. The other(unlikely) possibility
6801 			 * is a premature wakeup(see cv_timedwait(9F)) in which
6802 			 * case we just continue to use the same time to wait.
6803 			 */
6804 			tm = ddi_get_lbolt() + wt;
6805 		}
6806 	}
6807 
6808 	mutex_exit(&ldcp->rcv_thr_lock);
6809 }
6810 
6811 /*
6812  * vgen_dds_rx -- post DDS messages to vnet.
6813  */
6814 static int
6815 vgen_dds_rx(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
6816 {
6817 	vio_dds_msg_t *dmsg = (vio_dds_msg_t *)tagp;
6818 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
6819 
6820 	if (dmsg->dds_class != DDS_VNET_NIU) {
6821 		DWARN(vgenp, ldcp, "Unknown DDS class, dropping");
6822 		return (EBADMSG);
6823 	}
6824 	vnet_dds_rx(vgenp->vnetp, dmsg);
6825 	return (0);
6826 }
6827 
6828 /*
6829  * vgen_dds_tx -- an interface called by vnet to send DDS messages.
6830  */
6831 int
6832 vgen_dds_tx(void *arg, void *msg)
6833 {
6834 	vgen_t *vgenp = arg;
6835 	vio_dds_msg_t *dmsg = msg;
6836 	vgen_portlist_t *plistp = &vgenp->vgenports;
6837 	vgen_ldc_t *ldcp;
6838 	vgen_ldclist_t *ldclp;
6839 	int rv = EIO;
6840 
6841 
6842 	READ_ENTER(&plistp->rwlock);
6843 	ldclp = &(vgenp->vsw_portp->ldclist);
6844 	READ_ENTER(&ldclp->rwlock);
6845 	ldcp = ldclp->headp;
6846 	if ((ldcp == NULL) || (ldcp->hphase != VH_DONE)) {
6847 		goto vgen_dsend_exit;
6848 	}
6849 
6850 	dmsg->tag.vio_sid = ldcp->local_sid;
6851 	rv = vgen_sendmsg(ldcp, (caddr_t)dmsg, sizeof (vio_dds_msg_t), B_FALSE);
6852 	if (rv != VGEN_SUCCESS) {
6853 		rv = EIO;
6854 	} else {
6855 		rv = 0;
6856 	}
6857 
6858 vgen_dsend_exit:
6859 	RW_EXIT(&ldclp->rwlock);
6860 	RW_EXIT(&plistp->rwlock);
6861 	return (rv);
6862 
6863 }
6864 
6865 #if DEBUG
6866 
6867 /*
6868  * Print debug messages - set to 0xf to enable all msgs
6869  */
6870 static void
6871 debug_printf(const char *fname, vgen_t *vgenp,
6872     vgen_ldc_t *ldcp, const char *fmt, ...)
6873 {
6874 	char    buf[256];
6875 	char    *bufp = buf;
6876 	va_list ap;
6877 
6878 	if ((vgenp != NULL) && (vgenp->vnetp != NULL)) {
6879 		(void) sprintf(bufp, "vnet%d:",
6880 		    ((vnet_t *)(vgenp->vnetp))->instance);
6881 		bufp += strlen(bufp);
6882 	}
6883 	if (ldcp != NULL) {
6884 		(void) sprintf(bufp, "ldc(%ld):", ldcp->ldc_id);
6885 		bufp += strlen(bufp);
6886 	}
6887 	(void) sprintf(bufp, "%s: ", fname);
6888 	bufp += strlen(bufp);
6889 
6890 	va_start(ap, fmt);
6891 	(void) vsprintf(bufp, fmt, ap);
6892 	va_end(ap);
6893 
6894 	if ((ldcp == NULL) ||(vgendbg_ldcid == -1) ||
6895 	    (vgendbg_ldcid == ldcp->ldc_id)) {
6896 		cmn_err(CE_CONT, "%s\n", buf);
6897 	}
6898 }
6899 #endif
6900