xref: /titanic_44/usr/src/uts/sun4v/io/vsw_ldc.c (revision 99dda20867d903eec23291ba1ecb18a82d70096b)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/errno.h>
31 #include <sys/debug.h>
32 #include <sys/time.h>
33 #include <sys/sysmacros.h>
34 #include <sys/systm.h>
35 #include <sys/user.h>
36 #include <sys/stropts.h>
37 #include <sys/stream.h>
38 #include <sys/strlog.h>
39 #include <sys/strsubr.h>
40 #include <sys/cmn_err.h>
41 #include <sys/cpu.h>
42 #include <sys/kmem.h>
43 #include <sys/conf.h>
44 #include <sys/ddi.h>
45 #include <sys/sunddi.h>
46 #include <sys/ksynch.h>
47 #include <sys/stat.h>
48 #include <sys/kstat.h>
49 #include <sys/vtrace.h>
50 #include <sys/strsun.h>
51 #include <sys/dlpi.h>
52 #include <sys/ethernet.h>
53 #include <net/if.h>
54 #include <sys/varargs.h>
55 #include <sys/machsystm.h>
56 #include <sys/modctl.h>
57 #include <sys/modhash.h>
58 #include <sys/mac.h>
59 #include <sys/mac_ether.h>
60 #include <sys/taskq.h>
61 #include <sys/note.h>
62 #include <sys/mach_descrip.h>
63 #include <sys/mac.h>
64 #include <sys/mdeg.h>
65 #include <sys/ldc.h>
66 #include <sys/vsw_fdb.h>
67 #include <sys/vsw.h>
68 #include <sys/vio_mailbox.h>
69 #include <sys/vnet_mailbox.h>
70 #include <sys/vnet_common.h>
71 #include <sys/vio_util.h>
72 #include <sys/sdt.h>
73 #include <sys/atomic.h>
74 #include <sys/callb.h>
75 
76 /* Port add/deletion/etc routines */
77 static	int vsw_port_delete(vsw_port_t *port);
78 static	int vsw_ldc_attach(vsw_port_t *port, uint64_t ldc_id);
79 static	int vsw_ldc_detach(vsw_port_t *port, uint64_t ldc_id);
80 static	int vsw_init_ldcs(vsw_port_t *port);
81 static	int vsw_uninit_ldcs(vsw_port_t *port);
82 static	int vsw_ldc_init(vsw_ldc_t *ldcp);
83 static	int vsw_ldc_uninit(vsw_ldc_t *ldcp);
84 static	int vsw_drain_ldcs(vsw_port_t *port);
85 static	int vsw_drain_port_taskq(vsw_port_t *port);
86 static	void vsw_marker_task(void *);
87 static	int vsw_plist_del_node(vsw_t *, vsw_port_t *port);
88 int vsw_detach_ports(vsw_t *vswp);
89 int vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node);
90 mcst_addr_t *vsw_del_addr(uint8_t devtype, void *arg, uint64_t addr);
91 int vsw_port_detach(vsw_t *vswp, int p_instance);
92 int vsw_portsend(vsw_port_t *port, mblk_t *mp, mblk_t *mpt, uint32_t count);
93 int vsw_port_attach(vsw_t *vswp, int p_instance,
94 	uint64_t *ldcids, int nids, struct ether_addr *macaddr);
95 vsw_port_t *vsw_lookup_port(vsw_t *vswp, int p_instance);
96 
97 
98 /* Interrupt routines */
99 static	uint_t vsw_ldc_cb(uint64_t cb, caddr_t arg);
100 
101 /* Handshake routines */
102 static	void vsw_ldc_reinit(vsw_ldc_t *);
103 static	void vsw_process_conn_evt(vsw_ldc_t *, uint16_t);
104 static	void vsw_conn_task(void *);
105 static	int vsw_check_flag(vsw_ldc_t *, int, uint64_t);
106 static	void vsw_next_milestone(vsw_ldc_t *);
107 static	int vsw_supported_version(vio_ver_msg_t *);
108 static	void vsw_set_vnet_proto_ops(vsw_ldc_t *ldcp);
109 static	void vsw_reset_vnet_proto_ops(vsw_ldc_t *ldcp);
110 
111 /* Data processing routines */
112 static void vsw_process_pkt(void *);
113 static void vsw_dispatch_ctrl_task(vsw_ldc_t *, void *, vio_msg_tag_t *);
114 static void vsw_process_ctrl_pkt(void *);
115 static void vsw_process_ctrl_ver_pkt(vsw_ldc_t *, void *);
116 static void vsw_process_ctrl_attr_pkt(vsw_ldc_t *, void *);
117 static void vsw_process_ctrl_mcst_pkt(vsw_ldc_t *, void *);
118 static void vsw_process_ctrl_dring_reg_pkt(vsw_ldc_t *, void *);
119 static void vsw_process_ctrl_dring_unreg_pkt(vsw_ldc_t *, void *);
120 static void vsw_process_ctrl_rdx_pkt(vsw_ldc_t *, void *);
121 static void vsw_process_data_pkt(vsw_ldc_t *, void *, vio_msg_tag_t *,
122 	uint32_t);
123 static void vsw_process_data_dring_pkt(vsw_ldc_t *, void *);
124 static void vsw_process_pkt_data_nop(void *, void *, uint32_t);
125 static void vsw_process_pkt_data(void *, void *, uint32_t);
126 static void vsw_process_data_ibnd_pkt(vsw_ldc_t *, void *);
127 static void vsw_process_err_pkt(vsw_ldc_t *, void *, vio_msg_tag_t *);
128 
129 /* Switching/data transmit routines */
130 static	int vsw_dringsend(vsw_ldc_t *, mblk_t *);
131 static	int vsw_descrsend(vsw_ldc_t *, mblk_t *);
132 static void vsw_ldcsend_pkt(vsw_ldc_t *ldcp, mblk_t *mp);
133 static int vsw_ldcsend(vsw_ldc_t *ldcp, mblk_t *mp, uint32_t retries);
134 static int vsw_ldctx_pri(void *arg, mblk_t *mp, mblk_t *mpt, uint32_t count);
135 static int vsw_ldctx(void *arg, mblk_t *mp, mblk_t *mpt, uint32_t count);
136 
137 /* Packet creation routines */
138 static void vsw_send_ver(void *);
139 static void vsw_send_attr(vsw_ldc_t *);
140 static vio_dring_reg_msg_t *vsw_create_dring_info_pkt(vsw_ldc_t *);
141 static void vsw_send_dring_info(vsw_ldc_t *);
142 static void vsw_send_rdx(vsw_ldc_t *);
143 static int vsw_send_msg(vsw_ldc_t *, void *, int, boolean_t);
144 
145 /* Dring routines */
146 static dring_info_t *vsw_create_dring(vsw_ldc_t *);
147 static void vsw_create_privring(vsw_ldc_t *);
148 static int vsw_setup_ring(vsw_ldc_t *ldcp, dring_info_t *dp);
149 static int vsw_dring_find_free_desc(dring_info_t *, vsw_private_desc_t **,
150     int *);
151 static dring_info_t *vsw_ident2dring(lane_t *, uint64_t);
152 static int vsw_reclaim_dring(dring_info_t *dp, int start);
153 
154 static void vsw_set_lane_attr(vsw_t *, lane_t *);
155 static int vsw_check_attr(vnet_attr_msg_t *, vsw_ldc_t *);
156 static int vsw_dring_match(dring_info_t *dp, vio_dring_reg_msg_t *msg);
157 static int vsw_mem_cookie_match(ldc_mem_cookie_t *, ldc_mem_cookie_t *);
158 static int vsw_check_dring_info(vio_dring_reg_msg_t *);
159 
160 /* Rcv/Tx thread routines */
161 static void vsw_stop_tx_thread(vsw_ldc_t *ldcp);
162 static void vsw_ldc_tx_worker(void *arg);
163 static void vsw_stop_rx_thread(vsw_ldc_t *ldcp);
164 static void vsw_ldc_rx_worker(void *arg);
165 
166 /* Misc support routines */
167 static	caddr_t vsw_print_ethaddr(uint8_t *addr, char *ebuf);
168 static void vsw_free_lane_resources(vsw_ldc_t *, uint64_t);
169 static int vsw_free_ring(dring_info_t *);
170 static void vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr);
171 static int vsw_get_same_dest_list(struct ether_header *ehp,
172     mblk_t **rhead, mblk_t **rtail, mblk_t **mpp);
173 static mblk_t *vsw_dupmsgchain(mblk_t *mp);
174 static void vsw_mac_rx(vsw_t *vswp, int caller, mac_resource_handle_t mrh,
175     mblk_t *mp, mblk_t *mpt, vsw_macrx_flags_t flags);
176 
177 /* Debugging routines */
178 static void dump_flags(uint64_t);
179 static void display_state(void);
180 static void display_lane(lane_t *);
181 static void display_ring(dring_info_t *);
182 
183 /*
184  * Functions imported from other files.
185  */
186 extern int vsw_set_hw(vsw_t *, vsw_port_t *, int);
187 extern int vsw_unset_hw(vsw_t *, vsw_port_t *, int);
188 extern void vsw_reconfig_hw(vsw_t *);
189 extern int vsw_add_fdb(vsw_t *vswp, vsw_port_t *port);
190 extern int vsw_del_fdb(vsw_t *vswp, vsw_port_t *port);
191 extern int vsw_add_rem_mcst(vnet_mcast_msg_t *mcst_pkt, vsw_port_t *port);
192 extern void vsw_del_mcst_port(vsw_port_t *port);
193 extern int vsw_add_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg);
194 extern int vsw_del_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg);
195 
196 #define	VSW_NUM_VMPOOLS		3	/* number of vio mblk pools */
197 #define	VSW_PORT_REF_DELAY	30	/* delay for port ref_cnt to become 0 */
198 
199 /*
200  * Tunables used in this file.
201  */
202 extern int vsw_num_handshakes;
203 extern int vsw_wretries;
204 extern int vsw_desc_delay;
205 extern int vsw_read_attempts;
206 extern int vsw_ldc_tx_delay;
207 extern int vsw_ldc_tx_retries;
208 extern boolean_t vsw_ldc_rxthr_enabled;
209 extern boolean_t vsw_ldc_txthr_enabled;
210 extern uint32_t vsw_ntxds;
211 extern uint32_t vsw_max_tx_qcount;
212 extern uint32_t vsw_chain_len;
213 extern uint32_t vsw_mblk_size1;
214 extern uint32_t vsw_mblk_size2;
215 extern uint32_t vsw_mblk_size3;
216 extern uint32_t vsw_num_mblks1;
217 extern uint32_t vsw_num_mblks2;
218 extern uint32_t vsw_num_mblks3;
219 extern boolean_t vsw_obp_ver_proto_workaround;
220 
/*
 * Acquire / release all three per-channel locks in one step.
 *
 * The locks are taken in the order callback, receive, transmit and are
 * dropped in the reverse order.  Each macro expands to exactly one
 * statement so that it behaves correctly as the body of an unbraced
 * if/else; invoke it with a trailing semicolon.  Note that 'ldcp' is
 * evaluated more than once.
 */
#define	LDC_ENTER_LOCK(ldcp)	\
	do {	\
		mutex_enter(&((ldcp)->ldc_cblock));	\
		mutex_enter(&((ldcp)->ldc_rxlock));	\
		mutex_enter(&((ldcp)->ldc_txlock));	\
		_NOTE(CONSTCOND)	\
	} while (0)
#define	LDC_EXIT_LOCK(ldcp)	\
	do {	\
		mutex_exit(&((ldcp)->ldc_txlock));	\
		mutex_exit(&((ldcp)->ldc_rxlock));	\
		mutex_exit(&((ldcp)->ldc_cblock));	\
		_NOTE(CONSTCOND)	\
	} while (0)
229 
/*
 * Compare the protocol version recorded for a channel's outbound lane
 * against major.minor.  Both macros yield 0 or 1 and evaluate their
 * arguments more than once.
 */

/* true when the outbound lane version is exactly major.minor */
#define	VSW_VER_EQ(ldcp, major, minor)	\
	(!((ldcp)->lane_out.ver_major != (major) ||	\
	    (ldcp)->lane_out.ver_minor != (minor)))

/* true when the outbound lane version is older than major.minor */
#define	VSW_VER_LT(ldcp, major, minor)	\
	((ldcp)->lane_out.ver_major != (major) ?	\
	    (ldcp)->lane_out.ver_major < (major) :	\
	    (ldcp)->lane_out.ver_minor < (minor))
238 
239 /* supported versions */
240 static	ver_sup_t	vsw_versions[] = { {1, 2} };
241 
/*
 * For the moment the state dump routines have their own
 * private flag.
 *
 * When DUMP_STATE is non-zero the macros below print a message tag or
 * call the state dump routines; otherwise they expand to nothing.  In
 * both configurations each macro is meant to be used as a statement,
 * followed by a semicolon supplied by the caller.
 */
#define	DUMP_STATE	0

#if DUMP_STATE

/* print the type/subtype/subtype_env fields of a tag (by value) */
#define	DUMP_TAG(tag) \
	do {	\
		D1(NULL, "DUMP_TAG: type 0x%llx", (tag).vio_msgtype); \
		D1(NULL, "DUMP_TAG: stype 0x%llx", (tag).vio_subtype);	\
		D1(NULL, "DUMP_TAG: senv 0x%llx", (tag).vio_subtype_env); \
		_NOTE(CONSTCOND)	\
	} while (0)

/* as DUMP_TAG, but 'tag' is a pointer to the tag */
#define	DUMP_TAG_PTR(tag) \
	do {	\
		D1(NULL, "DUMP_TAG: type 0x%llx", (tag)->vio_msgtype); \
		D1(NULL, "DUMP_TAG: stype 0x%llx", (tag)->vio_subtype);	\
		D1(NULL, "DUMP_TAG: senv 0x%llx", (tag)->vio_subtype_env); \
		_NOTE(CONSTCOND)	\
	} while (0)

#define	DUMP_FLAGS(flags) dump_flags(flags)
#define	DISPLAY_STATE()	display_state()

#else

#define	DUMP_TAG(tag)
#define	DUMP_TAG_PTR(tag)
#define	DUMP_FLAGS(state)
#define	DISPLAY_STATE()

#endif	/* DUMP_STATE */
275 
276 /*
277  * Attach the specified port.
278  *
279  * Returns 0 on success, 1 on failure.
280  */
281 int
282 vsw_port_attach(vsw_t *vswp, int p_instance, uint64_t *ldcids, int nids,
283 struct ether_addr *macaddr)
284 {
285 	vsw_port_list_t		*plist = &vswp->plist;
286 	vsw_port_t		*port, **prev_port;
287 	int			i;
288 
289 	D1(vswp, "%s: enter : port %d", __func__, p_instance);
290 
291 	/* port already exists? */
292 	READ_ENTER(&plist->lockrw);
293 	for (port = plist->head; port != NULL; port = port->p_next) {
294 		if (port->p_instance == p_instance) {
295 			DWARN(vswp, "%s: port instance %d already attached",
296 			    __func__, p_instance);
297 			RW_EXIT(&plist->lockrw);
298 			return (1);
299 		}
300 	}
301 	RW_EXIT(&plist->lockrw);
302 
303 	port = kmem_zalloc(sizeof (vsw_port_t), KM_SLEEP);
304 	port->p_vswp = vswp;
305 	port->p_instance = p_instance;
306 	port->p_ldclist.num_ldcs = 0;
307 	port->p_ldclist.head = NULL;
308 	port->addr_set = VSW_ADDR_UNSET;
309 
310 	rw_init(&port->p_ldclist.lockrw, NULL, RW_DRIVER, NULL);
311 
312 	mutex_init(&port->tx_lock, NULL, MUTEX_DRIVER, NULL);
313 	mutex_init(&port->mca_lock, NULL, MUTEX_DRIVER, NULL);
314 
315 	mutex_init(&port->state_lock, NULL, MUTEX_DRIVER, NULL);
316 	cv_init(&port->state_cv, NULL, CV_DRIVER, NULL);
317 	port->state = VSW_PORT_INIT;
318 
319 	if (nids > VSW_PORT_MAX_LDCS) {
320 		D2(vswp, "%s: using first of %d ldc ids",
321 		    __func__, nids);
322 		nids = VSW_PORT_MAX_LDCS;
323 	}
324 
325 	D2(vswp, "%s: %d nids", __func__, nids);
326 	for (i = 0; i < nids; i++) {
327 		D2(vswp, "%s: ldcid (%llx)", __func__, (uint64_t)ldcids[i]);
328 		if (vsw_ldc_attach(port, (uint64_t)ldcids[i]) != 0) {
329 			DERR(vswp, "%s: ldc_attach failed", __func__);
330 
331 			rw_destroy(&port->p_ldclist.lockrw);
332 
333 			cv_destroy(&port->state_cv);
334 			mutex_destroy(&port->state_lock);
335 
336 			mutex_destroy(&port->tx_lock);
337 			mutex_destroy(&port->mca_lock);
338 			kmem_free(port, sizeof (vsw_port_t));
339 			return (1);
340 		}
341 	}
342 
343 	ether_copy(macaddr, &port->p_macaddr);
344 
345 	if (vswp->switching_setup_done == B_TRUE) {
346 		/*
347 		 * If the underlying physical device has been setup,
348 		 * program the mac address of this port in it.
349 		 * Otherwise, port macaddr will be set after the physical
350 		 * device is successfully setup by the timeout handler.
351 		 */
352 		mutex_enter(&vswp->hw_lock);
353 		(void) vsw_set_hw(vswp, port, VSW_VNETPORT);
354 		mutex_exit(&vswp->hw_lock);
355 	}
356 
357 	WRITE_ENTER(&plist->lockrw);
358 
359 	/* create the fdb entry for this port/mac address */
360 	(void) vsw_add_fdb(vswp, port);
361 
362 	/* link it into the list of ports for this vsw instance */
363 	prev_port = (vsw_port_t **)(&plist->head);
364 	port->p_next = *prev_port;
365 	*prev_port = port;
366 	plist->num_ports++;
367 
368 	RW_EXIT(&plist->lockrw);
369 
370 	/*
371 	 * Initialise the port and any ldc's under it.
372 	 */
373 	(void) vsw_init_ldcs(port);
374 
375 	D1(vswp, "%s: exit", __func__);
376 	return (0);
377 }
378 
379 /*
380  * Detach the specified port.
381  *
382  * Returns 0 on success, 1 on failure.
383  */
384 int
385 vsw_port_detach(vsw_t *vswp, int p_instance)
386 {
387 	vsw_port_t	*port = NULL;
388 	vsw_port_list_t	*plist = &vswp->plist;
389 
390 	D1(vswp, "%s: enter: port id %d", __func__, p_instance);
391 
392 	WRITE_ENTER(&plist->lockrw);
393 
394 	if ((port = vsw_lookup_port(vswp, p_instance)) == NULL) {
395 		RW_EXIT(&plist->lockrw);
396 		return (1);
397 	}
398 
399 	if (vsw_plist_del_node(vswp, port)) {
400 		RW_EXIT(&plist->lockrw);
401 		return (1);
402 	}
403 
404 	/* Remove the fdb entry for this port/mac address */
405 	(void) vsw_del_fdb(vswp, port);
406 
407 	/* Remove any multicast addresses.. */
408 	vsw_del_mcst_port(port);
409 
410 	/*
411 	 * No longer need to hold writer lock on port list now
412 	 * that we have unlinked the target port from the list.
413 	 */
414 	RW_EXIT(&plist->lockrw);
415 
416 	/* Remove address if was programmed into HW. */
417 	mutex_enter(&vswp->hw_lock);
418 
419 	/*
420 	 * Port's address may not have been set in hardware. This could
421 	 * happen if the underlying physical device is not yet available and
422 	 * vsw_setup_switching_timeout() may be in progress.
423 	 * We remove its addr from hardware only if it has been set before.
424 	 */
425 	if (port->addr_set != VSW_ADDR_UNSET)
426 		(void) vsw_unset_hw(vswp, port, VSW_VNETPORT);
427 
428 	if (vswp->recfg_reqd)
429 		vsw_reconfig_hw(vswp);
430 
431 	mutex_exit(&vswp->hw_lock);
432 
433 	if (vsw_port_delete(port)) {
434 		return (1);
435 	}
436 
437 	D1(vswp, "%s: exit: p_instance(%d)", __func__, p_instance);
438 	return (0);
439 }
440 
441 /*
442  * Detach all active ports.
443  *
444  * Returns 0 on success, 1 on failure.
445  */
446 int
447 vsw_detach_ports(vsw_t *vswp)
448 {
449 	vsw_port_list_t 	*plist = &vswp->plist;
450 	vsw_port_t		*port = NULL;
451 
452 	D1(vswp, "%s: enter", __func__);
453 
454 	WRITE_ENTER(&plist->lockrw);
455 
456 	while ((port = plist->head) != NULL) {
457 		if (vsw_plist_del_node(vswp, port)) {
458 			DERR(vswp, "%s: Error deleting port %d"
459 			    " from port list", __func__, port->p_instance);
460 			RW_EXIT(&plist->lockrw);
461 			return (1);
462 		}
463 
464 		/* Remove address if was programmed into HW. */
465 		mutex_enter(&vswp->hw_lock);
466 		(void) vsw_unset_hw(vswp, port, VSW_VNETPORT);
467 		mutex_exit(&vswp->hw_lock);
468 
469 		/* Remove the fdb entry for this port/mac address */
470 		(void) vsw_del_fdb(vswp, port);
471 
472 		/* Remove any multicast addresses.. */
473 		vsw_del_mcst_port(port);
474 
475 		/*
476 		 * No longer need to hold the lock on the port list
477 		 * now that we have unlinked the target port from the
478 		 * list.
479 		 */
480 		RW_EXIT(&plist->lockrw);
481 		if (vsw_port_delete(port)) {
482 			DERR(vswp, "%s: Error deleting port %d",
483 			    __func__, port->p_instance);
484 			return (1);
485 		}
486 		WRITE_ENTER(&plist->lockrw);
487 	}
488 	RW_EXIT(&plist->lockrw);
489 
490 	D1(vswp, "%s: exit", __func__);
491 
492 	return (0);
493 }
494 
495 /*
496  * Delete the specified port.
497  *
498  * Returns 0 on success, 1 on failure.
499  */
500 static int
501 vsw_port_delete(vsw_port_t *port)
502 {
503 	vsw_ldc_list_t 		*ldcl;
504 	vsw_t			*vswp = port->p_vswp;
505 
506 	D1(vswp, "%s: enter : port id %d", __func__, port->p_instance);
507 
508 	(void) vsw_uninit_ldcs(port);
509 
510 	/*
511 	 * Wait for any pending ctrl msg tasks which reference this
512 	 * port to finish.
513 	 */
514 	if (vsw_drain_port_taskq(port))
515 		return (1);
516 
517 	/*
518 	 * Wait for port reference count to hit zero.
519 	 */
520 	while (port->ref_cnt != 0) {
521 		delay(drv_usectohz(VSW_PORT_REF_DELAY));
522 	}
523 
524 	/*
525 	 * Wait for any active callbacks to finish
526 	 */
527 	if (vsw_drain_ldcs(port))
528 		return (1);
529 
530 	ldcl = &port->p_ldclist;
531 	WRITE_ENTER(&ldcl->lockrw);
532 	while (ldcl->num_ldcs > 0) {
533 		if (vsw_ldc_detach(port, ldcl->head->ldc_id) != 0) {
534 			cmn_err(CE_WARN, "!vsw%d: unable to detach ldc %ld",
535 			    vswp->instance, ldcl->head->ldc_id);
536 			RW_EXIT(&ldcl->lockrw);
537 			return (1);
538 		}
539 	}
540 	RW_EXIT(&ldcl->lockrw);
541 
542 	rw_destroy(&port->p_ldclist.lockrw);
543 
544 	mutex_destroy(&port->mca_lock);
545 	mutex_destroy(&port->tx_lock);
546 	cv_destroy(&port->state_cv);
547 	mutex_destroy(&port->state_lock);
548 
549 	kmem_free(port, sizeof (vsw_port_t));
550 
551 	D1(vswp, "%s: exit", __func__);
552 
553 	return (0);
554 }
555 
556 /*
557  * Attach a logical domain channel (ldc) under a specified port.
558  *
559  * Returns 0 on success, 1 on failure.
560  */
561 static int
562 vsw_ldc_attach(vsw_port_t *port, uint64_t ldc_id)
563 {
564 	vsw_t 		*vswp = port->p_vswp;
565 	vsw_ldc_list_t *ldcl = &port->p_ldclist;
566 	vsw_ldc_t 	*ldcp = NULL;
567 	ldc_attr_t 	attr;
568 	ldc_status_t	istatus;
569 	int 		status = DDI_FAILURE;
570 	int		rv;
571 	char		kname[MAXNAMELEN];
572 	enum		{ PROG_init = 0x0, PROG_mblks = 0x1,
573 			    PROG_callback = 0x2, PROG_rx_thread = 0x4,
574 			    PROG_tx_thread = 0x8}
575 			progress;
576 
577 	progress = PROG_init;
578 
579 	D1(vswp, "%s: enter", __func__);
580 
581 	ldcp = kmem_zalloc(sizeof (vsw_ldc_t), KM_NOSLEEP);
582 	if (ldcp == NULL) {
583 		DERR(vswp, "%s: kmem_zalloc failed", __func__);
584 		return (1);
585 	}
586 	ldcp->ldc_id = ldc_id;
587 
588 	/* Allocate pools of receive mblks */
589 	rv = vio_init_multipools(&ldcp->vmp, VSW_NUM_VMPOOLS,
590 	    vsw_mblk_size1, vsw_mblk_size2, vsw_mblk_size3,
591 	    vsw_num_mblks1, vsw_num_mblks2, vsw_num_mblks3);
592 	if (rv) {
593 		DWARN(vswp, "%s: unable to create free mblk pools for"
594 		    " channel %ld (rv %d)", __func__, ldc_id, rv);
595 		kmem_free(ldcp, sizeof (vsw_ldc_t));
596 		return (1);
597 	}
598 
599 	progress |= PROG_mblks;
600 
601 	mutex_init(&ldcp->ldc_txlock, NULL, MUTEX_DRIVER, NULL);
602 	mutex_init(&ldcp->ldc_rxlock, NULL, MUTEX_DRIVER, NULL);
603 	mutex_init(&ldcp->ldc_cblock, NULL, MUTEX_DRIVER, NULL);
604 	mutex_init(&ldcp->drain_cv_lock, NULL, MUTEX_DRIVER, NULL);
605 	cv_init(&ldcp->drain_cv, NULL, CV_DRIVER, NULL);
606 	rw_init(&ldcp->lane_in.dlistrw, NULL, RW_DRIVER, NULL);
607 	rw_init(&ldcp->lane_out.dlistrw, NULL, RW_DRIVER, NULL);
608 
609 	/* required for handshake with peer */
610 	ldcp->local_session = (uint64_t)ddi_get_lbolt();
611 	ldcp->peer_session = 0;
612 	ldcp->session_status = 0;
613 	ldcp->hss_id = 1;	/* Initial handshake session id */
614 
615 	/* only set for outbound lane, inbound set by peer */
616 	vsw_set_lane_attr(vswp, &ldcp->lane_out);
617 
618 	attr.devclass = LDC_DEV_NT_SVC;
619 	attr.instance = ddi_get_instance(vswp->dip);
620 	attr.mode = LDC_MODE_UNRELIABLE;
621 	attr.mtu = VSW_LDC_MTU;
622 	status = ldc_init(ldc_id, &attr, &ldcp->ldc_handle);
623 	if (status != 0) {
624 		DERR(vswp, "%s(%lld): ldc_init failed, rv (%d)",
625 		    __func__, ldc_id, status);
626 		goto ldc_attach_fail;
627 	}
628 
629 	if (vsw_ldc_rxthr_enabled) {
630 		ldcp->rx_thr_flags = 0;
631 
632 		mutex_init(&ldcp->rx_thr_lock, NULL, MUTEX_DRIVER, NULL);
633 		cv_init(&ldcp->rx_thr_cv, NULL, CV_DRIVER, NULL);
634 		ldcp->rx_thread = thread_create(NULL, 2 * DEFAULTSTKSZ,
635 		    vsw_ldc_rx_worker, ldcp, 0, &p0, TS_RUN, maxclsyspri);
636 
637 		progress |= PROG_rx_thread;
638 		if (ldcp->rx_thread == NULL) {
639 			DWARN(vswp, "%s(%lld): Failed to create worker thread",
640 			    __func__, ldc_id);
641 			goto ldc_attach_fail;
642 		}
643 	}
644 
645 	if (vsw_ldc_txthr_enabled) {
646 		ldcp->tx_thr_flags = 0;
647 		ldcp->tx_mhead = ldcp->tx_mtail = NULL;
648 
649 		mutex_init(&ldcp->tx_thr_lock, NULL, MUTEX_DRIVER, NULL);
650 		cv_init(&ldcp->tx_thr_cv, NULL, CV_DRIVER, NULL);
651 		ldcp->tx_thread = thread_create(NULL, 2 * DEFAULTSTKSZ,
652 		    vsw_ldc_tx_worker, ldcp, 0, &p0, TS_RUN, maxclsyspri);
653 
654 		progress |= PROG_tx_thread;
655 		if (ldcp->tx_thread == NULL) {
656 			DWARN(vswp, "%s(%lld): Failed to create worker thread",
657 			    __func__, ldc_id);
658 			goto ldc_attach_fail;
659 		}
660 	}
661 
662 	status = ldc_reg_callback(ldcp->ldc_handle, vsw_ldc_cb, (caddr_t)ldcp);
663 	if (status != 0) {
664 		DERR(vswp, "%s(%lld): ldc_reg_callback failed, rv (%d)",
665 		    __func__, ldc_id, status);
666 		(void) ldc_fini(ldcp->ldc_handle);
667 		goto ldc_attach_fail;
668 	}
669 	/*
670 	 * allocate a message for ldc_read()s, big enough to hold ctrl and
671 	 * data msgs, including raw data msgs used to recv priority frames.
672 	 */
673 	ldcp->msglen = VIO_PKT_DATA_HDRSIZE + ETHERMAX;
674 	ldcp->ldcmsg = kmem_alloc(ldcp->msglen, KM_SLEEP);
675 
676 	progress |= PROG_callback;
677 
678 	mutex_init(&ldcp->status_lock, NULL, MUTEX_DRIVER, NULL);
679 
680 	if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
681 		DERR(vswp, "%s: ldc_status failed", __func__);
682 		mutex_destroy(&ldcp->status_lock);
683 		goto ldc_attach_fail;
684 	}
685 
686 	ldcp->ldc_status = istatus;
687 	ldcp->ldc_port = port;
688 	ldcp->ldc_vswp = vswp;
689 
690 	vsw_reset_vnet_proto_ops(ldcp);
691 
692 	(void) sprintf(kname, "%sldc0x%lx", DRV_NAME, ldcp->ldc_id);
693 	ldcp->ksp = vgen_setup_kstats(DRV_NAME, vswp->instance,
694 	    kname, &ldcp->ldc_stats);
695 	if (ldcp->ksp == NULL) {
696 		DERR(vswp, "%s: kstats setup failed", __func__);
697 		goto ldc_attach_fail;
698 	}
699 
700 	/* link it into the list of channels for this port */
701 	WRITE_ENTER(&ldcl->lockrw);
702 	ldcp->ldc_next = ldcl->head;
703 	ldcl->head = ldcp;
704 	ldcl->num_ldcs++;
705 	RW_EXIT(&ldcl->lockrw);
706 
707 	D1(vswp, "%s: exit", __func__);
708 	return (0);
709 
710 ldc_attach_fail:
711 
712 	if (progress & PROG_callback) {
713 		(void) ldc_unreg_callback(ldcp->ldc_handle);
714 		kmem_free(ldcp->ldcmsg, ldcp->msglen);
715 	}
716 
717 	if (progress & PROG_rx_thread) {
718 		if (ldcp->rx_thread != NULL) {
719 			vsw_stop_rx_thread(ldcp);
720 		}
721 		mutex_destroy(&ldcp->rx_thr_lock);
722 		cv_destroy(&ldcp->rx_thr_cv);
723 	}
724 
725 	if (progress & PROG_tx_thread) {
726 		if (ldcp->tx_thread != NULL) {
727 			vsw_stop_tx_thread(ldcp);
728 		}
729 		mutex_destroy(&ldcp->tx_thr_lock);
730 		cv_destroy(&ldcp->tx_thr_cv);
731 	}
732 	if (ldcp->ksp != NULL) {
733 		vgen_destroy_kstats(ldcp->ksp);
734 	}
735 	mutex_destroy(&ldcp->ldc_txlock);
736 	mutex_destroy(&ldcp->ldc_rxlock);
737 	mutex_destroy(&ldcp->ldc_cblock);
738 	mutex_destroy(&ldcp->drain_cv_lock);
739 
740 	cv_destroy(&ldcp->drain_cv);
741 
742 	rw_destroy(&ldcp->lane_in.dlistrw);
743 	rw_destroy(&ldcp->lane_out.dlistrw);
744 
745 	if (progress & PROG_mblks) {
746 		vio_destroy_multipools(&ldcp->vmp, &vswp->rxh);
747 	}
748 	kmem_free(ldcp, sizeof (vsw_ldc_t));
749 
750 	return (1);
751 }
752 
753 /*
754  * Detach a logical domain channel (ldc) belonging to a
755  * particular port.
756  *
757  * Returns 0 on success, 1 on failure.
758  */
759 static int
760 vsw_ldc_detach(vsw_port_t *port, uint64_t ldc_id)
761 {
762 	vsw_t 		*vswp = port->p_vswp;
763 	vsw_ldc_t 	*ldcp, *prev_ldcp;
764 	vsw_ldc_list_t	*ldcl = &port->p_ldclist;
765 	int 		rv;
766 
767 	prev_ldcp = ldcl->head;
768 	for (; (ldcp = prev_ldcp) != NULL; prev_ldcp = ldcp->ldc_next) {
769 		if (ldcp->ldc_id == ldc_id) {
770 			break;
771 		}
772 	}
773 
774 	/* specified ldc id not found */
775 	if (ldcp == NULL) {
776 		DERR(vswp, "%s: ldcp = NULL", __func__);
777 		return (1);
778 	}
779 
780 	D2(vswp, "%s: detaching channel %lld", __func__, ldcp->ldc_id);
781 
782 	/* Stop the receive thread */
783 	if (ldcp->rx_thread != NULL) {
784 		vsw_stop_rx_thread(ldcp);
785 		mutex_destroy(&ldcp->rx_thr_lock);
786 		cv_destroy(&ldcp->rx_thr_cv);
787 	}
788 	kmem_free(ldcp->ldcmsg, ldcp->msglen);
789 
790 	/* Stop the tx thread */
791 	if (ldcp->tx_thread != NULL) {
792 		vsw_stop_tx_thread(ldcp);
793 		mutex_destroy(&ldcp->tx_thr_lock);
794 		cv_destroy(&ldcp->tx_thr_cv);
795 		if (ldcp->tx_mhead != NULL) {
796 			freemsgchain(ldcp->tx_mhead);
797 			ldcp->tx_mhead = ldcp->tx_mtail = NULL;
798 			ldcp->tx_cnt = 0;
799 		}
800 	}
801 
802 	/* Destory kstats */
803 	vgen_destroy_kstats(ldcp->ksp);
804 
805 	/*
806 	 * Before we can close the channel we must release any mapped
807 	 * resources (e.g. drings).
808 	 */
809 	vsw_free_lane_resources(ldcp, INBOUND);
810 	vsw_free_lane_resources(ldcp, OUTBOUND);
811 
812 	/*
813 	 * If the close fails we are in serious trouble, as won't
814 	 * be able to delete the parent port.
815 	 */
816 	if ((rv = ldc_close(ldcp->ldc_handle)) != 0) {
817 		DERR(vswp, "%s: error %d closing channel %lld",
818 		    __func__, rv, ldcp->ldc_id);
819 		return (1);
820 	}
821 
822 	(void) ldc_fini(ldcp->ldc_handle);
823 
824 	ldcp->ldc_status = LDC_INIT;
825 	ldcp->ldc_handle = NULL;
826 	ldcp->ldc_vswp = NULL;
827 
828 
829 	/*
830 	 * Most likely some mblks are still in use and
831 	 * have not been returned to the pool. These mblks are
832 	 * added to the pool that is maintained in the device instance.
833 	 * Another attempt will be made to destroy the pool
834 	 * when the device detaches.
835 	 */
836 	vio_destroy_multipools(&ldcp->vmp, &vswp->rxh);
837 
838 	/* unlink it from the list */
839 	prev_ldcp = ldcp->ldc_next;
840 	ldcl->num_ldcs--;
841 
842 	mutex_destroy(&ldcp->ldc_txlock);
843 	mutex_destroy(&ldcp->ldc_rxlock);
844 	mutex_destroy(&ldcp->ldc_cblock);
845 	cv_destroy(&ldcp->drain_cv);
846 	mutex_destroy(&ldcp->drain_cv_lock);
847 	mutex_destroy(&ldcp->status_lock);
848 	rw_destroy(&ldcp->lane_in.dlistrw);
849 	rw_destroy(&ldcp->lane_out.dlistrw);
850 
851 	kmem_free(ldcp, sizeof (vsw_ldc_t));
852 
853 	return (0);
854 }
855 
856 /*
857  * Open and attempt to bring up the channel. Note that channel
858  * can only be brought up if peer has also opened channel.
859  *
860  * Returns 0 if can open and bring up channel, otherwise
861  * returns 1.
862  */
863 static int
864 vsw_ldc_init(vsw_ldc_t *ldcp)
865 {
866 	vsw_t 		*vswp = ldcp->ldc_vswp;
867 	ldc_status_t	istatus = 0;
868 	int		rv;
869 
870 	D1(vswp, "%s: enter", __func__);
871 
872 	LDC_ENTER_LOCK(ldcp);
873 
874 	/* don't start at 0 in case clients don't like that */
875 	ldcp->next_ident = 1;
876 
877 	rv = ldc_open(ldcp->ldc_handle);
878 	if (rv != 0) {
879 		DERR(vswp, "%s: ldc_open failed: id(%lld) rv(%d)",
880 		    __func__, ldcp->ldc_id, rv);
881 		LDC_EXIT_LOCK(ldcp);
882 		return (1);
883 	}
884 
885 	if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
886 		DERR(vswp, "%s: unable to get status", __func__);
887 		LDC_EXIT_LOCK(ldcp);
888 		return (1);
889 
890 	} else if (istatus != LDC_OPEN && istatus != LDC_READY) {
891 		DERR(vswp, "%s: id (%lld) status(%d) is not OPEN/READY",
892 		    __func__, ldcp->ldc_id, istatus);
893 		LDC_EXIT_LOCK(ldcp);
894 		return (1);
895 	}
896 
897 	mutex_enter(&ldcp->status_lock);
898 	ldcp->ldc_status = istatus;
899 	mutex_exit(&ldcp->status_lock);
900 
901 	rv = ldc_up(ldcp->ldc_handle);
902 	if (rv != 0) {
903 		/*
904 		 * Not a fatal error for ldc_up() to fail, as peer
905 		 * end point may simply not be ready yet.
906 		 */
907 		D2(vswp, "%s: ldc_up err id(%lld) rv(%d)", __func__,
908 		    ldcp->ldc_id, rv);
909 		LDC_EXIT_LOCK(ldcp);
910 		return (1);
911 	}
912 
913 	/*
914 	 * ldc_up() call is non-blocking so need to explicitly
915 	 * check channel status to see if in fact the channel
916 	 * is UP.
917 	 */
918 	mutex_enter(&ldcp->status_lock);
919 	if (ldc_status(ldcp->ldc_handle, &ldcp->ldc_status) != 0) {
920 		DERR(vswp, "%s: unable to get status", __func__);
921 		mutex_exit(&ldcp->status_lock);
922 		LDC_EXIT_LOCK(ldcp);
923 		return (1);
924 
925 	}
926 
927 	if (ldcp->ldc_status == LDC_UP) {
928 		D2(vswp, "%s: channel %ld now UP (%ld)", __func__,
929 		    ldcp->ldc_id, istatus);
930 		mutex_exit(&ldcp->status_lock);
931 		LDC_EXIT_LOCK(ldcp);
932 
933 		vsw_process_conn_evt(ldcp, VSW_CONN_UP);
934 		return (0);
935 	}
936 
937 	mutex_exit(&ldcp->status_lock);
938 	LDC_EXIT_LOCK(ldcp);
939 
940 	D1(vswp, "%s: exit", __func__);
941 	return (0);
942 }
943 
944 /* disable callbacks on the channel */
945 static int
946 vsw_ldc_uninit(vsw_ldc_t *ldcp)
947 {
948 	vsw_t	*vswp = ldcp->ldc_vswp;
949 	int	rv;
950 
951 	D1(vswp, "vsw_ldc_uninit: enter: id(%lx)\n", ldcp->ldc_id);
952 
953 	LDC_ENTER_LOCK(ldcp);
954 
955 	rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE);
956 	if (rv != 0) {
957 		DERR(vswp, "vsw_ldc_uninit(%lld): error disabling "
958 		    "interrupts (rv = %d)\n", ldcp->ldc_id, rv);
959 		LDC_EXIT_LOCK(ldcp);
960 		return (1);
961 	}
962 
963 	mutex_enter(&ldcp->status_lock);
964 	ldcp->ldc_status = LDC_INIT;
965 	mutex_exit(&ldcp->status_lock);
966 
967 	LDC_EXIT_LOCK(ldcp);
968 
969 	D1(vswp, "vsw_ldc_uninit: exit: id(%lx)", ldcp->ldc_id);
970 
971 	return (0);
972 }
973 
974 static int
975 vsw_init_ldcs(vsw_port_t *port)
976 {
977 	vsw_ldc_list_t	*ldcl = &port->p_ldclist;
978 	vsw_ldc_t	*ldcp;
979 
980 	READ_ENTER(&ldcl->lockrw);
981 	ldcp =  ldcl->head;
982 	for (; ldcp  != NULL; ldcp = ldcp->ldc_next) {
983 		(void) vsw_ldc_init(ldcp);
984 	}
985 	RW_EXIT(&ldcl->lockrw);
986 
987 	return (0);
988 }
989 
990 static int
991 vsw_uninit_ldcs(vsw_port_t *port)
992 {
993 	vsw_ldc_list_t	*ldcl = &port->p_ldclist;
994 	vsw_ldc_t	*ldcp;
995 
996 	D1(NULL, "vsw_uninit_ldcs: enter\n");
997 
998 	READ_ENTER(&ldcl->lockrw);
999 	ldcp =  ldcl->head;
1000 	for (; ldcp  != NULL; ldcp = ldcp->ldc_next) {
1001 		(void) vsw_ldc_uninit(ldcp);
1002 	}
1003 	RW_EXIT(&ldcl->lockrw);
1004 
1005 	D1(NULL, "vsw_uninit_ldcs: exit\n");
1006 
1007 	return (0);
1008 }
1009 
1010 /*
1011  * Wait until the callback(s) associated with the ldcs under the specified
1012  * port have completed.
1013  *
1014  * Prior to this function being invoked each channel under this port
1015  * should have been quiesced via ldc_set_cb_mode(DISABLE).
1016  *
1017  * A short explanation of what we are doing below..
1018  *
1019  * The simplest approach would be to have a reference counter in
1020  * the ldc structure which is increment/decremented by the callbacks as
1021  * they use the channel. The drain function could then simply disable any
1022  * further callbacks and do a cv_wait for the ref to hit zero. Unfortunately
1023  * there is a tiny window here - before the callback is able to get the lock
1024  * on the channel it is interrupted and this function gets to execute. It
1025  * sees that the ref count is zero and believes its free to delete the
1026  * associated data structures.
1027  *
1028  * We get around this by taking advantage of the fact that before the ldc
1029  * framework invokes a callback it sets a flag to indicate that there is a
1030  * callback active (or about to become active). If when we attempt to
1031  * unregister a callback when this active flag is set then the unregister
1032  * will fail with EWOULDBLOCK.
1033  *
1034  * If the unregister fails we do a cv_timedwait. We will either be signaled
1035  * by the callback as it is exiting (note we have to wait a short period to
1036  * allow the callback to return fully to the ldc framework and it to clear
1037  * the active flag), or by the timer expiring. In either case we again attempt
1038  * the unregister. We repeat this until we can succesfully unregister the
1039  * callback.
1040  *
1041  * The reason we use a cv_timedwait rather than a simple cv_wait is to catch
1042  * the case where the callback has finished but the ldc framework has not yet
1043  * cleared the active flag. In this case we would never get a cv_signal.
1044  */
1045 static int
1046 vsw_drain_ldcs(vsw_port_t *port)
1047 {
1048 	vsw_ldc_list_t	*ldcl = &port->p_ldclist;
1049 	vsw_ldc_t	*ldcp;
1050 	vsw_t		*vswp = port->p_vswp;
1051 
1052 	D1(vswp, "%s: enter", __func__);
1053 
1054 	READ_ENTER(&ldcl->lockrw);
1055 
1056 	ldcp = ldcl->head;
1057 
1058 	for (; ldcp  != NULL; ldcp = ldcp->ldc_next) {
1059 		/*
1060 		 * If we can unregister the channel callback then we
1061 		 * know that there is no callback either running or
1062 		 * scheduled to run for this channel so move on to next
1063 		 * channel in the list.
1064 		 */
1065 		mutex_enter(&ldcp->drain_cv_lock);
1066 
1067 		/* prompt active callbacks to quit */
1068 		ldcp->drain_state = VSW_LDC_DRAINING;
1069 
1070 		if ((ldc_unreg_callback(ldcp->ldc_handle)) == 0) {
1071 			D2(vswp, "%s: unreg callback for chan %ld", __func__,
1072 			    ldcp->ldc_id);
1073 			mutex_exit(&ldcp->drain_cv_lock);
1074 			continue;
1075 		} else {
1076 			/*
1077 			 * If we end up here we know that either 1) a callback
1078 			 * is currently executing, 2) is about to start (i.e.
1079 			 * the ldc framework has set the active flag but
1080 			 * has not actually invoked the callback yet, or 3)
1081 			 * has finished and has returned to the ldc framework
1082 			 * but the ldc framework has not yet cleared the
1083 			 * active bit.
1084 			 *
1085 			 * Wait for it to finish.
1086 			 */
1087 			while (ldc_unreg_callback(ldcp->ldc_handle)
1088 			    == EWOULDBLOCK)
1089 				(void) cv_timedwait(&ldcp->drain_cv,
1090 				    &ldcp->drain_cv_lock, lbolt + hz);
1091 
1092 			mutex_exit(&ldcp->drain_cv_lock);
1093 			D2(vswp, "%s: unreg callback for chan %ld after "
1094 			    "timeout", __func__, ldcp->ldc_id);
1095 		}
1096 	}
1097 	RW_EXIT(&ldcl->lockrw);
1098 
1099 	D1(vswp, "%s: exit", __func__);
1100 	return (0);
1101 }
1102 
1103 /*
1104  * Wait until all tasks which reference this port have completed.
1105  *
1106  * Prior to this function being invoked each channel under this port
1107  * should have been quiesced via ldc_set_cb_mode(DISABLE).
1108  */
1109 static int
1110 vsw_drain_port_taskq(vsw_port_t *port)
1111 {
1112 	vsw_t		*vswp = port->p_vswp;
1113 
1114 	D1(vswp, "%s: enter", __func__);
1115 
1116 	/*
1117 	 * Mark the port as in the process of being detached, and
1118 	 * dispatch a marker task to the queue so we know when all
1119 	 * relevant tasks have completed.
1120 	 */
1121 	mutex_enter(&port->state_lock);
1122 	port->state = VSW_PORT_DETACHING;
1123 
1124 	if ((vswp->taskq_p == NULL) ||
1125 	    (ddi_taskq_dispatch(vswp->taskq_p, vsw_marker_task,
1126 	    port, DDI_NOSLEEP) != DDI_SUCCESS)) {
1127 		DERR(vswp, "%s: unable to dispatch marker task",
1128 		    __func__);
1129 		mutex_exit(&port->state_lock);
1130 		return (1);
1131 	}
1132 
1133 	/*
1134 	 * Wait for the marker task to finish.
1135 	 */
1136 	while (port->state != VSW_PORT_DETACHABLE)
1137 		cv_wait(&port->state_cv, &port->state_lock);
1138 
1139 	mutex_exit(&port->state_lock);
1140 
1141 	D1(vswp, "%s: exit", __func__);
1142 
1143 	return (0);
1144 }
1145 
1146 static void
1147 vsw_marker_task(void *arg)
1148 {
1149 	vsw_port_t	*port = arg;
1150 	vsw_t		*vswp = port->p_vswp;
1151 
1152 	D1(vswp, "%s: enter", __func__);
1153 
1154 	mutex_enter(&port->state_lock);
1155 
1156 	/*
1157 	 * No further tasks should be dispatched which reference
1158 	 * this port so ok to mark it as safe to detach.
1159 	 */
1160 	port->state = VSW_PORT_DETACHABLE;
1161 
1162 	cv_signal(&port->state_cv);
1163 
1164 	mutex_exit(&port->state_lock);
1165 
1166 	D1(vswp, "%s: exit", __func__);
1167 }
1168 
1169 vsw_port_t *
1170 vsw_lookup_port(vsw_t *vswp, int p_instance)
1171 {
1172 	vsw_port_list_t *plist = &vswp->plist;
1173 	vsw_port_t	*port;
1174 
1175 	for (port = plist->head; port != NULL; port = port->p_next) {
1176 		if (port->p_instance == p_instance) {
1177 			D2(vswp, "vsw_lookup_port: found p_instance\n");
1178 			return (port);
1179 		}
1180 	}
1181 
1182 	return (NULL);
1183 }
1184 
1185 /*
1186  * Search for and remove the specified port from the port
1187  * list. Returns 0 if able to locate and remove port, otherwise
1188  * returns 1.
1189  */
1190 static int
1191 vsw_plist_del_node(vsw_t *vswp, vsw_port_t *port)
1192 {
1193 	vsw_port_list_t *plist = &vswp->plist;
1194 	vsw_port_t	*curr_p, *prev_p;
1195 
1196 	if (plist->head == NULL)
1197 		return (1);
1198 
1199 	curr_p = prev_p = plist->head;
1200 
1201 	while (curr_p != NULL) {
1202 		if (curr_p == port) {
1203 			if (prev_p == curr_p) {
1204 				plist->head = curr_p->p_next;
1205 			} else {
1206 				prev_p->p_next = curr_p->p_next;
1207 			}
1208 			plist->num_ports--;
1209 			break;
1210 		} else {
1211 			prev_p = curr_p;
1212 			curr_p = curr_p->p_next;
1213 		}
1214 	}
1215 	return (0);
1216 }
1217 
1218 /*
1219  * Interrupt handler for ldc messages.
1220  */
1221 static uint_t
1222 vsw_ldc_cb(uint64_t event, caddr_t arg)
1223 {
1224 	vsw_ldc_t	*ldcp = (vsw_ldc_t  *)arg;
1225 	vsw_t 		*vswp = ldcp->ldc_vswp;
1226 
1227 	D1(vswp, "%s: enter: ldcid (%lld)\n", __func__, ldcp->ldc_id);
1228 
1229 	mutex_enter(&ldcp->ldc_cblock);
1230 	ldcp->ldc_stats.callbacks++;
1231 
1232 	mutex_enter(&ldcp->status_lock);
1233 	if ((ldcp->ldc_status == LDC_INIT) || (ldcp->ldc_handle == NULL)) {
1234 		mutex_exit(&ldcp->status_lock);
1235 		mutex_exit(&ldcp->ldc_cblock);
1236 		return (LDC_SUCCESS);
1237 	}
1238 	mutex_exit(&ldcp->status_lock);
1239 
1240 	if (event & LDC_EVT_UP) {
1241 		/*
1242 		 * Channel has come up.
1243 		 */
1244 		D2(vswp, "%s: id(%ld) event(%llx) UP: status(%ld)",
1245 		    __func__, ldcp->ldc_id, event, ldcp->ldc_status);
1246 
1247 		vsw_process_conn_evt(ldcp, VSW_CONN_UP);
1248 
1249 		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);
1250 	}
1251 
1252 	if (event & LDC_EVT_READ) {
1253 		/*
1254 		 * Data available for reading.
1255 		 */
1256 		D2(vswp, "%s: id(ld) event(%llx) data READ",
1257 		    __func__, ldcp->ldc_id, event);
1258 
1259 		if (ldcp->rx_thread != NULL) {
1260 			/*
1261 			 * If the receive thread is enabled, then
1262 			 * wakeup the receive thread to process the
1263 			 * LDC messages.
1264 			 */
1265 			mutex_exit(&ldcp->ldc_cblock);
1266 			mutex_enter(&ldcp->rx_thr_lock);
1267 			if (!(ldcp->rx_thr_flags & VSW_WTHR_DATARCVD)) {
1268 				ldcp->rx_thr_flags |= VSW_WTHR_DATARCVD;
1269 				cv_signal(&ldcp->rx_thr_cv);
1270 			}
1271 			mutex_exit(&ldcp->rx_thr_lock);
1272 			mutex_enter(&ldcp->ldc_cblock);
1273 		} else {
1274 			vsw_process_pkt(ldcp);
1275 		}
1276 
1277 		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);
1278 
1279 		goto vsw_cb_exit;
1280 	}
1281 
1282 	if (event & (LDC_EVT_DOWN | LDC_EVT_RESET)) {
1283 		D2(vswp, "%s: id(%ld) event (%lx) DOWN/RESET: status(%ld)",
1284 		    __func__, ldcp->ldc_id, event, ldcp->ldc_status);
1285 
1286 		vsw_process_conn_evt(ldcp, VSW_CONN_RESET);
1287 	}
1288 
1289 	/*
1290 	 * Catch either LDC_EVT_WRITE which we don't support or any
1291 	 * unknown event.
1292 	 */
1293 	if (event &
1294 	    ~(LDC_EVT_UP | LDC_EVT_RESET | LDC_EVT_DOWN | LDC_EVT_READ)) {
1295 		DERR(vswp, "%s: id(%ld) Unexpected event=(%llx) status(%ld)",
1296 		    __func__, ldcp->ldc_id, event, ldcp->ldc_status);
1297 	}
1298 
1299 vsw_cb_exit:
1300 	mutex_exit(&ldcp->ldc_cblock);
1301 
1302 	/*
1303 	 * Let the drain function know we are finishing if it
1304 	 * is waiting.
1305 	 */
1306 	mutex_enter(&ldcp->drain_cv_lock);
1307 	if (ldcp->drain_state == VSW_LDC_DRAINING)
1308 		cv_signal(&ldcp->drain_cv);
1309 	mutex_exit(&ldcp->drain_cv_lock);
1310 
1311 	return (LDC_SUCCESS);
1312 }
1313 
1314 /*
1315  * Reinitialise data structures associated with the channel.
1316  */
1317 static void
1318 vsw_ldc_reinit(vsw_ldc_t *ldcp)
1319 {
1320 	vsw_t		*vswp = ldcp->ldc_vswp;
1321 	vsw_port_t	*port;
1322 	vsw_ldc_list_t	*ldcl;
1323 
1324 	D1(vswp, "%s: enter", __func__);
1325 
1326 	port = ldcp->ldc_port;
1327 	ldcl = &port->p_ldclist;
1328 
1329 	READ_ENTER(&ldcl->lockrw);
1330 
1331 	D2(vswp, "%s: in 0x%llx : out 0x%llx", __func__,
1332 	    ldcp->lane_in.lstate, ldcp->lane_out.lstate);
1333 
1334 	vsw_free_lane_resources(ldcp, INBOUND);
1335 	vsw_free_lane_resources(ldcp, OUTBOUND);
1336 	RW_EXIT(&ldcl->lockrw);
1337 
1338 	ldcp->lane_in.lstate = 0;
1339 	ldcp->lane_out.lstate = 0;
1340 
1341 	/*
1342 	 * Remove parent port from any multicast groups
1343 	 * it may have registered with. Client must resend
1344 	 * multicast add command after handshake completes.
1345 	 */
1346 	(void) vsw_del_fdb(vswp, port);
1347 
1348 	vsw_del_mcst_port(port);
1349 
1350 	ldcp->peer_session = 0;
1351 	ldcp->session_status = 0;
1352 	ldcp->hcnt = 0;
1353 	ldcp->hphase = VSW_MILESTONE0;
1354 
1355 	vsw_reset_vnet_proto_ops(ldcp);
1356 
1357 	D1(vswp, "%s: exit", __func__);
1358 }
1359 
1360 /*
1361  * Process a connection event.
1362  *
1363  * Note - care must be taken to ensure that this function is
1364  * not called with the dlistrw lock held.
1365  */
1366 static void
1367 vsw_process_conn_evt(vsw_ldc_t *ldcp, uint16_t evt)
1368 {
1369 	vsw_t		*vswp = ldcp->ldc_vswp;
1370 	vsw_conn_evt_t	*conn = NULL;
1371 
1372 	D1(vswp, "%s: enter", __func__);
1373 
1374 	/*
1375 	 * Check if either a reset or restart event is pending
1376 	 * or in progress. If so just return.
1377 	 *
1378 	 * A VSW_CONN_RESET event originates either with a LDC_RESET_EVT
1379 	 * being received by the callback handler, or a ECONNRESET error
1380 	 * code being returned from a ldc_read() or ldc_write() call.
1381 	 *
1382 	 * A VSW_CONN_RESTART event occurs when some error checking code
1383 	 * decides that there is a problem with data from the channel,
1384 	 * and that the handshake should be restarted.
1385 	 */
1386 	if (((evt == VSW_CONN_RESET) || (evt == VSW_CONN_RESTART)) &&
1387 	    (ldstub((uint8_t *)&ldcp->reset_active)))
1388 		return;
1389 
1390 	/*
1391 	 * If it is an LDC_UP event we first check the recorded
1392 	 * state of the channel. If this is UP then we know that
1393 	 * the channel moving to the UP state has already been dealt
1394 	 * with and don't need to dispatch a  new task.
1395 	 *
1396 	 * The reason for this check is that when we do a ldc_up(),
1397 	 * depending on the state of the peer, we may or may not get
1398 	 * a LDC_UP event. As we can't depend on getting a LDC_UP evt
1399 	 * every time we do ldc_up() we explicitly check the channel
1400 	 * status to see has it come up (ldc_up() is asynch and will
1401 	 * complete at some undefined time), and take the appropriate
1402 	 * action.
1403 	 *
1404 	 * The flip side of this is that we may get a LDC_UP event
1405 	 * when we have already seen that the channel is up and have
1406 	 * dealt with that.
1407 	 */
1408 	mutex_enter(&ldcp->status_lock);
1409 	if (evt == VSW_CONN_UP) {
1410 		if ((ldcp->ldc_status == LDC_UP) || (ldcp->reset_active != 0)) {
1411 			mutex_exit(&ldcp->status_lock);
1412 			return;
1413 		}
1414 	}
1415 	mutex_exit(&ldcp->status_lock);
1416 
1417 	/*
1418 	 * The transaction group id allows us to identify and discard
1419 	 * any tasks which are still pending on the taskq and refer
1420 	 * to the handshake session we are about to restart or reset.
1421 	 * These stale messages no longer have any real meaning.
1422 	 */
1423 	(void) atomic_inc_32(&ldcp->hss_id);
1424 
1425 	ASSERT(vswp->taskq_p != NULL);
1426 
1427 	if ((conn = kmem_zalloc(sizeof (vsw_conn_evt_t), KM_NOSLEEP)) == NULL) {
1428 		cmn_err(CE_WARN, "!vsw%d: unable to allocate memory for"
1429 		    " connection event", vswp->instance);
1430 		goto err_exit;
1431 	}
1432 
1433 	conn->evt = evt;
1434 	conn->ldcp = ldcp;
1435 
1436 	if (ddi_taskq_dispatch(vswp->taskq_p, vsw_conn_task, conn,
1437 	    DDI_NOSLEEP) != DDI_SUCCESS) {
1438 		cmn_err(CE_WARN, "!vsw%d: Can't dispatch connection task",
1439 		    vswp->instance);
1440 
1441 		kmem_free(conn, sizeof (vsw_conn_evt_t));
1442 		goto err_exit;
1443 	}
1444 
1445 	D1(vswp, "%s: exit", __func__);
1446 	return;
1447 
1448 err_exit:
1449 	/*
1450 	 * Have mostly likely failed due to memory shortage. Clear the flag so
1451 	 * that future requests will at least be attempted and will hopefully
1452 	 * succeed.
1453 	 */
1454 	if ((evt == VSW_CONN_RESET) || (evt == VSW_CONN_RESTART))
1455 		ldcp->reset_active = 0;
1456 }
1457 
/*
 * Deal with events relating to a connection. Invoked from a taskq.
 *
 * arg is a vsw_conn_evt_t allocated by the dispatcher; this task copies
 * out the channel and event type and then frees it.
 *
 * With status_lock held for the remainder of the function, the task:
 *   - for VSW_CONN_RESTART on a channel that is UP, brings the channel
 *     down first;
 *   - reinitialises the channel's data structures;
 *   - calls ldc_up() and records the resulting channel status;
 *   - if the channel is UP, counts a handshake attempt and (unless
 *     vsw_obp_ver_proto_workaround is set) dispatches vsw_send_ver();
 *   - for RESET/RESTART events, clears reset_active.
 *
 * NOTE(review): the three early returns below (both ldc_status() failures
 * and the "exceeded number of permitted handshake attempts" case) leave
 * the function before reset_active is cleared. Confirm whether leaving
 * the flag set on those paths is intentional.
 */
static void
vsw_conn_task(void *arg)
{
	vsw_conn_evt_t	*conn = (vsw_conn_evt_t *)arg;
	vsw_ldc_t	*ldcp = NULL;
	vsw_t		*vswp = NULL;
	uint16_t	evt;
	ldc_status_t	curr_status;

	ldcp = conn->ldcp;
	evt = conn->evt;
	vswp = ldcp->ldc_vswp;

	D1(vswp, "%s: enter", __func__);

	/* can safely free now that we have copied out the data */
	kmem_free(conn, sizeof (vsw_conn_evt_t));

	mutex_enter(&ldcp->status_lock);
	if (ldc_status(ldcp->ldc_handle, &curr_status) != 0) {
		cmn_err(CE_WARN, "!vsw%d: Unable to read status of "
		    "channel %ld", vswp->instance, ldcp->ldc_id);
		mutex_exit(&ldcp->status_lock);
		return;
	}

	/*
	 * If we wish to restart the handshake on this channel, then if
	 * the channel is UP we bring it DOWN to flush the underlying
	 * ldc queue.
	 */
	if ((evt == VSW_CONN_RESTART) && (curr_status == LDC_UP))
		(void) ldc_down(ldcp->ldc_handle);

	/*
	 * re-init all the associated data structures.
	 */
	vsw_ldc_reinit(ldcp);

	/*
	 * Bring the channel back up (note it does no harm to
	 * do this even if the channel is already UP, Just
	 * becomes effectively a no-op).
	 */
	(void) ldc_up(ldcp->ldc_handle);

	/*
	 * Check if channel is now UP. This will only happen if
	 * peer has also done a ldc_up().
	 */
	if (ldc_status(ldcp->ldc_handle, &curr_status) != 0) {
		cmn_err(CE_WARN, "!vsw%d: Unable to read status of "
		    "channel %ld", vswp->instance, ldcp->ldc_id);
		mutex_exit(&ldcp->status_lock);
		return;
	}

	/* Record the status just read; done under status_lock. */
	ldcp->ldc_status = curr_status;

	/* channel UP so restart handshake by sending version info */
	if (curr_status == LDC_UP) {
		/*
		 * NOTE(review): vsw_ldc_reinit() above sets hcnt to 0, so
		 * the counter appears to be zero every time this test
		 * runs - confirm the limit can actually be reached.
		 */
		if (ldcp->hcnt++ > vsw_num_handshakes) {
			cmn_err(CE_WARN, "!vsw%d: exceeded number of permitted"
			    " handshake attempts (%d) on channel %ld",
			    vswp->instance, ldcp->hcnt, ldcp->ldc_id);
			mutex_exit(&ldcp->status_lock);
			return;
		}

		/*
		 * The version message is only dispatched when the
		 * vsw_obp_ver_proto_workaround tunable is B_FALSE.
		 */
		if (vsw_obp_ver_proto_workaround == B_FALSE &&
		    (ddi_taskq_dispatch(vswp->taskq_p, vsw_send_ver, ldcp,
		    DDI_NOSLEEP) != DDI_SUCCESS)) {
			cmn_err(CE_WARN, "!vsw%d: Can't dispatch version task",
			    vswp->instance);

			/*
			 * Don't count as valid restart attempt if couldn't
			 * send version msg.
			 */
			if (ldcp->hcnt > 0)
				ldcp->hcnt--;
		}
	}

	/*
	 * Mark that the process is complete by clearing the flag.
	 *
	 * Note it is possible that the taskq dispatch above may have failed,
	 * most likely due to memory shortage. We still clear the flag so
	 * future attempts will at least be attempted and will hopefully
	 * succeed.
	 */
	if ((evt == VSW_CONN_RESET) || (evt == VSW_CONN_RESTART))
		ldcp->reset_active = 0;

	mutex_exit(&ldcp->status_lock);

	D1(vswp, "%s: exit", __func__);
}
1560 
1561 /*
1562  * returns 0 if legal for event signified by flag to have
1563  * occured at the time it did. Otherwise returns 1.
1564  */
1565 int
1566 vsw_check_flag(vsw_ldc_t *ldcp, int dir, uint64_t flag)
1567 {
1568 	vsw_t		*vswp = ldcp->ldc_vswp;
1569 	uint64_t	state;
1570 	uint64_t	phase;
1571 
1572 	if (dir == INBOUND)
1573 		state = ldcp->lane_in.lstate;
1574 	else
1575 		state = ldcp->lane_out.lstate;
1576 
1577 	phase = ldcp->hphase;
1578 
1579 	switch (flag) {
1580 	case VSW_VER_INFO_RECV:
1581 		if (phase > VSW_MILESTONE0) {
1582 			DERR(vswp, "vsw_check_flag (%d): VER_INFO_RECV"
1583 			    " when in state %d\n", ldcp->ldc_id, phase);
1584 			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
1585 			return (1);
1586 		}
1587 		break;
1588 
1589 	case VSW_VER_ACK_RECV:
1590 	case VSW_VER_NACK_RECV:
1591 		if (!(state & VSW_VER_INFO_SENT)) {
1592 			DERR(vswp, "vsw_check_flag (%d): spurious VER_ACK or "
1593 			    "VER_NACK when in state %d\n", ldcp->ldc_id, phase);
1594 			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
1595 			return (1);
1596 		} else
1597 			state &= ~VSW_VER_INFO_SENT;
1598 		break;
1599 
1600 	case VSW_ATTR_INFO_RECV:
1601 		if ((phase < VSW_MILESTONE1) || (phase >= VSW_MILESTONE2)) {
1602 			DERR(vswp, "vsw_check_flag (%d): ATTR_INFO_RECV"
1603 			    " when in state %d\n", ldcp->ldc_id, phase);
1604 			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
1605 			return (1);
1606 		}
1607 		break;
1608 
1609 	case VSW_ATTR_ACK_RECV:
1610 	case VSW_ATTR_NACK_RECV:
1611 		if (!(state & VSW_ATTR_INFO_SENT)) {
1612 			DERR(vswp, "vsw_check_flag (%d): spurious ATTR_ACK"
1613 			    " or ATTR_NACK when in state %d\n",
1614 			    ldcp->ldc_id, phase);
1615 			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
1616 			return (1);
1617 		} else
1618 			state &= ~VSW_ATTR_INFO_SENT;
1619 		break;
1620 
1621 	case VSW_DRING_INFO_RECV:
1622 		if (phase < VSW_MILESTONE1) {
1623 			DERR(vswp, "vsw_check_flag (%d): DRING_INFO_RECV"
1624 			    " when in state %d\n", ldcp->ldc_id, phase);
1625 			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
1626 			return (1);
1627 		}
1628 		break;
1629 
1630 	case VSW_DRING_ACK_RECV:
1631 	case VSW_DRING_NACK_RECV:
1632 		if (!(state & VSW_DRING_INFO_SENT)) {
1633 			DERR(vswp, "vsw_check_flag (%d): spurious DRING_ACK "
1634 			    " or DRING_NACK when in state %d\n",
1635 			    ldcp->ldc_id, phase);
1636 			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
1637 			return (1);
1638 		} else
1639 			state &= ~VSW_DRING_INFO_SENT;
1640 		break;
1641 
1642 	case VSW_RDX_INFO_RECV:
1643 		if (phase < VSW_MILESTONE3) {
1644 			DERR(vswp, "vsw_check_flag (%d): RDX_INFO_RECV"
1645 			    " when in state %d\n", ldcp->ldc_id, phase);
1646 			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
1647 			return (1);
1648 		}
1649 		break;
1650 
1651 	case VSW_RDX_ACK_RECV:
1652 	case VSW_RDX_NACK_RECV:
1653 		if (!(state & VSW_RDX_INFO_SENT)) {
1654 			DERR(vswp, "vsw_check_flag (%d): spurious RDX_ACK or "
1655 			    "RDX_NACK when in state %d\n", ldcp->ldc_id, phase);
1656 			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
1657 			return (1);
1658 		} else
1659 			state &= ~VSW_RDX_INFO_SENT;
1660 		break;
1661 
1662 	case VSW_MCST_INFO_RECV:
1663 		if (phase < VSW_MILESTONE3) {
1664 			DERR(vswp, "vsw_check_flag (%d): VSW_MCST_INFO_RECV"
1665 			    " when in state %d\n", ldcp->ldc_id, phase);
1666 			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
1667 			return (1);
1668 		}
1669 		break;
1670 
1671 	default:
1672 		DERR(vswp, "vsw_check_flag (%lld): unknown flag (%llx)",
1673 		    ldcp->ldc_id, flag);
1674 		return (1);
1675 	}
1676 
1677 	if (dir == INBOUND)
1678 		ldcp->lane_in.lstate = state;
1679 	else
1680 		ldcp->lane_out.lstate = state;
1681 
1682 	D1(vswp, "vsw_check_flag (chan %lld): exit", ldcp->ldc_id);
1683 
1684 	return (0);
1685 }
1686 
1687 void
1688 vsw_next_milestone(vsw_ldc_t *ldcp)
1689 {
1690 	vsw_t		*vswp = ldcp->ldc_vswp;
1691 
1692 	D1(vswp, "%s (chan %lld): enter (phase %ld)", __func__,
1693 	    ldcp->ldc_id, ldcp->hphase);
1694 
1695 	DUMP_FLAGS(ldcp->lane_in.lstate);
1696 	DUMP_FLAGS(ldcp->lane_out.lstate);
1697 
1698 	switch (ldcp->hphase) {
1699 
1700 	case VSW_MILESTONE0:
1701 		/*
1702 		 * If we haven't started to handshake with our peer,
1703 		 * start to do so now.
1704 		 */
1705 		if (ldcp->lane_out.lstate == 0) {
1706 			D2(vswp, "%s: (chan %lld) starting handshake "
1707 			    "with peer", __func__, ldcp->ldc_id);
1708 			vsw_process_conn_evt(ldcp, VSW_CONN_UP);
1709 		}
1710 
1711 		/*
1712 		 * Only way to pass this milestone is to have successfully
1713 		 * negotiated version info.
1714 		 */
1715 		if ((ldcp->lane_in.lstate & VSW_VER_ACK_SENT) &&
1716 		    (ldcp->lane_out.lstate & VSW_VER_ACK_RECV)) {
1717 
1718 			D2(vswp, "%s: (chan %lld) leaving milestone 0",
1719 			    __func__, ldcp->ldc_id);
1720 
1721 			vsw_set_vnet_proto_ops(ldcp);
1722 
1723 			/*
1724 			 * Next milestone is passed when attribute
1725 			 * information has been successfully exchanged.
1726 			 */
1727 			ldcp->hphase = VSW_MILESTONE1;
1728 			vsw_send_attr(ldcp);
1729 
1730 		}
1731 		break;
1732 
1733 	case VSW_MILESTONE1:
1734 		/*
1735 		 * Only way to pass this milestone is to have successfully
1736 		 * negotiated attribute information.
1737 		 */
1738 		if (ldcp->lane_in.lstate & VSW_ATTR_ACK_SENT) {
1739 
1740 			ldcp->hphase = VSW_MILESTONE2;
1741 
1742 			/*
1743 			 * If the peer device has said it wishes to
1744 			 * use descriptor rings then we send it our ring
1745 			 * info, otherwise we just set up a private ring
1746 			 * which we use an internal buffer
1747 			 */
1748 			if ((VSW_VER_EQ(ldcp, 1, 2) &&
1749 			    (ldcp->lane_in.xfer_mode & VIO_DRING_MODE_V1_2)) ||
1750 			    (VSW_VER_LT(ldcp, 1, 2) &&
1751 			    (ldcp->lane_in.xfer_mode ==
1752 			    VIO_DRING_MODE_V1_0))) {
1753 				vsw_send_dring_info(ldcp);
1754 			}
1755 		}
1756 		break;
1757 
1758 	case VSW_MILESTONE2:
1759 		/*
1760 		 * If peer has indicated in its attribute message that
1761 		 * it wishes to use descriptor rings then the only way
1762 		 * to pass this milestone is for us to have received
1763 		 * valid dring info.
1764 		 *
1765 		 * If peer is not using descriptor rings then just fall
1766 		 * through.
1767 		 */
1768 		if ((VSW_VER_EQ(ldcp, 1, 2) &&
1769 		    (ldcp->lane_in.xfer_mode & VIO_DRING_MODE_V1_2)) ||
1770 		    (VSW_VER_LT(ldcp, 1, 2) &&
1771 		    (ldcp->lane_in.xfer_mode ==
1772 		    VIO_DRING_MODE_V1_0))) {
1773 			if (!(ldcp->lane_in.lstate & VSW_DRING_ACK_SENT))
1774 				break;
1775 		}
1776 
1777 		D2(vswp, "%s: (chan %lld) leaving milestone 2",
1778 		    __func__, ldcp->ldc_id);
1779 
1780 		ldcp->hphase = VSW_MILESTONE3;
1781 		vsw_send_rdx(ldcp);
1782 		break;
1783 
1784 	case VSW_MILESTONE3:
1785 		/*
1786 		 * Pass this milestone when all paramaters have been
1787 		 * successfully exchanged and RDX sent in both directions.
1788 		 *
1789 		 * Mark outbound lane as available to transmit data.
1790 		 */
1791 		if ((ldcp->lane_out.lstate & VSW_RDX_ACK_SENT) &&
1792 		    (ldcp->lane_in.lstate & VSW_RDX_ACK_RECV)) {
1793 
1794 			D2(vswp, "%s: (chan %lld) leaving milestone 3",
1795 			    __func__, ldcp->ldc_id);
1796 			D2(vswp, "%s: ** handshake complete (0x%llx : "
1797 			    "0x%llx) **", __func__, ldcp->lane_in.lstate,
1798 			    ldcp->lane_out.lstate);
1799 			ldcp->lane_out.lstate |= VSW_LANE_ACTIVE;
1800 			ldcp->hphase = VSW_MILESTONE4;
1801 			ldcp->hcnt = 0;
1802 			DISPLAY_STATE();
1803 		} else {
1804 			D2(vswp, "%s: still in milestone 3 (0x%llx : 0x%llx)",
1805 			    __func__, ldcp->lane_in.lstate,
1806 			    ldcp->lane_out.lstate);
1807 		}
1808 		break;
1809 
1810 	case VSW_MILESTONE4:
1811 		D2(vswp, "%s: (chan %lld) in milestone 4", __func__,
1812 		    ldcp->ldc_id);
1813 		break;
1814 
1815 	default:
1816 		DERR(vswp, "%s: (chan %lld) Unknown Phase %x", __func__,
1817 		    ldcp->ldc_id, ldcp->hphase);
1818 	}
1819 
1820 	D1(vswp, "%s (chan %lld): exit (phase %ld)", __func__, ldcp->ldc_id,
1821 	    ldcp->hphase);
1822 }
1823 
1824 /*
1825  * Check if major version is supported.
1826  *
1827  * Returns 0 if finds supported major number, and if necessary
1828  * adjusts the minor field.
1829  *
1830  * Returns 1 if can't match major number exactly. Sets mjor/minor
1831  * to next lowest support values, or to zero if no other values possible.
1832  */
1833 static int
1834 vsw_supported_version(vio_ver_msg_t *vp)
1835 {
1836 	int	i;
1837 
1838 	D1(NULL, "vsw_supported_version: enter");
1839 
1840 	for (i = 0; i < VSW_NUM_VER; i++) {
1841 		if (vsw_versions[i].ver_major == vp->ver_major) {
1842 			/*
1843 			 * Matching or lower major version found. Update
1844 			 * minor number if necessary.
1845 			 */
1846 			if (vp->ver_minor > vsw_versions[i].ver_minor) {
1847 				D2(NULL, "%s: adjusting minor value from %d "
1848 				    "to %d", __func__, vp->ver_minor,
1849 				    vsw_versions[i].ver_minor);
1850 				vp->ver_minor = vsw_versions[i].ver_minor;
1851 			}
1852 
1853 			return (0);
1854 		}
1855 
1856 		/*
1857 		 * If the message contains a higher major version number, set
1858 		 * the message's major/minor versions to the current values
1859 		 * and return false, so this message will get resent with
1860 		 * these values.
1861 		 */
1862 		if (vsw_versions[i].ver_major < vp->ver_major) {
1863 			D2(NULL, "%s: adjusting major and minor "
1864 			    "values to %d, %d\n",
1865 			    __func__, vsw_versions[i].ver_major,
1866 			    vsw_versions[i].ver_minor);
1867 			vp->ver_major = vsw_versions[i].ver_major;
1868 			vp->ver_minor = vsw_versions[i].ver_minor;
1869 			return (1);
1870 		}
1871 	}
1872 
1873 	/* No match was possible, zero out fields */
1874 	vp->ver_major = 0;
1875 	vp->ver_minor = 0;
1876 
1877 	D1(NULL, "vsw_supported_version: exit");
1878 
1879 	return (1);
1880 }
1881 
1882 /*
1883  * Set vnet-protocol-version dependent functions based on version.
1884  */
1885 static void
1886 vsw_set_vnet_proto_ops(vsw_ldc_t *ldcp)
1887 {
1888 	vsw_t	*vswp = ldcp->ldc_vswp;
1889 	lane_t	*lp = &ldcp->lane_out;
1890 
1891 	if (VSW_VER_EQ(ldcp, 1, 2)) {
1892 		/* Version 1.2 */
1893 
1894 		if (VSW_PRI_ETH_DEFINED(vswp)) {
1895 			/*
1896 			 * enable priority routines and pkt mode only if
1897 			 * at least one pri-eth-type is specified in MD.
1898 			 */
1899 			ldcp->tx = vsw_ldctx_pri;
1900 			ldcp->rx_pktdata = vsw_process_pkt_data;
1901 
1902 			/* set xfer mode for vsw_send_attr() */
1903 			lp->xfer_mode = VIO_PKT_MODE | VIO_DRING_MODE_V1_2;
1904 		} else {
1905 			/* no priority eth types defined in MD */
1906 
1907 			ldcp->tx = vsw_ldctx;
1908 			ldcp->rx_pktdata = vsw_process_pkt_data_nop;
1909 
1910 			/* set xfer mode for vsw_send_attr() */
1911 			lp->xfer_mode = VIO_DRING_MODE_V1_2;
1912 
1913 		}
1914 	} else {
1915 		/* Versions prior to 1.2  */
1916 
1917 		vsw_reset_vnet_proto_ops(ldcp);
1918 	}
1919 }
1920 
1921 /*
1922  * Reset vnet-protocol-version dependent functions to v1.0.
1923  */
1924 static void
1925 vsw_reset_vnet_proto_ops(vsw_ldc_t *ldcp)
1926 {
1927 	lane_t	*lp = &ldcp->lane_out;
1928 
1929 	ldcp->tx = vsw_ldctx;
1930 	ldcp->rx_pktdata = vsw_process_pkt_data_nop;
1931 
1932 	/* set xfer mode for vsw_send_attr() */
1933 	lp->xfer_mode = VIO_DRING_MODE_V1_0;
1934 }
1935 
1936 /*
1937  * Main routine for processing messages received over LDC.
1938  */
1939 static void
1940 vsw_process_pkt(void *arg)
1941 {
1942 	vsw_ldc_t	*ldcp = (vsw_ldc_t  *)arg;
1943 	vsw_t 		*vswp = ldcp->ldc_vswp;
1944 	size_t		msglen;
1945 	vio_msg_tag_t	*tagp;
1946 	uint64_t	*ldcmsg;
1947 	int 		rv = 0;
1948 
1949 
1950 	D1(vswp, "%s enter: ldcid (%lld)\n", __func__, ldcp->ldc_id);
1951 
1952 	ASSERT(MUTEX_HELD(&ldcp->ldc_cblock));
1953 
1954 	ldcmsg = ldcp->ldcmsg;
1955 	/*
1956 	 * If channel is up read messages until channel is empty.
1957 	 */
1958 	do {
1959 		msglen = ldcp->msglen;
1960 		rv = ldc_read(ldcp->ldc_handle, (caddr_t)ldcmsg, &msglen);
1961 
1962 		if (rv != 0) {
1963 			DERR(vswp, "%s :ldc_read err id(%lld) rv(%d) len(%d)\n",
1964 			    __func__, ldcp->ldc_id, rv, msglen);
1965 		}
1966 
1967 		/* channel has been reset */
1968 		if (rv == ECONNRESET) {
1969 			vsw_process_conn_evt(ldcp, VSW_CONN_RESET);
1970 			break;
1971 		}
1972 
1973 		if (msglen == 0) {
1974 			D2(vswp, "%s: ldc_read id(%lld) NODATA", __func__,
1975 			    ldcp->ldc_id);
1976 			break;
1977 		}
1978 
1979 		D2(vswp, "%s: ldc_read id(%lld): msglen(%d)", __func__,
1980 		    ldcp->ldc_id, msglen);
1981 
1982 		/*
1983 		 * Figure out what sort of packet we have gotten by
1984 		 * examining the msg tag, and then switch it appropriately.
1985 		 */
1986 		tagp = (vio_msg_tag_t *)ldcmsg;
1987 
1988 		switch (tagp->vio_msgtype) {
1989 		case VIO_TYPE_CTRL:
1990 			vsw_dispatch_ctrl_task(ldcp, ldcmsg, tagp);
1991 			break;
1992 		case VIO_TYPE_DATA:
1993 			vsw_process_data_pkt(ldcp, ldcmsg, tagp, msglen);
1994 			break;
1995 		case VIO_TYPE_ERR:
1996 			vsw_process_err_pkt(ldcp, ldcmsg, tagp);
1997 			break;
1998 		default:
1999 			DERR(vswp, "%s: Unknown tag(%lx) ", __func__,
2000 			    "id(%lx)\n", tagp->vio_msgtype, ldcp->ldc_id);
2001 			break;
2002 		}
2003 	} while (msglen);
2004 
2005 	D1(vswp, "%s exit: ldcid (%lld)\n", __func__, ldcp->ldc_id);
2006 }
2007 
2008 /*
2009  * Dispatch a task to process a VIO control message.
2010  */
2011 static void
2012 vsw_dispatch_ctrl_task(vsw_ldc_t *ldcp, void *cpkt, vio_msg_tag_t *tagp)
2013 {
2014 	vsw_ctrl_task_t		*ctaskp = NULL;
2015 	vsw_port_t		*port = ldcp->ldc_port;
2016 	vsw_t			*vswp = port->p_vswp;
2017 
2018 	D1(vswp, "%s: enter", __func__);
2019 
2020 	/*
2021 	 * We need to handle RDX ACK messages in-band as once they
2022 	 * are exchanged it is possible that we will get an
2023 	 * immediate (legitimate) data packet.
2024 	 */
2025 	if ((tagp->vio_subtype_env == VIO_RDX) &&
2026 	    (tagp->vio_subtype == VIO_SUBTYPE_ACK)) {
2027 
2028 		if (vsw_check_flag(ldcp, INBOUND, VSW_RDX_ACK_RECV))
2029 			return;
2030 
2031 		ldcp->lane_in.lstate |= VSW_RDX_ACK_RECV;
2032 		D2(vswp, "%s (%ld) handling RDX_ACK in place "
2033 		    "(ostate 0x%llx : hphase %d)", __func__,
2034 		    ldcp->ldc_id, ldcp->lane_in.lstate, ldcp->hphase);
2035 		vsw_next_milestone(ldcp);
2036 		return;
2037 	}
2038 
2039 	ctaskp = kmem_alloc(sizeof (vsw_ctrl_task_t), KM_NOSLEEP);
2040 
2041 	if (ctaskp == NULL) {
2042 		DERR(vswp, "%s: unable to alloc space for ctrl msg", __func__);
2043 		vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
2044 		return;
2045 	}
2046 
2047 	ctaskp->ldcp = ldcp;
2048 	bcopy((def_msg_t *)cpkt, &ctaskp->pktp, sizeof (def_msg_t));
2049 	ctaskp->hss_id = ldcp->hss_id;
2050 
2051 	/*
2052 	 * Dispatch task to processing taskq if port is not in
2053 	 * the process of being detached.
2054 	 */
2055 	mutex_enter(&port->state_lock);
2056 	if (port->state == VSW_PORT_INIT) {
2057 		if ((vswp->taskq_p == NULL) ||
2058 		    (ddi_taskq_dispatch(vswp->taskq_p, vsw_process_ctrl_pkt,
2059 		    ctaskp, DDI_NOSLEEP) != DDI_SUCCESS)) {
2060 			DERR(vswp, "%s: unable to dispatch task to taskq",
2061 			    __func__);
2062 			kmem_free(ctaskp, sizeof (vsw_ctrl_task_t));
2063 			mutex_exit(&port->state_lock);
2064 			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
2065 			return;
2066 		}
2067 	} else {
2068 		DWARN(vswp, "%s: port %d detaching, not dispatching "
2069 		    "task", __func__, port->p_instance);
2070 	}
2071 
2072 	mutex_exit(&port->state_lock);
2073 
2074 	D2(vswp, "%s: dispatched task to taskq for chan %d", __func__,
2075 	    ldcp->ldc_id);
2076 	D1(vswp, "%s: exit", __func__);
2077 }
2078 
2079 /*
2080  * Process a VIO ctrl message. Invoked from taskq.
2081  */
2082 static void
2083 vsw_process_ctrl_pkt(void *arg)
2084 {
2085 	vsw_ctrl_task_t	*ctaskp = (vsw_ctrl_task_t *)arg;
2086 	vsw_ldc_t	*ldcp = ctaskp->ldcp;
2087 	vsw_t 		*vswp = ldcp->ldc_vswp;
2088 	vio_msg_tag_t	tag;
2089 	uint16_t	env;
2090 
2091 	D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);
2092 
2093 	bcopy(&ctaskp->pktp, &tag, sizeof (vio_msg_tag_t));
2094 	env = tag.vio_subtype_env;
2095 
2096 	/* stale pkt check */
2097 	if (ctaskp->hss_id < ldcp->hss_id) {
2098 		DWARN(vswp, "%s: discarding stale packet belonging to earlier"
2099 		    " (%ld) handshake session", __func__, ctaskp->hss_id);
2100 		return;
2101 	}
2102 
2103 	/* session id check */
2104 	if (ldcp->session_status & VSW_PEER_SESSION) {
2105 		if (ldcp->peer_session != tag.vio_sid) {
2106 			DERR(vswp, "%s (chan %d): invalid session id (%llx)",
2107 			    __func__, ldcp->ldc_id, tag.vio_sid);
2108 			kmem_free(ctaskp, sizeof (vsw_ctrl_task_t));
2109 			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
2110 			return;
2111 		}
2112 	}
2113 
2114 	/*
2115 	 * Switch on vio_subtype envelope, then let lower routines
2116 	 * decide if its an INFO, ACK or NACK packet.
2117 	 */
2118 	switch (env) {
2119 	case VIO_VER_INFO:
2120 		vsw_process_ctrl_ver_pkt(ldcp, &ctaskp->pktp);
2121 		break;
2122 	case VIO_DRING_REG:
2123 		vsw_process_ctrl_dring_reg_pkt(ldcp, &ctaskp->pktp);
2124 		break;
2125 	case VIO_DRING_UNREG:
2126 		vsw_process_ctrl_dring_unreg_pkt(ldcp, &ctaskp->pktp);
2127 		break;
2128 	case VIO_ATTR_INFO:
2129 		vsw_process_ctrl_attr_pkt(ldcp, &ctaskp->pktp);
2130 		break;
2131 	case VNET_MCAST_INFO:
2132 		vsw_process_ctrl_mcst_pkt(ldcp, &ctaskp->pktp);
2133 		break;
2134 	case VIO_RDX:
2135 		vsw_process_ctrl_rdx_pkt(ldcp, &ctaskp->pktp);
2136 		break;
2137 	default:
2138 		DERR(vswp, "%s: unknown vio_subtype_env (%x)\n", __func__, env);
2139 	}
2140 
2141 	kmem_free(ctaskp, sizeof (vsw_ctrl_task_t));
2142 	D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id);
2143 }
2144 
2145 /*
2146  * Version negotiation. We can end up here either because our peer
2147  * has responded to a handshake message we have sent it, or our peer
2148  * has initiated a handshake with us. If its the former then can only
2149  * be ACK or NACK, if its the later can only be INFO.
2150  *
2151  * If its an ACK we move to the next stage of the handshake, namely
2152  * attribute exchange. If its a NACK we see if we can specify another
2153  * version, if we can't we stop.
2154  *
2155  * If it is an INFO we reset all params associated with communication
2156  * in that direction over this channel (remember connection is
2157  * essentially 2 independent simplex channels).
2158  */
2159 void
2160 vsw_process_ctrl_ver_pkt(vsw_ldc_t *ldcp, void *pkt)
2161 {
2162 	vio_ver_msg_t	*ver_pkt;
2163 	vsw_t 		*vswp = ldcp->ldc_vswp;
2164 
2165 	D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);
2166 
2167 	/*
2168 	 * We know this is a ctrl/version packet so
2169 	 * cast it into the correct structure.
2170 	 */
2171 	ver_pkt = (vio_ver_msg_t *)pkt;
2172 
2173 	switch (ver_pkt->tag.vio_subtype) {
2174 	case VIO_SUBTYPE_INFO:
2175 		D2(vswp, "vsw_process_ctrl_ver_pkt: VIO_SUBTYPE_INFO\n");
2176 
2177 		/*
2178 		 * Record the session id, which we will use from now
2179 		 * until we see another VER_INFO msg. Even then the
2180 		 * session id in most cases will be unchanged, execpt
2181 		 * if channel was reset.
2182 		 */
2183 		if ((ldcp->session_status & VSW_PEER_SESSION) &&
2184 		    (ldcp->peer_session != ver_pkt->tag.vio_sid)) {
2185 			DERR(vswp, "%s: updating session id for chan %lld "
2186 			    "from %llx to %llx", __func__, ldcp->ldc_id,
2187 			    ldcp->peer_session, ver_pkt->tag.vio_sid);
2188 		}
2189 
2190 		ldcp->peer_session = ver_pkt->tag.vio_sid;
2191 		ldcp->session_status |= VSW_PEER_SESSION;
2192 
2193 		/* Legal message at this time ? */
2194 		if (vsw_check_flag(ldcp, INBOUND, VSW_VER_INFO_RECV))
2195 			return;
2196 
2197 		/*
2198 		 * First check the device class. Currently only expect
2199 		 * to be talking to a network device. In the future may
2200 		 * also talk to another switch.
2201 		 */
2202 		if (ver_pkt->dev_class != VDEV_NETWORK) {
2203 			DERR(vswp, "%s: illegal device class %d", __func__,
2204 			    ver_pkt->dev_class);
2205 
2206 			ver_pkt->tag.vio_sid = ldcp->local_session;
2207 			ver_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK;
2208 
2209 			DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt);
2210 
2211 			(void) vsw_send_msg(ldcp, (void *)ver_pkt,
2212 			    sizeof (vio_ver_msg_t), B_TRUE);
2213 
2214 			ldcp->lane_in.lstate |= VSW_VER_NACK_SENT;
2215 			vsw_next_milestone(ldcp);
2216 			return;
2217 		} else {
2218 			ldcp->dev_class = ver_pkt->dev_class;
2219 		}
2220 
2221 		/*
2222 		 * Now check the version.
2223 		 */
2224 		if (vsw_supported_version(ver_pkt) == 0) {
2225 			/*
2226 			 * Support this major version and possibly
2227 			 * adjusted minor version.
2228 			 */
2229 
2230 			D2(vswp, "%s: accepted ver %d:%d", __func__,
2231 			    ver_pkt->ver_major, ver_pkt->ver_minor);
2232 
2233 			/* Store accepted values */
2234 			ldcp->lane_in.ver_major = ver_pkt->ver_major;
2235 			ldcp->lane_in.ver_minor = ver_pkt->ver_minor;
2236 
2237 			ver_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK;
2238 
2239 			ldcp->lane_in.lstate |= VSW_VER_ACK_SENT;
2240 
2241 			if (vsw_obp_ver_proto_workaround == B_TRUE) {
2242 				/*
2243 				 * Send a version info message
2244 				 * using the accepted version that
2245 				 * we are about to ack. Also note that
2246 				 * we send our ver info before we ack.
2247 				 * Otherwise, as soon as receiving the
2248 				 * ack, obp sends attr info msg, which
2249 				 * breaks vsw_check_flag() invoked
2250 				 * from vsw_process_ctrl_attr_pkt();
2251 				 * as we also need VSW_VER_ACK_RECV to
2252 				 * be set in lane_out.lstate, before
2253 				 * we can receive attr info.
2254 				 */
2255 				vsw_send_ver(ldcp);
2256 			}
2257 		} else {
2258 			/*
2259 			 * NACK back with the next lower major/minor
2260 			 * pairing we support (if don't suuport any more
2261 			 * versions then they will be set to zero.
2262 			 */
2263 
2264 			D2(vswp, "%s: replying with ver %d:%d", __func__,
2265 			    ver_pkt->ver_major, ver_pkt->ver_minor);
2266 
2267 			/* Store updated values */
2268 			ldcp->lane_in.ver_major = ver_pkt->ver_major;
2269 			ldcp->lane_in.ver_minor = ver_pkt->ver_minor;
2270 
2271 			ver_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK;
2272 
2273 			ldcp->lane_in.lstate |= VSW_VER_NACK_SENT;
2274 		}
2275 
2276 		DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt);
2277 		ver_pkt->tag.vio_sid = ldcp->local_session;
2278 		(void) vsw_send_msg(ldcp, (void *)ver_pkt,
2279 		    sizeof (vio_ver_msg_t), B_TRUE);
2280 
2281 		vsw_next_milestone(ldcp);
2282 		break;
2283 
2284 	case VIO_SUBTYPE_ACK:
2285 		D2(vswp, "%s: VIO_SUBTYPE_ACK\n", __func__);
2286 
2287 		if (vsw_check_flag(ldcp, OUTBOUND, VSW_VER_ACK_RECV))
2288 			return;
2289 
2290 		/* Store updated values */
2291 		ldcp->lane_out.ver_major = ver_pkt->ver_major;
2292 		ldcp->lane_out.ver_minor = ver_pkt->ver_minor;
2293 
2294 		ldcp->lane_out.lstate |= VSW_VER_ACK_RECV;
2295 		vsw_next_milestone(ldcp);
2296 
2297 		break;
2298 
2299 	case VIO_SUBTYPE_NACK:
2300 		D2(vswp, "%s: VIO_SUBTYPE_NACK\n", __func__);
2301 
2302 		if (vsw_check_flag(ldcp, OUTBOUND, VSW_VER_NACK_RECV))
2303 			return;
2304 
2305 		/*
2306 		 * If our peer sent us a NACK with the ver fields set to
2307 		 * zero then there is nothing more we can do. Otherwise see
2308 		 * if we support either the version suggested, or a lesser
2309 		 * one.
2310 		 */
2311 		if ((ver_pkt->ver_major == 0) && (ver_pkt->ver_minor == 0)) {
2312 			DERR(vswp, "%s: peer unable to negotiate any "
2313 			    "further.", __func__);
2314 			ldcp->lane_out.lstate |= VSW_VER_NACK_RECV;
2315 			vsw_next_milestone(ldcp);
2316 			return;
2317 		}
2318 
2319 		/*
2320 		 * Check to see if we support this major version or
2321 		 * a lower one. If we don't then maj/min will be set
2322 		 * to zero.
2323 		 */
2324 		(void) vsw_supported_version(ver_pkt);
2325 		if ((ver_pkt->ver_major == 0) && (ver_pkt->ver_minor == 0)) {
2326 			/* Nothing more we can do */
2327 			DERR(vswp, "%s: version negotiation failed.\n",
2328 			    __func__);
2329 			ldcp->lane_out.lstate |= VSW_VER_NACK_RECV;
2330 			vsw_next_milestone(ldcp);
2331 		} else {
2332 			/* found a supported major version */
2333 			ldcp->lane_out.ver_major = ver_pkt->ver_major;
2334 			ldcp->lane_out.ver_minor = ver_pkt->ver_minor;
2335 
2336 			D2(vswp, "%s: resending with updated values (%x, %x)",
2337 			    __func__, ver_pkt->ver_major, ver_pkt->ver_minor);
2338 
2339 			ldcp->lane_out.lstate |= VSW_VER_INFO_SENT;
2340 			ver_pkt->tag.vio_sid = ldcp->local_session;
2341 			ver_pkt->tag.vio_subtype = VIO_SUBTYPE_INFO;
2342 
2343 			DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt);
2344 
2345 			(void) vsw_send_msg(ldcp, (void *)ver_pkt,
2346 			    sizeof (vio_ver_msg_t), B_TRUE);
2347 
2348 			vsw_next_milestone(ldcp);
2349 
2350 		}
2351 		break;
2352 
2353 	default:
2354 		DERR(vswp, "%s: unknown vio_subtype %x\n", __func__,
2355 		    ver_pkt->tag.vio_subtype);
2356 	}
2357 
2358 	D1(vswp, "%s(%lld): exit\n", __func__, ldcp->ldc_id);
2359 }
2360 
2361 /*
2362  * Process an attribute packet. We can end up here either because our peer
2363  * has ACK/NACK'ed back to an earlier ATTR msg we had sent it, or our
2364  * peer has sent us an attribute INFO message
2365  *
2366  * If its an ACK we then move to the next stage of the handshake which
2367  * is to send our descriptor ring info to our peer. If its a NACK then
2368  * there is nothing more we can (currently) do.
2369  *
2370  * If we get a valid/acceptable INFO packet (and we have already negotiated
2371  * a version) we ACK back and set channel state to ATTR_RECV, otherwise we
2372  * NACK back and reset channel state to INACTIV.
2373  *
2374  * FUTURE: in time we will probably negotiate over attributes, but for
2375  * the moment unacceptable attributes are regarded as a fatal error.
2376  *
2377  */
2378 void
2379 vsw_process_ctrl_attr_pkt(vsw_ldc_t *ldcp, void *pkt)
2380 {
2381 	vnet_attr_msg_t		*attr_pkt;
2382 	vsw_t			*vswp = ldcp->ldc_vswp;
2383 	vsw_port_t		*port = ldcp->ldc_port;
2384 	uint64_t		macaddr = 0;
2385 	int			i;
2386 
2387 	D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id);
2388 
2389 	/*
2390 	 * We know this is a ctrl/attr packet so
2391 	 * cast it into the correct structure.
2392 	 */
2393 	attr_pkt = (vnet_attr_msg_t *)pkt;
2394 
2395 	switch (attr_pkt->tag.vio_subtype) {
2396 	case VIO_SUBTYPE_INFO:
2397 		D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__);
2398 
2399 		if (vsw_check_flag(ldcp, INBOUND, VSW_ATTR_INFO_RECV))
2400 			return;
2401 
2402 		/*
2403 		 * If the attributes are unacceptable then we NACK back.
2404 		 */
2405 		if (vsw_check_attr(attr_pkt, ldcp)) {
2406 
2407 			DERR(vswp, "%s (chan %d): invalid attributes",
2408 			    __func__, ldcp->ldc_id);
2409 
2410 			vsw_free_lane_resources(ldcp, INBOUND);
2411 
2412 			attr_pkt->tag.vio_sid = ldcp->local_session;
2413 			attr_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK;
2414 
2415 			DUMP_TAG_PTR((vio_msg_tag_t *)attr_pkt);
2416 			ldcp->lane_in.lstate |= VSW_ATTR_NACK_SENT;
2417 			(void) vsw_send_msg(ldcp, (void *)attr_pkt,
2418 			    sizeof (vnet_attr_msg_t), B_TRUE);
2419 
2420 			vsw_next_milestone(ldcp);
2421 			return;
2422 		}
2423 
2424 		/*
2425 		 * Otherwise store attributes for this lane and update
2426 		 * lane state.
2427 		 */
2428 		ldcp->lane_in.mtu = attr_pkt->mtu;
2429 		ldcp->lane_in.addr = attr_pkt->addr;
2430 		ldcp->lane_in.addr_type = attr_pkt->addr_type;
2431 		ldcp->lane_in.xfer_mode = attr_pkt->xfer_mode;
2432 		ldcp->lane_in.ack_freq = attr_pkt->ack_freq;
2433 
2434 		macaddr = ldcp->lane_in.addr;
2435 		for (i = ETHERADDRL - 1; i >= 0; i--) {
2436 			port->p_macaddr.ether_addr_octet[i] = macaddr & 0xFF;
2437 			macaddr >>= 8;
2438 		}
2439 
2440 		/* create the fdb entry for this port/mac address */
2441 		(void) vsw_add_fdb(vswp, port);
2442 
2443 		/* setup device specifc xmit routines */
2444 		mutex_enter(&port->tx_lock);
2445 		if ((VSW_VER_EQ(ldcp, 1, 2) &&
2446 		    (ldcp->lane_in.xfer_mode & VIO_DRING_MODE_V1_2)) ||
2447 		    (VSW_VER_LT(ldcp, 1, 2) &&
2448 		    (ldcp->lane_in.xfer_mode == VIO_DRING_MODE_V1_0))) {
2449 			D2(vswp, "%s: mode = VIO_DRING_MODE", __func__);
2450 			port->transmit = vsw_dringsend;
2451 		} else if (ldcp->lane_in.xfer_mode == VIO_DESC_MODE) {
2452 			D2(vswp, "%s: mode = VIO_DESC_MODE", __func__);
2453 			vsw_create_privring(ldcp);
2454 			port->transmit = vsw_descrsend;
2455 			ldcp->lane_out.xfer_mode = VIO_DESC_MODE;
2456 		}
2457 		mutex_exit(&port->tx_lock);
2458 
2459 		attr_pkt->tag.vio_sid = ldcp->local_session;
2460 		attr_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK;
2461 
2462 		DUMP_TAG_PTR((vio_msg_tag_t *)attr_pkt);
2463 
2464 		ldcp->lane_in.lstate |= VSW_ATTR_ACK_SENT;
2465 
2466 		(void) vsw_send_msg(ldcp, (void *)attr_pkt,
2467 		    sizeof (vnet_attr_msg_t), B_TRUE);
2468 
2469 		vsw_next_milestone(ldcp);
2470 		break;
2471 
2472 	case VIO_SUBTYPE_ACK:
2473 		D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__);
2474 
2475 		if (vsw_check_flag(ldcp, OUTBOUND, VSW_ATTR_ACK_RECV))
2476 			return;
2477 
2478 		ldcp->lane_out.lstate |= VSW_ATTR_ACK_RECV;
2479 		vsw_next_milestone(ldcp);
2480 		break;
2481 
2482 	case VIO_SUBTYPE_NACK:
2483 		D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__);
2484 
2485 		if (vsw_check_flag(ldcp, OUTBOUND, VSW_ATTR_NACK_RECV))
2486 			return;
2487 
2488 		ldcp->lane_out.lstate |= VSW_ATTR_NACK_RECV;
2489 		vsw_next_milestone(ldcp);
2490 		break;
2491 
2492 	default:
2493 		DERR(vswp, "%s: unknown vio_subtype %x\n", __func__,
2494 		    attr_pkt->tag.vio_subtype);
2495 	}
2496 
2497 	D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id);
2498 }
2499 
2500 /*
2501  * Process a dring info packet. We can end up here either because our peer
2502  * has ACK/NACK'ed back to an earlier DRING msg we had sent it, or our
2503  * peer has sent us a dring INFO message.
2504  *
2505  * If we get a valid/acceptable INFO packet (and we have already negotiated
2506  * a version) we ACK back and update the lane state, otherwise we NACK back.
2507  *
2508  * FUTURE: nothing to stop client from sending us info on multiple dring's
2509  * but for the moment we will just use the first one we are given.
2510  *
2511  */
2512 void
2513 vsw_process_ctrl_dring_reg_pkt(vsw_ldc_t *ldcp, void *pkt)
2514 {
2515 	vio_dring_reg_msg_t	*dring_pkt;
2516 	vsw_t			*vswp = ldcp->ldc_vswp;
2517 	ldc_mem_info_t		minfo;
2518 	dring_info_t		*dp, *dbp;
2519 	int			dring_found = 0;
2520 
2521 	/*
2522 	 * We know this is a ctrl/dring packet so
2523 	 * cast it into the correct structure.
2524 	 */
2525 	dring_pkt = (vio_dring_reg_msg_t *)pkt;
2526 
2527 	D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id);
2528 
2529 	switch (dring_pkt->tag.vio_subtype) {
2530 	case VIO_SUBTYPE_INFO:
2531 		D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__);
2532 
2533 		if (vsw_check_flag(ldcp, INBOUND, VSW_DRING_INFO_RECV))
2534 			return;
2535 
2536 		/*
2537 		 * If the dring params are unacceptable then we NACK back.
2538 		 */
2539 		if (vsw_check_dring_info(dring_pkt)) {
2540 
2541 			DERR(vswp, "%s (%lld): invalid dring info",
2542 			    __func__, ldcp->ldc_id);
2543 
2544 			vsw_free_lane_resources(ldcp, INBOUND);
2545 
2546 			dring_pkt->tag.vio_sid = ldcp->local_session;
2547 			dring_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK;
2548 
2549 			DUMP_TAG_PTR((vio_msg_tag_t *)dring_pkt);
2550 
2551 			ldcp->lane_in.lstate |= VSW_DRING_NACK_SENT;
2552 
2553 			(void) vsw_send_msg(ldcp, (void *)dring_pkt,
2554 			    sizeof (vio_dring_reg_msg_t), B_TRUE);
2555 
2556 			vsw_next_milestone(ldcp);
2557 			return;
2558 		}
2559 
2560 		/*
2561 		 * Otherwise, attempt to map in the dring using the
2562 		 * cookie. If that succeeds we send back a unique dring
2563 		 * identifier that the sending side will use in future
2564 		 * to refer to this descriptor ring.
2565 		 */
2566 		dp = kmem_zalloc(sizeof (dring_info_t), KM_SLEEP);
2567 
2568 		dp->num_descriptors = dring_pkt->num_descriptors;
2569 		dp->descriptor_size = dring_pkt->descriptor_size;
2570 		dp->options = dring_pkt->options;
2571 		dp->ncookies = dring_pkt->ncookies;
2572 
2573 		/*
2574 		 * Note: should only get one cookie. Enforced in
2575 		 * the ldc layer.
2576 		 */
2577 		bcopy(&dring_pkt->cookie[0], &dp->cookie[0],
2578 		    sizeof (ldc_mem_cookie_t));
2579 
2580 		D2(vswp, "%s: num_desc %ld : desc_size %ld", __func__,
2581 		    dp->num_descriptors, dp->descriptor_size);
2582 		D2(vswp, "%s: options 0x%lx: ncookies %ld", __func__,
2583 		    dp->options, dp->ncookies);
2584 
2585 		if ((ldc_mem_dring_map(ldcp->ldc_handle, &dp->cookie[0],
2586 		    dp->ncookies, dp->num_descriptors, dp->descriptor_size,
2587 		    LDC_SHADOW_MAP, &(dp->handle))) != 0) {
2588 
2589 			DERR(vswp, "%s: dring_map failed\n", __func__);
2590 
2591 			kmem_free(dp, sizeof (dring_info_t));
2592 			vsw_free_lane_resources(ldcp, INBOUND);
2593 
2594 			dring_pkt->tag.vio_sid = ldcp->local_session;
2595 			dring_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK;
2596 
2597 			DUMP_TAG_PTR((vio_msg_tag_t *)dring_pkt);
2598 
2599 			ldcp->lane_in.lstate |= VSW_DRING_NACK_SENT;
2600 			(void) vsw_send_msg(ldcp, (void *)dring_pkt,
2601 			    sizeof (vio_dring_reg_msg_t), B_TRUE);
2602 
2603 			vsw_next_milestone(ldcp);
2604 			return;
2605 		}
2606 
2607 		if ((ldc_mem_dring_info(dp->handle, &minfo)) != 0) {
2608 
2609 			DERR(vswp, "%s: dring_addr failed\n", __func__);
2610 
2611 			kmem_free(dp, sizeof (dring_info_t));
2612 			vsw_free_lane_resources(ldcp, INBOUND);
2613 
2614 			dring_pkt->tag.vio_sid = ldcp->local_session;
2615 			dring_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK;
2616 
2617 			DUMP_TAG_PTR((vio_msg_tag_t *)dring_pkt);
2618 
2619 			ldcp->lane_in.lstate |= VSW_DRING_NACK_SENT;
2620 			(void) vsw_send_msg(ldcp, (void *)dring_pkt,
2621 			    sizeof (vio_dring_reg_msg_t), B_TRUE);
2622 
2623 			vsw_next_milestone(ldcp);
2624 			return;
2625 		} else {
2626 			/* store the address of the pub part of ring */
2627 			dp->pub_addr = minfo.vaddr;
2628 		}
2629 
2630 		/* no private section as we are importing */
2631 		dp->priv_addr = NULL;
2632 
2633 		/*
2634 		 * Using simple mono increasing int for ident at
2635 		 * the moment.
2636 		 */
2637 		dp->ident = ldcp->next_ident;
2638 		ldcp->next_ident++;
2639 
2640 		dp->end_idx = 0;
2641 		dp->next = NULL;
2642 
2643 		/*
2644 		 * Link it onto the end of the list of drings
2645 		 * for this lane.
2646 		 */
2647 		if (ldcp->lane_in.dringp == NULL) {
2648 			D2(vswp, "%s: adding first INBOUND dring", __func__);
2649 			ldcp->lane_in.dringp = dp;
2650 		} else {
2651 			dbp = ldcp->lane_in.dringp;
2652 
2653 			while (dbp->next != NULL)
2654 				dbp = dbp->next;
2655 
2656 			dbp->next = dp;
2657 		}
2658 
2659 		/* acknowledge it */
2660 		dring_pkt->tag.vio_sid = ldcp->local_session;
2661 		dring_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK;
2662 		dring_pkt->dring_ident = dp->ident;
2663 
2664 		(void) vsw_send_msg(ldcp, (void *)dring_pkt,
2665 		    sizeof (vio_dring_reg_msg_t), B_TRUE);
2666 
2667 		ldcp->lane_in.lstate |= VSW_DRING_ACK_SENT;
2668 		vsw_next_milestone(ldcp);
2669 		break;
2670 
2671 	case VIO_SUBTYPE_ACK:
2672 		D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__);
2673 
2674 		if (vsw_check_flag(ldcp, OUTBOUND, VSW_DRING_ACK_RECV))
2675 			return;
2676 
2677 		/*
2678 		 * Peer is acknowledging our dring info and will have
2679 		 * sent us a dring identifier which we will use to
2680 		 * refer to this ring w.r.t. our peer.
2681 		 */
2682 		dp = ldcp->lane_out.dringp;
2683 		if (dp != NULL) {
2684 			/*
2685 			 * Find the ring this ident should be associated
2686 			 * with.
2687 			 */
2688 			if (vsw_dring_match(dp, dring_pkt)) {
2689 				dring_found = 1;
2690 
2691 			} else while (dp != NULL) {
2692 				if (vsw_dring_match(dp, dring_pkt)) {
2693 					dring_found = 1;
2694 					break;
2695 				}
2696 				dp = dp->next;
2697 			}
2698 
2699 			if (dring_found == 0) {
2700 				DERR(NULL, "%s: unrecognised ring cookie",
2701 				    __func__);
2702 				vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
2703 				return;
2704 			}
2705 
2706 		} else {
2707 			DERR(vswp, "%s: DRING ACK received but no drings "
2708 			    "allocated", __func__);
2709 			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
2710 			return;
2711 		}
2712 
2713 		/* store ident */
2714 		dp->ident = dring_pkt->dring_ident;
2715 		ldcp->lane_out.lstate |= VSW_DRING_ACK_RECV;
2716 		vsw_next_milestone(ldcp);
2717 		break;
2718 
2719 	case VIO_SUBTYPE_NACK:
2720 		D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__);
2721 
2722 		if (vsw_check_flag(ldcp, OUTBOUND, VSW_DRING_NACK_RECV))
2723 			return;
2724 
2725 		ldcp->lane_out.lstate |= VSW_DRING_NACK_RECV;
2726 		vsw_next_milestone(ldcp);
2727 		break;
2728 
2729 	default:
2730 		DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__,
2731 		    dring_pkt->tag.vio_subtype);
2732 	}
2733 
2734 	D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id);
2735 }
2736 
2737 /*
2738  * Process a request from peer to unregister a dring.
2739  *
2740  * For the moment we just restart the handshake if our
2741  * peer endpoint attempts to unregister a dring.
2742  */
2743 void
2744 vsw_process_ctrl_dring_unreg_pkt(vsw_ldc_t *ldcp, void *pkt)
2745 {
2746 	vsw_t			*vswp = ldcp->ldc_vswp;
2747 	vio_dring_unreg_msg_t	*dring_pkt;
2748 
2749 	/*
2750 	 * We know this is a ctrl/dring packet so
2751 	 * cast it into the correct structure.
2752 	 */
2753 	dring_pkt = (vio_dring_unreg_msg_t *)pkt;
2754 
2755 	D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);
2756 
2757 	switch (dring_pkt->tag.vio_subtype) {
2758 	case VIO_SUBTYPE_INFO:
2759 		D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__);
2760 
2761 		DWARN(vswp, "%s: restarting handshake..", __func__);
2762 		break;
2763 
2764 	case VIO_SUBTYPE_ACK:
2765 		D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__);
2766 
2767 		DWARN(vswp, "%s: restarting handshake..", __func__);
2768 		break;
2769 
2770 	case VIO_SUBTYPE_NACK:
2771 		D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__);
2772 
2773 		DWARN(vswp, "%s: restarting handshake..", __func__);
2774 		break;
2775 
2776 	default:
2777 		DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__,
2778 		    dring_pkt->tag.vio_subtype);
2779 	}
2780 
2781 	vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
2782 
2783 	D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id);
2784 }
2785 
/*
 * Turn a multicast message into a NACK carrying our session id and send
 * it back on the channel. Wrapped in do/while so that it expands to a
 * single statement and is safe in an unbraced if/else. Note that both
 * arguments are evaluated more than once.
 */
#define	SND_MCST_NACK(ldcp, pkt) \
	do { \
		(pkt)->tag.vio_subtype = VIO_SUBTYPE_NACK; \
		(pkt)->tag.vio_sid = (ldcp)->local_session; \
		(void) vsw_send_msg((ldcp), (void *)(pkt), \
		    sizeof (vnet_mcast_msg_t), B_TRUE); \
	} while (0)
2791 
2792 /*
2793  * Process a multicast request from a vnet.
2794  *
2795  * Vnet's specify a multicast address that they are interested in. This
2796  * address is used as a key into the hash table which forms the multicast
2797  * forwarding database (mFDB).
2798  *
2799  * The table keys are the multicast addresses, while the table entries
2800  * are pointers to lists of ports which wish to receive packets for the
2801  * specified multicast address.
2802  *
2803  * When a multicast packet is being switched we use the address as a key
2804  * into the hash table, and then walk the appropriate port list forwarding
2805  * the pkt to each port in turn.
2806  *
2807  * If a vnet is no longer interested in a particular multicast grouping
2808  * we simply find the correct location in the hash table and then delete
2809  * the relevant port from the port list.
2810  *
2811  * To deal with the case whereby a port is being deleted without first
2812  * removing itself from the lists in the hash table, we maintain a list
2813  * of multicast addresses the port has registered an interest in, within
2814  * the port structure itself. We then simply walk that list of addresses
2815  * using them as keys into the hash table and remove the port from the
2816  * appropriate lists.
2817  */
2818 static void
2819 vsw_process_ctrl_mcst_pkt(vsw_ldc_t *ldcp, void *pkt)
2820 {
2821 	vnet_mcast_msg_t	*mcst_pkt;
2822 	vsw_port_t		*port = ldcp->ldc_port;
2823 	vsw_t			*vswp = ldcp->ldc_vswp;
2824 	int			i;
2825 
2826 	D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);
2827 
2828 	/*
2829 	 * We know this is a ctrl/mcast packet so
2830 	 * cast it into the correct structure.
2831 	 */
2832 	mcst_pkt = (vnet_mcast_msg_t *)pkt;
2833 
2834 	switch (mcst_pkt->tag.vio_subtype) {
2835 	case VIO_SUBTYPE_INFO:
2836 		D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__);
2837 
2838 		/*
2839 		 * Check if in correct state to receive a multicast
2840 		 * message (i.e. handshake complete). If not reset
2841 		 * the handshake.
2842 		 */
2843 		if (vsw_check_flag(ldcp, INBOUND, VSW_MCST_INFO_RECV))
2844 			return;
2845 
2846 		/*
2847 		 * Before attempting to add or remove address check
2848 		 * that they are valid multicast addresses.
2849 		 * If not, then NACK back.
2850 		 */
2851 		for (i = 0; i < mcst_pkt->count; i++) {
2852 			if ((mcst_pkt->mca[i].ether_addr_octet[0] & 01) != 1) {
2853 				DERR(vswp, "%s: invalid multicast address",
2854 				    __func__);
2855 				SND_MCST_NACK(ldcp, mcst_pkt);
2856 				return;
2857 			}
2858 		}
2859 
2860 		/*
2861 		 * Now add/remove the addresses. If this fails we
2862 		 * NACK back.
2863 		 */
2864 		if (vsw_add_rem_mcst(mcst_pkt, port) != 0) {
2865 			SND_MCST_NACK(ldcp, mcst_pkt);
2866 			return;
2867 		}
2868 
2869 		mcst_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK;
2870 		mcst_pkt->tag.vio_sid = ldcp->local_session;
2871 
2872 		DUMP_TAG_PTR((vio_msg_tag_t *)mcst_pkt);
2873 
2874 		(void) vsw_send_msg(ldcp, (void *)mcst_pkt,
2875 		    sizeof (vnet_mcast_msg_t), B_TRUE);
2876 		break;
2877 
2878 	case VIO_SUBTYPE_ACK:
2879 		DWARN(vswp, "%s: VIO_SUBTYPE_ACK", __func__);
2880 
2881 		/*
2882 		 * We shouldn't ever get a multicast ACK message as
2883 		 * at the moment we never request multicast addresses
2884 		 * to be set on some other device. This may change in
2885 		 * the future if we have cascading switches.
2886 		 */
2887 		if (vsw_check_flag(ldcp, OUTBOUND, VSW_MCST_ACK_RECV))
2888 			return;
2889 
2890 				/* Do nothing */
2891 		break;
2892 
2893 	case VIO_SUBTYPE_NACK:
2894 		DWARN(vswp, "%s: VIO_SUBTYPE_NACK", __func__);
2895 
2896 		/*
2897 		 * We shouldn't get a multicast NACK packet for the
2898 		 * same reasons as we shouldn't get a ACK packet.
2899 		 */
2900 		if (vsw_check_flag(ldcp, OUTBOUND, VSW_MCST_NACK_RECV))
2901 			return;
2902 
2903 				/* Do nothing */
2904 		break;
2905 
2906 	default:
2907 		DERR(vswp, "%s: unknown vio_subtype %x\n", __func__,
2908 		    mcst_pkt->tag.vio_subtype);
2909 	}
2910 
2911 	D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id);
2912 }
2913 
2914 static void
2915 vsw_process_ctrl_rdx_pkt(vsw_ldc_t *ldcp, void *pkt)
2916 {
2917 	vio_rdx_msg_t	*rdx_pkt;
2918 	vsw_t		*vswp = ldcp->ldc_vswp;
2919 
2920 	/*
2921 	 * We know this is a ctrl/rdx packet so
2922 	 * cast it into the correct structure.
2923 	 */
2924 	rdx_pkt = (vio_rdx_msg_t *)pkt;
2925 
2926 	D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id);
2927 
2928 	switch (rdx_pkt->tag.vio_subtype) {
2929 	case VIO_SUBTYPE_INFO:
2930 		D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__);
2931 
2932 		if (vsw_check_flag(ldcp, OUTBOUND, VSW_RDX_INFO_RECV))
2933 			return;
2934 
2935 		rdx_pkt->tag.vio_sid = ldcp->local_session;
2936 		rdx_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK;
2937 
2938 		DUMP_TAG_PTR((vio_msg_tag_t *)rdx_pkt);
2939 
2940 		ldcp->lane_out.lstate |= VSW_RDX_ACK_SENT;
2941 
2942 		(void) vsw_send_msg(ldcp, (void *)rdx_pkt,
2943 		    sizeof (vio_rdx_msg_t), B_TRUE);
2944 
2945 		vsw_next_milestone(ldcp);
2946 		break;
2947 
2948 	case VIO_SUBTYPE_ACK:
2949 		/*
2950 		 * Should be handled in-band by callback handler.
2951 		 */
2952 		DERR(vswp, "%s: Unexpected VIO_SUBTYPE_ACK", __func__);
2953 		vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
2954 		break;
2955 
2956 	case VIO_SUBTYPE_NACK:
2957 		D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__);
2958 
2959 		if (vsw_check_flag(ldcp, INBOUND, VSW_RDX_NACK_RECV))
2960 			return;
2961 
2962 		ldcp->lane_in.lstate |= VSW_RDX_NACK_RECV;
2963 		vsw_next_milestone(ldcp);
2964 		break;
2965 
2966 	default:
2967 		DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__,
2968 		    rdx_pkt->tag.vio_subtype);
2969 	}
2970 
2971 	D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id);
2972 }
2973 
2974 static void
2975 vsw_process_data_pkt(vsw_ldc_t *ldcp, void *dpkt, vio_msg_tag_t *tagp,
2976 	uint32_t msglen)
2977 {
2978 	uint16_t	env = tagp->vio_subtype_env;
2979 	vsw_t		*vswp = ldcp->ldc_vswp;
2980 
2981 	D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);
2982 
2983 	/* session id check */
2984 	if (ldcp->session_status & VSW_PEER_SESSION) {
2985 		if (ldcp->peer_session != tagp->vio_sid) {
2986 			DERR(vswp, "%s (chan %d): invalid session id (%llx)",
2987 			    __func__, ldcp->ldc_id, tagp->vio_sid);
2988 			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
2989 			return;
2990 		}
2991 	}
2992 
2993 	/*
2994 	 * It is an error for us to be getting data packets
2995 	 * before the handshake has completed.
2996 	 */
2997 	if (ldcp->hphase != VSW_MILESTONE4) {
2998 		DERR(vswp, "%s: got data packet before handshake complete "
2999 		    "hphase %d (%x: %x)", __func__, ldcp->hphase,
3000 		    ldcp->lane_in.lstate, ldcp->lane_out.lstate);
3001 		DUMP_FLAGS(ldcp->lane_in.lstate);
3002 		DUMP_FLAGS(ldcp->lane_out.lstate);
3003 		vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
3004 		return;
3005 	}
3006 
3007 	/*
3008 	 * To reduce the locking contention, release the
3009 	 * ldc_cblock here and re-acquire it once we are done
3010 	 * receiving packets.
3011 	 */
3012 	mutex_exit(&ldcp->ldc_cblock);
3013 	mutex_enter(&ldcp->ldc_rxlock);
3014 
3015 	/*
3016 	 * Switch on vio_subtype envelope, then let lower routines
3017 	 * decide if its an INFO, ACK or NACK packet.
3018 	 */
3019 	if (env == VIO_DRING_DATA) {
3020 		vsw_process_data_dring_pkt(ldcp, dpkt);
3021 	} else if (env == VIO_PKT_DATA) {
3022 		ldcp->rx_pktdata(ldcp, dpkt, msglen);
3023 	} else if (env == VIO_DESC_DATA) {
3024 		vsw_process_data_ibnd_pkt(ldcp, dpkt);
3025 	} else {
3026 		DERR(vswp, "%s: unknown vio_subtype_env (%x)\n", __func__, env);
3027 	}
3028 
3029 	mutex_exit(&ldcp->ldc_rxlock);
3030 	mutex_enter(&ldcp->ldc_cblock);
3031 
3032 	D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id);
3033 }
3034 
/*
 * Turn a dring data message into a NACK carrying our session id and send
 * it back on the channel. Wrapped in do/while so that it expands to a
 * single statement and is safe in an unbraced if/else. Note that both
 * arguments are evaluated more than once.
 */
#define	SND_DRING_NACK(ldcp, pkt) \
	do { \
		(pkt)->tag.vio_subtype = VIO_SUBTYPE_NACK; \
		(pkt)->tag.vio_sid = (ldcp)->local_session; \
		(void) vsw_send_msg((ldcp), (void *)(pkt), \
		    sizeof (vio_dring_msg_t), B_TRUE); \
	} while (0)
3040 
3041 static void
3042 vsw_process_data_dring_pkt(vsw_ldc_t *ldcp, void *dpkt)
3043 {
3044 	vio_dring_msg_t		*dring_pkt;
3045 	vnet_public_desc_t	*pub_addr = NULL;
3046 	vsw_private_desc_t	*priv_addr = NULL;
3047 	dring_info_t		*dp = NULL;
3048 	vsw_t			*vswp = ldcp->ldc_vswp;
3049 	mblk_t			*mp = NULL;
3050 	mblk_t			*bp = NULL;
3051 	mblk_t			*bpt = NULL;
3052 	size_t			nbytes = 0;
3053 	uint64_t		ncookies = 0;
3054 	uint64_t		chain = 0;
3055 	uint64_t		len;
3056 	uint32_t		pos, start, datalen;
3057 	uint32_t		range_start, range_end;
3058 	int32_t			end, num, cnt = 0;
3059 	int			i, rv, msg_rv = 0;
3060 	boolean_t		ack_needed = B_FALSE;
3061 	boolean_t		prev_desc_ack = B_FALSE;
3062 	int			read_attempts = 0;
3063 	struct ether_header	*ehp;
3064 
3065 	D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);
3066 
3067 	/*
3068 	 * We know this is a data/dring packet so
3069 	 * cast it into the correct structure.
3070 	 */
3071 	dring_pkt = (vio_dring_msg_t *)dpkt;
3072 
3073 	/*
3074 	 * Switch on the vio_subtype. If its INFO then we need to
3075 	 * process the data. If its an ACK we need to make sure
3076 	 * it makes sense (i.e did we send an earlier data/info),
3077 	 * and if its a NACK then we maybe attempt a retry.
3078 	 */
3079 	switch (dring_pkt->tag.vio_subtype) {
3080 	case VIO_SUBTYPE_INFO:
3081 		D2(vswp, "%s(%lld): VIO_SUBTYPE_INFO", __func__, ldcp->ldc_id);
3082 
3083 		READ_ENTER(&ldcp->lane_in.dlistrw);
3084 		if ((dp = vsw_ident2dring(&ldcp->lane_in,
3085 		    dring_pkt->dring_ident)) == NULL) {
3086 			RW_EXIT(&ldcp->lane_in.dlistrw);
3087 
3088 			DERR(vswp, "%s(%lld): unable to find dring from "
3089 			    "ident 0x%llx", __func__, ldcp->ldc_id,
3090 			    dring_pkt->dring_ident);
3091 
3092 			SND_DRING_NACK(ldcp, dring_pkt);
3093 			return;
3094 		}
3095 
3096 		start = pos = dring_pkt->start_idx;
3097 		end = dring_pkt->end_idx;
3098 		len = dp->num_descriptors;
3099 
3100 		range_start = range_end = pos;
3101 
3102 		D2(vswp, "%s(%lld): start index %ld : end %ld\n",
3103 		    __func__, ldcp->ldc_id, start, end);
3104 
3105 		if (end == -1) {
3106 			num = -1;
3107 		} else if (end >= 0) {
3108 			num = end >= pos ? end - pos + 1: (len - pos + 1) + end;
3109 
3110 			/* basic sanity check */
3111 			if (end > len) {
3112 				RW_EXIT(&ldcp->lane_in.dlistrw);
3113 				DERR(vswp, "%s(%lld): endpoint %lld outside "
3114 				    "ring length %lld", __func__,
3115 				    ldcp->ldc_id, end, len);
3116 
3117 				SND_DRING_NACK(ldcp, dring_pkt);
3118 				return;
3119 			}
3120 		} else {
3121 			RW_EXIT(&ldcp->lane_in.dlistrw);
3122 			DERR(vswp, "%s(%lld): invalid endpoint %lld",
3123 			    __func__, ldcp->ldc_id, end);
3124 			SND_DRING_NACK(ldcp, dring_pkt);
3125 			return;
3126 		}
3127 
3128 		while (cnt != num) {
3129 vsw_recheck_desc:
3130 			if ((rv = ldc_mem_dring_acquire(dp->handle,
3131 			    pos, pos)) != 0) {
3132 				RW_EXIT(&ldcp->lane_in.dlistrw);
3133 				DERR(vswp, "%s(%lld): unable to acquire "
3134 				    "descriptor at pos %d: err %d",
3135 				    __func__, pos, ldcp->ldc_id, rv);
3136 				SND_DRING_NACK(ldcp, dring_pkt);
3137 				ldcp->ldc_stats.ierrors++;
3138 				return;
3139 			}
3140 
3141 			pub_addr = (vnet_public_desc_t *)dp->pub_addr + pos;
3142 
3143 			/*
3144 			 * When given a bounded range of descriptors
3145 			 * to process, its an error to hit a descriptor
3146 			 * which is not ready. In the non-bounded case
3147 			 * (end_idx == -1) this simply indicates we have
3148 			 * reached the end of the current active range.
3149 			 */
3150 			if (pub_addr->hdr.dstate != VIO_DESC_READY) {
3151 				/* unbound - no error */
3152 				if (end == -1) {
3153 					if (read_attempts == vsw_read_attempts)
3154 						break;
3155 
3156 					delay(drv_usectohz(vsw_desc_delay));
3157 					read_attempts++;
3158 					goto vsw_recheck_desc;
3159 				}
3160 
3161 				/* bounded - error - so NACK back */
3162 				RW_EXIT(&ldcp->lane_in.dlistrw);
3163 				DERR(vswp, "%s(%lld): descriptor not READY "
3164 				    "(%d)", __func__, ldcp->ldc_id,
3165 				    pub_addr->hdr.dstate);
3166 				SND_DRING_NACK(ldcp, dring_pkt);
3167 				return;
3168 			}
3169 
3170 			DTRACE_PROBE1(read_attempts, int, read_attempts);
3171 
3172 			range_end = pos;
3173 
3174 			/*
3175 			 * If we ACK'd the previous descriptor then now
3176 			 * record the new range start position for later
3177 			 * ACK's.
3178 			 */
3179 			if (prev_desc_ack) {
3180 				range_start = pos;
3181 
3182 				D2(vswp, "%s(%lld): updating range start to be "
3183 				    "%d", __func__, ldcp->ldc_id, range_start);
3184 
3185 				prev_desc_ack = B_FALSE;
3186 			}
3187 
3188 			/*
3189 			 * Data is padded to align on 8 byte boundary,
3190 			 * datalen is actual data length, i.e. minus that
3191 			 * padding.
3192 			 */
3193 			datalen = pub_addr->nbytes;
3194 
3195 			/*
3196 			 * Does peer wish us to ACK when we have finished
3197 			 * with this descriptor ?
3198 			 */
3199 			if (pub_addr->hdr.ack)
3200 				ack_needed = B_TRUE;
3201 
3202 			D2(vswp, "%s(%lld): processing desc %lld at pos"
3203 			    " 0x%llx : dstate 0x%lx : datalen 0x%lx",
3204 			    __func__, ldcp->ldc_id, pos, pub_addr,
3205 			    pub_addr->hdr.dstate, datalen);
3206 
3207 			/*
3208 			 * Mark that we are starting to process descriptor.
3209 			 */
3210 			pub_addr->hdr.dstate = VIO_DESC_ACCEPTED;
3211 
3212 			/*
3213 			 * Ensure that we ask ldc for an aligned
3214 			 * number of bytes.
3215 			 */
3216 			nbytes = (datalen + VNET_IPALIGN + 7) & ~7;
3217 
3218 			mp = vio_multipool_allocb(&ldcp->vmp, nbytes);
3219 			if (mp == NULL) {
3220 				ldcp->ldc_stats.rx_vio_allocb_fail++;
3221 				/*
3222 				 * No free receive buffers available, so
3223 				 * fallback onto allocb(9F). Make sure that
3224 				 * we get a data buffer which is a multiple
3225 				 * of 8 as this is required by ldc_mem_copy.
3226 				 */
3227 				DTRACE_PROBE(allocb);
3228 				if ((mp = allocb(datalen + VNET_IPALIGN + 8,
3229 				    BPRI_MED)) == NULL) {
3230 					DERR(vswp, "%s(%ld): allocb failed",
3231 					    __func__, ldcp->ldc_id);
3232 					pub_addr->hdr.dstate = VIO_DESC_DONE;
3233 					(void) ldc_mem_dring_release(dp->handle,
3234 					    pos, pos);
3235 					ldcp->ldc_stats.ierrors++;
3236 					ldcp->ldc_stats.rx_allocb_fail++;
3237 					break;
3238 				}
3239 			}
3240 
3241 			ncookies = pub_addr->ncookies;
3242 			rv = ldc_mem_copy(ldcp->ldc_handle,
3243 			    (caddr_t)mp->b_rptr, 0, &nbytes,
3244 			    pub_addr->memcookie, ncookies, LDC_COPY_IN);
3245 
3246 			if (rv != 0) {
3247 				DERR(vswp, "%s(%d): unable to copy in data "
3248 				    "from %d cookies in desc %d (rv %d)",
3249 				    __func__, ldcp->ldc_id, ncookies, pos, rv);
3250 				freemsg(mp);
3251 
3252 				pub_addr->hdr.dstate = VIO_DESC_DONE;
3253 				(void) ldc_mem_dring_release(dp->handle,
3254 				    pos, pos);
3255 				ldcp->ldc_stats.ierrors++;
3256 				break;
3257 			} else {
3258 				D2(vswp, "%s(%d): copied in %ld bytes"
3259 				    " using %d cookies", __func__,
3260 				    ldcp->ldc_id, nbytes, ncookies);
3261 			}
3262 
3263 			/* adjust the read pointer to skip over the padding */
3264 			mp->b_rptr += VNET_IPALIGN;
3265 
3266 			/* point to the actual end of data */
3267 			mp->b_wptr = mp->b_rptr + datalen;
3268 
3269 			/* update statistics */
3270 			ehp = (struct ether_header *)mp->b_rptr;
3271 			if (IS_BROADCAST(ehp))
3272 				ldcp->ldc_stats.brdcstrcv++;
3273 			else if (IS_MULTICAST(ehp))
3274 				ldcp->ldc_stats.multircv++;
3275 
3276 			ldcp->ldc_stats.ipackets++;
3277 			ldcp->ldc_stats.rbytes += datalen;
3278 
3279 			/* build a chain of received packets */
3280 			if (bp == NULL) {
3281 				/* first pkt */
3282 				bp = mp;
3283 				bp->b_next = bp->b_prev = NULL;
3284 				bpt = bp;
3285 				chain = 1;
3286 			} else {
3287 				mp->b_next = mp->b_prev = NULL;
3288 				bpt->b_next = mp;
3289 				bpt = mp;
3290 				chain++;
3291 			}
3292 
3293 			/* mark we are finished with this descriptor */
3294 			pub_addr->hdr.dstate = VIO_DESC_DONE;
3295 
3296 			(void) ldc_mem_dring_release(dp->handle, pos, pos);
3297 
3298 			/*
3299 			 * Send an ACK back to peer if requested.
3300 			 */
3301 			if (ack_needed) {
3302 				ack_needed = B_FALSE;
3303 
3304 				dring_pkt->start_idx = range_start;
3305 				dring_pkt->end_idx = range_end;
3306 
3307 				DERR(vswp, "%s(%lld): processed %d %d, ACK"
3308 				    " requested", __func__, ldcp->ldc_id,
3309 				    dring_pkt->start_idx, dring_pkt->end_idx);
3310 
3311 				dring_pkt->dring_process_state = VIO_DP_ACTIVE;
3312 				dring_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK;
3313 				dring_pkt->tag.vio_sid = ldcp->local_session;
3314 
3315 				msg_rv = vsw_send_msg(ldcp, (void *)dring_pkt,
3316 				    sizeof (vio_dring_msg_t), B_FALSE);
3317 
3318 				/*
3319 				 * Check if ACK was successfully sent. If not
3320 				 * we break and deal with that below.
3321 				 */
3322 				if (msg_rv != 0)
3323 					break;
3324 
3325 				prev_desc_ack = B_TRUE;
3326 				range_start = pos;
3327 			}
3328 
3329 			/* next descriptor */
3330 			pos = (pos + 1) % len;
3331 			cnt++;
3332 
3333 			/*
3334 			 * Break out of loop here and stop processing to
3335 			 * allow some other network device (or disk) to
3336 			 * get access to the cpu.
3337 			 */
3338 			if (chain > vsw_chain_len) {
3339 				D3(vswp, "%s(%lld): switching chain of %d "
3340 				    "msgs", __func__, ldcp->ldc_id, chain);
3341 				break;
3342 			}
3343 		}
3344 		RW_EXIT(&ldcp->lane_in.dlistrw);
3345 
3346 		/*
3347 		 * If when we attempted to send the ACK we found that the
3348 		 * channel had been reset then now handle this. We deal with
3349 		 * it here as we cannot reset the channel while holding the
3350 		 * dlistrw lock, and we don't want to acquire/release it
3351 		 * continuously in the above loop, as a channel reset should
3352 		 * be a rare event.
3353 		 */
3354 		if (msg_rv == ECONNRESET) {
3355 			vsw_process_conn_evt(ldcp, VSW_CONN_RESET);
3356 			break;
3357 		}
3358 
3359 		/* send the chain of packets to be switched */
3360 		if (bp != NULL) {
3361 			DTRACE_PROBE1(vsw_rcv_msgs, int, chain);
3362 			D3(vswp, "%s(%lld): switching chain of %d msgs",
3363 			    __func__, ldcp->ldc_id, chain);
3364 			vswp->vsw_switch_frame(vswp, bp, VSW_VNETPORT,
3365 			    ldcp->ldc_port, NULL);
3366 		}
3367 
3368 		DTRACE_PROBE1(msg_cnt, int, cnt);
3369 
3370 		/*
3371 		 * We are now finished so ACK back with the state
3372 		 * set to STOPPING so our peer knows we are finished
3373 		 */
3374 		dring_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK;
3375 		dring_pkt->tag.vio_sid = ldcp->local_session;
3376 
3377 		dring_pkt->dring_process_state = VIO_DP_STOPPED;
3378 
3379 		DTRACE_PROBE(stop_process_sent);
3380 
3381 		/*
3382 		 * We have not processed any more descriptors beyond
3383 		 * the last one we ACK'd.
3384 		 */
3385 		if (prev_desc_ack)
3386 			range_start = range_end;
3387 
3388 		dring_pkt->start_idx = range_start;
3389 		dring_pkt->end_idx = range_end;
3390 
3391 		D2(vswp, "%s(%lld) processed : %d : %d, now stopping",
3392 		    __func__, ldcp->ldc_id, dring_pkt->start_idx,
3393 		    dring_pkt->end_idx);
3394 
3395 		(void) vsw_send_msg(ldcp, (void *)dring_pkt,
3396 		    sizeof (vio_dring_msg_t), B_TRUE);
3397 		break;
3398 
3399 	case VIO_SUBTYPE_ACK:
3400 		D2(vswp, "%s(%lld): VIO_SUBTYPE_ACK", __func__, ldcp->ldc_id);
3401 		/*
3402 		 * Verify that the relevant descriptors are all
3403 		 * marked as DONE
3404 		 */
3405 		READ_ENTER(&ldcp->lane_out.dlistrw);
3406 		if ((dp = vsw_ident2dring(&ldcp->lane_out,
3407 		    dring_pkt->dring_ident)) == NULL) {
3408 			RW_EXIT(&ldcp->lane_out.dlistrw);
3409 			DERR(vswp, "%s: unknown ident in ACK", __func__);
3410 			return;
3411 		}
3412 
3413 		start = end = 0;
3414 		start = dring_pkt->start_idx;
3415 		end = dring_pkt->end_idx;
3416 		len = dp->num_descriptors;
3417 
3418 
3419 		mutex_enter(&dp->dlock);
3420 		dp->last_ack_recv = end;
3421 		ldcp->ldc_stats.dring_data_acks++;
3422 		mutex_exit(&dp->dlock);
3423 
3424 		(void) vsw_reclaim_dring(dp, start);
3425 
3426 		/*
3427 		 * If our peer is stopping processing descriptors then
3428 		 * we check to make sure it has processed all the descriptors
3429 		 * we have updated. If not then we send it a new message
3430 		 * to prompt it to restart.
3431 		 */
3432 		if (dring_pkt->dring_process_state == VIO_DP_STOPPED) {
3433 			DTRACE_PROBE(stop_process_recv);
3434 			D2(vswp, "%s(%lld): got stopping msg : %d : %d",
3435 			    __func__, ldcp->ldc_id, dring_pkt->start_idx,
3436 			    dring_pkt->end_idx);
3437 
3438 			/*
3439 			 * Check next descriptor in public section of ring.
3440 			 * If its marked as READY then we need to prompt our
3441 			 * peer to start processing the ring again.
3442 			 */
3443 			i = (end + 1) % len;
3444 			pub_addr = (vnet_public_desc_t *)dp->pub_addr + i;
3445 			priv_addr = (vsw_private_desc_t *)dp->priv_addr + i;
3446 
3447 			/*
3448 			 * Hold the restart lock across all of this to
3449 			 * make sure that its not possible for us to
3450 			 * decide that a msg needs to be sent in the future
3451 			 * but the sending code having already checked is
3452 			 * about to exit.
3453 			 */
3454 			mutex_enter(&dp->restart_lock);
3455 			ldcp->ldc_stats.dring_stopped_acks++;
3456 			mutex_enter(&priv_addr->dstate_lock);
3457 			if (pub_addr->hdr.dstate == VIO_DESC_READY) {
3458 
3459 				mutex_exit(&priv_addr->dstate_lock);
3460 
3461 				dring_pkt->tag.vio_subtype = VIO_SUBTYPE_INFO;
3462 				dring_pkt->tag.vio_sid = ldcp->local_session;
3463 
3464 				dring_pkt->start_idx = (end + 1) % len;
3465 				dring_pkt->end_idx = -1;
3466 
3467 				D2(vswp, "%s(%lld) : sending restart msg:"
3468 				    " %d : %d", __func__, ldcp->ldc_id,
3469 				    dring_pkt->start_idx, dring_pkt->end_idx);
3470 
3471 				msg_rv = vsw_send_msg(ldcp, (void *)dring_pkt,
3472 				    sizeof (vio_dring_msg_t), B_FALSE);
3473 				ldcp->ldc_stats.dring_data_msgs++;
3474 
3475 			} else {
3476 				mutex_exit(&priv_addr->dstate_lock);
3477 				dp->restart_reqd = B_TRUE;
3478 			}
3479 			mutex_exit(&dp->restart_lock);
3480 		}
3481 		RW_EXIT(&ldcp->lane_out.dlistrw);
3482 
3483 		/* only do channel reset after dropping dlistrw lock */
3484 		if (msg_rv == ECONNRESET)
3485 			vsw_process_conn_evt(ldcp, VSW_CONN_RESET);
3486 
3487 		break;
3488 
3489 	case VIO_SUBTYPE_NACK:
3490 		DWARN(vswp, "%s(%lld): VIO_SUBTYPE_NACK",
3491 		    __func__, ldcp->ldc_id);
3492 		/*
3493 		 * Something is badly wrong if we are getting NACK's
3494 		 * for our data pkts. So reset the channel.
3495 		 */
3496 		vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
3497 
3498 		break;
3499 
3500 	default:
3501 		DERR(vswp, "%s(%lld): Unknown vio_subtype %x\n", __func__,
3502 		    ldcp->ldc_id, dring_pkt->tag.vio_subtype);
3503 	}
3504 
3505 	D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id);
3506 }
3507 
/*
 * Dummy (no-op) pkt data handler function for vnet protocol version 1.0.
 *
 * Takes the same three arguments as vsw_process_pkt_data() but ignores
 * all of them and does nothing, so a raw data message handed to it is
 * silently discarded.
 *
 * NOTE(review): presumably installed as ldcp->rx_pktdata when raw data
 * messages are not supported by the negotiated version -- confirm against
 * the code that sets rx_pktdata.
 */
static void
vsw_process_pkt_data_nop(void *arg1, void *arg2, uint32_t msglen)
{
	/* lint annotation: the arguments are intentionally unused */
	_NOTE(ARGUNUSED(arg1, arg2, msglen))
}
3516 
3517 /*
3518  * This function handles raw pkt data messages received over the channel.
3519  * Currently, only priority-eth-type frames are received through this mechanism.
3520  * In this case, the frame(data) is present within the message itself which
3521  * is copied into an mblk before switching it.
3522  */
3523 static void
3524 vsw_process_pkt_data(void *arg1, void *arg2, uint32_t msglen)
3525 {
3526 	vsw_ldc_t		*ldcp = (vsw_ldc_t *)arg1;
3527 	vio_raw_data_msg_t	*dpkt = (vio_raw_data_msg_t *)arg2;
3528 	uint32_t		size;
3529 	mblk_t			*mp;
3530 	vsw_t			*vswp = ldcp->ldc_vswp;
3531 	vgen_stats_t		*statsp = &ldcp->ldc_stats;
3532 
3533 	size = msglen - VIO_PKT_DATA_HDRSIZE;
3534 	if (size < ETHERMIN || size > ETHERMAX) {
3535 		(void) atomic_inc_32(&statsp->rx_pri_fail);
3536 		DWARN(vswp, "%s(%lld) invalid size(%d)\n", __func__,
3537 		    ldcp->ldc_id, size);
3538 		return;
3539 	}
3540 
3541 	mp = vio_multipool_allocb(&ldcp->vmp, size);
3542 	if (mp == NULL) {
3543 		mp = allocb(size, BPRI_MED);
3544 		if (mp == NULL) {
3545 			(void) atomic_inc_32(&statsp->rx_pri_fail);
3546 			DWARN(vswp, "%s(%lld) allocb failure, "
3547 			    "unable to process priority frame\n", __func__,
3548 			    ldcp->ldc_id);
3549 			return;
3550 		}
3551 	}
3552 
3553 	/* copy the frame from the payload of raw data msg into the mblk */
3554 	bcopy(dpkt->data, mp->b_rptr, size);
3555 	mp->b_wptr = mp->b_rptr + size;
3556 
3557 	/* update stats */
3558 	(void) atomic_inc_64(&statsp->rx_pri_packets);
3559 	(void) atomic_add_64(&statsp->rx_pri_bytes, size);
3560 
3561 	/* switch the frame to destination */
3562 	vswp->vsw_switch_frame(vswp, mp, VSW_VNETPORT, ldcp->ldc_port, NULL);
3563 }
3564 
3565 /*
3566  * Process an in-band descriptor message (most likely from
3567  * OBP).
3568  */
3569 static void
3570 vsw_process_data_ibnd_pkt(vsw_ldc_t *ldcp, void *pkt)
3571 {
3572 	vnet_ibnd_desc_t	*ibnd_desc;
3573 	dring_info_t		*dp = NULL;
3574 	vsw_private_desc_t	*priv_addr = NULL;
3575 	vsw_t			*vswp = ldcp->ldc_vswp;
3576 	mblk_t			*mp = NULL;
3577 	size_t			nbytes = 0;
3578 	size_t			off = 0;
3579 	uint64_t		idx = 0;
3580 	uint32_t		num = 1, len, datalen = 0;
3581 	uint64_t		ncookies = 0;
3582 	int			i, rv;
3583 	int			j = 0;
3584 
3585 	D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);
3586 
3587 	ibnd_desc = (vnet_ibnd_desc_t *)pkt;
3588 
3589 	switch (ibnd_desc->hdr.tag.vio_subtype) {
3590 	case VIO_SUBTYPE_INFO:
3591 		D1(vswp, "%s: VIO_SUBTYPE_INFO", __func__);
3592 
3593 		if (vsw_check_flag(ldcp, INBOUND, VSW_DRING_INFO_RECV))
3594 			return;
3595 
3596 		/*
3597 		 * Data is padded to align on a 8 byte boundary,
3598 		 * nbytes is actual data length, i.e. minus that
3599 		 * padding.
3600 		 */
3601 		datalen = ibnd_desc->nbytes;
3602 
3603 		D2(vswp, "%s(%lld): processing inband desc : "
3604 		    ": datalen 0x%lx", __func__, ldcp->ldc_id, datalen);
3605 
3606 		ncookies = ibnd_desc->ncookies;
3607 
3608 		/*
3609 		 * allocb(9F) returns an aligned data block. We
3610 		 * need to ensure that we ask ldc for an aligned
3611 		 * number of bytes also.
3612 		 */
3613 		nbytes = datalen;
3614 		if (nbytes & 0x7) {
3615 			off = 8 - (nbytes & 0x7);
3616 			nbytes += off;
3617 		}
3618 
3619 		mp = allocb(datalen, BPRI_MED);
3620 		if (mp == NULL) {
3621 			DERR(vswp, "%s(%lld): allocb failed",
3622 			    __func__, ldcp->ldc_id);
3623 			ldcp->ldc_stats.rx_allocb_fail++;
3624 			return;
3625 		}
3626 
3627 		rv = ldc_mem_copy(ldcp->ldc_handle, (caddr_t)mp->b_rptr,
3628 		    0, &nbytes, ibnd_desc->memcookie, (uint64_t)ncookies,
3629 		    LDC_COPY_IN);
3630 
3631 		if (rv != 0) {
3632 			DERR(vswp, "%s(%d): unable to copy in data from "
3633 			    "%d cookie(s)", __func__, ldcp->ldc_id, ncookies);
3634 			freemsg(mp);
3635 			ldcp->ldc_stats.ierrors++;
3636 			return;
3637 		}
3638 
3639 		D2(vswp, "%s(%d): copied in %ld bytes using %d cookies",
3640 		    __func__, ldcp->ldc_id, nbytes, ncookies);
3641 
3642 		/* point to the actual end of data */
3643 		mp->b_wptr = mp->b_rptr + datalen;
3644 		ldcp->ldc_stats.ipackets++;
3645 		ldcp->ldc_stats.rbytes += datalen;
3646 
3647 		/*
3648 		 * We ACK back every in-band descriptor message we process
3649 		 */
3650 		ibnd_desc->hdr.tag.vio_subtype = VIO_SUBTYPE_ACK;
3651 		ibnd_desc->hdr.tag.vio_sid = ldcp->local_session;
3652 		(void) vsw_send_msg(ldcp, (void *)ibnd_desc,
3653 		    sizeof (vnet_ibnd_desc_t), B_TRUE);
3654 
3655 		/* send the packet to be switched */
3656 		vswp->vsw_switch_frame(vswp, mp, VSW_VNETPORT,
3657 		    ldcp->ldc_port, NULL);
3658 
3659 		break;
3660 
3661 	case VIO_SUBTYPE_ACK:
3662 		D1(vswp, "%s: VIO_SUBTYPE_ACK", __func__);
3663 
3664 		/* Verify the ACK is valid */
3665 		idx = ibnd_desc->hdr.desc_handle;
3666 
3667 		if (idx >= vsw_ntxds) {
3668 			cmn_err(CE_WARN, "!vsw%d: corrupted ACK received "
3669 			    "(idx %ld)", vswp->instance, idx);
3670 			return;
3671 		}
3672 
3673 		if ((dp = ldcp->lane_out.dringp) == NULL) {
3674 			DERR(vswp, "%s: no dring found", __func__);
3675 			return;
3676 		}
3677 
3678 		len = dp->num_descriptors;
3679 		/*
3680 		 * If the descriptor we are being ACK'ed for is not the
3681 		 * one we expected, then pkts were lost somwhere, either
3682 		 * when we tried to send a msg, or a previous ACK msg from
3683 		 * our peer. In either case we now reclaim the descriptors
3684 		 * in the range from the last ACK we received up to the
3685 		 * current ACK.
3686 		 */
3687 		if (idx != dp->last_ack_recv) {
3688 			DWARN(vswp, "%s: dropped pkts detected, (%ld, %ld)",
3689 			    __func__, dp->last_ack_recv, idx);
3690 			num = idx >= dp->last_ack_recv ?
3691 			    idx - dp->last_ack_recv + 1:
3692 			    (len - dp->last_ack_recv + 1) + idx;
3693 		}
3694 
3695 		/*
3696 		 * When we sent the in-band message to our peer we
3697 		 * marked the copy in our private ring as READY. We now
3698 		 * check that the descriptor we are being ACK'ed for is in
3699 		 * fact READY, i.e. it is one we have shared with our peer.
3700 		 *
3701 		 * If its not we flag an error, but still reset the descr
3702 		 * back to FREE.
3703 		 */
3704 		for (i = dp->last_ack_recv; j < num; i = (i + 1) % len, j++) {
3705 			priv_addr = (vsw_private_desc_t *)dp->priv_addr + i;
3706 			mutex_enter(&priv_addr->dstate_lock);
3707 			if (priv_addr->dstate != VIO_DESC_READY) {
3708 				DERR(vswp, "%s: (%ld) desc at index %ld not "
3709 				    "READY (0x%lx)", __func__,
3710 				    ldcp->ldc_id, idx, priv_addr->dstate);
3711 				DERR(vswp, "%s: bound %d: ncookies %ld : "
3712 				    "datalen %ld", __func__,
3713 				    priv_addr->bound, priv_addr->ncookies,
3714 				    priv_addr->datalen);
3715 			}
3716 			D2(vswp, "%s: (%lld) freeing descp at %lld", __func__,
3717 			    ldcp->ldc_id, idx);
3718 			/* release resources associated with sent msg */
3719 			priv_addr->datalen = 0;
3720 			priv_addr->dstate = VIO_DESC_FREE;
3721 			mutex_exit(&priv_addr->dstate_lock);
3722 		}
3723 		/* update to next expected value */
3724 		dp->last_ack_recv = (idx + 1) % dp->num_descriptors;
3725 
3726 		break;
3727 
3728 	case VIO_SUBTYPE_NACK:
3729 		DERR(vswp, "%s: VIO_SUBTYPE_NACK", __func__);
3730 
3731 		/*
3732 		 * We should only get a NACK if our peer doesn't like
3733 		 * something about a message we have sent it. If this
3734 		 * happens we just release the resources associated with
3735 		 * the message. (We are relying on higher layers to decide
3736 		 * whether or not to resend.
3737 		 */
3738 
3739 		/* limit check */
3740 		idx = ibnd_desc->hdr.desc_handle;
3741 
3742 		if (idx >= vsw_ntxds) {
3743 			DERR(vswp, "%s: corrupted NACK received (idx %lld)",
3744 			    __func__, idx);
3745 			return;
3746 		}
3747 
3748 		if ((dp = ldcp->lane_out.dringp) == NULL) {
3749 			DERR(vswp, "%s: no dring found", __func__);
3750 			return;
3751 		}
3752 
3753 		priv_addr = (vsw_private_desc_t *)dp->priv_addr;
3754 
3755 		/* move to correct location in ring */
3756 		priv_addr += idx;
3757 
3758 		/* release resources associated with sent msg */
3759 		mutex_enter(&priv_addr->dstate_lock);
3760 		priv_addr->datalen = 0;
3761 		priv_addr->dstate = VIO_DESC_FREE;
3762 		mutex_exit(&priv_addr->dstate_lock);
3763 
3764 		break;
3765 
3766 	default:
3767 		DERR(vswp, "%s(%lld): Unknown vio_subtype %x\n", __func__,
3768 		    ldcp->ldc_id, ibnd_desc->hdr.tag.vio_subtype);
3769 	}
3770 
3771 	D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id);
3772 }
3773 
3774 static void
3775 vsw_process_err_pkt(vsw_ldc_t *ldcp, void *epkt, vio_msg_tag_t *tagp)
3776 {
3777 	_NOTE(ARGUNUSED(epkt))
3778 
3779 	vsw_t		*vswp = ldcp->ldc_vswp;
3780 	uint16_t	env = tagp->vio_subtype_env;
3781 
3782 	D1(vswp, "%s (%lld): enter\n", __func__, ldcp->ldc_id);
3783 
3784 	/*
3785 	 * Error vio_subtypes have yet to be defined. So for
3786 	 * the moment we can't do anything.
3787 	 */
3788 	D2(vswp, "%s: (%x) vio_subtype env", __func__, env);
3789 
3790 	D1(vswp, "%s (%lld): exit\n", __func__, ldcp->ldc_id);
3791 }
3792 
3793 /* transmit the packet over the given port */
3794 int
3795 vsw_portsend(vsw_port_t *port, mblk_t *mp, mblk_t *mpt, uint32_t count)
3796 {
3797 	vsw_ldc_list_t 	*ldcl = &port->p_ldclist;
3798 	vsw_ldc_t 	*ldcp;
3799 	int		status = 0;
3800 
3801 	READ_ENTER(&ldcl->lockrw);
3802 	/*
3803 	 * Note for now, we have a single channel.
3804 	 */
3805 	ldcp = ldcl->head;
3806 	if (ldcp == NULL) {
3807 		DERR(port->p_vswp, "vsw_portsend: no ldc: dropping packet\n");
3808 		freemsgchain(mp);
3809 		RW_EXIT(&ldcl->lockrw);
3810 		return (1);
3811 	}
3812 
3813 	status = ldcp->tx(ldcp, mp, mpt, count);
3814 
3815 	RW_EXIT(&ldcl->lockrw);
3816 
3817 	return (status);
3818 }
3819 
3820 /*
3821  * Break up frames into 2 seperate chains: normal and
3822  * priority, based on the frame type. The number of
3823  * priority frames is also counted and returned.
3824  *
3825  * Params:
3826  * 	vswp:	pointer to the instance of vsw
3827  *	np:	head of packet chain to be broken
3828  *	npt:	tail of packet chain to be broken
3829  *
3830  * Returns:
3831  *	np:	head of normal data packets
3832  *	npt:	tail of normal data packets
3833  *	hp:	head of high priority packets
3834  *	hpt:	tail of high priority packets
3835  */
3836 static uint32_t
3837 vsw_get_pri_packets(vsw_t *vswp, mblk_t **np, mblk_t **npt,
3838 	mblk_t **hp, mblk_t **hpt)
3839 {
3840 	mblk_t			*tmp = NULL;
3841 	mblk_t			*smp = NULL;
3842 	mblk_t			*hmp = NULL;	/* high prio pkts head */
3843 	mblk_t			*hmpt = NULL;	/* high prio pkts tail */
3844 	mblk_t			*nmp = NULL;	/* normal pkts head */
3845 	mblk_t			*nmpt = NULL;	/* normal pkts tail */
3846 	uint32_t		count = 0;
3847 	int			i;
3848 	struct ether_header	*ehp;
3849 	uint32_t		num_types;
3850 	uint16_t		*types;
3851 
3852 	tmp = *np;
3853 	while (tmp != NULL) {
3854 
3855 		smp = tmp;
3856 		tmp = tmp->b_next;
3857 		smp->b_next = NULL;
3858 		smp->b_prev = NULL;
3859 
3860 		ehp = (struct ether_header *)smp->b_rptr;
3861 		num_types = vswp->pri_num_types;
3862 		types = vswp->pri_types;
3863 		for (i = 0; i < num_types; i++) {
3864 			if (ehp->ether_type == types[i]) {
3865 				/* high priority frame */
3866 
3867 				if (hmp != NULL) {
3868 					hmpt->b_next = smp;
3869 					hmpt = smp;
3870 				} else {
3871 					hmp = hmpt = smp;
3872 				}
3873 				count++;
3874 				break;
3875 			}
3876 		}
3877 		if (i == num_types) {
3878 			/* normal data frame */
3879 
3880 			if (nmp != NULL) {
3881 				nmpt->b_next = smp;
3882 				nmpt = smp;
3883 			} else {
3884 				nmp = nmpt = smp;
3885 			}
3886 		}
3887 	}
3888 
3889 	*hp = hmp;
3890 	*hpt = hmpt;
3891 	*np = nmp;
3892 	*npt = nmpt;
3893 
3894 	return (count);
3895 }
3896 
3897 /*
3898  * Wrapper function to transmit normal and/or priority frames over the channel.
3899  */
3900 static int
3901 vsw_ldctx_pri(void *arg, mblk_t *mp, mblk_t *mpt, uint32_t count)
3902 {
3903 	vsw_ldc_t 		*ldcp = (vsw_ldc_t *)arg;
3904 	mblk_t			*tmp;
3905 	mblk_t			*smp;
3906 	mblk_t			*hmp;	/* high prio pkts head */
3907 	mblk_t			*hmpt;	/* high prio pkts tail */
3908 	mblk_t			*nmp;	/* normal pkts head */
3909 	mblk_t			*nmpt;	/* normal pkts tail */
3910 	uint32_t		n = 0;
3911 	vsw_t			*vswp = ldcp->ldc_vswp;
3912 
3913 	ASSERT(VSW_PRI_ETH_DEFINED(vswp));
3914 	ASSERT(count != 0);
3915 
3916 	nmp = mp;
3917 	nmpt = mpt;
3918 
3919 	/* gather any priority frames from the chain of packets */
3920 	n = vsw_get_pri_packets(vswp, &nmp, &nmpt, &hmp, &hmpt);
3921 
3922 	/* transmit priority frames */
3923 	tmp = hmp;
3924 	while (tmp != NULL) {
3925 		smp = tmp;
3926 		tmp = tmp->b_next;
3927 		smp->b_next = NULL;
3928 		vsw_ldcsend_pkt(ldcp, smp);
3929 	}
3930 
3931 	count -= n;
3932 
3933 	if (count == 0) {
3934 		/* no normal data frames to process */
3935 		return (0);
3936 	}
3937 
3938 	return (vsw_ldctx(ldcp, nmp, nmpt, count));
3939 }
3940 
3941 /*
3942  * Wrapper function to transmit normal frames over the channel.
3943  */
3944 static int
3945 vsw_ldctx(void *arg, mblk_t *mp, mblk_t *mpt, uint32_t count)
3946 {
3947 	vsw_ldc_t 	*ldcp = (vsw_ldc_t *)arg;
3948 	mblk_t		*tmp = NULL;
3949 
3950 	ASSERT(count != 0);
3951 	/*
3952 	 * If the TX thread is enabled, then queue the
3953 	 * ordinary frames and signal the tx thread.
3954 	 */
3955 	if (ldcp->tx_thread != NULL) {
3956 
3957 		mutex_enter(&ldcp->tx_thr_lock);
3958 
3959 		if ((ldcp->tx_cnt + count) >= vsw_max_tx_qcount) {
3960 			/*
3961 			 * If we reached queue limit,
3962 			 * do not queue new packets,
3963 			 * drop them.
3964 			 */
3965 			ldcp->ldc_stats.tx_qfull += count;
3966 			mutex_exit(&ldcp->tx_thr_lock);
3967 			freemsgchain(mp);
3968 			goto exit;
3969 		}
3970 		if (ldcp->tx_mhead == NULL) {
3971 			ldcp->tx_mhead = mp;
3972 			ldcp->tx_mtail = mpt;
3973 			cv_signal(&ldcp->tx_thr_cv);
3974 		} else {
3975 			ldcp->tx_mtail->b_next = mp;
3976 			ldcp->tx_mtail = mpt;
3977 		}
3978 		ldcp->tx_cnt += count;
3979 		mutex_exit(&ldcp->tx_thr_lock);
3980 	} else {
3981 		while (mp != NULL) {
3982 			tmp = mp->b_next;
3983 			mp->b_next = mp->b_prev = NULL;
3984 			(void) vsw_ldcsend(ldcp, mp, 1);
3985 			mp = tmp;
3986 		}
3987 	}
3988 
3989 exit:
3990 	return (0);
3991 }
3992 
3993 /*
3994  * This function transmits the frame in the payload of a raw data
3995  * (VIO_PKT_DATA) message. Thus, it provides an Out-Of-Band path to
3996  * send special frames with high priorities, without going through
3997  * the normal data path which uses descriptor ring mechanism.
3998  */
3999 static void
4000 vsw_ldcsend_pkt(vsw_ldc_t *ldcp, mblk_t *mp)
4001 {
4002 	vio_raw_data_msg_t	*pkt;
4003 	mblk_t			*bp;
4004 	mblk_t			*nmp = NULL;
4005 	caddr_t			dst;
4006 	uint32_t		mblksz;
4007 	uint32_t		size;
4008 	uint32_t		nbytes;
4009 	int			rv;
4010 	vsw_t			*vswp = ldcp->ldc_vswp;
4011 	vgen_stats_t		*statsp = &ldcp->ldc_stats;
4012 
4013 	if ((!(ldcp->lane_out.lstate & VSW_LANE_ACTIVE)) ||
4014 	    (ldcp->ldc_status != LDC_UP) || (ldcp->ldc_handle == NULL)) {
4015 		(void) atomic_inc_32(&statsp->tx_pri_fail);
4016 		DWARN(vswp, "%s(%lld) status(%d) lstate(0x%llx), dropping "
4017 		    "packet\n", __func__, ldcp->ldc_id, ldcp->ldc_status,
4018 		    ldcp->lane_out.lstate);
4019 		goto send_pkt_exit;
4020 	}
4021 
4022 	size = msgsize(mp);
4023 
4024 	/* frame size bigger than available payload len of raw data msg ? */
4025 	if (size > (size_t)(ldcp->msglen - VIO_PKT_DATA_HDRSIZE)) {
4026 		(void) atomic_inc_32(&statsp->tx_pri_fail);
4027 		DWARN(vswp, "%s(%lld) invalid size(%d)\n", __func__,
4028 		    ldcp->ldc_id, size);
4029 		goto send_pkt_exit;
4030 	}
4031 
4032 	if (size < ETHERMIN)
4033 		size = ETHERMIN;
4034 
4035 	/* alloc space for a raw data message */
4036 	nmp = vio_allocb(vswp->pri_tx_vmp);
4037 	if (nmp == NULL) {
4038 		(void) atomic_inc_32(&statsp->tx_pri_fail);
4039 		DWARN(vswp, "vio_allocb failed\n");
4040 		goto send_pkt_exit;
4041 	}
4042 	pkt = (vio_raw_data_msg_t *)nmp->b_rptr;
4043 
4044 	/* copy frame into the payload of raw data message */
4045 	dst = (caddr_t)pkt->data;
4046 	for (bp = mp; bp != NULL; bp = bp->b_cont) {
4047 		mblksz = MBLKL(bp);
4048 		bcopy(bp->b_rptr, dst, mblksz);
4049 		dst += mblksz;
4050 	}
4051 
4052 	/* setup the raw data msg */
4053 	pkt->tag.vio_msgtype = VIO_TYPE_DATA;
4054 	pkt->tag.vio_subtype = VIO_SUBTYPE_INFO;
4055 	pkt->tag.vio_subtype_env = VIO_PKT_DATA;
4056 	pkt->tag.vio_sid = ldcp->local_session;
4057 	nbytes = VIO_PKT_DATA_HDRSIZE + size;
4058 
4059 	/* send the msg over ldc */
4060 	rv = vsw_send_msg(ldcp, (void *)pkt, nbytes, B_TRUE);
4061 	if (rv != 0) {
4062 		(void) atomic_inc_32(&statsp->tx_pri_fail);
4063 		DWARN(vswp, "%s(%lld) Error sending priority frame\n", __func__,
4064 		    ldcp->ldc_id);
4065 		goto send_pkt_exit;
4066 	}
4067 
4068 	/* update stats */
4069 	(void) atomic_inc_64(&statsp->tx_pri_packets);
4070 	(void) atomic_add_64(&statsp->tx_pri_packets, size);
4071 
4072 send_pkt_exit:
4073 	if (nmp != NULL)
4074 		freemsg(nmp);
4075 	freemsg(mp);
4076 }
4077 
4078 /*
4079  * Transmit the packet over the given LDC channel.
4080  *
4081  * The 'retries' argument indicates how many times a packet
4082  * is retried before it is dropped. Note, the retry is done
4083  * only for a resource related failure, for all other failures
4084  * the packet is dropped immediately.
4085  */
4086 static int
4087 vsw_ldcsend(vsw_ldc_t *ldcp, mblk_t *mp, uint32_t retries)
4088 {
4089 	int i;
4090 	int rc;
4091 	int status = 0;
4092 	vsw_port_t *port = ldcp->ldc_port;
4093 	dring_info_t *dp = NULL;
4094 
4095 
4096 	for (i = 0; i < retries; ) {
4097 		/*
4098 		 * Send the message out using the appropriate
4099 		 * transmit function which will free mblock when it
4100 		 * is finished with it.
4101 		 */
4102 		mutex_enter(&port->tx_lock);
4103 		if (port->transmit != NULL) {
4104 			status = (*port->transmit)(ldcp, mp);
4105 		}
4106 		if (status == LDC_TX_SUCCESS) {
4107 			mutex_exit(&port->tx_lock);
4108 			break;
4109 		}
4110 		i++;	/* increment the counter here */
4111 
4112 		/* If its the last retry, then update the oerror */
4113 		if ((i == retries) && (status == LDC_TX_NORESOURCES)) {
4114 			ldcp->ldc_stats.oerrors++;
4115 		}
4116 		mutex_exit(&port->tx_lock);
4117 
4118 		if (status != LDC_TX_NORESOURCES) {
4119 			/*
4120 			 * No retrying required for errors un-related
4121 			 * to resources.
4122 			 */
4123 			break;
4124 		}
4125 		READ_ENTER(&ldcp->lane_out.dlistrw);
4126 		if (((dp = ldcp->lane_out.dringp) != NULL) &&
4127 		    ((VSW_VER_EQ(ldcp, 1, 2) &&
4128 		    (ldcp->lane_out.xfer_mode & VIO_DRING_MODE_V1_2)) ||
4129 		    ((VSW_VER_LT(ldcp, 1, 2) &&
4130 		    (ldcp->lane_out.xfer_mode == VIO_DRING_MODE_V1_0))))) {
4131 			rc = vsw_reclaim_dring(dp, dp->end_idx);
4132 		} else {
4133 			/*
4134 			 * If there is no dring or the xfer_mode is
4135 			 * set to DESC_MODE(ie., OBP), then simply break here.
4136 			 */
4137 			RW_EXIT(&ldcp->lane_out.dlistrw);
4138 			break;
4139 		}
4140 		RW_EXIT(&ldcp->lane_out.dlistrw);
4141 
4142 		/*
4143 		 * Delay only if none were reclaimed
4144 		 * and its not the last retry.
4145 		 */
4146 		if ((rc == 0) && (i < retries)) {
4147 			delay(drv_usectohz(vsw_ldc_tx_delay));
4148 		}
4149 	}
4150 	freemsg(mp);
4151 	return (status);
4152 }
4153 
4154 /*
4155  * Send packet out via descriptor ring to a logical device.
4156  */
4157 static int
4158 vsw_dringsend(vsw_ldc_t *ldcp, mblk_t *mp)
4159 {
4160 	vio_dring_msg_t		dring_pkt;
4161 	dring_info_t		*dp = NULL;
4162 	vsw_private_desc_t	*priv_desc = NULL;
4163 	vnet_public_desc_t	*pub = NULL;
4164 	vsw_t			*vswp = ldcp->ldc_vswp;
4165 	mblk_t			*bp;
4166 	size_t			n, size;
4167 	caddr_t			bufp;
4168 	int			idx;
4169 	int			status = LDC_TX_SUCCESS;
4170 	struct ether_header	*ehp = (struct ether_header *)mp->b_rptr;
4171 
4172 	D1(vswp, "%s(%lld): enter\n", __func__, ldcp->ldc_id);
4173 
4174 	/* TODO: make test a macro */
4175 	if ((!(ldcp->lane_out.lstate & VSW_LANE_ACTIVE)) ||
4176 	    (ldcp->ldc_status != LDC_UP) || (ldcp->ldc_handle == NULL)) {
4177 		DWARN(vswp, "%s(%lld) status(%d) lstate(0x%llx), dropping "
4178 		    "packet\n", __func__, ldcp->ldc_id, ldcp->ldc_status,
4179 		    ldcp->lane_out.lstate);
4180 		ldcp->ldc_stats.oerrors++;
4181 		return (LDC_TX_FAILURE);
4182 	}
4183 
4184 	/*
4185 	 * Note - using first ring only, this may change
4186 	 * in the future.
4187 	 */
4188 	READ_ENTER(&ldcp->lane_out.dlistrw);
4189 	if ((dp = ldcp->lane_out.dringp) == NULL) {
4190 		RW_EXIT(&ldcp->lane_out.dlistrw);
4191 		DERR(vswp, "%s(%lld): no dring for outbound lane on"
4192 		    " channel %d", __func__, ldcp->ldc_id, ldcp->ldc_id);
4193 		ldcp->ldc_stats.oerrors++;
4194 		return (LDC_TX_FAILURE);
4195 	}
4196 
4197 	size = msgsize(mp);
4198 	if (size > (size_t)ETHERMAX) {
4199 		RW_EXIT(&ldcp->lane_out.dlistrw);
4200 		DERR(vswp, "%s(%lld) invalid size (%ld)\n", __func__,
4201 		    ldcp->ldc_id, size);
4202 		ldcp->ldc_stats.oerrors++;
4203 		return (LDC_TX_FAILURE);
4204 	}
4205 
4206 	/*
4207 	 * Find a free descriptor
4208 	 *
4209 	 * Note: for the moment we are assuming that we will only
4210 	 * have one dring going from the switch to each of its
4211 	 * peers. This may change in the future.
4212 	 */
4213 	if (vsw_dring_find_free_desc(dp, &priv_desc, &idx) != 0) {
4214 		D2(vswp, "%s(%lld): no descriptor available for ring "
4215 		    "at 0x%llx", __func__, ldcp->ldc_id, dp);
4216 
4217 		/* nothing more we can do */
4218 		status = LDC_TX_NORESOURCES;
4219 		ldcp->ldc_stats.tx_no_desc++;
4220 		goto vsw_dringsend_free_exit;
4221 	} else {
4222 		D2(vswp, "%s(%lld): free private descriptor found at pos %ld "
4223 		    "addr 0x%llx\n", __func__, ldcp->ldc_id, idx, priv_desc);
4224 	}
4225 
4226 	/* copy data into the descriptor */
4227 	bufp = priv_desc->datap;
4228 	bufp += VNET_IPALIGN;
4229 	for (bp = mp, n = 0; bp != NULL; bp = bp->b_cont) {
4230 		n = MBLKL(bp);
4231 		bcopy(bp->b_rptr, bufp, n);
4232 		bufp += n;
4233 	}
4234 
4235 	priv_desc->datalen = (size < (size_t)ETHERMIN) ? ETHERMIN : size;
4236 
4237 	pub = priv_desc->descp;
4238 	pub->nbytes = priv_desc->datalen;
4239 
4240 	/* update statistics */
4241 	if (IS_BROADCAST(ehp))
4242 		ldcp->ldc_stats.brdcstxmt++;
4243 	else if (IS_MULTICAST(ehp))
4244 		ldcp->ldc_stats.multixmt++;
4245 	ldcp->ldc_stats.opackets++;
4246 	ldcp->ldc_stats.obytes += priv_desc->datalen;
4247 
4248 	mutex_enter(&priv_desc->dstate_lock);
4249 	pub->hdr.dstate = VIO_DESC_READY;
4250 	mutex_exit(&priv_desc->dstate_lock);
4251 
4252 	/*
4253 	 * Determine whether or not we need to send a message to our
4254 	 * peer prompting them to read our newly updated descriptor(s).
4255 	 */
4256 	mutex_enter(&dp->restart_lock);
4257 	if (dp->restart_reqd) {
4258 		dp->restart_reqd = B_FALSE;
4259 		ldcp->ldc_stats.dring_data_msgs++;
4260 		mutex_exit(&dp->restart_lock);
4261 
4262 		/*
4263 		 * Send a vio_dring_msg to peer to prompt them to read
4264 		 * the updated descriptor ring.
4265 		 */
4266 		dring_pkt.tag.vio_msgtype = VIO_TYPE_DATA;
4267 		dring_pkt.tag.vio_subtype = VIO_SUBTYPE_INFO;
4268 		dring_pkt.tag.vio_subtype_env = VIO_DRING_DATA;
4269 		dring_pkt.tag.vio_sid = ldcp->local_session;
4270 
4271 		/* Note - for now using first ring */
4272 		dring_pkt.dring_ident = dp->ident;
4273 
4274 		/*
4275 		 * If last_ack_recv is -1 then we know we've not
4276 		 * received any ack's yet, so this must be the first
4277 		 * msg sent, so set the start to the begining of the ring.
4278 		 */
4279 		mutex_enter(&dp->dlock);
4280 		if (dp->last_ack_recv == -1) {
4281 			dring_pkt.start_idx = 0;
4282 		} else {
4283 			dring_pkt.start_idx =
4284 			    (dp->last_ack_recv + 1) % dp->num_descriptors;
4285 		}
4286 		dring_pkt.end_idx = -1;
4287 		mutex_exit(&dp->dlock);
4288 
4289 		D3(vswp, "%s(%lld): dring 0x%llx : ident 0x%llx\n", __func__,
4290 		    ldcp->ldc_id, dp, dring_pkt.dring_ident);
4291 		D3(vswp, "%s(%lld): start %lld : end %lld :\n",
4292 		    __func__, ldcp->ldc_id, dring_pkt.start_idx,
4293 		    dring_pkt.end_idx);
4294 
4295 		RW_EXIT(&ldcp->lane_out.dlistrw);
4296 
4297 		(void) vsw_send_msg(ldcp, (void *)&dring_pkt,
4298 		    sizeof (vio_dring_msg_t), B_TRUE);
4299 
4300 		return (status);
4301 
4302 	} else {
4303 		mutex_exit(&dp->restart_lock);
4304 		D2(vswp, "%s(%lld): updating descp %d", __func__,
4305 		    ldcp->ldc_id, idx);
4306 	}
4307 
4308 vsw_dringsend_free_exit:
4309 
4310 	RW_EXIT(&ldcp->lane_out.dlistrw);
4311 
4312 	D1(vswp, "%s(%lld): exit\n", __func__, ldcp->ldc_id);
4313 	return (status);
4314 }
4315 
4316 /*
4317  * Send an in-band descriptor message over ldc.
4318  */
4319 static int
4320 vsw_descrsend(vsw_ldc_t *ldcp, mblk_t *mp)
4321 {
4322 	vsw_t			*vswp = ldcp->ldc_vswp;
4323 	vnet_ibnd_desc_t	ibnd_msg;
4324 	vsw_private_desc_t	*priv_desc = NULL;
4325 	dring_info_t		*dp = NULL;
4326 	size_t			n, size = 0;
4327 	caddr_t			bufp;
4328 	mblk_t			*bp;
4329 	int			idx, i;
4330 	int			status = LDC_TX_SUCCESS;
4331 	static int		warn_msg = 1;
4332 
4333 	D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);
4334 
4335 	ASSERT(mp != NULL);
4336 
4337 	if ((!(ldcp->lane_out.lstate & VSW_LANE_ACTIVE)) ||
4338 	    (ldcp->ldc_status != LDC_UP) || (ldcp->ldc_handle == NULL)) {
4339 		DERR(vswp, "%s(%lld) status(%d) state (0x%llx), dropping pkt",
4340 		    __func__, ldcp->ldc_id, ldcp->ldc_status,
4341 		    ldcp->lane_out.lstate);
4342 		ldcp->ldc_stats.oerrors++;
4343 		return (LDC_TX_FAILURE);
4344 	}
4345 
4346 	/*
4347 	 * only expect single dring to exist, which we use
4348 	 * as an internal buffer, rather than a transfer channel.
4349 	 */
4350 	READ_ENTER(&ldcp->lane_out.dlistrw);
4351 	if ((dp = ldcp->lane_out.dringp) == NULL) {
4352 		DERR(vswp, "%s(%lld): no dring for outbound lane",
4353 		    __func__, ldcp->ldc_id);
4354 		DERR(vswp, "%s(%lld) status(%d) state (0x%llx)", __func__,
4355 		    ldcp->ldc_id, ldcp->ldc_status, ldcp->lane_out.lstate);
4356 		RW_EXIT(&ldcp->lane_out.dlistrw);
4357 		ldcp->ldc_stats.oerrors++;
4358 		return (LDC_TX_FAILURE);
4359 	}
4360 
4361 	size = msgsize(mp);
4362 	if (size > (size_t)ETHERMAX) {
4363 		RW_EXIT(&ldcp->lane_out.dlistrw);
4364 		DERR(vswp, "%s(%lld) invalid size (%ld)\n", __func__,
4365 		    ldcp->ldc_id, size);
4366 		ldcp->ldc_stats.oerrors++;
4367 		return (LDC_TX_FAILURE);
4368 	}
4369 
4370 	/*
4371 	 * Find a free descriptor in our buffer ring
4372 	 */
4373 	if (vsw_dring_find_free_desc(dp, &priv_desc, &idx) != 0) {
4374 		RW_EXIT(&ldcp->lane_out.dlistrw);
4375 		if (warn_msg) {
4376 			DERR(vswp, "%s(%lld): no descriptor available for ring "
4377 			    "at 0x%llx", __func__, ldcp->ldc_id, dp);
4378 			warn_msg = 0;
4379 		}
4380 
4381 		/* nothing more we can do */
4382 		status = LDC_TX_NORESOURCES;
4383 		goto vsw_descrsend_free_exit;
4384 	} else {
4385 		D2(vswp, "%s(%lld): free private descriptor found at pos "
4386 		    "%ld addr 0x%x\n", __func__, ldcp->ldc_id, idx, priv_desc);
4387 		warn_msg = 1;
4388 	}
4389 
4390 	/* copy data into the descriptor */
4391 	bufp = priv_desc->datap;
4392 	for (bp = mp, n = 0; bp != NULL; bp = bp->b_cont) {
4393 		n = MBLKL(bp);
4394 		bcopy(bp->b_rptr, bufp, n);
4395 		bufp += n;
4396 	}
4397 
4398 	priv_desc->datalen = (size < (size_t)ETHERMIN) ? ETHERMIN : size;
4399 
4400 	/* create and send the in-band descp msg */
4401 	ibnd_msg.hdr.tag.vio_msgtype = VIO_TYPE_DATA;
4402 	ibnd_msg.hdr.tag.vio_subtype = VIO_SUBTYPE_INFO;
4403 	ibnd_msg.hdr.tag.vio_subtype_env = VIO_DESC_DATA;
4404 	ibnd_msg.hdr.tag.vio_sid = ldcp->local_session;
4405 
4406 	/*
4407 	 * Copy the mem cookies describing the data from the
4408 	 * private region of the descriptor ring into the inband
4409 	 * descriptor.
4410 	 */
4411 	for (i = 0; i < priv_desc->ncookies; i++) {
4412 		bcopy(&priv_desc->memcookie[i], &ibnd_msg.memcookie[i],
4413 		    sizeof (ldc_mem_cookie_t));
4414 	}
4415 
4416 	ibnd_msg.hdr.desc_handle = idx;
4417 	ibnd_msg.ncookies = priv_desc->ncookies;
4418 	ibnd_msg.nbytes = size;
4419 
4420 	ldcp->ldc_stats.opackets++;
4421 	ldcp->ldc_stats.obytes += size;
4422 
4423 	RW_EXIT(&ldcp->lane_out.dlistrw);
4424 
4425 	(void) vsw_send_msg(ldcp, (void *)&ibnd_msg,
4426 	    sizeof (vnet_ibnd_desc_t), B_TRUE);
4427 
4428 vsw_descrsend_free_exit:
4429 
4430 	D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id);
4431 	return (status);
4432 }
4433 
4434 static void
4435 vsw_send_ver(void *arg)
4436 {
4437 	vsw_ldc_t	*ldcp = (vsw_ldc_t *)arg;
4438 	vsw_t		*vswp = ldcp->ldc_vswp;
4439 	lane_t		*lp = &ldcp->lane_out;
4440 	vio_ver_msg_t	ver_msg;
4441 
4442 	D1(vswp, "%s enter", __func__);
4443 
4444 	ver_msg.tag.vio_msgtype = VIO_TYPE_CTRL;
4445 	ver_msg.tag.vio_subtype = VIO_SUBTYPE_INFO;
4446 	ver_msg.tag.vio_subtype_env = VIO_VER_INFO;
4447 	ver_msg.tag.vio_sid = ldcp->local_session;
4448 
4449 	if (vsw_obp_ver_proto_workaround == B_FALSE) {
4450 		ver_msg.ver_major = vsw_versions[0].ver_major;
4451 		ver_msg.ver_minor = vsw_versions[0].ver_minor;
4452 	} else {
4453 		/* use the major,minor that we've ack'd */
4454 		lane_t	*lpi = &ldcp->lane_in;
4455 		ver_msg.ver_major = lpi->ver_major;
4456 		ver_msg.ver_minor = lpi->ver_minor;
4457 	}
4458 	ver_msg.dev_class = VDEV_NETWORK_SWITCH;
4459 
4460 	lp->lstate |= VSW_VER_INFO_SENT;
4461 	lp->ver_major = ver_msg.ver_major;
4462 	lp->ver_minor = ver_msg.ver_minor;
4463 
4464 	DUMP_TAG(ver_msg.tag);
4465 
4466 	(void) vsw_send_msg(ldcp, &ver_msg, sizeof (vio_ver_msg_t), B_TRUE);
4467 
4468 	D1(vswp, "%s (%d): exit", __func__, ldcp->ldc_id);
4469 }
4470 
4471 static void
4472 vsw_send_attr(vsw_ldc_t *ldcp)
4473 {
4474 	vsw_t			*vswp = ldcp->ldc_vswp;
4475 	lane_t			*lp = &ldcp->lane_out;
4476 	vnet_attr_msg_t		attr_msg;
4477 
4478 	D1(vswp, "%s (%ld) enter", __func__, ldcp->ldc_id);
4479 
4480 	/*
4481 	 * Subtype is set to INFO by default
4482 	 */
4483 	attr_msg.tag.vio_msgtype = VIO_TYPE_CTRL;
4484 	attr_msg.tag.vio_subtype = VIO_SUBTYPE_INFO;
4485 	attr_msg.tag.vio_subtype_env = VIO_ATTR_INFO;
4486 	attr_msg.tag.vio_sid = ldcp->local_session;
4487 
4488 	/* payload copied from default settings for lane */
4489 	attr_msg.mtu = lp->mtu;
4490 	attr_msg.addr_type = lp->addr_type;
4491 	attr_msg.xfer_mode = lp->xfer_mode;
4492 	attr_msg.ack_freq = lp->xfer_mode;
4493 
4494 	READ_ENTER(&vswp->if_lockrw);
4495 	attr_msg.addr = vnet_macaddr_strtoul((vswp->if_addr).ether_addr_octet);
4496 	RW_EXIT(&vswp->if_lockrw);
4497 
4498 	ldcp->lane_out.lstate |= VSW_ATTR_INFO_SENT;
4499 
4500 	DUMP_TAG(attr_msg.tag);
4501 
4502 	(void) vsw_send_msg(ldcp, &attr_msg, sizeof (vnet_attr_msg_t), B_TRUE);
4503 
4504 	D1(vswp, "%s (%ld) exit", __func__, ldcp->ldc_id);
4505 }
4506 
4507 /*
4508  * Create dring info msg (which also results in the creation of
4509  * a dring).
4510  */
4511 static vio_dring_reg_msg_t *
4512 vsw_create_dring_info_pkt(vsw_ldc_t *ldcp)
4513 {
4514 	vio_dring_reg_msg_t	*mp;
4515 	dring_info_t		*dp;
4516 	vsw_t			*vswp = ldcp->ldc_vswp;
4517 
4518 	D1(vswp, "vsw_create_dring_info_pkt enter\n");
4519 
4520 	/*
4521 	 * If we can't create a dring, obviously no point sending
4522 	 * a message.
4523 	 */
4524 	if ((dp = vsw_create_dring(ldcp)) == NULL)
4525 		return (NULL);
4526 
4527 	mp = kmem_zalloc(sizeof (vio_dring_reg_msg_t), KM_SLEEP);
4528 
4529 	mp->tag.vio_msgtype = VIO_TYPE_CTRL;
4530 	mp->tag.vio_subtype = VIO_SUBTYPE_INFO;
4531 	mp->tag.vio_subtype_env = VIO_DRING_REG;
4532 	mp->tag.vio_sid = ldcp->local_session;
4533 
4534 	/* payload */
4535 	mp->num_descriptors = dp->num_descriptors;
4536 	mp->descriptor_size = dp->descriptor_size;
4537 	mp->options = dp->options;
4538 	mp->ncookies = dp->ncookies;
4539 	bcopy(&dp->cookie[0], &mp->cookie[0], sizeof (ldc_mem_cookie_t));
4540 
4541 	mp->dring_ident = 0;
4542 
4543 	D1(vswp, "vsw_create_dring_info_pkt exit\n");
4544 
4545 	return (mp);
4546 }
4547 
4548 static void
4549 vsw_send_dring_info(vsw_ldc_t *ldcp)
4550 {
4551 	vio_dring_reg_msg_t	*dring_msg;
4552 	vsw_t			*vswp = ldcp->ldc_vswp;
4553 
4554 	D1(vswp, "%s: (%ld) enter", __func__, ldcp->ldc_id);
4555 
4556 	dring_msg = vsw_create_dring_info_pkt(ldcp);
4557 	if (dring_msg == NULL) {
4558 		cmn_err(CE_WARN, "!vsw%d: %s: error creating msg",
4559 		    vswp->instance, __func__);
4560 		return;
4561 	}
4562 
4563 	ldcp->lane_out.lstate |= VSW_DRING_INFO_SENT;
4564 
4565 	DUMP_TAG_PTR((vio_msg_tag_t *)dring_msg);
4566 
4567 	(void) vsw_send_msg(ldcp, dring_msg,
4568 	    sizeof (vio_dring_reg_msg_t), B_TRUE);
4569 
4570 	kmem_free(dring_msg, sizeof (vio_dring_reg_msg_t));
4571 
4572 	D1(vswp, "%s: (%ld) exit", __func__, ldcp->ldc_id);
4573 }
4574 
4575 static void
4576 vsw_send_rdx(vsw_ldc_t *ldcp)
4577 {
4578 	vsw_t		*vswp = ldcp->ldc_vswp;
4579 	vio_rdx_msg_t	rdx_msg;
4580 
4581 	D1(vswp, "%s (%ld) enter", __func__, ldcp->ldc_id);
4582 
4583 	rdx_msg.tag.vio_msgtype = VIO_TYPE_CTRL;
4584 	rdx_msg.tag.vio_subtype = VIO_SUBTYPE_INFO;
4585 	rdx_msg.tag.vio_subtype_env = VIO_RDX;
4586 	rdx_msg.tag.vio_sid = ldcp->local_session;
4587 
4588 	ldcp->lane_in.lstate |= VSW_RDX_INFO_SENT;
4589 
4590 	DUMP_TAG(rdx_msg.tag);
4591 
4592 	(void) vsw_send_msg(ldcp, &rdx_msg, sizeof (vio_rdx_msg_t), B_TRUE);
4593 
4594 	D1(vswp, "%s (%ld) exit", __func__, ldcp->ldc_id);
4595 }
4596 
4597 /*
4598  * Generic routine to send message out over ldc channel.
4599  *
4600  * It is possible that when we attempt to write over the ldc channel
4601  * that we get notified that it has been reset. Depending on the value
4602  * of the handle_reset flag we either handle that event here or simply
4603  * notify the caller that the channel was reset.
4604  */
4605 static int
4606 vsw_send_msg(vsw_ldc_t *ldcp, void *msgp, int size, boolean_t handle_reset)
4607 {
4608 	int			rv;
4609 	size_t			msglen = size;
4610 	vio_msg_tag_t		*tag = (vio_msg_tag_t *)msgp;
4611 	vsw_t			*vswp = ldcp->ldc_vswp;
4612 	vio_dring_msg_t		*dmsg;
4613 	vio_raw_data_msg_t	*rmsg;
4614 	vnet_ibnd_desc_t	*imsg;
4615 	boolean_t		data_msg = B_FALSE;
4616 
4617 	D1(vswp, "vsw_send_msg (%lld) enter : sending %d bytes",
4618 	    ldcp->ldc_id, size);
4619 
4620 	D2(vswp, "send_msg: type 0x%llx", tag->vio_msgtype);
4621 	D2(vswp, "send_msg: stype 0x%llx", tag->vio_subtype);
4622 	D2(vswp, "send_msg: senv 0x%llx", tag->vio_subtype_env);
4623 
4624 	mutex_enter(&ldcp->ldc_txlock);
4625 
4626 	if (tag->vio_subtype == VIO_SUBTYPE_INFO) {
4627 		if (tag->vio_subtype_env == VIO_DRING_DATA) {
4628 			dmsg = (vio_dring_msg_t *)tag;
4629 			dmsg->seq_num = ldcp->lane_out.seq_num;
4630 			data_msg = B_TRUE;
4631 		} else if (tag->vio_subtype_env == VIO_PKT_DATA) {
4632 			rmsg = (vio_raw_data_msg_t *)tag;
4633 			rmsg->seq_num = ldcp->lane_out.seq_num;
4634 			data_msg = B_TRUE;
4635 		} else if (tag->vio_subtype_env == VIO_DESC_DATA) {
4636 			imsg = (vnet_ibnd_desc_t *)tag;
4637 			imsg->hdr.seq_num = ldcp->lane_out.seq_num;
4638 			data_msg = B_TRUE;
4639 		}
4640 	}
4641 
4642 	do {
4643 		msglen = size;
4644 		rv = ldc_write(ldcp->ldc_handle, (caddr_t)msgp, &msglen);
4645 	} while (rv == EWOULDBLOCK && --vsw_wretries > 0);
4646 
4647 	if (rv == 0 && data_msg == B_TRUE) {
4648 		ldcp->lane_out.seq_num++;
4649 	}
4650 
4651 	if ((rv != 0) || (msglen != size)) {
4652 		DERR(vswp, "vsw_send_msg:ldc_write failed: chan(%lld) rv(%d) "
4653 		    "size (%d) msglen(%d)\n", ldcp->ldc_id, rv, size, msglen);
4654 		ldcp->ldc_stats.oerrors++;
4655 	}
4656 
4657 	mutex_exit(&ldcp->ldc_txlock);
4658 
4659 	/*
4660 	 * If channel has been reset we either handle it here or
4661 	 * simply report back that it has been reset and let caller
4662 	 * decide what to do.
4663 	 */
4664 	if (rv == ECONNRESET) {
4665 		DWARN(vswp, "%s (%lld) channel reset", __func__, ldcp->ldc_id);
4666 
4667 		/*
4668 		 * N.B - must never be holding the dlistrw lock when
4669 		 * we do a reset of the channel.
4670 		 */
4671 		if (handle_reset) {
4672 			vsw_process_conn_evt(ldcp, VSW_CONN_RESET);
4673 		}
4674 	}
4675 
4676 	return (rv);
4677 }
4678 
4679 /*
4680  * Remove the specified address from the list of address maintained
4681  * in this port node.
4682  */
4683 mcst_addr_t *
4684 vsw_del_addr(uint8_t devtype, void *arg, uint64_t addr)
4685 {
4686 	vsw_t		*vswp = NULL;
4687 	vsw_port_t	*port = NULL;
4688 	mcst_addr_t	*prev_p = NULL;
4689 	mcst_addr_t	*curr_p = NULL;
4690 
4691 	D1(NULL, "%s: enter : devtype %d : addr 0x%llx",
4692 	    __func__, devtype, addr);
4693 
4694 	if (devtype == VSW_VNETPORT) {
4695 		port = (vsw_port_t *)arg;
4696 		mutex_enter(&port->mca_lock);
4697 		prev_p = curr_p = port->mcap;
4698 	} else {
4699 		vswp = (vsw_t *)arg;
4700 		mutex_enter(&vswp->mca_lock);
4701 		prev_p = curr_p = vswp->mcap;
4702 	}
4703 
4704 	while (curr_p != NULL) {
4705 		if (curr_p->addr == addr) {
4706 			D2(NULL, "%s: address found", __func__);
4707 			/* match found */
4708 			if (prev_p == curr_p) {
4709 				/* list head */
4710 				if (devtype == VSW_VNETPORT)
4711 					port->mcap = curr_p->nextp;
4712 				else
4713 					vswp->mcap = curr_p->nextp;
4714 			} else {
4715 				prev_p->nextp = curr_p->nextp;
4716 			}
4717 			break;
4718 		} else {
4719 			prev_p = curr_p;
4720 			curr_p = curr_p->nextp;
4721 		}
4722 	}
4723 
4724 	if (devtype == VSW_VNETPORT)
4725 		mutex_exit(&port->mca_lock);
4726 	else
4727 		mutex_exit(&vswp->mca_lock);
4728 
4729 	D1(NULL, "%s: exit", __func__);
4730 
4731 	return (curr_p);
4732 }
4733 
4734 /*
4735  * Creates a descriptor ring (dring) and links it into the
4736  * link of outbound drings for this channel.
4737  *
4738  * Returns NULL if creation failed.
4739  */
4740 static dring_info_t *
4741 vsw_create_dring(vsw_ldc_t *ldcp)
4742 {
4743 	vsw_private_desc_t	*priv_addr = NULL;
4744 	vsw_t			*vswp = ldcp->ldc_vswp;
4745 	ldc_mem_info_t		minfo;
4746 	dring_info_t		*dp, *tp;
4747 	int			i;
4748 
4749 	dp = (dring_info_t *)kmem_zalloc(sizeof (dring_info_t), KM_SLEEP);
4750 
4751 	mutex_init(&dp->dlock, NULL, MUTEX_DRIVER, NULL);
4752 
4753 	/* create public section of ring */
4754 	if ((ldc_mem_dring_create(vsw_ntxds,
4755 	    VSW_PUB_SIZE, &dp->handle)) != 0) {
4756 
4757 		DERR(vswp, "vsw_create_dring(%lld): ldc dring create "
4758 		    "failed", ldcp->ldc_id);
4759 		goto create_fail_exit;
4760 	}
4761 
4762 	ASSERT(dp->handle != NULL);
4763 
4764 	/*
4765 	 * Get the base address of the public section of the ring.
4766 	 */
4767 	if ((ldc_mem_dring_info(dp->handle, &minfo)) != 0) {
4768 		DERR(vswp, "vsw_create_dring(%lld): dring info failed\n",
4769 		    ldcp->ldc_id);
4770 		goto dring_fail_exit;
4771 	} else {
4772 		ASSERT(minfo.vaddr != 0);
4773 		dp->pub_addr = minfo.vaddr;
4774 	}
4775 
4776 	dp->num_descriptors = vsw_ntxds;
4777 	dp->descriptor_size = VSW_PUB_SIZE;
4778 	dp->options = VIO_TX_DRING;
4779 	dp->ncookies = 1;	/* guaranteed by ldc */
4780 
4781 	/*
4782 	 * create private portion of ring
4783 	 */
4784 	dp->priv_addr = (vsw_private_desc_t *)kmem_zalloc(
4785 	    (sizeof (vsw_private_desc_t) * vsw_ntxds), KM_SLEEP);
4786 
4787 	if (vsw_setup_ring(ldcp, dp)) {
4788 		DERR(vswp, "%s: unable to setup ring", __func__);
4789 		goto dring_fail_exit;
4790 	}
4791 
4792 	/* haven't used any descriptors yet */
4793 	dp->end_idx = 0;
4794 	dp->last_ack_recv = -1;
4795 
4796 	/* bind dring to the channel */
4797 	if ((ldc_mem_dring_bind(ldcp->ldc_handle, dp->handle,
4798 	    LDC_SHADOW_MAP, LDC_MEM_RW,
4799 	    &dp->cookie[0], &dp->ncookies)) != 0) {
4800 		DERR(vswp, "vsw_create_dring: unable to bind to channel "
4801 		    "%lld", ldcp->ldc_id);
4802 		goto dring_fail_exit;
4803 	}
4804 
4805 	mutex_init(&dp->restart_lock, NULL, MUTEX_DRIVER, NULL);
4806 	dp->restart_reqd = B_TRUE;
4807 
4808 	/*
4809 	 * Only ever create rings for outgoing lane. Link it onto
4810 	 * end of list.
4811 	 */
4812 	WRITE_ENTER(&ldcp->lane_out.dlistrw);
4813 	if (ldcp->lane_out.dringp == NULL) {
4814 		D2(vswp, "vsw_create_dring: adding first outbound ring");
4815 		ldcp->lane_out.dringp = dp;
4816 	} else {
4817 		tp = ldcp->lane_out.dringp;
4818 		while (tp->next != NULL)
4819 			tp = tp->next;
4820 
4821 		tp->next = dp;
4822 	}
4823 	RW_EXIT(&ldcp->lane_out.dlistrw);
4824 
4825 	return (dp);
4826 
4827 dring_fail_exit:
4828 	(void) ldc_mem_dring_destroy(dp->handle);
4829 
4830 create_fail_exit:
4831 	if (dp->priv_addr != NULL) {
4832 		priv_addr = dp->priv_addr;
4833 		for (i = 0; i < vsw_ntxds; i++) {
4834 			if (priv_addr->memhandle != NULL)
4835 				(void) ldc_mem_free_handle(
4836 				    priv_addr->memhandle);
4837 			priv_addr++;
4838 		}
4839 		kmem_free(dp->priv_addr,
4840 		    (sizeof (vsw_private_desc_t) * vsw_ntxds));
4841 	}
4842 	mutex_destroy(&dp->dlock);
4843 
4844 	kmem_free(dp, sizeof (dring_info_t));
4845 	return (NULL);
4846 }
4847 
4848 /*
4849  * Create a ring consisting of just a private portion and link
4850  * it into the list of rings for the outbound lane.
4851  *
4852  * These type of rings are used primarily for temporary data
4853  * storage (i.e. as data buffers).
4854  */
4855 void
4856 vsw_create_privring(vsw_ldc_t *ldcp)
4857 {
4858 	dring_info_t		*dp, *tp;
4859 	vsw_t			*vswp = ldcp->ldc_vswp;
4860 
4861 	D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);
4862 
4863 	dp = kmem_zalloc(sizeof (dring_info_t), KM_SLEEP);
4864 
4865 	mutex_init(&dp->dlock, NULL, MUTEX_DRIVER, NULL);
4866 
4867 	/* no public section */
4868 	dp->pub_addr = NULL;
4869 
4870 	dp->priv_addr = kmem_zalloc(
4871 	    (sizeof (vsw_private_desc_t) * vsw_ntxds), KM_SLEEP);
4872 
4873 	dp->num_descriptors = vsw_ntxds;
4874 
4875 	if (vsw_setup_ring(ldcp, dp)) {
4876 		DERR(vswp, "%s: setup of ring failed", __func__);
4877 		kmem_free(dp->priv_addr,
4878 		    (sizeof (vsw_private_desc_t) * vsw_ntxds));
4879 		mutex_destroy(&dp->dlock);
4880 		kmem_free(dp, sizeof (dring_info_t));
4881 		return;
4882 	}
4883 
4884 	/* haven't used any descriptors yet */
4885 	dp->end_idx = 0;
4886 
4887 	mutex_init(&dp->restart_lock, NULL, MUTEX_DRIVER, NULL);
4888 	dp->restart_reqd = B_TRUE;
4889 
4890 	/*
4891 	 * Only ever create rings for outgoing lane. Link it onto
4892 	 * end of list.
4893 	 */
4894 	WRITE_ENTER(&ldcp->lane_out.dlistrw);
4895 	if (ldcp->lane_out.dringp == NULL) {
4896 		D2(vswp, "%s: adding first outbound privring", __func__);
4897 		ldcp->lane_out.dringp = dp;
4898 	} else {
4899 		tp = ldcp->lane_out.dringp;
4900 		while (tp->next != NULL)
4901 			tp = tp->next;
4902 
4903 		tp->next = dp;
4904 	}
4905 	RW_EXIT(&ldcp->lane_out.dlistrw);
4906 
4907 	D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id);
4908 }
4909 
4910 /*
4911  * Setup the descriptors in the dring. Returns 0 on success, 1 on
4912  * failure.
4913  */
4914 int
4915 vsw_setup_ring(vsw_ldc_t *ldcp, dring_info_t *dp)
4916 {
4917 	vnet_public_desc_t	*pub_addr = NULL;
4918 	vsw_private_desc_t	*priv_addr = NULL;
4919 	vsw_t			*vswp = ldcp->ldc_vswp;
4920 	uint64_t		*tmpp;
4921 	uint64_t		offset = 0;
4922 	uint32_t		ncookies = 0;
4923 	static char		*name = "vsw_setup_ring";
4924 	int			i, j, nc, rv;
4925 
4926 	priv_addr = dp->priv_addr;
4927 	pub_addr = dp->pub_addr;
4928 
4929 	/* public section may be null but private should never be */
4930 	ASSERT(priv_addr != NULL);
4931 
4932 	/*
4933 	 * Allocate the region of memory which will be used to hold
4934 	 * the data the descriptors will refer to.
4935 	 */
4936 	dp->data_sz = (vsw_ntxds * VSW_RING_EL_DATA_SZ);
4937 	dp->data_addr = kmem_alloc(dp->data_sz, KM_SLEEP);
4938 
4939 	D2(vswp, "%s: allocated %lld bytes at 0x%llx\n", name,
4940 	    dp->data_sz, dp->data_addr);
4941 
4942 	tmpp = (uint64_t *)dp->data_addr;
4943 	offset = VSW_RING_EL_DATA_SZ / sizeof (tmpp);
4944 
4945 	/*
4946 	 * Initialise some of the private and public (if they exist)
4947 	 * descriptor fields.
4948 	 */
4949 	for (i = 0; i < vsw_ntxds; i++) {
4950 		mutex_init(&priv_addr->dstate_lock, NULL, MUTEX_DRIVER, NULL);
4951 
4952 		if ((ldc_mem_alloc_handle(ldcp->ldc_handle,
4953 		    &priv_addr->memhandle)) != 0) {
4954 			DERR(vswp, "%s: alloc mem handle failed", name);
4955 			goto setup_ring_cleanup;
4956 		}
4957 
4958 		priv_addr->datap = (void *)tmpp;
4959 
4960 		rv = ldc_mem_bind_handle(priv_addr->memhandle,
4961 		    (caddr_t)priv_addr->datap, VSW_RING_EL_DATA_SZ,
4962 		    LDC_SHADOW_MAP, LDC_MEM_R|LDC_MEM_W,
4963 		    &(priv_addr->memcookie[0]), &ncookies);
4964 		if (rv != 0) {
4965 			DERR(vswp, "%s(%lld): ldc_mem_bind_handle failed "
4966 			    "(rv %d)", name, ldcp->ldc_id, rv);
4967 			goto setup_ring_cleanup;
4968 		}
4969 		priv_addr->bound = 1;
4970 
4971 		D2(vswp, "%s: %d: memcookie 0 : addr 0x%llx : size 0x%llx",
4972 		    name, i, priv_addr->memcookie[0].addr,
4973 		    priv_addr->memcookie[0].size);
4974 
4975 		if (ncookies >= (uint32_t)(VSW_MAX_COOKIES + 1)) {
4976 			DERR(vswp, "%s(%lld) ldc_mem_bind_handle returned "
4977 			    "invalid num of cookies (%d) for size 0x%llx",
4978 			    name, ldcp->ldc_id, ncookies, VSW_RING_EL_DATA_SZ);
4979 
4980 			goto setup_ring_cleanup;
4981 		} else {
4982 			for (j = 1; j < ncookies; j++) {
4983 				rv = ldc_mem_nextcookie(priv_addr->memhandle,
4984 				    &(priv_addr->memcookie[j]));
4985 				if (rv != 0) {
4986 					DERR(vswp, "%s: ldc_mem_nextcookie "
4987 					    "failed rv (%d)", name, rv);
4988 					goto setup_ring_cleanup;
4989 				}
4990 				D3(vswp, "%s: memcookie %d : addr 0x%llx : "
4991 				    "size 0x%llx", name, j,
4992 				    priv_addr->memcookie[j].addr,
4993 				    priv_addr->memcookie[j].size);
4994 			}
4995 
4996 		}
4997 		priv_addr->ncookies = ncookies;
4998 		priv_addr->dstate = VIO_DESC_FREE;
4999 
5000 		if (pub_addr != NULL) {
5001 
5002 			/* link pub and private sides */
5003 			priv_addr->descp = pub_addr;
5004 
5005 			pub_addr->ncookies = priv_addr->ncookies;
5006 
5007 			for (nc = 0; nc < pub_addr->ncookies; nc++) {
5008 				bcopy(&priv_addr->memcookie[nc],
5009 				    &pub_addr->memcookie[nc],
5010 				    sizeof (ldc_mem_cookie_t));
5011 			}
5012 
5013 			pub_addr->hdr.dstate = VIO_DESC_FREE;
5014 			pub_addr++;
5015 		}
5016 
5017 		/*
5018 		 * move to next element in the dring and the next
5019 		 * position in the data buffer.
5020 		 */
5021 		priv_addr++;
5022 		tmpp += offset;
5023 	}
5024 
5025 	return (0);
5026 
5027 setup_ring_cleanup:
5028 	priv_addr = dp->priv_addr;
5029 
5030 	for (j = 0; j < i; j++) {
5031 		(void) ldc_mem_unbind_handle(priv_addr->memhandle);
5032 		(void) ldc_mem_free_handle(priv_addr->memhandle);
5033 
5034 		mutex_destroy(&priv_addr->dstate_lock);
5035 
5036 		priv_addr++;
5037 	}
5038 	kmem_free(dp->data_addr, dp->data_sz);
5039 
5040 	return (1);
5041 }
5042 
5043 /*
5044  * Searches the private section of a ring for a free descriptor,
5045  * starting at the location of the last free descriptor found
5046  * previously.
5047  *
5048  * Returns 0 if free descriptor is available, and updates state
5049  * of private descriptor to VIO_DESC_READY,  otherwise returns 1.
5050  *
5051  * FUTURE: might need to return contiguous range of descriptors
5052  * as dring info msg assumes all will be contiguous.
5053  */
5054 static int
5055 vsw_dring_find_free_desc(dring_info_t *dringp,
5056 		vsw_private_desc_t **priv_p, int *idx)
5057 {
5058 	vsw_private_desc_t	*addr = NULL;
5059 	int			num = vsw_ntxds;
5060 	int			ret = 1;
5061 
5062 	D1(NULL, "%s enter\n", __func__);
5063 
5064 	ASSERT(dringp->priv_addr != NULL);
5065 
5066 	D2(NULL, "%s: searching ring, dringp 0x%llx : start pos %lld",
5067 	    __func__, dringp, dringp->end_idx);
5068 
5069 	addr = (vsw_private_desc_t *)dringp->priv_addr + dringp->end_idx;
5070 
5071 	mutex_enter(&addr->dstate_lock);
5072 	if (addr->dstate == VIO_DESC_FREE) {
5073 		addr->dstate = VIO_DESC_READY;
5074 		*priv_p = addr;
5075 		*idx = dringp->end_idx;
5076 		dringp->end_idx = (dringp->end_idx + 1) % num;
5077 		ret = 0;
5078 
5079 	}
5080 	mutex_exit(&addr->dstate_lock);
5081 
5082 	/* ring full */
5083 	if (ret == 1) {
5084 		D2(NULL, "%s: no desp free: started at %d", __func__,
5085 		    dringp->end_idx);
5086 	}
5087 
5088 	D1(NULL, "%s: exit\n", __func__);
5089 
5090 	return (ret);
5091 }
5092 
5093 /*
5094  * Map from a dring identifier to the ring itself. Returns
5095  * pointer to ring or NULL if no match found.
5096  *
5097  * Should be called with dlistrw rwlock held as reader.
5098  */
5099 static dring_info_t *
5100 vsw_ident2dring(lane_t *lane, uint64_t ident)
5101 {
5102 	dring_info_t	*dp = NULL;
5103 
5104 	if ((dp = lane->dringp) == NULL) {
5105 		return (NULL);
5106 	} else {
5107 		if (dp->ident == ident)
5108 			return (dp);
5109 
5110 		while (dp != NULL) {
5111 			if (dp->ident == ident)
5112 				break;
5113 			dp = dp->next;
5114 		}
5115 	}
5116 
5117 	return (dp);
5118 }
5119 
5120 /*
5121  * Set the default lane attributes. These are copied into
5122  * the attr msg we send to our peer. If they are not acceptable
5123  * then (currently) the handshake ends.
5124  */
5125 static void
5126 vsw_set_lane_attr(vsw_t *vswp, lane_t *lp)
5127 {
5128 	bzero(lp, sizeof (lane_t));
5129 
5130 	READ_ENTER(&vswp->if_lockrw);
5131 	ether_copy(&(vswp->if_addr), &(lp->addr));
5132 	RW_EXIT(&vswp->if_lockrw);
5133 
5134 	lp->mtu = VSW_MTU;
5135 	lp->addr_type = ADDR_TYPE_MAC;
5136 	lp->xfer_mode = VIO_DRING_MODE_V1_0;
5137 	lp->ack_freq = 0;	/* for shared mode */
5138 	lp->seq_num = VNET_ISS;
5139 }
5140 
5141 /*
5142  * Verify that the attributes are acceptable.
5143  *
5144  * FUTURE: If some attributes are not acceptable, change them
5145  * our desired values.
5146  */
5147 static int
5148 vsw_check_attr(vnet_attr_msg_t *pkt, vsw_ldc_t *ldcp)
5149 {
5150 	int			ret = 0;
5151 	struct ether_addr	ea;
5152 	vsw_port_t		*port = ldcp->ldc_port;
5153 	lane_t			*lp = &ldcp->lane_out;
5154 
5155 
5156 	D1(NULL, "vsw_check_attr enter\n");
5157 
5158 	if ((pkt->xfer_mode != VIO_DESC_MODE) &&
5159 	    (pkt->xfer_mode != lp->xfer_mode)) {
5160 		D2(NULL, "vsw_check_attr: unknown mode %x\n", pkt->xfer_mode);
5161 		ret = 1;
5162 	}
5163 
5164 	/* Only support MAC addresses at moment. */
5165 	if ((pkt->addr_type != ADDR_TYPE_MAC) || (pkt->addr == 0)) {
5166 		D2(NULL, "vsw_check_attr: invalid addr_type %x, "
5167 		    "or address 0x%llx\n", pkt->addr_type, pkt->addr);
5168 		ret = 1;
5169 	}
5170 
5171 	/*
5172 	 * MAC address supplied by device should match that stored
5173 	 * in the vsw-port OBP node. Need to decide what to do if they
5174 	 * don't match, for the moment just warn but don't fail.
5175 	 */
5176 	vnet_macaddr_ultostr(pkt->addr, ea.ether_addr_octet);
5177 	if (ether_cmp(&ea, &port->p_macaddr) != 0) {
5178 		DERR(NULL, "vsw_check_attr: device supplied address "
5179 		    "0x%llx doesn't match node address 0x%llx\n",
5180 		    pkt->addr, port->p_macaddr);
5181 	}
5182 
5183 	/*
5184 	 * Ack freq only makes sense in pkt mode, in shared
5185 	 * mode the ring descriptors say whether or not to
5186 	 * send back an ACK.
5187 	 */
5188 	if ((VSW_VER_EQ(ldcp, 1, 2) &&
5189 	    (pkt->xfer_mode & VIO_DRING_MODE_V1_2)) ||
5190 	    (VSW_VER_LT(ldcp, 1, 2) &&
5191 	    (pkt->xfer_mode == VIO_DRING_MODE_V1_0))) {
5192 		if (pkt->ack_freq > 0) {
5193 			D2(NULL, "vsw_check_attr: non zero ack freq "
5194 			    " in SHM mode\n");
5195 			ret = 1;
5196 		}
5197 	}
5198 
5199 	/*
5200 	 * Note: for the moment we only support ETHER
5201 	 * frames. This may change in the future.
5202 	 */
5203 	if ((pkt->mtu > VSW_MTU) || (pkt->mtu <= 0)) {
5204 		D2(NULL, "vsw_check_attr: invalid MTU (0x%llx)\n",
5205 		    pkt->mtu);
5206 		ret = 1;
5207 	}
5208 
5209 	D1(NULL, "vsw_check_attr exit\n");
5210 
5211 	return (ret);
5212 }
5213 
5214 /*
5215  * Returns 1 if there is a problem, 0 otherwise.
5216  */
5217 static int
5218 vsw_check_dring_info(vio_dring_reg_msg_t *pkt)
5219 {
5220 	_NOTE(ARGUNUSED(pkt))
5221 
5222 	int	ret = 0;
5223 
5224 	D1(NULL, "vsw_check_dring_info enter\n");
5225 
5226 	if ((pkt->num_descriptors == 0) ||
5227 	    (pkt->descriptor_size == 0) ||
5228 	    (pkt->ncookies != 1)) {
5229 		DERR(NULL, "vsw_check_dring_info: invalid dring msg");
5230 		ret = 1;
5231 	}
5232 
5233 	D1(NULL, "vsw_check_dring_info exit\n");
5234 
5235 	return (ret);
5236 }
5237 
5238 /*
5239  * Returns 1 if two memory cookies match. Otherwise returns 0.
5240  */
5241 static int
5242 vsw_mem_cookie_match(ldc_mem_cookie_t *m1, ldc_mem_cookie_t *m2)
5243 {
5244 	if ((m1->addr != m2->addr) ||
5245 	    (m2->size != m2->size)) {
5246 		return (0);
5247 	} else {
5248 		return (1);
5249 	}
5250 }
5251 
5252 /*
5253  * Returns 1 if ring described in reg message matches that
5254  * described by dring_info structure. Otherwise returns 0.
5255  */
5256 static int
5257 vsw_dring_match(dring_info_t *dp, vio_dring_reg_msg_t *msg)
5258 {
5259 	if ((msg->descriptor_size != dp->descriptor_size) ||
5260 	    (msg->num_descriptors != dp->num_descriptors) ||
5261 	    (msg->ncookies != dp->ncookies) ||
5262 	    !(vsw_mem_cookie_match(&msg->cookie[0], &dp->cookie[0]))) {
5263 		return (0);
5264 	} else {
5265 		return (1);
5266 	}
5267 
5268 }
5269 
5270 static caddr_t
5271 vsw_print_ethaddr(uint8_t *a, char *ebuf)
5272 {
5273 	(void) sprintf(ebuf, "%x:%x:%x:%x:%x:%x",
5274 	    a[0], a[1], a[2], a[3], a[4], a[5]);
5275 	return (ebuf);
5276 }
5277 
5278 /*
5279  * Reset and free all the resources associated with
5280  * the channel.
5281  */
5282 static void
5283 vsw_free_lane_resources(vsw_ldc_t *ldcp, uint64_t dir)
5284 {
5285 	dring_info_t		*dp, *dpp;
5286 	lane_t			*lp = NULL;
5287 	int			rv = 0;
5288 
5289 	ASSERT(ldcp != NULL);
5290 
5291 	D1(ldcp->ldc_vswp, "%s (%lld): enter", __func__, ldcp->ldc_id);
5292 
5293 	if (dir == INBOUND) {
5294 		D2(ldcp->ldc_vswp, "%s: freeing INBOUND lane"
5295 		    " of channel %lld", __func__, ldcp->ldc_id);
5296 		lp = &ldcp->lane_in;
5297 	} else {
5298 		D2(ldcp->ldc_vswp, "%s: freeing OUTBOUND lane"
5299 		    " of channel %lld", __func__, ldcp->ldc_id);
5300 		lp = &ldcp->lane_out;
5301 	}
5302 
5303 	lp->lstate = VSW_LANE_INACTIV;
5304 	lp->seq_num = VNET_ISS;
5305 
5306 	if (lp->dringp) {
5307 		if (dir == INBOUND) {
5308 			WRITE_ENTER(&lp->dlistrw);
5309 			dp = lp->dringp;
5310 			while (dp != NULL) {
5311 				dpp = dp->next;
5312 				if (dp->handle != NULL)
5313 					(void) ldc_mem_dring_unmap(dp->handle);
5314 				kmem_free(dp, sizeof (dring_info_t));
5315 				dp = dpp;
5316 			}
5317 			RW_EXIT(&lp->dlistrw);
5318 		} else {
5319 			/*
5320 			 * unbind, destroy exported dring, free dring struct
5321 			 */
5322 			WRITE_ENTER(&lp->dlistrw);
5323 			dp = lp->dringp;
5324 			rv = vsw_free_ring(dp);
5325 			RW_EXIT(&lp->dlistrw);
5326 		}
5327 		if (rv == 0) {
5328 			lp->dringp = NULL;
5329 		}
5330 	}
5331 
5332 	D1(ldcp->ldc_vswp, "%s (%lld): exit", __func__, ldcp->ldc_id);
5333 }
5334 
5335 /*
5336  * Free ring and all associated resources.
5337  *
5338  * Should be called with dlistrw rwlock held as writer.
5339  */
5340 static int
5341 vsw_free_ring(dring_info_t *dp)
5342 {
5343 	vsw_private_desc_t	*paddr = NULL;
5344 	dring_info_t		*dpp;
5345 	int			i, rv = 1;
5346 
5347 	while (dp != NULL) {
5348 		mutex_enter(&dp->dlock);
5349 		dpp = dp->next;
5350 		if (dp->priv_addr != NULL) {
5351 			/*
5352 			 * First unbind and free the memory handles
5353 			 * stored in each descriptor within the ring.
5354 			 */
5355 			for (i = 0; i < vsw_ntxds; i++) {
5356 				paddr = (vsw_private_desc_t *)
5357 				    dp->priv_addr + i;
5358 				if (paddr->memhandle != NULL) {
5359 					if (paddr->bound == 1) {
5360 						rv = ldc_mem_unbind_handle(
5361 						    paddr->memhandle);
5362 
5363 						if (rv != 0) {
5364 							DERR(NULL, "error "
5365 							"unbinding handle for "
5366 							"ring 0x%llx at pos %d",
5367 							    dp, i);
5368 							mutex_exit(&dp->dlock);
5369 							return (rv);
5370 						}
5371 						paddr->bound = 0;
5372 					}
5373 
5374 					rv = ldc_mem_free_handle(
5375 					    paddr->memhandle);
5376 					if (rv != 0) {
5377 						DERR(NULL, "error freeing "
5378 						    "handle for ring 0x%llx "
5379 						    "at pos %d", dp, i);
5380 						mutex_exit(&dp->dlock);
5381 						return (rv);
5382 					}
5383 					paddr->memhandle = NULL;
5384 				}
5385 				mutex_destroy(&paddr->dstate_lock);
5386 			}
5387 			kmem_free(dp->priv_addr,
5388 			    (sizeof (vsw_private_desc_t) * vsw_ntxds));
5389 		}
5390 
5391 		/*
5392 		 * Now unbind and destroy the ring itself.
5393 		 */
5394 		if (dp->handle != NULL) {
5395 			(void) ldc_mem_dring_unbind(dp->handle);
5396 			(void) ldc_mem_dring_destroy(dp->handle);
5397 		}
5398 
5399 		if (dp->data_addr != NULL) {
5400 			kmem_free(dp->data_addr, dp->data_sz);
5401 		}
5402 
5403 		mutex_exit(&dp->dlock);
5404 		mutex_destroy(&dp->dlock);
5405 		mutex_destroy(&dp->restart_lock);
5406 		kmem_free(dp, sizeof (dring_info_t));
5407 
5408 		dp = dpp;
5409 	}
5410 	return (0);
5411 }
5412 
5413 /*
5414  * vsw_ldc_rx_worker -- A per LDC worker thread to receive data.
5415  * This thread is woken up by the LDC interrupt handler to process
5416  * LDC packets and receive data.
5417  */
5418 static void
5419 vsw_ldc_rx_worker(void *arg)
5420 {
5421 	callb_cpr_t	cprinfo;
5422 	vsw_ldc_t *ldcp = (vsw_ldc_t *)arg;
5423 	vsw_t *vswp = ldcp->ldc_vswp;
5424 
5425 	D1(vswp, "%s(%lld):enter\n", __func__, ldcp->ldc_id);
5426 	CALLB_CPR_INIT(&cprinfo, &ldcp->rx_thr_lock, callb_generic_cpr,
5427 	    "vsw_rx_thread");
5428 	mutex_enter(&ldcp->rx_thr_lock);
5429 	ldcp->rx_thr_flags |= VSW_WTHR_RUNNING;
5430 	while (!(ldcp->rx_thr_flags & VSW_WTHR_STOP)) {
5431 
5432 		CALLB_CPR_SAFE_BEGIN(&cprinfo);
5433 		/*
5434 		 * Wait until the data is received or a stop
5435 		 * request is received.
5436 		 */
5437 		while (!(ldcp->rx_thr_flags &
5438 		    (VSW_WTHR_DATARCVD | VSW_WTHR_STOP))) {
5439 			cv_wait(&ldcp->rx_thr_cv, &ldcp->rx_thr_lock);
5440 		}
5441 		CALLB_CPR_SAFE_END(&cprinfo, &ldcp->rx_thr_lock)
5442 
5443 		/*
5444 		 * First process the stop request.
5445 		 */
5446 		if (ldcp->rx_thr_flags & VSW_WTHR_STOP) {
5447 			D2(vswp, "%s(%lld):Rx thread stopped\n",
5448 			    __func__, ldcp->ldc_id);
5449 			break;
5450 		}
5451 		ldcp->rx_thr_flags &= ~VSW_WTHR_DATARCVD;
5452 		mutex_exit(&ldcp->rx_thr_lock);
5453 		D1(vswp, "%s(%lld):calling vsw_process_pkt\n",
5454 		    __func__, ldcp->ldc_id);
5455 		mutex_enter(&ldcp->ldc_cblock);
5456 		vsw_process_pkt(ldcp);
5457 		mutex_exit(&ldcp->ldc_cblock);
5458 		mutex_enter(&ldcp->rx_thr_lock);
5459 	}
5460 
5461 	/*
5462 	 * Update the run status and wakeup the thread that
5463 	 * has sent the stop request.
5464 	 */
5465 	ldcp->rx_thr_flags &= ~VSW_WTHR_RUNNING;
5466 	cv_signal(&ldcp->rx_thr_cv);
5467 	CALLB_CPR_EXIT(&cprinfo);
5468 	D1(vswp, "%s(%lld):exit\n", __func__, ldcp->ldc_id);
5469 	thread_exit();
5470 }
5471 
5472 /* vsw_stop_rx_thread -- Co-ordinate with receive thread to stop it */
5473 static void
5474 vsw_stop_rx_thread(vsw_ldc_t *ldcp)
5475 {
5476 	vsw_t *vswp = ldcp->ldc_vswp;
5477 
5478 	D1(vswp, "%s(%lld):enter\n", __func__, ldcp->ldc_id);
5479 	/*
5480 	 * Send a stop request by setting the stop flag and
5481 	 * wait until the receive thread stops.
5482 	 */
5483 	mutex_enter(&ldcp->rx_thr_lock);
5484 	if (ldcp->rx_thr_flags & VSW_WTHR_RUNNING) {
5485 		ldcp->rx_thr_flags |= VSW_WTHR_STOP;
5486 		cv_signal(&ldcp->rx_thr_cv);
5487 		while (ldcp->rx_thr_flags & VSW_WTHR_RUNNING) {
5488 			cv_wait(&ldcp->rx_thr_cv, &ldcp->rx_thr_lock);
5489 		}
5490 	}
5491 	mutex_exit(&ldcp->rx_thr_lock);
5492 	ldcp->rx_thread = NULL;
5493 	D1(vswp, "%s(%lld):exit\n", __func__, ldcp->ldc_id);
5494 }
5495 
5496 /*
5497  * vsw_ldc_tx_worker -- A per LDC worker thread to transmit data.
5498  * This thread is woken up by the vsw_portsend to transmit
5499  * packets.
5500  */
5501 static void
5502 vsw_ldc_tx_worker(void *arg)
5503 {
5504 	callb_cpr_t	cprinfo;
5505 	vsw_ldc_t *ldcp = (vsw_ldc_t *)arg;
5506 	vsw_t *vswp = ldcp->ldc_vswp;
5507 	mblk_t *mp;
5508 	mblk_t *tmp;
5509 
5510 	D1(vswp, "%s(%lld):enter\n", __func__, ldcp->ldc_id);
5511 	CALLB_CPR_INIT(&cprinfo, &ldcp->tx_thr_lock, callb_generic_cpr,
5512 	    "vnet_tx_thread");
5513 	mutex_enter(&ldcp->tx_thr_lock);
5514 	ldcp->tx_thr_flags |= VSW_WTHR_RUNNING;
5515 	while (!(ldcp->tx_thr_flags & VSW_WTHR_STOP)) {
5516 
5517 		CALLB_CPR_SAFE_BEGIN(&cprinfo);
5518 		/*
5519 		 * Wait until the data is received or a stop
5520 		 * request is received.
5521 		 */
5522 		while (!(ldcp->tx_thr_flags & VSW_WTHR_STOP) &&
5523 		    (ldcp->tx_mhead == NULL)) {
5524 			cv_wait(&ldcp->tx_thr_cv, &ldcp->tx_thr_lock);
5525 		}
5526 		CALLB_CPR_SAFE_END(&cprinfo, &ldcp->tx_thr_lock)
5527 
5528 		/*
5529 		 * First process the stop request.
5530 		 */
5531 		if (ldcp->tx_thr_flags & VSW_WTHR_STOP) {
5532 			D2(vswp, "%s(%lld):tx thread stopped\n",
5533 			    __func__, ldcp->ldc_id);
5534 			break;
5535 		}
5536 		mp = ldcp->tx_mhead;
5537 		ldcp->tx_mhead = ldcp->tx_mtail = NULL;
5538 		ldcp->tx_cnt = 0;
5539 		mutex_exit(&ldcp->tx_thr_lock);
5540 		D2(vswp, "%s(%lld):calling vsw_ldcsend\n",
5541 		    __func__, ldcp->ldc_id);
5542 		while (mp != NULL) {
5543 			tmp = mp->b_next;
5544 			mp->b_next = mp->b_prev = NULL;
5545 			(void) vsw_ldcsend(ldcp, mp, vsw_ldc_tx_retries);
5546 			mp = tmp;
5547 		}
5548 		mutex_enter(&ldcp->tx_thr_lock);
5549 	}
5550 
5551 	/*
5552 	 * Update the run status and wakeup the thread that
5553 	 * has sent the stop request.
5554 	 */
5555 	ldcp->tx_thr_flags &= ~VSW_WTHR_RUNNING;
5556 	cv_signal(&ldcp->tx_thr_cv);
5557 	CALLB_CPR_EXIT(&cprinfo);
5558 	D1(vswp, "%s(%lld):exit\n", __func__, ldcp->ldc_id);
5559 	thread_exit();
5560 }
5561 
5562 /* vsw_stop_tx_thread -- Co-ordinate with receive thread to stop it */
5563 static void
5564 vsw_stop_tx_thread(vsw_ldc_t *ldcp)
5565 {
5566 	vsw_t *vswp = ldcp->ldc_vswp;
5567 
5568 	D1(vswp, "%s(%lld):enter\n", __func__, ldcp->ldc_id);
5569 	/*
5570 	 * Send a stop request by setting the stop flag and
5571 	 * wait until the receive thread stops.
5572 	 */
5573 	mutex_enter(&ldcp->tx_thr_lock);
5574 	if (ldcp->tx_thr_flags & VSW_WTHR_RUNNING) {
5575 		ldcp->tx_thr_flags |= VSW_WTHR_STOP;
5576 		cv_signal(&ldcp->tx_thr_cv);
5577 		while (ldcp->tx_thr_flags & VSW_WTHR_RUNNING) {
5578 			cv_wait(&ldcp->tx_thr_cv, &ldcp->tx_thr_lock);
5579 		}
5580 	}
5581 	mutex_exit(&ldcp->tx_thr_lock);
5582 	ldcp->tx_thread = NULL;
5583 	D1(vswp, "%s(%lld):exit\n", __func__, ldcp->ldc_id);
5584 }
5585 
5586 /* vsw_reclaim_dring -- reclaim descriptors */
5587 static int
5588 vsw_reclaim_dring(dring_info_t *dp, int start)
5589 {
5590 	int i, j, len;
5591 	vsw_private_desc_t *priv_addr;
5592 	vnet_public_desc_t *pub_addr;
5593 
5594 	pub_addr = (vnet_public_desc_t *)dp->pub_addr;
5595 	priv_addr = (vsw_private_desc_t *)dp->priv_addr;
5596 	len = dp->num_descriptors;
5597 
5598 	D2(NULL, "%s: start index %ld\n", __func__, start);
5599 
5600 	j = 0;
5601 	for (i = start; j < len; i = (i + 1) % len, j++) {
5602 		pub_addr = (vnet_public_desc_t *)dp->pub_addr + i;
5603 		priv_addr = (vsw_private_desc_t *)dp->priv_addr + i;
5604 
5605 		mutex_enter(&priv_addr->dstate_lock);
5606 		if (pub_addr->hdr.dstate != VIO_DESC_DONE) {
5607 			mutex_exit(&priv_addr->dstate_lock);
5608 			break;
5609 		}
5610 		pub_addr->hdr.dstate = VIO_DESC_FREE;
5611 		priv_addr->dstate = VIO_DESC_FREE;
5612 		/* clear all the fields */
5613 		priv_addr->datalen = 0;
5614 		pub_addr->hdr.ack = 0;
5615 		mutex_exit(&priv_addr->dstate_lock);
5616 
5617 		D3(NULL, "claiming descp:%d pub state:0x%llx priv state 0x%llx",
5618 		    i, pub_addr->hdr.dstate, priv_addr->dstate);
5619 	}
5620 	return (j);
5621 }
5622 
5623 /*
5624  * Debugging routines
5625  */
5626 static void
5627 display_state(void)
5628 {
5629 	vsw_t		*vswp;
5630 	vsw_port_list_t	*plist;
5631 	vsw_port_t 	*port;
5632 	vsw_ldc_list_t	*ldcl;
5633 	vsw_ldc_t 	*ldcp;
5634 	extern vsw_t 	*vsw_head;
5635 
5636 	cmn_err(CE_NOTE, "***** system state *****");
5637 
5638 	for (vswp = vsw_head; vswp; vswp = vswp->next) {
5639 		plist = &vswp->plist;
5640 		READ_ENTER(&plist->lockrw);
5641 		cmn_err(CE_CONT, "vsw instance %d has %d ports attached\n",
5642 		    vswp->instance, plist->num_ports);
5643 
5644 		for (port = plist->head; port != NULL; port = port->p_next) {
5645 			ldcl = &port->p_ldclist;
5646 			cmn_err(CE_CONT, "port %d : %d ldcs attached\n",
5647 			    port->p_instance, ldcl->num_ldcs);
5648 			READ_ENTER(&ldcl->lockrw);
5649 			ldcp = ldcl->head;
5650 			for (; ldcp != NULL; ldcp = ldcp->ldc_next) {
5651 				cmn_err(CE_CONT, "chan %lu : dev %d : "
5652 				    "status %d : phase %u\n",
5653 				    ldcp->ldc_id, ldcp->dev_class,
5654 				    ldcp->ldc_status, ldcp->hphase);
5655 				cmn_err(CE_CONT, "chan %lu : lsession %lu : "
5656 				    "psession %lu\n", ldcp->ldc_id,
5657 				    ldcp->local_session, ldcp->peer_session);
5658 
5659 				cmn_err(CE_CONT, "Inbound lane:\n");
5660 				display_lane(&ldcp->lane_in);
5661 				cmn_err(CE_CONT, "Outbound lane:\n");
5662 				display_lane(&ldcp->lane_out);
5663 			}
5664 			RW_EXIT(&ldcl->lockrw);
5665 		}
5666 		RW_EXIT(&plist->lockrw);
5667 	}
5668 	cmn_err(CE_NOTE, "***** system state *****");
5669 }
5670 
5671 static void
5672 display_lane(lane_t *lp)
5673 {
5674 	dring_info_t	*drp;
5675 
5676 	cmn_err(CE_CONT, "ver 0x%x:0x%x : state %lx : mtu 0x%lx\n",
5677 	    lp->ver_major, lp->ver_minor, lp->lstate, lp->mtu);
5678 	cmn_err(CE_CONT, "addr_type %d : addr 0x%lx : xmode %d\n",
5679 	    lp->addr_type, lp->addr, lp->xfer_mode);
5680 	cmn_err(CE_CONT, "dringp 0x%lx\n", (uint64_t)lp->dringp);
5681 
5682 	cmn_err(CE_CONT, "Dring info:\n");
5683 	for (drp = lp->dringp; drp != NULL; drp = drp->next) {
5684 		cmn_err(CE_CONT, "\tnum_desc %u : dsize %u\n",
5685 		    drp->num_descriptors, drp->descriptor_size);
5686 		cmn_err(CE_CONT, "\thandle 0x%lx\n", drp->handle);
5687 		cmn_err(CE_CONT, "\tpub_addr 0x%lx : priv_addr 0x%lx\n",
5688 		    (uint64_t)drp->pub_addr, (uint64_t)drp->priv_addr);
5689 		cmn_err(CE_CONT, "\tident 0x%lx : end_idx %lu\n",
5690 		    drp->ident, drp->end_idx);
5691 		display_ring(drp);
5692 	}
5693 }
5694 
5695 static void
5696 display_ring(dring_info_t *dringp)
5697 {
5698 	uint64_t		i;
5699 	uint64_t		priv_count = 0;
5700 	uint64_t		pub_count = 0;
5701 	vnet_public_desc_t	*pub_addr = NULL;
5702 	vsw_private_desc_t	*priv_addr = NULL;
5703 
5704 	for (i = 0; i < vsw_ntxds; i++) {
5705 		if (dringp->pub_addr != NULL) {
5706 			pub_addr = (vnet_public_desc_t *)dringp->pub_addr + i;
5707 
5708 			if (pub_addr->hdr.dstate == VIO_DESC_FREE)
5709 				pub_count++;
5710 		}
5711 
5712 		if (dringp->priv_addr != NULL) {
5713 			priv_addr = (vsw_private_desc_t *)dringp->priv_addr + i;
5714 
5715 			if (priv_addr->dstate == VIO_DESC_FREE)
5716 				priv_count++;
5717 		}
5718 	}
5719 	cmn_err(CE_CONT, "\t%lu elements: %lu priv free: %lu pub free\n",
5720 	    i, priv_count, pub_count);
5721 }
5722 
5723 static void
5724 dump_flags(uint64_t state)
5725 {
5726 	int	i;
5727 
5728 	typedef struct flag_name {
5729 		int	flag_val;
5730 		char	*flag_name;
5731 	} flag_name_t;
5732 
5733 	flag_name_t	flags[] = {
5734 		VSW_VER_INFO_SENT, "VSW_VER_INFO_SENT",
5735 		VSW_VER_INFO_RECV, "VSW_VER_INFO_RECV",
5736 		VSW_VER_ACK_RECV, "VSW_VER_ACK_RECV",
5737 		VSW_VER_ACK_SENT, "VSW_VER_ACK_SENT",
5738 		VSW_VER_NACK_RECV, "VSW_VER_NACK_RECV",
5739 		VSW_VER_NACK_SENT, "VSW_VER_NACK_SENT",
5740 		VSW_ATTR_INFO_SENT, "VSW_ATTR_INFO_SENT",
5741 		VSW_ATTR_INFO_RECV, "VSW_ATTR_INFO_RECV",
5742 		VSW_ATTR_ACK_SENT, "VSW_ATTR_ACK_SENT",
5743 		VSW_ATTR_ACK_RECV, "VSW_ATTR_ACK_RECV",
5744 		VSW_ATTR_NACK_SENT, "VSW_ATTR_NACK_SENT",
5745 		VSW_ATTR_NACK_RECV, "VSW_ATTR_NACK_RECV",
5746 		VSW_DRING_INFO_SENT, "VSW_DRING_INFO_SENT",
5747 		VSW_DRING_INFO_RECV, "VSW_DRING_INFO_RECV",
5748 		VSW_DRING_ACK_SENT, "VSW_DRING_ACK_SENT",
5749 		VSW_DRING_ACK_RECV, "VSW_DRING_ACK_RECV",
5750 		VSW_DRING_NACK_SENT, "VSW_DRING_NACK_SENT",
5751 		VSW_DRING_NACK_RECV, "VSW_DRING_NACK_RECV",
5752 		VSW_RDX_INFO_SENT, "VSW_RDX_INFO_SENT",
5753 		VSW_RDX_INFO_RECV, "VSW_RDX_INFO_RECV",
5754 		VSW_RDX_ACK_SENT, "VSW_RDX_ACK_SENT",
5755 		VSW_RDX_ACK_RECV, "VSW_RDX_ACK_RECV",
5756 		VSW_RDX_NACK_SENT, "VSW_RDX_NACK_SENT",
5757 		VSW_RDX_NACK_RECV, "VSW_RDX_NACK_RECV",
5758 		VSW_MCST_INFO_SENT, "VSW_MCST_INFO_SENT",
5759 		VSW_MCST_INFO_RECV, "VSW_MCST_INFO_RECV",
5760 		VSW_MCST_ACK_SENT, "VSW_MCST_ACK_SENT",
5761 		VSW_MCST_ACK_RECV, "VSW_MCST_ACK_RECV",
5762 		VSW_MCST_NACK_SENT, "VSW_MCST_NACK_SENT",
5763 		VSW_MCST_NACK_RECV, "VSW_MCST_NACK_RECV",
5764 		VSW_LANE_ACTIVE, "VSW_LANE_ACTIVE"};
5765 
5766 	DERR(NULL, "DUMP_FLAGS: %llx\n", state);
5767 	for (i = 0; i < sizeof (flags)/sizeof (flag_name_t); i++) {
5768 		if (state & flags[i].flag_val)
5769 			DERR(NULL, "DUMP_FLAGS %s", flags[i].flag_name);
5770 	}
5771 }
5772