xref: /titanic_53/usr/src/uts/sun4v/io/vsw.c (revision 19b65a69adc64b3289ccb2fc32b805782e3f4540)
11ae08745Sheppo /*
21ae08745Sheppo  * CDDL HEADER START
31ae08745Sheppo  *
41ae08745Sheppo  * The contents of this file are subject to the terms of the
51ae08745Sheppo  * Common Development and Distribution License (the "License").
61ae08745Sheppo  * You may not use this file except in compliance with the License.
71ae08745Sheppo  *
81ae08745Sheppo  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
91ae08745Sheppo  * or http://www.opensolaris.org/os/licensing.
101ae08745Sheppo  * See the License for the specific language governing permissions
111ae08745Sheppo  * and limitations under the License.
121ae08745Sheppo  *
131ae08745Sheppo  * When distributing Covered Code, include this CDDL HEADER in each
141ae08745Sheppo  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
151ae08745Sheppo  * If applicable, add the following below this CDDL HEADER, with the
161ae08745Sheppo  * fields enclosed by brackets "[]" replaced with your own identifying
171ae08745Sheppo  * information: Portions Copyright [yyyy] [name of copyright owner]
181ae08745Sheppo  *
191ae08745Sheppo  * CDDL HEADER END
201ae08745Sheppo  */
211ae08745Sheppo 
221ae08745Sheppo /*
23b071742bSsg70180  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
241ae08745Sheppo  * Use is subject to license terms.
251ae08745Sheppo  */
261ae08745Sheppo 
271ae08745Sheppo #pragma ident	"%Z%%M%	%I%	%E% SMI"
281ae08745Sheppo 
291ae08745Sheppo #include <sys/types.h>
301ae08745Sheppo #include <sys/errno.h>
311ae08745Sheppo #include <sys/debug.h>
321ae08745Sheppo #include <sys/time.h>
331ae08745Sheppo #include <sys/sysmacros.h>
341ae08745Sheppo #include <sys/systm.h>
351ae08745Sheppo #include <sys/user.h>
361ae08745Sheppo #include <sys/stropts.h>
371ae08745Sheppo #include <sys/stream.h>
381ae08745Sheppo #include <sys/strlog.h>
391ae08745Sheppo #include <sys/strsubr.h>
401ae08745Sheppo #include <sys/cmn_err.h>
411ae08745Sheppo #include <sys/cpu.h>
421ae08745Sheppo #include <sys/kmem.h>
431ae08745Sheppo #include <sys/conf.h>
441ae08745Sheppo #include <sys/ddi.h>
451ae08745Sheppo #include <sys/sunddi.h>
461ae08745Sheppo #include <sys/ksynch.h>
471ae08745Sheppo #include <sys/stat.h>
481ae08745Sheppo #include <sys/kstat.h>
491ae08745Sheppo #include <sys/vtrace.h>
501ae08745Sheppo #include <sys/strsun.h>
511ae08745Sheppo #include <sys/dlpi.h>
521ae08745Sheppo #include <sys/ethernet.h>
531ae08745Sheppo #include <net/if.h>
541ae08745Sheppo #include <sys/varargs.h>
551ae08745Sheppo #include <sys/machsystm.h>
561ae08745Sheppo #include <sys/modctl.h>
571ae08745Sheppo #include <sys/modhash.h>
581ae08745Sheppo #include <sys/mac.h>
59ba2e4443Sseb #include <sys/mac_ether.h>
601ae08745Sheppo #include <sys/taskq.h>
611ae08745Sheppo #include <sys/note.h>
621ae08745Sheppo #include <sys/mach_descrip.h>
631ae08745Sheppo #include <sys/mac.h>
641ae08745Sheppo #include <sys/mdeg.h>
651ae08745Sheppo #include <sys/ldc.h>
661ae08745Sheppo #include <sys/vsw_fdb.h>
671ae08745Sheppo #include <sys/vsw.h>
681ae08745Sheppo #include <sys/vio_mailbox.h>
691ae08745Sheppo #include <sys/vnet_mailbox.h>
701ae08745Sheppo #include <sys/vnet_common.h>
71d10e4ef2Snarayan #include <sys/vio_util.h>
72d10e4ef2Snarayan #include <sys/sdt.h>
73*19b65a69Ssb155480 #include <sys/atomic.h>
741ae08745Sheppo 
751ae08745Sheppo /*
761ae08745Sheppo  * Function prototypes.
771ae08745Sheppo  */
781ae08745Sheppo static	int vsw_attach(dev_info_t *, ddi_attach_cmd_t);
791ae08745Sheppo static	int vsw_detach(dev_info_t *, ddi_detach_cmd_t);
801ae08745Sheppo static	int vsw_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
8134683adeSsg70180 static	int vsw_get_md_physname(vsw_t *, md_t *, mde_cookie_t, char *);
8234683adeSsg70180 static	int vsw_get_md_smodes(vsw_t *, md_t *, mde_cookie_t, uint8_t *, int *);
83*19b65a69Ssb155480 static	void vsw_setup_switching_timeout(void *arg);
84*19b65a69Ssb155480 static	void vsw_stop_switching_timeout(vsw_t *vswp);
8534683adeSsg70180 static	int vsw_setup_switching(vsw_t *);
861ae08745Sheppo static	int vsw_setup_layer2(vsw_t *);
871ae08745Sheppo static	int vsw_setup_layer3(vsw_t *);
881ae08745Sheppo 
897636cb21Slm66018 /* MAC Ring table functions. */
907636cb21Slm66018 static void vsw_mac_ring_tbl_init(vsw_t *vswp);
917636cb21Slm66018 static void vsw_mac_ring_tbl_destroy(vsw_t *vswp);
927636cb21Slm66018 static void vsw_queue_worker(vsw_mac_ring_t *rrp);
937636cb21Slm66018 static void vsw_queue_stop(vsw_queue_t *vqp);
947636cb21Slm66018 static vsw_queue_t *vsw_queue_create();
957636cb21Slm66018 static void vsw_queue_destroy(vsw_queue_t *vqp);
967636cb21Slm66018 
971ae08745Sheppo /* MAC layer routines */
987636cb21Slm66018 static mac_resource_handle_t vsw_mac_ring_add_cb(void *arg,
997636cb21Slm66018 		mac_resource_t *mrp);
100e1ebb9ecSlm66018 static	int vsw_get_hw_maddr(vsw_t *);
1015f94e909Ssg70180 static	int vsw_set_hw(vsw_t *, vsw_port_t *, int);
1025f94e909Ssg70180 static	int vsw_set_hw_addr(vsw_t *, mac_multi_addr_t *);
1035f94e909Ssg70180 static	int vsw_set_hw_promisc(vsw_t *, vsw_port_t *, int);
1045f94e909Ssg70180 static	int vsw_unset_hw(vsw_t *, vsw_port_t *, int);
1055f94e909Ssg70180 static	int vsw_unset_hw_addr(vsw_t *, int);
1065f94e909Ssg70180 static	int vsw_unset_hw_promisc(vsw_t *, vsw_port_t *, int);
1075f94e909Ssg70180 static void vsw_reconfig_hw(vsw_t *);
1085f94e909Ssg70180 static int vsw_prog_if(vsw_t *);
1095f94e909Ssg70180 static int vsw_prog_ports(vsw_t *);
1107636cb21Slm66018 static int vsw_mac_attach(vsw_t *vswp);
1117636cb21Slm66018 static void vsw_mac_detach(vsw_t *vswp);
112*19b65a69Ssb155480 static int vsw_mac_open(vsw_t *vswp);
113*19b65a69Ssb155480 static void vsw_mac_close(vsw_t *vswp);
114*19b65a69Ssb155480 static void vsw_set_addrs(vsw_t *vswp);
115*19b65a69Ssb155480 static void vsw_unset_addrs(vsw_t *vswp);
1167636cb21Slm66018 
1177636cb21Slm66018 static void vsw_rx_queue_cb(void *, mac_resource_handle_t, mblk_t *);
1181ae08745Sheppo static void vsw_rx_cb(void *, mac_resource_handle_t, mblk_t *);
1191ae08745Sheppo static mblk_t *vsw_tx_msg(vsw_t *, mblk_t *);
1201ae08745Sheppo static int vsw_mac_register(vsw_t *);
1211ae08745Sheppo static int vsw_mac_unregister(vsw_t *);
122ba2e4443Sseb static int vsw_m_stat(void *, uint_t, uint64_t *);
1231ae08745Sheppo static void vsw_m_stop(void *arg);
1241ae08745Sheppo static int vsw_m_start(void *arg);
1251ae08745Sheppo static int vsw_m_unicst(void *arg, const uint8_t *);
1261ae08745Sheppo static int vsw_m_multicst(void *arg, boolean_t, const uint8_t *);
1271ae08745Sheppo static int vsw_m_promisc(void *arg, boolean_t);
1281ae08745Sheppo static mblk_t *vsw_m_tx(void *arg, mblk_t *);
1291ae08745Sheppo 
1301ae08745Sheppo /* MDEG routines */
13134683adeSsg70180 static	int vsw_mdeg_register(vsw_t *vswp);
1321ae08745Sheppo static	void vsw_mdeg_unregister(vsw_t *vswp);
1331ae08745Sheppo static	int vsw_mdeg_cb(void *cb_argp, mdeg_result_t *);
13434683adeSsg70180 static	int vsw_port_mdeg_cb(void *cb_argp, mdeg_result_t *);
135*19b65a69Ssb155480 static	int vsw_get_initial_md_properties(vsw_t *vswp, md_t *, mde_cookie_t);
13634683adeSsg70180 static	void vsw_update_md_prop(vsw_t *, md_t *, mde_cookie_t);
137*19b65a69Ssb155480 static	int vsw_read_mdprops(vsw_t *vswp);
1381ae08745Sheppo 
1391ae08745Sheppo /* Port add/deletion routines */
1401ae08745Sheppo static	int vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node);
1411ae08745Sheppo static	int vsw_port_attach(vsw_t *vswp, int p_instance,
1421ae08745Sheppo 	uint64_t *ldcids, int nids, struct ether_addr *macaddr);
1431ae08745Sheppo static	int vsw_detach_ports(vsw_t *vswp);
1441ae08745Sheppo static	int vsw_port_detach(vsw_t *vswp, int p_instance);
1451ae08745Sheppo static	int vsw_port_delete(vsw_port_t *port);
1461ae08745Sheppo static	int vsw_ldc_attach(vsw_port_t *port, uint64_t ldc_id);
1471ae08745Sheppo static	int vsw_ldc_detach(vsw_port_t *port, uint64_t ldc_id);
1481ae08745Sheppo static	int vsw_init_ldcs(vsw_port_t *port);
1491ae08745Sheppo static	int vsw_uninit_ldcs(vsw_port_t *port);
1501ae08745Sheppo static	int vsw_ldc_init(vsw_ldc_t *ldcp);
1511ae08745Sheppo static	int vsw_ldc_uninit(vsw_ldc_t *ldcp);
1521ae08745Sheppo static	int vsw_drain_ldcs(vsw_port_t *port);
1531ae08745Sheppo static	int vsw_drain_port_taskq(vsw_port_t *port);
1541ae08745Sheppo static	void vsw_marker_task(void *);
1551ae08745Sheppo static	vsw_port_t *vsw_lookup_port(vsw_t *vswp, int p_instance);
1561ae08745Sheppo static	int vsw_plist_del_node(vsw_t *, vsw_port_t *port);
1571ae08745Sheppo 
1581ae08745Sheppo /* Interrupt routines */
1591ae08745Sheppo static	uint_t vsw_ldc_cb(uint64_t cb, caddr_t arg);
1601ae08745Sheppo 
1611ae08745Sheppo /* Handshake routines */
162b071742bSsg70180 static	void vsw_ldc_reinit(vsw_ldc_t *);
163b071742bSsg70180 static	void vsw_process_conn_evt(vsw_ldc_t *, uint16_t);
164b071742bSsg70180 static	void vsw_conn_task(void *);
1651ae08745Sheppo static	int vsw_check_flag(vsw_ldc_t *, int, uint64_t);
1661ae08745Sheppo static	void vsw_next_milestone(vsw_ldc_t *);
1671ae08745Sheppo static	int vsw_supported_version(vio_ver_msg_t *);
1681ae08745Sheppo 
1691ae08745Sheppo /* Data processing routines */
1701ae08745Sheppo static void vsw_process_pkt(void *);
1711ae08745Sheppo static void vsw_dispatch_ctrl_task(vsw_ldc_t *, void *, vio_msg_tag_t);
1721ae08745Sheppo static void vsw_process_ctrl_pkt(void *);
1731ae08745Sheppo static void vsw_process_ctrl_ver_pkt(vsw_ldc_t *, void *);
1741ae08745Sheppo static void vsw_process_ctrl_attr_pkt(vsw_ldc_t *, void *);
1751ae08745Sheppo static void vsw_process_ctrl_mcst_pkt(vsw_ldc_t *, void *);
1761ae08745Sheppo static void vsw_process_ctrl_dring_reg_pkt(vsw_ldc_t *, void *);
1771ae08745Sheppo static void vsw_process_ctrl_dring_unreg_pkt(vsw_ldc_t *, void *);
1781ae08745Sheppo static void vsw_process_ctrl_rdx_pkt(vsw_ldc_t *, void *);
1791ae08745Sheppo static void vsw_process_data_pkt(vsw_ldc_t *, void *, vio_msg_tag_t);
1801ae08745Sheppo static void vsw_process_data_dring_pkt(vsw_ldc_t *, void *);
1811ae08745Sheppo static void vsw_process_data_raw_pkt(vsw_ldc_t *, void *);
1821ae08745Sheppo static void vsw_process_data_ibnd_pkt(vsw_ldc_t *, void *);
1831ae08745Sheppo static void vsw_process_err_pkt(vsw_ldc_t *, void *, vio_msg_tag_t);
1841ae08745Sheppo 
1851ae08745Sheppo /* Switching/data transmit routines */
1861ae08745Sheppo static	void vsw_switch_l2_frame(vsw_t *vswp, mblk_t *mp, int caller,
1871ae08745Sheppo 	    vsw_port_t *port, mac_resource_handle_t);
1881ae08745Sheppo static	void vsw_switch_l3_frame(vsw_t *vswp, mblk_t *mp, int caller,
1891ae08745Sheppo 	    vsw_port_t *port, mac_resource_handle_t);
1901ae08745Sheppo static	int vsw_forward_all(vsw_t *vswp, mblk_t *mp, int caller,
1911ae08745Sheppo 	    vsw_port_t *port);
1921ae08745Sheppo static	int vsw_forward_grp(vsw_t *vswp, mblk_t *mp, int caller,
1931ae08745Sheppo 	    vsw_port_t *port);
1941ae08745Sheppo static	int vsw_portsend(vsw_port_t *, mblk_t *);
1951ae08745Sheppo static	int vsw_dringsend(vsw_ldc_t *, mblk_t *);
1961ae08745Sheppo static	int vsw_descrsend(vsw_ldc_t *, mblk_t *);
1971ae08745Sheppo 
1981ae08745Sheppo /* Packet creation routines */
1993af08d82Slm66018 static void vsw_send_ver(void *);
2001ae08745Sheppo static void vsw_send_attr(vsw_ldc_t *);
2011ae08745Sheppo static vio_dring_reg_msg_t *vsw_create_dring_info_pkt(vsw_ldc_t *);
2021ae08745Sheppo static void vsw_send_dring_info(vsw_ldc_t *);
2031ae08745Sheppo static void vsw_send_rdx(vsw_ldc_t *);
2041ae08745Sheppo 
205b071742bSsg70180 static int vsw_send_msg(vsw_ldc_t *, void *, int, boolean_t);
2061ae08745Sheppo 
2071ae08745Sheppo /* Forwarding database (FDB) routines */
2081ae08745Sheppo static	int vsw_add_fdb(vsw_t *vswp, vsw_port_t *port);
2091ae08745Sheppo static	int vsw_del_fdb(vsw_t *vswp, vsw_port_t *port);
2101ae08745Sheppo static	vsw_port_t *vsw_lookup_fdb(vsw_t *vswp, struct ether_header *);
2111ae08745Sheppo static	int vsw_add_rem_mcst(vnet_mcast_msg_t *, vsw_port_t *);
2121ae08745Sheppo static	int vsw_add_mcst(vsw_t *, uint8_t, uint64_t, void *);
2131ae08745Sheppo static	int vsw_del_mcst(vsw_t *, uint8_t, uint64_t, void *);
214*19b65a69Ssb155480 static	mcst_addr_t *vsw_del_addr(uint8_t, void *, uint64_t);
2151ae08745Sheppo static	void vsw_del_mcst_port(vsw_port_t *);
2161ae08745Sheppo static	void vsw_del_mcst_vsw(vsw_t *);
2171ae08745Sheppo 
2181ae08745Sheppo /* Dring routines */
2191ae08745Sheppo static dring_info_t *vsw_create_dring(vsw_ldc_t *);
2201ae08745Sheppo static void vsw_create_privring(vsw_ldc_t *);
2211ae08745Sheppo static int vsw_setup_ring(vsw_ldc_t *ldcp, dring_info_t *dp);
2221ae08745Sheppo static int vsw_dring_find_free_desc(dring_info_t *, vsw_private_desc_t **,
2231ae08745Sheppo     int *);
2241ae08745Sheppo static dring_info_t *vsw_ident2dring(lane_t *, uint64_t);
2251ae08745Sheppo 
2261ae08745Sheppo static void vsw_set_lane_attr(vsw_t *, lane_t *);
2271ae08745Sheppo static int vsw_check_attr(vnet_attr_msg_t *, vsw_port_t *);
2281ae08745Sheppo static int vsw_dring_match(dring_info_t *dp, vio_dring_reg_msg_t *msg);
2291ae08745Sheppo static int vsw_mem_cookie_match(ldc_mem_cookie_t *, ldc_mem_cookie_t *);
2301ae08745Sheppo static int vsw_check_dring_info(vio_dring_reg_msg_t *);
2311ae08745Sheppo 
2321ae08745Sheppo /* Misc support routines */
2331ae08745Sheppo static	caddr_t vsw_print_ethaddr(uint8_t *addr, char *ebuf);
2341ae08745Sheppo static void vsw_free_lane_resources(vsw_ldc_t *, uint64_t);
2351ae08745Sheppo static int vsw_free_ring(dring_info_t *);
236*19b65a69Ssb155480 static void vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr);
2371ae08745Sheppo 
2381ae08745Sheppo /* Debugging routines */
2391ae08745Sheppo static void dump_flags(uint64_t);
2401ae08745Sheppo static void display_state(void);
2411ae08745Sheppo static void display_lane(lane_t *);
2421ae08745Sheppo static void display_ring(dring_info_t *);
2431ae08745Sheppo 
244445b4c2eSsb155480 int	vsw_num_handshakes = VNET_NUM_HANDSHAKES; /* # of handshake attempts */
2451ae08745Sheppo int	vsw_wretries = 100;		/* # of write attempts */
246d10e4ef2Snarayan int	vsw_chain_len = 150;		/* max # of mblks in msg chain */
247d10e4ef2Snarayan int	vsw_desc_delay = 0;		/* delay in us */
248d10e4ef2Snarayan int	vsw_read_attempts = 5;		/* # of reads of descriptor */
249*19b65a69Ssb155480 int	vsw_mac_open_retries = 20;	/* max # of mac_open() retries */
250*19b65a69Ssb155480 int	vsw_setup_switching_delay = 3;	/* setup sw timeout interval in sec */
251d10e4ef2Snarayan 
252d10e4ef2Snarayan uint32_t	vsw_mblk_size = VSW_MBLK_SIZE;
253d10e4ef2Snarayan uint32_t	vsw_num_mblks = VSW_NUM_MBLKS;
254d10e4ef2Snarayan 
255ba2e4443Sseb static	mac_callbacks_t	vsw_m_callbacks = {
256ba2e4443Sseb 	0,
257ba2e4443Sseb 	vsw_m_stat,
258ba2e4443Sseb 	vsw_m_start,
259ba2e4443Sseb 	vsw_m_stop,
260ba2e4443Sseb 	vsw_m_promisc,
261ba2e4443Sseb 	vsw_m_multicst,
262ba2e4443Sseb 	vsw_m_unicst,
263ba2e4443Sseb 	vsw_m_tx,
264ba2e4443Sseb 	NULL,
265ba2e4443Sseb 	NULL,
266ba2e4443Sseb 	NULL
267ba2e4443Sseb };
268ba2e4443Sseb 
2691ae08745Sheppo static	struct	cb_ops	vsw_cb_ops = {
2701ae08745Sheppo 	nulldev,			/* cb_open */
2711ae08745Sheppo 	nulldev,			/* cb_close */
2721ae08745Sheppo 	nodev,				/* cb_strategy */
2731ae08745Sheppo 	nodev,				/* cb_print */
2741ae08745Sheppo 	nodev,				/* cb_dump */
2751ae08745Sheppo 	nodev,				/* cb_read */
2761ae08745Sheppo 	nodev,				/* cb_write */
2771ae08745Sheppo 	nodev,				/* cb_ioctl */
2781ae08745Sheppo 	nodev,				/* cb_devmap */
2791ae08745Sheppo 	nodev,				/* cb_mmap */
2801ae08745Sheppo 	nodev,				/* cb_segmap */
2811ae08745Sheppo 	nochpoll,			/* cb_chpoll */
2821ae08745Sheppo 	ddi_prop_op,			/* cb_prop_op */
2831ae08745Sheppo 	NULL,				/* cb_stream */
2841ae08745Sheppo 	D_MP,				/* cb_flag */
2851ae08745Sheppo 	CB_REV,				/* rev */
2861ae08745Sheppo 	nodev,				/* int (*cb_aread)() */
2871ae08745Sheppo 	nodev				/* int (*cb_awrite)() */
2881ae08745Sheppo };
2891ae08745Sheppo 
2901ae08745Sheppo static	struct	dev_ops	vsw_ops = {
2911ae08745Sheppo 	DEVO_REV,		/* devo_rev */
2921ae08745Sheppo 	0,			/* devo_refcnt */
2931ae08745Sheppo 	vsw_getinfo,		/* devo_getinfo */
2941ae08745Sheppo 	nulldev,		/* devo_identify */
2951ae08745Sheppo 	nulldev,		/* devo_probe */
2961ae08745Sheppo 	vsw_attach,		/* devo_attach */
2971ae08745Sheppo 	vsw_detach,		/* devo_detach */
2981ae08745Sheppo 	nodev,			/* devo_reset */
2991ae08745Sheppo 	&vsw_cb_ops,		/* devo_cb_ops */
3001ae08745Sheppo 	(struct bus_ops *)NULL,	/* devo_bus_ops */
3011ae08745Sheppo 	ddi_power		/* devo_power */
3021ae08745Sheppo };
3031ae08745Sheppo 
3041ae08745Sheppo extern	struct	mod_ops	mod_driverops;
3051ae08745Sheppo static struct modldrv vswmodldrv = {
3061ae08745Sheppo 	&mod_driverops,
307205eeb1aSlm66018 	"sun4v Virtual Switch",
3081ae08745Sheppo 	&vsw_ops,
3091ae08745Sheppo };
3101ae08745Sheppo 
/*
 * Acquire / release both locks of an LDC channel structure.
 *
 * LDC_ENTER_LOCK takes ldc_cblock first and then ldc_txlock;
 * LDC_EXIT_LOCK drops them in the opposite order.
 *
 * Each macro is wrapped in do { } while (0) so that it expands to exactly
 * one statement.  Without the wrapper, using the macro as the body of an
 * unbraced if/else/loop would make only the first mutex operation
 * conditional.  Invoke with a trailing semicolon, like a function call.
 */
#define	LDC_ENTER_LOCK(ldcp)	\
	do {	\
		mutex_enter(&((ldcp)->ldc_cblock));	\
		mutex_enter(&((ldcp)->ldc_txlock));	\
	} while (0)
#define	LDC_EXIT_LOCK(ldcp)	\
	do {	\
		mutex_exit(&((ldcp)->ldc_txlock));	\
		mutex_exit(&((ldcp)->ldc_cblock));	\
	} while (0)
3171ae08745Sheppo 
3181ae08745Sheppo /* Driver soft state ptr  */
3191ae08745Sheppo static void	*vsw_state;
3201ae08745Sheppo 
3211ae08745Sheppo /*
3221ae08745Sheppo  * Linked list of "vsw_t" structures - one per instance.
3231ae08745Sheppo  */
3241ae08745Sheppo vsw_t		*vsw_head = NULL;
3251ae08745Sheppo krwlock_t	vsw_rw;
3261ae08745Sheppo 
3271ae08745Sheppo /*
3281ae08745Sheppo  * Property names
3291ae08745Sheppo  */
3301ae08745Sheppo static char vdev_propname[] = "virtual-device";
3311ae08745Sheppo static char vsw_propname[] = "virtual-network-switch";
3321ae08745Sheppo static char physdev_propname[] = "vsw-phys-dev";
3331ae08745Sheppo static char smode_propname[] = "vsw-switch-mode";
3341ae08745Sheppo static char macaddr_propname[] = "local-mac-address";
3351ae08745Sheppo static char remaddr_propname[] = "remote-mac-address";
3361ae08745Sheppo static char ldcids_propname[] = "ldc-ids";
3371ae08745Sheppo static char chan_propname[] = "channel-endpoint";
3381ae08745Sheppo static char id_propname[] = "id";
3391ae08745Sheppo static char reg_propname[] = "reg";
3401ae08745Sheppo 
3411ae08745Sheppo /* supported versions */
3421ae08745Sheppo static	ver_sup_t	vsw_versions[] = { {1, 0} };
3431ae08745Sheppo 
3441ae08745Sheppo /*
3451ae08745Sheppo  * Matching criteria passed to the MDEG to register interest
3461ae08745Sheppo  * in changes to 'virtual-device-port' nodes identified by their
3471ae08745Sheppo  * 'id' property.
3481ae08745Sheppo  */
3491ae08745Sheppo static md_prop_match_t vport_prop_match[] = {
3501ae08745Sheppo 	{ MDET_PROP_VAL,    "id"   },
3511ae08745Sheppo 	{ MDET_LIST_END,    NULL    }
3521ae08745Sheppo };
3531ae08745Sheppo 
3541ae08745Sheppo static mdeg_node_match_t vport_match = { "virtual-device-port",
3551ae08745Sheppo 						vport_prop_match };
3561ae08745Sheppo 
3571ae08745Sheppo /*
35834683adeSsg70180  * Matching criteria passed to the MDEG to register interest
35934683adeSsg70180  * in changes to 'virtual-device' nodes (i.e. vsw nodes) identified
36034683adeSsg70180  * by their 'name' and 'cfg-handle' properties.
36134683adeSsg70180  */
36234683adeSsg70180 static md_prop_match_t vdev_prop_match[] = {
36334683adeSsg70180 	{ MDET_PROP_STR,    "name"   },
36434683adeSsg70180 	{ MDET_PROP_VAL,    "cfg-handle" },
36534683adeSsg70180 	{ MDET_LIST_END,    NULL    }
36634683adeSsg70180 };
36734683adeSsg70180 
36834683adeSsg70180 static mdeg_node_match_t vdev_match = { "virtual-device",
36934683adeSsg70180 						vdev_prop_match };
37034683adeSsg70180 
37134683adeSsg70180 
37234683adeSsg70180 /*
3731ae08745Sheppo  * Specification of an MD node passed to the MDEG to filter any
3741ae08745Sheppo  * 'vport' nodes that do not belong to the specified node. This
3751ae08745Sheppo  * template is copied for each vsw instance and filled in with
3761ae08745Sheppo  * the appropriate 'cfg-handle' value before being passed to the MDEG.
3771ae08745Sheppo  */
3781ae08745Sheppo static mdeg_prop_spec_t vsw_prop_template[] = {
3791ae08745Sheppo 	{ MDET_PROP_STR,    "name",		vsw_propname },
3801ae08745Sheppo 	{ MDET_PROP_VAL,    "cfg-handle",	NULL	},
3811ae08745Sheppo 	{ MDET_LIST_END,    NULL,		NULL	}
3821ae08745Sheppo };
3831ae08745Sheppo 
/*
 * Store 'val' in the ps_val field of entry [1] of a property-spec array
 * (entry [1] is the "cfg-handle" slot in vsw_prop_template).
 *
 * The macro is a single parenthesised expression with no trailing
 * semicolon, so it behaves like a function call and can be used safely
 * in an unbraced if/else.
 */
#define	VSW_SET_MDEG_PROP_INST(specp, val)	((specp)[1].ps_val = (val))
3851ae08745Sheppo 
3861ae08745Sheppo /*
3877636cb21Slm66018  * From /etc/system enable/disable thread per ring. This is a mode
 * selection that is done at vsw driver attach time.
3897636cb21Slm66018  */
3907636cb21Slm66018 boolean_t vsw_multi_ring_enable = B_FALSE;
3917636cb21Slm66018 int vsw_mac_rx_rings = VSW_MAC_RX_RINGS;
3927636cb21Slm66018 
3937636cb21Slm66018 /*
3941ae08745Sheppo  * Print debug messages - set to 0x1f to enable all msgs
3951ae08745Sheppo  * or 0x0 to turn all off.
3961ae08745Sheppo  */
3971ae08745Sheppo int vswdbg = 0x0;
3981ae08745Sheppo 
3991ae08745Sheppo /*
4001ae08745Sheppo  * debug levels:
4011ae08745Sheppo  * 0x01:	Function entry/exit tracing
4021ae08745Sheppo  * 0x02:	Internal function messages
4031ae08745Sheppo  * 0x04:	Verbose internal messages
4041ae08745Sheppo  * 0x08:	Warning messages
4051ae08745Sheppo  * 0x10:	Error messages
4061ae08745Sheppo  */
4071ae08745Sheppo 
4081ae08745Sheppo static void
4091ae08745Sheppo vswdebug(vsw_t *vswp, const char *fmt, ...)
4101ae08745Sheppo {
4111ae08745Sheppo 	char buf[512];
4121ae08745Sheppo 	va_list ap;
4131ae08745Sheppo 
4141ae08745Sheppo 	va_start(ap, fmt);
4151ae08745Sheppo 	(void) vsprintf(buf, fmt, ap);
4161ae08745Sheppo 	va_end(ap);
4171ae08745Sheppo 
4181ae08745Sheppo 	if (vswp == NULL)
4191ae08745Sheppo 		cmn_err(CE_CONT, "%s\n", buf);
4201ae08745Sheppo 	else
4211ae08745Sheppo 		cmn_err(CE_CONT, "vsw%d: %s\n", vswp->instance, buf);
4221ae08745Sheppo }
4231ae08745Sheppo 
4241ae08745Sheppo /*
4251ae08745Sheppo  * For the moment the state dump routines have their own
4261ae08745Sheppo  * private flag.
4271ae08745Sheppo  */
4281ae08745Sheppo #define	DUMP_STATE	0
4291ae08745Sheppo 
4301ae08745Sheppo #if DUMP_STATE
4311ae08745Sheppo 
4321ae08745Sheppo #define	DUMP_TAG(tag) \
4331ae08745Sheppo {			\
4341ae08745Sheppo 	D1(NULL, "DUMP_TAG: type 0x%llx", (tag).vio_msgtype); \
4351ae08745Sheppo 	D1(NULL, "DUMP_TAG: stype 0x%llx", (tag).vio_subtype);	\
4361ae08745Sheppo 	D1(NULL, "DUMP_TAG: senv 0x%llx", (tag).vio_subtype_env);	\
4371ae08745Sheppo }
4381ae08745Sheppo 
4391ae08745Sheppo #define	DUMP_TAG_PTR(tag) \
4401ae08745Sheppo {			\
4411ae08745Sheppo 	D1(NULL, "DUMP_TAG: type 0x%llx", (tag)->vio_msgtype); \
4421ae08745Sheppo 	D1(NULL, "DUMP_TAG: stype 0x%llx", (tag)->vio_subtype);	\
4431ae08745Sheppo 	D1(NULL, "DUMP_TAG: senv 0x%llx", (tag)->vio_subtype_env);	\
4441ae08745Sheppo }
4451ae08745Sheppo 
4461ae08745Sheppo #define	DUMP_FLAGS(flags) dump_flags(flags);
4471ae08745Sheppo #define	DISPLAY_STATE()	display_state()
4481ae08745Sheppo 
4491ae08745Sheppo #else
4501ae08745Sheppo 
4511ae08745Sheppo #define	DUMP_TAG(tag)
4521ae08745Sheppo #define	DUMP_TAG_PTR(tag)
4531ae08745Sheppo #define	DUMP_FLAGS(state)
4541ae08745Sheppo #define	DISPLAY_STATE()
4551ae08745Sheppo 
4561ae08745Sheppo #endif	/* DUMP_STATE */
4571ae08745Sheppo 
/*
 * Debug print macros.  Each is used like a call to vswdebug():
 *
 *	D1(vswp, "format", args...);
 *
 * With DEBUG defined, the call is made only when the matching bit is set
 * in vswdbg (D1 0x01, D2 0x02, D3 0x04, DWARN 0x08, DERR 0x10).  Without
 * DEBUG the call is never reached.
 *
 * The macros are written as "if (!enabled) {} else vswdebug" rather than
 * "if (enabled) vswdebug".  The inner if then already owns an else, so
 * an else that follows the macro in the caller, e.g.
 *
 *	if (x)
 *		D1(...);
 *	else
 *		...
 *
 * binds to the caller's if instead of being captured by the macro.
 */
#ifdef DEBUG

#define	D1			\
if (!(vswdbg & 0x01)) {} else	\
	vswdebug

#define	D2			\
if (!(vswdbg & 0x02)) {} else	\
	vswdebug

#define	D3			\
if (!(vswdbg & 0x04)) {} else	\
	vswdebug

#define	DWARN			\
if (!(vswdbg & 0x08)) {} else	\
	vswdebug

#define	DERR			\
if (!(vswdbg & 0x10)) {} else	\
	vswdebug

#else

#define	DERR		if (1) {} else vswdebug
#define	DWARN		if (1) {} else vswdebug
#define	D1		if (1) {} else vswdebug
#define	D2		if (1) {} else vswdebug
#define	D3		if (1) {} else vswdebug

#endif	/* DEBUG */
4891ae08745Sheppo 
4901ae08745Sheppo static struct modlinkage modlinkage = {
4911ae08745Sheppo 	MODREV_1,
4921ae08745Sheppo 	&vswmodldrv,
4931ae08745Sheppo 	NULL
4941ae08745Sheppo };
4951ae08745Sheppo 
4961ae08745Sheppo int
4971ae08745Sheppo _init(void)
4981ae08745Sheppo {
4991ae08745Sheppo 	int status;
5001ae08745Sheppo 
5011ae08745Sheppo 	rw_init(&vsw_rw, NULL, RW_DRIVER, NULL);
5021ae08745Sheppo 
5031ae08745Sheppo 	status = ddi_soft_state_init(&vsw_state, sizeof (vsw_t), 1);
5041ae08745Sheppo 	if (status != 0) {
5051ae08745Sheppo 		return (status);
5061ae08745Sheppo 	}
5071ae08745Sheppo 
5081ae08745Sheppo 	mac_init_ops(&vsw_ops, "vsw");
5091ae08745Sheppo 	status = mod_install(&modlinkage);
5101ae08745Sheppo 	if (status != 0) {
5111ae08745Sheppo 		ddi_soft_state_fini(&vsw_state);
5121ae08745Sheppo 	}
5131ae08745Sheppo 	return (status);
5141ae08745Sheppo }
5151ae08745Sheppo 
5161ae08745Sheppo int
5171ae08745Sheppo _fini(void)
5181ae08745Sheppo {
5191ae08745Sheppo 	int status;
5201ae08745Sheppo 
5211ae08745Sheppo 	status = mod_remove(&modlinkage);
5221ae08745Sheppo 	if (status != 0)
5231ae08745Sheppo 		return (status);
5241ae08745Sheppo 	mac_fini_ops(&vsw_ops);
5251ae08745Sheppo 	ddi_soft_state_fini(&vsw_state);
5261ae08745Sheppo 
5271ae08745Sheppo 	rw_destroy(&vsw_rw);
5281ae08745Sheppo 
5291ae08745Sheppo 	return (status);
5301ae08745Sheppo }
5311ae08745Sheppo 
5321ae08745Sheppo int
5331ae08745Sheppo _info(struct modinfo *modinfop)
5341ae08745Sheppo {
5351ae08745Sheppo 	return (mod_info(&modlinkage, modinfop));
5361ae08745Sheppo }
5371ae08745Sheppo 
5381ae08745Sheppo static int
5391ae08745Sheppo vsw_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
5401ae08745Sheppo {
5411ae08745Sheppo 	vsw_t		*vswp;
54234683adeSsg70180 	int		instance;
5431ae08745Sheppo 	char		hashname[MAXNAMELEN];
5441ae08745Sheppo 	char		qname[TASKQ_NAMELEN];
5457636cb21Slm66018 	enum		{ PROG_init = 0x00,
546*19b65a69Ssb155480 				PROG_locks = 0x01,
547*19b65a69Ssb155480 				PROG_readmd = 0x02,
548*19b65a69Ssb155480 				PROG_fdb = 0x04,
549*19b65a69Ssb155480 				PROG_mfdb = 0x08,
550*19b65a69Ssb155480 				PROG_taskq = 0x10,
551*19b65a69Ssb155480 				PROG_swmode = 0x20,
552*19b65a69Ssb155480 				PROG_macreg = 0x40,
553*19b65a69Ssb155480 				PROG_mdreg = 0x80}
5541ae08745Sheppo 			progress;
5551ae08745Sheppo 
5561ae08745Sheppo 	progress = PROG_init;
557*19b65a69Ssb155480 	int		rv;
5581ae08745Sheppo 
5591ae08745Sheppo 	switch (cmd) {
5601ae08745Sheppo 	case DDI_ATTACH:
5611ae08745Sheppo 		break;
5621ae08745Sheppo 	case DDI_RESUME:
5631ae08745Sheppo 		/* nothing to do for this non-device */
5641ae08745Sheppo 		return (DDI_SUCCESS);
5651ae08745Sheppo 	case DDI_PM_RESUME:
5661ae08745Sheppo 	default:
5671ae08745Sheppo 		return (DDI_FAILURE);
5681ae08745Sheppo 	}
5691ae08745Sheppo 
5701ae08745Sheppo 	instance = ddi_get_instance(dip);
5711ae08745Sheppo 	if (ddi_soft_state_zalloc(vsw_state, instance) != DDI_SUCCESS) {
5721ae08745Sheppo 		DERR(NULL, "vsw%d: ddi_soft_state_zalloc failed", instance);
5731ae08745Sheppo 		return (DDI_FAILURE);
5741ae08745Sheppo 	}
5751ae08745Sheppo 	vswp = ddi_get_soft_state(vsw_state, instance);
5761ae08745Sheppo 
5771ae08745Sheppo 	if (vswp == NULL) {
5781ae08745Sheppo 		DERR(NULL, "vsw%d: ddi_get_soft_state failed", instance);
5791ae08745Sheppo 		goto vsw_attach_fail;
5801ae08745Sheppo 	}
5811ae08745Sheppo 
5821ae08745Sheppo 	vswp->dip = dip;
5831ae08745Sheppo 	vswp->instance = instance;
5841ae08745Sheppo 	ddi_set_driver_private(dip, (caddr_t)vswp);
5851ae08745Sheppo 
5865f94e909Ssg70180 	mutex_init(&vswp->hw_lock, NULL, MUTEX_DRIVER, NULL);
58734683adeSsg70180 	mutex_init(&vswp->mac_lock, NULL, MUTEX_DRIVER, NULL);
588*19b65a69Ssb155480 	mutex_init(&vswp->mca_lock, NULL, MUTEX_DRIVER, NULL);
589*19b65a69Ssb155480 	mutex_init(&vswp->swtmout_lock, NULL, MUTEX_DRIVER, NULL);
5901ae08745Sheppo 	rw_init(&vswp->if_lockrw, NULL, RW_DRIVER, NULL);
591*19b65a69Ssb155480 	rw_init(&vswp->mfdbrw, NULL, RW_DRIVER, NULL);
592*19b65a69Ssb155480 	rw_init(&vswp->plist.lockrw, NULL, RW_DRIVER, NULL);
593*19b65a69Ssb155480 
594*19b65a69Ssb155480 	progress |= PROG_locks;
595*19b65a69Ssb155480 
596*19b65a69Ssb155480 	rv = vsw_read_mdprops(vswp);
597*19b65a69Ssb155480 	if (rv != 0)
598*19b65a69Ssb155480 		goto vsw_attach_fail;
599*19b65a69Ssb155480 
600*19b65a69Ssb155480 	progress |= PROG_readmd;
6011ae08745Sheppo 
6021ae08745Sheppo 	/* setup the unicast forwarding database  */
6031ae08745Sheppo 	(void) snprintf(hashname, MAXNAMELEN, "vsw_unicst_table-%d",
6041ae08745Sheppo 	    vswp->instance);
6051ae08745Sheppo 	D2(vswp, "creating unicast hash table (%s)...", hashname);
6061ae08745Sheppo 	vswp->fdb = mod_hash_create_ptrhash(hashname, VSW_NCHAINS,
6071ae08745Sheppo 	    mod_hash_null_valdtor, sizeof (void *));
6081ae08745Sheppo 
6091ae08745Sheppo 	progress |= PROG_fdb;
6101ae08745Sheppo 
6111ae08745Sheppo 	/* setup the multicast fowarding database */
6121ae08745Sheppo 	(void) snprintf(hashname, MAXNAMELEN, "vsw_mcst_table-%d",
6131ae08745Sheppo 	    vswp->instance);
6141ae08745Sheppo 	D2(vswp, "creating multicast hash table %s)...", hashname);
6151ae08745Sheppo 	vswp->mfdb = mod_hash_create_ptrhash(hashname, VSW_NCHAINS,
6161ae08745Sheppo 	    mod_hash_null_valdtor, sizeof (void *));
6171ae08745Sheppo 
6181ae08745Sheppo 	progress |= PROG_mfdb;
6191ae08745Sheppo 
6201ae08745Sheppo 	/*
6211ae08745Sheppo 	 * Create the taskq which will process all the VIO
6221ae08745Sheppo 	 * control messages.
6231ae08745Sheppo 	 */
6241ae08745Sheppo 	(void) snprintf(qname, TASKQ_NAMELEN, "vsw_taskq%d", vswp->instance);
6251ae08745Sheppo 	if ((vswp->taskq_p = ddi_taskq_create(vswp->dip, qname, 1,
6261ae08745Sheppo 	    TASKQ_DEFAULTPRI, 0)) == NULL) {
62734683adeSsg70180 		cmn_err(CE_WARN, "!vsw%d: Unable to create task queue",
62834683adeSsg70180 		    vswp->instance);
6291ae08745Sheppo 		goto vsw_attach_fail;
6301ae08745Sheppo 	}
6311ae08745Sheppo 
6321ae08745Sheppo 	progress |= PROG_taskq;
6331ae08745Sheppo 
634d10e4ef2Snarayan 	/* prevent auto-detaching */
635d10e4ef2Snarayan 	if (ddi_prop_update_int(DDI_DEV_T_NONE, vswp->dip,
636d10e4ef2Snarayan 	    DDI_NO_AUTODETACH, 1) != DDI_SUCCESS) {
63734683adeSsg70180 		cmn_err(CE_NOTE, "!Unable to set \"%s\" property for "
638d10e4ef2Snarayan 		    "instance %u", DDI_NO_AUTODETACH, instance);
639d10e4ef2Snarayan 	}
640d10e4ef2Snarayan 
6411ae08745Sheppo 	/*
642*19b65a69Ssb155480 	 * Setup the required switching mode,
643*19b65a69Ssb155480 	 * based on the mdprops that we read earlier.
644*19b65a69Ssb155480 	 */
645*19b65a69Ssb155480 	rv = vsw_setup_switching(vswp);
646*19b65a69Ssb155480 	if (rv == EAGAIN) {
647*19b65a69Ssb155480 		/*
648*19b65a69Ssb155480 		 * Unable to setup switching mode;
649*19b65a69Ssb155480 		 * as the error is EAGAIN, schedule a timeout to retry.
650*19b65a69Ssb155480 		 */
651*19b65a69Ssb155480 		mutex_enter(&vswp->swtmout_lock);
652*19b65a69Ssb155480 
653*19b65a69Ssb155480 		vswp->swtmout_enabled = B_TRUE;
654*19b65a69Ssb155480 		vswp->swtmout_id =
655*19b65a69Ssb155480 		    timeout(vsw_setup_switching_timeout, vswp,
656*19b65a69Ssb155480 		    (vsw_setup_switching_delay * drv_usectohz(MICROSEC)));
657*19b65a69Ssb155480 
658*19b65a69Ssb155480 		mutex_exit(&vswp->swtmout_lock);
659*19b65a69Ssb155480 	} else if (rv != 0) {
660*19b65a69Ssb155480 		goto vsw_attach_fail;
661*19b65a69Ssb155480 	}
662*19b65a69Ssb155480 
663*19b65a69Ssb155480 	progress |= PROG_swmode;
664*19b65a69Ssb155480 
665*19b65a69Ssb155480 	/* Register with mac layer as a provider */
666*19b65a69Ssb155480 	rv = vsw_mac_register(vswp);
667*19b65a69Ssb155480 	if (rv != 0)
668*19b65a69Ssb155480 		goto vsw_attach_fail;
669*19b65a69Ssb155480 
670*19b65a69Ssb155480 	progress |= PROG_macreg;
671*19b65a69Ssb155480 
672*19b65a69Ssb155480 	/*
67334683adeSsg70180 	 * Now we have everything setup, register an interest in
67434683adeSsg70180 	 * specific MD nodes.
67534683adeSsg70180 	 *
67634683adeSsg70180 	 * The callback is invoked in 2 cases, firstly if upon mdeg
67734683adeSsg70180 	 * registration there are existing nodes which match our specified
67834683adeSsg70180 	 * criteria, and secondly if the MD is changed (and again, there
67934683adeSsg70180 	 * are nodes which we are interested in present within it. Note
68034683adeSsg70180 	 * that our callback will be invoked even if our specified nodes
68134683adeSsg70180 	 * have not actually changed).
68234683adeSsg70180 	 *
6831ae08745Sheppo 	 */
684*19b65a69Ssb155480 	rv = vsw_mdeg_register(vswp);
685*19b65a69Ssb155480 	if (rv != 0)
68634683adeSsg70180 		goto vsw_attach_fail;
6871ae08745Sheppo 
688*19b65a69Ssb155480 	progress |= PROG_mdreg;
689*19b65a69Ssb155480 
690*19b65a69Ssb155480 	WRITE_ENTER(&vsw_rw);
691*19b65a69Ssb155480 	vswp->next = vsw_head;
692*19b65a69Ssb155480 	vsw_head = vswp;
693*19b65a69Ssb155480 	RW_EXIT(&vsw_rw);
694*19b65a69Ssb155480 
695*19b65a69Ssb155480 	ddi_report_dev(vswp->dip);
6961ae08745Sheppo 	return (DDI_SUCCESS);
6971ae08745Sheppo 
6981ae08745Sheppo vsw_attach_fail:
6991ae08745Sheppo 	DERR(NULL, "vsw_attach: failed");
7001ae08745Sheppo 
701*19b65a69Ssb155480 	if (progress & PROG_mdreg) {
702*19b65a69Ssb155480 		vsw_mdeg_unregister(vswp);
703*19b65a69Ssb155480 		(void) vsw_detach_ports(vswp);
704*19b65a69Ssb155480 	}
705*19b65a69Ssb155480 
706*19b65a69Ssb155480 	if (progress & PROG_macreg)
707*19b65a69Ssb155480 		(void) vsw_mac_unregister(vswp);
708*19b65a69Ssb155480 
709*19b65a69Ssb155480 	if (progress & PROG_swmode) {
710*19b65a69Ssb155480 		vsw_stop_switching_timeout(vswp);
711*19b65a69Ssb155480 		mutex_enter(&vswp->mac_lock);
712*19b65a69Ssb155480 		vsw_mac_detach(vswp);
713*19b65a69Ssb155480 		vsw_mac_close(vswp);
714*19b65a69Ssb155480 		mutex_exit(&vswp->mac_lock);
715*19b65a69Ssb155480 	}
716*19b65a69Ssb155480 
7171ae08745Sheppo 	if (progress & PROG_taskq)
7181ae08745Sheppo 		ddi_taskq_destroy(vswp->taskq_p);
7191ae08745Sheppo 
720*19b65a69Ssb155480 	if (progress & PROG_mfdb)
7211ae08745Sheppo 		mod_hash_destroy_hash(vswp->mfdb);
7221ae08745Sheppo 
723*19b65a69Ssb155480 	if (progress & PROG_fdb)
7241ae08745Sheppo 		mod_hash_destroy_hash(vswp->fdb);
7251ae08745Sheppo 
726*19b65a69Ssb155480 	if (progress & PROG_locks) {
727*19b65a69Ssb155480 		rw_destroy(&vswp->plist.lockrw);
728*19b65a69Ssb155480 		rw_destroy(&vswp->mfdbrw);
7291ae08745Sheppo 		rw_destroy(&vswp->if_lockrw);
730*19b65a69Ssb155480 		mutex_destroy(&vswp->swtmout_lock);
731*19b65a69Ssb155480 		mutex_destroy(&vswp->mca_lock);
73234683adeSsg70180 		mutex_destroy(&vswp->mac_lock);
7335f94e909Ssg70180 		mutex_destroy(&vswp->hw_lock);
73434683adeSsg70180 	}
7351ae08745Sheppo 
7361ae08745Sheppo 	ddi_soft_state_free(vsw_state, instance);
7371ae08745Sheppo 	return (DDI_FAILURE);
7381ae08745Sheppo }
7391ae08745Sheppo 
7401ae08745Sheppo static int
7411ae08745Sheppo vsw_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
7421ae08745Sheppo {
743d10e4ef2Snarayan 	vio_mblk_pool_t		*poolp, *npoolp;
7441ae08745Sheppo 	vsw_t			**vswpp, *vswp;
7451ae08745Sheppo 	int 			instance;
7461ae08745Sheppo 
7471ae08745Sheppo 	instance = ddi_get_instance(dip);
7481ae08745Sheppo 	vswp = ddi_get_soft_state(vsw_state, instance);
7491ae08745Sheppo 
7501ae08745Sheppo 	if (vswp == NULL) {
7511ae08745Sheppo 		return (DDI_FAILURE);
7521ae08745Sheppo 	}
7531ae08745Sheppo 
7541ae08745Sheppo 	switch (cmd) {
7551ae08745Sheppo 	case DDI_DETACH:
7561ae08745Sheppo 		break;
7571ae08745Sheppo 	case DDI_SUSPEND:
7581ae08745Sheppo 	case DDI_PM_SUSPEND:
7591ae08745Sheppo 	default:
7601ae08745Sheppo 		return (DDI_FAILURE);
7611ae08745Sheppo 	}
7621ae08745Sheppo 
7631ae08745Sheppo 	D2(vswp, "detaching instance %d", instance);
7641ae08745Sheppo 
765*19b65a69Ssb155480 	/* Stop any pending timeout to setup switching mode. */
766*19b65a69Ssb155480 	vsw_stop_switching_timeout(vswp);
767*19b65a69Ssb155480 
76834683adeSsg70180 	if (vswp->if_state & VSW_IF_REG) {
7691ae08745Sheppo 		if (vsw_mac_unregister(vswp) != 0) {
77034683adeSsg70180 			cmn_err(CE_WARN, "!vsw%d: Unable to detach from "
77134683adeSsg70180 			    "MAC layer", vswp->instance);
7721ae08745Sheppo 			return (DDI_FAILURE);
7731ae08745Sheppo 		}
774d10e4ef2Snarayan 	}
7751ae08745Sheppo 
7761ae08745Sheppo 	vsw_mdeg_unregister(vswp);
7771ae08745Sheppo 
778e1ebb9ecSlm66018 	/* remove mac layer callback */
77934683adeSsg70180 	mutex_enter(&vswp->mac_lock);
780e1ebb9ecSlm66018 	if ((vswp->mh != NULL) && (vswp->mrh != NULL)) {
7811f8aaf0dSethindra 		mac_rx_remove(vswp->mh, vswp->mrh, B_TRUE);
782e1ebb9ecSlm66018 		vswp->mrh = NULL;
7831ae08745Sheppo 	}
78434683adeSsg70180 	mutex_exit(&vswp->mac_lock);
7851ae08745Sheppo 
7861ae08745Sheppo 	if (vsw_detach_ports(vswp) != 0) {
78734683adeSsg70180 		cmn_err(CE_WARN, "!vsw%d: Unable to detach ports",
78834683adeSsg70180 		    vswp->instance);
7891ae08745Sheppo 		return (DDI_FAILURE);
7901ae08745Sheppo 	}
7911ae08745Sheppo 
79234683adeSsg70180 	rw_destroy(&vswp->if_lockrw);
79334683adeSsg70180 
7945f94e909Ssg70180 	mutex_destroy(&vswp->hw_lock);
7955f94e909Ssg70180 
7961ae08745Sheppo 	/*
797e1ebb9ecSlm66018 	 * Now that the ports have been deleted, stop and close
798e1ebb9ecSlm66018 	 * the physical device.
799e1ebb9ecSlm66018 	 */
80034683adeSsg70180 	mutex_enter(&vswp->mac_lock);
801e1ebb9ecSlm66018 
802*19b65a69Ssb155480 	vsw_mac_detach(vswp);
803*19b65a69Ssb155480 	vsw_mac_close(vswp);
804*19b65a69Ssb155480 
80534683adeSsg70180 	mutex_exit(&vswp->mac_lock);
806*19b65a69Ssb155480 
80734683adeSsg70180 	mutex_destroy(&vswp->mac_lock);
808*19b65a69Ssb155480 	mutex_destroy(&vswp->swtmout_lock);
809e1ebb9ecSlm66018 
810e1ebb9ecSlm66018 	/*
811d10e4ef2Snarayan 	 * Destroy any free pools that may still exist.
812d10e4ef2Snarayan 	 */
813d10e4ef2Snarayan 	poolp = vswp->rxh;
814d10e4ef2Snarayan 	while (poolp != NULL) {
815d10e4ef2Snarayan 		npoolp = vswp->rxh = poolp->nextp;
816d10e4ef2Snarayan 		if (vio_destroy_mblks(poolp) != 0) {
817d10e4ef2Snarayan 			vswp->rxh = poolp;
818d10e4ef2Snarayan 			return (DDI_FAILURE);
819d10e4ef2Snarayan 		}
820d10e4ef2Snarayan 		poolp = npoolp;
821d10e4ef2Snarayan 	}
822d10e4ef2Snarayan 
823d10e4ef2Snarayan 	/*
8241ae08745Sheppo 	 * Remove this instance from any entries it may be on in
8251ae08745Sheppo 	 * the hash table by using the list of addresses maintained
8261ae08745Sheppo 	 * in the vsw_t structure.
8271ae08745Sheppo 	 */
8281ae08745Sheppo 	vsw_del_mcst_vsw(vswp);
8291ae08745Sheppo 
8301ae08745Sheppo 	vswp->mcap = NULL;
8311ae08745Sheppo 	mutex_destroy(&vswp->mca_lock);
8321ae08745Sheppo 
8331ae08745Sheppo 	/*
8341ae08745Sheppo 	 * By now any pending tasks have finished and the underlying
8351ae08745Sheppo 	 * ldc's have been destroyed, so its safe to delete the control
8361ae08745Sheppo 	 * message taskq.
8371ae08745Sheppo 	 */
8381ae08745Sheppo 	if (vswp->taskq_p != NULL)
8391ae08745Sheppo 		ddi_taskq_destroy(vswp->taskq_p);
8401ae08745Sheppo 
8411ae08745Sheppo 	/*
8421ae08745Sheppo 	 * At this stage all the data pointers in the hash table
8431ae08745Sheppo 	 * should be NULL, as all the ports have been removed and will
8441ae08745Sheppo 	 * have deleted themselves from the port lists which the data
8451ae08745Sheppo 	 * pointers point to. Hence we can destroy the table using the
8461ae08745Sheppo 	 * default destructors.
8471ae08745Sheppo 	 */
8481ae08745Sheppo 	D2(vswp, "vsw_detach: destroying hash tables..");
8491ae08745Sheppo 	mod_hash_destroy_hash(vswp->fdb);
8501ae08745Sheppo 	vswp->fdb = NULL;
8511ae08745Sheppo 
8521ae08745Sheppo 	WRITE_ENTER(&vswp->mfdbrw);
8531ae08745Sheppo 	mod_hash_destroy_hash(vswp->mfdb);
8541ae08745Sheppo 	vswp->mfdb = NULL;
8551ae08745Sheppo 	RW_EXIT(&vswp->mfdbrw);
8561ae08745Sheppo 	rw_destroy(&vswp->mfdbrw);
8571ae08745Sheppo 
8581ae08745Sheppo 	ddi_remove_minor_node(dip, NULL);
8591ae08745Sheppo 
8601ae08745Sheppo 	rw_destroy(&vswp->plist.lockrw);
8611ae08745Sheppo 	WRITE_ENTER(&vsw_rw);
8621ae08745Sheppo 	for (vswpp = &vsw_head; *vswpp; vswpp = &(*vswpp)->next) {
8631ae08745Sheppo 		if (*vswpp == vswp) {
8641ae08745Sheppo 			*vswpp = vswp->next;
8651ae08745Sheppo 			break;
8661ae08745Sheppo 		}
8671ae08745Sheppo 	}
8681ae08745Sheppo 	RW_EXIT(&vsw_rw);
8691ae08745Sheppo 	ddi_soft_state_free(vsw_state, instance);
8701ae08745Sheppo 
8711ae08745Sheppo 	return (DDI_SUCCESS);
8721ae08745Sheppo }
8731ae08745Sheppo 
8741ae08745Sheppo static int
8751ae08745Sheppo vsw_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
8761ae08745Sheppo {
8771ae08745Sheppo 	_NOTE(ARGUNUSED(dip))
8781ae08745Sheppo 
8791ae08745Sheppo 	vsw_t	*vswp = NULL;
8801ae08745Sheppo 	dev_t	dev = (dev_t)arg;
8811ae08745Sheppo 	int	instance;
8821ae08745Sheppo 
8831ae08745Sheppo 	instance = getminor(dev);
8841ae08745Sheppo 
8851ae08745Sheppo 	switch (infocmd) {
8861ae08745Sheppo 	case DDI_INFO_DEVT2DEVINFO:
8871ae08745Sheppo 		if ((vswp = ddi_get_soft_state(vsw_state, instance)) == NULL) {
8881ae08745Sheppo 			*result = NULL;
8891ae08745Sheppo 			return (DDI_FAILURE);
8901ae08745Sheppo 		}
8911ae08745Sheppo 		*result = vswp->dip;
8921ae08745Sheppo 		return (DDI_SUCCESS);
8931ae08745Sheppo 
8941ae08745Sheppo 	case DDI_INFO_DEVT2INSTANCE:
8951ae08745Sheppo 		*result = (void *)(uintptr_t)instance;
8961ae08745Sheppo 		return (DDI_SUCCESS);
8971ae08745Sheppo 
8981ae08745Sheppo 	default:
8991ae08745Sheppo 		*result = NULL;
9001ae08745Sheppo 		return (DDI_FAILURE);
9011ae08745Sheppo 	}
9021ae08745Sheppo }
9031ae08745Sheppo 
9041ae08745Sheppo /*
90534683adeSsg70180  * Get the value of the "vsw-phys-dev" property in the specified
90634683adeSsg70180  * node. This property is the name of the physical device that
90734683adeSsg70180  * the virtual switch will use to talk to the outside world.
90834683adeSsg70180  *
90934683adeSsg70180  * Note it is valid for this property to be NULL (but the property
91034683adeSsg70180  * itself must exist). Callers of this routine should verify that
91134683adeSsg70180  * the value returned is what they expected (i.e. either NULL or non NULL).
91234683adeSsg70180  *
91334683adeSsg70180  * On success returns value of the property in region pointed to by
91434683adeSsg70180  * the 'name' argument, and with return value of 0. Otherwise returns 1.
9151ae08745Sheppo  */
91634683adeSsg70180 static int
91734683adeSsg70180 vsw_get_md_physname(vsw_t *vswp, md_t *mdp, mde_cookie_t node, char *name)
9181ae08745Sheppo {
91934683adeSsg70180 	int	len = 0;
9201ae08745Sheppo 	char	*physname = NULL;
9211ae08745Sheppo 	char	*dev;
9221ae08745Sheppo 
92334683adeSsg70180 	if (md_get_prop_data(mdp, node, physdev_propname,
9241ae08745Sheppo 	    (uint8_t **)(&physname), &len) != 0) {
92534683adeSsg70180 		cmn_err(CE_WARN, "!vsw%d: Unable to get name(s) of physical "
92634683adeSsg70180 		    "device(s) from MD", vswp->instance);
92734683adeSsg70180 		return (1);
9281ae08745Sheppo 	} else if ((strlen(physname) + 1) > LIFNAMSIZ) {
92934683adeSsg70180 		cmn_err(CE_WARN, "!vsw%d: %s is too long a device name",
93034683adeSsg70180 		    vswp->instance, physname);
93134683adeSsg70180 		return (1);
9321ae08745Sheppo 	} else {
93334683adeSsg70180 		(void) strncpy(name, physname, strlen(physname) + 1);
9341ae08745Sheppo 		D2(vswp, "%s: using first device specified (%s)",
93534683adeSsg70180 		    __func__, physname);
9361ae08745Sheppo 	}
9371ae08745Sheppo 
9381ae08745Sheppo #ifdef DEBUG
9391ae08745Sheppo 	/*
9401ae08745Sheppo 	 * As a temporary measure to aid testing we check to see if there
9411ae08745Sheppo 	 * is a vsw.conf file present. If there is we use the value of the
9421ae08745Sheppo 	 * vsw_physname property in the file as the name of the physical
9431ae08745Sheppo 	 * device, overriding the value from the MD.
9441ae08745Sheppo 	 *
9451ae08745Sheppo 	 * There may be multiple devices listed, but for the moment
9461ae08745Sheppo 	 * we just use the first one.
9471ae08745Sheppo 	 */
9481ae08745Sheppo 	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vswp->dip, 0,
9491ae08745Sheppo 	    "vsw_physname", &dev) == DDI_PROP_SUCCESS) {
9501ae08745Sheppo 		if ((strlen(dev) + 1) > LIFNAMSIZ) {
95134683adeSsg70180 			cmn_err(CE_WARN, "vsw%d: %s is too long a device name",
95234683adeSsg70180 			    vswp->instance, dev);
95334683adeSsg70180 			ddi_prop_free(dev);
95434683adeSsg70180 			return (1);
9551ae08745Sheppo 		} else {
95634683adeSsg70180 			cmn_err(CE_NOTE, "vsw%d: Using device name (%s) from "
95734683adeSsg70180 			    "config file", vswp->instance, dev);
9581ae08745Sheppo 
95934683adeSsg70180 			(void) strncpy(name, dev, strlen(dev) + 1);
9601ae08745Sheppo 		}
9611ae08745Sheppo 
9621ae08745Sheppo 		ddi_prop_free(dev);
9631ae08745Sheppo 	}
9641ae08745Sheppo #endif
9651ae08745Sheppo 
96634683adeSsg70180 	return (0);
96734683adeSsg70180 }
968e1ebb9ecSlm66018 
969e1ebb9ecSlm66018 /*
97034683adeSsg70180  * Read the 'vsw-switch-mode' property from the specified MD node.
97134683adeSsg70180  *
97234683adeSsg70180  * Returns 0 on success and the number of modes found in 'found',
97334683adeSsg70180  * otherwise returns 1.
974e1ebb9ecSlm66018  */
97534683adeSsg70180 static int
97634683adeSsg70180 vsw_get_md_smodes(vsw_t *vswp, md_t *mdp, mde_cookie_t node,
97734683adeSsg70180 						uint8_t *modes, int *found)
97834683adeSsg70180 {
97934683adeSsg70180 	int		len = 0;
98034683adeSsg70180 	int		smode_num = 0;
98134683adeSsg70180 	char		*smode = NULL;
98234683adeSsg70180 	char		*curr_mode = NULL;
98334683adeSsg70180 
98434683adeSsg70180 	D1(vswp, "%s: enter", __func__);
9851ae08745Sheppo 
9861ae08745Sheppo 	/*
9871ae08745Sheppo 	 * Get the switch-mode property. The modes are listed in
9881ae08745Sheppo 	 * decreasing order of preference, i.e. prefered mode is
9891ae08745Sheppo 	 * first item in list.
9901ae08745Sheppo 	 */
9911ae08745Sheppo 	len = 0;
99234683adeSsg70180 	smode_num = 0;
99334683adeSsg70180 	if (md_get_prop_data(mdp, node, smode_propname,
9941ae08745Sheppo 	    (uint8_t **)(&smode), &len) != 0) {
9951ae08745Sheppo 		/*
996e1ebb9ecSlm66018 		 * Unable to get switch-mode property from MD, nothing
997e1ebb9ecSlm66018 		 * more we can do.
9981ae08745Sheppo 		 */
99934683adeSsg70180 		cmn_err(CE_WARN, "!vsw%d: Unable to get switch mode property"
100034683adeSsg70180 		    " from the MD", vswp->instance);
100134683adeSsg70180 		*found = 0;
100234683adeSsg70180 		return (1);
1003e1ebb9ecSlm66018 	}
1004e1ebb9ecSlm66018 
10051ae08745Sheppo 	curr_mode = smode;
10061ae08745Sheppo 	/*
10071ae08745Sheppo 	 * Modes of operation:
10081ae08745Sheppo 	 * 'switched'	 - layer 2 switching, underlying HW in
1009e1ebb9ecSlm66018 	 *			programmed mode.
10101ae08745Sheppo 	 * 'promiscuous' - layer 2 switching, underlying HW in
10111ae08745Sheppo 	 *			promiscuous mode.
10121ae08745Sheppo 	 * 'routed'	 - layer 3 (i.e. IP) routing, underlying HW
10131ae08745Sheppo 	 *			in non-promiscuous mode.
10141ae08745Sheppo 	 */
101534683adeSsg70180 	while ((curr_mode < (smode + len)) && (smode_num < NUM_SMODES)) {
10161ae08745Sheppo 		D2(vswp, "%s: curr_mode = [%s]", __func__, curr_mode);
1017e1ebb9ecSlm66018 		if (strcmp(curr_mode, "switched") == 0) {
101834683adeSsg70180 			modes[smode_num++] = VSW_LAYER2;
1019e1ebb9ecSlm66018 		} else if (strcmp(curr_mode, "promiscuous") == 0) {
102034683adeSsg70180 			modes[smode_num++] = VSW_LAYER2_PROMISC;
1021e1ebb9ecSlm66018 		} else if (strcmp(curr_mode, "routed") == 0) {
102234683adeSsg70180 			modes[smode_num++] = VSW_LAYER3;
1023e1ebb9ecSlm66018 		} else {
102434683adeSsg70180 			cmn_err(CE_WARN, "!vsw%d: Unknown switch mode %s, "
102534683adeSsg70180 			    "setting to default switched mode",
102634683adeSsg70180 			    vswp->instance, curr_mode);
102734683adeSsg70180 			modes[smode_num++] = VSW_LAYER2;
10281ae08745Sheppo 		}
10291ae08745Sheppo 		curr_mode += strlen(curr_mode) + 1;
10301ae08745Sheppo 	}
103134683adeSsg70180 	*found = smode_num;
10321ae08745Sheppo 
103334683adeSsg70180 	D2(vswp, "%s: %d modes found", __func__, smode_num);
10341ae08745Sheppo 
10351ae08745Sheppo 	D1(vswp, "%s: exit", __func__);
103634683adeSsg70180 
103734683adeSsg70180 	return (0);
10381ae08745Sheppo }
10391ae08745Sheppo 
1040e1ebb9ecSlm66018 /*
1041e1ebb9ecSlm66018  * Check to see if the card supports the setting of multiple unicst
1042e1ebb9ecSlm66018  * addresses.
1043e1ebb9ecSlm66018  *
10445f94e909Ssg70180  * Returns 0 if card supports the programming of multiple unicast addresses,
10455f94e909Ssg70180  * otherwise returns 1.
1046e1ebb9ecSlm66018  */
1047e1ebb9ecSlm66018 static int
1048e1ebb9ecSlm66018 vsw_get_hw_maddr(vsw_t *vswp)
1049e1ebb9ecSlm66018 {
1050e1ebb9ecSlm66018 	D1(vswp, "%s: enter", __func__);
1051e1ebb9ecSlm66018 
1052*19b65a69Ssb155480 	ASSERT(MUTEX_HELD(&vswp->mac_lock));
1053*19b65a69Ssb155480 
1054*19b65a69Ssb155480 	if (vswp->mh == NULL)
1055e1ebb9ecSlm66018 		return (1);
1056e1ebb9ecSlm66018 
1057e1ebb9ecSlm66018 	if (!mac_capab_get(vswp->mh, MAC_CAPAB_MULTIADDRESS, &vswp->maddr)) {
10585f94e909Ssg70180 		cmn_err(CE_WARN, "!vsw%d: device (%s) does not support "
10595f94e909Ssg70180 		    "setting multiple unicast addresses", vswp->instance,
10605f94e909Ssg70180 		    vswp->physname);
1061e1ebb9ecSlm66018 		return (1);
1062e1ebb9ecSlm66018 	}
1063e1ebb9ecSlm66018 
1064e1ebb9ecSlm66018 	D2(vswp, "%s: %d addrs : %d free", __func__,
1065e1ebb9ecSlm66018 	    vswp->maddr.maddr_naddr, vswp->maddr.maddr_naddrfree);
1066e1ebb9ecSlm66018 
1067e1ebb9ecSlm66018 	D1(vswp, "%s: exit", __func__);
1068e1ebb9ecSlm66018 
1069e1ebb9ecSlm66018 	return (0);
1070e1ebb9ecSlm66018 }
1071e1ebb9ecSlm66018 
1072e1ebb9ecSlm66018 /*
1073*19b65a69Ssb155480  * Program unicast and multicast addresses of vsw interface and the ports
1074*19b65a69Ssb155480  * into the physical device.
1075*19b65a69Ssb155480  */
1076*19b65a69Ssb155480 static void
1077*19b65a69Ssb155480 vsw_set_addrs(vsw_t *vswp)
1078*19b65a69Ssb155480 {
1079*19b65a69Ssb155480 	vsw_port_list_t	*plist = &vswp->plist;
1080*19b65a69Ssb155480 	vsw_port_t	*port;
1081*19b65a69Ssb155480 	mcst_addr_t	*mcap;
1082*19b65a69Ssb155480 	int		rv;
1083*19b65a69Ssb155480 
1084*19b65a69Ssb155480 	READ_ENTER(&vswp->if_lockrw);
1085*19b65a69Ssb155480 
1086*19b65a69Ssb155480 	if (vswp->if_state & VSW_IF_UP) {
1087*19b65a69Ssb155480 
1088*19b65a69Ssb155480 		/* program unicst addr of vsw interface in the physdev */
1089*19b65a69Ssb155480 		if (vswp->addr_set == VSW_ADDR_UNSET) {
1090*19b65a69Ssb155480 			mutex_enter(&vswp->hw_lock);
1091*19b65a69Ssb155480 			rv = vsw_set_hw(vswp, NULL, VSW_LOCALDEV);
1092*19b65a69Ssb155480 			mutex_exit(&vswp->hw_lock);
1093*19b65a69Ssb155480 			if (rv != 0) {
1094*19b65a69Ssb155480 				cmn_err(CE_NOTE,
1095*19b65a69Ssb155480 				    "!vsw%d: failed to program interface "
1096*19b65a69Ssb155480 				    "unicast address\n", vswp->instance);
1097*19b65a69Ssb155480 			}
1098*19b65a69Ssb155480 			/*
1099*19b65a69Ssb155480 			 * Notify the MAC layer of the changed address.
1100*19b65a69Ssb155480 			 */
1101*19b65a69Ssb155480 			mac_unicst_update(vswp->if_mh,
1102*19b65a69Ssb155480 			    (uint8_t *)&vswp->if_addr);
1103*19b65a69Ssb155480 		}
1104*19b65a69Ssb155480 
1105*19b65a69Ssb155480 		/* program mcast addrs of vsw interface in the physdev */
1106*19b65a69Ssb155480 		mutex_enter(&vswp->mca_lock);
1107*19b65a69Ssb155480 		mutex_enter(&vswp->mac_lock);
1108*19b65a69Ssb155480 		for (mcap = vswp->mcap; mcap != NULL; mcap = mcap->nextp) {
1109*19b65a69Ssb155480 			if (mcap->mac_added)
1110*19b65a69Ssb155480 				continue;
1111*19b65a69Ssb155480 			rv = mac_multicst_add(vswp->mh, (uchar_t *)&mcap->mca);
1112*19b65a69Ssb155480 			if (rv == 0) {
1113*19b65a69Ssb155480 				mcap->mac_added = B_TRUE;
1114*19b65a69Ssb155480 			} else {
1115*19b65a69Ssb155480 				cmn_err(CE_WARN, "!vsw%d: unable to add "
1116*19b65a69Ssb155480 				    "multicast address: %s\n", vswp->instance,
1117*19b65a69Ssb155480 				    ether_sprintf((void *)&mcap->mca));
1118*19b65a69Ssb155480 			}
1119*19b65a69Ssb155480 		}
1120*19b65a69Ssb155480 		mutex_exit(&vswp->mac_lock);
1121*19b65a69Ssb155480 		mutex_exit(&vswp->mca_lock);
1122*19b65a69Ssb155480 
1123*19b65a69Ssb155480 	}
1124*19b65a69Ssb155480 
1125*19b65a69Ssb155480 	RW_EXIT(&vswp->if_lockrw);
1126*19b65a69Ssb155480 
1127*19b65a69Ssb155480 	WRITE_ENTER(&plist->lockrw);
1128*19b65a69Ssb155480 
1129*19b65a69Ssb155480 	/* program unicast address of ports in the physical device */
1130*19b65a69Ssb155480 	mutex_enter(&vswp->hw_lock);
1131*19b65a69Ssb155480 	for (port = plist->head; port != NULL; port = port->p_next) {
1132*19b65a69Ssb155480 		if (port->addr_set != VSW_ADDR_UNSET) /* addr already set */
1133*19b65a69Ssb155480 			continue;
1134*19b65a69Ssb155480 		if (vsw_set_hw(vswp, port, VSW_VNETPORT)) {
1135*19b65a69Ssb155480 			cmn_err(CE_NOTE,
1136*19b65a69Ssb155480 			    "!vsw%d: port:%d failed to set unicast address\n",
1137*19b65a69Ssb155480 			    vswp->instance, port->p_instance);
1138*19b65a69Ssb155480 		}
1139*19b65a69Ssb155480 	}
1140*19b65a69Ssb155480 	mutex_exit(&vswp->hw_lock);
1141*19b65a69Ssb155480 
1142*19b65a69Ssb155480 	/* program multicast addresses of ports in the physdev */
1143*19b65a69Ssb155480 	for (port = plist->head; port != NULL; port = port->p_next) {
1144*19b65a69Ssb155480 		mutex_enter(&port->mca_lock);
1145*19b65a69Ssb155480 		mutex_enter(&vswp->mac_lock);
1146*19b65a69Ssb155480 		for (mcap = port->mcap; mcap != NULL; mcap = mcap->nextp) {
1147*19b65a69Ssb155480 			if (mcap->mac_added)
1148*19b65a69Ssb155480 				continue;
1149*19b65a69Ssb155480 			rv = mac_multicst_add(vswp->mh, (uchar_t *)&mcap->mca);
1150*19b65a69Ssb155480 			if (rv == 0) {
1151*19b65a69Ssb155480 				mcap->mac_added = B_TRUE;
1152*19b65a69Ssb155480 			} else {
1153*19b65a69Ssb155480 				cmn_err(CE_WARN, "!vsw%d: unable to add "
1154*19b65a69Ssb155480 				    "multicast address: %s\n", vswp->instance,
1155*19b65a69Ssb155480 				    ether_sprintf((void *)&mcap->mca));
1156*19b65a69Ssb155480 			}
1157*19b65a69Ssb155480 		}
1158*19b65a69Ssb155480 		mutex_exit(&vswp->mac_lock);
1159*19b65a69Ssb155480 		mutex_exit(&port->mca_lock);
1160*19b65a69Ssb155480 	}
1161*19b65a69Ssb155480 
1162*19b65a69Ssb155480 	RW_EXIT(&plist->lockrw);
1163*19b65a69Ssb155480 }
1164*19b65a69Ssb155480 
1165*19b65a69Ssb155480 /*
1166*19b65a69Ssb155480  * Remove unicast and multicast addresses of vsw interface and the ports
1167*19b65a69Ssb155480  * from the physical device.
1168*19b65a69Ssb155480  */
1169*19b65a69Ssb155480 static void
1170*19b65a69Ssb155480 vsw_unset_addrs(vsw_t *vswp)
1171*19b65a69Ssb155480 {
1172*19b65a69Ssb155480 	vsw_port_list_t	*plist = &vswp->plist;
1173*19b65a69Ssb155480 	vsw_port_t	*port;
1174*19b65a69Ssb155480 	mcst_addr_t	*mcap;
1175*19b65a69Ssb155480 
1176*19b65a69Ssb155480 	READ_ENTER(&vswp->if_lockrw);
1177*19b65a69Ssb155480 
1178*19b65a69Ssb155480 	if (vswp->if_state & VSW_IF_UP) {
1179*19b65a69Ssb155480 
1180*19b65a69Ssb155480 		/*
1181*19b65a69Ssb155480 		 * Remove unicast addr of vsw interfce
1182*19b65a69Ssb155480 		 * from current physdev
1183*19b65a69Ssb155480 		 */
1184*19b65a69Ssb155480 		mutex_enter(&vswp->hw_lock);
1185*19b65a69Ssb155480 		(void) vsw_unset_hw(vswp, NULL, VSW_LOCALDEV);
1186*19b65a69Ssb155480 		mutex_exit(&vswp->hw_lock);
1187*19b65a69Ssb155480 
1188*19b65a69Ssb155480 		/*
1189*19b65a69Ssb155480 		 * Remove mcast addrs of vsw interface
1190*19b65a69Ssb155480 		 * from current physdev
1191*19b65a69Ssb155480 		 */
1192*19b65a69Ssb155480 		mutex_enter(&vswp->mca_lock);
1193*19b65a69Ssb155480 		mutex_enter(&vswp->mac_lock);
1194*19b65a69Ssb155480 		for (mcap = vswp->mcap; mcap != NULL; mcap = mcap->nextp) {
1195*19b65a69Ssb155480 			if (!mcap->mac_added)
1196*19b65a69Ssb155480 				continue;
1197*19b65a69Ssb155480 			(void) mac_multicst_remove(vswp->mh,
1198*19b65a69Ssb155480 			    (uchar_t *)&mcap->mca);
1199*19b65a69Ssb155480 			mcap->mac_added = B_FALSE;
1200*19b65a69Ssb155480 		}
1201*19b65a69Ssb155480 		mutex_exit(&vswp->mac_lock);
1202*19b65a69Ssb155480 		mutex_exit(&vswp->mca_lock);
1203*19b65a69Ssb155480 
1204*19b65a69Ssb155480 	}
1205*19b65a69Ssb155480 
1206*19b65a69Ssb155480 	RW_EXIT(&vswp->if_lockrw);
1207*19b65a69Ssb155480 
1208*19b65a69Ssb155480 	WRITE_ENTER(&plist->lockrw);
1209*19b65a69Ssb155480 
1210*19b65a69Ssb155480 	/*
1211*19b65a69Ssb155480 	 * Remove unicast address of ports from the current physical device
1212*19b65a69Ssb155480 	 */
1213*19b65a69Ssb155480 	mutex_enter(&vswp->hw_lock);
1214*19b65a69Ssb155480 	for (port = plist->head; port != NULL; port = port->p_next) {
1215*19b65a69Ssb155480 		/* Remove address if was programmed into HW. */
1216*19b65a69Ssb155480 		if (port->addr_set == VSW_ADDR_UNSET)
1217*19b65a69Ssb155480 			continue;
1218*19b65a69Ssb155480 		(void) vsw_unset_hw(vswp, port, VSW_VNETPORT);
1219*19b65a69Ssb155480 	}
1220*19b65a69Ssb155480 	mutex_exit(&vswp->hw_lock);
1221*19b65a69Ssb155480 
1222*19b65a69Ssb155480 	/* Remove multicast addresses of ports from the current physdev */
1223*19b65a69Ssb155480 	for (port = plist->head; port != NULL; port = port->p_next) {
1224*19b65a69Ssb155480 		mutex_enter(&port->mca_lock);
1225*19b65a69Ssb155480 		mutex_enter(&vswp->mac_lock);
1226*19b65a69Ssb155480 		for (mcap = port->mcap; mcap != NULL; mcap = mcap->nextp) {
1227*19b65a69Ssb155480 			if (!mcap->mac_added)
1228*19b65a69Ssb155480 				continue;
1229*19b65a69Ssb155480 			(void) mac_multicst_remove(vswp->mh,
1230*19b65a69Ssb155480 			    (uchar_t *)&mcap->mca);
1231*19b65a69Ssb155480 			mcap->mac_added = B_FALSE;
1232*19b65a69Ssb155480 		}
1233*19b65a69Ssb155480 		mutex_exit(&vswp->mac_lock);
1234*19b65a69Ssb155480 		mutex_exit(&port->mca_lock);
1235*19b65a69Ssb155480 	}
1236*19b65a69Ssb155480 
1237*19b65a69Ssb155480 	RW_EXIT(&plist->lockrw);
1238*19b65a69Ssb155480 }
1239*19b65a69Ssb155480 
1240*19b65a69Ssb155480 /* copy mac address of vsw into soft state structure */
1241*19b65a69Ssb155480 static void
1242*19b65a69Ssb155480 vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr)
1243*19b65a69Ssb155480 {
1244*19b65a69Ssb155480 	int	i;
1245*19b65a69Ssb155480 
1246*19b65a69Ssb155480 	WRITE_ENTER(&vswp->if_lockrw);
1247*19b65a69Ssb155480 	for (i = ETHERADDRL - 1; i >= 0; i--) {
1248*19b65a69Ssb155480 		vswp->if_addr.ether_addr_octet[i] = macaddr & 0xFF;
1249*19b65a69Ssb155480 		macaddr >>= 8;
1250*19b65a69Ssb155480 	}
1251*19b65a69Ssb155480 	RW_EXIT(&vswp->if_lockrw);
1252*19b65a69Ssb155480 }
1253*19b65a69Ssb155480 
1254*19b65a69Ssb155480 /*
1255*19b65a69Ssb155480  * Timeout routine to setup switching mode:
1256*19b65a69Ssb155480  * vsw_setup_switching() is invoked from vsw_attach() or vsw_update_md_prop()
1257*19b65a69Ssb155480  * initially. If it fails and the error is EAGAIN, then this timeout handler
1258*19b65a69Ssb155480  * is started to retry vsw_setup_switching(). vsw_setup_switching() is retried
1259*19b65a69Ssb155480  * until we successfully finish it; or the returned error is not EAGAIN.
1260*19b65a69Ssb155480  */
1261*19b65a69Ssb155480 static void
1262*19b65a69Ssb155480 vsw_setup_switching_timeout(void *arg)
1263*19b65a69Ssb155480 {
1264*19b65a69Ssb155480 	vsw_t		*vswp = (vsw_t *)arg;
1265*19b65a69Ssb155480 	int		rv;
1266*19b65a69Ssb155480 
1267*19b65a69Ssb155480 	if (vswp->swtmout_enabled == B_FALSE)
1268*19b65a69Ssb155480 		return;
1269*19b65a69Ssb155480 
1270*19b65a69Ssb155480 	rv = vsw_setup_switching(vswp);
1271*19b65a69Ssb155480 
1272*19b65a69Ssb155480 	if (rv == 0) {
1273*19b65a69Ssb155480 		/*
1274*19b65a69Ssb155480 		 * Successfully setup switching mode.
1275*19b65a69Ssb155480 		 * Program unicst, mcst addrs of vsw
1276*19b65a69Ssb155480 		 * interface and ports in the physdev.
1277*19b65a69Ssb155480 		 */
1278*19b65a69Ssb155480 		vsw_set_addrs(vswp);
1279*19b65a69Ssb155480 	}
1280*19b65a69Ssb155480 
1281*19b65a69Ssb155480 	mutex_enter(&vswp->swtmout_lock);
1282*19b65a69Ssb155480 
1283*19b65a69Ssb155480 	if (rv == EAGAIN && vswp->swtmout_enabled == B_TRUE) {
1284*19b65a69Ssb155480 		/*
1285*19b65a69Ssb155480 		 * Reschedule timeout() if the error is EAGAIN and the
1286*19b65a69Ssb155480 		 * timeout is still enabled. For errors other than EAGAIN,
1287*19b65a69Ssb155480 		 * we simply return without rescheduling timeout().
1288*19b65a69Ssb155480 		 */
1289*19b65a69Ssb155480 		vswp->swtmout_id =
1290*19b65a69Ssb155480 		    timeout(vsw_setup_switching_timeout, vswp,
1291*19b65a69Ssb155480 		    (vsw_setup_switching_delay * drv_usectohz(MICROSEC)));
1292*19b65a69Ssb155480 		goto exit;
1293*19b65a69Ssb155480 	}
1294*19b65a69Ssb155480 
1295*19b65a69Ssb155480 	/* timeout handler completed */
1296*19b65a69Ssb155480 	vswp->swtmout_enabled = B_FALSE;
1297*19b65a69Ssb155480 	vswp->swtmout_id = 0;
1298*19b65a69Ssb155480 
1299*19b65a69Ssb155480 exit:
1300*19b65a69Ssb155480 	mutex_exit(&vswp->swtmout_lock);
1301*19b65a69Ssb155480 }
1302*19b65a69Ssb155480 
1303*19b65a69Ssb155480 /*
1304*19b65a69Ssb155480  * Cancel the timeout handler to setup switching mode.
1305*19b65a69Ssb155480  */
1306*19b65a69Ssb155480 static void
1307*19b65a69Ssb155480 vsw_stop_switching_timeout(vsw_t *vswp)
1308*19b65a69Ssb155480 {
1309*19b65a69Ssb155480 	timeout_id_t tid;
1310*19b65a69Ssb155480 
1311*19b65a69Ssb155480 	mutex_enter(&vswp->swtmout_lock);
1312*19b65a69Ssb155480 
1313*19b65a69Ssb155480 	tid = vswp->swtmout_id;
1314*19b65a69Ssb155480 
1315*19b65a69Ssb155480 	if (tid != 0) {
1316*19b65a69Ssb155480 		/* signal timeout handler to stop */
1317*19b65a69Ssb155480 		vswp->swtmout_enabled = B_FALSE;
1318*19b65a69Ssb155480 		vswp->swtmout_id = 0;
1319*19b65a69Ssb155480 		mutex_exit(&vswp->swtmout_lock);
1320*19b65a69Ssb155480 
1321*19b65a69Ssb155480 		(void) untimeout(tid);
1322*19b65a69Ssb155480 	} else {
1323*19b65a69Ssb155480 		mutex_exit(&vswp->swtmout_lock);
1324*19b65a69Ssb155480 	}
1325*19b65a69Ssb155480 
1326*19b65a69Ssb155480 	(void) atomic_swap_32(&vswp->switching_setup_done, B_FALSE);
1327*19b65a69Ssb155480 
1328*19b65a69Ssb155480 	mutex_enter(&vswp->mac_lock);
1329*19b65a69Ssb155480 	vswp->mac_open_retries = 0;
1330*19b65a69Ssb155480 	mutex_exit(&vswp->mac_lock);
1331*19b65a69Ssb155480 }
1332*19b65a69Ssb155480 
1333*19b65a69Ssb155480 /*
133434683adeSsg70180  * Setup the required switching mode.
1335*19b65a69Ssb155480  * This routine is invoked from vsw_attach() or vsw_update_md_prop()
1336*19b65a69Ssb155480  * initially. If it fails and the error is EAGAIN, then a timeout handler
1337*19b65a69Ssb155480  * is started to retry vsw_setup_switching(), until it successfully finishes;
1338*19b65a69Ssb155480  * or the returned error is not EAGAIN.
133934683adeSsg70180  *
1340*19b65a69Ssb155480  * Returns:
1341*19b65a69Ssb155480  *  0 on success.
1342*19b65a69Ssb155480  *  EAGAIN if retry is needed.
1343*19b65a69Ssb155480  *  1 on all other failures.
134434683adeSsg70180  */
134534683adeSsg70180 static int
134634683adeSsg70180 vsw_setup_switching(vsw_t *vswp)
134734683adeSsg70180 {
134834683adeSsg70180 	int	i, rv = 1;
134934683adeSsg70180 
135034683adeSsg70180 	D1(vswp, "%s: enter", __func__);
135134683adeSsg70180 
1352*19b65a69Ssb155480 	/*
1353*19b65a69Ssb155480 	 * Select best switching mode.
1354*19b65a69Ssb155480 	 * Note that we start from the saved smode_idx. This is done as
1355*19b65a69Ssb155480 	 * this routine can be called from the timeout handler to retry
1356*19b65a69Ssb155480 	 * setting up a specific mode. Currently only the function which
1357*19b65a69Ssb155480 	 * sets up layer2/promisc mode returns EAGAIN if the underlying
1358*19b65a69Ssb155480 	 * physical device is not available yet, causing retries.
1359*19b65a69Ssb155480 	 */
1360*19b65a69Ssb155480 	for (i = vswp->smode_idx; i < vswp->smode_num; i++) {
136134683adeSsg70180 		vswp->smode_idx = i;
136234683adeSsg70180 		switch (vswp->smode[i]) {
136334683adeSsg70180 		case VSW_LAYER2:
136434683adeSsg70180 		case VSW_LAYER2_PROMISC:
136534683adeSsg70180 			rv = vsw_setup_layer2(vswp);
136634683adeSsg70180 			break;
136734683adeSsg70180 
136834683adeSsg70180 		case VSW_LAYER3:
136934683adeSsg70180 			rv = vsw_setup_layer3(vswp);
137034683adeSsg70180 			break;
137134683adeSsg70180 
137234683adeSsg70180 		default:
137334683adeSsg70180 			DERR(vswp, "unknown switch mode");
1374*19b65a69Ssb155480 			break;
1375*19b65a69Ssb155480 		}
1376*19b65a69Ssb155480 
1377*19b65a69Ssb155480 		if ((rv == 0) || (rv == EAGAIN))
1378*19b65a69Ssb155480 			break;
1379*19b65a69Ssb155480 
1380*19b65a69Ssb155480 		/* all other errors(rv != 0): continue & select the next mode */
138134683adeSsg70180 		rv = 1;
138234683adeSsg70180 	}
138334683adeSsg70180 
1384*19b65a69Ssb155480 	if (rv && (rv != EAGAIN)) {
138534683adeSsg70180 		cmn_err(CE_WARN, "!vsw%d: Unable to setup specified "
138634683adeSsg70180 		    "switching mode", vswp->instance);
1387*19b65a69Ssb155480 	} else if (rv == 0) {
1388*19b65a69Ssb155480 		(void) atomic_swap_32(&vswp->switching_setup_done, B_TRUE);
138934683adeSsg70180 	}
139034683adeSsg70180 
139134683adeSsg70180 	D2(vswp, "%s: Operating in mode %d", __func__,
139234683adeSsg70180 	    vswp->smode[vswp->smode_idx]);
139334683adeSsg70180 
139434683adeSsg70180 	D1(vswp, "%s: exit", __func__);
139534683adeSsg70180 
1396*19b65a69Ssb155480 	return (rv);
139734683adeSsg70180 }
139834683adeSsg70180 
139934683adeSsg70180 /*
1400e1ebb9ecSlm66018  * Setup for layer 2 switching.
1401e1ebb9ecSlm66018  *
1402*19b65a69Ssb155480  * Returns:
1403*19b65a69Ssb155480  *  0 on success.
1404*19b65a69Ssb155480  *  EAGAIN if retry is needed.
1405*19b65a69Ssb155480  *  EIO on all other failures.
1406e1ebb9ecSlm66018  */
14071ae08745Sheppo static int
14081ae08745Sheppo vsw_setup_layer2(vsw_t *vswp)
14091ae08745Sheppo {
1410*19b65a69Ssb155480 	int	rv;
1411*19b65a69Ssb155480 
14121ae08745Sheppo 	D1(vswp, "%s: enter", __func__);
14131ae08745Sheppo 
141434683adeSsg70180 	vswp->vsw_switch_frame = vsw_switch_l2_frame;
14151ae08745Sheppo 
1416*19b65a69Ssb155480 	rv = strlen(vswp->physname);
1417*19b65a69Ssb155480 	if (rv == 0) {
14181ae08745Sheppo 		/*
1419*19b65a69Ssb155480 		 * Physical device name is NULL, which is
1420*19b65a69Ssb155480 		 * required for layer 2.
14211ae08745Sheppo 		 */
1422*19b65a69Ssb155480 		cmn_err(CE_WARN, "!vsw%d: no physical device name specified",
1423*19b65a69Ssb155480 		    vswp->instance);
1424*19b65a69Ssb155480 		return (EIO);
1425*19b65a69Ssb155480 	}
1426*19b65a69Ssb155480 
1427*19b65a69Ssb155480 	mutex_enter(&vswp->mac_lock);
1428*19b65a69Ssb155480 
1429*19b65a69Ssb155480 	rv = vsw_mac_open(vswp);
1430*19b65a69Ssb155480 	if (rv != 0) {
1431*19b65a69Ssb155480 		if (rv != EAGAIN) {
1432*19b65a69Ssb155480 			cmn_err(CE_WARN, "!vsw%d: Unable to open physical "
1433*19b65a69Ssb155480 			    "device: %s\n", vswp->instance, vswp->physname);
1434*19b65a69Ssb155480 		}
1435*19b65a69Ssb155480 		mutex_exit(&vswp->mac_lock);
1436*19b65a69Ssb155480 		return (rv);
14371ae08745Sheppo 	}
1438e1ebb9ecSlm66018 
1439e1ebb9ecSlm66018 	if (vswp->smode[vswp->smode_idx] == VSW_LAYER2) {
1440e1ebb9ecSlm66018 		/*
1441e1ebb9ecSlm66018 		 * Verify that underlying device can support multiple
14425f94e909Ssg70180 		 * unicast mac addresses.
1443e1ebb9ecSlm66018 		 */
1444*19b65a69Ssb155480 		rv = vsw_get_hw_maddr(vswp);
1445*19b65a69Ssb155480 		if (rv != 0) {
144634683adeSsg70180 			cmn_err(CE_WARN, "!vsw%d: Unable to setup "
14475f94e909Ssg70180 			    "layer2 switching", vswp->instance);
1448*19b65a69Ssb155480 			goto exit_error;
1449e1ebb9ecSlm66018 		}
1450e1ebb9ecSlm66018 	}
1451e1ebb9ecSlm66018 
1452e1ebb9ecSlm66018 	/*
1453*19b65a69Ssb155480 	 * Attempt to link into the MAC layer so we can get
1454*19b65a69Ssb155480 	 * and send packets out over the physical adapter.
1455e1ebb9ecSlm66018 	 */
1456*19b65a69Ssb155480 	rv = vsw_mac_attach(vswp);
1457*19b65a69Ssb155480 	if (rv != 0) {
1458*19b65a69Ssb155480 		/*
1459*19b65a69Ssb155480 		 * Registration with the MAC layer has failed,
1460*19b65a69Ssb155480 		 * so return error so that can fall back to next
1461*19b65a69Ssb155480 		 * prefered switching method.
1462*19b65a69Ssb155480 		 */
1463*19b65a69Ssb155480 		cmn_err(CE_WARN, "!vsw%d: Unable to setup physical device: "
1464*19b65a69Ssb155480 		    "%s\n", vswp->instance, vswp->physname);
1465*19b65a69Ssb155480 		goto exit_error;
14661ae08745Sheppo 	}
14671ae08745Sheppo 
14681ae08745Sheppo 	D1(vswp, "%s: exit", __func__);
14691ae08745Sheppo 
1470*19b65a69Ssb155480 	mutex_exit(&vswp->mac_lock);
1471e1ebb9ecSlm66018 	return (0);
1472*19b65a69Ssb155480 
1473*19b65a69Ssb155480 exit_error:
1474*19b65a69Ssb155480 	vsw_mac_close(vswp);
1475*19b65a69Ssb155480 	mutex_exit(&vswp->mac_lock);
1476*19b65a69Ssb155480 	return (EIO);
14771ae08745Sheppo }
14781ae08745Sheppo 
14791ae08745Sheppo static int
14801ae08745Sheppo vsw_setup_layer3(vsw_t *vswp)
14811ae08745Sheppo {
14821ae08745Sheppo 	D1(vswp, "%s: enter", __func__);
14831ae08745Sheppo 
14841ae08745Sheppo 	D2(vswp, "%s: operating in layer 3 mode", __func__);
148534683adeSsg70180 	vswp->vsw_switch_frame = vsw_switch_l3_frame;
14861ae08745Sheppo 
14871ae08745Sheppo 	D1(vswp, "%s: exit", __func__);
14881ae08745Sheppo 
14891ae08745Sheppo 	return (0);
14901ae08745Sheppo }
14911ae08745Sheppo 
14921ae08745Sheppo /*
1493*19b65a69Ssb155480  * Open the underlying physical device for access in layer2 mode.
1494*19b65a69Ssb155480  * Returns:
1495*19b65a69Ssb155480  * 0 on success
1496*19b65a69Ssb155480  * EAGAIN if mac_open() fails due to the device being not available yet.
1497*19b65a69Ssb155480  * EIO on any other failures.
1498*19b65a69Ssb155480  */
1499*19b65a69Ssb155480 static int
1500*19b65a69Ssb155480 vsw_mac_open(vsw_t *vswp)
1501*19b65a69Ssb155480 {
1502*19b65a69Ssb155480 	char	drv[LIFNAMSIZ];
1503*19b65a69Ssb155480 	uint_t	ddi_instance;
1504*19b65a69Ssb155480 	int	rv;
1505*19b65a69Ssb155480 
1506*19b65a69Ssb155480 	ASSERT(MUTEX_HELD(&vswp->mac_lock));
1507*19b65a69Ssb155480 
1508*19b65a69Ssb155480 	if (vswp->mh != NULL) {
1509*19b65a69Ssb155480 		/* already open */
1510*19b65a69Ssb155480 		return (0);
1511*19b65a69Ssb155480 	}
1512*19b65a69Ssb155480 
1513*19b65a69Ssb155480 	if (vswp->mac_open_retries++ >= vsw_mac_open_retries) {
1514*19b65a69Ssb155480 		/* exceeded max retries */
1515*19b65a69Ssb155480 		return (EIO);
1516*19b65a69Ssb155480 	}
1517*19b65a69Ssb155480 
1518*19b65a69Ssb155480 	if (ddi_parse(vswp->physname, drv, &ddi_instance) != DDI_SUCCESS) {
1519*19b65a69Ssb155480 		cmn_err(CE_WARN, "!vsw%d: invalid device name: %s",
1520*19b65a69Ssb155480 		    vswp->instance, vswp->physname);
1521*19b65a69Ssb155480 		return (EIO);
1522*19b65a69Ssb155480 	}
1523*19b65a69Ssb155480 
1524*19b65a69Ssb155480 	/*
1525*19b65a69Ssb155480 	 * Aggregation devices are special in that the device instance
1526*19b65a69Ssb155480 	 * must be set to zero when they are being mac_open()'ed.
1527*19b65a69Ssb155480 	 *
1528*19b65a69Ssb155480 	 * The only way to determine if we are being passed an aggregated
1529*19b65a69Ssb155480 	 * device is to check the device name.
1530*19b65a69Ssb155480 	 */
1531*19b65a69Ssb155480 	if (strcmp(drv, "aggr") == 0) {
1532*19b65a69Ssb155480 		ddi_instance = 0;
1533*19b65a69Ssb155480 	}
1534*19b65a69Ssb155480 
1535*19b65a69Ssb155480 	rv = mac_open(vswp->physname, ddi_instance, &vswp->mh);
1536*19b65a69Ssb155480 	if (rv != 0) {
1537*19b65a69Ssb155480 		/*
1538*19b65a69Ssb155480 		 * If mac_open() failed and the error indicates that the
1539*19b65a69Ssb155480 		 * device is not available yet, then, we return EAGAIN to
1540*19b65a69Ssb155480 		 * indicate that it needs to be retried.
1541*19b65a69Ssb155480 		 * For example, this may happen during boot up, as the
1542*19b65a69Ssb155480 		 * required link aggregation groups(devices) have not been
1543*19b65a69Ssb155480 		 * created yet.
1544*19b65a69Ssb155480 		 */
1545*19b65a69Ssb155480 		if (rv == ENOENT) {
1546*19b65a69Ssb155480 			return (EAGAIN);
1547*19b65a69Ssb155480 		} else {
1548*19b65a69Ssb155480 			cmn_err(CE_WARN, "vsw%d: mac_open %s failed rv:%x",
1549*19b65a69Ssb155480 			    vswp->instance, vswp->physname, rv);
1550*19b65a69Ssb155480 			return (EIO);
1551*19b65a69Ssb155480 		}
1552*19b65a69Ssb155480 	}
1553*19b65a69Ssb155480 
1554*19b65a69Ssb155480 	vswp->mac_open_retries = 0;
1555*19b65a69Ssb155480 
1556*19b65a69Ssb155480 	return (0);
1557*19b65a69Ssb155480 }
1558*19b65a69Ssb155480 
1559*19b65a69Ssb155480 /*
1560*19b65a69Ssb155480  * Close the underlying physical device.
1561*19b65a69Ssb155480  */
1562*19b65a69Ssb155480 static void
1563*19b65a69Ssb155480 vsw_mac_close(vsw_t *vswp)
1564*19b65a69Ssb155480 {
1565*19b65a69Ssb155480 	ASSERT(MUTEX_HELD(&vswp->mac_lock));
1566*19b65a69Ssb155480 
1567*19b65a69Ssb155480 	if (vswp->mh != NULL) {
1568*19b65a69Ssb155480 		mac_close(vswp->mh);
1569*19b65a69Ssb155480 		vswp->mh = NULL;
1570*19b65a69Ssb155480 	}
1571*19b65a69Ssb155480 }
1572*19b65a69Ssb155480 
1573*19b65a69Ssb155480 /*
15741ae08745Sheppo  * Link into the MAC layer to gain access to the services provided by
15751ae08745Sheppo  * the underlying physical device driver (which should also have
15761ae08745Sheppo  * registered with the MAC layer).
15771ae08745Sheppo  *
15781ae08745Sheppo  * Only when in layer 2 mode.
15791ae08745Sheppo  */
15801ae08745Sheppo static int
15811ae08745Sheppo vsw_mac_attach(vsw_t *vswp)
15821ae08745Sheppo {
15837636cb21Slm66018 	D1(vswp, "%s: enter", __func__);
15841ae08745Sheppo 
158534683adeSsg70180 	ASSERT(vswp->mrh == NULL);
158634683adeSsg70180 	ASSERT(vswp->mstarted == B_FALSE);
158734683adeSsg70180 	ASSERT(vswp->mresources == B_FALSE);
15881ae08745Sheppo 
1589*19b65a69Ssb155480 	ASSERT(MUTEX_HELD(&vswp->mac_lock));
15901ae08745Sheppo 
15917636cb21Slm66018 	ASSERT(vswp->mh != NULL);
15927636cb21Slm66018 
15931ae08745Sheppo 	D2(vswp, "vsw_mac_attach: using device %s", vswp->physname);
15941ae08745Sheppo 
15957636cb21Slm66018 	if (vsw_multi_ring_enable) {
159634683adeSsg70180 		/*
159734683adeSsg70180 		 * Initialize the ring table.
159834683adeSsg70180 		 */
15997636cb21Slm66018 		vsw_mac_ring_tbl_init(vswp);
16001ae08745Sheppo 
16017636cb21Slm66018 		/*
160234683adeSsg70180 		 * Register our rx callback function.
16037636cb21Slm66018 		 */
16047636cb21Slm66018 		vswp->mrh = mac_rx_add(vswp->mh,
16057636cb21Slm66018 		    vsw_rx_queue_cb, (void *)vswp);
160634683adeSsg70180 		ASSERT(vswp->mrh != NULL);
16077636cb21Slm66018 
16087636cb21Slm66018 		/*
16097636cb21Slm66018 		 * Register our mac resource callback.
16107636cb21Slm66018 		 */
16117636cb21Slm66018 		mac_resource_set(vswp->mh, vsw_mac_ring_add_cb, (void *)vswp);
16127636cb21Slm66018 		vswp->mresources = B_TRUE;
16137636cb21Slm66018 
16147636cb21Slm66018 		/*
16157636cb21Slm66018 		 * Get the ring resources available to us from
16167636cb21Slm66018 		 * the mac below us.
16177636cb21Slm66018 		 */
16187636cb21Slm66018 		mac_resources(vswp->mh);
16197636cb21Slm66018 	} else {
16207636cb21Slm66018 		/*
16217636cb21Slm66018 		 * Just register our rx callback function
16227636cb21Slm66018 		 */
16237636cb21Slm66018 		vswp->mrh = mac_rx_add(vswp->mh, vsw_rx_cb, (void *)vswp);
16247636cb21Slm66018 		ASSERT(vswp->mrh != NULL);
162534683adeSsg70180 	}
16267636cb21Slm66018 
16277636cb21Slm66018 	/* Get the MAC tx fn */
16281ae08745Sheppo 	vswp->txinfo = mac_tx_get(vswp->mh);
16291ae08745Sheppo 
16301ae08745Sheppo 	/* start the interface */
16311ae08745Sheppo 	if (mac_start(vswp->mh) != 0) {
163234683adeSsg70180 		cmn_err(CE_WARN, "!vsw%d: Could not start mac interface",
163334683adeSsg70180 		    vswp->instance);
16341ae08745Sheppo 		goto mac_fail_exit;
16351ae08745Sheppo 	}
16361ae08745Sheppo 
16377636cb21Slm66018 	vswp->mstarted = B_TRUE;
16387636cb21Slm66018 
16397636cb21Slm66018 	D1(vswp, "%s: exit", __func__);
16401ae08745Sheppo 	return (0);
16411ae08745Sheppo 
16421ae08745Sheppo mac_fail_exit:
16437636cb21Slm66018 	vsw_mac_detach(vswp);
16441ae08745Sheppo 
16457636cb21Slm66018 	D1(vswp, "%s: exit", __func__);
16461ae08745Sheppo 	return (1);
16471ae08745Sheppo }
16481ae08745Sheppo 
16491ae08745Sheppo static void
16501ae08745Sheppo vsw_mac_detach(vsw_t *vswp)
16511ae08745Sheppo {
16521ae08745Sheppo 	D1(vswp, "vsw_mac_detach: enter");
16531ae08745Sheppo 
16547636cb21Slm66018 	ASSERT(vswp != NULL);
1655*19b65a69Ssb155480 	ASSERT(MUTEX_HELD(&vswp->mac_lock));
16567636cb21Slm66018 
16577636cb21Slm66018 	if (vsw_multi_ring_enable) {
16587636cb21Slm66018 		vsw_mac_ring_tbl_destroy(vswp);
16597636cb21Slm66018 	}
16607636cb21Slm66018 
1661b9a6d57aSsg70180 	if (vswp->mh != NULL) {
16627636cb21Slm66018 		if (vswp->mstarted)
16637636cb21Slm66018 			mac_stop(vswp->mh);
16641ae08745Sheppo 		if (vswp->mrh != NULL)
16651f8aaf0dSethindra 			mac_rx_remove(vswp->mh, vswp->mrh, B_TRUE);
16667636cb21Slm66018 		if (vswp->mresources)
16677636cb21Slm66018 			mac_resource_set(vswp->mh, NULL, NULL);
1668b9a6d57aSsg70180 	}
16691ae08745Sheppo 
16701ae08745Sheppo 	vswp->mrh = NULL;
16711ae08745Sheppo 	vswp->txinfo = NULL;
16727636cb21Slm66018 	vswp->mstarted = B_FALSE;
16731ae08745Sheppo 
16741ae08745Sheppo 	D1(vswp, "vsw_mac_detach: exit");
16751ae08745Sheppo }
16761ae08745Sheppo 
16771ae08745Sheppo /*
1678e1ebb9ecSlm66018  * Depending on the mode specified, the capabilites and capacity
1679e1ebb9ecSlm66018  * of the underlying device setup the physical device.
16801ae08745Sheppo  *
1681e1ebb9ecSlm66018  * If in layer 3 mode, then do nothing.
1682e1ebb9ecSlm66018  *
1683e1ebb9ecSlm66018  * If in layer 2 programmed mode attempt to program the unicast address
1684e1ebb9ecSlm66018  * associated with the port into the physical device. If this is not
1685e1ebb9ecSlm66018  * possible due to resource exhaustion or simply because the device does
1686e1ebb9ecSlm66018  * not support multiple unicast addresses then if required fallback onto
1687e1ebb9ecSlm66018  * putting the card into promisc mode.
1688e1ebb9ecSlm66018  *
1689e1ebb9ecSlm66018  * If in promisc mode then simply set the card into promisc mode.
1690e1ebb9ecSlm66018  *
1691e1ebb9ecSlm66018  * Returns 0 success, 1 on failure.
16921ae08745Sheppo  */
1693e1ebb9ecSlm66018 static int
16945f94e909Ssg70180 vsw_set_hw(vsw_t *vswp, vsw_port_t *port, int type)
16951ae08745Sheppo {
1696e1ebb9ecSlm66018 	mac_multi_addr_t	mac_addr;
1697e1ebb9ecSlm66018 	int			err;
16981ae08745Sheppo 
1699e1ebb9ecSlm66018 	D1(vswp, "%s: enter", __func__);
1700e1ebb9ecSlm66018 
17015f94e909Ssg70180 	ASSERT(MUTEX_HELD(&vswp->hw_lock));
17025f94e909Ssg70180 	ASSERT((type == VSW_LOCALDEV) || (type == VSW_VNETPORT));
17035f94e909Ssg70180 
1704e1ebb9ecSlm66018 	if (vswp->smode[vswp->smode_idx] == VSW_LAYER3)
1705e1ebb9ecSlm66018 		return (0);
1706e1ebb9ecSlm66018 
1707e1ebb9ecSlm66018 	if (vswp->smode[vswp->smode_idx] == VSW_LAYER2_PROMISC) {
17085f94e909Ssg70180 		return (vsw_set_hw_promisc(vswp, port, type));
1709e1ebb9ecSlm66018 	}
1710e1ebb9ecSlm66018 
1711e1ebb9ecSlm66018 	/*
1712e1ebb9ecSlm66018 	 * Attempt to program the unicast address into the HW.
1713e1ebb9ecSlm66018 	 */
1714e1ebb9ecSlm66018 	mac_addr.mma_addrlen = ETHERADDRL;
17155f94e909Ssg70180 	if (type == VSW_VNETPORT) {
17165f94e909Ssg70180 		ASSERT(port != NULL);
1717e1ebb9ecSlm66018 		ether_copy(&port->p_macaddr, &mac_addr.mma_addr);
17185f94e909Ssg70180 	} else {
17195f94e909Ssg70180 		ether_copy(&vswp->if_addr, &mac_addr.mma_addr);
17205f94e909Ssg70180 	}
1721e1ebb9ecSlm66018 
17225f94e909Ssg70180 	err = vsw_set_hw_addr(vswp, &mac_addr);
1723*19b65a69Ssb155480 	if (err == ENOSPC) {
1724e1ebb9ecSlm66018 		/*
1725e1ebb9ecSlm66018 		 * Mark that attempt should be made to re-config sometime
1726e1ebb9ecSlm66018 		 * in future if a port is deleted.
1727e1ebb9ecSlm66018 		 */
1728e1ebb9ecSlm66018 		vswp->recfg_reqd = B_TRUE;
1729e1ebb9ecSlm66018 
1730e1ebb9ecSlm66018 		/*
1731e1ebb9ecSlm66018 		 * Only 1 mode specified, nothing more to do.
1732e1ebb9ecSlm66018 		 */
1733e1ebb9ecSlm66018 		if (vswp->smode_num == 1)
1734e1ebb9ecSlm66018 			return (err);
1735e1ebb9ecSlm66018 
1736e1ebb9ecSlm66018 		/*
1737e1ebb9ecSlm66018 		 * If promiscuous was next mode specified try to
1738e1ebb9ecSlm66018 		 * set the card into that mode.
1739e1ebb9ecSlm66018 		 */
1740e1ebb9ecSlm66018 		if ((vswp->smode_idx <= (vswp->smode_num - 2)) &&
1741205eeb1aSlm66018 		    (vswp->smode[vswp->smode_idx + 1] ==
1742205eeb1aSlm66018 		    VSW_LAYER2_PROMISC)) {
1743e1ebb9ecSlm66018 			vswp->smode_idx += 1;
17445f94e909Ssg70180 			return (vsw_set_hw_promisc(vswp, port, type));
1745e1ebb9ecSlm66018 		}
1746e1ebb9ecSlm66018 		return (err);
1747e1ebb9ecSlm66018 	}
1748e1ebb9ecSlm66018 
1749*19b65a69Ssb155480 	if (err != 0)
1750*19b65a69Ssb155480 		return (err);
1751*19b65a69Ssb155480 
17525f94e909Ssg70180 	if (type == VSW_VNETPORT) {
1753e1ebb9ecSlm66018 		port->addr_slot = mac_addr.mma_slot;
1754e1ebb9ecSlm66018 		port->addr_set = VSW_ADDR_HW;
17555f94e909Ssg70180 	} else {
17565f94e909Ssg70180 		vswp->addr_slot = mac_addr.mma_slot;
17575f94e909Ssg70180 		vswp->addr_set = VSW_ADDR_HW;
17585f94e909Ssg70180 	}
1759e1ebb9ecSlm66018 
1760*19b65a69Ssb155480 	D2(vswp, "programmed addr %s into slot %d "
1761*19b65a69Ssb155480 	"of device %s", ether_sprintf((void *)mac_addr.mma_addr),
17625f94e909Ssg70180 	    mac_addr.mma_slot, vswp->physname);
1763e1ebb9ecSlm66018 
1764e1ebb9ecSlm66018 	D1(vswp, "%s: exit", __func__);
1765e1ebb9ecSlm66018 
1766e1ebb9ecSlm66018 	return (0);
1767e1ebb9ecSlm66018 }
1768e1ebb9ecSlm66018 
1769e1ebb9ecSlm66018 /*
1770e1ebb9ecSlm66018  * If in layer 3 mode do nothing.
1771e1ebb9ecSlm66018  *
1772e1ebb9ecSlm66018  * If in layer 2 switched mode remove the address from the physical
1773e1ebb9ecSlm66018  * device.
1774e1ebb9ecSlm66018  *
1775e1ebb9ecSlm66018  * If in layer 2 promiscuous mode disable promisc mode.
1776e1ebb9ecSlm66018  *
1777e1ebb9ecSlm66018  * Returns 0 on success.
1778e1ebb9ecSlm66018  */
1779e1ebb9ecSlm66018 static int
17805f94e909Ssg70180 vsw_unset_hw(vsw_t *vswp, vsw_port_t *port, int type)
1781e1ebb9ecSlm66018 {
17825f94e909Ssg70180 	mac_addr_slot_t	slot;
17835f94e909Ssg70180 	int		rv;
1784e1ebb9ecSlm66018 
1785e1ebb9ecSlm66018 	D1(vswp, "%s: enter", __func__);
1786e1ebb9ecSlm66018 
17875f94e909Ssg70180 	ASSERT(MUTEX_HELD(&vswp->hw_lock));
17885f94e909Ssg70180 
1789e1ebb9ecSlm66018 	if (vswp->smode[vswp->smode_idx] == VSW_LAYER3)
1790e1ebb9ecSlm66018 		return (0);
1791e1ebb9ecSlm66018 
17925f94e909Ssg70180 	switch (type) {
17935f94e909Ssg70180 	case VSW_VNETPORT:
17945f94e909Ssg70180 		ASSERT(port != NULL);
17955f94e909Ssg70180 
1796e1ebb9ecSlm66018 		if (port->addr_set == VSW_ADDR_PROMISC) {
17975f94e909Ssg70180 			return (vsw_unset_hw_promisc(vswp, port, type));
17985f94e909Ssg70180 
17995f94e909Ssg70180 		} else if (port->addr_set == VSW_ADDR_HW) {
18005f94e909Ssg70180 			slot = port->addr_slot;
18015f94e909Ssg70180 			if ((rv = vsw_unset_hw_addr(vswp, slot)) == 0)
18025f94e909Ssg70180 				port->addr_set = VSW_ADDR_UNSET;
1803e1ebb9ecSlm66018 		}
1804e1ebb9ecSlm66018 
18055f94e909Ssg70180 		break;
18065f94e909Ssg70180 
18075f94e909Ssg70180 	case VSW_LOCALDEV:
18085f94e909Ssg70180 		if (vswp->addr_set == VSW_ADDR_PROMISC) {
18095f94e909Ssg70180 			return (vsw_unset_hw_promisc(vswp, NULL, type));
18105f94e909Ssg70180 
18115f94e909Ssg70180 		} else if (vswp->addr_set == VSW_ADDR_HW) {
18125f94e909Ssg70180 			slot = vswp->addr_slot;
18135f94e909Ssg70180 			if ((rv = vsw_unset_hw_addr(vswp, slot)) == 0)
18145f94e909Ssg70180 				vswp->addr_set = VSW_ADDR_UNSET;
18155f94e909Ssg70180 		}
18165f94e909Ssg70180 
18175f94e909Ssg70180 		break;
18185f94e909Ssg70180 
18195f94e909Ssg70180 	default:
18205f94e909Ssg70180 		/* should never happen */
18215f94e909Ssg70180 		DERR(vswp, "%s: unknown type %d", __func__, type);
18225f94e909Ssg70180 		ASSERT(0);
18235f94e909Ssg70180 		return (1);
18245f94e909Ssg70180 	}
18255f94e909Ssg70180 
18265f94e909Ssg70180 	D1(vswp, "%s: exit", __func__);
18275f94e909Ssg70180 	return (rv);
18285f94e909Ssg70180 }
18295f94e909Ssg70180 
18305f94e909Ssg70180 /*
18315f94e909Ssg70180  * Attempt to program a unicast address into HW.
18325f94e909Ssg70180  *
18335f94e909Ssg70180  * Returns 0 on sucess, 1 on failure.
18345f94e909Ssg70180  */
18355f94e909Ssg70180 static int
18365f94e909Ssg70180 vsw_set_hw_addr(vsw_t *vswp, mac_multi_addr_t *mac)
18375f94e909Ssg70180 {
18385f94e909Ssg70180 	void	*mah;
1839*19b65a69Ssb155480 	int	rv = EINVAL;
18405f94e909Ssg70180 
18415f94e909Ssg70180 	D1(vswp, "%s: enter", __func__);
18425f94e909Ssg70180 
18435f94e909Ssg70180 	ASSERT(MUTEX_HELD(&vswp->hw_lock));
18445f94e909Ssg70180 
1845e1ebb9ecSlm66018 	if (vswp->maddr.maddr_handle == NULL)
1846*19b65a69Ssb155480 		return (rv);
1847e1ebb9ecSlm66018 
1848e1ebb9ecSlm66018 	mah = vswp->maddr.maddr_handle;
1849e1ebb9ecSlm66018 
18505f94e909Ssg70180 	rv = vswp->maddr.maddr_add(mah, mac);
18515f94e909Ssg70180 
18525f94e909Ssg70180 	if (rv == 0)
1853*19b65a69Ssb155480 		return (rv);
18545f94e909Ssg70180 
18555f94e909Ssg70180 	/*
18565f94e909Ssg70180 	 * Its okay for the add to fail because we have exhausted
18575f94e909Ssg70180 	 * all the resouces in the hardware device. Any other error
18585f94e909Ssg70180 	 * we want to flag.
18595f94e909Ssg70180 	 */
18605f94e909Ssg70180 	if (rv != ENOSPC) {
18615f94e909Ssg70180 		cmn_err(CE_WARN, "!vsw%d: error programming "
1862*19b65a69Ssb155480 		    "address %s into HW err (%d)",
1863*19b65a69Ssb155480 		    vswp->instance, ether_sprintf((void *)mac->mma_addr), rv);
18645f94e909Ssg70180 	}
18655f94e909Ssg70180 	D1(vswp, "%s: exit", __func__);
1866*19b65a69Ssb155480 	return (rv);
1867e1ebb9ecSlm66018 }
1868e1ebb9ecSlm66018 
18695f94e909Ssg70180 /*
18705f94e909Ssg70180  * Remove a unicast mac address which has previously been programmed
18715f94e909Ssg70180  * into HW.
18725f94e909Ssg70180  *
18735f94e909Ssg70180  * Returns 0 on sucess, 1 on failure.
18745f94e909Ssg70180  */
18755f94e909Ssg70180 static int
18765f94e909Ssg70180 vsw_unset_hw_addr(vsw_t *vswp, int slot)
18775f94e909Ssg70180 {
18785f94e909Ssg70180 	void	*mah;
18795f94e909Ssg70180 	int	rv;
1880e1ebb9ecSlm66018 
18815f94e909Ssg70180 	D1(vswp, "%s: enter", __func__);
18825f94e909Ssg70180 
18835f94e909Ssg70180 	ASSERT(MUTEX_HELD(&vswp->hw_lock));
18845f94e909Ssg70180 	ASSERT(slot >= 0);
18855f94e909Ssg70180 
18865f94e909Ssg70180 	if (vswp->maddr.maddr_handle == NULL)
18875f94e909Ssg70180 		return (1);
18885f94e909Ssg70180 
18895f94e909Ssg70180 	mah = vswp->maddr.maddr_handle;
18905f94e909Ssg70180 
18915f94e909Ssg70180 	rv = vswp->maddr.maddr_remove(mah, slot);
18925f94e909Ssg70180 	if (rv != 0) {
18935f94e909Ssg70180 		cmn_err(CE_WARN, "!vsw%d: unable to remove address "
18945f94e909Ssg70180 		    "from slot %d in device %s (err %d)",
18955f94e909Ssg70180 		    vswp->instance, slot, vswp->physname, rv);
18965f94e909Ssg70180 		return (1);
1897e1ebb9ecSlm66018 	}
1898e1ebb9ecSlm66018 
18995f94e909Ssg70180 	D2(vswp, "removed addr from slot %d in device %s",
19005f94e909Ssg70180 	    slot, vswp->physname);
19015f94e909Ssg70180 
1902e1ebb9ecSlm66018 	D1(vswp, "%s: exit", __func__);
1903e1ebb9ecSlm66018 	return (0);
1904e1ebb9ecSlm66018 }
1905e1ebb9ecSlm66018 
1906e1ebb9ecSlm66018 /*
1907e1ebb9ecSlm66018  * Set network card into promisc mode.
1908e1ebb9ecSlm66018  *
1909e1ebb9ecSlm66018  * Returns 0 on success, 1 on failure.
1910e1ebb9ecSlm66018  */
1911e1ebb9ecSlm66018 static int
19125f94e909Ssg70180 vsw_set_hw_promisc(vsw_t *vswp, vsw_port_t *port, int type)
1913e1ebb9ecSlm66018 {
1914e1ebb9ecSlm66018 	D1(vswp, "%s: enter", __func__);
1915e1ebb9ecSlm66018 
19165f94e909Ssg70180 	ASSERT(MUTEX_HELD(&vswp->hw_lock));
19175f94e909Ssg70180 	ASSERT((type == VSW_LOCALDEV) || (type == VSW_VNETPORT));
19185f94e909Ssg70180 
191934683adeSsg70180 	mutex_enter(&vswp->mac_lock);
192034683adeSsg70180 	if (vswp->mh == NULL) {
192134683adeSsg70180 		mutex_exit(&vswp->mac_lock);
1922e1ebb9ecSlm66018 		return (1);
192334683adeSsg70180 	}
1924e1ebb9ecSlm66018 
1925e1ebb9ecSlm66018 	if (vswp->promisc_cnt++ == 0) {
1926e1ebb9ecSlm66018 		if (mac_promisc_set(vswp->mh, B_TRUE, MAC_DEVPROMISC) != 0) {
1927e1ebb9ecSlm66018 			vswp->promisc_cnt--;
192834683adeSsg70180 			mutex_exit(&vswp->mac_lock);
1929e1ebb9ecSlm66018 			return (1);
1930e1ebb9ecSlm66018 		}
193134683adeSsg70180 		cmn_err(CE_NOTE, "!vsw%d: switching device %s into "
193234683adeSsg70180 		    "promiscuous mode", vswp->instance, vswp->physname);
1933e1ebb9ecSlm66018 	}
193434683adeSsg70180 	mutex_exit(&vswp->mac_lock);
19355f94e909Ssg70180 
19365f94e909Ssg70180 	if (type == VSW_VNETPORT) {
19375f94e909Ssg70180 		ASSERT(port != NULL);
1938e1ebb9ecSlm66018 		port->addr_set = VSW_ADDR_PROMISC;
19395f94e909Ssg70180 	} else {
19405f94e909Ssg70180 		vswp->addr_set = VSW_ADDR_PROMISC;
19415f94e909Ssg70180 	}
1942e1ebb9ecSlm66018 
1943e1ebb9ecSlm66018 	D1(vswp, "%s: exit", __func__);
1944e1ebb9ecSlm66018 
1945e1ebb9ecSlm66018 	return (0);
1946e1ebb9ecSlm66018 }
1947e1ebb9ecSlm66018 
1948e1ebb9ecSlm66018 /*
1949e1ebb9ecSlm66018  * Turn off promiscuous mode on network card.
1950e1ebb9ecSlm66018  *
1951e1ebb9ecSlm66018  * Returns 0 on success, 1 on failure.
1952e1ebb9ecSlm66018  */
1953e1ebb9ecSlm66018 static int
19545f94e909Ssg70180 vsw_unset_hw_promisc(vsw_t *vswp, vsw_port_t *port, int type)
1955e1ebb9ecSlm66018 {
1956e1ebb9ecSlm66018 	vsw_port_list_t 	*plist = &vswp->plist;
1957e1ebb9ecSlm66018 
195834683adeSsg70180 	D2(vswp, "%s: enter", __func__);
1959e1ebb9ecSlm66018 
19605f94e909Ssg70180 	ASSERT(MUTEX_HELD(&vswp->hw_lock));
19615f94e909Ssg70180 	ASSERT((type == VSW_LOCALDEV) || (type == VSW_VNETPORT));
19625f94e909Ssg70180 
196334683adeSsg70180 	mutex_enter(&vswp->mac_lock);
196434683adeSsg70180 	if (vswp->mh == NULL) {
196534683adeSsg70180 		mutex_exit(&vswp->mac_lock);
1966e1ebb9ecSlm66018 		return (1);
196734683adeSsg70180 	}
1968e1ebb9ecSlm66018 
1969e1ebb9ecSlm66018 	if (--vswp->promisc_cnt == 0) {
1970e1ebb9ecSlm66018 		if (mac_promisc_set(vswp->mh, B_FALSE, MAC_DEVPROMISC) != 0) {
1971e1ebb9ecSlm66018 			vswp->promisc_cnt++;
197234683adeSsg70180 			mutex_exit(&vswp->mac_lock);
1973e1ebb9ecSlm66018 			return (1);
1974e1ebb9ecSlm66018 		}
1975e1ebb9ecSlm66018 
1976e1ebb9ecSlm66018 		/*
1977e1ebb9ecSlm66018 		 * We are exiting promisc mode either because we were
1978e1ebb9ecSlm66018 		 * only in promisc mode because we had failed over from
1979e1ebb9ecSlm66018 		 * switched mode due to HW resource issues, or the user
1980e1ebb9ecSlm66018 		 * wanted the card in promisc mode for all the ports and
1981e1ebb9ecSlm66018 		 * the last port is now being deleted. Tweak the message
1982e1ebb9ecSlm66018 		 * accordingly.
1983e1ebb9ecSlm66018 		 */
1984e1ebb9ecSlm66018 		if (plist->num_ports != 0) {
198534683adeSsg70180 			cmn_err(CE_NOTE, "!vsw%d: switching device %s back to "
1986205eeb1aSlm66018 			    "programmed mode", vswp->instance, vswp->physname);
19871ae08745Sheppo 		} else {
198834683adeSsg70180 			cmn_err(CE_NOTE, "!vsw%d: switching device %s out of "
1989205eeb1aSlm66018 			    "promiscuous mode", vswp->instance, vswp->physname);
19901ae08745Sheppo 		}
19911ae08745Sheppo 	}
199234683adeSsg70180 	mutex_exit(&vswp->mac_lock);
19935f94e909Ssg70180 
19945f94e909Ssg70180 	if (type == VSW_VNETPORT) {
19955f94e909Ssg70180 		ASSERT(port != NULL);
19965f94e909Ssg70180 		ASSERT(port->addr_set == VSW_ADDR_PROMISC);
1997e1ebb9ecSlm66018 		port->addr_set = VSW_ADDR_UNSET;
19985f94e909Ssg70180 	} else {
19995f94e909Ssg70180 		ASSERT(vswp->addr_set == VSW_ADDR_PROMISC);
20005f94e909Ssg70180 		vswp->addr_set = VSW_ADDR_UNSET;
20015f94e909Ssg70180 	}
2002e1ebb9ecSlm66018 
2003e1ebb9ecSlm66018 	D1(vswp, "%s: exit", __func__);
2004e1ebb9ecSlm66018 	return (0);
2005e1ebb9ecSlm66018 }
2006e1ebb9ecSlm66018 
2007e1ebb9ecSlm66018 /*
2008e1ebb9ecSlm66018  * Determine whether or not we are operating in our prefered
2009e1ebb9ecSlm66018  * mode and if not whether the physical resources now allow us
2010e1ebb9ecSlm66018  * to operate in it.
2011e1ebb9ecSlm66018  *
20125f94e909Ssg70180  * If a port is being removed should only be invoked after port has been
2013e1ebb9ecSlm66018  * removed from the port list.
2014e1ebb9ecSlm66018  */
20155f94e909Ssg70180 static void
2016e1ebb9ecSlm66018 vsw_reconfig_hw(vsw_t *vswp)
2017e1ebb9ecSlm66018 {
2018e1ebb9ecSlm66018 	int			s_idx;
2019e1ebb9ecSlm66018 
2020e1ebb9ecSlm66018 	D1(vswp, "%s: enter", __func__);
2021e1ebb9ecSlm66018 
20225f94e909Ssg70180 	ASSERT(MUTEX_HELD(&vswp->hw_lock));
2023e1ebb9ecSlm66018 
20245f94e909Ssg70180 	if (vswp->maddr.maddr_handle == NULL) {
20255f94e909Ssg70180 		return;
20265f94e909Ssg70180 	}
2027e1ebb9ecSlm66018 
2028e1ebb9ecSlm66018 	/*
2029e1ebb9ecSlm66018 	 * If we are in layer 2 (i.e. switched) or would like to be
20305f94e909Ssg70180 	 * in layer 2 then check if any ports or the vswitch itself
20315f94e909Ssg70180 	 * need to be programmed into the HW.
2032e1ebb9ecSlm66018 	 *
2033e1ebb9ecSlm66018 	 * This can happen in two cases - switched was specified as
2034e1ebb9ecSlm66018 	 * the prefered mode of operation but we exhausted the HW
2035e1ebb9ecSlm66018 	 * resources and so failed over to the next specifed mode,
2036e1ebb9ecSlm66018 	 * or switched was the only mode specified so after HW
2037e1ebb9ecSlm66018 	 * resources were exhausted there was nothing more we
2038e1ebb9ecSlm66018 	 * could do.
2039e1ebb9ecSlm66018 	 */
2040e1ebb9ecSlm66018 	if (vswp->smode_idx > 0)
2041e1ebb9ecSlm66018 		s_idx = vswp->smode_idx - 1;
2042e1ebb9ecSlm66018 	else
2043e1ebb9ecSlm66018 		s_idx = vswp->smode_idx;
2044e1ebb9ecSlm66018 
20455f94e909Ssg70180 	if (vswp->smode[s_idx] != VSW_LAYER2) {
20465f94e909Ssg70180 		return;
20475f94e909Ssg70180 	}
2048e1ebb9ecSlm66018 
2049e1ebb9ecSlm66018 	D2(vswp, "%s: attempting reconfig..", __func__);
2050e1ebb9ecSlm66018 
2051e1ebb9ecSlm66018 	/*
20525f94e909Ssg70180 	 * First, attempt to set the vswitch mac address into HW,
20535f94e909Ssg70180 	 * if required.
2054e1ebb9ecSlm66018 	 */
20555f94e909Ssg70180 	if (vsw_prog_if(vswp)) {
20565f94e909Ssg70180 		return;
2057e1ebb9ecSlm66018 	}
2058e1ebb9ecSlm66018 
2059e1ebb9ecSlm66018 	/*
20605f94e909Ssg70180 	 * Next, attempt to set any ports which have not yet been
20615f94e909Ssg70180 	 * programmed into HW.
2062e1ebb9ecSlm66018 	 */
20635f94e909Ssg70180 	if (vsw_prog_ports(vswp)) {
20645f94e909Ssg70180 		return;
2065e1ebb9ecSlm66018 	}
2066e1ebb9ecSlm66018 
20675f94e909Ssg70180 	/*
20685f94e909Ssg70180 	 * By now we know that have programmed all desired ports etc
20695f94e909Ssg70180 	 * into HW, so safe to mark reconfiguration as complete.
20705f94e909Ssg70180 	 */
2071e1ebb9ecSlm66018 	vswp->recfg_reqd = B_FALSE;
2072e1ebb9ecSlm66018 
2073e1ebb9ecSlm66018 	vswp->smode_idx = s_idx;
2074e1ebb9ecSlm66018 
20755f94e909Ssg70180 	D1(vswp, "%s: exit", __func__);
20765f94e909Ssg70180 }
20775f94e909Ssg70180 
20785f94e909Ssg70180 /*
20795f94e909Ssg70180  * Check to see if vsw itself is plumbed, and if so whether or not
20805f94e909Ssg70180  * its mac address should be written into HW.
20815f94e909Ssg70180  *
20825f94e909Ssg70180  * Returns 0 if could set address, or didn't have to set it.
20835f94e909Ssg70180  * Returns 1 if failed to set address.
20845f94e909Ssg70180  */
20855f94e909Ssg70180 static int
20865f94e909Ssg70180 vsw_prog_if(vsw_t *vswp)
20875f94e909Ssg70180 {
20885f94e909Ssg70180 	mac_multi_addr_t	addr;
20895f94e909Ssg70180 
20905f94e909Ssg70180 	D1(vswp, "%s: enter", __func__);
20915f94e909Ssg70180 
20925f94e909Ssg70180 	ASSERT(MUTEX_HELD(&vswp->hw_lock));
20935f94e909Ssg70180 
20945f94e909Ssg70180 	READ_ENTER(&vswp->if_lockrw);
20955f94e909Ssg70180 	if ((vswp->if_state & VSW_IF_UP) &&
20965f94e909Ssg70180 	    (vswp->addr_set != VSW_ADDR_HW)) {
20975f94e909Ssg70180 
20985f94e909Ssg70180 		addr.mma_addrlen = ETHERADDRL;
20995f94e909Ssg70180 		ether_copy(&vswp->if_addr, &addr.mma_addr);
21005f94e909Ssg70180 
21015f94e909Ssg70180 		if (vsw_set_hw_addr(vswp, &addr) != 0) {
21025f94e909Ssg70180 			RW_EXIT(&vswp->if_lockrw);
21035f94e909Ssg70180 			return (1);
21045f94e909Ssg70180 		}
21055f94e909Ssg70180 
21065f94e909Ssg70180 		vswp->addr_slot = addr.mma_slot;
21075f94e909Ssg70180 
21085f94e909Ssg70180 		/*
21095f94e909Ssg70180 		 * If previously when plumbed had had to place
21105f94e909Ssg70180 		 * interface into promisc mode, now reverse that.
21115f94e909Ssg70180 		 *
21125f94e909Ssg70180 		 * Note that interface will only actually be set into
21135f94e909Ssg70180 		 * non-promisc mode when last port/interface has been
21145f94e909Ssg70180 		 * programmed into HW.
21155f94e909Ssg70180 		 */
21165f94e909Ssg70180 		if (vswp->addr_set == VSW_ADDR_PROMISC)
21175f94e909Ssg70180 			(void) vsw_unset_hw_promisc(vswp, NULL, VSW_LOCALDEV);
21185f94e909Ssg70180 
21195f94e909Ssg70180 		vswp->addr_set = VSW_ADDR_HW;
21205f94e909Ssg70180 	}
21215f94e909Ssg70180 	RW_EXIT(&vswp->if_lockrw);
21225f94e909Ssg70180 
21235f94e909Ssg70180 	D1(vswp, "%s: exit", __func__);
2124e1ebb9ecSlm66018 	return (0);
2125e1ebb9ecSlm66018 }
2126e1ebb9ecSlm66018 
21275f94e909Ssg70180 /*
21285f94e909Ssg70180  * Scan the port list for any ports which have not yet been set
21295f94e909Ssg70180  * into HW. For those found attempt to program their mac addresses
21305f94e909Ssg70180  * into the physical device.
21315f94e909Ssg70180  *
21325f94e909Ssg70180  * Returns 0 if able to program all required ports (can be 0) into HW.
21335f94e909Ssg70180  * Returns 1 if failed to set at least one mac address.
21345f94e909Ssg70180  */
21355f94e909Ssg70180 static int
21365f94e909Ssg70180 vsw_prog_ports(vsw_t *vswp)
21375f94e909Ssg70180 {
21385f94e909Ssg70180 	mac_multi_addr_t	addr;
21395f94e909Ssg70180 	vsw_port_list_t		*plist = &vswp->plist;
21405f94e909Ssg70180 	vsw_port_t		*tp;
21415f94e909Ssg70180 	int			rv = 0;
21425f94e909Ssg70180 
21435f94e909Ssg70180 	D1(vswp, "%s: enter", __func__);
21445f94e909Ssg70180 
21455f94e909Ssg70180 	ASSERT(MUTEX_HELD(&vswp->hw_lock));
21465f94e909Ssg70180 
21475f94e909Ssg70180 	READ_ENTER(&plist->lockrw);
21485f94e909Ssg70180 	for (tp = plist->head; tp != NULL; tp = tp->p_next) {
21495f94e909Ssg70180 		if (tp->addr_set != VSW_ADDR_HW) {
21505f94e909Ssg70180 			addr.mma_addrlen = ETHERADDRL;
21515f94e909Ssg70180 			ether_copy(&tp->p_macaddr, &addr.mma_addr);
21525f94e909Ssg70180 
21535f94e909Ssg70180 			if (vsw_set_hw_addr(vswp, &addr) != 0) {
21545f94e909Ssg70180 				rv = 1;
21555f94e909Ssg70180 				break;
21565f94e909Ssg70180 			}
21575f94e909Ssg70180 
21585f94e909Ssg70180 			tp->addr_slot = addr.mma_slot;
21595f94e909Ssg70180 
21605f94e909Ssg70180 			/*
21615f94e909Ssg70180 			 * If when this port had first attached we had
21625f94e909Ssg70180 			 * had to place the interface into promisc mode,
21635f94e909Ssg70180 			 * then now reverse that.
21645f94e909Ssg70180 			 *
21655f94e909Ssg70180 			 * Note that the interface will not actually
21665f94e909Ssg70180 			 * change to non-promisc mode until all ports
21675f94e909Ssg70180 			 * have been programmed.
21685f94e909Ssg70180 			 */
21695f94e909Ssg70180 			if (tp->addr_set == VSW_ADDR_PROMISC)
21705f94e909Ssg70180 				(void) vsw_unset_hw_promisc(vswp,
21715f94e909Ssg70180 				    tp, VSW_VNETPORT);
21725f94e909Ssg70180 
21735f94e909Ssg70180 			tp->addr_set = VSW_ADDR_HW;
21745f94e909Ssg70180 		}
21755f94e909Ssg70180 	}
21765f94e909Ssg70180 	RW_EXIT(&plist->lockrw);
21775f94e909Ssg70180 
21785f94e909Ssg70180 	D1(vswp, "%s: exit", __func__);
2179e1ebb9ecSlm66018 	return (rv);
21801ae08745Sheppo }
21811ae08745Sheppo 
21827636cb21Slm66018 static void
21837636cb21Slm66018 vsw_mac_ring_tbl_entry_init(vsw_t *vswp, vsw_mac_ring_t *ringp)
21847636cb21Slm66018 {
21857636cb21Slm66018 	ringp->ring_state = VSW_MAC_RING_FREE;
21867636cb21Slm66018 	ringp->ring_arg = NULL;
21877636cb21Slm66018 	ringp->ring_blank = NULL;
21887636cb21Slm66018 	ringp->ring_vqp = NULL;
21897636cb21Slm66018 	ringp->ring_vswp = vswp;
21907636cb21Slm66018 }
21917636cb21Slm66018 
21927636cb21Slm66018 static void
21937636cb21Slm66018 vsw_mac_ring_tbl_init(vsw_t *vswp)
21947636cb21Slm66018 {
21957636cb21Slm66018 	int		i;
21967636cb21Slm66018 
21977636cb21Slm66018 	mutex_init(&vswp->mac_ring_lock, NULL, MUTEX_DRIVER, NULL);
21987636cb21Slm66018 
21997636cb21Slm66018 	vswp->mac_ring_tbl_sz = vsw_mac_rx_rings;
22007636cb21Slm66018 	vswp->mac_ring_tbl  =
2201205eeb1aSlm66018 	    kmem_alloc(vsw_mac_rx_rings * sizeof (vsw_mac_ring_t), KM_SLEEP);
22027636cb21Slm66018 
22037636cb21Slm66018 	for (i = 0; i < vswp->mac_ring_tbl_sz; i++)
22047636cb21Slm66018 		vsw_mac_ring_tbl_entry_init(vswp, &vswp->mac_ring_tbl[i]);
22057636cb21Slm66018 }
22067636cb21Slm66018 
22077636cb21Slm66018 static void
22087636cb21Slm66018 vsw_mac_ring_tbl_destroy(vsw_t *vswp)
22097636cb21Slm66018 {
22107636cb21Slm66018 	int		i;
221134683adeSsg70180 	vsw_mac_ring_t	*ringp;
22127636cb21Slm66018 
22137636cb21Slm66018 	mutex_enter(&vswp->mac_ring_lock);
22147636cb21Slm66018 	for (i = 0; i < vswp->mac_ring_tbl_sz; i++) {
221534683adeSsg70180 		ringp = &vswp->mac_ring_tbl[i];
221634683adeSsg70180 
221734683adeSsg70180 		if (ringp->ring_state != VSW_MAC_RING_FREE) {
22187636cb21Slm66018 			/*
22197636cb21Slm66018 			 * Destroy the queue.
22207636cb21Slm66018 			 */
222134683adeSsg70180 			vsw_queue_stop(ringp->ring_vqp);
222234683adeSsg70180 			vsw_queue_destroy(ringp->ring_vqp);
22237636cb21Slm66018 
22247636cb21Slm66018 			/*
22257636cb21Slm66018 			 * Re-initialize the structure.
22267636cb21Slm66018 			 */
222734683adeSsg70180 			vsw_mac_ring_tbl_entry_init(vswp, ringp);
22287636cb21Slm66018 		}
22297636cb21Slm66018 	}
22307636cb21Slm66018 	mutex_exit(&vswp->mac_ring_lock);
22317636cb21Slm66018 
22327636cb21Slm66018 	mutex_destroy(&vswp->mac_ring_lock);
22337636cb21Slm66018 	kmem_free(vswp->mac_ring_tbl,
22347636cb21Slm66018 	    vswp->mac_ring_tbl_sz * sizeof (vsw_mac_ring_t));
22357636cb21Slm66018 	vswp->mac_ring_tbl_sz = 0;
22367636cb21Slm66018 }
22377636cb21Slm66018 
22387636cb21Slm66018 /*
22397636cb21Slm66018  * Handle resource add callbacks from the driver below.
22407636cb21Slm66018  */
22417636cb21Slm66018 static mac_resource_handle_t
22427636cb21Slm66018 vsw_mac_ring_add_cb(void *arg, mac_resource_t *mrp)
22437636cb21Slm66018 {
22447636cb21Slm66018 	vsw_t		*vswp = (vsw_t *)arg;
22457636cb21Slm66018 	mac_rx_fifo_t	*mrfp = (mac_rx_fifo_t *)mrp;
22467636cb21Slm66018 	vsw_mac_ring_t	*ringp;
22477636cb21Slm66018 	vsw_queue_t	*vqp;
22487636cb21Slm66018 	int		i;
22497636cb21Slm66018 
22507636cb21Slm66018 	ASSERT(vswp != NULL);
22517636cb21Slm66018 	ASSERT(mrp != NULL);
22527636cb21Slm66018 	ASSERT(vswp->mac_ring_tbl != NULL);
22537636cb21Slm66018 
22547636cb21Slm66018 	D1(vswp, "%s: enter", __func__);
22557636cb21Slm66018 
22567636cb21Slm66018 	/*
22577636cb21Slm66018 	 * Check to make sure we have the correct resource type.
22587636cb21Slm66018 	 */
22597636cb21Slm66018 	if (mrp->mr_type != MAC_RX_FIFO)
22607636cb21Slm66018 		return (NULL);
22617636cb21Slm66018 
22627636cb21Slm66018 	/*
22637636cb21Slm66018 	 * Find a open entry in the ring table.
22647636cb21Slm66018 	 */
22657636cb21Slm66018 	mutex_enter(&vswp->mac_ring_lock);
22667636cb21Slm66018 	for (i = 0; i < vswp->mac_ring_tbl_sz; i++) {
22677636cb21Slm66018 		ringp = &vswp->mac_ring_tbl[i];
22687636cb21Slm66018 
22697636cb21Slm66018 		/*
22707636cb21Slm66018 		 * Check for an empty slot, if found, then setup queue
22717636cb21Slm66018 		 * and thread.
22727636cb21Slm66018 		 */
22737636cb21Slm66018 		if (ringp->ring_state == VSW_MAC_RING_FREE) {
22747636cb21Slm66018 			/*
22757636cb21Slm66018 			 * Create the queue for this ring.
22767636cb21Slm66018 			 */
22777636cb21Slm66018 			vqp = vsw_queue_create();
22787636cb21Slm66018 
22797636cb21Slm66018 			/*
22807636cb21Slm66018 			 * Initialize the ring data structure.
22817636cb21Slm66018 			 */
22827636cb21Slm66018 			ringp->ring_vqp = vqp;
22837636cb21Slm66018 			ringp->ring_arg = mrfp->mrf_arg;
22847636cb21Slm66018 			ringp->ring_blank = mrfp->mrf_blank;
22857636cb21Slm66018 			ringp->ring_state = VSW_MAC_RING_INUSE;
22867636cb21Slm66018 
22877636cb21Slm66018 			/*
22887636cb21Slm66018 			 * Create the worker thread.
22897636cb21Slm66018 			 */
22907636cb21Slm66018 			vqp->vq_worker = thread_create(NULL, 0,
22917636cb21Slm66018 			    vsw_queue_worker, ringp, 0, &p0,
22927636cb21Slm66018 			    TS_RUN, minclsyspri);
22937636cb21Slm66018 			if (vqp->vq_worker == NULL) {
22947636cb21Slm66018 				vsw_queue_destroy(vqp);
22957636cb21Slm66018 				vsw_mac_ring_tbl_entry_init(vswp, ringp);
22967636cb21Slm66018 				ringp = NULL;
22977636cb21Slm66018 			}
22987636cb21Slm66018 
229934683adeSsg70180 			if (ringp != NULL) {
230034683adeSsg70180 				/*
230134683adeSsg70180 				 * Make sure thread get's running state for
230234683adeSsg70180 				 * this ring.
230334683adeSsg70180 				 */
230434683adeSsg70180 				mutex_enter(&vqp->vq_lock);
230534683adeSsg70180 				while ((vqp->vq_state != VSW_QUEUE_RUNNING) &&
230634683adeSsg70180 				    (vqp->vq_state != VSW_QUEUE_DRAINED)) {
230734683adeSsg70180 					cv_wait(&vqp->vq_cv, &vqp->vq_lock);
230834683adeSsg70180 				}
230934683adeSsg70180 
231034683adeSsg70180 				/*
231134683adeSsg70180 				 * If the thread is not running, cleanup.
231234683adeSsg70180 				 */
231334683adeSsg70180 				if (vqp->vq_state == VSW_QUEUE_DRAINED) {
231434683adeSsg70180 					vsw_queue_destroy(vqp);
231534683adeSsg70180 					vsw_mac_ring_tbl_entry_init(vswp,
231634683adeSsg70180 					    ringp);
231734683adeSsg70180 					ringp = NULL;
231834683adeSsg70180 				}
231934683adeSsg70180 				mutex_exit(&vqp->vq_lock);
232034683adeSsg70180 			}
232134683adeSsg70180 
23227636cb21Slm66018 			mutex_exit(&vswp->mac_ring_lock);
23237636cb21Slm66018 			D1(vswp, "%s: exit", __func__);
23247636cb21Slm66018 			return ((mac_resource_handle_t)ringp);
23257636cb21Slm66018 		}
23267636cb21Slm66018 	}
23277636cb21Slm66018 	mutex_exit(&vswp->mac_ring_lock);
23287636cb21Slm66018 
23297636cb21Slm66018 	/*
23307636cb21Slm66018 	 * No slots in the ring table available.
23317636cb21Slm66018 	 */
23327636cb21Slm66018 	D1(vswp, "%s: exit", __func__);
23337636cb21Slm66018 	return (NULL);
23347636cb21Slm66018 }
23357636cb21Slm66018 
23367636cb21Slm66018 static void
23377636cb21Slm66018 vsw_queue_stop(vsw_queue_t *vqp)
23387636cb21Slm66018 {
23397636cb21Slm66018 	mutex_enter(&vqp->vq_lock);
23407636cb21Slm66018 
23417636cb21Slm66018 	if (vqp->vq_state == VSW_QUEUE_RUNNING) {
23427636cb21Slm66018 		vqp->vq_state = VSW_QUEUE_STOP;
23437636cb21Slm66018 		cv_signal(&vqp->vq_cv);
23447636cb21Slm66018 
23457636cb21Slm66018 		while (vqp->vq_state != VSW_QUEUE_DRAINED)
23467636cb21Slm66018 			cv_wait(&vqp->vq_cv, &vqp->vq_lock);
23477636cb21Slm66018 	}
23487636cb21Slm66018 
234934683adeSsg70180 	vqp->vq_state = VSW_QUEUE_STOPPED;
235034683adeSsg70180 
23517636cb21Slm66018 	mutex_exit(&vqp->vq_lock);
23527636cb21Slm66018 }
23537636cb21Slm66018 
23547636cb21Slm66018 static vsw_queue_t *
23557636cb21Slm66018 vsw_queue_create()
23567636cb21Slm66018 {
23577636cb21Slm66018 	vsw_queue_t *vqp;
23587636cb21Slm66018 
23597636cb21Slm66018 	vqp = kmem_zalloc(sizeof (vsw_queue_t), KM_SLEEP);
23607636cb21Slm66018 
23617636cb21Slm66018 	mutex_init(&vqp->vq_lock, NULL, MUTEX_DRIVER, NULL);
23627636cb21Slm66018 	cv_init(&vqp->vq_cv, NULL, CV_DRIVER, NULL);
23637636cb21Slm66018 	vqp->vq_first = NULL;
23647636cb21Slm66018 	vqp->vq_last = NULL;
236534683adeSsg70180 	vqp->vq_state = VSW_QUEUE_STOPPED;
23667636cb21Slm66018 
23677636cb21Slm66018 	return (vqp);
23687636cb21Slm66018 }
23697636cb21Slm66018 
23707636cb21Slm66018 static void
23717636cb21Slm66018 vsw_queue_destroy(vsw_queue_t *vqp)
23727636cb21Slm66018 {
23737636cb21Slm66018 	cv_destroy(&vqp->vq_cv);
23747636cb21Slm66018 	mutex_destroy(&vqp->vq_lock);
23757636cb21Slm66018 	kmem_free(vqp, sizeof (vsw_queue_t));
23767636cb21Slm66018 }
23777636cb21Slm66018 
23787636cb21Slm66018 static void
23797636cb21Slm66018 vsw_queue_worker(vsw_mac_ring_t *rrp)
23807636cb21Slm66018 {
23817636cb21Slm66018 	mblk_t		*mp;
23827636cb21Slm66018 	vsw_queue_t	*vqp = rrp->ring_vqp;
23837636cb21Slm66018 	vsw_t		*vswp = rrp->ring_vswp;
23847636cb21Slm66018 
23857636cb21Slm66018 	mutex_enter(&vqp->vq_lock);
23867636cb21Slm66018 
238734683adeSsg70180 	ASSERT(vqp->vq_state == VSW_QUEUE_STOPPED);
23887636cb21Slm66018 
23897636cb21Slm66018 	/*
23907636cb21Slm66018 	 * Set the state to running, since the thread is now active.
23917636cb21Slm66018 	 */
23927636cb21Slm66018 	vqp->vq_state = VSW_QUEUE_RUNNING;
239334683adeSsg70180 	cv_signal(&vqp->vq_cv);
23947636cb21Slm66018 
23957636cb21Slm66018 	while (vqp->vq_state == VSW_QUEUE_RUNNING) {
23967636cb21Slm66018 		/*
23977636cb21Slm66018 		 * Wait for work to do or the state has changed
23987636cb21Slm66018 		 * to not running.
23997636cb21Slm66018 		 */
24007636cb21Slm66018 		while ((vqp->vq_state == VSW_QUEUE_RUNNING) &&
24017636cb21Slm66018 		    (vqp->vq_first == NULL)) {
24027636cb21Slm66018 			cv_wait(&vqp->vq_cv, &vqp->vq_lock);
24037636cb21Slm66018 		}
24047636cb21Slm66018 
24057636cb21Slm66018 		/*
24067636cb21Slm66018 		 * Process packets that we received from the interface.
24077636cb21Slm66018 		 */
24087636cb21Slm66018 		if (vqp->vq_first != NULL) {
24097636cb21Slm66018 			mp = vqp->vq_first;
24107636cb21Slm66018 
24117636cb21Slm66018 			vqp->vq_first = NULL;
24127636cb21Slm66018 			vqp->vq_last = NULL;
24137636cb21Slm66018 
24147636cb21Slm66018 			mutex_exit(&vqp->vq_lock);
24157636cb21Slm66018 
24167636cb21Slm66018 			/* switch the chain of packets received */
241734683adeSsg70180 			vswp->vsw_switch_frame(vswp, mp,
241834683adeSsg70180 			    VSW_PHYSDEV, NULL, NULL);
24197636cb21Slm66018 
24207636cb21Slm66018 			mutex_enter(&vqp->vq_lock);
24217636cb21Slm66018 		}
24227636cb21Slm66018 	}
24237636cb21Slm66018 
24247636cb21Slm66018 	/*
24257636cb21Slm66018 	 * We are drained and signal we are done.
24267636cb21Slm66018 	 */
24277636cb21Slm66018 	vqp->vq_state = VSW_QUEUE_DRAINED;
24287636cb21Slm66018 	cv_signal(&vqp->vq_cv);
24297636cb21Slm66018 
24307636cb21Slm66018 	/*
24317636cb21Slm66018 	 * Exit lock and drain the remaining packets.
24327636cb21Slm66018 	 */
24337636cb21Slm66018 	mutex_exit(&vqp->vq_lock);
24347636cb21Slm66018 
24357636cb21Slm66018 	/*
24367636cb21Slm66018 	 * Exit the thread
24377636cb21Slm66018 	 */
24387636cb21Slm66018 	thread_exit();
24397636cb21Slm66018 }
24407636cb21Slm66018 
24417636cb21Slm66018 /*
24427636cb21Slm66018  * static void
24437636cb21Slm66018  * vsw_rx_queue_cb() - Receive callback routine when
24447636cb21Slm66018  *	vsw_multi_ring_enable is non-zero.  Queue the packets
24457636cb21Slm66018  *	to a packet queue for a worker thread to process.
24467636cb21Slm66018  */
24477636cb21Slm66018 static void
24487636cb21Slm66018 vsw_rx_queue_cb(void *arg, mac_resource_handle_t mrh, mblk_t *mp)
24497636cb21Slm66018 {
24507636cb21Slm66018 	vsw_mac_ring_t	*ringp = (vsw_mac_ring_t *)mrh;
24517636cb21Slm66018 	vsw_t		*vswp = (vsw_t *)arg;
24527636cb21Slm66018 	vsw_queue_t	*vqp;
24537636cb21Slm66018 	mblk_t		*bp, *last;
24547636cb21Slm66018 
24557636cb21Slm66018 	ASSERT(mrh != NULL);
24567636cb21Slm66018 	ASSERT(vswp != NULL);
24577636cb21Slm66018 	ASSERT(mp != NULL);
24587636cb21Slm66018 
24597636cb21Slm66018 	D1(vswp, "%s: enter", __func__);
24607636cb21Slm66018 
24617636cb21Slm66018 	/*
24627636cb21Slm66018 	 * Find the last element in the mblk chain.
24637636cb21Slm66018 	 */
24647636cb21Slm66018 	bp = mp;
24657636cb21Slm66018 	do {
24667636cb21Slm66018 		last = bp;
24677636cb21Slm66018 		bp = bp->b_next;
24687636cb21Slm66018 	} while (bp != NULL);
24697636cb21Slm66018 
24707636cb21Slm66018 	/* Get the queue for the packets */
24717636cb21Slm66018 	vqp = ringp->ring_vqp;
24727636cb21Slm66018 
24737636cb21Slm66018 	/*
24747636cb21Slm66018 	 * Grab the lock such we can queue the packets.
24757636cb21Slm66018 	 */
24767636cb21Slm66018 	mutex_enter(&vqp->vq_lock);
24777636cb21Slm66018 
24787636cb21Slm66018 	if (vqp->vq_state != VSW_QUEUE_RUNNING) {
24797636cb21Slm66018 		freemsg(mp);
248034683adeSsg70180 		mutex_exit(&vqp->vq_lock);
24817636cb21Slm66018 		goto vsw_rx_queue_cb_exit;
24827636cb21Slm66018 	}
24837636cb21Slm66018 
24847636cb21Slm66018 	/*
24857636cb21Slm66018 	 * Add the mblk chain to the queue.  If there
24867636cb21Slm66018 	 * is some mblks in the queue, then add the new
24877636cb21Slm66018 	 * chain to the end.
24887636cb21Slm66018 	 */
24897636cb21Slm66018 	if (vqp->vq_first == NULL)
24907636cb21Slm66018 		vqp->vq_first = mp;
24917636cb21Slm66018 	else
24927636cb21Slm66018 		vqp->vq_last->b_next = mp;
24937636cb21Slm66018 
24947636cb21Slm66018 	vqp->vq_last = last;
24957636cb21Slm66018 
24967636cb21Slm66018 	/*
24977636cb21Slm66018 	 * Signal the worker thread that there is work to
24987636cb21Slm66018 	 * do.
24997636cb21Slm66018 	 */
25007636cb21Slm66018 	cv_signal(&vqp->vq_cv);
25017636cb21Slm66018 
25027636cb21Slm66018 	/*
25037636cb21Slm66018 	 * Let go of the lock and exit.
25047636cb21Slm66018 	 */
25057636cb21Slm66018 	mutex_exit(&vqp->vq_lock);
250634683adeSsg70180 
250734683adeSsg70180 vsw_rx_queue_cb_exit:
25087636cb21Slm66018 	D1(vswp, "%s: exit", __func__);
25097636cb21Slm66018 }
25107636cb21Slm66018 
25111ae08745Sheppo /*
25121ae08745Sheppo  * receive callback routine. Invoked by MAC layer when there
25131ae08745Sheppo  * are pkts being passed up from physical device.
25141ae08745Sheppo  *
25151ae08745Sheppo  * PERF: It may be more efficient when the card is in promisc
25161ae08745Sheppo  * mode to check the dest address of the pkts here (against
25171ae08745Sheppo  * the FDB) rather than checking later. Needs to be investigated.
25181ae08745Sheppo  */
25191ae08745Sheppo static void
25201ae08745Sheppo vsw_rx_cb(void *arg, mac_resource_handle_t mrh, mblk_t *mp)
25211ae08745Sheppo {
25221ae08745Sheppo 	_NOTE(ARGUNUSED(mrh))
25231ae08745Sheppo 
25241ae08745Sheppo 	vsw_t		*vswp = (vsw_t *)arg;
25251ae08745Sheppo 
25261ae08745Sheppo 	ASSERT(vswp != NULL);
25271ae08745Sheppo 
25281ae08745Sheppo 	D1(vswp, "vsw_rx_cb: enter");
25291ae08745Sheppo 
25301ae08745Sheppo 	/* switch the chain of packets received */
253134683adeSsg70180 	vswp->vsw_switch_frame(vswp, mp, VSW_PHYSDEV, NULL, NULL);
25321ae08745Sheppo 
25331ae08745Sheppo 	D1(vswp, "vsw_rx_cb: exit");
25341ae08745Sheppo }
25351ae08745Sheppo 
25361ae08745Sheppo /*
25371ae08745Sheppo  * Send a message out over the physical device via the MAC layer.
25381ae08745Sheppo  *
25391ae08745Sheppo  * Returns any mblks that it was unable to transmit.
25401ae08745Sheppo  */
25411ae08745Sheppo static mblk_t *
25421ae08745Sheppo vsw_tx_msg(vsw_t *vswp, mblk_t *mp)
25431ae08745Sheppo {
25441ae08745Sheppo 	const mac_txinfo_t	*mtp;
25451ae08745Sheppo 	mblk_t			*nextp;
25461ae08745Sheppo 
254734683adeSsg70180 	mutex_enter(&vswp->mac_lock);
2548*19b65a69Ssb155480 	if ((vswp->mh == NULL) || (vswp->mstarted == B_FALSE)) {
2549*19b65a69Ssb155480 
25501ae08745Sheppo 		DERR(vswp, "vsw_tx_msg: dropping pkts: no tx routine avail");
255134683adeSsg70180 		mutex_exit(&vswp->mac_lock);
25521ae08745Sheppo 		return (mp);
25531ae08745Sheppo 	} else {
25541ae08745Sheppo 		for (;;) {
25551ae08745Sheppo 			nextp = mp->b_next;
25561ae08745Sheppo 			mp->b_next = NULL;
25571ae08745Sheppo 
25581ae08745Sheppo 			mtp = vswp->txinfo;
255934683adeSsg70180 
25601ae08745Sheppo 			if ((mp = mtp->mt_fn(mtp->mt_arg, mp)) != NULL) {
25611ae08745Sheppo 				mp->b_next = nextp;
25621ae08745Sheppo 				break;
25631ae08745Sheppo 			}
25641ae08745Sheppo 
25651ae08745Sheppo 			if ((mp = nextp) == NULL)
25661ae08745Sheppo 				break;
25671ae08745Sheppo 		}
25681ae08745Sheppo 	}
256934683adeSsg70180 	mutex_exit(&vswp->mac_lock);
25701ae08745Sheppo 
25711ae08745Sheppo 	return (mp);
25721ae08745Sheppo }
25731ae08745Sheppo 
25741ae08745Sheppo /*
25751ae08745Sheppo  * Register with the MAC layer as a network device, so we
25761ae08745Sheppo  * can be plumbed if necessary.
25771ae08745Sheppo  */
25781ae08745Sheppo static int
25791ae08745Sheppo vsw_mac_register(vsw_t *vswp)
25801ae08745Sheppo {
2581ba2e4443Sseb 	mac_register_t	*macp;
2582ba2e4443Sseb 	int		rv;
25831ae08745Sheppo 
25841ae08745Sheppo 	D1(vswp, "%s: enter", __func__);
25851ae08745Sheppo 
2586ba2e4443Sseb 	if ((macp = mac_alloc(MAC_VERSION)) == NULL)
2587ba2e4443Sseb 		return (EINVAL);
2588ba2e4443Sseb 	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
25891ae08745Sheppo 	macp->m_driver = vswp;
2590ba2e4443Sseb 	macp->m_dip = vswp->dip;
2591ba2e4443Sseb 	macp->m_src_addr = (uint8_t *)&vswp->if_addr;
2592ba2e4443Sseb 	macp->m_callbacks = &vsw_m_callbacks;
2593ba2e4443Sseb 	macp->m_min_sdu = 0;
2594ba2e4443Sseb 	macp->m_max_sdu = ETHERMTU;
2595ba2e4443Sseb 	rv = mac_register(macp, &vswp->if_mh);
2596ba2e4443Sseb 	mac_free(macp);
2597*19b65a69Ssb155480 	if (rv != 0) {
2598*19b65a69Ssb155480 		/*
2599*19b65a69Ssb155480 		 * Treat this as a non-fatal error as we may be
2600*19b65a69Ssb155480 		 * able to operate in some other mode.
2601*19b65a69Ssb155480 		 */
2602*19b65a69Ssb155480 		cmn_err(CE_NOTE, "!vsw%d: Unable to register as "
2603*19b65a69Ssb155480 		    "a provider with MAC layer", vswp->instance);
2604*19b65a69Ssb155480 		return (rv);
2605*19b65a69Ssb155480 	}
2606*19b65a69Ssb155480 
2607ba2e4443Sseb 	vswp->if_state |= VSW_IF_REG;
26081ae08745Sheppo 
26091ae08745Sheppo 	D1(vswp, "%s: exit", __func__);
26101ae08745Sheppo 
26111ae08745Sheppo 	return (rv);
26121ae08745Sheppo }
26131ae08745Sheppo 
26141ae08745Sheppo static int
26151ae08745Sheppo vsw_mac_unregister(vsw_t *vswp)
26161ae08745Sheppo {
26171ae08745Sheppo 	int		rv = 0;
26181ae08745Sheppo 
26191ae08745Sheppo 	D1(vswp, "%s: enter", __func__);
26201ae08745Sheppo 
26211ae08745Sheppo 	WRITE_ENTER(&vswp->if_lockrw);
26221ae08745Sheppo 
2623ba2e4443Sseb 	if (vswp->if_state & VSW_IF_REG) {
2624ba2e4443Sseb 		rv = mac_unregister(vswp->if_mh);
26251ae08745Sheppo 		if (rv != 0) {
26261ae08745Sheppo 			DWARN(vswp, "%s: unable to unregister from MAC "
26271ae08745Sheppo 			    "framework", __func__);
26281ae08745Sheppo 
26291ae08745Sheppo 			RW_EXIT(&vswp->if_lockrw);
26301ae08745Sheppo 			D1(vswp, "%s: fail exit", __func__);
26311ae08745Sheppo 			return (rv);
26321ae08745Sheppo 		}
26331ae08745Sheppo 
2634ba2e4443Sseb 		/* mark i/f as down and unregistered */
2635ba2e4443Sseb 		vswp->if_state &= ~(VSW_IF_UP | VSW_IF_REG);
26361ae08745Sheppo 	}
26371ae08745Sheppo 	RW_EXIT(&vswp->if_lockrw);
26381ae08745Sheppo 
26391ae08745Sheppo 	D1(vswp, "%s: exit", __func__);
26401ae08745Sheppo 
26411ae08745Sheppo 	return (rv);
26421ae08745Sheppo }
26431ae08745Sheppo 
2644ba2e4443Sseb static int
2645ba2e4443Sseb vsw_m_stat(void *arg, uint_t stat, uint64_t *val)
26461ae08745Sheppo {
26471ae08745Sheppo 	vsw_t			*vswp = (vsw_t *)arg;
26481ae08745Sheppo 
26491ae08745Sheppo 	D1(vswp, "%s: enter", __func__);
26501ae08745Sheppo 
265134683adeSsg70180 	mutex_enter(&vswp->mac_lock);
265234683adeSsg70180 	if (vswp->mh == NULL) {
265334683adeSsg70180 		mutex_exit(&vswp->mac_lock);
2654ba2e4443Sseb 		return (EINVAL);
265534683adeSsg70180 	}
26561ae08745Sheppo 
26571ae08745Sheppo 	/* return stats from underlying device */
2658ba2e4443Sseb 	*val = mac_stat_get(vswp->mh, stat);
265934683adeSsg70180 
266034683adeSsg70180 	mutex_exit(&vswp->mac_lock);
266134683adeSsg70180 
2662ba2e4443Sseb 	return (0);
26631ae08745Sheppo }
26641ae08745Sheppo 
26651ae08745Sheppo static void
26661ae08745Sheppo vsw_m_stop(void *arg)
26671ae08745Sheppo {
26681ae08745Sheppo 	vsw_t		*vswp = (vsw_t *)arg;
26691ae08745Sheppo 
26701ae08745Sheppo 	D1(vswp, "%s: enter", __func__);
26711ae08745Sheppo 
26721ae08745Sheppo 	WRITE_ENTER(&vswp->if_lockrw);
26731ae08745Sheppo 	vswp->if_state &= ~VSW_IF_UP;
26741ae08745Sheppo 	RW_EXIT(&vswp->if_lockrw);
26751ae08745Sheppo 
26765f94e909Ssg70180 	mutex_enter(&vswp->hw_lock);
26775f94e909Ssg70180 
26785f94e909Ssg70180 	(void) vsw_unset_hw(vswp, NULL, VSW_LOCALDEV);
26795f94e909Ssg70180 
26805f94e909Ssg70180 	if (vswp->recfg_reqd)
26815f94e909Ssg70180 		vsw_reconfig_hw(vswp);
26825f94e909Ssg70180 
26835f94e909Ssg70180 	mutex_exit(&vswp->hw_lock);
26845f94e909Ssg70180 
26851ae08745Sheppo 	D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state);
26861ae08745Sheppo }
26871ae08745Sheppo 
26881ae08745Sheppo static int
26891ae08745Sheppo vsw_m_start(void *arg)
26901ae08745Sheppo {
26911ae08745Sheppo 	vsw_t		*vswp = (vsw_t *)arg;
26921ae08745Sheppo 
26931ae08745Sheppo 	D1(vswp, "%s: enter", __func__);
26941ae08745Sheppo 
26951ae08745Sheppo 	WRITE_ENTER(&vswp->if_lockrw);
26961ae08745Sheppo 
2697*19b65a69Ssb155480 	vswp->if_state |= VSW_IF_UP;
2698*19b65a69Ssb155480 
2699*19b65a69Ssb155480 	if (vswp->switching_setup_done == B_FALSE) {
2700*19b65a69Ssb155480 		/*
2701*19b65a69Ssb155480 		 * If the switching mode has not been setup yet, just
2702*19b65a69Ssb155480 		 * return. The unicast address will be programmed
2703*19b65a69Ssb155480 		 * after the physical device is successfully setup by the
2704*19b65a69Ssb155480 		 * timeout handler.
2705*19b65a69Ssb155480 		 */
2706*19b65a69Ssb155480 		RW_EXIT(&vswp->if_lockrw);
2707*19b65a69Ssb155480 		return (0);
2708*19b65a69Ssb155480 	}
2709*19b65a69Ssb155480 
2710*19b65a69Ssb155480 	/* if in layer2 mode, program unicast address. */
2711*19b65a69Ssb155480 	if (vswp->mh != NULL) {
27125f94e909Ssg70180 		mutex_enter(&vswp->hw_lock);
27135f94e909Ssg70180 		(void) vsw_set_hw(vswp, NULL, VSW_LOCALDEV);
27145f94e909Ssg70180 		mutex_exit(&vswp->hw_lock);
2715*19b65a69Ssb155480 	}
2716*19b65a69Ssb155480 
2717*19b65a69Ssb155480 	RW_EXIT(&vswp->if_lockrw);
27185f94e909Ssg70180 
27191ae08745Sheppo 	D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state);
27201ae08745Sheppo 	return (0);
27211ae08745Sheppo }
27221ae08745Sheppo 
27231ae08745Sheppo /*
27241ae08745Sheppo  * Change the local interface address.
27255f94e909Ssg70180  *
27265f94e909Ssg70180  * Note: we don't support this entry point. The local
27275f94e909Ssg70180  * mac address of the switch can only be changed via its
27285f94e909Ssg70180  * MD node properties.
27291ae08745Sheppo  */
27301ae08745Sheppo static int
27311ae08745Sheppo vsw_m_unicst(void *arg, const uint8_t *macaddr)
27321ae08745Sheppo {
27335f94e909Ssg70180 	_NOTE(ARGUNUSED(arg, macaddr))
27341ae08745Sheppo 
27355f94e909Ssg70180 	return (DDI_FAILURE);
27361ae08745Sheppo }
27371ae08745Sheppo 
27381ae08745Sheppo static int
27391ae08745Sheppo vsw_m_multicst(void *arg, boolean_t add, const uint8_t *mca)
27401ae08745Sheppo {
27411ae08745Sheppo 	vsw_t		*vswp = (vsw_t *)arg;
27421ae08745Sheppo 	mcst_addr_t	*mcst_p = NULL;
27431ae08745Sheppo 	uint64_t	addr = 0x0;
2744e1ebb9ecSlm66018 	int		i, ret = 0;
27451ae08745Sheppo 
27461ae08745Sheppo 	D1(vswp, "%s: enter", __func__);
27471ae08745Sheppo 
27481ae08745Sheppo 	/*
27491ae08745Sheppo 	 * Convert address into form that can be used
27501ae08745Sheppo 	 * as hash table key.
27511ae08745Sheppo 	 */
27521ae08745Sheppo 	for (i = 0; i < ETHERADDRL; i++) {
27531ae08745Sheppo 		addr = (addr << 8) | mca[i];
27541ae08745Sheppo 	}
27551ae08745Sheppo 
27561ae08745Sheppo 	D2(vswp, "%s: addr = 0x%llx", __func__, addr);
27571ae08745Sheppo 
27581ae08745Sheppo 	if (add) {
27591ae08745Sheppo 		D2(vswp, "%s: adding multicast", __func__);
27601ae08745Sheppo 		if (vsw_add_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) {
27611ae08745Sheppo 			/*
27621ae08745Sheppo 			 * Update the list of multicast addresses
27631ae08745Sheppo 			 * contained within the vsw_t structure to
27641ae08745Sheppo 			 * include this new one.
27651ae08745Sheppo 			 */
27661ae08745Sheppo 			mcst_p = kmem_zalloc(sizeof (mcst_addr_t), KM_NOSLEEP);
27671ae08745Sheppo 			if (mcst_p == NULL) {
27681ae08745Sheppo 				DERR(vswp, "%s unable to alloc mem", __func__);
2769*19b65a69Ssb155480 				(void) vsw_del_mcst(vswp,
2770*19b65a69Ssb155480 				    VSW_LOCALDEV, addr, NULL);
27711ae08745Sheppo 				return (1);
27721ae08745Sheppo 			}
27731ae08745Sheppo 			mcst_p->addr = addr;
2774*19b65a69Ssb155480 			ether_copy(mca, &mcst_p->mca);
27751ae08745Sheppo 
27761ae08745Sheppo 			/*
27771ae08745Sheppo 			 * Call into the underlying driver to program the
27781ae08745Sheppo 			 * address into HW.
27791ae08745Sheppo 			 */
278034683adeSsg70180 			mutex_enter(&vswp->mac_lock);
2781e1ebb9ecSlm66018 			if (vswp->mh != NULL) {
2782e1ebb9ecSlm66018 				ret = mac_multicst_add(vswp->mh, mca);
2783e1ebb9ecSlm66018 				if (ret != 0) {
278434683adeSsg70180 					cmn_err(CE_WARN, "!vsw%d: unable to "
278534683adeSsg70180 					    "add multicast address",
278634683adeSsg70180 					    vswp->instance);
278734683adeSsg70180 					mutex_exit(&vswp->mac_lock);
2788*19b65a69Ssb155480 					(void) vsw_del_mcst(vswp,
2789*19b65a69Ssb155480 					    VSW_LOCALDEV, addr, NULL);
2790*19b65a69Ssb155480 					kmem_free(mcst_p, sizeof (*mcst_p));
2791*19b65a69Ssb155480 					return (ret);
2792e1ebb9ecSlm66018 				}
2793*19b65a69Ssb155480 				mcst_p->mac_added = B_TRUE;
27941ae08745Sheppo 			}
279534683adeSsg70180 			mutex_exit(&vswp->mac_lock);
2796*19b65a69Ssb155480 
2797*19b65a69Ssb155480 			mutex_enter(&vswp->mca_lock);
2798*19b65a69Ssb155480 			mcst_p->nextp = vswp->mcap;
2799*19b65a69Ssb155480 			vswp->mcap = mcst_p;
2800*19b65a69Ssb155480 			mutex_exit(&vswp->mca_lock);
28011ae08745Sheppo 		} else {
280234683adeSsg70180 			cmn_err(CE_WARN, "!vsw%d: unable to add multicast "
280334683adeSsg70180 			    "address", vswp->instance);
2804e1ebb9ecSlm66018 		}
2805e1ebb9ecSlm66018 		return (ret);
2806e1ebb9ecSlm66018 	}
2807e1ebb9ecSlm66018 
28081ae08745Sheppo 	D2(vswp, "%s: removing multicast", __func__);
28091ae08745Sheppo 	/*
28101ae08745Sheppo 	 * Remove the address from the hash table..
28111ae08745Sheppo 	 */
28121ae08745Sheppo 	if (vsw_del_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) {
28131ae08745Sheppo 
28141ae08745Sheppo 		/*
28151ae08745Sheppo 		 * ..and then from the list maintained in the
28161ae08745Sheppo 		 * vsw_t structure.
28171ae08745Sheppo 		 */
2818*19b65a69Ssb155480 		mcst_p = vsw_del_addr(VSW_LOCALDEV, vswp, addr);
2819*19b65a69Ssb155480 		ASSERT(mcst_p != NULL);
28201ae08745Sheppo 
282134683adeSsg70180 		mutex_enter(&vswp->mac_lock);
2822*19b65a69Ssb155480 		if (vswp->mh != NULL && mcst_p->mac_added) {
28231ae08745Sheppo 			(void) mac_multicst_remove(vswp->mh, mca);
2824*19b65a69Ssb155480 			mcst_p->mac_added = B_FALSE;
2825*19b65a69Ssb155480 		}
282634683adeSsg70180 		mutex_exit(&vswp->mac_lock);
2827*19b65a69Ssb155480 		kmem_free(mcst_p, sizeof (*mcst_p));
28281ae08745Sheppo 	}
28291ae08745Sheppo 
28301ae08745Sheppo 	D1(vswp, "%s: exit", __func__);
28311ae08745Sheppo 
28321ae08745Sheppo 	return (0);
28331ae08745Sheppo }
28341ae08745Sheppo 
28351ae08745Sheppo static int
28361ae08745Sheppo vsw_m_promisc(void *arg, boolean_t on)
28371ae08745Sheppo {
28381ae08745Sheppo 	vsw_t		*vswp = (vsw_t *)arg;
28391ae08745Sheppo 
28401ae08745Sheppo 	D1(vswp, "%s: enter", __func__);
28411ae08745Sheppo 
28421ae08745Sheppo 	WRITE_ENTER(&vswp->if_lockrw);
28431ae08745Sheppo 	if (on)
28441ae08745Sheppo 		vswp->if_state |= VSW_IF_PROMISC;
28451ae08745Sheppo 	else
28461ae08745Sheppo 		vswp->if_state &= ~VSW_IF_PROMISC;
28471ae08745Sheppo 	RW_EXIT(&vswp->if_lockrw);
28481ae08745Sheppo 
28491ae08745Sheppo 	D1(vswp, "%s: exit", __func__);
28501ae08745Sheppo 
28511ae08745Sheppo 	return (0);
28521ae08745Sheppo }
28531ae08745Sheppo 
28541ae08745Sheppo static mblk_t *
28551ae08745Sheppo vsw_m_tx(void *arg, mblk_t *mp)
28561ae08745Sheppo {
28571ae08745Sheppo 	vsw_t		*vswp = (vsw_t *)arg;
28581ae08745Sheppo 
28591ae08745Sheppo 	D1(vswp, "%s: enter", __func__);
28601ae08745Sheppo 
286134683adeSsg70180 	vswp->vsw_switch_frame(vswp, mp, VSW_LOCALDEV, NULL, NULL);
28621ae08745Sheppo 
28631ae08745Sheppo 	D1(vswp, "%s: exit", __func__);
28641ae08745Sheppo 
28651ae08745Sheppo 	return (NULL);
28661ae08745Sheppo }
28671ae08745Sheppo 
28681ae08745Sheppo /*
28691ae08745Sheppo  * Register for machine description (MD) updates.
287034683adeSsg70180  *
287134683adeSsg70180  * Returns 0 on success, 1 on failure.
28721ae08745Sheppo  */
287334683adeSsg70180 static int
28741ae08745Sheppo vsw_mdeg_register(vsw_t *vswp)
28751ae08745Sheppo {
28761ae08745Sheppo 	mdeg_prop_spec_t	*pspecp;
28771ae08745Sheppo 	mdeg_node_spec_t	*inst_specp;
287834683adeSsg70180 	mdeg_handle_t		mdeg_hdl, mdeg_port_hdl;
28791ae08745Sheppo 	size_t			templatesz;
2880*19b65a69Ssb155480 	int			rv;
28811ae08745Sheppo 
28821ae08745Sheppo 	D1(vswp, "%s: enter", __func__);
28831ae08745Sheppo 
288434683adeSsg70180 	/*
28851ae08745Sheppo 	 * Allocate and initialize a per-instance copy
28861ae08745Sheppo 	 * of the global property spec array that will
28871ae08745Sheppo 	 * uniquely identify this vsw instance.
28881ae08745Sheppo 	 */
28891ae08745Sheppo 	templatesz = sizeof (vsw_prop_template);
28901ae08745Sheppo 	pspecp = kmem_zalloc(templatesz, KM_SLEEP);
28911ae08745Sheppo 
28921ae08745Sheppo 	bcopy(vsw_prop_template, pspecp, templatesz);
28931ae08745Sheppo 
2894*19b65a69Ssb155480 	VSW_SET_MDEG_PROP_INST(pspecp, vswp->regprop);
28951ae08745Sheppo 
28961ae08745Sheppo 	/* initialize the complete prop spec structure */
28971ae08745Sheppo 	inst_specp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_SLEEP);
28981ae08745Sheppo 	inst_specp->namep = "virtual-device";
28991ae08745Sheppo 	inst_specp->specp = pspecp;
29001ae08745Sheppo 
2901*19b65a69Ssb155480 	D2(vswp, "%s: instance %d registering with mdeg", __func__,
2902*19b65a69Ssb155480 	    vswp->regprop);
290334683adeSsg70180 	/*
290434683adeSsg70180 	 * Register an interest in 'virtual-device' nodes with a
290534683adeSsg70180 	 * 'name' property of 'virtual-network-switch'
290634683adeSsg70180 	 */
290734683adeSsg70180 	rv = mdeg_register(inst_specp, &vdev_match, vsw_mdeg_cb,
29081ae08745Sheppo 	    (void *)vswp, &mdeg_hdl);
290934683adeSsg70180 	if (rv != MDEG_SUCCESS) {
291034683adeSsg70180 		DERR(vswp, "%s: mdeg_register failed (%d) for vsw node",
291134683adeSsg70180 		    __func__, rv);
291234683adeSsg70180 		goto mdeg_reg_fail;
291334683adeSsg70180 	}
29141ae08745Sheppo 
291534683adeSsg70180 	/*
291634683adeSsg70180 	 * Register an interest in 'vsw-port' nodes.
291734683adeSsg70180 	 */
291834683adeSsg70180 	rv = mdeg_register(inst_specp, &vport_match, vsw_port_mdeg_cb,
291934683adeSsg70180 	    (void *)vswp, &mdeg_port_hdl);
29201ae08745Sheppo 	if (rv != MDEG_SUCCESS) {
29211ae08745Sheppo 		DERR(vswp, "%s: mdeg_register failed (%d)\n", __func__, rv);
292234683adeSsg70180 		(void) mdeg_unregister(mdeg_hdl);
292334683adeSsg70180 		goto mdeg_reg_fail;
29241ae08745Sheppo 	}
29251ae08745Sheppo 
29261ae08745Sheppo 	/* save off data that will be needed later */
29271ae08745Sheppo 	vswp->inst_spec = inst_specp;
29281ae08745Sheppo 	vswp->mdeg_hdl = mdeg_hdl;
292934683adeSsg70180 	vswp->mdeg_port_hdl = mdeg_port_hdl;
29301ae08745Sheppo 
29311ae08745Sheppo 	D1(vswp, "%s: exit", __func__);
293234683adeSsg70180 	return (0);
293334683adeSsg70180 
293434683adeSsg70180 mdeg_reg_fail:
293534683adeSsg70180 	cmn_err(CE_WARN, "!vsw%d: Unable to register MDEG callbacks",
293634683adeSsg70180 	    vswp->instance);
293734683adeSsg70180 	kmem_free(pspecp, templatesz);
293834683adeSsg70180 	kmem_free(inst_specp, sizeof (mdeg_node_spec_t));
293934683adeSsg70180 
294034683adeSsg70180 	vswp->mdeg_hdl = NULL;
294134683adeSsg70180 	vswp->mdeg_port_hdl = NULL;
294234683adeSsg70180 
294334683adeSsg70180 	return (1);
29441ae08745Sheppo }
29451ae08745Sheppo 
29461ae08745Sheppo static void
29471ae08745Sheppo vsw_mdeg_unregister(vsw_t *vswp)
29481ae08745Sheppo {
29491ae08745Sheppo 	D1(vswp, "vsw_mdeg_unregister: enter");
29501ae08745Sheppo 
295134683adeSsg70180 	if (vswp->mdeg_hdl != NULL)
29521ae08745Sheppo 		(void) mdeg_unregister(vswp->mdeg_hdl);
29531ae08745Sheppo 
295434683adeSsg70180 	if (vswp->mdeg_port_hdl != NULL)
295534683adeSsg70180 		(void) mdeg_unregister(vswp->mdeg_port_hdl);
295634683adeSsg70180 
295734683adeSsg70180 	if (vswp->inst_spec != NULL) {
29581ae08745Sheppo 		if (vswp->inst_spec->specp != NULL) {
29591ae08745Sheppo 			(void) kmem_free(vswp->inst_spec->specp,
29601ae08745Sheppo 			    sizeof (vsw_prop_template));
29611ae08745Sheppo 			vswp->inst_spec->specp = NULL;
29621ae08745Sheppo 		}
29631ae08745Sheppo 
2964205eeb1aSlm66018 		(void) kmem_free(vswp->inst_spec, sizeof (mdeg_node_spec_t));
29651ae08745Sheppo 		vswp->inst_spec = NULL;
29661ae08745Sheppo 	}
29671ae08745Sheppo 
29681ae08745Sheppo 	D1(vswp, "vsw_mdeg_unregister: exit");
29691ae08745Sheppo }
29701ae08745Sheppo 
297134683adeSsg70180 /*
297234683adeSsg70180  * Mdeg callback invoked for the vsw node itself.
297334683adeSsg70180  */
29741ae08745Sheppo static int
29751ae08745Sheppo vsw_mdeg_cb(void *cb_argp, mdeg_result_t *resp)
29761ae08745Sheppo {
29771ae08745Sheppo 	vsw_t		*vswp;
29781ae08745Sheppo 	md_t		*mdp;
29791ae08745Sheppo 	mde_cookie_t	node;
29801ae08745Sheppo 	uint64_t	inst;
298134683adeSsg70180 	char		*node_name = NULL;
29821ae08745Sheppo 
29831ae08745Sheppo 	if (resp == NULL)
29841ae08745Sheppo 		return (MDEG_FAILURE);
29851ae08745Sheppo 
29861ae08745Sheppo 	vswp = (vsw_t *)cb_argp;
29871ae08745Sheppo 
298834683adeSsg70180 	D1(vswp, "%s: added %d : removed %d : curr matched %d"
298934683adeSsg70180 	    " : prev matched %d", __func__, resp->added.nelem,
299034683adeSsg70180 	    resp->removed.nelem, resp->match_curr.nelem,
299134683adeSsg70180 	    resp->match_prev.nelem);
299234683adeSsg70180 
299334683adeSsg70180 	/*
2994*19b65a69Ssb155480 	 * We get an initial callback for this node as 'added'
2995*19b65a69Ssb155480 	 * after registering with mdeg. Note that we would have
2996*19b65a69Ssb155480 	 * already gathered information about this vsw node by
2997*19b65a69Ssb155480 	 * walking MD earlier during attach (in vsw_read_mdprops()).
2998*19b65a69Ssb155480 	 * So, there is a window where the properties of this
2999*19b65a69Ssb155480 	 * node might have changed when we get this initial 'added'
3000*19b65a69Ssb155480 	 * callback. We handle this as if an update occured
3001*19b65a69Ssb155480 	 * and invoke the same function which handles updates to
3002*19b65a69Ssb155480 	 * the properties of this vsw-node if any.
3003*19b65a69Ssb155480 	 *
300434683adeSsg70180 	 * A non-zero 'match' value indicates that the MD has been
3005*19b65a69Ssb155480 	 * updated and that a virtual-network-switch node is
3006*19b65a69Ssb155480 	 * present which may or may not have been updated. It is
3007*19b65a69Ssb155480 	 * up to the clients to examine their own nodes and
3008*19b65a69Ssb155480 	 * determine if they have changed.
300934683adeSsg70180 	 */
3010*19b65a69Ssb155480 	if (resp->added.nelem != 0) {
301134683adeSsg70180 
3012*19b65a69Ssb155480 		if (resp->added.nelem != 1) {
3013*19b65a69Ssb155480 			cmn_err(CE_NOTE, "!vsw%d: number of nodes added "
3014*19b65a69Ssb155480 			    "invalid: %d\n", vswp->instance, resp->added.nelem);
3015*19b65a69Ssb155480 			return (MDEG_FAILURE);
3016*19b65a69Ssb155480 		}
3017*19b65a69Ssb155480 
3018*19b65a69Ssb155480 		mdp = resp->added.mdp;
3019*19b65a69Ssb155480 		node = resp->added.mdep[0];
3020*19b65a69Ssb155480 
3021*19b65a69Ssb155480 	} else if (resp->match_curr.nelem != 0) {
3022*19b65a69Ssb155480 
3023*19b65a69Ssb155480 		if (resp->match_curr.nelem != 1) {
3024*19b65a69Ssb155480 			cmn_err(CE_NOTE, "!vsw%d: number of nodes updated "
3025*19b65a69Ssb155480 			    "invalid: %d\n", vswp->instance,
3026*19b65a69Ssb155480 			    resp->match_curr.nelem);
3027*19b65a69Ssb155480 			return (MDEG_FAILURE);
3028*19b65a69Ssb155480 		}
3029*19b65a69Ssb155480 
3030*19b65a69Ssb155480 		mdp = resp->match_curr.mdp;
3031*19b65a69Ssb155480 		node = resp->match_curr.mdep[0];
3032*19b65a69Ssb155480 
3033*19b65a69Ssb155480 	} else {
3034*19b65a69Ssb155480 		return (MDEG_FAILURE);
3035*19b65a69Ssb155480 	}
3036*19b65a69Ssb155480 
3037*19b65a69Ssb155480 	/* Validate name and instance */
303834683adeSsg70180 	if (md_get_prop_str(mdp, node, "name", &node_name) != 0) {
3039*19b65a69Ssb155480 		DERR(vswp, "%s: unable to get node name\n",  __func__);
3040*19b65a69Ssb155480 		return (MDEG_FAILURE);
3041*19b65a69Ssb155480 	}
3042*19b65a69Ssb155480 
3043*19b65a69Ssb155480 	/* is this a virtual-network-switch? */
3044*19b65a69Ssb155480 	if (strcmp(node_name, vsw_propname) != 0) {
3045*19b65a69Ssb155480 		DERR(vswp, "%s: Invalid node name: %s\n",
3046*19b65a69Ssb155480 		    __func__, node_name);
3047*19b65a69Ssb155480 		return (MDEG_FAILURE);
304834683adeSsg70180 	}
304934683adeSsg70180 
305034683adeSsg70180 	if (md_get_prop_val(mdp, node, "cfg-handle", &inst)) {
3051*19b65a69Ssb155480 		DERR(vswp, "%s: prop(cfg-handle) not found\n",
3052*19b65a69Ssb155480 		    __func__);
3053*19b65a69Ssb155480 		return (MDEG_FAILURE);
305434683adeSsg70180 	}
305534683adeSsg70180 
3056*19b65a69Ssb155480 	/* is this the right instance of vsw? */
3057*19b65a69Ssb155480 	if (inst != vswp->regprop) {
3058*19b65a69Ssb155480 		DERR(vswp, "%s: Invalid cfg-handle: %lx\n",
3059*19b65a69Ssb155480 		    __func__, inst);
3060*19b65a69Ssb155480 		return (MDEG_FAILURE);
3061*19b65a69Ssb155480 	}
306234683adeSsg70180 
306334683adeSsg70180 	vsw_update_md_prop(vswp, mdp, node);
306434683adeSsg70180 
306534683adeSsg70180 	return (MDEG_SUCCESS);
306634683adeSsg70180 }
306734683adeSsg70180 
306834683adeSsg70180 /*
306934683adeSsg70180  * Mdeg callback invoked for changes to the vsw-port nodes
307034683adeSsg70180  * under the vsw node.
307134683adeSsg70180  */
307234683adeSsg70180 static int
307334683adeSsg70180 vsw_port_mdeg_cb(void *cb_argp, mdeg_result_t *resp)
307434683adeSsg70180 {
307534683adeSsg70180 	vsw_t		*vswp;
307634683adeSsg70180 	int		idx;
307734683adeSsg70180 	md_t		*mdp;
307834683adeSsg70180 	mde_cookie_t	node;
307934683adeSsg70180 	uint64_t	inst;
308034683adeSsg70180 
308134683adeSsg70180 	if ((resp == NULL) || (cb_argp == NULL))
308234683adeSsg70180 		return (MDEG_FAILURE);
308334683adeSsg70180 
308434683adeSsg70180 	vswp = (vsw_t *)cb_argp;
308534683adeSsg70180 
308634683adeSsg70180 	D2(vswp, "%s: added %d : removed %d : curr matched %d"
308734683adeSsg70180 	    " : prev matched %d", __func__, resp->added.nelem,
308834683adeSsg70180 	    resp->removed.nelem, resp->match_curr.nelem,
30891ae08745Sheppo 	    resp->match_prev.nelem);
30901ae08745Sheppo 
30911ae08745Sheppo 	/* process added ports */
30921ae08745Sheppo 	for (idx = 0; idx < resp->added.nelem; idx++) {
30931ae08745Sheppo 		mdp = resp->added.mdp;
30941ae08745Sheppo 		node = resp->added.mdep[idx];
30951ae08745Sheppo 
30961ae08745Sheppo 		D2(vswp, "%s: adding node(%d) 0x%lx", __func__, idx, node);
30971ae08745Sheppo 
30981ae08745Sheppo 		if (vsw_port_add(vswp, mdp, &node) != 0) {
309934683adeSsg70180 			cmn_err(CE_WARN, "!vsw%d: Unable to add new port "
310034683adeSsg70180 			    "(0x%lx)", vswp->instance, node);
31011ae08745Sheppo 		}
31021ae08745Sheppo 	}
31031ae08745Sheppo 
31041ae08745Sheppo 	/* process removed ports */
31051ae08745Sheppo 	for (idx = 0; idx < resp->removed.nelem; idx++) {
31061ae08745Sheppo 		mdp = resp->removed.mdp;
31071ae08745Sheppo 		node = resp->removed.mdep[idx];
31081ae08745Sheppo 
31091ae08745Sheppo 		if (md_get_prop_val(mdp, node, id_propname, &inst)) {
311034683adeSsg70180 			DERR(vswp, "%s: prop(%s) not found in port(%d)",
31111ae08745Sheppo 			    __func__, id_propname, idx);
31121ae08745Sheppo 			continue;
31131ae08745Sheppo 		}
31141ae08745Sheppo 
31151ae08745Sheppo 		D2(vswp, "%s: removing node(%d) 0x%lx", __func__, idx, node);
31161ae08745Sheppo 
31171ae08745Sheppo 		if (vsw_port_detach(vswp, inst) != 0) {
311834683adeSsg70180 			cmn_err(CE_WARN, "!vsw%d: Unable to remove port %ld",
311934683adeSsg70180 			    vswp->instance, inst);
31201ae08745Sheppo 		}
31211ae08745Sheppo 	}
31221ae08745Sheppo 
31231ae08745Sheppo 	/*
31241ae08745Sheppo 	 * Currently no support for updating already active ports.
31251ae08745Sheppo 	 * So, ignore the match_curr and match_priv arrays for now.
31261ae08745Sheppo 	 */
31271ae08745Sheppo 
31281ae08745Sheppo 	D1(vswp, "%s: exit", __func__);
31291ae08745Sheppo 
31301ae08745Sheppo 	return (MDEG_SUCCESS);
31311ae08745Sheppo }
31321ae08745Sheppo 
31331ae08745Sheppo /*
3134*19b65a69Ssb155480  * Scan the machine description for this instance of vsw
3135*19b65a69Ssb155480  * and read its properties. Called only from vsw_attach().
3136*19b65a69Ssb155480  * Returns: 0 on success, 1 on failure.
3137*19b65a69Ssb155480  */
3138*19b65a69Ssb155480 static int
3139*19b65a69Ssb155480 vsw_read_mdprops(vsw_t *vswp)
3140*19b65a69Ssb155480 {
3141*19b65a69Ssb155480 	md_t		*mdp = NULL;
3142*19b65a69Ssb155480 	mde_cookie_t	rootnode;
3143*19b65a69Ssb155480 	mde_cookie_t	*listp = NULL;
3144*19b65a69Ssb155480 	uint64_t	inst;
3145*19b65a69Ssb155480 	uint64_t	cfgh;
3146*19b65a69Ssb155480 	char		*name;
3147*19b65a69Ssb155480 	int		rv = 1;
3148*19b65a69Ssb155480 	int		num_nodes = 0;
3149*19b65a69Ssb155480 	int		num_devs = 0;
3150*19b65a69Ssb155480 	int		listsz = 0;
3151*19b65a69Ssb155480 	int		i;
3152*19b65a69Ssb155480 
3153*19b65a69Ssb155480 	/*
3154*19b65a69Ssb155480 	 * In each 'virtual-device' node in the MD there is a
3155*19b65a69Ssb155480 	 * 'cfg-handle' property which is the MD's concept of
3156*19b65a69Ssb155480 	 * an instance number (this may be completely different from
3157*19b65a69Ssb155480 	 * the device drivers instance #). OBP reads that value and
3158*19b65a69Ssb155480 	 * stores it in the 'reg' property of the appropriate node in
3159*19b65a69Ssb155480 	 * the device tree. We first read this reg property and use this
3160*19b65a69Ssb155480 	 * to compare against the 'cfg-handle' property of vsw nodes
3161*19b65a69Ssb155480 	 * in MD to get to this specific vsw instance and then read
3162*19b65a69Ssb155480 	 * other properties that we are interested in.
3163*19b65a69Ssb155480 	 * We also cache the value of 'reg' property and use it later
3164*19b65a69Ssb155480 	 * to register callbacks with mdeg (see vsw_mdeg_register())
3165*19b65a69Ssb155480 	 */
3166*19b65a69Ssb155480 	inst = ddi_prop_get_int(DDI_DEV_T_ANY, vswp->dip,
3167*19b65a69Ssb155480 	    DDI_PROP_DONTPASS, reg_propname, -1);
3168*19b65a69Ssb155480 	if (inst == -1) {
3169*19b65a69Ssb155480 		cmn_err(CE_NOTE, "!vsw%d: Unable to read %s property from "
3170*19b65a69Ssb155480 		    "OBP device tree", vswp->instance, reg_propname);
3171*19b65a69Ssb155480 		return (rv);
3172*19b65a69Ssb155480 	}
3173*19b65a69Ssb155480 
3174*19b65a69Ssb155480 	vswp->regprop = inst;
3175*19b65a69Ssb155480 
3176*19b65a69Ssb155480 	if ((mdp = md_get_handle()) == NULL) {
3177*19b65a69Ssb155480 		DWARN(vswp, "%s: cannot init MD\n", __func__);
3178*19b65a69Ssb155480 		return (rv);
3179*19b65a69Ssb155480 	}
3180*19b65a69Ssb155480 
3181*19b65a69Ssb155480 	num_nodes = md_node_count(mdp);
3182*19b65a69Ssb155480 	ASSERT(num_nodes > 0);
3183*19b65a69Ssb155480 
3184*19b65a69Ssb155480 	listsz = num_nodes * sizeof (mde_cookie_t);
3185*19b65a69Ssb155480 	listp = (mde_cookie_t *)kmem_zalloc(listsz, KM_SLEEP);
3186*19b65a69Ssb155480 
3187*19b65a69Ssb155480 	rootnode = md_root_node(mdp);
3188*19b65a69Ssb155480 
3189*19b65a69Ssb155480 	/* search for all "virtual_device" nodes */
3190*19b65a69Ssb155480 	num_devs = md_scan_dag(mdp, rootnode,
3191*19b65a69Ssb155480 	    md_find_name(mdp, vdev_propname),
3192*19b65a69Ssb155480 	    md_find_name(mdp, "fwd"), listp);
3193*19b65a69Ssb155480 	if (num_devs <= 0) {
3194*19b65a69Ssb155480 		DWARN(vswp, "%s: invalid num_devs:%d\n", __func__, num_devs);
3195*19b65a69Ssb155480 		goto vsw_readmd_exit;
3196*19b65a69Ssb155480 	}
3197*19b65a69Ssb155480 
3198*19b65a69Ssb155480 	/*
3199*19b65a69Ssb155480 	 * Now loop through the list of virtual-devices looking for
3200*19b65a69Ssb155480 	 * devices with name "virtual-network-switch" and for each
3201*19b65a69Ssb155480 	 * such device compare its instance with what we have from
3202*19b65a69Ssb155480 	 * the 'reg' property to find the right node in MD and then
3203*19b65a69Ssb155480 	 * read all its properties.
3204*19b65a69Ssb155480 	 */
3205*19b65a69Ssb155480 	for (i = 0; i < num_devs; i++) {
3206*19b65a69Ssb155480 
3207*19b65a69Ssb155480 		if (md_get_prop_str(mdp, listp[i], "name", &name) != 0) {
3208*19b65a69Ssb155480 			DWARN(vswp, "%s: name property not found\n",
3209*19b65a69Ssb155480 			    __func__);
3210*19b65a69Ssb155480 			goto vsw_readmd_exit;
3211*19b65a69Ssb155480 		}
3212*19b65a69Ssb155480 
3213*19b65a69Ssb155480 		/* is this a virtual-network-switch? */
3214*19b65a69Ssb155480 		if (strcmp(name, vsw_propname) != 0)
3215*19b65a69Ssb155480 			continue;
3216*19b65a69Ssb155480 
3217*19b65a69Ssb155480 		if (md_get_prop_val(mdp, listp[i], "cfg-handle", &cfgh) != 0) {
3218*19b65a69Ssb155480 			DWARN(vswp, "%s: cfg-handle property not found\n",
3219*19b65a69Ssb155480 			    __func__);
3220*19b65a69Ssb155480 			goto vsw_readmd_exit;
3221*19b65a69Ssb155480 		}
3222*19b65a69Ssb155480 
3223*19b65a69Ssb155480 		/* is this the required instance of vsw? */
3224*19b65a69Ssb155480 		if (inst != cfgh)
3225*19b65a69Ssb155480 			continue;
3226*19b65a69Ssb155480 
3227*19b65a69Ssb155480 		/* now read all properties of this vsw instance */
3228*19b65a69Ssb155480 		rv = vsw_get_initial_md_properties(vswp, mdp, listp[i]);
3229*19b65a69Ssb155480 		break;
3230*19b65a69Ssb155480 	}
3231*19b65a69Ssb155480 
3232*19b65a69Ssb155480 vsw_readmd_exit:
3233*19b65a69Ssb155480 
3234*19b65a69Ssb155480 	kmem_free(listp, listsz);
3235*19b65a69Ssb155480 	(void) md_fini_handle(mdp);
3236*19b65a69Ssb155480 	return (rv);
3237*19b65a69Ssb155480 }
3238*19b65a69Ssb155480 
3239*19b65a69Ssb155480 /*
324034683adeSsg70180  * Read the initial start-of-day values from the specified MD node.
324134683adeSsg70180  */
3242*19b65a69Ssb155480 static int
324334683adeSsg70180 vsw_get_initial_md_properties(vsw_t *vswp, md_t *mdp, mde_cookie_t node)
324434683adeSsg70180 {
324534683adeSsg70180 	int		i;
324634683adeSsg70180 	uint64_t 	macaddr = 0;
324734683adeSsg70180 
324834683adeSsg70180 	D1(vswp, "%s: enter", __func__);
324934683adeSsg70180 
3250*19b65a69Ssb155480 	if (vsw_get_md_physname(vswp, mdp, node, vswp->physname) != 0) {
3251*19b65a69Ssb155480 		return (1);
325234683adeSsg70180 	}
325334683adeSsg70180 
325434683adeSsg70180 	/* mac address for vswitch device itself */
325534683adeSsg70180 	if (md_get_prop_val(mdp, node, macaddr_propname, &macaddr) != 0) {
325634683adeSsg70180 		cmn_err(CE_WARN, "!vsw%d: Unable to get MAC address from MD",
325734683adeSsg70180 		    vswp->instance);
3258*19b65a69Ssb155480 		return (1);
3259*19b65a69Ssb155480 	}
326034683adeSsg70180 
3261*19b65a69Ssb155480 	vsw_save_lmacaddr(vswp, macaddr);
326234683adeSsg70180 
3263205eeb1aSlm66018 	if (vsw_get_md_smodes(vswp, mdp, node, vswp->smode, &vswp->smode_num)) {
326434683adeSsg70180 		cmn_err(CE_WARN, "vsw%d: Unable to read %s property from "
326534683adeSsg70180 		    "MD, defaulting to programmed mode", vswp->instance,
326634683adeSsg70180 		    smode_propname);
326734683adeSsg70180 
326834683adeSsg70180 		for (i = 0; i < NUM_SMODES; i++)
326934683adeSsg70180 			vswp->smode[i] = VSW_LAYER2;
327034683adeSsg70180 
327134683adeSsg70180 		vswp->smode_num = NUM_SMODES;
327234683adeSsg70180 	} else {
327334683adeSsg70180 		ASSERT(vswp->smode_num != 0);
327434683adeSsg70180 	}
327534683adeSsg70180 
327634683adeSsg70180 	D1(vswp, "%s: exit", __func__);
3277*19b65a69Ssb155480 	return (0);
327834683adeSsg70180 }
327934683adeSsg70180 
328034683adeSsg70180 /*
328134683adeSsg70180  * Check to see if the relevant properties in the specified node have
328234683adeSsg70180  * changed, and if so take the appropriate action.
328334683adeSsg70180  *
328434683adeSsg70180  * If any of the properties are missing or invalid we don't take
328534683adeSsg70180  * any action, as this function should only be invoked when modifications
328634683adeSsg70180  * have been made to what we assume is a working configuration, which
328734683adeSsg70180  * we leave active.
328834683adeSsg70180  *
328934683adeSsg70180  * Note it is legal for this routine to be invoked even if none of the
329034683adeSsg70180  * properties in the port node within the MD have actually changed.
329134683adeSsg70180  */
329234683adeSsg70180 static void
329334683adeSsg70180 vsw_update_md_prop(vsw_t *vswp, md_t *mdp, mde_cookie_t node)
329434683adeSsg70180 {
329534683adeSsg70180 	char		physname[LIFNAMSIZ];
329634683adeSsg70180 	char		drv[LIFNAMSIZ];
329734683adeSsg70180 	uint_t		ddi_instance;
329834683adeSsg70180 	uint8_t		new_smode[NUM_SMODES];
329934683adeSsg70180 	int		i, smode_num = 0;
330034683adeSsg70180 	uint64_t 	macaddr = 0;
330134683adeSsg70180 	enum		{MD_init = 0x1,
330234683adeSsg70180 				MD_physname = 0x2,
330334683adeSsg70180 				MD_macaddr = 0x4,
330434683adeSsg70180 				MD_smode = 0x8} updated;
3305*19b65a69Ssb155480 	int		rv;
330634683adeSsg70180 
330734683adeSsg70180 	updated = MD_init;
330834683adeSsg70180 
330934683adeSsg70180 	D1(vswp, "%s: enter", __func__);
331034683adeSsg70180 
331134683adeSsg70180 	/*
331234683adeSsg70180 	 * Check if name of physical device in MD has changed.
331334683adeSsg70180 	 */
331434683adeSsg70180 	if (vsw_get_md_physname(vswp, mdp, node, (char *)&physname) == 0) {
331534683adeSsg70180 		/*
331634683adeSsg70180 		 * Do basic sanity check on new device name/instance,
331734683adeSsg70180 		 * if its non NULL. It is valid for the device name to
331834683adeSsg70180 		 * have changed from a non NULL to a NULL value, i.e.
331934683adeSsg70180 		 * the vsw is being changed to 'routed' mode.
332034683adeSsg70180 		 */
332134683adeSsg70180 		if ((strlen(physname) != 0) &&
3322*19b65a69Ssb155480 		    (ddi_parse(physname, drv,
3323*19b65a69Ssb155480 		    &ddi_instance) != DDI_SUCCESS)) {
332434683adeSsg70180 			cmn_err(CE_WARN, "!vsw%d: new device name %s is not"
332534683adeSsg70180 			    " a valid device name/instance",
332634683adeSsg70180 			    vswp->instance, physname);
332734683adeSsg70180 			goto fail_reconf;
332834683adeSsg70180 		}
332934683adeSsg70180 
333034683adeSsg70180 		if (strcmp(physname, vswp->physname)) {
333134683adeSsg70180 			D2(vswp, "%s: device name changed from %s to %s",
333234683adeSsg70180 			    __func__, vswp->physname, physname);
333334683adeSsg70180 
333434683adeSsg70180 			updated |= MD_physname;
333534683adeSsg70180 		} else {
333634683adeSsg70180 			D2(vswp, "%s: device name unchanged at %s",
333734683adeSsg70180 			    __func__, vswp->physname);
333834683adeSsg70180 		}
333934683adeSsg70180 	} else {
334034683adeSsg70180 		cmn_err(CE_WARN, "!vsw%d: Unable to read name of physical "
334134683adeSsg70180 		    "device from updated MD.", vswp->instance);
334234683adeSsg70180 		goto fail_reconf;
334334683adeSsg70180 	}
334434683adeSsg70180 
334534683adeSsg70180 	/*
334634683adeSsg70180 	 * Check if MAC address has changed.
334734683adeSsg70180 	 */
334834683adeSsg70180 	if (md_get_prop_val(mdp, node, macaddr_propname, &macaddr) != 0) {
334934683adeSsg70180 		cmn_err(CE_WARN, "!vsw%d: Unable to get MAC address from MD",
335034683adeSsg70180 		    vswp->instance);
335134683adeSsg70180 		goto fail_reconf;
335234683adeSsg70180 	} else {
3353*19b65a69Ssb155480 		uint64_t maddr = macaddr;
335434683adeSsg70180 		READ_ENTER(&vswp->if_lockrw);
335534683adeSsg70180 		for (i = ETHERADDRL - 1; i >= 0; i--) {
3356*19b65a69Ssb155480 			if (vswp->if_addr.ether_addr_octet[i]
3357*19b65a69Ssb155480 			    != (macaddr & 0xFF)) {
335834683adeSsg70180 				D2(vswp, "%s: octet[%d] 0x%x != 0x%x",
335934683adeSsg70180 				    __func__, i,
336034683adeSsg70180 				    vswp->if_addr.ether_addr_octet[i],
336134683adeSsg70180 				    (macaddr & 0xFF));
336234683adeSsg70180 				updated |= MD_macaddr;
3363*19b65a69Ssb155480 				macaddr = maddr;
336434683adeSsg70180 				break;
336534683adeSsg70180 			}
336634683adeSsg70180 			macaddr >>= 8;
336734683adeSsg70180 		}
336834683adeSsg70180 		RW_EXIT(&vswp->if_lockrw);
3369*19b65a69Ssb155480 		if (updated & MD_macaddr) {
3370*19b65a69Ssb155480 			vsw_save_lmacaddr(vswp, macaddr);
3371*19b65a69Ssb155480 		}
337234683adeSsg70180 	}
337334683adeSsg70180 
337434683adeSsg70180 	/*
337534683adeSsg70180 	 * Check if switching modes have changed.
337634683adeSsg70180 	 */
3377*19b65a69Ssb155480 	if (vsw_get_md_smodes(vswp, mdp, node,
3378*19b65a69Ssb155480 	    new_smode, &smode_num)) {
337934683adeSsg70180 		cmn_err(CE_WARN, "!vsw%d: Unable to read %s property from MD",
338034683adeSsg70180 		    vswp->instance, smode_propname);
338134683adeSsg70180 		goto fail_reconf;
338234683adeSsg70180 	} else {
338334683adeSsg70180 		ASSERT(smode_num != 0);
338434683adeSsg70180 		if (smode_num != vswp->smode_num) {
338534683adeSsg70180 			D2(vswp, "%s: number of modes changed from %d to %d",
338634683adeSsg70180 			    __func__, vswp->smode_num, smode_num);
338734683adeSsg70180 		}
338834683adeSsg70180 
338934683adeSsg70180 		for (i = 0; i < smode_num; i++) {
339034683adeSsg70180 			if (new_smode[i] != vswp->smode[i]) {
339134683adeSsg70180 				D2(vswp, "%s: mode changed from %d to %d",
339234683adeSsg70180 				    __func__, vswp->smode[i], new_smode[i]);
339334683adeSsg70180 				updated |= MD_smode;
339434683adeSsg70180 				break;
339534683adeSsg70180 			}
339634683adeSsg70180 		}
339734683adeSsg70180 	}
339834683adeSsg70180 
339934683adeSsg70180 	/*
340034683adeSsg70180 	 * Now make any changes which are needed...
340134683adeSsg70180 	 */
340234683adeSsg70180 
340334683adeSsg70180 	if (updated & (MD_physname | MD_smode)) {
340434683adeSsg70180 
340534683adeSsg70180 		/*
3406*19b65a69Ssb155480 		 * Stop any pending timeout to setup switching mode.
340734683adeSsg70180 		 */
3408*19b65a69Ssb155480 		vsw_stop_switching_timeout(vswp);
3409*19b65a69Ssb155480 
3410*19b65a69Ssb155480 		/*
3411*19b65a69Ssb155480 		 * Remove unicst, mcst addrs of vsw interface
3412*19b65a69Ssb155480 		 * and ports from the physdev.
3413*19b65a69Ssb155480 		 */
3414*19b65a69Ssb155480 		vsw_unset_addrs(vswp);
3415*19b65a69Ssb155480 
3416*19b65a69Ssb155480 		/*
3417*19b65a69Ssb155480 		 * Stop, detach and close the old device..
3418*19b65a69Ssb155480 		 */
3419*19b65a69Ssb155480 		mutex_enter(&vswp->mac_lock);
3420*19b65a69Ssb155480 
342134683adeSsg70180 		vsw_mac_detach(vswp);
3422*19b65a69Ssb155480 		vsw_mac_close(vswp);
3423*19b65a69Ssb155480 
3424*19b65a69Ssb155480 		mutex_exit(&vswp->mac_lock);
342534683adeSsg70180 
342634683adeSsg70180 		/*
342734683adeSsg70180 		 * Update phys name.
342834683adeSsg70180 		 */
342934683adeSsg70180 		if (updated & MD_physname) {
343034683adeSsg70180 			cmn_err(CE_NOTE, "!vsw%d: changing from %s to %s",
343134683adeSsg70180 			    vswp->instance, vswp->physname, physname);
343234683adeSsg70180 			(void) strncpy(vswp->physname,
343334683adeSsg70180 			    physname, strlen(physname) + 1);
343434683adeSsg70180 		}
343534683adeSsg70180 
343634683adeSsg70180 		/*
343734683adeSsg70180 		 * Update array with the new switch mode values.
343834683adeSsg70180 		 */
343934683adeSsg70180 		if (updated & MD_smode) {
344034683adeSsg70180 			for (i = 0; i < smode_num; i++)
344134683adeSsg70180 				vswp->smode[i] = new_smode[i];
344234683adeSsg70180 
344334683adeSsg70180 			vswp->smode_num = smode_num;
344434683adeSsg70180 			vswp->smode_idx = 0;
344534683adeSsg70180 		}
344634683adeSsg70180 
344734683adeSsg70180 		/*
344834683adeSsg70180 		 * ..and attach, start the new device.
344934683adeSsg70180 		 */
3450*19b65a69Ssb155480 		rv = vsw_setup_switching(vswp);
3451*19b65a69Ssb155480 		if (rv == EAGAIN) {
3452*19b65a69Ssb155480 			/*
3453*19b65a69Ssb155480 			 * Unable to setup switching mode.
3454*19b65a69Ssb155480 			 * As the error is EAGAIN, schedule a timeout to retry
3455*19b65a69Ssb155480 			 * and return. Programming addresses of ports and
3456*19b65a69Ssb155480 			 * vsw interface will be done when the timeout handler
3457*19b65a69Ssb155480 			 * completes successfully.
3458*19b65a69Ssb155480 			 */
3459*19b65a69Ssb155480 			mutex_enter(&vswp->swtmout_lock);
3460*19b65a69Ssb155480 
3461*19b65a69Ssb155480 			vswp->swtmout_enabled = B_TRUE;
3462*19b65a69Ssb155480 			vswp->swtmout_id =
3463*19b65a69Ssb155480 			    timeout(vsw_setup_switching_timeout, vswp,
3464*19b65a69Ssb155480 			    (vsw_setup_switching_delay *
3465*19b65a69Ssb155480 			    drv_usectohz(MICROSEC)));
3466*19b65a69Ssb155480 
3467*19b65a69Ssb155480 			mutex_exit(&vswp->swtmout_lock);
3468*19b65a69Ssb155480 
3469*19b65a69Ssb155480 			return;
3470*19b65a69Ssb155480 
3471*19b65a69Ssb155480 		} else if (rv) {
347234683adeSsg70180 			goto fail_update;
3473*19b65a69Ssb155480 		}
347434683adeSsg70180 
347534683adeSsg70180 		/*
3476*19b65a69Ssb155480 		 * program unicst, mcst addrs of vsw interface
3477*19b65a69Ssb155480 		 * and ports in the physdev.
347834683adeSsg70180 		 */
3479*19b65a69Ssb155480 		vsw_set_addrs(vswp);
348034683adeSsg70180 
3481*19b65a69Ssb155480 	} else if (updated & MD_macaddr) {
3482*19b65a69Ssb155480 		/*
3483*19b65a69Ssb155480 		 * We enter here if only MD_macaddr is exclusively updated.
3484*19b65a69Ssb155480 		 * If MD_physname and/or MD_smode are also updated, then
3485*19b65a69Ssb155480 		 * as part of that, we would have implicitly processed
3486*19b65a69Ssb155480 		 * MD_macaddr update (above).
3487*19b65a69Ssb155480 		 */
348834683adeSsg70180 		cmn_err(CE_NOTE, "!vsw%d: changing mac address to 0x%lx",
348934683adeSsg70180 		    vswp->instance, macaddr);
349034683adeSsg70180 
3491*19b65a69Ssb155480 		READ_ENTER(&vswp->if_lockrw);
3492*19b65a69Ssb155480 		if (vswp->if_state & VSW_IF_UP) {
349334683adeSsg70180 
34945f94e909Ssg70180 			mutex_enter(&vswp->hw_lock);
3495*19b65a69Ssb155480 			/*
3496*19b65a69Ssb155480 			 * Remove old mac address of vsw interface
3497*19b65a69Ssb155480 			 * from the physdev
3498*19b65a69Ssb155480 			 */
34995f94e909Ssg70180 			(void) vsw_unset_hw(vswp, NULL, VSW_LOCALDEV);
3500*19b65a69Ssb155480 			/*
3501*19b65a69Ssb155480 			 * Program new mac address of vsw interface
3502*19b65a69Ssb155480 			 * in the physdev
3503*19b65a69Ssb155480 			 */
3504*19b65a69Ssb155480 			rv = vsw_set_hw(vswp, NULL, VSW_LOCALDEV);
35055f94e909Ssg70180 			mutex_exit(&vswp->hw_lock);
3506*19b65a69Ssb155480 			if (rv != 0) {
3507*19b65a69Ssb155480 				cmn_err(CE_NOTE,
3508*19b65a69Ssb155480 				    "!vsw%d: failed to program interface "
3509*19b65a69Ssb155480 				    "unicast address\n", vswp->instance);
3510*19b65a69Ssb155480 			}
35115f94e909Ssg70180 			/*
351234683adeSsg70180 			 * Notify the MAC layer of the changed address.
351334683adeSsg70180 			 */
3514*19b65a69Ssb155480 			mac_unicst_update(vswp->if_mh,
3515*19b65a69Ssb155480 			    (uint8_t *)&vswp->if_addr);
3516*19b65a69Ssb155480 
3517*19b65a69Ssb155480 		}
3518*19b65a69Ssb155480 		RW_EXIT(&vswp->if_lockrw);
3519*19b65a69Ssb155480 
352034683adeSsg70180 	}
352134683adeSsg70180 
352234683adeSsg70180 	return;
352334683adeSsg70180 
352434683adeSsg70180 fail_reconf:
352534683adeSsg70180 	cmn_err(CE_WARN, "!vsw%d: configuration unchanged", vswp->instance);
352634683adeSsg70180 	return;
352734683adeSsg70180 
352834683adeSsg70180 fail_update:
352934683adeSsg70180 	cmn_err(CE_WARN, "!vsw%d: update of configuration failed",
353034683adeSsg70180 	    vswp->instance);
353134683adeSsg70180 }
353234683adeSsg70180 
353334683adeSsg70180 /*
35341ae08745Sheppo  * Add a new port to the system.
35351ae08745Sheppo  *
35361ae08745Sheppo  * Returns 0 on success, 1 on failure.
35371ae08745Sheppo  */
35381ae08745Sheppo int
35391ae08745Sheppo vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node)
35401ae08745Sheppo {
35411ae08745Sheppo 	uint64_t		ldc_id;
35421ae08745Sheppo 	uint8_t			*addrp;
35431ae08745Sheppo 	int			i, addrsz;
35441ae08745Sheppo 	int			num_nodes = 0, nchan = 0;
35451ae08745Sheppo 	int			listsz = 0;
35461ae08745Sheppo 	mde_cookie_t		*listp = NULL;
35471ae08745Sheppo 	struct ether_addr	ea;
35481ae08745Sheppo 	uint64_t		macaddr;
35491ae08745Sheppo 	uint64_t		inst = 0;
35501ae08745Sheppo 	vsw_port_t		*port;
35511ae08745Sheppo 
35521ae08745Sheppo 	if (md_get_prop_val(mdp, *node, id_propname, &inst)) {
35531ae08745Sheppo 		DWARN(vswp, "%s: prop(%s) not found", __func__,
35541ae08745Sheppo 		    id_propname);
35551ae08745Sheppo 		return (1);
35561ae08745Sheppo 	}
35571ae08745Sheppo 
35581ae08745Sheppo 	/*
35591ae08745Sheppo 	 * Find the channel endpoint node(s) (which should be under this
35601ae08745Sheppo 	 * port node) which contain the channel id(s).
35611ae08745Sheppo 	 */
35621ae08745Sheppo 	if ((num_nodes = md_node_count(mdp)) <= 0) {
35631ae08745Sheppo 		DERR(vswp, "%s: invalid number of nodes found (%d)",
35641ae08745Sheppo 		    __func__, num_nodes);
35651ae08745Sheppo 		return (1);
35661ae08745Sheppo 	}
35671ae08745Sheppo 
356834683adeSsg70180 	D2(vswp, "%s: %d nodes found", __func__, num_nodes);
356934683adeSsg70180 
35701ae08745Sheppo 	/* allocate enough space for node list */
35711ae08745Sheppo 	listsz = num_nodes * sizeof (mde_cookie_t);
35721ae08745Sheppo 	listp = kmem_zalloc(listsz, KM_SLEEP);
35731ae08745Sheppo 
3574205eeb1aSlm66018 	nchan = md_scan_dag(mdp, *node, md_find_name(mdp, chan_propname),
35751ae08745Sheppo 	    md_find_name(mdp, "fwd"), listp);
35761ae08745Sheppo 
35771ae08745Sheppo 	if (nchan <= 0) {
35781ae08745Sheppo 		DWARN(vswp, "%s: no %s nodes found", __func__, chan_propname);
35791ae08745Sheppo 		kmem_free(listp, listsz);
35801ae08745Sheppo 		return (1);
35811ae08745Sheppo 	}
35821ae08745Sheppo 
35831ae08745Sheppo 	D2(vswp, "%s: %d %s nodes found", __func__, nchan, chan_propname);
35841ae08745Sheppo 
35851ae08745Sheppo 	/* use property from first node found */
35861ae08745Sheppo 	if (md_get_prop_val(mdp, listp[0], id_propname, &ldc_id)) {
35871ae08745Sheppo 		DWARN(vswp, "%s: prop(%s) not found\n", __func__,
35881ae08745Sheppo 		    id_propname);
35891ae08745Sheppo 		kmem_free(listp, listsz);
35901ae08745Sheppo 		return (1);
35911ae08745Sheppo 	}
35921ae08745Sheppo 
35931ae08745Sheppo 	/* don't need list any more */
35941ae08745Sheppo 	kmem_free(listp, listsz);
35951ae08745Sheppo 
35961ae08745Sheppo 	D2(vswp, "%s: ldc_id 0x%llx", __func__, ldc_id);
35971ae08745Sheppo 
35981ae08745Sheppo 	/* read mac-address property */
35991ae08745Sheppo 	if (md_get_prop_data(mdp, *node, remaddr_propname,
36001ae08745Sheppo 	    &addrp, &addrsz)) {
36011ae08745Sheppo 		DWARN(vswp, "%s: prop(%s) not found",
36021ae08745Sheppo 		    __func__, remaddr_propname);
36031ae08745Sheppo 		return (1);
36041ae08745Sheppo 	}
36051ae08745Sheppo 
36061ae08745Sheppo 	if (addrsz < ETHERADDRL) {
36071ae08745Sheppo 		DWARN(vswp, "%s: invalid address size", __func__);
36081ae08745Sheppo 		return (1);
36091ae08745Sheppo 	}
36101ae08745Sheppo 
36111ae08745Sheppo 	macaddr = *((uint64_t *)addrp);
36121ae08745Sheppo 	D2(vswp, "%s: remote mac address 0x%llx", __func__, macaddr);
36131ae08745Sheppo 
36141ae08745Sheppo 	for (i = ETHERADDRL - 1; i >= 0; i--) {
36151ae08745Sheppo 		ea.ether_addr_octet[i] = macaddr & 0xFF;
36161ae08745Sheppo 		macaddr >>= 8;
36171ae08745Sheppo 	}
36181ae08745Sheppo 
36191ae08745Sheppo 	if (vsw_port_attach(vswp, (int)inst, &ldc_id, 1, &ea) != 0) {
36201ae08745Sheppo 		DERR(vswp, "%s: failed to attach port", __func__);
36211ae08745Sheppo 		return (1);
36221ae08745Sheppo 	}
36231ae08745Sheppo 
36241ae08745Sheppo 	port = vsw_lookup_port(vswp, (int)inst);
36251ae08745Sheppo 
36261ae08745Sheppo 	/* just successfuly created the port, so it should exist */
36271ae08745Sheppo 	ASSERT(port != NULL);
36281ae08745Sheppo 
36291ae08745Sheppo 	return (0);
36301ae08745Sheppo }
36311ae08745Sheppo 
36321ae08745Sheppo /*
36331ae08745Sheppo  * Attach the specified port.
36341ae08745Sheppo  *
36351ae08745Sheppo  * Returns 0 on success, 1 on failure.
36361ae08745Sheppo  */
36371ae08745Sheppo static int
36381ae08745Sheppo vsw_port_attach(vsw_t *vswp, int p_instance, uint64_t *ldcids, int nids,
36391ae08745Sheppo struct ether_addr *macaddr)
36401ae08745Sheppo {
36411ae08745Sheppo 	vsw_port_list_t		*plist = &vswp->plist;
36421ae08745Sheppo 	vsw_port_t		*port, **prev_port;
36431ae08745Sheppo 	int			i;
36441ae08745Sheppo 
36451ae08745Sheppo 	D1(vswp, "%s: enter : port %d", __func__, p_instance);
36461ae08745Sheppo 
36471ae08745Sheppo 	/* port already exists? */
36481ae08745Sheppo 	READ_ENTER(&plist->lockrw);
36491ae08745Sheppo 	for (port = plist->head; port != NULL; port = port->p_next) {
36501ae08745Sheppo 		if (port->p_instance == p_instance) {
36511ae08745Sheppo 			DWARN(vswp, "%s: port instance %d already attached",
36521ae08745Sheppo 			    __func__, p_instance);
36531ae08745Sheppo 			RW_EXIT(&plist->lockrw);
36541ae08745Sheppo 			return (1);
36551ae08745Sheppo 		}
36561ae08745Sheppo 	}
36571ae08745Sheppo 	RW_EXIT(&plist->lockrw);
36581ae08745Sheppo 
36591ae08745Sheppo 	port = kmem_zalloc(sizeof (vsw_port_t), KM_SLEEP);
36601ae08745Sheppo 	port->p_vswp = vswp;
36611ae08745Sheppo 	port->p_instance = p_instance;
36621ae08745Sheppo 	port->p_ldclist.num_ldcs = 0;
36631ae08745Sheppo 	port->p_ldclist.head = NULL;
3664e1ebb9ecSlm66018 	port->addr_set = VSW_ADDR_UNSET;
36651ae08745Sheppo 
36661ae08745Sheppo 	rw_init(&port->p_ldclist.lockrw, NULL, RW_DRIVER, NULL);
36671ae08745Sheppo 
36681ae08745Sheppo 	mutex_init(&port->tx_lock, NULL, MUTEX_DRIVER, NULL);
36691ae08745Sheppo 	mutex_init(&port->mca_lock, NULL, MUTEX_DRIVER, NULL);
36701ae08745Sheppo 
36711ae08745Sheppo 	mutex_init(&port->ref_lock, NULL, MUTEX_DRIVER, NULL);
36721ae08745Sheppo 	cv_init(&port->ref_cv, NULL, CV_DRIVER, NULL);
36731ae08745Sheppo 
36741ae08745Sheppo 	mutex_init(&port->state_lock, NULL, MUTEX_DRIVER, NULL);
36751ae08745Sheppo 	cv_init(&port->state_cv, NULL, CV_DRIVER, NULL);
36761ae08745Sheppo 	port->state = VSW_PORT_INIT;
36771ae08745Sheppo 
36781ae08745Sheppo 	if (nids > VSW_PORT_MAX_LDCS) {
3679*19b65a69Ssb155480 		D2(vswp, "%s: using first of %d ldc ids",
3680*19b65a69Ssb155480 		    __func__, nids);
36811ae08745Sheppo 		nids = VSW_PORT_MAX_LDCS;
36821ae08745Sheppo 	}
36831ae08745Sheppo 
36841ae08745Sheppo 	D2(vswp, "%s: %d nids", __func__, nids);
36851ae08745Sheppo 	for (i = 0; i < nids; i++) {
36861ae08745Sheppo 		D2(vswp, "%s: ldcid (%llx)", __func__, (uint64_t)ldcids[i]);
36871ae08745Sheppo 		if (vsw_ldc_attach(port, (uint64_t)ldcids[i]) != 0) {
36881ae08745Sheppo 			DERR(vswp, "%s: ldc_attach failed", __func__);
36891ae08745Sheppo 
36901ae08745Sheppo 			rw_destroy(&port->p_ldclist.lockrw);
36911ae08745Sheppo 
36921ae08745Sheppo 			cv_destroy(&port->ref_cv);
36931ae08745Sheppo 			mutex_destroy(&port->ref_lock);
36941ae08745Sheppo 
36951ae08745Sheppo 			cv_destroy(&port->state_cv);
36961ae08745Sheppo 			mutex_destroy(&port->state_lock);
36971ae08745Sheppo 
36981ae08745Sheppo 			mutex_destroy(&port->tx_lock);
36991ae08745Sheppo 			mutex_destroy(&port->mca_lock);
37001ae08745Sheppo 			kmem_free(port, sizeof (vsw_port_t));
37011ae08745Sheppo 			return (1);
37021ae08745Sheppo 		}
37031ae08745Sheppo 	}
37041ae08745Sheppo 
37051ae08745Sheppo 	ether_copy(macaddr, &port->p_macaddr);
37061ae08745Sheppo 
3707*19b65a69Ssb155480 	if (vswp->switching_setup_done == B_TRUE) {
3708*19b65a69Ssb155480 		/*
3709*19b65a69Ssb155480 		 * If the underlying physical device has been setup,
3710*19b65a69Ssb155480 		 * program the mac address of this port in it.
3711*19b65a69Ssb155480 		 * Otherwise, port macaddr will be set after the physical
3712*19b65a69Ssb155480 		 * device is successfully setup by the timeout handler.
3713*19b65a69Ssb155480 		 */
3714*19b65a69Ssb155480 		mutex_enter(&vswp->hw_lock);
3715*19b65a69Ssb155480 		(void) vsw_set_hw(vswp, port, VSW_VNETPORT);
3716*19b65a69Ssb155480 		mutex_exit(&vswp->hw_lock);
3717*19b65a69Ssb155480 	}
3718*19b65a69Ssb155480 
37191ae08745Sheppo 	WRITE_ENTER(&plist->lockrw);
37201ae08745Sheppo 
37211ae08745Sheppo 	/* create the fdb entry for this port/mac address */
37221ae08745Sheppo 	(void) vsw_add_fdb(vswp, port);
37231ae08745Sheppo 
37241ae08745Sheppo 	/* link it into the list of ports for this vsw instance */
37251ae08745Sheppo 	prev_port = (vsw_port_t **)(&plist->head);
37261ae08745Sheppo 	port->p_next = *prev_port;
37271ae08745Sheppo 	*prev_port = port;
37281ae08745Sheppo 	plist->num_ports++;
3729*19b65a69Ssb155480 
37301ae08745Sheppo 	RW_EXIT(&plist->lockrw);
37311ae08745Sheppo 
37321ae08745Sheppo 	/*
37331ae08745Sheppo 	 * Initialise the port and any ldc's under it.
37341ae08745Sheppo 	 */
37351ae08745Sheppo 	(void) vsw_init_ldcs(port);
37361ae08745Sheppo 
37371ae08745Sheppo 	D1(vswp, "%s: exit", __func__);
37381ae08745Sheppo 	return (0);
37391ae08745Sheppo }
37401ae08745Sheppo 
37411ae08745Sheppo /*
37421ae08745Sheppo  * Detach the specified port.
37431ae08745Sheppo  *
37441ae08745Sheppo  * Returns 0 on success, 1 on failure.
37451ae08745Sheppo  */
37461ae08745Sheppo static int
37471ae08745Sheppo vsw_port_detach(vsw_t *vswp, int p_instance)
37481ae08745Sheppo {
37491ae08745Sheppo 	vsw_port_t	*port = NULL;
37501ae08745Sheppo 	vsw_port_list_t	*plist = &vswp->plist;
37511ae08745Sheppo 
37521ae08745Sheppo 	D1(vswp, "%s: enter: port id %d", __func__, p_instance);
37531ae08745Sheppo 
37541ae08745Sheppo 	WRITE_ENTER(&plist->lockrw);
37551ae08745Sheppo 
37561ae08745Sheppo 	if ((port = vsw_lookup_port(vswp, p_instance)) == NULL) {
37571ae08745Sheppo 		RW_EXIT(&plist->lockrw);
37581ae08745Sheppo 		return (1);
37591ae08745Sheppo 	}
37601ae08745Sheppo 
37611ae08745Sheppo 	if (vsw_plist_del_node(vswp, port)) {
37621ae08745Sheppo 		RW_EXIT(&plist->lockrw);
37631ae08745Sheppo 		return (1);
37641ae08745Sheppo 	}
37651ae08745Sheppo 
37661ae08745Sheppo 	/* Remove the fdb entry for this port/mac address */
37671ae08745Sheppo 	(void) vsw_del_fdb(vswp, port);
37681ae08745Sheppo 
37691ae08745Sheppo 	/* Remove any multicast addresses.. */
37701ae08745Sheppo 	vsw_del_mcst_port(port);
37711ae08745Sheppo 
37721ae08745Sheppo 	/*
3773e1ebb9ecSlm66018 	 * No longer need to hold writer lock on port list now
3774e1ebb9ecSlm66018 	 * that we have unlinked the target port from the list.
37751ae08745Sheppo 	 */
37761ae08745Sheppo 	RW_EXIT(&plist->lockrw);
37771ae08745Sheppo 
37785f94e909Ssg70180 	/* Remove address if was programmed into HW. */
37795f94e909Ssg70180 	mutex_enter(&vswp->hw_lock);
3780*19b65a69Ssb155480 
3781*19b65a69Ssb155480 	/*
3782*19b65a69Ssb155480 	 * Port's address may not have been set in hardware. This could
3783*19b65a69Ssb155480 	 * happen if the underlying physical device is not yet available and
3784*19b65a69Ssb155480 	 * vsw_setup_switching_timeout() may be in progress.
3785*19b65a69Ssb155480 	 * We remove its addr from hardware only if it has been set before.
3786*19b65a69Ssb155480 	 */
3787*19b65a69Ssb155480 	if (port->addr_set != VSW_ADDR_UNSET)
37885f94e909Ssg70180 		(void) vsw_unset_hw(vswp, port, VSW_VNETPORT);
3789*19b65a69Ssb155480 
3790e1ebb9ecSlm66018 	if (vswp->recfg_reqd)
37915f94e909Ssg70180 		vsw_reconfig_hw(vswp);
3792*19b65a69Ssb155480 
37935f94e909Ssg70180 	mutex_exit(&vswp->hw_lock);
3794e1ebb9ecSlm66018 
37951ae08745Sheppo 	if (vsw_port_delete(port)) {
37961ae08745Sheppo 		return (1);
37971ae08745Sheppo 	}
37981ae08745Sheppo 
37991ae08745Sheppo 	D1(vswp, "%s: exit: p_instance(%d)", __func__, p_instance);
38001ae08745Sheppo 	return (0);
38011ae08745Sheppo }
38021ae08745Sheppo 
38031ae08745Sheppo /*
38041ae08745Sheppo  * Detach all active ports.
38051ae08745Sheppo  *
38061ae08745Sheppo  * Returns 0 on success, 1 on failure.
38071ae08745Sheppo  */
38081ae08745Sheppo static int
38091ae08745Sheppo vsw_detach_ports(vsw_t *vswp)
38101ae08745Sheppo {
38111ae08745Sheppo 	vsw_port_list_t 	*plist = &vswp->plist;
38121ae08745Sheppo 	vsw_port_t		*port = NULL;
38131ae08745Sheppo 
38141ae08745Sheppo 	D1(vswp, "%s: enter", __func__);
38151ae08745Sheppo 
38161ae08745Sheppo 	WRITE_ENTER(&plist->lockrw);
38171ae08745Sheppo 
38181ae08745Sheppo 	while ((port = plist->head) != NULL) {
38191ae08745Sheppo 		if (vsw_plist_del_node(vswp, port)) {
38201ae08745Sheppo 			DERR(vswp, "%s: Error deleting port %d"
3821205eeb1aSlm66018 			    " from port list", __func__, port->p_instance);
38221ae08745Sheppo 			RW_EXIT(&plist->lockrw);
38231ae08745Sheppo 			return (1);
38241ae08745Sheppo 		}
38251ae08745Sheppo 
3826e1ebb9ecSlm66018 		/* Remove address if was programmed into HW. */
38275f94e909Ssg70180 		mutex_enter(&vswp->hw_lock);
38285f94e909Ssg70180 		(void) vsw_unset_hw(vswp, port, VSW_VNETPORT);
38295f94e909Ssg70180 		mutex_exit(&vswp->hw_lock);
3830e1ebb9ecSlm66018 
38311ae08745Sheppo 		/* Remove the fdb entry for this port/mac address */
38321ae08745Sheppo 		(void) vsw_del_fdb(vswp, port);
38331ae08745Sheppo 
38341ae08745Sheppo 		/* Remove any multicast addresses.. */
38351ae08745Sheppo 		vsw_del_mcst_port(port);
38361ae08745Sheppo 
38371ae08745Sheppo 		/*
38381ae08745Sheppo 		 * No longer need to hold the lock on the port list
38391ae08745Sheppo 		 * now that we have unlinked the target port from the
38401ae08745Sheppo 		 * list.
38411ae08745Sheppo 		 */
38421ae08745Sheppo 		RW_EXIT(&plist->lockrw);
38431ae08745Sheppo 		if (vsw_port_delete(port)) {
38441ae08745Sheppo 			DERR(vswp, "%s: Error deleting port %d",
38451ae08745Sheppo 			    __func__, port->p_instance);
38461ae08745Sheppo 			return (1);
38471ae08745Sheppo 		}
38481ae08745Sheppo 		WRITE_ENTER(&plist->lockrw);
38491ae08745Sheppo 	}
38501ae08745Sheppo 	RW_EXIT(&plist->lockrw);
38511ae08745Sheppo 
38521ae08745Sheppo 	D1(vswp, "%s: exit", __func__);
38531ae08745Sheppo 
38541ae08745Sheppo 	return (0);
38551ae08745Sheppo }
38561ae08745Sheppo 
38571ae08745Sheppo /*
38581ae08745Sheppo  * Delete the specified port.
38591ae08745Sheppo  *
38601ae08745Sheppo  * Returns 0 on success, 1 on failure.
38611ae08745Sheppo  */
38621ae08745Sheppo static int
38631ae08745Sheppo vsw_port_delete(vsw_port_t *port)
38641ae08745Sheppo {
38651ae08745Sheppo 	vsw_ldc_list_t 		*ldcl;
38661ae08745Sheppo 	vsw_t			*vswp = port->p_vswp;
38671ae08745Sheppo 
38681ae08745Sheppo 	D1(vswp, "%s: enter : port id %d", __func__, port->p_instance);
38691ae08745Sheppo 
38701ae08745Sheppo 	(void) vsw_uninit_ldcs(port);
38711ae08745Sheppo 
38721ae08745Sheppo 	/*
38731ae08745Sheppo 	 * Wait for any pending ctrl msg tasks which reference this
38741ae08745Sheppo 	 * port to finish.
38751ae08745Sheppo 	 */
38761ae08745Sheppo 	if (vsw_drain_port_taskq(port))
38771ae08745Sheppo 		return (1);
38781ae08745Sheppo 
38791ae08745Sheppo 	/*
38801ae08745Sheppo 	 * Wait for port reference count to hit zero.
38811ae08745Sheppo 	 */
38821ae08745Sheppo 	mutex_enter(&port->ref_lock);
38831ae08745Sheppo 	while (port->ref_cnt != 0)
38841ae08745Sheppo 		cv_wait(&port->ref_cv, &port->ref_lock);
38851ae08745Sheppo 	mutex_exit(&port->ref_lock);
38861ae08745Sheppo 
38871ae08745Sheppo 	/*
38881ae08745Sheppo 	 * Wait for any active callbacks to finish
38891ae08745Sheppo 	 */
38901ae08745Sheppo 	if (vsw_drain_ldcs(port))
38911ae08745Sheppo 		return (1);
38921ae08745Sheppo 
38931ae08745Sheppo 	ldcl = &port->p_ldclist;
38941ae08745Sheppo 	WRITE_ENTER(&ldcl->lockrw);
38951ae08745Sheppo 	while (ldcl->num_ldcs > 0) {
3896205eeb1aSlm66018 		if (vsw_ldc_detach(port, ldcl->head->ldc_id) != 0) {
389734683adeSsg70180 			cmn_err(CE_WARN, "!vsw%d: unable to detach ldc %ld",
389834683adeSsg70180 			    vswp->instance, ldcl->head->ldc_id);
38991ae08745Sheppo 			RW_EXIT(&ldcl->lockrw);
39001ae08745Sheppo 			return (1);
39011ae08745Sheppo 		}
39021ae08745Sheppo 	}
39031ae08745Sheppo 	RW_EXIT(&ldcl->lockrw);
39041ae08745Sheppo 
39051ae08745Sheppo 	rw_destroy(&port->p_ldclist.lockrw);
39061ae08745Sheppo 
39071ae08745Sheppo 	mutex_destroy(&port->mca_lock);
39081ae08745Sheppo 	mutex_destroy(&port->tx_lock);
39091ae08745Sheppo 	cv_destroy(&port->ref_cv);
39101ae08745Sheppo 	mutex_destroy(&port->ref_lock);
39111ae08745Sheppo 
39121ae08745Sheppo 	cv_destroy(&port->state_cv);
39131ae08745Sheppo 	mutex_destroy(&port->state_lock);
39141ae08745Sheppo 
39151ae08745Sheppo 	kmem_free(port, sizeof (vsw_port_t));
39161ae08745Sheppo 
39171ae08745Sheppo 	D1(vswp, "%s: exit", __func__);
39181ae08745Sheppo 
39191ae08745Sheppo 	return (0);
39201ae08745Sheppo }
39211ae08745Sheppo 
39221ae08745Sheppo /*
39231ae08745Sheppo  * Attach a logical domain channel (ldc) under a specified port.
39241ae08745Sheppo  *
39251ae08745Sheppo  * Returns 0 on success, 1 on failure.
39261ae08745Sheppo  */
39271ae08745Sheppo static int
39281ae08745Sheppo vsw_ldc_attach(vsw_port_t *port, uint64_t ldc_id)
39291ae08745Sheppo {
39301ae08745Sheppo 	vsw_t 		*vswp = port->p_vswp;
39311ae08745Sheppo 	vsw_ldc_list_t *ldcl = &port->p_ldclist;
39321ae08745Sheppo 	vsw_ldc_t 	*ldcp = NULL;
39331ae08745Sheppo 	ldc_attr_t 	attr;
39341ae08745Sheppo 	ldc_status_t	istatus;
39351ae08745Sheppo 	int 		status = DDI_FAILURE;
3936d10e4ef2Snarayan 	int		rv;
39373af08d82Slm66018 	enum		{ PROG_init = 0x0, PROG_mblks = 0x1,
39383af08d82Slm66018 				PROG_callback = 0x2}
39393af08d82Slm66018 			progress;
39403af08d82Slm66018 
39413af08d82Slm66018 	progress = PROG_init;
39421ae08745Sheppo 
39431ae08745Sheppo 	D1(vswp, "%s: enter", __func__);
39441ae08745Sheppo 
39451ae08745Sheppo 	ldcp = kmem_zalloc(sizeof (vsw_ldc_t), KM_NOSLEEP);
39461ae08745Sheppo 	if (ldcp == NULL) {
39471ae08745Sheppo 		DERR(vswp, "%s: kmem_zalloc failed", __func__);
39481ae08745Sheppo 		return (1);
39491ae08745Sheppo 	}
39501ae08745Sheppo 	ldcp->ldc_id = ldc_id;
39511ae08745Sheppo 
3952d10e4ef2Snarayan 	/* allocate pool of receive mblks */
3953d10e4ef2Snarayan 	rv = vio_create_mblks(vsw_num_mblks, vsw_mblk_size, &(ldcp->rxh));
3954d10e4ef2Snarayan 	if (rv) {
3955d10e4ef2Snarayan 		DWARN(vswp, "%s: unable to create free mblk pool for"
3956d10e4ef2Snarayan 		    " channel %ld (rv %d)", __func__, ldc_id, rv);
3957d10e4ef2Snarayan 		kmem_free(ldcp, sizeof (vsw_ldc_t));
3958d10e4ef2Snarayan 		return (1);
3959d10e4ef2Snarayan 	}
3960d10e4ef2Snarayan 
39613af08d82Slm66018 	progress |= PROG_mblks;
39623af08d82Slm66018 
39631ae08745Sheppo 	mutex_init(&ldcp->ldc_txlock, NULL, MUTEX_DRIVER, NULL);
39641ae08745Sheppo 	mutex_init(&ldcp->ldc_cblock, NULL, MUTEX_DRIVER, NULL);
39651ae08745Sheppo 	mutex_init(&ldcp->drain_cv_lock, NULL, MUTEX_DRIVER, NULL);
39661ae08745Sheppo 	cv_init(&ldcp->drain_cv, NULL, CV_DRIVER, NULL);
3967445b4c2eSsb155480 	rw_init(&ldcp->lane_in.dlistrw, NULL, RW_DRIVER, NULL);
3968445b4c2eSsb155480 	rw_init(&ldcp->lane_out.dlistrw, NULL, RW_DRIVER, NULL);
39691ae08745Sheppo 
39701ae08745Sheppo 	/* required for handshake with peer */
39711ae08745Sheppo 	ldcp->local_session = (uint64_t)ddi_get_lbolt();
39721ae08745Sheppo 	ldcp->peer_session = 0;
39731ae08745Sheppo 	ldcp->session_status = 0;
39741ae08745Sheppo 
39751ae08745Sheppo 	mutex_init(&ldcp->hss_lock, NULL, MUTEX_DRIVER, NULL);
39761ae08745Sheppo 	ldcp->hss_id = 1;	/* Initial handshake session id */
39771ae08745Sheppo 
39781ae08745Sheppo 	/* only set for outbound lane, inbound set by peer */
3979d10e4ef2Snarayan 	mutex_init(&ldcp->lane_in.seq_lock, NULL, MUTEX_DRIVER, NULL);
3980d10e4ef2Snarayan 	mutex_init(&ldcp->lane_out.seq_lock, NULL, MUTEX_DRIVER, NULL);
39811ae08745Sheppo 	vsw_set_lane_attr(vswp, &ldcp->lane_out);
39821ae08745Sheppo 
39831ae08745Sheppo 	attr.devclass = LDC_DEV_NT_SVC;
39841ae08745Sheppo 	attr.instance = ddi_get_instance(vswp->dip);
39851ae08745Sheppo 	attr.mode = LDC_MODE_UNRELIABLE;
3986e1ebb9ecSlm66018 	attr.mtu = VSW_LDC_MTU;
39871ae08745Sheppo 	status = ldc_init(ldc_id, &attr, &ldcp->ldc_handle);
39881ae08745Sheppo 	if (status != 0) {
39891ae08745Sheppo 		DERR(vswp, "%s(%lld): ldc_init failed, rv (%d)",
39901ae08745Sheppo 		    __func__, ldc_id, status);
3991d10e4ef2Snarayan 		goto ldc_attach_fail;
39921ae08745Sheppo 	}
39931ae08745Sheppo 
39941ae08745Sheppo 	status = ldc_reg_callback(ldcp->ldc_handle, vsw_ldc_cb, (caddr_t)ldcp);
39951ae08745Sheppo 	if (status != 0) {
39961ae08745Sheppo 		DERR(vswp, "%s(%lld): ldc_reg_callback failed, rv (%d)",
39971ae08745Sheppo 		    __func__, ldc_id, status);
39981ae08745Sheppo 		(void) ldc_fini(ldcp->ldc_handle);
3999d10e4ef2Snarayan 		goto ldc_attach_fail;
40001ae08745Sheppo 	}
40011ae08745Sheppo 
40023af08d82Slm66018 	progress |= PROG_callback;
40033af08d82Slm66018 
40043af08d82Slm66018 	mutex_init(&ldcp->status_lock, NULL, MUTEX_DRIVER, NULL);
40051ae08745Sheppo 
40061ae08745Sheppo 	if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
40071ae08745Sheppo 		DERR(vswp, "%s: ldc_status failed", __func__);
40083af08d82Slm66018 		mutex_destroy(&ldcp->status_lock);
40093af08d82Slm66018 		goto ldc_attach_fail;
40101ae08745Sheppo 	}
40111ae08745Sheppo 
40121ae08745Sheppo 	ldcp->ldc_status = istatus;
40131ae08745Sheppo 	ldcp->ldc_port = port;
40141ae08745Sheppo 	ldcp->ldc_vswp = vswp;
40151ae08745Sheppo 
40161ae08745Sheppo 	/* link it into the list of channels for this port */
40171ae08745Sheppo 	WRITE_ENTER(&ldcl->lockrw);
40181ae08745Sheppo 	ldcp->ldc_next = ldcl->head;
40191ae08745Sheppo 	ldcl->head = ldcp;
40201ae08745Sheppo 	ldcl->num_ldcs++;
40211ae08745Sheppo 	RW_EXIT(&ldcl->lockrw);
40221ae08745Sheppo 
40231ae08745Sheppo 	D1(vswp, "%s: exit", __func__);
40241ae08745Sheppo 	return (0);
4025d10e4ef2Snarayan 
4026d10e4ef2Snarayan ldc_attach_fail:
4027d10e4ef2Snarayan 	mutex_destroy(&ldcp->ldc_txlock);
4028d10e4ef2Snarayan 	mutex_destroy(&ldcp->ldc_cblock);
4029d10e4ef2Snarayan 
4030d10e4ef2Snarayan 	cv_destroy(&ldcp->drain_cv);
4031d10e4ef2Snarayan 
4032445b4c2eSsb155480 	rw_destroy(&ldcp->lane_in.dlistrw);
4033445b4c2eSsb155480 	rw_destroy(&ldcp->lane_out.dlistrw);
4034445b4c2eSsb155480 
40353af08d82Slm66018 	if (progress & PROG_callback) {
40363af08d82Slm66018 		(void) ldc_unreg_callback(ldcp->ldc_handle);
40373af08d82Slm66018 	}
40383af08d82Slm66018 
40393af08d82Slm66018 	if ((progress & PROG_mblks) && (ldcp->rxh != NULL)) {
4040d10e4ef2Snarayan 		if (vio_destroy_mblks(ldcp->rxh) != 0) {
4041d10e4ef2Snarayan 			/*
4042d10e4ef2Snarayan 			 * Something odd has happened, as the destroy
4043d10e4ef2Snarayan 			 * will only fail if some mblks have been allocated
4044d10e4ef2Snarayan 			 * from the pool already (which shouldn't happen)
4045d10e4ef2Snarayan 			 * and have not been returned.
4046d10e4ef2Snarayan 			 *
4047d10e4ef2Snarayan 			 * Add the pool pointer to a list maintained in
4048d10e4ef2Snarayan 			 * the device instance. Another attempt will be made
4049d10e4ef2Snarayan 			 * to free the pool when the device itself detaches.
4050d10e4ef2Snarayan 			 */
405134683adeSsg70180 			cmn_err(CE_WARN, "!vsw%d: Creation of ldc channel %ld "
405234683adeSsg70180 			    "failed and cannot destroy associated mblk "
405334683adeSsg70180 			    "pool", vswp->instance, ldc_id);
4054d10e4ef2Snarayan 			ldcp->rxh->nextp =  vswp->rxh;
4055d10e4ef2Snarayan 			vswp->rxh = ldcp->rxh;
4056d10e4ef2Snarayan 		}
4057d10e4ef2Snarayan 	}
4058d10e4ef2Snarayan 	mutex_destroy(&ldcp->drain_cv_lock);
4059d10e4ef2Snarayan 	mutex_destroy(&ldcp->hss_lock);
4060d10e4ef2Snarayan 
4061d10e4ef2Snarayan 	mutex_destroy(&ldcp->lane_in.seq_lock);
4062d10e4ef2Snarayan 	mutex_destroy(&ldcp->lane_out.seq_lock);
4063d10e4ef2Snarayan 	kmem_free(ldcp, sizeof (vsw_ldc_t));
4064d10e4ef2Snarayan 
4065d10e4ef2Snarayan 	return (1);
40661ae08745Sheppo }
40671ae08745Sheppo 
40681ae08745Sheppo /*
40691ae08745Sheppo  * Detach a logical domain channel (ldc) belonging to a
40701ae08745Sheppo  * particular port.
40711ae08745Sheppo  *
40721ae08745Sheppo  * Returns 0 on success, 1 on failure.
40731ae08745Sheppo  */
40741ae08745Sheppo static int
40751ae08745Sheppo vsw_ldc_detach(vsw_port_t *port, uint64_t ldc_id)
40761ae08745Sheppo {
40771ae08745Sheppo 	vsw_t 		*vswp = port->p_vswp;
40781ae08745Sheppo 	vsw_ldc_t 	*ldcp, *prev_ldcp;
40791ae08745Sheppo 	vsw_ldc_list_t	*ldcl = &port->p_ldclist;
40801ae08745Sheppo 	int 		rv;
40811ae08745Sheppo 
40821ae08745Sheppo 	prev_ldcp = ldcl->head;
40831ae08745Sheppo 	for (; (ldcp = prev_ldcp) != NULL; prev_ldcp = ldcp->ldc_next) {
40841ae08745Sheppo 		if (ldcp->ldc_id == ldc_id) {
40851ae08745Sheppo 			break;
40861ae08745Sheppo 		}
40871ae08745Sheppo 	}
40881ae08745Sheppo 
40891ae08745Sheppo 	/* specified ldc id not found */
40901ae08745Sheppo 	if (ldcp == NULL) {
40911ae08745Sheppo 		DERR(vswp, "%s: ldcp = NULL", __func__);
40921ae08745Sheppo 		return (1);
40931ae08745Sheppo 	}
40941ae08745Sheppo 
40951ae08745Sheppo 	D2(vswp, "%s: detaching channel %lld", __func__, ldcp->ldc_id);
40961ae08745Sheppo 
40971ae08745Sheppo 	/*
40981ae08745Sheppo 	 * Before we can close the channel we must release any mapped
40991ae08745Sheppo 	 * resources (e.g. drings).
41001ae08745Sheppo 	 */
41011ae08745Sheppo 	vsw_free_lane_resources(ldcp, INBOUND);
41021ae08745Sheppo 	vsw_free_lane_resources(ldcp, OUTBOUND);
41031ae08745Sheppo 
41041ae08745Sheppo 	/*
41051ae08745Sheppo 	 * If the close fails we are in serious trouble, as won't
41061ae08745Sheppo 	 * be able to delete the parent port.
41071ae08745Sheppo 	 */
41081ae08745Sheppo 	if ((rv = ldc_close(ldcp->ldc_handle)) != 0) {
41091ae08745Sheppo 		DERR(vswp, "%s: error %d closing channel %lld",
41101ae08745Sheppo 		    __func__, rv, ldcp->ldc_id);
41111ae08745Sheppo 		return (1);
41121ae08745Sheppo 	}
41131ae08745Sheppo 
41141ae08745Sheppo 	(void) ldc_fini(ldcp->ldc_handle);
41151ae08745Sheppo 
41161ae08745Sheppo 	ldcp->ldc_status = LDC_INIT;
41171ae08745Sheppo 	ldcp->ldc_handle = NULL;
41181ae08745Sheppo 	ldcp->ldc_vswp = NULL;
4119d10e4ef2Snarayan 
4120d10e4ef2Snarayan 	if (ldcp->rxh != NULL) {
4121d10e4ef2Snarayan 		if (vio_destroy_mblks(ldcp->rxh)) {
4122d10e4ef2Snarayan 			/*
4123d10e4ef2Snarayan 			 * Mostly likely some mblks are still in use and
4124d10e4ef2Snarayan 			 * have not been returned to the pool. Add the pool
4125d10e4ef2Snarayan 			 * to the list maintained in the device instance.
4126d10e4ef2Snarayan 			 * Another attempt will be made to destroy the pool
4127d10e4ef2Snarayan 			 * when the device detaches.
4128d10e4ef2Snarayan 			 */
4129d10e4ef2Snarayan 			ldcp->rxh->nextp =  vswp->rxh;
4130d10e4ef2Snarayan 			vswp->rxh = ldcp->rxh;
4131d10e4ef2Snarayan 		}
4132d10e4ef2Snarayan 	}
4133d10e4ef2Snarayan 
41343af08d82Slm66018 	/* unlink it from the list */
41353af08d82Slm66018 	prev_ldcp = ldcp->ldc_next;
41363af08d82Slm66018 	ldcl->num_ldcs--;
41373af08d82Slm66018 
41381ae08745Sheppo 	mutex_destroy(&ldcp->ldc_txlock);
41391ae08745Sheppo 	mutex_destroy(&ldcp->ldc_cblock);
41401ae08745Sheppo 	cv_destroy(&ldcp->drain_cv);
41411ae08745Sheppo 	mutex_destroy(&ldcp->drain_cv_lock);
41421ae08745Sheppo 	mutex_destroy(&ldcp->hss_lock);
4143d10e4ef2Snarayan 	mutex_destroy(&ldcp->lane_in.seq_lock);
4144d10e4ef2Snarayan 	mutex_destroy(&ldcp->lane_out.seq_lock);
41453af08d82Slm66018 	mutex_destroy(&ldcp->status_lock);
4146445b4c2eSsb155480 	rw_destroy(&ldcp->lane_in.dlistrw);
4147445b4c2eSsb155480 	rw_destroy(&ldcp->lane_out.dlistrw);
41481ae08745Sheppo 
41491ae08745Sheppo 	kmem_free(ldcp, sizeof (vsw_ldc_t));
41501ae08745Sheppo 
41511ae08745Sheppo 	return (0);
41521ae08745Sheppo }
41531ae08745Sheppo 
41541ae08745Sheppo /*
41551ae08745Sheppo  * Open and attempt to bring up the channel. Note that channel
41561ae08745Sheppo  * can only be brought up if peer has also opened channel.
41571ae08745Sheppo  *
41581ae08745Sheppo  * Returns 0 if can open and bring up channel, otherwise
41591ae08745Sheppo  * returns 1.
41601ae08745Sheppo  */
41611ae08745Sheppo static int
41621ae08745Sheppo vsw_ldc_init(vsw_ldc_t *ldcp)
41631ae08745Sheppo {
41641ae08745Sheppo 	vsw_t 		*vswp = ldcp->ldc_vswp;
41651ae08745Sheppo 	ldc_status_t	istatus = 0;
41661ae08745Sheppo 	int		rv;
41671ae08745Sheppo 
41681ae08745Sheppo 	D1(vswp, "%s: enter", __func__);
41691ae08745Sheppo 
41701ae08745Sheppo 	LDC_ENTER_LOCK(ldcp);
41711ae08745Sheppo 
41721ae08745Sheppo 	/* don't start at 0 in case clients don't like that */
41731ae08745Sheppo 	ldcp->next_ident = 1;
41741ae08745Sheppo 
41751ae08745Sheppo 	rv = ldc_open(ldcp->ldc_handle);
41761ae08745Sheppo 	if (rv != 0) {
41771ae08745Sheppo 		DERR(vswp, "%s: ldc_open failed: id(%lld) rv(%d)",
41781ae08745Sheppo 		    __func__, ldcp->ldc_id, rv);
41791ae08745Sheppo 		LDC_EXIT_LOCK(ldcp);
41801ae08745Sheppo 		return (1);
41811ae08745Sheppo 	}
41821ae08745Sheppo 
41831ae08745Sheppo 	if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
41841ae08745Sheppo 		DERR(vswp, "%s: unable to get status", __func__);
41851ae08745Sheppo 		LDC_EXIT_LOCK(ldcp);
41861ae08745Sheppo 		return (1);
41871ae08745Sheppo 
41881ae08745Sheppo 	} else if (istatus != LDC_OPEN && istatus != LDC_READY) {
41891ae08745Sheppo 		DERR(vswp, "%s: id (%lld) status(%d) is not OPEN/READY",
41901ae08745Sheppo 		    __func__, ldcp->ldc_id, istatus);
41911ae08745Sheppo 		LDC_EXIT_LOCK(ldcp);
41921ae08745Sheppo 		return (1);
41931ae08745Sheppo 	}
41941ae08745Sheppo 
41953af08d82Slm66018 	mutex_enter(&ldcp->status_lock);
41961ae08745Sheppo 	ldcp->ldc_status = istatus;
41973af08d82Slm66018 	mutex_exit(&ldcp->status_lock);
41983af08d82Slm66018 
41991ae08745Sheppo 	rv = ldc_up(ldcp->ldc_handle);
42001ae08745Sheppo 	if (rv != 0) {
42011ae08745Sheppo 		/*
42021ae08745Sheppo 		 * Not a fatal error for ldc_up() to fail, as peer
42031ae08745Sheppo 		 * end point may simply not be ready yet.
42041ae08745Sheppo 		 */
42051ae08745Sheppo 		D2(vswp, "%s: ldc_up err id(%lld) rv(%d)", __func__,
42061ae08745Sheppo 		    ldcp->ldc_id, rv);
42071ae08745Sheppo 		LDC_EXIT_LOCK(ldcp);
42081ae08745Sheppo 		return (1);
42091ae08745Sheppo 	}
42101ae08745Sheppo 
42111ae08745Sheppo 	/*
42121ae08745Sheppo 	 * ldc_up() call is non-blocking so need to explicitly
42131ae08745Sheppo 	 * check channel status to see if in fact the channel
42141ae08745Sheppo 	 * is UP.
42151ae08745Sheppo 	 */
42163af08d82Slm66018 	mutex_enter(&ldcp->status_lock);
42173af08d82Slm66018 	if (ldc_status(ldcp->ldc_handle, &ldcp->ldc_status) != 0) {
42181ae08745Sheppo 		DERR(vswp, "%s: unable to get status", __func__);
42193af08d82Slm66018 		mutex_exit(&ldcp->status_lock);
42201ae08745Sheppo 		LDC_EXIT_LOCK(ldcp);
42211ae08745Sheppo 		return (1);
42221ae08745Sheppo 
42231ae08745Sheppo 	}
4224b071742bSsg70180 
4225b071742bSsg70180 	if (ldcp->ldc_status == LDC_UP) {
4226b071742bSsg70180 		D2(vswp, "%s: channel %ld now UP (%ld)", __func__,
4227b071742bSsg70180 		    ldcp->ldc_id, istatus);
42283af08d82Slm66018 		mutex_exit(&ldcp->status_lock);
42291ae08745Sheppo 		LDC_EXIT_LOCK(ldcp);
42301ae08745Sheppo 
4231b071742bSsg70180 		vsw_process_conn_evt(ldcp, VSW_CONN_UP);
4232b071742bSsg70180 		return (0);
42333af08d82Slm66018 	}
42343af08d82Slm66018 
4235b071742bSsg70180 	mutex_exit(&ldcp->status_lock);
4236b071742bSsg70180 	LDC_EXIT_LOCK(ldcp);
4237b071742bSsg70180 
42381ae08745Sheppo 	D1(vswp, "%s: exit", __func__);
42391ae08745Sheppo 	return (0);
42401ae08745Sheppo }
42411ae08745Sheppo 
42421ae08745Sheppo /* disable callbacks on the channel */
42431ae08745Sheppo static int
42441ae08745Sheppo vsw_ldc_uninit(vsw_ldc_t *ldcp)
42451ae08745Sheppo {
42461ae08745Sheppo 	vsw_t	*vswp = ldcp->ldc_vswp;
42471ae08745Sheppo 	int	rv;
42481ae08745Sheppo 
42491ae08745Sheppo 	D1(vswp, "vsw_ldc_uninit: enter: id(%lx)\n", ldcp->ldc_id);
42501ae08745Sheppo 
42511ae08745Sheppo 	LDC_ENTER_LOCK(ldcp);
42521ae08745Sheppo 
42531ae08745Sheppo 	rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE);
42541ae08745Sheppo 	if (rv != 0) {
42551ae08745Sheppo 		DERR(vswp, "vsw_ldc_uninit(%lld): error disabling "
42561ae08745Sheppo 		    "interrupts (rv = %d)\n", ldcp->ldc_id, rv);
42571ae08745Sheppo 		LDC_EXIT_LOCK(ldcp);
42581ae08745Sheppo 		return (1);
42591ae08745Sheppo 	}
42601ae08745Sheppo 
42613af08d82Slm66018 	mutex_enter(&ldcp->status_lock);
42621ae08745Sheppo 	ldcp->ldc_status = LDC_INIT;
42633af08d82Slm66018 	mutex_exit(&ldcp->status_lock);
42641ae08745Sheppo 
42651ae08745Sheppo 	LDC_EXIT_LOCK(ldcp);
42661ae08745Sheppo 
42671ae08745Sheppo 	D1(vswp, "vsw_ldc_uninit: exit: id(%lx)", ldcp->ldc_id);
42681ae08745Sheppo 
42691ae08745Sheppo 	return (0);
42701ae08745Sheppo }
42711ae08745Sheppo 
42721ae08745Sheppo static int
42731ae08745Sheppo vsw_init_ldcs(vsw_port_t *port)
42741ae08745Sheppo {
42751ae08745Sheppo 	vsw_ldc_list_t	*ldcl = &port->p_ldclist;
42761ae08745Sheppo 	vsw_ldc_t	*ldcp;
42771ae08745Sheppo 
42781ae08745Sheppo 	READ_ENTER(&ldcl->lockrw);
42791ae08745Sheppo 	ldcp =  ldcl->head;
42801ae08745Sheppo 	for (; ldcp  != NULL; ldcp = ldcp->ldc_next) {
42811ae08745Sheppo 		(void) vsw_ldc_init(ldcp);
42821ae08745Sheppo 	}
42831ae08745Sheppo 	RW_EXIT(&ldcl->lockrw);
42841ae08745Sheppo 
42851ae08745Sheppo 	return (0);
42861ae08745Sheppo }
42871ae08745Sheppo 
42881ae08745Sheppo static int
42891ae08745Sheppo vsw_uninit_ldcs(vsw_port_t *port)
42901ae08745Sheppo {
42911ae08745Sheppo 	vsw_ldc_list_t	*ldcl = &port->p_ldclist;
42921ae08745Sheppo 	vsw_ldc_t	*ldcp;
42931ae08745Sheppo 
42941ae08745Sheppo 	D1(NULL, "vsw_uninit_ldcs: enter\n");
42951ae08745Sheppo 
42961ae08745Sheppo 	READ_ENTER(&ldcl->lockrw);
42971ae08745Sheppo 	ldcp =  ldcl->head;
42981ae08745Sheppo 	for (; ldcp  != NULL; ldcp = ldcp->ldc_next) {
42991ae08745Sheppo 		(void) vsw_ldc_uninit(ldcp);
43001ae08745Sheppo 	}
43011ae08745Sheppo 	RW_EXIT(&ldcl->lockrw);
43021ae08745Sheppo 
43031ae08745Sheppo 	D1(NULL, "vsw_uninit_ldcs: exit\n");
43041ae08745Sheppo 
43051ae08745Sheppo 	return (0);
43061ae08745Sheppo }
43071ae08745Sheppo 
43081ae08745Sheppo /*
43091ae08745Sheppo  * Wait until the callback(s) associated with the ldcs under the specified
43101ae08745Sheppo  * port have completed.
43111ae08745Sheppo  *
43121ae08745Sheppo  * Prior to this function being invoked each channel under this port
43131ae08745Sheppo  * should have been quiesced via ldc_set_cb_mode(DISABLE).
43141ae08745Sheppo  *
43151ae08745Sheppo  * A short explaination of what we are doing below..
43161ae08745Sheppo  *
43171ae08745Sheppo  * The simplest approach would be to have a reference counter in
43181ae08745Sheppo  * the ldc structure which is increment/decremented by the callbacks as
43191ae08745Sheppo  * they use the channel. The drain function could then simply disable any
43201ae08745Sheppo  * further callbacks and do a cv_wait for the ref to hit zero. Unfortunately
43211ae08745Sheppo  * there is a tiny window here - before the callback is able to get the lock
43221ae08745Sheppo  * on the channel it is interrupted and this function gets to execute. It
43231ae08745Sheppo  * sees that the ref count is zero and believes its free to delete the
43241ae08745Sheppo  * associated data structures.
43251ae08745Sheppo  *
43261ae08745Sheppo  * We get around this by taking advantage of the fact that before the ldc
43271ae08745Sheppo  * framework invokes a callback it sets a flag to indicate that there is a
43281ae08745Sheppo  * callback active (or about to become active). If when we attempt to
43291ae08745Sheppo  * unregister a callback when this active flag is set then the unregister
43301ae08745Sheppo  * will fail with EWOULDBLOCK.
43311ae08745Sheppo  *
43321ae08745Sheppo  * If the unregister fails we do a cv_timedwait. We will either be signaled
43331ae08745Sheppo  * by the callback as it is exiting (note we have to wait a short period to
43341ae08745Sheppo  * allow the callback to return fully to the ldc framework and it to clear
43351ae08745Sheppo  * the active flag), or by the timer expiring. In either case we again attempt
43361ae08745Sheppo  * the unregister. We repeat this until we can succesfully unregister the
43371ae08745Sheppo  * callback.
43381ae08745Sheppo  *
43391ae08745Sheppo  * The reason we use a cv_timedwait rather than a simple cv_wait is to catch
43401ae08745Sheppo  * the case where the callback has finished but the ldc framework has not yet
43411ae08745Sheppo  * cleared the active flag. In this case we would never get a cv_signal.
43421ae08745Sheppo  */
43431ae08745Sheppo static int
43441ae08745Sheppo vsw_drain_ldcs(vsw_port_t *port)
43451ae08745Sheppo {
43461ae08745Sheppo 	vsw_ldc_list_t	*ldcl = &port->p_ldclist;
43471ae08745Sheppo 	vsw_ldc_t	*ldcp;
43481ae08745Sheppo 	vsw_t		*vswp = port->p_vswp;
43491ae08745Sheppo 
43501ae08745Sheppo 	D1(vswp, "%s: enter", __func__);
43511ae08745Sheppo 
43521ae08745Sheppo 	READ_ENTER(&ldcl->lockrw);
43531ae08745Sheppo 
43541ae08745Sheppo 	ldcp = ldcl->head;
43551ae08745Sheppo 
43561ae08745Sheppo 	for (; ldcp  != NULL; ldcp = ldcp->ldc_next) {
43571ae08745Sheppo 		/*
43581ae08745Sheppo 		 * If we can unregister the channel callback then we
43591ae08745Sheppo 		 * know that there is no callback either running or
43601ae08745Sheppo 		 * scheduled to run for this channel so move on to next
43611ae08745Sheppo 		 * channel in the list.
43621ae08745Sheppo 		 */
43631ae08745Sheppo 		mutex_enter(&ldcp->drain_cv_lock);
43641ae08745Sheppo 
43651ae08745Sheppo 		/* prompt active callbacks to quit */
43661ae08745Sheppo 		ldcp->drain_state = VSW_LDC_DRAINING;
43671ae08745Sheppo 
43681ae08745Sheppo 		if ((ldc_unreg_callback(ldcp->ldc_handle)) == 0) {
43691ae08745Sheppo 			D2(vswp, "%s: unreg callback for chan %ld", __func__,
43701ae08745Sheppo 			    ldcp->ldc_id);
43711ae08745Sheppo 			mutex_exit(&ldcp->drain_cv_lock);
43721ae08745Sheppo 			continue;
43731ae08745Sheppo 		} else {
43741ae08745Sheppo 			/*
43751ae08745Sheppo 			 * If we end up here we know that either 1) a callback
43761ae08745Sheppo 			 * is currently executing, 2) is about to start (i.e.
43771ae08745Sheppo 			 * the ldc framework has set the active flag but
43781ae08745Sheppo 			 * has not actually invoked the callback yet, or 3)
43791ae08745Sheppo 			 * has finished and has returned to the ldc framework
43801ae08745Sheppo 			 * but the ldc framework has not yet cleared the
43811ae08745Sheppo 			 * active bit.
43821ae08745Sheppo 			 *
43831ae08745Sheppo 			 * Wait for it to finish.
43841ae08745Sheppo 			 */
43851ae08745Sheppo 			while (ldc_unreg_callback(ldcp->ldc_handle)
43861ae08745Sheppo 			    == EWOULDBLOCK)
43871ae08745Sheppo 				(void) cv_timedwait(&ldcp->drain_cv,
43881ae08745Sheppo 				    &ldcp->drain_cv_lock, lbolt + hz);
43891ae08745Sheppo 
43901ae08745Sheppo 			mutex_exit(&ldcp->drain_cv_lock);
43911ae08745Sheppo 			D2(vswp, "%s: unreg callback for chan %ld after "
43921ae08745Sheppo 			    "timeout", __func__, ldcp->ldc_id);
43931ae08745Sheppo 		}
43941ae08745Sheppo 	}
43951ae08745Sheppo 	RW_EXIT(&ldcl->lockrw);
43961ae08745Sheppo 
43971ae08745Sheppo 	D1(vswp, "%s: exit", __func__);
43981ae08745Sheppo 	return (0);
43991ae08745Sheppo }
44001ae08745Sheppo 
44011ae08745Sheppo /*
44021ae08745Sheppo  * Wait until all tasks which reference this port have completed.
44031ae08745Sheppo  *
44041ae08745Sheppo  * Prior to this function being invoked each channel under this port
44051ae08745Sheppo  * should have been quiesced via ldc_set_cb_mode(DISABLE).
44061ae08745Sheppo  */
44071ae08745Sheppo static int
44081ae08745Sheppo vsw_drain_port_taskq(vsw_port_t *port)
44091ae08745Sheppo {
44101ae08745Sheppo 	vsw_t		*vswp = port->p_vswp;
44111ae08745Sheppo 
44121ae08745Sheppo 	D1(vswp, "%s: enter", __func__);
44131ae08745Sheppo 
44141ae08745Sheppo 	/*
44151ae08745Sheppo 	 * Mark the port as in the process of being detached, and
44161ae08745Sheppo 	 * dispatch a marker task to the queue so we know when all
44171ae08745Sheppo 	 * relevant tasks have completed.
44181ae08745Sheppo 	 */
44191ae08745Sheppo 	mutex_enter(&port->state_lock);
44201ae08745Sheppo 	port->state = VSW_PORT_DETACHING;
44211ae08745Sheppo 
44221ae08745Sheppo 	if ((vswp->taskq_p == NULL) ||
44231ae08745Sheppo 	    (ddi_taskq_dispatch(vswp->taskq_p, vsw_marker_task,
44241ae08745Sheppo 	    port, DDI_NOSLEEP) != DDI_SUCCESS)) {
44251ae08745Sheppo 		DERR(vswp, "%s: unable to dispatch marker task",
44261ae08745Sheppo 		    __func__);
44271ae08745Sheppo 		mutex_exit(&port->state_lock);
44281ae08745Sheppo 		return (1);
44291ae08745Sheppo 	}
44301ae08745Sheppo 
44311ae08745Sheppo 	/*
44321ae08745Sheppo 	 * Wait for the marker task to finish.
44331ae08745Sheppo 	 */
44341ae08745Sheppo 	while (port->state != VSW_PORT_DETACHABLE)
44351ae08745Sheppo 		cv_wait(&port->state_cv, &port->state_lock);
44361ae08745Sheppo 
44371ae08745Sheppo 	mutex_exit(&port->state_lock);
44381ae08745Sheppo 
44391ae08745Sheppo 	D1(vswp, "%s: exit", __func__);
44401ae08745Sheppo 
44411ae08745Sheppo 	return (0);
44421ae08745Sheppo }
44431ae08745Sheppo 
44441ae08745Sheppo static void
44451ae08745Sheppo vsw_marker_task(void *arg)
44461ae08745Sheppo {
44471ae08745Sheppo 	vsw_port_t	*port = arg;
44481ae08745Sheppo 	vsw_t		*vswp = port->p_vswp;
44491ae08745Sheppo 
44501ae08745Sheppo 	D1(vswp, "%s: enter", __func__);
44511ae08745Sheppo 
44521ae08745Sheppo 	mutex_enter(&port->state_lock);
44531ae08745Sheppo 
44541ae08745Sheppo 	/*
44551ae08745Sheppo 	 * No further tasks should be dispatched which reference
44561ae08745Sheppo 	 * this port so ok to mark it as safe to detach.
44571ae08745Sheppo 	 */
44581ae08745Sheppo 	port->state = VSW_PORT_DETACHABLE;
44591ae08745Sheppo 
44601ae08745Sheppo 	cv_signal(&port->state_cv);
44611ae08745Sheppo 
44621ae08745Sheppo 	mutex_exit(&port->state_lock);
44631ae08745Sheppo 
44641ae08745Sheppo 	D1(vswp, "%s: exit", __func__);
44651ae08745Sheppo }
44661ae08745Sheppo 
44671ae08745Sheppo static vsw_port_t *
44681ae08745Sheppo vsw_lookup_port(vsw_t *vswp, int p_instance)
44691ae08745Sheppo {
44701ae08745Sheppo 	vsw_port_list_t *plist = &vswp->plist;
44711ae08745Sheppo 	vsw_port_t	*port;
44721ae08745Sheppo 
44731ae08745Sheppo 	for (port = plist->head; port != NULL; port = port->p_next) {
44741ae08745Sheppo 		if (port->p_instance == p_instance) {
44751ae08745Sheppo 			D2(vswp, "vsw_lookup_port: found p_instance\n");
44761ae08745Sheppo 			return (port);
44771ae08745Sheppo 		}
44781ae08745Sheppo 	}
44791ae08745Sheppo 
44801ae08745Sheppo 	return (NULL);
44811ae08745Sheppo }
44821ae08745Sheppo 
44831ae08745Sheppo /*
44841ae08745Sheppo  * Search for and remove the specified port from the port
44851ae08745Sheppo  * list. Returns 0 if able to locate and remove port, otherwise
44861ae08745Sheppo  * returns 1.
44871ae08745Sheppo  */
44881ae08745Sheppo static int
44891ae08745Sheppo vsw_plist_del_node(vsw_t *vswp, vsw_port_t *port)
44901ae08745Sheppo {
44911ae08745Sheppo 	vsw_port_list_t *plist = &vswp->plist;
44921ae08745Sheppo 	vsw_port_t	*curr_p, *prev_p;
44931ae08745Sheppo 
44941ae08745Sheppo 	if (plist->head == NULL)
44951ae08745Sheppo 		return (1);
44961ae08745Sheppo 
44971ae08745Sheppo 	curr_p = prev_p = plist->head;
44981ae08745Sheppo 
44991ae08745Sheppo 	while (curr_p != NULL) {
45001ae08745Sheppo 		if (curr_p == port) {
45011ae08745Sheppo 			if (prev_p == curr_p) {
45021ae08745Sheppo 				plist->head = curr_p->p_next;
45031ae08745Sheppo 			} else {
45041ae08745Sheppo 				prev_p->p_next = curr_p->p_next;
45051ae08745Sheppo 			}
45061ae08745Sheppo 			plist->num_ports--;
45071ae08745Sheppo 			break;
45081ae08745Sheppo 		} else {
45091ae08745Sheppo 			prev_p = curr_p;
45101ae08745Sheppo 			curr_p = curr_p->p_next;
45111ae08745Sheppo 		}
45121ae08745Sheppo 	}
45131ae08745Sheppo 	return (0);
45141ae08745Sheppo }
45151ae08745Sheppo 
45161ae08745Sheppo /*
45171ae08745Sheppo  * Interrupt handler for ldc messages.
45181ae08745Sheppo  */
45191ae08745Sheppo static uint_t
45201ae08745Sheppo vsw_ldc_cb(uint64_t event, caddr_t arg)
45211ae08745Sheppo {
45221ae08745Sheppo 	vsw_ldc_t	*ldcp = (vsw_ldc_t  *)arg;
45231ae08745Sheppo 	vsw_t 		*vswp = ldcp->ldc_vswp;
45241ae08745Sheppo 
45251ae08745Sheppo 	D1(vswp, "%s: enter: ldcid (%lld)\n", __func__, ldcp->ldc_id);
45261ae08745Sheppo 
45271ae08745Sheppo 	mutex_enter(&ldcp->ldc_cblock);
45281ae08745Sheppo 
4529b071742bSsg70180 	mutex_enter(&ldcp->status_lock);
45301ae08745Sheppo 	if ((ldcp->ldc_status == LDC_INIT) || (ldcp->ldc_handle == NULL)) {
4531b071742bSsg70180 		mutex_exit(&ldcp->status_lock);
45321ae08745Sheppo 		mutex_exit(&ldcp->ldc_cblock);
45331ae08745Sheppo 		return (LDC_SUCCESS);
45341ae08745Sheppo 	}
45353af08d82Slm66018 	mutex_exit(&ldcp->status_lock);
45363af08d82Slm66018 
45371ae08745Sheppo 	if (event & LDC_EVT_UP) {
45381ae08745Sheppo 		/*
4539b071742bSsg70180 		 * Channel has come up.
45401ae08745Sheppo 		 */
45411ae08745Sheppo 		D2(vswp, "%s: id(%ld) event(%llx) UP: status(%ld)",
4542b071742bSsg70180 		    __func__, ldcp->ldc_id, event, ldcp->ldc_status);
4543b071742bSsg70180 
4544b071742bSsg70180 		vsw_process_conn_evt(ldcp, VSW_CONN_UP);
45451ae08745Sheppo 
45461ae08745Sheppo 		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);
45471ae08745Sheppo 	}
45481ae08745Sheppo 
45491ae08745Sheppo 	if (event & LDC_EVT_READ) {
45501ae08745Sheppo 		/*
45511ae08745Sheppo 		 * Data available for reading.
45521ae08745Sheppo 		 */
45531ae08745Sheppo 		D2(vswp, "%s: id(ld) event(%llx) data READ",
45541ae08745Sheppo 		    __func__, ldcp->ldc_id, event);
45551ae08745Sheppo 
45561ae08745Sheppo 		vsw_process_pkt(ldcp);
45571ae08745Sheppo 
45581ae08745Sheppo 		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);
45591ae08745Sheppo 
45601ae08745Sheppo 		goto vsw_cb_exit;
45611ae08745Sheppo 	}
45621ae08745Sheppo 
45633af08d82Slm66018 	if (event & (LDC_EVT_DOWN | LDC_EVT_RESET)) {
4564b071742bSsg70180 		D2(vswp, "%s: id(%ld) event (%lx) DOWN/RESET: status(%ld)",
4565b071742bSsg70180 		    __func__, ldcp->ldc_id, event, ldcp->ldc_status);
45663af08d82Slm66018 
4567b071742bSsg70180 		vsw_process_conn_evt(ldcp, VSW_CONN_RESET);
45681ae08745Sheppo 	}
45691ae08745Sheppo 
45701ae08745Sheppo 	/*
45711ae08745Sheppo 	 * Catch either LDC_EVT_WRITE which we don't support or any
45721ae08745Sheppo 	 * unknown event.
45731ae08745Sheppo 	 */
4574205eeb1aSlm66018 	if (event &
4575205eeb1aSlm66018 	    ~(LDC_EVT_UP | LDC_EVT_RESET | LDC_EVT_DOWN | LDC_EVT_READ)) {
45761ae08745Sheppo 		DERR(vswp, "%s: id(%ld) Unexpected event=(%llx) status(%ld)",
45771ae08745Sheppo 		    __func__, ldcp->ldc_id, event, ldcp->ldc_status);
45781ae08745Sheppo 	}
45791ae08745Sheppo 
45801ae08745Sheppo vsw_cb_exit:
45811ae08745Sheppo 	mutex_exit(&ldcp->ldc_cblock);
45821ae08745Sheppo 
45831ae08745Sheppo 	/*
45841ae08745Sheppo 	 * Let the drain function know we are finishing if it
45851ae08745Sheppo 	 * is waiting.
45861ae08745Sheppo 	 */
45871ae08745Sheppo 	mutex_enter(&ldcp->drain_cv_lock);
45881ae08745Sheppo 	if (ldcp->drain_state == VSW_LDC_DRAINING)
45891ae08745Sheppo 		cv_signal(&ldcp->drain_cv);
45901ae08745Sheppo 	mutex_exit(&ldcp->drain_cv_lock);
45911ae08745Sheppo 
45921ae08745Sheppo 	return (LDC_SUCCESS);
45931ae08745Sheppo }
45941ae08745Sheppo 
45951ae08745Sheppo /*
4596b071742bSsg70180  * Reinitialise data structures associated with the channel.
45971ae08745Sheppo  */
45981ae08745Sheppo static void
4599b071742bSsg70180 vsw_ldc_reinit(vsw_ldc_t *ldcp)
46001ae08745Sheppo {
46011ae08745Sheppo 	vsw_t		*vswp = ldcp->ldc_vswp;
46021ae08745Sheppo 	vsw_port_t	*port;
46031ae08745Sheppo 	vsw_ldc_list_t	*ldcl;
46041ae08745Sheppo 
46053af08d82Slm66018 	D1(vswp, "%s: enter", __func__);
46061ae08745Sheppo 
46071ae08745Sheppo 	port = ldcp->ldc_port;
46081ae08745Sheppo 	ldcl = &port->p_ldclist;
46091ae08745Sheppo 
46103af08d82Slm66018 	READ_ENTER(&ldcl->lockrw);
46111ae08745Sheppo 
46121ae08745Sheppo 	D2(vswp, "%s: in 0x%llx : out 0x%llx", __func__,
46131ae08745Sheppo 	    ldcp->lane_in.lstate, ldcp->lane_out.lstate);
46141ae08745Sheppo 
46151ae08745Sheppo 	vsw_free_lane_resources(ldcp, INBOUND);
46161ae08745Sheppo 	vsw_free_lane_resources(ldcp, OUTBOUND);
46171ae08745Sheppo 	RW_EXIT(&ldcl->lockrw);
46181ae08745Sheppo 
46191ae08745Sheppo 	ldcp->lane_in.lstate = 0;
46201ae08745Sheppo 	ldcp->lane_out.lstate = 0;
46211ae08745Sheppo 
46221ae08745Sheppo 	/*
46231ae08745Sheppo 	 * Remove parent port from any multicast groups
46241ae08745Sheppo 	 * it may have registered with. Client must resend
46251ae08745Sheppo 	 * multicast add command after handshake completes.
46261ae08745Sheppo 	 */
46271ae08745Sheppo 	(void) vsw_del_fdb(vswp, port);
46281ae08745Sheppo 
46291ae08745Sheppo 	vsw_del_mcst_port(port);
46301ae08745Sheppo 
46311ae08745Sheppo 	ldcp->peer_session = 0;
46321ae08745Sheppo 	ldcp->session_status = 0;
46333af08d82Slm66018 	ldcp->hcnt = 0;
46343af08d82Slm66018 	ldcp->hphase = VSW_MILESTONE0;
46353af08d82Slm66018 
46363af08d82Slm66018 	D1(vswp, "%s: exit", __func__);
46373af08d82Slm66018 }
46383af08d82Slm66018 
46393af08d82Slm66018 /*
4640b071742bSsg70180  * Process a connection event.
4641b071742bSsg70180  *
4642b071742bSsg70180  * Note - care must be taken to ensure that this function is
4643b071742bSsg70180  * not called with the dlistrw lock held.
46443af08d82Slm66018  */
46453af08d82Slm66018 static void
4646b071742bSsg70180 vsw_process_conn_evt(vsw_ldc_t *ldcp, uint16_t evt)
46473af08d82Slm66018 {
46483af08d82Slm66018 	vsw_t		*vswp = ldcp->ldc_vswp;
4649b071742bSsg70180 	vsw_conn_evt_t	*conn = NULL;
46503af08d82Slm66018 
4651b071742bSsg70180 	D1(vswp, "%s: enter", __func__);
46521ae08745Sheppo 
46531ae08745Sheppo 	/*
4654b071742bSsg70180 	 * Check if either a reset or restart event is pending
4655b071742bSsg70180 	 * or in progress. If so just return.
4656b071742bSsg70180 	 *
4657b071742bSsg70180 	 * A VSW_CONN_RESET event originates either with a LDC_RESET_EVT
4658b071742bSsg70180 	 * being received by the callback handler, or a ECONNRESET error
4659b071742bSsg70180 	 * code being returned from a ldc_read() or ldc_write() call.
4660b071742bSsg70180 	 *
4661b071742bSsg70180 	 * A VSW_CONN_RESTART event occurs when some error checking code
4662b071742bSsg70180 	 * decides that there is a problem with data from the channel,
4663b071742bSsg70180 	 * and that the handshake should be restarted.
4664b071742bSsg70180 	 */
4665b071742bSsg70180 	if (((evt == VSW_CONN_RESET) || (evt == VSW_CONN_RESTART)) &&
4666b071742bSsg70180 	    (ldstub((uint8_t *)&ldcp->reset_active)))
4667b071742bSsg70180 		return;
4668b071742bSsg70180 
4669b071742bSsg70180 	/*
4670b071742bSsg70180 	 * If it is an LDC_UP event we first check the recorded
4671b071742bSsg70180 	 * state of the channel. If this is UP then we know that
4672b071742bSsg70180 	 * the channel moving to the UP state has already been dealt
4673b071742bSsg70180 	 * with and don't need to dispatch a  new task.
4674b071742bSsg70180 	 *
4675b071742bSsg70180 	 * The reason for this check is that when we do a ldc_up(),
4676b071742bSsg70180 	 * depending on the state of the peer, we may or may not get
4677b071742bSsg70180 	 * a LDC_UP event. As we can't depend on getting a LDC_UP evt
4678b071742bSsg70180 	 * every time we do ldc_up() we explicitly check the channel
4679b071742bSsg70180 	 * status to see has it come up (ldc_up() is asynch and will
4680b071742bSsg70180 	 * complete at some undefined time), and take the appropriate
4681b071742bSsg70180 	 * action.
4682b071742bSsg70180 	 *
4683b071742bSsg70180 	 * The flip side of this is that we may get a LDC_UP event
4684b071742bSsg70180 	 * when we have already seen that the channel is up and have
4685b071742bSsg70180 	 * dealt with that.
4686b071742bSsg70180 	 */
4687b071742bSsg70180 	mutex_enter(&ldcp->status_lock);
4688b071742bSsg70180 	if (evt == VSW_CONN_UP) {
4689205eeb1aSlm66018 		if ((ldcp->ldc_status == LDC_UP) || (ldcp->reset_active != 0)) {
4690b071742bSsg70180 			mutex_exit(&ldcp->status_lock);
4691b071742bSsg70180 			return;
4692b071742bSsg70180 		}
4693b071742bSsg70180 	}
4694b071742bSsg70180 	mutex_exit(&ldcp->status_lock);
4695b071742bSsg70180 
4696b071742bSsg70180 	/*
4697b071742bSsg70180 	 * The transaction group id allows us to identify and discard
4698b071742bSsg70180 	 * any tasks which are still pending on the taskq and refer
4699b071742bSsg70180 	 * to the handshake session we are about to restart or reset.
4700b071742bSsg70180 	 * These stale messages no longer have any real meaning.
47011ae08745Sheppo 	 */
47021ae08745Sheppo 	mutex_enter(&ldcp->hss_lock);
47031ae08745Sheppo 	ldcp->hss_id++;
47041ae08745Sheppo 	mutex_exit(&ldcp->hss_lock);
47051ae08745Sheppo 
4706b071742bSsg70180 	ASSERT(vswp->taskq_p != NULL);
4707b071742bSsg70180 
4708b071742bSsg70180 	if ((conn = kmem_zalloc(sizeof (vsw_conn_evt_t), KM_NOSLEEP)) == NULL) {
4709b071742bSsg70180 		cmn_err(CE_WARN, "!vsw%d: unable to allocate memory for"
4710b071742bSsg70180 		    " connection event", vswp->instance);
4711b071742bSsg70180 		goto err_exit;
4712b071742bSsg70180 	}
4713b071742bSsg70180 
4714b071742bSsg70180 	conn->evt = evt;
4715b071742bSsg70180 	conn->ldcp = ldcp;
4716b071742bSsg70180 
4717b071742bSsg70180 	if (ddi_taskq_dispatch(vswp->taskq_p, vsw_conn_task, conn,
4718b071742bSsg70180 	    DDI_NOSLEEP) != DDI_SUCCESS) {
4719b071742bSsg70180 		cmn_err(CE_WARN, "!vsw%d: Can't dispatch connection task",
4720b071742bSsg70180 		    vswp->instance);
4721b071742bSsg70180 
4722b071742bSsg70180 		kmem_free(conn, sizeof (vsw_conn_evt_t));
4723b071742bSsg70180 		goto err_exit;
4724b071742bSsg70180 	}
4725b071742bSsg70180 
4726b071742bSsg70180 	D1(vswp, "%s: exit", __func__);
4727b071742bSsg70180 	return;
4728b071742bSsg70180 
4729b071742bSsg70180 err_exit:
4730b071742bSsg70180 	/*
4731b071742bSsg70180 	 * Have mostly likely failed due to memory shortage. Clear the flag so
4732b071742bSsg70180 	 * that future requests will at least be attempted and will hopefully
4733b071742bSsg70180 	 * succeed.
4734b071742bSsg70180 	 */
4735b071742bSsg70180 	if ((evt == VSW_CONN_RESET) || (evt == VSW_CONN_RESTART))
4736b071742bSsg70180 		ldcp->reset_active = 0;
4737b071742bSsg70180 }
4738b071742bSsg70180 
4739b071742bSsg70180 /*
4740b071742bSsg70180  * Deal with events relating to a connection. Invoked from a taskq.
4741b071742bSsg70180  */
4742b071742bSsg70180 static void
4743b071742bSsg70180 vsw_conn_task(void *arg)
4744b071742bSsg70180 {
4745b071742bSsg70180 	vsw_conn_evt_t	*conn = (vsw_conn_evt_t *)arg;
4746b071742bSsg70180 	vsw_ldc_t	*ldcp = NULL;
4747b071742bSsg70180 	vsw_t		*vswp = NULL;
4748b071742bSsg70180 	uint16_t	evt;
4749b071742bSsg70180 	ldc_status_t	curr_status;
4750b071742bSsg70180 
4751b071742bSsg70180 	ldcp = conn->ldcp;
4752b071742bSsg70180 	evt = conn->evt;
4753b071742bSsg70180 	vswp = ldcp->ldc_vswp;
4754b071742bSsg70180 
4755b071742bSsg70180 	D1(vswp, "%s: enter", __func__);
4756b071742bSsg70180 
4757b071742bSsg70180 	/* can safely free now have copied out data */
4758b071742bSsg70180 	kmem_free(conn, sizeof (vsw_conn_evt_t));
4759b071742bSsg70180 
4760b071742bSsg70180 	mutex_enter(&ldcp->status_lock);
4761b071742bSsg70180 	if (ldc_status(ldcp->ldc_handle, &curr_status) != 0) {
4762b071742bSsg70180 		cmn_err(CE_WARN, "!vsw%d: Unable to read status of "
4763b071742bSsg70180 		    "channel %ld", vswp->instance, ldcp->ldc_id);
4764b071742bSsg70180 		mutex_exit(&ldcp->status_lock);
4765b071742bSsg70180 		return;
4766b071742bSsg70180 	}
4767b071742bSsg70180 
4768b071742bSsg70180 	/*
4769b071742bSsg70180 	 * If we wish to restart the handshake on this channel, then if
4770b071742bSsg70180 	 * the channel is UP we bring it DOWN to flush the underlying
4771b071742bSsg70180 	 * ldc queue.
4772b071742bSsg70180 	 */
4773b071742bSsg70180 	if ((evt == VSW_CONN_RESTART) && (curr_status == LDC_UP))
4774b071742bSsg70180 		(void) ldc_down(ldcp->ldc_handle);
4775b071742bSsg70180 
4776b071742bSsg70180 	/*
4777b071742bSsg70180 	 * re-init all the associated data structures.
4778b071742bSsg70180 	 */
4779b071742bSsg70180 	vsw_ldc_reinit(ldcp);
4780b071742bSsg70180 
4781b071742bSsg70180 	/*
4782b071742bSsg70180 	 * Bring the channel back up (note it does no harm to
4783b071742bSsg70180 	 * do this even if the channel is already UP, Just
4784b071742bSsg70180 	 * becomes effectively a no-op).
4785b071742bSsg70180 	 */
4786b071742bSsg70180 	(void) ldc_up(ldcp->ldc_handle);
4787b071742bSsg70180 
4788b071742bSsg70180 	/*
4789b071742bSsg70180 	 * Check if channel is now UP. This will only happen if
4790b071742bSsg70180 	 * peer has also done a ldc_up().
4791b071742bSsg70180 	 */
4792b071742bSsg70180 	if (ldc_status(ldcp->ldc_handle, &curr_status) != 0) {
4793b071742bSsg70180 		cmn_err(CE_WARN, "!vsw%d: Unable to read status of "
4794b071742bSsg70180 		    "channel %ld", vswp->instance, ldcp->ldc_id);
4795b071742bSsg70180 		mutex_exit(&ldcp->status_lock);
4796b071742bSsg70180 		return;
4797b071742bSsg70180 	}
4798b071742bSsg70180 
4799b071742bSsg70180 	ldcp->ldc_status = curr_status;
4800b071742bSsg70180 
4801b071742bSsg70180 	/* channel UP so restart handshake by sending version info */
4802b071742bSsg70180 	if (curr_status == LDC_UP) {
48031ae08745Sheppo 		if (ldcp->hcnt++ > vsw_num_handshakes) {
480434683adeSsg70180 			cmn_err(CE_WARN, "!vsw%d: exceeded number of permitted"
48051ae08745Sheppo 			    " handshake attempts (%d) on channel %ld",
480634683adeSsg70180 			    vswp->instance, ldcp->hcnt, ldcp->ldc_id);
48073af08d82Slm66018 			mutex_exit(&ldcp->status_lock);
48083af08d82Slm66018 			return;
48093af08d82Slm66018 		}
4810b071742bSsg70180 
4811b071742bSsg70180 		if (ddi_taskq_dispatch(vswp->taskq_p, vsw_send_ver, ldcp,
4812b071742bSsg70180 		    DDI_NOSLEEP) != DDI_SUCCESS) {
4813b071742bSsg70180 			cmn_err(CE_WARN, "!vsw%d: Can't dispatch version task",
4814b071742bSsg70180 			    vswp->instance);
48153af08d82Slm66018 
48163af08d82Slm66018 			/*
4817b071742bSsg70180 			 * Don't count as valid restart attempt if couldn't
4818b071742bSsg70180 			 * send version msg.
48193af08d82Slm66018 			 */
4820b071742bSsg70180 			if (ldcp->hcnt > 0)
4821b071742bSsg70180 				ldcp->hcnt--;
4822b071742bSsg70180 		}
48233af08d82Slm66018 	}
48243af08d82Slm66018 
48253af08d82Slm66018 	/*
4826b071742bSsg70180 	 * Mark that the process is complete by clearing the flag.
4827b071742bSsg70180 	 *
4828b071742bSsg70180 	 * Note is it possible that the taskq dispatch above may have failed,
4829b071742bSsg70180 	 * most likely due to memory shortage. We still clear the flag so
4830b071742bSsg70180 	 * future attempts will at least be attempted and will hopefully
4831b071742bSsg70180 	 * succeed.
48323af08d82Slm66018 	 */
4833b071742bSsg70180 	if ((evt == VSW_CONN_RESET) || (evt == VSW_CONN_RESTART))
4834445b4c2eSsb155480 		ldcp->reset_active = 0;
4835b071742bSsg70180 
4836b071742bSsg70180 	mutex_exit(&ldcp->status_lock);
48373af08d82Slm66018 
48383af08d82Slm66018 	D1(vswp, "%s: exit", __func__);
48393af08d82Slm66018 }
48403af08d82Slm66018 
48413af08d82Slm66018 /*
48421ae08745Sheppo  * returns 0 if legal for event signified by flag to have
48431ae08745Sheppo  * occured at the time it did. Otherwise returns 1.
48441ae08745Sheppo  */
48451ae08745Sheppo int
48461ae08745Sheppo vsw_check_flag(vsw_ldc_t *ldcp, int dir, uint64_t flag)
48471ae08745Sheppo {
48481ae08745Sheppo 	vsw_t		*vswp = ldcp->ldc_vswp;
48491ae08745Sheppo 	uint64_t	state;
48501ae08745Sheppo 	uint64_t	phase;
48511ae08745Sheppo 
48521ae08745Sheppo 	if (dir == INBOUND)
48531ae08745Sheppo 		state = ldcp->lane_in.lstate;
48541ae08745Sheppo 	else
48551ae08745Sheppo 		state = ldcp->lane_out.lstate;
48561ae08745Sheppo 
48571ae08745Sheppo 	phase = ldcp->hphase;
48581ae08745Sheppo 
48591ae08745Sheppo 	switch (flag) {
48601ae08745Sheppo 	case VSW_VER_INFO_RECV:
48611ae08745Sheppo 		if (phase > VSW_MILESTONE0) {
48621ae08745Sheppo 			DERR(vswp, "vsw_check_flag (%d): VER_INFO_RECV"
48631ae08745Sheppo 			    " when in state %d\n", ldcp->ldc_id, phase);
4864b071742bSsg70180 			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
48651ae08745Sheppo 			return (1);
48661ae08745Sheppo 		}
48671ae08745Sheppo 		break;
48681ae08745Sheppo 
48691ae08745Sheppo 	case VSW_VER_ACK_RECV:
48701ae08745Sheppo 	case VSW_VER_NACK_RECV:
48711ae08745Sheppo 		if (!(state & VSW_VER_INFO_SENT)) {
4872205eeb1aSlm66018 			DERR(vswp, "vsw_check_flag (%d): spurious VER_ACK or "
4873205eeb1aSlm66018 			    "VER_NACK when in state %d\n", ldcp->ldc_id, phase);
4874b071742bSsg70180 			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
48751ae08745Sheppo 			return (1);
48761ae08745Sheppo 		} else
48771ae08745Sheppo 			state &= ~VSW_VER_INFO_SENT;
48781ae08745Sheppo 		break;
48791ae08745Sheppo 
48801ae08745Sheppo 	case VSW_ATTR_INFO_RECV:
48811ae08745Sheppo 		if ((phase < VSW_MILESTONE1) || (phase >= VSW_MILESTONE2)) {
48821ae08745Sheppo 			DERR(vswp, "vsw_check_flag (%d): ATTR_INFO_RECV"
48831ae08745Sheppo 			    " when in state %d\n", ldcp->ldc_id, phase);
4884b071742bSsg70180 			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
48851ae08745Sheppo 			return (1);
48861ae08745Sheppo 		}
48871ae08745Sheppo 		break;
48881ae08745Sheppo 
48891ae08745Sheppo 	case VSW_ATTR_ACK_RECV:
48901ae08745Sheppo 	case VSW_ATTR_NACK_RECV:
48911ae08745Sheppo 		if (!(state & VSW_ATTR_INFO_SENT)) {
48921ae08745Sheppo 			DERR(vswp, "vsw_check_flag (%d): spurious ATTR_ACK"
48931ae08745Sheppo 			    " or ATTR_NACK when in state %d\n",
48941ae08745Sheppo 			    ldcp->ldc_id, phase);
4895b071742bSsg70180 			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
48961ae08745Sheppo 			return (1);
48971ae08745Sheppo 		} else
48981ae08745Sheppo 			state &= ~VSW_ATTR_INFO_SENT;
48991ae08745Sheppo 		break;
49001ae08745Sheppo 
49011ae08745Sheppo 	case VSW_DRING_INFO_RECV:
49021ae08745Sheppo 		if (phase < VSW_MILESTONE1) {
49031ae08745Sheppo 			DERR(vswp, "vsw_check_flag (%d): DRING_INFO_RECV"
49041ae08745Sheppo 			    " when in state %d\n", ldcp->ldc_id, phase);
4905b071742bSsg70180 			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
49061ae08745Sheppo 			return (1);
49071ae08745Sheppo 		}
49081ae08745Sheppo 		break;
49091ae08745Sheppo 
49101ae08745Sheppo 	case VSW_DRING_ACK_RECV:
49111ae08745Sheppo 	case VSW_DRING_NACK_RECV:
49121ae08745Sheppo 		if (!(state & VSW_DRING_INFO_SENT)) {
49131ae08745Sheppo 			DERR(vswp, "vsw_check_flag (%d): spurious DRING_ACK "
49141ae08745Sheppo 			    " or DRING_NACK when in state %d\n",
49151ae08745Sheppo 			    ldcp->ldc_id, phase);
4916b071742bSsg70180 			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
49171ae08745Sheppo 			return (1);
49181ae08745Sheppo 		} else
49191ae08745Sheppo 			state &= ~VSW_DRING_INFO_SENT;
49201ae08745Sheppo 		break;
49211ae08745Sheppo 
49221ae08745Sheppo 	case VSW_RDX_INFO_RECV:
49231ae08745Sheppo 		if (phase < VSW_MILESTONE3) {
49241ae08745Sheppo 			DERR(vswp, "vsw_check_flag (%d): RDX_INFO_RECV"
49251ae08745Sheppo 			    " when in state %d\n", ldcp->ldc_id, phase);
4926b071742bSsg70180 			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
49271ae08745Sheppo 			return (1);
49281ae08745Sheppo 		}
49291ae08745Sheppo 		break;
49301ae08745Sheppo 
49311ae08745Sheppo 	case VSW_RDX_ACK_RECV:
49321ae08745Sheppo 	case VSW_RDX_NACK_RECV:
49331ae08745Sheppo 		if (!(state & VSW_RDX_INFO_SENT)) {
4934205eeb1aSlm66018 			DERR(vswp, "vsw_check_flag (%d): spurious RDX_ACK or "
4935205eeb1aSlm66018 			    "RDX_NACK when in state %d\n", ldcp->ldc_id, phase);
4936b071742bSsg70180 			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
49371ae08745Sheppo 			return (1);
49381ae08745Sheppo 		} else
49391ae08745Sheppo 			state &= ~VSW_RDX_INFO_SENT;
49401ae08745Sheppo 		break;
49411ae08745Sheppo 
49421ae08745Sheppo 	case VSW_MCST_INFO_RECV:
49431ae08745Sheppo 		if (phase < VSW_MILESTONE3) {
49441ae08745Sheppo 			DERR(vswp, "vsw_check_flag (%d): VSW_MCST_INFO_RECV"
49451ae08745Sheppo 			    " when in state %d\n", ldcp->ldc_id, phase);
4946b071742bSsg70180 			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
49471ae08745Sheppo 			return (1);
49481ae08745Sheppo 		}
49491ae08745Sheppo 		break;
49501ae08745Sheppo 
49511ae08745Sheppo 	default:
49521ae08745Sheppo 		DERR(vswp, "vsw_check_flag (%lld): unknown flag (%llx)",
49531ae08745Sheppo 		    ldcp->ldc_id, flag);
49541ae08745Sheppo 		return (1);
49551ae08745Sheppo 	}
49561ae08745Sheppo 
49571ae08745Sheppo 	if (dir == INBOUND)
49581ae08745Sheppo 		ldcp->lane_in.lstate = state;
49591ae08745Sheppo 	else
49601ae08745Sheppo 		ldcp->lane_out.lstate = state;
49611ae08745Sheppo 
49621ae08745Sheppo 	D1(vswp, "vsw_check_flag (chan %lld): exit", ldcp->ldc_id);
49631ae08745Sheppo 
49641ae08745Sheppo 	return (0);
49651ae08745Sheppo }
49661ae08745Sheppo 
49671ae08745Sheppo void
49681ae08745Sheppo vsw_next_milestone(vsw_ldc_t *ldcp)
49691ae08745Sheppo {
49701ae08745Sheppo 	vsw_t		*vswp = ldcp->ldc_vswp;
49711ae08745Sheppo 
49721ae08745Sheppo 	D1(vswp, "%s (chan %lld): enter (phase %ld)", __func__,
49731ae08745Sheppo 	    ldcp->ldc_id, ldcp->hphase);
49741ae08745Sheppo 
49751ae08745Sheppo 	DUMP_FLAGS(ldcp->lane_in.lstate);
49761ae08745Sheppo 	DUMP_FLAGS(ldcp->lane_out.lstate);
49771ae08745Sheppo 
49781ae08745Sheppo 	switch (ldcp->hphase) {
49791ae08745Sheppo 
49801ae08745Sheppo 	case VSW_MILESTONE0:
49811ae08745Sheppo 		/*
49821ae08745Sheppo 		 * If we haven't started to handshake with our peer,
49831ae08745Sheppo 		 * start to do so now.
49841ae08745Sheppo 		 */
49851ae08745Sheppo 		if (ldcp->lane_out.lstate == 0) {
49861ae08745Sheppo 			D2(vswp, "%s: (chan %lld) starting handshake "
49871ae08745Sheppo 			    "with peer", __func__, ldcp->ldc_id);
4988b071742bSsg70180 			vsw_process_conn_evt(ldcp, VSW_CONN_UP);
49891ae08745Sheppo 		}
49901ae08745Sheppo 
49911ae08745Sheppo 		/*
49921ae08745Sheppo 		 * Only way to pass this milestone is to have successfully
49931ae08745Sheppo 		 * negotiated version info.
49941ae08745Sheppo 		 */
49951ae08745Sheppo 		if ((ldcp->lane_in.lstate & VSW_VER_ACK_SENT) &&
49961ae08745Sheppo 		    (ldcp->lane_out.lstate & VSW_VER_ACK_RECV)) {
49971ae08745Sheppo 
49981ae08745Sheppo 			D2(vswp, "%s: (chan %lld) leaving milestone 0",
49991ae08745Sheppo 			    __func__, ldcp->ldc_id);
50001ae08745Sheppo 
50011ae08745Sheppo 			/*
50021ae08745Sheppo 			 * Next milestone is passed when attribute
50031ae08745Sheppo 			 * information has been successfully exchanged.
50041ae08745Sheppo 			 */
50051ae08745Sheppo 			ldcp->hphase = VSW_MILESTONE1;
50061ae08745Sheppo 			vsw_send_attr(ldcp);
50071ae08745Sheppo 
50081ae08745Sheppo 		}
50091ae08745Sheppo 		break;
50101ae08745Sheppo 
50111ae08745Sheppo 	case VSW_MILESTONE1:
50121ae08745Sheppo 		/*
50131ae08745Sheppo 		 * Only way to pass this milestone is to have successfully
50141ae08745Sheppo 		 * negotiated attribute information.
50151ae08745Sheppo 		 */
50161ae08745Sheppo 		if (ldcp->lane_in.lstate & VSW_ATTR_ACK_SENT) {
50171ae08745Sheppo 
50181ae08745Sheppo 			ldcp->hphase = VSW_MILESTONE2;
50191ae08745Sheppo 
50201ae08745Sheppo 			/*
50211ae08745Sheppo 			 * If the peer device has said it wishes to
50221ae08745Sheppo 			 * use descriptor rings then we send it our ring
50231ae08745Sheppo 			 * info, otherwise we just set up a private ring
50241ae08745Sheppo 			 * which we use an internal buffer
50251ae08745Sheppo 			 */
50261ae08745Sheppo 			if (ldcp->lane_in.xfer_mode == VIO_DRING_MODE)
50271ae08745Sheppo 				vsw_send_dring_info(ldcp);
50281ae08745Sheppo 		}
50291ae08745Sheppo 		break;
50301ae08745Sheppo 
50311ae08745Sheppo 	case VSW_MILESTONE2:
50321ae08745Sheppo 		/*
50331ae08745Sheppo 		 * If peer has indicated in its attribute message that
50341ae08745Sheppo 		 * it wishes to use descriptor rings then the only way
50351ae08745Sheppo 		 * to pass this milestone is for us to have received
50361ae08745Sheppo 		 * valid dring info.
50371ae08745Sheppo 		 *
50381ae08745Sheppo 		 * If peer is not using descriptor rings then just fall
50391ae08745Sheppo 		 * through.
50401ae08745Sheppo 		 */
50411ae08745Sheppo 		if ((ldcp->lane_in.xfer_mode == VIO_DRING_MODE) &&
50421ae08745Sheppo 		    (!(ldcp->lane_in.lstate & VSW_DRING_ACK_SENT)))
50431ae08745Sheppo 			break;
50441ae08745Sheppo 
50451ae08745Sheppo 		D2(vswp, "%s: (chan %lld) leaving milestone 2",
50461ae08745Sheppo 		    __func__, ldcp->ldc_id);
50471ae08745Sheppo 
50481ae08745Sheppo 		ldcp->hphase = VSW_MILESTONE3;
50491ae08745Sheppo 		vsw_send_rdx(ldcp);
50501ae08745Sheppo 		break;
50511ae08745Sheppo 
50521ae08745Sheppo 	case VSW_MILESTONE3:
50531ae08745Sheppo 		/*
50541ae08745Sheppo 		 * Pass this milestone when all paramaters have been
50551ae08745Sheppo 		 * successfully exchanged and RDX sent in both directions.
50561ae08745Sheppo 		 *
50571ae08745Sheppo 		 * Mark outbound lane as available to transmit data.
50581ae08745Sheppo 		 */
5059b071742bSsg70180 		if ((ldcp->lane_out.lstate & VSW_RDX_ACK_SENT) &&
5060b071742bSsg70180 		    (ldcp->lane_in.lstate & VSW_RDX_ACK_RECV)) {
50611ae08745Sheppo 
50621ae08745Sheppo 			D2(vswp, "%s: (chan %lld) leaving milestone 3",
50631ae08745Sheppo 			    __func__, ldcp->ldc_id);
50643af08d82Slm66018 			D2(vswp, "%s: ** handshake complete (0x%llx : "
50653af08d82Slm66018 			    "0x%llx) **", __func__, ldcp->lane_in.lstate,
50663af08d82Slm66018 			    ldcp->lane_out.lstate);
50671ae08745Sheppo 			ldcp->lane_out.lstate |= VSW_LANE_ACTIVE;
50681ae08745Sheppo 			ldcp->hphase = VSW_MILESTONE4;
50691ae08745Sheppo 			ldcp->hcnt = 0;
50701ae08745Sheppo 			DISPLAY_STATE();
50713af08d82Slm66018 		} else {
5072205eeb1aSlm66018 			D2(vswp, "%s: still in milestone 3 (0x%llx : 0x%llx)",
5073205eeb1aSlm66018 			    __func__, ldcp->lane_in.lstate,
50743af08d82Slm66018 			    ldcp->lane_out.lstate);
50751ae08745Sheppo 		}
50761ae08745Sheppo 		break;
50771ae08745Sheppo 
50781ae08745Sheppo 	case VSW_MILESTONE4:
50791ae08745Sheppo 		D2(vswp, "%s: (chan %lld) in milestone 4", __func__,
50801ae08745Sheppo 		    ldcp->ldc_id);
50811ae08745Sheppo 		break;
50821ae08745Sheppo 
50831ae08745Sheppo 	default:
50841ae08745Sheppo 		DERR(vswp, "%s: (chan %lld) Unknown Phase %x", __func__,
50851ae08745Sheppo 		    ldcp->ldc_id, ldcp->hphase);
50861ae08745Sheppo 	}
50871ae08745Sheppo 
50881ae08745Sheppo 	D1(vswp, "%s (chan %lld): exit (phase %ld)", __func__, ldcp->ldc_id,
50891ae08745Sheppo 	    ldcp->hphase);
50901ae08745Sheppo }
50911ae08745Sheppo 
50921ae08745Sheppo /*
50931ae08745Sheppo  * Check if major version is supported.
50941ae08745Sheppo  *
50951ae08745Sheppo  * Returns 0 if finds supported major number, and if necessary
50961ae08745Sheppo  * adjusts the minor field.
50971ae08745Sheppo  *
50981ae08745Sheppo  * Returns 1 if can't match major number exactly. Sets mjor/minor
50991ae08745Sheppo  * to next lowest support values, or to zero if no other values possible.
51001ae08745Sheppo  */
51011ae08745Sheppo static int
51021ae08745Sheppo vsw_supported_version(vio_ver_msg_t *vp)
51031ae08745Sheppo {
51041ae08745Sheppo 	int	i;
51051ae08745Sheppo 
51061ae08745Sheppo 	D1(NULL, "vsw_supported_version: enter");
51071ae08745Sheppo 
51081ae08745Sheppo 	for (i = 0; i < VSW_NUM_VER; i++) {
51091ae08745Sheppo 		if (vsw_versions[i].ver_major == vp->ver_major) {
51101ae08745Sheppo 			/*
51111ae08745Sheppo 			 * Matching or lower major version found. Update
51121ae08745Sheppo 			 * minor number if necessary.
51131ae08745Sheppo 			 */
51141ae08745Sheppo 			if (vp->ver_minor > vsw_versions[i].ver_minor) {
5115205eeb1aSlm66018 				D2(NULL, "%s: adjusting minor value from %d "
5116205eeb1aSlm66018 				    "to %d", __func__, vp->ver_minor,
51171ae08745Sheppo 				    vsw_versions[i].ver_minor);
51181ae08745Sheppo 				vp->ver_minor = vsw_versions[i].ver_minor;
51191ae08745Sheppo 			}
51201ae08745Sheppo 
51211ae08745Sheppo 			return (0);
51221ae08745Sheppo 		}
51231ae08745Sheppo 
51241ae08745Sheppo 		if (vsw_versions[i].ver_major < vp->ver_major) {
51251ae08745Sheppo 			if (vp->ver_minor > vsw_versions[i].ver_minor) {
5126205eeb1aSlm66018 				D2(NULL, "%s: adjusting minor value from %d "
5127205eeb1aSlm66018 				    "to %d", __func__, vp->ver_minor,
51281ae08745Sheppo 				    vsw_versions[i].ver_minor);
51291ae08745Sheppo 				vp->ver_minor = vsw_versions[i].ver_minor;
51301ae08745Sheppo 			}
51311ae08745Sheppo 			return (1);
51321ae08745Sheppo 		}
51331ae08745Sheppo 	}
51341ae08745Sheppo 
51351ae08745Sheppo 	/* No match was possible, zero out fields */
51361ae08745Sheppo 	vp->ver_major = 0;
51371ae08745Sheppo 	vp->ver_minor = 0;
51381ae08745Sheppo 
51391ae08745Sheppo 	D1(NULL, "vsw_supported_version: exit");
51401ae08745Sheppo 
51411ae08745Sheppo 	return (1);
51421ae08745Sheppo }
51431ae08745Sheppo 
51441ae08745Sheppo /*
51451ae08745Sheppo  * Main routine for processing messages received over LDC.
51461ae08745Sheppo  */
51471ae08745Sheppo static void
51481ae08745Sheppo vsw_process_pkt(void *arg)
51491ae08745Sheppo {
51501ae08745Sheppo 	vsw_ldc_t	*ldcp = (vsw_ldc_t  *)arg;
51511ae08745Sheppo 	vsw_t 		*vswp = ldcp->ldc_vswp;
51521ae08745Sheppo 	size_t		msglen;
51531ae08745Sheppo 	vio_msg_tag_t	tag;
51541ae08745Sheppo 	def_msg_t	dmsg;
51551ae08745Sheppo 	int 		rv = 0;
51561ae08745Sheppo 
51573af08d82Slm66018 
51581ae08745Sheppo 	D1(vswp, "%s enter: ldcid (%lld)\n", __func__, ldcp->ldc_id);
51591ae08745Sheppo 
51601ae08745Sheppo 	/*
51611ae08745Sheppo 	 * If channel is up read messages until channel is empty.
51621ae08745Sheppo 	 */
51631ae08745Sheppo 	do {
51641ae08745Sheppo 		msglen = sizeof (dmsg);
51651ae08745Sheppo 		rv = ldc_read(ldcp->ldc_handle, (caddr_t)&dmsg, &msglen);
51661ae08745Sheppo 
51671ae08745Sheppo 		if (rv != 0) {
5168205eeb1aSlm66018 			DERR(vswp, "%s :ldc_read err id(%lld) rv(%d) len(%d)\n",
5169205eeb1aSlm66018 			    __func__, ldcp->ldc_id, rv, msglen);
51703af08d82Slm66018 		}
51713af08d82Slm66018 
51723af08d82Slm66018 		/* channel has been reset */
51733af08d82Slm66018 		if (rv == ECONNRESET) {
5174b071742bSsg70180 			vsw_process_conn_evt(ldcp, VSW_CONN_RESET);
51751ae08745Sheppo 			break;
51761ae08745Sheppo 		}
51771ae08745Sheppo 
51781ae08745Sheppo 		if (msglen == 0) {
51791ae08745Sheppo 			D2(vswp, "%s: ldc_read id(%lld) NODATA", __func__,
51801ae08745Sheppo 			    ldcp->ldc_id);
51811ae08745Sheppo 			break;
51821ae08745Sheppo 		}
51831ae08745Sheppo 
51841ae08745Sheppo 		D2(vswp, "%s: ldc_read id(%lld): msglen(%d)", __func__,
51851ae08745Sheppo 		    ldcp->ldc_id, msglen);
51861ae08745Sheppo 
51871ae08745Sheppo 		/*
51881ae08745Sheppo 		 * Figure out what sort of packet we have gotten by
51891ae08745Sheppo 		 * examining the msg tag, and then switch it appropriately.
51901ae08745Sheppo 		 */
51911ae08745Sheppo 		bcopy(&dmsg, &tag, sizeof (vio_msg_tag_t));
51921ae08745Sheppo 
51931ae08745Sheppo 		switch (tag.vio_msgtype) {
51941ae08745Sheppo 		case VIO_TYPE_CTRL:
51951ae08745Sheppo 			vsw_dispatch_ctrl_task(ldcp, &dmsg, tag);
51961ae08745Sheppo 			break;
51971ae08745Sheppo 		case VIO_TYPE_DATA:
51981ae08745Sheppo 			vsw_process_data_pkt(ldcp, &dmsg, tag);
51991ae08745Sheppo 			break;
52001ae08745Sheppo 		case VIO_TYPE_ERR:
52011ae08745Sheppo 			vsw_process_err_pkt(ldcp, &dmsg, tag);
52021ae08745Sheppo 			break;
52031ae08745Sheppo 		default:
52041ae08745Sheppo 			DERR(vswp, "%s: Unknown tag(%lx) ", __func__,
52051ae08745Sheppo 			    "id(%lx)\n", tag.vio_msgtype, ldcp->ldc_id);
52061ae08745Sheppo 			break;
52071ae08745Sheppo 		}
52081ae08745Sheppo 	} while (msglen);
52091ae08745Sheppo 
52101ae08745Sheppo 	D1(vswp, "%s exit: ldcid (%lld)\n", __func__, ldcp->ldc_id);
52111ae08745Sheppo }
52121ae08745Sheppo 
52131ae08745Sheppo /*
52141ae08745Sheppo  * Dispatch a task to process a VIO control message.
52151ae08745Sheppo  */
52161ae08745Sheppo static void
52171ae08745Sheppo vsw_dispatch_ctrl_task(vsw_ldc_t *ldcp, void *cpkt, vio_msg_tag_t tag)
52181ae08745Sheppo {
52191ae08745Sheppo 	vsw_ctrl_task_t		*ctaskp = NULL;
52201ae08745Sheppo 	vsw_port_t		*port = ldcp->ldc_port;
52211ae08745Sheppo 	vsw_t			*vswp = port->p_vswp;
52221ae08745Sheppo 
52231ae08745Sheppo 	D1(vswp, "%s: enter", __func__);
52241ae08745Sheppo 
52251ae08745Sheppo 	/*
52261ae08745Sheppo 	 * We need to handle RDX ACK messages in-band as once they
52271ae08745Sheppo 	 * are exchanged it is possible that we will get an
52281ae08745Sheppo 	 * immediate (legitimate) data packet.
52291ae08745Sheppo 	 */
52301ae08745Sheppo 	if ((tag.vio_subtype_env == VIO_RDX) &&
52311ae08745Sheppo 	    (tag.vio_subtype == VIO_SUBTYPE_ACK)) {
52323af08d82Slm66018 
5233b071742bSsg70180 		if (vsw_check_flag(ldcp, INBOUND, VSW_RDX_ACK_RECV))
52341ae08745Sheppo 			return;
52351ae08745Sheppo 
5236b071742bSsg70180 		ldcp->lane_in.lstate |= VSW_RDX_ACK_RECV;
52373af08d82Slm66018 		D2(vswp, "%s (%ld) handling RDX_ACK in place "
52383af08d82Slm66018 		    "(ostate 0x%llx : hphase %d)", __func__,
5239b071742bSsg70180 		    ldcp->ldc_id, ldcp->lane_in.lstate, ldcp->hphase);
52401ae08745Sheppo 		vsw_next_milestone(ldcp);
52411ae08745Sheppo 		return;
52421ae08745Sheppo 	}
52431ae08745Sheppo 
52441ae08745Sheppo 	ctaskp = kmem_alloc(sizeof (vsw_ctrl_task_t), KM_NOSLEEP);
52451ae08745Sheppo 
52461ae08745Sheppo 	if (ctaskp == NULL) {
5247205eeb1aSlm66018 		DERR(vswp, "%s: unable to alloc space for ctrl msg", __func__);
5248b071742bSsg70180 		vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
52491ae08745Sheppo 		return;
52501ae08745Sheppo 	}
52511ae08745Sheppo 
52521ae08745Sheppo 	ctaskp->ldcp = ldcp;
52531ae08745Sheppo 	bcopy((def_msg_t *)cpkt, &ctaskp->pktp, sizeof (def_msg_t));
52541ae08745Sheppo 	mutex_enter(&ldcp->hss_lock);
52551ae08745Sheppo 	ctaskp->hss_id = ldcp->hss_id;
52561ae08745Sheppo 	mutex_exit(&ldcp->hss_lock);
52571ae08745Sheppo 
52581ae08745Sheppo 	/*
52591ae08745Sheppo 	 * Dispatch task to processing taskq if port is not in
52601ae08745Sheppo 	 * the process of being detached.
52611ae08745Sheppo 	 */
52621ae08745Sheppo 	mutex_enter(&port->state_lock);
52631ae08745Sheppo 	if (port->state == VSW_PORT_INIT) {
52641ae08745Sheppo 		if ((vswp->taskq_p == NULL) ||
5265205eeb1aSlm66018 		    (ddi_taskq_dispatch(vswp->taskq_p, vsw_process_ctrl_pkt,
5266205eeb1aSlm66018 		    ctaskp, DDI_NOSLEEP) != DDI_SUCCESS)) {
52671ae08745Sheppo 			DERR(vswp, "%s: unable to dispatch task to taskq",
52681ae08745Sheppo 			    __func__);
52691ae08745Sheppo 			kmem_free(ctaskp, sizeof (vsw_ctrl_task_t));
52701ae08745Sheppo 			mutex_exit(&port->state_lock);
5271b071742bSsg70180 			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
52721ae08745Sheppo 			return;
52731ae08745Sheppo 		}
52741ae08745Sheppo 	} else {
52751ae08745Sheppo 		DWARN(vswp, "%s: port %d detaching, not dispatching "
52761ae08745Sheppo 		    "task", __func__, port->p_instance);
52771ae08745Sheppo 	}
52781ae08745Sheppo 
52791ae08745Sheppo 	mutex_exit(&port->state_lock);
52801ae08745Sheppo 
52811ae08745Sheppo 	D2(vswp, "%s: dispatched task to taskq for chan %d", __func__,
52821ae08745Sheppo 	    ldcp->ldc_id);
52831ae08745Sheppo 	D1(vswp, "%s: exit", __func__);
52841ae08745Sheppo }
52851ae08745Sheppo 
52861ae08745Sheppo /*
52871ae08745Sheppo  * Process a VIO ctrl message. Invoked from taskq.
52881ae08745Sheppo  */
52891ae08745Sheppo static void
52901ae08745Sheppo vsw_process_ctrl_pkt(void *arg)
52911ae08745Sheppo {
52921ae08745Sheppo 	vsw_ctrl_task_t	*ctaskp = (vsw_ctrl_task_t *)arg;
52931ae08745Sheppo 	vsw_ldc_t	*ldcp = ctaskp->ldcp;
52941ae08745Sheppo 	vsw_t 		*vswp = ldcp->ldc_vswp;
52951ae08745Sheppo 	vio_msg_tag_t	tag;
52961ae08745Sheppo 	uint16_t	env;
52971ae08745Sheppo 
52981ae08745Sheppo 	D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);
52991ae08745Sheppo 
53001ae08745Sheppo 	bcopy(&ctaskp->pktp, &tag, sizeof (vio_msg_tag_t));
53011ae08745Sheppo 	env = tag.vio_subtype_env;
53021ae08745Sheppo 
53031ae08745Sheppo 	/* stale pkt check */
53041ae08745Sheppo 	mutex_enter(&ldcp->hss_lock);
53051ae08745Sheppo 	if (ctaskp->hss_id < ldcp->hss_id) {
5306205eeb1aSlm66018 		DWARN(vswp, "%s: discarding stale packet belonging to earlier"
5307205eeb1aSlm66018 		    " (%ld) handshake session", __func__, ctaskp->hss_id);
53081ae08745Sheppo 		mutex_exit(&ldcp->hss_lock);
53091ae08745Sheppo 		return;
53101ae08745Sheppo 	}
53111ae08745Sheppo 	mutex_exit(&ldcp->hss_lock);
53121ae08745Sheppo 
53131ae08745Sheppo 	/* session id check */
53141ae08745Sheppo 	if (ldcp->session_status & VSW_PEER_SESSION) {
53151ae08745Sheppo 		if (ldcp->peer_session != tag.vio_sid) {
53161ae08745Sheppo 			DERR(vswp, "%s (chan %d): invalid session id (%llx)",
53171ae08745Sheppo 			    __func__, ldcp->ldc_id, tag.vio_sid);
53181ae08745Sheppo 			kmem_free(ctaskp, sizeof (vsw_ctrl_task_t));
5319b071742bSsg70180 			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
53201ae08745Sheppo 			return;
53211ae08745Sheppo 		}
53221ae08745Sheppo 	}
53231ae08745Sheppo 
53241ae08745Sheppo 	/*
53251ae08745Sheppo 	 * Switch on vio_subtype envelope, then let lower routines
53261ae08745Sheppo 	 * decide if its an INFO, ACK or NACK packet.
53271ae08745Sheppo 	 */
53281ae08745Sheppo 	switch (env) {
53291ae08745Sheppo 	case VIO_VER_INFO:
53301ae08745Sheppo 		vsw_process_ctrl_ver_pkt(ldcp, &ctaskp->pktp);
53311ae08745Sheppo 		break;
53321ae08745Sheppo 	case VIO_DRING_REG:
53331ae08745Sheppo 		vsw_process_ctrl_dring_reg_pkt(ldcp, &ctaskp->pktp);
53341ae08745Sheppo 		break;
53351ae08745Sheppo 	case VIO_DRING_UNREG:
53361ae08745Sheppo 		vsw_process_ctrl_dring_unreg_pkt(ldcp, &ctaskp->pktp);
53371ae08745Sheppo 		break;
53381ae08745Sheppo 	case VIO_ATTR_INFO:
53391ae08745Sheppo 		vsw_process_ctrl_attr_pkt(ldcp, &ctaskp->pktp);
53401ae08745Sheppo 		break;
53411ae08745Sheppo 	case VNET_MCAST_INFO:
53421ae08745Sheppo 		vsw_process_ctrl_mcst_pkt(ldcp, &ctaskp->pktp);
53431ae08745Sheppo 		break;
53441ae08745Sheppo 	case VIO_RDX:
53451ae08745Sheppo 		vsw_process_ctrl_rdx_pkt(ldcp, &ctaskp->pktp);
53461ae08745Sheppo 		break;
53471ae08745Sheppo 	default:
5348205eeb1aSlm66018 		DERR(vswp, "%s: unknown vio_subtype_env (%x)\n", __func__, env);
53491ae08745Sheppo 	}
53501ae08745Sheppo 
53511ae08745Sheppo 	kmem_free(ctaskp, sizeof (vsw_ctrl_task_t));
53521ae08745Sheppo 	D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id);
53531ae08745Sheppo }
53541ae08745Sheppo 
53551ae08745Sheppo /*
53561ae08745Sheppo  * Version negotiation. We can end up here either because our peer
53571ae08745Sheppo  * has responded to a handshake message we have sent it, or our peer
53581ae08745Sheppo  * has initiated a handshake with us. If its the former then can only
53591ae08745Sheppo  * be ACK or NACK, if its the later can only be INFO.
53601ae08745Sheppo  *
53611ae08745Sheppo  * If its an ACK we move to the next stage of the handshake, namely
53621ae08745Sheppo  * attribute exchange. If its a NACK we see if we can specify another
53631ae08745Sheppo  * version, if we can't we stop.
53641ae08745Sheppo  *
53651ae08745Sheppo  * If it is an INFO we reset all params associated with communication
53661ae08745Sheppo  * in that direction over this channel (remember connection is
53671ae08745Sheppo  * essentially 2 independent simplex channels).
53681ae08745Sheppo  */
53691ae08745Sheppo void
53701ae08745Sheppo vsw_process_ctrl_ver_pkt(vsw_ldc_t *ldcp, void *pkt)
53711ae08745Sheppo {
53721ae08745Sheppo 	vio_ver_msg_t	*ver_pkt;
53731ae08745Sheppo 	vsw_t 		*vswp = ldcp->ldc_vswp;
53741ae08745Sheppo 
53751ae08745Sheppo 	D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);
53761ae08745Sheppo 
53771ae08745Sheppo 	/*
53781ae08745Sheppo 	 * We know this is a ctrl/version packet so
53791ae08745Sheppo 	 * cast it into the correct structure.
53801ae08745Sheppo 	 */
53811ae08745Sheppo 	ver_pkt = (vio_ver_msg_t *)pkt;
53821ae08745Sheppo 
53831ae08745Sheppo 	switch (ver_pkt->tag.vio_subtype) {
53841ae08745Sheppo 	case VIO_SUBTYPE_INFO:
53851ae08745Sheppo 		D2(vswp, "vsw_process_ctrl_ver_pkt: VIO_SUBTYPE_INFO\n");
53861ae08745Sheppo 
53871ae08745Sheppo 		/*
53881ae08745Sheppo 		 * Record the session id, which we will use from now
53891ae08745Sheppo 		 * until we see another VER_INFO msg. Even then the
53901ae08745Sheppo 		 * session id in most cases will be unchanged, execpt
53911ae08745Sheppo 		 * if channel was reset.
53921ae08745Sheppo 		 */
53931ae08745Sheppo 		if ((ldcp->session_status & VSW_PEER_SESSION) &&
53941ae08745Sheppo 		    (ldcp->peer_session != ver_pkt->tag.vio_sid)) {
53951ae08745Sheppo 			DERR(vswp, "%s: updating session id for chan %lld "
53961ae08745Sheppo 			    "from %llx to %llx", __func__, ldcp->ldc_id,
53971ae08745Sheppo 			    ldcp->peer_session, ver_pkt->tag.vio_sid);
53981ae08745Sheppo 		}
53991ae08745Sheppo 
54001ae08745Sheppo 		ldcp->peer_session = ver_pkt->tag.vio_sid;
54011ae08745Sheppo 		ldcp->session_status |= VSW_PEER_SESSION;
54021ae08745Sheppo 
54031ae08745Sheppo 		/* Legal message at this time ? */
54041ae08745Sheppo 		if (vsw_check_flag(ldcp, INBOUND, VSW_VER_INFO_RECV))
54051ae08745Sheppo 			return;
54061ae08745Sheppo 
54071ae08745Sheppo 		/*
54081ae08745Sheppo 		 * First check the device class. Currently only expect
54091ae08745Sheppo 		 * to be talking to a network device. In the future may
54101ae08745Sheppo 		 * also talk to another switch.
54111ae08745Sheppo 		 */
54121ae08745Sheppo 		if (ver_pkt->dev_class != VDEV_NETWORK) {
54131ae08745Sheppo 			DERR(vswp, "%s: illegal device class %d", __func__,
54141ae08745Sheppo 			    ver_pkt->dev_class);
54151ae08745Sheppo 
54161ae08745Sheppo 			ver_pkt->tag.vio_sid = ldcp->local_session;
54171ae08745Sheppo 			ver_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK;
54181ae08745Sheppo 
54191ae08745Sheppo 			DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt);
54201ae08745Sheppo 
5421b071742bSsg70180 			(void) vsw_send_msg(ldcp, (void *)ver_pkt,
5422b071742bSsg70180 			    sizeof (vio_ver_msg_t), B_TRUE);
54231ae08745Sheppo 
54241ae08745Sheppo 			ldcp->lane_in.lstate |= VSW_VER_NACK_SENT;
54251ae08745Sheppo 			vsw_next_milestone(ldcp);
54261ae08745Sheppo 			return;
54271ae08745Sheppo 		} else {
54281ae08745Sheppo 			ldcp->dev_class = ver_pkt->dev_class;
54291ae08745Sheppo 		}
54301ae08745Sheppo 
54311ae08745Sheppo 		/*
54321ae08745Sheppo 		 * Now check the version.
54331ae08745Sheppo 		 */
54341ae08745Sheppo 		if (vsw_supported_version(ver_pkt) == 0) {
54351ae08745Sheppo 			/*
54361ae08745Sheppo 			 * Support this major version and possibly
54371ae08745Sheppo 			 * adjusted minor version.
54381ae08745Sheppo 			 */
54391ae08745Sheppo 
54401ae08745Sheppo 			D2(vswp, "%s: accepted ver %d:%d", __func__,
54411ae08745Sheppo 			    ver_pkt->ver_major, ver_pkt->ver_minor);
54421ae08745Sheppo 
54431ae08745Sheppo 			/* Store accepted values */
54441ae08745Sheppo 			ldcp->lane_in.ver_major = ver_pkt->ver_major;
54451ae08745Sheppo 			ldcp->lane_in.ver_minor = ver_pkt->ver_minor;
54461ae08745Sheppo 
54471ae08745Sheppo 			ver_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK;
54481ae08745Sheppo 
54491ae08745Sheppo 			ldcp->lane_in.lstate |= VSW_VER_ACK_SENT;
54501ae08745Sheppo 		} else {
54511ae08745Sheppo 			/*
54521ae08745Sheppo 			 * NACK back with the next lower major/minor
54531ae08745Sheppo 			 * pairing we support (if don't suuport any more
54541ae08745Sheppo 			 * versions then they will be set to zero.
54551ae08745Sheppo 			 */
54561ae08745Sheppo 
54571ae08745Sheppo 			D2(vswp, "%s: replying with ver %d:%d", __func__,
54581ae08745Sheppo 			    ver_pkt->ver_major, ver_pkt->ver_minor);
54591ae08745Sheppo 
54601ae08745Sheppo 			/* Store updated values */
54611ae08745Sheppo 			ldcp->lane_in.ver_major = ver_pkt->ver_major;
54621ae08745Sheppo 			ldcp->lane_in.ver_minor = ver_pkt->ver_minor;
54631ae08745Sheppo 
54641ae08745Sheppo 			ver_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK;
54651ae08745Sheppo 
54661ae08745Sheppo 			ldcp->lane_in.lstate |= VSW_VER_NACK_SENT;
54671ae08745Sheppo 		}
54681ae08745Sheppo 
54691ae08745Sheppo 		DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt);
54701ae08745Sheppo 		ver_pkt->tag.vio_sid = ldcp->local_session;
5471b071742bSsg70180 		(void) vsw_send_msg(ldcp, (void *)ver_pkt,
5472b071742bSsg70180 		    sizeof (vio_ver_msg_t), B_TRUE);
54731ae08745Sheppo 
54741ae08745Sheppo 		vsw_next_milestone(ldcp);
54751ae08745Sheppo 		break;
54761ae08745Sheppo 
54771ae08745Sheppo 	case VIO_SUBTYPE_ACK:
54781ae08745Sheppo 		D2(vswp, "%s: VIO_SUBTYPE_ACK\n", __func__);
54791ae08745Sheppo 
54801ae08745Sheppo 		if (vsw_check_flag(ldcp, OUTBOUND, VSW_VER_ACK_RECV))
54811ae08745Sheppo 			return;
54821ae08745Sheppo 
54831ae08745Sheppo 		/* Store updated values */
54841ae08745Sheppo 		ldcp->lane_in.ver_major = ver_pkt->ver_major;
54851ae08745Sheppo 		ldcp->lane_in.ver_minor = ver_pkt->ver_minor;
54861ae08745Sheppo 
54871ae08745Sheppo 		ldcp->lane_out.lstate |= VSW_VER_ACK_RECV;
54881ae08745Sheppo 		vsw_next_milestone(ldcp);
54891ae08745Sheppo 
54901ae08745Sheppo 		break;
54911ae08745Sheppo 
54921ae08745Sheppo 	case VIO_SUBTYPE_NACK:
54931ae08745Sheppo 		D2(vswp, "%s: VIO_SUBTYPE_NACK\n", __func__);
54941ae08745Sheppo 
54951ae08745Sheppo 		if (vsw_check_flag(ldcp, OUTBOUND, VSW_VER_NACK_RECV))
54961ae08745Sheppo 			return;
54971ae08745Sheppo 
54981ae08745Sheppo 		/*
54991ae08745Sheppo 		 * If our peer sent us a NACK with the ver fields set to
55001ae08745Sheppo 		 * zero then there is nothing more we can do. Otherwise see
55011ae08745Sheppo 		 * if we support either the version suggested, or a lesser
55021ae08745Sheppo 		 * one.
55031ae08745Sheppo 		 */
55041ae08745Sheppo 		if ((ver_pkt->ver_major == 0) && (ver_pkt->ver_minor == 0)) {
55051ae08745Sheppo 			DERR(vswp, "%s: peer unable to negotiate any "
55061ae08745Sheppo 			    "further.", __func__);
55071ae08745Sheppo 			ldcp->lane_out.lstate |= VSW_VER_NACK_RECV;
55081ae08745Sheppo 			vsw_next_milestone(ldcp);
55091ae08745Sheppo 			return;
55101ae08745Sheppo 		}
55111ae08745Sheppo 
55121ae08745Sheppo 		/*
55131ae08745Sheppo 		 * Check to see if we support this major version or
55141ae08745Sheppo 		 * a lower one. If we don't then maj/min will be set
55151ae08745Sheppo 		 * to zero.
55161ae08745Sheppo 		 */
55171ae08745Sheppo 		(void) vsw_supported_version(ver_pkt);
55181ae08745Sheppo 		if ((ver_pkt->ver_major == 0) && (ver_pkt->ver_minor == 0)) {
55191ae08745Sheppo 			/* Nothing more we can do */
55201ae08745Sheppo 			DERR(vswp, "%s: version negotiation failed.\n",
55211ae08745Sheppo 			    __func__);
55221ae08745Sheppo 			ldcp->lane_out.lstate |= VSW_VER_NACK_RECV;
55231ae08745Sheppo 			vsw_next_milestone(ldcp);
55241ae08745Sheppo 		} else {
55251ae08745Sheppo 			/* found a supported major version */
55261ae08745Sheppo 			ldcp->lane_out.ver_major = ver_pkt->ver_major;
55271ae08745Sheppo 			ldcp->lane_out.ver_minor = ver_pkt->ver_minor;
55281ae08745Sheppo 
55291ae08745Sheppo 			D2(vswp, "%s: resending with updated values (%x, %x)",
5530205eeb1aSlm66018 			    __func__, ver_pkt->ver_major, ver_pkt->ver_minor);
55311ae08745Sheppo 
55321ae08745Sheppo 			ldcp->lane_out.lstate |= VSW_VER_INFO_SENT;
55331ae08745Sheppo 			ver_pkt->tag.vio_sid = ldcp->local_session;
55341ae08745Sheppo 			ver_pkt->tag.vio_subtype = VIO_SUBTYPE_INFO;
55351ae08745Sheppo 
55361ae08745Sheppo 			DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt);
55371ae08745Sheppo 
5538b071742bSsg70180 			(void) vsw_send_msg(ldcp, (void *)ver_pkt,
5539b071742bSsg70180 			    sizeof (vio_ver_msg_t), B_TRUE);
55401ae08745Sheppo 
55411ae08745Sheppo 			vsw_next_milestone(ldcp);
55421ae08745Sheppo 
55431ae08745Sheppo 		}
55441ae08745Sheppo 		break;
55451ae08745Sheppo 
55461ae08745Sheppo 	default:
55471ae08745Sheppo 		DERR(vswp, "%s: unknown vio_subtype %x\n", __func__,
55481ae08745Sheppo 		    ver_pkt->tag.vio_subtype);
55491ae08745Sheppo 	}
55501ae08745Sheppo 
55511ae08745Sheppo 	D1(vswp, "%s(%lld): exit\n", __func__, ldcp->ldc_id);
55521ae08745Sheppo }
55531ae08745Sheppo 
55541ae08745Sheppo /*
55551ae08745Sheppo  * Process an attribute packet. We can end up here either because our peer
55561ae08745Sheppo  * has ACK/NACK'ed back to an earlier ATTR msg we had sent it, or our
55571ae08745Sheppo  * peer has sent us an attribute INFO message
55581ae08745Sheppo  *
55591ae08745Sheppo  * If its an ACK we then move to the next stage of the handshake which
55601ae08745Sheppo  * is to send our descriptor ring info to our peer. If its a NACK then
55611ae08745Sheppo  * there is nothing more we can (currently) do.
55621ae08745Sheppo  *
55631ae08745Sheppo  * If we get a valid/acceptable INFO packet (and we have already negotiated
55641ae08745Sheppo  * a version) we ACK back and set channel state to ATTR_RECV, otherwise we
55651ae08745Sheppo  * NACK back and reset channel state to INACTIV.
55661ae08745Sheppo  *
55671ae08745Sheppo  * FUTURE: in time we will probably negotiate over attributes, but for
55681ae08745Sheppo  * the moment unacceptable attributes are regarded as a fatal error.
55691ae08745Sheppo  *
55701ae08745Sheppo  */
55711ae08745Sheppo void
55721ae08745Sheppo vsw_process_ctrl_attr_pkt(vsw_ldc_t *ldcp, void *pkt)
55731ae08745Sheppo {
55741ae08745Sheppo 	vnet_attr_msg_t		*attr_pkt;
55751ae08745Sheppo 	vsw_t			*vswp = ldcp->ldc_vswp;
55761ae08745Sheppo 	vsw_port_t		*port = ldcp->ldc_port;
55771ae08745Sheppo 	uint64_t		macaddr = 0;
55781ae08745Sheppo 	int			i;
55791ae08745Sheppo 
55801ae08745Sheppo 	D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id);
55811ae08745Sheppo 
55821ae08745Sheppo 	/*
55831ae08745Sheppo 	 * We know this is a ctrl/attr packet so
55841ae08745Sheppo 	 * cast it into the correct structure.
55851ae08745Sheppo 	 */
55861ae08745Sheppo 	attr_pkt = (vnet_attr_msg_t *)pkt;
55871ae08745Sheppo 
55881ae08745Sheppo 	switch (attr_pkt->tag.vio_subtype) {
55891ae08745Sheppo 	case VIO_SUBTYPE_INFO:
55901ae08745Sheppo 		D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__);
55911ae08745Sheppo 
55921ae08745Sheppo 		if (vsw_check_flag(ldcp, INBOUND, VSW_ATTR_INFO_RECV))
55931ae08745Sheppo 			return;
55941ae08745Sheppo 
55951ae08745Sheppo 		/*
55961ae08745Sheppo 		 * If the attributes are unacceptable then we NACK back.
55971ae08745Sheppo 		 */
55981ae08745Sheppo 		if (vsw_check_attr(attr_pkt, ldcp->ldc_port)) {
55991ae08745Sheppo 
56001ae08745Sheppo 			DERR(vswp, "%s (chan %d): invalid attributes",
56011ae08745Sheppo 			    __func__, ldcp->ldc_id);
56021ae08745Sheppo 
56031ae08745Sheppo 			vsw_free_lane_resources(ldcp, INBOUND);
56041ae08745Sheppo 
56051ae08745Sheppo 			attr_pkt->tag.vio_sid = ldcp->local_session;
56061ae08745Sheppo 			attr_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK;
56071ae08745Sheppo 
56081ae08745Sheppo 			DUMP_TAG_PTR((vio_msg_tag_t *)attr_pkt);
56091ae08745Sheppo 			ldcp->lane_in.lstate |= VSW_ATTR_NACK_SENT;
5610b071742bSsg70180 			(void) vsw_send_msg(ldcp, (void *)attr_pkt,
5611b071742bSsg70180 			    sizeof (vnet_attr_msg_t), B_TRUE);
56121ae08745Sheppo 
56131ae08745Sheppo 			vsw_next_milestone(ldcp);
56141ae08745Sheppo 			return;
56151ae08745Sheppo 		}
56161ae08745Sheppo 
56171ae08745Sheppo 		/*
56181ae08745Sheppo 		 * Otherwise store attributes for this lane and update
56191ae08745Sheppo 		 * lane state.
56201ae08745Sheppo 		 */
56211ae08745Sheppo 		ldcp->lane_in.mtu = attr_pkt->mtu;
56221ae08745Sheppo 		ldcp->lane_in.addr = attr_pkt->addr;
56231ae08745Sheppo 		ldcp->lane_in.addr_type = attr_pkt->addr_type;
56241ae08745Sheppo 		ldcp->lane_in.xfer_mode = attr_pkt->xfer_mode;
56251ae08745Sheppo 		ldcp->lane_in.ack_freq = attr_pkt->ack_freq;
56261ae08745Sheppo 
56271ae08745Sheppo 		macaddr = ldcp->lane_in.addr;
56281ae08745Sheppo 		for (i = ETHERADDRL - 1; i >= 0; i--) {
56291ae08745Sheppo 			port->p_macaddr.ether_addr_octet[i] = macaddr & 0xFF;
56301ae08745Sheppo 			macaddr >>= 8;
56311ae08745Sheppo 		}
56321ae08745Sheppo 
56331ae08745Sheppo 		/* create the fdb entry for this port/mac address */
56341ae08745Sheppo 		(void) vsw_add_fdb(vswp, port);
56351ae08745Sheppo 
56361ae08745Sheppo 		/* setup device specifc xmit routines */
56371ae08745Sheppo 		mutex_enter(&port->tx_lock);
56381ae08745Sheppo 		if (ldcp->lane_in.xfer_mode == VIO_DRING_MODE) {
56391ae08745Sheppo 			D2(vswp, "%s: mode = VIO_DRING_MODE", __func__);
56401ae08745Sheppo 			port->transmit = vsw_dringsend;
56411ae08745Sheppo 		} else if (ldcp->lane_in.xfer_mode == VIO_DESC_MODE) {
56421ae08745Sheppo 			D2(vswp, "%s: mode = VIO_DESC_MODE", __func__);
56431ae08745Sheppo 			vsw_create_privring(ldcp);
56441ae08745Sheppo 			port->transmit = vsw_descrsend;
56451ae08745Sheppo 		}
56461ae08745Sheppo 		mutex_exit(&port->tx_lock);
56471ae08745Sheppo 
56481ae08745Sheppo 		attr_pkt->tag.vio_sid = ldcp->local_session;
56491ae08745Sheppo 		attr_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK;
56501ae08745Sheppo 
56511ae08745Sheppo 		DUMP_TAG_PTR((vio_msg_tag_t *)attr_pkt);
56521ae08745Sheppo 
56531ae08745Sheppo 		ldcp->lane_in.lstate |= VSW_ATTR_ACK_SENT;
56541ae08745Sheppo 
5655b071742bSsg70180 		(void) vsw_send_msg(ldcp, (void *)attr_pkt,
5656b071742bSsg70180 		    sizeof (vnet_attr_msg_t), B_TRUE);
56571ae08745Sheppo 
56581ae08745Sheppo 		vsw_next_milestone(ldcp);
56591ae08745Sheppo 		break;
56601ae08745Sheppo 
56611ae08745Sheppo 	case VIO_SUBTYPE_ACK:
56621ae08745Sheppo 		D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__);
56631ae08745Sheppo 
56641ae08745Sheppo 		if (vsw_check_flag(ldcp, OUTBOUND, VSW_ATTR_ACK_RECV))
56651ae08745Sheppo 			return;
56661ae08745Sheppo 
56671ae08745Sheppo 		ldcp->lane_out.lstate |= VSW_ATTR_ACK_RECV;
56681ae08745Sheppo 		vsw_next_milestone(ldcp);
56691ae08745Sheppo 		break;
56701ae08745Sheppo 
56711ae08745Sheppo 	case VIO_SUBTYPE_NACK:
56721ae08745Sheppo 		D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__);
56731ae08745Sheppo 
56741ae08745Sheppo 		if (vsw_check_flag(ldcp, OUTBOUND, VSW_ATTR_NACK_RECV))
56751ae08745Sheppo 			return;
56761ae08745Sheppo 
56771ae08745Sheppo 		ldcp->lane_out.lstate |= VSW_ATTR_NACK_RECV;
56781ae08745Sheppo 		vsw_next_milestone(ldcp);
56791ae08745Sheppo 		break;
56801ae08745Sheppo 
56811ae08745Sheppo 	default:
56821ae08745Sheppo 		DERR(vswp, "%s: unknown vio_subtype %x\n", __func__,
56831ae08745Sheppo 		    attr_pkt->tag.vio_subtype);
56841ae08745Sheppo 	}
56851ae08745Sheppo 
56861ae08745Sheppo 	D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id);
56871ae08745Sheppo }
56881ae08745Sheppo 
56891ae08745Sheppo /*
56901ae08745Sheppo  * Process a dring info packet. We can end up here either because our peer
56911ae08745Sheppo  * has ACK/NACK'ed back to an earlier DRING msg we had sent it, or our
56921ae08745Sheppo  * peer has sent us a dring INFO message.
56931ae08745Sheppo  *
56941ae08745Sheppo  * If we get a valid/acceptable INFO packet (and we have already negotiated
56951ae08745Sheppo  * a version) we ACK back and update the lane state, otherwise we NACK back.
56961ae08745Sheppo  *
56971ae08745Sheppo  * FUTURE: nothing to stop client from sending us info on multiple dring's
56981ae08745Sheppo  * but for the moment we will just use the first one we are given.
56991ae08745Sheppo  *
57001ae08745Sheppo  */
57011ae08745Sheppo void
57021ae08745Sheppo vsw_process_ctrl_dring_reg_pkt(vsw_ldc_t *ldcp, void *pkt)
57031ae08745Sheppo {
57041ae08745Sheppo 	vio_dring_reg_msg_t	*dring_pkt;
57051ae08745Sheppo 	vsw_t			*vswp = ldcp->ldc_vswp;
57061ae08745Sheppo 	ldc_mem_info_t		minfo;
57071ae08745Sheppo 	dring_info_t		*dp, *dbp;
57081ae08745Sheppo 	int			dring_found = 0;
57091ae08745Sheppo 
57101ae08745Sheppo 	/*
57111ae08745Sheppo 	 * We know this is a ctrl/dring packet so
57121ae08745Sheppo 	 * cast it into the correct structure.
57131ae08745Sheppo 	 */
57141ae08745Sheppo 	dring_pkt = (vio_dring_reg_msg_t *)pkt;
57151ae08745Sheppo 
57161ae08745Sheppo 	D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id);
57171ae08745Sheppo 
57181ae08745Sheppo 	switch (dring_pkt->tag.vio_subtype) {
57191ae08745Sheppo 	case VIO_SUBTYPE_INFO:
57201ae08745Sheppo 		D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__);
57211ae08745Sheppo 
57221ae08745Sheppo 		if (vsw_check_flag(ldcp, INBOUND, VSW_DRING_INFO_RECV))
57231ae08745Sheppo 			return;
57241ae08745Sheppo 
57251ae08745Sheppo 		/*
57261ae08745Sheppo 		 * If the dring params are unacceptable then we NACK back.
57271ae08745Sheppo 		 */
57281ae08745Sheppo 		if (vsw_check_dring_info(dring_pkt)) {
57291ae08745Sheppo 
57301ae08745Sheppo 			DERR(vswp, "%s (%lld): invalid dring info",
57311ae08745Sheppo 			    __func__, ldcp->ldc_id);
57321ae08745Sheppo 
57331ae08745Sheppo 			vsw_free_lane_resources(ldcp, INBOUND);
57341ae08745Sheppo 
57351ae08745Sheppo 			dring_pkt->tag.vio_sid = ldcp->local_session;
57361ae08745Sheppo 			dring_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK;
57371ae08745Sheppo 
57381ae08745Sheppo 			DUMP_TAG_PTR((vio_msg_tag_t *)dring_pkt);
57391ae08745Sheppo 
57401ae08745Sheppo 			ldcp->lane_in.lstate |= VSW_DRING_NACK_SENT;
57411ae08745Sheppo 
5742b071742bSsg70180 			(void) vsw_send_msg(ldcp, (void *)dring_pkt,
5743b071742bSsg70180 			    sizeof (vio_dring_reg_msg_t), B_TRUE);
57441ae08745Sheppo 
57451ae08745Sheppo 			vsw_next_milestone(ldcp);
57461ae08745Sheppo 			return;
57471ae08745Sheppo 		}
57481ae08745Sheppo 
57491ae08745Sheppo 		/*
57501ae08745Sheppo 		 * Otherwise, attempt to map in the dring using the
57511ae08745Sheppo 		 * cookie. If that succeeds we send back a unique dring
57521ae08745Sheppo 		 * identifier that the sending side will use in future
57531ae08745Sheppo 		 * to refer to this descriptor ring.
57541ae08745Sheppo 		 */
57551ae08745Sheppo 		dp = kmem_zalloc(sizeof (dring_info_t), KM_SLEEP);
57561ae08745Sheppo 
57571ae08745Sheppo 		dp->num_descriptors = dring_pkt->num_descriptors;
57581ae08745Sheppo 		dp->descriptor_size = dring_pkt->descriptor_size;
57591ae08745Sheppo 		dp->options = dring_pkt->options;
57601ae08745Sheppo 		dp->ncookies = dring_pkt->ncookies;
57611ae08745Sheppo 
57621ae08745Sheppo 		/*
57631ae08745Sheppo 		 * Note: should only get one cookie. Enforced in
57641ae08745Sheppo 		 * the ldc layer.
57651ae08745Sheppo 		 */
57661ae08745Sheppo 		bcopy(&dring_pkt->cookie[0], &dp->cookie[0],
57671ae08745Sheppo 		    sizeof (ldc_mem_cookie_t));
57681ae08745Sheppo 
57691ae08745Sheppo 		D2(vswp, "%s: num_desc %ld : desc_size %ld", __func__,
57701ae08745Sheppo 		    dp->num_descriptors, dp->descriptor_size);
57711ae08745Sheppo 		D2(vswp, "%s: options 0x%lx: ncookies %ld", __func__,
57721ae08745Sheppo 		    dp->options, dp->ncookies);
57731ae08745Sheppo 
57741ae08745Sheppo 		if ((ldc_mem_dring_map(ldcp->ldc_handle, &dp->cookie[0],
5775205eeb1aSlm66018 		    dp->ncookies, dp->num_descriptors, dp->descriptor_size,
5776205eeb1aSlm66018 		    LDC_SHADOW_MAP, &(dp->handle))) != 0) {
57771ae08745Sheppo 
57781ae08745Sheppo 			DERR(vswp, "%s: dring_map failed\n", __func__);
57791ae08745Sheppo 
57801ae08745Sheppo 			kmem_free(dp, sizeof (dring_info_t));
57811ae08745Sheppo 			vsw_free_lane_resources(ldcp, INBOUND);
57821ae08745Sheppo 
57831ae08745Sheppo 			dring_pkt->tag.vio_sid = ldcp->local_session;
57841ae08745Sheppo 			dring_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK;
57851ae08745Sheppo 
57861ae08745Sheppo 			DUMP_TAG_PTR((vio_msg_tag_t *)dring_pkt);
57871ae08745Sheppo 
57881ae08745Sheppo 			ldcp->lane_in.lstate |= VSW_DRING_NACK_SENT;
5789b071742bSsg70180 			(void) vsw_send_msg(ldcp, (void *)dring_pkt,
5790b071742bSsg70180 			    sizeof (vio_dring_reg_msg_t), B_TRUE);
57911ae08745Sheppo 
57921ae08745Sheppo 			vsw_next_milestone(ldcp);
57931ae08745Sheppo 			return;
57941ae08745Sheppo 		}
57951ae08745Sheppo 
57961ae08745Sheppo 		if ((ldc_mem_dring_info(dp->handle, &minfo)) != 0) {
57971ae08745Sheppo 
57981ae08745Sheppo 			DERR(vswp, "%s: dring_addr failed\n", __func__);
57991ae08745Sheppo 
58001ae08745Sheppo 			kmem_free(dp, sizeof (dring_info_t));
58011ae08745Sheppo 			vsw_free_lane_resources(ldcp, INBOUND);
58021ae08745Sheppo 
58031ae08745Sheppo 			dring_pkt->tag.vio_sid = ldcp->local_session;
58041ae08745Sheppo 			dring_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK;
58051ae08745Sheppo 
58061ae08745Sheppo 			DUMP_TAG_PTR((vio_msg_tag_t *)dring_pkt);
58071ae08745Sheppo 
58081ae08745Sheppo 			ldcp->lane_in.lstate |= VSW_DRING_NACK_SENT;
5809b071742bSsg70180 			(void) vsw_send_msg(ldcp, (void *)dring_pkt,
5810b071742bSsg70180 			    sizeof (vio_dring_reg_msg_t), B_TRUE);
58111ae08745Sheppo 
58121ae08745Sheppo 			vsw_next_milestone(ldcp);
58131ae08745Sheppo 			return;
58141ae08745Sheppo 		} else {
58151ae08745Sheppo 			/* store the address of the pub part of ring */
58161ae08745Sheppo 			dp->pub_addr = minfo.vaddr;
58171ae08745Sheppo 		}
58181ae08745Sheppo 
58191ae08745Sheppo 		/* no private section as we are importing */
58201ae08745Sheppo 		dp->priv_addr = NULL;
58211ae08745Sheppo 
58221ae08745Sheppo 		/*
58231ae08745Sheppo 		 * Using simple mono increasing int for ident at
58241ae08745Sheppo 		 * the moment.
58251ae08745Sheppo 		 */
58261ae08745Sheppo 		dp->ident = ldcp->next_ident;
58271ae08745Sheppo 		ldcp->next_ident++;
58281ae08745Sheppo 
58291ae08745Sheppo 		dp->end_idx = 0;
58301ae08745Sheppo 		dp->next = NULL;
58311ae08745Sheppo 
58321ae08745Sheppo 		/*
58331ae08745Sheppo 		 * Link it onto the end of the list of drings
58341ae08745Sheppo 		 * for this lane.
58351ae08745Sheppo 		 */
58361ae08745Sheppo 		if (ldcp->lane_in.dringp == NULL) {
58371ae08745Sheppo 			D2(vswp, "%s: adding first INBOUND dring", __func__);
58381ae08745Sheppo 			ldcp->lane_in.dringp = dp;
58391ae08745Sheppo 		} else {
58401ae08745Sheppo 			dbp = ldcp->lane_in.dringp;
58411ae08745Sheppo 
58421ae08745Sheppo 			while (dbp->next != NULL)
58431ae08745Sheppo 				dbp = dbp->next;
58441ae08745Sheppo 
58451ae08745Sheppo 			dbp->next = dp;
58461ae08745Sheppo 		}
58471ae08745Sheppo 
58481ae08745Sheppo 		/* acknowledge it */
58491ae08745Sheppo 		dring_pkt->tag.vio_sid = ldcp->local_session;
58501ae08745Sheppo 		dring_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK;
58511ae08745Sheppo 		dring_pkt->dring_ident = dp->ident;
58521ae08745Sheppo 
5853b071742bSsg70180 		(void) vsw_send_msg(ldcp, (void *)dring_pkt,
5854b071742bSsg70180 		    sizeof (vio_dring_reg_msg_t), B_TRUE);
58551ae08745Sheppo 
58561ae08745Sheppo 		ldcp->lane_in.lstate |= VSW_DRING_ACK_SENT;
58571ae08745Sheppo 		vsw_next_milestone(ldcp);
58581ae08745Sheppo 		break;
58591ae08745Sheppo 
58601ae08745Sheppo 	case VIO_SUBTYPE_ACK:
58611ae08745Sheppo 		D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__);
58621ae08745Sheppo 
58631ae08745Sheppo 		if (vsw_check_flag(ldcp, OUTBOUND, VSW_DRING_ACK_RECV))
58641ae08745Sheppo 			return;
58651ae08745Sheppo 
58661ae08745Sheppo 		/*
58671ae08745Sheppo 		 * Peer is acknowledging our dring info and will have
58681ae08745Sheppo 		 * sent us a dring identifier which we will use to
58691ae08745Sheppo 		 * refer to this ring w.r.t. our peer.
58701ae08745Sheppo 		 */
58711ae08745Sheppo 		dp = ldcp->lane_out.dringp;
58721ae08745Sheppo 		if (dp != NULL) {
58731ae08745Sheppo 			/*
58741ae08745Sheppo 			 * Find the ring this ident should be associated
58751ae08745Sheppo 			 * with.
58761ae08745Sheppo 			 */
58771ae08745Sheppo 			if (vsw_dring_match(dp, dring_pkt)) {
58781ae08745Sheppo 				dring_found = 1;
58791ae08745Sheppo 
58801ae08745Sheppo 			} else while (dp != NULL) {
58811ae08745Sheppo 				if (vsw_dring_match(dp, dring_pkt)) {
58821ae08745Sheppo 					dring_found = 1;
58831ae08745Sheppo 					break;
58841ae08745Sheppo 				}
58851ae08745Sheppo 				dp = dp->next;
58861ae08745Sheppo 			}
58871ae08745Sheppo 
58881ae08745Sheppo 			if (dring_found == 0) {
58891ae08745Sheppo 				DERR(NULL, "%s: unrecognised ring cookie",
58901ae08745Sheppo 				    __func__);
5891b071742bSsg70180 				vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
58921ae08745Sheppo 				return;
58931ae08745Sheppo 			}
58941ae08745Sheppo 
58951ae08745Sheppo 		} else {
58961ae08745Sheppo 			DERR(vswp, "%s: DRING ACK received but no drings "
58971ae08745Sheppo 			    "allocated", __func__);
5898b071742bSsg70180 			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
58991ae08745Sheppo 			return;
59001ae08745Sheppo 		}
59011ae08745Sheppo 
59021ae08745Sheppo 		/* store ident */
59031ae08745Sheppo 		dp->ident = dring_pkt->dring_ident;
59041ae08745Sheppo 		ldcp->lane_out.lstate |= VSW_DRING_ACK_RECV;
59051ae08745Sheppo 		vsw_next_milestone(ldcp);
59061ae08745Sheppo 		break;
59071ae08745Sheppo 
59081ae08745Sheppo 	case VIO_SUBTYPE_NACK:
59091ae08745Sheppo 		D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__);
59101ae08745Sheppo 
59111ae08745Sheppo 		if (vsw_check_flag(ldcp, OUTBOUND, VSW_DRING_NACK_RECV))
59121ae08745Sheppo 			return;
59131ae08745Sheppo 
59141ae08745Sheppo 		ldcp->lane_out.lstate |= VSW_DRING_NACK_RECV;
59151ae08745Sheppo 		vsw_next_milestone(ldcp);
59161ae08745Sheppo 		break;
59171ae08745Sheppo 
59181ae08745Sheppo 	default:
59191ae08745Sheppo 		DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__,
59201ae08745Sheppo 		    dring_pkt->tag.vio_subtype);
59211ae08745Sheppo 	}
59221ae08745Sheppo 
59231ae08745Sheppo 	D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id);
59241ae08745Sheppo }
59251ae08745Sheppo 
59261ae08745Sheppo /*
59271ae08745Sheppo  * Process a request from peer to unregister a dring.
59281ae08745Sheppo  *
59291ae08745Sheppo  * For the moment we just restart the handshake if our
59301ae08745Sheppo  * peer endpoint attempts to unregister a dring.
59311ae08745Sheppo  */
59321ae08745Sheppo void
59331ae08745Sheppo vsw_process_ctrl_dring_unreg_pkt(vsw_ldc_t *ldcp, void *pkt)
59341ae08745Sheppo {
59351ae08745Sheppo 	vsw_t			*vswp = ldcp->ldc_vswp;
59361ae08745Sheppo 	vio_dring_unreg_msg_t	*dring_pkt;
59371ae08745Sheppo 
59381ae08745Sheppo 	/*
59391ae08745Sheppo 	 * We know this is a ctrl/dring packet so
59401ae08745Sheppo 	 * cast it into the correct structure.
59411ae08745Sheppo 	 */
59421ae08745Sheppo 	dring_pkt = (vio_dring_unreg_msg_t *)pkt;
59431ae08745Sheppo 
59441ae08745Sheppo 	D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);
59451ae08745Sheppo 
59461ae08745Sheppo 	switch (dring_pkt->tag.vio_subtype) {
59471ae08745Sheppo 	case VIO_SUBTYPE_INFO:
59481ae08745Sheppo 		D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__);
59491ae08745Sheppo 
59501ae08745Sheppo 		DWARN(vswp, "%s: restarting handshake..", __func__);
59511ae08745Sheppo 		break;
59521ae08745Sheppo 
59531ae08745Sheppo 	case VIO_SUBTYPE_ACK:
59541ae08745Sheppo 		D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__);
59551ae08745Sheppo 
59561ae08745Sheppo 		DWARN(vswp, "%s: restarting handshake..", __func__);
59571ae08745Sheppo 		break;
59581ae08745Sheppo 
59591ae08745Sheppo 	case VIO_SUBTYPE_NACK:
59601ae08745Sheppo 		D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__);
59611ae08745Sheppo 
59621ae08745Sheppo 		DWARN(vswp, "%s: restarting handshake..", __func__);
59631ae08745Sheppo 		break;
59641ae08745Sheppo 
59651ae08745Sheppo 	default:
59661ae08745Sheppo 		DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__,
59671ae08745Sheppo 		    dring_pkt->tag.vio_subtype);
59681ae08745Sheppo 	}
59691ae08745Sheppo 
5970b071742bSsg70180 	vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
5971b071742bSsg70180 
59721ae08745Sheppo 	D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id);
59731ae08745Sheppo }
59741ae08745Sheppo 
/*
 * Turn the multicast message 'pkt' into a NACK in place (subtype and
 * session id are overwritten) and send it back over channel 'ldcp'.
 *
 * Wrapped in do/while(0) so that the expansion is a single statement
 * and is safe as the unbraced body of an if/else. Invoke it with a
 * trailing semicolon. Both arguments are evaluated more than once, so
 * they must not have side effects.
 */
#define	SND_MCST_NACK(ldcp, pkt) \
	do { \
		(pkt)->tag.vio_subtype = VIO_SUBTYPE_NACK; \
		(pkt)->tag.vio_sid = (ldcp)->local_session; \
		(void) vsw_send_msg((ldcp), (void *)(pkt), \
		    sizeof (vnet_mcast_msg_t), B_TRUE); \
	} while (0)
59801ae08745Sheppo 
59811ae08745Sheppo /*
59821ae08745Sheppo  * Process a multicast request from a vnet.
59831ae08745Sheppo  *
59841ae08745Sheppo  * Vnet's specify a multicast address that they are interested in. This
59851ae08745Sheppo  * address is used as a key into the hash table which forms the multicast
59861ae08745Sheppo  * forwarding database (mFDB).
59871ae08745Sheppo  *
59881ae08745Sheppo  * The table keys are the multicast addresses, while the table entries
59891ae08745Sheppo  * are pointers to lists of ports which wish to receive packets for the
59901ae08745Sheppo  * specified multicast address.
59911ae08745Sheppo  *
59921ae08745Sheppo  * When a multicast packet is being switched we use the address as a key
59931ae08745Sheppo  * into the hash table, and then walk the appropriate port list forwarding
59941ae08745Sheppo  * the pkt to each port in turn.
59951ae08745Sheppo  *
59961ae08745Sheppo  * If a vnet is no longer interested in a particular multicast grouping
59971ae08745Sheppo  * we simply find the correct location in the hash table and then delete
59981ae08745Sheppo  * the relevant port from the port list.
59991ae08745Sheppo  *
60001ae08745Sheppo  * To deal with the case whereby a port is being deleted without first
60011ae08745Sheppo  * removing itself from the lists in the hash table, we maintain a list
60021ae08745Sheppo  * of multicast addresses the port has registered an interest in, within
60031ae08745Sheppo  * the port structure itself. We then simply walk that list of addresses
60041ae08745Sheppo  * using them as keys into the hash table and remove the port from the
60051ae08745Sheppo  * appropriate lists.
60061ae08745Sheppo  */
60071ae08745Sheppo static void
60081ae08745Sheppo vsw_process_ctrl_mcst_pkt(vsw_ldc_t *ldcp, void *pkt)
60091ae08745Sheppo {
60101ae08745Sheppo 	vnet_mcast_msg_t	*mcst_pkt;
60111ae08745Sheppo 	vsw_port_t		*port = ldcp->ldc_port;
60121ae08745Sheppo 	vsw_t			*vswp = ldcp->ldc_vswp;
60131ae08745Sheppo 	int			i;
60141ae08745Sheppo 
60151ae08745Sheppo 	D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);
60161ae08745Sheppo 
60171ae08745Sheppo 	/*
60181ae08745Sheppo 	 * We know this is a ctrl/mcast packet so
60191ae08745Sheppo 	 * cast it into the correct structure.
60201ae08745Sheppo 	 */
60211ae08745Sheppo 	mcst_pkt = (vnet_mcast_msg_t *)pkt;
60221ae08745Sheppo 
60231ae08745Sheppo 	switch (mcst_pkt->tag.vio_subtype) {
60241ae08745Sheppo 	case VIO_SUBTYPE_INFO:
60251ae08745Sheppo 		D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__);
60261ae08745Sheppo 
60271ae08745Sheppo 		/*
60281ae08745Sheppo 		 * Check if in correct state to receive a multicast
60291ae08745Sheppo 		 * message (i.e. handshake complete). If not reset
60301ae08745Sheppo 		 * the handshake.
60311ae08745Sheppo 		 */
60321ae08745Sheppo 		if (vsw_check_flag(ldcp, INBOUND, VSW_MCST_INFO_RECV))
60331ae08745Sheppo 			return;
60341ae08745Sheppo 
60351ae08745Sheppo 		/*
60361ae08745Sheppo 		 * Before attempting to add or remove address check
60371ae08745Sheppo 		 * that they are valid multicast addresses.
60381ae08745Sheppo 		 * If not, then NACK back.
60391ae08745Sheppo 		 */
60401ae08745Sheppo 		for (i = 0; i < mcst_pkt->count; i++) {
60411ae08745Sheppo 			if ((mcst_pkt->mca[i].ether_addr_octet[0] & 01) != 1) {
60421ae08745Sheppo 				DERR(vswp, "%s: invalid multicast address",
60431ae08745Sheppo 				    __func__);
60441ae08745Sheppo 				SND_MCST_NACK(ldcp, mcst_pkt);
60451ae08745Sheppo 				return;
60461ae08745Sheppo 			}
60471ae08745Sheppo 		}
60481ae08745Sheppo 
60491ae08745Sheppo 		/*
60501ae08745Sheppo 		 * Now add/remove the addresses. If this fails we
60511ae08745Sheppo 		 * NACK back.
60521ae08745Sheppo 		 */
60531ae08745Sheppo 		if (vsw_add_rem_mcst(mcst_pkt, port) != 0) {
60541ae08745Sheppo 			SND_MCST_NACK(ldcp, mcst_pkt);
60551ae08745Sheppo 			return;
60561ae08745Sheppo 		}
60571ae08745Sheppo 
60581ae08745Sheppo 		mcst_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK;
60591ae08745Sheppo 		mcst_pkt->tag.vio_sid = ldcp->local_session;
60601ae08745Sheppo 
60611ae08745Sheppo 		DUMP_TAG_PTR((vio_msg_tag_t *)mcst_pkt);
60621ae08745Sheppo 
6063b071742bSsg70180 		(void) vsw_send_msg(ldcp, (void *)mcst_pkt,
6064b071742bSsg70180 		    sizeof (vnet_mcast_msg_t), B_TRUE);
60651ae08745Sheppo 		break;
60661ae08745Sheppo 
60671ae08745Sheppo 	case VIO_SUBTYPE_ACK:
60681ae08745Sheppo 		DWARN(vswp, "%s: VIO_SUBTYPE_ACK", __func__);
60691ae08745Sheppo 
60701ae08745Sheppo 		/*
60711ae08745Sheppo 		 * We shouldn't ever get a multicast ACK message as
60721ae08745Sheppo 		 * at the moment we never request multicast addresses
60731ae08745Sheppo 		 * to be set on some other device. This may change in
60741ae08745Sheppo 		 * the future if we have cascading switches.
60751ae08745Sheppo 		 */
60761ae08745Sheppo 		if (vsw_check_flag(ldcp, OUTBOUND, VSW_MCST_ACK_RECV))
60771ae08745Sheppo 			return;
60781ae08745Sheppo 
60791ae08745Sheppo 				/* Do nothing */
60801ae08745Sheppo 		break;
60811ae08745Sheppo 
60821ae08745Sheppo 	case VIO_SUBTYPE_NACK:
60831ae08745Sheppo 		DWARN(vswp, "%s: VIO_SUBTYPE_NACK", __func__);
60841ae08745Sheppo 
60851ae08745Sheppo 		/*
60861ae08745Sheppo 		 * We shouldn't get a multicast NACK packet for the
60871ae08745Sheppo 		 * same reasons as we shouldn't get a ACK packet.
60881ae08745Sheppo 		 */
60891ae08745Sheppo 		if (vsw_check_flag(ldcp, OUTBOUND, VSW_MCST_NACK_RECV))
60901ae08745Sheppo 			return;
60911ae08745Sheppo 
60921ae08745Sheppo 				/* Do nothing */
60931ae08745Sheppo 		break;
60941ae08745Sheppo 
60951ae08745Sheppo 	default:
60961ae08745Sheppo 		DERR(vswp, "%s: unknown vio_subtype %x\n", __func__,
60971ae08745Sheppo 		    mcst_pkt->tag.vio_subtype);
60981ae08745Sheppo 	}
60991ae08745Sheppo 
61001ae08745Sheppo 	D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id);
61011ae08745Sheppo }
61021ae08745Sheppo 
61031ae08745Sheppo static void
61041ae08745Sheppo vsw_process_ctrl_rdx_pkt(vsw_ldc_t *ldcp, void *pkt)
61051ae08745Sheppo {
61061ae08745Sheppo 	vio_rdx_msg_t	*rdx_pkt;
61071ae08745Sheppo 	vsw_t		*vswp = ldcp->ldc_vswp;
61081ae08745Sheppo 
61091ae08745Sheppo 	/*
61101ae08745Sheppo 	 * We know this is a ctrl/rdx packet so
61111ae08745Sheppo 	 * cast it into the correct structure.
61121ae08745Sheppo 	 */
61131ae08745Sheppo 	rdx_pkt = (vio_rdx_msg_t *)pkt;
61141ae08745Sheppo 
61151ae08745Sheppo 	D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id);
61161ae08745Sheppo 
61171ae08745Sheppo 	switch (rdx_pkt->tag.vio_subtype) {
61181ae08745Sheppo 	case VIO_SUBTYPE_INFO:
61191ae08745Sheppo 		D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__);
61201ae08745Sheppo 
6121b071742bSsg70180 		if (vsw_check_flag(ldcp, OUTBOUND, VSW_RDX_INFO_RECV))
61221ae08745Sheppo 			return;
61231ae08745Sheppo 
61241ae08745Sheppo 		rdx_pkt->tag.vio_sid = ldcp->local_session;
61251ae08745Sheppo 		rdx_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK;
61261ae08745Sheppo 
61271ae08745Sheppo 		DUMP_TAG_PTR((vio_msg_tag_t *)rdx_pkt);
61281ae08745Sheppo 
6129b071742bSsg70180 		ldcp->lane_out.lstate |= VSW_RDX_ACK_SENT;
61301ae08745Sheppo 
6131b071742bSsg70180 		(void) vsw_send_msg(ldcp, (void *)rdx_pkt,
6132b071742bSsg70180 		    sizeof (vio_rdx_msg_t), B_TRUE);
61331ae08745Sheppo 
61341ae08745Sheppo 		vsw_next_milestone(ldcp);
61351ae08745Sheppo 		break;
61361ae08745Sheppo 
61371ae08745Sheppo 	case VIO_SUBTYPE_ACK:
61381ae08745Sheppo 		/*
61391ae08745Sheppo 		 * Should be handled in-band by callback handler.
61401ae08745Sheppo 		 */
61411ae08745Sheppo 		DERR(vswp, "%s: Unexpected VIO_SUBTYPE_ACK", __func__);
6142b071742bSsg70180 		vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
61431ae08745Sheppo 		break;
61441ae08745Sheppo 
61451ae08745Sheppo 	case VIO_SUBTYPE_NACK:
61461ae08745Sheppo 		D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__);
61471ae08745Sheppo 
6148b071742bSsg70180 		if (vsw_check_flag(ldcp, INBOUND, VSW_RDX_NACK_RECV))
61491ae08745Sheppo 			return;
61501ae08745Sheppo 
6151b071742bSsg70180 		ldcp->lane_in.lstate |= VSW_RDX_NACK_RECV;
61521ae08745Sheppo 		vsw_next_milestone(ldcp);
61531ae08745Sheppo 		break;
61541ae08745Sheppo 
61551ae08745Sheppo 	default:
61561ae08745Sheppo 		DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__,
61571ae08745Sheppo 		    rdx_pkt->tag.vio_subtype);
61581ae08745Sheppo 	}
61591ae08745Sheppo 
61601ae08745Sheppo 	D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id);
61611ae08745Sheppo }
61621ae08745Sheppo 
61631ae08745Sheppo static void
61641ae08745Sheppo vsw_process_data_pkt(vsw_ldc_t *ldcp, void *dpkt, vio_msg_tag_t tag)
61651ae08745Sheppo {
61661ae08745Sheppo 	uint16_t	env = tag.vio_subtype_env;
61671ae08745Sheppo 	vsw_t		*vswp = ldcp->ldc_vswp;
61681ae08745Sheppo 
61691ae08745Sheppo 	D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);
61701ae08745Sheppo 
61711ae08745Sheppo 	/* session id check */
61721ae08745Sheppo 	if (ldcp->session_status & VSW_PEER_SESSION) {
61731ae08745Sheppo 		if (ldcp->peer_session != tag.vio_sid) {
61741ae08745Sheppo 			DERR(vswp, "%s (chan %d): invalid session id (%llx)",
61751ae08745Sheppo 			    __func__, ldcp->ldc_id, tag.vio_sid);
6176b071742bSsg70180 			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
61771ae08745Sheppo 			return;
61781ae08745Sheppo 		}
61791ae08745Sheppo 	}
61801ae08745Sheppo 
61811ae08745Sheppo 	/*
61821ae08745Sheppo 	 * It is an error for us to be getting data packets
61831ae08745Sheppo 	 * before the handshake has completed.
61841ae08745Sheppo 	 */
61851ae08745Sheppo 	if (ldcp->hphase != VSW_MILESTONE4) {
61861ae08745Sheppo 		DERR(vswp, "%s: got data packet before handshake complete "
61871ae08745Sheppo 		    "hphase %d (%x: %x)", __func__, ldcp->hphase,
61881ae08745Sheppo 		    ldcp->lane_in.lstate, ldcp->lane_out.lstate);
61891ae08745Sheppo 		DUMP_FLAGS(ldcp->lane_in.lstate);
61901ae08745Sheppo 		DUMP_FLAGS(ldcp->lane_out.lstate);
6191b071742bSsg70180 		vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
61921ae08745Sheppo 		return;
61931ae08745Sheppo 	}
61941ae08745Sheppo 
61951ae08745Sheppo 	/*
61961ae08745Sheppo 	 * Switch on vio_subtype envelope, then let lower routines
61971ae08745Sheppo 	 * decide if its an INFO, ACK or NACK packet.
61981ae08745Sheppo 	 */
61991ae08745Sheppo 	if (env == VIO_DRING_DATA) {
62001ae08745Sheppo 		vsw_process_data_dring_pkt(ldcp, dpkt);
62011ae08745Sheppo 	} else if (env == VIO_PKT_DATA) {
62021ae08745Sheppo 		vsw_process_data_raw_pkt(ldcp, dpkt);
62031ae08745Sheppo 	} else if (env == VIO_DESC_DATA) {
62041ae08745Sheppo 		vsw_process_data_ibnd_pkt(ldcp, dpkt);
62051ae08745Sheppo 	} else {
6206205eeb1aSlm66018 		DERR(vswp, "%s: unknown vio_subtype_env (%x)\n", __func__, env);
62071ae08745Sheppo 	}
62081ae08745Sheppo 
62091ae08745Sheppo 	D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id);
62101ae08745Sheppo }
62111ae08745Sheppo 
/*
 * Turn the dring data message 'pkt' into a NACK in place (subtype and
 * session id are overwritten) and send it back over channel 'ldcp'.
 *
 * Wrapped in do/while(0) so that the expansion is a single statement
 * and is safe as the unbraced body of an if/else. Invoke it with a
 * trailing semicolon. Both arguments are evaluated more than once, so
 * they must not have side effects.
 */
#define	SND_DRING_NACK(ldcp, pkt) \
	do { \
		(pkt)->tag.vio_subtype = VIO_SUBTYPE_NACK; \
		(pkt)->tag.vio_sid = (ldcp)->local_session; \
		(void) vsw_send_msg((ldcp), (void *)(pkt), \
		    sizeof (vio_dring_msg_t), B_TRUE); \
	} while (0)
62171ae08745Sheppo 
62181ae08745Sheppo static void
62191ae08745Sheppo vsw_process_data_dring_pkt(vsw_ldc_t *ldcp, void *dpkt)
62201ae08745Sheppo {
62211ae08745Sheppo 	vio_dring_msg_t		*dring_pkt;
62221ae08745Sheppo 	vnet_public_desc_t	*pub_addr = NULL;
62231ae08745Sheppo 	vsw_private_desc_t	*priv_addr = NULL;
62241ae08745Sheppo 	dring_info_t		*dp = NULL;
62251ae08745Sheppo 	vsw_t			*vswp = ldcp->ldc_vswp;
62261ae08745Sheppo 	mblk_t			*mp = NULL;
62271ae08745Sheppo 	mblk_t			*bp = NULL;
62281ae08745Sheppo 	mblk_t			*bpt = NULL;
62291ae08745Sheppo 	size_t			nbytes = 0;
62301ae08745Sheppo 	size_t			off = 0;
62311ae08745Sheppo 	uint64_t		ncookies = 0;
62321ae08745Sheppo 	uint64_t		chain = 0;
6233d10e4ef2Snarayan 	uint64_t		j, len;
6234d10e4ef2Snarayan 	uint32_t		pos, start, datalen;
6235d10e4ef2Snarayan 	uint32_t		range_start, range_end;
6236d10e4ef2Snarayan 	int32_t			end, num, cnt = 0;
6237b071742bSsg70180 	int			i, rv, msg_rv = 0;
62381ae08745Sheppo 	boolean_t		ack_needed = B_FALSE;
6239d10e4ef2Snarayan 	boolean_t		prev_desc_ack = B_FALSE;
6240d10e4ef2Snarayan 	int			read_attempts = 0;
62411ae08745Sheppo 
62421ae08745Sheppo 	D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);
62431ae08745Sheppo 
62441ae08745Sheppo 	/*
62451ae08745Sheppo 	 * We know this is a data/dring packet so
62461ae08745Sheppo 	 * cast it into the correct structure.
62471ae08745Sheppo 	 */
62481ae08745Sheppo 	dring_pkt = (vio_dring_msg_t *)dpkt;
62491ae08745Sheppo 
62501ae08745Sheppo 	/*
62511ae08745Sheppo 	 * Switch on the vio_subtype. If its INFO then we need to
62521ae08745Sheppo 	 * process the data. If its an ACK we need to make sure
62531ae08745Sheppo 	 * it makes sense (i.e did we send an earlier data/info),
62541ae08745Sheppo 	 * and if its a NACK then we maybe attempt a retry.
62551ae08745Sheppo 	 */
62561ae08745Sheppo 	switch (dring_pkt->tag.vio_subtype) {
62571ae08745Sheppo 	case VIO_SUBTYPE_INFO:
62581ae08745Sheppo 		D2(vswp, "%s(%lld): VIO_SUBTYPE_INFO", __func__, ldcp->ldc_id);
62591ae08745Sheppo 
6260445b4c2eSsb155480 		READ_ENTER(&ldcp->lane_in.dlistrw);
62611ae08745Sheppo 		if ((dp = vsw_ident2dring(&ldcp->lane_in,
62621ae08745Sheppo 		    dring_pkt->dring_ident)) == NULL) {
6263445b4c2eSsb155480 			RW_EXIT(&ldcp->lane_in.dlistrw);
62641ae08745Sheppo 
62651ae08745Sheppo 			DERR(vswp, "%s(%lld): unable to find dring from "
62661ae08745Sheppo 			    "ident 0x%llx", __func__, ldcp->ldc_id,
62671ae08745Sheppo 			    dring_pkt->dring_ident);
62681ae08745Sheppo 
62691ae08745Sheppo 			SND_DRING_NACK(ldcp, dring_pkt);
62701ae08745Sheppo 			return;
62711ae08745Sheppo 		}
62721ae08745Sheppo 
6273d10e4ef2Snarayan 		start = pos = dring_pkt->start_idx;
62741ae08745Sheppo 		end = dring_pkt->end_idx;
6275d10e4ef2Snarayan 		len = dp->num_descriptors;
62761ae08745Sheppo 
6277d10e4ef2Snarayan 		range_start = range_end = pos;
6278d10e4ef2Snarayan 
6279d10e4ef2Snarayan 		D2(vswp, "%s(%lld): start index %ld : end %ld\n",
62801ae08745Sheppo 		    __func__, ldcp->ldc_id, start, end);
62811ae08745Sheppo 
6282d10e4ef2Snarayan 		if (end == -1) {
6283d10e4ef2Snarayan 			num = -1;
62844bac2208Snarayan 		} else if (end >= 0) {
6285205eeb1aSlm66018 			num = end >= pos ? end - pos + 1: (len - pos + 1) + end;
6286d10e4ef2Snarayan 
62871ae08745Sheppo 			/* basic sanity check */
62881ae08745Sheppo 			if (end > len) {
6289445b4c2eSsb155480 				RW_EXIT(&ldcp->lane_in.dlistrw);
6290d10e4ef2Snarayan 				DERR(vswp, "%s(%lld): endpoint %lld outside "
6291d10e4ef2Snarayan 				    "ring length %lld", __func__,
6292d10e4ef2Snarayan 				    ldcp->ldc_id, end, len);
62931ae08745Sheppo 
62941ae08745Sheppo 				SND_DRING_NACK(ldcp, dring_pkt);
62951ae08745Sheppo 				return;
62961ae08745Sheppo 			}
6297d10e4ef2Snarayan 		} else {
6298445b4c2eSsb155480 			RW_EXIT(&ldcp->lane_in.dlistrw);
6299d10e4ef2Snarayan 			DERR(vswp, "%s(%lld): invalid endpoint %lld",
6300d10e4ef2Snarayan 			    __func__, ldcp->ldc_id, end);
6301d10e4ef2Snarayan 			SND_DRING_NACK(ldcp, dring_pkt);
63021ae08745Sheppo 			return;
63031ae08745Sheppo 		}
63041ae08745Sheppo 
6305d10e4ef2Snarayan 		while (cnt != num) {
6306d10e4ef2Snarayan vsw_recheck_desc:
6307d10e4ef2Snarayan 			if ((rv = ldc_mem_dring_acquire(dp->handle,
6308d10e4ef2Snarayan 			    pos, pos)) != 0) {
6309445b4c2eSsb155480 				RW_EXIT(&ldcp->lane_in.dlistrw);
6310d10e4ef2Snarayan 				DERR(vswp, "%s(%lld): unable to acquire "
6311d10e4ef2Snarayan 				    "descriptor at pos %d: err %d",
6312d10e4ef2Snarayan 				    __func__, pos, ldcp->ldc_id, rv);
6313d10e4ef2Snarayan 				SND_DRING_NACK(ldcp, dring_pkt);
6314d10e4ef2Snarayan 				return;
6315d10e4ef2Snarayan 			}
63161ae08745Sheppo 
6317d10e4ef2Snarayan 			pub_addr = (vnet_public_desc_t *)dp->pub_addr + pos;
63181ae08745Sheppo 
6319d10e4ef2Snarayan 			/*
6320d10e4ef2Snarayan 			 * When given a bounded range of descriptors
6321d10e4ef2Snarayan 			 * to process, its an error to hit a descriptor
6322d10e4ef2Snarayan 			 * which is not ready. In the non-bounded case
6323d10e4ef2Snarayan 			 * (end_idx == -1) this simply indicates we have
6324d10e4ef2Snarayan 			 * reached the end of the current active range.
6325d10e4ef2Snarayan 			 */
6326d10e4ef2Snarayan 			if (pub_addr->hdr.dstate != VIO_DESC_READY) {
6327d10e4ef2Snarayan 				/* unbound - no error */
6328d10e4ef2Snarayan 				if (end == -1) {
6329d10e4ef2Snarayan 					if (read_attempts == vsw_read_attempts)
6330d10e4ef2Snarayan 						break;
63311ae08745Sheppo 
6332d10e4ef2Snarayan 					delay(drv_usectohz(vsw_desc_delay));
6333d10e4ef2Snarayan 					read_attempts++;
6334d10e4ef2Snarayan 					goto vsw_recheck_desc;
6335d10e4ef2Snarayan 				}
63361ae08745Sheppo 
6337d10e4ef2Snarayan 				/* bounded - error - so NACK back */
6338445b4c2eSsb155480 				RW_EXIT(&ldcp->lane_in.dlistrw);
6339d10e4ef2Snarayan 				DERR(vswp, "%s(%lld): descriptor not READY "
6340d10e4ef2Snarayan 				    "(%d)", __func__, ldcp->ldc_id,
6341d10e4ef2Snarayan 				    pub_addr->hdr.dstate);
6342d10e4ef2Snarayan 				SND_DRING_NACK(ldcp, dring_pkt);
6343d10e4ef2Snarayan 				return;
6344d10e4ef2Snarayan 			}
6345d10e4ef2Snarayan 
6346d10e4ef2Snarayan 			DTRACE_PROBE1(read_attempts, int, read_attempts);
6347d10e4ef2Snarayan 
6348d10e4ef2Snarayan 			range_end = pos;
6349d10e4ef2Snarayan 
6350d10e4ef2Snarayan 			/*
6351d10e4ef2Snarayan 			 * If we ACK'd the previous descriptor then now
6352d10e4ef2Snarayan 			 * record the new range start position for later
6353d10e4ef2Snarayan 			 * ACK's.
6354d10e4ef2Snarayan 			 */
6355d10e4ef2Snarayan 			if (prev_desc_ack) {
6356d10e4ef2Snarayan 				range_start = pos;
6357d10e4ef2Snarayan 
6358205eeb1aSlm66018 				D2(vswp, "%s(%lld): updating range start to be "
6359205eeb1aSlm66018 				    "%d", __func__, ldcp->ldc_id, range_start);
6360d10e4ef2Snarayan 
6361d10e4ef2Snarayan 				prev_desc_ack = B_FALSE;
6362d10e4ef2Snarayan 			}
63631ae08745Sheppo 
63641ae08745Sheppo 			/*
63651ae08745Sheppo 			 * Data is padded to align on 8 byte boundary,
63661ae08745Sheppo 			 * datalen is actual data length, i.e. minus that
63671ae08745Sheppo 			 * padding.
63681ae08745Sheppo 			 */
63691ae08745Sheppo 			datalen = pub_addr->nbytes;
63701ae08745Sheppo 
63711ae08745Sheppo 			/*
63721ae08745Sheppo 			 * Does peer wish us to ACK when we have finished
63731ae08745Sheppo 			 * with this descriptor ?
63741ae08745Sheppo 			 */
63751ae08745Sheppo 			if (pub_addr->hdr.ack)
63761ae08745Sheppo 				ack_needed = B_TRUE;
63771ae08745Sheppo 
63781ae08745Sheppo 			D2(vswp, "%s(%lld): processing desc %lld at pos"
63791ae08745Sheppo 			    " 0x%llx : dstate 0x%lx : datalen 0x%lx",
6380d10e4ef2Snarayan 			    __func__, ldcp->ldc_id, pos, pub_addr,
63811ae08745Sheppo 			    pub_addr->hdr.dstate, datalen);
63821ae08745Sheppo 
63831ae08745Sheppo 			/*
63841ae08745Sheppo 			 * Mark that we are starting to process descriptor.
63851ae08745Sheppo 			 */
63861ae08745Sheppo 			pub_addr->hdr.dstate = VIO_DESC_ACCEPTED;
63871ae08745Sheppo 
6388d10e4ef2Snarayan 			mp = vio_allocb(ldcp->rxh);
6389d10e4ef2Snarayan 			if (mp == NULL) {
63901ae08745Sheppo 				/*
6391d10e4ef2Snarayan 				 * No free receive buffers available, so
6392d10e4ef2Snarayan 				 * fallback onto allocb(9F). Make sure that
6393d10e4ef2Snarayan 				 * we get a data buffer which is a multiple
6394d10e4ef2Snarayan 				 * of 8 as this is required by ldc_mem_copy.
63951ae08745Sheppo 				 */
6396d10e4ef2Snarayan 				DTRACE_PROBE(allocb);
6397205eeb1aSlm66018 				if ((mp = allocb(datalen + VNET_IPALIGN + 8,
6398205eeb1aSlm66018 				    BPRI_MED)) == NULL) {
6399205eeb1aSlm66018 					DERR(vswp, "%s(%ld): allocb failed",
6400205eeb1aSlm66018 					    __func__, ldcp->ldc_id);
6401205eeb1aSlm66018 					pub_addr->hdr.dstate = VIO_DESC_DONE;
6402205eeb1aSlm66018 					(void) ldc_mem_dring_release(dp->handle,
6403205eeb1aSlm66018 					    pos, pos);
6404205eeb1aSlm66018 					break;
6405205eeb1aSlm66018 				}
6406d10e4ef2Snarayan 			}
6407d10e4ef2Snarayan 
6408d10e4ef2Snarayan 			/*
6409d10e4ef2Snarayan 			 * Ensure that we ask ldc for an aligned
6410d10e4ef2Snarayan 			 * number of bytes.
6411d10e4ef2Snarayan 			 */
6412d10e4ef2Snarayan 			nbytes = datalen + VNET_IPALIGN;
64131ae08745Sheppo 			if (nbytes & 0x7) {
64141ae08745Sheppo 				off = 8 - (nbytes & 0x7);
64151ae08745Sheppo 				nbytes += off;
64161ae08745Sheppo 			}
64171ae08745Sheppo 
64181ae08745Sheppo 			ncookies = pub_addr->ncookies;
64191ae08745Sheppo 			rv = ldc_mem_copy(ldcp->ldc_handle,
64201ae08745Sheppo 			    (caddr_t)mp->b_rptr, 0, &nbytes,
6421205eeb1aSlm66018 			    pub_addr->memcookie, ncookies, LDC_COPY_IN);
64221ae08745Sheppo 
64231ae08745Sheppo 			if (rv != 0) {
6424205eeb1aSlm66018 				DERR(vswp, "%s(%d): unable to copy in data "
6425205eeb1aSlm66018 				    "from %d cookies in desc %d (rv %d)",
6426205eeb1aSlm66018 				    __func__, ldcp->ldc_id, ncookies, pos, rv);
64271ae08745Sheppo 				freemsg(mp);
6428d10e4ef2Snarayan 
6429d10e4ef2Snarayan 				pub_addr->hdr.dstate = VIO_DESC_DONE;
64301ae08745Sheppo 				(void) ldc_mem_dring_release(dp->handle,
6431d10e4ef2Snarayan 				    pos, pos);
6432d10e4ef2Snarayan 				break;
64331ae08745Sheppo 			} else {
64341ae08745Sheppo 				D2(vswp, "%s(%d): copied in %ld bytes"
64351ae08745Sheppo 				    " using %d cookies", __func__,
64361ae08745Sheppo 				    ldcp->ldc_id, nbytes, ncookies);
64371ae08745Sheppo 			}
64381ae08745Sheppo 
6439d10e4ef2Snarayan 			/* adjust the read pointer to skip over the padding */
6440d10e4ef2Snarayan 			mp->b_rptr += VNET_IPALIGN;
6441d10e4ef2Snarayan 
64421ae08745Sheppo 			/* point to the actual end of data */
64431ae08745Sheppo 			mp->b_wptr = mp->b_rptr + datalen;
64441ae08745Sheppo 
64451ae08745Sheppo 			/* build a chain of received packets */
64461ae08745Sheppo 			if (bp == NULL) {
64471ae08745Sheppo 				/* first pkt */
64481ae08745Sheppo 				bp = mp;
64491ae08745Sheppo 				bp->b_next = bp->b_prev = NULL;
64501ae08745Sheppo 				bpt = bp;
64511ae08745Sheppo 				chain = 1;
64521ae08745Sheppo 			} else {
64531ae08745Sheppo 				mp->b_next = NULL;
64541ae08745Sheppo 				mp->b_prev = bpt;
64551ae08745Sheppo 				bpt->b_next = mp;
64561ae08745Sheppo 				bpt = mp;
64571ae08745Sheppo 				chain++;
64581ae08745Sheppo 			}
64591ae08745Sheppo 
64601ae08745Sheppo 			/* mark we are finished with this descriptor */
64611ae08745Sheppo 			pub_addr->hdr.dstate = VIO_DESC_DONE;
64621ae08745Sheppo 
6463d10e4ef2Snarayan 			(void) ldc_mem_dring_release(dp->handle, pos, pos);
6464d10e4ef2Snarayan 
64651ae08745Sheppo 			/*
6466d10e4ef2Snarayan 			 * Send an ACK back to peer if requested.
64671ae08745Sheppo 			 */
64681ae08745Sheppo 			if (ack_needed) {
64691ae08745Sheppo 				ack_needed = B_FALSE;
64701ae08745Sheppo 
6471d10e4ef2Snarayan 				dring_pkt->start_idx = range_start;
6472d10e4ef2Snarayan 				dring_pkt->end_idx = range_end;
64731ae08745Sheppo 
6474d10e4ef2Snarayan 				DERR(vswp, "%s(%lld): processed %d %d, ACK"
6475d10e4ef2Snarayan 				    " requested", __func__, ldcp->ldc_id,
6476205eeb1aSlm66018 				    dring_pkt->start_idx, dring_pkt->end_idx);
64771ae08745Sheppo 
6478d10e4ef2Snarayan 				dring_pkt->dring_process_state = VIO_DP_ACTIVE;
64791ae08745Sheppo 				dring_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK;
64801ae08745Sheppo 				dring_pkt->tag.vio_sid = ldcp->local_session;
6481205eeb1aSlm66018 
6482b071742bSsg70180 				msg_rv = vsw_send_msg(ldcp, (void *)dring_pkt,
6483205eeb1aSlm66018 				    sizeof (vio_dring_msg_t), B_FALSE);
6484b071742bSsg70180 
6485b071742bSsg70180 				/*
6486b071742bSsg70180 				 * Check if ACK was successfully sent. If not
6487b071742bSsg70180 				 * we break and deal with that below.
6488b071742bSsg70180 				 */
6489b071742bSsg70180 				if (msg_rv != 0)
6490b071742bSsg70180 					break;
6491d10e4ef2Snarayan 
6492d10e4ef2Snarayan 				prev_desc_ack = B_TRUE;
6493d10e4ef2Snarayan 				range_start = pos;
64941ae08745Sheppo 			}
64951ae08745Sheppo 
6496d10e4ef2Snarayan 			/* next descriptor */
6497d10e4ef2Snarayan 			pos = (pos + 1) % len;
6498d10e4ef2Snarayan 			cnt++;
6499d10e4ef2Snarayan 
6500d10e4ef2Snarayan 			/*
6501d10e4ef2Snarayan 			 * Break out of loop here and stop processing to
6502d10e4ef2Snarayan 			 * allow some other network device (or disk) to
6503d10e4ef2Snarayan 			 * get access to the cpu.
6504d10e4ef2Snarayan 			 */
6505d10e4ef2Snarayan 			if (chain > vsw_chain_len) {
6506d10e4ef2Snarayan 				D3(vswp, "%s(%lld): switching chain of %d "
6507d10e4ef2Snarayan 				    "msgs", __func__, ldcp->ldc_id, chain);
6508d10e4ef2Snarayan 				break;
65091ae08745Sheppo 			}
65101ae08745Sheppo 		}
6511445b4c2eSsb155480 		RW_EXIT(&ldcp->lane_in.dlistrw);
65121ae08745Sheppo 
6513b071742bSsg70180 		/*
6514b071742bSsg70180 		 * If when we attempted to send the ACK we found that the
6515b071742bSsg70180 		 * channel had been reset then now handle this. We deal with
6516b071742bSsg70180 		 * it here as we cannot reset the channel while holding the
6517b071742bSsg70180 		 * dlistrw lock, and we don't want to acquire/release it
6518b071742bSsg70180 		 * continuously in the above loop, as a channel reset should
6519b071742bSsg70180 		 * be a rare event.
6520b071742bSsg70180 		 */
6521b071742bSsg70180 		if (msg_rv == ECONNRESET) {
6522b071742bSsg70180 			vsw_process_conn_evt(ldcp, VSW_CONN_RESET);
6523b071742bSsg70180 			break;
6524b071742bSsg70180 		}
6525b071742bSsg70180 
65261ae08745Sheppo 		/* send the chain of packets to be switched */
6527d10e4ef2Snarayan 		if (bp != NULL) {
6528d10e4ef2Snarayan 			D3(vswp, "%s(%lld): switching chain of %d msgs",
6529d10e4ef2Snarayan 			    __func__, ldcp->ldc_id, chain);
653034683adeSsg70180 			vswp->vsw_switch_frame(vswp, bp, VSW_VNETPORT,
65311ae08745Sheppo 			    ldcp->ldc_port, NULL);
6532d10e4ef2Snarayan 		}
65331ae08745Sheppo 
6534d10e4ef2Snarayan 		DTRACE_PROBE1(msg_cnt, int, cnt);
6535d10e4ef2Snarayan 
6536d10e4ef2Snarayan 		/*
6537d10e4ef2Snarayan 		 * We are now finished so ACK back with the state
6538d10e4ef2Snarayan 		 * set to STOPPING so our peer knows we are finished
6539d10e4ef2Snarayan 		 */
6540d10e4ef2Snarayan 		dring_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK;
6541d10e4ef2Snarayan 		dring_pkt->tag.vio_sid = ldcp->local_session;
6542d10e4ef2Snarayan 
6543d10e4ef2Snarayan 		dring_pkt->dring_process_state = VIO_DP_STOPPED;
6544d10e4ef2Snarayan 
6545d10e4ef2Snarayan 		DTRACE_PROBE(stop_process_sent);
6546d10e4ef2Snarayan 
6547d10e4ef2Snarayan 		/*
6548d10e4ef2Snarayan 		 * We have not processed any more descriptors beyond
6549d10e4ef2Snarayan 		 * the last one we ACK'd.
6550d10e4ef2Snarayan 		 */
6551d10e4ef2Snarayan 		if (prev_desc_ack)
6552d10e4ef2Snarayan 			range_start = range_end;
6553d10e4ef2Snarayan 
6554d10e4ef2Snarayan 		dring_pkt->start_idx = range_start;
6555d10e4ef2Snarayan 		dring_pkt->end_idx = range_end;
6556d10e4ef2Snarayan 
6557d10e4ef2Snarayan 		D2(vswp, "%s(%lld) processed : %d : %d, now stopping",
6558d10e4ef2Snarayan 		    __func__, ldcp->ldc_id, dring_pkt->start_idx,
6559d10e4ef2Snarayan 		    dring_pkt->end_idx);
6560d10e4ef2Snarayan 
6561b071742bSsg70180 		(void) vsw_send_msg(ldcp, (void *)dring_pkt,
6562b071742bSsg70180 		    sizeof (vio_dring_msg_t), B_TRUE);
65631ae08745Sheppo 		break;
65641ae08745Sheppo 
65651ae08745Sheppo 	case VIO_SUBTYPE_ACK:
65661ae08745Sheppo 		D2(vswp, "%s(%lld): VIO_SUBTYPE_ACK", __func__, ldcp->ldc_id);
65671ae08745Sheppo 		/*
65681ae08745Sheppo 		 * Verify that the relevant descriptors are all
65691ae08745Sheppo 		 * marked as DONE
65701ae08745Sheppo 		 */
6571445b4c2eSsb155480 		READ_ENTER(&ldcp->lane_out.dlistrw);
65721ae08745Sheppo 		if ((dp = vsw_ident2dring(&ldcp->lane_out,
65731ae08745Sheppo 		    dring_pkt->dring_ident)) == NULL) {
6574445b4c2eSsb155480 			RW_EXIT(&ldcp->lane_out.dlistrw);
65751ae08745Sheppo 			DERR(vswp, "%s: unknown ident in ACK", __func__);
65761ae08745Sheppo 			return;
65771ae08745Sheppo 		}
65781ae08745Sheppo 
65791ae08745Sheppo 		pub_addr = (vnet_public_desc_t *)dp->pub_addr;
65801ae08745Sheppo 		priv_addr = (vsw_private_desc_t *)dp->priv_addr;
65811ae08745Sheppo 
65821ae08745Sheppo 		start = end = 0;
65831ae08745Sheppo 		start = dring_pkt->start_idx;
65841ae08745Sheppo 		end = dring_pkt->end_idx;
65851ae08745Sheppo 		len = dp->num_descriptors;
65861ae08745Sheppo 
65871ae08745Sheppo 		j = num = 0;
65881ae08745Sheppo 		/* calculate # descriptors taking into a/c wrap around */
65891ae08745Sheppo 		num = end >= start ? end - start + 1: (len - start + 1) + end;
65901ae08745Sheppo 
65911ae08745Sheppo 		D2(vswp, "%s(%lld): start index %ld : end %ld : num %ld\n",
65921ae08745Sheppo 		    __func__, ldcp->ldc_id, start, end, num);
65931ae08745Sheppo 
6594d10e4ef2Snarayan 		mutex_enter(&dp->dlock);
6595d10e4ef2Snarayan 		dp->last_ack_recv = end;
6596d10e4ef2Snarayan 		mutex_exit(&dp->dlock);
6597d10e4ef2Snarayan 
65981ae08745Sheppo 		for (i = start; j < num; i = (i + 1) % len, j++) {
65991ae08745Sheppo 			pub_addr = (vnet_public_desc_t *)dp->pub_addr + i;
66001ae08745Sheppo 			priv_addr = (vsw_private_desc_t *)dp->priv_addr + i;
66011ae08745Sheppo 
6602d10e4ef2Snarayan 			/*
6603d10e4ef2Snarayan 			 * If the last descriptor in a range has the ACK
6604d10e4ef2Snarayan 			 * bit set then we will get two messages from our
6605d10e4ef2Snarayan 			 * peer relating to it. The normal ACK msg and then
6606d10e4ef2Snarayan 			 * a subsequent STOP msg. The first message will have
6607d10e4ef2Snarayan 			 * resulted in the descriptor being reclaimed and
6608d10e4ef2Snarayan 			 * its state set to FREE so when we encounter a non
6609d10e4ef2Snarayan 			 * DONE descriptor we need to check to see if its
6610d10e4ef2Snarayan 			 * because we have just reclaimed it.
6611d10e4ef2Snarayan 			 */
6612d10e4ef2Snarayan 			mutex_enter(&priv_addr->dstate_lock);
6613d10e4ef2Snarayan 			if (pub_addr->hdr.dstate == VIO_DESC_DONE) {
66141ae08745Sheppo 				/* clear all the fields */
66151ae08745Sheppo 				bzero(priv_addr->datap, priv_addr->datalen);
66161ae08745Sheppo 				priv_addr->datalen = 0;
66171ae08745Sheppo 
66181ae08745Sheppo 				pub_addr->hdr.dstate = VIO_DESC_FREE;
66191ae08745Sheppo 				pub_addr->hdr.ack = 0;
6620d10e4ef2Snarayan 
66211ae08745Sheppo 				priv_addr->dstate = VIO_DESC_FREE;
6622d10e4ef2Snarayan 				mutex_exit(&priv_addr->dstate_lock);
66231ae08745Sheppo 
66241ae08745Sheppo 				D3(vswp, "clearing descp %d : pub state "
66251ae08745Sheppo 				    "0x%llx : priv state 0x%llx", i,
6626205eeb1aSlm66018 				    pub_addr->hdr.dstate, priv_addr->dstate);
6627d10e4ef2Snarayan 
6628d10e4ef2Snarayan 			} else {
6629d10e4ef2Snarayan 				mutex_exit(&priv_addr->dstate_lock);
6630d10e4ef2Snarayan 
6631d10e4ef2Snarayan 				if (dring_pkt->dring_process_state !=
6632d10e4ef2Snarayan 				    VIO_DP_STOPPED) {
6633d10e4ef2Snarayan 					DERR(vswp, "%s: descriptor %lld at pos "
6634d10e4ef2Snarayan 					    " 0x%llx not DONE (0x%lx)\n",
6635d10e4ef2Snarayan 					    __func__, i, pub_addr,
6636d10e4ef2Snarayan 					    pub_addr->hdr.dstate);
6637445b4c2eSsb155480 					RW_EXIT(&ldcp->lane_out.dlistrw);
6638d10e4ef2Snarayan 					return;
6639d10e4ef2Snarayan 				}
66401ae08745Sheppo 			}
66411ae08745Sheppo 		}
66421ae08745Sheppo 
6643d10e4ef2Snarayan 		/*
6644d10e4ef2Snarayan 		 * If our peer is stopping processing descriptors then
6645d10e4ef2Snarayan 		 * we check to make sure it has processed all the descriptors
6646d10e4ef2Snarayan 		 * we have updated. If not then we send it a new message
6647d10e4ef2Snarayan 		 * to prompt it to restart.
6648d10e4ef2Snarayan 		 */
6649d10e4ef2Snarayan 		if (dring_pkt->dring_process_state == VIO_DP_STOPPED) {
6650d10e4ef2Snarayan 			DTRACE_PROBE(stop_process_recv);
6651d10e4ef2Snarayan 			D2(vswp, "%s(%lld): got stopping msg : %d : %d",
6652d10e4ef2Snarayan 			    __func__, ldcp->ldc_id, dring_pkt->start_idx,
6653d10e4ef2Snarayan 			    dring_pkt->end_idx);
6654d10e4ef2Snarayan 
6655d10e4ef2Snarayan 			/*
6656d10e4ef2Snarayan 			 * Check next descriptor in public section of ring.
6657d10e4ef2Snarayan 			 * If its marked as READY then we need to prompt our
6658d10e4ef2Snarayan 			 * peer to start processing the ring again.
6659d10e4ef2Snarayan 			 */
6660d10e4ef2Snarayan 			i = (end + 1) % len;
6661d10e4ef2Snarayan 			pub_addr = (vnet_public_desc_t *)dp->pub_addr + i;
6662d10e4ef2Snarayan 			priv_addr = (vsw_private_desc_t *)dp->priv_addr + i;
6663d10e4ef2Snarayan 
6664d10e4ef2Snarayan 			/*
6665d10e4ef2Snarayan 			 * Hold the restart lock across all of this to
6666d10e4ef2Snarayan 			 * make sure that its not possible for us to
6667d10e4ef2Snarayan 			 * decide that a msg needs to be sent in the future
6668d10e4ef2Snarayan 			 * but the sending code having already checked is
6669d10e4ef2Snarayan 			 * about to exit.
6670d10e4ef2Snarayan 			 */
6671d10e4ef2Snarayan 			mutex_enter(&dp->restart_lock);
6672d10e4ef2Snarayan 			mutex_enter(&priv_addr->dstate_lock);
6673d10e4ef2Snarayan 			if (pub_addr->hdr.dstate == VIO_DESC_READY) {
6674d10e4ef2Snarayan 
6675d10e4ef2Snarayan 				mutex_exit(&priv_addr->dstate_lock);
6676d10e4ef2Snarayan 
6677d10e4ef2Snarayan 				dring_pkt->tag.vio_subtype = VIO_SUBTYPE_INFO;
6678d10e4ef2Snarayan 				dring_pkt->tag.vio_sid = ldcp->local_session;
6679d10e4ef2Snarayan 
6680d10e4ef2Snarayan 				mutex_enter(&ldcp->lane_out.seq_lock);
6681d10e4ef2Snarayan 				dring_pkt->seq_num = ldcp->lane_out.seq_num++;
6682d10e4ef2Snarayan 				mutex_exit(&ldcp->lane_out.seq_lock);
6683d10e4ef2Snarayan 
6684d10e4ef2Snarayan 				dring_pkt->start_idx = (end + 1) % len;
6685d10e4ef2Snarayan 				dring_pkt->end_idx = -1;
6686d10e4ef2Snarayan 
6687d10e4ef2Snarayan 				D2(vswp, "%s(%lld) : sending restart msg:"
6688d10e4ef2Snarayan 				    " %d : %d", __func__, ldcp->ldc_id,
6689205eeb1aSlm66018 				    dring_pkt->start_idx, dring_pkt->end_idx);
6690d10e4ef2Snarayan 
6691b071742bSsg70180 				msg_rv = vsw_send_msg(ldcp, (void *)dring_pkt,
6692b071742bSsg70180 				    sizeof (vio_dring_msg_t), B_FALSE);
6693b071742bSsg70180 
6694d10e4ef2Snarayan 			} else {
6695d10e4ef2Snarayan 				mutex_exit(&priv_addr->dstate_lock);
6696d10e4ef2Snarayan 				dp->restart_reqd = B_TRUE;
6697d10e4ef2Snarayan 			}
6698d10e4ef2Snarayan 			mutex_exit(&dp->restart_lock);
6699d10e4ef2Snarayan 		}
6700445b4c2eSsb155480 		RW_EXIT(&ldcp->lane_out.dlistrw);
6701b071742bSsg70180 
6702b071742bSsg70180 		/* only do channel reset after dropping dlistrw lock */
6703b071742bSsg70180 		if (msg_rv == ECONNRESET)
6704b071742bSsg70180 			vsw_process_conn_evt(ldcp, VSW_CONN_RESET);
6705b071742bSsg70180 
67061ae08745Sheppo 		break;
67071ae08745Sheppo 
67081ae08745Sheppo 	case VIO_SUBTYPE_NACK:
67091ae08745Sheppo 		DWARN(vswp, "%s(%lld): VIO_SUBTYPE_NACK",
67101ae08745Sheppo 		    __func__, ldcp->ldc_id);
67111ae08745Sheppo 		/*
67121ae08745Sheppo 		 * Something is badly wrong if we are getting NACK's
67131ae08745Sheppo 		 * for our data pkts. So reset the channel.
67141ae08745Sheppo 		 */
6715b071742bSsg70180 		vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
67161ae08745Sheppo 
67171ae08745Sheppo 		break;
67181ae08745Sheppo 
67191ae08745Sheppo 	default:
67201ae08745Sheppo 		DERR(vswp, "%s(%lld): Unknown vio_subtype %x\n", __func__,
67211ae08745Sheppo 		    ldcp->ldc_id, dring_pkt->tag.vio_subtype);
67221ae08745Sheppo 	}
67231ae08745Sheppo 
67241ae08745Sheppo 	D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id);
67251ae08745Sheppo }
67261ae08745Sheppo 
67271ae08745Sheppo /*
67281ae08745Sheppo  * VIO_PKT_DATA (a.k.a raw data mode )
67291ae08745Sheppo  *
67301ae08745Sheppo  * Note - currently not supported. Do nothing.
67311ae08745Sheppo  */
67321ae08745Sheppo static void
67331ae08745Sheppo vsw_process_data_raw_pkt(vsw_ldc_t *ldcp, void *dpkt)
67341ae08745Sheppo {
67351ae08745Sheppo 	_NOTE(ARGUNUSED(dpkt))
67361ae08745Sheppo 
67371ae08745Sheppo 	D1(NULL, "%s (%lld): enter\n", __func__, ldcp->ldc_id);
6738205eeb1aSlm66018 	DERR(NULL, "%s (%lld): currently unsupported", __func__, ldcp->ldc_id);
67391ae08745Sheppo 	D1(NULL, "%s (%lld): exit\n", __func__, ldcp->ldc_id);
67401ae08745Sheppo }
67411ae08745Sheppo 
67421ae08745Sheppo /*
67431ae08745Sheppo  * Process an in-band descriptor message (most likely from
67441ae08745Sheppo  * OBP).
67451ae08745Sheppo  */
67461ae08745Sheppo static void
67471ae08745Sheppo vsw_process_data_ibnd_pkt(vsw_ldc_t *ldcp, void *pkt)
67481ae08745Sheppo {
6749445b4c2eSsb155480 	vnet_ibnd_desc_t	*ibnd_desc;
67501ae08745Sheppo 	dring_info_t		*dp = NULL;
67511ae08745Sheppo 	vsw_private_desc_t	*priv_addr = NULL;
67521ae08745Sheppo 	vsw_t			*vswp = ldcp->ldc_vswp;
67531ae08745Sheppo 	mblk_t			*mp = NULL;
67541ae08745Sheppo 	size_t			nbytes = 0;
67551ae08745Sheppo 	size_t			off = 0;
67561ae08745Sheppo 	uint64_t		idx = 0;
67574bac2208Snarayan 	uint32_t		num = 1, len, datalen = 0;
67581ae08745Sheppo 	uint64_t		ncookies = 0;
67594bac2208Snarayan 	int			i, rv;
67604bac2208Snarayan 	int			j = 0;
67611ae08745Sheppo 
67621ae08745Sheppo 	D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);
67631ae08745Sheppo 
6764445b4c2eSsb155480 	ibnd_desc = (vnet_ibnd_desc_t *)pkt;
67651ae08745Sheppo 
67661ae08745Sheppo 	switch (ibnd_desc->hdr.tag.vio_subtype) {
67671ae08745Sheppo 	case VIO_SUBTYPE_INFO:
67681ae08745Sheppo 		D1(vswp, "%s: VIO_SUBTYPE_INFO", __func__);
67691ae08745Sheppo 
67701ae08745Sheppo 		if (vsw_check_flag(ldcp, INBOUND, VSW_DRING_INFO_RECV))
67711ae08745Sheppo 			return;
67721ae08745Sheppo 
67731ae08745Sheppo 		/*
67741ae08745Sheppo 		 * Data is padded to align on a 8 byte boundary,
67751ae08745Sheppo 		 * nbytes is actual data length, i.e. minus that
67761ae08745Sheppo 		 * padding.
67771ae08745Sheppo 		 */
67781ae08745Sheppo 		datalen = ibnd_desc->nbytes;
67791ae08745Sheppo 
67801ae08745Sheppo 		D2(vswp, "%s(%lld): processing inband desc : "
67811ae08745Sheppo 		    ": datalen 0x%lx", __func__, ldcp->ldc_id, datalen);
67821ae08745Sheppo 
67831ae08745Sheppo 		ncookies = ibnd_desc->ncookies;
67841ae08745Sheppo 
67851ae08745Sheppo 		/*
67861ae08745Sheppo 		 * allocb(9F) returns an aligned data block. We
67871ae08745Sheppo 		 * need to ensure that we ask ldc for an aligned
67881ae08745Sheppo 		 * number of bytes also.
67891ae08745Sheppo 		 */
67901ae08745Sheppo 		nbytes = datalen;
67911ae08745Sheppo 		if (nbytes & 0x7) {
67921ae08745Sheppo 			off = 8 - (nbytes & 0x7);
67931ae08745Sheppo 			nbytes += off;
67941ae08745Sheppo 		}
67951ae08745Sheppo 
67961ae08745Sheppo 		mp = allocb(datalen, BPRI_MED);
67971ae08745Sheppo 		if (mp == NULL) {
67981ae08745Sheppo 			DERR(vswp, "%s(%lld): allocb failed",
67991ae08745Sheppo 			    __func__, ldcp->ldc_id);
68001ae08745Sheppo 			return;
68011ae08745Sheppo 		}
68021ae08745Sheppo 
68031ae08745Sheppo 		rv = ldc_mem_copy(ldcp->ldc_handle, (caddr_t)mp->b_rptr,
68041ae08745Sheppo 		    0, &nbytes, ibnd_desc->memcookie, (uint64_t)ncookies,
68051ae08745Sheppo 		    LDC_COPY_IN);
68061ae08745Sheppo 
68071ae08745Sheppo 		if (rv != 0) {
68081ae08745Sheppo 			DERR(vswp, "%s(%d): unable to copy in data from "
6809205eeb1aSlm66018 			    "%d cookie(s)", __func__, ldcp->ldc_id, ncookies);
68101ae08745Sheppo 			freemsg(mp);
68111ae08745Sheppo 			return;
6812023505bcSraghuram 		}
6813023505bcSraghuram 
6814205eeb1aSlm66018 		D2(vswp, "%s(%d): copied in %ld bytes using %d cookies",
6815205eeb1aSlm66018 		    __func__, ldcp->ldc_id, nbytes, ncookies);
6816023505bcSraghuram 
68171ae08745Sheppo 		/* point to the actual end of data */
6818da86a4daSrf157361 		mp->b_wptr = mp->b_rptr + datalen;
68191ae08745Sheppo 
68201ae08745Sheppo 		/*
68211ae08745Sheppo 		 * We ACK back every in-band descriptor message we process
68221ae08745Sheppo 		 */
68231ae08745Sheppo 		ibnd_desc->hdr.tag.vio_subtype = VIO_SUBTYPE_ACK;
68241ae08745Sheppo 		ibnd_desc->hdr.tag.vio_sid = ldcp->local_session;
6825b071742bSsg70180 		(void) vsw_send_msg(ldcp, (void *)ibnd_desc,
6826b071742bSsg70180 		    sizeof (vnet_ibnd_desc_t), B_TRUE);
68271ae08745Sheppo 
68281ae08745Sheppo 		/* send the packet to be switched */
6829da86a4daSrf157361 		vswp->vsw_switch_frame(vswp, mp, VSW_VNETPORT,
68301ae08745Sheppo 		    ldcp->ldc_port, NULL);
68311ae08745Sheppo 
68321ae08745Sheppo 		break;
68331ae08745Sheppo 
68341ae08745Sheppo 	case VIO_SUBTYPE_ACK:
68351ae08745Sheppo 		D1(vswp, "%s: VIO_SUBTYPE_ACK", __func__);
68361ae08745Sheppo 
68371ae08745Sheppo 		/* Verify the ACK is valid */
68381ae08745Sheppo 		idx = ibnd_desc->hdr.desc_handle;
68391ae08745Sheppo 
68401ae08745Sheppo 		if (idx >= VSW_RING_NUM_EL) {
684134683adeSsg70180 			cmn_err(CE_WARN, "!vsw%d: corrupted ACK received "
684234683adeSsg70180 			    "(idx %ld)", vswp->instance, idx);
68431ae08745Sheppo 			return;
68441ae08745Sheppo 		}
68451ae08745Sheppo 
68461ae08745Sheppo 		if ((dp = ldcp->lane_out.dringp) == NULL) {
68471ae08745Sheppo 			DERR(vswp, "%s: no dring found", __func__);
68481ae08745Sheppo 			return;
68491ae08745Sheppo 		}
68501ae08745Sheppo 
68514bac2208Snarayan 		len = dp->num_descriptors;
68524bac2208Snarayan 		/*
68534bac2208Snarayan 		 * If the descriptor we are being ACK'ed for is not the
68544bac2208Snarayan 		 * one we expected, then pkts were lost somwhere, either
68554bac2208Snarayan 		 * when we tried to send a msg, or a previous ACK msg from
68564bac2208Snarayan 		 * our peer. In either case we now reclaim the descriptors
68574bac2208Snarayan 		 * in the range from the last ACK we received up to the
68584bac2208Snarayan 		 * current ACK.
68594bac2208Snarayan 		 */
68604bac2208Snarayan 		if (idx != dp->last_ack_recv) {
68614bac2208Snarayan 			DWARN(vswp, "%s: dropped pkts detected, (%ld, %ld)",
68624bac2208Snarayan 			    __func__, dp->last_ack_recv, idx);
68634bac2208Snarayan 			num = idx >= dp->last_ack_recv ?
68644bac2208Snarayan 			    idx - dp->last_ack_recv + 1:
68654bac2208Snarayan 			    (len - dp->last_ack_recv + 1) + idx;
68664bac2208Snarayan 		}
68671ae08745Sheppo 
68681ae08745Sheppo 		/*
68691ae08745Sheppo 		 * When we sent the in-band message to our peer we
68701ae08745Sheppo 		 * marked the copy in our private ring as READY. We now
68711ae08745Sheppo 		 * check that the descriptor we are being ACK'ed for is in
68721ae08745Sheppo 		 * fact READY, i.e. it is one we have shared with our peer.
68734bac2208Snarayan 		 *
68744bac2208Snarayan 		 * If its not we flag an error, but still reset the descr
68754bac2208Snarayan 		 * back to FREE.
68761ae08745Sheppo 		 */
68774bac2208Snarayan 		for (i = dp->last_ack_recv; j < num; i = (i + 1) % len, j++) {
68784bac2208Snarayan 			priv_addr = (vsw_private_desc_t *)dp->priv_addr + i;
6879d10e4ef2Snarayan 			mutex_enter(&priv_addr->dstate_lock);
68801ae08745Sheppo 			if (priv_addr->dstate != VIO_DESC_READY) {
68814bac2208Snarayan 				DERR(vswp, "%s: (%ld) desc at index %ld not "
68824bac2208Snarayan 				    "READY (0x%lx)", __func__,
68834bac2208Snarayan 				    ldcp->ldc_id, idx, priv_addr->dstate);
68844bac2208Snarayan 				DERR(vswp, "%s: bound %d: ncookies %ld : "
68854bac2208Snarayan 				    "datalen %ld", __func__,
68864bac2208Snarayan 				    priv_addr->bound, priv_addr->ncookies,
68874bac2208Snarayan 				    priv_addr->datalen);
68884bac2208Snarayan 			}
68891ae08745Sheppo 			D2(vswp, "%s: (%lld) freeing descp at %lld", __func__,
68901ae08745Sheppo 			    ldcp->ldc_id, idx);
68911ae08745Sheppo 			/* release resources associated with sent msg */
68921ae08745Sheppo 			bzero(priv_addr->datap, priv_addr->datalen);
68931ae08745Sheppo 			priv_addr->datalen = 0;
68941ae08745Sheppo 			priv_addr->dstate = VIO_DESC_FREE;
6895d10e4ef2Snarayan 			mutex_exit(&priv_addr->dstate_lock);
68961ae08745Sheppo 		}
68974bac2208Snarayan 		/* update to next expected value */
68984bac2208Snarayan 		dp->last_ack_recv = (idx + 1) % dp->num_descriptors;
68994bac2208Snarayan 
69001ae08745Sheppo 		break;
69011ae08745Sheppo 
69021ae08745Sheppo 	case VIO_SUBTYPE_NACK:
69031ae08745Sheppo 		DERR(vswp, "%s: VIO_SUBTYPE_NACK", __func__);
69041ae08745Sheppo 
69051ae08745Sheppo 		/*
69061ae08745Sheppo 		 * We should only get a NACK if our peer doesn't like
69071ae08745Sheppo 		 * something about a message we have sent it. If this
69081ae08745Sheppo 		 * happens we just release the resources associated with
69091ae08745Sheppo 		 * the message. (We are relying on higher layers to decide
69101ae08745Sheppo 		 * whether or not to resend.
69111ae08745Sheppo 		 */
69121ae08745Sheppo 
69131ae08745Sheppo 		/* limit check */
69141ae08745Sheppo 		idx = ibnd_desc->hdr.desc_handle;
69151ae08745Sheppo 
69161ae08745Sheppo 		if (idx >= VSW_RING_NUM_EL) {
69171ae08745Sheppo 			DERR(vswp, "%s: corrupted NACK received (idx %lld)",
69181ae08745Sheppo 			    __func__, idx);
69191ae08745Sheppo 			return;
69201ae08745Sheppo 		}
69211ae08745Sheppo 
69221ae08745Sheppo 		if ((dp = ldcp->lane_out.dringp) == NULL) {
69231ae08745Sheppo 			DERR(vswp, "%s: no dring found", __func__);
69241ae08745Sheppo 			return;
69251ae08745Sheppo 		}
69261ae08745Sheppo 
69271ae08745Sheppo 		priv_addr = (vsw_private_desc_t *)dp->priv_addr;
69281ae08745Sheppo 
69291ae08745Sheppo 		/* move to correct location in ring */
69301ae08745Sheppo 		priv_addr += idx;
69311ae08745Sheppo 
69321ae08745Sheppo 		/* release resources associated with sent msg */
6933d10e4ef2Snarayan 		mutex_enter(&priv_addr->dstate_lock);
69341ae08745Sheppo 		bzero(priv_addr->datap, priv_addr->datalen);
69351ae08745Sheppo 		priv_addr->datalen = 0;
69361ae08745Sheppo 		priv_addr->dstate = VIO_DESC_FREE;
6937d10e4ef2Snarayan 		mutex_exit(&priv_addr->dstate_lock);
69381ae08745Sheppo 
69391ae08745Sheppo 		break;
69401ae08745Sheppo 
69411ae08745Sheppo 	default:
69421ae08745Sheppo 		DERR(vswp, "%s(%lld): Unknown vio_subtype %x\n", __func__,
69431ae08745Sheppo 		    ldcp->ldc_id, ibnd_desc->hdr.tag.vio_subtype);
69441ae08745Sheppo 	}
69451ae08745Sheppo 
69461ae08745Sheppo 	D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id);
69471ae08745Sheppo }
69481ae08745Sheppo 
69491ae08745Sheppo static void
69501ae08745Sheppo vsw_process_err_pkt(vsw_ldc_t *ldcp, void *epkt, vio_msg_tag_t tag)
69511ae08745Sheppo {
69521ae08745Sheppo 	_NOTE(ARGUNUSED(epkt))
69531ae08745Sheppo 
69541ae08745Sheppo 	vsw_t		*vswp = ldcp->ldc_vswp;
69551ae08745Sheppo 	uint16_t	env = tag.vio_subtype_env;
69561ae08745Sheppo 
69571ae08745Sheppo 	D1(vswp, "%s (%lld): enter\n", __func__, ldcp->ldc_id);
69581ae08745Sheppo 
69591ae08745Sheppo 	/*
69601ae08745Sheppo 	 * Error vio_subtypes have yet to be defined. So for
69611ae08745Sheppo 	 * the moment we can't do anything.
69621ae08745Sheppo 	 */
69631ae08745Sheppo 	D2(vswp, "%s: (%x) vio_subtype env", __func__, env);
69641ae08745Sheppo 
69651ae08745Sheppo 	D1(vswp, "%s (%lld): exit\n", __func__, ldcp->ldc_id);
69661ae08745Sheppo }
69671ae08745Sheppo 
69681ae08745Sheppo /*
69691ae08745Sheppo  * Switch the given ethernet frame when operating in layer 2 mode.
69701ae08745Sheppo  *
69711ae08745Sheppo  * vswp: pointer to the vsw instance
69721ae08745Sheppo  * mp: pointer to chain of ethernet frame(s) to be switched
69731ae08745Sheppo  * caller: identifies the source of this frame as:
69741ae08745Sheppo  * 		1. VSW_VNETPORT - a vsw port (connected to a vnet).
69751ae08745Sheppo  *		2. VSW_PHYSDEV - the physical ethernet device
69761ae08745Sheppo  *		3. VSW_LOCALDEV - vsw configured as a virtual interface
69771ae08745Sheppo  * arg: argument provided by the caller.
69781ae08745Sheppo  *		1. for VNETPORT - pointer to the corresponding vsw_port_t.
69791ae08745Sheppo  *		2. for PHYSDEV - NULL
69801ae08745Sheppo  *		3. for LOCALDEV - pointer to this vsw_t(self)
69811ae08745Sheppo  */
69821ae08745Sheppo void
69831ae08745Sheppo vsw_switch_l2_frame(vsw_t *vswp, mblk_t *mp, int caller,
69841ae08745Sheppo 			vsw_port_t *arg, mac_resource_handle_t mrh)
69851ae08745Sheppo {
69861ae08745Sheppo 	struct ether_header	*ehp;
69871ae08745Sheppo 	vsw_port_t		*port = NULL;
69881ae08745Sheppo 	mblk_t			*bp, *ret_m;
69891ae08745Sheppo 	mblk_t			*nmp = NULL;
69901ae08745Sheppo 	vsw_port_list_t		*plist = &vswp->plist;
69911ae08745Sheppo 
69921ae08745Sheppo 	D1(vswp, "%s: enter (caller %d)", __func__, caller);
69931ae08745Sheppo 
69941ae08745Sheppo 	/*
69951ae08745Sheppo 	 * PERF: rather than breaking up the chain here, scan it
69961ae08745Sheppo 	 * to find all mblks heading to same destination and then
69971ae08745Sheppo 	 * pass that sub-chain to the lower transmit functions.
69981ae08745Sheppo 	 */
69991ae08745Sheppo 
70001ae08745Sheppo 	/* process the chain of packets */
70011ae08745Sheppo 	bp = mp;
70021ae08745Sheppo 	while (bp) {
70031ae08745Sheppo 		mp = bp;
70041ae08745Sheppo 		bp = bp->b_next;
70051ae08745Sheppo 		mp->b_next = mp->b_prev = NULL;
70061ae08745Sheppo 		ehp = (struct ether_header *)mp->b_rptr;
70071ae08745Sheppo 
70081ae08745Sheppo 		D2(vswp, "%s: mblk data buffer %lld : actual data size %lld",
70091ae08745Sheppo 		    __func__, MBLKSIZE(mp), MBLKL(mp));
70101ae08745Sheppo 
70111ae08745Sheppo 		READ_ENTER(&vswp->if_lockrw);
70121ae08745Sheppo 		if (ether_cmp(&ehp->ether_dhost, &vswp->if_addr) == 0) {
70131ae08745Sheppo 			/*
70141ae08745Sheppo 			 * If destination is VSW_LOCALDEV (vsw as an eth
70151ae08745Sheppo 			 * interface) and if the device is up & running,
70161ae08745Sheppo 			 * send the packet up the stack on this host.
70171ae08745Sheppo 			 * If the virtual interface is down, drop the packet.
70181ae08745Sheppo 			 */
70191ae08745Sheppo 			if (caller != VSW_LOCALDEV) {
70201ae08745Sheppo 				if (vswp->if_state & VSW_IF_UP) {
70211ae08745Sheppo 					RW_EXIT(&vswp->if_lockrw);
7022ba2e4443Sseb 					mac_rx(vswp->if_mh, mrh, mp);
70231ae08745Sheppo 				} else {
70241ae08745Sheppo 					RW_EXIT(&vswp->if_lockrw);
70251ae08745Sheppo 					/* Interface down, drop pkt */
70261ae08745Sheppo 					freemsg(mp);
70271ae08745Sheppo 				}
70281ae08745Sheppo 			} else {
70291ae08745Sheppo 				RW_EXIT(&vswp->if_lockrw);
70301ae08745Sheppo 				freemsg(mp);
70311ae08745Sheppo 			}
70321ae08745Sheppo 			continue;
70331ae08745Sheppo 		}
70341ae08745Sheppo 		RW_EXIT(&vswp->if_lockrw);
70351ae08745Sheppo 
70361ae08745Sheppo 		READ_ENTER(&plist->lockrw);
70371ae08745Sheppo 		port = vsw_lookup_fdb(vswp, ehp);
70381ae08745Sheppo 		if (port) {
70391ae08745Sheppo 			/*
70401ae08745Sheppo 			 * Mark the port as in-use.
70411ae08745Sheppo 			 */
70421ae08745Sheppo 			mutex_enter(&port->ref_lock);
70431ae08745Sheppo 			port->ref_cnt++;
70441ae08745Sheppo 			mutex_exit(&port->ref_lock);
70451ae08745Sheppo 			RW_EXIT(&plist->lockrw);
70461ae08745Sheppo 
70471ae08745Sheppo 			/*
70481ae08745Sheppo 			 * If plumbed and in promisc mode then copy msg
70491ae08745Sheppo 			 * and send up the stack.
70501ae08745Sheppo 			 */
70511ae08745Sheppo 			READ_ENTER(&vswp->if_lockrw);
70521ae08745Sheppo 			if (VSW_U_P(vswp->if_state)) {
70531ae08745Sheppo 				RW_EXIT(&vswp->if_lockrw);
70541ae08745Sheppo 				nmp = copymsg(mp);
70551ae08745Sheppo 				if (nmp)
7056ba2e4443Sseb 					mac_rx(vswp->if_mh, mrh, nmp);
70571ae08745Sheppo 			} else {
70581ae08745Sheppo 				RW_EXIT(&vswp->if_lockrw);
70591ae08745Sheppo 			}
70601ae08745Sheppo 
70611ae08745Sheppo 			/*
70621ae08745Sheppo 			 * If the destination is in FDB, the packet
70631ae08745Sheppo 			 * should be forwarded to the correponding
70641ae08745Sheppo 			 * vsw_port (connected to a vnet device -
70651ae08745Sheppo 			 * VSW_VNETPORT)
70661ae08745Sheppo 			 */
70671ae08745Sheppo 			(void) vsw_portsend(port, mp);
70681ae08745Sheppo 
70691ae08745Sheppo 			/*
70701ae08745Sheppo 			 * Decrement use count in port and check if
70711ae08745Sheppo 			 * should wake delete thread.
70721ae08745Sheppo 			 */
70731ae08745Sheppo 			mutex_enter(&port->ref_lock);
70741ae08745Sheppo 			port->ref_cnt--;
70751ae08745Sheppo 			if (port->ref_cnt == 0)
70761ae08745Sheppo 				cv_signal(&port->ref_cv);
70771ae08745Sheppo 			mutex_exit(&port->ref_lock);
70781ae08745Sheppo 		} else {
70791ae08745Sheppo 			RW_EXIT(&plist->lockrw);
70801ae08745Sheppo 			/*
70811ae08745Sheppo 			 * Destination not in FDB.
70821ae08745Sheppo 			 *
70831ae08745Sheppo 			 * If the destination is broadcast or
70841ae08745Sheppo 			 * multicast forward the packet to all
70851ae08745Sheppo 			 * (VNETPORTs, PHYSDEV, LOCALDEV),
70861ae08745Sheppo 			 * except the caller.
70871ae08745Sheppo 			 */
70881ae08745Sheppo 			if (IS_BROADCAST(ehp)) {
70891ae08745Sheppo 				D3(vswp, "%s: BROADCAST pkt", __func__);
7090205eeb1aSlm66018 				(void) vsw_forward_all(vswp, mp, caller, arg);
70911ae08745Sheppo 			} else if (IS_MULTICAST(ehp)) {
70921ae08745Sheppo 				D3(vswp, "%s: MULTICAST pkt", __func__);
7093205eeb1aSlm66018 				(void) vsw_forward_grp(vswp, mp, caller, arg);
70941ae08745Sheppo 			} else {
70951ae08745Sheppo 				/*
70961ae08745Sheppo 				 * If the destination is unicast, and came
70971ae08745Sheppo 				 * from either a logical network device or
70981ae08745Sheppo 				 * the switch itself when it is plumbed, then
70991ae08745Sheppo 				 * send it out on the physical device and also
71001ae08745Sheppo 				 * up the stack if the logical interface is
71011ae08745Sheppo 				 * in promiscious mode.
71021ae08745Sheppo 				 *
71031ae08745Sheppo 				 * NOTE:  The assumption here is that if we
71041ae08745Sheppo 				 * cannot find the destination in our fdb, its
71051ae08745Sheppo 				 * a unicast address, and came from either a
71061ae08745Sheppo 				 * vnet or down the stack (when plumbed) it
71071ae08745Sheppo 				 * must be destinded for an ethernet device
71081ae08745Sheppo 				 * outside our ldoms.
71091ae08745Sheppo 				 */
71101ae08745Sheppo 				if (caller == VSW_VNETPORT) {
71111ae08745Sheppo 					READ_ENTER(&vswp->if_lockrw);
71121ae08745Sheppo 					if (VSW_U_P(vswp->if_state)) {
71131ae08745Sheppo 						RW_EXIT(&vswp->if_lockrw);
71141ae08745Sheppo 						nmp = copymsg(mp);
71151ae08745Sheppo 						if (nmp)
7116ba2e4443Sseb 							mac_rx(vswp->if_mh,
71171ae08745Sheppo 							    mrh, nmp);
71181ae08745Sheppo 					} else {
71191ae08745Sheppo 						RW_EXIT(&vswp->if_lockrw);
71201ae08745Sheppo 					}
71211ae08745Sheppo 					if ((ret_m = vsw_tx_msg(vswp, mp))
71221ae08745Sheppo 					    != NULL) {
71231ae08745Sheppo 						DERR(vswp, "%s: drop mblks to "
71241ae08745Sheppo 						    "phys dev", __func__);
71251ae08745Sheppo 						freemsg(ret_m);
71261ae08745Sheppo 					}
71271ae08745Sheppo 
71281ae08745Sheppo 				} else if (caller == VSW_PHYSDEV) {
71291ae08745Sheppo 					/*
71301ae08745Sheppo 					 * Pkt seen because card in promisc
71311ae08745Sheppo 					 * mode. Send up stack if plumbed in
71321ae08745Sheppo 					 * promisc mode, else drop it.
71331ae08745Sheppo 					 */
71341ae08745Sheppo 					READ_ENTER(&vswp->if_lockrw);
71351ae08745Sheppo 					if (VSW_U_P(vswp->if_state)) {
71361ae08745Sheppo 						RW_EXIT(&vswp->if_lockrw);
7137ba2e4443Sseb 						mac_rx(vswp->if_mh, mrh, mp);
71381ae08745Sheppo 					} else {
71391ae08745Sheppo 						RW_EXIT(&vswp->if_lockrw);
71401ae08745Sheppo 						freemsg(mp);
71411ae08745Sheppo 					}
71421ae08745Sheppo 
71431ae08745Sheppo 				} else if (caller == VSW_LOCALDEV) {
71441ae08745Sheppo 					/*
71451ae08745Sheppo 					 * Pkt came down the stack, send out
71461ae08745Sheppo 					 * over physical device.
71471ae08745Sheppo 					 */
71481ae08745Sheppo 					if ((ret_m = vsw_tx_msg(vswp, mp))
71491ae08745Sheppo 					    != NULL) {
71501ae08745Sheppo 						DERR(vswp, "%s: drop mblks to "
71511ae08745Sheppo 						    "phys dev", __func__);
71521ae08745Sheppo 						freemsg(ret_m);
71531ae08745Sheppo 					}
71541ae08745Sheppo 				}
71551ae08745Sheppo 			}
71561ae08745Sheppo 		}
71571ae08745Sheppo 	}
71581ae08745Sheppo 	D1(vswp, "%s: exit\n", __func__);
71591ae08745Sheppo }
71601ae08745Sheppo 
71611ae08745Sheppo /*
71621ae08745Sheppo  * Switch ethernet frame when in layer 3 mode (i.e. using IP
71631ae08745Sheppo  * layer to do the routing).
71641ae08745Sheppo  *
71651ae08745Sheppo  * There is a large amount of overlap between this function and
71661ae08745Sheppo  * vsw_switch_l2_frame. At some stage we need to revisit and refactor
71671ae08745Sheppo  * both these functions.
71681ae08745Sheppo  */
71691ae08745Sheppo void
71701ae08745Sheppo vsw_switch_l3_frame(vsw_t *vswp, mblk_t *mp, int caller,
71711ae08745Sheppo 			vsw_port_t *arg, mac_resource_handle_t mrh)
71721ae08745Sheppo {
71731ae08745Sheppo 	struct ether_header	*ehp;
71741ae08745Sheppo 	vsw_port_t		*port = NULL;
71751ae08745Sheppo 	mblk_t			*bp = NULL;
71761ae08745Sheppo 	vsw_port_list_t		*plist = &vswp->plist;
71771ae08745Sheppo 
71781ae08745Sheppo 	D1(vswp, "%s: enter (caller %d)", __func__, caller);
71791ae08745Sheppo 
71801ae08745Sheppo 	/*
71811ae08745Sheppo 	 * In layer 3 mode should only ever be switching packets
71821ae08745Sheppo 	 * between IP layer and vnet devices. So make sure thats
71831ae08745Sheppo 	 * who is invoking us.
71841ae08745Sheppo 	 */
71851ae08745Sheppo 	if ((caller != VSW_LOCALDEV) && (caller != VSW_VNETPORT)) {
71861ae08745Sheppo 		DERR(vswp, "%s: unexpected caller (%d)", __func__, caller);
71871ae08745Sheppo 		freemsgchain(mp);
71881ae08745Sheppo 		return;
71891ae08745Sheppo 	}
71901ae08745Sheppo 
71911ae08745Sheppo 	/* process the chain of packets */
71921ae08745Sheppo 	bp = mp;
71931ae08745Sheppo 	while (bp) {
71941ae08745Sheppo 		mp = bp;
71951ae08745Sheppo 		bp = bp->b_next;
71961ae08745Sheppo 		mp->b_next = mp->b_prev = NULL;
71971ae08745Sheppo 		ehp = (struct ether_header *)mp->b_rptr;
71981ae08745Sheppo 
71991ae08745Sheppo 		D2(vswp, "%s: mblk data buffer %lld : actual data size %lld",
72001ae08745Sheppo 		    __func__, MBLKSIZE(mp), MBLKL(mp));
72011ae08745Sheppo 
72021ae08745Sheppo 		READ_ENTER(&plist->lockrw);
72031ae08745Sheppo 		port = vsw_lookup_fdb(vswp, ehp);
72041ae08745Sheppo 		if (port) {
72051ae08745Sheppo 			/*
72061ae08745Sheppo 			 * Mark port as in-use.
72071ae08745Sheppo 			 */
72081ae08745Sheppo 			mutex_enter(&port->ref_lock);
72091ae08745Sheppo 			port->ref_cnt++;
72101ae08745Sheppo 			mutex_exit(&port->ref_lock);
72111ae08745Sheppo 			RW_EXIT(&plist->lockrw);
72121ae08745Sheppo 
72131ae08745Sheppo 			D2(vswp, "%s: sending to target port", __func__);
72141ae08745Sheppo 			(void) vsw_portsend(port, mp);
72151ae08745Sheppo 
72161ae08745Sheppo 			/*
72171ae08745Sheppo 			 * Finished with port so decrement ref count and
72181ae08745Sheppo 			 * check if should wake delete thread.
72191ae08745Sheppo 			 */
72201ae08745Sheppo 			mutex_enter(&port->ref_lock);
72211ae08745Sheppo 			port->ref_cnt--;
72221ae08745Sheppo 			if (port->ref_cnt == 0)
72231ae08745Sheppo 				cv_signal(&port->ref_cv);
72241ae08745Sheppo 			mutex_exit(&port->ref_lock);
72251ae08745Sheppo 		} else {
72261ae08745Sheppo 			RW_EXIT(&plist->lockrw);
72271ae08745Sheppo 			/*
72281ae08745Sheppo 			 * Destination not in FDB
72291ae08745Sheppo 			 *
72301ae08745Sheppo 			 * If the destination is broadcast or
72311ae08745Sheppo 			 * multicast forward the packet to all
72321ae08745Sheppo 			 * (VNETPORTs, PHYSDEV, LOCALDEV),
72331ae08745Sheppo 			 * except the caller.
72341ae08745Sheppo 			 */
72351ae08745Sheppo 			if (IS_BROADCAST(ehp)) {
72361ae08745Sheppo 				D2(vswp, "%s: BROADCAST pkt", __func__);
7237205eeb1aSlm66018 				(void) vsw_forward_all(vswp, mp, caller, arg);
72381ae08745Sheppo 			} else if (IS_MULTICAST(ehp)) {
72391ae08745Sheppo 				D2(vswp, "%s: MULTICAST pkt", __func__);
7240205eeb1aSlm66018 				(void) vsw_forward_grp(vswp, mp, caller, arg);
72411ae08745Sheppo 			} else {
72421ae08745Sheppo 				/*
72431ae08745Sheppo 				 * Unicast pkt from vnet that we don't have
72441ae08745Sheppo 				 * an FDB entry for, so must be destinded for
72451ae08745Sheppo 				 * the outside world. Attempt to send up to the
72461ae08745Sheppo 				 * IP layer to allow it to deal with it.
72471ae08745Sheppo 				 */
72481ae08745Sheppo 				if (caller == VSW_VNETPORT) {
72491ae08745Sheppo 					READ_ENTER(&vswp->if_lockrw);
72501ae08745Sheppo 					if (vswp->if_state & VSW_IF_UP) {
72511ae08745Sheppo 						RW_EXIT(&vswp->if_lockrw);
72521ae08745Sheppo 						D2(vswp, "%s: sending up",
72531ae08745Sheppo 						    __func__);
7254ba2e4443Sseb 						mac_rx(vswp->if_mh, mrh, mp);
72551ae08745Sheppo 					} else {
72561ae08745Sheppo 						RW_EXIT(&vswp->if_lockrw);
72571ae08745Sheppo 						/* Interface down, drop pkt */
72581ae08745Sheppo 						D2(vswp, "%s I/F down",
72591ae08745Sheppo 						    __func__);
72601ae08745Sheppo 						freemsg(mp);
72611ae08745Sheppo 					}
72621ae08745Sheppo 				}
72631ae08745Sheppo 			}
72641ae08745Sheppo 		}
72651ae08745Sheppo 	}
72661ae08745Sheppo 
72671ae08745Sheppo 	D1(vswp, "%s: exit", __func__);
72681ae08745Sheppo }
72691ae08745Sheppo 
72701ae08745Sheppo /*
72711ae08745Sheppo  * Forward the ethernet frame to all ports (VNETPORTs, PHYSDEV, LOCALDEV),
72721ae08745Sheppo  * except the caller (port on which frame arrived).
72731ae08745Sheppo  */
72741ae08745Sheppo static int
72751ae08745Sheppo vsw_forward_all(vsw_t *vswp, mblk_t *mp, int caller, vsw_port_t *arg)
72761ae08745Sheppo {
72771ae08745Sheppo 	vsw_port_list_t	*plist = &vswp->plist;
72781ae08745Sheppo 	vsw_port_t	*portp;
72791ae08745Sheppo 	mblk_t		*nmp = NULL;
72801ae08745Sheppo 	mblk_t		*ret_m = NULL;
72811ae08745Sheppo 	int		skip_port = 0;
72821ae08745Sheppo 
72831ae08745Sheppo 	D1(vswp, "vsw_forward_all: enter\n");
72841ae08745Sheppo 
72851ae08745Sheppo 	/*
72861ae08745Sheppo 	 * Broadcast message from inside ldoms so send to outside
72871ae08745Sheppo 	 * world if in either of layer 2 modes.
72881ae08745Sheppo 	 */
72891ae08745Sheppo 	if (((vswp->smode[vswp->smode_idx] == VSW_LAYER2) ||
72901ae08745Sheppo 	    (vswp->smode[vswp->smode_idx] == VSW_LAYER2_PROMISC)) &&
72911ae08745Sheppo 	    ((caller == VSW_LOCALDEV) || (caller == VSW_VNETPORT))) {
72921ae08745Sheppo 
72931ae08745Sheppo 		nmp = dupmsg(mp);
72941ae08745Sheppo 		if (nmp) {
72951ae08745Sheppo 			if ((ret_m = vsw_tx_msg(vswp, nmp)) != NULL) {
72961ae08745Sheppo 				DERR(vswp, "%s: dropping pkt(s) "
72971ae08745Sheppo 				    "consisting of %ld bytes of data for"
72981ae08745Sheppo 				    " physical device", __func__, MBLKL(ret_m));
72991ae08745Sheppo 				freemsg(ret_m);
73001ae08745Sheppo 			}
73011ae08745Sheppo 		}
73021ae08745Sheppo 	}
73031ae08745Sheppo 
73041ae08745Sheppo 	if (caller == VSW_VNETPORT)
73051ae08745Sheppo 		skip_port = 1;
73061ae08745Sheppo 
73071ae08745Sheppo 	/*
73081ae08745Sheppo 	 * Broadcast message from other vnet (layer 2 or 3) or outside
73091ae08745Sheppo 	 * world (layer 2 only), send up stack if plumbed.
73101ae08745Sheppo 	 */
73111ae08745Sheppo 	if ((caller == VSW_PHYSDEV) || (caller == VSW_VNETPORT)) {
73121ae08745Sheppo 		READ_ENTER(&vswp->if_lockrw);
73131ae08745Sheppo 		if (vswp->if_state & VSW_IF_UP) {
73141ae08745Sheppo 			RW_EXIT(&vswp->if_lockrw);
73151ae08745Sheppo 			nmp = copymsg(mp);
73161ae08745Sheppo 			if (nmp)
7317ba2e4443Sseb 				mac_rx(vswp->if_mh, NULL, nmp);
73181ae08745Sheppo 		} else {
73191ae08745Sheppo 			RW_EXIT(&vswp->if_lockrw);
73201ae08745Sheppo 		}
73211ae08745Sheppo 	}
73221ae08745Sheppo 
73231ae08745Sheppo 	/* send it to all VNETPORTs */
73241ae08745Sheppo 	READ_ENTER(&plist->lockrw);
73251ae08745Sheppo 	for (portp = plist->head; portp != NULL; portp = portp->p_next) {
73261ae08745Sheppo 		D2(vswp, "vsw_forward_all: port %d", portp->p_instance);
73271ae08745Sheppo 		/*
73281ae08745Sheppo 		 * Caution ! - don't reorder these two checks as arg
73291ae08745Sheppo 		 * will be NULL if the caller is PHYSDEV. skip_port is
73301ae08745Sheppo 		 * only set if caller is VNETPORT.
73311ae08745Sheppo 		 */
73321ae08745Sheppo 		if ((skip_port) && (portp == arg))
73331ae08745Sheppo 			continue;
73341ae08745Sheppo 		else {
73351ae08745Sheppo 			nmp = dupmsg(mp);
73361ae08745Sheppo 			if (nmp) {
73371ae08745Sheppo 				(void) vsw_portsend(portp, nmp);
73381ae08745Sheppo 			} else {
73391ae08745Sheppo 				DERR(vswp, "vsw_forward_all: nmp NULL");
73401ae08745Sheppo 			}
73411ae08745Sheppo 		}
73421ae08745Sheppo 	}
73431ae08745Sheppo 	RW_EXIT(&plist->lockrw);
73441ae08745Sheppo 
73451ae08745Sheppo 	freemsg(mp);
73461ae08745Sheppo 
73471ae08745Sheppo 	D1(vswp, "vsw_forward_all: exit\n");
73481ae08745Sheppo 	return (0);
73491ae08745Sheppo }
73501ae08745Sheppo 
73511ae08745Sheppo /*
73521ae08745Sheppo  * Forward pkts to any devices or interfaces which have registered
73531ae08745Sheppo  * an interest in them (i.e. multicast groups).
73541ae08745Sheppo  */
73551ae08745Sheppo static int
73561ae08745Sheppo vsw_forward_grp(vsw_t *vswp, mblk_t *mp, int caller, vsw_port_t *arg)
73571ae08745Sheppo {
73581ae08745Sheppo 	struct ether_header	*ehp = (struct ether_header *)mp->b_rptr;
73591ae08745Sheppo 	mfdb_ent_t		*entp = NULL;
73601ae08745Sheppo 	mfdb_ent_t		*tpp = NULL;
73611ae08745Sheppo 	vsw_port_t 		*port;
73621ae08745Sheppo 	uint64_t		key = 0;
73631ae08745Sheppo 	mblk_t			*nmp = NULL;
73641ae08745Sheppo 	mblk_t			*ret_m = NULL;
73651ae08745Sheppo 	boolean_t		check_if = B_TRUE;
73661ae08745Sheppo 
73671ae08745Sheppo 	/*
73681ae08745Sheppo 	 * Convert address to hash table key
73691ae08745Sheppo 	 */
73701ae08745Sheppo 	KEY_HASH(key, ehp->ether_dhost);
73711ae08745Sheppo 
73721ae08745Sheppo 	D1(vswp, "%s: key 0x%llx", __func__, key);
73731ae08745Sheppo 
73741ae08745Sheppo 	/*
73751ae08745Sheppo 	 * If pkt came from either a vnet or down the stack (if we are
73761ae08745Sheppo 	 * plumbed) and we are in layer 2 mode, then we send the pkt out
73771ae08745Sheppo 	 * over the physical adapter, and then check to see if any other
73781ae08745Sheppo 	 * vnets are interested in it.
73791ae08745Sheppo 	 */
73801ae08745Sheppo 	if (((vswp->smode[vswp->smode_idx] == VSW_LAYER2) ||
73811ae08745Sheppo 	    (vswp->smode[vswp->smode_idx] == VSW_LAYER2_PROMISC)) &&
73821ae08745Sheppo 	    ((caller == VSW_VNETPORT) || (caller == VSW_LOCALDEV))) {
73831ae08745Sheppo 		nmp = dupmsg(mp);
73841ae08745Sheppo 		if (nmp) {
73851ae08745Sheppo 			if ((ret_m = vsw_tx_msg(vswp, nmp)) != NULL) {
7386205eeb1aSlm66018 				DERR(vswp, "%s: dropping pkt(s) consisting of "
7387205eeb1aSlm66018 				    "%ld bytes of data for physical device",
73881ae08745Sheppo 				    __func__, MBLKL(ret_m));
73891ae08745Sheppo 				freemsg(ret_m);
73901ae08745Sheppo 			}
73911ae08745Sheppo 		}
73921ae08745Sheppo 	}
73931ae08745Sheppo 
73941ae08745Sheppo 	READ_ENTER(&vswp->mfdbrw);
73951ae08745Sheppo 	if (mod_hash_find(vswp->mfdb, (mod_hash_key_t)key,
73961ae08745Sheppo 	    (mod_hash_val_t *)&entp) != 0) {
73971ae08745Sheppo 		D3(vswp, "%s: no table entry found for addr 0x%llx",
73981ae08745Sheppo 		    __func__, key);
73991ae08745Sheppo 	} else {
74001ae08745Sheppo 		/*
74011ae08745Sheppo 		 * Send to list of devices associated with this address...
74021ae08745Sheppo 		 */
74031ae08745Sheppo 		for (tpp = entp; tpp != NULL; tpp = tpp->nextp) {
74041ae08745Sheppo 
74051ae08745Sheppo 			/* dont send to ourselves */
74061ae08745Sheppo 			if ((caller == VSW_VNETPORT) &&
74071ae08745Sheppo 			    (tpp->d_addr == (void *)arg)) {
74081ae08745Sheppo 				port = (vsw_port_t *)tpp->d_addr;
74091ae08745Sheppo 				D3(vswp, "%s: not sending to ourselves"
7410205eeb1aSlm66018 				    " : port %d", __func__, port->p_instance);
74111ae08745Sheppo 				continue;
74121ae08745Sheppo 
74131ae08745Sheppo 			} else if ((caller == VSW_LOCALDEV) &&
74141ae08745Sheppo 			    (tpp->d_type == VSW_LOCALDEV)) {
74151ae08745Sheppo 				D3(vswp, "%s: not sending back up stack",
74161ae08745Sheppo 				    __func__);
74171ae08745Sheppo 				continue;
74181ae08745Sheppo 			}
74191ae08745Sheppo 
74201ae08745Sheppo 			if (tpp->d_type == VSW_VNETPORT) {
74211ae08745Sheppo 				port = (vsw_port_t *)tpp->d_addr;
7422205eeb1aSlm66018 				D3(vswp, "%s: sending to port %ld for addr "
7423205eeb1aSlm66018 				    "0x%llx", __func__, port->p_instance, key);
74241ae08745Sheppo 
74251ae08745Sheppo 				nmp = dupmsg(mp);
74261ae08745Sheppo 				if (nmp)
74271ae08745Sheppo 					(void) vsw_portsend(port, nmp);
74281ae08745Sheppo 			} else {
74291ae08745Sheppo 				if (vswp->if_state & VSW_IF_UP) {
74301ae08745Sheppo 					nmp = copymsg(mp);
74311ae08745Sheppo 					if (nmp)
7432ba2e4443Sseb 						mac_rx(vswp->if_mh, NULL, nmp);
74331ae08745Sheppo 					check_if = B_FALSE;
74341ae08745Sheppo 					D3(vswp, "%s: sending up stack"
7435205eeb1aSlm66018 					    " for addr 0x%llx", __func__, key);
74361ae08745Sheppo 				}
74371ae08745Sheppo 			}
74381ae08745Sheppo 		}
74391ae08745Sheppo 	}
74401ae08745Sheppo 
74411ae08745Sheppo 	RW_EXIT(&vswp->mfdbrw);
74421ae08745Sheppo 
74431ae08745Sheppo 	/*
74441ae08745Sheppo 	 * If the pkt came from either a vnet or from physical device,
74451ae08745Sheppo 	 * and if we havent already sent the pkt up the stack then we
74461ae08745Sheppo 	 * check now if we can/should (i.e. the interface is plumbed
74471ae08745Sheppo 	 * and in promisc mode).
74481ae08745Sheppo 	 */
74491ae08745Sheppo 	if ((check_if) &&
74501ae08745Sheppo 	    ((caller == VSW_VNETPORT) || (caller == VSW_PHYSDEV))) {
74511ae08745Sheppo 		READ_ENTER(&vswp->if_lockrw);
74521ae08745Sheppo 		if (VSW_U_P(vswp->if_state)) {
74531ae08745Sheppo 			RW_EXIT(&vswp->if_lockrw);
74541ae08745Sheppo 			D3(vswp, "%s: (caller %d) finally sending up stack"
74551ae08745Sheppo 			    " for addr 0x%llx", __func__, caller, key);
74561ae08745Sheppo 			nmp = copymsg(mp);
74571ae08745Sheppo 			if (nmp)
7458ba2e4443Sseb 				mac_rx(vswp->if_mh, NULL, nmp);
74591ae08745Sheppo 		} else {
74601ae08745Sheppo 			RW_EXIT(&vswp->if_lockrw);
74611ae08745Sheppo 		}
74621ae08745Sheppo 	}
74631ae08745Sheppo 
74641ae08745Sheppo 	freemsg(mp);
74651ae08745Sheppo 
74661ae08745Sheppo 	D1(vswp, "%s: exit", __func__);
74671ae08745Sheppo 
74681ae08745Sheppo 	return (0);
74691ae08745Sheppo }
74701ae08745Sheppo 
74711ae08745Sheppo /* transmit the packet over the given port */
74721ae08745Sheppo static int
74731ae08745Sheppo vsw_portsend(vsw_port_t *port, mblk_t *mp)
74741ae08745Sheppo {
74751ae08745Sheppo 	vsw_ldc_list_t 	*ldcl = &port->p_ldclist;
74761ae08745Sheppo 	vsw_ldc_t 	*ldcp;
74771ae08745Sheppo 	int		status = 0;
74781ae08745Sheppo 
74791ae08745Sheppo 
74801ae08745Sheppo 	READ_ENTER(&ldcl->lockrw);
74811ae08745Sheppo 	/*
74821ae08745Sheppo 	 * Note for now, we have a single channel.
74831ae08745Sheppo 	 */
74841ae08745Sheppo 	ldcp = ldcl->head;
74851ae08745Sheppo 	if (ldcp == NULL) {
74861ae08745Sheppo 		DERR(port->p_vswp, "vsw_portsend: no ldc: dropping packet\n");
74871ae08745Sheppo 		freemsg(mp);
74881ae08745Sheppo 		RW_EXIT(&ldcl->lockrw);
74891ae08745Sheppo 		return (1);
74901ae08745Sheppo 	}
74911ae08745Sheppo 
74921ae08745Sheppo 	/*
74931ae08745Sheppo 	 * Send the message out using the appropriate
74941ae08745Sheppo 	 * transmit function which will free mblock when it
74951ae08745Sheppo 	 * is finished with it.
74961ae08745Sheppo 	 */
74971ae08745Sheppo 	mutex_enter(&port->tx_lock);
74981ae08745Sheppo 	if (port->transmit != NULL)
74991ae08745Sheppo 		status = (*port->transmit)(ldcp, mp);
75001ae08745Sheppo 	else {
75011ae08745Sheppo 		freemsg(mp);
75021ae08745Sheppo 	}
75031ae08745Sheppo 	mutex_exit(&port->tx_lock);
75041ae08745Sheppo 
75051ae08745Sheppo 	RW_EXIT(&ldcl->lockrw);
75061ae08745Sheppo 
75071ae08745Sheppo 	return (status);
75081ae08745Sheppo }
75091ae08745Sheppo 
75101ae08745Sheppo /*
75111ae08745Sheppo  * Send packet out via descriptor ring to a logical device.
75121ae08745Sheppo  */
75131ae08745Sheppo static int
75141ae08745Sheppo vsw_dringsend(vsw_ldc_t *ldcp, mblk_t *mp)
75151ae08745Sheppo {
75161ae08745Sheppo 	vio_dring_msg_t		dring_pkt;
75171ae08745Sheppo 	dring_info_t		*dp = NULL;
75181ae08745Sheppo 	vsw_private_desc_t	*priv_desc = NULL;
7519d10e4ef2Snarayan 	vnet_public_desc_t	*pub = NULL;
75201ae08745Sheppo 	vsw_t			*vswp = ldcp->ldc_vswp;
75211ae08745Sheppo 	mblk_t			*bp;
75221ae08745Sheppo 	size_t			n, size;
75231ae08745Sheppo 	caddr_t			bufp;
75241ae08745Sheppo 	int			idx;
75251ae08745Sheppo 	int			status = LDC_TX_SUCCESS;
75261ae08745Sheppo 
75271ae08745Sheppo 	D1(vswp, "%s(%lld): enter\n", __func__, ldcp->ldc_id);
75281ae08745Sheppo 
75291ae08745Sheppo 	/* TODO: make test a macro */
75301ae08745Sheppo 	if ((!(ldcp->lane_out.lstate & VSW_LANE_ACTIVE)) ||
75311ae08745Sheppo 	    (ldcp->ldc_status != LDC_UP) || (ldcp->ldc_handle == NULL)) {
75321ae08745Sheppo 		DWARN(vswp, "%s(%lld) status(%d) lstate(0x%llx), dropping "
75331ae08745Sheppo 		    "packet\n", __func__, ldcp->ldc_id, ldcp->ldc_status,
75341ae08745Sheppo 		    ldcp->lane_out.lstate);
75351ae08745Sheppo 		freemsg(mp);
75361ae08745Sheppo 		return (LDC_TX_FAILURE);
75371ae08745Sheppo 	}
75381ae08745Sheppo 
75391ae08745Sheppo 	/*
75401ae08745Sheppo 	 * Note - using first ring only, this may change
75411ae08745Sheppo 	 * in the future.
75421ae08745Sheppo 	 */
7543445b4c2eSsb155480 	READ_ENTER(&ldcp->lane_out.dlistrw);
75441ae08745Sheppo 	if ((dp = ldcp->lane_out.dringp) == NULL) {
7545445b4c2eSsb155480 		RW_EXIT(&ldcp->lane_out.dlistrw);
75461ae08745Sheppo 		DERR(vswp, "%s(%lld): no dring for outbound lane on"
75471ae08745Sheppo 		    " channel %d", __func__, ldcp->ldc_id, ldcp->ldc_id);
75481ae08745Sheppo 		freemsg(mp);
75491ae08745Sheppo 		return (LDC_TX_FAILURE);
75501ae08745Sheppo 	}
75511ae08745Sheppo 
75521ae08745Sheppo 	size = msgsize(mp);
75531ae08745Sheppo 	if (size > (size_t)ETHERMAX) {
7554445b4c2eSsb155480 		RW_EXIT(&ldcp->lane_out.dlistrw);
75551ae08745Sheppo 		DERR(vswp, "%s(%lld) invalid size (%ld)\n", __func__,
75561ae08745Sheppo 		    ldcp->ldc_id, size);
7557d10e4ef2Snarayan 		freemsg(mp);
7558d10e4ef2Snarayan 		return (LDC_TX_FAILURE);
75591ae08745Sheppo 	}
75601ae08745Sheppo 
75611ae08745Sheppo 	/*
75621ae08745Sheppo 	 * Find a free descriptor
75631ae08745Sheppo 	 *
75641ae08745Sheppo 	 * Note: for the moment we are assuming that we will only
75651ae08745Sheppo 	 * have one dring going from the switch to each of its
75661ae08745Sheppo 	 * peers. This may change in the future.
75671ae08745Sheppo 	 */
75681ae08745Sheppo 	if (vsw_dring_find_free_desc(dp, &priv_desc, &idx) != 0) {
7569d10e4ef2Snarayan 		D2(vswp, "%s(%lld): no descriptor available for ring "
75701ae08745Sheppo 		    "at 0x%llx", __func__, ldcp->ldc_id, dp);
75711ae08745Sheppo 
75721ae08745Sheppo 		/* nothing more we can do */
75731ae08745Sheppo 		status = LDC_TX_NORESOURCES;
75741ae08745Sheppo 		goto vsw_dringsend_free_exit;
75751ae08745Sheppo 	} else {
7576205eeb1aSlm66018 		D2(vswp, "%s(%lld): free private descriptor found at pos %ld "
7577205eeb1aSlm66018 		    "addr 0x%llx\n", __func__, ldcp->ldc_id, idx, priv_desc);
75781ae08745Sheppo 	}
75791ae08745Sheppo 
75801ae08745Sheppo 	/* copy data into the descriptor */
75811ae08745Sheppo 	bufp = priv_desc->datap;
7582d10e4ef2Snarayan 	bufp += VNET_IPALIGN;
75831ae08745Sheppo 	for (bp = mp, n = 0; bp != NULL; bp = bp->b_cont) {
75841ae08745Sheppo 		n = MBLKL(bp);
75851ae08745Sheppo 		bcopy(bp->b_rptr, bufp, n);
75861ae08745Sheppo 		bufp += n;
75871ae08745Sheppo 	}
75881ae08745Sheppo 
75891ae08745Sheppo 	priv_desc->datalen = (size < (size_t)ETHERMIN) ? ETHERMIN : size;
7590d10e4ef2Snarayan 
7591d10e4ef2Snarayan 	pub = priv_desc->descp;
7592d10e4ef2Snarayan 	pub->nbytes = priv_desc->datalen;
7593d10e4ef2Snarayan 
7594d10e4ef2Snarayan 	mutex_enter(&priv_desc->dstate_lock);
7595d10e4ef2Snarayan 	pub->hdr.dstate = VIO_DESC_READY;
7596d10e4ef2Snarayan 	mutex_exit(&priv_desc->dstate_lock);
75971ae08745Sheppo 
75981ae08745Sheppo 	/*
7599d10e4ef2Snarayan 	 * Determine whether or not we need to send a message to our
7600d10e4ef2Snarayan 	 * peer prompting them to read our newly updated descriptor(s).
76011ae08745Sheppo 	 */
7602d10e4ef2Snarayan 	mutex_enter(&dp->restart_lock);
7603d10e4ef2Snarayan 	if (dp->restart_reqd) {
7604d10e4ef2Snarayan 		dp->restart_reqd = B_FALSE;
7605d10e4ef2Snarayan 		mutex_exit(&dp->restart_lock);
76061ae08745Sheppo 
76071ae08745Sheppo 		/*
76081ae08745Sheppo 		 * Send a vio_dring_msg to peer to prompt them to read
76091ae08745Sheppo 		 * the updated descriptor ring.
76101ae08745Sheppo 		 */
76111ae08745Sheppo 		dring_pkt.tag.vio_msgtype = VIO_TYPE_DATA;
76121ae08745Sheppo 		dring_pkt.tag.vio_subtype = VIO_SUBTYPE_INFO;
76131ae08745Sheppo 		dring_pkt.tag.vio_subtype_env = VIO_DRING_DATA;
76141ae08745Sheppo 		dring_pkt.tag.vio_sid = ldcp->local_session;
76151ae08745Sheppo 
76161ae08745Sheppo 		/* Note - for now using first ring */
76171ae08745Sheppo 		dring_pkt.dring_ident = dp->ident;
76181ae08745Sheppo 
7619d10e4ef2Snarayan 		mutex_enter(&ldcp->lane_out.seq_lock);
76201ae08745Sheppo 		dring_pkt.seq_num = ldcp->lane_out.seq_num++;
7621d10e4ef2Snarayan 		mutex_exit(&ldcp->lane_out.seq_lock);
76221ae08745Sheppo 
7623d10e4ef2Snarayan 		/*
7624d10e4ef2Snarayan 		 * If last_ack_recv is -1 then we know we've not
7625d10e4ef2Snarayan 		 * received any ack's yet, so this must be the first
7626d10e4ef2Snarayan 		 * msg sent, so set the start to the begining of the ring.
7627d10e4ef2Snarayan 		 */
7628d10e4ef2Snarayan 		mutex_enter(&dp->dlock);
7629d10e4ef2Snarayan 		if (dp->last_ack_recv == -1) {
7630d10e4ef2Snarayan 			dring_pkt.start_idx = 0;
7631d10e4ef2Snarayan 		} else {
7632205eeb1aSlm66018 			dring_pkt.start_idx =
7633205eeb1aSlm66018 			    (dp->last_ack_recv + 1) % dp->num_descriptors;
7634d10e4ef2Snarayan 		}
7635d10e4ef2Snarayan 		dring_pkt.end_idx = -1;
7636d10e4ef2Snarayan 		mutex_exit(&dp->dlock);
76371ae08745Sheppo 
76381ae08745Sheppo 		D3(vswp, "%s(%lld): dring 0x%llx : ident 0x%llx\n", __func__,
76391ae08745Sheppo 		    ldcp->ldc_id, dp, dring_pkt.dring_ident);
7640d10e4ef2Snarayan 		D3(vswp, "%s(%lld): start %lld : end %lld : seq %lld\n",
7641d10e4ef2Snarayan 		    __func__, ldcp->ldc_id, dring_pkt.start_idx,
7642d10e4ef2Snarayan 		    dring_pkt.end_idx, dring_pkt.seq_num);
76431ae08745Sheppo 
7644b071742bSsg70180 		RW_EXIT(&ldcp->lane_out.dlistrw);
7645b071742bSsg70180 
7646b071742bSsg70180 		(void) vsw_send_msg(ldcp, (void *)&dring_pkt,
7647b071742bSsg70180 		    sizeof (vio_dring_msg_t), B_TRUE);
7648b071742bSsg70180 
7649b071742bSsg70180 		/* free the message block */
7650b071742bSsg70180 		freemsg(mp);
7651b071742bSsg70180 		return (status);
7652b071742bSsg70180 
7653d10e4ef2Snarayan 	} else {
7654d10e4ef2Snarayan 		mutex_exit(&dp->restart_lock);
7655d10e4ef2Snarayan 		D2(vswp, "%s(%lld): updating descp %d", __func__,
7656d10e4ef2Snarayan 		    ldcp->ldc_id, idx);
7657d10e4ef2Snarayan 	}
76581ae08745Sheppo 
76591ae08745Sheppo vsw_dringsend_free_exit:
76601ae08745Sheppo 
7661445b4c2eSsb155480 	RW_EXIT(&ldcp->lane_out.dlistrw);
7662445b4c2eSsb155480 
76631ae08745Sheppo 	/* free the message block */
76641ae08745Sheppo 	freemsg(mp);
76651ae08745Sheppo 
76661ae08745Sheppo 	D1(vswp, "%s(%lld): exit\n", __func__, ldcp->ldc_id);
76671ae08745Sheppo 	return (status);
76681ae08745Sheppo }
76691ae08745Sheppo 
76701ae08745Sheppo /*
76711ae08745Sheppo  * Send an in-band descriptor message over ldc.
76721ae08745Sheppo  */
76731ae08745Sheppo static int
76741ae08745Sheppo vsw_descrsend(vsw_ldc_t *ldcp, mblk_t *mp)
76751ae08745Sheppo {
76761ae08745Sheppo 	vsw_t			*vswp = ldcp->ldc_vswp;
7677445b4c2eSsb155480 	vnet_ibnd_desc_t	ibnd_msg;
76781ae08745Sheppo 	vsw_private_desc_t	*priv_desc = NULL;
76791ae08745Sheppo 	dring_info_t		*dp = NULL;
76801ae08745Sheppo 	size_t			n, size = 0;
76811ae08745Sheppo 	caddr_t			bufp;
76821ae08745Sheppo 	mblk_t			*bp;
76831ae08745Sheppo 	int			idx, i;
76841ae08745Sheppo 	int			status = LDC_TX_SUCCESS;
76851ae08745Sheppo 	static int		warn_msg = 1;
76861ae08745Sheppo 
76871ae08745Sheppo 	D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);
76881ae08745Sheppo 
76891ae08745Sheppo 	ASSERT(mp != NULL);
76901ae08745Sheppo 
76911ae08745Sheppo 	if ((!(ldcp->lane_out.lstate & VSW_LANE_ACTIVE)) ||
76921ae08745Sheppo 	    (ldcp->ldc_status != LDC_UP) || (ldcp->ldc_handle == NULL)) {
76931ae08745Sheppo 		DERR(vswp, "%s(%lld) status(%d) state (0x%llx), dropping pkt",
76941ae08745Sheppo 		    __func__, ldcp->ldc_id, ldcp->ldc_status,
76951ae08745Sheppo 		    ldcp->lane_out.lstate);
76961ae08745Sheppo 		freemsg(mp);
76971ae08745Sheppo 		return (LDC_TX_FAILURE);
76981ae08745Sheppo 	}
76991ae08745Sheppo 
77001ae08745Sheppo 	/*
77011ae08745Sheppo 	 * only expect single dring to exist, which we use
77021ae08745Sheppo 	 * as an internal buffer, rather than a transfer channel.
77031ae08745Sheppo 	 */
7704445b4c2eSsb155480 	READ_ENTER(&ldcp->lane_out.dlistrw);
77051ae08745Sheppo 	if ((dp = ldcp->lane_out.dringp) == NULL) {
77061ae08745Sheppo 		DERR(vswp, "%s(%lld): no dring for outbound lane",
77071ae08745Sheppo 		    __func__, ldcp->ldc_id);
7708205eeb1aSlm66018 		DERR(vswp, "%s(%lld) status(%d) state (0x%llx)", __func__,
7709205eeb1aSlm66018 		    ldcp->ldc_id, ldcp->ldc_status, ldcp->lane_out.lstate);
7710445b4c2eSsb155480 		RW_EXIT(&ldcp->lane_out.dlistrw);
77111ae08745Sheppo 		freemsg(mp);
77121ae08745Sheppo 		return (LDC_TX_FAILURE);
77131ae08745Sheppo 	}
77141ae08745Sheppo 
77151ae08745Sheppo 	size = msgsize(mp);
77161ae08745Sheppo 	if (size > (size_t)ETHERMAX) {
7717b071742bSsg70180 		RW_EXIT(&ldcp->lane_out.dlistrw);
77181ae08745Sheppo 		DERR(vswp, "%s(%lld) invalid size (%ld)\n", __func__,
77191ae08745Sheppo 		    ldcp->ldc_id, size);
7720d10e4ef2Snarayan 		freemsg(mp);
7721d10e4ef2Snarayan 		return (LDC_TX_FAILURE);
77221ae08745Sheppo 	}
77231ae08745Sheppo 
77241ae08745Sheppo 	/*
77251ae08745Sheppo 	 * Find a free descriptor in our buffer ring
77261ae08745Sheppo 	 */
77271ae08745Sheppo 	if (vsw_dring_find_free_desc(dp, &priv_desc, &idx) != 0) {
7728b071742bSsg70180 		RW_EXIT(&ldcp->lane_out.dlistrw);
77291ae08745Sheppo 		if (warn_msg) {
77301ae08745Sheppo 			DERR(vswp, "%s(%lld): no descriptor available for ring "
77311ae08745Sheppo 			    "at 0x%llx", __func__, ldcp->ldc_id, dp);
77321ae08745Sheppo 			warn_msg = 0;
77331ae08745Sheppo 		}
77341ae08745Sheppo 
77351ae08745Sheppo 		/* nothing more we can do */
77361ae08745Sheppo 		status = LDC_TX_NORESOURCES;
77371ae08745Sheppo 		goto vsw_descrsend_free_exit;
77381ae08745Sheppo 	} else {
77391ae08745Sheppo 		D2(vswp, "%s(%lld): free private descriptor found at pos "
7740205eeb1aSlm66018 		    "%ld addr 0x%x\n", __func__, ldcp->ldc_id, idx, priv_desc);
77411ae08745Sheppo 		warn_msg = 1;
77421ae08745Sheppo 	}
77431ae08745Sheppo 
77441ae08745Sheppo 	/* copy data into the descriptor */
77451ae08745Sheppo 	bufp = priv_desc->datap;
77461ae08745Sheppo 	for (bp = mp, n = 0; bp != NULL; bp = bp->b_cont) {
77471ae08745Sheppo 		n = MBLKL(bp);
77481ae08745Sheppo 		bcopy(bp->b_rptr, bufp, n);
77491ae08745Sheppo 		bufp += n;
77501ae08745Sheppo 	}
77511ae08745Sheppo 
77521ae08745Sheppo 	priv_desc->datalen = (size < (size_t)ETHERMIN) ? ETHERMIN : size;
77531ae08745Sheppo 
77541ae08745Sheppo 	/* create and send the in-band descp msg */
77551ae08745Sheppo 	ibnd_msg.hdr.tag.vio_msgtype = VIO_TYPE_DATA;
77561ae08745Sheppo 	ibnd_msg.hdr.tag.vio_subtype = VIO_SUBTYPE_INFO;
77571ae08745Sheppo 	ibnd_msg.hdr.tag.vio_subtype_env = VIO_DESC_DATA;
77581ae08745Sheppo 	ibnd_msg.hdr.tag.vio_sid = ldcp->local_session;
77591ae08745Sheppo 
7760d10e4ef2Snarayan 	mutex_enter(&ldcp->lane_out.seq_lock);
77611ae08745Sheppo 	ibnd_msg.hdr.seq_num = ldcp->lane_out.seq_num++;
7762d10e4ef2Snarayan 	mutex_exit(&ldcp->lane_out.seq_lock);
77631ae08745Sheppo 
77641ae08745Sheppo 	/*
77651ae08745Sheppo 	 * Copy the mem cookies describing the data from the
77661ae08745Sheppo 	 * private region of the descriptor ring into the inband
77671ae08745Sheppo 	 * descriptor.
77681ae08745Sheppo 	 */
77691ae08745Sheppo 	for (i = 0; i < priv_desc->ncookies; i++) {
77701ae08745Sheppo 		bcopy(&priv_desc->memcookie[i], &ibnd_msg.memcookie[i],
77711ae08745Sheppo 		    sizeof (ldc_mem_cookie_t));
77721ae08745Sheppo 	}
77731ae08745Sheppo 
77741ae08745Sheppo 	ibnd_msg.hdr.desc_handle = idx;
77751ae08745Sheppo 	ibnd_msg.ncookies = priv_desc->ncookies;
77761ae08745Sheppo 	ibnd_msg.nbytes = size;
77771ae08745Sheppo 
7778b071742bSsg70180 	RW_EXIT(&ldcp->lane_out.dlistrw);
7779b071742bSsg70180 
7780b071742bSsg70180 	(void) vsw_send_msg(ldcp, (void *)&ibnd_msg,
7781b071742bSsg70180 	    sizeof (vnet_ibnd_desc_t), B_TRUE);
77821ae08745Sheppo 
77831ae08745Sheppo vsw_descrsend_free_exit:
77841ae08745Sheppo 
77851ae08745Sheppo 	/* free the allocated message blocks */
77861ae08745Sheppo 	freemsg(mp);
77871ae08745Sheppo 
77881ae08745Sheppo 	D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id);
77891ae08745Sheppo 	return (status);
77901ae08745Sheppo }
77911ae08745Sheppo 
77921ae08745Sheppo static void
77933af08d82Slm66018 vsw_send_ver(void *arg)
77941ae08745Sheppo {
77953af08d82Slm66018 	vsw_ldc_t	*ldcp = (vsw_ldc_t *)arg;
77961ae08745Sheppo 	vsw_t		*vswp = ldcp->ldc_vswp;
77971ae08745Sheppo 	lane_t		*lp = &ldcp->lane_out;
77981ae08745Sheppo 	vio_ver_msg_t	ver_msg;
77991ae08745Sheppo 
78001ae08745Sheppo 	D1(vswp, "%s enter", __func__);
78011ae08745Sheppo 
78021ae08745Sheppo 	ver_msg.tag.vio_msgtype = VIO_TYPE_CTRL;
78031ae08745Sheppo 	ver_msg.tag.vio_subtype = VIO_SUBTYPE_INFO;
78041ae08745Sheppo 	ver_msg.tag.vio_subtype_env = VIO_VER_INFO;
78051ae08745Sheppo 	ver_msg.tag.vio_sid = ldcp->local_session;
78061ae08745Sheppo 
78071ae08745Sheppo 	ver_msg.ver_major = vsw_versions[0].ver_major;
78081ae08745Sheppo 	ver_msg.ver_minor = vsw_versions[0].ver_minor;
78091ae08745Sheppo 	ver_msg.dev_class = VDEV_NETWORK_SWITCH;
78101ae08745Sheppo 
78111ae08745Sheppo 	lp->lstate |= VSW_VER_INFO_SENT;
78121ae08745Sheppo 	lp->ver_major = ver_msg.ver_major;
78131ae08745Sheppo 	lp->ver_minor = ver_msg.ver_minor;
78141ae08745Sheppo 
78151ae08745Sheppo 	DUMP_TAG(ver_msg.tag);
78161ae08745Sheppo 
7817b071742bSsg70180 	(void) vsw_send_msg(ldcp, &ver_msg, sizeof (vio_ver_msg_t), B_TRUE);
78181ae08745Sheppo 
78191ae08745Sheppo 	D1(vswp, "%s (%d): exit", __func__, ldcp->ldc_id);
78201ae08745Sheppo }
78211ae08745Sheppo 
78221ae08745Sheppo static void
78231ae08745Sheppo vsw_send_attr(vsw_ldc_t *ldcp)
78241ae08745Sheppo {
78251ae08745Sheppo 	vsw_t			*vswp = ldcp->ldc_vswp;
78261ae08745Sheppo 	lane_t			*lp = &ldcp->lane_out;
78271ae08745Sheppo 	vnet_attr_msg_t		attr_msg;
78281ae08745Sheppo 
78291ae08745Sheppo 	D1(vswp, "%s (%ld) enter", __func__, ldcp->ldc_id);
78301ae08745Sheppo 
78311ae08745Sheppo 	/*
78321ae08745Sheppo 	 * Subtype is set to INFO by default
78331ae08745Sheppo 	 */
78341ae08745Sheppo 	attr_msg.tag.vio_msgtype = VIO_TYPE_CTRL;
78351ae08745Sheppo 	attr_msg.tag.vio_subtype = VIO_SUBTYPE_INFO;
78361ae08745Sheppo 	attr_msg.tag.vio_subtype_env = VIO_ATTR_INFO;
78371ae08745Sheppo 	attr_msg.tag.vio_sid = ldcp->local_session;
78381ae08745Sheppo 
78391ae08745Sheppo 	/* payload copied from default settings for lane */
78401ae08745Sheppo 	attr_msg.mtu = lp->mtu;
78411ae08745Sheppo 	attr_msg.addr_type = lp->addr_type;
78421ae08745Sheppo 	attr_msg.xfer_mode = lp->xfer_mode;
78431ae08745Sheppo 	attr_msg.ack_freq = lp->xfer_mode;
78441ae08745Sheppo 
78451ae08745Sheppo 	READ_ENTER(&vswp->if_lockrw);
78461ae08745Sheppo 	bcopy(&(vswp->if_addr), &(attr_msg.addr), ETHERADDRL);
78471ae08745Sheppo 	RW_EXIT(&vswp->if_lockrw);
78481ae08745Sheppo 
78491ae08745Sheppo 	ldcp->lane_out.lstate |= VSW_ATTR_INFO_SENT;
78501ae08745Sheppo 
78511ae08745Sheppo 	DUMP_TAG(attr_msg.tag);
78521ae08745Sheppo 
7853b071742bSsg70180 	(void) vsw_send_msg(ldcp, &attr_msg, sizeof (vnet_attr_msg_t), B_TRUE);
78541ae08745Sheppo 
7855b071742bSsg70180 	D1(vswp, "%s (%ld) exit", __func__, ldcp->ldc_id);
78561ae08745Sheppo }
78571ae08745Sheppo 
78581ae08745Sheppo /*
78591ae08745Sheppo  * Create dring info msg (which also results in the creation of
78601ae08745Sheppo  * a dring).
78611ae08745Sheppo  */
78621ae08745Sheppo static vio_dring_reg_msg_t *
78631ae08745Sheppo vsw_create_dring_info_pkt(vsw_ldc_t *ldcp)
78641ae08745Sheppo {
78651ae08745Sheppo 	vio_dring_reg_msg_t	*mp;
78661ae08745Sheppo 	dring_info_t		*dp;
78671ae08745Sheppo 	vsw_t			*vswp = ldcp->ldc_vswp;
78681ae08745Sheppo 
78691ae08745Sheppo 	D1(vswp, "vsw_create_dring_info_pkt enter\n");
78701ae08745Sheppo 
78711ae08745Sheppo 	/*
78721ae08745Sheppo 	 * If we can't create a dring, obviously no point sending
78731ae08745Sheppo 	 * a message.
78741ae08745Sheppo 	 */
78751ae08745Sheppo 	if ((dp = vsw_create_dring(ldcp)) == NULL)
78761ae08745Sheppo 		return (NULL);
78771ae08745Sheppo 
78781ae08745Sheppo 	mp = kmem_zalloc(sizeof (vio_dring_reg_msg_t), KM_SLEEP);
78791ae08745Sheppo 
78801ae08745Sheppo 	mp->tag.vio_msgtype = VIO_TYPE_CTRL;
78811ae08745Sheppo 	mp->tag.vio_subtype = VIO_SUBTYPE_INFO;
78821ae08745Sheppo 	mp->tag.vio_subtype_env = VIO_DRING_REG;
78831ae08745Sheppo 	mp->tag.vio_sid = ldcp->local_session;
78841ae08745Sheppo 
78851ae08745Sheppo 	/* payload */
78861ae08745Sheppo 	mp->num_descriptors = dp->num_descriptors;
78871ae08745Sheppo 	mp->descriptor_size = dp->descriptor_size;
78881ae08745Sheppo 	mp->options = dp->options;
78891ae08745Sheppo 	mp->ncookies = dp->ncookies;
78901ae08745Sheppo 	bcopy(&dp->cookie[0], &mp->cookie[0], sizeof (ldc_mem_cookie_t));
78911ae08745Sheppo 
78921ae08745Sheppo 	mp->dring_ident = 0;
78931ae08745Sheppo 
78941ae08745Sheppo 	D1(vswp, "vsw_create_dring_info_pkt exit\n");
78951ae08745Sheppo 
78961ae08745Sheppo 	return (mp);
78971ae08745Sheppo }
78981ae08745Sheppo 
78991ae08745Sheppo static void
79001ae08745Sheppo vsw_send_dring_info(vsw_ldc_t *ldcp)
79011ae08745Sheppo {
79021ae08745Sheppo 	vio_dring_reg_msg_t	*dring_msg;
79031ae08745Sheppo 	vsw_t			*vswp = ldcp->ldc_vswp;
79041ae08745Sheppo 
79051ae08745Sheppo 	D1(vswp, "%s: (%ld) enter", __func__, ldcp->ldc_id);
79061ae08745Sheppo 
79071ae08745Sheppo 	dring_msg = vsw_create_dring_info_pkt(ldcp);
79081ae08745Sheppo 	if (dring_msg == NULL) {
790934683adeSsg70180 		cmn_err(CE_WARN, "!vsw%d: %s: error creating msg",
791034683adeSsg70180 		    vswp->instance, __func__);
79111ae08745Sheppo 		return;
79121ae08745Sheppo 	}
79131ae08745Sheppo 
79141ae08745Sheppo 	ldcp->lane_out.lstate |= VSW_DRING_INFO_SENT;
79151ae08745Sheppo 
79161ae08745Sheppo 	DUMP_TAG_PTR((vio_msg_tag_t *)dring_msg);
79171ae08745Sheppo 
7918b071742bSsg70180 	(void) vsw_send_msg(ldcp, dring_msg,
7919b071742bSsg70180 	    sizeof (vio_dring_reg_msg_t), B_TRUE);
79201ae08745Sheppo 
79211ae08745Sheppo 	kmem_free(dring_msg, sizeof (vio_dring_reg_msg_t));
79221ae08745Sheppo 
79231ae08745Sheppo 	D1(vswp, "%s: (%ld) exit", __func__, ldcp->ldc_id);
79241ae08745Sheppo }
79251ae08745Sheppo 
79261ae08745Sheppo static void
79271ae08745Sheppo vsw_send_rdx(vsw_ldc_t *ldcp)
79281ae08745Sheppo {
79291ae08745Sheppo 	vsw_t		*vswp = ldcp->ldc_vswp;
79301ae08745Sheppo 	vio_rdx_msg_t	rdx_msg;
79311ae08745Sheppo 
79321ae08745Sheppo 	D1(vswp, "%s (%ld) enter", __func__, ldcp->ldc_id);
79331ae08745Sheppo 
79341ae08745Sheppo 	rdx_msg.tag.vio_msgtype = VIO_TYPE_CTRL;
79351ae08745Sheppo 	rdx_msg.tag.vio_subtype = VIO_SUBTYPE_INFO;
79361ae08745Sheppo 	rdx_msg.tag.vio_subtype_env = VIO_RDX;
79371ae08745Sheppo 	rdx_msg.tag.vio_sid = ldcp->local_session;
79381ae08745Sheppo 
7939b071742bSsg70180 	ldcp->lane_in.lstate |= VSW_RDX_INFO_SENT;
79401ae08745Sheppo 
79411ae08745Sheppo 	DUMP_TAG(rdx_msg.tag);
79421ae08745Sheppo 
7943b071742bSsg70180 	(void) vsw_send_msg(ldcp, &rdx_msg, sizeof (vio_rdx_msg_t), B_TRUE);
79441ae08745Sheppo 
79451ae08745Sheppo 	D1(vswp, "%s (%ld) exit", __func__, ldcp->ldc_id);
79461ae08745Sheppo }
79471ae08745Sheppo 
79481ae08745Sheppo /*
79491ae08745Sheppo  * Generic routine to send message out over ldc channel.
7950b071742bSsg70180  *
7951b071742bSsg70180  * It is possible that when we attempt to write over the ldc channel
7952b071742bSsg70180  * that we get notified that it has been reset. Depending on the value
7953b071742bSsg70180  * of the handle_reset flag we either handle that event here or simply
7954b071742bSsg70180  * notify the caller that the channel was reset.
79551ae08745Sheppo  */
7956b071742bSsg70180 static int
7957b071742bSsg70180 vsw_send_msg(vsw_ldc_t *ldcp, void *msgp, int size, boolean_t handle_reset)
79581ae08745Sheppo {
79591ae08745Sheppo 	int		rv;
79601ae08745Sheppo 	size_t		msglen = size;
79611ae08745Sheppo 	vio_msg_tag_t	*tag = (vio_msg_tag_t *)msgp;
79621ae08745Sheppo 	vsw_t		*vswp = ldcp->ldc_vswp;
79631ae08745Sheppo 
79641ae08745Sheppo 	D1(vswp, "vsw_send_msg (%lld) enter : sending %d bytes",
79651ae08745Sheppo 	    ldcp->ldc_id, size);
79661ae08745Sheppo 
79671ae08745Sheppo 	D2(vswp, "send_msg: type 0x%llx", tag->vio_msgtype);
79681ae08745Sheppo 	D2(vswp, "send_msg: stype 0x%llx", tag->vio_subtype);
79691ae08745Sheppo 	D2(vswp, "send_msg: senv 0x%llx", tag->vio_subtype_env);
79701ae08745Sheppo 
79711ae08745Sheppo 	mutex_enter(&ldcp->ldc_txlock);
79721ae08745Sheppo 	do {
79731ae08745Sheppo 		msglen = size;
79741ae08745Sheppo 		rv = ldc_write(ldcp->ldc_handle, (caddr_t)msgp, &msglen);
79751ae08745Sheppo 	} while (rv == EWOULDBLOCK && --vsw_wretries > 0);
79761ae08745Sheppo 
79771ae08745Sheppo 	if ((rv != 0) || (msglen != size)) {
7978205eeb1aSlm66018 		DERR(vswp, "vsw_send_msg:ldc_write failed: chan(%lld) rv(%d) "
7979205eeb1aSlm66018 		    "size (%d) msglen(%d)\n", ldcp->ldc_id, rv, size, msglen);
79801ae08745Sheppo 	}
79813af08d82Slm66018 	mutex_exit(&ldcp->ldc_txlock);
79823af08d82Slm66018 
7983b071742bSsg70180 	/*
7984b071742bSsg70180 	 * If channel has been reset we either handle it here or
7985b071742bSsg70180 	 * simply report back that it has been reset and let caller
7986b071742bSsg70180 	 * decide what to do.
7987b071742bSsg70180 	 */
79883af08d82Slm66018 	if (rv == ECONNRESET) {
7989205eeb1aSlm66018 		DWARN(vswp, "%s (%lld) channel reset", __func__, ldcp->ldc_id);
7990b071742bSsg70180 
7991b071742bSsg70180 		/*
7992b071742bSsg70180 		 * N.B - must never be holding the dlistrw lock when
7993b071742bSsg70180 		 * we do a reset of the channel.
7994b071742bSsg70180 		 */
7995b071742bSsg70180 		if (handle_reset) {
7996b071742bSsg70180 			vsw_process_conn_evt(ldcp, VSW_CONN_RESET);
7997b071742bSsg70180 		}
79983af08d82Slm66018 	}
79991ae08745Sheppo 
8000b071742bSsg70180 	return (rv);
80011ae08745Sheppo }
80021ae08745Sheppo 
80031ae08745Sheppo /*
80041ae08745Sheppo  * Add an entry into FDB, for the given mac address and port_id.
80051ae08745Sheppo  * Returns 0 on success, 1 on failure.
80061ae08745Sheppo  *
80071ae08745Sheppo  * Lock protecting FDB must be held by calling process.
80081ae08745Sheppo  */
80091ae08745Sheppo static int
80101ae08745Sheppo vsw_add_fdb(vsw_t *vswp, vsw_port_t *port)
80111ae08745Sheppo {
80121ae08745Sheppo 	uint64_t	addr = 0;
80131ae08745Sheppo 
80141ae08745Sheppo 	D1(vswp, "%s: enter", __func__);
80151ae08745Sheppo 
80161ae08745Sheppo 	KEY_HASH(addr, port->p_macaddr);
80171ae08745Sheppo 
80181ae08745Sheppo 	D2(vswp, "%s: key = 0x%llx", __func__, addr);
80191ae08745Sheppo 
80201ae08745Sheppo 	/*
80211ae08745Sheppo 	 * Note: duplicate keys will be rejected by mod_hash.
80221ae08745Sheppo 	 */
80231ae08745Sheppo 	if (mod_hash_insert(vswp->fdb, (mod_hash_key_t)addr,
80241ae08745Sheppo 	    (mod_hash_val_t)port) != 0) {
80251ae08745Sheppo 		DERR(vswp, "%s: unable to add entry into fdb.", __func__);
80261ae08745Sheppo 		return (1);
80271ae08745Sheppo 	}
80281ae08745Sheppo 
80291ae08745Sheppo 	D1(vswp, "%s: exit", __func__);
80301ae08745Sheppo 	return (0);
80311ae08745Sheppo }
80321ae08745Sheppo 
80331ae08745Sheppo /*
80341ae08745Sheppo  * Remove an entry from FDB.
80351ae08745Sheppo  * Returns 0 on success, 1 on failure.
80361ae08745Sheppo  */
80371ae08745Sheppo static int
80381ae08745Sheppo vsw_del_fdb(vsw_t *vswp, vsw_port_t *port)
80391ae08745Sheppo {
80401ae08745Sheppo 	uint64_t	addr = 0;
80411ae08745Sheppo 
80421ae08745Sheppo 	D1(vswp, "%s: enter", __func__);
80431ae08745Sheppo 
80441ae08745Sheppo 	KEY_HASH(addr, port->p_macaddr);
80451ae08745Sheppo 
80461ae08745Sheppo 	D2(vswp, "%s: key = 0x%llx", __func__, addr);
80471ae08745Sheppo 
80481ae08745Sheppo 	(void) mod_hash_destroy(vswp->fdb, (mod_hash_val_t)addr);
80491ae08745Sheppo 
80501ae08745Sheppo 	D1(vswp, "%s: enter", __func__);
80511ae08745Sheppo 
80521ae08745Sheppo 	return (0);
80531ae08745Sheppo }
80541ae08745Sheppo 
80551ae08745Sheppo /*
80561ae08745Sheppo  * Search fdb for a given mac address.
80571ae08745Sheppo  * Returns pointer to the entry if found, else returns NULL.
80581ae08745Sheppo  */
80591ae08745Sheppo static vsw_port_t *
80601ae08745Sheppo vsw_lookup_fdb(vsw_t *vswp, struct ether_header *ehp)
80611ae08745Sheppo {
80621ae08745Sheppo 	uint64_t	key = 0;
80631ae08745Sheppo 	vsw_port_t	*port = NULL;
80641ae08745Sheppo 
80651ae08745Sheppo 	D1(vswp, "%s: enter", __func__);
80661ae08745Sheppo 
80671ae08745Sheppo 	KEY_HASH(key, ehp->ether_dhost);
80681ae08745Sheppo 
80691ae08745Sheppo 	D2(vswp, "%s: key = 0x%llx", __func__, key);
80701ae08745Sheppo 
80711ae08745Sheppo 	if (mod_hash_find(vswp->fdb, (mod_hash_key_t)key,
80721ae08745Sheppo 	    (mod_hash_val_t *)&port) != 0) {
807334683adeSsg70180 		D2(vswp, "%s: no port found", __func__);
80741ae08745Sheppo 		return (NULL);
80751ae08745Sheppo 	}
80761ae08745Sheppo 
80771ae08745Sheppo 	D1(vswp, "%s: exit", __func__);
80781ae08745Sheppo 
80791ae08745Sheppo 	return (port);
80801ae08745Sheppo }
80811ae08745Sheppo 
80821ae08745Sheppo /*
80831ae08745Sheppo  * Add or remove multicast address(es).
80841ae08745Sheppo  *
80851ae08745Sheppo  * Returns 0 on success, 1 on failure.
80861ae08745Sheppo  */
80871ae08745Sheppo static int
80881ae08745Sheppo vsw_add_rem_mcst(vnet_mcast_msg_t *mcst_pkt, vsw_port_t *port)
80891ae08745Sheppo {
80901ae08745Sheppo 	mcst_addr_t		*mcst_p = NULL;
80911ae08745Sheppo 	vsw_t			*vswp = port->p_vswp;
80921ae08745Sheppo 	uint64_t		addr = 0x0;
809334683adeSsg70180 	int			i;
80941ae08745Sheppo 
80951ae08745Sheppo 	D1(vswp, "%s: enter", __func__);
80961ae08745Sheppo 
80971ae08745Sheppo 	D2(vswp, "%s: %d addresses", __func__, mcst_pkt->count);
80981ae08745Sheppo 
80991ae08745Sheppo 	for (i = 0; i < mcst_pkt->count; i++) {
81001ae08745Sheppo 		/*
81011ae08745Sheppo 		 * Convert address into form that can be used
81021ae08745Sheppo 		 * as hash table key.
81031ae08745Sheppo 		 */
81041ae08745Sheppo 		KEY_HASH(addr, mcst_pkt->mca[i]);
81051ae08745Sheppo 
81061ae08745Sheppo 		/*
81071ae08745Sheppo 		 * Add or delete the specified address/port combination.
81081ae08745Sheppo 		 */
81091ae08745Sheppo 		if (mcst_pkt->set == 0x1) {
81101ae08745Sheppo 			D3(vswp, "%s: adding multicast address 0x%llx for "
81111ae08745Sheppo 			    "port %ld", __func__, addr, port->p_instance);
81121ae08745Sheppo 			if (vsw_add_mcst(vswp, VSW_VNETPORT, addr, port) == 0) {
81131ae08745Sheppo 				/*
81141ae08745Sheppo 				 * Update the list of multicast
81151ae08745Sheppo 				 * addresses contained within the
81161ae08745Sheppo 				 * port structure to include this new
81171ae08745Sheppo 				 * one.
81181ae08745Sheppo 				 */
8119*19b65a69Ssb155480 				mcst_p = kmem_zalloc(sizeof (mcst_addr_t),
8120*19b65a69Ssb155480 				    KM_NOSLEEP);
81211ae08745Sheppo 				if (mcst_p == NULL) {
81221ae08745Sheppo 					DERR(vswp, "%s: unable to alloc mem",
81231ae08745Sheppo 					    __func__);
8124*19b65a69Ssb155480 					(void) vsw_del_mcst(vswp,
8125*19b65a69Ssb155480 					    VSW_VNETPORT, addr, port);
81261ae08745Sheppo 					return (1);
81271ae08745Sheppo 				}
81281ae08745Sheppo 
81291ae08745Sheppo 				mcst_p->nextp = NULL;
81301ae08745Sheppo 				mcst_p->addr = addr;
8131*19b65a69Ssb155480 				ether_copy(&mcst_pkt->mca[i], &mcst_p->mca);
81321ae08745Sheppo 
81331ae08745Sheppo 				/*
81341ae08745Sheppo 				 * Program the address into HW. If the addr
81351ae08745Sheppo 				 * has already been programmed then the MAC
81361ae08745Sheppo 				 * just increments a ref counter (which is
81371ae08745Sheppo 				 * used when the address is being deleted)
81381ae08745Sheppo 				 */
813934683adeSsg70180 				mutex_enter(&vswp->mac_lock);
8140*19b65a69Ssb155480 				if (vswp->mh != NULL) {
8141*19b65a69Ssb155480 					if (mac_multicst_add(vswp->mh,
814234683adeSsg70180 					    (uchar_t *)&mcst_pkt->mca[i])) {
814334683adeSsg70180 						mutex_exit(&vswp->mac_lock);
8144*19b65a69Ssb155480 						cmn_err(CE_WARN, "!vsw%d: "
8145*19b65a69Ssb155480 						    "unable to add multicast "
8146*19b65a69Ssb155480 						    "address: %s\n",
8147*19b65a69Ssb155480 						    vswp->instance,
8148*19b65a69Ssb155480 						    ether_sprintf((void *)
8149*19b65a69Ssb155480 						    &mcst_p->mca));
8150*19b65a69Ssb155480 						(void) vsw_del_mcst(vswp,
8151*19b65a69Ssb155480 						    VSW_VNETPORT, addr, port);
8152*19b65a69Ssb155480 						kmem_free(mcst_p,
8153*19b65a69Ssb155480 						    sizeof (*mcst_p));
815434683adeSsg70180 						return (1);
8155e1ebb9ecSlm66018 					}
8156*19b65a69Ssb155480 					mcst_p->mac_added = B_TRUE;
8157*19b65a69Ssb155480 				}
815834683adeSsg70180 				mutex_exit(&vswp->mac_lock);
81591ae08745Sheppo 
8160*19b65a69Ssb155480 				mutex_enter(&port->mca_lock);
8161*19b65a69Ssb155480 				mcst_p->nextp = port->mcap;
8162*19b65a69Ssb155480 				port->mcap = mcst_p;
8163*19b65a69Ssb155480 				mutex_exit(&port->mca_lock);
8164*19b65a69Ssb155480 
81651ae08745Sheppo 			} else {
81661ae08745Sheppo 				DERR(vswp, "%s: error adding multicast "
81671ae08745Sheppo 				    "address 0x%llx for port %ld",
81681ae08745Sheppo 				    __func__, addr, port->p_instance);
81691ae08745Sheppo 				return (1);
81701ae08745Sheppo 			}
81711ae08745Sheppo 		} else {
81721ae08745Sheppo 			/*
81731ae08745Sheppo 			 * Delete an entry from the multicast hash
81741ae08745Sheppo 			 * table and update the address list
81751ae08745Sheppo 			 * appropriately.
81761ae08745Sheppo 			 */
81771ae08745Sheppo 			if (vsw_del_mcst(vswp, VSW_VNETPORT, addr, port) == 0) {
81781ae08745Sheppo 				D3(vswp, "%s: deleting multicast address "
81791ae08745Sheppo 				    "0x%llx for port %ld", __func__, addr,
81801ae08745Sheppo 				    port->p_instance);
81811ae08745Sheppo 
8182*19b65a69Ssb155480 				mcst_p = vsw_del_addr(VSW_VNETPORT, port, addr);
8183*19b65a69Ssb155480 				ASSERT(mcst_p != NULL);
81841ae08745Sheppo 
81851ae08745Sheppo 				/*
81861ae08745Sheppo 				 * Remove the address from HW. The address
81871ae08745Sheppo 				 * will actually only be removed once the ref
81881ae08745Sheppo 				 * count within the MAC layer has dropped to
81891ae08745Sheppo 				 * zero. I.e. we can safely call this fn even
81901ae08745Sheppo 				 * if other ports are interested in this
81911ae08745Sheppo 				 * address.
81921ae08745Sheppo 				 */
819334683adeSsg70180 				mutex_enter(&vswp->mac_lock);
8194*19b65a69Ssb155480 				if (vswp->mh != NULL && mcst_p->mac_added) {
8195*19b65a69Ssb155480 					if (mac_multicst_remove(vswp->mh,
819634683adeSsg70180 					    (uchar_t *)&mcst_pkt->mca[i])) {
819734683adeSsg70180 						mutex_exit(&vswp->mac_lock);
8198*19b65a69Ssb155480 						cmn_err(CE_WARN, "!vsw%d: "
8199*19b65a69Ssb155480 						    "unable to remove mcast "
8200*19b65a69Ssb155480 						    "address: %s\n",
8201*19b65a69Ssb155480 						    vswp->instance,
8202*19b65a69Ssb155480 						    ether_sprintf((void *)
8203*19b65a69Ssb155480 						    &mcst_p->mca));
8204*19b65a69Ssb155480 						kmem_free(mcst_p,
8205*19b65a69Ssb155480 						    sizeof (*mcst_p));
820634683adeSsg70180 						return (1);
820734683adeSsg70180 					}
8208*19b65a69Ssb155480 					mcst_p->mac_added = B_FALSE;
8209*19b65a69Ssb155480 				}
821034683adeSsg70180 				mutex_exit(&vswp->mac_lock);
8211*19b65a69Ssb155480 				kmem_free(mcst_p, sizeof (*mcst_p));
82121ae08745Sheppo 
82131ae08745Sheppo 			} else {
82141ae08745Sheppo 				DERR(vswp, "%s: error deleting multicast "
82151ae08745Sheppo 				    "addr 0x%llx for port %ld",
82161ae08745Sheppo 				    __func__, addr, port->p_instance);
82171ae08745Sheppo 				return (1);
82181ae08745Sheppo 			}
82191ae08745Sheppo 		}
82201ae08745Sheppo 	}
82211ae08745Sheppo 	D1(vswp, "%s: exit", __func__);
82221ae08745Sheppo 	return (0);
82231ae08745Sheppo }
82241ae08745Sheppo 
82251ae08745Sheppo /*
82261ae08745Sheppo  * Add a new multicast entry.
82271ae08745Sheppo  *
82281ae08745Sheppo  * Search hash table based on address. If match found then
82291ae08745Sheppo  * update associated val (which is chain of ports), otherwise
82301ae08745Sheppo  * create new key/val (addr/port) pair and insert into table.
82311ae08745Sheppo  */
82321ae08745Sheppo static int
82331ae08745Sheppo vsw_add_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg)
82341ae08745Sheppo {
82351ae08745Sheppo 	int		dup = 0;
82361ae08745Sheppo 	int		rv = 0;
82371ae08745Sheppo 	mfdb_ent_t	*ment = NULL;
82381ae08745Sheppo 	mfdb_ent_t	*tmp_ent = NULL;
82391ae08745Sheppo 	mfdb_ent_t	*new_ent = NULL;
82401ae08745Sheppo 	void		*tgt = NULL;
82411ae08745Sheppo 
82421ae08745Sheppo 	if (devtype == VSW_VNETPORT) {
82431ae08745Sheppo 		/*
82441ae08745Sheppo 		 * Being invoked from a vnet.
82451ae08745Sheppo 		 */
82461ae08745Sheppo 		ASSERT(arg != NULL);
82471ae08745Sheppo 		tgt = arg;
82481ae08745Sheppo 		D2(NULL, "%s: port %d : address 0x%llx", __func__,
82491ae08745Sheppo 		    ((vsw_port_t *)arg)->p_instance, addr);
82501ae08745Sheppo 	} else {
82511ae08745Sheppo 		/*
82521ae08745Sheppo 		 * We are being invoked via the m_multicst mac entry
82531ae08745Sheppo 		 * point.
82541ae08745Sheppo 		 */
82551ae08745Sheppo 		D2(NULL, "%s: address 0x%llx", __func__, addr);
82561ae08745Sheppo 		tgt = (void *)vswp;
82571ae08745Sheppo 	}
82581ae08745Sheppo 
82591ae08745Sheppo 	WRITE_ENTER(&vswp->mfdbrw);
82601ae08745Sheppo 	if (mod_hash_find(vswp->mfdb, (mod_hash_key_t)addr,
82611ae08745Sheppo 	    (mod_hash_val_t *)&ment) != 0) {
82621ae08745Sheppo 
82631ae08745Sheppo 		/* address not currently in table */
82641ae08745Sheppo 		ment = kmem_alloc(sizeof (mfdb_ent_t), KM_SLEEP);
82651ae08745Sheppo 		ment->d_addr = (void *)tgt;
82661ae08745Sheppo 		ment->d_type = devtype;
82671ae08745Sheppo 		ment->nextp = NULL;
82681ae08745Sheppo 
82691ae08745Sheppo 		if (mod_hash_insert(vswp->mfdb, (mod_hash_key_t)addr,
82701ae08745Sheppo 		    (mod_hash_val_t)ment) != 0) {
82711ae08745Sheppo 			DERR(vswp, "%s: hash table insertion failed", __func__);
82721ae08745Sheppo 			kmem_free(ment, sizeof (mfdb_ent_t));
82731ae08745Sheppo 			rv = 1;
82741ae08745Sheppo 		} else {
82751ae08745Sheppo 			D2(vswp, "%s: added initial entry for 0x%llx to "
82761ae08745Sheppo 			    "table", __func__, addr);
82771ae08745Sheppo 		}
82781ae08745Sheppo 	} else {
82791ae08745Sheppo 		/*
82801ae08745Sheppo 		 * Address in table. Check to see if specified port
82811ae08745Sheppo 		 * is already associated with the address. If not add
82821ae08745Sheppo 		 * it now.
82831ae08745Sheppo 		 */
82841ae08745Sheppo 		tmp_ent = ment;
82851ae08745Sheppo 		while (tmp_ent != NULL) {
82861ae08745Sheppo 			if (tmp_ent->d_addr == (void *)tgt) {
82871ae08745Sheppo 				if (devtype == VSW_VNETPORT) {
82881ae08745Sheppo 					DERR(vswp, "%s: duplicate port entry "
82891ae08745Sheppo 					    "found for portid %ld and key "
82901ae08745Sheppo 					    "0x%llx", __func__,
82911ae08745Sheppo 					    ((vsw_port_t *)arg)->p_instance,
82921ae08745Sheppo 					    addr);
82931ae08745Sheppo 				} else {
82941ae08745Sheppo 					DERR(vswp, "%s: duplicate entry found"
8295205eeb1aSlm66018 					    "for key 0x%llx", __func__, addr);
82961ae08745Sheppo 				}
82971ae08745Sheppo 				rv = 1;
82981ae08745Sheppo 				dup = 1;
82991ae08745Sheppo 				break;
83001ae08745Sheppo 			}
83011ae08745Sheppo 			tmp_ent = tmp_ent->nextp;
83021ae08745Sheppo 		}
83031ae08745Sheppo 
83041ae08745Sheppo 		/*
83051ae08745Sheppo 		 * Port not on list so add it to end now.
83061ae08745Sheppo 		 */
83071ae08745Sheppo 		if (0 == dup) {
83081ae08745Sheppo 			D2(vswp, "%s: added entry for 0x%llx to table",
83091ae08745Sheppo 			    __func__, addr);
83101ae08745Sheppo 			new_ent = kmem_alloc(sizeof (mfdb_ent_t), KM_SLEEP);
83111ae08745Sheppo 			new_ent->d_addr = (void *)tgt;
83121ae08745Sheppo 			new_ent->d_type = devtype;
83131ae08745Sheppo 			new_ent->nextp = NULL;
83141ae08745Sheppo 
83151ae08745Sheppo 			tmp_ent = ment;
83161ae08745Sheppo 			while (tmp_ent->nextp != NULL)
83171ae08745Sheppo 				tmp_ent = tmp_ent->nextp;
83181ae08745Sheppo 
83191ae08745Sheppo 			tmp_ent->nextp = new_ent;
83201ae08745Sheppo 		}
83211ae08745Sheppo 	}
83221ae08745Sheppo 
83231ae08745Sheppo 	RW_EXIT(&vswp->mfdbrw);
83241ae08745Sheppo 	return (rv);
83251ae08745Sheppo }
83261ae08745Sheppo 
83271ae08745Sheppo /*
83281ae08745Sheppo  * Remove a multicast entry from the hashtable.
83291ae08745Sheppo  *
83301ae08745Sheppo  * Search hash table based on address. If match found, scan
83311ae08745Sheppo  * list of ports associated with address. If specified port
83321ae08745Sheppo  * found remove it from list.
83331ae08745Sheppo  */
83341ae08745Sheppo static int
83351ae08745Sheppo vsw_del_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg)
83361ae08745Sheppo {
83371ae08745Sheppo 	mfdb_ent_t	*ment = NULL;
83381ae08745Sheppo 	mfdb_ent_t	*curr_p, *prev_p;
83391ae08745Sheppo 	void		*tgt = NULL;
83401ae08745Sheppo 
83411ae08745Sheppo 	D1(vswp, "%s: enter", __func__);
83421ae08745Sheppo 
83431ae08745Sheppo 	if (devtype == VSW_VNETPORT) {
83441ae08745Sheppo 		tgt = (vsw_port_t *)arg;
83451ae08745Sheppo 		D2(vswp, "%s: removing port %d from mFDB for address"
8346205eeb1aSlm66018 		    " 0x%llx", __func__, ((vsw_port_t *)tgt)->p_instance, addr);
83471ae08745Sheppo 	} else {
83481ae08745Sheppo 		D2(vswp, "%s: removing entry", __func__);
83491ae08745Sheppo 		tgt = (void *)vswp;
83501ae08745Sheppo 	}
83511ae08745Sheppo 
83521ae08745Sheppo 	WRITE_ENTER(&vswp->mfdbrw);
83531ae08745Sheppo 	if (mod_hash_find(vswp->mfdb, (mod_hash_key_t)addr,
83541ae08745Sheppo 	    (mod_hash_val_t *)&ment) != 0) {
83551ae08745Sheppo 		D2(vswp, "%s: address 0x%llx not in table", __func__, addr);
83561ae08745Sheppo 		RW_EXIT(&vswp->mfdbrw);
83571ae08745Sheppo 		return (1);
83581ae08745Sheppo 	}
83591ae08745Sheppo 
83601ae08745Sheppo 	prev_p = curr_p = ment;
83611ae08745Sheppo 
83621ae08745Sheppo 	while (curr_p != NULL) {
83631ae08745Sheppo 		if (curr_p->d_addr == (void *)tgt) {
83641ae08745Sheppo 			if (devtype == VSW_VNETPORT) {
83651ae08745Sheppo 				D2(vswp, "%s: port %d found", __func__,
83661ae08745Sheppo 				    ((vsw_port_t *)tgt)->p_instance);
83671ae08745Sheppo 			} else {
83681ae08745Sheppo 				D2(vswp, "%s: instance found", __func__);
83691ae08745Sheppo 			}
83701ae08745Sheppo 
83711ae08745Sheppo 			if (prev_p == curr_p) {
83721ae08745Sheppo 				/*
83731ae08745Sheppo 				 * head of list, if no other element is in
83741ae08745Sheppo 				 * list then destroy this entry, otherwise
83751ae08745Sheppo 				 * just replace it with updated value.
83761ae08745Sheppo 				 */
83771ae08745Sheppo 				ment = curr_p->nextp;
83781ae08745Sheppo 				if (ment == NULL) {
83791ae08745Sheppo 					(void) mod_hash_destroy(vswp->mfdb,
83801ae08745Sheppo 					    (mod_hash_val_t)addr);
83811ae08745Sheppo 				} else {
83821ae08745Sheppo 					(void) mod_hash_replace(vswp->mfdb,
83831ae08745Sheppo 					    (mod_hash_key_t)addr,
83841ae08745Sheppo 					    (mod_hash_val_t)ment);
83851ae08745Sheppo 				}
83861ae08745Sheppo 			} else {
83871ae08745Sheppo 				/*
83881ae08745Sheppo 				 * Not head of list, no need to do
83891ae08745Sheppo 				 * replacement, just adjust list pointers.
83901ae08745Sheppo 				 */
83911ae08745Sheppo 				prev_p->nextp = curr_p->nextp;
83921ae08745Sheppo 			}
83931ae08745Sheppo 			break;
83941ae08745Sheppo 		}
83951ae08745Sheppo 
83961ae08745Sheppo 		prev_p = curr_p;
83971ae08745Sheppo 		curr_p = curr_p->nextp;
83981ae08745Sheppo 	}
83991ae08745Sheppo 
84001ae08745Sheppo 	RW_EXIT(&vswp->mfdbrw);
84011ae08745Sheppo 
84021ae08745Sheppo 	D1(vswp, "%s: exit", __func__);
84031ae08745Sheppo 
8404*19b65a69Ssb155480 	if (curr_p == NULL)
8405*19b65a69Ssb155480 		return (1);
8406*19b65a69Ssb155480 	kmem_free(curr_p, sizeof (mfdb_ent_t));
84071ae08745Sheppo 	return (0);
84081ae08745Sheppo }
84091ae08745Sheppo 
84101ae08745Sheppo /*
84111ae08745Sheppo  * Port is being deleted, but has registered an interest in one
84121ae08745Sheppo  * or more multicast groups. Using the list of addresses maintained
84131ae08745Sheppo  * within the port structure find the appropriate entry in the hash
84141ae08745Sheppo  * table and remove this port from the list of interested ports.
84151ae08745Sheppo  */
84161ae08745Sheppo static void
84171ae08745Sheppo vsw_del_mcst_port(vsw_port_t *port)
84181ae08745Sheppo {
8419*19b65a69Ssb155480 	mcst_addr_t	*mcap = NULL;
84201ae08745Sheppo 	vsw_t		*vswp = port->p_vswp;
84211ae08745Sheppo 
84221ae08745Sheppo 	D1(vswp, "%s: enter", __func__);
84231ae08745Sheppo 
84241ae08745Sheppo 	mutex_enter(&port->mca_lock);
84251ae08745Sheppo 
8426*19b65a69Ssb155480 	while ((mcap = port->mcap) != NULL) {
8427*19b65a69Ssb155480 
8428*19b65a69Ssb155480 		port->mcap = mcap->nextp;
8429*19b65a69Ssb155480 
8430*19b65a69Ssb155480 		mutex_exit(&port->mca_lock);
8431*19b65a69Ssb155480 
8432*19b65a69Ssb155480 		(void) vsw_del_mcst(vswp, VSW_VNETPORT,
8433*19b65a69Ssb155480 		    mcap->addr, port);
8434*19b65a69Ssb155480 
8435*19b65a69Ssb155480 		/*
8436*19b65a69Ssb155480 		 * Remove the address from HW. The address
8437*19b65a69Ssb155480 		 * will actually only be removed once the ref
8438*19b65a69Ssb155480 		 * count within the MAC layer has dropped to
8439*19b65a69Ssb155480 		 * zero. I.e. we can safely call this fn even
8440*19b65a69Ssb155480 		 * if other ports are interested in this
8441*19b65a69Ssb155480 		 * address.
8442*19b65a69Ssb155480 		 */
8443*19b65a69Ssb155480 		mutex_enter(&vswp->mac_lock);
8444*19b65a69Ssb155480 		if (vswp->mh != NULL && mcap->mac_added) {
8445*19b65a69Ssb155480 			(void) mac_multicst_remove(vswp->mh,
8446*19b65a69Ssb155480 			    (uchar_t *)&mcap->mca);
84471ae08745Sheppo 		}
8448*19b65a69Ssb155480 		mutex_exit(&vswp->mac_lock);
8449*19b65a69Ssb155480 
8450*19b65a69Ssb155480 		kmem_free(mcap, sizeof (*mcap));
8451*19b65a69Ssb155480 
8452*19b65a69Ssb155480 		mutex_enter(&port->mca_lock);
8453*19b65a69Ssb155480 
8454*19b65a69Ssb155480 	}
8455*19b65a69Ssb155480 
84561ae08745Sheppo 	mutex_exit(&port->mca_lock);
84571ae08745Sheppo 
84581ae08745Sheppo 	D1(vswp, "%s: exit", __func__);
84591ae08745Sheppo }
84601ae08745Sheppo 
84611ae08745Sheppo /*
84621ae08745Sheppo  * This vsw instance is detaching, but has registered an interest in one
84631ae08745Sheppo  * or more multicast groups. Using the list of addresses maintained
84641ae08745Sheppo  * within the vsw structure find the appropriate entry in the hash
84651ae08745Sheppo  * table and remove this instance from the list of interested ports.
84661ae08745Sheppo  */
84671ae08745Sheppo static void
84681ae08745Sheppo vsw_del_mcst_vsw(vsw_t *vswp)
84691ae08745Sheppo {
84701ae08745Sheppo 	mcst_addr_t	*next_p = NULL;
84711ae08745Sheppo 
84721ae08745Sheppo 	D1(vswp, "%s: enter", __func__);
84731ae08745Sheppo 
84741ae08745Sheppo 	mutex_enter(&vswp->mca_lock);
84751ae08745Sheppo 
84761ae08745Sheppo 	while (vswp->mcap != NULL) {
84771ae08745Sheppo 		DERR(vswp, "%s: deleting addr 0x%llx",
84781ae08745Sheppo 		    __func__, vswp->mcap->addr);
8479205eeb1aSlm66018 		(void) vsw_del_mcst(vswp, VSW_LOCALDEV, vswp->mcap->addr, NULL);
84801ae08745Sheppo 
84811ae08745Sheppo 		next_p = vswp->mcap->nextp;
84821ae08745Sheppo 		kmem_free(vswp->mcap, sizeof (mcst_addr_t));
84831ae08745Sheppo 		vswp->mcap = next_p;
84841ae08745Sheppo 	}
84851ae08745Sheppo 
84861ae08745Sheppo 	vswp->mcap = NULL;
84871ae08745Sheppo 	mutex_exit(&vswp->mca_lock);
84881ae08745Sheppo 
84891ae08745Sheppo 	D1(vswp, "%s: exit", __func__);
84901ae08745Sheppo }
84911ae08745Sheppo 
84921ae08745Sheppo /*
84931ae08745Sheppo  * Remove the specified address from the list of address maintained
84941ae08745Sheppo  * in this port node.
84951ae08745Sheppo  */
8496*19b65a69Ssb155480 static mcst_addr_t *
84971ae08745Sheppo vsw_del_addr(uint8_t devtype, void *arg, uint64_t addr)
84981ae08745Sheppo {
84991ae08745Sheppo 	vsw_t		*vswp = NULL;
85001ae08745Sheppo 	vsw_port_t	*port = NULL;
85011ae08745Sheppo 	mcst_addr_t	*prev_p = NULL;
85021ae08745Sheppo 	mcst_addr_t	*curr_p = NULL;
85031ae08745Sheppo 
85041ae08745Sheppo 	D1(NULL, "%s: enter : devtype %d : addr 0x%llx",
85051ae08745Sheppo 	    __func__, devtype, addr);
85061ae08745Sheppo 
85071ae08745Sheppo 	if (devtype == VSW_VNETPORT) {
85081ae08745Sheppo 		port = (vsw_port_t *)arg;
85091ae08745Sheppo 		mutex_enter(&port->mca_lock);
85101ae08745Sheppo 		prev_p = curr_p = port->mcap;
85111ae08745Sheppo 	} else {
85121ae08745Sheppo 		vswp = (vsw_t *)arg;
85131ae08745Sheppo 		mutex_enter(&vswp->mca_lock);
85141ae08745Sheppo 		prev_p = curr_p = vswp->mcap;
85151ae08745Sheppo 	}
85161ae08745Sheppo 
85171ae08745Sheppo 	while (curr_p != NULL) {
85181ae08745Sheppo 		if (curr_p->addr == addr) {
85191ae08745Sheppo 			D2(NULL, "%s: address found", __func__);
85201ae08745Sheppo 			/* match found */
85211ae08745Sheppo 			if (prev_p == curr_p) {
85221ae08745Sheppo 				/* list head */
85231ae08745Sheppo 				if (devtype == VSW_VNETPORT)
85241ae08745Sheppo 					port->mcap = curr_p->nextp;
85251ae08745Sheppo 				else
85261ae08745Sheppo 					vswp->mcap = curr_p->nextp;
85271ae08745Sheppo 			} else {
85281ae08745Sheppo 				prev_p->nextp = curr_p->nextp;
85291ae08745Sheppo 			}
85301ae08745Sheppo 			break;
85311ae08745Sheppo 		} else {
85321ae08745Sheppo 			prev_p = curr_p;
85331ae08745Sheppo 			curr_p = curr_p->nextp;
85341ae08745Sheppo 		}
85351ae08745Sheppo 	}
85361ae08745Sheppo 
85371ae08745Sheppo 	if (devtype == VSW_VNETPORT)
85381ae08745Sheppo 		mutex_exit(&port->mca_lock);
85391ae08745Sheppo 	else
85401ae08745Sheppo 		mutex_exit(&vswp->mca_lock);
85411ae08745Sheppo 
85421ae08745Sheppo 	D1(NULL, "%s: exit", __func__);
8543*19b65a69Ssb155480 
8544*19b65a69Ssb155480 	return (curr_p);
85451ae08745Sheppo }
85461ae08745Sheppo 
85471ae08745Sheppo /*
85481ae08745Sheppo  * Creates a descriptor ring (dring) and links it into the
85491ae08745Sheppo  * link of outbound drings for this channel.
85501ae08745Sheppo  *
85511ae08745Sheppo  * Returns NULL if creation failed.
85521ae08745Sheppo  */
85531ae08745Sheppo static dring_info_t *
85541ae08745Sheppo vsw_create_dring(vsw_ldc_t *ldcp)
85551ae08745Sheppo {
85561ae08745Sheppo 	vsw_private_desc_t	*priv_addr = NULL;
85571ae08745Sheppo 	vsw_t			*vswp = ldcp->ldc_vswp;
85581ae08745Sheppo 	ldc_mem_info_t		minfo;
85591ae08745Sheppo 	dring_info_t		*dp, *tp;
85601ae08745Sheppo 	int			i;
85611ae08745Sheppo 
85621ae08745Sheppo 	dp = (dring_info_t *)kmem_zalloc(sizeof (dring_info_t), KM_SLEEP);
85631ae08745Sheppo 
85641ae08745Sheppo 	mutex_init(&dp->dlock, NULL, MUTEX_DRIVER, NULL);
85651ae08745Sheppo 
85661ae08745Sheppo 	/* create public section of ring */
85671ae08745Sheppo 	if ((ldc_mem_dring_create(VSW_RING_NUM_EL,
85681ae08745Sheppo 	    VSW_PUB_SIZE, &dp->handle)) != 0) {
85691ae08745Sheppo 
85701ae08745Sheppo 		DERR(vswp, "vsw_create_dring(%lld): ldc dring create "
85711ae08745Sheppo 		    "failed", ldcp->ldc_id);
85721ae08745Sheppo 		goto create_fail_exit;
85731ae08745Sheppo 	}
85741ae08745Sheppo 
85751ae08745Sheppo 	ASSERT(dp->handle != NULL);
85761ae08745Sheppo 
85771ae08745Sheppo 	/*
85781ae08745Sheppo 	 * Get the base address of the public section of the ring.
85791ae08745Sheppo 	 */
85801ae08745Sheppo 	if ((ldc_mem_dring_info(dp->handle, &minfo)) != 0) {
85811ae08745Sheppo 		DERR(vswp, "vsw_create_dring(%lld): dring info failed\n",
85821ae08745Sheppo 		    ldcp->ldc_id);
85831ae08745Sheppo 		goto dring_fail_exit;
85841ae08745Sheppo 	} else {
85851ae08745Sheppo 		ASSERT(minfo.vaddr != 0);
85861ae08745Sheppo 		dp->pub_addr = minfo.vaddr;
85871ae08745Sheppo 	}
85881ae08745Sheppo 
85891ae08745Sheppo 	dp->num_descriptors = VSW_RING_NUM_EL;
85901ae08745Sheppo 	dp->descriptor_size = VSW_PUB_SIZE;
85911ae08745Sheppo 	dp->options = VIO_TX_DRING;
85921ae08745Sheppo 	dp->ncookies = 1;	/* guaranteed by ldc */
85931ae08745Sheppo 
85941ae08745Sheppo 	/*
85951ae08745Sheppo 	 * create private portion of ring
85961ae08745Sheppo 	 */
85971ae08745Sheppo 	dp->priv_addr = (vsw_private_desc_t *)kmem_zalloc(
85981ae08745Sheppo 	    (sizeof (vsw_private_desc_t) * VSW_RING_NUM_EL), KM_SLEEP);
85991ae08745Sheppo 
86001ae08745Sheppo 	if (vsw_setup_ring(ldcp, dp)) {
86011ae08745Sheppo 		DERR(vswp, "%s: unable to setup ring", __func__);
86021ae08745Sheppo 		goto dring_fail_exit;
86031ae08745Sheppo 	}
86041ae08745Sheppo 
86051ae08745Sheppo 	/* haven't used any descriptors yet */
86061ae08745Sheppo 	dp->end_idx = 0;
8607d10e4ef2Snarayan 	dp->last_ack_recv = -1;
86081ae08745Sheppo 
86091ae08745Sheppo 	/* bind dring to the channel */
86101ae08745Sheppo 	if ((ldc_mem_dring_bind(ldcp->ldc_handle, dp->handle,
86111ae08745Sheppo 	    LDC_SHADOW_MAP, LDC_MEM_RW,
86121ae08745Sheppo 	    &dp->cookie[0], &dp->ncookies)) != 0) {
86131ae08745Sheppo 		DERR(vswp, "vsw_create_dring: unable to bind to channel "
86141ae08745Sheppo 		    "%lld", ldcp->ldc_id);
86151ae08745Sheppo 		goto dring_fail_exit;
86161ae08745Sheppo 	}
86171ae08745Sheppo 
8618d10e4ef2Snarayan 	mutex_init(&dp->restart_lock, NULL, MUTEX_DRIVER, NULL);
8619d10e4ef2Snarayan 	dp->restart_reqd = B_TRUE;
8620d10e4ef2Snarayan 
86211ae08745Sheppo 	/*
86221ae08745Sheppo 	 * Only ever create rings for outgoing lane. Link it onto
86231ae08745Sheppo 	 * end of list.
86241ae08745Sheppo 	 */
8625445b4c2eSsb155480 	WRITE_ENTER(&ldcp->lane_out.dlistrw);
86261ae08745Sheppo 	if (ldcp->lane_out.dringp == NULL) {
86271ae08745Sheppo 		D2(vswp, "vsw_create_dring: adding first outbound ring");
86281ae08745Sheppo 		ldcp->lane_out.dringp = dp;
86291ae08745Sheppo 	} else {
86301ae08745Sheppo 		tp = ldcp->lane_out.dringp;
86311ae08745Sheppo 		while (tp->next != NULL)
86321ae08745Sheppo 			tp = tp->next;
86331ae08745Sheppo 
86341ae08745Sheppo 		tp->next = dp;
86351ae08745Sheppo 	}
8636445b4c2eSsb155480 	RW_EXIT(&ldcp->lane_out.dlistrw);
86371ae08745Sheppo 
86381ae08745Sheppo 	return (dp);
86391ae08745Sheppo 
86401ae08745Sheppo dring_fail_exit:
86411ae08745Sheppo 	(void) ldc_mem_dring_destroy(dp->handle);
86421ae08745Sheppo 
86431ae08745Sheppo create_fail_exit:
86441ae08745Sheppo 	if (dp->priv_addr != NULL) {
86451ae08745Sheppo 		priv_addr = dp->priv_addr;
86461ae08745Sheppo 		for (i = 0; i < VSW_RING_NUM_EL; i++) {
86471ae08745Sheppo 			if (priv_addr->memhandle != NULL)
86481ae08745Sheppo 				(void) ldc_mem_free_handle(
86491ae08745Sheppo 				    priv_addr->memhandle);
86501ae08745Sheppo 			priv_addr++;
86511ae08745Sheppo 		}
86521ae08745Sheppo 		kmem_free(dp->priv_addr,
86531ae08745Sheppo 		    (sizeof (vsw_private_desc_t) * VSW_RING_NUM_EL));
86541ae08745Sheppo 	}
86551ae08745Sheppo 	mutex_destroy(&dp->dlock);
86561ae08745Sheppo 
86571ae08745Sheppo 	kmem_free(dp, sizeof (dring_info_t));
86581ae08745Sheppo 	return (NULL);
86591ae08745Sheppo }
86601ae08745Sheppo 
86611ae08745Sheppo /*
86621ae08745Sheppo  * Create a ring consisting of just a private portion and link
86631ae08745Sheppo  * it into the list of rings for the outbound lane.
86641ae08745Sheppo  *
86651ae08745Sheppo  * These type of rings are used primarily for temporary data
86661ae08745Sheppo  * storage (i.e. as data buffers).
86671ae08745Sheppo  */
86681ae08745Sheppo void
86691ae08745Sheppo vsw_create_privring(vsw_ldc_t *ldcp)
86701ae08745Sheppo {
86711ae08745Sheppo 	dring_info_t		*dp, *tp;
86721ae08745Sheppo 	vsw_t			*vswp = ldcp->ldc_vswp;
86731ae08745Sheppo 
86741ae08745Sheppo 	D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);
86751ae08745Sheppo 
86761ae08745Sheppo 	dp = kmem_zalloc(sizeof (dring_info_t), KM_SLEEP);
86771ae08745Sheppo 
86781ae08745Sheppo 	mutex_init(&dp->dlock, NULL, MUTEX_DRIVER, NULL);
86791ae08745Sheppo 
86801ae08745Sheppo 	/* no public section */
86811ae08745Sheppo 	dp->pub_addr = NULL;
86821ae08745Sheppo 
8683205eeb1aSlm66018 	dp->priv_addr = kmem_zalloc(
8684205eeb1aSlm66018 	    (sizeof (vsw_private_desc_t) * VSW_RING_NUM_EL), KM_SLEEP);
86851ae08745Sheppo 
86864bac2208Snarayan 	dp->num_descriptors = VSW_RING_NUM_EL;
86874bac2208Snarayan 
86881ae08745Sheppo 	if (vsw_setup_ring(ldcp, dp)) {
86891ae08745Sheppo 		DERR(vswp, "%s: setup of ring failed", __func__);
86901ae08745Sheppo 		kmem_free(dp->priv_addr,
86911ae08745Sheppo 		    (sizeof (vsw_private_desc_t) * VSW_RING_NUM_EL));
86921ae08745Sheppo 		mutex_destroy(&dp->dlock);
86931ae08745Sheppo 		kmem_free(dp, sizeof (dring_info_t));
86941ae08745Sheppo 		return;
86951ae08745Sheppo 	}
86961ae08745Sheppo 
86971ae08745Sheppo 	/* haven't used any descriptors yet */
86981ae08745Sheppo 	dp->end_idx = 0;
86991ae08745Sheppo 
8700d10e4ef2Snarayan 	mutex_init(&dp->restart_lock, NULL, MUTEX_DRIVER, NULL);
8701d10e4ef2Snarayan 	dp->restart_reqd = B_TRUE;
8702d10e4ef2Snarayan 
87031ae08745Sheppo 	/*
87041ae08745Sheppo 	 * Only ever create rings for outgoing lane. Link it onto
87051ae08745Sheppo 	 * end of list.
87061ae08745Sheppo 	 */
8707445b4c2eSsb155480 	WRITE_ENTER(&ldcp->lane_out.dlistrw);
87081ae08745Sheppo 	if (ldcp->lane_out.dringp == NULL) {
87091ae08745Sheppo 		D2(vswp, "%s: adding first outbound privring", __func__);
87101ae08745Sheppo 		ldcp->lane_out.dringp = dp;
87111ae08745Sheppo 	} else {
87121ae08745Sheppo 		tp = ldcp->lane_out.dringp;
87131ae08745Sheppo 		while (tp->next != NULL)
87141ae08745Sheppo 			tp = tp->next;
87151ae08745Sheppo 
87161ae08745Sheppo 		tp->next = dp;
87171ae08745Sheppo 	}
8718445b4c2eSsb155480 	RW_EXIT(&ldcp->lane_out.dlistrw);
87191ae08745Sheppo 
87201ae08745Sheppo 	D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id);
87211ae08745Sheppo }
87221ae08745Sheppo 
87231ae08745Sheppo /*
87241ae08745Sheppo  * Setup the descriptors in the dring. Returns 0 on success, 1 on
87251ae08745Sheppo  * failure.
87261ae08745Sheppo  */
87271ae08745Sheppo int
87281ae08745Sheppo vsw_setup_ring(vsw_ldc_t *ldcp, dring_info_t *dp)
87291ae08745Sheppo {
87301ae08745Sheppo 	vnet_public_desc_t	*pub_addr = NULL;
87311ae08745Sheppo 	vsw_private_desc_t	*priv_addr = NULL;
87321ae08745Sheppo 	vsw_t			*vswp = ldcp->ldc_vswp;
87331ae08745Sheppo 	uint64_t		*tmpp;
87341ae08745Sheppo 	uint64_t		offset = 0;
87351ae08745Sheppo 	uint32_t		ncookies = 0;
87361ae08745Sheppo 	static char		*name = "vsw_setup_ring";
8737d10e4ef2Snarayan 	int			i, j, nc, rv;
87381ae08745Sheppo 
87391ae08745Sheppo 	priv_addr = dp->priv_addr;
87401ae08745Sheppo 	pub_addr = dp->pub_addr;
87411ae08745Sheppo 
8742d10e4ef2Snarayan 	/* public section may be null but private should never be */
8743d10e4ef2Snarayan 	ASSERT(priv_addr != NULL);
8744d10e4ef2Snarayan 
87451ae08745Sheppo 	/*
87461ae08745Sheppo 	 * Allocate the region of memory which will be used to hold
87471ae08745Sheppo 	 * the data the descriptors will refer to.
87481ae08745Sheppo 	 */
87491ae08745Sheppo 	dp->data_sz = (VSW_RING_NUM_EL * VSW_RING_EL_DATA_SZ);
87501ae08745Sheppo 	dp->data_addr = kmem_alloc(dp->data_sz, KM_SLEEP);
87511ae08745Sheppo 
87521ae08745Sheppo 	D2(vswp, "%s: allocated %lld bytes at 0x%llx\n", name,
87531ae08745Sheppo 	    dp->data_sz, dp->data_addr);
87541ae08745Sheppo 
87551ae08745Sheppo 	tmpp = (uint64_t *)dp->data_addr;
87561ae08745Sheppo 	offset = VSW_RING_EL_DATA_SZ / sizeof (tmpp);
87571ae08745Sheppo 
87581ae08745Sheppo 	/*
87591ae08745Sheppo 	 * Initialise some of the private and public (if they exist)
87601ae08745Sheppo 	 * descriptor fields.
87611ae08745Sheppo 	 */
87621ae08745Sheppo 	for (i = 0; i < VSW_RING_NUM_EL; i++) {
8763d10e4ef2Snarayan 		mutex_init(&priv_addr->dstate_lock, NULL, MUTEX_DRIVER, NULL);
8764d10e4ef2Snarayan 
87651ae08745Sheppo 		if ((ldc_mem_alloc_handle(ldcp->ldc_handle,
87661ae08745Sheppo 		    &priv_addr->memhandle)) != 0) {
87671ae08745Sheppo 			DERR(vswp, "%s: alloc mem handle failed", name);
87681ae08745Sheppo 			goto setup_ring_cleanup;
87691ae08745Sheppo 		}
87701ae08745Sheppo 
87711ae08745Sheppo 		priv_addr->datap = (void *)tmpp;
87721ae08745Sheppo 
87731ae08745Sheppo 		rv = ldc_mem_bind_handle(priv_addr->memhandle,
87741ae08745Sheppo 		    (caddr_t)priv_addr->datap, VSW_RING_EL_DATA_SZ,
87751ae08745Sheppo 		    LDC_SHADOW_MAP, LDC_MEM_R|LDC_MEM_W,
87761ae08745Sheppo 		    &(priv_addr->memcookie[0]), &ncookies);
87771ae08745Sheppo 		if (rv != 0) {
87781ae08745Sheppo 			DERR(vswp, "%s(%lld): ldc_mem_bind_handle failed "
87791ae08745Sheppo 			    "(rv %d)", name, ldcp->ldc_id, rv);
87801ae08745Sheppo 			goto setup_ring_cleanup;
87811ae08745Sheppo 		}
87821ae08745Sheppo 		priv_addr->bound = 1;
87831ae08745Sheppo 
87841ae08745Sheppo 		D2(vswp, "%s: %d: memcookie 0 : addr 0x%llx : size 0x%llx",
87851ae08745Sheppo 		    name, i, priv_addr->memcookie[0].addr,
87861ae08745Sheppo 		    priv_addr->memcookie[0].size);
87871ae08745Sheppo 
87881ae08745Sheppo 		if (ncookies >= (uint32_t)(VSW_MAX_COOKIES + 1)) {
87891ae08745Sheppo 			DERR(vswp, "%s(%lld) ldc_mem_bind_handle returned "
87901ae08745Sheppo 			    "invalid num of cookies (%d) for size 0x%llx",
8791205eeb1aSlm66018 			    name, ldcp->ldc_id, ncookies, VSW_RING_EL_DATA_SZ);
87921ae08745Sheppo 
87931ae08745Sheppo 			goto setup_ring_cleanup;
87941ae08745Sheppo 		} else {
87951ae08745Sheppo 			for (j = 1; j < ncookies; j++) {
87961ae08745Sheppo 				rv = ldc_mem_nextcookie(priv_addr->memhandle,
87971ae08745Sheppo 				    &(priv_addr->memcookie[j]));
87981ae08745Sheppo 				if (rv != 0) {
87991ae08745Sheppo 					DERR(vswp, "%s: ldc_mem_nextcookie "
88001ae08745Sheppo 					    "failed rv (%d)", name, rv);
88011ae08745Sheppo 					goto setup_ring_cleanup;
88021ae08745Sheppo 				}
88031ae08745Sheppo 				D3(vswp, "%s: memcookie %d : addr 0x%llx : "
88041ae08745Sheppo 				    "size 0x%llx", name, j,
88051ae08745Sheppo 				    priv_addr->memcookie[j].addr,
88061ae08745Sheppo 				    priv_addr->memcookie[j].size);
88071ae08745Sheppo 			}
88081ae08745Sheppo 
88091ae08745Sheppo 		}
88101ae08745Sheppo 		priv_addr->ncookies = ncookies;
88111ae08745Sheppo 		priv_addr->dstate = VIO_DESC_FREE;
88121ae08745Sheppo 
88131ae08745Sheppo 		if (pub_addr != NULL) {
88141ae08745Sheppo 
88151ae08745Sheppo 			/* link pub and private sides */
88161ae08745Sheppo 			priv_addr->descp = pub_addr;
88171ae08745Sheppo 
8818d10e4ef2Snarayan 			pub_addr->ncookies = priv_addr->ncookies;
8819d10e4ef2Snarayan 
8820d10e4ef2Snarayan 			for (nc = 0; nc < pub_addr->ncookies; nc++) {
8821d10e4ef2Snarayan 				bcopy(&priv_addr->memcookie[nc],
8822d10e4ef2Snarayan 				    &pub_addr->memcookie[nc],
8823d10e4ef2Snarayan 				    sizeof (ldc_mem_cookie_t));
8824d10e4ef2Snarayan 			}
8825d10e4ef2Snarayan 
88261ae08745Sheppo 			pub_addr->hdr.dstate = VIO_DESC_FREE;
88271ae08745Sheppo 			pub_addr++;
88281ae08745Sheppo 		}
88291ae08745Sheppo 
88301ae08745Sheppo 		/*
88311ae08745Sheppo 		 * move to next element in the dring and the next
88321ae08745Sheppo 		 * position in the data buffer.
88331ae08745Sheppo 		 */
88341ae08745Sheppo 		priv_addr++;
88351ae08745Sheppo 		tmpp += offset;
88361ae08745Sheppo 	}
88371ae08745Sheppo 
88381ae08745Sheppo 	return (0);
88391ae08745Sheppo 
88401ae08745Sheppo setup_ring_cleanup:
88411ae08745Sheppo 	priv_addr = dp->priv_addr;
88421ae08745Sheppo 
8843d10e4ef2Snarayan 	for (j = 0; j < i; j++) {
88441ae08745Sheppo 		(void) ldc_mem_unbind_handle(priv_addr->memhandle);
88451ae08745Sheppo 		(void) ldc_mem_free_handle(priv_addr->memhandle);
88461ae08745Sheppo 
8847d10e4ef2Snarayan 		mutex_destroy(&priv_addr->dstate_lock);
8848d10e4ef2Snarayan 
88491ae08745Sheppo 		priv_addr++;
88501ae08745Sheppo 	}
88511ae08745Sheppo 	kmem_free(dp->data_addr, dp->data_sz);
88521ae08745Sheppo 
88531ae08745Sheppo 	return (1);
88541ae08745Sheppo }
88551ae08745Sheppo 
88561ae08745Sheppo /*
88571ae08745Sheppo  * Searches the private section of a ring for a free descriptor,
88581ae08745Sheppo  * starting at the location of the last free descriptor found
88591ae08745Sheppo  * previously.
88601ae08745Sheppo  *
8861d10e4ef2Snarayan  * Returns 0 if free descriptor is available, and updates state
8862d10e4ef2Snarayan  * of private descriptor to VIO_DESC_READY,  otherwise returns 1.
88631ae08745Sheppo  *
88641ae08745Sheppo  * FUTURE: might need to return contiguous range of descriptors
88651ae08745Sheppo  * as dring info msg assumes all will be contiguous.
88661ae08745Sheppo  */
88671ae08745Sheppo static int
88681ae08745Sheppo vsw_dring_find_free_desc(dring_info_t *dringp,
88691ae08745Sheppo 		vsw_private_desc_t **priv_p, int *idx)
88701ae08745Sheppo {
8871d10e4ef2Snarayan 	vsw_private_desc_t	*addr = NULL;
88721ae08745Sheppo 	int			num = VSW_RING_NUM_EL;
88731ae08745Sheppo 	int			ret = 1;
88741ae08745Sheppo 
88751ae08745Sheppo 	D1(NULL, "%s enter\n", __func__);
88761ae08745Sheppo 
8877d10e4ef2Snarayan 	ASSERT(dringp->priv_addr != NULL);
88781ae08745Sheppo 
88791ae08745Sheppo 	D2(NULL, "%s: searching ring, dringp 0x%llx : start pos %lld",
8880d10e4ef2Snarayan 	    __func__, dringp, dringp->end_idx);
88811ae08745Sheppo 
8882d10e4ef2Snarayan 	addr = (vsw_private_desc_t *)dringp->priv_addr + dringp->end_idx;
8883d10e4ef2Snarayan 
8884d10e4ef2Snarayan 	mutex_enter(&addr->dstate_lock);
88851ae08745Sheppo 	if (addr->dstate == VIO_DESC_FREE) {
8886d10e4ef2Snarayan 		addr->dstate = VIO_DESC_READY;
88871ae08745Sheppo 		*priv_p = addr;
8888d10e4ef2Snarayan 		*idx = dringp->end_idx;
8889d10e4ef2Snarayan 		dringp->end_idx = (dringp->end_idx + 1) % num;
88901ae08745Sheppo 		ret = 0;
8891d10e4ef2Snarayan 
88921ae08745Sheppo 	}
8893d10e4ef2Snarayan 	mutex_exit(&addr->dstate_lock);
88941ae08745Sheppo 
88951ae08745Sheppo 	/* ring full */
88961ae08745Sheppo 	if (ret == 1) {
8897d10e4ef2Snarayan 		D2(NULL, "%s: no desp free: started at %d", __func__,
8898d10e4ef2Snarayan 		    dringp->end_idx);
88991ae08745Sheppo 	}
89001ae08745Sheppo 
89011ae08745Sheppo 	D1(NULL, "%s: exit\n", __func__);
89021ae08745Sheppo 
89031ae08745Sheppo 	return (ret);
89041ae08745Sheppo }
89051ae08745Sheppo 
89061ae08745Sheppo /*
89071ae08745Sheppo  * Map from a dring identifier to the ring itself. Returns
89081ae08745Sheppo  * pointer to ring or NULL if no match found.
8909445b4c2eSsb155480  *
8910445b4c2eSsb155480  * Should be called with dlistrw rwlock held as reader.
89111ae08745Sheppo  */
89121ae08745Sheppo static dring_info_t *
89131ae08745Sheppo vsw_ident2dring(lane_t *lane, uint64_t ident)
89141ae08745Sheppo {
89151ae08745Sheppo 	dring_info_t	*dp = NULL;
89161ae08745Sheppo 
89171ae08745Sheppo 	if ((dp = lane->dringp) == NULL) {
89181ae08745Sheppo 		return (NULL);
89191ae08745Sheppo 	} else {
89201ae08745Sheppo 		if (dp->ident == ident)
89211ae08745Sheppo 			return (dp);
89221ae08745Sheppo 
89231ae08745Sheppo 		while (dp != NULL) {
89241ae08745Sheppo 			if (dp->ident == ident)
89251ae08745Sheppo 				break;
89261ae08745Sheppo 			dp = dp->next;
89271ae08745Sheppo 		}
89281ae08745Sheppo 	}
89291ae08745Sheppo 
89301ae08745Sheppo 	return (dp);
89311ae08745Sheppo }
89321ae08745Sheppo 
89331ae08745Sheppo /*
89341ae08745Sheppo  * Set the default lane attributes. These are copied into
89351ae08745Sheppo  * the attr msg we send to our peer. If they are not acceptable
89361ae08745Sheppo  * then (currently) the handshake ends.
89371ae08745Sheppo  */
89381ae08745Sheppo static void
89391ae08745Sheppo vsw_set_lane_attr(vsw_t *vswp, lane_t *lp)
89401ae08745Sheppo {
89411ae08745Sheppo 	bzero(lp, sizeof (lane_t));
89421ae08745Sheppo 
89431ae08745Sheppo 	READ_ENTER(&vswp->if_lockrw);
89441ae08745Sheppo 	ether_copy(&(vswp->if_addr), &(lp->addr));
89451ae08745Sheppo 	RW_EXIT(&vswp->if_lockrw);
89461ae08745Sheppo 
89471ae08745Sheppo 	lp->mtu = VSW_MTU;
89481ae08745Sheppo 	lp->addr_type = ADDR_TYPE_MAC;
89491ae08745Sheppo 	lp->xfer_mode = VIO_DRING_MODE;
89501ae08745Sheppo 	lp->ack_freq = 0;	/* for shared mode */
8951d10e4ef2Snarayan 
8952d10e4ef2Snarayan 	mutex_enter(&lp->seq_lock);
89531ae08745Sheppo 	lp->seq_num = VNET_ISS;
8954d10e4ef2Snarayan 	mutex_exit(&lp->seq_lock);
89551ae08745Sheppo }
89561ae08745Sheppo 
89571ae08745Sheppo /*
89581ae08745Sheppo  * Verify that the attributes are acceptable.
89591ae08745Sheppo  *
89601ae08745Sheppo  * FUTURE: If some attributes are not acceptable, change them
89611ae08745Sheppo  * our desired values.
89621ae08745Sheppo  */
89631ae08745Sheppo static int
89641ae08745Sheppo vsw_check_attr(vnet_attr_msg_t *pkt, vsw_port_t *port)
89651ae08745Sheppo {
89661ae08745Sheppo 	int	ret = 0;
89671ae08745Sheppo 
89681ae08745Sheppo 	D1(NULL, "vsw_check_attr enter\n");
89691ae08745Sheppo 
89701ae08745Sheppo 	/*
89711ae08745Sheppo 	 * Note we currently only support in-band descriptors
89721ae08745Sheppo 	 * and descriptor rings, not packet based transfer (VIO_PKT_MODE)
89731ae08745Sheppo 	 */
89741ae08745Sheppo 	if ((pkt->xfer_mode != VIO_DESC_MODE) &&
89751ae08745Sheppo 	    (pkt->xfer_mode != VIO_DRING_MODE)) {
8976205eeb1aSlm66018 		D2(NULL, "vsw_check_attr: unknown mode %x\n", pkt->xfer_mode);
89771ae08745Sheppo 		ret = 1;
89781ae08745Sheppo 	}
89791ae08745Sheppo 
89801ae08745Sheppo 	/* Only support MAC addresses at moment. */
89811ae08745Sheppo 	if ((pkt->addr_type != ADDR_TYPE_MAC) || (pkt->addr == 0)) {
89821ae08745Sheppo 		D2(NULL, "vsw_check_attr: invalid addr_type %x, "
8983205eeb1aSlm66018 		    "or address 0x%llx\n", pkt->addr_type, pkt->addr);
89841ae08745Sheppo 		ret = 1;
89851ae08745Sheppo 	}
89861ae08745Sheppo 
89871ae08745Sheppo 	/*
89881ae08745Sheppo 	 * MAC address supplied by device should match that stored
89891ae08745Sheppo 	 * in the vsw-port OBP node. Need to decide what to do if they
89901ae08745Sheppo 	 * don't match, for the moment just warn but don't fail.
89911ae08745Sheppo 	 */
89921ae08745Sheppo 	if (bcmp(&pkt->addr, &port->p_macaddr, ETHERADDRL) != 0) {
89931ae08745Sheppo 		DERR(NULL, "vsw_check_attr: device supplied address "
89941ae08745Sheppo 		    "0x%llx doesn't match node address 0x%llx\n",
89951ae08745Sheppo 		    pkt->addr, port->p_macaddr);
89961ae08745Sheppo 	}
89971ae08745Sheppo 
89981ae08745Sheppo 	/*
89991ae08745Sheppo 	 * Ack freq only makes sense in pkt mode, in shared
90001ae08745Sheppo 	 * mode the ring descriptors say whether or not to
90011ae08745Sheppo 	 * send back an ACK.
90021ae08745Sheppo 	 */
90031ae08745Sheppo 	if ((pkt->xfer_mode == VIO_DRING_MODE) &&
90041ae08745Sheppo 	    (pkt->ack_freq > 0)) {
90051ae08745Sheppo 		D2(NULL, "vsw_check_attr: non zero ack freq "
90061ae08745Sheppo 		    " in SHM mode\n");
90071ae08745Sheppo 		ret = 1;
90081ae08745Sheppo 	}
90091ae08745Sheppo 
90101ae08745Sheppo 	/*
90111ae08745Sheppo 	 * Note: for the moment we only support ETHER
90121ae08745Sheppo 	 * frames. This may change in the future.
90131ae08745Sheppo 	 */
90141ae08745Sheppo 	if ((pkt->mtu > VSW_MTU) || (pkt->mtu <= 0)) {
90151ae08745Sheppo 		D2(NULL, "vsw_check_attr: invalid MTU (0x%llx)\n",
90161ae08745Sheppo 		    pkt->mtu);
90171ae08745Sheppo 		ret = 1;
90181ae08745Sheppo 	}
90191ae08745Sheppo 
90201ae08745Sheppo 	D1(NULL, "vsw_check_attr exit\n");
90211ae08745Sheppo 
90221ae08745Sheppo 	return (ret);
90231ae08745Sheppo }
90241ae08745Sheppo 
90251ae08745Sheppo /*
90261ae08745Sheppo  * Returns 1 if there is a problem, 0 otherwise.
90271ae08745Sheppo  */
90281ae08745Sheppo static int
90291ae08745Sheppo vsw_check_dring_info(vio_dring_reg_msg_t *pkt)
90301ae08745Sheppo {
90311ae08745Sheppo 	_NOTE(ARGUNUSED(pkt))
90321ae08745Sheppo 
90331ae08745Sheppo 	int	ret = 0;
90341ae08745Sheppo 
90351ae08745Sheppo 	D1(NULL, "vsw_check_dring_info enter\n");
90361ae08745Sheppo 
90371ae08745Sheppo 	if ((pkt->num_descriptors == 0) ||
90381ae08745Sheppo 	    (pkt->descriptor_size == 0) ||
90391ae08745Sheppo 	    (pkt->ncookies != 1)) {
90401ae08745Sheppo 		DERR(NULL, "vsw_check_dring_info: invalid dring msg");
90411ae08745Sheppo 		ret = 1;
90421ae08745Sheppo 	}
90431ae08745Sheppo 
90441ae08745Sheppo 	D1(NULL, "vsw_check_dring_info exit\n");
90451ae08745Sheppo 
90461ae08745Sheppo 	return (ret);
90471ae08745Sheppo }
90481ae08745Sheppo 
90491ae08745Sheppo /*
90501ae08745Sheppo  * Returns 1 if two memory cookies match. Otherwise returns 0.
90511ae08745Sheppo  */
90521ae08745Sheppo static int
90531ae08745Sheppo vsw_mem_cookie_match(ldc_mem_cookie_t *m1, ldc_mem_cookie_t *m2)
90541ae08745Sheppo {
90551ae08745Sheppo 	if ((m1->addr != m2->addr) ||
90561ae08745Sheppo 	    (m2->size != m2->size)) {
90571ae08745Sheppo 		return (0);
90581ae08745Sheppo 	} else {
90591ae08745Sheppo 		return (1);
90601ae08745Sheppo 	}
90611ae08745Sheppo }
90621ae08745Sheppo 
90631ae08745Sheppo /*
90641ae08745Sheppo  * Returns 1 if ring described in reg message matches that
90651ae08745Sheppo  * described by dring_info structure. Otherwise returns 0.
90661ae08745Sheppo  */
90671ae08745Sheppo static int
90681ae08745Sheppo vsw_dring_match(dring_info_t *dp, vio_dring_reg_msg_t *msg)
90691ae08745Sheppo {
90701ae08745Sheppo 	if ((msg->descriptor_size != dp->descriptor_size) ||
90711ae08745Sheppo 	    (msg->num_descriptors != dp->num_descriptors) ||
90721ae08745Sheppo 	    (msg->ncookies != dp->ncookies) ||
90731ae08745Sheppo 	    !(vsw_mem_cookie_match(&msg->cookie[0], &dp->cookie[0]))) {
90741ae08745Sheppo 		return (0);
90751ae08745Sheppo 	} else {
90761ae08745Sheppo 		return (1);
90771ae08745Sheppo 	}
90781ae08745Sheppo 
90791ae08745Sheppo }
90801ae08745Sheppo 
90811ae08745Sheppo static caddr_t
90821ae08745Sheppo vsw_print_ethaddr(uint8_t *a, char *ebuf)
90831ae08745Sheppo {
90841ae08745Sheppo 	(void) sprintf(ebuf, "%x:%x:%x:%x:%x:%x",
90851ae08745Sheppo 	    a[0], a[1], a[2], a[3], a[4], a[5]);
90861ae08745Sheppo 	return (ebuf);
90871ae08745Sheppo }
90881ae08745Sheppo 
90891ae08745Sheppo /*
90901ae08745Sheppo  * Reset and free all the resources associated with
90911ae08745Sheppo  * the channel.
90921ae08745Sheppo  */
90931ae08745Sheppo static void
90941ae08745Sheppo vsw_free_lane_resources(vsw_ldc_t *ldcp, uint64_t dir)
90951ae08745Sheppo {
90961ae08745Sheppo 	dring_info_t		*dp, *dpp;
90971ae08745Sheppo 	lane_t			*lp = NULL;
90981ae08745Sheppo 	int			rv = 0;
90991ae08745Sheppo 
91001ae08745Sheppo 	ASSERT(ldcp != NULL);
91011ae08745Sheppo 
91021ae08745Sheppo 	D1(ldcp->ldc_vswp, "%s (%lld): enter", __func__, ldcp->ldc_id);
91031ae08745Sheppo 
91041ae08745Sheppo 	if (dir == INBOUND) {
91051ae08745Sheppo 		D2(ldcp->ldc_vswp, "%s: freeing INBOUND lane"
91061ae08745Sheppo 		    " of channel %lld", __func__, ldcp->ldc_id);
91071ae08745Sheppo 		lp = &ldcp->lane_in;
91081ae08745Sheppo 	} else {
91091ae08745Sheppo 		D2(ldcp->ldc_vswp, "%s: freeing OUTBOUND lane"
91101ae08745Sheppo 		    " of channel %lld", __func__, ldcp->ldc_id);
91111ae08745Sheppo 		lp = &ldcp->lane_out;
91121ae08745Sheppo 	}
91131ae08745Sheppo 
91141ae08745Sheppo 	lp->lstate = VSW_LANE_INACTIV;
9115d10e4ef2Snarayan 	mutex_enter(&lp->seq_lock);
91161ae08745Sheppo 	lp->seq_num = VNET_ISS;
9117d10e4ef2Snarayan 	mutex_exit(&lp->seq_lock);
91181ae08745Sheppo 	if (lp->dringp) {
91191ae08745Sheppo 		if (dir == INBOUND) {
9120445b4c2eSsb155480 			WRITE_ENTER(&lp->dlistrw);
91211ae08745Sheppo 			dp = lp->dringp;
91221ae08745Sheppo 			while (dp != NULL) {
91231ae08745Sheppo 				dpp = dp->next;
91241ae08745Sheppo 				if (dp->handle != NULL)
91251ae08745Sheppo 					(void) ldc_mem_dring_unmap(dp->handle);
91261ae08745Sheppo 				kmem_free(dp, sizeof (dring_info_t));
91271ae08745Sheppo 				dp = dpp;
91281ae08745Sheppo 			}
9129445b4c2eSsb155480 			RW_EXIT(&lp->dlistrw);
91301ae08745Sheppo 		} else {
91311ae08745Sheppo 			/*
91321ae08745Sheppo 			 * unbind, destroy exported dring, free dring struct
91331ae08745Sheppo 			 */
9134445b4c2eSsb155480 			WRITE_ENTER(&lp->dlistrw);
91351ae08745Sheppo 			dp = lp->dringp;
91361ae08745Sheppo 			rv = vsw_free_ring(dp);
9137445b4c2eSsb155480 			RW_EXIT(&lp->dlistrw);
91381ae08745Sheppo 		}
91391ae08745Sheppo 		if (rv == 0) {
91401ae08745Sheppo 			lp->dringp = NULL;
91411ae08745Sheppo 		}
91421ae08745Sheppo 	}
91431ae08745Sheppo 
91441ae08745Sheppo 	D1(ldcp->ldc_vswp, "%s (%lld): exit", __func__, ldcp->ldc_id);
91451ae08745Sheppo }
91461ae08745Sheppo 
91471ae08745Sheppo /*
91481ae08745Sheppo  * Free ring and all associated resources.
9149445b4c2eSsb155480  *
9150445b4c2eSsb155480  * Should be called with dlistrw rwlock held as writer.
91511ae08745Sheppo  */
91521ae08745Sheppo static int
91531ae08745Sheppo vsw_free_ring(dring_info_t *dp)
91541ae08745Sheppo {
91551ae08745Sheppo 	vsw_private_desc_t	*paddr = NULL;
91561ae08745Sheppo 	dring_info_t		*dpp;
91571ae08745Sheppo 	int			i, rv = 1;
91581ae08745Sheppo 
91591ae08745Sheppo 	while (dp != NULL) {
91601ae08745Sheppo 		mutex_enter(&dp->dlock);
91611ae08745Sheppo 		dpp = dp->next;
91621ae08745Sheppo 		if (dp->priv_addr != NULL) {
91631ae08745Sheppo 			/*
91641ae08745Sheppo 			 * First unbind and free the memory handles
91651ae08745Sheppo 			 * stored in each descriptor within the ring.
91661ae08745Sheppo 			 */
91671ae08745Sheppo 			for (i = 0; i < VSW_RING_NUM_EL; i++) {
91681ae08745Sheppo 				paddr = (vsw_private_desc_t *)
91691ae08745Sheppo 				    dp->priv_addr + i;
91701ae08745Sheppo 				if (paddr->memhandle != NULL) {
91711ae08745Sheppo 					if (paddr->bound == 1) {
91721ae08745Sheppo 						rv = ldc_mem_unbind_handle(
91731ae08745Sheppo 						    paddr->memhandle);
91741ae08745Sheppo 
91751ae08745Sheppo 						if (rv != 0) {
91761ae08745Sheppo 							DERR(NULL, "error "
91771ae08745Sheppo 							"unbinding handle for "
91781ae08745Sheppo 							"ring 0x%llx at pos %d",
91791ae08745Sheppo 							    dp, i);
91801ae08745Sheppo 							mutex_exit(&dp->dlock);
91811ae08745Sheppo 							return (rv);
91821ae08745Sheppo 						}
91831ae08745Sheppo 						paddr->bound = 0;
91841ae08745Sheppo 					}
91851ae08745Sheppo 
91861ae08745Sheppo 					rv = ldc_mem_free_handle(
91871ae08745Sheppo 					    paddr->memhandle);
91881ae08745Sheppo 					if (rv != 0) {
91891ae08745Sheppo 						DERR(NULL, "error freeing "
9190205eeb1aSlm66018 						    "handle for ring 0x%llx "
9191205eeb1aSlm66018 						    "at pos %d", dp, i);
91921ae08745Sheppo 						mutex_exit(&dp->dlock);
91931ae08745Sheppo 						return (rv);
91941ae08745Sheppo 					}
91951ae08745Sheppo 					paddr->memhandle = NULL;
91961ae08745Sheppo 				}
9197d10e4ef2Snarayan 				mutex_destroy(&paddr->dstate_lock);
91981ae08745Sheppo 			}
9199205eeb1aSlm66018 			kmem_free(dp->priv_addr,
9200205eeb1aSlm66018 			    (sizeof (vsw_private_desc_t) * VSW_RING_NUM_EL));
92011ae08745Sheppo 		}
92021ae08745Sheppo 
92031ae08745Sheppo 		/*
92041ae08745Sheppo 		 * Now unbind and destroy the ring itself.
92051ae08745Sheppo 		 */
92061ae08745Sheppo 		if (dp->handle != NULL) {
92071ae08745Sheppo 			(void) ldc_mem_dring_unbind(dp->handle);
92081ae08745Sheppo 			(void) ldc_mem_dring_destroy(dp->handle);
92091ae08745Sheppo 		}
92101ae08745Sheppo 
92111ae08745Sheppo 		if (dp->data_addr != NULL) {
92121ae08745Sheppo 			kmem_free(dp->data_addr, dp->data_sz);
92131ae08745Sheppo 		}
92141ae08745Sheppo 
92151ae08745Sheppo 		mutex_exit(&dp->dlock);
92161ae08745Sheppo 		mutex_destroy(&dp->dlock);
9217d10e4ef2Snarayan 		mutex_destroy(&dp->restart_lock);
92181ae08745Sheppo 		kmem_free(dp, sizeof (dring_info_t));
92191ae08745Sheppo 
92201ae08745Sheppo 		dp = dpp;
92211ae08745Sheppo 	}
92221ae08745Sheppo 	return (0);
92231ae08745Sheppo }
92241ae08745Sheppo 
92251ae08745Sheppo /*
92261ae08745Sheppo  * Debugging routines
92271ae08745Sheppo  */
92281ae08745Sheppo static void
92291ae08745Sheppo display_state(void)
92301ae08745Sheppo {
92311ae08745Sheppo 	vsw_t		*vswp;
92321ae08745Sheppo 	vsw_port_list_t	*plist;
92331ae08745Sheppo 	vsw_port_t 	*port;
92341ae08745Sheppo 	vsw_ldc_list_t	*ldcl;
92351ae08745Sheppo 	vsw_ldc_t 	*ldcp;
92361ae08745Sheppo 
92371ae08745Sheppo 	cmn_err(CE_NOTE, "***** system state *****");
92381ae08745Sheppo 
92391ae08745Sheppo 	for (vswp = vsw_head; vswp; vswp = vswp->next) {
92401ae08745Sheppo 		plist = &vswp->plist;
92411ae08745Sheppo 		READ_ENTER(&plist->lockrw);
92421ae08745Sheppo 		cmn_err(CE_CONT, "vsw instance %d has %d ports attached\n",
92431ae08745Sheppo 		    vswp->instance, plist->num_ports);
92441ae08745Sheppo 
92451ae08745Sheppo 		for (port = plist->head; port != NULL; port = port->p_next) {
92461ae08745Sheppo 			ldcl = &port->p_ldclist;
92471ae08745Sheppo 			cmn_err(CE_CONT, "port %d : %d ldcs attached\n",
92481ae08745Sheppo 			    port->p_instance, ldcl->num_ldcs);
92491ae08745Sheppo 			READ_ENTER(&ldcl->lockrw);
92501ae08745Sheppo 			ldcp = ldcl->head;
92511ae08745Sheppo 			for (; ldcp != NULL; ldcp = ldcp->ldc_next) {
92521ae08745Sheppo 				cmn_err(CE_CONT, "chan %lu : dev %d : "
92531ae08745Sheppo 				    "status %d : phase %u\n",
92541ae08745Sheppo 				    ldcp->ldc_id, ldcp->dev_class,
92551ae08745Sheppo 				    ldcp->ldc_status, ldcp->hphase);
92561ae08745Sheppo 				cmn_err(CE_CONT, "chan %lu : lsession %lu : "
9257205eeb1aSlm66018 				    "psession %lu\n", ldcp->ldc_id,
9258205eeb1aSlm66018 				    ldcp->local_session, ldcp->peer_session);
92591ae08745Sheppo 
92601ae08745Sheppo 				cmn_err(CE_CONT, "Inbound lane:\n");
92611ae08745Sheppo 				display_lane(&ldcp->lane_in);
92621ae08745Sheppo 				cmn_err(CE_CONT, "Outbound lane:\n");
92631ae08745Sheppo 				display_lane(&ldcp->lane_out);
92641ae08745Sheppo 			}
92651ae08745Sheppo 			RW_EXIT(&ldcl->lockrw);
92661ae08745Sheppo 		}
92671ae08745Sheppo 		RW_EXIT(&plist->lockrw);
92681ae08745Sheppo 	}
92691ae08745Sheppo 	cmn_err(CE_NOTE, "***** system state *****");
92701ae08745Sheppo }
92711ae08745Sheppo 
92721ae08745Sheppo static void
92731ae08745Sheppo display_lane(lane_t *lp)
92741ae08745Sheppo {
92751ae08745Sheppo 	dring_info_t	*drp;
92761ae08745Sheppo 
92771ae08745Sheppo 	cmn_err(CE_CONT, "ver 0x%x:0x%x : state %lx : mtu 0x%lx\n",
92781ae08745Sheppo 	    lp->ver_major, lp->ver_minor, lp->lstate, lp->mtu);
92791ae08745Sheppo 	cmn_err(CE_CONT, "addr_type %d : addr 0x%lx : xmode %d\n",
92801ae08745Sheppo 	    lp->addr_type, lp->addr, lp->xfer_mode);
92811ae08745Sheppo 	cmn_err(CE_CONT, "dringp 0x%lx\n", (uint64_t)lp->dringp);
92821ae08745Sheppo 
92831ae08745Sheppo 	cmn_err(CE_CONT, "Dring info:\n");
92841ae08745Sheppo 	for (drp = lp->dringp; drp != NULL; drp = drp->next) {
92851ae08745Sheppo 		cmn_err(CE_CONT, "\tnum_desc %u : dsize %u\n",
92861ae08745Sheppo 		    drp->num_descriptors, drp->descriptor_size);
92871ae08745Sheppo 		cmn_err(CE_CONT, "\thandle 0x%lx\n", drp->handle);
92881ae08745Sheppo 		cmn_err(CE_CONT, "\tpub_addr 0x%lx : priv_addr 0x%lx\n",
92891ae08745Sheppo 		    (uint64_t)drp->pub_addr, (uint64_t)drp->priv_addr);
92901ae08745Sheppo 		cmn_err(CE_CONT, "\tident 0x%lx : end_idx %lu\n",
92911ae08745Sheppo 		    drp->ident, drp->end_idx);
92921ae08745Sheppo 		display_ring(drp);
92931ae08745Sheppo 	}
92941ae08745Sheppo }
92951ae08745Sheppo 
92961ae08745Sheppo static void
92971ae08745Sheppo display_ring(dring_info_t *dringp)
92981ae08745Sheppo {
92991ae08745Sheppo 	uint64_t		i;
93001ae08745Sheppo 	uint64_t		priv_count = 0;
93011ae08745Sheppo 	uint64_t		pub_count = 0;
93021ae08745Sheppo 	vnet_public_desc_t	*pub_addr = NULL;
93031ae08745Sheppo 	vsw_private_desc_t	*priv_addr = NULL;
93041ae08745Sheppo 
93051ae08745Sheppo 	for (i = 0; i < VSW_RING_NUM_EL; i++) {
93061ae08745Sheppo 		if (dringp->pub_addr != NULL) {
93071ae08745Sheppo 			pub_addr = (vnet_public_desc_t *)dringp->pub_addr + i;
93081ae08745Sheppo 
93091ae08745Sheppo 			if (pub_addr->hdr.dstate == VIO_DESC_FREE)
93101ae08745Sheppo 				pub_count++;
93111ae08745Sheppo 		}
93121ae08745Sheppo 
93131ae08745Sheppo 		if (dringp->priv_addr != NULL) {
9314205eeb1aSlm66018 			priv_addr = (vsw_private_desc_t *)dringp->priv_addr + i;
93151ae08745Sheppo 
93161ae08745Sheppo 			if (priv_addr->dstate == VIO_DESC_FREE)
93171ae08745Sheppo 				priv_count++;
93181ae08745Sheppo 		}
93191ae08745Sheppo 	}
93201ae08745Sheppo 	cmn_err(CE_CONT, "\t%lu elements: %lu priv free: %lu pub free\n",
93211ae08745Sheppo 	    i, priv_count, pub_count);
93221ae08745Sheppo }
93231ae08745Sheppo 
93241ae08745Sheppo static void
93251ae08745Sheppo dump_flags(uint64_t state)
93261ae08745Sheppo {
93271ae08745Sheppo 	int	i;
93281ae08745Sheppo 
93291ae08745Sheppo 	typedef struct flag_name {
93301ae08745Sheppo 		int	flag_val;
93311ae08745Sheppo 		char	*flag_name;
93321ae08745Sheppo 	} flag_name_t;
93331ae08745Sheppo 
93341ae08745Sheppo 	flag_name_t	flags[] = {
93351ae08745Sheppo 		VSW_VER_INFO_SENT, "VSW_VER_INFO_SENT",
93361ae08745Sheppo 		VSW_VER_INFO_RECV, "VSW_VER_INFO_RECV",
93371ae08745Sheppo 		VSW_VER_ACK_RECV, "VSW_VER_ACK_RECV",
93381ae08745Sheppo 		VSW_VER_ACK_SENT, "VSW_VER_ACK_SENT",
93391ae08745Sheppo 		VSW_VER_NACK_RECV, "VSW_VER_NACK_RECV",
93401ae08745Sheppo 		VSW_VER_NACK_SENT, "VSW_VER_NACK_SENT",
93411ae08745Sheppo 		VSW_ATTR_INFO_SENT, "VSW_ATTR_INFO_SENT",
93421ae08745Sheppo 		VSW_ATTR_INFO_RECV, "VSW_ATTR_INFO_RECV",
93431ae08745Sheppo 		VSW_ATTR_ACK_SENT, "VSW_ATTR_ACK_SENT",
93441ae08745Sheppo 		VSW_ATTR_ACK_RECV, "VSW_ATTR_ACK_RECV",
93451ae08745Sheppo 		VSW_ATTR_NACK_SENT, "VSW_ATTR_NACK_SENT",
93461ae08745Sheppo 		VSW_ATTR_NACK_RECV, "VSW_ATTR_NACK_RECV",
93471ae08745Sheppo 		VSW_DRING_INFO_SENT, "VSW_DRING_INFO_SENT",
93481ae08745Sheppo 		VSW_DRING_INFO_RECV, "VSW_DRING_INFO_RECV",
93491ae08745Sheppo 		VSW_DRING_ACK_SENT, "VSW_DRING_ACK_SENT",
93501ae08745Sheppo 		VSW_DRING_ACK_RECV, "VSW_DRING_ACK_RECV",
93511ae08745Sheppo 		VSW_DRING_NACK_SENT, "VSW_DRING_NACK_SENT",
93521ae08745Sheppo 		VSW_DRING_NACK_RECV, "VSW_DRING_NACK_RECV",
93531ae08745Sheppo 		VSW_RDX_INFO_SENT, "VSW_RDX_INFO_SENT",
93541ae08745Sheppo 		VSW_RDX_INFO_RECV, "VSW_RDX_INFO_RECV",
93551ae08745Sheppo 		VSW_RDX_ACK_SENT, "VSW_RDX_ACK_SENT",
93561ae08745Sheppo 		VSW_RDX_ACK_RECV, "VSW_RDX_ACK_RECV",
93571ae08745Sheppo 		VSW_RDX_NACK_SENT, "VSW_RDX_NACK_SENT",
93581ae08745Sheppo 		VSW_RDX_NACK_RECV, "VSW_RDX_NACK_RECV",
93591ae08745Sheppo 		VSW_MCST_INFO_SENT, "VSW_MCST_INFO_SENT",
93601ae08745Sheppo 		VSW_MCST_INFO_RECV, "VSW_MCST_INFO_RECV",
93611ae08745Sheppo 		VSW_MCST_ACK_SENT, "VSW_MCST_ACK_SENT",
93621ae08745Sheppo 		VSW_MCST_ACK_RECV, "VSW_MCST_ACK_RECV",
93631ae08745Sheppo 		VSW_MCST_NACK_SENT, "VSW_MCST_NACK_SENT",
93641ae08745Sheppo 		VSW_MCST_NACK_RECV, "VSW_MCST_NACK_RECV",
93651ae08745Sheppo 		VSW_LANE_ACTIVE, "VSW_LANE_ACTIVE"};
93661ae08745Sheppo 
93671ae08745Sheppo 	DERR(NULL, "DUMP_FLAGS: %llx\n", state);
93681ae08745Sheppo 	for (i = 0; i < sizeof (flags)/sizeof (flag_name_t); i++) {
93691ae08745Sheppo 		if (state & flags[i].flag_val)
93701ae08745Sheppo 			DERR(NULL, "DUMP_FLAGS %s", flags[i].flag_name);
93711ae08745Sheppo 	}
93721ae08745Sheppo }
9373