xref: /titanic_53/usr/src/uts/sun4v/io/vsw.c (revision 023505bcce788e9ac958a334707e49cddbf18d1d)
11ae08745Sheppo /*
21ae08745Sheppo  * CDDL HEADER START
31ae08745Sheppo  *
41ae08745Sheppo  * The contents of this file are subject to the terms of the
51ae08745Sheppo  * Common Development and Distribution License (the "License").
61ae08745Sheppo  * You may not use this file except in compliance with the License.
71ae08745Sheppo  *
81ae08745Sheppo  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
91ae08745Sheppo  * or http://www.opensolaris.org/os/licensing.
101ae08745Sheppo  * See the License for the specific language governing permissions
111ae08745Sheppo  * and limitations under the License.
121ae08745Sheppo  *
131ae08745Sheppo  * When distributing Covered Code, include this CDDL HEADER in each
141ae08745Sheppo  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
151ae08745Sheppo  * If applicable, add the following below this CDDL HEADER, with the
161ae08745Sheppo  * fields enclosed by brackets "[]" replaced with your own identifying
171ae08745Sheppo  * information: Portions Copyright [yyyy] [name of copyright owner]
181ae08745Sheppo  *
191ae08745Sheppo  * CDDL HEADER END
201ae08745Sheppo  */
211ae08745Sheppo 
221ae08745Sheppo /*
23b071742bSsg70180  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
241ae08745Sheppo  * Use is subject to license terms.
251ae08745Sheppo  */
261ae08745Sheppo 
271ae08745Sheppo #pragma ident	"%Z%%M%	%I%	%E% SMI"
281ae08745Sheppo 
291ae08745Sheppo #include <sys/types.h>
301ae08745Sheppo #include <sys/errno.h>
311ae08745Sheppo #include <sys/debug.h>
321ae08745Sheppo #include <sys/time.h>
331ae08745Sheppo #include <sys/sysmacros.h>
341ae08745Sheppo #include <sys/systm.h>
351ae08745Sheppo #include <sys/user.h>
361ae08745Sheppo #include <sys/stropts.h>
371ae08745Sheppo #include <sys/stream.h>
381ae08745Sheppo #include <sys/strlog.h>
391ae08745Sheppo #include <sys/strsubr.h>
401ae08745Sheppo #include <sys/cmn_err.h>
411ae08745Sheppo #include <sys/cpu.h>
421ae08745Sheppo #include <sys/kmem.h>
431ae08745Sheppo #include <sys/conf.h>
441ae08745Sheppo #include <sys/ddi.h>
451ae08745Sheppo #include <sys/sunddi.h>
461ae08745Sheppo #include <sys/ksynch.h>
471ae08745Sheppo #include <sys/stat.h>
481ae08745Sheppo #include <sys/kstat.h>
491ae08745Sheppo #include <sys/vtrace.h>
501ae08745Sheppo #include <sys/strsun.h>
511ae08745Sheppo #include <sys/dlpi.h>
521ae08745Sheppo #include <sys/ethernet.h>
531ae08745Sheppo #include <net/if.h>
541ae08745Sheppo #include <sys/varargs.h>
551ae08745Sheppo #include <sys/machsystm.h>
561ae08745Sheppo #include <sys/modctl.h>
571ae08745Sheppo #include <sys/modhash.h>
581ae08745Sheppo #include <sys/mac.h>
59ba2e4443Sseb #include <sys/mac_ether.h>
601ae08745Sheppo #include <sys/taskq.h>
611ae08745Sheppo #include <sys/note.h>
621ae08745Sheppo #include <sys/mach_descrip.h>
631ae08745Sheppo #include <sys/mac.h>
641ae08745Sheppo #include <sys/mdeg.h>
651ae08745Sheppo #include <sys/ldc.h>
661ae08745Sheppo #include <sys/vsw_fdb.h>
671ae08745Sheppo #include <sys/vsw.h>
681ae08745Sheppo #include <sys/vio_mailbox.h>
691ae08745Sheppo #include <sys/vnet_mailbox.h>
701ae08745Sheppo #include <sys/vnet_common.h>
71d10e4ef2Snarayan #include <sys/vio_util.h>
72d10e4ef2Snarayan #include <sys/sdt.h>
731ae08745Sheppo 
741ae08745Sheppo /*
751ae08745Sheppo  * Function prototypes.
761ae08745Sheppo  */
771ae08745Sheppo static	int vsw_attach(dev_info_t *, ddi_attach_cmd_t);
781ae08745Sheppo static	int vsw_detach(dev_info_t *, ddi_detach_cmd_t);
791ae08745Sheppo static	int vsw_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
8034683adeSsg70180 static	int vsw_get_md_physname(vsw_t *, md_t *, mde_cookie_t, char *);
8134683adeSsg70180 static	int vsw_get_md_smodes(vsw_t *, md_t *, mde_cookie_t, uint8_t *, int *);
82e1ebb9ecSlm66018 static	int vsw_get_physaddr(vsw_t *);
8334683adeSsg70180 static	int vsw_setup_switching(vsw_t *);
841ae08745Sheppo static	int vsw_setup_layer2(vsw_t *);
851ae08745Sheppo static	int vsw_setup_layer3(vsw_t *);
861ae08745Sheppo 
877636cb21Slm66018 /* MAC Ring table functions. */
887636cb21Slm66018 static void vsw_mac_ring_tbl_init(vsw_t *vswp);
897636cb21Slm66018 static void vsw_mac_ring_tbl_destroy(vsw_t *vswp);
907636cb21Slm66018 static void vsw_queue_worker(vsw_mac_ring_t *rrp);
917636cb21Slm66018 static void vsw_queue_stop(vsw_queue_t *vqp);
927636cb21Slm66018 static vsw_queue_t *vsw_queue_create();
937636cb21Slm66018 static void vsw_queue_destroy(vsw_queue_t *vqp);
947636cb21Slm66018 
951ae08745Sheppo /* MAC layer routines */
967636cb21Slm66018 static mac_resource_handle_t vsw_mac_ring_add_cb(void *arg,
977636cb21Slm66018 		mac_resource_t *mrp);
98e1ebb9ecSlm66018 static	int vsw_get_hw_maddr(vsw_t *);
995f94e909Ssg70180 static	int vsw_set_hw(vsw_t *, vsw_port_t *, int);
1005f94e909Ssg70180 static	int vsw_set_hw_addr(vsw_t *, mac_multi_addr_t *);
1015f94e909Ssg70180 static	int vsw_set_hw_promisc(vsw_t *, vsw_port_t *, int);
1025f94e909Ssg70180 static	int vsw_unset_hw(vsw_t *, vsw_port_t *, int);
1035f94e909Ssg70180 static	int vsw_unset_hw_addr(vsw_t *, int);
1045f94e909Ssg70180 static	int vsw_unset_hw_promisc(vsw_t *, vsw_port_t *, int);
1055f94e909Ssg70180 static void vsw_reconfig_hw(vsw_t *);
1065f94e909Ssg70180 static int vsw_prog_if(vsw_t *);
1075f94e909Ssg70180 static int vsw_prog_ports(vsw_t *);
1087636cb21Slm66018 static int vsw_mac_attach(vsw_t *vswp);
1097636cb21Slm66018 static void vsw_mac_detach(vsw_t *vswp);
1107636cb21Slm66018 
1117636cb21Slm66018 static void vsw_rx_queue_cb(void *, mac_resource_handle_t, mblk_t *);
1121ae08745Sheppo static void vsw_rx_cb(void *, mac_resource_handle_t, mblk_t *);
1131ae08745Sheppo static mblk_t *vsw_tx_msg(vsw_t *, mblk_t *);
1141ae08745Sheppo static int vsw_mac_register(vsw_t *);
1151ae08745Sheppo static int vsw_mac_unregister(vsw_t *);
116ba2e4443Sseb static int vsw_m_stat(void *, uint_t, uint64_t *);
1171ae08745Sheppo static void vsw_m_stop(void *arg);
1181ae08745Sheppo static int vsw_m_start(void *arg);
1191ae08745Sheppo static int vsw_m_unicst(void *arg, const uint8_t *);
1201ae08745Sheppo static int vsw_m_multicst(void *arg, boolean_t, const uint8_t *);
1211ae08745Sheppo static int vsw_m_promisc(void *arg, boolean_t);
1221ae08745Sheppo static mblk_t *vsw_m_tx(void *arg, mblk_t *);
1231ae08745Sheppo 
1241ae08745Sheppo /* MDEG routines */
12534683adeSsg70180 static	int vsw_mdeg_register(vsw_t *vswp);
1261ae08745Sheppo static	void vsw_mdeg_unregister(vsw_t *vswp);
1271ae08745Sheppo static	int vsw_mdeg_cb(void *cb_argp, mdeg_result_t *);
12834683adeSsg70180 static	int vsw_port_mdeg_cb(void *cb_argp, mdeg_result_t *);
12934683adeSsg70180 static	void vsw_get_initial_md_properties(vsw_t *vswp, md_t *, mde_cookie_t);
13034683adeSsg70180 static	void vsw_update_md_prop(vsw_t *, md_t *, mde_cookie_t);
1311ae08745Sheppo 
1321ae08745Sheppo /* Port add/deletion routines */
1331ae08745Sheppo static	int vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node);
1341ae08745Sheppo static	int vsw_port_attach(vsw_t *vswp, int p_instance,
1351ae08745Sheppo 	uint64_t *ldcids, int nids, struct ether_addr *macaddr);
1361ae08745Sheppo static	int vsw_detach_ports(vsw_t *vswp);
1371ae08745Sheppo static	int vsw_port_detach(vsw_t *vswp, int p_instance);
1381ae08745Sheppo static	int vsw_port_delete(vsw_port_t *port);
1391ae08745Sheppo static	int vsw_ldc_attach(vsw_port_t *port, uint64_t ldc_id);
1401ae08745Sheppo static	int vsw_ldc_detach(vsw_port_t *port, uint64_t ldc_id);
1411ae08745Sheppo static	int vsw_init_ldcs(vsw_port_t *port);
1421ae08745Sheppo static	int vsw_uninit_ldcs(vsw_port_t *port);
1431ae08745Sheppo static	int vsw_ldc_init(vsw_ldc_t *ldcp);
1441ae08745Sheppo static	int vsw_ldc_uninit(vsw_ldc_t *ldcp);
1451ae08745Sheppo static	int vsw_drain_ldcs(vsw_port_t *port);
1461ae08745Sheppo static	int vsw_drain_port_taskq(vsw_port_t *port);
1471ae08745Sheppo static	void vsw_marker_task(void *);
1481ae08745Sheppo static	vsw_port_t *vsw_lookup_port(vsw_t *vswp, int p_instance);
1491ae08745Sheppo static	int vsw_plist_del_node(vsw_t *, vsw_port_t *port);
1501ae08745Sheppo 
1511ae08745Sheppo /* Interrupt routines */
1521ae08745Sheppo static	uint_t vsw_ldc_cb(uint64_t cb, caddr_t arg);
1531ae08745Sheppo 
1541ae08745Sheppo /* Handshake routines */
155b071742bSsg70180 static	void vsw_ldc_reinit(vsw_ldc_t *);
156b071742bSsg70180 static	void vsw_process_conn_evt(vsw_ldc_t *, uint16_t);
157b071742bSsg70180 static	void vsw_conn_task(void *);
1581ae08745Sheppo static	int vsw_check_flag(vsw_ldc_t *, int, uint64_t);
1591ae08745Sheppo static	void vsw_next_milestone(vsw_ldc_t *);
1601ae08745Sheppo static	int vsw_supported_version(vio_ver_msg_t *);
1611ae08745Sheppo 
1621ae08745Sheppo /* Data processing routines */
1631ae08745Sheppo static void vsw_process_pkt(void *);
1641ae08745Sheppo static void vsw_dispatch_ctrl_task(vsw_ldc_t *, void *, vio_msg_tag_t);
1651ae08745Sheppo static void vsw_process_ctrl_pkt(void *);
1661ae08745Sheppo static void vsw_process_ctrl_ver_pkt(vsw_ldc_t *, void *);
1671ae08745Sheppo static void vsw_process_ctrl_attr_pkt(vsw_ldc_t *, void *);
1681ae08745Sheppo static void vsw_process_ctrl_mcst_pkt(vsw_ldc_t *, void *);
1691ae08745Sheppo static void vsw_process_ctrl_dring_reg_pkt(vsw_ldc_t *, void *);
1701ae08745Sheppo static void vsw_process_ctrl_dring_unreg_pkt(vsw_ldc_t *, void *);
1711ae08745Sheppo static void vsw_process_ctrl_rdx_pkt(vsw_ldc_t *, void *);
1721ae08745Sheppo static void vsw_process_data_pkt(vsw_ldc_t *, void *, vio_msg_tag_t);
1731ae08745Sheppo static void vsw_process_data_dring_pkt(vsw_ldc_t *, void *);
1741ae08745Sheppo static void vsw_process_data_raw_pkt(vsw_ldc_t *, void *);
1751ae08745Sheppo static void vsw_process_data_ibnd_pkt(vsw_ldc_t *, void *);
1761ae08745Sheppo static void vsw_process_err_pkt(vsw_ldc_t *, void *, vio_msg_tag_t);
1771ae08745Sheppo 
1781ae08745Sheppo /* Switching/data transmit routines */
1791ae08745Sheppo static	void vsw_switch_l2_frame(vsw_t *vswp, mblk_t *mp, int caller,
1801ae08745Sheppo 	    vsw_port_t *port, mac_resource_handle_t);
1811ae08745Sheppo static	void vsw_switch_l3_frame(vsw_t *vswp, mblk_t *mp, int caller,
1821ae08745Sheppo 	    vsw_port_t *port, mac_resource_handle_t);
1831ae08745Sheppo static	int vsw_forward_all(vsw_t *vswp, mblk_t *mp, int caller,
1841ae08745Sheppo 	    vsw_port_t *port);
1851ae08745Sheppo static	int vsw_forward_grp(vsw_t *vswp, mblk_t *mp, int caller,
1861ae08745Sheppo 	    vsw_port_t *port);
1871ae08745Sheppo static	int vsw_portsend(vsw_port_t *, mblk_t *);
1881ae08745Sheppo static	int vsw_dringsend(vsw_ldc_t *, mblk_t *);
1891ae08745Sheppo static	int vsw_descrsend(vsw_ldc_t *, mblk_t *);
1901ae08745Sheppo 
1911ae08745Sheppo /* Packet creation routines */
1923af08d82Slm66018 static void vsw_send_ver(void *);
1931ae08745Sheppo static void vsw_send_attr(vsw_ldc_t *);
1941ae08745Sheppo static vio_dring_reg_msg_t *vsw_create_dring_info_pkt(vsw_ldc_t *);
1951ae08745Sheppo static void vsw_send_dring_info(vsw_ldc_t *);
1961ae08745Sheppo static void vsw_send_rdx(vsw_ldc_t *);
1971ae08745Sheppo 
198b071742bSsg70180 static int vsw_send_msg(vsw_ldc_t *, void *, int, boolean_t);
1991ae08745Sheppo 
2001ae08745Sheppo /* Forwarding database (FDB) routines */
2011ae08745Sheppo static	int vsw_add_fdb(vsw_t *vswp, vsw_port_t *port);
2021ae08745Sheppo static	int vsw_del_fdb(vsw_t *vswp, vsw_port_t *port);
2031ae08745Sheppo static	vsw_port_t *vsw_lookup_fdb(vsw_t *vswp, struct ether_header *);
2041ae08745Sheppo static	int vsw_add_rem_mcst(vnet_mcast_msg_t *, vsw_port_t *);
2051ae08745Sheppo static	int vsw_add_mcst(vsw_t *, uint8_t, uint64_t, void *);
2061ae08745Sheppo static	int vsw_del_mcst(vsw_t *, uint8_t, uint64_t, void *);
2071ae08745Sheppo static	void vsw_del_addr(uint8_t, void *, uint64_t);
2081ae08745Sheppo static	void vsw_del_mcst_port(vsw_port_t *);
2091ae08745Sheppo static	void vsw_del_mcst_vsw(vsw_t *);
2101ae08745Sheppo 
2111ae08745Sheppo /* Dring routines */
2121ae08745Sheppo static dring_info_t *vsw_create_dring(vsw_ldc_t *);
2131ae08745Sheppo static void vsw_create_privring(vsw_ldc_t *);
2141ae08745Sheppo static int vsw_setup_ring(vsw_ldc_t *ldcp, dring_info_t *dp);
2151ae08745Sheppo static int vsw_dring_find_free_desc(dring_info_t *, vsw_private_desc_t **,
2161ae08745Sheppo     int *);
2171ae08745Sheppo static dring_info_t *vsw_ident2dring(lane_t *, uint64_t);
2181ae08745Sheppo 
2191ae08745Sheppo static void vsw_set_lane_attr(vsw_t *, lane_t *);
2201ae08745Sheppo static int vsw_check_attr(vnet_attr_msg_t *, vsw_port_t *);
2211ae08745Sheppo static int vsw_dring_match(dring_info_t *dp, vio_dring_reg_msg_t *msg);
2221ae08745Sheppo static int vsw_mem_cookie_match(ldc_mem_cookie_t *, ldc_mem_cookie_t *);
2231ae08745Sheppo static int vsw_check_dring_info(vio_dring_reg_msg_t *);
2241ae08745Sheppo 
2251ae08745Sheppo /* Misc support routines */
2261ae08745Sheppo static	caddr_t vsw_print_ethaddr(uint8_t *addr, char *ebuf);
2271ae08745Sheppo static void vsw_free_lane_resources(vsw_ldc_t *, uint64_t);
2281ae08745Sheppo static int vsw_free_ring(dring_info_t *);
2291ae08745Sheppo 
2301ae08745Sheppo /* Debugging routines */
2311ae08745Sheppo static void dump_flags(uint64_t);
2321ae08745Sheppo static void display_state(void);
2331ae08745Sheppo static void display_lane(lane_t *);
2341ae08745Sheppo static void display_ring(dring_info_t *);
2351ae08745Sheppo 
236445b4c2eSsb155480 int	vsw_num_handshakes = VNET_NUM_HANDSHAKES; /* # of handshake attempts */
2371ae08745Sheppo int	vsw_wretries = 100;		/* # of write attempts */
238d10e4ef2Snarayan int	vsw_chain_len = 150;		/* max # of mblks in msg chain */
239d10e4ef2Snarayan int	vsw_desc_delay = 0;		/* delay in us */
240d10e4ef2Snarayan int	vsw_read_attempts = 5;		/* # of reads of descriptor */
241d10e4ef2Snarayan 
242d10e4ef2Snarayan uint32_t	vsw_mblk_size = VSW_MBLK_SIZE;
243d10e4ef2Snarayan uint32_t	vsw_num_mblks = VSW_NUM_MBLKS;
244d10e4ef2Snarayan 
245ba2e4443Sseb static	mac_callbacks_t	vsw_m_callbacks = {
246ba2e4443Sseb 	0,
247ba2e4443Sseb 	vsw_m_stat,
248ba2e4443Sseb 	vsw_m_start,
249ba2e4443Sseb 	vsw_m_stop,
250ba2e4443Sseb 	vsw_m_promisc,
251ba2e4443Sseb 	vsw_m_multicst,
252ba2e4443Sseb 	vsw_m_unicst,
253ba2e4443Sseb 	vsw_m_tx,
254ba2e4443Sseb 	NULL,
255ba2e4443Sseb 	NULL,
256ba2e4443Sseb 	NULL
257ba2e4443Sseb };
258ba2e4443Sseb 
2591ae08745Sheppo static	struct	cb_ops	vsw_cb_ops = {
2601ae08745Sheppo 	nulldev,			/* cb_open */
2611ae08745Sheppo 	nulldev,			/* cb_close */
2621ae08745Sheppo 	nodev,				/* cb_strategy */
2631ae08745Sheppo 	nodev,				/* cb_print */
2641ae08745Sheppo 	nodev,				/* cb_dump */
2651ae08745Sheppo 	nodev,				/* cb_read */
2661ae08745Sheppo 	nodev,				/* cb_write */
2671ae08745Sheppo 	nodev,				/* cb_ioctl */
2681ae08745Sheppo 	nodev,				/* cb_devmap */
2691ae08745Sheppo 	nodev,				/* cb_mmap */
2701ae08745Sheppo 	nodev,				/* cb_segmap */
2711ae08745Sheppo 	nochpoll,			/* cb_chpoll */
2721ae08745Sheppo 	ddi_prop_op,			/* cb_prop_op */
2731ae08745Sheppo 	NULL,				/* cb_stream */
2741ae08745Sheppo 	D_MP,				/* cb_flag */
2751ae08745Sheppo 	CB_REV,				/* rev */
2761ae08745Sheppo 	nodev,				/* int (*cb_aread)() */
2771ae08745Sheppo 	nodev				/* int (*cb_awrite)() */
2781ae08745Sheppo };
2791ae08745Sheppo 
2801ae08745Sheppo static	struct	dev_ops	vsw_ops = {
2811ae08745Sheppo 	DEVO_REV,		/* devo_rev */
2821ae08745Sheppo 	0,			/* devo_refcnt */
2831ae08745Sheppo 	vsw_getinfo,		/* devo_getinfo */
2841ae08745Sheppo 	nulldev,		/* devo_identify */
2851ae08745Sheppo 	nulldev,		/* devo_probe */
2861ae08745Sheppo 	vsw_attach,		/* devo_attach */
2871ae08745Sheppo 	vsw_detach,		/* devo_detach */
2881ae08745Sheppo 	nodev,			/* devo_reset */
2891ae08745Sheppo 	&vsw_cb_ops,		/* devo_cb_ops */
2901ae08745Sheppo 	(struct bus_ops *)NULL,	/* devo_bus_ops */
2911ae08745Sheppo 	ddi_power		/* devo_power */
2921ae08745Sheppo };
2931ae08745Sheppo 
2941ae08745Sheppo extern	struct	mod_ops	mod_driverops;
2951ae08745Sheppo static struct modldrv vswmodldrv = {
2961ae08745Sheppo 	&mod_driverops,
297b071742bSsg70180 	"sun4v Virtual Switch %I%",
2981ae08745Sheppo 	&vsw_ops,
2991ae08745Sheppo };
3001ae08745Sheppo 
/*
 * Take / drop both per-channel locks.  ldc_cblock is always acquired
 * before ldc_txlock, and the two are released in the reverse order.
 *
 * Each macro expands to exactly one statement, so it remains correct
 * when used as the body of an unbraced if/else or loop.  The caller
 * supplies the trailing semicolon.
 */
#define	LDC_ENTER_LOCK(ldcp)	\
	do {	\
		mutex_enter(&((ldcp)->ldc_cblock));	\
		mutex_enter(&((ldcp)->ldc_txlock));	\
	} while (0)
#define	LDC_EXIT_LOCK(ldcp)	\
	do {	\
		mutex_exit(&((ldcp)->ldc_txlock));	\
		mutex_exit(&((ldcp)->ldc_cblock));	\
	} while (0)
3071ae08745Sheppo 
3081ae08745Sheppo /* Driver soft state ptr  */
3091ae08745Sheppo static void	*vsw_state;
3101ae08745Sheppo 
3111ae08745Sheppo /*
3121ae08745Sheppo  * Linked list of "vsw_t" structures - one per instance.
3131ae08745Sheppo  */
3141ae08745Sheppo vsw_t		*vsw_head = NULL;
3151ae08745Sheppo krwlock_t	vsw_rw;
3161ae08745Sheppo 
3171ae08745Sheppo /*
3181ae08745Sheppo  * Property names
3191ae08745Sheppo  */
3201ae08745Sheppo static char vdev_propname[] = "virtual-device";
3211ae08745Sheppo static char vsw_propname[] = "virtual-network-switch";
3221ae08745Sheppo static char physdev_propname[] = "vsw-phys-dev";
3231ae08745Sheppo static char smode_propname[] = "vsw-switch-mode";
3241ae08745Sheppo static char macaddr_propname[] = "local-mac-address";
3251ae08745Sheppo static char remaddr_propname[] = "remote-mac-address";
3261ae08745Sheppo static char ldcids_propname[] = "ldc-ids";
3271ae08745Sheppo static char chan_propname[] = "channel-endpoint";
3281ae08745Sheppo static char id_propname[] = "id";
3291ae08745Sheppo static char reg_propname[] = "reg";
3301ae08745Sheppo 
3311ae08745Sheppo /* supported versions */
3321ae08745Sheppo static	ver_sup_t	vsw_versions[] = { {1, 0} };
3331ae08745Sheppo 
3341ae08745Sheppo /*
3351ae08745Sheppo  * Matching criteria passed to the MDEG to register interest
3361ae08745Sheppo  * in changes to 'virtual-device-port' nodes identified by their
3371ae08745Sheppo  * 'id' property.
3381ae08745Sheppo  */
3391ae08745Sheppo static md_prop_match_t vport_prop_match[] = {
3401ae08745Sheppo 	{ MDET_PROP_VAL,    "id"   },
3411ae08745Sheppo 	{ MDET_LIST_END,    NULL    }
3421ae08745Sheppo };
3431ae08745Sheppo 
3441ae08745Sheppo static mdeg_node_match_t vport_match = { "virtual-device-port",
3451ae08745Sheppo 						vport_prop_match };
3461ae08745Sheppo 
3471ae08745Sheppo /*
34834683adeSsg70180  * Matching criteria passed to the MDEG to register interest
34934683adeSsg70180  * in changes to 'virtual-device' nodes (i.e. vsw nodes) identified
35034683adeSsg70180  * by their 'name' and 'cfg-handle' properties.
35134683adeSsg70180  */
35234683adeSsg70180 static md_prop_match_t vdev_prop_match[] = {
35334683adeSsg70180 	{ MDET_PROP_STR,    "name"   },
35434683adeSsg70180 	{ MDET_PROP_VAL,    "cfg-handle" },
35534683adeSsg70180 	{ MDET_LIST_END,    NULL    }
35634683adeSsg70180 };
35734683adeSsg70180 
35834683adeSsg70180 static mdeg_node_match_t vdev_match = { "virtual-device",
35934683adeSsg70180 						vdev_prop_match };
36034683adeSsg70180 
36134683adeSsg70180 
36234683adeSsg70180 /*
3631ae08745Sheppo  * Specification of an MD node passed to the MDEG to filter any
3641ae08745Sheppo  * 'vport' nodes that do not belong to the specified node. This
3651ae08745Sheppo  * template is copied for each vsw instance and filled in with
3661ae08745Sheppo  * the appropriate 'cfg-handle' value before being passed to the MDEG.
3671ae08745Sheppo  */
3681ae08745Sheppo static mdeg_prop_spec_t vsw_prop_template[] = {
3691ae08745Sheppo 	{ MDET_PROP_STR,    "name",		vsw_propname },
3701ae08745Sheppo 	{ MDET_PROP_VAL,    "cfg-handle",	NULL	},
3711ae08745Sheppo 	{ MDET_LIST_END,    NULL,		NULL	}
3721ae08745Sheppo };
3731ae08745Sheppo 
3741ae08745Sheppo #define	VSW_SET_MDEG_PROP_INST(specp, val)	(specp)[1].ps_val = (val);
3751ae08745Sheppo 
3761ae08745Sheppo /*
3777636cb21Slm66018  * From /etc/system enable/disable thread per ring. This is a mode
3787636cb21Slm66018  * selection that is done a vsw driver attach time.
3797636cb21Slm66018  */
3807636cb21Slm66018 boolean_t vsw_multi_ring_enable = B_FALSE;
3817636cb21Slm66018 int vsw_mac_rx_rings = VSW_MAC_RX_RINGS;
3827636cb21Slm66018 
/*
 * Debug message mask: 0x1f enables every message class, 0x0 turns
 * them all off.
 */
int vswdbg = 0x0;

/*
 * Bits of the debug mask:
 * 0x01:	Function entry/exit tracing
 * 0x02:	Internal function messages
 * 0x04:	Verbose internal messages
 * 0x08:	Warning messages
 * 0x10:	Error messages
 */
3971ae08745Sheppo 
3981ae08745Sheppo static void
3991ae08745Sheppo vswdebug(vsw_t *vswp, const char *fmt, ...)
4001ae08745Sheppo {
4011ae08745Sheppo 	char buf[512];
4021ae08745Sheppo 	va_list ap;
4031ae08745Sheppo 
4041ae08745Sheppo 	va_start(ap, fmt);
4051ae08745Sheppo 	(void) vsprintf(buf, fmt, ap);
4061ae08745Sheppo 	va_end(ap);
4071ae08745Sheppo 
4081ae08745Sheppo 	if (vswp == NULL)
4091ae08745Sheppo 		cmn_err(CE_CONT, "%s\n", buf);
4101ae08745Sheppo 	else
4111ae08745Sheppo 		cmn_err(CE_CONT, "vsw%d: %s\n", vswp->instance, buf);
4121ae08745Sheppo }
4131ae08745Sheppo 
/*
 * The state dump routines are, for now, switched by their own
 * private compile-time flag rather than by vswdbg.
 */
#define	DUMP_STATE	0

#if DUMP_STATE

/* Print the three fields of a message tag passed by value. */
#define	DUMP_TAG(tag) \
{			\
	D1(NULL, "DUMP_TAG: type 0x%llx", (tag).vio_msgtype); \
	D1(NULL, "DUMP_TAG: stype 0x%llx", (tag).vio_subtype);	\
	D1(NULL, "DUMP_TAG: senv 0x%llx", (tag).vio_subtype_env);	\
}

/* As DUMP_TAG, but for a pointer to a message tag. */
#define	DUMP_TAG_PTR(tag) \
{			\
	D1(NULL, "DUMP_TAG: type 0x%llx", (tag)->vio_msgtype); \
	D1(NULL, "DUMP_TAG: stype 0x%llx", (tag)->vio_subtype);	\
	D1(NULL, "DUMP_TAG: senv 0x%llx", (tag)->vio_subtype_env);	\
}

#define	DUMP_FLAGS(flags) dump_flags(flags);
#define	DISPLAY_STATE()	display_state()

#else	/* !DUMP_STATE */

/* State dumping disabled: every helper expands to nothing. */
#define	DUMP_TAG(tag)
#define	DUMP_TAG_PTR(tag)
#define	DUMP_FLAGS(state)
#define	DISPLAY_STATE()

#endif	/* DUMP_STATE */
4471ae08745Sheppo 
/*
 * Debug print macros, used as D1(vswp, fmt, ...).  In DEBUG builds each
 * one calls vswdebug() only when its bit is set in vswdbg; in non-DEBUG
 * builds the call is never made.
 *
 * The guard is written as "if (!cond) { } else vswdebug" so that the
 * macro carries its own else clause.  An "else" following a use of the
 * macro in an unbraced if statement therefore binds to the caller's if
 * and not to the hidden one inside the macro.
 */
#ifdef DEBUG

#define	D1	if (!(vswdbg & 0x01)) { } else vswdebug
#define	D2	if (!(vswdbg & 0x02)) { } else vswdebug
#define	D3	if (!(vswdbg & 0x04)) { } else vswdebug
#define	DWARN	if (!(vswdbg & 0x08)) { } else vswdebug
#define	DERR	if (!(vswdbg & 0x10)) { } else vswdebug

#else

#define	DERR	if (1) { } else vswdebug
#define	DWARN	if (1) { } else vswdebug
#define	D1	if (1) { } else vswdebug
#define	D2	if (1) { } else vswdebug
#define	D3	if (1) { } else vswdebug

#endif	/* DEBUG */
4791ae08745Sheppo 
4801ae08745Sheppo static struct modlinkage modlinkage = {
4811ae08745Sheppo 	MODREV_1,
4821ae08745Sheppo 	&vswmodldrv,
4831ae08745Sheppo 	NULL
4841ae08745Sheppo };
4851ae08745Sheppo 
4861ae08745Sheppo int
4871ae08745Sheppo _init(void)
4881ae08745Sheppo {
4891ae08745Sheppo 	int status;
4901ae08745Sheppo 
4911ae08745Sheppo 	rw_init(&vsw_rw, NULL, RW_DRIVER, NULL);
4921ae08745Sheppo 
4931ae08745Sheppo 	status = ddi_soft_state_init(&vsw_state, sizeof (vsw_t), 1);
4941ae08745Sheppo 	if (status != 0) {
4951ae08745Sheppo 		return (status);
4961ae08745Sheppo 	}
4971ae08745Sheppo 
4981ae08745Sheppo 	mac_init_ops(&vsw_ops, "vsw");
4991ae08745Sheppo 	status = mod_install(&modlinkage);
5001ae08745Sheppo 	if (status != 0) {
5011ae08745Sheppo 		ddi_soft_state_fini(&vsw_state);
5021ae08745Sheppo 	}
5031ae08745Sheppo 	return (status);
5041ae08745Sheppo }
5051ae08745Sheppo 
5061ae08745Sheppo int
5071ae08745Sheppo _fini(void)
5081ae08745Sheppo {
5091ae08745Sheppo 	int status;
5101ae08745Sheppo 
5111ae08745Sheppo 	status = mod_remove(&modlinkage);
5121ae08745Sheppo 	if (status != 0)
5131ae08745Sheppo 		return (status);
5141ae08745Sheppo 	mac_fini_ops(&vsw_ops);
5151ae08745Sheppo 	ddi_soft_state_fini(&vsw_state);
5161ae08745Sheppo 
5171ae08745Sheppo 	rw_destroy(&vsw_rw);
5181ae08745Sheppo 
5191ae08745Sheppo 	return (status);
5201ae08745Sheppo }
5211ae08745Sheppo 
5221ae08745Sheppo int
5231ae08745Sheppo _info(struct modinfo *modinfop)
5241ae08745Sheppo {
5251ae08745Sheppo 	return (mod_info(&modlinkage, modinfop));
5261ae08745Sheppo }
5271ae08745Sheppo 
5281ae08745Sheppo static int
5291ae08745Sheppo vsw_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
5301ae08745Sheppo {
5311ae08745Sheppo 	vsw_t		*vswp;
53234683adeSsg70180 	int		instance;
5331ae08745Sheppo 	char		hashname[MAXNAMELEN];
5341ae08745Sheppo 	char		qname[TASKQ_NAMELEN];
5357636cb21Slm66018 	enum		{ PROG_init = 0x00,
5367636cb21Slm66018 				PROG_if_lock = 0x01,
5377636cb21Slm66018 				PROG_fdb = 0x02,
5387636cb21Slm66018 				PROG_mfdb = 0x04,
5397636cb21Slm66018 				PROG_report_dev = 0x08,
5407636cb21Slm66018 				PROG_plist = 0x10,
5411ae08745Sheppo 				PROG_taskq = 0x20}
5421ae08745Sheppo 			progress;
5431ae08745Sheppo 
5441ae08745Sheppo 	progress = PROG_init;
5451ae08745Sheppo 
5461ae08745Sheppo 	switch (cmd) {
5471ae08745Sheppo 	case DDI_ATTACH:
5481ae08745Sheppo 		break;
5491ae08745Sheppo 	case DDI_RESUME:
5501ae08745Sheppo 		/* nothing to do for this non-device */
5511ae08745Sheppo 		return (DDI_SUCCESS);
5521ae08745Sheppo 	case DDI_PM_RESUME:
5531ae08745Sheppo 	default:
5541ae08745Sheppo 		return (DDI_FAILURE);
5551ae08745Sheppo 	}
5561ae08745Sheppo 
5571ae08745Sheppo 	instance = ddi_get_instance(dip);
5581ae08745Sheppo 	if (ddi_soft_state_zalloc(vsw_state, instance) != DDI_SUCCESS) {
5591ae08745Sheppo 		DERR(NULL, "vsw%d: ddi_soft_state_zalloc failed", instance);
5601ae08745Sheppo 		return (DDI_FAILURE);
5611ae08745Sheppo 	}
5621ae08745Sheppo 	vswp = ddi_get_soft_state(vsw_state, instance);
5631ae08745Sheppo 
5641ae08745Sheppo 	if (vswp == NULL) {
5651ae08745Sheppo 		DERR(NULL, "vsw%d: ddi_get_soft_state failed", instance);
5661ae08745Sheppo 		goto vsw_attach_fail;
5671ae08745Sheppo 	}
5681ae08745Sheppo 
5691ae08745Sheppo 	vswp->dip = dip;
5701ae08745Sheppo 	vswp->instance = instance;
5711ae08745Sheppo 	ddi_set_driver_private(dip, (caddr_t)vswp);
5721ae08745Sheppo 
5735f94e909Ssg70180 	mutex_init(&vswp->hw_lock, NULL, MUTEX_DRIVER, NULL);
57434683adeSsg70180 	mutex_init(&vswp->mac_lock, NULL, MUTEX_DRIVER, NULL);
5751ae08745Sheppo 	rw_init(&vswp->if_lockrw, NULL, RW_DRIVER, NULL);
5761ae08745Sheppo 	progress |= PROG_if_lock;
5771ae08745Sheppo 
5781ae08745Sheppo 	/* setup the unicast forwarding database  */
5791ae08745Sheppo 	(void) snprintf(hashname, MAXNAMELEN, "vsw_unicst_table-%d",
5801ae08745Sheppo 							vswp->instance);
5811ae08745Sheppo 	D2(vswp, "creating unicast hash table (%s)...", hashname);
5821ae08745Sheppo 	vswp->fdb = mod_hash_create_ptrhash(hashname, VSW_NCHAINS,
5831ae08745Sheppo 		mod_hash_null_valdtor, sizeof (void *));
5841ae08745Sheppo 
5851ae08745Sheppo 	progress |= PROG_fdb;
5861ae08745Sheppo 
5871ae08745Sheppo 	/* setup the multicast fowarding database */
5881ae08745Sheppo 	(void) snprintf(hashname, MAXNAMELEN, "vsw_mcst_table-%d",
5891ae08745Sheppo 							vswp->instance);
5901ae08745Sheppo 	D2(vswp, "creating multicast hash table %s)...", hashname);
5911ae08745Sheppo 	rw_init(&vswp->mfdbrw, NULL, RW_DRIVER, NULL);
5921ae08745Sheppo 	vswp->mfdb = mod_hash_create_ptrhash(hashname, VSW_NCHAINS,
5931ae08745Sheppo 			mod_hash_null_valdtor, sizeof (void *));
5941ae08745Sheppo 
5951ae08745Sheppo 	progress |= PROG_mfdb;
5961ae08745Sheppo 
5971ae08745Sheppo 	/*
5981ae08745Sheppo 	 * create lock protecting list of multicast addresses
5991ae08745Sheppo 	 * which could come via m_multicst() entry point when plumbed.
6001ae08745Sheppo 	 */
6011ae08745Sheppo 	mutex_init(&vswp->mca_lock, NULL, MUTEX_DRIVER, NULL);
6021ae08745Sheppo 	vswp->mcap = NULL;
6031ae08745Sheppo 
6041ae08745Sheppo 	ddi_report_dev(vswp->dip);
6051ae08745Sheppo 
6061ae08745Sheppo 	progress |= PROG_report_dev;
6071ae08745Sheppo 
6081ae08745Sheppo 	WRITE_ENTER(&vsw_rw);
6091ae08745Sheppo 	vswp->next = vsw_head;
6101ae08745Sheppo 	vsw_head = vswp;
6111ae08745Sheppo 	RW_EXIT(&vsw_rw);
6121ae08745Sheppo 
6131ae08745Sheppo 	/* setup the port list */
6141ae08745Sheppo 	rw_init(&vswp->plist.lockrw, NULL, RW_DRIVER, NULL);
6151ae08745Sheppo 	vswp->plist.head = NULL;
6161ae08745Sheppo 
6171ae08745Sheppo 	progress |= PROG_plist;
6181ae08745Sheppo 
6191ae08745Sheppo 	/*
6201ae08745Sheppo 	 * Create the taskq which will process all the VIO
6211ae08745Sheppo 	 * control messages.
6221ae08745Sheppo 	 */
6231ae08745Sheppo 	(void) snprintf(qname, TASKQ_NAMELEN, "vsw_taskq%d", vswp->instance);
6241ae08745Sheppo 	if ((vswp->taskq_p = ddi_taskq_create(vswp->dip, qname, 1,
6251ae08745Sheppo 					TASKQ_DEFAULTPRI, 0)) == NULL) {
62634683adeSsg70180 		cmn_err(CE_WARN, "!vsw%d: Unable to create task queue",
62734683adeSsg70180 			vswp->instance);
6281ae08745Sheppo 		goto vsw_attach_fail;
6291ae08745Sheppo 	}
6301ae08745Sheppo 
6311ae08745Sheppo 	progress |= PROG_taskq;
6321ae08745Sheppo 
633d10e4ef2Snarayan 	/* prevent auto-detaching */
634d10e4ef2Snarayan 	if (ddi_prop_update_int(DDI_DEV_T_NONE, vswp->dip,
635d10e4ef2Snarayan 				DDI_NO_AUTODETACH, 1) != DDI_SUCCESS) {
63634683adeSsg70180 		cmn_err(CE_NOTE, "!Unable to set \"%s\" property for "
637d10e4ef2Snarayan 			"instance %u", DDI_NO_AUTODETACH, instance);
638d10e4ef2Snarayan 	}
639d10e4ef2Snarayan 
6401ae08745Sheppo 	/*
64134683adeSsg70180 	 * Now we have everything setup, register an interest in
64234683adeSsg70180 	 * specific MD nodes.
64334683adeSsg70180 	 *
64434683adeSsg70180 	 * The callback is invoked in 2 cases, firstly if upon mdeg
64534683adeSsg70180 	 * registration there are existing nodes which match our specified
64634683adeSsg70180 	 * criteria, and secondly if the MD is changed (and again, there
64734683adeSsg70180 	 * are nodes which we are interested in present within it. Note
64834683adeSsg70180 	 * that our callback will be invoked even if our specified nodes
64934683adeSsg70180 	 * have not actually changed).
65034683adeSsg70180 	 *
65134683adeSsg70180 	 * Until the callback is invoked we cannot switch any pkts as
65234683adeSsg70180 	 * we don't know basic information such as what mode we are
65334683adeSsg70180 	 * operating in. However we expect the callback to be invoked
65434683adeSsg70180 	 * immediately upon registration as this driver should only
65534683adeSsg70180 	 * be attaching if there are vsw nodes in the MD.
6561ae08745Sheppo 	 */
65734683adeSsg70180 	if (vsw_mdeg_register(vswp))
65834683adeSsg70180 		goto vsw_attach_fail;
6591ae08745Sheppo 
6601ae08745Sheppo 	return (DDI_SUCCESS);
6611ae08745Sheppo 
6621ae08745Sheppo vsw_attach_fail:
6631ae08745Sheppo 	DERR(NULL, "vsw_attach: failed");
6641ae08745Sheppo 
6651ae08745Sheppo 	if (progress & PROG_taskq)
6661ae08745Sheppo 		ddi_taskq_destroy(vswp->taskq_p);
6671ae08745Sheppo 
6681ae08745Sheppo 	if (progress & PROG_plist)
6691ae08745Sheppo 		rw_destroy(&vswp->plist.lockrw);
6701ae08745Sheppo 
6711ae08745Sheppo 	if (progress & PROG_report_dev) {
6721ae08745Sheppo 		ddi_remove_minor_node(dip, NULL);
6731ae08745Sheppo 		mutex_destroy(&vswp->mca_lock);
6741ae08745Sheppo 	}
6751ae08745Sheppo 
6761ae08745Sheppo 	if (progress & PROG_mfdb) {
6771ae08745Sheppo 		mod_hash_destroy_hash(vswp->mfdb);
6781ae08745Sheppo 		vswp->mfdb = NULL;
6791ae08745Sheppo 		rw_destroy(&vswp->mfdbrw);
6801ae08745Sheppo 	}
6811ae08745Sheppo 
6821ae08745Sheppo 	if (progress & PROG_fdb) {
6831ae08745Sheppo 		mod_hash_destroy_hash(vswp->fdb);
6841ae08745Sheppo 		vswp->fdb = NULL;
6851ae08745Sheppo 	}
6861ae08745Sheppo 
68734683adeSsg70180 	if (progress & PROG_if_lock) {
6881ae08745Sheppo 		rw_destroy(&vswp->if_lockrw);
68934683adeSsg70180 		mutex_destroy(&vswp->mac_lock);
6905f94e909Ssg70180 		mutex_destroy(&vswp->hw_lock);
69134683adeSsg70180 	}
6921ae08745Sheppo 
6931ae08745Sheppo 	ddi_soft_state_free(vsw_state, instance);
6941ae08745Sheppo 	return (DDI_FAILURE);
6951ae08745Sheppo }
6961ae08745Sheppo 
6971ae08745Sheppo static int
6981ae08745Sheppo vsw_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
6991ae08745Sheppo {
700d10e4ef2Snarayan 	vio_mblk_pool_t		*poolp, *npoolp;
7011ae08745Sheppo 	vsw_t			**vswpp, *vswp;
7021ae08745Sheppo 	int 			instance;
7031ae08745Sheppo 
7041ae08745Sheppo 	instance = ddi_get_instance(dip);
7051ae08745Sheppo 	vswp = ddi_get_soft_state(vsw_state, instance);
7061ae08745Sheppo 
7071ae08745Sheppo 	if (vswp == NULL) {
7081ae08745Sheppo 		return (DDI_FAILURE);
7091ae08745Sheppo 	}
7101ae08745Sheppo 
7111ae08745Sheppo 	switch (cmd) {
7121ae08745Sheppo 	case DDI_DETACH:
7131ae08745Sheppo 		break;
7141ae08745Sheppo 	case DDI_SUSPEND:
7151ae08745Sheppo 	case DDI_PM_SUSPEND:
7161ae08745Sheppo 	default:
7171ae08745Sheppo 		return (DDI_FAILURE);
7181ae08745Sheppo 	}
7191ae08745Sheppo 
7201ae08745Sheppo 	D2(vswp, "detaching instance %d", instance);
7211ae08745Sheppo 
72234683adeSsg70180 	if (vswp->if_state & VSW_IF_REG) {
7231ae08745Sheppo 		if (vsw_mac_unregister(vswp) != 0) {
72434683adeSsg70180 			cmn_err(CE_WARN, "!vsw%d: Unable to detach from "
72534683adeSsg70180 				"MAC layer", vswp->instance);
7261ae08745Sheppo 			return (DDI_FAILURE);
7271ae08745Sheppo 		}
728d10e4ef2Snarayan 	}
7291ae08745Sheppo 
7301ae08745Sheppo 	vsw_mdeg_unregister(vswp);
7311ae08745Sheppo 
732e1ebb9ecSlm66018 	/* remove mac layer callback */
73334683adeSsg70180 	mutex_enter(&vswp->mac_lock);
734e1ebb9ecSlm66018 	if ((vswp->mh != NULL) && (vswp->mrh != NULL)) {
735e1ebb9ecSlm66018 		mac_rx_remove(vswp->mh, vswp->mrh);
736e1ebb9ecSlm66018 		vswp->mrh = NULL;
7371ae08745Sheppo 	}
73834683adeSsg70180 	mutex_exit(&vswp->mac_lock);
7391ae08745Sheppo 
7401ae08745Sheppo 	if (vsw_detach_ports(vswp) != 0) {
74134683adeSsg70180 		cmn_err(CE_WARN, "!vsw%d: Unable to detach ports",
74234683adeSsg70180 							vswp->instance);
7431ae08745Sheppo 		return (DDI_FAILURE);
7441ae08745Sheppo 	}
7451ae08745Sheppo 
74634683adeSsg70180 	rw_destroy(&vswp->if_lockrw);
74734683adeSsg70180 
7485f94e909Ssg70180 	mutex_destroy(&vswp->hw_lock);
7495f94e909Ssg70180 
7501ae08745Sheppo 	/*
751e1ebb9ecSlm66018 	 * Now that the ports have been deleted, stop and close
752e1ebb9ecSlm66018 	 * the physical device.
753e1ebb9ecSlm66018 	 */
75434683adeSsg70180 	mutex_enter(&vswp->mac_lock);
755e1ebb9ecSlm66018 	if (vswp->mh != NULL) {
7567636cb21Slm66018 		if (vswp->mstarted)
757e1ebb9ecSlm66018 			mac_stop(vswp->mh);
7587636cb21Slm66018 		if (vswp->mresources)
7597636cb21Slm66018 			mac_resource_set(vswp->mh, NULL, NULL);
760e1ebb9ecSlm66018 		mac_close(vswp->mh);
761e1ebb9ecSlm66018 
762e1ebb9ecSlm66018 		vswp->mh = NULL;
763e1ebb9ecSlm66018 		vswp->txinfo = NULL;
764e1ebb9ecSlm66018 	}
76534683adeSsg70180 	mutex_exit(&vswp->mac_lock);
76634683adeSsg70180 	mutex_destroy(&vswp->mac_lock);
767e1ebb9ecSlm66018 
768e1ebb9ecSlm66018 	/*
769d10e4ef2Snarayan 	 * Destroy any free pools that may still exist.
770d10e4ef2Snarayan 	 */
771d10e4ef2Snarayan 	poolp = vswp->rxh;
772d10e4ef2Snarayan 	while (poolp != NULL) {
773d10e4ef2Snarayan 		npoolp = vswp->rxh = poolp->nextp;
774d10e4ef2Snarayan 		if (vio_destroy_mblks(poolp) != 0) {
775d10e4ef2Snarayan 			vswp->rxh = poolp;
776d10e4ef2Snarayan 			return (DDI_FAILURE);
777d10e4ef2Snarayan 		}
778d10e4ef2Snarayan 		poolp = npoolp;
779d10e4ef2Snarayan 	}
780d10e4ef2Snarayan 
781d10e4ef2Snarayan 	/*
7821ae08745Sheppo 	 * Remove this instance from any entries it may be on in
7831ae08745Sheppo 	 * the hash table by using the list of addresses maintained
7841ae08745Sheppo 	 * in the vsw_t structure.
7851ae08745Sheppo 	 */
7861ae08745Sheppo 	vsw_del_mcst_vsw(vswp);
7871ae08745Sheppo 
7881ae08745Sheppo 	vswp->mcap = NULL;
7891ae08745Sheppo 	mutex_destroy(&vswp->mca_lock);
7901ae08745Sheppo 
7911ae08745Sheppo 	/*
7921ae08745Sheppo 	 * By now any pending tasks have finished and the underlying
7931ae08745Sheppo 	 * ldc's have been destroyed, so its safe to delete the control
7941ae08745Sheppo 	 * message taskq.
7951ae08745Sheppo 	 */
7961ae08745Sheppo 	if (vswp->taskq_p != NULL)
7971ae08745Sheppo 		ddi_taskq_destroy(vswp->taskq_p);
7981ae08745Sheppo 
7991ae08745Sheppo 	/*
8001ae08745Sheppo 	 * At this stage all the data pointers in the hash table
8011ae08745Sheppo 	 * should be NULL, as all the ports have been removed and will
8021ae08745Sheppo 	 * have deleted themselves from the port lists which the data
8031ae08745Sheppo 	 * pointers point to. Hence we can destroy the table using the
8041ae08745Sheppo 	 * default destructors.
8051ae08745Sheppo 	 */
8061ae08745Sheppo 	D2(vswp, "vsw_detach: destroying hash tables..");
8071ae08745Sheppo 	mod_hash_destroy_hash(vswp->fdb);
8081ae08745Sheppo 	vswp->fdb = NULL;
8091ae08745Sheppo 
8101ae08745Sheppo 	WRITE_ENTER(&vswp->mfdbrw);
8111ae08745Sheppo 	mod_hash_destroy_hash(vswp->mfdb);
8121ae08745Sheppo 	vswp->mfdb = NULL;
8131ae08745Sheppo 	RW_EXIT(&vswp->mfdbrw);
8141ae08745Sheppo 	rw_destroy(&vswp->mfdbrw);
8151ae08745Sheppo 
8161ae08745Sheppo 	ddi_remove_minor_node(dip, NULL);
8171ae08745Sheppo 
8181ae08745Sheppo 	rw_destroy(&vswp->plist.lockrw);
8191ae08745Sheppo 	WRITE_ENTER(&vsw_rw);
8201ae08745Sheppo 	for (vswpp = &vsw_head; *vswpp; vswpp = &(*vswpp)->next) {
8211ae08745Sheppo 		if (*vswpp == vswp) {
8221ae08745Sheppo 			*vswpp = vswp->next;
8231ae08745Sheppo 			break;
8241ae08745Sheppo 		}
8251ae08745Sheppo 	}
8261ae08745Sheppo 	RW_EXIT(&vsw_rw);
8271ae08745Sheppo 	ddi_soft_state_free(vsw_state, instance);
8281ae08745Sheppo 
8291ae08745Sheppo 	return (DDI_SUCCESS);
8301ae08745Sheppo }
8311ae08745Sheppo 
8321ae08745Sheppo static int
8331ae08745Sheppo vsw_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
8341ae08745Sheppo {
8351ae08745Sheppo 	_NOTE(ARGUNUSED(dip))
8361ae08745Sheppo 
8371ae08745Sheppo 	vsw_t	*vswp = NULL;
8381ae08745Sheppo 	dev_t	dev = (dev_t)arg;
8391ae08745Sheppo 	int	instance;
8401ae08745Sheppo 
8411ae08745Sheppo 	instance = getminor(dev);
8421ae08745Sheppo 
8431ae08745Sheppo 	switch (infocmd) {
8441ae08745Sheppo 	case DDI_INFO_DEVT2DEVINFO:
8451ae08745Sheppo 		if ((vswp = ddi_get_soft_state(vsw_state, instance)) == NULL) {
8461ae08745Sheppo 			*result = NULL;
8471ae08745Sheppo 			return (DDI_FAILURE);
8481ae08745Sheppo 		}
8491ae08745Sheppo 		*result = vswp->dip;
8501ae08745Sheppo 		return (DDI_SUCCESS);
8511ae08745Sheppo 
8521ae08745Sheppo 	case DDI_INFO_DEVT2INSTANCE:
8531ae08745Sheppo 		*result = (void *)(uintptr_t)instance;
8541ae08745Sheppo 		return (DDI_SUCCESS);
8551ae08745Sheppo 
8561ae08745Sheppo 	default:
8571ae08745Sheppo 		*result = NULL;
8581ae08745Sheppo 		return (DDI_FAILURE);
8591ae08745Sheppo 	}
8601ae08745Sheppo }
8611ae08745Sheppo 
8621ae08745Sheppo /*
86334683adeSsg70180  * Get the value of the "vsw-phys-dev" property in the specified
86434683adeSsg70180  * node. This property is the name of the physical device that
86534683adeSsg70180  * the virtual switch will use to talk to the outside world.
86634683adeSsg70180  *
86734683adeSsg70180  * Note it is valid for this property to be NULL (but the property
86834683adeSsg70180  * itself must exist). Callers of this routine should verify that
86934683adeSsg70180  * the value returned is what they expected (i.e. either NULL or non NULL).
87034683adeSsg70180  *
87134683adeSsg70180  * On success returns value of the property in region pointed to by
87234683adeSsg70180  * the 'name' argument, and with return value of 0. Otherwise returns 1.
8731ae08745Sheppo  */
87434683adeSsg70180 static int
87534683adeSsg70180 vsw_get_md_physname(vsw_t *vswp, md_t *mdp, mde_cookie_t node, char *name)
8761ae08745Sheppo {
87734683adeSsg70180 	int	len = 0;
8781ae08745Sheppo 	char	*physname = NULL;
8791ae08745Sheppo 	char	*dev;
8801ae08745Sheppo 
88134683adeSsg70180 	if (md_get_prop_data(mdp, node, physdev_propname,
8821ae08745Sheppo 				(uint8_t **)(&physname), &len) != 0) {
88334683adeSsg70180 		cmn_err(CE_WARN, "!vsw%d: Unable to get name(s) of physical "
88434683adeSsg70180 				"device(s) from MD", vswp->instance);
88534683adeSsg70180 		return (1);
8861ae08745Sheppo 	} else if ((strlen(physname) + 1) > LIFNAMSIZ) {
88734683adeSsg70180 		cmn_err(CE_WARN, "!vsw%d: %s is too long a device name",
88834683adeSsg70180 			vswp->instance, physname);
88934683adeSsg70180 		return (1);
8901ae08745Sheppo 	} else {
89134683adeSsg70180 		(void) strncpy(name, physname, strlen(physname) + 1);
8921ae08745Sheppo 		D2(vswp, "%s: using first device specified (%s)",
89334683adeSsg70180 			__func__, physname);
8941ae08745Sheppo 	}
8951ae08745Sheppo 
8961ae08745Sheppo #ifdef DEBUG
8971ae08745Sheppo 	/*
8981ae08745Sheppo 	 * As a temporary measure to aid testing we check to see if there
8991ae08745Sheppo 	 * is a vsw.conf file present. If there is we use the value of the
9001ae08745Sheppo 	 * vsw_physname property in the file as the name of the physical
9011ae08745Sheppo 	 * device, overriding the value from the MD.
9021ae08745Sheppo 	 *
9031ae08745Sheppo 	 * There may be multiple devices listed, but for the moment
9041ae08745Sheppo 	 * we just use the first one.
9051ae08745Sheppo 	 */
9061ae08745Sheppo 	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vswp->dip, 0,
9071ae08745Sheppo 		"vsw_physname", &dev) == DDI_PROP_SUCCESS) {
9081ae08745Sheppo 		if ((strlen(dev) + 1) > LIFNAMSIZ) {
90934683adeSsg70180 			cmn_err(CE_WARN, "vsw%d: %s is too long a device name",
91034683adeSsg70180 				vswp->instance, dev);
91134683adeSsg70180 			ddi_prop_free(dev);
91234683adeSsg70180 			return (1);
9131ae08745Sheppo 		} else {
91434683adeSsg70180 			cmn_err(CE_NOTE, "vsw%d: Using device name (%s) from "
91534683adeSsg70180 				"config file", vswp->instance, dev);
9161ae08745Sheppo 
91734683adeSsg70180 			(void) strncpy(name, dev, strlen(dev) + 1);
9181ae08745Sheppo 		}
9191ae08745Sheppo 
9201ae08745Sheppo 		ddi_prop_free(dev);
9211ae08745Sheppo 	}
9221ae08745Sheppo #endif
9231ae08745Sheppo 
92434683adeSsg70180 	return (0);
92534683adeSsg70180 }
926e1ebb9ecSlm66018 
927e1ebb9ecSlm66018 /*
92834683adeSsg70180  * Read the 'vsw-switch-mode' property from the specified MD node.
92934683adeSsg70180  *
93034683adeSsg70180  * Returns 0 on success and the number of modes found in 'found',
93134683adeSsg70180  * otherwise returns 1.
932e1ebb9ecSlm66018  */
93334683adeSsg70180 static int
93434683adeSsg70180 vsw_get_md_smodes(vsw_t *vswp, md_t *mdp, mde_cookie_t node,
93534683adeSsg70180 						uint8_t *modes, int *found)
93634683adeSsg70180 {
93734683adeSsg70180 	int		len = 0;
93834683adeSsg70180 	int		smode_num = 0;
93934683adeSsg70180 	char		*smode = NULL;
94034683adeSsg70180 	char		*curr_mode = NULL;
94134683adeSsg70180 
94234683adeSsg70180 	D1(vswp, "%s: enter", __func__);
9431ae08745Sheppo 
9441ae08745Sheppo 	/*
9451ae08745Sheppo 	 * Get the switch-mode property. The modes are listed in
9461ae08745Sheppo 	 * decreasing order of preference, i.e. prefered mode is
9471ae08745Sheppo 	 * first item in list.
9481ae08745Sheppo 	 */
9491ae08745Sheppo 	len = 0;
95034683adeSsg70180 	smode_num = 0;
95134683adeSsg70180 	if (md_get_prop_data(mdp, node, smode_propname,
9521ae08745Sheppo 				(uint8_t **)(&smode), &len) != 0) {
9531ae08745Sheppo 		/*
954e1ebb9ecSlm66018 		 * Unable to get switch-mode property from MD, nothing
955e1ebb9ecSlm66018 		 * more we can do.
9561ae08745Sheppo 		 */
95734683adeSsg70180 		cmn_err(CE_WARN, "!vsw%d: Unable to get switch mode property"
95834683adeSsg70180 			" from the MD", vswp->instance);
95934683adeSsg70180 		*found = 0;
96034683adeSsg70180 		return (1);
961e1ebb9ecSlm66018 	}
962e1ebb9ecSlm66018 
9631ae08745Sheppo 	curr_mode = smode;
9641ae08745Sheppo 	/*
9651ae08745Sheppo 	 * Modes of operation:
9661ae08745Sheppo 	 * 'switched'	 - layer 2 switching, underlying HW in
967e1ebb9ecSlm66018 	 *			programmed mode.
9681ae08745Sheppo 	 * 'promiscuous' - layer 2 switching, underlying HW in
9691ae08745Sheppo 	 *			promiscuous mode.
9701ae08745Sheppo 	 * 'routed'	 - layer 3 (i.e. IP) routing, underlying HW
9711ae08745Sheppo 	 *			in non-promiscuous mode.
9721ae08745Sheppo 	 */
97334683adeSsg70180 	while ((curr_mode < (smode + len)) && (smode_num < NUM_SMODES)) {
9741ae08745Sheppo 		D2(vswp, "%s: curr_mode = [%s]", __func__, curr_mode);
975e1ebb9ecSlm66018 		if (strcmp(curr_mode, "switched") == 0) {
97634683adeSsg70180 			modes[smode_num++] = VSW_LAYER2;
977e1ebb9ecSlm66018 		} else if (strcmp(curr_mode, "promiscuous") == 0) {
97834683adeSsg70180 			modes[smode_num++] = VSW_LAYER2_PROMISC;
979e1ebb9ecSlm66018 		} else if (strcmp(curr_mode, "routed") == 0) {
98034683adeSsg70180 			modes[smode_num++] = VSW_LAYER3;
981e1ebb9ecSlm66018 		} else {
98234683adeSsg70180 			cmn_err(CE_WARN, "!vsw%d: Unknown switch mode %s, "
98334683adeSsg70180 				"setting to default switched mode",
98434683adeSsg70180 				vswp->instance, curr_mode);
98534683adeSsg70180 			modes[smode_num++] = VSW_LAYER2;
9861ae08745Sheppo 		}
9871ae08745Sheppo 		curr_mode += strlen(curr_mode) + 1;
9881ae08745Sheppo 	}
98934683adeSsg70180 	*found = smode_num;
9901ae08745Sheppo 
99134683adeSsg70180 	D2(vswp, "%s: %d modes found", __func__, smode_num);
9921ae08745Sheppo 
9931ae08745Sheppo 	D1(vswp, "%s: exit", __func__);
99434683adeSsg70180 
99534683adeSsg70180 	return (0);
9961ae08745Sheppo }
9971ae08745Sheppo 
998e1ebb9ecSlm66018 /*
999e1ebb9ecSlm66018  * Get the mac address of the physical device.
1000e1ebb9ecSlm66018  *
1001e1ebb9ecSlm66018  * Returns 0 on success, 1 on failure.
1002e1ebb9ecSlm66018  */
1003e1ebb9ecSlm66018 static int
1004e1ebb9ecSlm66018 vsw_get_physaddr(vsw_t *vswp)
1005e1ebb9ecSlm66018 {
1006e1ebb9ecSlm66018 	mac_handle_t	mh;
1007e1ebb9ecSlm66018 	char		drv[LIFNAMSIZ];
1008e1ebb9ecSlm66018 	uint_t		ddi_instance;
1009e1ebb9ecSlm66018 
1010e1ebb9ecSlm66018 	D1(vswp, "%s: enter", __func__);
1011e1ebb9ecSlm66018 
1012e1ebb9ecSlm66018 	if (ddi_parse(vswp->physname, drv, &ddi_instance) != DDI_SUCCESS)
1013e1ebb9ecSlm66018 		return (1);
1014e1ebb9ecSlm66018 
1015e1ebb9ecSlm66018 	if (mac_open(vswp->physname, ddi_instance, &mh) != 0) {
101634683adeSsg70180 		cmn_err(CE_WARN, "!vsw%d: mac_open %s failed",
101734683adeSsg70180 				vswp->instance, vswp->physname);
1018e1ebb9ecSlm66018 		return (1);
1019e1ebb9ecSlm66018 	}
1020e1ebb9ecSlm66018 
1021e1ebb9ecSlm66018 	READ_ENTER(&vswp->if_lockrw);
1022e1ebb9ecSlm66018 	mac_unicst_get(mh, vswp->if_addr.ether_addr_octet);
1023e1ebb9ecSlm66018 	RW_EXIT(&vswp->if_lockrw);
1024e1ebb9ecSlm66018 
1025e1ebb9ecSlm66018 	mac_close(mh);
1026e1ebb9ecSlm66018 
1027e1ebb9ecSlm66018 	vswp->mdprops |= VSW_DEV_MACADDR;
1028e1ebb9ecSlm66018 
1029e1ebb9ecSlm66018 	D1(vswp, "%s: exit", __func__);
1030e1ebb9ecSlm66018 
1031e1ebb9ecSlm66018 	return (0);
1032e1ebb9ecSlm66018 }
1033e1ebb9ecSlm66018 
1034e1ebb9ecSlm66018 /*
1035e1ebb9ecSlm66018  * Check to see if the card supports the setting of multiple unicst
1036e1ebb9ecSlm66018  * addresses.
1037e1ebb9ecSlm66018  *
10385f94e909Ssg70180  * Returns 0 if card supports the programming of multiple unicast addresses,
10395f94e909Ssg70180  * otherwise returns 1.
1040e1ebb9ecSlm66018  */
1041e1ebb9ecSlm66018 static int
1042e1ebb9ecSlm66018 vsw_get_hw_maddr(vsw_t *vswp)
1043e1ebb9ecSlm66018 {
1044e1ebb9ecSlm66018 	D1(vswp, "%s: enter", __func__);
1045e1ebb9ecSlm66018 
104634683adeSsg70180 	mutex_enter(&vswp->mac_lock);
1047e1ebb9ecSlm66018 	if (vswp->mh == NULL) {
104834683adeSsg70180 		mutex_exit(&vswp->mac_lock);
1049e1ebb9ecSlm66018 		return (1);
1050e1ebb9ecSlm66018 	}
1051e1ebb9ecSlm66018 
1052e1ebb9ecSlm66018 	if (!mac_capab_get(vswp->mh, MAC_CAPAB_MULTIADDRESS, &vswp->maddr)) {
10535f94e909Ssg70180 		cmn_err(CE_WARN, "!vsw%d: device (%s) does not support "
10545f94e909Ssg70180 			"setting multiple unicast addresses", vswp->instance,
10555f94e909Ssg70180 			vswp->physname);
105634683adeSsg70180 		mutex_exit(&vswp->mac_lock);
1057e1ebb9ecSlm66018 		return (1);
1058e1ebb9ecSlm66018 	}
105934683adeSsg70180 	mutex_exit(&vswp->mac_lock);
1060e1ebb9ecSlm66018 
1061e1ebb9ecSlm66018 	D2(vswp, "%s: %d addrs : %d free", __func__,
1062e1ebb9ecSlm66018 		vswp->maddr.maddr_naddr, vswp->maddr.maddr_naddrfree);
1063e1ebb9ecSlm66018 
1064e1ebb9ecSlm66018 	D1(vswp, "%s: exit", __func__);
1065e1ebb9ecSlm66018 
1066e1ebb9ecSlm66018 	return (0);
1067e1ebb9ecSlm66018 }
1068e1ebb9ecSlm66018 
1069e1ebb9ecSlm66018 /*
107034683adeSsg70180  * Setup the required switching mode.
107134683adeSsg70180  *
107234683adeSsg70180  * Returns 0 on success, 1 on failure.
107334683adeSsg70180  */
107434683adeSsg70180 static int
107534683adeSsg70180 vsw_setup_switching(vsw_t *vswp)
107634683adeSsg70180 {
107734683adeSsg70180 	int	i, rv = 1;
107834683adeSsg70180 
107934683adeSsg70180 	D1(vswp, "%s: enter", __func__);
108034683adeSsg70180 
108134683adeSsg70180 	/* select best switching mode */
108234683adeSsg70180 	for (i = 0; i < vswp->smode_num; i++) {
108334683adeSsg70180 		vswp->smode_idx = i;
108434683adeSsg70180 		switch (vswp->smode[i]) {
108534683adeSsg70180 		case VSW_LAYER2:
108634683adeSsg70180 		case VSW_LAYER2_PROMISC:
108734683adeSsg70180 			rv = vsw_setup_layer2(vswp);
108834683adeSsg70180 			break;
108934683adeSsg70180 
109034683adeSsg70180 		case VSW_LAYER3:
109134683adeSsg70180 			rv = vsw_setup_layer3(vswp);
109234683adeSsg70180 			break;
109334683adeSsg70180 
109434683adeSsg70180 		default:
109534683adeSsg70180 			DERR(vswp, "unknown switch mode");
109634683adeSsg70180 			rv = 1;
109734683adeSsg70180 			break;
109834683adeSsg70180 		}
109934683adeSsg70180 
110034683adeSsg70180 		if (rv == 0)
110134683adeSsg70180 			break;
110234683adeSsg70180 	}
110334683adeSsg70180 
110434683adeSsg70180 	if (rv == 1) {
110534683adeSsg70180 		cmn_err(CE_WARN, "!vsw%d: Unable to setup specified "
110634683adeSsg70180 			"switching mode", vswp->instance);
110734683adeSsg70180 		return (rv);
110834683adeSsg70180 	}
110934683adeSsg70180 
111034683adeSsg70180 	D2(vswp, "%s: Operating in mode %d", __func__,
111134683adeSsg70180 					vswp->smode[vswp->smode_idx]);
111234683adeSsg70180 
111334683adeSsg70180 	D1(vswp, "%s: exit", __func__);
111434683adeSsg70180 
111534683adeSsg70180 	return (0);
111634683adeSsg70180 }
111734683adeSsg70180 
111834683adeSsg70180 /*
1119e1ebb9ecSlm66018  * Setup for layer 2 switching.
1120e1ebb9ecSlm66018  *
1121e1ebb9ecSlm66018  * Returns 0 on success, 1 on failure.
1122e1ebb9ecSlm66018  */
11231ae08745Sheppo static int
11241ae08745Sheppo vsw_setup_layer2(vsw_t *vswp)
11251ae08745Sheppo {
11261ae08745Sheppo 	D1(vswp, "%s: enter", __func__);
11271ae08745Sheppo 
112834683adeSsg70180 	vswp->vsw_switch_frame = vsw_switch_l2_frame;
11291ae08745Sheppo 
11301ae08745Sheppo 	/*
11311ae08745Sheppo 	 * Attempt to link into the MAC layer so we can get
11321ae08745Sheppo 	 * and send packets out over the physical adapter.
11331ae08745Sheppo 	 */
11341ae08745Sheppo 	if (vswp->mdprops & VSW_MD_PHYSNAME) {
11351ae08745Sheppo 		if (vsw_mac_attach(vswp) != 0) {
11361ae08745Sheppo 			/*
11371ae08745Sheppo 			 * Registration with the MAC layer has failed,
11381ae08745Sheppo 			 * so return 1 so that can fall back to next
11391ae08745Sheppo 			 * prefered switching method.
11401ae08745Sheppo 			 */
114134683adeSsg70180 			cmn_err(CE_WARN, "!vsw%d: Unable to join as MAC layer "
114234683adeSsg70180 				"client", vswp->instance);
1143e1ebb9ecSlm66018 			return (1);
11441ae08745Sheppo 		}
1145e1ebb9ecSlm66018 
1146e1ebb9ecSlm66018 		if (vswp->smode[vswp->smode_idx] == VSW_LAYER2) {
1147e1ebb9ecSlm66018 			/*
1148e1ebb9ecSlm66018 			 * Verify that underlying device can support multiple
11495f94e909Ssg70180 			 * unicast mac addresses.
1150e1ebb9ecSlm66018 			 */
1151e1ebb9ecSlm66018 			if (vsw_get_hw_maddr(vswp) != 0) {
115234683adeSsg70180 				cmn_err(CE_WARN, "!vsw%d: Unable to setup "
11535f94e909Ssg70180 					"layer2 switching", vswp->instance);
1154e1ebb9ecSlm66018 				vsw_mac_detach(vswp);
1155e1ebb9ecSlm66018 				return (1);
1156e1ebb9ecSlm66018 			}
1157e1ebb9ecSlm66018 		}
1158e1ebb9ecSlm66018 
11591ae08745Sheppo 	} else {
1160e1ebb9ecSlm66018 		/*
1161e1ebb9ecSlm66018 		 * No physical device name found in MD which is
1162e1ebb9ecSlm66018 		 * required for layer 2.
1163e1ebb9ecSlm66018 		 */
116434683adeSsg70180 		cmn_err(CE_WARN, "!vsw%d: no physical device name specified",
116534683adeSsg70180 			vswp->instance);
1166e1ebb9ecSlm66018 		return (1);
11671ae08745Sheppo 	}
11681ae08745Sheppo 
11691ae08745Sheppo 	D1(vswp, "%s: exit", __func__);
11701ae08745Sheppo 
1171e1ebb9ecSlm66018 	return (0);
11721ae08745Sheppo }
11731ae08745Sheppo 
11741ae08745Sheppo static int
11751ae08745Sheppo vsw_setup_layer3(vsw_t *vswp)
11761ae08745Sheppo {
11771ae08745Sheppo 	D1(vswp, "%s: enter", __func__);
11781ae08745Sheppo 
11791ae08745Sheppo 	D2(vswp, "%s: operating in layer 3 mode", __func__);
118034683adeSsg70180 	vswp->vsw_switch_frame = vsw_switch_l3_frame;
11811ae08745Sheppo 
11821ae08745Sheppo 	D1(vswp, "%s: exit", __func__);
11831ae08745Sheppo 
11841ae08745Sheppo 	return (0);
11851ae08745Sheppo }
11861ae08745Sheppo 
11871ae08745Sheppo /*
11881ae08745Sheppo  * Link into the MAC layer to gain access to the services provided by
11891ae08745Sheppo  * the underlying physical device driver (which should also have
11901ae08745Sheppo  * registered with the MAC layer).
11911ae08745Sheppo  *
11921ae08745Sheppo  * Only when in layer 2 mode.
11931ae08745Sheppo  */
11941ae08745Sheppo static int
11951ae08745Sheppo vsw_mac_attach(vsw_t *vswp)
11961ae08745Sheppo {
1197ba2e4443Sseb 	char	drv[LIFNAMSIZ];
1198ba2e4443Sseb 	uint_t	ddi_instance;
1199ba2e4443Sseb 
12007636cb21Slm66018 	D1(vswp, "%s: enter", __func__);
12011ae08745Sheppo 
120234683adeSsg70180 	ASSERT(vswp->mh == NULL);
120334683adeSsg70180 	ASSERT(vswp->mrh == NULL);
120434683adeSsg70180 	ASSERT(vswp->mstarted == B_FALSE);
120534683adeSsg70180 	ASSERT(vswp->mresources == B_FALSE);
12061ae08745Sheppo 
12071ae08745Sheppo 	ASSERT(vswp->mdprops & VSW_MD_PHYSNAME);
12081ae08745Sheppo 
120934683adeSsg70180 	mutex_enter(&vswp->mac_lock);
1210ba2e4443Sseb 	if (ddi_parse(vswp->physname, drv, &ddi_instance) != DDI_SUCCESS) {
121134683adeSsg70180 		cmn_err(CE_WARN, "!vsw%d: invalid device name: %s",
121234683adeSsg70180 			vswp->instance, vswp->physname);
1213ba2e4443Sseb 		goto mac_fail_exit;
1214ba2e4443Sseb 	}
121534683adeSsg70180 
1216ba2e4443Sseb 	if ((mac_open(vswp->physname, ddi_instance, &vswp->mh)) != 0) {
121734683adeSsg70180 		cmn_err(CE_WARN, "!vsw%d: mac_open %s failed",
121834683adeSsg70180 			vswp->instance, vswp->physname);
12191ae08745Sheppo 		goto mac_fail_exit;
12201ae08745Sheppo 	}
12211ae08745Sheppo 
12227636cb21Slm66018 	ASSERT(vswp->mh != NULL);
12237636cb21Slm66018 
12241ae08745Sheppo 	D2(vswp, "vsw_mac_attach: using device %s", vswp->physname);
12251ae08745Sheppo 
12267636cb21Slm66018 	if (vsw_multi_ring_enable) {
122734683adeSsg70180 		/*
122834683adeSsg70180 		 * Initialize the ring table.
122934683adeSsg70180 		 */
12307636cb21Slm66018 		vsw_mac_ring_tbl_init(vswp);
12311ae08745Sheppo 
12327636cb21Slm66018 		/*
123334683adeSsg70180 		 * Register our rx callback function.
12347636cb21Slm66018 		 */
12357636cb21Slm66018 		vswp->mrh = mac_rx_add(vswp->mh,
12367636cb21Slm66018 			vsw_rx_queue_cb, (void *)vswp);
123734683adeSsg70180 		ASSERT(vswp->mrh != NULL);
12387636cb21Slm66018 
12397636cb21Slm66018 		/*
12407636cb21Slm66018 		 * Register our mac resource callback.
12417636cb21Slm66018 		 */
12427636cb21Slm66018 		mac_resource_set(vswp->mh, vsw_mac_ring_add_cb, (void *)vswp);
12437636cb21Slm66018 		vswp->mresources = B_TRUE;
12447636cb21Slm66018 
12457636cb21Slm66018 		/*
12467636cb21Slm66018 		 * Get the ring resources available to us from
12477636cb21Slm66018 		 * the mac below us.
12487636cb21Slm66018 		 */
12497636cb21Slm66018 		mac_resources(vswp->mh);
12507636cb21Slm66018 	} else {
12517636cb21Slm66018 		/*
12527636cb21Slm66018 		 * Just register our rx callback function
12537636cb21Slm66018 		 */
12547636cb21Slm66018 		vswp->mrh = mac_rx_add(vswp->mh, vsw_rx_cb, (void *)vswp);
12557636cb21Slm66018 		ASSERT(vswp->mrh != NULL);
125634683adeSsg70180 	}
12577636cb21Slm66018 
12587636cb21Slm66018 	/* Get the MAC tx fn */
12591ae08745Sheppo 	vswp->txinfo = mac_tx_get(vswp->mh);
12601ae08745Sheppo 
12611ae08745Sheppo 	/* start the interface */
12621ae08745Sheppo 	if (mac_start(vswp->mh) != 0) {
126334683adeSsg70180 		cmn_err(CE_WARN, "!vsw%d: Could not start mac interface",
126434683adeSsg70180 			vswp->instance);
12651ae08745Sheppo 		goto mac_fail_exit;
12661ae08745Sheppo 	}
12671ae08745Sheppo 
126834683adeSsg70180 	mutex_exit(&vswp->mac_lock);
126934683adeSsg70180 
12707636cb21Slm66018 	vswp->mstarted = B_TRUE;
12717636cb21Slm66018 
12727636cb21Slm66018 	D1(vswp, "%s: exit", __func__);
12731ae08745Sheppo 	return (0);
12741ae08745Sheppo 
12751ae08745Sheppo mac_fail_exit:
127634683adeSsg70180 	mutex_exit(&vswp->mac_lock);
12777636cb21Slm66018 	vsw_mac_detach(vswp);
12781ae08745Sheppo 
12797636cb21Slm66018 	D1(vswp, "%s: exit", __func__);
12801ae08745Sheppo 	return (1);
12811ae08745Sheppo }
12821ae08745Sheppo 
12831ae08745Sheppo static void
12841ae08745Sheppo vsw_mac_detach(vsw_t *vswp)
12851ae08745Sheppo {
12861ae08745Sheppo 	D1(vswp, "vsw_mac_detach: enter");
12871ae08745Sheppo 
12887636cb21Slm66018 	ASSERT(vswp != NULL);
12897636cb21Slm66018 
12907636cb21Slm66018 	if (vsw_multi_ring_enable) {
12917636cb21Slm66018 		vsw_mac_ring_tbl_destroy(vswp);
12927636cb21Slm66018 	}
12937636cb21Slm66018 
129434683adeSsg70180 	mutex_enter(&vswp->mac_lock);
129534683adeSsg70180 
1296b9a6d57aSsg70180 	if (vswp->mh != NULL) {
12977636cb21Slm66018 		if (vswp->mstarted)
12987636cb21Slm66018 			mac_stop(vswp->mh);
12991ae08745Sheppo 		if (vswp->mrh != NULL)
13001ae08745Sheppo 			mac_rx_remove(vswp->mh, vswp->mrh);
13017636cb21Slm66018 		if (vswp->mresources)
13027636cb21Slm66018 			mac_resource_set(vswp->mh, NULL, NULL);
13031ae08745Sheppo 		mac_close(vswp->mh);
1304b9a6d57aSsg70180 	}
13051ae08745Sheppo 
13061ae08745Sheppo 	vswp->mrh = NULL;
13071ae08745Sheppo 	vswp->mh = NULL;
13081ae08745Sheppo 	vswp->txinfo = NULL;
13097636cb21Slm66018 	vswp->mstarted = B_FALSE;
13101ae08745Sheppo 
131134683adeSsg70180 	mutex_exit(&vswp->mac_lock);
131234683adeSsg70180 
13131ae08745Sheppo 	D1(vswp, "vsw_mac_detach: exit");
13141ae08745Sheppo }
13151ae08745Sheppo 
13161ae08745Sheppo /*
1317e1ebb9ecSlm66018  * Depending on the mode specified, the capabilites and capacity
1318e1ebb9ecSlm66018  * of the underlying device setup the physical device.
13191ae08745Sheppo  *
1320e1ebb9ecSlm66018  * If in layer 3 mode, then do nothing.
1321e1ebb9ecSlm66018  *
1322e1ebb9ecSlm66018  * If in layer 2 programmed mode attempt to program the unicast address
1323e1ebb9ecSlm66018  * associated with the port into the physical device. If this is not
1324e1ebb9ecSlm66018  * possible due to resource exhaustion or simply because the device does
1325e1ebb9ecSlm66018  * not support multiple unicast addresses then if required fallback onto
1326e1ebb9ecSlm66018  * putting the card into promisc mode.
1327e1ebb9ecSlm66018  *
1328e1ebb9ecSlm66018  * If in promisc mode then simply set the card into promisc mode.
1329e1ebb9ecSlm66018  *
1330e1ebb9ecSlm66018  * Returns 0 success, 1 on failure.
13311ae08745Sheppo  */
1332e1ebb9ecSlm66018 static int
13335f94e909Ssg70180 vsw_set_hw(vsw_t *vswp, vsw_port_t *port, int type)
13341ae08745Sheppo {
1335e1ebb9ecSlm66018 	mac_multi_addr_t	mac_addr;
1336e1ebb9ecSlm66018 	int			err;
13371ae08745Sheppo 
1338e1ebb9ecSlm66018 	D1(vswp, "%s: enter", __func__);
1339e1ebb9ecSlm66018 
13405f94e909Ssg70180 	ASSERT(MUTEX_HELD(&vswp->hw_lock));
13415f94e909Ssg70180 	ASSERT((type == VSW_LOCALDEV) || (type == VSW_VNETPORT));
13425f94e909Ssg70180 
1343e1ebb9ecSlm66018 	if (vswp->smode[vswp->smode_idx] == VSW_LAYER3)
1344e1ebb9ecSlm66018 		return (0);
1345e1ebb9ecSlm66018 
1346e1ebb9ecSlm66018 	if (vswp->smode[vswp->smode_idx] == VSW_LAYER2_PROMISC) {
13475f94e909Ssg70180 		return (vsw_set_hw_promisc(vswp, port, type));
1348e1ebb9ecSlm66018 	}
1349e1ebb9ecSlm66018 
1350e1ebb9ecSlm66018 	/*
1351e1ebb9ecSlm66018 	 * Attempt to program the unicast address into the HW.
1352e1ebb9ecSlm66018 	 */
1353e1ebb9ecSlm66018 	mac_addr.mma_addrlen = ETHERADDRL;
13545f94e909Ssg70180 	if (type == VSW_VNETPORT) {
13555f94e909Ssg70180 		ASSERT(port != NULL);
1356e1ebb9ecSlm66018 		ether_copy(&port->p_macaddr, &mac_addr.mma_addr);
13575f94e909Ssg70180 	} else {
13585f94e909Ssg70180 		READ_ENTER(&vswp->if_lockrw);
13595f94e909Ssg70180 		/*
13605f94e909Ssg70180 		 * Don't program if the interface is not UP. This
13615f94e909Ssg70180 		 * is possible if the address has just been changed
13625f94e909Ssg70180 		 * in the MD node, but the interface has not yet been
13635f94e909Ssg70180 		 * plumbed.
13645f94e909Ssg70180 		 */
13655f94e909Ssg70180 		if (!(vswp->if_state & VSW_IF_UP)) {
13665f94e909Ssg70180 			RW_EXIT(&vswp->if_lockrw);
13675f94e909Ssg70180 			return (0);
13685f94e909Ssg70180 		}
13695f94e909Ssg70180 		ether_copy(&vswp->if_addr, &mac_addr.mma_addr);
13705f94e909Ssg70180 		RW_EXIT(&vswp->if_lockrw);
13715f94e909Ssg70180 	}
1372e1ebb9ecSlm66018 
13735f94e909Ssg70180 	err = vsw_set_hw_addr(vswp, &mac_addr);
1374e1ebb9ecSlm66018 	if (err != 0) {
1375e1ebb9ecSlm66018 		/*
1376e1ebb9ecSlm66018 		 * Mark that attempt should be made to re-config sometime
1377e1ebb9ecSlm66018 		 * in future if a port is deleted.
1378e1ebb9ecSlm66018 		 */
1379e1ebb9ecSlm66018 		vswp->recfg_reqd = B_TRUE;
1380e1ebb9ecSlm66018 
1381e1ebb9ecSlm66018 		/*
1382e1ebb9ecSlm66018 		 * Only 1 mode specified, nothing more to do.
1383e1ebb9ecSlm66018 		 */
1384e1ebb9ecSlm66018 		if (vswp->smode_num == 1)
1385e1ebb9ecSlm66018 			return (err);
1386e1ebb9ecSlm66018 
1387e1ebb9ecSlm66018 		/*
1388e1ebb9ecSlm66018 		 * If promiscuous was next mode specified try to
1389e1ebb9ecSlm66018 		 * set the card into that mode.
1390e1ebb9ecSlm66018 		 */
1391e1ebb9ecSlm66018 		if ((vswp->smode_idx <= (vswp->smode_num - 2)) &&
1392e1ebb9ecSlm66018 			(vswp->smode[vswp->smode_idx + 1]
1393e1ebb9ecSlm66018 					== VSW_LAYER2_PROMISC)) {
1394e1ebb9ecSlm66018 			vswp->smode_idx += 1;
13955f94e909Ssg70180 			return (vsw_set_hw_promisc(vswp, port, type));
1396e1ebb9ecSlm66018 		}
1397e1ebb9ecSlm66018 		return (err);
1398e1ebb9ecSlm66018 	}
1399e1ebb9ecSlm66018 
14005f94e909Ssg70180 	if (type == VSW_VNETPORT) {
1401e1ebb9ecSlm66018 		port->addr_slot = mac_addr.mma_slot;
1402e1ebb9ecSlm66018 		port->addr_set = VSW_ADDR_HW;
14035f94e909Ssg70180 	} else {
14045f94e909Ssg70180 		vswp->addr_slot = mac_addr.mma_slot;
14055f94e909Ssg70180 		vswp->addr_set = VSW_ADDR_HW;
14065f94e909Ssg70180 	}
1407e1ebb9ecSlm66018 
14085f94e909Ssg70180 	D2(vswp, "programmed addr %x:%x:%x:%x:%x:%x into slot %d "
14095f94e909Ssg70180 		"of device %s",
14105f94e909Ssg70180 		mac_addr.mma_addr[0], mac_addr.mma_addr[1],
14115f94e909Ssg70180 		mac_addr.mma_addr[2], mac_addr.mma_addr[3],
14125f94e909Ssg70180 		mac_addr.mma_addr[4], mac_addr.mma_addr[5],
14135f94e909Ssg70180 		mac_addr.mma_slot, vswp->physname);
1414e1ebb9ecSlm66018 
1415e1ebb9ecSlm66018 	D1(vswp, "%s: exit", __func__);
1416e1ebb9ecSlm66018 
1417e1ebb9ecSlm66018 	return (0);
1418e1ebb9ecSlm66018 }
1419e1ebb9ecSlm66018 
1420e1ebb9ecSlm66018 /*
1421e1ebb9ecSlm66018  * If in layer 3 mode do nothing.
1422e1ebb9ecSlm66018  *
1423e1ebb9ecSlm66018  * If in layer 2 switched mode remove the address from the physical
1424e1ebb9ecSlm66018  * device.
1425e1ebb9ecSlm66018  *
1426e1ebb9ecSlm66018  * If in layer 2 promiscuous mode disable promisc mode.
1427e1ebb9ecSlm66018  *
1428e1ebb9ecSlm66018  * Returns 0 on success.
1429e1ebb9ecSlm66018  */
1430e1ebb9ecSlm66018 static int
14315f94e909Ssg70180 vsw_unset_hw(vsw_t *vswp, vsw_port_t *port, int type)
1432e1ebb9ecSlm66018 {
14335f94e909Ssg70180 	mac_addr_slot_t	slot;
14345f94e909Ssg70180 	int		rv;
1435e1ebb9ecSlm66018 
1436e1ebb9ecSlm66018 	D1(vswp, "%s: enter", __func__);
1437e1ebb9ecSlm66018 
14385f94e909Ssg70180 	ASSERT(MUTEX_HELD(&vswp->hw_lock));
14395f94e909Ssg70180 
1440e1ebb9ecSlm66018 	if (vswp->smode[vswp->smode_idx] == VSW_LAYER3)
1441e1ebb9ecSlm66018 		return (0);
1442e1ebb9ecSlm66018 
14435f94e909Ssg70180 	switch (type) {
14445f94e909Ssg70180 	case VSW_VNETPORT:
14455f94e909Ssg70180 		ASSERT(port != NULL);
14465f94e909Ssg70180 
1447e1ebb9ecSlm66018 		if (port->addr_set == VSW_ADDR_PROMISC) {
14485f94e909Ssg70180 			return (vsw_unset_hw_promisc(vswp, port, type));
14495f94e909Ssg70180 
14505f94e909Ssg70180 		} else if (port->addr_set == VSW_ADDR_HW) {
14515f94e909Ssg70180 			slot = port->addr_slot;
14525f94e909Ssg70180 			if ((rv = vsw_unset_hw_addr(vswp, slot)) == 0)
14535f94e909Ssg70180 				port->addr_set = VSW_ADDR_UNSET;
1454e1ebb9ecSlm66018 		}
1455e1ebb9ecSlm66018 
14565f94e909Ssg70180 		break;
14575f94e909Ssg70180 
14585f94e909Ssg70180 	case VSW_LOCALDEV:
14595f94e909Ssg70180 		if (vswp->addr_set == VSW_ADDR_PROMISC) {
14605f94e909Ssg70180 			return (vsw_unset_hw_promisc(vswp, NULL, type));
14615f94e909Ssg70180 
14625f94e909Ssg70180 		} else if (vswp->addr_set == VSW_ADDR_HW) {
14635f94e909Ssg70180 			slot = vswp->addr_slot;
14645f94e909Ssg70180 			if ((rv = vsw_unset_hw_addr(vswp, slot)) == 0)
14655f94e909Ssg70180 				vswp->addr_set = VSW_ADDR_UNSET;
14665f94e909Ssg70180 		}
14675f94e909Ssg70180 
14685f94e909Ssg70180 		break;
14695f94e909Ssg70180 
14705f94e909Ssg70180 	default:
14715f94e909Ssg70180 		/* should never happen */
14725f94e909Ssg70180 		DERR(vswp, "%s: unknown type %d", __func__, type);
14735f94e909Ssg70180 		ASSERT(0);
14745f94e909Ssg70180 		return (1);
14755f94e909Ssg70180 	}
14765f94e909Ssg70180 
14775f94e909Ssg70180 	D1(vswp, "%s: exit", __func__);
14785f94e909Ssg70180 	return (rv);
14795f94e909Ssg70180 }
14805f94e909Ssg70180 
14815f94e909Ssg70180 /*
14825f94e909Ssg70180  * Attempt to program a unicast address into HW.
14835f94e909Ssg70180  *
14845f94e909Ssg70180  * Returns 0 on sucess, 1 on failure.
14855f94e909Ssg70180  */
14865f94e909Ssg70180 static int
14875f94e909Ssg70180 vsw_set_hw_addr(vsw_t *vswp, mac_multi_addr_t *mac)
14885f94e909Ssg70180 {
14895f94e909Ssg70180 	void	*mah;
14905f94e909Ssg70180 	int	rv;
14915f94e909Ssg70180 
14925f94e909Ssg70180 	D1(vswp, "%s: enter", __func__);
14935f94e909Ssg70180 
14945f94e909Ssg70180 	ASSERT(MUTEX_HELD(&vswp->hw_lock));
14955f94e909Ssg70180 
1496e1ebb9ecSlm66018 	if (vswp->maddr.maddr_handle == NULL)
1497e1ebb9ecSlm66018 		return (1);
1498e1ebb9ecSlm66018 
1499e1ebb9ecSlm66018 	mah = vswp->maddr.maddr_handle;
1500e1ebb9ecSlm66018 
15015f94e909Ssg70180 	rv = vswp->maddr.maddr_add(mah, mac);
15025f94e909Ssg70180 
15035f94e909Ssg70180 	if (rv == 0)
15045f94e909Ssg70180 		return (0);
15055f94e909Ssg70180 
15065f94e909Ssg70180 	/*
15075f94e909Ssg70180 	 * Its okay for the add to fail because we have exhausted
15085f94e909Ssg70180 	 * all the resouces in the hardware device. Any other error
15095f94e909Ssg70180 	 * we want to flag.
15105f94e909Ssg70180 	 */
15115f94e909Ssg70180 	if (rv != ENOSPC) {
15125f94e909Ssg70180 		cmn_err(CE_WARN, "!vsw%d: error programming "
15135f94e909Ssg70180 			"address %x:%x:%x:%x:%x:%x into HW "
15145f94e909Ssg70180 			"err (%d)", vswp->instance,
15155f94e909Ssg70180 			mac->mma_addr[0], mac->mma_addr[1],
15165f94e909Ssg70180 			mac->mma_addr[2], mac->mma_addr[3],
15175f94e909Ssg70180 			mac->mma_addr[4], mac->mma_addr[5], rv);
15185f94e909Ssg70180 	}
15195f94e909Ssg70180 	D1(vswp, "%s: exit", __func__);
15205f94e909Ssg70180 	return (1);
1521e1ebb9ecSlm66018 }
1522e1ebb9ecSlm66018 
15235f94e909Ssg70180 /*
15245f94e909Ssg70180  * Remove a unicast mac address which has previously been programmed
15255f94e909Ssg70180  * into HW.
15265f94e909Ssg70180  *
15275f94e909Ssg70180  * Returns 0 on sucess, 1 on failure.
15285f94e909Ssg70180  */
15295f94e909Ssg70180 static int
15305f94e909Ssg70180 vsw_unset_hw_addr(vsw_t *vswp, int slot)
15315f94e909Ssg70180 {
15325f94e909Ssg70180 	void	*mah;
15335f94e909Ssg70180 	int	rv;
1534e1ebb9ecSlm66018 
15355f94e909Ssg70180 	D1(vswp, "%s: enter", __func__);
15365f94e909Ssg70180 
15375f94e909Ssg70180 	ASSERT(MUTEX_HELD(&vswp->hw_lock));
15385f94e909Ssg70180 	ASSERT(slot >= 0);
15395f94e909Ssg70180 
15405f94e909Ssg70180 	if (vswp->maddr.maddr_handle == NULL)
15415f94e909Ssg70180 		return (1);
15425f94e909Ssg70180 
15435f94e909Ssg70180 	mah = vswp->maddr.maddr_handle;
15445f94e909Ssg70180 
15455f94e909Ssg70180 	rv = vswp->maddr.maddr_remove(mah, slot);
15465f94e909Ssg70180 	if (rv != 0) {
15475f94e909Ssg70180 		cmn_err(CE_WARN, "!vsw%d: unable to remove address "
15485f94e909Ssg70180 			"from slot %d in device %s (err %d)",
15495f94e909Ssg70180 			vswp->instance, slot, vswp->physname, rv);
15505f94e909Ssg70180 		return (1);
1551e1ebb9ecSlm66018 	}
1552e1ebb9ecSlm66018 
15535f94e909Ssg70180 	D2(vswp, "removed addr from slot %d in device %s",
15545f94e909Ssg70180 		slot, vswp->physname);
15555f94e909Ssg70180 
1556e1ebb9ecSlm66018 	D1(vswp, "%s: exit", __func__);
1557e1ebb9ecSlm66018 	return (0);
1558e1ebb9ecSlm66018 }
1559e1ebb9ecSlm66018 
1560e1ebb9ecSlm66018 /*
1561e1ebb9ecSlm66018  * Set network card into promisc mode.
1562e1ebb9ecSlm66018  *
1563e1ebb9ecSlm66018  * Returns 0 on success, 1 on failure.
1564e1ebb9ecSlm66018  */
1565e1ebb9ecSlm66018 static int
15665f94e909Ssg70180 vsw_set_hw_promisc(vsw_t *vswp, vsw_port_t *port, int type)
1567e1ebb9ecSlm66018 {
1568e1ebb9ecSlm66018 	D1(vswp, "%s: enter", __func__);
1569e1ebb9ecSlm66018 
15705f94e909Ssg70180 	ASSERT(MUTEX_HELD(&vswp->hw_lock));
15715f94e909Ssg70180 	ASSERT((type == VSW_LOCALDEV) || (type == VSW_VNETPORT));
15725f94e909Ssg70180 
157334683adeSsg70180 	mutex_enter(&vswp->mac_lock);
157434683adeSsg70180 	if (vswp->mh == NULL) {
157534683adeSsg70180 		mutex_exit(&vswp->mac_lock);
1576e1ebb9ecSlm66018 		return (1);
157734683adeSsg70180 	}
1578e1ebb9ecSlm66018 
1579e1ebb9ecSlm66018 	if (vswp->promisc_cnt++ == 0) {
1580e1ebb9ecSlm66018 		if (mac_promisc_set(vswp->mh, B_TRUE, MAC_DEVPROMISC) != 0) {
1581e1ebb9ecSlm66018 			vswp->promisc_cnt--;
158234683adeSsg70180 			mutex_exit(&vswp->mac_lock);
1583e1ebb9ecSlm66018 			return (1);
1584e1ebb9ecSlm66018 		}
158534683adeSsg70180 		cmn_err(CE_NOTE, "!vsw%d: switching device %s into "
158634683adeSsg70180 			"promiscuous mode", vswp->instance, vswp->physname);
1587e1ebb9ecSlm66018 	}
158834683adeSsg70180 	mutex_exit(&vswp->mac_lock);
15895f94e909Ssg70180 
15905f94e909Ssg70180 	if (type == VSW_VNETPORT) {
15915f94e909Ssg70180 		ASSERT(port != NULL);
1592e1ebb9ecSlm66018 		port->addr_set = VSW_ADDR_PROMISC;
15935f94e909Ssg70180 	} else {
15945f94e909Ssg70180 		vswp->addr_set = VSW_ADDR_PROMISC;
15955f94e909Ssg70180 	}
1596e1ebb9ecSlm66018 
1597e1ebb9ecSlm66018 	D1(vswp, "%s: exit", __func__);
1598e1ebb9ecSlm66018 
1599e1ebb9ecSlm66018 	return (0);
1600e1ebb9ecSlm66018 }
1601e1ebb9ecSlm66018 
1602e1ebb9ecSlm66018 /*
1603e1ebb9ecSlm66018  * Turn off promiscuous mode on network card.
1604e1ebb9ecSlm66018  *
1605e1ebb9ecSlm66018  * Returns 0 on success, 1 on failure.
1606e1ebb9ecSlm66018  */
1607e1ebb9ecSlm66018 static int
16085f94e909Ssg70180 vsw_unset_hw_promisc(vsw_t *vswp, vsw_port_t *port, int type)
1609e1ebb9ecSlm66018 {
1610e1ebb9ecSlm66018 	vsw_port_list_t 	*plist = &vswp->plist;
1611e1ebb9ecSlm66018 
161234683adeSsg70180 	D2(vswp, "%s: enter", __func__);
1613e1ebb9ecSlm66018 
16145f94e909Ssg70180 	ASSERT(MUTEX_HELD(&vswp->hw_lock));
16155f94e909Ssg70180 	ASSERT((type == VSW_LOCALDEV) || (type == VSW_VNETPORT));
16165f94e909Ssg70180 
161734683adeSsg70180 	mutex_enter(&vswp->mac_lock);
161834683adeSsg70180 	if (vswp->mh == NULL) {
161934683adeSsg70180 		mutex_exit(&vswp->mac_lock);
1620e1ebb9ecSlm66018 		return (1);
162134683adeSsg70180 	}
1622e1ebb9ecSlm66018 
1623e1ebb9ecSlm66018 	if (--vswp->promisc_cnt == 0) {
1624e1ebb9ecSlm66018 		if (mac_promisc_set(vswp->mh, B_FALSE, MAC_DEVPROMISC) != 0) {
1625e1ebb9ecSlm66018 			vswp->promisc_cnt++;
162634683adeSsg70180 			mutex_exit(&vswp->mac_lock);
1627e1ebb9ecSlm66018 			return (1);
1628e1ebb9ecSlm66018 		}
1629e1ebb9ecSlm66018 
1630e1ebb9ecSlm66018 		/*
1631e1ebb9ecSlm66018 		 * We are exiting promisc mode either because we were
1632e1ebb9ecSlm66018 		 * only in promisc mode because we had failed over from
1633e1ebb9ecSlm66018 		 * switched mode due to HW resource issues, or the user
1634e1ebb9ecSlm66018 		 * wanted the card in promisc mode for all the ports and
1635e1ebb9ecSlm66018 		 * the last port is now being deleted. Tweak the message
1636e1ebb9ecSlm66018 		 * accordingly.
1637e1ebb9ecSlm66018 		 */
1638e1ebb9ecSlm66018 		if (plist->num_ports != 0) {
163934683adeSsg70180 			cmn_err(CE_NOTE, "!vsw%d: switching device %s back to "
164034683adeSsg70180 				"programmed mode", vswp->instance,
164134683adeSsg70180 				vswp->physname);
16421ae08745Sheppo 		} else {
164334683adeSsg70180 			cmn_err(CE_NOTE, "!vsw%d: switching device %s out of "
164434683adeSsg70180 				"promiscuous mode", vswp->instance,
164534683adeSsg70180 				vswp->physname);
16461ae08745Sheppo 		}
16471ae08745Sheppo 	}
164834683adeSsg70180 	mutex_exit(&vswp->mac_lock);
16495f94e909Ssg70180 
16505f94e909Ssg70180 	if (type == VSW_VNETPORT) {
16515f94e909Ssg70180 		ASSERT(port != NULL);
16525f94e909Ssg70180 		ASSERT(port->addr_set == VSW_ADDR_PROMISC);
1653e1ebb9ecSlm66018 		port->addr_set = VSW_ADDR_UNSET;
16545f94e909Ssg70180 	} else {
16555f94e909Ssg70180 		ASSERT(vswp->addr_set == VSW_ADDR_PROMISC);
16565f94e909Ssg70180 		vswp->addr_set = VSW_ADDR_UNSET;
16575f94e909Ssg70180 	}
1658e1ebb9ecSlm66018 
1659e1ebb9ecSlm66018 	D1(vswp, "%s: exit", __func__);
1660e1ebb9ecSlm66018 	return (0);
1661e1ebb9ecSlm66018 }
1662e1ebb9ecSlm66018 
1663e1ebb9ecSlm66018 /*
1664e1ebb9ecSlm66018  * Determine whether or not we are operating in our prefered
1665e1ebb9ecSlm66018  * mode and if not whether the physical resources now allow us
1666e1ebb9ecSlm66018  * to operate in it.
1667e1ebb9ecSlm66018  *
16685f94e909Ssg70180  * If a port is being removed should only be invoked after port has been
1669e1ebb9ecSlm66018  * removed from the port list.
1670e1ebb9ecSlm66018  */
16715f94e909Ssg70180 static void
1672e1ebb9ecSlm66018 vsw_reconfig_hw(vsw_t *vswp)
1673e1ebb9ecSlm66018 {
1674e1ebb9ecSlm66018 	int			s_idx;
1675e1ebb9ecSlm66018 
1676e1ebb9ecSlm66018 	D1(vswp, "%s: enter", __func__);
1677e1ebb9ecSlm66018 
16785f94e909Ssg70180 	ASSERT(MUTEX_HELD(&vswp->hw_lock));
1679e1ebb9ecSlm66018 
16805f94e909Ssg70180 	if (vswp->maddr.maddr_handle == NULL) {
16815f94e909Ssg70180 		return;
16825f94e909Ssg70180 	}
1683e1ebb9ecSlm66018 
1684e1ebb9ecSlm66018 	/*
1685e1ebb9ecSlm66018 	 * If we are in layer 2 (i.e. switched) or would like to be
16865f94e909Ssg70180 	 * in layer 2 then check if any ports or the vswitch itself
16875f94e909Ssg70180 	 * need to be programmed into the HW.
1688e1ebb9ecSlm66018 	 *
1689e1ebb9ecSlm66018 	 * This can happen in two cases - switched was specified as
1690e1ebb9ecSlm66018 	 * the prefered mode of operation but we exhausted the HW
1691e1ebb9ecSlm66018 	 * resources and so failed over to the next specifed mode,
1692e1ebb9ecSlm66018 	 * or switched was the only mode specified so after HW
1693e1ebb9ecSlm66018 	 * resources were exhausted there was nothing more we
1694e1ebb9ecSlm66018 	 * could do.
1695e1ebb9ecSlm66018 	 */
1696e1ebb9ecSlm66018 	if (vswp->smode_idx > 0)
1697e1ebb9ecSlm66018 		s_idx = vswp->smode_idx - 1;
1698e1ebb9ecSlm66018 	else
1699e1ebb9ecSlm66018 		s_idx = vswp->smode_idx;
1700e1ebb9ecSlm66018 
17015f94e909Ssg70180 	if (vswp->smode[s_idx] != VSW_LAYER2) {
17025f94e909Ssg70180 		return;
17035f94e909Ssg70180 	}
1704e1ebb9ecSlm66018 
1705e1ebb9ecSlm66018 	D2(vswp, "%s: attempting reconfig..", __func__);
1706e1ebb9ecSlm66018 
1707e1ebb9ecSlm66018 	/*
17085f94e909Ssg70180 	 * First, attempt to set the vswitch mac address into HW,
17095f94e909Ssg70180 	 * if required.
1710e1ebb9ecSlm66018 	 */
17115f94e909Ssg70180 	if (vsw_prog_if(vswp)) {
17125f94e909Ssg70180 		return;
1713e1ebb9ecSlm66018 	}
1714e1ebb9ecSlm66018 
1715e1ebb9ecSlm66018 	/*
17165f94e909Ssg70180 	 * Next, attempt to set any ports which have not yet been
17175f94e909Ssg70180 	 * programmed into HW.
1718e1ebb9ecSlm66018 	 */
17195f94e909Ssg70180 	if (vsw_prog_ports(vswp)) {
17205f94e909Ssg70180 		return;
1721e1ebb9ecSlm66018 	}
1722e1ebb9ecSlm66018 
17235f94e909Ssg70180 	/*
17245f94e909Ssg70180 	 * By now we know that have programmed all desired ports etc
17255f94e909Ssg70180 	 * into HW, so safe to mark reconfiguration as complete.
17265f94e909Ssg70180 	 */
1727e1ebb9ecSlm66018 	vswp->recfg_reqd = B_FALSE;
1728e1ebb9ecSlm66018 
1729e1ebb9ecSlm66018 	vswp->smode_idx = s_idx;
1730e1ebb9ecSlm66018 
17315f94e909Ssg70180 	D1(vswp, "%s: exit", __func__);
17325f94e909Ssg70180 }
17335f94e909Ssg70180 
17345f94e909Ssg70180 /*
17355f94e909Ssg70180  * Check to see if vsw itself is plumbed, and if so whether or not
17365f94e909Ssg70180  * its mac address should be written into HW.
17375f94e909Ssg70180  *
17385f94e909Ssg70180  * Returns 0 if could set address, or didn't have to set it.
17395f94e909Ssg70180  * Returns 1 if failed to set address.
17405f94e909Ssg70180  */
17415f94e909Ssg70180 static int
17425f94e909Ssg70180 vsw_prog_if(vsw_t *vswp)
17435f94e909Ssg70180 {
17445f94e909Ssg70180 	mac_multi_addr_t	addr;
17455f94e909Ssg70180 
17465f94e909Ssg70180 	D1(vswp, "%s: enter", __func__);
17475f94e909Ssg70180 
17485f94e909Ssg70180 	ASSERT(MUTEX_HELD(&vswp->hw_lock));
17495f94e909Ssg70180 
17505f94e909Ssg70180 	READ_ENTER(&vswp->if_lockrw);
17515f94e909Ssg70180 	if ((vswp->if_state & VSW_IF_UP) &&
17525f94e909Ssg70180 		(vswp->addr_set != VSW_ADDR_HW)) {
17535f94e909Ssg70180 
17545f94e909Ssg70180 		addr.mma_addrlen = ETHERADDRL;
17555f94e909Ssg70180 		ether_copy(&vswp->if_addr, &addr.mma_addr);
17565f94e909Ssg70180 
17575f94e909Ssg70180 		if (vsw_set_hw_addr(vswp, &addr) != 0) {
17585f94e909Ssg70180 			RW_EXIT(&vswp->if_lockrw);
17595f94e909Ssg70180 			return (1);
17605f94e909Ssg70180 		}
17615f94e909Ssg70180 
17625f94e909Ssg70180 		vswp->addr_slot = addr.mma_slot;
17635f94e909Ssg70180 
17645f94e909Ssg70180 		/*
17655f94e909Ssg70180 		 * If previously when plumbed had had to place
17665f94e909Ssg70180 		 * interface into promisc mode, now reverse that.
17675f94e909Ssg70180 		 *
17685f94e909Ssg70180 		 * Note that interface will only actually be set into
17695f94e909Ssg70180 		 * non-promisc mode when last port/interface has been
17705f94e909Ssg70180 		 * programmed into HW.
17715f94e909Ssg70180 		 */
17725f94e909Ssg70180 		if (vswp->addr_set == VSW_ADDR_PROMISC)
17735f94e909Ssg70180 			(void) vsw_unset_hw_promisc(vswp, NULL, VSW_LOCALDEV);
17745f94e909Ssg70180 
17755f94e909Ssg70180 		vswp->addr_set = VSW_ADDR_HW;
17765f94e909Ssg70180 	}
17775f94e909Ssg70180 	RW_EXIT(&vswp->if_lockrw);
17785f94e909Ssg70180 
17795f94e909Ssg70180 	D1(vswp, "%s: exit", __func__);
1780e1ebb9ecSlm66018 	return (0);
1781e1ebb9ecSlm66018 }
1782e1ebb9ecSlm66018 
17835f94e909Ssg70180 /*
17845f94e909Ssg70180  * Scan the port list for any ports which have not yet been set
17855f94e909Ssg70180  * into HW. For those found attempt to program their mac addresses
17865f94e909Ssg70180  * into the physical device.
17875f94e909Ssg70180  *
17885f94e909Ssg70180  * Returns 0 if able to program all required ports (can be 0) into HW.
17895f94e909Ssg70180  * Returns 1 if failed to set at least one mac address.
17905f94e909Ssg70180  */
17915f94e909Ssg70180 static int
17925f94e909Ssg70180 vsw_prog_ports(vsw_t *vswp)
17935f94e909Ssg70180 {
17945f94e909Ssg70180 	mac_multi_addr_t	addr;
17955f94e909Ssg70180 	vsw_port_list_t		*plist = &vswp->plist;
17965f94e909Ssg70180 	vsw_port_t		*tp;
17975f94e909Ssg70180 	int			rv = 0;
17985f94e909Ssg70180 
17995f94e909Ssg70180 	D1(vswp, "%s: enter", __func__);
18005f94e909Ssg70180 
18015f94e909Ssg70180 	ASSERT(MUTEX_HELD(&vswp->hw_lock));
18025f94e909Ssg70180 
18035f94e909Ssg70180 	READ_ENTER(&plist->lockrw);
18045f94e909Ssg70180 	for (tp = plist->head; tp != NULL; tp = tp->p_next) {
18055f94e909Ssg70180 		if (tp->addr_set != VSW_ADDR_HW) {
18065f94e909Ssg70180 			addr.mma_addrlen = ETHERADDRL;
18075f94e909Ssg70180 			ether_copy(&tp->p_macaddr, &addr.mma_addr);
18085f94e909Ssg70180 
18095f94e909Ssg70180 			if (vsw_set_hw_addr(vswp, &addr) != 0) {
18105f94e909Ssg70180 				rv = 1;
18115f94e909Ssg70180 				break;
18125f94e909Ssg70180 			}
18135f94e909Ssg70180 
18145f94e909Ssg70180 			tp->addr_slot = addr.mma_slot;
18155f94e909Ssg70180 
18165f94e909Ssg70180 			/*
18175f94e909Ssg70180 			 * If when this port had first attached we had
18185f94e909Ssg70180 			 * had to place the interface into promisc mode,
18195f94e909Ssg70180 			 * then now reverse that.
18205f94e909Ssg70180 			 *
18215f94e909Ssg70180 			 * Note that the interface will not actually
18225f94e909Ssg70180 			 * change to non-promisc mode until all ports
18235f94e909Ssg70180 			 * have been programmed.
18245f94e909Ssg70180 			 */
18255f94e909Ssg70180 			if (tp->addr_set == VSW_ADDR_PROMISC)
18265f94e909Ssg70180 				(void) vsw_unset_hw_promisc(vswp,
18275f94e909Ssg70180 						tp, VSW_VNETPORT);
18285f94e909Ssg70180 
18295f94e909Ssg70180 			tp->addr_set = VSW_ADDR_HW;
18305f94e909Ssg70180 		}
18315f94e909Ssg70180 	}
18325f94e909Ssg70180 	RW_EXIT(&plist->lockrw);
18335f94e909Ssg70180 
18345f94e909Ssg70180 	D1(vswp, "%s: exit", __func__);
1835e1ebb9ecSlm66018 	return (rv);
18361ae08745Sheppo }
18371ae08745Sheppo 
18387636cb21Slm66018 static void
18397636cb21Slm66018 vsw_mac_ring_tbl_entry_init(vsw_t *vswp, vsw_mac_ring_t *ringp)
18407636cb21Slm66018 {
18417636cb21Slm66018 	ringp->ring_state = VSW_MAC_RING_FREE;
18427636cb21Slm66018 	ringp->ring_arg = NULL;
18437636cb21Slm66018 	ringp->ring_blank = NULL;
18447636cb21Slm66018 	ringp->ring_vqp = NULL;
18457636cb21Slm66018 	ringp->ring_vswp = vswp;
18467636cb21Slm66018 }
18477636cb21Slm66018 
18487636cb21Slm66018 static void
18497636cb21Slm66018 vsw_mac_ring_tbl_init(vsw_t *vswp)
18507636cb21Slm66018 {
18517636cb21Slm66018 	int		i;
18527636cb21Slm66018 
18537636cb21Slm66018 	mutex_init(&vswp->mac_ring_lock, NULL, MUTEX_DRIVER, NULL);
18547636cb21Slm66018 
18557636cb21Slm66018 	vswp->mac_ring_tbl_sz = vsw_mac_rx_rings;
18567636cb21Slm66018 	vswp->mac_ring_tbl  =
18577636cb21Slm66018 		kmem_alloc(vsw_mac_rx_rings * sizeof (vsw_mac_ring_t),
18587636cb21Slm66018 		KM_SLEEP);
18597636cb21Slm66018 
18607636cb21Slm66018 	for (i = 0; i < vswp->mac_ring_tbl_sz; i++)
18617636cb21Slm66018 		vsw_mac_ring_tbl_entry_init(vswp, &vswp->mac_ring_tbl[i]);
18627636cb21Slm66018 }
18637636cb21Slm66018 
18647636cb21Slm66018 static void
18657636cb21Slm66018 vsw_mac_ring_tbl_destroy(vsw_t *vswp)
18667636cb21Slm66018 {
18677636cb21Slm66018 	int		i;
186834683adeSsg70180 	vsw_mac_ring_t	*ringp;
18697636cb21Slm66018 
18707636cb21Slm66018 	mutex_enter(&vswp->mac_ring_lock);
18717636cb21Slm66018 	for (i = 0; i < vswp->mac_ring_tbl_sz; i++) {
187234683adeSsg70180 		ringp = &vswp->mac_ring_tbl[i];
187334683adeSsg70180 
187434683adeSsg70180 		if (ringp->ring_state != VSW_MAC_RING_FREE) {
18757636cb21Slm66018 			/*
18767636cb21Slm66018 			 * Destroy the queue.
18777636cb21Slm66018 			 */
187834683adeSsg70180 			vsw_queue_stop(ringp->ring_vqp);
187934683adeSsg70180 			vsw_queue_destroy(ringp->ring_vqp);
18807636cb21Slm66018 
18817636cb21Slm66018 			/*
18827636cb21Slm66018 			 * Re-initialize the structure.
18837636cb21Slm66018 			 */
188434683adeSsg70180 			vsw_mac_ring_tbl_entry_init(vswp, ringp);
18857636cb21Slm66018 		}
18867636cb21Slm66018 	}
18877636cb21Slm66018 	mutex_exit(&vswp->mac_ring_lock);
18887636cb21Slm66018 
18897636cb21Slm66018 	mutex_destroy(&vswp->mac_ring_lock);
18907636cb21Slm66018 	kmem_free(vswp->mac_ring_tbl,
18917636cb21Slm66018 		vswp->mac_ring_tbl_sz * sizeof (vsw_mac_ring_t));
18927636cb21Slm66018 	vswp->mac_ring_tbl_sz = 0;
18937636cb21Slm66018 }
18947636cb21Slm66018 
18957636cb21Slm66018 /*
18967636cb21Slm66018  * Handle resource add callbacks from the driver below.
18977636cb21Slm66018  */
18987636cb21Slm66018 static mac_resource_handle_t
18997636cb21Slm66018 vsw_mac_ring_add_cb(void *arg, mac_resource_t *mrp)
19007636cb21Slm66018 {
19017636cb21Slm66018 	vsw_t		*vswp = (vsw_t *)arg;
19027636cb21Slm66018 	mac_rx_fifo_t	*mrfp = (mac_rx_fifo_t *)mrp;
19037636cb21Slm66018 	vsw_mac_ring_t	*ringp;
19047636cb21Slm66018 	vsw_queue_t	*vqp;
19057636cb21Slm66018 	int		i;
19067636cb21Slm66018 
19077636cb21Slm66018 	ASSERT(vswp != NULL);
19087636cb21Slm66018 	ASSERT(mrp != NULL);
19097636cb21Slm66018 	ASSERT(vswp->mac_ring_tbl != NULL);
19107636cb21Slm66018 
19117636cb21Slm66018 	D1(vswp, "%s: enter", __func__);
19127636cb21Slm66018 
19137636cb21Slm66018 	/*
19147636cb21Slm66018 	 * Check to make sure we have the correct resource type.
19157636cb21Slm66018 	 */
19167636cb21Slm66018 	if (mrp->mr_type != MAC_RX_FIFO)
19177636cb21Slm66018 		return (NULL);
19187636cb21Slm66018 
19197636cb21Slm66018 	/*
19207636cb21Slm66018 	 * Find a open entry in the ring table.
19217636cb21Slm66018 	 */
19227636cb21Slm66018 	mutex_enter(&vswp->mac_ring_lock);
19237636cb21Slm66018 	for (i = 0; i < vswp->mac_ring_tbl_sz; i++) {
19247636cb21Slm66018 		ringp = &vswp->mac_ring_tbl[i];
19257636cb21Slm66018 
19267636cb21Slm66018 		/*
19277636cb21Slm66018 		 * Check for an empty slot, if found, then setup queue
19287636cb21Slm66018 		 * and thread.
19297636cb21Slm66018 		 */
19307636cb21Slm66018 		if (ringp->ring_state == VSW_MAC_RING_FREE) {
19317636cb21Slm66018 			/*
19327636cb21Slm66018 			 * Create the queue for this ring.
19337636cb21Slm66018 			 */
19347636cb21Slm66018 			vqp = vsw_queue_create();
19357636cb21Slm66018 
19367636cb21Slm66018 			/*
19377636cb21Slm66018 			 * Initialize the ring data structure.
19387636cb21Slm66018 			 */
19397636cb21Slm66018 			ringp->ring_vqp = vqp;
19407636cb21Slm66018 			ringp->ring_arg = mrfp->mrf_arg;
19417636cb21Slm66018 			ringp->ring_blank = mrfp->mrf_blank;
19427636cb21Slm66018 			ringp->ring_state = VSW_MAC_RING_INUSE;
19437636cb21Slm66018 
19447636cb21Slm66018 			/*
19457636cb21Slm66018 			 * Create the worker thread.
19467636cb21Slm66018 			 */
19477636cb21Slm66018 			vqp->vq_worker = thread_create(NULL, 0,
19487636cb21Slm66018 				vsw_queue_worker, ringp, 0, &p0,
19497636cb21Slm66018 				TS_RUN, minclsyspri);
19507636cb21Slm66018 			if (vqp->vq_worker == NULL) {
19517636cb21Slm66018 				vsw_queue_destroy(vqp);
19527636cb21Slm66018 				vsw_mac_ring_tbl_entry_init(vswp, ringp);
19537636cb21Slm66018 				ringp = NULL;
19547636cb21Slm66018 			}
19557636cb21Slm66018 
195634683adeSsg70180 			if (ringp != NULL) {
195734683adeSsg70180 				/*
195834683adeSsg70180 				 * Make sure thread get's running state for
195934683adeSsg70180 				 * this ring.
196034683adeSsg70180 				 */
196134683adeSsg70180 				mutex_enter(&vqp->vq_lock);
196234683adeSsg70180 				while ((vqp->vq_state != VSW_QUEUE_RUNNING) &&
196334683adeSsg70180 					(vqp->vq_state != VSW_QUEUE_DRAINED)) {
196434683adeSsg70180 					cv_wait(&vqp->vq_cv, &vqp->vq_lock);
196534683adeSsg70180 				}
196634683adeSsg70180 
196734683adeSsg70180 				/*
196834683adeSsg70180 				 * If the thread is not running, cleanup.
196934683adeSsg70180 				 */
197034683adeSsg70180 				if (vqp->vq_state == VSW_QUEUE_DRAINED) {
197134683adeSsg70180 					vsw_queue_destroy(vqp);
197234683adeSsg70180 					vsw_mac_ring_tbl_entry_init(vswp,
197334683adeSsg70180 						ringp);
197434683adeSsg70180 					ringp = NULL;
197534683adeSsg70180 				}
197634683adeSsg70180 				mutex_exit(&vqp->vq_lock);
197734683adeSsg70180 			}
197834683adeSsg70180 
19797636cb21Slm66018 			mutex_exit(&vswp->mac_ring_lock);
19807636cb21Slm66018 			D1(vswp, "%s: exit", __func__);
19817636cb21Slm66018 			return ((mac_resource_handle_t)ringp);
19827636cb21Slm66018 		}
19837636cb21Slm66018 	}
19847636cb21Slm66018 	mutex_exit(&vswp->mac_ring_lock);
19857636cb21Slm66018 
19867636cb21Slm66018 	/*
19877636cb21Slm66018 	 * No slots in the ring table available.
19887636cb21Slm66018 	 */
19897636cb21Slm66018 	D1(vswp, "%s: exit", __func__);
19907636cb21Slm66018 	return (NULL);
19917636cb21Slm66018 }
19927636cb21Slm66018 
19937636cb21Slm66018 static void
19947636cb21Slm66018 vsw_queue_stop(vsw_queue_t *vqp)
19957636cb21Slm66018 {
19967636cb21Slm66018 	mutex_enter(&vqp->vq_lock);
19977636cb21Slm66018 
19987636cb21Slm66018 	if (vqp->vq_state == VSW_QUEUE_RUNNING) {
19997636cb21Slm66018 		vqp->vq_state = VSW_QUEUE_STOP;
20007636cb21Slm66018 		cv_signal(&vqp->vq_cv);
20017636cb21Slm66018 
20027636cb21Slm66018 		while (vqp->vq_state != VSW_QUEUE_DRAINED)
20037636cb21Slm66018 			cv_wait(&vqp->vq_cv, &vqp->vq_lock);
20047636cb21Slm66018 	}
20057636cb21Slm66018 
200634683adeSsg70180 	vqp->vq_state = VSW_QUEUE_STOPPED;
200734683adeSsg70180 
20087636cb21Slm66018 	mutex_exit(&vqp->vq_lock);
20097636cb21Slm66018 }
20107636cb21Slm66018 
20117636cb21Slm66018 static vsw_queue_t *
20127636cb21Slm66018 vsw_queue_create()
20137636cb21Slm66018 {
20147636cb21Slm66018 	vsw_queue_t *vqp;
20157636cb21Slm66018 
20167636cb21Slm66018 	vqp = kmem_zalloc(sizeof (vsw_queue_t), KM_SLEEP);
20177636cb21Slm66018 
20187636cb21Slm66018 	mutex_init(&vqp->vq_lock, NULL, MUTEX_DRIVER, NULL);
20197636cb21Slm66018 	cv_init(&vqp->vq_cv, NULL, CV_DRIVER, NULL);
20207636cb21Slm66018 	vqp->vq_first = NULL;
20217636cb21Slm66018 	vqp->vq_last = NULL;
202234683adeSsg70180 	vqp->vq_state = VSW_QUEUE_STOPPED;
20237636cb21Slm66018 
20247636cb21Slm66018 	return (vqp);
20257636cb21Slm66018 }
20267636cb21Slm66018 
20277636cb21Slm66018 static void
20287636cb21Slm66018 vsw_queue_destroy(vsw_queue_t *vqp)
20297636cb21Slm66018 {
20307636cb21Slm66018 	cv_destroy(&vqp->vq_cv);
20317636cb21Slm66018 	mutex_destroy(&vqp->vq_lock);
20327636cb21Slm66018 	kmem_free(vqp, sizeof (vsw_queue_t));
20337636cb21Slm66018 }
20347636cb21Slm66018 
20357636cb21Slm66018 static void
20367636cb21Slm66018 vsw_queue_worker(vsw_mac_ring_t *rrp)
20377636cb21Slm66018 {
20387636cb21Slm66018 	mblk_t		*mp;
20397636cb21Slm66018 	vsw_queue_t	*vqp = rrp->ring_vqp;
20407636cb21Slm66018 	vsw_t		*vswp = rrp->ring_vswp;
20417636cb21Slm66018 
20427636cb21Slm66018 	mutex_enter(&vqp->vq_lock);
20437636cb21Slm66018 
204434683adeSsg70180 	ASSERT(vqp->vq_state == VSW_QUEUE_STOPPED);
20457636cb21Slm66018 
20467636cb21Slm66018 	/*
20477636cb21Slm66018 	 * Set the state to running, since the thread is now active.
20487636cb21Slm66018 	 */
20497636cb21Slm66018 	vqp->vq_state = VSW_QUEUE_RUNNING;
205034683adeSsg70180 	cv_signal(&vqp->vq_cv);
20517636cb21Slm66018 
20527636cb21Slm66018 	while (vqp->vq_state == VSW_QUEUE_RUNNING) {
20537636cb21Slm66018 		/*
20547636cb21Slm66018 		 * Wait for work to do or the state has changed
20557636cb21Slm66018 		 * to not running.
20567636cb21Slm66018 		 */
20577636cb21Slm66018 		while ((vqp->vq_state == VSW_QUEUE_RUNNING) &&
20587636cb21Slm66018 				(vqp->vq_first == NULL)) {
20597636cb21Slm66018 			cv_wait(&vqp->vq_cv, &vqp->vq_lock);
20607636cb21Slm66018 		}
20617636cb21Slm66018 
20627636cb21Slm66018 		/*
20637636cb21Slm66018 		 * Process packets that we received from the interface.
20647636cb21Slm66018 		 */
20657636cb21Slm66018 		if (vqp->vq_first != NULL) {
20667636cb21Slm66018 			mp = vqp->vq_first;
20677636cb21Slm66018 
20687636cb21Slm66018 			vqp->vq_first = NULL;
20697636cb21Slm66018 			vqp->vq_last = NULL;
20707636cb21Slm66018 
20717636cb21Slm66018 			mutex_exit(&vqp->vq_lock);
20727636cb21Slm66018 
20737636cb21Slm66018 			/* switch the chain of packets received */
207434683adeSsg70180 			vswp->vsw_switch_frame(vswp, mp,
207534683adeSsg70180 						VSW_PHYSDEV, NULL, NULL);
20767636cb21Slm66018 
20777636cb21Slm66018 			mutex_enter(&vqp->vq_lock);
20787636cb21Slm66018 		}
20797636cb21Slm66018 	}
20807636cb21Slm66018 
20817636cb21Slm66018 	/*
20827636cb21Slm66018 	 * We are drained and signal we are done.
20837636cb21Slm66018 	 */
20847636cb21Slm66018 	vqp->vq_state = VSW_QUEUE_DRAINED;
20857636cb21Slm66018 	cv_signal(&vqp->vq_cv);
20867636cb21Slm66018 
20877636cb21Slm66018 	/*
20887636cb21Slm66018 	 * Exit lock and drain the remaining packets.
20897636cb21Slm66018 	 */
20907636cb21Slm66018 	mutex_exit(&vqp->vq_lock);
20917636cb21Slm66018 
20927636cb21Slm66018 	/*
20937636cb21Slm66018 	 * Exit the thread
20947636cb21Slm66018 	 */
20957636cb21Slm66018 	thread_exit();
20967636cb21Slm66018 }
20977636cb21Slm66018 
20987636cb21Slm66018 /*
20997636cb21Slm66018  * static void
21007636cb21Slm66018  * vsw_rx_queue_cb() - Receive callback routine when
21017636cb21Slm66018  *	vsw_multi_ring_enable is non-zero.  Queue the packets
21027636cb21Slm66018  *	to a packet queue for a worker thread to process.
21037636cb21Slm66018  */
21047636cb21Slm66018 static void
21057636cb21Slm66018 vsw_rx_queue_cb(void *arg, mac_resource_handle_t mrh, mblk_t *mp)
21067636cb21Slm66018 {
21077636cb21Slm66018 	vsw_mac_ring_t	*ringp = (vsw_mac_ring_t *)mrh;
21087636cb21Slm66018 	vsw_t		*vswp = (vsw_t *)arg;
21097636cb21Slm66018 	vsw_queue_t	*vqp;
21107636cb21Slm66018 	mblk_t		*bp, *last;
21117636cb21Slm66018 
21127636cb21Slm66018 	ASSERT(mrh != NULL);
21137636cb21Slm66018 	ASSERT(vswp != NULL);
21147636cb21Slm66018 	ASSERT(mp != NULL);
21157636cb21Slm66018 
21167636cb21Slm66018 	D1(vswp, "%s: enter", __func__);
21177636cb21Slm66018 
21187636cb21Slm66018 	/*
21197636cb21Slm66018 	 * Find the last element in the mblk chain.
21207636cb21Slm66018 	 */
21217636cb21Slm66018 	bp = mp;
21227636cb21Slm66018 	do {
21237636cb21Slm66018 		last = bp;
21247636cb21Slm66018 		bp = bp->b_next;
21257636cb21Slm66018 	} while (bp != NULL);
21267636cb21Slm66018 
21277636cb21Slm66018 	/* Get the queue for the packets */
21287636cb21Slm66018 	vqp = ringp->ring_vqp;
21297636cb21Slm66018 
21307636cb21Slm66018 	/*
21317636cb21Slm66018 	 * Grab the lock such we can queue the packets.
21327636cb21Slm66018 	 */
21337636cb21Slm66018 	mutex_enter(&vqp->vq_lock);
21347636cb21Slm66018 
21357636cb21Slm66018 	if (vqp->vq_state != VSW_QUEUE_RUNNING) {
21367636cb21Slm66018 		freemsg(mp);
213734683adeSsg70180 		mutex_exit(&vqp->vq_lock);
21387636cb21Slm66018 		goto vsw_rx_queue_cb_exit;
21397636cb21Slm66018 	}
21407636cb21Slm66018 
21417636cb21Slm66018 	/*
21427636cb21Slm66018 	 * Add the mblk chain to the queue.  If there
21437636cb21Slm66018 	 * is some mblks in the queue, then add the new
21447636cb21Slm66018 	 * chain to the end.
21457636cb21Slm66018 	 */
21467636cb21Slm66018 	if (vqp->vq_first == NULL)
21477636cb21Slm66018 		vqp->vq_first = mp;
21487636cb21Slm66018 	else
21497636cb21Slm66018 		vqp->vq_last->b_next = mp;
21507636cb21Slm66018 
21517636cb21Slm66018 	vqp->vq_last = last;
21527636cb21Slm66018 
21537636cb21Slm66018 	/*
21547636cb21Slm66018 	 * Signal the worker thread that there is work to
21557636cb21Slm66018 	 * do.
21567636cb21Slm66018 	 */
21577636cb21Slm66018 	cv_signal(&vqp->vq_cv);
21587636cb21Slm66018 
21597636cb21Slm66018 	/*
21607636cb21Slm66018 	 * Let go of the lock and exit.
21617636cb21Slm66018 	 */
21627636cb21Slm66018 	mutex_exit(&vqp->vq_lock);
216334683adeSsg70180 
216434683adeSsg70180 vsw_rx_queue_cb_exit:
21657636cb21Slm66018 	D1(vswp, "%s: exit", __func__);
21667636cb21Slm66018 }
21677636cb21Slm66018 
21681ae08745Sheppo /*
21691ae08745Sheppo  * receive callback routine. Invoked by MAC layer when there
21701ae08745Sheppo  * are pkts being passed up from physical device.
21711ae08745Sheppo  *
21721ae08745Sheppo  * PERF: It may be more efficient when the card is in promisc
21731ae08745Sheppo  * mode to check the dest address of the pkts here (against
21741ae08745Sheppo  * the FDB) rather than checking later. Needs to be investigated.
21751ae08745Sheppo  */
21761ae08745Sheppo static void
21771ae08745Sheppo vsw_rx_cb(void *arg, mac_resource_handle_t mrh, mblk_t *mp)
21781ae08745Sheppo {
21791ae08745Sheppo 	_NOTE(ARGUNUSED(mrh))
21801ae08745Sheppo 
21811ae08745Sheppo 	vsw_t		*vswp = (vsw_t *)arg;
21821ae08745Sheppo 
21831ae08745Sheppo 	ASSERT(vswp != NULL);
21841ae08745Sheppo 
21851ae08745Sheppo 	D1(vswp, "vsw_rx_cb: enter");
21861ae08745Sheppo 
21871ae08745Sheppo 	/* switch the chain of packets received */
218834683adeSsg70180 	vswp->vsw_switch_frame(vswp, mp, VSW_PHYSDEV, NULL, NULL);
21891ae08745Sheppo 
21901ae08745Sheppo 	D1(vswp, "vsw_rx_cb: exit");
21911ae08745Sheppo }
21921ae08745Sheppo 
21931ae08745Sheppo /*
21941ae08745Sheppo  * Send a message out over the physical device via the MAC layer.
21951ae08745Sheppo  *
21961ae08745Sheppo  * Returns any mblks that it was unable to transmit.
21971ae08745Sheppo  */
21981ae08745Sheppo static mblk_t *
21991ae08745Sheppo vsw_tx_msg(vsw_t *vswp, mblk_t *mp)
22001ae08745Sheppo {
22011ae08745Sheppo 	const mac_txinfo_t	*mtp;
22021ae08745Sheppo 	mblk_t			*nextp;
22031ae08745Sheppo 
220434683adeSsg70180 	mutex_enter(&vswp->mac_lock);
22051ae08745Sheppo 	if (vswp->mh == NULL) {
22061ae08745Sheppo 		DERR(vswp, "vsw_tx_msg: dropping pkts: no tx routine avail");
220734683adeSsg70180 		mutex_exit(&vswp->mac_lock);
22081ae08745Sheppo 		return (mp);
22091ae08745Sheppo 	} else {
22101ae08745Sheppo 		for (;;) {
22111ae08745Sheppo 			nextp = mp->b_next;
22121ae08745Sheppo 			mp->b_next = NULL;
22131ae08745Sheppo 
22141ae08745Sheppo 			mtp = vswp->txinfo;
221534683adeSsg70180 
22161ae08745Sheppo 			if ((mp = mtp->mt_fn(mtp->mt_arg, mp)) != NULL) {
22171ae08745Sheppo 				mp->b_next = nextp;
22181ae08745Sheppo 				break;
22191ae08745Sheppo 			}
22201ae08745Sheppo 
22211ae08745Sheppo 			if ((mp = nextp) == NULL)
22221ae08745Sheppo 				break;
22231ae08745Sheppo 		}
22241ae08745Sheppo 	}
222534683adeSsg70180 	mutex_exit(&vswp->mac_lock);
22261ae08745Sheppo 
22271ae08745Sheppo 	return (mp);
22281ae08745Sheppo }
22291ae08745Sheppo 
22301ae08745Sheppo /*
22311ae08745Sheppo  * Register with the MAC layer as a network device, so we
22321ae08745Sheppo  * can be plumbed if necessary.
22331ae08745Sheppo  */
22341ae08745Sheppo static int
22351ae08745Sheppo vsw_mac_register(vsw_t *vswp)
22361ae08745Sheppo {
2237ba2e4443Sseb 	mac_register_t	*macp;
2238ba2e4443Sseb 	int		rv;
22391ae08745Sheppo 
22401ae08745Sheppo 	D1(vswp, "%s: enter", __func__);
22411ae08745Sheppo 
2242ba2e4443Sseb 	if ((macp = mac_alloc(MAC_VERSION)) == NULL)
2243ba2e4443Sseb 		return (EINVAL);
2244ba2e4443Sseb 	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
22451ae08745Sheppo 	macp->m_driver = vswp;
2246ba2e4443Sseb 	macp->m_dip = vswp->dip;
2247ba2e4443Sseb 	macp->m_src_addr = (uint8_t *)&vswp->if_addr;
2248ba2e4443Sseb 	macp->m_callbacks = &vsw_m_callbacks;
2249ba2e4443Sseb 	macp->m_min_sdu = 0;
2250ba2e4443Sseb 	macp->m_max_sdu = ETHERMTU;
2251ba2e4443Sseb 	rv = mac_register(macp, &vswp->if_mh);
2252ba2e4443Sseb 	mac_free(macp);
2253ba2e4443Sseb 	if (rv == 0)
2254ba2e4443Sseb 		vswp->if_state |= VSW_IF_REG;
22551ae08745Sheppo 
22561ae08745Sheppo 	D1(vswp, "%s: exit", __func__);
22571ae08745Sheppo 
22581ae08745Sheppo 	return (rv);
22591ae08745Sheppo }
22601ae08745Sheppo 
22611ae08745Sheppo static int
22621ae08745Sheppo vsw_mac_unregister(vsw_t *vswp)
22631ae08745Sheppo {
22641ae08745Sheppo 	int		rv = 0;
22651ae08745Sheppo 
22661ae08745Sheppo 	D1(vswp, "%s: enter", __func__);
22671ae08745Sheppo 
22681ae08745Sheppo 	WRITE_ENTER(&vswp->if_lockrw);
22691ae08745Sheppo 
2270ba2e4443Sseb 	if (vswp->if_state & VSW_IF_REG) {
2271ba2e4443Sseb 		rv = mac_unregister(vswp->if_mh);
22721ae08745Sheppo 		if (rv != 0) {
22731ae08745Sheppo 			DWARN(vswp, "%s: unable to unregister from MAC "
22741ae08745Sheppo 				"framework", __func__);
22751ae08745Sheppo 
22761ae08745Sheppo 			RW_EXIT(&vswp->if_lockrw);
22771ae08745Sheppo 			D1(vswp, "%s: fail exit", __func__);
22781ae08745Sheppo 			return (rv);
22791ae08745Sheppo 		}
22801ae08745Sheppo 
2281ba2e4443Sseb 		/* mark i/f as down and unregistered */
2282ba2e4443Sseb 		vswp->if_state &= ~(VSW_IF_UP | VSW_IF_REG);
22831ae08745Sheppo 	}
22841ae08745Sheppo 	RW_EXIT(&vswp->if_lockrw);
22851ae08745Sheppo 
22861ae08745Sheppo 	D1(vswp, "%s: exit", __func__);
22871ae08745Sheppo 
22881ae08745Sheppo 	return (rv);
22891ae08745Sheppo }
22901ae08745Sheppo 
2291ba2e4443Sseb static int
2292ba2e4443Sseb vsw_m_stat(void *arg, uint_t stat, uint64_t *val)
22931ae08745Sheppo {
22941ae08745Sheppo 	vsw_t			*vswp = (vsw_t *)arg;
22951ae08745Sheppo 
22961ae08745Sheppo 	D1(vswp, "%s: enter", __func__);
22971ae08745Sheppo 
229834683adeSsg70180 	mutex_enter(&vswp->mac_lock);
229934683adeSsg70180 	if (vswp->mh == NULL) {
230034683adeSsg70180 		mutex_exit(&vswp->mac_lock);
2301ba2e4443Sseb 		return (EINVAL);
230234683adeSsg70180 	}
23031ae08745Sheppo 
23041ae08745Sheppo 	/* return stats from underlying device */
2305ba2e4443Sseb 	*val = mac_stat_get(vswp->mh, stat);
230634683adeSsg70180 
230734683adeSsg70180 	mutex_exit(&vswp->mac_lock);
230834683adeSsg70180 
2309ba2e4443Sseb 	return (0);
23101ae08745Sheppo }
23111ae08745Sheppo 
23121ae08745Sheppo static void
23131ae08745Sheppo vsw_m_stop(void *arg)
23141ae08745Sheppo {
23151ae08745Sheppo 	vsw_t		*vswp = (vsw_t *)arg;
23161ae08745Sheppo 
23171ae08745Sheppo 	D1(vswp, "%s: enter", __func__);
23181ae08745Sheppo 
23191ae08745Sheppo 	WRITE_ENTER(&vswp->if_lockrw);
23201ae08745Sheppo 	vswp->if_state &= ~VSW_IF_UP;
23211ae08745Sheppo 	RW_EXIT(&vswp->if_lockrw);
23221ae08745Sheppo 
23235f94e909Ssg70180 	mutex_enter(&vswp->hw_lock);
23245f94e909Ssg70180 
23255f94e909Ssg70180 	(void) vsw_unset_hw(vswp, NULL, VSW_LOCALDEV);
23265f94e909Ssg70180 
23275f94e909Ssg70180 	if (vswp->recfg_reqd)
23285f94e909Ssg70180 		vsw_reconfig_hw(vswp);
23295f94e909Ssg70180 
23305f94e909Ssg70180 	mutex_exit(&vswp->hw_lock);
23315f94e909Ssg70180 
23321ae08745Sheppo 	D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state);
23331ae08745Sheppo }
23341ae08745Sheppo 
23351ae08745Sheppo static int
23361ae08745Sheppo vsw_m_start(void *arg)
23371ae08745Sheppo {
23381ae08745Sheppo 	vsw_t		*vswp = (vsw_t *)arg;
23391ae08745Sheppo 
23401ae08745Sheppo 	D1(vswp, "%s: enter", __func__);
23411ae08745Sheppo 
23421ae08745Sheppo 	WRITE_ENTER(&vswp->if_lockrw);
23431ae08745Sheppo 	vswp->if_state |= VSW_IF_UP;
23441ae08745Sheppo 	RW_EXIT(&vswp->if_lockrw);
23451ae08745Sheppo 
23465f94e909Ssg70180 	mutex_enter(&vswp->hw_lock);
23475f94e909Ssg70180 	(void) vsw_set_hw(vswp, NULL, VSW_LOCALDEV);
23485f94e909Ssg70180 	mutex_exit(&vswp->hw_lock);
23495f94e909Ssg70180 
23501ae08745Sheppo 	D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state);
23511ae08745Sheppo 	return (0);
23521ae08745Sheppo }
23531ae08745Sheppo 
23541ae08745Sheppo /*
23551ae08745Sheppo  * Change the local interface address.
23565f94e909Ssg70180  *
23575f94e909Ssg70180  * Note: we don't support this entry point. The local
23585f94e909Ssg70180  * mac address of the switch can only be changed via its
23595f94e909Ssg70180  * MD node properties.
23601ae08745Sheppo  */
23611ae08745Sheppo static int
23621ae08745Sheppo vsw_m_unicst(void *arg, const uint8_t *macaddr)
23631ae08745Sheppo {
23645f94e909Ssg70180 	_NOTE(ARGUNUSED(arg, macaddr))
23651ae08745Sheppo 
23665f94e909Ssg70180 	return (DDI_FAILURE);
23671ae08745Sheppo }
23681ae08745Sheppo 
23691ae08745Sheppo static int
23701ae08745Sheppo vsw_m_multicst(void *arg, boolean_t add, const uint8_t *mca)
23711ae08745Sheppo {
23721ae08745Sheppo 	vsw_t		*vswp = (vsw_t *)arg;
23731ae08745Sheppo 	mcst_addr_t	*mcst_p = NULL;
23741ae08745Sheppo 	uint64_t	addr = 0x0;
2375e1ebb9ecSlm66018 	int		i, ret = 0;
23761ae08745Sheppo 
23771ae08745Sheppo 	D1(vswp, "%s: enter", __func__);
23781ae08745Sheppo 
23791ae08745Sheppo 	/*
23801ae08745Sheppo 	 * Convert address into form that can be used
23811ae08745Sheppo 	 * as hash table key.
23821ae08745Sheppo 	 */
23831ae08745Sheppo 	for (i = 0; i < ETHERADDRL; i++) {
23841ae08745Sheppo 		addr = (addr << 8) | mca[i];
23851ae08745Sheppo 	}
23861ae08745Sheppo 
23871ae08745Sheppo 	D2(vswp, "%s: addr = 0x%llx", __func__, addr);
23881ae08745Sheppo 
23891ae08745Sheppo 	if (add) {
23901ae08745Sheppo 		D2(vswp, "%s: adding multicast", __func__);
23911ae08745Sheppo 		if (vsw_add_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) {
23921ae08745Sheppo 			/*
23931ae08745Sheppo 			 * Update the list of multicast addresses
23941ae08745Sheppo 			 * contained within the vsw_t structure to
23951ae08745Sheppo 			 * include this new one.
23961ae08745Sheppo 			 */
23971ae08745Sheppo 			mcst_p = kmem_zalloc(sizeof (mcst_addr_t), KM_NOSLEEP);
23981ae08745Sheppo 			if (mcst_p == NULL) {
23991ae08745Sheppo 				DERR(vswp, "%s unable to alloc mem", __func__);
24001ae08745Sheppo 				return (1);
24011ae08745Sheppo 			}
24021ae08745Sheppo 			mcst_p->addr = addr;
24031ae08745Sheppo 
24041ae08745Sheppo 			mutex_enter(&vswp->mca_lock);
24051ae08745Sheppo 			mcst_p->nextp = vswp->mcap;
24061ae08745Sheppo 			vswp->mcap = mcst_p;
24071ae08745Sheppo 			mutex_exit(&vswp->mca_lock);
24081ae08745Sheppo 
24091ae08745Sheppo 			/*
24101ae08745Sheppo 			 * Call into the underlying driver to program the
24111ae08745Sheppo 			 * address into HW.
24121ae08745Sheppo 			 */
241334683adeSsg70180 			mutex_enter(&vswp->mac_lock);
2414e1ebb9ecSlm66018 			if (vswp->mh != NULL) {
2415e1ebb9ecSlm66018 				ret = mac_multicst_add(vswp->mh, mca);
2416e1ebb9ecSlm66018 				if (ret != 0) {
241734683adeSsg70180 					cmn_err(CE_WARN, "!vsw%d: unable to "
241834683adeSsg70180 						"add multicast address",
241934683adeSsg70180 						vswp->instance);
242034683adeSsg70180 					mutex_exit(&vswp->mac_lock);
2421e1ebb9ecSlm66018 					goto vsw_remove_addr;
2422e1ebb9ecSlm66018 				}
24231ae08745Sheppo 			}
242434683adeSsg70180 			mutex_exit(&vswp->mac_lock);
24251ae08745Sheppo 		} else {
242634683adeSsg70180 			cmn_err(CE_WARN, "!vsw%d: unable to add multicast "
242734683adeSsg70180 				"address", vswp->instance);
2428e1ebb9ecSlm66018 		}
2429e1ebb9ecSlm66018 		return (ret);
2430e1ebb9ecSlm66018 	}
2431e1ebb9ecSlm66018 
2432e1ebb9ecSlm66018 vsw_remove_addr:
2433e1ebb9ecSlm66018 
24341ae08745Sheppo 	D2(vswp, "%s: removing multicast", __func__);
24351ae08745Sheppo 	/*
24361ae08745Sheppo 	 * Remove the address from the hash table..
24371ae08745Sheppo 	 */
24381ae08745Sheppo 	if (vsw_del_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) {
24391ae08745Sheppo 
24401ae08745Sheppo 		/*
24411ae08745Sheppo 		 * ..and then from the list maintained in the
24421ae08745Sheppo 		 * vsw_t structure.
24431ae08745Sheppo 		 */
24441ae08745Sheppo 		vsw_del_addr(VSW_LOCALDEV, vswp, addr);
24451ae08745Sheppo 
244634683adeSsg70180 		mutex_enter(&vswp->mac_lock);
24471ae08745Sheppo 		if (vswp->mh != NULL)
24481ae08745Sheppo 			(void) mac_multicst_remove(vswp->mh, mca);
244934683adeSsg70180 		mutex_exit(&vswp->mac_lock);
24501ae08745Sheppo 	}
24511ae08745Sheppo 
24521ae08745Sheppo 	D1(vswp, "%s: exit", __func__);
24531ae08745Sheppo 
24541ae08745Sheppo 	return (0);
24551ae08745Sheppo }
24561ae08745Sheppo 
24571ae08745Sheppo static int
24581ae08745Sheppo vsw_m_promisc(void *arg, boolean_t on)
24591ae08745Sheppo {
24601ae08745Sheppo 	vsw_t		*vswp = (vsw_t *)arg;
24611ae08745Sheppo 
24621ae08745Sheppo 	D1(vswp, "%s: enter", __func__);
24631ae08745Sheppo 
24641ae08745Sheppo 	WRITE_ENTER(&vswp->if_lockrw);
24651ae08745Sheppo 	if (on)
24661ae08745Sheppo 		vswp->if_state |= VSW_IF_PROMISC;
24671ae08745Sheppo 	else
24681ae08745Sheppo 		vswp->if_state &= ~VSW_IF_PROMISC;
24691ae08745Sheppo 	RW_EXIT(&vswp->if_lockrw);
24701ae08745Sheppo 
24711ae08745Sheppo 	D1(vswp, "%s: exit", __func__);
24721ae08745Sheppo 
24731ae08745Sheppo 	return (0);
24741ae08745Sheppo }
24751ae08745Sheppo 
24761ae08745Sheppo static mblk_t *
24771ae08745Sheppo vsw_m_tx(void *arg, mblk_t *mp)
24781ae08745Sheppo {
24791ae08745Sheppo 	vsw_t		*vswp = (vsw_t *)arg;
24801ae08745Sheppo 
24811ae08745Sheppo 	D1(vswp, "%s: enter", __func__);
24821ae08745Sheppo 
248334683adeSsg70180 	vswp->vsw_switch_frame(vswp, mp, VSW_LOCALDEV, NULL, NULL);
24841ae08745Sheppo 
24851ae08745Sheppo 	D1(vswp, "%s: exit", __func__);
24861ae08745Sheppo 
24871ae08745Sheppo 	return (NULL);
24881ae08745Sheppo }
24891ae08745Sheppo 
24901ae08745Sheppo /*
24911ae08745Sheppo  * Register for machine description (MD) updates.
249234683adeSsg70180  *
249334683adeSsg70180  * Returns 0 on success, 1 on failure.
24941ae08745Sheppo  */
249534683adeSsg70180 static int
24961ae08745Sheppo vsw_mdeg_register(vsw_t *vswp)
24971ae08745Sheppo {
24981ae08745Sheppo 	mdeg_prop_spec_t	*pspecp;
24991ae08745Sheppo 	mdeg_node_spec_t	*inst_specp;
250034683adeSsg70180 	mdeg_handle_t		mdeg_hdl, mdeg_port_hdl;
25011ae08745Sheppo 	size_t			templatesz;
25021ae08745Sheppo 	int			inst, rv;
25031ae08745Sheppo 
25041ae08745Sheppo 	D1(vswp, "%s: enter", __func__);
25051ae08745Sheppo 
250634683adeSsg70180 	/*
250734683adeSsg70180 	 * In each 'virtual-device' node in the MD there is a
250834683adeSsg70180 	 * 'cfg-handle' property which is the MD's concept of
250934683adeSsg70180 	 * an instance number (this may be completely different from
251034683adeSsg70180 	 * the device drivers instance #). OBP reads that value and
251134683adeSsg70180 	 * stores it in the 'reg' property of the appropriate node in
251234683adeSsg70180 	 * the device tree. So we use the 'reg' value when registering
251334683adeSsg70180 	 * with the mdeg framework, to ensure we get events for the
251434683adeSsg70180 	 * correct nodes.
251534683adeSsg70180 	 */
25161ae08745Sheppo 	inst = ddi_prop_get_int(DDI_DEV_T_ANY, vswp->dip,
25171ae08745Sheppo 		DDI_PROP_DONTPASS, reg_propname, -1);
25181ae08745Sheppo 	if (inst == -1) {
251934683adeSsg70180 		cmn_err(CE_WARN, "!vsw%d: Unable to read %s property from "
252034683adeSsg70180 			"OBP device tree", vswp->instance, reg_propname);
252134683adeSsg70180 		return (1);
25221ae08745Sheppo 	}
25231ae08745Sheppo 
25241ae08745Sheppo 	D2(vswp, "%s: instance %d registering with mdeg", __func__, inst);
25251ae08745Sheppo 
25261ae08745Sheppo 	/*
25271ae08745Sheppo 	 * Allocate and initialize a per-instance copy
25281ae08745Sheppo 	 * of the global property spec array that will
25291ae08745Sheppo 	 * uniquely identify this vsw instance.
25301ae08745Sheppo 	 */
25311ae08745Sheppo 	templatesz = sizeof (vsw_prop_template);
25321ae08745Sheppo 	pspecp = kmem_zalloc(templatesz, KM_SLEEP);
25331ae08745Sheppo 
25341ae08745Sheppo 	bcopy(vsw_prop_template, pspecp, templatesz);
25351ae08745Sheppo 
25361ae08745Sheppo 	VSW_SET_MDEG_PROP_INST(pspecp, inst);
25371ae08745Sheppo 
25381ae08745Sheppo 	/* initialize the complete prop spec structure */
25391ae08745Sheppo 	inst_specp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_SLEEP);
25401ae08745Sheppo 	inst_specp->namep = "virtual-device";
25411ae08745Sheppo 	inst_specp->specp = pspecp;
25421ae08745Sheppo 
254334683adeSsg70180 	/*
254434683adeSsg70180 	 * Register an interest in 'virtual-device' nodes with a
254534683adeSsg70180 	 * 'name' property of 'virtual-network-switch'
254634683adeSsg70180 	 */
254734683adeSsg70180 	rv = mdeg_register(inst_specp, &vdev_match, vsw_mdeg_cb,
25481ae08745Sheppo 	    (void *)vswp, &mdeg_hdl);
254934683adeSsg70180 	if (rv != MDEG_SUCCESS) {
255034683adeSsg70180 		DERR(vswp, "%s: mdeg_register failed (%d) for vsw node",
255134683adeSsg70180 			__func__, rv);
255234683adeSsg70180 		goto mdeg_reg_fail;
255334683adeSsg70180 	}
25541ae08745Sheppo 
255534683adeSsg70180 	/*
255634683adeSsg70180 	 * Register an interest in 'vsw-port' nodes.
255734683adeSsg70180 	 */
255834683adeSsg70180 	rv = mdeg_register(inst_specp, &vport_match, vsw_port_mdeg_cb,
255934683adeSsg70180 	    (void *)vswp, &mdeg_port_hdl);
25601ae08745Sheppo 	if (rv != MDEG_SUCCESS) {
25611ae08745Sheppo 		DERR(vswp, "%s: mdeg_register failed (%d)\n", __func__, rv);
256234683adeSsg70180 		(void) mdeg_unregister(mdeg_hdl);
256334683adeSsg70180 		goto mdeg_reg_fail;
25641ae08745Sheppo 	}
25651ae08745Sheppo 
25661ae08745Sheppo 	/* save off data that will be needed later */
25671ae08745Sheppo 	vswp->inst_spec = inst_specp;
25681ae08745Sheppo 	vswp->mdeg_hdl = mdeg_hdl;
256934683adeSsg70180 	vswp->mdeg_port_hdl = mdeg_port_hdl;
25701ae08745Sheppo 
25711ae08745Sheppo 	D1(vswp, "%s: exit", __func__);
257234683adeSsg70180 	return (0);
257334683adeSsg70180 
257434683adeSsg70180 mdeg_reg_fail:
257534683adeSsg70180 	cmn_err(CE_WARN, "!vsw%d: Unable to register MDEG callbacks",
257634683adeSsg70180 				vswp->instance);
257734683adeSsg70180 	kmem_free(pspecp, templatesz);
257834683adeSsg70180 	kmem_free(inst_specp, sizeof (mdeg_node_spec_t));
257934683adeSsg70180 
258034683adeSsg70180 	vswp->mdeg_hdl = NULL;
258134683adeSsg70180 	vswp->mdeg_port_hdl = NULL;
258234683adeSsg70180 
258334683adeSsg70180 	return (1);
25841ae08745Sheppo }
25851ae08745Sheppo 
25861ae08745Sheppo static void
25871ae08745Sheppo vsw_mdeg_unregister(vsw_t *vswp)
25881ae08745Sheppo {
25891ae08745Sheppo 	D1(vswp, "vsw_mdeg_unregister: enter");
25901ae08745Sheppo 
259134683adeSsg70180 	if (vswp->mdeg_hdl != NULL)
25921ae08745Sheppo 		(void) mdeg_unregister(vswp->mdeg_hdl);
25931ae08745Sheppo 
259434683adeSsg70180 	if (vswp->mdeg_port_hdl != NULL)
259534683adeSsg70180 		(void) mdeg_unregister(vswp->mdeg_port_hdl);
259634683adeSsg70180 
259734683adeSsg70180 	if (vswp->inst_spec != NULL) {
25981ae08745Sheppo 		if (vswp->inst_spec->specp != NULL) {
25991ae08745Sheppo 			(void) kmem_free(vswp->inst_spec->specp,
26001ae08745Sheppo 				sizeof (vsw_prop_template));
26011ae08745Sheppo 			vswp->inst_spec->specp = NULL;
26021ae08745Sheppo 		}
26031ae08745Sheppo 
26041ae08745Sheppo 		(void) kmem_free(vswp->inst_spec,
26051ae08745Sheppo 			sizeof (mdeg_node_spec_t));
26061ae08745Sheppo 		vswp->inst_spec = NULL;
26071ae08745Sheppo 	}
26081ae08745Sheppo 
26091ae08745Sheppo 	D1(vswp, "vsw_mdeg_unregister: exit");
26101ae08745Sheppo }
26111ae08745Sheppo 
261234683adeSsg70180 /*
261334683adeSsg70180  * Mdeg callback invoked for the vsw node itself.
261434683adeSsg70180  */
26151ae08745Sheppo static int
26161ae08745Sheppo vsw_mdeg_cb(void *cb_argp, mdeg_result_t *resp)
26171ae08745Sheppo {
26181ae08745Sheppo 	vsw_t		*vswp;
26191ae08745Sheppo 	int		idx;
26201ae08745Sheppo 	md_t		*mdp;
26211ae08745Sheppo 	mde_cookie_t	node;
26221ae08745Sheppo 	uint64_t	inst;
262334683adeSsg70180 	char		*node_name = NULL;
26241ae08745Sheppo 
26251ae08745Sheppo 	if (resp == NULL)
26261ae08745Sheppo 		return (MDEG_FAILURE);
26271ae08745Sheppo 
26281ae08745Sheppo 	vswp = (vsw_t *)cb_argp;
26291ae08745Sheppo 
263034683adeSsg70180 	D1(vswp, "%s: added %d : removed %d : curr matched %d"
263134683adeSsg70180 		" : prev matched %d", __func__, resp->added.nelem,
263234683adeSsg70180 		resp->removed.nelem, resp->match_curr.nelem,
263334683adeSsg70180 		resp->match_prev.nelem);
263434683adeSsg70180 
263534683adeSsg70180 	/*
263634683adeSsg70180 	 * Expect 'added' to be non-zero if virtual-network-switch
263734683adeSsg70180 	 * nodes exist in the MD when the driver attaches.
263834683adeSsg70180 	 */
263934683adeSsg70180 	for (idx = 0; idx < resp->added.nelem; idx++) {
264034683adeSsg70180 		mdp = resp->added.mdp;
264134683adeSsg70180 		node = resp->added.mdep[idx];
264234683adeSsg70180 
264334683adeSsg70180 		if (md_get_prop_str(mdp, node, "name", &node_name) != 0) {
264434683adeSsg70180 			DERR(vswp, "%s: unable to get node name for "
264534683adeSsg70180 				"node(%d) 0x%lx", __func__, idx, node);
264634683adeSsg70180 			continue;
264734683adeSsg70180 		}
264834683adeSsg70180 
264934683adeSsg70180 		if (md_get_prop_val(mdp, node, "cfg-handle", &inst)) {
265034683adeSsg70180 			DERR(vswp, "%s: prop(cfg-handle) not found port(%d)",
265134683adeSsg70180 				__func__, idx);
265234683adeSsg70180 			continue;
265334683adeSsg70180 		}
265434683adeSsg70180 
265534683adeSsg70180 		D2(vswp, "%s: added node(%d) 0x%lx with name %s "
265634683adeSsg70180 			"and inst %d", __func__, idx, node, node_name, inst);
265734683adeSsg70180 
265834683adeSsg70180 		vsw_get_initial_md_properties(vswp, mdp, node);
265934683adeSsg70180 	}
266034683adeSsg70180 
266134683adeSsg70180 	/*
266234683adeSsg70180 	 * A non-zero 'match' value indicates that the MD has been
266334683adeSsg70180 	 * updated and that a virtual-network-switch node is present
266434683adeSsg70180 	 * which may or may not have been updated. It is up to the clients
266534683adeSsg70180 	 * to examine their own nodes and determine if they have changed.
266634683adeSsg70180 	 */
266734683adeSsg70180 	for (idx = 0; idx < resp->match_curr.nelem; idx++) {
266834683adeSsg70180 		mdp = resp->match_curr.mdp;
266934683adeSsg70180 		node = resp->match_curr.mdep[idx];
267034683adeSsg70180 
267134683adeSsg70180 		if (md_get_prop_str(mdp, node, "name", &node_name) != 0) {
267234683adeSsg70180 			DERR(vswp, "%s: unable to get node name for "
267334683adeSsg70180 				"node(%d) 0x%lx", __func__, idx, node);
267434683adeSsg70180 			continue;
267534683adeSsg70180 		}
267634683adeSsg70180 
267734683adeSsg70180 		if (md_get_prop_val(mdp, node, "cfg-handle", &inst)) {
267834683adeSsg70180 			DERR(vswp, "%s: prop(cfg-handle) not found port(%d)",
267934683adeSsg70180 				__func__, idx);
268034683adeSsg70180 			continue;
268134683adeSsg70180 		}
268234683adeSsg70180 
268334683adeSsg70180 		D2(vswp, "%s: changed node(%d) 0x%lx with name %s "
268434683adeSsg70180 			"and inst %d", __func__, idx, node, node_name, inst);
268534683adeSsg70180 
268634683adeSsg70180 		vsw_update_md_prop(vswp, mdp, node);
268734683adeSsg70180 	}
268834683adeSsg70180 
268934683adeSsg70180 	return (MDEG_SUCCESS);
269034683adeSsg70180 }
269134683adeSsg70180 
269234683adeSsg70180 /*
269334683adeSsg70180  * Mdeg callback invoked for changes to the vsw-port nodes
269434683adeSsg70180  * under the vsw node.
269534683adeSsg70180  */
269634683adeSsg70180 static int
269734683adeSsg70180 vsw_port_mdeg_cb(void *cb_argp, mdeg_result_t *resp)
269834683adeSsg70180 {
269934683adeSsg70180 	vsw_t		*vswp;
270034683adeSsg70180 	int		idx;
270134683adeSsg70180 	md_t		*mdp;
270234683adeSsg70180 	mde_cookie_t	node;
270334683adeSsg70180 	uint64_t	inst;
270434683adeSsg70180 
270534683adeSsg70180 	if ((resp == NULL) || (cb_argp == NULL))
270634683adeSsg70180 		return (MDEG_FAILURE);
270734683adeSsg70180 
270834683adeSsg70180 	vswp = (vsw_t *)cb_argp;
270934683adeSsg70180 
271034683adeSsg70180 	D2(vswp, "%s: added %d : removed %d : curr matched %d"
271134683adeSsg70180 		" : prev matched %d", __func__, resp->added.nelem,
271234683adeSsg70180 		resp->removed.nelem, resp->match_curr.nelem,
27131ae08745Sheppo 		resp->match_prev.nelem);
27141ae08745Sheppo 
27151ae08745Sheppo 	/* process added ports */
27161ae08745Sheppo 	for (idx = 0; idx < resp->added.nelem; idx++) {
27171ae08745Sheppo 		mdp = resp->added.mdp;
27181ae08745Sheppo 		node = resp->added.mdep[idx];
27191ae08745Sheppo 
27201ae08745Sheppo 		D2(vswp, "%s: adding node(%d) 0x%lx", __func__, idx, node);
27211ae08745Sheppo 
27221ae08745Sheppo 		if (vsw_port_add(vswp, mdp, &node) != 0) {
272334683adeSsg70180 			cmn_err(CE_WARN, "!vsw%d: Unable to add new port "
272434683adeSsg70180 				"(0x%lx)", vswp->instance, node);
27251ae08745Sheppo 		}
27261ae08745Sheppo 	}
27271ae08745Sheppo 
27281ae08745Sheppo 	/* process removed ports */
27291ae08745Sheppo 	for (idx = 0; idx < resp->removed.nelem; idx++) {
27301ae08745Sheppo 		mdp = resp->removed.mdp;
27311ae08745Sheppo 		node = resp->removed.mdep[idx];
27321ae08745Sheppo 
27331ae08745Sheppo 		if (md_get_prop_val(mdp, node, id_propname, &inst)) {
273434683adeSsg70180 			DERR(vswp, "%s: prop(%s) not found in port(%d)",
27351ae08745Sheppo 				__func__, id_propname, idx);
27361ae08745Sheppo 			continue;
27371ae08745Sheppo 		}
27381ae08745Sheppo 
27391ae08745Sheppo 		D2(vswp, "%s: removing node(%d) 0x%lx", __func__, idx, node);
27401ae08745Sheppo 
27411ae08745Sheppo 		if (vsw_port_detach(vswp, inst) != 0) {
274234683adeSsg70180 			cmn_err(CE_WARN, "!vsw%d: Unable to remove port %ld",
274334683adeSsg70180 				vswp->instance, inst);
27441ae08745Sheppo 		}
27451ae08745Sheppo 	}
27461ae08745Sheppo 
27471ae08745Sheppo 	/*
27481ae08745Sheppo 	 * Currently no support for updating already active ports.
27491ae08745Sheppo 	 * So, ignore the match_curr and match_priv arrays for now.
27501ae08745Sheppo 	 */
27511ae08745Sheppo 
27521ae08745Sheppo 	D1(vswp, "%s: exit", __func__);
27531ae08745Sheppo 
27541ae08745Sheppo 	return (MDEG_SUCCESS);
27551ae08745Sheppo }
27561ae08745Sheppo 
27571ae08745Sheppo /*
275834683adeSsg70180  * Read the initial start-of-day values from the specified MD node.
275934683adeSsg70180  */
276034683adeSsg70180 static void
276134683adeSsg70180 vsw_get_initial_md_properties(vsw_t *vswp, md_t *mdp, mde_cookie_t node)
276234683adeSsg70180 {
276334683adeSsg70180 	int		i;
276434683adeSsg70180 	uint64_t 	macaddr = 0;
276534683adeSsg70180 
276634683adeSsg70180 	D1(vswp, "%s: enter", __func__);
276734683adeSsg70180 
276834683adeSsg70180 	if (vsw_get_md_physname(vswp, mdp, node, vswp->physname) == 0) {
276934683adeSsg70180 		/*
277034683adeSsg70180 		 * Note it is valid for the physname property to
277134683adeSsg70180 		 * be NULL so check actual name length to determine
277234683adeSsg70180 		 * if we have a actual device name.
277334683adeSsg70180 		 */
277434683adeSsg70180 		if (strlen(vswp->physname) > 0)
277534683adeSsg70180 			vswp->mdprops |= VSW_MD_PHYSNAME;
277634683adeSsg70180 	} else {
277734683adeSsg70180 		cmn_err(CE_WARN, "!vsw%d: Unable to read name of physical "
277834683adeSsg70180 			"device from MD", vswp->instance);
277934683adeSsg70180 		return;
278034683adeSsg70180 	}
278134683adeSsg70180 
278234683adeSsg70180 	/* mac address for vswitch device itself */
278334683adeSsg70180 	if (md_get_prop_val(mdp, node, macaddr_propname, &macaddr) != 0) {
278434683adeSsg70180 		cmn_err(CE_WARN, "!vsw%d: Unable to get MAC address from MD",
278534683adeSsg70180 			vswp->instance);
278634683adeSsg70180 
278734683adeSsg70180 		/*
278834683adeSsg70180 		 * Fallback to using the mac address of the physical
278934683adeSsg70180 		 * device.
279034683adeSsg70180 		 */
279134683adeSsg70180 		if (vsw_get_physaddr(vswp) == 0) {
279234683adeSsg70180 			cmn_err(CE_NOTE, "!vsw%d: Using MAC address from "
279334683adeSsg70180 				"physical device (%s)", vswp->instance,
279434683adeSsg70180 				vswp->physname);
279534683adeSsg70180 		} else {
279634683adeSsg70180 			cmn_err(CE_WARN, "!vsw%d: Unable to get MAC address"
279734683adeSsg70180 				"from device %s", vswp->instance,
279834683adeSsg70180 				vswp->physname);
279934683adeSsg70180 		}
280034683adeSsg70180 	} else {
280134683adeSsg70180 		WRITE_ENTER(&vswp->if_lockrw);
280234683adeSsg70180 		for (i = ETHERADDRL - 1; i >= 0; i--) {
280334683adeSsg70180 			vswp->if_addr.ether_addr_octet[i] = macaddr & 0xFF;
280434683adeSsg70180 			macaddr >>= 8;
280534683adeSsg70180 		}
280634683adeSsg70180 		RW_EXIT(&vswp->if_lockrw);
280734683adeSsg70180 		vswp->mdprops |= VSW_MD_MACADDR;
280834683adeSsg70180 	}
280934683adeSsg70180 
281034683adeSsg70180 	if (vsw_get_md_smodes(vswp, mdp, node,
281134683adeSsg70180 				vswp->smode, &vswp->smode_num)) {
281234683adeSsg70180 		cmn_err(CE_WARN, "vsw%d: Unable to read %s property from "
281334683adeSsg70180 			"MD, defaulting to programmed mode", vswp->instance,
281434683adeSsg70180 			smode_propname);
281534683adeSsg70180 
281634683adeSsg70180 		for (i = 0; i < NUM_SMODES; i++)
281734683adeSsg70180 			vswp->smode[i] = VSW_LAYER2;
281834683adeSsg70180 
281934683adeSsg70180 		vswp->smode_num = NUM_SMODES;
282034683adeSsg70180 	} else {
282134683adeSsg70180 		ASSERT(vswp->smode_num != 0);
282234683adeSsg70180 		vswp->mdprops |= VSW_MD_SMODE;
282334683adeSsg70180 	}
282434683adeSsg70180 
282534683adeSsg70180 	/*
282634683adeSsg70180 	 * Unable to setup any switching mode, nothing more
282734683adeSsg70180 	 * we can do.
282834683adeSsg70180 	 */
282934683adeSsg70180 	if (vsw_setup_switching(vswp))
283034683adeSsg70180 		return;
283134683adeSsg70180 
283234683adeSsg70180 	WRITE_ENTER(&vswp->if_lockrw);
283334683adeSsg70180 	vswp->if_state &= ~VSW_IF_UP;
283434683adeSsg70180 	RW_EXIT(&vswp->if_lockrw);
283534683adeSsg70180 	if (vswp->mdprops & (VSW_MD_MACADDR | VSW_DEV_MACADDR)) {
283634683adeSsg70180 		if (vsw_mac_register(vswp) != 0) {
283734683adeSsg70180 			/*
283834683adeSsg70180 			 * Treat this as a non-fatal error as we may be
283934683adeSsg70180 			 * able to operate in some other mode.
284034683adeSsg70180 			 */
284134683adeSsg70180 			cmn_err(CE_WARN, "vsw%d: Unable to register as "
284234683adeSsg70180 				"provider with MAC layer", vswp->instance);
284334683adeSsg70180 		}
284434683adeSsg70180 	}
284534683adeSsg70180 
284634683adeSsg70180 	D1(vswp, "%s: exit", __func__);
284734683adeSsg70180 }
284834683adeSsg70180 
284934683adeSsg70180 /*
285034683adeSsg70180  * Check to see if the relevant properties in the specified node have
285134683adeSsg70180  * changed, and if so take the appropriate action.
285234683adeSsg70180  *
285334683adeSsg70180  * If any of the properties are missing or invalid we don't take
285434683adeSsg70180  * any action, as this function should only be invoked when modifications
285534683adeSsg70180  * have been made to what we assume is a working configuration, which
285634683adeSsg70180  * we leave active.
285734683adeSsg70180  *
285834683adeSsg70180  * Note it is legal for this routine to be invoked even if none of the
285934683adeSsg70180  * properties in the port node within the MD have actually changed.
286034683adeSsg70180  */
286134683adeSsg70180 static void
286234683adeSsg70180 vsw_update_md_prop(vsw_t *vswp, md_t *mdp, mde_cookie_t node)
286334683adeSsg70180 {
286434683adeSsg70180 	char		physname[LIFNAMSIZ];
286534683adeSsg70180 	char		drv[LIFNAMSIZ];
286634683adeSsg70180 	uint_t		ddi_instance;
286734683adeSsg70180 	uint8_t		new_smode[NUM_SMODES];
286834683adeSsg70180 	int		i, smode_num = 0;
286934683adeSsg70180 	uint64_t 	macaddr = 0;
287034683adeSsg70180 	vsw_port_list_t *plist = &vswp->plist;
287134683adeSsg70180 	vsw_port_t	*port = NULL;
287234683adeSsg70180 	enum		{MD_init = 0x1,
287334683adeSsg70180 				MD_physname = 0x2,
287434683adeSsg70180 				MD_macaddr = 0x4,
287534683adeSsg70180 				MD_smode = 0x8} updated;
287634683adeSsg70180 
287734683adeSsg70180 	updated = MD_init;
287834683adeSsg70180 
287934683adeSsg70180 	D1(vswp, "%s: enter", __func__);
288034683adeSsg70180 
288134683adeSsg70180 	/*
288234683adeSsg70180 	 * Check if name of physical device in MD has changed.
288334683adeSsg70180 	 */
288434683adeSsg70180 	if (vsw_get_md_physname(vswp, mdp, node, (char *)&physname) == 0) {
288534683adeSsg70180 		/*
288634683adeSsg70180 		 * Do basic sanity check on new device name/instance,
288734683adeSsg70180 		 * if its non NULL. It is valid for the device name to
288834683adeSsg70180 		 * have changed from a non NULL to a NULL value, i.e.
288934683adeSsg70180 		 * the vsw is being changed to 'routed' mode.
289034683adeSsg70180 		 */
289134683adeSsg70180 		if ((strlen(physname) != 0) &&
289234683adeSsg70180 			(ddi_parse(physname, drv,
289334683adeSsg70180 				&ddi_instance) != DDI_SUCCESS)) {
289434683adeSsg70180 			cmn_err(CE_WARN, "!vsw%d: new device name %s is not"
289534683adeSsg70180 				" a valid device name/instance",
289634683adeSsg70180 				vswp->instance, physname);
289734683adeSsg70180 			goto fail_reconf;
289834683adeSsg70180 		}
289934683adeSsg70180 
290034683adeSsg70180 		if (strcmp(physname, vswp->physname)) {
290134683adeSsg70180 			D2(vswp, "%s: device name changed from %s to %s",
290234683adeSsg70180 					__func__, vswp->physname, physname);
290334683adeSsg70180 
290434683adeSsg70180 			updated |= MD_physname;
290534683adeSsg70180 		} else {
290634683adeSsg70180 			D2(vswp, "%s: device name unchanged at %s",
290734683adeSsg70180 					__func__, vswp->physname);
290834683adeSsg70180 		}
290934683adeSsg70180 	} else {
291034683adeSsg70180 		cmn_err(CE_WARN, "!vsw%d: Unable to read name of physical "
291134683adeSsg70180 			"device from updated MD.", vswp->instance);
291234683adeSsg70180 		goto fail_reconf;
291334683adeSsg70180 	}
291434683adeSsg70180 
291534683adeSsg70180 	/*
291634683adeSsg70180 	 * Check if MAC address has changed.
291734683adeSsg70180 	 */
291834683adeSsg70180 	if (md_get_prop_val(mdp, node, macaddr_propname, &macaddr) != 0) {
291934683adeSsg70180 		cmn_err(CE_WARN, "!vsw%d: Unable to get MAC address from MD",
292034683adeSsg70180 			vswp->instance);
292134683adeSsg70180 		goto fail_reconf;
292234683adeSsg70180 	} else {
292334683adeSsg70180 		READ_ENTER(&vswp->if_lockrw);
292434683adeSsg70180 		for (i = ETHERADDRL - 1; i >= 0; i--) {
292534683adeSsg70180 			if (vswp->if_addr.ether_addr_octet[i]
292634683adeSsg70180 							!= (macaddr & 0xFF)) {
292734683adeSsg70180 				D2(vswp, "%s: octet[%d] 0x%x != 0x%x",
292834683adeSsg70180 					__func__, i,
292934683adeSsg70180 					vswp->if_addr.ether_addr_octet[i],
293034683adeSsg70180 					(macaddr & 0xFF));
293134683adeSsg70180 				updated |= MD_macaddr;
293234683adeSsg70180 				break;
293334683adeSsg70180 			}
293434683adeSsg70180 			macaddr >>= 8;
293534683adeSsg70180 		}
293634683adeSsg70180 		RW_EXIT(&vswp->if_lockrw);
293734683adeSsg70180 	}
293834683adeSsg70180 
293934683adeSsg70180 	/*
294034683adeSsg70180 	 * Check if switching modes have changed.
294134683adeSsg70180 	 */
294234683adeSsg70180 	if (vsw_get_md_smodes(vswp, mdp, node,
294334683adeSsg70180 				new_smode, &smode_num)) {
294434683adeSsg70180 		cmn_err(CE_WARN, "!vsw%d: Unable to read %s property from MD",
294534683adeSsg70180 					vswp->instance, smode_propname);
294634683adeSsg70180 		goto fail_reconf;
294734683adeSsg70180 	} else {
294834683adeSsg70180 		ASSERT(smode_num != 0);
294934683adeSsg70180 		if (smode_num != vswp->smode_num) {
295034683adeSsg70180 			D2(vswp, "%s: number of modes changed from %d to %d",
295134683adeSsg70180 				__func__, vswp->smode_num, smode_num);
295234683adeSsg70180 		}
295334683adeSsg70180 
295434683adeSsg70180 		for (i = 0; i < smode_num; i++) {
295534683adeSsg70180 			if (new_smode[i] != vswp->smode[i]) {
295634683adeSsg70180 				D2(vswp, "%s: mode changed from %d to %d",
295734683adeSsg70180 					__func__, vswp->smode[i], new_smode[i]);
295834683adeSsg70180 				updated |= MD_smode;
295934683adeSsg70180 				break;
296034683adeSsg70180 			}
296134683adeSsg70180 		}
296234683adeSsg70180 	}
296334683adeSsg70180 
296434683adeSsg70180 	/*
296534683adeSsg70180 	 * Now make any changes which are needed...
296634683adeSsg70180 	 */
296734683adeSsg70180 
296834683adeSsg70180 	if (updated & (MD_physname | MD_smode)) {
296934683adeSsg70180 		/*
297034683adeSsg70180 		 * Disconnect all ports from the current card
297134683adeSsg70180 		 */
297234683adeSsg70180 		WRITE_ENTER(&plist->lockrw);
297334683adeSsg70180 		for (port = plist->head; port != NULL; port = port->p_next) {
297434683adeSsg70180 			/* Remove address if was programmed into HW. */
29755f94e909Ssg70180 			mutex_enter(&vswp->hw_lock);
29765f94e909Ssg70180 			if (vsw_unset_hw(vswp, port, VSW_VNETPORT)) {
29775f94e909Ssg70180 				mutex_exit(&vswp->hw_lock);
297834683adeSsg70180 				RW_EXIT(&plist->lockrw);
297934683adeSsg70180 				goto fail_update;
298034683adeSsg70180 			}
29815f94e909Ssg70180 			mutex_exit(&vswp->hw_lock);
298234683adeSsg70180 		}
298334683adeSsg70180 		RW_EXIT(&plist->lockrw);
298434683adeSsg70180 
298534683adeSsg70180 		/*
298634683adeSsg70180 		 * Stop, detach the old device..
298734683adeSsg70180 		 */
298834683adeSsg70180 		vsw_mac_detach(vswp);
298934683adeSsg70180 
299034683adeSsg70180 		/*
299134683adeSsg70180 		 * Update phys name.
299234683adeSsg70180 		 */
299334683adeSsg70180 		if (updated & MD_physname) {
299434683adeSsg70180 			cmn_err(CE_NOTE, "!vsw%d: changing from %s to %s",
299534683adeSsg70180 				vswp->instance, vswp->physname, physname);
299634683adeSsg70180 			(void) strncpy(vswp->physname,
299734683adeSsg70180 					physname, strlen(physname) + 1);
299834683adeSsg70180 
299934683adeSsg70180 			if (strlen(vswp->physname) > 0)
300034683adeSsg70180 				vswp->mdprops |= VSW_MD_PHYSNAME;
300134683adeSsg70180 		}
300234683adeSsg70180 
300334683adeSsg70180 		/*
300434683adeSsg70180 		 * Update array with the new switch mode values.
300534683adeSsg70180 		 */
300634683adeSsg70180 		if (updated & MD_smode) {
300734683adeSsg70180 			for (i = 0; i < smode_num; i++)
300834683adeSsg70180 				vswp->smode[i] = new_smode[i];
300934683adeSsg70180 
301034683adeSsg70180 			vswp->smode_num = smode_num;
301134683adeSsg70180 			vswp->smode_idx = 0;
301234683adeSsg70180 		}
301334683adeSsg70180 
301434683adeSsg70180 		/*
301534683adeSsg70180 		 * ..and attach, start the new device.
301634683adeSsg70180 		 */
301734683adeSsg70180 		if (vsw_setup_switching(vswp))
301834683adeSsg70180 			goto fail_update;
301934683adeSsg70180 
302034683adeSsg70180 		/*
302134683adeSsg70180 		 * Connect ports to new card.
302234683adeSsg70180 		 */
302334683adeSsg70180 		WRITE_ENTER(&plist->lockrw);
302434683adeSsg70180 		for (port = plist->head; port != NULL; port = port->p_next) {
30255f94e909Ssg70180 			mutex_enter(&vswp->hw_lock);
30265f94e909Ssg70180 			if (vsw_set_hw(vswp, port, VSW_VNETPORT)) {
30275f94e909Ssg70180 				mutex_exit(&vswp->hw_lock);
302834683adeSsg70180 				RW_EXIT(&plist->lockrw);
302934683adeSsg70180 				goto fail_update;
303034683adeSsg70180 			}
30315f94e909Ssg70180 			mutex_exit(&vswp->hw_lock);
303234683adeSsg70180 		}
303334683adeSsg70180 		RW_EXIT(&plist->lockrw);
303434683adeSsg70180 	}
303534683adeSsg70180 
303634683adeSsg70180 	if (updated & MD_macaddr) {
303734683adeSsg70180 		cmn_err(CE_NOTE, "!vsw%d: changing mac address to 0x%lx",
303834683adeSsg70180 				vswp->instance, macaddr);
303934683adeSsg70180 
304034683adeSsg70180 		WRITE_ENTER(&vswp->if_lockrw);
304134683adeSsg70180 		for (i = ETHERADDRL - 1; i >= 0; i--) {
304234683adeSsg70180 			vswp->if_addr.ether_addr_octet[i] = macaddr & 0xFF;
304334683adeSsg70180 			macaddr >>= 8;
304434683adeSsg70180 		}
304534683adeSsg70180 		RW_EXIT(&vswp->if_lockrw);
304634683adeSsg70180 
304734683adeSsg70180 		/*
30485f94e909Ssg70180 		 * Remove old address from HW (if programmed) and set
30495f94e909Ssg70180 		 * new address.
30505f94e909Ssg70180 		 */
30515f94e909Ssg70180 		mutex_enter(&vswp->hw_lock);
30525f94e909Ssg70180 		(void) vsw_unset_hw(vswp, NULL, VSW_LOCALDEV);
30535f94e909Ssg70180 		(void) vsw_set_hw(vswp, NULL, VSW_LOCALDEV);
30545f94e909Ssg70180 		mutex_exit(&vswp->hw_lock);
30555f94e909Ssg70180 
30565f94e909Ssg70180 		/*
305734683adeSsg70180 		 * Notify the MAC layer of the changed address.
305834683adeSsg70180 		 */
305934683adeSsg70180 		mac_unicst_update(vswp->if_mh, (uint8_t *)&vswp->if_addr);
306034683adeSsg70180 	}
306134683adeSsg70180 
306234683adeSsg70180 	return;
306334683adeSsg70180 
306434683adeSsg70180 fail_reconf:
306534683adeSsg70180 	cmn_err(CE_WARN, "!vsw%d: configuration unchanged", vswp->instance);
306634683adeSsg70180 	return;
306734683adeSsg70180 
306834683adeSsg70180 fail_update:
306934683adeSsg70180 	cmn_err(CE_WARN, "!vsw%d: update of configuration failed",
307034683adeSsg70180 			vswp->instance);
307134683adeSsg70180 }
307234683adeSsg70180 
307334683adeSsg70180 /*
30741ae08745Sheppo  * Add a new port to the system.
30751ae08745Sheppo  *
30761ae08745Sheppo  * Returns 0 on success, 1 on failure.
30771ae08745Sheppo  */
30781ae08745Sheppo int
30791ae08745Sheppo vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node)
30801ae08745Sheppo {
30811ae08745Sheppo 	uint64_t		ldc_id;
30821ae08745Sheppo 	uint8_t			*addrp;
30831ae08745Sheppo 	int			i, addrsz;
30841ae08745Sheppo 	int			num_nodes = 0, nchan = 0;
30851ae08745Sheppo 	int			listsz = 0;
30861ae08745Sheppo 	mde_cookie_t		*listp = NULL;
30871ae08745Sheppo 	struct ether_addr	ea;
30881ae08745Sheppo 	uint64_t		macaddr;
30891ae08745Sheppo 	uint64_t		inst = 0;
30901ae08745Sheppo 	vsw_port_t		*port;
30911ae08745Sheppo 
30921ae08745Sheppo 	if (md_get_prop_val(mdp, *node, id_propname, &inst)) {
30931ae08745Sheppo 		DWARN(vswp, "%s: prop(%s) not found", __func__,
30941ae08745Sheppo 			id_propname);
30951ae08745Sheppo 		return (1);
30961ae08745Sheppo 	}
30971ae08745Sheppo 
30981ae08745Sheppo 	/*
30991ae08745Sheppo 	 * Find the channel endpoint node(s) (which should be under this
31001ae08745Sheppo 	 * port node) which contain the channel id(s).
31011ae08745Sheppo 	 */
31021ae08745Sheppo 	if ((num_nodes = md_node_count(mdp)) <= 0) {
31031ae08745Sheppo 		DERR(vswp, "%s: invalid number of nodes found (%d)",
31041ae08745Sheppo 			__func__, num_nodes);
31051ae08745Sheppo 		return (1);
31061ae08745Sheppo 	}
31071ae08745Sheppo 
310834683adeSsg70180 	D2(vswp, "%s: %d nodes found", __func__, num_nodes);
310934683adeSsg70180 
31101ae08745Sheppo 	/* allocate enough space for node list */
31111ae08745Sheppo 	listsz = num_nodes * sizeof (mde_cookie_t);
31121ae08745Sheppo 	listp = kmem_zalloc(listsz, KM_SLEEP);
31131ae08745Sheppo 
31141ae08745Sheppo 	nchan = md_scan_dag(mdp, *node,
31151ae08745Sheppo 		md_find_name(mdp, chan_propname),
31161ae08745Sheppo 		md_find_name(mdp, "fwd"), listp);
31171ae08745Sheppo 
31181ae08745Sheppo 	if (nchan <= 0) {
31191ae08745Sheppo 		DWARN(vswp, "%s: no %s nodes found", __func__, chan_propname);
31201ae08745Sheppo 		kmem_free(listp, listsz);
31211ae08745Sheppo 		return (1);
31221ae08745Sheppo 	}
31231ae08745Sheppo 
31241ae08745Sheppo 	D2(vswp, "%s: %d %s nodes found", __func__, nchan, chan_propname);
31251ae08745Sheppo 
31261ae08745Sheppo 	/* use property from first node found */
31271ae08745Sheppo 	if (md_get_prop_val(mdp, listp[0], id_propname, &ldc_id)) {
31281ae08745Sheppo 		DWARN(vswp, "%s: prop(%s) not found\n", __func__,
31291ae08745Sheppo 			id_propname);
31301ae08745Sheppo 		kmem_free(listp, listsz);
31311ae08745Sheppo 		return (1);
31321ae08745Sheppo 	}
31331ae08745Sheppo 
31341ae08745Sheppo 	/* don't need list any more */
31351ae08745Sheppo 	kmem_free(listp, listsz);
31361ae08745Sheppo 
31371ae08745Sheppo 	D2(vswp, "%s: ldc_id 0x%llx", __func__, ldc_id);
31381ae08745Sheppo 
31391ae08745Sheppo 	/* read mac-address property */
31401ae08745Sheppo 	if (md_get_prop_data(mdp, *node, remaddr_propname,
31411ae08745Sheppo 					&addrp, &addrsz)) {
31421ae08745Sheppo 		DWARN(vswp, "%s: prop(%s) not found",
31431ae08745Sheppo 				__func__, remaddr_propname);
31441ae08745Sheppo 		return (1);
31451ae08745Sheppo 	}
31461ae08745Sheppo 
31471ae08745Sheppo 	if (addrsz < ETHERADDRL) {
31481ae08745Sheppo 		DWARN(vswp, "%s: invalid address size", __func__);
31491ae08745Sheppo 		return (1);
31501ae08745Sheppo 	}
31511ae08745Sheppo 
31521ae08745Sheppo 	macaddr = *((uint64_t *)addrp);
31531ae08745Sheppo 	D2(vswp, "%s: remote mac address 0x%llx", __func__, macaddr);
31541ae08745Sheppo 
31551ae08745Sheppo 	for (i = ETHERADDRL - 1; i >= 0; i--) {
31561ae08745Sheppo 		ea.ether_addr_octet[i] = macaddr & 0xFF;
31571ae08745Sheppo 		macaddr >>= 8;
31581ae08745Sheppo 	}
31591ae08745Sheppo 
31601ae08745Sheppo 	if (vsw_port_attach(vswp, (int)inst, &ldc_id, 1, &ea) != 0) {
31611ae08745Sheppo 		DERR(vswp, "%s: failed to attach port", __func__);
31621ae08745Sheppo 		return (1);
31631ae08745Sheppo 	}
31641ae08745Sheppo 
31651ae08745Sheppo 	port = vsw_lookup_port(vswp, (int)inst);
31661ae08745Sheppo 
31671ae08745Sheppo 	/* just successfuly created the port, so it should exist */
31681ae08745Sheppo 	ASSERT(port != NULL);
31691ae08745Sheppo 
31701ae08745Sheppo 	return (0);
31711ae08745Sheppo }
31721ae08745Sheppo 
31731ae08745Sheppo /*
31741ae08745Sheppo  * Attach the specified port.
31751ae08745Sheppo  *
31761ae08745Sheppo  * Returns 0 on success, 1 on failure.
31771ae08745Sheppo  */
31781ae08745Sheppo static int
31791ae08745Sheppo vsw_port_attach(vsw_t *vswp, int p_instance, uint64_t *ldcids, int nids,
31801ae08745Sheppo struct ether_addr *macaddr)
31811ae08745Sheppo {
31821ae08745Sheppo 	vsw_port_list_t		*plist = &vswp->plist;
31831ae08745Sheppo 	vsw_port_t		*port, **prev_port;
31841ae08745Sheppo 	int			i;
31851ae08745Sheppo 
31861ae08745Sheppo 	D1(vswp, "%s: enter : port %d", __func__, p_instance);
31871ae08745Sheppo 
31881ae08745Sheppo 	/* port already exists? */
31891ae08745Sheppo 	READ_ENTER(&plist->lockrw);
31901ae08745Sheppo 	for (port = plist->head; port != NULL; port = port->p_next) {
31911ae08745Sheppo 		if (port->p_instance == p_instance) {
31921ae08745Sheppo 			DWARN(vswp, "%s: port instance %d already attached",
31931ae08745Sheppo 				__func__, p_instance);
31941ae08745Sheppo 			RW_EXIT(&plist->lockrw);
31951ae08745Sheppo 			return (1);
31961ae08745Sheppo 		}
31971ae08745Sheppo 	}
31981ae08745Sheppo 	RW_EXIT(&plist->lockrw);
31991ae08745Sheppo 
32001ae08745Sheppo 	port = kmem_zalloc(sizeof (vsw_port_t), KM_SLEEP);
32011ae08745Sheppo 	port->p_vswp = vswp;
32021ae08745Sheppo 	port->p_instance = p_instance;
32031ae08745Sheppo 	port->p_ldclist.num_ldcs = 0;
32041ae08745Sheppo 	port->p_ldclist.head = NULL;
3205e1ebb9ecSlm66018 	port->addr_set = VSW_ADDR_UNSET;
32061ae08745Sheppo 
32071ae08745Sheppo 	rw_init(&port->p_ldclist.lockrw, NULL, RW_DRIVER, NULL);
32081ae08745Sheppo 
32091ae08745Sheppo 	mutex_init(&port->tx_lock, NULL, MUTEX_DRIVER, NULL);
32101ae08745Sheppo 	mutex_init(&port->mca_lock, NULL, MUTEX_DRIVER, NULL);
32111ae08745Sheppo 
32121ae08745Sheppo 	mutex_init(&port->ref_lock, NULL, MUTEX_DRIVER, NULL);
32131ae08745Sheppo 	cv_init(&port->ref_cv, NULL, CV_DRIVER, NULL);
32141ae08745Sheppo 
32151ae08745Sheppo 	mutex_init(&port->state_lock, NULL, MUTEX_DRIVER, NULL);
32161ae08745Sheppo 	cv_init(&port->state_cv, NULL, CV_DRIVER, NULL);
32171ae08745Sheppo 	port->state = VSW_PORT_INIT;
32181ae08745Sheppo 
32191ae08745Sheppo 	if (nids > VSW_PORT_MAX_LDCS) {
32201ae08745Sheppo 		D2(vswp, "%s: using first of %d ldc ids",
32211ae08745Sheppo 			__func__, nids);
32221ae08745Sheppo 		nids = VSW_PORT_MAX_LDCS;
32231ae08745Sheppo 	}
32241ae08745Sheppo 
32251ae08745Sheppo 	D2(vswp, "%s: %d nids", __func__, nids);
32261ae08745Sheppo 	for (i = 0; i < nids; i++) {
32271ae08745Sheppo 		D2(vswp, "%s: ldcid (%llx)", __func__, (uint64_t)ldcids[i]);
32281ae08745Sheppo 		if (vsw_ldc_attach(port, (uint64_t)ldcids[i]) != 0) {
32291ae08745Sheppo 			DERR(vswp, "%s: ldc_attach failed", __func__);
32301ae08745Sheppo 
32311ae08745Sheppo 			rw_destroy(&port->p_ldclist.lockrw);
32321ae08745Sheppo 
32331ae08745Sheppo 			cv_destroy(&port->ref_cv);
32341ae08745Sheppo 			mutex_destroy(&port->ref_lock);
32351ae08745Sheppo 
32361ae08745Sheppo 			cv_destroy(&port->state_cv);
32371ae08745Sheppo 			mutex_destroy(&port->state_lock);
32381ae08745Sheppo 
32391ae08745Sheppo 			mutex_destroy(&port->tx_lock);
32401ae08745Sheppo 			mutex_destroy(&port->mca_lock);
32411ae08745Sheppo 			kmem_free(port, sizeof (vsw_port_t));
32421ae08745Sheppo 			return (1);
32431ae08745Sheppo 		}
32441ae08745Sheppo 	}
32451ae08745Sheppo 
32461ae08745Sheppo 	ether_copy(macaddr, &port->p_macaddr);
32471ae08745Sheppo 
32481ae08745Sheppo 	WRITE_ENTER(&plist->lockrw);
32491ae08745Sheppo 
32501ae08745Sheppo 	/* create the fdb entry for this port/mac address */
32511ae08745Sheppo 	(void) vsw_add_fdb(vswp, port);
32521ae08745Sheppo 
32535f94e909Ssg70180 	mutex_enter(&vswp->hw_lock);
32545f94e909Ssg70180 	(void) vsw_set_hw(vswp, port, VSW_VNETPORT);
32555f94e909Ssg70180 	mutex_exit(&vswp->hw_lock);
3256e1ebb9ecSlm66018 
32571ae08745Sheppo 	/* link it into the list of ports for this vsw instance */
32581ae08745Sheppo 	prev_port = (vsw_port_t **)(&plist->head);
32591ae08745Sheppo 	port->p_next = *prev_port;
32601ae08745Sheppo 	*prev_port = port;
32611ae08745Sheppo 	plist->num_ports++;
32621ae08745Sheppo 	RW_EXIT(&plist->lockrw);
32631ae08745Sheppo 
32641ae08745Sheppo 	/*
32651ae08745Sheppo 	 * Initialise the port and any ldc's under it.
32661ae08745Sheppo 	 */
32671ae08745Sheppo 	(void) vsw_init_ldcs(port);
32681ae08745Sheppo 
32691ae08745Sheppo 	D1(vswp, "%s: exit", __func__);
32701ae08745Sheppo 	return (0);
32711ae08745Sheppo }
32721ae08745Sheppo 
32731ae08745Sheppo /*
32741ae08745Sheppo  * Detach the specified port.
32751ae08745Sheppo  *
32761ae08745Sheppo  * Returns 0 on success, 1 on failure.
32771ae08745Sheppo  */
32781ae08745Sheppo static int
32791ae08745Sheppo vsw_port_detach(vsw_t *vswp, int p_instance)
32801ae08745Sheppo {
32811ae08745Sheppo 	vsw_port_t	*port = NULL;
32821ae08745Sheppo 	vsw_port_list_t	*plist = &vswp->plist;
32831ae08745Sheppo 
32841ae08745Sheppo 	D1(vswp, "%s: enter: port id %d", __func__, p_instance);
32851ae08745Sheppo 
32861ae08745Sheppo 	WRITE_ENTER(&plist->lockrw);
32871ae08745Sheppo 
32881ae08745Sheppo 	if ((port = vsw_lookup_port(vswp, p_instance)) == NULL) {
32891ae08745Sheppo 		RW_EXIT(&plist->lockrw);
32901ae08745Sheppo 		return (1);
32911ae08745Sheppo 	}
32921ae08745Sheppo 
32931ae08745Sheppo 	if (vsw_plist_del_node(vswp, port)) {
32941ae08745Sheppo 		RW_EXIT(&plist->lockrw);
32951ae08745Sheppo 		return (1);
32961ae08745Sheppo 	}
32971ae08745Sheppo 
32981ae08745Sheppo 	/* Remove the fdb entry for this port/mac address */
32991ae08745Sheppo 	(void) vsw_del_fdb(vswp, port);
33001ae08745Sheppo 
33011ae08745Sheppo 	/* Remove any multicast addresses.. */
33021ae08745Sheppo 	vsw_del_mcst_port(port);
33031ae08745Sheppo 
33041ae08745Sheppo 	/*
3305e1ebb9ecSlm66018 	 * No longer need to hold writer lock on port list now
3306e1ebb9ecSlm66018 	 * that we have unlinked the target port from the list.
33071ae08745Sheppo 	 */
33081ae08745Sheppo 	RW_EXIT(&plist->lockrw);
33091ae08745Sheppo 
33105f94e909Ssg70180 	/* Remove address if was programmed into HW. */
33115f94e909Ssg70180 	mutex_enter(&vswp->hw_lock);
33125f94e909Ssg70180 	(void) vsw_unset_hw(vswp, port, VSW_VNETPORT);
3313e1ebb9ecSlm66018 	if (vswp->recfg_reqd)
33145f94e909Ssg70180 		vsw_reconfig_hw(vswp);
33155f94e909Ssg70180 	mutex_exit(&vswp->hw_lock);
3316e1ebb9ecSlm66018 
33171ae08745Sheppo 	if (vsw_port_delete(port)) {
33181ae08745Sheppo 		return (1);
33191ae08745Sheppo 	}
33201ae08745Sheppo 
33211ae08745Sheppo 	D1(vswp, "%s: exit: p_instance(%d)", __func__, p_instance);
33221ae08745Sheppo 	return (0);
33231ae08745Sheppo }
33241ae08745Sheppo 
33251ae08745Sheppo /*
33261ae08745Sheppo  * Detach all active ports.
33271ae08745Sheppo  *
33281ae08745Sheppo  * Returns 0 on success, 1 on failure.
33291ae08745Sheppo  */
33301ae08745Sheppo static int
33311ae08745Sheppo vsw_detach_ports(vsw_t *vswp)
33321ae08745Sheppo {
33331ae08745Sheppo 	vsw_port_list_t 	*plist = &vswp->plist;
33341ae08745Sheppo 	vsw_port_t		*port = NULL;
33351ae08745Sheppo 
33361ae08745Sheppo 	D1(vswp, "%s: enter", __func__);
33371ae08745Sheppo 
33381ae08745Sheppo 	WRITE_ENTER(&plist->lockrw);
33391ae08745Sheppo 
33401ae08745Sheppo 	while ((port = plist->head) != NULL) {
33411ae08745Sheppo 		if (vsw_plist_del_node(vswp, port)) {
33421ae08745Sheppo 			DERR(vswp, "%s: Error deleting port %d"
33431ae08745Sheppo 				" from port list", __func__,
33441ae08745Sheppo 				port->p_instance);
33451ae08745Sheppo 			RW_EXIT(&plist->lockrw);
33461ae08745Sheppo 			return (1);
33471ae08745Sheppo 		}
33481ae08745Sheppo 
3349e1ebb9ecSlm66018 		/* Remove address if was programmed into HW. */
33505f94e909Ssg70180 		mutex_enter(&vswp->hw_lock);
33515f94e909Ssg70180 		(void) vsw_unset_hw(vswp, port, VSW_VNETPORT);
33525f94e909Ssg70180 		mutex_exit(&vswp->hw_lock);
3353e1ebb9ecSlm66018 
33541ae08745Sheppo 		/* Remove the fdb entry for this port/mac address */
33551ae08745Sheppo 		(void) vsw_del_fdb(vswp, port);
33561ae08745Sheppo 
33571ae08745Sheppo 		/* Remove any multicast addresses.. */
33581ae08745Sheppo 		vsw_del_mcst_port(port);
33591ae08745Sheppo 
33601ae08745Sheppo 		/*
33611ae08745Sheppo 		 * No longer need to hold the lock on the port list
33621ae08745Sheppo 		 * now that we have unlinked the target port from the
33631ae08745Sheppo 		 * list.
33641ae08745Sheppo 		 */
33651ae08745Sheppo 		RW_EXIT(&plist->lockrw);
33661ae08745Sheppo 		if (vsw_port_delete(port)) {
33671ae08745Sheppo 			DERR(vswp, "%s: Error deleting port %d",
33681ae08745Sheppo 				__func__, port->p_instance);
33691ae08745Sheppo 			return (1);
33701ae08745Sheppo 		}
33711ae08745Sheppo 		WRITE_ENTER(&plist->lockrw);
33721ae08745Sheppo 	}
33731ae08745Sheppo 	RW_EXIT(&plist->lockrw);
33741ae08745Sheppo 
33751ae08745Sheppo 	D1(vswp, "%s: exit", __func__);
33761ae08745Sheppo 
33771ae08745Sheppo 	return (0);
33781ae08745Sheppo }
33791ae08745Sheppo 
33801ae08745Sheppo /*
33811ae08745Sheppo  * Delete the specified port.
33821ae08745Sheppo  *
33831ae08745Sheppo  * Returns 0 on success, 1 on failure.
33841ae08745Sheppo  */
33851ae08745Sheppo static int
33861ae08745Sheppo vsw_port_delete(vsw_port_t *port)
33871ae08745Sheppo {
33881ae08745Sheppo 	vsw_ldc_list_t 		*ldcl;
33891ae08745Sheppo 	vsw_t			*vswp = port->p_vswp;
33901ae08745Sheppo 
33911ae08745Sheppo 	D1(vswp, "%s: enter : port id %d", __func__, port->p_instance);
33921ae08745Sheppo 
33931ae08745Sheppo 	(void) vsw_uninit_ldcs(port);
33941ae08745Sheppo 
33951ae08745Sheppo 	/*
33961ae08745Sheppo 	 * Wait for any pending ctrl msg tasks which reference this
33971ae08745Sheppo 	 * port to finish.
33981ae08745Sheppo 	 */
33991ae08745Sheppo 	if (vsw_drain_port_taskq(port))
34001ae08745Sheppo 		return (1);
34011ae08745Sheppo 
34021ae08745Sheppo 	/*
34031ae08745Sheppo 	 * Wait for port reference count to hit zero.
34041ae08745Sheppo 	 */
34051ae08745Sheppo 	mutex_enter(&port->ref_lock);
34061ae08745Sheppo 	while (port->ref_cnt != 0)
34071ae08745Sheppo 		cv_wait(&port->ref_cv, &port->ref_lock);
34081ae08745Sheppo 	mutex_exit(&port->ref_lock);
34091ae08745Sheppo 
34101ae08745Sheppo 	/*
34111ae08745Sheppo 	 * Wait for any active callbacks to finish
34121ae08745Sheppo 	 */
34131ae08745Sheppo 	if (vsw_drain_ldcs(port))
34141ae08745Sheppo 		return (1);
34151ae08745Sheppo 
34161ae08745Sheppo 	ldcl = &port->p_ldclist;
34171ae08745Sheppo 	WRITE_ENTER(&ldcl->lockrw);
34181ae08745Sheppo 	while (ldcl->num_ldcs > 0) {
34191ae08745Sheppo 		if (vsw_ldc_detach(port, ldcl->head->ldc_id) != 0) {;
342034683adeSsg70180 			cmn_err(CE_WARN, "!vsw%d: unable to detach ldc %ld",
342134683adeSsg70180 					vswp->instance, ldcl->head->ldc_id);
34221ae08745Sheppo 			RW_EXIT(&ldcl->lockrw);
34231ae08745Sheppo 			return (1);
34241ae08745Sheppo 		}
34251ae08745Sheppo 	}
34261ae08745Sheppo 	RW_EXIT(&ldcl->lockrw);
34271ae08745Sheppo 
34281ae08745Sheppo 	rw_destroy(&port->p_ldclist.lockrw);
34291ae08745Sheppo 
34301ae08745Sheppo 	mutex_destroy(&port->mca_lock);
34311ae08745Sheppo 	mutex_destroy(&port->tx_lock);
34321ae08745Sheppo 	cv_destroy(&port->ref_cv);
34331ae08745Sheppo 	mutex_destroy(&port->ref_lock);
34341ae08745Sheppo 
34351ae08745Sheppo 	cv_destroy(&port->state_cv);
34361ae08745Sheppo 	mutex_destroy(&port->state_lock);
34371ae08745Sheppo 
34381ae08745Sheppo 	kmem_free(port, sizeof (vsw_port_t));
34391ae08745Sheppo 
34401ae08745Sheppo 	D1(vswp, "%s: exit", __func__);
34411ae08745Sheppo 
34421ae08745Sheppo 	return (0);
34431ae08745Sheppo }
34441ae08745Sheppo 
34451ae08745Sheppo /*
34461ae08745Sheppo  * Attach a logical domain channel (ldc) under a specified port.
34471ae08745Sheppo  *
34481ae08745Sheppo  * Returns 0 on success, 1 on failure.
34491ae08745Sheppo  */
34501ae08745Sheppo static int
34511ae08745Sheppo vsw_ldc_attach(vsw_port_t *port, uint64_t ldc_id)
34521ae08745Sheppo {
34531ae08745Sheppo 	vsw_t 		*vswp = port->p_vswp;
34541ae08745Sheppo 	vsw_ldc_list_t *ldcl = &port->p_ldclist;
34551ae08745Sheppo 	vsw_ldc_t 	*ldcp = NULL;
34561ae08745Sheppo 	ldc_attr_t 	attr;
34571ae08745Sheppo 	ldc_status_t	istatus;
34581ae08745Sheppo 	int 		status = DDI_FAILURE;
3459d10e4ef2Snarayan 	int		rv;
34603af08d82Slm66018 	enum		{ PROG_init = 0x0, PROG_mblks = 0x1,
34613af08d82Slm66018 				PROG_callback = 0x2}
34623af08d82Slm66018 			progress;
34633af08d82Slm66018 
34643af08d82Slm66018 	progress = PROG_init;
34651ae08745Sheppo 
34661ae08745Sheppo 	D1(vswp, "%s: enter", __func__);
34671ae08745Sheppo 
34681ae08745Sheppo 	ldcp = kmem_zalloc(sizeof (vsw_ldc_t), KM_NOSLEEP);
34691ae08745Sheppo 	if (ldcp == NULL) {
34701ae08745Sheppo 		DERR(vswp, "%s: kmem_zalloc failed", __func__);
34711ae08745Sheppo 		return (1);
34721ae08745Sheppo 	}
34731ae08745Sheppo 	ldcp->ldc_id = ldc_id;
34741ae08745Sheppo 
3475d10e4ef2Snarayan 	/* allocate pool of receive mblks */
3476d10e4ef2Snarayan 	rv = vio_create_mblks(vsw_num_mblks, vsw_mblk_size, &(ldcp->rxh));
3477d10e4ef2Snarayan 	if (rv) {
3478d10e4ef2Snarayan 		DWARN(vswp, "%s: unable to create free mblk pool for"
3479d10e4ef2Snarayan 			" channel %ld (rv %d)", __func__, ldc_id, rv);
3480d10e4ef2Snarayan 		kmem_free(ldcp, sizeof (vsw_ldc_t));
3481d10e4ef2Snarayan 		return (1);
3482d10e4ef2Snarayan 	}
3483d10e4ef2Snarayan 
34843af08d82Slm66018 	progress |= PROG_mblks;
34853af08d82Slm66018 
34861ae08745Sheppo 	mutex_init(&ldcp->ldc_txlock, NULL, MUTEX_DRIVER, NULL);
34871ae08745Sheppo 	mutex_init(&ldcp->ldc_cblock, NULL, MUTEX_DRIVER, NULL);
34881ae08745Sheppo 	mutex_init(&ldcp->drain_cv_lock, NULL, MUTEX_DRIVER, NULL);
34891ae08745Sheppo 	cv_init(&ldcp->drain_cv, NULL, CV_DRIVER, NULL);
3490445b4c2eSsb155480 	rw_init(&ldcp->lane_in.dlistrw, NULL, RW_DRIVER, NULL);
3491445b4c2eSsb155480 	rw_init(&ldcp->lane_out.dlistrw, NULL, RW_DRIVER, NULL);
34921ae08745Sheppo 
34931ae08745Sheppo 	/* required for handshake with peer */
34941ae08745Sheppo 	ldcp->local_session = (uint64_t)ddi_get_lbolt();
34951ae08745Sheppo 	ldcp->peer_session = 0;
34961ae08745Sheppo 	ldcp->session_status = 0;
34971ae08745Sheppo 
34981ae08745Sheppo 	mutex_init(&ldcp->hss_lock, NULL, MUTEX_DRIVER, NULL);
34991ae08745Sheppo 	ldcp->hss_id = 1;	/* Initial handshake session id */
35001ae08745Sheppo 
35011ae08745Sheppo 	/* only set for outbound lane, inbound set by peer */
3502d10e4ef2Snarayan 	mutex_init(&ldcp->lane_in.seq_lock, NULL, MUTEX_DRIVER, NULL);
3503d10e4ef2Snarayan 	mutex_init(&ldcp->lane_out.seq_lock, NULL, MUTEX_DRIVER, NULL);
35041ae08745Sheppo 	vsw_set_lane_attr(vswp, &ldcp->lane_out);
35051ae08745Sheppo 
35061ae08745Sheppo 	attr.devclass = LDC_DEV_NT_SVC;
35071ae08745Sheppo 	attr.instance = ddi_get_instance(vswp->dip);
35081ae08745Sheppo 	attr.mode = LDC_MODE_UNRELIABLE;
3509e1ebb9ecSlm66018 	attr.mtu = VSW_LDC_MTU;
35101ae08745Sheppo 	status = ldc_init(ldc_id, &attr, &ldcp->ldc_handle);
35111ae08745Sheppo 	if (status != 0) {
35121ae08745Sheppo 		DERR(vswp, "%s(%lld): ldc_init failed, rv (%d)",
35131ae08745Sheppo 		    __func__, ldc_id, status);
3514d10e4ef2Snarayan 		goto ldc_attach_fail;
35151ae08745Sheppo 	}
35161ae08745Sheppo 
35171ae08745Sheppo 	status = ldc_reg_callback(ldcp->ldc_handle, vsw_ldc_cb, (caddr_t)ldcp);
35181ae08745Sheppo 	if (status != 0) {
35191ae08745Sheppo 		DERR(vswp, "%s(%lld): ldc_reg_callback failed, rv (%d)",
35201ae08745Sheppo 		    __func__, ldc_id, status);
35211ae08745Sheppo 		(void) ldc_fini(ldcp->ldc_handle);
3522d10e4ef2Snarayan 		goto ldc_attach_fail;
35231ae08745Sheppo 	}
35241ae08745Sheppo 
35253af08d82Slm66018 	progress |= PROG_callback;
35263af08d82Slm66018 
35273af08d82Slm66018 	mutex_init(&ldcp->status_lock, NULL, MUTEX_DRIVER, NULL);
35281ae08745Sheppo 
35291ae08745Sheppo 	if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
35301ae08745Sheppo 		DERR(vswp, "%s: ldc_status failed", __func__);
35313af08d82Slm66018 		mutex_destroy(&ldcp->status_lock);
35323af08d82Slm66018 		goto ldc_attach_fail;
35331ae08745Sheppo 	}
35341ae08745Sheppo 
35351ae08745Sheppo 	ldcp->ldc_status = istatus;
35361ae08745Sheppo 	ldcp->ldc_port = port;
35371ae08745Sheppo 	ldcp->ldc_vswp = vswp;
35381ae08745Sheppo 
35391ae08745Sheppo 	/* link it into the list of channels for this port */
35401ae08745Sheppo 	WRITE_ENTER(&ldcl->lockrw);
35411ae08745Sheppo 	ldcp->ldc_next = ldcl->head;
35421ae08745Sheppo 	ldcl->head = ldcp;
35431ae08745Sheppo 	ldcl->num_ldcs++;
35441ae08745Sheppo 	RW_EXIT(&ldcl->lockrw);
35451ae08745Sheppo 
35461ae08745Sheppo 	D1(vswp, "%s: exit", __func__);
35471ae08745Sheppo 	return (0);
3548d10e4ef2Snarayan 
3549d10e4ef2Snarayan ldc_attach_fail:
3550d10e4ef2Snarayan 	mutex_destroy(&ldcp->ldc_txlock);
3551d10e4ef2Snarayan 	mutex_destroy(&ldcp->ldc_cblock);
3552d10e4ef2Snarayan 
3553d10e4ef2Snarayan 	cv_destroy(&ldcp->drain_cv);
3554d10e4ef2Snarayan 
3555445b4c2eSsb155480 	rw_destroy(&ldcp->lane_in.dlistrw);
3556445b4c2eSsb155480 	rw_destroy(&ldcp->lane_out.dlistrw);
3557445b4c2eSsb155480 
35583af08d82Slm66018 	if (progress & PROG_callback) {
35593af08d82Slm66018 		(void) ldc_unreg_callback(ldcp->ldc_handle);
35603af08d82Slm66018 	}
35613af08d82Slm66018 
35623af08d82Slm66018 	if ((progress & PROG_mblks) && (ldcp->rxh != NULL)) {
3563d10e4ef2Snarayan 		if (vio_destroy_mblks(ldcp->rxh) != 0) {
3564d10e4ef2Snarayan 			/*
3565d10e4ef2Snarayan 			 * Something odd has happened, as the destroy
3566d10e4ef2Snarayan 			 * will only fail if some mblks have been allocated
3567d10e4ef2Snarayan 			 * from the pool already (which shouldn't happen)
3568d10e4ef2Snarayan 			 * and have not been returned.
3569d10e4ef2Snarayan 			 *
3570d10e4ef2Snarayan 			 * Add the pool pointer to a list maintained in
3571d10e4ef2Snarayan 			 * the device instance. Another attempt will be made
3572d10e4ef2Snarayan 			 * to free the pool when the device itself detaches.
3573d10e4ef2Snarayan 			 */
357434683adeSsg70180 			cmn_err(CE_WARN, "!vsw%d: Creation of ldc channel %ld "
357534683adeSsg70180 				"failed and cannot destroy associated mblk "
357634683adeSsg70180 				"pool", vswp->instance, ldc_id);
3577d10e4ef2Snarayan 			ldcp->rxh->nextp =  vswp->rxh;
3578d10e4ef2Snarayan 			vswp->rxh = ldcp->rxh;
3579d10e4ef2Snarayan 		}
3580d10e4ef2Snarayan 	}
3581d10e4ef2Snarayan 	mutex_destroy(&ldcp->drain_cv_lock);
3582d10e4ef2Snarayan 	mutex_destroy(&ldcp->hss_lock);
3583d10e4ef2Snarayan 
3584d10e4ef2Snarayan 	mutex_destroy(&ldcp->lane_in.seq_lock);
3585d10e4ef2Snarayan 	mutex_destroy(&ldcp->lane_out.seq_lock);
3586d10e4ef2Snarayan 	kmem_free(ldcp, sizeof (vsw_ldc_t));
3587d10e4ef2Snarayan 
3588d10e4ef2Snarayan 	return (1);
35891ae08745Sheppo }
35901ae08745Sheppo 
35911ae08745Sheppo /*
35921ae08745Sheppo  * Detach a logical domain channel (ldc) belonging to a
35931ae08745Sheppo  * particular port.
35941ae08745Sheppo  *
35951ae08745Sheppo  * Returns 0 on success, 1 on failure.
35961ae08745Sheppo  */
35971ae08745Sheppo static int
35981ae08745Sheppo vsw_ldc_detach(vsw_port_t *port, uint64_t ldc_id)
35991ae08745Sheppo {
36001ae08745Sheppo 	vsw_t 		*vswp = port->p_vswp;
36011ae08745Sheppo 	vsw_ldc_t 	*ldcp, *prev_ldcp;
36021ae08745Sheppo 	vsw_ldc_list_t	*ldcl = &port->p_ldclist;
36031ae08745Sheppo 	int 		rv;
36041ae08745Sheppo 
36051ae08745Sheppo 	prev_ldcp = ldcl->head;
36061ae08745Sheppo 	for (; (ldcp = prev_ldcp) != NULL; prev_ldcp = ldcp->ldc_next) {
36071ae08745Sheppo 		if (ldcp->ldc_id == ldc_id) {
36081ae08745Sheppo 			break;
36091ae08745Sheppo 		}
36101ae08745Sheppo 	}
36111ae08745Sheppo 
36121ae08745Sheppo 	/* specified ldc id not found */
36131ae08745Sheppo 	if (ldcp == NULL) {
36141ae08745Sheppo 		DERR(vswp, "%s: ldcp = NULL", __func__);
36151ae08745Sheppo 		return (1);
36161ae08745Sheppo 	}
36171ae08745Sheppo 
36181ae08745Sheppo 	D2(vswp, "%s: detaching channel %lld", __func__, ldcp->ldc_id);
36191ae08745Sheppo 
36201ae08745Sheppo 	/*
36211ae08745Sheppo 	 * Before we can close the channel we must release any mapped
36221ae08745Sheppo 	 * resources (e.g. drings).
36231ae08745Sheppo 	 */
36241ae08745Sheppo 	vsw_free_lane_resources(ldcp, INBOUND);
36251ae08745Sheppo 	vsw_free_lane_resources(ldcp, OUTBOUND);
36261ae08745Sheppo 
36271ae08745Sheppo 	/*
36281ae08745Sheppo 	 * If the close fails we are in serious trouble, as won't
36291ae08745Sheppo 	 * be able to delete the parent port.
36301ae08745Sheppo 	 */
36311ae08745Sheppo 	if ((rv = ldc_close(ldcp->ldc_handle)) != 0) {
36321ae08745Sheppo 		DERR(vswp, "%s: error %d closing channel %lld",
36331ae08745Sheppo 			__func__, rv, ldcp->ldc_id);
36341ae08745Sheppo 		return (1);
36351ae08745Sheppo 	}
36361ae08745Sheppo 
36371ae08745Sheppo 	(void) ldc_fini(ldcp->ldc_handle);
36381ae08745Sheppo 
36391ae08745Sheppo 	ldcp->ldc_status = LDC_INIT;
36401ae08745Sheppo 	ldcp->ldc_handle = NULL;
36411ae08745Sheppo 	ldcp->ldc_vswp = NULL;
3642d10e4ef2Snarayan 
3643d10e4ef2Snarayan 	if (ldcp->rxh != NULL) {
3644d10e4ef2Snarayan 		if (vio_destroy_mblks(ldcp->rxh)) {
3645d10e4ef2Snarayan 			/*
3646d10e4ef2Snarayan 			 * Mostly likely some mblks are still in use and
3647d10e4ef2Snarayan 			 * have not been returned to the pool. Add the pool
3648d10e4ef2Snarayan 			 * to the list maintained in the device instance.
3649d10e4ef2Snarayan 			 * Another attempt will be made to destroy the pool
3650d10e4ef2Snarayan 			 * when the device detaches.
3651d10e4ef2Snarayan 			 */
3652d10e4ef2Snarayan 			ldcp->rxh->nextp =  vswp->rxh;
3653d10e4ef2Snarayan 			vswp->rxh = ldcp->rxh;
3654d10e4ef2Snarayan 		}
3655d10e4ef2Snarayan 	}
3656d10e4ef2Snarayan 
36573af08d82Slm66018 	/* unlink it from the list */
36583af08d82Slm66018 	prev_ldcp = ldcp->ldc_next;
36593af08d82Slm66018 	ldcl->num_ldcs--;
36603af08d82Slm66018 
36611ae08745Sheppo 	mutex_destroy(&ldcp->ldc_txlock);
36621ae08745Sheppo 	mutex_destroy(&ldcp->ldc_cblock);
36631ae08745Sheppo 	cv_destroy(&ldcp->drain_cv);
36641ae08745Sheppo 	mutex_destroy(&ldcp->drain_cv_lock);
36651ae08745Sheppo 	mutex_destroy(&ldcp->hss_lock);
3666d10e4ef2Snarayan 	mutex_destroy(&ldcp->lane_in.seq_lock);
3667d10e4ef2Snarayan 	mutex_destroy(&ldcp->lane_out.seq_lock);
36683af08d82Slm66018 	mutex_destroy(&ldcp->status_lock);
3669445b4c2eSsb155480 	rw_destroy(&ldcp->lane_in.dlistrw);
3670445b4c2eSsb155480 	rw_destroy(&ldcp->lane_out.dlistrw);
36711ae08745Sheppo 
36721ae08745Sheppo 	kmem_free(ldcp, sizeof (vsw_ldc_t));
36731ae08745Sheppo 
36741ae08745Sheppo 	return (0);
36751ae08745Sheppo }
36761ae08745Sheppo 
36771ae08745Sheppo /*
36781ae08745Sheppo  * Open and attempt to bring up the channel. Note that channel
36791ae08745Sheppo  * can only be brought up if peer has also opened channel.
36801ae08745Sheppo  *
36811ae08745Sheppo  * Returns 0 if can open and bring up channel, otherwise
36821ae08745Sheppo  * returns 1.
36831ae08745Sheppo  */
36841ae08745Sheppo static int
36851ae08745Sheppo vsw_ldc_init(vsw_ldc_t *ldcp)
36861ae08745Sheppo {
36871ae08745Sheppo 	vsw_t 		*vswp = ldcp->ldc_vswp;
36881ae08745Sheppo 	ldc_status_t	istatus = 0;
36891ae08745Sheppo 	int		rv;
36901ae08745Sheppo 
36911ae08745Sheppo 	D1(vswp, "%s: enter", __func__);
36921ae08745Sheppo 
36931ae08745Sheppo 	LDC_ENTER_LOCK(ldcp);
36941ae08745Sheppo 
36951ae08745Sheppo 	/* don't start at 0 in case clients don't like that */
36961ae08745Sheppo 	ldcp->next_ident = 1;
36971ae08745Sheppo 
36981ae08745Sheppo 	rv = ldc_open(ldcp->ldc_handle);
36991ae08745Sheppo 	if (rv != 0) {
37001ae08745Sheppo 		DERR(vswp, "%s: ldc_open failed: id(%lld) rv(%d)",
37011ae08745Sheppo 		    __func__, ldcp->ldc_id, rv);
37021ae08745Sheppo 		LDC_EXIT_LOCK(ldcp);
37031ae08745Sheppo 		return (1);
37041ae08745Sheppo 	}
37051ae08745Sheppo 
37061ae08745Sheppo 	if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
37071ae08745Sheppo 		DERR(vswp, "%s: unable to get status", __func__);
37081ae08745Sheppo 		LDC_EXIT_LOCK(ldcp);
37091ae08745Sheppo 		return (1);
37101ae08745Sheppo 
37111ae08745Sheppo 	} else if (istatus != LDC_OPEN && istatus != LDC_READY) {
37121ae08745Sheppo 		DERR(vswp, "%s: id (%lld) status(%d) is not OPEN/READY",
37131ae08745Sheppo 		    __func__, ldcp->ldc_id, istatus);
37141ae08745Sheppo 		LDC_EXIT_LOCK(ldcp);
37151ae08745Sheppo 		return (1);
37161ae08745Sheppo 	}
37171ae08745Sheppo 
37183af08d82Slm66018 	mutex_enter(&ldcp->status_lock);
37191ae08745Sheppo 	ldcp->ldc_status = istatus;
37203af08d82Slm66018 	mutex_exit(&ldcp->status_lock);
37213af08d82Slm66018 
37221ae08745Sheppo 	rv = ldc_up(ldcp->ldc_handle);
37231ae08745Sheppo 	if (rv != 0) {
37241ae08745Sheppo 		/*
37251ae08745Sheppo 		 * Not a fatal error for ldc_up() to fail, as peer
37261ae08745Sheppo 		 * end point may simply not be ready yet.
37271ae08745Sheppo 		 */
37281ae08745Sheppo 		D2(vswp, "%s: ldc_up err id(%lld) rv(%d)", __func__,
37291ae08745Sheppo 			ldcp->ldc_id, rv);
37301ae08745Sheppo 		LDC_EXIT_LOCK(ldcp);
37311ae08745Sheppo 		return (1);
37321ae08745Sheppo 	}
37331ae08745Sheppo 
37341ae08745Sheppo 	/*
37351ae08745Sheppo 	 * ldc_up() call is non-blocking so need to explicitly
37361ae08745Sheppo 	 * check channel status to see if in fact the channel
37371ae08745Sheppo 	 * is UP.
37381ae08745Sheppo 	 */
37393af08d82Slm66018 	mutex_enter(&ldcp->status_lock);
37403af08d82Slm66018 	if (ldc_status(ldcp->ldc_handle, &ldcp->ldc_status) != 0) {
37411ae08745Sheppo 		DERR(vswp, "%s: unable to get status", __func__);
37423af08d82Slm66018 		mutex_exit(&ldcp->status_lock);
37431ae08745Sheppo 		LDC_EXIT_LOCK(ldcp);
37441ae08745Sheppo 		return (1);
37451ae08745Sheppo 
37461ae08745Sheppo 	}
3747b071742bSsg70180 
3748b071742bSsg70180 	if (ldcp->ldc_status == LDC_UP) {
3749b071742bSsg70180 		D2(vswp, "%s: channel %ld now UP (%ld)", __func__,
3750b071742bSsg70180 			ldcp->ldc_id, istatus);
37513af08d82Slm66018 		mutex_exit(&ldcp->status_lock);
37521ae08745Sheppo 		LDC_EXIT_LOCK(ldcp);
37531ae08745Sheppo 
3754b071742bSsg70180 		vsw_process_conn_evt(ldcp, VSW_CONN_UP);
3755b071742bSsg70180 		return (0);
37563af08d82Slm66018 	}
37573af08d82Slm66018 
3758b071742bSsg70180 	mutex_exit(&ldcp->status_lock);
3759b071742bSsg70180 	LDC_EXIT_LOCK(ldcp);
3760b071742bSsg70180 
37611ae08745Sheppo 	D1(vswp, "%s: exit", __func__);
37621ae08745Sheppo 	return (0);
37631ae08745Sheppo }
37641ae08745Sheppo 
37651ae08745Sheppo /* disable callbacks on the channel */
37661ae08745Sheppo static int
37671ae08745Sheppo vsw_ldc_uninit(vsw_ldc_t *ldcp)
37681ae08745Sheppo {
37691ae08745Sheppo 	vsw_t	*vswp = ldcp->ldc_vswp;
37701ae08745Sheppo 	int	rv;
37711ae08745Sheppo 
37721ae08745Sheppo 	D1(vswp, "vsw_ldc_uninit: enter: id(%lx)\n", ldcp->ldc_id);
37731ae08745Sheppo 
37741ae08745Sheppo 	LDC_ENTER_LOCK(ldcp);
37751ae08745Sheppo 
37761ae08745Sheppo 	rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE);
37771ae08745Sheppo 	if (rv != 0) {
37781ae08745Sheppo 		DERR(vswp, "vsw_ldc_uninit(%lld): error disabling "
37791ae08745Sheppo 			"interrupts (rv = %d)\n", ldcp->ldc_id, rv);
37801ae08745Sheppo 		LDC_EXIT_LOCK(ldcp);
37811ae08745Sheppo 		return (1);
37821ae08745Sheppo 	}
37831ae08745Sheppo 
37843af08d82Slm66018 	mutex_enter(&ldcp->status_lock);
37851ae08745Sheppo 	ldcp->ldc_status = LDC_INIT;
37863af08d82Slm66018 	mutex_exit(&ldcp->status_lock);
37871ae08745Sheppo 
37881ae08745Sheppo 	LDC_EXIT_LOCK(ldcp);
37891ae08745Sheppo 
37901ae08745Sheppo 	D1(vswp, "vsw_ldc_uninit: exit: id(%lx)", ldcp->ldc_id);
37911ae08745Sheppo 
37921ae08745Sheppo 	return (0);
37931ae08745Sheppo }
37941ae08745Sheppo 
37951ae08745Sheppo static int
37961ae08745Sheppo vsw_init_ldcs(vsw_port_t *port)
37971ae08745Sheppo {
37981ae08745Sheppo 	vsw_ldc_list_t	*ldcl = &port->p_ldclist;
37991ae08745Sheppo 	vsw_ldc_t	*ldcp;
38001ae08745Sheppo 
38011ae08745Sheppo 	READ_ENTER(&ldcl->lockrw);
38021ae08745Sheppo 	ldcp =  ldcl->head;
38031ae08745Sheppo 	for (; ldcp  != NULL; ldcp = ldcp->ldc_next) {
38041ae08745Sheppo 		(void) vsw_ldc_init(ldcp);
38051ae08745Sheppo 	}
38061ae08745Sheppo 	RW_EXIT(&ldcl->lockrw);
38071ae08745Sheppo 
38081ae08745Sheppo 	return (0);
38091ae08745Sheppo }
38101ae08745Sheppo 
38111ae08745Sheppo static int
38121ae08745Sheppo vsw_uninit_ldcs(vsw_port_t *port)
38131ae08745Sheppo {
38141ae08745Sheppo 	vsw_ldc_list_t	*ldcl = &port->p_ldclist;
38151ae08745Sheppo 	vsw_ldc_t	*ldcp;
38161ae08745Sheppo 
38171ae08745Sheppo 	D1(NULL, "vsw_uninit_ldcs: enter\n");
38181ae08745Sheppo 
38191ae08745Sheppo 	READ_ENTER(&ldcl->lockrw);
38201ae08745Sheppo 	ldcp =  ldcl->head;
38211ae08745Sheppo 	for (; ldcp  != NULL; ldcp = ldcp->ldc_next) {
38221ae08745Sheppo 		(void) vsw_ldc_uninit(ldcp);
38231ae08745Sheppo 	}
38241ae08745Sheppo 	RW_EXIT(&ldcl->lockrw);
38251ae08745Sheppo 
38261ae08745Sheppo 	D1(NULL, "vsw_uninit_ldcs: exit\n");
38271ae08745Sheppo 
38281ae08745Sheppo 	return (0);
38291ae08745Sheppo }
38301ae08745Sheppo 
38311ae08745Sheppo /*
38321ae08745Sheppo  * Wait until the callback(s) associated with the ldcs under the specified
38331ae08745Sheppo  * port have completed.
38341ae08745Sheppo  *
38351ae08745Sheppo  * Prior to this function being invoked each channel under this port
38361ae08745Sheppo  * should have been quiesced via ldc_set_cb_mode(DISABLE).
38371ae08745Sheppo  *
38381ae08745Sheppo  * A short explaination of what we are doing below..
38391ae08745Sheppo  *
38401ae08745Sheppo  * The simplest approach would be to have a reference counter in
38411ae08745Sheppo  * the ldc structure which is increment/decremented by the callbacks as
38421ae08745Sheppo  * they use the channel. The drain function could then simply disable any
38431ae08745Sheppo  * further callbacks and do a cv_wait for the ref to hit zero. Unfortunately
38441ae08745Sheppo  * there is a tiny window here - before the callback is able to get the lock
38451ae08745Sheppo  * on the channel it is interrupted and this function gets to execute. It
38461ae08745Sheppo  * sees that the ref count is zero and believes its free to delete the
38471ae08745Sheppo  * associated data structures.
38481ae08745Sheppo  *
38491ae08745Sheppo  * We get around this by taking advantage of the fact that before the ldc
38501ae08745Sheppo  * framework invokes a callback it sets a flag to indicate that there is a
38511ae08745Sheppo  * callback active (or about to become active). If when we attempt to
38521ae08745Sheppo  * unregister a callback when this active flag is set then the unregister
38531ae08745Sheppo  * will fail with EWOULDBLOCK.
38541ae08745Sheppo  *
38551ae08745Sheppo  * If the unregister fails we do a cv_timedwait. We will either be signaled
38561ae08745Sheppo  * by the callback as it is exiting (note we have to wait a short period to
38571ae08745Sheppo  * allow the callback to return fully to the ldc framework and it to clear
38581ae08745Sheppo  * the active flag), or by the timer expiring. In either case we again attempt
38591ae08745Sheppo  * the unregister. We repeat this until we can succesfully unregister the
38601ae08745Sheppo  * callback.
38611ae08745Sheppo  *
38621ae08745Sheppo  * The reason we use a cv_timedwait rather than a simple cv_wait is to catch
38631ae08745Sheppo  * the case where the callback has finished but the ldc framework has not yet
38641ae08745Sheppo  * cleared the active flag. In this case we would never get a cv_signal.
38651ae08745Sheppo  */
38661ae08745Sheppo static int
38671ae08745Sheppo vsw_drain_ldcs(vsw_port_t *port)
38681ae08745Sheppo {
38691ae08745Sheppo 	vsw_ldc_list_t	*ldcl = &port->p_ldclist;
38701ae08745Sheppo 	vsw_ldc_t	*ldcp;
38711ae08745Sheppo 	vsw_t		*vswp = port->p_vswp;
38721ae08745Sheppo 
38731ae08745Sheppo 	D1(vswp, "%s: enter", __func__);
38741ae08745Sheppo 
38751ae08745Sheppo 	READ_ENTER(&ldcl->lockrw);
38761ae08745Sheppo 
38771ae08745Sheppo 	ldcp = ldcl->head;
38781ae08745Sheppo 
38791ae08745Sheppo 	for (; ldcp  != NULL; ldcp = ldcp->ldc_next) {
38801ae08745Sheppo 		/*
38811ae08745Sheppo 		 * If we can unregister the channel callback then we
38821ae08745Sheppo 		 * know that there is no callback either running or
38831ae08745Sheppo 		 * scheduled to run for this channel so move on to next
38841ae08745Sheppo 		 * channel in the list.
38851ae08745Sheppo 		 */
38861ae08745Sheppo 		mutex_enter(&ldcp->drain_cv_lock);
38871ae08745Sheppo 
38881ae08745Sheppo 		/* prompt active callbacks to quit */
38891ae08745Sheppo 		ldcp->drain_state = VSW_LDC_DRAINING;
38901ae08745Sheppo 
38911ae08745Sheppo 		if ((ldc_unreg_callback(ldcp->ldc_handle)) == 0) {
38921ae08745Sheppo 			D2(vswp, "%s: unreg callback for chan %ld", __func__,
38931ae08745Sheppo 				ldcp->ldc_id);
38941ae08745Sheppo 			mutex_exit(&ldcp->drain_cv_lock);
38951ae08745Sheppo 			continue;
38961ae08745Sheppo 		} else {
38971ae08745Sheppo 			/*
38981ae08745Sheppo 			 * If we end up here we know that either 1) a callback
38991ae08745Sheppo 			 * is currently executing, 2) is about to start (i.e.
39001ae08745Sheppo 			 * the ldc framework has set the active flag but
39011ae08745Sheppo 			 * has not actually invoked the callback yet, or 3)
39021ae08745Sheppo 			 * has finished and has returned to the ldc framework
39031ae08745Sheppo 			 * but the ldc framework has not yet cleared the
39041ae08745Sheppo 			 * active bit.
39051ae08745Sheppo 			 *
39061ae08745Sheppo 			 * Wait for it to finish.
39071ae08745Sheppo 			 */
39081ae08745Sheppo 			while (ldc_unreg_callback(ldcp->ldc_handle)
39091ae08745Sheppo 								== EWOULDBLOCK)
39101ae08745Sheppo 				(void) cv_timedwait(&ldcp->drain_cv,
39111ae08745Sheppo 					&ldcp->drain_cv_lock, lbolt + hz);
39121ae08745Sheppo 
39131ae08745Sheppo 			mutex_exit(&ldcp->drain_cv_lock);
39141ae08745Sheppo 			D2(vswp, "%s: unreg callback for chan %ld after "
39151ae08745Sheppo 				"timeout", __func__, ldcp->ldc_id);
39161ae08745Sheppo 		}
39171ae08745Sheppo 	}
39181ae08745Sheppo 	RW_EXIT(&ldcl->lockrw);
39191ae08745Sheppo 
39201ae08745Sheppo 	D1(vswp, "%s: exit", __func__);
39211ae08745Sheppo 	return (0);
39221ae08745Sheppo }
39231ae08745Sheppo 
39241ae08745Sheppo /*
39251ae08745Sheppo  * Wait until all tasks which reference this port have completed.
39261ae08745Sheppo  *
39271ae08745Sheppo  * Prior to this function being invoked each channel under this port
39281ae08745Sheppo  * should have been quiesced via ldc_set_cb_mode(DISABLE).
39291ae08745Sheppo  */
39301ae08745Sheppo static int
39311ae08745Sheppo vsw_drain_port_taskq(vsw_port_t *port)
39321ae08745Sheppo {
39331ae08745Sheppo 	vsw_t		*vswp = port->p_vswp;
39341ae08745Sheppo 
39351ae08745Sheppo 	D1(vswp, "%s: enter", __func__);
39361ae08745Sheppo 
39371ae08745Sheppo 	/*
39381ae08745Sheppo 	 * Mark the port as in the process of being detached, and
39391ae08745Sheppo 	 * dispatch a marker task to the queue so we know when all
39401ae08745Sheppo 	 * relevant tasks have completed.
39411ae08745Sheppo 	 */
39421ae08745Sheppo 	mutex_enter(&port->state_lock);
39431ae08745Sheppo 	port->state = VSW_PORT_DETACHING;
39441ae08745Sheppo 
39451ae08745Sheppo 	if ((vswp->taskq_p == NULL) ||
39461ae08745Sheppo 		(ddi_taskq_dispatch(vswp->taskq_p, vsw_marker_task,
39471ae08745Sheppo 			port, DDI_NOSLEEP) != DDI_SUCCESS)) {
39481ae08745Sheppo 		DERR(vswp, "%s: unable to dispatch marker task",
39491ae08745Sheppo 			__func__);
39501ae08745Sheppo 		mutex_exit(&port->state_lock);
39511ae08745Sheppo 		return (1);
39521ae08745Sheppo 	}
39531ae08745Sheppo 
39541ae08745Sheppo 	/*
39551ae08745Sheppo 	 * Wait for the marker task to finish.
39561ae08745Sheppo 	 */
39571ae08745Sheppo 	while (port->state != VSW_PORT_DETACHABLE)
39581ae08745Sheppo 		cv_wait(&port->state_cv, &port->state_lock);
39591ae08745Sheppo 
39601ae08745Sheppo 	mutex_exit(&port->state_lock);
39611ae08745Sheppo 
39621ae08745Sheppo 	D1(vswp, "%s: exit", __func__);
39631ae08745Sheppo 
39641ae08745Sheppo 	return (0);
39651ae08745Sheppo }
39661ae08745Sheppo 
39671ae08745Sheppo static void
39681ae08745Sheppo vsw_marker_task(void *arg)
39691ae08745Sheppo {
39701ae08745Sheppo 	vsw_port_t	*port = arg;
39711ae08745Sheppo 	vsw_t		*vswp = port->p_vswp;
39721ae08745Sheppo 
39731ae08745Sheppo 	D1(vswp, "%s: enter", __func__);
39741ae08745Sheppo 
39751ae08745Sheppo 	mutex_enter(&port->state_lock);
39761ae08745Sheppo 
39771ae08745Sheppo 	/*
39781ae08745Sheppo 	 * No further tasks should be dispatched which reference
39791ae08745Sheppo 	 * this port so ok to mark it as safe to detach.
39801ae08745Sheppo 	 */
39811ae08745Sheppo 	port->state = VSW_PORT_DETACHABLE;
39821ae08745Sheppo 
39831ae08745Sheppo 	cv_signal(&port->state_cv);
39841ae08745Sheppo 
39851ae08745Sheppo 	mutex_exit(&port->state_lock);
39861ae08745Sheppo 
39871ae08745Sheppo 	D1(vswp, "%s: exit", __func__);
39881ae08745Sheppo }
39891ae08745Sheppo 
39901ae08745Sheppo static vsw_port_t *
39911ae08745Sheppo vsw_lookup_port(vsw_t *vswp, int p_instance)
39921ae08745Sheppo {
39931ae08745Sheppo 	vsw_port_list_t *plist = &vswp->plist;
39941ae08745Sheppo 	vsw_port_t	*port;
39951ae08745Sheppo 
39961ae08745Sheppo 	for (port = plist->head; port != NULL; port = port->p_next) {
39971ae08745Sheppo 		if (port->p_instance == p_instance) {
39981ae08745Sheppo 			D2(vswp, "vsw_lookup_port: found p_instance\n");
39991ae08745Sheppo 			return (port);
40001ae08745Sheppo 		}
40011ae08745Sheppo 	}
40021ae08745Sheppo 
40031ae08745Sheppo 	return (NULL);
40041ae08745Sheppo }
40051ae08745Sheppo 
40061ae08745Sheppo /*
40071ae08745Sheppo  * Search for and remove the specified port from the port
40081ae08745Sheppo  * list. Returns 0 if able to locate and remove port, otherwise
40091ae08745Sheppo  * returns 1.
40101ae08745Sheppo  */
40111ae08745Sheppo static int
40121ae08745Sheppo vsw_plist_del_node(vsw_t *vswp, vsw_port_t *port)
40131ae08745Sheppo {
40141ae08745Sheppo 	vsw_port_list_t *plist = &vswp->plist;
40151ae08745Sheppo 	vsw_port_t	*curr_p, *prev_p;
40161ae08745Sheppo 
40171ae08745Sheppo 	if (plist->head == NULL)
40181ae08745Sheppo 		return (1);
40191ae08745Sheppo 
40201ae08745Sheppo 	curr_p = prev_p = plist->head;
40211ae08745Sheppo 
40221ae08745Sheppo 	while (curr_p != NULL) {
40231ae08745Sheppo 		if (curr_p == port) {
40241ae08745Sheppo 			if (prev_p == curr_p) {
40251ae08745Sheppo 				plist->head = curr_p->p_next;
40261ae08745Sheppo 			} else {
40271ae08745Sheppo 				prev_p->p_next = curr_p->p_next;
40281ae08745Sheppo 			}
40291ae08745Sheppo 			plist->num_ports--;
40301ae08745Sheppo 			break;
40311ae08745Sheppo 		} else {
40321ae08745Sheppo 			prev_p = curr_p;
40331ae08745Sheppo 			curr_p = curr_p->p_next;
40341ae08745Sheppo 		}
40351ae08745Sheppo 	}
40361ae08745Sheppo 	return (0);
40371ae08745Sheppo }
40381ae08745Sheppo 
40391ae08745Sheppo /*
40401ae08745Sheppo  * Interrupt handler for ldc messages.
40411ae08745Sheppo  */
40421ae08745Sheppo static uint_t
40431ae08745Sheppo vsw_ldc_cb(uint64_t event, caddr_t arg)
40441ae08745Sheppo {
40451ae08745Sheppo 	vsw_ldc_t	*ldcp = (vsw_ldc_t  *)arg;
40461ae08745Sheppo 	vsw_t 		*vswp = ldcp->ldc_vswp;
40471ae08745Sheppo 
40481ae08745Sheppo 	D1(vswp, "%s: enter: ldcid (%lld)\n", __func__, ldcp->ldc_id);
40491ae08745Sheppo 
40501ae08745Sheppo 	mutex_enter(&ldcp->ldc_cblock);
40511ae08745Sheppo 
4052b071742bSsg70180 	mutex_enter(&ldcp->status_lock);
40531ae08745Sheppo 	if ((ldcp->ldc_status == LDC_INIT) || (ldcp->ldc_handle == NULL)) {
4054b071742bSsg70180 		mutex_exit(&ldcp->status_lock);
40551ae08745Sheppo 		mutex_exit(&ldcp->ldc_cblock);
40561ae08745Sheppo 		return (LDC_SUCCESS);
40571ae08745Sheppo 	}
40583af08d82Slm66018 	mutex_exit(&ldcp->status_lock);
40593af08d82Slm66018 
40601ae08745Sheppo 	if (event & LDC_EVT_UP) {
40611ae08745Sheppo 		/*
4062b071742bSsg70180 		 * Channel has come up.
40631ae08745Sheppo 		 */
40641ae08745Sheppo 		D2(vswp, "%s: id(%ld) event(%llx) UP: status(%ld)",
4065b071742bSsg70180 			__func__, ldcp->ldc_id, event, ldcp->ldc_status);
4066b071742bSsg70180 
4067b071742bSsg70180 		vsw_process_conn_evt(ldcp, VSW_CONN_UP);
40681ae08745Sheppo 
40691ae08745Sheppo 		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);
40701ae08745Sheppo 	}
40711ae08745Sheppo 
40721ae08745Sheppo 	if (event & LDC_EVT_READ) {
40731ae08745Sheppo 		/*
40741ae08745Sheppo 		 * Data available for reading.
40751ae08745Sheppo 		 */
40761ae08745Sheppo 		D2(vswp, "%s: id(ld) event(%llx) data READ",
40771ae08745Sheppo 				__func__, ldcp->ldc_id, event);
40781ae08745Sheppo 
40791ae08745Sheppo 		vsw_process_pkt(ldcp);
40801ae08745Sheppo 
40811ae08745Sheppo 		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);
40821ae08745Sheppo 
40831ae08745Sheppo 		goto vsw_cb_exit;
40841ae08745Sheppo 	}
40851ae08745Sheppo 
40863af08d82Slm66018 	if (event & (LDC_EVT_DOWN | LDC_EVT_RESET)) {
4087b071742bSsg70180 		D2(vswp, "%s: id(%ld) event (%lx) DOWN/RESET: status(%ld)",
4088b071742bSsg70180 			__func__, ldcp->ldc_id, event, ldcp->ldc_status);
40893af08d82Slm66018 
4090b071742bSsg70180 		vsw_process_conn_evt(ldcp, VSW_CONN_RESET);
40911ae08745Sheppo 	}
40921ae08745Sheppo 
40931ae08745Sheppo 	/*
40941ae08745Sheppo 	 * Catch either LDC_EVT_WRITE which we don't support or any
40951ae08745Sheppo 	 * unknown event.
40961ae08745Sheppo 	 */
40971ae08745Sheppo 	if (event & ~(LDC_EVT_UP | LDC_EVT_RESET
40981ae08745Sheppo 					| LDC_EVT_DOWN | LDC_EVT_READ)) {
40991ae08745Sheppo 
41001ae08745Sheppo 		DERR(vswp, "%s: id(%ld) Unexpected event=(%llx) status(%ld)",
41011ae08745Sheppo 			__func__, ldcp->ldc_id, event, ldcp->ldc_status);
41021ae08745Sheppo 	}
41031ae08745Sheppo 
41041ae08745Sheppo vsw_cb_exit:
41051ae08745Sheppo 	mutex_exit(&ldcp->ldc_cblock);
41061ae08745Sheppo 
41071ae08745Sheppo 	/*
41081ae08745Sheppo 	 * Let the drain function know we are finishing if it
41091ae08745Sheppo 	 * is waiting.
41101ae08745Sheppo 	 */
41111ae08745Sheppo 	mutex_enter(&ldcp->drain_cv_lock);
41121ae08745Sheppo 	if (ldcp->drain_state == VSW_LDC_DRAINING)
41131ae08745Sheppo 		cv_signal(&ldcp->drain_cv);
41141ae08745Sheppo 	mutex_exit(&ldcp->drain_cv_lock);
41151ae08745Sheppo 
41161ae08745Sheppo 	return (LDC_SUCCESS);
41171ae08745Sheppo }
41181ae08745Sheppo 
41191ae08745Sheppo /*
4120b071742bSsg70180  * Reinitialise data structures associated with the channel.
41211ae08745Sheppo  */
41221ae08745Sheppo static void
4123b071742bSsg70180 vsw_ldc_reinit(vsw_ldc_t *ldcp)
41241ae08745Sheppo {
41251ae08745Sheppo 	vsw_t		*vswp = ldcp->ldc_vswp;
41261ae08745Sheppo 	vsw_port_t	*port;
41271ae08745Sheppo 	vsw_ldc_list_t	*ldcl;
41281ae08745Sheppo 
41293af08d82Slm66018 	D1(vswp, "%s: enter", __func__);
41301ae08745Sheppo 
41311ae08745Sheppo 	port = ldcp->ldc_port;
41321ae08745Sheppo 	ldcl = &port->p_ldclist;
41331ae08745Sheppo 
41343af08d82Slm66018 	READ_ENTER(&ldcl->lockrw);
41351ae08745Sheppo 
41361ae08745Sheppo 	D2(vswp, "%s: in 0x%llx : out 0x%llx", __func__,
41371ae08745Sheppo 		ldcp->lane_in.lstate, ldcp->lane_out.lstate);
41381ae08745Sheppo 
41391ae08745Sheppo 	vsw_free_lane_resources(ldcp, INBOUND);
41401ae08745Sheppo 	vsw_free_lane_resources(ldcp, OUTBOUND);
41411ae08745Sheppo 	RW_EXIT(&ldcl->lockrw);
41421ae08745Sheppo 
41431ae08745Sheppo 	ldcp->lane_in.lstate = 0;
41441ae08745Sheppo 	ldcp->lane_out.lstate = 0;
41451ae08745Sheppo 
41461ae08745Sheppo 	/*
41471ae08745Sheppo 	 * Remove parent port from any multicast groups
41481ae08745Sheppo 	 * it may have registered with. Client must resend
41491ae08745Sheppo 	 * multicast add command after handshake completes.
41501ae08745Sheppo 	 */
41511ae08745Sheppo 	(void) vsw_del_fdb(vswp, port);
41521ae08745Sheppo 
41531ae08745Sheppo 	vsw_del_mcst_port(port);
41541ae08745Sheppo 
41551ae08745Sheppo 	ldcp->peer_session = 0;
41561ae08745Sheppo 	ldcp->session_status = 0;
41573af08d82Slm66018 	ldcp->hcnt = 0;
41583af08d82Slm66018 	ldcp->hphase = VSW_MILESTONE0;
41593af08d82Slm66018 
41603af08d82Slm66018 	D1(vswp, "%s: exit", __func__);
41613af08d82Slm66018 }
41623af08d82Slm66018 
41633af08d82Slm66018 /*
4164b071742bSsg70180  * Process a connection event.
4165b071742bSsg70180  *
4166b071742bSsg70180  * Note - care must be taken to ensure that this function is
4167b071742bSsg70180  * not called with the dlistrw lock held.
41683af08d82Slm66018  */
41693af08d82Slm66018 static void
4170b071742bSsg70180 vsw_process_conn_evt(vsw_ldc_t *ldcp, uint16_t evt)
41713af08d82Slm66018 {
41723af08d82Slm66018 	vsw_t		*vswp = ldcp->ldc_vswp;
4173b071742bSsg70180 	vsw_conn_evt_t	*conn = NULL;
41743af08d82Slm66018 
4175b071742bSsg70180 	D1(vswp, "%s: enter", __func__);
41761ae08745Sheppo 
41771ae08745Sheppo 	/*
4178b071742bSsg70180 	 * Check if either a reset or restart event is pending
4179b071742bSsg70180 	 * or in progress. If so just return.
4180b071742bSsg70180 	 *
4181b071742bSsg70180 	 * A VSW_CONN_RESET event originates either with a LDC_RESET_EVT
4182b071742bSsg70180 	 * being received by the callback handler, or a ECONNRESET error
4183b071742bSsg70180 	 * code being returned from a ldc_read() or ldc_write() call.
4184b071742bSsg70180 	 *
4185b071742bSsg70180 	 * A VSW_CONN_RESTART event occurs when some error checking code
4186b071742bSsg70180 	 * decides that there is a problem with data from the channel,
4187b071742bSsg70180 	 * and that the handshake should be restarted.
4188b071742bSsg70180 	 */
4189b071742bSsg70180 	if (((evt == VSW_CONN_RESET) || (evt == VSW_CONN_RESTART)) &&
4190b071742bSsg70180 			(ldstub((uint8_t *)&ldcp->reset_active)))
4191b071742bSsg70180 		return;
4192b071742bSsg70180 
4193b071742bSsg70180 	/*
4194b071742bSsg70180 	 * If it is an LDC_UP event we first check the recorded
4195b071742bSsg70180 	 * state of the channel. If this is UP then we know that
4196b071742bSsg70180 	 * the channel moving to the UP state has already been dealt
4197b071742bSsg70180 	 * with and don't need to dispatch a  new task.
4198b071742bSsg70180 	 *
4199b071742bSsg70180 	 * The reason for this check is that when we do a ldc_up(),
4200b071742bSsg70180 	 * depending on the state of the peer, we may or may not get
4201b071742bSsg70180 	 * a LDC_UP event. As we can't depend on getting a LDC_UP evt
4202b071742bSsg70180 	 * every time we do ldc_up() we explicitly check the channel
4203b071742bSsg70180 	 * status to see has it come up (ldc_up() is asynch and will
4204b071742bSsg70180 	 * complete at some undefined time), and take the appropriate
4205b071742bSsg70180 	 * action.
4206b071742bSsg70180 	 *
4207b071742bSsg70180 	 * The flip side of this is that we may get a LDC_UP event
4208b071742bSsg70180 	 * when we have already seen that the channel is up and have
4209b071742bSsg70180 	 * dealt with that.
4210b071742bSsg70180 	 */
4211b071742bSsg70180 	mutex_enter(&ldcp->status_lock);
4212b071742bSsg70180 	if (evt == VSW_CONN_UP) {
4213b071742bSsg70180 		if ((ldcp->ldc_status == LDC_UP) ||
4214b071742bSsg70180 					(ldcp->reset_active != 0)) {
4215b071742bSsg70180 			mutex_exit(&ldcp->status_lock);
4216b071742bSsg70180 			return;
4217b071742bSsg70180 		}
4218b071742bSsg70180 	}
4219b071742bSsg70180 	mutex_exit(&ldcp->status_lock);
4220b071742bSsg70180 
4221b071742bSsg70180 	/*
4222b071742bSsg70180 	 * The transaction group id allows us to identify and discard
4223b071742bSsg70180 	 * any tasks which are still pending on the taskq and refer
4224b071742bSsg70180 	 * to the handshake session we are about to restart or reset.
4225b071742bSsg70180 	 * These stale messages no longer have any real meaning.
42261ae08745Sheppo 	 */
42271ae08745Sheppo 	mutex_enter(&ldcp->hss_lock);
42281ae08745Sheppo 	ldcp->hss_id++;
42291ae08745Sheppo 	mutex_exit(&ldcp->hss_lock);
42301ae08745Sheppo 
4231b071742bSsg70180 	ASSERT(vswp->taskq_p != NULL);
4232b071742bSsg70180 
4233b071742bSsg70180 	if ((conn = kmem_zalloc(sizeof (vsw_conn_evt_t), KM_NOSLEEP)) == NULL) {
4234b071742bSsg70180 		cmn_err(CE_WARN, "!vsw%d: unable to allocate memory for"
4235b071742bSsg70180 			" connection event", vswp->instance);
4236b071742bSsg70180 		goto err_exit;
4237b071742bSsg70180 	}
4238b071742bSsg70180 
4239b071742bSsg70180 	conn->evt = evt;
4240b071742bSsg70180 	conn->ldcp = ldcp;
4241b071742bSsg70180 
4242b071742bSsg70180 	if (ddi_taskq_dispatch(vswp->taskq_p, vsw_conn_task, conn,
4243b071742bSsg70180 		DDI_NOSLEEP) != DDI_SUCCESS) {
4244b071742bSsg70180 		cmn_err(CE_WARN, "!vsw%d: Can't dispatch connection task",
4245b071742bSsg70180 			vswp->instance);
4246b071742bSsg70180 
4247b071742bSsg70180 		kmem_free(conn, sizeof (vsw_conn_evt_t));
4248b071742bSsg70180 		goto err_exit;
4249b071742bSsg70180 	}
4250b071742bSsg70180 
4251b071742bSsg70180 	D1(vswp, "%s: exit", __func__);
4252b071742bSsg70180 	return;
4253b071742bSsg70180 
4254b071742bSsg70180 err_exit:
4255b071742bSsg70180 	/*
4256b071742bSsg70180 	 * Have mostly likely failed due to memory shortage. Clear the flag so
4257b071742bSsg70180 	 * that future requests will at least be attempted and will hopefully
4258b071742bSsg70180 	 * succeed.
4259b071742bSsg70180 	 */
4260b071742bSsg70180 	if ((evt == VSW_CONN_RESET) || (evt == VSW_CONN_RESTART))
4261b071742bSsg70180 		ldcp->reset_active = 0;
4262b071742bSsg70180 }
4263b071742bSsg70180 
4264b071742bSsg70180 /*
4265b071742bSsg70180  * Deal with events relating to a connection. Invoked from a taskq.
4266b071742bSsg70180  */
4267b071742bSsg70180 static void
4268b071742bSsg70180 vsw_conn_task(void *arg)
4269b071742bSsg70180 {
4270b071742bSsg70180 	vsw_conn_evt_t	*conn = (vsw_conn_evt_t *)arg;
4271b071742bSsg70180 	vsw_ldc_t	*ldcp = NULL;
4272b071742bSsg70180 	vsw_t		*vswp = NULL;
4273b071742bSsg70180 	uint16_t	evt;
4274b071742bSsg70180 	ldc_status_t	curr_status;
4275b071742bSsg70180 
4276b071742bSsg70180 	ldcp = conn->ldcp;
4277b071742bSsg70180 	evt = conn->evt;
4278b071742bSsg70180 	vswp = ldcp->ldc_vswp;
4279b071742bSsg70180 
4280b071742bSsg70180 	D1(vswp, "%s: enter", __func__);
4281b071742bSsg70180 
4282b071742bSsg70180 	/* can safely free now have copied out data */
4283b071742bSsg70180 	kmem_free(conn, sizeof (vsw_conn_evt_t));
4284b071742bSsg70180 
4285b071742bSsg70180 	mutex_enter(&ldcp->status_lock);
4286b071742bSsg70180 	if (ldc_status(ldcp->ldc_handle, &curr_status) != 0) {
4287b071742bSsg70180 		cmn_err(CE_WARN, "!vsw%d: Unable to read status of "
4288b071742bSsg70180 			"channel %ld", vswp->instance, ldcp->ldc_id);
4289b071742bSsg70180 		mutex_exit(&ldcp->status_lock);
4290b071742bSsg70180 		return;
4291b071742bSsg70180 	}
4292b071742bSsg70180 
4293b071742bSsg70180 	/*
4294b071742bSsg70180 	 * If we wish to restart the handshake on this channel, then if
4295b071742bSsg70180 	 * the channel is UP we bring it DOWN to flush the underlying
4296b071742bSsg70180 	 * ldc queue.
4297b071742bSsg70180 	 */
4298b071742bSsg70180 	if ((evt == VSW_CONN_RESTART) && (curr_status == LDC_UP))
4299b071742bSsg70180 		(void) ldc_down(ldcp->ldc_handle);
4300b071742bSsg70180 
4301b071742bSsg70180 	/*
4302b071742bSsg70180 	 * re-init all the associated data structures.
4303b071742bSsg70180 	 */
4304b071742bSsg70180 	vsw_ldc_reinit(ldcp);
4305b071742bSsg70180 
4306b071742bSsg70180 	/*
4307b071742bSsg70180 	 * Bring the channel back up (note it does no harm to
4308b071742bSsg70180 	 * do this even if the channel is already UP, Just
4309b071742bSsg70180 	 * becomes effectively a no-op).
4310b071742bSsg70180 	 */
4311b071742bSsg70180 	(void) ldc_up(ldcp->ldc_handle);
4312b071742bSsg70180 
4313b071742bSsg70180 	/*
4314b071742bSsg70180 	 * Check if channel is now UP. This will only happen if
4315b071742bSsg70180 	 * peer has also done a ldc_up().
4316b071742bSsg70180 	 */
4317b071742bSsg70180 	if (ldc_status(ldcp->ldc_handle, &curr_status) != 0) {
4318b071742bSsg70180 		cmn_err(CE_WARN, "!vsw%d: Unable to read status of "
4319b071742bSsg70180 			"channel %ld", vswp->instance, ldcp->ldc_id);
4320b071742bSsg70180 		mutex_exit(&ldcp->status_lock);
4321b071742bSsg70180 		return;
4322b071742bSsg70180 	}
4323b071742bSsg70180 
4324b071742bSsg70180 	ldcp->ldc_status = curr_status;
4325b071742bSsg70180 
4326b071742bSsg70180 	/* channel UP so restart handshake by sending version info */
4327b071742bSsg70180 	if (curr_status == LDC_UP) {
43281ae08745Sheppo 		if (ldcp->hcnt++ > vsw_num_handshakes) {
432934683adeSsg70180 			cmn_err(CE_WARN, "!vsw%d: exceeded number of permitted"
43301ae08745Sheppo 				" handshake attempts (%d) on channel %ld",
433134683adeSsg70180 				vswp->instance, ldcp->hcnt, ldcp->ldc_id);
43323af08d82Slm66018 			mutex_exit(&ldcp->status_lock);
43333af08d82Slm66018 			return;
43343af08d82Slm66018 		}
4335b071742bSsg70180 
4336b071742bSsg70180 		if (ddi_taskq_dispatch(vswp->taskq_p, vsw_send_ver, ldcp,
4337b071742bSsg70180 			DDI_NOSLEEP) != DDI_SUCCESS) {
4338b071742bSsg70180 			cmn_err(CE_WARN, "!vsw%d: Can't dispatch version task",
4339b071742bSsg70180 				vswp->instance);
43403af08d82Slm66018 
43413af08d82Slm66018 			/*
4342b071742bSsg70180 			 * Don't count as valid restart attempt if couldn't
4343b071742bSsg70180 			 * send version msg.
43443af08d82Slm66018 			 */
4345b071742bSsg70180 			if (ldcp->hcnt > 0)
4346b071742bSsg70180 				ldcp->hcnt--;
4347b071742bSsg70180 		}
43483af08d82Slm66018 	}
43493af08d82Slm66018 
43503af08d82Slm66018 	/*
4351b071742bSsg70180 	 * Mark that the process is complete by clearing the flag.
4352b071742bSsg70180 	 *
4353b071742bSsg70180 	 * Note is it possible that the taskq dispatch above may have failed,
4354b071742bSsg70180 	 * most likely due to memory shortage. We still clear the flag so
4355b071742bSsg70180 	 * future attempts will at least be attempted and will hopefully
4356b071742bSsg70180 	 * succeed.
43573af08d82Slm66018 	 */
4358b071742bSsg70180 	if ((evt == VSW_CONN_RESET) || (evt == VSW_CONN_RESTART))
4359445b4c2eSsb155480 		ldcp->reset_active = 0;
4360b071742bSsg70180 
4361b071742bSsg70180 	mutex_exit(&ldcp->status_lock);
43623af08d82Slm66018 
43633af08d82Slm66018 	D1(vswp, "%s: exit", __func__);
43643af08d82Slm66018 }
43653af08d82Slm66018 
43663af08d82Slm66018 /*
43671ae08745Sheppo  * returns 0 if legal for event signified by flag to have
43681ae08745Sheppo  * occured at the time it did. Otherwise returns 1.
43691ae08745Sheppo  */
43701ae08745Sheppo int
43711ae08745Sheppo vsw_check_flag(vsw_ldc_t *ldcp, int dir, uint64_t flag)
43721ae08745Sheppo {
43731ae08745Sheppo 	vsw_t		*vswp = ldcp->ldc_vswp;
43741ae08745Sheppo 	uint64_t	state;
43751ae08745Sheppo 	uint64_t	phase;
43761ae08745Sheppo 
43771ae08745Sheppo 	if (dir == INBOUND)
43781ae08745Sheppo 		state = ldcp->lane_in.lstate;
43791ae08745Sheppo 	else
43801ae08745Sheppo 		state = ldcp->lane_out.lstate;
43811ae08745Sheppo 
43821ae08745Sheppo 	phase = ldcp->hphase;
43831ae08745Sheppo 
43841ae08745Sheppo 	switch (flag) {
43851ae08745Sheppo 	case VSW_VER_INFO_RECV:
43861ae08745Sheppo 		if (phase > VSW_MILESTONE0) {
43871ae08745Sheppo 			DERR(vswp, "vsw_check_flag (%d): VER_INFO_RECV"
43881ae08745Sheppo 				" when in state %d\n", ldcp->ldc_id, phase);
4389b071742bSsg70180 			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
43901ae08745Sheppo 			return (1);
43911ae08745Sheppo 		}
43921ae08745Sheppo 		break;
43931ae08745Sheppo 
43941ae08745Sheppo 	case VSW_VER_ACK_RECV:
43951ae08745Sheppo 	case VSW_VER_NACK_RECV:
43961ae08745Sheppo 		if (!(state & VSW_VER_INFO_SENT)) {
43971ae08745Sheppo 			DERR(vswp, "vsw_check_flag (%d): spurious VER_ACK"
43981ae08745Sheppo 				" or VER_NACK when in state %d\n",
43991ae08745Sheppo 				ldcp->ldc_id, phase);
4400b071742bSsg70180 			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
44011ae08745Sheppo 			return (1);
44021ae08745Sheppo 		} else
44031ae08745Sheppo 			state &= ~VSW_VER_INFO_SENT;
44041ae08745Sheppo 		break;
44051ae08745Sheppo 
44061ae08745Sheppo 	case VSW_ATTR_INFO_RECV:
44071ae08745Sheppo 		if ((phase < VSW_MILESTONE1) || (phase >= VSW_MILESTONE2)) {
44081ae08745Sheppo 			DERR(vswp, "vsw_check_flag (%d): ATTR_INFO_RECV"
44091ae08745Sheppo 				" when in state %d\n", ldcp->ldc_id, phase);
4410b071742bSsg70180 			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
44111ae08745Sheppo 			return (1);
44121ae08745Sheppo 		}
44131ae08745Sheppo 		break;
44141ae08745Sheppo 
44151ae08745Sheppo 	case VSW_ATTR_ACK_RECV:
44161ae08745Sheppo 	case VSW_ATTR_NACK_RECV:
44171ae08745Sheppo 		if (!(state & VSW_ATTR_INFO_SENT)) {
44181ae08745Sheppo 			DERR(vswp, "vsw_check_flag (%d): spurious ATTR_ACK"
44191ae08745Sheppo 				" or ATTR_NACK when in state %d\n",
44201ae08745Sheppo 				ldcp->ldc_id, phase);
4421b071742bSsg70180 			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
44221ae08745Sheppo 			return (1);
44231ae08745Sheppo 		} else
44241ae08745Sheppo 			state &= ~VSW_ATTR_INFO_SENT;
44251ae08745Sheppo 		break;
44261ae08745Sheppo 
44271ae08745Sheppo 	case VSW_DRING_INFO_RECV:
44281ae08745Sheppo 		if (phase < VSW_MILESTONE1) {
44291ae08745Sheppo 			DERR(vswp, "vsw_check_flag (%d): DRING_INFO_RECV"
44301ae08745Sheppo 				" when in state %d\n", ldcp->ldc_id, phase);
4431b071742bSsg70180 			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
44321ae08745Sheppo 			return (1);
44331ae08745Sheppo 		}
44341ae08745Sheppo 		break;
44351ae08745Sheppo 
44361ae08745Sheppo 	case VSW_DRING_ACK_RECV:
44371ae08745Sheppo 	case VSW_DRING_NACK_RECV:
44381ae08745Sheppo 		if (!(state & VSW_DRING_INFO_SENT)) {
44391ae08745Sheppo 			DERR(vswp, "vsw_check_flag (%d): spurious DRING_ACK"
44401ae08745Sheppo 				" or DRING_NACK when in state %d\n",
44411ae08745Sheppo 				ldcp->ldc_id, phase);
4442b071742bSsg70180 			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
44431ae08745Sheppo 			return (1);
44441ae08745Sheppo 		} else
44451ae08745Sheppo 			state &= ~VSW_DRING_INFO_SENT;
44461ae08745Sheppo 		break;
44471ae08745Sheppo 
44481ae08745Sheppo 	case VSW_RDX_INFO_RECV:
44491ae08745Sheppo 		if (phase < VSW_MILESTONE3) {
44501ae08745Sheppo 			DERR(vswp, "vsw_check_flag (%d): RDX_INFO_RECV"
44511ae08745Sheppo 				" when in state %d\n", ldcp->ldc_id, phase);
4452b071742bSsg70180 			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
44531ae08745Sheppo 			return (1);
44541ae08745Sheppo 		}
44551ae08745Sheppo 		break;
44561ae08745Sheppo 
44571ae08745Sheppo 	case VSW_RDX_ACK_RECV:
44581ae08745Sheppo 	case VSW_RDX_NACK_RECV:
44591ae08745Sheppo 		if (!(state & VSW_RDX_INFO_SENT)) {
44601ae08745Sheppo 			DERR(vswp, "vsw_check_flag (%d): spurious RDX_ACK"
44611ae08745Sheppo 				" or RDX_NACK when in state %d\n",
44621ae08745Sheppo 				ldcp->ldc_id, phase);
4463b071742bSsg70180 			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
44641ae08745Sheppo 			return (1);
44651ae08745Sheppo 		} else
44661ae08745Sheppo 			state &= ~VSW_RDX_INFO_SENT;
44671ae08745Sheppo 		break;
44681ae08745Sheppo 
44691ae08745Sheppo 	case VSW_MCST_INFO_RECV:
44701ae08745Sheppo 		if (phase < VSW_MILESTONE3) {
44711ae08745Sheppo 			DERR(vswp, "vsw_check_flag (%d): VSW_MCST_INFO_RECV"
44721ae08745Sheppo 				" when in state %d\n", ldcp->ldc_id, phase);
4473b071742bSsg70180 			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
44741ae08745Sheppo 			return (1);
44751ae08745Sheppo 		}
44761ae08745Sheppo 		break;
44771ae08745Sheppo 
44781ae08745Sheppo 	default:
44791ae08745Sheppo 		DERR(vswp, "vsw_check_flag (%lld): unknown flag (%llx)",
44801ae08745Sheppo 				ldcp->ldc_id, flag);
44811ae08745Sheppo 		return (1);
44821ae08745Sheppo 	}
44831ae08745Sheppo 
44841ae08745Sheppo 	if (dir == INBOUND)
44851ae08745Sheppo 		ldcp->lane_in.lstate = state;
44861ae08745Sheppo 	else
44871ae08745Sheppo 		ldcp->lane_out.lstate = state;
44881ae08745Sheppo 
44891ae08745Sheppo 	D1(vswp, "vsw_check_flag (chan %lld): exit", ldcp->ldc_id);
44901ae08745Sheppo 
44911ae08745Sheppo 	return (0);
44921ae08745Sheppo }
44931ae08745Sheppo 
44941ae08745Sheppo void
44951ae08745Sheppo vsw_next_milestone(vsw_ldc_t *ldcp)
44961ae08745Sheppo {
44971ae08745Sheppo 	vsw_t		*vswp = ldcp->ldc_vswp;
44981ae08745Sheppo 
44991ae08745Sheppo 	D1(vswp, "%s (chan %lld): enter (phase %ld)", __func__,
45001ae08745Sheppo 		ldcp->ldc_id, ldcp->hphase);
45011ae08745Sheppo 
45021ae08745Sheppo 	DUMP_FLAGS(ldcp->lane_in.lstate);
45031ae08745Sheppo 	DUMP_FLAGS(ldcp->lane_out.lstate);
45041ae08745Sheppo 
45051ae08745Sheppo 	switch (ldcp->hphase) {
45061ae08745Sheppo 
45071ae08745Sheppo 	case VSW_MILESTONE0:
45081ae08745Sheppo 		/*
45091ae08745Sheppo 		 * If we haven't started to handshake with our peer,
45101ae08745Sheppo 		 * start to do so now.
45111ae08745Sheppo 		 */
45121ae08745Sheppo 		if (ldcp->lane_out.lstate == 0) {
45131ae08745Sheppo 			D2(vswp, "%s: (chan %lld) starting handshake "
45141ae08745Sheppo 				"with peer", __func__, ldcp->ldc_id);
4515b071742bSsg70180 			vsw_process_conn_evt(ldcp, VSW_CONN_UP);
45161ae08745Sheppo 		}
45171ae08745Sheppo 
45181ae08745Sheppo 		/*
45191ae08745Sheppo 		 * Only way to pass this milestone is to have successfully
45201ae08745Sheppo 		 * negotiated version info.
45211ae08745Sheppo 		 */
45221ae08745Sheppo 		if ((ldcp->lane_in.lstate & VSW_VER_ACK_SENT) &&
45231ae08745Sheppo 			(ldcp->lane_out.lstate & VSW_VER_ACK_RECV)) {
45241ae08745Sheppo 
45251ae08745Sheppo 			D2(vswp, "%s: (chan %lld) leaving milestone 0",
45261ae08745Sheppo 				__func__, ldcp->ldc_id);
45271ae08745Sheppo 
45281ae08745Sheppo 			/*
45291ae08745Sheppo 			 * Next milestone is passed when attribute
45301ae08745Sheppo 			 * information has been successfully exchanged.
45311ae08745Sheppo 			 */
45321ae08745Sheppo 			ldcp->hphase = VSW_MILESTONE1;
45331ae08745Sheppo 			vsw_send_attr(ldcp);
45341ae08745Sheppo 
45351ae08745Sheppo 		}
45361ae08745Sheppo 		break;
45371ae08745Sheppo 
45381ae08745Sheppo 	case VSW_MILESTONE1:
45391ae08745Sheppo 		/*
45401ae08745Sheppo 		 * Only way to pass this milestone is to have successfully
45411ae08745Sheppo 		 * negotiated attribute information.
45421ae08745Sheppo 		 */
45431ae08745Sheppo 		if (ldcp->lane_in.lstate & VSW_ATTR_ACK_SENT) {
45441ae08745Sheppo 
45451ae08745Sheppo 			ldcp->hphase = VSW_MILESTONE2;
45461ae08745Sheppo 
45471ae08745Sheppo 			/*
45481ae08745Sheppo 			 * If the peer device has said it wishes to
45491ae08745Sheppo 			 * use descriptor rings then we send it our ring
45501ae08745Sheppo 			 * info, otherwise we just set up a private ring
45511ae08745Sheppo 			 * which we use an internal buffer
45521ae08745Sheppo 			 */
45531ae08745Sheppo 			if (ldcp->lane_in.xfer_mode == VIO_DRING_MODE)
45541ae08745Sheppo 				vsw_send_dring_info(ldcp);
45551ae08745Sheppo 		}
45561ae08745Sheppo 		break;
45571ae08745Sheppo 
45581ae08745Sheppo 	case VSW_MILESTONE2:
45591ae08745Sheppo 		/*
45601ae08745Sheppo 		 * If peer has indicated in its attribute message that
45611ae08745Sheppo 		 * it wishes to use descriptor rings then the only way
45621ae08745Sheppo 		 * to pass this milestone is for us to have received
45631ae08745Sheppo 		 * valid dring info.
45641ae08745Sheppo 		 *
45651ae08745Sheppo 		 * If peer is not using descriptor rings then just fall
45661ae08745Sheppo 		 * through.
45671ae08745Sheppo 		 */
45681ae08745Sheppo 		if ((ldcp->lane_in.xfer_mode == VIO_DRING_MODE) &&
45691ae08745Sheppo 			(!(ldcp->lane_in.lstate & VSW_DRING_ACK_SENT)))
45701ae08745Sheppo 			break;
45711ae08745Sheppo 
45721ae08745Sheppo 		D2(vswp, "%s: (chan %lld) leaving milestone 2",
45731ae08745Sheppo 				__func__, ldcp->ldc_id);
45741ae08745Sheppo 
45751ae08745Sheppo 		ldcp->hphase = VSW_MILESTONE3;
45761ae08745Sheppo 		vsw_send_rdx(ldcp);
45771ae08745Sheppo 		break;
45781ae08745Sheppo 
45791ae08745Sheppo 	case VSW_MILESTONE3:
45801ae08745Sheppo 		/*
45811ae08745Sheppo 		 * Pass this milestone when all paramaters have been
45821ae08745Sheppo 		 * successfully exchanged and RDX sent in both directions.
45831ae08745Sheppo 		 *
45841ae08745Sheppo 		 * Mark outbound lane as available to transmit data.
45851ae08745Sheppo 		 */
4586b071742bSsg70180 		if ((ldcp->lane_out.lstate & VSW_RDX_ACK_SENT) &&
4587b071742bSsg70180 			(ldcp->lane_in.lstate & VSW_RDX_ACK_RECV)) {
45881ae08745Sheppo 
45891ae08745Sheppo 			D2(vswp, "%s: (chan %lld) leaving milestone 3",
45901ae08745Sheppo 				__func__, ldcp->ldc_id);
45913af08d82Slm66018 			D2(vswp, "%s: ** handshake complete (0x%llx : "
45923af08d82Slm66018 				"0x%llx) **", __func__, ldcp->lane_in.lstate,
45933af08d82Slm66018 				ldcp->lane_out.lstate);
45941ae08745Sheppo 			ldcp->lane_out.lstate |= VSW_LANE_ACTIVE;
45951ae08745Sheppo 			ldcp->hphase = VSW_MILESTONE4;
45961ae08745Sheppo 			ldcp->hcnt = 0;
45971ae08745Sheppo 			DISPLAY_STATE();
45983af08d82Slm66018 		} else {
45993af08d82Slm66018 			D2(vswp, "%s: still in milestone 3 (0x%llx :"
46003af08d82Slm66018 				" 0x%llx", __func__, ldcp->lane_in.lstate,
46013af08d82Slm66018 				ldcp->lane_out.lstate);
46021ae08745Sheppo 		}
46031ae08745Sheppo 		break;
46041ae08745Sheppo 
46051ae08745Sheppo 	case VSW_MILESTONE4:
46061ae08745Sheppo 		D2(vswp, "%s: (chan %lld) in milestone 4", __func__,
46071ae08745Sheppo 							ldcp->ldc_id);
46081ae08745Sheppo 		break;
46091ae08745Sheppo 
46101ae08745Sheppo 	default:
46111ae08745Sheppo 		DERR(vswp, "%s: (chan %lld) Unknown Phase %x", __func__,
46121ae08745Sheppo 			ldcp->ldc_id, ldcp->hphase);
46131ae08745Sheppo 	}
46141ae08745Sheppo 
46151ae08745Sheppo 	D1(vswp, "%s (chan %lld): exit (phase %ld)", __func__, ldcp->ldc_id,
46161ae08745Sheppo 		ldcp->hphase);
46171ae08745Sheppo }
46181ae08745Sheppo 
46191ae08745Sheppo /*
46201ae08745Sheppo  * Check if major version is supported.
46211ae08745Sheppo  *
46221ae08745Sheppo  * Returns 0 if finds supported major number, and if necessary
46231ae08745Sheppo  * adjusts the minor field.
46241ae08745Sheppo  *
46251ae08745Sheppo  * Returns 1 if can't match major number exactly. Sets mjor/minor
46261ae08745Sheppo  * to next lowest support values, or to zero if no other values possible.
46271ae08745Sheppo  */
46281ae08745Sheppo static int
46291ae08745Sheppo vsw_supported_version(vio_ver_msg_t *vp)
46301ae08745Sheppo {
46311ae08745Sheppo 	int	i;
46321ae08745Sheppo 
46331ae08745Sheppo 	D1(NULL, "vsw_supported_version: enter");
46341ae08745Sheppo 
46351ae08745Sheppo 	for (i = 0; i < VSW_NUM_VER; i++) {
46361ae08745Sheppo 		if (vsw_versions[i].ver_major == vp->ver_major) {
46371ae08745Sheppo 			/*
46381ae08745Sheppo 			 * Matching or lower major version found. Update
46391ae08745Sheppo 			 * minor number if necessary.
46401ae08745Sheppo 			 */
46411ae08745Sheppo 			if (vp->ver_minor > vsw_versions[i].ver_minor) {
46421ae08745Sheppo 				D2(NULL, "%s: adjusting minor value"
46431ae08745Sheppo 					" from %d to %d", __func__,
46441ae08745Sheppo 					vp->ver_minor,
46451ae08745Sheppo 					vsw_versions[i].ver_minor);
46461ae08745Sheppo 				vp->ver_minor = vsw_versions[i].ver_minor;
46471ae08745Sheppo 			}
46481ae08745Sheppo 
46491ae08745Sheppo 			return (0);
46501ae08745Sheppo 		}
46511ae08745Sheppo 
46521ae08745Sheppo 		if (vsw_versions[i].ver_major < vp->ver_major) {
46531ae08745Sheppo 			if (vp->ver_minor > vsw_versions[i].ver_minor) {
46541ae08745Sheppo 				D2(NULL, "%s: adjusting minor value"
46551ae08745Sheppo 					" from %d to %d", __func__,
46561ae08745Sheppo 					vp->ver_minor,
46571ae08745Sheppo 					vsw_versions[i].ver_minor);
46581ae08745Sheppo 				vp->ver_minor = vsw_versions[i].ver_minor;
46591ae08745Sheppo 			}
46601ae08745Sheppo 			return (1);
46611ae08745Sheppo 		}
46621ae08745Sheppo 	}
46631ae08745Sheppo 
46641ae08745Sheppo 	/* No match was possible, zero out fields */
46651ae08745Sheppo 	vp->ver_major = 0;
46661ae08745Sheppo 	vp->ver_minor = 0;
46671ae08745Sheppo 
46681ae08745Sheppo 	D1(NULL, "vsw_supported_version: exit");
46691ae08745Sheppo 
46701ae08745Sheppo 	return (1);
46711ae08745Sheppo }
46721ae08745Sheppo 
46731ae08745Sheppo /*
46741ae08745Sheppo  * Main routine for processing messages received over LDC.
46751ae08745Sheppo  */
46761ae08745Sheppo static void
46771ae08745Sheppo vsw_process_pkt(void *arg)
46781ae08745Sheppo {
46791ae08745Sheppo 	vsw_ldc_t	*ldcp = (vsw_ldc_t  *)arg;
46801ae08745Sheppo 	vsw_t 		*vswp = ldcp->ldc_vswp;
46811ae08745Sheppo 	size_t		msglen;
46821ae08745Sheppo 	vio_msg_tag_t	tag;
46831ae08745Sheppo 	def_msg_t	dmsg;
46841ae08745Sheppo 	int 		rv = 0;
46851ae08745Sheppo 
46863af08d82Slm66018 
46871ae08745Sheppo 	D1(vswp, "%s enter: ldcid (%lld)\n", __func__, ldcp->ldc_id);
46881ae08745Sheppo 
46891ae08745Sheppo 	/*
46901ae08745Sheppo 	 * If channel is up read messages until channel is empty.
46911ae08745Sheppo 	 */
46921ae08745Sheppo 	do {
46931ae08745Sheppo 		msglen = sizeof (dmsg);
46941ae08745Sheppo 		rv = ldc_read(ldcp->ldc_handle, (caddr_t)&dmsg, &msglen);
46951ae08745Sheppo 
46961ae08745Sheppo 		if (rv != 0) {
46971ae08745Sheppo 			DERR(vswp, "%s :ldc_read err id(%lld) rv(%d) "
46981ae08745Sheppo 				"len(%d)\n", __func__, ldcp->ldc_id,
46991ae08745Sheppo 							rv, msglen);
47003af08d82Slm66018 		}
47013af08d82Slm66018 
47023af08d82Slm66018 		/* channel has been reset */
47033af08d82Slm66018 		if (rv == ECONNRESET) {
4704b071742bSsg70180 			vsw_process_conn_evt(ldcp, VSW_CONN_RESET);
47051ae08745Sheppo 			break;
47061ae08745Sheppo 		}
47071ae08745Sheppo 
47081ae08745Sheppo 		if (msglen == 0) {
47091ae08745Sheppo 			D2(vswp, "%s: ldc_read id(%lld) NODATA", __func__,
47101ae08745Sheppo 			ldcp->ldc_id);
47111ae08745Sheppo 			break;
47121ae08745Sheppo 		}
47131ae08745Sheppo 
47141ae08745Sheppo 		D2(vswp, "%s: ldc_read id(%lld): msglen(%d)", __func__,
47151ae08745Sheppo 		    ldcp->ldc_id, msglen);
47161ae08745Sheppo 
47171ae08745Sheppo 		/*
47181ae08745Sheppo 		 * Figure out what sort of packet we have gotten by
47191ae08745Sheppo 		 * examining the msg tag, and then switch it appropriately.
47201ae08745Sheppo 		 */
47211ae08745Sheppo 		bcopy(&dmsg, &tag, sizeof (vio_msg_tag_t));
47221ae08745Sheppo 
47231ae08745Sheppo 		switch (tag.vio_msgtype) {
47241ae08745Sheppo 		case VIO_TYPE_CTRL:
47251ae08745Sheppo 			vsw_dispatch_ctrl_task(ldcp, &dmsg, tag);
47261ae08745Sheppo 			break;
47271ae08745Sheppo 		case VIO_TYPE_DATA:
47281ae08745Sheppo 			vsw_process_data_pkt(ldcp, &dmsg, tag);
47291ae08745Sheppo 			break;
47301ae08745Sheppo 		case VIO_TYPE_ERR:
47311ae08745Sheppo 			vsw_process_err_pkt(ldcp, &dmsg, tag);
47321ae08745Sheppo 			break;
47331ae08745Sheppo 		default:
47341ae08745Sheppo 			DERR(vswp, "%s: Unknown tag(%lx) ", __func__,
47351ae08745Sheppo 				"id(%lx)\n", tag.vio_msgtype, ldcp->ldc_id);
47361ae08745Sheppo 			break;
47371ae08745Sheppo 		}
47381ae08745Sheppo 	} while (msglen);
47391ae08745Sheppo 
47401ae08745Sheppo 	D1(vswp, "%s exit: ldcid (%lld)\n", __func__, ldcp->ldc_id);
47411ae08745Sheppo }
47421ae08745Sheppo 
47431ae08745Sheppo /*
47441ae08745Sheppo  * Dispatch a task to process a VIO control message.
47451ae08745Sheppo  */
47461ae08745Sheppo static void
47471ae08745Sheppo vsw_dispatch_ctrl_task(vsw_ldc_t *ldcp, void *cpkt, vio_msg_tag_t tag)
47481ae08745Sheppo {
47491ae08745Sheppo 	vsw_ctrl_task_t		*ctaskp = NULL;
47501ae08745Sheppo 	vsw_port_t		*port = ldcp->ldc_port;
47511ae08745Sheppo 	vsw_t			*vswp = port->p_vswp;
47521ae08745Sheppo 
47531ae08745Sheppo 	D1(vswp, "%s: enter", __func__);
47541ae08745Sheppo 
47551ae08745Sheppo 	/*
47561ae08745Sheppo 	 * We need to handle RDX ACK messages in-band as once they
47571ae08745Sheppo 	 * are exchanged it is possible that we will get an
47581ae08745Sheppo 	 * immediate (legitimate) data packet.
47591ae08745Sheppo 	 */
47601ae08745Sheppo 	if ((tag.vio_subtype_env == VIO_RDX) &&
47611ae08745Sheppo 		(tag.vio_subtype == VIO_SUBTYPE_ACK)) {
47623af08d82Slm66018 
4763b071742bSsg70180 		if (vsw_check_flag(ldcp, INBOUND, VSW_RDX_ACK_RECV))
47641ae08745Sheppo 			return;
47651ae08745Sheppo 
4766b071742bSsg70180 		ldcp->lane_in.lstate |= VSW_RDX_ACK_RECV;
47673af08d82Slm66018 		D2(vswp, "%s (%ld) handling RDX_ACK in place "
47683af08d82Slm66018 			"(ostate 0x%llx : hphase %d)", __func__,
4769b071742bSsg70180 			ldcp->ldc_id, ldcp->lane_in.lstate, ldcp->hphase);
47701ae08745Sheppo 		vsw_next_milestone(ldcp);
47711ae08745Sheppo 		return;
47721ae08745Sheppo 	}
47731ae08745Sheppo 
47741ae08745Sheppo 	ctaskp = kmem_alloc(sizeof (vsw_ctrl_task_t), KM_NOSLEEP);
47751ae08745Sheppo 
47761ae08745Sheppo 	if (ctaskp == NULL) {
47771ae08745Sheppo 		DERR(vswp, "%s: unable to alloc space for ctrl"
47781ae08745Sheppo 			" msg", __func__);
4779b071742bSsg70180 		vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
47801ae08745Sheppo 		return;
47811ae08745Sheppo 	}
47821ae08745Sheppo 
47831ae08745Sheppo 	ctaskp->ldcp = ldcp;
47841ae08745Sheppo 	bcopy((def_msg_t *)cpkt, &ctaskp->pktp, sizeof (def_msg_t));
47851ae08745Sheppo 	mutex_enter(&ldcp->hss_lock);
47861ae08745Sheppo 	ctaskp->hss_id = ldcp->hss_id;
47871ae08745Sheppo 	mutex_exit(&ldcp->hss_lock);
47881ae08745Sheppo 
47891ae08745Sheppo 	/*
47901ae08745Sheppo 	 * Dispatch task to processing taskq if port is not in
47911ae08745Sheppo 	 * the process of being detached.
47921ae08745Sheppo 	 */
47931ae08745Sheppo 	mutex_enter(&port->state_lock);
47941ae08745Sheppo 	if (port->state == VSW_PORT_INIT) {
47951ae08745Sheppo 		if ((vswp->taskq_p == NULL) ||
47961ae08745Sheppo 			(ddi_taskq_dispatch(vswp->taskq_p,
47971ae08745Sheppo 			vsw_process_ctrl_pkt, ctaskp, DDI_NOSLEEP)
47981ae08745Sheppo 							!= DDI_SUCCESS)) {
47991ae08745Sheppo 			DERR(vswp, "%s: unable to dispatch task to taskq",
48001ae08745Sheppo 				__func__);
48011ae08745Sheppo 			kmem_free(ctaskp, sizeof (vsw_ctrl_task_t));
48021ae08745Sheppo 			mutex_exit(&port->state_lock);
4803b071742bSsg70180 			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
48041ae08745Sheppo 			return;
48051ae08745Sheppo 		}
48061ae08745Sheppo 	} else {
48071ae08745Sheppo 		DWARN(vswp, "%s: port %d detaching, not dispatching "
48081ae08745Sheppo 			"task", __func__, port->p_instance);
48091ae08745Sheppo 	}
48101ae08745Sheppo 
48111ae08745Sheppo 	mutex_exit(&port->state_lock);
48121ae08745Sheppo 
48131ae08745Sheppo 	D2(vswp, "%s: dispatched task to taskq for chan %d", __func__,
48141ae08745Sheppo 			ldcp->ldc_id);
48151ae08745Sheppo 	D1(vswp, "%s: exit", __func__);
48161ae08745Sheppo }
48171ae08745Sheppo 
48181ae08745Sheppo /*
48191ae08745Sheppo  * Process a VIO ctrl message. Invoked from taskq.
48201ae08745Sheppo  */
48211ae08745Sheppo static void
48221ae08745Sheppo vsw_process_ctrl_pkt(void *arg)
48231ae08745Sheppo {
48241ae08745Sheppo 	vsw_ctrl_task_t	*ctaskp = (vsw_ctrl_task_t *)arg;
48251ae08745Sheppo 	vsw_ldc_t	*ldcp = ctaskp->ldcp;
48261ae08745Sheppo 	vsw_t 		*vswp = ldcp->ldc_vswp;
48271ae08745Sheppo 	vio_msg_tag_t	tag;
48281ae08745Sheppo 	uint16_t	env;
48291ae08745Sheppo 
48301ae08745Sheppo 	D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);
48311ae08745Sheppo 
48321ae08745Sheppo 	bcopy(&ctaskp->pktp, &tag, sizeof (vio_msg_tag_t));
48331ae08745Sheppo 	env = tag.vio_subtype_env;
48341ae08745Sheppo 
48351ae08745Sheppo 	/* stale pkt check */
48361ae08745Sheppo 	mutex_enter(&ldcp->hss_lock);
48371ae08745Sheppo 	if (ctaskp->hss_id < ldcp->hss_id) {
48381ae08745Sheppo 		DWARN(vswp, "%s: discarding stale packet belonging to"
48391ae08745Sheppo 			" earlier (%ld) handshake session", __func__,
48401ae08745Sheppo 			ctaskp->hss_id);
48411ae08745Sheppo 		mutex_exit(&ldcp->hss_lock);
48421ae08745Sheppo 		return;
48431ae08745Sheppo 	}
48441ae08745Sheppo 	mutex_exit(&ldcp->hss_lock);
48451ae08745Sheppo 
48461ae08745Sheppo 	/* session id check */
48471ae08745Sheppo 	if (ldcp->session_status & VSW_PEER_SESSION) {
48481ae08745Sheppo 		if (ldcp->peer_session != tag.vio_sid) {
48491ae08745Sheppo 			DERR(vswp, "%s (chan %d): invalid session id (%llx)",
48501ae08745Sheppo 				__func__, ldcp->ldc_id, tag.vio_sid);
48511ae08745Sheppo 			kmem_free(ctaskp, sizeof (vsw_ctrl_task_t));
4852b071742bSsg70180 			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
48531ae08745Sheppo 			return;
48541ae08745Sheppo 		}
48551ae08745Sheppo 	}
48561ae08745Sheppo 
48571ae08745Sheppo 	/*
48581ae08745Sheppo 	 * Switch on vio_subtype envelope, then let lower routines
48591ae08745Sheppo 	 * decide if its an INFO, ACK or NACK packet.
48601ae08745Sheppo 	 */
48611ae08745Sheppo 	switch (env) {
48621ae08745Sheppo 	case VIO_VER_INFO:
48631ae08745Sheppo 		vsw_process_ctrl_ver_pkt(ldcp, &ctaskp->pktp);
48641ae08745Sheppo 		break;
48651ae08745Sheppo 	case VIO_DRING_REG:
48661ae08745Sheppo 		vsw_process_ctrl_dring_reg_pkt(ldcp, &ctaskp->pktp);
48671ae08745Sheppo 		break;
48681ae08745Sheppo 	case VIO_DRING_UNREG:
48691ae08745Sheppo 		vsw_process_ctrl_dring_unreg_pkt(ldcp, &ctaskp->pktp);
48701ae08745Sheppo 		break;
48711ae08745Sheppo 	case VIO_ATTR_INFO:
48721ae08745Sheppo 		vsw_process_ctrl_attr_pkt(ldcp, &ctaskp->pktp);
48731ae08745Sheppo 		break;
48741ae08745Sheppo 	case VNET_MCAST_INFO:
48751ae08745Sheppo 		vsw_process_ctrl_mcst_pkt(ldcp, &ctaskp->pktp);
48761ae08745Sheppo 		break;
48771ae08745Sheppo 	case VIO_RDX:
48781ae08745Sheppo 		vsw_process_ctrl_rdx_pkt(ldcp, &ctaskp->pktp);
48791ae08745Sheppo 		break;
48801ae08745Sheppo 	default:
48811ae08745Sheppo 		DERR(vswp, "%s : unknown vio_subtype_env (%x)\n",
48821ae08745Sheppo 							__func__, env);
48831ae08745Sheppo 	}
48841ae08745Sheppo 
48851ae08745Sheppo 	kmem_free(ctaskp, sizeof (vsw_ctrl_task_t));
48861ae08745Sheppo 	D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id);
48871ae08745Sheppo }
48881ae08745Sheppo 
48891ae08745Sheppo /*
48901ae08745Sheppo  * Version negotiation. We can end up here either because our peer
48911ae08745Sheppo  * has responded to a handshake message we have sent it, or our peer
48921ae08745Sheppo  * has initiated a handshake with us. If its the former then can only
48931ae08745Sheppo  * be ACK or NACK, if its the later can only be INFO.
48941ae08745Sheppo  *
48951ae08745Sheppo  * If its an ACK we move to the next stage of the handshake, namely
48961ae08745Sheppo  * attribute exchange. If its a NACK we see if we can specify another
48971ae08745Sheppo  * version, if we can't we stop.
48981ae08745Sheppo  *
48991ae08745Sheppo  * If it is an INFO we reset all params associated with communication
49001ae08745Sheppo  * in that direction over this channel (remember connection is
49011ae08745Sheppo  * essentially 2 independent simplex channels).
49021ae08745Sheppo  */
49031ae08745Sheppo void
49041ae08745Sheppo vsw_process_ctrl_ver_pkt(vsw_ldc_t *ldcp, void *pkt)
49051ae08745Sheppo {
49061ae08745Sheppo 	vio_ver_msg_t	*ver_pkt;
49071ae08745Sheppo 	vsw_t 		*vswp = ldcp->ldc_vswp;
49081ae08745Sheppo 
49091ae08745Sheppo 	D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);
49101ae08745Sheppo 
49111ae08745Sheppo 	/*
49121ae08745Sheppo 	 * We know this is a ctrl/version packet so
49131ae08745Sheppo 	 * cast it into the correct structure.
49141ae08745Sheppo 	 */
49151ae08745Sheppo 	ver_pkt = (vio_ver_msg_t *)pkt;
49161ae08745Sheppo 
49171ae08745Sheppo 	switch (ver_pkt->tag.vio_subtype) {
49181ae08745Sheppo 	case VIO_SUBTYPE_INFO:
49191ae08745Sheppo 		D2(vswp, "vsw_process_ctrl_ver_pkt: VIO_SUBTYPE_INFO\n");
49201ae08745Sheppo 
49211ae08745Sheppo 		/*
49221ae08745Sheppo 		 * Record the session id, which we will use from now
49231ae08745Sheppo 		 * until we see another VER_INFO msg. Even then the
49241ae08745Sheppo 		 * session id in most cases will be unchanged, execpt
49251ae08745Sheppo 		 * if channel was reset.
49261ae08745Sheppo 		 */
49271ae08745Sheppo 		if ((ldcp->session_status & VSW_PEER_SESSION) &&
49281ae08745Sheppo 			(ldcp->peer_session != ver_pkt->tag.vio_sid)) {
49291ae08745Sheppo 			DERR(vswp, "%s: updating session id for chan %lld "
49301ae08745Sheppo 				"from %llx to %llx", __func__, ldcp->ldc_id,
49311ae08745Sheppo 				ldcp->peer_session, ver_pkt->tag.vio_sid);
49321ae08745Sheppo 		}
49331ae08745Sheppo 
49341ae08745Sheppo 		ldcp->peer_session = ver_pkt->tag.vio_sid;
49351ae08745Sheppo 		ldcp->session_status |= VSW_PEER_SESSION;
49361ae08745Sheppo 
49371ae08745Sheppo 		/* Legal message at this time ? */
49381ae08745Sheppo 		if (vsw_check_flag(ldcp, INBOUND, VSW_VER_INFO_RECV))
49391ae08745Sheppo 			return;
49401ae08745Sheppo 
49411ae08745Sheppo 		/*
49421ae08745Sheppo 		 * First check the device class. Currently only expect
49431ae08745Sheppo 		 * to be talking to a network device. In the future may
49441ae08745Sheppo 		 * also talk to another switch.
49451ae08745Sheppo 		 */
49461ae08745Sheppo 		if (ver_pkt->dev_class != VDEV_NETWORK) {
49471ae08745Sheppo 			DERR(vswp, "%s: illegal device class %d", __func__,
49481ae08745Sheppo 				ver_pkt->dev_class);
49491ae08745Sheppo 
49501ae08745Sheppo 			ver_pkt->tag.vio_sid = ldcp->local_session;
49511ae08745Sheppo 			ver_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK;
49521ae08745Sheppo 
49531ae08745Sheppo 			DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt);
49541ae08745Sheppo 
4955b071742bSsg70180 			(void) vsw_send_msg(ldcp, (void *)ver_pkt,
4956b071742bSsg70180 					sizeof (vio_ver_msg_t), B_TRUE);
49571ae08745Sheppo 
49581ae08745Sheppo 			ldcp->lane_in.lstate |= VSW_VER_NACK_SENT;
49591ae08745Sheppo 			vsw_next_milestone(ldcp);
49601ae08745Sheppo 			return;
49611ae08745Sheppo 		} else {
49621ae08745Sheppo 			ldcp->dev_class = ver_pkt->dev_class;
49631ae08745Sheppo 		}
49641ae08745Sheppo 
49651ae08745Sheppo 		/*
49661ae08745Sheppo 		 * Now check the version.
49671ae08745Sheppo 		 */
49681ae08745Sheppo 		if (vsw_supported_version(ver_pkt) == 0) {
49691ae08745Sheppo 			/*
49701ae08745Sheppo 			 * Support this major version and possibly
49711ae08745Sheppo 			 * adjusted minor version.
49721ae08745Sheppo 			 */
49731ae08745Sheppo 
49741ae08745Sheppo 			D2(vswp, "%s: accepted ver %d:%d", __func__,
49751ae08745Sheppo 				ver_pkt->ver_major, ver_pkt->ver_minor);
49761ae08745Sheppo 
49771ae08745Sheppo 			/* Store accepted values */
49781ae08745Sheppo 			ldcp->lane_in.ver_major = ver_pkt->ver_major;
49791ae08745Sheppo 			ldcp->lane_in.ver_minor = ver_pkt->ver_minor;
49801ae08745Sheppo 
49811ae08745Sheppo 			ver_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK;
49821ae08745Sheppo 
49831ae08745Sheppo 			ldcp->lane_in.lstate |= VSW_VER_ACK_SENT;
49841ae08745Sheppo 		} else {
49851ae08745Sheppo 			/*
49861ae08745Sheppo 			 * NACK back with the next lower major/minor
49871ae08745Sheppo 			 * pairing we support (if don't suuport any more
49881ae08745Sheppo 			 * versions then they will be set to zero.
49891ae08745Sheppo 			 */
49901ae08745Sheppo 
49911ae08745Sheppo 			D2(vswp, "%s: replying with ver %d:%d", __func__,
49921ae08745Sheppo 				ver_pkt->ver_major, ver_pkt->ver_minor);
49931ae08745Sheppo 
49941ae08745Sheppo 			/* Store updated values */
49951ae08745Sheppo 			ldcp->lane_in.ver_major = ver_pkt->ver_major;
49961ae08745Sheppo 			ldcp->lane_in.ver_minor = ver_pkt->ver_minor;
49971ae08745Sheppo 
49981ae08745Sheppo 			ver_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK;
49991ae08745Sheppo 
50001ae08745Sheppo 			ldcp->lane_in.lstate |= VSW_VER_NACK_SENT;
50011ae08745Sheppo 		}
50021ae08745Sheppo 
50031ae08745Sheppo 		DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt);
50041ae08745Sheppo 		ver_pkt->tag.vio_sid = ldcp->local_session;
5005b071742bSsg70180 		(void) vsw_send_msg(ldcp, (void *)ver_pkt,
5006b071742bSsg70180 			sizeof (vio_ver_msg_t), B_TRUE);
50071ae08745Sheppo 
50081ae08745Sheppo 		vsw_next_milestone(ldcp);
50091ae08745Sheppo 		break;
50101ae08745Sheppo 
50111ae08745Sheppo 	case VIO_SUBTYPE_ACK:
50121ae08745Sheppo 		D2(vswp, "%s: VIO_SUBTYPE_ACK\n", __func__);
50131ae08745Sheppo 
50141ae08745Sheppo 		if (vsw_check_flag(ldcp, OUTBOUND, VSW_VER_ACK_RECV))
50151ae08745Sheppo 			return;
50161ae08745Sheppo 
50171ae08745Sheppo 		/* Store updated values */
50181ae08745Sheppo 		ldcp->lane_in.ver_major = ver_pkt->ver_major;
50191ae08745Sheppo 		ldcp->lane_in.ver_minor = ver_pkt->ver_minor;
50201ae08745Sheppo 
50211ae08745Sheppo 
50221ae08745Sheppo 		ldcp->lane_out.lstate |= VSW_VER_ACK_RECV;
50231ae08745Sheppo 		vsw_next_milestone(ldcp);
50241ae08745Sheppo 
50251ae08745Sheppo 		break;
50261ae08745Sheppo 
50271ae08745Sheppo 	case VIO_SUBTYPE_NACK:
50281ae08745Sheppo 		D2(vswp, "%s: VIO_SUBTYPE_NACK\n", __func__);
50291ae08745Sheppo 
50301ae08745Sheppo 		if (vsw_check_flag(ldcp, OUTBOUND, VSW_VER_NACK_RECV))
50311ae08745Sheppo 			return;
50321ae08745Sheppo 
50331ae08745Sheppo 		/*
50341ae08745Sheppo 		 * If our peer sent us a NACK with the ver fields set to
50351ae08745Sheppo 		 * zero then there is nothing more we can do. Otherwise see
50361ae08745Sheppo 		 * if we support either the version suggested, or a lesser
50371ae08745Sheppo 		 * one.
50381ae08745Sheppo 		 */
50391ae08745Sheppo 		if ((ver_pkt->ver_major == 0) && (ver_pkt->ver_minor == 0)) {
50401ae08745Sheppo 			DERR(vswp, "%s: peer unable to negotiate any "
50411ae08745Sheppo 				"further.", __func__);
50421ae08745Sheppo 			ldcp->lane_out.lstate |= VSW_VER_NACK_RECV;
50431ae08745Sheppo 			vsw_next_milestone(ldcp);
50441ae08745Sheppo 			return;
50451ae08745Sheppo 		}
50461ae08745Sheppo 
50471ae08745Sheppo 		/*
50481ae08745Sheppo 		 * Check to see if we support this major version or
50491ae08745Sheppo 		 * a lower one. If we don't then maj/min will be set
50501ae08745Sheppo 		 * to zero.
50511ae08745Sheppo 		 */
50521ae08745Sheppo 		(void) vsw_supported_version(ver_pkt);
50531ae08745Sheppo 		if ((ver_pkt->ver_major == 0) && (ver_pkt->ver_minor == 0)) {
50541ae08745Sheppo 			/* Nothing more we can do */
50551ae08745Sheppo 			DERR(vswp, "%s: version negotiation failed.\n",
50561ae08745Sheppo 								__func__);
50571ae08745Sheppo 			ldcp->lane_out.lstate |= VSW_VER_NACK_RECV;
50581ae08745Sheppo 			vsw_next_milestone(ldcp);
50591ae08745Sheppo 		} else {
50601ae08745Sheppo 			/* found a supported major version */
50611ae08745Sheppo 			ldcp->lane_out.ver_major = ver_pkt->ver_major;
50621ae08745Sheppo 			ldcp->lane_out.ver_minor = ver_pkt->ver_minor;
50631ae08745Sheppo 
50641ae08745Sheppo 			D2(vswp, "%s: resending with updated values (%x, %x)",
50651ae08745Sheppo 				__func__, ver_pkt->ver_major,
50661ae08745Sheppo 				ver_pkt->ver_minor);
50671ae08745Sheppo 
50681ae08745Sheppo 			ldcp->lane_out.lstate |= VSW_VER_INFO_SENT;
50691ae08745Sheppo 			ver_pkt->tag.vio_sid = ldcp->local_session;
50701ae08745Sheppo 			ver_pkt->tag.vio_subtype = VIO_SUBTYPE_INFO;
50711ae08745Sheppo 
50721ae08745Sheppo 			DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt);
50731ae08745Sheppo 
5074b071742bSsg70180 			(void) vsw_send_msg(ldcp, (void *)ver_pkt,
5075b071742bSsg70180 				sizeof (vio_ver_msg_t), B_TRUE);
50761ae08745Sheppo 
50771ae08745Sheppo 			vsw_next_milestone(ldcp);
50781ae08745Sheppo 
50791ae08745Sheppo 		}
50801ae08745Sheppo 		break;
50811ae08745Sheppo 
50821ae08745Sheppo 	default:
50831ae08745Sheppo 		DERR(vswp, "%s: unknown vio_subtype %x\n", __func__,
50841ae08745Sheppo 			ver_pkt->tag.vio_subtype);
50851ae08745Sheppo 	}
50861ae08745Sheppo 
50871ae08745Sheppo 	D1(vswp, "%s(%lld): exit\n", __func__, ldcp->ldc_id);
50881ae08745Sheppo }
50891ae08745Sheppo 
50901ae08745Sheppo /*
50911ae08745Sheppo  * Process an attribute packet. We can end up here either because our peer
50921ae08745Sheppo  * has ACK/NACK'ed back to an earlier ATTR msg we had sent it, or our
50931ae08745Sheppo  * peer has sent us an attribute INFO message
50941ae08745Sheppo  *
50951ae08745Sheppo  * If its an ACK we then move to the next stage of the handshake which
50961ae08745Sheppo  * is to send our descriptor ring info to our peer. If its a NACK then
50971ae08745Sheppo  * there is nothing more we can (currently) do.
50981ae08745Sheppo  *
50991ae08745Sheppo  * If we get a valid/acceptable INFO packet (and we have already negotiated
51001ae08745Sheppo  * a version) we ACK back and set channel state to ATTR_RECV, otherwise we
51011ae08745Sheppo  * NACK back and reset channel state to INACTIV.
51021ae08745Sheppo  *
51031ae08745Sheppo  * FUTURE: in time we will probably negotiate over attributes, but for
51041ae08745Sheppo  * the moment unacceptable attributes are regarded as a fatal error.
51051ae08745Sheppo  *
51061ae08745Sheppo  */
51071ae08745Sheppo void
51081ae08745Sheppo vsw_process_ctrl_attr_pkt(vsw_ldc_t *ldcp, void *pkt)
51091ae08745Sheppo {
51101ae08745Sheppo 	vnet_attr_msg_t		*attr_pkt;
51111ae08745Sheppo 	vsw_t			*vswp = ldcp->ldc_vswp;
51121ae08745Sheppo 	vsw_port_t		*port = ldcp->ldc_port;
51131ae08745Sheppo 	uint64_t		macaddr = 0;
51141ae08745Sheppo 	int			i;
51151ae08745Sheppo 
51161ae08745Sheppo 	D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id);
51171ae08745Sheppo 
51181ae08745Sheppo 	/*
51191ae08745Sheppo 	 * We know this is a ctrl/attr packet so
51201ae08745Sheppo 	 * cast it into the correct structure.
51211ae08745Sheppo 	 */
51221ae08745Sheppo 	attr_pkt = (vnet_attr_msg_t *)pkt;
51231ae08745Sheppo 
51241ae08745Sheppo 	switch (attr_pkt->tag.vio_subtype) {
51251ae08745Sheppo 	case VIO_SUBTYPE_INFO:
51261ae08745Sheppo 		D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__);
51271ae08745Sheppo 
51281ae08745Sheppo 		if (vsw_check_flag(ldcp, INBOUND, VSW_ATTR_INFO_RECV))
51291ae08745Sheppo 			return;
51301ae08745Sheppo 
51311ae08745Sheppo 		/*
51321ae08745Sheppo 		 * If the attributes are unacceptable then we NACK back.
51331ae08745Sheppo 		 */
51341ae08745Sheppo 		if (vsw_check_attr(attr_pkt, ldcp->ldc_port)) {
51351ae08745Sheppo 
51361ae08745Sheppo 			DERR(vswp, "%s (chan %d): invalid attributes",
51371ae08745Sheppo 				__func__, ldcp->ldc_id);
51381ae08745Sheppo 
51391ae08745Sheppo 			vsw_free_lane_resources(ldcp, INBOUND);
51401ae08745Sheppo 
51411ae08745Sheppo 			attr_pkt->tag.vio_sid = ldcp->local_session;
51421ae08745Sheppo 			attr_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK;
51431ae08745Sheppo 
51441ae08745Sheppo 			DUMP_TAG_PTR((vio_msg_tag_t *)attr_pkt);
51451ae08745Sheppo 			ldcp->lane_in.lstate |= VSW_ATTR_NACK_SENT;
5146b071742bSsg70180 			(void) vsw_send_msg(ldcp, (void *)attr_pkt,
5147b071742bSsg70180 				sizeof (vnet_attr_msg_t), B_TRUE);
51481ae08745Sheppo 
51491ae08745Sheppo 			vsw_next_milestone(ldcp);
51501ae08745Sheppo 			return;
51511ae08745Sheppo 		}
51521ae08745Sheppo 
51531ae08745Sheppo 		/*
51541ae08745Sheppo 		 * Otherwise store attributes for this lane and update
51551ae08745Sheppo 		 * lane state.
51561ae08745Sheppo 		 */
51571ae08745Sheppo 		ldcp->lane_in.mtu = attr_pkt->mtu;
51581ae08745Sheppo 		ldcp->lane_in.addr = attr_pkt->addr;
51591ae08745Sheppo 		ldcp->lane_in.addr_type = attr_pkt->addr_type;
51601ae08745Sheppo 		ldcp->lane_in.xfer_mode = attr_pkt->xfer_mode;
51611ae08745Sheppo 		ldcp->lane_in.ack_freq = attr_pkt->ack_freq;
51621ae08745Sheppo 
51631ae08745Sheppo 		macaddr = ldcp->lane_in.addr;
51641ae08745Sheppo 		for (i = ETHERADDRL - 1; i >= 0; i--) {
51651ae08745Sheppo 			port->p_macaddr.ether_addr_octet[i] = macaddr & 0xFF;
51661ae08745Sheppo 			macaddr >>= 8;
51671ae08745Sheppo 		}
51681ae08745Sheppo 
51691ae08745Sheppo 		/* create the fdb entry for this port/mac address */
51701ae08745Sheppo 		(void) vsw_add_fdb(vswp, port);
51711ae08745Sheppo 
51721ae08745Sheppo 		/* setup device specifc xmit routines */
51731ae08745Sheppo 		mutex_enter(&port->tx_lock);
51741ae08745Sheppo 		if (ldcp->lane_in.xfer_mode == VIO_DRING_MODE) {
51751ae08745Sheppo 			D2(vswp, "%s: mode = VIO_DRING_MODE", __func__);
51761ae08745Sheppo 			port->transmit = vsw_dringsend;
51771ae08745Sheppo 		} else if (ldcp->lane_in.xfer_mode == VIO_DESC_MODE) {
51781ae08745Sheppo 			D2(vswp, "%s: mode = VIO_DESC_MODE", __func__);
51791ae08745Sheppo 			vsw_create_privring(ldcp);
51801ae08745Sheppo 			port->transmit = vsw_descrsend;
51811ae08745Sheppo 		}
51821ae08745Sheppo 		mutex_exit(&port->tx_lock);
51831ae08745Sheppo 
51841ae08745Sheppo 		attr_pkt->tag.vio_sid = ldcp->local_session;
51851ae08745Sheppo 		attr_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK;
51861ae08745Sheppo 
51871ae08745Sheppo 		DUMP_TAG_PTR((vio_msg_tag_t *)attr_pkt);
51881ae08745Sheppo 
51891ae08745Sheppo 		ldcp->lane_in.lstate |= VSW_ATTR_ACK_SENT;
51901ae08745Sheppo 
5191b071742bSsg70180 		(void) vsw_send_msg(ldcp, (void *)attr_pkt,
5192b071742bSsg70180 				sizeof (vnet_attr_msg_t), B_TRUE);
51931ae08745Sheppo 
51941ae08745Sheppo 		vsw_next_milestone(ldcp);
51951ae08745Sheppo 		break;
51961ae08745Sheppo 
51971ae08745Sheppo 	case VIO_SUBTYPE_ACK:
51981ae08745Sheppo 		D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__);
51991ae08745Sheppo 
52001ae08745Sheppo 		if (vsw_check_flag(ldcp, OUTBOUND, VSW_ATTR_ACK_RECV))
52011ae08745Sheppo 			return;
52021ae08745Sheppo 
52031ae08745Sheppo 		ldcp->lane_out.lstate |= VSW_ATTR_ACK_RECV;
52041ae08745Sheppo 		vsw_next_milestone(ldcp);
52051ae08745Sheppo 		break;
52061ae08745Sheppo 
52071ae08745Sheppo 	case VIO_SUBTYPE_NACK:
52081ae08745Sheppo 		D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__);
52091ae08745Sheppo 
52101ae08745Sheppo 		if (vsw_check_flag(ldcp, OUTBOUND, VSW_ATTR_NACK_RECV))
52111ae08745Sheppo 			return;
52121ae08745Sheppo 
52131ae08745Sheppo 		ldcp->lane_out.lstate |= VSW_ATTR_NACK_RECV;
52141ae08745Sheppo 		vsw_next_milestone(ldcp);
52151ae08745Sheppo 		break;
52161ae08745Sheppo 
52171ae08745Sheppo 	default:
52181ae08745Sheppo 		DERR(vswp, "%s: unknown vio_subtype %x\n", __func__,
52191ae08745Sheppo 			attr_pkt->tag.vio_subtype);
52201ae08745Sheppo 	}
52211ae08745Sheppo 
52221ae08745Sheppo 	D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id);
52231ae08745Sheppo }
52241ae08745Sheppo 
52251ae08745Sheppo /*
52261ae08745Sheppo  * Process a dring info packet. We can end up here either because our peer
52271ae08745Sheppo  * has ACK/NACK'ed back to an earlier DRING msg we had sent it, or our
52281ae08745Sheppo  * peer has sent us a dring INFO message.
52291ae08745Sheppo  *
52301ae08745Sheppo  * If we get a valid/acceptable INFO packet (and we have already negotiated
52311ae08745Sheppo  * a version) we ACK back and update the lane state, otherwise we NACK back.
52321ae08745Sheppo  *
52331ae08745Sheppo  * FUTURE: nothing to stop client from sending us info on multiple dring's
52341ae08745Sheppo  * but for the moment we will just use the first one we are given.
52351ae08745Sheppo  *
52361ae08745Sheppo  */
52371ae08745Sheppo void
52381ae08745Sheppo vsw_process_ctrl_dring_reg_pkt(vsw_ldc_t *ldcp, void *pkt)
52391ae08745Sheppo {
52401ae08745Sheppo 	vio_dring_reg_msg_t	*dring_pkt;
52411ae08745Sheppo 	vsw_t			*vswp = ldcp->ldc_vswp;
52421ae08745Sheppo 	ldc_mem_info_t		minfo;
52431ae08745Sheppo 	dring_info_t		*dp, *dbp;
52441ae08745Sheppo 	int			dring_found = 0;
52451ae08745Sheppo 
52461ae08745Sheppo 	/*
52471ae08745Sheppo 	 * We know this is a ctrl/dring packet so
52481ae08745Sheppo 	 * cast it into the correct structure.
52491ae08745Sheppo 	 */
52501ae08745Sheppo 	dring_pkt = (vio_dring_reg_msg_t *)pkt;
52511ae08745Sheppo 
52521ae08745Sheppo 	D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id);
52531ae08745Sheppo 
52541ae08745Sheppo 	switch (dring_pkt->tag.vio_subtype) {
52551ae08745Sheppo 	case VIO_SUBTYPE_INFO:
52561ae08745Sheppo 		D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__);
52571ae08745Sheppo 
52581ae08745Sheppo 		if (vsw_check_flag(ldcp, INBOUND, VSW_DRING_INFO_RECV))
52591ae08745Sheppo 			return;
52601ae08745Sheppo 
52611ae08745Sheppo 		/*
52621ae08745Sheppo 		 * If the dring params are unacceptable then we NACK back.
52631ae08745Sheppo 		 */
52641ae08745Sheppo 		if (vsw_check_dring_info(dring_pkt)) {
52651ae08745Sheppo 
52661ae08745Sheppo 			DERR(vswp, "%s (%lld): invalid dring info",
52671ae08745Sheppo 				__func__, ldcp->ldc_id);
52681ae08745Sheppo 
52691ae08745Sheppo 			vsw_free_lane_resources(ldcp, INBOUND);
52701ae08745Sheppo 
52711ae08745Sheppo 			dring_pkt->tag.vio_sid = ldcp->local_session;
52721ae08745Sheppo 			dring_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK;
52731ae08745Sheppo 
52741ae08745Sheppo 			DUMP_TAG_PTR((vio_msg_tag_t *)dring_pkt);
52751ae08745Sheppo 
52761ae08745Sheppo 			ldcp->lane_in.lstate |= VSW_DRING_NACK_SENT;
52771ae08745Sheppo 
5278b071742bSsg70180 			(void) vsw_send_msg(ldcp, (void *)dring_pkt,
5279b071742bSsg70180 				sizeof (vio_dring_reg_msg_t), B_TRUE);
52801ae08745Sheppo 
52811ae08745Sheppo 			vsw_next_milestone(ldcp);
52821ae08745Sheppo 			return;
52831ae08745Sheppo 		}
52841ae08745Sheppo 
52851ae08745Sheppo 		/*
52861ae08745Sheppo 		 * Otherwise, attempt to map in the dring using the
52871ae08745Sheppo 		 * cookie. If that succeeds we send back a unique dring
52881ae08745Sheppo 		 * identifier that the sending side will use in future
52891ae08745Sheppo 		 * to refer to this descriptor ring.
52901ae08745Sheppo 		 */
52911ae08745Sheppo 		dp = kmem_zalloc(sizeof (dring_info_t), KM_SLEEP);
52921ae08745Sheppo 
52931ae08745Sheppo 		dp->num_descriptors = dring_pkt->num_descriptors;
52941ae08745Sheppo 		dp->descriptor_size = dring_pkt->descriptor_size;
52951ae08745Sheppo 		dp->options = dring_pkt->options;
52961ae08745Sheppo 		dp->ncookies = dring_pkt->ncookies;
52971ae08745Sheppo 
52981ae08745Sheppo 		/*
52991ae08745Sheppo 		 * Note: should only get one cookie. Enforced in
53001ae08745Sheppo 		 * the ldc layer.
53011ae08745Sheppo 		 */
53021ae08745Sheppo 		bcopy(&dring_pkt->cookie[0], &dp->cookie[0],
53031ae08745Sheppo 			sizeof (ldc_mem_cookie_t));
53041ae08745Sheppo 
53051ae08745Sheppo 		D2(vswp, "%s: num_desc %ld : desc_size %ld", __func__,
53061ae08745Sheppo 			dp->num_descriptors, dp->descriptor_size);
53071ae08745Sheppo 		D2(vswp, "%s: options 0x%lx: ncookies %ld", __func__,
53081ae08745Sheppo 			dp->options, dp->ncookies);
53091ae08745Sheppo 
53101ae08745Sheppo 		if ((ldc_mem_dring_map(ldcp->ldc_handle, &dp->cookie[0],
53111ae08745Sheppo 			dp->ncookies, dp->num_descriptors,
53121ae08745Sheppo 			dp->descriptor_size, LDC_SHADOW_MAP,
53131ae08745Sheppo 			&(dp->handle))) != 0) {
53141ae08745Sheppo 
53151ae08745Sheppo 			DERR(vswp, "%s: dring_map failed\n", __func__);
53161ae08745Sheppo 
53171ae08745Sheppo 			kmem_free(dp, sizeof (dring_info_t));
53181ae08745Sheppo 			vsw_free_lane_resources(ldcp, INBOUND);
53191ae08745Sheppo 
53201ae08745Sheppo 			dring_pkt->tag.vio_sid = ldcp->local_session;
53211ae08745Sheppo 			dring_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK;
53221ae08745Sheppo 
53231ae08745Sheppo 			DUMP_TAG_PTR((vio_msg_tag_t *)dring_pkt);
53241ae08745Sheppo 
53251ae08745Sheppo 			ldcp->lane_in.lstate |= VSW_DRING_NACK_SENT;
5326b071742bSsg70180 			(void) vsw_send_msg(ldcp, (void *)dring_pkt,
5327b071742bSsg70180 				sizeof (vio_dring_reg_msg_t), B_TRUE);
53281ae08745Sheppo 
53291ae08745Sheppo 			vsw_next_milestone(ldcp);
53301ae08745Sheppo 			return;
53311ae08745Sheppo 		}
53321ae08745Sheppo 
53331ae08745Sheppo 		if ((ldc_mem_dring_info(dp->handle, &minfo)) != 0) {
53341ae08745Sheppo 
53351ae08745Sheppo 			DERR(vswp, "%s: dring_addr failed\n", __func__);
53361ae08745Sheppo 
53371ae08745Sheppo 			kmem_free(dp, sizeof (dring_info_t));
53381ae08745Sheppo 			vsw_free_lane_resources(ldcp, INBOUND);
53391ae08745Sheppo 
53401ae08745Sheppo 			dring_pkt->tag.vio_sid = ldcp->local_session;
53411ae08745Sheppo 			dring_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK;
53421ae08745Sheppo 
53431ae08745Sheppo 			DUMP_TAG_PTR((vio_msg_tag_t *)dring_pkt);
53441ae08745Sheppo 
53451ae08745Sheppo 			ldcp->lane_in.lstate |= VSW_DRING_NACK_SENT;
5346b071742bSsg70180 			(void) vsw_send_msg(ldcp, (void *)dring_pkt,
5347b071742bSsg70180 				sizeof (vio_dring_reg_msg_t), B_TRUE);
53481ae08745Sheppo 
53491ae08745Sheppo 			vsw_next_milestone(ldcp);
53501ae08745Sheppo 			return;
53511ae08745Sheppo 		} else {
53521ae08745Sheppo 			/* store the address of the pub part of ring */
53531ae08745Sheppo 			dp->pub_addr = minfo.vaddr;
53541ae08745Sheppo 		}
53551ae08745Sheppo 
53561ae08745Sheppo 		/* no private section as we are importing */
53571ae08745Sheppo 		dp->priv_addr = NULL;
53581ae08745Sheppo 
53591ae08745Sheppo 		/*
53601ae08745Sheppo 		 * Using simple mono increasing int for ident at
53611ae08745Sheppo 		 * the moment.
53621ae08745Sheppo 		 */
53631ae08745Sheppo 		dp->ident = ldcp->next_ident;
53641ae08745Sheppo 		ldcp->next_ident++;
53651ae08745Sheppo 
53661ae08745Sheppo 		dp->end_idx = 0;
53671ae08745Sheppo 		dp->next = NULL;
53681ae08745Sheppo 
53691ae08745Sheppo 		/*
53701ae08745Sheppo 		 * Link it onto the end of the list of drings
53711ae08745Sheppo 		 * for this lane.
53721ae08745Sheppo 		 */
53731ae08745Sheppo 		if (ldcp->lane_in.dringp == NULL) {
53741ae08745Sheppo 			D2(vswp, "%s: adding first INBOUND dring", __func__);
53751ae08745Sheppo 			ldcp->lane_in.dringp = dp;
53761ae08745Sheppo 		} else {
53771ae08745Sheppo 			dbp = ldcp->lane_in.dringp;
53781ae08745Sheppo 
53791ae08745Sheppo 			while (dbp->next != NULL)
53801ae08745Sheppo 				dbp = dbp->next;
53811ae08745Sheppo 
53821ae08745Sheppo 			dbp->next = dp;
53831ae08745Sheppo 		}
53841ae08745Sheppo 
53851ae08745Sheppo 		/* acknowledge it */
53861ae08745Sheppo 		dring_pkt->tag.vio_sid = ldcp->local_session;
53871ae08745Sheppo 		dring_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK;
53881ae08745Sheppo 		dring_pkt->dring_ident = dp->ident;
53891ae08745Sheppo 
5390b071742bSsg70180 		(void) vsw_send_msg(ldcp, (void *)dring_pkt,
5391b071742bSsg70180 			sizeof (vio_dring_reg_msg_t), B_TRUE);
53921ae08745Sheppo 
53931ae08745Sheppo 		ldcp->lane_in.lstate |= VSW_DRING_ACK_SENT;
53941ae08745Sheppo 		vsw_next_milestone(ldcp);
53951ae08745Sheppo 		break;
53961ae08745Sheppo 
53971ae08745Sheppo 	case VIO_SUBTYPE_ACK:
53981ae08745Sheppo 		D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__);
53991ae08745Sheppo 
54001ae08745Sheppo 		if (vsw_check_flag(ldcp, OUTBOUND, VSW_DRING_ACK_RECV))
54011ae08745Sheppo 			return;
54021ae08745Sheppo 
54031ae08745Sheppo 		/*
54041ae08745Sheppo 		 * Peer is acknowledging our dring info and will have
54051ae08745Sheppo 		 * sent us a dring identifier which we will use to
54061ae08745Sheppo 		 * refer to this ring w.r.t. our peer.
54071ae08745Sheppo 		 */
54081ae08745Sheppo 		dp = ldcp->lane_out.dringp;
54091ae08745Sheppo 		if (dp != NULL) {
54101ae08745Sheppo 			/*
54111ae08745Sheppo 			 * Find the ring this ident should be associated
54121ae08745Sheppo 			 * with.
54131ae08745Sheppo 			 */
54141ae08745Sheppo 			if (vsw_dring_match(dp, dring_pkt)) {
54151ae08745Sheppo 				dring_found = 1;
54161ae08745Sheppo 
54171ae08745Sheppo 			} else while (dp != NULL) {
54181ae08745Sheppo 				if (vsw_dring_match(dp, dring_pkt)) {
54191ae08745Sheppo 					dring_found = 1;
54201ae08745Sheppo 					break;
54211ae08745Sheppo 				}
54221ae08745Sheppo 				dp = dp->next;
54231ae08745Sheppo 			}
54241ae08745Sheppo 
54251ae08745Sheppo 			if (dring_found == 0) {
54261ae08745Sheppo 				DERR(NULL, "%s: unrecognised ring cookie",
54271ae08745Sheppo 					__func__);
5428b071742bSsg70180 				vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
54291ae08745Sheppo 				return;
54301ae08745Sheppo 			}
54311ae08745Sheppo 
54321ae08745Sheppo 		} else {
54331ae08745Sheppo 			DERR(vswp, "%s: DRING ACK received but no drings "
54341ae08745Sheppo 				"allocated", __func__);
5435b071742bSsg70180 			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
54361ae08745Sheppo 			return;
54371ae08745Sheppo 		}
54381ae08745Sheppo 
54391ae08745Sheppo 		/* store ident */
54401ae08745Sheppo 		dp->ident = dring_pkt->dring_ident;
54411ae08745Sheppo 		ldcp->lane_out.lstate |= VSW_DRING_ACK_RECV;
54421ae08745Sheppo 		vsw_next_milestone(ldcp);
54431ae08745Sheppo 		break;
54441ae08745Sheppo 
54451ae08745Sheppo 	case VIO_SUBTYPE_NACK:
54461ae08745Sheppo 		D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__);
54471ae08745Sheppo 
54481ae08745Sheppo 		if (vsw_check_flag(ldcp, OUTBOUND, VSW_DRING_NACK_RECV))
54491ae08745Sheppo 			return;
54501ae08745Sheppo 
54511ae08745Sheppo 		ldcp->lane_out.lstate |= VSW_DRING_NACK_RECV;
54521ae08745Sheppo 		vsw_next_milestone(ldcp);
54531ae08745Sheppo 		break;
54541ae08745Sheppo 
54551ae08745Sheppo 	default:
54561ae08745Sheppo 		DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__,
54571ae08745Sheppo 			dring_pkt->tag.vio_subtype);
54581ae08745Sheppo 	}
54591ae08745Sheppo 
54601ae08745Sheppo 	D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id);
54611ae08745Sheppo }
54621ae08745Sheppo 
54631ae08745Sheppo /*
54641ae08745Sheppo  * Process a request from peer to unregister a dring.
54651ae08745Sheppo  *
54661ae08745Sheppo  * For the moment we just restart the handshake if our
54671ae08745Sheppo  * peer endpoint attempts to unregister a dring.
54681ae08745Sheppo  */
54691ae08745Sheppo void
54701ae08745Sheppo vsw_process_ctrl_dring_unreg_pkt(vsw_ldc_t *ldcp, void *pkt)
54711ae08745Sheppo {
54721ae08745Sheppo 	vsw_t			*vswp = ldcp->ldc_vswp;
54731ae08745Sheppo 	vio_dring_unreg_msg_t	*dring_pkt;
54741ae08745Sheppo 
54751ae08745Sheppo 	/*
54761ae08745Sheppo 	 * We know this is a ctrl/dring packet so
54771ae08745Sheppo 	 * cast it into the correct structure.
54781ae08745Sheppo 	 */
54791ae08745Sheppo 	dring_pkt = (vio_dring_unreg_msg_t *)pkt;
54801ae08745Sheppo 
54811ae08745Sheppo 	D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);
54821ae08745Sheppo 
54831ae08745Sheppo 	switch (dring_pkt->tag.vio_subtype) {
54841ae08745Sheppo 	case VIO_SUBTYPE_INFO:
54851ae08745Sheppo 		D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__);
54861ae08745Sheppo 
54871ae08745Sheppo 		DWARN(vswp, "%s: restarting handshake..", __func__);
54881ae08745Sheppo 		break;
54891ae08745Sheppo 
54901ae08745Sheppo 	case VIO_SUBTYPE_ACK:
54911ae08745Sheppo 		D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__);
54921ae08745Sheppo 
54931ae08745Sheppo 		DWARN(vswp, "%s: restarting handshake..", __func__);
54941ae08745Sheppo 		break;
54951ae08745Sheppo 
54961ae08745Sheppo 	case VIO_SUBTYPE_NACK:
54971ae08745Sheppo 		D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__);
54981ae08745Sheppo 
54991ae08745Sheppo 		DWARN(vswp, "%s: restarting handshake..", __func__);
55001ae08745Sheppo 		break;
55011ae08745Sheppo 
55021ae08745Sheppo 	default:
55031ae08745Sheppo 		DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__,
55041ae08745Sheppo 			dring_pkt->tag.vio_subtype);
55051ae08745Sheppo 	}
55061ae08745Sheppo 
5507b071742bSsg70180 	vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
5508b071742bSsg70180 
55091ae08745Sheppo 	D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id);
55101ae08745Sheppo }
55111ae08745Sheppo 
/*
 * Turn the multicast message 'pkt' into a NACK carrying our local
 * session id and send it back over 'ldcp'.
 *
 * Wrapped in do/while(0) so that the expansion is a single statement and
 * stays correct under an unbraced if/else; the caller supplies the
 * terminating semicolon. Both arguments are evaluated more than once, so
 * they must be free of side effects.
 */
#define	SND_MCST_NACK(ldcp, pkt) \
	do { \
		(pkt)->tag.vio_subtype = VIO_SUBTYPE_NACK; \
		(pkt)->tag.vio_sid = (ldcp)->local_session; \
		(void) vsw_send_msg((ldcp), (void *)(pkt), \
			sizeof (vnet_mcast_msg_t), B_TRUE); \
	} while (0)
55171ae08745Sheppo 
55181ae08745Sheppo /*
55191ae08745Sheppo  * Process a multicast request from a vnet.
55201ae08745Sheppo  *
55211ae08745Sheppo  * Vnet's specify a multicast address that they are interested in. This
55221ae08745Sheppo  * address is used as a key into the hash table which forms the multicast
55231ae08745Sheppo  * forwarding database (mFDB).
55241ae08745Sheppo  *
55251ae08745Sheppo  * The table keys are the multicast addresses, while the table entries
55261ae08745Sheppo  * are pointers to lists of ports which wish to receive packets for the
55271ae08745Sheppo  * specified multicast address.
55281ae08745Sheppo  *
55291ae08745Sheppo  * When a multicast packet is being switched we use the address as a key
55301ae08745Sheppo  * into the hash table, and then walk the appropriate port list forwarding
55311ae08745Sheppo  * the pkt to each port in turn.
55321ae08745Sheppo  *
55331ae08745Sheppo  * If a vnet is no longer interested in a particular multicast grouping
55341ae08745Sheppo  * we simply find the correct location in the hash table and then delete
55351ae08745Sheppo  * the relevant port from the port list.
55361ae08745Sheppo  *
55371ae08745Sheppo  * To deal with the case whereby a port is being deleted without first
55381ae08745Sheppo  * removing itself from the lists in the hash table, we maintain a list
55391ae08745Sheppo  * of multicast addresses the port has registered an interest in, within
55401ae08745Sheppo  * the port structure itself. We then simply walk that list of addresses
55411ae08745Sheppo  * using them as keys into the hash table and remove the port from the
55421ae08745Sheppo  * appropriate lists.
55431ae08745Sheppo  */
55441ae08745Sheppo static void
55451ae08745Sheppo vsw_process_ctrl_mcst_pkt(vsw_ldc_t *ldcp, void *pkt)
55461ae08745Sheppo {
55471ae08745Sheppo 	vnet_mcast_msg_t	*mcst_pkt;
55481ae08745Sheppo 	vsw_port_t		*port = ldcp->ldc_port;
55491ae08745Sheppo 	vsw_t			*vswp = ldcp->ldc_vswp;
55501ae08745Sheppo 	int			i;
55511ae08745Sheppo 
55521ae08745Sheppo 	D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);
55531ae08745Sheppo 
55541ae08745Sheppo 	/*
55551ae08745Sheppo 	 * We know this is a ctrl/mcast packet so
55561ae08745Sheppo 	 * cast it into the correct structure.
55571ae08745Sheppo 	 */
55581ae08745Sheppo 	mcst_pkt = (vnet_mcast_msg_t *)pkt;
55591ae08745Sheppo 
55601ae08745Sheppo 	switch (mcst_pkt->tag.vio_subtype) {
55611ae08745Sheppo 	case VIO_SUBTYPE_INFO:
55621ae08745Sheppo 		D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__);
55631ae08745Sheppo 
55641ae08745Sheppo 		/*
55651ae08745Sheppo 		 * Check if in correct state to receive a multicast
55661ae08745Sheppo 		 * message (i.e. handshake complete). If not reset
55671ae08745Sheppo 		 * the handshake.
55681ae08745Sheppo 		 */
55691ae08745Sheppo 		if (vsw_check_flag(ldcp, INBOUND, VSW_MCST_INFO_RECV))
55701ae08745Sheppo 			return;
55711ae08745Sheppo 
55721ae08745Sheppo 		/*
55731ae08745Sheppo 		 * Before attempting to add or remove address check
55741ae08745Sheppo 		 * that they are valid multicast addresses.
55751ae08745Sheppo 		 * If not, then NACK back.
55761ae08745Sheppo 		 */
55771ae08745Sheppo 		for (i = 0; i < mcst_pkt->count; i++) {
55781ae08745Sheppo 			if ((mcst_pkt->mca[i].ether_addr_octet[0] & 01) != 1) {
55791ae08745Sheppo 				DERR(vswp, "%s: invalid multicast address",
55801ae08745Sheppo 								__func__);
55811ae08745Sheppo 				SND_MCST_NACK(ldcp, mcst_pkt);
55821ae08745Sheppo 				return;
55831ae08745Sheppo 			}
55841ae08745Sheppo 		}
55851ae08745Sheppo 
55861ae08745Sheppo 		/*
55871ae08745Sheppo 		 * Now add/remove the addresses. If this fails we
55881ae08745Sheppo 		 * NACK back.
55891ae08745Sheppo 		 */
55901ae08745Sheppo 		if (vsw_add_rem_mcst(mcst_pkt, port) != 0) {
55911ae08745Sheppo 			SND_MCST_NACK(ldcp, mcst_pkt);
55921ae08745Sheppo 			return;
55931ae08745Sheppo 		}
55941ae08745Sheppo 
55951ae08745Sheppo 		mcst_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK;
55961ae08745Sheppo 		mcst_pkt->tag.vio_sid = ldcp->local_session;
55971ae08745Sheppo 
55981ae08745Sheppo 		DUMP_TAG_PTR((vio_msg_tag_t *)mcst_pkt);
55991ae08745Sheppo 
5600b071742bSsg70180 		(void) vsw_send_msg(ldcp, (void *)mcst_pkt,
5601b071742bSsg70180 				sizeof (vnet_mcast_msg_t), B_TRUE);
56021ae08745Sheppo 		break;
56031ae08745Sheppo 
56041ae08745Sheppo 	case VIO_SUBTYPE_ACK:
56051ae08745Sheppo 		DWARN(vswp, "%s: VIO_SUBTYPE_ACK", __func__);
56061ae08745Sheppo 
56071ae08745Sheppo 		/*
56081ae08745Sheppo 		 * We shouldn't ever get a multicast ACK message as
56091ae08745Sheppo 		 * at the moment we never request multicast addresses
56101ae08745Sheppo 		 * to be set on some other device. This may change in
56111ae08745Sheppo 		 * the future if we have cascading switches.
56121ae08745Sheppo 		 */
56131ae08745Sheppo 		if (vsw_check_flag(ldcp, OUTBOUND, VSW_MCST_ACK_RECV))
56141ae08745Sheppo 			return;
56151ae08745Sheppo 
56161ae08745Sheppo 				/* Do nothing */
56171ae08745Sheppo 		break;
56181ae08745Sheppo 
56191ae08745Sheppo 	case VIO_SUBTYPE_NACK:
56201ae08745Sheppo 		DWARN(vswp, "%s: VIO_SUBTYPE_NACK", __func__);
56211ae08745Sheppo 
56221ae08745Sheppo 		/*
56231ae08745Sheppo 		 * We shouldn't get a multicast NACK packet for the
56241ae08745Sheppo 		 * same reasons as we shouldn't get a ACK packet.
56251ae08745Sheppo 		 */
56261ae08745Sheppo 		if (vsw_check_flag(ldcp, OUTBOUND, VSW_MCST_NACK_RECV))
56271ae08745Sheppo 			return;
56281ae08745Sheppo 
56291ae08745Sheppo 				/* Do nothing */
56301ae08745Sheppo 		break;
56311ae08745Sheppo 
56321ae08745Sheppo 	default:
56331ae08745Sheppo 		DERR(vswp, "%s: unknown vio_subtype %x\n", __func__,
56341ae08745Sheppo 			mcst_pkt->tag.vio_subtype);
56351ae08745Sheppo 	}
56361ae08745Sheppo 
56371ae08745Sheppo 	D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id);
56381ae08745Sheppo }
56391ae08745Sheppo 
56401ae08745Sheppo static void
56411ae08745Sheppo vsw_process_ctrl_rdx_pkt(vsw_ldc_t *ldcp, void *pkt)
56421ae08745Sheppo {
56431ae08745Sheppo 	vio_rdx_msg_t	*rdx_pkt;
56441ae08745Sheppo 	vsw_t		*vswp = ldcp->ldc_vswp;
56451ae08745Sheppo 
56461ae08745Sheppo 	/*
56471ae08745Sheppo 	 * We know this is a ctrl/rdx packet so
56481ae08745Sheppo 	 * cast it into the correct structure.
56491ae08745Sheppo 	 */
56501ae08745Sheppo 	rdx_pkt = (vio_rdx_msg_t *)pkt;
56511ae08745Sheppo 
56521ae08745Sheppo 	D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id);
56531ae08745Sheppo 
56541ae08745Sheppo 	switch (rdx_pkt->tag.vio_subtype) {
56551ae08745Sheppo 	case VIO_SUBTYPE_INFO:
56561ae08745Sheppo 		D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__);
56571ae08745Sheppo 
5658b071742bSsg70180 		if (vsw_check_flag(ldcp, OUTBOUND, VSW_RDX_INFO_RECV))
56591ae08745Sheppo 			return;
56601ae08745Sheppo 
56611ae08745Sheppo 		rdx_pkt->tag.vio_sid = ldcp->local_session;
56621ae08745Sheppo 		rdx_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK;
56631ae08745Sheppo 
56641ae08745Sheppo 		DUMP_TAG_PTR((vio_msg_tag_t *)rdx_pkt);
56651ae08745Sheppo 
5666b071742bSsg70180 		ldcp->lane_out.lstate |= VSW_RDX_ACK_SENT;
56671ae08745Sheppo 
5668b071742bSsg70180 		(void) vsw_send_msg(ldcp, (void *)rdx_pkt,
5669b071742bSsg70180 			sizeof (vio_rdx_msg_t), B_TRUE);
56701ae08745Sheppo 
56711ae08745Sheppo 		vsw_next_milestone(ldcp);
56721ae08745Sheppo 		break;
56731ae08745Sheppo 
56741ae08745Sheppo 	case VIO_SUBTYPE_ACK:
56751ae08745Sheppo 		/*
56761ae08745Sheppo 		 * Should be handled in-band by callback handler.
56771ae08745Sheppo 		 */
56781ae08745Sheppo 		DERR(vswp, "%s: Unexpected VIO_SUBTYPE_ACK", __func__);
5679b071742bSsg70180 		vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
56801ae08745Sheppo 		break;
56811ae08745Sheppo 
56821ae08745Sheppo 	case VIO_SUBTYPE_NACK:
56831ae08745Sheppo 		D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__);
56841ae08745Sheppo 
5685b071742bSsg70180 		if (vsw_check_flag(ldcp, INBOUND, VSW_RDX_NACK_RECV))
56861ae08745Sheppo 			return;
56871ae08745Sheppo 
5688b071742bSsg70180 		ldcp->lane_in.lstate |= VSW_RDX_NACK_RECV;
56891ae08745Sheppo 		vsw_next_milestone(ldcp);
56901ae08745Sheppo 		break;
56911ae08745Sheppo 
56921ae08745Sheppo 	default:
56931ae08745Sheppo 		DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__,
56941ae08745Sheppo 			rdx_pkt->tag.vio_subtype);
56951ae08745Sheppo 	}
56961ae08745Sheppo 
56971ae08745Sheppo 	D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id);
56981ae08745Sheppo }
56991ae08745Sheppo 
57001ae08745Sheppo static void
57011ae08745Sheppo vsw_process_data_pkt(vsw_ldc_t *ldcp, void *dpkt, vio_msg_tag_t tag)
57021ae08745Sheppo {
57031ae08745Sheppo 	uint16_t	env = tag.vio_subtype_env;
57041ae08745Sheppo 	vsw_t		*vswp = ldcp->ldc_vswp;
57051ae08745Sheppo 
57061ae08745Sheppo 	D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);
57071ae08745Sheppo 
57081ae08745Sheppo 	/* session id check */
57091ae08745Sheppo 	if (ldcp->session_status & VSW_PEER_SESSION) {
57101ae08745Sheppo 		if (ldcp->peer_session != tag.vio_sid) {
57111ae08745Sheppo 			DERR(vswp, "%s (chan %d): invalid session id (%llx)",
57121ae08745Sheppo 				__func__, ldcp->ldc_id, tag.vio_sid);
5713b071742bSsg70180 			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
57141ae08745Sheppo 			return;
57151ae08745Sheppo 		}
57161ae08745Sheppo 	}
57171ae08745Sheppo 
57181ae08745Sheppo 	/*
57191ae08745Sheppo 	 * It is an error for us to be getting data packets
57201ae08745Sheppo 	 * before the handshake has completed.
57211ae08745Sheppo 	 */
57221ae08745Sheppo 	if (ldcp->hphase != VSW_MILESTONE4) {
57231ae08745Sheppo 		DERR(vswp, "%s: got data packet before handshake complete "
57241ae08745Sheppo 			"hphase %d (%x: %x)", __func__, ldcp->hphase,
57251ae08745Sheppo 			ldcp->lane_in.lstate, ldcp->lane_out.lstate);
57261ae08745Sheppo 		DUMP_FLAGS(ldcp->lane_in.lstate);
57271ae08745Sheppo 		DUMP_FLAGS(ldcp->lane_out.lstate);
5728b071742bSsg70180 		vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
57291ae08745Sheppo 		return;
57301ae08745Sheppo 	}
57311ae08745Sheppo 
57321ae08745Sheppo 	/*
57331ae08745Sheppo 	 * Switch on vio_subtype envelope, then let lower routines
57341ae08745Sheppo 	 * decide if its an INFO, ACK or NACK packet.
57351ae08745Sheppo 	 */
57361ae08745Sheppo 	if (env == VIO_DRING_DATA) {
57371ae08745Sheppo 		vsw_process_data_dring_pkt(ldcp, dpkt);
57381ae08745Sheppo 	} else if (env == VIO_PKT_DATA) {
57391ae08745Sheppo 		vsw_process_data_raw_pkt(ldcp, dpkt);
57401ae08745Sheppo 	} else if (env == VIO_DESC_DATA) {
57411ae08745Sheppo 		vsw_process_data_ibnd_pkt(ldcp, dpkt);
57421ae08745Sheppo 	} else {
57431ae08745Sheppo 		DERR(vswp, "%s : unknown vio_subtype_env (%x)\n",
57441ae08745Sheppo 							__func__, env);
57451ae08745Sheppo 	}
57461ae08745Sheppo 
57471ae08745Sheppo 	D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id);
57481ae08745Sheppo }
57491ae08745Sheppo 
/*
 * Turn the dring data message 'pkt' into a NACK carrying our local
 * session id and send it back over 'ldcp'.
 *
 * Wrapped in do/while(0) so that the expansion is a single statement and
 * stays correct under an unbraced if/else; the caller supplies the
 * terminating semicolon. Both arguments are evaluated more than once, so
 * they must be free of side effects.
 */
#define	SND_DRING_NACK(ldcp, pkt) \
	do { \
		(pkt)->tag.vio_subtype = VIO_SUBTYPE_NACK; \
		(pkt)->tag.vio_sid = (ldcp)->local_session; \
		(void) vsw_send_msg((ldcp), (void *)(pkt), \
			sizeof (vio_dring_msg_t), B_TRUE); \
	} while (0)
57551ae08745Sheppo 
57561ae08745Sheppo static void
57571ae08745Sheppo vsw_process_data_dring_pkt(vsw_ldc_t *ldcp, void *dpkt)
57581ae08745Sheppo {
57591ae08745Sheppo 	vio_dring_msg_t		*dring_pkt;
57601ae08745Sheppo 	vnet_public_desc_t	*pub_addr = NULL;
57611ae08745Sheppo 	vsw_private_desc_t	*priv_addr = NULL;
57621ae08745Sheppo 	dring_info_t		*dp = NULL;
57631ae08745Sheppo 	vsw_t			*vswp = ldcp->ldc_vswp;
57641ae08745Sheppo 	mblk_t			*mp = NULL;
57651ae08745Sheppo 	mblk_t			*bp = NULL;
57661ae08745Sheppo 	mblk_t			*bpt = NULL;
57671ae08745Sheppo 	size_t			nbytes = 0;
57681ae08745Sheppo 	size_t			off = 0;
57691ae08745Sheppo 	uint64_t		ncookies = 0;
57701ae08745Sheppo 	uint64_t		chain = 0;
5771d10e4ef2Snarayan 	uint64_t		j, len;
5772d10e4ef2Snarayan 	uint32_t		pos, start, datalen;
5773d10e4ef2Snarayan 	uint32_t		range_start, range_end;
5774d10e4ef2Snarayan 	int32_t			end, num, cnt = 0;
5775b071742bSsg70180 	int			i, rv, msg_rv = 0;
57761ae08745Sheppo 	boolean_t		ack_needed = B_FALSE;
5777d10e4ef2Snarayan 	boolean_t		prev_desc_ack = B_FALSE;
5778d10e4ef2Snarayan 	int			read_attempts = 0;
57791ae08745Sheppo 
57801ae08745Sheppo 	D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);
57811ae08745Sheppo 
57821ae08745Sheppo 	/*
57831ae08745Sheppo 	 * We know this is a data/dring packet so
57841ae08745Sheppo 	 * cast it into the correct structure.
57851ae08745Sheppo 	 */
57861ae08745Sheppo 	dring_pkt = (vio_dring_msg_t *)dpkt;
57871ae08745Sheppo 
57881ae08745Sheppo 	/*
57891ae08745Sheppo 	 * Switch on the vio_subtype. If its INFO then we need to
57901ae08745Sheppo 	 * process the data. If its an ACK we need to make sure
57911ae08745Sheppo 	 * it makes sense (i.e did we send an earlier data/info),
57921ae08745Sheppo 	 * and if its a NACK then we maybe attempt a retry.
57931ae08745Sheppo 	 */
57941ae08745Sheppo 	switch (dring_pkt->tag.vio_subtype) {
57951ae08745Sheppo 	case VIO_SUBTYPE_INFO:
57961ae08745Sheppo 		D2(vswp, "%s(%lld): VIO_SUBTYPE_INFO", __func__, ldcp->ldc_id);
57971ae08745Sheppo 
5798445b4c2eSsb155480 		READ_ENTER(&ldcp->lane_in.dlistrw);
57991ae08745Sheppo 		if ((dp = vsw_ident2dring(&ldcp->lane_in,
58001ae08745Sheppo 				dring_pkt->dring_ident)) == NULL) {
5801445b4c2eSsb155480 			RW_EXIT(&ldcp->lane_in.dlistrw);
58021ae08745Sheppo 
58031ae08745Sheppo 			DERR(vswp, "%s(%lld): unable to find dring from "
58041ae08745Sheppo 				"ident 0x%llx", __func__, ldcp->ldc_id,
58051ae08745Sheppo 				dring_pkt->dring_ident);
58061ae08745Sheppo 
58071ae08745Sheppo 			SND_DRING_NACK(ldcp, dring_pkt);
58081ae08745Sheppo 			return;
58091ae08745Sheppo 		}
58101ae08745Sheppo 
5811d10e4ef2Snarayan 		start = pos = dring_pkt->start_idx;
58121ae08745Sheppo 		end = dring_pkt->end_idx;
5813d10e4ef2Snarayan 		len = dp->num_descriptors;
58141ae08745Sheppo 
5815d10e4ef2Snarayan 		range_start = range_end = pos;
5816d10e4ef2Snarayan 
5817d10e4ef2Snarayan 		D2(vswp, "%s(%lld): start index %ld : end %ld\n",
58181ae08745Sheppo 			__func__, ldcp->ldc_id, start, end);
58191ae08745Sheppo 
5820d10e4ef2Snarayan 		if (end == -1) {
5821d10e4ef2Snarayan 			num = -1;
58224bac2208Snarayan 		} else if (end >= 0) {
5823d10e4ef2Snarayan 			num = end >= pos ?
5824d10e4ef2Snarayan 				end - pos + 1: (len - pos + 1) + end;
5825d10e4ef2Snarayan 
58261ae08745Sheppo 			/* basic sanity check */
58271ae08745Sheppo 			if (end > len) {
5828445b4c2eSsb155480 				RW_EXIT(&ldcp->lane_in.dlistrw);
5829d10e4ef2Snarayan 				DERR(vswp, "%s(%lld): endpoint %lld outside "
5830d10e4ef2Snarayan 					"ring length %lld", __func__,
5831d10e4ef2Snarayan 					ldcp->ldc_id, end, len);
58321ae08745Sheppo 
58331ae08745Sheppo 				SND_DRING_NACK(ldcp, dring_pkt);
58341ae08745Sheppo 				return;
58351ae08745Sheppo 			}
5836d10e4ef2Snarayan 		} else {
5837445b4c2eSsb155480 			RW_EXIT(&ldcp->lane_in.dlistrw);
5838d10e4ef2Snarayan 			DERR(vswp, "%s(%lld): invalid endpoint %lld",
5839d10e4ef2Snarayan 				__func__, ldcp->ldc_id, end);
5840d10e4ef2Snarayan 			SND_DRING_NACK(ldcp, dring_pkt);
58411ae08745Sheppo 			return;
58421ae08745Sheppo 		}
58431ae08745Sheppo 
5844d10e4ef2Snarayan 		while (cnt != num) {
5845d10e4ef2Snarayan vsw_recheck_desc:
5846d10e4ef2Snarayan 			if ((rv = ldc_mem_dring_acquire(dp->handle,
5847d10e4ef2Snarayan 							pos, pos)) != 0) {
5848445b4c2eSsb155480 				RW_EXIT(&ldcp->lane_in.dlistrw);
5849d10e4ef2Snarayan 				DERR(vswp, "%s(%lld): unable to acquire "
5850d10e4ef2Snarayan 					"descriptor at pos %d: err %d",
5851d10e4ef2Snarayan 					__func__, pos, ldcp->ldc_id, rv);
5852d10e4ef2Snarayan 				SND_DRING_NACK(ldcp, dring_pkt);
5853d10e4ef2Snarayan 				return;
5854d10e4ef2Snarayan 			}
58551ae08745Sheppo 
5856d10e4ef2Snarayan 			pub_addr = (vnet_public_desc_t *)dp->pub_addr + pos;
58571ae08745Sheppo 
5858d10e4ef2Snarayan 			/*
5859d10e4ef2Snarayan 			 * When given a bounded range of descriptors
5860d10e4ef2Snarayan 			 * to process, its an error to hit a descriptor
5861d10e4ef2Snarayan 			 * which is not ready. In the non-bounded case
5862d10e4ef2Snarayan 			 * (end_idx == -1) this simply indicates we have
5863d10e4ef2Snarayan 			 * reached the end of the current active range.
5864d10e4ef2Snarayan 			 */
5865d10e4ef2Snarayan 			if (pub_addr->hdr.dstate != VIO_DESC_READY) {
5866d10e4ef2Snarayan 				/* unbound - no error */
5867d10e4ef2Snarayan 				if (end == -1) {
5868d10e4ef2Snarayan 					if (read_attempts == vsw_read_attempts)
5869d10e4ef2Snarayan 						break;
58701ae08745Sheppo 
5871d10e4ef2Snarayan 					delay(drv_usectohz(vsw_desc_delay));
5872d10e4ef2Snarayan 					read_attempts++;
5873d10e4ef2Snarayan 					goto vsw_recheck_desc;
5874d10e4ef2Snarayan 				}
58751ae08745Sheppo 
5876d10e4ef2Snarayan 				/* bounded - error - so NACK back */
5877445b4c2eSsb155480 				RW_EXIT(&ldcp->lane_in.dlistrw);
5878d10e4ef2Snarayan 				DERR(vswp, "%s(%lld): descriptor not READY "
5879d10e4ef2Snarayan 					"(%d)", __func__, ldcp->ldc_id,
5880d10e4ef2Snarayan 					pub_addr->hdr.dstate);
5881d10e4ef2Snarayan 				SND_DRING_NACK(ldcp, dring_pkt);
5882d10e4ef2Snarayan 				return;
5883d10e4ef2Snarayan 			}
5884d10e4ef2Snarayan 
5885d10e4ef2Snarayan 			DTRACE_PROBE1(read_attempts, int, read_attempts);
5886d10e4ef2Snarayan 
5887d10e4ef2Snarayan 			range_end = pos;
5888d10e4ef2Snarayan 
5889d10e4ef2Snarayan 			/*
5890d10e4ef2Snarayan 			 * If we ACK'd the previous descriptor then now
5891d10e4ef2Snarayan 			 * record the new range start position for later
5892d10e4ef2Snarayan 			 * ACK's.
5893d10e4ef2Snarayan 			 */
5894d10e4ef2Snarayan 			if (prev_desc_ack) {
5895d10e4ef2Snarayan 				range_start = pos;
5896d10e4ef2Snarayan 
5897d10e4ef2Snarayan 				D2(vswp, "%s(%lld): updating range start "
5898d10e4ef2Snarayan 					"to be %d", __func__, ldcp->ldc_id,
5899d10e4ef2Snarayan 					range_start);
5900d10e4ef2Snarayan 
5901d10e4ef2Snarayan 				prev_desc_ack = B_FALSE;
5902d10e4ef2Snarayan 			}
59031ae08745Sheppo 
59041ae08745Sheppo 			/*
59051ae08745Sheppo 			 * Data is padded to align on 8 byte boundary,
59061ae08745Sheppo 			 * datalen is actual data length, i.e. minus that
59071ae08745Sheppo 			 * padding.
59081ae08745Sheppo 			 */
59091ae08745Sheppo 			datalen = pub_addr->nbytes;
59101ae08745Sheppo 
59111ae08745Sheppo 			/*
59121ae08745Sheppo 			 * Does peer wish us to ACK when we have finished
59131ae08745Sheppo 			 * with this descriptor ?
59141ae08745Sheppo 			 */
59151ae08745Sheppo 			if (pub_addr->hdr.ack)
59161ae08745Sheppo 				ack_needed = B_TRUE;
59171ae08745Sheppo 
59181ae08745Sheppo 			D2(vswp, "%s(%lld): processing desc %lld at pos"
59191ae08745Sheppo 				" 0x%llx : dstate 0x%lx : datalen 0x%lx",
5920d10e4ef2Snarayan 				__func__, ldcp->ldc_id, pos, pub_addr,
59211ae08745Sheppo 				pub_addr->hdr.dstate, datalen);
59221ae08745Sheppo 
59231ae08745Sheppo 			/*
59241ae08745Sheppo 			 * Mark that we are starting to process descriptor.
59251ae08745Sheppo 			 */
59261ae08745Sheppo 			pub_addr->hdr.dstate = VIO_DESC_ACCEPTED;
59271ae08745Sheppo 
5928d10e4ef2Snarayan 			mp = vio_allocb(ldcp->rxh);
5929d10e4ef2Snarayan 			if (mp == NULL) {
59301ae08745Sheppo 				/*
5931d10e4ef2Snarayan 				 * No free receive buffers available, so
5932d10e4ef2Snarayan 				 * fallback onto allocb(9F). Make sure that
5933d10e4ef2Snarayan 				 * we get a data buffer which is a multiple
5934d10e4ef2Snarayan 				 * of 8 as this is required by ldc_mem_copy.
59351ae08745Sheppo 				 */
5936d10e4ef2Snarayan 				DTRACE_PROBE(allocb);
5937d10e4ef2Snarayan 				mp = allocb(datalen + VNET_IPALIGN + 8,
5938d10e4ef2Snarayan 								BPRI_MED);
5939d10e4ef2Snarayan 			}
5940d10e4ef2Snarayan 
5941d10e4ef2Snarayan 			/*
5942d10e4ef2Snarayan 			 * Ensure that we ask ldc for an aligned
5943d10e4ef2Snarayan 			 * number of bytes.
5944d10e4ef2Snarayan 			 */
5945d10e4ef2Snarayan 			nbytes = datalen + VNET_IPALIGN;
59461ae08745Sheppo 			if (nbytes & 0x7) {
59471ae08745Sheppo 				off = 8 - (nbytes & 0x7);
59481ae08745Sheppo 				nbytes += off;
59491ae08745Sheppo 			}
59501ae08745Sheppo 
59511ae08745Sheppo 			ncookies = pub_addr->ncookies;
59521ae08745Sheppo 			rv = ldc_mem_copy(ldcp->ldc_handle,
59531ae08745Sheppo 				(caddr_t)mp->b_rptr, 0, &nbytes,
59541ae08745Sheppo 				pub_addr->memcookie, ncookies,
59551ae08745Sheppo 				LDC_COPY_IN);
59561ae08745Sheppo 
59571ae08745Sheppo 			if (rv != 0) {
59581ae08745Sheppo 				DERR(vswp, "%s(%d): unable to copy in "
5959d10e4ef2Snarayan 					"data from %d cookies in desc %d"
5960d10e4ef2Snarayan 					" (rv %d)", __func__, ldcp->ldc_id,
5961d10e4ef2Snarayan 					ncookies, pos, rv);
59621ae08745Sheppo 				freemsg(mp);
5963d10e4ef2Snarayan 
5964d10e4ef2Snarayan 				pub_addr->hdr.dstate = VIO_DESC_DONE;
59651ae08745Sheppo 				(void) ldc_mem_dring_release(dp->handle,
5966d10e4ef2Snarayan 								pos, pos);
5967d10e4ef2Snarayan 				break;
59681ae08745Sheppo 			} else {
59691ae08745Sheppo 				D2(vswp, "%s(%d): copied in %ld bytes"
59701ae08745Sheppo 					" using %d cookies", __func__,
59711ae08745Sheppo 					ldcp->ldc_id, nbytes, ncookies);
59721ae08745Sheppo 			}
59731ae08745Sheppo 
5974d10e4ef2Snarayan 			/* adjust the read pointer to skip over the padding */
5975d10e4ef2Snarayan 			mp->b_rptr += VNET_IPALIGN;
5976d10e4ef2Snarayan 
59771ae08745Sheppo 			/* point to the actual end of data */
59781ae08745Sheppo 			mp->b_wptr = mp->b_rptr + datalen;
59791ae08745Sheppo 
59801ae08745Sheppo 			/* build a chain of received packets */
59811ae08745Sheppo 			if (bp == NULL) {
59821ae08745Sheppo 				/* first pkt */
59831ae08745Sheppo 				bp = mp;
59841ae08745Sheppo 				bp->b_next = bp->b_prev = NULL;
59851ae08745Sheppo 				bpt = bp;
59861ae08745Sheppo 				chain = 1;
59871ae08745Sheppo 			} else {
59881ae08745Sheppo 				mp->b_next = NULL;
59891ae08745Sheppo 				mp->b_prev = bpt;
59901ae08745Sheppo 				bpt->b_next = mp;
59911ae08745Sheppo 				bpt = mp;
59921ae08745Sheppo 				chain++;
59931ae08745Sheppo 			}
59941ae08745Sheppo 
59951ae08745Sheppo 			/* mark we are finished with this descriptor */
59961ae08745Sheppo 			pub_addr->hdr.dstate = VIO_DESC_DONE;
59971ae08745Sheppo 
5998d10e4ef2Snarayan 			(void) ldc_mem_dring_release(dp->handle, pos, pos);
5999d10e4ef2Snarayan 
60001ae08745Sheppo 			/*
6001d10e4ef2Snarayan 			 * Send an ACK back to peer if requested.
60021ae08745Sheppo 			 */
60031ae08745Sheppo 			if (ack_needed) {
60041ae08745Sheppo 				ack_needed = B_FALSE;
60051ae08745Sheppo 
6006d10e4ef2Snarayan 				dring_pkt->start_idx = range_start;
6007d10e4ef2Snarayan 				dring_pkt->end_idx = range_end;
60081ae08745Sheppo 
6009d10e4ef2Snarayan 				DERR(vswp, "%s(%lld): processed %d %d, ACK"
6010d10e4ef2Snarayan 					" requested", __func__, ldcp->ldc_id,
6011d10e4ef2Snarayan 					dring_pkt->start_idx,
6012d10e4ef2Snarayan 					dring_pkt->end_idx);
60131ae08745Sheppo 
6014d10e4ef2Snarayan 				dring_pkt->dring_process_state = VIO_DP_ACTIVE;
60151ae08745Sheppo 				dring_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK;
60161ae08745Sheppo 				dring_pkt->tag.vio_sid = ldcp->local_session;
6017b071742bSsg70180 				msg_rv = vsw_send_msg(ldcp, (void *)dring_pkt,
6018b071742bSsg70180 						sizeof (vio_dring_msg_t),
6019b071742bSsg70180 						B_FALSE);
6020b071742bSsg70180 
6021b071742bSsg70180 				/*
6022b071742bSsg70180 				 * Check if ACK was successfully sent. If not
6023b071742bSsg70180 				 * we break and deal with that below.
6024b071742bSsg70180 				 */
6025b071742bSsg70180 				if (msg_rv != 0)
6026b071742bSsg70180 					break;
6027d10e4ef2Snarayan 
6028d10e4ef2Snarayan 				prev_desc_ack = B_TRUE;
6029d10e4ef2Snarayan 				range_start = pos;
60301ae08745Sheppo 			}
60311ae08745Sheppo 
6032d10e4ef2Snarayan 			/* next descriptor */
6033d10e4ef2Snarayan 			pos = (pos + 1) % len;
6034d10e4ef2Snarayan 			cnt++;
6035d10e4ef2Snarayan 
6036d10e4ef2Snarayan 			/*
6037d10e4ef2Snarayan 			 * Break out of loop here and stop processing to
6038d10e4ef2Snarayan 			 * allow some other network device (or disk) to
6039d10e4ef2Snarayan 			 * get access to the cpu.
6040d10e4ef2Snarayan 			 */
6041d10e4ef2Snarayan 			if (chain > vsw_chain_len) {
6042d10e4ef2Snarayan 				D3(vswp, "%s(%lld): switching chain of %d "
6043d10e4ef2Snarayan 					"msgs", __func__, ldcp->ldc_id, chain);
6044d10e4ef2Snarayan 				break;
60451ae08745Sheppo 			}
60461ae08745Sheppo 		}
6047445b4c2eSsb155480 		RW_EXIT(&ldcp->lane_in.dlistrw);
60481ae08745Sheppo 
6049b071742bSsg70180 		/*
6050b071742bSsg70180 		 * If when we attempted to send the ACK we found that the
6051b071742bSsg70180 		 * channel had been reset then now handle this. We deal with
6052b071742bSsg70180 		 * it here as we cannot reset the channel while holding the
6053b071742bSsg70180 		 * dlistrw lock, and we don't want to acquire/release it
6054b071742bSsg70180 		 * continuously in the above loop, as a channel reset should
6055b071742bSsg70180 		 * be a rare event.
6056b071742bSsg70180 		 */
6057b071742bSsg70180 		if (msg_rv == ECONNRESET) {
6058b071742bSsg70180 			vsw_process_conn_evt(ldcp, VSW_CONN_RESET);
6059b071742bSsg70180 			break;
6060b071742bSsg70180 		}
6061b071742bSsg70180 
60621ae08745Sheppo 		/* send the chain of packets to be switched */
6063d10e4ef2Snarayan 		if (bp != NULL) {
6064d10e4ef2Snarayan 			D3(vswp, "%s(%lld): switching chain of %d msgs",
6065d10e4ef2Snarayan 					__func__, ldcp->ldc_id, chain);
606634683adeSsg70180 			vswp->vsw_switch_frame(vswp, bp, VSW_VNETPORT,
60671ae08745Sheppo 							ldcp->ldc_port, NULL);
6068d10e4ef2Snarayan 		}
60691ae08745Sheppo 
6070d10e4ef2Snarayan 		DTRACE_PROBE1(msg_cnt, int, cnt);
6071d10e4ef2Snarayan 
6072d10e4ef2Snarayan 		/*
6073d10e4ef2Snarayan 		 * We are now finished so ACK back with the state
6074d10e4ef2Snarayan 		 * set to STOPPING so our peer knows we are finished
6075d10e4ef2Snarayan 		 */
6076d10e4ef2Snarayan 		dring_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK;
6077d10e4ef2Snarayan 		dring_pkt->tag.vio_sid = ldcp->local_session;
6078d10e4ef2Snarayan 
6079d10e4ef2Snarayan 		dring_pkt->dring_process_state = VIO_DP_STOPPED;
6080d10e4ef2Snarayan 
6081d10e4ef2Snarayan 		DTRACE_PROBE(stop_process_sent);
6082d10e4ef2Snarayan 
6083d10e4ef2Snarayan 		/*
6084d10e4ef2Snarayan 		 * We have not processed any more descriptors beyond
6085d10e4ef2Snarayan 		 * the last one we ACK'd.
6086d10e4ef2Snarayan 		 */
6087d10e4ef2Snarayan 		if (prev_desc_ack)
6088d10e4ef2Snarayan 			range_start = range_end;
6089d10e4ef2Snarayan 
6090d10e4ef2Snarayan 		dring_pkt->start_idx = range_start;
6091d10e4ef2Snarayan 		dring_pkt->end_idx = range_end;
6092d10e4ef2Snarayan 
6093d10e4ef2Snarayan 		D2(vswp, "%s(%lld) processed : %d : %d, now stopping",
6094d10e4ef2Snarayan 			__func__, ldcp->ldc_id, dring_pkt->start_idx,
6095d10e4ef2Snarayan 			dring_pkt->end_idx);
6096d10e4ef2Snarayan 
6097b071742bSsg70180 		(void) vsw_send_msg(ldcp, (void *)dring_pkt,
6098b071742bSsg70180 				sizeof (vio_dring_msg_t), B_TRUE);
60991ae08745Sheppo 		break;
61001ae08745Sheppo 
61011ae08745Sheppo 	case VIO_SUBTYPE_ACK:
61021ae08745Sheppo 		D2(vswp, "%s(%lld): VIO_SUBTYPE_ACK", __func__, ldcp->ldc_id);
61031ae08745Sheppo 		/*
61041ae08745Sheppo 		 * Verify that the relevant descriptors are all
61051ae08745Sheppo 		 * marked as DONE
61061ae08745Sheppo 		 */
6107445b4c2eSsb155480 		READ_ENTER(&ldcp->lane_out.dlistrw);
61081ae08745Sheppo 		if ((dp = vsw_ident2dring(&ldcp->lane_out,
61091ae08745Sheppo 			dring_pkt->dring_ident)) == NULL) {
6110445b4c2eSsb155480 			RW_EXIT(&ldcp->lane_out.dlistrw);
61111ae08745Sheppo 			DERR(vswp, "%s: unknown ident in ACK", __func__);
61121ae08745Sheppo 			return;
61131ae08745Sheppo 		}
61141ae08745Sheppo 
61151ae08745Sheppo 		pub_addr = (vnet_public_desc_t *)dp->pub_addr;
61161ae08745Sheppo 		priv_addr = (vsw_private_desc_t *)dp->priv_addr;
61171ae08745Sheppo 
61181ae08745Sheppo 		start = end = 0;
61191ae08745Sheppo 		start = dring_pkt->start_idx;
61201ae08745Sheppo 		end = dring_pkt->end_idx;
61211ae08745Sheppo 		len = dp->num_descriptors;
61221ae08745Sheppo 
61231ae08745Sheppo 		j = num = 0;
61241ae08745Sheppo 		/* calculate # descriptors taking into a/c wrap around */
61251ae08745Sheppo 		num = end >= start ? end - start + 1: (len - start + 1) + end;
61261ae08745Sheppo 
61271ae08745Sheppo 		D2(vswp, "%s(%lld): start index %ld : end %ld : num %ld\n",
61281ae08745Sheppo 			__func__, ldcp->ldc_id, start, end, num);
61291ae08745Sheppo 
6130d10e4ef2Snarayan 		mutex_enter(&dp->dlock);
6131d10e4ef2Snarayan 		dp->last_ack_recv = end;
6132d10e4ef2Snarayan 		mutex_exit(&dp->dlock);
6133d10e4ef2Snarayan 
61341ae08745Sheppo 		for (i = start; j < num; i = (i + 1) % len, j++) {
61351ae08745Sheppo 			pub_addr = (vnet_public_desc_t *)dp->pub_addr + i;
61361ae08745Sheppo 			priv_addr = (vsw_private_desc_t *)dp->priv_addr + i;
61371ae08745Sheppo 
6138d10e4ef2Snarayan 			/*
6139d10e4ef2Snarayan 			 * If the last descriptor in a range has the ACK
6140d10e4ef2Snarayan 			 * bit set then we will get two messages from our
6141d10e4ef2Snarayan 			 * peer relating to it. The normal ACK msg and then
6142d10e4ef2Snarayan 			 * a subsequent STOP msg. The first message will have
6143d10e4ef2Snarayan 			 * resulted in the descriptor being reclaimed and
6144d10e4ef2Snarayan 			 * its state set to FREE so when we encounter a non
6145d10e4ef2Snarayan 			 * DONE descriptor we need to check to see if its
6146d10e4ef2Snarayan 			 * because we have just reclaimed it.
6147d10e4ef2Snarayan 			 */
6148d10e4ef2Snarayan 			mutex_enter(&priv_addr->dstate_lock);
6149d10e4ef2Snarayan 			if (pub_addr->hdr.dstate == VIO_DESC_DONE) {
61501ae08745Sheppo 				/* clear all the fields */
61511ae08745Sheppo 				bzero(priv_addr->datap, priv_addr->datalen);
61521ae08745Sheppo 				priv_addr->datalen = 0;
61531ae08745Sheppo 
61541ae08745Sheppo 				pub_addr->hdr.dstate = VIO_DESC_FREE;
61551ae08745Sheppo 				pub_addr->hdr.ack = 0;
6156d10e4ef2Snarayan 
61571ae08745Sheppo 				priv_addr->dstate = VIO_DESC_FREE;
6158d10e4ef2Snarayan 				mutex_exit(&priv_addr->dstate_lock);
61591ae08745Sheppo 
61601ae08745Sheppo 				D3(vswp, "clearing descp %d : pub state "
61611ae08745Sheppo 					"0x%llx : priv state 0x%llx", i,
61621ae08745Sheppo 					pub_addr->hdr.dstate,
61631ae08745Sheppo 					priv_addr->dstate);
6164d10e4ef2Snarayan 
6165d10e4ef2Snarayan 			} else {
6166d10e4ef2Snarayan 				mutex_exit(&priv_addr->dstate_lock);
6167d10e4ef2Snarayan 
6168d10e4ef2Snarayan 				if (dring_pkt->dring_process_state !=
6169d10e4ef2Snarayan 							VIO_DP_STOPPED) {
6170d10e4ef2Snarayan 					DERR(vswp, "%s: descriptor %lld at pos "
6171d10e4ef2Snarayan 						" 0x%llx not DONE (0x%lx)\n",
6172d10e4ef2Snarayan 						__func__, i, pub_addr,
6173d10e4ef2Snarayan 						pub_addr->hdr.dstate);
6174445b4c2eSsb155480 					RW_EXIT(&ldcp->lane_out.dlistrw);
6175d10e4ef2Snarayan 					return;
6176d10e4ef2Snarayan 				}
61771ae08745Sheppo 			}
61781ae08745Sheppo 		}
61791ae08745Sheppo 
6180d10e4ef2Snarayan 		/*
6181d10e4ef2Snarayan 		 * If our peer is stopping processing descriptors then
6182d10e4ef2Snarayan 		 * we check to make sure it has processed all the descriptors
6183d10e4ef2Snarayan 		 * we have updated. If not then we send it a new message
6184d10e4ef2Snarayan 		 * to prompt it to restart.
6185d10e4ef2Snarayan 		 */
6186d10e4ef2Snarayan 		if (dring_pkt->dring_process_state == VIO_DP_STOPPED) {
6187d10e4ef2Snarayan 			DTRACE_PROBE(stop_process_recv);
6188d10e4ef2Snarayan 			D2(vswp, "%s(%lld): got stopping msg : %d : %d",
6189d10e4ef2Snarayan 				__func__, ldcp->ldc_id, dring_pkt->start_idx,
6190d10e4ef2Snarayan 				dring_pkt->end_idx);
6191d10e4ef2Snarayan 
6192d10e4ef2Snarayan 			/*
6193d10e4ef2Snarayan 			 * Check next descriptor in public section of ring.
6194d10e4ef2Snarayan 			 * If its marked as READY then we need to prompt our
6195d10e4ef2Snarayan 			 * peer to start processing the ring again.
6196d10e4ef2Snarayan 			 */
6197d10e4ef2Snarayan 			i = (end + 1) % len;
6198d10e4ef2Snarayan 			pub_addr = (vnet_public_desc_t *)dp->pub_addr + i;
6199d10e4ef2Snarayan 			priv_addr = (vsw_private_desc_t *)dp->priv_addr + i;
6200d10e4ef2Snarayan 
6201d10e4ef2Snarayan 			/*
6202d10e4ef2Snarayan 			 * Hold the restart lock across all of this to
6203d10e4ef2Snarayan 			 * make sure that its not possible for us to
6204d10e4ef2Snarayan 			 * decide that a msg needs to be sent in the future
6205d10e4ef2Snarayan 			 * but the sending code having already checked is
6206d10e4ef2Snarayan 			 * about to exit.
6207d10e4ef2Snarayan 			 */
6208d10e4ef2Snarayan 			mutex_enter(&dp->restart_lock);
6209d10e4ef2Snarayan 			mutex_enter(&priv_addr->dstate_lock);
6210d10e4ef2Snarayan 			if (pub_addr->hdr.dstate == VIO_DESC_READY) {
6211d10e4ef2Snarayan 
6212d10e4ef2Snarayan 				mutex_exit(&priv_addr->dstate_lock);
6213d10e4ef2Snarayan 
6214d10e4ef2Snarayan 				dring_pkt->tag.vio_subtype = VIO_SUBTYPE_INFO;
6215d10e4ef2Snarayan 				dring_pkt->tag.vio_sid = ldcp->local_session;
6216d10e4ef2Snarayan 
6217d10e4ef2Snarayan 				mutex_enter(&ldcp->lane_out.seq_lock);
6218d10e4ef2Snarayan 				dring_pkt->seq_num = ldcp->lane_out.seq_num++;
6219d10e4ef2Snarayan 				mutex_exit(&ldcp->lane_out.seq_lock);
6220d10e4ef2Snarayan 
6221d10e4ef2Snarayan 				dring_pkt->start_idx = (end + 1) % len;
6222d10e4ef2Snarayan 				dring_pkt->end_idx = -1;
6223d10e4ef2Snarayan 
6224d10e4ef2Snarayan 				D2(vswp, "%s(%lld) : sending restart msg:"
6225d10e4ef2Snarayan 					" %d : %d", __func__, ldcp->ldc_id,
6226d10e4ef2Snarayan 					dring_pkt->start_idx,
6227d10e4ef2Snarayan 					dring_pkt->end_idx);
6228d10e4ef2Snarayan 
6229b071742bSsg70180 				msg_rv = vsw_send_msg(ldcp, (void *)dring_pkt,
6230b071742bSsg70180 					sizeof (vio_dring_msg_t), B_FALSE);
6231b071742bSsg70180 
6232d10e4ef2Snarayan 			} else {
6233d10e4ef2Snarayan 				mutex_exit(&priv_addr->dstate_lock);
6234d10e4ef2Snarayan 				dp->restart_reqd = B_TRUE;
6235d10e4ef2Snarayan 			}
6236d10e4ef2Snarayan 			mutex_exit(&dp->restart_lock);
6237d10e4ef2Snarayan 		}
6238445b4c2eSsb155480 		RW_EXIT(&ldcp->lane_out.dlistrw);
6239b071742bSsg70180 
6240b071742bSsg70180 		/* only do channel reset after dropping dlistrw lock */
6241b071742bSsg70180 		if (msg_rv == ECONNRESET)
6242b071742bSsg70180 			vsw_process_conn_evt(ldcp, VSW_CONN_RESET);
6243b071742bSsg70180 
62441ae08745Sheppo 		break;
62451ae08745Sheppo 
62461ae08745Sheppo 	case VIO_SUBTYPE_NACK:
62471ae08745Sheppo 		DWARN(vswp, "%s(%lld): VIO_SUBTYPE_NACK",
62481ae08745Sheppo 						__func__, ldcp->ldc_id);
62491ae08745Sheppo 		/*
62501ae08745Sheppo 		 * Something is badly wrong if we are getting NACK's
62511ae08745Sheppo 		 * for our data pkts. So reset the channel.
62521ae08745Sheppo 		 */
6253b071742bSsg70180 		vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
62541ae08745Sheppo 
62551ae08745Sheppo 		break;
62561ae08745Sheppo 
62571ae08745Sheppo 	default:
62581ae08745Sheppo 		DERR(vswp, "%s(%lld): Unknown vio_subtype %x\n", __func__,
62591ae08745Sheppo 			ldcp->ldc_id, dring_pkt->tag.vio_subtype);
62601ae08745Sheppo 	}
62611ae08745Sheppo 
62621ae08745Sheppo 	D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id);
62631ae08745Sheppo }
62641ae08745Sheppo 
62651ae08745Sheppo /*
62661ae08745Sheppo  * VIO_PKT_DATA (a.k.a raw data mode )
62671ae08745Sheppo  *
62681ae08745Sheppo  * Note - currently not supported. Do nothing.
62691ae08745Sheppo  */
62701ae08745Sheppo static void
62711ae08745Sheppo vsw_process_data_raw_pkt(vsw_ldc_t *ldcp, void *dpkt)
62721ae08745Sheppo {
62731ae08745Sheppo 	_NOTE(ARGUNUSED(dpkt))
62741ae08745Sheppo 
62751ae08745Sheppo 	D1(NULL, "%s (%lld): enter\n", __func__, ldcp->ldc_id);
62761ae08745Sheppo 
62771ae08745Sheppo 	DERR(NULL, "%s (%lld): currently  not supported",
62781ae08745Sheppo 						__func__, ldcp->ldc_id);
62791ae08745Sheppo 
62801ae08745Sheppo 	D1(NULL, "%s (%lld): exit\n", __func__, ldcp->ldc_id);
62811ae08745Sheppo }
62821ae08745Sheppo 
62831ae08745Sheppo /*
62841ae08745Sheppo  * Process an in-band descriptor message (most likely from
62851ae08745Sheppo  * OBP).
62861ae08745Sheppo  */
62871ae08745Sheppo static void
62881ae08745Sheppo vsw_process_data_ibnd_pkt(vsw_ldc_t *ldcp, void *pkt)
62891ae08745Sheppo {
6290445b4c2eSsb155480 	vnet_ibnd_desc_t	*ibnd_desc;
62911ae08745Sheppo 	dring_info_t		*dp = NULL;
62921ae08745Sheppo 	vsw_private_desc_t	*priv_addr = NULL;
62931ae08745Sheppo 	vsw_t			*vswp = ldcp->ldc_vswp;
62941ae08745Sheppo 	mblk_t			*mp = NULL;
6295*023505bcSraghuram 	mblk_t			*nmp;
62961ae08745Sheppo 	size_t			nbytes = 0;
62971ae08745Sheppo 	size_t			off = 0;
62981ae08745Sheppo 	uint64_t		idx = 0;
62994bac2208Snarayan 	uint32_t		num = 1, len, datalen = 0;
63001ae08745Sheppo 	uint64_t		ncookies = 0;
63014bac2208Snarayan 	int			i, rv;
63024bac2208Snarayan 	int			j = 0;
63031ae08745Sheppo 
63041ae08745Sheppo 	D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);
63051ae08745Sheppo 
6306445b4c2eSsb155480 	ibnd_desc = (vnet_ibnd_desc_t *)pkt;
63071ae08745Sheppo 
63081ae08745Sheppo 	switch (ibnd_desc->hdr.tag.vio_subtype) {
63091ae08745Sheppo 	case VIO_SUBTYPE_INFO:
63101ae08745Sheppo 		D1(vswp, "%s: VIO_SUBTYPE_INFO", __func__);
63111ae08745Sheppo 
63121ae08745Sheppo 		if (vsw_check_flag(ldcp, INBOUND, VSW_DRING_INFO_RECV))
63131ae08745Sheppo 			return;
63141ae08745Sheppo 
63151ae08745Sheppo 		/*
63161ae08745Sheppo 		 * Data is padded to align on a 8 byte boundary,
63171ae08745Sheppo 		 * nbytes is actual data length, i.e. minus that
63181ae08745Sheppo 		 * padding.
63191ae08745Sheppo 		 */
63201ae08745Sheppo 		datalen = ibnd_desc->nbytes;
63211ae08745Sheppo 
63221ae08745Sheppo 		D2(vswp, "%s(%lld): processing inband desc : "
63231ae08745Sheppo 			": datalen 0x%lx", __func__, ldcp->ldc_id, datalen);
63241ae08745Sheppo 
63251ae08745Sheppo 		ncookies = ibnd_desc->ncookies;
63261ae08745Sheppo 
63271ae08745Sheppo 		/*
63281ae08745Sheppo 		 * allocb(9F) returns an aligned data block. We
63291ae08745Sheppo 		 * need to ensure that we ask ldc for an aligned
63301ae08745Sheppo 		 * number of bytes also.
63311ae08745Sheppo 		 */
63321ae08745Sheppo 		nbytes = datalen;
63331ae08745Sheppo 		if (nbytes & 0x7) {
63341ae08745Sheppo 			off = 8 - (nbytes & 0x7);
63351ae08745Sheppo 			nbytes += off;
63361ae08745Sheppo 		}
63371ae08745Sheppo 
63381ae08745Sheppo 		mp = allocb(datalen, BPRI_MED);
63391ae08745Sheppo 		if (mp == NULL) {
63401ae08745Sheppo 			DERR(vswp, "%s(%lld): allocb failed",
63411ae08745Sheppo 					__func__, ldcp->ldc_id);
63421ae08745Sheppo 			return;
63431ae08745Sheppo 		}
63441ae08745Sheppo 
63451ae08745Sheppo 		rv = ldc_mem_copy(ldcp->ldc_handle, (caddr_t)mp->b_rptr,
63461ae08745Sheppo 			0, &nbytes, ibnd_desc->memcookie, (uint64_t)ncookies,
63471ae08745Sheppo 			LDC_COPY_IN);
63481ae08745Sheppo 
63491ae08745Sheppo 		if (rv != 0) {
63501ae08745Sheppo 			DERR(vswp, "%s(%d): unable to copy in data from "
63511ae08745Sheppo 				"%d cookie(s)", __func__,
63521ae08745Sheppo 				ldcp->ldc_id, ncookies);
63531ae08745Sheppo 			freemsg(mp);
63541ae08745Sheppo 			return;
6355*023505bcSraghuram 		}
6356*023505bcSraghuram 
63571ae08745Sheppo 		D2(vswp, "%s(%d): copied in %ld bytes using %d "
63581ae08745Sheppo 			"cookies", __func__, ldcp->ldc_id, nbytes,
63591ae08745Sheppo 			ncookies);
6360*023505bcSraghuram 
6361*023505bcSraghuram 		/*
6362*023505bcSraghuram 		 * Upper layer is expecting the IP header in the packet to
6363*023505bcSraghuram 		 * be 4-bytes aligned, but the OBP is sending packets that
6364*023505bcSraghuram 		 * are not aligned.  So, copy the data to another message
6365*023505bcSraghuram 		 * such that the alignment requirement is met.
6366*023505bcSraghuram 		 */
6367*023505bcSraghuram 		nmp = allocb(datalen + VNET_IPALIGN, BPRI_MED);
6368*023505bcSraghuram 		if (nmp == NULL) {
6369*023505bcSraghuram 			DERR(vswp, "%s(%lld): allocb failed",
6370*023505bcSraghuram 				__func__, ldcp->ldc_id);
6371*023505bcSraghuram 			freemsg(mp);
6372*023505bcSraghuram 			return;
63731ae08745Sheppo 		}
6374*023505bcSraghuram 		nmp->b_rptr += VNET_IPALIGN;
6375*023505bcSraghuram 		bcopy(mp->b_rptr, nmp->b_rptr, datalen);
6376*023505bcSraghuram 		freemsg(mp);
63771ae08745Sheppo 
63781ae08745Sheppo 		/* point to the actual end of data */
6379*023505bcSraghuram 		nmp->b_wptr = nmp->b_rptr + datalen;
63801ae08745Sheppo 
63811ae08745Sheppo 		/*
63821ae08745Sheppo 		 * We ACK back every in-band descriptor message we process
63831ae08745Sheppo 		 */
63841ae08745Sheppo 		ibnd_desc->hdr.tag.vio_subtype = VIO_SUBTYPE_ACK;
63851ae08745Sheppo 		ibnd_desc->hdr.tag.vio_sid = ldcp->local_session;
6386b071742bSsg70180 		(void) vsw_send_msg(ldcp, (void *)ibnd_desc,
6387b071742bSsg70180 				sizeof (vnet_ibnd_desc_t), B_TRUE);
63881ae08745Sheppo 
63891ae08745Sheppo 		/* send the packet to be switched */
6390*023505bcSraghuram 		vswp->vsw_switch_frame(vswp, nmp, VSW_VNETPORT,
63911ae08745Sheppo 					ldcp->ldc_port, NULL);
63921ae08745Sheppo 
63931ae08745Sheppo 		break;
63941ae08745Sheppo 
63951ae08745Sheppo 	case VIO_SUBTYPE_ACK:
63961ae08745Sheppo 		D1(vswp, "%s: VIO_SUBTYPE_ACK", __func__);
63971ae08745Sheppo 
63981ae08745Sheppo 		/* Verify the ACK is valid */
63991ae08745Sheppo 		idx = ibnd_desc->hdr.desc_handle;
64001ae08745Sheppo 
64011ae08745Sheppo 		if (idx >= VSW_RING_NUM_EL) {
640234683adeSsg70180 			cmn_err(CE_WARN, "!vsw%d: corrupted ACK received "
640334683adeSsg70180 				"(idx %ld)", vswp->instance, idx);
64041ae08745Sheppo 			return;
64051ae08745Sheppo 		}
64061ae08745Sheppo 
64071ae08745Sheppo 		if ((dp = ldcp->lane_out.dringp) == NULL) {
64081ae08745Sheppo 			DERR(vswp, "%s: no dring found", __func__);
64091ae08745Sheppo 			return;
64101ae08745Sheppo 		}
64111ae08745Sheppo 
64124bac2208Snarayan 		len = dp->num_descriptors;
64134bac2208Snarayan 		/*
64144bac2208Snarayan 		 * If the descriptor we are being ACK'ed for is not the
64154bac2208Snarayan 		 * one we expected, then pkts were lost somwhere, either
64164bac2208Snarayan 		 * when we tried to send a msg, or a previous ACK msg from
64174bac2208Snarayan 		 * our peer. In either case we now reclaim the descriptors
64184bac2208Snarayan 		 * in the range from the last ACK we received up to the
64194bac2208Snarayan 		 * current ACK.
64204bac2208Snarayan 		 */
64214bac2208Snarayan 		if (idx != dp->last_ack_recv) {
64224bac2208Snarayan 			DWARN(vswp, "%s: dropped pkts detected, (%ld, %ld)",
64234bac2208Snarayan 				__func__, dp->last_ack_recv, idx);
64244bac2208Snarayan 			num = idx >= dp->last_ack_recv ?
64254bac2208Snarayan 				idx - dp->last_ack_recv + 1:
64264bac2208Snarayan 				(len - dp->last_ack_recv + 1) + idx;
64274bac2208Snarayan 		}
64281ae08745Sheppo 
64291ae08745Sheppo 		/*
64301ae08745Sheppo 		 * When we sent the in-band message to our peer we
64311ae08745Sheppo 		 * marked the copy in our private ring as READY. We now
64321ae08745Sheppo 		 * check that the descriptor we are being ACK'ed for is in
64331ae08745Sheppo 		 * fact READY, i.e. it is one we have shared with our peer.
64344bac2208Snarayan 		 *
64354bac2208Snarayan 		 * If its not we flag an error, but still reset the descr
64364bac2208Snarayan 		 * back to FREE.
64371ae08745Sheppo 		 */
64384bac2208Snarayan 		for (i = dp->last_ack_recv; j < num; i = (i + 1) % len, j++) {
64394bac2208Snarayan 			priv_addr = (vsw_private_desc_t *)dp->priv_addr + i;
6440d10e4ef2Snarayan 			mutex_enter(&priv_addr->dstate_lock);
64411ae08745Sheppo 			if (priv_addr->dstate != VIO_DESC_READY) {
64424bac2208Snarayan 				DERR(vswp, "%s: (%ld) desc at index %ld not "
64434bac2208Snarayan 					"READY (0x%lx)", __func__,
64444bac2208Snarayan 					ldcp->ldc_id, idx, priv_addr->dstate);
64454bac2208Snarayan 				DERR(vswp, "%s: bound %d: ncookies %ld : "
64464bac2208Snarayan 					"datalen %ld", __func__,
64474bac2208Snarayan 					priv_addr->bound, priv_addr->ncookies,
64484bac2208Snarayan 					priv_addr->datalen);
64494bac2208Snarayan 			}
64501ae08745Sheppo 			D2(vswp, "%s: (%lld) freeing descp at %lld", __func__,
64511ae08745Sheppo 				ldcp->ldc_id, idx);
64521ae08745Sheppo 			/* release resources associated with sent msg */
64531ae08745Sheppo 			bzero(priv_addr->datap, priv_addr->datalen);
64541ae08745Sheppo 			priv_addr->datalen = 0;
64551ae08745Sheppo 			priv_addr->dstate = VIO_DESC_FREE;
6456d10e4ef2Snarayan 			mutex_exit(&priv_addr->dstate_lock);
64571ae08745Sheppo 		}
64584bac2208Snarayan 		/* update to next expected value */
64594bac2208Snarayan 		dp->last_ack_recv = (idx + 1) % dp->num_descriptors;
64604bac2208Snarayan 
64611ae08745Sheppo 		break;
64621ae08745Sheppo 
64631ae08745Sheppo 	case VIO_SUBTYPE_NACK:
64641ae08745Sheppo 		DERR(vswp, "%s: VIO_SUBTYPE_NACK", __func__);
64651ae08745Sheppo 
64661ae08745Sheppo 		/*
64671ae08745Sheppo 		 * We should only get a NACK if our peer doesn't like
64681ae08745Sheppo 		 * something about a message we have sent it. If this
64691ae08745Sheppo 		 * happens we just release the resources associated with
64701ae08745Sheppo 		 * the message. (We are relying on higher layers to decide
64711ae08745Sheppo 		 * whether or not to resend.)
64721ae08745Sheppo 		 */
64731ae08745Sheppo 
64741ae08745Sheppo 		/* limit check */
64751ae08745Sheppo 		idx = ibnd_desc->hdr.desc_handle;
64761ae08745Sheppo 
64771ae08745Sheppo 		if (idx >= VSW_RING_NUM_EL) {
64781ae08745Sheppo 			DERR(vswp, "%s: corrupted NACK received (idx %lld)",
64791ae08745Sheppo 				__func__, idx);
64801ae08745Sheppo 			return;
64811ae08745Sheppo 		}
64821ae08745Sheppo 
64831ae08745Sheppo 		if ((dp = ldcp->lane_out.dringp) == NULL) {
64841ae08745Sheppo 			DERR(vswp, "%s: no dring found", __func__);
64851ae08745Sheppo 			return;
64861ae08745Sheppo 		}
64871ae08745Sheppo 
64881ae08745Sheppo 		priv_addr = (vsw_private_desc_t *)dp->priv_addr;
64891ae08745Sheppo 
64901ae08745Sheppo 		/* move to correct location in ring */
64911ae08745Sheppo 		priv_addr += idx;
64921ae08745Sheppo 
64931ae08745Sheppo 		/* release resources associated with sent msg */
6494d10e4ef2Snarayan 		mutex_enter(&priv_addr->dstate_lock);
64951ae08745Sheppo 		bzero(priv_addr->datap, priv_addr->datalen);
64961ae08745Sheppo 		priv_addr->datalen = 0;
64971ae08745Sheppo 		priv_addr->dstate = VIO_DESC_FREE;
6498d10e4ef2Snarayan 		mutex_exit(&priv_addr->dstate_lock);
64991ae08745Sheppo 
65001ae08745Sheppo 		break;
65011ae08745Sheppo 
65021ae08745Sheppo 	default:
65031ae08745Sheppo 		DERR(vswp, "%s(%lld): Unknown vio_subtype %x\n", __func__,
65041ae08745Sheppo 			ldcp->ldc_id, ibnd_desc->hdr.tag.vio_subtype);
65051ae08745Sheppo 	}
65061ae08745Sheppo 
65071ae08745Sheppo 	D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id);
65081ae08745Sheppo }
65091ae08745Sheppo 
65101ae08745Sheppo static void
65111ae08745Sheppo vsw_process_err_pkt(vsw_ldc_t *ldcp, void *epkt, vio_msg_tag_t tag)
65121ae08745Sheppo {
65131ae08745Sheppo 	_NOTE(ARGUNUSED(epkt))
65141ae08745Sheppo 
65151ae08745Sheppo 	vsw_t		*vswp = ldcp->ldc_vswp;
65161ae08745Sheppo 	uint16_t	env = tag.vio_subtype_env;
65171ae08745Sheppo 
65181ae08745Sheppo 	D1(vswp, "%s (%lld): enter\n", __func__, ldcp->ldc_id);
65191ae08745Sheppo 
65201ae08745Sheppo 	/*
65211ae08745Sheppo 	 * Error vio_subtypes have yet to be defined. So for
65221ae08745Sheppo 	 * the moment we can't do anything.
65231ae08745Sheppo 	 */
65241ae08745Sheppo 	D2(vswp, "%s: (%x) vio_subtype env", __func__, env);
65251ae08745Sheppo 
65261ae08745Sheppo 	D1(vswp, "%s (%lld): exit\n", __func__, ldcp->ldc_id);
65271ae08745Sheppo }
65281ae08745Sheppo 
65291ae08745Sheppo /*
65301ae08745Sheppo  * Switch the given ethernet frame when operating in layer 2 mode.
65311ae08745Sheppo  *
65321ae08745Sheppo  * vswp: pointer to the vsw instance
65331ae08745Sheppo  * mp: pointer to chain of ethernet frame(s) to be switched
65341ae08745Sheppo  * caller: identifies the source of this frame as:
65351ae08745Sheppo  * 		1. VSW_VNETPORT - a vsw port (connected to a vnet).
65361ae08745Sheppo  *		2. VSW_PHYSDEV - the physical ethernet device
65371ae08745Sheppo  *		3. VSW_LOCALDEV - vsw configured as a virtual interface
65381ae08745Sheppo  * arg: argument provided by the caller.
65391ae08745Sheppo  *		1. for VNETPORT - pointer to the corresponding vsw_port_t.
65401ae08745Sheppo  *		2. for PHYSDEV - NULL
65411ae08745Sheppo  *		3. for LOCALDEV - pointer to to this vsw_t(self)
65421ae08745Sheppo  */
65431ae08745Sheppo void
65441ae08745Sheppo vsw_switch_l2_frame(vsw_t *vswp, mblk_t *mp, int caller,
65451ae08745Sheppo 			vsw_port_t *arg, mac_resource_handle_t mrh)
65461ae08745Sheppo {
65471ae08745Sheppo 	struct ether_header	*ehp;
65481ae08745Sheppo 	vsw_port_t		*port = NULL;
65491ae08745Sheppo 	mblk_t			*bp, *ret_m;
65501ae08745Sheppo 	mblk_t			*nmp = NULL;
65511ae08745Sheppo 	vsw_port_list_t		*plist = &vswp->plist;
65521ae08745Sheppo 
65531ae08745Sheppo 	D1(vswp, "%s: enter (caller %d)", __func__, caller);
65541ae08745Sheppo 
65551ae08745Sheppo 	/*
65561ae08745Sheppo 	 * PERF: rather than breaking up the chain here, scan it
65571ae08745Sheppo 	 * to find all mblks heading to same destination and then
65581ae08745Sheppo 	 * pass that sub-chain to the lower transmit functions.
65591ae08745Sheppo 	 */
65601ae08745Sheppo 
65611ae08745Sheppo 	/* process the chain of packets */
65621ae08745Sheppo 	bp = mp;
65631ae08745Sheppo 	while (bp) {
65641ae08745Sheppo 		mp = bp;
65651ae08745Sheppo 		bp = bp->b_next;
65661ae08745Sheppo 		mp->b_next = mp->b_prev = NULL;
65671ae08745Sheppo 		ehp = (struct ether_header *)mp->b_rptr;
65681ae08745Sheppo 
65691ae08745Sheppo 		D2(vswp, "%s: mblk data buffer %lld : actual data size %lld",
65701ae08745Sheppo 			__func__, MBLKSIZE(mp), MBLKL(mp));
65711ae08745Sheppo 
65721ae08745Sheppo 		READ_ENTER(&vswp->if_lockrw);
65731ae08745Sheppo 		if (ether_cmp(&ehp->ether_dhost, &vswp->if_addr) == 0) {
65741ae08745Sheppo 			/*
65751ae08745Sheppo 			 * If destination is VSW_LOCALDEV (vsw as an eth
65761ae08745Sheppo 			 * interface) and if the device is up & running,
65771ae08745Sheppo 			 * send the packet up the stack on this host.
65781ae08745Sheppo 			 * If the virtual interface is down, drop the packet.
65791ae08745Sheppo 			 */
65801ae08745Sheppo 			if (caller != VSW_LOCALDEV) {
65811ae08745Sheppo 				if (vswp->if_state & VSW_IF_UP) {
65821ae08745Sheppo 					RW_EXIT(&vswp->if_lockrw);
6583ba2e4443Sseb 					mac_rx(vswp->if_mh, mrh, mp);
65841ae08745Sheppo 				} else {
65851ae08745Sheppo 					RW_EXIT(&vswp->if_lockrw);
65861ae08745Sheppo 					/* Interface down, drop pkt */
65871ae08745Sheppo 					freemsg(mp);
65881ae08745Sheppo 				}
65891ae08745Sheppo 			} else {
65901ae08745Sheppo 				RW_EXIT(&vswp->if_lockrw);
65911ae08745Sheppo 				freemsg(mp);
65921ae08745Sheppo 			}
65931ae08745Sheppo 			continue;
65941ae08745Sheppo 		}
65951ae08745Sheppo 		RW_EXIT(&vswp->if_lockrw);
65961ae08745Sheppo 
65971ae08745Sheppo 		READ_ENTER(&plist->lockrw);
65981ae08745Sheppo 		port = vsw_lookup_fdb(vswp, ehp);
65991ae08745Sheppo 		if (port) {
66001ae08745Sheppo 			/*
66011ae08745Sheppo 			 * Mark the port as in-use.
66021ae08745Sheppo 			 */
66031ae08745Sheppo 			mutex_enter(&port->ref_lock);
66041ae08745Sheppo 			port->ref_cnt++;
66051ae08745Sheppo 			mutex_exit(&port->ref_lock);
66061ae08745Sheppo 			RW_EXIT(&plist->lockrw);
66071ae08745Sheppo 
66081ae08745Sheppo 			/*
66091ae08745Sheppo 			 * If plumbed and in promisc mode then copy msg
66101ae08745Sheppo 			 * and send up the stack.
66111ae08745Sheppo 			 */
66121ae08745Sheppo 			READ_ENTER(&vswp->if_lockrw);
66131ae08745Sheppo 			if (VSW_U_P(vswp->if_state)) {
66141ae08745Sheppo 				RW_EXIT(&vswp->if_lockrw);
66151ae08745Sheppo 				nmp = copymsg(mp);
66161ae08745Sheppo 				if (nmp)
6617ba2e4443Sseb 					mac_rx(vswp->if_mh, mrh, nmp);
66181ae08745Sheppo 			} else {
66191ae08745Sheppo 				RW_EXIT(&vswp->if_lockrw);
66201ae08745Sheppo 			}
66211ae08745Sheppo 
66221ae08745Sheppo 			/*
66231ae08745Sheppo 			 * If the destination is in FDB, the packet
66241ae08745Sheppo 			 * should be forwarded to the correponding
66251ae08745Sheppo 			 * vsw_port (connected to a vnet device -
66261ae08745Sheppo 			 * VSW_VNETPORT)
66271ae08745Sheppo 			 */
66281ae08745Sheppo 			(void) vsw_portsend(port, mp);
66291ae08745Sheppo 
66301ae08745Sheppo 			/*
66311ae08745Sheppo 			 * Decrement use count in port and check if
66321ae08745Sheppo 			 * should wake delete thread.
66331ae08745Sheppo 			 */
66341ae08745Sheppo 			mutex_enter(&port->ref_lock);
66351ae08745Sheppo 			port->ref_cnt--;
66361ae08745Sheppo 			if (port->ref_cnt == 0)
66371ae08745Sheppo 				cv_signal(&port->ref_cv);
66381ae08745Sheppo 			mutex_exit(&port->ref_lock);
66391ae08745Sheppo 		} else {
66401ae08745Sheppo 			RW_EXIT(&plist->lockrw);
66411ae08745Sheppo 			/*
66421ae08745Sheppo 			 * Destination not in FDB.
66431ae08745Sheppo 			 *
66441ae08745Sheppo 			 * If the destination is broadcast or
66451ae08745Sheppo 			 * multicast forward the packet to all
66461ae08745Sheppo 			 * (VNETPORTs, PHYSDEV, LOCALDEV),
66471ae08745Sheppo 			 * except the caller.
66481ae08745Sheppo 			 */
66491ae08745Sheppo 			if (IS_BROADCAST(ehp)) {
66501ae08745Sheppo 				D3(vswp, "%s: BROADCAST pkt", __func__);
66511ae08745Sheppo 				(void) vsw_forward_all(vswp, mp,
66521ae08745Sheppo 								caller, arg);
66531ae08745Sheppo 			} else if (IS_MULTICAST(ehp)) {
66541ae08745Sheppo 				D3(vswp, "%s: MULTICAST pkt", __func__);
66551ae08745Sheppo 				(void) vsw_forward_grp(vswp, mp,
66561ae08745Sheppo 							caller, arg);
66571ae08745Sheppo 			} else {
66581ae08745Sheppo 				/*
66591ae08745Sheppo 				 * If the destination is unicast, and came
66601ae08745Sheppo 				 * from either a logical network device or
66611ae08745Sheppo 				 * the switch itself when it is plumbed, then
66621ae08745Sheppo 				 * send it out on the physical device and also
66631ae08745Sheppo 				 * up the stack if the logical interface is
66641ae08745Sheppo 				 * in promiscious mode.
66651ae08745Sheppo 				 *
66661ae08745Sheppo 				 * NOTE:  The assumption here is that if we
66671ae08745Sheppo 				 * cannot find the destination in our fdb, its
66681ae08745Sheppo 				 * a unicast address, and came from either a
66691ae08745Sheppo 				 * vnet or down the stack (when plumbed) it
66701ae08745Sheppo 				 * must be destinded for an ethernet device
66711ae08745Sheppo 				 * outside our ldoms.
66721ae08745Sheppo 				 */
66731ae08745Sheppo 				if (caller == VSW_VNETPORT) {
66741ae08745Sheppo 					READ_ENTER(&vswp->if_lockrw);
66751ae08745Sheppo 					if (VSW_U_P(vswp->if_state)) {
66761ae08745Sheppo 						RW_EXIT(&vswp->if_lockrw);
66771ae08745Sheppo 						nmp = copymsg(mp);
66781ae08745Sheppo 						if (nmp)
6679ba2e4443Sseb 							mac_rx(vswp->if_mh,
66801ae08745Sheppo 								mrh, nmp);
66811ae08745Sheppo 					} else {
66821ae08745Sheppo 						RW_EXIT(&vswp->if_lockrw);
66831ae08745Sheppo 					}
66841ae08745Sheppo 					if ((ret_m = vsw_tx_msg(vswp, mp))
66851ae08745Sheppo 								!= NULL) {
66861ae08745Sheppo 						DERR(vswp, "%s: drop mblks to "
66871ae08745Sheppo 							"phys dev", __func__);
66881ae08745Sheppo 						freemsg(ret_m);
66891ae08745Sheppo 					}
66901ae08745Sheppo 
66911ae08745Sheppo 				} else if (caller == VSW_PHYSDEV) {
66921ae08745Sheppo 					/*
66931ae08745Sheppo 					 * Pkt seen because card in promisc
66941ae08745Sheppo 					 * mode. Send up stack if plumbed in
66951ae08745Sheppo 					 * promisc mode, else drop it.
66961ae08745Sheppo 					 */
66971ae08745Sheppo 					READ_ENTER(&vswp->if_lockrw);
66981ae08745Sheppo 					if (VSW_U_P(vswp->if_state)) {
66991ae08745Sheppo 						RW_EXIT(&vswp->if_lockrw);
6700ba2e4443Sseb 						mac_rx(vswp->if_mh, mrh, mp);
67011ae08745Sheppo 					} else {
67021ae08745Sheppo 						RW_EXIT(&vswp->if_lockrw);
67031ae08745Sheppo 						freemsg(mp);
67041ae08745Sheppo 					}
67051ae08745Sheppo 
67061ae08745Sheppo 				} else if (caller == VSW_LOCALDEV) {
67071ae08745Sheppo 					/*
67081ae08745Sheppo 					 * Pkt came down the stack, send out
67091ae08745Sheppo 					 * over physical device.
67101ae08745Sheppo 					 */
67111ae08745Sheppo 					if ((ret_m = vsw_tx_msg(vswp, mp))
67121ae08745Sheppo 								!= NULL) {
67131ae08745Sheppo 						DERR(vswp, "%s: drop mblks to "
67141ae08745Sheppo 							"phys dev", __func__);
67151ae08745Sheppo 						freemsg(ret_m);
67161ae08745Sheppo 					}
67171ae08745Sheppo 				}
67181ae08745Sheppo 			}
67191ae08745Sheppo 		}
67201ae08745Sheppo 	}
67211ae08745Sheppo 	D1(vswp, "%s: exit\n", __func__);
67221ae08745Sheppo }
67231ae08745Sheppo 
67241ae08745Sheppo /*
67251ae08745Sheppo  * Switch ethernet frame when in layer 3 mode (i.e. using IP
67261ae08745Sheppo  * layer to do the routing).
67271ae08745Sheppo  *
67281ae08745Sheppo  * There is a large amount of overlap between this function and
67291ae08745Sheppo  * vsw_switch_l2_frame. At some stage we need to revisit and refactor
67301ae08745Sheppo  * both these functions.
67311ae08745Sheppo  */
67321ae08745Sheppo void
67331ae08745Sheppo vsw_switch_l3_frame(vsw_t *vswp, mblk_t *mp, int caller,
67341ae08745Sheppo 			vsw_port_t *arg, mac_resource_handle_t mrh)
67351ae08745Sheppo {
67361ae08745Sheppo 	struct ether_header	*ehp;
67371ae08745Sheppo 	vsw_port_t		*port = NULL;
67381ae08745Sheppo 	mblk_t			*bp = NULL;
67391ae08745Sheppo 	vsw_port_list_t		*plist = &vswp->plist;
67401ae08745Sheppo 
67411ae08745Sheppo 	D1(vswp, "%s: enter (caller %d)", __func__, caller);
67421ae08745Sheppo 
67431ae08745Sheppo 	/*
67441ae08745Sheppo 	 * In layer 3 mode should only ever be switching packets
67451ae08745Sheppo 	 * between IP layer and vnet devices. So make sure thats
67461ae08745Sheppo 	 * who is invoking us.
67471ae08745Sheppo 	 */
67481ae08745Sheppo 	if ((caller != VSW_LOCALDEV) && (caller != VSW_VNETPORT)) {
67491ae08745Sheppo 		DERR(vswp, "%s: unexpected caller (%d)", __func__, caller);
67501ae08745Sheppo 		freemsgchain(mp);
67511ae08745Sheppo 		return;
67521ae08745Sheppo 	}
67531ae08745Sheppo 
67541ae08745Sheppo 	/* process the chain of packets */
67551ae08745Sheppo 	bp = mp;
67561ae08745Sheppo 	while (bp) {
67571ae08745Sheppo 		mp = bp;
67581ae08745Sheppo 		bp = bp->b_next;
67591ae08745Sheppo 		mp->b_next = mp->b_prev = NULL;
67601ae08745Sheppo 		ehp = (struct ether_header *)mp->b_rptr;
67611ae08745Sheppo 
67621ae08745Sheppo 		D2(vswp, "%s: mblk data buffer %lld : actual data size %lld",
67631ae08745Sheppo 			__func__, MBLKSIZE(mp), MBLKL(mp));
67641ae08745Sheppo 
67651ae08745Sheppo 		READ_ENTER(&plist->lockrw);
67661ae08745Sheppo 		port = vsw_lookup_fdb(vswp, ehp);
67671ae08745Sheppo 		if (port) {
67681ae08745Sheppo 			/*
67691ae08745Sheppo 			 * Mark port as in-use.
67701ae08745Sheppo 			 */
67711ae08745Sheppo 			mutex_enter(&port->ref_lock);
67721ae08745Sheppo 			port->ref_cnt++;
67731ae08745Sheppo 			mutex_exit(&port->ref_lock);
67741ae08745Sheppo 			RW_EXIT(&plist->lockrw);
67751ae08745Sheppo 
67761ae08745Sheppo 			D2(vswp, "%s: sending to target port", __func__);
67771ae08745Sheppo 			(void) vsw_portsend(port, mp);
67781ae08745Sheppo 
67791ae08745Sheppo 			/*
67801ae08745Sheppo 			 * Finished with port so decrement ref count and
67811ae08745Sheppo 			 * check if should wake delete thread.
67821ae08745Sheppo 			 */
67831ae08745Sheppo 			mutex_enter(&port->ref_lock);
67841ae08745Sheppo 			port->ref_cnt--;
67851ae08745Sheppo 			if (port->ref_cnt == 0)
67861ae08745Sheppo 				cv_signal(&port->ref_cv);
67871ae08745Sheppo 			mutex_exit(&port->ref_lock);
67881ae08745Sheppo 		} else {
67891ae08745Sheppo 			RW_EXIT(&plist->lockrw);
67901ae08745Sheppo 			/*
67911ae08745Sheppo 			 * Destination not in FDB
67921ae08745Sheppo 			 *
67931ae08745Sheppo 			 * If the destination is broadcast or
67941ae08745Sheppo 			 * multicast forward the packet to all
67951ae08745Sheppo 			 * (VNETPORTs, PHYSDEV, LOCALDEV),
67961ae08745Sheppo 			 * except the caller.
67971ae08745Sheppo 			 */
67981ae08745Sheppo 			if (IS_BROADCAST(ehp)) {
67991ae08745Sheppo 				D2(vswp, "%s: BROADCAST pkt", __func__);
68001ae08745Sheppo 				(void) vsw_forward_all(vswp, mp,
68011ae08745Sheppo 								caller, arg);
68021ae08745Sheppo 			} else if (IS_MULTICAST(ehp)) {
68031ae08745Sheppo 				D2(vswp, "%s: MULTICAST pkt", __func__);
68041ae08745Sheppo 				(void) vsw_forward_grp(vswp, mp,
68051ae08745Sheppo 							caller, arg);
68061ae08745Sheppo 			} else {
68071ae08745Sheppo 				/*
68081ae08745Sheppo 				 * Unicast pkt from vnet that we don't have
68091ae08745Sheppo 				 * an FDB entry for, so must be destinded for
68101ae08745Sheppo 				 * the outside world. Attempt to send up to the
68111ae08745Sheppo 				 * IP layer to allow it to deal with it.
68121ae08745Sheppo 				 */
68131ae08745Sheppo 				if (caller == VSW_VNETPORT) {
68141ae08745Sheppo 					READ_ENTER(&vswp->if_lockrw);
68151ae08745Sheppo 					if (vswp->if_state & VSW_IF_UP) {
68161ae08745Sheppo 						RW_EXIT(&vswp->if_lockrw);
68171ae08745Sheppo 						D2(vswp, "%s: sending up",
68181ae08745Sheppo 							__func__);
6819ba2e4443Sseb 						mac_rx(vswp->if_mh, mrh, mp);
68201ae08745Sheppo 					} else {
68211ae08745Sheppo 						RW_EXIT(&vswp->if_lockrw);
68221ae08745Sheppo 						/* Interface down, drop pkt */
68231ae08745Sheppo 						D2(vswp, "%s I/F down",
68241ae08745Sheppo 								__func__);
68251ae08745Sheppo 						freemsg(mp);
68261ae08745Sheppo 					}
68271ae08745Sheppo 				}
68281ae08745Sheppo 			}
68291ae08745Sheppo 		}
68301ae08745Sheppo 	}
68311ae08745Sheppo 
68321ae08745Sheppo 	D1(vswp, "%s: exit", __func__);
68331ae08745Sheppo }
68341ae08745Sheppo 
68351ae08745Sheppo /*
68361ae08745Sheppo  * Forward the ethernet frame to all ports (VNETPORTs, PHYSDEV, LOCALDEV),
68371ae08745Sheppo  * except the caller (port on which frame arrived).
68381ae08745Sheppo  */
68391ae08745Sheppo static int
68401ae08745Sheppo vsw_forward_all(vsw_t *vswp, mblk_t *mp, int caller, vsw_port_t *arg)
68411ae08745Sheppo {
68421ae08745Sheppo 	vsw_port_list_t	*plist = &vswp->plist;
68431ae08745Sheppo 	vsw_port_t	*portp;
68441ae08745Sheppo 	mblk_t		*nmp = NULL;
68451ae08745Sheppo 	mblk_t		*ret_m = NULL;
68461ae08745Sheppo 	int		skip_port = 0;
68471ae08745Sheppo 
68481ae08745Sheppo 	D1(vswp, "vsw_forward_all: enter\n");
68491ae08745Sheppo 
68501ae08745Sheppo 	/*
68511ae08745Sheppo 	 * Broadcast message from inside ldoms so send to outside
68521ae08745Sheppo 	 * world if in either of layer 2 modes.
68531ae08745Sheppo 	 */
68541ae08745Sheppo 	if (((vswp->smode[vswp->smode_idx] == VSW_LAYER2) ||
68551ae08745Sheppo 		(vswp->smode[vswp->smode_idx] == VSW_LAYER2_PROMISC)) &&
68561ae08745Sheppo 		((caller == VSW_LOCALDEV) || (caller == VSW_VNETPORT))) {
68571ae08745Sheppo 
68581ae08745Sheppo 		nmp = dupmsg(mp);
68591ae08745Sheppo 		if (nmp) {
68601ae08745Sheppo 			if ((ret_m = vsw_tx_msg(vswp, nmp)) != NULL) {
68611ae08745Sheppo 				DERR(vswp, "%s: dropping pkt(s) "
68621ae08745Sheppo 				"consisting of %ld bytes of data for"
68631ae08745Sheppo 				" physical device", __func__, MBLKL(ret_m));
68641ae08745Sheppo 			freemsg(ret_m);
68651ae08745Sheppo 			}
68661ae08745Sheppo 		}
68671ae08745Sheppo 	}
68681ae08745Sheppo 
68691ae08745Sheppo 	if (caller == VSW_VNETPORT)
68701ae08745Sheppo 		skip_port = 1;
68711ae08745Sheppo 
68721ae08745Sheppo 	/*
68731ae08745Sheppo 	 * Broadcast message from other vnet (layer 2 or 3) or outside
68741ae08745Sheppo 	 * world (layer 2 only), send up stack if plumbed.
68751ae08745Sheppo 	 */
68761ae08745Sheppo 	if ((caller == VSW_PHYSDEV) || (caller == VSW_VNETPORT)) {
68771ae08745Sheppo 		READ_ENTER(&vswp->if_lockrw);
68781ae08745Sheppo 		if (vswp->if_state & VSW_IF_UP) {
68791ae08745Sheppo 			RW_EXIT(&vswp->if_lockrw);
68801ae08745Sheppo 			nmp = copymsg(mp);
68811ae08745Sheppo 			if (nmp)
6882ba2e4443Sseb 				mac_rx(vswp->if_mh, NULL, nmp);
68831ae08745Sheppo 		} else {
68841ae08745Sheppo 			RW_EXIT(&vswp->if_lockrw);
68851ae08745Sheppo 		}
68861ae08745Sheppo 	}
68871ae08745Sheppo 
68881ae08745Sheppo 	/* send it to all VNETPORTs */
68891ae08745Sheppo 	READ_ENTER(&plist->lockrw);
68901ae08745Sheppo 	for (portp = plist->head; portp != NULL; portp = portp->p_next) {
68911ae08745Sheppo 		D2(vswp, "vsw_forward_all: port %d", portp->p_instance);
68921ae08745Sheppo 		/*
68931ae08745Sheppo 		 * Caution ! - don't reorder these two checks as arg
68941ae08745Sheppo 		 * will be NULL if the caller is PHYSDEV. skip_port is
68951ae08745Sheppo 		 * only set if caller is VNETPORT.
68961ae08745Sheppo 		 */
68971ae08745Sheppo 		if ((skip_port) && (portp == arg))
68981ae08745Sheppo 			continue;
68991ae08745Sheppo 		else {
69001ae08745Sheppo 			nmp = dupmsg(mp);
69011ae08745Sheppo 			if (nmp) {
69021ae08745Sheppo 				(void) vsw_portsend(portp, nmp);
69031ae08745Sheppo 			} else {
69041ae08745Sheppo 				DERR(vswp, "vsw_forward_all: nmp NULL");
69051ae08745Sheppo 			}
69061ae08745Sheppo 		}
69071ae08745Sheppo 	}
69081ae08745Sheppo 	RW_EXIT(&plist->lockrw);
69091ae08745Sheppo 
69101ae08745Sheppo 	freemsg(mp);
69111ae08745Sheppo 
69121ae08745Sheppo 	D1(vswp, "vsw_forward_all: exit\n");
69131ae08745Sheppo 	return (0);
69141ae08745Sheppo }
69151ae08745Sheppo 
69161ae08745Sheppo /*
69171ae08745Sheppo  * Forward pkts to any devices or interfaces which have registered
69181ae08745Sheppo  * an interest in them (i.e. multicast groups).
69191ae08745Sheppo  */
69201ae08745Sheppo static int
69211ae08745Sheppo vsw_forward_grp(vsw_t *vswp, mblk_t *mp, int caller, vsw_port_t *arg)
69221ae08745Sheppo {
69231ae08745Sheppo 	struct ether_header	*ehp = (struct ether_header *)mp->b_rptr;
69241ae08745Sheppo 	mfdb_ent_t		*entp = NULL;
69251ae08745Sheppo 	mfdb_ent_t		*tpp = NULL;
69261ae08745Sheppo 	vsw_port_t 		*port;
69271ae08745Sheppo 	uint64_t		key = 0;
69281ae08745Sheppo 	mblk_t			*nmp = NULL;
69291ae08745Sheppo 	mblk_t			*ret_m = NULL;
69301ae08745Sheppo 	boolean_t		check_if = B_TRUE;
69311ae08745Sheppo 
69321ae08745Sheppo 	/*
69331ae08745Sheppo 	 * Convert address to hash table key
69341ae08745Sheppo 	 */
69351ae08745Sheppo 	KEY_HASH(key, ehp->ether_dhost);
69361ae08745Sheppo 
69371ae08745Sheppo 	D1(vswp, "%s: key 0x%llx", __func__, key);
69381ae08745Sheppo 
69391ae08745Sheppo 	/*
69401ae08745Sheppo 	 * If pkt came from either a vnet or down the stack (if we are
69411ae08745Sheppo 	 * plumbed) and we are in layer 2 mode, then we send the pkt out
69421ae08745Sheppo 	 * over the physical adapter, and then check to see if any other
69431ae08745Sheppo 	 * vnets are interested in it.
69441ae08745Sheppo 	 */
69451ae08745Sheppo 	if (((vswp->smode[vswp->smode_idx] == VSW_LAYER2) ||
69461ae08745Sheppo 		(vswp->smode[vswp->smode_idx] == VSW_LAYER2_PROMISC)) &&
69471ae08745Sheppo 		((caller == VSW_VNETPORT) || (caller == VSW_LOCALDEV))) {
69481ae08745Sheppo 		nmp = dupmsg(mp);
69491ae08745Sheppo 		if (nmp) {
69501ae08745Sheppo 			if ((ret_m = vsw_tx_msg(vswp, nmp)) != NULL) {
69511ae08745Sheppo 				DERR(vswp, "%s: dropping pkt(s) "
69521ae08745Sheppo 					"consisting of %ld bytes of "
69531ae08745Sheppo 					"data for physical device",
69541ae08745Sheppo 					__func__, MBLKL(ret_m));
69551ae08745Sheppo 				freemsg(ret_m);
69561ae08745Sheppo 			}
69571ae08745Sheppo 		}
69581ae08745Sheppo 	}
69591ae08745Sheppo 
69601ae08745Sheppo 	READ_ENTER(&vswp->mfdbrw);
69611ae08745Sheppo 	if (mod_hash_find(vswp->mfdb, (mod_hash_key_t)key,
69621ae08745Sheppo 				(mod_hash_val_t *)&entp) != 0) {
69631ae08745Sheppo 		D3(vswp, "%s: no table entry found for addr 0x%llx",
69641ae08745Sheppo 								__func__, key);
69651ae08745Sheppo 	} else {
69661ae08745Sheppo 		/*
69671ae08745Sheppo 		 * Send to list of devices associated with this address...
69681ae08745Sheppo 		 */
69691ae08745Sheppo 		for (tpp = entp; tpp != NULL; tpp = tpp->nextp) {
69701ae08745Sheppo 
69711ae08745Sheppo 			/* dont send to ourselves */
69721ae08745Sheppo 			if ((caller == VSW_VNETPORT) &&
69731ae08745Sheppo 				(tpp->d_addr == (void *)arg)) {
69741ae08745Sheppo 				port = (vsw_port_t *)tpp->d_addr;
69751ae08745Sheppo 				D3(vswp, "%s: not sending to ourselves"
69761ae08745Sheppo 					" : port %d", __func__,
69771ae08745Sheppo 					port->p_instance);
69781ae08745Sheppo 				continue;
69791ae08745Sheppo 
69801ae08745Sheppo 			} else if ((caller == VSW_LOCALDEV) &&
69811ae08745Sheppo 				(tpp->d_type == VSW_LOCALDEV)) {
69821ae08745Sheppo 				D3(vswp, "%s: not sending back up stack",
69831ae08745Sheppo 					__func__);
69841ae08745Sheppo 				continue;
69851ae08745Sheppo 			}
69861ae08745Sheppo 
69871ae08745Sheppo 			if (tpp->d_type == VSW_VNETPORT) {
69881ae08745Sheppo 				port = (vsw_port_t *)tpp->d_addr;
69891ae08745Sheppo 				D3(vswp, "%s: sending to port %ld for "
69901ae08745Sheppo 					" addr 0x%llx", __func__,
69911ae08745Sheppo 					port->p_instance, key);
69921ae08745Sheppo 
69931ae08745Sheppo 				nmp = dupmsg(mp);
69941ae08745Sheppo 				if (nmp)
69951ae08745Sheppo 					(void) vsw_portsend(port, nmp);
69961ae08745Sheppo 			} else {
69971ae08745Sheppo 				if (vswp->if_state & VSW_IF_UP) {
69981ae08745Sheppo 					nmp = copymsg(mp);
69991ae08745Sheppo 					if (nmp)
7000ba2e4443Sseb 						mac_rx(vswp->if_mh, NULL, nmp);
70011ae08745Sheppo 					check_if = B_FALSE;
70021ae08745Sheppo 					D3(vswp, "%s: sending up stack"
70031ae08745Sheppo 						" for addr 0x%llx", __func__,
70041ae08745Sheppo 						key);
70051ae08745Sheppo 				}
70061ae08745Sheppo 			}
70071ae08745Sheppo 		}
70081ae08745Sheppo 	}
70091ae08745Sheppo 
70101ae08745Sheppo 	RW_EXIT(&vswp->mfdbrw);
70111ae08745Sheppo 
70121ae08745Sheppo 	/*
70131ae08745Sheppo 	 * If the pkt came from either a vnet or from physical device,
70141ae08745Sheppo 	 * and if we havent already sent the pkt up the stack then we
70151ae08745Sheppo 	 * check now if we can/should (i.e. the interface is plumbed
70161ae08745Sheppo 	 * and in promisc mode).
70171ae08745Sheppo 	 */
70181ae08745Sheppo 	if ((check_if) &&
70191ae08745Sheppo 		((caller == VSW_VNETPORT) || (caller == VSW_PHYSDEV))) {
70201ae08745Sheppo 		READ_ENTER(&vswp->if_lockrw);
70211ae08745Sheppo 		if (VSW_U_P(vswp->if_state)) {
70221ae08745Sheppo 			RW_EXIT(&vswp->if_lockrw);
70231ae08745Sheppo 			D3(vswp, "%s: (caller %d) finally sending up stack"
70241ae08745Sheppo 				" for addr 0x%llx", __func__, caller, key);
70251ae08745Sheppo 			nmp = copymsg(mp);
70261ae08745Sheppo 			if (nmp)
7027ba2e4443Sseb 				mac_rx(vswp->if_mh, NULL, nmp);
70281ae08745Sheppo 		} else {
70291ae08745Sheppo 			RW_EXIT(&vswp->if_lockrw);
70301ae08745Sheppo 		}
70311ae08745Sheppo 	}
70321ae08745Sheppo 
70331ae08745Sheppo 	freemsg(mp);
70341ae08745Sheppo 
70351ae08745Sheppo 	D1(vswp, "%s: exit", __func__);
70361ae08745Sheppo 
70371ae08745Sheppo 	return (0);
70381ae08745Sheppo }
70391ae08745Sheppo 
70401ae08745Sheppo /* transmit the packet over the given port */
70411ae08745Sheppo static int
70421ae08745Sheppo vsw_portsend(vsw_port_t *port, mblk_t *mp)
70431ae08745Sheppo {
70441ae08745Sheppo 	vsw_ldc_list_t 	*ldcl = &port->p_ldclist;
70451ae08745Sheppo 	vsw_ldc_t 	*ldcp;
70461ae08745Sheppo 	int		status = 0;
70471ae08745Sheppo 
70481ae08745Sheppo 
70491ae08745Sheppo 	READ_ENTER(&ldcl->lockrw);
70501ae08745Sheppo 	/*
70511ae08745Sheppo 	 * Note for now, we have a single channel.
70521ae08745Sheppo 	 */
70531ae08745Sheppo 	ldcp = ldcl->head;
70541ae08745Sheppo 	if (ldcp == NULL) {
70551ae08745Sheppo 		DERR(port->p_vswp, "vsw_portsend: no ldc: dropping packet\n");
70561ae08745Sheppo 		freemsg(mp);
70571ae08745Sheppo 		RW_EXIT(&ldcl->lockrw);
70581ae08745Sheppo 		return (1);
70591ae08745Sheppo 	}
70601ae08745Sheppo 
70611ae08745Sheppo 	/*
70621ae08745Sheppo 	 * Send the message out using the appropriate
70631ae08745Sheppo 	 * transmit function which will free mblock when it
70641ae08745Sheppo 	 * is finished with it.
70651ae08745Sheppo 	 */
70661ae08745Sheppo 	mutex_enter(&port->tx_lock);
70671ae08745Sheppo 	if (port->transmit != NULL)
70681ae08745Sheppo 		status = (*port->transmit)(ldcp, mp);
70691ae08745Sheppo 	else {
70701ae08745Sheppo 		freemsg(mp);
70711ae08745Sheppo 	}
70721ae08745Sheppo 	mutex_exit(&port->tx_lock);
70731ae08745Sheppo 
70741ae08745Sheppo 	RW_EXIT(&ldcl->lockrw);
70751ae08745Sheppo 
70761ae08745Sheppo 	return (status);
70771ae08745Sheppo }
70781ae08745Sheppo 
70791ae08745Sheppo /*
70801ae08745Sheppo  * Send packet out via descriptor ring to a logical device.
70811ae08745Sheppo  */
70821ae08745Sheppo static int
70831ae08745Sheppo vsw_dringsend(vsw_ldc_t *ldcp, mblk_t *mp)
70841ae08745Sheppo {
70851ae08745Sheppo 	vio_dring_msg_t		dring_pkt;
70861ae08745Sheppo 	dring_info_t		*dp = NULL;
70871ae08745Sheppo 	vsw_private_desc_t	*priv_desc = NULL;
7088d10e4ef2Snarayan 	vnet_public_desc_t	*pub = NULL;
70891ae08745Sheppo 	vsw_t			*vswp = ldcp->ldc_vswp;
70901ae08745Sheppo 	mblk_t			*bp;
70911ae08745Sheppo 	size_t			n, size;
70921ae08745Sheppo 	caddr_t			bufp;
70931ae08745Sheppo 	int			idx;
70941ae08745Sheppo 	int			status = LDC_TX_SUCCESS;
70951ae08745Sheppo 
70961ae08745Sheppo 	D1(vswp, "%s(%lld): enter\n", __func__, ldcp->ldc_id);
70971ae08745Sheppo 
70981ae08745Sheppo 	/* TODO: make test a macro */
70991ae08745Sheppo 	if ((!(ldcp->lane_out.lstate & VSW_LANE_ACTIVE)) ||
71001ae08745Sheppo 		(ldcp->ldc_status != LDC_UP) || (ldcp->ldc_handle == NULL)) {
71011ae08745Sheppo 		DWARN(vswp, "%s(%lld) status(%d) lstate(0x%llx), dropping "
71021ae08745Sheppo 			"packet\n", __func__, ldcp->ldc_id, ldcp->ldc_status,
71031ae08745Sheppo 			ldcp->lane_out.lstate);
71041ae08745Sheppo 		freemsg(mp);
71051ae08745Sheppo 		return (LDC_TX_FAILURE);
71061ae08745Sheppo 	}
71071ae08745Sheppo 
71081ae08745Sheppo 	/*
71091ae08745Sheppo 	 * Note - using first ring only, this may change
71101ae08745Sheppo 	 * in the future.
71111ae08745Sheppo 	 */
7112445b4c2eSsb155480 	READ_ENTER(&ldcp->lane_out.dlistrw);
71131ae08745Sheppo 	if ((dp = ldcp->lane_out.dringp) == NULL) {
7114445b4c2eSsb155480 		RW_EXIT(&ldcp->lane_out.dlistrw);
71151ae08745Sheppo 		DERR(vswp, "%s(%lld): no dring for outbound lane on"
71161ae08745Sheppo 			" channel %d", __func__, ldcp->ldc_id, ldcp->ldc_id);
71171ae08745Sheppo 		freemsg(mp);
71181ae08745Sheppo 		return (LDC_TX_FAILURE);
71191ae08745Sheppo 	}
71201ae08745Sheppo 
71211ae08745Sheppo 	size = msgsize(mp);
71221ae08745Sheppo 	if (size > (size_t)ETHERMAX) {
7123445b4c2eSsb155480 		RW_EXIT(&ldcp->lane_out.dlistrw);
71241ae08745Sheppo 		DERR(vswp, "%s(%lld) invalid size (%ld)\n", __func__,
71251ae08745Sheppo 		    ldcp->ldc_id, size);
7126d10e4ef2Snarayan 		freemsg(mp);
7127d10e4ef2Snarayan 		return (LDC_TX_FAILURE);
71281ae08745Sheppo 	}
71291ae08745Sheppo 
71301ae08745Sheppo 	/*
71311ae08745Sheppo 	 * Find a free descriptor
71321ae08745Sheppo 	 *
71331ae08745Sheppo 	 * Note: for the moment we are assuming that we will only
71341ae08745Sheppo 	 * have one dring going from the switch to each of its
71351ae08745Sheppo 	 * peers. This may change in the future.
71361ae08745Sheppo 	 */
71371ae08745Sheppo 	if (vsw_dring_find_free_desc(dp, &priv_desc, &idx) != 0) {
7138d10e4ef2Snarayan 		D2(vswp, "%s(%lld): no descriptor available for ring "
71391ae08745Sheppo 			"at 0x%llx", __func__, ldcp->ldc_id, dp);
71401ae08745Sheppo 
71411ae08745Sheppo 		/* nothing more we can do */
71421ae08745Sheppo 		status = LDC_TX_NORESOURCES;
71431ae08745Sheppo 		goto vsw_dringsend_free_exit;
71441ae08745Sheppo 	} else {
71451ae08745Sheppo 		D2(vswp, "%s(%lld): free private descriptor found at pos "
71461ae08745Sheppo 			"%ld addr 0x%llx\n", __func__, ldcp->ldc_id, idx,
71471ae08745Sheppo 			priv_desc);
71481ae08745Sheppo 	}
71491ae08745Sheppo 
71501ae08745Sheppo 	/* copy data into the descriptor */
71511ae08745Sheppo 	bufp = priv_desc->datap;
7152d10e4ef2Snarayan 	bufp += VNET_IPALIGN;
71531ae08745Sheppo 	for (bp = mp, n = 0; bp != NULL; bp = bp->b_cont) {
71541ae08745Sheppo 		n = MBLKL(bp);
71551ae08745Sheppo 		bcopy(bp->b_rptr, bufp, n);
71561ae08745Sheppo 		bufp += n;
71571ae08745Sheppo 	}
71581ae08745Sheppo 
71591ae08745Sheppo 	priv_desc->datalen = (size < (size_t)ETHERMIN) ? ETHERMIN : size;
7160d10e4ef2Snarayan 
7161d10e4ef2Snarayan 	pub = priv_desc->descp;
7162d10e4ef2Snarayan 	pub->nbytes = priv_desc->datalen;
7163d10e4ef2Snarayan 
7164d10e4ef2Snarayan 	mutex_enter(&priv_desc->dstate_lock);
7165d10e4ef2Snarayan 	pub->hdr.dstate = VIO_DESC_READY;
7166d10e4ef2Snarayan 	mutex_exit(&priv_desc->dstate_lock);
71671ae08745Sheppo 
71681ae08745Sheppo 	/*
7169d10e4ef2Snarayan 	 * Determine whether or not we need to send a message to our
7170d10e4ef2Snarayan 	 * peer prompting them to read our newly updated descriptor(s).
71711ae08745Sheppo 	 */
7172d10e4ef2Snarayan 	mutex_enter(&dp->restart_lock);
7173d10e4ef2Snarayan 	if (dp->restart_reqd) {
7174d10e4ef2Snarayan 		dp->restart_reqd = B_FALSE;
7175d10e4ef2Snarayan 		mutex_exit(&dp->restart_lock);
71761ae08745Sheppo 
71771ae08745Sheppo 		/*
71781ae08745Sheppo 		 * Send a vio_dring_msg to peer to prompt them to read
71791ae08745Sheppo 		 * the updated descriptor ring.
71801ae08745Sheppo 		 */
71811ae08745Sheppo 		dring_pkt.tag.vio_msgtype = VIO_TYPE_DATA;
71821ae08745Sheppo 		dring_pkt.tag.vio_subtype = VIO_SUBTYPE_INFO;
71831ae08745Sheppo 		dring_pkt.tag.vio_subtype_env = VIO_DRING_DATA;
71841ae08745Sheppo 		dring_pkt.tag.vio_sid = ldcp->local_session;
71851ae08745Sheppo 
71861ae08745Sheppo 		/* Note - for now using first ring */
71871ae08745Sheppo 		dring_pkt.dring_ident = dp->ident;
71881ae08745Sheppo 
7189d10e4ef2Snarayan 		mutex_enter(&ldcp->lane_out.seq_lock);
71901ae08745Sheppo 		dring_pkt.seq_num = ldcp->lane_out.seq_num++;
7191d10e4ef2Snarayan 		mutex_exit(&ldcp->lane_out.seq_lock);
71921ae08745Sheppo 
7193d10e4ef2Snarayan 		/*
7194d10e4ef2Snarayan 		 * If last_ack_recv is -1 then we know we've not
7195d10e4ef2Snarayan 		 * received any ack's yet, so this must be the first
7196d10e4ef2Snarayan 		 * msg sent, so set the start to the begining of the ring.
7197d10e4ef2Snarayan 		 * msg sent, so set the start to the beginning of the ring.
7198d10e4ef2Snarayan 		mutex_enter(&dp->dlock);
7199d10e4ef2Snarayan 		if (dp->last_ack_recv == -1) {
7200d10e4ef2Snarayan 			dring_pkt.start_idx = 0;
7201d10e4ef2Snarayan 		} else {
7202d10e4ef2Snarayan 			dring_pkt.start_idx = (dp->last_ack_recv + 1) %
7203d10e4ef2Snarayan 						dp->num_descriptors;
7204d10e4ef2Snarayan 		}
7205d10e4ef2Snarayan 		dring_pkt.end_idx = -1;
7206d10e4ef2Snarayan 		mutex_exit(&dp->dlock);
72071ae08745Sheppo 
72081ae08745Sheppo 		D3(vswp, "%s(%lld): dring 0x%llx : ident 0x%llx\n", __func__,
72091ae08745Sheppo 			ldcp->ldc_id, dp, dring_pkt.dring_ident);
7210d10e4ef2Snarayan 		D3(vswp, "%s(%lld): start %lld : end %lld : seq %lld\n",
7211d10e4ef2Snarayan 			__func__, ldcp->ldc_id, dring_pkt.start_idx,
7212d10e4ef2Snarayan 			dring_pkt.end_idx, dring_pkt.seq_num);
72131ae08745Sheppo 
7214b071742bSsg70180 		RW_EXIT(&ldcp->lane_out.dlistrw);
7215b071742bSsg70180 
7216b071742bSsg70180 		(void) vsw_send_msg(ldcp, (void *)&dring_pkt,
7217b071742bSsg70180 					sizeof (vio_dring_msg_t), B_TRUE);
7218b071742bSsg70180 
7219b071742bSsg70180 		/* free the message block */
7220b071742bSsg70180 		freemsg(mp);
7221b071742bSsg70180 		return (status);
7222b071742bSsg70180 
7223d10e4ef2Snarayan 	} else {
7224d10e4ef2Snarayan 		mutex_exit(&dp->restart_lock);
7225d10e4ef2Snarayan 		D2(vswp, "%s(%lld): updating descp %d", __func__,
7226d10e4ef2Snarayan 			ldcp->ldc_id, idx);
7227d10e4ef2Snarayan 	}
72281ae08745Sheppo 
72291ae08745Sheppo vsw_dringsend_free_exit:
72301ae08745Sheppo 
7231445b4c2eSsb155480 	RW_EXIT(&ldcp->lane_out.dlistrw);
7232445b4c2eSsb155480 
72331ae08745Sheppo 	/* free the message block */
72341ae08745Sheppo 	freemsg(mp);
72351ae08745Sheppo 
72361ae08745Sheppo 	D1(vswp, "%s(%lld): exit\n", __func__, ldcp->ldc_id);
72371ae08745Sheppo 	return (status);
72381ae08745Sheppo }
72391ae08745Sheppo 
72401ae08745Sheppo /*
72411ae08745Sheppo  * Send an in-band descriptor message over ldc.
72421ae08745Sheppo  */
72431ae08745Sheppo static int
72441ae08745Sheppo vsw_descrsend(vsw_ldc_t *ldcp, mblk_t *mp)
72451ae08745Sheppo {
72461ae08745Sheppo 	vsw_t			*vswp = ldcp->ldc_vswp;
7247445b4c2eSsb155480 	vnet_ibnd_desc_t	ibnd_msg;
72481ae08745Sheppo 	vsw_private_desc_t	*priv_desc = NULL;
72491ae08745Sheppo 	dring_info_t		*dp = NULL;
72501ae08745Sheppo 	size_t			n, size = 0;
72511ae08745Sheppo 	caddr_t			bufp;
72521ae08745Sheppo 	mblk_t			*bp;
72531ae08745Sheppo 	int			idx, i;
72541ae08745Sheppo 	int			status = LDC_TX_SUCCESS;
72551ae08745Sheppo 	static int		warn_msg = 1;
72561ae08745Sheppo 
72571ae08745Sheppo 	D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);
72581ae08745Sheppo 
72591ae08745Sheppo 	ASSERT(mp != NULL);
72601ae08745Sheppo 
72611ae08745Sheppo 	if ((!(ldcp->lane_out.lstate & VSW_LANE_ACTIVE)) ||
72621ae08745Sheppo 		(ldcp->ldc_status != LDC_UP) || (ldcp->ldc_handle == NULL)) {
72631ae08745Sheppo 		DERR(vswp, "%s(%lld) status(%d) state (0x%llx), dropping pkt",
72641ae08745Sheppo 			__func__, ldcp->ldc_id, ldcp->ldc_status,
72651ae08745Sheppo 			ldcp->lane_out.lstate);
72661ae08745Sheppo 		freemsg(mp);
72671ae08745Sheppo 		return (LDC_TX_FAILURE);
72681ae08745Sheppo 	}
72691ae08745Sheppo 
72701ae08745Sheppo 	/*
72711ae08745Sheppo 	 * only expect single dring to exist, which we use
72721ae08745Sheppo 	 * as an internal buffer, rather than a transfer channel.
72731ae08745Sheppo 	 */
7274445b4c2eSsb155480 	READ_ENTER(&ldcp->lane_out.dlistrw);
72751ae08745Sheppo 	if ((dp = ldcp->lane_out.dringp) == NULL) {
72761ae08745Sheppo 		DERR(vswp, "%s(%lld): no dring for outbound lane",
72771ae08745Sheppo 			__func__, ldcp->ldc_id);
72781ae08745Sheppo 		DERR(vswp, "%s(%lld) status(%d) state (0x%llx)",
72791ae08745Sheppo 			__func__, ldcp->ldc_id, ldcp->ldc_status,
72801ae08745Sheppo 			ldcp->lane_out.lstate);
7281445b4c2eSsb155480 		RW_EXIT(&ldcp->lane_out.dlistrw);
72821ae08745Sheppo 		freemsg(mp);
72831ae08745Sheppo 		return (LDC_TX_FAILURE);
72841ae08745Sheppo 	}
72851ae08745Sheppo 
72861ae08745Sheppo 	size = msgsize(mp);
72871ae08745Sheppo 	if (size > (size_t)ETHERMAX) {
7288b071742bSsg70180 		RW_EXIT(&ldcp->lane_out.dlistrw);
72891ae08745Sheppo 		DERR(vswp, "%s(%lld) invalid size (%ld)\n", __func__,
72901ae08745Sheppo 		    ldcp->ldc_id, size);
7291d10e4ef2Snarayan 		freemsg(mp);
7292d10e4ef2Snarayan 		return (LDC_TX_FAILURE);
72931ae08745Sheppo 	}
72941ae08745Sheppo 
72951ae08745Sheppo 	/*
72961ae08745Sheppo 	 * Find a free descriptor in our buffer ring
72971ae08745Sheppo 	 */
72981ae08745Sheppo 	if (vsw_dring_find_free_desc(dp, &priv_desc, &idx) != 0) {
7299b071742bSsg70180 		RW_EXIT(&ldcp->lane_out.dlistrw);
73001ae08745Sheppo 		if (warn_msg) {
73011ae08745Sheppo 			DERR(vswp, "%s(%lld): no descriptor available for ring "
73021ae08745Sheppo 			"at 0x%llx", __func__, ldcp->ldc_id, dp);
73031ae08745Sheppo 			warn_msg = 0;
73041ae08745Sheppo 		}
73051ae08745Sheppo 
73061ae08745Sheppo 		/* nothing more we can do */
73071ae08745Sheppo 		status = LDC_TX_NORESOURCES;
73081ae08745Sheppo 		goto vsw_descrsend_free_exit;
73091ae08745Sheppo 	} else {
73101ae08745Sheppo 		D2(vswp, "%s(%lld): free private descriptor found at pos "
73111ae08745Sheppo 			"%ld addr 0x%x\n", __func__, ldcp->ldc_id, idx,
73121ae08745Sheppo 			priv_desc);
73131ae08745Sheppo 		warn_msg = 1;
73141ae08745Sheppo 	}
73151ae08745Sheppo 
73161ae08745Sheppo 	/* copy data into the descriptor */
73171ae08745Sheppo 	bufp = priv_desc->datap;
73181ae08745Sheppo 	for (bp = mp, n = 0; bp != NULL; bp = bp->b_cont) {
73191ae08745Sheppo 		n = MBLKL(bp);
73201ae08745Sheppo 		bcopy(bp->b_rptr, bufp, n);
73211ae08745Sheppo 		bufp += n;
73221ae08745Sheppo 	}
73231ae08745Sheppo 
73241ae08745Sheppo 	priv_desc->datalen = (size < (size_t)ETHERMIN) ? ETHERMIN : size;
73251ae08745Sheppo 
73261ae08745Sheppo 	/* create and send the in-band descp msg */
73271ae08745Sheppo 	ibnd_msg.hdr.tag.vio_msgtype = VIO_TYPE_DATA;
73281ae08745Sheppo 	ibnd_msg.hdr.tag.vio_subtype = VIO_SUBTYPE_INFO;
73291ae08745Sheppo 	ibnd_msg.hdr.tag.vio_subtype_env = VIO_DESC_DATA;
73301ae08745Sheppo 	ibnd_msg.hdr.tag.vio_sid = ldcp->local_session;
73311ae08745Sheppo 
7332d10e4ef2Snarayan 	mutex_enter(&ldcp->lane_out.seq_lock);
73331ae08745Sheppo 	ibnd_msg.hdr.seq_num = ldcp->lane_out.seq_num++;
7334d10e4ef2Snarayan 	mutex_exit(&ldcp->lane_out.seq_lock);
73351ae08745Sheppo 
73361ae08745Sheppo 	/*
73371ae08745Sheppo 	 * Copy the mem cookies describing the data from the
73381ae08745Sheppo 	 * private region of the descriptor ring into the inband
73391ae08745Sheppo 	 * descriptor.
73401ae08745Sheppo 	 */
73411ae08745Sheppo 	for (i = 0; i < priv_desc->ncookies; i++) {
73421ae08745Sheppo 		bcopy(&priv_desc->memcookie[i], &ibnd_msg.memcookie[i],
73431ae08745Sheppo 			sizeof (ldc_mem_cookie_t));
73441ae08745Sheppo 	}
73451ae08745Sheppo 
73461ae08745Sheppo 	ibnd_msg.hdr.desc_handle = idx;
73471ae08745Sheppo 	ibnd_msg.ncookies = priv_desc->ncookies;
73481ae08745Sheppo 	ibnd_msg.nbytes = size;
73491ae08745Sheppo 
7350b071742bSsg70180 	RW_EXIT(&ldcp->lane_out.dlistrw);
7351b071742bSsg70180 
7352b071742bSsg70180 	(void) vsw_send_msg(ldcp, (void *)&ibnd_msg,
7353b071742bSsg70180 			sizeof (vnet_ibnd_desc_t), B_TRUE);
73541ae08745Sheppo 
73551ae08745Sheppo vsw_descrsend_free_exit:
73561ae08745Sheppo 
73571ae08745Sheppo 	/* free the allocated message blocks */
73581ae08745Sheppo 	freemsg(mp);
73591ae08745Sheppo 
73601ae08745Sheppo 	D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id);
73611ae08745Sheppo 	return (status);
73621ae08745Sheppo }
73631ae08745Sheppo 
73641ae08745Sheppo static void
73653af08d82Slm66018 vsw_send_ver(void *arg)
73661ae08745Sheppo {
73673af08d82Slm66018 	vsw_ldc_t	*ldcp = (vsw_ldc_t *)arg;
73681ae08745Sheppo 	vsw_t		*vswp = ldcp->ldc_vswp;
73691ae08745Sheppo 	lane_t		*lp = &ldcp->lane_out;
73701ae08745Sheppo 	vio_ver_msg_t	ver_msg;
73711ae08745Sheppo 
73721ae08745Sheppo 	D1(vswp, "%s enter", __func__);
73731ae08745Sheppo 
73741ae08745Sheppo 	ver_msg.tag.vio_msgtype = VIO_TYPE_CTRL;
73751ae08745Sheppo 	ver_msg.tag.vio_subtype = VIO_SUBTYPE_INFO;
73761ae08745Sheppo 	ver_msg.tag.vio_subtype_env = VIO_VER_INFO;
73771ae08745Sheppo 	ver_msg.tag.vio_sid = ldcp->local_session;
73781ae08745Sheppo 
73791ae08745Sheppo 	ver_msg.ver_major = vsw_versions[0].ver_major;
73801ae08745Sheppo 	ver_msg.ver_minor = vsw_versions[0].ver_minor;
73811ae08745Sheppo 	ver_msg.dev_class = VDEV_NETWORK_SWITCH;
73821ae08745Sheppo 
73831ae08745Sheppo 	lp->lstate |= VSW_VER_INFO_SENT;
73841ae08745Sheppo 	lp->ver_major = ver_msg.ver_major;
73851ae08745Sheppo 	lp->ver_minor = ver_msg.ver_minor;
73861ae08745Sheppo 
73871ae08745Sheppo 	DUMP_TAG(ver_msg.tag);
73881ae08745Sheppo 
7389b071742bSsg70180 	(void) vsw_send_msg(ldcp, &ver_msg, sizeof (vio_ver_msg_t), B_TRUE);
73901ae08745Sheppo 
73911ae08745Sheppo 	D1(vswp, "%s (%d): exit", __func__, ldcp->ldc_id);
73921ae08745Sheppo }
73931ae08745Sheppo 
73941ae08745Sheppo static void
73951ae08745Sheppo vsw_send_attr(vsw_ldc_t *ldcp)
73961ae08745Sheppo {
73971ae08745Sheppo 	vsw_t			*vswp = ldcp->ldc_vswp;
73981ae08745Sheppo 	lane_t			*lp = &ldcp->lane_out;
73991ae08745Sheppo 	vnet_attr_msg_t		attr_msg;
74001ae08745Sheppo 
74011ae08745Sheppo 	D1(vswp, "%s (%ld) enter", __func__, ldcp->ldc_id);
74021ae08745Sheppo 
74031ae08745Sheppo 	/*
74041ae08745Sheppo 	 * Subtype is set to INFO by default
74051ae08745Sheppo 	 */
74061ae08745Sheppo 	attr_msg.tag.vio_msgtype = VIO_TYPE_CTRL;
74071ae08745Sheppo 	attr_msg.tag.vio_subtype = VIO_SUBTYPE_INFO;
74081ae08745Sheppo 	attr_msg.tag.vio_subtype_env = VIO_ATTR_INFO;
74091ae08745Sheppo 	attr_msg.tag.vio_sid = ldcp->local_session;
74101ae08745Sheppo 
74111ae08745Sheppo 	/* payload copied from default settings for lane */
74121ae08745Sheppo 	attr_msg.mtu = lp->mtu;
74131ae08745Sheppo 	attr_msg.addr_type = lp->addr_type;
74141ae08745Sheppo 	attr_msg.xfer_mode = lp->xfer_mode;
74151ae08745Sheppo 	attr_msg.ack_freq = lp->xfer_mode;
74161ae08745Sheppo 
74171ae08745Sheppo 	READ_ENTER(&vswp->if_lockrw);
74181ae08745Sheppo 	bcopy(&(vswp->if_addr), &(attr_msg.addr), ETHERADDRL);
74191ae08745Sheppo 	RW_EXIT(&vswp->if_lockrw);
74201ae08745Sheppo 
74211ae08745Sheppo 	ldcp->lane_out.lstate |= VSW_ATTR_INFO_SENT;
74221ae08745Sheppo 
74231ae08745Sheppo 	DUMP_TAG(attr_msg.tag);
74241ae08745Sheppo 
7425b071742bSsg70180 	(void) vsw_send_msg(ldcp, &attr_msg, sizeof (vnet_attr_msg_t), B_TRUE);
74261ae08745Sheppo 
7427b071742bSsg70180 	D1(vswp, "%s (%ld) exit", __func__, ldcp->ldc_id);
74281ae08745Sheppo }
74291ae08745Sheppo 
74301ae08745Sheppo /*
74311ae08745Sheppo  * Create dring info msg (which also results in the creation of
74321ae08745Sheppo  * a dring).
74331ae08745Sheppo  */
74341ae08745Sheppo static vio_dring_reg_msg_t *
74351ae08745Sheppo vsw_create_dring_info_pkt(vsw_ldc_t *ldcp)
74361ae08745Sheppo {
74371ae08745Sheppo 	vio_dring_reg_msg_t	*mp;
74381ae08745Sheppo 	dring_info_t		*dp;
74391ae08745Sheppo 	vsw_t			*vswp = ldcp->ldc_vswp;
74401ae08745Sheppo 
74411ae08745Sheppo 	D1(vswp, "vsw_create_dring_info_pkt enter\n");
74421ae08745Sheppo 
74431ae08745Sheppo 	/*
74441ae08745Sheppo 	 * If we can't create a dring, obviously no point sending
74451ae08745Sheppo 	 * a message.
74461ae08745Sheppo 	 */
74471ae08745Sheppo 	if ((dp = vsw_create_dring(ldcp)) == NULL)
74481ae08745Sheppo 		return (NULL);
74491ae08745Sheppo 
74501ae08745Sheppo 	mp = kmem_zalloc(sizeof (vio_dring_reg_msg_t), KM_SLEEP);
74511ae08745Sheppo 
74521ae08745Sheppo 	mp->tag.vio_msgtype = VIO_TYPE_CTRL;
74531ae08745Sheppo 	mp->tag.vio_subtype = VIO_SUBTYPE_INFO;
74541ae08745Sheppo 	mp->tag.vio_subtype_env = VIO_DRING_REG;
74551ae08745Sheppo 	mp->tag.vio_sid = ldcp->local_session;
74561ae08745Sheppo 
74571ae08745Sheppo 	/* payload */
74581ae08745Sheppo 	mp->num_descriptors = dp->num_descriptors;
74591ae08745Sheppo 	mp->descriptor_size = dp->descriptor_size;
74601ae08745Sheppo 	mp->options = dp->options;
74611ae08745Sheppo 	mp->ncookies = dp->ncookies;
74621ae08745Sheppo 	bcopy(&dp->cookie[0], &mp->cookie[0], sizeof (ldc_mem_cookie_t));
74631ae08745Sheppo 
74641ae08745Sheppo 	mp->dring_ident = 0;
74651ae08745Sheppo 
74661ae08745Sheppo 	D1(vswp, "vsw_create_dring_info_pkt exit\n");
74671ae08745Sheppo 
74681ae08745Sheppo 	return (mp);
74691ae08745Sheppo }
74701ae08745Sheppo 
74711ae08745Sheppo static void
74721ae08745Sheppo vsw_send_dring_info(vsw_ldc_t *ldcp)
74731ae08745Sheppo {
74741ae08745Sheppo 	vio_dring_reg_msg_t	*dring_msg;
74751ae08745Sheppo 	vsw_t			*vswp = ldcp->ldc_vswp;
74761ae08745Sheppo 
74771ae08745Sheppo 	D1(vswp, "%s: (%ld) enter", __func__, ldcp->ldc_id);
74781ae08745Sheppo 
74791ae08745Sheppo 	dring_msg = vsw_create_dring_info_pkt(ldcp);
74801ae08745Sheppo 	if (dring_msg == NULL) {
748134683adeSsg70180 		cmn_err(CE_WARN, "!vsw%d: %s: error creating msg",
748234683adeSsg70180 			vswp->instance, __func__);
74831ae08745Sheppo 		return;
74841ae08745Sheppo 	}
74851ae08745Sheppo 
74861ae08745Sheppo 	ldcp->lane_out.lstate |= VSW_DRING_INFO_SENT;
74871ae08745Sheppo 
74881ae08745Sheppo 	DUMP_TAG_PTR((vio_msg_tag_t *)dring_msg);
74891ae08745Sheppo 
7490b071742bSsg70180 	(void) vsw_send_msg(ldcp, dring_msg,
7491b071742bSsg70180 		sizeof (vio_dring_reg_msg_t), B_TRUE);
74921ae08745Sheppo 
74931ae08745Sheppo 	kmem_free(dring_msg, sizeof (vio_dring_reg_msg_t));
74941ae08745Sheppo 
74951ae08745Sheppo 	D1(vswp, "%s: (%ld) exit", __func__, ldcp->ldc_id);
74961ae08745Sheppo }
74971ae08745Sheppo 
74981ae08745Sheppo static void
74991ae08745Sheppo vsw_send_rdx(vsw_ldc_t *ldcp)
75001ae08745Sheppo {
75011ae08745Sheppo 	vsw_t		*vswp = ldcp->ldc_vswp;
75021ae08745Sheppo 	vio_rdx_msg_t	rdx_msg;
75031ae08745Sheppo 
75041ae08745Sheppo 	D1(vswp, "%s (%ld) enter", __func__, ldcp->ldc_id);
75051ae08745Sheppo 
75061ae08745Sheppo 	rdx_msg.tag.vio_msgtype = VIO_TYPE_CTRL;
75071ae08745Sheppo 	rdx_msg.tag.vio_subtype = VIO_SUBTYPE_INFO;
75081ae08745Sheppo 	rdx_msg.tag.vio_subtype_env = VIO_RDX;
75091ae08745Sheppo 	rdx_msg.tag.vio_sid = ldcp->local_session;
75101ae08745Sheppo 
7511b071742bSsg70180 	ldcp->lane_in.lstate |= VSW_RDX_INFO_SENT;
75121ae08745Sheppo 
75131ae08745Sheppo 	DUMP_TAG(rdx_msg.tag);
75141ae08745Sheppo 
7515b071742bSsg70180 	(void) vsw_send_msg(ldcp, &rdx_msg, sizeof (vio_rdx_msg_t), B_TRUE);
75161ae08745Sheppo 
75171ae08745Sheppo 	D1(vswp, "%s (%ld) exit", __func__, ldcp->ldc_id);
75181ae08745Sheppo }
75191ae08745Sheppo 
75201ae08745Sheppo /*
75211ae08745Sheppo  * Generic routine to send message out over ldc channel.
7522b071742bSsg70180  *
7523b071742bSsg70180  * It is possible that when we attempt to write over the ldc channel
7524b071742bSsg70180  * that we get notified that it has been reset. Depending on the value
7525b071742bSsg70180  * of the handle_reset flag we either handle that event here or simply
7526b071742bSsg70180  * notify the caller that the channel was reset.
75271ae08745Sheppo  */
7528b071742bSsg70180 static int
7529b071742bSsg70180 vsw_send_msg(vsw_ldc_t *ldcp, void *msgp, int size, boolean_t handle_reset)
75301ae08745Sheppo {
75311ae08745Sheppo 	int		rv;
75321ae08745Sheppo 	size_t		msglen = size;
75331ae08745Sheppo 	vio_msg_tag_t	*tag = (vio_msg_tag_t *)msgp;
75341ae08745Sheppo 	vsw_t		*vswp = ldcp->ldc_vswp;
75351ae08745Sheppo 
75361ae08745Sheppo 	D1(vswp, "vsw_send_msg (%lld) enter : sending %d bytes",
75371ae08745Sheppo 			ldcp->ldc_id, size);
75381ae08745Sheppo 
75391ae08745Sheppo 	D2(vswp, "send_msg: type 0x%llx", tag->vio_msgtype);
75401ae08745Sheppo 	D2(vswp, "send_msg: stype 0x%llx", tag->vio_subtype);
75411ae08745Sheppo 	D2(vswp, "send_msg: senv 0x%llx", tag->vio_subtype_env);
75421ae08745Sheppo 
75431ae08745Sheppo 	mutex_enter(&ldcp->ldc_txlock);
75441ae08745Sheppo 	do {
75451ae08745Sheppo 		msglen = size;
75461ae08745Sheppo 		rv = ldc_write(ldcp->ldc_handle, (caddr_t)msgp, &msglen);
75471ae08745Sheppo 	} while (rv == EWOULDBLOCK && --vsw_wretries > 0);
75481ae08745Sheppo 
75491ae08745Sheppo 	if ((rv != 0) || (msglen != size)) {
75501ae08745Sheppo 		DERR(vswp, "vsw_send_msg:ldc_write failed: chan(%lld) "
75511ae08745Sheppo 			"rv(%d) size (%d) msglen(%d)\n", ldcp->ldc_id,
75521ae08745Sheppo 			rv, size, msglen);
75531ae08745Sheppo 	}
75543af08d82Slm66018 	mutex_exit(&ldcp->ldc_txlock);
75553af08d82Slm66018 
7556b071742bSsg70180 	/*
7557b071742bSsg70180 	 * If channel has been reset we either handle it here or
7558b071742bSsg70180 	 * simply report back that it has been reset and let caller
7559b071742bSsg70180 	 * decide what to do.
7560b071742bSsg70180 	 */
75613af08d82Slm66018 	if (rv == ECONNRESET) {
7562b071742bSsg70180 		DWARN(vswp, "%s (%lld) channel reset",
7563b071742bSsg70180 					__func__, ldcp->ldc_id);
7564b071742bSsg70180 
7565b071742bSsg70180 		/*
7566b071742bSsg70180 		 * N.B - must never be holding the dlistrw lock when
7567b071742bSsg70180 		 * we do a reset of the channel.
7568b071742bSsg70180 		 */
7569b071742bSsg70180 		if (handle_reset) {
7570b071742bSsg70180 			vsw_process_conn_evt(ldcp, VSW_CONN_RESET);
7571b071742bSsg70180 		}
75723af08d82Slm66018 	}
75731ae08745Sheppo 
7574b071742bSsg70180 	return (rv);
75751ae08745Sheppo }
75761ae08745Sheppo 
75771ae08745Sheppo /*
75781ae08745Sheppo  * Add an entry into FDB, for the given mac address and port_id.
75791ae08745Sheppo  * Returns 0 on success, 1 on failure.
75801ae08745Sheppo  *
75811ae08745Sheppo  * Lock protecting FDB must be held by calling process.
75821ae08745Sheppo  */
75831ae08745Sheppo static int
75841ae08745Sheppo vsw_add_fdb(vsw_t *vswp, vsw_port_t *port)
75851ae08745Sheppo {
75861ae08745Sheppo 	uint64_t	addr = 0;
75871ae08745Sheppo 
75881ae08745Sheppo 	D1(vswp, "%s: enter", __func__);
75891ae08745Sheppo 
75901ae08745Sheppo 	KEY_HASH(addr, port->p_macaddr);
75911ae08745Sheppo 
75921ae08745Sheppo 	D2(vswp, "%s: key = 0x%llx", __func__, addr);
75931ae08745Sheppo 
75941ae08745Sheppo 	/*
75951ae08745Sheppo 	 * Note: duplicate keys will be rejected by mod_hash.
75961ae08745Sheppo 	 */
75971ae08745Sheppo 	if (mod_hash_insert(vswp->fdb, (mod_hash_key_t)addr,
75981ae08745Sheppo 				(mod_hash_val_t)port) != 0) {
75991ae08745Sheppo 		DERR(vswp, "%s: unable to add entry into fdb.", __func__);
76001ae08745Sheppo 		return (1);
76011ae08745Sheppo 	}
76021ae08745Sheppo 
76031ae08745Sheppo 	D1(vswp, "%s: exit", __func__);
76041ae08745Sheppo 	return (0);
76051ae08745Sheppo }
76061ae08745Sheppo 
76071ae08745Sheppo /*
76081ae08745Sheppo  * Remove an entry from FDB.
76091ae08745Sheppo  * Returns 0 on success, 1 on failure.
76101ae08745Sheppo  */
76111ae08745Sheppo static int
76121ae08745Sheppo vsw_del_fdb(vsw_t *vswp, vsw_port_t *port)
76131ae08745Sheppo {
76141ae08745Sheppo 	uint64_t	addr = 0;
76151ae08745Sheppo 
76161ae08745Sheppo 	D1(vswp, "%s: enter", __func__);
76171ae08745Sheppo 
76181ae08745Sheppo 	KEY_HASH(addr, port->p_macaddr);
76191ae08745Sheppo 
76201ae08745Sheppo 	D2(vswp, "%s: key = 0x%llx", __func__, addr);
76211ae08745Sheppo 
76221ae08745Sheppo 	(void) mod_hash_destroy(vswp->fdb, (mod_hash_val_t)addr);
76231ae08745Sheppo 
76241ae08745Sheppo 	D1(vswp, "%s: enter", __func__);
76251ae08745Sheppo 
76261ae08745Sheppo 	return (0);
76271ae08745Sheppo }
76281ae08745Sheppo 
76291ae08745Sheppo /*
76301ae08745Sheppo  * Search fdb for a given mac address.
76311ae08745Sheppo  * Returns pointer to the entry if found, else returns NULL.
76321ae08745Sheppo  */
76331ae08745Sheppo static vsw_port_t *
76341ae08745Sheppo vsw_lookup_fdb(vsw_t *vswp, struct ether_header *ehp)
76351ae08745Sheppo {
76361ae08745Sheppo 	uint64_t	key = 0;
76371ae08745Sheppo 	vsw_port_t	*port = NULL;
76381ae08745Sheppo 
76391ae08745Sheppo 	D1(vswp, "%s: enter", __func__);
76401ae08745Sheppo 
76411ae08745Sheppo 	KEY_HASH(key, ehp->ether_dhost);
76421ae08745Sheppo 
76431ae08745Sheppo 	D2(vswp, "%s: key = 0x%llx", __func__, key);
76441ae08745Sheppo 
76451ae08745Sheppo 	if (mod_hash_find(vswp->fdb, (mod_hash_key_t)key,
76461ae08745Sheppo 				(mod_hash_val_t *)&port) != 0) {
764734683adeSsg70180 		D2(vswp, "%s: no port found", __func__);
76481ae08745Sheppo 		return (NULL);
76491ae08745Sheppo 	}
76501ae08745Sheppo 
76511ae08745Sheppo 	D1(vswp, "%s: exit", __func__);
76521ae08745Sheppo 
76531ae08745Sheppo 	return (port);
76541ae08745Sheppo }
76551ae08745Sheppo 
76561ae08745Sheppo /*
76571ae08745Sheppo  * Add or remove multicast address(es).
76581ae08745Sheppo  *
76591ae08745Sheppo  * Returns 0 on success, 1 on failure.
76601ae08745Sheppo  */
76611ae08745Sheppo static int
76621ae08745Sheppo vsw_add_rem_mcst(vnet_mcast_msg_t *mcst_pkt, vsw_port_t *port)
76631ae08745Sheppo {
76641ae08745Sheppo 	mcst_addr_t		*mcst_p = NULL;
76651ae08745Sheppo 	vsw_t			*vswp = port->p_vswp;
76661ae08745Sheppo 	uint64_t		addr = 0x0;
766734683adeSsg70180 	int			i;
76681ae08745Sheppo 
76691ae08745Sheppo 	D1(vswp, "%s: enter", __func__);
76701ae08745Sheppo 
76711ae08745Sheppo 	D2(vswp, "%s: %d addresses", __func__, mcst_pkt->count);
76721ae08745Sheppo 
767334683adeSsg70180 	mutex_enter(&vswp->mac_lock);
767434683adeSsg70180 	if (vswp->mh == NULL) {
767534683adeSsg70180 		mutex_exit(&vswp->mac_lock);
7676e1ebb9ecSlm66018 		return (1);
767734683adeSsg70180 	}
767834683adeSsg70180 	mutex_exit(&vswp->mac_lock);
7679e1ebb9ecSlm66018 
76801ae08745Sheppo 	for (i = 0; i < mcst_pkt->count; i++) {
76811ae08745Sheppo 		/*
76821ae08745Sheppo 		 * Convert address into form that can be used
76831ae08745Sheppo 		 * as hash table key.
76841ae08745Sheppo 		 */
76851ae08745Sheppo 		KEY_HASH(addr, mcst_pkt->mca[i]);
76861ae08745Sheppo 
76871ae08745Sheppo 		/*
76881ae08745Sheppo 		 * Add or delete the specified address/port combination.
76891ae08745Sheppo 		 */
76901ae08745Sheppo 		if (mcst_pkt->set == 0x1) {
76911ae08745Sheppo 			D3(vswp, "%s: adding multicast address 0x%llx for "
76921ae08745Sheppo 				"port %ld", __func__, addr, port->p_instance);
76931ae08745Sheppo 			if (vsw_add_mcst(vswp, VSW_VNETPORT, addr, port) == 0) {
76941ae08745Sheppo 				/*
76951ae08745Sheppo 				 * Update the list of multicast
76961ae08745Sheppo 				 * addresses contained within the
76971ae08745Sheppo 				 * port structure to include this new
76981ae08745Sheppo 				 * one.
76991ae08745Sheppo 				 */
77001ae08745Sheppo 				mcst_p = kmem_alloc(sizeof (mcst_addr_t),
77011ae08745Sheppo 								KM_NOSLEEP);
77021ae08745Sheppo 				if (mcst_p == NULL) {
77031ae08745Sheppo 					DERR(vswp, "%s: unable to alloc mem",
77041ae08745Sheppo 						__func__);
77051ae08745Sheppo 					return (1);
77061ae08745Sheppo 				}
77071ae08745Sheppo 
77081ae08745Sheppo 				mcst_p->nextp = NULL;
77091ae08745Sheppo 				mcst_p->addr = addr;
77101ae08745Sheppo 
77111ae08745Sheppo 				mutex_enter(&port->mca_lock);
77121ae08745Sheppo 				mcst_p->nextp = port->mcap;
77131ae08745Sheppo 				port->mcap = mcst_p;
77141ae08745Sheppo 				mutex_exit(&port->mca_lock);
77151ae08745Sheppo 
77161ae08745Sheppo 				/*
77171ae08745Sheppo 				 * Program the address into HW. If the addr
77181ae08745Sheppo 				 * has already been programmed then the MAC
77191ae08745Sheppo 				 * just increments a ref counter (which is
77201ae08745Sheppo 				 * used when the address is being deleted)
77211ae08745Sheppo 				 */
772234683adeSsg70180 				mutex_enter(&vswp->mac_lock);
772334683adeSsg70180 				if ((vswp->mh == NULL) ||
772434683adeSsg70180 					mac_multicst_add(vswp->mh,
772534683adeSsg70180 						(uchar_t *)&mcst_pkt->mca[i])) {
772634683adeSsg70180 					mutex_exit(&vswp->mac_lock);
772734683adeSsg70180 					cmn_err(CE_WARN, "!vsw%d: unable to "
772834683adeSsg70180 						"add multicast address",
772934683adeSsg70180 						vswp->instance);
7730e1ebb9ecSlm66018 					(void) vsw_del_mcst(vswp, VSW_VNETPORT,
7731e1ebb9ecSlm66018 						addr, port);
7732e1ebb9ecSlm66018 					vsw_del_addr(VSW_VNETPORT, port, addr);
773334683adeSsg70180 					return (1);
7734e1ebb9ecSlm66018 				}
773534683adeSsg70180 				mutex_exit(&vswp->mac_lock);
77361ae08745Sheppo 
77371ae08745Sheppo 			} else {
77381ae08745Sheppo 				DERR(vswp, "%s: error adding multicast "
77391ae08745Sheppo 					"address 0x%llx for port %ld",
77401ae08745Sheppo 					__func__, addr, port->p_instance);
77411ae08745Sheppo 				return (1);
77421ae08745Sheppo 			}
77431ae08745Sheppo 		} else {
77441ae08745Sheppo 			/*
77451ae08745Sheppo 			 * Delete an entry from the multicast hash
77461ae08745Sheppo 			 * table and update the address list
77471ae08745Sheppo 			 * appropriately.
77481ae08745Sheppo 			 */
77491ae08745Sheppo 			if (vsw_del_mcst(vswp, VSW_VNETPORT, addr, port) == 0) {
77501ae08745Sheppo 				D3(vswp, "%s: deleting multicast address "
77511ae08745Sheppo 					"0x%llx for port %ld", __func__, addr,
77521ae08745Sheppo 					port->p_instance);
77531ae08745Sheppo 
77541ae08745Sheppo 				vsw_del_addr(VSW_VNETPORT, port, addr);
77551ae08745Sheppo 
77561ae08745Sheppo 				/*
77571ae08745Sheppo 				 * Remove the address from HW. The address
77581ae08745Sheppo 				 * will actually only be removed once the ref
77591ae08745Sheppo 				 * count within the MAC layer has dropped to
77601ae08745Sheppo 				 * zero. I.e. we can safely call this fn even
77611ae08745Sheppo 				 * if other ports are interested in this
77621ae08745Sheppo 				 * address.
77631ae08745Sheppo 				 */
776434683adeSsg70180 				mutex_enter(&vswp->mac_lock);
776534683adeSsg70180 				if ((vswp->mh == NULL) ||
776634683adeSsg70180 					mac_multicst_remove(vswp->mh,
776734683adeSsg70180 						(uchar_t *)&mcst_pkt->mca[i])) {
776834683adeSsg70180 					mutex_exit(&vswp->mac_lock);
776934683adeSsg70180 					cmn_err(CE_WARN, "!vsw%d: unable to "
777034683adeSsg70180 						"remove multicast address",
777134683adeSsg70180 						vswp->instance);
777234683adeSsg70180 					return (1);
777334683adeSsg70180 				}
777434683adeSsg70180 				mutex_exit(&vswp->mac_lock);
77751ae08745Sheppo 
77761ae08745Sheppo 			} else {
77771ae08745Sheppo 				DERR(vswp, "%s: error deleting multicast "
77781ae08745Sheppo 					"addr 0x%llx for port %ld",
77791ae08745Sheppo 					__func__, addr, port->p_instance);
77801ae08745Sheppo 				return (1);
77811ae08745Sheppo 			}
77821ae08745Sheppo 		}
77831ae08745Sheppo 	}
77841ae08745Sheppo 	D1(vswp, "%s: exit", __func__);
77851ae08745Sheppo 	return (0);
77861ae08745Sheppo }
77871ae08745Sheppo 
77881ae08745Sheppo /*
77891ae08745Sheppo  * Add a new multicast entry.
77901ae08745Sheppo  *
77911ae08745Sheppo  * Search hash table based on address. If match found then
77921ae08745Sheppo  * update associated val (which is chain of ports), otherwise
77931ae08745Sheppo  * create new key/val (addr/port) pair and insert into table.
77941ae08745Sheppo  */
77951ae08745Sheppo static int
77961ae08745Sheppo vsw_add_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg)
77971ae08745Sheppo {
77981ae08745Sheppo 	int		dup = 0;
77991ae08745Sheppo 	int		rv = 0;
78001ae08745Sheppo 	mfdb_ent_t	*ment = NULL;
78011ae08745Sheppo 	mfdb_ent_t	*tmp_ent = NULL;
78021ae08745Sheppo 	mfdb_ent_t	*new_ent = NULL;
78031ae08745Sheppo 	void		*tgt = NULL;
78041ae08745Sheppo 
78051ae08745Sheppo 	if (devtype == VSW_VNETPORT) {
78061ae08745Sheppo 		/*
78071ae08745Sheppo 		 * Being invoked from a vnet.
78081ae08745Sheppo 		 */
78091ae08745Sheppo 		ASSERT(arg != NULL);
78101ae08745Sheppo 		tgt = arg;
78111ae08745Sheppo 		D2(NULL, "%s: port %d : address 0x%llx", __func__,
78121ae08745Sheppo 			((vsw_port_t *)arg)->p_instance, addr);
78131ae08745Sheppo 	} else {
78141ae08745Sheppo 		/*
78151ae08745Sheppo 		 * We are being invoked via the m_multicst mac entry
78161ae08745Sheppo 		 * point.
78171ae08745Sheppo 		 */
78181ae08745Sheppo 		D2(NULL, "%s: address 0x%llx", __func__, addr);
78191ae08745Sheppo 		tgt = (void *)vswp;
78201ae08745Sheppo 	}
78211ae08745Sheppo 
78221ae08745Sheppo 	WRITE_ENTER(&vswp->mfdbrw);
78231ae08745Sheppo 	if (mod_hash_find(vswp->mfdb, (mod_hash_key_t)addr,
78241ae08745Sheppo 				(mod_hash_val_t *)&ment) != 0) {
78251ae08745Sheppo 
78261ae08745Sheppo 		/* address not currently in table */
78271ae08745Sheppo 		ment = kmem_alloc(sizeof (mfdb_ent_t), KM_SLEEP);
78281ae08745Sheppo 		ment->d_addr = (void *)tgt;
78291ae08745Sheppo 		ment->d_type = devtype;
78301ae08745Sheppo 		ment->nextp = NULL;
78311ae08745Sheppo 
78321ae08745Sheppo 		if (mod_hash_insert(vswp->mfdb, (mod_hash_key_t)addr,
78331ae08745Sheppo 			(mod_hash_val_t)ment) != 0) {
78341ae08745Sheppo 			DERR(vswp, "%s: hash table insertion failed", __func__);
78351ae08745Sheppo 			kmem_free(ment, sizeof (mfdb_ent_t));
78361ae08745Sheppo 			rv = 1;
78371ae08745Sheppo 		} else {
78381ae08745Sheppo 			D2(vswp, "%s: added initial entry for 0x%llx to "
78391ae08745Sheppo 				"table", __func__, addr);
78401ae08745Sheppo 		}
78411ae08745Sheppo 	} else {
78421ae08745Sheppo 		/*
78431ae08745Sheppo 		 * Address in table. Check to see if specified port
78441ae08745Sheppo 		 * is already associated with the address. If not add
78451ae08745Sheppo 		 * it now.
78461ae08745Sheppo 		 */
78471ae08745Sheppo 		tmp_ent = ment;
78481ae08745Sheppo 		while (tmp_ent != NULL) {
78491ae08745Sheppo 			if (tmp_ent->d_addr == (void *)tgt) {
78501ae08745Sheppo 				if (devtype == VSW_VNETPORT) {
78511ae08745Sheppo 					DERR(vswp, "%s: duplicate port entry "
78521ae08745Sheppo 						"found for portid %ld and key "
78531ae08745Sheppo 						"0x%llx", __func__,
78541ae08745Sheppo 						((vsw_port_t *)arg)->p_instance,
78551ae08745Sheppo 						addr);
78561ae08745Sheppo 				} else {
78571ae08745Sheppo 					DERR(vswp, "%s: duplicate entry found"
78581ae08745Sheppo 						"for key 0x%llx",
78591ae08745Sheppo 						__func__, addr);
78601ae08745Sheppo 				}
78611ae08745Sheppo 				rv = 1;
78621ae08745Sheppo 				dup = 1;
78631ae08745Sheppo 				break;
78641ae08745Sheppo 			}
78651ae08745Sheppo 			tmp_ent = tmp_ent->nextp;
78661ae08745Sheppo 		}
78671ae08745Sheppo 
78681ae08745Sheppo 		/*
78691ae08745Sheppo 		 * Port not on list so add it to end now.
78701ae08745Sheppo 		 */
78711ae08745Sheppo 		if (0 == dup) {
78721ae08745Sheppo 			D2(vswp, "%s: added entry for 0x%llx to table",
78731ae08745Sheppo 				__func__, addr);
78741ae08745Sheppo 			new_ent = kmem_alloc(sizeof (mfdb_ent_t), KM_SLEEP);
78751ae08745Sheppo 			new_ent->d_addr = (void *)tgt;
78761ae08745Sheppo 			new_ent->d_type = devtype;
78771ae08745Sheppo 			new_ent->nextp = NULL;
78781ae08745Sheppo 
78791ae08745Sheppo 			tmp_ent = ment;
78801ae08745Sheppo 			while (tmp_ent->nextp != NULL)
78811ae08745Sheppo 				tmp_ent = tmp_ent->nextp;
78821ae08745Sheppo 
78831ae08745Sheppo 			tmp_ent->nextp = new_ent;
78841ae08745Sheppo 		}
78851ae08745Sheppo 	}
78861ae08745Sheppo 
78871ae08745Sheppo 	RW_EXIT(&vswp->mfdbrw);
78881ae08745Sheppo 	return (rv);
78891ae08745Sheppo }
78901ae08745Sheppo 
78911ae08745Sheppo /*
78921ae08745Sheppo  * Remove a multicast entry from the hashtable.
78931ae08745Sheppo  *
78941ae08745Sheppo  * Search hash table based on address. If match found, scan
78951ae08745Sheppo  * list of ports associated with address. If specified port
78961ae08745Sheppo  * found remove it from list.
78971ae08745Sheppo  */
78981ae08745Sheppo static int
78991ae08745Sheppo vsw_del_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg)
79001ae08745Sheppo {
79011ae08745Sheppo 	mfdb_ent_t	*ment = NULL;
79021ae08745Sheppo 	mfdb_ent_t	*curr_p, *prev_p;
79031ae08745Sheppo 	void		*tgt = NULL;
79041ae08745Sheppo 
79051ae08745Sheppo 	D1(vswp, "%s: enter", __func__);
79061ae08745Sheppo 
79071ae08745Sheppo 	if (devtype == VSW_VNETPORT) {
79081ae08745Sheppo 		tgt = (vsw_port_t *)arg;
79091ae08745Sheppo 		D2(vswp, "%s: removing port %d from mFDB for address"
79101ae08745Sheppo 			" 0x%llx", __func__, ((vsw_port_t *)tgt)->p_instance,
79111ae08745Sheppo 			addr);
79121ae08745Sheppo 	} else {
79131ae08745Sheppo 		D2(vswp, "%s: removing entry", __func__);
79141ae08745Sheppo 		tgt = (void *)vswp;
79151ae08745Sheppo 	}
79161ae08745Sheppo 
79171ae08745Sheppo 	WRITE_ENTER(&vswp->mfdbrw);
79181ae08745Sheppo 	if (mod_hash_find(vswp->mfdb, (mod_hash_key_t)addr,
79191ae08745Sheppo 				(mod_hash_val_t *)&ment) != 0) {
79201ae08745Sheppo 		D2(vswp, "%s: address 0x%llx not in table", __func__, addr);
79211ae08745Sheppo 		RW_EXIT(&vswp->mfdbrw);
79221ae08745Sheppo 		return (1);
79231ae08745Sheppo 	}
79241ae08745Sheppo 
79251ae08745Sheppo 	prev_p = curr_p = ment;
79261ae08745Sheppo 
79271ae08745Sheppo 	while (curr_p != NULL) {
79281ae08745Sheppo 		if (curr_p->d_addr == (void *)tgt) {
79291ae08745Sheppo 			if (devtype == VSW_VNETPORT) {
79301ae08745Sheppo 				D2(vswp, "%s: port %d found", __func__,
79311ae08745Sheppo 					((vsw_port_t *)tgt)->p_instance);
79321ae08745Sheppo 			} else {
79331ae08745Sheppo 				D2(vswp, "%s: instance found", __func__);
79341ae08745Sheppo 			}
79351ae08745Sheppo 
79361ae08745Sheppo 			if (prev_p == curr_p) {
79371ae08745Sheppo 				/*
79381ae08745Sheppo 				 * head of list, if no other element is in
79391ae08745Sheppo 				 * list then destroy this entry, otherwise
79401ae08745Sheppo 				 * just replace it with updated value.
79411ae08745Sheppo 				 */
79421ae08745Sheppo 				ment = curr_p->nextp;
79431ae08745Sheppo 				kmem_free(curr_p, sizeof (mfdb_ent_t));
79441ae08745Sheppo 				if (ment == NULL) {
79451ae08745Sheppo 					(void) mod_hash_destroy(vswp->mfdb,
79461ae08745Sheppo 							(mod_hash_val_t)addr);
79471ae08745Sheppo 				} else {
79481ae08745Sheppo 					(void) mod_hash_replace(vswp->mfdb,
79491ae08745Sheppo 							(mod_hash_key_t)addr,
79501ae08745Sheppo 							(mod_hash_val_t)ment);
79511ae08745Sheppo 				}
79521ae08745Sheppo 			} else {
79531ae08745Sheppo 				/*
79541ae08745Sheppo 				 * Not head of list, no need to do
79551ae08745Sheppo 				 * replacement, just adjust list pointers.
79561ae08745Sheppo 				 */
79571ae08745Sheppo 				prev_p->nextp = curr_p->nextp;
79581ae08745Sheppo 				kmem_free(curr_p, sizeof (mfdb_ent_t));
79591ae08745Sheppo 			}
79601ae08745Sheppo 			break;
79611ae08745Sheppo 		}
79621ae08745Sheppo 
79631ae08745Sheppo 		prev_p = curr_p;
79641ae08745Sheppo 		curr_p = curr_p->nextp;
79651ae08745Sheppo 	}
79661ae08745Sheppo 
79671ae08745Sheppo 	RW_EXIT(&vswp->mfdbrw);
79681ae08745Sheppo 
79691ae08745Sheppo 	D1(vswp, "%s: exit", __func__);
79701ae08745Sheppo 
79711ae08745Sheppo 	return (0);
79721ae08745Sheppo }
79731ae08745Sheppo 
79741ae08745Sheppo /*
79751ae08745Sheppo  * Port is being deleted, but has registered an interest in one
79761ae08745Sheppo  * or more multicast groups. Using the list of addresses maintained
79771ae08745Sheppo  * within the port structure find the appropriate entry in the hash
79781ae08745Sheppo  * table and remove this port from the list of interested ports.
79791ae08745Sheppo  */
79801ae08745Sheppo static void
79811ae08745Sheppo vsw_del_mcst_port(vsw_port_t *port)
79821ae08745Sheppo {
79831ae08745Sheppo 	mcst_addr_t	*mcst_p = NULL;
79841ae08745Sheppo 	vsw_t		*vswp = port->p_vswp;
79851ae08745Sheppo 
79861ae08745Sheppo 	D1(vswp, "%s: enter", __func__);
79871ae08745Sheppo 
79881ae08745Sheppo 	mutex_enter(&port->mca_lock);
79891ae08745Sheppo 	while (port->mcap != NULL) {
79901ae08745Sheppo 		(void) vsw_del_mcst(vswp, VSW_VNETPORT,
79911ae08745Sheppo 					port->mcap->addr, port);
79921ae08745Sheppo 
79931ae08745Sheppo 		mcst_p = port->mcap->nextp;
79941ae08745Sheppo 		kmem_free(port->mcap, sizeof (mcst_addr_t));
79951ae08745Sheppo 		port->mcap = mcst_p;
79961ae08745Sheppo 	}
79971ae08745Sheppo 	mutex_exit(&port->mca_lock);
79981ae08745Sheppo 
79991ae08745Sheppo 	D1(vswp, "%s: exit", __func__);
80001ae08745Sheppo }
80011ae08745Sheppo 
80021ae08745Sheppo /*
80031ae08745Sheppo  * This vsw instance is detaching, but has registered an interest in one
80041ae08745Sheppo  * or more multicast groups. Using the list of addresses maintained
80051ae08745Sheppo  * within the vsw structure find the appropriate entry in the hash
80061ae08745Sheppo  * table and remove this instance from the list of interested ports.
80071ae08745Sheppo  */
80081ae08745Sheppo static void
80091ae08745Sheppo vsw_del_mcst_vsw(vsw_t *vswp)
80101ae08745Sheppo {
80111ae08745Sheppo 	mcst_addr_t	*next_p = NULL;
80121ae08745Sheppo 
80131ae08745Sheppo 	D1(vswp, "%s: enter", __func__);
80141ae08745Sheppo 
80151ae08745Sheppo 	mutex_enter(&vswp->mca_lock);
80161ae08745Sheppo 
80171ae08745Sheppo 	while (vswp->mcap != NULL) {
80181ae08745Sheppo 		DERR(vswp, "%s: deleting addr 0x%llx",
80191ae08745Sheppo 			__func__, vswp->mcap->addr);
80201ae08745Sheppo 		(void) vsw_del_mcst(vswp, VSW_LOCALDEV,
80211ae08745Sheppo 				vswp->mcap->addr, NULL);
80221ae08745Sheppo 
80231ae08745Sheppo 		next_p = vswp->mcap->nextp;
80241ae08745Sheppo 		kmem_free(vswp->mcap, sizeof (mcst_addr_t));
80251ae08745Sheppo 		vswp->mcap = next_p;
80261ae08745Sheppo 	}
80271ae08745Sheppo 
80281ae08745Sheppo 	vswp->mcap = NULL;
80291ae08745Sheppo 	mutex_exit(&vswp->mca_lock);
80301ae08745Sheppo 
80311ae08745Sheppo 	D1(vswp, "%s: exit", __func__);
80321ae08745Sheppo }
80331ae08745Sheppo 
80341ae08745Sheppo 
80351ae08745Sheppo /*
80361ae08745Sheppo  * Remove the specified address from the list of address maintained
80371ae08745Sheppo  * in this port node.
80381ae08745Sheppo  */
80391ae08745Sheppo static void
80401ae08745Sheppo vsw_del_addr(uint8_t devtype, void *arg, uint64_t addr)
80411ae08745Sheppo {
80421ae08745Sheppo 	vsw_t		*vswp = NULL;
80431ae08745Sheppo 	vsw_port_t	*port = NULL;
80441ae08745Sheppo 	mcst_addr_t	*prev_p = NULL;
80451ae08745Sheppo 	mcst_addr_t	*curr_p = NULL;
80461ae08745Sheppo 
80471ae08745Sheppo 	D1(NULL, "%s: enter : devtype %d : addr 0x%llx",
80481ae08745Sheppo 		__func__, devtype, addr);
80491ae08745Sheppo 
80501ae08745Sheppo 	if (devtype == VSW_VNETPORT) {
80511ae08745Sheppo 		port = (vsw_port_t *)arg;
80521ae08745Sheppo 		mutex_enter(&port->mca_lock);
80531ae08745Sheppo 		prev_p = curr_p = port->mcap;
80541ae08745Sheppo 	} else {
80551ae08745Sheppo 		vswp = (vsw_t *)arg;
80561ae08745Sheppo 		mutex_enter(&vswp->mca_lock);
80571ae08745Sheppo 		prev_p = curr_p = vswp->mcap;
80581ae08745Sheppo 	}
80591ae08745Sheppo 
80601ae08745Sheppo 	while (curr_p != NULL) {
80611ae08745Sheppo 		if (curr_p->addr == addr) {
80621ae08745Sheppo 			D2(NULL, "%s: address found", __func__);
80631ae08745Sheppo 			/* match found */
80641ae08745Sheppo 			if (prev_p == curr_p) {
80651ae08745Sheppo 				/* list head */
80661ae08745Sheppo 				if (devtype == VSW_VNETPORT)
80671ae08745Sheppo 					port->mcap = curr_p->nextp;
80681ae08745Sheppo 				else
80691ae08745Sheppo 					vswp->mcap = curr_p->nextp;
80701ae08745Sheppo 			} else {
80711ae08745Sheppo 				prev_p->nextp = curr_p->nextp;
80721ae08745Sheppo 			}
80731ae08745Sheppo 			kmem_free(curr_p, sizeof (mcst_addr_t));
80741ae08745Sheppo 			break;
80751ae08745Sheppo 		} else {
80761ae08745Sheppo 			prev_p = curr_p;
80771ae08745Sheppo 			curr_p = curr_p->nextp;
80781ae08745Sheppo 		}
80791ae08745Sheppo 	}
80801ae08745Sheppo 
80811ae08745Sheppo 	if (devtype == VSW_VNETPORT)
80821ae08745Sheppo 		mutex_exit(&port->mca_lock);
80831ae08745Sheppo 	else
80841ae08745Sheppo 		mutex_exit(&vswp->mca_lock);
80851ae08745Sheppo 
80861ae08745Sheppo 	D1(NULL, "%s: exit", __func__);
80871ae08745Sheppo }
80881ae08745Sheppo 
80891ae08745Sheppo /*
80901ae08745Sheppo  * Creates a descriptor ring (dring) and links it into the
80911ae08745Sheppo  * link of outbound drings for this channel.
80921ae08745Sheppo  *
80931ae08745Sheppo  * Returns NULL if creation failed.
80941ae08745Sheppo  */
80951ae08745Sheppo static dring_info_t *
80961ae08745Sheppo vsw_create_dring(vsw_ldc_t *ldcp)
80971ae08745Sheppo {
80981ae08745Sheppo 	vsw_private_desc_t	*priv_addr = NULL;
80991ae08745Sheppo 	vsw_t			*vswp = ldcp->ldc_vswp;
81001ae08745Sheppo 	ldc_mem_info_t		minfo;
81011ae08745Sheppo 	dring_info_t		*dp, *tp;
81021ae08745Sheppo 	int			i;
81031ae08745Sheppo 
81041ae08745Sheppo 	dp = (dring_info_t *)kmem_zalloc(sizeof (dring_info_t), KM_SLEEP);
81051ae08745Sheppo 
81061ae08745Sheppo 	mutex_init(&dp->dlock, NULL, MUTEX_DRIVER, NULL);
81071ae08745Sheppo 
81081ae08745Sheppo 	/* create public section of ring */
81091ae08745Sheppo 	if ((ldc_mem_dring_create(VSW_RING_NUM_EL,
81101ae08745Sheppo 			VSW_PUB_SIZE, &dp->handle)) != 0) {
81111ae08745Sheppo 
81121ae08745Sheppo 		DERR(vswp, "vsw_create_dring(%lld): ldc dring create "
81131ae08745Sheppo 			"failed", ldcp->ldc_id);
81141ae08745Sheppo 		goto create_fail_exit;
81151ae08745Sheppo 	}
81161ae08745Sheppo 
81171ae08745Sheppo 	ASSERT(dp->handle != NULL);
81181ae08745Sheppo 
81191ae08745Sheppo 	/*
81201ae08745Sheppo 	 * Get the base address of the public section of the ring.
81211ae08745Sheppo 	 */
81221ae08745Sheppo 	if ((ldc_mem_dring_info(dp->handle, &minfo)) != 0) {
81231ae08745Sheppo 		DERR(vswp, "vsw_create_dring(%lld): dring info failed\n",
81241ae08745Sheppo 			ldcp->ldc_id);
81251ae08745Sheppo 		goto dring_fail_exit;
81261ae08745Sheppo 	} else {
81271ae08745Sheppo 		ASSERT(minfo.vaddr != 0);
81281ae08745Sheppo 		dp->pub_addr = minfo.vaddr;
81291ae08745Sheppo 	}
81301ae08745Sheppo 
81311ae08745Sheppo 	dp->num_descriptors = VSW_RING_NUM_EL;
81321ae08745Sheppo 	dp->descriptor_size = VSW_PUB_SIZE;
81331ae08745Sheppo 	dp->options = VIO_TX_DRING;
81341ae08745Sheppo 	dp->ncookies = 1;	/* guaranteed by ldc */
81351ae08745Sheppo 
81361ae08745Sheppo 	/*
81371ae08745Sheppo 	 * create private portion of ring
81381ae08745Sheppo 	 */
81391ae08745Sheppo 	dp->priv_addr = (vsw_private_desc_t *)kmem_zalloc(
81401ae08745Sheppo 		(sizeof (vsw_private_desc_t) * VSW_RING_NUM_EL), KM_SLEEP);
81411ae08745Sheppo 
81421ae08745Sheppo 	if (vsw_setup_ring(ldcp, dp)) {
81431ae08745Sheppo 		DERR(vswp, "%s: unable to setup ring", __func__);
81441ae08745Sheppo 		goto dring_fail_exit;
81451ae08745Sheppo 	}
81461ae08745Sheppo 
81471ae08745Sheppo 	/* haven't used any descriptors yet */
81481ae08745Sheppo 	dp->end_idx = 0;
8149d10e4ef2Snarayan 	dp->last_ack_recv = -1;
81501ae08745Sheppo 
81511ae08745Sheppo 	/* bind dring to the channel */
81521ae08745Sheppo 	if ((ldc_mem_dring_bind(ldcp->ldc_handle, dp->handle,
81531ae08745Sheppo 		LDC_SHADOW_MAP, LDC_MEM_RW,
81541ae08745Sheppo 		&dp->cookie[0], &dp->ncookies)) != 0) {
81551ae08745Sheppo 		DERR(vswp, "vsw_create_dring: unable to bind to channel "
81561ae08745Sheppo 			"%lld", ldcp->ldc_id);
81571ae08745Sheppo 		goto dring_fail_exit;
81581ae08745Sheppo 	}
81591ae08745Sheppo 
8160d10e4ef2Snarayan 	mutex_init(&dp->restart_lock, NULL, MUTEX_DRIVER, NULL);
8161d10e4ef2Snarayan 	dp->restart_reqd = B_TRUE;
8162d10e4ef2Snarayan 
81631ae08745Sheppo 	/*
81641ae08745Sheppo 	 * Only ever create rings for outgoing lane. Link it onto
81651ae08745Sheppo 	 * end of list.
81661ae08745Sheppo 	 */
8167445b4c2eSsb155480 	WRITE_ENTER(&ldcp->lane_out.dlistrw);
81681ae08745Sheppo 	if (ldcp->lane_out.dringp == NULL) {
81691ae08745Sheppo 		D2(vswp, "vsw_create_dring: adding first outbound ring");
81701ae08745Sheppo 		ldcp->lane_out.dringp = dp;
81711ae08745Sheppo 	} else {
81721ae08745Sheppo 		tp = ldcp->lane_out.dringp;
81731ae08745Sheppo 		while (tp->next != NULL)
81741ae08745Sheppo 			tp = tp->next;
81751ae08745Sheppo 
81761ae08745Sheppo 		tp->next = dp;
81771ae08745Sheppo 	}
8178445b4c2eSsb155480 	RW_EXIT(&ldcp->lane_out.dlistrw);
81791ae08745Sheppo 
81801ae08745Sheppo 	return (dp);
81811ae08745Sheppo 
81821ae08745Sheppo dring_fail_exit:
81831ae08745Sheppo 	(void) ldc_mem_dring_destroy(dp->handle);
81841ae08745Sheppo 
81851ae08745Sheppo create_fail_exit:
81861ae08745Sheppo 	if (dp->priv_addr != NULL) {
81871ae08745Sheppo 		priv_addr = dp->priv_addr;
81881ae08745Sheppo 		for (i = 0; i < VSW_RING_NUM_EL; i++) {
81891ae08745Sheppo 			if (priv_addr->memhandle != NULL)
81901ae08745Sheppo 				(void) ldc_mem_free_handle(
81911ae08745Sheppo 						priv_addr->memhandle);
81921ae08745Sheppo 			priv_addr++;
81931ae08745Sheppo 		}
81941ae08745Sheppo 		kmem_free(dp->priv_addr,
81951ae08745Sheppo 			(sizeof (vsw_private_desc_t) * VSW_RING_NUM_EL));
81961ae08745Sheppo 	}
81971ae08745Sheppo 	mutex_destroy(&dp->dlock);
81981ae08745Sheppo 
81991ae08745Sheppo 	kmem_free(dp, sizeof (dring_info_t));
82001ae08745Sheppo 	return (NULL);
82011ae08745Sheppo }
82021ae08745Sheppo 
82031ae08745Sheppo /*
82041ae08745Sheppo  * Create a ring consisting of just a private portion and link
82051ae08745Sheppo  * it into the list of rings for the outbound lane.
82061ae08745Sheppo  *
82071ae08745Sheppo  * These type of rings are used primarily for temporary data
82081ae08745Sheppo  * storage (i.e. as data buffers).
82091ae08745Sheppo  */
82101ae08745Sheppo void
82111ae08745Sheppo vsw_create_privring(vsw_ldc_t *ldcp)
82121ae08745Sheppo {
82131ae08745Sheppo 	dring_info_t		*dp, *tp;
82141ae08745Sheppo 	vsw_t			*vswp = ldcp->ldc_vswp;
82151ae08745Sheppo 
82161ae08745Sheppo 	D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);
82171ae08745Sheppo 
82181ae08745Sheppo 	dp = kmem_zalloc(sizeof (dring_info_t), KM_SLEEP);
82191ae08745Sheppo 
82201ae08745Sheppo 	mutex_init(&dp->dlock, NULL, MUTEX_DRIVER, NULL);
82211ae08745Sheppo 
82221ae08745Sheppo 	/* no public section */
82231ae08745Sheppo 	dp->pub_addr = NULL;
82241ae08745Sheppo 
82251ae08745Sheppo 	dp->priv_addr = kmem_zalloc((sizeof (vsw_private_desc_t) *
82261ae08745Sheppo 					VSW_RING_NUM_EL), KM_SLEEP);
82271ae08745Sheppo 
82284bac2208Snarayan 	dp->num_descriptors = VSW_RING_NUM_EL;
82294bac2208Snarayan 
82301ae08745Sheppo 	if (vsw_setup_ring(ldcp, dp)) {
82311ae08745Sheppo 		DERR(vswp, "%s: setup of ring failed", __func__);
82321ae08745Sheppo 		kmem_free(dp->priv_addr,
82331ae08745Sheppo 			(sizeof (vsw_private_desc_t) * VSW_RING_NUM_EL));
82341ae08745Sheppo 		mutex_destroy(&dp->dlock);
82351ae08745Sheppo 		kmem_free(dp, sizeof (dring_info_t));
82361ae08745Sheppo 		return;
82371ae08745Sheppo 	}
82381ae08745Sheppo 
82391ae08745Sheppo 	/* haven't used any descriptors yet */
82401ae08745Sheppo 	dp->end_idx = 0;
82411ae08745Sheppo 
8242d10e4ef2Snarayan 	mutex_init(&dp->restart_lock, NULL, MUTEX_DRIVER, NULL);
8243d10e4ef2Snarayan 	dp->restart_reqd = B_TRUE;
8244d10e4ef2Snarayan 
82451ae08745Sheppo 	/*
82461ae08745Sheppo 	 * Only ever create rings for outgoing lane. Link it onto
82471ae08745Sheppo 	 * end of list.
82481ae08745Sheppo 	 */
8249445b4c2eSsb155480 	WRITE_ENTER(&ldcp->lane_out.dlistrw);
82501ae08745Sheppo 	if (ldcp->lane_out.dringp == NULL) {
82511ae08745Sheppo 		D2(vswp, "%s: adding first outbound privring", __func__);
82521ae08745Sheppo 		ldcp->lane_out.dringp = dp;
82531ae08745Sheppo 	} else {
82541ae08745Sheppo 		tp = ldcp->lane_out.dringp;
82551ae08745Sheppo 		while (tp->next != NULL)
82561ae08745Sheppo 			tp = tp->next;
82571ae08745Sheppo 
82581ae08745Sheppo 		tp->next = dp;
82591ae08745Sheppo 	}
8260445b4c2eSsb155480 	RW_EXIT(&ldcp->lane_out.dlistrw);
82611ae08745Sheppo 
82621ae08745Sheppo 	D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id);
82631ae08745Sheppo }
82641ae08745Sheppo 
82651ae08745Sheppo /*
82661ae08745Sheppo  * Setup the descriptors in the dring. Returns 0 on success, 1 on
82671ae08745Sheppo  * failure.
82681ae08745Sheppo  */
82691ae08745Sheppo int
82701ae08745Sheppo vsw_setup_ring(vsw_ldc_t *ldcp, dring_info_t *dp)
82711ae08745Sheppo {
82721ae08745Sheppo 	vnet_public_desc_t	*pub_addr = NULL;
82731ae08745Sheppo 	vsw_private_desc_t	*priv_addr = NULL;
82741ae08745Sheppo 	vsw_t			*vswp = ldcp->ldc_vswp;
82751ae08745Sheppo 	uint64_t		*tmpp;
82761ae08745Sheppo 	uint64_t		offset = 0;
82771ae08745Sheppo 	uint32_t		ncookies = 0;
82781ae08745Sheppo 	static char		*name = "vsw_setup_ring";
8279d10e4ef2Snarayan 	int			i, j, nc, rv;
82801ae08745Sheppo 
82811ae08745Sheppo 	priv_addr = dp->priv_addr;
82821ae08745Sheppo 	pub_addr = dp->pub_addr;
82831ae08745Sheppo 
8284d10e4ef2Snarayan 	/* public section may be null but private should never be */
8285d10e4ef2Snarayan 	ASSERT(priv_addr != NULL);
8286d10e4ef2Snarayan 
82871ae08745Sheppo 	/*
82881ae08745Sheppo 	 * Allocate the region of memory which will be used to hold
82891ae08745Sheppo 	 * the data the descriptors will refer to.
82901ae08745Sheppo 	 */
82911ae08745Sheppo 	dp->data_sz = (VSW_RING_NUM_EL * VSW_RING_EL_DATA_SZ);
82921ae08745Sheppo 	dp->data_addr = kmem_alloc(dp->data_sz, KM_SLEEP);
82931ae08745Sheppo 
82941ae08745Sheppo 	D2(vswp, "%s: allocated %lld bytes at 0x%llx\n", name,
82951ae08745Sheppo 		dp->data_sz, dp->data_addr);
82961ae08745Sheppo 
82971ae08745Sheppo 	tmpp = (uint64_t *)dp->data_addr;
82981ae08745Sheppo 	offset = VSW_RING_EL_DATA_SZ / sizeof (tmpp);
82991ae08745Sheppo 
83001ae08745Sheppo 	/*
83011ae08745Sheppo 	 * Initialise some of the private and public (if they exist)
83021ae08745Sheppo 	 * descriptor fields.
83031ae08745Sheppo 	 */
83041ae08745Sheppo 	for (i = 0; i < VSW_RING_NUM_EL; i++) {
8305d10e4ef2Snarayan 		mutex_init(&priv_addr->dstate_lock, NULL, MUTEX_DRIVER, NULL);
8306d10e4ef2Snarayan 
83071ae08745Sheppo 		if ((ldc_mem_alloc_handle(ldcp->ldc_handle,
83081ae08745Sheppo 			&priv_addr->memhandle)) != 0) {
83091ae08745Sheppo 			DERR(vswp, "%s: alloc mem handle failed", name);
83101ae08745Sheppo 			goto setup_ring_cleanup;
83111ae08745Sheppo 		}
83121ae08745Sheppo 
83131ae08745Sheppo 		priv_addr->datap = (void *)tmpp;
83141ae08745Sheppo 
83151ae08745Sheppo 		rv = ldc_mem_bind_handle(priv_addr->memhandle,
83161ae08745Sheppo 			(caddr_t)priv_addr->datap, VSW_RING_EL_DATA_SZ,
83171ae08745Sheppo 			LDC_SHADOW_MAP, LDC_MEM_R|LDC_MEM_W,
83181ae08745Sheppo 			&(priv_addr->memcookie[0]), &ncookies);
83191ae08745Sheppo 		if (rv != 0) {
83201ae08745Sheppo 			DERR(vswp, "%s(%lld): ldc_mem_bind_handle failed "
83211ae08745Sheppo 				"(rv %d)", name, ldcp->ldc_id, rv);
83221ae08745Sheppo 			goto setup_ring_cleanup;
83231ae08745Sheppo 		}
83241ae08745Sheppo 		priv_addr->bound = 1;
83251ae08745Sheppo 
83261ae08745Sheppo 		D2(vswp, "%s: %d: memcookie 0 : addr 0x%llx : size 0x%llx",
83271ae08745Sheppo 			name, i, priv_addr->memcookie[0].addr,
83281ae08745Sheppo 			priv_addr->memcookie[0].size);
83291ae08745Sheppo 
83301ae08745Sheppo 		if (ncookies >= (uint32_t)(VSW_MAX_COOKIES + 1)) {
83311ae08745Sheppo 			DERR(vswp, "%s(%lld) ldc_mem_bind_handle returned "
83321ae08745Sheppo 				"invalid num of cookies (%d) for size 0x%llx",
83331ae08745Sheppo 				name, ldcp->ldc_id, ncookies,
83341ae08745Sheppo 				VSW_RING_EL_DATA_SZ);
83351ae08745Sheppo 
83361ae08745Sheppo 			goto setup_ring_cleanup;
83371ae08745Sheppo 		} else {
83381ae08745Sheppo 			for (j = 1; j < ncookies; j++) {
83391ae08745Sheppo 				rv = ldc_mem_nextcookie(priv_addr->memhandle,
83401ae08745Sheppo 					&(priv_addr->memcookie[j]));
83411ae08745Sheppo 				if (rv != 0) {
83421ae08745Sheppo 					DERR(vswp, "%s: ldc_mem_nextcookie "
83431ae08745Sheppo 						"failed rv (%d)", name, rv);
83441ae08745Sheppo 					goto setup_ring_cleanup;
83451ae08745Sheppo 				}
83461ae08745Sheppo 				D3(vswp, "%s: memcookie %d : addr 0x%llx : "
83471ae08745Sheppo 					"size 0x%llx", name, j,
83481ae08745Sheppo 					priv_addr->memcookie[j].addr,
83491ae08745Sheppo 					priv_addr->memcookie[j].size);
83501ae08745Sheppo 			}
83511ae08745Sheppo 
83521ae08745Sheppo 		}
83531ae08745Sheppo 		priv_addr->ncookies = ncookies;
83541ae08745Sheppo 		priv_addr->dstate = VIO_DESC_FREE;
83551ae08745Sheppo 
83561ae08745Sheppo 		if (pub_addr != NULL) {
83571ae08745Sheppo 
83581ae08745Sheppo 			/* link pub and private sides */
83591ae08745Sheppo 			priv_addr->descp = pub_addr;
83601ae08745Sheppo 
8361d10e4ef2Snarayan 			pub_addr->ncookies = priv_addr->ncookies;
8362d10e4ef2Snarayan 
8363d10e4ef2Snarayan 			for (nc = 0; nc < pub_addr->ncookies; nc++) {
8364d10e4ef2Snarayan 				bcopy(&priv_addr->memcookie[nc],
8365d10e4ef2Snarayan 					&pub_addr->memcookie[nc],
8366d10e4ef2Snarayan 					sizeof (ldc_mem_cookie_t));
8367d10e4ef2Snarayan 			}
8368d10e4ef2Snarayan 
83691ae08745Sheppo 			pub_addr->hdr.dstate = VIO_DESC_FREE;
83701ae08745Sheppo 			pub_addr++;
83711ae08745Sheppo 		}
83721ae08745Sheppo 
83731ae08745Sheppo 		/*
83741ae08745Sheppo 		 * move to next element in the dring and the next
83751ae08745Sheppo 		 * position in the data buffer.
83761ae08745Sheppo 		 */
83771ae08745Sheppo 		priv_addr++;
83781ae08745Sheppo 		tmpp += offset;
83791ae08745Sheppo 	}
83801ae08745Sheppo 
83811ae08745Sheppo 	return (0);
83821ae08745Sheppo 
83831ae08745Sheppo setup_ring_cleanup:
83841ae08745Sheppo 	priv_addr = dp->priv_addr;
83851ae08745Sheppo 
8386d10e4ef2Snarayan 	for (j = 0; j < i; j++) {
83871ae08745Sheppo 		(void) ldc_mem_unbind_handle(priv_addr->memhandle);
83881ae08745Sheppo 		(void) ldc_mem_free_handle(priv_addr->memhandle);
83891ae08745Sheppo 
8390d10e4ef2Snarayan 		mutex_destroy(&priv_addr->dstate_lock);
8391d10e4ef2Snarayan 
83921ae08745Sheppo 		priv_addr++;
83931ae08745Sheppo 	}
83941ae08745Sheppo 	kmem_free(dp->data_addr, dp->data_sz);
83951ae08745Sheppo 
83961ae08745Sheppo 	return (1);
83971ae08745Sheppo }
83981ae08745Sheppo 
83991ae08745Sheppo /*
84001ae08745Sheppo  * Searches the private section of a ring for a free descriptor,
84011ae08745Sheppo  * starting at the location of the last free descriptor found
84021ae08745Sheppo  * previously.
84031ae08745Sheppo  *
8404d10e4ef2Snarayan  * Returns 0 if free descriptor is available, and updates state
8405d10e4ef2Snarayan  * of private descriptor to VIO_DESC_READY,  otherwise returns 1.
84061ae08745Sheppo  *
84071ae08745Sheppo  * FUTURE: might need to return contiguous range of descriptors
84081ae08745Sheppo  * as dring info msg assumes all will be contiguous.
84091ae08745Sheppo  */
84101ae08745Sheppo static int
84111ae08745Sheppo vsw_dring_find_free_desc(dring_info_t *dringp,
84121ae08745Sheppo 		vsw_private_desc_t **priv_p, int *idx)
84131ae08745Sheppo {
8414d10e4ef2Snarayan 	vsw_private_desc_t	*addr = NULL;
84151ae08745Sheppo 	int			num = VSW_RING_NUM_EL;
84161ae08745Sheppo 	int			ret = 1;
84171ae08745Sheppo 
84181ae08745Sheppo 	D1(NULL, "%s enter\n", __func__);
84191ae08745Sheppo 
8420d10e4ef2Snarayan 	ASSERT(dringp->priv_addr != NULL);
84211ae08745Sheppo 
84221ae08745Sheppo 	D2(NULL, "%s: searching ring, dringp 0x%llx : start pos %lld",
8423d10e4ef2Snarayan 			__func__, dringp, dringp->end_idx);
84241ae08745Sheppo 
8425d10e4ef2Snarayan 	addr = (vsw_private_desc_t *)dringp->priv_addr + dringp->end_idx;
8426d10e4ef2Snarayan 
8427d10e4ef2Snarayan 	mutex_enter(&addr->dstate_lock);
84281ae08745Sheppo 	if (addr->dstate == VIO_DESC_FREE) {
8429d10e4ef2Snarayan 		addr->dstate = VIO_DESC_READY;
84301ae08745Sheppo 		*priv_p = addr;
8431d10e4ef2Snarayan 		*idx = dringp->end_idx;
8432d10e4ef2Snarayan 		dringp->end_idx = (dringp->end_idx + 1) % num;
84331ae08745Sheppo 		ret = 0;
8434d10e4ef2Snarayan 
84351ae08745Sheppo 	}
8436d10e4ef2Snarayan 	mutex_exit(&addr->dstate_lock);
84371ae08745Sheppo 
84381ae08745Sheppo 	/* ring full */
84391ae08745Sheppo 	if (ret == 1) {
8440d10e4ef2Snarayan 		D2(NULL, "%s: no desp free: started at %d", __func__,
8441d10e4ef2Snarayan 			dringp->end_idx);
84421ae08745Sheppo 	}
84431ae08745Sheppo 
84441ae08745Sheppo 	D1(NULL, "%s: exit\n", __func__);
84451ae08745Sheppo 
84461ae08745Sheppo 	return (ret);
84471ae08745Sheppo }
84481ae08745Sheppo 
84491ae08745Sheppo /*
84501ae08745Sheppo  * Map from a dring identifier to the ring itself. Returns
84511ae08745Sheppo  * pointer to ring or NULL if no match found.
8452445b4c2eSsb155480  *
8453445b4c2eSsb155480  * Should be called with dlistrw rwlock held as reader.
84541ae08745Sheppo  */
84551ae08745Sheppo static dring_info_t *
84561ae08745Sheppo vsw_ident2dring(lane_t *lane, uint64_t ident)
84571ae08745Sheppo {
84581ae08745Sheppo 	dring_info_t	*dp = NULL;
84591ae08745Sheppo 
84601ae08745Sheppo 	if ((dp = lane->dringp) == NULL) {
84611ae08745Sheppo 		return (NULL);
84621ae08745Sheppo 	} else {
84631ae08745Sheppo 		if (dp->ident == ident)
84641ae08745Sheppo 			return (dp);
84651ae08745Sheppo 
84661ae08745Sheppo 		while (dp != NULL) {
84671ae08745Sheppo 			if (dp->ident == ident)
84681ae08745Sheppo 				break;
84691ae08745Sheppo 			dp = dp->next;
84701ae08745Sheppo 		}
84711ae08745Sheppo 	}
84721ae08745Sheppo 
84731ae08745Sheppo 	return (dp);
84741ae08745Sheppo }
84751ae08745Sheppo 
84761ae08745Sheppo /*
84771ae08745Sheppo  * Set the default lane attributes. These are copied into
84781ae08745Sheppo  * the attr msg we send to our peer. If they are not acceptable
84791ae08745Sheppo  * then (currently) the handshake ends.
84801ae08745Sheppo  */
84811ae08745Sheppo static void
84821ae08745Sheppo vsw_set_lane_attr(vsw_t *vswp, lane_t *lp)
84831ae08745Sheppo {
84841ae08745Sheppo 	bzero(lp, sizeof (lane_t));
84851ae08745Sheppo 
84861ae08745Sheppo 	READ_ENTER(&vswp->if_lockrw);
84871ae08745Sheppo 	ether_copy(&(vswp->if_addr), &(lp->addr));
84881ae08745Sheppo 	RW_EXIT(&vswp->if_lockrw);
84891ae08745Sheppo 
84901ae08745Sheppo 	lp->mtu = VSW_MTU;
84911ae08745Sheppo 	lp->addr_type = ADDR_TYPE_MAC;
84921ae08745Sheppo 	lp->xfer_mode = VIO_DRING_MODE;
84931ae08745Sheppo 	lp->ack_freq = 0;	/* for shared mode */
8494d10e4ef2Snarayan 
8495d10e4ef2Snarayan 	mutex_enter(&lp->seq_lock);
84961ae08745Sheppo 	lp->seq_num = VNET_ISS;
8497d10e4ef2Snarayan 	mutex_exit(&lp->seq_lock);
84981ae08745Sheppo }
84991ae08745Sheppo 
85001ae08745Sheppo /*
85011ae08745Sheppo  * Verify that the attributes are acceptable.
85021ae08745Sheppo  *
85031ae08745Sheppo  * FUTURE: If some attributes are not acceptable, change them
85041ae08745Sheppo  * our desired values.
85051ae08745Sheppo  */
85061ae08745Sheppo static int
85071ae08745Sheppo vsw_check_attr(vnet_attr_msg_t *pkt, vsw_port_t *port)
85081ae08745Sheppo {
85091ae08745Sheppo 	int	ret = 0;
85101ae08745Sheppo 
85111ae08745Sheppo 	D1(NULL, "vsw_check_attr enter\n");
85121ae08745Sheppo 
85131ae08745Sheppo 	/*
85141ae08745Sheppo 	 * Note we currently only support in-band descriptors
85151ae08745Sheppo 	 * and descriptor rings, not packet based transfer (VIO_PKT_MODE)
85161ae08745Sheppo 	 */
85171ae08745Sheppo 	if ((pkt->xfer_mode != VIO_DESC_MODE) &&
85181ae08745Sheppo 			(pkt->xfer_mode != VIO_DRING_MODE)) {
85191ae08745Sheppo 		D2(NULL, "vsw_check_attr: unknown mode %x\n",
85201ae08745Sheppo 			pkt->xfer_mode);
85211ae08745Sheppo 		ret = 1;
85221ae08745Sheppo 	}
85231ae08745Sheppo 
85241ae08745Sheppo 	/* Only support MAC addresses at moment. */
85251ae08745Sheppo 	if ((pkt->addr_type != ADDR_TYPE_MAC) || (pkt->addr == 0)) {
85261ae08745Sheppo 		D2(NULL, "vsw_check_attr: invalid addr_type %x, "
85271ae08745Sheppo 			"or address 0x%llx\n", pkt->addr_type,
85281ae08745Sheppo 			pkt->addr);
85291ae08745Sheppo 		ret = 1;
85301ae08745Sheppo 	}
85311ae08745Sheppo 
85321ae08745Sheppo 	/*
85331ae08745Sheppo 	 * MAC address supplied by device should match that stored
85341ae08745Sheppo 	 * in the vsw-port OBP node. Need to decide what to do if they
85351ae08745Sheppo 	 * don't match, for the moment just warn but don't fail.
85361ae08745Sheppo 	 */
85371ae08745Sheppo 	if (bcmp(&pkt->addr, &port->p_macaddr, ETHERADDRL) != 0) {
85381ae08745Sheppo 		DERR(NULL, "vsw_check_attr: device supplied address "
85391ae08745Sheppo 			"0x%llx doesn't match node address 0x%llx\n",
85401ae08745Sheppo 			pkt->addr, port->p_macaddr);
85411ae08745Sheppo 	}
85421ae08745Sheppo 
85431ae08745Sheppo 	/*
85441ae08745Sheppo 	 * Ack freq only makes sense in pkt mode, in shared
85451ae08745Sheppo 	 * mode the ring descriptors say whether or not to
85461ae08745Sheppo 	 * send back an ACK.
85471ae08745Sheppo 	 */
85481ae08745Sheppo 	if ((pkt->xfer_mode == VIO_DRING_MODE) &&
85491ae08745Sheppo 				(pkt->ack_freq > 0)) {
85501ae08745Sheppo 		D2(NULL, "vsw_check_attr: non zero ack freq "
85511ae08745Sheppo 			" in SHM mode\n");
85521ae08745Sheppo 		ret = 1;
85531ae08745Sheppo 	}
85541ae08745Sheppo 
85551ae08745Sheppo 	/*
85561ae08745Sheppo 	 * Note: for the moment we only support ETHER
85571ae08745Sheppo 	 * frames. This may change in the future.
85581ae08745Sheppo 	 */
85591ae08745Sheppo 	if ((pkt->mtu > VSW_MTU) || (pkt->mtu <= 0)) {
85601ae08745Sheppo 		D2(NULL, "vsw_check_attr: invalid MTU (0x%llx)\n",
85611ae08745Sheppo 			pkt->mtu);
85621ae08745Sheppo 		ret = 1;
85631ae08745Sheppo 	}
85641ae08745Sheppo 
85651ae08745Sheppo 	D1(NULL, "vsw_check_attr exit\n");
85661ae08745Sheppo 
85671ae08745Sheppo 	return (ret);
85681ae08745Sheppo }
85691ae08745Sheppo 
85701ae08745Sheppo /*
85711ae08745Sheppo  * Returns 1 if there is a problem, 0 otherwise.
85721ae08745Sheppo  */
85731ae08745Sheppo static int
85741ae08745Sheppo vsw_check_dring_info(vio_dring_reg_msg_t *pkt)
85751ae08745Sheppo {
85761ae08745Sheppo 	_NOTE(ARGUNUSED(pkt))
85771ae08745Sheppo 
85781ae08745Sheppo 	int	ret = 0;
85791ae08745Sheppo 
85801ae08745Sheppo 	D1(NULL, "vsw_check_dring_info enter\n");
85811ae08745Sheppo 
85821ae08745Sheppo 	if ((pkt->num_descriptors == 0) ||
85831ae08745Sheppo 		(pkt->descriptor_size == 0) ||
85841ae08745Sheppo 		(pkt->ncookies != 1)) {
85851ae08745Sheppo 		DERR(NULL, "vsw_check_dring_info: invalid dring msg");
85861ae08745Sheppo 		ret = 1;
85871ae08745Sheppo 	}
85881ae08745Sheppo 
85891ae08745Sheppo 	D1(NULL, "vsw_check_dring_info exit\n");
85901ae08745Sheppo 
85911ae08745Sheppo 	return (ret);
85921ae08745Sheppo }
85931ae08745Sheppo 
85941ae08745Sheppo /*
85951ae08745Sheppo  * Returns 1 if two memory cookies match. Otherwise returns 0.
85961ae08745Sheppo  */
85971ae08745Sheppo static int
85981ae08745Sheppo vsw_mem_cookie_match(ldc_mem_cookie_t *m1, ldc_mem_cookie_t *m2)
85991ae08745Sheppo {
86001ae08745Sheppo 	if ((m1->addr != m2->addr) ||
86011ae08745Sheppo 		(m2->size != m2->size)) {
86021ae08745Sheppo 		return (0);
86031ae08745Sheppo 	} else {
86041ae08745Sheppo 		return (1);
86051ae08745Sheppo 	}
86061ae08745Sheppo }
86071ae08745Sheppo 
86081ae08745Sheppo /*
86091ae08745Sheppo  * Returns 1 if ring described in reg message matches that
86101ae08745Sheppo  * described by dring_info structure. Otherwise returns 0.
86111ae08745Sheppo  */
86121ae08745Sheppo static int
86131ae08745Sheppo vsw_dring_match(dring_info_t *dp, vio_dring_reg_msg_t *msg)
86141ae08745Sheppo {
86151ae08745Sheppo 	if ((msg->descriptor_size != dp->descriptor_size) ||
86161ae08745Sheppo 		(msg->num_descriptors != dp->num_descriptors) ||
86171ae08745Sheppo 		(msg->ncookies != dp->ncookies) ||
86181ae08745Sheppo 		!(vsw_mem_cookie_match(&msg->cookie[0], &dp->cookie[0]))) {
86191ae08745Sheppo 		return (0);
86201ae08745Sheppo 	} else {
86211ae08745Sheppo 		return (1);
86221ae08745Sheppo 	}
86231ae08745Sheppo 
86241ae08745Sheppo }
86251ae08745Sheppo 
86261ae08745Sheppo static caddr_t
86271ae08745Sheppo vsw_print_ethaddr(uint8_t *a, char *ebuf)
86281ae08745Sheppo {
86291ae08745Sheppo 	(void) sprintf(ebuf, "%x:%x:%x:%x:%x:%x",
86301ae08745Sheppo 	    a[0], a[1], a[2], a[3], a[4], a[5]);
86311ae08745Sheppo 	return (ebuf);
86321ae08745Sheppo }
86331ae08745Sheppo 
86341ae08745Sheppo /*
86351ae08745Sheppo  * Reset and free all the resources associated with
86361ae08745Sheppo  * the channel.
86371ae08745Sheppo  */
86381ae08745Sheppo static void
86391ae08745Sheppo vsw_free_lane_resources(vsw_ldc_t *ldcp, uint64_t dir)
86401ae08745Sheppo {
86411ae08745Sheppo 	dring_info_t		*dp, *dpp;
86421ae08745Sheppo 	lane_t			*lp = NULL;
86431ae08745Sheppo 	int			rv = 0;
86441ae08745Sheppo 
86451ae08745Sheppo 	ASSERT(ldcp != NULL);
86461ae08745Sheppo 
86471ae08745Sheppo 	D1(ldcp->ldc_vswp, "%s (%lld): enter", __func__, ldcp->ldc_id);
86481ae08745Sheppo 
86491ae08745Sheppo 	if (dir == INBOUND) {
86501ae08745Sheppo 		D2(ldcp->ldc_vswp, "%s: freeing INBOUND lane"
86511ae08745Sheppo 			" of channel %lld", __func__, ldcp->ldc_id);
86521ae08745Sheppo 		lp = &ldcp->lane_in;
86531ae08745Sheppo 	} else {
86541ae08745Sheppo 		D2(ldcp->ldc_vswp, "%s: freeing OUTBOUND lane"
86551ae08745Sheppo 			" of channel %lld", __func__, ldcp->ldc_id);
86561ae08745Sheppo 		lp = &ldcp->lane_out;
86571ae08745Sheppo 	}
86581ae08745Sheppo 
86591ae08745Sheppo 	lp->lstate = VSW_LANE_INACTIV;
8660d10e4ef2Snarayan 	mutex_enter(&lp->seq_lock);
86611ae08745Sheppo 	lp->seq_num = VNET_ISS;
8662d10e4ef2Snarayan 	mutex_exit(&lp->seq_lock);
86631ae08745Sheppo 	if (lp->dringp) {
86641ae08745Sheppo 		if (dir == INBOUND) {
8665445b4c2eSsb155480 			WRITE_ENTER(&lp->dlistrw);
86661ae08745Sheppo 			dp = lp->dringp;
86671ae08745Sheppo 			while (dp != NULL) {
86681ae08745Sheppo 				dpp = dp->next;
86691ae08745Sheppo 				if (dp->handle != NULL)
86701ae08745Sheppo 					(void) ldc_mem_dring_unmap(dp->handle);
86711ae08745Sheppo 				kmem_free(dp, sizeof (dring_info_t));
86721ae08745Sheppo 				dp = dpp;
86731ae08745Sheppo 			}
8674445b4c2eSsb155480 			RW_EXIT(&lp->dlistrw);
86751ae08745Sheppo 		} else {
86761ae08745Sheppo 			/*
86771ae08745Sheppo 			 * unbind, destroy exported dring, free dring struct
86781ae08745Sheppo 			 */
8679445b4c2eSsb155480 			WRITE_ENTER(&lp->dlistrw);
86801ae08745Sheppo 			dp = lp->dringp;
86811ae08745Sheppo 			rv = vsw_free_ring(dp);
8682445b4c2eSsb155480 			RW_EXIT(&lp->dlistrw);
86831ae08745Sheppo 		}
86841ae08745Sheppo 		if (rv == 0) {
86851ae08745Sheppo 			lp->dringp = NULL;
86861ae08745Sheppo 		}
86871ae08745Sheppo 	}
86881ae08745Sheppo 
86891ae08745Sheppo 	D1(ldcp->ldc_vswp, "%s (%lld): exit", __func__, ldcp->ldc_id);
86901ae08745Sheppo }
86911ae08745Sheppo 
86921ae08745Sheppo /*
86931ae08745Sheppo  * Free ring and all associated resources.
8694445b4c2eSsb155480  *
8695445b4c2eSsb155480  * Should be called with dlistrw rwlock held as writer.
86961ae08745Sheppo  */
86971ae08745Sheppo static int
86981ae08745Sheppo vsw_free_ring(dring_info_t *dp)
86991ae08745Sheppo {
87001ae08745Sheppo 	vsw_private_desc_t	*paddr = NULL;
87011ae08745Sheppo 	dring_info_t		*dpp;
87021ae08745Sheppo 	int			i, rv = 1;
87031ae08745Sheppo 
87041ae08745Sheppo 	while (dp != NULL) {
87051ae08745Sheppo 		mutex_enter(&dp->dlock);
87061ae08745Sheppo 		dpp = dp->next;
87071ae08745Sheppo 		if (dp->priv_addr != NULL) {
87081ae08745Sheppo 			/*
87091ae08745Sheppo 			 * First unbind and free the memory handles
87101ae08745Sheppo 			 * stored in each descriptor within the ring.
87111ae08745Sheppo 			 */
87121ae08745Sheppo 			for (i = 0; i < VSW_RING_NUM_EL; i++) {
87131ae08745Sheppo 				paddr = (vsw_private_desc_t *)
87141ae08745Sheppo 						dp->priv_addr + i;
87151ae08745Sheppo 				if (paddr->memhandle != NULL) {
87161ae08745Sheppo 					if (paddr->bound == 1) {
87171ae08745Sheppo 						rv = ldc_mem_unbind_handle(
87181ae08745Sheppo 							paddr->memhandle);
87191ae08745Sheppo 
87201ae08745Sheppo 						if (rv != 0) {
87211ae08745Sheppo 							DERR(NULL, "error "
87221ae08745Sheppo 							"unbinding handle for "
87231ae08745Sheppo 							"ring 0x%llx at pos %d",
87241ae08745Sheppo 							dp, i);
87251ae08745Sheppo 							mutex_exit(&dp->dlock);
87261ae08745Sheppo 							return (rv);
87271ae08745Sheppo 						}
87281ae08745Sheppo 						paddr->bound = 0;
87291ae08745Sheppo 					}
87301ae08745Sheppo 
87311ae08745Sheppo 					rv = ldc_mem_free_handle(
87321ae08745Sheppo 							paddr->memhandle);
87331ae08745Sheppo 					if (rv != 0) {
87341ae08745Sheppo 						DERR(NULL, "error freeing "
87351ae08745Sheppo 							"handle for ring "
87361ae08745Sheppo 							"0x%llx at pos %d",
87371ae08745Sheppo 							dp, i);
87381ae08745Sheppo 						mutex_exit(&dp->dlock);
87391ae08745Sheppo 						return (rv);
87401ae08745Sheppo 					}
87411ae08745Sheppo 					paddr->memhandle = NULL;
87421ae08745Sheppo 				}
8743d10e4ef2Snarayan 				mutex_destroy(&paddr->dstate_lock);
87441ae08745Sheppo 			}
87451ae08745Sheppo 			kmem_free(dp->priv_addr, (sizeof (vsw_private_desc_t)
87461ae08745Sheppo 					* VSW_RING_NUM_EL));
87471ae08745Sheppo 		}
87481ae08745Sheppo 
87491ae08745Sheppo 		/*
87501ae08745Sheppo 		 * Now unbind and destroy the ring itself.
87511ae08745Sheppo 		 */
87521ae08745Sheppo 		if (dp->handle != NULL) {
87531ae08745Sheppo 			(void) ldc_mem_dring_unbind(dp->handle);
87541ae08745Sheppo 			(void) ldc_mem_dring_destroy(dp->handle);
87551ae08745Sheppo 		}
87561ae08745Sheppo 
87571ae08745Sheppo 		if (dp->data_addr != NULL) {
87581ae08745Sheppo 			kmem_free(dp->data_addr, dp->data_sz);
87591ae08745Sheppo 		}
87601ae08745Sheppo 
87611ae08745Sheppo 		mutex_exit(&dp->dlock);
87621ae08745Sheppo 		mutex_destroy(&dp->dlock);
8763d10e4ef2Snarayan 		mutex_destroy(&dp->restart_lock);
87641ae08745Sheppo 		kmem_free(dp, sizeof (dring_info_t));
87651ae08745Sheppo 
87661ae08745Sheppo 		dp = dpp;
87671ae08745Sheppo 	}
87681ae08745Sheppo 	return (0);
87691ae08745Sheppo }
87701ae08745Sheppo 
87711ae08745Sheppo /*
87721ae08745Sheppo  * Debugging routines
87731ae08745Sheppo  */
87741ae08745Sheppo static void
87751ae08745Sheppo display_state(void)
87761ae08745Sheppo {
87771ae08745Sheppo 	vsw_t		*vswp;
87781ae08745Sheppo 	vsw_port_list_t	*plist;
87791ae08745Sheppo 	vsw_port_t 	*port;
87801ae08745Sheppo 	vsw_ldc_list_t	*ldcl;
87811ae08745Sheppo 	vsw_ldc_t 	*ldcp;
87821ae08745Sheppo 
87831ae08745Sheppo 	cmn_err(CE_NOTE, "***** system state *****");
87841ae08745Sheppo 
87851ae08745Sheppo 	for (vswp = vsw_head; vswp; vswp = vswp->next) {
87861ae08745Sheppo 		plist = &vswp->plist;
87871ae08745Sheppo 		READ_ENTER(&plist->lockrw);
87881ae08745Sheppo 		cmn_err(CE_CONT, "vsw instance %d has %d ports attached\n",
87891ae08745Sheppo 			vswp->instance, plist->num_ports);
87901ae08745Sheppo 
87911ae08745Sheppo 		for (port = plist->head; port != NULL; port = port->p_next) {
87921ae08745Sheppo 			ldcl = &port->p_ldclist;
87931ae08745Sheppo 			cmn_err(CE_CONT, "port %d : %d ldcs attached\n",
87941ae08745Sheppo 				port->p_instance, ldcl->num_ldcs);
87951ae08745Sheppo 			READ_ENTER(&ldcl->lockrw);
87961ae08745Sheppo 			ldcp = ldcl->head;
87971ae08745Sheppo 			for (; ldcp != NULL; ldcp = ldcp->ldc_next) {
87981ae08745Sheppo 				cmn_err(CE_CONT, "chan %lu : dev %d : "
87991ae08745Sheppo 					"status %d : phase %u\n",
88001ae08745Sheppo 					ldcp->ldc_id, ldcp->dev_class,
88011ae08745Sheppo 					ldcp->ldc_status, ldcp->hphase);
88021ae08745Sheppo 				cmn_err(CE_CONT, "chan %lu : lsession %lu : "
88031ae08745Sheppo 					"psession %lu\n",
88041ae08745Sheppo 					ldcp->ldc_id,
88051ae08745Sheppo 					ldcp->local_session,
88061ae08745Sheppo 					ldcp->peer_session);
88071ae08745Sheppo 
88081ae08745Sheppo 				cmn_err(CE_CONT, "Inbound lane:\n");
88091ae08745Sheppo 				display_lane(&ldcp->lane_in);
88101ae08745Sheppo 				cmn_err(CE_CONT, "Outbound lane:\n");
88111ae08745Sheppo 				display_lane(&ldcp->lane_out);
88121ae08745Sheppo 			}
88131ae08745Sheppo 			RW_EXIT(&ldcl->lockrw);
88141ae08745Sheppo 		}
88151ae08745Sheppo 		RW_EXIT(&plist->lockrw);
88161ae08745Sheppo 	}
88171ae08745Sheppo 	cmn_err(CE_NOTE, "***** system state *****");
88181ae08745Sheppo }
88191ae08745Sheppo 
88201ae08745Sheppo static void
88211ae08745Sheppo display_lane(lane_t *lp)
88221ae08745Sheppo {
88231ae08745Sheppo 	dring_info_t	*drp;
88241ae08745Sheppo 
88251ae08745Sheppo 	cmn_err(CE_CONT, "ver 0x%x:0x%x : state %lx : mtu 0x%lx\n",
88261ae08745Sheppo 		lp->ver_major, lp->ver_minor, lp->lstate, lp->mtu);
88271ae08745Sheppo 	cmn_err(CE_CONT, "addr_type %d : addr 0x%lx : xmode %d\n",
88281ae08745Sheppo 		lp->addr_type, lp->addr, lp->xfer_mode);
88291ae08745Sheppo 	cmn_err(CE_CONT, "dringp 0x%lx\n", (uint64_t)lp->dringp);
88301ae08745Sheppo 
88311ae08745Sheppo 	cmn_err(CE_CONT, "Dring info:\n");
88321ae08745Sheppo 	for (drp = lp->dringp; drp != NULL; drp = drp->next) {
88331ae08745Sheppo 		cmn_err(CE_CONT, "\tnum_desc %u : dsize %u\n",
88341ae08745Sheppo 			drp->num_descriptors, drp->descriptor_size);
88351ae08745Sheppo 		cmn_err(CE_CONT, "\thandle 0x%lx\n", drp->handle);
88361ae08745Sheppo 		cmn_err(CE_CONT, "\tpub_addr 0x%lx : priv_addr 0x%lx\n",
88371ae08745Sheppo 			(uint64_t)drp->pub_addr, (uint64_t)drp->priv_addr);
88381ae08745Sheppo 		cmn_err(CE_CONT, "\tident 0x%lx : end_idx %lu\n",
88391ae08745Sheppo 			drp->ident, drp->end_idx);
88401ae08745Sheppo 		display_ring(drp);
88411ae08745Sheppo 	}
88421ae08745Sheppo }
88431ae08745Sheppo 
88441ae08745Sheppo static void
88451ae08745Sheppo display_ring(dring_info_t *dringp)
88461ae08745Sheppo {
88471ae08745Sheppo 	uint64_t		i;
88481ae08745Sheppo 	uint64_t		priv_count = 0;
88491ae08745Sheppo 	uint64_t		pub_count = 0;
88501ae08745Sheppo 	vnet_public_desc_t	*pub_addr = NULL;
88511ae08745Sheppo 	vsw_private_desc_t	*priv_addr = NULL;
88521ae08745Sheppo 
88531ae08745Sheppo 	for (i = 0; i < VSW_RING_NUM_EL; i++) {
88541ae08745Sheppo 		if (dringp->pub_addr != NULL) {
88551ae08745Sheppo 			pub_addr = (vnet_public_desc_t *)dringp->pub_addr + i;
88561ae08745Sheppo 
88571ae08745Sheppo 			if (pub_addr->hdr.dstate == VIO_DESC_FREE)
88581ae08745Sheppo 				pub_count++;
88591ae08745Sheppo 		}
88601ae08745Sheppo 
88611ae08745Sheppo 		if (dringp->priv_addr != NULL) {
88621ae08745Sheppo 			priv_addr =
88631ae08745Sheppo 				(vsw_private_desc_t *)dringp->priv_addr + i;
88641ae08745Sheppo 
88651ae08745Sheppo 			if (priv_addr->dstate == VIO_DESC_FREE)
88661ae08745Sheppo 				priv_count++;
88671ae08745Sheppo 		}
88681ae08745Sheppo 	}
88691ae08745Sheppo 	cmn_err(CE_CONT, "\t%lu elements: %lu priv free: %lu pub free\n",
88701ae08745Sheppo 			i, priv_count, pub_count);
88711ae08745Sheppo }
88721ae08745Sheppo 
88731ae08745Sheppo static void
88741ae08745Sheppo dump_flags(uint64_t state)
88751ae08745Sheppo {
88761ae08745Sheppo 	int	i;
88771ae08745Sheppo 
88781ae08745Sheppo 	typedef struct flag_name {
88791ae08745Sheppo 		int	flag_val;
88801ae08745Sheppo 		char	*flag_name;
88811ae08745Sheppo 	} flag_name_t;
88821ae08745Sheppo 
88831ae08745Sheppo 	flag_name_t	flags[] = {
88841ae08745Sheppo 		VSW_VER_INFO_SENT, "VSW_VER_INFO_SENT",
88851ae08745Sheppo 		VSW_VER_INFO_RECV, "VSW_VER_INFO_RECV",
88861ae08745Sheppo 		VSW_VER_ACK_RECV, "VSW_VER_ACK_RECV",
88871ae08745Sheppo 		VSW_VER_ACK_SENT, "VSW_VER_ACK_SENT",
88881ae08745Sheppo 		VSW_VER_NACK_RECV, "VSW_VER_NACK_RECV",
88891ae08745Sheppo 		VSW_VER_NACK_SENT, "VSW_VER_NACK_SENT",
88901ae08745Sheppo 		VSW_ATTR_INFO_SENT, "VSW_ATTR_INFO_SENT",
88911ae08745Sheppo 		VSW_ATTR_INFO_RECV, "VSW_ATTR_INFO_RECV",
88921ae08745Sheppo 		VSW_ATTR_ACK_SENT, "VSW_ATTR_ACK_SENT",
88931ae08745Sheppo 		VSW_ATTR_ACK_RECV, "VSW_ATTR_ACK_RECV",
88941ae08745Sheppo 		VSW_ATTR_NACK_SENT, "VSW_ATTR_NACK_SENT",
88951ae08745Sheppo 		VSW_ATTR_NACK_RECV, "VSW_ATTR_NACK_RECV",
88961ae08745Sheppo 		VSW_DRING_INFO_SENT, "VSW_DRING_INFO_SENT",
88971ae08745Sheppo 		VSW_DRING_INFO_RECV, "VSW_DRING_INFO_RECV",
88981ae08745Sheppo 		VSW_DRING_ACK_SENT, "VSW_DRING_ACK_SENT",
88991ae08745Sheppo 		VSW_DRING_ACK_RECV, "VSW_DRING_ACK_RECV",
89001ae08745Sheppo 		VSW_DRING_NACK_SENT, "VSW_DRING_NACK_SENT",
89011ae08745Sheppo 		VSW_DRING_NACK_RECV, "VSW_DRING_NACK_RECV",
89021ae08745Sheppo 		VSW_RDX_INFO_SENT, "VSW_RDX_INFO_SENT",
89031ae08745Sheppo 		VSW_RDX_INFO_RECV, "VSW_RDX_INFO_RECV",
89041ae08745Sheppo 		VSW_RDX_ACK_SENT, "VSW_RDX_ACK_SENT",
89051ae08745Sheppo 		VSW_RDX_ACK_RECV, "VSW_RDX_ACK_RECV",
89061ae08745Sheppo 		VSW_RDX_NACK_SENT, "VSW_RDX_NACK_SENT",
89071ae08745Sheppo 		VSW_RDX_NACK_RECV, "VSW_RDX_NACK_RECV",
89081ae08745Sheppo 		VSW_MCST_INFO_SENT, "VSW_MCST_INFO_SENT",
89091ae08745Sheppo 		VSW_MCST_INFO_RECV, "VSW_MCST_INFO_RECV",
89101ae08745Sheppo 		VSW_MCST_ACK_SENT, "VSW_MCST_ACK_SENT",
89111ae08745Sheppo 		VSW_MCST_ACK_RECV, "VSW_MCST_ACK_RECV",
89121ae08745Sheppo 		VSW_MCST_NACK_SENT, "VSW_MCST_NACK_SENT",
89131ae08745Sheppo 		VSW_MCST_NACK_RECV, "VSW_MCST_NACK_RECV",
89141ae08745Sheppo 		VSW_LANE_ACTIVE, "VSW_LANE_ACTIVE"};
89151ae08745Sheppo 
89161ae08745Sheppo 	DERR(NULL, "DUMP_FLAGS: %llx\n", state);
89171ae08745Sheppo 	for (i = 0; i < sizeof (flags)/sizeof (flag_name_t); i++) {
89181ae08745Sheppo 		if (state & flags[i].flag_val)
89191ae08745Sheppo 			DERR(NULL, "DUMP_FLAGS %s", flags[i].flag_name);
89201ae08745Sheppo 	}
89211ae08745Sheppo }
8922